1 /* $NetBSD: fitblk.c,v 1.1.1.1 2006/01/14 20:11:08 christos Exp $ */
2
3 /* fitblk.c: example of fitting compressed output to a specified size
4 Not copyrighted -- provided to the public domain
5 Version 1.1 25 November 2004 Mark Adler */
6
7 /* Version history:
8 1.0 24 Nov 2004 First version
9 1.1 25 Nov 2004 Change deflateInit2() to deflateInit()
10 Use fixed-size, stack-allocated raw buffers
11 Simplify code moving compression to subroutines
12 Use assert() for internal errors
13 Add detailed description of approach
14 */
15
16 /* Approach to just fitting a requested compressed size:
17
18 fitblk performs three compression passes on a portion of the input
19 data in order to determine how much of that input will compress to
20 nearly the requested output block size. The first pass generates
21 enough deflate blocks to produce output to fill the requested
22 output size plus a specified excess amount (see the EXCESS define
23 below). The last deflate block may go quite a bit past that, but
24 is discarded. The second pass decompresses and recompresses just
25 the compressed data that fit in the requested plus excess sized
26 buffer. The deflate process is terminated after that amount of
27 input, which is less than the amount consumed on the first pass.
28 The last deflate block of the result will be of a comparable size
29 to the final product, so that the header for that deflate block and
30 the compression ratio for that block will be about the same as in
31 the final product. The third compression pass decompresses the
32 result of the second step, but only the compressed data up to the
33 requested size minus an amount to allow the compressed stream to
34 complete (see the MARGIN define below). That will result in a
35 final compressed stream whose length is less than or equal to the
36 requested size. Assuming sufficient input and a requested size
37 greater than a few hundred bytes, the shortfall will typically be
38 less than ten bytes.
39
40 If the input is short enough that the first compression completes
41 before filling the requested output size, then that compressed
42 stream is returned with no recompression.
43
44 EXCESS is chosen to be just greater than the shortfall seen in a
45 two pass approach similar to the above. That shortfall is due to
46 the last deflate block compressing more efficiently with a smaller
47 header on the second pass. EXCESS is set to be large enough so
48 that there is enough uncompressed data for the second pass to fill
49 out the requested size, and small enough so that the final deflate
50 block of the second pass will be close in size to the final deflate
51 block of the third and final pass. MARGIN is chosen to be just
52 large enough to assure that the final compression has enough room
53 to complete in all cases.
54 */
55
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "zlib.h"
60
61 #define local static
62
63 /* print nastygram and leave */
quit(char * why)64 local void quit(char *why)
65 {
66 fprintf(stderr, "fitblk abort: %s\n", why);
67 exit(1);
68 }
69
70 #define RAWLEN 4096 /* intermediate uncompressed buffer size */
71
72 /* compress from file to def until provided buffer is full or end of
73 input reached; return last deflate() return value, or Z_ERRNO if
74 there was read error on the file */
partcompress(FILE * in,z_streamp def)75 local int partcompress(FILE *in, z_streamp def)
76 {
77 int ret, flush;
78 unsigned char raw[RAWLEN];
79
80 flush = Z_NO_FLUSH;
81 do {
82 def->avail_in = fread(raw, 1, RAWLEN, in);
83 if (ferror(in))
84 return Z_ERRNO;
85 def->next_in = raw;
86 if (feof(in))
87 flush = Z_FINISH;
88 ret = deflate(def, flush);
89 assert(ret != Z_STREAM_ERROR);
90 } while (def->avail_out != 0 && flush == Z_NO_FLUSH);
91 return ret;
92 }
93
94 /* recompress from inf's input to def's output; the input for inf and
95 the output for def are set in those structures before calling;
96 return last deflate() return value, or Z_MEM_ERROR if inflate()
97 was not able to allocate enough memory when it needed to */
recompress(z_streamp inf,z_streamp def)98 local int recompress(z_streamp inf, z_streamp def)
99 {
100 int ret, flush;
101 unsigned char raw[RAWLEN];
102
103 flush = Z_NO_FLUSH;
104 do {
105 /* decompress */
106 inf->avail_out = RAWLEN;
107 inf->next_out = raw;
108 ret = inflate(inf, Z_NO_FLUSH);
109 assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
110 ret != Z_NEED_DICT);
111 if (ret == Z_MEM_ERROR)
112 return ret;
113
114 /* compress what was decompresed until done or no room */
115 def->avail_in = RAWLEN - inf->avail_out;
116 def->next_in = raw;
117 if (inf->avail_out != 0)
118 flush = Z_FINISH;
119 ret = deflate(def, flush);
120 assert(ret != Z_STREAM_ERROR);
121 } while (ret != Z_STREAM_END && def->avail_out != 0);
122 return ret;
123 }
124
125 #define EXCESS 256 /* empirically determined stream overage */
126 #define MARGIN 8 /* amount to back off for completion */
127
128 /* compress from stdin to fixed-size block on stdout */
main(int argc,char ** argv)129 int main(int argc, char **argv)
130 {
131 int ret; /* return code */
132 unsigned size; /* requested fixed output block size */
133 unsigned have; /* bytes written by deflate() call */
134 unsigned char *blk; /* intermediate and final stream */
135 unsigned char *tmp; /* close to desired size stream */
136 z_stream def, inf; /* zlib deflate and inflate states */
137
138 /* get requested output size */
139 if (argc != 2)
140 quit("need one argument: size of output block");
141 ret = strtol(argv[1], argv + 1, 10);
142 if (argv[1][0] != 0)
143 quit("argument must be a number");
144 if (ret < 8) /* 8 is minimum zlib stream size */
145 quit("need positive size of 8 or greater");
146 size = (unsigned)ret;
147
148 /* allocate memory for buffers and compression engine */
149 blk = malloc(size + EXCESS);
150 def.zalloc = Z_NULL;
151 def.zfree = Z_NULL;
152 def.opaque = Z_NULL;
153 ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
154 if (ret != Z_OK || blk == NULL)
155 quit("out of memory");
156
157 /* compress from stdin until output full, or no more input */
158 def.avail_out = size + EXCESS;
159 def.next_out = blk;
160 ret = partcompress(stdin, &def);
161 if (ret == Z_ERRNO)
162 quit("error reading input");
163
164 /* if it all fit, then size was undersubscribed -- done! */
165 if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
166 /* write block to stdout */
167 have = size + EXCESS - def.avail_out;
168 if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
169 quit("error writing output");
170
171 /* clean up and print results to stderr */
172 ret = deflateEnd(&def);
173 assert(ret != Z_STREAM_ERROR);
174 free(blk);
175 fprintf(stderr,
176 "%u bytes unused out of %u requested (all input)\n",
177 size - have, size);
178 return 0;
179 }
180
181 /* it didn't all fit -- set up for recompression */
182 inf.zalloc = Z_NULL;
183 inf.zfree = Z_NULL;
184 inf.opaque = Z_NULL;
185 inf.avail_in = 0;
186 inf.next_in = Z_NULL;
187 ret = inflateInit(&inf);
188 tmp = malloc(size + EXCESS);
189 if (ret != Z_OK || tmp == NULL)
190 quit("out of memory");
191 ret = deflateReset(&def);
192 assert(ret != Z_STREAM_ERROR);
193
194 /* do first recompression close to the right amount */
195 inf.avail_in = size + EXCESS;
196 inf.next_in = blk;
197 def.avail_out = size + EXCESS;
198 def.next_out = tmp;
199 ret = recompress(&inf, &def);
200 if (ret == Z_MEM_ERROR)
201 quit("out of memory");
202
203 /* set up for next reocmpression */
204 ret = inflateReset(&inf);
205 assert(ret != Z_STREAM_ERROR);
206 ret = deflateReset(&def);
207 assert(ret != Z_STREAM_ERROR);
208
209 /* do second and final recompression (third compression) */
210 inf.avail_in = size - MARGIN; /* assure stream will complete */
211 inf.next_in = tmp;
212 def.avail_out = size;
213 def.next_out = blk;
214 ret = recompress(&inf, &def);
215 if (ret == Z_MEM_ERROR)
216 quit("out of memory");
217 assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */
218
219 /* done -- write block to stdout */
220 have = size - def.avail_out;
221 if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
222 quit("error writing output");
223
224 /* clean up and print results to stderr */
225 free(tmp);
226 ret = inflateEnd(&inf);
227 assert(ret != Z_STREAM_ERROR);
228 ret = deflateEnd(&def);
229 assert(ret != Z_STREAM_ERROR);
230 free(blk);
231 fprintf(stderr,
232 "%u bytes unused out of %u requested (%lu input)\n",
233 size - have, size, def.total_in);
234 return 0;
235 }
236