1*f22f0ef4Schristos /* gznorm.c -- normalize a gzip stream
2*f22f0ef4Schristos  * Copyright (C) 2018 Mark Adler
3*f22f0ef4Schristos  * For conditions of distribution and use, see copyright notice in zlib.h
4*f22f0ef4Schristos  * Version 1.0  7 Oct 2018  Mark Adler */
5*f22f0ef4Schristos 
6*f22f0ef4Schristos // gznorm takes a gzip stream, potentially containing multiple members, and
7*f22f0ef4Schristos // converts it to a gzip stream with a single member. In addition the gzip
8*f22f0ef4Schristos // header is normalized, removing the file name and time stamp, and setting the
9*f22f0ef4Schristos // other header contents (XFL, OS) to fixed values. gznorm does not recompress
10*f22f0ef4Schristos // the data, so it is fast, but no advantage is gained from the history that
11*f22f0ef4Schristos // could be available across member boundaries.
12*f22f0ef4Schristos 
13*f22f0ef4Schristos #include <stdio.h>      // fread, fwrite, putc, fflush, ferror, fprintf,
14*f22f0ef4Schristos                         // vsnprintf, stdout, stderr, NULL, FILE
15*f22f0ef4Schristos #include <stdlib.h>     // malloc, free
16*f22f0ef4Schristos #include <string.h>     // strerror
17*f22f0ef4Schristos #include <errno.h>      // errno
18*f22f0ef4Schristos #include <stdarg.h>     // va_list, va_start, va_end
19*f22f0ef4Schristos #include "zlib.h"       // inflateInit2, inflate, inflateReset, inflateEnd,
20*f22f0ef4Schristos                         // z_stream, z_off_t, crc32_combine, Z_NULL, Z_BLOCK,
21*f22f0ef4Schristos                         // Z_OK, Z_STREAM_END, Z_BUF_ERROR, Z_DATA_ERROR,
22*f22f0ef4Schristos                         // Z_MEM_ERROR
23*f22f0ef4Schristos 
// SET_BINARY_MODE(file) switches a stdio stream to binary mode on platforms
// that distinguish text from binary streams, and expands to nothing on all
// other platforms. _WIN32 is tested in addition to WIN32 because WIN32 is
// normally supplied by SDK headers or build flags, whereas _WIN32 is the macro
// predefined by the Windows compilers themselves.
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || \
    defined(__CYGWIN__)
#  include <fcntl.h>
#  include <io.h>
#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#else
#  define SET_BINARY_MODE(file)
#endif

// Functions marked local have internal linkage (file scope only).
#define local static
33*f22f0ef4Schristos 
// printf to an allocated string.
//
// fmt and the variable arguments are interpreted exactly as by printf(). The
// result is formatted twice: once with a zero-length buffer to measure it, and
// once into a buffer of exactly the required size.
//
// Returns a pointer to a newly allocated, nul-terminated string that the
// caller must free(), or NULL if either formatting pass or the allocation
// fails.
local char *aprintf(const char *fmt, ...) {
    // Get the length of the result of the printf.
    va_list args;
    va_start(args, fmt);
    int len = vsnprintf(NULL, 0, fmt, args);
    va_end(args);
    if (len < 0)
        return NULL;

    // Compute the buffer size in size_t so that len + 1 cannot overflow int.
    size_t size = (size_t)len + 1;

    // Allocate the required space and printf to it.
    char *str = malloc(size);
    if (str == NULL)
        return NULL;
    va_start(args, fmt);
    int ret = vsnprintf(str, size, fmt, args);
    va_end(args);

    // The second pass must succeed and fit; otherwise the buffer contents are
    // not a usable message, so release it and report failure.
    if (ret < 0 || (size_t)ret >= size) {
        free(str);
        return NULL;
    }
    return str;
}
54*f22f0ef4Schristos 
// Return from the enclosing function with an error: put an allocated error
// message (or NULL if it could not be built) in *err, release the inflate
// state, and return 1. The macro expects a z_stream named strm and a char **
// named err to be in scope where it is used.
//
// The message is formatted before inflateEnd() is called so that arguments
// such as strerror(errno) or strm.msg are evaluated before any cleanup that
// could disturb them. Doing an inflateEnd() on an already ended state, or one
// with state set to Z_NULL, is permitted.
#define BYE(...) \
    do { \
        *err = aprintf(__VA_ARGS__); \
        inflateEnd(&strm); \
        return 1; \
    } while (0)

// Chunk size for buffered reads and for decompression. Twice this many bytes
// will be allocated on the stack by gzip_normalize(). Must fit in an unsigned.
#define CHUNK 16384
68*f22f0ef4Schristos 
69*f22f0ef4Schristos // Read a gzip stream from in and write an equivalent normalized gzip stream to
70*f22f0ef4Schristos // out. If given no input, an empty gzip stream will be written. If successful,
71*f22f0ef4Schristos // 0 is returned, and *err is set to NULL. On error, 1 is returned, where the
72*f22f0ef4Schristos // details of the error are returned in *err, a pointer to an allocated string.
73*f22f0ef4Schristos //
74*f22f0ef4Schristos // The input may be a stream with multiple gzip members, which is converted to
75*f22f0ef4Schristos // a single gzip member on the output. Each gzip member is decompressed at the
76*f22f0ef4Schristos // level of deflate blocks. This enables clearing the last-block bit, shifting
77*f22f0ef4Schristos // the compressed data to concatenate to the previous member's compressed data,
78*f22f0ef4Schristos // which can end at an arbitrary bit boundary, and identifying stored blocks in
79*f22f0ef4Schristos // order to resynchronize those to byte boundaries. The deflate compressed data
80*f22f0ef4Schristos // is terminated with a 10-bit empty fixed block. If any members on the input
81*f22f0ef4Schristos // end with a 10-bit empty fixed block, then that block is excised from the
82*f22f0ef4Schristos // stream. This avoids appending empty fixed blocks for every normalization,
83*f22f0ef4Schristos // and assures that gzip_normalize applied a second time will not change the
84*f22f0ef4Schristos // input. The pad bits after stored block headers and after the final deflate
85*f22f0ef4Schristos // block are all forced to zeros.
gzip_normalize(FILE * in,FILE * out,char ** err)86*f22f0ef4Schristos local int gzip_normalize(FILE *in, FILE *out, char **err) {
87*f22f0ef4Schristos     // initialize the inflate engine to process a gzip member
88*f22f0ef4Schristos     z_stream strm;
89*f22f0ef4Schristos     strm.zalloc = Z_NULL;
90*f22f0ef4Schristos     strm.zfree = Z_NULL;
91*f22f0ef4Schristos     strm.opaque = Z_NULL;
92*f22f0ef4Schristos     strm.avail_in = 0;
93*f22f0ef4Schristos     strm.next_in = Z_NULL;
94*f22f0ef4Schristos     if (inflateInit2(&strm, 15 + 16) != Z_OK)
95*f22f0ef4Schristos         BYE("out of memory");
96*f22f0ef4Schristos 
97*f22f0ef4Schristos     // State while processing the input gzip stream.
98*f22f0ef4Schristos     enum {              // BETWEEN -> HEAD -> BLOCK -> TAIL -> BETWEEN -> ...
99*f22f0ef4Schristos         BETWEEN,        // between gzip members (must end in this state)
100*f22f0ef4Schristos         HEAD,           // reading a gzip header
101*f22f0ef4Schristos         BLOCK,          // reading deflate blocks
102*f22f0ef4Schristos         TAIL            // reading a gzip trailer
103*f22f0ef4Schristos     } state = BETWEEN;              // current component being processed
104*f22f0ef4Schristos     unsigned long crc = 0;          // accumulated CRC of uncompressed data
105*f22f0ef4Schristos     unsigned long len = 0;          // accumulated length of uncompressed data
106*f22f0ef4Schristos     unsigned long buf = 0;          // deflate stream bit buffer of num bits
107*f22f0ef4Schristos     int num = 0;                    // number of bits in buf (at bottom)
108*f22f0ef4Schristos 
109*f22f0ef4Schristos     // Write a canonical gzip header (no mod time, file name, comment, extra
110*f22f0ef4Schristos     // block, or extra flags, and OS is marked as unknown).
111*f22f0ef4Schristos     fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
112*f22f0ef4Schristos 
113*f22f0ef4Schristos     // Process the gzip stream from in until reaching the end of the input,
114*f22f0ef4Schristos     // encountering invalid input, or experiencing an i/o error.
115*f22f0ef4Schristos     int more;                       // true if not at the end of the input
116*f22f0ef4Schristos     do {
117*f22f0ef4Schristos         // State inside this loop.
118*f22f0ef4Schristos         unsigned char *put;         // next input buffer location to process
119*f22f0ef4Schristos         int prev;                   // number of bits from previous block in
120*f22f0ef4Schristos                                     // the bit buffer, or -1 if not at the
121*f22f0ef4Schristos                                     // start of a block
122*f22f0ef4Schristos         unsigned long long memb;    // uncompressed length of member
123*f22f0ef4Schristos         size_t tail;                // number of trailer bytes read (0..8)
124*f22f0ef4Schristos         unsigned long part;         // accumulated trailer component
125*f22f0ef4Schristos 
126*f22f0ef4Schristos         // Get the next chunk of input from in.
127*f22f0ef4Schristos         unsigned char dat[CHUNK];
128*f22f0ef4Schristos         strm.avail_in = fread(dat, 1, CHUNK, in);
129*f22f0ef4Schristos         if (strm.avail_in == 0)
130*f22f0ef4Schristos             break;
131*f22f0ef4Schristos         more = strm.avail_in == CHUNK;
132*f22f0ef4Schristos         strm.next_in = put = dat;
133*f22f0ef4Schristos 
134*f22f0ef4Schristos         // Run that chunk of input through the inflate engine to exhaustion.
135*f22f0ef4Schristos         do {
136*f22f0ef4Schristos             // At this point it is assured that strm.avail_in > 0.
137*f22f0ef4Schristos 
138*f22f0ef4Schristos             // Inflate until the end of a gzip component (header, deflate
139*f22f0ef4Schristos             // block, trailer) is reached, or until all of the chunk is
140*f22f0ef4Schristos             // consumed. The resulting decompressed data is discarded, though
141*f22f0ef4Schristos             // the total size of the decompressed data in each member is
142*f22f0ef4Schristos             // tracked, for the calculation of the total CRC.
143*f22f0ef4Schristos             do {
144*f22f0ef4Schristos                 // inflate and handle any errors
145*f22f0ef4Schristos                 unsigned char scrap[CHUNK];
146*f22f0ef4Schristos                 strm.avail_out = CHUNK;
147*f22f0ef4Schristos                 strm.next_out = scrap;
148*f22f0ef4Schristos                 int ret = inflate(&strm, Z_BLOCK);
149*f22f0ef4Schristos                 if (ret == Z_MEM_ERROR)
150*f22f0ef4Schristos                     BYE("out of memory");
151*f22f0ef4Schristos                 if (ret == Z_DATA_ERROR)
152*f22f0ef4Schristos                     BYE("input invalid: %s", strm.msg);
153*f22f0ef4Schristos                 if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_STREAM_END)
154*f22f0ef4Schristos                     BYE("internal error");
155*f22f0ef4Schristos 
156*f22f0ef4Schristos                 // Update the number of uncompressed bytes generated in this
157*f22f0ef4Schristos                 // member. The actual count (not modulo 2^32) is required to
158*f22f0ef4Schristos                 // correctly compute the total CRC.
159*f22f0ef4Schristos                 unsigned got = CHUNK - strm.avail_out;
160*f22f0ef4Schristos                 memb += got;
161*f22f0ef4Schristos                 if (memb < got)
162*f22f0ef4Schristos                     BYE("overflow error");
163*f22f0ef4Schristos 
164*f22f0ef4Schristos                 // Continue to process this chunk until it is consumed, or
165*f22f0ef4Schristos                 // until the end of a component (header, deflate block, or
166*f22f0ef4Schristos                 // trailer) is reached.
167*f22f0ef4Schristos             } while (strm.avail_out == 0 && (strm.data_type & 0x80) == 0);
168*f22f0ef4Schristos 
169*f22f0ef4Schristos             // Since strm.avail_in was > 0 for the inflate call, some input was
170*f22f0ef4Schristos             // just consumed. It is therefore assured that put < strm.next_in.
171*f22f0ef4Schristos 
172*f22f0ef4Schristos             // Disposition the consumed component or part of a component.
173*f22f0ef4Schristos             switch (state) {
174*f22f0ef4Schristos                 case BETWEEN:
175*f22f0ef4Schristos                     state = HEAD;
176*f22f0ef4Schristos                     // Fall through to HEAD when some or all of the header is
177*f22f0ef4Schristos                     // processed.
178*f22f0ef4Schristos 
179*f22f0ef4Schristos                 case HEAD:
180*f22f0ef4Schristos                     // Discard the header.
181*f22f0ef4Schristos                     if (strm.data_type & 0x80) {
182*f22f0ef4Schristos                         // End of header reached -- deflate blocks follow.
183*f22f0ef4Schristos                         put = strm.next_in;
184*f22f0ef4Schristos                         prev = num;
185*f22f0ef4Schristos                         memb = 0;
186*f22f0ef4Schristos                         state = BLOCK;
187*f22f0ef4Schristos                     }
188*f22f0ef4Schristos                     break;
189*f22f0ef4Schristos 
190*f22f0ef4Schristos                 case BLOCK:
191*f22f0ef4Schristos                     // Copy the deflate stream to the output, but with the
192*f22f0ef4Schristos                     // last-block-bit cleared. Re-synchronize stored block
193*f22f0ef4Schristos                     // headers to the output byte boundaries. The bytes at
194*f22f0ef4Schristos                     // put..strm.next_in-1 is the compressed data that has been
195*f22f0ef4Schristos                     // processed and is ready to be copied to the output.
196*f22f0ef4Schristos 
197*f22f0ef4Schristos                     // At this point, it is assured that new compressed data is
198*f22f0ef4Schristos                     // available, i.e., put < strm.next_in. If prev is -1, then
199*f22f0ef4Schristos                     // that compressed data starts in the middle of a deflate
200*f22f0ef4Schristos                     // block. If prev is not -1, then the bits in the bit
201*f22f0ef4Schristos                     // buffer, possibly combined with the bits in *put, contain
202*f22f0ef4Schristos                     // the three-bit header of the new deflate block. In that
203*f22f0ef4Schristos                     // case, prev is the number of bits from the previous block
204*f22f0ef4Schristos                     // that remain in the bit buffer. Since num is the number
205*f22f0ef4Schristos                     // of bits in the bit buffer, we have that num - prev is
206*f22f0ef4Schristos                     // the number of bits from the new block currently in the
207*f22f0ef4Schristos                     // bit buffer.
208*f22f0ef4Schristos 
209*f22f0ef4Schristos                     // If strm.data_type & 0xc0 is 0x80, then the last byte of
210*f22f0ef4Schristos                     // the available compressed data includes the last bits of
211*f22f0ef4Schristos                     // the end of a deflate block. In that case, that last byte
212*f22f0ef4Schristos                     // also has strm.data_type & 0x1f bits of the next deflate
213*f22f0ef4Schristos                     // block, in the range 0..7. If strm.data_type & 0xc0 is
214*f22f0ef4Schristos                     // 0xc0, then the last byte of the compressed data is the
215*f22f0ef4Schristos                     // end of the deflate stream, followed by strm.data_type &
216*f22f0ef4Schristos                     // 0x1f pad bits, also in the range 0..7.
217*f22f0ef4Schristos 
218*f22f0ef4Schristos                     // Set bits to the number of bits not yet consumed from the
219*f22f0ef4Schristos                     // last byte. If we are at the end of the block, bits is
220*f22f0ef4Schristos                     // either the number of bits in the last byte belonging to
221*f22f0ef4Schristos                     // the next block, or the number of pad bits after the
222*f22f0ef4Schristos                     // final block. In either of those cases, bits is in the
223*f22f0ef4Schristos                     // range 0..7.
224*f22f0ef4Schristos                     ;                   // (required due to C syntax oddity)
225*f22f0ef4Schristos                     int bits = strm.data_type & 0x1f;
226*f22f0ef4Schristos 
227*f22f0ef4Schristos                     if (prev != -1) {
228*f22f0ef4Schristos                         // We are at the start of a new block. Clear the last
229*f22f0ef4Schristos                         // block bit, and check for special cases. If it is a
230*f22f0ef4Schristos                         // stored block, then emit the header and pad to the
231*f22f0ef4Schristos                         // next byte boundary. If it is a final, empty fixed
232*f22f0ef4Schristos                         // block, then excise it.
233*f22f0ef4Schristos 
234*f22f0ef4Schristos                         // Some or all of the three header bits for this block
235*f22f0ef4Schristos                         // may already be in the bit buffer. Load any remaining
236*f22f0ef4Schristos                         // header bits into the bit buffer.
237*f22f0ef4Schristos                         if (num - prev < 3) {
238*f22f0ef4Schristos                             buf += (unsigned long)*put++ << num;
239*f22f0ef4Schristos                             num += 8;
240*f22f0ef4Schristos                         }
241*f22f0ef4Schristos 
242*f22f0ef4Schristos                         // Set last to have a 1 in the position of the last
243*f22f0ef4Schristos                         // block bit in the bit buffer.
244*f22f0ef4Schristos                         unsigned long last = (unsigned long)1 << prev;
245*f22f0ef4Schristos 
246*f22f0ef4Schristos                         if (((buf >> prev) & 7) == 3) {
247*f22f0ef4Schristos                             // This is a final fixed block. Load at least ten
248*f22f0ef4Schristos                             // bits from this block, including the header, into
249*f22f0ef4Schristos                             // the bit buffer. We already have at least three,
250*f22f0ef4Schristos                             // so at most one more byte needs to be loaded.
251*f22f0ef4Schristos                             if (num - prev < 10) {
252*f22f0ef4Schristos                                 if (put == strm.next_in)
253*f22f0ef4Schristos                                     // Need to go get and process more input.
254*f22f0ef4Schristos                                     // We'll end up back here to finish this.
255*f22f0ef4Schristos                                     break;
256*f22f0ef4Schristos                                 buf += (unsigned long)*put++ << num;
257*f22f0ef4Schristos                                 num += 8;
258*f22f0ef4Schristos                             }
259*f22f0ef4Schristos                             if (((buf >> prev) & 0x3ff) == 3) {
260*f22f0ef4Schristos                                 // That final fixed block is empty. Delete it
261*f22f0ef4Schristos                                 // to avoid adding an empty block every time a
262*f22f0ef4Schristos                                 // gzip stream is normalized.
263*f22f0ef4Schristos                                 num = prev;
264*f22f0ef4Schristos                                 buf &= last - 1;    // zero the pad bits
265*f22f0ef4Schristos                             }
266*f22f0ef4Schristos                         }
267*f22f0ef4Schristos                         else if (((buf >> prev) & 6) == 0) {
268*f22f0ef4Schristos                             // This is a stored block. Flush to the next
269*f22f0ef4Schristos                             // byte boundary after the three-bit header.
270*f22f0ef4Schristos                             num = (prev + 10) & ~7;
271*f22f0ef4Schristos                             buf &= last - 1;        // zero the pad bits
272*f22f0ef4Schristos                         }
273*f22f0ef4Schristos 
274*f22f0ef4Schristos                         // Clear the last block bit.
275*f22f0ef4Schristos                         buf &= ~last;
276*f22f0ef4Schristos 
277*f22f0ef4Schristos                         // Write out complete bytes in the bit buffer.
278*f22f0ef4Schristos                         while (num >= 8) {
279*f22f0ef4Schristos                             putc(buf, out);
280*f22f0ef4Schristos                             buf >>= 8;
281*f22f0ef4Schristos                             num -= 8;
282*f22f0ef4Schristos                         }
283*f22f0ef4Schristos 
284*f22f0ef4Schristos                         // If no more bytes left to process, then we have
285*f22f0ef4Schristos                         // consumed the byte that had bits from the next block.
286*f22f0ef4Schristos                         if (put == strm.next_in)
287*f22f0ef4Schristos                             bits = 0;
288*f22f0ef4Schristos                     }
289*f22f0ef4Schristos 
290*f22f0ef4Schristos                     // We are done handling the deflate block header. Now copy
291*f22f0ef4Schristos                     // all or almost all of the remaining compressed data that
292*f22f0ef4Schristos                     // has been processed so far. Don't copy one byte at the
293*f22f0ef4Schristos                     // end if it contains bits from the next deflate block or
294*f22f0ef4Schristos                     // pad bits at the end of a deflate block.
295*f22f0ef4Schristos 
296*f22f0ef4Schristos                     // mix is 1 if we are at the end of a deflate block, and if
297*f22f0ef4Schristos                     // some of the bits in the last byte follow this block. mix
298*f22f0ef4Schristos                     // is 0 if we are in the middle of a deflate block, if the
299*f22f0ef4Schristos                     // deflate block ended on a byte boundary, or if all of the
300*f22f0ef4Schristos                     // compressed data processed so far has been consumed.
301*f22f0ef4Schristos                     int mix = (strm.data_type & 0x80) && bits;
302*f22f0ef4Schristos 
303*f22f0ef4Schristos                     // Copy all of the processed compressed data to the output,
304*f22f0ef4Schristos                     // except for the last byte if it contains bits from the
305*f22f0ef4Schristos                     // next deflate block or pad bits at the end of the deflate
306*f22f0ef4Schristos                     // stream. Copy the data after shifting in num bits from
307*f22f0ef4Schristos                     // buf in front of it, leaving num bits from the end of the
308*f22f0ef4Schristos                     // compressed data in buf when done.
309*f22f0ef4Schristos                     unsigned char *end = strm.next_in - mix;
310*f22f0ef4Schristos                     if (put < end) {
311*f22f0ef4Schristos                         if (num)
312*f22f0ef4Schristos                             // Insert num bits from buf before the data being
313*f22f0ef4Schristos                             // copied.
314*f22f0ef4Schristos                             do {
315*f22f0ef4Schristos                                 buf += (unsigned)(*put++) << num;
316*f22f0ef4Schristos                                 putc(buf, out);
317*f22f0ef4Schristos                                 buf >>= 8;
318*f22f0ef4Schristos                             } while (put < end);
319*f22f0ef4Schristos                         else {
320*f22f0ef4Schristos                             // No shifting needed -- write directly.
321*f22f0ef4Schristos                             fwrite(put, 1, end - put, out);
322*f22f0ef4Schristos                             put = end;
323*f22f0ef4Schristos                         }
324*f22f0ef4Schristos                     }
325*f22f0ef4Schristos 
326*f22f0ef4Schristos                     // Process the last processed byte if it wasn't written.
327*f22f0ef4Schristos                     if (mix) {
328*f22f0ef4Schristos                         // Load the last byte into the bit buffer.
329*f22f0ef4Schristos                         buf += (unsigned)(*put++) << num;
330*f22f0ef4Schristos                         num += 8;
331*f22f0ef4Schristos 
332*f22f0ef4Schristos                         if (strm.data_type & 0x40) {
333*f22f0ef4Schristos                             // We are at the end of the deflate stream and
334*f22f0ef4Schristos                             // there are bits pad bits. Discard the pad bits
335*f22f0ef4Schristos                             // and write a byte to the output, if available.
336*f22f0ef4Schristos                             // Leave the num bits left over in buf to prepend
337*f22f0ef4Schristos                             // to the next deflate stream.
338*f22f0ef4Schristos                             num -= bits;
339*f22f0ef4Schristos                             if (num >= 8) {
340*f22f0ef4Schristos                                 putc(buf, out);
341*f22f0ef4Schristos                                 num -= 8;
342*f22f0ef4Schristos                                 buf >>= 8;
343*f22f0ef4Schristos                             }
344*f22f0ef4Schristos 
345*f22f0ef4Schristos                             // Force the pad bits in the bit buffer to zeros.
346*f22f0ef4Schristos                             buf &= ((unsigned long)1 << num) - 1;
347*f22f0ef4Schristos 
348*f22f0ef4Schristos                             // Don't need to set prev here since going to TAIL.
349*f22f0ef4Schristos                         }
350*f22f0ef4Schristos                         else
351*f22f0ef4Schristos                             // At the end of an internal deflate block. Leave
352*f22f0ef4Schristos                             // the last byte in the bit buffer to examine on
353*f22f0ef4Schristos                             // the next entry to BLOCK, when more bits from the
354*f22f0ef4Schristos                             // next block will be available.
355*f22f0ef4Schristos                             prev = num - bits;      // number of bits in buffer
356*f22f0ef4Schristos                                                     // from current block
357*f22f0ef4Schristos                     }
358*f22f0ef4Schristos 
359*f22f0ef4Schristos                     // Don't have a byte left over, so we are in the middle of
360*f22f0ef4Schristos                     // a deflate block, or the deflate block ended on a byte
361*f22f0ef4Schristos                     // boundary. Set prev appropriately for the next entry into
362*f22f0ef4Schristos                     // BLOCK.
363*f22f0ef4Schristos                     else if (strm.data_type & 0x80)
364*f22f0ef4Schristos                         // The block ended on a byte boundary, so no header
365*f22f0ef4Schristos                         // bits are in the bit buffer.
366*f22f0ef4Schristos                         prev = num;
367*f22f0ef4Schristos                     else
368*f22f0ef4Schristos                         // In the middle of a deflate block, so no header here.
369*f22f0ef4Schristos                         prev = -1;
370*f22f0ef4Schristos 
371*f22f0ef4Schristos                     // Check for the end of the deflate stream.
372*f22f0ef4Schristos                     if ((strm.data_type & 0xc0) == 0xc0) {
373*f22f0ef4Schristos                         // That ends the deflate stream on the input side, the
374*f22f0ef4Schristos                         // pad bits were discarded, and any remaining bits from
375*f22f0ef4Schristos                         // the last block in the stream are saved in the bit
376*f22f0ef4Schristos                         // buffer to prepend to the next stream. Process the
377*f22f0ef4Schristos                         // gzip trailer next.
378*f22f0ef4Schristos                         tail = 0;
379*f22f0ef4Schristos                         part = 0;
380*f22f0ef4Schristos                         state = TAIL;
381*f22f0ef4Schristos                     }
382*f22f0ef4Schristos                     break;
383*f22f0ef4Schristos 
384*f22f0ef4Schristos                 case TAIL:
385*f22f0ef4Schristos                     // Accumulate available trailer bytes to update the total
386*f22f0ef4Schristos                     // CRC and the total uncompressed length.
387*f22f0ef4Schristos                     do {
388*f22f0ef4Schristos                         part = (part >> 8) + ((unsigned long)(*put++) << 24);
389*f22f0ef4Schristos                         tail++;
390*f22f0ef4Schristos                         if (tail == 4) {
391*f22f0ef4Schristos                             // Update the total CRC.
392*f22f0ef4Schristos                             z_off_t len2 = memb;
393*f22f0ef4Schristos                             if (len2 < 0 || (unsigned long long)len2 != memb)
394*f22f0ef4Schristos                                 BYE("overflow error");
395*f22f0ef4Schristos                             crc = crc ? crc32_combine(crc, part, len2) : part;
396*f22f0ef4Schristos                             part = 0;
397*f22f0ef4Schristos                         }
398*f22f0ef4Schristos                         else if (tail == 8) {
399*f22f0ef4Schristos                             // Update the total uncompressed length. (It's ok
400*f22f0ef4Schristos                             // if this sum is done modulo 2^32.)
401*f22f0ef4Schristos                             len += part;
402*f22f0ef4Schristos 
403*f22f0ef4Schristos                             // At the end of a member. Set up to inflate an
404*f22f0ef4Schristos                             // immediately following gzip member. (If we made
405*f22f0ef4Schristos                             // it this far, then the trailer was valid.)
406*f22f0ef4Schristos                             if (inflateReset(&strm) != Z_OK)
407*f22f0ef4Schristos                                 BYE("internal error");
408*f22f0ef4Schristos                             state = BETWEEN;
409*f22f0ef4Schristos                             break;
410*f22f0ef4Schristos                         }
411*f22f0ef4Schristos                     } while (put < strm.next_in);
412*f22f0ef4Schristos                     break;
413*f22f0ef4Schristos             }
414*f22f0ef4Schristos 
415*f22f0ef4Schristos             // Process the input buffer until completely consumed.
416*f22f0ef4Schristos         } while (strm.avail_in > 0);
417*f22f0ef4Schristos 
418*f22f0ef4Schristos         // Process input until end of file, invalid input, or i/o error.
419*f22f0ef4Schristos     } while (more);
420*f22f0ef4Schristos 
421*f22f0ef4Schristos     // Done with the inflate engine.
422*f22f0ef4Schristos     inflateEnd(&strm);
423*f22f0ef4Schristos 
424*f22f0ef4Schristos     // Verify the validity of the input.
425*f22f0ef4Schristos     if (state != BETWEEN)
426*f22f0ef4Schristos         BYE("input invalid: incomplete gzip stream");
427*f22f0ef4Schristos 
428*f22f0ef4Schristos     // Write the remaining deflate stream bits, followed by a terminating
429*f22f0ef4Schristos     // deflate fixed block.
430*f22f0ef4Schristos     buf += (unsigned long)3 << num;
431*f22f0ef4Schristos     putc(buf, out);
432*f22f0ef4Schristos     putc(buf >> 8, out);
433*f22f0ef4Schristos     if (num > 6)
434*f22f0ef4Schristos         putc(0, out);
435*f22f0ef4Schristos 
436*f22f0ef4Schristos     // Write the gzip trailer, which is the CRC and the uncompressed length
437*f22f0ef4Schristos     // modulo 2^32, both in little-endian order.
438*f22f0ef4Schristos     putc(crc, out);
439*f22f0ef4Schristos     putc(crc >> 8, out);
440*f22f0ef4Schristos     putc(crc >> 16, out);
441*f22f0ef4Schristos     putc(crc >> 24, out);
442*f22f0ef4Schristos     putc(len, out);
443*f22f0ef4Schristos     putc(len >> 8, out);
444*f22f0ef4Schristos     putc(len >> 16, out);
445*f22f0ef4Schristos     putc(len >> 24, out);
446*f22f0ef4Schristos     fflush(out);
447*f22f0ef4Schristos 
448*f22f0ef4Schristos     // Check for any i/o errors.
449*f22f0ef4Schristos     if (ferror(in) || ferror(out))
450*f22f0ef4Schristos         BYE("i/o error: %s", strerror(errno));
451*f22f0ef4Schristos 
452*f22f0ef4Schristos     // All good!
453*f22f0ef4Schristos     *err = NULL;
454*f22f0ef4Schristos     return 0;
455*f22f0ef4Schristos }
456*f22f0ef4Schristos 
// Normalize the gzip stream on stdin, writing the result to stdout. Returns
// the result of gzip_normalize() as the exit status: 0 on success, 1 on
// error, in which case a diagnostic is written to stderr.
int main(void) {
    // Avoid end-of-line conversions on evil operating systems.
    SET_BINARY_MODE(stdin);
    SET_BINARY_MODE(stdout);

    // Normalize from stdin to stdout, returning 1 on error, 0 if ok.
    char *err = NULL;
    int ret = gzip_normalize(stdin, stdout, &err);
    if (ret)
        // The message itself is allocated, so it can be NULL if that
        // allocation failed. Passing NULL for %s is undefined, so substitute
        // a fixed string in that case.
        fprintf(stderr, "gznorm error: %s\n",
                err != NULL ? err : "(error message could not be allocated)");
    free(err);
    return ret;
}
471