xref: /openbsd/gnu/usr.bin/cvs/src/zlib.c (revision d485f761)
1 /* zlib.c --- interface to the zlib compression library
2    Ian Lance Taylor <ian@cygnus.com>
3 
4    This file is part of GNU CVS.
5 
6    GNU CVS is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by the
8    Free Software Foundation; either version 2, or (at your option) any
9    later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.  */
15 
16 /* The routines in this file are the interface between the CVS
17    client/server support and the zlib compression library.  */
18 
19 #include <assert.h>
20 #include "cvs.h"
21 #include "buffer.h"
22 
23 #if defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT)
24 
25 #include "zlib.h"
26 
27 /* OS/2 doesn't have EIO.  FIXME: this whole notion of turning
28    a different error into EIO strikes me as pretty dubious.  */
29 #if !defined (EIO)
30 #define EIO EBADPOS
31 #endif
32 
33 /* The compression interface is built upon the buffer data structure.
34    We provide a buffer type which compresses or decompresses the data
35    which passes through it.  An input buffer decompresses the data
36    read from an underlying buffer, and an output buffer compresses the
37    data before writing it to an underlying buffer.  */
38 
/* This structure is the closure field of the buffer.  It is allocated
   by compress_buffer_initialize and handed back to each of the
   compress_buffer_* callbacks as their CLOSURE argument.  */

struct compress_buffer
{
    /* The underlying buffer.  An input buffer reads its compressed
       data from here; an output buffer writes its compressed data
       here.  */
    struct buffer *buf;
    /* The compression information: the zlib stream state, set up by
       inflateInit (input) or deflateInit (output).  */
    z_stream zstr;
};
48 
/* Forward declarations of the file-local routines defined below: the
   error reporter and the callbacks handed to buf_initialize.  */
static void compress_error PROTO((int, int, z_stream *, const char *));
static int compress_buffer_input PROTO((void *, char *, int, int, int *));
static int compress_buffer_output PROTO((void *, const char *, int, int *));
static int compress_buffer_flush PROTO((void *));
static int compress_buffer_block PROTO((void *, int));
static int compress_buffer_shutdown_input PROTO((void *));
static int compress_buffer_shutdown_output PROTO((void *));
56 
57 /* Report an error from one of the zlib functions.  */
58 
59 static void
60 compress_error (status, zstatus, zstr, msg)
61      int status;
62      int zstatus;
63      z_stream *zstr;
64      const char *msg;
65 {
66     int hold_errno;
67     const char *zmsg;
68     char buf[100];
69 
70     hold_errno = errno;
71 
72     zmsg = zstr->msg;
73     if (zmsg == NULL)
74     {
75         sprintf (buf, "error %d", zstatus);
76 	zmsg = buf;
77     }
78 
79     error (status,
80 	   zstatus == Z_ERRNO ? hold_errno : 0,
81 	   "%s: %s", msg, zmsg);
82 }
83 
84 /* Create a compression buffer.  */
85 
86 struct buffer *
87 compress_buffer_initialize (buf, input, level, memory)
88      struct buffer *buf;
89      int input;
90      int level;
91      void (*memory) PROTO((struct buffer *));
92 {
93     struct compress_buffer *n;
94     int zstatus;
95 
96     n = (struct compress_buffer *) xmalloc (sizeof *n);
97     memset (n, 0, sizeof *n);
98 
99     n->buf = buf;
100 
101     if (input)
102 	zstatus = inflateInit (&n->zstr);
103     else
104 	zstatus = deflateInit (&n->zstr, level);
105     if (zstatus != Z_OK)
106 	compress_error (1, zstatus, &n->zstr, "compression initialization");
107 
108     /* There may already be data buffered on BUF.  For an output
109        buffer, this is OK, because these routines will just use the
110        buffer routines to append data to the (uncompressed) data
111        already on BUF.  An input buffer expects to handle a single
112        buffer_data of buffered input to be uncompressed, so that is OK
113        provided there is only one buffer.  At present that is all
114        there ever will be; if this changes, compress_buffer_input must
115        be modified to handle multiple input buffers.  */
116     assert (! input || buf->data == NULL || buf->data->next == NULL);
117 
118     return buf_initialize (input ? compress_buffer_input : NULL,
119 			   input ? NULL : compress_buffer_output,
120 			   input ? NULL : compress_buffer_flush,
121 			   compress_buffer_block,
122 			   (input
123 			    ? compress_buffer_shutdown_input
124 			    : compress_buffer_shutdown_output),
125 			   memory,
126 			   n);
127 }
128 
129 /* Input data from a compression buffer.  */
130 
131 static int
132 compress_buffer_input (closure, data, need, size, got)
133      void *closure;
134      char *data;
135      int need;
136      int size;
137      int *got;
138 {
139     struct compress_buffer *cb = (struct compress_buffer *) closure;
140     struct buffer_data *bd;
141 
142     if (cb->buf->input == NULL)
143 	abort ();
144 
145     /* We use a single buffer_data structure to buffer up data which
146        the z_stream structure won't use yet.  We can safely store this
147        on cb->buf->data, because we never call the buffer routines on
148        cb->buf; we only call the buffer input routine, since that
149        gives us the semantics we want.  As noted in
150        compress_buffer_initialize, the buffer_data structure may
151        already exist, and hold data which was already read and
152        buffered before the decompression began.  */
153     bd = cb->buf->data;
154     if (bd == NULL)
155     {
156 	bd = ((struct buffer_data *) malloc (sizeof (struct buffer_data)));
157 	if (bd == NULL)
158 	    return -2;
159 	bd->text = (char *) malloc (BUFFER_DATA_SIZE);
160 	if (bd->text == NULL)
161 	{
162 	    free (bd);
163 	    return -2;
164 	}
165 	bd->bufp = bd->text;
166 	bd->size = 0;
167 	cb->buf->data = bd;
168     }
169 
170     cb->zstr.avail_out = size;
171     cb->zstr.next_out = (Bytef *) data;
172 
173     while (1)
174     {
175 	int zstatus, sofar, status, nread;
176 
177 	/* First try to inflate any data we already have buffered up.
178 	   This is useful even if we don't have any buffered data,
179 	   because there may be data buffered inside the z_stream
180 	   structure.  */
181 
182 	cb->zstr.avail_in = bd->size;
183 	cb->zstr.next_in = (Bytef *) bd->bufp;
184 
185 	do
186 	{
187 	    zstatus = inflate (&cb->zstr, Z_NO_FLUSH);
188 	    if (zstatus == Z_STREAM_END)
189 		break;
190 	    if (zstatus != Z_OK && zstatus != Z_BUF_ERROR)
191 	    {
192 		compress_error (0, zstatus, &cb->zstr, "inflate");
193 		return EIO;
194 	    }
195 	} while (cb->zstr.avail_in > 0
196 		 && cb->zstr.avail_out > 0);
197 
198 	bd->size = cb->zstr.avail_in;
199 	bd->bufp = (char *) cb->zstr.next_in;
200 
201 	if (zstatus == Z_STREAM_END)
202 	    return -1;
203 
204 	/* If we have obtained NEED bytes, then return, unless NEED is
205            zero and we haven't obtained anything at all.  If NEED is
206            zero, we will keep reading from the underlying buffer until
207            we either can't read anything, or we have managed to
208            inflate at least one byte.  */
209 	sofar = size - cb->zstr.avail_out;
210 	if (sofar > 0 && sofar >= need)
211 	    break;
212 
213 	/* All our buffered data should have been processed at this
214            point.  */
215 	assert (bd->size == 0);
216 
217 	/* This will work well in the server, because this call will
218 	   do an unblocked read and fetch all the available data.  In
219 	   the client, this will read a single byte from the stdio
220 	   stream, which will cause us to call inflate once per byte.
221 	   It would be more efficient if we could make a call which
222 	   would fetch all the available bytes, and at least one byte.  */
223 
224 	status = (*cb->buf->input) (cb->buf->closure, bd->text,
225 				    need > 0 ? 1 : 0,
226 				    BUFFER_DATA_SIZE, &nread);
227 	if (status != 0)
228 	    return status;
229 
230 	/* If we didn't read anything, then presumably the buffer is
231            in nonblocking mode, and we should just get out now with
232            whatever we've inflated.  */
233 	if (nread == 0)
234 	{
235 	    assert (need == 0);
236 	    break;
237 	}
238 
239 	bd->bufp = bd->text;
240 	bd->size = nread;
241     }
242 
243     *got = size - cb->zstr.avail_out;
244 
245     return 0;
246 }
247 
248 /* Output data to a compression buffer.  */
249 
250 static int
251 compress_buffer_output (closure, data, have, wrote)
252      void *closure;
253      const char *data;
254      int have;
255      int *wrote;
256 {
257     struct compress_buffer *cb = (struct compress_buffer *) closure;
258 
259     cb->zstr.avail_in = have;
260     cb->zstr.next_in = (unsigned char *) data;
261 
262     while (cb->zstr.avail_in > 0)
263     {
264 	char buffer[BUFFER_DATA_SIZE];
265 	int zstatus;
266 
267 	cb->zstr.avail_out = BUFFER_DATA_SIZE;
268 	cb->zstr.next_out = (unsigned char *) buffer;
269 
270 	zstatus = deflate (&cb->zstr, Z_NO_FLUSH);
271 	if (zstatus != Z_OK)
272 	{
273 	    compress_error (0, zstatus, &cb->zstr, "deflate");
274 	    return EIO;
275 	}
276 
277 	if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
278 	    buf_output (cb->buf, buffer,
279 			BUFFER_DATA_SIZE - cb->zstr.avail_out);
280     }
281 
282     *wrote = have;
283 
284     /* We will only be here because buf_send_output was called on the
285        compression buffer.  That means that we should now call
286        buf_send_output on the underlying buffer.  */
287     return buf_send_output (cb->buf);
288 }
289 
290 /* Flush a compression buffer.  */
291 
292 static int
293 compress_buffer_flush (closure)
294      void *closure;
295 {
296     struct compress_buffer *cb = (struct compress_buffer *) closure;
297 
298     cb->zstr.avail_in = 0;
299     cb->zstr.next_in = NULL;
300 
301     while (1)
302     {
303 	char buffer[BUFFER_DATA_SIZE];
304 	int zstatus;
305 
306 	cb->zstr.avail_out = BUFFER_DATA_SIZE;
307 	cb->zstr.next_out = (unsigned char *) buffer;
308 
309 	zstatus = deflate (&cb->zstr, Z_SYNC_FLUSH);
310 
311 	/* The deflate function will return Z_BUF_ERROR if it can't do
312            anything, which in this case means that all data has been
313            flushed.  */
314 	if (zstatus == Z_BUF_ERROR)
315 	    break;
316 
317 	if (zstatus != Z_OK)
318 	{
319 	    compress_error (0, zstatus, &cb->zstr, "deflate flush");
320 	    return EIO;
321 	}
322 
323 	if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
324 	    buf_output (cb->buf, buffer,
325 			BUFFER_DATA_SIZE - cb->zstr.avail_out);
326 
327 	/* If the deflate function did not fill the output buffer,
328            then all data has been flushed.  */
329 	if (cb->zstr.avail_out > 0)
330 	    break;
331     }
332 
333     /* Now flush the underlying buffer.  Note that if the original
334        call to buf_flush passed 1 for the BLOCK argument, then the
335        buffer will already have been set into blocking mode, so we
336        should always pass 0 here.  */
337     return buf_flush (cb->buf, 0);
338 }
339 
340 /* The block routine for a compression buffer.  */
341 
342 static int
343 compress_buffer_block (closure, block)
344      void *closure;
345      int block;
346 {
347     struct compress_buffer *cb = (struct compress_buffer *) closure;
348 
349     if (block)
350 	return set_block (cb->buf);
351     else
352 	return set_nonblock (cb->buf);
353 }
354 
355 /* Shut down an input buffer.  */
356 
357 static int
358 compress_buffer_shutdown_input (closure)
359      void *closure;
360 {
361     struct compress_buffer *cb = (struct compress_buffer *) closure;
362     int zstatus;
363 
364     /* Pick up any trailing data, such as the checksum.  */
365     while (1)
366     {
367 	int status, nread;
368 	char buf[100];
369 
370 	status = compress_buffer_input (cb, buf, 0, sizeof buf, &nread);
371 	if (status == -1)
372 	    break;
373 	if (status != 0)
374 	    return status;
375     }
376 
377     zstatus = inflateEnd (&cb->zstr);
378     if (zstatus != Z_OK)
379     {
380 	compress_error (0, zstatus, &cb->zstr, "inflateEnd");
381 	return EIO;
382     }
383 
384     return buf_shutdown (cb->buf);
385 }
386 
387 /* Shut down an output buffer.  */
388 
389 static int
390 compress_buffer_shutdown_output (closure)
391      void *closure;
392 {
393     struct compress_buffer *cb = (struct compress_buffer *) closure;
394     int zstatus, status;
395 
396     do
397     {
398 	char buffer[BUFFER_DATA_SIZE];
399 
400 	cb->zstr.avail_out = BUFFER_DATA_SIZE;
401 	cb->zstr.next_out = (unsigned char *) buffer;
402 
403 	zstatus = deflate (&cb->zstr, Z_FINISH);
404 	if (zstatus != Z_OK && zstatus != Z_STREAM_END)
405 	{
406 	    compress_error (0, zstatus, &cb->zstr, "deflate finish");
407 	    return EIO;
408 	}
409 
410 	if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
411 	    buf_output (cb->buf, buffer,
412 			BUFFER_DATA_SIZE - cb->zstr.avail_out);
413     } while (zstatus != Z_STREAM_END);
414 
415     zstatus = deflateEnd (&cb->zstr);
416     if (zstatus != Z_OK)
417     {
418 	compress_error (0, zstatus, &cb->zstr, "deflateEnd");
419 	return EIO;
420     }
421 
422     status = buf_flush (cb->buf, 1);
423     if (status != 0)
424 	return status;
425 
426     return buf_shutdown (cb->buf);
427 }
428 
429 
430 
431 /* Here is our librarified gzip implementation.  It is very minimal
432    but attempts to be RFC1952 compliant.  */
433 
434 /* BUF should contain SIZE bytes of gzipped data (RFC1952/RFC1951).
435    We are to uncompress the data and write the result to the file
436    descriptor FD.  If something goes wrong, give a nonfatal error message
437    mentioning FULLNAME as the name of the file for FD.  Return 1 if
438    it is an error we can't recover from.  */
439 
440 int
441 gunzip_and_write (fd, fullname, buf, size)
442     int fd;
443     char *fullname;
444     unsigned char *buf;
445     size_t size;
446 {
447     size_t pos;
448     z_stream zstr;
449     int zstatus;
450     unsigned char outbuf[32768];
451     unsigned long crc;
452 
453     if (buf[0] != 31 || buf[1] != 139)
454     {
455 	error (0, 0, "gzipped data does not start with gzip identification");
456 	return 1;
457     }
458     if (buf[2] != 8)
459     {
460 	error (0, 0, "only the deflate compression method is supported");
461 	return 1;
462     }
463 
464     /* Skip over the fixed header, and then skip any of the variable-length
465        fields.  */
466     pos = 10;
467     if (buf[3] & 4)
468 	pos += buf[pos] + (buf[pos + 1] << 8) + 2;
469     if (buf[3] & 8)
470 	pos += strlen ((char *) buf + pos) + 1;
471     if (buf[3] & 16)
472 	pos += strlen ((char *) buf + pos) + 1;
473     if (buf[3] & 2)
474 	pos += 2;
475 
476     memset (&zstr, 0, sizeof zstr);
477     /* Passing a negative argument tells zlib not to look for a zlib
478        (RFC1950) header.  This is an undocumented feature; I suppose if
479        we wanted to be anal we could synthesize a header instead,
480        but why bother?  */
481     zstatus = inflateInit2 (&zstr, -15);
482 
483     if (zstatus != Z_OK)
484 	compress_error (1, zstatus, &zstr, fullname);
485 
486     /* I don't see why we should have to include the 8 byte trailer in
487        avail_in.  But I see that zlib/gzio.c does, and it seemed to fix
488        a fairly rare bug in which we'd get a Z_BUF_ERROR for no obvious
489        reason.  */
490     zstr.avail_in = size - pos;
491     zstr.next_in = buf + pos;
492 
493     crc = crc32 (0, NULL, 0);
494 
495     do
496     {
497 	zstr.avail_out = sizeof (outbuf);
498 	zstr.next_out = outbuf;
499 	zstatus = inflate (&zstr, Z_NO_FLUSH);
500 	if (zstatus != Z_STREAM_END && zstatus != Z_OK)
501 	{
502 	    compress_error (0, zstatus, &zstr, fullname);
503 	    return 1;
504 	}
505 	if (write (fd, outbuf, sizeof (outbuf) - zstr.avail_out) < 0)
506 	{
507 	    error (0, errno, "writing decompressed file %s", fullname);
508 	    return 1;
509 	}
510 	crc = crc32 (crc, outbuf, sizeof (outbuf) - zstr.avail_out);
511     } while (zstatus != Z_STREAM_END);
512     zstatus = inflateEnd (&zstr);
513     if (zstatus != Z_OK)
514 	compress_error (0, zstatus, &zstr, fullname);
515 
516     if (crc != (buf[zstr.total_in + 10]
517 		+ (buf[zstr.total_in + 11] << 8)
518 		+ (buf[zstr.total_in + 12] << 16)
519 		+ (buf[zstr.total_in + 13] << 24)))
520     {
521 	error (0, 0, "CRC error uncompressing %s", fullname);
522 	return 1;
523     }
524 
525     if (zstr.total_out != (buf[zstr.total_in + 14]
526 			   + (buf[zstr.total_in + 15] << 8)
527 			   + (buf[zstr.total_in + 16] << 16)
528 			   + (buf[zstr.total_in + 17] << 24)))
529     {
530 	error (0, 0, "invalid length uncompressing %s", fullname);
531 	return 1;
532     }
533 
534     return 0;
535 }
536 
537 /* Read all of FD and put the gzipped data (RFC1952/RFC1951) into *BUF,
538    replacing previous contents of *BUF.  *BUF is malloc'd and *SIZE is
539    its allocated size.  Put the actual number of bytes of data in
540    *LEN.  If something goes wrong, give a nonfatal error mentioning
541    FULLNAME as the name of the file for FD, and return 1 if we can't
542    recover from it).  LEVEL is the compression level (1-9).  */
543 
544 int
545 read_and_gzip (fd, fullname, buf, size, len, level)
546     int fd;
547     char *fullname;
548     unsigned char **buf;
549     size_t *size;
550     size_t *len;
551     int level;
552 {
553     z_stream zstr;
554     int zstatus;
555     unsigned char inbuf[8192];
556     int nread;
557     unsigned long crc;
558 
559     if (*size < 1024)
560     {
561 	unsigned char *newbuf;
562 
563 	*size = 1024;
564 	newbuf = realloc (*buf, *size);
565 	if (newbuf == NULL)
566 	{
567 	    error (0, 0, "out of memory");
568 	    return 1;
569 	}
570 	*buf = newbuf;
571     }
572     (*buf)[0] = 31;
573     (*buf)[1] = 139;
574     (*buf)[2] = 8;
575     (*buf)[3] = 0;
576     (*buf)[4] = (*buf)[5] = (*buf)[6] = (*buf)[7] = 0;
577     /* Could set this based on level, but why bother?  */
578     (*buf)[8] = 0;
579     (*buf)[9] = 255;
580 
581     memset (&zstr, 0, sizeof zstr);
582     zstatus = deflateInit2 (&zstr, level, Z_DEFLATED, -15, 8,
583 			    Z_DEFAULT_STRATEGY);
584     crc = crc32 (0, NULL, 0);
585     if (zstatus != Z_OK)
586     {
587 	compress_error (0, zstatus, &zstr, fullname);
588 	return 1;
589     }
590     zstr.avail_out = *size;
591     zstr.next_out = *buf + 10;
592 
593     while (1)
594     {
595 	int finish = 0;
596 
597 	nread = read (fd, inbuf, sizeof inbuf);
598 	if (nread < 0)
599 	{
600 	    error (0, errno, "cannot read %s", fullname);
601 	    return 1;
602 	}
603 	else if (nread == 0)
604 	    /* End of file.  */
605 	    finish = 1;
606 	crc = crc32 (crc, inbuf, nread);
607 	zstr.next_in = inbuf;
608 	zstr.avail_in = nread;
609 
610 	do
611 	{
612 	    size_t offset;
613 
614 	    /* I don't see this documented anywhere, but deflate seems
615 	       to tend to dump core sometimes if we pass it Z_FINISH and
616 	       a small (e.g. 2147 byte) avail_out.  So we insist on at
617 	       least 4096 bytes (that is what zlib/gzio.c uses).  */
618 
619 	    if (zstr.avail_out < 4096)
620 	    {
621 		unsigned char *newbuf;
622 
623 		offset = zstr.next_out - *buf;
624 		*size *= 2;
625 		newbuf = realloc (*buf, *size);
626 		if (newbuf == NULL)
627 		{
628 		    error (0, 0, "out of memory");
629 		    return 1;
630 		}
631 		*buf = newbuf;
632 		zstr.next_out = *buf + offset;
633 		zstr.avail_out = *size - offset;
634 	    }
635 
636 	    zstatus = deflate (&zstr, finish ? Z_FINISH : 0);
637 	    if (zstatus == Z_STREAM_END)
638 		goto done;
639 	    else if (zstatus != Z_OK)
640 		compress_error (0, zstatus, &zstr, fullname);
641 	} while (zstr.avail_out == 0);
642     }
643  done:
644     *(*buf + zstr.total_out + 10) = crc & 0xff;
645     *(*buf + zstr.total_out + 11) = (crc >> 8) & 0xff;
646     *(*buf + zstr.total_out + 12) = (crc >> 16) & 0xff;
647     *(*buf + zstr.total_out + 13) = (crc >> 24) & 0xff;
648 
649     *(*buf + zstr.total_out + 14) = zstr.total_in & 0xff;
650     *(*buf + zstr.total_out + 15) = (zstr.total_in >> 8) & 0xff;
651     *(*buf + zstr.total_out + 16) = (zstr.total_in >> 16) & 0xff;
652     *(*buf + zstr.total_out + 17) = (zstr.total_in >> 24) & 0xff;
653 
654     *len = zstr.total_out + 18;
655 
656     zstatus = deflateEnd (&zstr);
657     if (zstatus != Z_OK)
658 	compress_error (0, zstatus, &zstr, fullname);
659 
660     return 0;
661 }
662 #endif /* defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT) */
663