1 /* zlib.c --- interface to the zlib compression library
2 Ian Lance Taylor <ian@cygnus.com>
3
4 This file is part of GNU CVS.
5
6 GNU CVS is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details. */
15
16 /* The routines in this file are the interface between the CVS
17 client/server support and the zlib compression library. */
18
19 #include <assert.h>
20 #include "cvs.h"
21 #include "buffer.h"
22
23 #if defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT)
24
25 #include "zlib.h"
26
/* Some platforms (OS/2, for one) have no EIO, so fall back to EBADPOS
   there.  FIXME: mapping unrelated failures onto EIO at all is a
   questionable practice.  */
#ifndef EIO
# define EIO EBADPOS
#endif
32
33 /* The compression interface is built upon the buffer data structure.
34 We provide a buffer type which compresses or decompresses the data
35 which passes through it. An input buffer decompresses the data
36 read from an underlying buffer, and an output buffer compresses the
37 data before writing it to an underlying buffer. */
38
39 /* This structure is the closure field of the buffer. */
40
41 struct compress_buffer
42 {
43 /* The underlying buffer. */
44 struct buffer *buf;
45 /* The compression information. */
46 z_stream zstr;
47 };
48
49 static void compress_error PROTO((int, int, z_stream *, const char *));
50 static int compress_buffer_input PROTO((void *, char *, int, int, int *));
51 static int compress_buffer_output PROTO((void *, const char *, int, int *));
52 static int compress_buffer_flush PROTO((void *));
53 static int compress_buffer_block PROTO((void *, int));
54 static int compress_buffer_shutdown_input PROTO((void *));
55 static int compress_buffer_shutdown_output PROTO((void *));
56
57 /* Report an error from one of the zlib functions. */
58
59 static void
compress_error(status,zstatus,zstr,msg)60 compress_error (status, zstatus, zstr, msg)
61 int status;
62 int zstatus;
63 z_stream *zstr;
64 const char *msg;
65 {
66 int hold_errno;
67 const char *zmsg;
68 char buf[100];
69
70 hold_errno = errno;
71
72 zmsg = zstr->msg;
73 if (zmsg == NULL)
74 {
75 snprintf (buf, sizeof buf, "error %d", zstatus);
76 zmsg = buf;
77 }
78
79 error (status,
80 zstatus == Z_ERRNO ? hold_errno : 0,
81 "%s: %s", msg, zmsg);
82 }
83
84 /* Create a compression buffer. */
85
86 struct buffer *
compress_buffer_initialize(buf,input,level,memory)87 compress_buffer_initialize (buf, input, level, memory)
88 struct buffer *buf;
89 int input;
90 int level;
91 void (*memory) PROTO((struct buffer *));
92 {
93 struct compress_buffer *n;
94 int zstatus;
95
96 n = (struct compress_buffer *) xmalloc (sizeof *n);
97 memset (n, 0, sizeof *n);
98
99 n->buf = buf;
100
101 if (input)
102 zstatus = inflateInit (&n->zstr);
103 else
104 zstatus = deflateInit (&n->zstr, level);
105 if (zstatus != Z_OK)
106 compress_error (1, zstatus, &n->zstr, "compression initialization");
107
108 /* There may already be data buffered on BUF. For an output
109 buffer, this is OK, because these routines will just use the
110 buffer routines to append data to the (uncompressed) data
111 already on BUF. An input buffer expects to handle a single
112 buffer_data of buffered input to be uncompressed, so that is OK
113 provided there is only one buffer. At present that is all
114 there ever will be; if this changes, compress_buffer_input must
115 be modified to handle multiple input buffers. */
116 assert (! input || buf->data == NULL || buf->data->next == NULL);
117
118 return buf_initialize (input ? compress_buffer_input : NULL,
119 input ? NULL : compress_buffer_output,
120 input ? NULL : compress_buffer_flush,
121 compress_buffer_block,
122 (input
123 ? compress_buffer_shutdown_input
124 : compress_buffer_shutdown_output),
125 memory,
126 n);
127 }
128
129 /* Input data from a compression buffer. */
130
131 static int
compress_buffer_input(closure,data,need,size,got)132 compress_buffer_input (closure, data, need, size, got)
133 void *closure;
134 char *data;
135 int need;
136 int size;
137 int *got;
138 {
139 struct compress_buffer *cb = (struct compress_buffer *) closure;
140 struct buffer_data *bd;
141
142 if (cb->buf->input == NULL)
143 abort ();
144
145 /* We use a single buffer_data structure to buffer up data which
146 the z_stream structure won't use yet. We can safely store this
147 on cb->buf->data, because we never call the buffer routines on
148 cb->buf; we only call the buffer input routine, since that
149 gives us the semantics we want. As noted in
150 compress_buffer_initialize, the buffer_data structure may
151 already exist, and hold data which was already read and
152 buffered before the decompression began. */
153 bd = cb->buf->data;
154 if (bd == NULL)
155 {
156 bd = ((struct buffer_data *) malloc (sizeof (struct buffer_data)));
157 if (bd == NULL)
158 return -2;
159 bd->text = (char *) malloc (BUFFER_DATA_SIZE);
160 if (bd->text == NULL)
161 {
162 free (bd);
163 return -2;
164 }
165 bd->bufp = bd->text;
166 bd->size = 0;
167 cb->buf->data = bd;
168 }
169
170 cb->zstr.avail_out = size;
171 cb->zstr.next_out = (Bytef *) data;
172
173 while (1)
174 {
175 int zstatus, sofar, status, nread;
176
177 /* First try to inflate any data we already have buffered up.
178 This is useful even if we don't have any buffered data,
179 because there may be data buffered inside the z_stream
180 structure. */
181
182 cb->zstr.avail_in = bd->size;
183 cb->zstr.next_in = (Bytef *) bd->bufp;
184
185 do
186 {
187 zstatus = inflate (&cb->zstr, Z_NO_FLUSH);
188 if (zstatus == Z_STREAM_END)
189 break;
190 if (zstatus != Z_OK && zstatus != Z_BUF_ERROR)
191 {
192 compress_error (0, zstatus, &cb->zstr, "inflate");
193 return EIO;
194 }
195 } while (cb->zstr.avail_in > 0
196 && cb->zstr.avail_out > 0);
197
198 bd->size = cb->zstr.avail_in;
199 bd->bufp = (char *) cb->zstr.next_in;
200
201 if (zstatus == Z_STREAM_END)
202 return -1;
203
204 /* If we have obtained NEED bytes, then return, unless NEED is
205 zero and we haven't obtained anything at all. If NEED is
206 zero, we will keep reading from the underlying buffer until
207 we either can't read anything, or we have managed to
208 inflate at least one byte. */
209 sofar = size - cb->zstr.avail_out;
210 if (sofar > 0 && sofar >= need)
211 break;
212
213 /* All our buffered data should have been processed at this
214 point. */
215 assert (bd->size == 0);
216
217 /* This will work well in the server, because this call will
218 do an unblocked read and fetch all the available data. In
219 the client, this will read a single byte from the stdio
220 stream, which will cause us to call inflate once per byte.
221 It would be more efficient if we could make a call which
222 would fetch all the available bytes, and at least one byte. */
223
224 status = (*cb->buf->input) (cb->buf->closure, bd->text,
225 need > 0 ? 1 : 0,
226 BUFFER_DATA_SIZE, &nread);
227 if (status != 0)
228 return status;
229
230 /* If we didn't read anything, then presumably the buffer is
231 in nonblocking mode, and we should just get out now with
232 whatever we've inflated. */
233 if (nread == 0)
234 {
235 assert (need == 0);
236 break;
237 }
238
239 bd->bufp = bd->text;
240 bd->size = nread;
241 }
242
243 *got = size - cb->zstr.avail_out;
244
245 return 0;
246 }
247
248 /* Output data to a compression buffer. */
249
250 static int
compress_buffer_output(closure,data,have,wrote)251 compress_buffer_output (closure, data, have, wrote)
252 void *closure;
253 const char *data;
254 int have;
255 int *wrote;
256 {
257 struct compress_buffer *cb = (struct compress_buffer *) closure;
258
259 cb->zstr.avail_in = have;
260 cb->zstr.next_in = (unsigned char *) data;
261
262 while (cb->zstr.avail_in > 0)
263 {
264 char buffer[BUFFER_DATA_SIZE];
265 int zstatus;
266
267 cb->zstr.avail_out = BUFFER_DATA_SIZE;
268 cb->zstr.next_out = (unsigned char *) buffer;
269
270 zstatus = deflate (&cb->zstr, Z_NO_FLUSH);
271 if (zstatus != Z_OK)
272 {
273 compress_error (0, zstatus, &cb->zstr, "deflate");
274 return EIO;
275 }
276
277 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
278 buf_output (cb->buf, buffer,
279 BUFFER_DATA_SIZE - cb->zstr.avail_out);
280 }
281
282 *wrote = have;
283
284 /* We will only be here because buf_send_output was called on the
285 compression buffer. That means that we should now call
286 buf_send_output on the underlying buffer. */
287 return buf_send_output (cb->buf);
288 }
289
290 /* Flush a compression buffer. */
291
292 static int
compress_buffer_flush(closure)293 compress_buffer_flush (closure)
294 void *closure;
295 {
296 struct compress_buffer *cb = (struct compress_buffer *) closure;
297
298 cb->zstr.avail_in = 0;
299 cb->zstr.next_in = NULL;
300
301 while (1)
302 {
303 char buffer[BUFFER_DATA_SIZE];
304 int zstatus;
305
306 cb->zstr.avail_out = BUFFER_DATA_SIZE;
307 cb->zstr.next_out = (unsigned char *) buffer;
308
309 zstatus = deflate (&cb->zstr, Z_SYNC_FLUSH);
310
311 /* The deflate function will return Z_BUF_ERROR if it can't do
312 anything, which in this case means that all data has been
313 flushed. */
314 if (zstatus == Z_BUF_ERROR)
315 break;
316
317 if (zstatus != Z_OK)
318 {
319 compress_error (0, zstatus, &cb->zstr, "deflate flush");
320 return EIO;
321 }
322
323 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
324 buf_output (cb->buf, buffer,
325 BUFFER_DATA_SIZE - cb->zstr.avail_out);
326
327 /* If the deflate function did not fill the output buffer,
328 then all data has been flushed. */
329 if (cb->zstr.avail_out > 0)
330 break;
331 }
332
333 /* Now flush the underlying buffer. Note that if the original
334 call to buf_flush passed 1 for the BLOCK argument, then the
335 buffer will already have been set into blocking mode, so we
336 should always pass 0 here. */
337 return buf_flush (cb->buf, 0);
338 }
339
340 /* The block routine for a compression buffer. */
341
342 static int
compress_buffer_block(closure,block)343 compress_buffer_block (closure, block)
344 void *closure;
345 int block;
346 {
347 struct compress_buffer *cb = (struct compress_buffer *) closure;
348
349 if (block)
350 return set_block (cb->buf);
351 else
352 return set_nonblock (cb->buf);
353 }
354
355 /* Shut down an input buffer. */
356
357 static int
compress_buffer_shutdown_input(closure)358 compress_buffer_shutdown_input (closure)
359 void *closure;
360 {
361 struct compress_buffer *cb = (struct compress_buffer *) closure;
362 int zstatus;
363
364 /* Pick up any trailing data, such as the checksum. */
365 while (1)
366 {
367 int status, nread;
368 char buf[100];
369
370 status = compress_buffer_input (cb, buf, 0, sizeof buf, &nread);
371 if (status == -1)
372 break;
373 if (status != 0)
374 return status;
375 }
376
377 zstatus = inflateEnd (&cb->zstr);
378 if (zstatus != Z_OK)
379 {
380 compress_error (0, zstatus, &cb->zstr, "inflateEnd");
381 return EIO;
382 }
383
384 return buf_shutdown (cb->buf);
385 }
386
387 /* Shut down an output buffer. */
388
389 static int
compress_buffer_shutdown_output(closure)390 compress_buffer_shutdown_output (closure)
391 void *closure;
392 {
393 struct compress_buffer *cb = (struct compress_buffer *) closure;
394 int zstatus, status;
395
396 do
397 {
398 char buffer[BUFFER_DATA_SIZE];
399
400 cb->zstr.avail_out = BUFFER_DATA_SIZE;
401 cb->zstr.next_out = (unsigned char *) buffer;
402
403 zstatus = deflate (&cb->zstr, Z_FINISH);
404 if (zstatus != Z_OK && zstatus != Z_STREAM_END)
405 {
406 compress_error (0, zstatus, &cb->zstr, "deflate finish");
407 return EIO;
408 }
409
410 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
411 buf_output (cb->buf, buffer,
412 BUFFER_DATA_SIZE - cb->zstr.avail_out);
413 } while (zstatus != Z_STREAM_END);
414
415 zstatus = deflateEnd (&cb->zstr);
416 if (zstatus != Z_OK)
417 {
418 compress_error (0, zstatus, &cb->zstr, "deflateEnd");
419 return EIO;
420 }
421
422 status = buf_flush (cb->buf, 1);
423 if (status != 0)
424 return status;
425
426 return buf_shutdown (cb->buf);
427 }
428
429
430
431 /* Here is our librarified gzip implementation. It is very minimal
432 but attempts to be RFC1952 compliant. */
433
434 /* BUF should contain SIZE bytes of gzipped data (RFC1952/RFC1951).
435 We are to uncompress the data and write the result to the file
436 descriptor FD. If something goes wrong, give a nonfatal error message
437 mentioning FULLNAME as the name of the file for FD. Return 1 if
438 it is an error we can't recover from. */
439
440 int
gunzip_and_write(fd,fullname,buf,size)441 gunzip_and_write (fd, fullname, buf, size)
442 int fd;
443 char *fullname;
444 unsigned char *buf;
445 size_t size;
446 {
447 size_t pos;
448 z_stream zstr;
449 int zstatus;
450 unsigned char outbuf[32768];
451 unsigned long crc;
452
453 if (buf[0] != 31 || buf[1] != 139)
454 {
455 error (0, 0, "gzipped data does not start with gzip identification");
456 return 1;
457 }
458 if (buf[2] != 8)
459 {
460 error (0, 0, "only the deflate compression method is supported");
461 return 1;
462 }
463
464 /* Skip over the fixed header, and then skip any of the variable-length
465 fields. */
466 pos = 10;
467 if (buf[3] & 4)
468 pos += buf[pos] + (buf[pos + 1] << 8) + 2;
469 if (buf[3] & 8)
470 pos += strlen ((char *) buf + pos) + 1;
471 if (buf[3] & 16)
472 pos += strlen ((char *) buf + pos) + 1;
473 if (buf[3] & 2)
474 pos += 2;
475
476 memset (&zstr, 0, sizeof zstr);
477 /* Passing a negative argument tells zlib not to look for a zlib
478 (RFC1950) header. This is an undocumented feature; I suppose if
479 we wanted to be anal we could synthesize a header instead,
480 but why bother? */
481 zstatus = inflateInit2 (&zstr, -15);
482
483 if (zstatus != Z_OK)
484 compress_error (1, zstatus, &zstr, fullname);
485
486 /* I don't see why we should have to include the 8 byte trailer in
487 avail_in. But I see that zlib/gzio.c does, and it seemed to fix
488 a fairly rare bug in which we'd get a Z_BUF_ERROR for no obvious
489 reason. */
490 zstr.avail_in = size - pos;
491 zstr.next_in = buf + pos;
492
493 crc = crc32 (0, NULL, 0);
494
495 do
496 {
497 zstr.avail_out = sizeof (outbuf);
498 zstr.next_out = outbuf;
499 zstatus = inflate (&zstr, Z_NO_FLUSH);
500 if (zstatus != Z_STREAM_END && zstatus != Z_OK)
501 {
502 compress_error (0, zstatus, &zstr, fullname);
503 return 1;
504 }
505 if (write (fd, outbuf, sizeof (outbuf) - zstr.avail_out) < 0)
506 {
507 error (0, errno, "writing decompressed file %s", fullname);
508 return 1;
509 }
510 crc = crc32 (crc, outbuf, sizeof (outbuf) - zstr.avail_out);
511 } while (zstatus != Z_STREAM_END);
512 zstatus = inflateEnd (&zstr);
513 if (zstatus != Z_OK)
514 compress_error (0, zstatus, &zstr, fullname);
515
516 if (crc != (buf[zstr.total_in + 10]
517 + (buf[zstr.total_in + 11] << 8)
518 + (buf[zstr.total_in + 12] << 16)
519 + (buf[zstr.total_in + 13] << 24)))
520 {
521 error (0, 0, "CRC error uncompressing %s", fullname);
522 return 1;
523 }
524
525 if (zstr.total_out != (buf[zstr.total_in + 14]
526 + (buf[zstr.total_in + 15] << 8)
527 + (buf[zstr.total_in + 16] << 16)
528 + (buf[zstr.total_in + 17] << 24)))
529 {
530 error (0, 0, "invalid length uncompressing %s", fullname);
531 return 1;
532 }
533
534 return 0;
535 }
536
537 /* Read all of FD and put the gzipped data (RFC1952/RFC1951) into *BUF,
538 replacing previous contents of *BUF. *BUF is malloc'd and *SIZE is
539 its allocated size. Put the actual number of bytes of data in
540 *LEN. If something goes wrong, give a nonfatal error mentioning
541 FULLNAME as the name of the file for FD, and return 1 if we can't
542 recover from it). LEVEL is the compression level (1-9). */
543
544 int
read_and_gzip(fd,fullname,buf,size,len,level)545 read_and_gzip (fd, fullname, buf, size, len, level)
546 int fd;
547 char *fullname;
548 unsigned char **buf;
549 size_t *size;
550 size_t *len;
551 int level;
552 {
553 z_stream zstr;
554 int zstatus;
555 unsigned char inbuf[8192];
556 int nread;
557 unsigned long crc;
558
559 if (*size < 1024)
560 {
561 unsigned char *newbuf;
562
563 *size = 1024;
564 newbuf = realloc (*buf, *size);
565 if (newbuf == NULL)
566 {
567 error (0, 0, "out of memory");
568 return 1;
569 }
570 *buf = newbuf;
571 }
572 (*buf)[0] = 31;
573 (*buf)[1] = 139;
574 (*buf)[2] = 8;
575 (*buf)[3] = 0;
576 (*buf)[4] = (*buf)[5] = (*buf)[6] = (*buf)[7] = 0;
577 /* Could set this based on level, but why bother? */
578 (*buf)[8] = 0;
579 (*buf)[9] = 255;
580
581 memset (&zstr, 0, sizeof zstr);
582 zstatus = deflateInit2 (&zstr, level, Z_DEFLATED, -15, 8,
583 Z_DEFAULT_STRATEGY);
584 crc = crc32 (0, NULL, 0);
585 if (zstatus != Z_OK)
586 {
587 compress_error (0, zstatus, &zstr, fullname);
588 return 1;
589 }
590 zstr.avail_out = *size;
591 zstr.next_out = *buf + 10;
592
593 while (1)
594 {
595 int finish = 0;
596
597 nread = read (fd, inbuf, sizeof inbuf);
598 if (nread < 0)
599 {
600 error (0, errno, "cannot read %s", fullname);
601 return 1;
602 }
603 else if (nread == 0)
604 /* End of file. */
605 finish = 1;
606 crc = crc32 (crc, inbuf, nread);
607 zstr.next_in = inbuf;
608 zstr.avail_in = nread;
609
610 do
611 {
612 size_t offset;
613
614 /* I don't see this documented anywhere, but deflate seems
615 to tend to dump core sometimes if we pass it Z_FINISH and
616 a small (e.g. 2147 byte) avail_out. So we insist on at
617 least 4096 bytes (that is what zlib/gzio.c uses). */
618
619 if (zstr.avail_out < 4096)
620 {
621 unsigned char *newbuf;
622
623 offset = zstr.next_out - *buf;
624 *size *= 2;
625 newbuf = realloc (*buf, *size);
626 if (newbuf == NULL)
627 {
628 error (0, 0, "out of memory");
629 return 1;
630 }
631 *buf = newbuf;
632 zstr.next_out = *buf + offset;
633 zstr.avail_out = *size - offset;
634 }
635
636 zstatus = deflate (&zstr, finish ? Z_FINISH : 0);
637 if (zstatus == Z_STREAM_END)
638 goto done;
639 else if (zstatus != Z_OK)
640 compress_error (0, zstatus, &zstr, fullname);
641 } while (zstr.avail_out == 0);
642 }
643 done:
644 *(*buf + zstr.total_out + 10) = crc & 0xff;
645 *(*buf + zstr.total_out + 11) = (crc >> 8) & 0xff;
646 *(*buf + zstr.total_out + 12) = (crc >> 16) & 0xff;
647 *(*buf + zstr.total_out + 13) = (crc >> 24) & 0xff;
648
649 *(*buf + zstr.total_out + 14) = zstr.total_in & 0xff;
650 *(*buf + zstr.total_out + 15) = (zstr.total_in >> 8) & 0xff;
651 *(*buf + zstr.total_out + 16) = (zstr.total_in >> 16) & 0xff;
652 *(*buf + zstr.total_out + 17) = (zstr.total_in >> 24) & 0xff;
653
654 *len = zstr.total_out + 18;
655
656 zstatus = deflateEnd (&zstr);
657 if (zstatus != Z_OK)
658 compress_error (0, zstatus, &zstr, fullname);
659
660 return 0;
661 }
662 #endif /* defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT) */
663