1 /*-------------------------------------------------------------------------
2  *
3  * compress_io.c
4  *	 Routines for archivers to write an uncompressed or compressed data
5  *	 stream.
6  *
7  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * This file includes two APIs for dealing with compressed data. The first
11  * provides more flexibility, using callbacks to read/write data from the
12  * underlying stream. The second API is a wrapper around fopen/gzopen and
13  * friends, providing an interface similar to those, but abstracts away
14  * the possible compression. Both APIs use libz for the compression, but
15  * the second API uses gzip headers, so the resulting files can be easily
16  * manipulated with the gzip utility.
17  *
18  * Compressor API
19  * --------------
20  *
21  *	The interface for writing to an archive consists of three functions:
22  *	AllocateCompressor, WriteDataToArchive and EndCompressor. First you call
23  *	AllocateCompressor, then write all the data by calling WriteDataToArchive
24  *	as many times as needed, and finally EndCompressor. WriteDataToArchive
25  *	and EndCompressor will call the WriteFunc that was provided to
26  *	AllocateCompressor for each chunk of compressed data.
27  *
28  *	The interface for reading an archive consists of just one function:
29  *	ReadDataFromArchive. ReadDataFromArchive reads the whole compressed input
 *	stream, by repeatedly calling the given ReadFunc. ReadFunc returns the
 *	compressed data one chunk at a time, and ReadDataFromArchive decompresses
 *	it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
 *	to signal EOF.
34  *
35  *	The interface is the same for compressed and uncompressed streams.
36  *
37  * Compressed stream API
38  * ----------------------
39  *
40  *	The compressed stream API is a wrapper around the C standard fopen() and
41  *	libz's gzopen() APIs. It allows you to use the same functions for
42  *	compressed and uncompressed streams. cfopen_read() first tries to open
43  *	the file with given name, and if it fails, it tries to open the same
 *	file with the .gz suffix. cfopen_write() opens a file for writing; an
 *	extra argument specifies whether the file should be compressed, and if so
 *	the .gz suffix is added to the filename. This allows you to easily handle
 *	both compressed and uncompressed files.
48  *
49  * IDENTIFICATION
50  *	   src/bin/pg_dump/compress_io.c
51  *
52  *-------------------------------------------------------------------------
53  */
54 #include "postgres_fe.h"
55 
56 #include "compress_io.h"
57 #include "pg_backup_utils.h"
58 
59 /*----------------------
60  * Compressor API
61  *----------------------
62  */
63 
64 /* typedef appears in compress_io.h */
65 struct CompressorState
66 {
67 	CompressionAlgorithm comprAlg;
68 	WriteFunc	writeF;
69 
70 #ifdef HAVE_LIBZ
71 	z_streamp	zp;
72 	char	   *zlibOut;
73 	size_t		zlibOutSize;
74 #endif
75 };
76 
77 static void ParseCompressionOption(int compression, CompressionAlgorithm *alg,
78 								   int *level);
79 
80 /* Routines that support zlib compressed data I/O */
81 #ifdef HAVE_LIBZ
82 static void InitCompressorZlib(CompressorState *cs, int level);
83 static void DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs,
84 								  bool flush);
85 static void ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF);
86 static void WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs,
87 								   const char *data, size_t dLen);
88 static void EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs);
89 #endif
90 
91 /* Routines that support uncompressed data I/O */
92 static void ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF);
93 static void WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
94 								   const char *data, size_t dLen);
95 
96 /*
97  * Interprets a numeric 'compression' value. The algorithm implied by the
98  * value (zlib or none at the moment), is returned in *alg, and the
99  * zlib compression level in *level.
100  */
101 static void
ParseCompressionOption(int compression,CompressionAlgorithm * alg,int * level)102 ParseCompressionOption(int compression, CompressionAlgorithm *alg, int *level)
103 {
104 	if (compression == Z_DEFAULT_COMPRESSION ||
105 		(compression > 0 && compression <= 9))
106 		*alg = COMPR_ALG_LIBZ;
107 	else if (compression == 0)
108 		*alg = COMPR_ALG_NONE;
109 	else
110 	{
111 		fatal("invalid compression code: %d", compression);
112 		*alg = COMPR_ALG_NONE;	/* keep compiler quiet */
113 	}
114 
115 	/* The level is just the passed-in value. */
116 	if (level)
117 		*level = compression;
118 }
119 
120 /* Public interface routines */
121 
122 /* Allocate a new compressor */
123 CompressorState *
AllocateCompressor(int compression,WriteFunc writeF)124 AllocateCompressor(int compression, WriteFunc writeF)
125 {
126 	CompressorState *cs;
127 	CompressionAlgorithm alg;
128 	int			level;
129 
130 	ParseCompressionOption(compression, &alg, &level);
131 
132 #ifndef HAVE_LIBZ
133 	if (alg == COMPR_ALG_LIBZ)
134 		fatal("not built with zlib support");
135 #endif
136 
137 	cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
138 	cs->writeF = writeF;
139 	cs->comprAlg = alg;
140 
141 	/*
142 	 * Perform compression algorithm specific initialization.
143 	 */
144 #ifdef HAVE_LIBZ
145 	if (alg == COMPR_ALG_LIBZ)
146 		InitCompressorZlib(cs, level);
147 #endif
148 
149 	return cs;
150 }
151 
152 /*
153  * Read all compressed data from the input stream (via readF) and print it
154  * out with ahwrite().
155  */
156 void
ReadDataFromArchive(ArchiveHandle * AH,int compression,ReadFunc readF)157 ReadDataFromArchive(ArchiveHandle *AH, int compression, ReadFunc readF)
158 {
159 	CompressionAlgorithm alg;
160 
161 	ParseCompressionOption(compression, &alg, NULL);
162 
163 	if (alg == COMPR_ALG_NONE)
164 		ReadDataFromArchiveNone(AH, readF);
165 	if (alg == COMPR_ALG_LIBZ)
166 	{
167 #ifdef HAVE_LIBZ
168 		ReadDataFromArchiveZlib(AH, readF);
169 #else
170 		fatal("not built with zlib support");
171 #endif
172 	}
173 }
174 
175 /*
176  * Compress and write data to the output stream (via writeF).
177  */
178 void
WriteDataToArchive(ArchiveHandle * AH,CompressorState * cs,const void * data,size_t dLen)179 WriteDataToArchive(ArchiveHandle *AH, CompressorState *cs,
180 				   const void *data, size_t dLen)
181 {
182 	switch (cs->comprAlg)
183 	{
184 		case COMPR_ALG_LIBZ:
185 #ifdef HAVE_LIBZ
186 			WriteDataToArchiveZlib(AH, cs, data, dLen);
187 #else
188 			fatal("not built with zlib support");
189 #endif
190 			break;
191 		case COMPR_ALG_NONE:
192 			WriteDataToArchiveNone(AH, cs, data, dLen);
193 			break;
194 	}
195 }
196 
197 /*
198  * Terminate compression library context and flush its buffers.
199  */
200 void
EndCompressor(ArchiveHandle * AH,CompressorState * cs)201 EndCompressor(ArchiveHandle *AH, CompressorState *cs)
202 {
203 #ifdef HAVE_LIBZ
204 	if (cs->comprAlg == COMPR_ALG_LIBZ)
205 		EndCompressorZlib(AH, cs);
206 #endif
207 	free(cs);
208 }
209 
210 /* Private routines, specific to each compression method. */
211 
212 #ifdef HAVE_LIBZ
213 /*
214  * Functions for zlib compressed output.
215  */
216 
217 static void
InitCompressorZlib(CompressorState * cs,int level)218 InitCompressorZlib(CompressorState *cs, int level)
219 {
220 	z_streamp	zp;
221 
222 	zp = cs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
223 	zp->zalloc = Z_NULL;
224 	zp->zfree = Z_NULL;
225 	zp->opaque = Z_NULL;
226 
227 	/*
228 	 * zlibOutSize is the buffer size we tell zlib it can output to.  We
229 	 * actually allocate one extra byte because some routines want to append a
230 	 * trailing zero byte to the zlib output.
231 	 */
232 	cs->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1);
233 	cs->zlibOutSize = ZLIB_OUT_SIZE;
234 
235 	if (deflateInit(zp, level) != Z_OK)
236 		fatal("could not initialize compression library: %s",
237 			  zp->msg);
238 
239 	/* Just be paranoid - maybe End is called after Start, with no Write */
240 	zp->next_out = (void *) cs->zlibOut;
241 	zp->avail_out = cs->zlibOutSize;
242 }
243 
244 static void
EndCompressorZlib(ArchiveHandle * AH,CompressorState * cs)245 EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs)
246 {
247 	z_streamp	zp = cs->zp;
248 
249 	zp->next_in = NULL;
250 	zp->avail_in = 0;
251 
252 	/* Flush any remaining data from zlib buffer */
253 	DeflateCompressorZlib(AH, cs, true);
254 
255 	if (deflateEnd(zp) != Z_OK)
256 		fatal("could not close compression stream: %s", zp->msg);
257 
258 	free(cs->zlibOut);
259 	free(cs->zp);
260 }
261 
262 static void
DeflateCompressorZlib(ArchiveHandle * AH,CompressorState * cs,bool flush)263 DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush)
264 {
265 	z_streamp	zp = cs->zp;
266 	char	   *out = cs->zlibOut;
267 	int			res = Z_OK;
268 
269 	while (cs->zp->avail_in != 0 || flush)
270 	{
271 		res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH);
272 		if (res == Z_STREAM_ERROR)
273 			fatal("could not compress data: %s", zp->msg);
274 		if ((flush && (zp->avail_out < cs->zlibOutSize))
275 			|| (zp->avail_out == 0)
276 			|| (zp->avail_in != 0)
277 			)
278 		{
279 			/*
280 			 * Extra paranoia: avoid zero-length chunks, since a zero length
281 			 * chunk is the EOF marker in the custom format. This should never
282 			 * happen but...
283 			 */
284 			if (zp->avail_out < cs->zlibOutSize)
285 			{
286 				/*
287 				 * Any write function should do its own error checking but to
288 				 * make sure we do a check here as well...
289 				 */
290 				size_t		len = cs->zlibOutSize - zp->avail_out;
291 
292 				cs->writeF(AH, out, len);
293 			}
294 			zp->next_out = (void *) out;
295 			zp->avail_out = cs->zlibOutSize;
296 		}
297 
298 		if (res == Z_STREAM_END)
299 			break;
300 	}
301 }
302 
303 static void
WriteDataToArchiveZlib(ArchiveHandle * AH,CompressorState * cs,const char * data,size_t dLen)304 WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs,
305 					   const char *data, size_t dLen)
306 {
307 	cs->zp->next_in = (void *) unconstify(char *, data);
308 	cs->zp->avail_in = dLen;
309 	DeflateCompressorZlib(AH, cs, false);
310 }
311 
312 static void
ReadDataFromArchiveZlib(ArchiveHandle * AH,ReadFunc readF)313 ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
314 {
315 	z_streamp	zp;
316 	char	   *out;
317 	int			res = Z_OK;
318 	size_t		cnt;
319 	char	   *buf;
320 	size_t		buflen;
321 
322 	zp = (z_streamp) pg_malloc(sizeof(z_stream));
323 	zp->zalloc = Z_NULL;
324 	zp->zfree = Z_NULL;
325 	zp->opaque = Z_NULL;
326 
327 	buf = pg_malloc(ZLIB_IN_SIZE);
328 	buflen = ZLIB_IN_SIZE;
329 
330 	out = pg_malloc(ZLIB_OUT_SIZE + 1);
331 
332 	if (inflateInit(zp) != Z_OK)
333 		fatal("could not initialize compression library: %s",
334 			  zp->msg);
335 
336 	/* no minimal chunk size for zlib */
337 	while ((cnt = readF(AH, &buf, &buflen)))
338 	{
339 		zp->next_in = (void *) buf;
340 		zp->avail_in = cnt;
341 
342 		while (zp->avail_in > 0)
343 		{
344 			zp->next_out = (void *) out;
345 			zp->avail_out = ZLIB_OUT_SIZE;
346 
347 			res = inflate(zp, 0);
348 			if (res != Z_OK && res != Z_STREAM_END)
349 				fatal("could not uncompress data: %s", zp->msg);
350 
351 			out[ZLIB_OUT_SIZE - zp->avail_out] = '\0';
352 			ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH);
353 		}
354 	}
355 
356 	zp->next_in = NULL;
357 	zp->avail_in = 0;
358 	while (res != Z_STREAM_END)
359 	{
360 		zp->next_out = (void *) out;
361 		zp->avail_out = ZLIB_OUT_SIZE;
362 		res = inflate(zp, 0);
363 		if (res != Z_OK && res != Z_STREAM_END)
364 			fatal("could not uncompress data: %s", zp->msg);
365 
366 		out[ZLIB_OUT_SIZE - zp->avail_out] = '\0';
367 		ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH);
368 	}
369 
370 	if (inflateEnd(zp) != Z_OK)
371 		fatal("could not close compression library: %s", zp->msg);
372 
373 	free(buf);
374 	free(out);
375 	free(zp);
376 }
377 #endif							/* HAVE_LIBZ */
378 
379 
380 /*
381  * Functions for uncompressed output.
382  */
383 
384 static void
ReadDataFromArchiveNone(ArchiveHandle * AH,ReadFunc readF)385 ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF)
386 {
387 	size_t		cnt;
388 	char	   *buf;
389 	size_t		buflen;
390 
391 	buf = pg_malloc(ZLIB_OUT_SIZE);
392 	buflen = ZLIB_OUT_SIZE;
393 
394 	while ((cnt = readF(AH, &buf, &buflen)))
395 	{
396 		ahwrite(buf, 1, cnt, AH);
397 	}
398 
399 	free(buf);
400 }
401 
402 static void
WriteDataToArchiveNone(ArchiveHandle * AH,CompressorState * cs,const char * data,size_t dLen)403 WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
404 					   const char *data, size_t dLen)
405 {
406 	cs->writeF(AH, data, dLen);
407 }
408 
409 
410 /*----------------------
411  * Compressed stream API
412  *----------------------
413  */
414 
/*
 * An open stream of the compressed stream API.  It wraps either a stdio
 * FILE or a zlib gzFile; cfopen() sets the one that is not in use to NULL.
 * Callers treat this as opaque.
 */
struct cfp
{
	FILE	   *uncompressedfp; /* plain stdio stream, or NULL */
#ifdef HAVE_LIBZ
	gzFile		compressedfp;	/* zlib stream, or NULL */
#endif
};

/* Only the zlib code paths need to test for a filename suffix. */
#ifdef HAVE_LIBZ
static int	hasSuffix(const char *filename, const char *suffix);
#endif
430 
/*
 * free() that leaves errno as the caller had it; used below where an error
 * code must survive releasing memory.
 */
static void
free_keep_errno(void *p)
{
	const int	errno_on_entry = errno;

	free(p);

	/* Put back whatever free() may have overwritten. */
	errno = errno_on_entry;
}
440 
441 /*
442  * Open a file for reading. 'path' is the file to open, and 'mode' should
443  * be either "r" or "rb".
444  *
445  * If the file at 'path' does not exist, we append the ".gz" suffix (if 'path'
446  * doesn't already have it) and try again. So if you pass "foo" as 'path',
447  * this will open either "foo" or "foo.gz".
448  *
449  * On failure, return NULL with an error code in errno.
450  */
451 cfp *
cfopen_read(const char * path,const char * mode)452 cfopen_read(const char *path, const char *mode)
453 {
454 	cfp		   *fp;
455 
456 #ifdef HAVE_LIBZ
457 	if (hasSuffix(path, ".gz"))
458 		fp = cfopen(path, mode, 1);
459 	else
460 #endif
461 	{
462 		fp = cfopen(path, mode, 0);
463 #ifdef HAVE_LIBZ
464 		if (fp == NULL)
465 		{
466 			char	   *fname;
467 
468 			fname = psprintf("%s.gz", path);
469 			fp = cfopen(fname, mode, 1);
470 			free_keep_errno(fname);
471 		}
472 #endif
473 	}
474 	return fp;
475 }
476 
477 /*
478  * Open a file for writing. 'path' indicates the path name, and 'mode' must
479  * be a filemode as accepted by fopen() and gzopen() that indicates writing
480  * ("w", "wb", "a", or "ab").
481  *
482  * If 'compression' is non-zero, a gzip compressed stream is opened, and
483  * 'compression' indicates the compression level used. The ".gz" suffix
484  * is automatically added to 'path' in that case.
485  *
486  * On failure, return NULL with an error code in errno.
487  */
488 cfp *
cfopen_write(const char * path,const char * mode,int compression)489 cfopen_write(const char *path, const char *mode, int compression)
490 {
491 	cfp		   *fp;
492 
493 	if (compression == 0)
494 		fp = cfopen(path, mode, 0);
495 	else
496 	{
497 #ifdef HAVE_LIBZ
498 		char	   *fname;
499 
500 		fname = psprintf("%s.gz", path);
501 		fp = cfopen(fname, mode, compression);
502 		free_keep_errno(fname);
503 #else
504 		fatal("not built with zlib support");
505 		fp = NULL;				/* keep compiler quiet */
506 #endif
507 	}
508 	return fp;
509 }
510 
511 /*
512  * Opens file 'path' in 'mode'. If 'compression' is non-zero, the file
513  * is opened with libz gzopen(), otherwise with plain fopen().
514  *
515  * On failure, return NULL with an error code in errno.
516  */
517 cfp *
cfopen(const char * path,const char * mode,int compression)518 cfopen(const char *path, const char *mode, int compression)
519 {
520 	cfp		   *fp = pg_malloc(sizeof(cfp));
521 
522 	if (compression != 0)
523 	{
524 #ifdef HAVE_LIBZ
525 		if (compression != Z_DEFAULT_COMPRESSION)
526 		{
527 			/* user has specified a compression level, so tell zlib to use it */
528 			char		mode_compression[32];
529 
530 			snprintf(mode_compression, sizeof(mode_compression), "%s%d",
531 					 mode, compression);
532 			fp->compressedfp = gzopen(path, mode_compression);
533 		}
534 		else
535 		{
536 			/* don't specify a level, just use the zlib default */
537 			fp->compressedfp = gzopen(path, mode);
538 		}
539 
540 		fp->uncompressedfp = NULL;
541 		if (fp->compressedfp == NULL)
542 		{
543 			free_keep_errno(fp);
544 			fp = NULL;
545 		}
546 #else
547 		fatal("not built with zlib support");
548 #endif
549 	}
550 	else
551 	{
552 #ifdef HAVE_LIBZ
553 		fp->compressedfp = NULL;
554 #endif
555 		fp->uncompressedfp = fopen(path, mode);
556 		if (fp->uncompressedfp == NULL)
557 		{
558 			free_keep_errno(fp);
559 			fp = NULL;
560 		}
561 	}
562 
563 	return fp;
564 }
565 
566 
567 int
cfread(void * ptr,int size,cfp * fp)568 cfread(void *ptr, int size, cfp *fp)
569 {
570 	int			ret;
571 
572 	if (size == 0)
573 		return 0;
574 
575 #ifdef HAVE_LIBZ
576 	if (fp->compressedfp)
577 	{
578 		ret = gzread(fp->compressedfp, ptr, size);
579 		if (ret != size && !gzeof(fp->compressedfp))
580 		{
581 			int			errnum;
582 			const char *errmsg = gzerror(fp->compressedfp, &errnum);
583 
584 			fatal("could not read from input file: %s",
585 				  errnum == Z_ERRNO ? strerror(errno) : errmsg);
586 		}
587 	}
588 	else
589 #endif
590 	{
591 		ret = fread(ptr, 1, size, fp->uncompressedfp);
592 		if (ret != size && !feof(fp->uncompressedfp))
593 			READ_ERROR_EXIT(fp->uncompressedfp);
594 	}
595 	return ret;
596 }
597 
598 int
cfwrite(const void * ptr,int size,cfp * fp)599 cfwrite(const void *ptr, int size, cfp *fp)
600 {
601 #ifdef HAVE_LIBZ
602 	if (fp->compressedfp)
603 		return gzwrite(fp->compressedfp, ptr, size);
604 	else
605 #endif
606 		return fwrite(ptr, 1, size, fp->uncompressedfp);
607 }
608 
609 int
cfgetc(cfp * fp)610 cfgetc(cfp *fp)
611 {
612 	int			ret;
613 
614 #ifdef HAVE_LIBZ
615 	if (fp->compressedfp)
616 	{
617 		ret = gzgetc(fp->compressedfp);
618 		if (ret == EOF)
619 		{
620 			if (!gzeof(fp->compressedfp))
621 				fatal("could not read from input file: %s", strerror(errno));
622 			else
623 				fatal("could not read from input file: end of file");
624 		}
625 	}
626 	else
627 #endif
628 	{
629 		ret = fgetc(fp->uncompressedfp);
630 		if (ret == EOF)
631 			READ_ERROR_EXIT(fp->uncompressedfp);
632 	}
633 
634 	return ret;
635 }
636 
637 char *
cfgets(cfp * fp,char * buf,int len)638 cfgets(cfp *fp, char *buf, int len)
639 {
640 #ifdef HAVE_LIBZ
641 	if (fp->compressedfp)
642 		return gzgets(fp->compressedfp, buf, len);
643 	else
644 #endif
645 		return fgets(buf, len, fp->uncompressedfp);
646 }
647 
648 int
cfclose(cfp * fp)649 cfclose(cfp *fp)
650 {
651 	int			result;
652 
653 	if (fp == NULL)
654 	{
655 		errno = EBADF;
656 		return EOF;
657 	}
658 #ifdef HAVE_LIBZ
659 	if (fp->compressedfp)
660 	{
661 		result = gzclose(fp->compressedfp);
662 		fp->compressedfp = NULL;
663 	}
664 	else
665 #endif
666 	{
667 		result = fclose(fp->uncompressedfp);
668 		fp->uncompressedfp = NULL;
669 	}
670 	free_keep_errno(fp);
671 
672 	return result;
673 }
674 
675 int
cfeof(cfp * fp)676 cfeof(cfp *fp)
677 {
678 #ifdef HAVE_LIBZ
679 	if (fp->compressedfp)
680 		return gzeof(fp->compressedfp);
681 	else
682 #endif
683 		return feof(fp->uncompressedfp);
684 }
685 
686 const char *
get_cfp_error(cfp * fp)687 get_cfp_error(cfp *fp)
688 {
689 #ifdef HAVE_LIBZ
690 	if (fp->compressedfp)
691 	{
692 		int			errnum;
693 		const char *errmsg = gzerror(fp->compressedfp, &errnum);
694 
695 		if (errnum != Z_ERRNO)
696 			return errmsg;
697 	}
698 #endif
699 	return strerror(errno);
700 }
701 
702 #ifdef HAVE_LIBZ
/*
 * Report whether 'filename' ends with 'suffix'.
 *
 * Returns 1 if it does and 0 if it does not.  An empty 'suffix' matches
 * every filename.  Both arguments must be non-NULL, NUL-terminated strings.
 */
static int
hasSuffix(const char *filename, const char *suffix)
{
	/* size_t, so strlen() results are not narrowed into a signed int */
	size_t		filenamelen = strlen(filename);
	size_t		suffixlen = strlen(suffix);

	if (filenamelen < suffixlen)
		return 0;

	return memcmp(filename + (filenamelen - suffixlen),
				  suffix,
				  suffixlen) == 0;
}
716 
717 #endif
718