1 /*-------------------------------------------------------------------------
2  *
3  * pg_backup_tar.c
4  *
5  *	This file is copied from the 'files' format file, but dumps data into
6  *	one temp file then sends it to the output TAR archive.
7  *
8  *	The tar format also includes a 'restore.sql' script which is there for
9  *	the benefit of humans. This script is never used by pg_restore.
10  *
11  *	NOTE: If you untar the created 'tar' file, the resulting files are
12  *	compatible with the 'directory' format. Please keep the two formats in
13  *	sync.
14  *
15  *	See the headers to pg_backup_directory & pg_restore for more details.
16  *
17  * Copyright (c) 2000, Philip Warner
18  *		Rights are granted to use this software in any way so long
19  *		as this notice is not removed.
20  *
21  *	The author is not responsible for loss or damages that may
22  *	result from its use.
23  *
24  *
25  * IDENTIFICATION
26  *		src/bin/pg_dump/pg_backup_tar.c
27  *
28  *-------------------------------------------------------------------------
29  */
30 #include "postgres_fe.h"
31 
32 #include <sys/stat.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <unistd.h>
36 
37 #include "common/file_utils.h"
38 #include "fe_utils/string_utils.h"
39 #include "pg_backup_archiver.h"
40 #include "pg_backup_tar.h"
41 #include "pg_backup_utils.h"
42 #include "pgtar.h"
43 
/*
 * Format-specific callbacks.  InitArchiveFmt_Tar() stores each of these in
 * the corresponding function-pointer slot of the ArchiveHandle.
 */
static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te);
static void _StartData(ArchiveHandle *AH, TocEntry *te);
static void _WriteData(ArchiveHandle *AH, const void *data, size_t dLen);
static void _EndData(ArchiveHandle *AH, TocEntry *te);
static int	_WriteByte(ArchiveHandle *AH, const int i);
static int	_ReadByte(ArchiveHandle *);
static void _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len);
static void _ReadBuf(ArchiveHandle *AH, void *buf, size_t len);
static void _CloseArchive(ArchiveHandle *AH);
static void _PrintTocData(ArchiveHandle *AH, TocEntry *te);
static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te);
static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te);
static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te);

/* Large-object (blob) callbacks, likewise installed by InitArchiveFmt_Tar(). */
static void _StartBlobs(ArchiveHandle *AH, TocEntry *te);
static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
static void _EndBlobs(ArchiveHandle *AH, TocEntry *te);

/* Size of the scratch buffers used to build member file names. */
#define K_STD_BUF_SIZE 1024
64 
65 
/*
 * State of one member (file) of the tar archive that is currently open for
 * reading or writing.  Created by tarOpen(), finished by tarClose().
 */
typedef struct
{
#ifdef HAVE_LIBZ
	gzFile		zFH;			/* gzip stream; set by gzdopen() in tarOpen()
								 * when writing with compression */
#else
	FILE	   *zFH;			/* same slot as a plain FILE * when built
								 * without zlib */
#endif
	FILE	   *nFH;			/* uncompressed stream: the tar file itself
								 * when reading, the temp file when writing */
	FILE	   *tarFH;			/* the archive's tar stream (copied from
								 * lclContext.tarFH) */
	FILE	   *tmpFH;			/* temp file collecting the member's data in
								 * write mode */
	char	   *targetFile;		/* member name inside the archive (malloc'd) */
	char		mode;			/* 'r' or 'w', as passed to tarOpen() */
	pgoff_t		pos;			/* bytes read from / written to the member so
								 * far */
	pgoff_t		fileLen;		/* member length: from the tar header when
								 * reading, from the temp file size when
								 * writing */
	ArchiveHandle *AH;			/* owning archive handle */
} TAR_MEMBER;
82 
/*
 * Per-archive private state for the tar format, stored in AH->formatData.
 */
typedef struct
{
	int			hasSeek;		/* result of checkSeek() on tarFH */
	pgoff_t		filePos;		/* running byte count maintained by the
								 * byte/buffer read and write callbacks */
	TAR_MEMBER *blobToc;		/* the "blobs.toc" member while blobs are
								 * being dumped */
	FILE	   *tarFH;			/* the tar archive stream (file, stdin or
								 * stdout) */
	pgoff_t		tarFHpos;		/* position in tarFH as tracked by
								 * _tarReadRaw() and _tarAddFile() */
	pgoff_t		tarNextMember;	/* position just past the current member's
								 * data and padding; set by _tarPositionTo() */
	TAR_MEMBER *FH;				/* member used by the byte/buffer read and
								 * write callbacks */
	int			isSpecialScript;	/* nonzero while restore.sql is being
									 * generated */
	TAR_MEMBER *scriptTH;		/* member receiving restore.sql output (see
								 * _scriptOut) */
} lclContext;
95 
/*
 * Per-TOC-entry private state for the tar format, stored in te->formatData.
 */
typedef struct
{
	TAR_MEMBER *TH;				/* member currently open for this entry's
								 * data, or NULL */
	char	   *filename;		/* name of the entry's data member, or NULL if
								 * the entry has no data file */
} lclTocEntry;
101 
/* Restores all large objects found in the archive (read mode). */
static void _LoadBlobs(ArchiveHandle *AH);

/* Open / close one member of the tar archive. */
static TAR_MEMBER *tarOpen(ArchiveHandle *AH, const char *filename, char mode);
static void tarClose(ArchiveHandle *AH, TAR_MEMBER *TH);

#ifdef __NOT_USED__
static char *tarGets(char *buf, size_t len, TAR_MEMBER *th);
#endif
static int	tarPrintf(TAR_MEMBER *th, const char *fmt,...) pg_attribute_printf(2, 3);

/* Low-level tar reading and writing helpers. */
static void _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th);
static TAR_MEMBER *_tarPositionTo(ArchiveHandle *AH, const char *filename);
static size_t tarRead(void *buf, size_t len, TAR_MEMBER *th);
static size_t tarWrite(const void *buf, size_t len, TAR_MEMBER *th);
static void _tarWriteHeader(TAR_MEMBER *th);
static int	_tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th);
static size_t _tarReadRaw(ArchiveHandle *AH, void *buf, size_t len, TAR_MEMBER *th, FILE *fh);

/* Output hook used while generating restore.sql (see _CloseArchive). */
static size_t _scriptOut(ArchiveHandle *AH, const void *buf, size_t len);
121 
122 /*
123  *	Initializer
124  */
125 void
InitArchiveFmt_Tar(ArchiveHandle * AH)126 InitArchiveFmt_Tar(ArchiveHandle *AH)
127 {
128 	lclContext *ctx;
129 
130 	/* Assuming static functions, this can be copied for each format. */
131 	AH->ArchiveEntryPtr = _ArchiveEntry;
132 	AH->StartDataPtr = _StartData;
133 	AH->WriteDataPtr = _WriteData;
134 	AH->EndDataPtr = _EndData;
135 	AH->WriteBytePtr = _WriteByte;
136 	AH->ReadBytePtr = _ReadByte;
137 	AH->WriteBufPtr = _WriteBuf;
138 	AH->ReadBufPtr = _ReadBuf;
139 	AH->ClosePtr = _CloseArchive;
140 	AH->ReopenPtr = NULL;
141 	AH->PrintTocDataPtr = _PrintTocData;
142 	AH->ReadExtraTocPtr = _ReadExtraToc;
143 	AH->WriteExtraTocPtr = _WriteExtraToc;
144 	AH->PrintExtraTocPtr = _PrintExtraToc;
145 
146 	AH->StartBlobsPtr = _StartBlobs;
147 	AH->StartBlobPtr = _StartBlob;
148 	AH->EndBlobPtr = _EndBlob;
149 	AH->EndBlobsPtr = _EndBlobs;
150 	AH->ClonePtr = NULL;
151 	AH->DeClonePtr = NULL;
152 
153 	AH->WorkerJobDumpPtr = NULL;
154 	AH->WorkerJobRestorePtr = NULL;
155 
156 	/*
157 	 * Set up some special context used in compressing data.
158 	 */
159 	ctx = (lclContext *) pg_malloc0(sizeof(lclContext));
160 	AH->formatData = (void *) ctx;
161 	ctx->filePos = 0;
162 	ctx->isSpecialScript = 0;
163 
164 	/* Initialize LO buffering */
165 	AH->lo_buf_size = LOBBUFSIZE;
166 	AH->lo_buf = (void *) pg_malloc(LOBBUFSIZE);
167 
168 	/*
169 	 * Now open the tar file, and load the TOC if we're in read mode.
170 	 */
171 	if (AH->mode == archModeWrite)
172 	{
173 		if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
174 		{
175 			ctx->tarFH = fopen(AH->fSpec, PG_BINARY_W);
176 			if (ctx->tarFH == NULL)
177 				fatal("could not open TOC file \"%s\" for output: %m",
178 					  AH->fSpec);
179 		}
180 		else
181 		{
182 			ctx->tarFH = stdout;
183 			if (ctx->tarFH == NULL)
184 				fatal("could not open TOC file for output: %m");
185 		}
186 
187 		ctx->tarFHpos = 0;
188 
189 		/*
190 		 * Make unbuffered since we will dup() it, and the buffers screw each
191 		 * other
192 		 */
193 		/* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
194 
195 		ctx->hasSeek = checkSeek(ctx->tarFH);
196 
197 		/*
198 		 * We don't support compression because reading the files back is not
199 		 * possible since gzdopen uses buffered IO which totally screws file
200 		 * positioning.
201 		 */
202 		if (AH->compression != 0)
203 			fatal("compression is not supported by tar archive format");
204 	}
205 	else
206 	{							/* Read Mode */
207 		if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
208 		{
209 			ctx->tarFH = fopen(AH->fSpec, PG_BINARY_R);
210 			if (ctx->tarFH == NULL)
211 				fatal("could not open TOC file \"%s\" for input: %m",
212 					  AH->fSpec);
213 		}
214 		else
215 		{
216 			ctx->tarFH = stdin;
217 			if (ctx->tarFH == NULL)
218 				fatal("could not open TOC file for input: %m");
219 		}
220 
221 		/*
222 		 * Make unbuffered since we will dup() it, and the buffers screw each
223 		 * other
224 		 */
225 		/* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
226 
227 		ctx->tarFHpos = 0;
228 
229 		ctx->hasSeek = checkSeek(ctx->tarFH);
230 
231 		ctx->FH = (void *) tarOpen(AH, "toc.dat", 'r');
232 		ReadHead(AH);
233 		ReadToc(AH);
234 		tarClose(AH, ctx->FH);	/* Nothing else in the file... */
235 	}
236 }
237 
238 /*
239  * - Start a new TOC entry
240  *	 Setup the output file name.
241  */
242 static void
_ArchiveEntry(ArchiveHandle * AH,TocEntry * te)243 _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
244 {
245 	lclTocEntry *ctx;
246 	char		fn[K_STD_BUF_SIZE];
247 
248 	ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
249 	if (te->dataDumper != NULL)
250 	{
251 #ifdef HAVE_LIBZ
252 		if (AH->compression == 0)
253 			sprintf(fn, "%d.dat", te->dumpId);
254 		else
255 			sprintf(fn, "%d.dat.gz", te->dumpId);
256 #else
257 		sprintf(fn, "%d.dat", te->dumpId);
258 #endif
259 		ctx->filename = pg_strdup(fn);
260 	}
261 	else
262 	{
263 		ctx->filename = NULL;
264 		ctx->TH = NULL;
265 	}
266 	te->formatData = (void *) ctx;
267 }
268 
269 static void
_WriteExtraToc(ArchiveHandle * AH,TocEntry * te)270 _WriteExtraToc(ArchiveHandle *AH, TocEntry *te)
271 {
272 	lclTocEntry *ctx = (lclTocEntry *) te->formatData;
273 
274 	if (ctx->filename)
275 		WriteStr(AH, ctx->filename);
276 	else
277 		WriteStr(AH, "");
278 }
279 
280 static void
_ReadExtraToc(ArchiveHandle * AH,TocEntry * te)281 _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
282 {
283 	lclTocEntry *ctx = (lclTocEntry *) te->formatData;
284 
285 	if (ctx == NULL)
286 	{
287 		ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
288 		te->formatData = (void *) ctx;
289 	}
290 
291 	ctx->filename = ReadStr(AH);
292 	if (strlen(ctx->filename) == 0)
293 	{
294 		free(ctx->filename);
295 		ctx->filename = NULL;
296 	}
297 	ctx->TH = NULL;
298 }
299 
300 static void
_PrintExtraToc(ArchiveHandle * AH,TocEntry * te)301 _PrintExtraToc(ArchiveHandle *AH, TocEntry *te)
302 {
303 	lclTocEntry *ctx = (lclTocEntry *) te->formatData;
304 
305 	if (AH->public.verbose && ctx->filename != NULL)
306 		ahprintf(AH, "-- File: %s\n", ctx->filename);
307 }
308 
309 static void
_StartData(ArchiveHandle * AH,TocEntry * te)310 _StartData(ArchiveHandle *AH, TocEntry *te)
311 {
312 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
313 
314 	tctx->TH = tarOpen(AH, tctx->filename, 'w');
315 }
316 
317 static TAR_MEMBER *
tarOpen(ArchiveHandle * AH,const char * filename,char mode)318 tarOpen(ArchiveHandle *AH, const char *filename, char mode)
319 {
320 	lclContext *ctx = (lclContext *) AH->formatData;
321 	TAR_MEMBER *tm;
322 
323 #ifdef HAVE_LIBZ
324 	char		fmode[14];
325 #endif
326 
327 	if (mode == 'r')
328 	{
329 		tm = _tarPositionTo(AH, filename);
330 		if (!tm)				/* Not found */
331 		{
332 			if (filename)
333 			{
334 				/*
335 				 * Couldn't find the requested file. Future: do SEEK(0) and
336 				 * retry.
337 				 */
338 				fatal("could not find file \"%s\" in archive", filename);
339 			}
340 			else
341 			{
342 				/* Any file OK, none left, so return NULL */
343 				return NULL;
344 			}
345 		}
346 
347 #ifdef HAVE_LIBZ
348 
349 		if (AH->compression == 0)
350 			tm->nFH = ctx->tarFH;
351 		else
352 			fatal("compression is not supported by tar archive format");
353 		/* tm->zFH = gzdopen(dup(fileno(ctx->tarFH)), "rb"); */
354 #else
355 		tm->nFH = ctx->tarFH;
356 #endif
357 	}
358 	else
359 	{
360 		int			old_umask;
361 
362 		tm = pg_malloc0(sizeof(TAR_MEMBER));
363 
364 		/*
365 		 * POSIX does not require, but permits, tmpfile() to restrict file
366 		 * permissions.  Given an OS crash after we write data, the filesystem
367 		 * might retain the data but forget tmpfile()'s unlink().  If so, the
368 		 * file mode protects confidentiality of the data written.
369 		 */
370 		old_umask = umask(S_IRWXG | S_IRWXO);
371 
372 #ifndef WIN32
373 		tm->tmpFH = tmpfile();
374 #else
375 
376 		/*
377 		 * On WIN32, tmpfile() generates a filename in the root directory,
378 		 * which requires administrative permissions on certain systems. Loop
379 		 * until we find a unique file name we can create.
380 		 */
381 		while (1)
382 		{
383 			char	   *name;
384 			int			fd;
385 
386 			name = _tempnam(NULL, "pg_temp_");
387 			if (name == NULL)
388 				break;
389 			fd = open(name, O_RDWR | O_CREAT | O_EXCL | O_BINARY |
390 					  O_TEMPORARY, S_IRUSR | S_IWUSR);
391 			free(name);
392 
393 			if (fd != -1)		/* created a file */
394 			{
395 				tm->tmpFH = fdopen(fd, "w+b");
396 				break;
397 			}
398 			else if (errno != EEXIST)	/* failure other than file exists */
399 				break;
400 		}
401 #endif
402 
403 		if (tm->tmpFH == NULL)
404 			fatal("could not generate temporary file name: %m");
405 
406 		umask(old_umask);
407 
408 #ifdef HAVE_LIBZ
409 
410 		if (AH->compression != 0)
411 		{
412 			sprintf(fmode, "wb%d", AH->compression);
413 			tm->zFH = gzdopen(dup(fileno(tm->tmpFH)), fmode);
414 			if (tm->zFH == NULL)
415 				fatal("could not open temporary file");
416 		}
417 		else
418 			tm->nFH = tm->tmpFH;
419 #else
420 
421 		tm->nFH = tm->tmpFH;
422 #endif
423 
424 		tm->AH = AH;
425 		tm->targetFile = pg_strdup(filename);
426 	}
427 
428 	tm->mode = mode;
429 	tm->tarFH = ctx->tarFH;
430 
431 	return tm;
432 }
433 
434 static void
tarClose(ArchiveHandle * AH,TAR_MEMBER * th)435 tarClose(ArchiveHandle *AH, TAR_MEMBER *th)
436 {
437 	/*
438 	 * Close the GZ file since we dup'd. This will flush the buffers.
439 	 */
440 	if (AH->compression != 0)
441 		if (GZCLOSE(th->zFH) != 0)
442 			fatal("could not close tar member");
443 
444 	if (th->mode == 'w')
445 		_tarAddFile(AH, th);	/* This will close the temp file */
446 
447 	/*
448 	 * else Nothing to do for normal read since we don't dup() normal file
449 	 * handle, and we don't use temp files.
450 	 */
451 
452 	if (th->targetFile)
453 		free(th->targetFile);
454 
455 	th->nFH = NULL;
456 	th->zFH = NULL;
457 }
458 
#ifdef __NOT_USED__
/*
 * tarGets
 *
 * fgets()-like read of one line from a tar member.
 *
 * Reads at most len - 1 bytes, stopping after a newline or at the end of
 * the member, and always NUL-terminates buf.  Returns buf, or NULL when
 * nothing could be read because the member is exhausted (or len is 0).
 * th->pos is advanced by the number of bytes actually consumed.
 */
static char *
tarGets(char *buf, size_t len, TAR_MEMBER *th)
{
	size_t		room;
	size_t		cnt = 0;
	char		c = ' ';
	bool		eof = false;
	pgoff_t		remaining;

	if (len == 0)
		return NULL;			/* no room even for the terminator */

	/* Keep one byte of the caller's buffer for the trailing NUL. */
	room = len - 1;

	/* Can't read past logical EOF */
	remaining = th->fileLen - th->pos;
	if (remaining <= 0)
		return NULL;
	if ((uint64) remaining < (uint64) room)
		room = (size_t) remaining;

	while (cnt < room && c != '\n')
	{
		if (_tarReadRaw(th->AH, &c, 1, th, NULL) == 0)
		{
			eof = true;
			break;
		}
		buf[cnt++] = c;
	}

	if (eof && cnt == 0)
		return NULL;

	buf[cnt] = '\0';

	/* Count consumed bytes, not strlen(): the data may contain NULs. */
	th->pos += cnt;

	return buf;
}
#endif
499 
500 /*
501  * Just read bytes from the archive. This is the low level read routine
502  * that is used for ALL reads on a tar file.
503  */
504 static size_t
_tarReadRaw(ArchiveHandle * AH,void * buf,size_t len,TAR_MEMBER * th,FILE * fh)505 _tarReadRaw(ArchiveHandle *AH, void *buf, size_t len, TAR_MEMBER *th, FILE *fh)
506 {
507 	lclContext *ctx = (lclContext *) AH->formatData;
508 	size_t		avail;
509 	size_t		used = 0;
510 	size_t		res = 0;
511 
512 	Assert(th || fh);
513 
514 	avail = AH->lookaheadLen - AH->lookaheadPos;
515 	if (avail > 0)
516 	{
517 		/* We have some lookahead bytes to use */
518 		if (avail >= len)		/* Just use the lookahead buffer */
519 			used = len;
520 		else
521 			used = avail;
522 
523 		/* Copy, and adjust buffer pos */
524 		memcpy(buf, AH->lookahead + AH->lookaheadPos, used);
525 		AH->lookaheadPos += used;
526 
527 		/* Adjust required length */
528 		len -= used;
529 	}
530 
531 	/* Read the file if len > 0 */
532 	if (len > 0)
533 	{
534 		if (fh)
535 		{
536 			res = fread(&((char *) buf)[used], 1, len, fh);
537 			if (res != len && !feof(fh))
538 				READ_ERROR_EXIT(fh);
539 		}
540 		else if (th)
541 		{
542 			if (th->zFH)
543 			{
544 				res = GZREAD(&((char *) buf)[used], 1, len, th->zFH);
545 				if (res != len && !GZEOF(th->zFH))
546 				{
547 #ifdef HAVE_LIBZ
548 					int			errnum;
549 					const char *errmsg = gzerror(th->zFH, &errnum);
550 
551 					fatal("could not read from input file: %s",
552 						  errnum == Z_ERRNO ? strerror(errno) : errmsg);
553 #else
554 					fatal("could not read from input file: %s",
555 						  strerror(errno));
556 #endif
557 				}
558 			}
559 			else
560 			{
561 				res = fread(&((char *) buf)[used], 1, len, th->nFH);
562 				if (res != len && !feof(th->nFH))
563 					READ_ERROR_EXIT(th->nFH);
564 			}
565 		}
566 	}
567 
568 	ctx->tarFHpos += res + used;
569 
570 	return (res + used);
571 }
572 
573 static size_t
tarRead(void * buf,size_t len,TAR_MEMBER * th)574 tarRead(void *buf, size_t len, TAR_MEMBER *th)
575 {
576 	size_t		res;
577 
578 	if (th->pos + len > th->fileLen)
579 		len = th->fileLen - th->pos;
580 
581 	if (len <= 0)
582 		return 0;
583 
584 	res = _tarReadRaw(th->AH, buf, len, th, NULL);
585 
586 	th->pos += res;
587 
588 	return res;
589 }
590 
591 static size_t
tarWrite(const void * buf,size_t len,TAR_MEMBER * th)592 tarWrite(const void *buf, size_t len, TAR_MEMBER *th)
593 {
594 	size_t		res;
595 
596 	if (th->zFH != NULL)
597 		res = GZWRITE(buf, 1, len, th->zFH);
598 	else
599 		res = fwrite(buf, 1, len, th->nFH);
600 
601 	th->pos += res;
602 	return res;
603 }
604 
605 static void
_WriteData(ArchiveHandle * AH,const void * data,size_t dLen)606 _WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
607 {
608 	lclTocEntry *tctx = (lclTocEntry *) AH->currToc->formatData;
609 
610 	if (tarWrite(data, dLen, tctx->TH) != dLen)
611 		WRITE_ERROR_EXIT;
612 }
613 
614 static void
_EndData(ArchiveHandle * AH,TocEntry * te)615 _EndData(ArchiveHandle *AH, TocEntry *te)
616 {
617 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
618 
619 	/* Close the file */
620 	tarClose(AH, tctx->TH);
621 	tctx->TH = NULL;
622 }
623 
624 /*
625  * Print data for a given file
626  */
627 static void
_PrintFileData(ArchiveHandle * AH,char * filename)628 _PrintFileData(ArchiveHandle *AH, char *filename)
629 {
630 	lclContext *ctx = (lclContext *) AH->formatData;
631 	char		buf[4096];
632 	size_t		cnt;
633 	TAR_MEMBER *th;
634 
635 	if (!filename)
636 		return;
637 
638 	th = tarOpen(AH, filename, 'r');
639 	ctx->FH = th;
640 
641 	while ((cnt = tarRead(buf, 4095, th)) > 0)
642 	{
643 		buf[cnt] = '\0';
644 		ahwrite(buf, 1, cnt, AH);
645 	}
646 
647 	tarClose(AH, th);
648 }
649 
650 
651 /*
652  * Print data for a given TOC entry
653 */
654 static void
_PrintTocData(ArchiveHandle * AH,TocEntry * te)655 _PrintTocData(ArchiveHandle *AH, TocEntry *te)
656 {
657 	lclContext *ctx = (lclContext *) AH->formatData;
658 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
659 	int			pos1;
660 
661 	if (!tctx->filename)
662 		return;
663 
664 	/*
665 	 * If we're writing the special restore.sql script, emit a suitable
666 	 * command to include each table's data from the corresponding file.
667 	 *
668 	 * In the COPY case this is a bit klugy because the regular COPY command
669 	 * was already printed before we get control.
670 	 */
671 	if (ctx->isSpecialScript)
672 	{
673 		if (te->copyStmt)
674 		{
675 			/* Abort the COPY FROM stdin */
676 			ahprintf(AH, "\\.\n");
677 
678 			/*
679 			 * The COPY statement should look like "COPY ... FROM stdin;\n",
680 			 * see dumpTableData().
681 			 */
682 			pos1 = (int) strlen(te->copyStmt) - 13;
683 			if (pos1 < 6 || strncmp(te->copyStmt, "COPY ", 5) != 0 ||
684 				strcmp(te->copyStmt + pos1, " FROM stdin;\n") != 0)
685 				fatal("unexpected COPY statement syntax: \"%s\"",
686 					  te->copyStmt);
687 
688 			/* Emit all but the FROM part ... */
689 			ahwrite(te->copyStmt, 1, pos1, AH);
690 			/* ... and insert modified FROM */
691 			ahprintf(AH, " FROM '$$PATH$$/%s';\n\n", tctx->filename);
692 		}
693 		else
694 		{
695 			/* --inserts mode, no worries, just include the data file */
696 			ahprintf(AH, "\\i $$PATH$$/%s\n\n", tctx->filename);
697 		}
698 
699 		return;
700 	}
701 
702 	if (strcmp(te->desc, "BLOBS") == 0)
703 		_LoadBlobs(AH);
704 	else
705 		_PrintFileData(AH, tctx->filename);
706 }
707 
708 static void
_LoadBlobs(ArchiveHandle * AH)709 _LoadBlobs(ArchiveHandle *AH)
710 {
711 	Oid			oid;
712 	lclContext *ctx = (lclContext *) AH->formatData;
713 	TAR_MEMBER *th;
714 	size_t		cnt;
715 	bool		foundBlob = false;
716 	char		buf[4096];
717 
718 	StartRestoreBlobs(AH);
719 
720 	th = tarOpen(AH, NULL, 'r');	/* Open next file */
721 	while (th != NULL)
722 	{
723 		ctx->FH = th;
724 
725 		if (strncmp(th->targetFile, "blob_", 5) == 0)
726 		{
727 			oid = atooid(&th->targetFile[5]);
728 			if (oid != 0)
729 			{
730 				pg_log_info("restoring large object with OID %u", oid);
731 
732 				StartRestoreBlob(AH, oid, AH->public.ropt->dropSchema);
733 
734 				while ((cnt = tarRead(buf, 4095, th)) > 0)
735 				{
736 					buf[cnt] = '\0';
737 					ahwrite(buf, 1, cnt, AH);
738 				}
739 				EndRestoreBlob(AH, oid);
740 				foundBlob = true;
741 			}
742 			tarClose(AH, th);
743 		}
744 		else
745 		{
746 			tarClose(AH, th);
747 
748 			/*
749 			 * Once we have found the first blob, stop at the first non-blob
750 			 * entry (which will be 'blobs.toc').  This coding would eat all
751 			 * the rest of the archive if there are no blobs ... but this
752 			 * function shouldn't be called at all in that case.
753 			 */
754 			if (foundBlob)
755 				break;
756 		}
757 
758 		th = tarOpen(AH, NULL, 'r');
759 	}
760 	EndRestoreBlobs(AH);
761 }
762 
763 
764 static int
_WriteByte(ArchiveHandle * AH,const int i)765 _WriteByte(ArchiveHandle *AH, const int i)
766 {
767 	lclContext *ctx = (lclContext *) AH->formatData;
768 	char		b = i;			/* Avoid endian problems */
769 
770 	if (tarWrite(&b, 1, ctx->FH) != 1)
771 		WRITE_ERROR_EXIT;
772 
773 	ctx->filePos += 1;
774 	return 1;
775 }
776 
777 static int
_ReadByte(ArchiveHandle * AH)778 _ReadByte(ArchiveHandle *AH)
779 {
780 	lclContext *ctx = (lclContext *) AH->formatData;
781 	size_t		res;
782 	unsigned char c;
783 
784 	res = tarRead(&c, 1, ctx->FH);
785 	if (res != 1)
786 		/* We already would have exited for errors on reads, must be EOF */
787 		fatal("could not read from input file: end of file");
788 	ctx->filePos += 1;
789 	return c;
790 }
791 
792 static void
_WriteBuf(ArchiveHandle * AH,const void * buf,size_t len)793 _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
794 {
795 	lclContext *ctx = (lclContext *) AH->formatData;
796 
797 	if (tarWrite(buf, len, ctx->FH) != len)
798 		WRITE_ERROR_EXIT;
799 
800 	ctx->filePos += len;
801 }
802 
803 static void
_ReadBuf(ArchiveHandle * AH,void * buf,size_t len)804 _ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
805 {
806 	lclContext *ctx = (lclContext *) AH->formatData;
807 
808 	if (tarRead(buf, len, ctx->FH) != len)
809 		/* We already would have exited for errors on reads, must be EOF */
810 		fatal("could not read from input file: end of file");
811 
812 	ctx->filePos += len;
813 }
814 
815 static void
_CloseArchive(ArchiveHandle * AH)816 _CloseArchive(ArchiveHandle *AH)
817 {
818 	lclContext *ctx = (lclContext *) AH->formatData;
819 	TAR_MEMBER *th;
820 	RestoreOptions *ropt;
821 	RestoreOptions *savRopt;
822 	DumpOptions *savDopt;
823 	int			savVerbose,
824 				i;
825 
826 	if (AH->mode == archModeWrite)
827 	{
828 		/*
829 		 * Write the Header & TOC to the archive FIRST
830 		 */
831 		th = tarOpen(AH, "toc.dat", 'w');
832 		ctx->FH = th;
833 		WriteHead(AH);
834 		WriteToc(AH);
835 		tarClose(AH, th);		/* Not needed any more */
836 
837 		/*
838 		 * Now send the data (tables & blobs)
839 		 */
840 		WriteDataChunks(AH, NULL);
841 
842 		/*
843 		 * Now this format wants to append a script which does a full restore
844 		 * if the files have been extracted.
845 		 */
846 		th = tarOpen(AH, "restore.sql", 'w');
847 
848 		tarPrintf(th, "--\n"
849 				  "-- NOTE:\n"
850 				  "--\n"
851 				  "-- File paths need to be edited. Search for $$PATH$$ and\n"
852 				  "-- replace it with the path to the directory containing\n"
853 				  "-- the extracted data files.\n"
854 				  "--\n");
855 
856 		AH->CustomOutPtr = _scriptOut;
857 
858 		ctx->isSpecialScript = 1;
859 		ctx->scriptTH = th;
860 
861 		ropt = NewRestoreOptions();
862 		memcpy(ropt, AH->public.ropt, sizeof(RestoreOptions));
863 		ropt->filename = NULL;
864 		ropt->dropSchema = 1;
865 		ropt->compression = 0;
866 		ropt->superuser = NULL;
867 		ropt->suppressDumpWarnings = true;
868 
869 		savDopt = AH->public.dopt;
870 		savRopt = AH->public.ropt;
871 
872 		SetArchiveOptions((Archive *) AH, NULL, ropt);
873 
874 		savVerbose = AH->public.verbose;
875 		AH->public.verbose = 0;
876 
877 		RestoreArchive((Archive *) AH);
878 
879 		SetArchiveOptions((Archive *) AH, savDopt, savRopt);
880 
881 		AH->public.verbose = savVerbose;
882 
883 		tarClose(AH, th);
884 
885 		ctx->isSpecialScript = 0;
886 
887 		/*
888 		 * EOF marker for tar files is two blocks of NULLs.
889 		 */
890 		for (i = 0; i < TAR_BLOCK_SIZE * 2; i++)
891 		{
892 			if (fputc(0, ctx->tarFH) == EOF)
893 				WRITE_ERROR_EXIT;
894 		}
895 
896 		/* Sync the output file if one is defined */
897 		if (AH->dosync && AH->fSpec)
898 			(void) fsync_fname(AH->fSpec, false);
899 	}
900 
901 	AH->FH = NULL;
902 }
903 
904 static size_t
_scriptOut(ArchiveHandle * AH,const void * buf,size_t len)905 _scriptOut(ArchiveHandle *AH, const void *buf, size_t len)
906 {
907 	lclContext *ctx = (lclContext *) AH->formatData;
908 
909 	return tarWrite(buf, len, ctx->scriptTH);
910 }
911 
912 /*
913  * BLOB support
914  */
915 
916 /*
917  * Called by the archiver when starting to save all BLOB DATA (not schema).
918  * This routine should save whatever format-specific information is needed
919  * to read the BLOBs back into memory.
920  *
921  * It is called just prior to the dumper's DataDumper routine.
922  *
923  * Optional, but strongly recommended.
924  *
925  */
926 static void
_StartBlobs(ArchiveHandle * AH,TocEntry * te)927 _StartBlobs(ArchiveHandle *AH, TocEntry *te)
928 {
929 	lclContext *ctx = (lclContext *) AH->formatData;
930 	char		fname[K_STD_BUF_SIZE];
931 
932 	sprintf(fname, "blobs.toc");
933 	ctx->blobToc = tarOpen(AH, fname, 'w');
934 }
935 
936 /*
937  * Called by the archiver when the dumper calls StartBlob.
938  *
939  * Mandatory.
940  *
941  * Must save the passed OID for retrieval at restore-time.
942  */
943 static void
_StartBlob(ArchiveHandle * AH,TocEntry * te,Oid oid)944 _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
945 {
946 	lclContext *ctx = (lclContext *) AH->formatData;
947 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
948 	char		fname[255];
949 	char	   *sfx;
950 
951 	if (oid == 0)
952 		fatal("invalid OID for large object (%u)", oid);
953 
954 	if (AH->compression != 0)
955 		sfx = ".gz";
956 	else
957 		sfx = "";
958 
959 	sprintf(fname, "blob_%u.dat%s", oid, sfx);
960 
961 	tarPrintf(ctx->blobToc, "%u %s\n", oid, fname);
962 
963 	tctx->TH = tarOpen(AH, fname, 'w');
964 }
965 
966 /*
967  * Called by the archiver when the dumper calls EndBlob.
968  *
969  * Optional.
970  *
971  */
972 static void
_EndBlob(ArchiveHandle * AH,TocEntry * te,Oid oid)973 _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
974 {
975 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
976 
977 	tarClose(AH, tctx->TH);
978 }
979 
980 /*
981  * Called by the archiver when finishing saving all BLOB DATA.
982  *
983  * Optional.
984  *
985  */
986 static void
_EndBlobs(ArchiveHandle * AH,TocEntry * te)987 _EndBlobs(ArchiveHandle *AH, TocEntry *te)
988 {
989 	lclContext *ctx = (lclContext *) AH->formatData;
990 
991 	/* Write out a fake zero OID to mark end-of-blobs. */
992 	/* WriteInt(AH, 0); */
993 
994 	tarClose(AH, ctx->blobToc);
995 }
996 
997 
998 
999 /*------------
1000  * TAR Support
1001  *------------
1002  */
1003 
1004 static int
tarPrintf(TAR_MEMBER * th,const char * fmt,...)1005 tarPrintf(TAR_MEMBER *th, const char *fmt,...)
1006 {
1007 	int			save_errno = errno;
1008 	char	   *p;
1009 	size_t		len = 128;		/* initial assumption about buffer size */
1010 	size_t		cnt;
1011 
1012 	for (;;)
1013 	{
1014 		va_list		args;
1015 
1016 		/* Allocate work buffer. */
1017 		p = (char *) pg_malloc(len);
1018 
1019 		/* Try to format the data. */
1020 		errno = save_errno;
1021 		va_start(args, fmt);
1022 		cnt = pvsnprintf(p, len, fmt, args);
1023 		va_end(args);
1024 
1025 		if (cnt < len)
1026 			break;				/* success */
1027 
1028 		/* Release buffer and loop around to try again with larger len. */
1029 		free(p);
1030 		len = cnt;
1031 	}
1032 
1033 	cnt = tarWrite(p, cnt, th);
1034 	free(p);
1035 	return (int) cnt;
1036 }
1037 
/*
 * isValidTarHeader
 *
 * Check whether the given block looks like a tar header: the checksum
 * stored at offset 148 must equal the computed one, and the magic/version
 * bytes at offset 257 must match one of the accepted variants.
 */
bool
isValidTarHeader(char *header)
{
	const char *magic = &header[257];
	int			computed = tarChecksum(header);
	int			stored = read_tar_number(&header[148], 8);

	if (stored != computed)
		return false;

	/* POSIX tar format */
	if (memcmp(magic, "ustar\0", 6) == 0 &&
		memcmp(&header[263], "00", 2) == 0)
		return true;

	/* GNU tar format, or not-quite-POSIX format written by pre-9.3 pg_dump */
	return memcmp(magic, "ustar  \0", 8) == 0 ||
		memcmp(magic, "ustar00\0", 8) == 0;
}
1062 
1063 /* Given the member, write the TAR header & copy the file */
1064 static void
_tarAddFile(ArchiveHandle * AH,TAR_MEMBER * th)1065 _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th)
1066 {
1067 	lclContext *ctx = (lclContext *) AH->formatData;
1068 	FILE	   *tmp = th->tmpFH;	/* Grab it for convenience */
1069 	char		buf[32768];
1070 	size_t		cnt;
1071 	pgoff_t		len = 0;
1072 	size_t		res;
1073 	size_t		i,
1074 				pad;
1075 
1076 	/*
1077 	 * Find file len & go back to start.
1078 	 */
1079 	if (fseeko(tmp, 0, SEEK_END) != 0)
1080 		fatal("error during file seek: %m");
1081 	th->fileLen = ftello(tmp);
1082 	if (th->fileLen < 0)
1083 		fatal("could not determine seek position in archive file: %m");
1084 	if (fseeko(tmp, 0, SEEK_SET) != 0)
1085 		fatal("error during file seek: %m");
1086 
1087 	_tarWriteHeader(th);
1088 
1089 	while ((cnt = fread(buf, 1, sizeof(buf), tmp)) > 0)
1090 	{
1091 		if ((res = fwrite(buf, 1, cnt, th->tarFH)) != cnt)
1092 			WRITE_ERROR_EXIT;
1093 		len += res;
1094 	}
1095 	if (!feof(tmp))
1096 		READ_ERROR_EXIT(tmp);
1097 
1098 	if (fclose(tmp) != 0)		/* This *should* delete it... */
1099 		fatal("could not close temporary file: %m");
1100 
1101 	if (len != th->fileLen)
1102 	{
1103 		char		buf1[32],
1104 					buf2[32];
1105 
1106 		snprintf(buf1, sizeof(buf1), INT64_FORMAT, (int64) len);
1107 		snprintf(buf2, sizeof(buf2), INT64_FORMAT, (int64) th->fileLen);
1108 		fatal("actual file length (%s) does not match expected (%s)",
1109 			  buf1, buf2);
1110 	}
1111 
1112 	pad = tarPaddingBytesRequired(len);
1113 	for (i = 0; i < pad; i++)
1114 	{
1115 		if (fputc('\0', th->tarFH) == EOF)
1116 			WRITE_ERROR_EXIT;
1117 	}
1118 
1119 	ctx->tarFHpos += len + pad;
1120 }
1121 
1122 /* Locate the file in the archive, read header and position to data */
1123 static TAR_MEMBER *
_tarPositionTo(ArchiveHandle * AH,const char * filename)1124 _tarPositionTo(ArchiveHandle *AH, const char *filename)
1125 {
1126 	lclContext *ctx = (lclContext *) AH->formatData;
1127 	TAR_MEMBER *th = pg_malloc0(sizeof(TAR_MEMBER));
1128 	char		c;
1129 	char		header[TAR_BLOCK_SIZE];
1130 	size_t		i,
1131 				len,
1132 				blks;
1133 	int			id;
1134 
1135 	th->AH = AH;
1136 
1137 	/* Go to end of current file, if any */
1138 	if (ctx->tarFHpos != 0)
1139 	{
1140 		char		buf1[100],
1141 					buf2[100];
1142 
1143 		snprintf(buf1, sizeof(buf1), INT64_FORMAT, (int64) ctx->tarFHpos);
1144 		snprintf(buf2, sizeof(buf2), INT64_FORMAT, (int64) ctx->tarNextMember);
1145 		pg_log_debug("moving from position %s to next member at file position %s",
1146 					 buf1, buf2);
1147 
1148 		while (ctx->tarFHpos < ctx->tarNextMember)
1149 			_tarReadRaw(AH, &c, 1, NULL, ctx->tarFH);
1150 	}
1151 
1152 	{
1153 		char		buf[100];
1154 
1155 		snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) ctx->tarFHpos);
1156 		pg_log_debug("now at file position %s", buf);
1157 	}
1158 
1159 	/* We are at the start of the file, or at the next member */
1160 
1161 	/* Get the header */
1162 	if (!_tarGetHeader(AH, th))
1163 	{
1164 		if (filename)
1165 			fatal("could not find header for file \"%s\" in tar archive", filename);
1166 		else
1167 		{
1168 			/*
1169 			 * We're just scanning the archive for the next file, so return
1170 			 * null
1171 			 */
1172 			free(th);
1173 			return NULL;
1174 		}
1175 	}
1176 
1177 	while (filename != NULL && strcmp(th->targetFile, filename) != 0)
1178 	{
1179 		pg_log_debug("skipping tar member %s", th->targetFile);
1180 
1181 		id = atoi(th->targetFile);
1182 		if ((TocIDRequired(AH, id) & REQ_DATA) != 0)
1183 			fatal("restoring data out of order is not supported in this archive format: "
1184 				  "\"%s\" is required, but comes before \"%s\" in the archive file.",
1185 				  th->targetFile, filename);
1186 
1187 		/* Header doesn't match, so read to next header */
1188 		len = th->fileLen;
1189 		len += tarPaddingBytesRequired(th->fileLen);
1190 		blks = len / TAR_BLOCK_SIZE;	/* # of tar blocks */
1191 
1192 		for (i = 0; i < blks; i++)
1193 			_tarReadRaw(AH, &header[0], TAR_BLOCK_SIZE, NULL, ctx->tarFH);
1194 
1195 		if (!_tarGetHeader(AH, th))
1196 			fatal("could not find header for file \"%s\" in tar archive", filename);
1197 	}
1198 
1199 	ctx->tarNextMember = ctx->tarFHpos + th->fileLen
1200 		+ tarPaddingBytesRequired(th->fileLen);
1201 	th->pos = 0;
1202 
1203 	return th;
1204 }
1205 
1206 /* Read & verify a header */
1207 static int
_tarGetHeader(ArchiveHandle * AH,TAR_MEMBER * th)1208 _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th)
1209 {
1210 	lclContext *ctx = (lclContext *) AH->formatData;
1211 	char		h[TAR_BLOCK_SIZE];
1212 	char		tag[100 + 1];
1213 	int			sum,
1214 				chk;
1215 	pgoff_t		len;
1216 	pgoff_t		hPos;
1217 	bool		gotBlock = false;
1218 
1219 	while (!gotBlock)
1220 	{
1221 		/* Save the pos for reporting purposes */
1222 		hPos = ctx->tarFHpos;
1223 
1224 		/* Read the next tar block, return EOF, exit if short */
1225 		len = _tarReadRaw(AH, h, TAR_BLOCK_SIZE, NULL, ctx->tarFH);
1226 		if (len == 0)			/* EOF */
1227 			return 0;
1228 
1229 		if (len != TAR_BLOCK_SIZE)
1230 			fatal(ngettext("incomplete tar header found (%lu byte)",
1231 						   "incomplete tar header found (%lu bytes)",
1232 						   len),
1233 				  (unsigned long) len);
1234 
1235 		/* Calc checksum */
1236 		chk = tarChecksum(h);
1237 		sum = read_tar_number(&h[148], 8);
1238 
1239 		/*
1240 		 * If the checksum failed, see if it is a null block. If so, silently
1241 		 * continue to the next block.
1242 		 */
1243 		if (chk == sum)
1244 			gotBlock = true;
1245 		else
1246 		{
1247 			int			i;
1248 
1249 			for (i = 0; i < TAR_BLOCK_SIZE; i++)
1250 			{
1251 				if (h[i] != 0)
1252 				{
1253 					gotBlock = true;
1254 					break;
1255 				}
1256 			}
1257 		}
1258 	}
1259 
1260 	/* Name field is 100 bytes, might not be null-terminated */
1261 	strlcpy(tag, &h[0], 100 + 1);
1262 
1263 	len = read_tar_number(&h[124], 12);
1264 
1265 	{
1266 		char		posbuf[32];
1267 		char		lenbuf[32];
1268 
1269 		snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, (uint64) hPos);
1270 		snprintf(lenbuf, sizeof(lenbuf), UINT64_FORMAT, (uint64) len);
1271 		pg_log_debug("TOC Entry %s at %s (length %s, checksum %d)",
1272 					 tag, posbuf, lenbuf, sum);
1273 	}
1274 
1275 	if (chk != sum)
1276 	{
1277 		char		posbuf[32];
1278 
1279 		snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT,
1280 				 (uint64) ftello(ctx->tarFH));
1281 		fatal("corrupt tar header found in %s (expected %d, computed %d) file position %s",
1282 			  tag, sum, chk, posbuf);
1283 	}
1284 
1285 	th->targetFile = pg_strdup(tag);
1286 	th->fileLen = len;
1287 
1288 	return 1;
1289 }
1290 
1291 
1292 static void
_tarWriteHeader(TAR_MEMBER * th)1293 _tarWriteHeader(TAR_MEMBER *th)
1294 {
1295 	char		h[TAR_BLOCK_SIZE];
1296 
1297 	tarCreateHeader(h, th->targetFile, NULL, th->fileLen,
1298 					0600, 04000, 02000, time(NULL));
1299 
1300 	/* Now write the completed header. */
1301 	if (fwrite(h, 1, TAR_BLOCK_SIZE, th->tarFH) != TAR_BLOCK_SIZE)
1302 		WRITE_ERROR_EXIT;
1303 }
1304