1 /*-------------------------------------------------------------------------
2  *
3  * pg_backup_tar.c
4  *
5  *	This file is copied from the 'files' format file, but dumps data into
6  *	one temp file then sends it to the output TAR archive.
7  *
8  *	The tar format also includes a 'restore.sql' script which is there for
9  *	the benefit of humans. This script is never used by pg_restore.
10  *
11  *	NOTE: If you untar the created 'tar' file, the resulting files are
12  *	compatible with the 'directory' format. Please keep the two formats in
13  *	sync.
14  *
15  *	See the headers to pg_backup_directory & pg_restore for more details.
16  *
17  * Copyright (c) 2000, Philip Warner
18  *		Rights are granted to use this software in any way so long
19  *		as this notice is not removed.
20  *
21  *	The author is not responsible for loss or damages that may
22  *	result from it's use.
23  *
24  *
25  * IDENTIFICATION
26  *		src/bin/pg_dump/pg_backup_tar.c
27  *
28  *-------------------------------------------------------------------------
29  */
30 #include "postgres_fe.h"
31 
32 #include "pg_backup_archiver.h"
33 #include "pg_backup_tar.h"
34 #include "pg_backup_utils.h"
35 #include "pgtar.h"
36 #include "common/file_utils.h"
37 #include "fe_utils/string_utils.h"
38 
39 #include <sys/stat.h>
40 #include <ctype.h>
41 #include <limits.h>
42 #include <unistd.h>
43 
/*
 * Archive-format callbacks.  InitArchiveFmt_Tar() stores pointers to these
 * in the ArchiveHandle.
 */
static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te);
static void _StartData(ArchiveHandle *AH, TocEntry *te);
static void _WriteData(ArchiveHandle *AH, const void *data, size_t dLen);
static void _EndData(ArchiveHandle *AH, TocEntry *te);
static int	_WriteByte(ArchiveHandle *AH, const int i);
static int	_ReadByte(ArchiveHandle *);
static void _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len);
static void _ReadBuf(ArchiveHandle *AH, void *buf, size_t len);
static void _CloseArchive(ArchiveHandle *AH);
static void _PrintTocData(ArchiveHandle *AH, TocEntry *te);
static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te);
static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te);
static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te);

/* Large-object (blob) callbacks, also installed by InitArchiveFmt_Tar(). */
static void _StartBlobs(ArchiveHandle *AH, TocEntry *te);
static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
static void _EndBlobs(ArchiveHandle *AH, TocEntry *te);

/* Size of the stack buffers used to build archive member names. */
#define K_STD_BUF_SIZE 1024
64 
65 
/*
 * State of one member (file) of the tar archive.  A member opened for
 * writing collects its data in a temporary file; a member opened for reading
 * is read straight from the archive file.
 */
typedef struct
{
#ifdef HAVE_LIBZ
	gzFile		zFH;			/* compressed stream, set only when compression
								 * is in use */
#else
	FILE	   *zFH;
#endif
	FILE	   *nFH;			/* uncompressed stream used for reads/writes */
	FILE	   *tarFH;			/* the tar archive file itself */
	FILE	   *tmpFH;			/* temporary file holding the data of a member
								 * being written */
	char	   *targetFile;		/* name of the member inside the archive */
	char		mode;			/* 'r' or 'w', as passed to tarOpen() */
	pgoff_t		pos;			/* bytes read from / written to this member */
	pgoff_t		fileLen;		/* length of the member's data */
	ArchiveHandle *AH;			/* archive this member belongs to */
} TAR_MEMBER;
82 
/* Per-archive private state; InitArchiveFmt_Tar() stores it in AH->formatData. */
typedef struct
{
	int			hasSeek;		/* result of checkSeek() on tarFH */
	pgoff_t		filePos;		/* advanced by the byte/buffer read and write
								 * callbacks */
	TAR_MEMBER *blobToc;		/* "blobs.toc" member, open while blobs are
								 * being dumped */
	FILE	   *tarFH;			/* the archive file, or stdin/stdout */
	pgoff_t		tarFHpos;		/* bytes consumed from / copied to tarFH */
	pgoff_t		tarNextMember;	/* tarFHpos at which the next member header
								 * starts (read mode) */
	TAR_MEMBER *FH;				/* member used by the byte/buffer callbacks */
	int			isSpecialScript;	/* nonzero while restore.sql is being
									 * generated */
	TAR_MEMBER *scriptTH;		/* member that receives restore.sql output */
} lclContext;
95 
/* Per-TOC-entry private state, stored in te->formatData. */
typedef struct
{
	TAR_MEMBER *TH;				/* member currently open for this entry, or
								 * NULL */
	char	   *filename;		/* name of the entry's data member, or NULL if
								 * the entry has no data file */
} lclTocEntry;
101 
/* translator: this is a module name */
static const char *modulename = gettext_noop("tar archiver");

/* Restore-side reader for large objects stored in the archive. */
static void _LoadBlobs(ArchiveHandle *AH);

/* Open and close a single archive member. */
static TAR_MEMBER *tarOpen(ArchiveHandle *AH, const char *filename, char mode);
static void tarClose(ArchiveHandle *AH, TAR_MEMBER *TH);

#ifdef __NOT_USED__
static char *tarGets(char *buf, size_t len, TAR_MEMBER *th);
#endif
static int	tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...) pg_attribute_printf(3, 4);

/* Low-level tar member handling. */
static void _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th);
static TAR_MEMBER *_tarPositionTo(ArchiveHandle *AH, const char *filename);
static size_t tarRead(void *buf, size_t len, TAR_MEMBER *th);
static size_t tarWrite(const void *buf, size_t len, TAR_MEMBER *th);
static void _tarWriteHeader(TAR_MEMBER *th);
static int	_tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th);
static size_t _tarReadRaw(ArchiveHandle *AH, void *buf, size_t len, TAR_MEMBER *th, FILE *fh);

/* Output hook that redirects archiver output into the restore.sql member. */
static size_t _scriptOut(ArchiveHandle *AH, const void *buf, size_t len);
124 
125 /*
126  *	Initializer
127  */
128 void
InitArchiveFmt_Tar(ArchiveHandle * AH)129 InitArchiveFmt_Tar(ArchiveHandle *AH)
130 {
131 	lclContext *ctx;
132 
133 	/* Assuming static functions, this can be copied for each format. */
134 	AH->ArchiveEntryPtr = _ArchiveEntry;
135 	AH->StartDataPtr = _StartData;
136 	AH->WriteDataPtr = _WriteData;
137 	AH->EndDataPtr = _EndData;
138 	AH->WriteBytePtr = _WriteByte;
139 	AH->ReadBytePtr = _ReadByte;
140 	AH->WriteBufPtr = _WriteBuf;
141 	AH->ReadBufPtr = _ReadBuf;
142 	AH->ClosePtr = _CloseArchive;
143 	AH->ReopenPtr = NULL;
144 	AH->PrintTocDataPtr = _PrintTocData;
145 	AH->ReadExtraTocPtr = _ReadExtraToc;
146 	AH->WriteExtraTocPtr = _WriteExtraToc;
147 	AH->PrintExtraTocPtr = _PrintExtraToc;
148 
149 	AH->StartBlobsPtr = _StartBlobs;
150 	AH->StartBlobPtr = _StartBlob;
151 	AH->EndBlobPtr = _EndBlob;
152 	AH->EndBlobsPtr = _EndBlobs;
153 	AH->ClonePtr = NULL;
154 	AH->DeClonePtr = NULL;
155 
156 	AH->WorkerJobDumpPtr = NULL;
157 	AH->WorkerJobRestorePtr = NULL;
158 
159 	/*
160 	 * Set up some special context used in compressing data.
161 	 */
162 	ctx = (lclContext *) pg_malloc0(sizeof(lclContext));
163 	AH->formatData = (void *) ctx;
164 	ctx->filePos = 0;
165 	ctx->isSpecialScript = 0;
166 
167 	/* Initialize LO buffering */
168 	AH->lo_buf_size = LOBBUFSIZE;
169 	AH->lo_buf = (void *) pg_malloc(LOBBUFSIZE);
170 
171 	/*
172 	 * Now open the tar file, and load the TOC if we're in read mode.
173 	 */
174 	if (AH->mode == archModeWrite)
175 	{
176 		if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
177 		{
178 			ctx->tarFH = fopen(AH->fSpec, PG_BINARY_W);
179 			if (ctx->tarFH == NULL)
180 				exit_horribly(modulename,
181 							  "could not open TOC file \"%s\" for output: %s\n",
182 							  AH->fSpec, strerror(errno));
183 		}
184 		else
185 		{
186 			ctx->tarFH = stdout;
187 			if (ctx->tarFH == NULL)
188 				exit_horribly(modulename,
189 							  "could not open TOC file for output: %s\n",
190 							  strerror(errno));
191 		}
192 
193 		ctx->tarFHpos = 0;
194 
195 		/*
196 		 * Make unbuffered since we will dup() it, and the buffers screw each
197 		 * other
198 		 */
199 		/* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
200 
201 		ctx->hasSeek = checkSeek(ctx->tarFH);
202 
203 		/*
204 		 * We don't support compression because reading the files back is not
205 		 * possible since gzdopen uses buffered IO which totally screws file
206 		 * positioning.
207 		 */
208 		if (AH->compression != 0)
209 			exit_horribly(modulename,
210 						  "compression is not supported by tar archive format\n");
211 	}
212 	else
213 	{							/* Read Mode */
214 		if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
215 		{
216 			ctx->tarFH = fopen(AH->fSpec, PG_BINARY_R);
217 			if (ctx->tarFH == NULL)
218 				exit_horribly(modulename, "could not open TOC file \"%s\" for input: %s\n",
219 							  AH->fSpec, strerror(errno));
220 		}
221 		else
222 		{
223 			ctx->tarFH = stdin;
224 			if (ctx->tarFH == NULL)
225 				exit_horribly(modulename, "could not open TOC file for input: %s\n",
226 							  strerror(errno));
227 		}
228 
229 		/*
230 		 * Make unbuffered since we will dup() it, and the buffers screw each
231 		 * other
232 		 */
233 		/* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
234 
235 		ctx->tarFHpos = 0;
236 
237 		ctx->hasSeek = checkSeek(ctx->tarFH);
238 
239 		ctx->FH = (void *) tarOpen(AH, "toc.dat", 'r');
240 		ReadHead(AH);
241 		ReadToc(AH);
242 		tarClose(AH, ctx->FH);	/* Nothing else in the file... */
243 	}
244 }
245 
246 /*
247  * - Start a new TOC entry
248  *	 Setup the output file name.
249  */
250 static void
_ArchiveEntry(ArchiveHandle * AH,TocEntry * te)251 _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
252 {
253 	lclTocEntry *ctx;
254 	char		fn[K_STD_BUF_SIZE];
255 
256 	ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
257 	if (te->dataDumper != NULL)
258 	{
259 #ifdef HAVE_LIBZ
260 		if (AH->compression == 0)
261 			sprintf(fn, "%d.dat", te->dumpId);
262 		else
263 			sprintf(fn, "%d.dat.gz", te->dumpId);
264 #else
265 		sprintf(fn, "%d.dat", te->dumpId);
266 #endif
267 		ctx->filename = pg_strdup(fn);
268 	}
269 	else
270 	{
271 		ctx->filename = NULL;
272 		ctx->TH = NULL;
273 	}
274 	te->formatData = (void *) ctx;
275 }
276 
277 static void
_WriteExtraToc(ArchiveHandle * AH,TocEntry * te)278 _WriteExtraToc(ArchiveHandle *AH, TocEntry *te)
279 {
280 	lclTocEntry *ctx = (lclTocEntry *) te->formatData;
281 
282 	if (ctx->filename)
283 		WriteStr(AH, ctx->filename);
284 	else
285 		WriteStr(AH, "");
286 }
287 
288 static void
_ReadExtraToc(ArchiveHandle * AH,TocEntry * te)289 _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
290 {
291 	lclTocEntry *ctx = (lclTocEntry *) te->formatData;
292 
293 	if (ctx == NULL)
294 	{
295 		ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
296 		te->formatData = (void *) ctx;
297 	}
298 
299 	ctx->filename = ReadStr(AH);
300 	if (strlen(ctx->filename) == 0)
301 	{
302 		free(ctx->filename);
303 		ctx->filename = NULL;
304 	}
305 	ctx->TH = NULL;
306 }
307 
308 static void
_PrintExtraToc(ArchiveHandle * AH,TocEntry * te)309 _PrintExtraToc(ArchiveHandle *AH, TocEntry *te)
310 {
311 	lclTocEntry *ctx = (lclTocEntry *) te->formatData;
312 
313 	if (AH->public.verbose && ctx->filename != NULL)
314 		ahprintf(AH, "-- File: %s\n", ctx->filename);
315 }
316 
317 static void
_StartData(ArchiveHandle * AH,TocEntry * te)318 _StartData(ArchiveHandle *AH, TocEntry *te)
319 {
320 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
321 
322 	tctx->TH = tarOpen(AH, tctx->filename, 'w');
323 }
324 
325 static TAR_MEMBER *
tarOpen(ArchiveHandle * AH,const char * filename,char mode)326 tarOpen(ArchiveHandle *AH, const char *filename, char mode)
327 {
328 	lclContext *ctx = (lclContext *) AH->formatData;
329 	TAR_MEMBER *tm;
330 
331 #ifdef HAVE_LIBZ
332 	char		fmode[10];
333 #endif
334 
335 	if (mode == 'r')
336 	{
337 		tm = _tarPositionTo(AH, filename);
338 		if (!tm)				/* Not found */
339 		{
340 			if (filename)
341 			{
342 				/*
343 				 * Couldn't find the requested file. Future: do SEEK(0) and
344 				 * retry.
345 				 */
346 				exit_horribly(modulename, "could not find file \"%s\" in archive\n", filename);
347 			}
348 			else
349 			{
350 				/* Any file OK, none left, so return NULL */
351 				return NULL;
352 			}
353 		}
354 
355 #ifdef HAVE_LIBZ
356 
357 		if (AH->compression == 0)
358 			tm->nFH = ctx->tarFH;
359 		else
360 			exit_horribly(modulename, "compression is not supported by tar archive format\n");
361 		/* tm->zFH = gzdopen(dup(fileno(ctx->tarFH)), "rb"); */
362 #else
363 		tm->nFH = ctx->tarFH;
364 #endif
365 	}
366 	else
367 	{
368 		int			old_umask;
369 
370 		tm = pg_malloc0(sizeof(TAR_MEMBER));
371 
372 		/*
373 		 * POSIX does not require, but permits, tmpfile() to restrict file
374 		 * permissions.  Given an OS crash after we write data, the filesystem
375 		 * might retain the data but forget tmpfile()'s unlink().  If so, the
376 		 * file mode protects confidentiality of the data written.
377 		 */
378 		old_umask = umask(S_IRWXG | S_IRWXO);
379 
380 #ifndef WIN32
381 		tm->tmpFH = tmpfile();
382 #else
383 
384 		/*
385 		 * On WIN32, tmpfile() generates a filename in the root directory,
386 		 * which requires administrative permissions on certain systems. Loop
387 		 * until we find a unique file name we can create.
388 		 */
389 		while (1)
390 		{
391 			char	   *name;
392 			int			fd;
393 
394 			name = _tempnam(NULL, "pg_temp_");
395 			if (name == NULL)
396 				break;
397 			fd = open(name, O_RDWR | O_CREAT | O_EXCL | O_BINARY |
398 					  O_TEMPORARY, S_IRUSR | S_IWUSR);
399 			free(name);
400 
401 			if (fd != -1)		/* created a file */
402 			{
403 				tm->tmpFH = fdopen(fd, "w+b");
404 				break;
405 			}
406 			else if (errno != EEXIST)	/* failure other than file exists */
407 				break;
408 		}
409 #endif
410 
411 		if (tm->tmpFH == NULL)
412 			exit_horribly(modulename, "could not generate temporary file name: %s\n", strerror(errno));
413 
414 		umask(old_umask);
415 
416 #ifdef HAVE_LIBZ
417 
418 		if (AH->compression != 0)
419 		{
420 			sprintf(fmode, "wb%d", AH->compression);
421 			tm->zFH = gzdopen(dup(fileno(tm->tmpFH)), fmode);
422 			if (tm->zFH == NULL)
423 				exit_horribly(modulename, "could not open temporary file\n");
424 		}
425 		else
426 			tm->nFH = tm->tmpFH;
427 #else
428 
429 		tm->nFH = tm->tmpFH;
430 #endif
431 
432 		tm->AH = AH;
433 		tm->targetFile = pg_strdup(filename);
434 	}
435 
436 	tm->mode = mode;
437 	tm->tarFH = ctx->tarFH;
438 
439 	return tm;
440 }
441 
442 static void
tarClose(ArchiveHandle * AH,TAR_MEMBER * th)443 tarClose(ArchiveHandle *AH, TAR_MEMBER *th)
444 {
445 	/*
446 	 * Close the GZ file since we dup'd. This will flush the buffers.
447 	 */
448 	if (AH->compression != 0)
449 		if (GZCLOSE(th->zFH) != 0)
450 			exit_horribly(modulename, "could not close tar member\n");
451 
452 	if (th->mode == 'w')
453 		_tarAddFile(AH, th);	/* This will close the temp file */
454 
455 	/*
456 	 * else Nothing to do for normal read since we don't dup() normal file
457 	 * handle, and we don't use temp files.
458 	 */
459 
460 	if (th->targetFile)
461 		free(th->targetFile);
462 
463 	th->nFH = NULL;
464 	th->zFH = NULL;
465 }
466 
#ifdef __NOT_USED__
/*
 * Read one line (up to and including '\n') of at most len bytes from a
 * member, never going past the member's logical end.  Only compiled when
 * __NOT_USED__ is defined.
 *
 * The terminating NUL is stored after the bytes read, so buf must have room
 * for len + 1 bytes.  Returns buf, or NULL if end of input was hit before
 * any byte could be read.
 */
static char *
tarGets(char *buf, size_t len, TAR_MEMBER *th)
{
	size_t		nread = 0;
	char		ch = ' ';
	bool		hit_eof = false;

	/* Can't read past logical EOF */
	if (len > (th->fileLen - th->pos))
		len = th->fileLen - th->pos;

	while (nread < len && ch != '\n')
	{
		if (_tarReadRaw(th->AH, &ch, 1, th, NULL) <= 0)
		{
			hit_eof = true;
			break;
		}
		buf[nread++] = ch;
	}

	if (hit_eof && nread == 0)
		return NULL;

	buf[nread] = '\0';

	/* Advance the member position by the length of the string returned */
	th->pos += strlen(buf);

	return buf;
}
#endif
507 
508 /*
509  * Just read bytes from the archive. This is the low level read routine
510  * that is used for ALL reads on a tar file.
511  */
512 static size_t
_tarReadRaw(ArchiveHandle * AH,void * buf,size_t len,TAR_MEMBER * th,FILE * fh)513 _tarReadRaw(ArchiveHandle *AH, void *buf, size_t len, TAR_MEMBER *th, FILE *fh)
514 {
515 	lclContext *ctx = (lclContext *) AH->formatData;
516 	size_t		avail;
517 	size_t		used = 0;
518 	size_t		res = 0;
519 
520 	avail = AH->lookaheadLen - AH->lookaheadPos;
521 	if (avail > 0)
522 	{
523 		/* We have some lookahead bytes to use */
524 		if (avail >= len)		/* Just use the lookahead buffer */
525 			used = len;
526 		else
527 			used = avail;
528 
529 		/* Copy, and adjust buffer pos */
530 		memcpy(buf, AH->lookahead + AH->lookaheadPos, used);
531 		AH->lookaheadPos += used;
532 
533 		/* Adjust required length */
534 		len -= used;
535 	}
536 
537 	/* Read the file if len > 0 */
538 	if (len > 0)
539 	{
540 		if (fh)
541 		{
542 			res = fread(&((char *) buf)[used], 1, len, fh);
543 			if (res != len && !feof(fh))
544 				READ_ERROR_EXIT(fh);
545 		}
546 		else if (th)
547 		{
548 			if (th->zFH)
549 			{
550 				res = GZREAD(&((char *) buf)[used], 1, len, th->zFH);
551 				if (res != len && !GZEOF(th->zFH))
552 				{
553 #ifdef HAVE_LIBZ
554 					int			errnum;
555 					const char *errmsg = gzerror(th->zFH, &errnum);
556 
557 					exit_horribly(modulename,
558 								  "could not read from input file: %s\n",
559 								  errnum == Z_ERRNO ? strerror(errno) : errmsg);
560 #else
561 					exit_horribly(modulename,
562 								  "could not read from input file: %s\n",
563 								  strerror(errno));
564 #endif
565 				}
566 			}
567 			else
568 			{
569 				res = fread(&((char *) buf)[used], 1, len, th->nFH);
570 				if (res != len && !feof(th->nFH))
571 					READ_ERROR_EXIT(th->nFH);
572 			}
573 		}
574 		else
575 			exit_horribly(modulename, "internal error -- neither th nor fh specified in tarReadRaw()\n");
576 	}
577 
578 	ctx->tarFHpos += res + used;
579 
580 	return (res + used);
581 }
582 
583 static size_t
tarRead(void * buf,size_t len,TAR_MEMBER * th)584 tarRead(void *buf, size_t len, TAR_MEMBER *th)
585 {
586 	size_t		res;
587 
588 	if (th->pos + len > th->fileLen)
589 		len = th->fileLen - th->pos;
590 
591 	if (len <= 0)
592 		return 0;
593 
594 	res = _tarReadRaw(th->AH, buf, len, th, NULL);
595 
596 	th->pos += res;
597 
598 	return res;
599 }
600 
601 static size_t
tarWrite(const void * buf,size_t len,TAR_MEMBER * th)602 tarWrite(const void *buf, size_t len, TAR_MEMBER *th)
603 {
604 	size_t		res;
605 
606 	if (th->zFH != NULL)
607 		res = GZWRITE(buf, 1, len, th->zFH);
608 	else
609 		res = fwrite(buf, 1, len, th->nFH);
610 
611 	th->pos += res;
612 	return res;
613 }
614 
615 static void
_WriteData(ArchiveHandle * AH,const void * data,size_t dLen)616 _WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
617 {
618 	lclTocEntry *tctx = (lclTocEntry *) AH->currToc->formatData;
619 
620 	if (tarWrite(data, dLen, tctx->TH) != dLen)
621 		WRITE_ERROR_EXIT;
622 
623 	return;
624 }
625 
626 static void
_EndData(ArchiveHandle * AH,TocEntry * te)627 _EndData(ArchiveHandle *AH, TocEntry *te)
628 {
629 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
630 
631 	/* Close the file */
632 	tarClose(AH, tctx->TH);
633 	tctx->TH = NULL;
634 }
635 
636 /*
637  * Print data for a given file
638  */
639 static void
_PrintFileData(ArchiveHandle * AH,char * filename)640 _PrintFileData(ArchiveHandle *AH, char *filename)
641 {
642 	lclContext *ctx = (lclContext *) AH->formatData;
643 	char		buf[4096];
644 	size_t		cnt;
645 	TAR_MEMBER *th;
646 
647 	if (!filename)
648 		return;
649 
650 	th = tarOpen(AH, filename, 'r');
651 	ctx->FH = th;
652 
653 	while ((cnt = tarRead(buf, 4095, th)) > 0)
654 	{
655 		buf[cnt] = '\0';
656 		ahwrite(buf, 1, cnt, AH);
657 	}
658 
659 	tarClose(AH, th);
660 }
661 
662 
663 /*
664  * Print data for a given TOC entry
665 */
666 static void
_PrintTocData(ArchiveHandle * AH,TocEntry * te)667 _PrintTocData(ArchiveHandle *AH, TocEntry *te)
668 {
669 	lclContext *ctx = (lclContext *) AH->formatData;
670 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
671 	int			pos1;
672 
673 	if (!tctx->filename)
674 		return;
675 
676 	/*
677 	 * If we're writing the special restore.sql script, emit a suitable
678 	 * command to include each table's data from the corresponding file.
679 	 *
680 	 * In the COPY case this is a bit klugy because the regular COPY command
681 	 * was already printed before we get control.
682 	 */
683 	if (ctx->isSpecialScript)
684 	{
685 		if (te->copyStmt)
686 		{
687 			/* Abort the COPY FROM stdin */
688 			ahprintf(AH, "\\.\n");
689 
690 			/*
691 			 * The COPY statement should look like "COPY ... FROM stdin;\n",
692 			 * see dumpTableData().
693 			 */
694 			pos1 = (int) strlen(te->copyStmt) - 13;
695 			if (pos1 < 6 || strncmp(te->copyStmt, "COPY ", 5) != 0 ||
696 				strcmp(te->copyStmt + pos1, " FROM stdin;\n") != 0)
697 				exit_horribly(modulename,
698 							  "unexpected COPY statement syntax: \"%s\"\n",
699 							  te->copyStmt);
700 
701 			/* Emit all but the FROM part ... */
702 			ahwrite(te->copyStmt, 1, pos1, AH);
703 			/* ... and insert modified FROM */
704 			ahprintf(AH, " FROM '$$PATH$$/%s';\n\n", tctx->filename);
705 		}
706 		else
707 		{
708 			/* --inserts mode, no worries, just include the data file */
709 			ahprintf(AH, "\\i $$PATH$$/%s\n\n", tctx->filename);
710 		}
711 
712 		return;
713 	}
714 
715 	if (strcmp(te->desc, "BLOBS") == 0)
716 		_LoadBlobs(AH);
717 	else
718 		_PrintFileData(AH, tctx->filename);
719 }
720 
721 static void
_LoadBlobs(ArchiveHandle * AH)722 _LoadBlobs(ArchiveHandle *AH)
723 {
724 	Oid			oid;
725 	lclContext *ctx = (lclContext *) AH->formatData;
726 	TAR_MEMBER *th;
727 	size_t		cnt;
728 	bool		foundBlob = false;
729 	char		buf[4096];
730 
731 	StartRestoreBlobs(AH);
732 
733 	th = tarOpen(AH, NULL, 'r');	/* Open next file */
734 	while (th != NULL)
735 	{
736 		ctx->FH = th;
737 
738 		if (strncmp(th->targetFile, "blob_", 5) == 0)
739 		{
740 			oid = atooid(&th->targetFile[5]);
741 			if (oid != 0)
742 			{
743 				ahlog(AH, 1, "restoring large object with OID %u\n", oid);
744 
745 				StartRestoreBlob(AH, oid, AH->public.ropt->dropSchema);
746 
747 				while ((cnt = tarRead(buf, 4095, th)) > 0)
748 				{
749 					buf[cnt] = '\0';
750 					ahwrite(buf, 1, cnt, AH);
751 				}
752 				EndRestoreBlob(AH, oid);
753 				foundBlob = true;
754 			}
755 			tarClose(AH, th);
756 		}
757 		else
758 		{
759 			tarClose(AH, th);
760 
761 			/*
762 			 * Once we have found the first blob, stop at the first non-blob
763 			 * entry (which will be 'blobs.toc').  This coding would eat all
764 			 * the rest of the archive if there are no blobs ... but this
765 			 * function shouldn't be called at all in that case.
766 			 */
767 			if (foundBlob)
768 				break;
769 		}
770 
771 		th = tarOpen(AH, NULL, 'r');
772 	}
773 	EndRestoreBlobs(AH);
774 }
775 
776 
777 static int
_WriteByte(ArchiveHandle * AH,const int i)778 _WriteByte(ArchiveHandle *AH, const int i)
779 {
780 	lclContext *ctx = (lclContext *) AH->formatData;
781 	char		b = i;			/* Avoid endian problems */
782 
783 	if (tarWrite(&b, 1, ctx->FH) != 1)
784 		WRITE_ERROR_EXIT;
785 
786 	ctx->filePos += 1;
787 	return 1;
788 }
789 
790 static int
_ReadByte(ArchiveHandle * AH)791 _ReadByte(ArchiveHandle *AH)
792 {
793 	lclContext *ctx = (lclContext *) AH->formatData;
794 	size_t		res;
795 	unsigned char c;
796 
797 	res = tarRead(&c, 1, ctx->FH);
798 	if (res != 1)
799 		/* We already would have exited for errors on reads, must be EOF */
800 		exit_horribly(modulename,
801 					  "could not read from input file: end of file\n");
802 	ctx->filePos += 1;
803 	return c;
804 }
805 
806 static void
_WriteBuf(ArchiveHandle * AH,const void * buf,size_t len)807 _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
808 {
809 	lclContext *ctx = (lclContext *) AH->formatData;
810 
811 	if (tarWrite(buf, len, ctx->FH) != len)
812 		WRITE_ERROR_EXIT;
813 
814 	ctx->filePos += len;
815 }
816 
817 static void
_ReadBuf(ArchiveHandle * AH,void * buf,size_t len)818 _ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
819 {
820 	lclContext *ctx = (lclContext *) AH->formatData;
821 
822 	if (tarRead(buf, len, ctx->FH) != len)
823 		/* We already would have exited for errors on reads, must be EOF */
824 		exit_horribly(modulename,
825 					  "could not read from input file: end of file\n");
826 
827 	ctx->filePos += len;
828 	return;
829 }
830 
831 static void
_CloseArchive(ArchiveHandle * AH)832 _CloseArchive(ArchiveHandle *AH)
833 {
834 	lclContext *ctx = (lclContext *) AH->formatData;
835 	TAR_MEMBER *th;
836 	RestoreOptions *ropt;
837 	RestoreOptions *savRopt;
838 	DumpOptions *savDopt;
839 	int			savVerbose,
840 				i;
841 
842 	if (AH->mode == archModeWrite)
843 	{
844 		/*
845 		 * Write the Header & TOC to the archive FIRST
846 		 */
847 		th = tarOpen(AH, "toc.dat", 'w');
848 		ctx->FH = th;
849 		WriteHead(AH);
850 		WriteToc(AH);
851 		tarClose(AH, th);		/* Not needed any more */
852 
853 		/*
854 		 * Now send the data (tables & blobs)
855 		 */
856 		WriteDataChunks(AH, NULL);
857 
858 		/*
859 		 * Now this format wants to append a script which does a full restore
860 		 * if the files have been extracted.
861 		 */
862 		th = tarOpen(AH, "restore.sql", 'w');
863 
864 		tarPrintf(AH, th, "--\n"
865 				  "-- NOTE:\n"
866 				  "--\n"
867 				  "-- File paths need to be edited. Search for $$PATH$$ and\n"
868 				  "-- replace it with the path to the directory containing\n"
869 				  "-- the extracted data files.\n"
870 				  "--\n");
871 
872 		AH->CustomOutPtr = _scriptOut;
873 
874 		ctx->isSpecialScript = 1;
875 		ctx->scriptTH = th;
876 
877 		ropt = NewRestoreOptions();
878 		memcpy(ropt, AH->public.ropt, sizeof(RestoreOptions));
879 		ropt->filename = NULL;
880 		ropt->dropSchema = 1;
881 		ropt->compression = 0;
882 		ropt->superuser = NULL;
883 		ropt->suppressDumpWarnings = true;
884 
885 		savDopt = AH->public.dopt;
886 		savRopt = AH->public.ropt;
887 
888 		SetArchiveOptions((Archive *) AH, NULL, ropt);
889 
890 		savVerbose = AH->public.verbose;
891 		AH->public.verbose = 0;
892 
893 		RestoreArchive((Archive *) AH);
894 
895 		SetArchiveOptions((Archive *) AH, savDopt, savRopt);
896 
897 		AH->public.verbose = savVerbose;
898 
899 		tarClose(AH, th);
900 
901 		ctx->isSpecialScript = 0;
902 
903 		/*
904 		 * EOF marker for tar files is two blocks of NULLs.
905 		 */
906 		for (i = 0; i < 512 * 2; i++)
907 		{
908 			if (fputc(0, ctx->tarFH) == EOF)
909 				WRITE_ERROR_EXIT;
910 		}
911 
912 		/* Sync the output file if one is defined */
913 		if (AH->dosync && AH->fSpec)
914 			(void) fsync_fname(AH->fSpec, false, progname);
915 	}
916 
917 	AH->FH = NULL;
918 }
919 
920 static size_t
_scriptOut(ArchiveHandle * AH,const void * buf,size_t len)921 _scriptOut(ArchiveHandle *AH, const void *buf, size_t len)
922 {
923 	lclContext *ctx = (lclContext *) AH->formatData;
924 
925 	return tarWrite(buf, len, ctx->scriptTH);
926 }
927 
928 /*
929  * BLOB support
930  */
931 
932 /*
933  * Called by the archiver when starting to save all BLOB DATA (not schema).
934  * This routine should save whatever format-specific information is needed
935  * to read the BLOBs back into memory.
936  *
937  * It is called just prior to the dumper's DataDumper routine.
938  *
939  * Optional, but strongly recommended.
940  *
941  */
942 static void
_StartBlobs(ArchiveHandle * AH,TocEntry * te)943 _StartBlobs(ArchiveHandle *AH, TocEntry *te)
944 {
945 	lclContext *ctx = (lclContext *) AH->formatData;
946 	char		fname[K_STD_BUF_SIZE];
947 
948 	sprintf(fname, "blobs.toc");
949 	ctx->blobToc = tarOpen(AH, fname, 'w');
950 }
951 
952 /*
953  * Called by the archiver when the dumper calls StartBlob.
954  *
955  * Mandatory.
956  *
957  * Must save the passed OID for retrieval at restore-time.
958  */
959 static void
_StartBlob(ArchiveHandle * AH,TocEntry * te,Oid oid)960 _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
961 {
962 	lclContext *ctx = (lclContext *) AH->formatData;
963 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
964 	char		fname[255];
965 	char	   *sfx;
966 
967 	if (oid == 0)
968 		exit_horribly(modulename, "invalid OID for large object (%u)\n", oid);
969 
970 	if (AH->compression != 0)
971 		sfx = ".gz";
972 	else
973 		sfx = "";
974 
975 	sprintf(fname, "blob_%u.dat%s", oid, sfx);
976 
977 	tarPrintf(AH, ctx->blobToc, "%u %s\n", oid, fname);
978 
979 	tctx->TH = tarOpen(AH, fname, 'w');
980 }
981 
982 /*
983  * Called by the archiver when the dumper calls EndBlob.
984  *
985  * Optional.
986  *
987  */
988 static void
_EndBlob(ArchiveHandle * AH,TocEntry * te,Oid oid)989 _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
990 {
991 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
992 
993 	tarClose(AH, tctx->TH);
994 }
995 
996 /*
997  * Called by the archiver when finishing saving all BLOB DATA.
998  *
999  * Optional.
1000  *
1001  */
1002 static void
_EndBlobs(ArchiveHandle * AH,TocEntry * te)1003 _EndBlobs(ArchiveHandle *AH, TocEntry *te)
1004 {
1005 	lclContext *ctx = (lclContext *) AH->formatData;
1006 
1007 	/* Write out a fake zero OID to mark end-of-blobs. */
1008 	/* WriteInt(AH, 0); */
1009 
1010 	tarClose(AH, ctx->blobToc);
1011 }
1012 
1013 
1014 
1015 /*------------
1016  * TAR Support
1017  *------------
1018  */
1019 
1020 static int
tarPrintf(ArchiveHandle * AH,TAR_MEMBER * th,const char * fmt,...)1021 tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...)
1022 {
1023 	char	   *p;
1024 	size_t		len = 128;		/* initial assumption about buffer size */
1025 	size_t		cnt;
1026 
1027 	for (;;)
1028 	{
1029 		va_list		args;
1030 
1031 		/* Allocate work buffer. */
1032 		p = (char *) pg_malloc(len);
1033 
1034 		/* Try to format the data. */
1035 		va_start(args, fmt);
1036 		cnt = pvsnprintf(p, len, fmt, args);
1037 		va_end(args);
1038 
1039 		if (cnt < len)
1040 			break;				/* success */
1041 
1042 		/* Release buffer and loop around to try again with larger len. */
1043 		free(p);
1044 		len = cnt;
1045 	}
1046 
1047 	cnt = tarWrite(p, cnt, th);
1048 	free(p);
1049 	return (int) cnt;
1050 }
1051 
/*
 * Report whether the given tar header block looks valid: the checksum stored
 * at offset 148 must match the computed one, and the magic/version bytes at
 * offset 257 must be one of the accepted variants.
 */
bool
isValidTarHeader(char *header)
{
	const char *magic = &header[257];
	int			computed = tarChecksum(header);
	int			stored = read_tar_number(&header[148], 8);

	if (stored != computed)
		return false;

	return
	/* POSIX tar format */
		(memcmp(magic, "ustar\0", 6) == 0 &&
		 memcmp(&header[263], "00", 2) == 0) ||
	/* GNU tar format */
		memcmp(magic, "ustar  \0", 8) == 0 ||
	/* not-quite-POSIX format written by pre-9.3 pg_dump */
		memcmp(magic, "ustar00\0", 8) == 0;
}
1076 
1077 /* Given the member, write the TAR header & copy the file */
1078 static void
_tarAddFile(ArchiveHandle * AH,TAR_MEMBER * th)1079 _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th)
1080 {
1081 	lclContext *ctx = (lclContext *) AH->formatData;
1082 	FILE	   *tmp = th->tmpFH;	/* Grab it for convenience */
1083 	char		buf[32768];
1084 	size_t		cnt;
1085 	pgoff_t		len = 0;
1086 	size_t		res;
1087 	size_t		i,
1088 				pad;
1089 
1090 	/*
1091 	 * Find file len & go back to start.
1092 	 */
1093 	if (fseeko(tmp, 0, SEEK_END) != 0)
1094 		exit_horribly(modulename, "error during file seek: %s\n",
1095 					  strerror(errno));
1096 	th->fileLen = ftello(tmp);
1097 	if (th->fileLen < 0)
1098 		exit_horribly(modulename, "could not determine seek position in archive file: %s\n",
1099 					  strerror(errno));
1100 	if (fseeko(tmp, 0, SEEK_SET) != 0)
1101 		exit_horribly(modulename, "error during file seek: %s\n",
1102 					  strerror(errno));
1103 
1104 	_tarWriteHeader(th);
1105 
1106 	while ((cnt = fread(buf, 1, sizeof(buf), tmp)) > 0)
1107 	{
1108 		if ((res = fwrite(buf, 1, cnt, th->tarFH)) != cnt)
1109 			WRITE_ERROR_EXIT;
1110 		len += res;
1111 	}
1112 	if (!feof(tmp))
1113 		READ_ERROR_EXIT(tmp);
1114 
1115 	if (fclose(tmp) != 0)		/* This *should* delete it... */
1116 		exit_horribly(modulename, "could not close temporary file: %s\n",
1117 					  strerror(errno));
1118 
1119 	if (len != th->fileLen)
1120 	{
1121 		char		buf1[32],
1122 					buf2[32];
1123 
1124 		snprintf(buf1, sizeof(buf1), INT64_FORMAT, (int64) len);
1125 		snprintf(buf2, sizeof(buf2), INT64_FORMAT, (int64) th->fileLen);
1126 		exit_horribly(modulename, "actual file length (%s) does not match expected (%s)\n",
1127 					  buf1, buf2);
1128 	}
1129 
1130 	pad = ((len + 511) & ~511) - len;
1131 	for (i = 0; i < pad; i++)
1132 	{
1133 		if (fputc('\0', th->tarFH) == EOF)
1134 			WRITE_ERROR_EXIT;
1135 	}
1136 
1137 	ctx->tarFHpos += len + pad;
1138 }
1139 
1140 /* Locate the file in the archive, read header and position to data */
1141 static TAR_MEMBER *
_tarPositionTo(ArchiveHandle * AH,const char * filename)1142 _tarPositionTo(ArchiveHandle *AH, const char *filename)
1143 {
1144 	lclContext *ctx = (lclContext *) AH->formatData;
1145 	TAR_MEMBER *th = pg_malloc0(sizeof(TAR_MEMBER));
1146 	char		c;
1147 	char		header[512];
1148 	size_t		i,
1149 				len,
1150 				blks;
1151 	int			id;
1152 
1153 	th->AH = AH;
1154 
1155 	/* Go to end of current file, if any */
1156 	if (ctx->tarFHpos != 0)
1157 	{
1158 		char		buf1[100],
1159 					buf2[100];
1160 
1161 		snprintf(buf1, sizeof(buf1), INT64_FORMAT, (int64) ctx->tarFHpos);
1162 		snprintf(buf2, sizeof(buf2), INT64_FORMAT, (int64) ctx->tarNextMember);
1163 		ahlog(AH, 4, "moving from position %s to next member at file position %s\n",
1164 			  buf1, buf2);
1165 
1166 		while (ctx->tarFHpos < ctx->tarNextMember)
1167 			_tarReadRaw(AH, &c, 1, NULL, ctx->tarFH);
1168 	}
1169 
1170 	{
1171 		char		buf[100];
1172 
1173 		snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) ctx->tarFHpos);
1174 		ahlog(AH, 4, "now at file position %s\n", buf);
1175 	}
1176 
1177 	/* We are at the start of the file, or at the next member */
1178 
1179 	/* Get the header */
1180 	if (!_tarGetHeader(AH, th))
1181 	{
1182 		if (filename)
1183 			exit_horribly(modulename, "could not find header for file \"%s\" in tar archive\n", filename);
1184 		else
1185 		{
1186 			/*
1187 			 * We're just scanning the archive for the next file, so return
1188 			 * null
1189 			 */
1190 			free(th);
1191 			return NULL;
1192 		}
1193 	}
1194 
1195 	while (filename != NULL && strcmp(th->targetFile, filename) != 0)
1196 	{
1197 		ahlog(AH, 4, "skipping tar member %s\n", th->targetFile);
1198 
1199 		id = atoi(th->targetFile);
1200 		if ((TocIDRequired(AH, id) & REQ_DATA) != 0)
1201 			exit_horribly(modulename, "restoring data out of order is not supported in this archive format: "
1202 						  "\"%s\" is required, but comes before \"%s\" in the archive file.\n",
1203 						  th->targetFile, filename);
1204 
1205 		/* Header doesn't match, so read to next header */
1206 		len = ((th->fileLen + 511) & ~511); /* Padded length */
1207 		blks = len >> 9;		/* # of 512 byte blocks */
1208 
1209 		for (i = 0; i < blks; i++)
1210 			_tarReadRaw(AH, &header[0], 512, NULL, ctx->tarFH);
1211 
1212 		if (!_tarGetHeader(AH, th))
1213 			exit_horribly(modulename, "could not find header for file \"%s\" in tar archive\n", filename);
1214 	}
1215 
1216 	ctx->tarNextMember = ctx->tarFHpos + ((th->fileLen + 511) & ~511);
1217 	th->pos = 0;
1218 
1219 	return th;
1220 }
1221 
1222 /* Read & verify a header */
1223 static int
_tarGetHeader(ArchiveHandle * AH,TAR_MEMBER * th)1224 _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th)
1225 {
1226 	lclContext *ctx = (lclContext *) AH->formatData;
1227 	char		h[512];
1228 	char		tag[100 + 1];
1229 	int			sum,
1230 				chk;
1231 	pgoff_t		len;
1232 	pgoff_t		hPos;
1233 	bool		gotBlock = false;
1234 
1235 	while (!gotBlock)
1236 	{
1237 		/* Save the pos for reporting purposes */
1238 		hPos = ctx->tarFHpos;
1239 
1240 		/* Read a 512 byte block, return EOF, exit if short */
1241 		len = _tarReadRaw(AH, h, 512, NULL, ctx->tarFH);
1242 		if (len == 0)			/* EOF */
1243 			return 0;
1244 
1245 		if (len != 512)
1246 			exit_horribly(modulename,
1247 						  ngettext("incomplete tar header found (%lu byte)\n",
1248 								   "incomplete tar header found (%lu bytes)\n",
1249 								   len),
1250 						  (unsigned long) len);
1251 
1252 		/* Calc checksum */
1253 		chk = tarChecksum(h);
1254 		sum = read_tar_number(&h[148], 8);
1255 
1256 		/*
1257 		 * If the checksum failed, see if it is a null block. If so, silently
1258 		 * continue to the next block.
1259 		 */
1260 		if (chk == sum)
1261 			gotBlock = true;
1262 		else
1263 		{
1264 			int			i;
1265 
1266 			for (i = 0; i < 512; i++)
1267 			{
1268 				if (h[i] != 0)
1269 				{
1270 					gotBlock = true;
1271 					break;
1272 				}
1273 			}
1274 		}
1275 	}
1276 
1277 	/* Name field is 100 bytes, might not be null-terminated */
1278 	strlcpy(tag, &h[0], 100 + 1);
1279 
1280 	len = read_tar_number(&h[124], 12);
1281 
1282 	{
1283 		char		posbuf[32];
1284 		char		lenbuf[32];
1285 
1286 		snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, (uint64) hPos);
1287 		snprintf(lenbuf, sizeof(lenbuf), UINT64_FORMAT, (uint64) len);
1288 		ahlog(AH, 3, "TOC Entry %s at %s (length %s, checksum %d)\n",
1289 			  tag, posbuf, lenbuf, sum);
1290 	}
1291 
1292 	if (chk != sum)
1293 	{
1294 		char		posbuf[32];
1295 
1296 		snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT,
1297 				 (uint64) ftello(ctx->tarFH));
1298 		exit_horribly(modulename,
1299 					  "corrupt tar header found in %s "
1300 					  "(expected %d, computed %d) file position %s\n",
1301 					  tag, sum, chk, posbuf);
1302 	}
1303 
1304 	th->targetFile = pg_strdup(tag);
1305 	th->fileLen = len;
1306 
1307 	return 1;
1308 }
1309 
1310 
1311 static void
_tarWriteHeader(TAR_MEMBER * th)1312 _tarWriteHeader(TAR_MEMBER *th)
1313 {
1314 	char		h[512];
1315 
1316 	tarCreateHeader(h, th->targetFile, NULL, th->fileLen,
1317 					0600, 04000, 02000, time(NULL));
1318 
1319 	/* Now write the completed header. */
1320 	if (fwrite(h, 1, 512, th->tarFH) != 512)
1321 		WRITE_ERROR_EXIT;
1322 }
1323