1 /*-------------------------------------------------------------------------
2  *
3  * pg_backup_tar.c
4  *
5  *	This file is copied from the 'files' format file, but dumps data into
6  *	one temp file then sends it to the output TAR archive.
7  *
8  *	The tar format also includes a 'restore.sql' script which is there for
9  *	the benefit of humans. This script is never used by pg_restore.
10  *
11  *	NOTE: If you untar the created 'tar' file, the resulting files are
12  *	compatible with the 'directory' format. Please keep the two formats in
13  *	sync.
14  *
15  *	See the headers to pg_backup_directory & pg_restore for more details.
16  *
17  * Copyright (c) 2000, Philip Warner
18  *		Rights are granted to use this software in any way so long
19  *		as this notice is not removed.
20  *
21  *	The author is not responsible for loss or damages that may
22  *	result from it's use.
23  *
24  *
25  * IDENTIFICATION
26  *		src/bin/pg_dump/pg_backup_tar.c
27  *
28  *-------------------------------------------------------------------------
29  */
30 #include "postgres_fe.h"
31 
32 #include "pg_backup_archiver.h"
33 #include "pg_backup_tar.h"
34 #include "pg_backup_utils.h"
35 #include "pgtar.h"
36 #include "fe_utils/string_utils.h"
37 
38 #include <sys/stat.h>
39 #include <ctype.h>
40 #include <limits.h>
41 #include <unistd.h>
42 
43 static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te);
44 static void _StartData(ArchiveHandle *AH, TocEntry *te);
45 static void _WriteData(ArchiveHandle *AH, const void *data, size_t dLen);
46 static void _EndData(ArchiveHandle *AH, TocEntry *te);
47 static int	_WriteByte(ArchiveHandle *AH, const int i);
48 static int	_ReadByte(ArchiveHandle *);
49 static void _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len);
50 static void _ReadBuf(ArchiveHandle *AH, void *buf, size_t len);
51 static void _CloseArchive(ArchiveHandle *AH);
52 static void _PrintTocData(ArchiveHandle *AH, TocEntry *te);
53 static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te);
54 static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te);
55 static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te);
56 
57 static void _StartBlobs(ArchiveHandle *AH, TocEntry *te);
58 static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
59 static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
60 static void _EndBlobs(ArchiveHandle *AH, TocEntry *te);
61 
62 #define K_STD_BUF_SIZE 1024
63 
64 
65 typedef struct
66 {
67 #ifdef HAVE_LIBZ
68 	gzFile		zFH;
69 #else
70 	FILE	   *zFH;
71 #endif
72 	FILE	   *nFH;
73 	FILE	   *tarFH;
74 	FILE	   *tmpFH;
75 	char	   *targetFile;
76 	char		mode;
77 	pgoff_t		pos;
78 	pgoff_t		fileLen;
79 	ArchiveHandle *AH;
80 } TAR_MEMBER;
81 
82 typedef struct
83 {
84 	int			hasSeek;
85 	pgoff_t		filePos;
86 	TAR_MEMBER *blobToc;
87 	FILE	   *tarFH;
88 	pgoff_t		tarFHpos;
89 	pgoff_t		tarNextMember;
90 	TAR_MEMBER *FH;
91 	int			isSpecialScript;
92 	TAR_MEMBER *scriptTH;
93 } lclContext;
94 
95 typedef struct
96 {
97 	TAR_MEMBER *TH;
98 	char	   *filename;
99 } lclTocEntry;
100 
101 /* translator: this is a module name */
102 static const char *modulename = gettext_noop("tar archiver");
103 
104 static void _LoadBlobs(ArchiveHandle *AH);
105 
106 static TAR_MEMBER *tarOpen(ArchiveHandle *AH, const char *filename, char mode);
107 static void tarClose(ArchiveHandle *AH, TAR_MEMBER *TH);
108 
109 #ifdef __NOT_USED__
110 static char *tarGets(char *buf, size_t len, TAR_MEMBER *th);
111 #endif
112 static int	tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...) pg_attribute_printf(3, 4);
113 
114 static void _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th);
115 static TAR_MEMBER *_tarPositionTo(ArchiveHandle *AH, const char *filename);
116 static size_t tarRead(void *buf, size_t len, TAR_MEMBER *th);
117 static size_t tarWrite(const void *buf, size_t len, TAR_MEMBER *th);
118 static void _tarWriteHeader(TAR_MEMBER *th);
119 static int	_tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th);
120 static size_t _tarReadRaw(ArchiveHandle *AH, void *buf, size_t len, TAR_MEMBER *th, FILE *fh);
121 
122 static size_t _scriptOut(ArchiveHandle *AH, const void *buf, size_t len);
123 
124 /*
125  *	Initializer
126  */
127 void
InitArchiveFmt_Tar(ArchiveHandle * AH)128 InitArchiveFmt_Tar(ArchiveHandle *AH)
129 {
130 	lclContext *ctx;
131 
132 	/* Assuming static functions, this can be copied for each format. */
133 	AH->ArchiveEntryPtr = _ArchiveEntry;
134 	AH->StartDataPtr = _StartData;
135 	AH->WriteDataPtr = _WriteData;
136 	AH->EndDataPtr = _EndData;
137 	AH->WriteBytePtr = _WriteByte;
138 	AH->ReadBytePtr = _ReadByte;
139 	AH->WriteBufPtr = _WriteBuf;
140 	AH->ReadBufPtr = _ReadBuf;
141 	AH->ClosePtr = _CloseArchive;
142 	AH->ReopenPtr = NULL;
143 	AH->PrintTocDataPtr = _PrintTocData;
144 	AH->ReadExtraTocPtr = _ReadExtraToc;
145 	AH->WriteExtraTocPtr = _WriteExtraToc;
146 	AH->PrintExtraTocPtr = _PrintExtraToc;
147 
148 	AH->StartBlobsPtr = _StartBlobs;
149 	AH->StartBlobPtr = _StartBlob;
150 	AH->EndBlobPtr = _EndBlob;
151 	AH->EndBlobsPtr = _EndBlobs;
152 	AH->ClonePtr = NULL;
153 	AH->DeClonePtr = NULL;
154 
155 	AH->MasterStartParallelItemPtr = NULL;
156 	AH->MasterEndParallelItemPtr = NULL;
157 
158 	AH->WorkerJobDumpPtr = NULL;
159 	AH->WorkerJobRestorePtr = NULL;
160 
161 	/*
162 	 * Set up some special context used in compressing data.
163 	 */
164 	ctx = (lclContext *) pg_malloc0(sizeof(lclContext));
165 	AH->formatData = (void *) ctx;
166 	ctx->filePos = 0;
167 	ctx->isSpecialScript = 0;
168 
169 	/* Initialize LO buffering */
170 	AH->lo_buf_size = LOBBUFSIZE;
171 	AH->lo_buf = (void *) pg_malloc(LOBBUFSIZE);
172 
173 	/*
174 	 * Now open the tar file, and load the TOC if we're in read mode.
175 	 */
176 	if (AH->mode == archModeWrite)
177 	{
178 		if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
179 		{
180 			ctx->tarFH = fopen(AH->fSpec, PG_BINARY_W);
181 			if (ctx->tarFH == NULL)
182 				exit_horribly(modulename,
183 						   "could not open TOC file \"%s\" for output: %s\n",
184 							  AH->fSpec, strerror(errno));
185 		}
186 		else
187 		{
188 			ctx->tarFH = stdout;
189 			if (ctx->tarFH == NULL)
190 				exit_horribly(modulename,
191 							  "could not open TOC file for output: %s\n",
192 							  strerror(errno));
193 		}
194 
195 		ctx->tarFHpos = 0;
196 
197 		/*
198 		 * Make unbuffered since we will dup() it, and the buffers screw each
199 		 * other
200 		 */
201 		/* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
202 
203 		ctx->hasSeek = checkSeek(ctx->tarFH);
204 
205 		/*
206 		 * We don't support compression because reading the files back is not
207 		 * possible since gzdopen uses buffered IO which totally screws file
208 		 * positioning.
209 		 */
210 		if (AH->compression != 0)
211 			exit_horribly(modulename,
212 					 "compression is not supported by tar archive format\n");
213 	}
214 	else
215 	{							/* Read Mode */
216 		if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
217 		{
218 			ctx->tarFH = fopen(AH->fSpec, PG_BINARY_R);
219 			if (ctx->tarFH == NULL)
220 				exit_horribly(modulename, "could not open TOC file \"%s\" for input: %s\n",
221 							  AH->fSpec, strerror(errno));
222 		}
223 		else
224 		{
225 			ctx->tarFH = stdin;
226 			if (ctx->tarFH == NULL)
227 				exit_horribly(modulename, "could not open TOC file for input: %s\n",
228 							  strerror(errno));
229 		}
230 
231 		/*
232 		 * Make unbuffered since we will dup() it, and the buffers screw each
233 		 * other
234 		 */
235 		/* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
236 
237 		ctx->tarFHpos = 0;
238 
239 		ctx->hasSeek = checkSeek(ctx->tarFH);
240 
241 		ctx->FH = (void *) tarOpen(AH, "toc.dat", 'r');
242 		ReadHead(AH);
243 		ReadToc(AH);
244 		tarClose(AH, ctx->FH);	/* Nothing else in the file... */
245 	}
246 }
247 
248 /*
249  * - Start a new TOC entry
250  *	 Setup the output file name.
251  */
252 static void
_ArchiveEntry(ArchiveHandle * AH,TocEntry * te)253 _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
254 {
255 	lclTocEntry *ctx;
256 	char		fn[K_STD_BUF_SIZE];
257 
258 	ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
259 	if (te->dataDumper != NULL)
260 	{
261 #ifdef HAVE_LIBZ
262 		if (AH->compression == 0)
263 			sprintf(fn, "%d.dat", te->dumpId);
264 		else
265 			sprintf(fn, "%d.dat.gz", te->dumpId);
266 #else
267 		sprintf(fn, "%d.dat", te->dumpId);
268 #endif
269 		ctx->filename = pg_strdup(fn);
270 	}
271 	else
272 	{
273 		ctx->filename = NULL;
274 		ctx->TH = NULL;
275 	}
276 	te->formatData = (void *) ctx;
277 }
278 
279 static void
_WriteExtraToc(ArchiveHandle * AH,TocEntry * te)280 _WriteExtraToc(ArchiveHandle *AH, TocEntry *te)
281 {
282 	lclTocEntry *ctx = (lclTocEntry *) te->formatData;
283 
284 	if (ctx->filename)
285 		WriteStr(AH, ctx->filename);
286 	else
287 		WriteStr(AH, "");
288 }
289 
290 static void
_ReadExtraToc(ArchiveHandle * AH,TocEntry * te)291 _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
292 {
293 	lclTocEntry *ctx = (lclTocEntry *) te->formatData;
294 
295 	if (ctx == NULL)
296 	{
297 		ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
298 		te->formatData = (void *) ctx;
299 	}
300 
301 	ctx->filename = ReadStr(AH);
302 	if (strlen(ctx->filename) == 0)
303 	{
304 		free(ctx->filename);
305 		ctx->filename = NULL;
306 	}
307 	ctx->TH = NULL;
308 }
309 
310 static void
_PrintExtraToc(ArchiveHandle * AH,TocEntry * te)311 _PrintExtraToc(ArchiveHandle *AH, TocEntry *te)
312 {
313 	lclTocEntry *ctx = (lclTocEntry *) te->formatData;
314 
315 	if (AH->public.verbose && ctx->filename != NULL)
316 		ahprintf(AH, "-- File: %s\n", ctx->filename);
317 }
318 
319 static void
_StartData(ArchiveHandle * AH,TocEntry * te)320 _StartData(ArchiveHandle *AH, TocEntry *te)
321 {
322 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
323 
324 	tctx->TH = tarOpen(AH, tctx->filename, 'w');
325 }
326 
327 static TAR_MEMBER *
tarOpen(ArchiveHandle * AH,const char * filename,char mode)328 tarOpen(ArchiveHandle *AH, const char *filename, char mode)
329 {
330 	lclContext *ctx = (lclContext *) AH->formatData;
331 	TAR_MEMBER *tm;
332 
333 #ifdef HAVE_LIBZ
334 	char		fmode[10];
335 #endif
336 
337 	if (mode == 'r')
338 	{
339 		tm = _tarPositionTo(AH, filename);
340 		if (!tm)				/* Not found */
341 		{
342 			if (filename)
343 			{
344 				/*
345 				 * Couldn't find the requested file. Future: do SEEK(0) and
346 				 * retry.
347 				 */
348 				exit_horribly(modulename, "could not find file \"%s\" in archive\n", filename);
349 			}
350 			else
351 			{
352 				/* Any file OK, none left, so return NULL */
353 				return NULL;
354 			}
355 		}
356 
357 #ifdef HAVE_LIBZ
358 
359 		if (AH->compression == 0)
360 			tm->nFH = ctx->tarFH;
361 		else
362 			exit_horribly(modulename, "compression is not supported by tar archive format\n");
363 		/* tm->zFH = gzdopen(dup(fileno(ctx->tarFH)), "rb"); */
364 #else
365 		tm->nFH = ctx->tarFH;
366 #endif
367 	}
368 	else
369 	{
370 		int			old_umask;
371 
372 		tm = pg_malloc0(sizeof(TAR_MEMBER));
373 
374 		/*
375 		 * POSIX does not require, but permits, tmpfile() to restrict file
376 		 * permissions.  Given an OS crash after we write data, the filesystem
377 		 * might retain the data but forget tmpfile()'s unlink().  If so, the
378 		 * file mode protects confidentiality of the data written.
379 		 */
380 		old_umask = umask(S_IRWXG | S_IRWXO);
381 
382 #ifndef WIN32
383 		tm->tmpFH = tmpfile();
384 #else
385 
386 		/*
387 		 * On WIN32, tmpfile() generates a filename in the root directory,
388 		 * which requires administrative permissions on certain systems. Loop
389 		 * until we find a unique file name we can create.
390 		 */
391 		while (1)
392 		{
393 			char	   *name;
394 			int			fd;
395 
396 			name = _tempnam(NULL, "pg_temp_");
397 			if (name == NULL)
398 				break;
399 			fd = open(name, O_RDWR | O_CREAT | O_EXCL | O_BINARY |
400 					  O_TEMPORARY, S_IRUSR | S_IWUSR);
401 			free(name);
402 
403 			if (fd != -1)		/* created a file */
404 			{
405 				tm->tmpFH = fdopen(fd, "w+b");
406 				break;
407 			}
408 			else if (errno != EEXIST)	/* failure other than file exists */
409 				break;
410 		}
411 #endif
412 
413 		if (tm->tmpFH == NULL)
414 			exit_horribly(modulename, "could not generate temporary file name: %s\n", strerror(errno));
415 
416 		umask(old_umask);
417 
418 #ifdef HAVE_LIBZ
419 
420 		if (AH->compression != 0)
421 		{
422 			sprintf(fmode, "wb%d", AH->compression);
423 			tm->zFH = gzdopen(dup(fileno(tm->tmpFH)), fmode);
424 			if (tm->zFH == NULL)
425 				exit_horribly(modulename, "could not open temporary file\n");
426 		}
427 		else
428 			tm->nFH = tm->tmpFH;
429 #else
430 
431 		tm->nFH = tm->tmpFH;
432 #endif
433 
434 		tm->AH = AH;
435 		tm->targetFile = pg_strdup(filename);
436 	}
437 
438 	tm->mode = mode;
439 	tm->tarFH = ctx->tarFH;
440 
441 	return tm;
442 }
443 
444 static void
tarClose(ArchiveHandle * AH,TAR_MEMBER * th)445 tarClose(ArchiveHandle *AH, TAR_MEMBER *th)
446 {
447 	/*
448 	 * Close the GZ file since we dup'd. This will flush the buffers.
449 	 */
450 	if (AH->compression != 0)
451 		if (GZCLOSE(th->zFH) != 0)
452 			exit_horribly(modulename, "could not close tar member\n");
453 
454 	if (th->mode == 'w')
455 		_tarAddFile(AH, th);	/* This will close the temp file */
456 
457 	/*
458 	 * else Nothing to do for normal read since we don't dup() normal file
459 	 * handle, and we don't use temp files.
460 	 */
461 
462 	if (th->targetFile)
463 		free(th->targetFile);
464 
465 	th->nFH = NULL;
466 	th->zFH = NULL;
467 }
468 
#ifdef __NOT_USED__
/*
 * Read one line from a member, fgets()-style.
 *
 * At most len - 1 bytes are stored in buf, followed by a terminating NUL, so
 * the result always fits in a buffer of len bytes.  Reading stops after a
 * newline (which is kept), at the member's logical end, or when the buffer
 * is full.
 *
 * Returns buf, or NULL if len is zero or the underlying read hit end of
 * file before any byte was obtained.  When the member is already at its
 * logical end an empty string is returned.
 */
static char *
tarGets(char *buf, size_t len, TAR_MEMBER *th)
{
	size_t		room;
	size_t		cnt = 0;
	char		c = ' ';
	int			eof = 0;
	pgoff_t		remaining;

	/* Without room for even the terminator there is nothing we can do */
	if (len == 0)
		return NULL;

	/* Always keep one byte in reserve for the trailing NUL */
	room = len - 1;

	/* Can't read past logical EOF */
	remaining = th->fileLen - th->pos;
	if (remaining <= 0)
		room = 0;
	else if ((uint64) remaining < (uint64) room)
		room = (size_t) remaining;

	while (cnt < room && c != '\n')
	{
		if (_tarReadRaw(th->AH, &c, 1, th, NULL) != 1)
		{
			eof = 1;
			break;
		}
		buf[cnt++] = c;
	}

	if (eof && cnt == 0)
		return NULL;

	buf[cnt] = '\0';

	/*
	 * Advance by the bytes actually consumed; strlen() would undercount if
	 * the data contained a NUL byte.
	 */
	th->pos += cnt;

	return buf;
}
#endif
509 
510 /*
511  * Just read bytes from the archive. This is the low level read routine
512  * that is used for ALL reads on a tar file.
513  */
514 static size_t
_tarReadRaw(ArchiveHandle * AH,void * buf,size_t len,TAR_MEMBER * th,FILE * fh)515 _tarReadRaw(ArchiveHandle *AH, void *buf, size_t len, TAR_MEMBER *th, FILE *fh)
516 {
517 	lclContext *ctx = (lclContext *) AH->formatData;
518 	size_t		avail;
519 	size_t		used = 0;
520 	size_t		res = 0;
521 
522 	avail = AH->lookaheadLen - AH->lookaheadPos;
523 	if (avail > 0)
524 	{
525 		/* We have some lookahead bytes to use */
526 		if (avail >= len)		/* Just use the lookahead buffer */
527 			used = len;
528 		else
529 			used = avail;
530 
531 		/* Copy, and adjust buffer pos */
532 		memcpy(buf, AH->lookahead + AH->lookaheadPos, used);
533 		AH->lookaheadPos += used;
534 
535 		/* Adjust required length */
536 		len -= used;
537 	}
538 
539 	/* Read the file if len > 0 */
540 	if (len > 0)
541 	{
542 		if (fh)
543 		{
544 			res = fread(&((char *) buf)[used], 1, len, fh);
545 			if (res != len && !feof(fh))
546 				READ_ERROR_EXIT(fh);
547 		}
548 		else if (th)
549 		{
550 			if (th->zFH)
551 			{
552 				res = GZREAD(&((char *) buf)[used], 1, len, th->zFH);
553 				if (res != len && !GZEOF(th->zFH))
554 				{
555 #ifdef HAVE_LIBZ
556 					int		errnum;
557 					const char *errmsg = gzerror(th->zFH, &errnum);
558 
559 					exit_horribly(modulename,
560 								  "could not read from input file: %s\n",
561 								  errnum == Z_ERRNO ? strerror(errno) : errmsg);
562 #else
563 					exit_horribly(modulename,
564 								  "could not read from input file: %s\n",
565 								  strerror(errno));
566 #endif
567 				}
568 			}
569 			else
570 			{
571 				res = fread(&((char *) buf)[used], 1, len, th->nFH);
572 				if (res != len && !feof(th->nFH))
573 					READ_ERROR_EXIT(th->nFH);
574 			}
575 		}
576 		else
577 			exit_horribly(modulename, "internal error -- neither th nor fh specified in tarReadRaw()\n");
578 	}
579 
580 	ctx->tarFHpos += res + used;
581 
582 	return (res + used);
583 }
584 
585 static size_t
tarRead(void * buf,size_t len,TAR_MEMBER * th)586 tarRead(void *buf, size_t len, TAR_MEMBER *th)
587 {
588 	size_t		res;
589 
590 	if (th->pos + len > th->fileLen)
591 		len = th->fileLen - th->pos;
592 
593 	if (len <= 0)
594 		return 0;
595 
596 	res = _tarReadRaw(th->AH, buf, len, th, NULL);
597 
598 	th->pos += res;
599 
600 	return res;
601 }
602 
603 static size_t
tarWrite(const void * buf,size_t len,TAR_MEMBER * th)604 tarWrite(const void *buf, size_t len, TAR_MEMBER *th)
605 {
606 	size_t		res;
607 
608 	if (th->zFH != NULL)
609 		res = GZWRITE(buf, 1, len, th->zFH);
610 	else
611 		res = fwrite(buf, 1, len, th->nFH);
612 
613 	th->pos += res;
614 	return res;
615 }
616 
617 static void
_WriteData(ArchiveHandle * AH,const void * data,size_t dLen)618 _WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
619 {
620 	lclTocEntry *tctx = (lclTocEntry *) AH->currToc->formatData;
621 
622 	if (tarWrite(data, dLen, tctx->TH) != dLen)
623 		WRITE_ERROR_EXIT;
624 
625 	return;
626 }
627 
628 static void
_EndData(ArchiveHandle * AH,TocEntry * te)629 _EndData(ArchiveHandle *AH, TocEntry *te)
630 {
631 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
632 
633 	/* Close the file */
634 	tarClose(AH, tctx->TH);
635 	tctx->TH = NULL;
636 }
637 
638 /*
639  * Print data for a given file
640  */
641 static void
_PrintFileData(ArchiveHandle * AH,char * filename)642 _PrintFileData(ArchiveHandle *AH, char *filename)
643 {
644 	lclContext *ctx = (lclContext *) AH->formatData;
645 	char		buf[4096];
646 	size_t		cnt;
647 	TAR_MEMBER *th;
648 
649 	if (!filename)
650 		return;
651 
652 	th = tarOpen(AH, filename, 'r');
653 	ctx->FH = th;
654 
655 	while ((cnt = tarRead(buf, 4095, th)) > 0)
656 	{
657 		buf[cnt] = '\0';
658 		ahwrite(buf, 1, cnt, AH);
659 	}
660 
661 	tarClose(AH, th);
662 }
663 
664 
665 /*
666  * Print data for a given TOC entry
667 */
668 static void
_PrintTocData(ArchiveHandle * AH,TocEntry * te)669 _PrintTocData(ArchiveHandle *AH, TocEntry *te)
670 {
671 	lclContext *ctx = (lclContext *) AH->formatData;
672 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
673 	int			pos1;
674 
675 	if (!tctx->filename)
676 		return;
677 
678 	/*
679 	 * If we're writing the special restore.sql script, emit a suitable
680 	 * command to include each table's data from the corresponding file.
681 	 *
682 	 * In the COPY case this is a bit klugy because the regular COPY command
683 	 * was already printed before we get control.
684 	 */
685 	if (ctx->isSpecialScript)
686 	{
687 		if (te->copyStmt)
688 		{
689 			/* Abort the COPY FROM stdin */
690 			ahprintf(AH, "\\.\n");
691 
692 			/*
693 			 * The COPY statement should look like "COPY ... FROM stdin;\n",
694 			 * see dumpTableData().
695 			 */
696 			pos1 = (int) strlen(te->copyStmt) - 13;
697 			if (pos1 < 6 || strncmp(te->copyStmt, "COPY ", 5) != 0 ||
698 				strcmp(te->copyStmt + pos1, " FROM stdin;\n") != 0)
699 				exit_horribly(modulename,
700 							  "unexpected COPY statement syntax: \"%s\"\n",
701 							  te->copyStmt);
702 
703 			/* Emit all but the FROM part ... */
704 			ahwrite(te->copyStmt, 1, pos1, AH);
705 			/* ... and insert modified FROM */
706 			ahprintf(AH, " FROM '$$PATH$$/%s';\n\n", tctx->filename);
707 		}
708 		else
709 		{
710 			/* --inserts mode, no worries, just include the data file */
711 			ahprintf(AH, "\\i $$PATH$$/%s\n\n", tctx->filename);
712 		}
713 
714 		return;
715 	}
716 
717 	if (strcmp(te->desc, "BLOBS") == 0)
718 		_LoadBlobs(AH);
719 	else
720 		_PrintFileData(AH, tctx->filename);
721 }
722 
723 static void
_LoadBlobs(ArchiveHandle * AH)724 _LoadBlobs(ArchiveHandle *AH)
725 {
726 	Oid			oid;
727 	lclContext *ctx = (lclContext *) AH->formatData;
728 	TAR_MEMBER *th;
729 	size_t		cnt;
730 	bool		foundBlob = false;
731 	char		buf[4096];
732 
733 	StartRestoreBlobs(AH);
734 
735 	th = tarOpen(AH, NULL, 'r');	/* Open next file */
736 	while (th != NULL)
737 	{
738 		ctx->FH = th;
739 
740 		if (strncmp(th->targetFile, "blob_", 5) == 0)
741 		{
742 			oid = atooid(&th->targetFile[5]);
743 			if (oid != 0)
744 			{
745 				ahlog(AH, 1, "restoring large object with OID %u\n", oid);
746 
747 				StartRestoreBlob(AH, oid, AH->public.ropt->dropSchema);
748 
749 				while ((cnt = tarRead(buf, 4095, th)) > 0)
750 				{
751 					buf[cnt] = '\0';
752 					ahwrite(buf, 1, cnt, AH);
753 				}
754 				EndRestoreBlob(AH, oid);
755 				foundBlob = true;
756 			}
757 			tarClose(AH, th);
758 		}
759 		else
760 		{
761 			tarClose(AH, th);
762 
763 			/*
764 			 * Once we have found the first blob, stop at the first non-blob
765 			 * entry (which will be 'blobs.toc').  This coding would eat all
766 			 * the rest of the archive if there are no blobs ... but this
767 			 * function shouldn't be called at all in that case.
768 			 */
769 			if (foundBlob)
770 				break;
771 		}
772 
773 		th = tarOpen(AH, NULL, 'r');
774 	}
775 	EndRestoreBlobs(AH);
776 }
777 
778 
779 static int
_WriteByte(ArchiveHandle * AH,const int i)780 _WriteByte(ArchiveHandle *AH, const int i)
781 {
782 	lclContext *ctx = (lclContext *) AH->formatData;
783 	char		b = i;			/* Avoid endian problems */
784 
785 	if (tarWrite(&b, 1, ctx->FH) != 1)
786 		WRITE_ERROR_EXIT;
787 
788 	ctx->filePos += 1;
789 	return 1;
790 }
791 
792 static int
_ReadByte(ArchiveHandle * AH)793 _ReadByte(ArchiveHandle *AH)
794 {
795 	lclContext *ctx = (lclContext *) AH->formatData;
796 	size_t		res;
797 	unsigned char c;
798 
799 	res = tarRead(&c, 1, ctx->FH);
800 	if (res != 1)
801 		/* We already would have exited for errors on reads, must be EOF */
802 		exit_horribly(modulename,
803 					  "could not read from input file: end of file\n");
804 	ctx->filePos += 1;
805 	return c;
806 }
807 
808 static void
_WriteBuf(ArchiveHandle * AH,const void * buf,size_t len)809 _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
810 {
811 	lclContext *ctx = (lclContext *) AH->formatData;
812 
813 	if (tarWrite(buf, len, ctx->FH) != len)
814 		WRITE_ERROR_EXIT;
815 
816 	ctx->filePos += len;
817 }
818 
819 static void
_ReadBuf(ArchiveHandle * AH,void * buf,size_t len)820 _ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
821 {
822 	lclContext *ctx = (lclContext *) AH->formatData;
823 
824 	if (tarRead(buf, len, ctx->FH) != len)
825 		/* We already would have exited for errors on reads, must be EOF */
826 		exit_horribly(modulename,
827 					  "could not read from input file: end of file\n");
828 
829 	ctx->filePos += len;
830 	return;
831 }
832 
833 static void
_CloseArchive(ArchiveHandle * AH)834 _CloseArchive(ArchiveHandle *AH)
835 {
836 	lclContext *ctx = (lclContext *) AH->formatData;
837 	TAR_MEMBER *th;
838 	RestoreOptions *ropt;
839 	RestoreOptions *savRopt;
840 	DumpOptions *savDopt;
841 	int			savVerbose,
842 				i;
843 
844 	if (AH->mode == archModeWrite)
845 	{
846 		/*
847 		 * Write the Header & TOC to the archive FIRST
848 		 */
849 		th = tarOpen(AH, "toc.dat", 'w');
850 		ctx->FH = th;
851 		WriteHead(AH);
852 		WriteToc(AH);
853 		tarClose(AH, th);		/* Not needed any more */
854 
855 		/*
856 		 * Now send the data (tables & blobs)
857 		 */
858 		WriteDataChunks(AH, NULL);
859 
860 		/*
861 		 * Now this format wants to append a script which does a full restore
862 		 * if the files have been extracted.
863 		 */
864 		th = tarOpen(AH, "restore.sql", 'w');
865 
866 		tarPrintf(AH, th, "--\n"
867 				  "-- NOTE:\n"
868 				  "--\n"
869 				  "-- File paths need to be edited. Search for $$PATH$$ and\n"
870 				  "-- replace it with the path to the directory containing\n"
871 				  "-- the extracted data files.\n"
872 				  "--\n");
873 
874 		AH->CustomOutPtr = _scriptOut;
875 
876 		ctx->isSpecialScript = 1;
877 		ctx->scriptTH = th;
878 
879 		ropt = NewRestoreOptions();
880 		memcpy(ropt, AH->public.ropt, sizeof(RestoreOptions));
881 		ropt->filename = NULL;
882 		ropt->dropSchema = 1;
883 		ropt->compression = 0;
884 		ropt->superuser = NULL;
885 		ropt->suppressDumpWarnings = true;
886 
887 		savDopt = AH->public.dopt;
888 		savRopt = AH->public.ropt;
889 
890 		SetArchiveOptions((Archive *) AH, NULL, ropt);
891 
892 		savVerbose = AH->public.verbose;
893 		AH->public.verbose = 0;
894 
895 		RestoreArchive((Archive *) AH);
896 
897 		SetArchiveOptions((Archive *) AH, savDopt, savRopt);
898 
899 		AH->public.verbose = savVerbose;
900 
901 		tarClose(AH, th);
902 
903 		ctx->isSpecialScript = 0;
904 
905 		/*
906 		 * EOF marker for tar files is two blocks of NULLs.
907 		 */
908 		for (i = 0; i < 512 * 2; i++)
909 		{
910 			if (fputc(0, ctx->tarFH) == EOF)
911 				WRITE_ERROR_EXIT;
912 		}
913 	}
914 
915 	AH->FH = NULL;
916 }
917 
918 static size_t
_scriptOut(ArchiveHandle * AH,const void * buf,size_t len)919 _scriptOut(ArchiveHandle *AH, const void *buf, size_t len)
920 {
921 	lclContext *ctx = (lclContext *) AH->formatData;
922 
923 	return tarWrite(buf, len, ctx->scriptTH);
924 }
925 
926 /*
927  * BLOB support
928  */
929 
930 /*
931  * Called by the archiver when starting to save all BLOB DATA (not schema).
932  * This routine should save whatever format-specific information is needed
933  * to read the BLOBs back into memory.
934  *
935  * It is called just prior to the dumper's DataDumper routine.
936  *
937  * Optional, but strongly recommended.
938  *
939  */
940 static void
_StartBlobs(ArchiveHandle * AH,TocEntry * te)941 _StartBlobs(ArchiveHandle *AH, TocEntry *te)
942 {
943 	lclContext *ctx = (lclContext *) AH->formatData;
944 	char		fname[K_STD_BUF_SIZE];
945 
946 	sprintf(fname, "blobs.toc");
947 	ctx->blobToc = tarOpen(AH, fname, 'w');
948 }
949 
950 /*
951  * Called by the archiver when the dumper calls StartBlob.
952  *
953  * Mandatory.
954  *
955  * Must save the passed OID for retrieval at restore-time.
956  */
957 static void
_StartBlob(ArchiveHandle * AH,TocEntry * te,Oid oid)958 _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
959 {
960 	lclContext *ctx = (lclContext *) AH->formatData;
961 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
962 	char		fname[255];
963 	char	   *sfx;
964 
965 	if (oid == 0)
966 		exit_horribly(modulename, "invalid OID for large object (%u)\n", oid);
967 
968 	if (AH->compression != 0)
969 		sfx = ".gz";
970 	else
971 		sfx = "";
972 
973 	sprintf(fname, "blob_%u.dat%s", oid, sfx);
974 
975 	tarPrintf(AH, ctx->blobToc, "%u %s\n", oid, fname);
976 
977 	tctx->TH = tarOpen(AH, fname, 'w');
978 }
979 
980 /*
981  * Called by the archiver when the dumper calls EndBlob.
982  *
983  * Optional.
984  *
985  */
986 static void
_EndBlob(ArchiveHandle * AH,TocEntry * te,Oid oid)987 _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
988 {
989 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
990 
991 	tarClose(AH, tctx->TH);
992 }
993 
994 /*
995  * Called by the archiver when finishing saving all BLOB DATA.
996  *
997  * Optional.
998  *
999  */
1000 static void
_EndBlobs(ArchiveHandle * AH,TocEntry * te)1001 _EndBlobs(ArchiveHandle *AH, TocEntry *te)
1002 {
1003 	lclContext *ctx = (lclContext *) AH->formatData;
1004 
1005 	/* Write out a fake zero OID to mark end-of-blobs. */
1006 	/* WriteInt(AH, 0); */
1007 
1008 	tarClose(AH, ctx->blobToc);
1009 }
1010 
1011 
1012 
1013 /*------------
1014  * TAR Support
1015  *------------
1016  */
1017 
1018 static int
tarPrintf(ArchiveHandle * AH,TAR_MEMBER * th,const char * fmt,...)1019 tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...)
1020 {
1021 	char	   *p;
1022 	size_t		len = 128;		/* initial assumption about buffer size */
1023 	size_t		cnt;
1024 
1025 	for (;;)
1026 	{
1027 		va_list		args;
1028 
1029 		/* Allocate work buffer. */
1030 		p = (char *) pg_malloc(len);
1031 
1032 		/* Try to format the data. */
1033 		va_start(args, fmt);
1034 		cnt = pvsnprintf(p, len, fmt, args);
1035 		va_end(args);
1036 
1037 		if (cnt < len)
1038 			break;				/* success */
1039 
1040 		/* Release buffer and loop around to try again with larger len. */
1041 		free(p);
1042 		len = cnt;
1043 	}
1044 
1045 	cnt = tarWrite(p, cnt, th);
1046 	free(p);
1047 	return (int) cnt;
1048 }
1049 
/*
 * Check whether a 512-byte block looks like a tar header we can handle.
 *
 * The checksum stored at offset 148 must equal the one computed by
 * tarChecksum(), and the magic/version area at offset 257 must be one of the
 * three variants accepted below.
 */
bool
isValidTarHeader(char *header)
{
	const char *magic = &header[257];
	int			computed = tarChecksum(header);
	int			stored = read_tar_number(&header[148], 8);

	if (stored != computed)
		return false;

	return
	/* POSIX tar format */
		(memcmp(magic, "ustar\0", 6) == 0 &&
		 memcmp(&header[263], "00", 2) == 0) ||
	/* GNU tar format */
		memcmp(magic, "ustar  \0", 8) == 0 ||
	/* not-quite-POSIX format written by pre-9.3 pg_dump */
		memcmp(magic, "ustar00\0", 8) == 0;
}
1074 
1075 /* Given the member, write the TAR header & copy the file */
1076 static void
_tarAddFile(ArchiveHandle * AH,TAR_MEMBER * th)1077 _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th)
1078 {
1079 	lclContext *ctx = (lclContext *) AH->formatData;
1080 	FILE	   *tmp = th->tmpFH;	/* Grab it for convenience */
1081 	char		buf[32768];
1082 	size_t		cnt;
1083 	pgoff_t		len = 0;
1084 	size_t		res;
1085 	size_t		i,
1086 				pad;
1087 
1088 	/*
1089 	 * Find file len & go back to start.
1090 	 */
1091 	if (fseeko(tmp, 0, SEEK_END) != 0)
1092 		exit_horribly(modulename, "error during file seek: %s\n",
1093 					  strerror(errno));
1094 	th->fileLen = ftello(tmp);
1095 	if (th->fileLen < 0)
1096 		exit_horribly(modulename, "could not determine seek position in archive file: %s\n",
1097 					  strerror(errno));
1098 	if (fseeko(tmp, 0, SEEK_SET) != 0)
1099 		exit_horribly(modulename, "error during file seek: %s\n",
1100 					  strerror(errno));
1101 
1102 	_tarWriteHeader(th);
1103 
1104 	while ((cnt = fread(buf, 1, sizeof(buf), tmp)) > 0)
1105 	{
1106 		if ((res = fwrite(buf, 1, cnt, th->tarFH)) != cnt)
1107 			WRITE_ERROR_EXIT;
1108 		len += res;
1109 	}
1110 	if (!feof(tmp))
1111 		READ_ERROR_EXIT(tmp);
1112 
1113 	if (fclose(tmp) != 0)		/* This *should* delete it... */
1114 		exit_horribly(modulename, "could not close temporary file: %s\n",
1115 					  strerror(errno));
1116 
1117 	if (len != th->fileLen)
1118 	{
1119 		char		buf1[32],
1120 					buf2[32];
1121 
1122 		snprintf(buf1, sizeof(buf1), INT64_FORMAT, (int64) len);
1123 		snprintf(buf2, sizeof(buf2), INT64_FORMAT, (int64) th->fileLen);
1124 		exit_horribly(modulename, "actual file length (%s) does not match expected (%s)\n",
1125 					  buf1, buf2);
1126 	}
1127 
1128 	pad = ((len + 511) & ~511) - len;
1129 	for (i = 0; i < pad; i++)
1130 	{
1131 		if (fputc('\0', th->tarFH) == EOF)
1132 			WRITE_ERROR_EXIT;
1133 	}
1134 
1135 	ctx->tarFHpos += len + pad;
1136 }
1137 
1138 /* Locate the file in the archive, read header and position to data */
1139 static TAR_MEMBER *
_tarPositionTo(ArchiveHandle * AH,const char * filename)1140 _tarPositionTo(ArchiveHandle *AH, const char *filename)
1141 {
1142 	lclContext *ctx = (lclContext *) AH->formatData;
1143 	TAR_MEMBER *th = pg_malloc0(sizeof(TAR_MEMBER));
1144 	char		c;
1145 	char		header[512];
1146 	size_t		i,
1147 				len,
1148 				blks;
1149 	int			id;
1150 
1151 	th->AH = AH;
1152 
1153 	/* Go to end of current file, if any */
1154 	if (ctx->tarFHpos != 0)
1155 	{
1156 		char		buf1[100],
1157 					buf2[100];
1158 
1159 		snprintf(buf1, sizeof(buf1), INT64_FORMAT, (int64) ctx->tarFHpos);
1160 		snprintf(buf2, sizeof(buf2), INT64_FORMAT, (int64) ctx->tarNextMember);
1161 		ahlog(AH, 4, "moving from position %s to next member at file position %s\n",
1162 			  buf1, buf2);
1163 
1164 		while (ctx->tarFHpos < ctx->tarNextMember)
1165 			_tarReadRaw(AH, &c, 1, NULL, ctx->tarFH);
1166 	}
1167 
1168 	{
1169 		char		buf[100];
1170 
1171 		snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) ctx->tarFHpos);
1172 		ahlog(AH, 4, "now at file position %s\n", buf);
1173 	}
1174 
1175 	/* We are at the start of the file, or at the next member */
1176 
1177 	/* Get the header */
1178 	if (!_tarGetHeader(AH, th))
1179 	{
1180 		if (filename)
1181 			exit_horribly(modulename, "could not find header for file \"%s\" in tar archive\n", filename);
1182 		else
1183 		{
1184 			/*
1185 			 * We're just scanning the archive for the next file, so return
1186 			 * null
1187 			 */
1188 			free(th);
1189 			return NULL;
1190 		}
1191 	}
1192 
1193 	while (filename != NULL && strcmp(th->targetFile, filename) != 0)
1194 	{
1195 		ahlog(AH, 4, "skipping tar member %s\n", th->targetFile);
1196 
1197 		id = atoi(th->targetFile);
1198 		if ((TocIDRequired(AH, id) & REQ_DATA) != 0)
1199 			exit_horribly(modulename, "restoring data out of order is not supported in this archive format: "
1200 						  "\"%s\" is required, but comes before \"%s\" in the archive file.\n",
1201 						  th->targetFile, filename);
1202 
1203 		/* Header doesn't match, so read to next header */
1204 		len = ((th->fileLen + 511) & ~511);		/* Padded length */
1205 		blks = len >> 9;		/* # of 512 byte blocks */
1206 
1207 		for (i = 0; i < blks; i++)
1208 			_tarReadRaw(AH, &header[0], 512, NULL, ctx->tarFH);
1209 
1210 		if (!_tarGetHeader(AH, th))
1211 			exit_horribly(modulename, "could not find header for file \"%s\" in tar archive\n", filename);
1212 	}
1213 
1214 	ctx->tarNextMember = ctx->tarFHpos + ((th->fileLen + 511) & ~511);
1215 	th->pos = 0;
1216 
1217 	return th;
1218 }
1219 
1220 /* Read & verify a header */
1221 static int
_tarGetHeader(ArchiveHandle * AH,TAR_MEMBER * th)1222 _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th)
1223 {
1224 	lclContext *ctx = (lclContext *) AH->formatData;
1225 	char		h[512];
1226 	char		tag[100 + 1];
1227 	int			sum,
1228 				chk;
1229 	pgoff_t		len;
1230 	pgoff_t		hPos;
1231 	bool		gotBlock = false;
1232 
1233 	while (!gotBlock)
1234 	{
1235 		/* Save the pos for reporting purposes */
1236 		hPos = ctx->tarFHpos;
1237 
1238 		/* Read a 512 byte block, return EOF, exit if short */
1239 		len = _tarReadRaw(AH, h, 512, NULL, ctx->tarFH);
1240 		if (len == 0)			/* EOF */
1241 			return 0;
1242 
1243 		if (len != 512)
1244 			exit_horribly(modulename,
1245 						  ngettext("incomplete tar header found (%lu byte)\n",
1246 								 "incomplete tar header found (%lu bytes)\n",
1247 								   len),
1248 						  (unsigned long) len);
1249 
1250 		/* Calc checksum */
1251 		chk = tarChecksum(h);
1252 		sum = read_tar_number(&h[148], 8);
1253 
1254 		/*
1255 		 * If the checksum failed, see if it is a null block. If so, silently
1256 		 * continue to the next block.
1257 		 */
1258 		if (chk == sum)
1259 			gotBlock = true;
1260 		else
1261 		{
1262 			int			i;
1263 
1264 			for (i = 0; i < 512; i++)
1265 			{
1266 				if (h[i] != 0)
1267 				{
1268 					gotBlock = true;
1269 					break;
1270 				}
1271 			}
1272 		}
1273 	}
1274 
1275 	/* Name field is 100 bytes, might not be null-terminated */
1276 	strlcpy(tag, &h[0], 100 + 1);
1277 
1278 	len = read_tar_number(&h[124], 12);
1279 
1280 	{
1281 		char		posbuf[32];
1282 		char		lenbuf[32];
1283 
1284 		snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, (uint64) hPos);
1285 		snprintf(lenbuf, sizeof(lenbuf), UINT64_FORMAT, (uint64) len);
1286 		ahlog(AH, 3, "TOC Entry %s at %s (length %s, checksum %d)\n",
1287 			  tag, posbuf, lenbuf, sum);
1288 	}
1289 
1290 	if (chk != sum)
1291 	{
1292 		char		posbuf[32];
1293 
1294 		snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT,
1295 				 (uint64) ftello(ctx->tarFH));
1296 		exit_horribly(modulename,
1297 					  "corrupt tar header found in %s "
1298 					  "(expected %d, computed %d) file position %s\n",
1299 					  tag, sum, chk, posbuf);
1300 	}
1301 
1302 	th->targetFile = pg_strdup(tag);
1303 	th->fileLen = len;
1304 
1305 	return 1;
1306 }
1307 
1308 
1309 static void
_tarWriteHeader(TAR_MEMBER * th)1310 _tarWriteHeader(TAR_MEMBER *th)
1311 {
1312 	char		h[512];
1313 
1314 	tarCreateHeader(h, th->targetFile, NULL, th->fileLen,
1315 					0600, 04000, 02000, time(NULL));
1316 
1317 	/* Now write the completed header. */
1318 	if (fwrite(h, 1, 512, th->tarFH) != 512)
1319 		WRITE_ERROR_EXIT;
1320 }
1321