1 /*-------------------------------------------------------------------------
2 *
3 * pg_backup_tar.c
4 *
5 * This file is copied from the 'files' format file, but dumps data into
6 * one temp file then sends it to the output TAR archive.
7 *
8 * The tar format also includes a 'restore.sql' script which is there for
9 * the benefit of humans. This script is never used by pg_restore.
10 *
11 * NOTE: If you untar the created 'tar' file, the resulting files are
12 * compatible with the 'directory' format. Please keep the two formats in
13 * sync.
14 *
15 * See the headers to pg_backup_directory & pg_restore for more details.
16 *
17 * Copyright (c) 2000, Philip Warner
18 * Rights are granted to use this software in any way so long
19 * as this notice is not removed.
20 *
21 * The author is not responsible for loss or damages that may
22 * result from it's use.
23 *
24 *
25 * IDENTIFICATION
26 * src/bin/pg_dump/pg_backup_tar.c
27 *
28 *-------------------------------------------------------------------------
29 */
30 #include "postgres_fe.h"
31
32 #include "pg_backup_archiver.h"
33 #include "pg_backup_tar.h"
34 #include "pg_backup_utils.h"
35 #include "pgtar.h"
36 #include "fe_utils/string_utils.h"
37
38 #include <sys/stat.h>
39 #include <ctype.h>
40 #include <limits.h>
41 #include <unistd.h>
42
/*
 * Archive-format callbacks.  InitArchiveFmt_Tar() stores each of these in
 * the matching function-pointer slot of the ArchiveHandle.
 */
43 static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te);
44 static void _StartData(ArchiveHandle *AH, TocEntry *te);
45 static void _WriteData(ArchiveHandle *AH, const void *data, size_t dLen);
46 static void _EndData(ArchiveHandle *AH, TocEntry *te);
47 static int _WriteByte(ArchiveHandle *AH, const int i);
48 static int _ReadByte(ArchiveHandle *);
49 static void _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len);
50 static void _ReadBuf(ArchiveHandle *AH, void *buf, size_t len);
51 static void _CloseArchive(ArchiveHandle *AH);
52 static void _PrintTocData(ArchiveHandle *AH, TocEntry *te);
53 static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te);
54 static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te);
55 static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te);
56
/* Large-object ("blob") callbacks, also installed by InitArchiveFmt_Tar(). */
57 static void _StartBlobs(ArchiveHandle *AH, TocEntry *te);
58 static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
59 static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
60 static void _EndBlobs(ArchiveHandle *AH, TocEntry *te);
61
/* Size of the scratch buffers used when building member file names. */
62 #define K_STD_BUF_SIZE 1024
63
64
/*
 * One member (file) of the tar archive, open for reading or for writing.
 * Members are created by tarOpen() and finished with tarClose().
 */
65 typedef struct
66 {
67 #ifdef HAVE_LIBZ
68 gzFile zFH; /* compressed stream; set only when compression is in use */
69 #else
70 FILE *zFH; /* placeholder for the gz handle in builds without zlib */
71 #endif
72 FILE *nFH; /* uncompressed stream: the archive when reading, the temp file when writing */
73 FILE *tarFH; /* the tar archive stream, copied from lclContext.tarFH */
74 FILE *tmpFH; /* temp file collecting this member's data in write mode */
75 char *targetFile; /* member name (allocated copy) */
76 char mode; /* 'r' or 'w', as passed to tarOpen() */
77 pgoff_t pos; /* bytes transferred so far via tarRead()/tarWrite() */
78 pgoff_t fileLen; /* length of the member's data in bytes */
79 ArchiveHandle *AH; /* archive this member belongs to */
80 } TAR_MEMBER;
81
/*
 * Format-private state for a whole archive; stored in AH->formatData by
 * InitArchiveFmt_Tar().
 */
82 typedef struct
83 {
84 int hasSeek; /* result of checkSeek() on tarFH */
85 pgoff_t filePos; /* advanced by the byte/buffer read and write callbacks */
86 TAR_MEMBER *blobToc; /* the "blobs.toc" member while blobs are being dumped */
87 FILE *tarFH; /* the tar archive stream (named file, stdout or stdin) */
88 pgoff_t tarFHpos; /* byte count maintained by _tarReadRaw() and _tarAddFile() */
89 pgoff_t tarNextMember; /* position at which the next member header starts (read mode) */
90 TAR_MEMBER *FH; /* member used by the byte/buffer read and write callbacks */
91 int isSpecialScript; /* nonzero while restore.sql is being generated */
92 TAR_MEMBER *scriptTH; /* member that receives the restore.sql text */
93 } lclContext;
94
/*
 * Format-private state for one TOC entry; stored in te->formatData.
 */
95 typedef struct
96 {
97 TAR_MEMBER *TH; /* member currently open for this entry's data, else NULL */
98 char *filename; /* name of the entry's data member, or NULL if it has no data */
99 } lclTocEntry;
100
101 /* translator: this is a module name */
102 static const char *modulename = gettext_noop("tar archiver");
103
/* Restores all large objects found in the archive (read mode). */
104 static void _LoadBlobs(ArchiveHandle *AH);
105
/* Open and close a single archive member. */
106 static TAR_MEMBER *tarOpen(ArchiveHandle *AH, const char *filename, char mode);
107 static void tarClose(ArchiveHandle *AH, TAR_MEMBER *TH);
108
109 #ifdef __NOT_USED__
110 static char *tarGets(char *buf, size_t len, TAR_MEMBER *th);
111 #endif
112 static int tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...) pg_attribute_printf(3, 4);
113
/* Low-level tar plumbing: member copy, positioning, raw I/O and headers. */
114 static void _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th);
115 static TAR_MEMBER *_tarPositionTo(ArchiveHandle *AH, const char *filename);
116 static size_t tarRead(void *buf, size_t len, TAR_MEMBER *th);
117 static size_t tarWrite(const void *buf, size_t len, TAR_MEMBER *th);
118 static void _tarWriteHeader(TAR_MEMBER *th);
119 static int _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th);
120 static size_t _tarReadRaw(ArchiveHandle *AH, void *buf, size_t len, TAR_MEMBER *th, FILE *fh);
121
/* Output hook used while the restore.sql member is being generated. */
122 static size_t _scriptOut(ArchiveHandle *AH, const void *buf, size_t len);
123
124 /*
125 * Initializer
126 */
127 void
InitArchiveFmt_Tar(ArchiveHandle * AH)128 InitArchiveFmt_Tar(ArchiveHandle *AH)
129 {
130 lclContext *ctx;
131
132 /* Assuming static functions, this can be copied for each format. */
133 AH->ArchiveEntryPtr = _ArchiveEntry;
134 AH->StartDataPtr = _StartData;
135 AH->WriteDataPtr = _WriteData;
136 AH->EndDataPtr = _EndData;
137 AH->WriteBytePtr = _WriteByte;
138 AH->ReadBytePtr = _ReadByte;
139 AH->WriteBufPtr = _WriteBuf;
140 AH->ReadBufPtr = _ReadBuf;
141 AH->ClosePtr = _CloseArchive;
142 AH->ReopenPtr = NULL;
143 AH->PrintTocDataPtr = _PrintTocData;
144 AH->ReadExtraTocPtr = _ReadExtraToc;
145 AH->WriteExtraTocPtr = _WriteExtraToc;
146 AH->PrintExtraTocPtr = _PrintExtraToc;
147
148 AH->StartBlobsPtr = _StartBlobs;
149 AH->StartBlobPtr = _StartBlob;
150 AH->EndBlobPtr = _EndBlob;
151 AH->EndBlobsPtr = _EndBlobs;
152 AH->ClonePtr = NULL;
153 AH->DeClonePtr = NULL;
154
155 AH->MasterStartParallelItemPtr = NULL;
156 AH->MasterEndParallelItemPtr = NULL;
157
158 AH->WorkerJobDumpPtr = NULL;
159 AH->WorkerJobRestorePtr = NULL;
160
161 /*
162 * Set up some special context used in compressing data.
163 */
164 ctx = (lclContext *) pg_malloc0(sizeof(lclContext));
165 AH->formatData = (void *) ctx;
166 ctx->filePos = 0;
167 ctx->isSpecialScript = 0;
168
169 /* Initialize LO buffering */
170 AH->lo_buf_size = LOBBUFSIZE;
171 AH->lo_buf = (void *) pg_malloc(LOBBUFSIZE);
172
173 /*
174 * Now open the tar file, and load the TOC if we're in read mode.
175 */
176 if (AH->mode == archModeWrite)
177 {
178 if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
179 {
180 ctx->tarFH = fopen(AH->fSpec, PG_BINARY_W);
181 if (ctx->tarFH == NULL)
182 exit_horribly(modulename,
183 "could not open TOC file \"%s\" for output: %s\n",
184 AH->fSpec, strerror(errno));
185 }
186 else
187 {
188 ctx->tarFH = stdout;
189 if (ctx->tarFH == NULL)
190 exit_horribly(modulename,
191 "could not open TOC file for output: %s\n",
192 strerror(errno));
193 }
194
195 ctx->tarFHpos = 0;
196
197 /*
198 * Make unbuffered since we will dup() it, and the buffers screw each
199 * other
200 */
201 /* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
202
203 ctx->hasSeek = checkSeek(ctx->tarFH);
204
205 /*
206 * We don't support compression because reading the files back is not
207 * possible since gzdopen uses buffered IO which totally screws file
208 * positioning.
209 */
210 if (AH->compression != 0)
211 exit_horribly(modulename,
212 "compression is not supported by tar archive format\n");
213 }
214 else
215 { /* Read Mode */
216 if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
217 {
218 ctx->tarFH = fopen(AH->fSpec, PG_BINARY_R);
219 if (ctx->tarFH == NULL)
220 exit_horribly(modulename, "could not open TOC file \"%s\" for input: %s\n",
221 AH->fSpec, strerror(errno));
222 }
223 else
224 {
225 ctx->tarFH = stdin;
226 if (ctx->tarFH == NULL)
227 exit_horribly(modulename, "could not open TOC file for input: %s\n",
228 strerror(errno));
229 }
230
231 /*
232 * Make unbuffered since we will dup() it, and the buffers screw each
233 * other
234 */
235 /* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
236
237 ctx->tarFHpos = 0;
238
239 ctx->hasSeek = checkSeek(ctx->tarFH);
240
241 ctx->FH = (void *) tarOpen(AH, "toc.dat", 'r');
242 ReadHead(AH);
243 ReadToc(AH);
244 tarClose(AH, ctx->FH); /* Nothing else in the file... */
245 }
246 }
247
248 /*
249 * - Start a new TOC entry
250 * Setup the output file name.
251 */
252 static void
_ArchiveEntry(ArchiveHandle * AH,TocEntry * te)253 _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
254 {
255 lclTocEntry *ctx;
256 char fn[K_STD_BUF_SIZE];
257
258 ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
259 if (te->dataDumper != NULL)
260 {
261 #ifdef HAVE_LIBZ
262 if (AH->compression == 0)
263 sprintf(fn, "%d.dat", te->dumpId);
264 else
265 sprintf(fn, "%d.dat.gz", te->dumpId);
266 #else
267 sprintf(fn, "%d.dat", te->dumpId);
268 #endif
269 ctx->filename = pg_strdup(fn);
270 }
271 else
272 {
273 ctx->filename = NULL;
274 ctx->TH = NULL;
275 }
276 te->formatData = (void *) ctx;
277 }
278
279 static void
_WriteExtraToc(ArchiveHandle * AH,TocEntry * te)280 _WriteExtraToc(ArchiveHandle *AH, TocEntry *te)
281 {
282 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
283
284 if (ctx->filename)
285 WriteStr(AH, ctx->filename);
286 else
287 WriteStr(AH, "");
288 }
289
290 static void
_ReadExtraToc(ArchiveHandle * AH,TocEntry * te)291 _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
292 {
293 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
294
295 if (ctx == NULL)
296 {
297 ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
298 te->formatData = (void *) ctx;
299 }
300
301 ctx->filename = ReadStr(AH);
302 if (strlen(ctx->filename) == 0)
303 {
304 free(ctx->filename);
305 ctx->filename = NULL;
306 }
307 ctx->TH = NULL;
308 }
309
310 static void
_PrintExtraToc(ArchiveHandle * AH,TocEntry * te)311 _PrintExtraToc(ArchiveHandle *AH, TocEntry *te)
312 {
313 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
314
315 if (AH->public.verbose && ctx->filename != NULL)
316 ahprintf(AH, "-- File: %s\n", ctx->filename);
317 }
318
319 static void
_StartData(ArchiveHandle * AH,TocEntry * te)320 _StartData(ArchiveHandle *AH, TocEntry *te)
321 {
322 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
323
324 tctx->TH = tarOpen(AH, tctx->filename, 'w');
325 }
326
327 static TAR_MEMBER *
tarOpen(ArchiveHandle * AH,const char * filename,char mode)328 tarOpen(ArchiveHandle *AH, const char *filename, char mode)
329 {
330 lclContext *ctx = (lclContext *) AH->formatData;
331 TAR_MEMBER *tm;
332
333 #ifdef HAVE_LIBZ
334 char fmode[10];
335 #endif
336
337 if (mode == 'r')
338 {
339 tm = _tarPositionTo(AH, filename);
340 if (!tm) /* Not found */
341 {
342 if (filename)
343 {
344 /*
345 * Couldn't find the requested file. Future: do SEEK(0) and
346 * retry.
347 */
348 exit_horribly(modulename, "could not find file \"%s\" in archive\n", filename);
349 }
350 else
351 {
352 /* Any file OK, none left, so return NULL */
353 return NULL;
354 }
355 }
356
357 #ifdef HAVE_LIBZ
358
359 if (AH->compression == 0)
360 tm->nFH = ctx->tarFH;
361 else
362 exit_horribly(modulename, "compression is not supported by tar archive format\n");
363 /* tm->zFH = gzdopen(dup(fileno(ctx->tarFH)), "rb"); */
364 #else
365 tm->nFH = ctx->tarFH;
366 #endif
367 }
368 else
369 {
370 int old_umask;
371
372 tm = pg_malloc0(sizeof(TAR_MEMBER));
373
374 /*
375 * POSIX does not require, but permits, tmpfile() to restrict file
376 * permissions. Given an OS crash after we write data, the filesystem
377 * might retain the data but forget tmpfile()'s unlink(). If so, the
378 * file mode protects confidentiality of the data written.
379 */
380 old_umask = umask(S_IRWXG | S_IRWXO);
381
382 #ifndef WIN32
383 tm->tmpFH = tmpfile();
384 #else
385
386 /*
387 * On WIN32, tmpfile() generates a filename in the root directory,
388 * which requires administrative permissions on certain systems. Loop
389 * until we find a unique file name we can create.
390 */
391 while (1)
392 {
393 char *name;
394 int fd;
395
396 name = _tempnam(NULL, "pg_temp_");
397 if (name == NULL)
398 break;
399 fd = open(name, O_RDWR | O_CREAT | O_EXCL | O_BINARY |
400 O_TEMPORARY, S_IRUSR | S_IWUSR);
401 free(name);
402
403 if (fd != -1) /* created a file */
404 {
405 tm->tmpFH = fdopen(fd, "w+b");
406 break;
407 }
408 else if (errno != EEXIST) /* failure other than file exists */
409 break;
410 }
411 #endif
412
413 if (tm->tmpFH == NULL)
414 exit_horribly(modulename, "could not generate temporary file name: %s\n", strerror(errno));
415
416 umask(old_umask);
417
418 #ifdef HAVE_LIBZ
419
420 if (AH->compression != 0)
421 {
422 sprintf(fmode, "wb%d", AH->compression);
423 tm->zFH = gzdopen(dup(fileno(tm->tmpFH)), fmode);
424 if (tm->zFH == NULL)
425 exit_horribly(modulename, "could not open temporary file\n");
426 }
427 else
428 tm->nFH = tm->tmpFH;
429 #else
430
431 tm->nFH = tm->tmpFH;
432 #endif
433
434 tm->AH = AH;
435 tm->targetFile = pg_strdup(filename);
436 }
437
438 tm->mode = mode;
439 tm->tarFH = ctx->tarFH;
440
441 return tm;
442 }
443
444 static void
tarClose(ArchiveHandle * AH,TAR_MEMBER * th)445 tarClose(ArchiveHandle *AH, TAR_MEMBER *th)
446 {
447 /*
448 * Close the GZ file since we dup'd. This will flush the buffers.
449 */
450 if (AH->compression != 0)
451 if (GZCLOSE(th->zFH) != 0)
452 exit_horribly(modulename, "could not close tar member\n");
453
454 if (th->mode == 'w')
455 _tarAddFile(AH, th); /* This will close the temp file */
456
457 /*
458 * else Nothing to do for normal read since we don't dup() normal file
459 * handle, and we don't use temp files.
460 */
461
462 if (th->targetFile)
463 free(th->targetFile);
464
465 th->nFH = NULL;
466 th->zFH = NULL;
467 }
468
#ifdef __NOT_USED__
/*
 * tarGets
 *
 * fgets()-style line reader for an archive member: reads up to and
 * including a newline, storing at most len - 1 bytes plus a terminating
 * NUL in buf.
 *
 * Returns buf, or NULL when nothing can be returned (len is zero, the
 * member's logical end has been reached, or the underlying read hit end of
 * file before delivering any byte).
 */
static char *
tarGets(char *buf, size_t len, TAR_MEMBER *th)
{
	pgoff_t		remaining = th->fileLen - th->pos;
	size_t		limit;
	size_t		cnt = 0;
	char		c = ' ';
	bool		eof = false;

	/* Always keep one byte of buf for the terminator. */
	if (len == 0)
		return NULL;
	limit = len - 1;

	/* Can't read past logical EOF */
	if (remaining <= 0)
		return NULL;
	if ((pgoff_t) limit > remaining)
		limit = (size_t) remaining;

	while (cnt < limit && c != '\n')
	{
		if (_tarReadRaw(th->AH, &c, 1, th, NULL) == 0)
		{
			eof = true;
			break;
		}
		buf[cnt++] = c;
	}

	if (eof && cnt == 0)
		return NULL;

	buf[cnt] = '\0';

	/* Count the bytes actually consumed, not strlen(): data may hold NULs. */
	th->pos += cnt;

	return buf;
}
#endif
509
510 /*
511 * Just read bytes from the archive. This is the low level read routine
512 * that is used for ALL reads on a tar file.
513 */
514 static size_t
_tarReadRaw(ArchiveHandle * AH,void * buf,size_t len,TAR_MEMBER * th,FILE * fh)515 _tarReadRaw(ArchiveHandle *AH, void *buf, size_t len, TAR_MEMBER *th, FILE *fh)
516 {
517 lclContext *ctx = (lclContext *) AH->formatData;
518 size_t avail;
519 size_t used = 0;
520 size_t res = 0;
521
522 avail = AH->lookaheadLen - AH->lookaheadPos;
523 if (avail > 0)
524 {
525 /* We have some lookahead bytes to use */
526 if (avail >= len) /* Just use the lookahead buffer */
527 used = len;
528 else
529 used = avail;
530
531 /* Copy, and adjust buffer pos */
532 memcpy(buf, AH->lookahead + AH->lookaheadPos, used);
533 AH->lookaheadPos += used;
534
535 /* Adjust required length */
536 len -= used;
537 }
538
539 /* Read the file if len > 0 */
540 if (len > 0)
541 {
542 if (fh)
543 {
544 res = fread(&((char *) buf)[used], 1, len, fh);
545 if (res != len && !feof(fh))
546 READ_ERROR_EXIT(fh);
547 }
548 else if (th)
549 {
550 if (th->zFH)
551 {
552 res = GZREAD(&((char *) buf)[used], 1, len, th->zFH);
553 if (res != len && !GZEOF(th->zFH))
554 {
555 #ifdef HAVE_LIBZ
556 int errnum;
557 const char *errmsg = gzerror(th->zFH, &errnum);
558
559 exit_horribly(modulename,
560 "could not read from input file: %s\n",
561 errnum == Z_ERRNO ? strerror(errno) : errmsg);
562 #else
563 exit_horribly(modulename,
564 "could not read from input file: %s\n",
565 strerror(errno));
566 #endif
567 }
568 }
569 else
570 {
571 res = fread(&((char *) buf)[used], 1, len, th->nFH);
572 if (res != len && !feof(th->nFH))
573 READ_ERROR_EXIT(th->nFH);
574 }
575 }
576 else
577 exit_horribly(modulename, "internal error -- neither th nor fh specified in tarReadRaw()\n");
578 }
579
580 ctx->tarFHpos += res + used;
581
582 return (res + used);
583 }
584
585 static size_t
tarRead(void * buf,size_t len,TAR_MEMBER * th)586 tarRead(void *buf, size_t len, TAR_MEMBER *th)
587 {
588 size_t res;
589
590 if (th->pos + len > th->fileLen)
591 len = th->fileLen - th->pos;
592
593 if (len <= 0)
594 return 0;
595
596 res = _tarReadRaw(th->AH, buf, len, th, NULL);
597
598 th->pos += res;
599
600 return res;
601 }
602
603 static size_t
tarWrite(const void * buf,size_t len,TAR_MEMBER * th)604 tarWrite(const void *buf, size_t len, TAR_MEMBER *th)
605 {
606 size_t res;
607
608 if (th->zFH != NULL)
609 res = GZWRITE(buf, 1, len, th->zFH);
610 else
611 res = fwrite(buf, 1, len, th->nFH);
612
613 th->pos += res;
614 return res;
615 }
616
617 static void
_WriteData(ArchiveHandle * AH,const void * data,size_t dLen)618 _WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
619 {
620 lclTocEntry *tctx = (lclTocEntry *) AH->currToc->formatData;
621
622 if (tarWrite(data, dLen, tctx->TH) != dLen)
623 WRITE_ERROR_EXIT;
624
625 return;
626 }
627
628 static void
_EndData(ArchiveHandle * AH,TocEntry * te)629 _EndData(ArchiveHandle *AH, TocEntry *te)
630 {
631 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
632
633 /* Close the file */
634 tarClose(AH, tctx->TH);
635 tctx->TH = NULL;
636 }
637
638 /*
639 * Print data for a given file
640 */
641 static void
_PrintFileData(ArchiveHandle * AH,char * filename)642 _PrintFileData(ArchiveHandle *AH, char *filename)
643 {
644 lclContext *ctx = (lclContext *) AH->formatData;
645 char buf[4096];
646 size_t cnt;
647 TAR_MEMBER *th;
648
649 if (!filename)
650 return;
651
652 th = tarOpen(AH, filename, 'r');
653 ctx->FH = th;
654
655 while ((cnt = tarRead(buf, 4095, th)) > 0)
656 {
657 buf[cnt] = '\0';
658 ahwrite(buf, 1, cnt, AH);
659 }
660
661 tarClose(AH, th);
662 }
663
664
665 /*
666 * Print data for a given TOC entry
667 */
668 static void
_PrintTocData(ArchiveHandle * AH,TocEntry * te)669 _PrintTocData(ArchiveHandle *AH, TocEntry *te)
670 {
671 lclContext *ctx = (lclContext *) AH->formatData;
672 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
673 int pos1;
674
675 if (!tctx->filename)
676 return;
677
678 /*
679 * If we're writing the special restore.sql script, emit a suitable
680 * command to include each table's data from the corresponding file.
681 *
682 * In the COPY case this is a bit klugy because the regular COPY command
683 * was already printed before we get control.
684 */
685 if (ctx->isSpecialScript)
686 {
687 if (te->copyStmt)
688 {
689 /* Abort the COPY FROM stdin */
690 ahprintf(AH, "\\.\n");
691
692 /*
693 * The COPY statement should look like "COPY ... FROM stdin;\n",
694 * see dumpTableData().
695 */
696 pos1 = (int) strlen(te->copyStmt) - 13;
697 if (pos1 < 6 || strncmp(te->copyStmt, "COPY ", 5) != 0 ||
698 strcmp(te->copyStmt + pos1, " FROM stdin;\n") != 0)
699 exit_horribly(modulename,
700 "unexpected COPY statement syntax: \"%s\"\n",
701 te->copyStmt);
702
703 /* Emit all but the FROM part ... */
704 ahwrite(te->copyStmt, 1, pos1, AH);
705 /* ... and insert modified FROM */
706 ahprintf(AH, " FROM '$$PATH$$/%s';\n\n", tctx->filename);
707 }
708 else
709 {
710 /* --inserts mode, no worries, just include the data file */
711 ahprintf(AH, "\\i $$PATH$$/%s\n\n", tctx->filename);
712 }
713
714 return;
715 }
716
717 if (strcmp(te->desc, "BLOBS") == 0)
718 _LoadBlobs(AH);
719 else
720 _PrintFileData(AH, tctx->filename);
721 }
722
723 static void
_LoadBlobs(ArchiveHandle * AH)724 _LoadBlobs(ArchiveHandle *AH)
725 {
726 Oid oid;
727 lclContext *ctx = (lclContext *) AH->formatData;
728 TAR_MEMBER *th;
729 size_t cnt;
730 bool foundBlob = false;
731 char buf[4096];
732
733 StartRestoreBlobs(AH);
734
735 th = tarOpen(AH, NULL, 'r'); /* Open next file */
736 while (th != NULL)
737 {
738 ctx->FH = th;
739
740 if (strncmp(th->targetFile, "blob_", 5) == 0)
741 {
742 oid = atooid(&th->targetFile[5]);
743 if (oid != 0)
744 {
745 ahlog(AH, 1, "restoring large object with OID %u\n", oid);
746
747 StartRestoreBlob(AH, oid, AH->public.ropt->dropSchema);
748
749 while ((cnt = tarRead(buf, 4095, th)) > 0)
750 {
751 buf[cnt] = '\0';
752 ahwrite(buf, 1, cnt, AH);
753 }
754 EndRestoreBlob(AH, oid);
755 foundBlob = true;
756 }
757 tarClose(AH, th);
758 }
759 else
760 {
761 tarClose(AH, th);
762
763 /*
764 * Once we have found the first blob, stop at the first non-blob
765 * entry (which will be 'blobs.toc'). This coding would eat all
766 * the rest of the archive if there are no blobs ... but this
767 * function shouldn't be called at all in that case.
768 */
769 if (foundBlob)
770 break;
771 }
772
773 th = tarOpen(AH, NULL, 'r');
774 }
775 EndRestoreBlobs(AH);
776 }
777
778
779 static int
_WriteByte(ArchiveHandle * AH,const int i)780 _WriteByte(ArchiveHandle *AH, const int i)
781 {
782 lclContext *ctx = (lclContext *) AH->formatData;
783 char b = i; /* Avoid endian problems */
784
785 if (tarWrite(&b, 1, ctx->FH) != 1)
786 WRITE_ERROR_EXIT;
787
788 ctx->filePos += 1;
789 return 1;
790 }
791
792 static int
_ReadByte(ArchiveHandle * AH)793 _ReadByte(ArchiveHandle *AH)
794 {
795 lclContext *ctx = (lclContext *) AH->formatData;
796 size_t res;
797 unsigned char c;
798
799 res = tarRead(&c, 1, ctx->FH);
800 if (res != 1)
801 /* We already would have exited for errors on reads, must be EOF */
802 exit_horribly(modulename,
803 "could not read from input file: end of file\n");
804 ctx->filePos += 1;
805 return c;
806 }
807
808 static void
_WriteBuf(ArchiveHandle * AH,const void * buf,size_t len)809 _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
810 {
811 lclContext *ctx = (lclContext *) AH->formatData;
812
813 if (tarWrite(buf, len, ctx->FH) != len)
814 WRITE_ERROR_EXIT;
815
816 ctx->filePos += len;
817 }
818
819 static void
_ReadBuf(ArchiveHandle * AH,void * buf,size_t len)820 _ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
821 {
822 lclContext *ctx = (lclContext *) AH->formatData;
823
824 if (tarRead(buf, len, ctx->FH) != len)
825 /* We already would have exited for errors on reads, must be EOF */
826 exit_horribly(modulename,
827 "could not read from input file: end of file\n");
828
829 ctx->filePos += len;
830 return;
831 }
832
833 static void
_CloseArchive(ArchiveHandle * AH)834 _CloseArchive(ArchiveHandle *AH)
835 {
836 lclContext *ctx = (lclContext *) AH->formatData;
837 TAR_MEMBER *th;
838 RestoreOptions *ropt;
839 RestoreOptions *savRopt;
840 DumpOptions *savDopt;
841 int savVerbose,
842 i;
843
844 if (AH->mode == archModeWrite)
845 {
846 /*
847 * Write the Header & TOC to the archive FIRST
848 */
849 th = tarOpen(AH, "toc.dat", 'w');
850 ctx->FH = th;
851 WriteHead(AH);
852 WriteToc(AH);
853 tarClose(AH, th); /* Not needed any more */
854
855 /*
856 * Now send the data (tables & blobs)
857 */
858 WriteDataChunks(AH, NULL);
859
860 /*
861 * Now this format wants to append a script which does a full restore
862 * if the files have been extracted.
863 */
864 th = tarOpen(AH, "restore.sql", 'w');
865
866 tarPrintf(AH, th, "--\n"
867 "-- NOTE:\n"
868 "--\n"
869 "-- File paths need to be edited. Search for $$PATH$$ and\n"
870 "-- replace it with the path to the directory containing\n"
871 "-- the extracted data files.\n"
872 "--\n");
873
874 AH->CustomOutPtr = _scriptOut;
875
876 ctx->isSpecialScript = 1;
877 ctx->scriptTH = th;
878
879 ropt = NewRestoreOptions();
880 memcpy(ropt, AH->public.ropt, sizeof(RestoreOptions));
881 ropt->filename = NULL;
882 ropt->dropSchema = 1;
883 ropt->compression = 0;
884 ropt->superuser = NULL;
885 ropt->suppressDumpWarnings = true;
886
887 savDopt = AH->public.dopt;
888 savRopt = AH->public.ropt;
889
890 SetArchiveOptions((Archive *) AH, NULL, ropt);
891
892 savVerbose = AH->public.verbose;
893 AH->public.verbose = 0;
894
895 RestoreArchive((Archive *) AH);
896
897 SetArchiveOptions((Archive *) AH, savDopt, savRopt);
898
899 AH->public.verbose = savVerbose;
900
901 tarClose(AH, th);
902
903 ctx->isSpecialScript = 0;
904
905 /*
906 * EOF marker for tar files is two blocks of NULLs.
907 */
908 for (i = 0; i < 512 * 2; i++)
909 {
910 if (fputc(0, ctx->tarFH) == EOF)
911 WRITE_ERROR_EXIT;
912 }
913 }
914
915 AH->FH = NULL;
916 }
917
918 static size_t
_scriptOut(ArchiveHandle * AH,const void * buf,size_t len)919 _scriptOut(ArchiveHandle *AH, const void *buf, size_t len)
920 {
921 lclContext *ctx = (lclContext *) AH->formatData;
922
923 return tarWrite(buf, len, ctx->scriptTH);
924 }
925
926 /*
927 * BLOB support
928 */
929
930 /*
931 * Called by the archiver when starting to save all BLOB DATA (not schema).
932 * This routine should save whatever format-specific information is needed
933 * to read the BLOBs back into memory.
934 *
935 * It is called just prior to the dumper's DataDumper routine.
936 *
937 * Optional, but strongly recommended.
938 *
939 */
940 static void
_StartBlobs(ArchiveHandle * AH,TocEntry * te)941 _StartBlobs(ArchiveHandle *AH, TocEntry *te)
942 {
943 lclContext *ctx = (lclContext *) AH->formatData;
944 char fname[K_STD_BUF_SIZE];
945
946 sprintf(fname, "blobs.toc");
947 ctx->blobToc = tarOpen(AH, fname, 'w');
948 }
949
950 /*
951 * Called by the archiver when the dumper calls StartBlob.
952 *
953 * Mandatory.
954 *
955 * Must save the passed OID for retrieval at restore-time.
956 */
957 static void
_StartBlob(ArchiveHandle * AH,TocEntry * te,Oid oid)958 _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
959 {
960 lclContext *ctx = (lclContext *) AH->formatData;
961 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
962 char fname[255];
963 char *sfx;
964
965 if (oid == 0)
966 exit_horribly(modulename, "invalid OID for large object (%u)\n", oid);
967
968 if (AH->compression != 0)
969 sfx = ".gz";
970 else
971 sfx = "";
972
973 sprintf(fname, "blob_%u.dat%s", oid, sfx);
974
975 tarPrintf(AH, ctx->blobToc, "%u %s\n", oid, fname);
976
977 tctx->TH = tarOpen(AH, fname, 'w');
978 }
979
980 /*
981 * Called by the archiver when the dumper calls EndBlob.
982 *
983 * Optional.
984 *
985 */
986 static void
_EndBlob(ArchiveHandle * AH,TocEntry * te,Oid oid)987 _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
988 {
989 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
990
991 tarClose(AH, tctx->TH);
992 }
993
994 /*
995 * Called by the archiver when finishing saving all BLOB DATA.
996 *
997 * Optional.
998 *
999 */
1000 static void
_EndBlobs(ArchiveHandle * AH,TocEntry * te)1001 _EndBlobs(ArchiveHandle *AH, TocEntry *te)
1002 {
1003 lclContext *ctx = (lclContext *) AH->formatData;
1004
1005 /* Write out a fake zero OID to mark end-of-blobs. */
1006 /* WriteInt(AH, 0); */
1007
1008 tarClose(AH, ctx->blobToc);
1009 }
1010
1011
1012
1013 /*------------
1014 * TAR Support
1015 *------------
1016 */
1017
1018 static int
tarPrintf(ArchiveHandle * AH,TAR_MEMBER * th,const char * fmt,...)1019 tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...)
1020 {
1021 char *p;
1022 size_t len = 128; /* initial assumption about buffer size */
1023 size_t cnt;
1024
1025 for (;;)
1026 {
1027 va_list args;
1028
1029 /* Allocate work buffer. */
1030 p = (char *) pg_malloc(len);
1031
1032 /* Try to format the data. */
1033 va_start(args, fmt);
1034 cnt = pvsnprintf(p, len, fmt, args);
1035 va_end(args);
1036
1037 if (cnt < len)
1038 break; /* success */
1039
1040 /* Release buffer and loop around to try again with larger len. */
1041 free(p);
1042 len = cnt;
1043 }
1044
1045 cnt = tarWrite(p, cnt, th);
1046 free(p);
1047 return (int) cnt;
1048 }
1049
/*
 * isValidTarHeader
 *
 * Decide whether a 512-byte block looks like a tar header we understand:
 * the checksum stored in the header must equal the one computed over the
 * block, and the magic/version bytes must match one of the accepted
 * variants.
 */
bool
isValidTarHeader(char *header)
{
	const char *magic = &header[257];
	int			computed = tarChecksum(header);
	int			stored = read_tar_number(&header[148], 8);

	if (stored != computed)
		return false;

	/* POSIX tar format */
	if (memcmp(magic, "ustar\0", 6) == 0 &&
		memcmp(&header[263], "00", 2) == 0)
		return true;

	/*
	 * GNU tar format: the magic is "ustar" followed by TWO spaces and a NUL,
	 * filling the 8 bytes of the magic and version fields.  With a single
	 * space the 8-byte comparison could never match a GNU header.
	 */
	if (memcmp(magic, "ustar  \0", 8) == 0)
		return true;

	/* not-quite-POSIX format written by pre-9.3 pg_dump */
	if (memcmp(magic, "ustar00\0", 8) == 0)
		return true;

	return false;
}
1074
1075 /* Given the member, write the TAR header & copy the file */
1076 static void
_tarAddFile(ArchiveHandle * AH,TAR_MEMBER * th)1077 _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th)
1078 {
1079 lclContext *ctx = (lclContext *) AH->formatData;
1080 FILE *tmp = th->tmpFH; /* Grab it for convenience */
1081 char buf[32768];
1082 size_t cnt;
1083 pgoff_t len = 0;
1084 size_t res;
1085 size_t i,
1086 pad;
1087
1088 /*
1089 * Find file len & go back to start.
1090 */
1091 if (fseeko(tmp, 0, SEEK_END) != 0)
1092 exit_horribly(modulename, "error during file seek: %s\n",
1093 strerror(errno));
1094 th->fileLen = ftello(tmp);
1095 if (th->fileLen < 0)
1096 exit_horribly(modulename, "could not determine seek position in archive file: %s\n",
1097 strerror(errno));
1098 if (fseeko(tmp, 0, SEEK_SET) != 0)
1099 exit_horribly(modulename, "error during file seek: %s\n",
1100 strerror(errno));
1101
1102 _tarWriteHeader(th);
1103
1104 while ((cnt = fread(buf, 1, sizeof(buf), tmp)) > 0)
1105 {
1106 if ((res = fwrite(buf, 1, cnt, th->tarFH)) != cnt)
1107 WRITE_ERROR_EXIT;
1108 len += res;
1109 }
1110 if (!feof(tmp))
1111 READ_ERROR_EXIT(tmp);
1112
1113 if (fclose(tmp) != 0) /* This *should* delete it... */
1114 exit_horribly(modulename, "could not close temporary file: %s\n",
1115 strerror(errno));
1116
1117 if (len != th->fileLen)
1118 {
1119 char buf1[32],
1120 buf2[32];
1121
1122 snprintf(buf1, sizeof(buf1), INT64_FORMAT, (int64) len);
1123 snprintf(buf2, sizeof(buf2), INT64_FORMAT, (int64) th->fileLen);
1124 exit_horribly(modulename, "actual file length (%s) does not match expected (%s)\n",
1125 buf1, buf2);
1126 }
1127
1128 pad = ((len + 511) & ~511) - len;
1129 for (i = 0; i < pad; i++)
1130 {
1131 if (fputc('\0', th->tarFH) == EOF)
1132 WRITE_ERROR_EXIT;
1133 }
1134
1135 ctx->tarFHpos += len + pad;
1136 }
1137
1138 /* Locate the file in the archive, read header and position to data */
1139 static TAR_MEMBER *
_tarPositionTo(ArchiveHandle * AH,const char * filename)1140 _tarPositionTo(ArchiveHandle *AH, const char *filename)
1141 {
1142 lclContext *ctx = (lclContext *) AH->formatData;
1143 TAR_MEMBER *th = pg_malloc0(sizeof(TAR_MEMBER));
1144 char c;
1145 char header[512];
1146 size_t i,
1147 len,
1148 blks;
1149 int id;
1150
1151 th->AH = AH;
1152
1153 /* Go to end of current file, if any */
1154 if (ctx->tarFHpos != 0)
1155 {
1156 char buf1[100],
1157 buf2[100];
1158
1159 snprintf(buf1, sizeof(buf1), INT64_FORMAT, (int64) ctx->tarFHpos);
1160 snprintf(buf2, sizeof(buf2), INT64_FORMAT, (int64) ctx->tarNextMember);
1161 ahlog(AH, 4, "moving from position %s to next member at file position %s\n",
1162 buf1, buf2);
1163
1164 while (ctx->tarFHpos < ctx->tarNextMember)
1165 _tarReadRaw(AH, &c, 1, NULL, ctx->tarFH);
1166 }
1167
1168 {
1169 char buf[100];
1170
1171 snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) ctx->tarFHpos);
1172 ahlog(AH, 4, "now at file position %s\n", buf);
1173 }
1174
1175 /* We are at the start of the file, or at the next member */
1176
1177 /* Get the header */
1178 if (!_tarGetHeader(AH, th))
1179 {
1180 if (filename)
1181 exit_horribly(modulename, "could not find header for file \"%s\" in tar archive\n", filename);
1182 else
1183 {
1184 /*
1185 * We're just scanning the archive for the next file, so return
1186 * null
1187 */
1188 free(th);
1189 return NULL;
1190 }
1191 }
1192
1193 while (filename != NULL && strcmp(th->targetFile, filename) != 0)
1194 {
1195 ahlog(AH, 4, "skipping tar member %s\n", th->targetFile);
1196
1197 id = atoi(th->targetFile);
1198 if ((TocIDRequired(AH, id) & REQ_DATA) != 0)
1199 exit_horribly(modulename, "restoring data out of order is not supported in this archive format: "
1200 "\"%s\" is required, but comes before \"%s\" in the archive file.\n",
1201 th->targetFile, filename);
1202
1203 /* Header doesn't match, so read to next header */
1204 len = ((th->fileLen + 511) & ~511); /* Padded length */
1205 blks = len >> 9; /* # of 512 byte blocks */
1206
1207 for (i = 0; i < blks; i++)
1208 _tarReadRaw(AH, &header[0], 512, NULL, ctx->tarFH);
1209
1210 if (!_tarGetHeader(AH, th))
1211 exit_horribly(modulename, "could not find header for file \"%s\" in tar archive\n", filename);
1212 }
1213
1214 ctx->tarNextMember = ctx->tarFHpos + ((th->fileLen + 511) & ~511);
1215 th->pos = 0;
1216
1217 return th;
1218 }
1219
1220 /* Read & verify a header */
1221 static int
_tarGetHeader(ArchiveHandle * AH,TAR_MEMBER * th)1222 _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th)
1223 {
1224 lclContext *ctx = (lclContext *) AH->formatData;
1225 char h[512];
1226 char tag[100 + 1];
1227 int sum,
1228 chk;
1229 pgoff_t len;
1230 pgoff_t hPos;
1231 bool gotBlock = false;
1232
1233 while (!gotBlock)
1234 {
1235 /* Save the pos for reporting purposes */
1236 hPos = ctx->tarFHpos;
1237
1238 /* Read a 512 byte block, return EOF, exit if short */
1239 len = _tarReadRaw(AH, h, 512, NULL, ctx->tarFH);
1240 if (len == 0) /* EOF */
1241 return 0;
1242
1243 if (len != 512)
1244 exit_horribly(modulename,
1245 ngettext("incomplete tar header found (%lu byte)\n",
1246 "incomplete tar header found (%lu bytes)\n",
1247 len),
1248 (unsigned long) len);
1249
1250 /* Calc checksum */
1251 chk = tarChecksum(h);
1252 sum = read_tar_number(&h[148], 8);
1253
1254 /*
1255 * If the checksum failed, see if it is a null block. If so, silently
1256 * continue to the next block.
1257 */
1258 if (chk == sum)
1259 gotBlock = true;
1260 else
1261 {
1262 int i;
1263
1264 for (i = 0; i < 512; i++)
1265 {
1266 if (h[i] != 0)
1267 {
1268 gotBlock = true;
1269 break;
1270 }
1271 }
1272 }
1273 }
1274
1275 /* Name field is 100 bytes, might not be null-terminated */
1276 strlcpy(tag, &h[0], 100 + 1);
1277
1278 len = read_tar_number(&h[124], 12);
1279
1280 {
1281 char posbuf[32];
1282 char lenbuf[32];
1283
1284 snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, (uint64) hPos);
1285 snprintf(lenbuf, sizeof(lenbuf), UINT64_FORMAT, (uint64) len);
1286 ahlog(AH, 3, "TOC Entry %s at %s (length %s, checksum %d)\n",
1287 tag, posbuf, lenbuf, sum);
1288 }
1289
1290 if (chk != sum)
1291 {
1292 char posbuf[32];
1293
1294 snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT,
1295 (uint64) ftello(ctx->tarFH));
1296 exit_horribly(modulename,
1297 "corrupt tar header found in %s "
1298 "(expected %d, computed %d) file position %s\n",
1299 tag, sum, chk, posbuf);
1300 }
1301
1302 th->targetFile = pg_strdup(tag);
1303 th->fileLen = len;
1304
1305 return 1;
1306 }
1307
1308
1309 static void
_tarWriteHeader(TAR_MEMBER * th)1310 _tarWriteHeader(TAR_MEMBER *th)
1311 {
1312 char h[512];
1313
1314 tarCreateHeader(h, th->targetFile, NULL, th->fileLen,
1315 0600, 04000, 02000, time(NULL));
1316
1317 /* Now write the completed header. */
1318 if (fwrite(h, 1, 512, th->tarFH) != 512)
1319 WRITE_ERROR_EXIT;
1320 }
1321