1 /*-------------------------------------------------------------------------
2 *
3 * pg_backup_tar.c
4 *
5 * This file is copied from the 'files' format file, but dumps data into
6 * one temp file then sends it to the output TAR archive.
7 *
8 * The tar format also includes a 'restore.sql' script which is there for
9 * the benefit of humans. This script is never used by pg_restore.
10 *
11 * NOTE: If you untar the created 'tar' file, the resulting files are
12 * compatible with the 'directory' format. Please keep the two formats in
13 * sync.
14 *
15 * See the headers to pg_backup_directory & pg_restore for more details.
16 *
17 * Copyright (c) 2000, Philip Warner
18 * Rights are granted to use this software in any way so long
19 * as this notice is not removed.
20 *
21 * The author is not responsible for loss or damages that may
22 * result from it's use.
23 *
24 *
25 * IDENTIFICATION
26 * src/bin/pg_dump/pg_backup_tar.c
27 *
28 *-------------------------------------------------------------------------
29 */
30 #include "postgres_fe.h"
31
32 #include "pg_backup_archiver.h"
33 #include "pg_backup_tar.h"
34 #include "pg_backup_utils.h"
35 #include "pgtar.h"
36 #include "common/file_utils.h"
37 #include "fe_utils/string_utils.h"
38
39 #include <sys/stat.h>
40 #include <ctype.h>
41 #include <limits.h>
42 #include <unistd.h>
43
44 static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te);
45 static void _StartData(ArchiveHandle *AH, TocEntry *te);
46 static void _WriteData(ArchiveHandle *AH, const void *data, size_t dLen);
47 static void _EndData(ArchiveHandle *AH, TocEntry *te);
48 static int _WriteByte(ArchiveHandle *AH, const int i);
49 static int _ReadByte(ArchiveHandle *);
50 static void _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len);
51 static void _ReadBuf(ArchiveHandle *AH, void *buf, size_t len);
52 static void _CloseArchive(ArchiveHandle *AH);
53 static void _PrintTocData(ArchiveHandle *AH, TocEntry *te);
54 static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te);
55 static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te);
56 static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te);
57
58 static void _StartBlobs(ArchiveHandle *AH, TocEntry *te);
59 static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
60 static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
61 static void _EndBlobs(ArchiveHandle *AH, TocEntry *te);
62
63 #define K_STD_BUF_SIZE 1024
64
65
/*
 * State for one member (file) of the tar archive, opened by tarOpen() for
 * either reading or writing.
 */
66 typedef struct
67 {
68 #ifdef HAVE_LIBZ
69 gzFile zFH; /* compressed stream; tarOpen() sets it via gzdopen() when compression is on */
70 #else
71 FILE *zFH; /* same slot when built without zlib */
72 #endif
73 FILE *nFH; /* uncompressed stream: the temp file in 'w' mode, the tar file in 'r' mode */
74 FILE *tarFH; /* the archive's tar stream, copied from lclContext by tarOpen() */
75 FILE *tmpFH; /* temp file created by tarOpen() in 'w' mode; closed by _tarAddFile() */
76 char *targetFile; /* member name inside the archive (allocated copy) */
77 char mode; /* mode character passed to tarOpen(): 'r' or 'w' */
78 pgoff_t pos; /* bytes moved so far through tarRead()/tarWrite() */
79 pgoff_t fileLen; /* member length: from the tar header, or the temp file's size */
80 ArchiveHandle *AH; /* archive handle this member belongs to */
81 } TAR_MEMBER;
82
/*
 * Format-private state for the whole archive; stored in AH->formatData by
 * InitArchiveFmt_Tar().
 */
83 typedef struct
84 {
85 int hasSeek; /* result of checkSeek() on tarFH */
86 pgoff_t filePos; /* advanced by the byte/buffer read and write callbacks */
87 TAR_MEMBER *blobToc; /* "blobs.toc" member, open between _StartBlobs() and _EndBlobs() */
88 FILE *tarFH; /* the tar archive stream (a named file, stdout or stdin) */
89 pgoff_t tarFHpos; /* running byte count maintained by _tarReadRaw() and _tarAddFile() */
90 pgoff_t tarNextMember; /* position just past the current member's padded data; set by _tarPositionTo() */
91 TAR_MEMBER *FH; /* member used by _WriteByte/_ReadByte/_WriteBuf/_ReadBuf */
92 int isSpecialScript; /* nonzero while _CloseArchive() is generating restore.sql */
93 TAR_MEMBER *scriptTH; /* member that _scriptOut() writes restore.sql text into */
94 } lclContext;
95
/*
 * Format-private state for one TOC entry; stored in te->formatData.
 */
96 typedef struct
97 {
98 TAR_MEMBER *TH; /* member currently open for this entry's data, else NULL */
99 char *filename; /* name of the entry's data member, or NULL when it has no data */
100 } lclTocEntry;
101
102 /* translator: this is a module name */
103 static const char *modulename = gettext_noop("tar archiver");
104
105 static void _LoadBlobs(ArchiveHandle *AH);
106
107 static TAR_MEMBER *tarOpen(ArchiveHandle *AH, const char *filename, char mode);
108 static void tarClose(ArchiveHandle *AH, TAR_MEMBER *TH);
109
110 #ifdef __NOT_USED__
111 static char *tarGets(char *buf, size_t len, TAR_MEMBER *th);
112 #endif
113 static int tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...) pg_attribute_printf(3, 4);
114
115 static void _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th);
116 static TAR_MEMBER *_tarPositionTo(ArchiveHandle *AH, const char *filename);
117 static size_t tarRead(void *buf, size_t len, TAR_MEMBER *th);
118 static size_t tarWrite(const void *buf, size_t len, TAR_MEMBER *th);
119 static void _tarWriteHeader(TAR_MEMBER *th);
120 static int _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th);
121 static size_t _tarReadRaw(ArchiveHandle *AH, void *buf, size_t len, TAR_MEMBER *th, FILE *fh);
122
123 static size_t _scriptOut(ArchiveHandle *AH, const void *buf, size_t len);
124
125 /*
126 * Initializer
127 */
128 void
InitArchiveFmt_Tar(ArchiveHandle * AH)129 InitArchiveFmt_Tar(ArchiveHandle *AH)
130 {
131 lclContext *ctx;
132
133 /* Assuming static functions, this can be copied for each format. */
134 AH->ArchiveEntryPtr = _ArchiveEntry;
135 AH->StartDataPtr = _StartData;
136 AH->WriteDataPtr = _WriteData;
137 AH->EndDataPtr = _EndData;
138 AH->WriteBytePtr = _WriteByte;
139 AH->ReadBytePtr = _ReadByte;
140 AH->WriteBufPtr = _WriteBuf;
141 AH->ReadBufPtr = _ReadBuf;
142 AH->ClosePtr = _CloseArchive;
143 AH->ReopenPtr = NULL;
144 AH->PrintTocDataPtr = _PrintTocData;
145 AH->ReadExtraTocPtr = _ReadExtraToc;
146 AH->WriteExtraTocPtr = _WriteExtraToc;
147 AH->PrintExtraTocPtr = _PrintExtraToc;
148
149 AH->StartBlobsPtr = _StartBlobs;
150 AH->StartBlobPtr = _StartBlob;
151 AH->EndBlobPtr = _EndBlob;
152 AH->EndBlobsPtr = _EndBlobs;
153 AH->ClonePtr = NULL;
154 AH->DeClonePtr = NULL;
155
156 AH->WorkerJobDumpPtr = NULL;
157 AH->WorkerJobRestorePtr = NULL;
158
159 /*
160 * Set up some special context used in compressing data.
161 */
162 ctx = (lclContext *) pg_malloc0(sizeof(lclContext));
163 AH->formatData = (void *) ctx;
164 ctx->filePos = 0;
165 ctx->isSpecialScript = 0;
166
167 /* Initialize LO buffering */
168 AH->lo_buf_size = LOBBUFSIZE;
169 AH->lo_buf = (void *) pg_malloc(LOBBUFSIZE);
170
171 /*
172 * Now open the tar file, and load the TOC if we're in read mode.
173 */
174 if (AH->mode == archModeWrite)
175 {
176 if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
177 {
178 ctx->tarFH = fopen(AH->fSpec, PG_BINARY_W);
179 if (ctx->tarFH == NULL)
180 exit_horribly(modulename,
181 "could not open TOC file \"%s\" for output: %s\n",
182 AH->fSpec, strerror(errno));
183 }
184 else
185 {
186 ctx->tarFH = stdout;
187 if (ctx->tarFH == NULL)
188 exit_horribly(modulename,
189 "could not open TOC file for output: %s\n",
190 strerror(errno));
191 }
192
193 ctx->tarFHpos = 0;
194
195 /*
196 * Make unbuffered since we will dup() it, and the buffers screw each
197 * other
198 */
199 /* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
200
201 ctx->hasSeek = checkSeek(ctx->tarFH);
202
203 /*
204 * We don't support compression because reading the files back is not
205 * possible since gzdopen uses buffered IO which totally screws file
206 * positioning.
207 */
208 if (AH->compression != 0)
209 exit_horribly(modulename,
210 "compression is not supported by tar archive format\n");
211 }
212 else
213 { /* Read Mode */
214 if (AH->fSpec && strcmp(AH->fSpec, "") != 0)
215 {
216 ctx->tarFH = fopen(AH->fSpec, PG_BINARY_R);
217 if (ctx->tarFH == NULL)
218 exit_horribly(modulename, "could not open TOC file \"%s\" for input: %s\n",
219 AH->fSpec, strerror(errno));
220 }
221 else
222 {
223 ctx->tarFH = stdin;
224 if (ctx->tarFH == NULL)
225 exit_horribly(modulename, "could not open TOC file for input: %s\n",
226 strerror(errno));
227 }
228
229 /*
230 * Make unbuffered since we will dup() it, and the buffers screw each
231 * other
232 */
233 /* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
234
235 ctx->tarFHpos = 0;
236
237 ctx->hasSeek = checkSeek(ctx->tarFH);
238
239 ctx->FH = (void *) tarOpen(AH, "toc.dat", 'r');
240 ReadHead(AH);
241 ReadToc(AH);
242 tarClose(AH, ctx->FH); /* Nothing else in the file... */
243 }
244 }
245
246 /*
247 * - Start a new TOC entry
248 * Setup the output file name.
249 */
250 static void
_ArchiveEntry(ArchiveHandle * AH,TocEntry * te)251 _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
252 {
253 lclTocEntry *ctx;
254 char fn[K_STD_BUF_SIZE];
255
256 ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
257 if (te->dataDumper != NULL)
258 {
259 #ifdef HAVE_LIBZ
260 if (AH->compression == 0)
261 sprintf(fn, "%d.dat", te->dumpId);
262 else
263 sprintf(fn, "%d.dat.gz", te->dumpId);
264 #else
265 sprintf(fn, "%d.dat", te->dumpId);
266 #endif
267 ctx->filename = pg_strdup(fn);
268 }
269 else
270 {
271 ctx->filename = NULL;
272 ctx->TH = NULL;
273 }
274 te->formatData = (void *) ctx;
275 }
276
277 static void
_WriteExtraToc(ArchiveHandle * AH,TocEntry * te)278 _WriteExtraToc(ArchiveHandle *AH, TocEntry *te)
279 {
280 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
281
282 if (ctx->filename)
283 WriteStr(AH, ctx->filename);
284 else
285 WriteStr(AH, "");
286 }
287
288 static void
_ReadExtraToc(ArchiveHandle * AH,TocEntry * te)289 _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
290 {
291 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
292
293 if (ctx == NULL)
294 {
295 ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
296 te->formatData = (void *) ctx;
297 }
298
299 ctx->filename = ReadStr(AH);
300 if (strlen(ctx->filename) == 0)
301 {
302 free(ctx->filename);
303 ctx->filename = NULL;
304 }
305 ctx->TH = NULL;
306 }
307
308 static void
_PrintExtraToc(ArchiveHandle * AH,TocEntry * te)309 _PrintExtraToc(ArchiveHandle *AH, TocEntry *te)
310 {
311 lclTocEntry *ctx = (lclTocEntry *) te->formatData;
312
313 if (AH->public.verbose && ctx->filename != NULL)
314 ahprintf(AH, "-- File: %s\n", ctx->filename);
315 }
316
317 static void
_StartData(ArchiveHandle * AH,TocEntry * te)318 _StartData(ArchiveHandle *AH, TocEntry *te)
319 {
320 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
321
322 tctx->TH = tarOpen(AH, tctx->filename, 'w');
323 }
324
325 static TAR_MEMBER *
tarOpen(ArchiveHandle * AH,const char * filename,char mode)326 tarOpen(ArchiveHandle *AH, const char *filename, char mode)
327 {
328 lclContext *ctx = (lclContext *) AH->formatData;
329 TAR_MEMBER *tm;
330
331 #ifdef HAVE_LIBZ
332 char fmode[10];
333 #endif
334
335 if (mode == 'r')
336 {
337 tm = _tarPositionTo(AH, filename);
338 if (!tm) /* Not found */
339 {
340 if (filename)
341 {
342 /*
343 * Couldn't find the requested file. Future: do SEEK(0) and
344 * retry.
345 */
346 exit_horribly(modulename, "could not find file \"%s\" in archive\n", filename);
347 }
348 else
349 {
350 /* Any file OK, none left, so return NULL */
351 return NULL;
352 }
353 }
354
355 #ifdef HAVE_LIBZ
356
357 if (AH->compression == 0)
358 tm->nFH = ctx->tarFH;
359 else
360 exit_horribly(modulename, "compression is not supported by tar archive format\n");
361 /* tm->zFH = gzdopen(dup(fileno(ctx->tarFH)), "rb"); */
362 #else
363 tm->nFH = ctx->tarFH;
364 #endif
365 }
366 else
367 {
368 int old_umask;
369
370 tm = pg_malloc0(sizeof(TAR_MEMBER));
371
372 /*
373 * POSIX does not require, but permits, tmpfile() to restrict file
374 * permissions. Given an OS crash after we write data, the filesystem
375 * might retain the data but forget tmpfile()'s unlink(). If so, the
376 * file mode protects confidentiality of the data written.
377 */
378 old_umask = umask(S_IRWXG | S_IRWXO);
379
380 #ifndef WIN32
381 tm->tmpFH = tmpfile();
382 #else
383
384 /*
385 * On WIN32, tmpfile() generates a filename in the root directory,
386 * which requires administrative permissions on certain systems. Loop
387 * until we find a unique file name we can create.
388 */
389 while (1)
390 {
391 char *name;
392 int fd;
393
394 name = _tempnam(NULL, "pg_temp_");
395 if (name == NULL)
396 break;
397 fd = open(name, O_RDWR | O_CREAT | O_EXCL | O_BINARY |
398 O_TEMPORARY, S_IRUSR | S_IWUSR);
399 free(name);
400
401 if (fd != -1) /* created a file */
402 {
403 tm->tmpFH = fdopen(fd, "w+b");
404 break;
405 }
406 else if (errno != EEXIST) /* failure other than file exists */
407 break;
408 }
409 #endif
410
411 if (tm->tmpFH == NULL)
412 exit_horribly(modulename, "could not generate temporary file name: %s\n", strerror(errno));
413
414 umask(old_umask);
415
416 #ifdef HAVE_LIBZ
417
418 if (AH->compression != 0)
419 {
420 sprintf(fmode, "wb%d", AH->compression);
421 tm->zFH = gzdopen(dup(fileno(tm->tmpFH)), fmode);
422 if (tm->zFH == NULL)
423 exit_horribly(modulename, "could not open temporary file\n");
424 }
425 else
426 tm->nFH = tm->tmpFH;
427 #else
428
429 tm->nFH = tm->tmpFH;
430 #endif
431
432 tm->AH = AH;
433 tm->targetFile = pg_strdup(filename);
434 }
435
436 tm->mode = mode;
437 tm->tarFH = ctx->tarFH;
438
439 return tm;
440 }
441
442 static void
tarClose(ArchiveHandle * AH,TAR_MEMBER * th)443 tarClose(ArchiveHandle *AH, TAR_MEMBER *th)
444 {
445 /*
446 * Close the GZ file since we dup'd. This will flush the buffers.
447 */
448 if (AH->compression != 0)
449 if (GZCLOSE(th->zFH) != 0)
450 exit_horribly(modulename, "could not close tar member\n");
451
452 if (th->mode == 'w')
453 _tarAddFile(AH, th); /* This will close the temp file */
454
455 /*
456 * else Nothing to do for normal read since we don't dup() normal file
457 * handle, and we don't use temp files.
458 */
459
460 if (th->targetFile)
461 free(th->targetFile);
462
463 th->nFH = NULL;
464 th->zFH = NULL;
465 }
466
#ifdef __NOT_USED__
/*
 * Read one line from a tar member, fgets-style.
 *
 * At most len - 1 bytes are stored in buf, stopping after a newline or at
 * the member's logical end, and the result is always NUL-terminated within
 * the len bytes of buf.  th->pos advances by the number of bytes consumed.
 *
 * Returns buf, or NULL when nothing could be read (len is 0, the member is
 * already at its end, or the very first read hits EOF).
 */
static char *
tarGets(char *buf, size_t len, TAR_MEMBER *th)
{
	size_t		cnt = 0;
	size_t		maxChars;
	pgoff_t		remaining;
	char		c = ' ';

	/* No room even for the terminator */
	if (len == 0)
		return NULL;

	/* Reserve one byte of buf for the terminating NUL */
	maxChars = len - 1;

	/* Can't read past logical EOF */
	remaining = th->fileLen - th->pos;
	if (remaining <= 0)
		return NULL;
	if ((uint64) remaining < (uint64) maxChars)
		maxChars = (size_t) remaining;

	while (cnt < maxChars && c != '\n')
	{
		if (_tarReadRaw(th->AH, &c, 1, th, NULL) <= 0)
			break;
		buf[cnt++] = c;
	}

	/* Wanted data but got none: report EOF */
	if (cnt == 0 && maxChars > 0)
		return NULL;

	buf[cnt] = '\0';
	th->pos += cnt;

	return buf;
}
#endif
507
508 /*
509 * Just read bytes from the archive. This is the low level read routine
510 * that is used for ALL reads on a tar file.
511 */
512 static size_t
_tarReadRaw(ArchiveHandle * AH,void * buf,size_t len,TAR_MEMBER * th,FILE * fh)513 _tarReadRaw(ArchiveHandle *AH, void *buf, size_t len, TAR_MEMBER *th, FILE *fh)
514 {
515 lclContext *ctx = (lclContext *) AH->formatData;
516 size_t avail;
517 size_t used = 0;
518 size_t res = 0;
519
520 avail = AH->lookaheadLen - AH->lookaheadPos;
521 if (avail > 0)
522 {
523 /* We have some lookahead bytes to use */
524 if (avail >= len) /* Just use the lookahead buffer */
525 used = len;
526 else
527 used = avail;
528
529 /* Copy, and adjust buffer pos */
530 memcpy(buf, AH->lookahead + AH->lookaheadPos, used);
531 AH->lookaheadPos += used;
532
533 /* Adjust required length */
534 len -= used;
535 }
536
537 /* Read the file if len > 0 */
538 if (len > 0)
539 {
540 if (fh)
541 {
542 res = fread(&((char *) buf)[used], 1, len, fh);
543 if (res != len && !feof(fh))
544 READ_ERROR_EXIT(fh);
545 }
546 else if (th)
547 {
548 if (th->zFH)
549 {
550 res = GZREAD(&((char *) buf)[used], 1, len, th->zFH);
551 if (res != len && !GZEOF(th->zFH))
552 {
553 #ifdef HAVE_LIBZ
554 int errnum;
555 const char *errmsg = gzerror(th->zFH, &errnum);
556
557 exit_horribly(modulename,
558 "could not read from input file: %s\n",
559 errnum == Z_ERRNO ? strerror(errno) : errmsg);
560 #else
561 exit_horribly(modulename,
562 "could not read from input file: %s\n",
563 strerror(errno));
564 #endif
565 }
566 }
567 else
568 {
569 res = fread(&((char *) buf)[used], 1, len, th->nFH);
570 if (res != len && !feof(th->nFH))
571 READ_ERROR_EXIT(th->nFH);
572 }
573 }
574 else
575 exit_horribly(modulename, "internal error -- neither th nor fh specified in tarReadRaw()\n");
576 }
577
578 ctx->tarFHpos += res + used;
579
580 return (res + used);
581 }
582
583 static size_t
tarRead(void * buf,size_t len,TAR_MEMBER * th)584 tarRead(void *buf, size_t len, TAR_MEMBER *th)
585 {
586 size_t res;
587
588 if (th->pos + len > th->fileLen)
589 len = th->fileLen - th->pos;
590
591 if (len <= 0)
592 return 0;
593
594 res = _tarReadRaw(th->AH, buf, len, th, NULL);
595
596 th->pos += res;
597
598 return res;
599 }
600
601 static size_t
tarWrite(const void * buf,size_t len,TAR_MEMBER * th)602 tarWrite(const void *buf, size_t len, TAR_MEMBER *th)
603 {
604 size_t res;
605
606 if (th->zFH != NULL)
607 res = GZWRITE(buf, 1, len, th->zFH);
608 else
609 res = fwrite(buf, 1, len, th->nFH);
610
611 th->pos += res;
612 return res;
613 }
614
615 static void
_WriteData(ArchiveHandle * AH,const void * data,size_t dLen)616 _WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
617 {
618 lclTocEntry *tctx = (lclTocEntry *) AH->currToc->formatData;
619
620 if (tarWrite(data, dLen, tctx->TH) != dLen)
621 WRITE_ERROR_EXIT;
622
623 return;
624 }
625
626 static void
_EndData(ArchiveHandle * AH,TocEntry * te)627 _EndData(ArchiveHandle *AH, TocEntry *te)
628 {
629 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
630
631 /* Close the file */
632 tarClose(AH, tctx->TH);
633 tctx->TH = NULL;
634 }
635
636 /*
637 * Print data for a given file
638 */
639 static void
_PrintFileData(ArchiveHandle * AH,char * filename)640 _PrintFileData(ArchiveHandle *AH, char *filename)
641 {
642 lclContext *ctx = (lclContext *) AH->formatData;
643 char buf[4096];
644 size_t cnt;
645 TAR_MEMBER *th;
646
647 if (!filename)
648 return;
649
650 th = tarOpen(AH, filename, 'r');
651 ctx->FH = th;
652
653 while ((cnt = tarRead(buf, 4095, th)) > 0)
654 {
655 buf[cnt] = '\0';
656 ahwrite(buf, 1, cnt, AH);
657 }
658
659 tarClose(AH, th);
660 }
661
662
663 /*
664 * Print data for a given TOC entry
665 */
666 static void
_PrintTocData(ArchiveHandle * AH,TocEntry * te)667 _PrintTocData(ArchiveHandle *AH, TocEntry *te)
668 {
669 lclContext *ctx = (lclContext *) AH->formatData;
670 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
671 int pos1;
672
673 if (!tctx->filename)
674 return;
675
676 /*
677 * If we're writing the special restore.sql script, emit a suitable
678 * command to include each table's data from the corresponding file.
679 *
680 * In the COPY case this is a bit klugy because the regular COPY command
681 * was already printed before we get control.
682 */
683 if (ctx->isSpecialScript)
684 {
685 if (te->copyStmt)
686 {
687 /* Abort the COPY FROM stdin */
688 ahprintf(AH, "\\.\n");
689
690 /*
691 * The COPY statement should look like "COPY ... FROM stdin;\n",
692 * see dumpTableData().
693 */
694 pos1 = (int) strlen(te->copyStmt) - 13;
695 if (pos1 < 6 || strncmp(te->copyStmt, "COPY ", 5) != 0 ||
696 strcmp(te->copyStmt + pos1, " FROM stdin;\n") != 0)
697 exit_horribly(modulename,
698 "unexpected COPY statement syntax: \"%s\"\n",
699 te->copyStmt);
700
701 /* Emit all but the FROM part ... */
702 ahwrite(te->copyStmt, 1, pos1, AH);
703 /* ... and insert modified FROM */
704 ahprintf(AH, " FROM '$$PATH$$/%s';\n\n", tctx->filename);
705 }
706 else
707 {
708 /* --inserts mode, no worries, just include the data file */
709 ahprintf(AH, "\\i $$PATH$$/%s\n\n", tctx->filename);
710 }
711
712 return;
713 }
714
715 if (strcmp(te->desc, "BLOBS") == 0)
716 _LoadBlobs(AH);
717 else
718 _PrintFileData(AH, tctx->filename);
719 }
720
721 static void
_LoadBlobs(ArchiveHandle * AH)722 _LoadBlobs(ArchiveHandle *AH)
723 {
724 Oid oid;
725 lclContext *ctx = (lclContext *) AH->formatData;
726 TAR_MEMBER *th;
727 size_t cnt;
728 bool foundBlob = false;
729 char buf[4096];
730
731 StartRestoreBlobs(AH);
732
733 th = tarOpen(AH, NULL, 'r'); /* Open next file */
734 while (th != NULL)
735 {
736 ctx->FH = th;
737
738 if (strncmp(th->targetFile, "blob_", 5) == 0)
739 {
740 oid = atooid(&th->targetFile[5]);
741 if (oid != 0)
742 {
743 ahlog(AH, 1, "restoring large object with OID %u\n", oid);
744
745 StartRestoreBlob(AH, oid, AH->public.ropt->dropSchema);
746
747 while ((cnt = tarRead(buf, 4095, th)) > 0)
748 {
749 buf[cnt] = '\0';
750 ahwrite(buf, 1, cnt, AH);
751 }
752 EndRestoreBlob(AH, oid);
753 foundBlob = true;
754 }
755 tarClose(AH, th);
756 }
757 else
758 {
759 tarClose(AH, th);
760
761 /*
762 * Once we have found the first blob, stop at the first non-blob
763 * entry (which will be 'blobs.toc'). This coding would eat all
764 * the rest of the archive if there are no blobs ... but this
765 * function shouldn't be called at all in that case.
766 */
767 if (foundBlob)
768 break;
769 }
770
771 th = tarOpen(AH, NULL, 'r');
772 }
773 EndRestoreBlobs(AH);
774 }
775
776
777 static int
_WriteByte(ArchiveHandle * AH,const int i)778 _WriteByte(ArchiveHandle *AH, const int i)
779 {
780 lclContext *ctx = (lclContext *) AH->formatData;
781 char b = i; /* Avoid endian problems */
782
783 if (tarWrite(&b, 1, ctx->FH) != 1)
784 WRITE_ERROR_EXIT;
785
786 ctx->filePos += 1;
787 return 1;
788 }
789
790 static int
_ReadByte(ArchiveHandle * AH)791 _ReadByte(ArchiveHandle *AH)
792 {
793 lclContext *ctx = (lclContext *) AH->formatData;
794 size_t res;
795 unsigned char c;
796
797 res = tarRead(&c, 1, ctx->FH);
798 if (res != 1)
799 /* We already would have exited for errors on reads, must be EOF */
800 exit_horribly(modulename,
801 "could not read from input file: end of file\n");
802 ctx->filePos += 1;
803 return c;
804 }
805
806 static void
_WriteBuf(ArchiveHandle * AH,const void * buf,size_t len)807 _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
808 {
809 lclContext *ctx = (lclContext *) AH->formatData;
810
811 if (tarWrite(buf, len, ctx->FH) != len)
812 WRITE_ERROR_EXIT;
813
814 ctx->filePos += len;
815 }
816
817 static void
_ReadBuf(ArchiveHandle * AH,void * buf,size_t len)818 _ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
819 {
820 lclContext *ctx = (lclContext *) AH->formatData;
821
822 if (tarRead(buf, len, ctx->FH) != len)
823 /* We already would have exited for errors on reads, must be EOF */
824 exit_horribly(modulename,
825 "could not read from input file: end of file\n");
826
827 ctx->filePos += len;
828 return;
829 }
830
831 static void
_CloseArchive(ArchiveHandle * AH)832 _CloseArchive(ArchiveHandle *AH)
833 {
834 lclContext *ctx = (lclContext *) AH->formatData;
835 TAR_MEMBER *th;
836 RestoreOptions *ropt;
837 RestoreOptions *savRopt;
838 DumpOptions *savDopt;
839 int savVerbose,
840 i;
841
842 if (AH->mode == archModeWrite)
843 {
844 /*
845 * Write the Header & TOC to the archive FIRST
846 */
847 th = tarOpen(AH, "toc.dat", 'w');
848 ctx->FH = th;
849 WriteHead(AH);
850 WriteToc(AH);
851 tarClose(AH, th); /* Not needed any more */
852
853 /*
854 * Now send the data (tables & blobs)
855 */
856 WriteDataChunks(AH, NULL);
857
858 /*
859 * Now this format wants to append a script which does a full restore
860 * if the files have been extracted.
861 */
862 th = tarOpen(AH, "restore.sql", 'w');
863
864 tarPrintf(AH, th, "--\n"
865 "-- NOTE:\n"
866 "--\n"
867 "-- File paths need to be edited. Search for $$PATH$$ and\n"
868 "-- replace it with the path to the directory containing\n"
869 "-- the extracted data files.\n"
870 "--\n");
871
872 AH->CustomOutPtr = _scriptOut;
873
874 ctx->isSpecialScript = 1;
875 ctx->scriptTH = th;
876
877 ropt = NewRestoreOptions();
878 memcpy(ropt, AH->public.ropt, sizeof(RestoreOptions));
879 ropt->filename = NULL;
880 ropt->dropSchema = 1;
881 ropt->compression = 0;
882 ropt->superuser = NULL;
883 ropt->suppressDumpWarnings = true;
884
885 savDopt = AH->public.dopt;
886 savRopt = AH->public.ropt;
887
888 SetArchiveOptions((Archive *) AH, NULL, ropt);
889
890 savVerbose = AH->public.verbose;
891 AH->public.verbose = 0;
892
893 RestoreArchive((Archive *) AH);
894
895 SetArchiveOptions((Archive *) AH, savDopt, savRopt);
896
897 AH->public.verbose = savVerbose;
898
899 tarClose(AH, th);
900
901 ctx->isSpecialScript = 0;
902
903 /*
904 * EOF marker for tar files is two blocks of NULLs.
905 */
906 for (i = 0; i < 512 * 2; i++)
907 {
908 if (fputc(0, ctx->tarFH) == EOF)
909 WRITE_ERROR_EXIT;
910 }
911
912 /* Sync the output file if one is defined */
913 if (AH->dosync && AH->fSpec)
914 (void) fsync_fname(AH->fSpec, false, progname);
915 }
916
917 AH->FH = NULL;
918 }
919
920 static size_t
_scriptOut(ArchiveHandle * AH,const void * buf,size_t len)921 _scriptOut(ArchiveHandle *AH, const void *buf, size_t len)
922 {
923 lclContext *ctx = (lclContext *) AH->formatData;
924
925 return tarWrite(buf, len, ctx->scriptTH);
926 }
927
928 /*
929 * BLOB support
930 */
931
932 /*
933 * Called by the archiver when starting to save all BLOB DATA (not schema).
934 * This routine should save whatever format-specific information is needed
935 * to read the BLOBs back into memory.
936 *
937 * It is called just prior to the dumper's DataDumper routine.
938 *
939 * Optional, but strongly recommended.
940 *
941 */
942 static void
_StartBlobs(ArchiveHandle * AH,TocEntry * te)943 _StartBlobs(ArchiveHandle *AH, TocEntry *te)
944 {
945 lclContext *ctx = (lclContext *) AH->formatData;
946 char fname[K_STD_BUF_SIZE];
947
948 sprintf(fname, "blobs.toc");
949 ctx->blobToc = tarOpen(AH, fname, 'w');
950 }
951
952 /*
953 * Called by the archiver when the dumper calls StartBlob.
954 *
955 * Mandatory.
956 *
957 * Must save the passed OID for retrieval at restore-time.
958 */
959 static void
_StartBlob(ArchiveHandle * AH,TocEntry * te,Oid oid)960 _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
961 {
962 lclContext *ctx = (lclContext *) AH->formatData;
963 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
964 char fname[255];
965 char *sfx;
966
967 if (oid == 0)
968 exit_horribly(modulename, "invalid OID for large object (%u)\n", oid);
969
970 if (AH->compression != 0)
971 sfx = ".gz";
972 else
973 sfx = "";
974
975 sprintf(fname, "blob_%u.dat%s", oid, sfx);
976
977 tarPrintf(AH, ctx->blobToc, "%u %s\n", oid, fname);
978
979 tctx->TH = tarOpen(AH, fname, 'w');
980 }
981
982 /*
983 * Called by the archiver when the dumper calls EndBlob.
984 *
985 * Optional.
986 *
987 */
988 static void
_EndBlob(ArchiveHandle * AH,TocEntry * te,Oid oid)989 _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
990 {
991 lclTocEntry *tctx = (lclTocEntry *) te->formatData;
992
993 tarClose(AH, tctx->TH);
994 }
995
996 /*
997 * Called by the archiver when finishing saving all BLOB DATA.
998 *
999 * Optional.
1000 *
1001 */
1002 static void
_EndBlobs(ArchiveHandle * AH,TocEntry * te)1003 _EndBlobs(ArchiveHandle *AH, TocEntry *te)
1004 {
1005 lclContext *ctx = (lclContext *) AH->formatData;
1006
1007 /* Write out a fake zero OID to mark end-of-blobs. */
1008 /* WriteInt(AH, 0); */
1009
1010 tarClose(AH, ctx->blobToc);
1011 }
1012
1013
1014
1015 /*------------
1016 * TAR Support
1017 *------------
1018 */
1019
1020 static int
tarPrintf(ArchiveHandle * AH,TAR_MEMBER * th,const char * fmt,...)1021 tarPrintf(ArchiveHandle *AH, TAR_MEMBER *th, const char *fmt,...)
1022 {
1023 char *p;
1024 size_t len = 128; /* initial assumption about buffer size */
1025 size_t cnt;
1026
1027 for (;;)
1028 {
1029 va_list args;
1030
1031 /* Allocate work buffer. */
1032 p = (char *) pg_malloc(len);
1033
1034 /* Try to format the data. */
1035 va_start(args, fmt);
1036 cnt = pvsnprintf(p, len, fmt, args);
1037 va_end(args);
1038
1039 if (cnt < len)
1040 break; /* success */
1041
1042 /* Release buffer and loop around to try again with larger len. */
1043 free(p);
1044 len = cnt;
1045 }
1046
1047 cnt = tarWrite(p, cnt, th);
1048 free(p);
1049 return (int) cnt;
1050 }
1051
/*
 * Decide whether a 512-byte block looks like a tar header.
 *
 * The checksum stored at offset 148 must equal the one computed over the
 * block, and the magic/version area at offset 257 must match one of the
 * accepted layouts:
 *	- POSIX:	"ustar" NUL, then version "00"
 *	- GNU:		"ustar" followed by two spaces and a NUL
 *	- pre-9.3 pg_dump: "ustar00" NUL
 */
bool
isValidTarHeader(char *header)
{
	int			computed = tarChecksum(header);
	int			stored = read_tar_number(&header[148], 8);

	if (stored != computed)
		return false;

	/* POSIX tar format */
	if (memcmp(&header[257], "ustar\0", 6) == 0 &&
		memcmp(&header[263], "00", 2) == 0)
		return true;
	/* GNU tar format: magic is "ustar", two spaces, NUL (8 bytes) */
	if (memcmp(&header[257], "ustar  \0", 8) == 0)
		return true;
	/* not-quite-POSIX format written by pre-9.3 pg_dump */
	if (memcmp(&header[257], "ustar00\0", 8) == 0)
		return true;

	return false;
}
1076
1077 /* Given the member, write the TAR header & copy the file */
1078 static void
_tarAddFile(ArchiveHandle * AH,TAR_MEMBER * th)1079 _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th)
1080 {
1081 lclContext *ctx = (lclContext *) AH->formatData;
1082 FILE *tmp = th->tmpFH; /* Grab it for convenience */
1083 char buf[32768];
1084 size_t cnt;
1085 pgoff_t len = 0;
1086 size_t res;
1087 size_t i,
1088 pad;
1089
1090 /*
1091 * Find file len & go back to start.
1092 */
1093 if (fseeko(tmp, 0, SEEK_END) != 0)
1094 exit_horribly(modulename, "error during file seek: %s\n",
1095 strerror(errno));
1096 th->fileLen = ftello(tmp);
1097 if (th->fileLen < 0)
1098 exit_horribly(modulename, "could not determine seek position in archive file: %s\n",
1099 strerror(errno));
1100 if (fseeko(tmp, 0, SEEK_SET) != 0)
1101 exit_horribly(modulename, "error during file seek: %s\n",
1102 strerror(errno));
1103
1104 _tarWriteHeader(th);
1105
1106 while ((cnt = fread(buf, 1, sizeof(buf), tmp)) > 0)
1107 {
1108 if ((res = fwrite(buf, 1, cnt, th->tarFH)) != cnt)
1109 WRITE_ERROR_EXIT;
1110 len += res;
1111 }
1112 if (!feof(tmp))
1113 READ_ERROR_EXIT(tmp);
1114
1115 if (fclose(tmp) != 0) /* This *should* delete it... */
1116 exit_horribly(modulename, "could not close temporary file: %s\n",
1117 strerror(errno));
1118
1119 if (len != th->fileLen)
1120 {
1121 char buf1[32],
1122 buf2[32];
1123
1124 snprintf(buf1, sizeof(buf1), INT64_FORMAT, (int64) len);
1125 snprintf(buf2, sizeof(buf2), INT64_FORMAT, (int64) th->fileLen);
1126 exit_horribly(modulename, "actual file length (%s) does not match expected (%s)\n",
1127 buf1, buf2);
1128 }
1129
1130 pad = ((len + 511) & ~511) - len;
1131 for (i = 0; i < pad; i++)
1132 {
1133 if (fputc('\0', th->tarFH) == EOF)
1134 WRITE_ERROR_EXIT;
1135 }
1136
1137 ctx->tarFHpos += len + pad;
1138 }
1139
1140 /* Locate the file in the archive, read header and position to data */
1141 static TAR_MEMBER *
_tarPositionTo(ArchiveHandle * AH,const char * filename)1142 _tarPositionTo(ArchiveHandle *AH, const char *filename)
1143 {
1144 lclContext *ctx = (lclContext *) AH->formatData;
1145 TAR_MEMBER *th = pg_malloc0(sizeof(TAR_MEMBER));
1146 char c;
1147 char header[512];
1148 size_t i,
1149 len,
1150 blks;
1151 int id;
1152
1153 th->AH = AH;
1154
1155 /* Go to end of current file, if any */
1156 if (ctx->tarFHpos != 0)
1157 {
1158 char buf1[100],
1159 buf2[100];
1160
1161 snprintf(buf1, sizeof(buf1), INT64_FORMAT, (int64) ctx->tarFHpos);
1162 snprintf(buf2, sizeof(buf2), INT64_FORMAT, (int64) ctx->tarNextMember);
1163 ahlog(AH, 4, "moving from position %s to next member at file position %s\n",
1164 buf1, buf2);
1165
1166 while (ctx->tarFHpos < ctx->tarNextMember)
1167 _tarReadRaw(AH, &c, 1, NULL, ctx->tarFH);
1168 }
1169
1170 {
1171 char buf[100];
1172
1173 snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) ctx->tarFHpos);
1174 ahlog(AH, 4, "now at file position %s\n", buf);
1175 }
1176
1177 /* We are at the start of the file, or at the next member */
1178
1179 /* Get the header */
1180 if (!_tarGetHeader(AH, th))
1181 {
1182 if (filename)
1183 exit_horribly(modulename, "could not find header for file \"%s\" in tar archive\n", filename);
1184 else
1185 {
1186 /*
1187 * We're just scanning the archive for the next file, so return
1188 * null
1189 */
1190 free(th);
1191 return NULL;
1192 }
1193 }
1194
1195 while (filename != NULL && strcmp(th->targetFile, filename) != 0)
1196 {
1197 ahlog(AH, 4, "skipping tar member %s\n", th->targetFile);
1198
1199 id = atoi(th->targetFile);
1200 if ((TocIDRequired(AH, id) & REQ_DATA) != 0)
1201 exit_horribly(modulename, "restoring data out of order is not supported in this archive format: "
1202 "\"%s\" is required, but comes before \"%s\" in the archive file.\n",
1203 th->targetFile, filename);
1204
1205 /* Header doesn't match, so read to next header */
1206 len = ((th->fileLen + 511) & ~511); /* Padded length */
1207 blks = len >> 9; /* # of 512 byte blocks */
1208
1209 for (i = 0; i < blks; i++)
1210 _tarReadRaw(AH, &header[0], 512, NULL, ctx->tarFH);
1211
1212 if (!_tarGetHeader(AH, th))
1213 exit_horribly(modulename, "could not find header for file \"%s\" in tar archive\n", filename);
1214 }
1215
1216 ctx->tarNextMember = ctx->tarFHpos + ((th->fileLen + 511) & ~511);
1217 th->pos = 0;
1218
1219 return th;
1220 }
1221
1222 /* Read & verify a header */
1223 static int
_tarGetHeader(ArchiveHandle * AH,TAR_MEMBER * th)1224 _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th)
1225 {
1226 lclContext *ctx = (lclContext *) AH->formatData;
1227 char h[512];
1228 char tag[100 + 1];
1229 int sum,
1230 chk;
1231 pgoff_t len;
1232 pgoff_t hPos;
1233 bool gotBlock = false;
1234
1235 while (!gotBlock)
1236 {
1237 /* Save the pos for reporting purposes */
1238 hPos = ctx->tarFHpos;
1239
1240 /* Read a 512 byte block, return EOF, exit if short */
1241 len = _tarReadRaw(AH, h, 512, NULL, ctx->tarFH);
1242 if (len == 0) /* EOF */
1243 return 0;
1244
1245 if (len != 512)
1246 exit_horribly(modulename,
1247 ngettext("incomplete tar header found (%lu byte)\n",
1248 "incomplete tar header found (%lu bytes)\n",
1249 len),
1250 (unsigned long) len);
1251
1252 /* Calc checksum */
1253 chk = tarChecksum(h);
1254 sum = read_tar_number(&h[148], 8);
1255
1256 /*
1257 * If the checksum failed, see if it is a null block. If so, silently
1258 * continue to the next block.
1259 */
1260 if (chk == sum)
1261 gotBlock = true;
1262 else
1263 {
1264 int i;
1265
1266 for (i = 0; i < 512; i++)
1267 {
1268 if (h[i] != 0)
1269 {
1270 gotBlock = true;
1271 break;
1272 }
1273 }
1274 }
1275 }
1276
1277 /* Name field is 100 bytes, might not be null-terminated */
1278 strlcpy(tag, &h[0], 100 + 1);
1279
1280 len = read_tar_number(&h[124], 12);
1281
1282 {
1283 char posbuf[32];
1284 char lenbuf[32];
1285
1286 snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, (uint64) hPos);
1287 snprintf(lenbuf, sizeof(lenbuf), UINT64_FORMAT, (uint64) len);
1288 ahlog(AH, 3, "TOC Entry %s at %s (length %s, checksum %d)\n",
1289 tag, posbuf, lenbuf, sum);
1290 }
1291
1292 if (chk != sum)
1293 {
1294 char posbuf[32];
1295
1296 snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT,
1297 (uint64) ftello(ctx->tarFH));
1298 exit_horribly(modulename,
1299 "corrupt tar header found in %s "
1300 "(expected %d, computed %d) file position %s\n",
1301 tag, sum, chk, posbuf);
1302 }
1303
1304 th->targetFile = pg_strdup(tag);
1305 th->fileLen = len;
1306
1307 return 1;
1308 }
1309
1310
1311 static void
_tarWriteHeader(TAR_MEMBER * th)1312 _tarWriteHeader(TAR_MEMBER *th)
1313 {
1314 char h[512];
1315
1316 tarCreateHeader(h, th->targetFile, NULL, th->fileLen,
1317 0600, 04000, 02000, time(NULL));
1318
1319 /* Now write the completed header. */
1320 if (fwrite(h, 1, 512, th->tarFH) != 512)
1321 WRITE_ERROR_EXIT;
1322 }
1323