1 /*-------------------------------------------------------------------------
2  *
3  * pg_backup_archiver.c
4  *
5  *	Private implementation of the archiver routines.
6  *
7  *	See the headers to pg_restore for more details.
8  *
9  * Copyright (c) 2000, Philip Warner
10  *	Rights are granted to use this software in any way so long
11  *	as this notice is not removed.
12  *
13  *	The author is not responsible for loss or damages that may
14  *	result from its use.
15  *
16  *
17  * IDENTIFICATION
18  *		src/bin/pg_dump/pg_backup_archiver.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 #include "postgres_fe.h"
23 
24 #include <ctype.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <sys/stat.h>
28 #include <sys/wait.h>
29 #ifdef WIN32
30 #include <io.h>
31 #endif
32 
33 #include "common/string.h"
34 #include "dumputils.h"
35 #include "fe_utils/string_utils.h"
36 #include "lib/stringinfo.h"
37 #include "libpq/libpq-fs.h"
38 #include "parallel.h"
39 #include "pg_backup_archiver.h"
40 #include "pg_backup_db.h"
41 #include "pg_backup_utils.h"
42 
/* Leading banner text of a plain-text database dump */
#define TEXT_DUMP_HEADER \
	"--\n" \
	"-- PostgreSQL database dump\n" \
	"--\n" \
	"\n"

/* Leading banner text of a plain-text database cluster dump */
#define TEXT_DUMPALL_HEADER \
	"--\n" \
	"-- PostgreSQL database cluster dump\n" \
	"--\n" \
	"\n"
45 
/*
 * Snapshot of an archive's output target, so that the target can be saved
 * and put back later (see SaveOutput/RestoreOutput).
 */
typedef struct _outputContext
{
	void	   *OF;				/* saved output handle */
	int			gzOut;			/* saved flag: is OF a gzip-style handle? */
} OutputContext;
52 
53 /*
54  * State for tracking TocEntrys that are ready to process during a parallel
55  * restore.  (This used to be a list, and we still call it that, though now
56  * it's really an array so that we can apply qsort to it.)
57  *
58  * tes[] is sized large enough that we can't overrun it.
59  * The valid entries are indexed first_te .. last_te inclusive.
60  * We periodically sort the array to bring larger-by-dataLength entries to
61  * the front; "sorted" is true if the valid entries are known sorted.
62  */
63 typedef struct _parallelReadyList
64 {
65 	TocEntry  **tes;			/* Ready-to-dump TocEntrys */
66 	int			first_te;		/* index of first valid entry in tes[] */
67 	int			last_te;		/* index of last valid entry in tes[] */
68 	bool		sorted;			/* are valid entries currently sorted? */
69 } ParallelReadyList;
70 
71 
72 static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
73 							   const int compression, bool dosync, ArchiveMode mode,
74 							   SetupWorkerPtrType setupWorkerPtr);
75 static void _getObjectDescription(PQExpBuffer buf, TocEntry *te);
76 static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
77 static char *sanitize_line(const char *str, bool want_hyphen);
78 static void _doSetFixedOutputState(ArchiveHandle *AH);
79 static void _doSetSessionAuth(ArchiveHandle *AH, const char *user);
80 static void _reconnectToDB(ArchiveHandle *AH, const char *dbname);
81 static void _becomeUser(ArchiveHandle *AH, const char *user);
82 static void _becomeOwner(ArchiveHandle *AH, TocEntry *te);
83 static void _selectOutputSchema(ArchiveHandle *AH, const char *schemaName);
84 static void _selectTablespace(ArchiveHandle *AH, const char *tablespace);
85 static void _selectTableAccessMethod(ArchiveHandle *AH, const char *tableam);
86 static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te);
87 static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te);
88 static void processSearchPathEntry(ArchiveHandle *AH, TocEntry *te);
89 static int	_tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH);
90 static RestorePass _tocEntryRestorePass(TocEntry *te);
91 static bool _tocEntryIsACL(TocEntry *te);
92 static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
93 static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
94 static void buildTocEntryArrays(ArchiveHandle *AH);
95 static void _moveBefore(TocEntry *pos, TocEntry *te);
96 static int	_discoverArchiveFormat(ArchiveHandle *AH);
97 
98 static int	RestoringToDB(ArchiveHandle *AH);
99 static void dump_lo_buf(ArchiveHandle *AH);
100 static void dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim);
101 static void SetOutput(ArchiveHandle *AH, const char *filename, int compression);
102 static OutputContext SaveOutput(ArchiveHandle *AH);
103 static void RestoreOutput(ArchiveHandle *AH, OutputContext savedContext);
104 
105 static int	restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel);
106 static void restore_toc_entries_prefork(ArchiveHandle *AH,
107 										TocEntry *pending_list);
108 static void restore_toc_entries_parallel(ArchiveHandle *AH,
109 										 ParallelState *pstate,
110 										 TocEntry *pending_list);
111 static void restore_toc_entries_postfork(ArchiveHandle *AH,
112 										 TocEntry *pending_list);
113 static void pending_list_header_init(TocEntry *l);
114 static void pending_list_append(TocEntry *l, TocEntry *te);
115 static void pending_list_remove(TocEntry *te);
116 static void ready_list_init(ParallelReadyList *ready_list, int tocCount);
117 static void ready_list_free(ParallelReadyList *ready_list);
118 static void ready_list_insert(ParallelReadyList *ready_list, TocEntry *te);
119 static void ready_list_remove(ParallelReadyList *ready_list, int i);
120 static void ready_list_sort(ParallelReadyList *ready_list);
121 static int	TocEntrySizeCompare(const void *p1, const void *p2);
122 static void move_to_ready_list(TocEntry *pending_list,
123 							   ParallelReadyList *ready_list,
124 							   RestorePass pass);
125 static TocEntry *pop_next_work_item(ParallelReadyList *ready_list,
126 									ParallelState *pstate);
127 static void mark_dump_job_done(ArchiveHandle *AH,
128 							   TocEntry *te,
129 							   int status,
130 							   void *callback_data);
131 static void mark_restore_job_done(ArchiveHandle *AH,
132 								  TocEntry *te,
133 								  int status,
134 								  void *callback_data);
135 static void fix_dependencies(ArchiveHandle *AH);
136 static bool has_lock_conflicts(TocEntry *te1, TocEntry *te2);
137 static void repoint_table_dependencies(ArchiveHandle *AH);
138 static void identify_locking_dependencies(ArchiveHandle *AH, TocEntry *te);
139 static void reduce_dependencies(ArchiveHandle *AH, TocEntry *te,
140 								ParallelReadyList *ready_list);
141 static void mark_create_done(ArchiveHandle *AH, TocEntry *te);
142 static void inhibit_data_for_failed_table(ArchiveHandle *AH, TocEntry *te);
143 
144 static void StrictNamesCheck(RestoreOptions *ropt);
145 
146 
147 /*
148  * Allocate a new DumpOptions block containing all default values.
149  */
150 DumpOptions *
NewDumpOptions(void)151 NewDumpOptions(void)
152 {
153 	DumpOptions *opts = (DumpOptions *) pg_malloc(sizeof(DumpOptions));
154 
155 	InitDumpOptions(opts);
156 	return opts;
157 }
158 
159 /*
160  * Initialize a DumpOptions struct to all default values
161  */
162 void
InitDumpOptions(DumpOptions * opts)163 InitDumpOptions(DumpOptions *opts)
164 {
165 	memset(opts, 0, sizeof(DumpOptions));
166 	/* set any fields that shouldn't default to zeroes */
167 	opts->include_everything = true;
168 	opts->cparams.promptPassword = TRI_DEFAULT;
169 	opts->dumpSections = DUMP_UNSECTIONED;
170 }
171 
172 /*
173  * Create a freshly allocated DumpOptions with options equivalent to those
174  * found in the given RestoreOptions.
175  */
176 DumpOptions *
dumpOptionsFromRestoreOptions(RestoreOptions * ropt)177 dumpOptionsFromRestoreOptions(RestoreOptions *ropt)
178 {
179 	DumpOptions *dopt = NewDumpOptions();
180 
181 	/* this is the inverse of what's at the end of pg_dump.c's main() */
182 	dopt->cparams.dbname = ropt->cparams.dbname ? pg_strdup(ropt->cparams.dbname) : NULL;
183 	dopt->cparams.pgport = ropt->cparams.pgport ? pg_strdup(ropt->cparams.pgport) : NULL;
184 	dopt->cparams.pghost = ropt->cparams.pghost ? pg_strdup(ropt->cparams.pghost) : NULL;
185 	dopt->cparams.username = ropt->cparams.username ? pg_strdup(ropt->cparams.username) : NULL;
186 	dopt->cparams.promptPassword = ropt->cparams.promptPassword;
187 	dopt->outputClean = ropt->dropSchema;
188 	dopt->dataOnly = ropt->dataOnly;
189 	dopt->schemaOnly = ropt->schemaOnly;
190 	dopt->if_exists = ropt->if_exists;
191 	dopt->column_inserts = ropt->column_inserts;
192 	dopt->dumpSections = ropt->dumpSections;
193 	dopt->aclsSkip = ropt->aclsSkip;
194 	dopt->outputSuperuser = ropt->superuser;
195 	dopt->outputCreateDB = ropt->createDB;
196 	dopt->outputNoOwner = ropt->noOwner;
197 	dopt->outputNoTablespaces = ropt->noTablespace;
198 	dopt->disable_triggers = ropt->disable_triggers;
199 	dopt->use_setsessauth = ropt->use_setsessauth;
200 	dopt->disable_dollar_quoting = ropt->disable_dollar_quoting;
201 	dopt->dump_inserts = ropt->dump_inserts;
202 	dopt->no_comments = ropt->no_comments;
203 	dopt->no_publications = ropt->no_publications;
204 	dopt->no_security_labels = ropt->no_security_labels;
205 	dopt->no_subscriptions = ropt->no_subscriptions;
206 	dopt->lockWaitTimeout = ropt->lockWaitTimeout;
207 	dopt->include_everything = ropt->include_everything;
208 	dopt->enable_row_security = ropt->enable_row_security;
209 	dopt->sequence_data = ropt->sequence_data;
210 
211 	return dopt;
212 }
213 
214 
215 /*
216  *	Wrapper functions.
217  *
218  *	The objective is to make writing new formats and dumpers as simple
219  *	as possible, if necessary at the expense of extra function calls etc.
220  *
221  */
222 
223 /*
224  * The dump worker setup needs lots of knowledge of the internals of pg_dump,
225  * so it's defined in pg_dump.c and passed into OpenArchive. The restore worker
226  * setup doesn't need to know anything much, so it's defined here.
227  */
228 static void
setupRestoreWorker(Archive * AHX)229 setupRestoreWorker(Archive *AHX)
230 {
231 	ArchiveHandle *AH = (ArchiveHandle *) AHX;
232 
233 	AH->ReopenPtr(AH);
234 }
235 
236 
237 /* Create a new archive */
238 /* Public */
239 Archive *
CreateArchive(const char * FileSpec,const ArchiveFormat fmt,const int compression,bool dosync,ArchiveMode mode,SetupWorkerPtrType setupDumpWorker)240 CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
241 			  const int compression, bool dosync, ArchiveMode mode,
242 			  SetupWorkerPtrType setupDumpWorker)
243 
244 {
245 	ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression, dosync,
246 								 mode, setupDumpWorker);
247 
248 	return (Archive *) AH;
249 }
250 
251 /* Open an existing archive */
252 /* Public */
253 Archive *
OpenArchive(const char * FileSpec,const ArchiveFormat fmt)254 OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
255 {
256 	ArchiveHandle *AH = _allocAH(FileSpec, fmt, 0, true, archModeRead, setupRestoreWorker);
257 
258 	return (Archive *) AH;
259 }
260 
261 /* Public */
262 void
CloseArchive(Archive * AHX)263 CloseArchive(Archive *AHX)
264 {
265 	int			res = 0;
266 	ArchiveHandle *AH = (ArchiveHandle *) AHX;
267 
268 	AH->ClosePtr(AH);
269 
270 	/* Close the output */
271 	if (AH->gzOut)
272 		res = GZCLOSE(AH->OF);
273 	else if (AH->OF != stdout)
274 		res = fclose(AH->OF);
275 
276 	if (res != 0)
277 		fatal("could not close output file: %m");
278 }
279 
280 /* Public */
281 void
SetArchiveOptions(Archive * AH,DumpOptions * dopt,RestoreOptions * ropt)282 SetArchiveOptions(Archive *AH, DumpOptions *dopt, RestoreOptions *ropt)
283 {
284 	/* Caller can omit dump options, in which case we synthesize them */
285 	if (dopt == NULL && ropt != NULL)
286 		dopt = dumpOptionsFromRestoreOptions(ropt);
287 
288 	/* Save options for later access */
289 	AH->dopt = dopt;
290 	AH->ropt = ropt;
291 }
292 
293 /* Public */
294 void
ProcessArchiveRestoreOptions(Archive * AHX)295 ProcessArchiveRestoreOptions(Archive *AHX)
296 {
297 	ArchiveHandle *AH = (ArchiveHandle *) AHX;
298 	RestoreOptions *ropt = AH->public.ropt;
299 	TocEntry   *te;
300 	teSection	curSection;
301 
302 	/* Decide which TOC entries will be dumped/restored, and mark them */
303 	curSection = SECTION_PRE_DATA;
304 	for (te = AH->toc->next; te != AH->toc; te = te->next)
305 	{
306 		/*
307 		 * When writing an archive, we also take this opportunity to check
308 		 * that we have generated the entries in a sane order that respects
309 		 * the section divisions.  When reading, don't complain, since buggy
310 		 * old versions of pg_dump might generate out-of-order archives.
311 		 */
312 		if (AH->mode != archModeRead)
313 		{
314 			switch (te->section)
315 			{
316 				case SECTION_NONE:
317 					/* ok to be anywhere */
318 					break;
319 				case SECTION_PRE_DATA:
320 					if (curSection != SECTION_PRE_DATA)
321 						pg_log_warning("archive items not in correct section order");
322 					break;
323 				case SECTION_DATA:
324 					if (curSection == SECTION_POST_DATA)
325 						pg_log_warning("archive items not in correct section order");
326 					break;
327 				case SECTION_POST_DATA:
328 					/* ok no matter which section we were in */
329 					break;
330 				default:
331 					fatal("unexpected section code %d",
332 						  (int) te->section);
333 					break;
334 			}
335 		}
336 
337 		if (te->section != SECTION_NONE)
338 			curSection = te->section;
339 
340 		te->reqs = _tocEntryRequired(te, curSection, AH);
341 	}
342 
343 	/* Enforce strict names checking */
344 	if (ropt->strict_names)
345 		StrictNamesCheck(ropt);
346 }
347 
348 /* Public */
349 void
RestoreArchive(Archive * AHX)350 RestoreArchive(Archive *AHX)
351 {
352 	ArchiveHandle *AH = (ArchiveHandle *) AHX;
353 	RestoreOptions *ropt = AH->public.ropt;
354 	bool		parallel_mode;
355 	TocEntry   *te;
356 	OutputContext sav;
357 
358 	AH->stage = STAGE_INITIALIZING;
359 
360 	/*
361 	 * If we're going to do parallel restore, there are some restrictions.
362 	 */
363 	parallel_mode = (AH->public.numWorkers > 1 && ropt->useDB);
364 	if (parallel_mode)
365 	{
366 		/* We haven't got round to making this work for all archive formats */
367 		if (AH->ClonePtr == NULL || AH->ReopenPtr == NULL)
368 			fatal("parallel restore is not supported with this archive file format");
369 
370 		/* Doesn't work if the archive represents dependencies as OIDs */
371 		if (AH->version < K_VERS_1_8)
372 			fatal("parallel restore is not supported with archives made by pre-8.0 pg_dump");
373 
374 		/*
375 		 * It's also not gonna work if we can't reopen the input file, so
376 		 * let's try that immediately.
377 		 */
378 		AH->ReopenPtr(AH);
379 	}
380 
381 	/*
382 	 * Make sure we won't need (de)compression we haven't got
383 	 */
384 #ifndef HAVE_LIBZ
385 	if (AH->compression != 0 && AH->PrintTocDataPtr != NULL)
386 	{
387 		for (te = AH->toc->next; te != AH->toc; te = te->next)
388 		{
389 			if (te->hadDumper && (te->reqs & REQ_DATA) != 0)
390 				fatal("cannot restore from compressed archive (compression not supported in this installation)");
391 		}
392 	}
393 #endif
394 
395 	/*
396 	 * Prepare index arrays, so we can assume we have them throughout restore.
397 	 * It's possible we already did this, though.
398 	 */
399 	if (AH->tocsByDumpId == NULL)
400 		buildTocEntryArrays(AH);
401 
402 	/*
403 	 * If we're using a DB connection, then connect it.
404 	 */
405 	if (ropt->useDB)
406 	{
407 		pg_log_info("connecting to database for restore");
408 		if (AH->version < K_VERS_1_3)
409 			fatal("direct database connections are not supported in pre-1.3 archives");
410 
411 		/*
412 		 * We don't want to guess at whether the dump will successfully
413 		 * restore; allow the attempt regardless of the version of the restore
414 		 * target.
415 		 */
416 		AHX->minRemoteVersion = 0;
417 		AHX->maxRemoteVersion = 9999999;
418 
419 		ConnectDatabase(AHX, &ropt->cparams, false);
420 
421 		/*
422 		 * If we're talking to the DB directly, don't send comments since they
423 		 * obscure SQL when displaying errors
424 		 */
425 		AH->noTocComments = 1;
426 	}
427 
428 	/*
429 	 * Work out if we have an implied data-only restore. This can happen if
430 	 * the dump was data only or if the user has used a toc list to exclude
431 	 * all of the schema data. All we do is look for schema entries - if none
432 	 * are found then we set the dataOnly flag.
433 	 *
434 	 * We could scan for wanted TABLE entries, but that is not the same as
435 	 * dataOnly. At this stage, it seems unnecessary (6-Mar-2001).
436 	 */
437 	if (!ropt->dataOnly)
438 	{
439 		int			impliedDataOnly = 1;
440 
441 		for (te = AH->toc->next; te != AH->toc; te = te->next)
442 		{
443 			if ((te->reqs & REQ_SCHEMA) != 0)
444 			{					/* It's schema, and it's wanted */
445 				impliedDataOnly = 0;
446 				break;
447 			}
448 		}
449 		if (impliedDataOnly)
450 		{
451 			ropt->dataOnly = impliedDataOnly;
452 			pg_log_info("implied data-only restore");
453 		}
454 	}
455 
456 	/*
457 	 * Setup the output file if necessary.
458 	 */
459 	sav = SaveOutput(AH);
460 	if (ropt->filename || ropt->compression)
461 		SetOutput(AH, ropt->filename, ropt->compression);
462 
463 	ahprintf(AH, "--\n-- PostgreSQL database dump\n--\n\n");
464 
465 	if (AH->archiveRemoteVersion)
466 		ahprintf(AH, "-- Dumped from database version %s\n",
467 				 AH->archiveRemoteVersion);
468 	if (AH->archiveDumpVersion)
469 		ahprintf(AH, "-- Dumped by pg_dump version %s\n",
470 				 AH->archiveDumpVersion);
471 
472 	ahprintf(AH, "\n");
473 
474 	if (AH->public.verbose)
475 		dumpTimestamp(AH, "Started on", AH->createDate);
476 
477 	if (ropt->single_txn)
478 	{
479 		if (AH->connection)
480 			StartTransaction(AHX);
481 		else
482 			ahprintf(AH, "BEGIN;\n\n");
483 	}
484 
485 	/*
486 	 * Establish important parameter values right away.
487 	 */
488 	_doSetFixedOutputState(AH);
489 
490 	AH->stage = STAGE_PROCESSING;
491 
492 	/*
493 	 * Drop the items at the start, in reverse order
494 	 */
495 	if (ropt->dropSchema)
496 	{
497 		for (te = AH->toc->prev; te != AH->toc; te = te->prev)
498 		{
499 			AH->currentTE = te;
500 
501 			/*
502 			 * In createDB mode, issue a DROP *only* for the database as a
503 			 * whole.  Issuing drops against anything else would be wrong,
504 			 * because at this point we're connected to the wrong database.
505 			 * (The DATABASE PROPERTIES entry, if any, should be treated like
506 			 * the DATABASE entry.)
507 			 */
508 			if (ropt->createDB)
509 			{
510 				if (strcmp(te->desc, "DATABASE") != 0 &&
511 					strcmp(te->desc, "DATABASE PROPERTIES") != 0)
512 					continue;
513 			}
514 
515 			/* Otherwise, drop anything that's selected and has a dropStmt */
516 			if (((te->reqs & (REQ_SCHEMA | REQ_DATA)) != 0) && te->dropStmt)
517 			{
518 				pg_log_info("dropping %s %s", te->desc, te->tag);
519 				/* Select owner and schema as necessary */
520 				_becomeOwner(AH, te);
521 				_selectOutputSchema(AH, te->namespace);
522 
523 				/*
524 				 * Now emit the DROP command, if the object has one.  Note we
525 				 * don't necessarily emit it verbatim; at this point we add an
526 				 * appropriate IF EXISTS clause, if the user requested it.
527 				 */
528 				if (*te->dropStmt != '\0')
529 				{
530 					if (!ropt->if_exists)
531 					{
532 						/* No --if-exists?	Then just use the original */
533 						ahprintf(AH, "%s", te->dropStmt);
534 					}
535 					else
536 					{
537 						/*
538 						 * Inject an appropriate spelling of "if exists".  For
539 						 * large objects, we have a separate routine that
540 						 * knows how to do it, without depending on
541 						 * te->dropStmt; use that.  For other objects we need
542 						 * to parse the command.
543 						 */
544 						if (strncmp(te->desc, "BLOB", 4) == 0)
545 						{
546 							DropBlobIfExists(AH, te->catalogId.oid);
547 						}
548 						else
549 						{
550 							char	   *dropStmt = pg_strdup(te->dropStmt);
551 							char	   *dropStmtOrig = dropStmt;
552 							PQExpBuffer ftStmt = createPQExpBuffer();
553 
554 							/*
555 							 * Need to inject IF EXISTS clause after ALTER
556 							 * TABLE part in ALTER TABLE .. DROP statement
557 							 */
558 							if (strncmp(dropStmt, "ALTER TABLE", 11) == 0)
559 							{
560 								appendPQExpBufferStr(ftStmt,
561 													 "ALTER TABLE IF EXISTS");
562 								dropStmt = dropStmt + 11;
563 							}
564 
565 							/*
566 							 * ALTER TABLE..ALTER COLUMN..DROP DEFAULT does
567 							 * not support the IF EXISTS clause, and therefore
568 							 * we simply emit the original command for DEFAULT
569 							 * objects (modulo the adjustment made above).
570 							 *
571 							 * Likewise, don't mess with DATABASE PROPERTIES.
572 							 *
573 							 * If we used CREATE OR REPLACE VIEW as a means of
574 							 * quasi-dropping an ON SELECT rule, that should
575 							 * be emitted unchanged as well.
576 							 *
577 							 * For other object types, we need to extract the
578 							 * first part of the DROP which includes the
579 							 * object type.  Most of the time this matches
580 							 * te->desc, so search for that; however for the
581 							 * different kinds of CONSTRAINTs, we know to
582 							 * search for hardcoded "DROP CONSTRAINT" instead.
583 							 */
584 							if (strcmp(te->desc, "DEFAULT") == 0 ||
585 								strcmp(te->desc, "DATABASE PROPERTIES") == 0 ||
586 								strncmp(dropStmt, "CREATE OR REPLACE VIEW", 22) == 0)
587 								appendPQExpBufferStr(ftStmt, dropStmt);
588 							else
589 							{
590 								char		buffer[40];
591 								char	   *mark;
592 
593 								if (strcmp(te->desc, "CONSTRAINT") == 0 ||
594 									strcmp(te->desc, "CHECK CONSTRAINT") == 0 ||
595 									strcmp(te->desc, "FK CONSTRAINT") == 0)
596 									strcpy(buffer, "DROP CONSTRAINT");
597 								else
598 									snprintf(buffer, sizeof(buffer), "DROP %s",
599 											 te->desc);
600 
601 								mark = strstr(dropStmt, buffer);
602 
603 								if (mark)
604 								{
605 									*mark = '\0';
606 									appendPQExpBuffer(ftStmt, "%s%s IF EXISTS%s",
607 													  dropStmt, buffer,
608 													  mark + strlen(buffer));
609 								}
610 								else
611 								{
612 									/* complain and emit unmodified command */
613 									pg_log_warning("could not find where to insert IF EXISTS in statement \"%s\"",
614 												   dropStmtOrig);
615 									appendPQExpBufferStr(ftStmt, dropStmt);
616 								}
617 							}
618 
619 							ahprintf(AH, "%s", ftStmt->data);
620 
621 							destroyPQExpBuffer(ftStmt);
622 							pg_free(dropStmtOrig);
623 						}
624 					}
625 				}
626 			}
627 		}
628 
629 		/*
630 		 * _selectOutputSchema may have set currSchema to reflect the effect
631 		 * of a "SET search_path" command it emitted.  However, by now we may
632 		 * have dropped that schema; or it might not have existed in the first
633 		 * place.  In either case the effective value of search_path will not
634 		 * be what we think.  Forcibly reset currSchema so that we will
635 		 * re-establish the search_path setting when needed (after creating
636 		 * the schema).
637 		 *
638 		 * If we treated users as pg_dump'able objects then we'd need to reset
639 		 * currUser here too.
640 		 */
641 		if (AH->currSchema)
642 			free(AH->currSchema);
643 		AH->currSchema = NULL;
644 	}
645 
646 	if (parallel_mode)
647 	{
648 		/*
649 		 * In parallel mode, turn control over to the parallel-restore logic.
650 		 */
651 		ParallelState *pstate;
652 		TocEntry	pending_list;
653 
654 		/* The archive format module may need some setup for this */
655 		if (AH->PrepParallelRestorePtr)
656 			AH->PrepParallelRestorePtr(AH);
657 
658 		pending_list_header_init(&pending_list);
659 
660 		/* This runs PRE_DATA items and then disconnects from the database */
661 		restore_toc_entries_prefork(AH, &pending_list);
662 		Assert(AH->connection == NULL);
663 
664 		/* ParallelBackupStart() will actually fork the processes */
665 		pstate = ParallelBackupStart(AH);
666 		restore_toc_entries_parallel(AH, pstate, &pending_list);
667 		ParallelBackupEnd(AH, pstate);
668 
669 		/* reconnect the leader and see if we missed something */
670 		restore_toc_entries_postfork(AH, &pending_list);
671 		Assert(AH->connection != NULL);
672 	}
673 	else
674 	{
675 		/*
676 		 * In serial mode, process everything in three phases: normal items,
677 		 * then ACLs, then post-ACL items.  We might be able to skip one or
678 		 * both extra phases in some cases, eg data-only restores.
679 		 */
680 		bool		haveACL = false;
681 		bool		havePostACL = false;
682 
683 		for (te = AH->toc->next; te != AH->toc; te = te->next)
684 		{
685 			if ((te->reqs & (REQ_SCHEMA | REQ_DATA)) == 0)
686 				continue;		/* ignore if not to be dumped at all */
687 
688 			switch (_tocEntryRestorePass(te))
689 			{
690 				case RESTORE_PASS_MAIN:
691 					(void) restore_toc_entry(AH, te, false);
692 					break;
693 				case RESTORE_PASS_ACL:
694 					haveACL = true;
695 					break;
696 				case RESTORE_PASS_POST_ACL:
697 					havePostACL = true;
698 					break;
699 			}
700 		}
701 
702 		if (haveACL)
703 		{
704 			for (te = AH->toc->next; te != AH->toc; te = te->next)
705 			{
706 				if ((te->reqs & (REQ_SCHEMA | REQ_DATA)) != 0 &&
707 					_tocEntryRestorePass(te) == RESTORE_PASS_ACL)
708 					(void) restore_toc_entry(AH, te, false);
709 			}
710 		}
711 
712 		if (havePostACL)
713 		{
714 			for (te = AH->toc->next; te != AH->toc; te = te->next)
715 			{
716 				if ((te->reqs & (REQ_SCHEMA | REQ_DATA)) != 0 &&
717 					_tocEntryRestorePass(te) == RESTORE_PASS_POST_ACL)
718 					(void) restore_toc_entry(AH, te, false);
719 			}
720 		}
721 	}
722 
723 	if (ropt->single_txn)
724 	{
725 		if (AH->connection)
726 			CommitTransaction(AHX);
727 		else
728 			ahprintf(AH, "COMMIT;\n\n");
729 	}
730 
731 	if (AH->public.verbose)
732 		dumpTimestamp(AH, "Completed on", time(NULL));
733 
734 	ahprintf(AH, "--\n-- PostgreSQL database dump complete\n--\n\n");
735 
736 	/*
737 	 * Clean up & we're done.
738 	 */
739 	AH->stage = STAGE_FINALIZING;
740 
741 	if (ropt->filename || ropt->compression)
742 		RestoreOutput(AH, sav);
743 
744 	if (ropt->useDB)
745 		DisconnectDatabase(&AH->public);
746 }
747 
748 /*
749  * Restore a single TOC item.  Used in both parallel and non-parallel restore;
750  * is_parallel is true if we are in a worker child process.
751  *
752  * Returns 0 normally, but WORKER_CREATE_DONE or WORKER_INHIBIT_DATA if
753  * the parallel parent has to make the corresponding status update.
754  */
755 static int
restore_toc_entry(ArchiveHandle * AH,TocEntry * te,bool is_parallel)756 restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
757 {
758 	RestoreOptions *ropt = AH->public.ropt;
759 	int			status = WORKER_OK;
760 	int			reqs;
761 	bool		defnDumped;
762 
763 	AH->currentTE = te;
764 
765 	/* Dump any relevant dump warnings to stderr */
766 	if (!ropt->suppressDumpWarnings && strcmp(te->desc, "WARNING") == 0)
767 	{
768 		if (!ropt->dataOnly && te->defn != NULL && strlen(te->defn) != 0)
769 			pg_log_warning("warning from original dump file: %s", te->defn);
770 		else if (te->copyStmt != NULL && strlen(te->copyStmt) != 0)
771 			pg_log_warning("warning from original dump file: %s", te->copyStmt);
772 	}
773 
774 	/* Work out what, if anything, we want from this entry */
775 	reqs = te->reqs;
776 
777 	defnDumped = false;
778 
779 	/*
780 	 * If it has a schema component that we want, then process that
781 	 */
782 	if ((reqs & REQ_SCHEMA) != 0)
783 	{
784 		/* Show namespace in log message if available */
785 		if (te->namespace)
786 			pg_log_info("creating %s \"%s.%s\"",
787 						te->desc, te->namespace, te->tag);
788 		else
789 			pg_log_info("creating %s \"%s\"",
790 						te->desc, te->tag);
791 
792 		_printTocEntry(AH, te, false);
793 		defnDumped = true;
794 
795 		if (strcmp(te->desc, "TABLE") == 0)
796 		{
797 			if (AH->lastErrorTE == te)
798 			{
799 				/*
800 				 * We failed to create the table. If
801 				 * --no-data-for-failed-tables was given, mark the
802 				 * corresponding TABLE DATA to be ignored.
803 				 *
804 				 * In the parallel case this must be done in the parent, so we
805 				 * just set the return value.
806 				 */
807 				if (ropt->noDataForFailedTables)
808 				{
809 					if (is_parallel)
810 						status = WORKER_INHIBIT_DATA;
811 					else
812 						inhibit_data_for_failed_table(AH, te);
813 				}
814 			}
815 			else
816 			{
817 				/*
818 				 * We created the table successfully.  Mark the corresponding
819 				 * TABLE DATA for possible truncation.
820 				 *
821 				 * In the parallel case this must be done in the parent, so we
822 				 * just set the return value.
823 				 */
824 				if (is_parallel)
825 					status = WORKER_CREATE_DONE;
826 				else
827 					mark_create_done(AH, te);
828 			}
829 		}
830 
831 		/*
832 		 * If we created a DB, connect to it.  Also, if we changed DB
833 		 * properties, reconnect to ensure that relevant GUC settings are
834 		 * applied to our session.
835 		 */
836 		if (strcmp(te->desc, "DATABASE") == 0 ||
837 			strcmp(te->desc, "DATABASE PROPERTIES") == 0)
838 		{
839 			pg_log_info("connecting to new database \"%s\"", te->tag);
840 			_reconnectToDB(AH, te->tag);
841 		}
842 	}
843 
844 	/*
845 	 * If it has a data component that we want, then process that
846 	 */
847 	if ((reqs & REQ_DATA) != 0)
848 	{
849 		/*
850 		 * hadDumper will be set if there is genuine data component for this
851 		 * node. Otherwise, we need to check the defn field for statements
852 		 * that need to be executed in data-only restores.
853 		 */
854 		if (te->hadDumper)
855 		{
856 			/*
857 			 * If we can output the data, then restore it.
858 			 */
859 			if (AH->PrintTocDataPtr != NULL)
860 			{
861 				_printTocEntry(AH, te, true);
862 
863 				if (strcmp(te->desc, "BLOBS") == 0 ||
864 					strcmp(te->desc, "BLOB COMMENTS") == 0)
865 				{
866 					pg_log_info("processing %s", te->desc);
867 
868 					_selectOutputSchema(AH, "pg_catalog");
869 
870 					/* Send BLOB COMMENTS data to ExecuteSimpleCommands() */
871 					if (strcmp(te->desc, "BLOB COMMENTS") == 0)
872 						AH->outputKind = OUTPUT_OTHERDATA;
873 
874 					AH->PrintTocDataPtr(AH, te);
875 
876 					AH->outputKind = OUTPUT_SQLCMDS;
877 				}
878 				else
879 				{
880 					_disableTriggersIfNecessary(AH, te);
881 
882 					/* Select owner and schema as necessary */
883 					_becomeOwner(AH, te);
884 					_selectOutputSchema(AH, te->namespace);
885 
886 					pg_log_info("processing data for table \"%s.%s\"",
887 								te->namespace, te->tag);
888 
889 					/*
890 					 * In parallel restore, if we created the table earlier in
891 					 * the run then we wrap the COPY in a transaction and
892 					 * precede it with a TRUNCATE.  If archiving is not on
893 					 * this prevents WAL-logging the COPY.  This obtains a
894 					 * speedup similar to that from using single_txn mode in
895 					 * non-parallel restores.
896 					 */
897 					if (is_parallel && te->created)
898 					{
899 						/*
900 						 * Parallel restore is always talking directly to a
901 						 * server, so no need to see if we should issue BEGIN.
902 						 */
903 						StartTransaction(&AH->public);
904 
905 						/*
906 						 * If the server version is >= 8.4, make sure we issue
907 						 * TRUNCATE with ONLY so that child tables are not
908 						 * wiped.
909 						 */
910 						ahprintf(AH, "TRUNCATE TABLE %s%s;\n\n",
911 								 (PQserverVersion(AH->connection) >= 80400 ?
912 								  "ONLY " : ""),
913 								 fmtQualifiedId(te->namespace, te->tag));
914 					}
915 
916 					/*
917 					 * If we have a copy statement, use it.
918 					 */
919 					if (te->copyStmt && strlen(te->copyStmt) > 0)
920 					{
921 						ahprintf(AH, "%s", te->copyStmt);
922 						AH->outputKind = OUTPUT_COPYDATA;
923 					}
924 					else
925 						AH->outputKind = OUTPUT_OTHERDATA;
926 
927 					AH->PrintTocDataPtr(AH, te);
928 
929 					/*
930 					 * Terminate COPY if needed.
931 					 */
932 					if (AH->outputKind == OUTPUT_COPYDATA &&
933 						RestoringToDB(AH))
934 						EndDBCopyMode(&AH->public, te->tag);
935 					AH->outputKind = OUTPUT_SQLCMDS;
936 
937 					/* close out the transaction started above */
938 					if (is_parallel && te->created)
939 						CommitTransaction(&AH->public);
940 
941 					_enableTriggersIfNecessary(AH, te);
942 				}
943 			}
944 		}
945 		else if (!defnDumped)
946 		{
947 			/* If we haven't already dumped the defn part, do so now */
948 			pg_log_info("executing %s %s", te->desc, te->tag);
949 			_printTocEntry(AH, te, false);
950 		}
951 	}
952 
953 	if (AH->public.n_errors > 0 && status == WORKER_OK)
954 		status = WORKER_IGNORED_ERRORS;
955 
956 	return status;
957 }
958 
959 /*
960  * Allocate a new RestoreOptions block.
961  * This is mainly so we can initialize it, but also for future expansion,
962  */
963 RestoreOptions *
NewRestoreOptions(void)964 NewRestoreOptions(void)
965 {
966 	RestoreOptions *opts;
967 
968 	opts = (RestoreOptions *) pg_malloc0(sizeof(RestoreOptions));
969 
970 	/* set any fields that shouldn't default to zeroes */
971 	opts->format = archUnknown;
972 	opts->cparams.promptPassword = TRI_DEFAULT;
973 	opts->dumpSections = DUMP_UNSECTIONED;
974 
975 	return opts;
976 }
977 
978 static void
_disableTriggersIfNecessary(ArchiveHandle * AH,TocEntry * te)979 _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te)
980 {
981 	RestoreOptions *ropt = AH->public.ropt;
982 
983 	/* This hack is only needed in a data-only restore */
984 	if (!ropt->dataOnly || !ropt->disable_triggers)
985 		return;
986 
987 	pg_log_info("disabling triggers for %s", te->tag);
988 
989 	/*
990 	 * Become superuser if possible, since they are the only ones who can
991 	 * disable constraint triggers.  If -S was not given, assume the initial
992 	 * user identity is a superuser.  (XXX would it be better to become the
993 	 * table owner?)
994 	 */
995 	_becomeUser(AH, ropt->superuser);
996 
997 	/*
998 	 * Disable them.
999 	 */
1000 	ahprintf(AH, "ALTER TABLE %s DISABLE TRIGGER ALL;\n\n",
1001 			 fmtQualifiedId(te->namespace, te->tag));
1002 }
1003 
1004 static void
_enableTriggersIfNecessary(ArchiveHandle * AH,TocEntry * te)1005 _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te)
1006 {
1007 	RestoreOptions *ropt = AH->public.ropt;
1008 
1009 	/* This hack is only needed in a data-only restore */
1010 	if (!ropt->dataOnly || !ropt->disable_triggers)
1011 		return;
1012 
1013 	pg_log_info("enabling triggers for %s", te->tag);
1014 
1015 	/*
1016 	 * Become superuser if possible, since they are the only ones who can
1017 	 * disable constraint triggers.  If -S was not given, assume the initial
1018 	 * user identity is a superuser.  (XXX would it be better to become the
1019 	 * table owner?)
1020 	 */
1021 	_becomeUser(AH, ropt->superuser);
1022 
1023 	/*
1024 	 * Enable them.
1025 	 */
1026 	ahprintf(AH, "ALTER TABLE %s ENABLE TRIGGER ALL;\n\n",
1027 			 fmtQualifiedId(te->namespace, te->tag));
1028 }
1029 
1030 /*
1031  * This is a routine that is part of the dumper interface, hence the 'Archive*' parameter.
1032  */
1033 
1034 /* Public */
1035 void
WriteData(Archive * AHX,const void * data,size_t dLen)1036 WriteData(Archive *AHX, const void *data, size_t dLen)
1037 {
1038 	ArchiveHandle *AH = (ArchiveHandle *) AHX;
1039 
1040 	if (!AH->currToc)
1041 		fatal("internal error -- WriteData cannot be called outside the context of a DataDumper routine");
1042 
1043 	AH->WriteDataPtr(AH, data, dLen);
1044 }
1045 
1046 /*
1047  * Create a new TOC entry. The TOC was designed as a TOC, but is now the
1048  * repository for all metadata. But the name has stuck.
1049  *
1050  * The new entry is added to the Archive's TOC list.  Most callers can ignore
1051  * the result value because nothing else need be done, but a few want to
1052  * manipulate the TOC entry further.
1053  */
1054 
1055 /* Public */
1056 TocEntry *
ArchiveEntry(Archive * AHX,CatalogId catalogId,DumpId dumpId,ArchiveOpts * opts)1057 ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId,
1058 			 ArchiveOpts *opts)
1059 {
1060 	ArchiveHandle *AH = (ArchiveHandle *) AHX;
1061 	TocEntry   *newToc;
1062 
1063 	newToc = (TocEntry *) pg_malloc0(sizeof(TocEntry));
1064 
1065 	AH->tocCount++;
1066 	if (dumpId > AH->maxDumpId)
1067 		AH->maxDumpId = dumpId;
1068 
1069 	newToc->prev = AH->toc->prev;
1070 	newToc->next = AH->toc;
1071 	AH->toc->prev->next = newToc;
1072 	AH->toc->prev = newToc;
1073 
1074 	newToc->catalogId = catalogId;
1075 	newToc->dumpId = dumpId;
1076 	newToc->section = opts->section;
1077 
1078 	newToc->tag = pg_strdup(opts->tag);
1079 	newToc->namespace = opts->namespace ? pg_strdup(opts->namespace) : NULL;
1080 	newToc->tablespace = opts->tablespace ? pg_strdup(opts->tablespace) : NULL;
1081 	newToc->tableam = opts->tableam ? pg_strdup(opts->tableam) : NULL;
1082 	newToc->owner = opts->owner ? pg_strdup(opts->owner) : NULL;
1083 	newToc->desc = pg_strdup(opts->description);
1084 	newToc->defn = opts->createStmt ? pg_strdup(opts->createStmt) : NULL;
1085 	newToc->dropStmt = opts->dropStmt ? pg_strdup(opts->dropStmt) : NULL;
1086 	newToc->copyStmt = opts->copyStmt ? pg_strdup(opts->copyStmt) : NULL;
1087 
1088 	if (opts->nDeps > 0)
1089 	{
1090 		newToc->dependencies = (DumpId *) pg_malloc(opts->nDeps * sizeof(DumpId));
1091 		memcpy(newToc->dependencies, opts->deps, opts->nDeps * sizeof(DumpId));
1092 		newToc->nDeps = opts->nDeps;
1093 	}
1094 	else
1095 	{
1096 		newToc->dependencies = NULL;
1097 		newToc->nDeps = 0;
1098 	}
1099 
1100 	newToc->dataDumper = opts->dumpFn;
1101 	newToc->dataDumperArg = opts->dumpArg;
1102 	newToc->hadDumper = opts->dumpFn ? true : false;
1103 
1104 	newToc->formatData = NULL;
1105 	newToc->dataLength = 0;
1106 
1107 	if (AH->ArchiveEntryPtr != NULL)
1108 		AH->ArchiveEntryPtr(AH, newToc);
1109 
1110 	return newToc;
1111 }
1112 
1113 /* Public */
1114 void
PrintTOCSummary(Archive * AHX)1115 PrintTOCSummary(Archive *AHX)
1116 {
1117 	ArchiveHandle *AH = (ArchiveHandle *) AHX;
1118 	RestoreOptions *ropt = AH->public.ropt;
1119 	TocEntry   *te;
1120 	teSection	curSection;
1121 	OutputContext sav;
1122 	const char *fmtName;
1123 	char		stamp_str[64];
1124 
1125 	sav = SaveOutput(AH);
1126 	if (ropt->filename)
1127 		SetOutput(AH, ropt->filename, 0 /* no compression */ );
1128 
1129 	if (strftime(stamp_str, sizeof(stamp_str), PGDUMP_STRFTIME_FMT,
1130 				 localtime(&AH->createDate)) == 0)
1131 		strcpy(stamp_str, "[unknown]");
1132 
1133 	ahprintf(AH, ";\n; Archive created at %s\n", stamp_str);
1134 	ahprintf(AH, ";     dbname: %s\n;     TOC Entries: %d\n;     Compression: %d\n",
1135 			 sanitize_line(AH->archdbname, false),
1136 			 AH->tocCount, AH->compression);
1137 
1138 	switch (AH->format)
1139 	{
1140 		case archCustom:
1141 			fmtName = "CUSTOM";
1142 			break;
1143 		case archDirectory:
1144 			fmtName = "DIRECTORY";
1145 			break;
1146 		case archTar:
1147 			fmtName = "TAR";
1148 			break;
1149 		default:
1150 			fmtName = "UNKNOWN";
1151 	}
1152 
1153 	ahprintf(AH, ";     Dump Version: %d.%d-%d\n",
1154 			 ARCHIVE_MAJOR(AH->version), ARCHIVE_MINOR(AH->version), ARCHIVE_REV(AH->version));
1155 	ahprintf(AH, ";     Format: %s\n", fmtName);
1156 	ahprintf(AH, ";     Integer: %d bytes\n", (int) AH->intSize);
1157 	ahprintf(AH, ";     Offset: %d bytes\n", (int) AH->offSize);
1158 	if (AH->archiveRemoteVersion)
1159 		ahprintf(AH, ";     Dumped from database version: %s\n",
1160 				 AH->archiveRemoteVersion);
1161 	if (AH->archiveDumpVersion)
1162 		ahprintf(AH, ";     Dumped by pg_dump version: %s\n",
1163 				 AH->archiveDumpVersion);
1164 
1165 	ahprintf(AH, ";\n;\n; Selected TOC Entries:\n;\n");
1166 
1167 	curSection = SECTION_PRE_DATA;
1168 	for (te = AH->toc->next; te != AH->toc; te = te->next)
1169 	{
1170 		if (te->section != SECTION_NONE)
1171 			curSection = te->section;
1172 		if (ropt->verbose ||
1173 			(_tocEntryRequired(te, curSection, AH) & (REQ_SCHEMA | REQ_DATA)) != 0)
1174 		{
1175 			char	   *sanitized_name;
1176 			char	   *sanitized_schema;
1177 			char	   *sanitized_owner;
1178 
1179 			/*
1180 			 */
1181 			sanitized_name = sanitize_line(te->tag, false);
1182 			sanitized_schema = sanitize_line(te->namespace, true);
1183 			sanitized_owner = sanitize_line(te->owner, false);
1184 
1185 			ahprintf(AH, "%d; %u %u %s %s %s %s\n", te->dumpId,
1186 					 te->catalogId.tableoid, te->catalogId.oid,
1187 					 te->desc, sanitized_schema, sanitized_name,
1188 					 sanitized_owner);
1189 
1190 			free(sanitized_name);
1191 			free(sanitized_schema);
1192 			free(sanitized_owner);
1193 		}
1194 		if (ropt->verbose && te->nDeps > 0)
1195 		{
1196 			int			i;
1197 
1198 			ahprintf(AH, ";\tdepends on:");
1199 			for (i = 0; i < te->nDeps; i++)
1200 				ahprintf(AH, " %d", te->dependencies[i]);
1201 			ahprintf(AH, "\n");
1202 		}
1203 	}
1204 
1205 	/* Enforce strict names checking */
1206 	if (ropt->strict_names)
1207 		StrictNamesCheck(ropt);
1208 
1209 	if (ropt->filename)
1210 		RestoreOutput(AH, sav);
1211 }
1212 
1213 /***********
1214  * BLOB Archival
1215  ***********/
1216 
1217 /* Called by a dumper to signal start of a BLOB */
1218 int
StartBlob(Archive * AHX,Oid oid)1219 StartBlob(Archive *AHX, Oid oid)
1220 {
1221 	ArchiveHandle *AH = (ArchiveHandle *) AHX;
1222 
1223 	if (!AH->StartBlobPtr)
1224 		fatal("large-object output not supported in chosen format");
1225 
1226 	AH->StartBlobPtr(AH, AH->currToc, oid);
1227 
1228 	return 1;
1229 }
1230 
1231 /* Called by a dumper to signal end of a BLOB */
1232 int
EndBlob(Archive * AHX,Oid oid)1233 EndBlob(Archive *AHX, Oid oid)
1234 {
1235 	ArchiveHandle *AH = (ArchiveHandle *) AHX;
1236 
1237 	if (AH->EndBlobPtr)
1238 		AH->EndBlobPtr(AH, AH->currToc, oid);
1239 
1240 	return 1;
1241 }
1242 
1243 /**********
1244  * BLOB Restoration
1245  **********/
1246 
1247 /*
1248  * Called by a format handler before any blobs are restored
1249  */
1250 void
StartRestoreBlobs(ArchiveHandle * AH)1251 StartRestoreBlobs(ArchiveHandle *AH)
1252 {
1253 	RestoreOptions *ropt = AH->public.ropt;
1254 
1255 	if (!ropt->single_txn)
1256 	{
1257 		if (AH->connection)
1258 			StartTransaction(&AH->public);
1259 		else
1260 			ahprintf(AH, "BEGIN;\n\n");
1261 	}
1262 
1263 	AH->blobCount = 0;
1264 }
1265 
1266 /*
1267  * Called by a format handler after all blobs are restored
1268  */
1269 void
EndRestoreBlobs(ArchiveHandle * AH)1270 EndRestoreBlobs(ArchiveHandle *AH)
1271 {
1272 	RestoreOptions *ropt = AH->public.ropt;
1273 
1274 	if (!ropt->single_txn)
1275 	{
1276 		if (AH->connection)
1277 			CommitTransaction(&AH->public);
1278 		else
1279 			ahprintf(AH, "COMMIT;\n\n");
1280 	}
1281 
1282 	pg_log_info(ngettext("restored %d large object",
1283 						 "restored %d large objects",
1284 						 AH->blobCount),
1285 				AH->blobCount);
1286 }
1287 
1288 
1289 /*
1290  * Called by a format handler to initiate restoration of a blob
1291  */
1292 void
StartRestoreBlob(ArchiveHandle * AH,Oid oid,bool drop)1293 StartRestoreBlob(ArchiveHandle *AH, Oid oid, bool drop)
1294 {
1295 	bool		old_blob_style = (AH->version < K_VERS_1_12);
1296 	Oid			loOid;
1297 
1298 	AH->blobCount++;
1299 
1300 	/* Initialize the LO Buffer */
1301 	AH->lo_buf_used = 0;
1302 
1303 	pg_log_info("restoring large object with OID %u", oid);
1304 
1305 	/* With an old archive we must do drop and create logic here */
1306 	if (old_blob_style && drop)
1307 		DropBlobIfExists(AH, oid);
1308 
1309 	if (AH->connection)
1310 	{
1311 		if (old_blob_style)
1312 		{
1313 			loOid = lo_create(AH->connection, oid);
1314 			if (loOid == 0 || loOid != oid)
1315 				fatal("could not create large object %u: %s",
1316 					  oid, PQerrorMessage(AH->connection));
1317 		}
1318 		AH->loFd = lo_open(AH->connection, oid, INV_WRITE);
1319 		if (AH->loFd == -1)
1320 			fatal("could not open large object %u: %s",
1321 				  oid, PQerrorMessage(AH->connection));
1322 	}
1323 	else
1324 	{
1325 		if (old_blob_style)
1326 			ahprintf(AH, "SELECT pg_catalog.lo_open(pg_catalog.lo_create('%u'), %d);\n",
1327 					 oid, INV_WRITE);
1328 		else
1329 			ahprintf(AH, "SELECT pg_catalog.lo_open('%u', %d);\n",
1330 					 oid, INV_WRITE);
1331 	}
1332 
1333 	AH->writingBlob = 1;
1334 }
1335 
1336 void
EndRestoreBlob(ArchiveHandle * AH,Oid oid)1337 EndRestoreBlob(ArchiveHandle *AH, Oid oid)
1338 {
1339 	if (AH->lo_buf_used > 0)
1340 	{
1341 		/* Write remaining bytes from the LO buffer */
1342 		dump_lo_buf(AH);
1343 	}
1344 
1345 	AH->writingBlob = 0;
1346 
1347 	if (AH->connection)
1348 	{
1349 		lo_close(AH->connection, AH->loFd);
1350 		AH->loFd = -1;
1351 	}
1352 	else
1353 	{
1354 		ahprintf(AH, "SELECT pg_catalog.lo_close(0);\n\n");
1355 	}
1356 }
1357 
1358 /***********
1359  * Sorting and Reordering
1360  ***********/
1361 
1362 void
SortTocFromFile(Archive * AHX)1363 SortTocFromFile(Archive *AHX)
1364 {
1365 	ArchiveHandle *AH = (ArchiveHandle *) AHX;
1366 	RestoreOptions *ropt = AH->public.ropt;
1367 	FILE	   *fh;
1368 	StringInfoData linebuf;
1369 
1370 	/* Allocate space for the 'wanted' array, and init it */
1371 	ropt->idWanted = (bool *) pg_malloc0(sizeof(bool) * AH->maxDumpId);
1372 
1373 	/* Setup the file */
1374 	fh = fopen(ropt->tocFile, PG_BINARY_R);
1375 	if (!fh)
1376 		fatal("could not open TOC file \"%s\": %m", ropt->tocFile);
1377 
1378 	initStringInfo(&linebuf);
1379 
1380 	while (pg_get_line_buf(fh, &linebuf))
1381 	{
1382 		char	   *cmnt;
1383 		char	   *endptr;
1384 		DumpId		id;
1385 		TocEntry   *te;
1386 
1387 		/* Truncate line at comment, if any */
1388 		cmnt = strchr(linebuf.data, ';');
1389 		if (cmnt != NULL)
1390 		{
1391 			cmnt[0] = '\0';
1392 			linebuf.len = cmnt - linebuf.data;
1393 		}
1394 
1395 		/* Ignore if all blank */
1396 		if (strspn(linebuf.data, " \t\r\n") == linebuf.len)
1397 			continue;
1398 
1399 		/* Get an ID, check it's valid and not already seen */
1400 		id = strtol(linebuf.data, &endptr, 10);
1401 		if (endptr == linebuf.data || id <= 0 || id > AH->maxDumpId ||
1402 			ropt->idWanted[id - 1])
1403 		{
1404 			pg_log_warning("line ignored: %s", linebuf.data);
1405 			continue;
1406 		}
1407 
1408 		/* Find TOC entry */
1409 		te = getTocEntryByDumpId(AH, id);
1410 		if (!te)
1411 			fatal("could not find entry for ID %d",
1412 				  id);
1413 
1414 		/* Mark it wanted */
1415 		ropt->idWanted[id - 1] = true;
1416 
1417 		/*
1418 		 * Move each item to the end of the list as it is selected, so that
1419 		 * they are placed in the desired order.  Any unwanted items will end
1420 		 * up at the front of the list, which may seem unintuitive but it's
1421 		 * what we need.  In an ordinary serial restore that makes no
1422 		 * difference, but in a parallel restore we need to mark unrestored
1423 		 * items' dependencies as satisfied before we start examining
1424 		 * restorable items.  Otherwise they could have surprising
1425 		 * side-effects on the order in which restorable items actually get
1426 		 * restored.
1427 		 */
1428 		_moveBefore(AH->toc, te);
1429 	}
1430 
1431 	pg_free(linebuf.data);
1432 
1433 	if (fclose(fh) != 0)
1434 		fatal("could not close TOC file: %m");
1435 }
1436 
1437 /**********************
1438  * Convenience functions that look like standard IO functions
1439  * for writing data when in dump mode.
1440  **********************/
1441 
1442 /* Public */
1443 void
archputs(const char * s,Archive * AH)1444 archputs(const char *s, Archive *AH)
1445 {
1446 	WriteData(AH, s, strlen(s));
1447 }
1448 
1449 /* Public */
1450 int
archprintf(Archive * AH,const char * fmt,...)1451 archprintf(Archive *AH, const char *fmt,...)
1452 {
1453 	int			save_errno = errno;
1454 	char	   *p;
1455 	size_t		len = 128;		/* initial assumption about buffer size */
1456 	size_t		cnt;
1457 
1458 	for (;;)
1459 	{
1460 		va_list		args;
1461 
1462 		/* Allocate work buffer. */
1463 		p = (char *) pg_malloc(len);
1464 
1465 		/* Try to format the data. */
1466 		errno = save_errno;
1467 		va_start(args, fmt);
1468 		cnt = pvsnprintf(p, len, fmt, args);
1469 		va_end(args);
1470 
1471 		if (cnt < len)
1472 			break;				/* success */
1473 
1474 		/* Release buffer and loop around to try again with larger len. */
1475 		free(p);
1476 		len = cnt;
1477 	}
1478 
1479 	WriteData(AH, p, cnt);
1480 	free(p);
1481 	return (int) cnt;
1482 }
1483 
1484 
1485 /*******************************
1486  * Stuff below here should be 'private' to the archiver routines
1487  *******************************/
1488 
1489 static void
SetOutput(ArchiveHandle * AH,const char * filename,int compression)1490 SetOutput(ArchiveHandle *AH, const char *filename, int compression)
1491 {
1492 	int			fn;
1493 
1494 	if (filename)
1495 	{
1496 		if (strcmp(filename, "-") == 0)
1497 			fn = fileno(stdout);
1498 		else
1499 			fn = -1;
1500 	}
1501 	else if (AH->FH)
1502 		fn = fileno(AH->FH);
1503 	else if (AH->fSpec)
1504 	{
1505 		fn = -1;
1506 		filename = AH->fSpec;
1507 	}
1508 	else
1509 		fn = fileno(stdout);
1510 
1511 	/* If compression explicitly requested, use gzopen */
1512 #ifdef HAVE_LIBZ
1513 	if (compression != 0)
1514 	{
1515 		char		fmode[14];
1516 
1517 		/* Don't use PG_BINARY_x since this is zlib */
1518 		sprintf(fmode, "wb%d", compression);
1519 		if (fn >= 0)
1520 			AH->OF = gzdopen(dup(fn), fmode);
1521 		else
1522 			AH->OF = gzopen(filename, fmode);
1523 		AH->gzOut = 1;
1524 	}
1525 	else
1526 #endif
1527 	{							/* Use fopen */
1528 		if (AH->mode == archModeAppend)
1529 		{
1530 			if (fn >= 0)
1531 				AH->OF = fdopen(dup(fn), PG_BINARY_A);
1532 			else
1533 				AH->OF = fopen(filename, PG_BINARY_A);
1534 		}
1535 		else
1536 		{
1537 			if (fn >= 0)
1538 				AH->OF = fdopen(dup(fn), PG_BINARY_W);
1539 			else
1540 				AH->OF = fopen(filename, PG_BINARY_W);
1541 		}
1542 		AH->gzOut = 0;
1543 	}
1544 
1545 	if (!AH->OF)
1546 	{
1547 		if (filename)
1548 			fatal("could not open output file \"%s\": %m", filename);
1549 		else
1550 			fatal("could not open output file: %m");
1551 	}
1552 }
1553 
1554 static OutputContext
SaveOutput(ArchiveHandle * AH)1555 SaveOutput(ArchiveHandle *AH)
1556 {
1557 	OutputContext sav;
1558 
1559 	sav.OF = AH->OF;
1560 	sav.gzOut = AH->gzOut;
1561 
1562 	return sav;
1563 }
1564 
1565 static void
RestoreOutput(ArchiveHandle * AH,OutputContext savedContext)1566 RestoreOutput(ArchiveHandle *AH, OutputContext savedContext)
1567 {
1568 	int			res;
1569 
1570 	if (AH->gzOut)
1571 		res = GZCLOSE(AH->OF);
1572 	else
1573 		res = fclose(AH->OF);
1574 
1575 	if (res != 0)
1576 		fatal("could not close output file: %m");
1577 
1578 	AH->gzOut = savedContext.gzOut;
1579 	AH->OF = savedContext.OF;
1580 }
1581 
1582 
1583 
1584 /*
1585  *	Print formatted text to the output file (usually stdout).
1586  */
1587 int
ahprintf(ArchiveHandle * AH,const char * fmt,...)1588 ahprintf(ArchiveHandle *AH, const char *fmt,...)
1589 {
1590 	int			save_errno = errno;
1591 	char	   *p;
1592 	size_t		len = 128;		/* initial assumption about buffer size */
1593 	size_t		cnt;
1594 
1595 	for (;;)
1596 	{
1597 		va_list		args;
1598 
1599 		/* Allocate work buffer. */
1600 		p = (char *) pg_malloc(len);
1601 
1602 		/* Try to format the data. */
1603 		errno = save_errno;
1604 		va_start(args, fmt);
1605 		cnt = pvsnprintf(p, len, fmt, args);
1606 		va_end(args);
1607 
1608 		if (cnt < len)
1609 			break;				/* success */
1610 
1611 		/* Release buffer and loop around to try again with larger len. */
1612 		free(p);
1613 		len = cnt;
1614 	}
1615 
1616 	ahwrite(p, 1, cnt, AH);
1617 	free(p);
1618 	return (int) cnt;
1619 }
1620 
1621 /*
1622  * Single place for logic which says 'We are restoring to a direct DB connection'.
1623  */
1624 static int
RestoringToDB(ArchiveHandle * AH)1625 RestoringToDB(ArchiveHandle *AH)
1626 {
1627 	RestoreOptions *ropt = AH->public.ropt;
1628 
1629 	return (ropt && ropt->useDB && AH->connection);
1630 }
1631 
1632 /*
1633  * Dump the current contents of the LO data buffer while writing a BLOB
1634  */
1635 static void
dump_lo_buf(ArchiveHandle * AH)1636 dump_lo_buf(ArchiveHandle *AH)
1637 {
1638 	if (AH->connection)
1639 	{
1640 		int			res;
1641 
1642 		res = lo_write(AH->connection, AH->loFd, AH->lo_buf, AH->lo_buf_used);
1643 		pg_log_debug(ngettext("wrote %zu byte of large object data (result = %d)",
1644 							  "wrote %zu bytes of large object data (result = %d)",
1645 							  AH->lo_buf_used),
1646 					 AH->lo_buf_used, res);
1647 		/* We assume there are no short writes, only errors */
1648 		if (res != AH->lo_buf_used)
1649 			warn_or_exit_horribly(AH, "could not write to large object: %s",
1650 								  PQerrorMessage(AH->connection));
1651 	}
1652 	else
1653 	{
1654 		PQExpBuffer buf = createPQExpBuffer();
1655 
1656 		appendByteaLiteralAHX(buf,
1657 							  (const unsigned char *) AH->lo_buf,
1658 							  AH->lo_buf_used,
1659 							  AH);
1660 
1661 		/* Hack: turn off writingBlob so ahwrite doesn't recurse to here */
1662 		AH->writingBlob = 0;
1663 		ahprintf(AH, "SELECT pg_catalog.lowrite(0, %s);\n", buf->data);
1664 		AH->writingBlob = 1;
1665 
1666 		destroyPQExpBuffer(buf);
1667 	}
1668 	AH->lo_buf_used = 0;
1669 }
1670 
1671 
1672 /*
1673  *	Write buffer to the output file (usually stdout). This is used for
1674  *	outputting 'restore' scripts etc. It is even possible for an archive
1675  *	format to create a custom output routine to 'fake' a restore if it
1676  *	wants to generate a script (see TAR output).
1677  */
1678 void
ahwrite(const void * ptr,size_t size,size_t nmemb,ArchiveHandle * AH)1679 ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH)
1680 {
1681 	int			bytes_written = 0;
1682 
1683 	if (AH->writingBlob)
1684 	{
1685 		size_t		remaining = size * nmemb;
1686 
1687 		while (AH->lo_buf_used + remaining > AH->lo_buf_size)
1688 		{
1689 			size_t		avail = AH->lo_buf_size - AH->lo_buf_used;
1690 
1691 			memcpy((char *) AH->lo_buf + AH->lo_buf_used, ptr, avail);
1692 			ptr = (const void *) ((const char *) ptr + avail);
1693 			remaining -= avail;
1694 			AH->lo_buf_used += avail;
1695 			dump_lo_buf(AH);
1696 		}
1697 
1698 		memcpy((char *) AH->lo_buf + AH->lo_buf_used, ptr, remaining);
1699 		AH->lo_buf_used += remaining;
1700 
1701 		bytes_written = size * nmemb;
1702 	}
1703 	else if (AH->gzOut)
1704 		bytes_written = GZWRITE(ptr, size, nmemb, AH->OF);
1705 	else if (AH->CustomOutPtr)
1706 		bytes_written = AH->CustomOutPtr(AH, ptr, size * nmemb);
1707 
1708 	else
1709 	{
1710 		/*
1711 		 * If we're doing a restore, and it's direct to DB, and we're
1712 		 * connected then send it to the DB.
1713 		 */
1714 		if (RestoringToDB(AH))
1715 			bytes_written = ExecuteSqlCommandBuf(&AH->public, (const char *) ptr, size * nmemb);
1716 		else
1717 			bytes_written = fwrite(ptr, size, nmemb, AH->OF) * size;
1718 	}
1719 
1720 	if (bytes_written != size * nmemb)
1721 		WRITE_ERROR_EXIT;
1722 }
1723 
1724 /* on some error, we may decide to go on... */
1725 void
warn_or_exit_horribly(ArchiveHandle * AH,const char * fmt,...)1726 warn_or_exit_horribly(ArchiveHandle *AH, const char *fmt,...)
1727 {
1728 	va_list		ap;
1729 
1730 	switch (AH->stage)
1731 	{
1732 
1733 		case STAGE_NONE:
1734 			/* Do nothing special */
1735 			break;
1736 
1737 		case STAGE_INITIALIZING:
1738 			if (AH->stage != AH->lastErrorStage)
1739 				pg_log_generic(PG_LOG_INFO, "while INITIALIZING:");
1740 			break;
1741 
1742 		case STAGE_PROCESSING:
1743 			if (AH->stage != AH->lastErrorStage)
1744 				pg_log_generic(PG_LOG_INFO, "while PROCESSING TOC:");
1745 			break;
1746 
1747 		case STAGE_FINALIZING:
1748 			if (AH->stage != AH->lastErrorStage)
1749 				pg_log_generic(PG_LOG_INFO, "while FINALIZING:");
1750 			break;
1751 	}
1752 	if (AH->currentTE != NULL && AH->currentTE != AH->lastErrorTE)
1753 	{
1754 		pg_log_generic(PG_LOG_INFO, "from TOC entry %d; %u %u %s %s %s",
1755 					   AH->currentTE->dumpId,
1756 					   AH->currentTE->catalogId.tableoid,
1757 					   AH->currentTE->catalogId.oid,
1758 					   AH->currentTE->desc ? AH->currentTE->desc : "(no desc)",
1759 					   AH->currentTE->tag ? AH->currentTE->tag : "(no tag)",
1760 					   AH->currentTE->owner ? AH->currentTE->owner : "(no owner)");
1761 	}
1762 	AH->lastErrorStage = AH->stage;
1763 	AH->lastErrorTE = AH->currentTE;
1764 
1765 	va_start(ap, fmt);
1766 	pg_log_generic_v(PG_LOG_ERROR, fmt, ap);
1767 	va_end(ap);
1768 
1769 	if (AH->public.exit_on_error)
1770 		exit_nicely(1);
1771 	else
1772 		AH->public.n_errors++;
1773 }
1774 
#ifdef NOT_USED

/*
 * Take te out of its doubly-linked TOC list and re-insert it immediately
 * after pos.  AH is not used.  (Compiled only when NOT_USED is defined.)
 */
static void
_moveAfter(ArchiveHandle *AH, TocEntry *pos, TocEntry *te)
{
	TocEntry   *before = te->prev;
	TocEntry   *after = te->next;
	TocEntry   *follower;

	/* Unlink te from list */
	before->next = after;
	after->prev = before;

	/* and insert it after "pos" (read pos->next only after the unlink) */
	follower = pos->next;
	te->prev = pos;
	te->next = follower;
	follower->prev = te;
	pos->next = te;
}
#endif
1791 
1792 static void
_moveBefore(TocEntry * pos,TocEntry * te)1793 _moveBefore(TocEntry *pos, TocEntry *te)
1794 {
1795 	/* Unlink te from list */
1796 	te->prev->next = te->next;
1797 	te->next->prev = te->prev;
1798 
1799 	/* and insert it before "pos" */
1800 	te->prev = pos->prev;
1801 	te->next = pos;
1802 	pos->prev->next = te;
1803 	pos->prev = te;
1804 }
1805 
1806 /*
1807  * Build index arrays for the TOC list
1808  *
1809  * This should be invoked only after we have created or read in all the TOC
1810  * items.
1811  *
1812  * The arrays are indexed by dump ID (so entry zero is unused).  Note that the
1813  * array entries run only up to maxDumpId.  We might see dependency dump IDs
1814  * beyond that (if the dump was partial); so always check the array bound
1815  * before trying to touch an array entry.
1816  */
1817 static void
buildTocEntryArrays(ArchiveHandle * AH)1818 buildTocEntryArrays(ArchiveHandle *AH)
1819 {
1820 	DumpId		maxDumpId = AH->maxDumpId;
1821 	TocEntry   *te;
1822 
1823 	AH->tocsByDumpId = (TocEntry **) pg_malloc0((maxDumpId + 1) * sizeof(TocEntry *));
1824 	AH->tableDataId = (DumpId *) pg_malloc0((maxDumpId + 1) * sizeof(DumpId));
1825 
1826 	for (te = AH->toc->next; te != AH->toc; te = te->next)
1827 	{
1828 		/* this check is purely paranoia, maxDumpId should be correct */
1829 		if (te->dumpId <= 0 || te->dumpId > maxDumpId)
1830 			fatal("bad dumpId");
1831 
1832 		/* tocsByDumpId indexes all TOCs by their dump ID */
1833 		AH->tocsByDumpId[te->dumpId] = te;
1834 
1835 		/*
1836 		 * tableDataId provides the TABLE DATA item's dump ID for each TABLE
1837 		 * TOC entry that has a DATA item.  We compute this by reversing the
1838 		 * TABLE DATA item's dependency, knowing that a TABLE DATA item has
1839 		 * just one dependency and it is the TABLE item.
1840 		 */
1841 		if (strcmp(te->desc, "TABLE DATA") == 0 && te->nDeps > 0)
1842 		{
1843 			DumpId		tableId = te->dependencies[0];
1844 
1845 			/*
1846 			 * The TABLE item might not have been in the archive, if this was
1847 			 * a data-only dump; but its dump ID should be less than its data
1848 			 * item's dump ID, so there should be a place for it in the array.
1849 			 */
1850 			if (tableId <= 0 || tableId > maxDumpId)
1851 				fatal("bad table dumpId for TABLE DATA item");
1852 
1853 			AH->tableDataId[tableId] = te->dumpId;
1854 		}
1855 	}
1856 }
1857 
1858 TocEntry *
getTocEntryByDumpId(ArchiveHandle * AH,DumpId id)1859 getTocEntryByDumpId(ArchiveHandle *AH, DumpId id)
1860 {
1861 	/* build index arrays if we didn't already */
1862 	if (AH->tocsByDumpId == NULL)
1863 		buildTocEntryArrays(AH);
1864 
1865 	if (id > 0 && id <= AH->maxDumpId)
1866 		return AH->tocsByDumpId[id];
1867 
1868 	return NULL;
1869 }
1870 
1871 int
TocIDRequired(ArchiveHandle * AH,DumpId id)1872 TocIDRequired(ArchiveHandle *AH, DumpId id)
1873 {
1874 	TocEntry   *te = getTocEntryByDumpId(AH, id);
1875 
1876 	if (!te)
1877 		return 0;
1878 
1879 	return te->reqs;
1880 }
1881 
1882 size_t
WriteOffset(ArchiveHandle * AH,pgoff_t o,int wasSet)1883 WriteOffset(ArchiveHandle *AH, pgoff_t o, int wasSet)
1884 {
1885 	int			off;
1886 
1887 	/* Save the flag */
1888 	AH->WriteBytePtr(AH, wasSet);
1889 
1890 	/* Write out pgoff_t smallest byte first, prevents endian mismatch */
1891 	for (off = 0; off < sizeof(pgoff_t); off++)
1892 	{
1893 		AH->WriteBytePtr(AH, o & 0xFF);
1894 		o >>= 8;
1895 	}
1896 	return sizeof(pgoff_t) + 1;
1897 }
1898 
1899 int
ReadOffset(ArchiveHandle * AH,pgoff_t * o)1900 ReadOffset(ArchiveHandle *AH, pgoff_t * o)
1901 {
1902 	int			i;
1903 	int			off;
1904 	int			offsetFlg;
1905 
1906 	/* Initialize to zero */
1907 	*o = 0;
1908 
1909 	/* Check for old version */
1910 	if (AH->version < K_VERS_1_7)
1911 	{
1912 		/* Prior versions wrote offsets using WriteInt */
1913 		i = ReadInt(AH);
1914 		/* -1 means not set */
1915 		if (i < 0)
1916 			return K_OFFSET_POS_NOT_SET;
1917 		else if (i == 0)
1918 			return K_OFFSET_NO_DATA;
1919 
1920 		/* Cast to pgoff_t because it was written as an int. */
1921 		*o = (pgoff_t) i;
1922 		return K_OFFSET_POS_SET;
1923 	}
1924 
1925 	/*
1926 	 * Read the flag indicating the state of the data pointer. Check if valid
1927 	 * and die if not.
1928 	 *
1929 	 * This used to be handled by a negative or zero pointer, now we use an
1930 	 * extra byte specifically for the state.
1931 	 */
1932 	offsetFlg = AH->ReadBytePtr(AH) & 0xFF;
1933 
1934 	switch (offsetFlg)
1935 	{
1936 		case K_OFFSET_POS_NOT_SET:
1937 		case K_OFFSET_NO_DATA:
1938 		case K_OFFSET_POS_SET:
1939 
1940 			break;
1941 
1942 		default:
1943 			fatal("unexpected data offset flag %d", offsetFlg);
1944 	}
1945 
1946 	/*
1947 	 * Read the bytes
1948 	 */
1949 	for (off = 0; off < AH->offSize; off++)
1950 	{
1951 		if (off < sizeof(pgoff_t))
1952 			*o |= ((pgoff_t) (AH->ReadBytePtr(AH))) << (off * 8);
1953 		else
1954 		{
1955 			if (AH->ReadBytePtr(AH) != 0)
1956 				fatal("file offset in dump file is too large");
1957 		}
1958 	}
1959 
1960 	return offsetFlg;
1961 }
1962 
1963 size_t
WriteInt(ArchiveHandle * AH,int i)1964 WriteInt(ArchiveHandle *AH, int i)
1965 {
1966 	int			b;
1967 
1968 	/*
1969 	 * This is a bit yucky, but I don't want to make the binary format very
1970 	 * dependent on representation, and not knowing much about it, I write out
1971 	 * a sign byte. If you change this, don't forget to change the file
1972 	 * version #, and modify ReadInt to read the new format AS WELL AS the old
1973 	 * formats.
1974 	 */
1975 
1976 	/* SIGN byte */
1977 	if (i < 0)
1978 	{
1979 		AH->WriteBytePtr(AH, 1);
1980 		i = -i;
1981 	}
1982 	else
1983 		AH->WriteBytePtr(AH, 0);
1984 
1985 	for (b = 0; b < AH->intSize; b++)
1986 	{
1987 		AH->WriteBytePtr(AH, i & 0xFF);
1988 		i >>= 8;
1989 	}
1990 
1991 	return AH->intSize + 1;
1992 }
1993 
1994 int
ReadInt(ArchiveHandle * AH)1995 ReadInt(ArchiveHandle *AH)
1996 {
1997 	int			res = 0;
1998 	int			bv,
1999 				b;
2000 	int			sign = 0;		/* Default positive */
2001 	int			bitShift = 0;
2002 
2003 	if (AH->version > K_VERS_1_0)
2004 		/* Read a sign byte */
2005 		sign = AH->ReadBytePtr(AH);
2006 
2007 	for (b = 0; b < AH->intSize; b++)
2008 	{
2009 		bv = AH->ReadBytePtr(AH) & 0xFF;
2010 		if (bv != 0)
2011 			res = res + (bv << bitShift);
2012 		bitShift += 8;
2013 	}
2014 
2015 	if (sign)
2016 		res = -res;
2017 
2018 	return res;
2019 }
2020 
2021 size_t
WriteStr(ArchiveHandle * AH,const char * c)2022 WriteStr(ArchiveHandle *AH, const char *c)
2023 {
2024 	size_t		res;
2025 
2026 	if (c)
2027 	{
2028 		int			len = strlen(c);
2029 
2030 		res = WriteInt(AH, len);
2031 		AH->WriteBufPtr(AH, c, len);
2032 		res += len;
2033 	}
2034 	else
2035 		res = WriteInt(AH, -1);
2036 
2037 	return res;
2038 }
2039 
2040 char *
ReadStr(ArchiveHandle * AH)2041 ReadStr(ArchiveHandle *AH)
2042 {
2043 	char	   *buf;
2044 	int			l;
2045 
2046 	l = ReadInt(AH);
2047 	if (l < 0)
2048 		buf = NULL;
2049 	else
2050 	{
2051 		buf = (char *) pg_malloc(l + 1);
2052 		AH->ReadBufPtr(AH, (void *) buf, l);
2053 
2054 		buf[l] = '\0';
2055 	}
2056 
2057 	return buf;
2058 }
2059 
/*
 * Examine the input named by AH->fSpec (or stdin when fSpec is NULL) and
 * work out which archive format it is.
 *
 * - A directory containing "toc.dat" (or "toc.dat.gz" when built with
 *   HAVE_LIBZ) is archDirectory; this case returns without opening a file.
 * - A file starting with the five bytes "PGDMP" is archCustom.
 * - Otherwise the first 512 bytes must form a valid tar header (archTar).
 *   Input that begins with one of the plain-text dump headers is rejected
 *   with a hint to use psql.
 *
 * The bytes read are stored in AH->lookahead.  If this function opened the
 * file itself it closes it again and discards the lookahead; for stdin the
 * lookahead is left in place.
 *
 * Sets AH->format and returns it.  Any failure is reported through fatal().
 */
static int
_discoverArchiveFormat(ArchiveHandle *AH)
{
	FILE	   *fh;
	char		sig[6];			/* More than enough */
	size_t		cnt;
	int			wantClose = 0;	/* set when we open the file ourselves */

	pg_log_debug("attempting to ascertain archive format");

	/* Start over with a fresh, zeroed 512-byte lookahead buffer */
	if (AH->lookahead)
		free(AH->lookahead);

	AH->readHeader = 0;
	AH->lookaheadSize = 512;
	AH->lookahead = pg_malloc0(512);
	AH->lookaheadLen = 0;
	AH->lookaheadPos = 0;

	if (AH->fSpec)
	{
		struct stat st;

		wantClose = 1;

		/*
		 * Check if the specified archive is a directory. If so, check if
		 * there's a "toc.dat" (or "toc.dat.gz") file in it.
		 */
		if (stat(AH->fSpec, &st) == 0 && S_ISDIR(st.st_mode))
		{
			char		buf[MAXPGPATH];

			/* snprintf returning >= MAXPGPATH means the path was truncated */
			if (snprintf(buf, MAXPGPATH, "%s/toc.dat", AH->fSpec) >= MAXPGPATH)
				fatal("directory name too long: \"%s\"",
					  AH->fSpec);
			if (stat(buf, &st) == 0 && S_ISREG(st.st_mode))
			{
				AH->format = archDirectory;
				return AH->format;
			}

#ifdef HAVE_LIBZ
			if (snprintf(buf, MAXPGPATH, "%s/toc.dat.gz", AH->fSpec) >= MAXPGPATH)
				fatal("directory name too long: \"%s\"",
					  AH->fSpec);
			if (stat(buf, &st) == 0 && S_ISREG(st.st_mode))
			{
				AH->format = archDirectory;
				return AH->format;
			}
#endif
			fatal("directory \"%s\" does not appear to be a valid archive (\"toc.dat\" does not exist)",
				  AH->fSpec);
			fh = NULL;			/* keep compiler quiet */
		}
		else
		{
			fh = fopen(AH->fSpec, PG_BINARY_R);
			if (!fh)
				fatal("could not open input file \"%s\": %m", AH->fSpec);
		}
	}
	else
	{
		/* No file name given: read from standard input, and don't close it */
		fh = stdin;
		if (!fh)
			fatal("could not open input file: %m");
	}

	/* The first five bytes are enough to recognize a custom-format archive */
	if ((cnt = fread(sig, 1, 5, fh)) != 5)
	{
		if (ferror(fh))
			fatal("could not read input file: %m");
		else
			fatal("input file is too short (read %lu, expected 5)",
				  (unsigned long) cnt);
	}

	/* Save it, just in case we need it later */
	memcpy(&AH->lookahead[0], sig, 5);
	AH->lookaheadLen = 5;

	if (strncmp(sig, "PGDMP", 5) == 0)
	{
		/* It's custom format, stop here */
		AH->format = archCustom;
		AH->readHeader = 1;
	}
	else
	{
		/*
		 * *Maybe* we have a tar archive format file or a text dump ... So,
		 * read first 512 byte header...
		 */
		cnt = fread(&AH->lookahead[AH->lookaheadLen], 1, 512 - AH->lookaheadLen, fh);
		/* read failure is checked below */
		AH->lookaheadLen += cnt;

		if (AH->lookaheadLen >= strlen(TEXT_DUMPALL_HEADER) &&
			(strncmp(AH->lookahead, TEXT_DUMP_HEADER, strlen(TEXT_DUMP_HEADER)) == 0 ||
			 strncmp(AH->lookahead, TEXT_DUMPALL_HEADER, strlen(TEXT_DUMPALL_HEADER)) == 0))
		{
			/*
			 * looks like it's probably a text format dump. so suggest they
			 * try psql
			 */
			fatal("input file appears to be a text format dump. Please use psql.");
		}

		/* Anything shorter than a full 512-byte block can't be a tar header */
		if (AH->lookaheadLen != 512)
		{
			if (feof(fh))
				fatal("input file does not appear to be a valid archive (too short?)");
			else
				READ_ERROR_EXIT(fh);
		}

		if (!isValidTarHeader(AH->lookahead))
			fatal("input file does not appear to be a valid archive");

		AH->format = archTar;
	}

	/* Close the file if we opened it */
	if (wantClose)
	{
		if (fclose(fh) != 0)
			fatal("could not close input file: %m");
		/* Forget lookahead, since we'll re-read header after re-opening */
		AH->readHeader = 0;
		AH->lookaheadLen = 0;
	}

	return AH->format;
}
2196 
2197 
/*
 * Allocate an archive handle
 *
 * Creates a zero-filled ArchiveHandle, fills in defaults (archive version,
 * encoding, error handling, integer/offset sizes, an empty circular TOC
 * list, stdout as the output target) and then runs the initializer for the
 * archive format.
 *
 * FileSpec: name of the archive; NULL leaves AH->fSpec NULL (the debug
 *		message labels that case "(stdio)").  The string is copied.
 * fmt: requested format; archUnknown means call _discoverArchiveFormat.
 * compression, dosync, mode: stored in the handle as given.
 * setupWorkerPtr: stored in AH->SetupWorkerPtr.
 *
 * Returns the new handle.  An unrecognized format is reported via fatal().
 */
static ArchiveHandle *
_allocAH(const char *FileSpec, const ArchiveFormat fmt,
		 const int compression, bool dosync, ArchiveMode mode,
		 SetupWorkerPtrType setupWorkerPtr)
{
	ArchiveHandle *AH;

	pg_log_debug("allocating AH for %s, format %d",
				 FileSpec ? FileSpec : "(stdio)", fmt);

	AH = (ArchiveHandle *) pg_malloc0(sizeof(ArchiveHandle));

	AH->version = K_VERS_SELF;

	/* initialize for backwards compatible string processing */
	AH->public.encoding = 0;	/* PG_SQL_ASCII */
	AH->public.std_strings = false;

	/* sql error handling */
	AH->public.exit_on_error = true;
	AH->public.n_errors = 0;

	AH->archiveDumpVersion = PG_VERSION;

	AH->createDate = time(NULL);

	/* Default to this machine's int and file-offset sizes */
	AH->intSize = sizeof(int);
	AH->offSize = sizeof(pgoff_t);
	if (FileSpec)
	{
		AH->fSpec = pg_strdup(FileSpec);

		/*
		 * Not used; maybe later....
		 *
		 * AH->workDir = pg_strdup(FileSpec); for(i=strlen(FileSpec) ; i > 0 ;
		 * i--) if (AH->workDir[i-1] == '/')
		 */
	}
	else
		AH->fSpec = NULL;

	AH->currUser = NULL;		/* unknown */
	AH->currSchema = NULL;		/* ditto */
	AH->currTablespace = NULL;	/* ditto */
	AH->currTableAm = NULL;		/* ditto */

	/* The TOC is a circular doubly-linked list; this is its empty header */
	AH->toc = (TocEntry *) pg_malloc0(sizeof(TocEntry));

	AH->toc->next = AH->toc;
	AH->toc->prev = AH->toc;

	AH->mode = mode;
	AH->compression = compression;
	AH->dosync = dosync;

	memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));

	/* Open stdout with no compression for AH output handle */
	AH->gzOut = 0;
	AH->OF = stdout;

	/*
	 * On Windows, we need to use binary mode to read/write non-text files,
	 * which include all archive formats as well as compressed plain text.
	 * Force stdin/stdout into binary mode if that is what we are using.
	 */
#ifdef WIN32
	if ((fmt != archNull || compression != 0) &&
		(AH->fSpec == NULL || strcmp(AH->fSpec, "") == 0))
	{
		if (mode == archModeWrite)
			_setmode(fileno(stdout), O_BINARY);
		else
			_setmode(fileno(stdin), O_BINARY);
	}
#endif

	AH->SetupWorkerPtr = setupWorkerPtr;

	/* Sniff the input if the caller didn't say which format to expect */
	if (fmt == archUnknown)
		AH->format = _discoverArchiveFormat(AH);
	else
		AH->format = fmt;

	/* Run the initializer for the chosen format */
	switch (AH->format)
	{
		case archCustom:
			InitArchiveFmt_Custom(AH);
			break;

		case archNull:
			InitArchiveFmt_Null(AH);
			break;

		case archDirectory:
			InitArchiveFmt_Directory(AH);
			break;

		case archTar:
			InitArchiveFmt_Tar(AH);
			break;

		default:
			fatal("unrecognized file format \"%d\"", fmt);
	}

	return AH;
}
2310 
2311 /*
2312  * Write out all data (tables & blobs)
2313  */
2314 void
WriteDataChunks(ArchiveHandle * AH,ParallelState * pstate)2315 WriteDataChunks(ArchiveHandle *AH, ParallelState *pstate)
2316 {
2317 	TocEntry   *te;
2318 
2319 	if (pstate && pstate->numWorkers > 1)
2320 	{
2321 		/*
2322 		 * In parallel mode, this code runs in the leader process.  We
2323 		 * construct an array of candidate TEs, then sort it into decreasing
2324 		 * size order, then dispatch each TE to a data-transfer worker.  By
2325 		 * dumping larger tables first, we avoid getting into a situation
2326 		 * where we're down to one job and it's big, losing parallelism.
2327 		 */
2328 		TocEntry  **tes;
2329 		int			ntes;
2330 
2331 		tes = (TocEntry **) pg_malloc(AH->tocCount * sizeof(TocEntry *));
2332 		ntes = 0;
2333 		for (te = AH->toc->next; te != AH->toc; te = te->next)
2334 		{
2335 			/* Consider only TEs with dataDumper functions ... */
2336 			if (!te->dataDumper)
2337 				continue;
2338 			/* ... and ignore ones not enabled for dump */
2339 			if ((te->reqs & REQ_DATA) == 0)
2340 				continue;
2341 
2342 			tes[ntes++] = te;
2343 		}
2344 
2345 		if (ntes > 1)
2346 			qsort((void *) tes, ntes, sizeof(TocEntry *),
2347 				  TocEntrySizeCompare);
2348 
2349 		for (int i = 0; i < ntes; i++)
2350 			DispatchJobForTocEntry(AH, pstate, tes[i], ACT_DUMP,
2351 								   mark_dump_job_done, NULL);
2352 
2353 		pg_free(tes);
2354 
2355 		/* Now wait for workers to finish. */
2356 		WaitForWorkers(AH, pstate, WFW_ALL_IDLE);
2357 	}
2358 	else
2359 	{
2360 		/* Non-parallel mode: just dump all candidate TEs sequentially. */
2361 		for (te = AH->toc->next; te != AH->toc; te = te->next)
2362 		{
2363 			/* Must have same filter conditions as above */
2364 			if (!te->dataDumper)
2365 				continue;
2366 			if ((te->reqs & REQ_DATA) == 0)
2367 				continue;
2368 
2369 			WriteDataChunksForTocEntry(AH, te);
2370 		}
2371 	}
2372 }
2373 
2374 
2375 /*
2376  * Callback function that's invoked in the leader process after a step has
2377  * been parallel dumped.
2378  *
2379  * We don't need to do anything except check for worker failure.
2380  */
2381 static void
mark_dump_job_done(ArchiveHandle * AH,TocEntry * te,int status,void * callback_data)2382 mark_dump_job_done(ArchiveHandle *AH,
2383 				   TocEntry *te,
2384 				   int status,
2385 				   void *callback_data)
2386 {
2387 	pg_log_info("finished item %d %s %s",
2388 				te->dumpId, te->desc, te->tag);
2389 
2390 	if (status != 0)
2391 		fatal("worker process failed: exit code %d",
2392 			  status);
2393 }
2394 
2395 
2396 void
WriteDataChunksForTocEntry(ArchiveHandle * AH,TocEntry * te)2397 WriteDataChunksForTocEntry(ArchiveHandle *AH, TocEntry *te)
2398 {
2399 	StartDataPtrType startPtr;
2400 	EndDataPtrType endPtr;
2401 
2402 	AH->currToc = te;
2403 
2404 	if (strcmp(te->desc, "BLOBS") == 0)
2405 	{
2406 		startPtr = AH->StartBlobsPtr;
2407 		endPtr = AH->EndBlobsPtr;
2408 	}
2409 	else
2410 	{
2411 		startPtr = AH->StartDataPtr;
2412 		endPtr = AH->EndDataPtr;
2413 	}
2414 
2415 	if (startPtr != NULL)
2416 		(*startPtr) (AH, te);
2417 
2418 	/*
2419 	 * The user-provided DataDumper routine needs to call AH->WriteData
2420 	 */
2421 	te->dataDumper((Archive *) AH, te->dataDumperArg);
2422 
2423 	if (endPtr != NULL)
2424 		(*endPtr) (AH, te);
2425 
2426 	AH->currToc = NULL;
2427 }
2428 
2429 void
WriteToc(ArchiveHandle * AH)2430 WriteToc(ArchiveHandle *AH)
2431 {
2432 	TocEntry   *te;
2433 	char		workbuf[32];
2434 	int			tocCount;
2435 	int			i;
2436 
2437 	/* count entries that will actually be dumped */
2438 	tocCount = 0;
2439 	for (te = AH->toc->next; te != AH->toc; te = te->next)
2440 	{
2441 		if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_SPECIAL)) != 0)
2442 			tocCount++;
2443 	}
2444 
2445 	/* printf("%d TOC Entries to save\n", tocCount); */
2446 
2447 	WriteInt(AH, tocCount);
2448 
2449 	for (te = AH->toc->next; te != AH->toc; te = te->next)
2450 	{
2451 		if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_SPECIAL)) == 0)
2452 			continue;
2453 
2454 		WriteInt(AH, te->dumpId);
2455 		WriteInt(AH, te->dataDumper ? 1 : 0);
2456 
2457 		/* OID is recorded as a string for historical reasons */
2458 		sprintf(workbuf, "%u", te->catalogId.tableoid);
2459 		WriteStr(AH, workbuf);
2460 		sprintf(workbuf, "%u", te->catalogId.oid);
2461 		WriteStr(AH, workbuf);
2462 
2463 		WriteStr(AH, te->tag);
2464 		WriteStr(AH, te->desc);
2465 		WriteInt(AH, te->section);
2466 		WriteStr(AH, te->defn);
2467 		WriteStr(AH, te->dropStmt);
2468 		WriteStr(AH, te->copyStmt);
2469 		WriteStr(AH, te->namespace);
2470 		WriteStr(AH, te->tablespace);
2471 		WriteStr(AH, te->tableam);
2472 		WriteStr(AH, te->owner);
2473 		WriteStr(AH, "false");
2474 
2475 		/* Dump list of dependencies */
2476 		for (i = 0; i < te->nDeps; i++)
2477 		{
2478 			sprintf(workbuf, "%d", te->dependencies[i]);
2479 			WriteStr(AH, workbuf);
2480 		}
2481 		WriteStr(AH, NULL);		/* Terminate List */
2482 
2483 		if (AH->WriteExtraTocPtr)
2484 			AH->WriteExtraTocPtr(AH, te);
2485 	}
2486 }
2487 
2488 void
ReadToc(ArchiveHandle * AH)2489 ReadToc(ArchiveHandle *AH)
2490 {
2491 	int			i;
2492 	char	   *tmp;
2493 	DumpId	   *deps;
2494 	int			depIdx;
2495 	int			depSize;
2496 	TocEntry   *te;
2497 
2498 	AH->tocCount = ReadInt(AH);
2499 	AH->maxDumpId = 0;
2500 
2501 	for (i = 0; i < AH->tocCount; i++)
2502 	{
2503 		te = (TocEntry *) pg_malloc0(sizeof(TocEntry));
2504 		te->dumpId = ReadInt(AH);
2505 
2506 		if (te->dumpId > AH->maxDumpId)
2507 			AH->maxDumpId = te->dumpId;
2508 
2509 		/* Sanity check */
2510 		if (te->dumpId <= 0)
2511 			fatal("entry ID %d out of range -- perhaps a corrupt TOC",
2512 				  te->dumpId);
2513 
2514 		te->hadDumper = ReadInt(AH);
2515 
2516 		if (AH->version >= K_VERS_1_8)
2517 		{
2518 			tmp = ReadStr(AH);
2519 			sscanf(tmp, "%u", &te->catalogId.tableoid);
2520 			free(tmp);
2521 		}
2522 		else
2523 			te->catalogId.tableoid = InvalidOid;
2524 		tmp = ReadStr(AH);
2525 		sscanf(tmp, "%u", &te->catalogId.oid);
2526 		free(tmp);
2527 
2528 		te->tag = ReadStr(AH);
2529 		te->desc = ReadStr(AH);
2530 
2531 		if (AH->version >= K_VERS_1_11)
2532 		{
2533 			te->section = ReadInt(AH);
2534 		}
2535 		else
2536 		{
2537 			/*
2538 			 * Rules for pre-8.4 archives wherein pg_dump hasn't classified
2539 			 * the entries into sections.  This list need not cover entry
2540 			 * types added later than 8.4.
2541 			 */
2542 			if (strcmp(te->desc, "COMMENT") == 0 ||
2543 				strcmp(te->desc, "ACL") == 0 ||
2544 				strcmp(te->desc, "ACL LANGUAGE") == 0)
2545 				te->section = SECTION_NONE;
2546 			else if (strcmp(te->desc, "TABLE DATA") == 0 ||
2547 					 strcmp(te->desc, "BLOBS") == 0 ||
2548 					 strcmp(te->desc, "BLOB COMMENTS") == 0)
2549 				te->section = SECTION_DATA;
2550 			else if (strcmp(te->desc, "CONSTRAINT") == 0 ||
2551 					 strcmp(te->desc, "CHECK CONSTRAINT") == 0 ||
2552 					 strcmp(te->desc, "FK CONSTRAINT") == 0 ||
2553 					 strcmp(te->desc, "INDEX") == 0 ||
2554 					 strcmp(te->desc, "RULE") == 0 ||
2555 					 strcmp(te->desc, "TRIGGER") == 0)
2556 				te->section = SECTION_POST_DATA;
2557 			else
2558 				te->section = SECTION_PRE_DATA;
2559 		}
2560 
2561 		te->defn = ReadStr(AH);
2562 		te->dropStmt = ReadStr(AH);
2563 
2564 		if (AH->version >= K_VERS_1_3)
2565 			te->copyStmt = ReadStr(AH);
2566 
2567 		if (AH->version >= K_VERS_1_6)
2568 			te->namespace = ReadStr(AH);
2569 
2570 		if (AH->version >= K_VERS_1_10)
2571 			te->tablespace = ReadStr(AH);
2572 
2573 		if (AH->version >= K_VERS_1_14)
2574 			te->tableam = ReadStr(AH);
2575 
2576 		te->owner = ReadStr(AH);
2577 		if (AH->version < K_VERS_1_9 || strcmp(ReadStr(AH), "true") == 0)
2578 			pg_log_warning("restoring tables WITH OIDS is not supported anymore");
2579 
2580 		/* Read TOC entry dependencies */
2581 		if (AH->version >= K_VERS_1_5)
2582 		{
2583 			depSize = 100;
2584 			deps = (DumpId *) pg_malloc(sizeof(DumpId) * depSize);
2585 			depIdx = 0;
2586 			for (;;)
2587 			{
2588 				tmp = ReadStr(AH);
2589 				if (!tmp)
2590 					break;		/* end of list */
2591 				if (depIdx >= depSize)
2592 				{
2593 					depSize *= 2;
2594 					deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depSize);
2595 				}
2596 				sscanf(tmp, "%d", &deps[depIdx]);
2597 				free(tmp);
2598 				depIdx++;
2599 			}
2600 
2601 			if (depIdx > 0)		/* We have a non-null entry */
2602 			{
2603 				deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depIdx);
2604 				te->dependencies = deps;
2605 				te->nDeps = depIdx;
2606 			}
2607 			else
2608 			{
2609 				free(deps);
2610 				te->dependencies = NULL;
2611 				te->nDeps = 0;
2612 			}
2613 		}
2614 		else
2615 		{
2616 			te->dependencies = NULL;
2617 			te->nDeps = 0;
2618 		}
2619 		te->dataLength = 0;
2620 
2621 		if (AH->ReadExtraTocPtr)
2622 			AH->ReadExtraTocPtr(AH, te);
2623 
2624 		pg_log_debug("read TOC entry %d (ID %d) for %s %s",
2625 					 i, te->dumpId, te->desc, te->tag);
2626 
2627 		/* link completed entry into TOC circular list */
2628 		te->prev = AH->toc->prev;
2629 		AH->toc->prev->next = te;
2630 		AH->toc->prev = te;
2631 		te->next = AH->toc;
2632 
2633 		/* special processing immediately upon read for some items */
2634 		if (strcmp(te->desc, "ENCODING") == 0)
2635 			processEncodingEntry(AH, te);
2636 		else if (strcmp(te->desc, "STDSTRINGS") == 0)
2637 			processStdStringsEntry(AH, te);
2638 		else if (strcmp(te->desc, "SEARCHPATH") == 0)
2639 			processSearchPathEntry(AH, te);
2640 	}
2641 }
2642 
2643 static void
processEncodingEntry(ArchiveHandle * AH,TocEntry * te)2644 processEncodingEntry(ArchiveHandle *AH, TocEntry *te)
2645 {
2646 	/* te->defn should have the form SET client_encoding = 'foo'; */
2647 	char	   *defn = pg_strdup(te->defn);
2648 	char	   *ptr1;
2649 	char	   *ptr2 = NULL;
2650 	int			encoding;
2651 
2652 	ptr1 = strchr(defn, '\'');
2653 	if (ptr1)
2654 		ptr2 = strchr(++ptr1, '\'');
2655 	if (ptr2)
2656 	{
2657 		*ptr2 = '\0';
2658 		encoding = pg_char_to_encoding(ptr1);
2659 		if (encoding < 0)
2660 			fatal("unrecognized encoding \"%s\"",
2661 				  ptr1);
2662 		AH->public.encoding = encoding;
2663 	}
2664 	else
2665 		fatal("invalid ENCODING item: %s",
2666 			  te->defn);
2667 
2668 	free(defn);
2669 }
2670 
2671 static void
processStdStringsEntry(ArchiveHandle * AH,TocEntry * te)2672 processStdStringsEntry(ArchiveHandle *AH, TocEntry *te)
2673 {
2674 	/* te->defn should have the form SET standard_conforming_strings = 'x'; */
2675 	char	   *ptr1;
2676 
2677 	ptr1 = strchr(te->defn, '\'');
2678 	if (ptr1 && strncmp(ptr1, "'on'", 4) == 0)
2679 		AH->public.std_strings = true;
2680 	else if (ptr1 && strncmp(ptr1, "'off'", 5) == 0)
2681 		AH->public.std_strings = false;
2682 	else
2683 		fatal("invalid STDSTRINGS item: %s",
2684 			  te->defn);
2685 }
2686 
2687 static void
processSearchPathEntry(ArchiveHandle * AH,TocEntry * te)2688 processSearchPathEntry(ArchiveHandle *AH, TocEntry *te)
2689 {
2690 	/*
2691 	 * te->defn should contain a command to set search_path.  We just copy it
2692 	 * verbatim for use later.
2693 	 */
2694 	AH->public.searchpath = pg_strdup(te->defn);
2695 }
2696 
2697 static void
StrictNamesCheck(RestoreOptions * ropt)2698 StrictNamesCheck(RestoreOptions *ropt)
2699 {
2700 	const char *missing_name;
2701 
2702 	Assert(ropt->strict_names);
2703 
2704 	if (ropt->schemaNames.head != NULL)
2705 	{
2706 		missing_name = simple_string_list_not_touched(&ropt->schemaNames);
2707 		if (missing_name != NULL)
2708 			fatal("schema \"%s\" not found", missing_name);
2709 	}
2710 
2711 	if (ropt->tableNames.head != NULL)
2712 	{
2713 		missing_name = simple_string_list_not_touched(&ropt->tableNames);
2714 		if (missing_name != NULL)
2715 			fatal("table \"%s\" not found", missing_name);
2716 	}
2717 
2718 	if (ropt->indexNames.head != NULL)
2719 	{
2720 		missing_name = simple_string_list_not_touched(&ropt->indexNames);
2721 		if (missing_name != NULL)
2722 			fatal("index \"%s\" not found", missing_name);
2723 	}
2724 
2725 	if (ropt->functionNames.head != NULL)
2726 	{
2727 		missing_name = simple_string_list_not_touched(&ropt->functionNames);
2728 		if (missing_name != NULL)
2729 			fatal("function \"%s\" not found", missing_name);
2730 	}
2731 
2732 	if (ropt->triggerNames.head != NULL)
2733 	{
2734 		missing_name = simple_string_list_not_touched(&ropt->triggerNames);
2735 		if (missing_name != NULL)
2736 			fatal("trigger \"%s\" not found", missing_name);
2737 	}
2738 }
2739 
/*
 * Determine whether we want to restore this TOC entry.
 *
 * Returns 0 if entry should be skipped, or some combination of the
 * REQ_SCHEMA and REQ_DATA bits if we want to restore schema and/or data
 * portions of this TOC entry, or REQ_SPECIAL if it's a special entry.
 *
 * te: the entry being considered.
 * curSection: the section checked against ropt->dumpSections.
 * AH: archive handle; its public.ropt supplies the restore options.
 *
 * The checks run in a fixed order: special entries, the DATABASE rule,
 * per-type exclusions, the section filter, idWanted[], the selective
 * dump/restore options, and finally masking of the schema/data bits.
 */
static int
_tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
{
	int			res = REQ_SCHEMA | REQ_DATA;	/* start with both, mask below */
	RestoreOptions *ropt = AH->public.ropt;

	/* These items are treated specially */
	if (strcmp(te->desc, "ENCODING") == 0 ||
		strcmp(te->desc, "STDSTRINGS") == 0 ||
		strcmp(te->desc, "SEARCHPATH") == 0)
		return REQ_SPECIAL;

	/*
	 * DATABASE and DATABASE PROPERTIES also have a special rule: they are
	 * restored in createDB mode, and not restored otherwise, independently of
	 * all else.
	 */
	if (strcmp(te->desc, "DATABASE") == 0 ||
		strcmp(te->desc, "DATABASE PROPERTIES") == 0)
	{
		if (ropt->createDB)
			return REQ_SCHEMA;
		else
			return 0;
	}

	/*
	 * Process exclusions that affect certain classes of TOC entries.
	 */

	/* If it's an ACL, maybe ignore it */
	if (ropt->aclsSkip && _tocEntryIsACL(te))
		return 0;

	/* If it's a comment, maybe ignore it */
	if (ropt->no_comments && strcmp(te->desc, "COMMENT") == 0)
		return 0;

	/*
	 * If it's a publication or a table part of a publication, maybe ignore
	 * it.
	 */
	if (ropt->no_publications &&
		(strcmp(te->desc, "PUBLICATION") == 0 ||
		 strcmp(te->desc, "PUBLICATION TABLE") == 0))
		return 0;

	/* If it's a security label, maybe ignore it */
	if (ropt->no_security_labels && strcmp(te->desc, "SECURITY LABEL") == 0)
		return 0;

	/* If it's a subscription, maybe ignore it */
	if (ropt->no_subscriptions && strcmp(te->desc, "SUBSCRIPTION") == 0)
		return 0;

	/* Ignore it if section is not to be dumped/restored */
	switch (curSection)
	{
		case SECTION_PRE_DATA:
			if (!(ropt->dumpSections & DUMP_PRE_DATA))
				return 0;
			break;
		case SECTION_DATA:
			if (!(ropt->dumpSections & DUMP_DATA))
				return 0;
			break;
		case SECTION_POST_DATA:
			if (!(ropt->dumpSections & DUMP_POST_DATA))
				return 0;
			break;
		default:
			/* shouldn't get here, really, but ignore it */
			return 0;
	}

	/* Ignore it if rejected by idWanted[] (cf. SortTocFromFile) */
	/* idWanted[] is indexed from zero, hence dumpId - 1 */
	if (ropt->idWanted && !ropt->idWanted[te->dumpId - 1])
		return 0;

	/*
	 * Check options for selective dump/restore.
	 */
	if (strcmp(te->desc, "ACL") == 0 ||
		strcmp(te->desc, "COMMENT") == 0 ||
		strcmp(te->desc, "SECURITY LABEL") == 0)
	{
		/* Database properties react to createDB, not selectivity options. */
		if (strncmp(te->tag, "DATABASE ", 9) == 0)
		{
			if (!ropt->createDB)
				return 0;
		}
		else if (ropt->schemaNames.head != NULL ||
				 ropt->schemaExcludeNames.head != NULL ||
				 ropt->selTypes)
		{
			/*
			 * In a selective dump/restore, we want to restore these dependent
			 * TOC entry types only if their parent object is being restored.
			 * Without selectivity options, we let through everything in the
			 * archive.  Note there may be such entries with no parent, eg
			 * non-default ACLs for built-in objects.
			 *
			 * This code depends on the parent having been marked already,
			 * which should be the case; if it isn't, perhaps due to
			 * SortTocFromFile rearrangement, skipping the dependent entry
			 * seems prudent anyway.
			 *
			 * Ideally we'd handle, eg, table CHECK constraints this way too.
			 * But it's hard to tell which of their dependencies is the one to
			 * consult.
			 */
			if (te->nDeps != 1 ||
				TocIDRequired(AH, te->dependencies[0]) == 0)
				return 0;
		}
	}
	else
	{
		/* Apply selective-restore rules for standalone TOC entries. */
		if (ropt->schemaNames.head != NULL)
		{
			/*
			 * With a schema list in force, an entry that has no namespace
			 * cannot match any listed schema, so it is skipped.
			 */
			if (!te->namespace)
				return 0;
			if (!simple_string_list_member(&ropt->schemaNames, te->namespace))
				return 0;
		}

		/* Skip entries whose namespace is on the exclusion list */
		if (ropt->schemaExcludeNames.head != NULL &&
			te->namespace &&
			simple_string_list_member(&ropt->schemaExcludeNames, te->namespace))
			return 0;

		/*
		 * When selTypes is set, an entry survives only if its type's flag
		 * (selTable, selIndex, selFunction, selTrigger) is set and, where a
		 * name list for that type is non-empty, its tag is on the list.
		 * Entry types not named below are skipped.
		 */
		if (ropt->selTypes)
		{
			if (strcmp(te->desc, "TABLE") == 0 ||
				strcmp(te->desc, "TABLE DATA") == 0 ||
				strcmp(te->desc, "VIEW") == 0 ||
				strcmp(te->desc, "FOREIGN TABLE") == 0 ||
				strcmp(te->desc, "MATERIALIZED VIEW") == 0 ||
				strcmp(te->desc, "MATERIALIZED VIEW DATA") == 0 ||
				strcmp(te->desc, "SEQUENCE") == 0 ||
				strcmp(te->desc, "SEQUENCE SET") == 0)
			{
				if (!ropt->selTable)
					return 0;
				if (ropt->tableNames.head != NULL &&
					!simple_string_list_member(&ropt->tableNames, te->tag))
					return 0;
			}
			else if (strcmp(te->desc, "INDEX") == 0)
			{
				if (!ropt->selIndex)
					return 0;
				if (ropt->indexNames.head != NULL &&
					!simple_string_list_member(&ropt->indexNames, te->tag))
					return 0;
			}
			else if (strcmp(te->desc, "FUNCTION") == 0 ||
					 strcmp(te->desc, "AGGREGATE") == 0 ||
					 strcmp(te->desc, "PROCEDURE") == 0)
			{
				if (!ropt->selFunction)
					return 0;
				if (ropt->functionNames.head != NULL &&
					!simple_string_list_member(&ropt->functionNames, te->tag))
					return 0;
			}
			else if (strcmp(te->desc, "TRIGGER") == 0)
			{
				if (!ropt->selTrigger)
					return 0;
				if (ropt->triggerNames.head != NULL &&
					!simple_string_list_member(&ropt->triggerNames, te->tag))
					return 0;
			}
			else
				return 0;
		}
	}

	/*
	 * Determine whether the TOC entry contains schema and/or data components,
	 * and mask off inapplicable REQ bits.  If it had a dataDumper, assume
	 * it's both schema and data.  Otherwise it's probably schema-only, but
	 * there are exceptions.
	 */
	if (!te->hadDumper)
	{
		/*
		 * Special Case: If 'SEQUENCE SET' or anything to do with BLOBs, then
		 * it is considered a data entry.  We don't need to check for the
		 * BLOBS entry or old-style BLOB COMMENTS, because they will have
		 * hadDumper = true ... but we do need to check new-style BLOB ACLs,
		 * comments, etc.
		 */
		if (strcmp(te->desc, "SEQUENCE SET") == 0 ||
			strcmp(te->desc, "BLOB") == 0 ||
			(strcmp(te->desc, "ACL") == 0 &&
			 strncmp(te->tag, "LARGE OBJECT ", 13) == 0) ||
			(strcmp(te->desc, "COMMENT") == 0 &&
			 strncmp(te->tag, "LARGE OBJECT ", 13) == 0) ||
			(strcmp(te->desc, "SECURITY LABEL") == 0 &&
			 strncmp(te->tag, "LARGE OBJECT ", 13) == 0))
			res = res & REQ_DATA;
		else
			res = res & ~REQ_DATA;
	}

	/* If there's no definition command, there's no schema component */
	if (!te->defn || !te->defn[0])
		res = res & ~REQ_SCHEMA;

	/*
	 * Special case: <Init> type with <Max OID> tag; this is obsolete and we
	 * always ignore it.
	 */
	if ((strcmp(te->desc, "<Init>") == 0) && (strcmp(te->tag, "Max OID") == 0))
		return 0;

	/* Mask it if we only want schema */
	if (ropt->schemaOnly)
	{
		/*
		 * The sequence_data option overrides schemaOnly for SEQUENCE SET.
		 *
		 * In binary-upgrade mode, even with schemaOnly set, we do not mask
		 * out large objects.  (Only large object definitions, comments and
		 * other metadata should be generated in binary-upgrade mode, not the
		 * actual data, but that need not concern us here.)
		 */
		if (!(ropt->sequence_data && strcmp(te->desc, "SEQUENCE SET") == 0) &&
			!(ropt->binary_upgrade &&
			  (strcmp(te->desc, "BLOB") == 0 ||
			   (strcmp(te->desc, "ACL") == 0 &&
				strncmp(te->tag, "LARGE OBJECT ", 13) == 0) ||
			   (strcmp(te->desc, "COMMENT") == 0 &&
				strncmp(te->tag, "LARGE OBJECT ", 13) == 0) ||
			   (strcmp(te->desc, "SECURITY LABEL") == 0 &&
				strncmp(te->tag, "LARGE OBJECT ", 13) == 0))))
			res = res & REQ_SCHEMA;
	}

	/* Mask it if we only want data */
	if (ropt->dataOnly)
		res = res & REQ_DATA;

	return res;
}
2997 
2998 /*
2999  * Identify which pass we should restore this TOC entry in.
3000  *
3001  * See notes with the RestorePass typedef in pg_backup_archiver.h.
3002  */
3003 static RestorePass
_tocEntryRestorePass(TocEntry * te)3004 _tocEntryRestorePass(TocEntry *te)
3005 {
3006 	/* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */
3007 	if (strcmp(te->desc, "ACL") == 0 ||
3008 		strcmp(te->desc, "ACL LANGUAGE") == 0 ||
3009 		strcmp(te->desc, "DEFAULT ACL") == 0)
3010 		return RESTORE_PASS_ACL;
3011 	if (strcmp(te->desc, "EVENT TRIGGER") == 0 ||
3012 		strcmp(te->desc, "MATERIALIZED VIEW DATA") == 0)
3013 		return RESTORE_PASS_POST_ACL;
3014 
3015 	/*
3016 	 * Comments need to be emitted in the same pass as their parent objects.
3017 	 * ACLs haven't got comments, and neither do matview data objects, but
3018 	 * event triggers do.  (Fortunately, event triggers haven't got ACLs, or
3019 	 * we'd need yet another weird special case.)
3020 	 */
3021 	if (strcmp(te->desc, "COMMENT") == 0 &&
3022 		strncmp(te->tag, "EVENT TRIGGER ", 14) == 0)
3023 		return RESTORE_PASS_POST_ACL;
3024 
3025 	/* All else can be handled in the main pass. */
3026 	return RESTORE_PASS_MAIN;
3027 }
3028 
3029 /*
3030  * Identify TOC entries that are ACLs.
3031  *
3032  * Note: it seems worth duplicating some code here to avoid a hard-wired
3033  * assumption that these are exactly the same entries that we restore during
3034  * the RESTORE_PASS_ACL phase.
3035  */
3036 static bool
_tocEntryIsACL(TocEntry * te)3037 _tocEntryIsACL(TocEntry *te)
3038 {
3039 	/* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */
3040 	if (strcmp(te->desc, "ACL") == 0 ||
3041 		strcmp(te->desc, "ACL LANGUAGE") == 0 ||
3042 		strcmp(te->desc, "DEFAULT ACL") == 0)
3043 		return true;
3044 	return false;
3045 }
3046 
3047 /*
3048  * Issue SET commands for parameters that we want to have set the same way
3049  * at all times during execution of a restore script.
3050  */
3051 static void
_doSetFixedOutputState(ArchiveHandle * AH)3052 _doSetFixedOutputState(ArchiveHandle *AH)
3053 {
3054 	RestoreOptions *ropt = AH->public.ropt;
3055 
3056 	/*
3057 	 * Disable timeouts to allow for slow commands, idle parallel workers, etc
3058 	 */
3059 	ahprintf(AH, "SET statement_timeout = 0;\n");
3060 	ahprintf(AH, "SET lock_timeout = 0;\n");
3061 	ahprintf(AH, "SET idle_in_transaction_session_timeout = 0;\n");
3062 
3063 	/* Select the correct character set encoding */
3064 	ahprintf(AH, "SET client_encoding = '%s';\n",
3065 			 pg_encoding_to_char(AH->public.encoding));
3066 
3067 	/* Select the correct string literal syntax */
3068 	ahprintf(AH, "SET standard_conforming_strings = %s;\n",
3069 			 AH->public.std_strings ? "on" : "off");
3070 
3071 	/* Select the role to be used during restore */
3072 	if (ropt && ropt->use_role)
3073 		ahprintf(AH, "SET ROLE %s;\n", fmtId(ropt->use_role));
3074 
3075 	/* Select the dump-time search_path */
3076 	if (AH->public.searchpath)
3077 		ahprintf(AH, "%s", AH->public.searchpath);
3078 
3079 	/* Make sure function checking is disabled */
3080 	ahprintf(AH, "SET check_function_bodies = false;\n");
3081 
3082 	/* Ensure that all valid XML data will be accepted */
3083 	ahprintf(AH, "SET xmloption = content;\n");
3084 
3085 	/* Avoid annoying notices etc */
3086 	ahprintf(AH, "SET client_min_messages = warning;\n");
3087 	if (!AH->public.std_strings)
3088 		ahprintf(AH, "SET escape_string_warning = off;\n");
3089 
3090 	/* Adjust row-security state */
3091 	if (ropt && ropt->enable_row_security)
3092 		ahprintf(AH, "SET row_security = on;\n");
3093 	else
3094 		ahprintf(AH, "SET row_security = off;\n");
3095 
3096 	ahprintf(AH, "\n");
3097 }
3098 
3099 /*
3100  * Issue a SET SESSION AUTHORIZATION command.  Caller is responsible
3101  * for updating state if appropriate.  If user is NULL or an empty string,
3102  * the specification DEFAULT will be used.
3103  */
3104 static void
_doSetSessionAuth(ArchiveHandle * AH,const char * user)3105 _doSetSessionAuth(ArchiveHandle *AH, const char *user)
3106 {
3107 	PQExpBuffer cmd = createPQExpBuffer();
3108 
3109 	appendPQExpBufferStr(cmd, "SET SESSION AUTHORIZATION ");
3110 
3111 	/*
3112 	 * SQL requires a string literal here.  Might as well be correct.
3113 	 */
3114 	if (user && *user)
3115 		appendStringLiteralAHX(cmd, user, AH);
3116 	else
3117 		appendPQExpBufferStr(cmd, "DEFAULT");
3118 	appendPQExpBufferChar(cmd, ';');
3119 
3120 	if (RestoringToDB(AH))
3121 	{
3122 		PGresult   *res;
3123 
3124 		res = PQexec(AH->connection, cmd->data);
3125 
3126 		if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
3127 			/* NOT warn_or_exit_horribly... use -O instead to skip this. */
3128 			fatal("could not set session user to \"%s\": %s",
3129 				  user, PQerrorMessage(AH->connection));
3130 
3131 		PQclear(res);
3132 	}
3133 	else
3134 		ahprintf(AH, "%s\n\n", cmd->data);
3135 
3136 	destroyPQExpBuffer(cmd);
3137 }
3138 
3139 
3140 /*
3141  * Issue the commands to connect to the specified database.
3142  *
3143  * If we're currently restoring right into a database, this will
3144  * actually establish a connection. Otherwise it puts a \connect into
3145  * the script output.
3146  */
3147 static void
_reconnectToDB(ArchiveHandle * AH,const char * dbname)3148 _reconnectToDB(ArchiveHandle *AH, const char *dbname)
3149 {
3150 	if (RestoringToDB(AH))
3151 		ReconnectToServer(AH, dbname);
3152 	else
3153 	{
3154 		PQExpBufferData connectbuf;
3155 
3156 		initPQExpBuffer(&connectbuf);
3157 		appendPsqlMetaConnect(&connectbuf, dbname);
3158 		ahprintf(AH, "%s\n", connectbuf.data);
3159 		termPQExpBuffer(&connectbuf);
3160 	}
3161 
3162 	/*
3163 	 * NOTE: currUser keeps track of what the imaginary session user in our
3164 	 * script is.  It's now effectively reset to the original userID.
3165 	 */
3166 	if (AH->currUser)
3167 		free(AH->currUser);
3168 	AH->currUser = NULL;
3169 
3170 	/* don't assume we still know the output schema, tablespace, etc either */
3171 	if (AH->currSchema)
3172 		free(AH->currSchema);
3173 	AH->currSchema = NULL;
3174 	if (AH->currTablespace)
3175 		free(AH->currTablespace);
3176 	AH->currTablespace = NULL;
3177 
3178 	/* re-establish fixed state */
3179 	_doSetFixedOutputState(AH);
3180 }
3181 
3182 /*
3183  * Become the specified user, and update state to avoid redundant commands
3184  *
3185  * NULL or empty argument is taken to mean restoring the session default
3186  */
3187 static void
_becomeUser(ArchiveHandle * AH,const char * user)3188 _becomeUser(ArchiveHandle *AH, const char *user)
3189 {
3190 	if (!user)
3191 		user = "";				/* avoid null pointers */
3192 
3193 	if (AH->currUser && strcmp(AH->currUser, user) == 0)
3194 		return;					/* no need to do anything */
3195 
3196 	_doSetSessionAuth(AH, user);
3197 
3198 	/*
3199 	 * NOTE: currUser keeps track of what the imaginary session user in our
3200 	 * script is
3201 	 */
3202 	if (AH->currUser)
3203 		free(AH->currUser);
3204 	AH->currUser = pg_strdup(user);
3205 }
3206 
3207 /*
3208  * Become the owner of the given TOC entry object.  If
3209  * changes in ownership are not allowed, this doesn't do anything.
3210  */
3211 static void
_becomeOwner(ArchiveHandle * AH,TocEntry * te)3212 _becomeOwner(ArchiveHandle *AH, TocEntry *te)
3213 {
3214 	RestoreOptions *ropt = AH->public.ropt;
3215 
3216 	if (ropt && (ropt->noOwner || !ropt->use_setsessauth))
3217 		return;
3218 
3219 	_becomeUser(AH, te->owner);
3220 }
3221 
3222 
3223 /*
3224  * Issue the commands to select the specified schema as the current schema
3225  * in the target database.
3226  */
3227 static void
_selectOutputSchema(ArchiveHandle * AH,const char * schemaName)3228 _selectOutputSchema(ArchiveHandle *AH, const char *schemaName)
3229 {
3230 	PQExpBuffer qry;
3231 
3232 	/*
3233 	 * If there was a SEARCHPATH TOC entry, we're supposed to just stay with
3234 	 * that search_path rather than switching to entry-specific paths.
3235 	 * Otherwise, it's an old archive that will not restore correctly unless
3236 	 * we set the search_path as it's expecting.
3237 	 */
3238 	if (AH->public.searchpath)
3239 		return;
3240 
3241 	if (!schemaName || *schemaName == '\0' ||
3242 		(AH->currSchema && strcmp(AH->currSchema, schemaName) == 0))
3243 		return;					/* no need to do anything */
3244 
3245 	qry = createPQExpBuffer();
3246 
3247 	appendPQExpBuffer(qry, "SET search_path = %s",
3248 					  fmtId(schemaName));
3249 	if (strcmp(schemaName, "pg_catalog") != 0)
3250 		appendPQExpBufferStr(qry, ", pg_catalog");
3251 
3252 	if (RestoringToDB(AH))
3253 	{
3254 		PGresult   *res;
3255 
3256 		res = PQexec(AH->connection, qry->data);
3257 
3258 		if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
3259 			warn_or_exit_horribly(AH,
3260 								  "could not set search_path to \"%s\": %s",
3261 								  schemaName, PQerrorMessage(AH->connection));
3262 
3263 		PQclear(res);
3264 	}
3265 	else
3266 		ahprintf(AH, "%s;\n\n", qry->data);
3267 
3268 	if (AH->currSchema)
3269 		free(AH->currSchema);
3270 	AH->currSchema = pg_strdup(schemaName);
3271 
3272 	destroyPQExpBuffer(qry);
3273 }
3274 
3275 /*
3276  * Issue the commands to select the specified tablespace as the current one
3277  * in the target database.
3278  */
3279 static void
_selectTablespace(ArchiveHandle * AH,const char * tablespace)3280 _selectTablespace(ArchiveHandle *AH, const char *tablespace)
3281 {
3282 	RestoreOptions *ropt = AH->public.ropt;
3283 	PQExpBuffer qry;
3284 	const char *want,
3285 			   *have;
3286 
3287 	/* do nothing in --no-tablespaces mode */
3288 	if (ropt->noTablespace)
3289 		return;
3290 
3291 	have = AH->currTablespace;
3292 	want = tablespace;
3293 
3294 	/* no need to do anything for non-tablespace object */
3295 	if (!want)
3296 		return;
3297 
3298 	if (have && strcmp(want, have) == 0)
3299 		return;					/* no need to do anything */
3300 
3301 	qry = createPQExpBuffer();
3302 
3303 	if (strcmp(want, "") == 0)
3304 	{
3305 		/* We want the tablespace to be the database's default */
3306 		appendPQExpBufferStr(qry, "SET default_tablespace = ''");
3307 	}
3308 	else
3309 	{
3310 		/* We want an explicit tablespace */
3311 		appendPQExpBuffer(qry, "SET default_tablespace = %s", fmtId(want));
3312 	}
3313 
3314 	if (RestoringToDB(AH))
3315 	{
3316 		PGresult   *res;
3317 
3318 		res = PQexec(AH->connection, qry->data);
3319 
3320 		if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
3321 			warn_or_exit_horribly(AH,
3322 								  "could not set default_tablespace to %s: %s",
3323 								  fmtId(want), PQerrorMessage(AH->connection));
3324 
3325 		PQclear(res);
3326 	}
3327 	else
3328 		ahprintf(AH, "%s;\n\n", qry->data);
3329 
3330 	if (AH->currTablespace)
3331 		free(AH->currTablespace);
3332 	AH->currTablespace = pg_strdup(want);
3333 
3334 	destroyPQExpBuffer(qry);
3335 }
3336 
3337 /*
3338  * Set the proper default_table_access_method value for the table.
3339  */
3340 static void
_selectTableAccessMethod(ArchiveHandle * AH,const char * tableam)3341 _selectTableAccessMethod(ArchiveHandle *AH, const char *tableam)
3342 {
3343 	PQExpBuffer cmd;
3344 	const char *want,
3345 			   *have;
3346 
3347 	have = AH->currTableAm;
3348 	want = tableam;
3349 
3350 	if (!want)
3351 		return;
3352 
3353 	if (have && strcmp(want, have) == 0)
3354 		return;
3355 
3356 	cmd = createPQExpBuffer();
3357 	appendPQExpBuffer(cmd, "SET default_table_access_method = %s;", fmtId(want));
3358 
3359 	if (RestoringToDB(AH))
3360 	{
3361 		PGresult   *res;
3362 
3363 		res = PQexec(AH->connection, cmd->data);
3364 
3365 		if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
3366 			warn_or_exit_horribly(AH,
3367 								  "could not set default_table_access_method: %s",
3368 								  PQerrorMessage(AH->connection));
3369 
3370 		PQclear(res);
3371 	}
3372 	else
3373 		ahprintf(AH, "%s\n\n", cmd->data);
3374 
3375 	destroyPQExpBuffer(cmd);
3376 
3377 	AH->currTableAm = pg_strdup(want);
3378 }
3379 
3380 /*
3381  * Extract an object description for a TOC entry, and append it to buf.
3382  *
3383  * This is used for ALTER ... OWNER TO.
3384  */
3385 static void
_getObjectDescription(PQExpBuffer buf,TocEntry * te)3386 _getObjectDescription(PQExpBuffer buf, TocEntry *te)
3387 {
3388 	const char *type = te->desc;
3389 
3390 	/* Use ALTER TABLE for views and sequences */
3391 	if (strcmp(type, "VIEW") == 0 || strcmp(type, "SEQUENCE") == 0 ||
3392 		strcmp(type, "MATERIALIZED VIEW") == 0)
3393 		type = "TABLE";
3394 
3395 	/* objects that don't require special decoration */
3396 	if (strcmp(type, "COLLATION") == 0 ||
3397 		strcmp(type, "CONVERSION") == 0 ||
3398 		strcmp(type, "DOMAIN") == 0 ||
3399 		strcmp(type, "TABLE") == 0 ||
3400 		strcmp(type, "TYPE") == 0 ||
3401 		strcmp(type, "FOREIGN TABLE") == 0 ||
3402 		strcmp(type, "TEXT SEARCH DICTIONARY") == 0 ||
3403 		strcmp(type, "TEXT SEARCH CONFIGURATION") == 0 ||
3404 		strcmp(type, "STATISTICS") == 0 ||
3405 	/* non-schema-specified objects */
3406 		strcmp(type, "DATABASE") == 0 ||
3407 		strcmp(type, "PROCEDURAL LANGUAGE") == 0 ||
3408 		strcmp(type, "SCHEMA") == 0 ||
3409 		strcmp(type, "EVENT TRIGGER") == 0 ||
3410 		strcmp(type, "FOREIGN DATA WRAPPER") == 0 ||
3411 		strcmp(type, "SERVER") == 0 ||
3412 		strcmp(type, "PUBLICATION") == 0 ||
3413 		strcmp(type, "SUBSCRIPTION") == 0 ||
3414 		strcmp(type, "USER MAPPING") == 0)
3415 	{
3416 		appendPQExpBuffer(buf, "%s ", type);
3417 		if (te->namespace && *te->namespace)
3418 			appendPQExpBuffer(buf, "%s.", fmtId(te->namespace));
3419 		appendPQExpBufferStr(buf, fmtId(te->tag));
3420 		return;
3421 	}
3422 
3423 	/* BLOBs just have a name, but it's numeric so must not use fmtId */
3424 	if (strcmp(type, "BLOB") == 0)
3425 	{
3426 		appendPQExpBuffer(buf, "LARGE OBJECT %s", te->tag);
3427 		return;
3428 	}
3429 
3430 	/*
3431 	 * These object types require additional decoration.  Fortunately, the
3432 	 * information needed is exactly what's in the DROP command.
3433 	 */
3434 	if (strcmp(type, "AGGREGATE") == 0 ||
3435 		strcmp(type, "FUNCTION") == 0 ||
3436 		strcmp(type, "OPERATOR") == 0 ||
3437 		strcmp(type, "OPERATOR CLASS") == 0 ||
3438 		strcmp(type, "OPERATOR FAMILY") == 0 ||
3439 		strcmp(type, "PROCEDURE") == 0)
3440 	{
3441 		/* Chop "DROP " off the front and make a modifiable copy */
3442 		char	   *first = pg_strdup(te->dropStmt + 5);
3443 		char	   *last;
3444 
3445 		/* point to last character in string */
3446 		last = first + strlen(first) - 1;
3447 
3448 		/* Strip off any ';' or '\n' at the end */
3449 		while (last >= first && (*last == '\n' || *last == ';'))
3450 			last--;
3451 		*(last + 1) = '\0';
3452 
3453 		appendPQExpBufferStr(buf, first);
3454 
3455 		free(first);
3456 		return;
3457 	}
3458 
3459 	pg_log_warning("don't know how to set owner for object type \"%s\"",
3460 				   type);
3461 }
3462 
3463 /*
3464  * Emit the SQL commands to create the object represented by a TOC entry
3465  *
3466  * This now also includes issuing an ALTER OWNER command to restore the
3467  * object's ownership, if wanted.  But note that the object's permissions
3468  * will remain at default, until the matching ACL TOC entry is restored.
3469  */
3470 static void
_printTocEntry(ArchiveHandle * AH,TocEntry * te,bool isData)3471 _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData)
3472 {
3473 	RestoreOptions *ropt = AH->public.ropt;
3474 
3475 	/* Select owner, schema, tablespace and default AM as necessary */
3476 	_becomeOwner(AH, te);
3477 	_selectOutputSchema(AH, te->namespace);
3478 	_selectTablespace(AH, te->tablespace);
3479 	_selectTableAccessMethod(AH, te->tableam);
3480 
3481 	/* Emit header comment for item */
3482 	if (!AH->noTocComments)
3483 	{
3484 		const char *pfx;
3485 		char	   *sanitized_name;
3486 		char	   *sanitized_schema;
3487 		char	   *sanitized_owner;
3488 
3489 		if (isData)
3490 			pfx = "Data for ";
3491 		else
3492 			pfx = "";
3493 
3494 		ahprintf(AH, "--\n");
3495 		if (AH->public.verbose)
3496 		{
3497 			ahprintf(AH, "-- TOC entry %d (class %u OID %u)\n",
3498 					 te->dumpId, te->catalogId.tableoid, te->catalogId.oid);
3499 			if (te->nDeps > 0)
3500 			{
3501 				int			i;
3502 
3503 				ahprintf(AH, "-- Dependencies:");
3504 				for (i = 0; i < te->nDeps; i++)
3505 					ahprintf(AH, " %d", te->dependencies[i]);
3506 				ahprintf(AH, "\n");
3507 			}
3508 		}
3509 
3510 		sanitized_name = sanitize_line(te->tag, false);
3511 		sanitized_schema = sanitize_line(te->namespace, true);
3512 		sanitized_owner = sanitize_line(ropt->noOwner ? NULL : te->owner, true);
3513 
3514 		ahprintf(AH, "-- %sName: %s; Type: %s; Schema: %s; Owner: %s",
3515 				 pfx, sanitized_name, te->desc, sanitized_schema,
3516 				 sanitized_owner);
3517 
3518 		free(sanitized_name);
3519 		free(sanitized_schema);
3520 		free(sanitized_owner);
3521 
3522 		if (te->tablespace && strlen(te->tablespace) > 0 && !ropt->noTablespace)
3523 		{
3524 			char	   *sanitized_tablespace;
3525 
3526 			sanitized_tablespace = sanitize_line(te->tablespace, false);
3527 			ahprintf(AH, "; Tablespace: %s", sanitized_tablespace);
3528 			free(sanitized_tablespace);
3529 		}
3530 		ahprintf(AH, "\n");
3531 
3532 		if (AH->PrintExtraTocPtr != NULL)
3533 			AH->PrintExtraTocPtr(AH, te);
3534 		ahprintf(AH, "--\n\n");
3535 	}
3536 
3537 	/*
3538 	 * Actually print the definition.
3539 	 *
3540 	 * Really crude hack for suppressing AUTHORIZATION clause that old pg_dump
3541 	 * versions put into CREATE SCHEMA.  We have to do this when --no-owner
3542 	 * mode is selected.  This is ugly, but I see no other good way ...
3543 	 */
3544 	if (ropt->noOwner && strcmp(te->desc, "SCHEMA") == 0)
3545 	{
3546 		ahprintf(AH, "CREATE SCHEMA %s;\n\n\n", fmtId(te->tag));
3547 	}
3548 	else
3549 	{
3550 		if (te->defn && strlen(te->defn) > 0)
3551 			ahprintf(AH, "%s\n\n", te->defn);
3552 	}
3553 
3554 	/*
3555 	 * If we aren't using SET SESSION AUTH to determine ownership, we must
3556 	 * instead issue an ALTER OWNER command.  We assume that anything without
3557 	 * a DROP command is not a separately ownable object.  All the categories
3558 	 * with DROP commands must appear in one list or the other.
3559 	 */
3560 	if (!ropt->noOwner && !ropt->use_setsessauth &&
3561 		te->owner && strlen(te->owner) > 0 &&
3562 		te->dropStmt && strlen(te->dropStmt) > 0)
3563 	{
3564 		if (strcmp(te->desc, "AGGREGATE") == 0 ||
3565 			strcmp(te->desc, "BLOB") == 0 ||
3566 			strcmp(te->desc, "COLLATION") == 0 ||
3567 			strcmp(te->desc, "CONVERSION") == 0 ||
3568 			strcmp(te->desc, "DATABASE") == 0 ||
3569 			strcmp(te->desc, "DOMAIN") == 0 ||
3570 			strcmp(te->desc, "FUNCTION") == 0 ||
3571 			strcmp(te->desc, "OPERATOR") == 0 ||
3572 			strcmp(te->desc, "OPERATOR CLASS") == 0 ||
3573 			strcmp(te->desc, "OPERATOR FAMILY") == 0 ||
3574 			strcmp(te->desc, "PROCEDURE") == 0 ||
3575 			strcmp(te->desc, "PROCEDURAL LANGUAGE") == 0 ||
3576 			strcmp(te->desc, "SCHEMA") == 0 ||
3577 			strcmp(te->desc, "EVENT TRIGGER") == 0 ||
3578 			strcmp(te->desc, "TABLE") == 0 ||
3579 			strcmp(te->desc, "TYPE") == 0 ||
3580 			strcmp(te->desc, "VIEW") == 0 ||
3581 			strcmp(te->desc, "MATERIALIZED VIEW") == 0 ||
3582 			strcmp(te->desc, "SEQUENCE") == 0 ||
3583 			strcmp(te->desc, "FOREIGN TABLE") == 0 ||
3584 			strcmp(te->desc, "TEXT SEARCH DICTIONARY") == 0 ||
3585 			strcmp(te->desc, "TEXT SEARCH CONFIGURATION") == 0 ||
3586 			strcmp(te->desc, "FOREIGN DATA WRAPPER") == 0 ||
3587 			strcmp(te->desc, "SERVER") == 0 ||
3588 			strcmp(te->desc, "STATISTICS") == 0 ||
3589 			strcmp(te->desc, "PUBLICATION") == 0 ||
3590 			strcmp(te->desc, "SUBSCRIPTION") == 0)
3591 		{
3592 			PQExpBuffer temp = createPQExpBuffer();
3593 
3594 			appendPQExpBufferStr(temp, "ALTER ");
3595 			_getObjectDescription(temp, te);
3596 			appendPQExpBuffer(temp, " OWNER TO %s;", fmtId(te->owner));
3597 			ahprintf(AH, "%s\n\n", temp->data);
3598 			destroyPQExpBuffer(temp);
3599 		}
3600 		else if (strcmp(te->desc, "CAST") == 0 ||
3601 				 strcmp(te->desc, "CHECK CONSTRAINT") == 0 ||
3602 				 strcmp(te->desc, "CONSTRAINT") == 0 ||
3603 				 strcmp(te->desc, "DATABASE PROPERTIES") == 0 ||
3604 				 strcmp(te->desc, "DEFAULT") == 0 ||
3605 				 strcmp(te->desc, "FK CONSTRAINT") == 0 ||
3606 				 strcmp(te->desc, "INDEX") == 0 ||
3607 				 strcmp(te->desc, "RULE") == 0 ||
3608 				 strcmp(te->desc, "TRIGGER") == 0 ||
3609 				 strcmp(te->desc, "ROW SECURITY") == 0 ||
3610 				 strcmp(te->desc, "POLICY") == 0 ||
3611 				 strcmp(te->desc, "USER MAPPING") == 0)
3612 		{
3613 			/* these object types don't have separate owners */
3614 		}
3615 		else
3616 		{
3617 			pg_log_warning("don't know how to set owner for object type \"%s\"",
3618 						   te->desc);
3619 		}
3620 	}
3621 
3622 	/*
3623 	 * If it's an ACL entry, it might contain SET SESSION AUTHORIZATION
3624 	 * commands, so we can no longer assume we know the current auth setting.
3625 	 */
3626 	if (_tocEntryIsACL(te))
3627 	{
3628 		if (AH->currUser)
3629 			free(AH->currUser);
3630 		AH->currUser = NULL;
3631 	}
3632 }
3633 
/*
 * Produce a copy of a string that is safe to place on a single line of an
 * SQL comment or TOC listing: every newline and carriage return is replaced
 * by a space.  Keeping each logical output line on one physical line
 * prevents corruption of the dump, which in the worst case could become an
 * SQL injection vulnerability if someone incautiously loaded a dump
 * containing objects with maliciously crafted names.
 *
 * The result is always freshly allocated.  For a NULL input the result is
 * "-" when want_hyphen is true and "" otherwise.
 *
 * Names are not quoted, so the name fields are not automatically parseable.
 * "pg_restore -L" doesn't care because it only examines the dumpId field,
 * but someday we might want to try harder.
 */
static char *
sanitize_line(const char *str, bool want_hyphen)
{
	char	   *copy;
	size_t		i;

	if (str == NULL)
	{
		if (want_hyphen)
			return pg_strdup("-");
		return pg_strdup("");
	}

	copy = pg_strdup(str);

	for (i = 0; copy[i] != '\0'; i++)
	{
		if (copy[i] == '\n' || copy[i] == '\r')
			copy[i] = ' ';
	}

	return copy;
}
3669 
3670 /*
3671  * Write the file header for a custom-format archive
3672  */
3673 void
WriteHead(ArchiveHandle * AH)3674 WriteHead(ArchiveHandle *AH)
3675 {
3676 	struct tm	crtm;
3677 
3678 	AH->WriteBufPtr(AH, "PGDMP", 5);	/* Magic code */
3679 	AH->WriteBytePtr(AH, ARCHIVE_MAJOR(AH->version));
3680 	AH->WriteBytePtr(AH, ARCHIVE_MINOR(AH->version));
3681 	AH->WriteBytePtr(AH, ARCHIVE_REV(AH->version));
3682 	AH->WriteBytePtr(AH, AH->intSize);
3683 	AH->WriteBytePtr(AH, AH->offSize);
3684 	AH->WriteBytePtr(AH, AH->format);
3685 	WriteInt(AH, AH->compression);
3686 	crtm = *localtime(&AH->createDate);
3687 	WriteInt(AH, crtm.tm_sec);
3688 	WriteInt(AH, crtm.tm_min);
3689 	WriteInt(AH, crtm.tm_hour);
3690 	WriteInt(AH, crtm.tm_mday);
3691 	WriteInt(AH, crtm.tm_mon);
3692 	WriteInt(AH, crtm.tm_year);
3693 	WriteInt(AH, crtm.tm_isdst);
3694 	WriteStr(AH, PQdb(AH->connection));
3695 	WriteStr(AH, AH->public.remoteVersionStr);
3696 	WriteStr(AH, PG_VERSION);
3697 }
3698 
3699 void
ReadHead(ArchiveHandle * AH)3700 ReadHead(ArchiveHandle *AH)
3701 {
3702 	char		vmaj,
3703 				vmin,
3704 				vrev;
3705 	int			fmt;
3706 
3707 	/*
3708 	 * If we haven't already read the header, do so.
3709 	 *
3710 	 * NB: this code must agree with _discoverArchiveFormat().  Maybe find a
3711 	 * way to unify the cases?
3712 	 */
3713 	if (!AH->readHeader)
3714 	{
3715 		char		tmpMag[7];
3716 
3717 		AH->ReadBufPtr(AH, tmpMag, 5);
3718 
3719 		if (strncmp(tmpMag, "PGDMP", 5) != 0)
3720 			fatal("did not find magic string in file header");
3721 	}
3722 
3723 	vmaj = AH->ReadBytePtr(AH);
3724 	vmin = AH->ReadBytePtr(AH);
3725 
3726 	if (vmaj > 1 || (vmaj == 1 && vmin > 0))	/* Version > 1.0 */
3727 		vrev = AH->ReadBytePtr(AH);
3728 	else
3729 		vrev = 0;
3730 
3731 	AH->version = MAKE_ARCHIVE_VERSION(vmaj, vmin, vrev);
3732 
3733 	if (AH->version < K_VERS_1_0 || AH->version > K_VERS_MAX)
3734 		fatal("unsupported version (%d.%d) in file header",
3735 			  vmaj, vmin);
3736 
3737 	AH->intSize = AH->ReadBytePtr(AH);
3738 	if (AH->intSize > 32)
3739 		fatal("sanity check on integer size (%lu) failed",
3740 			  (unsigned long) AH->intSize);
3741 
3742 	if (AH->intSize > sizeof(int))
3743 		pg_log_warning("archive was made on a machine with larger integers, some operations might fail");
3744 
3745 	if (AH->version >= K_VERS_1_7)
3746 		AH->offSize = AH->ReadBytePtr(AH);
3747 	else
3748 		AH->offSize = AH->intSize;
3749 
3750 	fmt = AH->ReadBytePtr(AH);
3751 
3752 	if (AH->format != fmt)
3753 		fatal("expected format (%d) differs from format found in file (%d)",
3754 			  AH->format, fmt);
3755 
3756 	if (AH->version >= K_VERS_1_2)
3757 	{
3758 		if (AH->version < K_VERS_1_4)
3759 			AH->compression = AH->ReadBytePtr(AH);
3760 		else
3761 			AH->compression = ReadInt(AH);
3762 	}
3763 	else
3764 		AH->compression = Z_DEFAULT_COMPRESSION;
3765 
3766 #ifndef HAVE_LIBZ
3767 	if (AH->compression != 0)
3768 		pg_log_warning("archive is compressed, but this installation does not support compression -- no data will be available");
3769 #endif
3770 
3771 	if (AH->version >= K_VERS_1_4)
3772 	{
3773 		struct tm	crtm;
3774 
3775 		crtm.tm_sec = ReadInt(AH);
3776 		crtm.tm_min = ReadInt(AH);
3777 		crtm.tm_hour = ReadInt(AH);
3778 		crtm.tm_mday = ReadInt(AH);
3779 		crtm.tm_mon = ReadInt(AH);
3780 		crtm.tm_year = ReadInt(AH);
3781 		crtm.tm_isdst = ReadInt(AH);
3782 
3783 		/*
3784 		 * Newer versions of glibc have mktime() report failure if tm_isdst is
3785 		 * inconsistent with the prevailing timezone, e.g. tm_isdst = 1 when
3786 		 * TZ=UTC.  This is problematic when restoring an archive under a
3787 		 * different timezone setting.  If we get a failure, try again with
3788 		 * tm_isdst set to -1 ("don't know").
3789 		 *
3790 		 * XXX with or without this hack, we reconstruct createDate
3791 		 * incorrectly when the prevailing timezone is different from
3792 		 * pg_dump's.  Next time we bump the archive version, we should flush
3793 		 * this representation and store a plain seconds-since-the-Epoch
3794 		 * timestamp instead.
3795 		 */
3796 		AH->createDate = mktime(&crtm);
3797 		if (AH->createDate == (time_t) -1)
3798 		{
3799 			crtm.tm_isdst = -1;
3800 			AH->createDate = mktime(&crtm);
3801 			if (AH->createDate == (time_t) -1)
3802 				pg_log_warning("invalid creation date in header");
3803 		}
3804 	}
3805 
3806 	if (AH->version >= K_VERS_1_4)
3807 	{
3808 		AH->archdbname = ReadStr(AH);
3809 	}
3810 
3811 	if (AH->version >= K_VERS_1_10)
3812 	{
3813 		AH->archiveRemoteVersion = ReadStr(AH);
3814 		AH->archiveDumpVersion = ReadStr(AH);
3815 	}
3816 }
3817 
3818 
3819 /*
3820  * checkSeek
3821  *	  check to see if ftell/fseek can be performed.
3822  */
3823 bool
checkSeek(FILE * fp)3824 checkSeek(FILE *fp)
3825 {
3826 	pgoff_t		tpos;
3827 
3828 	/* Check that ftello works on this file */
3829 	tpos = ftello(fp);
3830 	if (tpos < 0)
3831 		return false;
3832 
3833 	/*
3834 	 * Check that fseeko(SEEK_SET) works, too.  NB: we used to try to test
3835 	 * this with fseeko(fp, 0, SEEK_CUR).  But some platforms treat that as a
3836 	 * successful no-op even on files that are otherwise unseekable.
3837 	 */
3838 	if (fseeko(fp, tpos, SEEK_SET) != 0)
3839 		return false;
3840 
3841 	return true;
3842 }
3843 
3844 
3845 /*
3846  * dumpTimestamp
3847  */
3848 static void
dumpTimestamp(ArchiveHandle * AH,const char * msg,time_t tim)3849 dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim)
3850 {
3851 	char		buf[64];
3852 
3853 	if (strftime(buf, sizeof(buf), PGDUMP_STRFTIME_FMT, localtime(&tim)) != 0)
3854 		ahprintf(AH, "-- %s %s\n\n", msg, buf);
3855 }
3856 
3857 /*
3858  * Main engine for parallel restore.
3859  *
3860  * Parallel restore is done in three phases.  In this first phase,
3861  * we'll process all SECTION_PRE_DATA TOC entries that are allowed to be
3862  * processed in the RESTORE_PASS_MAIN pass.  (In practice, that's all
3863  * PRE_DATA items other than ACLs.)  Entries we can't process now are
3864  * added to the pending_list for later phases to deal with.
3865  */
3866 static void
restore_toc_entries_prefork(ArchiveHandle * AH,TocEntry * pending_list)3867 restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list)
3868 {
3869 	bool		skipped_some;
3870 	TocEntry   *next_work_item;
3871 
3872 	pg_log_debug("entering restore_toc_entries_prefork");
3873 
3874 	/* Adjust dependency information */
3875 	fix_dependencies(AH);
3876 
3877 	/*
3878 	 * Do all the early stuff in a single connection in the parent. There's no
3879 	 * great point in running it in parallel, in fact it will actually run
3880 	 * faster in a single connection because we avoid all the connection and
3881 	 * setup overhead.  Also, pre-9.2 pg_dump versions were not very good
3882 	 * about showing all the dependencies of SECTION_PRE_DATA items, so we do
3883 	 * not risk trying to process them out-of-order.
3884 	 *
3885 	 * Stuff that we can't do immediately gets added to the pending_list.
3886 	 * Note: we don't yet filter out entries that aren't going to be restored.
3887 	 * They might participate in dependency chains connecting entries that
3888 	 * should be restored, so we treat them as live until we actually process
3889 	 * them.
3890 	 *
3891 	 * Note: as of 9.2, it should be guaranteed that all PRE_DATA items appear
3892 	 * before DATA items, and all DATA items before POST_DATA items.  That is
3893 	 * not certain to be true in older archives, though, and in any case use
3894 	 * of a list file would destroy that ordering (cf. SortTocFromFile).  So
3895 	 * this loop cannot assume that it holds.
3896 	 */
3897 	AH->restorePass = RESTORE_PASS_MAIN;
3898 	skipped_some = false;
3899 	for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next)
3900 	{
3901 		bool		do_now = true;
3902 
3903 		if (next_work_item->section != SECTION_PRE_DATA)
3904 		{
3905 			/* DATA and POST_DATA items are just ignored for now */
3906 			if (next_work_item->section == SECTION_DATA ||
3907 				next_work_item->section == SECTION_POST_DATA)
3908 			{
3909 				do_now = false;
3910 				skipped_some = true;
3911 			}
3912 			else
3913 			{
3914 				/*
3915 				 * SECTION_NONE items, such as comments, can be processed now
3916 				 * if we are still in the PRE_DATA part of the archive.  Once
3917 				 * we've skipped any items, we have to consider whether the
3918 				 * comment's dependencies are satisfied, so skip it for now.
3919 				 */
3920 				if (skipped_some)
3921 					do_now = false;
3922 			}
3923 		}
3924 
3925 		/*
3926 		 * Also skip items that need to be forced into later passes.  We need
3927 		 * not set skipped_some in this case, since by assumption no main-pass
3928 		 * items could depend on these.
3929 		 */
3930 		if (_tocEntryRestorePass(next_work_item) != RESTORE_PASS_MAIN)
3931 			do_now = false;
3932 
3933 		if (do_now)
3934 		{
3935 			/* OK, restore the item and update its dependencies */
3936 			pg_log_info("processing item %d %s %s",
3937 						next_work_item->dumpId,
3938 						next_work_item->desc, next_work_item->tag);
3939 
3940 			(void) restore_toc_entry(AH, next_work_item, false);
3941 
3942 			/* Reduce dependencies, but don't move anything to ready_list */
3943 			reduce_dependencies(AH, next_work_item, NULL);
3944 		}
3945 		else
3946 		{
3947 			/* Nope, so add it to pending_list */
3948 			pending_list_append(pending_list, next_work_item);
3949 		}
3950 	}
3951 
3952 	/*
3953 	 * Now close parent connection in prep for parallel steps.  We do this
3954 	 * mainly to ensure that we don't exceed the specified number of parallel
3955 	 * connections.
3956 	 */
3957 	DisconnectDatabase(&AH->public);
3958 
3959 	/* blow away any transient state from the old connection */
3960 	if (AH->currUser)
3961 		free(AH->currUser);
3962 	AH->currUser = NULL;
3963 	if (AH->currSchema)
3964 		free(AH->currSchema);
3965 	AH->currSchema = NULL;
3966 	if (AH->currTablespace)
3967 		free(AH->currTablespace);
3968 	AH->currTablespace = NULL;
3969 	if (AH->currTableAm)
3970 		free(AH->currTableAm);
3971 	AH->currTableAm = NULL;
3972 }
3973 
3974 /*
3975  * Main engine for parallel restore.
3976  *
3977  * Parallel restore is done in three phases.  In this second phase,
3978  * we process entries by dispatching them to parallel worker children
3979  * (processes on Unix, threads on Windows), each of which connects
3980  * separately to the database.  Inter-entry dependencies are respected,
3981  * and so is the RestorePass multi-pass structure.  When we can no longer
3982  * make any entries ready to process, we exit.  Normally, there will be
3983  * nothing left to do; but if there is, the third phase will mop up.
3984  */
3985 static void
restore_toc_entries_parallel(ArchiveHandle * AH,ParallelState * pstate,TocEntry * pending_list)3986 restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
3987 							 TocEntry *pending_list)
3988 {
3989 	ParallelReadyList ready_list;
3990 	TocEntry   *next_work_item;
3991 
3992 	pg_log_debug("entering restore_toc_entries_parallel");
3993 
3994 	/* Set up ready_list with enough room for all known TocEntrys */
3995 	ready_list_init(&ready_list, AH->tocCount);
3996 
3997 	/*
3998 	 * The pending_list contains all items that we need to restore.  Move all
3999 	 * items that are available to process immediately into the ready_list.
4000 	 * After this setup, the pending list is everything that needs to be done
4001 	 * but is blocked by one or more dependencies, while the ready list
4002 	 * contains items that have no remaining dependencies and are OK to
4003 	 * process in the current restore pass.
4004 	 */
4005 	AH->restorePass = RESTORE_PASS_MAIN;
4006 	move_to_ready_list(pending_list, &ready_list, AH->restorePass);
4007 
4008 	/*
4009 	 * main parent loop
4010 	 *
4011 	 * Keep going until there is no worker still running AND there is no work
4012 	 * left to be done.  Note invariant: at top of loop, there should always
4013 	 * be at least one worker available to dispatch a job to.
4014 	 */
4015 	pg_log_info("entering main parallel loop");
4016 
4017 	for (;;)
4018 	{
4019 		/* Look for an item ready to be dispatched to a worker */
4020 		next_work_item = pop_next_work_item(&ready_list, pstate);
4021 		if (next_work_item != NULL)
4022 		{
4023 			/* If not to be restored, don't waste time launching a worker */
4024 			if ((next_work_item->reqs & (REQ_SCHEMA | REQ_DATA)) == 0)
4025 			{
4026 				pg_log_info("skipping item %d %s %s",
4027 							next_work_item->dumpId,
4028 							next_work_item->desc, next_work_item->tag);
4029 				/* Update its dependencies as though we'd completed it */
4030 				reduce_dependencies(AH, next_work_item, &ready_list);
4031 				/* Loop around to see if anything else can be dispatched */
4032 				continue;
4033 			}
4034 
4035 			pg_log_info("launching item %d %s %s",
4036 						next_work_item->dumpId,
4037 						next_work_item->desc, next_work_item->tag);
4038 
4039 			/* Dispatch to some worker */
4040 			DispatchJobForTocEntry(AH, pstate, next_work_item, ACT_RESTORE,
4041 								   mark_restore_job_done, &ready_list);
4042 		}
4043 		else if (IsEveryWorkerIdle(pstate))
4044 		{
4045 			/*
4046 			 * Nothing is ready and no worker is running, so we're done with
4047 			 * the current pass or maybe with the whole process.
4048 			 */
4049 			if (AH->restorePass == RESTORE_PASS_LAST)
4050 				break;			/* No more parallel processing is possible */
4051 
4052 			/* Advance to next restore pass */
4053 			AH->restorePass++;
4054 			/* That probably allows some stuff to be made ready */
4055 			move_to_ready_list(pending_list, &ready_list, AH->restorePass);
4056 			/* Loop around to see if anything's now ready */
4057 			continue;
4058 		}
4059 		else
4060 		{
4061 			/*
4062 			 * We have nothing ready, but at least one child is working, so
4063 			 * wait for some subjob to finish.
4064 			 */
4065 		}
4066 
4067 		/*
4068 		 * Before dispatching another job, check to see if anything has
4069 		 * finished.  We should check every time through the loop so as to
4070 		 * reduce dependencies as soon as possible.  If we were unable to
4071 		 * dispatch any job this time through, wait until some worker finishes
4072 		 * (and, hopefully, unblocks some pending item).  If we did dispatch
4073 		 * something, continue as soon as there's at least one idle worker.
4074 		 * Note that in either case, there's guaranteed to be at least one
4075 		 * idle worker when we return to the top of the loop.  This ensures we
4076 		 * won't block inside DispatchJobForTocEntry, which would be
4077 		 * undesirable: we'd rather postpone dispatching until we see what's
4078 		 * been unblocked by finished jobs.
4079 		 */
4080 		WaitForWorkers(AH, pstate,
4081 					   next_work_item ? WFW_ONE_IDLE : WFW_GOT_STATUS);
4082 	}
4083 
4084 	/* There should now be nothing in ready_list. */
4085 	Assert(ready_list.first_te > ready_list.last_te);
4086 
4087 	ready_list_free(&ready_list);
4088 
4089 	pg_log_info("finished main parallel loop");
4090 }
4091 
4092 /*
4093  * Main engine for parallel restore.
4094  *
4095  * Parallel restore is done in three phases.  In this third phase,
4096  * we mop up any remaining TOC entries by processing them serially.
4097  * This phase normally should have nothing to do, but if we've somehow
4098  * gotten stuck due to circular dependencies or some such, this provides
4099  * at least some chance of completing the restore successfully.
4100  */
4101 static void
restore_toc_entries_postfork(ArchiveHandle * AH,TocEntry * pending_list)4102 restore_toc_entries_postfork(ArchiveHandle *AH, TocEntry *pending_list)
4103 {
4104 	RestoreOptions *ropt = AH->public.ropt;
4105 	TocEntry   *te;
4106 
4107 	pg_log_debug("entering restore_toc_entries_postfork");
4108 
4109 	/*
4110 	 * Now reconnect the single parent connection.
4111 	 */
4112 	ConnectDatabase((Archive *) AH, &ropt->cparams, true);
4113 
4114 	/* re-establish fixed state */
4115 	_doSetFixedOutputState(AH);
4116 
4117 	/*
4118 	 * Make sure there is no work left due to, say, circular dependencies, or
4119 	 * some other pathological condition.  If so, do it in the single parent
4120 	 * connection.  We don't sweat about RestorePass ordering; it's likely we
4121 	 * already violated that.
4122 	 */
4123 	for (te = pending_list->pending_next; te != pending_list; te = te->pending_next)
4124 	{
4125 		pg_log_info("processing missed item %d %s %s",
4126 					te->dumpId, te->desc, te->tag);
4127 		(void) restore_toc_entry(AH, te, false);
4128 	}
4129 }
4130 
4131 /*
4132  * Check if te1 has an exclusive lock requirement for an item that te2 also
4133  * requires, whether or not te2's requirement is for an exclusive lock.
4134  */
4135 static bool
has_lock_conflicts(TocEntry * te1,TocEntry * te2)4136 has_lock_conflicts(TocEntry *te1, TocEntry *te2)
4137 {
4138 	int			j,
4139 				k;
4140 
4141 	for (j = 0; j < te1->nLockDeps; j++)
4142 	{
4143 		for (k = 0; k < te2->nDeps; k++)
4144 		{
4145 			if (te1->lockDeps[j] == te2->dependencies[k])
4146 				return true;
4147 		}
4148 	}
4149 	return false;
4150 }
4151 
4152 
4153 /*
4154  * Initialize the header of the pending-items list.
4155  *
4156  * This is a circular list with a dummy TocEntry as header, just like the
4157  * main TOC list; but we use separate list links so that an entry can be in
4158  * the main TOC list as well as in the pending list.
4159  */
4160 static void
pending_list_header_init(TocEntry * l)4161 pending_list_header_init(TocEntry *l)
4162 {
4163 	l->pending_prev = l->pending_next = l;
4164 }
4165 
4166 /* Append te to the end of the pending-list headed by l */
4167 static void
pending_list_append(TocEntry * l,TocEntry * te)4168 pending_list_append(TocEntry *l, TocEntry *te)
4169 {
4170 	te->pending_prev = l->pending_prev;
4171 	l->pending_prev->pending_next = te;
4172 	l->pending_prev = te;
4173 	te->pending_next = l;
4174 }
4175 
4176 /* Remove te from the pending-list */
4177 static void
pending_list_remove(TocEntry * te)4178 pending_list_remove(TocEntry *te)
4179 {
4180 	te->pending_prev->pending_next = te->pending_next;
4181 	te->pending_next->pending_prev = te->pending_prev;
4182 	te->pending_prev = NULL;
4183 	te->pending_next = NULL;
4184 }
4185 
4186 
4187 /*
4188  * Initialize the ready_list with enough room for up to tocCount entries.
4189  */
4190 static void
ready_list_init(ParallelReadyList * ready_list,int tocCount)4191 ready_list_init(ParallelReadyList *ready_list, int tocCount)
4192 {
4193 	ready_list->tes = (TocEntry **)
4194 		pg_malloc(tocCount * sizeof(TocEntry *));
4195 	ready_list->first_te = 0;
4196 	ready_list->last_te = -1;
4197 	ready_list->sorted = false;
4198 }
4199 
4200 /*
4201  * Free storage for a ready_list.
4202  */
4203 static void
ready_list_free(ParallelReadyList * ready_list)4204 ready_list_free(ParallelReadyList *ready_list)
4205 {
4206 	pg_free(ready_list->tes);
4207 }
4208 
4209 /* Add te to the ready_list */
4210 static void
ready_list_insert(ParallelReadyList * ready_list,TocEntry * te)4211 ready_list_insert(ParallelReadyList *ready_list, TocEntry *te)
4212 {
4213 	ready_list->tes[++ready_list->last_te] = te;
4214 	/* List is (probably) not sorted anymore. */
4215 	ready_list->sorted = false;
4216 }
4217 
4218 /* Remove the i'th entry in the ready_list */
4219 static void
ready_list_remove(ParallelReadyList * ready_list,int i)4220 ready_list_remove(ParallelReadyList *ready_list, int i)
4221 {
4222 	int			f = ready_list->first_te;
4223 
4224 	Assert(i >= f && i <= ready_list->last_te);
4225 
4226 	/*
4227 	 * In the typical case where the item to be removed is the first ready
4228 	 * entry, we need only increment first_te to remove it.  Otherwise, move
4229 	 * the entries before it to compact the list.  (This preserves sortedness,
4230 	 * if any.)  We could alternatively move the entries after i, but there
4231 	 * are typically many more of those.
4232 	 */
4233 	if (i > f)
4234 	{
4235 		TocEntry  **first_te_ptr = &ready_list->tes[f];
4236 
4237 		memmove(first_te_ptr + 1, first_te_ptr, (i - f) * sizeof(TocEntry *));
4238 	}
4239 	ready_list->first_te++;
4240 }
4241 
4242 /* Sort the ready_list into the desired order */
4243 static void
ready_list_sort(ParallelReadyList * ready_list)4244 ready_list_sort(ParallelReadyList *ready_list)
4245 {
4246 	if (!ready_list->sorted)
4247 	{
4248 		int			n = ready_list->last_te - ready_list->first_te + 1;
4249 
4250 		if (n > 1)
4251 			qsort(ready_list->tes + ready_list->first_te, n,
4252 				  sizeof(TocEntry *),
4253 				  TocEntrySizeCompare);
4254 		ready_list->sorted = true;
4255 	}
4256 }
4257 
4258 /* qsort comparator for sorting TocEntries by dataLength */
4259 static int
TocEntrySizeCompare(const void * p1,const void * p2)4260 TocEntrySizeCompare(const void *p1, const void *p2)
4261 {
4262 	const TocEntry *te1 = *(const TocEntry *const *) p1;
4263 	const TocEntry *te2 = *(const TocEntry *const *) p2;
4264 
4265 	/* Sort by decreasing dataLength */
4266 	if (te1->dataLength > te2->dataLength)
4267 		return -1;
4268 	if (te1->dataLength < te2->dataLength)
4269 		return 1;
4270 
4271 	/* For equal dataLengths, sort by dumpId, just to be stable */
4272 	if (te1->dumpId < te2->dumpId)
4273 		return -1;
4274 	if (te1->dumpId > te2->dumpId)
4275 		return 1;
4276 
4277 	return 0;
4278 }
4279 
4280 
4281 /*
4282  * Move all immediately-ready items from pending_list to ready_list.
4283  *
4284  * Items are considered ready if they have no remaining dependencies and
4285  * they belong in the current restore pass.  (See also reduce_dependencies,
4286  * which applies the same logic one-at-a-time.)
4287  */
4288 static void
move_to_ready_list(TocEntry * pending_list,ParallelReadyList * ready_list,RestorePass pass)4289 move_to_ready_list(TocEntry *pending_list,
4290 				   ParallelReadyList *ready_list,
4291 				   RestorePass pass)
4292 {
4293 	TocEntry   *te;
4294 	TocEntry   *next_te;
4295 
4296 	for (te = pending_list->pending_next; te != pending_list; te = next_te)
4297 	{
4298 		/* must save list link before possibly removing te from list */
4299 		next_te = te->pending_next;
4300 
4301 		if (te->depCount == 0 &&
4302 			_tocEntryRestorePass(te) == pass)
4303 		{
4304 			/* Remove it from pending_list ... */
4305 			pending_list_remove(te);
4306 			/* ... and add to ready_list */
4307 			ready_list_insert(ready_list, te);
4308 		}
4309 	}
4310 }
4311 
4312 /*
4313  * Find the next work item (if any) that is capable of being run now,
4314  * and remove it from the ready_list.
4315  *
4316  * Returns the item, or NULL if nothing is runnable.
4317  *
4318  * To qualify, the item must have no remaining dependencies
4319  * and no requirements for locks that are incompatible with
4320  * items currently running.  Items in the ready_list are known to have
4321  * no remaining dependencies, but we have to check for lock conflicts.
4322  */
4323 static TocEntry *
pop_next_work_item(ParallelReadyList * ready_list,ParallelState * pstate)4324 pop_next_work_item(ParallelReadyList *ready_list,
4325 				   ParallelState *pstate)
4326 {
4327 	/*
4328 	 * Sort the ready_list so that we'll tackle larger jobs first.
4329 	 */
4330 	ready_list_sort(ready_list);
4331 
4332 	/*
4333 	 * Search the ready_list until we find a suitable item.
4334 	 */
4335 	for (int i = ready_list->first_te; i <= ready_list->last_te; i++)
4336 	{
4337 		TocEntry   *te = ready_list->tes[i];
4338 		bool		conflicts = false;
4339 
4340 		/*
4341 		 * Check to see if the item would need exclusive lock on something
4342 		 * that a currently running item also needs lock on, or vice versa. If
4343 		 * so, we don't want to schedule them together.
4344 		 */
4345 		for (int k = 0; k < pstate->numWorkers; k++)
4346 		{
4347 			TocEntry   *running_te = pstate->te[k];
4348 
4349 			if (running_te == NULL)
4350 				continue;
4351 			if (has_lock_conflicts(te, running_te) ||
4352 				has_lock_conflicts(running_te, te))
4353 			{
4354 				conflicts = true;
4355 				break;
4356 			}
4357 		}
4358 
4359 		if (conflicts)
4360 			continue;
4361 
4362 		/* passed all tests, so this item can run */
4363 		ready_list_remove(ready_list, i);
4364 		return te;
4365 	}
4366 
4367 	pg_log_debug("no item ready");
4368 	return NULL;
4369 }
4370 
4371 
4372 /*
4373  * Restore a single TOC item in parallel with others
4374  *
4375  * this is run in the worker, i.e. in a thread (Windows) or a separate process
4376  * (everything else). A worker process executes several such work items during
4377  * a parallel backup or restore. Once we terminate here and report back that
4378  * our work is finished, the leader process will assign us a new work item.
4379  */
4380 int
parallel_restore(ArchiveHandle * AH,TocEntry * te)4381 parallel_restore(ArchiveHandle *AH, TocEntry *te)
4382 {
4383 	int			status;
4384 
4385 	Assert(AH->connection != NULL);
4386 
4387 	/* Count only errors associated with this TOC entry */
4388 	AH->public.n_errors = 0;
4389 
4390 	/* Restore the TOC item */
4391 	status = restore_toc_entry(AH, te, true);
4392 
4393 	return status;
4394 }
4395 
4396 
4397 /*
4398  * Callback function that's invoked in the leader process after a step has
4399  * been parallel restored.
4400  *
4401  * Update status and reduce the dependency count of any dependent items.
4402  */
4403 static void
mark_restore_job_done(ArchiveHandle * AH,TocEntry * te,int status,void * callback_data)4404 mark_restore_job_done(ArchiveHandle *AH,
4405 					  TocEntry *te,
4406 					  int status,
4407 					  void *callback_data)
4408 {
4409 	ParallelReadyList *ready_list = (ParallelReadyList *) callback_data;
4410 
4411 	pg_log_info("finished item %d %s %s",
4412 				te->dumpId, te->desc, te->tag);
4413 
4414 	if (status == WORKER_CREATE_DONE)
4415 		mark_create_done(AH, te);
4416 	else if (status == WORKER_INHIBIT_DATA)
4417 	{
4418 		inhibit_data_for_failed_table(AH, te);
4419 		AH->public.n_errors++;
4420 	}
4421 	else if (status == WORKER_IGNORED_ERRORS)
4422 		AH->public.n_errors++;
4423 	else if (status != 0)
4424 		fatal("worker process failed: exit code %d",
4425 			  status);
4426 
4427 	reduce_dependencies(AH, te, ready_list);
4428 }
4429 
4430 
4431 /*
4432  * Process the dependency information into a form useful for parallel restore.
4433  *
4434  * This function takes care of fixing up some missing or badly designed
4435  * dependencies, and then prepares subsidiary data structures that will be
4436  * used in the main parallel-restore logic, including:
4437  * 1. We build the revDeps[] arrays of incoming dependency dumpIds.
4438  * 2. We set up depCount fields that are the number of as-yet-unprocessed
4439  * dependencies for each TOC entry.
4440  *
4441  * We also identify locking dependencies so that we can avoid trying to
4442  * schedule conflicting items at the same time.
4443  */
4444 static void
fix_dependencies(ArchiveHandle * AH)4445 fix_dependencies(ArchiveHandle *AH)
4446 {
4447 	TocEntry   *te;
4448 	int			i;
4449 
4450 	/*
4451 	 * Initialize the depCount/revDeps/nRevDeps fields, and make sure the TOC
4452 	 * items are marked as not being in any parallel-processing list.
4453 	 */
4454 	for (te = AH->toc->next; te != AH->toc; te = te->next)
4455 	{
4456 		te->depCount = te->nDeps;
4457 		te->revDeps = NULL;
4458 		te->nRevDeps = 0;
4459 		te->pending_prev = NULL;
4460 		te->pending_next = NULL;
4461 	}
4462 
4463 	/*
4464 	 * POST_DATA items that are shown as depending on a table need to be
4465 	 * re-pointed to depend on that table's data, instead.  This ensures they
4466 	 * won't get scheduled until the data has been loaded.
4467 	 */
4468 	repoint_table_dependencies(AH);
4469 
4470 	/*
4471 	 * Pre-8.4 versions of pg_dump neglected to set up a dependency from BLOB
4472 	 * COMMENTS to BLOBS.  Cope.  (We assume there's only one BLOBS and only
4473 	 * one BLOB COMMENTS in such files.)
4474 	 */
4475 	if (AH->version < K_VERS_1_11)
4476 	{
4477 		for (te = AH->toc->next; te != AH->toc; te = te->next)
4478 		{
4479 			if (strcmp(te->desc, "BLOB COMMENTS") == 0 && te->nDeps == 0)
4480 			{
4481 				TocEntry   *te2;
4482 
4483 				for (te2 = AH->toc->next; te2 != AH->toc; te2 = te2->next)
4484 				{
4485 					if (strcmp(te2->desc, "BLOBS") == 0)
4486 					{
4487 						te->dependencies = (DumpId *) pg_malloc(sizeof(DumpId));
4488 						te->dependencies[0] = te2->dumpId;
4489 						te->nDeps++;
4490 						te->depCount++;
4491 						break;
4492 					}
4493 				}
4494 				break;
4495 			}
4496 		}
4497 	}
4498 
4499 	/*
4500 	 * At this point we start to build the revDeps reverse-dependency arrays,
4501 	 * so all changes of dependencies must be complete.
4502 	 */
4503 
4504 	/*
4505 	 * Count the incoming dependencies for each item.  Also, it is possible
4506 	 * that the dependencies list items that are not in the archive at all
4507 	 * (that should not happen in 9.2 and later, but is highly likely in older
4508 	 * archives).  Subtract such items from the depCounts.
4509 	 */
4510 	for (te = AH->toc->next; te != AH->toc; te = te->next)
4511 	{
4512 		for (i = 0; i < te->nDeps; i++)
4513 		{
4514 			DumpId		depid = te->dependencies[i];
4515 
4516 			if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
4517 				AH->tocsByDumpId[depid]->nRevDeps++;
4518 			else
4519 				te->depCount--;
4520 		}
4521 	}
4522 
4523 	/*
4524 	 * Allocate space for revDeps[] arrays, and reset nRevDeps so we can use
4525 	 * it as a counter below.
4526 	 */
4527 	for (te = AH->toc->next; te != AH->toc; te = te->next)
4528 	{
4529 		if (te->nRevDeps > 0)
4530 			te->revDeps = (DumpId *) pg_malloc(te->nRevDeps * sizeof(DumpId));
4531 		te->nRevDeps = 0;
4532 	}
4533 
4534 	/*
4535 	 * Build the revDeps[] arrays of incoming-dependency dumpIds.  This had
4536 	 * better agree with the loops above.
4537 	 */
4538 	for (te = AH->toc->next; te != AH->toc; te = te->next)
4539 	{
4540 		for (i = 0; i < te->nDeps; i++)
4541 		{
4542 			DumpId		depid = te->dependencies[i];
4543 
4544 			if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
4545 			{
4546 				TocEntry   *otherte = AH->tocsByDumpId[depid];
4547 
4548 				otherte->revDeps[otherte->nRevDeps++] = te->dumpId;
4549 			}
4550 		}
4551 	}
4552 
4553 	/*
4554 	 * Lastly, work out the locking dependencies.
4555 	 */
4556 	for (te = AH->toc->next; te != AH->toc; te = te->next)
4557 	{
4558 		te->lockDeps = NULL;
4559 		te->nLockDeps = 0;
4560 		identify_locking_dependencies(AH, te);
4561 	}
4562 }
4563 
4564 /*
4565  * Change dependencies on table items to depend on table data items instead,
4566  * but only in POST_DATA items.
4567  *
4568  * Also, for any item having such dependency(s), set its dataLength to the
4569  * largest dataLength of the table data items it depends on.  This ensures
4570  * that parallel restore will prioritize larger jobs (index builds, FK
4571  * constraint checks, etc) over smaller ones, avoiding situations where we
4572  * end a restore with only one active job working on a large table.
4573  */
4574 static void
repoint_table_dependencies(ArchiveHandle * AH)4575 repoint_table_dependencies(ArchiveHandle *AH)
4576 {
4577 	TocEntry   *te;
4578 	int			i;
4579 	DumpId		olddep;
4580 
4581 	for (te = AH->toc->next; te != AH->toc; te = te->next)
4582 	{
4583 		if (te->section != SECTION_POST_DATA)
4584 			continue;
4585 		for (i = 0; i < te->nDeps; i++)
4586 		{
4587 			olddep = te->dependencies[i];
4588 			if (olddep <= AH->maxDumpId &&
4589 				AH->tableDataId[olddep] != 0)
4590 			{
4591 				DumpId		tabledataid = AH->tableDataId[olddep];
4592 				TocEntry   *tabledatate = AH->tocsByDumpId[tabledataid];
4593 
4594 				te->dependencies[i] = tabledataid;
4595 				te->dataLength = Max(te->dataLength, tabledatate->dataLength);
4596 				pg_log_debug("transferring dependency %d -> %d to %d",
4597 							 te->dumpId, olddep, tabledataid);
4598 			}
4599 		}
4600 	}
4601 }
4602 
4603 /*
4604  * Identify which objects we'll need exclusive lock on in order to restore
4605  * the given TOC entry (*other* than the one identified by the TOC entry
4606  * itself).  Record their dump IDs in the entry's lockDeps[] array.
4607  */
4608 static void
identify_locking_dependencies(ArchiveHandle * AH,TocEntry * te)4609 identify_locking_dependencies(ArchiveHandle *AH, TocEntry *te)
4610 {
4611 	DumpId	   *lockids;
4612 	int			nlockids;
4613 	int			i;
4614 
4615 	/*
4616 	 * We only care about this for POST_DATA items.  PRE_DATA items are not
4617 	 * run in parallel, and DATA items are all independent by assumption.
4618 	 */
4619 	if (te->section != SECTION_POST_DATA)
4620 		return;
4621 
4622 	/* Quick exit if no dependencies at all */
4623 	if (te->nDeps == 0)
4624 		return;
4625 
4626 	/*
4627 	 * Most POST_DATA items are ALTER TABLEs or some moral equivalent of that,
4628 	 * and hence require exclusive lock.  However, we know that CREATE INDEX
4629 	 * does not.  (Maybe someday index-creating CONSTRAINTs will fall in that
4630 	 * category too ... but today is not that day.)
4631 	 */
4632 	if (strcmp(te->desc, "INDEX") == 0)
4633 		return;
4634 
4635 	/*
4636 	 * We assume the entry requires exclusive lock on each TABLE or TABLE DATA
4637 	 * item listed among its dependencies.  Originally all of these would have
4638 	 * been TABLE items, but repoint_table_dependencies would have repointed
4639 	 * them to the TABLE DATA items if those are present (which they might not
4640 	 * be, eg in a schema-only dump).  Note that all of the entries we are
4641 	 * processing here are POST_DATA; otherwise there might be a significant
4642 	 * difference between a dependency on a table and a dependency on its
4643 	 * data, so that closer analysis would be needed here.
4644 	 */
4645 	lockids = (DumpId *) pg_malloc(te->nDeps * sizeof(DumpId));
4646 	nlockids = 0;
4647 	for (i = 0; i < te->nDeps; i++)
4648 	{
4649 		DumpId		depid = te->dependencies[i];
4650 
4651 		if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL &&
4652 			((strcmp(AH->tocsByDumpId[depid]->desc, "TABLE DATA") == 0) ||
4653 			 strcmp(AH->tocsByDumpId[depid]->desc, "TABLE") == 0))
4654 			lockids[nlockids++] = depid;
4655 	}
4656 
4657 	if (nlockids == 0)
4658 	{
4659 		free(lockids);
4660 		return;
4661 	}
4662 
4663 	te->lockDeps = pg_realloc(lockids, nlockids * sizeof(DumpId));
4664 	te->nLockDeps = nlockids;
4665 }
4666 
4667 /*
4668  * Remove the specified TOC entry from the depCounts of items that depend on
4669  * it, thereby possibly making them ready-to-run.  Any pending item that
4670  * becomes ready should be moved to the ready_list, if that's provided.
4671  */
4672 static void
reduce_dependencies(ArchiveHandle * AH,TocEntry * te,ParallelReadyList * ready_list)4673 reduce_dependencies(ArchiveHandle *AH, TocEntry *te,
4674 					ParallelReadyList *ready_list)
4675 {
4676 	int			i;
4677 
4678 	pg_log_debug("reducing dependencies for %d", te->dumpId);
4679 
4680 	for (i = 0; i < te->nRevDeps; i++)
4681 	{
4682 		TocEntry   *otherte = AH->tocsByDumpId[te->revDeps[i]];
4683 
4684 		Assert(otherte->depCount > 0);
4685 		otherte->depCount--;
4686 
4687 		/*
4688 		 * It's ready if it has no remaining dependencies, and it belongs in
4689 		 * the current restore pass, and it is currently a member of the
4690 		 * pending list (that check is needed to prevent double restore in
4691 		 * some cases where a list-file forces out-of-order restoring).
4692 		 * However, if ready_list == NULL then caller doesn't want any list
4693 		 * memberships changed.
4694 		 */
4695 		if (otherte->depCount == 0 &&
4696 			_tocEntryRestorePass(otherte) == AH->restorePass &&
4697 			otherte->pending_prev != NULL &&
4698 			ready_list != NULL)
4699 		{
4700 			/* Remove it from pending list ... */
4701 			pending_list_remove(otherte);
4702 			/* ... and add to ready_list */
4703 			ready_list_insert(ready_list, otherte);
4704 		}
4705 	}
4706 }
4707 
4708 /*
4709  * Set the created flag on the DATA member corresponding to the given
4710  * TABLE member
4711  */
4712 static void
mark_create_done(ArchiveHandle * AH,TocEntry * te)4713 mark_create_done(ArchiveHandle *AH, TocEntry *te)
4714 {
4715 	if (AH->tableDataId[te->dumpId] != 0)
4716 	{
4717 		TocEntry   *ted = AH->tocsByDumpId[AH->tableDataId[te->dumpId]];
4718 
4719 		ted->created = true;
4720 	}
4721 }
4722 
4723 /*
4724  * Mark the DATA member corresponding to the given TABLE member
4725  * as not wanted
4726  */
4727 static void
inhibit_data_for_failed_table(ArchiveHandle * AH,TocEntry * te)4728 inhibit_data_for_failed_table(ArchiveHandle *AH, TocEntry *te)
4729 {
4730 	pg_log_info("table \"%s\" could not be created, will not restore its data",
4731 				te->tag);
4732 
4733 	if (AH->tableDataId[te->dumpId] != 0)
4734 	{
4735 		TocEntry   *ted = AH->tocsByDumpId[AH->tableDataId[te->dumpId]];
4736 
4737 		ted->reqs = 0;
4738 	}
4739 }
4740 
4741 /*
4742  * Clone and de-clone routines used in parallel restoration.
4743  *
4744  * Enough of the structure is cloned to ensure that there is no
4745  * conflict between different threads each with their own clone.
4746  */
4747 ArchiveHandle *
CloneArchive(ArchiveHandle * AH)4748 CloneArchive(ArchiveHandle *AH)
4749 {
4750 	ArchiveHandle *clone;
4751 
4752 	/* Make a "flat" copy */
4753 	clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle));
4754 	memcpy(clone, AH, sizeof(ArchiveHandle));
4755 
4756 	/* Handle format-independent fields */
4757 	memset(&(clone->sqlparse), 0, sizeof(clone->sqlparse));
4758 
4759 	/* The clone will have its own connection, so disregard connection state */
4760 	clone->connection = NULL;
4761 	clone->connCancel = NULL;
4762 	clone->currUser = NULL;
4763 	clone->currSchema = NULL;
4764 	clone->currTablespace = NULL;
4765 
4766 	/* savedPassword must be local in case we change it while connecting */
4767 	if (clone->savedPassword)
4768 		clone->savedPassword = pg_strdup(clone->savedPassword);
4769 
4770 	/* clone has its own error count, too */
4771 	clone->public.n_errors = 0;
4772 
4773 	/*
4774 	 * Connect our new clone object to the database, using the same connection
4775 	 * parameters used for the original connection.
4776 	 */
4777 	ConnectDatabase((Archive *) clone, &clone->public.ropt->cparams, true);
4778 
4779 	/* re-establish fixed state */
4780 	if (AH->mode == archModeRead)
4781 		_doSetFixedOutputState(clone);
4782 	/* in write case, setupDumpWorker will fix up connection state */
4783 
4784 	/* Let the format-specific code have a chance too */
4785 	clone->ClonePtr(clone);
4786 
4787 	Assert(clone->connection != NULL);
4788 	return clone;
4789 }
4790 
4791 /*
4792  * Release clone-local storage.
4793  *
4794  * Note: we assume any clone-local connection was already closed.
4795  */
4796 void
DeCloneArchive(ArchiveHandle * AH)4797 DeCloneArchive(ArchiveHandle *AH)
4798 {
4799 	/* Should not have an open database connection */
4800 	Assert(AH->connection == NULL);
4801 
4802 	/* Clear format-specific state */
4803 	AH->DeClonePtr(AH);
4804 
4805 	/* Clear state allocated by CloneArchive */
4806 	if (AH->sqlparse.curCmd)
4807 		destroyPQExpBuffer(AH->sqlparse.curCmd);
4808 
4809 	/* Clear any connection-local state */
4810 	if (AH->currUser)
4811 		free(AH->currUser);
4812 	if (AH->currSchema)
4813 		free(AH->currSchema);
4814 	if (AH->currTablespace)
4815 		free(AH->currTablespace);
4816 	if (AH->currTableAm)
4817 		free(AH->currTableAm);
4818 	if (AH->savedPassword)
4819 		free(AH->savedPassword);
4820 
4821 	free(AH);
4822 }
4823