1 /*-------------------------------------------------------------------------
2  *
3  * pg_backup_archiver.h
4  *
5  *	Private interface to the pg_dump archiver routines.
6  *	It is NOT intended that these routines be called by any
7  *	dumper directly.
8  *
9  *	See the headers to pg_restore for more details.
10  *
11  * Copyright (c) 2000, Philip Warner
12  *		Rights are granted to use this software in any way so long
13  *		as this notice is not removed.
14  *
15  *	The author is not responsible for loss or damages that may
16  *	result from its use.
17  *
18  *
19  * IDENTIFICATION
20  *		src/bin/pg_dump/pg_backup_archiver.h
21  *
22  *-------------------------------------------------------------------------
23  */
24 #ifndef __PG_BACKUP_ARCHIVE__
25 #define __PG_BACKUP_ARCHIVE__
26 
27 
28 #include <time.h>
29 
30 #include "pg_backup.h"
31 
32 #include "libpq-fe.h"
33 #include "pqexpbuffer.h"
34 
/*
 * NOTE(review): presumably the size, in bytes, of the large-object transfer
 * buffer (see lo_buf in struct _archiveHandle) -- confirm against its users.
 */
#define LOBBUFSIZE (16 * 1024)
36 
37 /*
38  * Note: zlib.h must be included *after* libpq-fe.h, because the latter may
39  * include ssl.h, which has a naming conflict with zlib.h.
40  */
/*
 * Uniform file-I/O wrappers that map onto zlib's gz* calls when zlib is
 * available and onto plain stdio otherwise.
 *
 *	GZCLOSE(fh)			close the handle
 *	GZWRITE(p, s, n, fh)	write n items of s bytes each from p
 *	GZREAD(p, s, n, fh)	read up to n items of s bytes each into p
 *	GZEOF(fh)			nonzero once end of file has been hit
 *
 * GZWRITE and GZREAD yield a count of *bytes* in both builds, so callers can
 * compare the result with (n) * (s) regardless of whether zlib is compiled
 * in.  The stdio flavors evaluate "s" twice; do not pass an expression with
 * side effects.
 */
#ifdef HAVE_LIBZ
#include <zlib.h>
#define GZCLOSE(fh) gzclose(fh)
#define GZWRITE(p, s, n, fh) gzwrite(fh, p, (n) * (s))
#define GZREAD(p, s, n, fh) gzread(fh, p, (n) * (s))
#define GZEOF(fh)	gzeof(fh)
#else
#define GZCLOSE(fh) fclose(fh)
#define GZWRITE(p, s, n, fh) (fwrite(p, s, n, fh) * (s))
/*
 * fread() reports whole items, whereas gzread() reports bytes; scale the
 * result so both flavors of GZREAD agree (a no-op for the usual s == 1).
 */
#define GZREAD(p, s, n, fh) (fread(p, s, n, fh) * (s))
#define GZEOF(fh)	feof(fh)
/* this is just the redefinition of a libz constant */
#define Z_DEFAULT_COMPRESSION (-1)

/*
 * Stand-in declaration carrying only the four buffer-tracking members, so
 * that code mentioning z_stream/z_streamp still compiles without zlib.
 */
typedef struct _z_stream
{
	void	   *next_in;
	void	   *next_out;
	size_t		avail_in;
	size_t		avail_out;
} z_stream;
typedef z_stream *z_streamp;
#endif
64 
/*
 * Data block types.  Only the values 1 and 3 are assigned in this header.
 */
#define BLK_DATA	1
#define BLK_BLOBS	3
68 
/*
 * An archive version is packed into a single integer as <maj><min><rev>,
 * each component occupying eight bits.
 */
#define MAKE_ARCHIVE_VERSION(major, minor, rev) \
	(((major) * 0x100 + (minor)) * 0x100 + (rev))

/* Extract the individual components again */
#define ARCHIVE_MAJOR(version)	(((version) >> 16) & 0xFF)
#define ARCHIVE_MINOR(version)	(((version) >> 8) & 0xFF)
#define ARCHIVE_REV(version)	((version) & 0xFF)

/*
 * Historical version numbers (checked in code), with what each one
 * introduced:
 *
 *	1.2		allow no zlib
 *	1.3		BLOBs
 *	1.4		date & name in header
 *	1.5		handle dependencies
 *	1.6		schema field in TOCs
 *	1.7		file offset size in header
 *	1.8		change interpretation of ID numbers and dependencies
 *	1.9		add default_with_oids tracking
 *	1.10	add tablespace
 *	1.11	add toc section indicator
 *	1.12	add separate BLOB entries
 *	1.13	change search_path behavior
 *	1.14	add tableam
 */
#define K_VERS_1_0	MAKE_ARCHIVE_VERSION(1, 0, 0)
#define K_VERS_1_2	MAKE_ARCHIVE_VERSION(1, 2, 0)
#define K_VERS_1_3	MAKE_ARCHIVE_VERSION(1, 3, 0)
#define K_VERS_1_4	MAKE_ARCHIVE_VERSION(1, 4, 0)
#define K_VERS_1_5	MAKE_ARCHIVE_VERSION(1, 5, 0)
#define K_VERS_1_6	MAKE_ARCHIVE_VERSION(1, 6, 0)
#define K_VERS_1_7	MAKE_ARCHIVE_VERSION(1, 7, 0)
#define K_VERS_1_8	MAKE_ARCHIVE_VERSION(1, 8, 0)
#define K_VERS_1_9	MAKE_ARCHIVE_VERSION(1, 9, 0)
#define K_VERS_1_10 MAKE_ARCHIVE_VERSION(1, 10, 0)
#define K_VERS_1_11 MAKE_ARCHIVE_VERSION(1, 11, 0)
#define K_VERS_1_12 MAKE_ARCHIVE_VERSION(1, 12, 0)
#define K_VERS_1_13 MAKE_ARCHIVE_VERSION(1, 13, 0)
#define K_VERS_1_14 MAKE_ARCHIVE_VERSION(1, 14, 0)

/* Current archive version number (the format we can output) */
#define K_VERS_MAJOR	1
#define K_VERS_MINOR	14
#define K_VERS_REV		0
#define K_VERS_SELF \
	MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV)

/* Newest format we can read: any revision of the current major.minor */
#define K_VERS_MAX \
	MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, 255)
107 
108 
/*
 * Flags to indicate disposition of offsets stored in files.
 */
#define K_OFFSET_POS_NOT_SET	1
#define K_OFFSET_POS_SET		2
#define K_OFFSET_NO_DATA		3
113 
/*
 * Special exit values from worker children.
 *
 * Zero is reserved for normal success.  1 and other small values should be
 * interpreted as crashes, which is why the remaining codes start at 10.
 */
#define WORKER_OK				0
#define WORKER_CREATE_DONE		10
#define WORKER_INHIBIT_DATA		11
#define WORKER_IGNORED_ERRORS	12
122 
/*
 * Forward declarations, so the callback typedefs and prototypes in this
 * header can mention these types by pointer before (or without) seeing the
 * full struct definitions.
 */
struct ParallelState;
typedef struct _tocEntry TocEntry;
typedef struct _archiveHandle ArchiveHandle;
126 
/*
 * Report a failed read on stdio stream "fd" through fatal().  A short read
 * caused by end of file gets its own message; anything else is reported
 * using %m.
 */
#define READ_ERROR_EXIT(fd) \
	do { \
		if (!feof(fd)) \
			fatal("could not read from input file: %m"); \
		else \
			fatal("could not read from input file: end of file"); \
	} while (0)

/* Report a failed write to the output file through fatal(). */
#define WRITE_ERROR_EXIT \
	do { \
		fatal("could not write to output file: %m"); \
	} while (0)
139 
/* The two actions distinguished by this enum: dumping and restoring. */
typedef enum T_Action
{
	ACT_DUMP = 0,
	ACT_RESTORE = 1
} T_Action;
145 
146 typedef void (*ClosePtrType) (ArchiveHandle *AH);
147 typedef void (*ReopenPtrType) (ArchiveHandle *AH);
148 typedef void (*ArchiveEntryPtrType) (ArchiveHandle *AH, TocEntry *te);
149 
150 typedef void (*StartDataPtrType) (ArchiveHandle *AH, TocEntry *te);
151 typedef void (*WriteDataPtrType) (ArchiveHandle *AH, const void *data, size_t dLen);
152 typedef void (*EndDataPtrType) (ArchiveHandle *AH, TocEntry *te);
153 
154 typedef void (*StartBlobsPtrType) (ArchiveHandle *AH, TocEntry *te);
155 typedef void (*StartBlobPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid);
156 typedef void (*EndBlobPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid);
157 typedef void (*EndBlobsPtrType) (ArchiveHandle *AH, TocEntry *te);
158 
159 typedef int (*WriteBytePtrType) (ArchiveHandle *AH, const int i);
160 typedef int (*ReadBytePtrType) (ArchiveHandle *AH);
161 typedef void (*WriteBufPtrType) (ArchiveHandle *AH, const void *c, size_t len);
162 typedef void (*ReadBufPtrType) (ArchiveHandle *AH, void *buf, size_t len);
163 typedef void (*WriteExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
164 typedef void (*ReadExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
165 typedef void (*PrintExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
166 typedef void (*PrintTocDataPtrType) (ArchiveHandle *AH, TocEntry *te);
167 
168 typedef void (*PrepParallelRestorePtrType) (ArchiveHandle *AH);
169 typedef void (*ClonePtrType) (ArchiveHandle *AH);
170 typedef void (*DeClonePtrType) (ArchiveHandle *AH);
171 
172 typedef int (*WorkerJobDumpPtrType) (ArchiveHandle *AH, TocEntry *te);
173 typedef int (*WorkerJobRestorePtrType) (ArchiveHandle *AH, TocEntry *te);
174 
175 typedef size_t (*CustomOutPtrType) (ArchiveHandle *AH, const void *buf, size_t len);
176 
/* Lexical state of the SQL scanner described by sqlparseInfo. */
typedef enum
{
	/* normal: outside any quoted construct */
	SQL_SCAN = 0,
	/* inside a '...' literal */
	SQL_IN_SINGLE_QUOTE = 1,
	/* inside a "..." identifier */
	SQL_IN_DOUBLE_QUOTE = 2
} sqlparseState;
183 
/*
 * Scanner state carried between chunks of input; ArchiveHandle keeps one of
 * these in its "sqlparse" member as the state for parsing INSERT data.
 */
typedef struct
{
	sqlparseState state;		/* see above */
	bool		backSlash;		/* next char is backslash quoted? */
	PQExpBuffer curCmd;			/* incomplete line (NULL if not created) */
} sqlparseInfo;
190 
/*
 * Archiver stage.  ArchiveHandle records both the current stage and the
 * stage in effect at the last error (members "stage" and "lastErrorStage").
 */
typedef enum
{
	STAGE_NONE = 0,
	STAGE_INITIALIZING = 1,
	STAGE_PROCESSING = 2,
	STAGE_FINALIZING = 3
} ArchiverStage;
198 
/* What kind of output the archiver is currently writing. */
typedef enum
{
	/* emitting general SQL commands */
	OUTPUT_SQLCMDS = 0,
	/* writing COPY data */
	OUTPUT_COPYDATA = 1,
	/* writing data as INSERT commands */
	OUTPUT_OTHERDATA = 2
} ArchiverOutput;
205 
206 /*
207  * For historical reasons, ACL items are interspersed with everything else in
208  * a dump file's TOC; typically they're right after the object they're for.
209  * However, we need to restore data before ACLs, as otherwise a read-only
210  * table (ie one where the owner has revoked her own INSERT privilege) causes
211  * data restore failures.  On the other hand, matview REFRESH commands should
212  * come out after ACLs, as otherwise non-superuser-owned matviews might not
213  * be able to execute.  (If the permissions at the time of dumping would not
214  * allow a REFRESH, too bad; we won't fix that for you.)  We also want event
215  * triggers to be restored after ACLs, so that they can't mess those up.
216  *
217  * These considerations force us to make three passes over the TOC,
218  * restoring the appropriate subset of items in each pass.  We assume that
219  * the dependency sort resulted in an appropriate ordering of items within
220  * each subset.
221  *
222  * XXX This mechanism should be superseded by tracking dependencies on ACLs
223  * properly; but we'll still need it for old dump files even after that.
224  */
typedef enum
{
	/* Main pass (most TOC item types) */
	RESTORE_PASS_MAIN = 0,
	/* ACL item types */
	RESTORE_PASS_ACL = 1,
	/* Event trigger and matview refresh items */
	RESTORE_PASS_POST_ACL = 2
} RestorePass;

/* Final pass; update this if another pass is appended to RestorePass */
#define RESTORE_PASS_LAST RESTORE_PASS_POST_ACL
233 
/*
 * Bit flags saying which parts of a TOC entry are wanted; the values are
 * distinct bits so they can be OR'ed together.
 */
typedef enum
{
	REQ_SCHEMA = 1 << 0,		/* want schema */
	REQ_DATA = 1 << 1,			/* want data */
	REQ_SPECIAL = 1 << 2		/* for special TOC entries */
} teReqs;
240 
241 struct _archiveHandle
242 {
243 	Archive		public;			/* Public part of archive */
244 	int			version;		/* Version of file */
245 
246 	char	   *archiveRemoteVersion;	/* When reading an archive, the
247 										 * version of the dumped DB */
248 	char	   *archiveDumpVersion; /* When reading an archive, the version of
249 									 * the dumper */
250 
251 	size_t		intSize;		/* Size of an integer in the archive */
252 	size_t		offSize;		/* Size of a file offset in the archive -
253 								 * Added V1.7 */
254 	ArchiveFormat format;		/* Archive format */
255 
256 	sqlparseInfo sqlparse;		/* state for parsing INSERT data */
257 
258 	time_t		createDate;		/* Date archive created */
259 
260 	/*
261 	 * Fields used when discovering archive format.  For tar format, we load
262 	 * the first block into the lookahead buffer, and verify that it looks
263 	 * like a tar header.  The tar module must then consume bytes from the
264 	 * lookahead buffer before reading any more from the file.  For custom
265 	 * format, we load only the "PGDMP" marker into the buffer, and then set
266 	 * readHeader after confirming it matches.  The buffer is vestigial in
267 	 * this case, as the subsequent code just checks readHeader and doesn't
268 	 * examine the buffer.
269 	 */
270 	int			readHeader;		/* Set if we already read "PGDMP" marker */
271 	char	   *lookahead;		/* Buffer used when reading header to discover
272 								 * format */
273 	size_t		lookaheadSize;	/* Allocated size of buffer */
274 	size_t		lookaheadLen;	/* Length of valid data in lookahead */
275 	size_t		lookaheadPos;	/* Current read position in lookahead buffer */
276 
277 	ArchiveEntryPtrType ArchiveEntryPtr;	/* Called for each metadata object */
278 	StartDataPtrType StartDataPtr;	/* Called when table data is about to be
279 									 * dumped */
280 	WriteDataPtrType WriteDataPtr;	/* Called to send some table data to the
281 									 * archive */
282 	EndDataPtrType EndDataPtr;	/* Called when table data dump is finished */
283 	WriteBytePtrType WriteBytePtr;	/* Write a byte to output */
284 	ReadBytePtrType ReadBytePtr;	/* Read a byte from an archive */
285 	WriteBufPtrType WriteBufPtr;	/* Write a buffer of output to the archive */
286 	ReadBufPtrType ReadBufPtr;	/* Read a buffer of input from the archive */
287 	ClosePtrType ClosePtr;		/* Close the archive */
288 	ReopenPtrType ReopenPtr;	/* Reopen the archive */
289 	WriteExtraTocPtrType WriteExtraTocPtr;	/* Write extra TOC entry data
290 											 * associated with the current
291 											 * archive format */
292 	ReadExtraTocPtrType ReadExtraTocPtr;	/* Read extra info associated with
293 											 * archive format */
294 	PrintExtraTocPtrType PrintExtraTocPtr;	/* Extra TOC info for format */
295 	PrintTocDataPtrType PrintTocDataPtr;
296 
297 	StartBlobsPtrType StartBlobsPtr;
298 	EndBlobsPtrType EndBlobsPtr;
299 	StartBlobPtrType StartBlobPtr;
300 	EndBlobPtrType EndBlobPtr;
301 
302 	SetupWorkerPtrType SetupWorkerPtr;
303 	WorkerJobDumpPtrType WorkerJobDumpPtr;
304 	WorkerJobRestorePtrType WorkerJobRestorePtr;
305 
306 	PrepParallelRestorePtrType PrepParallelRestorePtr;
307 	ClonePtrType ClonePtr;		/* Clone format-specific fields */
308 	DeClonePtrType DeClonePtr;	/* Clean up cloned fields */
309 
310 	CustomOutPtrType CustomOutPtr;	/* Alternative script output routine */
311 
312 	/* Stuff for direct DB connection */
313 	char	   *archdbname;		/* DB name *read* from archive */
314 	char	   *savedPassword;	/* password for ropt->username, if known */
315 	char	   *use_role;
316 	PGconn	   *connection;
317 	/* If connCancel isn't NULL, SIGINT handler will send a cancel */
318 	PGcancel   *volatile connCancel;
319 
320 	int			connectToDB;	/* Flag to indicate if direct DB connection is
321 								 * required */
322 	ArchiverOutput outputKind;	/* Flag for what we're currently writing */
323 	bool		pgCopyIn;		/* Currently in libpq 'COPY IN' mode. */
324 
325 	int			loFd;			/* BLOB fd */
326 	int			writingBlob;	/* Flag */
327 	int			blobCount;		/* # of blobs restored */
328 
329 	char	   *fSpec;			/* Archive File Spec */
330 	FILE	   *FH;				/* General purpose file handle */
331 	void	   *OF;
332 	int			gzOut;			/* Output file */
333 
334 	struct _tocEntry *toc;		/* Header of circular list of TOC entries */
335 	int			tocCount;		/* Number of TOC entries */
336 	DumpId		maxDumpId;		/* largest DumpId among all TOC entries */
337 
338 	/* arrays created after the TOC list is complete: */
339 	struct _tocEntry **tocsByDumpId;	/* TOCs indexed by dumpId */
340 	DumpId	   *tableDataId;	/* TABLE DATA ids, indexed by table dumpId */
341 
342 	struct _tocEntry *currToc;	/* Used when dumping data */
343 	int			compression;	/* Compression requested on open Possible
344 								 * values for compression: -1
345 								 * Z_DEFAULT_COMPRESSION 0	COMPRESSION_NONE
346 								 * 1-9 levels for gzip compression */
347 	bool		dosync;			/* data requested to be synced on sight */
348 	ArchiveMode mode;			/* File mode - r or w */
349 	void	   *formatData;		/* Header data specific to file format */
350 
351 	/* these vars track state to avoid sending redundant SET commands */
352 	char	   *currUser;		/* current username, or NULL if unknown */
353 	char	   *currSchema;		/* current schema, or NULL */
354 	char	   *currTablespace; /* current tablespace, or NULL */
355 	char	   *currTableAm;	/* current table access method, or NULL */
356 
357 	void	   *lo_buf;
358 	size_t		lo_buf_used;
359 	size_t		lo_buf_size;
360 
361 	int			noTocComments;
362 	ArchiverStage stage;
363 	ArchiverStage lastErrorStage;
364 	RestorePass restorePass;	/* used only during parallel restore */
365 	struct _tocEntry *currentTE;
366 	struct _tocEntry *lastErrorTE;
367 };
368 
369 struct _tocEntry
370 {
371 	struct _tocEntry *prev;
372 	struct _tocEntry *next;
373 	CatalogId	catalogId;
374 	DumpId		dumpId;
375 	teSection	section;
376 	bool		hadDumper;		/* Archiver was passed a dumper routine (used
377 								 * in restore) */
378 	char	   *tag;			/* index tag */
379 	char	   *namespace;		/* null or empty string if not in a schema */
380 	char	   *tablespace;		/* null if not in a tablespace; empty string
381 								 * means use database default */
382 	char	   *tableam;		/* table access method, only for TABLE tags */
383 	char	   *owner;
384 	char	   *desc;
385 	char	   *defn;
386 	char	   *dropStmt;
387 	char	   *copyStmt;
388 	DumpId	   *dependencies;	/* dumpIds of objects this one depends on */
389 	int			nDeps;			/* number of dependencies */
390 
391 	DataDumperPtr dataDumper;	/* Routine to dump data for object */
392 	void	   *dataDumperArg;	/* Arg for above routine */
393 	void	   *formatData;		/* TOC Entry data specific to file format */
394 
395 	/* working state while dumping/restoring */
396 	pgoff_t		dataLength;		/* item's data size; 0 if none or unknown */
397 	teReqs		reqs;			/* do we need schema and/or data of object */
398 	bool		created;		/* set for DATA member if TABLE was created */
399 
400 	/* working state (needed only for parallel restore) */
401 	struct _tocEntry *pending_prev; /* list links for pending-items list; */
402 	struct _tocEntry *pending_next; /* NULL if not in that list */
403 	int			depCount;		/* number of dependencies not yet restored */
404 	DumpId	   *revDeps;		/* dumpIds of objects depending on this one */
405 	int			nRevDeps;		/* number of such dependencies */
406 	DumpId	   *lockDeps;		/* dumpIds of objects this one needs lock on */
407 	int			nLockDeps;		/* number of such dependencies */
408 };
409 
/*
 * NOTE(review): parallel_restore() has the same signature as
 * WorkerJobRestorePtrType, so it is presumably the job routine run for one
 * TOC entry during parallel restore -- confirm at its callers.
 */
extern int	parallel_restore(ArchiveHandle *AH, TocEntry *te);
extern void on_exit_close_archive(Archive *AHX);

/* fmt is a printf-style format string, as declared by pg_attribute_printf. */
extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3);
414 
/*
 * Options for ArchiveEntry, which receives a pointer to one of these.
 *
 * NOTE(review): most members share their names with TocEntry members and
 * presumably initialize them (description/createStmt/deps/dumpFn/dumpArg
 * look like the counterparts of desc/defn/dependencies/dataDumper/
 * dataDumperArg) -- confirm in ArchiveEntry().
 */
typedef struct _archiveOpts
{
	const char *tag;
	const char *namespace;
	const char *tablespace;
	const char *tableam;
	const char *owner;
	const char *description;
	teSection	section;
	const char *createStmt;
	const char *dropStmt;
	const char *copyStmt;
	const DumpId *deps;
	int			nDeps;
	DataDumperPtr dumpFn;
	void	   *dumpArg;
} ArchiveOpts;
/*
 * Build a temporary ArchiveOpts from (designated) initializers and yield a
 * pointer to it, e.g. ARCHIVE_OPTS(.tag = "foo", .nDeps = 0).
 *
 * The expansion is fully parenthesized so that it behaves as one operand
 * wherever it is used; without the outer parentheses a postfix operator
 * applied to the macro (such as "->") would bind to the compound literal
 * rather than to the resulting pointer.
 */
#define ARCHIVE_OPTS(...) (&(ArchiveOpts){__VA_ARGS__})
434 /* Called to add a TOC entry */
435 extern TocEntry *ArchiveEntry(Archive *AHX, CatalogId catalogId,
436 							  DumpId dumpId, ArchiveOpts *opts);
437 
438 extern void WriteTOC(ArchiveHandle *AH);
439 extern void ReadTOC(ArchiveHandle *AH);
440 extern void WriteHead(ArchiveHandle *AH);
441 extern void ReadHead(ArchiveHandle *AH);
442 extern void WriteToc(ArchiveHandle *AH);
443 extern void ReadToc(ArchiveHandle *AH);
444 extern void WriteDataChunks(ArchiveHandle *AH, struct ParallelState *pstate);
445 extern void WriteDataChunksForTocEntry(ArchiveHandle *AH, TocEntry *te);
446 extern ArchiveHandle *CloneArchive(ArchiveHandle *AH);
447 extern void DeCloneArchive(ArchiveHandle *AH);
448 
449 extern teReqs TocIDRequired(ArchiveHandle *AH, DumpId id);
450 TocEntry   *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id);
451 extern bool checkSeek(FILE *fp);
452 
/*
 * Convenience wrappers around appendStringLiteral()/appendByteaLiteral()
 * that take the encoding and std_strings settings from the archive.
 *
 * AH is dereferenced as (AH)->public, so it must point to something with a
 * "public" member, i.e. an ArchiveHandle.
 */
#define appendStringLiteralAHX(buf,str,AH) \
	appendStringLiteral(buf, str, (AH)->public.encoding, (AH)->public.std_strings)

#define appendByteaLiteralAHX(buf,str,len,AH) \
	appendByteaLiteral(buf, str, len, (AH)->public.std_strings)
458 
459 /*
460  * Mandatory routines for each supported format
461  */
462 
463 extern size_t WriteInt(ArchiveHandle *AH, int i);
464 extern int	ReadInt(ArchiveHandle *AH);
465 extern char *ReadStr(ArchiveHandle *AH);
466 extern size_t WriteStr(ArchiveHandle *AH, const char *s);
467 
468 int			ReadOffset(ArchiveHandle *, pgoff_t *);
469 size_t		WriteOffset(ArchiveHandle *, pgoff_t, int);
470 
/*
 * BLOB restore helpers: the Blobs pair takes no OID, while the Blob pair is
 * called with the OID of one blob.
 */
extern void StartRestoreBlobs(ArchiveHandle *AH);
extern void StartRestoreBlob(ArchiveHandle *AH, Oid oid, bool drop);
extern void EndRestoreBlob(ArchiveHandle *AH, Oid oid);
extern void EndRestoreBlobs(ArchiveHandle *AH);

/*
 * One initializer per archive format.
 * NOTE(review): presumably each fills in the format-specific callback
 * pointers of AH -- confirm in the format modules.
 */
extern void InitArchiveFmt_Custom(ArchiveHandle *AH);
extern void InitArchiveFmt_Null(ArchiveHandle *AH);
extern void InitArchiveFmt_Directory(ArchiveHandle *AH);
extern void InitArchiveFmt_Tar(ArchiveHandle *AH);

extern bool isValidTarHeader(char *header);
482 
483 extern void ReconnectToServer(ArchiveHandle *AH, const char *dbname);
484 extern void DropBlobIfExists(ArchiveHandle *AH, Oid oid);
485 
486 void		ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH);
487 int			ahprintf(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3);
488 
489 #endif
490