1 /*-------------------------------------------------------------------------
2  *
3  * pg_backup_archiver.h
4  *
5  *	Private interface to the pg_dump archiver routines.
6  *	It is NOT intended that these routines be called by any
7  *	dumper directly.
8  *
9  *	See the headers to pg_restore for more details.
10  *
11  * Copyright (c) 2000, Philip Warner
12  *		Rights are granted to use this software in any way so long
13  *		as this notice is not removed.
14  *
15  *	The author is not responsible for loss or damages that may
16  *	result from its use.
17  *
18  *
19  * IDENTIFICATION
20  *		src/bin/pg_dump/pg_backup_archiver.h
21  *
22  *-------------------------------------------------------------------------
23  */
24 #ifndef __PG_BACKUP_ARCHIVE__
25 #define __PG_BACKUP_ARCHIVE__
26 
27 
28 #include <time.h>
29 
30 #include "pg_backup.h"
31 
32 #include "libpq-fe.h"
33 #include "pqexpbuffer.h"
34 
/*
 * Buffer size constant (16384).  Presumably the size in bytes of the
 * large-object buffer (lo_buf / lo_buf_size in struct _archiveHandle) --
 * TODO confirm against the code that allocates that buffer.
 */
35 #define LOBBUFSIZE 16384
36 
37 /*
38  * Note: zlib.h must be included *after* libpq-fe.h, because the latter may
39  * include ssl.h, which has a naming conflict with zlib.h.
40  */
/*
 * File I/O wrappers that have one spelling whether or not zlib is compiled
 * in.  With HAVE_LIBZ they expand to the zlib gzFile calls; without it they
 * expand to the matching stdio calls.  The (p, s, n, fh) argument order is
 * the one used by fread/fwrite: buffer, item size, item count, handle.
 */
41 #ifdef HAVE_LIBZ
42 #include <zlib.h>
43 #define GZCLOSE(fh) gzclose(fh)
/* the zlib calls take one length argument, so pass (n) * (s) bytes */
44 #define GZWRITE(p, s, n, fh) gzwrite(fh, p, (n) * (s))
45 #define GZREAD(p, s, n, fh) gzread(fh, p, (n) * (s))
46 #define GZEOF(fh)	gzeof(fh)
47 #else
48 #define GZCLOSE(fh) fclose(fh)
/* scale fwrite's item count by (s) so the result is a byte count */
49 #define GZWRITE(p, s, n, fh) (fwrite(p, s, n, fh) * (s))
/*
 * NOTE(review): unlike GZWRITE just above, this yields fread's item count
 * rather than a byte count; the two only agree when s is 1.  Confirm that
 * every caller passes s == 1 before relying on the result as bytes.
 */
50 #define GZREAD(p, s, n, fh) fread(p, s, n, fh)
51 #define GZEOF(fh)	feof(fh)
52 /* this is just the redefinition of a libz constant */
53 #define Z_DEFAULT_COMPRESSION (-1)
54 
/*
 * Stand-in declarations of z_stream and z_streamp for builds without zlib.
 * Only the four buffer pointer/length members are declared here; presumably
 * this exists so code that mentions these types still compiles -- confirm.
 */
55 typedef struct _z_stream
56 {
57 	void	   *next_in;
58 	void	   *next_out;
59 	size_t		avail_in;
60 	size_t		avail_out;
61 } z_stream;
62 typedef z_stream *z_streamp;
63 #endif
64 
/*
 * The three components below read as major.minor.revision, i.e. 1.13.0,
 * which is the same major/minor pair that K_VERS_1_13 encodes.
 */
65 /* Current archive version number (the format we can output) */
66 #define K_VERS_MAJOR 1
67 #define K_VERS_MINOR 13
68 #define K_VERS_REV 0
69 
70 /* Data block types */
/* NOTE(review): value 2 is not defined in this header. */
71 #define BLK_DATA 1
72 #define BLK_BLOBS 3
73 
/*
 * Each K_VERS_x_y constant packs a version into one integer using base-256
 * digits: ((major * 256 + minor) * 256 + revision) * 256 + 0.  The lowest
 * byte is always zero here.  Packed values order the same way the versions
 * do, so they can be compared with ordinary integer comparisons.
 */
74 /* Historical version numbers (checked in code) */
75 #define K_VERS_1_0 (( (1 * 256 + 0) * 256 + 0) * 256 + 0)
76 #define K_VERS_1_2 (( (1 * 256 + 2) * 256 + 0) * 256 + 0)		/* Allow No ZLIB */
77 #define K_VERS_1_3 (( (1 * 256 + 3) * 256 + 0) * 256 + 0)		/* BLOBs */
78 #define K_VERS_1_4 (( (1 * 256 + 4) * 256 + 0) * 256 + 0)		/* Date & name in header */
79 #define K_VERS_1_5 (( (1 * 256 + 5) * 256 + 0) * 256 + 0)		/* Handle dependencies */
80 #define K_VERS_1_6 (( (1 * 256 + 6) * 256 + 0) * 256 + 0)		/* Schema field in TOCs */
81 #define K_VERS_1_7 (( (1 * 256 + 7) * 256 + 0) * 256 + 0)		/* File Offset size in
82 																 * header */
83 #define K_VERS_1_8 (( (1 * 256 + 8) * 256 + 0) * 256 + 0)		/* change interpretation
84 																 * of ID numbers and
85 																 * dependencies */
86 #define K_VERS_1_9 (( (1 * 256 + 9) * 256 + 0) * 256 + 0)		/* add default_with_oids
87 																 * tracking */
88 #define K_VERS_1_10 (( (1 * 256 + 10) * 256 + 0) * 256 + 0)		/* add tablespace */
89 #define K_VERS_1_11 (( (1 * 256 + 11) * 256 + 0) * 256 + 0)		/* add toc section
90 																 * indicator */
91 #define K_VERS_1_12 (( (1 * 256 + 12) * 256 + 0) * 256 + 0)		/* add separate BLOB
92 																 * entries */
93 #define K_VERS_1_13 (( (1 * 256 + 13) * 256 + 0) * 256 + 0)		/* change search_path
94 																 * behavior */
95 
/*
 * Same packing as above, but with 255 in the revision position, so every
 * 1.13.x revision compares less than or equal to this limit.
 */
96 /* Newest format we can read */
97 #define K_VERS_MAX (( (1 * 256 + 13) * 256 + 255) * 256 + 0)
98 
99 
/*
 * Three mutually exclusive states (1, 2, 3); these are plain values, not
 * bit masks.
 */
100 /* Flags to indicate disposition of offsets stored in files */
101 #define K_OFFSET_POS_NOT_SET 1
102 #define K_OFFSET_POS_SET 2
103 #define K_OFFSET_NO_DATA 3
104 
105 /*
106  * Special exit values from worker children.  We reserve 0 for normal
107  * success; 1 and other small values should be interpreted as crashes.
108  * The non-zero codes defined here therefore start at 10.
109  */
109 #define WORKER_OK					  0
110 #define WORKER_CREATE_DONE			  10
111 #define WORKER_INHIBIT_DATA			  11
112 #define WORKER_IGNORED_ERRORS		  12
113 
/*
 * Forward declarations, so the callback typedefs and prototypes in this
 * header can mention these types before (or without) their definitions.
 * struct _archiveHandle and struct _tocEntry are defined further down;
 * struct ParallelArgs and struct ParallelState are only named here and are
 * not defined anywhere in this header.
 */
114 typedef struct _archiveHandle ArchiveHandle;
115 typedef struct _tocEntry TocEntry;
116 struct ParallelArgs;
117 struct ParallelState;
118 
/*
 * READ_ERROR_EXIT(fd)
 *
 * Report a failed read on the stdio stream fd by calling exit_horribly():
 * if feof(fd) is true the message says "end of file", otherwise it carries
 * strerror(errno).  The expansion refers to a variable named modulename,
 * which must be in scope wherever the macro is used.  Wrapped in
 * do { } while (0) so it behaves as a single statement.
 */
119 #define READ_ERROR_EXIT(fd) \
120 	do { \
121 		if (feof(fd)) \
122 			exit_horribly(modulename, \
123 						  "could not read from input file: end of file\n"); \
124 		else \
125 			exit_horribly(modulename, \
126 					"could not read from input file: %s\n", strerror(errno)); \
127 	} while (0)
128 
/*
 * WRITE_ERROR_EXIT
 *
 * Report a failed write by calling exit_horribly() with strerror(errno).
 * Takes no arguments; like READ_ERROR_EXIT it expands to a reference to
 * modulename, which must be in scope at the point of use.  errno is read at
 * expansion time, so use this directly after the failing call.
 */
129 #define WRITE_ERROR_EXIT \
130 	do { \
131 		exit_horribly(modulename, "could not write to output file: %s\n", \
132 					  strerror(errno)); \
133 	} while (0)
134 
/*
 * Selects between the two archiver operations, dump and restore.  It is the
 * act argument of the MasterStartParallelItemPtr and
 * MasterEndParallelItemPtr callback types declared in this header.
 */
135 typedef enum T_Action
136 {
137 	ACT_DUMP,
138 	ACT_RESTORE
139 } T_Action;
140 
/*
 * Callback pointer types.  struct _archiveHandle has one member per type
 * below, each named identically to its type; the comments on those members
 * say when each callback is used.  All of them take the archive handle as
 * their first argument.
 */

/* open/close handling of the archive, and per-TOC-entry registration */
141 typedef void (*ClosePtr) (ArchiveHandle *AH);
142 typedef void (*ReopenPtr) (ArchiveHandle *AH);
143 typedef void (*ArchiveEntryPtr) (ArchiveHandle *AH, TocEntry *te);
144 
/* table data: start, write dLen bytes from data, end */
145 typedef void (*StartDataPtr) (ArchiveHandle *AH, TocEntry *te);
146 typedef void (*WriteDataPtr) (ArchiveHandle *AH, const void *data, size_t dLen);
147 typedef void (*EndDataPtr) (ArchiveHandle *AH, TocEntry *te);
148 
/* blobs: the Blobs pair has no oid argument, the Blob pair takes one oid */
149 typedef void (*StartBlobsPtr) (ArchiveHandle *AH, TocEntry *te);
150 typedef void (*StartBlobPtr) (ArchiveHandle *AH, TocEntry *te, Oid oid);
151 typedef void (*EndBlobPtr) (ArchiveHandle *AH, TocEntry *te, Oid oid);
152 typedef void (*EndBlobsPtr) (ArchiveHandle *AH, TocEntry *te);
153 
/* byte and buffer I/O on the archive, plus format-specific TOC extras */
154 typedef int (*WriteBytePtr) (ArchiveHandle *AH, const int i);
155 typedef int (*ReadBytePtr) (ArchiveHandle *AH);
156 typedef void (*WriteBufPtr) (ArchiveHandle *AH, const void *c, size_t len);
157 typedef void (*ReadBufPtr) (ArchiveHandle *AH, void *buf, size_t len);
158 typedef void (*SaveArchivePtr) (ArchiveHandle *AH);
159 typedef void (*WriteExtraTocPtr) (ArchiveHandle *AH, TocEntry *te);
160 typedef void (*ReadExtraTocPtr) (ArchiveHandle *AH, TocEntry *te);
161 typedef void (*PrintExtraTocPtr) (ArchiveHandle *AH, TocEntry *te);
162 typedef void (*PrintTocDataPtr) (ArchiveHandle *AH, TocEntry *te);
163 
/* clone / de-clone of format-specific fields */
164 typedef void (*ClonePtr) (ArchiveHandle *AH);
165 typedef void (*DeClonePtr) (ArchiveHandle *AH);
166 
/*
 * Parallel dump/restore hooks.  Three of them return char *, and
 * MasterEndParallelItemPtr receives a const char *str; presumably these
 * strings are messages exchanged between master and worker -- TODO confirm
 * the format and who owns the returned memory.
 */
167 typedef char *(*WorkerJobRestorePtr) (ArchiveHandle *AH, TocEntry *te);
168 typedef char *(*WorkerJobDumpPtr) (ArchiveHandle *AH, TocEntry *te);
169 typedef char *(*MasterStartParallelItemPtr) (ArchiveHandle *AH, TocEntry *te,
170 														 T_Action act);
171 typedef int (*MasterEndParallelItemPtr) (ArchiveHandle *AH, TocEntry *te,
172 											  const char *str, T_Action act);
173 
/* alternative output routine; returns a size_t for the len bytes at buf */
174 typedef size_t (*CustomOutPtr) (ArchiveHandle *AH, const void *buf, size_t len);
175 
/*
 * Quoting context of a small SQL scanner: outside any quotes, inside a
 * single-quoted literal, or inside a double-quoted identifier.  Stored in
 * sqlparseInfo.state.
 */
176 typedef enum
177 {
178 	SQL_SCAN = 0,				/* normal */
179 	SQL_IN_SINGLE_QUOTE,		/* '...' literal */
180 	SQL_IN_DOUBLE_QUOTE			/* "..." identifier */
181 } sqlparseState;
182 
/*
 * Scanner state carried between chunks of SQL text.  struct _archiveHandle
 * keeps one of these in its sqlparse member, described there as the state
 * for parsing INSERT data.
 */
183 typedef struct
184 {
185 	sqlparseState state;		/* see above */
186 	bool		backSlash;		/* next char is backslash quoted? */
187 	PQExpBuffer curCmd;			/* incomplete line (NULL if not created) */
188 } sqlparseInfo;
189 
/*
 * Coarse phase of archive processing.  struct _archiveHandle records it
 * twice: the current phase in stage, and a second copy in lastErrorStage
 * (presumably the phase in which the last error was reported -- confirm).
 */
190 typedef enum
191 {
192 	STAGE_NONE = 0,
193 	STAGE_INITIALIZING,
194 	STAGE_PROCESSING,
195 	STAGE_FINALIZING
196 } ArchiverStage;
197 
/*
 * Kind of text currently being written; stored in the outputKind member of
 * struct _archiveHandle.
 */
198 typedef enum
199 {
200 	OUTPUT_SQLCMDS = 0,			/* emitting general SQL commands */
201 	OUTPUT_COPYDATA,			/* writing COPY data */
202 	OUTPUT_OTHERDATA			/* writing data as INSERT commands */
203 } ArchiverOutput;
204 
205 /*
206  * For historical reasons, ACL items are interspersed with everything else in
207  * a dump file's TOC; typically they're right after the object they're for.
208  * However, we need to restore data before ACLs, as otherwise a read-only
209  * table (ie one where the owner has revoked her own INSERT privilege) causes
210  * data restore failures.  On the other hand, matview REFRESH commands should
211  * come out after ACLs, as otherwise non-superuser-owned matviews might not
212  * be able to execute.  (If the permissions at the time of dumping would not
213  * allow a REFRESH, too bad; we won't fix that for you.)  We also want event
214  * triggers to be restored after ACLs, so that they can't mess those up.
215  *
216  * These considerations force us to make three passes over the TOC,
217  * restoring the appropriate subset of items in each pass.  We assume that
218  * the dependency sort resulted in an appropriate ordering of items within
219  * each subset.
220  *
221  * XXX This mechanism should be superseded by tracking dependencies on ACLs
222  * properly; but we'll still need it for old dump files even after that.
223  */
/*
 * The restore passes, in the order they are numbered (0, 1, 2).
 * RESTORE_PASS_LAST is a preprocessor alias for the final enumerator; the
 * #define sits inside the enum body only for proximity and must be kept
 * pointing at the last entry if a pass is ever added.
 */
224 typedef enum
225 {
226 	RESTORE_PASS_MAIN = 0,		/* Main pass (most TOC item types) */
227 	RESTORE_PASS_ACL,			/* ACL item types */
228 	RESTORE_PASS_POST_ACL		/* Event trigger and matview refresh items */
229 
230 #define RESTORE_PASS_LAST RESTORE_PASS_POST_ACL
231 } RestorePass;
232 
/*
 * What is wanted from a TOC entry.  The values are distinct single bits
 * (0x01, 0x02, 0x04), so schema and data requests can be combined in one
 * value; see the reqs member of struct _tocEntry.
 */
233 typedef enum
234 {
235 	REQ_SCHEMA = 0x01,			/* want schema */
236 	REQ_DATA = 0x02,			/* want data */
237 	REQ_SPECIAL = 0x04			/* for special TOC entries */
238 } teReqs;
239 
/*
 * Private per-archive state of the archiver (typedef'd as ArchiveHandle).
 *
 * The public Archive struct is embedded as the first member; presumably
 * this is what lets an Archive pointer handed out to callers be converted
 * back to an ArchiveHandle pointer -- confirm against the .c files.
 */
240 struct _archiveHandle
241 {
242 	Archive		public;			/* Public part of archive */
243 	char		vmaj;			/* Version of file */
244 	char		vmin;
245 	char		vrev;
	/* presumably the packed form comparable to the K_VERS_* values -- confirm */
246 	int			version;		/* Conveniently formatted version */
247 
248 	char	   *archiveRemoteVersion;	/* When reading an archive, the
249 										 * version of the dumped DB */
250 	char	   *archiveDumpVersion;		/* When reading an archive, the
251 										 * version of the dumper */
252 
253 	int			debugLevel;		/* Used for logging (currently only by
254 								 * --verbose) */
255 	size_t		intSize;		/* Size of an integer in the archive */
256 	size_t		offSize;		/* Size of a file offset in the archive -
257 								 * Added V1.7 */
258 	ArchiveFormat format;		/* Archive format */
259 
260 	sqlparseInfo sqlparse;		/* state for parsing INSERT data */
261 
262 	time_t		createDate;		/* Date archive created */
263 
264 	/*
265 	 * Fields used when discovering archive format.  For tar format, we load
266 	 * the first block into the lookahead buffer, and verify that it looks
267 	 * like a tar header.  The tar module must then consume bytes from the
268 	 * lookahead buffer before reading any more from the file.  For custom
269 	 * format, we load only the "PGDMP" marker into the buffer, and then set
270 	 * readHeader after confirming it matches.  The buffer is vestigial in
271 	 * this case, as the subsequent code just checks readHeader and doesn't
272 	 * examine the buffer.
273 	 */
274 	int			readHeader;		/* Set if we already read "PGDMP" marker */
275 	char	   *lookahead;		/* Buffer used when reading header to discover
276 								 * format */
277 	size_t		lookaheadSize;	/* Allocated size of buffer */
278 	size_t		lookaheadLen;	/* Length of valid data in lookahead */
279 	size_t		lookaheadPos;	/* Current read position in lookahead buffer */
280 
	/*
	 * Format-specific callbacks.  Each member has the same name as its
	 * pointer type.  Presumably these are filled in by the InitArchiveFmt_*
	 * routines declared later in this header -- confirm.
	 */
281 	ArchiveEntryPtr ArchiveEntryPtr;	/* Called for each metadata object */
282 	StartDataPtr StartDataPtr;	/* Called when table data is about to be
283 								 * dumped */
284 	WriteDataPtr WriteDataPtr;	/* Called to send some table data to the
285 								 * archive */
286 	EndDataPtr EndDataPtr;		/* Called when table data dump is finished */
287 	WriteBytePtr WriteBytePtr;	/* Write a byte to output */
288 	ReadBytePtr ReadBytePtr;	/* Read a byte from an archive */
289 	WriteBufPtr WriteBufPtr;	/* Write a buffer of output to the archive */
290 	ReadBufPtr ReadBufPtr;		/* Read a buffer of input from the archive */
291 	ClosePtr ClosePtr;			/* Close the archive */
292 	ReopenPtr ReopenPtr;		/* Reopen the archive */
293 	WriteExtraTocPtr WriteExtraTocPtr;	/* Write extra TOC entry data
294 										 * associated with the current archive
295 										 * format */
296 	ReadExtraTocPtr ReadExtraTocPtr;	/* Read extra info associated with
297 										 * archive format */
298 	PrintExtraTocPtr PrintExtraTocPtr;	/* Extra TOC info for format */
299 	PrintTocDataPtr PrintTocDataPtr;
300 
	/* blob callbacks; see the corresponding pointer typedefs */
301 	StartBlobsPtr StartBlobsPtr;
302 	EndBlobsPtr EndBlobsPtr;
303 	StartBlobPtr StartBlobPtr;
304 	EndBlobPtr EndBlobPtr;
305 
	/* parallel dump/restore callbacks; see the corresponding typedefs */
306 	MasterStartParallelItemPtr MasterStartParallelItemPtr;
307 	MasterEndParallelItemPtr MasterEndParallelItemPtr;
308 
	/* SetupWorkerPtr's type is not declared in this header */
309 	SetupWorkerPtr SetupWorkerPtr;
310 	WorkerJobDumpPtr WorkerJobDumpPtr;
311 	WorkerJobRestorePtr WorkerJobRestorePtr;
312 
313 	ClonePtr ClonePtr;			/* Clone format-specific fields */
314 	DeClonePtr DeClonePtr;		/* Clean up cloned fields */
315 
316 	CustomOutPtr CustomOutPtr;	/* Alternative script output routine */
317 
318 	/* Stuff for direct DB connection */
319 	char	   *archdbname;		/* DB name *read* from archive */
320 	char	   *savedPassword;	/* password for ropt->username, if known */
321 	char	   *use_role;
322 	PGconn	   *connection;
323 	/* If connCancel isn't NULL, SIGINT handler will send a cancel */
324 	PGcancel   *volatile connCancel;
325 
326 	int			connectToDB;	/* Flag to indicate if direct DB connection is
327 								 * required */
328 	ArchiverOutput outputKind;	/* Flag for what we're currently writing */
329 	bool		pgCopyIn;		/* Currently in libpq 'COPY IN' mode. */
330 
331 	int			loFd;			/* BLOB fd */
332 	int			writingBlob;	/* Flag */
333 	int			blobCount;		/* # of blobs restored */
334 
335 	char	   *fSpec;			/* Archive File Spec */
336 	FILE	   *FH;				/* General purpose file handle */
337 	void	   *OF;
338 	int			gzOut;			/* Output file */
339 
340 	struct _tocEntry *toc;		/* Header of circular list of TOC entries */
341 	int			tocCount;		/* Number of TOC entries */
342 	DumpId		maxDumpId;		/* largest DumpId among all TOC entries */
343 
344 	/* arrays created after the TOC list is complete: */
345 	struct _tocEntry **tocsByDumpId;	/* TOCs indexed by dumpId */
346 	DumpId	   *tableDataId;	/* TABLE DATA ids, indexed by table dumpId */
347 
348 	struct _tocEntry *currToc;	/* Used when dumping data */
349 	int			compression;	/* Compression requested on open.  Possible
350 								 * values: -1 = Z_DEFAULT_COMPRESSION,
351 								 * 0 = COMPRESSION_NONE, 1-9 = levels for
352 								 * gzip compression */
353 	ArchiveMode mode;			/* File mode - r or w */
354 	void	   *formatData;		/* Header data specific to file format */
355 
356 	/* these vars track state to avoid sending redundant SET commands */
357 	char	   *currUser;		/* current username, or NULL if unknown */
358 	char	   *currSchema;		/* current schema, or NULL */
359 	char	   *currTablespace; /* current tablespace, or NULL */
360 	bool		currWithOids;	/* current default_with_oids setting */
361 
	/*
	 * Buffer with used/allocated byte counts; going by the lo_ prefix,
	 * presumably for large-object data -- TODO confirm.
	 */
362 	void	   *lo_buf;
363 	size_t		lo_buf_used;
364 	size_t		lo_buf_size;
365 
366 	int			noTocComments;
367 	ArchiverStage stage;
368 	ArchiverStage lastErrorStage;
369 	RestorePass restorePass;	/* used only during parallel restore */
370 	struct _tocEntry *currentTE;
371 	struct _tocEntry *lastErrorTE;
372 };
373 
/*
 * One table-of-contents entry (typedef'd as TocEntry).  Entries are linked
 * through prev/next; struct _archiveHandle's toc member is documented as
 * the header of that circular list.  The fields fall into three groups:
 * the description of the object, working state used while dumping or
 * restoring, and extra state needed only for parallel restore.
 */
374 struct _tocEntry
375 {
376 	struct _tocEntry *prev;
377 	struct _tocEntry *next;
378 	CatalogId	catalogId;
379 	DumpId		dumpId;
380 	teSection	section;
381 	bool		hadDumper;		/* Archiver was passed a dumper routine (used
382 								 * in restore) */
383 	char	   *tag;			/* index tag */
384 	char	   *namespace;		/* null or empty string if not in a schema */
385 	char	   *tablespace;		/* null if not in a tablespace; empty string
386 								 * means use database default */
387 	char	   *owner;
388 	bool		withOids;		/* Used only by "TABLE" tags */
389 	char	   *desc;
390 	char	   *defn;
391 	char	   *dropStmt;
392 	char	   *copyStmt;
393 	DumpId	   *dependencies;	/* dumpIds of objects this one depends on */
394 	int			nDeps;			/* number of dependencies */
395 
396 	DataDumperPtr dataDumper;	/* Routine to dump data for object */
397 	void	   *dataDumperArg;	/* Arg for above routine */
398 	void	   *formatData;		/* TOC Entry data specific to file format */
399 
400 	/* working state while dumping/restoring */
401 	teReqs		reqs;			/* do we need schema and/or data of object */
402 	bool		created;		/* set for DATA member if TABLE was created */
403 
404 	/* working state (needed only for parallel restore) */
405 	struct _tocEntry *par_prev; /* list links for pending/ready items; */
406 	struct _tocEntry *par_next; /* these are NULL if not in either list */
407 	int			depCount;		/* number of dependencies not yet restored */
408 	DumpId	   *revDeps;		/* dumpIds of objects depending on this one */
409 	int			nRevDeps;		/* number of such dependencies */
410 	DumpId	   *lockDeps;		/* dumpIds of objects this one needs lock on */
411 	int			nLockDeps;		/* number of such dependencies */
412 };
413 
/*
 * Archiver routines shared between the archiver core and the format
 * modules.  The definitions are not in this header.
 */
414 extern int	parallel_restore(struct ParallelArgs *args);
415 extern void on_exit_close_archive(Archive *AHX);
416 
/*
 * printf-style reporting routine: fmt is the third argument and the
 * variadic arguments start at the fourth, matching pg_attribute_printf(3, 4).
 */
417 extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *modulename, const char *fmt,...) pg_attribute_printf(3, 4);
418 
/*
 * NOTE(review): WriteTOC/ReadTOC and WriteToc/ReadToc differ only in letter
 * case.  Confirm that both pairs are really defined; one pair may be a
 * stale declaration.
 */
419 extern void WriteTOC(ArchiveHandle *AH);
420 extern void ReadTOC(ArchiveHandle *AH);
421 extern void WriteHead(ArchiveHandle *AH);
422 extern void ReadHead(ArchiveHandle *AH);
423 extern void WriteToc(ArchiveHandle *AH);
424 extern void ReadToc(ArchiveHandle *AH);
425 extern void WriteDataChunks(ArchiveHandle *AH, struct ParallelState *pstate);
426 extern void WriteDataChunksForTocEntry(ArchiveHandle *AH, TocEntry *te);
427 extern ArchiveHandle *CloneArchive(ArchiveHandle *AH);
428 extern void DeCloneArchive(ArchiveHandle *AH);
429 
430 extern teReqs TocIDRequired(ArchiveHandle *AH, DumpId id);
/* declared without "extern"; for a function declaration that changes nothing */
431 TocEntry   *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id);
432 extern bool checkSeek(FILE *fp);
433 
/*
 * Convenience wrappers around appendStringLiteral() and
 * appendByteaLiteral() that take the encoding and/or std_strings setting
 * from the public part of an archive handle.  AH must be a pointer to a
 * struct with a "public" member (an ArchiveHandle pointer); it is
 * parenthesized in the expansion but, in the string-literal variant,
 * evaluated twice, so avoid arguments with side effects.
 */
434 #define appendStringLiteralAHX(buf,str,AH) \
435 	appendStringLiteral(buf, str, (AH)->public.encoding, (AH)->public.std_strings)
436 
437 #define appendByteaLiteralAHX(buf,str,len,AH) \
438 	appendByteaLiteral(buf, str, len, (AH)->public.std_strings)
439 
440 /*
441  * Mandatory routines for each supported format
442  */
443 
/*
 * Primitive readers/writers for integers and strings in an archive.  The
 * write routines return a size_t (presumably the number of bytes written --
 * confirm); ReadStr returns a char * whose ownership is not stated here.
 */
444 extern size_t WriteInt(ArchiveHandle *AH, int i);
445 extern int	ReadInt(ArchiveHandle *AH);
446 extern char *ReadStr(ArchiveHandle *AH);
447 extern size_t WriteStr(ArchiveHandle *AH, const char *s);
448 
/*
 * File-offset counterparts of the routines above.  The prototypes omit
 * parameter names; NOTE(review): the int returned by ReadOffset and the int
 * passed to WriteOffset presumably carry one of the K_OFFSET_* flags --
 * confirm against the definitions.
 */
449 int			ReadOffset(ArchiveHandle *, pgoff_t *);
450 size_t		WriteOffset(ArchiveHandle *, pgoff_t, int);
451 
/* blob restore bracketing: whole set (Blobs) and single blob by oid (Blob) */
452 extern void StartRestoreBlobs(ArchiveHandle *AH);
453 extern void StartRestoreBlob(ArchiveHandle *AH, Oid oid, bool drop);
454 extern void EndRestoreBlob(ArchiveHandle *AH, Oid oid);
455 extern void EndRestoreBlobs(ArchiveHandle *AH);
456 
/* one initialization routine per archive format module */
457 extern void InitArchiveFmt_Custom(ArchiveHandle *AH);
458 extern void InitArchiveFmt_Null(ArchiveHandle *AH);
459 extern void InitArchiveFmt_Directory(ArchiveHandle *AH);
460 extern void InitArchiveFmt_Tar(ArchiveHandle *AH);
461 
462 extern bool isValidTarHeader(char *header);
463 
464 extern void ReconnectToServer(ArchiveHandle *AH, const char *dbname);
465 extern void DropBlobIfExists(ArchiveHandle *AH, Oid oid);
466 
/*
 * Output and logging helpers.  ahwrite takes its arguments in fwrite order
 * (ptr, size, nmemb) with the archive handle in place of the stream.
 * ahprintf and ahlog are printf-style; the pg_attribute_printf arguments
 * give the position of fmt and of the first variadic argument.
 */
467 void		ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH);
468 int			ahprintf(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3);
469 
470 void		ahlog(ArchiveHandle *AH, int level, const char *fmt,...) pg_attribute_printf(3, 4);
471 
472 #endif
473