1 /*------------------------------------------------------------------------- 2 * 3 * pg_backup_archiver.h 4 * 5 * Private interface to the pg_dump archiver routines. 6 * It is NOT intended that these routines be called by any 7 * dumper directly. 8 * 9 * See the headers to pg_restore for more details. 10 * 11 * Copyright (c) 2000, Philip Warner 12 * Rights are granted to use this software in any way so long 13 * as this notice is not removed. 14 * 15 * The author is not responsible for loss or damages that may 16 * result from its use. 17 * 18 * 19 * IDENTIFICATION 20 * src/bin/pg_dump/pg_backup_archiver.h 21 * 22 *------------------------------------------------------------------------- 23 */ 24 #ifndef __PG_BACKUP_ARCHIVE__ 25 #define __PG_BACKUP_ARCHIVE__ 26 27 28 #include <time.h> 29 30 #include "pg_backup.h" 31 32 #include "libpq-fe.h" 33 #include "pqexpbuffer.h" 34 35 #define LOBBUFSIZE 16384 36 37 /* 38 * Note: zlib.h must be included *after* libpq-fe.h, because the latter may 39 * include ssl.h, which has a naming conflict with zlib.h. 40 */ 41 #ifdef HAVE_LIBZ 42 #include <zlib.h> 43 #define GZCLOSE(fh) gzclose(fh) 44 #define GZWRITE(p, s, n, fh) gzwrite(fh, p, (n) * (s)) 45 #define GZREAD(p, s, n, fh) gzread(fh, p, (n) * (s)) 46 #define GZEOF(fh) gzeof(fh) 47 #else 48 #define GZCLOSE(fh) fclose(fh) 49 #define GZWRITE(p, s, n, fh) (fwrite(p, s, n, fh) * (s)) 50 #define GZREAD(p, s, n, fh) fread(p, s, n, fh) 51 #define GZEOF(fh) feof(fh) 52 /* this is just the redefinition of a libz constant */ 53 #define Z_DEFAULT_COMPRESSION (-1) 54 55 typedef struct _z_stream 56 { 57 void *next_in; 58 void *next_out; 59 size_t avail_in; 60 size_t avail_out; 61 } z_stream; 62 typedef z_stream *z_streamp; 63 #endif 64 65 /* Data block types */ 66 #define BLK_DATA 1 67 #define BLK_BLOBS 3 68 69 /* Encode version components into a convenient integer <maj><min><rev> */ 70 #define MAKE_ARCHIVE_VERSION(major, minor, rev) (((major) * 256 + (minor)) * 256 + (rev)) 71 72 #define ARCHIVE_MAJOR(version) (((version) >> 16) & 255) 73 #define ARCHIVE_MINOR(version) (((version) >> 8) & 255) 74 #define ARCHIVE_REV(version) (((version) ) & 255) 75 76 /* Historical version numbers (checked in code) */ 77 #define K_VERS_1_0 MAKE_ARCHIVE_VERSION(1, 0, 0) 78 #define K_VERS_1_2 MAKE_ARCHIVE_VERSION(1, 2, 0) /* Allow No ZLIB */ 79 #define K_VERS_1_3 MAKE_ARCHIVE_VERSION(1, 3, 0) /* BLOBs */ 80 #define K_VERS_1_4 MAKE_ARCHIVE_VERSION(1, 4, 0) /* Date & name in header */ 81 #define K_VERS_1_5 MAKE_ARCHIVE_VERSION(1, 5, 0) /* Handle dependencies */ 82 #define K_VERS_1_6 MAKE_ARCHIVE_VERSION(1, 6, 0) /* Schema field in TOCs */ 83 #define K_VERS_1_7 MAKE_ARCHIVE_VERSION(1, 7, 0) /* File Offset size in 84 * header */ 85 #define K_VERS_1_8 MAKE_ARCHIVE_VERSION(1, 8, 0) /* change interpretation 86 * of ID numbers and 87 * dependencies */ 88 #define K_VERS_1_9 MAKE_ARCHIVE_VERSION(1, 9, 0) /* add default_with_oids 89 * tracking */ 90 #define K_VERS_1_10 MAKE_ARCHIVE_VERSION(1, 10, 0) /* add tablespace */ 91 #define K_VERS_1_11 MAKE_ARCHIVE_VERSION(1, 11, 0) /* add toc section 92 * indicator */ 93 #define K_VERS_1_12 MAKE_ARCHIVE_VERSION(1, 12, 0) /* add separate BLOB 94 * entries */ 95 #define K_VERS_1_13 MAKE_ARCHIVE_VERSION(1, 13, 0) /* change search_path 96 * behavior */ 97 #define K_VERS_1_14 MAKE_ARCHIVE_VERSION(1, 14, 0) /* add tableam */ 98 99 /* Current archive version number (the format we can output) */ 100 #define K_VERS_MAJOR 1 101 #define K_VERS_MINOR 14 102 #define K_VERS_REV 0 103 #define K_VERS_SELF MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV) 104 105 /* Newest format we can read */ 106 #define K_VERS_MAX MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, 255) 107 108 109 /* Flags to indicate disposition of offsets stored in files */ 110 #define K_OFFSET_POS_NOT_SET 1 111 #define K_OFFSET_POS_SET 2 112 #define K_OFFSET_NO_DATA 3 113 114 /* 115 * Special exit values from worker children. We reserve 0 for normal 116 * success; 1 and other small values should be interpreted as crashes. 117 */ 118 #define WORKER_OK 0 119 #define WORKER_CREATE_DONE 10 120 #define WORKER_INHIBIT_DATA 11 121 #define WORKER_IGNORED_ERRORS 12 122 123 typedef struct _archiveHandle ArchiveHandle; 124 typedef struct _tocEntry TocEntry; 125 struct ParallelState; 126 127 #define READ_ERROR_EXIT(fd) \ 128 do { \ 129 if (feof(fd)) \ 130 fatal("could not read from input file: end of file"); \ 131 else \ 132 fatal("could not read from input file: %m"); \ 133 } while (0) 134 135 #define WRITE_ERROR_EXIT \ 136 do { \ 137 fatal("could not write to output file: %m"); \ 138 } while (0) 139 140 typedef enum T_Action 141 { 142 ACT_DUMP, 143 ACT_RESTORE 144 } T_Action; 145 146 typedef void (*ClosePtrType) (ArchiveHandle *AH); 147 typedef void (*ReopenPtrType) (ArchiveHandle *AH); 148 typedef void (*ArchiveEntryPtrType) (ArchiveHandle *AH, TocEntry *te); 149 150 typedef void (*StartDataPtrType) (ArchiveHandle *AH, TocEntry *te); 151 typedef void (*WriteDataPtrType) (ArchiveHandle *AH, const void *data, size_t dLen); 152 typedef void (*EndDataPtrType) (ArchiveHandle *AH, TocEntry *te); 153 154 typedef void (*StartBlobsPtrType) (ArchiveHandle *AH, TocEntry *te); 155 typedef void (*StartBlobPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid); 156 typedef void (*EndBlobPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid); 157 typedef void (*EndBlobsPtrType) (ArchiveHandle *AH, TocEntry *te); 158 159 typedef int (*WriteBytePtrType) (ArchiveHandle *AH, const int i); 160 typedef int (*ReadBytePtrType) (ArchiveHandle *AH); 161 typedef void (*WriteBufPtrType) (ArchiveHandle *AH, const void *c, size_t len); 162 typedef void (*ReadBufPtrType) (ArchiveHandle *AH, void *buf, size_t len); 163 typedef void (*WriteExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te); 164 typedef void (*ReadExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te); 165 typedef void (*PrintExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te); 166 typedef void (*PrintTocDataPtrType) (ArchiveHandle *AH, TocEntry *te); 167 168 typedef void (*PrepParallelRestorePtrType) (ArchiveHandle *AH); 169 typedef void (*ClonePtrType) (ArchiveHandle *AH); 170 typedef void (*DeClonePtrType) (ArchiveHandle *AH); 171 172 typedef int (*WorkerJobDumpPtrType) (ArchiveHandle *AH, TocEntry *te); 173 typedef int (*WorkerJobRestorePtrType) (ArchiveHandle *AH, TocEntry *te); 174 175 typedef size_t (*CustomOutPtrType) (ArchiveHandle *AH, const void *buf, size_t len); 176 177 typedef enum 178 { 179 SQL_SCAN = 0, /* normal */ 180 SQL_IN_SINGLE_QUOTE, /* '...' literal */ 181 SQL_IN_DOUBLE_QUOTE /* "..." identifier */ 182 } sqlparseState; 183 184 typedef struct 185 { 186 sqlparseState state; /* see above */ 187 bool backSlash; /* next char is backslash quoted? */ 188 PQExpBuffer curCmd; /* incomplete line (NULL if not created) */ 189 } sqlparseInfo; 190 191 typedef enum 192 { 193 STAGE_NONE = 0, 194 STAGE_INITIALIZING, 195 STAGE_PROCESSING, 196 STAGE_FINALIZING 197 } ArchiverStage; 198 199 typedef enum 200 { 201 OUTPUT_SQLCMDS = 0, /* emitting general SQL commands */ 202 OUTPUT_COPYDATA, /* writing COPY data */ 203 OUTPUT_OTHERDATA /* writing data as INSERT commands */ 204 } ArchiverOutput; 205 206 /* 207 * For historical reasons, ACL items are interspersed with everything else in 208 * a dump file's TOC; typically they're right after the object they're for. 209 * However, we need to restore data before ACLs, as otherwise a read-only 210 * table (ie one where the owner has revoked her own INSERT privilege) causes 211 * data restore failures. On the other hand, matview REFRESH commands should 212 * come out after ACLs, as otherwise non-superuser-owned matviews might not 213 * be able to execute. (If the permissions at the time of dumping would not 214 * allow a REFRESH, too bad; we won't fix that for you.) We also want event 215 * triggers to be restored after ACLs, so that they can't mess those up. 216 * 217 * These considerations force us to make three passes over the TOC, 218 * restoring the appropriate subset of items in each pass. We assume that 219 * the dependency sort resulted in an appropriate ordering of items within 220 * each subset. 221 * 222 * XXX This mechanism should be superseded by tracking dependencies on ACLs 223 * properly; but we'll still need it for old dump files even after that. 224 */ 225 typedef enum 226 { 227 RESTORE_PASS_MAIN = 0, /* Main pass (most TOC item types) */ 228 RESTORE_PASS_ACL, /* ACL item types */ 229 RESTORE_PASS_POST_ACL /* Event trigger and matview refresh items */ 230 231 #define RESTORE_PASS_LAST RESTORE_PASS_POST_ACL 232 } RestorePass; 233 234 typedef enum 235 { 236 REQ_SCHEMA = 0x01, /* want schema */ 237 REQ_DATA = 0x02, /* want data */ 238 REQ_SPECIAL = 0x04 /* for special TOC entries */ 239 } teReqs; 240 241 struct _archiveHandle 242 { 243 Archive public; /* Public part of archive */ 244 int version; /* Version of file */ 245 246 char *archiveRemoteVersion; /* When reading an archive, the 247 * version of the dumped DB */ 248 char *archiveDumpVersion; /* When reading an archive, the version of 249 * the dumper */ 250 251 size_t intSize; /* Size of an integer in the archive */ 252 size_t offSize; /* Size of a file offset in the archive - 253 * Added V1.7 */ 254 ArchiveFormat format; /* Archive format */ 255 256 sqlparseInfo sqlparse; /* state for parsing INSERT data */ 257 258 time_t createDate; /* Date archive created */ 259 260 /* 261 * Fields used when discovering archive format. For tar format, we load 262 * the first block into the lookahead buffer, and verify that it looks 263 * like a tar header. The tar module must then consume bytes from the 264 * lookahead buffer before reading any more from the file. For custom 265 * format, we load only the "PGDMP" marker into the buffer, and then set 266 * readHeader after confirming it matches. The buffer is vestigial in 267 * this case, as the subsequent code just checks readHeader and doesn't 268 * examine the buffer. 269 */ 270 int readHeader; /* Set if we already read "PGDMP" marker */ 271 char *lookahead; /* Buffer used when reading header to discover 272 * format */ 273 size_t lookaheadSize; /* Allocated size of buffer */ 274 size_t lookaheadLen; /* Length of valid data in lookahead */ 275 size_t lookaheadPos; /* Current read position in lookahead buffer */ 276 277 ArchiveEntryPtrType ArchiveEntryPtr; /* Called for each metadata object */ 278 StartDataPtrType StartDataPtr; /* Called when table data is about to be 279 * dumped */ 280 WriteDataPtrType WriteDataPtr; /* Called to send some table data to the 281 * archive */ 282 EndDataPtrType EndDataPtr; /* Called when table data dump is finished */ 283 WriteBytePtrType WriteBytePtr; /* Write a byte to output */ 284 ReadBytePtrType ReadBytePtr; /* Read a byte from an archive */ 285 WriteBufPtrType WriteBufPtr; /* Write a buffer of output to the archive */ 286 ReadBufPtrType ReadBufPtr; /* Read a buffer of input from the archive */ 287 ClosePtrType ClosePtr; /* Close the archive */ 288 ReopenPtrType ReopenPtr; /* Reopen the archive */ 289 WriteExtraTocPtrType WriteExtraTocPtr; /* Write extra TOC entry data 290 * associated with the current 291 * archive format */ 292 ReadExtraTocPtrType ReadExtraTocPtr; /* Read extra info associated with 293 * archive format */ 294 PrintExtraTocPtrType PrintExtraTocPtr; /* Extra TOC info for format */ 295 PrintTocDataPtrType PrintTocDataPtr; 296 297 StartBlobsPtrType StartBlobsPtr; 298 EndBlobsPtrType EndBlobsPtr; 299 StartBlobPtrType StartBlobPtr; 300 EndBlobPtrType EndBlobPtr; 301 302 SetupWorkerPtrType SetupWorkerPtr; 303 WorkerJobDumpPtrType WorkerJobDumpPtr; 304 WorkerJobRestorePtrType WorkerJobRestorePtr; 305 306 PrepParallelRestorePtrType PrepParallelRestorePtr; 307 ClonePtrType ClonePtr; /* Clone format-specific fields */ 308 DeClonePtrType DeClonePtr; /* Clean up cloned fields */ 309 310 CustomOutPtrType CustomOutPtr; /* Alternative script output routine */ 311 312 /* Stuff for direct DB connection */ 313 char *archdbname; /* DB name *read* from archive */ 314 char *savedPassword; /* password for ropt->username, if known */ 315 char *use_role; 316 PGconn *connection; 317 /* If connCancel isn't NULL, SIGINT handler will send a cancel */ 318 PGcancel *volatile connCancel; 319 320 int connectToDB; /* Flag to indicate if direct DB connection is 321 * required */ 322 ArchiverOutput outputKind; /* Flag for what we're currently writing */ 323 bool pgCopyIn; /* Currently in libpq 'COPY IN' mode. */ 324 325 int loFd; /* BLOB fd */ 326 int writingBlob; /* Flag */ 327 int blobCount; /* # of blobs restored */ 328 329 char *fSpec; /* Archive File Spec */ 330 FILE *FH; /* General purpose file handle */ 331 void *OF; 332 int gzOut; /* Output file */ 333 334 struct _tocEntry *toc; /* Header of circular list of TOC entries */ 335 int tocCount; /* Number of TOC entries */ 336 DumpId maxDumpId; /* largest DumpId among all TOC entries */ 337 338 /* arrays created after the TOC list is complete: */ 339 struct _tocEntry **tocsByDumpId; /* TOCs indexed by dumpId */ 340 DumpId *tableDataId; /* TABLE DATA ids, indexed by table dumpId */ 341 342 struct _tocEntry *currToc; /* Used when dumping data */ 343 int compression; /* Compression requested on open Possible 344 * values for compression: -1 345 * Z_DEFAULT_COMPRESSION 0 COMPRESSION_NONE 346 * 1-9 levels for gzip compression */ 347 bool dosync; /* data requested to be synced on sight */ 348 ArchiveMode mode; /* File mode - r or w */ 349 void *formatData; /* Header data specific to file format */ 350 351 /* these vars track state to avoid sending redundant SET commands */ 352 char *currUser; /* current username, or NULL if unknown */ 353 char *currSchema; /* current schema, or NULL */ 354 char *currTablespace; /* current tablespace, or NULL */ 355 char *currTableAm; /* current table access method, or NULL */ 356 357 void *lo_buf; 358 size_t lo_buf_used; 359 size_t lo_buf_size; 360 361 int noTocComments; 362 ArchiverStage stage; 363 ArchiverStage lastErrorStage; 364 RestorePass restorePass; /* used only during parallel restore */ 365 struct _tocEntry *currentTE; 366 struct _tocEntry *lastErrorTE; 367 }; 368 369 struct _tocEntry 370 { 371 struct _tocEntry *prev; 372 struct _tocEntry *next; 373 CatalogId catalogId; 374 DumpId dumpId; 375 teSection section; 376 bool hadDumper; /* Archiver was passed a dumper routine (used 377 * in restore) */ 378 char *tag; /* index tag */ 379 char *namespace; /* null or empty string if not in a schema */ 380 char *tablespace; /* null if not in a tablespace; empty string 381 * means use database default */ 382 char *tableam; /* table access method, only for TABLE tags */ 383 char *owner; 384 char *desc; 385 char *defn; 386 char *dropStmt; 387 char *copyStmt; 388 DumpId *dependencies; /* dumpIds of objects this one depends on */ 389 int nDeps; /* number of dependencies */ 390 391 DataDumperPtr dataDumper; /* Routine to dump data for object */ 392 void *dataDumperArg; /* Arg for above routine */ 393 void *formatData; /* TOC Entry data specific to file format */ 394 395 /* working state while dumping/restoring */ 396 pgoff_t dataLength; /* item's data size; 0 if none or unknown */ 397 teReqs reqs; /* do we need schema and/or data of object */ 398 bool created; /* set for DATA member if TABLE was created */ 399 400 /* working state (needed only for parallel restore) */ 401 struct _tocEntry *pending_prev; /* list links for pending-items list; */ 402 struct _tocEntry *pending_next; /* NULL if not in that list */ 403 int depCount; /* number of dependencies not yet restored */ 404 DumpId *revDeps; /* dumpIds of objects depending on this one */ 405 int nRevDeps; /* number of such dependencies */ 406 DumpId *lockDeps; /* dumpIds of objects this one needs lock on */ 407 int nLockDeps; /* number of such dependencies */ 408 }; 409 410 extern int parallel_restore(ArchiveHandle *AH, TocEntry *te); 411 extern void on_exit_close_archive(Archive *AHX); 412 413 extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3); 414 415 /* Options for ArchiveEntry */ 416 typedef struct _archiveOpts 417 { 418 const char *tag; 419 const char *namespace; 420 const char *tablespace; 421 const char *tableam; 422 const char *owner; 423 const char *description; 424 teSection section; 425 const char *createStmt; 426 const char *dropStmt; 427 const char *copyStmt; 428 const DumpId *deps; 429 int nDeps; 430 DataDumperPtr dumpFn; 431 void *dumpArg; 432 } ArchiveOpts; 433 #define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__} 434 /* Called to add a TOC entry */ 435 extern TocEntry *ArchiveEntry(Archive *AHX, CatalogId catalogId, 436 DumpId dumpId, ArchiveOpts *opts); 437 438 extern void WriteTOC(ArchiveHandle *AH); 439 extern void ReadTOC(ArchiveHandle *AH); 440 extern void WriteHead(ArchiveHandle *AH); 441 extern void ReadHead(ArchiveHandle *AH); 442 extern void WriteToc(ArchiveHandle *AH); 443 extern void ReadToc(ArchiveHandle *AH); 444 extern void WriteDataChunks(ArchiveHandle *AH, struct ParallelState *pstate); 445 extern void WriteDataChunksForTocEntry(ArchiveHandle *AH, TocEntry *te); 446 extern ArchiveHandle *CloneArchive(ArchiveHandle *AH); 447 extern void DeCloneArchive(ArchiveHandle *AH); 448 449 extern teReqs TocIDRequired(ArchiveHandle *AH, DumpId id); 450 TocEntry *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id); 451 extern bool checkSeek(FILE *fp); 452 453 #define appendStringLiteralAHX(buf,str,AH) \ 454 appendStringLiteral(buf, str, (AH)->public.encoding, (AH)->public.std_strings) 455 456 #define appendByteaLiteralAHX(buf,str,len,AH) \ 457 appendByteaLiteral(buf, str, len, (AH)->public.std_strings) 458 459 /* 460 * Mandatory routines for each supported format 461 */ 462 463 extern size_t WriteInt(ArchiveHandle *AH, int i); 464 extern int ReadInt(ArchiveHandle *AH); 465 extern char *ReadStr(ArchiveHandle *AH); 466 extern size_t WriteStr(ArchiveHandle *AH, const char *s); 467 468 int ReadOffset(ArchiveHandle *, pgoff_t *); 469 size_t WriteOffset(ArchiveHandle *, pgoff_t, int); 470 471 extern void StartRestoreBlobs(ArchiveHandle *AH); 472 extern void StartRestoreBlob(ArchiveHandle *AH, Oid oid, bool drop); 473 extern void EndRestoreBlob(ArchiveHandle *AH, Oid oid); 474 extern void EndRestoreBlobs(ArchiveHandle *AH); 475 476 extern void InitArchiveFmt_Custom(ArchiveHandle *AH); 477 extern void InitArchiveFmt_Null(ArchiveHandle *AH); 478 extern void InitArchiveFmt_Directory(ArchiveHandle *AH); 479 extern void InitArchiveFmt_Tar(ArchiveHandle *AH); 480 481 extern bool isValidTarHeader(char *header); 482 483 extern void ReconnectToServer(ArchiveHandle *AH, const char *dbname); 484 extern void DropBlobIfExists(ArchiveHandle *AH, Oid oid); 485 486 void ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH); 487 int ahprintf(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3); 488 489 #endif 490