1 /*------------------------------------------------------------------------- 2 * 3 * pg_control.h 4 * The system control file "pg_control" is not a heap relation. 5 * However, we define it here so that the format is documented. 6 * 7 * 8 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group 9 * Portions Copyright (c) 1994, Regents of the University of California 10 * 11 * src/include/catalog/pg_control.h 12 * 13 *------------------------------------------------------------------------- 14 */ 15 #ifndef PG_CONTROL_H 16 #define PG_CONTROL_H 17 18 #include "access/transam.h" 19 #include "access/xlogdefs.h" 20 #include "pgtime.h" /* for pg_time_t */ 21 #include "port/pg_crc32c.h" 22 23 24 /* Version identifier for this pg_control format */ 25 #define PG_CONTROL_VERSION 1201 26 27 /* Nonce key length, see below */ 28 #define MOCK_AUTH_NONCE_LEN 32 29 30 /* 31 * Body of CheckPoint XLOG records. This is declared here because we keep 32 * a copy of the latest one in pg_control for possible disaster recovery. 33 * Changing this struct requires a PG_CONTROL_VERSION bump. 34 */ 35 typedef struct CheckPoint 36 { 37 XLogRecPtr redo; /* next RecPtr available when we began to 38 * create CheckPoint (i.e. REDO start point) */ 39 TimeLineID ThisTimeLineID; /* current TLI */ 40 TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new 41 * timeline (equals ThisTimeLineID otherwise) */ 42 bool fullPageWrites; /* current full_page_writes */ 43 FullTransactionId nextFullXid; /* next free full transaction ID */ 44 Oid nextOid; /* next free OID */ 45 MultiXactId nextMulti; /* next free MultiXactId */ 46 MultiXactOffset nextMultiOffset; /* next free MultiXact offset */ 47 TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ 48 Oid oldestXidDB; /* database with minimum datfrozenxid */ 49 MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */ 50 Oid oldestMultiDB; /* database with minimum datminmxid */ 51 pg_time_t time; /* time stamp of checkpoint */ 52 TransactionId oldestCommitTsXid; /* oldest Xid with valid commit 53 * timestamp */ 54 TransactionId newestCommitTsXid; /* newest Xid with valid commit 55 * timestamp */ 56 57 /* 58 * Oldest XID still running. This is only needed to initialize hot standby 59 * mode from an online checkpoint, so we only bother calculating this for 60 * online checkpoints and only when wal_level is replica. Otherwise it's 61 * set to InvalidTransactionId. 62 */ 63 TransactionId oldestActiveXid; 64 } CheckPoint; 65 66 /* XLOG info values for XLOG rmgr */ 67 #define XLOG_CHECKPOINT_SHUTDOWN 0x00 68 #define XLOG_CHECKPOINT_ONLINE 0x10 69 #define XLOG_NOOP 0x20 70 #define XLOG_NEXTOID 0x30 71 #define XLOG_SWITCH 0x40 72 #define XLOG_BACKUP_END 0x50 73 #define XLOG_PARAMETER_CHANGE 0x60 74 #define XLOG_RESTORE_POINT 0x70 75 #define XLOG_FPW_CHANGE 0x80 76 #define XLOG_END_OF_RECOVERY 0x90 77 #define XLOG_FPI_FOR_HINT 0xA0 78 #define XLOG_FPI 0xB0 79 /* 0xC0 is used in Postgres 9.5-11 */ 80 #define XLOG_OVERWRITE_CONTRECORD 0xD0 81 82 83 /* 84 * System status indicator. Note this is stored in pg_control; if you change 85 * it, you must bump PG_CONTROL_VERSION 86 */ 87 typedef enum DBState 88 { 89 DB_STARTUP = 0, 90 DB_SHUTDOWNED, 91 DB_SHUTDOWNED_IN_RECOVERY, 92 DB_SHUTDOWNING, 93 DB_IN_CRASH_RECOVERY, 94 DB_IN_ARCHIVE_RECOVERY, 95 DB_IN_PRODUCTION 96 } DBState; 97 98 /* 99 * Contents of pg_control. 100 */ 101 102 typedef struct ControlFileData 103 { 104 /* 105 * Unique system identifier --- to ensure we match up xlog files with the 106 * installation that produced them. 107 */ 108 uint64 system_identifier; 109 110 /* 111 * Version identifier information. Keep these fields at the same offset, 112 * especially pg_control_version; they won't be real useful if they move 113 * around. (For historical reasons they must be 8 bytes into the file 114 * rather than immediately at the front.) 115 * 116 * pg_control_version identifies the format of pg_control itself. 117 * catalog_version_no identifies the format of the system catalogs. 118 * 119 * There are additional version identifiers in individual files; for 120 * example, WAL logs contain per-page magic numbers that can serve as 121 * version cues for the WAL log. 122 */ 123 uint32 pg_control_version; /* PG_CONTROL_VERSION */ 124 uint32 catalog_version_no; /* see catversion.h */ 125 126 /* 127 * System status data 128 */ 129 DBState state; /* see enum above */ 130 pg_time_t time; /* time stamp of last pg_control update */ 131 XLogRecPtr checkPoint; /* last check point record ptr */ 132 133 CheckPoint checkPointCopy; /* copy of last check point record */ 134 135 XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */ 136 137 /* 138 * These two values determine the minimum point we must recover up to 139 * before starting up: 140 * 141 * minRecoveryPoint is updated to the latest replayed LSN whenever we 142 * flush a data change during archive recovery. That guards against 143 * starting archive recovery, aborting it, and restarting with an earlier 144 * stop location. If we've already flushed data changes from WAL record X 145 * to disk, we mustn't start up until we reach X again. Zero when not 146 * doing archive recovery. 147 * 148 * backupStartPoint is the redo pointer of the backup start checkpoint, if 149 * we are recovering from an online backup and haven't reached the end of 150 * backup yet. It is reset to zero when the end of backup is reached, and 151 * we mustn't start up before that. A boolean would suffice otherwise, but 152 * we use the redo pointer as a cross-check when we see an end-of-backup 153 * record, to make sure the end-of-backup record corresponds the base 154 * backup we're recovering from. 155 * 156 * backupEndPoint is the backup end location, if we are recovering from an 157 * online backup which was taken from the standby and haven't reached the 158 * end of backup yet. It is initialized to the minimum recovery point in 159 * pg_control which was backed up last. It is reset to zero when the end 160 * of backup is reached, and we mustn't start up before that. 161 * 162 * If backupEndRequired is true, we know for sure that we're restoring 163 * from a backup, and must see a backup-end record before we can safely 164 * start up. If it's false, but backupStartPoint is set, a backup_label 165 * file was found at startup but it may have been a leftover from a stray 166 * pg_start_backup() call, not accompanied by pg_stop_backup(). 167 */ 168 XLogRecPtr minRecoveryPoint; 169 TimeLineID minRecoveryPointTLI; 170 XLogRecPtr backupStartPoint; 171 XLogRecPtr backupEndPoint; 172 bool backupEndRequired; 173 174 /* 175 * Parameter settings that determine if the WAL can be used for archival 176 * or hot standby. 177 */ 178 int wal_level; 179 bool wal_log_hints; 180 int MaxConnections; 181 int max_worker_processes; 182 int max_wal_senders; 183 int max_prepared_xacts; 184 int max_locks_per_xact; 185 bool track_commit_timestamp; 186 187 /* 188 * This data is used to check for hardware-architecture compatibility of 189 * the database and the backend executable. We need not check endianness 190 * explicitly, since the pg_control version will surely look wrong to a 191 * machine of different endianness, but we do need to worry about MAXALIGN 192 * and floating-point format. (Note: storage layout nominally also 193 * depends on SHORTALIGN and INTALIGN, but in practice these are the same 194 * on all architectures of interest.) 195 * 196 * Testing just one double value is not a very bulletproof test for 197 * floating-point compatibility, but it will catch most cases. 198 */ 199 uint32 maxAlign; /* alignment requirement for tuples */ 200 double floatFormat; /* constant 1234567.0 */ 201 #define FLOATFORMAT_VALUE 1234567.0 202 203 /* 204 * This data is used to make sure that configuration of this database is 205 * compatible with the backend executable. 206 */ 207 uint32 blcksz; /* data block size for this DB */ 208 uint32 relseg_size; /* blocks per segment of large relation */ 209 210 uint32 xlog_blcksz; /* block size within WAL files */ 211 uint32 xlog_seg_size; /* size of each WAL segment */ 212 213 uint32 nameDataLen; /* catalog name field width */ 214 uint32 indexMaxKeys; /* max number of columns in an index */ 215 216 uint32 toast_max_chunk_size; /* chunk size in TOAST tables */ 217 uint32 loblksize; /* chunk size in pg_largeobject */ 218 219 /* flags indicating pass-by-value status of various types */ 220 bool float4ByVal; /* float4 pass-by-value? */ 221 bool float8ByVal; /* float8, int8, etc pass-by-value? */ 222 223 /* Are data pages protected by checksums? Zero if no checksum version */ 224 uint32 data_checksum_version; 225 226 /* 227 * Random nonce, used in authentication requests that need to proceed 228 * based on values that are cluster-unique, like a SASL exchange that 229 * failed at an early stage. 230 */ 231 char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]; 232 233 /* CRC of all above ... MUST BE LAST! */ 234 pg_crc32c crc; 235 } ControlFileData; 236 237 /* 238 * Maximum safe value of sizeof(ControlFileData). For reliability's sake, 239 * it's critical that pg_control updates be atomic writes. That generally 240 * means the active data can't be more than one disk sector, which is 512 241 * bytes on common hardware. Be very careful about raising this limit. 242 */ 243 #define PG_CONTROL_MAX_SAFE_SIZE 512 244 245 /* 246 * Physical size of the pg_control file. Note that this is considerably 247 * bigger than the actually used size (ie, sizeof(ControlFileData)). 248 * The idea is to keep the physical size constant independent of format 249 * changes, so that ReadControlFile will deliver a suitable wrong-version 250 * message instead of a read error if it's looking at an incompatible file. 251 */ 252 #define PG_CONTROL_FILE_SIZE 8192 253 254 #endif /* PG_CONTROL_H */ 255