1 /* 2 * xlog.h 3 * 4 * PostgreSQL write-ahead log manager 5 * 6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group 7 * Portions Copyright (c) 1994, Regents of the University of California 8 * 9 * src/include/access/xlog.h 10 */ 11 #ifndef XLOG_H 12 #define XLOG_H 13 14 #include "access/rmgr.h" 15 #include "access/xlogdefs.h" 16 #include "access/xloginsert.h" 17 #include "access/xlogreader.h" 18 #include "datatype/timestamp.h" 19 #include "lib/stringinfo.h" 20 #include "nodes/pg_list.h" 21 #include "storage/fd.h" 22 23 24 /* Sync methods */ 25 #define SYNC_METHOD_FSYNC 0 26 #define SYNC_METHOD_FDATASYNC 1 27 #define SYNC_METHOD_OPEN 2 /* for O_SYNC */ 28 #define SYNC_METHOD_FSYNC_WRITETHROUGH 3 29 #define SYNC_METHOD_OPEN_DSYNC 4 /* for O_DSYNC */ 30 extern int sync_method; 31 32 extern PGDLLIMPORT TimeLineID ThisTimeLineID; /* current TLI */ 33 34 /* 35 * Prior to 8.4, all activity during recovery was carried out by the startup 36 * process. This local variable continues to be used in many parts of the 37 * code to indicate actions taken by RecoveryManagers. Other processes that 38 * potentially perform work during recovery should check RecoveryInProgress(). 39 * See XLogCtl notes in xlog.c. 40 */ 41 extern bool InRecovery; 42 43 /* 44 * Like InRecovery, standbyState is only valid in the startup process. 45 * In all other processes it will have the value STANDBY_DISABLED (so 46 * InHotStandby will read as FALSE). 47 * 48 * In DISABLED state, we're performing crash recovery or hot standby was 49 * disabled in postgresql.conf. 50 * 51 * In INITIALIZED state, we've run InitRecoveryTransactionEnvironment, but 52 * we haven't yet processed a RUNNING_XACTS or shutdown-checkpoint WAL record 53 * to initialize our master-transaction tracking system. 54 * 55 * When the transaction tracking is initialized, we enter the SNAPSHOT_PENDING 56 * state. The tracked information might still be incomplete, so we can't allow 57 * connections yet, but redo functions must update the in-memory state when 58 * appropriate. 59 * 60 * In SNAPSHOT_READY mode, we have full knowledge of transactions that are 61 * (or were) running in the master at the current WAL location. Snapshots 62 * can be taken, and read-only queries can be run. 63 */ 64 typedef enum 65 { 66 STANDBY_DISABLED, 67 STANDBY_INITIALIZED, 68 STANDBY_SNAPSHOT_PENDING, 69 STANDBY_SNAPSHOT_READY 70 } HotStandbyState; 71 72 extern HotStandbyState standbyState; 73 74 #define InHotStandby (standbyState >= STANDBY_SNAPSHOT_PENDING) 75 76 /* 77 * Recovery target type. 78 * Only set during a Point in Time recovery, not when standby_mode = on 79 */ 80 typedef enum 81 { 82 RECOVERY_TARGET_UNSET, 83 RECOVERY_TARGET_XID, 84 RECOVERY_TARGET_TIME, 85 RECOVERY_TARGET_NAME, 86 RECOVERY_TARGET_LSN, 87 RECOVERY_TARGET_IMMEDIATE 88 } RecoveryTargetType; 89 90 extern XLogRecPtr ProcLastRecPtr; 91 extern XLogRecPtr XactLastRecEnd; 92 extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd; 93 94 extern bool reachedConsistency; 95 96 /* these variables are GUC parameters related to XLOG */ 97 extern int min_wal_size_mb; 98 extern int max_wal_size_mb; 99 extern int wal_keep_segments; 100 extern int XLOGbuffers; 101 extern int XLogArchiveTimeout; 102 extern int wal_retrieve_retry_interval; 103 extern char *XLogArchiveCommand; 104 extern bool EnableHotStandby; 105 extern bool fullPageWrites; 106 extern bool wal_log_hints; 107 extern bool wal_compression; 108 extern bool *wal_consistency_checking; 109 extern char *wal_consistency_checking_string; 110 extern bool log_checkpoints; 111 112 extern int CheckPointSegments; 113 114 /* Archive modes */ 115 typedef enum ArchiveMode 116 { 117 ARCHIVE_MODE_OFF = 0, /* disabled */ 118 ARCHIVE_MODE_ON, /* enabled while server is running normally */ 119 ARCHIVE_MODE_ALWAYS /* enabled always (even during recovery) */ 120 } ArchiveMode; 121 extern int XLogArchiveMode; 122 123 /* WAL levels */ 124 typedef enum WalLevel 125 { 126 WAL_LEVEL_MINIMAL = 0, 127 WAL_LEVEL_REPLICA, 128 WAL_LEVEL_LOGICAL 129 } WalLevel; 130 131 /* Recovery states */ 132 typedef enum RecoveryState 133 { 134 RECOVERY_STATE_CRASH = 0, /* crash recovery */ 135 RECOVERY_STATE_ARCHIVE, /* archive recovery */ 136 RECOVERY_STATE_DONE /* currently in production */ 137 } RecoveryState; 138 139 extern PGDLLIMPORT int wal_level; 140 141 /* Is WAL archiving enabled (always or only while server is running normally)? */ 142 #define XLogArchivingActive() \ 143 (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF) 144 /* Is WAL archiving enabled always (even during recovery)? */ 145 #define XLogArchivingAlways() \ 146 (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS) 147 #define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0') 148 149 /* 150 * Is WAL-logging necessary for archival or log-shipping, or can we skip 151 * WAL-logging if we fsync() the data before committing instead? 152 */ 153 #define XLogIsNeeded() (wal_level >= WAL_LEVEL_REPLICA) 154 155 /* 156 * Is a full-page image needed for hint bit updates? 157 * 158 * Normally, we don't WAL-log hint bit updates, but if checksums are enabled, 159 * we have to protect them against torn page writes. When you only set 160 * individual bits on a page, it's still consistent no matter what combination 161 * of the bits make it to disk, but the checksum wouldn't match. Also WAL-log 162 * them if forced by wal_log_hints=on. 163 */ 164 #define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints) 165 166 /* Do we need to WAL-log information required only for Hot Standby and logical replication? */ 167 #define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA) 168 169 /* Do we need to WAL-log information required only for logical replication? */ 170 #define XLogLogicalInfoActive() (wal_level >= WAL_LEVEL_LOGICAL) 171 172 #ifdef WAL_DEBUG 173 extern bool XLOG_DEBUG; 174 #endif 175 176 /* 177 * OR-able request flag bits for checkpoints. The "cause" bits are used only 178 * for logging purposes. Note: the flags must be defined so that it's 179 * sensible to OR together request flags arising from different requestors. 180 */ 181 182 /* These directly affect the behavior of CreateCheckPoint and subsidiaries */ 183 #define CHECKPOINT_IS_SHUTDOWN 0x0001 /* Checkpoint is for shutdown */ 184 #define CHECKPOINT_END_OF_RECOVERY 0x0002 /* Like shutdown checkpoint, but 185 * issued at end of WAL recovery */ 186 #define CHECKPOINT_IMMEDIATE 0x0004 /* Do it without delays */ 187 #define CHECKPOINT_FORCE 0x0008 /* Force even if no activity */ 188 #define CHECKPOINT_FLUSH_ALL 0x0010 /* Flush all pages, including those 189 * belonging to unlogged tables */ 190 /* These are important to RequestCheckpoint */ 191 #define CHECKPOINT_WAIT 0x0020 /* Wait for completion */ 192 /* These indicate the cause of a checkpoint request */ 193 #define CHECKPOINT_CAUSE_XLOG 0x0040 /* XLOG consumption */ 194 #define CHECKPOINT_CAUSE_TIME 0x0080 /* Elapsed time */ 195 /* We set this to ensure that ckpt_flags is not 0 if a request has been made */ 196 #define CHECKPOINT_REQUESTED 0x0100 /* Checkpoint request has been made */ 197 198 /* 199 * Flag bits for the record being inserted, set using XLogSetRecordFlags(). 200 */ 201 #define XLOG_INCLUDE_ORIGIN 0x01 /* include the replication origin */ 202 #define XLOG_MARK_UNIMPORTANT 0x02 /* record not important for durability */ 203 204 205 /* Checkpoint statistics */ 206 typedef struct CheckpointStatsData 207 { 208 TimestampTz ckpt_start_t; /* start of checkpoint */ 209 TimestampTz ckpt_write_t; /* start of flushing buffers */ 210 TimestampTz ckpt_sync_t; /* start of fsyncs */ 211 TimestampTz ckpt_sync_end_t; /* end of fsyncs */ 212 TimestampTz ckpt_end_t; /* end of checkpoint */ 213 214 int ckpt_bufs_written; /* # of buffers written */ 215 216 int ckpt_segs_added; /* # of new xlog segments created */ 217 int ckpt_segs_removed; /* # of xlog segments deleted */ 218 int ckpt_segs_recycled; /* # of xlog segments recycled */ 219 220 int ckpt_sync_rels; /* # of relations synced */ 221 uint64 ckpt_longest_sync; /* Longest sync for one relation */ 222 uint64 ckpt_agg_sync_time; /* The sum of all the individual sync 223 * times, which is not necessarily the 224 * same as the total elapsed time for the 225 * entire sync phase. */ 226 } CheckpointStatsData; 227 228 extern CheckpointStatsData CheckpointStats; 229 230 struct XLogRecData; 231 232 extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, 233 XLogRecPtr fpw_lsn, 234 uint8 flags); 235 extern void XLogFlush(XLogRecPtr RecPtr); 236 extern bool XLogBackgroundFlush(void); 237 extern bool XLogNeedsFlush(XLogRecPtr RecPtr); 238 extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock); 239 extern int XLogFileOpen(XLogSegNo segno); 240 241 extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli); 242 extern XLogSegNo XLogGetLastRemovedSegno(void); 243 extern void XLogSetAsyncXactLSN(XLogRecPtr record); 244 extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn); 245 246 extern void xlog_redo(XLogReaderState *record); 247 extern void xlog_desc(StringInfo buf, XLogReaderState *record); 248 extern const char *xlog_identify(uint8 info); 249 250 extern void issue_xlog_fsync(int fd, XLogSegNo segno); 251 252 extern bool RecoveryInProgress(void); 253 extern RecoveryState GetRecoveryState(void); 254 extern bool HotStandbyActive(void); 255 extern bool HotStandbyActiveInReplay(void); 256 extern bool XLogInsertAllowed(void); 257 extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream); 258 extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI); 259 extern XLogRecPtr GetXLogInsertRecPtr(void); 260 extern XLogRecPtr GetXLogWriteRecPtr(void); 261 extern bool RecoveryIsPaused(void); 262 extern void SetRecoveryPause(bool recoveryPause); 263 extern TimestampTz GetLatestXTime(void); 264 extern TimestampTz GetCurrentChunkReplayStartTime(void); 265 extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno); 266 267 extern void UpdateControlFile(void); 268 extern uint64 GetSystemIdentifier(void); 269 extern char *GetMockAuthenticationNonce(void); 270 extern bool DataChecksumsEnabled(void); 271 extern XLogRecPtr GetFakeLSNForUnloggedRel(void); 272 extern Size XLOGShmemSize(void); 273 extern void XLOGShmemInit(void); 274 extern void BootStrapXLOG(void); 275 extern void StartupXLOG(void); 276 extern void ShutdownXLOG(int code, Datum arg); 277 extern void InitXLOGAccess(void); 278 extern void CreateCheckPoint(int flags); 279 extern bool CreateRestartPoint(int flags); 280 extern void XLogPutNextOid(Oid nextOid); 281 extern XLogRecPtr XLogRestorePoint(const char *rpName); 282 extern void UpdateFullPageWrites(void); 283 extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p); 284 extern XLogRecPtr GetRedoRecPtr(void); 285 extern XLogRecPtr GetInsertRecPtr(void); 286 extern XLogRecPtr GetFlushRecPtr(void); 287 extern XLogRecPtr GetLastImportantRecPtr(void); 288 extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); 289 extern void RemovePromoteSignalFiles(void); 290 291 extern bool CheckPromoteSignal(void); 292 extern void WakeupRecovery(void); 293 extern void SetWalWriterSleeping(bool sleeping); 294 295 extern void XLogRequestWalReceiverReply(void); 296 297 extern void assign_max_wal_size(int newval, void *extra); 298 extern void assign_checkpoint_completion_target(double newval, void *extra); 299 300 /* 301 * Routines to start, stop, and get status of a base backup. 302 */ 303 304 /* 305 * Session-level status of base backups 306 * 307 * This is used in parallel with the shared memory status to control parallel 308 * execution of base backup functions for a given session, be it a backend 309 * dedicated to replication or a normal backend connected to a database. The 310 * update of the session-level status happens at the same time as the shared 311 * memory counters to keep a consistent global and local state of the backups 312 * running. 313 */ 314 typedef enum SessionBackupState 315 { 316 SESSION_BACKUP_NONE, 317 SESSION_BACKUP_EXCLUSIVE, 318 SESSION_BACKUP_NON_EXCLUSIVE 319 } SessionBackupState; 320 321 extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, 322 TimeLineID *starttli_p, StringInfo labelfile, DIR *tblspcdir, 323 List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, 324 bool needtblspcmapfile); 325 extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive, 326 TimeLineID *stoptli_p); 327 extern void do_pg_abort_backup(void); 328 extern SessionBackupState get_backup_status(void); 329 330 /* File path names (all relative to $PGDATA) */ 331 #define BACKUP_LABEL_FILE "backup_label" 332 #define BACKUP_LABEL_OLD "backup_label.old" 333 334 #define TABLESPACE_MAP "tablespace_map" 335 #define TABLESPACE_MAP_OLD "tablespace_map.old" 336 337 #endif /* XLOG_H */ 338