/*
 * xlog.h
 *
 * PostgreSQL write-ahead log manager
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/xlog.h
 */
11 #ifndef XLOG_H
12 #define XLOG_H
13 
14 #include "access/rmgr.h"
15 #include "access/xlogdefs.h"
16 #include "access/xloginsert.h"
17 #include "access/xlogreader.h"
18 #include "datatype/timestamp.h"
19 #include "lib/stringinfo.h"
20 #include "nodes/pg_list.h"
21 #include "storage/fd.h"
22 
23 
/*
 * Sync methods
 *
 * Integer codes identifying each sync method.  sync_method is only declared
 * here; presumably it holds one of these codes (confirm against its
 * definition).
 */
#define SYNC_METHOD_FSYNC		0
#define SYNC_METHOD_FDATASYNC	1
#define SYNC_METHOD_OPEN		2	/* for O_SYNC */
#define SYNC_METHOD_FSYNC_WRITETHROUGH	3
#define SYNC_METHOD_OPEN_DSYNC	4	/* for O_DSYNC */
extern int	sync_method;
31 
32 extern PGDLLIMPORT TimeLineID ThisTimeLineID;	/* current TLI */
33 
/*
 * Prior to 8.4, all activity during recovery was carried out by the startup
 * process. This local variable continues to be used in many parts of the
 * code to indicate actions taken by RecoveryManagers. Other processes that
 * potentially perform work during recovery should check RecoveryInProgress().
 * See XLogCtl notes in xlog.c.
 *
 * This is a declaration only; the variable itself is defined elsewhere.
 */
extern bool InRecovery;
42 
/*
 * Like InRecovery, standbyState is only valid in the startup process.
 * In all other processes it will have the value STANDBY_DISABLED (so
 * InHotStandby will read as FALSE).
 *
 * In DISABLED state, we're performing crash recovery or hot standby was
 * disabled in postgresql.conf.
 *
 * In INITIALIZED state, we've run InitRecoveryTransactionEnvironment, but
 * we haven't yet processed a RUNNING_XACTS or shutdown-checkpoint WAL record
 * to initialize our master-transaction tracking system.
 *
 * When the transaction tracking is initialized, we enter the SNAPSHOT_PENDING
 * state. The tracked information might still be incomplete, so we can't allow
 * connections yet, but redo functions must update the in-memory state when
 * appropriate.
 *
 * In SNAPSHOT_READY mode, we have full knowledge of transactions that are
 * (or were) running in the master at the current WAL location. Snapshots
 * can be taken, and read-only queries can be run.
 *
 * The members are declared in the order the states are described above;
 * InHotStandby depends on that order.
 */
typedef enum
{
	STANDBY_DISABLED,			/* crash recovery, or hot standby disabled */
	STANDBY_INITIALIZED,		/* environment set up, transaction tracking
								 * not yet initialized */
	STANDBY_SNAPSHOT_PENDING,	/* tracking initialized but possibly
								 * incomplete; no connections yet */
	STANDBY_SNAPSHOT_READY		/* snapshots and read-only queries allowed */
} HotStandbyState;

extern HotStandbyState standbyState;

/*
 * True when standbyState is STANDBY_SNAPSHOT_PENDING or any later state.
 */
#define InHotStandby (standbyState >= STANDBY_SNAPSHOT_PENDING)
75 
/*
 * Recovery target type.
 * Only set during a Point in Time recovery, not when standby_mode = on
 *
 * RECOVERY_TARGET_UNSET is declared first and therefore has the value 0.
 */
typedef enum
{
	RECOVERY_TARGET_UNSET,
	RECOVERY_TARGET_XID,
	RECOVERY_TARGET_TIME,
	RECOVERY_TARGET_NAME,
	RECOVERY_TARGET_LSN,
	RECOVERY_TARGET_IMMEDIATE
} RecoveryTargetType;
89 
90 extern XLogRecPtr ProcLastRecPtr;
91 extern XLogRecPtr XactLastRecEnd;
92 extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd;
93 
94 extern bool reachedConsistency;
95 
/*
 * these variables are GUC parameters related to XLOG
 *
 * Declarations only; the variables are defined elsewhere.
 */
extern int	min_wal_size_mb;
extern int	max_wal_size_mb;
extern int	wal_keep_segments;
extern int	XLOGbuffers;
extern int	XLogArchiveTimeout;
extern int	wal_retrieve_retry_interval;
extern char *XLogArchiveCommand;
extern bool EnableHotStandby;
extern bool fullPageWrites;
extern bool wal_log_hints;
extern bool wal_compression;
extern bool *wal_consistency_checking;
extern char *wal_consistency_checking_string;
extern bool log_checkpoints;

extern int	CheckPointSegments;
113 
/*
 * Archive modes
 *
 * The members are ordered so that any value greater than ARCHIVE_MODE_OFF
 * means archiving is enabled in some form.  XLogArchiveMode is declared as a
 * plain int; presumably it holds one of these values (confirm against its
 * definition).
 */
typedef enum ArchiveMode
{
	ARCHIVE_MODE_OFF = 0,		/* disabled */
	ARCHIVE_MODE_ON,			/* enabled while server is running normally */
	ARCHIVE_MODE_ALWAYS			/* enabled always (even during recovery) */
} ArchiveMode;
extern int	XLogArchiveMode;
122 
/*
 * WAL levels
 *
 * The members are declared in increasing order, which is what allows the
 * predicate macros in this header to test a level with ">=".
 */
typedef enum WalLevel
{
	WAL_LEVEL_MINIMAL = 0,
	WAL_LEVEL_REPLICA,
	WAL_LEVEL_LOGICAL
} WalLevel;
130 
/*
 * Recovery states
 *
 * This is the type returned by GetRecoveryState().
 */
typedef enum RecoveryState
{
	RECOVERY_STATE_CRASH = 0,	/* crash recovery */
	RECOVERY_STATE_ARCHIVE,		/* archive recovery */
	RECOVERY_STATE_DONE			/* currently in production */
} RecoveryState;
138 
139 extern PGDLLIMPORT int wal_level;
140 
/*
 * Is WAL archiving enabled (always or only while server is running normally)?
 *
 * The AssertMacro checks that archiving is never enabled while wal_level is
 * below WAL_LEVEL_REPLICA; the macro's value is the comparison after the
 * comma.
 */
#define XLogArchivingActive() \
	(AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF)
/* Is WAL archiving enabled always (even during recovery)? */
#define XLogArchivingAlways() \
	(AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)
/* Is XLogArchiveCommand a non-empty string?  It is dereferenced unchecked. */
#define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0')
148 
/*
 * Is WAL-logging necessary for archival or log-shipping, or can we skip
 * WAL-logging if we fsync() the data before committing instead?
 */
#define XLogIsNeeded() (wal_level >= WAL_LEVEL_REPLICA)

/*
 * Is a full-page image needed for hint bit updates?
 *
 * Normally, we don't WAL-log hint bit updates, but if checksums are enabled,
 * we have to protect them against torn page writes.  When you only set
 * individual bits on a page, it's still consistent no matter what combination
 * of the bits make it to disk, but the checksum wouldn't match.  Also WAL-log
 * them if forced by wal_log_hints=on.
 */
#define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints)

/*
 * Do we need to WAL-log information required only for Hot Standby and logical
 * replication?  (Same test as XLogIsNeeded().)
 */
#define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA)

/* Do we need to WAL-log information required only for logical replication? */
#define XLogLogicalInfoActive() (wal_level >= WAL_LEVEL_LOGICAL)
171 
/* Declared only when the build defines WAL_DEBUG. */
#ifdef WAL_DEBUG
extern bool XLOG_DEBUG;
#endif
175 
/*
 * OR-able request flag bits for checkpoints.  The "cause" bits are used only
 * for logging purposes.  Note: the flags must be defined so that it's
 * sensible to OR together request flags arising from different requestors.
 *
 * Each flag occupies its own bit (0x0001 through 0x0100).
 */

/* These directly affect the behavior of CreateCheckPoint and subsidiaries */
#define CHECKPOINT_IS_SHUTDOWN	0x0001	/* Checkpoint is for shutdown */
#define CHECKPOINT_END_OF_RECOVERY	0x0002	/* Like shutdown checkpoint, but
											 * issued at end of WAL recovery */
#define CHECKPOINT_IMMEDIATE	0x0004	/* Do it without delays */
#define CHECKPOINT_FORCE		0x0008	/* Force even if no activity */
#define CHECKPOINT_FLUSH_ALL	0x0010	/* Flush all pages, including those
										 * belonging to unlogged tables */
/* These are important to RequestCheckpoint */
#define CHECKPOINT_WAIT			0x0020	/* Wait for completion */
/* These indicate the cause of a checkpoint request */
#define CHECKPOINT_CAUSE_XLOG	0x0040	/* XLOG consumption */
#define CHECKPOINT_CAUSE_TIME	0x0080	/* Elapsed time */
/* We set this to ensure that ckpt_flags is not 0 if a request has been made */
#define CHECKPOINT_REQUESTED	0x0100	/* Checkpoint request has been made */
197 
/*
 * Flag bits for the record being inserted, set using XLogSetRecordFlags().
 * Each flag occupies its own bit, so the two can be OR'ed together.
 */
#define XLOG_INCLUDE_ORIGIN		0x01	/* include the replication origin */
#define XLOG_MARK_UNIMPORTANT	0x02	/* record not important for durability */
203 
204 
205 /* Checkpoint statistics */
206 typedef struct CheckpointStatsData
207 {
208 	TimestampTz ckpt_start_t;	/* start of checkpoint */
209 	TimestampTz ckpt_write_t;	/* start of flushing buffers */
210 	TimestampTz ckpt_sync_t;	/* start of fsyncs */
211 	TimestampTz ckpt_sync_end_t;	/* end of fsyncs */
212 	TimestampTz ckpt_end_t;		/* end of checkpoint */
213 
214 	int			ckpt_bufs_written;	/* # of buffers written */
215 
216 	int			ckpt_segs_added;	/* # of new xlog segments created */
217 	int			ckpt_segs_removed;	/* # of xlog segments deleted */
218 	int			ckpt_segs_recycled; /* # of xlog segments recycled */
219 
220 	int			ckpt_sync_rels; /* # of relations synced */
221 	uint64		ckpt_longest_sync;	/* Longest sync for one relation */
222 	uint64		ckpt_agg_sync_time; /* The sum of all the individual sync
223 									 * times, which is not necessarily the
224 									 * same as the total elapsed time for the
225 									 * entire sync phase. */
226 } CheckpointStatsData;
227 
228 extern CheckpointStatsData CheckpointStats;
229 
230 struct XLogRecData;
231 
232 extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
233 				 XLogRecPtr fpw_lsn,
234 				 uint8 flags);
235 extern void XLogFlush(XLogRecPtr RecPtr);
236 extern bool XLogBackgroundFlush(void);
237 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
238 extern int	XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);
239 extern int	XLogFileOpen(XLogSegNo segno);
240 
241 extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
242 extern XLogSegNo XLogGetLastRemovedSegno(void);
243 extern void XLogSetAsyncXactLSN(XLogRecPtr record);
244 extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn);
245 
246 extern void xlog_redo(XLogReaderState *record);
247 extern void xlog_desc(StringInfo buf, XLogReaderState *record);
248 extern const char *xlog_identify(uint8 info);
249 
250 extern void issue_xlog_fsync(int fd, XLogSegNo segno);
251 
252 extern bool RecoveryInProgress(void);
253 extern RecoveryState GetRecoveryState(void);
254 extern bool HotStandbyActive(void);
255 extern bool HotStandbyActiveInReplay(void);
256 extern bool XLogInsertAllowed(void);
257 extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
258 extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
259 extern XLogRecPtr GetXLogInsertRecPtr(void);
260 extern XLogRecPtr GetXLogWriteRecPtr(void);
261 extern bool RecoveryIsPaused(void);
262 extern void SetRecoveryPause(bool recoveryPause);
263 extern TimestampTz GetLatestXTime(void);
264 extern TimestampTz GetCurrentChunkReplayStartTime(void);
265 extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
266 
267 extern void UpdateControlFile(void);
268 extern uint64 GetSystemIdentifier(void);
269 extern char *GetMockAuthenticationNonce(void);
270 extern bool DataChecksumsEnabled(void);
271 extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
272 extern Size XLOGShmemSize(void);
273 extern void XLOGShmemInit(void);
274 extern void BootStrapXLOG(void);
275 extern void StartupXLOG(void);
276 extern void ShutdownXLOG(int code, Datum arg);
277 extern void InitXLOGAccess(void);
278 extern void CreateCheckPoint(int flags);
279 extern bool CreateRestartPoint(int flags);
280 extern void XLogPutNextOid(Oid nextOid);
281 extern XLogRecPtr XLogRestorePoint(const char *rpName);
282 extern void UpdateFullPageWrites(void);
283 extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p);
284 extern XLogRecPtr GetRedoRecPtr(void);
285 extern XLogRecPtr GetInsertRecPtr(void);
286 extern XLogRecPtr GetFlushRecPtr(void);
287 extern XLogRecPtr GetLastImportantRecPtr(void);
288 extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);
289 extern void RemovePromoteSignalFiles(void);
290 
/*
 * Prototypes for promotion/wakeup signalling and WAL writer/receiver
 * notifications.
 */
extern bool CheckPromoteSignal(void);
extern void WakeupRecovery(void);
extern void SetWalWriterSleeping(bool sleeping);

extern void XLogRequestWalReceiverReply(void);

/*
 * NOTE(review): the (newval, extra) signatures look like GUC assign hooks --
 * confirm.
 */
extern void assign_max_wal_size(int newval, void *extra);
extern void assign_checkpoint_completion_target(double newval, void *extra);
299 
/*
 * Routines to start, stop, and get status of a base backup.
 */

/*
 * Session-level status of base backups
 *
 * This is used in parallel with the shared memory status to control parallel
 * execution of base backup functions for a given session, be it a backend
 * dedicated to replication or a normal backend connected to a database. The
 * update of the session-level status happens at the same time as the shared
 * memory counters to keep a consistent global and local state of the backups
 * running.
 *
 * SESSION_BACKUP_NONE is declared first and therefore has the value 0.
 */
typedef enum SessionBackupState
{
	SESSION_BACKUP_NONE,
	SESSION_BACKUP_EXCLUSIVE,
	SESSION_BACKUP_NON_EXCLUSIVE
} SessionBackupState;
320 
321 extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
322 				   TimeLineID *starttli_p, StringInfo labelfile, DIR *tblspcdir,
323 				   List **tablespaces, StringInfo tblspcmapfile, bool infotbssize,
324 				   bool needtblspcmapfile);
325 extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
326 				  TimeLineID *stoptli_p);
327 extern void do_pg_abort_backup(void);
328 extern SessionBackupState get_backup_status(void);
329 
/*
 * File path names (all relative to $PGDATA)
 *
 * Each *_OLD name is the corresponding file name with ".old" appended.
 */
#define BACKUP_LABEL_FILE		"backup_label"
#define BACKUP_LABEL_OLD		"backup_label.old"

#define TABLESPACE_MAP			"tablespace_map"
#define TABLESPACE_MAP_OLD		"tablespace_map.old"
336 
337 #endif							/* XLOG_H */
338