1 /*-------------------------------------------------------------------------
2  *
3  * pg_control.h
4  *	  The system control file "pg_control" is not a heap relation.
5  *	  However, we define it here so that the format is documented.
6  *
7  *
8  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
9  * Portions Copyright (c) 1994, Regents of the University of California
10  *
11  * src/include/catalog/pg_control.h
12  *
13  *-------------------------------------------------------------------------
14  */
15 #ifndef PG_CONTROL_H
16 #define PG_CONTROL_H
17 
18 #include "access/xlogdefs.h"
19 #include "pgtime.h"				/* for pg_time_t */
20 #include "port/pg_crc32c.h"
21 
22 
23 /*
 * Version identifier for this pg_control format.  It is stored in
 * ControlFileData.pg_control_version and must be bumped whenever the
 * CheckPoint struct or the DBState enum changes.
 */
24 #define PG_CONTROL_VERSION	1100
25 
26 /*
 * Size in bytes of the ControlFileData.mock_authentication_nonce array,
 * which holds the random nonce kept in pg_control.
 */
27 #define MOCK_AUTH_NONCE_LEN		32
28 
29 /*
30  * Body of CheckPoint XLOG records.  This is declared here because we keep
31  * a copy of the latest one in pg_control for possible disaster recovery.
 * (That copy is the checkPointCopy member of ControlFileData.)
 *
32  * Changing this struct requires a PG_CONTROL_VERSION bump.
33  */
34 typedef struct CheckPoint
35 {
36 	XLogRecPtr	redo;			/* next RecPtr available when we began to
37 								 * create CheckPoint (i.e. REDO start point) */
38 	TimeLineID	ThisTimeLineID; /* current TLI */
39 	TimeLineID	PrevTimeLineID; /* previous TLI, if this record begins a new
40 								 * timeline (equals ThisTimeLineID otherwise) */
41 	bool		fullPageWrites; /* current full_page_writes */
42 	uint32		nextXidEpoch;	/* higher-order bits of nextXid */
43 	TransactionId nextXid;		/* next free XID */
44 	Oid			nextOid;		/* next free OID */
45 	MultiXactId nextMulti;		/* next free MultiXactId */
46 	MultiXactOffset nextMultiOffset;	/* next free MultiXact offset */
47 	TransactionId oldestXid;	/* cluster-wide minimum datfrozenxid */
48 	Oid			oldestXidDB;	/* database with minimum datfrozenxid */
49 	MultiXactId oldestMulti;	/* cluster-wide minimum datminmxid */
50 	Oid			oldestMultiDB;	/* database with minimum datminmxid */
51 	pg_time_t	time;			/* time stamp of checkpoint */
52 	TransactionId oldestCommitTsXid;	/* oldest Xid with valid commit
53 										 * timestamp */
54 	TransactionId newestCommitTsXid;	/* newest Xid with valid commit
55 										 * timestamp */
56 
57 	/*
58 	 * Oldest XID still running. This is only needed to initialize hot standby
59 	 * mode from an online checkpoint, so we only bother calculating this for
60 	 * online checkpoints and only when wal_level is replica. Otherwise it's
61 	 * set to InvalidTransactionId.
62 	 */
63 	TransactionId oldestActiveXid;
64 } CheckPoint;
65 
66 /*
 * XLOG info values for XLOG rmgr.
 *
 * Every code below is a distinct multiple of 0x10, so only bits 4-7 are
 * ever set and the low four bits are always zero.
 * NOTE(review): presumably the low four bits of a record's info field are
 * reserved for other purposes -- confirm against the WAL record definitions
 * before adding a code.  If the info field is a single byte (not shown
 * here), only 0xE0 and 0xF0 remain unassigned in this scheme.
 */
67 #define XLOG_CHECKPOINT_SHUTDOWN		0x00
68 #define XLOG_CHECKPOINT_ONLINE			0x10
69 #define XLOG_NOOP						0x20
70 #define XLOG_NEXTOID					0x30
71 #define XLOG_SWITCH						0x40
72 #define XLOG_BACKUP_END					0x50
73 #define XLOG_PARAMETER_CHANGE			0x60
74 #define XLOG_RESTORE_POINT				0x70
75 #define XLOG_FPW_CHANGE					0x80
76 #define XLOG_END_OF_RECOVERY			0x90
77 #define XLOG_FPI_FOR_HINT				0xA0
78 #define XLOG_FPI						0xB0
79 #define XLOG_FPI_MULTI					0xC0
80 #define XLOG_OVERWRITE_CONTRECORD		0xD0
81 
82 
83 /*
84  * System status indicator.  Note this is stored in pg_control; if you change
85  * it, you must bump PG_CONTROL_VERSION.
 *
 * Only DB_STARTUP carries an explicit value (0); the remaining members take
 * consecutive values in declaration order, so DB_IN_PRODUCTION is 6.
 * Inserting, removing or reordering members therefore changes the values
 * that end up in the stored ControlFileData.state field.
86  */
87 typedef enum DBState
88 {
89 	DB_STARTUP = 0,
90 	DB_SHUTDOWNED,
91 	DB_SHUTDOWNED_IN_RECOVERY,
92 	DB_SHUTDOWNING,
93 	DB_IN_CRASH_RECOVERY,
94 	DB_IN_ARCHIVE_RECOVERY,
95 	DB_IN_PRODUCTION
96 } DBState;
97 
98 /*
99  * Contents of pg_control.
 *
 * Member order is part of the file format: the 8-byte system_identifier
 * comes first, which places pg_control_version and catalog_version_no
 * 8 bytes into the file as required, and crc must remain the last member.
 * sizeof(ControlFileData) should not exceed PG_CONTROL_MAX_SAFE_SIZE.
100  */
101 
102 typedef struct ControlFileData
103 {
104 	/*
105 	 * Unique system identifier --- to ensure we match up xlog files with the
106 	 * installation that produced them.
107 	 */
108 	uint64		system_identifier;
109 
110 	/*
111 	 * Version identifier information.  Keep these fields at the same offset,
112 	 * especially pg_control_version; they won't be real useful if they move
113 	 * around.  (For historical reasons they must be 8 bytes into the file
114 	 * rather than immediately at the front.)
115 	 *
116 	 * pg_control_version identifies the format of pg_control itself.
117 	 * catalog_version_no identifies the format of the system catalogs.
118 	 *
119 	 * There are additional version identifiers in individual files; for
120 	 * example, WAL logs contain per-page magic numbers that can serve as
121 	 * version cues for the WAL log.
122 	 */
123 	uint32		pg_control_version; /* PG_CONTROL_VERSION */
124 	uint32		catalog_version_no; /* see catversion.h */
125 
126 	/*
127 	 * System status data
128 	 */
129 	DBState		state;			/* see enum above */
130 	pg_time_t	time;			/* time stamp of last pg_control update */
131 	XLogRecPtr	checkPoint;		/* last check point record ptr */
132 
133 	CheckPoint	checkPointCopy; /* copy of last check point record */
134 
135 	XLogRecPtr	unloggedLSN;	/* current fake LSN value, for unlogged rels */
136 
137 	/*
138 	 * These values determine the minimum point we must recover up to
139 	 * before starting up:
140 	 *
141 	 * minRecoveryPoint is updated to the latest replayed LSN whenever we
142 	 * flush a data change during archive recovery. That guards against
143 	 * starting archive recovery, aborting it, and restarting with an earlier
144 	 * stop location. If we've already flushed data changes from WAL record X
145 	 * to disk, we mustn't start up until we reach X again. Zero when not
146 	 * doing archive recovery.
147 	 *
148 	 * backupStartPoint is the redo pointer of the backup start checkpoint, if
149 	 * we are recovering from an online backup and haven't reached the end of
150 	 * backup yet. It is reset to zero when the end of backup is reached, and
151 	 * we mustn't start up before that. A boolean would suffice otherwise, but
152 	 * we use the redo pointer as a cross-check when we see an end-of-backup
153 	 * record, to make sure the end-of-backup record corresponds to the base
154 	 * backup we're recovering from.
155 	 *
156 	 * backupEndPoint is the backup end location, if we are recovering from an
157 	 * online backup which was taken from the standby and haven't reached the
158 	 * end of backup yet. It is initialized to the minimum recovery point in
159 	 * pg_control which was backed up last. It is reset to zero when the end
160 	 * of backup is reached, and we mustn't start up before that.
161 	 *
162 	 * If backupEndRequired is true, we know for sure that we're restoring
163 	 * from a backup, and must see a backup-end record before we can safely
164 	 * start up. If it's false, but backupStartPoint is set, a backup_label
165 	 * file was found at startup but it may have been a leftover from a stray
166 	 * pg_start_backup() call, not accompanied by pg_stop_backup().
167 	 */
168 	XLogRecPtr	minRecoveryPoint;
169 	TimeLineID	minRecoveryPointTLI;	/* NOTE(review): presumably the
										 * timeline minRecoveryPoint belongs
										 * to -- confirm */
170 	XLogRecPtr	backupStartPoint;
171 	XLogRecPtr	backupEndPoint;
172 	bool		backupEndRequired;
173 
174 	/*
175 	 * Parameter settings that determine if the WAL can be used for archival
176 	 * or hot standby.
	 *
	 * NOTE(review): the members below appear to record the like-named
	 * server settings; confirm against the code that fills them in.
177 	 */
178 	int			wal_level;
179 	bool		wal_log_hints;
180 	int			MaxConnections;
181 	int			max_worker_processes;
182 	int			max_prepared_xacts;
183 	int			max_locks_per_xact;
184 	bool		track_commit_timestamp;
185 
186 	/*
187 	 * This data is used to check for hardware-architecture compatibility of
188 	 * the database and the backend executable.  We need not check endianness
189 	 * explicitly, since the pg_control version will surely look wrong to a
190 	 * machine of different endianness, but we do need to worry about MAXALIGN
191 	 * and floating-point format.  (Note: storage layout nominally also
192 	 * depends on SHORTALIGN and INTALIGN, but in practice these are the same
193 	 * on all architectures of interest.)
194 	 *
195 	 * Testing just one double value is not a very bulletproof test for
196 	 * floating-point compatibility, but it will catch most cases.
197 	 */
198 	uint32		maxAlign;		/* alignment requirement for tuples */
199 	double		floatFormat;	/* constant 1234567.0 */
200 #define FLOATFORMAT_VALUE	1234567.0
201 
202 	/*
203 	 * This data is used to make sure that configuration of this database is
204 	 * compatible with the backend executable.
205 	 */
206 	uint32		blcksz;			/* data block size for this DB */
207 	uint32		relseg_size;	/* blocks per segment of large relation */
208 
209 	uint32		xlog_blcksz;	/* block size within WAL files */
210 	uint32		xlog_seg_size;	/* size of each WAL segment */
211 
212 	uint32		nameDataLen;	/* catalog name field width */
213 	uint32		indexMaxKeys;	/* max number of columns in an index */
214 
215 	uint32		toast_max_chunk_size;	/* chunk size in TOAST tables */
216 	uint32		loblksize;		/* chunk size in pg_largeobject */
217 
218 	/* flags indicating pass-by-value status of various types */
219 	bool		float4ByVal;	/* float4 pass-by-value? */
220 	bool		float8ByVal;	/* float8, int8, etc pass-by-value? */
221 
222 	/* Are data pages protected by checksums? Zero if no checksum version */
223 	uint32		data_checksum_version;
224 
225 	/*
226 	 * Random nonce, used in authentication requests that need to proceed
227 	 * based on values that are cluster-unique, like a SASL exchange that
228 	 * failed at an early stage.
229 	 */
230 	char		mock_authentication_nonce[MOCK_AUTH_NONCE_LEN];
231 
232 	/* CRC of all above ... MUST BE LAST! */
233 	pg_crc32c	crc;
234 } ControlFileData;
235 
236 /*
237  * Maximum safe value of sizeof(ControlFileData), in bytes.  For reliability's
238  * sake, it's critical that pg_control updates be atomic writes.  That generally
239  * means the active data can't be more than one disk sector, which is 512
240  * bytes on common hardware.  Be very careful about raising this limit.
241  */
242 #define PG_CONTROL_MAX_SAFE_SIZE	512
243 
244 /*
245  * Physical size of the pg_control file, in bytes.  Note that this is
246  * considerably bigger than the actually used size (ie, sizeof(ControlFileData)).
247  * The idea is to keep the physical size constant independent of format
248  * changes, so that ReadControlFile will deliver a suitable wrong-version
249  * message instead of a read error if it's looking at an incompatible file.
250  */
251 #define PG_CONTROL_FILE_SIZE		8192
252 
253 #endif							/* PG_CONTROL_H */
254