1 /*-------------------------------------------------------------------------
2  *
3  * xlogreader.h
4  *		Definitions for the generic XLog reading facility
5  *
6  * Portions Copyright (c) 2013-2021, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *		src/include/access/xlogreader.h
10  *
11  * NOTES
12  *		See the definition of the XLogReaderState struct for instructions on
13  *		how to use the XLogReader infrastructure.
14  *
15  *		The basic idea is to allocate an XLogReaderState via
16  *		XLogReaderAllocate(), position the reader to the first record with
17  *		XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord()
18  *		until it returns NULL.
19  *
20  *		Callers supply a page_read callback if they want to call
21  *		XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
22  *		otherwise.  The WALRead function can be used as a helper to write
23  *		page_read callbacks, but it is not mandatory; callers that use it
24  *		must supply segment_open callbacks.  The segment_close callback
25  *		must always be supplied.
26  *
27  *		After reading a record with XLogReadRecord(), it's decomposed into
28  *		the per-block and main data parts, and the parts can be accessed
29  *		with the XLogRec* macros and functions. You can also decode a
30  *		record that's already constructed in memory, without reading from
31  *		disk, by calling the DecodeXLogRecord() function.
32  *-------------------------------------------------------------------------
33  */
34 #ifndef XLOGREADER_H
35 #define XLOGREADER_H
36 
37 #ifndef FRONTEND
38 #include "access/transam.h"
39 #endif
40 
41 #include "access/xlogrecord.h"
42 
43 /* WALOpenSegment identifies the WAL segment file currently being read. */
44 typedef struct WALOpenSegment
45 {
46 	int			ws_file;		/* segment file descriptor; segment_close sets it < 0 */
47 	XLogSegNo	ws_segno;		/* segment number */
48 	TimeLineID	ws_tli;			/* timeline ID of the currently open file */
49 } WALOpenSegment;
50 
51 /* WALSegmentContext carries context information about WAL segments to read */
52 typedef struct WALSegmentContext
53 {
54 	char		ws_dir[MAXPGPATH];	/* presumably the WAL directory path; TODO confirm */
55 	int			ws_segsize;		/* presumably the WAL segment size in bytes; TODO confirm */
56 } WALSegmentContext;
57 
58 typedef struct XLogReaderState XLogReaderState;	/* struct is defined further below */
59 
60 /* Callback types; see XLogReaderRoutine below for each callback's contract */
61 typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
62 							   XLogRecPtr targetPagePtr,
63 							   int reqLen,
64 							   XLogRecPtr targetRecPtr,
65 							   char *readBuf);
66 typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
67 								  XLogSegNo nextSegNo,
68 								  TimeLineID *tli_p);
69 typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
70 
71 typedef struct XLogReaderRoutine	/* operational callbacks of an XLogReaderState */
72 {
73 	/*
74 	 * Data input callback
75 	 *
76 	 * This callback shall read at least reqLen valid bytes of the xlog page
77 	 * starting at targetPagePtr, and store them in readBuf.  The callback
78 	 * shall return the number of bytes read (never more than XLOG_BLCKSZ), or
79 	 * -1 on failure.  The callback shall sleep, if necessary, to wait for the
80 	 * requested bytes to become available.  The callback will not be invoked
81 	 * again for the same page unless more than the returned number of bytes
82 	 * are needed.
83 	 *
84 	 * targetRecPtr is the position of the WAL record we're reading.  Usually
85 	 * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs
86 	 * to read and verify the page or segment header, before it reads the
87 	 * actual WAL record it's interested in.  In that case, targetRecPtr can
88 	 * be used to determine which timeline to read the page from.
89 	 *
90 	 * The callback shall set ->seg.ws_tli to the TLI of the file the page was
91 	 * read from.
92 	 */
93 	XLogPageReadCB page_read;
94 
95 	/*
96 	 * Callback to open the specified WAL segment for reading.  ->seg.ws_file
97 	 * shall be set to the file descriptor of the opened segment.  In case of
98 	 * failure, an error shall be raised by the callback and it shall not
99 	 * return.
100 	 *
101 	 * "nextSegNo" is the number of the segment to be opened.
102 	 *
103 	 * "tli_p" is an input/output argument. WALRead() uses it to pass the
104 	 * timeline in which the new segment should be found, but the callback can
105 	 * use it to return the TLI that it actually opened.
106 	 */
107 	WALSegmentOpenCB segment_open;
108 
109 	/*
110 	 * WAL segment close callback.  The callback shall set ->seg.ws_file to a
111 	 * negative number.
112 	 */
113 	WALSegmentCloseCB segment_close;
114 } XLogReaderRoutine;
115 
/*
 * Build a pointer to an XLogReaderRoutine compound literal initialized from
 * the given initializer list, e.g. XL_ROUTINE(.page_read = cb, ...).
 *
 * The expansion is fully parenthesized so that the result behaves as a single
 * pointer expression; without the outer parentheses a postfix operator applied
 * to the macro (such as XL_ROUTINE(...)->page_read) would bind to the compound
 * literal rather than to the address-of result.
 *
 * The compound literal has the storage duration of the enclosing block (or
 * static storage duration at file scope), so the pointer must not be used
 * after that block is left.
 */
#define XL_ROUTINE(...) (&(XLogReaderRoutine){__VA_ARGS__})
117 
118 typedef struct					/* decoded form of one block reference of a record */
119 {
120 	/* Is this block ref in use? */
121 	bool		in_use;
122 
123 	/* Identify the block this refers to */
124 	RelFileNode rnode;
125 	ForkNumber	forknum;
126 	BlockNumber blkno;
127 
128 	/* copy of the fork_flags field from the XLogRecordBlockHeader */
129 	uint8		flags;
130 
131 	/* Information on full-page image, if any */
132 	bool		has_image;		/* has image, even for consistency checking */
133 	bool		apply_image;	/* has image that should be restored */
134 	char	   *bkp_image;		/* presumably the image bytes; TODO confirm */
135 	uint16		hole_offset;	/* presumably start of an omitted "hole"; confirm */
136 	uint16		hole_length;	/* presumably size of that "hole"; confirm */
137 	uint16		bimg_len;		/* presumably stored image length; confirm */
138 	uint8		bimg_info;		/* presumably image flag bits; confirm */
139 
140 	/* Buffer holding the rmgr-specific data associated with this block */
141 	bool		has_data;
142 	char	   *data;
143 	uint16		data_len;		/* presumably length of the data in bytes; confirm */
144 	uint16		data_bufsz;		/* presumably allocated size (cf. main_data_bufsz) */
145 } DecodedBkpBlock;
146 
147 struct XLogReaderState
148 {
149 	/*
150 	 * Operational callbacks
151 	 */
152 	XLogReaderRoutine routine;
153 
154 	/* ----------------------------------------
155 	 * Public parameters
156 	 * ----------------------------------------
157 	 */
158 
159 	/*
160 	 * System identifier of the xlog files we're about to read.  Set to zero
161 	 * (the default value) if unknown or unimportant.
162 	 */
163 	uint64		system_identifier;
164 
165 	/*
166 	 * Opaque data for callbacks to use.  Not used by XLogReader.
167 	 */
168 	void	   *private_data;
169 
170 	/*
171 	 * Start and end point of last record read.  EndRecPtr is also used as the
172 	 * position to read next.  Calling XLogBeginRead() sets EndRecPtr to the
173 	 * starting position and ReadRecPtr to invalid.
174 	 */
175 	XLogRecPtr	ReadRecPtr;		/* start of last record read */
176 	XLogRecPtr	EndRecPtr;		/* end+1 of last record read */
177 
178 
179 	/* ----------------------------------------
180 	 * Decoded representation of current record
181 	 *
182 	 * Use the XLogRec* macros and functions to investigate the record; these
183 	 * fields should not be accessed directly.
184 	 * ----------------------------------------
185 	 */
186 	XLogRecord *decoded_record; /* currently decoded record */
187 
188 	char	   *main_data;		/* record's main data portion */
189 	uint32		main_data_len;	/* main data portion's length */
190 	uint32		main_data_bufsz;	/* allocated size of the buffer */
191 
192 	RepOriginId record_origin;	/* presumably the record's replication origin */
193 
194 	TransactionId toplevel_xid; /* XID of top-level transaction */
195 
196 	/* information about blocks referenced by the record. */
197 	DecodedBkpBlock blocks[XLR_MAX_BLOCK_ID + 1];
198 
199 	int			max_block_id;	/* highest block_id in use (-1 if none) */
200 
201 	/* ----------------------------------------
202 	 * private/internal state
203 	 * ----------------------------------------
204 	 */
205 
206 	/*
207 	 * Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least
208 	 * readLen bytes)
209 	 */
210 	char	   *readBuf;
211 	uint32		readLen;
212 
213 	/* last read XLOG position for data currently in readBuf */
214 	WALSegmentContext segcxt;
215 	WALOpenSegment seg;
216 	uint32		segoff;
217 
218 	/*
219 	 * beginning of prior page read, and its TLI.  Doesn't necessarily
220 	 * correspond to what's in readBuf; used for timeline sanity checks.
221 	 */
222 	XLogRecPtr	latestPagePtr;
223 	TimeLineID	latestPageTLI;
224 
225 	/* beginning of the WAL record being read. */
226 	XLogRecPtr	currRecPtr;
227 	/* timeline to read it from, 0 if a lookup is required */
228 	TimeLineID	currTLI;
229 
230 	/*
231 	 * Safe point to read to in currTLI if current TLI is historical
232 	 * (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline.
233 	 *
234 	 * Actually set to the start of the segment containing the timeline switch
235 	 * that ends currTLI's validity, not the LSN of the switch itself, since
236 	 * we can't assume the old segment will be present.
237 	 */
238 	XLogRecPtr	currTLIValidUntil;
239 
240 	/*
241 	 * If currTLI is not the most recent known timeline, the next timeline to
242 	 * read from when currTLIValidUntil is reached.
243 	 */
244 	TimeLineID	nextTLI;
245 
246 	/*
247 	 * Buffer for current ReadRecord result (expandable), used when a record
248 	 * crosses a page boundary.
249 	 */
250 	char	   *readRecordBuf;
251 	uint32		readRecordBufSize;
252 
253 	/* Buffer to hold error message */
254 	char	   *errormsg_buf;
255 
256 	/*
257 	 * Set at the end of recovery: the start point of a partial record at the
258 	 * end of WAL (InvalidXLogRecPtr if there wasn't one), and the start
259 	 * location of its first contrecord that went missing.
260 	 */
261 	XLogRecPtr	abortedRecPtr;
262 	XLogRecPtr	missingContrecPtr;
263 	/* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */
264 	XLogRecPtr	overwrittenRecPtr;
265 };
266 
267 /* Get a new XLogReader; see XLogReaderFree() for releasing it */
268 extern XLogReaderState *XLogReaderAllocate(int wal_segment_size,
269 										   const char *waldir,
270 										   XLogReaderRoutine *routine,
271 										   void *private_data);
272 extern XLogReaderRoutine *LocalXLogReaderRoutine(void);
273 
274 /* Free an XLogReader (the counterpart of XLogReaderAllocate) */
275 extern void XLogReaderFree(XLogReaderState *state);
276 
277 /* Position the XLogReader at the given record */
278 extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
279 #ifdef FRONTEND					/* XLogFindNextRecord is declared for frontend code only */
280 extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
281 #endif							/* FRONTEND */
282 
283 /* Read the next XLog record (at EndRecPtr). Returns NULL on end-of-WAL or failure */
284 extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
285 										 char **errormsg);
286 
287 /* Validate a page header; phdr is presumably the page at recptr (TODO confirm) */
288 extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
289 										 XLogRecPtr recptr, char *phdr);
290 
291 /*
292  * Error information from WALRead that both backend and frontend callers can
293  * process.  Currently only errors from pg_pread can be reported.
294  */
295 typedef struct WALReadError
296 {
297 	int			wre_errno;		/* errno set by the last pg_pread() */
298 	int			wre_off;		/* Offset we tried to read from. */
299 	int			wre_req;		/* Bytes requested to be read. */
300 	int			wre_read;		/* Bytes read by the last pg_pread(). */
301 	WALOpenSegment wre_seg;		/* Segment we tried to read from. */
302 } WALReadError;
303 
304 extern bool WALRead(XLogReaderState *state,
305 					char *buf, XLogRecPtr startptr, Size count,
306 					TimeLineID tli, WALReadError *errinfo);	/* errinfo: see WALReadError */
307 
308 /* Functions and macros for decoding and examining an XLogRecord */
309 
310 extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record,
311 							 char **errmsg);
312 
313 #define XLogRecGetTotalLen(decoder) ((decoder)->decoded_record->xl_tot_len)
314 #define XLogRecGetPrev(decoder) ((decoder)->decoded_record->xl_prev)
315 #define XLogRecGetInfo(decoder) ((decoder)->decoded_record->xl_info)
316 #define XLogRecGetRmid(decoder) ((decoder)->decoded_record->xl_rmid)
317 #define XLogRecGetXid(decoder) ((decoder)->decoded_record->xl_xid)
318 #define XLogRecGetOrigin(decoder) ((decoder)->record_origin)
319 #define XLogRecGetTopXid(decoder) ((decoder)->toplevel_xid)	/* XID of top-level transaction */
320 #define XLogRecGetData(decoder) ((decoder)->main_data)	/* record's main data portion */
321 #define XLogRecGetDataLen(decoder) ((decoder)->main_data_len)	/* main data portion's length */
322 #define XLogRecHasAnyBlockRefs(decoder) ((decoder)->max_block_id >= 0)	/* max_block_id is -1 if none */
323 #define XLogRecHasBlockRef(decoder, block_id) \
324 	((decoder)->blocks[block_id].in_use)	/* block_id indexes blocks[] unchecked */
325 #define XLogRecHasBlockImage(decoder, block_id) \
326 	((decoder)->blocks[block_id].has_image)	/* has image, even for consistency checking */
327 #define XLogRecBlockImageApply(decoder, block_id) \
328 	((decoder)->blocks[block_id].apply_image)	/* has image that should be restored */
329 
330 #ifndef FRONTEND
331 extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
332 #endif							/* !FRONTEND */
333 
334 extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);
335 extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
336 extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
337 							   RelFileNode *rnode, ForkNumber *forknum,
338 							   BlockNumber *blknum);
339 
340 #endif							/* XLOGREADER_H */
341