1 /*-------------------------------------------------------------------------
2  *
3  * nbtxlog.h
4  *	  header file for postgres btree xlog routines
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * src/include/access/nbtxlog.h
10  *
11  *-------------------------------------------------------------------------
12  */
13 #ifndef NBTXLOG_H
14 #define NBTXLOG_H
15 
16 #include "access/xlogreader.h"
17 #include "lib/stringinfo.h"
18 #include "storage/off.h"
19 
20 /*
21  * XLOG records for btree operations
22  *
23  * XLOG allows to store some information in high 4 bits of log
24  * record xl_info field
25  */
26 #define XLOG_BTREE_INSERT_LEAF	0x00	/* add index tuple without split */
27 #define XLOG_BTREE_INSERT_UPPER 0x10	/* same, on a non-leaf page */
28 #define XLOG_BTREE_INSERT_META	0x20	/* same, plus update metapage */
29 #define XLOG_BTREE_SPLIT_L		0x30	/* add index tuple with split */
30 #define XLOG_BTREE_SPLIT_R		0x40	/* as above, new item on right */
31 #define XLOG_BTREE_SPLIT_L_HIGHKEY 0x50 /* as above, include truncated highkey */
32 #define XLOG_BTREE_SPLIT_R_HIGHKEY 0x60 /* as above, include truncated highkey */
33 #define XLOG_BTREE_DELETE		0x70	/* delete leaf index tuples for a page */
34 #define XLOG_BTREE_UNLINK_PAGE	0x80	/* delete a half-dead page */
35 #define XLOG_BTREE_UNLINK_PAGE_META 0x90	/* same, and update metapage */
36 #define XLOG_BTREE_NEWROOT		0xA0	/* new root page */
37 #define XLOG_BTREE_MARK_PAGE_HALFDEAD 0xB0	/* mark a leaf as half-dead */
38 #define XLOG_BTREE_VACUUM		0xC0	/* delete entries on a page during
39 										 * vacuum */
40 #define XLOG_BTREE_REUSE_PAGE	0xD0	/* old page is about to be reused from
41 										 * FSM */
42 #define XLOG_BTREE_META_CLEANUP	0xE0	/* update cleanup-related data in the
43 										 * metapage */
44 
45 /*
46  * All that we need to regenerate the meta-data page
47  */
48 typedef struct xl_btree_metadata
49 {
50 	BlockNumber root;
51 	uint32		level;
52 	BlockNumber fastroot;
53 	uint32		fastlevel;
54 	TransactionId oldest_btpo_xact;
55 	float8		last_cleanup_num_heap_tuples;
56 } xl_btree_metadata;
57 
58 /*
59  * This is what we need to know about simple (without split) insert.
60  *
61  * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
62  * Note that INSERT_META implies it's not a leaf page.
63  *
64  * Backup Blk 0: original page (data contains the inserted tuple)
65  * Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META
66  * Backup Blk 2: xl_btree_metadata, if INSERT_META
67  */
68 typedef struct xl_btree_insert
69 {
70 	OffsetNumber offnum;
71 } xl_btree_insert;
72 
73 #define SizeOfBtreeInsert	(offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
74 
75 /*
76  * On insert with split, we save all the items going into the right sibling
77  * so that we can restore it completely from the log record.  This way takes
78  * less xlog space than the normal approach, because if we did it standardly,
79  * XLogInsert would almost always think the right page is new and store its
80  * whole page image.  The left page, however, is handled in the normal
81  * incremental-update fashion.
82  *
83  * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record.
84  * The _L and _R variants indicate whether the inserted tuple went into the
85  * left or right split page (and thus, whether the new item is stored or not).
86  * The _HIGHKEY variants indicate that we've logged explicitly left page high
87  * key value, otherwise redo should use right page leftmost key as a left page
88  * high key.  _HIGHKEY is specified for internal pages where right page
89  * leftmost key is suppressed, and for leaf pages of covering indexes where
90  * high key have non-key attributes truncated.
91  *
92  * Backup Blk 0: original page / new left page
93  *
94  * The left page's data portion contains the new item, if it's the _L variant.
95  * (In the _R variants, the new item is one of the right page's tuples.)
96  * If level > 0, an IndexTuple representing the HIKEY of the left page
97  * follows.  We don't need this on leaf pages, because it's the same as the
98  * leftmost key in the new right page.
99  *
100  * Backup Blk 1: new right page
101  *
102  * The right page's data portion contains the right page's tuples in the
103  * form used by _bt_restore_page.
104  *
105  * Backup Blk 2: next block (orig page's rightlink), if any
106  * Backup Blk 3: child's left sibling, if non-leaf split
107  */
108 typedef struct xl_btree_split
109 {
110 	uint32		level;			/* tree level of page being split */
111 	OffsetNumber firstright;	/* first item moved to right page */
112 	OffsetNumber newitemoff;	/* new item's offset (useful for _L variant) */
113 } xl_btree_split;
114 
115 #define SizeOfBtreeSplit	(offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber))
116 
117 /*
118  * This is what we need to know about delete of individual leaf index tuples.
119  * The WAL record can represent deletion of any number of index tuples on a
120  * single index page when *not* executed by VACUUM.
121  *
122  * Backup Blk 0: index page
123  */
124 typedef struct xl_btree_delete
125 {
126 	RelFileNode hnode;			/* RelFileNode of the heap the index currently
127 								 * points at */
128 	int			nitems;
129 
130 	/* TARGET OFFSET NUMBERS FOLLOW AT THE END */
131 } xl_btree_delete;
132 
133 #define SizeOfBtreeDelete	(offsetof(xl_btree_delete, nitems) + sizeof(int))
134 
135 /*
136  * This is what we need to know about page reuse within btree.
137  */
138 typedef struct xl_btree_reuse_page
139 {
140 	RelFileNode node;
141 	BlockNumber block;
142 	TransactionId latestRemovedXid;
143 } xl_btree_reuse_page;
144 
145 #define SizeOfBtreeReusePage	(sizeof(xl_btree_reuse_page))
146 
147 /*
148  * This is what we need to know about vacuum of individual leaf index tuples.
149  * The WAL record can represent deletion of any number of index tuples on a
150  * single index page when executed by VACUUM.
151  *
152  * For MVCC scans, lastBlockVacuumed will be set to InvalidBlockNumber.
153  * For a non-MVCC index scans there is an additional correctness requirement
154  * for applying these changes during recovery, which is that we must do one
155  * of these two things for every block in the index:
156  *		* lock the block for cleanup and apply any required changes
157  *		* EnsureBlockUnpinned()
158  * The purpose of this is to ensure that no index scans started before we
159  * finish scanning the index are still running by the time we begin to remove
160  * heap tuples.
161  *
162  * Any changes to any one block are registered on just one WAL record. All
163  * blocks that we need to run EnsureBlockUnpinned() are listed as a block range
164  * starting from the last block vacuumed through until this one. Individual
165  * block numbers aren't given.
166  *
167  * Note that the *last* WAL record in any vacuum of an index is allowed to
168  * have a zero length array of offsets. Earlier records must have at least one.
169  */
170 typedef struct xl_btree_vacuum
171 {
172 	BlockNumber lastBlockVacuumed;
173 
174 	/* TARGET OFFSET NUMBERS FOLLOW */
175 } xl_btree_vacuum;
176 
177 #define SizeOfBtreeVacuum	(offsetof(xl_btree_vacuum, lastBlockVacuumed) + sizeof(BlockNumber))
178 
179 /*
180  * This is what we need to know about marking an empty branch for deletion.
181  * The target identifies the tuple removed from the parent page (note that we
182  * remove this tuple's downlink and the *following* tuple's key).  Note that
183  * the leaf page is empty, so we don't need to store its content --- it is
184  * just reinitialized during recovery using the rest of the fields.
185  *
186  * Backup Blk 0: leaf block
187  * Backup Blk 1: top parent
188  */
189 typedef struct xl_btree_mark_page_halfdead
190 {
191 	OffsetNumber poffset;		/* deleted tuple id in parent page */
192 
193 	/* information needed to recreate the leaf page: */
194 	BlockNumber leafblk;		/* leaf block ultimately being deleted */
195 	BlockNumber leftblk;		/* leaf block's left sibling, if any */
196 	BlockNumber rightblk;		/* leaf block's right sibling */
197 	BlockNumber topparent;		/* topmost internal page in the branch */
198 } xl_btree_mark_page_halfdead;
199 
200 #define SizeOfBtreeMarkPageHalfDead (offsetof(xl_btree_mark_page_halfdead, topparent) + sizeof(BlockNumber))
201 
202 /*
203  * This is what we need to know about deletion of a btree page.  Note we do
204  * not store any content for the deleted page --- it is just rewritten as empty
205  * during recovery, apart from resetting the btpo.xact.
206  *
207  * Backup Blk 0: target block being deleted
208  * Backup Blk 1: target block's left sibling, if any
209  * Backup Blk 2: target block's right sibling
210  * Backup Blk 3: leaf block (if different from target)
211  * Backup Blk 4: metapage (if rightsib becomes new fast root)
212  */
213 typedef struct xl_btree_unlink_page
214 {
215 	BlockNumber leftsib;		/* target block's left sibling, if any */
216 	BlockNumber rightsib;		/* target block's right sibling */
217 
218 	/*
219 	 * Information needed to recreate the leaf page, when target is an
220 	 * internal page.
221 	 */
222 	BlockNumber leafleftsib;
223 	BlockNumber leafrightsib;
224 	BlockNumber topparent;		/* next child down in the branch */
225 
226 	TransactionId btpo_xact;	/* value of btpo.xact for use in recovery */
227 	/* xl_btree_metadata FOLLOWS IF XLOG_BTREE_UNLINK_PAGE_META */
228 } xl_btree_unlink_page;
229 
230 #define SizeOfBtreeUnlinkPage	(offsetof(xl_btree_unlink_page, btpo_xact) + sizeof(TransactionId))
231 
232 /*
233  * New root log record.  There are zero tuples if this is to establish an
234  * empty root, or two if it is the result of splitting an old root.
235  *
236  * Note that although this implies rewriting the metadata page, we don't need
237  * an xl_btree_metadata record --- the rootblk and level are sufficient.
238  *
239  * Backup Blk 0: new root page (2 tuples as payload, if splitting old root)
240  * Backup Blk 1: left child (if splitting an old root)
241  * Backup Blk 2: metapage
242  */
243 typedef struct xl_btree_newroot
244 {
245 	BlockNumber rootblk;		/* location of new root (redundant with blk 0) */
246 	uint32		level;			/* its tree level */
247 } xl_btree_newroot;
248 
249 #define SizeOfBtreeNewroot	(offsetof(xl_btree_newroot, level) + sizeof(uint32))
250 
251 
252 /*
253  * prototypes for functions in nbtxlog.c
254  */
255 extern void btree_redo(XLogReaderState *record);
256 extern void btree_desc(StringInfo buf, XLogReaderState *record);
257 extern const char *btree_identify(uint8 info);
258 extern void btree_mask(char *pagedata, BlockNumber blkno);
259 
260 #endif							/* NBXLOG_H */
261