1 /*-------------------------------------------------------------------------
2  *
3  * bufpage.h
4  *	  Standard POSTGRES buffer page definitions.
5  *
6  *
7  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/storage/bufpage.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef BUFPAGE_H
15 #define BUFPAGE_H
16 
17 #include "access/xlogdefs.h"
18 #include "storage/block.h"
19 #include "storage/item.h"
20 #include "storage/off.h"
21 
22 /*
23  * A postgres disk page is an abstraction layered on top of a postgres
24  * disk block (which is simply a unit of i/o, see block.h).
25  *
26  * specifically, while a disk block can be unformatted, a postgres
27  * disk page is always a slotted page of the form:
28  *
29  * +----------------+---------------------------------+
30  * | PageHeaderData | linp1 linp2 linp3 ...           |
31  * +-----------+----+---------------------------------+
32  * | ... linpN |									  |
33  * +-----------+--------------------------------------+
34  * |		   ^ pd_lower							  |
35  * |												  |
36  * |			 v pd_upper							  |
37  * +-------------+------------------------------------+
38  * |			 | tupleN ...                         |
39  * +-------------+------------------+-----------------+
40  * |	   ... tuple3 tuple2 tuple1 | "special space" |
41  * +--------------------------------+-----------------+
42  *									^ pd_special
43  *
44  * a page is full when nothing can be added between pd_lower and
45  * pd_upper.
46  *
47  * all blocks written out by an access method must be disk pages.
48  *
49  * EXCEPTIONS:
50  *
51  * obviously, a page is not formatted before it is initialized by
52  * a call to PageInit.
53  *
54  * NOTES:
55  *
56  * linp1..N form an ItemId (line pointer) array.  ItemPointers point
57  * to a physical block number and a logical offset (line pointer
58  * number) within that block/page.  Note that OffsetNumbers
59  * conventionally start at 1, not 0.
60  *
61  * tuple1..N are added "backwards" on the page.  Since an ItemPointer
62  * offset is used to access an ItemId entry rather than an actual
63  * byte-offset position, tuples can be physically shuffled on a page
64  * whenever the need arises.  This indirection also keeps crash recovery
65  * relatively simple, because the low-level details of page space
66  * management can be controlled by standard buffer page code during
67  * logging, and during recovery.
68  *
69  * AM-generic per-page information is kept in PageHeaderData.
70  *
71  * AM-specific per-page data (if any) is kept in the area marked "special
72  * space"; each AM has an "opaque" structure defined somewhere that is
73  * stored as the page trailer.  an access method should always
74  * initialize its pages with PageInit and then set its own opaque
75  * fields.
76  */
77 
78 typedef Pointer Page;
79 
80 
81 /*
82  * location (byte offset) within a page.
83  *
84  * note that this is actually limited to 2^15 because we have limited
85  * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
86  */
87 typedef uint16 LocationIndex;
88 
89 
90 /*
91  * For historical reasons, the 64-bit LSN value is stored as two 32-bit
92  * values.
93  */
94 typedef struct
95 {
96 	uint32		xlogid;			/* high bits */
97 	uint32		xrecoff;		/* low bits */
98 } PageXLogRecPtr;
99 
/*
 * PageXLogRecPtrGet
 *		Reassemble the 64-bit value from the two 32-bit halves of "val".
 *		"val" is evaluated twice.
 */
#define PageXLogRecPtrGet(val) \
	((((uint64) (val).xlogid) << 32) | (val).xrecoff)

/*
 * PageXLogRecPtrSet
 *		Split the 64-bit "lsn" and store its high half in ptr.xlogid and its
 *		low half in ptr.xrecoff.  Both arguments are evaluated twice.
 */
#define PageXLogRecPtrSet(ptr, lsn) \
	( \
		(ptr).xlogid = (uint32) ((lsn) >> 32), \
		(ptr).xrecoff = (uint32) (lsn) \
	)
104 
105 /*
106  * disk page organization
107  *
108  * space management information generic to any page
109  *
110  *		pd_lsn		- identifies xlog record for last change to this page.
111  *		pd_checksum - page checksum, if set.
112  *		pd_flags	- flag bits.
113  *		pd_lower	- offset to start of free space.
114  *		pd_upper	- offset to end of free space.
115  *		pd_special	- offset to start of special space.
116  *		pd_pagesize_version - size in bytes and page layout version number.
117  *		pd_prune_xid - oldest XID among potentially prunable tuples on page.
118  *
119  * The LSN is used by the buffer manager to enforce the basic rule of WAL:
120  * "thou shalt write xlog before data".  A dirty buffer cannot be dumped
121  * to disk until xlog has been flushed at least as far as the page's LSN.
122  *
123  * pd_checksum stores the page checksum, if it has been set for this page;
124  * zero is a valid value for a checksum. If a checksum is not in use then
125  * we leave the field unset. This will typically mean the field is zero
126  * though non-zero values may also be present if databases have been
127  * pg_upgraded from releases prior to 9.3, when the same byte offset was
128  * used to store the current timelineid when the page was last updated.
129  * Note that there is no indication on a page as to whether the checksum
130  * is valid or not, a deliberate design choice which avoids the problem
131  * of relying on the page contents to decide whether to verify it. Hence
132  * there are no flag bits relating to checksums.
133  *
134  * pd_prune_xid is a hint field that helps determine whether pruning will be
135  * useful.  It is currently unused in index pages.
136  *
137  * The page version number and page size are packed together into a single
138  * uint16 field.  This is for historical reasons: before PostgreSQL 7.3,
139  * there was no concept of a page version number, and doing it this way
140  * lets us pretend that pre-7.3 databases have page version number zero.
141  * We constrain page sizes to be multiples of 256, leaving the low eight
142  * bits available for a version number.
143  *
144  * Minimum possible page size is perhaps 64B to fit page header, opaque space
145  * and a minimal tuple; of course, in reality you want it much bigger, so
146  * the constraint on pagesize mod 256 is not an important restriction.
147  * On the high end, we can only support pages up to 32KB because lp_off/lp_len
148  * are 15 bits.
149  */
150 
151 typedef struct PageHeaderData
152 {
153 	/* XXX LSN is member of *any* block, not only page-organized ones */
154 	PageXLogRecPtr pd_lsn;		/* LSN: next byte after last byte of xlog
155 								 * record for last change to this page */
156 	uint16		pd_checksum;	/* checksum */
157 	uint16		pd_flags;		/* flag bits, see below */
158 	LocationIndex pd_lower;		/* offset to start of free space */
159 	LocationIndex pd_upper;		/* offset to end of free space */
160 	LocationIndex pd_special;	/* offset to start of special space */
161 	uint16		pd_pagesize_version;
162 	TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
163 	ItemIdData	pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
164 } PageHeaderData;
165 
166 typedef PageHeaderData *PageHeader;
167 
/*
 * Flag bits stored in pd_flags.  Bits not defined here are initialized to
 * zero and may be used in the future.
 *
 * PD_HAS_FREE_LINES
 *		Set if there are any LP_UNUSED line pointers before pd_lower.
 *		Changes to it are not WAL-logged, so treat it as a hint rather
 *		than the truth.
 *
 * PD_PAGE_FULL
 *		Set if an UPDATE doesn't find enough free space in the page for
 *		its new tuple version, which suggests that a prune is needed.
 *		Again, this is just a hint.
 *
 * PD_ALL_VISIBLE
 *		All tuples on the page are visible to everyone.
 */
#define PD_HAS_FREE_LINES	0x0001
#define PD_PAGE_FULL		0x0002
#define PD_ALL_VISIBLE		0x0004

/* OR of all valid pd_flags bits */
#define PD_VALID_FLAG_BITS \
	(PD_HAS_FREE_LINES | PD_PAGE_FULL | PD_ALL_VISIBLE)
186 
/*
 * Page layout version history:
 *
 *	0	pre-7.3 Postgres releases.
 *	1	Releases 7.3 and 7.4; denotes a new HeapTupleHeader layout.
 *	2	Release 8.0; changed the HeapTupleHeader layout again.
 *	3	Release 8.1; redefined HeapTupleHeader infomask bits.
 *	4	Release 8.3; changed the HeapTupleHeader layout again, added the
 *		pd_flags field (by stealing some bits from pd_tli), and added the
 *		pd_prune_xid field (which enlarges the header).
 *
 * As of Release 9.3, the checksum version must also be considered when
 * handling pages.
 */
#define PG_PAGE_LAYOUT_VERSION		4	/* current page layout version */
#define PG_DATA_CHECKSUM_VERSION	1	/* current data checksum version */
201 
202 /* ----------------------------------------------------------------
203  *						page support macros
204  * ----------------------------------------------------------------
205  */
206 
/*
 * PageIsValid
 *		True iff the page pointer itself is valid, as decided by
 *		PointerIsValid().  The page contents are not examined.
 */
#define PageIsValid(page) \
	PointerIsValid(page)
212 
/*
 * SizeOfPageHeaderData
 *		Size of the fixed part of the page header.  The line pointer array
 *		does not count as part of the header, so this is the offset of
 *		pd_linp within PageHeaderData.
 */
#define SizeOfPageHeaderData \
	(offsetof(PageHeaderData, pd_linp))
217 
/*
 * PageIsEmpty
 *		True iff no itemid has been allocated on the page, i.e. pd_lower
 *		does not extend beyond the fixed page header.
 */
#define PageIsEmpty(page) \
	( \
		((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData \
	)
224 
/*
 * PageIsNew
 *		True iff the page has not been initialized (by PageInit).  The test
 *		is that pd_upper is still zero.
 */
#define PageIsNew(page) \
	(((PageHeader) (page))->pd_upper == 0)
230 
/*
 * PageGetItemId
 *		Returns the item identifier (line pointer) at the given offset
 *		number.  Offset numbers are 1-based, so offset number N maps to
 *		pd_linp[N - 1].  No range checking is done here.
 */
#define PageGetItemId(page, offsetNumber) \
	((ItemId) (((PageHeader) (page))->pd_linp + ((offsetNumber) - 1)))
237 
/*
 * PageGetContents
 *		Returns a pointer to the page contents that follow the page header.
 *		To be used in cases where the page does not contain line pointers.
 *
 * Note: the result is MAXALIGN'd; prior to 8.3 that was not guaranteed.
 * Beware of old code that might think the offset to the contents is just
 * SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
 */
#define PageGetContents(page) \
	( \
		(char *) (page) + MAXALIGN(SizeOfPageHeaderData) \
	)
248 
249 /* ----------------
250  *		macros to access page size info
251  * ----------------
252  */
253 
/*
 * PageSizeIsValid
 *		True iff the page size is valid; the only size accepted is BLCKSZ.
 */
#define PageSizeIsValid(pageSize) \
	((pageSize) == BLCKSZ)
259 
/*
 * PageGetPageSize
 *		Returns the page size recorded in a page's header, i.e. the high
 *		eight bits of pd_pagesize_version with the version bits masked off.
 *
 * This can only be called on a formatted page (unlike BufferGetPageSize,
 * which can be called on an unformatted page).  However, it can be called
 * on a page that is not stored in a buffer.
 */
#define PageGetPageSize(page) \
	( \
		(Size) (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00) \
	)
270 
/*
 * PageGetPageLayoutVersion
 *		Returns the page layout version of a page, i.e. the low eight bits
 *		of pd_pagesize_version.
 */
#define PageGetPageLayoutVersion(page) \
	( \
		((PageHeader) (page))->pd_pagesize_version & 0x00FF \
	)
277 
/*
 * PageSetPageSizeAndVersion
 *		Sets the page size and page layout version number of a page by
 *		OR-ing them into pd_pagesize_version.
 *
 * The assertions check that "size" uses only the high eight bits and
 * "version" only the low eight bits of the 16-bit field.
 *
 * We could support setting these two values separately, but there's
 * no real need for it at the moment.
 */
#define PageSetPageSizeAndVersion(page, size, version) \
( \
	AssertMacro((size) == ((size) & 0xFF00)), \
	AssertMacro((version) == ((version) & 0x00FF)), \
	((PageHeader) (page))->pd_pagesize_version = (size) | (version) \
)
291 
292 /* ----------------
293  *		page special data macros
294  * ----------------
295  */
/*
 * PageGetSpecialSize
 *		Returns size of special space on a page, computed as the page size
 *		minus pd_special.  "page" is evaluated more than once.
 */
#define PageGetSpecialSize(page) \
	( \
		(uint16) (PageGetPageSize(page) - ((PageHeader) (page))->pd_special) \
	)
302 
303 /*
304  * Using assertions, validate that the page special pointer is OK.
305  *
306  * This is intended to catch use of the pointer before page initialization.
307  * It is implemented as a function due to the limitations of the MSVC
308  * compiler, which choked on doing all these tests within another macro.  We
309  * return true so that AssertMacro() can be used while still getting the
310  * specifics from the macro failure within this function.
311  */
312 static inline bool
PageValidateSpecialPointer(Page page)313 PageValidateSpecialPointer(Page page)
314 {
315 	Assert(PageIsValid(page));
316 	Assert(((PageHeader) (page))->pd_special <= BLCKSZ);
317 	Assert(((PageHeader) (page))->pd_special >= SizeOfPageHeaderData);
318 
319 	return true;
320 }
321 
/*
 * PageGetSpecialPointer
 *		Returns pointer to special space on a page, i.e. the page address
 *		advanced by pd_special bytes.  In assert-enabled builds the special
 *		pointer is validated first.  "page" is evaluated more than once.
 */
#define PageGetSpecialPointer(page) \
( \
	AssertMacro(PageValidateSpecialPointer(page)), \
	((char *) (page)) + ((PageHeader) (page))->pd_special \
)
331 
/*
 * PageGetItem
 *		Retrieves an item on the given page: the page address advanced by
 *		the byte offset recorded in the item identifier.
 *
 * Note:
 *		This does not change the status of any of the resources passed.
 *		The semantics may change in the future.
 */
#define PageGetItem(page, itemId) \
( \
	AssertMacro(PageIsValid(page)), \
	AssertMacro(ItemIdHasStorage(itemId)), \
	(Item) ((char *) (page) + ItemIdGetOffset(itemId)) \
)
346 
/*
 * PageGetMaxOffsetNumber
 *		Returns the maximum offset number used by the given page.
 *		Since offset numbers are 1-based, this is also the number
 *		of items on the page.
 *
 *		NOTE: if the page is not initialized (pd_lower == 0), we must
 *		return zero to ensure sane behavior.  The argument is evaluated
 *		twice so that we can ensure this.
 */
#define PageGetMaxOffsetNumber(page) \
	( \
		((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData \
		? 0 \
		: ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \
		   / sizeof(ItemIdData)) \
	)
361 
/*
 * Additional macros for access to page headers. (Beware multiple evaluation
 * of the arguments!)
 */

/* PageGetLSN: read the page's pd_lsn as a single 64-bit value */
#define PageGetLSN(page) \
	PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn)

/* PageSetLSN: store "lsn" into the page's pd_lsn */
#define PageSetLSN(page, lsn) \
	PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn)
370 
/*
 * Test, set and clear the PD_HAS_FREE_LINES bit in pd_flags.  The test
 * macro yields the masked bit (nonzero when set), not a normalized boolean.
 */
#define PageHasFreeLinePointers(page) \
	( ((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES )
#define PageSetHasFreeLinePointers(page) \
	( ((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES )
#define PageClearHasFreeLinePointers(page) \
	( ((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES )
377 
/*
 * Test, set and clear the PD_PAGE_FULL bit in pd_flags.  The test macro
 * yields the masked bit (nonzero when set), not a normalized boolean.
 */
#define PageIsFull(page) \
	( ((PageHeader) (page))->pd_flags & PD_PAGE_FULL )
#define PageSetFull(page) \
	( ((PageHeader) (page))->pd_flags |= PD_PAGE_FULL )
#define PageClearFull(page) \
	( ((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL )
384 
/*
 * Test, set and clear the PD_ALL_VISIBLE bit in pd_flags.  The test macro
 * yields the masked bit (nonzero when set), not a normalized boolean.
 */
#define PageIsAllVisible(page) \
	( ((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE )
#define PageSetAllVisible(page) \
	( ((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE )
#define PageClearAllVisible(page) \
	( ((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE )
391 
/*
 * PageSetPrunable
 *		Record "xid" in pd_prune_xid if no valid value is stored there yet,
 *		or if "xid" precedes the stored one.  "xid" must be a normal
 *		transaction ID (asserted).  Both arguments are evaluated more than
 *		once.
 */
#define PageSetPrunable(page, xid) \
do { \
	Assert(TransactionIdIsNormal(xid)); \
	if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
		TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
	{ \
		((PageHeader) (page))->pd_prune_xid = (xid); \
	} \
} while (0)

/*
 * PageClearPrunable
 *		Reset pd_prune_xid to InvalidTransactionId.
 */
#define PageClearPrunable(page) \
	( ((PageHeader) (page))->pd_prune_xid = InvalidTransactionId )
401 
402 
403 /* ----------------------------------------------------------------
404  *		extern declarations
405  * ----------------------------------------------------------------
406  */
407 
/* flag bits accepted by PageAddItemExtended() */
#define PAI_OVERWRITE			(1 << 0)
#define PAI_IS_HEAP				(1 << 1)

/* flag bits accepted by PageIsVerifiedExtended() */
#define PIV_LOG_WARNING			(1 << 0)
#define PIV_REPORT_STAT			(1 << 1)

/*
 * PageAddItem
 *		Wrapper around PageAddItemExtended() that takes two booleans
 *		and translates them into the PAI_OVERWRITE / PAI_IS_HEAP flag bits.
 */
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
	PageAddItemExtended(page, item, size, offsetNumber, \
						((overwrite) ? PAI_OVERWRITE : 0) | \
						((is_heap) ? PAI_IS_HEAP : 0))

/*
 * PageIsVerified
 *		Wrapper around PageIsVerifiedExtended() that passes both
 *		PIV_LOG_WARNING and PIV_REPORT_STAT.
 */
#define PageIsVerified(page, blkno) \
	PageIsVerifiedExtended(page, blkno, \
						   PIV_LOG_WARNING | PIV_REPORT_STAT)
424 
425 /*
426  * Check that BLCKSZ is a multiple of sizeof(size_t).  In
427  * PageIsVerifiedExtended(), it is much faster to check if a page is
428  * full of zeroes using the native word size.  Note that this assertion
429  * is kept within a header to make sure that StaticAssertDecl() works
430  * across various combinations of platforms and compilers.
431  */
432 StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
433 				 "BLCKSZ has to be a multiple of sizeof(size_t)");
434 
435 extern void PageInit(Page page, Size pageSize, Size specialSize);
436 extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
437 extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
438 										OffsetNumber offsetNumber, int flags);
439 extern Page PageGetTempPage(Page page);
440 extern Page PageGetTempPageCopy(Page page);
441 extern Page PageGetTempPageCopySpecial(Page page);
442 extern void PageRestoreTempPage(Page tempPage, Page oldPage);
443 extern void PageRepairFragmentation(Page page);
444 extern void PageTruncateLinePointerArray(Page page);
445 extern Size PageGetFreeSpace(Page page);
446 extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
447 extern Size PageGetExactFreeSpace(Page page);
448 extern Size PageGetHeapFreeSpace(Page page);
449 extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
450 extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
451 extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offset);
452 extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
453 									Item newtup, Size newsize);
454 extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
455 extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
456 
457 #endif							/* BUFPAGE_H */
458