1 /* 2 * xlogrecord.h 3 * 4 * Definitions for the WAL record format. 5 * 6 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group 7 * Portions Copyright (c) 1994, Regents of the University of California 8 * 9 * src/include/access/xlogrecord.h 10 */ 11 #ifndef XLOGRECORD_H 12 #define XLOGRECORD_H 13 14 #include "access/rmgr.h" 15 #include "access/xlogdefs.h" 16 #include "port/pg_crc32c.h" 17 #include "storage/block.h" 18 #include "storage/relfilenode.h" 19 20 /* 21 * The overall layout of an XLOG record is: 22 * Fixed-size header (XLogRecord struct) 23 * XLogRecordBlockHeader struct 24 * XLogRecordBlockHeader struct 25 * ... 26 * XLogRecordDataHeader[Short|Long] struct 27 * block data 28 * block data 29 * ... 30 * main data 31 * 32 * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of 33 * rmgr-specific data not associated with a block. XLogRecord structs 34 * always start on MAXALIGN boundaries in the WAL files, but the rest of 35 * the fields are not aligned. 36 * 37 * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and 38 * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's 39 * used to distinguish between block references, and the main data structs. 40 */ 41 typedef struct XLogRecord 42 { 43 uint32 xl_tot_len; /* total len of entire record */ 44 TransactionId xl_xid; /* xact id */ 45 XLogRecPtr xl_prev; /* ptr to previous record in log */ 46 uint8 xl_info; /* flag bits, see below */ 47 RmgrId xl_rmid; /* resource manager for this record */ 48 /* 2 bytes of padding here, initialize to zero */ 49 pg_crc32c xl_crc; /* CRC for this record */ 50 51 /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */ 52 53 } XLogRecord; 54 55 #define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c)) 56 57 /* 58 * The high 4 bits in xl_info may be used freely by rmgr. The 59 * XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY bits can be passed by 60 * XLogInsert caller. The rest are set internally by XLogInsert. 61 */ 62 #define XLR_INFO_MASK 0x0F 63 #define XLR_RMGR_INFO_MASK 0xF0 64 65 /* 66 * If a WAL record modifies any relation files, in ways not covered by the 67 * usual block references, this flag is set. This is not used for anything 68 * by PostgreSQL itself, but it allows external tools that read WAL and keep 69 * track of modified blocks to recognize such special record types. 70 */ 71 #define XLR_SPECIAL_REL_UPDATE 0x01 72 73 /* 74 * Enforces consistency checks of replayed WAL at recovery. If enabled, 75 * each record will log a full-page write for each block modified by the 76 * record and will reuse it afterwards for consistency checks. The caller 77 * of XLogInsert can use this value if necessary, but if 78 * wal_consistency_checking is enabled for a rmgr this is set unconditionally. 79 */ 80 #define XLR_CHECK_CONSISTENCY 0x02 81 82 /* 83 * Header info for block data appended to an XLOG record. 84 * 85 * 'data_length' is the length of the rmgr-specific payload data associated 86 * with this block. It does not include the possible full page image, nor 87 * XLogRecordBlockHeader struct itself. 88 * 89 * Note that we don't attempt to align the XLogRecordBlockHeader struct! 90 * So, the struct must be copied to aligned local storage before use. 91 */ 92 typedef struct XLogRecordBlockHeader 93 { 94 uint8 id; /* block reference ID */ 95 uint8 fork_flags; /* fork within the relation, and flags */ 96 uint16 data_length; /* number of payload bytes (not including page 97 * image) */ 98 99 /* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */ 100 /* If BKPBLOCK_SAME_REL is not set, a RelFileNode follows */ 101 /* BlockNumber follows */ 102 } XLogRecordBlockHeader; 103 104 #define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16)) 105 106 /* 107 * Additional header information when a full-page image is included 108 * (i.e. when BKPBLOCK_HAS_IMAGE is set). 109 * 110 * The XLOG code is aware that PG data pages usually contain an unused "hole" 111 * in the middle, which contains only zero bytes. Since we know that the 112 * "hole" is all zeros, we remove it from the stored data (and it's not counted 113 * in the XLOG record's CRC, either). Hence, the amount of block data actually 114 * present is (BLCKSZ - <length of "hole" bytes>). 115 * 116 * Additionally, when wal_compression is enabled, we will try to compress full 117 * page images using the PGLZ compression algorithm, after removing the "hole". 118 * This can reduce the WAL volume, but at some extra cost of CPU spent 119 * on the compression during WAL logging. In this case, since the "hole" 120 * length cannot be calculated by subtracting the number of page image bytes 121 * from BLCKSZ, basically it needs to be stored as an extra information. 122 * But when no "hole" exists, we can assume that the "hole" length is zero 123 * and no such an extra information needs to be stored. Note that 124 * the original version of page image is stored in WAL instead of the 125 * compressed one if the number of bytes saved by compression is less than 126 * the length of extra information. Hence, when a page image is successfully 127 * compressed, the amount of block data actually present is less than 128 * BLCKSZ - the length of "hole" bytes - the length of extra information. 129 */ 130 typedef struct XLogRecordBlockImageHeader 131 { 132 uint16 length; /* number of page image bytes */ 133 uint16 hole_offset; /* number of bytes before "hole" */ 134 uint8 bimg_info; /* flag bits, see below */ 135 136 /* 137 * If BKPIMAGE_HAS_HOLE and BKPIMAGE_IS_COMPRESSED, an 138 * XLogRecordBlockCompressHeader struct follows. 139 */ 140 } XLogRecordBlockImageHeader; 141 142 #define SizeOfXLogRecordBlockImageHeader \ 143 (offsetof(XLogRecordBlockImageHeader, bimg_info) + sizeof(uint8)) 144 145 /* Information stored in bimg_info */ 146 #define BKPIMAGE_HAS_HOLE 0x01 /* page image has "hole" */ 147 #define BKPIMAGE_IS_COMPRESSED 0x02 /* page image is compressed */ 148 #define BKPIMAGE_APPLY 0x04 /* page image should be restored during 149 * replay */ 150 151 /* 152 * Extra header information used when page image has "hole" and 153 * is compressed. 154 */ 155 typedef struct XLogRecordBlockCompressHeader 156 { 157 uint16 hole_length; /* number of bytes in "hole" */ 158 } XLogRecordBlockCompressHeader; 159 160 #define SizeOfXLogRecordBlockCompressHeader \ 161 sizeof(XLogRecordBlockCompressHeader) 162 163 /* 164 * Maximum size of the header for a block reference. This is used to size a 165 * temporary buffer for constructing the header. 166 */ 167 #define MaxSizeOfXLogRecordBlockHeader \ 168 (SizeOfXLogRecordBlockHeader + \ 169 SizeOfXLogRecordBlockImageHeader + \ 170 SizeOfXLogRecordBlockCompressHeader + \ 171 sizeof(RelFileNode) + \ 172 sizeof(BlockNumber)) 173 174 /* 175 * The fork number fits in the lower 4 bits in the fork_flags field. The upper 176 * bits are used for flags. 177 */ 178 #define BKPBLOCK_FORK_MASK 0x0F 179 #define BKPBLOCK_FLAG_MASK 0xF0 180 #define BKPBLOCK_HAS_IMAGE 0x10 /* block data is an XLogRecordBlockImage */ 181 #define BKPBLOCK_HAS_DATA 0x20 182 #define BKPBLOCK_WILL_INIT 0x40 /* redo will re-init the page */ 183 #define BKPBLOCK_SAME_REL 0x80 /* RelFileNode omitted, same as previous */ 184 185 /* 186 * XLogRecordDataHeaderShort/Long are used for the "main data" portion of 187 * the record. If the length of the data is less than 256 bytes, the short 188 * form is used, with a single byte to hold the length. Otherwise the long 189 * form is used. 190 * 191 * (These structs are currently not used in the code, they are here just for 192 * documentation purposes). 193 */ 194 typedef struct XLogRecordDataHeaderShort 195 { 196 uint8 id; /* XLR_BLOCK_ID_DATA_SHORT */ 197 uint8 data_length; /* number of payload bytes */ 198 } XLogRecordDataHeaderShort; 199 200 #define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2) 201 202 typedef struct XLogRecordDataHeaderLong 203 { 204 uint8 id; /* XLR_BLOCK_ID_DATA_LONG */ 205 /* followed by uint32 data_length, unaligned */ 206 } XLogRecordDataHeaderLong; 207 208 #define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32)) 209 210 /* 211 * Block IDs used to distinguish different kinds of record fragments. Block 212 * references are numbered from 0 to XLR_MAX_BLOCK_ID. A rmgr is free to use 213 * any ID number in that range (although you should stick to small numbers, 214 * because the WAL machinery is optimized for that case). A few ID 215 * numbers are reserved to denote the "main" data portion of the record, 216 * as well as replication-supporting transaction metadata. 217 * 218 * The maximum is currently set at 32, quite arbitrarily. Most records only 219 * need a handful of block references, but there are a few exceptions that 220 * need more. 221 */ 222 #define XLR_MAX_BLOCK_ID 32 223 224 #define XLR_BLOCK_ID_DATA_SHORT 255 225 #define XLR_BLOCK_ID_DATA_LONG 254 226 #define XLR_BLOCK_ID_ORIGIN 253 227 #define XLR_BLOCK_ID_TOPLEVEL_XID 252 228 229 #endif /* XLOGRECORD_H */ 230