1 /*-------------------------------------------------------------------------
2 *
3 * subtrans.c
4 * PostgreSQL subtransaction-log manager
5 *
6 * The pg_subtrans manager is a pg_xact-like manager that stores the parent
7 * transaction Id for each transaction. It is a fundamental part of the
8 * nested transactions implementation. A main transaction has a parent
9 * of InvalidTransactionId, and each subtransaction has its immediate parent.
10 * The tree can easily be walked from child to parent, but not in the
11 * opposite direction.
12 *
 * This code is based on clog.c, but the robustness requirements
14 * are completely different from pg_xact, because we only need to remember
15 * pg_subtrans information for currently-open transactions. Thus, there is
16 * no need to preserve data over a crash and restart.
17 *
18 * There are no XLOG interactions since we do not care about preserving
19 * data across crashes. During database startup, we simply force the
20 * currently-active page of SUBTRANS to zeroes.
21 *
22 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
23 * Portions Copyright (c) 1994, Regents of the University of California
24 *
25 * src/backend/access/transam/subtrans.c
26 *
27 *-------------------------------------------------------------------------
28 */
29 #include "postgres.h"
30
31 #include "access/slru.h"
32 #include "access/subtrans.h"
33 #include "access/transam.h"
34 #include "pg_trace.h"
35 #include "utils/snapmgr.h"
36
37
38 /*
39 * Defines for SubTrans page sizes. A page is the same BLCKSZ as is used
40 * everywhere else in Postgres.
41 *
42 * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
43 * SubTrans page numbering also wraps around at
44 * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE, and segment numbering at
45 * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need take no
46 * explicit notice of that fact in this module, except when comparing segment
47 * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes) and zeroing
48 * them in StartupSUBTRANS.
49 */
50
/*
 * We need four bytes per xact: each pg_subtrans entry is one TransactionId,
 * so a page holds BLCKSZ / sizeof(TransactionId) entries.
 */
#define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId))

/* Number of the pg_subtrans page that holds the entry for xid */
#define TransactionIdToPage(xid) ((xid) / (TransactionId) SUBTRANS_XACTS_PER_PAGE)
/* Index of xid's entry within its page, counted in TransactionId-sized entries */
#define TransactionIdToEntry(xid) ((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE)
56
57
/*
 * Link to shared-memory data structures for SUBTRANS control
 *
 * SubTransCtl is shorthand for the address of this file-local control
 * struct; it is the handle passed to the SimpleLru* routines throughout
 * this file.
 */
static SlruCtlData SubTransCtlData;

#define SubTransCtl (&SubTransCtlData)
64
65
/* Forward declarations of file-local helpers defined later in this file */
static int	ZeroSUBTRANSPage(int pageno);
static bool SubTransPagePrecedes(int page1, int page2);
68
69
70 /*
71 * Record the parent of a subtransaction in the subtrans log.
72 */
73 void
SubTransSetParent(TransactionId xid,TransactionId parent)74 SubTransSetParent(TransactionId xid, TransactionId parent)
75 {
76 int pageno = TransactionIdToPage(xid);
77 int entryno = TransactionIdToEntry(xid);
78 int slotno;
79 TransactionId *ptr;
80
81 Assert(TransactionIdIsValid(parent));
82 Assert(TransactionIdFollows(xid, parent));
83
84 LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
85
86 slotno = SimpleLruReadPage(SubTransCtl, pageno, true, xid);
87 ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
88 ptr += entryno;
89
90 /*
91 * It's possible we'll try to set the parent xid multiple times but we
92 * shouldn't ever be changing the xid from one valid xid to another valid
93 * xid, which would corrupt the data structure.
94 */
95 if (*ptr != parent)
96 {
97 Assert(*ptr == InvalidTransactionId);
98 *ptr = parent;
99 SubTransCtl->shared->page_dirty[slotno] = true;
100 }
101
102 LWLockRelease(SubtransControlLock);
103 }
104
105 /*
106 * Interrogate the parent of a transaction in the subtrans log.
107 */
108 TransactionId
SubTransGetParent(TransactionId xid)109 SubTransGetParent(TransactionId xid)
110 {
111 int pageno = TransactionIdToPage(xid);
112 int entryno = TransactionIdToEntry(xid);
113 int slotno;
114 TransactionId *ptr;
115 TransactionId parent;
116
117 /* Can't ask about stuff that might not be around anymore */
118 Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
119
120 /* Bootstrap and frozen XIDs have no parent */
121 if (!TransactionIdIsNormal(xid))
122 return InvalidTransactionId;
123
124 /* lock is acquired by SimpleLruReadPage_ReadOnly */
125
126 slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid);
127 ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
128 ptr += entryno;
129
130 parent = *ptr;
131
132 LWLockRelease(SubtransControlLock);
133
134 return parent;
135 }
136
137 /*
138 * SubTransGetTopmostTransaction
139 *
140 * Returns the topmost transaction of the given transaction id.
141 *
142 * Because we cannot look back further than TransactionXmin, it is possible
143 * that this function will lie and return an intermediate subtransaction ID
144 * instead of the true topmost parent ID. This is OK, because in practice
145 * we only care about detecting whether the topmost parent is still running
146 * or is part of a current snapshot's list of still-running transactions.
147 * Therefore, any XID before TransactionXmin is as good as any other.
148 */
149 TransactionId
SubTransGetTopmostTransaction(TransactionId xid)150 SubTransGetTopmostTransaction(TransactionId xid)
151 {
152 TransactionId parentXid = xid,
153 previousXid = xid;
154
155 /* Can't ask about stuff that might not be around anymore */
156 Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
157
158 while (TransactionIdIsValid(parentXid))
159 {
160 previousXid = parentXid;
161 if (TransactionIdPrecedes(parentXid, TransactionXmin))
162 break;
163 parentXid = SubTransGetParent(parentXid);
164
165 /*
166 * By convention the parent xid gets allocated first, so should always
167 * precede the child xid. Anything else points to a corrupted data
168 * structure that could lead to an infinite loop, so exit.
169 */
170 if (!TransactionIdPrecedes(parentXid, previousXid))
171 elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u",
172 previousXid, parentXid);
173 }
174
175 Assert(TransactionIdIsValid(previousXid));
176
177 return previousXid;
178 }
179
180
181 /*
182 * Initialization of shared memory for SUBTRANS
183 */
184 Size
SUBTRANSShmemSize(void)185 SUBTRANSShmemSize(void)
186 {
187 return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0);
188 }
189
/*
 * SUBTRANSShmemInit
 *
 * Set up the SLRU control structure for pg_subtrans: install the
 * page-ordering callback, initialize the SLRU with NUM_SUBTRANS_BUFFERS
 * buffers under SubtransControlLock, and disable fsync for its writes.
 */
void
SUBTRANSShmemInit(void)
{
	SubTransCtl->PagePrecedes = SubTransPagePrecedes;
	SimpleLruInit(SubTransCtl, "subtrans", NUM_SUBTRANS_BUFFERS, 0,
				  SubtransControlLock, "pg_subtrans",
				  LWTRANCHE_SUBTRANS_BUFFERS);
	/* Override default assumption that writes should be fsync'd */
	SubTransCtl->do_fsync = false;
	/* NOTE(review): presumably self-checks the PagePrecedes callback; confirm in slru.c */
	SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE);
}
201
202 /*
203 * This func must be called ONCE on system install. It creates
204 * the initial SUBTRANS segment. (The SUBTRANS directory is assumed to
205 * have been created by the initdb shell script, and SUBTRANSShmemInit
206 * must have been called already.)
207 *
208 * Note: it's not really necessary to create the initial segment now,
209 * since slru.c would create it on first write anyway. But we may as well
210 * do it to be sure the directory is set up correctly.
211 */
212 void
BootStrapSUBTRANS(void)213 BootStrapSUBTRANS(void)
214 {
215 int slotno;
216
217 LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
218
219 /* Create and zero the first page of the subtrans log */
220 slotno = ZeroSUBTRANSPage(0);
221
222 /* Make sure it's written out */
223 SimpleLruWritePage(SubTransCtl, slotno);
224 Assert(!SubTransCtl->shared->page_dirty[slotno]);
225
226 LWLockRelease(SubtransControlLock);
227 }
228
229 /*
230 * Initialize (or reinitialize) a page of SUBTRANS to zeroes.
231 *
232 * The page is not actually written, just set up in shared memory.
233 * The slot number of the new page is returned.
234 *
235 * Control lock must be held at entry, and will be held at exit.
236 */
237 static int
ZeroSUBTRANSPage(int pageno)238 ZeroSUBTRANSPage(int pageno)
239 {
240 return SimpleLruZeroPage(SubTransCtl, pageno);
241 }
242
243 /*
244 * This must be called ONCE during postmaster or standalone-backend startup,
245 * after StartupXLOG has initialized ShmemVariableCache->nextXid.
246 *
247 * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid
248 * if there are none.
249 */
250 void
StartupSUBTRANS(TransactionId oldestActiveXID)251 StartupSUBTRANS(TransactionId oldestActiveXID)
252 {
253 int startPage;
254 int endPage;
255
256 /*
257 * Since we don't expect pg_subtrans to be valid across crashes, we
258 * initialize the currently-active page(s) to zeroes during startup.
259 * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
260 * the new page without regard to whatever was previously on disk.
261 */
262 LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
263
264 startPage = TransactionIdToPage(oldestActiveXID);
265 endPage = TransactionIdToPage(ShmemVariableCache->nextXid);
266
267 while (startPage != endPage)
268 {
269 (void) ZeroSUBTRANSPage(startPage);
270 startPage++;
271 /* must account for wraparound */
272 if (startPage > TransactionIdToPage(MaxTransactionId))
273 startPage = 0;
274 }
275 (void) ZeroSUBTRANSPage(startPage);
276
277 LWLockRelease(SubtransControlLock);
278 }
279
/*
 * This must be called ONCE during postmaster or standalone-backend shutdown
 *
 * Flushes the SUBTRANS SLRU through SimpleLruFlush, bracketed by the
 * subtrans-checkpoint trace probes.
 */
void
ShutdownSUBTRANS(void)
{
	/*
	 * Flush dirty SUBTRANS pages to disk
	 *
	 * This is not actually necessary from a correctness point of view. We do
	 * it merely as a debugging aid.
	 *
	 * NOTE(review): the "false" passed to the probes and to SimpleLruFlush
	 * presumably distinguishes this shutdown flush from the checkpoint flush
	 * in CheckPointSUBTRANS, which passes "true"; confirm in slru.c.
	 */
	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(false);
	SimpleLruFlush(SubTransCtl, false);
	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(false);
}
296
/*
 * Perform a checkpoint --- either during shutdown, or on-the-fly
 *
 * Flushes the SUBTRANS SLRU through SimpleLruFlush, bracketed by the
 * subtrans-checkpoint trace probes.
 */
void
CheckPointSUBTRANS(void)
{
	/*
	 * Flush dirty SUBTRANS pages to disk
	 *
	 * This is not actually necessary from a correctness point of view. We do
	 * it merely to improve the odds that writing of dirty pages is done by
	 * the checkpoint process and not by backends.
	 *
	 * NOTE(review): the "true" passed to the probes and to SimpleLruFlush
	 * presumably marks this as a checkpoint flush, as opposed to the "false"
	 * used by ShutdownSUBTRANS; confirm in slru.c.
	 */
	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(true);
	SimpleLruFlush(SubTransCtl, true);
	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(true);
}
314
315
316 /*
317 * Make sure that SUBTRANS has room for a newly-allocated XID.
318 *
319 * NB: this is called while holding XidGenLock. We want it to be very fast
320 * most of the time; even when it's not so fast, no actual I/O need happen
321 * unless we're forced to write out a dirty subtrans page to make room
322 * in shared memory.
323 */
324 void
ExtendSUBTRANS(TransactionId newestXact)325 ExtendSUBTRANS(TransactionId newestXact)
326 {
327 int pageno;
328
329 /*
330 * No work except at first XID of a page. But beware: just after
331 * wraparound, the first XID of page zero is FirstNormalTransactionId.
332 */
333 if (TransactionIdToEntry(newestXact) != 0 &&
334 !TransactionIdEquals(newestXact, FirstNormalTransactionId))
335 return;
336
337 pageno = TransactionIdToPage(newestXact);
338
339 LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
340
341 /* Zero the page */
342 ZeroSUBTRANSPage(pageno);
343
344 LWLockRelease(SubtransControlLock);
345 }
346
347
348 /*
349 * Remove all SUBTRANS segments before the one holding the passed transaction ID
350 *
351 * oldestXact is the oldest TransactionXmin of any running transaction. This
352 * is called only during checkpoint.
353 */
354 void
TruncateSUBTRANS(TransactionId oldestXact)355 TruncateSUBTRANS(TransactionId oldestXact)
356 {
357 int cutoffPage;
358
359 /*
360 * The cutoff point is the start of the segment containing oldestXact. We
361 * pass the *page* containing oldestXact to SimpleLruTruncate. We step
362 * back one transaction to avoid passing a cutoff page that hasn't been
363 * created yet in the rare case that oldestXact would be the first item on
364 * a page and oldestXact == next XID. In that case, if we didn't subtract
365 * one, we'd trigger SimpleLruTruncate's wraparound detection.
366 */
367 TransactionIdRetreat(oldestXact);
368 cutoffPage = TransactionIdToPage(oldestXact);
369
370 SimpleLruTruncate(SubTransCtl, cutoffPage);
371 }
372
373
374 /*
375 * Decide whether a SUBTRANS page number is "older" for truncation purposes.
376 * Analogous to CLOGPagePrecedes().
377 */
378 static bool
SubTransPagePrecedes(int page1,int page2)379 SubTransPagePrecedes(int page1, int page2)
380 {
381 TransactionId xid1;
382 TransactionId xid2;
383
384 xid1 = ((TransactionId) page1) * SUBTRANS_XACTS_PER_PAGE;
385 xid1 += FirstNormalTransactionId + 1;
386 xid2 = ((TransactionId) page2) * SUBTRANS_XACTS_PER_PAGE;
387 xid2 += FirstNormalTransactionId + 1;
388
389 return (TransactionIdPrecedes(xid1, xid2) &&
390 TransactionIdPrecedes(xid1, xid2 + SUBTRANS_XACTS_PER_PAGE - 1));
391 }
392