1 /*-------------------------------------------------------------------------
2  *
3  * subtrans.c
4  *		PostgreSQL subtransaction-log manager
5  *
6  * The pg_subtrans manager is a pg_xact-like manager that stores the parent
7  * transaction Id for each transaction.  It is a fundamental part of the
8  * nested transactions implementation.  A main transaction has a parent
9  * of InvalidTransactionId, and each subtransaction has its immediate parent.
10  * The tree can easily be walked from child to parent, but not in the
11  * opposite direction.
12  *
13  * This code is based on xact.c, but the robustness requirements
14  * are completely different from pg_xact, because we only need to remember
15  * pg_subtrans information for currently-open transactions.  Thus, there is
16  * no need to preserve data over a crash and restart.
17  *
18  * There are no XLOG interactions since we do not care about preserving
19  * data across crashes.  During database startup, we simply force the
20  * currently-active page of SUBTRANS to zeroes.
21  *
22  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
23  * Portions Copyright (c) 1994, Regents of the University of California
24  *
25  * src/backend/access/transam/subtrans.c
26  *
27  *-------------------------------------------------------------------------
28  */
29 #include "postgres.h"
30 
31 #include "access/slru.h"
32 #include "access/subtrans.h"
33 #include "access/transam.h"
34 #include "pg_trace.h"
35 #include "utils/snapmgr.h"
36 
37 
/*
 * Page-size definitions for SubTrans.  A page is BLCKSZ bytes, the same size
 * that is used everywhere else in Postgres.
 *
 * Note: TransactionIds are 32 bits wide and wrap around at 0xFFFFFFFF, so
 * SubTrans page numbering wraps around at
 * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE, and segment numbering at
 * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  This module
 * needs to take explicit notice of that fact only when comparing segment and
 * page numbers in TruncateSUBTRANS (see SubTransPagePrecedes) and when
 * zeroing pages in StartupSUBTRANS.
 */

/* Each xact needs one TransactionId-sized (four-byte) slot */
#define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId))

/* Page number that holds an XID's entry, and the entry's index in that page */
#define TransactionIdToPage(xid) \
	((xid) / (TransactionId) SUBTRANS_XACTS_PER_PAGE)
#define TransactionIdToEntry(xid) \
	((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE)
56 
57 
58 /*
59  * Link to shared-memory data structures for SUBTRANS control
60  */
61 static SlruCtlData SubTransCtlData;
62 
63 #define SubTransCtl  (&SubTransCtlData)
64 
65 
66 static int	ZeroSUBTRANSPage(int pageno);
67 static bool SubTransPagePrecedes(int page1, int page2);
68 
69 
70 /*
71  * Record the parent of a subtransaction in the subtrans log.
72  */
73 void
SubTransSetParent(TransactionId xid,TransactionId parent)74 SubTransSetParent(TransactionId xid, TransactionId parent)
75 {
76 	int			pageno = TransactionIdToPage(xid);
77 	int			entryno = TransactionIdToEntry(xid);
78 	int			slotno;
79 	TransactionId *ptr;
80 
81 	Assert(TransactionIdIsValid(parent));
82 	Assert(TransactionIdFollows(xid, parent));
83 
84 	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
85 
86 	slotno = SimpleLruReadPage(SubTransCtl, pageno, true, xid);
87 	ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
88 	ptr += entryno;
89 
90 	/*
91 	 * It's possible we'll try to set the parent xid multiple times but we
92 	 * shouldn't ever be changing the xid from one valid xid to another valid
93 	 * xid, which would corrupt the data structure.
94 	 */
95 	if (*ptr != parent)
96 	{
97 		Assert(*ptr == InvalidTransactionId);
98 		*ptr = parent;
99 		SubTransCtl->shared->page_dirty[slotno] = true;
100 	}
101 
102 	LWLockRelease(SubtransControlLock);
103 }
104 
105 /*
106  * Interrogate the parent of a transaction in the subtrans log.
107  */
108 TransactionId
SubTransGetParent(TransactionId xid)109 SubTransGetParent(TransactionId xid)
110 {
111 	int			pageno = TransactionIdToPage(xid);
112 	int			entryno = TransactionIdToEntry(xid);
113 	int			slotno;
114 	TransactionId *ptr;
115 	TransactionId parent;
116 
117 	/* Can't ask about stuff that might not be around anymore */
118 	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
119 
120 	/* Bootstrap and frozen XIDs have no parent */
121 	if (!TransactionIdIsNormal(xid))
122 		return InvalidTransactionId;
123 
124 	/* lock is acquired by SimpleLruReadPage_ReadOnly */
125 
126 	slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid);
127 	ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
128 	ptr += entryno;
129 
130 	parent = *ptr;
131 
132 	LWLockRelease(SubtransControlLock);
133 
134 	return parent;
135 }
136 
137 /*
138  * SubTransGetTopmostTransaction
139  *
140  * Returns the topmost transaction of the given transaction id.
141  *
142  * Because we cannot look back further than TransactionXmin, it is possible
143  * that this function will lie and return an intermediate subtransaction ID
144  * instead of the true topmost parent ID.  This is OK, because in practice
145  * we only care about detecting whether the topmost parent is still running
146  * or is part of a current snapshot's list of still-running transactions.
147  * Therefore, any XID before TransactionXmin is as good as any other.
148  */
149 TransactionId
SubTransGetTopmostTransaction(TransactionId xid)150 SubTransGetTopmostTransaction(TransactionId xid)
151 {
152 	TransactionId parentXid = xid,
153 				previousXid = xid;
154 
155 	/* Can't ask about stuff that might not be around anymore */
156 	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
157 
158 	while (TransactionIdIsValid(parentXid))
159 	{
160 		previousXid = parentXid;
161 		if (TransactionIdPrecedes(parentXid, TransactionXmin))
162 			break;
163 		parentXid = SubTransGetParent(parentXid);
164 
165 		/*
166 		 * By convention the parent xid gets allocated first, so should always
167 		 * precede the child xid. Anything else points to a corrupted data
168 		 * structure that could lead to an infinite loop, so exit.
169 		 */
170 		if (!TransactionIdPrecedes(parentXid, previousXid))
171 			elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u",
172 				 previousXid, parentXid);
173 	}
174 
175 	Assert(TransactionIdIsValid(previousXid));
176 
177 	return previousXid;
178 }
179 
180 
181 /*
182  * Initialization of shared memory for SUBTRANS
183  */
184 Size
SUBTRANSShmemSize(void)185 SUBTRANSShmemSize(void)
186 {
187 	return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0);
188 }
189 
190 void
SUBTRANSShmemInit(void)191 SUBTRANSShmemInit(void)
192 {
193 	SubTransCtl->PagePrecedes = SubTransPagePrecedes;
194 	SimpleLruInit(SubTransCtl, "subtrans", NUM_SUBTRANS_BUFFERS, 0,
195 				  SubtransControlLock, "pg_subtrans",
196 				  LWTRANCHE_SUBTRANS_BUFFERS);
197 	/* Override default assumption that writes should be fsync'd */
198 	SubTransCtl->do_fsync = false;
199 	SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE);
200 }
201 
202 /*
203  * This func must be called ONCE on system install.  It creates
204  * the initial SUBTRANS segment.  (The SUBTRANS directory is assumed to
205  * have been created by the initdb shell script, and SUBTRANSShmemInit
206  * must have been called already.)
207  *
208  * Note: it's not really necessary to create the initial segment now,
209  * since slru.c would create it on first write anyway.  But we may as well
210  * do it to be sure the directory is set up correctly.
211  */
212 void
BootStrapSUBTRANS(void)213 BootStrapSUBTRANS(void)
214 {
215 	int			slotno;
216 
217 	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
218 
219 	/* Create and zero the first page of the subtrans log */
220 	slotno = ZeroSUBTRANSPage(0);
221 
222 	/* Make sure it's written out */
223 	SimpleLruWritePage(SubTransCtl, slotno);
224 	Assert(!SubTransCtl->shared->page_dirty[slotno]);
225 
226 	LWLockRelease(SubtransControlLock);
227 }
228 
229 /*
230  * Initialize (or reinitialize) a page of SUBTRANS to zeroes.
231  *
232  * The page is not actually written, just set up in shared memory.
233  * The slot number of the new page is returned.
234  *
235  * Control lock must be held at entry, and will be held at exit.
236  */
237 static int
ZeroSUBTRANSPage(int pageno)238 ZeroSUBTRANSPage(int pageno)
239 {
240 	return SimpleLruZeroPage(SubTransCtl, pageno);
241 }
242 
243 /*
244  * This must be called ONCE during postmaster or standalone-backend startup,
245  * after StartupXLOG has initialized ShmemVariableCache->nextXid.
246  *
247  * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid
248  * if there are none.
249  */
250 void
StartupSUBTRANS(TransactionId oldestActiveXID)251 StartupSUBTRANS(TransactionId oldestActiveXID)
252 {
253 	int			startPage;
254 	int			endPage;
255 
256 	/*
257 	 * Since we don't expect pg_subtrans to be valid across crashes, we
258 	 * initialize the currently-active page(s) to zeroes during startup.
259 	 * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
260 	 * the new page without regard to whatever was previously on disk.
261 	 */
262 	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
263 
264 	startPage = TransactionIdToPage(oldestActiveXID);
265 	endPage = TransactionIdToPage(ShmemVariableCache->nextXid);
266 
267 	while (startPage != endPage)
268 	{
269 		(void) ZeroSUBTRANSPage(startPage);
270 		startPage++;
271 		/* must account for wraparound */
272 		if (startPage > TransactionIdToPage(MaxTransactionId))
273 			startPage = 0;
274 	}
275 	(void) ZeroSUBTRANSPage(startPage);
276 
277 	LWLockRelease(SubtransControlLock);
278 }
279 
280 /*
281  * This must be called ONCE during postmaster or standalone-backend shutdown
282  */
283 void
ShutdownSUBTRANS(void)284 ShutdownSUBTRANS(void)
285 {
286 	/*
287 	 * Flush dirty SUBTRANS pages to disk
288 	 *
289 	 * This is not actually necessary from a correctness point of view. We do
290 	 * it merely as a debugging aid.
291 	 */
292 	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(false);
293 	SimpleLruFlush(SubTransCtl, false);
294 	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(false);
295 }
296 
297 /*
298  * Perform a checkpoint --- either during shutdown, or on-the-fly
299  */
300 void
CheckPointSUBTRANS(void)301 CheckPointSUBTRANS(void)
302 {
303 	/*
304 	 * Flush dirty SUBTRANS pages to disk
305 	 *
306 	 * This is not actually necessary from a correctness point of view. We do
307 	 * it merely to improve the odds that writing of dirty pages is done by
308 	 * the checkpoint process and not by backends.
309 	 */
310 	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(true);
311 	SimpleLruFlush(SubTransCtl, true);
312 	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(true);
313 }
314 
315 
316 /*
317  * Make sure that SUBTRANS has room for a newly-allocated XID.
318  *
319  * NB: this is called while holding XidGenLock.  We want it to be very fast
320  * most of the time; even when it's not so fast, no actual I/O need happen
321  * unless we're forced to write out a dirty subtrans page to make room
322  * in shared memory.
323  */
324 void
ExtendSUBTRANS(TransactionId newestXact)325 ExtendSUBTRANS(TransactionId newestXact)
326 {
327 	int			pageno;
328 
329 	/*
330 	 * No work except at first XID of a page.  But beware: just after
331 	 * wraparound, the first XID of page zero is FirstNormalTransactionId.
332 	 */
333 	if (TransactionIdToEntry(newestXact) != 0 &&
334 		!TransactionIdEquals(newestXact, FirstNormalTransactionId))
335 		return;
336 
337 	pageno = TransactionIdToPage(newestXact);
338 
339 	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
340 
341 	/* Zero the page */
342 	ZeroSUBTRANSPage(pageno);
343 
344 	LWLockRelease(SubtransControlLock);
345 }
346 
347 
348 /*
349  * Remove all SUBTRANS segments before the one holding the passed transaction ID
350  *
351  * oldestXact is the oldest TransactionXmin of any running transaction.  This
352  * is called only during checkpoint.
353  */
354 void
TruncateSUBTRANS(TransactionId oldestXact)355 TruncateSUBTRANS(TransactionId oldestXact)
356 {
357 	int			cutoffPage;
358 
359 	/*
360 	 * The cutoff point is the start of the segment containing oldestXact. We
361 	 * pass the *page* containing oldestXact to SimpleLruTruncate.  We step
362 	 * back one transaction to avoid passing a cutoff page that hasn't been
363 	 * created yet in the rare case that oldestXact would be the first item on
364 	 * a page and oldestXact == next XID.  In that case, if we didn't subtract
365 	 * one, we'd trigger SimpleLruTruncate's wraparound detection.
366 	 */
367 	TransactionIdRetreat(oldestXact);
368 	cutoffPage = TransactionIdToPage(oldestXact);
369 
370 	SimpleLruTruncate(SubTransCtl, cutoffPage);
371 }
372 
373 
374 /*
375  * Decide whether a SUBTRANS page number is "older" for truncation purposes.
376  * Analogous to CLOGPagePrecedes().
377  */
378 static bool
SubTransPagePrecedes(int page1,int page2)379 SubTransPagePrecedes(int page1, int page2)
380 {
381 	TransactionId xid1;
382 	TransactionId xid2;
383 
384 	xid1 = ((TransactionId) page1) * SUBTRANS_XACTS_PER_PAGE;
385 	xid1 += FirstNormalTransactionId + 1;
386 	xid2 = ((TransactionId) page2) * SUBTRANS_XACTS_PER_PAGE;
387 	xid2 += FirstNormalTransactionId + 1;
388 
389 	return (TransactionIdPrecedes(xid1, xid2) &&
390 			TransactionIdPrecedes(xid1, xid2 + SUBTRANS_XACTS_PER_PAGE - 1));
391 }
392