1 /*-------------------------------------------------------------------------
2  *
3  * timeline.c
4  *		Functions for reading and writing timeline history files.
5  *
 * A timeline history file lists the timeline switches that led up to a
 * timeline, in a simple text format. History files are archived along with
 * the WAL segments.
8  *
9  * The files are named like "<tli>.history". For example, if the database
10  * starts up and switches to timeline 5, the timeline history file would be
11  * called "00000005.history".
12  *
13  * Each line in the file represents a timeline switch:
14  *
15  * <parentTLI> <switchpoint> <reason>
16  *
17  *	parentTLI	ID of the parent timeline
18  *	switchpoint XLogRecPtr of the WAL location where the switch happened
19  *	reason		human-readable explanation of why the timeline was changed
20  *
21  * The fields are separated by tabs. Lines beginning with # are comments, and
22  * are ignored. Empty lines are also ignored.
23  *
24  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  * src/backend/access/transam/timeline.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include <sys/stat.h>
35 #include <unistd.h>
36 
37 #include "access/timeline.h"
38 #include "access/xlog.h"
39 #include "access/xlog_internal.h"
40 #include "access/xlogarchive.h"
41 #include "access/xlogdefs.h"
42 #include "pgstat.h"
43 #include "storage/fd.h"
44 
45 /*
46  * Copies all timeline history files with id's between 'begin' and 'end'
47  * from archive to pg_wal.
48  */
49 void
restoreTimeLineHistoryFiles(TimeLineID begin,TimeLineID end)50 restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
51 {
52 	char		path[MAXPGPATH];
53 	char		histfname[MAXFNAMELEN];
54 	TimeLineID	tli;
55 
56 	for (tli = begin; tli < end; tli++)
57 	{
58 		if (tli == 1)
59 			continue;
60 
61 		TLHistoryFileName(histfname, tli);
62 		if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
63 			KeepFileRestoredFromArchive(path, histfname);
64 	}
65 }
66 
67 /*
68  * Try to read a timeline's history file.
69  *
70  * If successful, return the list of component TLIs (the given TLI followed by
71  * its ancestor TLIs).  If we can't find the history file, assume that the
72  * timeline has no parents, and return a list of just the specified timeline
73  * ID.
74  */
75 List *
readTimeLineHistory(TimeLineID targetTLI)76 readTimeLineHistory(TimeLineID targetTLI)
77 {
78 	List	   *result;
79 	char		path[MAXPGPATH];
80 	char		histfname[MAXFNAMELEN];
81 	FILE	   *fd;
82 	TimeLineHistoryEntry *entry;
83 	TimeLineID	lasttli = 0;
84 	XLogRecPtr	prevend;
85 	bool		fromArchive = false;
86 
87 	/* Timeline 1 does not have a history file, so no need to check */
88 	if (targetTLI == 1)
89 	{
90 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
91 		entry->tli = targetTLI;
92 		entry->begin = entry->end = InvalidXLogRecPtr;
93 		return list_make1(entry);
94 	}
95 
96 	if (ArchiveRecoveryRequested)
97 	{
98 		TLHistoryFileName(histfname, targetTLI);
99 		fromArchive =
100 			RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
101 	}
102 	else
103 		TLHistoryFilePath(path, targetTLI);
104 
105 	fd = AllocateFile(path, "r");
106 	if (fd == NULL)
107 	{
108 		if (errno != ENOENT)
109 			ereport(FATAL,
110 					(errcode_for_file_access(),
111 					 errmsg("could not open file \"%s\": %m", path)));
112 		/* Not there, so assume no parents */
113 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
114 		entry->tli = targetTLI;
115 		entry->begin = entry->end = InvalidXLogRecPtr;
116 		return list_make1(entry);
117 	}
118 
119 	result = NIL;
120 
121 	/*
122 	 * Parse the file...
123 	 */
124 	prevend = InvalidXLogRecPtr;
125 	for (;;)
126 	{
127 		char		fline[MAXPGPATH];
128 		char	   *res;
129 		char	   *ptr;
130 		TimeLineID	tli;
131 		uint32		switchpoint_hi;
132 		uint32		switchpoint_lo;
133 		int			nfields;
134 
135 		pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
136 		res = fgets(fline, sizeof(fline), fd);
137 		pgstat_report_wait_end();
138 		if (res == NULL)
139 		{
140 			if (ferror(fd))
141 				ereport(ERROR,
142 						(errcode_for_file_access(),
143 						 errmsg("could not read file \"%s\": %m", path)));
144 
145 			break;
146 		}
147 
148 		/* skip leading whitespace and check for # comment */
149 		for (ptr = fline; *ptr; ptr++)
150 		{
151 			if (!isspace((unsigned char) *ptr))
152 				break;
153 		}
154 		if (*ptr == '\0' || *ptr == '#')
155 			continue;
156 
157 		nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
158 
159 		if (nfields < 1)
160 		{
161 			/* expect a numeric timeline ID as first field of line */
162 			ereport(FATAL,
163 					(errmsg("syntax error in history file: %s", fline),
164 					 errhint("Expected a numeric timeline ID.")));
165 		}
166 		if (nfields != 3)
167 			ereport(FATAL,
168 					(errmsg("syntax error in history file: %s", fline),
169 					 errhint("Expected a write-ahead log switchpoint location.")));
170 
171 		if (result && tli <= lasttli)
172 			ereport(FATAL,
173 					(errmsg("invalid data in history file: %s", fline),
174 					 errhint("Timeline IDs must be in increasing sequence.")));
175 
176 		lasttli = tli;
177 
178 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
179 		entry->tli = tli;
180 		entry->begin = prevend;
181 		entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
182 		prevend = entry->end;
183 
184 		/* Build list with newest item first */
185 		result = lcons(entry, result);
186 
187 		/* we ignore the remainder of each line */
188 	}
189 
190 	FreeFile(fd);
191 
192 	if (result && targetTLI <= lasttli)
193 		ereport(FATAL,
194 				(errmsg("invalid data in history file \"%s\"", path),
195 				 errhint("Timeline IDs must be less than child timeline's ID.")));
196 
197 	/*
198 	 * Create one more entry for the "tip" of the timeline, which has no entry
199 	 * in the history file.
200 	 */
201 	entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
202 	entry->tli = targetTLI;
203 	entry->begin = prevend;
204 	entry->end = InvalidXLogRecPtr;
205 
206 	result = lcons(entry, result);
207 
208 	/*
209 	 * If the history file was fetched from archive, save it in pg_wal for
210 	 * future reference.
211 	 */
212 	if (fromArchive)
213 		KeepFileRestoredFromArchive(path, histfname);
214 
215 	return result;
216 }
217 
218 /*
219  * Probe whether a timeline history file exists for the given timeline ID
220  */
221 bool
existsTimeLineHistory(TimeLineID probeTLI)222 existsTimeLineHistory(TimeLineID probeTLI)
223 {
224 	char		path[MAXPGPATH];
225 	char		histfname[MAXFNAMELEN];
226 	FILE	   *fd;
227 
228 	/* Timeline 1 does not have a history file, so no need to check */
229 	if (probeTLI == 1)
230 		return false;
231 
232 	if (ArchiveRecoveryRequested)
233 	{
234 		TLHistoryFileName(histfname, probeTLI);
235 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
236 	}
237 	else
238 		TLHistoryFilePath(path, probeTLI);
239 
240 	fd = AllocateFile(path, "r");
241 	if (fd != NULL)
242 	{
243 		FreeFile(fd);
244 		return true;
245 	}
246 	else
247 	{
248 		if (errno != ENOENT)
249 			ereport(FATAL,
250 					(errcode_for_file_access(),
251 					 errmsg("could not open file \"%s\": %m", path)));
252 		return false;
253 	}
254 }
255 
256 /*
257  * Find the newest existing timeline, assuming that startTLI exists.
258  *
259  * Note: while this is somewhat heuristic, it does positively guarantee
260  * that (result + 1) is not a known timeline, and therefore it should
261  * be safe to assign that ID to a new timeline.
262  */
263 TimeLineID
findNewestTimeLine(TimeLineID startTLI)264 findNewestTimeLine(TimeLineID startTLI)
265 {
266 	TimeLineID	newestTLI;
267 	TimeLineID	probeTLI;
268 
269 	/*
270 	 * The algorithm is just to probe for the existence of timeline history
271 	 * files.  XXX is it useful to allow gaps in the sequence?
272 	 */
273 	newestTLI = startTLI;
274 
275 	for (probeTLI = startTLI + 1;; probeTLI++)
276 	{
277 		if (existsTimeLineHistory(probeTLI))
278 		{
279 			newestTLI = probeTLI;	/* probeTLI exists */
280 		}
281 		else
282 		{
283 			/* doesn't exist, assume we're done */
284 			break;
285 		}
286 	}
287 
288 	return newestTLI;
289 }
290 
291 /*
292  * Create a new timeline history file.
293  *
294  *	newTLI: ID of the new timeline
295  *	parentTLI: ID of its immediate parent
296  *	switchpoint: WAL location where the system switched to the new timeline
297  *	reason: human-readable explanation of why the timeline was switched
298  *
299  * Currently this is only used at the end recovery, and so there are no locking
300  * considerations.  But we should be just as tense as XLogFileInit to avoid
301  * emplacing a bogus file.
302  */
303 void
writeTimeLineHistory(TimeLineID newTLI,TimeLineID parentTLI,XLogRecPtr switchpoint,char * reason)304 writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
305 					 XLogRecPtr switchpoint, char *reason)
306 {
307 	char		path[MAXPGPATH];
308 	char		tmppath[MAXPGPATH];
309 	char		histfname[MAXFNAMELEN];
310 	char		buffer[BLCKSZ];
311 	int			srcfd;
312 	int			fd;
313 	int			nbytes;
314 
315 	Assert(newTLI > parentTLI); /* else bad selection of newTLI */
316 
317 	/*
318 	 * Write into a temp file name.
319 	 */
320 	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
321 
322 	unlink(tmppath);
323 
324 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
325 	fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
326 	if (fd < 0)
327 		ereport(ERROR,
328 				(errcode_for_file_access(),
329 				 errmsg("could not create file \"%s\": %m", tmppath)));
330 
331 	/*
332 	 * If a history file exists for the parent, copy it verbatim
333 	 */
334 	if (ArchiveRecoveryRequested)
335 	{
336 		TLHistoryFileName(histfname, parentTLI);
337 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
338 	}
339 	else
340 		TLHistoryFilePath(path, parentTLI);
341 
342 	srcfd = OpenTransientFile(path, O_RDONLY);
343 	if (srcfd < 0)
344 	{
345 		if (errno != ENOENT)
346 			ereport(ERROR,
347 					(errcode_for_file_access(),
348 					 errmsg("could not open file \"%s\": %m", path)));
349 		/* Not there, so assume parent has no parents */
350 	}
351 	else
352 	{
353 		for (;;)
354 		{
355 			errno = 0;
356 			pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
357 			nbytes = (int) read(srcfd, buffer, sizeof(buffer));
358 			pgstat_report_wait_end();
359 			if (nbytes < 0 || errno != 0)
360 				ereport(ERROR,
361 						(errcode_for_file_access(),
362 						 errmsg("could not read file \"%s\": %m", path)));
363 			if (nbytes == 0)
364 				break;
365 			errno = 0;
366 			pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
367 			if ((int) write(fd, buffer, nbytes) != nbytes)
368 			{
369 				int			save_errno = errno;
370 
371 				/*
372 				 * If we fail to make the file, delete it to release disk
373 				 * space
374 				 */
375 				unlink(tmppath);
376 
377 				/*
378 				 * if write didn't set errno, assume problem is no disk space
379 				 */
380 				errno = save_errno ? save_errno : ENOSPC;
381 
382 				ereport(ERROR,
383 						(errcode_for_file_access(),
384 						 errmsg("could not write to file \"%s\": %m", tmppath)));
385 			}
386 			pgstat_report_wait_end();
387 		}
388 
389 		if (CloseTransientFile(srcfd) != 0)
390 			ereport(ERROR,
391 					(errcode_for_file_access(),
392 					 errmsg("could not close file \"%s\": %m", path)));
393 	}
394 
395 	/*
396 	 * Append one line with the details of this timeline split.
397 	 *
398 	 * If we did have a parent file, insert an extra newline just in case the
399 	 * parent file failed to end with one.
400 	 */
401 	snprintf(buffer, sizeof(buffer),
402 			 "%s%u\t%X/%X\t%s\n",
403 			 (srcfd < 0) ? "" : "\n",
404 			 parentTLI,
405 			 LSN_FORMAT_ARGS(switchpoint),
406 			 reason);
407 
408 	nbytes = strlen(buffer);
409 	errno = 0;
410 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
411 	if ((int) write(fd, buffer, nbytes) != nbytes)
412 	{
413 		int			save_errno = errno;
414 
415 		/*
416 		 * If we fail to make the file, delete it to release disk space
417 		 */
418 		unlink(tmppath);
419 		/* if write didn't set errno, assume problem is no disk space */
420 		errno = save_errno ? save_errno : ENOSPC;
421 
422 		ereport(ERROR,
423 				(errcode_for_file_access(),
424 				 errmsg("could not write to file \"%s\": %m", tmppath)));
425 	}
426 	pgstat_report_wait_end();
427 
428 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
429 	if (pg_fsync(fd) != 0)
430 		ereport(data_sync_elevel(ERROR),
431 				(errcode_for_file_access(),
432 				 errmsg("could not fsync file \"%s\": %m", tmppath)));
433 	pgstat_report_wait_end();
434 
435 	if (CloseTransientFile(fd) != 0)
436 		ereport(ERROR,
437 				(errcode_for_file_access(),
438 				 errmsg("could not close file \"%s\": %m", tmppath)));
439 
440 	/*
441 	 * Now move the completed history file into place with its final name.
442 	 */
443 	TLHistoryFilePath(path, newTLI);
444 
445 	/*
446 	 * Perform the rename using link if available, paranoidly trying to avoid
447 	 * overwriting an existing file (there shouldn't be one).
448 	 */
449 	durable_rename_excl(tmppath, path, ERROR);
450 
451 	/* The history file can be archived immediately. */
452 	if (XLogArchivingActive())
453 	{
454 		TLHistoryFileName(histfname, newTLI);
455 		XLogArchiveNotify(histfname);
456 	}
457 }
458 
459 /*
460  * Writes a history file for given timeline and contents.
461  *
462  * Currently this is only used in the walreceiver process, and so there are
463  * no locking considerations.  But we should be just as tense as XLogFileInit
464  * to avoid emplacing a bogus file.
465  */
466 void
writeTimeLineHistoryFile(TimeLineID tli,char * content,int size)467 writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
468 {
469 	char		path[MAXPGPATH];
470 	char		tmppath[MAXPGPATH];
471 	int			fd;
472 
473 	/*
474 	 * Write into a temp file name.
475 	 */
476 	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
477 
478 	unlink(tmppath);
479 
480 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
481 	fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
482 	if (fd < 0)
483 		ereport(ERROR,
484 				(errcode_for_file_access(),
485 				 errmsg("could not create file \"%s\": %m", tmppath)));
486 
487 	errno = 0;
488 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
489 	if ((int) write(fd, content, size) != size)
490 	{
491 		int			save_errno = errno;
492 
493 		/*
494 		 * If we fail to make the file, delete it to release disk space
495 		 */
496 		unlink(tmppath);
497 		/* if write didn't set errno, assume problem is no disk space */
498 		errno = save_errno ? save_errno : ENOSPC;
499 
500 		ereport(ERROR,
501 				(errcode_for_file_access(),
502 				 errmsg("could not write to file \"%s\": %m", tmppath)));
503 	}
504 	pgstat_report_wait_end();
505 
506 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
507 	if (pg_fsync(fd) != 0)
508 		ereport(data_sync_elevel(ERROR),
509 				(errcode_for_file_access(),
510 				 errmsg("could not fsync file \"%s\": %m", tmppath)));
511 	pgstat_report_wait_end();
512 
513 	if (CloseTransientFile(fd) != 0)
514 		ereport(ERROR,
515 				(errcode_for_file_access(),
516 				 errmsg("could not close file \"%s\": %m", tmppath)));
517 
518 	/*
519 	 * Now move the completed history file into place with its final name.
520 	 */
521 	TLHistoryFilePath(path, tli);
522 
523 	/*
524 	 * Perform the rename using link if available, paranoidly trying to avoid
525 	 * overwriting an existing file (there shouldn't be one).
526 	 */
527 	durable_rename_excl(tmppath, path, ERROR);
528 }
529 
530 /*
531  * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
532  */
533 bool
tliInHistory(TimeLineID tli,List * expectedTLEs)534 tliInHistory(TimeLineID tli, List *expectedTLEs)
535 {
536 	ListCell   *cell;
537 
538 	foreach(cell, expectedTLEs)
539 	{
540 		if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
541 			return true;
542 	}
543 
544 	return false;
545 }
546 
547 /*
548  * Returns the ID of the timeline in use at a particular point in time, in
549  * the given timeline history.
550  */
551 TimeLineID
tliOfPointInHistory(XLogRecPtr ptr,List * history)552 tliOfPointInHistory(XLogRecPtr ptr, List *history)
553 {
554 	ListCell   *cell;
555 
556 	foreach(cell, history)
557 	{
558 		TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
559 
560 		if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
561 			(XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
562 		{
563 			/* found it */
564 			return tle->tli;
565 		}
566 	}
567 
568 	/* shouldn't happen. */
569 	elog(ERROR, "timeline history was not contiguous");
570 	return 0;					/* keep compiler quiet */
571 }
572 
573 /*
574  * Returns the point in history where we branched off the given timeline,
575  * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
576  * the timeline is current, ie. we have not branched off from it, and throws
577  * an error if the timeline is not part of this server's history.
578  */
579 XLogRecPtr
tliSwitchPoint(TimeLineID tli,List * history,TimeLineID * nextTLI)580 tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
581 {
582 	ListCell   *cell;
583 
584 	if (nextTLI)
585 		*nextTLI = 0;
586 	foreach(cell, history)
587 	{
588 		TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
589 
590 		if (tle->tli == tli)
591 			return tle->end;
592 		if (nextTLI)
593 			*nextTLI = tle->tli;
594 	}
595 
596 	ereport(ERROR,
597 			(errmsg("requested timeline %u is not in this server's history",
598 					tli)));
599 	return InvalidXLogRecPtr;	/* keep compiler quiet */
600 }
601