1 /*-------------------------------------------------------------------------
2  *
3  * timeline.c
4  *		Functions for reading and writing timeline history files.
5  *
6  * A timeline history file lists the timeline changes of the timeline, in
7  * a simple text format. They are archived along with the WAL segments.
8  *
9  * The files are named like "<tli>.history". For example, if the database
10  * starts up and switches to timeline 5, the timeline history file would be
11  * called "00000005.history".
12  *
13  * Each line in the file represents a timeline switch:
14  *
15  * <parentTLI> <switchpoint> <reason>
16  *
17  *	parentTLI	ID of the parent timeline
18  *	switchpoint XLogRecPtr of the WAL location where the switch happened
19  *	reason		human-readable explanation of why the timeline was changed
20  *
21  * The fields are separated by tabs. Lines beginning with # are comments, and
22  * are ignored. Empty lines are also ignored.
23  *
24  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  * src/backend/access/transam/timeline.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include <sys/stat.h>
35 #include <unistd.h>
36 
37 #include "access/timeline.h"
38 #include "access/xlog.h"
39 #include "access/xlog_internal.h"
40 #include "access/xlogdefs.h"
41 #include "pgstat.h"
42 #include "storage/fd.h"
43 
44 /*
45  * Copies all timeline history files with id's between 'begin' and 'end'
46  * from archive to pg_wal.
47  */
48 void
restoreTimeLineHistoryFiles(TimeLineID begin,TimeLineID end)49 restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
50 {
51 	char		path[MAXPGPATH];
52 	char		histfname[MAXFNAMELEN];
53 	TimeLineID	tli;
54 
55 	for (tli = begin; tli < end; tli++)
56 	{
57 		if (tli == 1)
58 			continue;
59 
60 		TLHistoryFileName(histfname, tli);
61 		if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
62 			KeepFileRestoredFromArchive(path, histfname);
63 	}
64 }
65 
66 /*
67  * Try to read a timeline's history file.
68  *
69  * If successful, return the list of component TLIs (the given TLI followed by
70  * its ancestor TLIs).  If we can't find the history file, assume that the
71  * timeline has no parents, and return a list of just the specified timeline
72  * ID.
73  */
74 List *
readTimeLineHistory(TimeLineID targetTLI)75 readTimeLineHistory(TimeLineID targetTLI)
76 {
77 	List	   *result;
78 	char		path[MAXPGPATH];
79 	char		histfname[MAXFNAMELEN];
80 	FILE	   *fd;
81 	TimeLineHistoryEntry *entry;
82 	TimeLineID	lasttli = 0;
83 	XLogRecPtr	prevend;
84 	bool		fromArchive = false;
85 
86 	/* Timeline 1 does not have a history file, so no need to check */
87 	if (targetTLI == 1)
88 	{
89 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
90 		entry->tli = targetTLI;
91 		entry->begin = entry->end = InvalidXLogRecPtr;
92 		return list_make1(entry);
93 	}
94 
95 	if (ArchiveRecoveryRequested)
96 	{
97 		TLHistoryFileName(histfname, targetTLI);
98 		fromArchive =
99 			RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
100 	}
101 	else
102 		TLHistoryFilePath(path, targetTLI);
103 
104 	fd = AllocateFile(path, "r");
105 	if (fd == NULL)
106 	{
107 		if (errno != ENOENT)
108 			ereport(FATAL,
109 					(errcode_for_file_access(),
110 					 errmsg("could not open file \"%s\": %m", path)));
111 		/* Not there, so assume no parents */
112 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
113 		entry->tli = targetTLI;
114 		entry->begin = entry->end = InvalidXLogRecPtr;
115 		return list_make1(entry);
116 	}
117 
118 	result = NIL;
119 
120 	/*
121 	 * Parse the file...
122 	 */
123 	prevend = InvalidXLogRecPtr;
124 	for (;;)
125 	{
126 		char		fline[MAXPGPATH];
127 		char	   *res;
128 		char	   *ptr;
129 		TimeLineID	tli;
130 		uint32		switchpoint_hi;
131 		uint32		switchpoint_lo;
132 		int			nfields;
133 
134 		pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
135 		res = fgets(fline, sizeof(fline), fd);
136 		pgstat_report_wait_end();
137 		if (res == NULL)
138 		{
139 			if (ferror(fd))
140 				ereport(ERROR,
141 						(errcode_for_file_access(),
142 						 errmsg("could not read file \"%s\": %m", path)));
143 
144 			break;
145 		}
146 
147 		/* skip leading whitespace and check for # comment */
148 		for (ptr = fline; *ptr; ptr++)
149 		{
150 			if (!isspace((unsigned char) *ptr))
151 				break;
152 		}
153 		if (*ptr == '\0' || *ptr == '#')
154 			continue;
155 
156 		nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
157 
158 		if (nfields < 1)
159 		{
160 			/* expect a numeric timeline ID as first field of line */
161 			ereport(FATAL,
162 					(errmsg("syntax error in history file: %s", fline),
163 					 errhint("Expected a numeric timeline ID.")));
164 		}
165 		if (nfields != 3)
166 			ereport(FATAL,
167 					(errmsg("syntax error in history file: %s", fline),
168 					 errhint("Expected a write-ahead log switchpoint location.")));
169 
170 		if (result && tli <= lasttli)
171 			ereport(FATAL,
172 					(errmsg("invalid data in history file: %s", fline),
173 					 errhint("Timeline IDs must be in increasing sequence.")));
174 
175 		lasttli = tli;
176 
177 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
178 		entry->tli = tli;
179 		entry->begin = prevend;
180 		entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
181 		prevend = entry->end;
182 
183 		/* Build list with newest item first */
184 		result = lcons(entry, result);
185 
186 		/* we ignore the remainder of each line */
187 	}
188 
189 	FreeFile(fd);
190 
191 	if (result && targetTLI <= lasttli)
192 		ereport(FATAL,
193 				(errmsg("invalid data in history file \"%s\"", path),
194 				 errhint("Timeline IDs must be less than child timeline's ID.")));
195 
196 	/*
197 	 * Create one more entry for the "tip" of the timeline, which has no entry
198 	 * in the history file.
199 	 */
200 	entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
201 	entry->tli = targetTLI;
202 	entry->begin = prevend;
203 	entry->end = InvalidXLogRecPtr;
204 
205 	result = lcons(entry, result);
206 
207 	/*
208 	 * If the history file was fetched from archive, save it in pg_wal for
209 	 * future reference.
210 	 */
211 	if (fromArchive)
212 		KeepFileRestoredFromArchive(path, histfname);
213 
214 	return result;
215 }
216 
217 /*
218  * Probe whether a timeline history file exists for the given timeline ID
219  */
220 bool
existsTimeLineHistory(TimeLineID probeTLI)221 existsTimeLineHistory(TimeLineID probeTLI)
222 {
223 	char		path[MAXPGPATH];
224 	char		histfname[MAXFNAMELEN];
225 	FILE	   *fd;
226 
227 	/* Timeline 1 does not have a history file, so no need to check */
228 	if (probeTLI == 1)
229 		return false;
230 
231 	if (ArchiveRecoveryRequested)
232 	{
233 		TLHistoryFileName(histfname, probeTLI);
234 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
235 	}
236 	else
237 		TLHistoryFilePath(path, probeTLI);
238 
239 	fd = AllocateFile(path, "r");
240 	if (fd != NULL)
241 	{
242 		FreeFile(fd);
243 		return true;
244 	}
245 	else
246 	{
247 		if (errno != ENOENT)
248 			ereport(FATAL,
249 					(errcode_for_file_access(),
250 					 errmsg("could not open file \"%s\": %m", path)));
251 		return false;
252 	}
253 }
254 
255 /*
256  * Find the newest existing timeline, assuming that startTLI exists.
257  *
258  * Note: while this is somewhat heuristic, it does positively guarantee
259  * that (result + 1) is not a known timeline, and therefore it should
260  * be safe to assign that ID to a new timeline.
261  */
262 TimeLineID
findNewestTimeLine(TimeLineID startTLI)263 findNewestTimeLine(TimeLineID startTLI)
264 {
265 	TimeLineID	newestTLI;
266 	TimeLineID	probeTLI;
267 
268 	/*
269 	 * The algorithm is just to probe for the existence of timeline history
270 	 * files.  XXX is it useful to allow gaps in the sequence?
271 	 */
272 	newestTLI = startTLI;
273 
274 	for (probeTLI = startTLI + 1;; probeTLI++)
275 	{
276 		if (existsTimeLineHistory(probeTLI))
277 		{
278 			newestTLI = probeTLI;	/* probeTLI exists */
279 		}
280 		else
281 		{
282 			/* doesn't exist, assume we're done */
283 			break;
284 		}
285 	}
286 
287 	return newestTLI;
288 }
289 
290 /*
291  * Create a new timeline history file.
292  *
293  *	newTLI: ID of the new timeline
294  *	parentTLI: ID of its immediate parent
295  *	switchpoint: WAL location where the system switched to the new timeline
296  *	reason: human-readable explanation of why the timeline was switched
297  *
298  * Currently this is only used at the end recovery, and so there are no locking
299  * considerations.  But we should be just as tense as XLogFileInit to avoid
300  * emplacing a bogus file.
301  */
302 void
writeTimeLineHistory(TimeLineID newTLI,TimeLineID parentTLI,XLogRecPtr switchpoint,char * reason)303 writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
304 					 XLogRecPtr switchpoint, char *reason)
305 {
306 	char		path[MAXPGPATH];
307 	char		tmppath[MAXPGPATH];
308 	char		histfname[MAXFNAMELEN];
309 	char		buffer[BLCKSZ];
310 	int			srcfd;
311 	int			fd;
312 	int			nbytes;
313 
314 	Assert(newTLI > parentTLI); /* else bad selection of newTLI */
315 
316 	/*
317 	 * Write into a temp file name.
318 	 */
319 	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
320 
321 	unlink(tmppath);
322 
323 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
324 	fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
325 	if (fd < 0)
326 		ereport(ERROR,
327 				(errcode_for_file_access(),
328 				 errmsg("could not create file \"%s\": %m", tmppath)));
329 
330 	/*
331 	 * If a history file exists for the parent, copy it verbatim
332 	 */
333 	if (ArchiveRecoveryRequested)
334 	{
335 		TLHistoryFileName(histfname, parentTLI);
336 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
337 	}
338 	else
339 		TLHistoryFilePath(path, parentTLI);
340 
341 	srcfd = OpenTransientFile(path, O_RDONLY);
342 	if (srcfd < 0)
343 	{
344 		if (errno != ENOENT)
345 			ereport(ERROR,
346 					(errcode_for_file_access(),
347 					 errmsg("could not open file \"%s\": %m", path)));
348 		/* Not there, so assume parent has no parents */
349 	}
350 	else
351 	{
352 		for (;;)
353 		{
354 			errno = 0;
355 			pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
356 			nbytes = (int) read(srcfd, buffer, sizeof(buffer));
357 			pgstat_report_wait_end();
358 			if (nbytes < 0 || errno != 0)
359 				ereport(ERROR,
360 						(errcode_for_file_access(),
361 						 errmsg("could not read file \"%s\": %m", path)));
362 			if (nbytes == 0)
363 				break;
364 			errno = 0;
365 			pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
366 			if ((int) write(fd, buffer, nbytes) != nbytes)
367 			{
368 				int			save_errno = errno;
369 
370 				/*
371 				 * If we fail to make the file, delete it to release disk
372 				 * space
373 				 */
374 				unlink(tmppath);
375 
376 				/*
377 				 * if write didn't set errno, assume problem is no disk space
378 				 */
379 				errno = save_errno ? save_errno : ENOSPC;
380 
381 				ereport(ERROR,
382 						(errcode_for_file_access(),
383 						 errmsg("could not write to file \"%s\": %m", tmppath)));
384 			}
385 			pgstat_report_wait_end();
386 		}
387 		CloseTransientFile(srcfd);
388 	}
389 
390 	/*
391 	 * Append one line with the details of this timeline split.
392 	 *
393 	 * If we did have a parent file, insert an extra newline just in case the
394 	 * parent file failed to end with one.
395 	 */
396 	snprintf(buffer, sizeof(buffer),
397 			 "%s%u\t%X/%X\t%s\n",
398 			 (srcfd < 0) ? "" : "\n",
399 			 parentTLI,
400 			 (uint32) (switchpoint >> 32), (uint32) (switchpoint),
401 			 reason);
402 
403 	nbytes = strlen(buffer);
404 	errno = 0;
405 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
406 	if ((int) write(fd, buffer, nbytes) != nbytes)
407 	{
408 		int			save_errno = errno;
409 
410 		/*
411 		 * If we fail to make the file, delete it to release disk space
412 		 */
413 		unlink(tmppath);
414 		/* if write didn't set errno, assume problem is no disk space */
415 		errno = save_errno ? save_errno : ENOSPC;
416 
417 		ereport(ERROR,
418 				(errcode_for_file_access(),
419 				 errmsg("could not write to file \"%s\": %m", tmppath)));
420 	}
421 	pgstat_report_wait_end();
422 
423 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
424 	if (pg_fsync(fd) != 0)
425 		ereport(data_sync_elevel(ERROR),
426 				(errcode_for_file_access(),
427 				 errmsg("could not fsync file \"%s\": %m", tmppath)));
428 	pgstat_report_wait_end();
429 
430 	if (CloseTransientFile(fd))
431 		ereport(ERROR,
432 				(errcode_for_file_access(),
433 				 errmsg("could not close file \"%s\": %m", tmppath)));
434 
435 
436 	/*
437 	 * Now move the completed history file into place with its final name.
438 	 */
439 	TLHistoryFilePath(path, newTLI);
440 
441 	/*
442 	 * Perform the rename using link if available, paranoidly trying to avoid
443 	 * overwriting an existing file (there shouldn't be one).
444 	 */
445 	durable_link_or_rename(tmppath, path, ERROR);
446 
447 	/* The history file can be archived immediately. */
448 	if (XLogArchivingActive())
449 	{
450 		TLHistoryFileName(histfname, newTLI);
451 		XLogArchiveNotify(histfname);
452 	}
453 }
454 
455 /*
456  * Writes a history file for given timeline and contents.
457  *
458  * Currently this is only used in the walreceiver process, and so there are
459  * no locking considerations.  But we should be just as tense as XLogFileInit
460  * to avoid emplacing a bogus file.
461  */
462 void
writeTimeLineHistoryFile(TimeLineID tli,char * content,int size)463 writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
464 {
465 	char		path[MAXPGPATH];
466 	char		tmppath[MAXPGPATH];
467 	int			fd;
468 
469 	/*
470 	 * Write into a temp file name.
471 	 */
472 	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
473 
474 	unlink(tmppath);
475 
476 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
477 	fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
478 	if (fd < 0)
479 		ereport(ERROR,
480 				(errcode_for_file_access(),
481 				 errmsg("could not create file \"%s\": %m", tmppath)));
482 
483 	errno = 0;
484 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
485 	if ((int) write(fd, content, size) != size)
486 	{
487 		int			save_errno = errno;
488 
489 		/*
490 		 * If we fail to make the file, delete it to release disk space
491 		 */
492 		unlink(tmppath);
493 		/* if write didn't set errno, assume problem is no disk space */
494 		errno = save_errno ? save_errno : ENOSPC;
495 
496 		ereport(ERROR,
497 				(errcode_for_file_access(),
498 				 errmsg("could not write to file \"%s\": %m", tmppath)));
499 	}
500 	pgstat_report_wait_end();
501 
502 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
503 	if (pg_fsync(fd) != 0)
504 		ereport(data_sync_elevel(ERROR),
505 				(errcode_for_file_access(),
506 				 errmsg("could not fsync file \"%s\": %m", tmppath)));
507 	pgstat_report_wait_end();
508 
509 	if (CloseTransientFile(fd))
510 		ereport(ERROR,
511 				(errcode_for_file_access(),
512 				 errmsg("could not close file \"%s\": %m", tmppath)));
513 
514 
515 	/*
516 	 * Now move the completed history file into place with its final name.
517 	 */
518 	TLHistoryFilePath(path, tli);
519 
520 	/*
521 	 * Perform the rename using link if available, paranoidly trying to avoid
522 	 * overwriting an existing file (there shouldn't be one).
523 	 */
524 	durable_link_or_rename(tmppath, path, ERROR);
525 }
526 
527 /*
528  * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
529  */
530 bool
tliInHistory(TimeLineID tli,List * expectedTLEs)531 tliInHistory(TimeLineID tli, List *expectedTLEs)
532 {
533 	ListCell   *cell;
534 
535 	foreach(cell, expectedTLEs)
536 	{
537 		if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
538 			return true;
539 	}
540 
541 	return false;
542 }
543 
544 /*
545  * Returns the ID of the timeline in use at a particular point in time, in
546  * the given timeline history.
547  */
548 TimeLineID
tliOfPointInHistory(XLogRecPtr ptr,List * history)549 tliOfPointInHistory(XLogRecPtr ptr, List *history)
550 {
551 	ListCell   *cell;
552 
553 	foreach(cell, history)
554 	{
555 		TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
556 
557 		if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
558 			(XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
559 		{
560 			/* found it */
561 			return tle->tli;
562 		}
563 	}
564 
565 	/* shouldn't happen. */
566 	elog(ERROR, "timeline history was not contiguous");
567 	return 0;					/* keep compiler quiet */
568 }
569 
570 /*
571  * Returns the point in history where we branched off the given timeline,
572  * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
573  * the timeline is current, ie. we have not branched off from it, and throws
574  * an error if the timeline is not part of this server's history.
575  */
576 XLogRecPtr
tliSwitchPoint(TimeLineID tli,List * history,TimeLineID * nextTLI)577 tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
578 {
579 	ListCell   *cell;
580 
581 	if (nextTLI)
582 		*nextTLI = 0;
583 	foreach(cell, history)
584 	{
585 		TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
586 
587 		if (tle->tli == tli)
588 			return tle->end;
589 		if (nextTLI)
590 			*nextTLI = tle->tli;
591 	}
592 
593 	ereport(ERROR,
594 			(errmsg("requested timeline %u is not in this server's history",
595 					tli)));
596 	return InvalidXLogRecPtr;	/* keep compiler quiet */
597 }
598