/*-------------------------------------------------------------------------
 *
 * timeline.c
 *		Functions for reading and writing timeline history files.
 *
 * A timeline history file lists the timeline switches that led up to a
 * given timeline, in a simple text format. History files are archived
 * along with the WAL segments.
 *
 * The files are named like "<tli>.history". For example, if the database
 * starts up and switches to timeline 5, the timeline history file would be
 * called "00000005.history".
 *
 * Each line in the file represents a timeline switch:
 *
 * <parentTLI> <switchpoint> <reason>
 *
 *	parentTLI	ID of the parent timeline
 *	switchpoint XLogRecPtr of the WAL location where the switch happened
 *	reason		human-readable explanation of why the timeline was changed
 *
 * The fields are separated by tabs. Lines beginning with # are comments, and
 * are ignored. Empty lines are also ignored.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/backend/access/transam/timeline.c
 *
 *-------------------------------------------------------------------------
 */
31 
32 #include "postgres.h"
33 
34 #include <sys/stat.h>
35 #include <unistd.h>
36 
37 #include "access/timeline.h"
38 #include "access/xlog.h"
39 #include "access/xlog_internal.h"
40 #include "access/xlogdefs.h"
41 #include "pgstat.h"
42 #include "storage/fd.h"
43 
44 /*
45  * Copies all timeline history files with id's between 'begin' and 'end'
46  * from archive to pg_wal.
47  */
48 void
restoreTimeLineHistoryFiles(TimeLineID begin,TimeLineID end)49 restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
50 {
51 	char		path[MAXPGPATH];
52 	char		histfname[MAXFNAMELEN];
53 	TimeLineID	tli;
54 
55 	for (tli = begin; tli < end; tli++)
56 	{
57 		if (tli == 1)
58 			continue;
59 
60 		TLHistoryFileName(histfname, tli);
61 		if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
62 			KeepFileRestoredFromArchive(path, histfname);
63 	}
64 }
65 
66 /*
67  * Try to read a timeline's history file.
68  *
69  * If successful, return the list of component TLIs (the given TLI followed by
70  * its ancestor TLIs).  If we can't find the history file, assume that the
71  * timeline has no parents, and return a list of just the specified timeline
72  * ID.
73  */
74 List *
readTimeLineHistory(TimeLineID targetTLI)75 readTimeLineHistory(TimeLineID targetTLI)
76 {
77 	List	   *result;
78 	char		path[MAXPGPATH];
79 	char		histfname[MAXFNAMELEN];
80 	FILE	   *fd;
81 	TimeLineHistoryEntry *entry;
82 	TimeLineID	lasttli = 0;
83 	XLogRecPtr	prevend;
84 	bool		fromArchive = false;
85 
86 	/* Timeline 1 does not have a history file, so no need to check */
87 	if (targetTLI == 1)
88 	{
89 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
90 		entry->tli = targetTLI;
91 		entry->begin = entry->end = InvalidXLogRecPtr;
92 		return list_make1(entry);
93 	}
94 
95 	if (ArchiveRecoveryRequested)
96 	{
97 		TLHistoryFileName(histfname, targetTLI);
98 		fromArchive =
99 			RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
100 	}
101 	else
102 		TLHistoryFilePath(path, targetTLI);
103 
104 	fd = AllocateFile(path, "r");
105 	if (fd == NULL)
106 	{
107 		if (errno != ENOENT)
108 			ereport(FATAL,
109 					(errcode_for_file_access(),
110 					 errmsg("could not open file \"%s\": %m", path)));
111 		/* Not there, so assume no parents */
112 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
113 		entry->tli = targetTLI;
114 		entry->begin = entry->end = InvalidXLogRecPtr;
115 		return list_make1(entry);
116 	}
117 
118 	result = NIL;
119 
120 	/*
121 	 * Parse the file...
122 	 */
123 	prevend = InvalidXLogRecPtr;
124 	for (;;)
125 	{
126 		char		fline[MAXPGPATH];
127 		char	   *res;
128 		char	   *ptr;
129 		TimeLineID	tli;
130 		uint32		switchpoint_hi;
131 		uint32		switchpoint_lo;
132 		int			nfields;
133 
134 		pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
135 		res = fgets(fline, sizeof(fline), fd);
136 		pgstat_report_wait_end();
137 		if (res == NULL)
138 		{
139 			if (ferror(fd))
140 				ereport(ERROR,
141 						(errcode_for_file_access(),
142 						 errmsg("could not read file \"%s\": %m", path)));
143 
144 			break;
145 		}
146 
147 		/* skip leading whitespace and check for # comment */
148 		for (ptr = fline; *ptr; ptr++)
149 		{
150 			if (!isspace((unsigned char) *ptr))
151 				break;
152 		}
153 		if (*ptr == '\0' || *ptr == '#')
154 			continue;
155 
156 		nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
157 
158 		if (nfields < 1)
159 		{
160 			/* expect a numeric timeline ID as first field of line */
161 			ereport(FATAL,
162 					(errmsg("syntax error in history file: %s", fline),
163 					 errhint("Expected a numeric timeline ID.")));
164 		}
165 		if (nfields != 3)
166 			ereport(FATAL,
167 					(errmsg("syntax error in history file: %s", fline),
168 					 errhint("Expected a write-ahead log switchpoint location.")));
169 
170 		if (result && tli <= lasttli)
171 			ereport(FATAL,
172 					(errmsg("invalid data in history file: %s", fline),
173 					 errhint("Timeline IDs must be in increasing sequence.")));
174 
175 		lasttli = tli;
176 
177 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
178 		entry->tli = tli;
179 		entry->begin = prevend;
180 		entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
181 		prevend = entry->end;
182 
183 		/* Build list with newest item first */
184 		result = lcons(entry, result);
185 
186 		/* we ignore the remainder of each line */
187 	}
188 
189 	FreeFile(fd);
190 
191 	if (result && targetTLI <= lasttli)
192 		ereport(FATAL,
193 				(errmsg("invalid data in history file \"%s\"", path),
194 				 errhint("Timeline IDs must be less than child timeline's ID.")));
195 
196 	/*
197 	 * Create one more entry for the "tip" of the timeline, which has no entry
198 	 * in the history file.
199 	 */
200 	entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
201 	entry->tli = targetTLI;
202 	entry->begin = prevend;
203 	entry->end = InvalidXLogRecPtr;
204 
205 	result = lcons(entry, result);
206 
207 	/*
208 	 * If the history file was fetched from archive, save it in pg_wal for
209 	 * future reference.
210 	 */
211 	if (fromArchive)
212 		KeepFileRestoredFromArchive(path, histfname);
213 
214 	return result;
215 }
216 
217 /*
218  * Probe whether a timeline history file exists for the given timeline ID
219  */
220 bool
existsTimeLineHistory(TimeLineID probeTLI)221 existsTimeLineHistory(TimeLineID probeTLI)
222 {
223 	char		path[MAXPGPATH];
224 	char		histfname[MAXFNAMELEN];
225 	FILE	   *fd;
226 
227 	/* Timeline 1 does not have a history file, so no need to check */
228 	if (probeTLI == 1)
229 		return false;
230 
231 	if (ArchiveRecoveryRequested)
232 	{
233 		TLHistoryFileName(histfname, probeTLI);
234 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
235 	}
236 	else
237 		TLHistoryFilePath(path, probeTLI);
238 
239 	fd = AllocateFile(path, "r");
240 	if (fd != NULL)
241 	{
242 		FreeFile(fd);
243 		return true;
244 	}
245 	else
246 	{
247 		if (errno != ENOENT)
248 			ereport(FATAL,
249 					(errcode_for_file_access(),
250 					 errmsg("could not open file \"%s\": %m", path)));
251 		return false;
252 	}
253 }
254 
255 /*
256  * Find the newest existing timeline, assuming that startTLI exists.
257  *
258  * Note: while this is somewhat heuristic, it does positively guarantee
259  * that (result + 1) is not a known timeline, and therefore it should
260  * be safe to assign that ID to a new timeline.
261  */
262 TimeLineID
findNewestTimeLine(TimeLineID startTLI)263 findNewestTimeLine(TimeLineID startTLI)
264 {
265 	TimeLineID	newestTLI;
266 	TimeLineID	probeTLI;
267 
268 	/*
269 	 * The algorithm is just to probe for the existence of timeline history
270 	 * files.  XXX is it useful to allow gaps in the sequence?
271 	 */
272 	newestTLI = startTLI;
273 
274 	for (probeTLI = startTLI + 1;; probeTLI++)
275 	{
276 		if (existsTimeLineHistory(probeTLI))
277 		{
278 			newestTLI = probeTLI;	/* probeTLI exists */
279 		}
280 		else
281 		{
282 			/* doesn't exist, assume we're done */
283 			break;
284 		}
285 	}
286 
287 	return newestTLI;
288 }
289 
290 /*
291  * Create a new timeline history file.
292  *
293  *	newTLI: ID of the new timeline
294  *	parentTLI: ID of its immediate parent
295  *	switchpoint: WAL location where the system switched to the new timeline
296  *	reason: human-readable explanation of why the timeline was switched
297  *
298  * Currently this is only used at the end recovery, and so there are no locking
299  * considerations.  But we should be just as tense as XLogFileInit to avoid
300  * emplacing a bogus file.
301  */
302 void
writeTimeLineHistory(TimeLineID newTLI,TimeLineID parentTLI,XLogRecPtr switchpoint,char * reason)303 writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
304 					 XLogRecPtr switchpoint, char *reason)
305 {
306 	char		path[MAXPGPATH];
307 	char		tmppath[MAXPGPATH];
308 	char		histfname[MAXFNAMELEN];
309 	char		buffer[BLCKSZ];
310 	int			srcfd;
311 	int			fd;
312 	int			nbytes;
313 
314 	Assert(newTLI > parentTLI); /* else bad selection of newTLI */
315 
316 	/*
317 	 * Write into a temp file name.
318 	 */
319 	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
320 
321 	unlink(tmppath);
322 
323 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
324 	fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
325 						   S_IRUSR | S_IWUSR);
326 	if (fd < 0)
327 		ereport(ERROR,
328 				(errcode_for_file_access(),
329 				 errmsg("could not create file \"%s\": %m", tmppath)));
330 
331 	/*
332 	 * If a history file exists for the parent, copy it verbatim
333 	 */
334 	if (ArchiveRecoveryRequested)
335 	{
336 		TLHistoryFileName(histfname, parentTLI);
337 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
338 	}
339 	else
340 		TLHistoryFilePath(path, parentTLI);
341 
342 	srcfd = OpenTransientFile(path, O_RDONLY, 0);
343 	if (srcfd < 0)
344 	{
345 		if (errno != ENOENT)
346 			ereport(ERROR,
347 					(errcode_for_file_access(),
348 					 errmsg("could not open file \"%s\": %m", path)));
349 		/* Not there, so assume parent has no parents */
350 	}
351 	else
352 	{
353 		for (;;)
354 		{
355 			errno = 0;
356 			pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
357 			nbytes = (int) read(srcfd, buffer, sizeof(buffer));
358 			pgstat_report_wait_end();
359 			if (nbytes < 0 || errno != 0)
360 				ereport(ERROR,
361 						(errcode_for_file_access(),
362 						 errmsg("could not read file \"%s\": %m", path)));
363 			if (nbytes == 0)
364 				break;
365 			errno = 0;
366 			pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
367 			if ((int) write(fd, buffer, nbytes) != nbytes)
368 			{
369 				int			save_errno = errno;
370 
371 				/*
372 				 * If we fail to make the file, delete it to release disk
373 				 * space
374 				 */
375 				unlink(tmppath);
376 
377 				/*
378 				 * if write didn't set errno, assume problem is no disk space
379 				 */
380 				errno = save_errno ? save_errno : ENOSPC;
381 
382 				ereport(ERROR,
383 						(errcode_for_file_access(),
384 						 errmsg("could not write to file \"%s\": %m", tmppath)));
385 			}
386 			pgstat_report_wait_end();
387 		}
388 		CloseTransientFile(srcfd);
389 	}
390 
391 	/*
392 	 * Append one line with the details of this timeline split.
393 	 *
394 	 * If we did have a parent file, insert an extra newline just in case the
395 	 * parent file failed to end with one.
396 	 */
397 	snprintf(buffer, sizeof(buffer),
398 			 "%s%u\t%X/%X\t%s\n",
399 			 (srcfd < 0) ? "" : "\n",
400 			 parentTLI,
401 			 (uint32) (switchpoint >> 32), (uint32) (switchpoint),
402 			 reason);
403 
404 	nbytes = strlen(buffer);
405 	errno = 0;
406 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
407 	if ((int) write(fd, buffer, nbytes) != nbytes)
408 	{
409 		int			save_errno = errno;
410 
411 		/*
412 		 * If we fail to make the file, delete it to release disk space
413 		 */
414 		unlink(tmppath);
415 		/* if write didn't set errno, assume problem is no disk space */
416 		errno = save_errno ? save_errno : ENOSPC;
417 
418 		ereport(ERROR,
419 				(errcode_for_file_access(),
420 				 errmsg("could not write to file \"%s\": %m", tmppath)));
421 	}
422 	pgstat_report_wait_end();
423 
424 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
425 	if (pg_fsync(fd) != 0)
426 		ereport(data_sync_elevel(ERROR),
427 				(errcode_for_file_access(),
428 				 errmsg("could not fsync file \"%s\": %m", tmppath)));
429 	pgstat_report_wait_end();
430 
431 	if (CloseTransientFile(fd))
432 		ereport(ERROR,
433 				(errcode_for_file_access(),
434 				 errmsg("could not close file \"%s\": %m", tmppath)));
435 
436 
437 	/*
438 	 * Now move the completed history file into place with its final name.
439 	 */
440 	TLHistoryFilePath(path, newTLI);
441 
442 	/*
443 	 * Perform the rename using link if available, paranoidly trying to avoid
444 	 * overwriting an existing file (there shouldn't be one).
445 	 */
446 	durable_link_or_rename(tmppath, path, ERROR);
447 
448 	/* The history file can be archived immediately. */
449 	if (XLogArchivingActive())
450 	{
451 		TLHistoryFileName(histfname, newTLI);
452 		XLogArchiveNotify(histfname);
453 	}
454 }
455 
456 /*
457  * Writes a history file for given timeline and contents.
458  *
459  * Currently this is only used in the walreceiver process, and so there are
460  * no locking considerations.  But we should be just as tense as XLogFileInit
461  * to avoid emplacing a bogus file.
462  */
463 void
writeTimeLineHistoryFile(TimeLineID tli,char * content,int size)464 writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
465 {
466 	char		path[MAXPGPATH];
467 	char		tmppath[MAXPGPATH];
468 	int			fd;
469 
470 	/*
471 	 * Write into a temp file name.
472 	 */
473 	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
474 
475 	unlink(tmppath);
476 
477 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
478 	fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
479 						   S_IRUSR | S_IWUSR);
480 	if (fd < 0)
481 		ereport(ERROR,
482 				(errcode_for_file_access(),
483 				 errmsg("could not create file \"%s\": %m", tmppath)));
484 
485 	errno = 0;
486 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
487 	if ((int) write(fd, content, size) != size)
488 	{
489 		int			save_errno = errno;
490 
491 		/*
492 		 * If we fail to make the file, delete it to release disk space
493 		 */
494 		unlink(tmppath);
495 		/* if write didn't set errno, assume problem is no disk space */
496 		errno = save_errno ? save_errno : ENOSPC;
497 
498 		ereport(ERROR,
499 				(errcode_for_file_access(),
500 				 errmsg("could not write to file \"%s\": %m", tmppath)));
501 	}
502 	pgstat_report_wait_end();
503 
504 	pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
505 	if (pg_fsync(fd) != 0)
506 		ereport(data_sync_elevel(ERROR),
507 				(errcode_for_file_access(),
508 				 errmsg("could not fsync file \"%s\": %m", tmppath)));
509 	pgstat_report_wait_end();
510 
511 	if (CloseTransientFile(fd))
512 		ereport(ERROR,
513 				(errcode_for_file_access(),
514 				 errmsg("could not close file \"%s\": %m", tmppath)));
515 
516 
517 	/*
518 	 * Now move the completed history file into place with its final name.
519 	 */
520 	TLHistoryFilePath(path, tli);
521 
522 	/*
523 	 * Perform the rename using link if available, paranoidly trying to avoid
524 	 * overwriting an existing file (there shouldn't be one).
525 	 */
526 	durable_link_or_rename(tmppath, path, ERROR);
527 }
528 
529 /*
530  * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
531  */
532 bool
tliInHistory(TimeLineID tli,List * expectedTLEs)533 tliInHistory(TimeLineID tli, List *expectedTLEs)
534 {
535 	ListCell   *cell;
536 
537 	foreach(cell, expectedTLEs)
538 	{
539 		if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
540 			return true;
541 	}
542 
543 	return false;
544 }
545 
546 /*
547  * Returns the ID of the timeline in use at a particular point in time, in
548  * the given timeline history.
549  */
550 TimeLineID
tliOfPointInHistory(XLogRecPtr ptr,List * history)551 tliOfPointInHistory(XLogRecPtr ptr, List *history)
552 {
553 	ListCell   *cell;
554 
555 	foreach(cell, history)
556 	{
557 		TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
558 
559 		if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
560 			(XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
561 		{
562 			/* found it */
563 			return tle->tli;
564 		}
565 	}
566 
567 	/* shouldn't happen. */
568 	elog(ERROR, "timeline history was not contiguous");
569 	return 0;					/* keep compiler quiet */
570 }
571 
572 /*
573  * Returns the point in history where we branched off the given timeline,
574  * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
575  * the timeline is current, ie. we have not branched off from it, and throws
576  * an error if the timeline is not part of this server's history.
577  */
578 XLogRecPtr
tliSwitchPoint(TimeLineID tli,List * history,TimeLineID * nextTLI)579 tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
580 {
581 	ListCell   *cell;
582 
583 	if (nextTLI)
584 		*nextTLI = 0;
585 	foreach(cell, history)
586 	{
587 		TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
588 
589 		if (tle->tli == tli)
590 			return tle->end;
591 		if (nextTLI)
592 			*nextTLI = tle->tli;
593 	}
594 
595 	ereport(ERROR,
596 			(errmsg("requested timeline %u is not in this server's history",
597 					tli)));
598 	return InvalidXLogRecPtr;	/* keep compiler quiet */
599 }
600