1 /*-------------------------------------------------------------------------
2  *
3  * timeline.c
4  *		Functions for reading and writing timeline history files.
5  *
6  * A timeline history file lists the timeline changes of the timeline, in
7  * a simple text format. They are archived along with the WAL segments.
8  *
9  * The files are named like "<tli>.history". For example, if the database
10  * starts up and switches to timeline 5, the timeline history file would be
11  * called "00000005.history".
12  *
13  * Each line in the file represents a timeline switch:
14  *
15  * <parentTLI> <switchpoint> <reason>
16  *
17  *	parentTLI	ID of the parent timeline
18  *	switchpoint XLogRecPtr of the WAL position where the switch happened
19  *	reason		human-readable explanation of why the timeline was changed
20  *
21  * The fields are separated by tabs. Lines beginning with # are comments, and
22  * are ignored. Empty lines are also ignored.
23  *
24  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  * src/backend/access/transam/timeline.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include <sys/stat.h>
35 #include <stdio.h>
36 #include <unistd.h>
37 
38 #include "access/timeline.h"
39 #include "access/xlog.h"
40 #include "access/xlog_internal.h"
41 #include "access/xlogdefs.h"
42 #include "storage/fd.h"
43 
44 /*
45  * Copies all timeline history files with id's between 'begin' and 'end'
46  * from archive to pg_xlog.
47  */
48 void
restoreTimeLineHistoryFiles(TimeLineID begin,TimeLineID end)49 restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
50 {
51 	char		path[MAXPGPATH];
52 	char		histfname[MAXFNAMELEN];
53 	TimeLineID	tli;
54 
55 	for (tli = begin; tli < end; tli++)
56 	{
57 		if (tli == 1)
58 			continue;
59 
60 		TLHistoryFileName(histfname, tli);
61 		if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
62 			KeepFileRestoredFromArchive(path, histfname);
63 	}
64 }
65 
66 /*
67  * Try to read a timeline's history file.
68  *
69  * If successful, return the list of component TLIs (the given TLI followed by
70  * its ancestor TLIs).  If we can't find the history file, assume that the
71  * timeline has no parents, and return a list of just the specified timeline
72  * ID.
73  */
74 List *
readTimeLineHistory(TimeLineID targetTLI)75 readTimeLineHistory(TimeLineID targetTLI)
76 {
77 	List	   *result;
78 	char		path[MAXPGPATH];
79 	char		histfname[MAXFNAMELEN];
80 	char		fline[MAXPGPATH];
81 	FILE	   *fd;
82 	TimeLineHistoryEntry *entry;
83 	TimeLineID	lasttli = 0;
84 	XLogRecPtr	prevend;
85 	bool		fromArchive = false;
86 
87 	/* Timeline 1 does not have a history file, so no need to check */
88 	if (targetTLI == 1)
89 	{
90 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
91 		entry->tli = targetTLI;
92 		entry->begin = entry->end = InvalidXLogRecPtr;
93 		return list_make1(entry);
94 	}
95 
96 	if (ArchiveRecoveryRequested)
97 	{
98 		TLHistoryFileName(histfname, targetTLI);
99 		fromArchive =
100 			RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
101 	}
102 	else
103 		TLHistoryFilePath(path, targetTLI);
104 
105 	fd = AllocateFile(path, "r");
106 	if (fd == NULL)
107 	{
108 		if (errno != ENOENT)
109 			ereport(FATAL,
110 					(errcode_for_file_access(),
111 					 errmsg("could not open file \"%s\": %m", path)));
112 		/* Not there, so assume no parents */
113 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
114 		entry->tli = targetTLI;
115 		entry->begin = entry->end = InvalidXLogRecPtr;
116 		return list_make1(entry);
117 	}
118 
119 	result = NIL;
120 
121 	/*
122 	 * Parse the file...
123 	 */
124 	prevend = InvalidXLogRecPtr;
125 	while (fgets(fline, sizeof(fline), fd) != NULL)
126 	{
127 		/* skip leading whitespace and check for # comment */
128 		char	   *ptr;
129 		TimeLineID	tli;
130 		uint32		switchpoint_hi;
131 		uint32		switchpoint_lo;
132 		int			nfields;
133 
134 		for (ptr = fline; *ptr; ptr++)
135 		{
136 			if (!isspace((unsigned char) *ptr))
137 				break;
138 		}
139 		if (*ptr == '\0' || *ptr == '#')
140 			continue;
141 
142 		nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
143 
144 		if (nfields < 1)
145 		{
146 			/* expect a numeric timeline ID as first field of line */
147 			ereport(FATAL,
148 					(errmsg("syntax error in history file: %s", fline),
149 					 errhint("Expected a numeric timeline ID.")));
150 		}
151 		if (nfields != 3)
152 			ereport(FATAL,
153 					(errmsg("syntax error in history file: %s", fline),
154 			   errhint("Expected a transaction log switchpoint location.")));
155 
156 		if (result && tli <= lasttli)
157 			ereport(FATAL,
158 					(errmsg("invalid data in history file: %s", fline),
159 				   errhint("Timeline IDs must be in increasing sequence.")));
160 
161 		lasttli = tli;
162 
163 		entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
164 		entry->tli = tli;
165 		entry->begin = prevend;
166 		entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
167 		prevend = entry->end;
168 
169 		/* Build list with newest item first */
170 		result = lcons(entry, result);
171 
172 		/* we ignore the remainder of each line */
173 	}
174 
175 	FreeFile(fd);
176 
177 	if (result && targetTLI <= lasttli)
178 		ereport(FATAL,
179 				(errmsg("invalid data in history file \"%s\"", path),
180 			errhint("Timeline IDs must be less than child timeline's ID.")));
181 
182 	/*
183 	 * Create one more entry for the "tip" of the timeline, which has no entry
184 	 * in the history file.
185 	 */
186 	entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
187 	entry->tli = targetTLI;
188 	entry->begin = prevend;
189 	entry->end = InvalidXLogRecPtr;
190 
191 	result = lcons(entry, result);
192 
193 	/*
194 	 * If the history file was fetched from archive, save it in pg_xlog for
195 	 * future reference.
196 	 */
197 	if (fromArchive)
198 		KeepFileRestoredFromArchive(path, histfname);
199 
200 	return result;
201 }
202 
203 /*
204  * Probe whether a timeline history file exists for the given timeline ID
205  */
206 bool
existsTimeLineHistory(TimeLineID probeTLI)207 existsTimeLineHistory(TimeLineID probeTLI)
208 {
209 	char		path[MAXPGPATH];
210 	char		histfname[MAXFNAMELEN];
211 	FILE	   *fd;
212 
213 	/* Timeline 1 does not have a history file, so no need to check */
214 	if (probeTLI == 1)
215 		return false;
216 
217 	if (ArchiveRecoveryRequested)
218 	{
219 		TLHistoryFileName(histfname, probeTLI);
220 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
221 	}
222 	else
223 		TLHistoryFilePath(path, probeTLI);
224 
225 	fd = AllocateFile(path, "r");
226 	if (fd != NULL)
227 	{
228 		FreeFile(fd);
229 		return true;
230 	}
231 	else
232 	{
233 		if (errno != ENOENT)
234 			ereport(FATAL,
235 					(errcode_for_file_access(),
236 					 errmsg("could not open file \"%s\": %m", path)));
237 		return false;
238 	}
239 }
240 
241 /*
242  * Find the newest existing timeline, assuming that startTLI exists.
243  *
244  * Note: while this is somewhat heuristic, it does positively guarantee
245  * that (result + 1) is not a known timeline, and therefore it should
246  * be safe to assign that ID to a new timeline.
247  */
248 TimeLineID
findNewestTimeLine(TimeLineID startTLI)249 findNewestTimeLine(TimeLineID startTLI)
250 {
251 	TimeLineID	newestTLI;
252 	TimeLineID	probeTLI;
253 
254 	/*
255 	 * The algorithm is just to probe for the existence of timeline history
256 	 * files.  XXX is it useful to allow gaps in the sequence?
257 	 */
258 	newestTLI = startTLI;
259 
260 	for (probeTLI = startTLI + 1;; probeTLI++)
261 	{
262 		if (existsTimeLineHistory(probeTLI))
263 		{
264 			newestTLI = probeTLI;		/* probeTLI exists */
265 		}
266 		else
267 		{
268 			/* doesn't exist, assume we're done */
269 			break;
270 		}
271 	}
272 
273 	return newestTLI;
274 }
275 
276 /*
277  * Create a new timeline history file.
278  *
279  *	newTLI: ID of the new timeline
280  *	parentTLI: ID of its immediate parent
281  *	switchpoint: XLOG position where the system switched to the new timeline
282  *	reason: human-readable explanation of why the timeline was switched
283  *
284  * Currently this is only used at the end recovery, and so there are no locking
285  * considerations.  But we should be just as tense as XLogFileInit to avoid
286  * emplacing a bogus file.
287  */
288 void
writeTimeLineHistory(TimeLineID newTLI,TimeLineID parentTLI,XLogRecPtr switchpoint,char * reason)289 writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
290 					 XLogRecPtr switchpoint, char *reason)
291 {
292 	char		path[MAXPGPATH];
293 	char		tmppath[MAXPGPATH];
294 	char		histfname[MAXFNAMELEN];
295 	char		buffer[BLCKSZ];
296 	int			srcfd;
297 	int			fd;
298 	int			nbytes;
299 
300 	Assert(newTLI > parentTLI); /* else bad selection of newTLI */
301 
302 	/*
303 	 * Write into a temp file name.
304 	 */
305 	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
306 
307 	unlink(tmppath);
308 
309 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
310 	fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
311 						   S_IRUSR | S_IWUSR);
312 	if (fd < 0)
313 		ereport(ERROR,
314 				(errcode_for_file_access(),
315 				 errmsg("could not create file \"%s\": %m", tmppath)));
316 
317 	/*
318 	 * If a history file exists for the parent, copy it verbatim
319 	 */
320 	if (ArchiveRecoveryRequested)
321 	{
322 		TLHistoryFileName(histfname, parentTLI);
323 		RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
324 	}
325 	else
326 		TLHistoryFilePath(path, parentTLI);
327 
328 	srcfd = OpenTransientFile(path, O_RDONLY, 0);
329 	if (srcfd < 0)
330 	{
331 		if (errno != ENOENT)
332 			ereport(ERROR,
333 					(errcode_for_file_access(),
334 					 errmsg("could not open file \"%s\": %m", path)));
335 		/* Not there, so assume parent has no parents */
336 	}
337 	else
338 	{
339 		for (;;)
340 		{
341 			errno = 0;
342 			nbytes = (int) read(srcfd, buffer, sizeof(buffer));
343 			if (nbytes < 0 || errno != 0)
344 				ereport(ERROR,
345 						(errcode_for_file_access(),
346 						 errmsg("could not read file \"%s\": %m", path)));
347 			if (nbytes == 0)
348 				break;
349 			errno = 0;
350 			if ((int) write(fd, buffer, nbytes) != nbytes)
351 			{
352 				int			save_errno = errno;
353 
354 				/*
355 				 * If we fail to make the file, delete it to release disk
356 				 * space
357 				 */
358 				unlink(tmppath);
359 
360 				/*
361 				 * if write didn't set errno, assume problem is no disk space
362 				 */
363 				errno = save_errno ? save_errno : ENOSPC;
364 
365 				ereport(ERROR,
366 						(errcode_for_file_access(),
367 					 errmsg("could not write to file \"%s\": %m", tmppath)));
368 			}
369 		}
370 		CloseTransientFile(srcfd);
371 	}
372 
373 	/*
374 	 * Append one line with the details of this timeline split.
375 	 *
376 	 * If we did have a parent file, insert an extra newline just in case the
377 	 * parent file failed to end with one.
378 	 */
379 	snprintf(buffer, sizeof(buffer),
380 			 "%s%u\t%X/%X\t%s\n",
381 			 (srcfd < 0) ? "" : "\n",
382 			 parentTLI,
383 			 (uint32) (switchpoint >> 32), (uint32) (switchpoint),
384 			 reason);
385 
386 	nbytes = strlen(buffer);
387 	errno = 0;
388 	if ((int) write(fd, buffer, nbytes) != nbytes)
389 	{
390 		int			save_errno = errno;
391 
392 		/*
393 		 * If we fail to make the file, delete it to release disk space
394 		 */
395 		unlink(tmppath);
396 		/* if write didn't set errno, assume problem is no disk space */
397 		errno = save_errno ? save_errno : ENOSPC;
398 
399 		ereport(ERROR,
400 				(errcode_for_file_access(),
401 				 errmsg("could not write to file \"%s\": %m", tmppath)));
402 	}
403 
404 	if (pg_fsync(fd) != 0)
405 		ereport(data_sync_elevel(ERROR),
406 				(errcode_for_file_access(),
407 				 errmsg("could not fsync file \"%s\": %m", tmppath)));
408 
409 	if (CloseTransientFile(fd))
410 		ereport(ERROR,
411 				(errcode_for_file_access(),
412 				 errmsg("could not close file \"%s\": %m", tmppath)));
413 
414 
415 	/*
416 	 * Now move the completed history file into place with its final name.
417 	 */
418 	TLHistoryFilePath(path, newTLI);
419 
420 	/*
421 	 * Perform the rename using link if available, paranoidly trying to avoid
422 	 * overwriting an existing file (there shouldn't be one).
423 	 */
424 	durable_link_or_rename(tmppath, path, ERROR);
425 
426 	/* The history file can be archived immediately. */
427 	if (XLogArchivingActive())
428 	{
429 		TLHistoryFileName(histfname, newTLI);
430 		XLogArchiveNotify(histfname);
431 	}
432 }
433 
434 /*
435  * Writes a history file for given timeline and contents.
436  *
437  * Currently this is only used in the walreceiver process, and so there are
438  * no locking considerations.  But we should be just as tense as XLogFileInit
439  * to avoid emplacing a bogus file.
440  */
441 void
writeTimeLineHistoryFile(TimeLineID tli,char * content,int size)442 writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
443 {
444 	char		path[MAXPGPATH];
445 	char		tmppath[MAXPGPATH];
446 	int			fd;
447 
448 	/*
449 	 * Write into a temp file name.
450 	 */
451 	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
452 
453 	unlink(tmppath);
454 
455 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
456 	fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
457 						   S_IRUSR | S_IWUSR);
458 	if (fd < 0)
459 		ereport(ERROR,
460 				(errcode_for_file_access(),
461 				 errmsg("could not create file \"%s\": %m", tmppath)));
462 
463 	errno = 0;
464 	if ((int) write(fd, content, size) != size)
465 	{
466 		int			save_errno = errno;
467 
468 		/*
469 		 * If we fail to make the file, delete it to release disk space
470 		 */
471 		unlink(tmppath);
472 		/* if write didn't set errno, assume problem is no disk space */
473 		errno = save_errno ? save_errno : ENOSPC;
474 
475 		ereport(ERROR,
476 				(errcode_for_file_access(),
477 				 errmsg("could not write to file \"%s\": %m", tmppath)));
478 	}
479 
480 	if (pg_fsync(fd) != 0)
481 		ereport(data_sync_elevel(ERROR),
482 				(errcode_for_file_access(),
483 				 errmsg("could not fsync file \"%s\": %m", tmppath)));
484 
485 	if (CloseTransientFile(fd))
486 		ereport(ERROR,
487 				(errcode_for_file_access(),
488 				 errmsg("could not close file \"%s\": %m", tmppath)));
489 
490 
491 	/*
492 	 * Now move the completed history file into place with its final name.
493 	 */
494 	TLHistoryFilePath(path, tli);
495 
496 	/*
497 	 * Perform the rename using link if available, paranoidly trying to avoid
498 	 * overwriting an existing file (there shouldn't be one).
499 	 */
500 	durable_link_or_rename(tmppath, path, ERROR);
501 }
502 
503 /*
504  * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
505  */
506 bool
tliInHistory(TimeLineID tli,List * expectedTLEs)507 tliInHistory(TimeLineID tli, List *expectedTLEs)
508 {
509 	ListCell   *cell;
510 
511 	foreach(cell, expectedTLEs)
512 	{
513 		if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
514 			return true;
515 	}
516 
517 	return false;
518 }
519 
520 /*
521  * Returns the ID of the timeline in use at a particular point in time, in
522  * the given timeline history.
523  */
524 TimeLineID
tliOfPointInHistory(XLogRecPtr ptr,List * history)525 tliOfPointInHistory(XLogRecPtr ptr, List *history)
526 {
527 	ListCell   *cell;
528 
529 	foreach(cell, history)
530 	{
531 		TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
532 
533 		if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
534 			(XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
535 		{
536 			/* found it */
537 			return tle->tli;
538 		}
539 	}
540 
541 	/* shouldn't happen. */
542 	elog(ERROR, "timeline history was not contiguous");
543 	return 0;					/* keep compiler quiet */
544 }
545 
546 /*
547  * Returns the point in history where we branched off the given timeline,
548  * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
549  * the timeline is current, ie. we have not branched off from it, and throws
550  * an error if the timeline is not part of this server's history.
551  */
552 XLogRecPtr
tliSwitchPoint(TimeLineID tli,List * history,TimeLineID * nextTLI)553 tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
554 {
555 	ListCell   *cell;
556 
557 	if (nextTLI)
558 		*nextTLI = 0;
559 	foreach(cell, history)
560 	{
561 		TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
562 
563 		if (tle->tli == tli)
564 			return tle->end;
565 		if (nextTLI)
566 			*nextTLI = tle->tli;
567 	}
568 
569 	ereport(ERROR,
570 			(errmsg("requested timeline %u is not in this server's history",
571 					tli)));
572 	return InvalidXLogRecPtr;	/* keep compiler quiet */
573 }
574