1 /*-------------------------------------------------------------------------
2 *
3 * timeline.c
4 * Functions for reading and writing timeline history files.
5 *
6 * A timeline history file lists the timeline changes of the timeline, in
7 * a simple text format. They are archived along with the WAL segments.
8 *
9 * The files are named like "<tli>.history". For example, if the database
10 * starts up and switches to timeline 5, the timeline history file would be
11 * called "00000005.history".
12 *
13 * Each line in the file represents a timeline switch:
14 *
15 * <parentTLI> <switchpoint> <reason>
16 *
17 * parentTLI ID of the parent timeline
18 * switchpoint XLogRecPtr of the WAL location where the switch happened
19 * reason human-readable explanation of why the timeline was changed
20 *
21 * The fields are separated by tabs. Lines beginning with # are comments, and
22 * are ignored. Empty lines are also ignored.
23 *
24 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
25 * Portions Copyright (c) 1994, Regents of the University of California
26 *
27 * src/backend/access/transam/timeline.c
28 *
29 *-------------------------------------------------------------------------
30 */
31
32 #include "postgres.h"
33
34 #include <sys/stat.h>
35 #include <unistd.h>
36
37 #include "access/timeline.h"
38 #include "access/xlog.h"
39 #include "access/xlog_internal.h"
40 #include "access/xlogdefs.h"
41 #include "pgstat.h"
42 #include "storage/fd.h"
43
44 /*
45 * Copies all timeline history files with id's between 'begin' and 'end'
46 * from archive to pg_wal.
47 */
48 void
restoreTimeLineHistoryFiles(TimeLineID begin,TimeLineID end)49 restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
50 {
51 char path[MAXPGPATH];
52 char histfname[MAXFNAMELEN];
53 TimeLineID tli;
54
55 for (tli = begin; tli < end; tli++)
56 {
57 if (tli == 1)
58 continue;
59
60 TLHistoryFileName(histfname, tli);
61 if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
62 KeepFileRestoredFromArchive(path, histfname);
63 }
64 }
65
66 /*
67 * Try to read a timeline's history file.
68 *
69 * If successful, return the list of component TLIs (the given TLI followed by
70 * its ancestor TLIs). If we can't find the history file, assume that the
71 * timeline has no parents, and return a list of just the specified timeline
72 * ID.
73 */
74 List *
readTimeLineHistory(TimeLineID targetTLI)75 readTimeLineHistory(TimeLineID targetTLI)
76 {
77 List *result;
78 char path[MAXPGPATH];
79 char histfname[MAXFNAMELEN];
80 FILE *fd;
81 TimeLineHistoryEntry *entry;
82 TimeLineID lasttli = 0;
83 XLogRecPtr prevend;
84 bool fromArchive = false;
85
86 /* Timeline 1 does not have a history file, so no need to check */
87 if (targetTLI == 1)
88 {
89 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
90 entry->tli = targetTLI;
91 entry->begin = entry->end = InvalidXLogRecPtr;
92 return list_make1(entry);
93 }
94
95 if (ArchiveRecoveryRequested)
96 {
97 TLHistoryFileName(histfname, targetTLI);
98 fromArchive =
99 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
100 }
101 else
102 TLHistoryFilePath(path, targetTLI);
103
104 fd = AllocateFile(path, "r");
105 if (fd == NULL)
106 {
107 if (errno != ENOENT)
108 ereport(FATAL,
109 (errcode_for_file_access(),
110 errmsg("could not open file \"%s\": %m", path)));
111 /* Not there, so assume no parents */
112 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
113 entry->tli = targetTLI;
114 entry->begin = entry->end = InvalidXLogRecPtr;
115 return list_make1(entry);
116 }
117
118 result = NIL;
119
120 /*
121 * Parse the file...
122 */
123 prevend = InvalidXLogRecPtr;
124 for (;;)
125 {
126 char fline[MAXPGPATH];
127 char *res;
128 char *ptr;
129 TimeLineID tli;
130 uint32 switchpoint_hi;
131 uint32 switchpoint_lo;
132 int nfields;
133
134 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
135 res = fgets(fline, sizeof(fline), fd);
136 pgstat_report_wait_end();
137 if (res == NULL)
138 {
139 if (ferror(fd))
140 ereport(ERROR,
141 (errcode_for_file_access(),
142 errmsg("could not read file \"%s\": %m", path)));
143
144 break;
145 }
146
147 /* skip leading whitespace and check for # comment */
148 for (ptr = fline; *ptr; ptr++)
149 {
150 if (!isspace((unsigned char) *ptr))
151 break;
152 }
153 if (*ptr == '\0' || *ptr == '#')
154 continue;
155
156 nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
157
158 if (nfields < 1)
159 {
160 /* expect a numeric timeline ID as first field of line */
161 ereport(FATAL,
162 (errmsg("syntax error in history file: %s", fline),
163 errhint("Expected a numeric timeline ID.")));
164 }
165 if (nfields != 3)
166 ereport(FATAL,
167 (errmsg("syntax error in history file: %s", fline),
168 errhint("Expected a write-ahead log switchpoint location.")));
169
170 if (result && tli <= lasttli)
171 ereport(FATAL,
172 (errmsg("invalid data in history file: %s", fline),
173 errhint("Timeline IDs must be in increasing sequence.")));
174
175 lasttli = tli;
176
177 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
178 entry->tli = tli;
179 entry->begin = prevend;
180 entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
181 prevend = entry->end;
182
183 /* Build list with newest item first */
184 result = lcons(entry, result);
185
186 /* we ignore the remainder of each line */
187 }
188
189 FreeFile(fd);
190
191 if (result && targetTLI <= lasttli)
192 ereport(FATAL,
193 (errmsg("invalid data in history file \"%s\"", path),
194 errhint("Timeline IDs must be less than child timeline's ID.")));
195
196 /*
197 * Create one more entry for the "tip" of the timeline, which has no entry
198 * in the history file.
199 */
200 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
201 entry->tli = targetTLI;
202 entry->begin = prevend;
203 entry->end = InvalidXLogRecPtr;
204
205 result = lcons(entry, result);
206
207 /*
208 * If the history file was fetched from archive, save it in pg_wal for
209 * future reference.
210 */
211 if (fromArchive)
212 KeepFileRestoredFromArchive(path, histfname);
213
214 return result;
215 }
216
217 /*
218 * Probe whether a timeline history file exists for the given timeline ID
219 */
220 bool
existsTimeLineHistory(TimeLineID probeTLI)221 existsTimeLineHistory(TimeLineID probeTLI)
222 {
223 char path[MAXPGPATH];
224 char histfname[MAXFNAMELEN];
225 FILE *fd;
226
227 /* Timeline 1 does not have a history file, so no need to check */
228 if (probeTLI == 1)
229 return false;
230
231 if (ArchiveRecoveryRequested)
232 {
233 TLHistoryFileName(histfname, probeTLI);
234 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
235 }
236 else
237 TLHistoryFilePath(path, probeTLI);
238
239 fd = AllocateFile(path, "r");
240 if (fd != NULL)
241 {
242 FreeFile(fd);
243 return true;
244 }
245 else
246 {
247 if (errno != ENOENT)
248 ereport(FATAL,
249 (errcode_for_file_access(),
250 errmsg("could not open file \"%s\": %m", path)));
251 return false;
252 }
253 }
254
255 /*
256 * Find the newest existing timeline, assuming that startTLI exists.
257 *
258 * Note: while this is somewhat heuristic, it does positively guarantee
259 * that (result + 1) is not a known timeline, and therefore it should
260 * be safe to assign that ID to a new timeline.
261 */
262 TimeLineID
findNewestTimeLine(TimeLineID startTLI)263 findNewestTimeLine(TimeLineID startTLI)
264 {
265 TimeLineID newestTLI;
266 TimeLineID probeTLI;
267
268 /*
269 * The algorithm is just to probe for the existence of timeline history
270 * files. XXX is it useful to allow gaps in the sequence?
271 */
272 newestTLI = startTLI;
273
274 for (probeTLI = startTLI + 1;; probeTLI++)
275 {
276 if (existsTimeLineHistory(probeTLI))
277 {
278 newestTLI = probeTLI; /* probeTLI exists */
279 }
280 else
281 {
282 /* doesn't exist, assume we're done */
283 break;
284 }
285 }
286
287 return newestTLI;
288 }
289
290 /*
291 * Create a new timeline history file.
292 *
293 * newTLI: ID of the new timeline
294 * parentTLI: ID of its immediate parent
295 * switchpoint: WAL location where the system switched to the new timeline
296 * reason: human-readable explanation of why the timeline was switched
297 *
298 * Currently this is only used at the end recovery, and so there are no locking
299 * considerations. But we should be just as tense as XLogFileInit to avoid
300 * emplacing a bogus file.
301 */
302 void
writeTimeLineHistory(TimeLineID newTLI,TimeLineID parentTLI,XLogRecPtr switchpoint,char * reason)303 writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
304 XLogRecPtr switchpoint, char *reason)
305 {
306 char path[MAXPGPATH];
307 char tmppath[MAXPGPATH];
308 char histfname[MAXFNAMELEN];
309 char buffer[BLCKSZ];
310 int srcfd;
311 int fd;
312 int nbytes;
313
314 Assert(newTLI > parentTLI); /* else bad selection of newTLI */
315
316 /*
317 * Write into a temp file name.
318 */
319 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
320
321 unlink(tmppath);
322
323 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
324 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
325 if (fd < 0)
326 ereport(ERROR,
327 (errcode_for_file_access(),
328 errmsg("could not create file \"%s\": %m", tmppath)));
329
330 /*
331 * If a history file exists for the parent, copy it verbatim
332 */
333 if (ArchiveRecoveryRequested)
334 {
335 TLHistoryFileName(histfname, parentTLI);
336 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
337 }
338 else
339 TLHistoryFilePath(path, parentTLI);
340
341 srcfd = OpenTransientFile(path, O_RDONLY);
342 if (srcfd < 0)
343 {
344 if (errno != ENOENT)
345 ereport(ERROR,
346 (errcode_for_file_access(),
347 errmsg("could not open file \"%s\": %m", path)));
348 /* Not there, so assume parent has no parents */
349 }
350 else
351 {
352 for (;;)
353 {
354 errno = 0;
355 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
356 nbytes = (int) read(srcfd, buffer, sizeof(buffer));
357 pgstat_report_wait_end();
358 if (nbytes < 0 || errno != 0)
359 ereport(ERROR,
360 (errcode_for_file_access(),
361 errmsg("could not read file \"%s\": %m", path)));
362 if (nbytes == 0)
363 break;
364 errno = 0;
365 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
366 if ((int) write(fd, buffer, nbytes) != nbytes)
367 {
368 int save_errno = errno;
369
370 /*
371 * If we fail to make the file, delete it to release disk
372 * space
373 */
374 unlink(tmppath);
375
376 /*
377 * if write didn't set errno, assume problem is no disk space
378 */
379 errno = save_errno ? save_errno : ENOSPC;
380
381 ereport(ERROR,
382 (errcode_for_file_access(),
383 errmsg("could not write to file \"%s\": %m", tmppath)));
384 }
385 pgstat_report_wait_end();
386 }
387 CloseTransientFile(srcfd);
388 }
389
390 /*
391 * Append one line with the details of this timeline split.
392 *
393 * If we did have a parent file, insert an extra newline just in case the
394 * parent file failed to end with one.
395 */
396 snprintf(buffer, sizeof(buffer),
397 "%s%u\t%X/%X\t%s\n",
398 (srcfd < 0) ? "" : "\n",
399 parentTLI,
400 (uint32) (switchpoint >> 32), (uint32) (switchpoint),
401 reason);
402
403 nbytes = strlen(buffer);
404 errno = 0;
405 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
406 if ((int) write(fd, buffer, nbytes) != nbytes)
407 {
408 int save_errno = errno;
409
410 /*
411 * If we fail to make the file, delete it to release disk space
412 */
413 unlink(tmppath);
414 /* if write didn't set errno, assume problem is no disk space */
415 errno = save_errno ? save_errno : ENOSPC;
416
417 ereport(ERROR,
418 (errcode_for_file_access(),
419 errmsg("could not write to file \"%s\": %m", tmppath)));
420 }
421 pgstat_report_wait_end();
422
423 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
424 if (pg_fsync(fd) != 0)
425 ereport(data_sync_elevel(ERROR),
426 (errcode_for_file_access(),
427 errmsg("could not fsync file \"%s\": %m", tmppath)));
428 pgstat_report_wait_end();
429
430 if (CloseTransientFile(fd))
431 ereport(ERROR,
432 (errcode_for_file_access(),
433 errmsg("could not close file \"%s\": %m", tmppath)));
434
435
436 /*
437 * Now move the completed history file into place with its final name.
438 */
439 TLHistoryFilePath(path, newTLI);
440
441 /*
442 * Perform the rename using link if available, paranoidly trying to avoid
443 * overwriting an existing file (there shouldn't be one).
444 */
445 durable_link_or_rename(tmppath, path, ERROR);
446
447 /* The history file can be archived immediately. */
448 if (XLogArchivingActive())
449 {
450 TLHistoryFileName(histfname, newTLI);
451 XLogArchiveNotify(histfname);
452 }
453 }
454
455 /*
456 * Writes a history file for given timeline and contents.
457 *
458 * Currently this is only used in the walreceiver process, and so there are
459 * no locking considerations. But we should be just as tense as XLogFileInit
460 * to avoid emplacing a bogus file.
461 */
462 void
writeTimeLineHistoryFile(TimeLineID tli,char * content,int size)463 writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
464 {
465 char path[MAXPGPATH];
466 char tmppath[MAXPGPATH];
467 int fd;
468
469 /*
470 * Write into a temp file name.
471 */
472 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
473
474 unlink(tmppath);
475
476 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
477 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
478 if (fd < 0)
479 ereport(ERROR,
480 (errcode_for_file_access(),
481 errmsg("could not create file \"%s\": %m", tmppath)));
482
483 errno = 0;
484 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
485 if ((int) write(fd, content, size) != size)
486 {
487 int save_errno = errno;
488
489 /*
490 * If we fail to make the file, delete it to release disk space
491 */
492 unlink(tmppath);
493 /* if write didn't set errno, assume problem is no disk space */
494 errno = save_errno ? save_errno : ENOSPC;
495
496 ereport(ERROR,
497 (errcode_for_file_access(),
498 errmsg("could not write to file \"%s\": %m", tmppath)));
499 }
500 pgstat_report_wait_end();
501
502 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
503 if (pg_fsync(fd) != 0)
504 ereport(data_sync_elevel(ERROR),
505 (errcode_for_file_access(),
506 errmsg("could not fsync file \"%s\": %m", tmppath)));
507 pgstat_report_wait_end();
508
509 if (CloseTransientFile(fd))
510 ereport(ERROR,
511 (errcode_for_file_access(),
512 errmsg("could not close file \"%s\": %m", tmppath)));
513
514
515 /*
516 * Now move the completed history file into place with its final name.
517 */
518 TLHistoryFilePath(path, tli);
519
520 /*
521 * Perform the rename using link if available, paranoidly trying to avoid
522 * overwriting an existing file (there shouldn't be one).
523 */
524 durable_link_or_rename(tmppath, path, ERROR);
525 }
526
527 /*
528 * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
529 */
530 bool
tliInHistory(TimeLineID tli,List * expectedTLEs)531 tliInHistory(TimeLineID tli, List *expectedTLEs)
532 {
533 ListCell *cell;
534
535 foreach(cell, expectedTLEs)
536 {
537 if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
538 return true;
539 }
540
541 return false;
542 }
543
544 /*
545 * Returns the ID of the timeline in use at a particular point in time, in
546 * the given timeline history.
547 */
548 TimeLineID
tliOfPointInHistory(XLogRecPtr ptr,List * history)549 tliOfPointInHistory(XLogRecPtr ptr, List *history)
550 {
551 ListCell *cell;
552
553 foreach(cell, history)
554 {
555 TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
556
557 if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
558 (XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
559 {
560 /* found it */
561 return tle->tli;
562 }
563 }
564
565 /* shouldn't happen. */
566 elog(ERROR, "timeline history was not contiguous");
567 return 0; /* keep compiler quiet */
568 }
569
570 /*
571 * Returns the point in history where we branched off the given timeline,
572 * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
573 * the timeline is current, ie. we have not branched off from it, and throws
574 * an error if the timeline is not part of this server's history.
575 */
576 XLogRecPtr
tliSwitchPoint(TimeLineID tli,List * history,TimeLineID * nextTLI)577 tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
578 {
579 ListCell *cell;
580
581 if (nextTLI)
582 *nextTLI = 0;
583 foreach(cell, history)
584 {
585 TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
586
587 if (tle->tli == tli)
588 return tle->end;
589 if (nextTLI)
590 *nextTLI = tle->tli;
591 }
592
593 ereport(ERROR,
594 (errmsg("requested timeline %u is not in this server's history",
595 tli)));
596 return InvalidXLogRecPtr; /* keep compiler quiet */
597 }
598