1 /*-------------------------------------------------------------------------
2 *
3 * timeline.c
4 * Functions for reading and writing timeline history files.
5 *
6 * A timeline history file lists the timeline changes of the timeline, in
7 * a simple text format. They are archived along with the WAL segments.
8 *
9 * The files are named like "<tli>.history". For example, if the database
10 * starts up and switches to timeline 5, the timeline history file would be
11 * called "00000005.history".
12 *
13 * Each line in the file represents a timeline switch:
14 *
15 * <parentTLI> <switchpoint> <reason>
16 *
17 * parentTLI ID of the parent timeline
18 * switchpoint XLogRecPtr of the WAL location where the switch happened
19 * reason human-readable explanation of why the timeline was changed
20 *
21 * The fields are separated by tabs. Lines beginning with # are comments, and
22 * are ignored. Empty lines are also ignored.
23 *
24 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
25 * Portions Copyright (c) 1994, Regents of the University of California
26 *
27 * src/backend/access/transam/timeline.c
28 *
29 *-------------------------------------------------------------------------
30 */
31
32 #include "postgres.h"
33
34 #include <sys/stat.h>
35 #include <unistd.h>
36
37 #include "access/timeline.h"
38 #include "access/xlog.h"
39 #include "access/xlog_internal.h"
40 #include "access/xlogarchive.h"
41 #include "access/xlogdefs.h"
42 #include "pgstat.h"
43 #include "storage/fd.h"
44
45 /*
46 * Copies all timeline history files with id's between 'begin' and 'end'
47 * from archive to pg_wal.
48 */
49 void
restoreTimeLineHistoryFiles(TimeLineID begin,TimeLineID end)50 restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
51 {
52 char path[MAXPGPATH];
53 char histfname[MAXFNAMELEN];
54 TimeLineID tli;
55
56 for (tli = begin; tli < end; tli++)
57 {
58 if (tli == 1)
59 continue;
60
61 TLHistoryFileName(histfname, tli);
62 if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
63 KeepFileRestoredFromArchive(path, histfname);
64 }
65 }
66
67 /*
68 * Try to read a timeline's history file.
69 *
70 * If successful, return the list of component TLIs (the given TLI followed by
71 * its ancestor TLIs). If we can't find the history file, assume that the
72 * timeline has no parents, and return a list of just the specified timeline
73 * ID.
74 */
75 List *
readTimeLineHistory(TimeLineID targetTLI)76 readTimeLineHistory(TimeLineID targetTLI)
77 {
78 List *result;
79 char path[MAXPGPATH];
80 char histfname[MAXFNAMELEN];
81 FILE *fd;
82 TimeLineHistoryEntry *entry;
83 TimeLineID lasttli = 0;
84 XLogRecPtr prevend;
85 bool fromArchive = false;
86
87 /* Timeline 1 does not have a history file, so no need to check */
88 if (targetTLI == 1)
89 {
90 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
91 entry->tli = targetTLI;
92 entry->begin = entry->end = InvalidXLogRecPtr;
93 return list_make1(entry);
94 }
95
96 if (ArchiveRecoveryRequested)
97 {
98 TLHistoryFileName(histfname, targetTLI);
99 fromArchive =
100 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
101 }
102 else
103 TLHistoryFilePath(path, targetTLI);
104
105 fd = AllocateFile(path, "r");
106 if (fd == NULL)
107 {
108 if (errno != ENOENT)
109 ereport(FATAL,
110 (errcode_for_file_access(),
111 errmsg("could not open file \"%s\": %m", path)));
112 /* Not there, so assume no parents */
113 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
114 entry->tli = targetTLI;
115 entry->begin = entry->end = InvalidXLogRecPtr;
116 return list_make1(entry);
117 }
118
119 result = NIL;
120
121 /*
122 * Parse the file...
123 */
124 prevend = InvalidXLogRecPtr;
125 for (;;)
126 {
127 char fline[MAXPGPATH];
128 char *res;
129 char *ptr;
130 TimeLineID tli;
131 uint32 switchpoint_hi;
132 uint32 switchpoint_lo;
133 int nfields;
134
135 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
136 res = fgets(fline, sizeof(fline), fd);
137 pgstat_report_wait_end();
138 if (res == NULL)
139 {
140 if (ferror(fd))
141 ereport(ERROR,
142 (errcode_for_file_access(),
143 errmsg("could not read file \"%s\": %m", path)));
144
145 break;
146 }
147
148 /* skip leading whitespace and check for # comment */
149 for (ptr = fline; *ptr; ptr++)
150 {
151 if (!isspace((unsigned char) *ptr))
152 break;
153 }
154 if (*ptr == '\0' || *ptr == '#')
155 continue;
156
157 nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
158
159 if (nfields < 1)
160 {
161 /* expect a numeric timeline ID as first field of line */
162 ereport(FATAL,
163 (errmsg("syntax error in history file: %s", fline),
164 errhint("Expected a numeric timeline ID.")));
165 }
166 if (nfields != 3)
167 ereport(FATAL,
168 (errmsg("syntax error in history file: %s", fline),
169 errhint("Expected a write-ahead log switchpoint location.")));
170
171 if (result && tli <= lasttli)
172 ereport(FATAL,
173 (errmsg("invalid data in history file: %s", fline),
174 errhint("Timeline IDs must be in increasing sequence.")));
175
176 lasttli = tli;
177
178 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
179 entry->tli = tli;
180 entry->begin = prevend;
181 entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
182 prevend = entry->end;
183
184 /* Build list with newest item first */
185 result = lcons(entry, result);
186
187 /* we ignore the remainder of each line */
188 }
189
190 FreeFile(fd);
191
192 if (result && targetTLI <= lasttli)
193 ereport(FATAL,
194 (errmsg("invalid data in history file \"%s\"", path),
195 errhint("Timeline IDs must be less than child timeline's ID.")));
196
197 /*
198 * Create one more entry for the "tip" of the timeline, which has no entry
199 * in the history file.
200 */
201 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
202 entry->tli = targetTLI;
203 entry->begin = prevend;
204 entry->end = InvalidXLogRecPtr;
205
206 result = lcons(entry, result);
207
208 /*
209 * If the history file was fetched from archive, save it in pg_wal for
210 * future reference.
211 */
212 if (fromArchive)
213 KeepFileRestoredFromArchive(path, histfname);
214
215 return result;
216 }
217
218 /*
219 * Probe whether a timeline history file exists for the given timeline ID
220 */
221 bool
existsTimeLineHistory(TimeLineID probeTLI)222 existsTimeLineHistory(TimeLineID probeTLI)
223 {
224 char path[MAXPGPATH];
225 char histfname[MAXFNAMELEN];
226 FILE *fd;
227
228 /* Timeline 1 does not have a history file, so no need to check */
229 if (probeTLI == 1)
230 return false;
231
232 if (ArchiveRecoveryRequested)
233 {
234 TLHistoryFileName(histfname, probeTLI);
235 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
236 }
237 else
238 TLHistoryFilePath(path, probeTLI);
239
240 fd = AllocateFile(path, "r");
241 if (fd != NULL)
242 {
243 FreeFile(fd);
244 return true;
245 }
246 else
247 {
248 if (errno != ENOENT)
249 ereport(FATAL,
250 (errcode_for_file_access(),
251 errmsg("could not open file \"%s\": %m", path)));
252 return false;
253 }
254 }
255
256 /*
257 * Find the newest existing timeline, assuming that startTLI exists.
258 *
259 * Note: while this is somewhat heuristic, it does positively guarantee
260 * that (result + 1) is not a known timeline, and therefore it should
261 * be safe to assign that ID to a new timeline.
262 */
263 TimeLineID
findNewestTimeLine(TimeLineID startTLI)264 findNewestTimeLine(TimeLineID startTLI)
265 {
266 TimeLineID newestTLI;
267 TimeLineID probeTLI;
268
269 /*
270 * The algorithm is just to probe for the existence of timeline history
271 * files. XXX is it useful to allow gaps in the sequence?
272 */
273 newestTLI = startTLI;
274
275 for (probeTLI = startTLI + 1;; probeTLI++)
276 {
277 if (existsTimeLineHistory(probeTLI))
278 {
279 newestTLI = probeTLI; /* probeTLI exists */
280 }
281 else
282 {
283 /* doesn't exist, assume we're done */
284 break;
285 }
286 }
287
288 return newestTLI;
289 }
290
291 /*
292 * Create a new timeline history file.
293 *
294 * newTLI: ID of the new timeline
295 * parentTLI: ID of its immediate parent
296 * switchpoint: WAL location where the system switched to the new timeline
297 * reason: human-readable explanation of why the timeline was switched
298 *
299 * Currently this is only used at the end recovery, and so there are no locking
300 * considerations. But we should be just as tense as XLogFileInit to avoid
301 * emplacing a bogus file.
302 */
303 void
writeTimeLineHistory(TimeLineID newTLI,TimeLineID parentTLI,XLogRecPtr switchpoint,char * reason)304 writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
305 XLogRecPtr switchpoint, char *reason)
306 {
307 char path[MAXPGPATH];
308 char tmppath[MAXPGPATH];
309 char histfname[MAXFNAMELEN];
310 char buffer[BLCKSZ];
311 int srcfd;
312 int fd;
313 int nbytes;
314
315 Assert(newTLI > parentTLI); /* else bad selection of newTLI */
316
317 /*
318 * Write into a temp file name.
319 */
320 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
321
322 unlink(tmppath);
323
324 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
325 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
326 if (fd < 0)
327 ereport(ERROR,
328 (errcode_for_file_access(),
329 errmsg("could not create file \"%s\": %m", tmppath)));
330
331 /*
332 * If a history file exists for the parent, copy it verbatim
333 */
334 if (ArchiveRecoveryRequested)
335 {
336 TLHistoryFileName(histfname, parentTLI);
337 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
338 }
339 else
340 TLHistoryFilePath(path, parentTLI);
341
342 srcfd = OpenTransientFile(path, O_RDONLY);
343 if (srcfd < 0)
344 {
345 if (errno != ENOENT)
346 ereport(ERROR,
347 (errcode_for_file_access(),
348 errmsg("could not open file \"%s\": %m", path)));
349 /* Not there, so assume parent has no parents */
350 }
351 else
352 {
353 for (;;)
354 {
355 errno = 0;
356 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
357 nbytes = (int) read(srcfd, buffer, sizeof(buffer));
358 pgstat_report_wait_end();
359 if (nbytes < 0 || errno != 0)
360 ereport(ERROR,
361 (errcode_for_file_access(),
362 errmsg("could not read file \"%s\": %m", path)));
363 if (nbytes == 0)
364 break;
365 errno = 0;
366 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
367 if ((int) write(fd, buffer, nbytes) != nbytes)
368 {
369 int save_errno = errno;
370
371 /*
372 * If we fail to make the file, delete it to release disk
373 * space
374 */
375 unlink(tmppath);
376
377 /*
378 * if write didn't set errno, assume problem is no disk space
379 */
380 errno = save_errno ? save_errno : ENOSPC;
381
382 ereport(ERROR,
383 (errcode_for_file_access(),
384 errmsg("could not write to file \"%s\": %m", tmppath)));
385 }
386 pgstat_report_wait_end();
387 }
388
389 if (CloseTransientFile(srcfd) != 0)
390 ereport(ERROR,
391 (errcode_for_file_access(),
392 errmsg("could not close file \"%s\": %m", path)));
393 }
394
395 /*
396 * Append one line with the details of this timeline split.
397 *
398 * If we did have a parent file, insert an extra newline just in case the
399 * parent file failed to end with one.
400 */
401 snprintf(buffer, sizeof(buffer),
402 "%s%u\t%X/%X\t%s\n",
403 (srcfd < 0) ? "" : "\n",
404 parentTLI,
405 LSN_FORMAT_ARGS(switchpoint),
406 reason);
407
408 nbytes = strlen(buffer);
409 errno = 0;
410 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
411 if ((int) write(fd, buffer, nbytes) != nbytes)
412 {
413 int save_errno = errno;
414
415 /*
416 * If we fail to make the file, delete it to release disk space
417 */
418 unlink(tmppath);
419 /* if write didn't set errno, assume problem is no disk space */
420 errno = save_errno ? save_errno : ENOSPC;
421
422 ereport(ERROR,
423 (errcode_for_file_access(),
424 errmsg("could not write to file \"%s\": %m", tmppath)));
425 }
426 pgstat_report_wait_end();
427
428 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
429 if (pg_fsync(fd) != 0)
430 ereport(data_sync_elevel(ERROR),
431 (errcode_for_file_access(),
432 errmsg("could not fsync file \"%s\": %m", tmppath)));
433 pgstat_report_wait_end();
434
435 if (CloseTransientFile(fd) != 0)
436 ereport(ERROR,
437 (errcode_for_file_access(),
438 errmsg("could not close file \"%s\": %m", tmppath)));
439
440 /*
441 * Now move the completed history file into place with its final name.
442 */
443 TLHistoryFilePath(path, newTLI);
444
445 /*
446 * Perform the rename using link if available, paranoidly trying to avoid
447 * overwriting an existing file (there shouldn't be one).
448 */
449 durable_rename_excl(tmppath, path, ERROR);
450
451 /* The history file can be archived immediately. */
452 if (XLogArchivingActive())
453 {
454 TLHistoryFileName(histfname, newTLI);
455 XLogArchiveNotify(histfname);
456 }
457 }
458
459 /*
460 * Writes a history file for given timeline and contents.
461 *
462 * Currently this is only used in the walreceiver process, and so there are
463 * no locking considerations. But we should be just as tense as XLogFileInit
464 * to avoid emplacing a bogus file.
465 */
466 void
writeTimeLineHistoryFile(TimeLineID tli,char * content,int size)467 writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
468 {
469 char path[MAXPGPATH];
470 char tmppath[MAXPGPATH];
471 int fd;
472
473 /*
474 * Write into a temp file name.
475 */
476 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
477
478 unlink(tmppath);
479
480 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
481 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
482 if (fd < 0)
483 ereport(ERROR,
484 (errcode_for_file_access(),
485 errmsg("could not create file \"%s\": %m", tmppath)));
486
487 errno = 0;
488 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
489 if ((int) write(fd, content, size) != size)
490 {
491 int save_errno = errno;
492
493 /*
494 * If we fail to make the file, delete it to release disk space
495 */
496 unlink(tmppath);
497 /* if write didn't set errno, assume problem is no disk space */
498 errno = save_errno ? save_errno : ENOSPC;
499
500 ereport(ERROR,
501 (errcode_for_file_access(),
502 errmsg("could not write to file \"%s\": %m", tmppath)));
503 }
504 pgstat_report_wait_end();
505
506 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
507 if (pg_fsync(fd) != 0)
508 ereport(data_sync_elevel(ERROR),
509 (errcode_for_file_access(),
510 errmsg("could not fsync file \"%s\": %m", tmppath)));
511 pgstat_report_wait_end();
512
513 if (CloseTransientFile(fd) != 0)
514 ereport(ERROR,
515 (errcode_for_file_access(),
516 errmsg("could not close file \"%s\": %m", tmppath)));
517
518 /*
519 * Now move the completed history file into place with its final name.
520 */
521 TLHistoryFilePath(path, tli);
522
523 /*
524 * Perform the rename using link if available, paranoidly trying to avoid
525 * overwriting an existing file (there shouldn't be one).
526 */
527 durable_rename_excl(tmppath, path, ERROR);
528 }
529
530 /*
531 * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
532 */
533 bool
tliInHistory(TimeLineID tli,List * expectedTLEs)534 tliInHistory(TimeLineID tli, List *expectedTLEs)
535 {
536 ListCell *cell;
537
538 foreach(cell, expectedTLEs)
539 {
540 if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
541 return true;
542 }
543
544 return false;
545 }
546
547 /*
548 * Returns the ID of the timeline in use at a particular point in time, in
549 * the given timeline history.
550 */
551 TimeLineID
tliOfPointInHistory(XLogRecPtr ptr,List * history)552 tliOfPointInHistory(XLogRecPtr ptr, List *history)
553 {
554 ListCell *cell;
555
556 foreach(cell, history)
557 {
558 TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
559
560 if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
561 (XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
562 {
563 /* found it */
564 return tle->tli;
565 }
566 }
567
568 /* shouldn't happen. */
569 elog(ERROR, "timeline history was not contiguous");
570 return 0; /* keep compiler quiet */
571 }
572
573 /*
574 * Returns the point in history where we branched off the given timeline,
575 * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
576 * the timeline is current, ie. we have not branched off from it, and throws
577 * an error if the timeline is not part of this server's history.
578 */
579 XLogRecPtr
tliSwitchPoint(TimeLineID tli,List * history,TimeLineID * nextTLI)580 tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
581 {
582 ListCell *cell;
583
584 if (nextTLI)
585 *nextTLI = 0;
586 foreach(cell, history)
587 {
588 TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
589
590 if (tle->tli == tli)
591 return tle->end;
592 if (nextTLI)
593 *nextTLI = tle->tli;
594 }
595
596 ereport(ERROR,
597 (errmsg("requested timeline %u is not in this server's history",
598 tli)));
599 return InvalidXLogRecPtr; /* keep compiler quiet */
600 }
601