1 /*-------------------------------------------------------------------------
2 *
3 * timeline.c
4 * Functions for reading and writing timeline history files.
5 *
 * A timeline history file lists, in a simple text format, the timeline
 * switches that led up to a given timeline. History files are archived along
 * with the WAL segments.
8 *
9 * The files are named like "<tli>.history". For example, if the database
10 * starts up and switches to timeline 5, the timeline history file would be
11 * called "00000005.history".
12 *
13 * Each line in the file represents a timeline switch:
14 *
15 * <parentTLI> <switchpoint> <reason>
16 *
17 * parentTLI ID of the parent timeline
18 * switchpoint XLogRecPtr of the WAL location where the switch happened
19 * reason human-readable explanation of why the timeline was changed
20 *
21 * The fields are separated by tabs. Lines beginning with # are comments, and
22 * are ignored. Empty lines are also ignored.
23 *
24 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
25 * Portions Copyright (c) 1994, Regents of the University of California
26 *
27 * src/backend/access/transam/timeline.c
28 *
29 *-------------------------------------------------------------------------
30 */
31
32 #include "postgres.h"
33
34 #include <sys/stat.h>
35 #include <unistd.h>
36
37 #include "access/timeline.h"
38 #include "access/xlog.h"
39 #include "access/xlog_internal.h"
40 #include "access/xlogdefs.h"
41 #include "pgstat.h"
42 #include "storage/fd.h"
43
44 /*
45 * Copies all timeline history files with id's between 'begin' and 'end'
46 * from archive to pg_wal.
47 */
48 void
restoreTimeLineHistoryFiles(TimeLineID begin,TimeLineID end)49 restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
50 {
51 char path[MAXPGPATH];
52 char histfname[MAXFNAMELEN];
53 TimeLineID tli;
54
55 for (tli = begin; tli < end; tli++)
56 {
57 if (tli == 1)
58 continue;
59
60 TLHistoryFileName(histfname, tli);
61 if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
62 KeepFileRestoredFromArchive(path, histfname);
63 }
64 }
65
66 /*
67 * Try to read a timeline's history file.
68 *
69 * If successful, return the list of component TLIs (the given TLI followed by
70 * its ancestor TLIs). If we can't find the history file, assume that the
71 * timeline has no parents, and return a list of just the specified timeline
72 * ID.
73 */
74 List *
readTimeLineHistory(TimeLineID targetTLI)75 readTimeLineHistory(TimeLineID targetTLI)
76 {
77 List *result;
78 char path[MAXPGPATH];
79 char histfname[MAXFNAMELEN];
80 FILE *fd;
81 TimeLineHistoryEntry *entry;
82 TimeLineID lasttli = 0;
83 XLogRecPtr prevend;
84 bool fromArchive = false;
85
86 /* Timeline 1 does not have a history file, so no need to check */
87 if (targetTLI == 1)
88 {
89 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
90 entry->tli = targetTLI;
91 entry->begin = entry->end = InvalidXLogRecPtr;
92 return list_make1(entry);
93 }
94
95 if (ArchiveRecoveryRequested)
96 {
97 TLHistoryFileName(histfname, targetTLI);
98 fromArchive =
99 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
100 }
101 else
102 TLHistoryFilePath(path, targetTLI);
103
104 fd = AllocateFile(path, "r");
105 if (fd == NULL)
106 {
107 if (errno != ENOENT)
108 ereport(FATAL,
109 (errcode_for_file_access(),
110 errmsg("could not open file \"%s\": %m", path)));
111 /* Not there, so assume no parents */
112 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
113 entry->tli = targetTLI;
114 entry->begin = entry->end = InvalidXLogRecPtr;
115 return list_make1(entry);
116 }
117
118 result = NIL;
119
120 /*
121 * Parse the file...
122 */
123 prevend = InvalidXLogRecPtr;
124 for (;;)
125 {
126 char fline[MAXPGPATH];
127 char *res;
128 char *ptr;
129 TimeLineID tli;
130 uint32 switchpoint_hi;
131 uint32 switchpoint_lo;
132 int nfields;
133
134 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
135 res = fgets(fline, sizeof(fline), fd);
136 pgstat_report_wait_end();
137 if (res == NULL)
138 {
139 if (ferror(fd))
140 ereport(ERROR,
141 (errcode_for_file_access(),
142 errmsg("could not read file \"%s\": %m", path)));
143
144 break;
145 }
146
147 /* skip leading whitespace and check for # comment */
148 for (ptr = fline; *ptr; ptr++)
149 {
150 if (!isspace((unsigned char) *ptr))
151 break;
152 }
153 if (*ptr == '\0' || *ptr == '#')
154 continue;
155
156 nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
157
158 if (nfields < 1)
159 {
160 /* expect a numeric timeline ID as first field of line */
161 ereport(FATAL,
162 (errmsg("syntax error in history file: %s", fline),
163 errhint("Expected a numeric timeline ID.")));
164 }
165 if (nfields != 3)
166 ereport(FATAL,
167 (errmsg("syntax error in history file: %s", fline),
168 errhint("Expected a write-ahead log switchpoint location.")));
169
170 if (result && tli <= lasttli)
171 ereport(FATAL,
172 (errmsg("invalid data in history file: %s", fline),
173 errhint("Timeline IDs must be in increasing sequence.")));
174
175 lasttli = tli;
176
177 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
178 entry->tli = tli;
179 entry->begin = prevend;
180 entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
181 prevend = entry->end;
182
183 /* Build list with newest item first */
184 result = lcons(entry, result);
185
186 /* we ignore the remainder of each line */
187 }
188
189 FreeFile(fd);
190
191 if (result && targetTLI <= lasttli)
192 ereport(FATAL,
193 (errmsg("invalid data in history file \"%s\"", path),
194 errhint("Timeline IDs must be less than child timeline's ID.")));
195
196 /*
197 * Create one more entry for the "tip" of the timeline, which has no entry
198 * in the history file.
199 */
200 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
201 entry->tli = targetTLI;
202 entry->begin = prevend;
203 entry->end = InvalidXLogRecPtr;
204
205 result = lcons(entry, result);
206
207 /*
208 * If the history file was fetched from archive, save it in pg_wal for
209 * future reference.
210 */
211 if (fromArchive)
212 KeepFileRestoredFromArchive(path, histfname);
213
214 return result;
215 }
216
217 /*
218 * Probe whether a timeline history file exists for the given timeline ID
219 */
220 bool
existsTimeLineHistory(TimeLineID probeTLI)221 existsTimeLineHistory(TimeLineID probeTLI)
222 {
223 char path[MAXPGPATH];
224 char histfname[MAXFNAMELEN];
225 FILE *fd;
226
227 /* Timeline 1 does not have a history file, so no need to check */
228 if (probeTLI == 1)
229 return false;
230
231 if (ArchiveRecoveryRequested)
232 {
233 TLHistoryFileName(histfname, probeTLI);
234 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
235 }
236 else
237 TLHistoryFilePath(path, probeTLI);
238
239 fd = AllocateFile(path, "r");
240 if (fd != NULL)
241 {
242 FreeFile(fd);
243 return true;
244 }
245 else
246 {
247 if (errno != ENOENT)
248 ereport(FATAL,
249 (errcode_for_file_access(),
250 errmsg("could not open file \"%s\": %m", path)));
251 return false;
252 }
253 }
254
255 /*
256 * Find the newest existing timeline, assuming that startTLI exists.
257 *
258 * Note: while this is somewhat heuristic, it does positively guarantee
259 * that (result + 1) is not a known timeline, and therefore it should
260 * be safe to assign that ID to a new timeline.
261 */
262 TimeLineID
findNewestTimeLine(TimeLineID startTLI)263 findNewestTimeLine(TimeLineID startTLI)
264 {
265 TimeLineID newestTLI;
266 TimeLineID probeTLI;
267
268 /*
269 * The algorithm is just to probe for the existence of timeline history
270 * files. XXX is it useful to allow gaps in the sequence?
271 */
272 newestTLI = startTLI;
273
274 for (probeTLI = startTLI + 1;; probeTLI++)
275 {
276 if (existsTimeLineHistory(probeTLI))
277 {
278 newestTLI = probeTLI; /* probeTLI exists */
279 }
280 else
281 {
282 /* doesn't exist, assume we're done */
283 break;
284 }
285 }
286
287 return newestTLI;
288 }
289
290 /*
291 * Create a new timeline history file.
292 *
293 * newTLI: ID of the new timeline
294 * parentTLI: ID of its immediate parent
295 * switchpoint: WAL location where the system switched to the new timeline
296 * reason: human-readable explanation of why the timeline was switched
297 *
298 * Currently this is only used at the end recovery, and so there are no locking
299 * considerations. But we should be just as tense as XLogFileInit to avoid
300 * emplacing a bogus file.
301 */
302 void
writeTimeLineHistory(TimeLineID newTLI,TimeLineID parentTLI,XLogRecPtr switchpoint,char * reason)303 writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
304 XLogRecPtr switchpoint, char *reason)
305 {
306 char path[MAXPGPATH];
307 char tmppath[MAXPGPATH];
308 char histfname[MAXFNAMELEN];
309 char buffer[BLCKSZ];
310 int srcfd;
311 int fd;
312 int nbytes;
313
314 Assert(newTLI > parentTLI); /* else bad selection of newTLI */
315
316 /*
317 * Write into a temp file name.
318 */
319 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
320
321 unlink(tmppath);
322
323 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
324 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
325 S_IRUSR | S_IWUSR);
326 if (fd < 0)
327 ereport(ERROR,
328 (errcode_for_file_access(),
329 errmsg("could not create file \"%s\": %m", tmppath)));
330
331 /*
332 * If a history file exists for the parent, copy it verbatim
333 */
334 if (ArchiveRecoveryRequested)
335 {
336 TLHistoryFileName(histfname, parentTLI);
337 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
338 }
339 else
340 TLHistoryFilePath(path, parentTLI);
341
342 srcfd = OpenTransientFile(path, O_RDONLY, 0);
343 if (srcfd < 0)
344 {
345 if (errno != ENOENT)
346 ereport(ERROR,
347 (errcode_for_file_access(),
348 errmsg("could not open file \"%s\": %m", path)));
349 /* Not there, so assume parent has no parents */
350 }
351 else
352 {
353 for (;;)
354 {
355 errno = 0;
356 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
357 nbytes = (int) read(srcfd, buffer, sizeof(buffer));
358 pgstat_report_wait_end();
359 if (nbytes < 0 || errno != 0)
360 ereport(ERROR,
361 (errcode_for_file_access(),
362 errmsg("could not read file \"%s\": %m", path)));
363 if (nbytes == 0)
364 break;
365 errno = 0;
366 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
367 if ((int) write(fd, buffer, nbytes) != nbytes)
368 {
369 int save_errno = errno;
370
371 /*
372 * If we fail to make the file, delete it to release disk
373 * space
374 */
375 unlink(tmppath);
376
377 /*
378 * if write didn't set errno, assume problem is no disk space
379 */
380 errno = save_errno ? save_errno : ENOSPC;
381
382 ereport(ERROR,
383 (errcode_for_file_access(),
384 errmsg("could not write to file \"%s\": %m", tmppath)));
385 }
386 pgstat_report_wait_end();
387 }
388 CloseTransientFile(srcfd);
389 }
390
391 /*
392 * Append one line with the details of this timeline split.
393 *
394 * If we did have a parent file, insert an extra newline just in case the
395 * parent file failed to end with one.
396 */
397 snprintf(buffer, sizeof(buffer),
398 "%s%u\t%X/%X\t%s\n",
399 (srcfd < 0) ? "" : "\n",
400 parentTLI,
401 (uint32) (switchpoint >> 32), (uint32) (switchpoint),
402 reason);
403
404 nbytes = strlen(buffer);
405 errno = 0;
406 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
407 if ((int) write(fd, buffer, nbytes) != nbytes)
408 {
409 int save_errno = errno;
410
411 /*
412 * If we fail to make the file, delete it to release disk space
413 */
414 unlink(tmppath);
415 /* if write didn't set errno, assume problem is no disk space */
416 errno = save_errno ? save_errno : ENOSPC;
417
418 ereport(ERROR,
419 (errcode_for_file_access(),
420 errmsg("could not write to file \"%s\": %m", tmppath)));
421 }
422 pgstat_report_wait_end();
423
424 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
425 if (pg_fsync(fd) != 0)
426 ereport(data_sync_elevel(ERROR),
427 (errcode_for_file_access(),
428 errmsg("could not fsync file \"%s\": %m", tmppath)));
429 pgstat_report_wait_end();
430
431 if (CloseTransientFile(fd))
432 ereport(ERROR,
433 (errcode_for_file_access(),
434 errmsg("could not close file \"%s\": %m", tmppath)));
435
436
437 /*
438 * Now move the completed history file into place with its final name.
439 */
440 TLHistoryFilePath(path, newTLI);
441
442 /*
443 * Perform the rename using link if available, paranoidly trying to avoid
444 * overwriting an existing file (there shouldn't be one).
445 */
446 durable_link_or_rename(tmppath, path, ERROR);
447
448 /* The history file can be archived immediately. */
449 if (XLogArchivingActive())
450 {
451 TLHistoryFileName(histfname, newTLI);
452 XLogArchiveNotify(histfname);
453 }
454 }
455
456 /*
457 * Writes a history file for given timeline and contents.
458 *
459 * Currently this is only used in the walreceiver process, and so there are
460 * no locking considerations. But we should be just as tense as XLogFileInit
461 * to avoid emplacing a bogus file.
462 */
463 void
writeTimeLineHistoryFile(TimeLineID tli,char * content,int size)464 writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
465 {
466 char path[MAXPGPATH];
467 char tmppath[MAXPGPATH];
468 int fd;
469
470 /*
471 * Write into a temp file name.
472 */
473 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
474
475 unlink(tmppath);
476
477 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
478 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
479 S_IRUSR | S_IWUSR);
480 if (fd < 0)
481 ereport(ERROR,
482 (errcode_for_file_access(),
483 errmsg("could not create file \"%s\": %m", tmppath)));
484
485 errno = 0;
486 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
487 if ((int) write(fd, content, size) != size)
488 {
489 int save_errno = errno;
490
491 /*
492 * If we fail to make the file, delete it to release disk space
493 */
494 unlink(tmppath);
495 /* if write didn't set errno, assume problem is no disk space */
496 errno = save_errno ? save_errno : ENOSPC;
497
498 ereport(ERROR,
499 (errcode_for_file_access(),
500 errmsg("could not write to file \"%s\": %m", tmppath)));
501 }
502 pgstat_report_wait_end();
503
504 pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
505 if (pg_fsync(fd) != 0)
506 ereport(data_sync_elevel(ERROR),
507 (errcode_for_file_access(),
508 errmsg("could not fsync file \"%s\": %m", tmppath)));
509 pgstat_report_wait_end();
510
511 if (CloseTransientFile(fd))
512 ereport(ERROR,
513 (errcode_for_file_access(),
514 errmsg("could not close file \"%s\": %m", tmppath)));
515
516
517 /*
518 * Now move the completed history file into place with its final name.
519 */
520 TLHistoryFilePath(path, tli);
521
522 /*
523 * Perform the rename using link if available, paranoidly trying to avoid
524 * overwriting an existing file (there shouldn't be one).
525 */
526 durable_link_or_rename(tmppath, path, ERROR);
527 }
528
529 /*
530 * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
531 */
532 bool
tliInHistory(TimeLineID tli,List * expectedTLEs)533 tliInHistory(TimeLineID tli, List *expectedTLEs)
534 {
535 ListCell *cell;
536
537 foreach(cell, expectedTLEs)
538 {
539 if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
540 return true;
541 }
542
543 return false;
544 }
545
546 /*
547 * Returns the ID of the timeline in use at a particular point in time, in
548 * the given timeline history.
549 */
550 TimeLineID
tliOfPointInHistory(XLogRecPtr ptr,List * history)551 tliOfPointInHistory(XLogRecPtr ptr, List *history)
552 {
553 ListCell *cell;
554
555 foreach(cell, history)
556 {
557 TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
558
559 if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
560 (XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
561 {
562 /* found it */
563 return tle->tli;
564 }
565 }
566
567 /* shouldn't happen. */
568 elog(ERROR, "timeline history was not contiguous");
569 return 0; /* keep compiler quiet */
570 }
571
572 /*
573 * Returns the point in history where we branched off the given timeline,
574 * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
575 * the timeline is current, ie. we have not branched off from it, and throws
576 * an error if the timeline is not part of this server's history.
577 */
578 XLogRecPtr
tliSwitchPoint(TimeLineID tli,List * history,TimeLineID * nextTLI)579 tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
580 {
581 ListCell *cell;
582
583 if (nextTLI)
584 *nextTLI = 0;
585 foreach(cell, history)
586 {
587 TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
588
589 if (tle->tli == tli)
590 return tle->end;
591 if (nextTLI)
592 *nextTLI = tle->tli;
593 }
594
595 ereport(ERROR,
596 (errmsg("requested timeline %u is not in this server's history",
597 tli)));
598 return InvalidXLogRecPtr; /* keep compiler quiet */
599 }
600