1 /*-------------------------------------------------------------------------
2  *
3  * xlogfuncs.c
4  *
5  * PostgreSQL write-ahead log manager user interface functions
6  *
7  * This file contains WAL control and information functions.
8  *
9  *
10  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * src/backend/access/transam/xlogfuncs.c
14  *
15  *-------------------------------------------------------------------------
16  */
17 #include "postgres.h"
18 
19 #include <unistd.h>
20 
21 #include "access/htup_details.h"
22 #include "access/xlog.h"
23 #include "access/xlog_internal.h"
24 #include "access/xlogutils.h"
25 #include "catalog/pg_type.h"
26 #include "funcapi.h"
27 #include "miscadmin.h"
28 #include "pgstat.h"
29 #include "replication/walreceiver.h"
30 #include "storage/fd.h"
31 #include "storage/ipc.h"
32 #include "storage/smgr.h"
33 #include "utils/builtins.h"
34 #include "utils/guc.h"
35 #include "utils/memutils.h"
36 #include "utils/numeric.h"
37 #include "utils/pg_lsn.h"
38 #include "utils/timestamp.h"
39 #include "utils/tuplestore.h"
40 
41 /*
42  * Store label file and tablespace map during non-exclusive backups.
43  */
44 static StringInfo label_file;
45 static StringInfo tblspc_map_file;
46 
47 /*
48  * pg_start_backup: set up for taking an on-line backup dump
49  *
50  * Essentially what this does is to create a backup label file in $PGDATA,
51  * where it will be archived as part of the backup dump.  The label file
52  * contains the user-supplied label string (typically this would be used
53  * to tell where the backup dump will be stored) and the starting time and
54  * starting WAL location for the dump.
55  *
56  * Permission checking for this function is managed through the normal
57  * GRANT system.
58  */
59 Datum
pg_start_backup(PG_FUNCTION_ARGS)60 pg_start_backup(PG_FUNCTION_ARGS)
61 {
62 	text	   *backupid = PG_GETARG_TEXT_PP(0);
63 	bool		fast = PG_GETARG_BOOL(1);
64 	bool		exclusive = PG_GETARG_BOOL(2);
65 	char	   *backupidstr;
66 	XLogRecPtr	startpoint;
67 	SessionBackupState status = get_backup_status();
68 
69 	backupidstr = text_to_cstring(backupid);
70 
71 	if (status == SESSION_BACKUP_NON_EXCLUSIVE)
72 		ereport(ERROR,
73 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
74 				 errmsg("a backup is already in progress in this session")));
75 
76 	if (exclusive)
77 	{
78 		startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL,
79 										NULL, NULL);
80 	}
81 	else
82 	{
83 		MemoryContext oldcontext;
84 
85 		/*
86 		 * Label file and tablespace map file need to be long-lived, since
87 		 * they are read in pg_stop_backup.
88 		 */
89 		oldcontext = MemoryContextSwitchTo(TopMemoryContext);
90 		label_file = makeStringInfo();
91 		tblspc_map_file = makeStringInfo();
92 		MemoryContextSwitchTo(oldcontext);
93 
94 		register_persistent_abort_backup_handler();
95 
96 		startpoint = do_pg_start_backup(backupidstr, fast, NULL, label_file,
97 										NULL, tblspc_map_file);
98 	}
99 
100 	PG_RETURN_LSN(startpoint);
101 }
102 
103 /*
104  * pg_stop_backup: finish taking an on-line backup dump
105  *
106  * We write an end-of-backup WAL record, and remove the backup label file
107  * created by pg_start_backup, creating a backup history file in pg_wal
108  * instead (whence it will immediately be archived). The backup history file
109  * contains the same info found in the label file, plus the backup-end time
110  * and WAL location. Before 9.0, the backup-end time was read from the backup
111  * history file at the beginning of archive recovery, but we now use the WAL
112  * record for that and the file is for informational and debug purposes only.
113  *
114  * Note: different from CancelBackup which just cancels online backup mode.
115  *
116  * Note: this version is only called to stop an exclusive backup. The function
117  *		 pg_stop_backup_v2 (overloaded as pg_stop_backup in SQL) is called to
118  *		 stop non-exclusive backups.
119  *
120  * Permission checking for this function is managed through the normal
121  * GRANT system.
122  */
123 Datum
pg_stop_backup(PG_FUNCTION_ARGS)124 pg_stop_backup(PG_FUNCTION_ARGS)
125 {
126 	XLogRecPtr	stoppoint;
127 	SessionBackupState status = get_backup_status();
128 
129 	if (status == SESSION_BACKUP_NON_EXCLUSIVE)
130 		ereport(ERROR,
131 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
132 				 errmsg("non-exclusive backup in progress"),
133 				 errhint("Did you mean to use pg_stop_backup('f')?")));
134 
135 	/*
136 	 * Exclusive backups were typically started in a different connection, so
137 	 * don't try to verify that status of backup is set to
138 	 * SESSION_BACKUP_EXCLUSIVE in this function. Actual verification that an
139 	 * exclusive backup is in fact running is handled inside
140 	 * do_pg_stop_backup.
141 	 */
142 	stoppoint = do_pg_stop_backup(NULL, true, NULL);
143 
144 	PG_RETURN_LSN(stoppoint);
145 }
146 
147 
148 /*
149  * pg_stop_backup_v2: finish taking exclusive or nonexclusive on-line backup.
150  *
151  * Works the same as pg_stop_backup, except for non-exclusive backups it returns
152  * the backup label and tablespace map files as text fields in as part of the
153  * resultset.
154  *
155  * The first parameter (variable 'exclusive') allows the user to tell us if
156  * this is an exclusive or a non-exclusive backup.
157  *
158  * The second parameter (variable 'waitforarchive'), which is optional,
159  * allows the user to choose if they want to wait for the WAL to be archived
160  * or if we should just return as soon as the WAL record is written.
161  *
162  * Permission checking for this function is managed through the normal
163  * GRANT system.
164  */
165 Datum
pg_stop_backup_v2(PG_FUNCTION_ARGS)166 pg_stop_backup_v2(PG_FUNCTION_ARGS)
167 {
168 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
169 	TupleDesc	tupdesc;
170 	Tuplestorestate *tupstore;
171 	MemoryContext per_query_ctx;
172 	MemoryContext oldcontext;
173 	Datum		values[3];
174 	bool		nulls[3];
175 
176 	bool		exclusive = PG_GETARG_BOOL(0);
177 	bool		waitforarchive = PG_GETARG_BOOL(1);
178 	XLogRecPtr	stoppoint;
179 	SessionBackupState status = get_backup_status();
180 
181 	/* check to see if caller supports us returning a tuplestore */
182 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
183 		ereport(ERROR,
184 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
185 				 errmsg("set-valued function called in context that cannot accept a set")));
186 	if (!(rsinfo->allowedModes & SFRM_Materialize))
187 		ereport(ERROR,
188 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
189 				 errmsg("materialize mode required, but it is not allowed in this context")));
190 
191 	/* Build a tuple descriptor for our result type */
192 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
193 		elog(ERROR, "return type must be a row type");
194 
195 	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
196 	oldcontext = MemoryContextSwitchTo(per_query_ctx);
197 
198 	tupstore = tuplestore_begin_heap(true, false, work_mem);
199 	rsinfo->returnMode = SFRM_Materialize;
200 	rsinfo->setResult = tupstore;
201 	rsinfo->setDesc = tupdesc;
202 
203 	MemoryContextSwitchTo(oldcontext);
204 
205 	MemSet(values, 0, sizeof(values));
206 	MemSet(nulls, 0, sizeof(nulls));
207 
208 	if (exclusive)
209 	{
210 		if (status == SESSION_BACKUP_NON_EXCLUSIVE)
211 			ereport(ERROR,
212 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
213 					 errmsg("non-exclusive backup in progress"),
214 					 errhint("Did you mean to use pg_stop_backup('f')?")));
215 
216 		/*
217 		 * Stop the exclusive backup, and since we're in an exclusive backup
218 		 * return NULL for both backup_label and tablespace_map.
219 		 */
220 		stoppoint = do_pg_stop_backup(NULL, waitforarchive, NULL);
221 
222 		nulls[1] = true;
223 		nulls[2] = true;
224 	}
225 	else
226 	{
227 		if (status != SESSION_BACKUP_NON_EXCLUSIVE)
228 			ereport(ERROR,
229 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
230 					 errmsg("non-exclusive backup is not in progress"),
231 					 errhint("Did you mean to use pg_stop_backup('t')?")));
232 
233 		/*
234 		 * Stop the non-exclusive backup. Return a copy of the backup label
235 		 * and tablespace map so they can be written to disk by the caller.
236 		 */
237 		stoppoint = do_pg_stop_backup(label_file->data, waitforarchive, NULL);
238 
239 		values[1] = CStringGetTextDatum(label_file->data);
240 		values[2] = CStringGetTextDatum(tblspc_map_file->data);
241 
242 		/* Free structures allocated in TopMemoryContext */
243 		pfree(label_file->data);
244 		pfree(label_file);
245 		label_file = NULL;
246 		pfree(tblspc_map_file->data);
247 		pfree(tblspc_map_file);
248 		tblspc_map_file = NULL;
249 	}
250 
251 	/* Stoppoint is included on both exclusive and nonexclusive backups */
252 	values[0] = LSNGetDatum(stoppoint);
253 
254 	tuplestore_putvalues(tupstore, tupdesc, values, nulls);
255 	tuplestore_donestoring(tupstore);
256 
257 	return (Datum) 0;
258 }
259 
260 /*
261  * pg_switch_wal: switch to next xlog file
262  *
263  * Permission checking for this function is managed through the normal
264  * GRANT system.
265  */
266 Datum
pg_switch_wal(PG_FUNCTION_ARGS)267 pg_switch_wal(PG_FUNCTION_ARGS)
268 {
269 	XLogRecPtr	switchpoint;
270 
271 	if (RecoveryInProgress())
272 		ereport(ERROR,
273 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
274 				 errmsg("recovery is in progress"),
275 				 errhint("WAL control functions cannot be executed during recovery.")));
276 
277 	switchpoint = RequestXLogSwitch(false);
278 
279 	/*
280 	 * As a convenience, return the WAL location of the switch record
281 	 */
282 	PG_RETURN_LSN(switchpoint);
283 }
284 
285 /*
286  * pg_create_restore_point: a named point for restore
287  *
288  * Permission checking for this function is managed through the normal
289  * GRANT system.
290  */
291 Datum
pg_create_restore_point(PG_FUNCTION_ARGS)292 pg_create_restore_point(PG_FUNCTION_ARGS)
293 {
294 	text	   *restore_name = PG_GETARG_TEXT_PP(0);
295 	char	   *restore_name_str;
296 	XLogRecPtr	restorepoint;
297 
298 	if (RecoveryInProgress())
299 		ereport(ERROR,
300 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
301 				 errmsg("recovery is in progress"),
302 				 errhint("WAL control functions cannot be executed during recovery.")));
303 
304 	if (!XLogIsNeeded())
305 		ereport(ERROR,
306 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
307 				 errmsg("WAL level not sufficient for creating a restore point"),
308 				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
309 
310 	restore_name_str = text_to_cstring(restore_name);
311 
312 	if (strlen(restore_name_str) >= MAXFNAMELEN)
313 		ereport(ERROR,
314 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
315 				 errmsg("value too long for restore point (maximum %d characters)", MAXFNAMELEN - 1)));
316 
317 	restorepoint = XLogRestorePoint(restore_name_str);
318 
319 	/*
320 	 * As a convenience, return the WAL location of the restore point record
321 	 */
322 	PG_RETURN_LSN(restorepoint);
323 }
324 
325 /*
326  * Report the current WAL write location (same format as pg_start_backup etc)
327  *
328  * This is useful for determining how much of WAL is visible to an external
329  * archiving process.  Note that the data before this point is written out
330  * to the kernel, but is not necessarily synced to disk.
331  */
332 Datum
pg_current_wal_lsn(PG_FUNCTION_ARGS)333 pg_current_wal_lsn(PG_FUNCTION_ARGS)
334 {
335 	XLogRecPtr	current_recptr;
336 
337 	if (RecoveryInProgress())
338 		ereport(ERROR,
339 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
340 				 errmsg("recovery is in progress"),
341 				 errhint("WAL control functions cannot be executed during recovery.")));
342 
343 	current_recptr = GetXLogWriteRecPtr();
344 
345 	PG_RETURN_LSN(current_recptr);
346 }
347 
348 /*
349  * Report the current WAL insert location (same format as pg_start_backup etc)
350  *
351  * This function is mostly for debugging purposes.
352  */
353 Datum
pg_current_wal_insert_lsn(PG_FUNCTION_ARGS)354 pg_current_wal_insert_lsn(PG_FUNCTION_ARGS)
355 {
356 	XLogRecPtr	current_recptr;
357 
358 	if (RecoveryInProgress())
359 		ereport(ERROR,
360 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
361 				 errmsg("recovery is in progress"),
362 				 errhint("WAL control functions cannot be executed during recovery.")));
363 
364 	current_recptr = GetXLogInsertRecPtr();
365 
366 	PG_RETURN_LSN(current_recptr);
367 }
368 
369 /*
370  * Report the current WAL flush location (same format as pg_start_backup etc)
371  *
372  * This function is mostly for debugging purposes.
373  */
374 Datum
pg_current_wal_flush_lsn(PG_FUNCTION_ARGS)375 pg_current_wal_flush_lsn(PG_FUNCTION_ARGS)
376 {
377 	XLogRecPtr	current_recptr;
378 
379 	if (RecoveryInProgress())
380 		ereport(ERROR,
381 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
382 				 errmsg("recovery is in progress"),
383 				 errhint("WAL control functions cannot be executed during recovery.")));
384 
385 	current_recptr = GetFlushRecPtr();
386 
387 	PG_RETURN_LSN(current_recptr);
388 }
389 
390 /*
391  * Report the last WAL receive location (same format as pg_start_backup etc)
392  *
393  * This is useful for determining how much of WAL is guaranteed to be received
394  * and synced to disk by walreceiver.
395  */
396 Datum
pg_last_wal_receive_lsn(PG_FUNCTION_ARGS)397 pg_last_wal_receive_lsn(PG_FUNCTION_ARGS)
398 {
399 	XLogRecPtr	recptr;
400 
401 	recptr = GetWalRcvFlushRecPtr(NULL, NULL);
402 
403 	if (recptr == 0)
404 		PG_RETURN_NULL();
405 
406 	PG_RETURN_LSN(recptr);
407 }
408 
409 /*
410  * Report the last WAL replay location (same format as pg_start_backup etc)
411  *
412  * This is useful for determining how much of WAL is visible to read-only
413  * connections during recovery.
414  */
415 Datum
pg_last_wal_replay_lsn(PG_FUNCTION_ARGS)416 pg_last_wal_replay_lsn(PG_FUNCTION_ARGS)
417 {
418 	XLogRecPtr	recptr;
419 
420 	recptr = GetXLogReplayRecPtr(NULL);
421 
422 	if (recptr == 0)
423 		PG_RETURN_NULL();
424 
425 	PG_RETURN_LSN(recptr);
426 }
427 
428 /*
429  * Compute an xlog file name and decimal byte offset given a WAL location,
430  * such as is returned by pg_stop_backup() or pg_switch_wal().
431  *
432  * Note that a location exactly at a segment boundary is taken to be in
433  * the previous segment.  This is usually the right thing, since the
434  * expected usage is to determine which xlog file(s) are ready to archive.
435  */
436 Datum
pg_walfile_name_offset(PG_FUNCTION_ARGS)437 pg_walfile_name_offset(PG_FUNCTION_ARGS)
438 {
439 	XLogSegNo	xlogsegno;
440 	uint32		xrecoff;
441 	XLogRecPtr	locationpoint = PG_GETARG_LSN(0);
442 	char		xlogfilename[MAXFNAMELEN];
443 	Datum		values[2];
444 	bool		isnull[2];
445 	TupleDesc	resultTupleDesc;
446 	HeapTuple	resultHeapTuple;
447 	Datum		result;
448 
449 	if (RecoveryInProgress())
450 		ereport(ERROR,
451 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
452 				 errmsg("recovery is in progress"),
453 				 errhint("%s cannot be executed during recovery.",
454 						 "pg_walfile_name_offset()")));
455 
456 	/*
457 	 * Construct a tuple descriptor for the result row.  This must match this
458 	 * function's pg_proc entry!
459 	 */
460 	resultTupleDesc = CreateTemplateTupleDesc(2);
461 	TupleDescInitEntry(resultTupleDesc, (AttrNumber) 1, "file_name",
462 					   TEXTOID, -1, 0);
463 	TupleDescInitEntry(resultTupleDesc, (AttrNumber) 2, "file_offset",
464 					   INT4OID, -1, 0);
465 
466 	resultTupleDesc = BlessTupleDesc(resultTupleDesc);
467 
468 	/*
469 	 * xlogfilename
470 	 */
471 	XLByteToPrevSeg(locationpoint, xlogsegno, wal_segment_size);
472 	XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno, wal_segment_size);
473 
474 	values[0] = CStringGetTextDatum(xlogfilename);
475 	isnull[0] = false;
476 
477 	/*
478 	 * offset
479 	 */
480 	xrecoff = XLogSegmentOffset(locationpoint, wal_segment_size);
481 
482 	values[1] = UInt32GetDatum(xrecoff);
483 	isnull[1] = false;
484 
485 	/*
486 	 * Tuple jam: Having first prepared your Datums, then squash together
487 	 */
488 	resultHeapTuple = heap_form_tuple(resultTupleDesc, values, isnull);
489 
490 	result = HeapTupleGetDatum(resultHeapTuple);
491 
492 	PG_RETURN_DATUM(result);
493 }
494 
495 /*
496  * Compute an xlog file name given a WAL location,
497  * such as is returned by pg_stop_backup() or pg_switch_wal().
498  */
499 Datum
pg_walfile_name(PG_FUNCTION_ARGS)500 pg_walfile_name(PG_FUNCTION_ARGS)
501 {
502 	XLogSegNo	xlogsegno;
503 	XLogRecPtr	locationpoint = PG_GETARG_LSN(0);
504 	char		xlogfilename[MAXFNAMELEN];
505 
506 	if (RecoveryInProgress())
507 		ereport(ERROR,
508 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
509 				 errmsg("recovery is in progress"),
510 				 errhint("%s cannot be executed during recovery.",
511 						 "pg_walfile_name()")));
512 
513 	XLByteToPrevSeg(locationpoint, xlogsegno, wal_segment_size);
514 	XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno, wal_segment_size);
515 
516 	PG_RETURN_TEXT_P(cstring_to_text(xlogfilename));
517 }
518 
519 /*
520  * pg_wal_replay_pause - Request to pause recovery
521  *
522  * Permission checking for this function is managed through the normal
523  * GRANT system.
524  */
525 Datum
pg_wal_replay_pause(PG_FUNCTION_ARGS)526 pg_wal_replay_pause(PG_FUNCTION_ARGS)
527 {
528 	if (!RecoveryInProgress())
529 		ereport(ERROR,
530 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
531 				 errmsg("recovery is not in progress"),
532 				 errhint("Recovery control functions can only be executed during recovery.")));
533 
534 	if (PromoteIsTriggered())
535 		ereport(ERROR,
536 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
537 				 errmsg("standby promotion is ongoing"),
538 				 errhint("%s cannot be executed after promotion is triggered.",
539 						 "pg_wal_replay_pause()")));
540 
541 	SetRecoveryPause(true);
542 
543 	/* wake up the recovery process so that it can process the pause request */
544 	WakeupRecovery();
545 
546 	PG_RETURN_VOID();
547 }
548 
549 /*
550  * pg_wal_replay_resume - resume recovery now
551  *
552  * Permission checking for this function is managed through the normal
553  * GRANT system.
554  */
555 Datum
pg_wal_replay_resume(PG_FUNCTION_ARGS)556 pg_wal_replay_resume(PG_FUNCTION_ARGS)
557 {
558 	if (!RecoveryInProgress())
559 		ereport(ERROR,
560 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
561 				 errmsg("recovery is not in progress"),
562 				 errhint("Recovery control functions can only be executed during recovery.")));
563 
564 	if (PromoteIsTriggered())
565 		ereport(ERROR,
566 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
567 				 errmsg("standby promotion is ongoing"),
568 				 errhint("%s cannot be executed after promotion is triggered.",
569 						 "pg_wal_replay_resume()")));
570 
571 	SetRecoveryPause(false);
572 
573 	PG_RETURN_VOID();
574 }
575 
576 /*
577  * pg_is_wal_replay_paused
578  */
579 Datum
pg_is_wal_replay_paused(PG_FUNCTION_ARGS)580 pg_is_wal_replay_paused(PG_FUNCTION_ARGS)
581 {
582 	if (!RecoveryInProgress())
583 		ereport(ERROR,
584 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
585 				 errmsg("recovery is not in progress"),
586 				 errhint("Recovery control functions can only be executed during recovery.")));
587 
588 	PG_RETURN_BOOL(GetRecoveryPauseState() != RECOVERY_NOT_PAUSED);
589 }
590 
591 /*
592  * pg_get_wal_replay_pause_state - Returns the recovery pause state.
593  *
594  * Returned values:
595  *
596  * 'not paused' - if pause is not requested
597  * 'pause requested' - if pause is requested but recovery is not yet paused
598  * 'paused' - if recovery is paused
599  */
600 Datum
pg_get_wal_replay_pause_state(PG_FUNCTION_ARGS)601 pg_get_wal_replay_pause_state(PG_FUNCTION_ARGS)
602 {
603 	char	   *statestr = NULL;
604 
605 	if (!RecoveryInProgress())
606 		ereport(ERROR,
607 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
608 				 errmsg("recovery is not in progress"),
609 				 errhint("Recovery control functions can only be executed during recovery.")));
610 
611 	/* get the recovery pause state */
612 	switch (GetRecoveryPauseState())
613 	{
614 		case RECOVERY_NOT_PAUSED:
615 			statestr = "not paused";
616 			break;
617 		case RECOVERY_PAUSE_REQUESTED:
618 			statestr = "pause requested";
619 			break;
620 		case RECOVERY_PAUSED:
621 			statestr = "paused";
622 			break;
623 	}
624 
625 	Assert(statestr != NULL);
626 	PG_RETURN_TEXT_P(cstring_to_text(statestr));
627 }
628 
629 /*
630  * Returns timestamp of latest processed commit/abort record.
631  *
632  * When the server has been started normally without recovery the function
633  * returns NULL.
634  */
635 Datum
pg_last_xact_replay_timestamp(PG_FUNCTION_ARGS)636 pg_last_xact_replay_timestamp(PG_FUNCTION_ARGS)
637 {
638 	TimestampTz xtime;
639 
640 	xtime = GetLatestXTime();
641 	if (xtime == 0)
642 		PG_RETURN_NULL();
643 
644 	PG_RETURN_TIMESTAMPTZ(xtime);
645 }
646 
647 /*
648  * Returns bool with current recovery mode, a global state.
649  */
650 Datum
pg_is_in_recovery(PG_FUNCTION_ARGS)651 pg_is_in_recovery(PG_FUNCTION_ARGS)
652 {
653 	PG_RETURN_BOOL(RecoveryInProgress());
654 }
655 
656 /*
657  * Compute the difference in bytes between two WAL locations.
658  */
659 Datum
pg_wal_lsn_diff(PG_FUNCTION_ARGS)660 pg_wal_lsn_diff(PG_FUNCTION_ARGS)
661 {
662 	Datum		result;
663 
664 	result = DirectFunctionCall2(pg_lsn_mi,
665 								 PG_GETARG_DATUM(0),
666 								 PG_GETARG_DATUM(1));
667 
668 	PG_RETURN_NUMERIC(result);
669 }
670 
671 /*
672  * Returns bool with current on-line backup mode, a global state.
673  */
674 Datum
pg_is_in_backup(PG_FUNCTION_ARGS)675 pg_is_in_backup(PG_FUNCTION_ARGS)
676 {
677 	PG_RETURN_BOOL(BackupInProgress());
678 }
679 
680 /*
681  * Returns start time of an online exclusive backup.
682  *
683  * When there's no exclusive backup in progress, the function
684  * returns NULL.
685  */
686 Datum
pg_backup_start_time(PG_FUNCTION_ARGS)687 pg_backup_start_time(PG_FUNCTION_ARGS)
688 {
689 	Datum		xtime;
690 	FILE	   *lfp;
691 	char		fline[MAXPGPATH];
692 	char		backup_start_time[30];
693 
694 	/*
695 	 * See if label file is present
696 	 */
697 	lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
698 	if (lfp == NULL)
699 	{
700 		if (errno != ENOENT)
701 			ereport(ERROR,
702 					(errcode_for_file_access(),
703 					 errmsg("could not read file \"%s\": %m",
704 							BACKUP_LABEL_FILE)));
705 		PG_RETURN_NULL();
706 	}
707 
708 	/*
709 	 * Parse the file to find the START TIME line.
710 	 */
711 	backup_start_time[0] = '\0';
712 	while (fgets(fline, sizeof(fline), lfp) != NULL)
713 	{
714 		if (sscanf(fline, "START TIME: %25[^\n]\n", backup_start_time) == 1)
715 			break;
716 	}
717 
718 	/* Check for a read error. */
719 	if (ferror(lfp))
720 		ereport(ERROR,
721 				(errcode_for_file_access(),
722 				 errmsg("could not read file \"%s\": %m", BACKUP_LABEL_FILE)));
723 
724 	/* Close the backup label file. */
725 	if (FreeFile(lfp))
726 		ereport(ERROR,
727 				(errcode_for_file_access(),
728 				 errmsg("could not close file \"%s\": %m", BACKUP_LABEL_FILE)));
729 
730 	if (strlen(backup_start_time) == 0)
731 		ereport(ERROR,
732 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
733 				 errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
734 
735 	/*
736 	 * Convert the time string read from file to TimestampTz form.
737 	 */
738 	xtime = DirectFunctionCall3(timestamptz_in,
739 								CStringGetDatum(backup_start_time),
740 								ObjectIdGetDatum(InvalidOid),
741 								Int32GetDatum(-1));
742 
743 	PG_RETURN_DATUM(xtime);
744 }
745 
746 /*
747  * Promotes a standby server.
748  *
749  * A result of "true" means that promotion has been completed if "wait" is
750  * "true", or initiated if "wait" is false.
751  */
752 Datum
pg_promote(PG_FUNCTION_ARGS)753 pg_promote(PG_FUNCTION_ARGS)
754 {
755 	bool		wait = PG_GETARG_BOOL(0);
756 	int			wait_seconds = PG_GETARG_INT32(1);
757 	FILE	   *promote_file;
758 	int			i;
759 
760 	if (!RecoveryInProgress())
761 		ereport(ERROR,
762 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
763 				 errmsg("recovery is not in progress"),
764 				 errhint("Recovery control functions can only be executed during recovery.")));
765 
766 	if (wait_seconds <= 0)
767 		ereport(ERROR,
768 				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
769 				 errmsg("\"wait_seconds\" must not be negative or zero")));
770 
771 	/* create the promote signal file */
772 	promote_file = AllocateFile(PROMOTE_SIGNAL_FILE, "w");
773 	if (!promote_file)
774 		ereport(ERROR,
775 				(errcode_for_file_access(),
776 				 errmsg("could not create file \"%s\": %m",
777 						PROMOTE_SIGNAL_FILE)));
778 
779 	if (FreeFile(promote_file))
780 		ereport(ERROR,
781 				(errcode_for_file_access(),
782 				 errmsg("could not write file \"%s\": %m",
783 						PROMOTE_SIGNAL_FILE)));
784 
785 	/* signal the postmaster */
786 	if (kill(PostmasterPid, SIGUSR1) != 0)
787 	{
788 		ereport(WARNING,
789 				(errmsg("failed to send signal to postmaster: %m")));
790 		(void) unlink(PROMOTE_SIGNAL_FILE);
791 		PG_RETURN_BOOL(false);
792 	}
793 
794 	/* return immediately if waiting was not requested */
795 	if (!wait)
796 		PG_RETURN_BOOL(true);
797 
798 	/* wait for the amount of time wanted until promotion */
799 #define WAITS_PER_SECOND 10
800 	for (i = 0; i < WAITS_PER_SECOND * wait_seconds; i++)
801 	{
802 		int			rc;
803 
804 		ResetLatch(MyLatch);
805 
806 		if (!RecoveryInProgress())
807 			PG_RETURN_BOOL(true);
808 
809 		CHECK_FOR_INTERRUPTS();
810 
811 		rc = WaitLatch(MyLatch,
812 					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
813 					   1000L / WAITS_PER_SECOND,
814 					   WAIT_EVENT_PROMOTE);
815 
816 		/*
817 		 * Emergency bailout if postmaster has died.  This is to avoid the
818 		 * necessity for manual cleanup of all postmaster children.
819 		 */
820 		if (rc & WL_POSTMASTER_DEATH)
821 			PG_RETURN_BOOL(false);
822 	}
823 
824 	ereport(WARNING,
825 			(errmsg_plural("server did not promote within %d second",
826 						   "server did not promote within %d seconds",
827 						   wait_seconds,
828 						   wait_seconds)));
829 	PG_RETURN_BOOL(false);
830 }
831