1 /*-------------------------------------------------------------------------
2  *
3  * pg_waldump.c - decode and display WAL
4  *
5  * Copyright (c) 2013-2020, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *		  src/bin/pg_waldump/pg_waldump.c
9  *-------------------------------------------------------------------------
10  */
11 
12 #define FRONTEND 1
13 #include "postgres.h"
14 
15 #include <dirent.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 
19 #include "access/transam.h"
20 #include "access/xlog_internal.h"
21 #include "access/xlogreader.h"
22 #include "access/xlogrecord.h"
23 #include "common/fe_memutils.h"
24 #include "common/logging.h"
25 #include "getopt_long.h"
26 #include "rmgrdesc.h"
27 
/* Program name; set in main() from argv[0] and used in help/hint output */
static const char *progname;

/*
 * WAL segment size in bytes.  Set by search_directory() from the
 * xlp_seg_size field of the first page header of a WAL file.
 */
static int	WalSegSz;
31 
/*
 * State handed to the XLogReader as private_data and consulted by the
 * page_read callback (WALDumpReadPage).
 */
typedef struct XLogDumpPrivate
{
	TimeLineID	timeline;		/* timeline passed to WALRead() */
	XLogRecPtr	startptr;		/* where reading starts; InvalidXLogRecPtr
								 * until set */
	XLogRecPtr	endptr;			/* where reading stops; InvalidXLogRecPtr
								 * means no limit is enforced */
	bool		endptr_reached; /* set by WALDumpReadPage when the requested
								 * data lies beyond endptr */
} XLogDumpPrivate;
39 
/*
 * Settings collected from the command line in main(), plus a running count
 * of the records processed so far.
 */
typedef struct XLogDumpConfig
{
	/* display options */
	bool		quiet;			/* -q: skip per-record output and statistics */
	bool		bkp_details;	/* -b: detailed block reference output */
	int			stop_after_records; /* -n: record limit; only honored when
									 * greater than zero (-1 by default) */
	int			already_displayed_records;	/* records that passed the
											 * filters so far */
	bool		follow;			/* -f: keep retrying at end of WAL */
	bool		stats;			/* -z: collect statistics instead of printing
								 * records */
	bool		stats_per_record;	/* --stats=record: break statistics down
									 * by record type */

	/* filter options */
	int			filter_by_rmgr; /* -r: index into RmgrDescTable, or -1 for no
								 * filter */
	TransactionId filter_by_xid;	/* -x: transaction ID to match */
	bool		filter_by_xid_enabled;	/* whether filter_by_xid applies */
} XLogDumpConfig;
56 
/*
 * Counters for one statistics bucket (one rmgr, or one rmgr/record-type
 * combination).
 */
typedef struct Stats
{
	uint64		count;			/* number of records counted */
	uint64		rec_len;		/* summed record length, excluding full-page
								 * image data */
	uint64		fpi_len;		/* summed full-page image length */
} Stats;
63 
/*
 * Number of record-type slots per rmgr: one for each value of the upper four
 * bits of xl_info (see XLogDumpCountRecord).
 */
#define MAX_XLINFO_TYPES 16

/* All statistics gathered while reading WAL in --stats mode */
typedef struct XLogDumpStats
{
	uint64		count;			/* total number of records counted */
	Stats		rmgr_stats[RM_NEXT_ID]; /* indexed by RmgrId */
	Stats		record_stats[RM_NEXT_ID][MAX_XLINFO_TYPES]; /* indexed by
															 * RmgrId, then
															 * xl_info >> 4 */
} XLogDumpStats;
72 
/* Report a message through pg_log_fatal() and exit with EXIT_FAILURE */
#define fatal_error(...) do { pg_log_fatal(__VA_ARGS__); exit(EXIT_FAILURE); } while(0)
74 
75 static void
print_rmgr_list(void)76 print_rmgr_list(void)
77 {
78 	int			i;
79 
80 	for (i = 0; i <= RM_MAX_ID; i++)
81 	{
82 		printf("%s\n", RmgrDescTable[i].rm_name);
83 	}
84 }
85 
/*
 * Report whether "directory" can be opened as a directory.
 *
 * When opendir() fails we return right away without making further calls,
 * so errno still describes the failure and the caller can include it in its
 * error message.
 */
static bool
verify_directory(const char *directory)
{
	DIR		   *handle;

	handle = opendir(directory);
	if (handle != NULL)
	{
		closedir(handle);
		return true;
	}

	return false;
}
100 
/*
 * Split a pathname into a directory part and a file name part at the last
 * '/' character.
 *
 * path:  pathname to split.
 * dir:   receives a newly allocated copy of everything before the last '/',
 *        "/" when the only slash is the leading one (e.g. "/segment"), or
 *        NULL when path contains no '/' at all.
 * fname: receives a newly allocated copy of everything after the last '/'
 *        (the whole path when there is no '/').
 *
 * XXX this probably doesn't do very well on Windows.  We probably need to
 * apply canonicalize_path(), at the very least.
 */
static void
split_path(const char *path, char **dir, char **fname)
{
	const char *sep;

	/* locate the boundary between directory and file name */
	sep = strrchr(path, '/');

	/* no separator: file is relative to the local directory */
	if (sep == NULL)
	{
		*dir = NULL;
		*fname = pg_strdup(path);
		return;
	}

	/*
	 * A file directly below the root directory must yield "/" as its
	 * directory.  Copying the zero characters in front of the slash would
	 * produce an empty string, which cannot be opened as a directory.
	 */
	if (sep == path)
		*dir = pg_strdup("/");
	else
		*dir = pnstrdup(path, sep - path);

	*fname = pg_strdup(sep + 1);
}
128 
129 /*
130  * Open the file in the valid target directory.
131  *
132  * return a read only fd
133  */
134 static int
open_file_in_directory(const char * directory,const char * fname)135 open_file_in_directory(const char *directory, const char *fname)
136 {
137 	int			fd = -1;
138 	char		fpath[MAXPGPATH];
139 
140 	Assert(directory != NULL);
141 
142 	snprintf(fpath, MAXPGPATH, "%s/%s", directory, fname);
143 	fd = open(fpath, O_RDONLY | PG_BINARY, 0);
144 
145 	if (fd < 0 && errno != ENOENT)
146 		fatal_error("could not open file \"%s\": %m", fname);
147 	return fd;
148 }
149 
150 /*
151  * Try to find fname in the given directory. Returns true if it is found,
152  * false otherwise. If fname is NULL, search the complete directory for any
153  * file with a valid WAL file name. If file is successfully opened, set the
154  * wal segment size.
155  */
156 static bool
search_directory(const char * directory,const char * fname)157 search_directory(const char *directory, const char *fname)
158 {
159 	int			fd = -1;
160 	DIR		   *xldir;
161 
162 	/* open file if valid filename is provided */
163 	if (fname != NULL)
164 		fd = open_file_in_directory(directory, fname);
165 
166 	/*
167 	 * A valid file name is not passed, so search the complete directory.  If
168 	 * we find any file whose name is a valid WAL file name then try to open
169 	 * it.  If we cannot open it, bail out.
170 	 */
171 	else if ((xldir = opendir(directory)) != NULL)
172 	{
173 		struct dirent *xlde;
174 
175 		while ((xlde = readdir(xldir)) != NULL)
176 		{
177 			if (IsXLogFileName(xlde->d_name))
178 			{
179 				fd = open_file_in_directory(directory, xlde->d_name);
180 				fname = xlde->d_name;
181 				break;
182 			}
183 		}
184 
185 		closedir(xldir);
186 	}
187 
188 	/* set WalSegSz if file is successfully opened */
189 	if (fd >= 0)
190 	{
191 		PGAlignedXLogBlock buf;
192 		int			r;
193 
194 		r = read(fd, buf.data, XLOG_BLCKSZ);
195 		if (r == XLOG_BLCKSZ)
196 		{
197 			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data;
198 
199 			WalSegSz = longhdr->xlp_seg_size;
200 
201 			if (!IsValidWalSegSize(WalSegSz))
202 				fatal_error(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but the WAL file \"%s\" header specifies %d byte",
203 									 "WAL segment size must be a power of two between 1 MB and 1 GB, but the WAL file \"%s\" header specifies %d bytes",
204 									 WalSegSz),
205 							fname, WalSegSz);
206 		}
207 		else
208 		{
209 			if (errno != 0)
210 				fatal_error("could not read file \"%s\": %m",
211 							fname);
212 			else
213 				fatal_error("could not read file \"%s\": read %d of %zu",
214 							fname, r, (Size) XLOG_BLCKSZ);
215 		}
216 		close(fd);
217 		return true;
218 	}
219 
220 	return false;
221 }
222 
223 /*
224  * Identify the target directory.
225  *
226  * Try to find the file in several places:
227  * if directory != NULL:
228  *	 directory /
229  *	 directory / XLOGDIR /
230  * else
231  *	 .
232  *	 XLOGDIR /
233  *	 $PGDATA / XLOGDIR /
234  *
235  * The valid target directory is returned.
236  */
237 static char *
identify_target_directory(char * directory,char * fname)238 identify_target_directory(char *directory, char *fname)
239 {
240 	char		fpath[MAXPGPATH];
241 
242 	if (directory != NULL)
243 	{
244 		if (search_directory(directory, fname))
245 			return pg_strdup(directory);
246 
247 		/* directory / XLOGDIR */
248 		snprintf(fpath, MAXPGPATH, "%s/%s", directory, XLOGDIR);
249 		if (search_directory(fpath, fname))
250 			return pg_strdup(fpath);
251 	}
252 	else
253 	{
254 		const char *datadir;
255 
256 		/* current directory */
257 		if (search_directory(".", fname))
258 			return pg_strdup(".");
259 		/* XLOGDIR */
260 		if (search_directory(XLOGDIR, fname))
261 			return pg_strdup(XLOGDIR);
262 
263 		datadir = getenv("PGDATA");
264 		/* $PGDATA / XLOGDIR */
265 		if (datadir != NULL)
266 		{
267 			snprintf(fpath, MAXPGPATH, "%s/%s", datadir, XLOGDIR);
268 			if (search_directory(fpath, fname))
269 				return pg_strdup(fpath);
270 		}
271 	}
272 
273 	/* could not locate WAL file */
274 	if (fname)
275 		fatal_error("could not locate WAL file \"%s\"", fname);
276 	else
277 		fatal_error("could not find any WAL file");
278 
279 	return NULL;				/* not reached */
280 }
281 
282 /* pg_waldump's XLogReaderRoutine->segment_open callback */
283 static void
WALDumpOpenSegment(XLogReaderState * state,XLogSegNo nextSegNo,TimeLineID * tli_p)284 WALDumpOpenSegment(XLogReaderState *state, XLogSegNo nextSegNo,
285 				   TimeLineID *tli_p)
286 {
287 	TimeLineID	tli = *tli_p;
288 	char		fname[MAXPGPATH];
289 	int			tries;
290 
291 	XLogFileName(fname, tli, nextSegNo, state->segcxt.ws_segsize);
292 
293 	/*
294 	 * In follow mode there is a short period of time after the server has
295 	 * written the end of the previous file before the new file is available.
296 	 * So we loop for 5 seconds looking for the file to appear before giving
297 	 * up.
298 	 */
299 	for (tries = 0; tries < 10; tries++)
300 	{
301 		state->seg.ws_file = open_file_in_directory(state->segcxt.ws_dir, fname);
302 		if (state->seg.ws_file >= 0)
303 			return;
304 		if (errno == ENOENT)
305 		{
306 			int			save_errno = errno;
307 
308 			/* File not there yet, try again */
309 			pg_usleep(500 * 1000);
310 
311 			errno = save_errno;
312 			continue;
313 		}
314 		/* Any other error, fall through and fail */
315 		break;
316 	}
317 
318 	fatal_error("could not find file \"%s\": %m", fname);
319 }
320 
321 /*
322  * pg_waldump's XLogReaderRoutine->segment_close callback.  Same as
323  * wal_segment_close
324  */
325 static void
WALDumpCloseSegment(XLogReaderState * state)326 WALDumpCloseSegment(XLogReaderState *state)
327 {
328 	close(state->seg.ws_file);
329 	/* need to check errno? */
330 	state->seg.ws_file = -1;
331 }
332 
333 /* pg_waldump's XLogReaderRoutine->page_read callback */
334 static int
WALDumpReadPage(XLogReaderState * state,XLogRecPtr targetPagePtr,int reqLen,XLogRecPtr targetPtr,char * readBuff)335 WALDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
336 				XLogRecPtr targetPtr, char *readBuff)
337 {
338 	XLogDumpPrivate *private = state->private_data;
339 	int			count = XLOG_BLCKSZ;
340 	WALReadError errinfo;
341 
342 	if (private->endptr != InvalidXLogRecPtr)
343 	{
344 		if (targetPagePtr + XLOG_BLCKSZ <= private->endptr)
345 			count = XLOG_BLCKSZ;
346 		else if (targetPagePtr + reqLen <= private->endptr)
347 			count = private->endptr - targetPagePtr;
348 		else
349 		{
350 			private->endptr_reached = true;
351 			return -1;
352 		}
353 	}
354 
355 	if (!WALRead(state, readBuff, targetPagePtr, count, private->timeline,
356 				 &errinfo))
357 	{
358 		WALOpenSegment *seg = &errinfo.wre_seg;
359 		char		fname[MAXPGPATH];
360 
361 		XLogFileName(fname, seg->ws_tli, seg->ws_segno,
362 					 state->segcxt.ws_segsize);
363 
364 		if (errinfo.wre_errno != 0)
365 		{
366 			errno = errinfo.wre_errno;
367 			fatal_error("could not read from file %s, offset %u: %m",
368 						fname, errinfo.wre_off);
369 		}
370 		else
371 			fatal_error("could not read from file %s, offset %u: read %d of %zu",
372 						fname, errinfo.wre_off, errinfo.wre_read,
373 						(Size) errinfo.wre_req);
374 	}
375 
376 	return count;
377 }
378 
379 /*
380  * Calculate the size of a record, split into !FPI and FPI parts.
381  */
382 static void
XLogDumpRecordLen(XLogReaderState * record,uint32 * rec_len,uint32 * fpi_len)383 XLogDumpRecordLen(XLogReaderState *record, uint32 *rec_len, uint32 *fpi_len)
384 {
385 	int			block_id;
386 
387 	/*
388 	 * Calculate the amount of FPI data in the record.
389 	 *
390 	 * XXX: We peek into xlogreader's private decoded backup blocks for the
391 	 * bimg_len indicating the length of FPI data. It doesn't seem worth it to
392 	 * add an accessor macro for this.
393 	 */
394 	*fpi_len = 0;
395 	for (block_id = 0; block_id <= record->max_block_id; block_id++)
396 	{
397 		if (XLogRecHasBlockImage(record, block_id))
398 			*fpi_len += record->blocks[block_id].bimg_len;
399 	}
400 
401 	/*
402 	 * Calculate the length of the record as the total length - the length of
403 	 * all the block images.
404 	 */
405 	*rec_len = XLogRecGetTotalLen(record) - *fpi_len;
406 }
407 
408 /*
409  * Store per-rmgr and per-record statistics for a given record.
410  */
411 static void
XLogDumpCountRecord(XLogDumpConfig * config,XLogDumpStats * stats,XLogReaderState * record)412 XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats,
413 					XLogReaderState *record)
414 {
415 	RmgrId		rmid;
416 	uint8		recid;
417 	uint32		rec_len;
418 	uint32		fpi_len;
419 
420 	stats->count++;
421 
422 	rmid = XLogRecGetRmid(record);
423 
424 	XLogDumpRecordLen(record, &rec_len, &fpi_len);
425 
426 	/* Update per-rmgr statistics */
427 
428 	stats->rmgr_stats[rmid].count++;
429 	stats->rmgr_stats[rmid].rec_len += rec_len;
430 	stats->rmgr_stats[rmid].fpi_len += fpi_len;
431 
432 	/*
433 	 * Update per-record statistics, where the record is identified by a
434 	 * combination of the RmgrId and the four bits of the xl_info field that
435 	 * are the rmgr's domain (resulting in sixteen possible entries per
436 	 * RmgrId).
437 	 */
438 
439 	recid = XLogRecGetInfo(record) >> 4;
440 
441 	/*
442 	 * XACT records need to be handled differently. Those records use the
443 	 * first bit of those four bits for an optional flag variable and the
444 	 * following three bits for the opcode. We filter opcode out of xl_info
445 	 * and use it as the identifier of the record.
446 	 */
447 	if (rmid == RM_XACT_ID)
448 		recid &= 0x07;
449 
450 	stats->record_stats[rmid][recid].count++;
451 	stats->record_stats[rmid][recid].rec_len += rec_len;
452 	stats->record_stats[rmid][recid].fpi_len += fpi_len;
453 }
454 
455 /*
456  * Print a record to stdout
457  */
458 static void
XLogDumpDisplayRecord(XLogDumpConfig * config,XLogReaderState * record)459 XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
460 {
461 	const char *id;
462 	const RmgrDescData *desc = &RmgrDescTable[XLogRecGetRmid(record)];
463 	uint32		rec_len;
464 	uint32		fpi_len;
465 	RelFileNode rnode;
466 	ForkNumber	forknum;
467 	BlockNumber blk;
468 	int			block_id;
469 	uint8		info = XLogRecGetInfo(record);
470 	XLogRecPtr	xl_prev = XLogRecGetPrev(record);
471 	StringInfoData s;
472 
473 	XLogDumpRecordLen(record, &rec_len, &fpi_len);
474 
475 	printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ",
476 		   desc->rm_name,
477 		   rec_len, XLogRecGetTotalLen(record),
478 		   XLogRecGetXid(record),
479 		   (uint32) (record->ReadRecPtr >> 32), (uint32) record->ReadRecPtr,
480 		   (uint32) (xl_prev >> 32), (uint32) xl_prev);
481 
482 	id = desc->rm_identify(info);
483 	if (id == NULL)
484 		printf("desc: UNKNOWN (%x) ", info & ~XLR_INFO_MASK);
485 	else
486 		printf("desc: %s ", id);
487 
488 	initStringInfo(&s);
489 	desc->rm_desc(&s, record);
490 	printf("%s", s.data);
491 	pfree(s.data);
492 
493 	if (!config->bkp_details)
494 	{
495 		/* print block references (short format) */
496 		for (block_id = 0; block_id <= record->max_block_id; block_id++)
497 		{
498 			if (!XLogRecHasBlockRef(record, block_id))
499 				continue;
500 
501 			XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
502 			if (forknum != MAIN_FORKNUM)
503 				printf(", blkref #%u: rel %u/%u/%u fork %s blk %u",
504 					   block_id,
505 					   rnode.spcNode, rnode.dbNode, rnode.relNode,
506 					   forkNames[forknum],
507 					   blk);
508 			else
509 				printf(", blkref #%u: rel %u/%u/%u blk %u",
510 					   block_id,
511 					   rnode.spcNode, rnode.dbNode, rnode.relNode,
512 					   blk);
513 			if (XLogRecHasBlockImage(record, block_id))
514 			{
515 				if (XLogRecBlockImageApply(record, block_id))
516 					printf(" FPW");
517 				else
518 					printf(" FPW for WAL verification");
519 			}
520 		}
521 		putchar('\n');
522 	}
523 	else
524 	{
525 		/* print block references (detailed format) */
526 		putchar('\n');
527 		for (block_id = 0; block_id <= record->max_block_id; block_id++)
528 		{
529 			if (!XLogRecHasBlockRef(record, block_id))
530 				continue;
531 
532 			XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
533 			printf("\tblkref #%u: rel %u/%u/%u fork %s blk %u",
534 				   block_id,
535 				   rnode.spcNode, rnode.dbNode, rnode.relNode,
536 				   forkNames[forknum],
537 				   blk);
538 			if (XLogRecHasBlockImage(record, block_id))
539 			{
540 				if (record->blocks[block_id].bimg_info &
541 					BKPIMAGE_IS_COMPRESSED)
542 				{
543 					printf(" (FPW%s); hole: offset: %u, length: %u, "
544 						   "compression saved: %u",
545 						   XLogRecBlockImageApply(record, block_id) ?
546 						   "" : " for WAL verification",
547 						   record->blocks[block_id].hole_offset,
548 						   record->blocks[block_id].hole_length,
549 						   BLCKSZ -
550 						   record->blocks[block_id].hole_length -
551 						   record->blocks[block_id].bimg_len);
552 				}
553 				else
554 				{
555 					printf(" (FPW%s); hole: offset: %u, length: %u",
556 						   XLogRecBlockImageApply(record, block_id) ?
557 						   "" : " for WAL verification",
558 						   record->blocks[block_id].hole_offset,
559 						   record->blocks[block_id].hole_length);
560 				}
561 			}
562 			putchar('\n');
563 		}
564 	}
565 }
566 
567 /*
568  * Display a single row of record counts and sizes for an rmgr or record.
569  */
570 static void
XLogDumpStatsRow(const char * name,uint64 n,uint64 total_count,uint64 rec_len,uint64 total_rec_len,uint64 fpi_len,uint64 total_fpi_len,uint64 tot_len,uint64 total_len)571 XLogDumpStatsRow(const char *name,
572 				 uint64 n, uint64 total_count,
573 				 uint64 rec_len, uint64 total_rec_len,
574 				 uint64 fpi_len, uint64 total_fpi_len,
575 				 uint64 tot_len, uint64 total_len)
576 {
577 	double		n_pct,
578 				rec_len_pct,
579 				fpi_len_pct,
580 				tot_len_pct;
581 
582 	n_pct = 0;
583 	if (total_count != 0)
584 		n_pct = 100 * (double) n / total_count;
585 
586 	rec_len_pct = 0;
587 	if (total_rec_len != 0)
588 		rec_len_pct = 100 * (double) rec_len / total_rec_len;
589 
590 	fpi_len_pct = 0;
591 	if (total_fpi_len != 0)
592 		fpi_len_pct = 100 * (double) fpi_len / total_fpi_len;
593 
594 	tot_len_pct = 0;
595 	if (total_len != 0)
596 		tot_len_pct = 100 * (double) tot_len / total_len;
597 
598 	printf("%-27s "
599 		   "%20" INT64_MODIFIER "u (%6.02f) "
600 		   "%20" INT64_MODIFIER "u (%6.02f) "
601 		   "%20" INT64_MODIFIER "u (%6.02f) "
602 		   "%20" INT64_MODIFIER "u (%6.02f)\n",
603 		   name, n, n_pct, rec_len, rec_len_pct, fpi_len, fpi_len_pct,
604 		   tot_len, tot_len_pct);
605 }
606 
607 
608 /*
609  * Display summary statistics about the records seen so far.
610  */
611 static void
XLogDumpDisplayStats(XLogDumpConfig * config,XLogDumpStats * stats)612 XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats)
613 {
614 	int			ri,
615 				rj;
616 	uint64		total_count = 0;
617 	uint64		total_rec_len = 0;
618 	uint64		total_fpi_len = 0;
619 	uint64		total_len = 0;
620 	double		rec_len_pct,
621 				fpi_len_pct;
622 
623 	/* ---
624 	 * Make a first pass to calculate column totals:
625 	 * count(*),
626 	 * sum(xl_len+SizeOfXLogRecord),
627 	 * sum(xl_tot_len-xl_len-SizeOfXLogRecord), and
628 	 * sum(xl_tot_len).
629 	 * These are used to calculate percentages for each record type.
630 	 * ---
631 	 */
632 
633 	for (ri = 0; ri < RM_NEXT_ID; ri++)
634 	{
635 		total_count += stats->rmgr_stats[ri].count;
636 		total_rec_len += stats->rmgr_stats[ri].rec_len;
637 		total_fpi_len += stats->rmgr_stats[ri].fpi_len;
638 	}
639 	total_len = total_rec_len + total_fpi_len;
640 
641 	/*
642 	 * 27 is strlen("Transaction/COMMIT_PREPARED"), 20 is strlen(2^64), 8 is
643 	 * strlen("(100.00%)")
644 	 */
645 
646 	printf("%-27s %20s %8s %20s %8s %20s %8s %20s %8s\n"
647 		   "%-27s %20s %8s %20s %8s %20s %8s %20s %8s\n",
648 		   "Type", "N", "(%)", "Record size", "(%)", "FPI size", "(%)", "Combined size", "(%)",
649 		   "----", "-", "---", "-----------", "---", "--------", "---", "-------------", "---");
650 
651 	for (ri = 0; ri < RM_NEXT_ID; ri++)
652 	{
653 		uint64		count,
654 					rec_len,
655 					fpi_len,
656 					tot_len;
657 		const RmgrDescData *desc = &RmgrDescTable[ri];
658 
659 		if (!config->stats_per_record)
660 		{
661 			count = stats->rmgr_stats[ri].count;
662 			rec_len = stats->rmgr_stats[ri].rec_len;
663 			fpi_len = stats->rmgr_stats[ri].fpi_len;
664 			tot_len = rec_len + fpi_len;
665 
666 			XLogDumpStatsRow(desc->rm_name,
667 							 count, total_count, rec_len, total_rec_len,
668 							 fpi_len, total_fpi_len, tot_len, total_len);
669 		}
670 		else
671 		{
672 			for (rj = 0; rj < MAX_XLINFO_TYPES; rj++)
673 			{
674 				const char *id;
675 
676 				count = stats->record_stats[ri][rj].count;
677 				rec_len = stats->record_stats[ri][rj].rec_len;
678 				fpi_len = stats->record_stats[ri][rj].fpi_len;
679 				tot_len = rec_len + fpi_len;
680 
681 				/* Skip undefined combinations and ones that didn't occur */
682 				if (count == 0)
683 					continue;
684 
685 				/* the upper four bits in xl_info are the rmgr's */
686 				id = desc->rm_identify(rj << 4);
687 				if (id == NULL)
688 					id = psprintf("UNKNOWN (%x)", rj << 4);
689 
690 				XLogDumpStatsRow(psprintf("%s/%s", desc->rm_name, id),
691 								 count, total_count, rec_len, total_rec_len,
692 								 fpi_len, total_fpi_len, tot_len, total_len);
693 			}
694 		}
695 	}
696 
697 	printf("%-27s %20s %8s %20s %8s %20s %8s %20s\n",
698 		   "", "--------", "", "--------", "", "--------", "", "--------");
699 
700 	/*
701 	 * The percentages in earlier rows were calculated against the column
702 	 * total, but the ones that follow are against the row total. Note that
703 	 * these are displayed with a % symbol to differentiate them from the
704 	 * earlier ones, and are thus up to 9 characters long.
705 	 */
706 
707 	rec_len_pct = 0;
708 	if (total_len != 0)
709 		rec_len_pct = 100 * (double) total_rec_len / total_len;
710 
711 	fpi_len_pct = 0;
712 	if (total_len != 0)
713 		fpi_len_pct = 100 * (double) total_fpi_len / total_len;
714 
715 	printf("%-27s "
716 		   "%20" INT64_MODIFIER "u %-9s"
717 		   "%20" INT64_MODIFIER "u %-9s"
718 		   "%20" INT64_MODIFIER "u %-9s"
719 		   "%20" INT64_MODIFIER "u %-6s\n",
720 		   "Total", stats->count, "",
721 		   total_rec_len, psprintf("[%.02f%%]", rec_len_pct),
722 		   total_fpi_len, psprintf("[%.02f%%]", fpi_len_pct),
723 		   total_len, "[100%]");
724 }
725 
/*
 * Print the help text (synopsis, option list, bug report address and home
 * page) to stdout.
 *
 * Every message is wrapped in _() for translation, so each literal is kept
 * exactly as it is.
 */
static void
usage(void)
{
	printf(_("%s decodes and displays PostgreSQL write-ahead logs for debugging.\n\n"),
		   progname);
	printf(_("Usage:\n"));
	printf(_("  %s [OPTION]... [STARTSEG [ENDSEG]]\n"), progname);
	printf(_("\nOptions:\n"));
	printf(_("  -b, --bkp-details      output detailed information about backup blocks\n"));
	printf(_("  -e, --end=RECPTR       stop reading at WAL location RECPTR\n"));
	printf(_("  -f, --follow           keep retrying after reaching end of WAL\n"));
	printf(_("  -n, --limit=N          number of records to display\n"));
	printf(_("  -p, --path=PATH        directory in which to find log segment files or a\n"
			 "                         directory with a ./pg_wal that contains such files\n"
			 "                         (default: current directory, ./pg_wal, $PGDATA/pg_wal)\n"));
	printf(_("  -q, --quiet            do not print any output, except for errors\n"));
	printf(_("  -r, --rmgr=RMGR        only show records generated by resource manager RMGR;\n"
			 "                         use --rmgr=list to list valid resource manager names\n"));
	printf(_("  -s, --start=RECPTR     start reading at WAL location RECPTR\n"));
	printf(_("  -t, --timeline=TLI     timeline from which to read log records\n"
			 "                         (default: 1 or the value used in STARTSEG)\n"));
	printf(_("  -V, --version          output version information, then exit\n"));
	printf(_("  -x, --xid=XID          only show records with transaction ID XID\n"));
	printf(_("  -z, --stats[=record]   show statistics instead of records\n"
			 "                         (optionally, show per-record statistics)\n"));
	printf(_("  -?, --help             show this help, then exit\n"));
	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
}
755 
756 int
main(int argc,char ** argv)757 main(int argc, char **argv)
758 {
759 	uint32		xlogid;
760 	uint32		xrecoff;
761 	XLogReaderState *xlogreader_state;
762 	XLogDumpPrivate private;
763 	XLogDumpConfig config;
764 	XLogDumpStats stats;
765 	XLogRecord *record;
766 	XLogRecPtr	first_record;
767 	char	   *waldir = NULL;
768 	char	   *errormsg;
769 
770 	static struct option long_options[] = {
771 		{"bkp-details", no_argument, NULL, 'b'},
772 		{"end", required_argument, NULL, 'e'},
773 		{"follow", no_argument, NULL, 'f'},
774 		{"help", no_argument, NULL, '?'},
775 		{"limit", required_argument, NULL, 'n'},
776 		{"path", required_argument, NULL, 'p'},
777 		{"quiet", no_argument, NULL, 'q'},
778 		{"rmgr", required_argument, NULL, 'r'},
779 		{"start", required_argument, NULL, 's'},
780 		{"timeline", required_argument, NULL, 't'},
781 		{"xid", required_argument, NULL, 'x'},
782 		{"version", no_argument, NULL, 'V'},
783 		{"stats", optional_argument, NULL, 'z'},
784 		{NULL, 0, NULL, 0}
785 	};
786 
787 	int			option;
788 	int			optindex = 0;
789 
790 	pg_logging_init(argv[0]);
791 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_waldump"));
792 	progname = get_progname(argv[0]);
793 
794 	if (argc > 1)
795 	{
796 		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
797 		{
798 			usage();
799 			exit(0);
800 		}
801 		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
802 		{
803 			puts("pg_waldump (PostgreSQL) " PG_VERSION);
804 			exit(0);
805 		}
806 	}
807 
808 	memset(&private, 0, sizeof(XLogDumpPrivate));
809 	memset(&config, 0, sizeof(XLogDumpConfig));
810 	memset(&stats, 0, sizeof(XLogDumpStats));
811 
812 	private.timeline = 1;
813 	private.startptr = InvalidXLogRecPtr;
814 	private.endptr = InvalidXLogRecPtr;
815 	private.endptr_reached = false;
816 
817 	config.quiet = false;
818 	config.bkp_details = false;
819 	config.stop_after_records = -1;
820 	config.already_displayed_records = 0;
821 	config.follow = false;
822 	config.filter_by_rmgr = -1;
823 	config.filter_by_xid = InvalidTransactionId;
824 	config.filter_by_xid_enabled = false;
825 	config.stats = false;
826 	config.stats_per_record = false;
827 
828 	if (argc <= 1)
829 	{
830 		pg_log_error("no arguments specified");
831 		goto bad_argument;
832 	}
833 
834 	while ((option = getopt_long(argc, argv, "be:fn:p:qr:s:t:x:z",
835 								 long_options, &optindex)) != -1)
836 	{
837 		switch (option)
838 		{
839 			case 'b':
840 				config.bkp_details = true;
841 				break;
842 			case 'e':
843 				if (sscanf(optarg, "%X/%X", &xlogid, &xrecoff) != 2)
844 				{
845 					pg_log_error("could not parse end WAL location \"%s\"",
846 								 optarg);
847 					goto bad_argument;
848 				}
849 				private.endptr = (uint64) xlogid << 32 | xrecoff;
850 				break;
851 			case 'f':
852 				config.follow = true;
853 				break;
854 			case 'n':
855 				if (sscanf(optarg, "%d", &config.stop_after_records) != 1)
856 				{
857 					pg_log_error("could not parse limit \"%s\"", optarg);
858 					goto bad_argument;
859 				}
860 				break;
861 			case 'p':
862 				waldir = pg_strdup(optarg);
863 				break;
864 			case 'q':
865 				config.quiet = true;
866 				break;
867 			case 'r':
868 				{
869 					int			i;
870 
871 					if (pg_strcasecmp(optarg, "list") == 0)
872 					{
873 						print_rmgr_list();
874 						exit(EXIT_SUCCESS);
875 					}
876 
877 					for (i = 0; i <= RM_MAX_ID; i++)
878 					{
879 						if (pg_strcasecmp(optarg, RmgrDescTable[i].rm_name) == 0)
880 						{
881 							config.filter_by_rmgr = i;
882 							break;
883 						}
884 					}
885 
886 					if (config.filter_by_rmgr == -1)
887 					{
888 						pg_log_error("resource manager \"%s\" does not exist",
889 									 optarg);
890 						goto bad_argument;
891 					}
892 				}
893 				break;
894 			case 's':
895 				if (sscanf(optarg, "%X/%X", &xlogid, &xrecoff) != 2)
896 				{
897 					pg_log_error("could not parse start WAL location \"%s\"",
898 								 optarg);
899 					goto bad_argument;
900 				}
901 				else
902 					private.startptr = (uint64) xlogid << 32 | xrecoff;
903 				break;
904 			case 't':
905 				if (sscanf(optarg, "%d", &private.timeline) != 1)
906 				{
907 					pg_log_error("could not parse timeline \"%s\"", optarg);
908 					goto bad_argument;
909 				}
910 				break;
911 			case 'x':
912 				if (sscanf(optarg, "%u", &config.filter_by_xid) != 1)
913 				{
914 					pg_log_error("could not parse \"%s\" as a transaction ID",
915 								 optarg);
916 					goto bad_argument;
917 				}
918 				config.filter_by_xid_enabled = true;
919 				break;
920 			case 'z':
921 				config.stats = true;
922 				config.stats_per_record = false;
923 				if (optarg)
924 				{
925 					if (strcmp(optarg, "record") == 0)
926 						config.stats_per_record = true;
927 					else if (strcmp(optarg, "rmgr") != 0)
928 					{
929 						pg_log_error("unrecognized argument to --stats: %s",
930 									 optarg);
931 						goto bad_argument;
932 					}
933 				}
934 				break;
935 			default:
936 				goto bad_argument;
937 		}
938 	}
939 
940 	if ((optind + 2) < argc)
941 	{
942 		pg_log_error("too many command-line arguments (first is \"%s\")",
943 					 argv[optind + 2]);
944 		goto bad_argument;
945 	}
946 
947 	if (waldir != NULL)
948 	{
949 		/* validate path points to directory */
950 		if (!verify_directory(waldir))
951 		{
952 			pg_log_error("could not open directory \"%s\": %m", waldir);
953 			goto bad_argument;
954 		}
955 	}
956 
957 	/* parse files as start/end boundaries, extract path if not specified */
958 	if (optind < argc)
959 	{
960 		char	   *directory = NULL;
961 		char	   *fname = NULL;
962 		int			fd;
963 		XLogSegNo	segno;
964 
965 		split_path(argv[optind], &directory, &fname);
966 
967 		if (waldir == NULL && directory != NULL)
968 		{
969 			waldir = directory;
970 
971 			if (!verify_directory(waldir))
972 				fatal_error("could not open directory \"%s\": %m", waldir);
973 		}
974 
975 		waldir = identify_target_directory(waldir, fname);
976 		fd = open_file_in_directory(waldir, fname);
977 		if (fd < 0)
978 			fatal_error("could not open file \"%s\"", fname);
979 		close(fd);
980 
981 		/* parse position from file */
982 		XLogFromFileName(fname, &private.timeline, &segno, WalSegSz);
983 
984 		if (XLogRecPtrIsInvalid(private.startptr))
985 			XLogSegNoOffsetToRecPtr(segno, 0, WalSegSz, private.startptr);
986 		else if (!XLByteInSeg(private.startptr, segno, WalSegSz))
987 		{
988 			pg_log_error("start WAL location %X/%X is not inside file \"%s\"",
989 						 (uint32) (private.startptr >> 32),
990 						 (uint32) private.startptr,
991 						 fname);
992 			goto bad_argument;
993 		}
994 
995 		/* no second file specified, set end position */
996 		if (!(optind + 1 < argc) && XLogRecPtrIsInvalid(private.endptr))
997 			XLogSegNoOffsetToRecPtr(segno + 1, 0, WalSegSz, private.endptr);
998 
999 		/* parse ENDSEG if passed */
1000 		if (optind + 1 < argc)
1001 		{
1002 			XLogSegNo	endsegno;
1003 
1004 			/* ignore directory, already have that */
1005 			split_path(argv[optind + 1], &directory, &fname);
1006 
1007 			fd = open_file_in_directory(waldir, fname);
1008 			if (fd < 0)
1009 				fatal_error("could not open file \"%s\"", fname);
1010 			close(fd);
1011 
1012 			/* parse position from file */
1013 			XLogFromFileName(fname, &private.timeline, &endsegno, WalSegSz);
1014 
1015 			if (endsegno < segno)
1016 				fatal_error("ENDSEG %s is before STARTSEG %s",
1017 							argv[optind + 1], argv[optind]);
1018 
1019 			if (XLogRecPtrIsInvalid(private.endptr))
1020 				XLogSegNoOffsetToRecPtr(endsegno + 1, 0, WalSegSz,
1021 										private.endptr);
1022 
1023 			/* set segno to endsegno for check of --end */
1024 			segno = endsegno;
1025 		}
1026 
1027 
1028 		if (!XLByteInSeg(private.endptr, segno, WalSegSz) &&
1029 			private.endptr != (segno + 1) * WalSegSz)
1030 		{
1031 			pg_log_error("end WAL location %X/%X is not inside file \"%s\"",
1032 						 (uint32) (private.endptr >> 32),
1033 						 (uint32) private.endptr,
1034 						 argv[argc - 1]);
1035 			goto bad_argument;
1036 		}
1037 	}
1038 	else
1039 		waldir = identify_target_directory(waldir, NULL);
1040 
1041 	/* we don't know what to print */
1042 	if (XLogRecPtrIsInvalid(private.startptr))
1043 	{
1044 		pg_log_error("no start WAL location given");
1045 		goto bad_argument;
1046 	}
1047 
1048 	/* done with argument parsing, do the actual work */
1049 
1050 	/* we have everything we need, start reading */
1051 	xlogreader_state =
1052 		XLogReaderAllocate(WalSegSz, waldir,
1053 						   XL_ROUTINE(.page_read = WALDumpReadPage,
1054 									  .segment_open = WALDumpOpenSegment,
1055 									  .segment_close = WALDumpCloseSegment),
1056 						   &private);
1057 	if (!xlogreader_state)
1058 		fatal_error("out of memory");
1059 
1060 	/* first find a valid recptr to start from */
1061 	first_record = XLogFindNextRecord(xlogreader_state, private.startptr);
1062 
1063 	if (first_record == InvalidXLogRecPtr)
1064 		fatal_error("could not find a valid record after %X/%X",
1065 					(uint32) (private.startptr >> 32),
1066 					(uint32) private.startptr);
1067 
1068 	/*
1069 	 * Display a message that we're skipping data if `from` wasn't a pointer
1070 	 * to the start of a record and also wasn't a pointer to the beginning of
1071 	 * a segment (e.g. we were used in file mode).
1072 	 */
1073 	if (first_record != private.startptr &&
1074 		XLogSegmentOffset(private.startptr, WalSegSz) != 0)
1075 		printf(ngettext("first record is after %X/%X, at %X/%X, skipping over %u byte\n",
1076 						"first record is after %X/%X, at %X/%X, skipping over %u bytes\n",
1077 						(first_record - private.startptr)),
1078 			   (uint32) (private.startptr >> 32), (uint32) private.startptr,
1079 			   (uint32) (first_record >> 32), (uint32) first_record,
1080 			   (uint32) (first_record - private.startptr));
1081 
1082 	for (;;)
1083 	{
1084 		/* try to read the next record */
1085 		record = XLogReadRecord(xlogreader_state, &errormsg);
1086 		if (!record)
1087 		{
1088 			if (!config.follow || private.endptr_reached)
1089 				break;
1090 			else
1091 			{
1092 				pg_usleep(1000000L);	/* 1 second */
1093 				continue;
1094 			}
1095 		}
1096 
1097 		/* apply all specified filters */
1098 		if (config.filter_by_rmgr != -1 &&
1099 			config.filter_by_rmgr != record->xl_rmid)
1100 			continue;
1101 
1102 		if (config.filter_by_xid_enabled &&
1103 			config.filter_by_xid != record->xl_xid)
1104 			continue;
1105 
1106 		/* perform any per-record work */
1107 		if (!config.quiet)
1108 		{
1109 			if (config.stats == true)
1110 				XLogDumpCountRecord(&config, &stats, xlogreader_state);
1111 			else
1112 				XLogDumpDisplayRecord(&config, xlogreader_state);
1113 		}
1114 
1115 		/* check whether we printed enough */
1116 		config.already_displayed_records++;
1117 		if (config.stop_after_records > 0 &&
1118 			config.already_displayed_records >= config.stop_after_records)
1119 			break;
1120 	}
1121 
1122 	if (config.stats == true && !config.quiet)
1123 		XLogDumpDisplayStats(&config, &stats);
1124 
1125 	if (errormsg)
1126 		fatal_error("error in WAL record at %X/%X: %s",
1127 					(uint32) (xlogreader_state->ReadRecPtr >> 32),
1128 					(uint32) xlogreader_state->ReadRecPtr,
1129 					errormsg);
1130 
1131 	XLogReaderFree(xlogreader_state);
1132 
1133 	return EXIT_SUCCESS;
1134 
1135 bad_argument:
1136 	fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
1137 	return EXIT_FAILURE;
1138 }
1139