1 /*-------------------------------------------------------------------------
2  *
3  * pg_waldump.c - decode and display WAL
4  *
5  * Copyright (c) 2013-2019, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *		  src/bin/pg_waldump/pg_waldump.c
9  *-------------------------------------------------------------------------
10  */
11 
12 #define FRONTEND 1
13 #include "postgres.h"
14 
15 #include <dirent.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 
19 #include "access/xlogreader.h"
20 #include "access/xlogrecord.h"
21 #include "access/xlog_internal.h"
22 #include "access/transam.h"
23 #include "common/fe_memutils.h"
24 #include "common/logging.h"
25 #include "getopt_long.h"
26 #include "rmgrdesc.h"
27 
28 
/* Program name derived from argv[0]; used in usage and hint messages. */
static const char *progname;

/*
 * WAL segment size in bytes, taken from the long page header of the first
 * WAL file that search_directory() manages to open.
 */
static int	WalSegSz;
32 
/*
 * State handed to the XLogReader page-read callback through private_data.
 */
typedef struct XLogDumpPrivate
{
	TimeLineID	timeline;		/* timeline used to build segment file names */
	char	   *inpath;			/* directory the WAL segments are read from */
	XLogRecPtr	startptr;		/* WAL location to start reading at */
	XLogRecPtr	endptr;			/* WAL location to stop at; invalid = no limit */
	bool		endptr_reached; /* set by the callback once a read would go
								 * past endptr */
} XLogDumpPrivate;
41 
/*
 * Settings collected from the command line that control what is displayed.
 */
typedef struct XLogDumpConfig
{
	/* display options */
	bool		bkp_details;	/* -b: detailed block reference output */
	int			stop_after_records; /* -n: stop after this many records;
									 * values <= 0 mean no limit */
	int			already_displayed_records;	/* records processed so far */
	bool		follow;			/* -f: keep retrying at end of WAL */
	bool		stats;			/* -z: collect statistics instead of printing */
	bool		stats_per_record;	/* --stats=record: break down per record
									 * type rather than per rmgr */

	/* filter options */
	int			filter_by_rmgr; /* -r: rmgr id to show, or -1 for all */
	TransactionId filter_by_xid;	/* -x: transaction id to show */
	bool		filter_by_xid_enabled;	/* true when -x was given */
} XLogDumpConfig;
57 
/*
 * Accumulated counters for one statistics bucket.
 */
typedef struct Stats
{
	uint64		count;			/* number of records counted */
	uint64		rec_len;		/* total record length excluding block images */
	uint64		fpi_len;		/* total length of full-page images */
} Stats;
64 
/*
 * Number of per-rmgr record types tracked: the rmgr owns four bits of
 * xl_info, giving sixteen possible entries per RmgrId.
 */
#define MAX_XLINFO_TYPES 16
66 
/*
 * All statistics gathered in --stats mode.
 */
typedef struct XLogDumpStats
{
	uint64		count;			/* total number of records counted */
	Stats		rmgr_stats[RM_NEXT_ID]; /* indexed by rmgr id */
	Stats		record_stats[RM_NEXT_ID][MAX_XLINFO_TYPES]; /* indexed by rmgr
															 * id, then record
															 * type id */
} XLogDumpStats;
73 
/* Report a message through pg_log_fatal() and exit with EXIT_FAILURE. */
#define fatal_error(...) do { pg_log_fatal(__VA_ARGS__); exit(EXIT_FAILURE); } while(0)
75 
76 static void
77 print_rmgr_list(void)
78 {
79 	int			i;
80 
81 	for (i = 0; i <= RM_MAX_ID; i++)
82 	{
83 		printf("%s\n", RmgrDescTable[i].rm_name);
84 	}
85 }
86 
/*
 * Report whether the given directory can be opened.
 *
 * Returns true if opendir() succeeds (the handle is closed again right
 * away), false otherwise.  On failure nothing else is called after
 * opendir(), so errno still describes the problem for the caller's message.
 */
static bool
verify_directory(const char *directory)
{
	DIR		   *handle;

	handle = opendir(directory);
	if (handle != NULL)
	{
		closedir(handle);
		return true;
	}

	return false;
}
101 
/*
 * Break a pathname into a directory part and a file name part at the last
 * '/' it contains.
 *
 * On return *fname is a freshly allocated copy of everything after the last
 * slash.  *dir is a freshly allocated copy of everything up to and including
 * that slash, or NULL when the path contains no slash at all.
 *
 * XXX this probably doesn't do very well on Windows.  We probably need to
 * apply canonicalize_path(), at the very least.
 */
static void
split_path(const char *path, char **dir, char **fname)
{
	const char *last_slash = strrchr(path, '/');
	char	   *dircopy;

	/* no separator: the file is taken to be in the local directory */
	if (last_slash == NULL)
	{
		*dir = NULL;
		*fname = pg_strdup(path);
		return;
	}

	/* duplicate the whole path, then cut it right after the slash */
	dircopy = pg_strdup(path);
	dircopy[(last_slash - path) + 1] = '\0';

	*dir = dircopy;
	*fname = pg_strdup(last_slash + 1);
}
130 
131 /*
132  * Open the file in the valid target directory.
133  *
134  * return a read only fd
135  */
136 static int
137 open_file_in_directory(const char *directory, const char *fname)
138 {
139 	int			fd = -1;
140 	char		fpath[MAXPGPATH];
141 
142 	Assert(directory != NULL);
143 
144 	snprintf(fpath, MAXPGPATH, "%s/%s", directory, fname);
145 	fd = open(fpath, O_RDONLY | PG_BINARY, 0);
146 
147 	if (fd < 0 && errno != ENOENT)
148 		fatal_error("could not open file \"%s\": %s",
149 					fname, strerror(errno));
150 	return fd;
151 }
152 
153 /*
154  * Try to find fname in the given directory. Returns true if it is found,
155  * false otherwise. If fname is NULL, search the complete directory for any
156  * file with a valid WAL file name. If file is successfully opened, set the
157  * wal segment size.
158  */
159 static bool
160 search_directory(const char *directory, const char *fname)
161 {
162 	int			fd = -1;
163 	DIR		   *xldir;
164 
165 	/* open file if valid filename is provided */
166 	if (fname != NULL)
167 		fd = open_file_in_directory(directory, fname);
168 
169 	/*
170 	 * A valid file name is not passed, so search the complete directory.  If
171 	 * we find any file whose name is a valid WAL file name then try to open
172 	 * it.  If we cannot open it, bail out.
173 	 */
174 	else if ((xldir = opendir(directory)) != NULL)
175 	{
176 		struct dirent *xlde;
177 
178 		while ((xlde = readdir(xldir)) != NULL)
179 		{
180 			if (IsXLogFileName(xlde->d_name))
181 			{
182 				fd = open_file_in_directory(directory, xlde->d_name);
183 				fname = xlde->d_name;
184 				break;
185 			}
186 		}
187 
188 		closedir(xldir);
189 	}
190 
191 	/* set WalSegSz if file is successfully opened */
192 	if (fd >= 0)
193 	{
194 		PGAlignedXLogBlock buf;
195 		int			r;
196 
197 		r = read(fd, buf.data, XLOG_BLCKSZ);
198 		if (r == XLOG_BLCKSZ)
199 		{
200 			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data;
201 
202 			WalSegSz = longhdr->xlp_seg_size;
203 
204 			if (!IsValidWalSegSize(WalSegSz))
205 				fatal_error(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but the WAL file \"%s\" header specifies %d byte",
206 									 "WAL segment size must be a power of two between 1 MB and 1 GB, but the WAL file \"%s\" header specifies %d bytes",
207 									 WalSegSz),
208 							fname, WalSegSz);
209 		}
210 		else
211 		{
212 			if (errno != 0)
213 				fatal_error("could not read file \"%s\": %s",
214 							fname, strerror(errno));
215 			else
216 				fatal_error("could not read file \"%s\": read %d of %zu",
217 							fname, r, (Size) XLOG_BLCKSZ);
218 		}
219 		close(fd);
220 		return true;
221 	}
222 
223 	return false;
224 }
225 
226 /*
227  * Identify the target directory and set WalSegSz.
228  *
229  * Try to find the file in several places:
230  * if directory != NULL:
231  *	 directory /
232  *	 directory / XLOGDIR /
233  * else
234  *	 .
235  *	 XLOGDIR /
236  *	 $PGDATA / XLOGDIR /
237  *
238  * Set the valid target directory in private->inpath.
239  */
240 static void
241 identify_target_directory(XLogDumpPrivate *private, char *directory,
242 						  char *fname)
243 {
244 	char		fpath[MAXPGPATH];
245 
246 	if (directory != NULL)
247 	{
248 		if (search_directory(directory, fname))
249 		{
250 			private->inpath = pg_strdup(directory);
251 			return;
252 		}
253 
254 		/* directory / XLOGDIR */
255 		snprintf(fpath, MAXPGPATH, "%s/%s", directory, XLOGDIR);
256 		if (search_directory(fpath, fname))
257 		{
258 			private->inpath = pg_strdup(fpath);
259 			return;
260 		}
261 	}
262 	else
263 	{
264 		const char *datadir;
265 
266 		/* current directory */
267 		if (search_directory(".", fname))
268 		{
269 			private->inpath = pg_strdup(".");
270 			return;
271 		}
272 		/* XLOGDIR */
273 		if (search_directory(XLOGDIR, fname))
274 		{
275 			private->inpath = pg_strdup(XLOGDIR);
276 			return;
277 		}
278 
279 		datadir = getenv("PGDATA");
280 		/* $PGDATA / XLOGDIR */
281 		if (datadir != NULL)
282 		{
283 			snprintf(fpath, MAXPGPATH, "%s/%s", datadir, XLOGDIR);
284 			if (search_directory(fpath, fname))
285 			{
286 				private->inpath = pg_strdup(fpath);
287 				return;
288 			}
289 		}
290 	}
291 
292 	/* could not locate WAL file */
293 	if (fname)
294 		fatal_error("could not locate WAL file \"%s\"", fname);
295 	else
296 		fatal_error("could not find any WAL file");
297 }
298 
299 /*
300  * Read count bytes from a segment file in the specified directory, for the
301  * given timeline, containing the specified record pointer; store the data in
302  * the passed buffer.
303  */
304 static void
305 XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
306 				 XLogRecPtr startptr, char *buf, Size count)
307 {
308 	char	   *p;
309 	XLogRecPtr	recptr;
310 	Size		nbytes;
311 
312 	static int	sendFile = -1;
313 	static XLogSegNo sendSegNo = 0;
314 	static uint32 sendOff = 0;
315 
316 	p = buf;
317 	recptr = startptr;
318 	nbytes = count;
319 
320 	while (nbytes > 0)
321 	{
322 		uint32		startoff;
323 		int			segbytes;
324 		int			readbytes;
325 
326 		startoff = XLogSegmentOffset(recptr, WalSegSz);
327 
328 		if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo, WalSegSz))
329 		{
330 			char		fname[MAXFNAMELEN];
331 			int			tries;
332 
333 			/* Switch to another logfile segment */
334 			if (sendFile >= 0)
335 				close(sendFile);
336 
337 			XLByteToSeg(recptr, sendSegNo, WalSegSz);
338 
339 			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
340 
341 			/*
342 			 * In follow mode there is a short period of time after the server
343 			 * has written the end of the previous file before the new file is
344 			 * available. So we loop for 5 seconds looking for the file to
345 			 * appear before giving up.
346 			 */
347 			for (tries = 0; tries < 10; tries++)
348 			{
349 				sendFile = open_file_in_directory(directory, fname);
350 				if (sendFile >= 0)
351 					break;
352 				if (errno == ENOENT)
353 				{
354 					int			save_errno = errno;
355 
356 					/* File not there yet, try again */
357 					pg_usleep(500 * 1000);
358 
359 					errno = save_errno;
360 					continue;
361 				}
362 				/* Any other error, fall through and fail */
363 				break;
364 			}
365 
366 			if (sendFile < 0)
367 				fatal_error("could not find file \"%s\": %s",
368 							fname, strerror(errno));
369 			sendOff = 0;
370 		}
371 
372 		/* Need to seek in the file? */
373 		if (sendOff != startoff)
374 		{
375 			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
376 			{
377 				int			err = errno;
378 				char		fname[MAXPGPATH];
379 
380 				XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
381 
382 				fatal_error("could not seek in log file %s to offset %u: %s",
383 							fname, startoff, strerror(err));
384 			}
385 			sendOff = startoff;
386 		}
387 
388 		/* How many bytes are within this segment? */
389 		if (nbytes > (WalSegSz - startoff))
390 			segbytes = WalSegSz - startoff;
391 		else
392 			segbytes = nbytes;
393 
394 		readbytes = read(sendFile, p, segbytes);
395 		if (readbytes <= 0)
396 		{
397 			int			err = errno;
398 			char		fname[MAXPGPATH];
399 			int			save_errno = errno;
400 
401 			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
402 			errno = save_errno;
403 
404 			if (readbytes < 0)
405 				fatal_error("could not read from log file %s, offset %u, length %d: %s",
406 							fname, sendOff, segbytes, strerror(err));
407 			else if (readbytes == 0)
408 				fatal_error("could not read from log file %s, offset %u: read %d of %zu",
409 							fname, sendOff, readbytes, (Size) segbytes);
410 		}
411 
412 		/* Update state for read */
413 		recptr += readbytes;
414 
415 		sendOff += readbytes;
416 		nbytes -= readbytes;
417 		p += readbytes;
418 	}
419 }
420 
421 /*
422  * XLogReader read_page callback
423  */
424 static int
425 XLogDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
426 				 XLogRecPtr targetPtr, char *readBuff, TimeLineID *curFileTLI)
427 {
428 	XLogDumpPrivate *private = state->private_data;
429 	int			count = XLOG_BLCKSZ;
430 
431 	if (private->endptr != InvalidXLogRecPtr)
432 	{
433 		if (targetPagePtr + XLOG_BLCKSZ <= private->endptr)
434 			count = XLOG_BLCKSZ;
435 		else if (targetPagePtr + reqLen <= private->endptr)
436 			count = private->endptr - targetPagePtr;
437 		else
438 		{
439 			private->endptr_reached = true;
440 			return -1;
441 		}
442 	}
443 
444 	XLogDumpXLogRead(private->inpath, private->timeline, targetPagePtr,
445 					 readBuff, count);
446 
447 	return count;
448 }
449 
450 /*
451  * Calculate the size of a record, split into !FPI and FPI parts.
452  */
453 static void
454 XLogDumpRecordLen(XLogReaderState *record, uint32 *rec_len, uint32 *fpi_len)
455 {
456 	int			block_id;
457 
458 	/*
459 	 * Calculate the amount of FPI data in the record.
460 	 *
461 	 * XXX: We peek into xlogreader's private decoded backup blocks for the
462 	 * bimg_len indicating the length of FPI data. It doesn't seem worth it to
463 	 * add an accessor macro for this.
464 	 */
465 	*fpi_len = 0;
466 	for (block_id = 0; block_id <= record->max_block_id; block_id++)
467 	{
468 		if (XLogRecHasBlockImage(record, block_id))
469 			*fpi_len += record->blocks[block_id].bimg_len;
470 	}
471 
472 	/*
473 	 * Calculate the length of the record as the total length - the length of
474 	 * all the block images.
475 	 */
476 	*rec_len = XLogRecGetTotalLen(record) - *fpi_len;
477 }
478 
479 /*
480  * Store per-rmgr and per-record statistics for a given record.
481  */
482 static void
483 XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats,
484 					XLogReaderState *record)
485 {
486 	RmgrId		rmid;
487 	uint8		recid;
488 	uint32		rec_len;
489 	uint32		fpi_len;
490 
491 	stats->count++;
492 
493 	rmid = XLogRecGetRmid(record);
494 
495 	XLogDumpRecordLen(record, &rec_len, &fpi_len);
496 
497 	/* Update per-rmgr statistics */
498 
499 	stats->rmgr_stats[rmid].count++;
500 	stats->rmgr_stats[rmid].rec_len += rec_len;
501 	stats->rmgr_stats[rmid].fpi_len += fpi_len;
502 
503 	/*
504 	 * Update per-record statistics, where the record is identified by a
505 	 * combination of the RmgrId and the four bits of the xl_info field that
506 	 * are the rmgr's domain (resulting in sixteen possible entries per
507 	 * RmgrId).
508 	 */
509 
510 	recid = XLogRecGetInfo(record) >> 4;
511 
512 	/*
513 	 * XACT records need to be handled differently. Those records use the
514 	 * first bit of those four bits for an optional flag variable and the
515 	 * following three bits for the opcode. We filter opcode out of xl_info
516 	 * and use it as the identifier of the record.
517 	 */
518 	if (rmid == RM_XACT_ID)
519 		recid &= 0x07;
520 
521 	stats->record_stats[rmid][recid].count++;
522 	stats->record_stats[rmid][recid].rec_len += rec_len;
523 	stats->record_stats[rmid][recid].fpi_len += fpi_len;
524 }
525 
526 /*
527  * Print a record to stdout
528  */
529 static void
530 XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
531 {
532 	const char *id;
533 	const RmgrDescData *desc = &RmgrDescTable[XLogRecGetRmid(record)];
534 	uint32		rec_len;
535 	uint32		fpi_len;
536 	RelFileNode rnode;
537 	ForkNumber	forknum;
538 	BlockNumber blk;
539 	int			block_id;
540 	uint8		info = XLogRecGetInfo(record);
541 	XLogRecPtr	xl_prev = XLogRecGetPrev(record);
542 
543 	XLogDumpRecordLen(record, &rec_len, &fpi_len);
544 
545 	printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ",
546 		   desc->rm_name,
547 		   rec_len, XLogRecGetTotalLen(record),
548 		   XLogRecGetXid(record),
549 		   (uint32) (record->ReadRecPtr >> 32), (uint32) record->ReadRecPtr,
550 		   (uint32) (xl_prev >> 32), (uint32) xl_prev);
551 
552 	id = desc->rm_identify(info);
553 	if (id == NULL)
554 		printf("desc: UNKNOWN (%x) ", info & ~XLR_INFO_MASK);
555 	else
556 		printf("desc: %s ", id);
557 
558 	/* the desc routine will printf the description directly to stdout */
559 	desc->rm_desc(NULL, record);
560 
561 	if (!config->bkp_details)
562 	{
563 		/* print block references (short format) */
564 		for (block_id = 0; block_id <= record->max_block_id; block_id++)
565 		{
566 			if (!XLogRecHasBlockRef(record, block_id))
567 				continue;
568 
569 			XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
570 			if (forknum != MAIN_FORKNUM)
571 				printf(", blkref #%u: rel %u/%u/%u fork %s blk %u",
572 					   block_id,
573 					   rnode.spcNode, rnode.dbNode, rnode.relNode,
574 					   forkNames[forknum],
575 					   blk);
576 			else
577 				printf(", blkref #%u: rel %u/%u/%u blk %u",
578 					   block_id,
579 					   rnode.spcNode, rnode.dbNode, rnode.relNode,
580 					   blk);
581 			if (XLogRecHasBlockImage(record, block_id))
582 			{
583 				if (XLogRecBlockImageApply(record, block_id))
584 					printf(" FPW");
585 				else
586 					printf(" FPW for WAL verification");
587 			}
588 		}
589 		putchar('\n');
590 	}
591 	else
592 	{
593 		/* print block references (detailed format) */
594 		putchar('\n');
595 		for (block_id = 0; block_id <= record->max_block_id; block_id++)
596 		{
597 			if (!XLogRecHasBlockRef(record, block_id))
598 				continue;
599 
600 			XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
601 			printf("\tblkref #%u: rel %u/%u/%u fork %s blk %u",
602 				   block_id,
603 				   rnode.spcNode, rnode.dbNode, rnode.relNode,
604 				   forkNames[forknum],
605 				   blk);
606 			if (XLogRecHasBlockImage(record, block_id))
607 			{
608 				if (record->blocks[block_id].bimg_info &
609 					BKPIMAGE_IS_COMPRESSED)
610 				{
611 					printf(" (FPW%s); hole: offset: %u, length: %u, "
612 						   "compression saved: %u",
613 						   XLogRecBlockImageApply(record, block_id) ?
614 						   "" : " for WAL verification",
615 						   record->blocks[block_id].hole_offset,
616 						   record->blocks[block_id].hole_length,
617 						   BLCKSZ -
618 						   record->blocks[block_id].hole_length -
619 						   record->blocks[block_id].bimg_len);
620 				}
621 				else
622 				{
623 					printf(" (FPW%s); hole: offset: %u, length: %u",
624 						   XLogRecBlockImageApply(record, block_id) ?
625 						   "" : " for WAL verification",
626 						   record->blocks[block_id].hole_offset,
627 						   record->blocks[block_id].hole_length);
628 				}
629 			}
630 			putchar('\n');
631 		}
632 	}
633 }
634 
635 /*
636  * Display a single row of record counts and sizes for an rmgr or record.
637  */
638 static void
639 XLogDumpStatsRow(const char *name,
640 				 uint64 n, uint64 total_count,
641 				 uint64 rec_len, uint64 total_rec_len,
642 				 uint64 fpi_len, uint64 total_fpi_len,
643 				 uint64 tot_len, uint64 total_len)
644 {
645 	double		n_pct,
646 				rec_len_pct,
647 				fpi_len_pct,
648 				tot_len_pct;
649 
650 	n_pct = 0;
651 	if (total_count != 0)
652 		n_pct = 100 * (double) n / total_count;
653 
654 	rec_len_pct = 0;
655 	if (total_rec_len != 0)
656 		rec_len_pct = 100 * (double) rec_len / total_rec_len;
657 
658 	fpi_len_pct = 0;
659 	if (total_fpi_len != 0)
660 		fpi_len_pct = 100 * (double) fpi_len / total_fpi_len;
661 
662 	tot_len_pct = 0;
663 	if (total_len != 0)
664 		tot_len_pct = 100 * (double) tot_len / total_len;
665 
666 	printf("%-27s "
667 		   "%20" INT64_MODIFIER "u (%6.02f) "
668 		   "%20" INT64_MODIFIER "u (%6.02f) "
669 		   "%20" INT64_MODIFIER "u (%6.02f) "
670 		   "%20" INT64_MODIFIER "u (%6.02f)\n",
671 		   name, n, n_pct, rec_len, rec_len_pct, fpi_len, fpi_len_pct,
672 		   tot_len, tot_len_pct);
673 }
674 
675 
676 /*
677  * Display summary statistics about the records seen so far.
678  */
679 static void
680 XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats)
681 {
682 	int			ri,
683 				rj;
684 	uint64		total_count = 0;
685 	uint64		total_rec_len = 0;
686 	uint64		total_fpi_len = 0;
687 	uint64		total_len = 0;
688 	double		rec_len_pct,
689 				fpi_len_pct;
690 
691 	/* ---
692 	 * Make a first pass to calculate column totals:
693 	 * count(*),
694 	 * sum(xl_len+SizeOfXLogRecord),
695 	 * sum(xl_tot_len-xl_len-SizeOfXLogRecord), and
696 	 * sum(xl_tot_len).
697 	 * These are used to calculate percentages for each record type.
698 	 * ---
699 	 */
700 
701 	for (ri = 0; ri < RM_NEXT_ID; ri++)
702 	{
703 		total_count += stats->rmgr_stats[ri].count;
704 		total_rec_len += stats->rmgr_stats[ri].rec_len;
705 		total_fpi_len += stats->rmgr_stats[ri].fpi_len;
706 	}
707 	total_len = total_rec_len + total_fpi_len;
708 
709 	/*
710 	 * 27 is strlen("Transaction/COMMIT_PREPARED"), 20 is strlen(2^64), 8 is
711 	 * strlen("(100.00%)")
712 	 */
713 
714 	printf("%-27s %20s %8s %20s %8s %20s %8s %20s %8s\n"
715 		   "%-27s %20s %8s %20s %8s %20s %8s %20s %8s\n",
716 		   "Type", "N", "(%)", "Record size", "(%)", "FPI size", "(%)", "Combined size", "(%)",
717 		   "----", "-", "---", "-----------", "---", "--------", "---", "-------------", "---");
718 
719 	for (ri = 0; ri < RM_NEXT_ID; ri++)
720 	{
721 		uint64		count,
722 					rec_len,
723 					fpi_len,
724 					tot_len;
725 		const RmgrDescData *desc = &RmgrDescTable[ri];
726 
727 		if (!config->stats_per_record)
728 		{
729 			count = stats->rmgr_stats[ri].count;
730 			rec_len = stats->rmgr_stats[ri].rec_len;
731 			fpi_len = stats->rmgr_stats[ri].fpi_len;
732 			tot_len = rec_len + fpi_len;
733 
734 			XLogDumpStatsRow(desc->rm_name,
735 							 count, total_count, rec_len, total_rec_len,
736 							 fpi_len, total_fpi_len, tot_len, total_len);
737 		}
738 		else
739 		{
740 			for (rj = 0; rj < MAX_XLINFO_TYPES; rj++)
741 			{
742 				const char *id;
743 
744 				count = stats->record_stats[ri][rj].count;
745 				rec_len = stats->record_stats[ri][rj].rec_len;
746 				fpi_len = stats->record_stats[ri][rj].fpi_len;
747 				tot_len = rec_len + fpi_len;
748 
749 				/* Skip undefined combinations and ones that didn't occur */
750 				if (count == 0)
751 					continue;
752 
753 				/* the upper four bits in xl_info are the rmgr's */
754 				id = desc->rm_identify(rj << 4);
755 				if (id == NULL)
756 					id = psprintf("UNKNOWN (%x)", rj << 4);
757 
758 				XLogDumpStatsRow(psprintf("%s/%s", desc->rm_name, id),
759 								 count, total_count, rec_len, total_rec_len,
760 								 fpi_len, total_fpi_len, tot_len, total_len);
761 			}
762 		}
763 	}
764 
765 	printf("%-27s %20s %8s %20s %8s %20s %8s %20s\n",
766 		   "", "--------", "", "--------", "", "--------", "", "--------");
767 
768 	/*
769 	 * The percentages in earlier rows were calculated against the column
770 	 * total, but the ones that follow are against the row total. Note that
771 	 * these are displayed with a % symbol to differentiate them from the
772 	 * earlier ones, and are thus up to 9 characters long.
773 	 */
774 
775 	rec_len_pct = 0;
776 	if (total_len != 0)
777 		rec_len_pct = 100 * (double) total_rec_len / total_len;
778 
779 	fpi_len_pct = 0;
780 	if (total_len != 0)
781 		fpi_len_pct = 100 * (double) total_fpi_len / total_len;
782 
783 	printf("%-27s "
784 		   "%20" INT64_MODIFIER "u %-9s"
785 		   "%20" INT64_MODIFIER "u %-9s"
786 		   "%20" INT64_MODIFIER "u %-9s"
787 		   "%20" INT64_MODIFIER "u %-6s\n",
788 		   "Total", stats->count, "",
789 		   total_rec_len, psprintf("[%.02f%%]", rec_len_pct),
790 		   total_fpi_len, psprintf("[%.02f%%]", fpi_len_pct),
791 		   total_len, "[100%]");
792 }
793 
/*
 * Print the command-line help text to stdout.
 *
 * Each printf() passes its text through _() and inserts progname where the
 * format asks for it.
 */
static void
usage(void)
{
	printf(_("%s decodes and displays PostgreSQL write-ahead logs for debugging.\n\n"),
		   progname);
	printf(_("Usage:\n"));
	printf(_("  %s [OPTION]... [STARTSEG [ENDSEG]]\n"), progname);
	printf(_("\nOptions:\n"));
	printf(_("  -b, --bkp-details      output detailed information about backup blocks\n"));
	printf(_("  -e, --end=RECPTR       stop reading at WAL location RECPTR\n"));
	printf(_("  -f, --follow           keep retrying after reaching end of WAL\n"));
	printf(_("  -n, --limit=N          number of records to display\n"));
	printf(_("  -p, --path=PATH        directory in which to find log segment files or a\n"
			 "                         directory with a ./pg_wal that contains such files\n"
			 "                         (default: current directory, ./pg_wal, $PGDATA/pg_wal)\n"));
	printf(_("  -r, --rmgr=RMGR        only show records generated by resource manager RMGR;\n"
			 "                         use --rmgr=list to list valid resource manager names\n"));
	printf(_("  -s, --start=RECPTR     start reading at WAL location RECPTR\n"));
	printf(_("  -t, --timeline=TLI     timeline from which to read log records\n"
			 "                         (default: 1 or the value used in STARTSEG)\n"));
	printf(_("  -V, --version          output version information, then exit\n"));
	printf(_("  -x, --xid=XID          only show records with transaction ID XID\n"));
	printf(_("  -z, --stats[=record]   show statistics instead of records\n"
			 "                         (optionally, show per-record statistics)\n"));
	printf(_("  -?, --help             show this help, then exit\n"));
	printf(_("\nReport bugs to <pgsql-bugs@lists.postgresql.org>.\n"));
}
821 
822 int
823 main(int argc, char **argv)
824 {
825 	uint32		xlogid;
826 	uint32		xrecoff;
827 	XLogReaderState *xlogreader_state;
828 	XLogDumpPrivate private;
829 	XLogDumpConfig config;
830 	XLogDumpStats stats;
831 	XLogRecord *record;
832 	XLogRecPtr	first_record;
833 	char	   *errormsg;
834 
835 	static struct option long_options[] = {
836 		{"bkp-details", no_argument, NULL, 'b'},
837 		{"end", required_argument, NULL, 'e'},
838 		{"follow", no_argument, NULL, 'f'},
839 		{"help", no_argument, NULL, '?'},
840 		{"limit", required_argument, NULL, 'n'},
841 		{"path", required_argument, NULL, 'p'},
842 		{"rmgr", required_argument, NULL, 'r'},
843 		{"start", required_argument, NULL, 's'},
844 		{"timeline", required_argument, NULL, 't'},
845 		{"xid", required_argument, NULL, 'x'},
846 		{"version", no_argument, NULL, 'V'},
847 		{"stats", optional_argument, NULL, 'z'},
848 		{NULL, 0, NULL, 0}
849 	};
850 
851 	int			option;
852 	int			optindex = 0;
853 
854 	pg_logging_init(argv[0]);
855 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_waldump"));
856 	progname = get_progname(argv[0]);
857 
858 	if (argc > 1)
859 	{
860 		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
861 		{
862 			usage();
863 			exit(0);
864 		}
865 		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
866 		{
867 			puts("pg_waldump (PostgreSQL) " PG_VERSION);
868 			exit(0);
869 		}
870 	}
871 
872 	memset(&private, 0, sizeof(XLogDumpPrivate));
873 	memset(&config, 0, sizeof(XLogDumpConfig));
874 	memset(&stats, 0, sizeof(XLogDumpStats));
875 
876 	private.timeline = 1;
877 	private.startptr = InvalidXLogRecPtr;
878 	private.endptr = InvalidXLogRecPtr;
879 	private.endptr_reached = false;
880 
881 	config.bkp_details = false;
882 	config.stop_after_records = -1;
883 	config.already_displayed_records = 0;
884 	config.follow = false;
885 	config.filter_by_rmgr = -1;
886 	config.filter_by_xid = InvalidTransactionId;
887 	config.filter_by_xid_enabled = false;
888 	config.stats = false;
889 	config.stats_per_record = false;
890 
891 	if (argc <= 1)
892 	{
893 		pg_log_error("no arguments specified");
894 		goto bad_argument;
895 	}
896 
897 	while ((option = getopt_long(argc, argv, "be:fn:p:r:s:t:x:z",
898 								 long_options, &optindex)) != -1)
899 	{
900 		switch (option)
901 		{
902 			case 'b':
903 				config.bkp_details = true;
904 				break;
905 			case 'e':
906 				if (sscanf(optarg, "%X/%X", &xlogid, &xrecoff) != 2)
907 				{
908 					pg_log_error("could not parse end WAL location \"%s\"",
909 								 optarg);
910 					goto bad_argument;
911 				}
912 				private.endptr = (uint64) xlogid << 32 | xrecoff;
913 				break;
914 			case 'f':
915 				config.follow = true;
916 				break;
917 			case 'n':
918 				if (sscanf(optarg, "%d", &config.stop_after_records) != 1)
919 				{
920 					pg_log_error("could not parse limit \"%s\"", optarg);
921 					goto bad_argument;
922 				}
923 				break;
924 			case 'p':
925 				private.inpath = pg_strdup(optarg);
926 				break;
927 			case 'r':
928 				{
929 					int			i;
930 
931 					if (pg_strcasecmp(optarg, "list") == 0)
932 					{
933 						print_rmgr_list();
934 						exit(EXIT_SUCCESS);
935 					}
936 
937 					for (i = 0; i <= RM_MAX_ID; i++)
938 					{
939 						if (pg_strcasecmp(optarg, RmgrDescTable[i].rm_name) == 0)
940 						{
941 							config.filter_by_rmgr = i;
942 							break;
943 						}
944 					}
945 
946 					if (config.filter_by_rmgr == -1)
947 					{
948 						pg_log_error("resource manager \"%s\" does not exist",
949 									 optarg);
950 						goto bad_argument;
951 					}
952 				}
953 				break;
954 			case 's':
955 				if (sscanf(optarg, "%X/%X", &xlogid, &xrecoff) != 2)
956 				{
957 					pg_log_error("could not parse start WAL location \"%s\"",
958 								 optarg);
959 					goto bad_argument;
960 				}
961 				else
962 					private.startptr = (uint64) xlogid << 32 | xrecoff;
963 				break;
964 			case 't':
965 				if (sscanf(optarg, "%d", &private.timeline) != 1)
966 				{
967 					pg_log_error("could not parse timeline \"%s\"", optarg);
968 					goto bad_argument;
969 				}
970 				break;
971 			case 'x':
972 				if (sscanf(optarg, "%u", &config.filter_by_xid) != 1)
973 				{
974 					pg_log_error("could not parse \"%s\" as a transaction ID",
975 								 optarg);
976 					goto bad_argument;
977 				}
978 				config.filter_by_xid_enabled = true;
979 				break;
980 			case 'z':
981 				config.stats = true;
982 				config.stats_per_record = false;
983 				if (optarg)
984 				{
985 					if (strcmp(optarg, "record") == 0)
986 						config.stats_per_record = true;
987 					else if (strcmp(optarg, "rmgr") != 0)
988 					{
989 						pg_log_error("unrecognized argument to --stats: %s",
990 									 optarg);
991 						goto bad_argument;
992 					}
993 				}
994 				break;
995 			default:
996 				goto bad_argument;
997 		}
998 	}
999 
1000 	if ((optind + 2) < argc)
1001 	{
1002 		pg_log_error("too many command-line arguments (first is \"%s\")",
1003 					 argv[optind + 2]);
1004 		goto bad_argument;
1005 	}
1006 
1007 	if (private.inpath != NULL)
1008 	{
1009 		/* validate path points to directory */
1010 		if (!verify_directory(private.inpath))
1011 		{
1012 			pg_log_error("path \"%s\" could not be opened: %s",
1013 						 private.inpath, strerror(errno));
1014 			goto bad_argument;
1015 		}
1016 	}
1017 
1018 	/* parse files as start/end boundaries, extract path if not specified */
1019 	if (optind < argc)
1020 	{
1021 		char	   *directory = NULL;
1022 		char	   *fname = NULL;
1023 		int			fd;
1024 		XLogSegNo	segno;
1025 
1026 		split_path(argv[optind], &directory, &fname);
1027 
1028 		if (private.inpath == NULL && directory != NULL)
1029 		{
1030 			private.inpath = directory;
1031 
1032 			if (!verify_directory(private.inpath))
1033 				fatal_error("could not open directory \"%s\": %s",
1034 							private.inpath, strerror(errno));
1035 		}
1036 
1037 		identify_target_directory(&private, private.inpath, fname);
1038 		fd = open_file_in_directory(private.inpath, fname);
1039 		if (fd < 0)
1040 			fatal_error("could not open file \"%s\"", fname);
1041 		close(fd);
1042 
1043 		/* parse position from file */
1044 		XLogFromFileName(fname, &private.timeline, &segno, WalSegSz);
1045 
1046 		if (XLogRecPtrIsInvalid(private.startptr))
1047 			XLogSegNoOffsetToRecPtr(segno, 0, WalSegSz, private.startptr);
1048 		else if (!XLByteInSeg(private.startptr, segno, WalSegSz))
1049 		{
1050 			pg_log_error("start WAL location %X/%X is not inside file \"%s\"",
1051 						 (uint32) (private.startptr >> 32),
1052 						 (uint32) private.startptr,
1053 						 fname);
1054 			goto bad_argument;
1055 		}
1056 
1057 		/* no second file specified, set end position */
1058 		if (!(optind + 1 < argc) && XLogRecPtrIsInvalid(private.endptr))
1059 			XLogSegNoOffsetToRecPtr(segno + 1, 0, WalSegSz, private.endptr);
1060 
1061 		/* parse ENDSEG if passed */
1062 		if (optind + 1 < argc)
1063 		{
1064 			XLogSegNo	endsegno;
1065 
1066 			/* ignore directory, already have that */
1067 			split_path(argv[optind + 1], &directory, &fname);
1068 
1069 			fd = open_file_in_directory(private.inpath, fname);
1070 			if (fd < 0)
1071 				fatal_error("could not open file \"%s\"", fname);
1072 			close(fd);
1073 
1074 			/* parse position from file */
1075 			XLogFromFileName(fname, &private.timeline, &endsegno, WalSegSz);
1076 
1077 			if (endsegno < segno)
1078 				fatal_error("ENDSEG %s is before STARTSEG %s",
1079 							argv[optind + 1], argv[optind]);
1080 
1081 			if (XLogRecPtrIsInvalid(private.endptr))
1082 				XLogSegNoOffsetToRecPtr(endsegno + 1, 0, WalSegSz,
1083 										private.endptr);
1084 
1085 			/* set segno to endsegno for check of --end */
1086 			segno = endsegno;
1087 		}
1088 
1089 
1090 		if (!XLByteInSeg(private.endptr, segno, WalSegSz) &&
1091 			private.endptr != (segno + 1) * WalSegSz)
1092 		{
1093 			pg_log_error("end WAL location %X/%X is not inside file \"%s\"",
1094 						 (uint32) (private.endptr >> 32),
1095 						 (uint32) private.endptr,
1096 						 argv[argc - 1]);
1097 			goto bad_argument;
1098 		}
1099 	}
1100 	else
1101 		identify_target_directory(&private, private.inpath, NULL);
1102 
1103 	/* we don't know what to print */
1104 	if (XLogRecPtrIsInvalid(private.startptr))
1105 	{
1106 		pg_log_error("no start WAL location given");
1107 		goto bad_argument;
1108 	}
1109 
1110 	/* done with argument parsing, do the actual work */
1111 
1112 	/* we have everything we need, start reading */
1113 	xlogreader_state = XLogReaderAllocate(WalSegSz, XLogDumpReadPage,
1114 										  &private);
1115 	if (!xlogreader_state)
1116 		fatal_error("out of memory");
1117 
1118 	/* first find a valid recptr to start from */
1119 	first_record = XLogFindNextRecord(xlogreader_state, private.startptr);
1120 
1121 	if (first_record == InvalidXLogRecPtr)
1122 		fatal_error("could not find a valid record after %X/%X",
1123 					(uint32) (private.startptr >> 32),
1124 					(uint32) private.startptr);
1125 
1126 	/*
1127 	 * Display a message that we're skipping data if `from` wasn't a pointer
1128 	 * to the start of a record and also wasn't a pointer to the beginning of
1129 	 * a segment (e.g. we were used in file mode).
1130 	 */
1131 	if (first_record != private.startptr &&
1132 		XLogSegmentOffset(private.startptr, WalSegSz) != 0)
1133 		printf(ngettext("first record is after %X/%X, at %X/%X, skipping over %u byte\n",
1134 						"first record is after %X/%X, at %X/%X, skipping over %u bytes\n",
1135 						(first_record - private.startptr)),
1136 			   (uint32) (private.startptr >> 32), (uint32) private.startptr,
1137 			   (uint32) (first_record >> 32), (uint32) first_record,
1138 			   (uint32) (first_record - private.startptr));
1139 
1140 	for (;;)
1141 	{
1142 		/* try to read the next record */
1143 		record = XLogReadRecord(xlogreader_state, first_record, &errormsg);
1144 		if (!record)
1145 		{
1146 			if (!config.follow || private.endptr_reached)
1147 				break;
1148 			else
1149 			{
1150 				pg_usleep(1000000L);	/* 1 second */
1151 				continue;
1152 			}
1153 		}
1154 
1155 		/* after reading the first record, continue at next one */
1156 		first_record = InvalidXLogRecPtr;
1157 
1158 		/* apply all specified filters */
1159 		if (config.filter_by_rmgr != -1 &&
1160 			config.filter_by_rmgr != record->xl_rmid)
1161 			continue;
1162 
1163 		if (config.filter_by_xid_enabled &&
1164 			config.filter_by_xid != record->xl_xid)
1165 			continue;
1166 
1167 		/* process the record */
1168 		if (config.stats == true)
1169 			XLogDumpCountRecord(&config, &stats, xlogreader_state);
1170 		else
1171 			XLogDumpDisplayRecord(&config, xlogreader_state);
1172 
1173 		/* check whether we printed enough */
1174 		config.already_displayed_records++;
1175 		if (config.stop_after_records > 0 &&
1176 			config.already_displayed_records >= config.stop_after_records)
1177 			break;
1178 	}
1179 
1180 	if (config.stats == true)
1181 		XLogDumpDisplayStats(&config, &stats);
1182 
1183 	if (errormsg)
1184 		fatal_error("error in WAL record at %X/%X: %s",
1185 					(uint32) (xlogreader_state->ReadRecPtr >> 32),
1186 					(uint32) xlogreader_state->ReadRecPtr,
1187 					errormsg);
1188 
1189 	XLogReaderFree(xlogreader_state);
1190 
1191 	return EXIT_SUCCESS;
1192 
1193 bad_argument:
1194 	fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
1195 	return EXIT_FAILURE;
1196 }
1197