1 /*-------------------------------------------------------------------------
2  *
3  * filemap.c
4  *	  A data structure for keeping track of files that have changed.
5  *
6  * Copyright (c) 2013-2018, PostgreSQL Global Development Group
7  *
8  *-------------------------------------------------------------------------
9  */
10 
11 #include "postgres_fe.h"
12 
13 #include <sys/stat.h>
14 #include <unistd.h>
15 
16 #include "datapagemap.h"
17 #include "filemap.h"
18 #include "logging.h"
19 #include "pg_rewind.h"
20 
21 #include "common/string.h"
22 #include "catalog/pg_tablespace_d.h"
23 #include "storage/fd.h"
24 
/* The global file map; allocated and installed by filemap_create(). */
filemap_t  *filemap = NULL;
26 
27 static bool isRelDataFile(const char *path);
28 static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
29 			BlockNumber segno);
30 static int	path_cmp(const void *a, const void *b);
31 static int	final_filemap_cmp(const void *a, const void *b);
32 static void filemap_list_to_array(filemap_t *map);
33 static bool check_file_excluded(const char *path, bool is_source);
34 
/*
 * Definition of one element part of an exclusion list, used to exclude
 * contents when rewinding.  "name" is the file name to check for
 * exclusion; it is compared against the last component of a path.  If
 * "match_prefix" is true, any file whose name starts with "name" is
 * excluded; otherwise the file name has to match "name" exactly.
 */
struct exclude_list_item
{
	const char *name;			/* file name, or file name prefix */
	bool		match_prefix;	/* true: prefix match, false: exact match */
};
46 
/*
 * The contents of these directories are removed or recreated during server
 * start so they are not included in data processed by pg_rewind.
 *
 * Each name is matched as a leading path component followed by "/", so
 * everything located inside the directory is excluded, at any depth, while
 * the entry for the directory itself is not.
 *
 * Note: those lists should be kept in sync with what basebackup.c provides.
 * Some of the values, contrary to what basebackup.c uses, are hardcoded as
 * they are defined in backend-only headers.  So this list is maintained
 * with a best effort in mind.
 *
 * The list is terminated by a NULL entry.
 */
static const char *excludeDirContents[] =
{
	/*
	 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
	 * when stats_temp_directory is set because PGSS_TEXT_FILE is always
	 * created there.
	 */
	"pg_stat_tmp",				/* defined as PG_STAT_TMP_DIR */

	/*
	 * It is generally not useful to backup the contents of this directory
	 * even if the intention is to restore to another master. See backup.sgml
	 * for a more detailed description.
	 */
	"pg_replslot",

	/* Contents removed on startup, see dsm_cleanup_for_mmap(). */
	"pg_dynshmem",				/* defined as PG_DYNSHMEM_DIR */

	/* Contents removed on startup, see AsyncShmemInit(). */
	"pg_notify",

	/*
	 * Old contents are loaded for possible debugging but are not required for
	 * normal operation, see OldSerXidInit().
	 */
	"pg_serial",

	/* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
	"pg_snapshots",

	/* Contents zeroed on startup, see StartupSUBTRANS(). */
	"pg_subtrans",

	/* end of list */
	NULL
};
93 
/*
 * List of files excluded from filemap processing.  Each entry is compared
 * against the file name part of a path.  Entries with match_prefix set to
 * true exclude any file whose name begins with the given name; the other
 * entries exclude only files whose name matches exactly.
 *
 * The list is terminated by an entry with a NULL name.
 */
static const struct exclude_list_item excludeFiles[] =
{
	/* Skip auto conf temporary file. */
	{"postgresql.auto.conf.tmp", false},	/* defined as PG_AUTOCONF_FILENAME */

	/* Skip current log file temporary file */
	{"current_logfiles.tmp", false},	/* defined as
										 * LOG_METAINFO_DATAFILE_TMP */

	/* Skip relation cache because it is rebuilt on startup */
	{"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */

	/*
	 * If there's a backup_label or tablespace_map file, it belongs to a
	 * backup started by the user with pg_start_backup().  It is *not* correct
	 * for this backup.  Our backup_label is written later on separately.
	 */
	{"backup_label", false},	/* defined as BACKUP_LABEL_FILE */
	{"tablespace_map", false},	/* defined as TABLESPACE_MAP */

	/* Skip postmaster.pid and postmaster.opts */
	{"postmaster.pid", false},
	{"postmaster.opts", false},

	/* end of list */
	{NULL, false}
};
124 
125 /*
126  * Create a new file map (stored in the global pointer "filemap").
127  */
128 void
filemap_create(void)129 filemap_create(void)
130 {
131 	filemap_t  *map;
132 
133 	map = pg_malloc(sizeof(filemap_t));
134 	map->first = map->last = NULL;
135 	map->nlist = 0;
136 	map->array = NULL;
137 	map->narray = 0;
138 
139 	Assert(filemap == NULL);
140 	filemap = map;
141 }
142 
143 /*
144  * Callback for processing source file list.
145  *
146  * This is called once for every file in the source server. We decide what
147  * action needs to be taken for the file, depending on whether the file
148  * exists in the target and whether the size matches.
149  */
150 void
process_source_file(const char * path,file_type_t type,size_t newsize,const char * link_target)151 process_source_file(const char *path, file_type_t type, size_t newsize,
152 					const char *link_target)
153 {
154 	bool		exists;
155 	char		localpath[MAXPGPATH];
156 	struct stat statbuf;
157 	filemap_t  *map = filemap;
158 	file_action_t action = FILE_ACTION_NONE;
159 	size_t		oldsize = 0;
160 	file_entry_t *entry;
161 
162 	Assert(map->array == NULL);
163 
164 	/*
165 	 * Skip any files matching the exclusion filters. This has the effect to
166 	 * remove all those files on the target.
167 	 */
168 	if (check_file_excluded(path, true))
169 		return;
170 
171 	/*
172 	 * Pretend that pg_wal is a directory, even if it's really a symlink. We
173 	 * don't want to mess with the symlink itself, nor complain if it's a
174 	 * symlink in source but not in target or vice versa.
175 	 */
176 	if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
177 		type = FILE_TYPE_DIRECTORY;
178 
179 	/*
180 	 * Skip temporary files, .../pgsql_tmp/... and .../pgsql_tmp.* in source.
181 	 * This has the effect that all temporary files in the destination will be
182 	 * removed.
183 	 */
184 	if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL)
185 		return;
186 	if (strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
187 		return;
188 
189 	/*
190 	 * sanity check: a filename that looks like a data file better be a
191 	 * regular file
192 	 */
193 	if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
194 		pg_fatal("data file \"%s\" in source is not a regular file\n", path);
195 
196 	snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
197 
198 	/* Does the corresponding file exist in the target data dir? */
199 	if (lstat(localpath, &statbuf) < 0)
200 	{
201 		if (errno != ENOENT)
202 			pg_fatal("could not stat file \"%s\": %s\n",
203 					 localpath, strerror(errno));
204 
205 		exists = false;
206 	}
207 	else
208 		exists = true;
209 
210 	switch (type)
211 	{
212 		case FILE_TYPE_DIRECTORY:
213 			if (exists && !S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_wal") != 0)
214 			{
215 				/* it's a directory in source, but not in target. Strange.. */
216 				pg_fatal("\"%s\" is not a directory\n", localpath);
217 			}
218 
219 			if (!exists)
220 				action = FILE_ACTION_CREATE;
221 			else
222 				action = FILE_ACTION_NONE;
223 			oldsize = 0;
224 			break;
225 
226 		case FILE_TYPE_SYMLINK:
227 			if (exists &&
228 #ifndef WIN32
229 				!S_ISLNK(statbuf.st_mode)
230 #else
231 				!pgwin32_is_junction(localpath)
232 #endif
233 				)
234 			{
235 				/*
236 				 * It's a symbolic link in source, but not in target.
237 				 * Strange..
238 				 */
239 				pg_fatal("\"%s\" is not a symbolic link\n", localpath);
240 			}
241 
242 			if (!exists)
243 				action = FILE_ACTION_CREATE;
244 			else
245 				action = FILE_ACTION_NONE;
246 			oldsize = 0;
247 			break;
248 
249 		case FILE_TYPE_REGULAR:
250 			if (exists && !S_ISREG(statbuf.st_mode))
251 				pg_fatal("\"%s\" is not a regular file\n", localpath);
252 
253 			if (!exists || !isRelDataFile(path))
254 			{
255 				/*
256 				 * File exists in source, but not in target. Or it's a
257 				 * non-data file that we have no special processing for. Copy
258 				 * it in toto.
259 				 *
260 				 * An exception: PG_VERSIONs should be identical, but avoid
261 				 * overwriting it for paranoia.
262 				 */
263 				if (pg_str_endswith(path, "PG_VERSION"))
264 				{
265 					action = FILE_ACTION_NONE;
266 					oldsize = statbuf.st_size;
267 				}
268 				else
269 				{
270 					action = FILE_ACTION_COPY;
271 					oldsize = 0;
272 				}
273 			}
274 			else
275 			{
276 				/*
277 				 * It's a data file that exists in both.
278 				 *
279 				 * If it's larger in target, we can truncate it. There will
280 				 * also be a WAL record of the truncation in the source
281 				 * system, so WAL replay would eventually truncate the target
282 				 * too, but we might as well do it now.
283 				 *
284 				 * If it's smaller in the target, it means that it has been
285 				 * truncated in the target, or enlarged in the source, or
286 				 * both. If it was truncated in the target, we need to copy
287 				 * the missing tail from the source system. If it was enlarged
288 				 * in the source system, there will be WAL records in the
289 				 * source system for the new blocks, so we wouldn't need to
290 				 * copy them here. But we don't know which scenario we're
291 				 * dealing with, and there's no harm in copying the missing
292 				 * blocks now, so do it now.
293 				 *
294 				 * If it's the same size, do nothing here. Any blocks modified
295 				 * in the target will be copied based on parsing the target
296 				 * system's WAL, and any blocks modified in the source will be
297 				 * updated after rewinding, when the source system's WAL is
298 				 * replayed.
299 				 */
300 				oldsize = statbuf.st_size;
301 				if (oldsize < newsize)
302 					action = FILE_ACTION_COPY_TAIL;
303 				else if (oldsize > newsize)
304 					action = FILE_ACTION_TRUNCATE;
305 				else
306 					action = FILE_ACTION_NONE;
307 			}
308 			break;
309 	}
310 
311 	/* Create a new entry for this file */
312 	entry = pg_malloc(sizeof(file_entry_t));
313 	entry->path = pg_strdup(path);
314 	entry->type = type;
315 	entry->action = action;
316 	entry->oldsize = oldsize;
317 	entry->newsize = newsize;
318 	entry->link_target = link_target ? pg_strdup(link_target) : NULL;
319 	entry->next = NULL;
320 	entry->pagemap.bitmap = NULL;
321 	entry->pagemap.bitmapsize = 0;
322 	entry->isrelfile = isRelDataFile(path);
323 
324 	if (map->last)
325 	{
326 		map->last->next = entry;
327 		map->last = entry;
328 	}
329 	else
330 		map->first = map->last = entry;
331 	map->nlist++;
332 }
333 
334 /*
335  * Callback for processing target file list.
336  *
337  * All source files must be already processed before calling this. This only
338  * marks target data directory's files that didn't exist in the source for
339  * deletion.
340  */
341 void
process_target_file(const char * path,file_type_t type,size_t oldsize,const char * link_target)342 process_target_file(const char *path, file_type_t type, size_t oldsize,
343 					const char *link_target)
344 {
345 	bool		exists;
346 	file_entry_t key;
347 	file_entry_t *key_ptr;
348 	filemap_t  *map = filemap;
349 	file_entry_t *entry;
350 
351 	/*
352 	 * Do not apply any exclusion filters here.  This has advantage to remove
353 	 * from the target data folder all paths which have been filtered out from
354 	 * the source data folder when processing the source files.
355 	 */
356 
357 	if (map->array == NULL)
358 	{
359 		/* on first call, initialize lookup array */
360 		if (map->nlist == 0)
361 		{
362 			/* should not happen */
363 			pg_fatal("source file list is empty\n");
364 		}
365 
366 		filemap_list_to_array(map);
367 
368 		Assert(map->array != NULL);
369 
370 		qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp);
371 	}
372 
373 	/*
374 	 * Like in process_source_file, pretend that xlog is always a  directory.
375 	 */
376 	if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
377 		type = FILE_TYPE_DIRECTORY;
378 
379 	key.path = (char *) path;
380 	key_ptr = &key;
381 	exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
382 					  path_cmp) != NULL);
383 
384 	/* Remove any file or folder that doesn't exist in the source system. */
385 	if (!exists)
386 	{
387 		entry = pg_malloc(sizeof(file_entry_t));
388 		entry->path = pg_strdup(path);
389 		entry->type = type;
390 		entry->action = FILE_ACTION_REMOVE;
391 		entry->oldsize = oldsize;
392 		entry->newsize = 0;
393 		entry->link_target = link_target ? pg_strdup(link_target) : NULL;
394 		entry->next = NULL;
395 		entry->pagemap.bitmap = NULL;
396 		entry->pagemap.bitmapsize = 0;
397 		entry->isrelfile = isRelDataFile(path);
398 
399 		if (map->last == NULL)
400 			map->first = entry;
401 		else
402 			map->last->next = entry;
403 		map->last = entry;
404 		map->nlist++;
405 	}
406 	else
407 	{
408 		/*
409 		 * We already handled all files that exist in the source system in
410 		 * process_source_file().
411 		 */
412 	}
413 }
414 
415 /*
416  * This callback gets called while we read the WAL in the target, for every
417  * block that have changed in the target system. It makes note of all the
418  * changed blocks in the pagemap of the file.
419  */
420 void
process_block_change(ForkNumber forknum,RelFileNode rnode,BlockNumber blkno)421 process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
422 {
423 	char	   *path;
424 	file_entry_t key;
425 	file_entry_t *key_ptr;
426 	file_entry_t *entry;
427 	BlockNumber blkno_inseg;
428 	int			segno;
429 	filemap_t  *map = filemap;
430 	file_entry_t **e;
431 
432 	Assert(map->array);
433 
434 	segno = blkno / RELSEG_SIZE;
435 	blkno_inseg = blkno % RELSEG_SIZE;
436 
437 	path = datasegpath(rnode, forknum, segno);
438 
439 	key.path = (char *) path;
440 	key_ptr = &key;
441 
442 	e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
443 				path_cmp);
444 	if (e)
445 		entry = *e;
446 	else
447 		entry = NULL;
448 	pfree(path);
449 
450 	if (entry)
451 	{
452 		Assert(entry->isrelfile);
453 
454 		switch (entry->action)
455 		{
456 			case FILE_ACTION_NONE:
457 			case FILE_ACTION_TRUNCATE:
458 				/* skip if we're truncating away the modified block anyway */
459 				if ((blkno_inseg + 1) * BLCKSZ <= entry->newsize)
460 					datapagemap_add(&entry->pagemap, blkno_inseg);
461 				break;
462 
463 			case FILE_ACTION_COPY_TAIL:
464 
465 				/*
466 				 * skip the modified block if it is part of the "tail" that
467 				 * we're copying anyway.
468 				 */
469 				if ((blkno_inseg + 1) * BLCKSZ <= entry->oldsize)
470 					datapagemap_add(&entry->pagemap, blkno_inseg);
471 				break;
472 
473 			case FILE_ACTION_COPY:
474 			case FILE_ACTION_REMOVE:
475 				break;
476 
477 			case FILE_ACTION_CREATE:
478 				pg_fatal("unexpected page modification for directory or symbolic link \"%s\"\n", entry->path);
479 		}
480 	}
481 	else
482 	{
483 		/*
484 		 * If we don't have any record of this file in the file map, it means
485 		 * that it's a relation that doesn't exist in the source system, and
486 		 * it was subsequently removed in the target system, too. We can
487 		 * safely ignore it.
488 		 */
489 	}
490 }
491 
492 /*
493  * Is this the path of file that pg_rewind can skip copying?
494  */
495 static bool
check_file_excluded(const char * path,bool is_source)496 check_file_excluded(const char *path, bool is_source)
497 {
498 	char		localpath[MAXPGPATH];
499 	int			excludeIdx;
500 	const char *filename;
501 
502 	/* check individual files... */
503 	for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
504 	{
505 		int			cmplen = strlen(excludeFiles[excludeIdx].name);
506 
507 		filename = last_dir_separator(path);
508 		if (filename == NULL)
509 			filename = path;
510 		else
511 			filename++;
512 
513 		if (!excludeFiles[excludeIdx].match_prefix)
514 			cmplen++;
515 		if (strncmp(filename, excludeFiles[excludeIdx].name, cmplen) == 0)
516 		{
517 			if (is_source)
518 				pg_log(PG_DEBUG, "entry \"%s\" excluded from source file list\n",
519 					   path);
520 			else
521 				pg_log(PG_DEBUG, "entry \"%s\" excluded from target file list\n",
522 					   path);
523 			return true;
524 		}
525 	}
526 
527 	/*
528 	 * ... And check some directories.  Note that this includes any contents
529 	 * within the directories themselves.
530 	 */
531 	for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
532 	{
533 		snprintf(localpath, sizeof(localpath), "%s/",
534 				 excludeDirContents[excludeIdx]);
535 		if (strstr(path, localpath) == path)
536 		{
537 			if (is_source)
538 				pg_log(PG_DEBUG, "entry \"%s\" excluded from source file list\n",
539 					   path);
540 			else
541 				pg_log(PG_DEBUG, "entry \"%s\" excluded from target file list\n",
542 					   path);
543 			return true;
544 		}
545 	}
546 
547 	return false;
548 }
549 
550 /*
551  * Convert the linked list of entries in map->first/last to the array,
552  * map->array.
553  */
554 static void
filemap_list_to_array(filemap_t * map)555 filemap_list_to_array(filemap_t *map)
556 {
557 	int			narray;
558 	file_entry_t *entry,
559 			   *next;
560 
561 	map->array = (file_entry_t **)
562 		pg_realloc(map->array,
563 				   (map->nlist + map->narray) * sizeof(file_entry_t *));
564 
565 	narray = map->narray;
566 	for (entry = map->first; entry != NULL; entry = next)
567 	{
568 		map->array[narray++] = entry;
569 		next = entry->next;
570 		entry->next = NULL;
571 	}
572 	Assert(narray == map->nlist + map->narray);
573 	map->narray = narray;
574 	map->nlist = 0;
575 	map->first = map->last = NULL;
576 }
577 
578 void
filemap_finalize(void)579 filemap_finalize(void)
580 {
581 	filemap_t  *map = filemap;
582 
583 	filemap_list_to_array(map);
584 	qsort(map->array, map->narray, sizeof(file_entry_t *),
585 		  final_filemap_cmp);
586 }
587 
588 static const char *
action_to_str(file_action_t action)589 action_to_str(file_action_t action)
590 {
591 	switch (action)
592 	{
593 		case FILE_ACTION_NONE:
594 			return "NONE";
595 		case FILE_ACTION_COPY:
596 			return "COPY";
597 		case FILE_ACTION_TRUNCATE:
598 			return "TRUNCATE";
599 		case FILE_ACTION_COPY_TAIL:
600 			return "COPY_TAIL";
601 		case FILE_ACTION_CREATE:
602 			return "CREATE";
603 		case FILE_ACTION_REMOVE:
604 			return "REMOVE";
605 
606 		default:
607 			return "unknown";
608 	}
609 }
610 
611 /*
612  * Calculate the totals needed for progress reports.
613  */
614 void
calculate_totals(void)615 calculate_totals(void)
616 {
617 	file_entry_t *entry;
618 	int			i;
619 	filemap_t  *map = filemap;
620 
621 	map->total_size = 0;
622 	map->fetch_size = 0;
623 
624 	for (i = 0; i < map->narray; i++)
625 	{
626 		entry = map->array[i];
627 
628 		if (entry->type != FILE_TYPE_REGULAR)
629 			continue;
630 
631 		map->total_size += entry->newsize;
632 
633 		if (entry->action == FILE_ACTION_COPY)
634 		{
635 			map->fetch_size += entry->newsize;
636 			continue;
637 		}
638 
639 		if (entry->action == FILE_ACTION_COPY_TAIL)
640 			map->fetch_size += (entry->newsize - entry->oldsize);
641 
642 		if (entry->pagemap.bitmapsize > 0)
643 		{
644 			datapagemap_iterator_t *iter;
645 			BlockNumber blk;
646 
647 			iter = datapagemap_iterate(&entry->pagemap);
648 			while (datapagemap_next(iter, &blk))
649 				map->fetch_size += BLCKSZ;
650 
651 			pg_free(iter);
652 		}
653 	}
654 }
655 
656 void
print_filemap(void)657 print_filemap(void)
658 {
659 	filemap_t  *map = filemap;
660 	file_entry_t *entry;
661 	int			i;
662 
663 	for (i = 0; i < map->narray; i++)
664 	{
665 		entry = map->array[i];
666 		if (entry->action != FILE_ACTION_NONE ||
667 			entry->pagemap.bitmapsize > 0)
668 		{
669 			pg_log(PG_DEBUG,
670 			/*------
671 			   translator: first %s is a file path, second is a keyword such as COPY */
672 				   "%s (%s)\n", entry->path,
673 				   action_to_str(entry->action));
674 
675 			if (entry->pagemap.bitmapsize > 0)
676 				datapagemap_print(&entry->pagemap);
677 		}
678 	}
679 	fflush(stdout);
680 }
681 
682 /*
683  * Does it look like a relation data file?
684  *
685  * For our purposes, only files belonging to the main fork are considered
686  * relation files. Other forks are always copied in toto, because we cannot
687  * reliably track changes to them, because WAL only contains block references
688  * for the main fork.
689  */
690 static bool
isRelDataFile(const char * path)691 isRelDataFile(const char *path)
692 {
693 	RelFileNode rnode;
694 	unsigned int segNo;
695 	int			nmatch;
696 	bool		matched;
697 
698 	/*----
699 	 * Relation data files can be in one of the following directories:
700 	 *
701 	 * global/
702 	 *		shared relations
703 	 *
704 	 * base/<db oid>/
705 	 *		regular relations, default tablespace
706 	 *
707 	 * pg_tblspc/<tblspc oid>/<tblspc version>/
708 	 *		within a non-default tablespace (the name of the directory
709 	 *		depends on version)
710 	 *
711 	 * And the relation data files themselves have a filename like:
712 	 *
713 	 * <oid>.<segment number>
714 	 *
715 	 *----
716 	 */
717 	rnode.spcNode = InvalidOid;
718 	rnode.dbNode = InvalidOid;
719 	rnode.relNode = InvalidOid;
720 	segNo = 0;
721 	matched = false;
722 
723 	nmatch = sscanf(path, "global/%u.%u", &rnode.relNode, &segNo);
724 	if (nmatch == 1 || nmatch == 2)
725 	{
726 		rnode.spcNode = GLOBALTABLESPACE_OID;
727 		rnode.dbNode = 0;
728 		matched = true;
729 	}
730 	else
731 	{
732 		nmatch = sscanf(path, "base/%u/%u.%u",
733 						&rnode.dbNode, &rnode.relNode, &segNo);
734 		if (nmatch == 2 || nmatch == 3)
735 		{
736 			rnode.spcNode = DEFAULTTABLESPACE_OID;
737 			matched = true;
738 		}
739 		else
740 		{
741 			nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
742 							&rnode.spcNode, &rnode.dbNode, &rnode.relNode,
743 							&segNo);
744 			if (nmatch == 3 || nmatch == 4)
745 				matched = true;
746 		}
747 	}
748 
749 	/*
750 	 * The sscanf tests above can match files that have extra characters at
751 	 * the end. To eliminate such cases, cross-check that GetRelationPath
752 	 * creates the exact same filename, when passed the RelFileNode
753 	 * information we extracted from the filename.
754 	 */
755 	if (matched)
756 	{
757 		char	   *check_path = datasegpath(rnode, MAIN_FORKNUM, segNo);
758 
759 		if (strcmp(check_path, path) != 0)
760 			matched = false;
761 
762 		pfree(check_path);
763 	}
764 
765 	return matched;
766 }
767 
768 /*
769  * A helper function to create the path of a relation file and segment.
770  *
771  * The returned path is palloc'd
772  */
773 static char *
datasegpath(RelFileNode rnode,ForkNumber forknum,BlockNumber segno)774 datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
775 {
776 	char	   *path;
777 	char	   *segpath;
778 
779 	path = relpathperm(rnode, forknum);
780 	if (segno > 0)
781 	{
782 		segpath = psprintf("%s.%u", path, segno);
783 		pfree(path);
784 		return segpath;
785 	}
786 	else
787 		return path;
788 }
789 
790 static int
path_cmp(const void * a,const void * b)791 path_cmp(const void *a, const void *b)
792 {
793 	file_entry_t *fa = *((file_entry_t **) a);
794 	file_entry_t *fb = *((file_entry_t **) b);
795 
796 	return strcmp(fa->path, fb->path);
797 }
798 
799 /*
800  * In the final stage, the filemap is sorted so that removals come last.
801  * From disk space usage point of view, it would be better to do removals
802  * first, but for now, safety first. If a whole directory is deleted, all
803  * files and subdirectories inside it need to removed first. On creation,
804  * parent directory needs to be created before files and directories inside
805  * it. To achieve that, the file_action_t enum is ordered so that we can
806  * just sort on that first. Furthermore, sort REMOVE entries in reverse
807  * path order, so that "foo/bar" subdirectory is removed before "foo".
808  */
809 static int
final_filemap_cmp(const void * a,const void * b)810 final_filemap_cmp(const void *a, const void *b)
811 {
812 	file_entry_t *fa = *((file_entry_t **) a);
813 	file_entry_t *fb = *((file_entry_t **) b);
814 
815 	if (fa->action > fb->action)
816 		return 1;
817 	if (fa->action < fb->action)
818 		return -1;
819 
820 	if (fa->action == FILE_ACTION_REMOVE)
821 		return strcmp(fb->path, fa->path);
822 	else
823 		return strcmp(fa->path, fb->path);
824 }
825