1 /*-------------------------------------------------------------------------
2  *
3  * filemap.c
4  *	  A data structure for keeping track of files that have changed.
5  *
6  * Copyright (c) 2013-2020, PostgreSQL Global Development Group
7  *
8  *-------------------------------------------------------------------------
9  */
10 
11 #include "postgres_fe.h"
12 
13 #include <sys/stat.h>
14 #include <unistd.h>
15 
16 #include "catalog/pg_tablespace_d.h"
17 #include "common/string.h"
18 #include "datapagemap.h"
19 #include "filemap.h"
20 #include "pg_rewind.h"
21 #include "storage/fd.h"
22 
23 filemap_t  *filemap = NULL;
24 
25 static bool isRelDataFile(const char *path);
26 static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
27 						 BlockNumber segno);
28 static int	path_cmp(const void *a, const void *b);
29 static int	final_filemap_cmp(const void *a, const void *b);
30 static void filemap_list_to_array(filemap_t *map);
31 static bool check_file_excluded(const char *path, bool is_source);
32 
/*
 * One entry of a file exclusion list, used to leave files out when
 * rewinding.
 *
 * "name" is the file name to compare against.  When "match_prefix" is true,
 * every file whose name starts with "name" is excluded; otherwise only a
 * file whose name is exactly "name" is excluded.
 */
struct exclude_list_item
{
	const char *name;			/* file name, or file name prefix */
	bool		match_prefix;	/* true: prefix match, false: exact match */
};
44 
/*
 * Directories whose contents are not processed by pg_rewind, because the
 * server removes or recreates them during startup.  Only the contents are
 * skipped, not the directories themselves.
 *
 * Note: this list should be kept in sync with what basebackup.c provides.
 * Contrary to basebackup.c, some of the names are hardcoded here because
 * they are defined in backend-only headers, so the list is maintained on a
 * best-effort basis.
 */
static const char *const excludeDirContents[] =
{
	/*
	 * Temporary statistics files.  PG_STAT_TMP_DIR must be skipped even when
	 * stats_temp_directory is set, because PGSS_TEXT_FILE is always created
	 * there.
	 */
	"pg_stat_tmp",				/* defined as PG_STAT_TMP_DIR */

	/*
	 * Backing up the contents of this directory is generally not useful,
	 * even if the intention is to restore to another master.  See
	 * backup.sgml for a more detailed description.
	 */
	"pg_replslot",

	/* Removed on startup, see dsm_cleanup_for_mmap(). */
	"pg_dynshmem",				/* defined as PG_DYNSHMEM_DIR */

	/* Removed on startup, see AsyncShmemInit(). */
	"pg_notify",

	/*
	 * Old contents are loaded for possible debugging, but normal operation
	 * does not need them; see SerialInit().
	 */
	"pg_serial",

	/* Removed on startup, see DeleteAllExportedSnapshotFiles(). */
	"pg_snapshots",

	/* Zeroed on startup, see StartupSUBTRANS(). */
	"pg_subtrans",

	/* list terminator */
	NULL
};
91 
92 /*
93  * List of files excluded from filemap processing.   Files are excluded
94  * if their prefix match.
95  */
96 static const struct exclude_list_item excludeFiles[] =
97 {
98 	/* Skip auto conf temporary file. */
99 	{"postgresql.auto.conf.tmp", false},	/* defined as PG_AUTOCONF_FILENAME */
100 
101 	/* Skip current log file temporary file */
102 	{"current_logfiles.tmp", false},	/* defined as
103 										 * LOG_METAINFO_DATAFILE_TMP */
104 
105 	/* Skip relation cache because it is rebuilt on startup */
106 	{"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */
107 
108 	/*
109 	 * If there's a backup_label or tablespace_map file, it belongs to a
110 	 * backup started by the user with pg_start_backup().  It is *not* correct
111 	 * for this backup.  Our backup_label is written later on separately.
112 	 */
113 	{"backup_label", false},	/* defined as BACKUP_LABEL_FILE */
114 	{"tablespace_map", false},	/* defined as TABLESPACE_MAP */
115 
116 	/*
117 	 * If there's a backup_manifest, it belongs to a backup that was used to
118 	 * start this server. It is *not* correct for this backup. Our
119 	 * backup_manifest is injected into the backup separately if users want
120 	 * it.
121 	 */
122 	{"backup_manifest", false},
123 
124 	{"postmaster.pid", false},
125 	{"postmaster.opts", false},
126 
127 	/* end of list */
128 	{NULL, false}
129 };
130 
131 /*
132  * Create a new file map (stored in the global pointer "filemap").
133  */
134 void
filemap_create(void)135 filemap_create(void)
136 {
137 	filemap_t  *map;
138 
139 	map = pg_malloc(sizeof(filemap_t));
140 	map->first = map->last = NULL;
141 	map->nlist = 0;
142 	map->array = NULL;
143 	map->narray = 0;
144 
145 	Assert(filemap == NULL);
146 	filemap = map;
147 }
148 
149 /*
150  * Callback for processing source file list.
151  *
152  * This is called once for every file in the source server. We decide what
153  * action needs to be taken for the file, depending on whether the file
154  * exists in the target and whether the size matches.
155  */
156 void
process_source_file(const char * path,file_type_t type,size_t newsize,const char * link_target)157 process_source_file(const char *path, file_type_t type, size_t newsize,
158 					const char *link_target)
159 {
160 	bool		exists;
161 	char		localpath[MAXPGPATH];
162 	struct stat statbuf;
163 	filemap_t  *map = filemap;
164 	file_action_t action = FILE_ACTION_NONE;
165 	size_t		oldsize = 0;
166 	file_entry_t *entry;
167 
168 	Assert(map->array == NULL);
169 
170 	/*
171 	 * Skip any files matching the exclusion filters. This has the effect to
172 	 * remove all those files on the target.
173 	 */
174 	if (check_file_excluded(path, true))
175 		return;
176 
177 	/*
178 	 * Pretend that pg_wal is a directory, even if it's really a symlink. We
179 	 * don't want to mess with the symlink itself, nor complain if it's a
180 	 * symlink in source but not in target or vice versa.
181 	 */
182 	if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
183 		type = FILE_TYPE_DIRECTORY;
184 
185 	/*
186 	 * Skip temporary files, .../pgsql_tmp/... and .../pgsql_tmp.* in source.
187 	 * This has the effect that all temporary files in the destination will be
188 	 * removed.
189 	 */
190 	if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL)
191 		return;
192 	if (strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
193 		return;
194 
195 	/*
196 	 * sanity check: a filename that looks like a data file better be a
197 	 * regular file
198 	 */
199 	if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
200 		pg_fatal("data file \"%s\" in source is not a regular file", path);
201 
202 	snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
203 
204 	/* Does the corresponding file exist in the target data dir? */
205 	if (lstat(localpath, &statbuf) < 0)
206 	{
207 		if (errno != ENOENT)
208 			pg_fatal("could not stat file \"%s\": %m",
209 					 localpath);
210 
211 		exists = false;
212 	}
213 	else
214 		exists = true;
215 
216 	switch (type)
217 	{
218 		case FILE_TYPE_DIRECTORY:
219 			if (exists && !S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_wal") != 0)
220 			{
221 				/* it's a directory in source, but not in target. Strange.. */
222 				pg_fatal("\"%s\" is not a directory", localpath);
223 			}
224 
225 			if (!exists)
226 				action = FILE_ACTION_CREATE;
227 			else
228 				action = FILE_ACTION_NONE;
229 			oldsize = 0;
230 			break;
231 
232 		case FILE_TYPE_SYMLINK:
233 			if (exists &&
234 #ifndef WIN32
235 				!S_ISLNK(statbuf.st_mode)
236 #else
237 				!pgwin32_is_junction(localpath)
238 #endif
239 				)
240 			{
241 				/*
242 				 * It's a symbolic link in source, but not in target.
243 				 * Strange..
244 				 */
245 				pg_fatal("\"%s\" is not a symbolic link", localpath);
246 			}
247 
248 			if (!exists)
249 				action = FILE_ACTION_CREATE;
250 			else
251 				action = FILE_ACTION_NONE;
252 			oldsize = 0;
253 			break;
254 
255 		case FILE_TYPE_REGULAR:
256 			if (exists && !S_ISREG(statbuf.st_mode))
257 				pg_fatal("\"%s\" is not a regular file", localpath);
258 
259 			if (!exists || !isRelDataFile(path))
260 			{
261 				/*
262 				 * File exists in source, but not in target. Or it's a
263 				 * non-data file that we have no special processing for. Copy
264 				 * it in toto.
265 				 *
266 				 * An exception: PG_VERSIONs should be identical, but avoid
267 				 * overwriting it for paranoia.
268 				 */
269 				if (pg_str_endswith(path, "PG_VERSION"))
270 				{
271 					action = FILE_ACTION_NONE;
272 					oldsize = statbuf.st_size;
273 				}
274 				else
275 				{
276 					action = FILE_ACTION_COPY;
277 					oldsize = 0;
278 				}
279 			}
280 			else
281 			{
282 				/*
283 				 * It's a data file that exists in both.
284 				 *
285 				 * If it's larger in target, we can truncate it. There will
286 				 * also be a WAL record of the truncation in the source
287 				 * system, so WAL replay would eventually truncate the target
288 				 * too, but we might as well do it now.
289 				 *
290 				 * If it's smaller in the target, it means that it has been
291 				 * truncated in the target, or enlarged in the source, or
292 				 * both. If it was truncated in the target, we need to copy
293 				 * the missing tail from the source system. If it was enlarged
294 				 * in the source system, there will be WAL records in the
295 				 * source system for the new blocks, so we wouldn't need to
296 				 * copy them here. But we don't know which scenario we're
297 				 * dealing with, and there's no harm in copying the missing
298 				 * blocks now, so do it now.
299 				 *
300 				 * If it's the same size, do nothing here. Any blocks modified
301 				 * in the target will be copied based on parsing the target
302 				 * system's WAL, and any blocks modified in the source will be
303 				 * updated after rewinding, when the source system's WAL is
304 				 * replayed.
305 				 */
306 				oldsize = statbuf.st_size;
307 				if (oldsize < newsize)
308 					action = FILE_ACTION_COPY_TAIL;
309 				else if (oldsize > newsize)
310 					action = FILE_ACTION_TRUNCATE;
311 				else
312 					action = FILE_ACTION_NONE;
313 			}
314 			break;
315 	}
316 
317 	/* Create a new entry for this file */
318 	entry = pg_malloc(sizeof(file_entry_t));
319 	entry->path = pg_strdup(path);
320 	entry->type = type;
321 	entry->action = action;
322 	entry->oldsize = oldsize;
323 	entry->newsize = newsize;
324 	entry->link_target = link_target ? pg_strdup(link_target) : NULL;
325 	entry->next = NULL;
326 	entry->pagemap.bitmap = NULL;
327 	entry->pagemap.bitmapsize = 0;
328 	entry->isrelfile = isRelDataFile(path);
329 
330 	if (map->last)
331 	{
332 		map->last->next = entry;
333 		map->last = entry;
334 	}
335 	else
336 		map->first = map->last = entry;
337 	map->nlist++;
338 }
339 
340 /*
341  * Callback for processing target file list.
342  *
343  * All source files must be already processed before calling this. This only
344  * marks target data directory's files that didn't exist in the source for
345  * deletion.
346  */
347 void
process_target_file(const char * path,file_type_t type,size_t oldsize,const char * link_target)348 process_target_file(const char *path, file_type_t type, size_t oldsize,
349 					const char *link_target)
350 {
351 	bool		exists;
352 	file_entry_t key;
353 	file_entry_t *key_ptr;
354 	filemap_t  *map = filemap;
355 	file_entry_t *entry;
356 
357 	/*
358 	 * Do not apply any exclusion filters here.  This has advantage to remove
359 	 * from the target data folder all paths which have been filtered out from
360 	 * the source data folder when processing the source files.
361 	 */
362 
363 	if (map->array == NULL)
364 	{
365 		/* on first call, initialize lookup array */
366 		if (map->nlist == 0)
367 		{
368 			/* should not happen */
369 			pg_fatal("source file list is empty");
370 		}
371 
372 		filemap_list_to_array(map);
373 
374 		Assert(map->array != NULL);
375 
376 		qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp);
377 	}
378 
379 	/*
380 	 * Like in process_source_file, pretend that xlog is always a  directory.
381 	 */
382 	if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
383 		type = FILE_TYPE_DIRECTORY;
384 
385 	key.path = (char *) path;
386 	key_ptr = &key;
387 	exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
388 					  path_cmp) != NULL);
389 
390 	/* Remove any file or folder that doesn't exist in the source system. */
391 	if (!exists)
392 	{
393 		entry = pg_malloc(sizeof(file_entry_t));
394 		entry->path = pg_strdup(path);
395 		entry->type = type;
396 		entry->action = FILE_ACTION_REMOVE;
397 		entry->oldsize = oldsize;
398 		entry->newsize = 0;
399 		entry->link_target = link_target ? pg_strdup(link_target) : NULL;
400 		entry->next = NULL;
401 		entry->pagemap.bitmap = NULL;
402 		entry->pagemap.bitmapsize = 0;
403 		entry->isrelfile = isRelDataFile(path);
404 
405 		if (map->last == NULL)
406 			map->first = entry;
407 		else
408 			map->last->next = entry;
409 		map->last = entry;
410 		map->nlist++;
411 	}
412 	else
413 	{
414 		/*
415 		 * We already handled all files that exist in the source system in
416 		 * process_source_file().
417 		 */
418 	}
419 }
420 
421 /*
422  * This callback gets called while we read the WAL in the target, for every
423  * block that have changed in the target system. It makes note of all the
424  * changed blocks in the pagemap of the file.
425  */
426 void
process_block_change(ForkNumber forknum,RelFileNode rnode,BlockNumber blkno)427 process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
428 {
429 	char	   *path;
430 	file_entry_t key;
431 	file_entry_t *key_ptr;
432 	file_entry_t *entry;
433 	BlockNumber blkno_inseg;
434 	int			segno;
435 	filemap_t  *map = filemap;
436 	file_entry_t **e;
437 
438 	Assert(map->array);
439 
440 	segno = blkno / RELSEG_SIZE;
441 	blkno_inseg = blkno % RELSEG_SIZE;
442 
443 	path = datasegpath(rnode, forknum, segno);
444 
445 	key.path = (char *) path;
446 	key_ptr = &key;
447 
448 	e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
449 				path_cmp);
450 	if (e)
451 		entry = *e;
452 	else
453 		entry = NULL;
454 	pfree(path);
455 
456 	if (entry)
457 	{
458 		Assert(entry->isrelfile);
459 
460 		switch (entry->action)
461 		{
462 			case FILE_ACTION_NONE:
463 			case FILE_ACTION_TRUNCATE:
464 				/* skip if we're truncating away the modified block anyway */
465 				if ((blkno_inseg + 1) * BLCKSZ <= entry->newsize)
466 					datapagemap_add(&entry->pagemap, blkno_inseg);
467 				break;
468 
469 			case FILE_ACTION_COPY_TAIL:
470 
471 				/*
472 				 * skip the modified block if it is part of the "tail" that
473 				 * we're copying anyway.
474 				 */
475 				if ((blkno_inseg + 1) * BLCKSZ <= entry->oldsize)
476 					datapagemap_add(&entry->pagemap, blkno_inseg);
477 				break;
478 
479 			case FILE_ACTION_COPY:
480 			case FILE_ACTION_REMOVE:
481 				break;
482 
483 			case FILE_ACTION_CREATE:
484 				pg_fatal("unexpected page modification for directory or symbolic link \"%s\"", entry->path);
485 		}
486 	}
487 	else
488 	{
489 		/*
490 		 * If we don't have any record of this file in the file map, it means
491 		 * that it's a relation that doesn't exist in the source system, and
492 		 * it was subsequently removed in the target system, too. We can
493 		 * safely ignore it.
494 		 */
495 	}
496 }
497 
498 /*
499  * Is this the path of file that pg_rewind can skip copying?
500  */
501 static bool
check_file_excluded(const char * path,bool is_source)502 check_file_excluded(const char *path, bool is_source)
503 {
504 	char		localpath[MAXPGPATH];
505 	int			excludeIdx;
506 	const char *filename;
507 
508 	/* check individual files... */
509 	for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
510 	{
511 		int			cmplen = strlen(excludeFiles[excludeIdx].name);
512 
513 		filename = last_dir_separator(path);
514 		if (filename == NULL)
515 			filename = path;
516 		else
517 			filename++;
518 
519 		if (!excludeFiles[excludeIdx].match_prefix)
520 			cmplen++;
521 		if (strncmp(filename, excludeFiles[excludeIdx].name, cmplen) == 0)
522 		{
523 			if (is_source)
524 				pg_log_debug("entry \"%s\" excluded from source file list",
525 							 path);
526 			else
527 				pg_log_debug("entry \"%s\" excluded from target file list",
528 							 path);
529 			return true;
530 		}
531 	}
532 
533 	/*
534 	 * ... And check some directories.  Note that this includes any contents
535 	 * within the directories themselves.
536 	 */
537 	for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
538 	{
539 		snprintf(localpath, sizeof(localpath), "%s/",
540 				 excludeDirContents[excludeIdx]);
541 		if (strstr(path, localpath) == path)
542 		{
543 			if (is_source)
544 				pg_log_debug("entry \"%s\" excluded from source file list",
545 							 path);
546 			else
547 				pg_log_debug("entry \"%s\" excluded from target file list",
548 							 path);
549 			return true;
550 		}
551 	}
552 
553 	return false;
554 }
555 
556 /*
557  * Convert the linked list of entries in map->first/last to the array,
558  * map->array.
559  */
560 static void
filemap_list_to_array(filemap_t * map)561 filemap_list_to_array(filemap_t *map)
562 {
563 	int			narray;
564 	file_entry_t *entry,
565 			   *next;
566 
567 	map->array = (file_entry_t **)
568 		pg_realloc(map->array,
569 				   (map->nlist + map->narray) * sizeof(file_entry_t *));
570 
571 	narray = map->narray;
572 	for (entry = map->first; entry != NULL; entry = next)
573 	{
574 		map->array[narray++] = entry;
575 		next = entry->next;
576 		entry->next = NULL;
577 	}
578 	Assert(narray == map->nlist + map->narray);
579 	map->narray = narray;
580 	map->nlist = 0;
581 	map->first = map->last = NULL;
582 }
583 
584 void
filemap_finalize(void)585 filemap_finalize(void)
586 {
587 	filemap_t  *map = filemap;
588 
589 	filemap_list_to_array(map);
590 	qsort(map->array, map->narray, sizeof(file_entry_t *),
591 		  final_filemap_cmp);
592 }
593 
594 static const char *
action_to_str(file_action_t action)595 action_to_str(file_action_t action)
596 {
597 	switch (action)
598 	{
599 		case FILE_ACTION_NONE:
600 			return "NONE";
601 		case FILE_ACTION_COPY:
602 			return "COPY";
603 		case FILE_ACTION_TRUNCATE:
604 			return "TRUNCATE";
605 		case FILE_ACTION_COPY_TAIL:
606 			return "COPY_TAIL";
607 		case FILE_ACTION_CREATE:
608 			return "CREATE";
609 		case FILE_ACTION_REMOVE:
610 			return "REMOVE";
611 
612 		default:
613 			return "unknown";
614 	}
615 }
616 
617 /*
618  * Calculate the totals needed for progress reports.
619  */
620 void
calculate_totals(void)621 calculate_totals(void)
622 {
623 	file_entry_t *entry;
624 	int			i;
625 	filemap_t  *map = filemap;
626 
627 	map->total_size = 0;
628 	map->fetch_size = 0;
629 
630 	for (i = 0; i < map->narray; i++)
631 	{
632 		entry = map->array[i];
633 
634 		if (entry->type != FILE_TYPE_REGULAR)
635 			continue;
636 
637 		map->total_size += entry->newsize;
638 
639 		if (entry->action == FILE_ACTION_COPY)
640 		{
641 			map->fetch_size += entry->newsize;
642 			continue;
643 		}
644 
645 		if (entry->action == FILE_ACTION_COPY_TAIL)
646 			map->fetch_size += (entry->newsize - entry->oldsize);
647 
648 		if (entry->pagemap.bitmapsize > 0)
649 		{
650 			datapagemap_iterator_t *iter;
651 			BlockNumber blk;
652 
653 			iter = datapagemap_iterate(&entry->pagemap);
654 			while (datapagemap_next(iter, &blk))
655 				map->fetch_size += BLCKSZ;
656 
657 			pg_free(iter);
658 		}
659 	}
660 }
661 
662 void
print_filemap(void)663 print_filemap(void)
664 {
665 	filemap_t  *map = filemap;
666 	file_entry_t *entry;
667 	int			i;
668 
669 	for (i = 0; i < map->narray; i++)
670 	{
671 		entry = map->array[i];
672 		if (entry->action != FILE_ACTION_NONE ||
673 			entry->pagemap.bitmapsize > 0)
674 		{
675 			pg_log_debug("%s (%s)", entry->path,
676 						 action_to_str(entry->action));
677 
678 			if (entry->pagemap.bitmapsize > 0)
679 				datapagemap_print(&entry->pagemap);
680 		}
681 	}
682 	fflush(stdout);
683 }
684 
685 /*
686  * Does it look like a relation data file?
687  *
688  * For our purposes, only files belonging to the main fork are considered
689  * relation files. Other forks are always copied in toto, because we cannot
690  * reliably track changes to them, because WAL only contains block references
691  * for the main fork.
692  */
693 static bool
isRelDataFile(const char * path)694 isRelDataFile(const char *path)
695 {
696 	RelFileNode rnode;
697 	unsigned int segNo;
698 	int			nmatch;
699 	bool		matched;
700 
701 	/*----
702 	 * Relation data files can be in one of the following directories:
703 	 *
704 	 * global/
705 	 *		shared relations
706 	 *
707 	 * base/<db oid>/
708 	 *		regular relations, default tablespace
709 	 *
710 	 * pg_tblspc/<tblspc oid>/<tblspc version>/
711 	 *		within a non-default tablespace (the name of the directory
712 	 *		depends on version)
713 	 *
714 	 * And the relation data files themselves have a filename like:
715 	 *
716 	 * <oid>.<segment number>
717 	 *
718 	 *----
719 	 */
720 	rnode.spcNode = InvalidOid;
721 	rnode.dbNode = InvalidOid;
722 	rnode.relNode = InvalidOid;
723 	segNo = 0;
724 	matched = false;
725 
726 	nmatch = sscanf(path, "global/%u.%u", &rnode.relNode, &segNo);
727 	if (nmatch == 1 || nmatch == 2)
728 	{
729 		rnode.spcNode = GLOBALTABLESPACE_OID;
730 		rnode.dbNode = 0;
731 		matched = true;
732 	}
733 	else
734 	{
735 		nmatch = sscanf(path, "base/%u/%u.%u",
736 						&rnode.dbNode, &rnode.relNode, &segNo);
737 		if (nmatch == 2 || nmatch == 3)
738 		{
739 			rnode.spcNode = DEFAULTTABLESPACE_OID;
740 			matched = true;
741 		}
742 		else
743 		{
744 			nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
745 							&rnode.spcNode, &rnode.dbNode, &rnode.relNode,
746 							&segNo);
747 			if (nmatch == 3 || nmatch == 4)
748 				matched = true;
749 		}
750 	}
751 
752 	/*
753 	 * The sscanf tests above can match files that have extra characters at
754 	 * the end. To eliminate such cases, cross-check that GetRelationPath
755 	 * creates the exact same filename, when passed the RelFileNode
756 	 * information we extracted from the filename.
757 	 */
758 	if (matched)
759 	{
760 		char	   *check_path = datasegpath(rnode, MAIN_FORKNUM, segNo);
761 
762 		if (strcmp(check_path, path) != 0)
763 			matched = false;
764 
765 		pfree(check_path);
766 	}
767 
768 	return matched;
769 }
770 
771 /*
772  * A helper function to create the path of a relation file and segment.
773  *
774  * The returned path is palloc'd
775  */
776 static char *
datasegpath(RelFileNode rnode,ForkNumber forknum,BlockNumber segno)777 datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
778 {
779 	char	   *path;
780 	char	   *segpath;
781 
782 	path = relpathperm(rnode, forknum);
783 	if (segno > 0)
784 	{
785 		segpath = psprintf("%s.%u", path, segno);
786 		pfree(path);
787 		return segpath;
788 	}
789 	else
790 		return path;
791 }
792 
793 static int
path_cmp(const void * a,const void * b)794 path_cmp(const void *a, const void *b)
795 {
796 	file_entry_t *fa = *((file_entry_t **) a);
797 	file_entry_t *fb = *((file_entry_t **) b);
798 
799 	return strcmp(fa->path, fb->path);
800 }
801 
802 /*
803  * In the final stage, the filemap is sorted so that removals come last.
804  * From disk space usage point of view, it would be better to do removals
805  * first, but for now, safety first. If a whole directory is deleted, all
806  * files and subdirectories inside it need to removed first. On creation,
807  * parent directory needs to be created before files and directories inside
808  * it. To achieve that, the file_action_t enum is ordered so that we can
809  * just sort on that first. Furthermore, sort REMOVE entries in reverse
810  * path order, so that "foo/bar" subdirectory is removed before "foo".
811  */
812 static int
final_filemap_cmp(const void * a,const void * b)813 final_filemap_cmp(const void *a, const void *b)
814 {
815 	file_entry_t *fa = *((file_entry_t **) a);
816 	file_entry_t *fb = *((file_entry_t **) b);
817 
818 	if (fa->action > fb->action)
819 		return 1;
820 	if (fa->action < fb->action)
821 		return -1;
822 
823 	if (fa->action == FILE_ACTION_REMOVE)
824 		return strcmp(fb->path, fa->path);
825 	else
826 		return strcmp(fa->path, fb->path);
827 }
828