1 /*-------------------------------------------------------------------------
2  *
3  * filemap.c
4  *	  A data structure for keeping track of files that have changed.
5  *
6  * Copyright (c) 2013-2019, PostgreSQL Global Development Group
7  *
8  *-------------------------------------------------------------------------
9  */
10 
11 #include "postgres_fe.h"
12 
13 #include <sys/stat.h>
14 #include <unistd.h>
15 
16 #include "datapagemap.h"
17 #include "filemap.h"
18 #include "pg_rewind.h"
19 
20 #include "common/string.h"
21 #include "catalog/pg_tablespace_d.h"
22 #include "storage/fd.h"
23 
24 filemap_t  *filemap = NULL;
25 
26 static bool isRelDataFile(const char *path);
27 static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
28 						 BlockNumber segno);
29 static int	path_cmp(const void *a, const void *b);
30 static int	final_filemap_cmp(const void *a, const void *b);
31 static void filemap_list_to_array(filemap_t *map);
32 static bool check_file_excluded(const char *path, bool is_source);
33 
/*
 * One element of an exclusion list, used to leave out contents when
 * rewinding.
 *
 * "name" is the file name to look for.  When "match_prefix" is true, every
 * file name that starts with "name" is excluded; otherwise the file name
 * has to be equal to "name".
 */
struct exclude_list_item
{
	const char *name;			/* file name, or file name prefix */
	bool		match_prefix;	/* true: prefix match, false: exact match */
};
45 
/*
 * The contents of these directories are removed or recreated during server
 * start so they are not included in data processed by pg_rewind.
 *
 * Only paths below these directories are excluded; the list is terminated
 * by a NULL entry.  Both the strings and the array of pointers are
 * read-only.
 *
 * Note: those lists should be kept in sync with what basebackup.c provides.
 * Some of the values, contrary to what basebackup.c uses, are hardcoded as
 * they are defined in backend-only headers.  So this list is maintained
 * with a best effort in mind.
 */
static const char *const excludeDirContents[] =
{
	/*
	 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
	 * when stats_temp_directory is set because PGSS_TEXT_FILE is always
	 * created there.
	 */
	"pg_stat_tmp",				/* defined as PG_STAT_TMP_DIR */

	/*
	 * It is generally not useful to backup the contents of this directory
	 * even if the intention is to restore to another master. See backup.sgml
	 * for a more detailed description.
	 */
	"pg_replslot",

	/* Contents removed on startup, see dsm_cleanup_for_mmap(). */
	"pg_dynshmem",				/* defined as PG_DYNSHMEM_DIR */

	/* Contents removed on startup, see AsyncShmemInit(). */
	"pg_notify",

	/*
	 * Old contents are loaded for possible debugging but are not required for
	 * normal operation, see OldSerXidInit().
	 */
	"pg_serial",

	/* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
	"pg_snapshots",

	/* Contents zeroed on startup, see StartupSUBTRANS(). */
	"pg_subtrans",

	/* end of list */
	NULL
};
92 
93 /*
94  * List of files excluded from filemap processing.   Files are excluded
95  * if their prefix match.
96  */
97 static const struct exclude_list_item excludeFiles[] =
98 {
99 	/* Skip auto conf temporary file. */
100 	{"postgresql.auto.conf.tmp", false},	/* defined as PG_AUTOCONF_FILENAME */
101 
102 	/* Skip current log file temporary file */
103 	{"current_logfiles.tmp", false},	/* defined as
104 										 * LOG_METAINFO_DATAFILE_TMP */
105 
106 	/* Skip relation cache because it is rebuilt on startup */
107 	{"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */
108 
109 	/*
110 	 * If there's a backup_label or tablespace_map file, it belongs to a
111 	 * backup started by the user with pg_start_backup().  It is *not* correct
112 	 * for this backup.  Our backup_label is written later on separately.
113 	 */
114 	{"backup_label", false},	/* defined as BACKUP_LABEL_FILE */
115 	{"tablespace_map", false},	/* defined as TABLESPACE_MAP */
116 
117 	{"postmaster.pid", false},
118 	{"postmaster.opts", false},
119 
120 	/* end of list */
121 	{NULL, false}
122 };
123 
124 /*
125  * Create a new file map (stored in the global pointer "filemap").
126  */
127 void
filemap_create(void)128 filemap_create(void)
129 {
130 	filemap_t  *map;
131 
132 	map = pg_malloc(sizeof(filemap_t));
133 	map->first = map->last = NULL;
134 	map->nlist = 0;
135 	map->array = NULL;
136 	map->narray = 0;
137 
138 	Assert(filemap == NULL);
139 	filemap = map;
140 }
141 
142 /*
143  * Callback for processing source file list.
144  *
145  * This is called once for every file in the source server. We decide what
146  * action needs to be taken for the file, depending on whether the file
147  * exists in the target and whether the size matches.
148  */
149 void
process_source_file(const char * path,file_type_t type,size_t newsize,const char * link_target)150 process_source_file(const char *path, file_type_t type, size_t newsize,
151 					const char *link_target)
152 {
153 	bool		exists;
154 	char		localpath[MAXPGPATH];
155 	struct stat statbuf;
156 	filemap_t  *map = filemap;
157 	file_action_t action = FILE_ACTION_NONE;
158 	size_t		oldsize = 0;
159 	file_entry_t *entry;
160 
161 	Assert(map->array == NULL);
162 
163 	/*
164 	 * Skip any files matching the exclusion filters. This has the effect to
165 	 * remove all those files on the target.
166 	 */
167 	if (check_file_excluded(path, true))
168 		return;
169 
170 	/*
171 	 * Pretend that pg_wal is a directory, even if it's really a symlink. We
172 	 * don't want to mess with the symlink itself, nor complain if it's a
173 	 * symlink in source but not in target or vice versa.
174 	 */
175 	if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
176 		type = FILE_TYPE_DIRECTORY;
177 
178 	/*
179 	 * Skip temporary files, .../pgsql_tmp/... and .../pgsql_tmp.* in source.
180 	 * This has the effect that all temporary files in the destination will be
181 	 * removed.
182 	 */
183 	if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL)
184 		return;
185 	if (strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
186 		return;
187 
188 	/*
189 	 * sanity check: a filename that looks like a data file better be a
190 	 * regular file
191 	 */
192 	if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
193 		pg_fatal("data file \"%s\" in source is not a regular file", path);
194 
195 	snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
196 
197 	/* Does the corresponding file exist in the target data dir? */
198 	if (lstat(localpath, &statbuf) < 0)
199 	{
200 		if (errno != ENOENT)
201 			pg_fatal("could not stat file \"%s\": %m",
202 					 localpath);
203 
204 		exists = false;
205 	}
206 	else
207 		exists = true;
208 
209 	switch (type)
210 	{
211 		case FILE_TYPE_DIRECTORY:
212 			if (exists && !S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_wal") != 0)
213 			{
214 				/* it's a directory in source, but not in target. Strange.. */
215 				pg_fatal("\"%s\" is not a directory", localpath);
216 			}
217 
218 			if (!exists)
219 				action = FILE_ACTION_CREATE;
220 			else
221 				action = FILE_ACTION_NONE;
222 			oldsize = 0;
223 			break;
224 
225 		case FILE_TYPE_SYMLINK:
226 			if (exists &&
227 #ifndef WIN32
228 				!S_ISLNK(statbuf.st_mode)
229 #else
230 				!pgwin32_is_junction(localpath)
231 #endif
232 				)
233 			{
234 				/*
235 				 * It's a symbolic link in source, but not in target.
236 				 * Strange..
237 				 */
238 				pg_fatal("\"%s\" is not a symbolic link", localpath);
239 			}
240 
241 			if (!exists)
242 				action = FILE_ACTION_CREATE;
243 			else
244 				action = FILE_ACTION_NONE;
245 			oldsize = 0;
246 			break;
247 
248 		case FILE_TYPE_REGULAR:
249 			if (exists && !S_ISREG(statbuf.st_mode))
250 				pg_fatal("\"%s\" is not a regular file", localpath);
251 
252 			if (!exists || !isRelDataFile(path))
253 			{
254 				/*
255 				 * File exists in source, but not in target. Or it's a
256 				 * non-data file that we have no special processing for. Copy
257 				 * it in toto.
258 				 *
259 				 * An exception: PG_VERSIONs should be identical, but avoid
260 				 * overwriting it for paranoia.
261 				 */
262 				if (pg_str_endswith(path, "PG_VERSION"))
263 				{
264 					action = FILE_ACTION_NONE;
265 					oldsize = statbuf.st_size;
266 				}
267 				else
268 				{
269 					action = FILE_ACTION_COPY;
270 					oldsize = 0;
271 				}
272 			}
273 			else
274 			{
275 				/*
276 				 * It's a data file that exists in both.
277 				 *
278 				 * If it's larger in target, we can truncate it. There will
279 				 * also be a WAL record of the truncation in the source
280 				 * system, so WAL replay would eventually truncate the target
281 				 * too, but we might as well do it now.
282 				 *
283 				 * If it's smaller in the target, it means that it has been
284 				 * truncated in the target, or enlarged in the source, or
285 				 * both. If it was truncated in the target, we need to copy
286 				 * the missing tail from the source system. If it was enlarged
287 				 * in the source system, there will be WAL records in the
288 				 * source system for the new blocks, so we wouldn't need to
289 				 * copy them here. But we don't know which scenario we're
290 				 * dealing with, and there's no harm in copying the missing
291 				 * blocks now, so do it now.
292 				 *
293 				 * If it's the same size, do nothing here. Any blocks modified
294 				 * in the target will be copied based on parsing the target
295 				 * system's WAL, and any blocks modified in the source will be
296 				 * updated after rewinding, when the source system's WAL is
297 				 * replayed.
298 				 */
299 				oldsize = statbuf.st_size;
300 				if (oldsize < newsize)
301 					action = FILE_ACTION_COPY_TAIL;
302 				else if (oldsize > newsize)
303 					action = FILE_ACTION_TRUNCATE;
304 				else
305 					action = FILE_ACTION_NONE;
306 			}
307 			break;
308 	}
309 
310 	/* Create a new entry for this file */
311 	entry = pg_malloc(sizeof(file_entry_t));
312 	entry->path = pg_strdup(path);
313 	entry->type = type;
314 	entry->action = action;
315 	entry->oldsize = oldsize;
316 	entry->newsize = newsize;
317 	entry->link_target = link_target ? pg_strdup(link_target) : NULL;
318 	entry->next = NULL;
319 	entry->pagemap.bitmap = NULL;
320 	entry->pagemap.bitmapsize = 0;
321 	entry->isrelfile = isRelDataFile(path);
322 
323 	if (map->last)
324 	{
325 		map->last->next = entry;
326 		map->last = entry;
327 	}
328 	else
329 		map->first = map->last = entry;
330 	map->nlist++;
331 }
332 
333 /*
334  * Callback for processing target file list.
335  *
336  * All source files must be already processed before calling this. This only
337  * marks target data directory's files that didn't exist in the source for
338  * deletion.
339  */
340 void
process_target_file(const char * path,file_type_t type,size_t oldsize,const char * link_target)341 process_target_file(const char *path, file_type_t type, size_t oldsize,
342 					const char *link_target)
343 {
344 	bool		exists;
345 	file_entry_t key;
346 	file_entry_t *key_ptr;
347 	filemap_t  *map = filemap;
348 	file_entry_t *entry;
349 
350 	/*
351 	 * Do not apply any exclusion filters here.  This has advantage to remove
352 	 * from the target data folder all paths which have been filtered out from
353 	 * the source data folder when processing the source files.
354 	 */
355 
356 	if (map->array == NULL)
357 	{
358 		/* on first call, initialize lookup array */
359 		if (map->nlist == 0)
360 		{
361 			/* should not happen */
362 			pg_fatal("source file list is empty");
363 		}
364 
365 		filemap_list_to_array(map);
366 
367 		Assert(map->array != NULL);
368 
369 		qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp);
370 	}
371 
372 	/*
373 	 * Like in process_source_file, pretend that xlog is always a  directory.
374 	 */
375 	if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
376 		type = FILE_TYPE_DIRECTORY;
377 
378 	key.path = (char *) path;
379 	key_ptr = &key;
380 	exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
381 					  path_cmp) != NULL);
382 
383 	/* Remove any file or folder that doesn't exist in the source system. */
384 	if (!exists)
385 	{
386 		entry = pg_malloc(sizeof(file_entry_t));
387 		entry->path = pg_strdup(path);
388 		entry->type = type;
389 		entry->action = FILE_ACTION_REMOVE;
390 		entry->oldsize = oldsize;
391 		entry->newsize = 0;
392 		entry->link_target = link_target ? pg_strdup(link_target) : NULL;
393 		entry->next = NULL;
394 		entry->pagemap.bitmap = NULL;
395 		entry->pagemap.bitmapsize = 0;
396 		entry->isrelfile = isRelDataFile(path);
397 
398 		if (map->last == NULL)
399 			map->first = entry;
400 		else
401 			map->last->next = entry;
402 		map->last = entry;
403 		map->nlist++;
404 	}
405 	else
406 	{
407 		/*
408 		 * We already handled all files that exist in the source system in
409 		 * process_source_file().
410 		 */
411 	}
412 }
413 
414 /*
415  * This callback gets called while we read the WAL in the target, for every
416  * block that have changed in the target system. It makes note of all the
417  * changed blocks in the pagemap of the file.
418  */
419 void
process_block_change(ForkNumber forknum,RelFileNode rnode,BlockNumber blkno)420 process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
421 {
422 	char	   *path;
423 	file_entry_t key;
424 	file_entry_t *key_ptr;
425 	file_entry_t *entry;
426 	BlockNumber blkno_inseg;
427 	int			segno;
428 	filemap_t  *map = filemap;
429 	file_entry_t **e;
430 
431 	Assert(map->array);
432 
433 	segno = blkno / RELSEG_SIZE;
434 	blkno_inseg = blkno % RELSEG_SIZE;
435 
436 	path = datasegpath(rnode, forknum, segno);
437 
438 	key.path = (char *) path;
439 	key_ptr = &key;
440 
441 	e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
442 				path_cmp);
443 	if (e)
444 		entry = *e;
445 	else
446 		entry = NULL;
447 	pfree(path);
448 
449 	if (entry)
450 	{
451 		Assert(entry->isrelfile);
452 
453 		switch (entry->action)
454 		{
455 			case FILE_ACTION_NONE:
456 			case FILE_ACTION_TRUNCATE:
457 				/* skip if we're truncating away the modified block anyway */
458 				if ((blkno_inseg + 1) * BLCKSZ <= entry->newsize)
459 					datapagemap_add(&entry->pagemap, blkno_inseg);
460 				break;
461 
462 			case FILE_ACTION_COPY_TAIL:
463 
464 				/*
465 				 * skip the modified block if it is part of the "tail" that
466 				 * we're copying anyway.
467 				 */
468 				if ((blkno_inseg + 1) * BLCKSZ <= entry->oldsize)
469 					datapagemap_add(&entry->pagemap, blkno_inseg);
470 				break;
471 
472 			case FILE_ACTION_COPY:
473 			case FILE_ACTION_REMOVE:
474 				break;
475 
476 			case FILE_ACTION_CREATE:
477 				pg_fatal("unexpected page modification for directory or symbolic link \"%s\"", entry->path);
478 		}
479 	}
480 	else
481 	{
482 		/*
483 		 * If we don't have any record of this file in the file map, it means
484 		 * that it's a relation that doesn't exist in the source system, and
485 		 * it was subsequently removed in the target system, too. We can
486 		 * safely ignore it.
487 		 */
488 	}
489 }
490 
491 /*
492  * Is this the path of file that pg_rewind can skip copying?
493  */
494 static bool
check_file_excluded(const char * path,bool is_source)495 check_file_excluded(const char *path, bool is_source)
496 {
497 	char		localpath[MAXPGPATH];
498 	int			excludeIdx;
499 	const char *filename;
500 
501 	/* check individual files... */
502 	for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
503 	{
504 		int			cmplen = strlen(excludeFiles[excludeIdx].name);
505 
506 		filename = last_dir_separator(path);
507 		if (filename == NULL)
508 			filename = path;
509 		else
510 			filename++;
511 
512 		if (!excludeFiles[excludeIdx].match_prefix)
513 			cmplen++;
514 		if (strncmp(filename, excludeFiles[excludeIdx].name, cmplen) == 0)
515 		{
516 			if (is_source)
517 				pg_log_debug("entry \"%s\" excluded from source file list",
518 							 path);
519 			else
520 				pg_log_debug("entry \"%s\" excluded from target file list",
521 							 path);
522 			return true;
523 		}
524 	}
525 
526 	/*
527 	 * ... And check some directories.  Note that this includes any contents
528 	 * within the directories themselves.
529 	 */
530 	for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
531 	{
532 		snprintf(localpath, sizeof(localpath), "%s/",
533 				 excludeDirContents[excludeIdx]);
534 		if (strstr(path, localpath) == path)
535 		{
536 			if (is_source)
537 				pg_log_debug("entry \"%s\" excluded from source file list",
538 							 path);
539 			else
540 				pg_log_debug("entry \"%s\" excluded from target file list",
541 							 path);
542 			return true;
543 		}
544 	}
545 
546 	return false;
547 }
548 
549 /*
550  * Convert the linked list of entries in map->first/last to the array,
551  * map->array.
552  */
553 static void
filemap_list_to_array(filemap_t * map)554 filemap_list_to_array(filemap_t *map)
555 {
556 	int			narray;
557 	file_entry_t *entry,
558 			   *next;
559 
560 	map->array = (file_entry_t **)
561 		pg_realloc(map->array,
562 				   (map->nlist + map->narray) * sizeof(file_entry_t *));
563 
564 	narray = map->narray;
565 	for (entry = map->first; entry != NULL; entry = next)
566 	{
567 		map->array[narray++] = entry;
568 		next = entry->next;
569 		entry->next = NULL;
570 	}
571 	Assert(narray == map->nlist + map->narray);
572 	map->narray = narray;
573 	map->nlist = 0;
574 	map->first = map->last = NULL;
575 }
576 
577 void
filemap_finalize(void)578 filemap_finalize(void)
579 {
580 	filemap_t  *map = filemap;
581 
582 	filemap_list_to_array(map);
583 	qsort(map->array, map->narray, sizeof(file_entry_t *),
584 		  final_filemap_cmp);
585 }
586 
587 static const char *
action_to_str(file_action_t action)588 action_to_str(file_action_t action)
589 {
590 	switch (action)
591 	{
592 		case FILE_ACTION_NONE:
593 			return "NONE";
594 		case FILE_ACTION_COPY:
595 			return "COPY";
596 		case FILE_ACTION_TRUNCATE:
597 			return "TRUNCATE";
598 		case FILE_ACTION_COPY_TAIL:
599 			return "COPY_TAIL";
600 		case FILE_ACTION_CREATE:
601 			return "CREATE";
602 		case FILE_ACTION_REMOVE:
603 			return "REMOVE";
604 
605 		default:
606 			return "unknown";
607 	}
608 }
609 
610 /*
611  * Calculate the totals needed for progress reports.
612  */
613 void
calculate_totals(void)614 calculate_totals(void)
615 {
616 	file_entry_t *entry;
617 	int			i;
618 	filemap_t  *map = filemap;
619 
620 	map->total_size = 0;
621 	map->fetch_size = 0;
622 
623 	for (i = 0; i < map->narray; i++)
624 	{
625 		entry = map->array[i];
626 
627 		if (entry->type != FILE_TYPE_REGULAR)
628 			continue;
629 
630 		map->total_size += entry->newsize;
631 
632 		if (entry->action == FILE_ACTION_COPY)
633 		{
634 			map->fetch_size += entry->newsize;
635 			continue;
636 		}
637 
638 		if (entry->action == FILE_ACTION_COPY_TAIL)
639 			map->fetch_size += (entry->newsize - entry->oldsize);
640 
641 		if (entry->pagemap.bitmapsize > 0)
642 		{
643 			datapagemap_iterator_t *iter;
644 			BlockNumber blk;
645 
646 			iter = datapagemap_iterate(&entry->pagemap);
647 			while (datapagemap_next(iter, &blk))
648 				map->fetch_size += BLCKSZ;
649 
650 			pg_free(iter);
651 		}
652 	}
653 }
654 
655 void
print_filemap(void)656 print_filemap(void)
657 {
658 	filemap_t  *map = filemap;
659 	file_entry_t *entry;
660 	int			i;
661 
662 	for (i = 0; i < map->narray; i++)
663 	{
664 		entry = map->array[i];
665 		if (entry->action != FILE_ACTION_NONE ||
666 			entry->pagemap.bitmapsize > 0)
667 		{
668 			pg_log_debug("%s (%s)", entry->path,
669 						 action_to_str(entry->action));
670 
671 			if (entry->pagemap.bitmapsize > 0)
672 				datapagemap_print(&entry->pagemap);
673 		}
674 	}
675 	fflush(stdout);
676 }
677 
678 /*
679  * Does it look like a relation data file?
680  *
681  * For our purposes, only files belonging to the main fork are considered
682  * relation files. Other forks are always copied in toto, because we cannot
683  * reliably track changes to them, because WAL only contains block references
684  * for the main fork.
685  */
686 static bool
isRelDataFile(const char * path)687 isRelDataFile(const char *path)
688 {
689 	RelFileNode rnode;
690 	unsigned int segNo;
691 	int			nmatch;
692 	bool		matched;
693 
694 	/*----
695 	 * Relation data files can be in one of the following directories:
696 	 *
697 	 * global/
698 	 *		shared relations
699 	 *
700 	 * base/<db oid>/
701 	 *		regular relations, default tablespace
702 	 *
703 	 * pg_tblspc/<tblspc oid>/<tblspc version>/
704 	 *		within a non-default tablespace (the name of the directory
705 	 *		depends on version)
706 	 *
707 	 * And the relation data files themselves have a filename like:
708 	 *
709 	 * <oid>.<segment number>
710 	 *
711 	 *----
712 	 */
713 	rnode.spcNode = InvalidOid;
714 	rnode.dbNode = InvalidOid;
715 	rnode.relNode = InvalidOid;
716 	segNo = 0;
717 	matched = false;
718 
719 	nmatch = sscanf(path, "global/%u.%u", &rnode.relNode, &segNo);
720 	if (nmatch == 1 || nmatch == 2)
721 	{
722 		rnode.spcNode = GLOBALTABLESPACE_OID;
723 		rnode.dbNode = 0;
724 		matched = true;
725 	}
726 	else
727 	{
728 		nmatch = sscanf(path, "base/%u/%u.%u",
729 						&rnode.dbNode, &rnode.relNode, &segNo);
730 		if (nmatch == 2 || nmatch == 3)
731 		{
732 			rnode.spcNode = DEFAULTTABLESPACE_OID;
733 			matched = true;
734 		}
735 		else
736 		{
737 			nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
738 							&rnode.spcNode, &rnode.dbNode, &rnode.relNode,
739 							&segNo);
740 			if (nmatch == 3 || nmatch == 4)
741 				matched = true;
742 		}
743 	}
744 
745 	/*
746 	 * The sscanf tests above can match files that have extra characters at
747 	 * the end. To eliminate such cases, cross-check that GetRelationPath
748 	 * creates the exact same filename, when passed the RelFileNode
749 	 * information we extracted from the filename.
750 	 */
751 	if (matched)
752 	{
753 		char	   *check_path = datasegpath(rnode, MAIN_FORKNUM, segNo);
754 
755 		if (strcmp(check_path, path) != 0)
756 			matched = false;
757 
758 		pfree(check_path);
759 	}
760 
761 	return matched;
762 }
763 
764 /*
765  * A helper function to create the path of a relation file and segment.
766  *
767  * The returned path is palloc'd
768  */
769 static char *
datasegpath(RelFileNode rnode,ForkNumber forknum,BlockNumber segno)770 datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
771 {
772 	char	   *path;
773 	char	   *segpath;
774 
775 	path = relpathperm(rnode, forknum);
776 	if (segno > 0)
777 	{
778 		segpath = psprintf("%s.%u", path, segno);
779 		pfree(path);
780 		return segpath;
781 	}
782 	else
783 		return path;
784 }
785 
786 static int
path_cmp(const void * a,const void * b)787 path_cmp(const void *a, const void *b)
788 {
789 	file_entry_t *fa = *((file_entry_t **) a);
790 	file_entry_t *fb = *((file_entry_t **) b);
791 
792 	return strcmp(fa->path, fb->path);
793 }
794 
795 /*
796  * In the final stage, the filemap is sorted so that removals come last.
797  * From disk space usage point of view, it would be better to do removals
798  * first, but for now, safety first. If a whole directory is deleted, all
799  * files and subdirectories inside it need to removed first. On creation,
800  * parent directory needs to be created before files and directories inside
801  * it. To achieve that, the file_action_t enum is ordered so that we can
802  * just sort on that first. Furthermore, sort REMOVE entries in reverse
803  * path order, so that "foo/bar" subdirectory is removed before "foo".
804  */
805 static int
final_filemap_cmp(const void * a,const void * b)806 final_filemap_cmp(const void *a, const void *b)
807 {
808 	file_entry_t *fa = *((file_entry_t **) a);
809 	file_entry_t *fb = *((file_entry_t **) b);
810 
811 	if (fa->action > fb->action)
812 		return 1;
813 	if (fa->action < fb->action)
814 		return -1;
815 
816 	if (fa->action == FILE_ACTION_REMOVE)
817 		return strcmp(fb->path, fa->path);
818 	else
819 		return strcmp(fa->path, fb->path);
820 }
821