1 /**
2  *  This file is part of rmlint.
3  *
4  *  rmlint is free software: you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation, either version 3 of the License, or
7  *  (at your option) any later version.
8  *
9  *  rmlint is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with rmlint.  If not, see <http://www.gnu.org/licenses/>.
16  *
17  * Authors:
18  *
19  *  - Christopher <sahib> Pahl 2010-2020 (https://github.com/sahib)
20  *  - Daniel <SeeSpotRun> T.   2014-2020 (https://github.com/SeeSpotRun)
21  *
22  * Hosted on http://github.com/sahib/rmlint
23  *
24  */
25 
26 #include <ctype.h>
27 #include <fcntl.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32 
33 #include "config.h"
34 #include "session.h"
35 
36 /* Be safe: This header is not essential and might be missing on some systems.
37  * We only include it here, because it fixes some recent warning...
38  * */
39 #if HAVE_SYSMACROS_H
40 # include <sys/sysmacros.h>
41 #endif
42 
43 
44 #include <sys/ioctl.h>
45 #include <sys/stat.h>
46 #include <sys/types.h>
47 
48 #include <grp.h>
49 #include <pwd.h>
50 
51 #include <libgen.h>
52 
53 /* Not available there,
54  * but might be on other non-linux systems
55  * */
56 #if HAVE_GIO_UNIX
57 #include <gio/gunixmounts.h>
58 #endif
59 
60 #if HAVE_FIEMAP
61 #include <linux/fiemap.h>
62 #include <linux/fs.h>
63 #endif
64 
65 /* Internal headers */
66 #include "config.h"
67 #include "file.h"
68 #include "utilities.h"
69 
70 /* External libraries */
71 #include <glib.h>
72 
73 #if HAVE_LIBELF
74 #include <gelf.h>
75 #include <libelf.h>
76 #endif
77 
78 #if HAVE_BLKID
79 #include <blkid/blkid.h>
80 #endif
81 
82 #if HAVE_JSON_GLIB
83 #include <json-glib/json-glib.h>
84 #endif
85 
86 #define RM_MOUNTTABLE_IS_USABLE (HAVE_BLKID && HAVE_GIO_UNIX)
87 
88 ////////////////////////////////////
//       GENERAL UTILITIES        //
90 ////////////////////////////////////
91 
rm_util_strsub(const char * string,const char * subs,const char * with)92 char *rm_util_strsub(const char *string, const char *subs, const char *with) {
93     gchar *result = NULL;
94     if(string != NULL && string[0] != '\0') {
95         gchar **split = g_strsplit(string, subs, 0);
96         if(split != NULL) {
97             result = g_strjoinv(with, split);
98         }
99         g_strfreev(split);
100     }
101     return result;
102 }
103 
rm_util_basename(const char * filename)104 char *rm_util_basename(const char *filename) {
105     char *base = strrchr(filename, G_DIR_SEPARATOR);
106     if(base != NULL) {
107         /* Return a pointer to the part behind it
108          * (which may be the empty string)
109          * */
110         return base + 1;
111     }
112 
113     /* It's the full path anyway */
114     return (char *)filename;
115 }
116 
/* Return a pointer to the text following the last '.' in `basename`,
 * or NULL if `basename` contains no '.' at all.
 * The result points into `basename`; nothing is allocated.
 * */
char *rm_util_path_extension(const char *basename) {
    char *last_dot = strrchr(basename, '.');

    return (last_dot != NULL) ? last_dot + 1 : NULL;
}
125 
rm_util_path_is_hidden(const char * path)126 bool rm_util_path_is_hidden(const char *path) {
127     if(path == NULL) {
128         return false;
129     }
130 
131     if(*path == '.') {
132         return true;
133     }
134 
135     while(*path++) {
136         /* Search for '/.' */
137         if(*path == G_DIR_SEPARATOR && *(path + 1) == '.') {
138             return true;
139         }
140     }
141 
142     return false;
143 }
144 
rm_util_path_depth(const char * path)145 int rm_util_path_depth(const char *path) {
146     int depth = 0;
147 
148     while(path && *path) {
149         /* Skip trailing slashes */
150         if(*path == G_DIR_SEPARATOR && path[1] != 0) {
151             depth++;
152         }
153         path = strchr(&path[1], G_DIR_SEPARATOR);
154     }
155 
156     return depth;
157 }
158 
rm_hash_table_setdefault(GHashTable * table,gpointer key,RmNewFunc default_func)159 GQueue *rm_hash_table_setdefault(GHashTable *table, gpointer key,
160                                  RmNewFunc default_func) {
161     gpointer value = g_hash_table_lookup(table, key);
162     if(value == NULL) {
163         value = default_func();
164         g_hash_table_insert(table, key, value);
165     }
166 
167     return value;
168 }
169 
rm_util_parent_node(const char * path)170 ino_t rm_util_parent_node(const char *path) {
171     char *parent_path = g_path_get_dirname(path);
172 
173     RmStat stat_buf;
174     if(!rm_sys_stat(parent_path, &stat_buf)) {
175         g_free(parent_path);
176         return stat_buf.st_ino;
177     } else {
178         g_free(parent_path);
179         return -1;
180     }
181 }
182 
/* Move all links of `src` to the end of `dest` without copying them.
 * Afterwards `src` is an empty queue. Does nothing if `src` is empty.
 * */
void rm_util_queue_push_tail_queue(GQueue *dest, GQueue *src) {
    g_return_if_fail(dest);
    g_return_if_fail(src);

    if(src->length == 0) {
        return;
    }

    GList *first_moved = src->head;
    GList *old_tail = dest->tail;

    /* Splice the chain of src behind the current tail of dest */
    first_moved->prev = old_tail;
    if(old_tail == NULL) {
        dest->head = first_moved;
    } else {
        old_tail->next = first_moved;
    }
    dest->tail = src->tail;
    dest->length += src->length;

    /* src no longer owns any link */
    src->head = NULL;
    src->tail = NULL;
    src->length = 0;
}
202 
rm_util_queue_foreach_remove(GQueue * queue,RmRFunc func,gpointer user_data)203 gint rm_util_queue_foreach_remove(GQueue *queue, RmRFunc func, gpointer user_data) {
204     gint removed = 0;
205 
206     for(GList *iter = queue->head, *next = NULL; iter; iter = next) {
207         next = iter->next;
208         if(func(iter->data, user_data)) {
209             g_queue_delete_link(queue, iter);
210             ++removed;
211         }
212     }
213     return removed;
214 }
215 
rm_util_list_foreach_remove(GList ** list,RmRFunc func,gpointer user_data)216 gint rm_util_list_foreach_remove(GList **list, RmRFunc func, gpointer user_data) {
217     gint removed = 0;
218 
219     /* iterate over list */
220     for(GList *iter = *list, *next = NULL; iter; iter = next) {
221         next = iter->next;
222         if(func(iter->data, user_data)) {
223             /* delete iter from GList */
224             if(iter->prev) {
225                 (iter->prev)->next = next;
226             } else {
227                 *list = next;
228             }
229             g_list_free_1(iter);
230             ++removed;
231         }
232     }
233     return removed;
234 }
235 
rm_util_slist_foreach_remove(GSList ** list,RmRFunc func,gpointer user_data)236 gint rm_util_slist_foreach_remove(GSList **list, RmRFunc func, gpointer user_data) {
237     gint removed = 0;
238 
239     /* iterate over list, keeping track of previous and next entries */
240     for(GSList *prev = NULL, *iter = *list, *next = NULL; iter; iter = next) {
241         next = iter->next;
242         if(func(iter->data, user_data)) {
243             /* delete iter from GSList */
244             g_slist_free1(iter);
245             if(prev) {
246                 prev->next = next;
247             } else {
248                 *list = next;
249             }
250             ++removed;
251         } else {
252             prev = iter;
253         }
254     }
255     return removed;
256 }
257 
rm_util_slist_pop(GSList ** list,GMutex * lock)258 gpointer rm_util_slist_pop(GSList **list, GMutex *lock) {
259     gpointer result = NULL;
260     if(lock) {
261         g_mutex_lock(lock);
262     }
263     if(*list) {
264         result = (*list)->data;
265         *list = g_slist_delete_link(*list, *list);
266     }
267     if(lock) {
268         g_mutex_unlock(lock);
269     }
270     return result;
271 }
272 
273 /* checks uid and gid; returns 0 if both ok, else RM_LINT_TYPE_ corresponding *
274  * to RmFile->filter types
275  * */
rm_util_uid_gid_check(RmStat * statp,RmUserList * userlist)276 int rm_util_uid_gid_check(RmStat *statp, RmUserList *userlist) {
277     bool has_gid = 1, has_uid = 1;
278     if(!rm_userlist_contains(userlist, statp->st_uid, statp->st_gid, &has_uid,
279                              &has_gid)) {
280         if(has_gid == false && has_uid == false) {
281             return RM_LINT_TYPE_BADUGID;
282         } else if(has_gid == false && has_uid == true) {
283             return RM_LINT_TYPE_BADGID;
284         } else if(has_gid == true && has_uid == false) {
285             return RM_LINT_TYPE_BADUID;
286         }
287     }
288 
289     return RM_LINT_TYPE_UNKNOWN;
290 }
291 
292 /* Method to test if a file is non stripped binary. Uses libelf*/
rm_util_is_nonstripped(_UNUSED const char * path,_UNUSED RmStat * statp)293 bool rm_util_is_nonstripped(_UNUSED const char *path, _UNUSED RmStat *statp) {
294     bool is_ns = false;
295 
296 #if HAVE_LIBELF
297     g_return_val_if_fail(path, false);
298 
299     if(statp && (statp->st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
300         return false;
301     }
302 
303     /* inspired by "jschmier"'s answer at http://stackoverflow.com/a/5159890 */
304     int fd;
305 
306     /* ELF handle */
307     Elf *elf;
308 
309     /* section descriptor pointer */
310     Elf_Scn *scn;
311 
312     /* section header */
313     GElf_Shdr shdr;
314 
315     /* Open ELF file to obtain file descriptor */
316     if((fd = rm_sys_open(path, O_RDONLY)) == -1) {
317         rm_log_warning_line(_("cannot open file '%s' for nonstripped test: "), path);
318         rm_log_perror("");
319         return 0;
320     }
321 
322     /* Protect program from using an older library */
323     if(elf_version(EV_CURRENT) == EV_NONE) {
324         rm_log_error_line(_("ELF Library is out of date!"));
325         rm_sys_close(fd);
326         return false;
327     }
328 
329     /* Initialize elf pointer for examining contents of file */
330     elf = elf_begin(fd, ELF_C_READ, NULL);
331 
332     /* Initialize section descriptor pointer so that elf_nextscn()
333      * returns a pointer to the section descriptor at index 1.
334      * */
335     scn = NULL;
336 
337     /* Iterate through ELF sections */
338     while((scn = elf_nextscn(elf, scn)) != NULL) {
339         /* Retrieve section header */
340         gelf_getshdr(scn, &shdr);
341 
342         /* If a section header holding a symbol table (.symtab)
343          * is found, this ELF file has not been stripped. */
344         if(shdr.sh_type == SHT_SYMTAB) {
345             is_ns = true;
346             break;
347         }
348     }
349     elf_end(elf);
350     rm_sys_close(fd);
351 #endif
352 
353     return is_ns;
354 }
355 
/* Return the login name belonging to the effective uid of this process,
 * or NULL if getpwuid() has no entry for it.
 * The string is the pw_name member of the entry returned by getpwuid();
 * it is not a copy.
 * */
char *rm_util_get_username(void) {
    struct passwd *entry = getpwuid(geteuid());

    return (entry != NULL) ? entry->pw_name : NULL;
}
364 
/* Return the name of the primary group of the effective user of this
 * process.
 *
 * Returns NULL if either the passwd entry of the effective uid or the
 * group entry of its primary gid cannot be found.
 * The string is the gr_name member of the entry returned by getgrgid();
 * it is not a copy.
 * */
char *rm_util_get_groupname(void) {
    struct passwd *user = getpwuid(geteuid());
    if(user == NULL) {
        /* uid without a passwd entry (e.g. in a container): no primary
         * gid to look up, and user must not be dereferenced */
        return NULL;
    }

    struct group *grp = getgrgid(user->pw_gid);
    if(grp == NULL) {
        return NULL;
    }

    return grp->gr_name;
}
374 
/* Format the byte count `num` into the buffer `in` of size `len`.
 *
 * The unit is chosen so that the printed number stays at or above one
 * half: plain bytes below 512, then KB, MB and finally GB (powers of
 * 1024, two decimals).
 * */
void rm_util_size_to_human_readable(RmOff num, char *in, gsize len) {
    /* Test the largest unit first and bail out as soon as one fits */
    if(num >= 512 * 1024 * 1024) {
        snprintf(in, len, "%.2f GB", num / (1024.0 * 1024.0 * 1024.0));
        return;
    }

    if(num >= 512 * 1024) {
        snprintf(in, len, "%.2f MB", num / (1024.0 * 1024.0));
        return;
    }

    if(num >= 512) {
        snprintf(in, len, "%.2f KB", num / 1024.0);
        return;
    }

    snprintf(in, len, "%" LLU " B", num);
}
386 
387 /////////////////////////////////////
388 //   UID/GID VALIDITY CHECKING     //
389 /////////////////////////////////////
390 
/* GCompareDataFunc ordering ids that are stored directly in the pointers.
 *
 * Returns -1, 0 or 1. The ids are compared rather than subtracted:
 * an unsigned difference squeezed into an int has the wrong sign as soon
 * as the two ids are more than INT_MAX apart, which would corrupt the
 * order of the sorted sequences.
 * */
static int rm_userlist_cmp_ids(gconstpointer a, gconstpointer b, _UNUSED gpointer ud) {
    guint id_a = GPOINTER_TO_UINT(a);
    guint id_b = GPOINTER_TO_UINT(b);

    return (id_a > id_b) - (id_a < id_b);
}
394 
rm_userlist_new(void)395 RmUserList *rm_userlist_new(void) {
396     struct passwd *node = NULL;
397     struct group *grp = NULL;
398 
399     RmUserList *self = g_malloc0(sizeof(RmUserList));
400     self->users = g_sequence_new(NULL);
401     self->groups = g_sequence_new(NULL);
402 
403     setpwent();
404     while((node = getpwent()) != NULL) {
405         g_sequence_insert_sorted(self->users, GUINT_TO_POINTER(node->pw_uid),
406                                  rm_userlist_cmp_ids, NULL);
407         g_sequence_insert_sorted(self->groups, GUINT_TO_POINTER(node->pw_gid),
408                                  rm_userlist_cmp_ids, NULL);
409     }
410     endpwent();
411 
412     /* add all groups, not just those that are user primary gid's */
413     while((grp = getgrent()) != NULL) {
414         g_sequence_insert_sorted(self->groups, GUINT_TO_POINTER(grp->gr_gid),
415                                  rm_userlist_cmp_ids, NULL);
416     }
417 
418     endgrent();
419     g_mutex_init(&self->lock);
420     return self;
421 }
422 
/* Look up `uid` and `gid` in the user list `self`.
 *
 * The individual results are written to `valid_uid` and `valid_gid`
 * (each may be NULL if the caller does not care).
 * Returns true only if both ids were found.
 * */
bool rm_userlist_contains(RmUserList *self, unsigned long uid, unsigned gid,
                          bool *valid_uid, bool *valid_gid) {
    g_assert(self);

    /* Both sequences are searched while holding the list's lock */
    g_mutex_lock(&self->lock);
    bool has_group = g_sequence_lookup(self->groups, GUINT_TO_POINTER(gid),
                                       rm_userlist_cmp_ids, NULL) != NULL;
    bool has_user = g_sequence_lookup(self->users, GUINT_TO_POINTER(uid),
                                      rm_userlist_cmp_ids, NULL) != NULL;
    g_mutex_unlock(&self->lock);

    if(valid_uid != NULL) {
        *valid_uid = has_user;
    }
    if(valid_gid != NULL) {
        *valid_gid = has_group;
    }

    return has_group && has_user;
}
448 
/* Release a user list: its lock, both id sequences and the struct itself. */
void rm_userlist_destroy(RmUserList *self) {
    g_assert(self);

    g_mutex_clear(&self->lock);
    g_sequence_free(self->groups);
    g_sequence_free(self->users);
    g_free(self);
}
457 
458 /////////////////////////////////////
459 //    MOUNTTABLE IMPLEMENTATION    //
460 /////////////////////////////////////
461 
/* Per-disk record; these are the values of the mount table's disk_table. */
typedef struct RmDiskInfo {
    /* Disk name; a private copy made by rm_disk_info_new() */
    char *name;
    /* Whether the disk is treated as rotational */
    bool is_rotational;
} RmDiskInfo;
466 
/* Per-partition record; these are the values of the mount table's
 * part_table. The strings are private copies made by rm_part_info_new().
 * */
typedef struct RmPartitionInfo {
    /* Path the partition was registered under (the mount directory, or
     * the subvolume path for entries added by rm_mounts_get_disk_id()) */
    char *name;
    /* Name of the mounted file system */
    char *fsname;
    /* dev_t of the disk this partition belongs to */
    dev_t disk;
} RmPartitionInfo;
472 
473 #if RM_MOUNTTABLE_IS_USABLE
474 
rm_part_info_new(char * name,char * fsname,dev_t disk)475 RmPartitionInfo *rm_part_info_new(char *name, char *fsname, dev_t disk) {
476     RmPartitionInfo *self = g_new0(RmPartitionInfo, 1);
477     self->name = g_strdup(name);
478     self->fsname = g_strdup(fsname);
479     self->disk = disk;
480     return self;
481 }
482 
/* Free a partition record together with the string copies it owns. */
void rm_part_info_free(RmPartitionInfo *self) {
    g_free(self->fsname);
    g_free(self->name);
    g_free(self);
}
488 
rm_disk_info_new(char * name,char is_rotational)489 RmDiskInfo *rm_disk_info_new(char *name, char is_rotational) {
490     RmDiskInfo *self = g_new0(RmDiskInfo, 1);
491     self->name = g_strdup(name);
492     self->is_rotational = is_rotational;
493     return self;
494 }
495 
/* Free a disk record together with its copy of the disk name. */
void rm_disk_info_free(RmDiskInfo *self) {
    char *owned_name = self->name;

    g_free(owned_name);
    g_free(self);
}
500 
rm_mounts_is_rotational_blockdev(const char * dev)501 static gchar rm_mounts_is_rotational_blockdev(const char *dev) {
502     gchar is_rotational = -1;
503 
504 #if HAVE_SYSBLOCK /* this works only on linux */
505     char sys_path[PATH_MAX + 30];
506     snprintf(sys_path, sizeof(sys_path) - 1, "/sys/block/%s/queue/rotational", dev);
507 
508     FILE *sys_fdes = fopen(sys_path, "r");
509     if(sys_fdes == NULL) {
510         return -1;
511     }
512 
513     if(fread(&is_rotational, 1, 1, sys_fdes) == 1) {
514         is_rotational -= '0';
515     }
516 
517     fclose(sys_fdes);
518 #else
519     (void)dev;
520 #endif
521 
522     return is_rotational;
523 }
524 
/* Return true if `fs_type` is exactly one of the names of the memory
 * backed / kernel file systems listed below.
 * */
static bool rm_mounts_is_ramdisk(const char *fs_type) {
    static const char *const ram_backed[] = {"tmpfs", "rootfs", "devtmpfs", "cgroup",
                                             "proc",  "sys",    "dev"};
    const size_t n_names = sizeof(ram_backed) / sizeof(ram_backed[0]);

    for(size_t idx = 0; idx < n_names; idx++) {
        if(strcmp(ram_backed[idx], fs_type) == 0) {
            return true;
        }
    }

    return false;
}
537 
/* One mounted file system. All three strings are copies owned by the
 * entry; rm_mount_list_close() frees them.
 * */
typedef struct RmMountEntry {
    char *fsname; /* name of mounted file system */
    char *dir;    /* file system path prefix     */
    char *type;   /* Type of fs: ufs, nfs, etc   */
} RmMountEntry;
543 
/* Snapshot of the mount list plus a cursor for walking over it. */
typedef struct RmMountEntries {
    /* Raw list as returned by g_unix_mounts_get() */
    GList *mnt_entries;
    /* List of RmMountEntry wrappers, one per raw entry */
    GList *entries;
    /* Cursor of rm_mount_list_next(); NULL before the first call and
     * again once the end of `entries` was reached */
    GList *current;
} RmMountEntries;
549 
/* Free a mount list: every wrapped entry with its strings, both GLists
 * and finally the RmMountEntries struct itself.
 * */
static void rm_mount_list_close(RmMountEntries *self) {
    g_assert(self);

    /* First the payload of the wrapper list ... */
    GList *link = self->entries;
    while(link != NULL) {
        RmMountEntry *wrapped = link->data;
        g_free(wrapped->type);
        g_free(wrapped->dir);
        g_free(wrapped->fsname);
        g_slice_free(RmMountEntry, wrapped);
        link = link->next;
    }

    /* ... then the lists themselves */
    g_list_free(self->entries);
    g_list_free_full(self->mnt_entries, (GDestroyNotify)g_unix_mount_free);
    g_slice_free(RmMountEntries, self);
}
565 
rm_mount_list_next(RmMountEntries * self)566 static RmMountEntry *rm_mount_list_next(RmMountEntries *self) {
567     g_assert(self);
568 
569     if(self->current) {
570         self->current = self->current->next;
571     } else {
572         self->current = self->entries;
573     }
574 
575     if(self->current) {
576         return self->current->data;
577     } else {
578         return NULL;
579     }
580 }
581 
/* Tell whether the file system of type `fstype` mounted at `mountpoint`
 * can do reflinks.
 *
 * btrfs and ocfs2 always qualify. For xfs the answer depends on how the
 * file system was created, so `xfs_info` is run on the mount point and
 * its output is searched for "reflink=1". Everything else: false.
 * */
static bool fs_supports_reflinks(char *fstype, char *mountpoint) {
    if(strcmp(fstype, "btrfs") == 0) {
        return true;
    }
    if(strcmp(fstype, "ocfs2") == 0) {
        return true;
    }
    if(strcmp(fstype, "xfs") == 0) {
        /* xfs *might* support reflinks...
         * The mount point ends up in a shell command line, so let
         * g_shell_quote() do the quoting; wrapping it in '...' by hand
         * breaks (and allows command injection) as soon as the path
         * itself contains a single quote. */
        char *quoted_mountpoint = g_shell_quote(mountpoint);
        char *cmd =
            g_strdup_printf("xfs_info %s | grep -q 'reflink=1'", quoted_mountpoint);
        int res = system(cmd);
        g_free(cmd);
        g_free(quoted_mountpoint);
        return (res == 0);
    }
    return false;
}
598 
rm_mount_list_open(RmMountTable * table)599 static RmMountEntries *rm_mount_list_open(RmMountTable *table) {
600     RmMountEntries *self = g_slice_new(RmMountEntries);
601 
602     self->mnt_entries = g_unix_mounts_get(NULL);
603     self->entries = NULL;
604     self->current = NULL;
605 
606     for(GList *iter = self->mnt_entries; iter; iter = iter->next) {
607         RmMountEntry *wrap_entry = g_slice_new(RmMountEntry);
608         GUnixMountEntry *entry = iter->data;
609 
610         wrap_entry->fsname = g_strdup(g_unix_mount_get_device_path(entry));
611         wrap_entry->dir = g_strdup(g_unix_mount_get_mount_path(entry));
612         wrap_entry->type = g_strdup(g_unix_mount_get_fs_type(entry));
613 
614         self->entries = g_list_prepend(self->entries, wrap_entry);
615     }
616 
617     RmMountEntry *wrap_entry = NULL;
618     while((wrap_entry = rm_mount_list_next(self))) {
619         /* bindfs mounts mirror directory trees.
620         * This cannot be detected properly by rmlint since
621         * files in it have the same inode as their unmirrored file, but
622         * a different dev_t.
623         *
624         * Also ignore kernel filesystems.
625         *
626         * So better go and ignore it.
627         */
628         static struct RmEvilFs {
629             /* fsname as show by `mount` */
630             const char *name;
631 
632             /* Wether to warn about the exclusion on this */
633             bool unusual;
634         } evilfs_types[] = {{"bindfs", 1},
635                             {"nullfs", 1},
636                             /* Ignore the usual linux file system spam */
637                             {"proc", 0},
638                             {"cgroup", 0},
639                             {"configfs", 0},
640                             {"sys", 0},
641                             {"devtmpfs", 0},
642                             {"debugfs", 0},
643                             {NULL, 0}};
644 
645 
646         const struct RmEvilFs *evilfs_found = NULL;
647         for(int i = 0; evilfs_types[i].name && !evilfs_found; ++i) {
648             if(strcmp(evilfs_types[i].name, wrap_entry->type) == 0) {
649                 evilfs_found = &evilfs_types[i];
650             }
651         }
652 
653         if(evilfs_found != NULL) {
654             RmStat dir_stat;
655             if(rm_sys_stat(wrap_entry->dir, &dir_stat) < 0) {
656                 /* not an evil fs if we can't read it */
657                 continue;
658             }
659 
660             g_hash_table_insert(table->evilfs_table,
661                                 GUINT_TO_POINTER(dir_stat.st_dev),
662                                 GUINT_TO_POINTER(1));
663 
664             GLogLevelFlags log_level = G_LOG_LEVEL_DEBUG;
665 
666             if(evilfs_found->unusual) {
667                 log_level = G_LOG_LEVEL_WARNING;
668                 rm_log_warning_prefix();
669             } else {
670                 rm_log_debug_prefix();
671             }
672 
673             g_log("rmlint", log_level,
674                   _("`%s` mount detected at %s (#%u); Ignoring all files in it.\n"),
675                   evilfs_found->name, wrap_entry->dir, (unsigned)dir_stat.st_dev);
676         }
677 
678         if(fs_supports_reflinks(wrap_entry->type, wrap_entry->dir)) {
679             RmStat dir_stat;
680             if(rm_sys_stat(wrap_entry->dir, &dir_stat) == 0) {
681                 g_hash_table_insert(table->reflinkfs_table,
682                                     GUINT_TO_POINTER(dir_stat.st_dev),
683                                     wrap_entry->type);
684                 rm_log_debug_line("Filesystem %s: reflink capable", wrap_entry->dir);
685                 continue;
686             }
687         }
688 
689         rm_log_debug_line("Filesystem %s: not reflink capable", wrap_entry->dir);
690     }
691 
692     return self;
693 }
694 
/* Resolve the device number `rdev` to its whole disk by handing the
 * arguments on to blkid_devno_to_wholedisk(): the disk name goes to
 * `disk` (capacity `disk_size`), the disk's device number to `result`.
 * `entry` is not used. Returns whatever blkid returns.
 * */
int rm_mounts_devno_to_wholedisk(_UNUSED RmMountEntry *entry, _UNUSED dev_t rdev,
                                 _UNUSED char *disk, _UNUSED size_t disk_size,
                                 _UNUSED dev_t *result) {
    int blkid_status = blkid_devno_to_wholedisk(rdev, disk, disk_size, result);

    return blkid_status;
}
700 
/* Create and fill all lookup tables of the mount table `self`.
 *
 * For every mount whose directory can be stat'ed, the underlying whole
 * disk is determined (via blkid, or by special-casing ram disks and nfs
 * mounts), and then
 *   - part_table maps the mount's dev_t (and the disk's dev_t) to an
 *     RmPartitionInfo,
 *   - disk_table maps the disk's dev_t to an RmDiskInfo.
 * `force_fiemap` marks every disk as rotational.
 *
 * Returns false only if the mount list could not be opened.
 * */
static bool rm_mounts_create_tables(RmMountTable *self, bool force_fiemap) {
    /* partition dev_t to disk dev_t */
    self->part_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,
                                             (GDestroyNotify)rm_part_info_free);

    /* disk dev_t to boolean indication if disk is rotational */
    self->disk_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,
                                             (GDestroyNotify)rm_disk_info_free);

    /* nfs server name to the minor id handed out for that server */
    self->nfs_table = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);

    /* Mapping dev_t => true (used as set) */
    self->evilfs_table = g_hash_table_new(NULL, NULL);
    /* Mapping dev_t => file system type of reflink capable mounts */
    self->reflinkfs_table = g_hash_table_new(NULL, NULL);

    RmMountEntry *entry = NULL;
    RmMountEntries *mnt_entries = rm_mount_list_open(self);

    if(mnt_entries == NULL) {
        return false;
    }

    while((entry = rm_mount_list_next(mnt_entries))) {
        RmStat stat_buf_folder;
        if(rm_sys_stat(entry->dir, &stat_buf_folder) == -1) {
            continue;
        }

        dev_t whole_disk = 0;
        gchar is_rotational = true;
        char diskname[PATH_MAX];
        /* Zeroed up front, so the bounded copies below stay terminated */
        memset(diskname, 0, sizeof(diskname));

        RmStat stat_buf_dev;
        if(rm_sys_stat(entry->fsname, &stat_buf_dev) == -1) {
            char *nfs_marker = NULL;
            /* folder rm_sys_stat() is ok but devname rm_sys_stat() is not; this happens
             * for example
             * with tmpfs and with nfs mounts.  Try to handle a few such cases.
             * */
            if(rm_mounts_is_ramdisk(entry->fsname)) {
                strncpy(diskname, entry->fsname, sizeof(diskname) - 1);
                is_rotational = false;
                whole_disk = stat_buf_folder.st_dev;
            } else if((nfs_marker = strstr(entry->fsname, ":/")) != NULL) {
                /* The server name is everything in front of ":/". Bound
                 * the copy by the destination buffer; entry->fsname is a
                 * pointer, so its sizeof would cut the name down to the
                 * size of a pointer. */
                size_t until_slash =
                    MIN(sizeof(diskname) - 1, (size_t)(nfs_marker - entry->fsname));
                strncpy(diskname, entry->fsname, until_slash);
                is_rotational = true;

                /* Assign different dev ids (with major id 0) to different nfs
                 * servers. The id is remembered per server, so that a further
                 * mount of a known server gets that server's id again and
                 * not the one of the most recently added server. */
                gpointer nfs_id = g_hash_table_lookup(self->nfs_table, diskname);
                if(nfs_id == NULL) {
                    nfs_id = GUINT_TO_POINTER(g_hash_table_size(self->nfs_table) + 1);
                    g_hash_table_insert(self->nfs_table, g_strdup(diskname), nfs_id);
                }
                whole_disk = makedev(0, GPOINTER_TO_UINT(nfs_id));
            } else {
                strncpy(diskname, "unknown", sizeof(diskname));
                is_rotational = true;
                whole_disk = 0;
            }
        } else {
            if(rm_mounts_devno_to_wholedisk(entry, stat_buf_dev.st_rdev, diskname,
                                            sizeof(diskname), &whole_disk) == -1) {
                /* folder and devname rm_sys_stat() are ok but blkid failed; this happens
                 * when?
                 * Treat as a non-rotational device using devname dev as whole_disk key
                 * */
                rm_log_debug_line(RED "devno_to_wholedisk failed for %s" RESET,
                                  entry->fsname);
                whole_disk = stat_buf_dev.st_dev;
                strncpy(diskname, entry->fsname, sizeof(diskname) - 1);
                is_rotational = false;
            } else {
                is_rotational = rm_mounts_is_rotational_blockdev(diskname);
            }
        }

        is_rotational |= force_fiemap;

        RmPartitionInfo *existing = g_hash_table_lookup(
            self->part_table, GUINT_TO_POINTER(stat_buf_folder.st_dev));
        if(!existing || (existing->disk == 0 && whole_disk != 0)) {
            if(existing) {
                rm_log_debug_line("Replacing part_table entry %s for path %s with %s",
                                  existing->fsname, entry->dir, entry->fsname);
            }
            g_hash_table_insert(self->part_table,
                                GUINT_TO_POINTER(stat_buf_folder.st_dev),
                                rm_part_info_new(entry->dir, entry->fsname, whole_disk));
        } else {
            rm_log_debug_line("Skipping duplicate mount entry for dir %s dev %02u:%02u",
                              entry->dir, major(stat_buf_folder.st_dev),
                              minor(stat_buf_folder.st_dev));
            continue;
        }

        /* small hack, so also the full disk id can be given to the api below */
        if(!g_hash_table_contains(self->part_table, GINT_TO_POINTER(whole_disk))) {
            g_hash_table_insert(self->part_table,
                                GUINT_TO_POINTER(whole_disk),
                                rm_part_info_new(entry->dir, entry->fsname, whole_disk));
        }

        if(!g_hash_table_contains(self->disk_table, GINT_TO_POINTER(whole_disk))) {
            g_hash_table_insert(self->disk_table,
                                GINT_TO_POINTER(whole_disk),
                                rm_disk_info_new(diskname, is_rotational));
        }

        rm_log_debug_line(
            "%02u:%02u %50s -> %02u:%02u %-12s (underlying disk: %s; rotational: %3s)",
            major(stat_buf_folder.st_dev), minor(stat_buf_folder.st_dev), entry->dir,
            major(whole_disk), minor(whole_disk), entry->fsname, diskname,
            is_rotational ? "yes" : "no");
    }

    rm_mount_list_close(mnt_entries);
    return true;
}
820 
821 /////////////////////////////////
822 //         PUBLIC API          //
823 /////////////////////////////////
824 
rm_mounts_table_new(bool force_fiemap)825 RmMountTable *rm_mounts_table_new(bool force_fiemap) {
826     RmMountTable *self = g_slice_new(RmMountTable);
827     if(rm_mounts_create_tables(self, force_fiemap) == false) {
828         g_slice_free(RmMountTable, self);
829         return NULL;
830     } else {
831         return self;
832     }
833 }
834 
/* Drop the references to all hash tables of `self`, then free `self`. */
void rm_mounts_table_destroy(RmMountTable *self) {
    GHashTable *owned_tables[] = {self->part_table, self->disk_table, self->nfs_table,
                                  self->evilfs_table, self->reflinkfs_table};

    for(gsize idx = 0; idx < G_N_ELEMENTS(owned_tables); idx++) {
        g_hash_table_unref(owned_tables[idx]);
    }

    g_slice_free(RmMountTable, self);
}
843 
844 #else /* probably FreeBSD */
845 
/* Variant for builds where RM_MOUNTTABLE_IS_USABLE is false:
 * no mount table can be built, so NULL is returned unconditionally.
 * */
RmMountTable *rm_mounts_table_new(_UNUSED bool force_fiemap) {
    return NULL;
}
849 
/* Variant for builds where RM_MOUNTTABLE_IS_USABLE is false:
 * no table is ever created there, hence there is nothing to free.
 * */
void rm_mounts_table_destroy(_UNUSED RmMountTable *self) {
    /* NO-OP */
}
853 
854 #endif /* RM_MOUNTTABLE_IS_USABLE */
855 
/* Tell whether `device` sits on a non-rotational disk.
 *
 * The answer is true (non-rotational) whenever it cannot be looked up:
 * without a mount table, or with an unknown partition or disk (the two
 * latter cases are logged as errors).
 * */
bool rm_mounts_is_nonrotational(RmMountTable *self, dev_t device) {
    if(self == NULL) {
        return true;
    }

    RmPartitionInfo *partition =
        g_hash_table_lookup(self->part_table, GINT_TO_POINTER(device));
    if(partition == NULL) {
        rm_log_error_line("Partition not found in rm_mounts_is_nonrotational");
        return true;
    }

    RmDiskInfo *whole_disk =
        g_hash_table_lookup(self->disk_table, GINT_TO_POINTER(partition->disk));
    if(whole_disk == NULL) {
        rm_log_error_line("Disk not found in rm_mounts_is_nonrotational");
        return true;
    }

    return !whole_disk->is_rotational;
}
877 
/* Return the dev_t of the disk that the partition `dev` lives on.
 *
 * If `dev` is not a known partition (e.g. a btrfs subvolume that is no
 * mount point of its own), the parent directories of `path` are tried one
 * after another until one of them lies on a known partition; `dev` is
 * then registered as an alias of that partition (and as reflink capable
 * if the partition is).
 *
 * Returns 0 without a mount table, in builds without mount table support,
 * or if no known partition is found.
 * */
dev_t rm_mounts_get_disk_id(RmMountTable *self, _UNUSED dev_t dev,
                            _UNUSED const char *path) {
    if(self == NULL) {
        return 0;
    }

#if RM_MOUNTTABLE_IS_USABLE

    RmPartitionInfo *part = g_hash_table_lookup(self->part_table, GINT_TO_POINTER(dev));
    if(part != NULL) {
        return part->disk;
    }

    if(path == NULL) {
        /* nothing to walk up from */
        return 0;
    }

    /* probably a btrfs subvolume which is not a mountpoint;
     * walk up tree until we get to a recognisable partition
     * */
    char *prev = g_strdup(path);
    while(TRUE) {
        char *parent_path = g_path_get_dirname(prev);

        RmStat stat_buf;
        if(!rm_sys_stat(parent_path, &stat_buf)) {
            RmPartitionInfo *parent_part =
                g_hash_table_lookup(self->part_table, GINT_TO_POINTER(stat_buf.st_dev));
            if(parent_part) {
                /* create new partition table entry for dev pointing to parent_part*/
                rm_log_debug_line("Adding partition info for " GREEN "%s" RESET
                                  " - looks like subvolume %s on volume " GREEN
                                  "%s" RESET,
                                  path, prev, parent_part->name);
                part = rm_part_info_new(prev, parent_part->fsname, parent_part->disk);
                g_hash_table_insert(self->part_table, GINT_TO_POINTER(dev), part);
                /* if parent_part is in the reflinkfs_table, add dev as well */
                char *parent_type = g_hash_table_lookup(
                    self->reflinkfs_table, GUINT_TO_POINTER(stat_buf.st_dev));
                if(parent_type) {
                    g_hash_table_insert(self->reflinkfs_table, GUINT_TO_POINTER(dev),
                                        parent_type);
                }
                g_free(prev);
                g_free(parent_path);
                return parent_part->disk;
            }
        }

        /* The top is reached once the dirname stops changing. That is "/"
         * for absolute paths and "." for relative ones; testing for "/"
         * alone would spin forever on a relative path. */
        if(strcmp(prev, parent_path) == 0) {
            g_free(parent_path);
            break;
        }

        g_free(prev);
        prev = parent_path;
    }

    g_free(prev);
    return 0;
#else
    (void)dev;
    (void)path;
    return 0;
#endif
}
939 
/* Convenience wrapper around rm_mounts_get_disk_id(): stat `path` to obtain
 * its device number and look up the disk id for it.
 * Returns 0 when there is no mount table or when `path` cannot be stat'ed.
 * */
dev_t rm_mounts_get_disk_id_by_path(RmMountTable *self, const char *path) {
    RmStat path_stat;

    /* the stat is only attempted when a mount table exists */
    if(self == NULL || rm_sys_stat(path, &path_stat) == -1) {
        return 0;
    }

    return rm_mounts_get_disk_id(self, path_stat.st_dev, path);
}
952 
/* True if device `to_check` is listed in the mount table's evilfs_table.
 * Without a mount table nothing is considered evil.
 * */
bool rm_mounts_is_evil(RmMountTable *self, dev_t to_check) {
    return self != NULL &&
           g_hash_table_contains(self->evilfs_table, GUINT_TO_POINTER(to_check));
}
960 
/* Decide whether data on device `source` could be reflinked to device `dest`.
 *
 * `source` must be listed in reflinkfs_table. Identical devices qualify
 * immediately; different devices qualify only if their partition entries
 * carry the same fsname.
 * `self` must not be NULL, and for differing devices both must have an entry
 * in part_table (all three are asserted).
 * */
bool rm_mounts_can_reflink(RmMountTable *self, dev_t source, dev_t dest) {
    g_assert(self);

    if(!g_hash_table_contains(self->reflinkfs_table, GUINT_TO_POINTER(source))) {
        return false;
    }

    if(source == dest) {
        return true;
    }

    RmPartitionInfo *source_part =
        g_hash_table_lookup(self->part_table, GINT_TO_POINTER(source));
    RmPartitionInfo *dest_part =
        g_hash_table_lookup(self->part_table, GINT_TO_POINTER(dest));
    g_assert(source_part);
    g_assert(dest_part);

    int fsname_diff = strcmp(source_part->fsname, dest_part->fsname);
    return fsname_diff == 0;
}
979 
980 /////////////////////////////////
//    FIEMAP IMPLEMENTATION    //
982 /////////////////////////////////
983 
984 #if HAVE_FIEMAP
985 
986 #define _RM_OFFSET_DEBUG 0
987 
/* Ask the kernel (FS_IOC_FIEMAP) for up to n_extents extent records of the
 * file behind fd, starting at logical offset file_offset.
 * Returns a freshly allocated struct fiemap, or NULL if the ioctl fails.
 * A non-NULL result must be released with g_free.
 * */
static struct fiemap *rm_offset_get_fiemap(int fd, const int n_extents,
                                           const uint64_t file_offset) {
#if _RM_OFFSET_DEBUG
    rm_log_debug_line(_("rm_offset_get_fiemap: fd=%d, n_extents=%d, file_offset=%d"),
                      fd, n_extents, file_offset);
#endif
    /* the extent records trail the fiemap header, so both are reserved
     * in one zero-initialised allocation
     * */
    const size_t extents_bytes = n_extents * sizeof(struct fiemap_extent);
    struct fiemap *map = g_malloc0(sizeof(struct fiemap) + extents_bytes);

    map->fm_start = file_offset;
    map->fm_length = FIEMAP_MAX_OFFSET;
    map->fm_flags = 0;
    map->fm_extent_count = n_extents;

    if(ioctl(fd, FS_IOC_FIEMAP, (unsigned long)map) != -1) {
        return map;
    }

    g_free(map);
    return NULL;
}
1015 
1016 /* Return physical (disk) offset of the beginning of the file extent containing the
1017  * specified logical file_offset.
1018  * If a pointer to file_offset_next is provided then read fiemap extents until
1019  * the next non-contiguous extent (fragment) is encountered and writes the corresponding
1020  * file offset to &file_offset_next.
1021  * */
rm_offset_get_from_fd(int fd,RmOff file_offset,RmOff * file_offset_next,bool * is_last)1022 RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, bool *is_last) {
1023     RmOff result = 0;
1024     bool done = FALSE;
1025     bool first = TRUE;
1026 
1027     /* used for detecting contiguous extents */
1028     unsigned long expected = 0;
1029 
1030     fsync(fd);
1031 
1032     while(!done) {
1033         /* read in next extent */
1034         struct fiemap *fm = rm_offset_get_fiemap(fd, 1, file_offset);
1035 
1036         if(fm==NULL) {
1037             /* got no extent data */
1038 #if _RM_OFFSET_DEBUG
1039             rm_log_info_line(_("rm_offset_get_fiemap: got no fiemap for %d"), fd);
1040 #endif
1041             break;
1042         }
1043 
1044         if (fm->fm_mapped_extents == 0) {
1045 #if _RM_OFFSET_DEBUG
1046             rm_log_info_line(_("rm_offset_get_fiemap: got no extents for %d"), fd);
1047 #endif
1048             done = TRUE;
1049         } else {
1050 
1051             /* retrieve data from fiemap */
1052             struct fiemap_extent fm_ext = fm->fm_extents[0];
1053 
1054             if (first) {
1055                 /* remember disk location of start of data */
1056                 result = fm_ext.fe_physical;
1057                 first=FALSE;
1058             } else {
1059                 /* check if subsequent extents are contiguous */
1060                 if(fm_ext.fe_physical != expected)  {
1061                     /* current extent is not contiguous with previous, so we can stop */
1062                     done = TRUE;
1063                 }
1064             }
1065 
1066             if (!done && file_offset_next != NULL) {
1067                 /* update logical offset of next fragment */
1068                 *file_offset_next = fm_ext.fe_logical + fm_ext.fe_length;
1069             }
1070 
1071             if(fm_ext.fe_flags & FIEMAP_EXTENT_LAST) {
1072                 done = TRUE;
1073 
1074                 if(is_last != NULL) {
1075                     *is_last = TRUE;
1076                 }
1077             }
1078 
1079             if(fm_ext.fe_length <= 0) {
1080                 /* going nowhere; bail out rather than looping indefinitely */
1081                 done = TRUE;
1082             }
1083 
1084             /* move offsets in preparation for reading next extent */
1085             file_offset += fm_ext.fe_length;
1086             expected = fm_ext.fe_physical + fm_ext.fe_length;
1087         }
1088 
1089         g_free(fm);
1090     }
1091 
1092     if (file_offset_next != NULL) {
1093         /* return value of *file_offset_next: */
1094         *file_offset_next = file_offset;
1095     }
1096 
1097     return result;
1098 }
1099 
/* Path based front-end for rm_offset_get_from_fd(): opens `path` read-only,
 * queries the offset and closes the descriptor again.
 * Returns 0 (after logging) if the file cannot be opened.
 * */
RmOff rm_offset_get_from_path(const char *path, RmOff file_offset,
                              RmOff *file_offset_next) {
    RmOff physical = 0;
    int fd = rm_sys_open(path, O_RDONLY);

    if(fd != -1) {
        physical = rm_offset_get_from_fd(fd, file_offset, file_offset_next, NULL);
        rm_sys_close(fd);
    } else {
        rm_log_info("Error opening %s in rm_offset_get_from_path\n", path);
    }

    return physical;
}
1111 
1112 #else /* Probably FreeBSD */
1113 
rm_offset_get_from_fd(_UNUSED int fd,_UNUSED RmOff file_offset,_UNUSED RmOff * file_offset_next,_UNUSED bool * is_last)1114 RmOff rm_offset_get_from_fd(_UNUSED int fd, _UNUSED RmOff file_offset,
1115                             _UNUSED RmOff *file_offset_next, _UNUSED bool *is_last) {
1116     return 0;
1117 }
1118 
rm_offset_get_from_path(_UNUSED const char * path,_UNUSED RmOff file_offset,_UNUSED RmOff * file_offset_next)1119 RmOff rm_offset_get_from_path(_UNUSED const char *path, _UNUSED RmOff file_offset,
1120                               _UNUSED RmOff *file_offset_next) {
1121     return 0;
1122 }
1123 
1124 #endif
1125 
rm_util_is_path_double(char * path1,char * path2)1126 static gboolean rm_util_is_path_double(char *path1, char *path2) {
1127     char *basename1 = rm_util_basename(path1);
1128     char *basename2 = rm_util_basename(path2);
1129     return (strcmp(basename1, basename2) == 0 &&
1130             rm_util_parent_node(path1) == rm_util_parent_node(path2));
1131 }
1132 
1133 /* test if two file paths are on the same device (even if on different
1134  * mountpoints)
1135  */
rm_util_same_device(const char * path1,const char * path2)1136 static gboolean rm_util_same_device(const char *path1, const char *path2) {
1137     const char *best1 = NULL;
1138     const char *best2 = NULL;
1139     int len1 = 0;
1140     int len2 = 0;
1141 
1142     GList *mounts = g_unix_mounts_get(NULL);
1143     for(GList *iter = mounts; iter; iter = iter->next) {
1144         GUnixMountEntry *mount = iter->data;
1145         const char *mountpath = g_unix_mount_get_mount_path(mount);
1146         int len = strlen(mountpath);
1147         if(len > len1 && strncmp(mountpath, path1, len) == 0) {
1148             best1 = g_unix_mount_get_device_path(mount);
1149             len1 = len;
1150         }
1151         if(len > len2 && strncmp(mountpath, path2, len) == 0) {
1152             best2 = g_unix_mount_get_device_path(mount);
1153             len2 = len;
1154         }
1155     }
1156     gboolean result = (best1 && best2 && strcmp(best1, best2) == 0);
1157     g_list_free_full(mounts, (GDestroyNotify)g_unix_mount_free);
1158     return result;
1159 }
1160 
rm_util_link_type(char * path1,char * path2)1161 RmLinkType rm_util_link_type(char *path1, char *path2) {
1162 #if _RM_OFFSET_DEBUG
1163     rm_log_debug_line("Checking link type for %s vs %s", path1, path2);
1164 #endif
1165     int fd1 = rm_sys_open(path1, O_RDONLY);
1166     if(fd1 == -1) {
1167         rm_log_perrorf("rm_util_link_type: Error opening %s", path1);
1168         return RM_LINK_ERROR;
1169     }
1170 
1171 #define RM_RETURN(value)   \
1172     {                      \
1173         rm_sys_close(fd1); \
1174         return (value);    \
1175     }
1176 
1177     RmStat stat1;
1178     int stat_state = rm_sys_lstat(path1, &stat1);
1179     if(stat_state == -1) {
1180         rm_log_perrorf("rm_util_link_type: Unable to stat file %s", path1);
1181         RM_RETURN(RM_LINK_ERROR);
1182     }
1183 
1184     if(!S_ISREG(stat1.st_mode)) {
1185         RM_RETURN(RM_LINK_NOT_FILE);
1186     }
1187 
1188     int fd2 = rm_sys_open(path2, O_RDONLY);
1189     if(fd2 == -1) {
1190         rm_log_perrorf("rm_util_link_type: Error opening %s", path2);
1191         RM_RETURN(RM_LINK_ERROR);
1192     }
1193 
1194 #undef RM_RETURN
1195 #define RM_RETURN(value)   \
1196     {                      \
1197         rm_sys_close(fd1); \
1198         rm_sys_close(fd2); \
1199         return (value);    \
1200     }
1201 
1202     RmStat stat2;
1203     stat_state = rm_sys_lstat(path2, &stat2);
1204     if(stat_state == -1) {
1205         rm_log_perrorf("rm_util_link_type: Unable to stat file %s", path2);
1206         RM_RETURN(RM_LINK_ERROR);
1207     }
1208 
1209     if(!S_ISREG(stat2.st_mode)) {
1210         RM_RETURN(RM_LINK_NOT_FILE);
1211     }
1212 
1213     if(stat1.st_size != stat2.st_size) {
1214 #if _RM_OFFSET_DEBUG
1215         rm_log_debug_line("rm_util_link_type: Files have different sizes: %" G_GUINT64_FORMAT
1216                           " <> %" G_GUINT64_FORMAT, stat1.st_size,
1217                           stat2.st_size);
1218 #endif
1219         RM_RETURN(RM_LINK_WRONG_SIZE);
1220     }
1221 
1222     if(stat1.st_dev == stat2.st_dev && stat1.st_ino == stat2.st_ino) {
1223         /* hardlinks or maybe even same file */
1224         if(strcmp(path1, path2) == 0) {
1225             RM_RETURN(RM_LINK_SAME_FILE);
1226         } else if(rm_util_is_path_double(path1, path2)) {
1227             RM_RETURN(RM_LINK_PATH_DOUBLE);
1228         } else {
1229             RM_RETURN(RM_LINK_HARDLINK);
1230         }
1231     }
1232 
1233     if(stat1.st_dev != stat2.st_dev) {
1234         /* reflinks must be on same filesystem but not necessarily
1235          * same st_dev (btrfs subvolumes have different st_dev's) */
1236         if(!rm_util_same_device(path1, path2)) {
1237             RM_RETURN(RM_LINK_XDEV);
1238         }
1239     }
1240 
1241     /* If both are symbolic links we do not follow them */
1242     if(S_ISLNK(stat1.st_mode) || S_ISLNK(stat2.st_mode)) {
1243         RM_RETURN(RM_LINK_SYMLINK);
1244     }
1245 
1246 #if HAVE_FIEMAP
1247 
1248     RmOff logical_current = 0;
1249 
1250     bool is_last_1 = false;
1251     bool is_last_2 = false;
1252     bool at_least_one_checked = false;
1253 
1254     while(!rm_session_was_aborted()) {
1255         RmOff logical_next_1 = 0;
1256         RmOff logical_next_2 = 0;
1257 
1258         RmOff physical_1 = rm_offset_get_from_fd(fd1, logical_current, &logical_next_1, &is_last_1);
1259         RmOff physical_2 = rm_offset_get_from_fd(fd2, logical_current, &logical_next_2, &is_last_2);
1260 
1261         if(is_last_1 != is_last_2) {
1262             RM_RETURN(RM_LINK_NONE);
1263         }
1264 
1265         if(is_last_1 && is_last_2 && at_least_one_checked) {
1266             RM_RETURN(RM_LINK_REFLINK);
1267         }
1268 
1269         if(physical_1 != physical_2) {
1270 #if _RM_OFFSET_DEBUG
1271             rm_log_debug_line("Physical offsets differ at byte %" G_GUINT64_FORMAT
1272                               ": %"G_GUINT64_FORMAT "<> %" G_GUINT64_FORMAT,
1273                               logical_current, physical_1, physical_2);
1274 #endif
1275             RM_RETURN(RM_LINK_NONE);
1276         }
1277         if(logical_next_1 != logical_next_2) {
1278 #if _RM_OFFSET_DEBUG
1279             rm_log_debug_line("File offsets differ after %" G_GUINT64_FORMAT
1280                               " bytes: %" G_GUINT64_FORMAT "<> %" G_GUINT64_FORMAT,
1281                               logical_current, logical_next_1, logical_next_2);
1282 #endif
1283             RM_RETURN(RM_LINK_NONE);
1284         }
1285 
1286         if(physical_1 == 0) {
1287 #if _RM_OFFSET_DEBUG
1288             rm_log_debug_line(
1289                 "Can't determine whether files are clones (maybe inline extents?)");
1290 #endif
1291             RM_RETURN(RM_LINK_MAYBE_REFLINK);
1292         }
1293 
1294 #if _RM_OFFSET_DEBUG
1295         rm_log_debug_line("Offsets match at fd1=%d, fd2=%d, logical=%" G_GUINT64_FORMAT ", physical=%" G_GUINT64_FORMAT,
1296                           fd1, fd2, logical_current, physical_1);
1297 #endif
1298         if(logical_next_1 <= logical_current) {
1299             /* oops we seem to be getting nowhere (this shouldn't really happen) */
1300             rm_log_info_line(
1301                 "rm_util_link_type() giving up: file1_offset_next<=file_offset_current for %s vs %s", path1, path2);
1302             RM_RETURN(RM_LINK_ERROR)
1303         }
1304 
1305         if(logical_next_1 >= (RmOff)stat1.st_size) {
1306             /* phew, we got to the end */
1307 #if _RM_OFFSET_DEBUG
1308             rm_log_debug_line("Files are clones (share same data)")
1309 #endif
1310             RM_RETURN(RM_LINK_REFLINK)
1311         }
1312 
1313         logical_current = logical_next_1;
1314         at_least_one_checked = true;
1315     }
1316 
1317     RM_RETURN(RM_LINK_ERROR);
1318 #else
1319     RM_RETURN(RM_LINK_NONE);
1320 #endif
1321 
1322 #undef RM_RETURN
1323 }
1324 
1325 
1326 /////////////////////////////////
1327 //  GTHREADPOOL WRAPPERS       //
1328 /////////////////////////////////
1329 
1330 /* wrapper for g_thread_pool_push with error reporting */
rm_util_thread_pool_push(GThreadPool * pool,gpointer data)1331 bool rm_util_thread_pool_push(GThreadPool *pool, gpointer data) {
1332     GError *error = NULL;
1333     g_thread_pool_push(pool, data, &error);
1334     if(error != NULL) {
1335         rm_log_error_line("Unable to push thread to pool %p: %s", pool, error->message);
1336         g_error_free(error);
1337         return false;
1338     } else {
1339         return true;
1340     }
1341 }
1342 
1343 /* wrapper for g_thread_pool_new with error reporting */
rm_util_thread_pool_new(GFunc func,gpointer data,int threads)1344 GThreadPool *rm_util_thread_pool_new(GFunc func, gpointer data, int threads) {
1345     GError *error = NULL;
1346     GThreadPool *pool = g_thread_pool_new(func, data, threads, FALSE, &error);
1347 
1348     if(error != NULL) {
1349         rm_log_error_line("Unable to create thread pool.");
1350         g_error_free(error);
1351     }
1352     return pool;
1353 }
1354 
1355 //////////////////////////////
1356 //    TIMESTAMP HELPERS     //
1357 //////////////////////////////
1358 
rm_iso8601_parse(const char * string)1359 gdouble rm_iso8601_parse(const char *string) {
1360 #if GLIB_CHECK_VERSION(2,56,0)
1361     GDateTime *time_result = g_date_time_new_from_iso8601(string, NULL);
1362     if(time_result == NULL) {
1363         rm_log_perror("Converting time failed");
1364         return 0;
1365     }
1366 
1367 
1368     gdouble result = g_date_time_to_unix(time_result);
1369     result += g_date_time_get_microsecond(time_result) / (gdouble)(G_USEC_PER_SEC);
1370 
1371     g_date_time_unref(time_result);
1372     return result;
1373 #else
1374     /* Remove this branch in a few years (written end of 2019) */
1375 
1376     GTimeVal time_result;
1377     if(!g_time_val_from_iso8601(string, &time_result)) {
1378         rm_log_perror("Converting time failed");
1379         return 0;
1380     }
1381 
1382     return time_result.tv_sec + time_result.tv_usec / (gdouble)(G_USEC_PER_SEC);
1383 #endif
1384 }
1385 
/* Write `stamp` as local time in the form "%FT%T%z" into `buf`, which has
 * room for `buf_size` bytes.
 * Returns false if the time cannot be broken down or strftime reports 0.
 * */
bool rm_iso8601_format(time_t stamp, char *buf, gsize buf_size) {
    struct tm broken_down;

    if(localtime_r(&stamp, &broken_down) == NULL) {
        return false;
    }

    size_t written = strftime(buf, buf_size, "%FT%T%z", &broken_down);
    return written != 0;
}
1394 
1395 #define SECONDS_PER_DAY (24 * 60 * 60)
1396 #define SECONDS_PER_HOUR (60 * 60)
1397 #define SECONDS_PER_MINUTE (60)
1398 
/* Render a duration as text, e.g. " 1d  2h  3m  4.5s".
 *
 * elapsed_sec:   the duration in seconds
 * sec_precision: number of decimals printed for the seconds part
 *
 * Day, hour and minute parts are only emitted once the remaining time has
 * reached a full unit (>=, so exactly 60 seconds reads " 1m" and not "60s").
 * Returns a newly allocated string; the caller releases it with g_free.
 * */
char *rm_format_elapsed_time(gfloat elapsed_sec, int sec_precision) {
    GString *buf = g_string_new(NULL);

    if(elapsed_sec >= SECONDS_PER_DAY) {
        gint days = elapsed_sec / SECONDS_PER_DAY;
        elapsed_sec -= days * SECONDS_PER_DAY;
        g_string_append_printf(buf, "%2dd ", days);
    }

    if(elapsed_sec >= SECONDS_PER_HOUR) {
        gint hours = elapsed_sec / SECONDS_PER_HOUR;
        elapsed_sec -= hours * SECONDS_PER_HOUR;
        g_string_append_printf(buf, "%2dh ", hours);
    }

    if(elapsed_sec >= SECONDS_PER_MINUTE) {
        gint minutes = elapsed_sec / SECONDS_PER_MINUTE;
        elapsed_sec -= minutes * SECONDS_PER_MINUTE;
        g_string_append_printf(buf, "%2dm ", minutes);
    }

    g_string_append_printf(buf, "%2.*fs", sec_precision, elapsed_sec);

    /* FALSE: keep the character data and hand it to the caller */
    return g_string_free(buf, FALSE);
}
1423 
/* Prepare `m` for averaging over a window of max_values samples.
 * The sample buffer is allocated zero-filled; release it with
 * rm_running_mean_unref().
 * */
void rm_running_mean_init(RmRunningMean *m, int max_values) {
    m->cursor = 0;
    m->sum = 0;
    m->max_values = max_values;
    m->values = g_malloc0(max_values * sizeof(gdouble));
}
1430 
/* Feed one sample into the running mean: the cursor is advanced first, then
 * the sample replaces the value in slot (cursor % max_values) and the sum is
 * adjusted by the new value minus the one that was overwritten.
 * */
void rm_running_mean_add(RmRunningMean *m, gdouble value) {
    m->cursor += 1;
    int slot = m->cursor % m->max_values;
    gdouble evicted = m->values[slot];

    m->values[slot] = value;
    m->sum += value;
    m->sum -= evicted;
}
1437 
rm_running_mean_get(RmRunningMean * m)1438 gdouble rm_running_mean_get(RmRunningMean *m) {
1439     int n = MIN(m->max_values, m->cursor);
1440     if(n == 0) {
1441         return 0.0;
1442     }
1443 
1444     return m->sum / n;
1445 }
1446 
/* Release the sample buffer of `m` and clear the pointer; `m` itself is not
 * freed. Safe to call more than once.
 * */
void rm_running_mean_unref(RmRunningMean *m) {
    /* g_free() ignores NULL, so no guard is needed */
    g_free(m->values);
    m->values = NULL;
}
1453 
/* This is a complete copy of the GLib version here:
 *
 *  https://github.com/GNOME/glib/blob/3dec72b946a527f4b1f35262bddd4afb060409b7/glib/gfileutils.c#L2552
 *
 * The reason we have this here is that rmlint is still often used
 * on older systems (Debian 9...) that don't have a recent enough GLib.
 * Remove this once some years have progressed.
 *
 * Turns `filename` into an absolute path without "." and ".." components,
 * without repeated separators and without a trailing separator.
 * The work is done on the string only; apart from asking for the current
 * directory, no filesystem call is made in this function.
 *
 * filename:    the path to canonicalize
 * relative_to: absolute directory a relative `filename` is resolved against,
 *              or NULL to use the current directory
 *
 * Returns a newly allocated string (release with g_free), or NULL if
 * `relative_to` is given but not absolute.
 */
gchar *rm_canonicalize_filename (const gchar *filename, const gchar *relative_to) {
  gchar *canon, *start, *p, *q;
  guint i;

  g_return_val_if_fail (relative_to == NULL || g_path_is_absolute (relative_to), NULL);

  /* Step 1: make the path absolute; canon is our own, modifiable copy */
  if (!g_path_is_absolute (filename))
    {
      gchar *cwd_allocated = NULL;
      const gchar  *cwd;

      if (relative_to != NULL)
        cwd = relative_to;
      else
        cwd = cwd_allocated = g_get_current_dir ();

      canon = g_build_filename (cwd, filename, NULL);
      /* NULL when relative_to was used; g_free accepts that */
      g_free (cwd_allocated);
    }
  else
    {
      canon = g_strdup (filename);
    }

  /* start = first character after the root part of canon */
  start = (char *)g_path_skip_root (canon);

  if (start == NULL)
    {
      /* This shouldn't really happen, as g_get_current_dir() should
         return an absolute pathname, but bug 573843 shows this is
         not always happening */
      g_free (canon);
      return g_build_filename (G_DIR_SEPARATOR_S, filename, NULL);
    }

  /* POSIX allows double slashes at the start to
   * mean something special (as does windows too).
   * So, "//" != "/", but more than two slashes
   * is treated as "/".
   */
  /* count the separators directly in front of start */
  i = 0;
  for (p = start - 1;
       (p >= canon) &&
         G_IS_DIR_SEPARATOR (*p);
       p--)
    i++;
  if (i > 2)
    {
      /* keep one separator, move the rest of the string over the others */
      i -= 1;
      start -= i;
      memmove (start, start+i, strlen (start+i) + 1);
    }

  /* Make sure we're using the canonical dir separator */
  /* p was left one position before the first counted separator */
  p++;
  while (p < start && G_IS_DIR_SEPARATOR (*p))
    *p++ = G_DIR_SEPARATOR;

  /* Step 2: walk the components after the root, rewriting canon in place.
     Each memmove includes the terminating NUL (hence the +1). */
  p = start;
  while (*p != 0)
    {
      if (p[0] == '.' && (p[1] == 0 || G_IS_DIR_SEPARATOR (p[1])))
        {
          /* "." component: drop the dot; a following separator is removed
             by the "additional separators" step below */
          memmove (p, p+1, strlen (p+1)+1);
        }
      else if (p[0] == '.' && p[1] == '.' && (p[2] == 0 || G_IS_DIR_SEPARATOR (p[2])))
        {
          /* ".." component: q marks what follows it, p is moved back to the
             beginning of the previous component (never before start) */
          q = p + 2;
          /* Skip previous separator */
          p = p - 2;
          if (p < start)
            p = start;
          while (p > start && !G_IS_DIR_SEPARATOR (*p))
            p--;
          if (G_IS_DIR_SEPARATOR (*p))
            *p++ = G_DIR_SEPARATOR;
          /* overwrite the previous component and the ".." with the rest */
          memmove (p, q, strlen (q)+1);
        }
      else
        {
          /* Skip until next separator */
          while (*p != 0 && !G_IS_DIR_SEPARATOR (*p))
            p++;

          if (*p != 0)
            {
              /* Canonicalize one separator */
              *p++ = G_DIR_SEPARATOR;
            }
        }

      /* Remove additional separators */
      q = p;
      while (*q && G_IS_DIR_SEPARATOR (*q))
        q++;

      if (p != q)
        memmove (p, q, strlen (q) + 1);
    }

  /* Remove trailing slashes */
  /* p points at the terminating NUL here; the root part is left alone */
  if (p > start && G_IS_DIR_SEPARATOR (*(p-1)))
    *(p-1) = 0;

  return canon;
}
1568