1 /**
2 * This file is part of rmlint.
3 *
4 *  rmlint is free software: you can redistribute it and/or modify
5 *  it under the terms of the GNU General Public License as published by
6 *  the Free Software Foundation, either version 3 of the License, or
7 *  (at your option) any later version.
8 *
9 *  rmlint is distributed in the hope that it will be useful,
10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 *  GNU General Public License for more details.
13 *
14 *  You should have received a copy of the GNU General Public License
15 *  along with rmlint.  If not, see <http://www.gnu.org/licenses/>.
16 *
17 * Authors:
18 *
19 *  - Christopher <sahib> Pahl 2010-2020 (https://github.com/sahib)
20 *  - Daniel <SeeSpotRun> T.   2014-2020 (https://github.com/SeeSpotRun)
21 *
22 * Hosted on http://github.com/sahib/rmlint
23 *
24 */
25 
26 #ifndef RM_UTILITIES_H_INCLUDE
27 #define RM_UTILITIES_H_INCLUDE
28 
29 #include <glib.h>
30 #include <stdbool.h>
31 
32 #include <fcntl.h>
33 #include <sys/stat.h>
34 #include <sys/uio.h>
35 #include <time.h>
36 #include <unistd.h>
37 
38 /* Pat(h)tricia Trie implementation */
39 #include "pathtricia.h"
40 
/**
 * Result codes of type RmLinkType, the return type of rm_util_link_type().
 *
 * RM_LINK_REFLINK and RM_LINK_NONE reuse the values of EXIT_SUCCESS and
 * EXIT_FAILURE; every other code is numbered explicitly from 3 upwards.
 *
 * NOTE(review): EXIT_SUCCESS / EXIT_FAILURE are declared in <stdlib.h>, which
 * this header does not include itself -- presumably it arrives through
 * <glib.h>; confirm.
 */
typedef enum RmLinkType {
    RM_LINK_REFLINK         = EXIT_SUCCESS,
    RM_LINK_NONE            = EXIT_FAILURE,
    RM_LINK_NOT_FILE        = 3,
    RM_LINK_WRONG_SIZE      = 4,
    RM_LINK_MAYBE_REFLINK   = 5,
    RM_LINK_SAME_FILE       = 6,
    RM_LINK_PATH_DOUBLE     = 7,
    RM_LINK_HARDLINK        = 8,
    RM_LINK_SYMLINK         = 9,
    RM_LINK_XDEV            = 10,
    RM_LINK_ERROR           = 11,
} RmLinkType;
55 
56 
/* Stat buffer type used by the rm_sys_stat() / rm_sys_lstat() wrappers:
 * struct stat64 when HAVE_STAT64 is set and RM_IS_APPLE is not, plain
 * struct stat otherwise. The same condition selects the syscall variant
 * inside those wrappers. */
#if HAVE_STAT64 && !RM_IS_APPLE
typedef struct stat64 RmStat;
#else
typedef struct stat RmStat;
#endif
62 
63 ////////////////////////
64 //  MATHS SHORTCUTS   //
65 ////////////////////////
66 
// Sign of X as an int: -1 if negative, 1 if positive, 0 otherwise.
// X may be evaluated twice; avoid arguments with side effects.
#define SIGN(X) ((X) < 0 ? -1 : ((X) > 0 ? 1 : 0))
69 
// Three-way comparison: 1 if X>Y, -1 if X<Y, 0 if X==Y.
// Never subtracts, so it is safe for unsigned 64 bit operands.
#define SIGN_DIFF(X, Y) ((X) < (Y) ? -1 : ((X) > (Y) ? 1 : 0))
72 
// Three-way comparison of two floats where differences of at most `tol`
// count as equal: 1 if X exceeds Y by more than tol, -1 if Y exceeds X by
// more than tol, 0 otherwise.
// Background reading:
// https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition
#define FLOAT_SIGN_DIFF(X, Y, tol) (((X) - (Y) > (tol)) ? 1 : -((Y) - (X) > (tol)))
77 
// Tolerance in seconds (one microsecond) when comparing two mtimes
#define MTIME_TOL (1e-6)
80 
/*
 * Return the value of X from the enclosing function if it is non-zero,
 * otherwise continue with the next statement.
 *
 * X is evaluated exactly once: the result is kept in a block-local temporary
 * so that an argument with side effects (e.g. a function call) is neither
 * run twice nor able to return a value different from the one tested.
 * Expands to a plain block, so it may be used with or without a trailing
 * semicolon. Relies on the GCC/Clang __typeof__ extension.
 */
#define RETURN_IF_NONZERO(X)                            \
    {                                                   \
        __typeof__(X) rm_return_if_nonzero_ = (X);      \
        if(rm_return_if_nonzero_ != 0) {                \
            return rm_return_if_nonzero_;               \
        }                                               \
    }
85 
86 ////////////////////////////////////
87 //       SYSCALL WRAPPERS         //
88 ////////////////////////////////////
89 
rm_sys_stat(const char * path,RmStat * buf)90 WARN_UNUSED_RESULT static inline int rm_sys_stat(const char *path, RmStat *buf)  {
91 #if HAVE_STAT64 && !RM_IS_APPLE
92     return stat64(path, buf);
93 #else
94     return stat(path, buf);
95 #endif
96 }
97 
rm_sys_lstat(const char * path,RmStat * buf)98 WARN_UNUSED_RESULT static inline int rm_sys_lstat(const char *path, RmStat *buf) {
99 #if HAVE_STAT64 && !RM_IS_APPLE
100     return lstat64(path, buf);
101 #else
102     return lstat(path, buf);
103 #endif
104 }
105 
rm_sys_stat_mtime_float(RmStat * stat)106 static inline gdouble rm_sys_stat_mtime_float(RmStat *stat) {
107 #if RM_IS_APPLE
108     return (gdouble)stat->st_mtimespec.tv_sec + stat->st_mtimespec.tv_nsec / 1000000000.0;
109 #else
110     return (gdouble)stat->st_mtim.tv_sec + stat->st_mtim.tv_nsec / 1000000000.0;
111 #endif
112 }
113 
/**
 * @brief open(2) wrapper.
 *
 * Adds O_LARGEFILE to the flags when HAVE_STAT64 is set and the platform
 * defines that flag. The permission argument passed to open(2) is always
 * S_IRUSR | S_IWUSR.
 *
 * @param path file to open.
 * @param mode open(2) flags (O_RDONLY, ...), despite the parameter name.
 *
 * @return the return value of open(2), unchanged.
 */
static inline int rm_sys_open(const char *path, int mode) {
    int flags = mode;
#if HAVE_STAT64 && defined(O_LARGEFILE)
    flags |= O_LARGEFILE;
#endif
    return open(path, flags, S_IRUSR | S_IWUSR);
}
123 
/**
 * @brief close(2) wrapper that logs failures.
 *
 * A failing close() is reported through rm_log_perror(); since the function
 * returns void the caller is not told about it.
 *
 * @param fd file descriptor to close.
 */
static inline void rm_sys_close(int fd) {
    const int rc = close(fd);
    if(rc == -1) {
        rm_log_perror("close(2) failed");
    }
}
129 
/* Fall back to plain lseek() where lseek64() is not available.
 *
 * HAVE_LSEEK64 is tested by value, like HAVE_STAT64 elsewhere in this header,
 * so that a HAVE_LSEEK64 defined as 0 also selects the fallback. An undefined
 * HAVE_LSEEK64 evaluates to 0 inside #if and therefore still selects it. */
#if !HAVE_LSEEK64
#define lseek64 lseek
#endif
133 
rm_sys_preadv(int fd,const struct iovec * iov,int iovcnt,RmOff offset)134 static inline gint64 rm_sys_preadv(int fd, const struct iovec *iov, int iovcnt,
135                                    RmOff offset) {
136 #if RM_IS_APPLE || RM_IS_CYGWIN
137     if(lseek(fd, offset, SEEK_SET) == -1) {
138         rm_log_perror("seek in emulated preadv failed");
139         return 0;
140     }
141     return readv(fd, iov, iovcnt);
142 #elif RM_PLATFORM_32
143     if(lseek64(fd, offset, SEEK_SET) == -1) {
144         rm_log_perror("seek in emulated preadv failed");
145         return 0;
146     }
147     return readv(fd, iov, iovcnt);
148 #else
149     return preadv(fd, iov, iovcnt, offset);
150 #endif
151 }
152 
153 /////////////////////////////////////
154 //   UID/GID VALIDITY CHECKING     //
155 /////////////////////////////////////
156 
/* State used by the uid/gid validity checks; created with rm_userlist_new()
 * and released with rm_userlist_destroy(). */
typedef struct RmUserList {
    GSequence *users;  /* presumably the known user ids -- TODO confirm */
    GSequence *groups; /* presumably the known group ids -- TODO confirm */
    GMutex lock;       /* NOTE(review): looks like it guards both sequences -- confirm */
} RmUserList;
162 
/**
 * @brief Create a new list of users.
 *
 * @return the new list; release it with rm_userlist_destroy().
 */
RmUserList *rm_userlist_new(void);
167 
/**
 * @brief Check if a uid and a gid are contained in the list.
 *
 * @param list the list to search.
 * @param uid user id to look up.
 * @param gid group id to look up.
 * @param valid_uid (out) result for the uid alone.
 * @param valid_gid (out) result for the gid alone.
 *
 * NOTE(review): whether NULL may be passed for the out parameters cannot be
 * told from this header -- confirm before relying on it.
 *
 * @return true if both are valid.
 */
bool rm_userlist_contains(RmUserList *list, unsigned long uid, unsigned gid,
                          bool *valid_uid, bool *valid_gid);
178 
/**
 * @brief Deallocate the memory allocated by rm_userlist_new().
 *
 * @param list the list to free.
 */
void rm_userlist_destroy(RmUserList *list);
183 
/**
 * @brief Get the name of the user running rmlint.
 *
 * NOTE(review): ownership of the returned string and the result on failure
 * are not documented here -- check the implementation before freeing it.
 */
char *rm_util_get_username(void);
188 
/**
 * @brief Get the group of the user running rmlint.
 *
 * NOTE(review): ownership of the returned string and the result on failure
 * are not documented here -- check the implementation before freeing it.
 */
char *rm_util_get_groupname(void);
193 
194 ////////////////////////////////////
//       GENERAL UTILITIES        //
196 ////////////////////////////////////
197 
/* Successor of a list node, or NULL if node itself is NULL. Works with any
 * pointer to a struct that has a `next` member. The argument is parenthesised
 * at every use so that expressions such as `*iter` or `&head` expand
 * correctly; it is evaluated twice when non-NULL, so avoid side effects. */
#define RM_LIST_NEXT(node) ((node) ? (node)->next : NULL)
199 
/**
 * @brief Replace {subs} with {with} in {string}.
 *
 * @param string the text to search; not modified (const).
 * @param subs the substring to look for.
 * @param with the replacement text.
 *
 * @return a newly allocated string, g_free it.
 */
char *rm_util_strsub(const char *string, const char *subs, const char *with);
206 
/**
 * @brief Check if a file has an invalid gid/uid or both.
 *
 * @param statp stat result of the file to check.
 * @param userlist list of users/groups to validate against.
 *
 * @return the appropriate RmLintType for the file (declared as int here).
 */
int rm_util_uid_gid_check(RmStat *statp, RmUserList *userlist);
213 
/**
 * @brief Check if a file is a binary that is not stripped.
 *
 * @param path path to the file to be checked.
 * @param statp valid stat pointer with st_mode filled; may be NULL.
 *
 * @return true if it is a binary with debug symbols.
 */
bool rm_util_is_nonstripped(const char *path, RmStat *statp);
223 
/**
 * @brief Get the basename part of the file. It does not change filename.
 *
 * The result points into filename (nothing is allocated), so it is only
 * valid as long as filename is.
 *
 * @param filename the path to inspect.
 *
 * @return NULL on failure, the pointer after the last / on success.
 */
char *rm_util_basename(const char *filename);
230 
/**
 * @brief Check if the file or any component of its path is hidden.
 *
 * @param path the path to inspect.
 *
 * @return true if it is.
 */
bool rm_util_path_is_hidden(const char *path);
237 
/**
 * @brief Get the depth of a path.
 *
 * @param path the path to measure.
 *
 * @return depth of path or 0.
 */
int rm_util_path_depth(const char *path);
246 
/* Argument-less factory callback returning a gpointer; this is the type of
 * the default_func parameter of rm_hash_table_setdefault(). */
typedef gpointer (*RmNewFunc)(void);
248 
/**
 * @brief A setdefault supplementary function for GHashTable.
 *
 * This is about the same as dict.setdefault in python.
 *
 * @param table the table to use
 * @param key key to lookup
 * @param default_func called if the key does not exist in table; its result
 * is inserted into table and returned
 *
 * @return the value stored for key, which may be the fresh result of
 * default_func() if key did not exist. Note the declared return type is
 * GQueue *, while default_func returns a plain gpointer.
 */
GQueue *rm_hash_table_setdefault(GHashTable *table, gpointer key, RmNewFunc default_func);
262 
/**
 * @brief Push all elements in `src` at the tail of `dest`.
 *
 * @param dest The queue to append to.
 * @param src The queue to append from. Will be empty afterwards.
 */
void rm_util_queue_push_tail_queue(GQueue *dest, GQueue *src);
270 
/**
 * @brief Function prototype for remove-iterating over a GQueue/GList/GSList.
 *
 * Used by rm_util_queue_foreach_remove(), rm_util_list_foreach_remove() and
 * rm_util_slist_foreach_remove().
 *
 * @param data current element
 * @param user_data optional user_data
 *
 * @return true (as gint) if the element should be removed.
 */
typedef gint (*RmRFunc)(gpointer data, gpointer user_data);
280 
/**
 * @brief Iterate over a GQueue and call `func` on each element.
 *
 * If func returns true, the element is removed from the queue.
 *
 * @param queue GQueue to iterate
 * @param func Function that decides whether the item is removed
 * @param user_data optional user data, handed to func
 *
 * @return Number of removed items.
 */
gint rm_util_queue_foreach_remove(GQueue *queue, RmRFunc func, gpointer user_data);
293 
/**
 * @brief Iterate over a GList and call `func` on each element.
 *
 * If func returns true, the element is removed from the GList.
 *
 * @param list pointer to the GList to iterate; taken by address, presumably
 * so the head can be updated when the first element is removed -- confirm
 * @param func Function that decides whether the item is removed
 * @param user_data optional user data, handed to func
 *
 * @return Number of removed items.
 */
gint rm_util_list_foreach_remove(GList **list, RmRFunc func, gpointer user_data);
306 
/**
 * @brief Iterate over a GSList and call `func` on each element.
 *
 * If func returns true, the element is removed from the GSList.
 *
 * @param list pointer to the GSList to iterate; taken by address, presumably
 * so the head can be updated when the first element is removed -- confirm
 * @param func Function that decides whether the item is removed
 * @param user_data optional user data, handed to func
 *
 * @return Number of removed items.
 */
gint rm_util_slist_foreach_remove(GSList **list, RmRFunc func, gpointer user_data);
319 
/**
 * @brief Pop the first element from a GSList.
 *
 * @param list pointer to the GSList to pop from.
 * @param lock NOTE(review): presumably locked while the list is modified;
 * whether NULL is accepted is not visible here -- confirm.
 *
 * @return pointer to the data associated with the popped element.
 *
 * Note this function returns NULL if the list is empty, or if the first item
 * has NULL as its data; the two cases cannot be told apart by the caller.
 */
gpointer rm_util_slist_pop(GSList **list, GMutex *lock);
329 
/**
 * @brief Return a pointer to the extension part of the file or NULL if none.
 *
 * @param basename the file name to inspect.
 *
 * @return a pointer >= basename (i.e. into the passed string) or NULL.
 */
char *rm_util_path_extension(const char *basename);
336 
/**
 * @brief Get the inode of the directory of the file specified in path.
 *
 * @param path path of the file whose parent directory is looked up.
 *
 * NOTE(review): the value returned on failure is not documented here --
 * confirm against the implementation.
 */
ino_t rm_util_parent_node(const char *path);
341 
/**
 * @brief Takes num and converts into some human readable string. 1024 -> 1KB
 *
 * @param num the size to format.
 * @param in NOTE(review): despite its name this appears to be the buffer the
 * string is written to (the function returns void) -- confirm.
 * @param len presumably the size of that buffer in bytes -- confirm.
 */
void rm_util_size_to_human_readable(RmOff num, char *in, gsize len);
346 
347 /////////////////////////////////////
348 //    MOUNTTABLE IMPLEMENTATION    //
349 /////////////////////////////////////
350 
/* Lookup tables behind the rm_mounts_*() functions; create with
 * rm_mounts_table_new() and free with rm_mounts_table_destroy().
 * NOTE(review): the per-field remarks below are inferred from the field names
 * and the query functions declared in this header -- confirm key/value
 * layout against the implementation. */
typedef struct RmMountTable {
    GHashTable *part_table;      /* presumably partitions, cf. rm_mounts_get_disk_id() */
    GHashTable *disk_table;      /* presumably whole disks, cf. rm_mounts_is_nonrotational() */
    GHashTable *nfs_table;       /* presumably network filesystems */
    GHashTable *evilfs_table;    /* presumably backs rm_mounts_is_evil() */
    GHashTable *reflinkfs_table; /* presumably backs rm_mounts_can_reflink() */
} RmMountTable;
358 
/**
 * @brief Allocates a new mounttable.
 *
 * @param force_fiemap Always create random fiemap data. Useful for testing.
 *
 * @return The mounttable. Free with rm_mounts_table_destroy().
 */
RmMountTable *rm_mounts_table_new(bool force_fiemap);
366 
/**
 * @brief Destroy a mounttable previously allocated by rm_mounts_table_new().
 *
 * @param self the table to destroy.
 */
void rm_mounts_table_destroy(RmMountTable *self);
373 
/**
 * @brief Check if the device is or is part of a nonrotational device.
 *
 * This operation has constant time.
 *
 * @param self the table to lookup from.
 * @param device the dev_t of a file, e.g. looked up from rm_sys_stat(2)
 *
 * @return true if it is a nonrotational device.
 */
bool rm_mounts_is_nonrotational(RmMountTable *self, dev_t device);
385 
/**
 * @brief Get the disk behind the partition.
 *
 * @param self the table to lookup from.
 * @param dev the dev_t of a partition (sda1 -> 8:1), e.g. looked up from
 * rm_sys_stat(2)
 * @param path NOTE(review): not described by the original documentation;
 * presumably the path of a file on that partition -- confirm.
 *
 * @return the dev_t of the whole disk. (sda 8:0)
 */
dev_t rm_mounts_get_disk_id(RmMountTable *self, dev_t dev, const char *path);
396 
/**
 * @brief Same as rm_mounts_get_disk_id(), but calls rm_sys_stat(2) on path
 * for you.
 *
 * @param self the table to lookup from.
 * @param path path of the file whose disk is looked up.
 */
dev_t rm_mounts_get_disk_id_by_path(RmMountTable *self, const char *path);
401 
/**
 * @brief Returns true if the dev_t points to a filesystem that might confuse
 * rmlint.
 *
 * @param self the table to lookup from.
 * @param to_check the dev_t to test.
 */
bool rm_mounts_is_evil(RmMountTable *self, dev_t to_check);
407 
/**
 * @brief Returns true if source and dest are on the same partition, and the
 * partition supports reflink copies (cp --reflink).
 *
 * @param self the table to lookup from.
 * @param source dev_t of the file to copy from.
 * @param dest dev_t of the copy destination.
 */
bool rm_mounts_can_reflink(RmMountTable *self, dev_t source, dev_t dest);
413 
414 /////////////////////////////////
//    FIEMAP IMPLEMENTATION    //
416 /////////////////////////////////
417 
/**
 * @brief Lookup the physical offset of a file fd at any given offset.
 *
 * @param fd open file descriptor of the file.
 * @param file_offset offset inside the file to look up.
 * @param file_offset_next (out) NOTE(review): presumably receives the file
 * offset at which the next extent starts -- confirm.
 * @param is_last (out) NOTE(review): presumably set when the extent found is
 * the last one of the file -- confirm.
 *
 * @return the physical offset starting from the disk.
 */
RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, bool *is_last);
424 
/**
 * @brief Lookup the physical offset of a file path at any given offset.
 *
 * @param path path of the file.
 * @param file_offset offset inside the file to look up.
 * @param file_offset_next (out) NOTE(review): presumably receives the file
 * offset at which the next extent starts -- confirm.
 *
 * @return the physical offset starting from the disk.
 */
RmOff rm_offset_get_from_path(const char *path, RmOff file_offset,
                              RmOff *file_offset_next);
432 
/**
 * @brief Test if two files have identical fiemaps.
 *
 * @param path1 path of the first file.
 * @param path2 path of the second file.
 *
 * @retval see the RmLinkType enum definition.
 */
RmLinkType rm_util_link_type(char *path1, char *path2);
438 
439 //////////////////////////////
440 //    TIMESTAMP HELPERS     //
441 //////////////////////////////
442 
/**
 * @brief Parse an ISO8601 timestamp to a unix timestamp.
 *
 * @param string the timestamp text.
 *
 * @return the unix timestamp as gdouble.
 * NOTE(review): the value returned for unparsable input is not documented
 * here -- confirm against the implementation.
 */
gdouble rm_iso8601_parse(const char *string);
447 
/**
 * @brief Convert a unix timestamp to an ISO8601 timestamp string.
 *
 * @param stamp unix timestamp
 * @param buf result buffer to hold the string.
 * @param buf_size size of buf in bytes.
 *
 * @return true if conversion succeeded.
 */
bool rm_iso8601_format(time_t stamp, char *buf, gsize buf_size);
458 
459 ///////////////////////////////
460 //    THREADPOOL HELPERS     //
461 ///////////////////////////////
462 
/**
 * @brief Create a new GThreadPool with default cfg.
 *
 * @param func function executed for every job pushed to the pool
 * @param data user_data to pass to func
 * @param threads maximum number of threads to use.
 *
 * @return newly allocated GThreadPool
 */
GThreadPool *rm_util_thread_pool_new(GFunc func, gpointer data, int threads);
473 
/**
 * @brief Push a new job to a threadpool.
 *
 * @param pool the pool to push to.
 * @param data the job to push.
 *
 * @return true on success.
 */
bool rm_util_thread_pool_push(GThreadPool *pool, gpointer data);
480 
/**
 * @brief Format some elapsed seconds into a human readable timestamp.
 *
 * @param elapsed_sec the elapsed time in seconds.
 * @param sec_precision NOTE(review): presumably the number of decimal places
 * shown for the seconds -- confirm.
 *
 * @return The formatted string, free with g_free.
 */
char *rm_format_elapsed_time(gfloat elapsed_sec, int sec_precision);
487 
/* Fixed-length window of values for computing a running mean; see
 * rm_running_mean_init(), _add(), _get() and _unref().
 * NOTE(review): the field remarks are inferred from the names and the API
 * documentation in this header -- confirm against the implementation. */
typedef struct {
    gdouble sum;     /* presumably the sum of the values currently in the window */
    gdouble *values; /* storage for the window; released by rm_running_mean_unref() */

    int max_values;  /* window length as passed to rm_running_mean_init() */
    int cursor;      /* presumably the position of the next write -- confirm */
} RmRunningMean;
495 
/**
 * @brief Initialize a running mean window.
 *
 * The window has a fixed length. rm_running_mean_get() can be used
 * to efficiently calculate the mean of this window. When new values
 * are added, the oldest values will be removed.
 *
 * @param m the window to initialize.
 * @param max_values the length of the window.
 */
void rm_running_mean_init(RmRunningMean *m, int max_values);
504 
/**
 * @brief Add a new value to the mean window.
 *
 * @param m the window to add to.
 * @param value the value to add.
 */
void rm_running_mean_add(RmRunningMean *m, gdouble value);
509 
/**
 * @brief Get the current mean.
 *
 * @param m the window to read.
 *
 * @return The current mean (0.0 if no values available)
 */
gdouble rm_running_mean_get(RmRunningMean *m);
516 
/**
 * @brief Release internal memory used to store values.
 *
 * @param m the window whose storage is released.
 */
void rm_running_mean_unref(RmRunningMean *m);
521 
/**
 * @brief See GLib docs for g_canonicalize_filename().
 *
 * NOTE(review): presumably shares parameters, return value and ownership
 * rules with g_canonicalize_filename() -- confirm against the
 * implementation.
 */
gchar *rm_canonicalize_filename(const gchar *filename, const gchar *relative_to);
526 
527 #endif /* RM_UTILITIES_H_INCLUDE*/
528