1 /**
2 * This file is part of rmlint.
3 *
4 * rmlint is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * rmlint is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with rmlint. If not, see <http://www.gnu.org/licenses/>.
16 *
17 * Authors:
18 *
19 * - Christopher <sahib> Pahl 2010-2020 (https://github.com/sahib)
20 * - Daniel <SeeSpotRun> T. 2014-2020 (https://github.com/SeeSpotRun)
21 *
22 * Hosted on http://github.com/sahib/rmlint
23 *
24 */
25
26 #ifndef RM_UTILITIES_H_INCLUDE
27 #define RM_UTILITIES_H_INCLUDE
28
29 #include <glib.h>
30 #include <stdbool.h>
31
32 #include <fcntl.h>
33 #include <sys/stat.h>
34 #include <sys/uio.h>
35 #include <time.h>
36 #include <unistd.h>
37
38 /* Pat(h)tricia Trie implementation */
39 #include "pathtricia.h"
40
/**
 * @brief Result codes returned by rm_util_link_type().
 *
 * RM_LINK_REFLINK and RM_LINK_NONE are pinned to EXIT_SUCCESS and
 * EXIT_FAILURE; all other codes carry explicit values starting at 3.
 *
 * NOTE(review): the first two values mirror process exit codes, presumably so
 * the result can be used directly as an exit status -- confirm against callers.
 */
typedef enum RmLinkType {
    RM_LINK_REFLINK         = EXIT_SUCCESS,
    RM_LINK_NONE            = EXIT_FAILURE,
    RM_LINK_NOT_FILE        = 3,
    RM_LINK_WRONG_SIZE      = 4,
    RM_LINK_MAYBE_REFLINK   = 5,
    RM_LINK_SAME_FILE       = 6,
    RM_LINK_PATH_DOUBLE     = 7,
    RM_LINK_HARDLINK        = 8,
    RM_LINK_SYMLINK         = 9,
    RM_LINK_XDEV            = 10,
    RM_LINK_ERROR           = 11,
} RmLinkType;
55
56
/* Stat buffer type used throughout this header: struct stat64 when
 * HAVE_STAT64 is set on non-Apple builds, plain struct stat otherwise.
 * rm_sys_stat() and rm_sys_lstat() select the matching syscall with the
 * same condition. */
#if HAVE_STAT64 && !RM_IS_APPLE
typedef struct stat64 RmStat;
#else
typedef struct stat RmStat;
#endif
62
63 ////////////////////////
64 // MATHS SHORTCUTS //
65 ////////////////////////
66
// Signum function: 1 for positive X, -1 for negative X, 0 otherwise.
// X is expanded twice, so it must not have side effects.
#define SIGN(X) (((X) > 0) - ((X) < 0))
69
// Three-way comparison: returns 1 if X>Y, -1 if X<Y or 0 if X==Y.
// Built from two comparisons instead of a subtraction; each argument is
// expanded twice, so pass expressions without side effects.
#define SIGN_DIFF(X, Y) (((X) > (Y)) - ((X) < (Y))) /* handy for comparing uint64's */
72
// Compare two floats; tolerate +/- tol when testing for equality.
// Yields 1 if X exceeds Y by more than tol, -1 if Y exceeds X by more than
// tol, and 0 otherwise (the values are considered equal).
// See also:
// https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition
#define FLOAT_SIGN_DIFF(X, Y, tol) ((X) - (Y) > (tol) ? 1 : ((Y) - (X) > (tol) ? -1 : 0))
77
// Time tolerance (seconds) when comparing two mtimes: one microsecond.
#define MTIME_TOL (0.000001)
80
// Return X from the enclosing function if X is nonzero; otherwise continue
// with the next statement.
// NOTE(review): X is expanded twice (once for the test, once for the return)
// and the expansion is a bare `if` statement, so keep X free of side effects
// and be careful when placing the macro directly in front of an `else`.
#define RETURN_IF_NONZERO(X) \
    if((X) != 0) {           \
        return (X);          \
    }
85
86 ////////////////////////////////////
87 // SYSCALL WRAPPERS //
88 ////////////////////////////////////
89
rm_sys_stat(const char * path,RmStat * buf)90 WARN_UNUSED_RESULT static inline int rm_sys_stat(const char *path, RmStat *buf) {
91 #if HAVE_STAT64 && !RM_IS_APPLE
92 return stat64(path, buf);
93 #else
94 return stat(path, buf);
95 #endif
96 }
97
rm_sys_lstat(const char * path,RmStat * buf)98 WARN_UNUSED_RESULT static inline int rm_sys_lstat(const char *path, RmStat *buf) {
99 #if HAVE_STAT64 && !RM_IS_APPLE
100 return lstat64(path, buf);
101 #else
102 return lstat(path, buf);
103 #endif
104 }
105
rm_sys_stat_mtime_float(RmStat * stat)106 static inline gdouble rm_sys_stat_mtime_float(RmStat *stat) {
107 #if RM_IS_APPLE
108 return (gdouble)stat->st_mtimespec.tv_sec + stat->st_mtimespec.tv_nsec / 1000000000.0;
109 #else
110 return (gdouble)stat->st_mtim.tv_sec + stat->st_mtim.tv_nsec / 1000000000.0;
111 #endif
112 }
113
/**
 * @brief open(2) wrapper that adds O_LARGEFILE when HAVE_STAT64 is set and
 * the platform defines that flag.
 *
 * @param path file to open
 * @param mode open(2) flags (O_RDONLY, O_WRONLY | O_CREAT, ...)
 *
 * @return the result of open(2), unchanged.
 *
 * S_IRUSR | S_IWUSR is always passed as the third argument to open(2).
 */
static inline int rm_sys_open(const char *path, int mode) {
    int flags = mode;

#if HAVE_STAT64 && defined(O_LARGEFILE)
    flags |= O_LARGEFILE;
#endif

    return open(path, flags, (S_IRUSR | S_IWUSR));
}
123
/**
 * @brief Close a file descriptor; a failing close(2) is logged via
 * rm_log_perror() and not reported to the caller.
 *
 * @param fd descriptor to close
 */
static inline void rm_sys_close(int fd) {
    if(close(fd) != -1) {
        return;
    }
    rm_log_perror("close(2) failed");
}
129
/* Map lseek64() onto plain lseek() when HAVE_LSEEK64 is not defined, so code
 * written against lseek64() (see rm_sys_preadv) still compiles.
 * NOTE(review): this tests whether the macro is defined, while the other
 * HAVE_* checks in this header test the macro's value -- confirm that
 * HAVE_LSEEK64 is never defined as 0. */
#ifndef HAVE_LSEEK64
#define lseek64 lseek
#endif
133
rm_sys_preadv(int fd,const struct iovec * iov,int iovcnt,RmOff offset)134 static inline gint64 rm_sys_preadv(int fd, const struct iovec *iov, int iovcnt,
135 RmOff offset) {
136 #if RM_IS_APPLE || RM_IS_CYGWIN
137 if(lseek(fd, offset, SEEK_SET) == -1) {
138 rm_log_perror("seek in emulated preadv failed");
139 return 0;
140 }
141 return readv(fd, iov, iovcnt);
142 #elif RM_PLATFORM_32
143 if(lseek64(fd, offset, SEEK_SET) == -1) {
144 rm_log_perror("seek in emulated preadv failed");
145 return 0;
146 }
147 return readv(fd, iov, iovcnt);
148 #else
149 return preadv(fd, iov, iovcnt, offset);
150 #endif
151 }
152
153 /////////////////////////////////////
154 // UID/GID VALIDITY CHECKING //
155 /////////////////////////////////////
156
/**
 * @brief State behind the uid/gid validity checks.
 *
 * Created by rm_userlist_new(), queried with rm_userlist_contains() and
 * released with rm_userlist_destroy().
 */
typedef struct RmUserList {
    GSequence *users;  /* presumably the known user ids -- confirm in rm_userlist_new() */
    GSequence *groups; /* presumably the known group ids -- confirm in rm_userlist_new() */
    GMutex lock;       /* NOTE(review): presumably guards the two sequences -- confirm */
} RmUserList;
162
163 /**
164 * @brief Create a new list of users.
165 */
166 RmUserList *rm_userlist_new(void);
167
168 /**
169 * @brief Check if a uid and gid is contained in the list.
170 *
171 * @param valid_uid (out)
172 * @param valid_gid (out)
173 *
174 * @return true if both are valid.
175 */
176 bool rm_userlist_contains(RmUserList *list, unsigned long uid, unsigned gid,
177 bool *valid_uid, bool *valid_gid);
178
179 /**
180 * @brief Deallocate the memory allocated by rm_userlist_new()
181 */
182 void rm_userlist_destroy(RmUserList *list);
183
184 /**
185 * @brief Get the name of the user running rmlint.
186 */
187 char *rm_util_get_username(void);
188
189 /**
190 * @brief Get the group of the user running rmlint.
191 */
192 char *rm_util_get_groupname(void);
193
194 ////////////////////////////////////
//        GENERAL UTILITIES       //
196 ////////////////////////////////////
197
/* Successor of a list node, or NULL when node itself is NULL.
 * Works for any node type with a `next` member. The argument is fully
 * parenthesized so expressions such as `&head` or `p + 1` expand correctly;
 * it is expanded twice, so it must not have side effects. */
#define RM_LIST_NEXT(node) ((node) ? (node)->next : NULL)
199
200 /**
201 * @brief Replace {subs} with {with} in {string}
202 *
203 * @return a newly allocated string, g_free it.
204 */
205 char *rm_util_strsub(const char *string, const char *subs, const char *with);
206
207 /**
208 * @brief Check if a file has an invalid gid/uid or both.
209 *
 * @return the appropriate RmLintType for the file
211 */
212 int rm_util_uid_gid_check(RmStat *statp, RmUserList *userlist);
213
214 /**
215 * @brief Check if a file is a binary that is not stripped.
216 *
217 * @path: Path to the file to be checked.
218 * @statp: valid stat pointer with st_mode filled (allow-none).
219 *
220 * @return: if it is a binary with debug symbols.
221 */
222 bool rm_util_is_nonstripped(const char *path, RmStat *statp);
223
224 /**
225 * @brief Get the basename part of the file. It does not change filename.
226 *
227 * @return NULL on failure, the pointer after the last / on success.
228 */
229 char *rm_util_basename(const char *filename);
230
231 /**
232 * @brief Check if the file or any components of it are hidden.
233 *
234 * @return true if it is.
235 */
236 bool rm_util_path_is_hidden(const char *path);
237
238 /**
239 * @brief Get the depth of a path
240 *
241 * @param path
242 *
243 * @return depth of path or 0.
244 */
245 int rm_util_path_depth(const char *path);
246
/* Argument-less constructor callback returning a newly created value; used as
 * the `default_func` parameter of rm_hash_table_setdefault(). */
typedef gpointer (*RmNewFunc)(void);
248
249 /**
250 * @brief A setdefault supplementary function for GHashTable.
251 *
252 * This is about the same as dict.setdefault in python.
253 *
254 * @param table the table to use
255 * @param key key to lookup
256 * @param default_func if the key does not exist in table, return default_func
257 * and insert it into table
258 *
259 * @return value, which may be default_func() if key does not exist.
260 */
261 GQueue *rm_hash_table_setdefault(GHashTable *table, gpointer key, RmNewFunc default_func);
262
263 /**
 * @brief Push all elements in `src` at the tail of `dest`
265 *
266 * @param dest The queue to append to.
267 * @param src The queue to append from. Will be empty afterwards.
268 */
269 void rm_util_queue_push_tail_queue(GQueue *dest, GQueue *src);
270
271 /**
272 * @brief Function prototype for remove-iterating over a GQueue/GList/GSList.
273 *
274 * @param data current element
275 * @param user_data optional user_data
276 *
277 * @return True if the element should be removed.
278 */
279 typedef gint (*RmRFunc)(gpointer data, gpointer user_data);
280
281 /**
282 * @brief Iterate over a GQueue and call `func` on each element.
283 *
284 * If func returns true, the element is removed from the queue.
285 *
286 * @param queue GQueue to iterate
287 * @param func Function that evaluates the removal of the item
288 * @param user_data optional user data
289 *
290 * @return Number of removed items.
291 */
292 gint rm_util_queue_foreach_remove(GQueue *queue, RmRFunc func, gpointer user_data);
293
294 /**
295 * @brief Iterate over a GList and call `func` on each element.
296 *
297 * If func returns true, the element is removed from the GList.
298 *
299 * @param list pointer to GList to iterate
300 * @param func Function that evaluates the removal of the item
301 * @param user_data optional user data
302 *
303 * @return Number of removed items.
304 */
305 gint rm_util_list_foreach_remove(GList **list, RmRFunc func, gpointer user_data);
306
307 /**
308 * @brief Iterate over a GSList and call `func` on each element.
309 *
310 * If func returns true, the element is removed from the GSList.
311 *
312 * @param list pointer to GSList to iterate
313 * @param func Function that evaluates the removal of the item
314 * @param user_data optional user data
315 *
316 * @return Number of removed items.
317 */
318 gint rm_util_slist_foreach_remove(GSList **list, RmRFunc func, gpointer user_data);
319
320 /**
321 * @brief Pop the first element from a GSList
322 *
323 * @return pointer to the data associated with the popped element.
324 *
325 * Note this function returns null if the list is empty, or if the first item
326 * has NULL as its data.
327 */
328 gpointer rm_util_slist_pop(GSList **list, GMutex *lock);
329
330 /**
331 * @brief Return a pointer to the extension part of the file or NULL if none.
332 *
333 * @return: a pointer >= basename or NULL.
334 */
335 char *rm_util_path_extension(const char *basename);
336
337 /**
338 * @brief Get the inode of the directory of the file specified in path.
339 */
340 ino_t rm_util_parent_node(const char *path);
341
/**
 * @brief Convert num into a human readable string (e.g. 1024 -> 1KB).
 */
345 void rm_util_size_to_human_readable(RmOff num, char *in, gsize len);
346
347 /////////////////////////////////////
348 // MOUNTTABLE IMPLEMENTATION //
349 /////////////////////////////////////
350
/**
 * @brief Lookup tables describing the mounted filesystems.
 *
 * Allocated by rm_mounts_table_new() and released by
 * rm_mounts_table_destroy(); queried through the rm_mounts_*() functions.
 *
 * NOTE(review): the key/value layout of the individual tables is not visible
 * from this header -- see the implementation before relying on it.
 */
typedef struct RmMountTable {
    GHashTable *part_table;
    GHashTable *disk_table;
    GHashTable *nfs_table;
    GHashTable *evilfs_table;    /* presumably backs rm_mounts_is_evil() -- confirm */
    GHashTable *reflinkfs_table; /* presumably backs rm_mounts_can_reflink() -- confirm */
} RmMountTable;
358
359 /**
360 * @brief Allocates a new mounttable.
361 * @param force_fiemap Create random fiemap data always. Useful for testing.
362 *
363 * @return The mounttable. Free with rm_mounts_table_destroy.
364 */
365 RmMountTable *rm_mounts_table_new(bool force_fiemap);
366
367 /**
368 * @brief Destroy a previously allocated mounttable.
369 *
370 * @param self the table to destroy.
371 */
372 void rm_mounts_table_destroy(RmMountTable *self);
373
374 /**
375 * @brief Check if the device is or is part of a nonrotational device.
376 *
377 * This operation has constant time.
378 *
379 * @param self the table to lookup from.
380 * @param device the dev_t of a file, e.g. looked up from rm_sys_stat(2)
381 *
 * @return true if it is a nonrotational device.
383 */
384 bool rm_mounts_is_nonrotational(RmMountTable *self, dev_t device);
385
386 /**
387 * @brief Get the disk behind the partition.
388 *
389 * @param self the table to lookup from.
 * @param dev the dev_t of a partition (sda1 -> 8:1), e.g. looked up from
 * rm_sys_stat(2)
392 *
393 * @return the dev_t of the whole disk. (sda 8:0)
394 */
395 dev_t rm_mounts_get_disk_id(RmMountTable *self, dev_t dev, const char *path);
396
397 /**
398 * @brief Same as above, but calls rm_sys_stat(2) on path for you.
399 */
400 dev_t rm_mounts_get_disk_id_by_path(RmMountTable *self, const char *path);
401
402 /**
403 * @brief Indicates true if dev_t points to a filesystem that might confuse
404 * rmlint.
405 */
406 bool rm_mounts_is_evil(RmMountTable *self, dev_t to_check);
407
408 /**
409 * @brief Indicates true if source and dest are on same partition, and the
410 * partition supports reflink copies (cp --reflink).
411 */
412 bool rm_mounts_can_reflink(RmMountTable *self, dev_t source, dev_t dest);
413
414 /////////////////////////////////
//    FIEMAP IMPLEMENTATION    //
416 /////////////////////////////////
417
418 /**
419 * @brief Lookup the physical offset of a file fd at any given offset.
420 *
421 * @return the physical offset starting from the disk.
422 */
423 RmOff rm_offset_get_from_fd(int fd, RmOff file_offset, RmOff *file_offset_next, bool *is_last);
424
425 /**
426 * @brief Lookup the physical offset of a file path at any given offset.
427 *
428 * @return the physical offset starting from the disk.
429 */
430 RmOff rm_offset_get_from_path(const char *path, RmOff file_offset,
431 RmOff *file_offset_next);
432
433 /**
434 * @brief Test if two files have identical fiemaps.
 * @retval see RmLinkType enum definition.
436 */
437 RmLinkType rm_util_link_type(char *path1, char *path2);
438
439 //////////////////////////////
440 // TIMESTAMP HELPERS //
441 //////////////////////////////
442
443 /**
444 * @brief Parse an ISO8601 timestamp to a unix timestamp.
445 */
446 gdouble rm_iso8601_parse(const char *string);
447
448 /**
 * @brief Convert a unix timestamp to an ISO8601 timestamp string.
450 *
451 * @param stamp unix timestamp
452 * @param buf result buffer to hold the string.
453 * @param buf_size sizeof buf.
454 *
455 * @return true if conversion succeeded.
456 */
457 bool rm_iso8601_format(time_t stamp, char *buf, gsize buf_size);
458
459 ///////////////////////////////
460 // THREADPOOL HELPERS //
461 ///////////////////////////////
462
463 /**
464 * @brief Create a new GThreadPool with default cfg.
465 *
466 * @param func func to execute
467 * @param data user_data to pass
468 * @param threads how many threads at max to use.
469 *
470 * @return newly allocated GThreadPool
471 */
472 GThreadPool *rm_util_thread_pool_new(GFunc func, gpointer data, int threads);
473
474 /**
475 * @brief Push a new job to a threadpool.
476 *
477 * @return true on success.
478 */
479 bool rm_util_thread_pool_push(GThreadPool *pool, gpointer data);
480
481 /**
482 * @brief Format some elapsed seconds into a human readable timestamp.
483 *
484 * @return The formatted string, free with g_free.
485 */
486 char *rm_format_elapsed_time(gfloat elapsed_sec, int sec_precision);
487
/**
 * @brief State of a fixed-length running-mean window.
 *
 * Set up with rm_running_mean_init(), fed with rm_running_mean_add(), read
 * with rm_running_mean_get() and released with rm_running_mean_unref().
 */
typedef struct {
    gdouble sum;     /* presumably the sum of the values currently in the window -- confirm */
    gdouble *values; /* storage for the window's values; released by rm_running_mean_unref() */

    int max_values;  /* window length as passed to rm_running_mean_init() */
    int cursor;      /* NOTE(review): presumably the next write position in values -- confirm */
} RmRunningMean;
495
496 /**
497 * @brief Initialize a running mean window.
498 *
499 * The window has a fixed length. rm_running_mean_get() can be used
500 * to efficiently calculate the mean of this window. When new values
501 * are added, the oldest values will be removed.
502 */
503 void rm_running_mean_init(RmRunningMean *m, int max_values);
504
505 /**
506 * @brief Add a new value to the mean window.
507 */
508 void rm_running_mean_add(RmRunningMean *m, gdouble value);
509
510 /**
511 * @brief Get the current mean.
512 *
513 * @return The current mean (0.0 if no values available)
514 */
515 gdouble rm_running_mean_get(RmRunningMean *m);
516
517 /**
518 * @brief Release internal mem used to store values.
519 */
520 void rm_running_mean_unref(RmRunningMean *m);
521
522 /**
523 * @brief See GLib docs for g_canonicalize_filename().
524 */
525 gchar *rm_canonicalize_filename(const gchar *filename, const gchar *relative_to);
526
527 #endif /* RM_UTILITIES_H_INCLUDE*/
528