1 /*-
2 * Copyright (c) 2011-2021 Ganael LAPLANCHE <ganael.laplanche@martymac.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include "types.h"
28 #include "utils.h"
29 #include "options.h"
30
31 /* log10(3) */
32 #include <math.h>
33
34 /* malloc(3) */
35 #include <stdlib.h>
36
37 /* fprintf(3), snprintf(3) */
38 #include <stdio.h>
39
40 /* fts(3) */
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #if defined(EMBED_FTS)
44 #include "fts.h"
45 #else
46 #include <fts.h>
47 #endif
48
49 /* strerror(3), strlen(3), strchr(3) */
50 #include <string.h>
51
52 /* errno */
53 #include <errno.h>
54
55 /* getcwd(3) */
56 #include <unistd.h>
57
58 /* MAXPATHLEN */
59 #include <sys/param.h>
60
61 /* assert(3) */
62 #include <assert.h>
63
64 /* opendir(3) */
65 #include <dirent.h>
66
67 /* fnmatch(3) */
68 #include <fnmatch.h>
69
70 /* isblank(3) */
71 #include <ctype.h>
72
73 /* strtoumax(3) */
74 #include <limits.h>
75 #include <inttypes.h>
76
77 /****************
78 Helper functions
79 ****************/
80
/* Translate a unit suffix character into a size multiplier
   - recognized suffixes (either case): k, m, g, t, p, i.e. 2^10, 2^20,
     2^30, 2^40 and 2^50
   - return 0 for any other character */
uintmax_t
char_to_multiplier(const char c)
{
    /* power of two associated with the suffix */
    unsigned int shift;

    if((c == 'k') || (c == 'K'))
        shift = 10;
    else if((c == 'm') || (c == 'M'))
        shift = 20;
    else if((c == 'g') || (c == 'G'))
        shift = 30;
    else if((c == 't') || (c == 'T'))
        shift = 40;
    else if((c == 'p') || (c == 'P'))
        shift = 50;
    else
        return (0); /* unknown suffix */

    /* widen before shifting: 40 and 50 do not fit in a 32-bit int */
    return ((uintmax_t)1 << shift);
}
112
113 /* Return the number of digits necessary to print i */
114 unsigned int
get_num_digits(double i)115 get_num_digits(double i)
116 {
117 if((int)i == 0)
118 return (1);
119
120 double logvalue = log10(i);
121 return (logvalue >= 0 ? (unsigned int)logvalue + 1 : 0);
122 }
123
124 /* Return the size of a file or directory
125 - a pointer to an existing stat must be provided
126
127 We assume that when that function is called, then the choice of including or
128 excluding the related file or directory has already been made. Thus,
129 exclusion list is only honored when computing size of a directory and when
130 depth is > 0 (i.e. we always accept the root dir but may skip subdirs).
131 */
132 fsize_t
get_size(char * file_path,struct stat * file_stat,struct program_options * options)133 get_size(char *file_path, struct stat *file_stat,
134 struct program_options *options)
135 {
136 assert(file_path != NULL);
137 assert(file_stat != NULL);
138 assert(options != NULL);
139
140 fsize_t file_size = 0; /* current return value */
141
142 /* if file_path is not a directory, return st_size for regular files (only).
143 We do *not* check for valid_file() here because if the function has been
144 called, then the choice of including the file has already been made
145 before */
146 if(!S_ISDIR(file_stat->st_mode)) {
147 return (S_ISREG(file_stat->st_mode) ? file_stat->st_size : 0);
148 }
149
150 /* directory, use fts */
151 FTS *ftsp = NULL;
152 FTSENT *p = NULL;
153 int fts_options = (options->follow_symbolic_links == OPT_FOLLOWSYMLINKS) ?
154 FTS_LOGICAL : FTS_PHYSICAL;
155 fts_options |= (options->cross_fs_boundaries == OPT_NOCROSSFSBOUNDARIES) ?
156 FTS_XDEV : 0;
157
158 char *fts_argv[] = { file_path, NULL };
159 if((ftsp = fts_open(fts_argv, fts_options, NULL)) == NULL) {
160 fprintf(stderr, "%s: fts_open()\n", file_path);
161 return (0);
162 }
163
164 while((p = fts_read(ftsp)) != NULL) {
165 switch (p->fts_info) {
166 case FTS_ERR: /* misc error */
167 case FTS_DNR: /* un-readable directory */
168 case FTS_NS: /* stat() error */
169 fprintf(stderr, "%s: %s\n", p->fts_path,
170 strerror(p->fts_errno));
171 case FTS_NSOK: /* no stat(2) available (not requested) */
172 continue;
173
174 case FTS_DC:
175 fprintf(stderr, "%s: filesystem loop detected\n", p->fts_path);
176 case FTS_DOT: /* ignore "." and ".." */
177 case FTS_DP:
178 continue;
179
180 case FTS_D:
181 /* Excluded directories do not account for returned size.
182 Always accept root dir here because, if the function has been
183 called, then the choice of including the directory has
184 already been made before */
185 if((!valid_file(p, options, VF_EXCLUDEONLY)) &&
186 (p->fts_level > 0)) {
187 #if defined(DEBUG)
188 fprintf(stderr, "%s(): skipping directory: %s\n", __func__,
189 p->fts_path);
190 #endif
191 fts_set(ftsp, p, FTS_SKIP);
192 }
193 continue;
194
195 default:
196 /* XXX default means remaining file types:
197 FTS_F, FTS_SL, FTS_SLNONE, FTS_DEFAULT */
198
199 /* Excluded files do not account for returned size */
200 if(!valid_file(p, options, VF_EXCLUDEONLY)) {
201 #if defined(DEBUG)
202 fprintf(stderr, "%s(): skipping file: %s\n", __func__,
203 p->fts_path);
204 #endif
205 }
206 else
207 file_size += p->fts_statp->st_size;
208 continue;
209 }
210 }
211
212 if(errno != 0)
213 fprintf(stderr, "%s: fts_read()\n", file_path);
214
215 if(fts_close(ftsp) < 0)
216 fprintf(stderr, "%s: fts_close()\n", file_path);
217
218 return (file_size);
219 }
220
221 /* Return absolute path for given path
222 - '/xxx' and '-' are considered absolute, e.g.
223 will not be prefixed by cwd. Everything else will.
224 - returned pointer must be manually freed later */
225 char *
abs_path(const char * path)226 abs_path(const char *path)
227 {
228 assert(path != NULL);
229
230 char *cwd = NULL; /* current working directory */
231 char *abs = NULL; /* will be returned */
232 size_t malloc_size = 0;
233
234 if(path[0] == '\0') {
235 errno = ENOENT;
236 return (NULL);
237 }
238
239 if((path[0] != '/') &&
240 ((path[0] != '-') || (path[1] != '\0'))) {
241 /* relative path given */
242 if_not_malloc(cwd, MAXPATHLEN,
243 return (NULL);
244 )
245 if(getcwd(cwd, MAXPATHLEN) == NULL) {
246 free(cwd);
247 return (NULL);
248 }
249 malloc_size += strlen(cwd) + 1; /* cwd + '/' */
250 }
251 malloc_size += strlen(path) + 1; /* path + '\0' */
252
253 if_not_malloc(abs, malloc_size,
254 /* just print error message (within macro code) */
255 )
256 else {
257 if(cwd != NULL)
258 snprintf(abs, malloc_size, "%s/%s", cwd, path);
259 else
260 snprintf(abs, malloc_size, "%s", path);
261 }
262
263 if(cwd != NULL)
264 free(cwd);
265
266 return (abs);
267 }
268
269 /* Push str into array and update num
270 - allocate memory for array if NULL
271 - return 0 (success) or 1 (failure) */
272 int
str_push(char *** array,unsigned int * num,const char * const str)273 str_push(char ***array, unsigned int *num, const char * const str)
274 {
275 assert(array != NULL);
276 assert(num != NULL);
277 assert(str != NULL);
278 assert(((*array == NULL) && (*num == 0)) ||
279 ((*num > 0) && (*array != NULL)));
280
281 /* allocate new string */
282 char *tmp_str = NULL;
283 size_t malloc_size = strlen(str) + 1;
284 if_not_malloc(tmp_str, malloc_size,
285 return (1);
286 )
287 snprintf(tmp_str, malloc_size, "%s", str);
288
289 /* add new char *pointer to array */
290 if_not_realloc(*array, sizeof(char *) * ((*num) + 1),
291 free(tmp_str);
292 return (1);
293 )
294 (*array)[*num] = tmp_str;
295 *num += 1;
296
297 return (0);
298 }
299
/* Cleanup str array
   - free() every str held in array (NULL entries are tolerated)
   - free() and NULL'ify array
   - reset num to 0 */
void
str_cleanup(char ***array, unsigned int *num)
{
    assert(num != NULL);
    assert(array != NULL);
    /* array and counter must agree: both empty or both non-empty */
    assert(((*array == NULL) && (*num == 0)) ||
        ((*num > 0) && (*array != NULL)));

    /* Release entries from last to first. The counter is decremented
       unconditionally: decrementing only for non-NULL entries would loop
       forever on the first NULL slot encountered */
    while(*num > 0) {
        *num -= 1;
        free((*array)[*num]);   /* free(NULL) is a no-op */
        (*array)[*num] = NULL;
    }
    free(*array);
    *array = NULL;

    return;
}
324
325 /* Check if a string begins with a '-' sign
326 - return 1 if it is the case, else 0 */
327 int
str_is_negative(const char * str)328 str_is_negative(const char *str)
329 {
330 assert(str != NULL);
331
332 /* skip blanks to test first character */
333 while(isblank(*str))
334 str++;
335
336 if(*str == '-')
337 return (1);
338 else
339 return (0);
340 }
341
/* Convert a str to a uintmax > 0
   - str is parsed as a base-10 number
   - if handle_multiplier is non-zero, a single trailing unit character
     understood by char_to_multiplier() is accepted and applied
   - only accept values > 0 as input
   - return 0 if an error occurs (negative or null value, nothing to
     convert, conversion error, unexpected or unknown unit, overflow) */
uintmax_t
str_to_uintmax(const char *str, const unsigned char handle_multiplier)
{
    assert(str != NULL);

    char *endptr = NULL;
    uintmax_t val = 0;
    uintmax_t multiplier = 0;

    /* check if a negative value has been provided */
    if(str_is_negative(str))
        return (0);

    errno = 0;
    val = strtoumax(str, &endptr, 10);
    /* check that something was converted and refuse invalid values.
       endptr must be compared with the string that was parsed (str), not
       with an unrelated global such as getopt's optarg */
    if((endptr == str) || (val == 0))
        return (0);
    /* check for other errors */
    if(errno != 0) {
        fprintf(stderr, "%s(): %s\n", __func__, strerror(errno));
        return (0);
    }
    /* if characters remain, handle multiplier */
    if(*endptr != '\0') {
        /* return an error if we do not want to handle multiplier */
        if(!handle_multiplier) {
            fprintf(stderr, "%s(): %s\n", __func__, "unexpected unit provided");
            return (0);
        }

        uintmax_t orig_val = val;
        /* more than one character remain or invalid multiplier specified */
        if((*(endptr + 1) != '\0') ||
            (multiplier = char_to_multiplier(*endptr)) == 0) {
            fprintf(stderr, "%s(): %s\n", __func__, "unknown unit provided");
            return (0);
        }
        /* check for overflow: unsigned multiplication wraps, so dividing
           back must give the original value */
        val *= multiplier;
        if((val / multiplier) != orig_val) {
            fprintf(stderr, "%s(): %s\n", __func__, strerror(ERANGE));
            return (0);
        }
    }
#if defined(DEBUG)
    /* trace the string actually converted */
    fprintf(stderr, "%s(): converted string %s to value %ju\n", __func__,
        str, val);
#endif
    return (val);
}
397
/* Match an fts entry against an array of fnmatch(3) patterns
   - a pattern without any '/' is compared to the entry's name
   - a pattern containing a '/' is compared to the entry's path, with
     FNM_PATHNAME semantics
   - FNM_PERIOD is always set; FNM_CASEFOLD is added when ignore_case is
     non-zero
   - return 0 (no match, or NULL array) or 1 (match) */
int
file_match(const char * const * const array, const unsigned int num,
    const FTSENT * const p, const unsigned char ignore_case)
{
    assert(p != NULL);
    assert(p->fts_name != NULL);
    assert(p->fts_path != NULL);

    if(array == NULL)
        return (0);

    /* flags common to both kinds of patterns */
    const int common_flags = FNM_PERIOD | (ignore_case ? FNM_CASEFOLD : 0);

    for(unsigned int idx = 0; idx < num; idx++) {
        const char *pattern = array[idx];
        int status;

        if(strchr(pattern, '/') != NULL) {
            /* Current pattern designates a path */
            status = fnmatch(pattern, p->fts_path,
                FNM_PATHNAME | common_flags);
        }
        else {
            /* Current pattern designates a file name */
            status = fnmatch(pattern, p->fts_name, common_flags);
        }

        if(status == 0)
            return (1);
    }
    return (0);
}
429
430 /* Validate a file regarding program options
431 - exclude_only (ignore include lists) is useful to:
432 - be able to crawl the entire file hierarchy (honoring include lists would
433 prevent the caller from entering a non-included directory and break
434 crawling)
435 - compute leaf directory size, when only exclude lists are needed
436 - return 0 if file is not valid, 1 if it is */
437 int
valid_file(const FTSENT * const p,struct program_options * options,unsigned char exclude_only)438 valid_file(const FTSENT * const p, struct program_options *options,
439 unsigned char exclude_only)
440 {
441 assert(p != NULL);
442 assert(p->fts_name != NULL);
443 assert(p->fts_path != NULL);
444 assert(options != NULL);
445
446 int valid = 1;
447
448 #if defined(DEBUG)
449 fprintf(stderr, "%s(): checking name validity (%s includes): %s (path: %s)\n",
450 __func__, exclude_only ? "without" : "with",
451 (p->fts_namelen > 0) ? p->fts_name : "<empty>", p->fts_path);
452 #endif
453
454 /* check for includes (options -y and -Y), if requested */
455 if(!exclude_only) {
456 if((options->include_files != NULL) ||
457 (options->include_files_ci != NULL)) {
458 /* switch to default exclude, unless file found in lists */
459 valid = 0;
460
461 if(file_match((const char * const * const)(options->include_files),
462 options->ninclude_files, p, 0) ||
463 file_match((const char * const * const)(options->include_files_ci),
464 options->ninclude_files_ci, p, 1))
465 valid = 1;
466 }
467 }
468
469 /* check for excludes (options -x and -X) */
470 if(file_match((const char * const * const)(options->exclude_files),
471 options->nexclude_files, p, 0) ||
472 file_match((const char * const * const)(options->exclude_files_ci),
473 options->nexclude_files_ci, p, 1))
474 valid = 0;
475
476 #if defined(DEBUG)
477 fprintf(stderr, "%s(): %s, validity: %s\n", __func__,
478 (p->fts_namelen > 0) ? p->fts_name : "<empty>",
479 valid ? "valid" : "invalid");
480 #endif
481
482 return (valid);
483 }
484
485 /* Create a copy of environ(7) and return its address
486 - return a pointer to the copy or NULL if error
487 - returned environ must be freed later */
488 char **
clone_env(void)489 clone_env(void)
490 {
491 unsigned int env_size = 0;
492 char **new_env = NULL;
493
494 /* import original environ */
495 extern char **environ;
496
497 /* compute environ size */
498 while(environ[env_size]) env_size++;
499 /* ending NULL */
500 env_size++;
501
502 size_t malloc_size = sizeof(char *) * env_size;
503 if_not_malloc(new_env, malloc_size,
504 /* just print error message (within macro code) */
505 )
506 else {
507 /* copy each pointer, beginning from the ending NULL value */
508 while(env_size > 0) {
509 new_env[env_size - 1] = environ[env_size - 1];
510 env_size--;
511 }
512 }
513 return (new_env);
514 }
515
516 /* Push a str pointer to a cloned environ(7)
517 - return enlarged environ through env
518 - returned environ must be freed later
519 - return 0 (success) or 1 (failure) */
520 int
push_env(char * str,char *** env)521 push_env(char *str, char ***env)
522 {
523 assert(str != NULL);
524 assert(env != NULL);
525 assert(*env != NULL);
526
527 unsigned int env_size = 0;
528 char **new_env = NULL;
529
530 /* compute environ size */
531 while((*env)[env_size]) env_size++;
532 /* add our pointer */
533 env_size++;
534 /* add ending NULL */
535 env_size++;
536
537 size_t malloc_size = sizeof(char *) * env_size;
538 if_not_malloc(new_env, malloc_size,
539 return (1);
540 )
541
542 /* copy each pointer, beginning from the ending NULL value */
543 new_env[env_size - 1] = NULL;
544 new_env[env_size - 2] = str;
545 env_size -= 2;
546 while(env_size > 0) {
547 new_env[env_size - 1] = (*env)[env_size - 1];
548 env_size--;
549 }
550
551 /* free previous environment and update env */
552 free(*env);
553 *env = new_env;
554
555 return (0);
556 }
557