1 /*-
2  * Copyright (c) 2011-2021 Ganael LAPLANCHE <ganael.laplanche@martymac.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "types.h"
28 #include "utils.h"
29 #include "options.h"
30 
31 /* log10(3) */
32 #include <math.h>
33 
34 /* malloc(3) */
35 #include <stdlib.h>
36 
37 /* fprintf(3), snprintf(3) */
38 #include <stdio.h>
39 
40 /* fts(3) */
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #if defined(EMBED_FTS)
44 #include "fts.h"
45 #else
46 #include <fts.h>
47 #endif
48 
49 /* strerror(3), strlen(3), strchr(3) */
50 #include <string.h>
51 
52 /* errno */
53 #include <errno.h>
54 
55 /* getcwd(3) */
56 #include <unistd.h>
57 
58 /* MAXPATHLEN */
59 #include <sys/param.h>
60 
61 /* assert(3) */
62 #include <assert.h>
63 
64 /* opendir(3) */
65 #include <dirent.h>
66 
67 /* fnmatch(3) */
68 #include <fnmatch.h>
69 
70 /* isblank(3) */
71 #include <ctype.h>
72 
73 /* strtoumax(3) */
74 #include <limits.h>
75 #include <inttypes.h>
76 
77 /****************
78  Helper functions
79  ****************/
80 
/* Map a unit suffix character to its binary size multiplier
   - accepted suffixes (either case): k, m, g, t, p (2^10 up to 2^50)
   - return 0 for any other character */
uintmax_t
char_to_multiplier(const char c)
{
    unsigned int shift;     /* power of two matching the suffix */

    if((c == 'k') || (c == 'K'))
        shift = 10;
    else if((c == 'm') || (c == 'M'))
        shift = 20;
    else if((c == 'g') || (c == 'G'))
        shift = 30;
    else if((c == 't') || (c == 'T'))
        shift = 40;
    else if((c == 'p') || (c == 'P'))
        shift = 50;
    else
        return (0);         /* unknown suffix */

    return ((uintmax_t)1 << shift);
}
112 
113 /* Return the number of digits necessary to print i */
114 unsigned int
get_num_digits(double i)115 get_num_digits(double i)
116 {
117     if((int)i == 0)
118         return (1);
119 
120     double logvalue = log10(i);
121     return (logvalue >= 0 ? (unsigned int)logvalue + 1 : 0);
122 }
123 
124 /* Return the size of a file or directory
125    - a pointer to an existing stat must be provided
126 
127    We assume that when that function is called, then the choice of including or
128    excluding the related file or directory has already been made. Thus,
129    exclusion list is only honored when computing size of a directory and when
130    depth is > 0 (i.e. we always accept the root dir but may skip subdirs).
131 */
132 fsize_t
get_size(char * file_path,struct stat * file_stat,struct program_options * options)133 get_size(char *file_path, struct stat *file_stat,
134     struct program_options *options)
135 {
136     assert(file_path != NULL);
137     assert(file_stat != NULL);
138     assert(options != NULL);
139 
140     fsize_t file_size = 0;  /* current return value */
141 
142     /* if file_path is not a directory, return st_size for regular files (only).
143        We do *not* check for valid_file() here because if the function has been
144        called, then the choice of including the file has already been made
145        before */
146     if(!S_ISDIR(file_stat->st_mode)) {
147         return (S_ISREG(file_stat->st_mode) ? file_stat->st_size : 0);
148     }
149 
150     /* directory, use fts */
151     FTS *ftsp = NULL;
152     FTSENT *p = NULL;
153     int fts_options = (options->follow_symbolic_links == OPT_FOLLOWSYMLINKS) ?
154         FTS_LOGICAL : FTS_PHYSICAL;
155     fts_options |= (options->cross_fs_boundaries == OPT_NOCROSSFSBOUNDARIES) ?
156         FTS_XDEV : 0;
157 
158     char *fts_argv[] = { file_path, NULL };
159     if((ftsp = fts_open(fts_argv, fts_options, NULL)) == NULL) {
160         fprintf(stderr, "%s: fts_open()\n", file_path);
161         return (0);
162     }
163 
164     while((p = fts_read(ftsp)) != NULL) {
165         switch (p->fts_info) {
166             case FTS_ERR:   /* misc error */
167             case FTS_DNR:   /* un-readable directory */
168             case FTS_NS:    /* stat() error */
169                 fprintf(stderr, "%s: %s\n", p->fts_path,
170                     strerror(p->fts_errno));
171             case FTS_NSOK: /* no stat(2) available (not requested) */
172                 continue;
173 
174             case FTS_DC:
175                 fprintf(stderr, "%s: filesystem loop detected\n", p->fts_path);
176             case FTS_DOT:  /* ignore "." and ".." */
177             case FTS_DP:
178                 continue;
179 
180             case FTS_D:
181                 /* Excluded directories do not account for returned size.
182                    Always accept root dir here because, if the function has been
183                    called, then the choice of including the directory has
184                    already been made before */
185                 if((!valid_file(p, options, VF_EXCLUDEONLY)) &&
186                     (p->fts_level > 0)) {
187 #if defined(DEBUG)
188                     fprintf(stderr, "%s(): skipping directory: %s\n", __func__,
189                         p->fts_path);
190 #endif
191                     fts_set(ftsp, p, FTS_SKIP);
192                 }
193                 continue;
194 
195             default:
196                 /* XXX default means remaining file types:
197                    FTS_F, FTS_SL, FTS_SLNONE, FTS_DEFAULT */
198 
199                 /* Excluded files do not account for returned size */
200                 if(!valid_file(p, options, VF_EXCLUDEONLY)) {
201 #if defined(DEBUG)
202                     fprintf(stderr, "%s(): skipping file: %s\n", __func__,
203                         p->fts_path);
204 #endif
205                 }
206                 else
207                     file_size += p->fts_statp->st_size;
208                 continue;
209         }
210     }
211 
212     if(errno != 0)
213         fprintf(stderr, "%s: fts_read()\n", file_path);
214 
215     if(fts_close(ftsp) < 0)
216         fprintf(stderr, "%s: fts_close()\n", file_path);
217 
218     return (file_size);
219 }
220 
221 /* Return absolute path for given path
222    - '/xxx' and '-' are considered absolute, e.g.
223      will not be prefixed by cwd. Everything else will.
224    - returned pointer must be manually freed later */
225 char *
abs_path(const char * path)226 abs_path(const char *path)
227 {
228     assert(path != NULL);
229 
230     char *cwd = NULL;       /* current working directory */
231     char *abs = NULL;       /* will be returned */
232     size_t malloc_size = 0;
233 
234     if(path[0] == '\0') {
235         errno = ENOENT;
236         return (NULL);
237     }
238 
239     if((path[0] != '/') &&
240         ((path[0] != '-') || (path[1] != '\0'))) {
241         /* relative path given */
242         if_not_malloc(cwd, MAXPATHLEN,
243             return (NULL);
244         )
245         if(getcwd(cwd, MAXPATHLEN) == NULL) {
246             free(cwd);
247             return (NULL);
248         }
249         malloc_size += strlen(cwd) + 1; /* cwd + '/' */
250     }
251     malloc_size += strlen(path) + 1; /* path + '\0' */
252 
253     if_not_malloc(abs, malloc_size,
254         /* just print error message (within macro code) */
255     )
256     else {
257         if(cwd != NULL)
258             snprintf(abs, malloc_size, "%s/%s", cwd, path);
259         else
260             snprintf(abs, malloc_size, "%s", path);
261     }
262 
263     if(cwd != NULL)
264         free(cwd);
265 
266     return (abs);
267 }
268 
269 /* Push str into array and update num
270    - allocate memory for array if NULL
271    - return 0 (success) or 1 (failure) */
272 int
str_push(char *** array,unsigned int * num,const char * const str)273 str_push(char ***array, unsigned int *num, const char * const str)
274 {
275     assert(array != NULL);
276     assert(num != NULL);
277     assert(str != NULL);
278     assert(((*array == NULL) && (*num == 0)) ||
279         ((*num > 0) && (*array != NULL)));
280 
281     /* allocate new string */
282     char *tmp_str = NULL;
283     size_t malloc_size = strlen(str) + 1;
284     if_not_malloc(tmp_str, malloc_size,
285         return (1);
286     )
287     snprintf(tmp_str, malloc_size, "%s", str);
288 
289     /* add new char *pointer to array */
290     if_not_realloc(*array, sizeof(char *) * ((*num) + 1),
291         free(tmp_str);
292         return (1);
293     )
294     (*array)[*num] = tmp_str;
295     *num += 1;
296 
297     return (0);
298 }
299 
/* Cleanup str array
   - free() every str stored in array (NULL entries are tolerated)
   - free() and NULL'ify array
   - reset num to 0 */
void
str_cleanup(char ***array, unsigned int *num)
{
    assert(num != NULL);
    assert(array != NULL);
    assert(((*array == NULL) && (*num == 0)) ||
        ((*num > 0) && (*array != NULL)));

    /* Walk the array backwards. The counter must decrease on every
       iteration: decrementing it only for non-NULL entries would loop
       forever on a NULL one. free(NULL) is a no-op, so no test is needed */
    while(*num > 0) {
        *num -= 1;
        free((*array)[*num]);
        (*array)[*num] = NULL;
    }
    free(*array);
    *array = NULL;

    return;
}
324 
325 /* Check if a string begins with a '-' sign
326    - return 1 if it is the case, else 0 */
327 int
str_is_negative(const char * str)328 str_is_negative(const char *str)
329 {
330     assert(str != NULL);
331 
332     /* skip blanks to test first character */
333     while(isblank(*str))
334         str++;
335 
336     if(*str == '-')
337         return (1);
338     else
339         return (0);
340 }
341 
/* Convert a str to a uintmax > 0
   - str is parsed as a base-10 number
   - if handle_multiplier is non-zero, a single trailing unit character
     understood by char_to_multiplier() is accepted and applied
   - only accept values > 0 as input
   - return 0 if an error occurs (negative or null value, nothing to
     convert, out-of-range value, unexpected or unknown unit, overflow
     when applying the multiplier) */
uintmax_t
str_to_uintmax(const char *str, const unsigned char handle_multiplier)
{
    assert(str != NULL);

    char *endptr = NULL;
    uintmax_t val = 0;
    uintmax_t multiplier = 0;

    /* check if a negative value has been provided: strtoumax(3) would
       otherwise silently negate it */
    if(str_is_negative(str))
        return (0);

    errno = 0;
    val = strtoumax(str, &endptr, 10);
    /* check that something was converted and refuse invalid values.
       endptr must be compared with the string that was actually parsed,
       not with the global optarg, which may be unrelated to str */
    if((endptr == str) || (val == 0))
        return (0);
    /* check for other errors */
    if(errno != 0) {
        fprintf(stderr, "%s(): %s\n", __func__, strerror(errno));
        return (0);
    }
    /* if characters remain, handle multiplier */
    if(*endptr != '\0') {
        /* return an error if we do not want to handle multiplier */
        if(!handle_multiplier) {
            fprintf(stderr, "%s(): %s\n", __func__, "unexpected unit provided");
            return (0);
        }

        uintmax_t orig_val = val;
        /* more than one character remain or invalid multiplier specified */
        if ((*(endptr + 1) != '\0') ||
            (multiplier = char_to_multiplier(*endptr)) == 0) {
            fprintf(stderr, "%s(): %s\n", __func__, "unknown unit provided");
            return (0);
        }
        /* check for overflow: unsigned multiplication wraps, so dividing
           back must give the original value */
        val *= multiplier;
        if((val / multiplier) != orig_val) {
            fprintf(stderr, "%s(): %s\n", __func__, strerror(ERANGE));
            return (0);
        }
    }
#if defined(DEBUG)
    fprintf(stderr, "%s(): converted string %s to value %ju\n", __func__,
        str, val);
#endif
    return (val);
}
397 
/* Match an fts entry against an array of shell patterns (see fnmatch(3))
   - a pattern without any '/' is matched against the entry's name
   - a pattern containing a '/' is matched against the entry's path,
     with FNM_PATHNAME semantics
   - ignore_case requests case-insensitive matching
   - return 0 (no match, or NULL array) or 1 (match) */
int
file_match(const char * const * const array, const unsigned int num,
    const FTSENT * const p, const unsigned char ignore_case)
{
    assert(p != NULL);
    assert(p->fts_name != NULL);
    assert(p->fts_path != NULL);

    if(array == NULL)
        return (0);

    /* flags shared by both kinds of patterns */
    const int base_flags = FNM_PERIOD | (ignore_case ? FNM_CASEFOLD : 0);

    for(unsigned int i = 0; i < num; i++) {
        const char *pattern = array[i];
        int result;

        if(strchr(pattern, '/') != NULL) {
            /* pattern describes a path */
            result = fnmatch(pattern, p->fts_path,
                FNM_PATHNAME | base_flags);
        }
        else {
            /* pattern describes a file name */
            result = fnmatch(pattern, p->fts_name, base_flags);
        }

        if(result == 0)
            return (1);
    }

    return (0);
}
429 
430 /* Validate a file regarding program options
431    - exclude_only (ignore include lists) is useful to:
432      - be able to crawl the entire file hierarchy (honoring include lists would
433        prevent the caller from entering a non-included directory and break
434        crawling)
435      - compute leaf directory size, when only exclude lists are needed
436    - return 0 if file is not valid, 1 if it is */
437 int
valid_file(const FTSENT * const p,struct program_options * options,unsigned char exclude_only)438 valid_file(const FTSENT * const p, struct program_options *options,
439     unsigned char exclude_only)
440 {
441     assert(p != NULL);
442     assert(p->fts_name != NULL);
443     assert(p->fts_path != NULL);
444     assert(options != NULL);
445 
446     int valid = 1;
447 
448 #if defined(DEBUG)
449     fprintf(stderr, "%s(): checking name validity (%s includes): %s (path: %s)\n",
450         __func__, exclude_only ? "without" : "with",
451         (p->fts_namelen > 0) ? p->fts_name : "<empty>", p->fts_path);
452 #endif
453 
454     /* check for includes (options -y and -Y), if requested */
455     if(!exclude_only) {
456         if((options->include_files != NULL) ||
457             (options->include_files_ci != NULL)) {
458             /* switch to default exclude, unless file found in lists */
459             valid = 0;
460 
461             if(file_match((const char * const * const)(options->include_files),
462                 options->ninclude_files, p, 0) ||
463                 file_match((const char * const * const)(options->include_files_ci),
464                 options->ninclude_files_ci, p, 1))
465                 valid = 1;
466         }
467     }
468 
469     /* check for excludes (options -x and -X) */
470     if(file_match((const char * const * const)(options->exclude_files),
471         options->nexclude_files, p, 0) ||
472         file_match((const char * const * const)(options->exclude_files_ci),
473         options->nexclude_files_ci, p, 1))
474         valid = 0;
475 
476 #if defined(DEBUG)
477     fprintf(stderr, "%s(): %s, validity: %s\n", __func__,
478         (p->fts_namelen > 0) ? p->fts_name : "<empty>",
479         valid ? "valid" : "invalid");
480 #endif
481 
482     return (valid);
483 }
484 
485 /* Create a copy of environ(7) and return its address
486    - return a pointer to the copy or NULL if error
487    - returned environ must be freed later */
488 char **
clone_env(void)489 clone_env(void)
490 {
491     unsigned int env_size = 0;
492     char **new_env = NULL;
493 
494     /* import original environ */
495     extern char **environ;
496 
497     /* compute environ size */
498     while(environ[env_size]) env_size++;
499     /* ending NULL */
500     env_size++;
501 
502     size_t malloc_size = sizeof(char *) * env_size;
503     if_not_malloc(new_env, malloc_size,
504         /* just print error message (within macro code) */
505     )
506     else {
507         /* copy each pointer, beginning from the ending NULL value */
508         while(env_size > 0) {
509             new_env[env_size - 1] = environ[env_size - 1];
510             env_size--;
511         }
512     }
513     return (new_env);
514 }
515 
516 /* Push a str pointer to a cloned environ(7)
517    - return enlarged environ through env
518    - returned environ must be freed later
519    - return 0 (success) or 1 (failure) */
520 int
push_env(char * str,char *** env)521 push_env(char *str, char ***env)
522 {
523     assert(str != NULL);
524     assert(env != NULL);
525     assert(*env != NULL);
526 
527     unsigned int env_size = 0;
528     char **new_env = NULL;
529 
530     /* compute environ size */
531     while((*env)[env_size]) env_size++;
532     /* add our pointer */
533     env_size++;
534     /* add ending NULL */
535     env_size++;
536 
537     size_t malloc_size = sizeof(char *) * env_size;
538     if_not_malloc(new_env, malloc_size,
539         return (1);
540     )
541 
542     /* copy each pointer, beginning from the ending NULL value */
543     new_env[env_size - 1] = NULL;
544     new_env[env_size - 2] = str;
545     env_size -= 2;
546     while(env_size > 0) {
547         new_env[env_size - 1] = (*env)[env_size - 1];
548         env_size--;
549     }
550 
551     /* free previous environment and update env */
552     free(*env);
553     *env = new_env;
554 
555     return (0);
556 }
557