1 /*  $Id: fastrm.c 10283 2018-05-14 12:43:05Z iulius $
2 **
3 **  Delete a list of filenames or tokens from stdin.
4 **
5 **  Originally written by <kre@munnari.oz.au> (to only handle files).
6 **
7 **  Files that can't be unlinked because they didn't exist are considered
8 **  okay.  Any error condition results in exiting with non-zero exit
9 **  status.  Input lines in the form @...@ are taken to be storage API
10 **  tokens.  Input filenames should be fully qualified.  For maximum
11 **  efficiency, input filenames should be sorted; fastrm will cd into each
12 **  directory to avoid additional directory lookups when removing a lot of
13 **  files in a single directory.
14 */
15 
16 #include "config.h"
17 #include "clibrary.h"
18 #include <ctype.h>
19 #include <dirent.h>
20 #include <errno.h>
21 #include <sys/stat.h>
22 #include <syslog.h>
23 
24 #include "inn/innconf.h"
25 #include "inn/messages.h"
26 #include "inn/qio.h"
27 #include "inn/libinn.h"
28 #include "inn/storage.h"
29 
/* We reject any path names of this length or longer. */
31 #define MAX_DIR_LEN 2048
32 
/* The set of files queued for removal from a single directory.  files is a
   growable array of separately allocated names, of which count entries are
   in use out of size allocated slots.  dir names the directory the files
   live in and may be NULL when the names carry no directory part. */
struct filelist {
    int count;          /* Number of entries of files in use. */
    int size;           /* Number of entries allocated for files. */
    char *dir;          /* Directory holding the files, or NULL. */
    char **files;       /* The file names themselves. */
};
typedef struct filelist filelist;
40 
41 /* All relative paths are relative to this directory. */
42 static char *base_dir = NULL;
43 
44 /* The absolute path of the current working directory. */
45 static char current_dir[MAX_DIR_LEN];
46 
47 /* The prefix for the files that we're currently working with.  We sometimes
48    also use this as working space for forming file names to remove, so give
49    ourselves a bit of additional leeway just in case. */
50 static char prefix_dir[MAX_DIR_LEN * 2];
51 static int prefix_len;
52 
53 /* Some threshold values that govern the optimizations that we are willing
54    to perform.  chdir_threshold determines how many files to be removed we
55    want in a directory before we chdir to that directory.  sort_threshold
56    determines how many files must be in a directory before we use readdir to
57    remove them in order.  relative_threshold determines how many levels of
58    "../" we're willing to try to use to move to the next directory rather
59    than just calling chdir with the new absolute path. */
60 static int chdir_threshold = 3;
61 static int relative_threshold = 0;
62 static int sort_threshold = 0;
63 
64 /* True if we should only print what we would do, not actually do it. */
65 static bool debug_only = false;
66 
67 /* A string used for constructing relative paths. */
68 static const char dotdots[] = "../../../../";
69 
70 /* The number of errors encountered, used to determine exit status. */
71 static int error_count = 0;
72 
73 /* Whether the storage manager has been initialized. */
74 static bool sm_initialized = false;
75 
76 /* True if unlink may be able to remove directories. */
77 static bool unlink_dangerous = false;
78 
79 
80 
/*
**  Comparison callback for qsort and bsearch over arrays of strings.  Each
**  argument points at an array element (itself a pointer to a string), so
**  one level of indirection is removed before the strings are compared.
**  The result has the same sign convention as strcmp.
*/
static int
file_compare(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
93 
94 
95 /*
96 **  Create a new filelist.
97 */
98 static filelist *
filelist_new(char * dir)99 filelist_new(char *dir)
100 {
101     filelist *new;
102 
103     new = xmalloc(sizeof(filelist));
104     new->count = 0;
105     new->size = 0;
106     new->dir = dir;
107     new->files = NULL;
108     return new;
109 }
110 
111 
112 /*
113 **  Insert a file name into a list of files (unsorted).
114 */
115 static void
filelist_insert(filelist * list,char * name)116 filelist_insert(filelist *list, char *name)
117 {
118     if (list->count == list->size) {
119         list->size = (list->size == 0) ? 16 : list->size * 2;
120         list->files = xrealloc(list->files, list->size * sizeof(char *));
121     }
122     list->files[list->count++] = xstrdup(name);
123 }
124 
125 
126 /*
127 **  Find a file name in a sorted list of files.
128 */
129 static char *
filelist_lookup(filelist * list,const char * name)130 filelist_lookup(filelist *list, const char *name)
131 {
132     char **p;
133 
134     p = bsearch(&name, list->files, list->count, sizeof(char *),
135                 file_compare);
136     return (p == NULL ? NULL : *p);
137 }
138 
139 
140 /*
141 **  Empty a list of files, freeing all of the names but keeping the
142 **  structure intact.
143 */
144 static void
filelist_empty(filelist * list)145 filelist_empty(filelist *list)
146 {
147     int i;
148 
149     if (list->files == NULL)
150         return;
151     for (i = 0; i < list->count; i++)
152         free(list->files[i]);
153     list->count = 0;
154 }
155 
156 
157 /*
158 **  Free a list of files.
159 */
160 static void
filelist_free(filelist * list)161 filelist_free(filelist *list)
162 {
163     filelist_empty(list);
164     if (list->files != NULL)
165         free(list->files);
166     if (list->dir != NULL)
167         free(list->dir);
168     free(list);
169 }
170 
171 
/*
**  Cleanup hook installed as message_fatal_cleanup by sm_initialize.  Shuts
**  down the storage manager and returns 1.  NOTE(review): the return value
**  is presumably used as the exit status by die; confirm against the
**  messages API.
*/
static int
sm_cleanup(void)
{
    const int status = 1;

    SMshutdown();
    return status;
}
181 
182 
183 /*
184 **  Initialize the storage manager.  This includes parsing inn.conf, which
185 **  fastrm doesn't need for any other purpose.
186 */
187 static void
sm_initialize(void)188 sm_initialize(void)
189 {
190     bool value;
191 
192     if (!innconf_read(NULL))
193         exit(1);
194     value = true;
195     if (!SMsetup(SM_RDWR, &value) || !SMsetup(SM_PREOPEN, &value))
196         die("can't set up storage manager");
197     if (!SMinit())
198         die("can't initialize storage manager: %s", SMerrorstr);
199     sm_initialized = true;
200     message_fatal_cleanup = sm_cleanup;
201 }
202 
203 
204 /*
205 **  Get a line from a given QIO stream, returning a pointer to it.  Warn
206 **  about and then skip lines that are too long.  Returns NULL at EOF or on
207 **  an error.
208 */
209 static char *
get_line(QIOSTATE * qp)210 get_line(QIOSTATE *qp)
211 {
212     static int count;
213     char *p;
214 
215     p = QIOread(qp);
216     count++;
217     while (QIOtoolong(qp) || (p != NULL && strlen(p) >= MAX_DIR_LEN)) {
218         warn("line %d too long", count);
219         error_count++;
220         p = QIOread(qp);
221     }
222     if (p == NULL) {
223         if (QIOerror(qp)) {
224             syswarn("read error");
225             error_count++;
226         }
227         return NULL;
228     }
229     return p;
230 }
231 
232 
233 /*
234 **  Read lines from stdin (including the first that may have been there
235 **  from our last time in) until we reach EOF or until we get a line that
236 **  names a file not in the same directory as the previous lot.  Remember
237 **  the file names in the directory we're examining and return the list.
238 */
239 static filelist *
process_line(QIOSTATE * qp,int * queued,int * deleted)240 process_line(QIOSTATE *qp, int *queued, int *deleted)
241 {
242     static char *line = NULL;
243     filelist *list = NULL;
244     char *p;
245     char *dir = NULL;
246     int dlen = -1;
247 
248     *queued = 0;
249     *deleted = 0;
250 
251     if (line == NULL)
252         line = get_line(qp);
253 
254     for (; line != NULL; line = get_line(qp)) {
255         if (IsToken(line)) {
256             (*deleted)++;
257             if (debug_only) {
258                 printf("Token %s\n", line);
259                 continue;
260             }
261             if (!sm_initialized)
262                 sm_initialize();
263             if (!SMcancel(TextToToken(line)))
264                 if (SMerrno != SMERR_NOENT && SMerrno != SMERR_UNINIT) {
265                     warn("can't cancel %s", line);
266                     error_count++;
267                 }
268         } else {
269             if (list == NULL) {
270                 p = strrchr(line, '/');
271                 if (p != NULL) {
272                     *p++ = '\0';
273                     dlen = strlen(line);
274                     dir = xstrdup(line);
275                 } else {
276                     dlen = -1;
277                     dir = NULL;
278                 }
279                 list = filelist_new(dir);
280             } else {
281                 if ((dlen < 0 && strchr(line, '/'))
282                     || (dlen >= 0 && (line[dlen] != '/'
283                                       || strchr(line + dlen + 1, '/')
284                                       || strncmp(dir, line, dlen))))
285                     return list;
286             }
287             filelist_insert(list, line + dlen + 1);
288             (*queued)++;
289         }
290     }
291     return list;
292 }
293 
294 
/*
**  Copy the leading part of the path from into to, stopping just before the
**  nth slash (or at the end of from if it has fewer slashes than that).
**  The slash that stops the copy is not copied.  to is always
**  nul-terminated and must be large enough to hold the result.
*/
static void
copy_segments(char *to, const char *from, int n)
{
    const char *src = from;
    char *dst = to;

    while (*src != '\0') {
        if (*src == '/') {
            n--;
            if (n <= 0)
                break;
        }
        *dst++ = *src++;
    }
    *dst = '\0';
}
310 
311 
/*
**  Count the slashes in a file name, which serves as the number of path
**  segments in it.
*/
static int
slashcount(char *name)
{
    const char *cursor = name;
    int total = 0;

    while ((cursor = strchr(cursor, '/')) != NULL) {
        total++;
        cursor++;
    }
    return total;
}
326 
327 
328 /*
329 **  Unlink a file, reporting errors if the unlink fails for a reason other
330 **  than the file not existing doesn't exist.  Be careful to avoid unlinking
331 **  a directory if unlink_dangerous is true.
332 */
333 static void
unlink_file(const char * file)334 unlink_file(const char *file)
335 {
336     struct stat st;
337 
338     /* On some systems, unlink will remove directories if used by root.  If
339        we're running as root, unlink_dangerous will be set, and we need to
340        make sure that the file isn't a directory first. */
341     if (unlink_dangerous) {
342         if (stat(file, &st) < 0) {
343             if (errno != ENOENT) {
344                 if (*file == '/')
345                     syswarn("can't stat %s", file);
346                 else
347                     syswarn("can't stat %s in %s", file, current_dir);
348                 error_count++;
349             }
350             return;
351         }
352         if (S_ISDIR(st.st_mode)) {
353             if (*file == '/')
354                 syswarn("%s is a directory", file);
355             else
356                 syswarn("%s in %s is a directory", file, current_dir);
357             error_count++;
358             return;
359         }
360     }
361 
362     if (debug_only) {
363         if (*file != '/')
364             printf("%s / ", current_dir);
365         printf("%s\n", file);
366         return;
367     }
368 
369     if (unlink(file) < 0 && errno != ENOENT) {
370         if (*file == '/')
371             syswarn("can't unlink %s", file);
372         else
373             syswarn("can't unlink %s in %s", file, current_dir);
374     }
375 }
376 
377 
378 /*
379 **  A wrapper around chdir that dies if chdir fails for a reason other than
380 **  the directory not existing, returns false if the directory doesn't
381 **  exist (reporting an error), and otherwise returns true.  It also checks
382 **  to make sure that filecount is larger than chdir_threshold, and if it
383 **  isn't it instead just sets prefix_dir and prefix_len to point to the new
384 **  directory without changing the working directory.
385 */
386 static bool
chdir_checked(const char * path,int filecount)387 chdir_checked(const char *path, int filecount)
388 {
389     if (filecount < chdir_threshold) {
390         strlcpy(prefix_dir, path, sizeof(prefix_dir));
391         prefix_len = strlen(path);
392     } else {
393         prefix_len = 0;
394         if (chdir(path) < 0) {
395             if (errno != ENOENT)
396                 sysdie("can't chdir from %s to %s", current_dir, path);
397             else {
398                 syswarn("can't chdir from %s to %s", current_dir, path);
399                 return false;
400             }
401         }
402     }
403     return true;
404 }
405 
406 
407 /*
408 **  Set our environment (process working directory, and global vars) to
409 **  reflect a change of directory to dir (relative to base_dir if dir is not
410 **  an absolute path).  We're likely to want to do different things
411 **  depending on the amount of work to do in dir, so we also take the number
412 **  of files to remove in dir as the second argument.  Return false if the
413 **  directory doesn't exist (and therefore all files in it have already been
414 **  removed; otherwise, return true.
415 */
416 static bool
setup_dir(char * dir,int filecount)417 setup_dir(char *dir, int filecount)
418 {
419     char *p, *q, *absolute;
420     char path[MAX_DIR_LEN];
421     int base_depth, depth;
422 
423     /* Set absolute to the absolute path to the new directory. */
424     if (dir == NULL)
425         absolute = base_dir;
426     else if (*dir == '/')
427         absolute = dir;
428     else if (*dir == '\0') {
429         strlcpy(path, "/", sizeof(path));
430         absolute = path;
431     } else {
432         /* Strip off leading "./". */
433         while (dir[0] == '.' && dir[1] == '/')
434             for (dir += 2; *dir == '/'; dir++)
435                 ;
436 
437         /* Handle any leading "../", but only up to the number of segments
438            in base_dir. */
439         base_depth = slashcount(base_dir);
440         while (base_depth > 0 && strncmp(dir, "../", 3) == 0)
441             for (base_depth--, dir += 3; *dir == '/'; dir++)
442                 ;
443         if (base_depth <= 0)
444             die("too many ../'s in path %s", dir);
445         copy_segments(path, base_dir, base_depth + 1);
446         if (strlen(path) + strlen(dir) + 2 > MAX_DIR_LEN)
447             die("path %s too long", dir);
448         strlcat(path, "/", sizeof(path));
449         strlcat(path, dir, sizeof(path));
450         absolute = path;
451     }
452 
453     /* Find the first point of difference between absolute and current_dir.
454        If there is no difference, we're done; we're changing to the same
455        directory we were in (this is probably some sort of error, but can
456        happen with odd relative paths). */
457     for (p = absolute, q = current_dir; *p == *q; p++, q++)
458         if (*p == '\0')
459             return true;
460 
461     /* If we reached the end of current_dir and there's more left of
462        absolute, we're changing to a subdirectory of where we were. */
463     if (*q == '\0' && *p == '/') {
464         p++;
465         if (!chdir_checked(p, filecount))
466             return false;
467         if (prefix_len == 0)
468             strlcpy(current_dir, absolute, sizeof(current_dir));
469         return true;
470     }
471 
472     /* Otherwise, if we were promised that we have a pure tree (in other
473        words, no symbolic links to directories), see if it's worth going up
474        the tree with ".." and then down again rather than chdir to the
475        absolute path.  relative_threshold determines how many levels of ".."
476        we're willing to use; the default of 1 seems fractionally faster than
477        2 and 0 indicates to always use absolute paths.  Values larger than 3
478        would require extending the dotdots string, but are unlikely to be
479        worth it.
480 
481        FIXME: It's too hard to figure out what this code does.  It needs to be
482        rewritten. */
483     if (*p != '\0' && relative_threshold > 0) {
484         depth = slashcount(q);
485         if (depth <= relative_threshold) {
486             while (p > absolute && *--p != '/')
487                 ;
488             p++;
489             strlcpy(prefix_dir, dotdots + 9 - depth * 3, sizeof(prefix_dir));
490             strlcat(prefix_dir, p, sizeof(prefix_dir));
491             if (!chdir_checked(prefix_dir, filecount))
492                 return false;
493 
494             /* Now patch up current_dir to reflect where we are. */
495             if (prefix_len == 0) {
496                 while (q > current_dir && *--q != '/')
497                     ;
498                 q[1] = '\0';
499                 strlcat(current_dir, p, sizeof(current_dir));
500             }
501             return true;
502         }
503     }
504 
505     /* All else has failed; just use the absolute path.  This includes the
506        case where current_dir is a subdirectory of absolute, in which case
507        it may be somewhat faster to use chdir("../..") or the like rather
508        than the absolute path, but this case rarely happens when the user
509        cares about speed (it usually doesn't happen with sorted input).  So
510        we don't bother. */
511     if (!chdir_checked(absolute, filecount))
512         return false;
513     if (prefix_len == 0)
514         strlcpy(current_dir, absolute, sizeof(current_dir));
515     return true;
516 }
517 
518 
519 /*
520 **  Process a filelist of files to be deleted, all in the same directory.
521 */
522 static void
unlink_filelist(filelist * list,int filecount)523 unlink_filelist(filelist *list, int filecount)
524 {
525     bool sorted;
526     DIR *dir;
527     struct dirent *entry;
528     char *file;
529     int i;
530 
531     /* If setup_dir returns false, the directory doesn't exist and we're
532        already all done. */
533     if (!setup_dir(list->dir, filecount)) {
534         filelist_free(list);
535         return;
536     }
537 
538     /* We'll use prefix_dir as a buffer to write each file name into as we
539        go, so get it set up. */
540     if (prefix_len == 0)
541         file = prefix_dir;
542     else {
543         prefix_dir[prefix_len++] = '/';
544         file = prefix_dir + prefix_len;
545         *file = '\0';
546     }
547 
548     /* If we're not sorting directories or if the number of files is under
549        the threshold, just remove the files. */
550     if (sort_threshold == 0 || filecount < sort_threshold) {
551         for (i = 0; i < list->count; i++) {
552             strlcpy(file, list->files[i], sizeof(prefix_dir) - prefix_len);
553             unlink_file(prefix_dir);
554         }
555         filelist_free(list);
556         return;
557     }
558 
559     /* We have enough files to remove in this directory that it's worth
560        optimizing.  First, make sure the list of files is sorted.  It's not
561        uncommon for the files to already be sorted, so check first. */
562     for (sorted = true, i = 1; sorted && i < list->count; i++)
563         sorted = (strcmp(list->files[i - 1], list->files[i]) <= 0);
564     if (!sorted)
565         qsort(list->files, list->count, sizeof(char *), file_compare);
566 
567     /* Now, begin doing our optimized unlinks.  The technique we use is to
568        open the directory containing the files and read through it, checking
569        each file in the directory to see if it's one of the files we should
570        be removing.  The theory is that we want to minimize the amount of
571        time the operating system spends doing string compares trying to find
572        the file to be removed in the directory.  This is often an O(n)
573        operation.  Note that this optimization may slightly slow more
574        effecient operating systems. */
575     dir = opendir(prefix_len == 0 ? "." : prefix_dir);
576     if (dir == NULL) {
577         if (prefix_len > 0 && prefix_dir[0] == '/')
578             warn("can't open directory %s", prefix_dir);
579         else
580             warn("can't open directory %s in %s",
581                  (prefix_len == 0) ? "." : prefix_dir, current_dir);
582         error_count++;
583         filelist_free(list);
584         return;
585     }
586     for (i = 0, entry = readdir(dir); entry != NULL; entry = readdir(dir))
587         if (filelist_lookup(list, entry->d_name) != NULL) {
588             i++;
589             strlcpy(file, entry->d_name, sizeof(prefix_dir) - prefix_len);
590             unlink_file(prefix_dir);
591             if (i == list->count)
592                 break;
593         }
594     closedir(dir);
595     filelist_free(list);
596 }
597 
598 
599 /*
600 **  Check a path to see if it's okay (not likely to confuse us).  This
601 **  ensures that it doesn't contain elements like "./" or "../" and doesn't
602 **  contain doubled slashes.
603 */
604 static bool
bad_path(const char * p)605 bad_path(const char *p)
606 {
607     if (strlen(p) >= MAX_DIR_LEN)
608         return true;
609     while (*p) {
610         if (p[0] == '.' && (p[1] == '/' || (p[1] == '.' && p[2] == '/')))
611             return true;
612         while (*p && *p != '/')
613             p++;
614         if (p[0] == '/' && p[1] == '/')
615             return true;
616         if (*p == '/')
617             p++;
618     }
619     return false;
620 }
621 
622 
623 /*
624 **  Main routine.  Parse options, initialize the storage manager, and
625 **  initialize various global variables, and then go into a loop calling
626 **  process_line and unlink_filelist as needed.
627 */
628 int
main(int argc,char * argv[])629 main(int argc, char *argv[])
630 {
631     const char *name;
632     char *p, **arg;
633     QIOSTATE *qp;
634     filelist *list;
635     int filecount, deleted;
636     bool empty_error = false;
637 
638     /* Establish our identity.  Since we use the storage manager, we need to
639        set up syslog as well, although we won't use it ourselves. */
640     name = argv[0];
641     if (*name == '\0')
642         name = "fastrm";
643     else {
644         p = strrchr(name, '/');
645         if (p != NULL)
646             name = p + 1;
647     }
648     message_program_name = name;
649     openlog(name, LOG_CONS | LOG_PID, LOG_INN_PROG);
650 
651     /* If we're running as root, unlink may remove directories. */
652     unlink_dangerous = (geteuid() == 0);
653 
654     /* Unfortunately, we can't use getopt, because several of our options
655        take optional arguments.  Bleh. */
656     arg = argv + 1;
657     while (argc >= 2 && **arg == '-') {
658         p = *arg;
659         while (*++p) {
660             switch (*p) {
661             default:
662                 die("invalid option -- %c", *p);
663             case 'a':
664             case 'r':
665                 continue;
666             case 'c':
667                 chdir_threshold = 1;
668                 if (!isdigit((unsigned char) p[1]))
669                     continue;
670                 chdir_threshold = atoi(p + 1);
671                 break;
672             case 'd':
673                 debug_only = true;
674                 continue;
675             case 'e':
676                 empty_error = true;
677                 continue;
678             case 's':
679                 sort_threshold = 5;
680                 if (!isdigit((unsigned char) p[1]))
681                     continue;
682                 sort_threshold = atoi(p + 1);
683                 break;
684             case 'u':
685                 relative_threshold = 1;
686                 if (!isdigit((unsigned char) p[1]))
687                     continue;
688                 relative_threshold = atoi(p + 1);
689                 if (relative_threshold >= (int) strlen(dotdots) / 3)
690                     relative_threshold = strlen(dotdots) / 3 - 1;
691                 break;
692             }
693             break;
694         }
695         argc--;
696         arg++;
697     }
698     if (argc != 2)
699         die("usage error, wrong number of arguments");
700 
701     /* The remaining argument is the base path.  Make sure it's valid and
702        not excessively large and then change to it. */
703     base_dir = *arg;
704     if (*base_dir != '/' || bad_path(base_dir))
705         die("bad base path %s", base_dir);
706     strlcpy(current_dir, base_dir, sizeof(current_dir));
707     if (chdir(current_dir) < 0)
708         sysdie("can't chdir to base path %s", current_dir);
709 
710     /* Open our input stream and then loop through it, building filelists
711        and processing them until done. */
712     qp = QIOfdopen(fileno(stdin));
713     if (qp == NULL)
714         sysdie("can't reopen stdin");
715     while ((list = process_line(qp, &filecount, &deleted)) != NULL) {
716         empty_error = false;
717         unlink_filelist(list, filecount);
718     }
719     if (deleted > 0)
720         empty_error = false;
721 
722     /* All done. */
723     SMshutdown();
724     if (empty_error)
725         die("no files to remove");
726     exit(error_count > 0 ? 1 : 0);
727 }
728