1 /*
2 **  Delete a list of filenames or tokens from stdin.
3 **
4 **  Originally written by <kre@munnari.oz.au> (to only handle files).
5 **
6 **  Files that can't be unlinked because they didn't exist are considered
7 **  okay.  Any error condition results in exiting with non-zero exit
8 **  status.  Input lines in the form @...@ are taken to be storage API
9 **  tokens.  Input filenames should be fully qualified.  For maximum
10 **  efficiency, input filenames should be sorted; fastrm will cd into each
11 **  directory to avoid additional directory lookups when removing a lot of
12 **  files in a single directory.
13 */
14 
15 #include "portable/system.h"
16 
17 #include <ctype.h>
18 #include <dirent.h>
19 #include <errno.h>
20 #include <sys/stat.h>
21 #include <syslog.h>
22 
23 #include "inn/innconf.h"
24 #include "inn/libinn.h"
25 #include "inn/messages.h"
26 #include "inn/qio.h"
27 #include "inn/storage.h"
28 
/* We reject any path names longer than this.  get_line skips input lines
   whose length is MAX_DIR_LEN or more, and bad_path rejects a base path of
   that length, so every accepted path fits in a MAX_DIR_LEN buffer including
   its terminating nul. */
#define MAX_DIR_LEN 2048
31 
/* Data structure for a list of files in a single directory.  The list owns
   dir and every string in files; filelist_free releases all of them. */
typedef struct filelist {
    int count;    /* Number of entries of files currently in use. */
    int size;     /* Number of entries allocated in files. */
    char *dir;    /* Directory holding the files, or NULL if the input names
                     contained no slash. */
    char **files; /* File names with the directory part removed. */
} filelist;
39 
/* All relative paths are relative to this directory.  Set by main from the
   one non-option command-line argument. */
static char *base_dir = NULL;

/* The absolute path of the current working directory.  setup_dir only
   updates this when it really changed directory (prefix_len == 0). */
static char current_dir[MAX_DIR_LEN];

/* The prefix for the files that we're currently working with.  We sometimes
   also use this as working space for forming file names to remove, so give
   ourselves a bit of additional leeway just in case.  prefix_len is the
   length of the directory prefix held in prefix_dir; zero means that file
   names are used as-is relative to the current working directory. */
static char prefix_dir[MAX_DIR_LEN * 2];
static int prefix_len;

/* Some threshold values that govern the optimizations that we are willing
   to perform.  chdir_threshold determines how many files to be removed we
   want in a directory before we chdir to that directory.  sort_threshold
   determines how many files must be in a directory before we use readdir to
   remove them in order (zero disables that optimization).
   relative_threshold determines how many levels of "../" we're willing to
   try to use to move to the next directory rather than just calling chdir
   with the new absolute path (zero means always use the absolute path).
   These are the defaults; the -c, -s, and -u options parsed in main
   override them. */
static int chdir_threshold = 3;
static int relative_threshold = 0;
static int sort_threshold = 0;

/* True if we should only print what we would do, not actually do it. */
static bool debug_only = false;

/* A string used for constructing relative paths.  setup_dir points into the
   tail of this string to get the number of "../" components it needs, and
   main caps relative_threshold based on its length. */
static const char dotdots[] = "../../../../";

/* The number of errors encountered, used to determine exit status. */
static int error_count = 0;

/* Whether the storage manager has been initialized. */
static bool sm_initialized = false;

/* True if unlink may be able to remove directories.  main sets this when
   the effective user ID is 0. */
static bool unlink_dangerous = false;
77 
78 
/*
**  Ordering function for qsort and bsearch over an array of file names.
**  Both arguments point at array elements, i.e. at pointers to strings, and
**  the strings themselves are ordered with strcmp.
*/
static int
file_compare(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
91 
92 
93 /*
94 **  Create a new filelist.
95 */
96 static filelist *
filelist_new(char * dir)97 filelist_new(char *dir)
98 {
99     filelist *new;
100 
101     new = xmalloc(sizeof(filelist));
102     new->count = 0;
103     new->size = 0;
104     new->dir = dir;
105     new->files = NULL;
106     return new;
107 }
108 
109 
110 /*
111 **  Insert a file name into a list of files (unsorted).
112 */
113 static void
filelist_insert(filelist * list,char * name)114 filelist_insert(filelist *list, char *name)
115 {
116     if (list->count == list->size) {
117         list->size = (list->size == 0) ? 16 : list->size * 2;
118         list->files = xrealloc(list->files, list->size * sizeof(char *));
119     }
120     list->files[list->count++] = xstrdup(name);
121 }
122 
123 
124 /*
125 **  Find a file name in a sorted list of files.
126 */
127 static char *
filelist_lookup(filelist * list,const char * name)128 filelist_lookup(filelist *list, const char *name)
129 {
130     char **p;
131 
132     p = bsearch(&name, list->files, list->count, sizeof(char *), file_compare);
133     return (p == NULL ? NULL : *p);
134 }
135 
136 
137 /*
138 **  Empty a list of files, freeing all of the names but keeping the
139 **  structure intact.
140 */
141 static void
filelist_empty(filelist * list)142 filelist_empty(filelist *list)
143 {
144     int i;
145 
146     if (list->files == NULL)
147         return;
148     for (i = 0; i < list->count; i++)
149         free(list->files[i]);
150     list->count = 0;
151 }
152 
153 
154 /*
155 **  Free a list of files.
156 */
157 static void
filelist_free(filelist * list)158 filelist_free(filelist *list)
159 {
160     filelist_empty(list);
161     if (list->files != NULL)
162         free(list->files);
163     if (list->dir != NULL)
164         free(list->dir);
165     free(list);
166 }
167 
168 
/*
**  Exit handler for die.  Shut down the storage manager before exiting.
**  sm_initialize installs this as message_fatal_cleanup.  Always returns 1.
**  NOTE(review): presumably the return value becomes the exit status used by
**  die -- confirm against the messages API.
*/
static int
sm_cleanup(void)
{
    SMshutdown();
    return 1;
}
178 
179 
180 /*
181 **  Initialize the storage manager.  This includes parsing inn.conf, which
182 **  fastrm doesn't need for any other purpose.
183 */
184 static void
sm_initialize(void)185 sm_initialize(void)
186 {
187     bool value;
188 
189     if (!innconf_read(NULL))
190         exit(1);
191     value = true;
192     if (!SMsetup(SM_RDWR, &value) || !SMsetup(SM_PREOPEN, &value))
193         die("can't set up storage manager");
194     if (!SMinit())
195         die("can't initialize storage manager: %s", SMerrorstr);
196     sm_initialized = true;
197     message_fatal_cleanup = sm_cleanup;
198 }
199 
200 
201 /*
202 **  Get a line from a given QIO stream, returning a pointer to it.  Warn
203 **  about and then skip lines that are too long.  Returns NULL at EOF or on
204 **  an error.
205 */
206 static char *
get_line(QIOSTATE * qp)207 get_line(QIOSTATE *qp)
208 {
209     static int count;
210     char *p;
211 
212     p = QIOread(qp);
213     count++;
214     while (QIOtoolong(qp) || (p != NULL && strlen(p) >= MAX_DIR_LEN)) {
215         warn("line %d too long", count);
216         error_count++;
217         p = QIOread(qp);
218     }
219     if (p == NULL) {
220         if (QIOerror(qp)) {
221             syswarn("read error");
222             error_count++;
223         }
224         return NULL;
225     }
226     return p;
227 }
228 
229 
230 /*
231 **  Read lines from stdin (including the first that may have been there
232 **  from our last time in) until we reach EOF or until we get a line that
233 **  names a file not in the same directory as the previous lot.  Remember
234 **  the file names in the directory we're examining and return the list.
235 */
236 static filelist *
process_line(QIOSTATE * qp,int * queued,int * deleted)237 process_line(QIOSTATE *qp, int *queued, int *deleted)
238 {
239     static char *line = NULL;
240     filelist *list = NULL;
241     char *p;
242     char *dir = NULL;
243     int dlen = -1;
244 
245     *queued = 0;
246     *deleted = 0;
247 
248     if (line == NULL)
249         line = get_line(qp);
250 
251     for (; line != NULL; line = get_line(qp)) {
252         if (IsToken(line)) {
253             (*deleted)++;
254             if (debug_only) {
255                 printf("Token %s\n", line);
256                 continue;
257             }
258             if (!sm_initialized)
259                 sm_initialize();
260             if (!SMcancel(TextToToken(line)))
261                 if (SMerrno != SMERR_NOENT && SMerrno != SMERR_UNINIT) {
262                     warn("can't cancel %s", line);
263                     error_count++;
264                 }
265         } else {
266             if (list == NULL) {
267                 p = strrchr(line, '/');
268                 if (p != NULL) {
269                     *p++ = '\0';
270                     dlen = strlen(line);
271                     dir = xstrdup(line);
272                 } else {
273                     dlen = -1;
274                     dir = NULL;
275                 }
276                 list = filelist_new(dir);
277             } else {
278                 if ((dlen < 0 && strchr(line, '/'))
279                     || (dlen >= 0
280                         && (line[dlen] != '/' || strchr(line + dlen + 1, '/')
281                             || strncmp(dir, line, dlen))))
282                     return list;
283             }
284             filelist_insert(list, line + dlen + 1);
285             (*queued)++;
286         }
287     }
288     return list;
289 }
290 
291 
/*
**  Copy the leading part of the path from into to, stopping just before the
**  nth slash (or the first slash if n is less than 1), or at the end of from
**  if it has fewer slashes than that.  The result is always nul-terminated.
**  The caller must make sure to is large enough to hold all of from.
*/
static void
copy_segments(char *to, const char *from, int n)
{
    const char *src = from;
    char *dst = to;

    while (*src != '\0') {
        if (*src == '/') {
            n--;
            if (n <= 0)
                break;
        }
        *dst++ = *src++;
    }
    *dst = '\0';
}
307 
308 
/*
**  Count the slashes in a file name, which serves as the number of path
**  segments it contains.
*/
static int
slashcount(char *name)
{
    const char *cursor = name;
    int total = 0;

    while (*cursor != '\0') {
        if (*cursor == '/')
            total++;
        cursor++;
    }
    return total;
}
323 
324 
325 /*
326 **  Unlink a file, reporting errors if the unlink fails for a reason other
327 **  than the file not existing doesn't exist.  Be careful to avoid unlinking
328 **  a directory if unlink_dangerous is true.
329 */
330 static void
unlink_file(const char * file)331 unlink_file(const char *file)
332 {
333     struct stat st;
334 
335     /* On some systems, unlink will remove directories if used by root.  If
336        we're running as root, unlink_dangerous will be set, and we need to
337        make sure that the file isn't a directory first. */
338     if (unlink_dangerous) {
339         if (stat(file, &st) < 0) {
340             if (errno != ENOENT) {
341                 if (*file == '/')
342                     syswarn("can't stat %s", file);
343                 else
344                     syswarn("can't stat %s in %s", file, current_dir);
345                 error_count++;
346             }
347             return;
348         }
349         if (S_ISDIR(st.st_mode)) {
350             if (*file == '/')
351                 syswarn("%s is a directory", file);
352             else
353                 syswarn("%s in %s is a directory", file, current_dir);
354             error_count++;
355             return;
356         }
357     }
358 
359     if (debug_only) {
360         if (*file != '/')
361             printf("%s / ", current_dir);
362         printf("%s\n", file);
363         return;
364     }
365 
366     if (unlink(file) < 0 && errno != ENOENT) {
367         if (*file == '/')
368             syswarn("can't unlink %s", file);
369         else
370             syswarn("can't unlink %s in %s", file, current_dir);
371     }
372 }
373 
374 
375 /*
376 **  A wrapper around chdir that dies if chdir fails for a reason other than
377 **  the directory not existing, returns false if the directory doesn't
378 **  exist (reporting an error), and otherwise returns true.  It also checks
379 **  to make sure that filecount is larger than chdir_threshold, and if it
380 **  isn't it instead just sets prefix_dir and prefix_len to point to the new
381 **  directory without changing the working directory.
382 */
383 static bool
chdir_checked(const char * path,int filecount)384 chdir_checked(const char *path, int filecount)
385 {
386     if (filecount < chdir_threshold) {
387         strlcpy(prefix_dir, path, sizeof(prefix_dir));
388         prefix_len = strlen(path);
389     } else {
390         prefix_len = 0;
391         if (chdir(path) < 0) {
392             if (errno != ENOENT)
393                 sysdie("can't chdir from %s to %s", current_dir, path);
394             else {
395                 syswarn("can't chdir from %s to %s", current_dir, path);
396                 return false;
397             }
398         }
399     }
400     return true;
401 }
402 
403 
404 /*
405 **  Set our environment (process working directory, and global vars) to
406 **  reflect a change of directory to dir (relative to base_dir if dir is not
407 **  an absolute path).  We're likely to want to do different things
408 **  depending on the amount of work to do in dir, so we also take the number
409 **  of files to remove in dir as the second argument.  Return false if the
410 **  directory doesn't exist (and therefore all files in it have already been
411 **  removed; otherwise, return true.
412 */
413 static bool
setup_dir(char * dir,int filecount)414 setup_dir(char *dir, int filecount)
415 {
416     char *p, *q, *absolute;
417     char path[MAX_DIR_LEN];
418     int base_depth, depth;
419 
420     /* Set absolute to the absolute path to the new directory. */
421     if (dir == NULL)
422         absolute = base_dir;
423     else if (*dir == '/')
424         absolute = dir;
425     else if (*dir == '\0') {
426         strlcpy(path, "/", sizeof(path));
427         absolute = path;
428     } else {
429         /* Strip off leading "./". */
430         while (dir[0] == '.' && dir[1] == '/')
431             for (dir += 2; *dir == '/'; dir++)
432                 ;
433 
434         /* Handle any leading "../", but only up to the number of segments
435            in base_dir. */
436         base_depth = slashcount(base_dir);
437         while (base_depth > 0 && strncmp(dir, "../", 3) == 0)
438             for (base_depth--, dir += 3; *dir == '/'; dir++)
439                 ;
440         if (base_depth <= 0)
441             die("too many ../'s in path %s", dir);
442         copy_segments(path, base_dir, base_depth + 1);
443         if (strlen(path) + strlen(dir) + 2 > MAX_DIR_LEN)
444             die("path %s too long", dir);
445         strlcat(path, "/", sizeof(path));
446         strlcat(path, dir, sizeof(path));
447         absolute = path;
448     }
449 
450     /* Find the first point of difference between absolute and current_dir.
451        If there is no difference, we're done; we're changing to the same
452        directory we were in (this is probably some sort of error, but can
453        happen with odd relative paths). */
454     for (p = absolute, q = current_dir; *p == *q; p++, q++)
455         if (*p == '\0')
456             return true;
457 
458     /* If we reached the end of current_dir and there's more left of
459        absolute, we're changing to a subdirectory of where we were. */
460     if (*q == '\0' && *p == '/') {
461         p++;
462         if (!chdir_checked(p, filecount))
463             return false;
464         if (prefix_len == 0)
465             strlcpy(current_dir, absolute, sizeof(current_dir));
466         return true;
467     }
468 
469     /* Otherwise, if we were promised that we have a pure tree (in other
470        words, no symbolic links to directories), see if it's worth going up
471        the tree with ".." and then down again rather than chdir to the
472        absolute path.  relative_threshold determines how many levels of ".."
473        we're willing to use; the default of 1 seems fractionally faster than
474        2 and 0 indicates to always use absolute paths.  Values larger than 3
475        would require extending the dotdots string, but are unlikely to be
476        worth it.
477 
478        FIXME: It's too hard to figure out what this code does.  It needs to be
479        rewritten. */
480     if (*p != '\0' && relative_threshold > 0) {
481         depth = slashcount(q);
482         if (depth <= relative_threshold) {
483             while (p > absolute && *--p != '/')
484                 ;
485             p++;
486             strlcpy(prefix_dir, dotdots + 9 - depth * 3, sizeof(prefix_dir));
487             strlcat(prefix_dir, p, sizeof(prefix_dir));
488             if (!chdir_checked(prefix_dir, filecount))
489                 return false;
490 
491             /* Now patch up current_dir to reflect where we are. */
492             if (prefix_len == 0) {
493                 while (q > current_dir && *--q != '/')
494                     ;
495                 q[1] = '\0';
496                 strlcat(current_dir, p, sizeof(current_dir));
497             }
498             return true;
499         }
500     }
501 
502     /* All else has failed; just use the absolute path.  This includes the
503        case where current_dir is a subdirectory of absolute, in which case
504        it may be somewhat faster to use chdir("../..") or the like rather
505        than the absolute path, but this case rarely happens when the user
506        cares about speed (it usually doesn't happen with sorted input).  So
507        we don't bother. */
508     if (!chdir_checked(absolute, filecount))
509         return false;
510     if (prefix_len == 0)
511         strlcpy(current_dir, absolute, sizeof(current_dir));
512     return true;
513 }
514 
515 
516 /*
517 **  Process a filelist of files to be deleted, all in the same directory.
518 */
519 static void
unlink_filelist(filelist * list,int filecount)520 unlink_filelist(filelist *list, int filecount)
521 {
522     bool sorted;
523     DIR *dir;
524     struct dirent *entry;
525     char *file;
526     int i;
527 
528     /* If setup_dir returns false, the directory doesn't exist and we're
529        already all done. */
530     if (!setup_dir(list->dir, filecount)) {
531         filelist_free(list);
532         return;
533     }
534 
535     /* We'll use prefix_dir as a buffer to write each file name into as we
536        go, so get it set up. */
537     if (prefix_len == 0)
538         file = prefix_dir;
539     else {
540         prefix_dir[prefix_len++] = '/';
541         file = prefix_dir + prefix_len;
542         *file = '\0';
543     }
544 
545     /* If we're not sorting directories or if the number of files is under
546        the threshold, just remove the files. */
547     if (sort_threshold == 0 || filecount < sort_threshold) {
548         for (i = 0; i < list->count; i++) {
549             strlcpy(file, list->files[i], sizeof(prefix_dir) - prefix_len);
550             unlink_file(prefix_dir);
551         }
552         filelist_free(list);
553         return;
554     }
555 
556     /* We have enough files to remove in this directory that it's worth
557        optimizing.  First, make sure the list of files is sorted.  It's not
558        uncommon for the files to already be sorted, so check first. */
559     for (sorted = true, i = 1; sorted && i < list->count; i++)
560         sorted = (strcmp(list->files[i - 1], list->files[i]) <= 0);
561     if (!sorted)
562         qsort(list->files, list->count, sizeof(char *), file_compare);
563 
564     /* Now, begin doing our optimized unlinks.  The technique we use is to
565        open the directory containing the files and read through it, checking
566        each file in the directory to see if it's one of the files we should
567        be removing.  The theory is that we want to minimize the amount of
568        time the operating system spends doing string compares trying to find
569        the file to be removed in the directory.  This is often an O(n)
570        operation.  Note that this optimization may slightly slow more
571        effecient operating systems. */
572     dir = opendir(prefix_len == 0 ? "." : prefix_dir);
573     if (dir == NULL) {
574         if (prefix_len > 0 && prefix_dir[0] == '/')
575             warn("can't open directory %s", prefix_dir);
576         else
577             warn("can't open directory %s in %s",
578                  (prefix_len == 0) ? "." : prefix_dir, current_dir);
579         error_count++;
580         filelist_free(list);
581         return;
582     }
583     for (i = 0, entry = readdir(dir); entry != NULL; entry = readdir(dir))
584         if (filelist_lookup(list, entry->d_name) != NULL) {
585             i++;
586             strlcpy(file, entry->d_name, sizeof(prefix_dir) - prefix_len);
587             unlink_file(prefix_dir);
588             if (i == list->count)
589                 break;
590         }
591     closedir(dir);
592     filelist_free(list);
593 }
594 
595 
596 /*
597 **  Check a path to see if it's okay (not likely to confuse us).  This
598 **  ensures that it doesn't contain elements like "./" or "../" and doesn't
599 **  contain doubled slashes.
600 */
601 static bool
bad_path(const char * p)602 bad_path(const char *p)
603 {
604     if (strlen(p) >= MAX_DIR_LEN)
605         return true;
606     while (*p) {
607         if (p[0] == '.' && (p[1] == '/' || (p[1] == '.' && p[2] == '/')))
608             return true;
609         while (*p && *p != '/')
610             p++;
611         if (p[0] == '/' && p[1] == '/')
612             return true;
613         if (*p == '/')
614             p++;
615     }
616     return false;
617 }
618 
619 
/*
**  Main routine.  Parse options, initialize various global variables, and
**  then go into a loop calling process_line and unlink_filelist as needed.
**  The storage manager is only initialized later, by process_line, the
**  first time a token is seen.
**
**  Exits with status 1 if any error was counted, and dies if -e was given
**  and the input contained neither file names nor tokens; otherwise exits
**  with status 0.
*/
int
main(int argc, char *argv[])
{
    const char *name;
    char *p, **arg;
    QIOSTATE *qp;
    filelist *list;
    int filecount, deleted;
    bool empty_error = false;

    /* Establish our identity.  Since we use the storage manager, we need to
       set up syslog as well, although we won't use it ourselves. */
    name = argv[0];
    if (*name == '\0')
        name = "fastrm";
    else {
        p = strrchr(name, '/');
        if (p != NULL)
            name = p + 1;
    }
    message_program_name = name;
    openlog(name, LOG_CONS | LOG_PID, LOG_INN_PROG);

    /* If we're running as root, unlink may remove directories. */
    unlink_dangerous = (geteuid() == 0);

    /* Unfortunately, we can't use getopt, because several of our options
       take optional arguments.  Bleh.

       Each argument word may bundle several flags.  Inside the switch,
       "continue" moves on to the next flag character of the same word,
       while "break" (used once an option has consumed a number) leaves the
       switch and then the inner loop, since the rest of the word was that
       number. */
    arg = argv + 1;
    while (argc >= 2 && **arg == '-') {
        p = *arg;
        while (*++p) {
            switch (*p) {
            default:
                die("invalid option -- %c", *p);
            /* -a and -r are accepted and ignored. */
            case 'a':
            case 'r':
                continue;
            /* -c[N]: chdir threshold, 1 if no number is given. */
            case 'c':
                chdir_threshold = 1;
                if (!isdigit((unsigned char) p[1]))
                    continue;
                chdir_threshold = atoi(p + 1);
                break;
            case 'd':
                debug_only = true;
                continue;
            case 'e':
                empty_error = true;
                continue;
            /* -s[N]: sort threshold, 5 if no number is given. */
            case 's':
                sort_threshold = 5;
                if (!isdigit((unsigned char) p[1]))
                    continue;
                sort_threshold = atoi(p + 1);
                break;
            /* -u[N]: relative threshold, 1 if no number is given, capped so
               that setup_dir never needs more "../" components than the
               dotdots string provides. */
            case 'u':
                relative_threshold = 1;
                if (!isdigit((unsigned char) p[1]))
                    continue;
                relative_threshold = atoi(p + 1);
                if (relative_threshold >= (int) strlen(dotdots) / 3)
                    relative_threshold = strlen(dotdots) / 3 - 1;
                break;
            }
            break;
        }
        argc--;
        arg++;
    }
    if (argc != 2)
        die("usage error, wrong number of arguments");

    /* The remaining argument is the base path.  Make sure it's valid and
       not excessively large and then change to it. */
    base_dir = *arg;
    if (*base_dir != '/' || bad_path(base_dir))
        die("bad base path %s", base_dir);
    strlcpy(current_dir, base_dir, sizeof(current_dir));
    if (chdir(current_dir) < 0)
        sysdie("can't chdir to base path %s", current_dir);

    /* Open our input stream and then loop through it, building filelists
       and processing them until done.  Any list at all means the input
       wasn't empty.  unlink_filelist frees each list. */
    qp = QIOfdopen(fileno(stdin));
    if (qp == NULL)
        sysdie("can't reopen stdin");
    while ((list = process_line(qp, &filecount, &deleted)) != NULL) {
        empty_error = false;
        unlink_filelist(list, filecount);
    }

    /* deleted now holds the number of tokens seen by the final call to
       process_line, which covers input that ended with (or consisted only
       of) tokens. */
    if (deleted > 0)
        empty_error = false;

    /* All done. */
    SMshutdown();
    if (empty_error)
        die("no files to remove");
    exit(error_count > 0 ? 1 : 0);
}
725