1 /*
2 ** Delete a list of filenames or tokens from stdin.
3 **
4 ** Originally written by <kre@munnari.oz.au> (to only handle files).
5 **
6 ** Files that can't be unlinked because they didn't exist are considered
7 ** okay. Any error condition results in exiting with non-zero exit
8 ** status. Input lines in the form @...@ are taken to be storage API
9 ** tokens. Input filenames should be fully qualified. For maximum
10 ** efficiency, input filenames should be sorted; fastrm will cd into each
11 ** directory to avoid additional directory lookups when removing a lot of
12 ** files in a single directory.
13 */
14
15 #include "portable/system.h"
16
17 #include <ctype.h>
18 #include <dirent.h>
19 #include <errno.h>
20 #include <sys/stat.h>
21 #include <syslog.h>
22
23 #include "inn/innconf.h"
24 #include "inn/libinn.h"
25 #include "inn/messages.h"
26 #include "inn/qio.h"
27 #include "inn/storage.h"
28
/* We reject any path names longer than this.  Input lines and the base path
   are refused once their length reaches this value, and it also sizes the
   fixed path buffers declared in this file. */
#define MAX_DIR_LEN 2048
31
/* Data structure for a list of files in a single directory. */
typedef struct filelist {
    /* Number of entries of files that are currently in use. */
    int count;
    /* Number of entries allocated for files (grown by filelist_insert). */
    int size;
    /* Directory the files live in, or NULL when the names have no directory
       part.  Released by filelist_free. */
    char *dir;
    /* Array of file names, each one a separately allocated copy. */
    char **files;
} filelist;
39
/* All relative paths are relative to this directory.  Set by main from the
   base path given on the command line. */
static char *base_dir = NULL;

/* The absolute path of the current working directory.  Initialized from
   base_dir in main and updated by setup_dir whenever it really changes the
   working directory. */
static char current_dir[MAX_DIR_LEN];

/* The prefix for the files that we're currently working with.  We sometimes
   also use this as working space for forming file names to remove, so give
   ourselves a bit of additional leeway just in case.  prefix_len is the
   length of the directory prefix held in prefix_dir, or 0 when file names
   are used as-is relative to the working directory. */
static char prefix_dir[MAX_DIR_LEN * 2];
static int prefix_len;

/* Some threshold values that govern the optimizations that we are willing
   to perform.  chdir_threshold determines how many files to be removed we
   want in a directory before we chdir to that directory.  sort_threshold
   determines how many files must be in a directory before we use readdir to
   remove them in order.  relative_threshold determines how many levels of
   "../" we're willing to try to use to move to the next directory rather
   than just calling chdir with the new absolute path.  They are adjusted by
   the -c, -s, and -u options respectively; a sort_threshold or
   relative_threshold of 0 turns the corresponding optimization off. */
static int chdir_threshold = 3;
static int relative_threshold = 0;
static int sort_threshold = 0;

/* True if we should only print what we would do, not actually do it (set by
   the -d option). */
static bool debug_only = false;

/* A string used for constructing relative paths.  setup_dir uses a suffix of
   it to obtain the number of "../" components it needs, and main limits
   relative_threshold according to its length. */
static const char dotdots[] = "../../../../";

/* The number of errors encountered, used to determine exit status. */
static int error_count = 0;

/* Whether the storage manager has been initialized (it is set up lazily, on
   the first token that has to be cancelled). */
static bool sm_initialized = false;

/* True if unlink may be able to remove directories.  Set in main when the
   effective user ID is 0. */
static bool unlink_dangerous = false;
77
78
/*
** Comparison callback for qsort and bsearch over arrays of strings.  Each
** argument points at an array element (that is, at a char pointer), and the
** strings they refer to are ordered the way strcmp orders them.
*/
static int
file_compare(const void *a, const void *b)
{
    const char *const *left = a;
    const char *const *right = b;

    return strcmp(*left, *right);
}
91
92
93 /*
94 ** Create a new filelist.
95 */
96 static filelist *
filelist_new(char * dir)97 filelist_new(char *dir)
98 {
99 filelist *new;
100
101 new = xmalloc(sizeof(filelist));
102 new->count = 0;
103 new->size = 0;
104 new->dir = dir;
105 new->files = NULL;
106 return new;
107 }
108
109
110 /*
111 ** Insert a file name into a list of files (unsorted).
112 */
113 static void
filelist_insert(filelist * list,char * name)114 filelist_insert(filelist *list, char *name)
115 {
116 if (list->count == list->size) {
117 list->size = (list->size == 0) ? 16 : list->size * 2;
118 list->files = xrealloc(list->files, list->size * sizeof(char *));
119 }
120 list->files[list->count++] = xstrdup(name);
121 }
122
123
124 /*
125 ** Find a file name in a sorted list of files.
126 */
127 static char *
filelist_lookup(filelist * list,const char * name)128 filelist_lookup(filelist *list, const char *name)
129 {
130 char **p;
131
132 p = bsearch(&name, list->files, list->count, sizeof(char *), file_compare);
133 return (p == NULL ? NULL : *p);
134 }
135
136
137 /*
138 ** Empty a list of files, freeing all of the names but keeping the
139 ** structure intact.
140 */
141 static void
filelist_empty(filelist * list)142 filelist_empty(filelist *list)
143 {
144 int i;
145
146 if (list->files == NULL)
147 return;
148 for (i = 0; i < list->count; i++)
149 free(list->files[i]);
150 list->count = 0;
151 }
152
153
154 /*
155 ** Free a list of files.
156 */
157 static void
filelist_free(filelist * list)158 filelist_free(filelist *list)
159 {
160 filelist_empty(list);
161 if (list->files != NULL)
162 free(list->files);
163 if (list->dir != NULL)
164 free(list->dir);
165 free(list);
166 }
167
168
/*
** Exit handler for die, installed as message_fatal_cleanup by
** sm_initialize.  Shuts down the storage manager and returns 1
** (presumably the exit status die should use; confirm against the messages
** API).
*/
static int
sm_cleanup(void)
{
    const int result = 1;

    SMshutdown();
    return result;
}
178
179
180 /*
181 ** Initialize the storage manager. This includes parsing inn.conf, which
182 ** fastrm doesn't need for any other purpose.
183 */
184 static void
sm_initialize(void)185 sm_initialize(void)
186 {
187 bool value;
188
189 if (!innconf_read(NULL))
190 exit(1);
191 value = true;
192 if (!SMsetup(SM_RDWR, &value) || !SMsetup(SM_PREOPEN, &value))
193 die("can't set up storage manager");
194 if (!SMinit())
195 die("can't initialize storage manager: %s", SMerrorstr);
196 sm_initialized = true;
197 message_fatal_cleanup = sm_cleanup;
198 }
199
200
201 /*
202 ** Get a line from a given QIO stream, returning a pointer to it. Warn
203 ** about and then skip lines that are too long. Returns NULL at EOF or on
204 ** an error.
205 */
206 static char *
get_line(QIOSTATE * qp)207 get_line(QIOSTATE *qp)
208 {
209 static int count;
210 char *p;
211
212 p = QIOread(qp);
213 count++;
214 while (QIOtoolong(qp) || (p != NULL && strlen(p) >= MAX_DIR_LEN)) {
215 warn("line %d too long", count);
216 error_count++;
217 p = QIOread(qp);
218 }
219 if (p == NULL) {
220 if (QIOerror(qp)) {
221 syswarn("read error");
222 error_count++;
223 }
224 return NULL;
225 }
226 return p;
227 }
228
229
230 /*
231 ** Read lines from stdin (including the first that may have been there
232 ** from our last time in) until we reach EOF or until we get a line that
233 ** names a file not in the same directory as the previous lot. Remember
234 ** the file names in the directory we're examining and return the list.
235 */
236 static filelist *
process_line(QIOSTATE * qp,int * queued,int * deleted)237 process_line(QIOSTATE *qp, int *queued, int *deleted)
238 {
239 static char *line = NULL;
240 filelist *list = NULL;
241 char *p;
242 char *dir = NULL;
243 int dlen = -1;
244
245 *queued = 0;
246 *deleted = 0;
247
248 if (line == NULL)
249 line = get_line(qp);
250
251 for (; line != NULL; line = get_line(qp)) {
252 if (IsToken(line)) {
253 (*deleted)++;
254 if (debug_only) {
255 printf("Token %s\n", line);
256 continue;
257 }
258 if (!sm_initialized)
259 sm_initialize();
260 if (!SMcancel(TextToToken(line)))
261 if (SMerrno != SMERR_NOENT && SMerrno != SMERR_UNINIT) {
262 warn("can't cancel %s", line);
263 error_count++;
264 }
265 } else {
266 if (list == NULL) {
267 p = strrchr(line, '/');
268 if (p != NULL) {
269 *p++ = '\0';
270 dlen = strlen(line);
271 dir = xstrdup(line);
272 } else {
273 dlen = -1;
274 dir = NULL;
275 }
276 list = filelist_new(dir);
277 } else {
278 if ((dlen < 0 && strchr(line, '/'))
279 || (dlen >= 0
280 && (line[dlen] != '/' || strchr(line + dlen + 1, '/')
281 || strncmp(dir, line, dlen))))
282 return list;
283 }
284 filelist_insert(list, line + dlen + 1);
285 (*queued)++;
286 }
287 }
288 return list;
289 }
290
291
/*
** Copy the leading part of the path from into to, stopping just before the
** nth slash (or at the end of from if it has fewer slashes).  The result is
** always nul-terminated.  With n of 1 or less, copying stops at the very
** first slash.  The caller must supply a large enough buffer.
*/
static void
copy_segments(char *to, const char *from, int n)
{
    while (*from != '\0') {
        if (*from == '/') {
            n--;
            if (n <= 0)
                break;
        }
        *to++ = *from++;
    }
    *to = '\0';
}
307
308
/*
** Count the slashes in a file name, which we use as the number of path
** segments it contains.
*/
static int
slashcount(char *name)
{
    const char *cursor = name;
    int total = 0;

    while (*cursor != '\0') {
        if (*cursor == '/')
            total++;
        cursor++;
    }
    return total;
}
323
324
325 /*
326 ** Unlink a file, reporting errors if the unlink fails for a reason other
327 ** than the file not existing doesn't exist. Be careful to avoid unlinking
328 ** a directory if unlink_dangerous is true.
329 */
330 static void
unlink_file(const char * file)331 unlink_file(const char *file)
332 {
333 struct stat st;
334
335 /* On some systems, unlink will remove directories if used by root. If
336 we're running as root, unlink_dangerous will be set, and we need to
337 make sure that the file isn't a directory first. */
338 if (unlink_dangerous) {
339 if (stat(file, &st) < 0) {
340 if (errno != ENOENT) {
341 if (*file == '/')
342 syswarn("can't stat %s", file);
343 else
344 syswarn("can't stat %s in %s", file, current_dir);
345 error_count++;
346 }
347 return;
348 }
349 if (S_ISDIR(st.st_mode)) {
350 if (*file == '/')
351 syswarn("%s is a directory", file);
352 else
353 syswarn("%s in %s is a directory", file, current_dir);
354 error_count++;
355 return;
356 }
357 }
358
359 if (debug_only) {
360 if (*file != '/')
361 printf("%s / ", current_dir);
362 printf("%s\n", file);
363 return;
364 }
365
366 if (unlink(file) < 0 && errno != ENOENT) {
367 if (*file == '/')
368 syswarn("can't unlink %s", file);
369 else
370 syswarn("can't unlink %s in %s", file, current_dir);
371 }
372 }
373
374
375 /*
376 ** A wrapper around chdir that dies if chdir fails for a reason other than
377 ** the directory not existing, returns false if the directory doesn't
378 ** exist (reporting an error), and otherwise returns true. It also checks
379 ** to make sure that filecount is larger than chdir_threshold, and if it
380 ** isn't it instead just sets prefix_dir and prefix_len to point to the new
381 ** directory without changing the working directory.
382 */
383 static bool
chdir_checked(const char * path,int filecount)384 chdir_checked(const char *path, int filecount)
385 {
386 if (filecount < chdir_threshold) {
387 strlcpy(prefix_dir, path, sizeof(prefix_dir));
388 prefix_len = strlen(path);
389 } else {
390 prefix_len = 0;
391 if (chdir(path) < 0) {
392 if (errno != ENOENT)
393 sysdie("can't chdir from %s to %s", current_dir, path);
394 else {
395 syswarn("can't chdir from %s to %s", current_dir, path);
396 return false;
397 }
398 }
399 }
400 return true;
401 }
402
403
404 /*
405 ** Set our environment (process working directory, and global vars) to
406 ** reflect a change of directory to dir (relative to base_dir if dir is not
407 ** an absolute path). We're likely to want to do different things
408 ** depending on the amount of work to do in dir, so we also take the number
409 ** of files to remove in dir as the second argument. Return false if the
410 ** directory doesn't exist (and therefore all files in it have already been
411 ** removed; otherwise, return true.
412 */
413 static bool
setup_dir(char * dir,int filecount)414 setup_dir(char *dir, int filecount)
415 {
416 char *p, *q, *absolute;
417 char path[MAX_DIR_LEN];
418 int base_depth, depth;
419
420 /* Set absolute to the absolute path to the new directory. */
421 if (dir == NULL)
422 absolute = base_dir;
423 else if (*dir == '/')
424 absolute = dir;
425 else if (*dir == '\0') {
426 strlcpy(path, "/", sizeof(path));
427 absolute = path;
428 } else {
429 /* Strip off leading "./". */
430 while (dir[0] == '.' && dir[1] == '/')
431 for (dir += 2; *dir == '/'; dir++)
432 ;
433
434 /* Handle any leading "../", but only up to the number of segments
435 in base_dir. */
436 base_depth = slashcount(base_dir);
437 while (base_depth > 0 && strncmp(dir, "../", 3) == 0)
438 for (base_depth--, dir += 3; *dir == '/'; dir++)
439 ;
440 if (base_depth <= 0)
441 die("too many ../'s in path %s", dir);
442 copy_segments(path, base_dir, base_depth + 1);
443 if (strlen(path) + strlen(dir) + 2 > MAX_DIR_LEN)
444 die("path %s too long", dir);
445 strlcat(path, "/", sizeof(path));
446 strlcat(path, dir, sizeof(path));
447 absolute = path;
448 }
449
450 /* Find the first point of difference between absolute and current_dir.
451 If there is no difference, we're done; we're changing to the same
452 directory we were in (this is probably some sort of error, but can
453 happen with odd relative paths). */
454 for (p = absolute, q = current_dir; *p == *q; p++, q++)
455 if (*p == '\0')
456 return true;
457
458 /* If we reached the end of current_dir and there's more left of
459 absolute, we're changing to a subdirectory of where we were. */
460 if (*q == '\0' && *p == '/') {
461 p++;
462 if (!chdir_checked(p, filecount))
463 return false;
464 if (prefix_len == 0)
465 strlcpy(current_dir, absolute, sizeof(current_dir));
466 return true;
467 }
468
469 /* Otherwise, if we were promised that we have a pure tree (in other
470 words, no symbolic links to directories), see if it's worth going up
471 the tree with ".." and then down again rather than chdir to the
472 absolute path. relative_threshold determines how many levels of ".."
473 we're willing to use; the default of 1 seems fractionally faster than
474 2 and 0 indicates to always use absolute paths. Values larger than 3
475 would require extending the dotdots string, but are unlikely to be
476 worth it.
477
478 FIXME: It's too hard to figure out what this code does. It needs to be
479 rewritten. */
480 if (*p != '\0' && relative_threshold > 0) {
481 depth = slashcount(q);
482 if (depth <= relative_threshold) {
483 while (p > absolute && *--p != '/')
484 ;
485 p++;
486 strlcpy(prefix_dir, dotdots + 9 - depth * 3, sizeof(prefix_dir));
487 strlcat(prefix_dir, p, sizeof(prefix_dir));
488 if (!chdir_checked(prefix_dir, filecount))
489 return false;
490
491 /* Now patch up current_dir to reflect where we are. */
492 if (prefix_len == 0) {
493 while (q > current_dir && *--q != '/')
494 ;
495 q[1] = '\0';
496 strlcat(current_dir, p, sizeof(current_dir));
497 }
498 return true;
499 }
500 }
501
502 /* All else has failed; just use the absolute path. This includes the
503 case where current_dir is a subdirectory of absolute, in which case
504 it may be somewhat faster to use chdir("../..") or the like rather
505 than the absolute path, but this case rarely happens when the user
506 cares about speed (it usually doesn't happen with sorted input). So
507 we don't bother. */
508 if (!chdir_checked(absolute, filecount))
509 return false;
510 if (prefix_len == 0)
511 strlcpy(current_dir, absolute, sizeof(current_dir));
512 return true;
513 }
514
515
516 /*
517 ** Process a filelist of files to be deleted, all in the same directory.
518 */
519 static void
unlink_filelist(filelist * list,int filecount)520 unlink_filelist(filelist *list, int filecount)
521 {
522 bool sorted;
523 DIR *dir;
524 struct dirent *entry;
525 char *file;
526 int i;
527
528 /* If setup_dir returns false, the directory doesn't exist and we're
529 already all done. */
530 if (!setup_dir(list->dir, filecount)) {
531 filelist_free(list);
532 return;
533 }
534
535 /* We'll use prefix_dir as a buffer to write each file name into as we
536 go, so get it set up. */
537 if (prefix_len == 0)
538 file = prefix_dir;
539 else {
540 prefix_dir[prefix_len++] = '/';
541 file = prefix_dir + prefix_len;
542 *file = '\0';
543 }
544
545 /* If we're not sorting directories or if the number of files is under
546 the threshold, just remove the files. */
547 if (sort_threshold == 0 || filecount < sort_threshold) {
548 for (i = 0; i < list->count; i++) {
549 strlcpy(file, list->files[i], sizeof(prefix_dir) - prefix_len);
550 unlink_file(prefix_dir);
551 }
552 filelist_free(list);
553 return;
554 }
555
556 /* We have enough files to remove in this directory that it's worth
557 optimizing. First, make sure the list of files is sorted. It's not
558 uncommon for the files to already be sorted, so check first. */
559 for (sorted = true, i = 1; sorted && i < list->count; i++)
560 sorted = (strcmp(list->files[i - 1], list->files[i]) <= 0);
561 if (!sorted)
562 qsort(list->files, list->count, sizeof(char *), file_compare);
563
564 /* Now, begin doing our optimized unlinks. The technique we use is to
565 open the directory containing the files and read through it, checking
566 each file in the directory to see if it's one of the files we should
567 be removing. The theory is that we want to minimize the amount of
568 time the operating system spends doing string compares trying to find
569 the file to be removed in the directory. This is often an O(n)
570 operation. Note that this optimization may slightly slow more
571 effecient operating systems. */
572 dir = opendir(prefix_len == 0 ? "." : prefix_dir);
573 if (dir == NULL) {
574 if (prefix_len > 0 && prefix_dir[0] == '/')
575 warn("can't open directory %s", prefix_dir);
576 else
577 warn("can't open directory %s in %s",
578 (prefix_len == 0) ? "." : prefix_dir, current_dir);
579 error_count++;
580 filelist_free(list);
581 return;
582 }
583 for (i = 0, entry = readdir(dir); entry != NULL; entry = readdir(dir))
584 if (filelist_lookup(list, entry->d_name) != NULL) {
585 i++;
586 strlcpy(file, entry->d_name, sizeof(prefix_dir) - prefix_len);
587 unlink_file(prefix_dir);
588 if (i == list->count)
589 break;
590 }
591 closedir(dir);
592 filelist_free(list);
593 }
594
595
596 /*
597 ** Check a path to see if it's okay (not likely to confuse us). This
598 ** ensures that it doesn't contain elements like "./" or "../" and doesn't
599 ** contain doubled slashes.
600 */
601 static bool
bad_path(const char * p)602 bad_path(const char *p)
603 {
604 if (strlen(p) >= MAX_DIR_LEN)
605 return true;
606 while (*p) {
607 if (p[0] == '.' && (p[1] == '/' || (p[1] == '.' && p[2] == '/')))
608 return true;
609 while (*p && *p != '/')
610 p++;
611 if (p[0] == '/' && p[1] == '/')
612 return true;
613 if (*p == '/')
614 p++;
615 }
616 return false;
617 }
618
619
620 /*
621 ** Main routine. Parse options, initialize the storage manager, and
622 ** initialize various global variables, and then go into a loop calling
623 ** process_line and unlink_filelist as needed.
624 */
625 int
main(int argc,char * argv[])626 main(int argc, char *argv[])
627 {
628 const char *name;
629 char *p, **arg;
630 QIOSTATE *qp;
631 filelist *list;
632 int filecount, deleted;
633 bool empty_error = false;
634
635 /* Establish our identity. Since we use the storage manager, we need to
636 set up syslog as well, although we won't use it ourselves. */
637 name = argv[0];
638 if (*name == '\0')
639 name = "fastrm";
640 else {
641 p = strrchr(name, '/');
642 if (p != NULL)
643 name = p + 1;
644 }
645 message_program_name = name;
646 openlog(name, LOG_CONS | LOG_PID, LOG_INN_PROG);
647
648 /* If we're running as root, unlink may remove directories. */
649 unlink_dangerous = (geteuid() == 0);
650
651 /* Unfortunately, we can't use getopt, because several of our options
652 take optional arguments. Bleh. */
653 arg = argv + 1;
654 while (argc >= 2 && **arg == '-') {
655 p = *arg;
656 while (*++p) {
657 switch (*p) {
658 default:
659 die("invalid option -- %c", *p);
660 case 'a':
661 case 'r':
662 continue;
663 case 'c':
664 chdir_threshold = 1;
665 if (!isdigit((unsigned char) p[1]))
666 continue;
667 chdir_threshold = atoi(p + 1);
668 break;
669 case 'd':
670 debug_only = true;
671 continue;
672 case 'e':
673 empty_error = true;
674 continue;
675 case 's':
676 sort_threshold = 5;
677 if (!isdigit((unsigned char) p[1]))
678 continue;
679 sort_threshold = atoi(p + 1);
680 break;
681 case 'u':
682 relative_threshold = 1;
683 if (!isdigit((unsigned char) p[1]))
684 continue;
685 relative_threshold = atoi(p + 1);
686 if (relative_threshold >= (int) strlen(dotdots) / 3)
687 relative_threshold = strlen(dotdots) / 3 - 1;
688 break;
689 }
690 break;
691 }
692 argc--;
693 arg++;
694 }
695 if (argc != 2)
696 die("usage error, wrong number of arguments");
697
698 /* The remaining argument is the base path. Make sure it's valid and
699 not excessively large and then change to it. */
700 base_dir = *arg;
701 if (*base_dir != '/' || bad_path(base_dir))
702 die("bad base path %s", base_dir);
703 strlcpy(current_dir, base_dir, sizeof(current_dir));
704 if (chdir(current_dir) < 0)
705 sysdie("can't chdir to base path %s", current_dir);
706
707 /* Open our input stream and then loop through it, building filelists
708 and processing them until done. */
709 qp = QIOfdopen(fileno(stdin));
710 if (qp == NULL)
711 sysdie("can't reopen stdin");
712 while ((list = process_line(qp, &filecount, &deleted)) != NULL) {
713 empty_error = false;
714 unlink_filelist(list, filecount);
715 }
716 if (deleted > 0)
717 empty_error = false;
718
719 /* All done. */
720 SMshutdown();
721 if (empty_error)
722 die("no files to remove");
723 exit(error_count > 0 ? 1 : 0);
724 }
725