1 /* $Id: fastrm.c 10283 2018-05-14 12:43:05Z iulius $
2 **
3 ** Delete a list of filenames or tokens from stdin.
4 **
5 ** Originally written by <kre@munnari.oz.au> (to only handle files).
6 **
7 ** Files that can't be unlinked because they didn't exist are considered
8 ** okay. Any error condition results in exiting with non-zero exit
9 ** status. Input lines in the form @...@ are taken to be storage API
10 ** tokens. Input filenames should be fully qualified. For maximum
11 ** efficiency, input filenames should be sorted; fastrm will cd into each
12 ** directory to avoid additional directory lookups when removing a lot of
13 ** files in a single directory.
14 */
15
16 #include "config.h"
17 #include "clibrary.h"
18 #include <ctype.h>
19 #include <dirent.h>
20 #include <errno.h>
21 #include <sys/stat.h>
22 #include <syslog.h>
23
24 #include "inn/innconf.h"
25 #include "inn/messages.h"
26 #include "inn/qio.h"
27 #include "inn/libinn.h"
28 #include "inn/storage.h"
29
/* Upper bound on path lengths.  Any path name whose length reaches this
   limit is rejected. */
#define MAX_DIR_LEN 2048

/* The set of file names queued for removal from one directory. */
typedef struct filelist {
    int count;          /* Number of slots of files currently in use. */
    int size;           /* Number of slots allocated in files. */
    char *dir;          /* Directory holding the files, or NULL. */
    char **files;       /* Array of individually allocated file names. */
} filelist;
40
41 /* All relative paths are relative to this directory. */
42 static char *base_dir = NULL;
43
44 /* The absolute path of the current working directory. */
45 static char current_dir[MAX_DIR_LEN];
46
47 /* The prefix for the files that we're currently working with. We sometimes
48 also use this as working space for forming file names to remove, so give
49 ourselves a bit of additional leeway just in case. */
50 static char prefix_dir[MAX_DIR_LEN * 2];
51 static int prefix_len;
52
53 /* Some threshold values that govern the optimizations that we are willing
54 to perform. chdir_threshold determines how many files to be removed we
55 want in a directory before we chdir to that directory. sort_threshold
56 determines how many files must be in a directory before we use readdir to
57 remove them in order. relative_threshold determines how many levels of
58 "../" we're willing to try to use to move to the next directory rather
59 than just calling chdir with the new absolute path. */
60 static int chdir_threshold = 3;
61 static int relative_threshold = 0;
62 static int sort_threshold = 0;
63
64 /* True if we should only print what we would do, not actually do it. */
65 static bool debug_only = false;
66
67 /* A string used for constructing relative paths. */
68 static const char dotdots[] = "../../../../";
69
70 /* The number of errors encountered, used to determine exit status. */
71 static int error_count = 0;
72
73 /* Whether the storage manager has been initialized. */
74 static bool sm_initialized = false;
75
76 /* True if unlink may be able to remove directories. */
77 static bool unlink_dangerous = false;
78
79
80
/*
**  Comparison callback for qsort and bsearch over arrays of strings.  Both
**  arguments point at char pointers; the strings they refer to are ordered
**  with strcmp.
*/
static int
file_compare(const void *a, const void *b)
{
    const char *const *left = a;
    const char *const *right = b;

    return strcmp(*left, *right);
}
93
94
95 /*
96 ** Create a new filelist.
97 */
98 static filelist *
filelist_new(char * dir)99 filelist_new(char *dir)
100 {
101 filelist *new;
102
103 new = xmalloc(sizeof(filelist));
104 new->count = 0;
105 new->size = 0;
106 new->dir = dir;
107 new->files = NULL;
108 return new;
109 }
110
111
112 /*
113 ** Insert a file name into a list of files (unsorted).
114 */
115 static void
filelist_insert(filelist * list,char * name)116 filelist_insert(filelist *list, char *name)
117 {
118 if (list->count == list->size) {
119 list->size = (list->size == 0) ? 16 : list->size * 2;
120 list->files = xrealloc(list->files, list->size * sizeof(char *));
121 }
122 list->files[list->count++] = xstrdup(name);
123 }
124
125
126 /*
127 ** Find a file name in a sorted list of files.
128 */
129 static char *
filelist_lookup(filelist * list,const char * name)130 filelist_lookup(filelist *list, const char *name)
131 {
132 char **p;
133
134 p = bsearch(&name, list->files, list->count, sizeof(char *),
135 file_compare);
136 return (p == NULL ? NULL : *p);
137 }
138
139
140 /*
141 ** Empty a list of files, freeing all of the names but keeping the
142 ** structure intact.
143 */
144 static void
filelist_empty(filelist * list)145 filelist_empty(filelist *list)
146 {
147 int i;
148
149 if (list->files == NULL)
150 return;
151 for (i = 0; i < list->count; i++)
152 free(list->files[i]);
153 list->count = 0;
154 }
155
156
157 /*
158 ** Free a list of files.
159 */
160 static void
filelist_free(filelist * list)161 filelist_free(filelist *list)
162 {
163 filelist_empty(list);
164 if (list->files != NULL)
165 free(list->files);
166 if (list->dir != NULL)
167 free(list->dir);
168 free(list);
169 }
170
171
/*
**  Cleanup hook installed as message_fatal_cleanup by sm_initialize: shuts
**  down the storage manager before a fatal exit.  Always returns 1
**  (presumably used as the exit status by the caller of the hook; confirm
**  against the messages API).
*/
static int
sm_cleanup(void)
{
    SMshutdown();
    return 1;
}
181
182
183 /*
184 ** Initialize the storage manager. This includes parsing inn.conf, which
185 ** fastrm doesn't need for any other purpose.
186 */
187 static void
sm_initialize(void)188 sm_initialize(void)
189 {
190 bool value;
191
192 if (!innconf_read(NULL))
193 exit(1);
194 value = true;
195 if (!SMsetup(SM_RDWR, &value) || !SMsetup(SM_PREOPEN, &value))
196 die("can't set up storage manager");
197 if (!SMinit())
198 die("can't initialize storage manager: %s", SMerrorstr);
199 sm_initialized = true;
200 message_fatal_cleanup = sm_cleanup;
201 }
202
203
204 /*
205 ** Get a line from a given QIO stream, returning a pointer to it. Warn
206 ** about and then skip lines that are too long. Returns NULL at EOF or on
207 ** an error.
208 */
209 static char *
get_line(QIOSTATE * qp)210 get_line(QIOSTATE *qp)
211 {
212 static int count;
213 char *p;
214
215 p = QIOread(qp);
216 count++;
217 while (QIOtoolong(qp) || (p != NULL && strlen(p) >= MAX_DIR_LEN)) {
218 warn("line %d too long", count);
219 error_count++;
220 p = QIOread(qp);
221 }
222 if (p == NULL) {
223 if (QIOerror(qp)) {
224 syswarn("read error");
225 error_count++;
226 }
227 return NULL;
228 }
229 return p;
230 }
231
232
233 /*
234 ** Read lines from stdin (including the first that may have been there
235 ** from our last time in) until we reach EOF or until we get a line that
236 ** names a file not in the same directory as the previous lot. Remember
237 ** the file names in the directory we're examining and return the list.
238 */
239 static filelist *
process_line(QIOSTATE * qp,int * queued,int * deleted)240 process_line(QIOSTATE *qp, int *queued, int *deleted)
241 {
242 static char *line = NULL;
243 filelist *list = NULL;
244 char *p;
245 char *dir = NULL;
246 int dlen = -1;
247
248 *queued = 0;
249 *deleted = 0;
250
251 if (line == NULL)
252 line = get_line(qp);
253
254 for (; line != NULL; line = get_line(qp)) {
255 if (IsToken(line)) {
256 (*deleted)++;
257 if (debug_only) {
258 printf("Token %s\n", line);
259 continue;
260 }
261 if (!sm_initialized)
262 sm_initialize();
263 if (!SMcancel(TextToToken(line)))
264 if (SMerrno != SMERR_NOENT && SMerrno != SMERR_UNINIT) {
265 warn("can't cancel %s", line);
266 error_count++;
267 }
268 } else {
269 if (list == NULL) {
270 p = strrchr(line, '/');
271 if (p != NULL) {
272 *p++ = '\0';
273 dlen = strlen(line);
274 dir = xstrdup(line);
275 } else {
276 dlen = -1;
277 dir = NULL;
278 }
279 list = filelist_new(dir);
280 } else {
281 if ((dlen < 0 && strchr(line, '/'))
282 || (dlen >= 0 && (line[dlen] != '/'
283 || strchr(line + dlen + 1, '/')
284 || strncmp(dir, line, dlen))))
285 return list;
286 }
287 filelist_insert(list, line + dlen + 1);
288 (*queued)++;
289 }
290 }
291 return list;
292 }
293
294
/*
**  Copy the leading part of the path in from into to, stopping just before
**  the slash that brings the count of slashes seen up to n (or at the end of
**  the string, whichever comes first).  The result is always
**  nul-terminated; the caller must supply a large enough buffer.
*/
static void
copy_segments(char *to, const char *from, int n)
{
    while (*from != '\0') {
        if (*from == '/') {
            n--;
            if (n <= 0)
                break;
        }
        *to++ = *from++;
    }
    *to = '\0';
}
310
311
/*
**  Count the slashes in a file name, which serves as the number of path
**  segments it contains.
*/
static int
slashcount(char *name)
{
    int total = 0;
    char *slash;

    for (slash = strchr(name, '/'); slash != NULL;
         slash = strchr(slash + 1, '/'))
        total++;
    return total;
}
326
327
328 /*
329 ** Unlink a file, reporting errors if the unlink fails for a reason other
330 ** than the file not existing doesn't exist. Be careful to avoid unlinking
331 ** a directory if unlink_dangerous is true.
332 */
333 static void
unlink_file(const char * file)334 unlink_file(const char *file)
335 {
336 struct stat st;
337
338 /* On some systems, unlink will remove directories if used by root. If
339 we're running as root, unlink_dangerous will be set, and we need to
340 make sure that the file isn't a directory first. */
341 if (unlink_dangerous) {
342 if (stat(file, &st) < 0) {
343 if (errno != ENOENT) {
344 if (*file == '/')
345 syswarn("can't stat %s", file);
346 else
347 syswarn("can't stat %s in %s", file, current_dir);
348 error_count++;
349 }
350 return;
351 }
352 if (S_ISDIR(st.st_mode)) {
353 if (*file == '/')
354 syswarn("%s is a directory", file);
355 else
356 syswarn("%s in %s is a directory", file, current_dir);
357 error_count++;
358 return;
359 }
360 }
361
362 if (debug_only) {
363 if (*file != '/')
364 printf("%s / ", current_dir);
365 printf("%s\n", file);
366 return;
367 }
368
369 if (unlink(file) < 0 && errno != ENOENT) {
370 if (*file == '/')
371 syswarn("can't unlink %s", file);
372 else
373 syswarn("can't unlink %s in %s", file, current_dir);
374 }
375 }
376
377
378 /*
379 ** A wrapper around chdir that dies if chdir fails for a reason other than
380 ** the directory not existing, returns false if the directory doesn't
381 ** exist (reporting an error), and otherwise returns true. It also checks
382 ** to make sure that filecount is larger than chdir_threshold, and if it
383 ** isn't it instead just sets prefix_dir and prefix_len to point to the new
384 ** directory without changing the working directory.
385 */
386 static bool
chdir_checked(const char * path,int filecount)387 chdir_checked(const char *path, int filecount)
388 {
389 if (filecount < chdir_threshold) {
390 strlcpy(prefix_dir, path, sizeof(prefix_dir));
391 prefix_len = strlen(path);
392 } else {
393 prefix_len = 0;
394 if (chdir(path) < 0) {
395 if (errno != ENOENT)
396 sysdie("can't chdir from %s to %s", current_dir, path);
397 else {
398 syswarn("can't chdir from %s to %s", current_dir, path);
399 return false;
400 }
401 }
402 }
403 return true;
404 }
405
406
407 /*
408 ** Set our environment (process working directory, and global vars) to
409 ** reflect a change of directory to dir (relative to base_dir if dir is not
410 ** an absolute path). We're likely to want to do different things
411 ** depending on the amount of work to do in dir, so we also take the number
412 ** of files to remove in dir as the second argument. Return false if the
413 ** directory doesn't exist (and therefore all files in it have already been
414 ** removed; otherwise, return true.
415 */
416 static bool
setup_dir(char * dir,int filecount)417 setup_dir(char *dir, int filecount)
418 {
419 char *p, *q, *absolute;
420 char path[MAX_DIR_LEN];
421 int base_depth, depth;
422
423 /* Set absolute to the absolute path to the new directory. */
424 if (dir == NULL)
425 absolute = base_dir;
426 else if (*dir == '/')
427 absolute = dir;
428 else if (*dir == '\0') {
429 strlcpy(path, "/", sizeof(path));
430 absolute = path;
431 } else {
432 /* Strip off leading "./". */
433 while (dir[0] == '.' && dir[1] == '/')
434 for (dir += 2; *dir == '/'; dir++)
435 ;
436
437 /* Handle any leading "../", but only up to the number of segments
438 in base_dir. */
439 base_depth = slashcount(base_dir);
440 while (base_depth > 0 && strncmp(dir, "../", 3) == 0)
441 for (base_depth--, dir += 3; *dir == '/'; dir++)
442 ;
443 if (base_depth <= 0)
444 die("too many ../'s in path %s", dir);
445 copy_segments(path, base_dir, base_depth + 1);
446 if (strlen(path) + strlen(dir) + 2 > MAX_DIR_LEN)
447 die("path %s too long", dir);
448 strlcat(path, "/", sizeof(path));
449 strlcat(path, dir, sizeof(path));
450 absolute = path;
451 }
452
453 /* Find the first point of difference between absolute and current_dir.
454 If there is no difference, we're done; we're changing to the same
455 directory we were in (this is probably some sort of error, but can
456 happen with odd relative paths). */
457 for (p = absolute, q = current_dir; *p == *q; p++, q++)
458 if (*p == '\0')
459 return true;
460
461 /* If we reached the end of current_dir and there's more left of
462 absolute, we're changing to a subdirectory of where we were. */
463 if (*q == '\0' && *p == '/') {
464 p++;
465 if (!chdir_checked(p, filecount))
466 return false;
467 if (prefix_len == 0)
468 strlcpy(current_dir, absolute, sizeof(current_dir));
469 return true;
470 }
471
472 /* Otherwise, if we were promised that we have a pure tree (in other
473 words, no symbolic links to directories), see if it's worth going up
474 the tree with ".." and then down again rather than chdir to the
475 absolute path. relative_threshold determines how many levels of ".."
476 we're willing to use; the default of 1 seems fractionally faster than
477 2 and 0 indicates to always use absolute paths. Values larger than 3
478 would require extending the dotdots string, but are unlikely to be
479 worth it.
480
481 FIXME: It's too hard to figure out what this code does. It needs to be
482 rewritten. */
483 if (*p != '\0' && relative_threshold > 0) {
484 depth = slashcount(q);
485 if (depth <= relative_threshold) {
486 while (p > absolute && *--p != '/')
487 ;
488 p++;
489 strlcpy(prefix_dir, dotdots + 9 - depth * 3, sizeof(prefix_dir));
490 strlcat(prefix_dir, p, sizeof(prefix_dir));
491 if (!chdir_checked(prefix_dir, filecount))
492 return false;
493
494 /* Now patch up current_dir to reflect where we are. */
495 if (prefix_len == 0) {
496 while (q > current_dir && *--q != '/')
497 ;
498 q[1] = '\0';
499 strlcat(current_dir, p, sizeof(current_dir));
500 }
501 return true;
502 }
503 }
504
505 /* All else has failed; just use the absolute path. This includes the
506 case where current_dir is a subdirectory of absolute, in which case
507 it may be somewhat faster to use chdir("../..") or the like rather
508 than the absolute path, but this case rarely happens when the user
509 cares about speed (it usually doesn't happen with sorted input). So
510 we don't bother. */
511 if (!chdir_checked(absolute, filecount))
512 return false;
513 if (prefix_len == 0)
514 strlcpy(current_dir, absolute, sizeof(current_dir));
515 return true;
516 }
517
518
519 /*
520 ** Process a filelist of files to be deleted, all in the same directory.
521 */
522 static void
unlink_filelist(filelist * list,int filecount)523 unlink_filelist(filelist *list, int filecount)
524 {
525 bool sorted;
526 DIR *dir;
527 struct dirent *entry;
528 char *file;
529 int i;
530
531 /* If setup_dir returns false, the directory doesn't exist and we're
532 already all done. */
533 if (!setup_dir(list->dir, filecount)) {
534 filelist_free(list);
535 return;
536 }
537
538 /* We'll use prefix_dir as a buffer to write each file name into as we
539 go, so get it set up. */
540 if (prefix_len == 0)
541 file = prefix_dir;
542 else {
543 prefix_dir[prefix_len++] = '/';
544 file = prefix_dir + prefix_len;
545 *file = '\0';
546 }
547
548 /* If we're not sorting directories or if the number of files is under
549 the threshold, just remove the files. */
550 if (sort_threshold == 0 || filecount < sort_threshold) {
551 for (i = 0; i < list->count; i++) {
552 strlcpy(file, list->files[i], sizeof(prefix_dir) - prefix_len);
553 unlink_file(prefix_dir);
554 }
555 filelist_free(list);
556 return;
557 }
558
559 /* We have enough files to remove in this directory that it's worth
560 optimizing. First, make sure the list of files is sorted. It's not
561 uncommon for the files to already be sorted, so check first. */
562 for (sorted = true, i = 1; sorted && i < list->count; i++)
563 sorted = (strcmp(list->files[i - 1], list->files[i]) <= 0);
564 if (!sorted)
565 qsort(list->files, list->count, sizeof(char *), file_compare);
566
567 /* Now, begin doing our optimized unlinks. The technique we use is to
568 open the directory containing the files and read through it, checking
569 each file in the directory to see if it's one of the files we should
570 be removing. The theory is that we want to minimize the amount of
571 time the operating system spends doing string compares trying to find
572 the file to be removed in the directory. This is often an O(n)
573 operation. Note that this optimization may slightly slow more
574 effecient operating systems. */
575 dir = opendir(prefix_len == 0 ? "." : prefix_dir);
576 if (dir == NULL) {
577 if (prefix_len > 0 && prefix_dir[0] == '/')
578 warn("can't open directory %s", prefix_dir);
579 else
580 warn("can't open directory %s in %s",
581 (prefix_len == 0) ? "." : prefix_dir, current_dir);
582 error_count++;
583 filelist_free(list);
584 return;
585 }
586 for (i = 0, entry = readdir(dir); entry != NULL; entry = readdir(dir))
587 if (filelist_lookup(list, entry->d_name) != NULL) {
588 i++;
589 strlcpy(file, entry->d_name, sizeof(prefix_dir) - prefix_len);
590 unlink_file(prefix_dir);
591 if (i == list->count)
592 break;
593 }
594 closedir(dir);
595 filelist_free(list);
596 }
597
598
599 /*
600 ** Check a path to see if it's okay (not likely to confuse us). This
601 ** ensures that it doesn't contain elements like "./" or "../" and doesn't
602 ** contain doubled slashes.
603 */
604 static bool
bad_path(const char * p)605 bad_path(const char *p)
606 {
607 if (strlen(p) >= MAX_DIR_LEN)
608 return true;
609 while (*p) {
610 if (p[0] == '.' && (p[1] == '/' || (p[1] == '.' && p[2] == '/')))
611 return true;
612 while (*p && *p != '/')
613 p++;
614 if (p[0] == '/' && p[1] == '/')
615 return true;
616 if (*p == '/')
617 p++;
618 }
619 return false;
620 }
621
622
623 /*
624 ** Main routine. Parse options, initialize the storage manager, and
625 ** initialize various global variables, and then go into a loop calling
626 ** process_line and unlink_filelist as needed.
627 */
628 int
main(int argc,char * argv[])629 main(int argc, char *argv[])
630 {
631 const char *name;
632 char *p, **arg;
633 QIOSTATE *qp;
634 filelist *list;
635 int filecount, deleted;
636 bool empty_error = false;
637
638 /* Establish our identity. Since we use the storage manager, we need to
639 set up syslog as well, although we won't use it ourselves. */
640 name = argv[0];
641 if (*name == '\0')
642 name = "fastrm";
643 else {
644 p = strrchr(name, '/');
645 if (p != NULL)
646 name = p + 1;
647 }
648 message_program_name = name;
649 openlog(name, LOG_CONS | LOG_PID, LOG_INN_PROG);
650
651 /* If we're running as root, unlink may remove directories. */
652 unlink_dangerous = (geteuid() == 0);
653
654 /* Unfortunately, we can't use getopt, because several of our options
655 take optional arguments. Bleh. */
656 arg = argv + 1;
657 while (argc >= 2 && **arg == '-') {
658 p = *arg;
659 while (*++p) {
660 switch (*p) {
661 default:
662 die("invalid option -- %c", *p);
663 case 'a':
664 case 'r':
665 continue;
666 case 'c':
667 chdir_threshold = 1;
668 if (!isdigit((unsigned char) p[1]))
669 continue;
670 chdir_threshold = atoi(p + 1);
671 break;
672 case 'd':
673 debug_only = true;
674 continue;
675 case 'e':
676 empty_error = true;
677 continue;
678 case 's':
679 sort_threshold = 5;
680 if (!isdigit((unsigned char) p[1]))
681 continue;
682 sort_threshold = atoi(p + 1);
683 break;
684 case 'u':
685 relative_threshold = 1;
686 if (!isdigit((unsigned char) p[1]))
687 continue;
688 relative_threshold = atoi(p + 1);
689 if (relative_threshold >= (int) strlen(dotdots) / 3)
690 relative_threshold = strlen(dotdots) / 3 - 1;
691 break;
692 }
693 break;
694 }
695 argc--;
696 arg++;
697 }
698 if (argc != 2)
699 die("usage error, wrong number of arguments");
700
701 /* The remaining argument is the base path. Make sure it's valid and
702 not excessively large and then change to it. */
703 base_dir = *arg;
704 if (*base_dir != '/' || bad_path(base_dir))
705 die("bad base path %s", base_dir);
706 strlcpy(current_dir, base_dir, sizeof(current_dir));
707 if (chdir(current_dir) < 0)
708 sysdie("can't chdir to base path %s", current_dir);
709
710 /* Open our input stream and then loop through it, building filelists
711 and processing them until done. */
712 qp = QIOfdopen(fileno(stdin));
713 if (qp == NULL)
714 sysdie("can't reopen stdin");
715 while ((list = process_line(qp, &filecount, &deleted)) != NULL) {
716 empty_error = false;
717 unlink_filelist(list, filecount);
718 }
719 if (deleted > 0)
720 empty_error = false;
721
722 /* All done. */
723 SMshutdown();
724 if (empty_error)
725 die("no files to remove");
726 exit(error_count > 0 ? 1 : 0);
727 }
728