1 /*
2  * du.c: implementation of du.h.
3  */
4 
5 #include "agedu.h"
6 #include "du.h"
7 #include "alloc.h"
8 
9 #if !defined __linux__ || !defined O_NOATIME || HAVE_FDOPENDIR
10 
11 #if HAVE_DIRENT_H
12 #  include <dirent.h>
13 #endif
14 #if HAVE_NDIR_H
15 #  include <ndir.h>
16 #endif
17 #if HAVE_SYS_DIR_H
18 #  include <sys/dir.h>
19 #endif
20 #if HAVE_SYS_NDIR_H
21 #  include <sys/ndir.h>
22 #endif
23 
24 /*
25  * Wrappers around POSIX opendir, readdir and closedir, which
26  * permit me to replace them with different wrappers in special
27  * circumstances.
28  */
29 
/* In this build a directory handle is just a POSIX DIR stream pointer. */
typedef DIR *dirhandle;
31 
/*
 * Open the directory named by 'path' and store a handle to it in *dh.
 *
 * Returns 0 on success, or -1 on failure. On failure errno is left as
 * set by the call that failed, so the caller can report it with
 * strerror(errno).
 */
int open_dir(const char *path, dirhandle *dh)
{
#if defined O_NOATIME && HAVE_FDOPENDIR

    /*
     * On Linux, we have the O_NOATIME flag. This means we can
     * read the contents of directories without affecting their
     * atimes, which enables us to at least try to include them in
     * the age display rather than exempting them.
     *
     * Unfortunately, opendir() doesn't let us open a directory
     * with O_NOATIME. So instead, we have to open the directory
     * with vanilla open(), and then use fdopendir() to translate
     * the fd into a POSIX dir handle.
     */
    int fd;

    fd = open(path, O_RDONLY | O_NONBLOCK | O_NOCTTY | O_LARGEFILE |
              O_NOATIME | O_DIRECTORY);
    if (fd < 0) {
        /*
         * Opening a file with O_NOATIME is not unconditionally
         * permitted by the Linux kernel. As far as I can tell,
         * it's permitted only for files on which the user would
         * have been able to call utime(2): in other words, files
         * for which the user could have deliberately set the
         * atime back to its original value after finishing with
         * it. Hence, O_NOATIME has no security implications; it's
         * simply a cleaner, faster and more race-condition-free
         * alternative to stat(), a normal open(), and a utimes()
         * when finished.
         *
         * The upshot of all of which, for these purposes, is that
         * we must be prepared to try again without O_NOATIME if
         * we receive EPERM.
         */
        if (errno == EPERM)
            fd = open(path, O_RDONLY | O_NONBLOCK | O_NOCTTY |
                      O_LARGEFILE | O_DIRECTORY);
        if (fd < 0)
            return -1;
    }

    *dh = fdopendir(fd);
    if (!*dh) {
        /*
         * fdopendir() only takes over the descriptor when it
         * succeeds, so on failure we still own fd and must close it
         * ourselves or it leaks. Keep errno from fdopendir() intact
         * across the close() so the caller reports the real cause.
         */
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return -1;
    }
    return 0;
#else
    *dh = opendir(path);
    if (!*dh)
        return -1;
    return 0;
#endif
}
84 
/*
 * Fetch the next entry from the directory stream behind *dh.
 *
 * Returns the entry's name, or NULL when readdir() yields no entry.
 * The returned string belongs to the dirent that readdir() handed
 * back; this function does not copy it.
 */
const char *read_dir(dirhandle *dh)
{
    struct dirent *entry;

    entry = readdir(*dh);
    if (entry == NULL)
        return NULL;
    return entry->d_name;
}
90 
/*
 * Release the directory stream behind *dh. The result of closedir()
 * is not checked.
 */
void close_dir(dirhandle *dh)
{
    DIR *stream = *dh;

    closedir(stream);
}
95 
96 #else /* defined __linux__ && defined O_NOATIME && !HAVE_FDOPENDIR */
97 
98 /*
99  * Earlier versions of glibc do not have fdopendir(). Therefore,
100  * if we are on Linux and still wish to make use of O_NOATIME, we
101  * have no option but to talk directly to the kernel system call
102  * interface which underlies the POSIX opendir/readdir machinery.
103  */
104 
105 #define __KERNEL__
106 #include <linux/types.h>
107 #include <linux/dirent.h>
108 #include <linux/unistd.h>
109 
/*
 * Declare getdents(fd, dirp, count), returning int, through the
 * _syscall3 macro. read_dir() below calls it with the handle's fd,
 * its entry buffer, and that buffer's size in bytes.
 * NOTE(review): _syscall3 is presumably supplied by the kernel headers
 * included just above -- confirm it exists on the target toolchain.
 */
_syscall3(int, getdents, uint, fd, struct dirent *, dirp, uint, count)
111 
/*
 * Directory handle for the raw-getdents implementation: an open
 * directory fd plus a buffer of entries fetched from it.
 */
typedef struct {
    int fd;                     /* descriptor obtained by open_dir() */
    struct dirent data[32];     /* buffer that getdents() fills */
    struct dirent *curr;        /* next entry in data[] to hand out */
    int pos, endpos;            /* byte offset of curr within data[], and
                                 * the value getdents() last returned */
} dirhandle;
118 
/*
 * Open the directory named by 'path' for reading via getdents and
 * initialise *dh to refer to it with an empty entry buffer.
 *
 * Returns 0 on success, or -1 if the directory could not be opened
 * (in which case dh->fd holds the negative result of open()).
 */
int open_dir(const char *path, dirhandle *dh)
{
    const int base_flags =
        O_RDONLY | O_NONBLOCK | O_NOCTTY | O_LARGEFILE | O_DIRECTORY;
    int fd;

    /*
     * First attempt with O_NOATIME; if the kernel refuses that with
     * EPERM, retry once without it.
     */
    fd = open(path, base_flags | O_NOATIME);
    if (fd < 0 && errno == EPERM)
        fd = open(path, base_flags);

    dh->fd = fd;
    if (fd < 0)
        return -1;

    /* Nothing buffered yet: the first read_dir() will call getdents. */
    dh->pos = 0;
    dh->endpos = 0;
    return 0;
}
139 
/*
 * Return the name of the next directory entry, refilling the handle's
 * buffer from getdents when every buffered entry has been consumed.
 *
 * Returns NULL when getdents reports zero or a negative value. The
 * returned pointer refers into dh->data, which is overwritten by the
 * next refill.
 */
const char *read_dir(dirhandle *dh)
{
    struct dirent *entry;

    if (dh->pos >= dh->endpos) {
        /* Buffer exhausted: rewind and fetch another batch. */
        dh->curr = dh->data;
        dh->pos = 0;
        dh->endpos = getdents(dh->fd, dh->data, sizeof(dh->data));
        if (dh->endpos <= 0)
            return NULL;
    }

    entry = dh->curr;

    /* Step past this record; d_reclen is its length in bytes. */
    dh->pos += entry->d_reclen;
    dh->curr = (struct dirent *)((char *)dh->data + dh->pos);

    return entry->d_name;
}
159 
/*
 * Close the file descriptor held in *dh. The result of close() is
 * not checked.
 */
void close_dir(dirhandle *dh)
{
    int fd = dh->fd;

    close(fd);
}
164 
165 #endif /* !defined __linux__ || !defined O_NOATIME || HAVE_FDOPENDIR */
166 
/*
 * qsort() comparator for an array of C strings: each argument points
 * at a 'char *' element, and the strings they refer to are ordered
 * with strcmp().
 */
static int str_cmp(const void *av, const void *bv)
{
    const char *const *lhs = av;
    const char *const *rhs = bv;

    return strcmp(*lhs, *rhs);
}
171 
/*
 * Recursive worker for du().
 *
 * *path is a dynamically allocated buffer of *pathsize bytes holding
 * the NUL-terminated name (length pathlen) of the object to examine.
 * It is passed by reference because the buffer is grown in place,
 * updating *path and *pathsize, when child names are appended.
 *
 * The object is stat'ed and handed to gotdata(). If gotdata() returns
 * zero, or the object is not a directory, nothing further happens.
 * Otherwise the directory's entries (other than "." and "..") are
 * collected, sorted with strcmp(), and visited recursively in that
 * order.
 *
 * Failures to stat or open are reported through err(), with
 * gotdata_ctx as its first argument, and that object is skipped.
 *
 * toplevel is nonzero only for the root of the scan.
 */
static void du_recurse(char **path, size_t pathlen, size_t *pathsize,
                       gotdata_fn_t gotdata, err_fn_t err, void *gotdata_ctx,
                       int toplevel)
{
    const char *name;
    dirhandle d;
    STRUCT_STAT st;
    char **names;
    size_t i, nnames, namesize;
    int statret;

    /*
     * Special case: at the very top of the scan, we follow a
     * symlink.
     */
    if (toplevel)
        statret = STAT_FUNC(*path, &st);
    else
        statret = LSTAT_FUNC(*path, &st);
    if (statret < 0) {
        err(gotdata_ctx, "%s: lstat: %s\n", *path, strerror(errno));
        return;
    }

    if (!gotdata(gotdata_ctx, *path, &st))
        return;

    if (!S_ISDIR(st.st_mode))
        return;

    names = NULL;
    nnames = namesize = 0;

    if (open_dir(*path, &d) < 0) {
        err(gotdata_ctx, "%s: opendir: %s\n", *path, strerror(errno));
        return;
    }

    /*
     * Read the whole directory up front, copying each name, so that
     * the handle is closed before we recurse.
     */
    while ((name = read_dir(&d)) != NULL) {
        if (name[0] == '.' && (!name[1] || (name[1] == '.' && !name[2]))) {
            /* do nothing - we skip "." and ".." */
        } else {
            if (nnames >= namesize) {
                namesize = nnames * 3 / 2 + 64;
                names = sresize(names, namesize, char *);
            }
            names[nnames++] = dupstr(name);
        }
    }
    close_dir(&d);

    /* names is only allocated once an entry is stored, so it is
     * still NULL here and there is nothing to free. */
    if (nnames == 0)
        return;

    qsort(names, nnames, sizeof(*names), str_cmp);

    for (i = 0; i < nnames; i++) {
        size_t newpathlen;

        /*
         * readdir(3) has occasionally been known to report two copies
         * of the identical file name, in cases involving strange file
         * system implementations or (possibly) race conditions. To
         * avoid failing an assertion in the trie code, de-duplicate.
         *
         * The sort has made duplicates adjacent, so we drop every
         * copy but the last. The dropped copy must still be freed
         * here, since the sfree() at the bottom of the loop is not
         * reached.
         */
        if (i+1 < nnames && !strcmp(names[i], names[i+1])) {
            sfree(names[i]);
            continue;
        }

        /* Room for the parent path, a '/', the child name, and NUL. */
        newpathlen = pathlen + 1 + strlen(names[i]);
        if (*pathsize <= newpathlen) {
            *pathsize = newpathlen * 3 / 2 + 256;
            *path = sresize(*path, *pathsize, char);
        }
        /*
         * Avoid duplicating a slash if we got a trailing one to
         * begin with (i.e. if we're starting the scan in '/' itself).
         */
        if (pathlen > 0 && (*path)[pathlen-1] == '/') {
            strcpy(*path + pathlen, names[i]);
            newpathlen--;
        } else {
            sprintf(*path + pathlen, "/%s", names[i]);
        }

        du_recurse(path, newpathlen, pathsize, gotdata, err, gotdata_ctx, 0);

        sfree(names[i]);
    }
    sfree(names);
}
259 
/*
 * Scan the file system tree rooted at inpath.
 *
 * gotdata is called with gotdata_ctx, the path name and the stat data
 * for each object visited; a zero return from it stops the scan from
 * descending into that object. err is called, also with gotdata_ctx,
 * to report objects that could not be examined.
 *
 * inpath itself is not modified: the scan works on a private copy.
 */
void du(const char *inpath, gotdata_fn_t gotdata, err_fn_t err,
        void *gotdata_ctx)
{
    char *path;
    size_t pathlen, pathsize;

    pathlen = strlen(inpath);

    /*
     * Trim any trailing slashes from the input path, otherwise we'll
     * store them in the index with confusing effects. (A path that
     * consists of a single '/' is left alone.)
     */
    while (pathlen > 1 && inpath[pathlen-1] == '/')
        pathlen--;

    /* Working buffer, with some slack so short child names fit
     * without an immediate reallocation. */
    pathsize = pathlen + 256;
    path = snewn(pathsize, char);
    memcpy(path, inpath, pathlen);
    path[pathlen] = '\0';

    du_recurse(&path, pathlen, &pathsize, gotdata, err, gotdata_ctx, 1);

    /*
     * du_recurse() may have reallocated the buffer, but it keeps
     * 'path' pointing at the current allocation, so this releases the
     * right block. Without it the buffer leaked on every scan.
     */
    sfree(path);
}
282