1 /** @file
2  * @brief Iterator through entries in a directory.
3  */
4 /* Copyright (C) 2007,2008,2010,2011,2012,2013,2014,2015,2018 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  */
20 
21 #ifndef OMEGA_INCLUDED_DIRITOR_H
22 #define OMEGA_INCLUDED_DIRITOR_H
23 
24 #ifndef PACKAGE
25 # error config.h must be included first in each C++ source file
26 #endif
27 
28 #include <cerrno>
29 #include <string>
30 
31 #include "safedirent.h"
32 #include "safefcntl.h"
33 #include "safesysstat.h"
34 #include "safeunistd.h"
35 
36 #include <sys/types.h>
37 
38 #ifndef __WIN32__
39 #include <grp.h> // For getgrgid().
40 #include <pwd.h> // For getpwuid().
41 #endif
42 
43 #include <magic.h>
44 #include <zlib.h>
45 
46 #include "common/noreturn.h"
47 
48 #include "loadfile.h"
49 #include "md5wrap.h"
50 #include "runfilter.h" // For class ReadError.
51 
/// Empty tag type carrying no data.
//
//  NOTE(review): presumably thrown as an exception when a file to be indexed
//  can't be found - confirm against the code which uses it.
struct FileNotFound {};
53 
// Exception to signify changes should be committed, but indexing aborted.
//
// The constructors are only declared here; how they combine their arguments
// into the stored message is decided by their out-of-line definitions.
class CommitAndExit {
  public:
    /// Construct from a message, the path concerned and an errno value.
    CommitAndExit(const char * msg_, const std::string & path, int errno_);

    /// Construct from a message and an errno value.
    CommitAndExit(const char * msg_, int errno_);

    /// Construct from a message and a textual error description.
    CommitAndExit(const char * msg_, const char * error);

    /// Return the message stored in this exception.
    const std::string & what() const {
	return msg;
    }

  private:
    /// The message returned by what().
    std::string msg;
};
65 
66 class DirectoryIterator {
67 #if defined O_NOATIME && O_NOATIME != 0
68     static uid_t euid;
69 #endif
70 
71     static magic_t magic_cookie;
72 
73     std::string path;
74     std::string::size_type path_len;
75 
76     DIR * dir;
77     struct dirent *entry;
78     struct stat statbuf;
79     bool statbuf_valid;
80     bool follow_symlinks;
81     int fd;
82 
83     void call_stat();
84 
ensure_statbuf_valid()85     void ensure_statbuf_valid() {
86 	if (!statbuf_valid) {
87 	    call_stat();
88 	    statbuf_valid = true;
89 	}
90     }
91 
92     void build_path();
93 
94     void open_fd();
95 
96     void close_fd();
97 
98   public:
99 
DirectoryIterator(bool follow_symlinks_)100     explicit DirectoryIterator(bool follow_symlinks_)
101 	: dir(NULL), follow_symlinks(follow_symlinks_), fd(-1) { }
102 
~DirectoryIterator()103     ~DirectoryIterator() {
104 	if (dir) closedir(dir);
105 	if (fd >= 0) close_fd();
106     }
107 
108     /// Start iterating through entries in @a path.
109     //
110     //  Throws a std::string exception upon failure.
111     void start(const std::string & path);
112 
113     /// Read the next directory entry which doesn't start with ".".
114     //
115     //  We do this to skip ".", "..", and Unix hidden files.
116     //
117     //  @return false if there are no more entries.
next()118     bool next() {
119 	if (fd >= 0) close_fd();
120 	path.resize(path_len);
121 	errno = 0;
122 	do {
123 	    entry = readdir(dir);
124 	} while (entry && entry->d_name[0] == '.');
125 	statbuf_valid = false;
126 	if (entry == NULL && errno != 0) next_failed();
127 	return (entry != NULL);
128     }
129 
130     XAPIAN_NORETURN(void next_failed() const);
131 
leafname()132     const char * leafname() const { return entry->d_name; }
133 
pathname()134     const std::string & pathname() const { return path; }
135 
136     typedef enum { REGULAR_FILE, DIRECTORY, OTHER } type;
137 
get_type()138     type get_type() {
139 #ifdef DT_UNKNOWN
140 	/* Possible values:
141 	 * DT_UNKNOWN DT_FIFO DT_CHR DT_DIR DT_BLK DT_REG DT_LNK DT_SOCK DT_WHT
142 	 */
143 	switch (entry->d_type) {
144 	    case DT_UNKNOWN:
145 		// The current filing system doesn't support d_type.
146 		break;
147 	    case DT_REG:
148 		return REGULAR_FILE;
149 	    case DT_DIR:
150 		return DIRECTORY;
151 #ifdef HAVE_LSTAT
152 	    case DT_LNK:
153 		if (follow_symlinks) break;
154 		return OTHER;
155 #endif
156 	    default:
157 		return OTHER;
158 	}
159 #endif
160 
161 	ensure_statbuf_valid();
162 
163 	if (S_ISREG(statbuf.st_mode)) return REGULAR_FILE;
164 	if (S_ISDIR(statbuf.st_mode)) return DIRECTORY;
165 	return OTHER;
166     }
167 
get_size()168     off_t get_size() {
169 	ensure_statbuf_valid();
170 	return statbuf.st_size;
171     }
172 
get_mtime()173     time_t get_mtime() {
174 	ensure_statbuf_valid();
175 	return statbuf.st_mtime;
176     }
177 
get_ctime()178     time_t get_ctime() {
179 	ensure_statbuf_valid();
180 	return statbuf.st_ctime;
181     }
182 
get_owner()183     const char * get_owner() {
184 #ifndef __WIN32__
185 	ensure_statbuf_valid();
186 	struct passwd * pwentry = getpwuid(statbuf.st_uid);
187 	return pwentry ? pwentry->pw_name : NULL;
188 #else
189 	return NULL;
190 #endif
191     }
192 
get_group()193     const char * get_group() {
194 #ifndef __WIN32__
195 	ensure_statbuf_valid();
196 	struct group * grentry = getgrgid(statbuf.st_gid);
197 	return grentry ? grentry->gr_name : NULL;
198 #else
199 	return NULL;
200 #endif
201     }
202 
is_owner_readable()203     bool is_owner_readable() {
204 	ensure_statbuf_valid();
205 #ifndef __WIN32__
206 	return (statbuf.st_mode & S_IRUSR);
207 #else
208 	return (statbuf.st_mode & S_IREAD);
209 #endif
210     }
211 
is_group_readable()212     bool is_group_readable() {
213 	ensure_statbuf_valid();
214 #ifndef __WIN32__
215 	return (statbuf.st_mode & S_IRGRP);
216 #else
217 	return false;
218 #endif
219     }
220 
is_other_readable()221     bool is_other_readable() {
222 	ensure_statbuf_valid();
223 #ifndef __WIN32__
224 	return (statbuf.st_mode & S_IROTH);
225 #else
226 	return false;
227 #endif
228     }
229 
try_noatime()230     bool try_noatime() {
231 #if defined O_NOATIME && O_NOATIME != 0
232 	if (euid == 0) return true;
233 	ensure_statbuf_valid();
234 	return statbuf.st_uid == euid;
235 #else
236 	return false;
237 #endif
238     }
239 
240     std::string get_magic_mimetype();
241 
file_to_string()242     std::string file_to_string() {
243 	std::string out;
244 	if (!load_file_from_fd(get_fd(), out)) {
245 	    throw ReadError("loading file failed");
246 	}
247 	return out;
248     }
249 
gzfile_to_string()250     std::string gzfile_to_string() {
251 	int dup_fd = dup(get_fd());
252 	if (fd < 0) {
253 	    throw ReadError("dup() failed");
254 	}
255 	gzFile zfh = gzdopen(dup_fd, "rb");
256 	if (zfh == NULL) {
257 	    throw ReadError("gzdopen() failed");
258 	}
259 	std::string out;
260 	char buf[8192];
261 	while (true) {
262 	    int r = gzread(zfh, buf, sizeof(buf));
263 	    if (r < 0) {
264 		gzclose(zfh);
265 		throw ReadError("gzread() failed");
266 	    }
267 	    out.append(buf, r);
268 	    if (unsigned(r) < sizeof(buf)) break;
269 	}
270 	gzclose(zfh);
271 	return out;
272     }
273 
get_fd()274     int get_fd() {
275 	if (fd < 0) {
276 	    open_fd();
277 	} else {
278 	    if (lseek(fd, 0, SEEK_SET) < 0)
279 		throw CommitAndExit("Can't rewind file descriptor", path, errno);
280 	}
281 	return fd;
282     }
283 
md5(std::string & out)284     bool md5(std::string& out) {
285 	return md5_fd(get_fd(), out);
286     }
287 };
288 
289 #endif // OMEGA_INCLUDED_DIRITOR_H
290