1 /** @file 2 * @brief Iterator through entries in a directory. 3 */ 4 /* Copyright (C) 2007,2008,2010,2011,2012,2013,2014,2015,2018 Olly Betts 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 #ifndef OMEGA_INCLUDED_DIRITOR_H 22 #define OMEGA_INCLUDED_DIRITOR_H 23 24 #ifndef PACKAGE 25 # error config.h must be included first in each C++ source file 26 #endif 27 28 #include <cerrno> 29 #include <string> 30 31 #include "safedirent.h" 32 #include "safefcntl.h" 33 #include "safesysstat.h" 34 #include "safeunistd.h" 35 36 #include <sys/types.h> 37 38 #ifndef __WIN32__ 39 #include <grp.h> // For getgrgid(). 40 #include <pwd.h> // For getpwuid(). 41 #endif 42 43 #include <magic.h> 44 #include <zlib.h> 45 46 #include "common/noreturn.h" 47 48 #include "loadfile.h" 49 #include "md5wrap.h" 50 #include "runfilter.h" // For class ReadError. 51 52 struct FileNotFound { }; 53 54 // Exception to signify changes should be committed, but indexing aborted. 55 class CommitAndExit { 56 std::string msg; 57 58 public: 59 CommitAndExit(const char * msg_, const std::string & path, int errno_); 60 CommitAndExit(const char * msg_, int errno_); 61 CommitAndExit(const char * msg_, const char * error); 62 what()63 const std::string & what() const { return msg; } 64 }; 65 66 class DirectoryIterator { 67 #if defined O_NOATIME && O_NOATIME != 0 68 static uid_t euid; 69 #endif 70 71 static magic_t magic_cookie; 72 73 std::string path; 74 std::string::size_type path_len; 75 76 DIR * dir; 77 struct dirent *entry; 78 struct stat statbuf; 79 bool statbuf_valid; 80 bool follow_symlinks; 81 int fd; 82 83 void call_stat(); 84 ensure_statbuf_valid()85 void ensure_statbuf_valid() { 86 if (!statbuf_valid) { 87 call_stat(); 88 statbuf_valid = true; 89 } 90 } 91 92 void build_path(); 93 94 void open_fd(); 95 96 void close_fd(); 97 98 public: 99 DirectoryIterator(bool follow_symlinks_)100 explicit DirectoryIterator(bool follow_symlinks_) 101 : dir(NULL), follow_symlinks(follow_symlinks_), fd(-1) { } 102 ~DirectoryIterator()103 ~DirectoryIterator() { 104 if (dir) closedir(dir); 105 if (fd >= 0) close_fd(); 106 } 107 108 /// Start iterating through entries in @a path. 109 // 110 // Throws a std::string exception upon failure. 111 void start(const std::string & path); 112 113 /// Read the next directory entry which doesn't start with ".". 114 // 115 // We do this to skip ".", "..", and Unix hidden files. 116 // 117 // @return false if there are no more entries. next()118 bool next() { 119 if (fd >= 0) close_fd(); 120 path.resize(path_len); 121 errno = 0; 122 do { 123 entry = readdir(dir); 124 } while (entry && entry->d_name[0] == '.'); 125 statbuf_valid = false; 126 if (entry == NULL && errno != 0) next_failed(); 127 return (entry != NULL); 128 } 129 130 XAPIAN_NORETURN(void next_failed() const); 131 leafname()132 const char * leafname() const { return entry->d_name; } 133 pathname()134 const std::string & pathname() const { return path; } 135 136 typedef enum { REGULAR_FILE, DIRECTORY, OTHER } type; 137 get_type()138 type get_type() { 139 #ifdef DT_UNKNOWN 140 /* Possible values: 141 * DT_UNKNOWN DT_FIFO DT_CHR DT_DIR DT_BLK DT_REG DT_LNK DT_SOCK DT_WHT 142 */ 143 switch (entry->d_type) { 144 case DT_UNKNOWN: 145 // The current filing system doesn't support d_type. 146 break; 147 case DT_REG: 148 return REGULAR_FILE; 149 case DT_DIR: 150 return DIRECTORY; 151 #ifdef HAVE_LSTAT 152 case DT_LNK: 153 if (follow_symlinks) break; 154 return OTHER; 155 #endif 156 default: 157 return OTHER; 158 } 159 #endif 160 161 ensure_statbuf_valid(); 162 163 if (S_ISREG(statbuf.st_mode)) return REGULAR_FILE; 164 if (S_ISDIR(statbuf.st_mode)) return DIRECTORY; 165 return OTHER; 166 } 167 get_size()168 off_t get_size() { 169 ensure_statbuf_valid(); 170 return statbuf.st_size; 171 } 172 get_mtime()173 time_t get_mtime() { 174 ensure_statbuf_valid(); 175 return statbuf.st_mtime; 176 } 177 get_ctime()178 time_t get_ctime() { 179 ensure_statbuf_valid(); 180 return statbuf.st_ctime; 181 } 182 get_owner()183 const char * get_owner() { 184 #ifndef __WIN32__ 185 ensure_statbuf_valid(); 186 struct passwd * pwentry = getpwuid(statbuf.st_uid); 187 return pwentry ? pwentry->pw_name : NULL; 188 #else 189 return NULL; 190 #endif 191 } 192 get_group()193 const char * get_group() { 194 #ifndef __WIN32__ 195 ensure_statbuf_valid(); 196 struct group * grentry = getgrgid(statbuf.st_gid); 197 return grentry ? grentry->gr_name : NULL; 198 #else 199 return NULL; 200 #endif 201 } 202 is_owner_readable()203 bool is_owner_readable() { 204 ensure_statbuf_valid(); 205 #ifndef __WIN32__ 206 return (statbuf.st_mode & S_IRUSR); 207 #else 208 return (statbuf.st_mode & S_IREAD); 209 #endif 210 } 211 is_group_readable()212 bool is_group_readable() { 213 ensure_statbuf_valid(); 214 #ifndef __WIN32__ 215 return (statbuf.st_mode & S_IRGRP); 216 #else 217 return false; 218 #endif 219 } 220 is_other_readable()221 bool is_other_readable() { 222 ensure_statbuf_valid(); 223 #ifndef __WIN32__ 224 return (statbuf.st_mode & S_IROTH); 225 #else 226 return false; 227 #endif 228 } 229 try_noatime()230 bool try_noatime() { 231 #if defined O_NOATIME && O_NOATIME != 0 232 if (euid == 0) return true; 233 ensure_statbuf_valid(); 234 return statbuf.st_uid == euid; 235 #else 236 return false; 237 #endif 238 } 239 240 std::string get_magic_mimetype(); 241 file_to_string()242 std::string file_to_string() { 243 std::string out; 244 if (!load_file_from_fd(get_fd(), out)) { 245 throw ReadError("loading file failed"); 246 } 247 return out; 248 } 249 gzfile_to_string()250 std::string gzfile_to_string() { 251 int dup_fd = dup(get_fd()); 252 if (fd < 0) { 253 throw ReadError("dup() failed"); 254 } 255 gzFile zfh = gzdopen(dup_fd, "rb"); 256 if (zfh == NULL) { 257 throw ReadError("gzdopen() failed"); 258 } 259 std::string out; 260 char buf[8192]; 261 while (true) { 262 int r = gzread(zfh, buf, sizeof(buf)); 263 if (r < 0) { 264 gzclose(zfh); 265 throw ReadError("gzread() failed"); 266 } 267 out.append(buf, r); 268 if (unsigned(r) < sizeof(buf)) break; 269 } 270 gzclose(zfh); 271 return out; 272 } 273 get_fd()274 int get_fd() { 275 if (fd < 0) { 276 open_fd(); 277 } else { 278 if (lseek(fd, 0, SEEK_SET) < 0) 279 throw CommitAndExit("Can't rewind file descriptor", path, errno); 280 } 281 return fd; 282 } 283 md5(std::string & out)284 bool md5(std::string& out) { 285 return md5_fd(get_fd(), out); 286 } 287 }; 288 289 #endif // OMEGA_INCLUDED_DIRITOR_H 290