1 /*------------------------------------------------------------------------- 2 * 3 * File-processing utility routines. 4 * 5 * Assorted utility functions to work on files. 6 * 7 * 8 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group 9 * Portions Copyright (c) 1994, Regents of the University of California 10 * 11 * src/common/file_utils.c 12 * 13 *------------------------------------------------------------------------- 14 */ 15 #include "postgres_fe.h" 16 17 #include <dirent.h> 18 #include <fcntl.h> 19 #include <sys/stat.h> 20 #include <unistd.h> 21 22 #include "common/file_utils.h" 23 #include "common/logging.h" 24 25 26 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */ 27 #if defined(HAVE_SYNC_FILE_RANGE) 28 #define PG_FLUSH_DATA_WORKS 1 29 #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) 30 #define PG_FLUSH_DATA_WORKS 1 31 #endif 32 33 /* 34 * pg_xlog has been renamed to pg_wal in version 10. 35 */ 36 #define MINIMUM_VERSION_FOR_PG_WAL 100000 37 38 #ifdef PG_FLUSH_DATA_WORKS 39 static int pre_sync_fname(const char *fname, bool isdir); 40 #endif 41 static void walkdir(const char *path, 42 int (*action) (const char *fname, bool isdir), 43 bool process_symlinks); 44 45 /* 46 * Issue fsync recursively on PGDATA and all its contents. 47 * 48 * We fsync regular files and directories wherever they are, but we follow 49 * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc. 50 * Other symlinks are presumed to point at files we're not responsible for 51 * fsyncing, and might not have privileges to write at all. 52 * 53 * serverVersion indicates the version of the server to be fsync'd. 54 */ 55 void 56 fsync_pgdata(const char *pg_data, 57 int serverVersion) 58 { 59 bool xlog_is_symlink; 60 char pg_wal[MAXPGPATH]; 61 char pg_tblspc[MAXPGPATH]; 62 63 /* handle renaming of pg_xlog to pg_wal in post-10 clusters */ 64 snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data, 65 serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal"); 66 snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data); 67 68 /* 69 * If pg_wal is a symlink, we'll need to recurse into it separately, 70 * because the first walkdir below will ignore it. 71 */ 72 xlog_is_symlink = false; 73 74 #ifndef WIN32 75 { 76 struct stat st; 77 78 if (lstat(pg_wal, &st) < 0) 79 pg_log_error("could not stat file \"%s\": %m", pg_wal); 80 else if (S_ISLNK(st.st_mode)) 81 xlog_is_symlink = true; 82 } 83 #else 84 if (pgwin32_is_junction(pg_wal)) 85 xlog_is_symlink = true; 86 #endif 87 88 /* 89 * If possible, hint to the kernel that we're soon going to fsync the data 90 * directory and its contents. 91 */ 92 #ifdef PG_FLUSH_DATA_WORKS 93 walkdir(pg_data, pre_sync_fname, false); 94 if (xlog_is_symlink) 95 walkdir(pg_wal, pre_sync_fname, false); 96 walkdir(pg_tblspc, pre_sync_fname, true); 97 #endif 98 99 /* 100 * Now we do the fsync()s in the same order. 101 * 102 * The main call ignores symlinks, so in addition to specially processing 103 * pg_wal if it's a symlink, pg_tblspc has to be visited separately with 104 * process_symlinks = true. Note that if there are any plain directories 105 * in pg_tblspc, they'll get fsync'd twice. That's not an expected case 106 * so we don't worry about optimizing it. 107 */ 108 walkdir(pg_data, fsync_fname, false); 109 if (xlog_is_symlink) 110 walkdir(pg_wal, fsync_fname, false); 111 walkdir(pg_tblspc, fsync_fname, true); 112 } 113 114 /* 115 * Issue fsync recursively on the given directory and all its contents. 116 * 117 * This is a convenient wrapper on top of walkdir(). 118 */ 119 void 120 fsync_dir_recurse(const char *dir) 121 { 122 /* 123 * If possible, hint to the kernel that we're soon going to fsync the data 124 * directory and its contents. 125 */ 126 #ifdef PG_FLUSH_DATA_WORKS 127 walkdir(dir, pre_sync_fname, false); 128 #endif 129 130 walkdir(dir, fsync_fname, false); 131 } 132 133 /* 134 * walkdir: recursively walk a directory, applying the action to each 135 * regular file and directory (including the named directory itself). 136 * 137 * If process_symlinks is true, the action and recursion are also applied 138 * to regular files and directories that are pointed to by symlinks in the 139 * given directory; otherwise symlinks are ignored. Symlinks are always 140 * ignored in subdirectories, ie we intentionally don't pass down the 141 * process_symlinks flag to recursive calls. 142 * 143 * Errors are reported but not considered fatal. 144 * 145 * See also walkdir in fd.c, which is a backend version of this logic. 146 */ 147 static void 148 walkdir(const char *path, 149 int (*action) (const char *fname, bool isdir), 150 bool process_symlinks) 151 { 152 DIR *dir; 153 struct dirent *de; 154 155 dir = opendir(path); 156 if (dir == NULL) 157 { 158 pg_log_error("could not open directory \"%s\": %m", path); 159 return; 160 } 161 162 while (errno = 0, (de = readdir(dir)) != NULL) 163 { 164 char subpath[MAXPGPATH * 2]; 165 struct stat fst; 166 int sret; 167 168 if (strcmp(de->d_name, ".") == 0 || 169 strcmp(de->d_name, "..") == 0) 170 continue; 171 172 snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name); 173 174 if (process_symlinks) 175 sret = stat(subpath, &fst); 176 else 177 sret = lstat(subpath, &fst); 178 179 if (sret < 0) 180 { 181 pg_log_error("could not stat file \"%s\": %m", subpath); 182 continue; 183 } 184 185 if (S_ISREG(fst.st_mode)) 186 (*action) (subpath, false); 187 else if (S_ISDIR(fst.st_mode)) 188 walkdir(subpath, action, false); 189 } 190 191 if (errno) 192 pg_log_error("could not read directory \"%s\": %m", path); 193 194 (void) closedir(dir); 195 196 /* 197 * It's important to fsync the destination directory itself as individual 198 * file fsyncs don't guarantee that the directory entry for the file is 199 * synced. Recent versions of ext4 have made the window much wider but 200 * it's been an issue for ext3 and other filesystems in the past. 201 */ 202 (*action) (path, true); 203 } 204 205 /* 206 * Hint to the OS that it should get ready to fsync() this file. 207 * 208 * Ignores errors trying to open unreadable files, and reports other errors 209 * non-fatally. 210 */ 211 #ifdef PG_FLUSH_DATA_WORKS 212 213 static int 214 pre_sync_fname(const char *fname, bool isdir) 215 { 216 int fd; 217 218 fd = open(fname, O_RDONLY | PG_BINARY, 0); 219 220 if (fd < 0) 221 { 222 if (errno == EACCES || (isdir && errno == EISDIR)) 223 return 0; 224 pg_log_error("could not open file \"%s\": %m", fname); 225 return -1; 226 } 227 228 /* 229 * We do what pg_flush_data() would do in the backend: prefer to use 230 * sync_file_range, but fall back to posix_fadvise. We ignore errors 231 * because this is only a hint. 232 */ 233 #if defined(HAVE_SYNC_FILE_RANGE) 234 (void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE); 235 #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) 236 (void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED); 237 #else 238 #error PG_FLUSH_DATA_WORKS should not have been defined 239 #endif 240 241 (void) close(fd); 242 return 0; 243 } 244 245 #endif /* PG_FLUSH_DATA_WORKS */ 246 247 /* 248 * fsync_fname -- Try to fsync a file or directory 249 * 250 * Ignores errors trying to open unreadable files, or trying to fsync 251 * directories on systems where that isn't allowed/required. All other errors 252 * are fatal. 253 */ 254 int 255 fsync_fname(const char *fname, bool isdir) 256 { 257 int fd; 258 int flags; 259 int returncode; 260 261 /* 262 * Some OSs require directories to be opened read-only whereas other 263 * systems don't allow us to fsync files opened read-only; so we need both 264 * cases here. Using O_RDWR will cause us to fail to fsync files that are 265 * not writable by our userid, but we assume that's OK. 266 */ 267 flags = PG_BINARY; 268 if (!isdir) 269 flags |= O_RDWR; 270 else 271 flags |= O_RDONLY; 272 273 /* 274 * Open the file, silently ignoring errors about unreadable files (or 275 * unsupported operations, e.g. opening a directory under Windows), and 276 * logging others. 277 */ 278 fd = open(fname, flags, 0); 279 if (fd < 0) 280 { 281 if (errno == EACCES || (isdir && errno == EISDIR)) 282 return 0; 283 pg_log_error("could not open file \"%s\": %m", fname); 284 return -1; 285 } 286 287 returncode = fsync(fd); 288 289 /* 290 * Some OSes don't allow us to fsync directories at all, so we can ignore 291 * those errors. Anything else needs to be reported. 292 */ 293 if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL))) 294 { 295 pg_log_fatal("could not fsync file \"%s\": %m", fname); 296 (void) close(fd); 297 exit(EXIT_FAILURE); 298 } 299 300 (void) close(fd); 301 return 0; 302 } 303 304 /* 305 * fsync_parent_path -- fsync the parent path of a file or directory 306 * 307 * This is aimed at making file operations persistent on disk in case of 308 * an OS crash or power failure. 309 */ 310 int 311 fsync_parent_path(const char *fname) 312 { 313 char parentpath[MAXPGPATH]; 314 315 strlcpy(parentpath, fname, MAXPGPATH); 316 get_parent_directory(parentpath); 317 318 /* 319 * get_parent_directory() returns an empty string if the input argument is 320 * just a file name (see comments in path.c), so handle that as being the 321 * current directory. 322 */ 323 if (strlen(parentpath) == 0) 324 strlcpy(parentpath, ".", MAXPGPATH); 325 326 if (fsync_fname(parentpath, true) != 0) 327 return -1; 328 329 return 0; 330 } 331 332 /* 333 * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability 334 * 335 * Wrapper around rename, similar to the backend version. 336 */ 337 int 338 durable_rename(const char *oldfile, const char *newfile) 339 { 340 int fd; 341 342 /* 343 * First fsync the old and target path (if it exists), to ensure that they 344 * are properly persistent on disk. Syncing the target file is not 345 * strictly necessary, but it makes it easier to reason about crashes; 346 * because it's then guaranteed that either source or target file exists 347 * after a crash. 348 */ 349 if (fsync_fname(oldfile, false) != 0) 350 return -1; 351 352 fd = open(newfile, PG_BINARY | O_RDWR, 0); 353 if (fd < 0) 354 { 355 if (errno != ENOENT) 356 { 357 pg_log_error("could not open file \"%s\": %m", newfile); 358 return -1; 359 } 360 } 361 else 362 { 363 if (fsync(fd) != 0) 364 { 365 pg_log_fatal("could not fsync file \"%s\": %m", newfile); 366 close(fd); 367 exit(EXIT_FAILURE); 368 } 369 close(fd); 370 } 371 372 /* Time to do the real deal... */ 373 if (rename(oldfile, newfile) != 0) 374 { 375 pg_log_error("could not rename file \"%s\" to \"%s\": %m", 376 oldfile, newfile); 377 return -1; 378 } 379 380 /* 381 * To guarantee renaming the file is persistent, fsync the file with its 382 * new name, and its containing directory. 383 */ 384 if (fsync_fname(newfile, false) != 0) 385 return -1; 386 387 if (fsync_parent_path(newfile) != 0) 388 return -1; 389 390 return 0; 391 } 392