1 // Copyright (c) 2015 Sandstorm Development Group, Inc. and contributors
2 // Licensed under the MIT License:
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 // THE SOFTWARE.
21
22 #if !_WIN32
23
24 #ifndef _GNU_SOURCE
25 #define _GNU_SOURCE
26 #endif
27
28 #include "filesystem.h"
29 #include "debug.h"
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <sys/ioctl.h>
33 #include <fcntl.h>
34 #include <unistd.h>
35 #include <stdio.h>
36 #include <sys/mman.h>
37 #include <errno.h>
38 #include <dirent.h>
39 #include <stdlib.h>
40 #include "vector.h"
41 #include "miniposix.h"
42 #include <algorithm>
43
44 #if __linux__
45 #include <syscall.h>
46 #include <linux/fs.h>
47 #include <sys/sendfile.h>
48 #endif
49
50 namespace kj {
51 namespace {
52
#define HIDDEN_PREFIX ".kj-tmp."
// Prefix for temp files which should be hidden when listing a directory.
//
// If you change this, make sure to update the unit test.

// MAYBE_O_CLOEXEC expands to O_CLOEXEC where the platform defines it and to 0 otherwise, so it
// can be OR'd into open flags unconditionally.
#ifdef O_CLOEXEC
#define MAYBE_O_CLOEXEC O_CLOEXEC
#else
#define MAYBE_O_CLOEXEC 0
#endif

// MAYBE_O_DIRECTORY expands to O_DIRECTORY where the platform defines it and to 0 otherwise.
#ifdef O_DIRECTORY
#define MAYBE_O_DIRECTORY O_DIRECTORY
#else
#define MAYBE_O_DIRECTORY 0
#endif

#if __APPLE__
// Mac OSX defines SEEK_HOLE, but it doesn't work. ("Inappropriate ioctl for device", it says.)
#undef SEEK_HOLE
#endif

// Fallback for platforms that do not provide DTTOIF: shifts a dirent d_type value into the
// position of the file-type bits of a mode_t.
#ifndef DTTOIF
#define DTTOIF(dirtype) ((dirtype) << 12)
#endif

#if __BIONIC__
// No DTTOIF function.
// Undefining DT_UNKNOWN disables the d_type code paths below, which are guarded by
// `#ifdef DT_UNKNOWN`.
#undef DT_UNKNOWN
#endif
83
84 static void setCloexec(int fd) KJ_UNUSED;
setCloexec(int fd)85 static void setCloexec(int fd) {
86 // Set the O_CLOEXEC flag on the given fd.
87 //
88 // We try to avoid the need to call this by taking advantage of syscall flags that set it
89 // atomically on new file descriptors. Unfortunately some platforms do not support such syscalls.
90
91 #ifdef FIOCLEX
92 // Yay, we can set the flag in one call.
93 KJ_SYSCALL_HANDLE_ERRORS(ioctl(fd, FIOCLEX)) {
94 case EINVAL:
95 case EOPNOTSUPP:
96 break;
97 default:
98 KJ_FAIL_SYSCALL("ioctl(fd, FIOCLEX)", error) { break; }
99 break;
100 } else {
101 // success
102 return;
103 }
104 #endif
105
106 // Sadness, we must resort to read/modify/write.
107 //
108 // (On many platforms, FD_CLOEXEC is the only flag modifiable via F_SETFD and therefore we could
109 // skip the read... but it seems dangerous to assume that's true of all platforms, and anyway
110 // most platforms support FIOCLEX.)
111 int flags;
112 KJ_SYSCALL(flags = fcntl(fd, F_GETFD));
113 if (!(flags & FD_CLOEXEC)) {
114 KJ_SYSCALL(fcntl(fd, F_SETFD, flags | FD_CLOEXEC));
115 }
116 }
117
toKjDate(struct timespec tv)118 static Date toKjDate(struct timespec tv) {
119 return tv.tv_sec * SECONDS + tv.tv_nsec * NANOSECONDS + UNIX_EPOCH;
120 }
121
modeToType(mode_t mode)122 static FsNode::Type modeToType(mode_t mode) {
123 switch (mode & S_IFMT) {
124 case S_IFREG : return FsNode::Type::FILE;
125 case S_IFDIR : return FsNode::Type::DIRECTORY;
126 case S_IFLNK : return FsNode::Type::SYMLINK;
127 case S_IFBLK : return FsNode::Type::BLOCK_DEVICE;
128 case S_IFCHR : return FsNode::Type::CHARACTER_DEVICE;
129 case S_IFIFO : return FsNode::Type::NAMED_PIPE;
130 case S_IFSOCK: return FsNode::Type::SOCKET;
131 default: return FsNode::Type::OTHER;
132 }
133 }
134
statToMetadata(struct stat & stats)135 static FsNode::Metadata statToMetadata(struct stat& stats) {
136 // Probably st_ino and st_dev are usually under 32 bits, so mix by rotating st_dev left 32 bits
137 // and XOR.
138 uint64_t d = stats.st_dev;
139 uint64_t hash = ((d << 32) | (d >> 32)) ^ stats.st_ino;
140
141 return FsNode::Metadata {
142 modeToType(stats.st_mode),
143 implicitCast<uint64_t>(stats.st_size),
144 implicitCast<uint64_t>(stats.st_blocks * 512u),
145 #if __APPLE__
146 toKjDate(stats.st_mtimespec),
147 #else
148 toKjDate(stats.st_mtim),
149 #endif
150 implicitCast<uint>(stats.st_nlink),
151 hash
152 };
153 }
154
155 static bool rmrf(int fd, StringPtr path);
156
rmrfChildrenAndClose(int fd)157 static void rmrfChildrenAndClose(int fd) {
158 // Assumes fd is seeked to beginning.
159
160 DIR* dir = fdopendir(fd);
161 if (dir == nullptr) {
162 close(fd);
163 KJ_FAIL_SYSCALL("fdopendir", errno);
164 };
165 KJ_DEFER(closedir(dir));
166
167 for (;;) {
168 errno = 0;
169 struct dirent* entry = readdir(dir);
170 if (entry == nullptr) {
171 int error = errno;
172 if (error == 0) {
173 break;
174 } else {
175 KJ_FAIL_SYSCALL("readdir", error);
176 }
177 }
178
179 if (entry->d_name[0] == '.' &&
180 (entry->d_name[1] == '\0' ||
181 (entry->d_name[1] == '.' &&
182 entry->d_name[2] == '\0'))) {
183 // ignore . and ..
184 } else {
185 #ifdef DT_UNKNOWN // d_type is not available on all platforms.
186 if (entry->d_type == DT_DIR) {
187 int subdirFd;
188 KJ_SYSCALL(subdirFd = openat(
189 fd, entry->d_name, O_RDONLY | MAYBE_O_DIRECTORY | MAYBE_O_CLOEXEC));
190 rmrfChildrenAndClose(subdirFd);
191 KJ_SYSCALL(unlinkat(fd, entry->d_name, AT_REMOVEDIR));
192 } else if (entry->d_type != DT_UNKNOWN) {
193 KJ_SYSCALL(unlinkat(fd, entry->d_name, 0));
194 } else {
195 #endif
196 KJ_ASSERT(rmrf(fd, entry->d_name));
197 #ifdef DT_UNKNOWN
198 }
199 #endif
200 }
201 }
202 }
203
rmrf(int fd,StringPtr path)204 static bool rmrf(int fd, StringPtr path) {
205 struct stat stats;
206 KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd, path.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) {
207 case ENOENT:
208 case ENOTDIR:
209 // Doesn't exist.
210 return false;
211 default:
212 KJ_FAIL_SYSCALL("lstat(path)", error, path) { return false; }
213 }
214
215 if (S_ISDIR(stats.st_mode)) {
216 int subdirFd;
217 KJ_SYSCALL(subdirFd = openat(
218 fd, path.cStr(), O_RDONLY | MAYBE_O_DIRECTORY | MAYBE_O_CLOEXEC)) { return false; }
219 rmrfChildrenAndClose(subdirFd);
220 KJ_SYSCALL(unlinkat(fd, path.cStr(), AT_REMOVEDIR)) { return false; }
221 } else {
222 KJ_SYSCALL(unlinkat(fd, path.cStr(), 0)) { return false; }
223 }
224
225 return true;
226 }
227
struct MmapRange {
  // A page-aligned region suitable for passing to mmap()/munmap()/msync().
  uint64_t offset;
  uint64_t size;
};

static MmapRange getMmapRange(uint64_t offset, uint64_t size) {
  // Translates a caller-requested (offset, size) pair into the pair to hand to mmap(), given
  // that a mapping has to begin on a page boundary.
  //
  // The start is moved down to the enclosing page boundary and the length grows by the same
  // amount. The end of the mapping is deliberately *not* rounded up to a page boundary: mmap()
  // does not require it, and doing so causes trouble on some systems (notably Cygwin).

#ifndef _SC_PAGESIZE
#define _SC_PAGESIZE _SC_PAGE_SIZE
#endif
  static const uint64_t pageSize = sysconf(_SC_PAGESIZE);

  // Distance from the enclosing page boundary up to the requested offset.
  uint64_t intoPage = offset & (pageSize - 1);

  MmapRange aligned;
  aligned.offset = offset - intoPage;
  aligned.size = size + intoPage;
  return aligned;
}
251
252 class MmapDisposer: public ArrayDisposer {
253 protected:
disposeImpl(void * firstElement,size_t elementSize,size_t elementCount,size_t capacity,void (* destroyElement)(void *)) const254 void disposeImpl(void* firstElement, size_t elementSize, size_t elementCount,
255 size_t capacity, void (*destroyElement)(void*)) const {
256 auto range = getMmapRange(reinterpret_cast<uintptr_t>(firstElement),
257 elementSize * elementCount);
258 KJ_SYSCALL(munmap(reinterpret_cast<byte*>(range.offset), range.size)) { break; }
259 }
260 };
261
262 constexpr MmapDisposer mmapDisposer = MmapDisposer();
263
264 class DiskHandle {
265 // We need to implement each of ReadableFile, AppendableFile, File, ReadableDirectory, and
266 // Directory for disk handles. There is a lot of implementation overlap between these, especially
267 // stat(), sync(), etc. We can't have everything inherit from a common DiskFsNode that implements
268 // these because then we get diamond inheritance which means we need to make all our inheritance
269 // virtual which means downcasting requires RTTI which violates our goal of supporting compiling
270 // with no RTTI. So instead we have the DiskHandle class which implements all the methods without
271 // inheriting anything, and then we have DiskFile, DiskDirectory, etc. hold this and delegate to
272 // it. Ugly, but works.
273
274 public:
// Takes ownership of `fd`; every other method of this class operates on that descriptor.
DiskHandle(AutoCloseFd&& fd): fd(kj::mv(fd)) {}
276
277 // OsHandle ------------------------------------------------------------------
278
clone() const279 AutoCloseFd clone() const {
280 int fd2;
281 #ifdef F_DUPFD_CLOEXEC
282 KJ_SYSCALL_HANDLE_ERRORS(fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 3)) {
283 case EINVAL:
284 case EOPNOTSUPP:
285 // fall back
286 break;
287 default:
288 KJ_FAIL_SYSCALL("fnctl(fd, F_DUPFD_CLOEXEC, 3)", error) { break; }
289 break;
290 } else {
291 return AutoCloseFd(fd2);
292 }
293 #endif
294
295 KJ_SYSCALL(fd2 = ::dup(fd));
296 AutoCloseFd result(fd2);
297 setCloexec(result);
298 return result;
299 }
300
int getFd() const {
  // Returns the raw descriptor number; ownership stays with this handle.
  return fd.get();
}
304
305 // FsNode --------------------------------------------------------------------
306
FsNode::Metadata stat() const {
  // Returns metadata for the open descriptor, obtained via fstat().
  struct stat stats;
  KJ_SYSCALL(::fstat(fd, &stats));
  return statToMetadata(stats);
}
312
void sync() const {
  // Flushes the file to stable storage: F_FULLFSYNC on Apple platforms, fsync() elsewhere.
#if __APPLE__
  // For whatever reason, fsync() on OSX only flushes kernel buffers. It does not flush hardware
  // disk buffers. This makes it not very useful. But OSX documents fcntl F_FULLFSYNC which does
  // the right thing. Why they don't just make fsync() do the right thing, I do not know.
  KJ_SYSCALL(fcntl(fd, F_FULLFSYNC));
#else
  KJ_SYSCALL(fsync(fd));
#endif
}
323
void datasync() const {
  // Calls fdatasync() where it is available; otherwise falls back to a full sync().
  //
  // The presence of the _POSIX_SYNCHRONIZED_IO define is supposed to tell us that fdatasync()
  // exists. But Apple defines this yet doesn't offer fdatasync(). Thanks, Apple.
#if _POSIX_SYNCHRONIZED_IO && !__APPLE__
  KJ_SYSCALL(fdatasync(fd));
#else
  this->sync();
#endif
}
333
334 // ReadableFile --------------------------------------------------------------
335
read(uint64_t offset,ArrayPtr<byte> buffer) const336 size_t read(uint64_t offset, ArrayPtr<byte> buffer) const {
337 // pread() probably never returns short reads unless it hits EOF. Unfortunately, though, per
338 // spec we are not allowed to assume this.
339
340 size_t total = 0;
341 while (buffer.size() > 0) {
342 ssize_t n;
343 KJ_SYSCALL(n = pread(fd, buffer.begin(), buffer.size(), offset));
344 if (n == 0) break;
345 total += n;
346 offset += n;
347 buffer = buffer.slice(n, buffer.size());
348 }
349 return total;
350 }
351
mmap(uint64_t offset,uint64_t size) const352 Array<const byte> mmap(uint64_t offset, uint64_t size) const {
353 if (size == 0) return nullptr; // zero-length mmap() returns EINVAL, so avoid it
354 auto range = getMmapRange(offset, size);
355 const void* mapping = ::mmap(NULL, range.size, PROT_READ, MAP_SHARED, fd, range.offset);
356 if (mapping == MAP_FAILED) {
357 KJ_FAIL_SYSCALL("mmap", errno);
358 }
359 return Array<const byte>(reinterpret_cast<const byte*>(mapping) + (offset - range.offset),
360 size, mmapDisposer);
361 }
362
mmapPrivate(uint64_t offset,uint64_t size) const363 Array<byte> mmapPrivate(uint64_t offset, uint64_t size) const {
364 if (size == 0) return nullptr; // zero-length mmap() returns EINVAL, so avoid it
365 auto range = getMmapRange(offset, size);
366 void* mapping = ::mmap(NULL, range.size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, range.offset);
367 if (mapping == MAP_FAILED) {
368 KJ_FAIL_SYSCALL("mmap", errno);
369 }
370 return Array<byte>(reinterpret_cast<byte*>(mapping) + (offset - range.offset),
371 size, mmapDisposer);
372 }
373
374 // File ----------------------------------------------------------------------
375
write(uint64_t offset,ArrayPtr<const byte> data) const376 void write(uint64_t offset, ArrayPtr<const byte> data) const {
377 // pwrite() probably never returns short writes unless there's no space left on disk.
378 // Unfortunately, though, per spec we are not allowed to assume this.
379
380 while (data.size() > 0) {
381 ssize_t n;
382 KJ_SYSCALL(n = pwrite(fd, data.begin(), data.size(), offset));
383 KJ_ASSERT(n > 0, "pwrite() returned zero?");
384 offset += n;
385 data = data.slice(n, data.size());
386 }
387 }
388
zero(uint64_t offset,uint64_t size) const389 void zero(uint64_t offset, uint64_t size) const {
390 #ifdef FALLOC_FL_PUNCH_HOLE
391 KJ_SYSCALL_HANDLE_ERRORS(
392 fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, size)) {
393 case EOPNOTSUPP:
394 // fall back to below
395 break;
396 default:
397 KJ_FAIL_SYSCALL("fallocate(FALLOC_FL_PUNCH_HOLE)", error) { return; }
398 } else {
399 return;
400 }
401 #endif
402
403 static const byte ZEROS[4096] = { 0 };
404
405 #if __APPLE__ || __CYGWIN__ || (defined(__ANDROID__) && __ANDROID_API__ < 24)
406 // Mac & Cygwin & Android API levels 23 and lower doesn't have pwritev().
407 while (size > sizeof(ZEROS)) {
408 write(offset, ZEROS);
409 size -= sizeof(ZEROS);
410 offset += sizeof(ZEROS);
411 }
412 write(offset, kj::arrayPtr(ZEROS, size));
413 #else
414 // Use a 4k buffer of zeros amplified by iov to write zeros with as few syscalls as possible.
415 size_t count = (size + sizeof(ZEROS) - 1) / sizeof(ZEROS);
416 const size_t iovmax = miniposix::iovMax(count);
417 KJ_STACK_ARRAY(struct iovec, iov, kj::min(iovmax, count), 16, 256);
418
419 for (auto& item: iov) {
420 item.iov_base = const_cast<byte*>(ZEROS);
421 item.iov_len = sizeof(ZEROS);
422 }
423
424 while (size > 0) {
425 size_t iovCount;
426 if (size >= iov.size() * sizeof(ZEROS)) {
427 iovCount = iov.size();
428 } else {
429 iovCount = size / sizeof(ZEROS);
430 size_t rem = size % sizeof(ZEROS);
431 if (rem > 0) {
432 iov[iovCount++].iov_len = rem;
433 }
434 }
435
436 ssize_t n;
437 KJ_SYSCALL(n = pwritev(fd, iov.begin(), count, offset));
438 KJ_ASSERT(n > 0, "pwrite() returned zero?");
439
440 offset += n;
441 size -= n;
442 }
443 #endif
444 }
445
void truncate(uint64_t size) const {
  // Sets the file's length to `size` bytes via ftruncate().
  KJ_SYSCALL(ftruncate(fd, size));
}
449
450 class WritableFileMappingImpl final: public WritableFileMapping {
451 public:
WritableFileMappingImpl(Array<byte> bytes)452 WritableFileMappingImpl(Array<byte> bytes): bytes(kj::mv(bytes)) {}
453
get() const454 ArrayPtr<byte> get() const override {
455 // const_cast OK because WritableFileMapping does indeed provide a writable view despite
456 // being const itself.
457 return arrayPtr(const_cast<byte*>(bytes.begin()), bytes.size());
458 }
459
changed(ArrayPtr<byte> slice) const460 void changed(ArrayPtr<byte> slice) const override {
461 KJ_REQUIRE(slice.begin() >= bytes.begin() && slice.end() <= bytes.end(),
462 "byte range is not part of this mapping");
463 if (slice.size() == 0) return;
464
465 // msync() requires page-alignment, apparently, so use getMmapRange() to accomplish that.
466 auto range = getMmapRange(reinterpret_cast<uintptr_t>(slice.begin()), slice.size());
467 KJ_SYSCALL(msync(reinterpret_cast<void*>(range.offset), range.size, MS_ASYNC));
468 }
469
sync(ArrayPtr<byte> slice) const470 void sync(ArrayPtr<byte> slice) const override {
471 KJ_REQUIRE(slice.begin() >= bytes.begin() && slice.end() <= bytes.end(),
472 "byte range is not part of this mapping");
473 if (slice.size() == 0) return;
474
475 // msync() requires page-alignment, apparently, so use getMmapRange() to accomplish that.
476 auto range = getMmapRange(reinterpret_cast<uintptr_t>(slice.begin()), slice.size());
477 KJ_SYSCALL(msync(reinterpret_cast<void*>(range.offset), range.size, MS_SYNC));
478 }
479
480 private:
481 Array<byte> bytes;
482 };
483
mmapWritable(uint64_t offset,uint64_t size) const484 Own<const WritableFileMapping> mmapWritable(uint64_t offset, uint64_t size) const {
485 if (size == 0) {
486 // zero-length mmap() returns EINVAL, so avoid it
487 return heap<WritableFileMappingImpl>(nullptr);
488 }
489 auto range = getMmapRange(offset, size);
490 void* mapping = ::mmap(NULL, range.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, range.offset);
491 if (mapping == MAP_FAILED) {
492 KJ_FAIL_SYSCALL("mmap", errno);
493 }
494 auto array = Array<byte>(reinterpret_cast<byte*>(mapping) + (offset - range.offset),
495 size, mmapDisposer);
496 return heap<WritableFileMappingImpl>(kj::mv(array));
497 }
498
size_t copyChunk(uint64_t offset, int fromFd, uint64_t fromOffset, uint64_t size) const {
  // Copies a range of bytes from `fromFd` to this file in the most efficient way possible for
  // the OS. Only returns less than `size` if EOF. Does not account for holes.
  //
  // `offset` is the destination position in this file; `fromOffset` is the source position in
  // `fromFd`. Returns the number of bytes copied.

#if __linux__
  {
    // Linux path: sendfile() reads at the explicit position `fromPos` (which it advances) but
    // writes at the destination's current file position, so seek the destination first.
    KJ_SYSCALL(lseek(fd, offset, SEEK_SET));
    off_t fromPos = fromOffset;
    off_t end = fromOffset + size;
    while (fromPos < end) {
      ssize_t n;
      KJ_SYSCALL_HANDLE_ERRORS(n = sendfile(fd, fromFd, &fromPos, end - fromPos)) {
        case EINVAL:
        case ENOSYS:
          // sendfile() can't be used here; use the generic read/write loop below.
          goto sendfileNotAvailable;
        default:
          KJ_FAIL_SYSCALL("sendfile", error) { return fromPos - fromOffset; }
      }
      // Zero bytes transferred is treated as EOF on the source.
      if (n == 0) break;
    }
    return fromPos - fromOffset;
  }

sendfileNotAvailable:
#endif
  // Generic path: pread() into a stack buffer and write() it out, 4096 bytes at a time.
  uint64_t total = 0;
  while (size > 0) {
    byte buffer[4096];
    ssize_t n;
    KJ_SYSCALL(n = pread(fromFd, buffer, kj::min(sizeof(buffer), size), fromOffset));
    if (n == 0) break;
    write(offset, arrayPtr(buffer, n));
    fromOffset += n;
    offset += n;
    total += n;
    size -= n;
  }
  return total;
}
538
copy(uint64_t offset,const ReadableFile & from,uint64_t fromOffset,uint64_t size) const539 kj::Maybe<size_t> copy(uint64_t offset, const ReadableFile& from,
540 uint64_t fromOffset, uint64_t size) const {
541 KJ_IF_MAYBE(otherFd, from.getFd()) {
542 #ifdef FICLONE
543 if (offset == 0 && fromOffset == 0 && size == kj::maxValue && stat().size == 0) {
544 if (ioctl(fd, FICLONE, *otherFd) >= 0) {
545 return stat().size;
546 }
547 } else if (size > 0) { // src_length = 0 has special meaning for the syscall, so avoid.
548 struct file_clone_range range;
549 memset(&range, 0, sizeof(range));
550 range.src_fd = *otherFd;
551 range.dest_offset = offset;
552 range.src_offset = fromOffset;
553 range.src_length = size == kj::maxValue ? 0 : size;
554 if (ioctl(fd, FICLONERANGE, &range) >= 0) {
555 // TODO(someday): What does FICLONERANGE actually do if the range goes past EOF? The docs
556 // don't say. Maybe it only copies the parts that exist. Maybe it punches holes for the
557 // rest. Where does the destination file's EOF marker end up? Who knows?
558 return kj::min(from.stat().size - fromOffset, size);
559 }
560 } else {
561 // size == 0
562 return size_t(0);
563 }
564
565 // ioctl failed. Almost all failures documented for these are of the form "the operation is
566 // not supported for the filesystem(s) specified", so fall back to other approaches.
567 #endif
568
569 off_t toPos = offset;
570 off_t fromPos = fromOffset;
571 off_t end = size == kj::maxValue ? off_t(kj::maxValue) : off_t(fromOffset + size);
572
573 for (;;) {
574 // Handle data.
575 {
576 // Find out how much data there is before the next hole.
577 off_t nextHole;
578 #ifdef SEEK_HOLE
579 KJ_SYSCALL_HANDLE_ERRORS(nextHole = lseek(*otherFd, fromPos, SEEK_HOLE)) {
580 case EINVAL:
581 // SEEK_HOLE probably not supported. Assume no holes.
582 nextHole = end;
583 break;
584 case ENXIO:
585 // Past EOF. Stop here.
586 return fromPos - fromOffset;
587 default:
588 KJ_FAIL_SYSCALL("lseek(fd, pos, SEEK_HOLE)", error) { return fromPos - fromOffset; }
589 }
590 #else
591 // SEEK_HOLE not supported. Assume no holes.
592 nextHole = end;
593 #endif
594
595 // Copy the next chunk of data.
596 off_t copyTo = kj::min(end, nextHole);
597 size_t amount = copyTo - fromPos;
598 if (amount > 0) {
599 size_t n = copyChunk(toPos, *otherFd, fromPos, amount);
600 fromPos += n;
601 toPos += n;
602
603 if (n < amount) {
604 return fromPos - fromOffset;
605 }
606 }
607
608 if (fromPos == end) {
609 return fromPos - fromOffset;
610 }
611 }
612
613 #ifdef SEEK_HOLE
614 // Handle hole.
615 {
616 // Find out how much hole there is before the next data.
617 off_t nextData;
618 KJ_SYSCALL_HANDLE_ERRORS(nextData = lseek(*otherFd, fromPos, SEEK_DATA)) {
619 case EINVAL:
620 // SEEK_DATA probably not supported. But we should only have gotten here if we
621 // were expecting a hole.
622 KJ_FAIL_ASSERT("can't determine hole size; SEEK_DATA not supported");
623 break;
624 case ENXIO:
625 // No more data. Set to EOF.
626 KJ_SYSCALL(nextData = lseek(*otherFd, 0, SEEK_END));
627 if (nextData > end) {
628 end = nextData;
629 }
630 break;
631 default:
632 KJ_FAIL_SYSCALL("lseek(fd, pos, SEEK_HOLE)", error) { return fromPos - fromOffset; }
633 }
634
635 // Write zeros.
636 off_t zeroTo = kj::min(end, nextData);
637 off_t amount = zeroTo - fromPos;
638 if (amount > 0) {
639 zero(toPos, amount);
640 toPos += amount;
641 fromPos = zeroTo;
642 }
643
644 if (fromPos == end) {
645 return fromPos - fromOffset;
646 }
647 }
648 #endif
649 }
650 }
651
652 // Indicates caller should call File::copy() default implementation.
653 return nullptr;
654 }
655
656 // ReadableDirectory ---------------------------------------------------------
657
658 template <typename Func>
list(bool needTypes,Func && func) const659 auto list(bool needTypes, Func&& func) const
660 -> Array<Decay<decltype(func(instance<StringPtr>(), instance<FsNode::Type>()))>> {
661 // Seek to start of directory.
662 KJ_SYSCALL(lseek(fd, 0, SEEK_SET));
663
664 // Unfortunately, fdopendir() takes ownership of the file descriptor. Therefore we need to
665 // make a duplicate.
666 int duped;
667 KJ_SYSCALL(duped = dup(fd));
668 DIR* dir = fdopendir(duped);
669 if (dir == nullptr) {
670 close(duped);
671 KJ_FAIL_SYSCALL("fdopendir", errno);
672 }
673
674 KJ_DEFER(closedir(dir));
675 typedef Decay<decltype(func(instance<StringPtr>(), instance<FsNode::Type>()))> Entry;
676 kj::Vector<Entry> entries;
677
678 for (;;) {
679 errno = 0;
680 struct dirent* entry = readdir(dir);
681 if (entry == nullptr) {
682 int error = errno;
683 if (error == 0) {
684 break;
685 } else {
686 KJ_FAIL_SYSCALL("readdir", error);
687 }
688 }
689
690 kj::StringPtr name = entry->d_name;
691 if (name != "." && name != ".." && !name.startsWith(HIDDEN_PREFIX)) {
692 #ifdef DT_UNKNOWN // d_type is not available on all platforms.
693 if (entry->d_type != DT_UNKNOWN) {
694 entries.add(func(name, modeToType(DTTOIF(entry->d_type))));
695 } else {
696 #endif
697 if (needTypes) {
698 // Unknown type. Fall back to stat.
699 struct stat stats;
700 KJ_SYSCALL(fstatat(fd, name.cStr(), &stats, AT_SYMLINK_NOFOLLOW));
701 entries.add(func(name, modeToType(stats.st_mode)));
702 } else {
703 entries.add(func(name, FsNode::Type::OTHER));
704 }
705 #ifdef DT_UNKNOWN
706 }
707 #endif
708 }
709 }
710
711 auto result = entries.releaseAsArray();
712 std::sort(result.begin(), result.end());
713 return result;
714 }
715
listNames() const716 Array<String> listNames() const {
717 return list(false, [](StringPtr name, FsNode::Type type) { return heapString(name); });
718 }
719
listEntries() const720 Array<ReadableDirectory::Entry> listEntries() const {
721 return list(true, [](StringPtr name, FsNode::Type type) {
722 return ReadableDirectory::Entry { type, heapString(name), };
723 });
724 }
725
exists(PathPtr path) const726 bool exists(PathPtr path) const {
727 KJ_SYSCALL_HANDLE_ERRORS(faccessat(fd, path.toString().cStr(), F_OK, 0)) {
728 case ENOENT:
729 case ENOTDIR:
730 return false;
731 default:
732 KJ_FAIL_SYSCALL("faccessat(fd, path)", error, path) { return false; }
733 }
734 return true;
735 }
736
tryLstat(PathPtr path) const737 Maybe<FsNode::Metadata> tryLstat(PathPtr path) const {
738 struct stat stats;
739 KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd, path.toString().cStr(), &stats, AT_SYMLINK_NOFOLLOW)) {
740 case ENOENT:
741 case ENOTDIR:
742 return nullptr;
743 default:
744 KJ_FAIL_SYSCALL("faccessat(fd, path)", error, path) { return nullptr; }
745 }
746 return statToMetadata(stats);
747 }
748
tryOpenFile(PathPtr path) const749 Maybe<Own<const ReadableFile>> tryOpenFile(PathPtr path) const {
750 int newFd;
751 KJ_SYSCALL_HANDLE_ERRORS(newFd = openat(
752 fd, path.toString().cStr(), O_RDONLY | MAYBE_O_CLOEXEC)) {
753 case ENOENT:
754 case ENOTDIR:
755 return nullptr;
756 default:
757 KJ_FAIL_SYSCALL("openat(fd, path, O_RDONLY)", error, path) { return nullptr; }
758 }
759
760 kj::AutoCloseFd result(newFd);
761 #ifndef O_CLOEXEC
762 setCloexec(result);
763 #endif
764
765 return newDiskReadableFile(kj::mv(result));
766 }
767
tryOpenSubdirInternal(PathPtr path) const768 Maybe<AutoCloseFd> tryOpenSubdirInternal(PathPtr path) const {
769 int newFd;
770 KJ_SYSCALL_HANDLE_ERRORS(newFd = openat(
771 fd, path.toString().cStr(), O_RDONLY | MAYBE_O_CLOEXEC | MAYBE_O_DIRECTORY)) {
772 case ENOENT:
773 return nullptr;
774 case ENOTDIR:
775 // Could mean that a parent is not a directory, which we treat as "doesn't exist".
776 // Could also mean that the specified file is not a directory, which should throw.
777 // Check using exists().
778 if (!exists(path)) {
779 return nullptr;
780 }
781 // fallthrough
782 default:
783 KJ_FAIL_SYSCALL("openat(fd, path, O_DIRECTORY)", error, path) { return nullptr; }
784 }
785
786 kj::AutoCloseFd result(newFd);
787 #ifndef O_CLOEXEC
788 setCloexec(result);
789 #endif
790
791 return kj::mv(result);
792 }
793
Maybe<Own<const ReadableDirectory>> tryOpenSubdir(PathPtr path) const {
  // Opens the subdirectory at `path` and wraps the resulting descriptor with
  // newDiskReadableDirectory(). Yields nullptr when tryOpenSubdirInternal() does.
  return tryOpenSubdirInternal(path).map(newDiskReadableDirectory);
}
797
tryReadlink(PathPtr path) const798 Maybe<String> tryReadlink(PathPtr path) const {
799 size_t trySize = 256;
800 for (;;) {
801 KJ_STACK_ARRAY(char, buf, trySize, 256, 4096);
802 ssize_t n = readlinkat(fd, path.toString().cStr(), buf.begin(), buf.size());
803 if (n < 0) {
804 int error = errno;
805 switch (error) {
806 case EINTR:
807 continue;
808 case ENOENT:
809 case ENOTDIR:
810 case EINVAL: // not a link
811 return nullptr;
812 default:
813 KJ_FAIL_SYSCALL("readlinkat(fd, path)", error, path) { return nullptr; }
814 }
815 }
816
817 if (n >= buf.size()) {
818 // Didn't give it enough space. Better retry with a bigger buffer.
819 trySize *= 2;
820 continue;
821 }
822
823 return heapString(buf.begin(), n);
824 }
825 }
826
827 // Directory -----------------------------------------------------------------
828
bool tryMkdir(PathPtr path, WriteMode mode, bool noThrow) const {
  // Internal function to make a directory.
  //
  // Returns true if the directory was created, or if it already existed as a directory and
  // `mode` includes MODIFY. Returns false if the path exists and MODIFY was not given, or if it
  // exists but is not a directory. On any other failure, returns false when `noThrow` is set
  // and raises otherwise.

  auto filename = path.toString();
  // Permission bits requested from mkdirat(): owner-only when PRIVATE is set.
  mode_t acl = has(mode, WriteMode::PRIVATE) ? 0700 : 0777;

  KJ_SYSCALL_HANDLE_ERRORS(mkdirat(fd, filename.cStr(), acl)) {
    case EEXIST: {
      // Apparently this path exists.
      if (!has(mode, WriteMode::MODIFY)) {
        // Require exclusive create.
        return false;
      }

      // MODIFY is allowed, so we just need to check whether the existing entry is a directory.
      struct stat stats;
      KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd, filename.cStr(), &stats, 0)) {
        default:
          // mkdir() says EEXIST but we can't stat it. Maybe it's a dangling link, or maybe
          // we can't access it for some reason. Assume failure.
          //
          // TODO(someday): Maybe we should be creating the directory at the target of the
          //   link?
          goto failed;
      }
      return (stats.st_mode & S_IFMT) == S_IFDIR;
    }
    case ENOENT:
      // A parent is missing. If allowed, create the parents (never throwing from that attempt)
      // and then try once more.
      if (has(mode, WriteMode::CREATE_PARENT) && path.size() > 0 &&
          tryMkdir(path.parent(), WriteMode::CREATE | WriteMode::MODIFY |
                                  WriteMode::CREATE_PARENT, true)) {
        // Retry, but make sure we don't try to create the parent again.
        return tryMkdir(path, mode - WriteMode::CREATE_PARENT, noThrow);
      } else {
        goto failed;
      }
    default:
    failed:
      // Shared failure exit, also reached via `goto failed` from the cases above.
      if (noThrow) {
        // Caller requested no throwing.
        return false;
      } else {
        KJ_FAIL_SYSCALL("mkdirat(fd, path)", error, path);
      }
  }

  return true;
}
877
kj::Maybe<String> createNamedTemporary(
    PathPtr finalName, WriteMode mode, Function<int(StringPtr)> tryCreate) const {
  // Create a temporary file which will eventually replace `finalName`.
  //
  // Calls `tryCreate` to actually create the temporary, passing in the desired path. tryCreate()
  // is expected to behave like a syscall, returning a negative value and setting `errno` on
  // error. tryCreate() MUST fail with EEXIST if the path exists -- this is not checked in
  // advance, since it needs to be checked atomically. In the case of EEXIST, tryCreate() will
  // be called again with a new path.
  //
  // Returns the temporary path that succeeded. Only returns nullptr if there was an exception
  // but we're compiled with -fno-exceptions.

  if (finalName.size() == 0) {
    KJ_FAIL_REQUIRE("can't replace self") { break; }
    return nullptr;
  }

  // The temporary's name is built from HIDDEN_PREFIX, the process ID, a counter that is
  // incremented on every call, and the final basename, and it is placed in the same directory
  // as `finalName`.
  // NOTE(review): `counter` is a plain function-local static with no synchronization visible
  //   here -- confirm whether concurrent callers are possible.
  static uint counter = 0;
  static const pid_t pid = getpid();
  String pathPrefix;
  if (finalName.size() > 1) {
    pathPrefix = kj::str(finalName.parent(), '/');
  }
  auto path = kj::str(pathPrefix, HIDDEN_PREFIX, pid, '.', counter++, '.',
                      finalName.basename()[0], ".partial");

  KJ_SYSCALL_HANDLE_ERRORS(tryCreate(path)) {
    case EEXIST:
      // Name collision: recurse, which picks a new name because the counter has advanced.
      return createNamedTemporary(finalName, mode, kj::mv(tryCreate));
    case ENOENT:
      if (has(mode, WriteMode::CREATE_PARENT) && finalName.size() > 1 &&
          tryMkdir(finalName.parent(), WriteMode::CREATE | WriteMode::MODIFY |
                                       WriteMode::CREATE_PARENT, true)) {
        // Retry, but make sure we don't try to create the parent again.
        mode = mode - WriteMode::CREATE_PARENT;
        return createNamedTemporary(finalName, mode, kj::mv(tryCreate));
      }
      // fallthrough
    default:
      KJ_FAIL_SYSCALL("create(path)", error, path) { break; }
      return nullptr;
  }

  return kj::mv(path);
}
924
bool tryReplaceNode(PathPtr path, WriteMode mode, Function<int(StringPtr)> tryCreate) const {
  // Replaces the given path with an object created by calling tryCreate().
  //
  // tryCreate() must behave like a syscall which creates the node at the path passed to it,
  // returning a negative value on error. If the path passed to tryCreate already exists, it
  // MUST fail with EEXIST.
  //
  // When `mode` includes MODIFY, replaceNode() reacts to EEXIST by creating the node in a
  // temporary location and then rename()ing it into place.
  //
  // Returns true if the node was created or replaced, false otherwise.

  if (path.size() == 0) {
    KJ_FAIL_REQUIRE("can't replace self") { return false; }
  }

  auto filename = path.toString();

  if (has(mode, WriteMode::CREATE)) {
    // First try just creating the node in-place.
    KJ_SYSCALL_HANDLE_ERRORS(tryCreate(filename)) {
      case EEXIST:
        // Target exists.
        if (has(mode, WriteMode::MODIFY)) {
          // Fall back to MODIFY path, below.
          break;
        } else {
          return false;
        }
      case ENOENT:
        if (has(mode, WriteMode::CREATE_PARENT) && path.size() > 0 &&
            tryMkdir(path.parent(), WriteMode::CREATE | WriteMode::MODIFY |
                                    WriteMode::CREATE_PARENT, true)) {
          // Retry, but make sure we don't try to create the parent again.
          return tryReplaceNode(path, mode - WriteMode::CREATE_PARENT, kj::mv(tryCreate));
        }
        // fallthrough
      default:
        KJ_FAIL_SYSCALL("create(path)", error, path) { return false; }
    } else {
      // Success.
      return true;
    }
  }

  // Either we don't have CREATE mode or the target already exists. We need to perform a
  // replacement instead.

  KJ_IF_MAYBE(tempPath, createNamedTemporary(path, mode, kj::mv(tryCreate))) {
    if (tryCommitReplacement(filename, fd, *tempPath, mode)) {
      return true;
    } else {
      // The commit did not happen; remove the temporary node so it is not left behind.
      KJ_SYSCALL_HANDLE_ERRORS(unlinkat(fd, tempPath->cStr(), 0)) {
        case ENOENT:
          // meh
          break;
        default:
          KJ_FAIL_SYSCALL("unlinkat(fd, tempPath, 0)", error, *tempPath);
      }
      return false;
    }
  } else {
    // threw, but exceptions are disabled
    return false;
  }
}
989
tryOpenFileInternal(PathPtr path,WriteMode mode,bool append) const990 Maybe<AutoCloseFd> tryOpenFileInternal(PathPtr path, WriteMode mode, bool append) const {
991 uint flags = O_RDWR | MAYBE_O_CLOEXEC;
992 mode_t acl = 0666;
993 if (has(mode, WriteMode::CREATE)) {
994 flags |= O_CREAT;
995 }
996 if (!has(mode, WriteMode::MODIFY)) {
997 if (!has(mode, WriteMode::CREATE)) {
998 // Neither CREATE nor MODIFY -- impossible to satisfy preconditions.
999 return nullptr;
1000 }
1001 flags |= O_EXCL;
1002 }
1003 if (append) {
1004 flags |= O_APPEND;
1005 }
1006 if (has(mode, WriteMode::EXECUTABLE)) {
1007 acl = 0777;
1008 }
1009 if (has(mode, WriteMode::PRIVATE)) {
1010 acl &= 0700;
1011 }
1012
1013 auto filename = path.toString();
1014
1015 int newFd;
1016 KJ_SYSCALL_HANDLE_ERRORS(newFd = openat(fd, filename.cStr(), flags, acl)) {
1017 case ENOENT:
1018 if (has(mode, WriteMode::CREATE)) {
1019 // Either:
1020 // - The file is a broken symlink.
1021 // - A parent directory didn't exist.
1022 if (has(mode, WriteMode::CREATE_PARENT) && path.size() > 0 &&
1023 tryMkdir(path.parent(), WriteMode::CREATE | WriteMode::MODIFY |
1024 WriteMode::CREATE_PARENT, true)) {
1025 // Retry, but make sure we don't try to create the parent again.
1026 return tryOpenFileInternal(path, mode - WriteMode::CREATE_PARENT, append);
1027 }
1028
1029 // Check for broken link.
1030 if (!has(mode, WriteMode::MODIFY) &&
1031 faccessat(fd, filename.cStr(), F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
1032 // Yep. We treat this as already-exists, which means in CREATE-only mode this is a
1033 // simple failure.
1034 return nullptr;
1035 }
1036
1037 KJ_FAIL_REQUIRE("parent is not a directory", path) { return nullptr; }
1038 } else {
1039 // MODIFY-only mode. ENOENT = doesn't exist = return null.
1040 return nullptr;
1041 }
1042 case ENOTDIR:
1043 if (!has(mode, WriteMode::CREATE)) {
1044 // MODIFY-only mode. ENOTDIR = parent not a directory = doesn't exist = return null.
1045 return nullptr;
1046 }
1047 goto failed;
1048 case EEXIST:
1049 if (!has(mode, WriteMode::MODIFY)) {
1050 // CREATE-only mode. EEXIST = already exists = return null.
1051 return nullptr;
1052 }
1053 goto failed;
1054 default:
1055 failed:
1056 KJ_FAIL_SYSCALL("openat(fd, path, O_RDWR | ...)", error, path) { return nullptr; }
1057 }
1058
1059 kj::AutoCloseFd result(newFd);
1060 #ifndef O_CLOEXEC
1061 setCloexec(result);
1062 #endif
1063
1064 return kj::mv(result);
1065 }
1066
tryCommitReplacement(StringPtr toPath,int fromDirFd,StringPtr fromPath,WriteMode mode,int * errorReason=nullptr) const1067 bool tryCommitReplacement(StringPtr toPath, int fromDirFd, StringPtr fromPath, WriteMode mode,
1068 int* errorReason = nullptr) const {
1069 if (has(mode, WriteMode::CREATE) && has(mode, WriteMode::MODIFY)) {
1070 // Always clobber. Try it.
1071 KJ_SYSCALL_HANDLE_ERRORS(renameat(fromDirFd, fromPath.cStr(), fd.get(), toPath.cStr())) {
1072 case EISDIR:
1073 case ENOTDIR:
1074 case ENOTEMPTY:
1075 case EEXIST:
1076 // Failed because target exists and due to the various weird quirks of rename(), it
1077 // can't remove it for us. On Linux we can try an exchange instead. On others we have
1078 // to move the target out of the way.
1079 break;
1080 default:
1081 if (errorReason == nullptr) {
1082 KJ_FAIL_SYSCALL("rename(fromPath, toPath)", error, fromPath, toPath) { return false; }
1083 } else {
1084 *errorReason = error;
1085 return false;
1086 }
1087 } else {
1088 return true;
1089 }
1090 }
1091
1092 #if __linux__ && defined(RENAME_EXCHANGE)
1093 // Try to use Linux's renameat2() to atomically check preconditions and apply.
1094
1095 if (has(mode, WriteMode::MODIFY)) {
1096 // Use an exchange to implement modification.
1097 //
1098 // We reach this branch when performing a MODIFY-only, or when performing a CREATE | MODIFY
1099 // in which we determined above that there's a node of a different type blocking the
1100 // exchange.
1101
1102 KJ_SYSCALL_HANDLE_ERRORS(syscall(SYS_renameat2,
1103 fromDirFd, fromPath.cStr(), fd.get(), toPath.cStr(), RENAME_EXCHANGE)) {
1104 case ENOSYS:
1105 break; // fall back to traditional means
1106 case ENOENT:
1107 // Presumably because the target path doesn't exist.
1108 if (has(mode, WriteMode::CREATE)) {
1109 KJ_FAIL_ASSERT("rename(tmp, path) claimed path exists but "
1110 "renameat2(fromPath, toPath, EXCAHNGE) said it doest; concurrent modification?",
1111 fromPath, toPath) { return false; }
1112 } else {
1113 // Assume target doesn't exist.
1114 return false;
1115 }
1116 default:
1117 if (errorReason == nullptr) {
1118 KJ_FAIL_SYSCALL("renameat2(fromPath, toPath, EXCHANGE)", error, fromPath, toPath) {
1119 return false;
1120 }
1121 } else {
1122 *errorReason = error;
1123 return false;
1124 }
1125 } else {
1126 // Successful swap! Delete swapped-out content.
1127 rmrf(fromDirFd, fromPath);
1128 return true;
1129 }
1130 } else if (has(mode, WriteMode::CREATE)) {
1131 KJ_SYSCALL_HANDLE_ERRORS(syscall(SYS_renameat2,
1132 fromDirFd, fromPath.cStr(), fd.get(), toPath.cStr(), RENAME_NOREPLACE)) {
1133 case ENOSYS:
1134 break; // fall back to traditional means
1135 case EEXIST:
1136 return false;
1137 default:
1138 if (errorReason == nullptr) {
1139 KJ_FAIL_SYSCALL("renameat2(fromPath, toPath, NOREPLACE)", error, fromPath, toPath) {
1140 return false;
1141 }
1142 } else {
1143 *errorReason = error;
1144 return false;
1145 }
1146 } else {
1147 return true;
1148 }
1149 }
1150 #endif
1151
1152 // We're unable to do what we wanted atomically. :(
1153
1154 if (has(mode, WriteMode::CREATE) && has(mode, WriteMode::MODIFY)) {
1155 // We failed to atomically delete the target previously. So now we need to do two calls in
1156 // rapid succession to move the old file away then move the new one into place.
1157
1158 // Find out what kind of file exists at the target path.
1159 struct stat stats;
1160 KJ_SYSCALL(fstatat(fd, toPath.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) { return false; }
1161
1162 // Create a temporary location to move the existing object to. Note that rename() allows a
1163 // non-directory to replace a non-directory, and allows a directory to replace an empty
1164 // directory. So we have to create the right type.
1165 Path toPathParsed = Path::parse(toPath);
1166 String away;
1167 KJ_IF_MAYBE(awayPath, createNamedTemporary(toPathParsed, WriteMode::CREATE,
1168 [&](StringPtr candidatePath) {
1169 if (S_ISDIR(stats.st_mode)) {
1170 return mkdirat(fd, candidatePath.cStr(), 0700);
1171 } else {
1172 #if __APPLE__
1173 // No mknodat() on OSX, gotta open() a file, ugh.
1174 int newFd = openat(fd, candidatePath.cStr(),
1175 O_RDWR | O_CREAT | O_EXCL | MAYBE_O_CLOEXEC, 0700);
1176 if (newFd >= 0) close(newFd);
1177 return newFd;
1178 #else
1179 return mknodat(fd, candidatePath.cStr(), S_IFREG | 0600, dev_t());
1180 #endif
1181 }
1182 })) {
1183 away = kj::mv(*awayPath);
1184 } else {
1185 // Already threw.
1186 return false;
1187 }
1188
1189 // OK, now move the target object to replace the thing we just created.
1190 KJ_SYSCALL(renameat(fd, toPath.cStr(), fd, away.cStr())) {
1191 // Something went wrong. Remove the thing we just created.
1192 unlinkat(fd, away.cStr(), S_ISDIR(stats.st_mode) ? AT_REMOVEDIR : 0);
1193 return false;
1194 }
1195
1196 // Now move the source object to the target location.
1197 KJ_SYSCALL_HANDLE_ERRORS(renameat(fromDirFd, fromPath.cStr(), fd, toPath.cStr())) {
1198 default:
1199 // Try to put things back where they were. If this fails, though, then we have little
1200 // choice but to leave things broken.
1201 KJ_SYSCALL_HANDLE_ERRORS(renameat(fd, away.cStr(), fd, toPath.cStr())) {
1202 default: break;
1203 }
1204
1205 if (errorReason == nullptr) {
1206 KJ_FAIL_SYSCALL("rename(fromPath, toPath)", error, fromPath, toPath) {
1207 return false;
1208 }
1209 } else {
1210 *errorReason = error;
1211 return false;
1212 }
1213 }
1214
1215 // OK, success. Delete the old content.
1216 rmrf(fd, away);
1217 return true;
1218 } else {
1219 // Only one of CREATE or MODIFY is specified, so we need to verify non-atomically that the
1220 // corresponding precondition (must-not-exist or must-exist, respectively) is held.
1221 if (has(mode, WriteMode::CREATE)) {
1222 struct stat stats;
1223 KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd.get(), toPath.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) {
1224 case ENOENT:
1225 case ENOTDIR:
1226 break; // doesn't exist; continue
1227 default:
1228 KJ_FAIL_SYSCALL("fstatat(fd, toPath)", error, toPath) { return false; }
1229 } else {
1230 return false; // already exists; fail
1231 }
1232 } else if (has(mode, WriteMode::MODIFY)) {
1233 struct stat stats;
1234 KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd.get(), toPath.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) {
1235 case ENOENT:
1236 case ENOTDIR:
1237 return false; // doesn't exist; fail
1238 default:
1239 KJ_FAIL_SYSCALL("fstatat(fd, toPath)", error, toPath) { return false; }
1240 } else {
1241 // already exists; continue
1242 }
1243 } else {
1244 // Neither CREATE nor MODIFY.
1245 return false;
1246 }
1247
1248 // Start over in create-and-modify mode.
1249 return tryCommitReplacement(toPath, fromDirFd, fromPath,
1250 WriteMode::CREATE | WriteMode::MODIFY,
1251 errorReason);
1252 }
1253 }
1254
1255 template <typename T>
1256 class ReplacerImpl final: public Directory::Replacer<T> {
1257 public:
ReplacerImpl(Own<const T> && object,const DiskHandle & handle,String && tempPath,String && path,WriteMode mode)1258 ReplacerImpl(Own<const T>&& object, const DiskHandle& handle,
1259 String&& tempPath, String&& path, WriteMode mode)
1260 : Directory::Replacer<T>(mode),
1261 object(kj::mv(object)), handle(handle),
1262 tempPath(kj::mv(tempPath)), path(kj::mv(path)) {}
1263
~ReplacerImpl()1264 ~ReplacerImpl() noexcept(false) {
1265 if (!committed) {
1266 rmrf(handle.fd, tempPath);
1267 }
1268 }
1269
get()1270 const T& get() override {
1271 return *object;
1272 }
1273
tryCommit()1274 bool tryCommit() override {
1275 KJ_ASSERT(!committed, "already committed") { return false; }
1276 return committed = handle.tryCommitReplacement(path, handle.fd, tempPath,
1277 Directory::Replacer<T>::mode);
1278 }
1279
1280 private:
1281 Own<const T> object;
1282 const DiskHandle& handle;
1283 String tempPath;
1284 String path;
1285 bool committed = false; // true if *successfully* committed (in which case tempPath is gone)
1286 };
1287
1288 template <typename T>
1289 class BrokenReplacer final: public Directory::Replacer<T> {
1290 // For recovery path when exceptions are disabled.
1291
1292 public:
BrokenReplacer(Own<const T> inner)1293 BrokenReplacer(Own<const T> inner)
1294 : Directory::Replacer<T>(WriteMode::CREATE | WriteMode::MODIFY),
1295 inner(kj::mv(inner)) {}
1296
get()1297 const T& get() override { return *inner; }
tryCommit()1298 bool tryCommit() override { return false; }
1299
1300 private:
1301 Own<const T> inner;
1302 };
1303
tryOpenFile(PathPtr path,WriteMode mode) const1304 Maybe<Own<const File>> tryOpenFile(PathPtr path, WriteMode mode) const {
1305 return tryOpenFileInternal(path, mode, false).map(newDiskFile);
1306 }
1307
replaceFile(PathPtr path,WriteMode mode) const1308 Own<Directory::Replacer<File>> replaceFile(PathPtr path, WriteMode mode) const {
1309 mode_t acl = 0666;
1310 if (has(mode, WriteMode::EXECUTABLE)) {
1311 acl = 0777;
1312 }
1313 if (has(mode, WriteMode::PRIVATE)) {
1314 acl &= 0700;
1315 }
1316
1317 int newFd_;
1318 KJ_IF_MAYBE(temp, createNamedTemporary(path, mode,
1319 [&](StringPtr candidatePath) {
1320 return newFd_ = openat(fd, candidatePath.cStr(),
1321 O_RDWR | O_CREAT | O_EXCL | MAYBE_O_CLOEXEC, acl);
1322 })) {
1323 AutoCloseFd newFd(newFd_);
1324 #ifndef O_CLOEXEC
1325 setCloexec(newFd);
1326 #endif
1327 return heap<ReplacerImpl<File>>(newDiskFile(kj::mv(newFd)), *this, kj::mv(*temp),
1328 path.toString(), mode);
1329 } else {
1330 // threw, but exceptions are disabled
1331 return heap<BrokenReplacer<File>>(newInMemoryFile(nullClock()));
1332 }
1333 }
1334
createTemporary() const1335 Own<const File> createTemporary() const {
1336 int newFd_;
1337
1338 #if __linux__ && defined(O_TMPFILE)
1339 // Use syscall() to work around glibc bug with O_TMPFILE:
1340 // https://sourceware.org/bugzilla/show_bug.cgi?id=17523
1341 KJ_SYSCALL_HANDLE_ERRORS(newFd_ = syscall(
1342 SYS_openat, fd.get(), ".", O_RDWR | O_TMPFILE, 0700)) {
1343 case EOPNOTSUPP:
1344 case EINVAL:
1345 case EISDIR:
1346 // Maybe not supported by this kernel / filesystem. Fall back to below.
1347 break;
1348 default:
1349 KJ_FAIL_SYSCALL("open(O_TMPFILE)", error) { break; }
1350 break;
1351 } else {
1352 AutoCloseFd newFd(newFd_);
1353 #ifndef O_CLOEXEC
1354 setCloexec(newFd);
1355 #endif
1356 return newDiskFile(kj::mv(newFd));
1357 }
1358 #endif
1359
1360 KJ_IF_MAYBE(temp, createNamedTemporary(Path("unnamed"), WriteMode::CREATE,
1361 [&](StringPtr path) {
1362 return newFd_ = openat(fd, path.cStr(), O_RDWR | O_CREAT | O_EXCL | MAYBE_O_CLOEXEC, 0600);
1363 })) {
1364 AutoCloseFd newFd(newFd_);
1365 #ifndef O_CLOEXEC
1366 setCloexec(newFd);
1367 #endif
1368 auto result = newDiskFile(kj::mv(newFd));
1369 KJ_SYSCALL(unlinkat(fd, temp->cStr(), 0)) { break; }
1370 return kj::mv(result);
1371 } else {
1372 // threw, but exceptions are disabled
1373 return newInMemoryFile(nullClock());
1374 }
1375 }
1376
tryAppendFile(PathPtr path,WriteMode mode) const1377 Maybe<Own<AppendableFile>> tryAppendFile(PathPtr path, WriteMode mode) const {
1378 return tryOpenFileInternal(path, mode, true).map(newDiskAppendableFile);
1379 }
1380
tryOpenSubdir(PathPtr path,WriteMode mode) const1381 Maybe<Own<const Directory>> tryOpenSubdir(PathPtr path, WriteMode mode) const {
1382 // Must create before open.
1383 if (has(mode, WriteMode::CREATE)) {
1384 if (!tryMkdir(path, mode, false)) return nullptr;
1385 }
1386
1387 return tryOpenSubdirInternal(path).map(newDiskDirectory);
1388 }
1389
replaceSubdir(PathPtr path,WriteMode mode) const1390 Own<Directory::Replacer<Directory>> replaceSubdir(PathPtr path, WriteMode mode) const {
1391 mode_t acl = has(mode, WriteMode::PRIVATE) ? 0700 : 0777;
1392
1393 KJ_IF_MAYBE(temp, createNamedTemporary(path, mode,
1394 [&](StringPtr candidatePath) {
1395 return mkdirat(fd, candidatePath.cStr(), acl);
1396 })) {
1397 int subdirFd_;
1398 KJ_SYSCALL_HANDLE_ERRORS(subdirFd_ = openat(
1399 fd, temp->cStr(), O_RDONLY | MAYBE_O_CLOEXEC | MAYBE_O_DIRECTORY)) {
1400 default:
1401 KJ_FAIL_SYSCALL("open(just-created-temporary)", error);
1402 return heap<BrokenReplacer<Directory>>(newInMemoryDirectory(nullClock()));
1403 }
1404
1405 AutoCloseFd subdirFd(subdirFd_);
1406 #ifndef O_CLOEXEC
1407 setCloexec(subdirFd);
1408 #endif
1409 return heap<ReplacerImpl<Directory>>(
1410 newDiskDirectory(kj::mv(subdirFd)), *this, kj::mv(*temp), path.toString(), mode);
1411 } else {
1412 // threw, but exceptions are disabled
1413 return heap<BrokenReplacer<Directory>>(newInMemoryDirectory(nullClock()));
1414 }
1415 }
1416
trySymlink(PathPtr linkpath,StringPtr content,WriteMode mode) const1417 bool trySymlink(PathPtr linkpath, StringPtr content, WriteMode mode) const {
1418 return tryReplaceNode(linkpath, mode, [&](StringPtr candidatePath) {
1419 return symlinkat(content.cStr(), fd, candidatePath.cStr());
1420 });
1421 }
1422
tryTransfer(PathPtr toPath,WriteMode toMode,const Directory & fromDirectory,PathPtr fromPath,TransferMode mode,const Directory & self) const1423 bool tryTransfer(PathPtr toPath, WriteMode toMode,
1424 const Directory& fromDirectory, PathPtr fromPath,
1425 TransferMode mode, const Directory& self) const {
1426 KJ_REQUIRE(toPath.size() > 0, "can't replace self") { return false; }
1427
1428 if (mode == TransferMode::LINK) {
1429 KJ_IF_MAYBE(fromFd, fromDirectory.getFd()) {
1430 // Other is a disk directory, so we can hopefully do an efficient move/link.
1431 return tryReplaceNode(toPath, toMode, [&](StringPtr candidatePath) {
1432 return linkat(*fromFd, fromPath.toString().cStr(), fd, candidatePath.cStr(), 0);
1433 });
1434 };
1435 } else if (mode == TransferMode::MOVE) {
1436 KJ_IF_MAYBE(fromFd, fromDirectory.getFd()) {
1437 KJ_ASSERT(mode == TransferMode::MOVE);
1438
1439 int error = 0;
1440 if (tryCommitReplacement(toPath.toString(), *fromFd, fromPath.toString(), toMode,
1441 &error)) {
1442 return true;
1443 } else switch (error) {
1444 case 0:
1445 // Plain old WriteMode precondition failure.
1446 return false;
1447 case EXDEV:
1448 // Can't move between devices. Fall back to default implementation, which does
1449 // copy/delete.
1450 break;
1451 case ENOENT:
1452 // Either the destination directory doesn't exist or the source path doesn't exist.
1453 // Unfortunately we don't really know. If CREATE_PARENT was provided, try creating
1454 // the parent directory. Otherwise, we don't actually need to distinguish between
1455 // these two errors; just return false.
1456 if (has(toMode, WriteMode::CREATE) && has(toMode, WriteMode::CREATE_PARENT) &&
1457 toPath.size() > 0 && tryMkdir(toPath.parent(),
1458 WriteMode::CREATE | WriteMode::MODIFY | WriteMode::CREATE_PARENT, true)) {
1459 // Retry, but make sure we don't try to create the parent again.
1460 return tryTransfer(toPath, toMode - WriteMode::CREATE_PARENT,
1461 fromDirectory, fromPath, mode, self);
1462 }
1463 return false;
1464 default:
1465 KJ_FAIL_SYSCALL("rename(fromPath, toPath)", error, fromPath, toPath) {
1466 return false;
1467 }
1468 }
1469 }
1470 }
1471
1472 // OK, we can't do anything efficient using the OS. Fall back to default implementation.
1473 return self.Directory::tryTransfer(toPath, toMode, fromDirectory, fromPath, mode);
1474 }
1475
tryRemove(PathPtr path) const1476 bool tryRemove(PathPtr path) const {
1477 return rmrf(fd, path.toString());
1478 }
1479
1480 protected:
1481 AutoCloseFd fd;
1482 };
1483
// Expands to the FsNode overrides shared by all the disk-backed classes below: stat(), sync(),
// datasync() and getFd() forward to DiskHandle, and cloneFsNode() heap-allocates a `classname`
// from DiskHandle::clone().
#define FSNODE_METHODS(classname)                                   \
  Metadata stat() const override { return DiskHandle::stat(); }     \
  void sync() const override { DiskHandle::sync(); }                \
  void datasync() const override { DiskHandle::datasync(); }        \
                                                                    \
  Maybe<int> getFd() const override { return DiskHandle::getFd(); } \
                                                                    \
  Own<const FsNode> cloneFsNode() const override {                  \
    return heap<classname>(DiskHandle::clone());                    \
  }
1494
1495 class DiskReadableFile final: public ReadableFile, public DiskHandle {
1496 public:
DiskReadableFile(AutoCloseFd && fd)1497 DiskReadableFile(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {}
1498
1499 FSNODE_METHODS(DiskReadableFile);
1500
read(uint64_t offset,ArrayPtr<byte> buffer) const1501 size_t read(uint64_t offset, ArrayPtr<byte> buffer) const override {
1502 return DiskHandle::read(offset, buffer);
1503 }
mmap(uint64_t offset,uint64_t size) const1504 Array<const byte> mmap(uint64_t offset, uint64_t size) const override {
1505 return DiskHandle::mmap(offset, size);
1506 }
mmapPrivate(uint64_t offset,uint64_t size) const1507 Array<byte> mmapPrivate(uint64_t offset, uint64_t size) const override {
1508 return DiskHandle::mmapPrivate(offset, size);
1509 }
1510 };
1511
1512 class DiskAppendableFile final: public AppendableFile, public DiskHandle, public FdOutputStream {
1513 public:
DiskAppendableFile(AutoCloseFd && fd)1514 DiskAppendableFile(AutoCloseFd&& fd)
1515 : DiskHandle(kj::mv(fd)),
1516 FdOutputStream(DiskHandle::fd.get()) {}
1517
1518 FSNODE_METHODS(DiskAppendableFile);
1519
write(const void * buffer,size_t size)1520 void write(const void* buffer, size_t size) override {
1521 FdOutputStream::write(buffer, size);
1522 }
write(ArrayPtr<const ArrayPtr<const byte>> pieces)1523 void write(ArrayPtr<const ArrayPtr<const byte>> pieces) override {
1524 FdOutputStream::write(pieces);
1525 }
1526 };
1527
1528 class DiskFile final: public File, public DiskHandle {
1529 public:
DiskFile(AutoCloseFd && fd)1530 DiskFile(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {}
1531
1532 FSNODE_METHODS(DiskFile);
1533
read(uint64_t offset,ArrayPtr<byte> buffer) const1534 size_t read(uint64_t offset, ArrayPtr<byte> buffer) const override {
1535 return DiskHandle::read(offset, buffer);
1536 }
mmap(uint64_t offset,uint64_t size) const1537 Array<const byte> mmap(uint64_t offset, uint64_t size) const override {
1538 return DiskHandle::mmap(offset, size);
1539 }
mmapPrivate(uint64_t offset,uint64_t size) const1540 Array<byte> mmapPrivate(uint64_t offset, uint64_t size) const override {
1541 return DiskHandle::mmapPrivate(offset, size);
1542 }
1543
write(uint64_t offset,ArrayPtr<const byte> data) const1544 void write(uint64_t offset, ArrayPtr<const byte> data) const override {
1545 DiskHandle::write(offset, data);
1546 }
zero(uint64_t offset,uint64_t size) const1547 void zero(uint64_t offset, uint64_t size) const override {
1548 DiskHandle::zero(offset, size);
1549 }
truncate(uint64_t size) const1550 void truncate(uint64_t size) const override {
1551 DiskHandle::truncate(size);
1552 }
mmapWritable(uint64_t offset,uint64_t size) const1553 Own<const WritableFileMapping> mmapWritable(uint64_t offset, uint64_t size) const override {
1554 return DiskHandle::mmapWritable(offset, size);
1555 }
copy(uint64_t offset,const ReadableFile & from,uint64_t fromOffset,uint64_t size) const1556 size_t copy(uint64_t offset, const ReadableFile& from,
1557 uint64_t fromOffset, uint64_t size) const override {
1558 KJ_IF_MAYBE(result, DiskHandle::copy(offset, from, fromOffset, size)) {
1559 return *result;
1560 } else {
1561 return File::copy(offset, from, fromOffset, size);
1562 }
1563 }
1564 };
1565
1566 class DiskReadableDirectory final: public ReadableDirectory, public DiskHandle {
1567 public:
DiskReadableDirectory(AutoCloseFd && fd)1568 DiskReadableDirectory(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {}
1569
1570 FSNODE_METHODS(DiskReadableDirectory);
1571
listNames() const1572 Array<String> listNames() const override { return DiskHandle::listNames(); }
listEntries() const1573 Array<Entry> listEntries() const override { return DiskHandle::listEntries(); }
exists(PathPtr path) const1574 bool exists(PathPtr path) const override { return DiskHandle::exists(path); }
tryLstat(PathPtr path) const1575 Maybe<FsNode::Metadata> tryLstat(PathPtr path) const override {
1576 return DiskHandle::tryLstat(path);
1577 }
tryOpenFile(PathPtr path) const1578 Maybe<Own<const ReadableFile>> tryOpenFile(PathPtr path) const override {
1579 return DiskHandle::tryOpenFile(path);
1580 }
tryOpenSubdir(PathPtr path) const1581 Maybe<Own<const ReadableDirectory>> tryOpenSubdir(PathPtr path) const override {
1582 return DiskHandle::tryOpenSubdir(path);
1583 }
tryReadlink(PathPtr path) const1584 Maybe<String> tryReadlink(PathPtr path) const override { return DiskHandle::tryReadlink(path); }
1585 };
1586
1587 class DiskDirectory final: public Directory, public DiskHandle {
1588 public:
DiskDirectory(AutoCloseFd && fd)1589 DiskDirectory(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {}
1590
1591 FSNODE_METHODS(DiskDirectory);
1592
listNames() const1593 Array<String> listNames() const override { return DiskHandle::listNames(); }
listEntries() const1594 Array<Entry> listEntries() const override { return DiskHandle::listEntries(); }
exists(PathPtr path) const1595 bool exists(PathPtr path) const override { return DiskHandle::exists(path); }
tryLstat(PathPtr path) const1596 Maybe<FsNode::Metadata> tryLstat(PathPtr path) const override {
1597 return DiskHandle::tryLstat(path);
1598 }
tryOpenFile(PathPtr path) const1599 Maybe<Own<const ReadableFile>> tryOpenFile(PathPtr path) const override {
1600 return DiskHandle::tryOpenFile(path);
1601 }
tryOpenSubdir(PathPtr path) const1602 Maybe<Own<const ReadableDirectory>> tryOpenSubdir(PathPtr path) const override {
1603 return DiskHandle::tryOpenSubdir(path);
1604 }
tryReadlink(PathPtr path) const1605 Maybe<String> tryReadlink(PathPtr path) const override { return DiskHandle::tryReadlink(path); }
1606
tryOpenFile(PathPtr path,WriteMode mode) const1607 Maybe<Own<const File>> tryOpenFile(PathPtr path, WriteMode mode) const override {
1608 return DiskHandle::tryOpenFile(path, mode);
1609 }
replaceFile(PathPtr path,WriteMode mode) const1610 Own<Replacer<File>> replaceFile(PathPtr path, WriteMode mode) const override {
1611 return DiskHandle::replaceFile(path, mode);
1612 }
createTemporary() const1613 Own<const File> createTemporary() const override {
1614 return DiskHandle::createTemporary();
1615 }
tryAppendFile(PathPtr path,WriteMode mode) const1616 Maybe<Own<AppendableFile>> tryAppendFile(PathPtr path, WriteMode mode) const override {
1617 return DiskHandle::tryAppendFile(path, mode);
1618 }
tryOpenSubdir(PathPtr path,WriteMode mode) const1619 Maybe<Own<const Directory>> tryOpenSubdir(PathPtr path, WriteMode mode) const override {
1620 return DiskHandle::tryOpenSubdir(path, mode);
1621 }
replaceSubdir(PathPtr path,WriteMode mode) const1622 Own<Replacer<Directory>> replaceSubdir(PathPtr path, WriteMode mode) const override {
1623 return DiskHandle::replaceSubdir(path, mode);
1624 }
trySymlink(PathPtr linkpath,StringPtr content,WriteMode mode) const1625 bool trySymlink(PathPtr linkpath, StringPtr content, WriteMode mode) const override {
1626 return DiskHandle::trySymlink(linkpath, content, mode);
1627 }
tryTransfer(PathPtr toPath,WriteMode toMode,const Directory & fromDirectory,PathPtr fromPath,TransferMode mode) const1628 bool tryTransfer(PathPtr toPath, WriteMode toMode,
1629 const Directory& fromDirectory, PathPtr fromPath,
1630 TransferMode mode) const override {
1631 return DiskHandle::tryTransfer(toPath, toMode, fromDirectory, fromPath, mode, *this);
1632 }
1633 // tryTransferTo() not implemented because we have nothing special we can do.
tryRemove(PathPtr path) const1634 bool tryRemove(PathPtr path) const override {
1635 return DiskHandle::tryRemove(path);
1636 }
1637 };
1638
1639 class DiskFilesystem final: public Filesystem {
1640 public:
DiskFilesystem()1641 DiskFilesystem()
1642 : root(openDir("/")),
1643 current(openDir(".")),
1644 currentPath(computeCurrentPath()) {}
1645
getRoot() const1646 const Directory& getRoot() const override {
1647 return root;
1648 }
1649
getCurrent() const1650 const Directory& getCurrent() const override {
1651 return current;
1652 }
1653
getCurrentPath() const1654 PathPtr getCurrentPath() const override {
1655 return currentPath;
1656 }
1657
1658 private:
1659 DiskDirectory root;
1660 DiskDirectory current;
1661 Path currentPath;
1662
openDir(const char * dir)1663 static AutoCloseFd openDir(const char* dir) {
1664 int newFd;
1665 KJ_SYSCALL(newFd = open(dir, O_RDONLY | MAYBE_O_CLOEXEC | MAYBE_O_DIRECTORY));
1666 AutoCloseFd result(newFd);
1667 #ifndef O_CLOEXEC
1668 setCloexec(result);
1669 #endif
1670 return result;
1671 }
1672
computeCurrentPath()1673 static Path computeCurrentPath() {
1674 // If env var PWD is set and points to the current directory, use it. This captures the current
1675 // path according to the user's shell, which may differ from the kernel's idea in the presence
1676 // of symlinks.
1677 const char* pwd = getenv("PWD");
1678 if (pwd != nullptr) {
1679 Path result = nullptr;
1680 struct stat pwdStat, dotStat;
1681 KJ_IF_MAYBE(e, kj::runCatchingExceptions([&]() {
1682 KJ_ASSERT(pwd[0] == '/') { return; }
1683 result = Path::parse(pwd + 1);
1684 KJ_SYSCALL(lstat(result.toString(true).cStr(), &pwdStat), result) { return; }
1685 KJ_SYSCALL(lstat(".", &dotStat)) { return; }
1686 })) {
1687 // failed, give up on PWD
1688 KJ_LOG(WARNING, "PWD environment variable seems invalid", pwd, *e);
1689 } else {
1690 if (pwdStat.st_ino == dotStat.st_ino &&
1691 pwdStat.st_dev == dotStat.st_dev) {
1692 return kj::mv(result);
1693 } else {
1694 KJ_LOG(WARNING, "PWD environment variable doesn't match current directory", pwd);
1695 }
1696 }
1697 }
1698
1699 size_t size = 256;
1700 retry:
1701 KJ_STACK_ARRAY(char, buf, size, 256, 4096);
1702 if (getcwd(buf.begin(), size) == nullptr) {
1703 int error = errno;
1704 if (error == ENAMETOOLONG) {
1705 size *= 2;
1706 goto retry;
1707 } else {
1708 KJ_FAIL_SYSCALL("getcwd()", error);
1709 }
1710 }
1711
1712 StringPtr path = buf.begin();
1713
1714 // On Linux, the path will start with "(unreachable)" if the working directory is not a subdir
1715 // of the root directory, which is possible via chroot() or mount namespaces.
1716 KJ_ASSERT(!path.startsWith("(unreachable)"),
1717 "working directory is not reachable from root", path);
1718 KJ_ASSERT(path.startsWith("/"), "current directory is not absolute", path);
1719
1720 return Path::parse(path.slice(1));
1721 }
1722 };
1723
1724 } // namespace
1725
newDiskReadableFile(kj::AutoCloseFd fd)1726 Own<ReadableFile> newDiskReadableFile(kj::AutoCloseFd fd) {
1727 return heap<DiskReadableFile>(kj::mv(fd));
1728 }
newDiskAppendableFile(kj::AutoCloseFd fd)1729 Own<AppendableFile> newDiskAppendableFile(kj::AutoCloseFd fd) {
1730 return heap<DiskAppendableFile>(kj::mv(fd));
1731 }
newDiskFile(kj::AutoCloseFd fd)1732 Own<File> newDiskFile(kj::AutoCloseFd fd) {
1733 return heap<DiskFile>(kj::mv(fd));
1734 }
newDiskReadableDirectory(kj::AutoCloseFd fd)1735 Own<ReadableDirectory> newDiskReadableDirectory(kj::AutoCloseFd fd) {
1736 return heap<DiskReadableDirectory>(kj::mv(fd));
1737 }
newDiskDirectory(kj::AutoCloseFd fd)1738 Own<Directory> newDiskDirectory(kj::AutoCloseFd fd) {
1739 return heap<DiskDirectory>(kj::mv(fd));
1740 }
1741
newDiskFilesystem()1742 Own<Filesystem> newDiskFilesystem() {
1743 return heap<DiskFilesystem>();
1744 }
1745
1746 } // namespace kj
1747
1748 #endif // !_WIN32
1749