1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // Ensure 64-bit off_t for platforms where it matters
19 #ifdef _FILE_OFFSET_BITS
20 #undef _FILE_OFFSET_BITS
21 #endif
22 
23 #define _FILE_OFFSET_BITS 64
24 
25 #if defined(sun) || defined(__sun)
26 // According to https://bugs.python.org/issue1759169#msg82201, __EXTENSIONS__
27 // is the best way to enable modern POSIX APIs, such as posix_madvise(), on Solaris.
28 // (see also
29 // https://github.com/illumos/illumos-gate/blob/master/usr/src/uts/common/sys/mman.h)
30 #undef __EXTENSIONS__
31 #define __EXTENSIONS__
32 #endif
33 
34 #include "arrow/util/windows_compatibility.h"  // IWYU pragma: keep
35 
36 #include <algorithm>
37 #include <cerrno>
38 #include <cstdint>
39 #include <cstring>
40 #include <iostream>
41 #include <random>
42 #include <sstream>
43 #include <string>
44 #include <thread>
45 #include <utility>
46 #include <vector>
47 
48 #include <fcntl.h>
49 #include <signal.h>
50 #include <stdlib.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>  // IWYU pragma: keep
53 
54 // ----------------------------------------------------------------------
55 // file compatibility stuff
56 
57 #ifdef _WIN32
58 #include <io.h>
59 #include <share.h>
60 #else  // POSIX-like platforms
61 #include <dirent.h>
62 #endif
63 
64 #ifdef _WIN32
65 #include "arrow/io/mman.h"
66 #undef Realloc
67 #undef Free
68 #else  // POSIX-like platforms
69 #include <sys/mman.h>
70 #include <unistd.h>
71 #endif
72 
73 // define max read/write count
74 #ifdef _WIN32
75 #define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
76 #else
77 
78 #ifdef __APPLE__
79 // due to macOS bug, we need to set read/write max
80 #define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
81 #else
82 // see notes on Linux read/write manpage
83 #define ARROW_MAX_IO_CHUNKSIZE 0x7ffff000
84 #endif
85 
86 #endif
87 
88 #include "arrow/buffer.h"
89 #include "arrow/result.h"
90 #include "arrow/util/checked_cast.h"
91 #include "arrow/util/io_util.h"
92 #include "arrow/util/logging.h"
93 
94 // For filename conversion
95 #if defined(_WIN32)
96 #include "arrow/util/utf8.h"
97 #endif
98 
99 namespace arrow {
100 
101 using internal::checked_cast;
102 
103 namespace internal {
104 
105 namespace {
106 
// Return `s` with every occurrence of `find` replaced by `rep`.
// `s` is taken by value, so a caller can move a string in to avoid a copy.
template <typename CharT>
std::basic_string<CharT> ReplaceChars(std::basic_string<CharT> s, CharT find, CharT rep) {
  // Nothing to rewrite when both characters are identical
  if (find == rep) {
    return s;
  }
  std::replace(s.begin(), s.end(), find, rep);
  return s;
}
118 
StringToNative(const std::string & s)119 Result<NativePathString> StringToNative(const std::string& s) {
120 #if _WIN32
121   return ::arrow::util::UTF8ToWideString(s);
122 #else
123   return s;
124 #endif
125 }
126 
#if _WIN32
// Convert a native path string to a std::string through
// ::arrow::util::WideStringToUTF8().  Only compiled on Windows.
Result<std::string> NativeToString(const NativePathString& ws) {
  auto utf8 = ::arrow::util::WideStringToUTF8(ws);
  return utf8;
}
#endif
132 
// Path separator constants.
//  - kNativeSep: separator used in native paths
//  - kGenericSep: separator used in generic ('/'-separated) paths
//  - kAllSeps: NUL-terminated set of every character treated as a separator
// The kAllSeps pointer itself is const so that it cannot be reseated.
#if _WIN32
const wchar_t kNativeSep = L'\\';
const wchar_t kGenericSep = L'/';
const wchar_t* const kAllSeps = L"\\/";
#else
const char kNativeSep = '/';
const char kGenericSep = '/';
const char* const kAllSeps = "/";
#endif
142 
NativeSlashes(NativePathString s)143 NativePathString NativeSlashes(NativePathString s) {
144   return ReplaceChars(std::move(s), kGenericSep, kNativeSep);
145 }
146 
GenericSlashes(NativePathString s)147 NativePathString GenericSlashes(NativePathString s) {
148   return ReplaceChars(std::move(s), kNativeSep, kGenericSep);
149 }
150 
// Return the parent of path `s`, i.e. `s` without its last component.
//
// Trailing separators are ignored when looking for the last component, and
// contiguous separators before it are dropped from the result.  `s` is
// returned unchanged when it consists only of separators or when it contains
// no separator other than trailing ones.
NativePathString NativeParent(const NativePathString& s) {
  const auto npos = NativePathString::npos;

  auto sep_pos = s.find_last_of(kAllSeps);
  if (sep_pos == s.length() - 1) {
    // The last separator is a trailing one: skip all trailing separators
    // and search again from the last non-separator character
    const auto last_non_sep = s.find_last_not_of(kAllSeps);
    if (last_non_sep == npos) {
      // Only separators in path
      return s;
    }
    sep_pos = s.find_last_of(kAllSeps, last_non_sep);
  }
  if (sep_pos == npos) {
    // No (other) separator in path
    return s;
  }
  // There may be multiple contiguous separators, skip all of them.
  // If nothing precedes them, they are all at the start of the string
  // and are all kept.
  const auto parent_last = s.find_last_not_of(kAllSeps, sep_pos);
  const auto keep_until = (parent_last == npos) ? sep_pos : parent_last;
  return s.substr(0, keep_until + 1);
}
176 
ValidatePath(const std::string & s)177 Status ValidatePath(const std::string& s) {
178   if (s.find_first_of('\0') != std::string::npos) {
179     return Status::Invalid("Embedded NUL char in path: '", s, "'");
180   }
181   return Status::OK();
182 }
183 
184 }  // namespace
185 
// Return the message std::strerror() gives for `errnum`, as a std::string.
std::string ErrnoMessage(int errnum) {
  const char* message = std::strerror(errnum);
  return std::string(message);
}
187 
#if _WIN32
// Return the system message for Windows error code `errnum`, as formatted by
// FormatMessageA().  When no message can be formatted, the generic string
// "Windows error #<errnum>" is returned instead.
std::string WinErrorMessage(int errnum) {
  char message[1024];
  const auto length =
      FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL,
                     errnum, 0, message, sizeof(message), NULL);
  if (length != 0) {
    return std::string(message, length);
  }
  // Fallback
  std::stringstream fallback;
  fallback << "Windows error #" << errnum;
  return fallback.str();
}
#endif
202 
203 namespace {
204 
205 const char kErrnoDetailTypeId[] = "arrow::ErrnoDetail";
206 
207 class ErrnoDetail : public StatusDetail {
208  public:
ErrnoDetail(int errnum)209   explicit ErrnoDetail(int errnum) : errnum_(errnum) {}
210 
type_id() const211   const char* type_id() const override { return kErrnoDetailTypeId; }
212 
ToString() const213   std::string ToString() const override {
214     std::stringstream ss;
215     ss << "[errno " << errnum_ << "] " << ErrnoMessage(errnum_);
216     return ss.str();
217   }
218 
errnum() const219   int errnum() const { return errnum_; }
220 
221  protected:
222   int errnum_;
223 };
224 
#if _WIN32
// Type identifier reported by WinErrorDetail::type_id()
const char kWinErrorDetailTypeId[] = "arrow::WinErrorDetail";

// StatusDetail that records a Windows error code.
class WinErrorDetail : public StatusDetail {
 public:
  explicit WinErrorDetail(int errnum) : errnum_(errnum) {}

  // The Windows error code given at construction
  int errnum() const { return errnum_; }

  const char* type_id() const override { return kWinErrorDetailTypeId; }

  // Formats as "[Windows error <number>] <message>"
  std::string ToString() const override {
    std::stringstream out;
    out << "[Windows error " << errnum_ << "] ";
    out << WinErrorMessage(errnum_);
    return out.str();
  }

 protected:
  int errnum_;
};
#endif
246 
247 const char kSignalDetailTypeId[] = "arrow::SignalDetail";
248 
249 class SignalDetail : public StatusDetail {
250  public:
SignalDetail(int signum)251   explicit SignalDetail(int signum) : signum_(signum) {}
252 
type_id() const253   const char* type_id() const override { return kSignalDetailTypeId; }
254 
ToString() const255   std::string ToString() const override {
256     std::stringstream ss;
257     ss << "received signal " << signum_;
258     return ss.str();
259   }
260 
signum() const261   int signum() const { return signum_; }
262 
263  protected:
264   int signum_;
265 };
266 
267 }  // namespace
268 
StatusDetailFromErrno(int errnum)269 std::shared_ptr<StatusDetail> StatusDetailFromErrno(int errnum) {
270   return std::make_shared<ErrnoDetail>(errnum);
271 }
272 
#if _WIN32
// Create a StatusDetail that records the Windows error code `errnum`.
std::shared_ptr<StatusDetail> StatusDetailFromWinError(int errnum) {
  std::shared_ptr<StatusDetail> detail = std::make_shared<WinErrorDetail>(errnum);
  return detail;
}
#endif
278 
StatusDetailFromSignal(int signum)279 std::shared_ptr<StatusDetail> StatusDetailFromSignal(int signum) {
280   return std::make_shared<SignalDetail>(signum);
281 }
282 
ErrnoFromStatus(const Status & status)283 int ErrnoFromStatus(const Status& status) {
284   const auto detail = status.detail();
285   if (detail != nullptr && detail->type_id() == kErrnoDetailTypeId) {
286     return checked_cast<const ErrnoDetail&>(*detail).errnum();
287   }
288   return 0;
289 }
290 
// Return the Windows error code recorded in `status`, or 0 if `status` has no
// detail or its detail is not a WinErrorDetail.  Always 0 on non-Windows
// platforms.
int WinErrorFromStatus(const Status& status) {
#if _WIN32
  const auto detail = status.detail();
  // The detail type is recognized by the address of its type id
  const bool is_win_error =
      detail != nullptr && detail->type_id() == kWinErrorDetailTypeId;
  if (is_win_error) {
    return checked_cast<const WinErrorDetail&>(*detail).errnum();
  }
#endif
  return 0;
}
300 
SignalFromStatus(const Status & status)301 int SignalFromStatus(const Status& status) {
302   const auto detail = status.detail();
303   if (detail != nullptr && detail->type_id() == kSignalDetailTypeId) {
304     return checked_cast<const SignalDetail&>(*detail).signum();
305   }
306   return 0;
307 }
308 
309 //
310 // PlatformFilename implementation
311 //
312 
313 struct PlatformFilename::Impl {
314   Impl() = default;
Implarrow::internal::PlatformFilename::Impl315   explicit Impl(NativePathString p) : native_(NativeSlashes(std::move(p))) {}
316 
317   NativePathString native_;
318 
319   // '/'-separated
genericarrow::internal::PlatformFilename::Impl320   NativePathString generic() const { return GenericSlashes(native_); }
321 };
322 
PlatformFilename()323 PlatformFilename::PlatformFilename() : impl_(new Impl{}) {}
324 
~PlatformFilename()325 PlatformFilename::~PlatformFilename() {}
326 
PlatformFilename(Impl impl)327 PlatformFilename::PlatformFilename(Impl impl) : impl_(new Impl(std::move(impl))) {}
328 
// Copy constructor: builds a fresh Impl from the other object's native path.
PlatformFilename::PlatformFilename(const PlatformFilename& other)
    : impl_(new Impl{other.impl_->native_}) {}
331 
PlatformFilename(PlatformFilename && other)332 PlatformFilename::PlatformFilename(PlatformFilename&& other)
333     : impl_(std::move(other.impl_)) {}
334 
// Copy assignment: replaces the held Impl with a copy built from `other`.
PlatformFilename& PlatformFilename::operator=(const PlatformFilename& other) {
  // The copy is created before the current Impl is released
  Impl* copy = new Impl{other.impl_->native_};
  impl_.reset(copy);
  return *this;
}
339 
operator =(PlatformFilename && other)340 PlatformFilename& PlatformFilename::operator=(PlatformFilename&& other) {
341   this->impl_ = std::move(other.impl_);
342   return *this;
343 }
344 
PlatformFilename(const NativePathString & path)345 PlatformFilename::PlatformFilename(const NativePathString& path)
346     : PlatformFilename(Impl{path}) {}
347 
PlatformFilename(const NativePathString::value_type * path)348 PlatformFilename::PlatformFilename(const NativePathString::value_type* path)
349     : PlatformFilename(NativePathString(path)) {}
350 
operator ==(const PlatformFilename & other) const351 bool PlatformFilename::operator==(const PlatformFilename& other) const {
352   return impl_->native_ == other.impl_->native_;
353 }
354 
operator !=(const PlatformFilename & other) const355 bool PlatformFilename::operator!=(const PlatformFilename& other) const {
356   return impl_->native_ != other.impl_->native_;
357 }
358 
ToNative() const359 const NativePathString& PlatformFilename::ToNative() const { return impl_->native_; }
360 
ToString() const361 std::string PlatformFilename::ToString() const {
362 #if _WIN32
363   auto result = NativeToString(impl_->generic());
364   if (!result.ok()) {
365     std::stringstream ss;
366     ss << "<Unrepresentable filename: " << result.status().ToString() << ">";
367     return ss.str();
368   }
369   return *std::move(result);
370 #else
371   return impl_->generic();
372 #endif
373 }
374 
Parent() const375 PlatformFilename PlatformFilename::Parent() const {
376   return PlatformFilename(NativeParent(ToNative()));
377 }
378 
FromString(const std::string & file_name)379 Result<PlatformFilename> PlatformFilename::FromString(const std::string& file_name) {
380   RETURN_NOT_OK(ValidatePath(file_name));
381   ARROW_ASSIGN_OR_RAISE(auto ns, StringToNative(file_name));
382   return PlatformFilename(std::move(ns));
383 }
384 
// Return this path with `child` appended.  A native separator is inserted
// between the two unless this path is empty or already ends with one.
PlatformFilename PlatformFilename::Join(const PlatformFilename& child) const {
  NativePathString joined = impl_->native_;
  const bool needs_separator = !joined.empty() && joined.back() != kNativeSep;
  if (needs_separator) {
    joined += kNativeSep;
  }
  joined += child.impl_->native_;
  return PlatformFilename(Impl{std::move(joined)});
}
392 
Join(const std::string & child_name) const393 Result<PlatformFilename> PlatformFilename::Join(const std::string& child_name) const {
394   ARROW_ASSIGN_OR_RAISE(auto child, PlatformFilename::FromString(child_name));
395   return Join(child);
396 }
397 
398 //
399 // Filesystem access routines
400 //
401 
402 namespace {
403 
DoCreateDir(const PlatformFilename & dir_path,bool create_parents)404 Result<bool> DoCreateDir(const PlatformFilename& dir_path, bool create_parents) {
405 #ifdef _WIN32
406   const auto s = dir_path.ToNative().c_str();
407   if (CreateDirectoryW(s, nullptr)) {
408     return true;
409   }
410   int errnum = GetLastError();
411   if (errnum == ERROR_ALREADY_EXISTS) {
412     const auto attrs = GetFileAttributesW(s);
413     if (attrs == INVALID_FILE_ATTRIBUTES || !(attrs & FILE_ATTRIBUTE_DIRECTORY)) {
414       // Note we propagate the original error, not the GetFileAttributesW() error
415       return IOErrorFromWinError(ERROR_ALREADY_EXISTS, "Cannot create directory '",
416                                  dir_path.ToString(), "': non-directory entry exists");
417     }
418     return false;
419   }
420   if (create_parents && errnum == ERROR_PATH_NOT_FOUND) {
421     auto parent_path = dir_path.Parent();
422     if (parent_path != dir_path) {
423       RETURN_NOT_OK(DoCreateDir(parent_path, create_parents));
424       return DoCreateDir(dir_path, false);  // Retry
425     }
426   }
427   return IOErrorFromWinError(GetLastError(), "Cannot create directory '",
428                              dir_path.ToString(), "'");
429 #else
430   const auto s = dir_path.ToNative().c_str();
431   if (mkdir(s, S_IRWXU | S_IRWXG | S_IRWXO) == 0) {
432     return true;
433   }
434   if (errno == EEXIST) {
435     struct stat st;
436     if (stat(s, &st) || !S_ISDIR(st.st_mode)) {
437       // Note we propagate the original errno, not the stat() errno
438       return IOErrorFromErrno(EEXIST, "Cannot create directory '", dir_path.ToString(),
439                               "': non-directory entry exists");
440     }
441     return false;
442   }
443   if (create_parents && errno == ENOENT) {
444     auto parent_path = dir_path.Parent();
445     if (parent_path != dir_path) {
446       RETURN_NOT_OK(DoCreateDir(parent_path, create_parents));
447       return DoCreateDir(dir_path, false);  // Retry
448     }
449   }
450   return IOErrorFromErrno(errno, "Cannot create directory '", dir_path.ToString(), "'");
451 #endif
452 }
453 
454 }  // namespace
455 
CreateDir(const PlatformFilename & dir_path)456 Result<bool> CreateDir(const PlatformFilename& dir_path) {
457   return DoCreateDir(dir_path, false);
458 }
459 
CreateDirTree(const PlatformFilename & dir_path)460 Result<bool> CreateDirTree(const PlatformFilename& dir_path) {
461   return DoCreateDir(dir_path, true);
462 }
463 
464 #ifdef _WIN32
465 
466 namespace {
467 
FindHandleDeleter(HANDLE * handle)468 void FindHandleDeleter(HANDLE* handle) {
469   if (!FindClose(*handle)) {
470     ARROW_LOG(WARNING) << "Cannot close directory handle: "
471                        << WinErrorMessage(GetLastError());
472   }
473 }
474 
PathWithoutTrailingSlash(const PlatformFilename & fn)475 std::wstring PathWithoutTrailingSlash(const PlatformFilename& fn) {
476   std::wstring path = fn.ToNative();
477   while (!path.empty() && path.back() == kNativeSep) {
478     path.pop_back();
479   }
480   return path;
481 }
482 
ListDirInternal(const PlatformFilename & dir_path)483 Result<std::vector<WIN32_FIND_DATAW>> ListDirInternal(const PlatformFilename& dir_path) {
484   WIN32_FIND_DATAW find_data;
485   std::wstring pattern = PathWithoutTrailingSlash(dir_path) + L"\\*.*";
486   HANDLE handle = FindFirstFileW(pattern.c_str(), &find_data);
487   if (handle == INVALID_HANDLE_VALUE) {
488     return IOErrorFromWinError(GetLastError(), "Cannot list directory '",
489                                dir_path.ToString(), "'");
490   }
491 
492   std::unique_ptr<HANDLE, decltype(&FindHandleDeleter)> handle_guard(&handle,
493                                                                      FindHandleDeleter);
494 
495   std::vector<WIN32_FIND_DATAW> results;
496   do {
497     // Skip "." and ".."
498     if (find_data.cFileName[0] == L'.') {
499       if (find_data.cFileName[1] == L'\0' ||
500           (find_data.cFileName[1] == L'.' && find_data.cFileName[2] == L'\0')) {
501         continue;
502       }
503     }
504     results.push_back(find_data);
505   } while (FindNextFileW(handle, &find_data));
506 
507   int errnum = GetLastError();
508   if (errnum != ERROR_NO_MORE_FILES) {
509     return IOErrorFromWinError(GetLastError(), "Cannot list directory '",
510                                dir_path.ToString(), "'");
511   }
512   return results;
513 }
514 
FindOneFile(const PlatformFilename & fn,WIN32_FIND_DATAW * find_data,bool * exists=nullptr)515 Status FindOneFile(const PlatformFilename& fn, WIN32_FIND_DATAW* find_data,
516                    bool* exists = nullptr) {
517   HANDLE handle = FindFirstFileW(PathWithoutTrailingSlash(fn).c_str(), find_data);
518   if (handle == INVALID_HANDLE_VALUE) {
519     int errnum = GetLastError();
520     if (exists == nullptr ||
521         (errnum != ERROR_PATH_NOT_FOUND && errnum != ERROR_FILE_NOT_FOUND)) {
522       return IOErrorFromWinError(GetLastError(), "Cannot get information for path '",
523                                  fn.ToString(), "'");
524     }
525     *exists = false;
526   } else {
527     if (exists != nullptr) {
528       *exists = true;
529     }
530     FindHandleDeleter(&handle);
531   }
532   return Status::OK();
533 }
534 
535 }  // namespace
536 
ListDir(const PlatformFilename & dir_path)537 Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path) {
538   ARROW_ASSIGN_OR_RAISE(auto entries, ListDirInternal(dir_path));
539 
540   std::vector<PlatformFilename> results;
541   results.reserve(entries.size());
542   for (const auto& entry : entries) {
543     results.emplace_back(std::wstring(entry.cFileName));
544   }
545   return results;
546 }
547 
548 #else
549 
ListDir(const PlatformFilename & dir_path)550 Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path) {
551   DIR* dir = opendir(dir_path.ToNative().c_str());
552   if (dir == nullptr) {
553     return IOErrorFromErrno(errno, "Cannot list directory '", dir_path.ToString(), "'");
554   }
555 
556   auto dir_deleter = [](DIR* dir) -> void {
557     if (closedir(dir) != 0) {
558       ARROW_LOG(WARNING) << "Cannot close directory handle: " << ErrnoMessage(errno);
559     }
560   };
561   std::unique_ptr<DIR, decltype(dir_deleter)> dir_guard(dir, dir_deleter);
562 
563   std::vector<PlatformFilename> results;
564   errno = 0;
565   struct dirent* entry = readdir(dir);
566   while (entry != nullptr) {
567     std::string path = entry->d_name;
568     if (path != "." && path != "..") {
569       results.emplace_back(std::move(path));
570     }
571     entry = readdir(dir);
572   }
573   if (errno != 0) {
574     return IOErrorFromErrno(errno, "Cannot list directory '", dir_path.ToString(), "'");
575   }
576   return results;
577 }
578 
579 #endif
580 
581 namespace {
582 
583 #ifdef _WIN32
584 
585 Status DeleteDirTreeInternal(const PlatformFilename& dir_path);
586 
587 // Remove a directory entry that's always a directory
DeleteDirEntryDir(const PlatformFilename & path,const WIN32_FIND_DATAW & entry,bool remove_top_dir=true)588 Status DeleteDirEntryDir(const PlatformFilename& path, const WIN32_FIND_DATAW& entry,
589                          bool remove_top_dir = true) {
590   if ((entry.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0) {
591     // It's a directory that doesn't have a reparse point => recurse
592     RETURN_NOT_OK(DeleteDirTreeInternal(path));
593   }
594   if (remove_top_dir) {
595     // Remove now empty directory or reparse point (e.g. symlink to dir)
596     if (!RemoveDirectoryW(path.ToNative().c_str())) {
597       return IOErrorFromWinError(GetLastError(), "Cannot delete directory entry '",
598                                  path.ToString(), "': ");
599     }
600   }
601   return Status::OK();
602 }
603 
DeleteDirEntry(const PlatformFilename & path,const WIN32_FIND_DATAW & entry)604 Status DeleteDirEntry(const PlatformFilename& path, const WIN32_FIND_DATAW& entry) {
605   if ((entry.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) {
606     return DeleteDirEntryDir(path, entry);
607   }
608   // It's a non-directory entry, most likely a regular file
609   if (!DeleteFileW(path.ToNative().c_str())) {
610     return IOErrorFromWinError(GetLastError(), "Cannot delete file '", path.ToString(),
611                                "': ");
612   }
613   return Status::OK();
614 }
615 
DeleteDirTreeInternal(const PlatformFilename & dir_path)616 Status DeleteDirTreeInternal(const PlatformFilename& dir_path) {
617   ARROW_ASSIGN_OR_RAISE(auto entries, ListDirInternal(dir_path));
618   for (const auto& entry : entries) {
619     PlatformFilename path = dir_path.Join(PlatformFilename(entry.cFileName));
620     RETURN_NOT_OK(DeleteDirEntry(path, entry));
621   }
622   return Status::OK();
623 }
624 
DeleteDirContents(const PlatformFilename & dir_path,bool allow_not_found,bool remove_top_dir)625 Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found,
626                                bool remove_top_dir) {
627   bool exists = true;
628   WIN32_FIND_DATAW entry;
629   if (allow_not_found) {
630     RETURN_NOT_OK(FindOneFile(dir_path, &entry, &exists));
631   } else {
632     // Will raise if dir_path does not exist
633     RETURN_NOT_OK(FindOneFile(dir_path, &entry));
634   }
635   if (exists) {
636     RETURN_NOT_OK(DeleteDirEntryDir(dir_path, entry, remove_top_dir));
637   }
638   return exists;
639 }
640 
641 #else  // POSIX
642 
643 Status LinkStat(const PlatformFilename& path, struct stat* lst, bool* exists = nullptr) {
644   if (lstat(path.ToNative().c_str(), lst) != 0) {
645     if (exists == nullptr || (errno != ENOENT && errno != ENOTDIR && errno != ELOOP)) {
646       return IOErrorFromErrno(errno, "Cannot get information for path '", path.ToString(),
647                               "'");
648     }
649     *exists = false;
650   } else if (exists != nullptr) {
651     *exists = true;
652   }
653   return Status::OK();
654 }
655 
656 Status DeleteDirTreeInternal(const PlatformFilename& dir_path);
657 
658 Status DeleteDirEntryDir(const PlatformFilename& path, const struct stat& lst,
659                          bool remove_top_dir = true) {
660   if (!S_ISLNK(lst.st_mode)) {
661     // Not a symlink => delete contents recursively
662     DCHECK(S_ISDIR(lst.st_mode));
663     RETURN_NOT_OK(DeleteDirTreeInternal(path));
664     if (remove_top_dir && rmdir(path.ToNative().c_str()) != 0) {
665       return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
666                               "'");
667     }
668   } else {
669     // Remove symlink
670     if (remove_top_dir && unlink(path.ToNative().c_str()) != 0) {
671       return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
672                               "'");
673     }
674   }
675   return Status::OK();
676 }
677 
678 Status DeleteDirEntry(const PlatformFilename& path, const struct stat& lst) {
679   if (S_ISDIR(lst.st_mode)) {
680     return DeleteDirEntryDir(path, lst);
681   }
682   if (unlink(path.ToNative().c_str()) != 0) {
683     return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
684                             "'");
685   }
686   return Status::OK();
687 }
688 
689 Status DeleteDirTreeInternal(const PlatformFilename& dir_path) {
690   ARROW_ASSIGN_OR_RAISE(auto children, ListDir(dir_path));
691   for (const auto& child : children) {
692     struct stat lst;
693     PlatformFilename full_path = dir_path.Join(child);
694     RETURN_NOT_OK(LinkStat(full_path, &lst));
695     RETURN_NOT_OK(DeleteDirEntry(full_path, lst));
696   }
697   return Status::OK();
698 }
699 
700 Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found,
701                                bool remove_top_dir) {
702   bool exists = true;
703   struct stat lst;
704   if (allow_not_found) {
705     RETURN_NOT_OK(LinkStat(dir_path, &lst, &exists));
706   } else {
707     // Will raise if dir_path does not exist
708     RETURN_NOT_OK(LinkStat(dir_path, &lst));
709   }
710   if (exists) {
711     if (!S_ISDIR(lst.st_mode) && !S_ISLNK(lst.st_mode)) {
712       return Status::IOError("Cannot delete directory '", dir_path.ToString(),
713                              "': not a directory");
714     }
715     RETURN_NOT_OK(DeleteDirEntryDir(dir_path, lst, remove_top_dir));
716   }
717   return exists;
718 }
719 
720 #endif
721 
722 }  // namespace
723 
DeleteDirContents(const PlatformFilename & dir_path,bool allow_not_found)724 Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found) {
725   return DeleteDirContents(dir_path, allow_not_found, /*remove_top_dir=*/false);
726 }
727 
DeleteDirTree(const PlatformFilename & dir_path,bool allow_not_found)728 Result<bool> DeleteDirTree(const PlatformFilename& dir_path, bool allow_not_found) {
729   return DeleteDirContents(dir_path, allow_not_found, /*remove_top_dir=*/true);
730 }
731 
DeleteFile(const PlatformFilename & file_path,bool allow_not_found)732 Result<bool> DeleteFile(const PlatformFilename& file_path, bool allow_not_found) {
733 #ifdef _WIN32
734   if (DeleteFileW(file_path.ToNative().c_str())) {
735     return true;
736   } else {
737     int errnum = GetLastError();
738     if (!allow_not_found || errnum != ERROR_FILE_NOT_FOUND) {
739       return IOErrorFromWinError(GetLastError(), "Cannot delete file '",
740                                  file_path.ToString(), "'");
741     }
742   }
743 #else
744   if (unlink(file_path.ToNative().c_str()) == 0) {
745     return true;
746   } else {
747     if (!allow_not_found || errno != ENOENT) {
748       return IOErrorFromErrno(errno, "Cannot delete file '", file_path.ToString(), "'");
749     }
750   }
751 #endif
752   return false;
753 }
754 
FileExists(const PlatformFilename & path)755 Result<bool> FileExists(const PlatformFilename& path) {
756 #ifdef _WIN32
757   if (GetFileAttributesW(path.ToNative().c_str()) != INVALID_FILE_ATTRIBUTES) {
758     return true;
759   } else {
760     int errnum = GetLastError();
761     if (errnum != ERROR_PATH_NOT_FOUND && errnum != ERROR_FILE_NOT_FOUND) {
762       return IOErrorFromWinError(GetLastError(), "Failed getting information for path '",
763                                  path.ToString(), "'");
764     }
765     return false;
766   }
767 #else
768   struct stat st;
769   if (stat(path.ToNative().c_str(), &st) == 0) {
770     return true;
771   } else {
772     if (errno != ENOENT && errno != ENOTDIR) {
773       return IOErrorFromErrno(errno, "Failed getting information for path '",
774                               path.ToString(), "'");
775     }
776     return false;
777   }
778 #endif
779 }
780 
781 //
782 // Functions for creating file descriptors
783 //
784 
// Return Status::IOError("lseek failed") from the enclosing function if
// `retval` is -1.
#define CHECK_LSEEK(retval) \
  if ((retval) == -1) return Status::IOError("lseek failed");

// Reposition the offset of file descriptor `fd` using a 64-bit position:
// _lseeki64() on Windows, lseek() elsewhere.  The value returned by the
// underlying call is passed through unchanged.
static inline int64_t lseek64_compat(int fd, int64_t pos, int whence) {
#if defined(_WIN32)
  const int64_t new_pos = _lseeki64(fd, pos, whence);
#else
  const int64_t new_pos = lseek(fd, pos, whence);
#endif
  return new_pos;
}
795 
CheckFileOpResult(int fd_ret,int errno_actual,const PlatformFilename & file_name,const char * opname)796 static inline Result<int> CheckFileOpResult(int fd_ret, int errno_actual,
797                                             const PlatformFilename& file_name,
798                                             const char* opname) {
799   if (fd_ret == -1) {
800 #ifdef _WIN32
801     int winerr = GetLastError();
802     if (winerr != ERROR_SUCCESS) {
803       return IOErrorFromWinError(GetLastError(), "Failed to ", opname, " file '",
804                                  file_name.ToString(), "'");
805     }
806 #endif
807     return IOErrorFromErrno(errno_actual, "Failed to ", opname, " file '",
808                             file_name.ToString(), "'");
809   }
810   return fd_ret;
811 }
812 
FileOpenReadable(const PlatformFilename & file_name)813 Result<int> FileOpenReadable(const PlatformFilename& file_name) {
814   int fd, errno_actual;
815 #if defined(_WIN32)
816   SetLastError(0);
817   HANDLE file_handle = CreateFileW(file_name.ToNative().c_str(), GENERIC_READ,
818                                    FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
819                                    OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
820 
821   DWORD last_error = GetLastError();
822   if (last_error == ERROR_SUCCESS) {
823     errno_actual = 0;
824     fd = _open_osfhandle(reinterpret_cast<intptr_t>(file_handle),
825                          _O_RDONLY | _O_BINARY | _O_NOINHERIT);
826   } else {
827     return IOErrorFromWinError(last_error, "Failed to open local file '",
828                                file_name.ToString(), "'");
829   }
830 #else
831   fd = open(file_name.ToNative().c_str(), O_RDONLY);
832   errno_actual = errno;
833 
834   if (fd >= 0) {
835     // open(O_RDONLY) succeeds on directories, check for it
836     struct stat st;
837     int ret = fstat(fd, &st);
838     if (ret == -1) {
839       ARROW_UNUSED(FileClose(fd));
840       // Will propagate error below
841     } else if (S_ISDIR(st.st_mode)) {
842       ARROW_UNUSED(FileClose(fd));
843       return Status::IOError("Cannot open for reading: path '", file_name.ToString(),
844                              "' is a directory");
845     }
846   }
847 #endif
848 
849   return CheckFileOpResult(fd, errno_actual, file_name, "open local");
850 }
851 
FileOpenWritable(const PlatformFilename & file_name,bool write_only,bool truncate,bool append)852 Result<int> FileOpenWritable(const PlatformFilename& file_name, bool write_only,
853                              bool truncate, bool append) {
854   int fd, errno_actual;
855 
856 #if defined(_WIN32)
857   SetLastError(0);
858   int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT;
859   DWORD desired_access = GENERIC_WRITE;
860   DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
861   DWORD creation_disposition = OPEN_ALWAYS;
862 
863   if (append) {
864     oflag |= _O_APPEND;
865   }
866 
867   if (truncate) {
868     oflag |= _O_TRUNC;
869     creation_disposition = CREATE_ALWAYS;
870   }
871 
872   if (write_only) {
873     oflag |= _O_WRONLY;
874   } else {
875     oflag |= _O_RDWR;
876     desired_access |= GENERIC_READ;
877   }
878 
879   HANDLE file_handle =
880       CreateFileW(file_name.ToNative().c_str(), desired_access, share_mode, NULL,
881                   creation_disposition, FILE_ATTRIBUTE_NORMAL, NULL);
882 
883   DWORD last_error = GetLastError();
884   if (last_error == ERROR_SUCCESS || last_error == ERROR_ALREADY_EXISTS) {
885     errno_actual = 0;
886     fd = _open_osfhandle(reinterpret_cast<intptr_t>(file_handle), oflag);
887   } else {
888     return IOErrorFromWinError(last_error, "Failed to open local file '",
889                                file_name.ToString(), "'");
890   }
891 #else
892   int oflag = O_CREAT;
893 
894   if (truncate) {
895     oflag |= O_TRUNC;
896   }
897   if (append) {
898     oflag |= O_APPEND;
899   }
900 
901   if (write_only) {
902     oflag |= O_WRONLY;
903   } else {
904     oflag |= O_RDWR;
905   }
906 
907   fd = open(file_name.ToNative().c_str(), oflag, 0666);
908   errno_actual = errno;
909 #endif
910 
911   RETURN_NOT_OK(CheckFileOpResult(fd, errno_actual, file_name, "open local"));
912   if (append) {
913     // Seek to end, as O_APPEND does not necessarily do it
914     auto ret = lseek64_compat(fd, 0, SEEK_END);
915     if (ret == -1) {
916       ARROW_UNUSED(FileClose(fd));
917       return Status::IOError("lseek failed");
918     }
919   }
920   return fd;
921 }
922 
FileTell(int fd)923 Result<int64_t> FileTell(int fd) {
924   int64_t current_pos;
925 #if defined(_WIN32)
926   current_pos = _telli64(fd);
927   if (current_pos == -1) {
928     return Status::IOError("_telli64 failed");
929   }
930 #else
931   current_pos = lseek64_compat(fd, 0, SEEK_CUR);
932   CHECK_LSEEK(current_pos);
933 #endif
934   return current_pos;
935 }
936 
CreatePipe()937 Result<Pipe> CreatePipe() {
938   int ret;
939   int fd[2];
940 #if defined(_WIN32)
941   ret = _pipe(fd, 4096, _O_BINARY);
942 #else
943   ret = pipe(fd);
944 #endif
945 
946   if (ret == -1) {
947     return IOErrorFromErrno(errno, "Error creating pipe");
948   }
949   return Pipe{fd[0], fd[1]};
950 }
951 
StatusFromMmapErrno(const char * prefix)952 static Status StatusFromMmapErrno(const char* prefix) {
953 #ifdef _WIN32
954   errno = __map_mman_error(GetLastError(), EPERM);
955 #endif
956   return IOErrorFromErrno(errno, prefix);
957 }
958 
959 namespace {
960 
GetPageSizeInternal()961 int64_t GetPageSizeInternal() {
962 #if defined(__APPLE__)
963   return getpagesize();
964 #elif defined(_WIN32)
965   SYSTEM_INFO si;
966   GetSystemInfo(&si);
967   return si.dwPageSize;
968 #else
969   errno = 0;
970   const auto ret = sysconf(_SC_PAGESIZE);
971   if (ret == -1) {
972     ARROW_LOG(FATAL) << "sysconf(_SC_PAGESIZE) failed: " << ErrnoMessage(errno);
973   }
974   return static_cast<int64_t>(ret);
975 #endif
976 }
977 
978 }  // namespace
979 
GetPageSize()980 int64_t GetPageSize() {
981   static const int64_t kPageSize = GetPageSizeInternal();  // cache it
982   return kPageSize;
983 }
984 
985 //
986 // Compatible way to remap a memory map
987 //
988 
MemoryMapRemap(void * addr,size_t old_size,size_t new_size,int fildes,void ** new_addr)989 Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
990                       void** new_addr) {
991   // should only be called with writable files
992   *new_addr = MAP_FAILED;
993 #ifdef _WIN32
994   // flags are ignored on windows
995   HANDLE fm, h;
996 
997   if (!UnmapViewOfFile(addr)) {
998     return StatusFromMmapErrno("UnmapViewOfFile failed");
999   }
1000 
1001   h = reinterpret_cast<HANDLE>(_get_osfhandle(fildes));
1002   if (h == INVALID_HANDLE_VALUE) {
1003     return StatusFromMmapErrno("Cannot get file handle");
1004   }
1005 
1006   uint64_t new_size64 = new_size;
1007   LONG new_size_low = static_cast<LONG>(new_size64 & 0xFFFFFFFFUL);
1008   LONG new_size_high = static_cast<LONG>((new_size64 >> 32) & 0xFFFFFFFFUL);
1009 
1010   SetFilePointer(h, new_size_low, &new_size_high, FILE_BEGIN);
1011   SetEndOfFile(h);
1012   fm = CreateFileMapping(h, NULL, PAGE_READWRITE, 0, 0, "");
1013   if (fm == NULL) {
1014     return StatusFromMmapErrno("CreateFileMapping failed");
1015   }
1016   *new_addr = MapViewOfFile(fm, FILE_MAP_WRITE, 0, 0, new_size);
1017   CloseHandle(fm);
1018   if (new_addr == NULL) {
1019     return StatusFromMmapErrno("MapViewOfFile failed");
1020   }
1021   return Status::OK();
1022 #elif defined(__linux__)
1023   if (ftruncate(fildes, new_size) == -1) {
1024     return StatusFromMmapErrno("ftruncate failed");
1025   }
1026   *new_addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE);
1027   if (*new_addr == MAP_FAILED) {
1028     return StatusFromMmapErrno("mremap failed");
1029   }
1030   return Status::OK();
1031 #else
1032   // we have to close the mmap first, truncate the file to the new size
1033   // and recreate the mmap
1034   if (munmap(addr, old_size) == -1) {
1035     return StatusFromMmapErrno("munmap failed");
1036   }
1037   if (ftruncate(fildes, new_size) == -1) {
1038     return StatusFromMmapErrno("ftruncate failed");
1039   }
1040   // we set READ / WRITE flags on the new map, since we could only have
1041   // unlarged a RW map in the first place
1042   *new_addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0);
1043   if (*new_addr == MAP_FAILED) {
1044     return StatusFromMmapErrno("mmap failed");
1045   }
1046   return Status::OK();
1047 #endif
1048 }
1049 
MemoryAdviseWillNeed(const std::vector<MemoryRegion> & regions)1050 Status MemoryAdviseWillNeed(const std::vector<MemoryRegion>& regions) {
1051   const auto page_size = static_cast<size_t>(GetPageSize());
1052   DCHECK_GT(page_size, 0);
1053   const size_t page_mask = ~(page_size - 1);
1054   DCHECK_EQ(page_mask & page_size, page_size);
1055 
1056   auto align_region = [=](const MemoryRegion& region) -> MemoryRegion {
1057     const auto addr = reinterpret_cast<uintptr_t>(region.addr);
1058     const auto aligned_addr = addr & page_mask;
1059     DCHECK_LT(addr - aligned_addr, page_size);
1060     return {reinterpret_cast<void*>(aligned_addr),
1061             region.size + static_cast<size_t>(addr - aligned_addr)};
1062   };
1063 
1064 #ifdef _WIN32
1065   // PrefetchVirtualMemory() is available on Windows 8 or later
1066   struct PrefetchEntry {  // Like WIN32_MEMORY_RANGE_ENTRY
1067     void* VirtualAddress;
1068     size_t NumberOfBytes;
1069 
1070     PrefetchEntry(const MemoryRegion& region)  // NOLINT runtime/explicit
1071         : VirtualAddress(region.addr), NumberOfBytes(region.size) {}
1072   };
1073   using PrefetchVirtualMemoryFunc = BOOL (*)(HANDLE, ULONG_PTR, PrefetchEntry*, ULONG);
1074   static const auto prefetch_virtual_memory = reinterpret_cast<PrefetchVirtualMemoryFunc>(
1075       GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "PrefetchVirtualMemory"));
1076   if (prefetch_virtual_memory != nullptr) {
1077     std::vector<PrefetchEntry> entries;
1078     entries.reserve(regions.size());
1079     for (const auto& region : regions) {
1080       if (region.size != 0) {
1081         entries.emplace_back(align_region(region));
1082       }
1083     }
1084     if (!entries.empty() &&
1085         !prefetch_virtual_memory(GetCurrentProcess(),
1086                                  static_cast<ULONG_PTR>(entries.size()), entries.data(),
1087                                  0)) {
1088       return IOErrorFromWinError(GetLastError(), "PrefetchVirtualMemory failed");
1089     }
1090   }
1091   return Status::OK();
1092 #elif defined(POSIX_MADV_WILLNEED)
1093   for (const auto& region : regions) {
1094     if (region.size != 0) {
1095       const auto aligned = align_region(region);
1096       int err = posix_madvise(aligned.addr, aligned.size, POSIX_MADV_WILLNEED);
1097       // EBADF can be returned on Linux in the following cases:
1098       // - the kernel version is older than 3.9
1099       // - the kernel was compiled with CONFIG_SWAP disabled (ARROW-9577)
1100       if (err != 0 && err != EBADF) {
1101         return IOErrorFromErrno(err, "posix_madvise failed");
1102       }
1103     }
1104   }
1105   return Status::OK();
1106 #else
1107   return Status::OK();
1108 #endif
1109 }
1110 
1111 //
1112 // Closing files
1113 //
1114 
FileClose(int fd)1115 Status FileClose(int fd) {
1116   int ret;
1117 
1118 #if defined(_WIN32)
1119   ret = static_cast<int>(_close(fd));
1120 #else
1121   ret = static_cast<int>(close(fd));
1122 #endif
1123 
1124   if (ret == -1) {
1125     return Status::IOError("error closing file");
1126   }
1127   return Status::OK();
1128 }
1129 
1130 //
1131 // Seeking and telling
1132 //
1133 
FileSeek(int fd,int64_t pos,int whence)1134 Status FileSeek(int fd, int64_t pos, int whence) {
1135   int64_t ret = lseek64_compat(fd, pos, whence);
1136   CHECK_LSEEK(ret);
1137   return Status::OK();
1138 }
1139 
FileSeek(int fd,int64_t pos)1140 Status FileSeek(int fd, int64_t pos) { return FileSeek(fd, pos, SEEK_SET); }
1141 
FileGetSize(int fd)1142 Result<int64_t> FileGetSize(int fd) {
1143 #if defined(_WIN32)
1144   struct __stat64 st;
1145 #else
1146   struct stat st;
1147 #endif
1148   st.st_size = -1;
1149 
1150 #if defined(_WIN32)
1151   int ret = _fstat64(fd, &st);
1152 #else
1153   int ret = fstat(fd, &st);
1154 #endif
1155 
1156   if (ret == -1) {
1157     return Status::IOError("error stat()ing file");
1158   }
1159   if (st.st_size == 0) {
1160     // Maybe the file doesn't support getting its size, double-check by
1161     // trying to tell() (seekable files usually have a size, while
1162     // non-seekable files don't)
1163     RETURN_NOT_OK(FileTell(fd));
1164   } else if (st.st_size < 0) {
1165     return Status::IOError("error getting file size");
1166   }
1167   return st.st_size;
1168 }
1169 
1170 //
1171 // Reading data
1172 //
1173 
// Positional read: read up to `nbytes` bytes from `fd` at offset `pos` into `buf`.
//
// Returns the number of bytes read (0 at end of file) or -1 on error.
static inline int64_t pread_compat(int fd, void* buf, int64_t nbytes, int64_t pos) {
#if defined(_WIN32)
  HANDLE file_handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));

  // The read offset is conveyed through the OVERLAPPED structure, split
  // into its low and high 32-bit halves.
  OVERLAPPED ov = {0};
  ov.Offset = static_cast<uint32_t>(pos);
  ov.OffsetHigh = static_cast<uint32_t>(pos >> 32);

  // Note: ReadFile() will update the file position
  DWORD num_read = 0;
  const BOOL ok =
      ReadFile(file_handle, buf, static_cast<uint32_t>(nbytes), &num_read, &ov);
  // Hitting end of file is reported as an error by ReadFile() but is not one here
  if (!ok && GetLastError() != ERROR_HANDLE_EOF) {
    return -1;
  }
  return num_read;
#else
  const auto offset = static_cast<off_t>(pos);
  const auto count = static_cast<size_t>(nbytes);
  return static_cast<int64_t>(pread(fd, buf, count, offset));
#endif
}
1195 
FileRead(int fd,uint8_t * buffer,int64_t nbytes)1196 Result<int64_t> FileRead(int fd, uint8_t* buffer, int64_t nbytes) {
1197   int64_t bytes_read = 0;
1198 
1199   while (bytes_read < nbytes) {
1200     int64_t chunksize =
1201         std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_read);
1202 #if defined(_WIN32)
1203     int64_t ret =
1204         static_cast<int64_t>(_read(fd, buffer, static_cast<uint32_t>(chunksize)));
1205 #else
1206     int64_t ret = static_cast<int64_t>(read(fd, buffer, static_cast<size_t>(chunksize)));
1207 #endif
1208 
1209     if (ret == -1) {
1210       return IOErrorFromErrno(errno, "Error reading bytes from file");
1211     }
1212     if (ret == 0) {
1213       // EOF
1214       break;
1215     }
1216     buffer += ret;
1217     bytes_read += ret;
1218   }
1219   return bytes_read;
1220 }
1221 
FileReadAt(int fd,uint8_t * buffer,int64_t position,int64_t nbytes)1222 Result<int64_t> FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes) {
1223   int64_t bytes_read = 0;
1224 
1225   while (bytes_read < nbytes) {
1226     int64_t chunksize =
1227         std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_read);
1228     int64_t ret = pread_compat(fd, buffer, chunksize, position);
1229 
1230     if (ret == -1) {
1231       return IOErrorFromErrno(errno, "Error reading bytes from file");
1232     }
1233     if (ret == 0) {
1234       // EOF
1235       break;
1236     }
1237     buffer += ret;
1238     position += ret;
1239     bytes_read += ret;
1240   }
1241   return bytes_read;
1242 }
1243 
1244 //
1245 // Writing data
1246 //
1247 
FileWrite(int fd,const uint8_t * buffer,const int64_t nbytes)1248 Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes) {
1249   int ret = 0;
1250   int64_t bytes_written = 0;
1251 
1252   while (ret != -1 && bytes_written < nbytes) {
1253     int64_t chunksize =
1254         std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_written);
1255 #if defined(_WIN32)
1256     ret = static_cast<int>(
1257         _write(fd, buffer + bytes_written, static_cast<uint32_t>(chunksize)));
1258 #else
1259     ret = static_cast<int>(
1260         write(fd, buffer + bytes_written, static_cast<size_t>(chunksize)));
1261 #endif
1262 
1263     if (ret != -1) {
1264       bytes_written += ret;
1265     }
1266   }
1267 
1268   if (ret == -1) {
1269     return IOErrorFromErrno(errno, "Error writing bytes to file");
1270   }
1271   return Status::OK();
1272 }
1273 
FileTruncate(int fd,const int64_t size)1274 Status FileTruncate(int fd, const int64_t size) {
1275   int ret, errno_actual;
1276 
1277 #ifdef _WIN32
1278   errno_actual = _chsize_s(fd, static_cast<size_t>(size));
1279   ret = errno_actual == 0 ? 0 : -1;
1280 #else
1281   ret = ftruncate(fd, static_cast<size_t>(size));
1282   errno_actual = errno;
1283 #endif
1284 
1285   if (ret == -1) {
1286     return IOErrorFromErrno(errno_actual, "Error writing bytes to file");
1287   }
1288   return Status::OK();
1289 }
1290 
1291 //
1292 // Environment variables
1293 //
1294 
GetEnvVar(const char * name)1295 Result<std::string> GetEnvVar(const char* name) {
1296 #ifdef _WIN32
1297   // On Windows, getenv() reads an early copy of the process' environment
1298   // which doesn't get updated when SetEnvironmentVariable() is called.
1299   constexpr int32_t bufsize = 2000;
1300   char c_str[bufsize];
1301   auto res = GetEnvironmentVariableA(name, c_str, bufsize);
1302   if (res >= bufsize) {
1303     return Status::CapacityError("environment variable value too long");
1304   } else if (res == 0) {
1305     return Status::KeyError("environment variable undefined");
1306   }
1307   return std::string(c_str);
1308 #else
1309   char* c_str = getenv(name);
1310   if (c_str == nullptr) {
1311     return Status::KeyError("environment variable undefined");
1312   }
1313   return std::string(c_str);
1314 #endif
1315 }
1316 
GetEnvVar(const std::string & name)1317 Result<std::string> GetEnvVar(const std::string& name) { return GetEnvVar(name.c_str()); }
1318 
1319 #ifdef _WIN32
GetEnvVarNative(const std::string & name)1320 Result<NativePathString> GetEnvVarNative(const std::string& name) {
1321   NativePathString w_name;
1322   constexpr int32_t bufsize = 2000;
1323   wchar_t w_str[bufsize];
1324 
1325   ARROW_ASSIGN_OR_RAISE(w_name, StringToNative(name));
1326   auto res = GetEnvironmentVariableW(w_name.c_str(), w_str, bufsize);
1327   if (res >= bufsize) {
1328     return Status::CapacityError("environment variable value too long");
1329   } else if (res == 0) {
1330     return Status::KeyError("environment variable undefined");
1331   }
1332   return NativePathString(w_str);
1333 }
1334 
GetEnvVarNative(const char * name)1335 Result<NativePathString> GetEnvVarNative(const char* name) {
1336   return GetEnvVarNative(std::string(name));
1337 }
1338 
1339 #else
1340 
GetEnvVarNative(const std::string & name)1341 Result<NativePathString> GetEnvVarNative(const std::string& name) {
1342   return GetEnvVar(name);
1343 }
1344 
GetEnvVarNative(const char * name)1345 Result<NativePathString> GetEnvVarNative(const char* name) { return GetEnvVar(name); }
1346 #endif
1347 
SetEnvVar(const char * name,const char * value)1348 Status SetEnvVar(const char* name, const char* value) {
1349 #ifdef _WIN32
1350   if (SetEnvironmentVariableA(name, value)) {
1351     return Status::OK();
1352   } else {
1353     return Status::Invalid("failed setting environment variable");
1354   }
1355 #else
1356   if (setenv(name, value, 1) == 0) {
1357     return Status::OK();
1358   } else {
1359     return Status::Invalid("failed setting environment variable");
1360   }
1361 #endif
1362 }
1363 
SetEnvVar(const std::string & name,const std::string & value)1364 Status SetEnvVar(const std::string& name, const std::string& value) {
1365   return SetEnvVar(name.c_str(), value.c_str());
1366 }
1367 
DelEnvVar(const char * name)1368 Status DelEnvVar(const char* name) {
1369 #ifdef _WIN32
1370   if (SetEnvironmentVariableA(name, nullptr)) {
1371     return Status::OK();
1372   } else {
1373     return Status::Invalid("failed deleting environment variable");
1374   }
1375 #else
1376   if (unsetenv(name) == 0) {
1377     return Status::OK();
1378   } else {
1379     return Status::Invalid("failed deleting environment variable");
1380   }
1381 #endif
1382 }
1383 
DelEnvVar(const std::string & name)1384 Status DelEnvVar(const std::string& name) { return DelEnvVar(name.c_str()); }
1385 
1386 //
1387 // Temporary directories
1388 //
1389 
1390 namespace {
1391 
1392 #if _WIN32
GetWindowsDirectoryPath()1393 NativePathString GetWindowsDirectoryPath() {
1394   auto size = GetWindowsDirectoryW(nullptr, 0);
1395   ARROW_CHECK_GT(size, 0) << "GetWindowsDirectoryW failed";
1396   std::vector<wchar_t> w_str(size);
1397   size = GetWindowsDirectoryW(w_str.data(), size);
1398   ARROW_CHECK_GT(size, 0) << "GetWindowsDirectoryW failed";
1399   return {w_str.data(), size};
1400 }
1401 #endif
1402 
1403 // Return a list of preferred locations for temporary files
GetPlatformTemporaryDirs()1404 std::vector<NativePathString> GetPlatformTemporaryDirs() {
1405   struct TempDirSelector {
1406     std::string env_var;
1407     NativePathString path_append;
1408   };
1409 
1410   std::vector<TempDirSelector> selectors;
1411   NativePathString fallback_tmp;
1412 
1413 #if _WIN32
1414   selectors = {
1415       {"TMP", L""}, {"TEMP", L""}, {"LOCALAPPDATA", L"Temp"}, {"USERPROFILE", L"Temp"}};
1416   fallback_tmp = GetWindowsDirectoryPath();
1417 
1418 #else
1419   selectors = {{"TMPDIR", ""}, {"TMP", ""}, {"TEMP", ""}, {"TEMPDIR", ""}};
1420 #ifdef __ANDROID__
1421   fallback_tmp = "/data/local/tmp";
1422 #else
1423   fallback_tmp = "/tmp";
1424 #endif
1425 #endif
1426 
1427   std::vector<NativePathString> temp_dirs;
1428   for (const auto& sel : selectors) {
1429     auto result = GetEnvVarNative(sel.env_var);
1430     if (result.status().IsKeyError()) {
1431       // Environment variable absent, skip
1432       continue;
1433     }
1434     if (!result.ok()) {
1435       ARROW_LOG(WARNING) << "Failed getting env var '" << sel.env_var
1436                          << "': " << result.status().ToString();
1437       continue;
1438     }
1439     NativePathString p = *std::move(result);
1440     if (p.empty()) {
1441       // Environment variable set to empty string, skip
1442       continue;
1443     }
1444     if (sel.path_append.empty()) {
1445       temp_dirs.push_back(p);
1446     } else {
1447       temp_dirs.push_back(p + kNativeSep + sel.path_append);
1448     }
1449   }
1450   temp_dirs.push_back(fallback_tmp);
1451   return temp_dirs;
1452 }
1453 
MakeRandomName(int num_chars)1454 std::string MakeRandomName(int num_chars) {
1455   static const std::string chars = "0123456789abcdefghijklmnopqrstuvwxyz";
1456   std::default_random_engine gen(
1457       static_cast<std::default_random_engine::result_type>(GetRandomSeed()));
1458   std::uniform_int_distribution<int> dist(0, static_cast<int>(chars.length() - 1));
1459 
1460   std::string s;
1461   s.reserve(num_chars);
1462   for (int i = 0; i < num_chars; ++i) {
1463     s += chars[dist(gen)];
1464   }
1465   return s;
1466 }
1467 
1468 }  // namespace
1469 
// Create a new, uniquely-named temporary directory.
//
// The directory name is `prefix` followed by a random suffix of kNumChars
// characters. Each directory returned by GetPlatformTemporaryDirs() is tried
// in order as the parent; the first one in which creation succeeds is used.
//
// Returns the TemporaryDir on success. Returns an IOError if a name clash
// persists after 3 attempts in some parent directory, or if no parent
// directory allowed creating the subdirectory.
Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& prefix) {
  const int kNumChars = 8;

  // Current candidate directory name; shared with (and updated by) the
  // lambdas below through by-reference capture.
  NativePathString base_name;

  // Produce a fresh candidate name: prefix + random suffix, in native encoding
  auto MakeBaseName = [&]() {
    std::string suffix = MakeRandomName(kNumChars);
    return StringToNative(prefix + suffix);
  };

  // Try to create `base_name` under `base_dir`.
  // Yields a null pointer if CreateDir() fails in this base_dir (caller should
  // move on to the next one), a valid pointer on success, and an error Status
  // if the name already existed on each of the 3 attempts.
  auto TryCreatingDirectory =
      [&](const NativePathString& base_dir) -> Result<std::unique_ptr<TemporaryDir>> {
    Status st;
    for (int attempt = 0; attempt < 3; ++attempt) {
      PlatformFilename fn(base_dir + kNativeSep + base_name + kNativeSep);
      auto result = CreateDir(fn);
      if (!result.ok()) {
        // Probably a permissions error or a non-existing base_dir
        return nullptr;
      }
      if (*result) {
        return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
      }
      // The random name already exists in base_dir, try with another name
      st = Status::IOError("Path already exists: '", fn.ToString(), "'");
      ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
    }
    // All attempts clashed: report the last "already exists" error
    return st;
  };

  ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());

  auto base_dirs = GetPlatformTemporaryDirs();
  DCHECK_NE(base_dirs.size(), 0);

  for (const auto& base_dir : base_dirs) {
    // An error Status from the lambda aborts the whole search here
    ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir));
    if (ptr) {
      return std::move(ptr);
    }
    // Cannot create in this directory, try the next one
  }

  return Status::IOError(
      "Cannot create temporary subdirectory in any "
      "of the platform temporary directories");
}
1517 
TemporaryDir(PlatformFilename && path)1518 TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {}
1519 
~TemporaryDir()1520 TemporaryDir::~TemporaryDir() {
1521   Status st = DeleteDirTree(path_).status();
1522   if (!st.ok()) {
1523     ARROW_LOG(WARNING) << "When trying to delete temporary directory: " << st;
1524   }
1525 }
1526 
SignalHandler()1527 SignalHandler::SignalHandler() : SignalHandler(static_cast<Callback>(nullptr)) {}
1528 
SignalHandler(Callback cb)1529 SignalHandler::SignalHandler(Callback cb) {
1530 #if ARROW_HAVE_SIGACTION
1531   sa_.sa_handler = cb;
1532   sa_.sa_flags = 0;
1533   sigemptyset(&sa_.sa_mask);
1534 #else
1535   cb_ = cb;
1536 #endif
1537 }
1538 
1539 #if ARROW_HAVE_SIGACTION
SignalHandler(const struct sigaction & sa)1540 SignalHandler::SignalHandler(const struct sigaction& sa) {
1541   memcpy(&sa_, &sa, sizeof(sa));
1542 }
1543 #endif
1544 
callback() const1545 SignalHandler::Callback SignalHandler::callback() const {
1546 #if ARROW_HAVE_SIGACTION
1547   return sa_.sa_handler;
1548 #else
1549   return cb_;
1550 #endif
1551 }
1552 
1553 #if ARROW_HAVE_SIGACTION
action() const1554 const struct sigaction& SignalHandler::action() const { return sa_; }
1555 #endif
1556 
GetSignalHandler(int signum)1557 Result<SignalHandler> GetSignalHandler(int signum) {
1558 #if ARROW_HAVE_SIGACTION
1559   struct sigaction sa;
1560   int ret = sigaction(signum, nullptr, &sa);
1561   if (ret != 0) {
1562     // TODO more detailed message using errno
1563     return Status::IOError("sigaction call failed");
1564   }
1565   return SignalHandler(sa);
1566 #else
1567   // To read the old handler, set the signal handler to something else temporarily
1568   SignalHandler::Callback cb = signal(signum, SIG_IGN);
1569   if (cb == SIG_ERR || signal(signum, cb) == SIG_ERR) {
1570     // TODO more detailed message using errno
1571     return Status::IOError("signal call failed");
1572   }
1573   return SignalHandler(cb);
1574 #endif
1575 }
1576 
SetSignalHandler(int signum,const SignalHandler & handler)1577 Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler) {
1578 #if ARROW_HAVE_SIGACTION
1579   struct sigaction old_sa;
1580   int ret = sigaction(signum, &handler.action(), &old_sa);
1581   if (ret != 0) {
1582     // TODO more detailed message using errno
1583     return Status::IOError("sigaction call failed");
1584   }
1585   return SignalHandler(old_sa);
1586 #else
1587   SignalHandler::Callback cb = signal(signum, handler.callback());
1588   if (cb == SIG_ERR) {
1589     // TODO more detailed message using errno
1590     return Status::IOError("signal call failed");
1591   }
1592   return SignalHandler(cb);
1593 #endif
1594   return Status::OK();
1595 }
1596 
ReinstateSignalHandler(int signum,SignalHandler::Callback handler)1597 void ReinstateSignalHandler(int signum, SignalHandler::Callback handler) {
1598 #if !ARROW_HAVE_SIGACTION
1599   // Cannot report any errors from signal() (but there shouldn't be any)
1600   signal(signum, handler);
1601 #endif
1602 }
1603 
SendSignal(int signum)1604 Status SendSignal(int signum) {
1605   if (raise(signum) == 0) {
1606     return Status::OK();
1607   }
1608   if (errno == EINVAL) {
1609     return Status::Invalid("Invalid signal number ", signum);
1610   }
1611   return IOErrorFromErrno(errno, "Failed to raise signal");
1612 }
1613 
SendSignalToThread(int signum,uint64_t thread_id)1614 Status SendSignalToThread(int signum, uint64_t thread_id) {
1615 #ifdef _WIN32
1616   return Status::NotImplemented("Cannot send signal to specific thread on Windows");
1617 #else
1618   // Have to use a C-style cast because pthread_t can be a pointer *or* integer type
1619   int r = pthread_kill((pthread_t)thread_id, signum);  // NOLINT readability-casting
1620   if (r == 0) {
1621     return Status::OK();
1622   }
1623   if (r == EINVAL) {
1624     return Status::Invalid("Invalid signal number ", signum);
1625   }
1626   return IOErrorFromErrno(r, "Failed to raise signal");
1627 #endif
1628 }
1629 
1630 namespace {
1631 
// Return the identifier of the current process, widened to 64 bits.
int64_t GetPid() {
#ifdef _WIN32
  const auto pid = GetCurrentProcessId();
#else
  const auto pid = getpid();
#endif
  return static_cast<int64_t>(pid);
}
1639 
GetSeedGenerator()1640 std::mt19937_64 GetSeedGenerator() {
1641   // Initialize Mersenne Twister PRNG with a true random seed.
1642   // Make sure to mix in process id to minimize risks of clashes when parallel testing.
1643 #ifdef ARROW_VALGRIND
1644   // Valgrind can crash, hang or enter an infinite loop on std::random_device,
1645   // use a crude initializer instead.
1646   const uint8_t dummy = 0;
1647   ARROW_UNUSED(dummy);
1648   std::mt19937_64 seed_gen(reinterpret_cast<uintptr_t>(&dummy) ^
1649                            static_cast<uintptr_t>(GetPid()));
1650 #else
1651   std::random_device true_random;
1652   std::mt19937_64 seed_gen(static_cast<uint64_t>(true_random()) ^
1653                            (static_cast<uint64_t>(true_random()) << 32) ^
1654                            static_cast<uint64_t>(GetPid()));
1655 #endif
1656   return seed_gen;
1657 }
1658 
1659 }  // namespace
1660 
GetRandomSeed()1661 int64_t GetRandomSeed() {
1662   // The process-global seed generator to aims to avoid calling std::random_device
1663   // unless truly necessary (it can block on some systems, see ARROW-10287).
1664   static auto seed_gen = GetSeedGenerator();
1665   return static_cast<int64_t>(seed_gen());
1666 }
1667 
// Return an integer representation of the calling thread's std::thread::id.
//
// The id's bytes are copied into a zero-initialized uint64_t, so any bytes
// beyond sizeof(std::thread::id) stay zero.
uint64_t GetThreadId() {
  // std::thread::id is trivially copyable as per C++ spec,
  // so type punning as a uint64_t should work
  static_assert(sizeof(std::thread::id) <= sizeof(uint64_t),
                "std::thread::id can't fit into uint64_t");

  const std::thread::id current = std::this_thread::get_id();
  uint64_t as_integer = 0;
  std::memcpy(&as_integer, &current, sizeof(current));
  return as_integer;
}
1678 
GetOptionalThreadId()1679 uint64_t GetOptionalThreadId() {
1680   auto tid = GetThreadId();
1681   return (tid == 0) ? tid - 1 : tid;
1682 }
1683 
1684 }  // namespace internal
1685 }  // namespace arrow
1686