1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <chrono>
19 #include <cstring>
20 #include <sstream>
21 #include <utility>
22 
23 #ifdef _WIN32
24 #include "arrow/util/windows_compatibility.h"
25 #else
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <stdio.h>
29 #include <sys/stat.h>
30 #endif
31 
32 #include "arrow/filesystem/localfs.h"
33 #include "arrow/filesystem/path_util.h"
34 #include "arrow/filesystem/util_internal.h"
35 #include "arrow/io/file.h"
36 #include "arrow/util/io_util.h"
37 #include "arrow/util/logging.h"
38 #include "arrow/util/uri.h"
39 #include "arrow/util/windows_fixup.h"
40 
41 namespace arrow {
42 namespace fs {
43 
44 using ::arrow::internal::IOErrorFromErrno;
45 #ifdef _WIN32
46 using ::arrow::internal::IOErrorFromWinError;
47 #endif
48 using ::arrow::internal::NativePathString;
49 using ::arrow::internal::PlatformFilename;
50 
51 namespace internal {
52 
53 #ifdef _WIN32
// Return true when `c` lies in one of the ranges 'A'-'Z' or 'a'-'z', i.e. when
// it can be the letter of a Windows drive specification such as "C:".
static bool IsDriveLetter(char c) {
  // Explicit range comparisons are used because the character classification
  // functions of the C/C++ stdlib are locale-dependent.
  const bool is_upper = ('A' <= c) && (c <= 'Z');
  const bool is_lower = ('a' <= c) && (c <= 'z');
  return is_upper || is_lower;
}
58 #endif
59 
// Tell whether `s` looks like an absolute local filesystem path.
//
// On every platform a path starting with '/' is considered absolute.
// On Windows, a path starting with '\' or with a drive specification
// followed by a separator (e.g. "C:\..." or "C:/...") is absolute as well.
// The empty string is never absolute.
bool DetectAbsolutePath(const std::string& s) {
  if (s.empty()) {
    return false;
  }
  const char first = s.front();
  // Is it a /-prefixed local path?
  if (first == '/') {
    return true;
  }
#ifdef _WIN32
  // Is it a \-prefixed local path?
  if (first == '\\') {
    return true;
  }
  // Does it start with a drive letter in addition to being /- or \-prefixed,
  // e.g. "C:\..."?
  const bool has_drive_prefix = s.size() >= 3 && IsDriveLetter(first) &&
                                s[1] == ':' && (s[2] == '/' || s[2] == '\\');
  if (has_drive_prefix) {
    return true;
  }
#endif
  return false;
}
79 
80 }  // namespace internal
81 
82 namespace {
83 
84 #ifdef _WIN32
85 
NativeToString(const NativePathString & ns)86 std::string NativeToString(const NativePathString& ns) {
87   PlatformFilename fn(ns);
88   return fn.ToString();
89 }
90 
ToTimePoint(FILETIME ft)91 TimePoint ToTimePoint(FILETIME ft) {
92   // Hundreds of nanoseconds between January 1, 1601 (UTC) and the Unix epoch.
93   static constexpr int64_t kFileTimeEpoch = 11644473600LL * 10000000;
94 
95   int64_t hundreds = (static_cast<int64_t>(ft.dwHighDateTime) << 32) + ft.dwLowDateTime -
96                      kFileTimeEpoch;  // hundreds of ns since Unix epoch
97   std::chrono::nanoseconds ns_count(100 * hundreds);
98   return TimePoint(std::chrono::duration_cast<TimePoint::duration>(ns_count));
99 }
100 
FileInformationToFileInfo(const BY_HANDLE_FILE_INFORMATION & information)101 FileInfo FileInformationToFileInfo(const BY_HANDLE_FILE_INFORMATION& information) {
102   FileInfo info;
103   if (information.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
104     info.set_type(FileType::Directory);
105     info.set_size(kNoSize);
106   } else {
107     // Regular file
108     info.set_type(FileType::File);
109     info.set_size((static_cast<int64_t>(information.nFileSizeHigh) << 32) +
110                   information.nFileSizeLow);
111   }
112   info.set_mtime(ToTimePoint(information.ftLastWriteTime));
113   return info;
114 }
115 
StatFile(const std::wstring & path)116 Result<FileInfo> StatFile(const std::wstring& path) {
117   HANDLE h;
118   std::string bytes_path = NativeToString(path);
119   FileInfo info;
120 
121   /* Inspired by CPython, see Modules/posixmodule.c */
122   h = CreateFileW(path.c_str(), FILE_READ_ATTRIBUTES, /* desired access */
123                   0,                                  /* share mode */
124                   NULL,                               /* security attributes */
125                   OPEN_EXISTING,
126                   /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */
127                   FILE_ATTRIBUTE_NORMAL | FILE_FLAG_BACKUP_SEMANTICS, NULL);
128 
129   if (h == INVALID_HANDLE_VALUE) {
130     DWORD err = GetLastError();
131     if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) {
132       info.set_path(bytes_path);
133       info.set_type(FileType::NotFound);
134       info.set_mtime(kNoTime);
135       info.set_size(kNoSize);
136       return info;
137     } else {
138       return IOErrorFromWinError(GetLastError(), "Failed querying information for path '",
139                                  bytes_path, "'");
140     }
141   }
142   BY_HANDLE_FILE_INFORMATION information;
143   if (!GetFileInformationByHandle(h, &information)) {
144     CloseHandle(h);
145     return IOErrorFromWinError(GetLastError(), "Failed querying information for path '",
146                                bytes_path, "'");
147   }
148   CloseHandle(h);
149   info = FileInformationToFileInfo(information);
150   info.set_path(bytes_path);
151   return info;
152 }
153 
154 #else  // POSIX systems
155 
156 TimePoint ToTimePoint(const struct timespec& s) {
157   std::chrono::nanoseconds ns_count(static_cast<int64_t>(s.tv_sec) * 1000000000 +
158                                     static_cast<int64_t>(s.tv_nsec));
159   return TimePoint(std::chrono::duration_cast<TimePoint::duration>(ns_count));
160 }
161 
162 FileInfo StatToFileInfo(const struct stat& s) {
163   FileInfo info;
164   if (S_ISREG(s.st_mode)) {
165     info.set_type(FileType::File);
166     info.set_size(static_cast<int64_t>(s.st_size));
167   } else if (S_ISDIR(s.st_mode)) {
168     info.set_type(FileType::Directory);
169     info.set_size(kNoSize);
170   } else {
171     info.set_type(FileType::Unknown);
172     info.set_size(kNoSize);
173   }
174 #ifdef __APPLE__
175   // macOS doesn't use the POSIX-compliant spelling
176   info.set_mtime(ToTimePoint(s.st_mtimespec));
177 #else
178   info.set_mtime(ToTimePoint(s.st_mtim));
179 #endif
180   return info;
181 }
182 
183 Result<FileInfo> StatFile(const std::string& path) {
184   FileInfo info;
185   struct stat s;
186   int r = stat(path.c_str(), &s);
187   if (r == -1) {
188     if (errno == ENOENT || errno == ENOTDIR || errno == ELOOP) {
189       info.set_type(FileType::NotFound);
190       info.set_mtime(kNoTime);
191       info.set_size(kNoSize);
192     } else {
193       return IOErrorFromErrno(errno, "Failed stat()ing path '", path, "'");
194     }
195   } else {
196     info = StatToFileInfo(s);
197   }
198   info.set_path(path);
199   return info;
200 }
201 
202 #endif
203 
StatSelector(const PlatformFilename & dir_fn,const FileSelector & select,int32_t nesting_depth,std::vector<FileInfo> * out)204 Status StatSelector(const PlatformFilename& dir_fn, const FileSelector& select,
205                     int32_t nesting_depth, std::vector<FileInfo>* out) {
206   auto result = ListDir(dir_fn);
207   if (!result.ok()) {
208     auto status = result.status();
209     if (select.allow_not_found && status.IsIOError()) {
210       ARROW_ASSIGN_OR_RAISE(bool exists, FileExists(dir_fn));
211       if (!exists) {
212         return Status::OK();
213       }
214     }
215     return status;
216   }
217 
218   for (const auto& child_fn : *result) {
219     PlatformFilename full_fn = dir_fn.Join(child_fn);
220     ARROW_ASSIGN_OR_RAISE(FileInfo info, StatFile(full_fn.ToNative()));
221     if (info.type() != FileType::NotFound) {
222       out->push_back(std::move(info));
223     }
224     if (nesting_depth < select.max_recursion && select.recursive &&
225         info.type() == FileType::Directory) {
226       RETURN_NOT_OK(StatSelector(full_fn, select, nesting_depth + 1, out));
227     }
228   }
229   return Status::OK();
230 }
231 
232 }  // namespace
233 
Defaults()234 LocalFileSystemOptions LocalFileSystemOptions::Defaults() {
235   return LocalFileSystemOptions();
236 }
237 
Equals(const LocalFileSystemOptions & other) const238 bool LocalFileSystemOptions::Equals(const LocalFileSystemOptions& other) const {
239   return use_mmap == other.use_mmap;
240 }
241 
FromUri(const::arrow::internal::Uri & uri,std::string * out_path)242 Result<LocalFileSystemOptions> LocalFileSystemOptions::FromUri(
243     const ::arrow::internal::Uri& uri, std::string* out_path) {
244   if (!uri.username().empty() || !uri.password().empty()) {
245     return Status::Invalid("Unsupported username or password in local URI: '",
246                            uri.ToString(), "'");
247   }
248   std::string path;
249   const auto host = uri.host();
250   if (!host.empty()) {
251 #ifdef _WIN32
252     std::stringstream ss;
253     ss << "//" << host << "/" << internal::RemoveLeadingSlash(uri.path());
254     *out_path = ss.str();
255 #else
256     return Status::Invalid("Unsupported hostname in non-Windows local URI: '",
257                            uri.ToString(), "'");
258 #endif
259   } else {
260     *out_path = uri.path();
261   }
262 
263   // TODO handle use_mmap option
264   return LocalFileSystemOptions();
265 }
266 
LocalFileSystem(const io::IOContext & io_context)267 LocalFileSystem::LocalFileSystem(const io::IOContext& io_context)
268     : FileSystem(io_context), options_(LocalFileSystemOptions::Defaults()) {}
269 
LocalFileSystem(const LocalFileSystemOptions & options,const io::IOContext & io_context)270 LocalFileSystem::LocalFileSystem(const LocalFileSystemOptions& options,
271                                  const io::IOContext& io_context)
272     : FileSystem(io_context), options_(options) {}
273 
~LocalFileSystem()274 LocalFileSystem::~LocalFileSystem() {}
275 
NormalizePath(std::string path)276 Result<std::string> LocalFileSystem::NormalizePath(std::string path) {
277   ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
278   return fn.ToString();
279 }
280 
Equals(const FileSystem & other) const281 bool LocalFileSystem::Equals(const FileSystem& other) const {
282   if (other.type_name() != type_name()) {
283     return false;
284   } else {
285     const auto& localfs = ::arrow::internal::checked_cast<const LocalFileSystem&>(other);
286     return options_.Equals(localfs.options());
287   }
288 }
289 
GetFileInfo(const std::string & path)290 Result<FileInfo> LocalFileSystem::GetFileInfo(const std::string& path) {
291   ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
292   return StatFile(fn.ToNative());
293 }
294 
GetFileInfo(const FileSelector & select)295 Result<std::vector<FileInfo>> LocalFileSystem::GetFileInfo(const FileSelector& select) {
296   ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(select.base_dir));
297   std::vector<FileInfo> results;
298   RETURN_NOT_OK(StatSelector(fn, select, 0, &results));
299   return results;
300 }
301 
CreateDir(const std::string & path,bool recursive)302 Status LocalFileSystem::CreateDir(const std::string& path, bool recursive) {
303   ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
304   if (recursive) {
305     return ::arrow::internal::CreateDirTree(fn).status();
306   } else {
307     return ::arrow::internal::CreateDir(fn).status();
308   }
309 }
310 
DeleteDir(const std::string & path)311 Status LocalFileSystem::DeleteDir(const std::string& path) {
312   ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
313   auto st = ::arrow::internal::DeleteDirTree(fn, /*allow_not_found=*/false).status();
314   if (!st.ok()) {
315     // TODO Status::WithPrefix()?
316     std::stringstream ss;
317     ss << "Cannot delete directory '" << path << "': " << st.message();
318     return st.WithMessage(ss.str());
319   }
320   return Status::OK();
321 }
322 
DeleteDirContents(const std::string & path)323 Status LocalFileSystem::DeleteDirContents(const std::string& path) {
324   if (internal::IsEmptyPath(path)) {
325     return internal::InvalidDeleteDirContents(path);
326   }
327   ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
328   auto st = ::arrow::internal::DeleteDirContents(fn, /*allow_not_found=*/false).status();
329   if (!st.ok()) {
330     std::stringstream ss;
331     ss << "Cannot delete directory contents in '" << path << "': " << st.message();
332     return st.WithMessage(ss.str());
333   }
334   return Status::OK();
335 }
336 
DeleteRootDirContents()337 Status LocalFileSystem::DeleteRootDirContents() {
338   return Status::Invalid("LocalFileSystem::DeleteRootDirContents is strictly forbidden");
339 }
340 
DeleteFile(const std::string & path)341 Status LocalFileSystem::DeleteFile(const std::string& path) {
342   ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
343   return ::arrow::internal::DeleteFile(fn, /*allow_not_found=*/false).status();
344 }
345 
Move(const std::string & src,const std::string & dest)346 Status LocalFileSystem::Move(const std::string& src, const std::string& dest) {
347   ARROW_ASSIGN_OR_RAISE(auto sfn, PlatformFilename::FromString(src));
348   ARROW_ASSIGN_OR_RAISE(auto dfn, PlatformFilename::FromString(dest));
349 
350 #ifdef _WIN32
351   if (!MoveFileExW(sfn.ToNative().c_str(), dfn.ToNative().c_str(),
352                    MOVEFILE_REPLACE_EXISTING)) {
353     return IOErrorFromWinError(GetLastError(), "Failed renaming '", sfn.ToString(),
354                                "' to '", dfn.ToString(), "'");
355   }
356 #else
357   if (rename(sfn.ToNative().c_str(), dfn.ToNative().c_str()) == -1) {
358     return IOErrorFromErrno(errno, "Failed renaming '", sfn.ToString(), "' to '",
359                             dfn.ToString(), "'");
360   }
361 #endif
362   return Status::OK();
363 }
364 
CopyFile(const std::string & src,const std::string & dest)365 Status LocalFileSystem::CopyFile(const std::string& src, const std::string& dest) {
366   ARROW_ASSIGN_OR_RAISE(auto sfn, PlatformFilename::FromString(src));
367   ARROW_ASSIGN_OR_RAISE(auto dfn, PlatformFilename::FromString(dest));
368   // XXX should we use fstat() to compare inodes?
369   if (sfn.ToNative() == dfn.ToNative()) {
370     return Status::OK();
371   }
372 
373 #ifdef _WIN32
374   if (!CopyFileW(sfn.ToNative().c_str(), dfn.ToNative().c_str(),
375                  FALSE /* bFailIfExists */)) {
376     return IOErrorFromWinError(GetLastError(), "Failed copying '", sfn.ToString(),
377                                "' to '", dfn.ToString(), "'");
378   }
379   return Status::OK();
380 #else
381   ARROW_ASSIGN_OR_RAISE(auto is, OpenInputStream(src));
382   ARROW_ASSIGN_OR_RAISE(auto os, OpenOutputStream(dest));
383   RETURN_NOT_OK(internal::CopyStream(is, os, 1024 * 1024 /* chunk_size */, io_context()));
384   RETURN_NOT_OK(os->Close());
385   return is->Close();
386 #endif
387 }
388 
389 namespace {
390 
391 template <typename InputStreamType>
OpenInputStreamGeneric(const std::string & path,const LocalFileSystemOptions & options,const io::IOContext & io_context)392 Result<std::shared_ptr<InputStreamType>> OpenInputStreamGeneric(
393     const std::string& path, const LocalFileSystemOptions& options,
394     const io::IOContext& io_context) {
395   if (options.use_mmap) {
396     return io::MemoryMappedFile::Open(path, io::FileMode::READ);
397   } else {
398     return io::ReadableFile::Open(path, io_context.pool());
399   }
400 }
401 
402 }  // namespace
403 
OpenInputStream(const std::string & path)404 Result<std::shared_ptr<io::InputStream>> LocalFileSystem::OpenInputStream(
405     const std::string& path) {
406   return OpenInputStreamGeneric<io::InputStream>(path, options_, io_context());
407 }
408 
OpenInputFile(const std::string & path)409 Result<std::shared_ptr<io::RandomAccessFile>> LocalFileSystem::OpenInputFile(
410     const std::string& path) {
411   return OpenInputStreamGeneric<io::RandomAccessFile>(path, options_, io_context());
412 }
413 
414 namespace {
415 
OpenOutputStreamGeneric(const std::string & path,bool truncate,bool append)416 Result<std::shared_ptr<io::OutputStream>> OpenOutputStreamGeneric(const std::string& path,
417                                                                   bool truncate,
418                                                                   bool append) {
419   int fd;
420   bool write_only = true;
421   ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
422   ARROW_ASSIGN_OR_RAISE(
423       fd, ::arrow::internal::FileOpenWritable(fn, write_only, truncate, append));
424   auto maybe_stream = io::FileOutputStream::Open(fd);
425   if (!maybe_stream.ok()) {
426     ARROW_UNUSED(::arrow::internal::FileClose(fd));
427   }
428   return maybe_stream;
429 }
430 
431 }  // namespace
432 
OpenOutputStream(const std::string & path,const std::shared_ptr<const KeyValueMetadata> & metadata)433 Result<std::shared_ptr<io::OutputStream>> LocalFileSystem::OpenOutputStream(
434     const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
435   bool truncate = true;
436   bool append = false;
437   return OpenOutputStreamGeneric(path, truncate, append);
438 }
439 
OpenAppendStream(const std::string & path,const std::shared_ptr<const KeyValueMetadata> & metadata)440 Result<std::shared_ptr<io::OutputStream>> LocalFileSystem::OpenAppendStream(
441     const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
442   bool truncate = false;
443   bool append = true;
444   return OpenOutputStreamGeneric(path, truncate, append);
445 }
446 
447 }  // namespace fs
448 }  // namespace arrow
449