1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18#' @include arrow-package.R
19#' @title FileSystem entry info
20#' @usage NULL
21#' @format NULL
22#'
23#' @section Methods:
24#'
25#' - `base_name()` : The file base name (component after the last directory
26#'    separator).
27#' - `extension()` : The file extension
28#'
29#' @section Active bindings:
30#'
31#' - `$type`: The file type
32#' - `$path`: The full file path in the filesystem
33#' - `$size`: The size in bytes, if available.  Only regular files are
34#'    guaranteed to have a size.
35#' - `$mtime`: The time of last modification, if available.
36#'
37#' @rdname FileInfo
38#' @export
FileInfo <- R6Class("FileInfo",
  inherit = ArrowObject,
  public = list(
    # Both methods forward to the matching `fs___FileInfo__*` helper.
    base_name = function() fs___FileInfo__base_name(self),
    extension = function() fs___FileInfo__extension(self)
  ),
  active = list(
    # Every active binding below acts as a getter when read (its argument is
    # missing) and as a setter when assigned to (its argument is supplied).
    # Setters return invisibly.
    type = function(type) {
      if (missing(type)) {
        fs___FileInfo__type(self)
      } else {
        invisible(fs___FileInfo__set_type(self, type))
      }
    },

    path = function(path) {
      if (missing(path)) {
        fs___FileInfo__path(self)
      } else {
        # The new value has to be forwarded along with the object itself
        invisible(fs___FileInfo__set_path(self, path))
      }
    },

    size = function(size) {
      if (missing(size)) {
        fs___FileInfo__size(self)
      } else {
        invisible(fs___FileInfo__set_size(self, size))
      }
    },

    mtime = function(time) {
      if (missing(time)) {
        fs___FileInfo__mtime(self)
      } else {
        # Accept exactly one POSIXct timestamp; anything else (wrong class,
        # zero-length or multi-element vector) is rejected up front.
        if (!inherits(time, "POSIXct") || length(time) != 1L) {
          abort("invalid time")
        }
        invisible(fs___FileInfo__set_mtime(self, time))
      }
    }
  )
)
81
82#' @title file selector
83#' @format NULL
84#'
85#' @section Factory:
86#'
87#' The `$create()` factory method instantiates a `FileSelector` given the 3 fields
88#' described below.
89#'
90#' @section Fields:
91#'
92#' - `base_dir`: The directory in which to select files. If the path exists but
93#'    doesn't point to a directory, this should be an error.
94#' - `allow_not_found`: The behavior if `base_dir` doesn't exist in the
95#'    filesystem. If `FALSE`, an error is returned.  If `TRUE`, an empty
96#'    selection is returned
97#' - `recursive`: Whether to recurse into subdirectories.
98#'
99#' @rdname FileSelector
100#' @export
FileSelector <- R6Class(
  "FileSelector",
  inherit = ArrowObject,
  active = list(
    # Read-only bindings: each takes no argument and forwards to the
    # `fs___FileSelector__*` helper of the same name.
    base_dir = function() {
      fs___FileSelector__base_dir(self)
    },
    allow_not_found = function() {
      fs___FileSelector__allow_not_found(self)
    },
    recursive = function() {
      fs___FileSelector__recursive(self)
    }
  )
)
109
FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = FALSE) {
  # Normalize the separators in `base_dir` before building the selector,
  # then wrap the resulting pointer in the FileSelector R6 class.
  selector_ptr <- fs___FileSelector__create(
    clean_path_rel(base_dir),
    allow_not_found,
    recursive
  )
  shared_ptr(FileSelector, selector_ptr)
}
116
117#' @title FileSystem classes
118#' @description `FileSystem` is an abstract file system API,
119#' `LocalFileSystem` is an implementation accessing files
120#' on the local machine. `SubTreeFileSystem` is an implementation that delegates
121#' to another implementation after prepending a fixed base path
122#'
123#' @section Factory:
124#'
125#' The `$create()` factory methods instantiate the `FileSystem` object and
126#' take the following arguments, depending on the subclass:
127#'
128#' - no argument is needed for instantiating a `LocalFileSystem`
129#' - `base_path` and `base_fs` for instantiating a `SubTreeFileSystem`
130#'
131#' @section Methods:
132#'
133#' - `$GetFileInfo(x)`: `x` may be a [FileSelector][FileSelector] or a character
134#'    vector of paths. Returns a list of [FileInfo][FileInfo]
135#' - `$CreateDir(path, recursive = TRUE)`: Create a directory and subdirectories.
136#' - `$DeleteDir(path)`: Delete a directory and its contents, recursively.
137#' - `$DeleteDirContents(path)`: Delete a directory's contents, recursively.
138#'    Like `$DeleteDir()`,
139#'    but doesn't delete the directory itself. Passing an empty path (`""`) will
140#'    wipe the entire filesystem tree.
141#' - `$DeleteFile(path)` : Delete a file.
142#' - `$DeleteFiles(paths)` : Delete many files. The default implementation
143#'    issues individual delete operations in sequence.
144#' - `$Move(src, dest)`: Move / rename a file or directory. If the destination
145#'    exists:
146#'      if it is a non-empty directory, an error is returned
147#'      otherwise, if it has the same type as the source, it is replaced
148#'      otherwise, behavior is unspecified (implementation-dependent).
149#' - `$CopyFile(src, dest)`: Copy a file. If the destination exists and is a
150#'    directory, an error is returned. Otherwise, it is replaced.
151#' - `$OpenInputStream(path)`: Open an [input stream][InputStream] for
152#'    sequential reading.
153#' - `$OpenInputFile(path)`: Open an [input file][RandomAccessFile] for random
154#'    access reading.
155#' - `$OpenOutputStream(path)`: Open an [output stream][OutputStream] for
156#'    sequential writing.
157#' - `$OpenAppendStream(path)`: Open an [output stream][OutputStream] for
158#'    appending.
159#'
160#' @usage NULL
161#' @format NULL
162#' @docType class
163#'
164#' @rdname FileSystem
165#' @name FileSystem
166#' @export
FileSystem <- R6Class("FileSystem", inherit = ArrowObject,
  public = list(
    ..dispatch = function() {
      # Re-wrap the underlying pointer in the R6 subclass that matches the
      # reported type name; any other type name leaves the object unchanged.
      type_name <- self$type_name
      if (type_name == "local") {
        shared_ptr(LocalFileSystem, self$pointer())
      } else if (type_name == "s3") {
        shared_ptr(S3FileSystem, self$pointer())
      } else if (type_name == "subtree") {
        shared_ptr(SubTreeFileSystem, self$pointer())
      } else {
        self
      }
    },

    GetFileInfo = function(x) {
      # `x` is either a FileSelector or a character vector of paths; anything
      # else is an error. Both accepted forms yield a list of FileInfo objects.
      if (inherits(x, "FileSelector")) {
        infos <- fs___FileSystem__GetTargetInfos_FileSelector(self, x)
      } else if (is.character(x)) {
        infos <- fs___FileSystem__GetTargetInfos_Paths(self, clean_path_rel(x))
      } else {
        abort("incompatible type for FileSystem$GetFileInfo()")
      }
      map(infos, shared_ptr, class = FileInfo)
    },

    # All path arguments below go through clean_path_rel() before being
    # handed to the corresponding `fs___FileSystem__*` helper.
    CreateDir = function(path, recursive = TRUE) {
      fs___FileSystem__CreateDir(self, clean_path_rel(path), isTRUE(recursive))
    },

    DeleteDir = function(path) {
      fs___FileSystem__DeleteDir(self, clean_path_rel(path))
    },

    DeleteDirContents = function(path) {
      fs___FileSystem__DeleteDirContents(self, clean_path_rel(path))
    },

    DeleteFile = function(path) {
      fs___FileSystem__DeleteFile(self, clean_path_rel(path))
    },

    DeleteFiles = function(paths) {
      fs___FileSystem__DeleteFiles(self, clean_path_rel(paths))
    },

    Move = function(src, dest) {
      fs___FileSystem__Move(self, clean_path_rel(src), clean_path_rel(dest))
    },

    CopyFile = function(src, dest) {
      fs___FileSystem__CopyFile(self, clean_path_rel(src), clean_path_rel(dest))
    },

    OpenInputStream = function(path) {
      shared_ptr(
        InputStream,
        fs___FileSystem__OpenInputStream(self, clean_path_rel(path))
      )
    },

    OpenInputFile = function(path) {
      # Random-access reads need the RandomAccessFile wrapper (as documented
      # for this method), not the sequential InputStream one.
      shared_ptr(
        RandomAccessFile,
        fs___FileSystem__OpenInputFile(self, clean_path_rel(path))
      )
    },

    OpenOutputStream = function(path) {
      shared_ptr(
        OutputStream,
        fs___FileSystem__OpenOutputStream(self, clean_path_rel(path))
      )
    },

    OpenAppendStream = function(path) {
      shared_ptr(
        OutputStream,
        fs___FileSystem__OpenAppendStream(self, clean_path_rel(path))
      )
    }
  ),
  active = list(
    # Read-only: the type name string that ..dispatch() compares against.
    type_name = function() fs___FileSystem__type_name(self)
  )
)
FileSystem$from_uri <- function(uri) {
  # The helper returns a list whose `fs` element is a raw pointer; replace it
  # with the most specific FileSystem R6 object and return the list.
  parsed <- fs___FileSystemFromUri(uri)
  generic_fs <- shared_ptr(FileSystem, parsed$fs)
  parsed$fs <- generic_fs$..dispatch()
  parsed
}
249
250#' @usage NULL
251#' @format NULL
252#' @rdname FileSystem
253#' @export
LocalFileSystem <- R6Class(
  "LocalFileSystem",
  inherit = FileSystem
)
# Factory: takes no arguments and wraps a freshly created pointer.
LocalFileSystem$create <- function() {
  local_ptr <- fs___LocalFileSystem__create()
  shared_ptr(LocalFileSystem, local_ptr)
}
258
259#' @usage NULL
260#' @format NULL
261#' @rdname FileSystem
262#' @export
S3FileSystem <- R6Class(
  "S3FileSystem",
  inherit = FileSystem
)
# Factory: runs the S3 initialization helper first, then wraps a freshly
# created pointer.
S3FileSystem$create <- function() {
  fs___EnsureS3Initialized()
  s3_ptr <- fs___S3FileSystem__create()
  shared_ptr(S3FileSystem, s3_ptr)
}
268
arrow_with_s3 <- function() {
  # Returns whatever the compiled `_s3_available` routine reports
  # (presumably a logical flag for S3 support -- confirm against the C++ side).
  s3_flag <- .Call(`_s3_available`)
  s3_flag
}
272
273#' @usage NULL
274#' @format NULL
275#' @rdname FileSystem
276#' @export
SubTreeFileSystem <- R6Class(
  "SubTreeFileSystem",
  inherit = FileSystem
)
# Factory: `base_path` has its separators normalized, `base_fs` is passed
# through untouched.
SubTreeFileSystem$create <- function(base_path, base_fs) {
  shared_ptr(
    SubTreeFileSystem,
    fs___SubTreeFileSystem__create(clean_path_rel(base_path), base_fs)
  )
}
282
clean_path_abs <- function(path) {
  # Produce a forward-slashed path suitable for passing to Arrow.
  # `mustWork = FALSE` means a path that does not exist is neither an error
  # nor a warning.
  normalized <- normalizePath(path = path, winslash = "/", mustWork = FALSE)
  normalized
}
287
clean_path_rel <- function(path) {
  # Make sure all path separators are "/", not "\" as on Windows.
  #
  # The platform test is a scalar, so a plain `if`/`else` is used rather than
  # the vectorized `ifelse()`. `.Platform$OS.type` is used instead of
  # `Sys.info()`, which may return NULL on platforms that do not support it.
  #
  # `path_sep` is a regular expression: four backslashes in source are two in
  # the string, which the regex engine reads as one literal backslash.
  path_sep <- if (.Platform$OS.type == "windows") "\\\\" else "/"
  gsub(path_sep, "/", path)
}
293