# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

#' @include arrow-package.R
#' @title FileSystem entry info
#' @usage NULL
#' @format NULL
#'
#' @section Methods:
#'
#' - `base_name()` : The file base name (component after the last directory
#'    separator).
#' - `extension()` : The file extension
#'
#' @section Active bindings:
#'
#' - `$type`: The file type
#' - `$path`: The full file path in the filesystem
#' - `$size`: The size in bytes, if available. Only regular files are
#'    guaranteed to have a size.
#' - `$mtime`: The time of last modification, if available.
#'
#' @rdname FileInfo
#' @export
FileInfo <- R6Class("FileInfo",
  inherit = ArrowObject,
  public = list(
    # Thin wrappers around the native accessors for this entry.
    base_name = function() fs___FileInfo__base_name(self),
    extension = function() fs___FileInfo__extension(self)
  ),
  active = list(
    # Each active binding acts as a getter when called without a value and as
    # a setter otherwise. Setters return invisibly.
    type = function(type) {
      if (missing(type)) {
        fs___FileInfo__type(self)
      } else {
        invisible(fs___FileInfo__set_type(self, type))
      }
    },

    path = function(path) {
      if (missing(path)) {
        fs___FileInfo__path(self)
      } else {
        # The new value must be handed to the native setter.
        invisible(fs___FileInfo__set_path(self, path))
      }
    },

    size = function(size) {
      if (missing(size)) {
        fs___FileInfo__size(self)
      } else {
        invisible(fs___FileInfo__set_size(self, size))
      }
    },

    mtime = function(time) {
      if (missing(time)) {
        fs___FileInfo__mtime(self)
      } else {
        # Only a single POSIXct timestamp is accepted: reject anything that
        # is not POSIXct, and anything that is not exactly length one.
        if (!inherits(time, "POSIXct") || length(time) != 1L) {
          abort("invalid time")
        }
        invisible(fs___FileInfo__set_mtime(self, time))
      }
    }
  )
)

#' @title file selector
#' @format NULL
#'
#' @section Factory:
#'
#' The `$create()` factory method instantiates a `FileSelector` given the 3 fields
#' described below.
#'
#' @section Fields:
#'
#' - `base_dir`: The directory in which to select files. If the path exists but
#'    doesn't point to a directory, this should be an error.
#' - `allow_not_found`: The behavior if `base_dir` doesn't exist in the
#'    filesystem. If `FALSE`, an error is returned. If `TRUE`, an empty
#'    selection is returned
#' - `recursive`: Whether to recurse into subdirectories.
#'
#' @rdname FileSelector
#' @export
FileSelector <- R6Class("FileSelector",
  inherit = ArrowObject,
  active = list(
    # Read-only views onto the three selector fields.
    base_dir = function() {
      fs___FileSelector__base_dir(self)
    },
    allow_not_found = function() {
      fs___FileSelector__allow_not_found(self)
    },
    recursive = function() {
      fs___FileSelector__recursive(self)
    }
  )
)

# Build a FileSelector: the base directory is passed through clean_path_rel()
# before the native selector is created and wrapped.
FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = FALSE) {
  selector_ptr <- fs___FileSelector__create(
    clean_path_rel(base_dir),
    allow_not_found,
    recursive
  )
  shared_ptr(FileSelector, selector_ptr)
}

#' @title FileSystem classes
#' @description `FileSystem` is an abstract file system API,
#' `LocalFileSystem` is an implementation accessing files
#' on the local machine. `SubTreeFileSystem` is an implementation that delegates
#' to another implementation after prepending a fixed base path
#'
#' @section Factory:
#'
#' The `$create()` factory methods instantiate the `FileSystem` object and
#' take the following arguments, depending on the subclass:
#'
#' - no argument is needed for instantiating a `LocalFileSystem`
#' - `base_path` and `base_fs` for instantiating a `SubTreeFileSystem`
#'
#' @section Methods:
#'
#' - `$GetFileInfo(x)`: `x` may be a [FileSelector][FileSelector] or a character
#'    vector of paths. Returns a list of [FileInfo][FileInfo]
#' - `$CreateDir(path, recursive = TRUE)`: Create a directory and subdirectories.
#' - `$DeleteDir(path)`: Delete a directory and its contents, recursively.
#' - `$DeleteDirContents(path)`: Delete a directory's contents, recursively.
#'    Like `$DeleteDir()`,
#'    but doesn't delete the directory itself. Passing an empty path (`""`) will
#'    wipe the entire filesystem tree.
#' - `$DeleteFile(path)` : Delete a file.
#' - `$DeleteFiles(paths)` : Delete many files. The default implementation
#'    issues individual delete operations in sequence.
#' - `$Move(src, dest)`: Move / rename a file or directory. If the destination
#'    exists:
#'      if it is a non-empty directory, an error is returned
#'      otherwise, if it has the same type as the source, it is replaced
#'      otherwise, behavior is unspecified (implementation-dependent).
#' - `$CopyFile(src, dest)`: Copy a file. If the destination exists and is a
#'    directory, an error is returned. Otherwise, it is replaced.
#' - `$OpenInputStream(path)`: Open an [input stream][InputStream] for
#'    sequential reading.
#' - `$OpenInputFile(path)`: Open an [input file][RandomAccessFile] for random
#'    access reading.
#' - `$OpenOutputStream(path)`: Open an [output stream][OutputStream] for
#'    sequential writing.
#' - `$OpenAppendStream(path)`: Open an [output stream][OutputStream] for
#'    appending.
#'
#' @usage NULL
#' @format NULL
#' @docType class
#'
#' @rdname FileSystem
#' @name FileSystem
#' @export
FileSystem <- R6Class("FileSystem", inherit = ArrowObject,
  public = list(
    # Re-wrap this object's pointer in the subclass that matches its
    # `type_name`; unrecognised type names return `self` unchanged.
    ..dispatch = function() {
      type_name <- self$type_name
      if (type_name == "local") {
        shared_ptr(LocalFileSystem, self$pointer())
      } else if (type_name == "s3") {
        shared_ptr(S3FileSystem, self$pointer())
      } else if (type_name == "subtree") {
        shared_ptr(SubTreeFileSystem, self$pointer())
      } else {
        self
      }
    },

    # `x` is either a FileSelector or a character vector of paths; anything
    # else aborts. Each native result is wrapped as a FileInfo.
    GetFileInfo = function(x) {
      if (inherits(x, "FileSelector")) {
        map(
          fs___FileSystem__GetTargetInfos_FileSelector(self, x),
          shared_ptr,
          class = FileInfo
        )
      } else if (is.character(x)) {
        map(
          fs___FileSystem__GetTargetInfos_Paths(self, clean_path_rel(x)),
          shared_ptr,
          class = FileInfo
        )
      } else {
        abort("incompatible type for FileSystem$GetFileInfo()")
      }
    },

    # All path arguments below go through clean_path_rel() before being
    # handed to the native filesystem call.
    CreateDir = function(path, recursive = TRUE) {
      fs___FileSystem__CreateDir(self, clean_path_rel(path), isTRUE(recursive))
    },

    DeleteDir = function(path) {
      fs___FileSystem__DeleteDir(self, clean_path_rel(path))
    },

    DeleteDirContents = function(path) {
      fs___FileSystem__DeleteDirContents(self, clean_path_rel(path))
    },

    DeleteFile = function(path) {
      fs___FileSystem__DeleteFile(self, clean_path_rel(path))
    },

    DeleteFiles = function(paths) {
      fs___FileSystem__DeleteFiles(self, clean_path_rel(paths))
    },

    Move = function(src, dest) {
      fs___FileSystem__Move(self, clean_path_rel(src), clean_path_rel(dest))
    },

    CopyFile = function(src, dest) {
      fs___FileSystem__CopyFile(self, clean_path_rel(src), clean_path_rel(dest))
    },

    OpenInputStream = function(path) {
      shared_ptr(InputStream, fs___FileSystem__OpenInputStream(self, clean_path_rel(path)))
    },
    OpenInputFile = function(path) {
      # Wrapped as RandomAccessFile, matching the documented return type of
      # this method (random access reading), rather than a plain InputStream.
      shared_ptr(RandomAccessFile, fs___FileSystem__OpenInputFile(self, clean_path_rel(path)))
    },
    OpenOutputStream = function(path) {
      shared_ptr(OutputStream, fs___FileSystem__OpenOutputStream(self, clean_path_rel(path)))
    },
    OpenAppendStream = function(path) {
      shared_ptr(OutputStream, fs___FileSystem__OpenAppendStream(self, clean_path_rel(path)))
    }
  ),
  active = list(
    type_name = function() fs___FileSystem__type_name(self)
  )
)

# Returns the result of fs___FileSystemFromUri() with its `fs` element
# replaced by a FileSystem object dispatched to the matching subclass.
FileSystem$from_uri <- function(uri) {
  out <- fs___FileSystemFromUri(uri)
  out$fs <- shared_ptr(FileSystem, out$fs)$..dispatch()
  out
}

#' @usage NULL
#' @format NULL
#' @rdname FileSystem
#' @export
LocalFileSystem <- R6Class("LocalFileSystem", inherit = FileSystem)
LocalFileSystem$create <- function() {
  shared_ptr(LocalFileSystem, fs___LocalFileSystem__create())
}

#' @usage NULL
#' @format NULL
#' @rdname FileSystem
#' @export
S3FileSystem <- R6Class("S3FileSystem", inherit = FileSystem)
S3FileSystem$create <- function() {
  # fs___EnsureS3Initialized() is called before the filesystem is created.
  fs___EnsureS3Initialized()
  shared_ptr(S3FileSystem, fs___S3FileSystem__create())
}
# Calls the native `_s3_available` routine and returns its result.
arrow_with_s3 <- function() {
  .Call(`_s3_available`)
}

#' @usage NULL
#' @format NULL
#' @rdname FileSystem
#' @export
SubTreeFileSystem <- R6Class("SubTreeFileSystem", inherit = FileSystem)
SubTreeFileSystem$create <- function(base_path, base_fs) {
  xp <- fs___SubTreeFileSystem__create(clean_path_rel(base_path), base_fs)
  shared_ptr(SubTreeFileSystem, xp)
}

clean_path_abs <- function(path) {
  # Make sure we have a valid, absolute, forward-slashed path for passing to Arrow
  normalizePath(path, winslash = "/", mustWork = FALSE)
}

clean_path_rel <- function(path) {
  # Make sure all path separators are "/", not "\" as on Windows.
  # `.Platform$OS.type` is used for the platform check because `Sys.info()`
  # may return NULL on platforms where it is not implemented.
  path_sep <- if (identical(.Platform$OS.type, "windows")) "\\" else "/"
  # `fixed = TRUE` treats the separator as a literal string, so the single
  # backslash needs no regex escaping. gsub() still runs on every platform,
  # so the result is always a character vector.
  gsub(path_sep, "/", path, fixed = TRUE)
}