1#' Create a file cache object 2#' 3#' This creates a file cache which is to be used by sass for caching generated 4#' .css files. 5#' 6#' @param dir The directory in which to store the cached files. 7#' @param max_size The maximum size of the cache, in bytes. If the cache grows 8#' past this size, the least-recently-used objects will be removed until it 9#' fits within this size. 10#' @param max_age The maximum age of objects in the cache, in seconds. The 11#' default is one week. 12#' 13#' @seealso [sass_cache_get()], [sass_cache_context_dir()], [FileCache] 14#' @return A [FileCache] object. 15#' 16#' @examples 17#' \dontrun{ 18#' # Create a cache with the default settings 19#' cache <- sass_file_cache(sass_cache_context_dir()) 20#' 21#' # Clear the cache 22#' cache$reset() 23#' } 24#' 25#' @export 26sass_file_cache <- function( 27 dir, 28 max_size = 40 * 1024 ^ 2, 29 max_age = Inf 30) { 31 FileCache$new(dir, max_size = max_size, max_age = max_age) 32} 33 34#' Create a file cache object 35#' 36#' A file cache object is a key-file store that saves the values as files in a 37#' directory on disk. The objects are files on disk. They are stored and 38#' retrieved using the `get_file()`, `get_content()`, `set_file()`, and 39#' `set_content()` methods. Objects are automatically pruned from the cache 40#' according to the parameters `max_size`, `max_age`, `max_n`, and `evict`. 41#' 42#' @section Cache pruning: 43#' 44#' Cache pruning occurs when `set_file()` or `set_content()` is called, or it 45#' can be invoked manually by calling `prune()`. 46#' 47#' The disk cache will throttle the pruning so that it does not happen on 48#' every call to `set_file()` or `set_content()`, because the filesystem 49#' operations for checking the status of files can be slow. Instead, it will 50#' prune once in every 20 calls to `set_file()` or `set_content()`, or if at 51#' least 5 seconds have elapsed since the last prune occurred, whichever is 52#' first. These parameters are currently not customizable, but may be in the 53#' future. 54#' 55#' When a pruning occurs, if there are any objects that are older than 56#' `max_age`, they will be removed. 57#' 58#' The `max_size` and `max_n` parameters are applied to the cache as a whole, 59#' in contrast to `max_age`, which is applied to each object individually. 60#' 61#' If the number of objects in the cache exceeds `max_n`, then objects will be 62#' removed from the cache according to the eviction policy, which is set with 63#' the `evict` parameter. Objects will be removed so that the number of items 64#' is `max_n`. 65#' 66#' If the size of the objects in the cache exceeds `max_size`, then objects 67#' will be removed from the cache. Objects will be removed from the cache so 68#' that the total size remains under `max_size`. Note that the size is 69#' calculated using the size of the files, not the size of disk space used by 70#' the files --- these two values can differ because of files are stored in 71#' blocks on disk. For example, if the block size is 4096 bytes, then a file 72#' that is one byte in size will take 4096 bytes on disk. 73#' 74#' Another time that objects can be removed from the cache is when 75#' `get_file()` or `get_content()` is called. If the target object is older 76#' than `max_age`, it will be removed and the cache will report it as a 77#' missing value. 78#' 79#' @section Eviction policies: 80#' 81#' If `max_n` or `max_size` are used, then objects will be removed from the 82#' cache according to an eviction policy. The available eviction policies are: 83#' 84#' \describe{ \item{`"lru"`}{ Least Recently Used. The least recently used 85#' objects will be removed. This uses the filesystem's mtime property. When 86#' "lru" is used, each time `get_file()` or `get_content()` is called, it will 87#' update the file's mtime. } \item{`"fifo"`}{ First-in-first-out. The oldest 88#' objects will be removed. } } 89#' 90#' Both of these policies use files' mtime. Note that some filesystems 91#' (notably FAT) have poor mtime resolution. (atime is not used because 92#' support for atime is worse than mtime.) 93#' 94#' @section Sharing among multiple processes: 95#' 96#' The directory for a FileCache can be shared among multiple R processes. To 97#' do this, each R process should have a FileCache object that uses the same 98#' directory. Each FileCache will do pruning independently of the others, so 99#' if they have different pruning parameters, then one FileCache may remove 100#' cached objects before another FileCache would do so. 101#' 102#' Even though it is possible for multiple processes to share a FileCache 103#' directory, this should not be done on networked file systems, because of 104#' slow performance of networked file systems can cause problems. If you need 105#' a high-performance shared cache, you can use one built on a database like 106#' Redis, SQLite, mySQL, or similar. 107#' 108#' When multiple processes share a cache directory, there are some potential 109#' race conditions. For example, if your code calls `exists(key)` to check if 110#' an object is in the cache, and then call `get_file(key)`, the object may be 111#' removed from the cache in between those two calls, and `get_file(key)` will 112#' throw an error. Instead of calling the two functions, it is better to 113#' simply call `get_file(key)`, and use `tryCatch()` to handle the error that 114#' is thrown if the object is not in the cache. This effectively tests for 115#' existence and gets the object in one operation. 116#' 117#' It is also possible for one processes to prune objects at the same time 118#' that another processes is trying to prune objects. If this happens, you may 119#' see a warning from `file.remove()` failing to remove a file that has 120#' already been deleted. 121#' 122#' @keywords internal 123#' @importFrom R6 R6Class 124FileCache <- R6::R6Class("FileCache", 125 public = list( 126 #' @description Create a FileCache object. 127 #' @param dir Directory to store files for the cache. If `NULL` (the default) it 128 #' will create and use a temporary directory. 129 #' @param max_age Maximum age of files in cache before they are evicted, in 130 #' seconds. Use `Inf` for no age limit. 131 #' @param max_size Maximum size of the cache, in bytes. If the cache exceeds 132 #' this size, cached objects will be removed according to the value of the 133 #' `evict`. Use `Inf` for no size limit. 134 #' @param max_n Maximum number of objects in the cache. If the number of objects 135 #' exceeds this value, then cached objects will be removed according to the 136 #' value of `evict`. Use `Inf` for no limit of number of items. 137 #' @param evict The eviction policy to use to decide which objects are removed 138 #' when a cache pruning occurs. Currently, `"lru"` and `"fifo"` are supported. 139 #' @param destroy_on_finalize If `TRUE`, then when the FileCache object is 140 #' garbage collected, the cache directory and all objects inside of it will be 141 #' deleted from disk. If `FALSE` (the default), it will do nothing when 142 #' finalized. 143 #' @param logfile An optional filename or connection object to where logging 144 #' information will be written. To log to the console, use `stdout()`. 145 initialize = function( 146 dir = NULL, 147 max_size = 40 * 1024 ^ 2, 148 max_age = Inf, 149 max_n = Inf, 150 evict = c("lru", "fifo"), 151 destroy_on_finalize = FALSE, 152 logfile = NULL 153 ) { 154 if (is.null(dir)) { 155 dir <- tempfile("FileCache-") 156 } 157 if (!is.numeric(max_size)) stop("max_size must be a number. Use `Inf` for no limit.") 158 if (!is.numeric(max_age)) stop("max_age must be a number. Use `Inf` for no limit.") 159 if (!is.numeric(max_n)) stop("max_n must be a number. Use `Inf` for no limit.") 160 161 if (!dir.exists(dir)) { 162 private$log(paste0("initialize: Creating ", dir)) 163 dir.create(dir, recursive = TRUE) 164 } 165 166 private$dir_ <- normalizePath(dir, mustWork = TRUE) 167 private$max_size <- max_size 168 private$max_age <- max_age 169 private$max_n <- max_n 170 private$evict <- match.arg(evict) 171 private$destroy_on_finalize <- destroy_on_finalize 172 private$logfile <- logfile 173 174 private$prune_last_time <- as.numeric(Sys.time()) 175 }, 176 177 #' @description Get the content associated with `key`, and save in a file 178 #' named `outfile`. 179 #' @return `TRUE` if the object is found in the cache and copying succeeds, 180 #' `FALSE` otherwise. 181 #' @param outfile Name of output file. If `NULL`, return the content as 182 #' @param key Key. Must be lowercase numbers and letters. 183 #' @param overwrite If the output file already exists, should it be 184 #' overwritten? 185 get_file = function(key, outfile, overwrite = TRUE) { 186 private$log(paste0('get: key "', key, '"')) 187 self$is_destroyed(throw = TRUE) 188 validate_key(key) 189 if (!is.character(outfile) || length(outfile) != 1) { 190 stop("`outfile` must be a length-1 character vector.") 191 } 192 193 private$maybe_prune_single(key) 194 195 cache_file <- private$filename_full_path(key) 196 197 if (file.copy(cache_file, outfile, overwrite = overwrite)) { 198 private$log(paste0('get: key "', key, '" found and copied to ', outfile)) 199 if (private$evict == "lru"){ 200 Sys.setFileTime(cache_file, Sys.time()) 201 } 202 return(TRUE) 203 } 204 205 private$log(paste0('get: key "', key, '" is missing')) 206 FALSE 207 }, 208 209 #' @description Get the content associated with `key`, and return as either 210 #' string or a raw vector. 211 #' @return A character or raw vector if the object is found in the cache, 212 #' `NULL` otherwise. 213 #' @param key Key. Must be lowercase numbers and letters. 214 #' @param mode If `"text"`, return the content as a UTF-8-encoded text 215 #' string (a one element char vector). If `"raw"`, return the content as a 216 #' raw vector. 217 get_content = function(key, mode = c("text", "raw")) { 218 private$log(paste0('get_content: key "', key, '"')) 219 self$is_destroyed(throw = TRUE) 220 validate_key(key) 221 mode <- match.arg(mode) 222 223 private$maybe_prune_single(key) 224 225 cache_file <- private$filename_full_path(key) 226 227 errored <- FALSE 228 tryCatch( 229 { 230 if (mode == "text") { 231 result <- read_utf8(cache_file) 232 } else if (mode == "raw") { 233 result <- read_raw(cache_file) 234 } 235 if (private$evict == "lru"){ 236 Sys.setFileTime(cache_file, Sys.time()) 237 } 238 }, 239 error = function(e) { errored <<- TRUE } 240 ) 241 242 if (errored) { 243 private$log(paste0('get_content: key "', key, '" is missing')) 244 return(NULL) 245 } 246 247 result 248 }, 249 250 #' @description Sets content associated with `key`, from a file named 251 #' `infile`. 252 #' @return `TRUE` if copying the file into the cache succeeds, `FALSE` 253 #' otherwise. 254 #' @param key Key. Must be lowercase numbers and letters. 255 #' @param infile Name of input file. 256 set_file = function(key, infile) { 257 private$log(paste0('set: key "', key, '"')) 258 self$is_destroyed(throw = TRUE) 259 validate_key(key) 260 if (!is.character(infile) || length(infile) != 1) { 261 stop("`infile` must be a length-1 character vector.") 262 } 263 infile <- normalizePath(infile) 264 265 cache_file <- private$filename_full_path(key) 266 267 success <- file.copy(infile, cache_file, overwrite = TRUE) 268 if (success) { 269 private$log(paste0('set: key "', key, ' from file ', infile)) 270 } else { 271 private$log(paste0('set: error setting key "', key, ' from file ', infile)) 272 } 273 274 private$prune_throttled() 275 276 success 277 }, 278 279 #' @description Sets content associated with `key`, from a single-element 280 #' vector. 281 #' @return `TRUE` if setting the content in the cache succeeds, `FALSE` 282 #' otherwise. 283 #' @param key Key. Must be lowercase numbers and letters. 284 #' @param content A character or raw vector. If it is a character vector, 285 #' it will be written with UTF-8 encoding, with with elements collapsed 286 #' with `\\n` (consistent across platforms). 287 set_content = function(key, content) { 288 private$log(paste0('set_content: key "', key, '"')) 289 self$is_destroyed(throw = TRUE) 290 validate_key(key) 291 if (!is.character(content) && !is.raw(content)) { 292 stop("`content` must be a character or raw vector.") 293 } 294 cache_file <- private$filename_full_path(key) 295 296 success <- TRUE 297 tryCatch( 298 { 299 if (is.character(content)) { 300 write_utf8(content, cache_file) 301 } else if (is.raw(content)) { 302 writeBin(content, cache_file) 303 } 304 }, 305 error = function(e) { success <<- FALSE } 306 ) 307 if (!success) { 308 private$log(paste0('set_content: error setting key "', key, '"')) 309 } 310 311 private$prune_throttled() 312 313 success 314 }, 315 316 #' @description Check if content associated with `key` exists in cache 317 #' @param key Key. Must be lowercase numbers and letters. 318 #' @return `TRUE` if the object is in the cache, `FALSE` otherwise. 319 exists = function(key) { 320 self$is_destroyed(throw = TRUE) 321 validate_key(key) 322 file.exists(private$filename_full_path(key)) 323 }, 324 325 #' @description Get all keys 326 #' @return A character vector of all keys currently in the cache. 327 keys = function() { 328 self$is_destroyed(throw = TRUE) 329 dir(private$dir_) 330 }, 331 332 #' @description Remove an object 333 #' @param key Key. Must be lowercase numbers and letters. 334 #' @return `TRUE` if the object was found and successfully removed, `FALSE` 335 #' otherwise. 336 remove = function(key) { 337 private$log(paste0('remove: key "', key, '"')) 338 self$is_destroyed(throw = TRUE) 339 validate_key(key) 340 file.remove(private$filename_full_path(key)) 341 }, 342 343 #' @description Clear all objects from the cache. 344 reset = function() { 345 private$log(paste0('reset')) 346 self$is_destroyed(throw = TRUE) 347 file.remove(dir(private$dir_, full.names = TRUE)) 348 invisible(self) 349 }, 350 351 #' @description Returns the directory used for the cache. 352 dir = function() { 353 private$dir_ 354 }, 355 356 #' @description Prune the cache, using the parameters specified by 357 #' `max_size`, `max_age`, `max_n`, and `evict`. 358 prune = function() { 359 # TODO: It would be good to add parameters `n` and `size`, so that the 360 # cache can be pruned to `max_n - n` and `max_size - size` before adding 361 # an object. Right now we prune after adding the object, so the cache 362 # can temporarily grow past the limits. The reason we don't do this now 363 # is because it is expensive to find the size of the serialized object 364 # before adding it. 365 366 private$log('prune') 367 self$is_destroyed(throw = TRUE) 368 369 current_time <- Sys.time() 370 371 filenames <- dir(private$dir_, full.names = TRUE) 372 info <- file.info(filenames) 373 info <- info[info$isdir == FALSE, ] 374 info$name <- rownames(info) 375 rownames(info) <- NULL 376 # Files could be removed between the dir() and file.info() calls. The 377 # entire row for such files will have NA values. Remove those rows. 378 info <- info[!is.na(info$size), ] 379 380 # 1. Remove any files where the age exceeds max age. 381 if (is.finite(private$max_age)) { 382 timediff <- as.numeric(current_time - info$mtime, units = "secs") 383 rm_idx <- timediff > private$max_age 384 if (any(rm_idx)) { 385 private$log(paste0("prune max_age: Removing ", paste(info$name[rm_idx], collapse = ", "))) 386 rm_success <- file.remove(info$name[rm_idx]) 387 # This maps rm_success back into the TRUEs in the rm_idx vector. 388 # If (for example) rm_idx is c(F,T,F,T,T) and rm_success is c(T,F,T), 389 # then this line modifies rm_idx to be c(F,T,F,F,T). 390 rm_idx[rm_idx] <- rm_success 391 info <- info[!rm_idx, ] 392 } 393 } 394 395 # Sort objects by priority. The sorting is done in a function which can be 396 # called multiple times but only does the work the first time. 397 info_is_sorted <- FALSE 398 ensure_info_is_sorted <- function() { 399 if (info_is_sorted) return() 400 401 info <<- info[order(info$mtime, decreasing = TRUE), ] 402 info_is_sorted <<- TRUE 403 } 404 405 # 2. Remove files if there are too many. 406 if (is.finite(private$max_n) && nrow(info) > private$max_n) { 407 ensure_info_is_sorted() 408 rm_idx <- seq_len(nrow(info)) > private$max_n 409 private$log(paste0("prune max_n: Removing ", paste(info$name[rm_idx], collapse = ", "))) 410 rm_success <- file.remove(info$name[rm_idx]) 411 rm_idx[rm_idx] <- rm_success 412 info <- info[!rm_idx, ] 413 } 414 415 # 3. Remove files if cache is too large. 416 if (is.finite(private$max_size) && sum(info$size) > private$max_size) { 417 ensure_info_is_sorted() 418 cum_size <- cumsum(info$size) 419 rm_idx <- cum_size > private$max_size 420 private$log(paste0("prune max_size: Removing ", paste(info$name[rm_idx], collapse = ", "))) 421 rm_success <- file.remove(info$name[rm_idx]) 422 rm_idx[rm_idx] <- rm_success 423 info <- info[!rm_idx, ] 424 } 425 426 private$prune_last_time <- as.numeric(current_time) 427 428 invisible(self) 429 }, 430 431 #' @description Return the number of items currently in the cache. 432 size = function() { 433 self$is_destroyed(throw = TRUE) 434 length(dir(private$dir_)) 435 }, 436 437 #' @description Clears all objects in the cache, and removes the cache 438 #' directory from disk. 439 destroy = function() { 440 if (self$is_destroyed()) { 441 return(invisible(self)) 442 } 443 444 private$log(paste0("destroy: Removing ", private$dir_)) 445 # First create a sentinel file so that other processes sharing this 446 # cache know that the cache is to be destroyed. This is needed because 447 # the recursive unlink is not atomic: another process can add a file to 448 # the directory after unlink starts removing files but before it removes 449 # the directory, and when that happens, the directory removal will fail. 450 file.create(file.path(private$dir_, "._destroyed__")) 451 # Remove all the cache files. This will not remove the sentinel file. 452 file.remove(dir(private$dir_, full.names = TRUE)) 453 # Next remove dir recursively, including sentinel file. 454 unlink(private$dir_, recursive = TRUE) 455 private$destroyed <- TRUE 456 invisible(self) 457 }, 458 459 #' @description Reports whether the cache has been destroyed. 460 #' @param throw Should this function throw an error if the cache has been 461 #' destroyed? 462 is_destroyed = function(throw = FALSE) { 463 if (!dir.exists(private$dir_) || 464 file.exists(file.path(private$dir_, "._destroyed__"))) 465 { 466 # It's possible for another process to destroy a shared cache directory 467 private$destroyed <- TRUE 468 } 469 470 if (throw) { 471 if (private$destroyed) { 472 stop("Attempted to use cache which has been destroyed:\n ", private$dir_) 473 } 474 475 } else { 476 private$destroyed 477 } 478 }, 479 480 #' @description A finalizer for the cache. 481 finalize = function() { 482 if (private$destroy_on_finalize) { 483 self$destroy() 484 } 485 } 486 ), 487 488 private = list( 489 dir_ = NULL, 490 max_age = NULL, 491 max_size = NULL, 492 max_n = NULL, 493 evict = NULL, 494 destroy_on_finalize = NULL, 495 destroyed = FALSE, 496 logfile = NULL, 497 498 prune_throttle_counter = 0, 499 prune_last_time = NULL, 500 501 filename_full_path = function(filename) { 502 file.path(private$dir_, filename) 503 }, 504 505 # A wrapper for prune() that throttles it, because prune() can be 506 # expensive due to filesystem operations. This function will prune only 507 # once every 20 times it is called, or if it has been more than 5 seconds 508 # since the last time the cache was actually pruned, whichever is first. 509 # In the future, the behavior may be customizable. 510 prune_throttled = function() { 511 # Count the number of times prune() has been called. 512 private$prune_throttle_counter <- private$prune_throttle_counter + 1 513 514 if (private$prune_throttle_counter > 20 || 515 private$prune_last_time - as.numeric(Sys.time()) > 5) 516 { 517 self$prune() 518 private$prune_throttle_counter <- 0 519 } 520 }, 521 522 # Prunes a single object if it exceeds max_age. If the object does not 523 # exceed max_age, or if the object doesn't exist, do nothing. 524 maybe_prune_single = function(key) { 525 obj <- private$cache[[key]] 526 if (is.null(obj)) return() 527 528 timediff <- as.numeric(Sys.time()) - obj$mtime 529 if (timediff > private$max_age) { 530 private$log(paste0("pruning single object exceeding max_age: Removing ", key)) 531 rm(list = key, envir = private$cache) 532 } 533 }, 534 535 log = function(text) { 536 if (is.null(private$logfile)) return() 537 538 text <- paste0(format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] FileCache "), text) 539 cat(text, sep = "\n", file = private$logfile, append = TRUE) 540 } 541 ) 542) 543 544 545validate_key <- function(key) { 546 if (!is.character(key) || length(key) != 1 || nchar(key) == 0) { 547 stop("Invalid key: key must be single non-empty string.") 548 } 549 if (grepl("[^a-z0-9]", key)) { 550 stop("Invalid key: ", key, ". Only lowercase letters and numbers are allowed.") 551 } 552} 553