1#' Create a file cache object
2#'
3#' This creates a file cache which is to be used by sass for caching generated
4#' .css files.
5#'
6#' @param dir The directory in which to store the cached files.
7#' @param max_size The maximum size of the cache, in bytes. If the cache grows
8#'   past this size, the least-recently-used objects will be removed until it
9#'   fits within this size.
10#' @param max_age The maximum age of objects in the cache, in seconds. The
11#'   default is one week.
12#'
13#' @seealso [sass_cache_get()], [sass_cache_context_dir()], [FileCache]
14#' @return A [FileCache] object.
15#'
16#' @examples
17#' \dontrun{
18#' # Create a cache with the default settings
19#' cache <- sass_file_cache(sass_cache_context_dir())
20#'
21#' # Clear the cache
22#' cache$reset()
23#' }
24#'
25#' @export
26sass_file_cache <- function(
27  dir,
28  max_size = 40 * 1024 ^ 2,
29  max_age = Inf
30) {
31  FileCache$new(dir, max_size = max_size, max_age = max_age)
32}
33
34#' Create a file cache object
35#'
36#' A file cache object is a key-file store that saves the values as files in a
37#' directory on disk. The objects are files on disk. They are stored and
38#' retrieved using the `get_file()`, `get_content()`, `set_file()`, and
39#' `set_content()` methods. Objects are automatically pruned from the cache
40#' according to the parameters `max_size`, `max_age`, `max_n`, and `evict`.
41#'
42#' @section Cache pruning:
43#'
44#'   Cache pruning occurs when `set_file()` or `set_content()` is called, or it
45#'   can be invoked manually by calling `prune()`.
46#'
47#'   The disk cache will throttle the pruning so that it does not happen on
48#'   every call to `set_file()` or `set_content()`, because the filesystem
49#'   operations for checking the status of files can be slow. Instead, it will
50#'   prune once in every 20 calls to `set_file()` or `set_content()`, or if at
51#'   least 5 seconds have elapsed since the last prune occurred, whichever is
52#'   first. These parameters are currently not customizable, but may be in the
53#'   future.
54#'
55#'   When a pruning occurs, if there are any objects that are older than
56#'   `max_age`, they will be removed.
57#'
58#'   The `max_size` and `max_n` parameters are applied to the cache as a whole,
59#'   in contrast to `max_age`, which is applied to each object individually.
60#'
61#'   If the number of objects in the cache exceeds `max_n`, then objects will be
62#'   removed from the cache according to the eviction policy, which is set with
63#'   the `evict` parameter. Objects will be removed so that the number of items
64#'   is `max_n`.
65#'
66#'   If the size of the objects in the cache exceeds `max_size`, then objects
67#'   will be removed from the cache. Objects will be removed from the cache so
68#'   that the total size remains under `max_size`. Note that the size is
69#'   calculated using the size of the files, not the size of disk space used by
70#'   the files --- these two values can differ because of files are stored in
71#'   blocks on disk. For example, if the block size is 4096 bytes, then a file
72#'   that is one byte in size will take 4096 bytes on disk.
73#'
74#'   Another time that objects can be removed from the cache is when
75#'   `get_file()` or `get_content()` is called. If the target object is older
76#'   than `max_age`, it will be removed and the cache will report it as a
77#'   missing value.
78#'
79#' @section Eviction policies:
80#'
81#'   If `max_n` or `max_size` are used, then objects will be removed from the
82#'   cache according to an eviction policy. The available eviction policies are:
83#'
84#'   \describe{ \item{`"lru"`}{ Least Recently Used. The least recently used
85#'   objects will be removed. This uses the filesystem's mtime property. When
86#'   "lru" is used, each time `get_file()` or `get_content()` is called, it will
87#'   update the file's mtime. } \item{`"fifo"`}{ First-in-first-out. The oldest
88#'   objects will be removed. } }
89#'
90#'   Both of these policies use files' mtime. Note that some filesystems
91#'   (notably FAT) have poor mtime resolution. (atime is not used because
92#'   support for atime is worse than mtime.)
93#'
94#' @section Sharing among multiple processes:
95#'
96#'   The directory for a FileCache can be shared among multiple R processes. To
97#'   do this, each R process should have a FileCache object that uses the same
98#'   directory. Each FileCache will do pruning independently of the others, so
99#'   if they have different pruning parameters, then one FileCache may remove
100#'   cached objects before another FileCache would do so.
101#'
102#'   Even though it is possible for multiple processes to share a FileCache
103#'   directory, this should not be done on networked file systems, because of
104#'   slow performance of networked file systems can cause problems. If you need
105#'   a high-performance shared cache, you can use one built on a database like
106#'   Redis, SQLite, mySQL, or similar.
107#'
108#'   When multiple processes share a cache directory, there are some potential
109#'   race conditions. For example, if your code calls `exists(key)` to check if
110#'   an object is in the cache, and then call `get_file(key)`, the object may be
111#'   removed from the cache in between those two calls, and `get_file(key)` will
112#'   throw an error. Instead of calling the two functions, it is better to
113#'   simply call `get_file(key)`, and use `tryCatch()` to handle the error that
114#'   is thrown if the object is not in the cache. This effectively tests for
115#'   existence and gets the object in one operation.
116#'
117#'   It is also possible for one processes to prune objects at the same time
118#'   that another processes is trying to prune objects. If this happens, you may
119#'   see a warning from `file.remove()` failing to remove a file that has
120#'   already been deleted.
121#'
122#' @keywords internal
123#' @importFrom R6 R6Class
124FileCache <- R6::R6Class("FileCache",
125  public = list(
126    #' @description Create a FileCache object.
127    #' @param dir Directory to store files for the cache. If `NULL` (the default) it
128    #'   will create and use a temporary directory.
129    #' @param max_age Maximum age of files in cache before they are evicted, in
130    #'   seconds. Use `Inf` for no age limit.
131    #' @param max_size Maximum size of the cache, in bytes. If the cache exceeds
132    #'   this size, cached objects will be removed according to the value of the
133    #'   `evict`. Use `Inf` for no size limit.
134    #' @param max_n Maximum number of objects in the cache. If the number of objects
135    #'   exceeds this value, then cached objects will be removed according to the
136    #'   value of `evict`. Use `Inf` for no limit of number of items.
137    #' @param evict The eviction policy to use to decide which objects are removed
138    #'   when a cache pruning occurs. Currently, `"lru"` and `"fifo"` are supported.
139    #' @param destroy_on_finalize If `TRUE`, then when the FileCache object is
140    #'   garbage collected, the cache directory and all objects inside of it will be
141    #'   deleted from disk. If `FALSE` (the default), it will do nothing when
142    #'   finalized.
143    #' @param logfile An optional filename or connection object to where logging
144    #'   information will be written. To log to the console, use `stdout()`.
145    initialize = function(
146      dir = NULL,
147      max_size = 40 * 1024 ^ 2,
148      max_age = Inf,
149      max_n = Inf,
150      evict = c("lru", "fifo"),
151      destroy_on_finalize = FALSE,
152      logfile = NULL
153    ) {
154      if (is.null(dir)) {
155        dir <- tempfile("FileCache-")
156      }
157      if (!is.numeric(max_size)) stop("max_size must be a number. Use `Inf` for no limit.")
158      if (!is.numeric(max_age))  stop("max_age must be a number. Use `Inf` for no limit.")
159      if (!is.numeric(max_n))    stop("max_n must be a number. Use `Inf` for no limit.")
160
161      if (!dir.exists(dir)) {
162        private$log(paste0("initialize: Creating ", dir))
163        dir.create(dir, recursive = TRUE)
164      }
165
166      private$dir_                <- normalizePath(dir, mustWork = TRUE)
167      private$max_size            <- max_size
168      private$max_age             <- max_age
169      private$max_n               <- max_n
170      private$evict               <- match.arg(evict)
171      private$destroy_on_finalize <- destroy_on_finalize
172      private$logfile             <- logfile
173
174      private$prune_last_time     <- as.numeric(Sys.time())
175    },
176
177    #' @description Get the content associated with `key`, and save in a file
178    #'   named `outfile`.
179    #' @return `TRUE` if the object is found in the cache and copying succeeds,
180    #'   `FALSE` otherwise.
181    #' @param outfile Name of output file. If `NULL`, return the content as
182    #' @param key Key. Must be lowercase numbers and letters.
183    #' @param overwrite If the output file already exists, should it be
184    #'   overwritten?
185    get_file = function(key, outfile, overwrite = TRUE) {
186      private$log(paste0('get: key "', key, '"'))
187      self$is_destroyed(throw = TRUE)
188      validate_key(key)
189      if (!is.character(outfile) || length(outfile) != 1) {
190        stop("`outfile` must be a length-1 character vector.")
191      }
192
193      private$maybe_prune_single(key)
194
195      cache_file <- private$filename_full_path(key)
196
197      if (file.copy(cache_file, outfile, overwrite = overwrite)) {
198        private$log(paste0('get: key "', key, '" found and copied to ', outfile))
199        if (private$evict == "lru"){
200          Sys.setFileTime(cache_file, Sys.time())
201        }
202        return(TRUE)
203      }
204
205      private$log(paste0('get: key "', key, '" is missing'))
206      FALSE
207    },
208
209    #' @description Get the content associated with `key`, and return as either
210    #'   string or a raw vector.
211    #' @return A character or raw vector if the object is found in the cache,
212    #'   `NULL` otherwise.
213    #' @param key Key. Must be lowercase numbers and letters.
214    #' @param mode If `"text"`, return the content as a UTF-8-encoded text
215    #'   string (a one element char vector). If `"raw"`, return the content as a
216    #'   raw vector.
217    get_content = function(key, mode = c("text", "raw")) {
218      private$log(paste0('get_content: key "', key, '"'))
219      self$is_destroyed(throw = TRUE)
220      validate_key(key)
221      mode <- match.arg(mode)
222
223      private$maybe_prune_single(key)
224
225      cache_file <- private$filename_full_path(key)
226
227      errored <- FALSE
228      tryCatch(
229        {
230          if (mode == "text") {
231            result <- read_utf8(cache_file)
232          } else if (mode == "raw") {
233            result <- read_raw(cache_file)
234          }
235          if (private$evict == "lru"){
236            Sys.setFileTime(cache_file, Sys.time())
237          }
238        },
239        error = function(e) { errored <<- TRUE }
240      )
241
242      if (errored) {
243        private$log(paste0('get_content: key "', key, '" is missing'))
244        return(NULL)
245      }
246
247      result
248    },
249
250    #' @description Sets content associated with `key`, from a file named
251    #' `infile`.
252    #' @return `TRUE` if copying the file into the cache succeeds, `FALSE`
253    #'   otherwise.
254    #' @param key Key. Must be lowercase numbers and letters.
255    #' @param infile Name of input file.
256    set_file = function(key, infile) {
257      private$log(paste0('set: key "', key, '"'))
258      self$is_destroyed(throw = TRUE)
259      validate_key(key)
260      if (!is.character(infile) || length(infile) != 1) {
261        stop("`infile` must be a length-1 character vector.")
262      }
263      infile <- normalizePath(infile)
264
265      cache_file <- private$filename_full_path(key)
266
267      success <- file.copy(infile, cache_file, overwrite = TRUE)
268      if (success) {
269        private$log(paste0('set: key "', key, ' from file ', infile))
270      } else {
271        private$log(paste0('set: error setting key "', key, ' from file ', infile))
272      }
273
274      private$prune_throttled()
275
276      success
277    },
278
279    #' @description Sets content associated with `key`, from a single-element
280    #'   vector.
281    #' @return `TRUE` if setting the content in the cache succeeds, `FALSE`
282    #'   otherwise.
283    #' @param key Key. Must be lowercase numbers and letters.
284    #' @param content A character or raw vector. If it is a character vector,
285    #'   it will be written with UTF-8 encoding, with with elements collapsed
286    #'   with `\\n` (consistent across platforms).
287    set_content = function(key, content) {
288      private$log(paste0('set_content: key "', key, '"'))
289      self$is_destroyed(throw = TRUE)
290      validate_key(key)
291      if (!is.character(content) && !is.raw(content)) {
292        stop("`content` must be a character or raw vector.")
293      }
294      cache_file <- private$filename_full_path(key)
295
296      success <- TRUE
297      tryCatch(
298        {
299          if (is.character(content)) {
300            write_utf8(content, cache_file)
301          } else if (is.raw(content)) {
302            writeBin(content, cache_file)
303          }
304        },
305        error = function(e) { success <<- FALSE }
306      )
307      if (!success) {
308        private$log(paste0('set_content: error setting key "', key, '"'))
309      }
310
311      private$prune_throttled()
312
313      success
314    },
315
316    #' @description Check if content associated with `key` exists in cache
317    #' @param key Key. Must be lowercase numbers and letters.
318    #' @return `TRUE` if the object is in the cache, `FALSE` otherwise.
319    exists = function(key) {
320      self$is_destroyed(throw = TRUE)
321      validate_key(key)
322      file.exists(private$filename_full_path(key))
323    },
324
325    #' @description Get all keys
326    #' @return A character vector of all keys currently in the cache.
327    keys = function() {
328      self$is_destroyed(throw = TRUE)
329      dir(private$dir_)
330    },
331
332    #' @description Remove an object
333    #' @param key Key. Must be lowercase numbers and letters.
334    #' @return `TRUE` if the object was found and successfully removed, `FALSE`
335    #'   otherwise.
336    remove = function(key) {
337      private$log(paste0('remove: key "', key, '"'))
338      self$is_destroyed(throw = TRUE)
339      validate_key(key)
340      file.remove(private$filename_full_path(key))
341    },
342
343    #' @description Clear all objects from the cache.
344    reset = function() {
345      private$log(paste0('reset'))
346      self$is_destroyed(throw = TRUE)
347      file.remove(dir(private$dir_, full.names = TRUE))
348      invisible(self)
349    },
350
351    #' @description Returns the directory used for the cache.
352    dir = function() {
353      private$dir_
354    },
355
356    #' @description Prune the cache, using the parameters specified by
357    #'   `max_size`,   `max_age`, `max_n`, and `evict`.
358    prune = function() {
359      # TODO: It would be good to add parameters `n` and `size`, so that the
360      # cache can be pruned to `max_n - n` and `max_size - size` before adding
361      # an object. Right now we prune after adding the object, so the cache
362      # can temporarily grow past the limits. The reason we don't do this now
363      # is because it is expensive to find the size of the serialized object
364      # before adding it.
365
366      private$log('prune')
367      self$is_destroyed(throw = TRUE)
368
369      current_time <- Sys.time()
370
371      filenames <- dir(private$dir_, full.names = TRUE)
372      info <- file.info(filenames)
373      info <- info[info$isdir == FALSE, ]
374      info$name <- rownames(info)
375      rownames(info) <- NULL
376      # Files could be removed between the dir() and file.info() calls. The
377      # entire row for such files will have NA values. Remove those rows.
378      info <- info[!is.na(info$size), ]
379
380      # 1. Remove any files where the age exceeds max age.
381      if (is.finite(private$max_age)) {
382        timediff <- as.numeric(current_time - info$mtime, units = "secs")
383        rm_idx <- timediff > private$max_age
384        if (any(rm_idx)) {
385          private$log(paste0("prune max_age: Removing ", paste(info$name[rm_idx], collapse = ", ")))
386          rm_success <- file.remove(info$name[rm_idx])
387          # This maps rm_success back into the TRUEs in the rm_idx vector.
388          # If (for example) rm_idx is c(F,T,F,T,T) and rm_success is c(T,F,T),
389          # then this line modifies rm_idx to be c(F,T,F,F,T).
390          rm_idx[rm_idx] <- rm_success
391          info <- info[!rm_idx, ]
392        }
393      }
394
395      # Sort objects by priority. The sorting is done in a function which can be
396      # called multiple times but only does the work the first time.
397      info_is_sorted <- FALSE
398      ensure_info_is_sorted <- function() {
399        if (info_is_sorted) return()
400
401        info <<- info[order(info$mtime, decreasing = TRUE), ]
402        info_is_sorted <<- TRUE
403      }
404
405      # 2. Remove files if there are too many.
406      if (is.finite(private$max_n) && nrow(info) > private$max_n) {
407        ensure_info_is_sorted()
408        rm_idx <- seq_len(nrow(info)) > private$max_n
409        private$log(paste0("prune max_n: Removing ", paste(info$name[rm_idx], collapse = ", ")))
410        rm_success <- file.remove(info$name[rm_idx])
411        rm_idx[rm_idx] <- rm_success
412        info <- info[!rm_idx, ]
413      }
414
415      # 3. Remove files if cache is too large.
416      if (is.finite(private$max_size) && sum(info$size) > private$max_size) {
417        ensure_info_is_sorted()
418        cum_size <- cumsum(info$size)
419        rm_idx <- cum_size > private$max_size
420        private$log(paste0("prune max_size: Removing ", paste(info$name[rm_idx], collapse = ", ")))
421        rm_success <- file.remove(info$name[rm_idx])
422        rm_idx[rm_idx] <- rm_success
423        info <- info[!rm_idx, ]
424      }
425
426      private$prune_last_time <- as.numeric(current_time)
427
428      invisible(self)
429    },
430
431    #' @description Return the number of items currently in the cache.
432    size = function() {
433      self$is_destroyed(throw = TRUE)
434      length(dir(private$dir_))
435    },
436
437    #' @description Clears all objects in the cache, and removes the cache
438    #'   directory from disk.
439    destroy = function() {
440      if (self$is_destroyed()) {
441        return(invisible(self))
442      }
443
444      private$log(paste0("destroy: Removing ", private$dir_))
445      # First create a sentinel file so that other processes sharing this
446      # cache know that the cache is to be destroyed. This is needed because
447      # the recursive unlink is not atomic: another process can add a file to
448      # the directory after unlink starts removing files but before it removes
449      # the directory, and when that happens, the directory removal will fail.
450      file.create(file.path(private$dir_, "._destroyed__"))
451      # Remove all the cache files. This will not remove the sentinel file.
452      file.remove(dir(private$dir_, full.names = TRUE))
453      # Next remove dir recursively, including sentinel file.
454      unlink(private$dir_, recursive = TRUE)
455      private$destroyed <- TRUE
456      invisible(self)
457    },
458
459    #' @description Reports whether the cache has been destroyed.
460    #' @param throw Should this function throw an error if the cache has been
461    #'   destroyed?
462    is_destroyed = function(throw = FALSE) {
463      if (!dir.exists(private$dir_) ||
464          file.exists(file.path(private$dir_, "._destroyed__")))
465      {
466        # It's possible for another process to destroy a shared cache directory
467        private$destroyed <- TRUE
468      }
469
470      if (throw) {
471        if (private$destroyed) {
472          stop("Attempted to use cache which has been destroyed:\n  ", private$dir_)
473        }
474
475      } else {
476        private$destroyed
477      }
478    },
479
480    #' @description A finalizer for the cache.
481    finalize = function() {
482      if (private$destroy_on_finalize) {
483        self$destroy()
484      }
485    }
486  ),
487
488  private = list(
489    dir_ = NULL,
490    max_age = NULL,
491    max_size = NULL,
492    max_n = NULL,
493    evict = NULL,
494    destroy_on_finalize = NULL,
495    destroyed = FALSE,
496    logfile = NULL,
497
498    prune_throttle_counter = 0,
499    prune_last_time = NULL,
500
501    filename_full_path = function(filename) {
502      file.path(private$dir_, filename)
503    },
504
505    # A wrapper for prune() that throttles it, because prune() can be
506    # expensive due to filesystem operations. This function will prune only
507    # once every 20 times it is called, or if it has been more than 5 seconds
508    # since the last time the cache was actually pruned, whichever is first.
509    # In the future, the behavior may be customizable.
510    prune_throttled = function() {
511      # Count the number of times prune() has been called.
512      private$prune_throttle_counter <- private$prune_throttle_counter + 1
513
514      if (private$prune_throttle_counter > 20 ||
515          private$prune_last_time - as.numeric(Sys.time()) > 5)
516      {
517        self$prune()
518        private$prune_throttle_counter <- 0
519      }
520    },
521
522    # Prunes a single object if it exceeds max_age. If the object does not
523    # exceed max_age, or if the object doesn't exist, do nothing.
524    maybe_prune_single = function(key) {
525      obj <- private$cache[[key]]
526      if (is.null(obj)) return()
527
528      timediff <- as.numeric(Sys.time()) - obj$mtime
529      if (timediff > private$max_age) {
530        private$log(paste0("pruning single object exceeding max_age: Removing ", key))
531        rm(list = key, envir = private$cache)
532      }
533    },
534
535    log = function(text) {
536      if (is.null(private$logfile)) return()
537
538      text <- paste0(format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] FileCache "), text)
539      cat(text, sep = "\n", file = private$logfile, append = TRUE)
540    }
541  )
542)
543
544
545validate_key <- function(key) {
546  if (!is.character(key) || length(key) != 1 || nchar(key) == 0) {
547    stop("Invalid key: key must be single non-empty string.")
548  }
549  if (grepl("[^a-z0-9]", key)) {
550    stop("Invalid key: ", key, ". Only lowercase letters and numbers are allowed.")
551  }
552}
553