1 2## digest -- hash digest functions for R 3## 4## Copyright (C) 2003 - 2019 Dirk Eddelbuettel <edd@debian.org> 5## Copyright (C) 2009 - 2019 Henrik Bengtsson 6## Copyright (C) 2012 - 2019 Hannes Muehleisen 7## Copyright (C) 2014 - 2019 Jim Hester 8## Copyright (C) 2019 Kendon Bell 9## Copyright (C) 2019 Matthew de Queljoe 10## 11## This file is part of digest. 12## 13## digest is free software: you can redistribute it and/or modify 14## it under the terms of the GNU General Public License as published by 15## the Free Software Foundation, either version 2 of the License, or 16## (at your option) any later version. 17## 18## digest is distributed in the hope that it will be useful, 19## but WITHOUT ANY WARRANTY; without even the implied warranty of 20## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21## GNU General Public License for more details. 22## 23## You should have received a copy of the GNU General Public License 24## along with digest. If not, see <http://www.gnu.org/licenses/>. 25 26digest <- function(object, algo=c("md5", "sha1", "crc32", "sha256", "sha512", 27 "xxhash32", "xxhash64", "murmur32", 28 "spookyhash", "blake3"), 29 serialize=TRUE, 30 file=FALSE, 31 length=Inf, 32 skip="auto", 33 ascii=FALSE, 34 raw=FALSE, 35 seed=0, 36 errormode=c("stop","warn","silent"), 37 serializeVersion=.getSerializeVersion()) { 38 39 # Explicitly specify choices; this is much faster than having match.arg() 40 # infer them from the function's formals. 41 algo <- match.arg(algo, c("md5", "sha1", "crc32", "sha256", "sha512", 42 "xxhash32", "xxhash64", "murmur32", 43 "spookyhash", "blake3")) 44 errormode <- match.arg(errormode, c("stop", "warn", "silent")) 45 46 if (is.infinite(length)) { 47 length <- -1 # internally we use -1 for infinite len 48 } 49 50 if (is.character(file) && missing(object)) { 51 object <- file # nocov 52 file <- TRUE # nocov 53 } 54 55 is_streaming_algo <- algo == "spookyhash" 56 57 if (is_streaming_algo && !serialize) { 58 .errorhandler(paste0(algo, " algorithm is not available without serialization."), # #nocov 59 mode=errormode) # #nocov 60 } 61 62 if (serialize && !file) { 63 if (!is_streaming_algo) { 64 ## support the 'nosharing' option in pqR's serialize() 65 object <- if (.hasNoSharing()) 66 serialize (object, connection=NULL, ascii=ascii, 67 nosharing=TRUE, version=serializeVersion) 68 else 69 serialize (object, connection=NULL, ascii=ascii, 70 version=serializeVersion) 71 } 72 ## we support raw vectors, so no mangling of 'object' is necessary 73 ## regardless of R version 74 ## skip="auto" - skips the serialization header [SU] 75 if (is.character(skip) && skip == "auto") 76 skip <- set_skip(object, ascii) 77 78 } else if (!is.character(object) && !inherits(object,"raw") && 79 !is_streaming_algo) { 80 return(.errorhandler(paste("Argument object must be of type character", # #nocov 81 "or raw vector if serialize is FALSE"), mode=errormode)) # #nocov 82 } 83 if (file && !is.character(object)) 84 return(.errorhandler("file=TRUE can only be used with a character object", # #nocov 85 mode=errormode)) # #nocov 86 87 if (file && is_streaming_algo) 88 return(.errorhandler(paste0(algo, " algorithm can not be used with files."), # #nocov 89 mode=errormode)) # #nocov 90 91 ## HB 14 Mar 2007: null op, only turned to char if alreadt char 92 ##if (!inherits(object,"raw")) 93 ## object <- as.character(object) 94 algoint <- algo_int(algo) 95 if (file) { 96 algoint <- algoint+100 97 object <- path.expand(object) 98 if (.isWindows()) object <- enc2utf8(object) 99 check_file(object, errormode) 100 } 101 ## if skip is auto (or any other text for that matter), we just turn it 102 ## into 0 because auto should have been converted into a number earlier 103 ## if it was valid [SU] 104 if (is.character(skip)) skip <- 0 105 if (!is_streaming_algo) { 106 val <- .Call(digest_impl, 107 object, 108 as.integer(algoint), 109 as.integer(length), 110 as.integer(skip), 111 as.integer(raw), 112 as.integer(seed)) 113 } else if (algo == "spookyhash"){ 114 # 0s are the seeds. They are included to enable testing against fastdigest. 115 val <- paste(.Call(spookydigest_impl, object, skip, 0, 0, serializeVersion, NULL), collapse="") 116 } 117 118 ## crc32 output was not guaranteed to be eight chars long, which we corrected 119 ## this allows to get the old behaviour back for compatibility 120 if ((algoint == 3 || algoint == 103) && .getCRC32PreferOldOutput()) { 121 val <- sub("^0+", "", val) 122 } 123 124 return(val) 125} 126 127## utility functions used by digest() and getVDigest() below 128 129.errorhandler <- function(txt, obj="", mode="stop") { 130 if (mode == "stop") { # nocov start 131 stop(txt, obj, call.=FALSE) 132 } else if (mode == "warn") { 133 warning(txt, obj, call.=FALSE) 134 return(invisible(NA)) 135 } else { 136 return(invisible(NULL)) # nocov end 137 } 138} 139 140algo_int <- function(algo) 141 switch( 142 algo, 143 md5 = 1, 144 sha1 = 2, 145 crc32 = 3, 146 sha256 = 4, 147 sha512 = 5, 148 xxhash32 = 6, 149 xxhash64 = 7, 150 murmur32 = 8, 151 spookyhash = 9, 152 blake3 = 10 153 ) 154 155## HB 14 Mar 2007: 156## Exclude serialization header (non-data dependent bytes but R 157## version specific). In ASCII, the header consists of for rows 158## ending with a newline ('\n'). We need to skip these. 159## The end of 4th row is *typically* within the first 18 bytes 160set_skip <- function(object, ascii){ 161 if (!ascii) 162 return(14) 163 ## Was: skip <- if (ascii) 18 else 14 164 which(object[1:30] == as.raw(10))[4] # nocov 165} 166 167check_file <- function(object, errormode){ 168 if (!file.exists(object)) { 169 return(.errorhandler("The file does not exist: ", object, mode=errormode)) # nocov start 170 } 171 if (!isTRUE(!file.info(object)$isdir)) { 172 return(.errorhandler("The specified pathname is not a file: ", 173 object, mode=errormode)) 174 } 175 if (file.access(object, 4)) { 176 return(.errorhandler("The specified file is not readable: ", 177 object, mode=errormode)) # #nocov end 178 } 179} 180