1 2## vdigest -- Vectorist digest functions for R 3## 4## Copyright (C) 2019 Matthew de Queljoe and Dirk Eddelbuettel 5## 6## This file is part of digest. 7## 8## digest is free software: you can redistribute it and/or modify 9## it under the terms of the GNU General Public License as published by 10## the Free Software Foundation, either version 2 of the License, or 11## (at your option) any later version. 12## 13## digest is distributed in the hope that it will be useful, 14## but WITHOUT ANY WARRANTY; without even the implied warranty of 15## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16## GNU General Public License for more details. 17## 18## You should have received a copy of the GNU General Public License 19## along with digest. If not, see <http://www.gnu.org/licenses/>. 20 21getVDigest <- function(algo = c("md5", "sha1", "crc32", "sha256", "sha512", 22 "xxhash32", "xxhash64", "murmur32", "spookyhash"), 23 errormode=c("stop","warn","silent")){ 24 algo <- match.arg(algo, c("md5", "sha1", "crc32", "sha256", "sha512", 25 "xxhash32", "xxhash64", "murmur32", "spookyhash")) 26 errormode <- match.arg(errormode, c("stop","warn","silent")) 27 algoint <- algo_int(algo) 28 non_streaming_algos <- c("md5", "sha1", "crc32", "sha256", "sha512", 29 "xxhash32", "xxhash64", "murmur32") 30 if (algo %in% non_streaming_algos) 31 return(non_streaming_digest(algo, errormode, algoint)) 32 streaming_digest(algo, errormode, algoint) 33} 34 35non_streaming_digest <- function(algo, errormode, algoint){ 36 function(object, 37 serialize=TRUE, 38 file=FALSE, 39 length=Inf, 40 skip="auto", 41 ascii=FALSE, 42 seed=0, 43 serializeVersion=.getSerializeVersion()){ 44 45 if (is.infinite(length)) 46 length <- -1 # internally we use -1 for infinite len 47 48 if (is.character(file) && missing(object)) { 49 object <- file # nocov 50 file <- TRUE # nocov 51 } 52 53 if (serialize && !file) { 54 ## support the 'nosharing' option in pqR's serialize() 55 object <- if (.hasNoSharing()) 56 serialize_( 57 object, 58 connection = NULL, 59 ascii = ascii, 60 nosharing = TRUE, 61 version = serializeVersion 62 ) 63 else 64 serialize_(object, 65 connection = NULL, 66 ascii = ascii, 67 version = serializeVersion) 68 ## we support raw vectors, so no mangling of 'object' is necessary 69 ## regardless of R version 70 ## skip="auto" - skips the serialization header [SU] 71 if (any(!is.na(pmatch(skip,"auto")))) 72 skip <- set_skip(object, ascii) 73 74 } else if (!is.character(object) && !inherits(object,"raw")) { 75 return(.errorhandler(paste("Argument object must be of type character", # #nocov 76 "or raw vector if serialize is FALSE"), mode=errormode)) # #nocov 77 } 78 if (file && !is.character(object)) 79 return(.errorhandler("file=TRUE can only be used with a character object", # #nocov 80 mode=errormode)) # #nocov 81 82 if (file) { 83 algoint <- algoint+100 84 object <- path.expand(object) 85 if (.isWindows()) object <- enc2utf8(object) 86 check_file(object, errormode) 87 } 88 ## if skip is auto (or any other text for that matter), we just turn it 89 ## into 0 because auto should have been converted into a number earlier 90 ## if it was valid [SU] 91 if (is.character(skip)) skip <- 0 92 val <- .Call( 93 vdigest_impl, 94 object, 95 as.integer(algoint), 96 as.integer(length), 97 as.integer(skip), 98 0L, # raw always FALSE 99 as.integer(seed) 100 ) 101 ## crc32 output was not guaranteed to be eight chars long, which we corrected 102 ## this allows to get the old behaviour back for compatibility 103 if ((algoint == 3 || algoint == 103) && .getCRC32PreferOldOutput()) { 104 val <- sub("^0+", "", val) # #nocov 105 } 106 107 return(val) 108 } 109} 110 111streaming_digest <- function(algo, errormode, algoint){ 112 function(object, 113 serialize=TRUE, 114 file=FALSE, 115 length=Inf, 116 skip="auto", 117 ascii=FALSE, 118 seed=0, 119 serializeVersion=.getSerializeVersion()){ 120 121 if (is.infinite(length)) 122 length <- -1 # internally we use -1 for infinite len 123 124 if (is.character(file) && missing(object)) { 125 object <- file # nocov 126 file <- TRUE # nocov 127 } 128 129 if (!serialize){ 130 .errorhandler(paste0(algo, " algorithm is not available without serialization."), # #nocov 131 mode=errormode) # #nocov 132 } 133 134 if (serialize && !file) { 135 ## we support raw vectors, so no mangling of 'object' is necessary 136 ## regardless of R version 137 ## skip="auto" - skips the serialization header [SU] 138 if (any(!is.na(pmatch(skip,"auto")))) 139 skip <- set_skip(object, ascii) 140 } 141 142 if (file) 143 return(.errorhandler(paste0(algo, " algorithm can not be used with files."), # #nocov 144 mode=errormode)) # #nocov 145 146 147 ## if skip is auto (or any other text for that matter), we just turn it 148 ## into 0 because auto should have been converted into a number earlier 149 ## if it was valid [SU] 150 if (is.character(skip)) skip <- 0 # #nocov 151 if (algo == "spookyhash"){ 152 # 0s are the seeds. They are included to enable testing against fastdigest. 153 val <- vapply(object, 154 function(o) 155 paste( 156 .Call(spookydigest_impl, o, skip, 0, 0, serializeVersion), 157 collapse = "" 158 ), 159 character(1), 160 USE.NAMES = FALSE) 161 } 162 163 ## crc32 output was not guaranteed to be eight chars long, which we corrected 164 ## this allows to get the old behaviour back for compatibility 165 if ((algoint == 3 || algoint == 103) && .getCRC32PreferOldOutput()) { 166 val <- sub("^0+", "", val) # #nocov 167 } 168 169 return(val) 170 } 171} 172 173serialize_ <- function(object, ...){ 174 if (length(object)) 175 return(lapply(object, serialize, ...)) 176 serialize(object, ...) 177} 178