1
2##  vdigest -- Vectorist digest functions for R
3##
4##  Copyright (C) 2019         Matthew de Queljoe and Dirk Eddelbuettel
5##
6##  This file is part of digest.
7##
8##  digest is free software: you can redistribute it and/or modify
9##  it under the terms of the GNU General Public License as published by
10##  the Free Software Foundation, either version 2 of the License, or
11##  (at your option) any later version.
12##
13##  digest is distributed in the hope that it will be useful,
14##  but WITHOUT ANY WARRANTY; without even the implied warranty of
15##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16##  GNU General Public License for more details.
17##
18##  You should have received a copy of the GNU General Public License
19##  along with digest.  If not, see <http://www.gnu.org/licenses/>.
20
21getVDigest <- function(algo = c("md5", "sha1", "crc32", "sha256", "sha512",
22                                 "xxhash32", "xxhash64", "murmur32", "spookyhash"),
23                        errormode=c("stop","warn","silent")){
24    algo <- match.arg(algo, c("md5", "sha1", "crc32", "sha256", "sha512",
25                              "xxhash32", "xxhash64", "murmur32", "spookyhash"))
26    errormode <- match.arg(errormode, c("stop","warn","silent"))
27    algoint <- algo_int(algo)
28    non_streaming_algos <- c("md5", "sha1", "crc32", "sha256", "sha512",
29                             "xxhash32", "xxhash64", "murmur32")
30    if (algo %in% non_streaming_algos)
31        return(non_streaming_digest(algo, errormode, algoint))
32    streaming_digest(algo, errormode, algoint)
33}
34
35non_streaming_digest <- function(algo, errormode, algoint){
36    function(object,
37             serialize=TRUE,
38             file=FALSE,
39             length=Inf,
40             skip="auto",
41             ascii=FALSE,
42             seed=0,
43             serializeVersion=.getSerializeVersion()){
44
45        if (is.infinite(length))
46            length <- -1               # internally we use -1 for infinite len
47
48        if (is.character(file) && missing(object)) {
49            object <- file                  # nocov
50            file <- TRUE                  	# nocov
51        }
52
53        if (serialize && !file) {
54            ## support the 'nosharing' option in pqR's serialize()
55            object <- if (.hasNoSharing())
56                serialize_(
57                    object,
58                    connection = NULL,
59                    ascii = ascii,
60                    nosharing = TRUE,
61                    version = serializeVersion
62                )
63            else
64                serialize_(object,
65                           connection = NULL,
66                           ascii = ascii,
67                           version = serializeVersion)
68            ## we support raw vectors, so no mangling of 'object' is necessary
69            ## regardless of R version
70            ## skip="auto" - skips the serialization header [SU]
71            if (any(!is.na(pmatch(skip,"auto"))))
72                skip <- set_skip(object, ascii)
73
74        } else if (!is.character(object) && !inherits(object,"raw")) {
75            return(.errorhandler(paste("Argument object must be of type character",	  # #nocov
76                                       "or raw vector if serialize is FALSE"), mode=errormode)) # #nocov
77        }
78        if (file && !is.character(object))
79            return(.errorhandler("file=TRUE can only be used with a character object",          # #nocov
80                                 mode=errormode))                                               # #nocov
81
82        if (file) {
83            algoint <- algoint+100
84            object <- path.expand(object)
85            if (.isWindows()) object <- enc2utf8(object)
86            check_file(object, errormode)
87        }
88        ## if skip is auto (or any other text for that matter), we just turn it
89        ## into 0 because auto should have been converted into a number earlier
90        ## if it was valid [SU]
91        if (is.character(skip)) skip <- 0
92        val <- .Call(
93            vdigest_impl,
94            object,
95            as.integer(algoint),
96            as.integer(length),
97            as.integer(skip),
98            0L, # raw always FALSE
99            as.integer(seed)
100        )
101        ## crc32 output was not guaranteed to be eight chars long, which we corrected
102        ## this allows to get the old behaviour back for compatibility
103        if ((algoint == 3 || algoint == 103) && .getCRC32PreferOldOutput()) {
104            val <- sub("^0+", "", val)                                          		# #nocov
105        }
106
107        return(val)
108    }
109}
110
111streaming_digest <- function(algo, errormode, algoint){
112    function(object,
113             serialize=TRUE,
114             file=FALSE,
115             length=Inf,
116             skip="auto",
117             ascii=FALSE,
118             seed=0,
119             serializeVersion=.getSerializeVersion()){
120
121        if (is.infinite(length))
122            length <- -1               # internally we use -1 for infinite len
123
124        if (is.character(file) && missing(object)) {
125            object <- file                  # nocov
126            file <- TRUE                  	# nocov
127        }
128
129        if (!serialize){
130            .errorhandler(paste0(algo, " algorithm is not available without serialization."),  # #nocov
131                          mode=errormode)                                                      # #nocov
132        }
133
134        if (serialize && !file) {
135            ## we support raw vectors, so no mangling of 'object' is necessary
136            ## regardless of R version
137            ## skip="auto" - skips the serialization header [SU]
138            if (any(!is.na(pmatch(skip,"auto"))))
139                skip <- set_skip(object, ascii)
140        }
141
142        if (file)
143            return(.errorhandler(paste0(algo, " algorithm can not be used with files."),        # #nocov
144                                 mode=errormode))                                               # #nocov
145
146
147        ## if skip is auto (or any other text for that matter), we just turn it
148        ## into 0 because auto should have been converted into a number earlier
149        ## if it was valid [SU]
150        if (is.character(skip)) skip <- 0                                          		# #nocov
151        if (algo == "spookyhash"){
152            # 0s are the seeds. They are included to enable testing against fastdigest.
153            val <- vapply(object,
154                          function(o)
155                              paste(
156                                  .Call(spookydigest_impl, o, skip, 0, 0, serializeVersion),
157                                  collapse = ""
158                              ),
159                          character(1),
160                          USE.NAMES = FALSE)
161        }
162
163        ## crc32 output was not guaranteed to be eight chars long, which we corrected
164        ## this allows to get the old behaviour back for compatibility
165        if ((algoint == 3 || algoint == 103) && .getCRC32PreferOldOutput()) {
166            val <- sub("^0+", "", val)                                          		# #nocov
167        }
168
169        return(val)
170    }
171}
172
173serialize_ <- function(object, ...){
174    if (length(object))
175        return(lapply(object, serialize, ...))
176    serialize(object, ...)
177}
178