1
2##  digest -- hash digest functions for R
3##
4##  Copyright (C) 2003 - 2019  Dirk Eddelbuettel <edd@debian.org>
5##  Copyright (C) 2009 - 2019  Henrik Bengtsson
6##  Copyright (C) 2012 - 2019  Hannes Muehleisen
7##  Copyright (C) 2014 - 2019  Jim Hester
8##  Copyright (C) 2019         Kendon Bell
9##  Copyright (C) 2019         Matthew de Queljoe
10##
11##  This file is part of digest.
12##
13##  digest is free software: you can redistribute it and/or modify
14##  it under the terms of the GNU General Public License as published by
15##  the Free Software Foundation, either version 2 of the License, or
16##  (at your option) any later version.
17##
18##  digest is distributed in the hope that it will be useful,
19##  but WITHOUT ANY WARRANTY; without even the implied warranty of
20##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21##  GNU General Public License for more details.
22##
23##  You should have received a copy of the GNU General Public License
24##  along with digest.  If not, see <http://www.gnu.org/licenses/>.
25
26digest <- function(object, algo=c("md5", "sha1", "crc32", "sha256", "sha512",
27                                  "xxhash32", "xxhash64", "murmur32",
28                                  "spookyhash", "blake3"),
29                   serialize=TRUE,
30                   file=FALSE,
31                   length=Inf,
32                   skip="auto",
33                   ascii=FALSE,
34                   raw=FALSE,
35                   seed=0,
36                   errormode=c("stop","warn","silent"),
37                   serializeVersion=.getSerializeVersion()) {
38
39    # Explicitly specify choices; this is much faster than having match.arg()
40    # infer them from the function's formals.
41    algo <- match.arg(algo, c("md5", "sha1", "crc32", "sha256", "sha512",
42                              "xxhash32", "xxhash64", "murmur32",
43                              "spookyhash", "blake3"))
44    errormode <- match.arg(errormode, c("stop", "warn", "silent"))
45
46    if (is.infinite(length)) {
47        length <- -1               # internally we use -1 for infinite len
48    }
49
50    if (is.character(file) && missing(object)) {
51        object <- file                  # nocov
52        file <- TRUE                  	# nocov
53    }
54
55    is_streaming_algo <- algo == "spookyhash"
56
57    if (is_streaming_algo && !serialize) {
58        .errorhandler(paste0(algo, " algorithm is not available without serialization."),  # #nocov
59                      mode=errormode)                                                      # #nocov
60    }
61
62    if (serialize && !file) {
63        if (!is_streaming_algo) {
64            ## support the 'nosharing' option in pqR's serialize()
65            object <- if (.hasNoSharing())
66                serialize (object, connection=NULL, ascii=ascii,
67                                 nosharing=TRUE, version=serializeVersion)
68            else
69                serialize (object, connection=NULL, ascii=ascii,
70                                 version=serializeVersion)
71        }
72        ## we support raw vectors, so no mangling of 'object' is necessary
73        ## regardless of R version
74        ## skip="auto" - skips the serialization header [SU]
75        if (is.character(skip) && skip == "auto")
76            skip <- set_skip(object, ascii)
77
78    } else if (!is.character(object) && !inherits(object,"raw") &&
79               !is_streaming_algo) {
80        return(.errorhandler(paste("Argument object must be of type character",		    # #nocov
81                                   "or raw vector if serialize is FALSE"), mode=errormode)) # #nocov
82    }
83    if (file && !is.character(object))
84        return(.errorhandler("file=TRUE can only be used with a character object",          # #nocov
85                             mode=errormode))                                               # #nocov
86
87    if (file && is_streaming_algo)
88        return(.errorhandler(paste0(algo, " algorithm can not be used with files."),        # #nocov
89                             mode=errormode))                                               # #nocov
90
91    ## HB 14 Mar 2007:  null op, only turned to char if alreadt char
92    ##if (!inherits(object,"raw"))
93    ##  object <- as.character(object)
94    algoint <- algo_int(algo)
95    if (file) {
96        algoint <- algoint+100
97        object <- path.expand(object)
98        if (.isWindows()) object <- enc2utf8(object)
99        check_file(object, errormode)
100    }
101    ## if skip is auto (or any other text for that matter), we just turn it
102    ## into 0 because auto should have been converted into a number earlier
103    ## if it was valid [SU]
104    if (is.character(skip)) skip <- 0
105    if (!is_streaming_algo) {
106        val <- .Call(digest_impl,
107                     object,
108                     as.integer(algoint),
109                     as.integer(length),
110                     as.integer(skip),
111                     as.integer(raw),
112                     as.integer(seed))
113    } else if (algo == "spookyhash"){
114        # 0s are the seeds. They are included to enable testing against fastdigest.
115        val <- paste(.Call(spookydigest_impl, object, skip, 0, 0, serializeVersion, NULL), collapse="")
116    }
117
118    ## crc32 output was not guaranteed to be eight chars long, which we corrected
119    ## this allows to get the old behaviour back for compatibility
120    if ((algoint == 3 || algoint == 103) && .getCRC32PreferOldOutput()) {
121        val <- sub("^0+", "", val)
122    }
123
124    return(val)
125}
126
127## utility functions used by digest() and  getVDigest() below
128
129.errorhandler <- function(txt, obj="", mode="stop") {
130    if (mode == "stop") {                                                                # nocov start
131        stop(txt, obj, call.=FALSE)
132    } else if (mode == "warn") {
133        warning(txt, obj, call.=FALSE)
134        return(invisible(NA))
135    } else {
136        return(invisible(NULL))                                                          # nocov end
137    }
138}
139
140algo_int <- function(algo)
141    switch(
142        algo,
143        md5 = 1,
144        sha1 = 2,
145        crc32 = 3,
146        sha256 = 4,
147        sha512 = 5,
148        xxhash32 = 6,
149        xxhash64 = 7,
150        murmur32 = 8,
151        spookyhash = 9,
152        blake3 = 10
153    )
154
155## HB 14 Mar 2007:
156## Exclude serialization header (non-data dependent bytes but R
157## version specific).  In ASCII, the header consists of for rows
158## ending with a newline ('\n').  We need to skip these.
159## The end of 4th row is *typically* within the first 18 bytes
160set_skip <- function(object, ascii){
161    if (!ascii)
162        return(14)
163    ## Was: skip <- if (ascii) 18 else 14
164    which(object[1:30] == as.raw(10))[4]         					# nocov
165}
166
167check_file <- function(object, errormode){
168    if (!file.exists(object)) {
169        return(.errorhandler("The file does not exist: ", object, mode=errormode)) 	# nocov start
170    }
171    if (!isTRUE(!file.info(object)$isdir)) {
172        return(.errorhandler("The specified pathname is not a file: ",
173                             object, mode=errormode))
174    }
175    if (file.access(object, 4)) {
176        return(.errorhandler("The specified file is not readable: ",
177                             object, mode=errormode))                  			# #nocov end
178    }
179}
180