1# convert character string, as typically PostgreSQL returned blobs, to raw vector; 2# skips a leading "0x", as this is created by PostGIS when using ST_asBinary() 3# 4# most wkb read/write stuff was modified & extended from Ian Cook's wkb package, 5# https://cran.r-project.org/web/packages/wkb/index.html 6# 7hex_to_raw = function(y) { 8 stopifnot((nchar(y) %% 2) == 0) 9 if (substr(y, 1, 2) == "0x") 10 y = substr(y, 3, nchar(y)) 11 as.raw(as.numeric(paste0("0x", vapply(seq_len(nchar(y)/2), 12 function(x) substr(y, (x-1)*2+1, x*2), "")))) # SLOW, hence the Rcpp implementation 13} 14 15skip0x = function(x) { 16 if (is.na(x)) 17 "010700000000000000" # empty GeometryCollection, st_as_binary(st_geometrycollection()) 18 else if (substr(x, 1, 2) == "0x") 19 substr(x, 3, nchar(x)) 20 else 21 x 22} 23 24#' @name st_as_sfc 25#' @param EWKB logical; if TRUE, parse as EWKB (extended WKB; PostGIS: ST_AsEWKB), otherwise as ISO WKB (PostGIS: ST_AsBinary) 26#' @param spatialite logical; if \code{TRUE}, WKB is assumed to be in the spatialite dialect, see \url{https://www.gaia-gis.it/gaia-sins/BLOB-Geometry.html}; this is only supported in native endian-ness (i.e., files written on system with the same endian-ness as that on which it is being read). 27#' @param pureR logical; if TRUE, use only R code, if FALSE, use compiled (C++) code; use TRUE when the endian-ness of the binary differs from the host machine (\code{.Platform$endian}). 28#' @details When converting from WKB, the object \code{x} is either a character vector such as typically obtained from PostGIS (either with leading "0x" or without), or a list with raw vectors representing the features in binary (raw) form. 29#' @examples 30#' wkb = structure(list("01010000204071000000000000801A064100000000AC5C1441"), class = "WKB") 31#' st_as_sfc(wkb, EWKB = TRUE) 32#' wkb = structure(list("0x01010000204071000000000000801A064100000000AC5C1441"), class = "WKB") 33#' st_as_sfc(wkb, EWKB = TRUE) 34#' @export 35st_as_sfc.WKB = function(x, ..., EWKB = FALSE, spatialite = FALSE, pureR = FALSE, crs = NA_crs_) { 36 if (EWKB && spatialite) 37 stop("arguments EWKB and spatialite cannot both be TRUE") 38 if (spatialite && pureR) 39 stop("pureR implementation for spatialite reading is not available") 40 if (all(vapply(x, is.character, TRUE))) { 41 x <- if (pureR) 42 structure(lapply(x, hex_to_raw), class = "WKB") 43 else 44 structure(CPL_hex_to_raw(vapply(x, skip0x, USE.NAMES = FALSE, "")), class = "WKB") 45 } else # direct call with raw: 46 stopifnot(inherits(x, "WKB") && all(vapply(x, is.raw, TRUE))) # WKB as raw 47 if (any(lengths(x) == 0)) 48 stop("cannot read WKB object from zero-length raw vector") 49 ret = if (pureR) 50 R_read_wkb(x, readWKB, EWKB = EWKB) 51 else 52 CPL_read_wkb(x, EWKB, spatialite) 53 if (is.na(crs) && (EWKB || spatialite) && !is.null(attr(ret, "srid")) && attr(ret, "srid") != 0) 54 crs = attr(ret, "srid") 55 if (! is.na(st_crs(crs))) { 56 attr(ret, "srid") = NULL # remove 57 st_sfc(ret, crs = crs) 58 } else 59 st_sfc(ret) # leave attr srid in place: PostGIS srid that is not an EPSG code 60} 61 62#' @export 63#' @examples 64#' st_as_sfc(st_as_binary(st_sfc(st_point(0:1)))[[1]], crs = 4326) 65#' @name st_as_sfc 66st_as_sfc.raw = function(x, ...) { 67 st_as_sfc(structure(list(x), class = "WKB"), ...) 68} 69 70R_read_wkb = function(x, readWKB, EWKB = EWKB) { 71 ret = lapply(x, readWKB, EWKB = EWKB) 72 srid = attr(ret[[1]], "srid") 73 ret = lapply(ret, function(x) { attr(x, "srid") = NULL; x }) 74 attr(ret, "srid") = srid 75 ret 76} 77 78sf.tp = toupper(c( 79 # "Geometry", # 0 80 "Point", # 1 81 "LineString", # 2 82 "Polygon", # 3 83 "MultiPoint", # 4 84 "MultiLineString", # 5 85 "MultiPolygon", # 6 86 "GeometryCollection", # 7 87 "CircularString", # 8 x 88 "CompoundCurve", # 9 x 89 "CurvePolygon", # 10 x 90 "MultiCurve", # 11 x 91 "MultiSurface", # 12 x 92 "Curve", # 13 x * 93 "Surface", # 14 x * 94 "PolyhedralSurface", # 15 95 "TIN", # 16 96 "Triangle" # 17 97 )) # "Geometry" = 0, should not be matched, is a superclass only 98 # x: not described in ISO document 99 # *: GDAL support see https://trac.osgeo.org/gdal/ticket/6401 100 101readWKB = function(x, EWKB = FALSE) { 102 stopifnot(inherits(x, "raw")) 103 rc <- rawConnection(x, "r") 104 on.exit(close(rc)) 105 seek(rc, 0L) 106 # read data: 107 readData(rc, EWKB = EWKB) 108} 109 110parseTypeEWKB = function(wkbType, endian) { 111 # following the OGC doc, 3001 is POINT with ZM; turns out, PostGIS does sth else - 112 # read WKB, as well as EWKB; this post is more inormative of what is going on: 113 # https://lists.osgeo.org/pipermail/postgis-devel/2004-December/000710.html 114 # (without SRID, Z, M and ZM this all doesn't matter) 115 # comparison ISO WKB and EWKB: 116 # https://lists.osgeo.org/pipermail/postgis-devel/2004-December/000695.html 117 stopifnot(length(wkbType) == 4) 118 if (endian == "little") { 119 sf_type = as.numeric(wkbType[1]) 120 info = as.raw(as.integer(wkbType[4]) %/% 2^4) 121 } else { 122 sf_type = as.numeric(wkbType[4]) 123 info = as.raw(as.integer(wkbType[1]) %/% 2^4) 124 } 125 tp = sf.tp[sf_type] 126 stopifnot(!is.na(tp)) 127 has_srid = as.logical(info & as.raw(2)) # 2-bit is "on"? 128 zm = if ((info & as.raw(12)) == as.raw(12)) 129 "XYZM" 130 else if (info & as.raw(8)) 131 "XYZ" 132 else if (info & as.raw(4)) 133 "XYM" 134 else if (info == as.raw(0) || info == as.raw(2)) 135 "XY" 136 else 137 stop(paste("unknown value for info:", info)) 138 list(dims = nchar(zm), zm = zm, tp = tp, has_srid = has_srid) 139} 140 141parseTypeISO = function(wkbType) { 142 tp = sf.tp[wkbType %% 1000] 143 stopifnot(!is.na(tp)) 144 dd = wkbType %/% 1000 145 zm = if (dd == 0) 146 "XY" 147 else if (dd == 1) 148 "XYZ" 149 else if (dd == 2) 150 "XYM" 151 else if (dd == 3) 152 "XYZM" 153 else 154 stop(paste("unknown value for wkbType:", wkbType)) 155 list(dims = nchar(zm), zm = zm, tp = tp, has_srid = FALSE) 156} 157 158readData = function(rc, EWKB = FALSE) { 159 # read byte order: 160 byteOrder <- readBin(rc, what = "raw", size = 1L) 161 stopifnot(byteOrder %in% c(as.raw(0L), as.raw(1L))) 162 endian = ifelse(byteOrder == as.raw(1L), "little", "big") 163 # read wkbType: 164 srid = NA_integer_ 165 if (EWKB) { 166 wkbType <- readBin(rc, what = "raw", n = 4L, size = 1L, endian = endian) 167 pt <- parseTypeEWKB(wkbType, endian) 168 if (pt$has_srid) 169 srid <- readBin(rc, what = "integer", size = 4L, endian = endian) 170 } else { 171 wkbType <- readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian) 172 pt <- parseTypeISO(wkbType) 173 } 174 # read data part: 175 ret = switch(pt$tp, 176 POINT = readPoint(rc, pt$dims, endian), 177 CURVE = , 178 CIRCULARSTRING = , 179 LINESTRING = readMatrix(rc, pt$dims, endian), 180 SURFACE = , 181 POLYGON = , 182 TRIANGLE = readMatrixList(rc, pt$dims, endian), 183 MULTIPOINT = readMPoints(rc, pt$dims, endian, EWKB), 184 MULTILINESTRING = , 185 MULTICURVE = , 186 MULTIPOLYGON = , 187 MULTISURFACE = , 188 POLYHEDRALSURFACE = , 189 TIN = lapply(readGC(rc, pt$dims, endian, EWKB), unclass), 190 GEOMETRYCOLLECTION = readGC(rc, pt$dims, endian, EWKB), 191 CURVEPOLYGON = readGC(rc, pt$dims, endian, EWKB), 192 stop(paste("type", pt$tp, "unsupported"))) 193 class(ret) <- c(pt$zm, pt$tp, "sfg") 194 if (!is.na(srid)) 195 attr(ret, "srid") <- srid 196 ret 197} 198 199readPoint = function(rc, dims, endian) { 200 readBin(rc, what = "double", n = as.integer(dims), size = 8L, endian = endian) 201} 202readMPoints = function(rc, dims, endian, EWKB) { 203 npts = readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian) 204 do.call(rbind, lapply(seq_len(npts), function(x) readData(rc, EWKB))) 205} 206readMatrix = function(rc, dims, endian) { 207 npts = readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian) 208 m = readBin(rc, what = "double", n = as.integer(npts * dims), size = 8L, endian = endian) 209 t(matrix(m, nrow = dims)) # x1 y1, x2 y2 etc -> t() 210} 211readMatrixList = function(rc, dims, endian) { 212 nmtrx = readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian) 213 lapply(seq_len(nmtrx), function(x) readMatrix(rc, dims, endian)) 214} 215#readMatrixListList = function(rc, dims, endian) { 216# nmtrxl = readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian) 217# lapply(seq_len(nmtrxl), function(x) readMatrixList(rc, dims, endian)) 218#} 219readGC = function(rc, dims, endian, EWKB) { 220 ngc = readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian) 221 lapply(seq_len(ngc), function(x) readData(rc, EWKB)) 222} 223 224#' Convert sfc object to an WKB object 225#' 226#' Convert sfc object to an WKB object 227#' @param x object to convert 228#' @param ... ignored 229#' @name st_as_binary 230#' @export 231st_as_binary = function(x, ...) UseMethod("st_as_binary") 232 233#' @name st_as_binary 234#' @param endian character; either "big" or "little"; default: use that of platform 235#' @param EWKB logical; use EWKB (PostGIS), or (default) ISO-WKB? 236#' @param pureR logical; use pure R solution, or C++? 237#' @param precision numeric; if zero, do not modify; to reduce precision: negative values convert to float (4-byte real); positive values convert to round(x*precision)/precision. See details. 238#' @param hex logical; return as (unclassed) hexadecimal encoded character vector? 239#' @param srid integer; override srid (can be used when the srid is unavailable locally). 240#' @details \code{st_as_binary} is called on sfc objects on their way to the GDAL or GEOS libraries, and hence does rounding (if requested) on the fly before e.g. computing spatial predicates like \link{st_intersects}. The examples show a round-trip of an \code{sfc} to and from binary. 241#' 242#' For the precision model used, see also \url{https://locationtech.github.io/jts/javadoc/org/locationtech/jts/geom/PrecisionModel.html}. There, it is written that: ``... to specify 3 decimal places of precision, use a scale factor of 1000. To specify -3 decimal places of precision (i.e. rounding to the nearest 1000), use a scale factor of 0.001.''. Note that ALL coordinates, so also Z or M values (if present) are affected. 243#' @export 244#' @examples 245#' # examples of setting precision: 246#' st_point(c(1/3, 1/6)) %>% st_sfc(precision = 1000) %>% st_as_binary %>% st_as_sfc 247#' st_point(c(1/3, 1/6)) %>% st_sfc(precision = 100) %>% st_as_binary %>% st_as_sfc 248#' st_point(1e6 * c(1/3, 1/6)) %>% st_sfc(precision = 0.01) %>% st_as_binary %>% st_as_sfc 249#' st_point(1e6 * c(1/3, 1/6)) %>% st_sfc(precision = 0.001) %>% st_as_binary %>% st_as_sfc 250st_as_binary.sfc = function(x, ..., EWKB = FALSE, endian = .Platform$endian, pureR = FALSE, 251 precision = attr(x, "precision"), hex = FALSE) { 252 stopifnot(endian %in% c("big", "little")) 253 if (pureR && precision != 0.0) 254 stop("for non-zero precision values, use pureR = FALSE") 255 ret = if (pureR) 256 structure(lapply(x, st_as_binary.sfg, EWKB = EWKB, pureR = pureR, endian = endian), class = "WKB") 257 else { 258 stopifnot(endian == .Platform$endian) 259 attr(x, "precision") = precision 260 structure(CPL_write_wkb(x, EWKB), class = "WKB") 261 } 262 if (hex) 263 vapply(ret, CPL_raw_to_hex, "") 264 else 265 ret 266} 267 268createType = function(x, endian, EWKB = FALSE) { 269 dims = x[1] # "XY", "XYZ", "XYM", or "XYZM" 270 cl = x[2] 271 m = match(cl, sf.tp) 272 if (is.na(m)) 273 stop(paste("Class", cl, "not matched")) 274 # return: 275 if (! EWKB) # ISO: add 1000s 276 as.integer(m + switch(dims, "XYZ" = 1000, "XYM" = 2000, "XYZM" = 3000, 0)) 277 else { # EWKB: set higher bits 278 ret = raw(4) 279 ret[1] = as.raw(m) # set up little-endian 280 ret[4] = as.raw(switch(dims, "XYZ" = 0x80, "XYM" = 0x40, "XYZM" = 0xC0, 0)) 281 if (endian == "big") 282 rev(ret) 283 else 284 ret 285 } 286} 287 288#' @name st_as_binary 289#' @export 290st_as_binary.sfg = function(x, ..., endian = .Platform$endian, EWKB = FALSE, pureR = FALSE, 291 hex = FALSE, srid = 0) { 292# if pureR, it's done here, if not, it's done in st_as_binary.sfc 293 stopifnot(endian %in% c("big", "little")) 294 if (! pureR) 295 st_as_binary.sfc(st_sfc(x), endian == endian, EWKB = EWKB, pureR = pureR, hex = hex, srid = srid, ...)[[1]] 296 else { 297 rc <- rawConnection(raw(0), "r+") 298 on.exit(close(rc)) 299 writeData(x, rc, endian, EWKB) 300 r = rawConnectionValue(rc) 301 if (hex) 302 r = rawToHex(r) 303 r 304 } 305} 306 307#' Convert raw vector(s) into hexadecimal character string(s) 308#' 309#' Convert raw vector(s) into hexadecimal character string(s) 310#' @param x raw vector, or list with raw vectors 311#' @export 312rawToHex = function(x) { 313 if (is.raw(x)) 314 CPL_raw_to_hex(x) 315 else if (is.list(x) && all(vapply(x, is.raw, TRUE))) 316 vapply(x, function(rw) CPL_raw_to_hex(rw), "") 317 else 318 stop(paste("not implemented for objects of class", class(x))) 319} 320 321writeData = function(x, rc, endian, EWKB = FALSE) { 322 if (endian == "big") 323 writeBin(as.raw(0L), rc) 324 else 325 writeBin(as.raw(1L), rc) 326 if (EWKB) 327 writeBin(createType(class(x), endian, TRUE), rc, size = 1L, endian = endian) 328 else 329 writeBin(createType(class(x)), rc, size = 4L, endian = endian) 330 # TODO (?): write SRID in case of EWKB? 331 # write out x: 332 switch(class(x)[2], 333 POINT = writeBin(as.vector(as.double(x)), rc, size = 8L, endian = endian), 334 LINESTRING = writeMatrix(x, rc, endian), 335 POLYGON = , 336 TRIANGLE = writeMatrixList(x, rc, endian), 337 MULTIPOINT = writeMPoints(x, rc, endian, EWKB), 338 POLYHEDRALSURFACE = , 339 TIN = , 340 MULTILINESTRING = , 341 MULTIPOLYGON = writeMulti(x, rc, endian, EWKB), 342 GEOMETRYCOLLECTION = writeGC(x, rc, endian, EWKB), 343 stop(paste("unimplemented class to write:", class(x)[2])) 344 ) 345} 346 347writeMulti = function(x, rc, endian, EWKB) { 348 unMulti = if (inherits(x, "MULTILINESTRING")) 349 st_linestring 350 else # MULTIPOLYGON, POLYHEDRALSURFACE, TIN: 351 st_polygon 352 writeBin(as.integer(length(x)), rc, size = 4L, endian = endian) 353 lapply(lapply(x, unMulti, class(x)[1]), writeData, rc = rc, endian = endian, EWKB = EWKB) 354} 355writeGC = function(x, rc, endian, EWKB) { 356 writeBin(as.integer(length(x)), rc, size = 4L, endian = endian) 357 lapply(x, writeData, rc = rc, endian = endian, EWKB = EWKB) 358} 359writeMatrix = function(x, rc, endian) { 360 writeBin(as.integer(nrow(x)), rc, size = 4L, endian = endian) 361 writeBin(as.double(as.vector(t(x))), rc, size = 8L, endian = endian) 362} 363writeMatrixList = function(x, rc, endian) { 364 writeBin(as.integer(length(x)), rc, size = 4L, endian = endian) 365 lapply(x, function(y) writeMatrix(y, rc, endian)) 366} 367writeMPoints = function(x, rc, endian, EWKB) { 368 writeBin(as.integer(nrow(x)), rc, size = 4L, endian = endian) 369 if (nrow(x)) 370 apply(x, 1, function(y) writeData(st_point(y, class(x)[1]), rc, endian, EWKB)) 371} 372