1# convert character string, as typically PostgreSQL returned blobs, to raw vector;
2# skips a leading "0x", as this is created by PostGIS when using ST_asBinary()
3#
4# most wkb read/write stuff was modified & extended from Ian Cook's wkb package,
5# https://cran.r-project.org/web/packages/wkb/index.html
6#
7hex_to_raw = function(y) {
8	stopifnot((nchar(y) %% 2) == 0)
9	if (substr(y, 1, 2) == "0x")
10		y = substr(y, 3, nchar(y))
11	as.raw(as.numeric(paste0("0x", vapply(seq_len(nchar(y)/2),
12		function(x) substr(y, (x-1)*2+1, x*2), "")))) # SLOW, hence the Rcpp implementation
13}
14
15skip0x = function(x) {
16	if (is.na(x))
17		"010700000000000000" # empty GeometryCollection, st_as_binary(st_geometrycollection())
18	else if (substr(x, 1, 2) == "0x")
19		substr(x, 3, nchar(x))
20	else
21		x
22}
23
24#' @name st_as_sfc
25#' @param EWKB logical; if TRUE, parse as EWKB (extended WKB; PostGIS: ST_AsEWKB), otherwise as ISO WKB (PostGIS: ST_AsBinary)
26#' @param spatialite logical; if \code{TRUE}, WKB is assumed to be in the spatialite dialect, see \url{https://www.gaia-gis.it/gaia-sins/BLOB-Geometry.html}; this is only supported in native endian-ness (i.e., files written on system with the same endian-ness as that on which it is being read).
27#' @param pureR logical; if TRUE, use only R code, if FALSE, use compiled (C++) code; use TRUE when the endian-ness of the binary differs from the host machine (\code{.Platform$endian}).
28#' @details When converting from WKB, the object \code{x} is either a character vector such as typically obtained from PostGIS (either with leading "0x" or without), or a list with raw vectors representing the features in binary (raw) form.
29#' @examples
30#' wkb = structure(list("01010000204071000000000000801A064100000000AC5C1441"), class = "WKB")
31#' st_as_sfc(wkb, EWKB = TRUE)
32#' wkb = structure(list("0x01010000204071000000000000801A064100000000AC5C1441"), class = "WKB")
33#' st_as_sfc(wkb, EWKB = TRUE)
34#' @export
35st_as_sfc.WKB = function(x, ..., EWKB = FALSE, spatialite = FALSE, pureR = FALSE, crs = NA_crs_) {
36	if (EWKB && spatialite)
37		stop("arguments EWKB and spatialite cannot both be TRUE")
38	if (spatialite && pureR)
39		stop("pureR implementation for spatialite reading is not available")
40    if (all(vapply(x, is.character, TRUE))) {
41		x <- if (pureR)
42				structure(lapply(x, hex_to_raw), class = "WKB")
43			else
44				structure(CPL_hex_to_raw(vapply(x, skip0x, USE.NAMES = FALSE, "")), class = "WKB")
45	} else # direct call with raw:
46		stopifnot(inherits(x, "WKB") && all(vapply(x, is.raw, TRUE))) # WKB as raw
47	if (any(lengths(x) == 0))
48		stop("cannot read WKB object from zero-length raw vector")
49	ret = if (pureR)
50			R_read_wkb(x, readWKB, EWKB = EWKB)
51		else
52			CPL_read_wkb(x, EWKB, spatialite)
53	if (is.na(crs) && (EWKB || spatialite) && !is.null(attr(ret, "srid")) && attr(ret, "srid") != 0)
54		crs = attr(ret, "srid")
55	if (! is.na(st_crs(crs))) {
56		attr(ret, "srid") = NULL # remove
57		st_sfc(ret, crs = crs)
58	} else
59		st_sfc(ret) # leave attr srid in place: PostGIS srid that is not an EPSG code
60}
61
62#' @export
63#' @examples
64#' st_as_sfc(st_as_binary(st_sfc(st_point(0:1)))[[1]], crs = 4326)
65#' @name st_as_sfc
66st_as_sfc.raw = function(x, ...) {
67	st_as_sfc(structure(list(x), class = "WKB"), ...)
68}
69
70R_read_wkb = function(x, readWKB, EWKB = EWKB) {
71	ret = lapply(x, readWKB, EWKB = EWKB)
72	srid = attr(ret[[1]], "srid")
73	ret = lapply(ret, function(x) { attr(x, "srid") = NULL; x })
74	attr(ret, "srid") = srid
75	ret
76}
77
78sf.tp = toupper(c(
79	# "Geometry",          # 0
80	"Point",               # 1
81	"LineString",          # 2
82	"Polygon",             # 3
83	"MultiPoint",          # 4
84	"MultiLineString",     # 5
85	"MultiPolygon",        # 6
86	"GeometryCollection",  # 7
87	"CircularString",      # 8 x
88	"CompoundCurve",       # 9 x
89	"CurvePolygon",        # 10 x
90	"MultiCurve",          # 11 x
91	"MultiSurface",        # 12 x
92	"Curve",               # 13 x *
93	"Surface",             # 14 x *
94	"PolyhedralSurface",   # 15
95	"TIN",                 # 16
96	"Triangle"             # 17
97	)) # "Geometry" = 0, should not be matched, is a superclass only
98	   # x: not described in ISO document
99	   # *: GDAL support see https://trac.osgeo.org/gdal/ticket/6401
100
101readWKB = function(x, EWKB = FALSE) {
102	stopifnot(inherits(x, "raw"))
103	rc <- rawConnection(x, "r")
104	on.exit(close(rc))
105	seek(rc, 0L)
106	# read data:
107	readData(rc, EWKB = EWKB)
108}
109
110parseTypeEWKB = function(wkbType, endian) {
111	# following the OGC doc, 3001 is POINT with ZM; turns out, PostGIS does sth else -
112	# read WKB, as well as EWKB; this post is more inormative of what is going on:
113	# https://lists.osgeo.org/pipermail/postgis-devel/2004-December/000710.html
114	# (without SRID, Z, M and ZM this all doesn't matter)
115	# comparison ISO WKB and EWKB:
116	# https://lists.osgeo.org/pipermail/postgis-devel/2004-December/000695.html
117	stopifnot(length(wkbType) == 4)
118	if (endian == "little") {
119		sf_type = as.numeric(wkbType[1])
120		info = as.raw(as.integer(wkbType[4]) %/% 2^4)
121	} else {
122		sf_type = as.numeric(wkbType[4])
123		info = as.raw(as.integer(wkbType[1]) %/% 2^4)
124	}
125	tp = sf.tp[sf_type]
126	stopifnot(!is.na(tp))
127	has_srid = as.logical(info & as.raw(2)) # 2-bit is "on"?
128	zm = if ((info & as.raw(12)) == as.raw(12))
129		"XYZM"
130	else if (info & as.raw(8))
131		"XYZ"
132	else if (info & as.raw(4))
133		"XYM"
134	else if (info == as.raw(0) || info == as.raw(2))
135		"XY"
136	else
137		stop(paste("unknown value for info:", info))
138	list(dims = nchar(zm), zm = zm, tp = tp, has_srid = has_srid)
139}
140
141parseTypeISO = function(wkbType) {
142	tp = sf.tp[wkbType %% 1000]
143	stopifnot(!is.na(tp))
144	dd = wkbType %/% 1000
145	zm = if (dd == 0)
146		"XY"
147	else if (dd == 1)
148		"XYZ"
149	else if (dd == 2)
150		"XYM"
151	else if (dd == 3)
152		"XYZM"
153	else
154		stop(paste("unknown value for wkbType:", wkbType))
155	list(dims = nchar(zm), zm = zm, tp = tp, has_srid = FALSE)
156}
157
158readData = function(rc, EWKB = FALSE) {
159	# read byte order:
160	byteOrder <- readBin(rc, what = "raw", size = 1L)
161	stopifnot(byteOrder %in% c(as.raw(0L), as.raw(1L)))
162	endian = ifelse(byteOrder == as.raw(1L), "little", "big")
163	# read wkbType:
164	srid = NA_integer_
165	if (EWKB) {
166		wkbType <- readBin(rc, what = "raw", n = 4L, size = 1L, endian = endian)
167		pt <- parseTypeEWKB(wkbType, endian)
168		if (pt$has_srid)
169			srid <- readBin(rc, what = "integer", size = 4L, endian = endian)
170	} else {
171		wkbType <- readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian)
172		pt <- parseTypeISO(wkbType)
173	}
174	# read data part:
175	ret = switch(pt$tp,
176		POINT = readPoint(rc, pt$dims, endian),
177		CURVE = ,
178		CIRCULARSTRING = ,
179		LINESTRING = readMatrix(rc, pt$dims, endian),
180		SURFACE = ,
181		POLYGON = ,
182		TRIANGLE = readMatrixList(rc, pt$dims, endian),
183		MULTIPOINT = readMPoints(rc, pt$dims, endian, EWKB),
184		MULTILINESTRING = ,
185		MULTICURVE = ,
186		MULTIPOLYGON = ,
187		MULTISURFACE = ,
188		POLYHEDRALSURFACE = ,
189		TIN = lapply(readGC(rc, pt$dims, endian, EWKB), unclass),
190		GEOMETRYCOLLECTION = readGC(rc, pt$dims, endian, EWKB),
191		CURVEPOLYGON = readGC(rc, pt$dims, endian, EWKB),
192		stop(paste("type", pt$tp, "unsupported")))
193	class(ret) <- c(pt$zm, pt$tp, "sfg")
194	if (!is.na(srid))
195		attr(ret, "srid") <- srid
196	ret
197}
198
199readPoint = function(rc, dims, endian) {
200	readBin(rc, what = "double", n = as.integer(dims), size = 8L, endian = endian)
201}
202readMPoints = function(rc, dims, endian, EWKB) {
203	npts = readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian)
204	do.call(rbind, lapply(seq_len(npts), function(x) readData(rc, EWKB)))
205}
206readMatrix = function(rc, dims, endian) {
207	npts = readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian)
208	m = readBin(rc, what = "double", n = as.integer(npts * dims), size = 8L, endian = endian)
209	t(matrix(m, nrow = dims)) # x1 y1, x2 y2 etc -> t()
210}
211readMatrixList = function(rc, dims, endian) {
212	nmtrx = readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian)
213	lapply(seq_len(nmtrx), function(x) readMatrix(rc, dims, endian))
214}
215#readMatrixListList = function(rc, dims, endian) {
216#	nmtrxl = readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian)
217#	lapply(seq_len(nmtrxl), function(x) readMatrixList(rc, dims, endian))
218#}
219readGC = function(rc, dims, endian, EWKB) {
220	ngc = readBin(rc, what = "integer", n = 1L, size = 4L, endian = endian)
221	lapply(seq_len(ngc), function(x) readData(rc, EWKB))
222}
223
224#' Convert sfc object to an WKB object
225#'
226#' Convert sfc object to an WKB object
227#' @param x object to convert
228#' @param ... ignored
229#' @name st_as_binary
230#' @export
231st_as_binary = function(x, ...) UseMethod("st_as_binary")
232
233#' @name st_as_binary
234#' @param endian character; either "big" or "little"; default: use that of platform
235#' @param EWKB logical; use EWKB (PostGIS), or (default) ISO-WKB?
236#' @param pureR logical; use pure R solution, or C++?
237#' @param precision numeric; if zero, do not modify; to reduce precision: negative values convert to float (4-byte real); positive values convert to round(x*precision)/precision. See details.
238#' @param hex logical; return as (unclassed) hexadecimal encoded character vector?
239#' @param srid integer; override srid (can be used when the srid is unavailable locally).
240#' @details \code{st_as_binary} is called on sfc objects on their way to the GDAL or GEOS libraries, and hence does rounding (if requested) on the fly before e.g. computing spatial predicates like \link{st_intersects}. The examples show a round-trip of an \code{sfc} to and from binary.
241#'
242#' For the precision model used, see also \url{https://locationtech.github.io/jts/javadoc/org/locationtech/jts/geom/PrecisionModel.html}. There, it is written that: ``... to specify 3 decimal places of precision, use a scale factor of 1000. To specify -3 decimal places of precision (i.e. rounding to the nearest 1000), use a scale factor of 0.001.''. Note that ALL coordinates, so also Z or M values (if present) are affected.
243#' @export
244#' @examples
245#' # examples of setting precision:
246#' st_point(c(1/3, 1/6)) %>% st_sfc(precision = 1000) %>% st_as_binary %>% st_as_sfc
247#' st_point(c(1/3, 1/6)) %>% st_sfc(precision =  100) %>% st_as_binary %>% st_as_sfc
248#' st_point(1e6 * c(1/3, 1/6)) %>% st_sfc(precision = 0.01) %>% st_as_binary %>% st_as_sfc
249#' st_point(1e6 * c(1/3, 1/6)) %>% st_sfc(precision = 0.001) %>% st_as_binary %>% st_as_sfc
250st_as_binary.sfc = function(x, ..., EWKB = FALSE, endian = .Platform$endian, pureR = FALSE,
251		precision = attr(x, "precision"), hex = FALSE) {
252	stopifnot(endian %in% c("big", "little"))
253	if (pureR && precision != 0.0)
254		stop("for non-zero precision values, use pureR = FALSE")
255	ret = if (pureR)
256		structure(lapply(x, st_as_binary.sfg, EWKB = EWKB, pureR = pureR, endian = endian), class = "WKB")
257	else {
258		stopifnot(endian == .Platform$endian)
259		attr(x, "precision") = precision
260		structure(CPL_write_wkb(x, EWKB), class = "WKB")
261	}
262	if (hex)
263		vapply(ret, CPL_raw_to_hex, "")
264	else
265		ret
266}
267
268createType = function(x, endian, EWKB = FALSE) {
269	dims = x[1]  # "XY", "XYZ", "XYM", or "XYZM"
270	cl = x[2]
271	m = match(cl, sf.tp)
272	if (is.na(m))
273		stop(paste("Class", cl, "not matched"))
274	# return:
275	if (! EWKB) # ISO: add 1000s
276		as.integer(m + switch(dims, "XYZ" = 1000, "XYM" = 2000, "XYZM" = 3000, 0))
277	else { # EWKB: set higher bits
278		ret = raw(4)
279		ret[1] = as.raw(m) # set up little-endian
280		ret[4] = as.raw(switch(dims, "XYZ" = 0x80, "XYM" = 0x40, "XYZM" = 0xC0, 0))
281		if (endian == "big")
282			rev(ret)
283		else
284			ret
285	}
286}
287
288#' @name st_as_binary
289#' @export
290st_as_binary.sfg = function(x, ..., endian = .Platform$endian, EWKB = FALSE, pureR = FALSE,
291		hex = FALSE, srid = 0) {
292# if pureR, it's done here, if not, it's done in st_as_binary.sfc
293	stopifnot(endian %in% c("big", "little"))
294	if (! pureR)
295		st_as_binary.sfc(st_sfc(x), endian == endian, EWKB = EWKB, pureR = pureR, hex = hex, srid = srid, ...)[[1]]
296	else {
297		rc <- rawConnection(raw(0), "r+")
298		on.exit(close(rc))
299		writeData(x, rc, endian, EWKB)
300		r = rawConnectionValue(rc)
301		if (hex)
302			r = rawToHex(r)
303		r
304	}
305}
306
307#' Convert raw vector(s) into hexadecimal character string(s)
308#'
309#' Convert raw vector(s) into hexadecimal character string(s)
310#' @param x raw vector, or list with raw vectors
311#' @export
312rawToHex = function(x) {
313	if (is.raw(x))
314		CPL_raw_to_hex(x)
315	else if (is.list(x) && all(vapply(x, is.raw, TRUE)))
316		vapply(x, function(rw) CPL_raw_to_hex(rw), "")
317	else
318		stop(paste("not implemented for objects of class", class(x)))
319}
320
321writeData = function(x, rc, endian, EWKB = FALSE) {
322	if (endian == "big")
323		writeBin(as.raw(0L), rc)
324	else
325		writeBin(as.raw(1L), rc)
326	if (EWKB)
327		writeBin(createType(class(x), endian, TRUE), rc, size = 1L, endian = endian)
328	else
329		writeBin(createType(class(x)), rc, size = 4L, endian = endian)
330	# TODO (?): write SRID in case of EWKB?
331	# write out x:
332	switch(class(x)[2],
333		POINT = writeBin(as.vector(as.double(x)), rc, size = 8L, endian = endian),
334		LINESTRING = writeMatrix(x, rc, endian),
335		POLYGON = ,
336		TRIANGLE = writeMatrixList(x, rc, endian),
337		MULTIPOINT = writeMPoints(x, rc, endian, EWKB),
338		POLYHEDRALSURFACE = ,
339		TIN = ,
340		MULTILINESTRING = ,
341		MULTIPOLYGON = writeMulti(x, rc, endian, EWKB),
342		GEOMETRYCOLLECTION = writeGC(x, rc, endian, EWKB),
343		stop(paste("unimplemented class to write:", class(x)[2]))
344	)
345}
346
347writeMulti = function(x, rc, endian, EWKB) {
348	unMulti = if (inherits(x, "MULTILINESTRING"))
349		st_linestring
350	else # MULTIPOLYGON, POLYHEDRALSURFACE, TIN:
351		st_polygon
352	writeBin(as.integer(length(x)), rc, size = 4L, endian = endian)
353	lapply(lapply(x, unMulti, class(x)[1]), writeData, rc = rc, endian = endian, EWKB = EWKB)
354}
355writeGC = function(x, rc, endian, EWKB) {
356	writeBin(as.integer(length(x)), rc, size = 4L, endian = endian)
357	lapply(x, writeData, rc = rc, endian = endian, EWKB = EWKB)
358}
359writeMatrix = function(x, rc, endian) {
360	writeBin(as.integer(nrow(x)), rc, size = 4L, endian = endian)
361	writeBin(as.double(as.vector(t(x))), rc, size = 8L, endian = endian)
362}
363writeMatrixList = function(x, rc, endian) {
364	writeBin(as.integer(length(x)), rc, size = 4L, endian = endian)
365	lapply(x, function(y) writeMatrix(y, rc, endian))
366}
367writeMPoints = function(x, rc, endian, EWKB) {
368	writeBin(as.integer(nrow(x)), rc, size = 4L, endian = endian)
369	if (nrow(x))
370		apply(x, 1, function(y) writeData(st_point(y, class(x)[1]), rc, endian, EWKB))
371}
372