1 /**********************************************************************
2  *
3  * PostGIS - Spatial Types for PostgreSQL
4  * http://postgis.net
5  *
6  * PostGIS is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * PostGIS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PostGIS.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  **********************************************************************
20  *
21  * Copyright (C) 2014 Nicklas Avén
22  *
23  **********************************************************************/
24 
25 
26 #include <math.h>
27 #include "liblwgeom_internal.h"
28 #include "lwgeom_log.h"
29 #include "varint.h"
30 
31 #define TWKB_IN_MAXCOORDS 4
32 
33 /**
34 * Used for passing the parse state between the parsing functions.
35 */
36 typedef struct
37 {
38 	/* Pointers to the bytes */
39 	const uint8_t *twkb; /* Points to start of TWKB */
40 	const uint8_t *twkb_end; /* Points to end of TWKB */
41 	const uint8_t *pos; /* Current read position */
42 
43 	uint32_t check; /* Simple validity checks on geometries */
44 	uint32_t lwtype; /* Current type we are handling */
45 
46 	uint8_t has_bbox;
47 	uint8_t has_size;
48 	uint8_t has_idlist;
49 	uint8_t has_z;
50 	uint8_t has_m;
51 	uint8_t is_empty;
52 
53 	/* Precision factors to convert ints to double */
54 	double factor;
55 	double factor_z;
56 	double factor_m;
57 
58 	uint64_t size;
59 
60 	/* Info about current geometry */
61 	uint8_t magic_byte; /* the magic byte contain info about if twkb contain id, size info, bboxes and precision */
62 
63 	int ndims; /* Number of dimensions */
64 
65 	int64_t *coords; /* An array to keep delta values from 4 dimensions */
66 
67 } twkb_parse_state;
68 
69 
70 /**
71 * Internal function declarations.
72 */
73 LWGEOM* lwgeom_from_twkb_state(twkb_parse_state *s);
74 
75 
76 /**********************************************************************/
77 
78 /**
79 * Check that we are not about to read off the end of the WKB
80 * array.
81 */
twkb_parse_state_advance(twkb_parse_state * s,size_t next)82 static inline void twkb_parse_state_advance(twkb_parse_state *s, size_t next)
83 {
84 	if( (s->pos + next) > s->twkb_end)
85 	{
86 		lwerror("%s: TWKB structure does not match expected size!", __func__);
87 		// lwnotice("TWKB structure does not match expected size!");
88 	}
89 
90 	s->pos += next;
91 }
92 
twkb_parse_state_varint(twkb_parse_state * s)93 static inline int64_t twkb_parse_state_varint(twkb_parse_state *s)
94 {
95 	size_t size;
96 	int64_t val = varint_s64_decode(s->pos, s->twkb_end, &size);
97 	twkb_parse_state_advance(s, size);
98 	return val;
99 }
100 
twkb_parse_state_uvarint(twkb_parse_state * s)101 static inline uint64_t twkb_parse_state_uvarint(twkb_parse_state *s)
102 {
103 	size_t size;
104 	uint64_t val = varint_u64_decode(s->pos, s->twkb_end, &size);
105 	twkb_parse_state_advance(s, size);
106 	return val;
107 }
108 
twkb_parse_state_double(twkb_parse_state * s,double factor)109 static inline double twkb_parse_state_double(twkb_parse_state *s, double factor)
110 {
111 	size_t size;
112 	int64_t val = varint_s64_decode(s->pos, s->twkb_end, &size);
113 	twkb_parse_state_advance(s, size);
114 	return val / factor;
115 }
116 
twkb_parse_state_varint_skip(twkb_parse_state * s)117 static inline void twkb_parse_state_varint_skip(twkb_parse_state *s)
118 {
119 	size_t size = varint_size(s->pos, s->twkb_end);
120 
121 	if ( ! size )
122 		lwerror("%s: no varint to skip", __func__);
123 
124 	twkb_parse_state_advance(s, size);
125 	return;
126 }
127 
128 
129 
lwtype_from_twkb_type(uint8_t twkb_type)130 static uint32_t lwtype_from_twkb_type(uint8_t twkb_type)
131 {
132 	switch (twkb_type)
133 	{
134 		case 1:
135 			return POINTTYPE;
136 		case 2:
137 			return LINETYPE;
138 		case 3:
139 			return POLYGONTYPE;
140 		case 4:
141 			return MULTIPOINTTYPE;
142 		case 5:
143 			return MULTILINETYPE;
144 		case 6:
145 			return MULTIPOLYGONTYPE;
146 		case 7:
147 			return COLLECTIONTYPE;
148 
149 		default: /* Error! */
150 			lwerror("Unknown WKB type");
151 			return 0;
152 	}
153 	return 0;
154 }
155 
156 /**
157 * Byte
158 * Read a byte and advance the parse state forward.
159 */
byte_from_twkb_state(twkb_parse_state * s)160 static uint8_t byte_from_twkb_state(twkb_parse_state *s)
161 {
162 	uint8_t val = *(s->pos);
163 	twkb_parse_state_advance(s, WKB_BYTE_SIZE);
164 	return val;
165 }
166 
167 
168 /**
169 * POINTARRAY
170 * Read a dynamically sized point array and advance the parse state forward.
171 */
ptarray_from_twkb_state(twkb_parse_state * s,uint32_t npoints)172 static POINTARRAY* ptarray_from_twkb_state(twkb_parse_state *s, uint32_t npoints)
173 {
174 	POINTARRAY *pa = NULL;
175 	uint32_t ndims = s->ndims;
176 	uint32_t i;
177 	double *dlist;
178 
179 	LWDEBUG(2,"Entering ptarray_from_twkb_state");
180 	LWDEBUGF(4,"Pointarray has %d points", npoints);
181 
182 	/* Empty! */
183 	if( npoints == 0 )
184 		return ptarray_construct_empty(s->has_z, s->has_m, 0);
185 
186 	pa = ptarray_construct(s->has_z, s->has_m, npoints);
187 	dlist = (double*)(pa->serialized_pointlist);
188 	for( i = 0; i < npoints; i++ )
189 	{
190 		int j = 0;
191 		/* X */
192 		s->coords[j] += twkb_parse_state_varint(s);
193 		dlist[ndims*i + j] = s->coords[j] / s->factor;
194 		j++;
195 		/* Y */
196 		s->coords[j] += twkb_parse_state_varint(s);
197 		dlist[ndims*i + j] = s->coords[j] / s->factor;
198 		j++;
199 		/* Z */
200 		if ( s->has_z )
201 		{
202 			s->coords[j] += twkb_parse_state_varint(s);
203 			dlist[ndims*i + j] = s->coords[j] / s->factor_z;
204 			j++;
205 		}
206 		/* M */
207 		if ( s->has_m )
208 		{
209 			s->coords[j] += twkb_parse_state_varint(s);
210 			dlist[ndims*i + j] = s->coords[j] / s->factor_m;
211 			j++;
212 		}
213 	}
214 
215 	return pa;
216 }
217 
218 /**
219 * POINT
220 */
lwpoint_from_twkb_state(twkb_parse_state * s)221 static LWPOINT* lwpoint_from_twkb_state(twkb_parse_state *s)
222 {
223 	static uint32_t npoints = 1;
224 	POINTARRAY *pa;
225 
226 	LWDEBUG(2,"Entering lwpoint_from_twkb_state");
227 
228 	if ( s->is_empty )
229 		return lwpoint_construct_empty(SRID_UNKNOWN, s->has_z, s->has_m);
230 
231 	pa = ptarray_from_twkb_state(s, npoints);
232 	return lwpoint_construct(SRID_UNKNOWN, NULL, pa);
233 }
234 
235 /**
236 * LINESTRING
237 */
lwline_from_twkb_state(twkb_parse_state * s)238 static LWLINE* lwline_from_twkb_state(twkb_parse_state *s)
239 {
240 	uint32_t npoints;
241 	POINTARRAY *pa;
242 
243 	LWDEBUG(2,"Entering lwline_from_twkb_state");
244 
245 	if ( s->is_empty )
246 		return lwline_construct_empty(SRID_UNKNOWN, s->has_z, s->has_m);
247 
248 	/* Read number of points */
249 	npoints = twkb_parse_state_uvarint(s);
250 
251 	if ( npoints == 0 )
252 		return lwline_construct_empty(SRID_UNKNOWN, s->has_z, s->has_m);
253 
254 	/* Read coordinates */
255 	pa = ptarray_from_twkb_state(s, npoints);
256 
257 	if( pa == NULL )
258 		return lwline_construct_empty(SRID_UNKNOWN, s->has_z, s->has_m);
259 
260 	if( s->check & LW_PARSER_CHECK_MINPOINTS && pa->npoints < 2 )
261 	{
262 		lwerror("%s must have at least two points", lwtype_name(s->lwtype));
263 		return NULL;
264 	}
265 
266 	return lwline_construct(SRID_UNKNOWN, NULL, pa);
267 }
268 
269 /**
270 * POLYGON
271 */
lwpoly_from_twkb_state(twkb_parse_state * s)272 static LWPOLY* lwpoly_from_twkb_state(twkb_parse_state *s)
273 {
274 	uint32_t nrings;
275 	uint32_t i;
276 	LWPOLY *poly;
277 
278 	LWDEBUG(2,"Entering lwpoly_from_twkb_state");
279 
280 	if ( s->is_empty )
281 		return lwpoly_construct_empty(SRID_UNKNOWN, s->has_z, s->has_m);
282 
283 	/* Read number of rings */
284 	nrings = twkb_parse_state_uvarint(s);
285 
286 	/* Start w/ empty polygon */
287 	poly = lwpoly_construct_empty(SRID_UNKNOWN, s->has_z, s->has_m);
288 
289 	LWDEBUGF(4,"Polygon has %d rings", nrings);
290 
291 	/* Empty polygon? */
292 	if( nrings == 0 )
293 		return poly;
294 
295 	for( i = 0; i < nrings; i++ )
296 	{
297 		/* Ret number of points */
298 		uint32_t npoints = twkb_parse_state_uvarint(s);
299 		POINTARRAY *pa = ptarray_from_twkb_state(s, npoints);
300 
301 		/* Skip empty rings */
302 		if( pa == NULL )
303 			continue;
304 
305 		/* Force first and last points to be the same. */
306 		if( ! ptarray_is_closed_2d(pa) )
307 		{
308 			POINT4D pt;
309 			getPoint4d_p(pa, 0, &pt);
310 			ptarray_append_point(pa, &pt, LW_FALSE);
311 		}
312 
313 		/* Check for at least four points. */
314 		if( s->check & LW_PARSER_CHECK_MINPOINTS && pa->npoints < 4 )
315 		{
316 			LWDEBUGF(2, "%s must have at least four points in each ring", lwtype_name(s->lwtype));
317 			lwerror("%s must have at least four points in each ring", lwtype_name(s->lwtype));
318 			return NULL;
319 		}
320 
321 		/* Add ring to polygon */
322 		if ( lwpoly_add_ring(poly, pa) == LW_FAILURE )
323 		{
324 			LWDEBUG(2, "Unable to add ring to polygon");
325 			lwerror("Unable to add ring to polygon");
326 		}
327 
328 	}
329 	return poly;
330 }
331 
332 
333 /**
334 * MULTIPOINT
335 */
lwmultipoint_from_twkb_state(twkb_parse_state * s)336 static LWCOLLECTION* lwmultipoint_from_twkb_state(twkb_parse_state *s)
337 {
338 	int ngeoms, i;
339 	LWGEOM *geom = NULL;
340 	LWCOLLECTION *col = lwcollection_construct_empty(s->lwtype, SRID_UNKNOWN, s->has_z, s->has_m);
341 
342 	LWDEBUG(2,"Entering lwmultipoint_from_twkb_state");
343 
344 	if ( s->is_empty )
345 		return col;
346 
347 	/* Read number of geometries */
348 	ngeoms = twkb_parse_state_uvarint(s);
349 	LWDEBUGF(4,"Number of geometries %d", ngeoms);
350 
351 	/* It has an idlist, we need to skip that */
352 	if ( s->has_idlist )
353 	{
354 		for ( i = 0; i < ngeoms; i++ )
355 			twkb_parse_state_varint_skip(s);
356 	}
357 
358 	for ( i = 0; i < ngeoms; i++ )
359 	{
360 		geom = lwpoint_as_lwgeom(lwpoint_from_twkb_state(s));
361 		if ( lwcollection_add_lwgeom(col, geom) == NULL )
362 		{
363 			lwerror("Unable to add geometry (%p) to collection (%p)", geom, col);
364 			return NULL;
365 		}
366 	}
367 
368 	return col;
369 }
370 
371 /**
372 * MULTILINESTRING
373 */
lwmultiline_from_twkb_state(twkb_parse_state * s)374 static LWCOLLECTION* lwmultiline_from_twkb_state(twkb_parse_state *s)
375 {
376 	int ngeoms, i;
377 	LWGEOM *geom = NULL;
378 	LWCOLLECTION *col = lwcollection_construct_empty(s->lwtype, SRID_UNKNOWN, s->has_z, s->has_m);
379 
380 	LWDEBUG(2,"Entering lwmultilinestring_from_twkb_state");
381 
382 	if ( s->is_empty )
383 		return col;
384 
385 	/* Read number of geometries */
386 	ngeoms = twkb_parse_state_uvarint(s);
387 
388 	LWDEBUGF(4,"Number of geometries %d",ngeoms);
389 
390 	/* It has an idlist, we need to skip that */
391 	if ( s->has_idlist )
392 	{
393 		for ( i = 0; i < ngeoms; i++ )
394 			twkb_parse_state_varint_skip(s);
395 	}
396 
397 	for ( i = 0; i < ngeoms; i++ )
398 	{
399 		geom = lwline_as_lwgeom(lwline_from_twkb_state(s));
400 		if ( lwcollection_add_lwgeom(col, geom) == NULL )
401 		{
402 			lwerror("Unable to add geometry (%p) to collection (%p)", geom, col);
403 			return NULL;
404 		}
405 	}
406 
407 	return col;
408 }
409 
410 /**
411 * MULTIPOLYGON
412 */
lwmultipoly_from_twkb_state(twkb_parse_state * s)413 static LWCOLLECTION* lwmultipoly_from_twkb_state(twkb_parse_state *s)
414 {
415 	int ngeoms, i;
416 	LWGEOM *geom = NULL;
417 	LWCOLLECTION *col = lwcollection_construct_empty(s->lwtype, SRID_UNKNOWN, s->has_z, s->has_m);
418 
419 	LWDEBUG(2,"Entering lwmultipolygon_from_twkb_state");
420 
421 	if ( s->is_empty )
422 		return col;
423 
424 	/* Read number of geometries */
425 	ngeoms = twkb_parse_state_uvarint(s);
426 	LWDEBUGF(4,"Number of geometries %d",ngeoms);
427 
428 	/* It has an idlist, we need to skip that */
429 	if ( s->has_idlist )
430 	{
431 		for ( i = 0; i < ngeoms; i++ )
432 			twkb_parse_state_varint_skip(s);
433 	}
434 
435 	for ( i = 0; i < ngeoms; i++ )
436 	{
437 		geom = lwpoly_as_lwgeom(lwpoly_from_twkb_state(s));
438 		if ( lwcollection_add_lwgeom(col, geom) == NULL )
439 		{
440 			lwerror("Unable to add geometry (%p) to collection (%p)", geom, col);
441 			return NULL;
442 		}
443 	}
444 
445 	return col;
446 }
447 
448 
449 /**
450 * COLLECTION, MULTIPOINTTYPE, MULTILINETYPE, MULTIPOLYGONTYPE
451 **/
lwcollection_from_twkb_state(twkb_parse_state * s)452 static LWCOLLECTION* lwcollection_from_twkb_state(twkb_parse_state *s)
453 {
454 	int ngeoms, i;
455 	LWGEOM *geom = NULL;
456 	LWCOLLECTION *col = lwcollection_construct_empty(s->lwtype, SRID_UNKNOWN, s->has_z, s->has_m);
457 
458 	LWDEBUG(2,"Entering lwcollection_from_twkb_state");
459 
460 	if ( s->is_empty )
461 		return col;
462 
463 	/* Read number of geometries */
464 	ngeoms = twkb_parse_state_uvarint(s);
465 
466 	LWDEBUGF(4,"Number of geometries %d",ngeoms);
467 
468 	/* It has an idlist, we need to skip that */
469 	if ( s->has_idlist )
470 	{
471 		for ( i = 0; i < ngeoms; i++ )
472 			twkb_parse_state_varint_skip(s);
473 	}
474 
475 	for ( i = 0; i < ngeoms; i++ )
476 	{
477 		geom = lwgeom_from_twkb_state(s);
478 		if ( lwcollection_add_lwgeom(col, geom) == NULL )
479 		{
480 			lwerror("Unable to add geometry (%p) to collection (%p)", geom, col);
481 			return NULL;
482 		}
483 	}
484 
485 
486 	return col;
487 }
488 
489 
header_from_twkb_state(twkb_parse_state * s)490 static void header_from_twkb_state(twkb_parse_state *s)
491 {
492 	LWDEBUG(2,"Entering magicbyte_from_twkb_state");
493 
494 	uint8_t extended_dims;
495 
496 	/* Read the first two bytes */
497 	uint8_t type_precision = byte_from_twkb_state(s);
498 	uint8_t metadata = byte_from_twkb_state(s);
499 
500 	/* Strip type and precision out of first byte */
501 	uint8_t type = type_precision & 0x0F;
502 	int8_t precision = unzigzag8((type_precision & 0xF0) >> 4);
503 
504 	/* Convert TWKB type to internal type */
505 	s->lwtype = lwtype_from_twkb_type(type);
506 
507 	/* Convert the precision into factor */
508 	s->factor = pow(10, (double)precision);
509 
510 	/* Strip metadata flags out of second byte */
511 	s->has_bbox   =  metadata & 0x01;
512 	s->has_size   = (metadata & 0x02) >> 1;
513 	s->has_idlist = (metadata & 0x04) >> 2;
514 	extended_dims = (metadata & 0x08) >> 3;
515 	s->is_empty   = (metadata & 0x10) >> 4;
516 
517 	/* Flag for higher dims means read a third byte */
518 	if ( extended_dims )
519 	{
520 		int8_t precision_z, precision_m;
521 
522 		extended_dims = byte_from_twkb_state(s);
523 
524 		/* Strip Z/M presence and precision from ext byte */
525 		s->has_z    = (extended_dims & 0x01);
526 		s->has_m    = (extended_dims & 0x02) >> 1;
527 		precision_z = (extended_dims & 0x1C) >> 2;
528 		precision_m = (extended_dims & 0xE0) >> 5;
529 
530 		/* Convert the precision into factor */
531 		s->factor_z = pow(10, (double)precision_z);
532 		s->factor_m = pow(10, (double)precision_m);
533 	}
534 	else
535 	{
536 		s->has_z = 0;
537 		s->has_m = 0;
538 		s->factor_z = 0;
539 		s->factor_m = 0;
540 	}
541 
542 	/* Read the size, if there is one */
543 	if ( s->has_size )
544 	{
545 		s->size = twkb_parse_state_uvarint(s);
546 	}
547 
548 	/* Calculate the number of dimensions */
549 	s->ndims = 2 + s->has_z + s->has_m;
550 
551 	return;
552 }
553 
554 
555 
556 /**
557 * Generic handling for TWKB geometries. The front of every TWKB geometry
558 * (including those embedded in collections) is a type byte and metadata byte,
559 * then optional size, bbox, etc. Read those, then switch to particular type
560 * handling code.
561 */
lwgeom_from_twkb_state(twkb_parse_state * s)562 LWGEOM* lwgeom_from_twkb_state(twkb_parse_state *s)
563 {
564 	GBOX bbox;
565 	LWGEOM *geom = NULL;
566 	uint32_t has_bbox = LW_FALSE;
567 	int i;
568 
569 	/* Read the first two bytes, and optional */
570 	/* extended precision info and optional size info */
571 	header_from_twkb_state(s);
572 
573 	/* Just experienced a geometry header, so now we */
574 	/* need to reset our coordinate deltas */
575 	for ( i = 0; i < TWKB_IN_MAXCOORDS; i++ )
576 	{
577 		s->coords[i] = 0.0;
578 	}
579 
580 	/* Read the bounding box, is there is one */
581 	if ( s->has_bbox )
582 	{
583 		/* Initialize */
584 		has_bbox = s->has_bbox;
585 		memset(&bbox, 0, sizeof(GBOX));
586 		bbox.flags = lwflags(s->has_z, s->has_m, 0);
587 
588 		/* X */
589 		bbox.xmin = twkb_parse_state_double(s, s->factor);
590 		bbox.xmax = bbox.xmin + twkb_parse_state_double(s, s->factor);
591 		/* Y */
592 		bbox.ymin = twkb_parse_state_double(s, s->factor);
593 		bbox.ymax = bbox.ymin + twkb_parse_state_double(s, s->factor);
594 		/* Z */
595 		if ( s->has_z )
596 		{
597 			bbox.zmin = twkb_parse_state_double(s, s->factor_z);
598 			bbox.zmax = bbox.zmin + twkb_parse_state_double(s, s->factor_z);
599 		}
600 		/* M */
601 		if ( s->has_m )
602 		{
603 			bbox.mmin = twkb_parse_state_double(s, s->factor_m);
604 			bbox.mmax = bbox.mmin + twkb_parse_state_double(s, s->factor_m);
605 		}
606 	}
607 
608 	/* Switch to code for the particular type we're dealing with */
609 	switch( s->lwtype )
610 	{
611 		case POINTTYPE:
612 			geom = lwpoint_as_lwgeom(lwpoint_from_twkb_state(s));
613 			break;
614 		case LINETYPE:
615 			geom = lwline_as_lwgeom(lwline_from_twkb_state(s));
616 			break;
617 		case POLYGONTYPE:
618 			geom = lwpoly_as_lwgeom(lwpoly_from_twkb_state(s));
619 			break;
620 		case MULTIPOINTTYPE:
621 			geom = lwcollection_as_lwgeom(lwmultipoint_from_twkb_state(s));
622 			break;
623 		case MULTILINETYPE:
624 			geom = lwcollection_as_lwgeom(lwmultiline_from_twkb_state(s));
625 			break;
626 		case MULTIPOLYGONTYPE:
627 			geom = lwcollection_as_lwgeom(lwmultipoly_from_twkb_state(s));
628 			break;
629 		case COLLECTIONTYPE:
630 			geom = lwcollection_as_lwgeom(lwcollection_from_twkb_state(s));
631 			break;
632 		/* Unknown type! */
633 		default:
634 			lwerror("%s: Unsupported geometry type: %s", __func__, lwtype_name(s->lwtype));
635 			break;
636 	}
637 
638 	if ( has_bbox )
639 		geom->bbox = gbox_clone(&bbox);
640 
641 	return geom;
642 }
643 
644 
645 /**
646 * WKB inputs *must* have a declared size, to prevent malformed WKB from reading
647 * off the end of the memory segment (this stops a malevolent user from declaring
648 * a one-ring polygon to have 10 rings, causing the WKB reader to walk off the
649 * end of the memory).
650 *
651 * Check is a bitmask of: LW_PARSER_CHECK_MINPOINTS, LW_PARSER_CHECK_ODD,
652 * LW_PARSER_CHECK_CLOSURE, LW_PARSER_CHECK_NONE, LW_PARSER_CHECK_ALL
653 */
lwgeom_from_twkb(const uint8_t * twkb,size_t twkb_size,char check)654 LWGEOM* lwgeom_from_twkb(const uint8_t *twkb, size_t twkb_size, char check)
655 {
656 	int64_t coords[TWKB_IN_MAXCOORDS] = {0, 0, 0, 0};
657 	twkb_parse_state s;
658 
659 	LWDEBUG(2,"Entering lwgeom_from_twkb");
660 	LWDEBUGF(4,"twkb_size: %d",(int) twkb_size);
661 
662 	/* Zero out the state */
663 	memset(&s, 0, sizeof(twkb_parse_state));
664 
665 	/* Initialize the state appropriately */
666 	s.twkb = s.pos = twkb;
667 	s.twkb_end = twkb + twkb_size;
668 	s.check = check;
669 	s.coords = coords;
670 
671 	/* Read the rest of the geometry */
672 	return lwgeom_from_twkb_state(&s);
673 }
674