1 
2 #define R_NO_REMAP
3 #include <R.h>
4 #include <Rinternals.h>
5 #include "wk-v1.h"
6 #include <memory.h>
7 #include <stdint.h>
8 #include <stdarg.h>
9 
// Values stored in wkb_reader_t.error_code:
// - WK_NO_ERROR_CODE is assigned before each feature is parsed and means that
//   the reader itself has not recorded an error message;
// - WK_DEFAULT_ERROR_CODE is assigned by wkb_read_set_errorf() whenever a
//   message is written to the reader's error buffer.
#define WK_DEFAULT_ERROR_CODE (0)
#define WK_NO_ERROR_CODE (-1)
12 
13 // IS_BIG_ENDIAN, IS_LITTLE_ENDIAN, bswap_32(), bswap_64()
14 #include "port.h"
15 
// High bits of the 32-bit geometry type word, tested by
// wkb_read_geometry_type() before the low 16 bits are decoded.
#define EWKB_Z_BIT    0x80000000  // coordinates carry a Z value
#define EWKB_M_BIT    0x40000000  // coordinates carry an M value
#define EWKB_SRID_BIT 0x20000000  // a 32-bit SRID follows the type word
19 
20 typedef struct {
21     wk_handler_t* handler;
22     R_xlen_t feat_id;
23     unsigned char* buffer;
24     size_t size;
25     size_t offset;
26     char swap_endian;
27     int error_code;
28     char error_buf[1024];
29 } wkb_reader_t;
30 
31 int wkb_read_geometry(wkb_reader_t* reader, uint32_t part_id);
32 int wkb_read_endian(wkb_reader_t* reader, unsigned char* value);
33 int wkb_read_geometry_type(wkb_reader_t* reader, wk_meta_t* meta);
34 int wkb_read_uint(wkb_reader_t* reader, uint32_t* value);
35 int wkb_read_coordinates(wkb_reader_t* reader, const wk_meta_t* meta, uint32_t n_coords, int n_dim);
36 void wkb_read_set_errorf(wkb_reader_t* reader, const char* error_buf, ...);
37 
// Checks that `bytes` more bytes can be read starting at reader->offset.
//
// Returns WK_CONTINUE when the read fits inside the buffer. Otherwise an
// "Unexpected end of buffer" message is recorded on the reader and
// WK_ABORT_FEATURE is returned.
static inline int wkb_read_check_buffer(wkb_reader_t* reader, size_t bytes) {
    // Compare against the number of remaining bytes instead of computing
    // offset + bytes, which can wrap around when `bytes` is very large.
    if (reader->offset <= reader->size && bytes <= (reader->size - reader->offset)) {
        return WK_CONTINUE;
    }

    // size_t must not be passed to "%d"; widen explicitly and use "%llu".
    wkb_read_set_errorf(
        reader,
        "Unexpected end of buffer (%llu/%llu)",
        (unsigned long long) reader->offset + (unsigned long long) bytes,
        (unsigned long long) reader->size
    );
    return WK_ABORT_FEATURE;
}
46 
// Evaluates `expr` into the caller's local `int result` and returns that value
// from the enclosing function unless it is WK_CONTINUE.
//
// The body is wrapped in do { } while (0) so that the expansion is a single
// statement: it stays correct as the unbraced body of an if/else or a loop.
#define HANDLE_OR_RETURN(expr)                                 \
    do {                                                       \
        result = (expr);                                       \
        if (result != WK_CONTINUE) return result;              \
    } while (0)
50 
// Evaluates `expr` into the caller's local `int result`, then leaves the
// enclosing loop on WK_ABORT or skips to its next iteration on
// WK_ABORT_FEATURE. Any other value falls through.
//
// This macro must NOT be wrapped in do { } while (0): the break/continue have
// to act on the caller's loop. Only use it as a full statement inside braces.
#define HANDLE_CONTINUE_OR_BREAK(expr)                         \
    result = expr;                                             \
    if (result == WK_ABORT) break; else if (result == WK_ABORT_FEATURE) continue
54 
// Reads one complete geometry (byte order marker, type header and body) from
// the reader's current offset and forwards it to the handler.
//
// `part_id` is passed through to geometry_start()/geometry_end(). Multi and
// collection types recurse, using the child's index as its part_id.
//
// Returns the first status that is not WK_CONTINUE, or otherwise the value
// returned by the handler's geometry_end().
int wkb_read_geometry(wkb_reader_t* reader, uint32_t part_id) {
    int result;
    wk_handler_t* handler = reader->handler;

    // Every geometry, nested ones included, starts with its own byte order marker.
    unsigned char byte_order;
    HANDLE_OR_RETURN(wkb_read_endian(reader, &byte_order));

#ifdef IS_LITTLE_ENDIAN
    reader->swap_endian = byte_order != 1;
#else
    reader->swap_endian = byte_order != 0;
#endif

    wk_meta_t meta;
    WK_META_RESET(meta, WK_GEOMETRY);
    HANDLE_OR_RETURN(wkb_read_geometry_type(reader, &meta));

    // Two ordinates (x, y) plus one for each of Z and M that is present.
    int n_dim = 2;
    if ((meta.flags & WK_FLAG_HAS_Z) != 0) {
        n_dim++;
    }
    if ((meta.flags & WK_FLAG_HAS_M) != 0) {
        n_dim++;
    }

    HANDLE_OR_RETURN(handler->geometry_start(&meta, part_id, handler->handler_data));

    switch (meta.geometry_type) {
    case WK_POINT:
    case WK_LINESTRING:
        // meta.size is the number of coordinates.
        HANDLE_OR_RETURN(wkb_read_coordinates(reader, &meta, meta.size, n_dim));
        break;

    case WK_POLYGON:
        // meta.size is the number of rings; each ring is prefixed by its
        // own coordinate count.
        for (uint32_t ring_id = 0; ring_id < meta.size; ring_id++) {
            uint32_t ring_size;
            HANDLE_OR_RETURN(wkb_read_uint(reader, &ring_size));
            HANDLE_OR_RETURN(handler->ring_start(&meta, ring_size, ring_id, handler->handler_data));
            HANDLE_OR_RETURN(wkb_read_coordinates(reader, &meta, ring_size, n_dim));
            HANDLE_OR_RETURN(handler->ring_end(&meta, ring_size, ring_id, handler->handler_data));
        }
        break;

    case WK_MULTIPOINT:
    case WK_MULTILINESTRING:
    case WK_MULTIPOLYGON:
    case WK_GEOMETRYCOLLECTION:
        // meta.size is the number of child geometries.
        for (uint32_t child_id = 0; child_id < meta.size; child_id++) {
            HANDLE_OR_RETURN(wkb_read_geometry(reader, child_id));
        }
        break;

    default:
        wkb_read_set_errorf(reader, "Unrecognized geometry type code '%d'", meta.geometry_type);
        return WK_ABORT_FEATURE;
    }

    return handler->geometry_end(&meta, part_id, handler->handler_data);
}
103 
// Reads the single byte at the current offset into `*value` and advances the
// offset by one.
//
// Returns WK_CONTINUE on success, or the status from wkb_read_check_buffer()
// when no byte is left.
int wkb_read_endian(wkb_reader_t* reader, unsigned char* value) {
    int result;
    HANDLE_OR_RETURN(wkb_read_check_buffer(reader, 1));

    *value = reader->buffer[reader->offset];
    reader->offset += 1;
    return WK_CONTINUE;
}
112 
// Reads a 32-bit unsigned integer at the current offset into `*value`,
// byte-swapping it when reader->swap_endian is set, and advances the offset
// by four bytes.
//
// Returns WK_CONTINUE on success, or the status from wkb_read_check_buffer()
// when fewer than four bytes remain.
int wkb_read_uint(wkb_reader_t* reader, uint32_t* value) {
    int result;
    HANDLE_OR_RETURN(wkb_read_check_buffer(reader, sizeof(uint32_t)));

    // memcpy instead of a pointer cast: the buffer position may be unaligned.
    uint32_t raw;
    memcpy(&raw, reader->buffer + reader->offset, sizeof(raw));
    reader->offset += sizeof(raw);

    if (reader->swap_endian) {
        raw = bswap_32(raw);
    }

    *value = raw;
    return WK_CONTINUE;
}
129 
// Reads a geometry header into `meta`: the 32-bit type word, the SRID when the
// type word announces one, and the element count.
//
// Dimensions may be signalled either by the EWKB_*_BIT high bits or by a
// thousands offset in the low 16 bits (+1000 Z, +2000 M, +3000 Z and M); both
// set the corresponding WK_FLAG_HAS_* flags. Points carry no count on the
// wire, so their size is set to 1.
//
// Returns WK_CONTINUE on success or the first failing read's status.
int wkb_read_geometry_type(wkb_reader_t* reader, wk_meta_t* meta) {
    int result;
    uint32_t type_word;
    HANDLE_OR_RETURN(wkb_read_uint(reader, &type_word));

    // Flag bits stored in the high bits of the type word.
    if ((type_word & EWKB_Z_BIT) != 0) {
        meta->flags |= WK_FLAG_HAS_Z;
    }

    if ((type_word & EWKB_M_BIT) != 0) {
        meta->flags |= WK_FLAG_HAS_M;
    }

    if ((type_word & EWKB_SRID_BIT) != 0) {
        HANDLE_OR_RETURN(wkb_read_uint(reader, &(meta->srid)));
    }

    // The low 16 bits hold the type code, possibly with a thousands offset.
    const uint32_t type_code = type_word & 0x0000ffff;

    switch (type_code / 1000) {
    case 0:
        meta->geometry_type = type_code;
        break;
    case 1:
        meta->geometry_type = type_code - 1000;
        meta->flags |= WK_FLAG_HAS_Z;
        break;
    case 2:
        meta->geometry_type = type_code - 2000;
        meta->flags |= WK_FLAG_HAS_M;
        break;
    default:
        // Everything from 3000 upwards is treated as a ZM code.
        meta->geometry_type = type_code - 3000;
        meta->flags |= WK_FLAG_HAS_Z;
        meta->flags |= WK_FLAG_HAS_M;
        break;
    }

    if (meta->geometry_type != WK_POINT) {
        HANDLE_OR_RETURN(wkb_read_uint(reader, &(meta->size)));
    } else {
        meta->size = 1;
    }

    return WK_CONTINUE;
}
171 
// Reads `n_coords` coordinates of `n_dim` doubles each from the current offset
// and passes every coordinate to the handler's coord() callback together with
// its index.
//
// The whole run is bounds-checked before any coordinate is read. Returns
// WK_CONTINUE on success, WK_ABORT_FEATURE (with a message recorded on the
// reader) when the request is invalid or does not fit in the buffer, or the
// first value other than WK_CONTINUE returned by coord().
int wkb_read_coordinates(wkb_reader_t* reader, const wk_meta_t* meta, uint32_t n_coords, int n_dim) {
    double coord[4];
    int result;

    // `coord` holds at most four ordinates; also keeps coord_size non-zero.
    if (n_dim < 1 || n_dim > 4) {
        wkb_read_set_errorf(reader, "Unexpected number of dimensions '%d'", n_dim);
        return WK_ABORT_FEATURE;
    }

    // Do the size arithmetic in size_t. `n_dim * n_coords` on its own is
    // evaluated in 32 bits and can wrap to a small value for a corrupt
    // coordinate count, which would let the loops below run past the buffer.
    const size_t coord_size = (size_t) n_dim * sizeof(double);
    if (n_coords > (SIZE_MAX / coord_size)) {
        wkb_read_set_errorf(
            reader,
            "Unexpected end of buffer (%u coordinates requested)",
            (unsigned int) n_coords
        );
        return WK_ABORT_FEATURE;
    }

    HANDLE_OR_RETURN(wkb_read_check_buffer(reader, (size_t) n_coords * coord_size));

    if (reader->swap_endian) {
        uint64_t swappable, swapped;
        for (uint32_t i = 0; i < n_coords; i++) {
            for (int j = 0; j < n_dim; j++) {
                memcpy(&swappable, reader->buffer + reader->offset, sizeof(uint64_t));
                reader->offset += sizeof(double);

                swapped = bswap_64(swappable);
                memcpy(coord + j, &swapped, sizeof(double));
            }

            HANDLE_OR_RETURN(reader->handler->coord(meta, coord, i, reader->handler->handler_data));
        }
    } else {
        // seems to be slightly faster than memcpy(coord, ..., coord_size)
        uint64_t swappable;
        for (uint32_t i = 0; i < n_coords; i++) {
            for (int j = 0; j < n_dim; j++) {
                memcpy(&swappable, reader->buffer + reader->offset, sizeof(uint64_t));
                reader->offset += sizeof(double);
                memcpy(coord + j, &swappable, sizeof(double));
            }

            HANDLE_OR_RETURN(reader->handler->coord(meta, coord, i, reader->handler->handler_data));
        }
    }

    return WK_CONTINUE;
}
207 
// Records an error on the reader: formats a printf-style message into
// reader->error_buf (truncated to the buffer size) and sets
// reader->error_code to WK_DEFAULT_ERROR_CODE.
//
// Note that the `error_buf` parameter is the format string, not the
// destination buffer.
void wkb_read_set_errorf(wkb_reader_t* reader, const char* error_buf, ...) {
    va_list format_args;
    va_start(format_args, error_buf);
    vsnprintf(reader->error_buf, sizeof(reader->error_buf), error_buf, format_args);
    va_end(format_args);

    reader->error_code = WK_DEFAULT_ERROR_CODE;
}
215 
// Reads every element of `data` as WKB and streams it to `handler`.
//
// `data` is accessed with VECTOR_ELT() and each non-NULL element with RAW(),
// so it is assumed to be a list of raw vectors and NULLs. A NULL element is
// reported through null_feature(). Parse errors recorded by the reader are
// passed to the handler's error() callback, whose return value decides whether
// to carry on with the next feature or to stop.
//
// vector_end() is always called, and its value is returned, even when
// vector_start() did not return WK_CONTINUE.
SEXP wkb_read_wkb(SEXP data, wk_handler_t* handler) {
    const R_xlen_t n_features = Rf_xlength(data);

    wk_vector_meta_t vector_meta;
    WK_VECTOR_META_RESET(vector_meta, WK_GEOMETRY);
    vector_meta.size = n_features;
    vector_meta.flags |= WK_FLAG_DIMS_UNKNOWN;

    if (handler->vector_start(&vector_meta, handler->handler_data) == WK_CONTINUE) {
        int result;
        wkb_reader_t reader;
        reader.handler = handler;
        memset(reader.error_buf, 0, sizeof(reader.error_buf));

        for (R_xlen_t feat_id = 0; feat_id < n_features; feat_id++) {
            // each feature could be huge, so check frequently
            if (((feat_id + 1) % 1000) == 0) {
                R_CheckUserInterrupt();
            }

            reader.feat_id = feat_id;
            SEXP feature = VECTOR_ELT(data, feat_id);

            HANDLE_CONTINUE_OR_BREAK(handler->feature_start(&vector_meta, feat_id, handler->handler_data));

            if (feature == R_NilValue) {
                HANDLE_CONTINUE_OR_BREAK(handler->null_feature(handler->handler_data));
            } else {
                // Point the reader at this feature's bytes and clear any
                // error left over from the previous feature.
                reader.buffer = RAW(feature);
                reader.size = Rf_xlength(feature);
                reader.offset = 0;
                reader.error_code = WK_NO_ERROR_CODE;
                reader.error_buf[0] = '\0';

                result = wkb_read_geometry(&reader, WK_PART_ID_NONE);

                // Only errors raised by the reader itself carry a message;
                // the handler decides how to proceed after seeing it.
                if (result == WK_ABORT_FEATURE && reader.error_code != WK_NO_ERROR_CODE) {
                    result = handler->error(reader.error_buf, handler->handler_data);
                }

                if (result == WK_ABORT) {
                    break;
                }
                if (result == WK_ABORT_FEATURE) {
                    continue;
                }
            }

            if (handler->feature_end(&vector_meta, feat_id, handler->handler_data) == WK_ABORT) {
                break;
            }
        }
    }

    SEXP output = PROTECT(handler->vector_end(&vector_meta, handler->handler_data));
    UNPROTECT(1);
    return output;
}
271 
// Entry point taking R objects: hands wkb_read_wkb(), `data` and the handler
// external pointer to wk_handler_run_xptr() and returns whatever it returns.
// NOTE(review): wk_handler_run_xptr() is declared elsewhere; presumably it
// unwraps `handler_xptr` before calling the reader -- confirm in wk-v1.h.
SEXP wk_c_read_wkb(SEXP data, SEXP handler_xptr) {
    SEXP output = wk_handler_run_xptr(wkb_read_wkb, data, handler_xptr);
    return output;
}
275