1 /***************************************
2  A simple osm-specific PBF parser where the structure is hard-coded.
3 
4  Part of the Routino routing software.
5  ******************/ /******************
6  This file Copyright 2012-2015 Andrew M. Bishop
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU Affero General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  GNU Affero General Public License for more details.
17 
18  You should have received a copy of the GNU Affero General Public License
19  along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  ***************************************/
21 
22 
23 #include <stdio.h>
24 
25 #if defined(_MSC_VER)
26 #include <io.h>
27 #include <basetsd.h>
28 #define read(fd,address,length) _read(fd,address,(unsigned int)(length))
29 #define ssize_t SSIZE_T
30 #else
31 #include <unistd.h>
32 #endif
33 
34 #include <stdlib.h>
35 #include <stdint.h>
36 #include <string.h>
37 
38 #if defined(USE_GZIP) && USE_GZIP
39 #include <zlib.h>
40 #endif
41 
42 #include "osmparser.h"
43 #include "tagging.h"
44 #include "logging.h"
45 
46 
/* Field numbers used inside a BlobHeader message */

enum
{
 PBF_VAL_BLOBHEADER_TYPE = 1,
 PBF_VAL_BLOBHEADER_SIZE = 3
};

/* Field numbers used inside a Blob message */

enum
{
 PBF_VAL_BLOB_RAW_DATA  = 1,
 PBF_VAL_BLOB_RAW_SIZE  = 2,
 PBF_VAL_BLOB_ZLIB_DATA = 3
};

/* Field numbers used inside a HeaderBlock message */

enum
{
 PBF_VAL_REQUIRED_FEATURES = 4,
 PBF_VAL_OPTIONAL_FEATURES = 5
};

/* Field numbers used inside a PrimitiveBlock message */

enum
{
 PBF_VAL_STRING_TABLE    =  1,
 PBF_VAL_PRIMITIVE_GROUP =  2,
 PBF_VAL_GRANULARITY     = 17,
 PBF_VAL_LAT_OFFSET      = 19,
 PBF_VAL_LON_OFFSET      = 20
};

/* Field numbers used inside a PrimitiveGroup message */

enum
{
 PBF_VAL_NODES       = 1,
 PBF_VAL_DENSE_NODES = 2,
 PBF_VAL_WAYS        = 3,
 PBF_VAL_RELATIONS   = 4
};

/* Field numbers used inside a StringTable message */

enum
{
 PBF_VAL_STRING = 1
};

/* Field numbers used inside a Node message */

enum
{
 PBF_VAL_NODE_ID   = 1,
 PBF_VAL_NODE_KEYS = 2,
 PBF_VAL_NODE_VALS = 3,
 PBF_VAL_NODE_LAT  = 8,
 PBF_VAL_NODE_LON  = 9
};

/* Field numbers used inside a DenseNode message */

enum
{
 PBF_VAL_DENSE_NODE_ID        =  1,
 PBF_VAL_DENSE_NODE_LAT       =  8,
 PBF_VAL_DENSE_NODE_LON       =  9,
 PBF_VAL_DENSE_NODE_KEYS_VALS = 10
};

/* Field numbers used inside a Way message */

enum
{
 PBF_VAL_WAY_ID   = 1,
 PBF_VAL_WAY_KEYS = 2,
 PBF_VAL_WAY_VALS = 3,
 PBF_VAL_WAY_REFS = 8
};

/* Field numbers used inside a Relation message */

enum
{
 PBF_VAL_RELATION_ID     =  1,
 PBF_VAL_RELATION_KEYS   =  2,
 PBF_VAL_RELATION_VALS   =  3,
 PBF_VAL_RELATION_ROLES  =  8,
 PBF_VAL_RELATION_MEMIDS =  9,
 PBF_VAL_RELATION_TYPES  = 10
};

/* Parser finishing states: normal end of file and the error codes */

enum
{
 PBF_EOF                   =   0,

 PBF_ERROR_UNEXP_EOF       = 100,
 PBF_ERROR_BLOB_HEADER_LEN = 101,
 PBF_ERROR_BLOB_LEN        = 102,
 PBF_ERROR_NOT_OSM         = 103,
 PBF_ERROR_BLOB_BOTH       = 104,
 PBF_ERROR_BLOB_NEITHER    = 105,
 PBF_ERROR_NO_GZIP         = 106,
 PBF_ERROR_GZIP_INIT       = 107,
 PBF_ERROR_GZIP_INFLATE    = 108,
 PBF_ERROR_GZIP_WRONG_LEN  = 109,
 PBF_ERROR_GZIP_END        = 110,
 PBF_ERROR_UNSUPPORTED     = 111,
 PBF_ERROR_TOO_MANY_GROUPS = 112
};
130 
131 
/* Parser state shared by the functions in this file (ParsePBF re-initialises it for each file) */

/* Progress counters reported in the "Reading:" messages */

static uint64_t byteno;                 /* total number of bytes requested through buffer_refill() */
static uint64_t nnodes;                 /* nodes processed so far */
static uint64_t nways;                  /* ways processed so far */
static uint64_t nrelations;             /* relations processed so far */

/* The read buffer and a second buffer (freed by ParsePBF; NOTE(review): presumably the decompression output - confirm) */

static uint32_t buffer_allocated;
static uint32_t zbuffer_allocated;
static unsigned char *buffer=NULL;
static unsigned char *zbuffer=NULL;
static unsigned char *buffer_ptr;       /* next unparsed byte */
static unsigned char *buffer_end;       /* one past the last valid byte */

/* The string table of the current block: pointers into the data buffer plus the string lengths */

static int string_table_length=0;
static int string_table_allocated=0;
static unsigned char **string_table=NULL;
static uint32_t *string_table_string_lengths=NULL;

/* Coordinate scaling used by PBF_LATITUDE() and PBF_LONGITUDE() */

static int32_t granularity=100;
static int64_t lat_offset=0;
static int64_t lon_offset=0;

/* The largest BlobHeader or Blob length that ParsePBF accepts */

#define LENGTH_32M (32*1024*1024)
149 
150 
151 /*++++++++++++++++++++++++++++++++++++++
152   Refill the data buffer and set the pointers.
153 
154   int buffer_refill Return 0 if everything is OK or 1 for EOF.
155 
156   int fd The file descriptor to read from.
157 
158   uint32_t bytes The number of bytes to read.
159   ++++++++++++++++++++++++++++++++++++++*/
160 
buffer_refill(int fd,uint32_t bytes)161 static inline int buffer_refill(int fd,uint32_t bytes)
162 {
163  ssize_t n;
164 
165  if(bytes>buffer_allocated)
166     buffer=(unsigned char *)realloc(buffer,buffer_allocated=bytes);
167 
168  byteno+=bytes;
169 
170  buffer_ptr=buffer;
171  buffer_end=buffer;
172 
173  do
174    {
175     n=read(fd,buffer_end,bytes);
176 
177     if(n<=0)
178        return(1);
179 
180     buffer_end+=n;
181     bytes-=n;
182    }
183  while(bytes>0);
184 
185  return(0);
186 }
187 
/* Forward declarations of the message handlers that are defined after ParsePBF */

static void process_string_table(unsigned char *data,uint32_t length);

static void process_primitive_group(unsigned char *data,uint32_t length);

static void process_nodes(unsigned char *data,uint32_t length);
static void process_dense_nodes(unsigned char *data,uint32_t length);
static void process_ways(unsigned char *data,uint32_t length);
static void process_relations(unsigned char *data,uint32_t length);

/* The decompression function only exists when gzip support is compiled in */

#if defined(USE_GZIP) && USE_GZIP
static int uncompress_pbf(unsigned char *data,uint32_t compressed,uint32_t uncompressed);
#endif /* USE_GZIP */
198 
199 
/* Macros to simplify the parser (and make it look more like the XML parser) */

/* Set the finishing state and jump out of the parser; needs 'state' and the 'finish_parsing' label in scope. */

#define BEGIN(xx) \
 do \
   { \
    state=(xx); \
    goto finish_parsing; \
   } \
 while(0)

/* Read exactly xx bytes; running out of data here is a normal end of file. */

#define BUFFER_CHARS_EOF(xx) \
 do \
   { \
    if(buffer_refill(fd,(xx))) \
       BEGIN(PBF_EOF); \
   } \
 while(0)

/* Read exactly xx bytes; running out of data here is an error. */

#define BUFFER_CHARS(xx) \
 do \
   { \
    if(buffer_refill(fd,(xx))) \
       BEGIN(PBF_ERROR_UNEXP_EOF); \
   } \
 while(0)


/* PBF decoding */

/* Split a field key into the field number (bits 3 to 15) and the wire type (bits 0 to 2). */

#define PBF_FIELD(xx)   (int)(((xx)&0xFFF8)>>3)
#define PBF_TYPE(xx)    (int)((xx)&0x0007)

/* Convert a stored coordinate to degrees using the current granularity and offsets. */

#define PBF_LATITUDE(xx)  (double)(1E-9*(granularity*(xx)+lat_offset))
#define PBF_LONGITUDE(xx) (double)(1E-9*(granularity*(xx)+lon_offset))
216 
217 
/*++++++++++++++++++++++++++++++++++++++
  Parse a PBF int32 data value (a varint) and step over it.

  uint32_t pbf_int32 Returns the low 32 bits of the integer value.

  unsigned char **ptr The pointer to read the data from; it is advanced past the whole varint
                      (at most 10 bytes, the longest valid varint encoding).
  ++++++++++++++++++++++++++++++++++++++*/

static inline uint32_t pbf_int32(unsigned char **ptr)
{
 uint32_t result=0;
 unsigned int shift=0;
 int nbytes=0;
 unsigned char byte;

 do
   {
    byte=*(*ptr)++;

    /* Shift in an unsigned type: shifting a signed int by 28 can overflow, which is undefined. */
    /* Bits beyond the 32nd (e.g. the sign extension of a negative int32) are discarded.        */

    if(shift<32)
       result|=(uint32_t)(byte&0x7F)<<shift;

    shift+=7;
    nbytes++;
   }
 while((byte&0x80) && nbytes<10); /* consume every continuation byte so the caller stays in step */

 return(result);
}
239 
240 
/*++++++++++++++++++++++++++++++++++++++
  Parse a PBF int64 data value (a varint of up to 10 bytes) and step over it.

  int64_t pbf_int64 Returns the integer value.

  unsigned char **ptr The pointer to read the data from; it is advanced past the bytes that were used.
  ++++++++++++++++++++++++++++++++++++++*/

static inline int64_t pbf_int64(unsigned char **ptr)
{
 uint64_t value=0;
 unsigned int shift=0;
 unsigned char byte;

 /* Each byte supplies 7 bits, least significant group first; the top bit says that another byte follows. */

 do
   {
    byte=*(*ptr)++;

    value|=(uint64_t)(byte&0x7F)<<shift;

    shift+=7;
   }
 while((byte&0x80) && shift<=63);

 return(value);
}
267 
268 
/*++++++++++++++++++++++++++++++++++++++
  Parse a PBF sint64 data value (a zig-zag encoded varint of up to 10 bytes) and step over it.

  int64_t pbf_sint64 Returns the integer value.

  unsigned char **ptr The pointer to read the data from; it is advanced past the bytes that were used.
  ++++++++++++++++++++++++++++++++++++++*/

static inline int64_t pbf_sint64(unsigned char **ptr)
{
 uint64_t zigzag=0;
 unsigned int shift=0;
 unsigned char byte;

 /* Collect the raw varint in an unsigned type; left shifting a signed value into or past */
 /* the sign bit is undefined behaviour and malformed input must not be able to cause it. */

 do
   {
    byte=*(*ptr)++;

    zigzag|=(uint64_t)(byte&0x7F)<<shift;

    shift+=7;
   }
 while((byte&0x80) && shift<=63);

 /* Zig-zag decoding: bit 0 is the sign, the remaining bits are the magnitude (inverted if negative). */

 return((int64_t)((zigzag>>1)^(~(zigzag&1)+1)));
}
299 
300 
/*++++++++++++++++++++++++++++++++++++++
  Parse a PBF length delimited data value: a varint length followed by that many bytes.

  unsigned char *pbf_length_delimited Returns a pointer to the start of the data (just after the length).

  unsigned char **ptr The pointer to read the data from; it is advanced to just after the data.

  uint32_t *length Returns the length of the data (may be NULL if the length is not wanted).
  ++++++++++++++++++++++++++++++++++++++*/

static inline unsigned char *pbf_length_delimited(unsigned char **ptr,uint32_t *length)
{
 uint32_t nbytes=pbf_int32(ptr);
 unsigned char *start=*ptr;

 /* No bounds are checked here; the caller decides whether the data fits in its buffer. */

 *ptr=start+nbytes;

 if(length!=NULL)
    *length=nbytes;

 return(start);
}
322 
323 
/*++++++++++++++++++++++++++++++++++++++
  Skip any pbf field from a message.

  unsigned char **ptr The pointer to read the data from; it is advanced past the field's value.

  int type The type of the data (the wire type taken from the field key).
  ++++++++++++++++++++++++++++++++++++++*/

static inline void pbf_skip(unsigned char **ptr,int type)
{
 if(type==0)                    /* varint: every byte but the last has the top bit set */
   {
    while((**ptr)&0x80)
       (*ptr)++;

    (*ptr)++;
   }
 else if(type==1)               /* 64-bit */
   {
    *ptr+=8;
   }
 else if(type==2)               /* length delimited */
   {
    uint32_t nbytes=pbf_int32(ptr);

    *ptr+=nbytes;
   }
 else if(type==5)               /* 32-bit */
   {
    *ptr+=4;
   }

 /* Types 3 and 4 (deprecated) and any other value leave the pointer where it is. */
}
358 
359 
360 /*++++++++++++++++++++++++++++++++++++++
361   Parse the PBF and call the functions for each OSM item as seen.
362 
363   int ParsePBF Returns 0 if OK or something else in case of an error.
364 
365   int fd The file descriptor of the file to parse.
366   ++++++++++++++++++++++++++++++++++++++*/
367 
ParsePBF(int fd)368 static int ParsePBF(int fd)
369 {
370  int state;
371  unsigned char *error=NULL;
372 
373  /* Print the initial message */
374 
375  printf_first("Reading: Bytes=0 Nodes=0 Ways=0 Relations=0");
376 
377  /* The actual parser. */
378 
379  byteno=0;
380 
381  nnodes=0,nways=0,nrelations=0;
382 
383  string_table_allocated=16384;
384  string_table_length=0;
385  string_table=(unsigned char **)malloc(string_table_allocated*sizeof(unsigned char *));
386  string_table_string_lengths=(uint32_t *)malloc(string_table_allocated*sizeof(uint32_t));
387 
388  zbuffer_allocated=0;
389  zbuffer=NULL;
390 
391  buffer_allocated=65536;
392  buffer=(unsigned char*)malloc(buffer_allocated);
393 
394  buffer_ptr=buffer_end=buffer;
395 
396  while(1)
397    {
398     int32_t blob_header_length=0;
399     int osm_data=0,osm_header=0;
400     int32_t blob_length=0;
401     uint32_t raw_size=0,compressed_size=0,uncompressed_size=0;
402     unsigned char *raw_data=NULL,*zlib_data=NULL;
403     uint32_t length;
404     unsigned char *data;
405 
406     /* ================ Parsing states ================ */
407 
408 
409     BUFFER_CHARS_EOF(4);
410 
411     blob_header_length=(256*(256*(256*(int)buffer_ptr[0])+(int)buffer_ptr[1])+(int)buffer_ptr[2])+buffer_ptr[3];
412     buffer_ptr+=4;
413 
414     if(blob_header_length==0 || blob_header_length>LENGTH_32M)
415        BEGIN(PBF_ERROR_BLOB_HEADER_LEN);
416 
417 
418     BUFFER_CHARS(blob_header_length);
419 
420     osm_header=0;
421     osm_data=0;
422 
423     while(buffer_ptr<buffer_end)
424       {
425        int fieldtype=pbf_int32(&buffer_ptr);
426        int field=PBF_FIELD(fieldtype);
427 
428        switch(field)
429          {
430          case PBF_VAL_BLOBHEADER_TYPE: /* string */
431           {
432            uint32_t length=0;
433            unsigned char *type=NULL;
434 
435            type=pbf_length_delimited(&buffer_ptr,&length);
436 
437            if(length==9 && !strncmp((char*)type,"OSMHeader",9))
438               osm_header=1;
439 
440            if(length==7 && !strncmp((char*)type,"OSMData",7))
441               osm_data=1;
442           }
443           break;
444 
445          case PBF_VAL_BLOBHEADER_SIZE: /* int32 */
446           blob_length=pbf_int32(&buffer_ptr);
447           break;
448 
449          default:
450           pbf_skip(&buffer_ptr,PBF_TYPE(fieldtype));
451          }
452       }
453 
454     if(blob_length==0 || blob_length>LENGTH_32M)
455        BEGIN(PBF_ERROR_BLOB_LEN);
456 
457     if(!osm_data && !osm_header)
458        BEGIN(PBF_ERROR_NOT_OSM);
459 
460 
461     BUFFER_CHARS(blob_length);
462 
463     while(buffer_ptr<buffer_end)
464       {
465        int fieldtype=pbf_int32(&buffer_ptr);
466        int field=PBF_FIELD(fieldtype);
467 
468        switch(field)
469          {
470          case PBF_VAL_BLOB_RAW_DATA: /* bytes */
471           raw_data=pbf_length_delimited(&buffer_ptr,&raw_size);
472           break;
473 
474          case PBF_VAL_BLOB_RAW_SIZE: /* int32 */
475           uncompressed_size=pbf_int32(&buffer_ptr);
476           break;
477 
478          case PBF_VAL_BLOB_ZLIB_DATA: /* bytes */
479           zlib_data=pbf_length_delimited(&buffer_ptr,&compressed_size);
480           break;
481 
482          default:
483           pbf_skip(&buffer_ptr,PBF_TYPE(fieldtype));
484          }
485       }
486 
487     if(raw_data && zlib_data)
488        BEGIN(PBF_ERROR_BLOB_BOTH);
489 
490     if(!raw_data && !zlib_data)
491        BEGIN(PBF_ERROR_BLOB_NEITHER);
492 
493     if(zlib_data)
494       {
495 #if defined(USE_GZIP) && USE_GZIP
496        int newstate=uncompress_pbf(zlib_data,compressed_size,uncompressed_size);
497 
498        if(newstate)
499           BEGIN(newstate);
500 #else
501        BEGIN(PBF_ERROR_NO_GZIP);
502 #endif
503       }
504     else
505       {
506        buffer_ptr=raw_data;
507        buffer_end=raw_data+raw_size;
508       }
509 
510 
511     if(osm_header)
512       {
513        while(buffer_ptr<buffer_end)
514          {
515           int fieldtype=pbf_int32(&buffer_ptr);
516           int field=PBF_FIELD(fieldtype);
517 
518           switch(field)
519             {
520             case PBF_VAL_REQUIRED_FEATURES: /* string */
521              {
522               uint32_t length=0;
523               unsigned char *feature=NULL;
524 
525               feature=pbf_length_delimited(&buffer_ptr,&length);
526 
527               if(strncmp((char*)feature,"OsmSchema-V0.6",14) &&
528                  strncmp((char*)feature,"DenseNodes",10))
529                 {
530                  feature[length]=0;
531                  error=feature;
532                  BEGIN(PBF_ERROR_UNSUPPORTED);
533                 }
534              }
535              break;
536 
537             case PBF_VAL_OPTIONAL_FEATURES: /* string */
538              pbf_length_delimited(&buffer_ptr,NULL);
539              break;
540 
541             default:
542              pbf_skip(&buffer_ptr,PBF_TYPE(fieldtype));
543             }
544          }
545       }
546 
547 
548     if(osm_data)
549       {
550        unsigned char *primitive_group[8]={NULL};
551        uint32_t primitive_group_length[8]={0};
552        uint32_t nprimitive_groups=0,i;
553 
554        granularity=100;
555        lat_offset=lon_offset=0;
556 
557        while(buffer_ptr<buffer_end)
558          {
559           int fieldtype=pbf_int32(&buffer_ptr);
560           int field=PBF_FIELD(fieldtype);
561 
562           switch(field)
563             {
564             case PBF_VAL_STRING_TABLE: /* bytes */
565              data=pbf_length_delimited(&buffer_ptr,&length);
566              process_string_table(data,length);
567              break;
568 
569             case PBF_VAL_PRIMITIVE_GROUP: /* bytes */
570              primitive_group[nprimitive_groups]=pbf_length_delimited(&buffer_ptr,&primitive_group_length[nprimitive_groups]);
571 
572              if(++nprimitive_groups>(sizeof(primitive_group)/sizeof(primitive_group[0])))
573                 BEGIN(PBF_ERROR_TOO_MANY_GROUPS);
574              break;
575 
576             case PBF_VAL_GRANULARITY: /* int32 */
577              granularity=pbf_int32(&buffer_ptr);
578              break;
579 
580             case PBF_VAL_LAT_OFFSET: /* int64 */
581              lat_offset=pbf_int64(&buffer_ptr);
582              break;
583 
584             case PBF_VAL_LON_OFFSET: /* int64 */
585              lon_offset=pbf_int64(&buffer_ptr);
586              break;
587 
588             default:
589              pbf_skip(&buffer_ptr,PBF_TYPE(fieldtype));
590             }
591          }
592 
593        if(nprimitive_groups)
594           for(i=0;i<nprimitive_groups;i++)
595              process_primitive_group(primitive_group[i],primitive_group_length[i]);
596       }
597    }
598 
599 
600  finish_parsing:
601 
602  switch(state)
603    {
604     /* End of file */
605 
606    case PBF_EOF:
607     break;
608 
609 
610     /* ================ Error states ================ */
611 
612 
613    case PBF_ERROR_UNEXP_EOF:
614     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": unexpected end of file seen.\n",byteno);
615     break;
616 
617    case PBF_ERROR_BLOB_HEADER_LEN:
618     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": BlobHeader length is wrong (0<x<=32M).\n",byteno);
619     break;
620 
621    case PBF_ERROR_BLOB_LEN:
622     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": Blob length is wrong (0<x<=32M).\n",byteno);
623     break;
624 
625    case PBF_ERROR_NOT_OSM:
626     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": BlobHeader is neither 'OSMData' or 'OSMHeader'.\n",byteno);
627     break;
628 
629    case PBF_ERROR_BLOB_BOTH:
630     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": Blob has both zlib compressed and raw uncompressed data.\n",byteno);
631     break;
632 
633    case PBF_ERROR_BLOB_NEITHER:
634     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": Blob has neither zlib compressed or raw uncompressed data.\n",byteno);
635     break;
636 
637    case PBF_ERROR_NO_GZIP:
638     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": Blob is compressed but no gzip support is available.\n",byteno);
639     break;
640 
641    case PBF_ERROR_GZIP_INIT:
642     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": Blob is compressed but failed to initialise decompression.\n",byteno);
643     break;
644 
645    case PBF_ERROR_GZIP_INFLATE:
646     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": Blob is compressed but failed to uncompress it.\n",byteno);
647     break;
648 
649    case PBF_ERROR_GZIP_WRONG_LEN:
650     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": Blob is compressed and wrong size when uncompressed.\n",byteno);
651     break;
652 
653    case PBF_ERROR_GZIP_END:
654     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": Blob is compressed but failed to finalise decompression.\n",byteno);
655     break;
656 
657    case PBF_ERROR_UNSUPPORTED:
658     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": Unsupported required feature '%s'.\n",byteno,error);
659     break;
660 
661    case PBF_ERROR_TOO_MANY_GROUPS:
662     fprintf(stderr,"PBF Parser: Error at byte %"PRIu64": OsmData message contains too many PrimitiveGroup messages.\n",byteno);
663     break;
664    }
665 
666  /* Free the parser variables */
667 
668  free(string_table);
669  free(string_table_string_lengths);
670 
671  free(buffer);
672  if(zbuffer)
673     free(zbuffer);
674 
675  /* Print the final message */
676 
677  printf_last("Read: Bytes=%"PRIu64" Nodes=%"PRIu64" Ways=%"PRIu64" Relations=%"PRIu64,byteno,nnodes,nways,nrelations);
678 
679  return(state);
680 }
681 
682 
683 /*++++++++++++++++++++++++++++++++++++++
684   Process a PBF StringTable message.
685 
686   unsigned char *data The data to process.
687 
688   uint32_t length The length of the data.
689   ++++++++++++++++++++++++++++++++++++++*/
690 
process_string_table(unsigned char * data,uint32_t length)691 static void process_string_table(unsigned char *data,uint32_t length)
692 {
693  unsigned char *end=data+length;
694  unsigned char *string;
695  uint32_t string_length;
696 
697  string_table_length=0;
698 
699  while(data<end)
700    {
701     int fieldtype=pbf_int32(&data);
702     int field=PBF_FIELD(fieldtype);
703 
704     switch(field)
705       {
706       case PBF_VAL_STRING:      /* string */
707        string=pbf_length_delimited(&data,&string_length);
708 
709        if(string_table_length==string_table_allocated)
710          {
711           string_table_allocated+=8192;
712           string_table=(unsigned char **)realloc(string_table,string_table_allocated*sizeof(unsigned char *));
713           string_table_string_lengths=(uint32_t *)realloc(string_table_string_lengths,string_table_allocated*sizeof(uint32_t));
714          }
715 
716        string_table[string_table_length]=string;
717        string_table_string_lengths[string_table_length]=string_length;
718 
719        string_table_length++;
720        break;
721 
722       default:
723        pbf_skip(&data,PBF_TYPE(fieldtype));
724       }
725    }
726 }
727 
728 
729 /*++++++++++++++++++++++++++++++++++++++
730   Process a PBF PrimitiveGroup message.
731 
732   unsigned char *data The data to process.
733 
734   uint32_t length The length of the data.
735   ++++++++++++++++++++++++++++++++++++++*/
736 
process_primitive_group(unsigned char * data,uint32_t length)737 static void process_primitive_group(unsigned char *data,uint32_t length)
738 {
739  unsigned char *end=data+length;
740  unsigned char *subdata;
741  uint32_t sublength;
742  int i;
743 
744  /* Fixup the strings (not null terminated in buffer) */
745 
746  for(i=0;i<string_table_length;i++)
747     string_table[i][string_table_string_lengths[i]]=0;
748 
749 
750  while(data<end)
751    {
752     int fieldtype=pbf_int32(&data);
753     int field=PBF_FIELD(fieldtype);
754 
755     switch(field)
756       {
757       case PBF_VAL_NODES:       /* message */
758        subdata=pbf_length_delimited(&data,&sublength);
759        process_nodes(subdata,sublength);
760        break;
761 
762       case PBF_VAL_DENSE_NODES: /* message */
763        subdata=pbf_length_delimited(&data,&sublength);
764        process_dense_nodes(subdata,sublength);
765        break;
766 
767       case PBF_VAL_WAYS:        /* message */
768        subdata=pbf_length_delimited(&data,&sublength);
769        process_ways(subdata,sublength);
770        break;
771 
772       case PBF_VAL_RELATIONS:   /* message */
773        subdata=pbf_length_delimited(&data,&sublength);
774        process_relations(subdata,sublength);
775        break;
776 
777       default:
778        pbf_skip(&data,PBF_TYPE(fieldtype));
779       }
780    }
781 }
782 
783 
784 /*++++++++++++++++++++++++++++++++++++++
785   Process a PBF Node message.
786 
787   unsigned char *data The data to process.
788 
789   uint32_t length The length of the data.
790   ++++++++++++++++++++++++++++++++++++++*/
791 
process_nodes(unsigned char * data,uint32_t length)792 static void process_nodes(unsigned char *data,uint32_t length)
793 {
794  unsigned char *end=data+length;
795  int64_t id=0;
796  unsigned char *keys=NULL,*vals=NULL;
797  unsigned char *keys_end=NULL,*vals_end=NULL;
798  uint32_t keylen=0,vallen=0;
799  int64_t lat=0,lon=0;
800  TagList *tags=NULL,*result=NULL;
801 
802  while(data<end)
803    {
804     int fieldtype=pbf_int32(&data);
805     int field=PBF_FIELD(fieldtype);
806 
807     switch(field)
808       {
809       case PBF_VAL_NODE_ID:     /* sint64 */
810        id=pbf_sint64(&data);
811        break;
812 
813       case PBF_VAL_NODE_KEYS:   /* packed int32 */
814        keys=pbf_length_delimited(&data,&keylen);
815        keys_end=keys+keylen;
816        break;
817 
818       case PBF_VAL_NODE_VALS:   /* packed int32 */
819        vals=pbf_length_delimited(&data,&vallen);
820        vals_end=vals+vallen;
821        break;
822 
823       case PBF_VAL_NODE_LAT:    /* sint64 */
824        lat=pbf_sint64(&data);
825        break;
826 
827       case PBF_VAL_NODE_LON:    /* sint64 */
828        lon=pbf_sint64(&data);
829        break;
830 
831       default:
832        pbf_skip(&data,PBF_TYPE(fieldtype));
833       }
834    }
835 
836  /* Mangle the data and send it to the OSM parser */
837 
838  nnodes++;
839 
840  if(!(nnodes%10000))
841     printf_middle("Reading: Bytes=%"PRIu64" Nodes=%"PRIu64" Ways=%"PRIu64" Relations=%"PRIu64,byteno,nnodes,nways,nrelations);
842 
843  tags=NewTagList();
844 
845  if(keys && vals)
846    {
847     while(keys<keys_end && vals<vals_end)
848       {
849        uint32_t key=pbf_int32(&keys);
850        uint32_t val=pbf_int32(&vals);
851 
852        AppendTag(tags,(char*)string_table[key],(char*)string_table[val]);
853       }
854    }
855 
856  result=ApplyNodeTaggingRules(tags,id);
857 
858  ProcessNodeTags(result,id,PBF_LATITUDE(lat),PBF_LONGITUDE(lon),MODE_NORMAL);
859 
860  DeleteTagList(tags);
861  DeleteTagList(result);
862 }
863 
864 
865 /*++++++++++++++++++++++++++++++++++++++
866   Process a PBF DenseNode message.
867 
868   unsigned char *data The data to process.
869 
870   uint32_t length The length of the data.
871   ++++++++++++++++++++++++++++++++++++++*/
872 
process_dense_nodes(unsigned char * data,uint32_t length)873 static void process_dense_nodes(unsigned char *data,uint32_t length)
874 {
875  unsigned char *end=data+length;
876  unsigned char *ids=NULL,*keys_vals=NULL,*lats=NULL,*lons=NULL;
877  unsigned char *ids_end=NULL;
878  uint32_t idlen=0;
879  int64_t id=0;
880  int64_t lat=0,lon=0;
881  TagList *tags=NULL,*result;
882 
883  while(data<end)
884    {
885     int fieldtype=pbf_int32(&data);
886     int field=PBF_FIELD(fieldtype);
887 
888     switch(field)
889       {
890       case PBF_VAL_DENSE_NODE_ID: /* packed sint64 */
891        ids=pbf_length_delimited(&data,&idlen);
892        ids_end=ids+idlen;
893        break;
894 
895       case PBF_VAL_DENSE_NODE_LAT: /* packed sint64 */
896        lats=pbf_length_delimited(&data,NULL);
897        break;
898 
899       case PBF_VAL_DENSE_NODE_LON: /* packed sint64 */
900        lons=pbf_length_delimited(&data,NULL);
901        break;
902 
903       case PBF_VAL_DENSE_NODE_KEYS_VALS: /* packed int32 */
904        keys_vals=pbf_length_delimited(&data,NULL);
905        break;
906 
907       default:
908        pbf_skip(&data,PBF_TYPE(fieldtype));
909       }
910    }
911 
912  while(ids<ids_end)
913    {
914     int64_t delta_id;
915     int64_t delta_lat,delta_lon;
916 
917     delta_id=pbf_sint64(&ids);
918     delta_lat=pbf_sint64(&lats);
919     delta_lon=pbf_sint64(&lons);
920 
921     id+=delta_id;
922     lat+=delta_lat;
923     lon+=delta_lon;
924 
925     /* Mangle the data and send it to the OSM parser */
926 
927     nnodes++;
928 
929     if(!(nnodes%10000))
930        printf_middle("Reading: Bytes=%"PRIu64" Nodes=%"PRIu64" Ways=%"PRIu64" Relations=%"PRIu64,byteno,nnodes,nways,nrelations);
931 
932     tags=NewTagList();
933 
934     if(keys_vals)
935       {
936        while(1)
937          {
938           uint32_t key=pbf_int32(&keys_vals),val;
939 
940           if(key==0)
941              break;
942 
943           val=pbf_int32(&keys_vals);
944 
945           AppendTag(tags,(char*)string_table[key],(char*)string_table[val]);
946          }
947       }
948 
949     result=ApplyNodeTaggingRules(tags,id);
950 
951     ProcessNodeTags(result,id,PBF_LATITUDE(lat),PBF_LONGITUDE(lon),MODE_NORMAL);
952 
953     DeleteTagList(tags);
954     DeleteTagList(result);
955    }
956 }
957 
958 
959 /*++++++++++++++++++++++++++++++++++++++
960   Process a PBF Way message.
961 
962   unsigned char *data The data to process.
963 
964   uint32_t length The length of the data.
965   ++++++++++++++++++++++++++++++++++++++*/
966 
process_ways(unsigned char * data,uint32_t length)967 static void process_ways(unsigned char *data,uint32_t length)
968 {
969  unsigned char *end=data+length;
970  int64_t id=0;
971  unsigned char *keys=NULL,*vals=NULL,*refs=NULL;
972  unsigned char *keys_end=NULL,*vals_end=NULL,*refs_end=NULL;
973  uint32_t keylen=0,vallen=0,reflen=0;
974  int64_t ref=0;
975  TagList *tags=NULL,*result;
976 
977  while(data<end)
978    {
979     int fieldtype=pbf_int32(&data);
980     int field=PBF_FIELD(fieldtype);
981 
982     switch(field)
983       {
984       case PBF_VAL_WAY_ID:      /* int64 */
985        id=pbf_int64(&data);
986        break;
987 
988       case PBF_VAL_WAY_KEYS:    /* packed int32 */
989        keys=pbf_length_delimited(&data,&keylen);
990        keys_end=keys+keylen;
991        break;
992 
993       case PBF_VAL_WAY_VALS:    /* packed int32 */
994        vals=pbf_length_delimited(&data,&vallen);
995        vals_end=vals+vallen;
996        break;
997 
998       case PBF_VAL_WAY_REFS:    /* packed sint64 */
999        refs=pbf_length_delimited(&data,&reflen);
1000        refs_end=refs+reflen;
1001        break;
1002 
1003       default:
1004        pbf_skip(&data,PBF_TYPE(fieldtype));
1005       }
1006    }
1007 
1008  /* Mangle the data and send it to the OSM parser */
1009 
1010  nways++;
1011 
1012  if(!(nways%1000))
1013     printf_middle("Reading: Bytes=%"PRIu64" Nodes=%"PRIu64" Ways=%"PRIu64" Relations=%"PRIu64,byteno,nnodes,nways,nrelations);
1014 
1015  tags=NewTagList();
1016 
1017  if(keys && vals)
1018    {
1019     while(keys<keys_end && vals<vals_end)
1020       {
1021        uint32_t key=pbf_int32(&keys);
1022        uint32_t val=pbf_int32(&vals);
1023 
1024        AppendTag(tags,(char*)string_table[key],(char*)string_table[val]);
1025       }
1026    }
1027 
1028  AddWayRefs(0);
1029 
1030  if(refs)
1031     while(refs<refs_end)
1032       {
1033        int64_t delta_ref;
1034 
1035        delta_ref=pbf_sint64(&refs);
1036 
1037        ref+=delta_ref;
1038 
1039        if(ref==0)
1040           break;
1041 
1042        AddWayRefs(ref);
1043       }
1044 
1045  result=ApplyWayTaggingRules(tags,id);
1046 
1047  ProcessWayTags(result,id,MODE_NORMAL);
1048 
1049  DeleteTagList(tags);
1050  DeleteTagList(result);
1051 }
1052 
1053 
1054 /*++++++++++++++++++++++++++++++++++++++
1055   Process a PBF Relation message.
1056 
1057   unsigned char *data The data to process.
1058 
1059   uint32_t length The length of the data.
1060   ++++++++++++++++++++++++++++++++++++++*/
1061 
process_relations(unsigned char * data,uint32_t length)1062 static void process_relations(unsigned char *data,uint32_t length)
1063 {
1064  unsigned char *end=data+length;
1065  int64_t id=0;
1066  unsigned char *keys=NULL,*vals=NULL,*roles=NULL,*memids=NULL,*types=NULL;
1067  unsigned char *keys_end=NULL,*vals_end=NULL,*memids_end=NULL,*types_end=NULL;
1068  uint32_t keylen=0,vallen=0,rolelen=0,memidlen=0,typelen=0;
1069  int64_t memid=0;
1070  TagList *tags=NULL,*result;
1071 
1072  while(data<end)
1073    {
1074     int fieldtype=pbf_int32(&data);
1075     int field=PBF_FIELD(fieldtype);
1076 
1077     switch(field)
1078       {
1079       case PBF_VAL_RELATION_ID: /* int64 */
1080        id=pbf_int64(&data);
1081        break;
1082 
1083       case PBF_VAL_RELATION_KEYS: /* packed string */
1084        keys=pbf_length_delimited(&data,&keylen);
1085        keys_end=keys+keylen;
1086        break;
1087 
1088       case PBF_VAL_RELATION_VALS: /* packed string */
1089        vals=pbf_length_delimited(&data,&vallen);
1090        vals_end=vals+vallen;
1091        break;
1092 
1093       case PBF_VAL_RELATION_ROLES: /* packed int32 */
1094        roles=pbf_length_delimited(&data,&rolelen);
1095        break;
1096 
1097       case PBF_VAL_RELATION_MEMIDS: /* packed sint64 */
1098        memids=pbf_length_delimited(&data,&memidlen);
1099        memids_end=memids+memidlen;
1100        break;
1101 
1102       case PBF_VAL_RELATION_TYPES: /* packed enum */
1103        types=pbf_length_delimited(&data,&typelen);
1104        types_end=types+typelen;
1105        break;
1106 
1107       default:
1108        pbf_skip(&data,PBF_TYPE(fieldtype));
1109       }
1110    }
1111 
1112  /* Mangle the data and send it to the OSM parser */
1113 
1114  nrelations++;
1115 
1116  if(!(nrelations%1000))
1117     printf_middle("Reading: Bytes=%"PRIu64" Nodes=%"PRIu64" Ways=%"PRIu64" Relations=%"PRIu64,byteno,nnodes,nways,nrelations);
1118 
1119  AddRelationRefs(0,0,0,NULL);
1120 
1121  tags=NewTagList();
1122 
1123  if(keys && vals)
1124    {
1125     while(keys<keys_end && vals<vals_end)
1126       {
1127        uint32_t key=pbf_int32(&keys);
1128        uint32_t val=pbf_int32(&vals);
1129 
1130        AppendTag(tags,(char*)string_table[key],(char*)string_table[val]);
1131       }
1132    }
1133 
1134  if(memids && types)
1135     while(memids<memids_end && types<types_end)
1136       {
1137        int64_t delta_memid;
1138        unsigned char *role=NULL;
1139        int type;
1140 
1141        delta_memid=pbf_sint64(&memids);
1142        type=pbf_int32(&types);
1143 
1144        if(roles)
1145           role=string_table[pbf_int32(&roles)];
1146 
1147        memid+=delta_memid;
1148 
1149        if(type==0)
1150           AddRelationRefs(memid,0,0,(char*)role);
1151        else if(type==1)
1152           AddRelationRefs(0,memid,0,(char*)role);
1153        else if(type==2)
1154           AddRelationRefs(0,0,memid,(char*)role);
1155       }
1156 
1157  result=ApplyRelationTaggingRules(tags,id);
1158 
1159  ProcessRelationTags(result,id,MODE_NORMAL);
1160 
1161  DeleteTagList(tags);
1162  DeleteTagList(result);
1163 }
1164 
1165 
1166 #if defined(USE_GZIP) && USE_GZIP
1167 
1168 /*++++++++++++++++++++++++++++++++++++++
1169   Uncompress the part of the PBF data that is compressed.
1170 
1171   int uncompress_pbf Returns the error state or 0 if OK.
1172 
1173   unsigned char *data The data to uncompress.
1174 
1175   uint32_t compressed The number of bytes to uncompress.
1176 
1177   uint32_t uncompressed The number of bytes expected when uncompressed.
1178   ++++++++++++++++++++++++++++++++++++++*/
1179 
uncompress_pbf(unsigned char * data,uint32_t compressed,uint32_t uncompressed)1180 static int uncompress_pbf(unsigned char *data,uint32_t compressed,uint32_t uncompressed)
1181 {
1182  z_stream z={0};
1183 
1184  if(uncompressed>zbuffer_allocated)
1185     zbuffer=(unsigned char *)realloc(zbuffer,zbuffer_allocated=uncompressed);
1186 
1187  if(inflateInit2(&z,15+32)!=Z_OK)
1188     return(PBF_ERROR_GZIP_INIT);
1189 
1190  z.next_in=data;
1191  z.avail_in=compressed;
1192 
1193  z.next_out=zbuffer;
1194  z.avail_out=uncompressed;
1195 
1196  if(inflate(&z,Z_FINISH)!=Z_STREAM_END)
1197     return(PBF_ERROR_GZIP_INFLATE);
1198 
1199  if(z.avail_out!=0)
1200     return(PBF_ERROR_GZIP_WRONG_LEN);
1201 
1202  if(inflateEnd(&z)!=Z_OK)
1203     return(PBF_ERROR_GZIP_END);
1204 
1205  buffer_ptr=zbuffer;
1206  buffer_end=zbuffer+uncompressed;
1207 
1208  return(0);
1209 }
1210 
1211 #endif /* USE_GZIP */
1212 
1213 
1214 /*++++++++++++++++++++++++++++++++++++++
1215   Parse a PBF format OSM file (from planet download).
1216 
1217   int ParsePBFFile Returns 0 if OK or something else in case of an error.
1218 
1219   int fd The file descriptor of the file to read from.
1220 
1221   NodesX *OSMNodes The data structure of nodes to fill in.
1222 
1223   WaysX *OSMWays The data structure of ways to fill in.
1224 
1225   RelationsX *OSMRelations The data structure of relations to fill in.
1226   ++++++++++++++++++++++++++++++++++++++*/
1227 
int ParsePBFFile(int fd,NodesX *OSMNodes,WaysX *OSMWays,RelationsX *OSMRelations)
{
 int status;

 /* Give the parser the three data structures that it is to fill in */
 InitialiseParser(OSMNodes,OSMWays,OSMRelations);

 /* Read the PBF data from the file descriptor and keep the outcome */
 status=ParsePBF(fd);

 /* The parser is cleaned up whether or not the parsing succeeded */
 CleanupParser();

 return status;
}
1246