1 
2 /* Copyright 2002 Daniel Egnor.  See LICENSE.geocoder file.
3  * Portions Copyright (C) 2000-2019 The Xastir Group
4  *
5  * The geo_find() function defined here uses an address map built by
6  * geo-*-to-* to parse an address and convert it to geographical
7  * coordinates.
8  */
9 
10 #ifdef HAVE_CONFIG_H
11   #include "config.h"
12 #endif  // HAVE_CONFIG_H
13 
14 #include "geo.h"
15 #include "io.h"
16 
17 #include <assert.h>
18 #include <string.h>
19 #include <ctype.h>
20 #include <stdio.h>
21 
22 // Must be last include file
23 #include "leak_detection.h"
24 
25 
26 
27 #define D(x)
28 
29 
30 
31 struct state
32 {
33   struct io_file *index;
34 
35   /* Text input */
36   int input_depth;
37   const char *input_begin,*input_end;
38   char buffer[100],*buffer_next,*buffer_end;
39   const char *next;
40 
41   /* Working hypothesis */
42   int address;
43   struct
44   {
45     int begin,end;
46   } range[4];
47   int range_count;
48 
49   /* Answer */
50   struct geo_location *out;
51 };
52 
53 
54 
55 
56 
is_valid(struct state * s)57 static int is_valid(struct state *s)
58 {
59   int i,pos[sizeof(s->range)/sizeof(*s->range)];
60 
61   if (0 == s->range_count)
62   {
63     return 0;
64   }
65 
66   for (i = 0; i < s->range_count; ++i)
67   {
68     D(printf("    Metarange #%d: %d entries\n",i,(s->range[i].end - s->range[i].begin) / 8));
69     pos[i] = s->range[i].begin;
70   }
71 
72   for (;;)
73   {
74     int i_begin = 1,i_end = 1;
75     int best_begin = 1,best_end = 1;
76     char out_side = 'X';
77     int out_addr = 0,out_lat = 0,out_long = 0;
78 
79     D(printf("Searching ---\n"));
80     for (i = 0; i != i_begin; i = (i + 1) % s->range_count)
81     {
82       int r_begin,r_end;
83       for (;;)
84       {
85         int n;
86         if (pos[i] >= s->range[i].end || pos[i] < 0)
87         {
88           D(printf("    Range #%d @ %d: End\n",i,(pos[i] - s->range[i].begin) / 8));
89           return 0;
90         }
91         n = io_in_i4(s->index,
92                      io_in_i4(s->index,pos[i],&r_begin),&r_end);
93         if (n < 0)
94         {
95           return 0;
96         }
97         D(printf("    Range #%d @ %d: [%d,%d)\n",i,(pos[i] - s->range[i].begin) / 8,r_begin,r_end));
98         if (r_end > best_begin && r_begin < r_end)
99         {
100           break;
101         }
102         pos[i] = n;
103       }
104       if (r_begin > best_begin)
105       {
106         best_begin = r_begin;
107         best_end = r_end;
108         i_begin = i_end = i;
109         D(printf("        Best -> [%d#%d,%d#%d)\n",best_begin,i_begin,best_end,i_end));
110       }
111       else if (r_end < best_end)
112       {
113         best_end = r_end;
114         i_end = i;
115         D(printf("        Best -> [%d#%d,%d#%d)\n",best_begin,i_begin,best_end,i_end));
116       }
117     }
118 
119     D(printf("Address range ---\n"));
120     pos[i_end] += 8; /* hack... */
121     do
122     {
123       int next,lon,lat,addr;
124       best_begin = io_in_i4(s->index,best_begin,&next);
125       best_begin = io_in_i4(s->index,best_begin,&lon);
126       best_begin = io_in_i4(s->index,best_begin,&lat);
127       best_begin = io_in_i4(s->index,best_begin,&addr);
128 
129       /* PERF: This should be a binary search... */
130       while (best_begin >= 0 && best_begin < next)
131       {
132         short da;
133         signed char dln,dlt;
134         char side;
135         best_begin = io_in_i2(s->index,best_begin,&da);
136         best_begin = io_in_i1(s->index,best_begin,&dln);
137         best_begin = io_in_i1(s->index,best_begin,&dlt);
138         best_begin = io_in(s->index,best_begin,&side,1);
139         D(printf("    %c: %d vs. %d\n",side,addr + da,s->address / 2));
140         if (addr + da <= s->address / 2)
141         {
142           out_side = side;
143           out_addr = addr + da;
144           out_lat = lat + dlt;
145           out_long = lon + dln;
146         }
147         else if ('X' == out_side)
148         {
149           best_begin = next;
150           D(printf("    Not this time.\n"));
151           break;
152         }
153         else if (NULL != s->out)
154         {
155           const int parity = s->address % 2;
156 
157           s->out->before.address =
158             2 * out_addr + parity;
159           s->out->before.latitude =
160             out_lat / 100000.0;
161           s->out->before.longitude =
162             out_long / 100000.0;
163 
164           if ('X' == side)
165           {
166             --da;
167           }
168           s->out->after.address =
169             2 * (addr + da) + parity;
170           s->out->after.latitude =
171             (lat + dlt) / 100000.0;
172           s->out->after.longitude =
173             (lon + dln) / 100000.0;
174 
175           s->out->at.address = s->address;
176           s->out->at.latitude = (out_lat +
177                                  (lat + dlt - out_lat) *
178                                  (s->address / 2 - out_addr) /
179                                  (addr + da - out_addr)) /
180                                 100000.0;
181           s->out->at.longitude = (out_long +
182                                   (lon + dln - out_long) *
183                                   (s->address / 2 - out_addr) /
184                                   (addr + da - out_addr)) /
185                                  100000.0;
186 
187           s->out->side = out_side;
188           D(printf("    Success!\n"));
189           return 1;
190         }
191         else
192         {
193           D(printf("    Success...\n"));
194           return 1;
195         }
196       }
197     }
198     while (best_begin > 0 && best_begin < best_end);
199   }
200 }
201 
202 
203 
204 
205 
find_name(struct io_file * index,int begin,int end,char type,const char * name,unsigned int name_len)206 static int find_name(
207   struct io_file *index,int begin,int end,
208   char type,const char *name, unsigned int name_len)
209 {
210   const int size = 45;
211   const int count = (end - begin) / size;
212   const int mid = begin + size * (count / 2);
213   char test[41];
214   if (count <= 1)
215   {
216     return begin;
217   }
218   if (io_in(index,mid - size,test,sizeof test) < 0)
219   {
220     return -1;
221   }
222   if (name_len > sizeof test - 1)
223   {
224     name_len = sizeof test - 1;
225   }
226   if (type > test[0]
227       || (type == test[0] && strncasecmp(name,test + 1,name_len) > 0))
228   {
229     return find_name(index,mid,end,type,name,name_len);
230   }
231   else
232   {
233     return find_name(index,begin,mid,type,name,name_len);
234   }
235 }
236 
237 
238 
239 static const char *next_word(struct state *,const char *);
240 
241 
242 
get_name_at(struct state * s,char type,int (* f)(struct state *),const char * last)243 static int get_name_at( struct state *s,
244                         char type,
245                         int (*f)(struct state *),
246                         const char *last)
247 {
248 
249   char n[41];
250   const char *next,*save;
251   unsigned int len = last - s->next;
252   int begin,end,pos;
253 
254   if (io_in_i4(s->index,
255                io_in_i4(s->index,
256                         io_in_i4(s->index,0,NULL),&begin),&end) < 0
257       || (pos = find_name(s->index,begin,end,type,s->next,len)) < 0
258       ||  pos == end
259       || (pos = io_in_i4(s->index,io_in(s->index,pos,n,sizeof n),&begin)) < 0
260       ||  pos == end
261       || (pos = io_in_i4(s->index,io_in(s->index,pos,NULL,sizeof n),&end)) < 0
262       ||  n[0] != type
263       ||  strncasecmp(n + 1,s->next,len))
264   {
265     D(printf("    '%c' \"%.*s\" not found\n",type,len,s->next));
266     return 0;
267   }
268 
269   if ('=' == n[len + 1])   /* alias expansion */
270   {
271     char * const replace = (s->next - s->buffer) + s->buffer;
272     int delta;
273 
274     begin = len + 2;
275 
276     while (begin < (int)sizeof n && ' ' == n[begin])
277     {
278       ++begin;
279     }
280 
281     end = sizeof n;
282 
283     while (end > begin && ' ' == n[end - 1])
284     {
285       --end;
286     }
287 
288     if (end < (int)sizeof n)
289     {
290       ++end;
291     }
292 
293     if (end - begin > (int)len)
294     {
295       end = begin + len;
296     }
297 
298     D(printf("    Replacing '%.*s' with '%.*s'\n",len,n + 1,end - begin,&n[begin]));
299 
300     memcpy(replace,n + begin,end - begin);
301     memmove(replace + (end - begin),
302             replace + len,
303             s->buffer_end - replace - len);
304 
305     delta = len - (end - begin);
306     s->buffer_end -= delta;
307     s->buffer_next -= delta;
308     D(printf("    Buffer is now: '%.*s'\n",s->buffer_end - s->buffer,s->buffer));
309     return 1;
310   }
311 
312   next = next_word(s,last);
313   if (next != last && get_name_at(s,type,f,next))
314   {
315     return 1;
316   }
317 
318   pos = len;
319 
320   while (++pos < (int)sizeof n) if (' ' != n[pos])
321     {
322       return 0;
323     }
324 
325   s->range[s->range_count].begin = begin;
326   s->range[s->range_count].end = end;
327   ++s->range_count;
328   save = s->next;
329   s->next = last;
330   D(printf(">>> '%c' \"%.*s\" found\n",type,len - 1,n + 1));
331   if (NULL != f && f(s))
332   {
333     char *out = NULL;
334 
335     switch (type)
336     {
337       case 'E':
338       case 'O':
339         out = s->out ? s->out->street_name : NULL;
340         break;
341       case 'C':
342         out = s->out ? s->out->city_name : NULL;
343         break;
344       case 'S':
345         out = s->out ? s->out->state_name : NULL;
346         break;
347     }
348 
349     if (NULL != out)
350     {
351       // strncpy is dangerous as it can leave a string
352       // unterminated if the destination isn't big enough to
353       // hold the '\0' character.  Must terminate the string
354       // manually in all cases, as we do here.
355       strncpy(out,n + 1,len - 1);
356       out[len - 1] = '\0';
357     }
358 
359     s->next = save;
360     return 1;
361   }
362 
363   D(printf("<<< '%c' \"%.*s\"\n",type,len - 1,n + 1));
364   --s->range_count;
365   s->next = save;
366   return 0;
367 }
368 
369 
370 
371 
372 
input_word(struct state * s,const char * pos)373 static const char *input_word(struct state *s,const char *pos)
374 {
375   assert(pos >= s->buffer && pos <= s->buffer_end);
376   if (pos != s->buffer_end)
377   {
378     while (pos < s->buffer_end && *pos != ' ')
379     {
380       ++pos;
381     }
382     while (pos < s->buffer_end && *pos == ' ')
383     {
384       ++pos;
385     }
386     return pos;
387   }
388 
389   while (s->input_begin != s->input_end
390          && (s->input_depth > 0 || !isalnum((int)*s->input_begin)))
391   {
392     const char ch = *s->input_begin++;
393     if ('(' == ch)
394     {
395       ++s->input_depth;
396     }
397     if (')' == ch && s->input_depth > 0)
398     {
399       --s->input_depth;
400     }
401   }
402 
403   while (s->input_begin != s->input_end
404          &&  s->buffer_end != &s->buffer[sizeof s->buffer]
405          &&  isalnum((int)*s->input_begin))
406   {
407     if (s->buffer == s->buffer_end
408         || !isdigit((int)s->buffer_end[-1]) || !isalpha((int)*s->input_begin))
409     {
410       *s->buffer_end++ = *s->input_begin;
411     }
412     ++s->input_begin;
413   }
414 
415   if (pos != s->buffer_end
416       &&  s->buffer_end != &s->buffer[sizeof s->buffer])
417   {
418     *s->buffer_end++ = ' ';
419   }
420   return s->buffer_end;
421 }
422 
423 
424 
425 
426 
next_word(struct state * s,const char * pos)427 static const char *next_word(struct state *s,const char *pos)
428 {
429   const char * const next = input_word(s,pos);
430   const char * const save = s->next;
431   assert(pos >= s->buffer && pos <= s->buffer_next);
432   if (pos != s->buffer_next || next == pos)
433   {
434     return next;
435   }
436 
437   s->next = pos;
438   s->buffer_next = s->buffer + (next - s->buffer);
439   if (get_name_at(s,'A',NULL,s->buffer_next))
440   {
441     s->buffer_next = s->buffer + (pos - s->buffer);
442     s->next = save;
443     return next_word(s,pos);
444     /* NOTE! make sure aliases do not form a cycle */
445   }
446 
447   s->next = save;
448   return next;
449 }
450 
451 
452 
453 
454 
get_name(struct state * s,char type,int (* f)(struct state *))455 static int get_name(struct state *s,char type,int (*f)(struct state *))
456 {
457   const char * const next = next_word(s,s->next);
458   if (s->next == next)
459   {
460     return 0;
461   }
462   return get_name_at(s,type,f,next);
463 }
464 
465 
466 
467 
468 
get_zip(struct state * s)469 static int get_zip(struct state *s)
470 {
471   const char * const next = next_word(s,s->next);
472   int zip,zip_offset;
473 
474   if (s->next == next)
475   {
476     return 0;
477   }
478   zip = io_strntoi(s->next,next - s->next);
479   if (zip <= 0 || zip >= 100000
480       ||  io_in_i4(s->index,0,&zip_offset) < 0)
481   {
482     return 0;
483   }
484 
485   s->range[s->range_count].begin = zip_offset + 4 * zip;
486   s->range[s->range_count].end = zip_offset + 4 * zip + 8;
487   ++s->range_count;
488   D(printf(">>> %d\n",zip));
489   if (is_valid(s))
490   {
491     if (NULL != s->out)
492     {
493       s->out->zip_code = zip;
494     }
495     return 1;
496   }
497   D(printf("<<< %d\n",zip));
498   --s->range_count;
499   return 0;
500 }
501 
502 
503 
504 
505 
optional_zip(struct state * s)506 static int optional_zip(struct state *s)
507 {
508   return get_zip(s) || is_valid(s);
509 }
510 
511 
512 
513 
514 
get_state(struct state * s)515 static int get_state(struct state *s)
516 {
517   return get_name(s,'S',optional_zip) || optional_zip(s);
518 }
519 
520 
521 
522 
523 
get_city(struct state * s)524 static int get_city(struct state *s)
525 {
526   return get_name(s,'C',get_state) || get_zip(s);
527 }
528 
529 
530 
531 
532 
skip_stuff(struct state * s)533 static int skip_stuff(struct state *s)
534 {
535   const char * const begin = s->next;
536   int skipped;
537   for (skipped = 0; skipped < 7; ++skipped)
538     if (get_city(s))
539     {
540       s->next = begin;
541       return 1;
542     }
543     else
544     {
545       const char * const next = next_word(s,s->next);
546       if (next == s->next)
547       {
548         break;
549       }
550       s->next = next;
551     }
552 
553   s->next = begin;
554   return 0;
555 }
556 
557 
558 
559 
560 
get_street(struct state * s)561 static int get_street(struct state *s)
562 {
563   return get_name(s,(s->address % 2) ? 'O' : 'E',skip_stuff);
564 }
565 
566 
567 
568 
569 
get_address(struct state * s)570 static int get_address(struct state *s)
571 {
572   const char * const begin = s->next;
573   const char * const next = next_word(s,begin);
574 
575 
576 //fprintf(stderr,"get_address\n");
577 
578   if (begin == next)
579   {
580     return 0;
581   }
582 
583   s->address = io_strntoi(begin,next - s->next);
584   if (0 == s->address)
585   {
586     return 0;
587   }
588 
589   s->next = next;
590   if (get_street(s))
591   {
592     s->next = begin;
593     return 1;
594   }
595 
596   s->next = next_word(s,s->next);
597   if (get_street(s))
598   {
599     s->next = begin;
600     return 1;
601   }
602 
603   s->next = next_word(s,s->next);
604   if (get_street(s))
605   {
606     s->next = begin;
607     return 1;
608   }
609 
610   s->next = begin;
611 
612   return 0;
613 }
614 
615 
616 
617 
618 
geo_find(struct io_file * index,const char * str,int len,struct geo_location * out)619 int geo_find( struct io_file *index,
620               const char *str,
621               int len,
622               struct geo_location *out)
623 {
624 
625   struct state s;
626 
627 
628   if (NULL == index)
629   {
630     return 0;
631   }
632 
633   s.index = index;
634   s.input_depth = 0;
635   s.input_begin = str;
636   s.input_end = str + len;
637   s.next = s.buffer_end = s.buffer_next = s.buffer;
638   s.address = s.range_count = 0;
639 
640   if (NULL != (s.out = out))
641   {
642     out->zip_code = 0;
643     out->street_name[0] = '\0';
644     out->city_name[0] = '\0';
645     out->state_name[0] = '\0';
646   }
647   return get_address(&s);
648 }
649 
650 
651