1
2 /* Copyright 2002 Daniel Egnor. See LICENSE.geocoder file.
3 * Portions Copyright (C) 2000-2019 The Xastir Group
4 *
5 * The geo_find() function defined here uses an address map built by
6 * geo-*-to-* to parse an address and convert it to geographical
7 * coordinates.
8 */
9
10 #ifdef HAVE_CONFIG_H
11 #include "config.h"
12 #endif // HAVE_CONFIG_H
13
14 #include "geo.h"
15 #include "io.h"
16
17 #include <assert.h>
18 #include <string.h>
19 #include <ctype.h>
20 #include <stdio.h>
21
22 // Must be last include file
23 #include "leak_detection.h"
24
25
26
27 #define D(x)
28
29
30
31 struct state
32 {
33 struct io_file *index;
34
35 /* Text input */
36 int input_depth;
37 const char *input_begin,*input_end;
38 char buffer[100],*buffer_next,*buffer_end;
39 const char *next;
40
41 /* Working hypothesis */
42 int address;
43 struct
44 {
45 int begin,end;
46 } range[4];
47 int range_count;
48
49 /* Answer */
50 struct geo_location *out;
51 };
52
53
54
55
56
is_valid(struct state * s)57 static int is_valid(struct state *s)
58 {
59 int i,pos[sizeof(s->range)/sizeof(*s->range)];
60
61 if (0 == s->range_count)
62 {
63 return 0;
64 }
65
66 for (i = 0; i < s->range_count; ++i)
67 {
68 D(printf(" Metarange #%d: %d entries\n",i,(s->range[i].end - s->range[i].begin) / 8));
69 pos[i] = s->range[i].begin;
70 }
71
72 for (;;)
73 {
74 int i_begin = 1,i_end = 1;
75 int best_begin = 1,best_end = 1;
76 char out_side = 'X';
77 int out_addr = 0,out_lat = 0,out_long = 0;
78
79 D(printf("Searching ---\n"));
80 for (i = 0; i != i_begin; i = (i + 1) % s->range_count)
81 {
82 int r_begin,r_end;
83 for (;;)
84 {
85 int n;
86 if (pos[i] >= s->range[i].end || pos[i] < 0)
87 {
88 D(printf(" Range #%d @ %d: End\n",i,(pos[i] - s->range[i].begin) / 8));
89 return 0;
90 }
91 n = io_in_i4(s->index,
92 io_in_i4(s->index,pos[i],&r_begin),&r_end);
93 if (n < 0)
94 {
95 return 0;
96 }
97 D(printf(" Range #%d @ %d: [%d,%d)\n",i,(pos[i] - s->range[i].begin) / 8,r_begin,r_end));
98 if (r_end > best_begin && r_begin < r_end)
99 {
100 break;
101 }
102 pos[i] = n;
103 }
104 if (r_begin > best_begin)
105 {
106 best_begin = r_begin;
107 best_end = r_end;
108 i_begin = i_end = i;
109 D(printf(" Best -> [%d#%d,%d#%d)\n",best_begin,i_begin,best_end,i_end));
110 }
111 else if (r_end < best_end)
112 {
113 best_end = r_end;
114 i_end = i;
115 D(printf(" Best -> [%d#%d,%d#%d)\n",best_begin,i_begin,best_end,i_end));
116 }
117 }
118
119 D(printf("Address range ---\n"));
120 pos[i_end] += 8; /* hack... */
121 do
122 {
123 int next,lon,lat,addr;
124 best_begin = io_in_i4(s->index,best_begin,&next);
125 best_begin = io_in_i4(s->index,best_begin,&lon);
126 best_begin = io_in_i4(s->index,best_begin,&lat);
127 best_begin = io_in_i4(s->index,best_begin,&addr);
128
129 /* PERF: This should be a binary search... */
130 while (best_begin >= 0 && best_begin < next)
131 {
132 short da;
133 signed char dln,dlt;
134 char side;
135 best_begin = io_in_i2(s->index,best_begin,&da);
136 best_begin = io_in_i1(s->index,best_begin,&dln);
137 best_begin = io_in_i1(s->index,best_begin,&dlt);
138 best_begin = io_in(s->index,best_begin,&side,1);
139 D(printf(" %c: %d vs. %d\n",side,addr + da,s->address / 2));
140 if (addr + da <= s->address / 2)
141 {
142 out_side = side;
143 out_addr = addr + da;
144 out_lat = lat + dlt;
145 out_long = lon + dln;
146 }
147 else if ('X' == out_side)
148 {
149 best_begin = next;
150 D(printf(" Not this time.\n"));
151 break;
152 }
153 else if (NULL != s->out)
154 {
155 const int parity = s->address % 2;
156
157 s->out->before.address =
158 2 * out_addr + parity;
159 s->out->before.latitude =
160 out_lat / 100000.0;
161 s->out->before.longitude =
162 out_long / 100000.0;
163
164 if ('X' == side)
165 {
166 --da;
167 }
168 s->out->after.address =
169 2 * (addr + da) + parity;
170 s->out->after.latitude =
171 (lat + dlt) / 100000.0;
172 s->out->after.longitude =
173 (lon + dln) / 100000.0;
174
175 s->out->at.address = s->address;
176 s->out->at.latitude = (out_lat +
177 (lat + dlt - out_lat) *
178 (s->address / 2 - out_addr) /
179 (addr + da - out_addr)) /
180 100000.0;
181 s->out->at.longitude = (out_long +
182 (lon + dln - out_long) *
183 (s->address / 2 - out_addr) /
184 (addr + da - out_addr)) /
185 100000.0;
186
187 s->out->side = out_side;
188 D(printf(" Success!\n"));
189 return 1;
190 }
191 else
192 {
193 D(printf(" Success...\n"));
194 return 1;
195 }
196 }
197 }
198 while (best_begin > 0 && best_begin < best_end);
199 }
200 }
201
202
203
204
205
find_name(struct io_file * index,int begin,int end,char type,const char * name,unsigned int name_len)206 static int find_name(
207 struct io_file *index,int begin,int end,
208 char type,const char *name, unsigned int name_len)
209 {
210 const int size = 45;
211 const int count = (end - begin) / size;
212 const int mid = begin + size * (count / 2);
213 char test[41];
214 if (count <= 1)
215 {
216 return begin;
217 }
218 if (io_in(index,mid - size,test,sizeof test) < 0)
219 {
220 return -1;
221 }
222 if (name_len > sizeof test - 1)
223 {
224 name_len = sizeof test - 1;
225 }
226 if (type > test[0]
227 || (type == test[0] && strncasecmp(name,test + 1,name_len) > 0))
228 {
229 return find_name(index,mid,end,type,name,name_len);
230 }
231 else
232 {
233 return find_name(index,begin,mid,type,name,name_len);
234 }
235 }
236
237
238
239 static const char *next_word(struct state *,const char *);
240
241
242
get_name_at(struct state * s,char type,int (* f)(struct state *),const char * last)243 static int get_name_at( struct state *s,
244 char type,
245 int (*f)(struct state *),
246 const char *last)
247 {
248
249 char n[41];
250 const char *next,*save;
251 unsigned int len = last - s->next;
252 int begin,end,pos;
253
254 if (io_in_i4(s->index,
255 io_in_i4(s->index,
256 io_in_i4(s->index,0,NULL),&begin),&end) < 0
257 || (pos = find_name(s->index,begin,end,type,s->next,len)) < 0
258 || pos == end
259 || (pos = io_in_i4(s->index,io_in(s->index,pos,n,sizeof n),&begin)) < 0
260 || pos == end
261 || (pos = io_in_i4(s->index,io_in(s->index,pos,NULL,sizeof n),&end)) < 0
262 || n[0] != type
263 || strncasecmp(n + 1,s->next,len))
264 {
265 D(printf(" '%c' \"%.*s\" not found\n",type,len,s->next));
266 return 0;
267 }
268
269 if ('=' == n[len + 1]) /* alias expansion */
270 {
271 char * const replace = (s->next - s->buffer) + s->buffer;
272 int delta;
273
274 begin = len + 2;
275
276 while (begin < (int)sizeof n && ' ' == n[begin])
277 {
278 ++begin;
279 }
280
281 end = sizeof n;
282
283 while (end > begin && ' ' == n[end - 1])
284 {
285 --end;
286 }
287
288 if (end < (int)sizeof n)
289 {
290 ++end;
291 }
292
293 if (end - begin > (int)len)
294 {
295 end = begin + len;
296 }
297
298 D(printf(" Replacing '%.*s' with '%.*s'\n",len,n + 1,end - begin,&n[begin]));
299
300 memcpy(replace,n + begin,end - begin);
301 memmove(replace + (end - begin),
302 replace + len,
303 s->buffer_end - replace - len);
304
305 delta = len - (end - begin);
306 s->buffer_end -= delta;
307 s->buffer_next -= delta;
308 D(printf(" Buffer is now: '%.*s'\n",s->buffer_end - s->buffer,s->buffer));
309 return 1;
310 }
311
312 next = next_word(s,last);
313 if (next != last && get_name_at(s,type,f,next))
314 {
315 return 1;
316 }
317
318 pos = len;
319
320 while (++pos < (int)sizeof n) if (' ' != n[pos])
321 {
322 return 0;
323 }
324
325 s->range[s->range_count].begin = begin;
326 s->range[s->range_count].end = end;
327 ++s->range_count;
328 save = s->next;
329 s->next = last;
330 D(printf(">>> '%c' \"%.*s\" found\n",type,len - 1,n + 1));
331 if (NULL != f && f(s))
332 {
333 char *out = NULL;
334
335 switch (type)
336 {
337 case 'E':
338 case 'O':
339 out = s->out ? s->out->street_name : NULL;
340 break;
341 case 'C':
342 out = s->out ? s->out->city_name : NULL;
343 break;
344 case 'S':
345 out = s->out ? s->out->state_name : NULL;
346 break;
347 }
348
349 if (NULL != out)
350 {
351 // strncpy is dangerous as it can leave a string
352 // unterminated if the destination isn't big enough to
353 // hold the '\0' character. Must terminate the string
354 // manually in all cases, as we do here.
355 strncpy(out,n + 1,len - 1);
356 out[len - 1] = '\0';
357 }
358
359 s->next = save;
360 return 1;
361 }
362
363 D(printf("<<< '%c' \"%.*s\"\n",type,len - 1,n + 1));
364 --s->range_count;
365 s->next = save;
366 return 0;
367 }
368
369
370
371
372
input_word(struct state * s,const char * pos)373 static const char *input_word(struct state *s,const char *pos)
374 {
375 assert(pos >= s->buffer && pos <= s->buffer_end);
376 if (pos != s->buffer_end)
377 {
378 while (pos < s->buffer_end && *pos != ' ')
379 {
380 ++pos;
381 }
382 while (pos < s->buffer_end && *pos == ' ')
383 {
384 ++pos;
385 }
386 return pos;
387 }
388
389 while (s->input_begin != s->input_end
390 && (s->input_depth > 0 || !isalnum((int)*s->input_begin)))
391 {
392 const char ch = *s->input_begin++;
393 if ('(' == ch)
394 {
395 ++s->input_depth;
396 }
397 if (')' == ch && s->input_depth > 0)
398 {
399 --s->input_depth;
400 }
401 }
402
403 while (s->input_begin != s->input_end
404 && s->buffer_end != &s->buffer[sizeof s->buffer]
405 && isalnum((int)*s->input_begin))
406 {
407 if (s->buffer == s->buffer_end
408 || !isdigit((int)s->buffer_end[-1]) || !isalpha((int)*s->input_begin))
409 {
410 *s->buffer_end++ = *s->input_begin;
411 }
412 ++s->input_begin;
413 }
414
415 if (pos != s->buffer_end
416 && s->buffer_end != &s->buffer[sizeof s->buffer])
417 {
418 *s->buffer_end++ = ' ';
419 }
420 return s->buffer_end;
421 }
422
423
424
425
426
next_word(struct state * s,const char * pos)427 static const char *next_word(struct state *s,const char *pos)
428 {
429 const char * const next = input_word(s,pos);
430 const char * const save = s->next;
431 assert(pos >= s->buffer && pos <= s->buffer_next);
432 if (pos != s->buffer_next || next == pos)
433 {
434 return next;
435 }
436
437 s->next = pos;
438 s->buffer_next = s->buffer + (next - s->buffer);
439 if (get_name_at(s,'A',NULL,s->buffer_next))
440 {
441 s->buffer_next = s->buffer + (pos - s->buffer);
442 s->next = save;
443 return next_word(s,pos);
444 /* NOTE! make sure aliases do not form a cycle */
445 }
446
447 s->next = save;
448 return next;
449 }
450
451
452
453
454
get_name(struct state * s,char type,int (* f)(struct state *))455 static int get_name(struct state *s,char type,int (*f)(struct state *))
456 {
457 const char * const next = next_word(s,s->next);
458 if (s->next == next)
459 {
460 return 0;
461 }
462 return get_name_at(s,type,f,next);
463 }
464
465
466
467
468
get_zip(struct state * s)469 static int get_zip(struct state *s)
470 {
471 const char * const next = next_word(s,s->next);
472 int zip,zip_offset;
473
474 if (s->next == next)
475 {
476 return 0;
477 }
478 zip = io_strntoi(s->next,next - s->next);
479 if (zip <= 0 || zip >= 100000
480 || io_in_i4(s->index,0,&zip_offset) < 0)
481 {
482 return 0;
483 }
484
485 s->range[s->range_count].begin = zip_offset + 4 * zip;
486 s->range[s->range_count].end = zip_offset + 4 * zip + 8;
487 ++s->range_count;
488 D(printf(">>> %d\n",zip));
489 if (is_valid(s))
490 {
491 if (NULL != s->out)
492 {
493 s->out->zip_code = zip;
494 }
495 return 1;
496 }
497 D(printf("<<< %d\n",zip));
498 --s->range_count;
499 return 0;
500 }
501
502
503
504
505
optional_zip(struct state * s)506 static int optional_zip(struct state *s)
507 {
508 return get_zip(s) || is_valid(s);
509 }
510
511
512
513
514
get_state(struct state * s)515 static int get_state(struct state *s)
516 {
517 return get_name(s,'S',optional_zip) || optional_zip(s);
518 }
519
520
521
522
523
get_city(struct state * s)524 static int get_city(struct state *s)
525 {
526 return get_name(s,'C',get_state) || get_zip(s);
527 }
528
529
530
531
532
skip_stuff(struct state * s)533 static int skip_stuff(struct state *s)
534 {
535 const char * const begin = s->next;
536 int skipped;
537 for (skipped = 0; skipped < 7; ++skipped)
538 if (get_city(s))
539 {
540 s->next = begin;
541 return 1;
542 }
543 else
544 {
545 const char * const next = next_word(s,s->next);
546 if (next == s->next)
547 {
548 break;
549 }
550 s->next = next;
551 }
552
553 s->next = begin;
554 return 0;
555 }
556
557
558
559
560
get_street(struct state * s)561 static int get_street(struct state *s)
562 {
563 return get_name(s,(s->address % 2) ? 'O' : 'E',skip_stuff);
564 }
565
566
567
568
569
get_address(struct state * s)570 static int get_address(struct state *s)
571 {
572 const char * const begin = s->next;
573 const char * const next = next_word(s,begin);
574
575
576 //fprintf(stderr,"get_address\n");
577
578 if (begin == next)
579 {
580 return 0;
581 }
582
583 s->address = io_strntoi(begin,next - s->next);
584 if (0 == s->address)
585 {
586 return 0;
587 }
588
589 s->next = next;
590 if (get_street(s))
591 {
592 s->next = begin;
593 return 1;
594 }
595
596 s->next = next_word(s,s->next);
597 if (get_street(s))
598 {
599 s->next = begin;
600 return 1;
601 }
602
603 s->next = next_word(s,s->next);
604 if (get_street(s))
605 {
606 s->next = begin;
607 return 1;
608 }
609
610 s->next = begin;
611
612 return 0;
613 }
614
615
616
617
618
geo_find(struct io_file * index,const char * str,int len,struct geo_location * out)619 int geo_find( struct io_file *index,
620 const char *str,
621 int len,
622 struct geo_location *out)
623 {
624
625 struct state s;
626
627
628 if (NULL == index)
629 {
630 return 0;
631 }
632
633 s.index = index;
634 s.input_depth = 0;
635 s.input_begin = str;
636 s.input_end = str + len;
637 s.next = s.buffer_end = s.buffer_next = s.buffer;
638 s.address = s.range_count = 0;
639
640 if (NULL != (s.out = out))
641 {
642 out->zip_code = 0;
643 out->street_name[0] = '\0';
644 out->city_name[0] = '\0';
645 out->state_name[0] = '\0';
646 }
647 return get_address(&s);
648 }
649
650
651