1import argparse
2import logging
3import os
4import sys
5import six
6
# Directory containing this file, with symlinks resolved.
this_dir = os.path.realpath(os.path.dirname(__file__))
# Make the ``geodata`` package importable when this file is run as a script.
# The path is anchored to this file's location rather than the current
# working directory, so the imports below resolve no matter where the script
# is launched from.
# NOTE(review): assumes this file lives two directory levels below the
# directory that contains the ``geodata`` package -- confirm.
sys.path.append(os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir)))
9
10from geodata.address_expansions.abbreviations import abbreviate
11from geodata.coordinates.conversion import latlon_to_decimal
12from geodata.math.floats import isclose
13from geodata.osm.extract import parse_osm
14from geodata.points.index import PointIndex
15from geodata.encoding import safe_decode
16
17
class PlaceReverseGeocoder(PointIndex):
    '''
    Point index populated from the place nodes of an OSM file.
    '''

    # Precision passed to the index constructor when the caller does not
    # supply one (the command line exposes it as "Geohash precision").
    GEOHASH_PRECISION = 5

    # OSM tag keys copied onto each indexed point. A pattern ending in ':*'
    # admits every key that shares the prefix before its first colon,
    # e.g. 'name:*' admits 'name:en'.
    include_property_patterns = {
        'id',
        'type',
        'name',
        'name:*',
        'ISO3166-1:alpha2',
        'ISO3166-1:alpha3',
        'int_name',
        'is_in',
        'is_in:*',
        'official_name',
        'official_name:*',
        'alt_name',
        'alt_name:*',
        'short_name',
        'short_name:*',
        'admin_level',
        'place',
        'population',
        'designation',
        'description',
        'wikipedia',
        'wikipedia:*',
    }

    @classmethod
    def _is_included_property(cls, key):
        '''
        Return True if the tag key matches include_property_patterns,
        either exactly or through a '<prefix>:*' wildcard pattern.
        '''
        if key in cls.include_property_patterns:
            return True
        # Only the text before the FIRST colon is used as the wildcard
        # prefix, so 'name:en:x' is matched by 'name:*'.
        prefix, colon, _ = key.partition(six.u(':'))
        return bool(colon) and (prefix + six.u(':*')) in cls.include_property_patterns

    @classmethod
    def create_from_osm_file(cls, filename, output_dir, precision=None):
        '''
        Given an OSM file (planet or some other bounds), build a point index
        of its elements for coarse-grained reverse geocoding.

        Elements with no usable lat/lon are skipped. Every indexed point
        keeps only the tags allowed by include_property_patterns, plus
        'type' (always 'node') and 'id' (the numeric part of the element id).

        Note: the input file is expected to have been created using
        osmfilter. Use fetch_osm_address_data.sh for planet or copy the
        relevant commands if using other bounds.

        :param filename: path of the OSM file, handed to parse_osm
        :param output_dir: directory handed to the index as save_dir
        :param precision: index precision; defaults to GEOHASH_PRECISION
        :return: the populated index (not yet saved; the caller is expected
                 to call save() on it)
        '''
        if precision is None:
            precision = cls.GEOHASH_PRECISION

        index = cls(save_dir=output_dir, precision=precision)

        num_points = 0
        for element_id, props, _deps in parse_osm(filename):
            props = {safe_decode(k): safe_decode(v) for k, v in six.iteritems(props)}

            # Element ids look like '<type>:<number>'; keep the number.
            # int() is used instead of the Python-2-only long(): it exists
            # on both major versions and Python 2 promotes to long itself
            # when the value does not fit in a machine int.
            node_id = int(element_id.split(':')[-1])

            lat = props.get('lat')
            lon = props.get('lon')
            if lat is None or lon is None:
                continue
            lat, lon = latlon_to_decimal(lat, lon)
            if lat is None or lon is None:
                continue

            # NOTE(review): presumably the index cannot handle a longitude
            # of exactly 180, hence the nudge just inside -- confirm.
            if isclose(lon, 180.0):
                lon = 179.999

            props = {k: v for k, v in six.iteritems(props)
                     if cls._is_included_property(k)}

            # Always set explicitly, overriding whatever the tags contained.
            props['type'] = 'node'
            props['id'] = node_id

            index.add_point(lat, lon, props)

            # Count first, then report, so the message states the number of
            # points actually added so far.
            num_points += 1
            if num_points % 1000 == 0:
                print('did {} points'.format(num_points))

        return index
92
if __name__ == '__main__':
    # Command-line entry point: build a place index from an OSM places file
    # and save it.
    logging.basicConfig(level=logging.INFO)

    cli = argparse.ArgumentParser()
    cli.add_argument('-i', '--osm-places-file',
                     help='Path to OSM places file')
    cli.add_argument('-p', '--precision',
                     type=int,
                     default=PlaceReverseGeocoder.GEOHASH_PRECISION,
                     help='Geohash precision')
    cli.add_argument('-o', '--out-dir',
                     default=os.getcwd(),
                     help='Output directory')
    options = cli.parse_args()

    # error() prints the usage message and exits, so nothing below runs
    # unless an input file was given.
    if not options.osm_places_file:
        cli.error('Must specify places file')

    place_index = PlaceReverseGeocoder.create_from_osm_file(
        options.osm_places_file,
        options.out_dir,
        precision=options.precision,
    )
    place_index.save()
118