1# -*- coding: utf-8 -*-
2'''
3openaddresses_training_data.py
4------------------------------
5
6This script generates several training sets from OpenAddresses.
7'''
8
9import argparse
10import logging
11import os
12import sys
13
14from shapely.geos import LOG as shapely_geos_logger
15shapely_geos_logger.setLevel(logging.CRITICAL)
16
# Extend the import path relative to this file rather than the current working
# directory, so the result does not depend on where the script is launched
# from. The directory two levels above this file is presumably the one that
# contains the `geodata` package imported below -- confirm against the
# repository layout.
this_dir = os.path.realpath(os.path.dirname(__file__))
sys.path.append(os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir)))
19
20from geodata.openaddresses.formatter import OpenAddressesFormatter
21
22from geodata.addresses.components import AddressComponents
23from geodata.polygons.language_polys import LanguagePolygonIndex
24from geodata.neighborhoods.reverse_geocode import NeighborhoodReverseGeocoder
25from geodata.places.reverse_geocode import PlaceReverseGeocoder
26from geodata.polygons.reverse_geocode import OSMReverseGeocoder, OSMCountryReverseGeocoder
27
28
if __name__ == '__main__':
    # Command-line entry point: parse the options, load the reverse geocoding
    # indices that were requested, and build training data from OpenAddresses.
    parser = argparse.ArgumentParser()

    # Optional positional list of sources; when empty, no source filter is
    # passed on (see sources_only below).
    parser.add_argument('sources', nargs='*')

    parser.add_argument('-i', '--openaddresses-dir',
                        help='Path to OpenAddresses directory')

    parser.add_argument('-f', '--format',
                        action='store_true',
                        default=False,
                        help='Save formatted addresses (slow)')

    parser.add_argument('-u', '--untagged',
                        action='store_true',
                        default=False,
                        help='Save untagged formatted addresses (slow)')

    parser.add_argument('--country-rtree-dir',
                        required=True,
                        help='Country RTree directory')

    parser.add_argument('--rtree-dir',
                        default=None,
                        help='OSM reverse geocoder RTree directory')

    parser.add_argument('--places-index-dir',
                        default=None,
                        help='Places index directory')

    parser.add_argument('--neighborhoods-rtree-dir',
                        default=None,
                        help='Neighborhoods reverse geocoder RTree directory')

    parser.add_argument('--debug',
                        action='store_true',
                        default=False,
                        help='Test on a sample of each file to debug config')

    parser.add_argument('-o', '--out-dir',
                        default=os.getcwd(),
                        help='Output directory')

    args = parser.parse_args()

    # Building training data is the only action this script performs, and it
    # requires both an input directory and --format. Check that before loading
    # anything so an incomplete invocation does not load indices that would
    # never be used and then exit without explanation.
    if not (args.openaddresses_dir and args.format):
        logging.warning('Nothing to do: both --openaddresses-dir and --format '
                        'are required to build training data')
    else:
        country_rtree = OSMCountryReverseGeocoder.load(args.country_rtree_dir)

        # The remaining indices are optional; each stays None unless its
        # directory was given on the command line.
        osm_rtree = None
        if args.rtree_dir:
            osm_rtree = OSMReverseGeocoder.load(args.rtree_dir)

        neighborhoods_rtree = None
        if args.neighborhoods_rtree_dir:
            neighborhoods_rtree = NeighborhoodReverseGeocoder.load(args.neighborhoods_rtree_dir)

        places_index = None
        if args.places_index_dir:
            places_index = PlaceReverseGeocoder.load(args.places_index_dir)

        components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index)

        oa_formatter = OpenAddressesFormatter(components, country_rtree, debug=args.debug)
        # --untagged switches component tagging off; an empty sources list is
        # passed as None rather than as an empty list.
        oa_formatter.build_training_data(args.openaddresses_dir,
                                         args.out_dir,
                                         tag_components=not args.untagged,
                                         sources_only=args.sources or None)
94