# -*- coding: utf-8 -*-
'''
openaddresses_training_data.py
------------------------------

This script generates several training sets from OpenAddresses.

Training data is only built when both --openaddresses-dir and --format
are given. --country-rtree-dir is always required; the OSM, places and
neighborhoods indices are optional and are passed on as None when their
directory is not specified.
'''

import argparse
import logging
import os
import sys

from shapely.geos import LOG as shapely_geos_logger
shapely_geos_logger.setLevel(logging.CRITICAL)

this_dir = os.path.realpath(os.path.dirname(__file__))

# Make the project imports below resolvable. The first entry is resolved
# relative to this file so the script works from any working directory
# (presumably two levels up is the directory containing the "geodata"
# package -- confirm against the repository layout). The second entry is
# the legacy path relative to the current working directory, kept so that
# existing invocations keep behaving the same.
for _candidate in (os.path.join(this_dir, os.pardir, os.pardir),
                   os.path.join(os.pardir, os.pardir)):
    _candidate = os.path.realpath(_candidate)
    if _candidate not in sys.path:
        sys.path.append(_candidate)

from geodata.openaddresses.formatter import OpenAddressesFormatter

from geodata.addresses.components import AddressComponents
from geodata.polygons.language_polys import LanguagePolygonIndex
from geodata.neighborhoods.reverse_geocode import NeighborhoodReverseGeocoder
from geodata.places.reverse_geocode import PlaceReverseGeocoder
from geodata.polygons.reverse_geocode import OSMReverseGeocoder, OSMCountryReverseGeocoder


def _build_arg_parser():
    '''Return the argparse parser describing this script's command line.'''
    parser = argparse.ArgumentParser()

    # Optional positional list; an empty list is passed on as
    # sources_only=None by main().
    parser.add_argument('sources', nargs='*')

    parser.add_argument('-i', '--openaddresses-dir',
                        help='Path to OpenAddresses directory')

    parser.add_argument('-f', '--format',
                        action='store_true',
                        default=False,
                        help='Save formatted addresses (slow)')

    parser.add_argument('-u', '--untagged',
                        action='store_true',
                        default=False,
                        help='Save untagged formatted addresses (slow)')

    parser.add_argument('--country-rtree-dir',
                        required=True,
                        help='Country RTree directory')

    parser.add_argument('--rtree-dir',
                        default=None,
                        help='OSM reverse geocoder RTree directory')

    parser.add_argument('--places-index-dir',
                        default=None,
                        help='Places index directory')

    parser.add_argument('--neighborhoods-rtree-dir',
                        default=None,
                        help='Neighborhoods reverse geocoder RTree directory')

    parser.add_argument('--debug',
                        action='store_true',
                        default=False,
                        help='Test on a sample of each file to debug config')

    parser.add_argument('-o', '--out-dir',
                        default=os.getcwd(),
                        help='Output directory')

    return parser


def _load_optional(index_cls, index_dir):
    '''Return index_cls.load(index_dir), or None when no directory was given.'''
    if not index_dir:
        return None
    return index_cls.load(index_dir)


def main():
    '''Parse the command line and build the OpenAddresses training data.'''
    args = _build_arg_parser().parse_args()

    # Check for work before loading anything: the indices are only needed
    # for formatting, so there is no point loading them just to exit.
    if not (args.openaddresses_dir and args.format):
        sys.stderr.write('Nothing to do: pass both --openaddresses-dir and '
                         '--format to build training data\n')
        return

    country_rtree = OSMCountryReverseGeocoder.load(args.country_rtree_dir)

    osm_rtree = _load_optional(OSMReverseGeocoder, args.rtree_dir)
    neighborhoods_rtree = _load_optional(NeighborhoodReverseGeocoder,
                                         args.neighborhoods_rtree_dir)
    places_index = _load_optional(PlaceReverseGeocoder, args.places_index_dir)

    components = AddressComponents(osm_rtree, neighborhoods_rtree, places_index)

    oa_formatter = OpenAddressesFormatter(components, country_rtree, debug=args.debug)
    oa_formatter.build_training_data(args.openaddresses_dir,
                                     args.out_dir,
                                     tag_components=not args.untagged,
                                     sources_only=args.sources or None)


if __name__ == '__main__':
    main()