#!/usr/bin/env bash

# script created 14-MAR-04 by William Baguhn, kc9asi
# This script is in the public domain.
# Comments or suggestions to kc9asi@arrl.net

# Usage: <this script> GNIS_FILE
#
# This will take a GNIS datapoint file (typically for a whole state, 8+Mb)
# and break it down into smaller chunks (typically for a county, 30-200k).
# It will also throw away the stupid trailing spaces and <CR>'s at EOL.
#
# Each chunk is written next to the input as GNIS_FILE.<label>.gnis, where
# <label> is the division name with spaces replaced by periods.
#
# CR and trailing-space removal is done with sed alone, so neither "fromdos"
# nor "dos2unix" has to be installed.

# My short experiment: the state of wisconsin.
# Started with a 12.5Mb file.
# ended with 93 files, totaling 6.7Mb.
# and, the data files run a whole lot faster, especially when zoomed in.

# NOTE: "set -e" is deliberately not used. grep returning 1 (no match) is a
# normal outcome here, so failures that matter are checked explicitly.

#######################################
# Print the unique division labels of a GNIS file, one per line.
# Arguments: $1 - path to the GNIS file
# Outputs:   labels on stdout, spaces replaced by periods
#######################################
list_divisions() {
  local file=$1

  # Field 4 isn't just counties, but it's an acceptable label as it's
  # usually counties. -s drops lines that contain no comma at all.
  #
  # The second cut strips the surrounding quotes; it also gets rid of any
  # "quirks" because of commas that came earlier than were expected, as cut
  # doesn't recognize quoting depths.
  #
  # Spaces become periods so that names with spaces work both as a grep
  # pattern (a period matches the space) and as part of a file name.
  #
  # De-duplication happens last: labels that only become equal after the
  # quote stripping must still be processed once.
  cut -s -d, -f4 <"$file" \
    | cut -d'"' -f2 \
    | tr ' ' '.' \
    | sort -u
}

#######################################
# Write every line of the GNIS file that belongs to one division into
# FILE.LABEL.gnis, dropping the output again if nothing matched.
# Arguments: $1 - path to the GNIS file
#            $2 - division label as printed by list_divisions
#######################################
extract_division() {
  local file=$1
  local label=$2
  local out="${file}.${label}.gnis"
  local cr=$'\r'

  # The regexp for grep assures that we just get "county" and not county;
  # hopefully this will make more sensible breaks as county names are
  # sometimes found in other names as well.
  # (i.e. Grant county, and Grant Community Park)
  # The \"county\" should get the former, and ignore the latter.
  #
  # sed drops any dead whitespace (spaces and the DOS <CR>) at end of line.
  # The output is truncated with ">" so that re-running the script never
  # appends to the result of a previous run.
  grep -e ",\"${label}\"," <"$file" \
    | sed -e "s/[ ${cr}]*\$//" >"$out"

  # Delete the file if it is zero length.
  [[ -s "$out" ]] || rm -f -- "$out"
}

#######################################
# Entry point: validate the argument, then split the file per division.
# Arguments: $1 - path to the GNIS file
# Outputs:   one "Extracting <label>" progress line per division on stdout;
#            a usage hint on stderr when the argument is unusable
# Returns:   0 on success, 2 when no readable regular file was given
#######################################
main() {
  local input=${1-}
  local division

  if [[ -z "$input" || ! -f "$input" || ! -r "$input" ]]; then
    printf 'Try calling %s with a file as an argument.\n' "$0" >&2
    return 2
  fi

  # Read the labels line by line (no word splitting, no glob expansion).
  while IFS= read -r division; do
    [[ -n "$division" ]] || continue
    printf 'Extracting %s\n' "$division"
    extract_division "$input" "$division"
  done < <(list_divisions "$input")

  return 0
}

# Last command of the script, so its status becomes the exit status.
main "$@"