#!/usr/bin/env bash

# script created 14-MAR-04 by William Baguhn, kc9asi
# This script is in the public domain.
# Comments or suggestions to kc9asi@arrl.net

# Usage: pass the GNIS datapoint file to split as the only argument.
#
# This will take a GNIS datapoint file (typically for a whole state, 8+ MB)
# and break it down into smaller chunks (typically one per county, 30-200 kB).
# Each chunk is written next to the input as <input>.<county>.gnis, with
# spaces in the county name replaced by periods.
# It will also throw away the stupid trailing spaces and <CR>s at EOL.
#
# DOS line endings are removed with "fromdos" when it is installed, otherwise
# with "dos2unix", otherwise with plain "tr -d '\r'", so the script no longer
# needs to be edited to match the utilities available on your system.

# My short experiment: the state of Wisconsin.
# Started with a 12.5 MB file,
# ended with 93 files totaling 6.7 MB,
# and the data files run a whole lot faster, especially when zoomed in.

#######################################
# Split a GNIS file into one file per distinct value of field 4.
# Arguments:
#   $1 - path to the GNIS file (must be an existing regular file)
# Outputs:
#   "Extracting <label>" on stdout for every label processed;
#   the usage hint on stderr when the argument is missing or not a file.
#   Creates/overwrites <input>.<label>.gnis for every label with matches.
# Returns:
#   0 on success, 1 on a bad argument or if an output file could not be
#   written.
#######################################
split_gnis_by_county() {
  local input=${1-}
  if [[ -z "$input" || ! -f "$input" ]]; then
    printf 'Try calling %s with a file as an argument.\n' "$0" >&2
    return 1
  fi

  # Pick the CR-stripping filter once, instead of once per county.
  local -a strip_cr
  if command -v fromdos >/dev/null 2>&1; then
    strip_cr=(fromdos)
  elif command -v dos2unix >/dev/null 2>&1; then
    strip_cr=(dos2unix)
  else
    strip_cr=(tr -d '\r')
  fi

  local label out status=0

  # The label list is read on fd 3 so nothing inside the loop can consume it.
  while IFS= read -r -u 3 label; do
    [[ -n "$label" ]] || continue
    out="$input.$label.gnis"
    printf 'Extracting %s\n' "$label"

    # The quotes and commas around the label make sure we match the whole
    # field ("Grant") and not a longer name containing it ("Grant Community
    # Park").  The periods that stand in for spaces match any character, so
    # the original spelling is still found.
    # '>' (not '>>') so that re-running the script never duplicates records.
    grep -e ",\"${label}\"," -- "$input" \
      | "${strip_cr[@]}" \
      | sed -e 's/[ ]*$//' >"$out" || status=1

    # Labels that matched nothing leave a zero-length file behind; drop it.
    [[ -s "$out" ]] || rm -f -- "$out"
  done 3< <(
    # Field 4 isn't just counties, but it's an acceptable label as it's
    # usually counties.  The second cut strips the surrounding quotes and
    # gets rid of "quirks" caused by commas that came earlier than expected
    # (cut doesn't recognize quoting).  Spaces become periods so the label
    # works both as a grep pattern and as part of a file name.  Duplicates
    # are removed last, after all normalisation, so each label is processed
    # exactly once.
    cut -f4 -d, <"$input" \
      | cut -f2 -d'"' \
      | tr -d '\r' \
      | tr ' ' . \
      | sort -u
  )

  return "$status"
}

split_gnis_by_county "$@"