1-- pagc_normalize_address(addressString)
2-- This takes an address string and parses it into address (internal/street)
3-- street name, type, direction prefix and suffix, location, state and
4-- zip code, depending on what can be found in the string.
5-- This is a drop in replacement for packaged normalize_address
6-- that uses the pagc address standardizer C library instead
7-- USAGE: SELECT * FROM tiger.pagc_normalize_address('One Devonshire Place, PH 301, Boston, MA 02109');
8SELECT tiger.SetSearchPathForInstall('tiger');
9CREATE OR REPLACE FUNCTION pagc_normalize_address(in_rawinput character varying)
10  RETURNS norm_addy AS
11$$
12DECLARE
13  result norm_addy;
14  var_rec RECORD;
15  var_parse_rec RECORD;
16  rawInput VARCHAR;
17
18BEGIN
19  result.parsed := FALSE;
20
21  rawInput := trim(in_rawinput);
22  var_parse_rec := parse_address(rawInput);
23  result.location := var_parse_rec.city;
24  result.stateAbbrev := trim(var_parse_rec.state);
25  result.zip := var_parse_rec.zip;
26  result.zip4 := NULLIF(var_parse_rec.zipplus,'');
27
28 var_rec := standardize_address('pagc_lex'
29       , 'pagc_gaz'
30       , 'pagc_rules'
31, COALESCE(var_parse_rec.address1,''),
32   COALESCE(var_parse_rec.city,'') || COALESCE(', ' || var_parse_rec.state, '') || COALESCE(' ' || var_parse_rec.zip,'')  ) ;
33
34 -- For address number only put numbers and stop if reach a non-number e.g. 123-456 will return 123
35  result.address := to_number(substring(var_rec.house_num, '[0-9]+'), '99999999');
36  result.address_alphanumeric := var_rec.house_num;
37   --get rid of extraneous spaces before we return
38  result.zip := COALESCE(var_rec.postcode,result.zip);
39  result.streetName := trim(var_rec.name);
40  result.location := trim(var_rec.city);
41  result.stateAbbrev := trim(var_rec.state);
42  --this should be broken out separately like pagc, but normalizer doesn't have a slot for it
43  result.streettypeAbbrev := trim(COALESCE(var_rec.suftype, var_rec.pretype));
44  result.preDirAbbrev := trim(var_rec.predir);
45  result.postDirAbbrev := trim(var_rec.sufdir);
46  result.internal := trim(regexp_replace(replace(var_rec.unit, '#',''), '([0-9]+)\s+([A-Za-z]){0,1}', E'\\1\\2'));
47  result.parsed := TRUE;
48  RETURN result;
49END
50$$
51  LANGUAGE plpgsql IMMUTABLE STRICT
52  COST 100;