1-- pagc_normalize_address(addressString) 2-- This takes an address string and parses it into address (internal/street) 3-- street name, type, direction prefix and suffix, location, state and 4-- zip code, depending on what can be found in the string. 5-- This is a drop in replacement for packaged normalize_address 6-- that uses the pagc address standardizer C library instead 7-- USAGE: SELECT * FROM tiger.pagc_normalize_address('One Devonshire Place, PH 301, Boston, MA 02109'); 8SELECT tiger.SetSearchPathForInstall('tiger'); 9CREATE OR REPLACE FUNCTION pagc_normalize_address(in_rawinput character varying) 10 RETURNS norm_addy AS 11$$ 12DECLARE 13 result norm_addy; 14 var_rec RECORD; 15 var_parse_rec RECORD; 16 rawInput VARCHAR; 17 18BEGIN 19 result.parsed := FALSE; 20 21 rawInput := trim(in_rawinput); 22 var_parse_rec := parse_address(rawInput); 23 result.location := var_parse_rec.city; 24 result.stateAbbrev := trim(var_parse_rec.state); 25 result.zip := var_parse_rec.zip; 26 result.zip4 := NULLIF(var_parse_rec.zipplus,''); 27 28 var_rec := standardize_address('pagc_lex' 29 , 'pagc_gaz' 30 , 'pagc_rules' 31, COALESCE(var_parse_rec.address1,''), 32 COALESCE(var_parse_rec.city,'') || COALESCE(', ' || var_parse_rec.state, '') || COALESCE(' ' || var_parse_rec.zip,'') ) ; 33 34 -- For address number only put numbers and stop if reach a non-number e.g. 123-456 will return 123 35 result.address := to_number(substring(var_rec.house_num, '[0-9]+'), '99999999'); 36 result.address_alphanumeric := var_rec.house_num; 37 --get rid of extraneous spaces before we return 38 result.zip := COALESCE(var_rec.postcode,result.zip); 39 result.streetName := trim(var_rec.name); 40 result.location := trim(var_rec.city); 41 result.stateAbbrev := trim(var_rec.state); 42 --this should be broken out separately like pagc, but normalizer doesn't have a slot for it 43 result.streettypeAbbrev := trim(COALESCE(var_rec.suftype, var_rec.pretype)); 44 result.preDirAbbrev := trim(var_rec.predir); 45 result.postDirAbbrev := trim(var_rec.sufdir); 46 result.internal := trim(regexp_replace(replace(var_rec.unit, '#',''), '([0-9]+)\s+([A-Za-z]){0,1}', E'\\1\\2')); 47 result.parsed := TRUE; 48 RETURN result; 49END 50$$ 51 LANGUAGE plpgsql IMMUTABLE STRICT 52 COST 100;