1 /* -- export.c
2 
3 This file contains the routines for extracting the sequence of
4 postal attributes and definitions produced by the standardizer
5 into strings of text (in __standard_fields__).
6 
7 Prototype 7H08 (This file was written by Walter Sinclair).
8 
9 Copyright (c) 2009 Walter Bruce Sinclair
10 
11 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12 
13 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14 
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17 
18 */
19 
20 /* For pagc-0.4.0 : last revised 2009-10-03 */
21 
22 #include <stdio.h>
23 #include <string.h>
24 #include <stddef.h>
25 #include "pagc_api.h"
26 #include "pagc_tools.h"
27 
28 #define ORDER_DISPLACEMENT 2
29 
30 /* -- local prototypes -- */
31 static void _copy_standard_( STAND_PARAM * , SYMB , int , int  ) ;
32 static void _scan_target_( STAND_PARAM * , SYMB , int  ) ;
33 static char *_get_standard_( STAND_PARAM * , int , int ) ;
34 static char *_get_definition_text_( STAND_PARAM * , int ) ;
35 
36 //#ifndef BUILD_API
37 
38 /* -- local storage -- */
/* Text emitted in front of each output field by send_fields_to_stream.
   The row is the field index (loc) and the column is the output option
   (opt): column 0 holds an opening markup tag, column 1 an opening
   double quote and column 2 a fixed-width text label. */
static const char *__field_start_tag__[][3] = {
	/*  0 */ { "    <Build>", "\"", "Building:         " },
	/*  1 */ { "    <Civic>", "\"", "House Address:    " },
	/*  2 */ { "    <PreDir>", "\"", "Prefix Direction: " },
	/*  3 */ { "    <Qualif>", "\"", "Qualifier:        " },
	/*  4 */ { "    <PreTyp>", "\"", "Prefix Type:      " },
	/*  5 */ { "    <Street>", "\"", "Street Name:      " },
	/*  6 */ { "    <SufTyp>", "\"", "Suffix Type:      " },
	/*  7 */ { "    <SufDir>", "\"", "Suffix Direction: " },
	/*  8 */ { "    <Rural>", "\"", "Rural Route:      " },
	/*  9 */ { "    <Extra>", "\"", "Additional Info:  " },
	/* 10 */ { "    <City>", "\"", "Municipal:        " },
	/* 11 */ { "    <Prov>", "\"", "Province/State:   " },
	/* 12 */ { "    <Nation>", "\"", "Country:          " },
	/* 13 */ { "    <Postal>", "\"", "Postal/Zip Code:  " },
	/* 14 */ { "    <Box>", "\"", "Box:              " },
	/* 15 */ { "    <Unit>", "\"", "Unit:             " }
} ;
/* Opening text for the three landmark fields. send_fields_to_stream
   uses row 0 for FEATNAME, row 1 for FEATTYPE and row 2 for FEATAREA;
   the column is the output option (opt). */
static const char *__land_field_start_tag__[][3] = {
	/* 0 */ { "<FeatureName>", "\"", "FeatureName       " },
	/* 1 */ { "<FeatureType>", "\"", "FeatureType       " },
	/* 2 */ { "<FeatureArea>", "\"", "FeatureArea       " }
} ;

/* Closing text for the three landmark fields, laid out row for row and
   column for column like __land_field_start_tag__. */
static const char *__land_field_tag_end__[][3] = {
	/* 0 */ { "</FeatureName>\n", "\",", "\n" },
	/* 1 */ { "</FeatureType>\n", "\",", "\n" },
	/* 2 */ { "</FeatureArea>\n", "\",", "\n" }
} ;
/* Text emitted after each output field by send_fields_to_stream. Rows
   and columns line up with __field_start_tag__: the row is the field
   index (loc), the column is the output option (opt). Column 0 closes
   the markup tag, column 1 closes the quote and adds a comma, column 2
   just ends the line. */
static const char *__field_tag_end__[][3] = {
	/*  0 */ { "</Build>\n", "\",", "\n" },
	/*  1 */ { "</Civic>\n", "\",", "\n" },
	/*  2 */ { "</PreDir>\n", "\",", "\n" },
	/*  3 */ { "</Qualif>\n", "\",", "\n" },
	/*  4 */ { "</PreTyp>\n", "\",", "\n" },
	/*  5 */ { "</Street>\n", "\",", "\n" },
	/*  6 */ { "</SufTyp>\n", "\",", "\n" },
	/*  7 */ { "</SufDir>\n", "\",", "\n" },
	/*  8 */ { "</Rural>\n", "\",", "\n" },
	/*  9 */ { "</Extra>\n", "\",", "\n" },
	/* 10 */ { "</City>\n", "\",", "\n" },
	/* 11 */ { "</Prov>\n", "\",", "\n" },
	/* 12 */ { "</Nation>\n", "\",", "\n" },
	/* 13 */ { "</Postal>\n", "\",", "\n" },
	/* 14 */ { "</Box>\n", "\",", "\n" },
	/* 15 */ { "</Unit>\n", "\",", "\n" }
} ;
/* Record-level delimiters written by send_fields_to_stream before and
   after the fields of one record. Each array is indexed by the output
   option (opt); the landmark variants are used when is_landmark is set. */
static const char *__record_start_tag__[ ] = {
	"   <address>\n" ,
	"\n" ,
	"\n"
} ;
static const char *__landmark_record_start_tag__[ ] = {
	"   <landmark>\n" ,
	"\n" ,
	"\n"
} ;
static const char *__record_end_tag__[ ] = {
	"   </address>\n" ,
	"\n" ,
	"\n"
} ;
static const char *__landmark_record_end_tag__[ ] = {
	"   </landmark>\n" ,
	"\n" ,
	"\n"
} ;
97 
98 //#endif
99 
100 static SYMB __ord_list__[] = { ORD, FAIL } ;
101 
102 /*----------------------------------------------------------------
103 export.c (init_output_fields)
104 ----------------------------------------------------------------*/
init_output_fields(STAND_PARAM * __stand_param__,int which_fields)105 void init_output_fields( STAND_PARAM *__stand_param__ , int which_fields )
106 {
107 	/* -- called with BOTH to erase both the micro and macro fields
108 		called with RIGHT to erase only the macro fields, and
109 		LEFT to erase only the micro fields -- */
110 	int i  ;
111 	char **__standard_fields__ = __stand_param__->standard_fields ;
112 	/*-- Decide which set of fields to initialize --*/
113 	if ( which_fields == BOTH )
114 	{
115 		for ( i = 0 ; i < MAXOUTSYM ; i++ )
116 		{
117 			__standard_fields__[i][0] = SENTINEL ;
118 		}
119 	}
120 	else
121 	{
122 		/*-- Clean only one set --*/
123 		if ( which_fields == RIGHT )
124 		{
125 			/*-- Erase the macro fields only --*/
126 			for ( i = CITY ; i < NEEDHEAD ; i++ )
127 			{
128 				__standard_fields__[i][0] = SENTINEL ;
129 			}
130 		}
131 		else
132 		{
133 			/*-- Erase the micro fields only --*/
134 			for ( i = BLDNG ; i < CITY ; i++ )
135 			{
136 				__standard_fields__[i][0] = SENTINEL ;
137 			}
138 			for ( i = NEEDHEAD ; i < MAXOUTSYM ; i++ )
139 			{
140 				__standard_fields__[i][0] = SENTINEL ;
141 			}
142 		}
143 	}
144 }
145 
146 /*-----------------------------------------
147 export.c (sym_to_field)
148 -------------------------------------------*/
/*-- Translate an output symbol into the index of the standard field
	that receives it. Both box symbols share field NEEDHEAD and both
	unit symbols share field NEEDHEAD + 1; any other symbol from BLDNG
	up to (not including) MAXOUTSYM is its own field index. Returns
	FAIL for everything else. --*/
int sym_to_field( SYMB sym )
{
	if ( sym == BOXH || sym == BOXT )
	{
		return NEEDHEAD ;
	}
	if ( sym == UNITH || sym == UNITT )
	{
		return NEEDHEAD + 1 ;
	}
	if ( sym < BLDNG || sym >= MAXOUTSYM )
	{
		return FAIL ;
	}
	return sym ;
}
158 
159 /*--------------------------------------------------
160 export.c (_get_definition_text_)
161 -- called by export.c (_get_standard_)
162 ---------------------------------------------------*/
/*-- Pick the text to output for the lexeme at lex_pos: the input text
	held in lex_vector when the best definition has Protect set,
	otherwise that definition's Standard string. --*/
static char *_get_definition_text_( STAND_PARAM *__stand_param__ , int lex_pos )
{
	DEF *chosen_def = __stand_param__->best_defs[lex_pos] ;
	if ( chosen_def->Protect )
	{
		return ( __stand_param__->lex_vector[lex_pos].Text ) ;
	}
	return ( chosen_def->Standard ) ;
}
172 
173 /*-----------------------------------------
174 export.c (stuff_fields)
175 --calls export.c (_scan_target_)
176 -------------------------------------------*/
/*-- Fill the standard fields from the best standardization. Symbols
	below NEEDHEAD map one to one onto the field with the same index.
	The box and unit fields are each fed by two symbols: BOXH and BOXT
	go to field NEEDHEAD, UNITH and UNITT go to field NEEDHEAD + 1. --*/
void stuff_fields( STAND_PARAM *__stand_param__ )
{
	/*-- Scan order matters: both box symbols, then both unit symbols --*/
	const SYMB paired_syms[] = { BOXH , BOXT , UNITH , UNITT } ;
	int sym ;
	int k ;

	for ( sym = 0 ; sym < NEEDHEAD ; sym++ )
	{
		_scan_target_( __stand_param__ , sym , sym ) ;
	}
	for ( k = 0 ; k < 4 ; k++ )
	{
		/*-- k / 2 is 0 for the box pair and 1 for the unit pair --*/
		_scan_target_( __stand_param__ , paired_syms[k] , NEEDHEAD + ( k / 2 ) ) ;
	}
}
193 
194 //#ifndef BUILD_API
195 
196 /*---------------------------------------------------------------------
197 export.c (send_fields_to_stream)
198 uses BLANK_STRING
199 2009-09-27 modify to display landmark fields
200 ----------------------------------------------------------------------*/
201 #define STREAM_BUF_SIZE MAXSTRLEN
send_fields_to_stream(char ** __standard_fields__,FILE * __dest_file__,int opt,int is_landmark)202 void send_fields_to_stream( char **__standard_fields__ , FILE *__dest_file__ , int opt , int is_landmark)
203 {
204 	int output_order ;
205 	if (opt < NO_FORMAT)
206 	{
207 		if (__dest_file__ != NULL)
208 		{
209 			fprintf(__dest_file__,"%s\n",(is_landmark? __landmark_record_start_tag__[opt] : __record_start_tag__[opt])) ;
210 		}
211 		else
212 		{
213 			printf("%s\n",(is_landmark? __landmark_record_start_tag__[opt] : __record_start_tag__[opt])) ;
214 		}
215 	}
216 	/*-- We want to rearrange so that unit and box come first --*/
217 	for (output_order = 0; output_order < (NEEDHEAD + ORDER_DISPLACEMENT); output_order++)
218 	{
219 		char __line_buf__[STREAM_BUF_SIZE] ;
220 		int loc = ((output_order < ORDER_DISPLACEMENT)? (NEEDHEAD + output_order) : (output_order - ORDER_DISPLACEMENT)) ;
221 		char *__field_string__ = __standard_fields__[loc] ;
222 		BLANK_STRING(__line_buf__) ;
223 		if (*__field_string__ != SENTINEL)
224 		{
225 			if (opt < NO_FORMAT)
226 			{
227 				char * __source_start_tag__ ;
228 				if (is_landmark)
229 				{
230 					switch (loc)
231 					{
232 			        case FEATNAME :
233 						__source_start_tag__ = ( char *) __land_field_start_tag__[0][opt] ;
234      				    break ;
235                     case FEATTYPE :
236                        __source_start_tag__ = ( char *) __land_field_start_tag__[1][opt] ;
237                         break ;
238                     case FEATAREA :
239 						__source_start_tag__ = ( char *) __land_field_start_tag__[2][opt] ;
240 						break ;
241 					default :
242 						__source_start_tag__ = ( char * ) __field_start_tag__[loc][opt] ;
243 					}
244 				}
245 				else
246 				{
247 					__source_start_tag__ = (char *) __field_start_tag__[loc][opt] ;
248 				}
249 				append_string_to_max(__line_buf__, __source_start_tag__ , STREAM_BUF_SIZE) ;
250 			}
251 			append_string_to_max( __line_buf__,  __field_string__ , STREAM_BUF_SIZE ) ;
252 			if (opt < NO_FORMAT)
253 			{
254 				char * __source_end_tag__ ;
255 				if (is_landmark)
256 				{
257  					switch (loc)
258 					{
259 					case FEATNAME :
260 						__source_end_tag__ = ( char *) __land_field_tag_end__[ 0 ][ opt ] ;
261 						break ;
262 					case FEATTYPE :
263 						__source_end_tag__ = ( char *) __land_field_tag_end__[ 1 ][ opt ] ;
264 						break ;
265 					case FEATAREA :
266 						__source_end_tag__ = ( char *) __land_field_tag_end__[ 2 ][ opt ] ;
267 						break ;
268 					default :
269 						__source_end_tag__ = ( char * ) __field_tag_end__[ loc ][ opt ] ;
270 					}
271 				}
272 				else
273 				{
274 					__source_end_tag__ = ( char * ) __field_tag_end__[ loc ][ opt ] ;
275 				}
276 				append_string_to_max( __line_buf__ , __source_end_tag__ , STREAM_BUF_SIZE ) ;
277 			}
278 			if ( __dest_file__ != NULL )
279 			{
280 				fprintf( __dest_file__ , "%s" , __line_buf__ ) ;
281 			}
282 			else
283 			{
284 				printf( "%s" , __line_buf__ ) ;
285 			}
286 		}
287 	}
288 	if ( opt < NO_FORMAT )
289 	{
290 		if ( __dest_file__ != NULL )
291 		{
292 			fprintf( __dest_file__ , "%s\n", ( is_landmark? __landmark_record_end_tag__[ opt ] : __record_end_tag__[ opt ]));
293 		}
294 		else
295 		{
296 			printf( "%s\n" , ( is_landmark? __landmark_record_end_tag__[ opt ] :  __record_end_tag__[ opt ] ) );
297 		}
298 	}
299 	if ( __dest_file__ != NULL )
300 	{
301 		fflush( __dest_file__ ) ;
302 	}
303 	else
304 	{
305 		fflush( stdout ) ;
306 	}
307 }
308 
309 //#endif
310 
311 /*-----------------------------------------
312 export.c (_get_standard_)
313 -- called by export.c (_copy_standard_)
314 -- calls _get_definition_text_ , find_def_type
315 uses MACRO BLANK_STRING
316 -------------------------------------------*/
/*-- Return the string to output for the lexeme at lex_pos when it is
	being emitted as output_sym.
	- For STREET, when the best definition is typed WORD but
	  find_def_type reports an ORD entry for it, the Standard string
	  of the first ORD definition in the lexeme's DefList is returned
	  if it is not NULL.
	- Otherwise the text chosen by _get_definition_text_ is returned.
	  For HOUSE, leading zeroes are stripped from that text IN PLACE,
	  so the buffer it points to is modified; an all-zero string
	  collapses to a single "0". --*/
static char *_get_standard_(STAND_PARAM *__stand_param__ ,int lex_pos, int output_sym)
{
	char *text ;
	DEF *best = __stand_param__->best_defs[lex_pos] ;

	if ((output_sym == STREET) && (find_def_type(best,__ord_list__)) && (best->Type == WORD))
	{
		/*-- Prefer the ordinal standardization when there is one. Only
			the first ORD definition is considered. --*/
		DEF *cand = __stand_param__->lex_vector[lex_pos].DefList ;
		while ((cand != NULL) && (cand->Type != ORD))
		{
			cand = cand->Next ;
		}
		if ((cand != NULL) && (cand->Standard != NULL))
		{
			return (cand->Standard) ;
		}
	}

	/*-- Lexicon standardization if there is one, else the tokenized form --*/
	text = _get_definition_text_(__stand_param__,lex_pos) ;
	if ((output_sym != HOUSE) || (*text != '0'))
	{
		return ( text ) ;
	}

	/*-- House number with leading zeroes: shift the remainder down to
		the front of the buffer so string comparisons of house numbers
		are not thrown off by the padding. --*/
	{
		char *src = text ;
		char *dst = text ;
		while ( *src == '0' )
		{
			src++ ;
		}
		while ( *src != SENTINEL )
		{
			*dst++ = *src++ ;
		}
		if ( dst == text )
		{
			/*-- Nothing but zeroes: keep exactly one --*/
			*dst++ = '0' ;
		}
		BLANK_STRING( dst ) ;
	}
	return ( text ) ;
}
364 
365 /*-----------------------------------------
366 export.c (_scan_target_ )
367 -- calls export.c (_copy_standard_)
368 -- called by export.c (stuff_fields)
369 -------------------------------------------*/
/*-- Walk the best output symbols from FIRST_LEX_POS up to LexNum and,
	for every position whose symbol equals sym, copy that lexeme's
	standardized text into field dest. Because positions are visited
	in input order, the words within a field keep their input order. --*/
static void _scan_target_(STAND_PARAM *__stand_param__,SYMB sym , int dest)
{
	const int lex_count = __stand_param__->LexNum ;
	SYMB *out_syms = __stand_param__->best_output ;
	int pos = FIRST_LEX_POS ;

	while (pos < lex_count)
	{
		if (out_syms[pos] == sym)
		{
			_copy_standard_(__stand_param__,sym,dest,pos) ;
		}
		pos++ ;
	}
}
388 
389 /*-----------------------------------------
390 export.c (_copy_standard_)
391 -- called by export.c (_scan_target_) --
-- calls export.c (_get_standard_),
   strlen, strcpy, append_string_to_max
-- uses macro SPACE_APPEND_WITH_LEN
395 -------------------------------------------*/
/*-- Add the standardized text of the lexeme at lex_pos to output field
	fld.
	output_sym : the output symbol the lexeme was assigned; it selects
	             the standardization (see _get_standard_) and the
	             prefix used when the field is still empty.
	If the new text plus the current field contents is longer than
	MAXFLDLEN, nothing is copied and no warning is given. Text added
	to a non-empty field is appended with SPACE_APPEND_WITH_LEN. When
	the field is empty, UNITT text is prefixed with "# " and BOXT text
	with "BOX "; any other symbol's text is copied as is.
	Every write of the standardized text is bounded by MAXFLDLEN. The
	plain copy used to be an unbounded strcpy, which the length guard
	above does not fully cover: a string of exactly MAXFLDLEN
	characters passes the guard and needs MAXFLDLEN + 1 bytes. --*/
static void _copy_standard_( STAND_PARAM *__stand_param__ , SYMB output_sym , int fld , int lex_pos )
{
	/*-- Retrieve the standardized string --*/
	char *__stan_str__ = _get_standard_( __stand_param__ , lex_pos , output_sym ) ;
	char *__dest_buf__ = __stand_param__->standard_fields[fld] ;

	if (( strlen( __stan_str__ ) + strlen( __dest_buf__ )) > MAXFLDLEN )
	{
		/*-- Truncate without warning --*/
		return ;
	}
	if ( *__dest_buf__ != SENTINEL )
	{
		SPACE_APPEND_WITH_LEN( __dest_buf__ , __stan_str__ , MAXFLDLEN ) ;
		return ;
	}

	/*-- The field is empty from here on --*/
	if ( output_sym == UNITT )
	{
		/*-- If the unit id type is missing, one needs to be provided.
			This might result in a mismatch, when the type is implicit
			in one of the compared addresses, and explicit in the
			other. Not much you can do with implicit. Better a generic
			identifier than nothing at all --*/
		strcpy( __dest_buf__ , "# " ) ; /* -- reconsider this -- */
	}
	else if ( output_sym == BOXT )
	{
		strcpy( __dest_buf__ , "BOX " ) ;
	}
	/*-- Bounded append after the optional prefix. On a still-empty
		field this acts as a size-limited copy. --*/
	append_string_to_max( __dest_buf__ , __stan_str__ , MAXFLDLEN ) ;
}
432 
433