1 /* -- export.c
2
3 This file contains the routines for extracting the sequence of
4 postal attributes and definitions produced by the standardizer
5 into strings of text (in __standard_fields__).
6
7 Prototype 7H08 (This file was written by Walter Sinclair).
8
9 Copyright (c) 2009 Walter Bruce Sinclair
10
11 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17
18 */
19
20 /* For pagc-0.4.0 : last revised 2009-10-03 */
21
22 #include <stdio.h>
23 #include <string.h>
24 #include <stddef.h>
25 #include "pagc_api.h"
26 #include "pagc_tools.h"
27
28 #define ORDER_DISPLACEMENT 2
29
30 /* -- local prototypes -- */
31 static void _copy_standard_( STAND_PARAM * , SYMB , int , int ) ;
32 static void _scan_target_( STAND_PARAM * , SYMB , int ) ;
33 static char *_get_standard_( STAND_PARAM * , int , int ) ;
34 static char *_get_definition_text_( STAND_PARAM * , int ) ;
35
36 //#ifndef BUILD_API
37
38 /* -- local storage -- */
/*-- Tag tables used by send_fields_to_stream.

     In every table the last index is the opt argument of
     send_fields_to_stream (only consulted when opt < NO_FORMAT):
        column 0 : markup-style tag
        column 1 : quote delimiter
        column 2 : text label (start tables) or line end (end tables)

     In the two per-field tables the first index is the position of the
     field in the standard_fields array. --*/

/*-- Text written before the value of each field --*/
static const char *__field_start_tag__[][3] = {
    /*  0 */ { " <Build>",  "\"", "Building: " },
    /*  1 */ { " <Civic>",  "\"", "House Address: " },
    /*  2 */ { " <PreDir>", "\"", "Prefix Direction: " },
    /*  3 */ { " <Qualif>", "\"", "Qualifier: " },
    /*  4 */ { " <PreTyp>", "\"", "Prefix Type: " },
    /*  5 */ { " <Street>", "\"", "Street Name: " },
    /*  6 */ { " <SufTyp>", "\"", "Suffix Type: " },
    /*  7 */ { " <SufDir>", "\"", "Suffix Direction: " },
    /*  8 */ { " <Rural>",  "\"", "Rural Route: " },
    /*  9 */ { " <Extra>",  "\"", "Additional Info: " },
    /* 10 */ { " <City>",   "\"", "Municipal: " },
    /* 11 */ { " <Prov>",   "\"", "Province/State: " },
    /* 12 */ { " <Nation>", "\"", "Country: " },
    /* 13 */ { " <Postal>", "\"", "Postal/Zip Code: " },
    /* 14 */ { " <Box>",    "\"", "Box: " },
    /* 15 */ { " <Unit>",   "\"", "Unit: " }
} ;

/*-- Landmark replacements for the start text; rows are selected for
     FEATNAME, FEATTYPE and FEATAREA in that order --*/
static const char *__land_field_start_tag__[][3] = {
    /* 0 */ { "<FeatureName>", "\"", "FeatureName " },
    /* 1 */ { "<FeatureType>", "\"", "FeatureType " },
    /* 2 */ { "<FeatureArea>", "\"", "FeatureArea " }
} ;

/*-- Landmark replacements for the end text, same row order --*/
static const char *__land_field_tag_end__[][3] = {
    /* 0 */ { "</FeatureName>\n", "\",", "\n" },
    /* 1 */ { "</FeatureType>\n", "\",", "\n" },
    /* 2 */ { "</FeatureArea>\n", "\",", "\n" }
} ;

/*-- Text written after the value of each field --*/
static const char *__field_tag_end__[][3] = {
    /*  0 */ { "</Build>\n",  "\",", "\n" },
    /*  1 */ { "</Civic>\n",  "\",", "\n" },
    /*  2 */ { "</PreDir>\n", "\",", "\n" },
    /*  3 */ { "</Qualif>\n", "\",", "\n" },
    /*  4 */ { "</PreTyp>\n", "\",", "\n" },
    /*  5 */ { "</Street>\n", "\",", "\n" },
    /*  6 */ { "</SufTyp>\n", "\",", "\n" },
    /*  7 */ { "</SufDir>\n", "\",", "\n" },
    /*  8 */ { "</Rural>\n",  "\",", "\n" },
    /*  9 */ { "</Extra>\n",  "\",", "\n" },
    /* 10 */ { "</City>\n",   "\",", "\n" },
    /* 11 */ { "</Prov>\n",   "\",", "\n" },
    /* 12 */ { "</Nation>\n", "\",", "\n" },
    /* 13 */ { "</Postal>\n", "\",", "\n" },
    /* 14 */ { "</Box>\n",    "\",", "\n" },
    /* 15 */ { "</Unit>\n",   "\",", "\n" }
} ;

/*-- Record openers and closers, one entry per opt value --*/
static const char *__record_start_tag__[ ] = {
    " <address>\n" ,
    "\n" ,
    "\n"
} ;
static const char *__landmark_record_start_tag__[ ] = {
    " <landmark>\n" ,
    "\n" ,
    "\n"
} ;
static const char *__record_end_tag__[ ] = {
    " </address>\n" ,
    "\n" ,
    "\n"
} ;
static const char *__landmark_record_end_tag__[ ] = {
    " </landmark>\n" ,
    "\n" ,
    "\n"
} ;
97
98 //#endif
99
/*-- Symbol list handed to find_def_type by _get_standard_ when it tests
     whether a definition also carries the ORD type. FAIL presumably marks
     the end of the list - confirm against find_def_type. --*/
static SYMB __ord_list__[] = { ORD, FAIL } ;
101
102 /*----------------------------------------------------------------
103 export.c (init_output_fields)
104 ----------------------------------------------------------------*/
/*-- Blank a group of output fields by storing SENTINEL in the first
     character of each affected entry of standard_fields.

     which_fields == BOTH  : every field, 0 up to (not including) MAXOUTSYM
     which_fields == RIGHT : the macro fields, CITY up to NEEDHEAD
     anything else (LEFT)  : the micro fields, BLDNG up to CITY and
                             NEEDHEAD up to MAXOUTSYM --*/
void init_output_fields( STAND_PARAM *__stand_param__ , int which_fields )
{
    char **fields = __stand_param__->standard_fields ;
    int slot ;

    if ( which_fields == BOTH )
    {
        slot = 0 ;
        while ( slot < MAXOUTSYM )
        {
            fields[ slot ][ 0 ] = SENTINEL ;
            slot++ ;
        }
        return ;
    }

    if ( which_fields == RIGHT )
    {
        /*-- Macro fields only --*/
        slot = CITY ;
        while ( slot < NEEDHEAD )
        {
            fields[ slot ][ 0 ] = SENTINEL ;
            slot++ ;
        }
        return ;
    }

    /*-- Micro fields only: the run below CITY, then the fields from
         NEEDHEAD onward --*/
    slot = BLDNG ;
    while ( slot < CITY )
    {
        fields[ slot ][ 0 ] = SENTINEL ;
        slot++ ;
    }
    slot = NEEDHEAD ;
    while ( slot < MAXOUTSYM )
    {
        fields[ slot ][ 0 ] = SENTINEL ;
        slot++ ;
    }
}
145
146 /*-----------------------------------------
147 export.c (sym_to_field)
148 -------------------------------------------*/
/*-- Translate an output symbol into the index of the output field that
     holds it.
        BOXH  or BOXT   -> NEEDHEAD
        UNITH or UNITT  -> NEEDHEAD + 1
        any other symbol from BLDNG up to (not including) MAXOUTSYM
                        -> the symbol itself
        everything else -> FAIL --*/
int sym_to_field( SYMB sym )
{
    if ( sym == BOXH || sym == BOXT )
    {
        return NEEDHEAD ;
    }
    if ( sym == UNITH || sym == UNITT )
    {
        return NEEDHEAD + 1 ;
    }
    if ( sym < BLDNG || sym >= MAXOUTSYM )
    {
        return FAIL ;
    }
    return sym ;
}
158
159 /*--------------------------------------------------
160 export.c (_get_definition_text_)
161 -- called by export.c (_get_standard_)
162 ---------------------------------------------------*/
/*-- Pick the text to export for the lexeme at lex_pos: the Text of the
     lexeme itself when its best definition has Protect set, otherwise the
     Standard string of that best definition. --*/
static char *_get_definition_text_( STAND_PARAM *__stand_param__ , int lex_pos )
{
    DEF *chosen_def = __stand_param__->best_defs[ lex_pos ] ;

    if ( chosen_def->Protect )
    {
        /*-- Protected: keep the lexeme's own text --*/
        return __stand_param__->lex_vector[ lex_pos ].Text ;
    }
    return chosen_def->Standard ;
}
172
173 /*-----------------------------------------
174 export.c (stuff_fields)
175 --calls export.c (_scan_target_)
176 -------------------------------------------*/
/*-- Fill the output fields from the symbols and definitions of the
     standardization. Symbols below NEEDHEAD go to the field with the same
     index; the head and tail symbols of box and unit share one field
     each (NEEDHEAD and NEEDHEAD + 1). --*/
void stuff_fields( STAND_PARAM *__stand_param__ )
{
    /*-- Two consecutive entries feed the same destination field --*/
    const SYMB paired_syms[] = { BOXH , BOXT , UNITH , UNITT } ;
    int sym = 0 ;
    int k ;

    /*-- Fields that correspond one to one with the symbols --*/
    while ( sym < NEEDHEAD )
    {
        _scan_target_( __stand_param__ , sym , sym ) ;
        sym++ ;
    }

    /*-- k / 2 is 0 for the box pair and 1 for the unit pair --*/
    for ( k = 0 ; k < 4 ; k++ )
    {
        _scan_target_( __stand_param__ , paired_syms[ k ] , NEEDHEAD + ( k / 2 ) ) ;
    }
}
193
194 //#ifndef BUILD_API
195
196 /*---------------------------------------------------------------------
197 export.c (send_fields_to_stream)
198 uses BLANK_STRING
199 2009-09-27 modify to display landmark fields
200 ----------------------------------------------------------------------*/
201 #define STREAM_BUF_SIZE MAXSTRLEN
send_fields_to_stream(char ** __standard_fields__,FILE * __dest_file__,int opt,int is_landmark)202 void send_fields_to_stream( char **__standard_fields__ , FILE *__dest_file__ , int opt , int is_landmark)
203 {
204 int output_order ;
205 if (opt < NO_FORMAT)
206 {
207 if (__dest_file__ != NULL)
208 {
209 fprintf(__dest_file__,"%s\n",(is_landmark? __landmark_record_start_tag__[opt] : __record_start_tag__[opt])) ;
210 }
211 else
212 {
213 printf("%s\n",(is_landmark? __landmark_record_start_tag__[opt] : __record_start_tag__[opt])) ;
214 }
215 }
216 /*-- We want to rearrange so that unit and box come first --*/
217 for (output_order = 0; output_order < (NEEDHEAD + ORDER_DISPLACEMENT); output_order++)
218 {
219 char __line_buf__[STREAM_BUF_SIZE] ;
220 int loc = ((output_order < ORDER_DISPLACEMENT)? (NEEDHEAD + output_order) : (output_order - ORDER_DISPLACEMENT)) ;
221 char *__field_string__ = __standard_fields__[loc] ;
222 BLANK_STRING(__line_buf__) ;
223 if (*__field_string__ != SENTINEL)
224 {
225 if (opt < NO_FORMAT)
226 {
227 char * __source_start_tag__ ;
228 if (is_landmark)
229 {
230 switch (loc)
231 {
232 case FEATNAME :
233 __source_start_tag__ = ( char *) __land_field_start_tag__[0][opt] ;
234 break ;
235 case FEATTYPE :
236 __source_start_tag__ = ( char *) __land_field_start_tag__[1][opt] ;
237 break ;
238 case FEATAREA :
239 __source_start_tag__ = ( char *) __land_field_start_tag__[2][opt] ;
240 break ;
241 default :
242 __source_start_tag__ = ( char * ) __field_start_tag__[loc][opt] ;
243 }
244 }
245 else
246 {
247 __source_start_tag__ = (char *) __field_start_tag__[loc][opt] ;
248 }
249 append_string_to_max(__line_buf__, __source_start_tag__ , STREAM_BUF_SIZE) ;
250 }
251 append_string_to_max( __line_buf__, __field_string__ , STREAM_BUF_SIZE ) ;
252 if (opt < NO_FORMAT)
253 {
254 char * __source_end_tag__ ;
255 if (is_landmark)
256 {
257 switch (loc)
258 {
259 case FEATNAME :
260 __source_end_tag__ = ( char *) __land_field_tag_end__[ 0 ][ opt ] ;
261 break ;
262 case FEATTYPE :
263 __source_end_tag__ = ( char *) __land_field_tag_end__[ 1 ][ opt ] ;
264 break ;
265 case FEATAREA :
266 __source_end_tag__ = ( char *) __land_field_tag_end__[ 2 ][ opt ] ;
267 break ;
268 default :
269 __source_end_tag__ = ( char * ) __field_tag_end__[ loc ][ opt ] ;
270 }
271 }
272 else
273 {
274 __source_end_tag__ = ( char * ) __field_tag_end__[ loc ][ opt ] ;
275 }
276 append_string_to_max( __line_buf__ , __source_end_tag__ , STREAM_BUF_SIZE ) ;
277 }
278 if ( __dest_file__ != NULL )
279 {
280 fprintf( __dest_file__ , "%s" , __line_buf__ ) ;
281 }
282 else
283 {
284 printf( "%s" , __line_buf__ ) ;
285 }
286 }
287 }
288 if ( opt < NO_FORMAT )
289 {
290 if ( __dest_file__ != NULL )
291 {
292 fprintf( __dest_file__ , "%s\n", ( is_landmark? __landmark_record_end_tag__[ opt ] : __record_end_tag__[ opt ]));
293 }
294 else
295 {
296 printf( "%s\n" , ( is_landmark? __landmark_record_end_tag__[ opt ] : __record_end_tag__[ opt ] ) );
297 }
298 }
299 if ( __dest_file__ != NULL )
300 {
301 fflush( __dest_file__ ) ;
302 }
303 else
304 {
305 fflush( stdout ) ;
306 }
307 }
308
309 //#endif
310
311 /*-----------------------------------------
312 export.c (_get_standard_)
313 -- called by export.c (_copy_standard_)
314 -- calls _get_definition_text_ , find_def_type
315 uses MACRO BLANK_STRING
316 -------------------------------------------*/
/*-- Return the string to export for the lexeme at lex_pos when it is
     being written out as output_sym.

     - For STREET, when the best definition is typed WORD and
       find_def_type reports a match against __ord_list__, the Standard
       string of the first ORD definition in the lexeme's DefList is
       returned, provided it is not NULL.
     - Otherwise the result of _get_definition_text_ is returned. For
       HOUSE, leading zeroes are first removed from that string IN PLACE;
       a string made only of zeroes is reduced to a single "0". --*/
static char *_get_standard_(STAND_PARAM *__stand_param__ ,int lex_pos, int output_sym)
{
    DEF *best = __stand_param__->best_defs[ lex_pos ] ;
    char *text ;
    char *reader ;
    char *writer ;

    if ( ( output_sym == STREET ) && ( find_def_type( best , __ord_list__ ) ) && ( best->Type == WORD ) )
    {
        /*-- A street name typed as a word that also has an ordinal
             reading: prefer the ordinal standardization. Only the first
             ORD definition is considered. --*/
        DEF *cand = __stand_param__->lex_vector[ lex_pos ].DefList ;

        while ( ( cand != NULL ) && ( cand->Type != ORD ) )
        {
            cand = cand->Next ;
        }
        if ( ( cand != NULL ) && ( cand->Standard != NULL ) )
        {
            return cand->Standard ;
        }
    }

    text = _get_definition_text_( __stand_param__ , lex_pos ) ;
    if ( ( output_sym != HOUSE ) || ( *text != '0' ) )
    {
        return text ;
    }

    /*-- House number with leading zeroes: strip them so that string
         comparisons of house numbers behave. Not done for zip codes. --*/
    reader = text ;
    writer = text ;
    while ( *reader == '0' )
    {
        reader++ ; /*-- skip to the first non-zero character --*/
    }
    while ( *reader != SENTINEL )
    {
        *writer = *reader ; /*-- shift the remainder down --*/
        writer++ ;
        reader++ ;
    }
    if ( writer == text )
    {
        /*-- It was all zeroes: keep exactly one --*/
        *writer = '0' ;
        writer++ ;
    }
    BLANK_STRING( writer ) ;
    return text ;
}
364
365 /*-----------------------------------------
366 export.c (_scan_target_ )
367 -- calls export.c (_copy_standard_)
368 -- called by export.c (stuff_fields)
369 -------------------------------------------*/
/*-- Walk the best output symbols from FIRST_LEX_POS up to LexNum and,
     for every position whose symbol equals sym, pass that position to
     _copy_standard_ so its text lands in field dest. Because the walk is
     in input order, words within a field keep their input order. --*/
static void _scan_target_(STAND_PARAM *__stand_param__,SYMB sym , int dest)
{
    SYMB *out_syms = __stand_param__->best_output ;
    int lex_count = __stand_param__->LexNum ;
    int pos = FIRST_LEX_POS ;

    while ( pos < lex_count )
    {
        if ( out_syms[ pos ] == sym )
        {
            _copy_standard_( __stand_param__ , sym , dest , pos ) ;
        }
        pos++ ;
    }
}
388
389 /*-----------------------------------------
390 export.c (_copy_standard_)
391 -- called by export.c (_scan_target_) --
-- calls export.c (_get_standard_),
   strlen, strcpy, append_string_to_max
   uses macro SPACE_APPEND_WITH_LEN
395 -------------------------------------------*/
/*-- Add the standardized text of the lexeme at lex_pos to output field
     fld.

     output_sym : the output symbol the lexeme was assigned; it is passed
                  on to _get_standard_ and decides the prefix used when
                  the field is still empty
     fld        : index into standard_fields of the destination
     lex_pos    : position of the lexeme

     A token that does not fit is dropped whole, without warning.

     - Field already has text: the token is appended after a space.
     - Field empty and output_sym is UNITT: the token is prefixed "# ".
     - Field empty and output_sym is BOXT : the token is prefixed "BOX ".
     - Field empty otherwise              : the token is copied as is. --*/
static void _copy_standard_( STAND_PARAM *__stand_param__ , SYMB output_sym , int fld , int lex_pos )
{
    /*-- Retrieve the standardized string --*/
    char *__stan_str__ = _get_standard_( __stand_param__ , lex_pos , output_sym ) ;
    char *__dest_buf__ = __stand_param__->standard_fields[fld] ;
    size_t stan_len = strlen( __stan_str__ ) ;

    if ( ( stan_len + strlen( __dest_buf__ ) ) > MAXFLDLEN )
    {
        /*-- Does not fit: drop the token without warning --*/
        return ;
    }
    if ( *__dest_buf__ != SENTINEL )
    {
        SPACE_APPEND_WITH_LEN( __dest_buf__ , __stan_str__ , MAXFLDLEN ) ;
    }
    else if ( output_sym == UNITT )
    {
        /*-- If the unit id type is missing, one needs to be provided.
             This might result in a mismatch, when the type is implicit
             in one of the compared addresses, and explicit in the
             other. Not much you can do with implicit. Better a generic
             identifier than nothing at all --*/
        strcpy( __dest_buf__ , "# " ) ; /* -- reconsider this -- */
        append_string_to_max( __dest_buf__ , __stan_str__ , MAXFLDLEN ) ;
    }
    else if ( output_sym == BOXT )
    {
        strcpy( __dest_buf__, "BOX " ) ;
        append_string_to_max( __dest_buf__ , __stan_str__ ,MAXFLDLEN ) ;
    }
    else if ( stan_len < MAXFLDLEN )
    {
        /*-- The bounded helpers above are given MAXFLDLEN as the size of
             the field, so the field is treated as MAXFLDLEN bytes
             including the terminator. The guard at the top still admits
             a token of exactly MAXFLDLEN characters, which an unbounded
             strcpy would write one byte past the end; such a token is
             dropped like any other that does not fit. --*/
        strcpy( __dest_buf__ , __stan_str__ ) ;
    }
}
432
433