1 /* -- standard.c
2 
3 interface for the standardizer
4 
5 Prototype 7H08 (This file was written by Walter Sinclair).
6 
7 This file is part of PAGC.
8 
9 Copyright (c) 2009 Walter Bruce Sinclair
10 
11 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12 
13 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14 
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17 
18 */
19 
20 /*-- For pagc-0.4.2 : last revised 2012-07-18 --*/
21 
22 #undef DEBUG
23 //#define DEBUG 1
24 
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include "pagc_api.h"
30 #ifdef BUILD_API
31 #include "pagc_std_api.h"
32 #endif
33 
34 #define GAZ_LEXICON
35 
/* -- local prototypes -- */
/*-- <revision date='2012-07-22'> Keep track of start_state </revision> --*/
/*-- <remarks> Evaluates the scanned input and writes the output fields </remarks> --*/
static int _Close_Stand_Field_(STAND_PARAM *) ;
/*-- <remarks> Scans a whole input string, one lexical unit at a time </remarks> --*/
static int _Scan_String_(STAND_PARAM *, char *) ;
/*-- <remarks> Scans a single lexical unit and returns the new input position </remarks> --*/
static char * _Scan_Next_(STAND_PARAM *, char *) ;

/*-- <remarks> Characters treated by _Scan_Next_ as type 2 (spacing)
	terminators: space, backslash, hyphen, period, ')', '}', '>' and
	underscore. Note that strchr() on this array also matches the
	terminating NUL, so the end of input has to be tested separately. </remarks> --*/
static char __spacer__[] = " \\-.)}>_" ;
43 
/*-- <remarks> The macros below are code fragments for _Scan_Next_. They
	expand in terms of that function's locals: __stand_param__, __src__
	(input cursor), __dest__ (write cursor into __scan_buf__), a (current
	input character), b (lookahead character), n and last_digit. None of
	them is wrapped in do { } while (0), so they must not be used as the
	unbraced body of an if or else. </remarks> --*/

/*-- <remarks> Terminate the scan buffer at the write cursor and set n to
	the length of the collected text. </remarks> --*/
#define TERM_AND_LENGTH \
	*__dest__ = SENTINEL ; \
	n = strlen(__scan_buf__)

/*-- <remarks> Hand the collected text to new_morph with the given token
	type and leave the enclosing function: NULL is returned when new_morph
	yields zero, otherwise the current input position. </remarks> --*/
#define RETURN_NEW_MORPH(TOKEN_ARG) \
	if (!new_morph(__stand_param__,TOKEN_ARG,__scan_buf__,n))\
	{\
		return NULL ; \
	} \
	return __src__

/*-- <remarks> Append the current character and the lookahead character to
	the scan buffer and advance the input cursor by two. </remarks> --*/
#define COLLECT_LOOKAHEAD \
	*__dest__++ = a ; __src__++ ; *__dest__++ = b ; __src__++

/*-- <remarks> Append the current character, advance and reload a, and
	repeat while COND holds. This is a do-while: the first character is
	always collected before COND is tested. No bound on the scan buffer
	is applied here. </remarks> --*/
#define COLLECT_WHILE(COND) \
	do { *__dest__++ = a ; __src__++ ; a = *__src__ ; } while (COND)

/*-- <remarks> Same traversal as COLLECT_WHILE, but nothing is stored:
	the write cursor is advanced without writing to the buffer. </remarks> --*/
#define NO_COLLECT_WHILE(COND) \
	do { __dest__++ ; __src__++ ; a = *__src__ ; } while (COND)

/*-- <remarks> Body of one case of the switch on a that follows a digit
	run. When the lookahead is NEXT_LOW or NEXT_UP, the last digit is N
	and the digit before it (if any) is not '1' (which excludes 11, 12
	and 13), the two suffix letters are collected and a DORD morpheme is
	returned. In every other situation the macro breaks out of the
	switch. </remarks> --*/
#define TEST_FOR_ORD_DIGIT(N,NEXT_LOW,NEXT_UP) \
	if ((b == NEXT_LOW) || (b == NEXT_UP)) \
	{ \
		if (last_digit == N)\
		{ \
			if ((n < 2 ) || (*(__dest__-2) != '1')) \
			{ \
				COLLECT_LOOKAHEAD ; \
				TERM_AND_LENGTH ; \
				RETURN_NEW_MORPH(DORD) ; \
			} \
		} \
	} \
	break
78 
79 
80 /*========================================================================
81 <summary>
82 	<function name='standard.c (standardize_field)'/>
83 		<remarks>This function is called with a pointer to the
84 			str to standardize and a start state indicating
85 			the kind of standardization to perform. It invokes
86 			the scanner to start the creation of the morphemes
87 		<calls><functionref='tokenize.c (initialize_morphs)'/></calls>
88 		<calls><functionref='_Close_Stand_Field_s'/></calls>
89 		<calls><functionref='_Scan_String_'/></calls>
90 </summary>
91 =========================================================================*/
standardize_field(STAND_PARAM * __stand_param__,char * __in_str__,int client_start_state)92 int standardize_field(STAND_PARAM *__stand_param__ ,char *__in_str__ , int client_start_state )
93 {
94 	/*-- <revision date='2009-08-13'> Support multiple lexicons </revision> --*/
95 	/*-- <revision date='2012-06-01'> Add gaz_lexicon to be triggered on start_state= MACRO </revision> --*/
96 	__stand_param__->lexicon = __stand_param__->address_lexicon ;
97 	if (client_start_state > EXTRA_STATE)
98 	{
99 		__stand_param__->lexicon = __stand_param__->poi_lexicon ;
100 	}
101 #ifdef GAZ_LEXICON
102 	else
103 	{
104 		if (client_start_state == MACRO)
105 		{
106 		   __stand_param__->lexicon = __stand_param__->gaz_lexicon ;
107 		}
108 	}
109 #endif
110 	/*-- <revision date='2012-07-22'> Keep track of start_state </revision> --*/
111 	__stand_param__->start_state = client_start_state ;
112 	initialize_morphs(__stand_param__) ;
113 	if (!_Scan_String_(__stand_param__,__in_str__))
114 	{
115 		return FALSE ;
116 	}
117 	/*-- <revision date='2012-07-22'> Keep track of start_state </revision> --*/
118 	return (_Close_Stand_Field_(__stand_param__)) ;
119 }
120 
/*-- <remarks> Walks the input with _Scan_Next_ until a newline or the
	SENTINEL is reached, then hands the collected tokens to process_input
	and returns its result. Returns FALSE as soon as _Scan_Next_ reports
	an error by returning NULL. </remarks> --*/
static int _Scan_String_(STAND_PARAM *__stand_param__ ,char *__in_str__ )
{
	char *__cursor__ = __in_str__ ;
	do
	{
		char __current__ = *__cursor__ ;
		/*-- <remarks> End of input: process the tokens gathered so far </remarks> --*/
		if ((__current__ == '\n') || (__current__ == SENTINEL))
		{
			return (process_input(__stand_param__)) ;
		}
		/*-- <remarks> Otherwise gather the next sequence into a token </remarks> --*/
		__cursor__ = _Scan_Next_(__stand_param__,__cursor__) ;
	} while (__cursor__ != NULL) ;
	/*-- <remarks> The scanner signalled an error </remarks> --*/
	return FALSE ;
}
142 
/*========================================================================
<summary>
	<function name='standard.c (_Scan_Next_)'/>
	<remarks>Consumes one lexical unit starting at <code>__in_ptr__</code>
		and registers it: a terminator through <code>set_term</code>, or
		a morpheme through <code>new_morph</code> (by way of the
		<code>RETURN_NEW_MORPH</code> macro). A character that fits no
		category is skipped. The caller must not pass a pointer to the
		terminating SENTINEL.</remarks>
	<called-by><functionref='_Scan_String_'/></called-by>
	<returns>pointer to the first unconsumed input character, or NULL
		when <code>new_morph</code> reports failure</returns>
	<remarks>Token collection is bounded by the scan buffer: a run that
		does not fit is split and the remainder is scanned as the next
		unit. The spacing scan and the lookahead stop at the end of the
		input, and the ctype classifiers are given unsigned char
		values.</remarks>
</summary>
=========================================================================*/
static char * _Scan_Next_( STAND_PARAM *__stand_param__,char * __in_ptr__)
{
	int n ;
	char __scan_buf__[MAXSTRLEN] ;
	/*-- <remarks> Collection stops at this position so that two lookahead
		characters and the terminator always fit behind it </remarks> --*/
	char *__limit__ = __scan_buf__ + (MAXSTRLEN - 3) ;

/*-- <remarks> Collect a sequence of characters into the scan_buf </remarks> --*/

	char *__src__ = __in_ptr__ ;
	char a = *__src__ ;
	char *__dest__ = __scan_buf__ ;
	*__dest__ = SENTINEL ;

	/*-- <remarks> Type one terminators </remarks> --*/
	if ((a == ',') || (a == '\t') || (a == ';'))
	{
		*__dest__++ = a ;
		*__dest__ = SENTINEL;
		set_term(__stand_param__,1,__scan_buf__) ;
		/*-- <remarks> Point to next input char </remarks> --*/
		return (__src__ + 1) ;
	}
	/*-- <remarks> Numeric sequences : ordinals, fractions and numbers </remarks> --*/
	if (isdigit((unsigned char) a))
	{
		char b ;
		char last_digit ;

		COLLECT_WHILE((__dest__ < __limit__) && isdigit((unsigned char) a)) ;
		/*-- <remarks> Get a character of lookahead and one of lookbehind.
			When the digits end the input there is nothing behind the
			terminator to look at. </remarks> --*/
		b = (a == SENTINEL) ? SENTINEL : *(__src__ + 1 ) ;
		last_digit = *(__dest__ - 1 ) ; /*-- last digit collected --*/
		n = (int) (__dest__ - __scan_buf__) ;
		switch (a)
		{
			/*-- <remarks> Fractions : x1/2, x1/3, x2/3, x1/4 and x3/4 </remarks> --*/
		case '/' :
			if (((b == '2') && (last_digit == '1')) ||
				((b == '3') && ((last_digit == '1') || (last_digit == '2'))) ||
				((b == '4') && ((last_digit == '1') || (last_digit == '3'))))
			{
				/*-- <remarks> Collect the rest of the fraction </remarks> --*/
				COLLECT_LOOKAHEAD ;
				TERM_AND_LENGTH ;
				RETURN_NEW_MORPH(DFRACT) ;
			}
			break ;
			/*-- <remarks> ordinals : each TEST_FOR_ORD_DIGIT either
				returns a DORD morpheme or breaks out of the switch </remarks> --*/
		case 's' : case 'S' :
			/*-- <remarks> 1st, 21st, 31st, -- for 1 </remarks> --*/
			TEST_FOR_ORD_DIGIT('1','t','T') ;
		case 'r' : case 'R' :
			/*-- <remarks> 3rd, 23rd, 33rd, -- for 3 </remarks> --*/
			TEST_FOR_ORD_DIGIT('3','d','D') ;
		case 'n' : case 'N' :
			/*-- <remarks> 2nd, 22nd, 32nd, -- for 2 </remarks> --*/
			TEST_FOR_ORD_DIGIT('2','d','D') ;
		case 't' : case 'T' :
			if ((b == 'h') || (b == 'H'))
			{
				switch (last_digit)
				{
				case '1' : case '2' : case '3' :
					/*-- <remarks> 11th, 111th, 211th etc -- for 11-13 </remarks> --*/
					if ((n > 1) && (*(__dest__ - 2) == '1'))
					{
						COLLECT_LOOKAHEAD ;
						TERM_AND_LENGTH ;
						/*-- <remarks> Point to next input char </remarks> --*/
						RETURN_NEW_MORPH(DORD) ;
					}
					break ;
				default :
					/*-- <remarks> 4th, 14th, 24th etc -- for 0, 4-9 </remarks> --*/
					COLLECT_LOOKAHEAD ;
					TERM_AND_LENGTH ;
					/*-- <remarks> Point to next input char </remarks> --*/
					RETURN_NEW_MORPH(DORD) ;
				}
			}
			break ;
		}
		/*-- <remarks> ordinary numeric sequence </remarks> --*/
		TERM_AND_LENGTH ;
		/*-- <remarks> Retain position </remarks> --*/
		RETURN_NEW_MORPH(DNUMBER) ;
	}
	/*-- <revision date='2009-08-15'> Fix ampersand : P&R --> P & R </revision> --*/
	if (a == '&')
	{
		COLLECT_WHILE((__dest__ < __limit__) && (a == '&')) ;
		TERM_AND_LENGTH ;
		RETURN_NEW_MORPH(DSINGLE) ;
	}
	/*-- <remarks> Alphabetic sequence. A leading '#' opens a sequence but
		is not collected further inside one. </remarks> --*/
	if ((isalpha((unsigned char) a)) || (a == '\'') || (a == '#'))
	{
		COLLECT_WHILE((__dest__ < __limit__) &&
			((isalpha((unsigned char) a)) || (a == '\''))) ;
		TERM_AND_LENGTH ;
		/*-- <remarks> Retain position; every branch returns </remarks> --*/
		switch (n)
		{
		case 1 :
			RETURN_NEW_MORPH(DSINGLE) ;
		case 2 :
			RETURN_NEW_MORPH(DDOUBLE) ;
		default :
			RETURN_NEW_MORPH( DWORDT ) ;
		}
	}
	/*-- <remarks> Type 2 terminators ( spacing ). strchr also matches the
		terminating NUL of __spacer__, so the end of the input is tested
		explicitly to keep the scan inside the string. Nothing is
		collected: set_term receives the empty scan buffer. </remarks> --*/
	if ((a != SENTINEL) && (strchr(__spacer__,a) != NULL))
	{
		do
		{
			__src__++ ;
			a = *__src__ ;
		} while ((a != SENTINEL) && (strchr(__spacer__,a) != NULL)) ;
		set_term(__stand_param__,2,__scan_buf__) ;
		/*-- <remarks> Retain position </remarks> --*/
		return (__src__) ;
	}
	/*-- <remarks> Ignore everything not specified. Point to next input char. </remarks> --*/
	return (__src__ + 1) ;
}
290 
291 #ifdef BUILD_API
292 
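/*
NOTE: the two typedefs below are an outdated sketch kept for reference only.
The code in this file uses members they do not list (for example pagc_p,
err_p and misc_stand in STANDARDIZER; building, qual, ruralroute, box and
unit in STDADDR).
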
294 typedef struct STANDARDIZER_s {
295     int data;
296     char *err_msg;
297 } STANDARDIZER;
298 
299 typedef struct STDADDR_s {  // define as required
300    char *house_num;
301    char *prequal;
302    char *pretype;
303    char *predir;
304    char *name;
305    char *suftype;
306    char *sufdir;
307    char *sufqual;
308    char *extra;
309    char *city;
310    char *state;
311    char *postcode;
312    char *country;
313 } STDADDR;
314 
315 */
316 
std_init()317 STANDARDIZER *std_init()
318 {
319     STANDARDIZER *std;
320 
321     std = (STANDARDIZER *) calloc(1,sizeof(STANDARDIZER)) ;
322     if ( std == NULL ) return NULL ;
323 
324     std -> pagc_p = (PAGC_GLOBAL *) calloc(1,sizeof(PAGC_GLOBAL)) ;
325     if ( std -> pagc_p == NULL ) {
326         free( std ) ;
327         return NULL ;
328     }
329 
330     std -> pagc_p -> process_errors = init_errors(std -> pagc_p, NULL) ;
331     std -> err_p = std -> pagc_p -> process_errors ;
332 
333     return std;
334 }
335 
336 
/*
 * std_use_lex
 *
 * Moves the hash table out of lex into the standardizer as its address
 * lexicon, frees the emptied LEXICON wrapper, then sets up the default
 * definitions and installs the definition block table on the new
 * lexicon.
 *
 * Returns FALSE when setup_default_defs fails, otherwise the result of
 * install_def_block_table. lex must not be used after this call.
 */
int std_use_lex(STANDARDIZER *std, LEXICON *lex)
{
    PAGC_GLOBAL *global = std->pagc_p;

    /* take ownership of the table before the wrapper is released */
    global->addr_lexicon = lex->hash_table;
    lex->hash_table = NULL;
    lex_free(lex);

    if (setup_default_defs(global)) {
        return install_def_block_table(global->addr_lexicon,
                                       global->process_errors);
    }
    return FALSE;
}
345 
346 
/*
 * std_use_gaz
 *
 * Moves the hash table out of gaz into the standardizer as its gazetteer
 * lexicon and frees the emptied LEXICON wrapper. gaz must not be used
 * after this call.
 *
 * Always returns 0.
 */
int std_use_gaz(STANDARDIZER *std, LEXICON *gaz)
{
    PAGC_GLOBAL *global = std->pagc_p;

    /* take ownership of the table before the wrapper is released */
    global->gaz_lexicon = gaz->hash_table;
    gaz->hash_table = NULL;
    lex_free(gaz);

    return 0;
}
354 
355 
/*
 * std_use_rules
 *
 * Moves the rule set out of rules into the standardizer and frees the
 * emptied RULES wrapper. The rules must have been readied first.
 *
 * Returns 0 on success. When rules->ready is not set, an error is
 * reported through std->err_p, 1 is returned and rules is left
 * untouched.
 */
int std_use_rules(STANDARDIZER *std, RULES *rules)
{
    if ( rules->ready ) {
        /* take ownership of the rule set before the wrapper is released */
        std->pagc_p->rules = rules->r_p;
        rules->r_p = NULL;
        rules_free(rules);
        return 0;
    }

    {
        RET_ERR("std_use_rules: Rules have not been readied!", std -> err_p, 1);
    }
}
366 
/*
 * std_ready_standardizer
 *
 * Creates the standardization context for std (with the exhaustive flag
 * set to 1) and stores it in std->misc_stand.
 *
 * Returns 0 when a context was created, 1 when init_stand_context
 * returned NULL.
 */
int std_ready_standardizer(STANDARDIZER *std)
{
    STAND_PARAM *context = init_stand_context(std->pagc_p, std->err_p, 1);

    std->misc_stand = context;
    return (context == NULL) ? 1 : 0;
}
376 
377 
/*
 * std_free
 *
 * Releases a STANDARDIZER: closes the standardizer process held in
 * pagc_p, closes its error record when there is one, frees pagc_p,
 * closes the standardization context in misc_stand and finally frees
 * std itself. A NULL std is ignored.
 *
 * The process_errors member is only examined when pagc_p is present,
 * and pagc_p is freed whether or not an error record exists.
 */
void std_free(STANDARDIZER *std)
{
    if ( std == NULL ) return;
    DBG("Calling close_stand_process");
    if ( std -> pagc_p != NULL ) {
        close_stand_process( std -> pagc_p ) ;
        if ( std -> pagc_p -> process_errors != NULL ) {
            DBG("Calling close_errors");
            close_errors( std -> pagc_p -> process_errors );
        }
        DBG("Calling FREE_AND_NULL");
        FREE_AND_NULL( std -> pagc_p ) ;
    }
    DBG("Calling close_stand_context");
    /* close_stand_context ignores a NULL context */
    close_stand_context( std -> misc_stand );
    DBG("Calling free");
    free( std );
}
394 
395 
/*
 * stdaddr_free
 *
 * Frees every string member of stdaddr and then the record itself.
 * A NULL stdaddr is ignored. Members that are NULL need no special
 * treatment because free(NULL) does nothing.
 *
 * The caller's own pointer is not modified (it is passed by value), so
 * the caller must not use it after this call.
 */
void stdaddr_free(STDADDR *stdaddr)
{
    if (stdaddr == NULL) return;

    free(stdaddr->building);
    free(stdaddr->house_num);
    free(stdaddr->predir);
    free(stdaddr->qual);
    free(stdaddr->pretype);
    free(stdaddr->name);
    free(stdaddr->suftype);
    free(stdaddr->sufdir);
    free(stdaddr->ruralroute);
    free(stdaddr->extra);
    free(stdaddr->city);
    free(stdaddr->state);
    free(stdaddr->country);
    free(stdaddr->postcode);
    free(stdaddr->box);
    free(stdaddr->unit);
    free(stdaddr);
}
418 
/* Returns a unless it is NULL, in which case b is returned. */
static char *coalesce( char *a, char *b )
{
    if (a != NULL) {
        return a;
    }
    return b;
}
423 
/*
 * print_stdaddr
 *
 * Writes every member of result to standard output, one labelled line
 * per member; a member that is NULL is printed as an empty string.
 * Nothing is printed when result is NULL.
 */
void print_stdaddr( STDADDR *result )
{
    if (result == NULL) {
        return;
    }

    printf("  building: %s\n", coalesce(result->building, ""));
    printf(" house_num: %s\n", coalesce(result->house_num, ""));
    printf("    predir: %s\n", coalesce(result->predir, ""));
    printf("      qual: %s\n", coalesce(result->qual, ""));
    printf("   pretype: %s\n", coalesce(result->pretype, ""));
    printf("      name: %s\n", coalesce(result->name, ""));
    printf("   suftype: %s\n", coalesce(result->suftype, ""));
    printf("    sufdir: %s\n", coalesce(result->sufdir, ""));
    printf("ruralroute: %s\n", coalesce(result->ruralroute, ""));
    printf("     extra: %s\n", coalesce(result->extra, ""));
    printf("      city: %s\n", coalesce(result->city, ""));
    printf("     state: %s\n", coalesce(result->state, ""));
    printf("   country: %s\n", coalesce(result->country, ""));
    printf("  postcode: %s\n", coalesce(result->postcode, ""));
    printf("       box: %s\n", coalesce(result->box, ""));
    printf("      unit: %s\n", coalesce(result->unit, ""));
}
445 
446 /*
447 STDADDR *std_standardize_one(STANDARDIZER *std, char *address_one_line, int options)
448 {
449     return NULL;
450 }
451 */
452 
std_standardize_mm(STANDARDIZER * std,char * micro,char * macro,int options)453 STDADDR *std_standardize_mm(STANDARDIZER *std, char *micro, char *macro, int options)
454 {
455     STAND_PARAM *stand_address;
456     STDADDR *stdaddr;
457     int err;
458 
459     stand_address = std -> misc_stand ;
460     if (stand_address == NULL)
461         return NULL;
462 
463     if (!micro || ( IS_BLANK( micro ))) {
464         RET_ERR("std_standardize_mm: micro attribute to standardize!", std -> err_p, NULL);
465     }
466 
467     init_output_fields( stand_address, BOTH );
468     if (macro && macro[0] != '\0') {
469         err = standardize_field( stand_address, macro, MACRO );
470         if (!err) {
471             RET_ERR1("std_standardize_mm: No standardization of %s!",
472                      macro, std -> err_p, NULL);
473         }
474 
475         if (options & 1) {
476             printf("After standardize_field for macro:\n");
477             output_raw_elements( stand_address , NULL ) ;
478             send_fields_to_stream(stand_address->standard_fields , NULL, 0, 0);
479         }
480     }
481 
482     err = standardize_field( stand_address, micro, MICRO_M );
483     if (!err) {
484         RET_ERR1("std_standardize_mm: No standardization of %s!",
485                  micro, std -> err_p, NULL);
486     }
487 
488     if (options & 1) {
489         printf("After standardize_field for micro:\n");
490         send_fields_to_stream(stand_address->standard_fields , NULL, 0, 0);
491     }
492 
493     PAGC_CALLOC_STRUC(stdaddr,STDADDR,1,std -> err_p,NULL);
494 
495     if (strlen(stand_address -> standard_fields[0]))
496         stdaddr->building   = strdup(stand_address -> standard_fields[0]);
497     if (strlen(stand_address -> standard_fields[1]))
498         stdaddr->house_num  = strdup(stand_address -> standard_fields[1]);
499     if (strlen(stand_address -> standard_fields[2]))
500         stdaddr->predir     = strdup(stand_address -> standard_fields[2]);
501     if (strlen(stand_address -> standard_fields[3]))
502         stdaddr->qual       = strdup(stand_address -> standard_fields[3]);
503     if (strlen(stand_address -> standard_fields[4]))
504         stdaddr->pretype    = strdup(stand_address -> standard_fields[4]);
505     if (strlen(stand_address -> standard_fields[5]))
506         stdaddr->name       = strdup(stand_address -> standard_fields[5]);
507     if (strlen(stand_address -> standard_fields[6]))
508         stdaddr->suftype    = strdup(stand_address -> standard_fields[6]);
509     if (strlen(stand_address -> standard_fields[7]))
510         stdaddr->sufdir     = strdup(stand_address -> standard_fields[7]);
511     if (strlen(stand_address -> standard_fields[8]))
512         stdaddr->ruralroute = strdup(stand_address -> standard_fields[8]);
513     if (strlen(stand_address -> standard_fields[9]))
514         stdaddr->extra      = strdup(stand_address -> standard_fields[9]);
515     if (strlen(stand_address -> standard_fields[10]))
516         stdaddr->city       = strdup(stand_address -> standard_fields[10]);
517     if (strlen(stand_address -> standard_fields[11]))
518         stdaddr->state      = strdup(stand_address -> standard_fields[11]);
519     if (strlen(stand_address -> standard_fields[12]))
520         stdaddr->country    = strdup(stand_address -> standard_fields[12]);
521     if (strlen(stand_address -> standard_fields[13]))
522         stdaddr->postcode   = strdup(stand_address -> standard_fields[13]);
523     if (strlen(stand_address -> standard_fields[14]))
524         stdaddr->box        = strdup(stand_address -> standard_fields[14]);
525     if (strlen(stand_address -> standard_fields[15]))
526         stdaddr->unit       = strdup(stand_address -> standard_fields[15]);
527 
528     return stdaddr;
529 }
530 
531 
std_standardize(STANDARDIZER * std,char * address,char * city,char * state,char * postcode,char * country,int options)532 STDADDR *std_standardize(STANDARDIZER *std, char *address, char *city, char *state, char *postcode, char *country, int options)
533 {
534     return NULL;
535 }
536 
537 #else
538 
539 /*========================================================================
540 <summary>
541 	<function name='standard.c (init_stand_process)'/>
542 	<remarks>set up process level, opens the lexicons and rules
543 		and default definitions for the tokenizer</remarks>
544 	<calls><functionref='(gamma.c) create_rules'/>, <functionref='(lexicon.c) create_lexicon'/>,
545 		<functionref='(tokenize.c) setup_default_defs'/> and
546 		<functionref='(analyze.c) install_def_block_table'/></calls>
547 </summary>
548 =========================================================================*/
init_stand_process(PAGC_GLOBAL * __pagc_global__,const char * __rule_name__,const char * __lexicon_name__,const char * __gazetteer_name__,const char * __featword_name__)549 int init_stand_process(PAGC_GLOBAL *__pagc_global__ ,const char *__rule_name__, const char *__lexicon_name__ , const char *__gazetteer_name__ , const char *__featword_name__)
550 {
551 	if ((__pagc_global__->rules = create_rules(__rule_name__,__pagc_global__)) == NULL)
552 	{
553 		return FALSE ;
554 	}
555 	/*-- <revision date='2009-08-13'> Support multiple lexicons </revision> --*/
556 	if ((__pagc_global__->addr_lexicon = create_lexicon(__pagc_global__ ,__lexicon_name__ , __gazetteer_name__)) == NULL)
557 	{
558 		return FALSE ;
559 	}
560 	if ((__pagc_global__->poi_lexicon = create_lexicon(__pagc_global__ ,__featword_name__ ,NULL)) == NULL)
561 	{
562 		return FALSE ;
563 	}
564 #ifdef GAZ_LEXICON
565 	/*-- <revision date='2012-06-01'> Add gaz_lexicon to be triggered on _start_state_ = MACRO </revision> --*/
566 	if ((__pagc_global__->gaz_lexicon = create_lexicon(__pagc_global__,__gazetteer_name__,NULL)) == NULL)
567 	{
568 		return FALSE ;
569 	}
570 #endif
571 	if (!setup_default_defs(__pagc_global__))
572 	{
573 		return FALSE ;
574 	}
575 	return (install_def_block_table(__pagc_global__->addr_lexicon ,__pagc_global__->process_errors)) ;
576 }
577 
578 #endif
579 
580 /*========================================================================
581 <summary>
582 	<function name='standard.c (close_stand_process)'/>
583 	<remarks> Called on exit to close down standardizer </remarks>
584 	<calls> <functionref='(tokenize.c) remove_default_defs'/>,
585 		<functionref='(gamma.c) destroy_rules'/> and
586 		<functionref='lexicon.c (destroy_lexicon)'/></calls>
587 </summary>
588 =========================================================================*/
close_stand_process(PAGC_GLOBAL * __pagc_global__)589 void close_stand_process(PAGC_GLOBAL * __pagc_global__)
590 {
591 	if (__pagc_global__ == NULL)
592 	{
593 		return ;
594 	}
595     DBG("remove_default_defs(__pagc_global__)");
596 	remove_default_defs(__pagc_global__) ;
597     DBG("destroy_rules(__pagc_global__->rules) ;");
598 	destroy_rules(__pagc_global__->rules) ;
599 	/*-- <revision date='2009-08-13'> Support multiple lexicons </revision> --*/
600     DBG("destroy_lexicon(__pagc_global__->addr_lexicon)");
601 	destroy_lexicon(__pagc_global__->addr_lexicon) ;
602     DBG("destroy_lexicon(__pagc_global__->poi_lexicon)");
603 	destroy_lexicon(__pagc_global__->poi_lexicon) ;
604 	/*-- <revision date='2012-06-01'> Add gaz_lexicon to be triggered on _start_state_ = MACRO </revision> --*/
605 #ifdef GAZ_LEXICON
606     DBG("destroy_lexicon(__pagc_global__->gaz_lexicon)");
607 	destroy_lexicon(__pagc_global__->gaz_lexicon) ;
608 #endif
609 }
610 
611 /*========================================================================
612 <summary>
613 	<function name='standard.c (init_stand_context)'/>
614 	<param name='__err_param__'>belongs to the dataset context.</param>
615 	<calls><functionref='analyze.c (create_segments)'/>
616 	<returns>NULL returned on error - if so, call <functionref='close_stand_context'/></returns>
617 </summary>
618 =========================================================================*/
init_stand_context(PAGC_GLOBAL * __pagc_global__,ERR_PARAM * __err_param__,int exhaustive_flag)619 STAND_PARAM *init_stand_context(PAGC_GLOBAL *__pagc_global__,ERR_PARAM *__err_param__,int exhaustive_flag)
620 {
621 	STAND_PARAM *__stand_param__ ;
622 	/*-- <remarks> Initialization-time allocation </remarks> --*/
623 	PAGC_CALLOC_STRUC(__stand_param__,STAND_PARAM,1,__err_param__,NULL) ;
624 	if ((__stand_param__->stz_info = create_segments(__err_param__)) == NULL)
625 	{
626 		return NULL ;
627 	}
628 	PAGC_CALLOC_2D_ARRAY(__stand_param__->standard_fields, char, MAXOUTSYM, MAXFLDLEN, __err_param__, NULL) ;
629 	__stand_param__->analyze_complete = exhaustive_flag ;
630 	__stand_param__->errors = __err_param__ ;
631 	__stand_param__->have_ref_att = NULL  ;
632 	/*-- <remarks> Transfer from global </remarks> --*/
633 	__stand_param__->rules = __pagc_global__->rules ;
634 	/*-- <revision date='2009-08-13'> Support multiple lexicons </revision> --*/
635 	/*-- <remarks> Transfer from global </remarks> --*/
636 	__stand_param__->address_lexicon = __pagc_global__->addr_lexicon ;
637 	/*-- <remarks> Transfer from global </remarks> --*/
638 	__stand_param__->poi_lexicon = __pagc_global__->poi_lexicon ;
639 	/*-- <revision date='2012-06-01'> Add gaz_lexicon to be triggered on _start_state_ = MACRO </revision> --*/
640 #ifdef GAZ_LEXICON
641 	__stand_param__->gaz_lexicon = __pagc_global__->gaz_lexicon ;
642 #endif
643 	__stand_param__->default_def = __pagc_global__->default_def ;
644 	return __stand_param__ ;
645 }
646 
647 
648 /*========================================================================
649 <summary>
650 	<function name='standard.c (close_stand_context)'/>
651 	<remarks> Closes the <code>STAND_PARAM</code> record </remarks>
652 	<calls> <functionref='analyze.c (destroy_segments)'/>,
653 		<macroref='FREE_AND_NULL'/></calls>
654 <summary>
655 =========================================================================*/
close_stand_context(STAND_PARAM * __stand_param__)656 void close_stand_context( STAND_PARAM *__stand_param__ )
657 {
658 	if (__stand_param__ == NULL)
659 	{
660 		return ;
661 	}
662 	destroy_segments(__stand_param__->stz_info) ;
663 	if (__stand_param__->standard_fields != NULL)
664 	{
665 		PAGC_DESTROY_2D_ARRAY(__stand_param__->standard_fields,char,MAXOUTSYM) ;
666 	}
667 	/*-- <remarks> Cleanup time memory release </remarks> --*/
668 	FREE_AND_NULL(__stand_param__) ;
669 }
670 
671 /*========================================================================
672 <summary>
673 	<function name='standard.c (_Close_Stand_Field_)'/>
674 	<remarks> Sends the scanned and processed input to the evaluator </remarks>
675 	<called-by> <functionref='standard.c (standardize_field)'/></called-by>
676 	<calls> <functionref='analyze.c (evaluator)'/> , <functionref='export.c (stuff_fields)'/></calls>
677 	<returns>FALSE on error</returns>
678 	<revision date='2012-07-22'> Keep track of start_state </revision>
679 </summary>
680 =========================================================================*/
_Close_Stand_Field_(STAND_PARAM * __stand_param__)681 static int _Close_Stand_Field_(STAND_PARAM *__stand_param__)
682 {
683 	/*-- <revision date='2012-07-22'> Keep track of start_state </revision> --*/
684 	if (evaluator(__stand_param__))
685 	{
686 		/*-- <remarks> Write the output into the fields. </remarks> --*/
687 		stuff_fields(__stand_param__) ;
688 		return TRUE ;
689 	}
690 	RET_ERR("_Close_Stand_Field_: Address failed to standardize",__stand_param__->errors,FALSE) ;
691 }
692 
693