1 /* -- standard.c
2
3 interface for the standardizer
4
5 Prototype 7H08 (This file was written by Walter Sinclair).
6
7 This file is part of PAGC.
8
9 Copyright (c) 2009 Walter Bruce Sinclair
10
11 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17
18 */
19
20 /*-- For pagc-0.4.2 : last revised 2012-07-18 --*/
21
22 #undef DEBUG
23 //#define DEBUG 1
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include "pagc_api.h"
30 #ifdef BUILD_API
31 #include "pagc_std_api.h"
32 #endif
33
34 #define GAZ_LEXICON
35
36 /* -- local prototypes -- */
37 /*-- <revision date='2012-07-22'> Keep track of start_state </revision> --*/
38 static int _Close_Stand_Field_(STAND_PARAM *) ;
39 static int _Scan_String_(STAND_PARAM *, char *) ;
40 static char * _Scan_Next_(STAND_PARAM *, char *) ;
41
/*-- <remarks> Characters the scanner treats as type 2 (spacing)
     terminators: space, backslash, hyphen, period, ')', '}', '>' and '_'.
     The table is only ever searched with strchr, never written, so it
     is declared const. </remarks> --*/
static const char __spacer__[] = " \\-.)}>_" ;
43
/*-- <remarks> Scanner helper macros. They operate by name on locals of
     the function that expands them: __src__ (input cursor), __dest__
     (write cursor into __scan_buf__), __scan_buf__, __stand_param__,
     a (current character), b (lookahead), n (collected length) and
     last_digit. The plain multi-statement macros are wrapped in
     do { } while (0) so that each one expands to exactly one statement
     and stays correct under an unbraced if / else. </remarks> --*/

/*-- <remarks> Terminate the scan buffer and store its length in n </remarks> --*/
#define TERM_AND_LENGTH \
    do { \
        *__dest__ = SENTINEL ; \
        n = strlen(__scan_buf__) ; \
    } while (0)

/*-- <remarks> Hand the scan buffer to new_morph as a morpheme of type
     TOKEN_ARG and return from the enclosing function: NULL when
     new_morph reports failure, the input cursor otherwise </remarks> --*/
#define RETURN_NEW_MORPH(TOKEN_ARG) \
    do { \
        if (!new_morph(__stand_param__,(TOKEN_ARG),__scan_buf__,n)) \
        { \
            return NULL ; \
        } \
        return __src__ ; \
    } while (0)

/*-- <remarks> Copy the current character and the lookahead character
     into the scan buffer and step the input cursor past both </remarks> --*/
#define COLLECT_LOOKAHEAD \
    do { \
        *__dest__++ = a ; __src__++ ; \
        *__dest__++ = b ; __src__++ ; \
    } while (0)

/*-- <remarks> Copy the current character, then keep copying for as long
     as COND holds for the newly read character. The first character is
     always copied. </remarks> --*/
#define COLLECT_WHILE(COND) \
    do { *__dest__++ = a ; __src__++ ; a = *__src__ ; } while (COND)

/*-- <remarks> Skip the current character, then keep skipping for as long
     as COND holds. Nothing is stored, so the write cursor is left
     alone: advancing it here served no purpose and could push it
     beyond the scan buffer on a long run of skipped input. </remarks> --*/
#define NO_COLLECT_WHILE(COND) \
    do { __src__++ ; a = *__src__ ; } while (COND)

/*-- <remarks> Body of one ordinal-suffix case of a switch on a. The
     number is an ordinal when the lookahead is NEXT_LOW / NEXT_UP, the
     last digit is N and the digit before it is not '1' (so 11, 12, 13
     are excluded). The trailing break must leave the enclosing switch,
     which is why this macro cannot be wrapped in do { } while (0); use
     it only as a complete case body. </remarks> --*/
#define TEST_FOR_ORD_DIGIT(N,NEXT_LOW,NEXT_UP) \
    if ((b == (NEXT_LOW)) || (b == (NEXT_UP))) \
    { \
        if (last_digit == (N)) \
        { \
            if ((n < 2 ) || (*(__dest__-2) != '1')) \
            { \
                COLLECT_LOOKAHEAD ; \
                TERM_AND_LENGTH ; \
                RETURN_NEW_MORPH(DORD) ; \
            } \
        } \
    } \
    break
78
79
80 /*========================================================================
81 <summary>
82 <function name='standard.c (standardize_field)'/>
83 <remarks>This function is called with a pointer to the
84 str to standardize and a start state indicating
85 the kind of standardization to perform. It invokes
86 the scanner to start the creation of the morphemes
87 <calls><functionref='tokenize.c (initialize_morphs)'/></calls>
88 <calls><functionref='_Close_Stand_Field_s'/></calls>
89 <calls><functionref='_Scan_String_'/></calls>
90 </summary>
91 =========================================================================*/
standardize_field(STAND_PARAM * __stand_param__,char * __in_str__,int client_start_state)92 int standardize_field(STAND_PARAM *__stand_param__ ,char *__in_str__ , int client_start_state )
93 {
94 /*-- <revision date='2009-08-13'> Support multiple lexicons </revision> --*/
95 /*-- <revision date='2012-06-01'> Add gaz_lexicon to be triggered on start_state= MACRO </revision> --*/
96 __stand_param__->lexicon = __stand_param__->address_lexicon ;
97 if (client_start_state > EXTRA_STATE)
98 {
99 __stand_param__->lexicon = __stand_param__->poi_lexicon ;
100 }
101 #ifdef GAZ_LEXICON
102 else
103 {
104 if (client_start_state == MACRO)
105 {
106 __stand_param__->lexicon = __stand_param__->gaz_lexicon ;
107 }
108 }
109 #endif
110 /*-- <revision date='2012-07-22'> Keep track of start_state </revision> --*/
111 __stand_param__->start_state = client_start_state ;
112 initialize_morphs(__stand_param__) ;
113 if (!_Scan_String_(__stand_param__,__in_str__))
114 {
115 return FALSE ;
116 }
117 /*-- <revision date='2012-07-22'> Keep track of start_state </revision> --*/
118 return (_Close_Stand_Field_(__stand_param__)) ;
119 }
120
/*-- <remarks> Drives the scanner over the input string. _Scan_Next_ is
     called repeatedly, each call resuming where the previous one
     stopped. Reaching a newline or the SENTINEL ends the scan and the
     collected tokens go to process_input, whose result is returned.
     A NULL from _Scan_Next_ aborts the scan and FALSE is returned. </remarks> --*/
static int _Scan_String_(STAND_PARAM *__stand_param__ ,char *__in_str__ )
{
    char *__cursor__ ;

    for (__cursor__ = __in_str__ ;
         __cursor__ != NULL ;
         __cursor__ = _Scan_Next_(__stand_param__,__cursor__))
    {
        /*-- <remarks> End of input: process the tokens </remarks> --*/
        if ((*__cursor__ == '\n') || (*__cursor__ == SENTINEL))
        {
            return (process_input(__stand_param__)) ;
        }
    }
    /*-- <remarks> The scanner reported an error </remarks> --*/
    return FALSE ;
}
142
/*-- <remarks> Scans one token starting at __in_ptr__ and registers it.
     <list>
     <item>',' tab and ';' are type 1 terminators (set_term, type 1).</item>
     <item>A run of digits becomes DNUMBER, unless it is followed by a
        recognised fraction tail (1/2, 1/3, 2/3, 1/4, 3/4 on the last
        digit) giving DFRACT, or by an ordinal suffix (st, nd, rd, th,
        any case, consistent with the last one or two digits) giving
        DORD.</item>
     <item>A run of '&' becomes DSINGLE.</item>
     <item>A run of letters and apostrophes, optionally introduced by
        '#', becomes DSINGLE, DDOUBLE or DWORDT for length 1, 2 or
        more.</item>
     <item>A run of __spacer__ characters is a type 2 terminator
        (set_term, type 2, empty text).</item>
     <item>Any other character is skipped.</item>
     </list>
     The terminating SENTINEL is never consumed: a trailing spacer run
     stops at it (strchr would otherwise match it, because the
     terminator counts as part of the searched string), and the
     lookahead is not read beyond it. Characters are converted to
     unsigned char before being passed to the ctype classifiers. A token
     that does not fit in the scan buffer is reported as an error
     instead of being written past the end of the buffer. </remarks>
     <returns>The position of the first unscanned character, or NULL if
     new_morph fails or the token is too long </returns> --*/
static char * _Scan_Next_( STAND_PARAM *__stand_param__,char * __in_ptr__)
{
    char __scan_buf__[MAXSTRLEN] ;
    /*-- <remarks> Final slot of the buffer, reserved for the terminator </remarks> --*/
    char * const __last__ = __scan_buf__ + MAXSTRLEN - 1 ;
    char *__src__ = __in_ptr__ ;
    char *__dest__ = __scan_buf__ ;
    char a = *__src__ ;
    int morph_type ;

    *__dest__ = SENTINEL ;

    /*-- <remarks> Nothing to scan: stay on the terminator </remarks> --*/
    if (a == SENTINEL)
    {
        return __src__ ;
    }

    /*-- <remarks> Type one terminators </remarks> --*/
    if ((a == ',') || (a == '\t') || (a == ';'))
    {
        *__dest__++ = a ;
        *__dest__ = SENTINEL ;
        set_term(__stand_param__,1,__scan_buf__) ;
        /*-- <remarks> Point to next input char </remarks> --*/
        return (__src__ + 1) ;
    }

    if (isdigit((unsigned char) a))
    {
        /*-- <remarks> Numeric sequences : ordinals, fractions and numbers </remarks> --*/
        char b ;
        char last_digit ;
        int n ;
        int preceded_by_one ;
        int take_lookahead = FALSE ;

        do
        {
            if (__dest__ >= __last__)
            {
                goto token_too_long ;
            }
            *__dest__++ = a ;
            a = *++__src__ ;
        } while (isdigit((unsigned char) a)) ;

        /*-- <remarks> One character of lookahead (never read beyond the
             terminator) and one of lookbehind </remarks> --*/
        b = (a == SENTINEL) ? SENTINEL : *(__src__ + 1) ;
        last_digit = *(__dest__ - 1) ;
        n = (int) (__dest__ - __scan_buf__) ;
        /*-- <remarks> True for numbers ending in 10 to 19 </remarks> --*/
        preceded_by_one = ((n > 1) && (*(__dest__ - 2) == '1')) ;

        morph_type = DNUMBER ;
        switch (a)
        {
        /*-- <remarks> Fractions: /2 after 1, /3 after 1 or 2, /4 after 1 or 3 </remarks> --*/
        case '/' :
            if (((b == '2') && (last_digit == '1')) ||
                ((b == '3') && ((last_digit == '1') || (last_digit == '2'))) ||
                ((b == '4') && ((last_digit == '1') || (last_digit == '3'))))
            {
                morph_type = DFRACT ;
                take_lookahead = TRUE ;
            }
            break ;
        /*-- <remarks> 1st, 21st, 31st, but not 11st </remarks> --*/
        case 's' : case 'S' :
            if (((b == 't') || (b == 'T')) && (last_digit == '1') && !preceded_by_one)
            {
                morph_type = DORD ;
                take_lookahead = TRUE ;
            }
            break ;
        /*-- <remarks> 3rd, 23rd, 33rd, but not 13rd </remarks> --*/
        case 'r' : case 'R' :
            if (((b == 'd') || (b == 'D')) && (last_digit == '3') && !preceded_by_one)
            {
                morph_type = DORD ;
                take_lookahead = TRUE ;
            }
            break ;
        /*-- <remarks> 2nd, 22nd, 32nd, but not 12nd </remarks> --*/
        case 'n' : case 'N' :
            if (((b == 'd') || (b == 'D')) && (last_digit == '2') && !preceded_by_one)
            {
                morph_type = DORD ;
                take_lookahead = TRUE ;
            }
            break ;
        /*-- <remarks> 11th, 12th, 13th, and th after a last digit of 0 or 4-9 </remarks> --*/
        case 't' : case 'T' :
            if ((b == 'h') || (b == 'H'))
            {
                int one_two_three = ((last_digit == '1') || (last_digit == '2') || (last_digit == '3')) ;
                if (!one_two_three || preceded_by_one)
                {
                    morph_type = DORD ;
                    take_lookahead = TRUE ;
                }
            }
            break ;
        default :
            /*-- <remarks> ordinary numeric sequence </remarks> --*/
            break ;
        }

        if (take_lookahead)
        {
            /*-- <remarks> The suffix is two characters long </remarks> --*/
            if ((__last__ - __dest__) < 2)
            {
                goto token_too_long ;
            }
            *__dest__++ = a ;
            *__dest__++ = b ;
            __src__ += 2 ;
        }
    }
    /*-- <revision date='2009-08-15'> Fix ampersand : P&R --> P & R </revision> --*/
    else if (a == '&')
    {
        do
        {
            if (__dest__ >= __last__)
            {
                goto token_too_long ;
            }
            *__dest__++ = a ;
            a = *++__src__ ;
        } while (a == '&') ;
        morph_type = DSINGLE ;
    }
    /*-- <remarks> Alphabetic sequence; '#' may only lead it </remarks> --*/
    else if ((isalpha((unsigned char) a)) || (a == '\'') || (a == '#'))
    {
        do
        {
            if (__dest__ >= __last__)
            {
                goto token_too_long ;
            }
            *__dest__++ = a ;
            a = *++__src__ ;
        } while ((isalpha((unsigned char) a)) || (a == '\'')) ;

        switch (__dest__ - __scan_buf__)
        {
        case 1 :
            morph_type = DSINGLE ;
            break ;
        case 2 :
            morph_type = DDOUBLE ;
            break ;
        default :
            morph_type = DWORDT ;
            break ;
        }
    }
    /*-- <remarks> Type 2 terminators ( spacing ) </remarks> --*/
    else if (strchr(__spacer__,a) != NULL)
    {
        /*-- <remarks> strchr also finds the terminator, so test for it first </remarks> --*/
        do
        {
            a = *++__src__ ;
        } while ((a != SENTINEL) && (strchr(__spacer__,a) != NULL)) ;
        /*-- <remarks> The scan buffer is still empty here </remarks> --*/
        set_term(__stand_param__,2,__scan_buf__) ;
        /*-- <remarks> Retain position </remarks> --*/
        return (__src__) ;
    }
    else
    {
        /*-- <remarks> Ignore everything not specified. Point to next input char. </remarks> --*/
        return (__src__ + 1) ;
    }

    /*-- <remarks> Register the collected token; retain position </remarks> --*/
    *__dest__ = SENTINEL ;
    if (!new_morph(__stand_param__,morph_type,__scan_buf__,(int) (__dest__ - __scan_buf__)))
    {
        return NULL ;
    }
    return __src__ ;

token_too_long :
    RET_ERR("_Scan_Next_: Token is too long for the scan buffer",__stand_param__->errors,NULL) ;
}
290
291 #ifdef BUILD_API
292
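/*
NOTE: the sketch below is out of date and is kept for reference only.
The STANDARDIZER members this file actually uses are pagc_p, err_p and
misc_stand. The STDADDR members it uses are building, house_num, predir,
qual, pretype, name, suftype, sufdir, ruralroute, extra, city, state,
country, postcode, box and unit. The authoritative declarations are
presumably in pagc_std_api.h -- confirm there.

typedef struct STANDARDIZER_s {
    int data;
    char *err_msg;
} STANDARDIZER;

typedef struct STDADDR_s { // define as required
    char *house_num;
    char *prequal;
    char *pretype;
    char *predir;
    char *name;
    char *suftype;
    char *sufdir;
    char *sufqual;
    char *extra;
    char *city;
    char *state;
    char *postcode;
    char *country;
} STDADDR;

*/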
316
std_init()317 STANDARDIZER *std_init()
318 {
319 STANDARDIZER *std;
320
321 std = (STANDARDIZER *) calloc(1,sizeof(STANDARDIZER)) ;
322 if ( std == NULL ) return NULL ;
323
324 std -> pagc_p = (PAGC_GLOBAL *) calloc(1,sizeof(PAGC_GLOBAL)) ;
325 if ( std -> pagc_p == NULL ) {
326 free( std ) ;
327 return NULL ;
328 }
329
330 std -> pagc_p -> process_errors = init_errors(std -> pagc_p, NULL) ;
331 std -> err_p = std -> pagc_p -> process_errors ;
332
333 return std;
334 }
335
336
/*
 * Installs lex as the address lexicon of the standardizer. The hash
 * table is moved out of lex into the global state and the emptied
 * LEXICON wrapper is then released with lex_free, so lex must not be
 * used by the caller afterwards. The default definitions are set up and
 * the definition block table is installed on the new lexicon.
 *
 * Returns FALSE if setup_default_defs fails, otherwise the result of
 * install_def_block_table.
 */
int std_use_lex(STANDARDIZER *std, LEXICON *lex)
{
    PAGC_GLOBAL *glob = std->pagc_p ;

    /* detach the table first so that lex_free cannot release it */
    glob->addr_lexicon = lex->hash_table ;
    lex->hash_table = NULL ;
    lex_free(lex) ;

    if (setup_default_defs(glob))
    {
        return install_def_block_table(glob->addr_lexicon, glob->process_errors) ;
    }
    return FALSE ;
}
345
346
/*
 * Installs gaz as the gazetteer lexicon of the standardizer. The hash
 * table is moved out of gaz into the global state and the emptied
 * LEXICON wrapper is released with lex_free, so gaz must not be used by
 * the caller afterwards.
 *
 * Always returns 0.
 */
int std_use_gaz(STANDARDIZER *std, LEXICON *gaz)
{
    PAGC_GLOBAL *glob = std->pagc_p ;

    /* detach the table first so that lex_free cannot release it */
    glob->gaz_lexicon = gaz->hash_table ;
    gaz->hash_table = NULL ;
    lex_free(gaz) ;

    return 0 ;
}
354
355
/*
 * Installs a readied rule set in the standardizer. The rule data is
 * moved out of rules into the global state and the emptied RULES
 * wrapper is released with rules_free, so rules must not be used by the
 * caller afterwards.
 *
 * Returns 0 on success. If rules->ready is not set, an error is logged
 * through std->err_p, 1 is returned and rules is left untouched.
 */
int std_use_rules(STANDARDIZER *std, RULES *rules)
{
    if (rules->ready)
    {
        /* detach the rule data first so that rules_free cannot release it */
        std->pagc_p->rules = rules->r_p ;
        rules->r_p = NULL ;
        rules_free(rules) ;
        return 0 ;
    }
    RET_ERR("std_use_rules: Rules have not been readied!", std -> err_p, 1);
}
366
/*
 * Builds the standardization context of the standardizer from its
 * global state and error record (init_stand_context is called with an
 * exhaustive flag of 1) and stores it in misc_stand.
 *
 * Returns 0 on success and 1 if no context could be created.
 */
int std_ready_standardizer(STANDARDIZER *std)
{
    std->misc_stand = init_stand_context(std->pagc_p, std->err_p, 1) ;

    return (std->misc_stand == NULL) ? 1 : 0 ;
}
376
377
/*
 * Releases a standardizer and everything it owns: the process-level
 * data and error record held by the global state, the global state
 * itself, the standardization context and finally the STANDARDIZER.
 * A NULL std is accepted and ignored.
 *
 * The error record is only reached through pagc_p, so it is examined
 * only when pagc_p is not NULL, and pagc_p is released whether or not
 * an error record exists.
 */
void std_free(STANDARDIZER *std)
{
    if ( std == NULL ) return;

    if ( std -> pagc_p != NULL ) {
        DBG("Calling close_stand_process");
        close_stand_process( std -> pagc_p ) ;
        if ( std -> pagc_p -> process_errors != NULL ) {
            DBG("Calling close_errors");
            close_errors( std -> pagc_p -> process_errors );
        }
        DBG("Calling FREE_AND_NULL");
        FREE_AND_NULL( std -> pagc_p ) ;
    }

    /* close_stand_context ignores a NULL context */
    DBG("Calling close_stand_context");
    close_stand_context( std -> misc_stand );
    DBG("Calling free");
    free( std );
}
394
395
/*
 * Releases a STDADDR together with every field string it holds. A NULL
 * stdaddr is accepted and ignored. Fields that were never set are NULL
 * and free(NULL) does nothing, so no per-field test is needed.
 */
void stdaddr_free(STDADDR *stdaddr)
{
    if (stdaddr == NULL)
    {
        return;
    }

    free(stdaddr->building);
    free(stdaddr->house_num);
    free(stdaddr->predir);
    free(stdaddr->qual);
    free(stdaddr->pretype);
    free(stdaddr->name);
    free(stdaddr->suftype);
    free(stdaddr->sufdir);
    free(stdaddr->ruralroute);
    free(stdaddr->extra);
    free(stdaddr->city);
    free(stdaddr->state);
    free(stdaddr->country);
    free(stdaddr->postcode);
    free(stdaddr->box);
    free(stdaddr->unit);

    free(stdaddr);
}
418
/* Returns a unless it is NULL, in which case b is returned. */
static char *coalesce( char *a, char *b )
{
    if (!a)
    {
        return b;
    }
    return a;
}
423
/*
 * Prints every field of a standardized address to stdout, one labelled
 * line per field. Fields that are NULL are printed as empty strings.
 * Nothing is printed when result is NULL.
 */
void print_stdaddr( STDADDR *result )
{
    if (!result)
    {
        return;
    }

    printf(" building: %s\n", coalesce(result->building, ""));
    printf(" house_num: %s\n", coalesce(result->house_num, ""));
    printf(" predir: %s\n", coalesce(result->predir, ""));
    printf(" qual: %s\n", coalesce(result->qual, ""));
    printf(" pretype: %s\n", coalesce(result->pretype, ""));
    printf(" name: %s\n", coalesce(result->name, ""));
    printf(" suftype: %s\n", coalesce(result->suftype, ""));
    printf(" sufdir: %s\n", coalesce(result->sufdir, ""));
    printf("ruralroute: %s\n", coalesce(result->ruralroute, ""));
    printf(" extra: %s\n", coalesce(result->extra, ""));
    printf(" city: %s\n", coalesce(result->city, ""));
    printf(" state: %s\n", coalesce(result->state, ""));
    printf(" country: %s\n", coalesce(result->country, ""));
    printf(" postcode: %s\n", coalesce(result->postcode, ""));
    printf(" box: %s\n", coalesce(result->box, ""));
    printf(" unit: %s\n", coalesce(result->unit, ""));
}
445
446 /*
447 STDADDR *std_standardize_one(STANDARDIZER *std, char *address_one_line, int options)
448 {
449 return NULL;
450 }
451 */
452
std_standardize_mm(STANDARDIZER * std,char * micro,char * macro,int options)453 STDADDR *std_standardize_mm(STANDARDIZER *std, char *micro, char *macro, int options)
454 {
455 STAND_PARAM *stand_address;
456 STDADDR *stdaddr;
457 int err;
458
459 stand_address = std -> misc_stand ;
460 if (stand_address == NULL)
461 return NULL;
462
463 if (!micro || ( IS_BLANK( micro ))) {
464 RET_ERR("std_standardize_mm: micro attribute to standardize!", std -> err_p, NULL);
465 }
466
467 init_output_fields( stand_address, BOTH );
468 if (macro && macro[0] != '\0') {
469 err = standardize_field( stand_address, macro, MACRO );
470 if (!err) {
471 RET_ERR1("std_standardize_mm: No standardization of %s!",
472 macro, std -> err_p, NULL);
473 }
474
475 if (options & 1) {
476 printf("After standardize_field for macro:\n");
477 output_raw_elements( stand_address , NULL ) ;
478 send_fields_to_stream(stand_address->standard_fields , NULL, 0, 0);
479 }
480 }
481
482 err = standardize_field( stand_address, micro, MICRO_M );
483 if (!err) {
484 RET_ERR1("std_standardize_mm: No standardization of %s!",
485 micro, std -> err_p, NULL);
486 }
487
488 if (options & 1) {
489 printf("After standardize_field for micro:\n");
490 send_fields_to_stream(stand_address->standard_fields , NULL, 0, 0);
491 }
492
493 PAGC_CALLOC_STRUC(stdaddr,STDADDR,1,std -> err_p,NULL);
494
495 if (strlen(stand_address -> standard_fields[0]))
496 stdaddr->building = strdup(stand_address -> standard_fields[0]);
497 if (strlen(stand_address -> standard_fields[1]))
498 stdaddr->house_num = strdup(stand_address -> standard_fields[1]);
499 if (strlen(stand_address -> standard_fields[2]))
500 stdaddr->predir = strdup(stand_address -> standard_fields[2]);
501 if (strlen(stand_address -> standard_fields[3]))
502 stdaddr->qual = strdup(stand_address -> standard_fields[3]);
503 if (strlen(stand_address -> standard_fields[4]))
504 stdaddr->pretype = strdup(stand_address -> standard_fields[4]);
505 if (strlen(stand_address -> standard_fields[5]))
506 stdaddr->name = strdup(stand_address -> standard_fields[5]);
507 if (strlen(stand_address -> standard_fields[6]))
508 stdaddr->suftype = strdup(stand_address -> standard_fields[6]);
509 if (strlen(stand_address -> standard_fields[7]))
510 stdaddr->sufdir = strdup(stand_address -> standard_fields[7]);
511 if (strlen(stand_address -> standard_fields[8]))
512 stdaddr->ruralroute = strdup(stand_address -> standard_fields[8]);
513 if (strlen(stand_address -> standard_fields[9]))
514 stdaddr->extra = strdup(stand_address -> standard_fields[9]);
515 if (strlen(stand_address -> standard_fields[10]))
516 stdaddr->city = strdup(stand_address -> standard_fields[10]);
517 if (strlen(stand_address -> standard_fields[11]))
518 stdaddr->state = strdup(stand_address -> standard_fields[11]);
519 if (strlen(stand_address -> standard_fields[12]))
520 stdaddr->country = strdup(stand_address -> standard_fields[12]);
521 if (strlen(stand_address -> standard_fields[13]))
522 stdaddr->postcode = strdup(stand_address -> standard_fields[13]);
523 if (strlen(stand_address -> standard_fields[14]))
524 stdaddr->box = strdup(stand_address -> standard_fields[14]);
525 if (strlen(stand_address -> standard_fields[15]))
526 stdaddr->unit = strdup(stand_address -> standard_fields[15]);
527
528 return stdaddr;
529 }
530
531
std_standardize(STANDARDIZER * std,char * address,char * city,char * state,char * postcode,char * country,int options)532 STDADDR *std_standardize(STANDARDIZER *std, char *address, char *city, char *state, char *postcode, char *country, int options)
533 {
534 return NULL;
535 }
536
537 #else
538
539 /*========================================================================
540 <summary>
541 <function name='standard.c (init_stand_process)'/>
542 <remarks>set up process level, opens the lexicons and rules
543 and default definitions for the tokenizer</remarks>
544 <calls><functionref='(gamma.c) create_rules'/>, <functionref='(lexicon.c) create_lexicon'/>,
545 <functionref='(tokenize.c) setup_default_defs'/> and
546 <functionref='(analyze.c) install_def_block_table'/></calls>
547 </summary>
548 =========================================================================*/
init_stand_process(PAGC_GLOBAL * __pagc_global__,const char * __rule_name__,const char * __lexicon_name__,const char * __gazetteer_name__,const char * __featword_name__)549 int init_stand_process(PAGC_GLOBAL *__pagc_global__ ,const char *__rule_name__, const char *__lexicon_name__ , const char *__gazetteer_name__ , const char *__featword_name__)
550 {
551 if ((__pagc_global__->rules = create_rules(__rule_name__,__pagc_global__)) == NULL)
552 {
553 return FALSE ;
554 }
555 /*-- <revision date='2009-08-13'> Support multiple lexicons </revision> --*/
556 if ((__pagc_global__->addr_lexicon = create_lexicon(__pagc_global__ ,__lexicon_name__ , __gazetteer_name__)) == NULL)
557 {
558 return FALSE ;
559 }
560 if ((__pagc_global__->poi_lexicon = create_lexicon(__pagc_global__ ,__featword_name__ ,NULL)) == NULL)
561 {
562 return FALSE ;
563 }
564 #ifdef GAZ_LEXICON
565 /*-- <revision date='2012-06-01'> Add gaz_lexicon to be triggered on _start_state_ = MACRO </revision> --*/
566 if ((__pagc_global__->gaz_lexicon = create_lexicon(__pagc_global__,__gazetteer_name__,NULL)) == NULL)
567 {
568 return FALSE ;
569 }
570 #endif
571 if (!setup_default_defs(__pagc_global__))
572 {
573 return FALSE ;
574 }
575 return (install_def_block_table(__pagc_global__->addr_lexicon ,__pagc_global__->process_errors)) ;
576 }
577
578 #endif
579
580 /*========================================================================
581 <summary>
582 <function name='standard.c (close_stand_process)'/>
583 <remarks> Called on exit to close down standardizer </remarks>
584 <calls> <functionref='(tokenize.c) remove_default_defs'/>,
585 <functionref='(gamma.c) destroy_rules'/> and
586 <functionref='lexicon.c (destroy_lexicon)'/></calls>
587 </summary>
588 =========================================================================*/
close_stand_process(PAGC_GLOBAL * __pagc_global__)589 void close_stand_process(PAGC_GLOBAL * __pagc_global__)
590 {
591 if (__pagc_global__ == NULL)
592 {
593 return ;
594 }
595 DBG("remove_default_defs(__pagc_global__)");
596 remove_default_defs(__pagc_global__) ;
597 DBG("destroy_rules(__pagc_global__->rules) ;");
598 destroy_rules(__pagc_global__->rules) ;
599 /*-- <revision date='2009-08-13'> Support multiple lexicons </revision> --*/
600 DBG("destroy_lexicon(__pagc_global__->addr_lexicon)");
601 destroy_lexicon(__pagc_global__->addr_lexicon) ;
602 DBG("destroy_lexicon(__pagc_global__->poi_lexicon)");
603 destroy_lexicon(__pagc_global__->poi_lexicon) ;
604 /*-- <revision date='2012-06-01'> Add gaz_lexicon to be triggered on _start_state_ = MACRO </revision> --*/
605 #ifdef GAZ_LEXICON
606 DBG("destroy_lexicon(__pagc_global__->gaz_lexicon)");
607 destroy_lexicon(__pagc_global__->gaz_lexicon) ;
608 #endif
609 }
610
611 /*========================================================================
612 <summary>
613 <function name='standard.c (init_stand_context)'/>
614 <param name='__err_param__'>belongs to the dataset context.</param>
615 <calls><functionref='analyze.c (create_segments)'/>
616 <returns>NULL returned on error - if so, call <functionref='close_stand_context'/></returns>
617 </summary>
618 =========================================================================*/
init_stand_context(PAGC_GLOBAL * __pagc_global__,ERR_PARAM * __err_param__,int exhaustive_flag)619 STAND_PARAM *init_stand_context(PAGC_GLOBAL *__pagc_global__,ERR_PARAM *__err_param__,int exhaustive_flag)
620 {
621 STAND_PARAM *__stand_param__ ;
622 /*-- <remarks> Initialization-time allocation </remarks> --*/
623 PAGC_CALLOC_STRUC(__stand_param__,STAND_PARAM,1,__err_param__,NULL) ;
624 if ((__stand_param__->stz_info = create_segments(__err_param__)) == NULL)
625 {
626 return NULL ;
627 }
628 PAGC_CALLOC_2D_ARRAY(__stand_param__->standard_fields, char, MAXOUTSYM, MAXFLDLEN, __err_param__, NULL) ;
629 __stand_param__->analyze_complete = exhaustive_flag ;
630 __stand_param__->errors = __err_param__ ;
631 __stand_param__->have_ref_att = NULL ;
632 /*-- <remarks> Transfer from global </remarks> --*/
633 __stand_param__->rules = __pagc_global__->rules ;
634 /*-- <revision date='2009-08-13'> Support multiple lexicons </revision> --*/
635 /*-- <remarks> Transfer from global </remarks> --*/
636 __stand_param__->address_lexicon = __pagc_global__->addr_lexicon ;
637 /*-- <remarks> Transfer from global </remarks> --*/
638 __stand_param__->poi_lexicon = __pagc_global__->poi_lexicon ;
639 /*-- <revision date='2012-06-01'> Add gaz_lexicon to be triggered on _start_state_ = MACRO </revision> --*/
640 #ifdef GAZ_LEXICON
641 __stand_param__->gaz_lexicon = __pagc_global__->gaz_lexicon ;
642 #endif
643 __stand_param__->default_def = __pagc_global__->default_def ;
644 return __stand_param__ ;
645 }
646
647
648 /*========================================================================
649 <summary>
650 <function name='standard.c (close_stand_context)'/>
651 <remarks> Closes the <code>STAND_PARAM</code> record </remarks>
652 <calls> <functionref='analyze.c (destroy_segments)'/>,
653 <macroref='FREE_AND_NULL'/></calls>
654 <summary>
655 =========================================================================*/
close_stand_context(STAND_PARAM * __stand_param__)656 void close_stand_context( STAND_PARAM *__stand_param__ )
657 {
658 if (__stand_param__ == NULL)
659 {
660 return ;
661 }
662 destroy_segments(__stand_param__->stz_info) ;
663 if (__stand_param__->standard_fields != NULL)
664 {
665 PAGC_DESTROY_2D_ARRAY(__stand_param__->standard_fields,char,MAXOUTSYM) ;
666 }
667 /*-- <remarks> Cleanup time memory release </remarks> --*/
668 FREE_AND_NULL(__stand_param__) ;
669 }
670
671 /*========================================================================
672 <summary>
673 <function name='standard.c (_Close_Stand_Field_)'/>
674 <remarks> Sends the scanned and processed input to the evaluator </remarks>
675 <called-by> <functionref='standard.c (standardize_field)'/></called-by>
676 <calls> <functionref='analyze.c (evaluator)'/> , <functionref='export.c (stuff_fields)'/></calls>
677 <returns>FALSE on error</returns>
678 <revision date='2012-07-22'> Keep track of start_state </revision>
679 </summary>
680 =========================================================================*/
_Close_Stand_Field_(STAND_PARAM * __stand_param__)681 static int _Close_Stand_Field_(STAND_PARAM *__stand_param__)
682 {
683 /*-- <revision date='2012-07-22'> Keep track of start_state </revision> --*/
684 if (evaluator(__stand_param__))
685 {
686 /*-- <remarks> Write the output into the fields. </remarks> --*/
687 stuff_fields(__stand_param__) ;
688 return TRUE ;
689 }
690 RET_ERR("_Close_Stand_Field_: Address failed to standardize",__stand_param__->errors,FALSE) ;
691 }
692
693