1 /* -- gamma.c
2
3 This file reads the rules file into memory and sets up the rule
4 lookup structures. These are based on the optimized Aho-Corasick
5 algorithms in Watson (1994).
6
7 Copyright (c) 2008 Walter Bruce Sinclair
8
9 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10
11 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12
13 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
14 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15
16 */
17 /* For pagc-0.4.0 : last revised 2010-11-01 */
18
19 #undef DEBUG
20 //#define DEBUG
21
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <stddef.h>
25 #include "pagc_api.h"
26 #include "gamma.h"
27
28 #ifdef BUILD_API
29 #include "pagc_std_api.h"
30 RULES *rules_init( ERR_PARAM *err_p ) ;
31 #endif
32
33 /* -- local prototypes -- */
34 static int initialize_link( ERR_PARAM *, KW *** , NODE ) ;
35 static void classify_link( RULE_PARAM * , KW ***, KW *, NODE , SYMB , SYMB ) ;
36 static void add_failure_linkage( KW ***, NODE , NODE ) ;
37 static NODE **precompute_gamma_function( ERR_PARAM *, NODE ** , KW ***, NODE ) ;
38
39 static double load_value[ NUMBER_OF_WEIGHTS ] = {
40 0.00, 0.325, 0.35 , 0.375 , 0.4 ,
41 0.475 , 0.55, 0.6 , 0.65 , 0.675 ,
42 0.7 , 0.75 , 0.8 , 0.825 , 0.85 ,
43 0.9 , 0.95 , 1.00 } ;
44
45 /*---------------------------------------------------------------------------
46 gamma.c (refresh_transducer)
47 called by analyze.c (prepare_target_pattern)
48 The registry of matching keywords is regenerated with the use of the
49 precomputed Gamma function, Output Links and the current target.
50 ----------------------------------------------------------------------------*/
refresh_transducer(NODE * r,SYMB * S,NODE ** gamma_function)51 void refresh_transducer( NODE *r ,
52 SYMB *S ,
53 NODE **gamma_function ) {
54 NODE q ;
55 int i ;
56
57 i = 0 ;
58 q = r[ i ] = EPSILON ;
59 while ( S[ i ] != FAIL ) {
60 q = gamma_function[ q ][ S[ i ] ] ;
61 i++ ;
62 r[ i ] = q ;
63 }
64 }
65
66 /*---------------------------------------------------------------------------
67 gamma.c (is_input_symbol)
68 called by gamma.c (create_rules)
69 ----------------------------------------------------------------------------*/
is_input_symbol(SYMB sym)70 int is_input_symbol( SYMB sym ) {
71
72 if ( sym > MAXINSYM ||
73 sym < 0 )
74 return FALSE ;
75 return TRUE ;
76 }
77
78 /*---------------------------------------------------------------------------
79 gamma.c (is_output_symbol)
80 called by gamma.c (create_rules)
81 ----------------------------------------------------------------------------*/
is_output_symbol(SYMB sym)82 int is_output_symbol( SYMB sym ) {
83 if ( sym > MAXOUTSYM ||
84 sym < 0 )
85 return FALSE ;
86 return TRUE ;
87 }
88
89 #ifdef BUILD_API
90
91 /*
92 typedef struct RULES_s {
93 int ready;
94 int rule_number;
95 int last_node;
96 RULE_PARAM *r_p;
97 ERR_PARAM *err_p;
98 NODE **Trie;
99 SYMB *rule_end ;
100 SYMB *r ;
101 } RULES;
102 */
103
104 /*---------------------------------------------------------------------------
105 gamma.c (rules_init)
106 api interface to replace (create_rules)
107 ---------------------------------------------------------------------------*/
rules_init(ERR_PARAM * err_p)108 RULES *rules_init( ERR_PARAM *err_p ) {
109 RULES *rules;
110 /* -- returns size of Gamma Function Matrix -- */
111 SYMB a ;
112 KW *k_s ;
113 KW ***o_l ;
114 NODE **Trie ;
115 SYMB *r_s ;
116 RULE_PARAM *r_p ;
117
118
119 PAGC_CALLOC_STRUC(rules,RULES,1,err_p,NULL);
120 rules->err_p = err_p;
121 rules->ready = 0;
122 rules->rule_number = 0;
123 rules->last_node = EPSILON;
124
125 PAGC_ALLOC_STRUC(r_p,RULE_PARAM,err_p,NULL) ;
126 rules->r_p = r_p;
127
128 /* -- initialize the statistics record -- */
129 r_p -> collect_statistics = FALSE ;
130 r_p -> total_best_keys = 0 ;
131 r_p -> total_key_hits = 0 ;
132
133 /* -- storage for input and output records -- */
134 PAGC_CALLOC_STRUC(r_s,SYMB,RULESPACESIZE,err_p,NULL);
135
136 /* -- storage for temporary trie for rules -- */
137 PAGC_CALLOC_STRUC(Trie,NODE *,MAXNODES,err_p,NULL);
138
139 /* -- initialize the first( EPSILON ) node of the trie -- */
140 PAGC_CALLOC_STRUC(Trie[EPSILON],NODE,MAXINSYM,err_p,NULL);
141
142 for ( a = 0 ;
143 a < MAXINSYM ;
144 a++ ) {
145 Trie[ EPSILON ][ a ] = FAIL ;
146 }
147
148 /* -- storage for global output_link -- */
149 PAGC_CALLOC_STRUC(o_l,KW **,MAXNODES,err_p,NULL);
150 PAGC_CALLOC_STRUC(k_s,KW,MAXRULES,err_p,NULL);
151
152 if ( !initialize_link( err_p ,
153 o_l ,
154 EPSILON ) ) {
155
156 /* Cleanup allocated resources */
157 FREE_AND_NULL(o_l);
158 FREE_AND_NULL(k_s);
159 FREE_AND_NULL(r_p);
160
161 PAGC_DESTROY_2D_ARRAY(rules -> Trie,NODE,MAXINSYM);
162 rules -> Trie = NULL;
163
164 rules_free(rules);
165 FREE_AND_NULL(rules);
166
167 return NULL ;
168 }
169
170 rules -> r_p -> rule_space = r_s ;
171 rules -> r_p -> key_space = k_s ;
172 rules -> r_p -> output_link = o_l ;
173
174 rules -> Trie = Trie ;
175 rules -> rule_end = r_s + RULESPACESIZE ;
176
177 rules -> r = r_s ;
178
179 return rules;
180 }
181
182
rules_add_rule(RULES * rules,int num,int * rule)183 int rules_add_rule(RULES *rules, int num, int *rule) {
184 int i ,
185 w ;
186 SYMB a ,
187 t ;
188 SYMB *rule_start , *r ;
189 NODE u ;
190 NODE **Trie ;
191 KW *keyw ,
192 *k_s ;
193 KW ***o_l ;
194
195 if ( !rules ) return 1; /* error rules obj not initialized */
196 if ( !rules -> r_p ) return 2; /* RULE_PARAM not allocated */
197 if ( rules -> ready ) return 3; /* rules have already be readied */
198 if ( rules -> rule_number >= MAXRULES ) {
199 RET_ERR( "rules_add_rule: Too many rules are being added.",
200 rules -> err_p, 4);
201 }
202
203 /* get local copies of stuff saved in RULES */
204 o_l = rules -> r_p -> output_link ;
205 k_s = rules -> r_p -> key_space ;
206
207 Trie = rules -> Trie ;
208 r = rules -> r ;
209
210 keyw = k_s + rules -> rule_number ;
211 MEM_ERR(keyw, rules -> err_p, 5);
212
213 u = EPSILON ;
214 rule_start = r ; /* save rule start for inclusion in the record */
215 if ( rule_start > rules -> rule_end ) {
216 RET_ERR( "rules_add_rule: Too many rules for allocated memory.",
217 rules -> err_p, 5);
218 }
219
220 for (i=0; ; i++, r++ ) {
221 if (i >= num) {
222 RET_ERR( "rules_add_rule: invalid rule structure.",
223 rules -> err_p, 6);
224 }
225
226 *r = rule[i] ;
227 /* -- a fail at the beginning of a field indicates end of record
228 unless it's at the beginning of the record, in which case
229 it's the end of file -- */
230 if ( *r == FAIL ) {
231 if ( i == 0 ) return 0;
232 break;
233 }
234
235 /* -- check the input -- */
236 if ( !is_input_symbol( *r ) ) {
237 RET_ERR2( "rules_add_rule: Bad Input Token %d at rule %d",
238 *r,
239 rules -> rule_number ,
240 rules -> err_p,
241 7 ) ;
242 }
243
244 /* -- build the trie structure -- */
245 if ( Trie[ u ][ *r ] == FAIL ) {
246 if ( ++rules -> last_node >= MAXNODES ) {
247 RET_ERR( "rules_add_rule: Too many nodes in gamma function",
248 rules -> err_p,
249 8 ) ;
250 }
251 Trie[ u ][ *r ] = rules -> last_node ;
252 PAGC_CALLOC_STRUC(Trie[rules -> last_node],NODE,MAXINSYM,rules -> err_p,9) ;
253 for ( a = 0 ;
254 a < MAXINSYM ;
255 a++ ) {
256 Trie[ rules -> last_node ][ a ] = FAIL ;
257 }
258 if ( !initialize_link( rules -> err_p ,
259 o_l ,
260 rules -> last_node ) ) {
261 return 10;
262 }
263 }
264 u = Trie[ u ][ *r ] ;
265 } /* end of for loop */
266
267 keyw -> Input = rule_start ;
268 if ( ( keyw -> Length = i ) == 0 ) {
269 RET_ERR1( "rules_add_rule: Error 0 length rule #%d",
270 rules -> rule_number,
271 rules -> err_p,
272 11 ) ;
273 }
274
275 /* -- read the output tokens into the rule_space -- */
276 r++ ; /* -- move to beginning of the output tokens -- */
277 rule_start = r ; /* -- remember the beginning -- */
278 while ( TRUE ) {
279 i++;
280 if ( i >= num ) {
281 RET_ERR( "rules_add_rule: invalid rule structure.",
282 rules -> err_p, 6);
283 }
284 *r = rule[i] ;
285 if ( *r == FAIL ) break;
286 if ( !is_output_symbol( *r ) ) {
287 RET_ERR2( "rules_add_rule: Rule File: Non-Token %d in Rule #%d\n",
288 *r ,
289 rules -> rule_number,
290 rules -> err_p,
291 7 ) ;
292 }
293 r++ ;
294 }
295 keyw -> Output = rule_start ;
296
297 /* -- classify the output -- */
298 i++ ;
299 t = rule[i] ;
300 i++ ;
301 w = rule[i] ;
302
303 classify_link( rules -> r_p ,
304 o_l ,
305 keyw ,
306 u ,
307 w ,
308 t ) ;
309
310 rules -> rule_number++ ;
311 rules -> r = ++r ; ;
312 return 0;
313 }
314
315
rules_ready(RULES * rules)316 int rules_ready(RULES *rules) {
317 SYMB a;
318
319 if (!rules) return 1; /* error rules obj not initialized */
320 if (!rules->r_p) return 2; /* RULE_PARAM not allocated */
321 if (rules->ready) return 3; /* rules have already be readied */
322
323 rules -> r_p -> rules_read = rules->rule_number ;
324
325 if ( ++rules -> last_node >= MAXNODES ) {
326 RET_ERR( "rules_ready: Too many nodes in gamma function" ,
327 rules -> err_p, 4) ;
328 }
329
330 /* -- change the EPSILON node transitions in preparation for Gamma -- */
331 for ( a = 0 ;
332 a < MAXINSYM ;
333 a++ ) {
334 if ( rules -> Trie[ EPSILON ][ a ] == FAIL ) {
335 rules -> Trie[ EPSILON ][ a ] = EPSILON ;
336 }
337 }
338
339 /* -- create the global Gamma function matrix -- */
340 if ( ( rules -> r_p -> gamma_matrix =
341 precompute_gamma_function( rules -> err_p,
342 rules -> Trie ,
343 rules -> r_p -> output_link ,
344 rules -> last_node ) ) == NULL ) {
345 return 5 ;
346 }
347
348 /* -- no longer need the Trie -- */
349 PAGC_DESTROY_2D_ARRAY(rules -> Trie,NODE,rules -> last_node) ;
350 rules -> Trie = NULL ;
351
352 rules -> r_p -> num_nodes = rules -> last_node ;
353
354 /*
355 if ( glo_p -> log_init ) {
356 CLIENT_ERR( err_p ) ;
357 LOG_MESS2( "create_rules: Rules installed with %d nodes and %d rules",
358 rules -> last_node ,
359 rules->rule_number ,
360 err_p ) ;
361 }
362 */
363
364 rules -> ready = 1 ;
365
366 return 0;
367 }
368
rules_free(RULES * rules)369 void rules_free(RULES *rules) {
370
371 if (!rules) return;
372 if (rules->r_p) destroy_rules(rules->r_p);
373 free(rules);
374 rules = NULL;
375 }
376
377 #else
378
379 /*---------------------------------------------------------------------------
380 gamma.c (create_rules)
381 called by standard.l (init_stand_process)
382 calls util.c (open_aux_file)
383 calls gamma.c (initialize_link, is_input_symbol, is_output_symbol,
384 classify_link,precompute_gamma_function)
385 ----------------------------------------------------------------------------*/
create_rules(const char * rule_name,PAGC_GLOBAL * glo_p)386 RULE_PARAM *create_rules( const char *rule_name ,
387 PAGC_GLOBAL *glo_p ) {
388 /* -- returns size of Gamma Function Matrix -- */
389 SYMB a ,
390 t ;
391 NODE u ;
392 int i ,
393 w ;
394 int is_eof = FALSE ;
395 int rule_number = 0 ;
396 int last_node = EPSILON ;
397 FILE *rule_file ;
398 SYMB *rule_start ,
399 *rule_end ,
400 *r ;
401 KW *keyw , *k_s ;
402 KW ***o_l ;
403 NODE **Trie ;
404 SYMB *r_s ;
405 RULE_PARAM *r_p ;
406 ERR_PARAM *err_p ;
407
408 err_p = glo_p -> process_errors ;
409
410 PAGC_ALLOC_STRUC(r_p,RULE_PARAM,err_p,NULL) ;
411
412 /* -- initialize the statistics record -- */
413 r_p -> collect_statistics = FALSE ;
414 r_p -> total_best_keys = 0 ;
415 r_p -> total_key_hits = 0 ;
416
417
418 /* -- open the rule file, if possible -- */
419 if ( ( rule_file = open_aux_file( glo_p ,
420 rule_name ) ) == NULL ) {
421 return NULL ;
422 }
423 /* -- rule file has the format of i i ... i -1 o o ... o -1 t f -- */
424
425
426 /* -- storage for input and output records -- */
427 PAGC_CALLOC_STRUC(r_s,SYMB,RULESPACESIZE,err_p,NULL);
428
429 /* -- storage for temporary trie for rules -- */
430 PAGC_CALLOC_STRUC(Trie,NODE *,MAXNODES,err_p,NULL);
431
432 /* -- initialize the first( EPSILON ) node of the trie -- */
433 PAGC_CALLOC_STRUC(Trie[EPSILON],NODE,MAXINSYM,err_p,NULL);
434
435 for ( a = 0 ;
436 a < MAXINSYM ;
437 a++ ) {
438 Trie[ EPSILON ][ a ] = FAIL ;
439 }
440
441 /* -- storage for global output_link -- */
442 PAGC_CALLOC_STRUC(o_l,KW **,MAXNODES,err_p,NULL);
443 PAGC_CALLOC_STRUC(k_s,KW,MAXRULES,err_p,NULL);
444
445 rule_end = r_s + RULESPACESIZE ;
446 if ( !initialize_link( err_p ,
447 o_l ,
448 EPSILON ) ) {
449 return NULL ;
450 }
451 for ( r = r_s ;
452 !feof( rule_file ) ;
453 r++, rule_number++ ) {
454 if ( rule_number >= MAXRULES ) {
455 CLIENT_ERR( err_p ) ;
456 RET_ERR( "create_rules: Too many rules in file",
457 err_p,
458 NULL) ;
459 }
460 keyw = k_s + rule_number ;
461 MEM_ERR(keyw,err_p,NULL);
462 /* -- get input record -- */
463
464 u = EPSILON ;
465 rule_start = r ; /* -- save rule start for inclusion in record -- */
466 if ( rule_start > rule_end ) {
467 RET_ERR( "create_rules: Too many rules for allocated memory",
468 err_p,
469 NULL ) ;
470 }
471 for ( i = 0 ;
472 ;
473 i++, r++ ) {
474
475 /* -- read the first integer -- */
476 fscanf( rule_file,
477 "%d",
478 r ) ;
479 /* -- a fail at the beginning of a field indicates end of record
480 unless it's at the beginning of the record, in which case
481 it's the end of file -- */
482 if ( *r == FAIL ) {
483 if ( i == 0 ) {
484 is_eof = TRUE ;
485 }
486 break ;
487 }
488 /* -- check the input -- */
489 if ( !is_input_symbol( *r ) ) {
490 CLIENT_ERR( err_p ) ;
491 RET_ERR2( "create_rules: Rule file: Bad Input Token %d at rule %d",
492 *r,
493 rule_number ,
494 err_p,
495 NULL ) ;
496 }
497
498 /* -- build the trie structure -- */
499 if ( Trie[ u ][ *r ] == FAIL ) {
500 if ( ++last_node >= MAXNODES ) {
501 RET_ERR( "create_rules: Too many nodes in gamma function",
502 err_p,
503 NULL ) ;
504 }
505 Trie[ u ][ *r ] = last_node ;
506 PAGC_CALLOC_STRUC(Trie[last_node],NODE,MAXINSYM,err_p,NULL) ;
507 for ( a = 0 ;
508 a < MAXINSYM ;
509 a++ ) {
510 Trie[ last_node ][ a ] = FAIL ;
511 }
512 if ( !initialize_link( err_p ,
513 o_l ,
514 last_node ) ) {
515 return NULL ;
516 }
517 }
518 u = Trie[ u ][ *r ] ;
519 }
520 if ( is_eof )
521 break ;
522 keyw -> Input = rule_start ;
523 if ( ( keyw -> Length = i ) == 0 ) {
524 CLIENT_ERR( err_p ) ;
525 RET_ERR1( "create_rules: Error Rule File: 0 length rule #%d",
526 rule_number,
527 err_p,
528 NULL ) ;
529 }
530
531 /* -- read the output tokens into the rule_space -- */
532 r++ ; /* -- move to beginning of the output tokens -- */
533 rule_start = r ; /* -- remember the beginning -- */
534 while ( TRUE ) {
535 fscanf( rule_file,
536 "%d",
537 r ) ;
538 if ( *r == FAIL )
539 break ;
540 if ( !is_output_symbol( *r ) ) {
541 RET_ERR2( "create_rules: Rule File: Non-Token %d in Rule #%d\n",
542 *r ,
543 rule_number,
544 err_p,
545 NULL ) ;
546 }
547 r++ ;
548 }
549 keyw -> Output = rule_start ;
550
551 /* -- classify the output -- */
552 fscanf( rule_file ,
553 "%d" ,
554 &t ) ;
555 fscanf( rule_file ,
556 "%d" ,
557 &w ) ;
558
559 classify_link( r_p ,
560 o_l ,
561 keyw ,
562 u ,
563 w ,
564 t ) ;
565 } /* -- end of file read -- */
566
567
568 r_p -> rule_space = r_s ;
569 r_p -> key_space = k_s ;
570 r_p -> output_link = o_l ;
571 r_p -> rules_read = rule_number ;
572
573 fclose( rule_file ) ;
574
575
576 if ( ++last_node >= MAXNODES ) {
577 RET_ERR( "create_rules: Too many nodes in gamma function" ,
578 err_p,
579 NULL) ;
580 }
581 /* -- change the EPSILON node transitions in preparation for Gamma -- */
582 for ( a = 0 ;
583 a < MAXINSYM ;
584 a++ ) {
585 if ( Trie[ EPSILON ][ a ] == FAIL ) {
586 Trie[ EPSILON ][ a ] = EPSILON ;
587 }
588 }
589
590 /* -- create the global Gamma function matrix -- */
591 if ( ( r_p -> gamma_matrix = precompute_gamma_function( err_p,
592 Trie ,
593 o_l ,
594 last_node ) ) == NULL ) {
595 return NULL ;
596 }
597
598 /* -- no longer need the Trie -- */
599 PAGC_DESTROY_2D_ARRAY(Trie,NODE,last_node) ;
600
601
602 r_p -> num_nodes = last_node ;
603
604 if ( glo_p -> log_init ) {
605 CLIENT_ERR( err_p ) ;
606 LOG_MESS2( "create_rules: Rules installed with %d nodes and %d rules",
607 last_node ,
608 rule_number ,
609 err_p ) ;
610 }
611
612 return r_p ;
613 }
614
615 #endif
616
617 /*---------------------------------------------------------------------------
618 gamma.c (destroy_rules)
619 ----------------------------------------------------------------------------*/
destroy_rules(RULE_PARAM * r_p)620 void destroy_rules( RULE_PARAM * r_p ) {
621 if ( r_p != NULL ) {
622 DBG("destroy_rules 1");
623 FREE_AND_NULL( r_p -> rule_space ) ;
624 DBG("destroy_rules 2");
625 FREE_AND_NULL( r_p -> key_space ) ;
626 DBG("destroy_rules 3");
627 PAGC_DESTROY_2D_ARRAY(r_p->output_link,KW*,r_p->num_nodes) ;
628 DBG("destroy_rules 4");
629 PAGC_DESTROY_2D_ARRAY(r_p->gamma_matrix,NODE,r_p->num_nodes) ;
630 DBG(" destroy_rules 5");
631 FREE_AND_NULL( r_p ) ;
632 }
633 }
634
635 /* ========================= Output Links ========================= */
636
637 /*---------------------------------------------------------------------------
638 gamma.c (initalize_link)
639 called by gamma.c (create_rules)
640 ----------------------------------------------------------------------------*/
initialize_link(ERR_PARAM * err_p,KW *** o_l,NODE u)641 static int initialize_link( ERR_PARAM *err_p ,
642 KW ***o_l ,
643 NODE u ) {
644 int cl ;
645
646 /* -- classification by clause type -- */
647
648 PAGC_CALLOC_STRUC(o_l[u],KW *,MAX_CL,err_p,FALSE);
649 for ( cl = 0 ;
650 cl < MAX_CL ;
651 cl++ ) {
652
653 o_l[ u ][ cl ] = NULL ;
654 }
655 return TRUE ;
656 }
657
658 /*---------------------------------------------------------------------------
659 gamma.c (classify_link)
660 called by gamma.c (create_rules)
661 ----------------------------------------------------------------------------*/
classify_link(RULE_PARAM * r_p,KW *** o_l,KW * k,NODE u,SYMB w,SYMB c)662 static void classify_link( RULE_PARAM *r_p ,
663 KW ***o_l , /* -- 2006-11-02 : arg -- */
664 KW *k ,
665 NODE u ,
666 SYMB w ,
667 SYMB c ) {
668
669 /* -- classification by clause type -- */
670 KW * last_key ,
671 * penult ;
672
673 k -> hits = 0 ;
674 k -> best = 0 ;
675 k -> Type = c ;
676 k -> Weight = w ;
677 last_key = o_l[ u ][ c ] ; /* -- 2006-11-02 : arg -- */
678 if ( last_key == NULL ) {
679 o_l[ u ][ c ] = k ; /* -- 2006-11-02 : arg -- */
680
681 } else {
682 /* -- if the same input symbols are used... -- */
683 while ( ( penult = last_key -> OutputNext ) != NULL )
684 last_key = penult ;
685 last_key -> OutputNext = k ;
686 }
687 /* -- initialize in anticipation of failure extensions -- */
688 k -> OutputNext = NULL ;
689
690 }
691
692 /*---------------------------------------------------------------------------
693 gamma.c (add_failure_linkage)
694 called by gamma.c (precompute_gamma_function)
695 ----------------------------------------------------------------------------*/
add_failure_linkage(KW *** o_l,NODE x,NODE u)696 static void add_failure_linkage( KW ***o_l ,
697 NODE x ,
698 NODE u ) {
699 /* -- called by precompute_gamma_function
700 -- x is the node in the failure function of the node u
701 -- classification by clause type -- */
702 KW *k ,
703 *fk ;
704 int cl ;
705
706 for ( cl = 0 ;
707 cl < MAX_CL ;
708 cl++ ) {
709 /* -- append the failure keys for each class to the end of the
710 appropriate chain -- */
711 fk = o_l[ x ][ cl ] ;
712 k = o_l[ u ][ cl ] ;
713 if ( k == NULL ) {
714 o_l[ u ][ cl ] = fk ;
715 } else {
716 /* -- since the chain will be already null-terminated, we only find
717 the end of the chain if fk is non-null -- */
718 if ( fk != NULL ) {
719 /* -- append to the end of the list and make sure that the longer
720 lengths go first - this is probably redundant. -- */
721 while ( k -> OutputNext != NULL ) {
722 k = k -> OutputNext ;
723 }
724 k -> OutputNext = fk ;
725 }
726 }
727 }
728 }
729
730 /*---------------------------------------------------------------------------
731 gamma.c (precompute_gamma_function)
732 called by gamma.c (create_rules)
733 calls gamma.c (add_failure_linkage)
734 ----------------------------------------------------------------------------*/
precompute_gamma_function(ERR_PARAM * err_p,NODE ** Trie,KW *** o_l,NODE n)735 static NODE **precompute_gamma_function( ERR_PARAM *err_p ,
736 NODE **Trie ,
737 KW ***o_l ,
738 NODE n ) {
739 NODE u ,
740 ua ,
741 x ;
742 SYMB a ;
743 int i ,
744 j ;
745 NODE **Gamma ;
746 NODE *Failure ,
747 *Queue ;
748
749 /* -- Storage for Failure Function -- */
750 PAGC_CALLOC_STRUC(Failure,NODE,n,err_p,NULL) ;
751 /* -- Storage for Breadth First Search Queue -- */
752 PAGC_CALLOC_STRUC(Queue,NODE,n,err_p,NULL) ;
753
754 PAGC_CALLOC_2D_ARRAY(Gamma,NODE,n,MAXINSYM,err_p,NULL) ;
755
756 u = EPSILON ;
757 i = 0 ;
758 for ( a = 0 ;
759 a < MAXINSYM ;
760 a++ ) {
761 x = Trie[ EPSILON ][ a ] ;
762 Gamma[ EPSILON ][ a ] = x ;
763 Failure[ x ] = EPSILON ;
764 /* -- add to Queue for breadth-first search -- */
765 if ( x != EPSILON ) {
766 Queue[ i++ ] = x ;
767 }
768 }
769 Queue[ i ] = FAIL ; /* -- terminate the list of nodes to process -- */
770
771 for ( j = 0 ;
772 Queue[ j ] != FAIL ;
773 j++ ) {
774 u = Queue[ j ] ;
775 /* -- get non-Fail transitions from Trie onto queue -- */
776 for ( a = 0 ;
777 a < MAXINSYM ;
778 a++ ) {
779 if ( ( x = Trie[ u ][ a ] ) != FAIL ) {
780 Queue[ i++ ] = x ;
781 }
782 }
783 Queue[ i ] = FAIL ; /* -- mark end of list -- */
784 x = Failure[ u ] ;
785 add_failure_linkage( o_l ,
786 x ,
787 u ) ;
788 for ( a = 0 ;
789 a < MAXINSYM ;
790 a ++ ) {
791 ua = Trie[ u ][ a ] ;
792 if ( ua != FAIL ) {
793 Gamma[ u ][ a ] = ua ;
794 Failure[ ua ] = Gamma[ x ][ a ] ;
795 } else {
796 Gamma[ u ][ a ] = Gamma[ x ][ a ] ;
797 }
798 }
799 }
800 FREE_AND_NULL( Failure ) ;
801 FREE_AND_NULL( Queue ) ;
802 return Gamma ;
803 }
804
805
806
807 static const char *rule_type_names[] = {
808 "MACRO" , "MICRO" , "ARC" , "CIVIC" , "EXTRA"
809 } ;
810
811 /* =========================================
812 gamma.c (output_rule_statistics)
813 uses macro OPEN_ALLOCATED_NAME
814 stdio.h (printf,fprintf,fflush,fclose)
815 ===========================================*/
816 #ifdef BUILD_API
output_rule_statistics(RULE_PARAM * r_p,ERR_PARAM * err_p)817 int output_rule_statistics( RULE_PARAM *r_p, ERR_PARAM *err_p ) {
818 #else
819 int output_rule_statistics( RULE_PARAM *r_p ,
820 ERR_PARAM *err_p ,
821 char *name ,
822 DS_Handle _file_sys_p ) {
823 #endif
824 int i ,
825 found_count ,
826 n ;
827 SYMB *OL ;
828 char *sts_name = NULL ;
829 FILE *sts_file = NULL ;
830 KW * k ;
831 KW * k_s ;
832 double hit_frequency ,
833 best_frequency ;
834
835 if ( !r_p -> collect_statistics ) {
836 printf( "Statistics were not collected\n" ) ;
837 return FALSE ;
838 }
839
840 #ifndef BUILD_API
841 if ( name != NULL && name[ 0 ] != SENTINEL ) {
842 OPEN_ALLOCATED_NAME(sts_name,"sts",sts_file,name,"wb+",_file_sys_p,err_p,FALSE) ;
843 }
844 #endif
845
846 /* -- cycle through the keys -- */
847 n = r_p -> rules_read ;
848 k_s = r_p -> key_space ;
849 for ( i = 0 , found_count = 0 ;
850 i < n ;
851 i++ ) {
852 k = k_s + i ;
853 if ( k -> hits == 0 ) {
854 continue ;
855 }
856
857 found_count++ ;
858 if ( sts_file == NULL ) {
859 printf( "\nRule %d is of type %d (%s)\n: " ,
860 i ,
861 k -> Type ,
862 rule_type_names[ k -> Type ] ) ;
863 printf( "Input : " ) ;
864 } else {
865 fprintf( sts_file ,
866 "\nRule %d is of type %d (%s)\n: " ,
867 i ,
868 k -> Type ,
869 rule_type_names[ k -> Type ] ) ;
870 fprintf( sts_file ,
871 "Input : " ) ;
872 }
873 for ( OL = k -> Input ;
874 *OL != FAIL ;
875 OL++ ) {
876 if ( sts_file == NULL ) {
877 printf( "|%d (%s)|" ,
878 *OL ,
879 in_symb_name( *OL ) ) ;
880 } else {
881 fprintf( sts_file ,
882 "|%d (%s)|" ,
883 *OL ,
884 in_symb_name( *OL ) ) ;
885 }
886 }
887 if ( sts_file == NULL ) {
888 printf( "\nOutput: " ) ;
889
890 } else {
891 fprintf( sts_file ,
892 "\nOutput: " ) ;
893 }
894 /* -- output the output symbols -- */
895 for ( OL = k -> Output ;
896 *OL != FAIL ;
897 OL++ ) {
898 if ( sts_file == NULL ) {
899 printf( "|%d (%s)|" ,
900 *OL ,
901 out_symb_name( *OL ) ) ;
902 } else {
903 fprintf( sts_file ,
904 "|%d (%s)|" ,
905 *OL ,
906 out_symb_name( *OL ) ) ;
907 }
908 }
909 if ( sts_file == NULL ) {
910 printf ( "\nrank %d ( %f): hits %d out of %d\n" ,
911 k -> Weight ,
912 load_value[ k -> Weight ] ,
913 k->hits,
914 r_p -> total_key_hits ) ;
915 } else {
916 hit_frequency = ( ( double ) k -> hits ) / ( ( double ) r_p -> total_key_hits ) ;
917 best_frequency = ( ( double ) k -> best ) / ( ( double ) r_p -> total_best_keys ) ;
918 fprintf( sts_file ,
919 "\nrank %d ( %f): hit frequency: %f, best frequency: %f" ,
920 k -> Weight ,
921 load_value[ k -> Weight ] ,
922 hit_frequency ,
923 best_frequency ) ;
924 fprintf ( sts_file ,
925 "\n%d hits out of %d, best %d out of %d\n" ,
926 k->hits, r_p -> total_key_hits, k-> best, r_p -> total_best_keys ) ;
927 }
928 k -> hits = 0 ;
929 k -> best = 0 ;
930 }
931 if ( sts_file == NULL ) {
932 printf( "Found %d rules hit\n" , found_count ) ;
933 } else {
934 fprintf( sts_file ,
935 "Found %d rules hit\n" ,
936 found_count ) ;
937 }
938 /* -- start over -- */
939 r_p -> total_key_hits = 0 ;
940 r_p -> total_best_keys = 0 ;
941 if ( sts_file != NULL ) {
942 fflush( sts_file ) ;
943 fclose( sts_file ) ;
944 FREE_AND_NULL( sts_name ) ;
945 } else {
946 fflush( stdout ) ;
947 }
948 return TRUE ;
949 }
950
951