1 /* -- gamma.c
2 
3 This file reads the rules file into memory and sets up the rule
4 lookup structures. These are based on the optimized Aho-Corasick
5 algorithms in Watson (1994).
6 
7 Copyright (c) 2008 Walter Bruce Sinclair
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12 
13 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
14 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 
16 */
17 /* For pagc-0.4.0 : last revised 2010-11-01 */
18 
19 #undef DEBUG
20 //#define DEBUG
21 
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <stddef.h>
25 #include "pagc_api.h"
26 #include "gamma.h"
27 
28 #ifdef BUILD_API
29 #include "pagc_std_api.h"
30 RULES *rules_init( ERR_PARAM *err_p ) ;
31 #endif
32 
33 /* -- local prototypes -- */
34 static int initialize_link( ERR_PARAM *, KW *** , NODE ) ;
35 static void classify_link( RULE_PARAM * , KW ***, KW *, NODE , SYMB , SYMB  ) ;
36 static void add_failure_linkage( KW ***, NODE , NODE  ) ;
37 static NODE **precompute_gamma_function( ERR_PARAM *, NODE ** , KW ***, NODE  ) ;
38 
39 static double load_value[ NUMBER_OF_WEIGHTS ] = {
40    0.00, 0.325, 0.35 , 0.375 , 0.4 ,
41    0.475 , 0.55, 0.6 , 0.65 , 0.675 ,
42    0.7 , 0.75 , 0.8 , 0.825 , 0.85 ,
43    0.9 , 0.95 , 1.00 } ;
44 
45 /*---------------------------------------------------------------------------
46 gamma.c (refresh_transducer)
47 called by analyze.c (prepare_target_pattern)
48 The registry of matching keywords is regenerated with the use of the
49 precomputed Gamma function, Output Links and the current target.
50 ----------------------------------------------------------------------------*/
refresh_transducer(NODE * r,SYMB * S,NODE ** gamma_function)51 void refresh_transducer( NODE *r ,
52                          SYMB *S ,
53                          NODE **gamma_function ) {
54    NODE q ;
55    int i ;
56 
57    i = 0 ;
58    q = r[ i ] = EPSILON ;
59    while ( S[ i ] != FAIL ) {
60       q = gamma_function[ q ][ S[ i ] ] ;
61       i++ ;
62       r[ i ] = q ;
63    }
64 }
65 
66 /*---------------------------------------------------------------------------
67 gamma.c (is_input_symbol)
68 called by gamma.c (create_rules)
69 ----------------------------------------------------------------------------*/
is_input_symbol(SYMB sym)70 int is_input_symbol( SYMB sym ) {
71 
72    if ( sym > MAXINSYM ||
73         sym < 0 )
74       return FALSE ;
75    return TRUE ;
76 }
77 
78 /*---------------------------------------------------------------------------
79 gamma.c (is_output_symbol)
80 called by gamma.c (create_rules)
81 ----------------------------------------------------------------------------*/
is_output_symbol(SYMB sym)82 int is_output_symbol( SYMB sym ) {
83    if ( sym > MAXOUTSYM ||
84         sym < 0 )
85       return FALSE ;
86    return TRUE ;
87 }
88 
89 #ifdef BUILD_API
90 
91 /*
92 typedef struct RULES_s {
93     int ready;
94     int rule_number;
95     int last_node;
96     RULE_PARAM *r_p;
97     ERR_PARAM *err_p;
98     NODE **Trie;
99     SYMB *rule_end ;
100     SYMB *r ;
101 } RULES;
102 */
103 
104 /*---------------------------------------------------------------------------
105 gamma.c (rules_init)
106 api interface to replace (create_rules)
107 ---------------------------------------------------------------------------*/
rules_init(ERR_PARAM * err_p)108 RULES *rules_init( ERR_PARAM *err_p ) {
109     RULES *rules;
110     /* -- returns size of Gamma Function Matrix -- */
111     SYMB a ;
112     KW *k_s ;
113     KW ***o_l ;
114     NODE **Trie ;
115     SYMB *r_s ;
116     RULE_PARAM *r_p ;
117 
118 
119     PAGC_CALLOC_STRUC(rules,RULES,1,err_p,NULL);
120     rules->err_p = err_p;
121     rules->ready = 0;
122     rules->rule_number = 0;
123     rules->last_node = EPSILON;
124 
125     PAGC_ALLOC_STRUC(r_p,RULE_PARAM,err_p,NULL) ;
126     rules->r_p = r_p;
127 
128     /* -- initialize the statistics record -- */
129     r_p -> collect_statistics = FALSE ;
130     r_p -> total_best_keys = 0 ;
131     r_p -> total_key_hits = 0 ;
132 
133     /* -- storage for input and output records -- */
134     PAGC_CALLOC_STRUC(r_s,SYMB,RULESPACESIZE,err_p,NULL);
135 
136     /* -- storage for temporary trie for rules -- */
137     PAGC_CALLOC_STRUC(Trie,NODE *,MAXNODES,err_p,NULL);
138 
139     /* -- initialize the first( EPSILON ) node of the trie -- */
140     PAGC_CALLOC_STRUC(Trie[EPSILON],NODE,MAXINSYM,err_p,NULL);
141 
142     for ( a = 0 ;
143           a < MAXINSYM ;
144           a++ ) {
145        Trie[ EPSILON ][ a ] = FAIL ;
146     }
147 
148     /* -- storage for global output_link -- */
149     PAGC_CALLOC_STRUC(o_l,KW **,MAXNODES,err_p,NULL);
150     PAGC_CALLOC_STRUC(k_s,KW,MAXRULES,err_p,NULL);
151 
152     if ( !initialize_link( err_p ,
153                            o_l ,
154                            EPSILON ) ) {
155 
156        /* Cleanup allocated resources */
157        FREE_AND_NULL(o_l);
158        FREE_AND_NULL(k_s);
159        FREE_AND_NULL(r_p);
160 
161        PAGC_DESTROY_2D_ARRAY(rules -> Trie,NODE,MAXINSYM);
162        rules -> Trie = NULL;
163 
164        rules_free(rules);
165        FREE_AND_NULL(rules);
166 
167        return NULL ;
168     }
169 
170     rules -> r_p -> rule_space = r_s ;
171     rules -> r_p -> key_space = k_s ;
172     rules -> r_p -> output_link = o_l ;
173 
174     rules -> Trie = Trie ;
175     rules -> rule_end = r_s + RULESPACESIZE ;
176 
177     rules -> r = r_s ;
178 
179     return rules;
180 }
181 
182 
rules_add_rule(RULES * rules,int num,int * rule)183 int rules_add_rule(RULES *rules, int num, int *rule) {
184     int i ,
185         w ;
186     SYMB a ,
187          t ;
188     SYMB *rule_start , *r ;
189     NODE u ;
190     NODE **Trie ;
191     KW *keyw ,
192        *k_s ;
193     KW ***o_l ;
194 
195     if ( !rules ) return 1;       /* error rules obj not initialized */
196     if ( !rules -> r_p ) return 2;  /* RULE_PARAM not allocated */
197     if ( rules -> ready ) return 3; /* rules have already be readied */
198     if ( rules -> rule_number >= MAXRULES ) {
199         RET_ERR( "rules_add_rule: Too many rules are being added.",
200                  rules -> err_p, 4);
201     }
202 
203     /* get local copies of stuff saved in RULES */
204     o_l = rules -> r_p -> output_link ;
205     k_s = rules -> r_p -> key_space ;
206 
207     Trie = rules -> Trie ;
208     r = rules -> r ;
209 
210     keyw = k_s + rules -> rule_number ;
211     MEM_ERR(keyw, rules -> err_p, 5);
212 
213     u = EPSILON ;
214     rule_start = r ; /* save rule start for inclusion in the record */
215     if ( rule_start > rules -> rule_end ) {
216         RET_ERR( "rules_add_rule: Too many rules for allocated memory.",
217                  rules -> err_p, 5);
218     }
219 
220     for (i=0; ; i++, r++ ) {
221         if (i >= num) {
222             RET_ERR( "rules_add_rule: invalid rule structure.",
223                      rules -> err_p, 6);
224         }
225 
226         *r = rule[i] ;
227         /* -- a fail at the beginning of a field indicates end of record
228            unless it's at the beginning of the record, in which case
229            it's the end of file -- */
230         if ( *r == FAIL ) {
231             if ( i == 0 ) return 0;
232             break;
233         }
234 
235         /* -- check the input -- */
236         if ( !is_input_symbol( *r ) ) {
237             RET_ERR2( "rules_add_rule: Bad Input Token %d at rule %d",
238                       *r,
239                       rules -> rule_number ,
240                       rules -> err_p,
241                       7 ) ;
242         }
243 
244         /* -- build the trie structure -- */
245         if ( Trie[ u ][ *r ] == FAIL ) {
246             if ( ++rules -> last_node >= MAXNODES ) {
247                 RET_ERR( "rules_add_rule: Too many nodes in gamma function",
248                         rules -> err_p,
249                         8 ) ;
250             }
251             Trie[ u ][ *r ] = rules -> last_node ;
252             PAGC_CALLOC_STRUC(Trie[rules -> last_node],NODE,MAXINSYM,rules -> err_p,9) ;
253             for ( a = 0 ;
254                   a < MAXINSYM ;
255                   a++ ) {
256                 Trie[ rules -> last_node ][ a ] = FAIL ;
257             }
258             if ( !initialize_link( rules -> err_p ,
259                                    o_l ,
260                                    rules -> last_node ) ) {
261                 return 10;
262             }
263         }
264         u = Trie[ u ][ *r ] ;
265     } /* end of for loop */
266 
267     keyw -> Input = rule_start ;
268     if ( ( keyw -> Length = i ) == 0 ) {
269         RET_ERR1( "rules_add_rule: Error 0 length rule #%d",
270                   rules -> rule_number,
271                   rules -> err_p,
272                   11 ) ;
273     }
274 
275     /* -- read the output tokens into the rule_space -- */
276     r++ ; /* -- move to beginning of the output tokens -- */
277     rule_start = r ; /* -- remember the beginning -- */
278     while ( TRUE ) {
279         i++;
280         if ( i >= num ) {
281             RET_ERR( "rules_add_rule: invalid rule structure.",
282                      rules -> err_p, 6);
283         }
284         *r = rule[i] ;
285         if ( *r == FAIL ) break;
286         if ( !is_output_symbol( *r ) ) {
287             RET_ERR2( "rules_add_rule: Rule File: Non-Token %d in Rule #%d\n",
288                       *r ,
289                       rules -> rule_number,
290                       rules -> err_p,
291                       7 ) ;
292         }
293         r++ ;
294     }
295     keyw -> Output = rule_start ;
296 
297     /* -- classify the output -- */
298     i++ ;
299     t = rule[i] ;
300     i++ ;
301     w = rule[i] ;
302 
303     classify_link( rules -> r_p ,
304                    o_l ,
305                    keyw ,
306                    u ,
307                    w ,
308                    t ) ;
309 
310     rules -> rule_number++ ;
311     rules -> r = ++r ; ;
312     return 0;
313 }
314 
315 
rules_ready(RULES * rules)316 int rules_ready(RULES *rules) {
317     SYMB a;
318 
319     if (!rules) return 1;       /* error rules obj not initialized */
320     if (!rules->r_p) return 2;  /* RULE_PARAM not allocated */
321     if (rules->ready) return 3; /* rules have already be readied */
322 
323     rules -> r_p -> rules_read = rules->rule_number ;
324 
325     if ( ++rules -> last_node >= MAXNODES ) {
326         RET_ERR( "rules_ready: Too many nodes in gamma function" ,
327                  rules -> err_p, 4) ;
328     }
329 
330     /* -- change the EPSILON node transitions in preparation for Gamma -- */
331     for ( a = 0 ;
332           a < MAXINSYM ;
333           a++ ) {
334        if ( rules -> Trie[ EPSILON ][ a ] == FAIL ) {
335           rules -> Trie[ EPSILON ][ a ] = EPSILON ;
336        }
337     }
338 
339     /* -- create the global Gamma function matrix -- */
340     if ( ( rules -> r_p -> gamma_matrix =
341             precompute_gamma_function( rules -> err_p,
342                                        rules -> Trie ,
343                                        rules -> r_p -> output_link ,
344                                        rules -> last_node ) ) == NULL ) {
345        return 5 ;
346     }
347 
348     /* -- no longer need the Trie -- */
349     PAGC_DESTROY_2D_ARRAY(rules -> Trie,NODE,rules -> last_node) ;
350     rules -> Trie = NULL ;
351 
352     rules -> r_p -> num_nodes = rules -> last_node ;
353 
354 /*
355     if ( glo_p -> log_init ) {
356        CLIENT_ERR( err_p ) ;
357        LOG_MESS2( "create_rules: Rules installed with %d nodes and %d rules",
358                   rules -> last_node ,
359                   rules->rule_number ,
360                   err_p ) ;
361     }
362 */
363 
364     rules -> ready = 1 ;
365 
366     return 0;
367 }
368 
rules_free(RULES * rules)369 void rules_free(RULES *rules) {
370 
371     if (!rules) return;
372     if (rules->r_p) destroy_rules(rules->r_p);
373     free(rules);
374     rules = NULL;
375 }
376 
377 #else
378 
379 /*---------------------------------------------------------------------------
380 gamma.c (create_rules)
381 called by standard.l (init_stand_process)
382 calls util.c (open_aux_file)
383 calls gamma.c (initialize_link, is_input_symbol, is_output_symbol,
384 classify_link,precompute_gamma_function)
385 ----------------------------------------------------------------------------*/
create_rules(const char * rule_name,PAGC_GLOBAL * glo_p)386 RULE_PARAM *create_rules( const char *rule_name ,
387                           PAGC_GLOBAL *glo_p ) {
388    /* -- returns size of Gamma Function Matrix -- */
389    SYMB a ,
390         t ;
391    NODE u ;
392    int i ,
393        w ;
394    int is_eof = FALSE ;
395    int rule_number = 0 ;
396    int last_node = EPSILON ;
397    FILE *rule_file ;
398    SYMB *rule_start ,
399         *rule_end ,
400         *r ;
401    KW *keyw , *k_s ;
402    KW ***o_l ;
403    NODE **Trie ;
404    SYMB *r_s ;
405    RULE_PARAM *r_p ;
406    ERR_PARAM *err_p ;
407 
408    err_p = glo_p -> process_errors ;
409 
410    PAGC_ALLOC_STRUC(r_p,RULE_PARAM,err_p,NULL) ;
411 
412    /* -- initialize the statistics record -- */
413    r_p -> collect_statistics = FALSE ;
414    r_p -> total_best_keys = 0 ;
415    r_p -> total_key_hits = 0 ;
416 
417 
418    /* -- open the rule file, if possible -- */
419    if ( ( rule_file = open_aux_file( glo_p ,
420                                      rule_name ) ) == NULL ) {
421       return NULL ;
422    }
423    /* -- rule file has the format of i i ... i -1 o o ... o -1 t f -- */
424 
425 
426    /* -- storage for input and output records -- */
427    PAGC_CALLOC_STRUC(r_s,SYMB,RULESPACESIZE,err_p,NULL);
428 
429    /* -- storage for temporary trie for rules -- */
430    PAGC_CALLOC_STRUC(Trie,NODE *,MAXNODES,err_p,NULL);
431 
432    /* -- initialize the first( EPSILON ) node of the trie -- */
433    PAGC_CALLOC_STRUC(Trie[EPSILON],NODE,MAXINSYM,err_p,NULL);
434 
435    for ( a = 0 ;
436          a < MAXINSYM ;
437          a++ ) {
438       Trie[ EPSILON ][ a ] = FAIL ;
439    }
440 
441    /* -- storage for global output_link -- */
442    PAGC_CALLOC_STRUC(o_l,KW **,MAXNODES,err_p,NULL);
443    PAGC_CALLOC_STRUC(k_s,KW,MAXRULES,err_p,NULL);
444 
445    rule_end = r_s + RULESPACESIZE ;
446    if ( !initialize_link( err_p ,
447                           o_l ,
448                           EPSILON ) ) {
449       return NULL ;
450    }
451    for ( r = r_s ;
452          !feof( rule_file ) ;
453          r++, rule_number++ ) {
454       if ( rule_number >= MAXRULES ) {
455          CLIENT_ERR( err_p ) ;
456          RET_ERR( "create_rules: Too many rules in file",
457                   err_p,
458                   NULL) ;
459       }
460       keyw = k_s + rule_number ;
461       MEM_ERR(keyw,err_p,NULL);
462       /* -- get input record -- */
463 
464       u = EPSILON ;
465       rule_start = r ; /* -- save rule start for inclusion in record -- */
466       if ( rule_start > rule_end ) {
467          RET_ERR( "create_rules: Too many rules for allocated memory",
468                   err_p,
469                   NULL ) ;
470       }
471       for ( i = 0 ;
472             ;
473             i++, r++  ) {
474 
475          /* -- read the first integer -- */
476          fscanf( rule_file,
477                  "%d",
478                  r ) ;
479          /* -- a fail at the beginning of a field indicates end of record
480             unless it's at the beginning of the record, in which case
481             it's the end of file -- */
482          if ( *r == FAIL ) {
483             if ( i == 0 ) {
484                is_eof = TRUE ;
485             }
486             break ;
487          }
488          /* -- check the input -- */
489          if ( !is_input_symbol( *r ) ) {
490             CLIENT_ERR( err_p ) ;
491             RET_ERR2( "create_rules: Rule file: Bad Input Token %d at rule %d",
492                       *r,
493                       rule_number ,
494                       err_p,
495                       NULL ) ;
496          }
497 
498          /* -- build the trie structure -- */
499          if ( Trie[ u ][ *r ] == FAIL ) {
500             if ( ++last_node >= MAXNODES ) {
501                RET_ERR( "create_rules: Too many nodes in gamma function",
502                         err_p,
503                         NULL ) ;
504             }
505             Trie[ u ][ *r ] = last_node ;
506             PAGC_CALLOC_STRUC(Trie[last_node],NODE,MAXINSYM,err_p,NULL) ;
507             for ( a = 0 ;
508                   a < MAXINSYM ;
509                   a++ ) {
510                Trie[ last_node ][ a ] = FAIL ;
511             }
512             if ( !initialize_link( err_p ,
513                                    o_l ,
514                                    last_node ) ) {
515                return NULL ;
516             }
517          }
518          u = Trie[ u ][ *r ] ;
519       }
520       if ( is_eof )
521          break ;
522       keyw -> Input = rule_start ;
523       if ( ( keyw -> Length = i ) == 0 ) {
524          CLIENT_ERR( err_p ) ;
525          RET_ERR1( "create_rules: Error Rule File: 0 length rule #%d",
526                    rule_number,
527                    err_p,
528                    NULL ) ;
529       }
530 
531       /* -- read the output tokens into the rule_space -- */
532       r++ ; /* -- move to beginning of the output tokens -- */
533       rule_start = r ; /* -- remember the beginning -- */
534       while ( TRUE ) {
535          fscanf( rule_file,
536                  "%d",
537                  r ) ;
538          if ( *r == FAIL )
539             break ;
540          if ( !is_output_symbol( *r ) ) {
541             RET_ERR2( "create_rules: Rule File: Non-Token %d in Rule #%d\n",
542                       *r ,
543                       rule_number,
544                       err_p,
545                       NULL ) ;
546          }
547          r++ ;
548       }
549       keyw -> Output = rule_start ;
550 
551       /* -- classify the output -- */
552       fscanf( rule_file ,
553               "%d" ,
554               &t ) ;
555       fscanf( rule_file ,
556               "%d" ,
557               &w ) ;
558 
559       classify_link( r_p ,
560                      o_l ,
561                      keyw ,
562                      u ,
563                      w ,
564                      t ) ;
565    } /* -- end of file read -- */
566 
567 
568    r_p -> rule_space = r_s ;
569    r_p -> key_space = k_s ;
570    r_p -> output_link = o_l ;
571    r_p -> rules_read = rule_number ;
572 
573    fclose( rule_file ) ;
574 
575 
576    if ( ++last_node >= MAXNODES ) {
577       RET_ERR( "create_rules: Too many nodes in gamma function" ,
578                err_p,
579                NULL) ;
580    }
581    /* -- change the EPSILON node transitions in preparation for Gamma -- */
582    for ( a = 0 ;
583          a < MAXINSYM ;
584          a++ ) {
585       if ( Trie[ EPSILON ][ a ] == FAIL ) {
586          Trie[ EPSILON ][ a ] = EPSILON ;
587       }
588    }
589 
590    /* -- create the global Gamma function matrix -- */
591    if ( ( r_p -> gamma_matrix = precompute_gamma_function( err_p,
592                                                            Trie ,
593                                                            o_l ,
594                                                            last_node ) ) == NULL ) {
595       return NULL ;
596    }
597 
598    /* -- no longer need the Trie -- */
599    PAGC_DESTROY_2D_ARRAY(Trie,NODE,last_node) ;
600 
601 
602    r_p -> num_nodes = last_node ;
603 
604    if ( glo_p -> log_init ) {
605       CLIENT_ERR( err_p ) ;
606       LOG_MESS2( "create_rules: Rules installed with %d nodes and %d rules",
607                  last_node ,
608                  rule_number ,
609                  err_p ) ;
610    }
611 
612    return r_p ;
613 }
614 
615 #endif
616 
617 /*---------------------------------------------------------------------------
618 gamma.c (destroy_rules)
619 ----------------------------------------------------------------------------*/
destroy_rules(RULE_PARAM * r_p)620 void destroy_rules( RULE_PARAM * r_p ) {
621    if ( r_p != NULL ) {
622       DBG("destroy_rules 1");
623       FREE_AND_NULL( r_p -> rule_space ) ;
624       DBG("destroy_rules 2");
625       FREE_AND_NULL( r_p -> key_space ) ;
626       DBG("destroy_rules 3");
627       PAGC_DESTROY_2D_ARRAY(r_p->output_link,KW*,r_p->num_nodes) ;
628       DBG("destroy_rules 4");
629       PAGC_DESTROY_2D_ARRAY(r_p->gamma_matrix,NODE,r_p->num_nodes) ;
630       DBG(" destroy_rules 5");
631       FREE_AND_NULL( r_p ) ;
632    }
633 }
634 
635 /* ========================= Output Links ========================= */
636 
637 /*---------------------------------------------------------------------------
638 gamma.c (initalize_link)
639 called by gamma.c (create_rules)
640 ----------------------------------------------------------------------------*/
initialize_link(ERR_PARAM * err_p,KW *** o_l,NODE u)641 static int initialize_link( ERR_PARAM *err_p ,
642                             KW ***o_l ,
643                             NODE u ) {
644    int cl ;
645 
646    /* -- classification by clause type -- */
647 
648    PAGC_CALLOC_STRUC(o_l[u],KW *,MAX_CL,err_p,FALSE);
649    for ( cl = 0 ;
650          cl < MAX_CL ;
651          cl++ ) {
652 
653       o_l[ u ][ cl ] = NULL ;
654    }
655    return TRUE ;
656 }
657 
658 /*---------------------------------------------------------------------------
659 gamma.c (classify_link)
660 called by gamma.c (create_rules)
661 ----------------------------------------------------------------------------*/
classify_link(RULE_PARAM * r_p,KW *** o_l,KW * k,NODE u,SYMB w,SYMB c)662 static void classify_link( RULE_PARAM *r_p ,
663                            KW ***o_l , /* -- 2006-11-02 : arg -- */
664                            KW *k ,
665                            NODE u ,
666                            SYMB w ,
667                            SYMB c ) {
668 
669    /* -- classification by clause type -- */
670    KW * last_key ,
671       * penult ;
672 
673    k -> hits = 0 ;
674    k -> best = 0 ;
675    k -> Type = c ;
676    k -> Weight = w ;
677    last_key = o_l[ u ][ c ] ; /* -- 2006-11-02 : arg -- */
678    if ( last_key == NULL ) {
679       o_l[ u ][ c ] = k ; /* -- 2006-11-02 : arg -- */
680 
681    } else {
682       /* -- if the same input symbols are used... -- */
683       while ( ( penult = last_key -> OutputNext ) != NULL )
684           last_key = penult ;
685       last_key -> OutputNext = k ;
686    }
687    /* -- initialize in anticipation of failure extensions -- */
688    k -> OutputNext = NULL ;
689 
690 }
691 
692 /*---------------------------------------------------------------------------
693 gamma.c (add_failure_linkage)
694 called by gamma.c (precompute_gamma_function)
695 ----------------------------------------------------------------------------*/
add_failure_linkage(KW *** o_l,NODE x,NODE u)696 static void add_failure_linkage( KW ***o_l ,
697                                  NODE x ,
698                                  NODE u ) {
699    /* -- called by precompute_gamma_function
700       -- x is the node in the failure function of the node u
701       -- classification by clause type -- */
702    KW *k ,
703       *fk ;
704    int cl ;
705 
706    for ( cl = 0 ;
707          cl < MAX_CL ;
708          cl++ ) {
709       /* -- append the failure keys for each class to the end of the
710          appropriate chain -- */
711       fk = o_l[ x ][ cl ] ;
712       k = o_l[ u ][ cl ] ;
713       if ( k == NULL ) {
714          o_l[ u ][ cl ] = fk ;
715       } else {
716          /* -- since the chain will be already null-terminated, we only find
717             the end of the chain if fk is non-null -- */
718          if ( fk != NULL ) {
719             /* -- append to the end of the list and make sure that the longer
720                lengths go first - this is probably redundant. -- */
721             while ( k -> OutputNext != NULL ) {
722                k = k -> OutputNext ;
723             }
724             k -> OutputNext = fk ;
725          }
726       }
727    }
728 }
729 
730 /*---------------------------------------------------------------------------
731 gamma.c (precompute_gamma_function)
732 called by gamma.c (create_rules)
733 calls gamma.c (add_failure_linkage)
734 ----------------------------------------------------------------------------*/
precompute_gamma_function(ERR_PARAM * err_p,NODE ** Trie,KW *** o_l,NODE n)735 static NODE **precompute_gamma_function( ERR_PARAM *err_p ,
736                                          NODE **Trie ,
737                                          KW ***o_l ,
738                                          NODE n ) {
739    NODE u ,
740         ua ,
741         x ;
742    SYMB a ;
743    int i ,
744        j ;
745    NODE **Gamma ;
746    NODE *Failure ,
747         *Queue ;
748 
749    /* -- Storage for Failure Function -- */
750    PAGC_CALLOC_STRUC(Failure,NODE,n,err_p,NULL) ;
751    /* -- Storage for Breadth First Search Queue -- */
752    PAGC_CALLOC_STRUC(Queue,NODE,n,err_p,NULL) ;
753 
754    PAGC_CALLOC_2D_ARRAY(Gamma,NODE,n,MAXINSYM,err_p,NULL) ;
755 
756    u = EPSILON ;
757    i = 0 ;
758    for ( a = 0 ;
759          a < MAXINSYM ;
760          a++ ) {
761       x = Trie[ EPSILON ][ a ] ;
762       Gamma[ EPSILON ][ a ] = x ;
763       Failure[ x ] = EPSILON ;
764       /* -- add to Queue for breadth-first search -- */
765       if ( x != EPSILON ) {
766          Queue[ i++ ] = x ;
767       }
768    }
769    Queue[ i ] = FAIL ; /* -- terminate the list of nodes to process -- */
770 
771    for ( j = 0 ;
772          Queue[ j ] != FAIL ;
773          j++ ) {
774       u = Queue[ j ] ;
775       /* -- get non-Fail transitions from Trie onto queue -- */
776       for ( a = 0 ;
777             a < MAXINSYM ;
778             a++ ) {
779          if ( ( x = Trie[ u ][ a ] ) != FAIL ) {
780            Queue[ i++ ] = x ;
781          }
782       }
783       Queue[ i ] = FAIL ; /* -- mark end of list -- */
784       x = Failure[ u ] ;
785       add_failure_linkage( o_l ,
786                            x ,
787                            u ) ;
788       for ( a = 0 ;
789             a < MAXINSYM ;
790             a ++ ) {
791          ua = Trie[ u ][ a ] ;
792          if ( ua != FAIL ) {
793             Gamma[ u ][ a ] = ua ;
794             Failure[ ua ] = Gamma[ x ][ a ] ;
795          } else {
796             Gamma[ u ][ a ] = Gamma[ x ][ a ] ;
797          }
798       }
799    }
800    FREE_AND_NULL( Failure ) ;
801    FREE_AND_NULL( Queue ) ;
802    return Gamma ;
803 }
804 
805 
806 
807 static const char *rule_type_names[] = {
808    "MACRO" , "MICRO" , "ARC" , "CIVIC" , "EXTRA"
809 } ;
810 
811 /* =========================================
812 gamma.c (output_rule_statistics)
813 uses macro OPEN_ALLOCATED_NAME
814 stdio.h (printf,fprintf,fflush,fclose)
815 ===========================================*/
816 #ifdef BUILD_API
output_rule_statistics(RULE_PARAM * r_p,ERR_PARAM * err_p)817 int output_rule_statistics( RULE_PARAM *r_p, ERR_PARAM *err_p ) {
818 #else
819 int output_rule_statistics( RULE_PARAM *r_p ,
820                             ERR_PARAM *err_p ,
821                             char *name ,
822                             DS_Handle _file_sys_p ) {
823 #endif
824    int i ,
825        found_count ,
826        n ;
827    SYMB *OL ;
828    char *sts_name = NULL ;
829    FILE *sts_file = NULL ;
830    KW * k ;
831    KW * k_s ;
832    double hit_frequency ,
833           best_frequency ;
834 
835    if ( !r_p -> collect_statistics ) {
836       printf( "Statistics were not collected\n" ) ;
837       return FALSE ;
838    }
839 
840 #ifndef BUILD_API
841    if ( name != NULL && name[ 0 ] != SENTINEL ) {
842       OPEN_ALLOCATED_NAME(sts_name,"sts",sts_file,name,"wb+",_file_sys_p,err_p,FALSE) ;
843    }
844 #endif
845 
846    /* -- cycle through the keys -- */
847    n = r_p -> rules_read ;
848    k_s = r_p -> key_space ;
849    for ( i = 0 , found_count = 0 ;
850          i < n ;
851          i++ ) {
852       k = k_s + i ;
853       if ( k -> hits == 0 ) {
854          continue ;
855       }
856 
857       found_count++ ;
858       if ( sts_file == NULL ) {
859          printf( "\nRule %d is of type %d (%s)\n: " ,
860                  i ,
861                  k -> Type ,
862                  rule_type_names[ k -> Type ] ) ;
863          printf( "Input : " ) ;
864       } else {
865          fprintf( sts_file ,
866                   "\nRule %d is of type %d (%s)\n: " ,
867                   i ,
868                   k -> Type ,
869                   rule_type_names[ k -> Type ]  ) ;
870          fprintf( sts_file ,
871                   "Input : " ) ;
872       }
873       for ( OL = k -> Input ;
874             *OL != FAIL ;
875             OL++ ) {
876          if ( sts_file == NULL ) {
877              printf( "|%d (%s)|" ,
878                      *OL ,
879                      in_symb_name( *OL ) ) ;
880          } else {
881              fprintf( sts_file ,
882                       "|%d (%s)|" ,
883                       *OL ,
884                       in_symb_name( *OL ) ) ;
885          }
886       }
887       if ( sts_file == NULL ) {
888          printf( "\nOutput: " ) ;
889 
890       } else {
891          fprintf( sts_file ,
892                   "\nOutput: " ) ;
893       }
894       /* -- output the output symbols -- */
895       for ( OL = k -> Output ;
896             *OL != FAIL ;
897             OL++ ) {
898          if ( sts_file == NULL ) {
899             printf( "|%d (%s)|" ,
900                     *OL ,
901                     out_symb_name( *OL ) ) ;
902          } else {
903             fprintf( sts_file ,
904                      "|%d (%s)|" ,
905                      *OL ,
906                      out_symb_name( *OL ) ) ;
907          }
908       }
909       if ( sts_file == NULL ) {
910          printf ( "\nrank %d ( %f): hits %d out of %d\n" ,
911                   k -> Weight ,
912                   load_value[ k -> Weight ] ,
913                   k->hits,
914                   r_p -> total_key_hits ) ;
915       } else {
916          hit_frequency = ( ( double ) k -> hits ) / ( ( double ) r_p -> total_key_hits ) ;
917          best_frequency = ( ( double )  k -> best ) / ( ( double ) r_p -> total_best_keys ) ;
918          fprintf( sts_file ,
919                   "\nrank %d ( %f): hit frequency: %f, best frequency: %f" ,
920                   k -> Weight ,
921                   load_value[ k -> Weight ] ,
922                   hit_frequency ,
923                   best_frequency ) ;
924          fprintf ( sts_file ,
925                    "\n%d hits out of %d, best %d out of %d\n" ,
926                    k->hits, r_p -> total_key_hits, k-> best, r_p -> total_best_keys ) ;
927       }
928       k -> hits = 0 ;
929       k -> best = 0 ;
930    }
931    if ( sts_file == NULL ) {
932       printf( "Found %d rules hit\n" , found_count ) ;
933    } else {
934       fprintf( sts_file ,
935                "Found %d rules hit\n" ,
936                found_count ) ;
937    }
938    /* -- start over -- */
939    r_p -> total_key_hits = 0 ;
940    r_p -> total_best_keys = 0 ;
941    if ( sts_file != NULL ) {
942       fflush( sts_file ) ;
943       fclose( sts_file ) ;
944       FREE_AND_NULL( sts_name ) ;
945    } else {
946       fflush( stdout ) ;
947    }
948    return TRUE ;
949 }
950 
951