1 /*
2  * ahocorasick.c: implementation of ahocorasick library's functions
3  * This file is part of multifast.
4  *
5  Copyright 2010-2012 Kamiar Kanani <kamiar.kanani@gmail.com>
6  Copyright 2012-21   ntop.org (Incremental improvements)
7 
8  multifast is free software: you can redistribute it and/or modify
9  it under the terms of the GNU Lesser General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  multifast is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  GNU Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU Lesser General Public License
19  along with multifast.  If not, see <http://www.gnu.org/licenses/>.
20 */
21 
22 #ifndef __KERNEL__
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <ctype.h>
27 #ifndef WIN32
28 #include <unistd.h>
29 #else
30 #define __SIZEOF_LONG__ 4
31 #endif
32 #include <stdint.h>
33 #include <sys/types.h>
34 #else
35 #include <asm/byteorder.h>
36 #include <linux/kernel.h>
37 #include <linux/types.h>
38 typedef __kernel_size_t size_t;
39 #include <linux/string.h>
40 #include <linux/slab.h>
41 #endif
42 
43 #include "ndpi_api.h"
44 #include "ahocorasick.h"
45 
46 /* TODO: For different depth of node, number of outgoing edges differs
47    considerably, It is efficient to use different chunk size for
48    different depths */
49 
50 /* Private function prototype */
51 static int  node_edge_compare (struct edge * e, int a, int b);
52 static int  node_has_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * newstr);
53 
54 static AC_NODE_t * node_create            (void);
55 static AC_NODE_t * node_create_next       (AC_NODE_t * thiz, AC_ALPHABET_t alpha);
56 static int         node_register_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * str, int is_existing);
57 static int         node_register_outgoing (AC_NODE_t * thiz, AC_NODE_t * next, AC_ALPHABET_t alpha);
58 static AC_NODE_t * node_find_next         (AC_NODE_t * thiz, AC_ALPHABET_t alpha);
59 static AC_NODE_t * node_findbs_next       (AC_NODE_t * thiz, uint8_t alpha);
60 static AC_NODE_t * node_findbs_next_ac    (AC_NODE_t * thiz, uint8_t alpha,int icase);
61 static void        node_release           (AC_NODE_t * thiz, int free_pattern);
62 static void        node_release_pattern   (AC_NODE_t * thiz);
63 static int         node_range_edges       (AC_AUTOMATA_t *thiz, AC_NODE_t * node);
64 static inline void node_sort_edges        (AC_NODE_t * thiz);
65 
66 #ifndef __KERNEL__
67 struct aho_dump_info {
68   size_t memcnt,node_oc,node_8c,node_xc,node_xr;
69   int    buf_pos,ip;
70   char   *bufstr;
71   size_t bufstr_len;
72   FILE   *file;
73 };
74 
75 static void dump_node_header(AC_NODE_t * n, struct aho_dump_info *);
76 static int ac_automata_global_debug = 0;
77 #endif
78 
79 /* Private function prototype */
80 static int ac_automata_union_matchstrs (AC_NODE_t * node);
81 static void ac_automata_set_failure
82         (AC_AUTOMATA_t * thiz, AC_NODE_t * node, AC_NODE_t * next, int idx, void *);
83 static void ac_automata_traverse_setfailure
84         (AC_AUTOMATA_t * thiz);
85 
edge_get_alpha(struct edge * e)86 static inline AC_ALPHABET_t *edge_get_alpha(struct edge *e) {
87         return (AC_ALPHABET_t *)(&e->next[e->max]);
88 }
edge_data_size(int num)89 static inline size_t edge_data_size(int num) {
90         return sizeof(void *)*num + ((num + sizeof(void *) - 1) & ~(sizeof(void *)-1));
91 }
92 
93 #ifdef __KERNEL__
acho_calloc(size_t nmemb,size_t size)94 static inline void *acho_calloc(size_t nmemb, size_t size) {
95     return kcalloc(nmemb, size, GFP_ATOMIC);
96 }
acho_malloc(size_t size)97 static inline void *acho_malloc(size_t size) {
98     return kmalloc(size, GFP_ATOMIC);
99 }
acho_free(void * old)100 static inline void acho_free(void *old) {
101     return kfree(old);
102 }
103 #else
104 
105 #define acho_calloc(a,b) ndpi_calloc(a,b)
106 #define acho_malloc(a) ndpi_malloc(a)
107 #define acho_free(a) ndpi_free(a)
108 #endif
109 
110 static void acho_sort(struct edge *e, size_t num,
111       int (*cmp_func)(struct edge *e, int a, int b),
112       void (*swap_func)(struct edge *e, int a, int b));
113 
114 /* tolower() from glibc */
115 static uint8_t aho_lc[256] = {
116   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
117  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
118  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
119  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
120  64, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
121 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  91,  92,  93,  94,  95,
122  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
123 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
124 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
125 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
126 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
127 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
128 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
129 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
130 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
131 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
132 };
133 
134 /* toupper() from glibc */
135 static uint8_t aho_xc[256] = {
136   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
137   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
138   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
139   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
140   0,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
141  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,   0,   0,   0,   0,   0,
142   0,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
143  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,   0,   0,   0,   0,   0,
144   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
145   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
146   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
147   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
148   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
149   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
150   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
151   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0
152 };
153 
154 /******************************************************************************
155  * FUNCTION: ac_automata_init
156  * Initialize automata; allocate memories and set initial values
157  * PARAMS:
158  * MATCH_CALLBACK mc: call-back function
159  * the call-back function will be used to reach the caller on match occurrence
160  ******************************************************************************/
ac_automata_init(MATCH_CALLBACK_f mc)161 AC_AUTOMATA_t * ac_automata_init (MATCH_CALLBACK_f mc)
162 {
163   AC_AUTOMATA_t * thiz;
164 //  if(!mc) return NULL;
165   thiz = (AC_AUTOMATA_t *)acho_calloc(1,sizeof(AC_AUTOMATA_t));
166   if(!thiz) return NULL;
167   thiz->root = node_create ();
168   if(!thiz->root) {
169       acho_free(thiz);
170       return NULL;
171   }
172   thiz->root->id = 1;
173   thiz->root->root = 1;
174   thiz->total_patterns = 0;
175   thiz->automata_open = 1;
176   thiz->match_handler = mc;
177   thiz->to_lc = 0;
178   thiz->no_root_range = 0;
179   thiz->add_to_range = REALLOC_CHUNK_OUTGOING*2;
180   return thiz;
181 }
182 /******************************************************************************
183  * FUNCTION: ac_automata_casecmp
184  * Case-insensitive comparison mode
185  * PARAMS:
186  * AC_AUTOMATA_t * thiz: the pointer to the automata
187  * lc: 1 for case-insensitive comparison mode
188  * RETUERN VALUE: AC_ERROR_t
189  * the return value indicates the success or failure of changes
190  ******************************************************************************/
ac_automata_feature(AC_AUTOMATA_t * thiz,unsigned int feature)191 AC_ERROR_t ac_automata_feature (AC_AUTOMATA_t * thiz, unsigned int feature)
192 {
193   if(!thiz) return ACERR_ERROR;
194   if(thiz->all_nodes_num || thiz->total_patterns) return ACERR_ERROR;
195   thiz->to_lc = (feature & AC_FEATURE_LC) != 0;
196   thiz->no_root_range = (feature & AC_FEATURE_NO_ROOT_RANGE) != 0;
197   return ACERR_SUCCESS;
198 }
199 
ac_automata_name(AC_AUTOMATA_t * thiz,char * name,int debug)200 AC_ERROR_t ac_automata_name (AC_AUTOMATA_t * thiz, char *name, int debug)
201 {
202   if(!thiz) return ACERR_ERROR;
203   strncpy(thiz->name,name,sizeof(thiz->name)-1);
204   thiz->debug = debug != 0;
205   return ACERR_SUCCESS;
206 }
207 
208 #ifndef __KERNEL__
ac_automata_enable_debug(int debug)209 void ac_automata_enable_debug (int debug) {
210     ac_automata_global_debug = debug != 0;
211 }
212 #endif
213 
214 /******************************************************************************
215  * FUNCTION: ac_automata_add
216  * Adds pattern to the automata.
217  * PARAMS:
218  * AC_AUTOMATA_t * thiz: the pointer to the automata
219  * AC_PATTERN_t * patt: the pointer to added pattern
220  * RETUERN VALUE: AC_ERROR_t
221  * the return value indicates the success or failure of adding action
222  ******************************************************************************/
ac_automata_add(AC_AUTOMATA_t * thiz,AC_PATTERN_t * patt)223 AC_ERROR_t ac_automata_add (AC_AUTOMATA_t * thiz, AC_PATTERN_t * patt)
224 {
225   unsigned int i;
226   AC_NODE_t * n = thiz->root;
227   AC_NODE_t * next;
228   AC_ALPHABET_t alpha;
229 
230   if(!thiz->automata_open)
231     return ACERR_AUTOMATA_CLOSED;
232 
233   if (!patt->length)
234     return ACERR_ZERO_PATTERN;
235 
236   if (patt->length > AC_PATTRN_MAX_LENGTH)
237     return ACERR_LONG_PATTERN;
238 
239   for (i=0; i<patt->length; i++) {
240       alpha = patt->astring[i];
241       if(thiz->to_lc)
242            alpha = (AC_ALPHABET_t)aho_lc[(uint8_t)alpha];
243 
244       if((next = node_find_next(n, alpha)) != 0) {
245           n = next;
246           continue;
247       }
248       if(!(next = node_create_next(n, alpha)))
249               return ACERR_ERROR;
250       next->id = ++thiz->id;
251       thiz->all_nodes_num++;
252       n = next;
253     }
254   if(thiz->max_str_len < patt->length)
255      thiz->max_str_len = patt->length;
256 
257   if(n->final) {
258     patt->rep.number = n->matched_patterns->patterns[0].rep.number;
259     return ACERR_DUPLICATE_PATTERN;
260   }
261 
262   if(node_register_matchstr(n, patt, 0))
263       return ACERR_ERROR;
264 
265   thiz->total_patterns++;
266 
267   return ACERR_SUCCESS;
268 }
269 
ac_automata_walk(AC_AUTOMATA_t * thiz,NODE_CALLBACK_f node_cb,ALPHA_CALLBACK_f alpha_cb,void * data)270 AC_ERROR_t ac_automata_walk(AC_AUTOMATA_t * thiz,
271         NODE_CALLBACK_f node_cb, ALPHA_CALLBACK_f alpha_cb, void *data)
272 {
273   unsigned int ip;
274   AC_NODE_t *next, *n;
275   struct ac_path * path = thiz->ac_path;
276   AC_ERROR_t r;
277 
278   ip = 1;
279   path[1].n = thiz->root;
280   path[1].idx = 0;
281 
282   while(ip) {
283     unsigned int i,last;
284     n = path[ip].n;
285     i = path[ip].idx;
286     last = !n->outgoing || (n->one && i > 0) || (!n->one && i >= n->outgoing->degree);
287     if(node_cb && (!i || last)) {
288             r = node_cb(thiz, n, i, data);
289             if(r != ACERR_SUCCESS) return r;
290     }
291     if(last) {
292         ip--; continue;
293     }
294     next = NULL;
295     if(n->one) {
296         next = (AC_NODE_t *)n->outgoing;
297     } else {
298         while(i < n->outgoing->degree) {
299             next = n->outgoing->next[i];
300             if(next) break;
301             i++;
302         }
303     }
304     if(!next) {
305         if(!n->range || i >= n->outgoing->degree) {
306             r = node_cb ? node_cb(thiz, n, i, data):ACERR_SUCCESS;
307             if(r != ACERR_SUCCESS) return r;
308         }
309         ip--; continue;
310     }
311 
312     if(n->depth < AC_PATTRN_MAX_LENGTH) {
313             path[n->depth].l = n->one ? n->one_alpha:
314                                     edge_get_alpha(n->outgoing)[i];
315             if(alpha_cb)
316                 alpha_cb(thiz, n, next, i, data);
317     }
318 
319     path[ip].idx = i+1;
320     if(ip >= AC_PATTRN_MAX_LENGTH)
321         continue;
322 
323     ip++;
324 
325     path[ip].n = next;
326     path[ip].idx = 0;
327 
328   }
329   return ACERR_SUCCESS;
330 }
331 
332 
ac_finalize_node(AC_AUTOMATA_t * thiz,AC_NODE_t * n,int idx,void * data)333 static AC_ERROR_t ac_finalize_node(AC_AUTOMATA_t * thiz,AC_NODE_t * n, int idx, void *data) {
334     if(!n->ff) {
335         n->id = ++(thiz->id);
336         n->ff = 1;
337         if(ac_automata_union_matchstrs (n))
338             return ACERR_ERROR;
339         if(n->use) {
340             if(!n->one) {
341                 if(node_range_edges (thiz,n)) {
342                     node_sort_edges (n);
343                     thiz->n_range++;
344                 } else
345                     thiz->n_find++;
346             } else
347                 thiz->n_oc++;
348         }
349     }
350     if(!n->a_ptr && n->outgoing && !n->one) {
351         n->a_ptr = edge_get_alpha(n->outgoing);
352     }
353     return ACERR_SUCCESS;
354 }
355 
356 
357 /******************************************************************************
358  * FUNCTION: ac_automata_finalize
359  * Locate the failure node for all nodes and collect all matched pattern for
360  * every node. it also sorts outgoing edges of node, so binary search could be
361  * performed on them. after calling this function the automate literally will
362  * be finalized and you can not add new patterns to the automate.
363  * PARAMS:
364  * AC_AUTOMATA_t * thiz: the pointer to the automata
365  ******************************************************************************/
366 
ac_automata_finalize(AC_AUTOMATA_t * thiz)367 AC_ERROR_t ac_automata_finalize (AC_AUTOMATA_t * thiz) {
368 
369     AC_ERROR_t r = ACERR_SUCCESS;
370     if(!thiz->automata_open) return r;
371 
372     ac_automata_traverse_setfailure (thiz);
373     thiz->id=0;
374     thiz->n_oc = 0;
375     thiz->n_range = 0;
376     thiz->n_find = 0;
377     r = ac_automata_walk(thiz,ac_finalize_node,NULL,NULL);
378     if(r == ACERR_SUCCESS)
379         thiz->automata_open = 0;
380     return r;
381 }
382 
ac_automata_exact_match(AC_PATTERNS_t * mp,int pos,AC_TEXT_t * txt)383 int ac_automata_exact_match(AC_PATTERNS_t *mp,int pos, AC_TEXT_t *txt) {
384     AC_PATTERN_t *patterns = mp->patterns;
385     AC_PATTERN_t **matched = txt->match.matched;
386     int i;
387     int match_map = 0;
388     for(i=0; i < mp->num && i < (__SIZEOF_INT__*8-1); i++,patterns++) {
389       do {
390         if(patterns->rep.from_start && patterns->rep.at_end) {
391             if(pos == txt->length && patterns->length == pos)
392                 matched[0] = patterns, match_map |= 1 << i;
393             break;
394         }
395         if(patterns->rep.from_start) {
396             if(patterns->length == pos)
397                 matched[1] = patterns, match_map |= 1 << i;
398             break;
399         }
400         if(patterns->rep.at_end) {
401             if(pos == txt->length)
402                 matched[2] = patterns, match_map |= 1 << i;
403             break;
404         }
405         matched[3] = patterns, match_map |= 1 << i;
406       } while(0);
407     }
408     return match_map;
409 }
410 
411 /******************************************************************************
412  * FUNCTION: ac_automata_search
413  * Search in the input text using the given automata. on match event it will
414  * call the call-back function. and the call-back function in turn after doing
415  * its job, will return an integer value to ac_automata_search(). 0 value means
416  * continue search, and non-0 value means stop search and return to the caller.
417  * PARAMS:
418  * AC_AUTOMATA_t * thiz: the pointer to the automata
419  * AC_TEXT_t * txt: the input text that must be searched
420  * void * param: this parameter will be send to call-back function. it is
421  * useful for sending parameter to call-back function from caller function.
422  * RETURN VALUE:
423  * -1: failed call; automata is not finalized
424  *  0: success; continue searching; call-back sent me a 0 value
425  *  1: success; stop searching; call-back sent me a non-0 value
426  ******************************************************************************/
ac_automata_search(AC_AUTOMATA_t * thiz,AC_TEXT_t * txt,AC_REP_t * param)427 int ac_automata_search (AC_AUTOMATA_t * thiz,
428         AC_TEXT_t * txt, AC_REP_t * param)
429 {
430   unsigned long position;
431   int icase = 0,i,debug=0;
432   AC_MATCH_t *match;
433   AC_NODE_t *curr;
434   AC_NODE_t *next;
435   AC_ALPHABET_t *apos;
436 
437   if(thiz->automata_open)
438     /* you must call ac_automata_locate_failure() first */
439     return -1;
440   position = 0;
441   curr = thiz->root;
442   apos = txt->astring;
443 #ifndef __KERNEL__
444   if(thiz->debug && ac_automata_global_debug) debug = 1;
445   if(debug) {
446       txt->option = debug;  /* for callback */
447       printf("aho %s: search %.*s\n", thiz->name[0] ? thiz->name:"unknown", txt->length, apos);
448   }
449 #endif
450   match = &txt->match;
451   memset((char*)match,0,sizeof(*match));
452 
453   /* The 'txt->ignore_case' option is checked
454    * separately otherwise clang will detect
455    * uninitialized memory usage much later. */
456   if(txt->option & AC_FEATURE_LC) icase = 1;
457   /* This is the main search loop.
458    * it must be keep as lightweight as possible. */
459   while (position < txt->length) {
460       uint8_t alpha = (uint8_t)apos[position];
461       if(thiz->to_lc) alpha = aho_lc[alpha];
462       if(!(next = node_findbs_next_ac(curr, (uint8_t)alpha, icase))) {
463           if(curr->failure_node) /* we are not in the root node */
464             curr = curr->failure_node;
465           else
466             position++;
467       } else {
468           curr = next;
469           position++;
470           if(curr->final) {
471               /* select best match */
472               match->match_map = ac_automata_exact_match(curr->matched_patterns,position,txt);
473               if(match->match_map) {
474                   match->match_counter++; /* we have a matching */
475 #ifndef __KERNEL__
476                   if(debug) {
477                       int i;
478                       AC_PATTERN_t *patterns = curr->matched_patterns->patterns;
479                       for(i=0; i < curr->matched_patterns->num; i++) {
480                           if(!(match->match_map & (1 << i))) continue;
481                           printf("  match%d: %c%.*s%c [%u]\n",i+1,
482                               patterns[i].rep.from_start ? '^':' ',
483                               patterns[i].length,patterns[i].astring,
484                               patterns[i].rep.at_end ? '$':' ',
485                               patterns[i].rep.number);
486                       }
487                   }
488 #endif
489                   if(thiz->match_handler) {
490                       /* We check 'next' to find out if we came here after a alphabet
491                        * transition or due to a fail. in second case we should not report
492                        * matching because it was reported in previous node */
493                       match->position = position;
494                       match->match_num = curr->matched_patterns->num;
495                       match->patterns = curr->matched_patterns->patterns;
496                       if (thiz->match_handler(match, txt, param))
497                           return 1;
498                   }
499               } /* match->match_map */
500           }
501       }
502   }
503   if(thiz->match_handler)
504     return match->match_counter > 0 ? 1:0;
505 
506   for(i = 0; i < 4; i++)
507       if(txt->match.matched[i]) {
508             *param = (txt->match.matched[i])->rep;
509 #ifndef __KERNEL__
510             if(debug) {
511                 AC_PATTERN_t *pattern = txt->match.matched[i];
512                 printf("best match: %c%.*s%c [%u]\n",
513                           pattern->rep.from_start ? '^':' ',
514                           pattern->length,pattern->astring,
515                           pattern->rep.at_end ? '$':' ',
516                           pattern->rep.number);
517             }
518 #endif
519             return 1;
520       }
521   return 0;
522 }
523 
524 /******************************************************************************
525  * FUNCTION: ac_automata_release
526  * Release all allocated memories to the automata
527  * PARAMS:
528  * AC_AUTOMATA_t * thiz: the pointer to the automata
529  * free_pattern:
530  *  0 - free all struct w/o free pattern
531  *  1 - free all struct and pattern
532  *  2 - clean struct w/o free pattern
533  ******************************************************************************/
534 
ac_automata_release_node(AC_AUTOMATA_t * thiz,AC_NODE_t * n,int idx,void * data)535 static AC_ERROR_t ac_automata_release_node(AC_AUTOMATA_t * thiz,
536         AC_NODE_t *n, int idx, void *data) {
537 
538     if(!n->outgoing || idx) {
539         if(n->outgoing) {
540           if(n->one) thiz->n_oc--;
541             else if(n->range) thiz->n_range--;
542                   else thiz->n_find--;
543         }
544         node_release(n,data != NULL);
545     }
546 
547     return ACERR_SUCCESS;
548 }
ac_automata_release(AC_AUTOMATA_t * thiz,uint8_t free_pattern)549 void ac_automata_release (AC_AUTOMATA_t * thiz, uint8_t free_pattern) {
550 
551     ac_automata_walk(thiz,ac_automata_release_node,NULL,free_pattern ? (void *)1:NULL);
552 
553     if(free_pattern <= 1) {
554         node_release(thiz->root,free_pattern | 0x4);
555         thiz->root = NULL;
556         acho_free(thiz);
557     } else {
558         AC_NODE_t *n;
559         thiz->all_nodes_num  = 0;
560         thiz->total_patterns = 0;
561         thiz->max_str_len    = 0;
562         thiz->automata_open  = 1;
563 
564         n = thiz->root;
565         n->failure_node = NULL;
566         n->id    = 0;
567         n->final = 0;
568         n->depth = 0;
569         if(n->outgoing) {
570             acho_free(n->outgoing);
571             n->outgoing = NULL;
572         }
573         if(n->matched_patterns) {
574             acho_free(n->matched_patterns);
575             n->matched_patterns=NULL;
576         }
577         n->use = 0;
578         n->one = 0;
579     }
580 }
581 
582 #ifndef __KERNEL__
583 
dump_node_header(AC_NODE_t * n,struct aho_dump_info * ai)584 static void dump_node_header(AC_NODE_t * n, struct aho_dump_info *ai) {
585     char *c;
586     int i;
587     fprintf(ai->file,"%04d: ",n->id);
588     if(n->failure_node) fprintf(ai->file," failure %04d:",n->failure_node->id);
589     fprintf(ai->file," d:%d %c",n->depth, n->use ? '+':'-');
590     ai->memcnt += sizeof(*n);
591     if(n->matched_patterns) {
592         ai->memcnt += sizeof(n->matched_patterns) +
593                 n->matched_patterns->max*sizeof(n->matched_patterns->patterns[0]);
594     }
595     if(!n->use) { fprintf(ai->file,"\n"); return; }
596     if(n->one) {
597             (ai->node_oc)++;
598             fprintf(ai->file," '%c' next->%d\n",n->one_alpha,
599                 n->outgoing ? ((AC_NODE_t *)n->outgoing)->id : -1);
600             return;
601     }
602     if(!n->outgoing) {
603             fprintf(ai->file," BUG! !outgoing\n");
604             return;
605     }
606     fprintf(ai->file,"%s\n",n->range ? " RANGE":"");
607     c = (char *)edge_get_alpha(n->outgoing);
608     if(n->outgoing->degree <= 8)
609             (ai->node_8c)++;
610        else
611             (ai->node_xc)++;
612     if(n->range)
613             (ai->node_xr)++;
614     for(i=0; i < n->outgoing->degree; i++) {
615             fprintf(ai->file,"  %d: \"%c\" -> %d\n",i,c[i],
616                     n->outgoing->next[i] ? n->outgoing->next[i]->id:-1);
617     }
618     ai->memcnt += sizeof(n->outgoing) + edge_data_size(n->outgoing->max);
619 }
620 
dump_node_common(AC_AUTOMATA_t * thiz,AC_NODE_t * n,int idx,void * data)621 static AC_ERROR_t dump_node_common(AC_AUTOMATA_t * thiz,
622         AC_NODE_t * n, int idx, void *data) {
623     struct aho_dump_info *ai = (struct aho_dump_info *)data;
624     char *rstr = ai->bufstr;
625 
626     if(idx) return ACERR_SUCCESS;
627     dump_node_header(n,ai);
628     if (n->matched_patterns && n->matched_patterns->num && n->final) {
629         char lbuf[512];
630         int nl = 0,j;
631 
632         nl = snprintf(lbuf,sizeof(lbuf),"'%.100s' N:%d{",rstr,n->matched_patterns->num);
633         for (j=0; j<n->matched_patterns->num; j++) {
634             AC_PATTERN_t *sid = &n->matched_patterns->patterns[j];
635             if(j) nl += snprintf(&lbuf[nl],sizeof(lbuf)-nl-1,", ");
636             nl += snprintf(&lbuf[nl],sizeof(lbuf)-nl-1,"%d %c%.100s%c",
637                             sid->rep.number & 0x3fff,
638                             sid->rep.number & 0x8000 ? '^':' ',
639                             sid->astring,
640                             sid->rep.number & 0x4000 ? '$':' ');
641         }
642         fprintf(ai->file,"%s}\n",lbuf);
643       }
644     return ACERR_SUCCESS;
645 }
dump_node_str(AC_AUTOMATA_t * thiz,AC_NODE_t * node,AC_NODE_t * next,int idx,void * data)646 static void dump_node_str(AC_AUTOMATA_t * thiz, AC_NODE_t * node,
647         AC_NODE_t * next, int idx, void *data) {
648     struct aho_dump_info *ai = (struct aho_dump_info *)data;
649     ai->bufstr[node->depth] = thiz->ac_path[node->depth].l;
650     ai->bufstr[node->depth+1] = 0;
651 }
652 
653 /******************************************************************************
654  * FUNCTION: ac_automata_dump
655  * Prints the automata to output in human readable form. it is useful for
656  * debugging purpose.
657  * PARAMS:
658  * AC_AUTOMATA_t * thiz: the pointer to the automata
659  * rstr: char[] buffer
660  * rstr_size: size of rstr buffser
661  * char repcast: 'n': print AC_REP_t as number, 's': print AC_REP_t as string
662  ******************************************************************************/
663 
ac_automata_dump(AC_AUTOMATA_t * thiz,FILE * file)664 void ac_automata_dump(AC_AUTOMATA_t * thiz, FILE *file) {
665   struct aho_dump_info ai;
666 
667   memset((char *)&ai,0,sizeof(ai));
668   ai.file = file ? file : stdout;
669   fprintf(ai.file,"---DUMP- all nodes %u - max strlen %u -%s---\n",
670           (unsigned int)thiz->all_nodes_num,
671           (unsigned int)thiz->max_str_len,
672           thiz->automata_open ? "open":"ready");
673 
674   ai.bufstr = acho_malloc(AC_PATTRN_MAX_LENGTH+1);
675   ai.bufstr_len = AC_PATTRN_MAX_LENGTH;
676   if(!ai.bufstr) return;
677   ai.bufstr[0] = '\0';
678 
679   ac_automata_walk(thiz,dump_node_common,dump_node_str,(void *)&ai);
680   fprintf(ai.file,"---\n mem size %zu avg node size %d, node one char %d, <=8c %d, >8c %d, range %d\n---DUMP-END-\n",
681               ai.memcnt,(int)ai.memcnt/(thiz->all_nodes_num+1),(int)ai.node_oc,(int)ai.node_8c,(int)ai.node_xc,(int)ai.node_xr);
682 }
683 #endif
684 
685 /******************************************************************************
686  * FUNCTION: ac_automata_union_matchstrs
687  * Collect accepted patterns of the node. the accepted patterns consist of the
688  * node's own accepted pattern plus accepted patterns of its failure node.
689  ******************************************************************************/
ac_automata_union_matchstrs(AC_NODE_t * node)690 static int ac_automata_union_matchstrs (AC_NODE_t * node)
691 {
692   unsigned int i;
693   AC_NODE_t * m;
694 
695   for (m = node; m; m = m->failure_node) {
696       if(!m->matched_patterns) continue;
697 
698       for (i=0; i < m->matched_patterns->num; i++)
699         if(node_register_matchstr(node, &(m->matched_patterns->patterns[i]), 1))
700         return 1;
701 
702       if (m->final)
703         node->final = 1;
704     }
705   return 0;
706 }
707 
708 /******************************************************************************
709  * FUNCTION: ac_automata_set_failure
710  * find failure node for the given node.
711  ******************************************************************************/
ac_automata_set_failure(AC_AUTOMATA_t * thiz,AC_NODE_t * node,AC_NODE_t * next,int idx,void * data)712 static void ac_automata_set_failure
713 (AC_AUTOMATA_t * thiz, AC_NODE_t * node, AC_NODE_t * next, int idx, void *data)
714 {
715   unsigned int i, j;
716   AC_NODE_t * m;
717   struct ac_path * path = thiz->ac_path;
718 
719   for (i=1; i < next->depth; i++) {
720         m = thiz->root;
721         for (j=i; j < next->depth && m; j++) {
722             m = node_find_next (m, path[j].l);
723         }
724         if (m) {
725           next->failure_node = m;
726           break;
727         }
728   }
729   if (!next->failure_node)
730     next->failure_node = thiz->root;
731 }
732 
733 /******************************************************************************
734  * FUNCTION: ac_automata_traverse_setfailure
735  * Traverse all automata nodes using DFS (Depth First Search), meanwhile it set
736  * the failure node for every node it passes through. this function must be
737  * called after adding last pattern to automata. i.e. after calling this you
738  * can not add further pattern to automata.
739  ******************************************************************************/
ac_automata_traverse_setfailure(AC_AUTOMATA_t * thiz)740 static inline void ac_automata_traverse_setfailure (AC_AUTOMATA_t * thiz)
741 {
742     ac_automata_walk(thiz,NULL,ac_automata_set_failure,NULL);
743 }
744 
745 /******************************************************************************
746  * FUNCTION: node_create
747  * Create the node
748  ******************************************************************************/
node_create(void)749 static inline AC_NODE_t * node_create(void)
750 {
751   return  (AC_NODE_t *) acho_calloc (1,sizeof(AC_NODE_t));
752 }
753 
754 
node_release_pattern(AC_NODE_t * thiz)755 static void node_release_pattern(AC_NODE_t * thiz)
756 {
757   int i;
758   AC_PATTERN_t * str;
759 
760     if(!thiz->matched_patterns) return;
761     str = thiz->matched_patterns->patterns;
762 
763     for (i=0; i < thiz->matched_patterns->num; str++,i++)
764     {
765       if(!str->is_existing && str->astring) {
766               acho_free(str->astring);
767               str->astring = NULL;
768       }
769     }
770 }
771 
772 
773 /******************************************************************************
774  * FUNCTION: node_release
775  * Release node
776  ******************************************************************************/
node_release(AC_NODE_t * thiz,int free_pattern)777 static void node_release(AC_NODE_t * thiz, int free_pattern)
778 {
779   if(thiz->root && (free_pattern & 0x4) == 0) return;
780 
781   if(free_pattern & 1) node_release_pattern(thiz);
782 
783   if(thiz->matched_patterns) {
784     acho_free(thiz->matched_patterns);
785     thiz->matched_patterns = NULL;
786   }
787   if(!thiz->one && thiz->outgoing) {
788     acho_free(thiz->outgoing);
789   }
790   thiz->outgoing = NULL;
791   acho_free(thiz);
792 }
793 
794 /* Nonzero if X is not aligned on a "long" boundary.  */
795 #undef UNALIGNED /* Windows defined it but differently from what Aho expects */
796 #define UNALIGNED(X) ((long)X & (__SIZEOF_LONG__ - 1))
797 
798 #define LBLOCKSIZE __SIZEOF_LONG__
799 
800 #if __SIZEOF_LONG__ == 4
801 #define DETECTNULL(X) (((X) - 0x01010101UL) & ~(X) & 0x80808080UL)
802 #define DUPC 0x01010101UL
803 
bsf(uint32_t bits)804 static inline size_t bsf(uint32_t bits)
805 {
806 #ifdef __GNUC__
807     return __builtin_ctz(bits);
808 #else
809     size_t i=0;
810     if(!bits) return i;
811     if((bits & 0xffff) == 0) { i+=16; bits >>=16; }
812     if((bits & 0xff) == 0) i+=8;
813     return i;
814 #endif
815 }
816 
817 #else
818 #define DETECTNULL(X) (((X) - 0x0101010101010101ULL) & ~(X) & 0x8080808080808080ULL)
819 #define DUPC 0x0101010101010101UL
820 
bsf(uint64_t bits)821 static inline size_t bsf(uint64_t bits)
822 {
823 #ifdef __GNUC__
824     return __builtin_ctzll(bits);
825 #else
826     size_t i=0;
827     if(!bits) return i;
828     if((bits & 0xffffffff) == 0) { i+=32; bits >>=32; }
829     if((bits & 0xffff) == 0) { i+=16; bits >>=16; }
830     if((bits & 0xff) == 0) i+=8;
831     return i;
832 #endif
833 }
834 #endif
835 
836 static inline char *
xmemchr(char * s,char i,int n)837 xmemchr(char *s, char i,int n)
838 {
839   unsigned char c = (unsigned char)i;
840 
841   while(n > 0) {
842     if (n >= LBLOCKSIZE && !UNALIGNED (s)) {
843       unsigned long int mask;
844       mask = c * DUPC;
845 
846       while (n >= LBLOCKSIZE) {
847         unsigned long int nc = DETECTNULL((*(unsigned long int *)s) ^ mask);
848         if(nc)
849             return s + (bsf(nc) >> 3);
850         s += LBLOCKSIZE;
851         n -= LBLOCKSIZE;
852       }
853       if(!n) return NULL;
854     }
855     if (*s == c) return s;
856     s++;
857     n--;
858   }
859   return NULL;
860 }
861 
862 
863 /******************************************************************************
864  * FUNCTION: node_find_next
865  * Find out the next node for a given Alpha to move. this function is used in
866  * the pre-processing stage in which edge array is not sorted. so it uses
867  * linear search.
868  ******************************************************************************/
node_find_next(AC_NODE_t * thiz,AC_ALPHABET_t alpha)869 static AC_NODE_t * node_find_next(AC_NODE_t * thiz, AC_ALPHABET_t alpha)
870 {
871   AC_ALPHABET_t  *alphas, *fc;
872 
873   if(thiz->one) return alpha == thiz->one_alpha ? (AC_NODE_t *)thiz->outgoing:NULL;
874   if(!thiz->outgoing) return NULL;
875 
876   alphas = edge_get_alpha(thiz->outgoing);
877   fc = xmemchr((char *)alphas,(char)alpha,thiz->outgoing->degree);
878   return fc ? thiz->outgoing->next[fc-alphas] : NULL;
879 }
880 
881 
882 /******************************************************************************
883  * FUNCTION: node_findbs_next
884  * Find out the next node for a given Alpha. this function is used after the
885  * pre-processing stage in which we sort edges. so it uses Binary Search.
886  ******************************************************************************/
887 
node_findbs_next(AC_NODE_t * thiz,uint8_t alpha)888 static inline AC_NODE_t *node_findbs_next (AC_NODE_t * thiz, uint8_t alpha)
889 {
890 
891   if(thiz->one)
892         return alpha == thiz->one_alpha ? (AC_NODE_t *)thiz->outgoing:NULL;
893 
894   if(!(thiz->outgoing->cmap[(uint8_t)alpha >> 5] & (1u << (alpha & 0x1f))))
895         return NULL;
896 
897   if(thiz->range)
898         return thiz->outgoing->next[(uint8_t)alpha - (uint8_t)thiz->one_alpha];
899 
900   return thiz->outgoing->next[
901       xmemchr((char *)thiz->a_ptr,(char)alpha,thiz->outgoing->degree) - thiz->a_ptr];
902 }
903 
node_findbs_next_ac(AC_NODE_t * thiz,uint8_t alpha,int icase)904 static AC_NODE_t *node_findbs_next_ac (AC_NODE_t * thiz, uint8_t alpha,int icase) {
905   AC_NODE_t *next;
906   uint8_t alpha_c;
907 
908   if(!thiz->outgoing) return NULL;
909 
910   next = node_findbs_next(thiz,alpha);
911   if(next || !icase) return next;
912 
913   alpha_c = aho_xc[alpha];
914   if(!alpha_c) return NULL;
915   return  node_findbs_next(thiz, alpha ^ alpha_c);
916 }
917 
918 /******************************************************************************
919  * FUNCTION: node_has_matchstr
920  * Determine if a final node contains a pattern in its accepted pattern list
921  * or not. return values: 1 = it has, 0 = it hasn't
922  ******************************************************************************/
node_has_matchstr(AC_NODE_t * thiz,AC_PATTERN_t * newstr)923 static int node_has_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * newstr)
924 {
925   int i;
926 
927   if(!thiz->matched_patterns) return 0;
928 
929   for (i=0; i < thiz->matched_patterns->num; i++)
930   {
931     AC_PATTERN_t *str = &(thiz->matched_patterns->patterns[i]);
932 
933     if (str->length != newstr->length)
934       continue;
935 
936     if(!memcmp(str->astring,newstr->astring,str->length))
937       return 1;
938   }
939 
940   return 0;
941 }
942 
943 /******************************************************************************
944  * FUNCTION: node_create_next
945  * Create the next node for the given alpha.
946  ******************************************************************************/
node_create_next(AC_NODE_t * thiz,AC_ALPHABET_t alpha)947 static AC_NODE_t * node_create_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha)
948 {
949   AC_NODE_t * next;
950   next = node_find_next (thiz, alpha);
951   if (next)
952     /* The edge already exists */
953     return NULL;
954   /* Otherwise register new edge */
955   next = node_create ();
956   if(next) {
957     if(node_register_outgoing(thiz, next, alpha)) {
958         node_release(next,0);
959         return NULL;
960     }
961     next->depth = thiz->depth+1;
962   }
963 
964   return next;
965 }
966 
mp_data_size(int n)967 static inline size_t mp_data_size(int n) {
968     return sizeof(AC_PATTERNS_t) + n*sizeof(AC_PATTERN_t);
969 }
970 
node_resize_mp(AC_PATTERNS_t * m)971 static AC_PATTERNS_t * node_resize_mp(AC_PATTERNS_t *m) {
972 AC_PATTERNS_t *new_m;
973 
974     if(!m) {
975         m = acho_calloc(1,mp_data_size(REALLOC_CHUNK_MATCHSTR));
976         if(!m) return m;
977         m->max = REALLOC_CHUNK_MATCHSTR;
978         return m;
979     }
980     new_m = acho_malloc(mp_data_size(m->max+REALLOC_CHUNK_MATCHSTR));
981     if(!new_m) return new_m;
982     memcpy((char *)new_m,(char *)m,mp_data_size(m->max));
983     new_m->max += REALLOC_CHUNK_MATCHSTR;
984     acho_free(m);
985     return new_m;
986 }
987 
988 /******************************************************************************
989  * FUNCTION: node_register_matchstr
990  * Adds the pattern to the list of accepted pattern.
991  ******************************************************************************/
node_register_matchstr(AC_NODE_t * thiz,AC_PATTERN_t * str,int is_existing)992 static int node_register_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * str,int is_existing)
993 {
994   AC_PATTERN_t *l;
995 
996   if(!is_existing)
997       thiz->final = 1;
998   /* Check if the new pattern already exists in the node list */
999   if (thiz->matched_patterns && node_has_matchstr(thiz, str))
1000     return 0;
1001 
1002   if(!thiz->matched_patterns)
1003     thiz->matched_patterns = node_resize_mp(thiz->matched_patterns);
1004 
1005   /* Manage memory */
1006   if (thiz->matched_patterns->num >= thiz->matched_patterns->max) {
1007       AC_PATTERNS_t *new_mp = node_resize_mp(thiz->matched_patterns);
1008       if(!new_mp) return 1;
1009       thiz->matched_patterns = new_mp;
1010     }
1011   l = &thiz->matched_patterns->patterns[thiz->matched_patterns->num];
1012   l->astring = str->astring;
1013   l->length  = str->length;
1014   l->is_existing = is_existing;
1015   l->rep = str->rep;
1016   thiz->matched_patterns->num++;
1017   return 0;
1018 }
1019 
node_resize_outgoing(struct edge * e,size_t added)1020 static struct edge *node_resize_outgoing(struct edge * e,size_t added) {
1021 struct edge *new_e;
1022 int ds;
1023 
1024     if(!added) added = REALLOC_CHUNK_OUTGOING;
1025     if(!e) {
1026         e = acho_calloc(1,sizeof(struct edge) + edge_data_size(REALLOC_CHUNK_OUTGOING));
1027         if(!e) return e;
1028         e->max = REALLOC_CHUNK_OUTGOING;
1029         return e;
1030     }
1031     ds = edge_data_size(e->max + added);
1032     new_e = acho_calloc(1,sizeof(struct edge) + ds);
1033     if(!new_e) return new_e;
1034     memcpy(new_e,e,sizeof(struct edge) + sizeof(AC_NODE_t *)*e->max);
1035     new_e->max += added;
1036 
1037     if(e->degree)
1038         memcpy(edge_get_alpha(new_e),edge_get_alpha(e),e->degree);
1039 
1040     acho_free(e);
1041     return new_e;
1042 }
1043 
1044 /******************************************************************************
1045  * FUNCTION: node_register_outgoing
1046  * Establish an edge between two nodes
1047  ******************************************************************************/
node_register_outgoing(AC_NODE_t * thiz,AC_NODE_t * next,AC_ALPHABET_t alpha)1048 static int node_register_outgoing
1049 (AC_NODE_t * thiz, AC_NODE_t * next, AC_ALPHABET_t alpha)
1050 {
1051   struct edge *o;
1052   if(!thiz->use) {
1053         thiz->use = 1;
1054         thiz->one = 1;
1055         thiz->one_alpha = alpha;
1056         thiz->outgoing = (struct edge *)next;
1057         return 0;
1058   }
1059   if(thiz->one) {
1060         o = node_resize_outgoing(NULL,0);
1061         if(!o) return 1;
1062         o->next[0] = (AC_NODE_t *)thiz->outgoing;
1063         *edge_get_alpha(o) = thiz->one_alpha;
1064         o->degree = 1;
1065         thiz->one = 0;
1066         thiz->one_alpha = 0;
1067         thiz->outgoing = o;
1068   } else
1069         o = thiz->outgoing;
1070 
1071   if(!o) return 1;
1072 
1073   if(o->degree >= o->max)
1074     {
1075         struct edge *new_o = node_resize_outgoing(thiz->outgoing,0);
1076         if(!new_o) return 1;
1077 
1078         thiz->outgoing = new_o;
1079         o = new_o;
1080     }
1081   edge_get_alpha(o)[o->degree] = alpha;
1082   o->next[o->degree] = next;
1083   o->degree++;
1084   return 0;
1085 }
1086 
1087 /******************************************************************************
1088  * FUNCTION: node_edge_compare
1089  * Comparison function for qsort. see man qsort.
1090  ******************************************************************************/
node_edge_compare(struct edge * e,int a,int b)1091 static int node_edge_compare (struct edge * e, int a, int b) {
1092     AC_ALPHABET_t *c = edge_get_alpha(e);
1093     return c[a] >= c[b] ? 1:0;
1094 }
1095 
node_edge_swap(struct edge * e,int a,int b)1096 static void node_edge_swap (struct edge * e, int a, int b)
1097 {
1098 AC_ALPHABET_t *c,tc;
1099 AC_NODE_t *tn;
1100     c = edge_get_alpha(e);
1101     tc = c[a]; c[a] = c[b]; c[b] = tc;
1102     tn = e->next[a]; e->next[a] = e->next[b]; e->next[b] = tn;
1103 }
1104 
1105 /******************************************************************************
1106  * FUNCTION: acho_2range
1107  * Adds missing characters in the range low - high
1108  ******************************************************************************/
acho_2range(AC_NODE_t * thiz,uint8_t low,uint8_t high)1109 static void acho_2range(AC_NODE_t * thiz,uint8_t low, uint8_t high) {
1110     struct edge *e;
1111     int i;
1112     uint8_t *c = (uint8_t *)edge_get_alpha(thiz->outgoing);
1113 
1114     thiz->range = 1;
1115     thiz->one_alpha = (AC_ALPHABET_t)low;
1116     e = thiz->outgoing;
1117     for (i=0; low <= high && i < e->max; i++,low++) {
1118       if(e->cmap[(low >> 5) & 0x7] & (1u << (low & 0x1f))) continue;
1119       c[e->degree] = low;
1120       e->next[e->degree] = NULL;
1121       e->degree++;
1122     }
1123 }
1124 
1125 /******************************************************************************
1126  * FUNCTION: node_range_edges
1127  * Converts to a range if possible.
1128  ******************************************************************************/
node_range_edges(AC_AUTOMATA_t * thiz,AC_NODE_t * node)1129 static int node_range_edges (AC_AUTOMATA_t *thiz, AC_NODE_t * node)
1130 {
1131     struct edge *e = node->outgoing;
1132     uint8_t *c = (uint8_t *)edge_get_alpha(node->outgoing);
1133     uint8_t low = 0xff,high = 0;
1134     int i;
1135 
1136     memset((char *)&e->cmap,0,sizeof(e->cmap));
1137     for(i = 0; i < e->degree; i++) {
1138       uint8_t cc = c[i];
1139       if(cc < low) low = cc;
1140       if(cc > high) high = cc;
1141       e->cmap[(cc >> 5) & 0x7] |= 1u << (cc & 0x1f);
1142     }
1143     if(high - low + 1 == e->degree) {
1144         node->range = 1;
1145         node->one_alpha = (AC_ALPHABET_t)low;
1146         return 1;
1147     }
1148     if(high - low + 1 < e->max) {
1149         acho_2range(node,low,high);
1150         return 1;
1151     }
1152 
1153     i = (high - low)/8;
1154     if (i < thiz->add_to_range) i = thiz->add_to_range;
1155     i += REALLOC_CHUNK_OUTGOING-1;
1156     i -= i % REALLOC_CHUNK_OUTGOING;
1157 
1158     if(high - low + 1 < e->max + i || (node->root && !thiz->no_root_range)) {
1159         int added = (high - low + 1) - e->max;
1160         struct edge *new_o = node_resize_outgoing(node->outgoing,added);
1161         if(new_o) {
1162             node->outgoing = new_o;
1163             acho_2range(node,low,high);
1164             return 1;
1165         }
1166         return 0;
1167     }
1168 
1169     return 0;
1170 }
1171 /******************************************************************************
1172  * FUNCTION: node_sort_edges
1173  * sorts edges alphabets.
1174  ******************************************************************************/
node_sort_edges(AC_NODE_t * thiz)1175 static inline void node_sort_edges (AC_NODE_t * thiz)
1176 {
1177 
1178   acho_sort (thiz->outgoing, thiz->outgoing->degree,
1179         node_edge_compare, node_edge_swap);
1180 }
1181 
1182 /**
1183  * sort - sort an array of elements
1184  * @base: pointer to data to sort
1185  * @num: number of elements
1186  * @size: size of each element
1187  * @cmp_func: pointer to comparison function
1188  * @swap_func: pointer to swap function or NULL
1189  *
1190  * This function does a heapsort on the given array. You may provide a
1191  * swap_func function optimized to your element type.
1192  *
1193  * Sorting time is O(n log n) both on average and worst-case. While
1194  * qsort is about 20% faster on average, it suffers from exploitable
1195  * O(n*n) worst-case behavior and extra memory requirements that make
1196  * it less suitable for kernel use.
1197  */
1198 
acho_sort(struct edge * e,size_t num,int (* cmp_func)(struct edge * e,int a,int b),void (* swap_func)(struct edge * e,int a,int b))1199  void acho_sort(struct edge *e, size_t num,
1200       int (*cmp_func)(struct edge *e, int a, int b),
1201       void (*swap_func)(struct edge *e, int a, int b))
1202 {
1203   /* pre-scale counters for performance */
1204   int i = (num/2 - 1) , n = num, c, r;
1205 
1206   if (!swap_func) return;
1207   if (!cmp_func) return;
1208 
1209   /* heapify */
1210   for ( ; i >= 0; i -= 1) {
1211     for (r = i; r * 2 + 1 < n; r = c) {
1212       c = r * 2 + 1;
1213       if (c < n - 1 && cmp_func(e, c, c + 1) == 0)
1214             c += 1;
1215       if (cmp_func(e, r, c) != 0)
1216             break;
1217       swap_func(e, r, c);
1218     }
1219   }
1220 
1221   /* sort */
1222   for (i = n - 1; i > 0; i -= 1) {
1223     swap_func(e,0,i);
1224     for (r = 0; r * 2 + 1 < i; r = c) {
1225       c = r * 2 + 1;
1226       if (c < i - 1 && cmp_func(e, c, c + 1) == 0)
1227         c += 1;
1228       if (cmp_func(e, r, c) != 0)
1229         break;
1230       swap_func(e, r, c);
1231     }
1232   }
1233 }
1234 
1235 /* vim: set ts=4 sw=4 et :  */
1236 
1237