1 /*
2 * ahocorasick.c: implementation of ahocorasick library's functions
3 * This file is part of multifast.
4 *
5 Copyright 2010-2012 Kamiar Kanani <kamiar.kanani@gmail.com>
6 Copyright 2012-21 ntop.org (Incremental improvements)
7
8 multifast is free software: you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 multifast is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with multifast. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #ifndef __KERNEL__
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <ctype.h>
27 #ifndef WIN32
28 #include <unistd.h>
29 #else
30 #define __SIZEOF_LONG__ 4
31 #endif
32 #include <stdint.h>
33 #include <sys/types.h>
34 #else
35 #include <asm/byteorder.h>
36 #include <linux/kernel.h>
37 #include <linux/types.h>
38 typedef __kernel_size_t size_t;
39 #include <linux/string.h>
40 #include <linux/slab.h>
41 #endif
42
43 #include "ndpi_api.h"
44 #include "ahocorasick.h"
45
46 /* TODO: For different depth of node, number of outgoing edges differs
47 considerably, It is efficient to use different chunk size for
48 different depths */
49
50 /* Private function prototype */
51 static int node_edge_compare (struct edge * e, int a, int b);
52 static int node_has_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * newstr);
53
54 static AC_NODE_t * node_create (void);
55 static AC_NODE_t * node_create_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha);
56 static int node_register_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * str, int is_existing);
57 static int node_register_outgoing (AC_NODE_t * thiz, AC_NODE_t * next, AC_ALPHABET_t alpha);
58 static AC_NODE_t * node_find_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha);
59 static AC_NODE_t * node_findbs_next (AC_NODE_t * thiz, uint8_t alpha);
60 static AC_NODE_t * node_findbs_next_ac (AC_NODE_t * thiz, uint8_t alpha,int icase);
61 static void node_release (AC_NODE_t * thiz, int free_pattern);
62 static void node_release_pattern (AC_NODE_t * thiz);
63 static int node_range_edges (AC_AUTOMATA_t *thiz, AC_NODE_t * node);
64 static inline void node_sort_edges (AC_NODE_t * thiz);
65
66 #ifndef __KERNEL__
67 struct aho_dump_info {
68 size_t memcnt,node_oc,node_8c,node_xc,node_xr;
69 int buf_pos,ip;
70 char *bufstr;
71 size_t bufstr_len;
72 FILE *file;
73 };
74
75 static void dump_node_header(AC_NODE_t * n, struct aho_dump_info *);
76 static int ac_automata_global_debug = 0;
77 #endif
78
79 /* Private function prototype */
80 static int ac_automata_union_matchstrs (AC_NODE_t * node);
81 static void ac_automata_set_failure
82 (AC_AUTOMATA_t * thiz, AC_NODE_t * node, AC_NODE_t * next, int idx, void *);
83 static void ac_automata_traverse_setfailure
84 (AC_AUTOMATA_t * thiz);
85
edge_get_alpha(struct edge * e)86 static inline AC_ALPHABET_t *edge_get_alpha(struct edge *e) {
87 return (AC_ALPHABET_t *)(&e->next[e->max]);
88 }
edge_data_size(int num)89 static inline size_t edge_data_size(int num) {
90 return sizeof(void *)*num + ((num + sizeof(void *) - 1) & ~(sizeof(void *)-1));
91 }
92
93 #ifdef __KERNEL__
acho_calloc(size_t nmemb,size_t size)94 static inline void *acho_calloc(size_t nmemb, size_t size) {
95 return kcalloc(nmemb, size, GFP_ATOMIC);
96 }
acho_malloc(size_t size)97 static inline void *acho_malloc(size_t size) {
98 return kmalloc(size, GFP_ATOMIC);
99 }
acho_free(void * old)100 static inline void acho_free(void *old) {
101 return kfree(old);
102 }
103 #else
104
105 #define acho_calloc(a,b) ndpi_calloc(a,b)
106 #define acho_malloc(a) ndpi_malloc(a)
107 #define acho_free(a) ndpi_free(a)
108 #endif
109
110 static void acho_sort(struct edge *e, size_t num,
111 int (*cmp_func)(struct edge *e, int a, int b),
112 void (*swap_func)(struct edge *e, int a, int b));
113
114 /* tolower() from glibc */
115 static uint8_t aho_lc[256] = {
116 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
117 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
118 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
119 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
120 64, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
121 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 91, 92, 93, 94, 95,
122 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
123 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
124 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
125 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
126 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
127 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
128 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
129 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
130 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
131 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
132 };
133
134 /* toupper() from glibc */
135 static uint8_t aho_xc[256] = {
136 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
137 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
138 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
139 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
140 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
141 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
142 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
143 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
144 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
145 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
146 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
147 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
148 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
149 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
150 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
151 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
152 };
153
154 /******************************************************************************
155 * FUNCTION: ac_automata_init
156 * Initialize automata; allocate memories and set initial values
157 * PARAMS:
158 * MATCH_CALLBACK mc: call-back function
159 * the call-back function will be used to reach the caller on match occurrence
160 ******************************************************************************/
ac_automata_init(MATCH_CALLBACK_f mc)161 AC_AUTOMATA_t * ac_automata_init (MATCH_CALLBACK_f mc)
162 {
163 AC_AUTOMATA_t * thiz;
164 // if(!mc) return NULL;
165 thiz = (AC_AUTOMATA_t *)acho_calloc(1,sizeof(AC_AUTOMATA_t));
166 if(!thiz) return NULL;
167 thiz->root = node_create ();
168 if(!thiz->root) {
169 acho_free(thiz);
170 return NULL;
171 }
172 thiz->root->id = 1;
173 thiz->root->root = 1;
174 thiz->total_patterns = 0;
175 thiz->automata_open = 1;
176 thiz->match_handler = mc;
177 thiz->to_lc = 0;
178 thiz->no_root_range = 0;
179 thiz->add_to_range = REALLOC_CHUNK_OUTGOING*2;
180 return thiz;
181 }
182 /******************************************************************************
183 * FUNCTION: ac_automata_casecmp
184 * Case-insensitive comparison mode
185 * PARAMS:
186 * AC_AUTOMATA_t * thiz: the pointer to the automata
187 * lc: 1 for case-insensitive comparison mode
188 * RETUERN VALUE: AC_ERROR_t
189 * the return value indicates the success or failure of changes
190 ******************************************************************************/
ac_automata_feature(AC_AUTOMATA_t * thiz,unsigned int feature)191 AC_ERROR_t ac_automata_feature (AC_AUTOMATA_t * thiz, unsigned int feature)
192 {
193 if(!thiz) return ACERR_ERROR;
194 if(thiz->all_nodes_num || thiz->total_patterns) return ACERR_ERROR;
195 thiz->to_lc = (feature & AC_FEATURE_LC) != 0;
196 thiz->no_root_range = (feature & AC_FEATURE_NO_ROOT_RANGE) != 0;
197 return ACERR_SUCCESS;
198 }
199
ac_automata_name(AC_AUTOMATA_t * thiz,char * name,int debug)200 AC_ERROR_t ac_automata_name (AC_AUTOMATA_t * thiz, char *name, int debug)
201 {
202 if(!thiz) return ACERR_ERROR;
203 strncpy(thiz->name,name,sizeof(thiz->name)-1);
204 thiz->debug = debug != 0;
205 return ACERR_SUCCESS;
206 }
207
208 #ifndef __KERNEL__
ac_automata_enable_debug(int debug)209 void ac_automata_enable_debug (int debug) {
210 ac_automata_global_debug = debug != 0;
211 }
212 #endif
213
214 /******************************************************************************
215 * FUNCTION: ac_automata_add
216 * Adds pattern to the automata.
217 * PARAMS:
218 * AC_AUTOMATA_t * thiz: the pointer to the automata
219 * AC_PATTERN_t * patt: the pointer to added pattern
220 * RETUERN VALUE: AC_ERROR_t
221 * the return value indicates the success or failure of adding action
222 ******************************************************************************/
ac_automata_add(AC_AUTOMATA_t * thiz,AC_PATTERN_t * patt)223 AC_ERROR_t ac_automata_add (AC_AUTOMATA_t * thiz, AC_PATTERN_t * patt)
224 {
225 unsigned int i;
226 AC_NODE_t * n = thiz->root;
227 AC_NODE_t * next;
228 AC_ALPHABET_t alpha;
229
230 if(!thiz->automata_open)
231 return ACERR_AUTOMATA_CLOSED;
232
233 if (!patt->length)
234 return ACERR_ZERO_PATTERN;
235
236 if (patt->length > AC_PATTRN_MAX_LENGTH)
237 return ACERR_LONG_PATTERN;
238
239 for (i=0; i<patt->length; i++) {
240 alpha = patt->astring[i];
241 if(thiz->to_lc)
242 alpha = (AC_ALPHABET_t)aho_lc[(uint8_t)alpha];
243
244 if((next = node_find_next(n, alpha)) != 0) {
245 n = next;
246 continue;
247 }
248 if(!(next = node_create_next(n, alpha)))
249 return ACERR_ERROR;
250 next->id = ++thiz->id;
251 thiz->all_nodes_num++;
252 n = next;
253 }
254 if(thiz->max_str_len < patt->length)
255 thiz->max_str_len = patt->length;
256
257 if(n->final) {
258 patt->rep.number = n->matched_patterns->patterns[0].rep.number;
259 return ACERR_DUPLICATE_PATTERN;
260 }
261
262 if(node_register_matchstr(n, patt, 0))
263 return ACERR_ERROR;
264
265 thiz->total_patterns++;
266
267 return ACERR_SUCCESS;
268 }
269
ac_automata_walk(AC_AUTOMATA_t * thiz,NODE_CALLBACK_f node_cb,ALPHA_CALLBACK_f alpha_cb,void * data)270 AC_ERROR_t ac_automata_walk(AC_AUTOMATA_t * thiz,
271 NODE_CALLBACK_f node_cb, ALPHA_CALLBACK_f alpha_cb, void *data)
272 {
273 unsigned int ip;
274 AC_NODE_t *next, *n;
275 struct ac_path * path = thiz->ac_path;
276 AC_ERROR_t r;
277
278 ip = 1;
279 path[1].n = thiz->root;
280 path[1].idx = 0;
281
282 while(ip) {
283 unsigned int i,last;
284 n = path[ip].n;
285 i = path[ip].idx;
286 last = !n->outgoing || (n->one && i > 0) || (!n->one && i >= n->outgoing->degree);
287 if(node_cb && (!i || last)) {
288 r = node_cb(thiz, n, i, data);
289 if(r != ACERR_SUCCESS) return r;
290 }
291 if(last) {
292 ip--; continue;
293 }
294 next = NULL;
295 if(n->one) {
296 next = (AC_NODE_t *)n->outgoing;
297 } else {
298 while(i < n->outgoing->degree) {
299 next = n->outgoing->next[i];
300 if(next) break;
301 i++;
302 }
303 }
304 if(!next) {
305 if(!n->range || i >= n->outgoing->degree) {
306 r = node_cb ? node_cb(thiz, n, i, data):ACERR_SUCCESS;
307 if(r != ACERR_SUCCESS) return r;
308 }
309 ip--; continue;
310 }
311
312 if(n->depth < AC_PATTRN_MAX_LENGTH) {
313 path[n->depth].l = n->one ? n->one_alpha:
314 edge_get_alpha(n->outgoing)[i];
315 if(alpha_cb)
316 alpha_cb(thiz, n, next, i, data);
317 }
318
319 path[ip].idx = i+1;
320 if(ip >= AC_PATTRN_MAX_LENGTH)
321 continue;
322
323 ip++;
324
325 path[ip].n = next;
326 path[ip].idx = 0;
327
328 }
329 return ACERR_SUCCESS;
330 }
331
332
ac_finalize_node(AC_AUTOMATA_t * thiz,AC_NODE_t * n,int idx,void * data)333 static AC_ERROR_t ac_finalize_node(AC_AUTOMATA_t * thiz,AC_NODE_t * n, int idx, void *data) {
334 if(!n->ff) {
335 n->id = ++(thiz->id);
336 n->ff = 1;
337 if(ac_automata_union_matchstrs (n))
338 return ACERR_ERROR;
339 if(n->use) {
340 if(!n->one) {
341 if(node_range_edges (thiz,n)) {
342 node_sort_edges (n);
343 thiz->n_range++;
344 } else
345 thiz->n_find++;
346 } else
347 thiz->n_oc++;
348 }
349 }
350 if(!n->a_ptr && n->outgoing && !n->one) {
351 n->a_ptr = edge_get_alpha(n->outgoing);
352 }
353 return ACERR_SUCCESS;
354 }
355
356
357 /******************************************************************************
358 * FUNCTION: ac_automata_finalize
359 * Locate the failure node for all nodes and collect all matched pattern for
360 * every node. it also sorts outgoing edges of node, so binary search could be
361 * performed on them. after calling this function the automate literally will
362 * be finalized and you can not add new patterns to the automate.
363 * PARAMS:
364 * AC_AUTOMATA_t * thiz: the pointer to the automata
365 ******************************************************************************/
366
ac_automata_finalize(AC_AUTOMATA_t * thiz)367 AC_ERROR_t ac_automata_finalize (AC_AUTOMATA_t * thiz) {
368
369 AC_ERROR_t r = ACERR_SUCCESS;
370 if(!thiz->automata_open) return r;
371
372 ac_automata_traverse_setfailure (thiz);
373 thiz->id=0;
374 thiz->n_oc = 0;
375 thiz->n_range = 0;
376 thiz->n_find = 0;
377 r = ac_automata_walk(thiz,ac_finalize_node,NULL,NULL);
378 if(r == ACERR_SUCCESS)
379 thiz->automata_open = 0;
380 return r;
381 }
382
ac_automata_exact_match(AC_PATTERNS_t * mp,int pos,AC_TEXT_t * txt)383 int ac_automata_exact_match(AC_PATTERNS_t *mp,int pos, AC_TEXT_t *txt) {
384 AC_PATTERN_t *patterns = mp->patterns;
385 AC_PATTERN_t **matched = txt->match.matched;
386 int i;
387 int match_map = 0;
388 for(i=0; i < mp->num && i < (__SIZEOF_INT__*8-1); i++,patterns++) {
389 do {
390 if(patterns->rep.from_start && patterns->rep.at_end) {
391 if(pos == txt->length && patterns->length == pos)
392 matched[0] = patterns, match_map |= 1 << i;
393 break;
394 }
395 if(patterns->rep.from_start) {
396 if(patterns->length == pos)
397 matched[1] = patterns, match_map |= 1 << i;
398 break;
399 }
400 if(patterns->rep.at_end) {
401 if(pos == txt->length)
402 matched[2] = patterns, match_map |= 1 << i;
403 break;
404 }
405 matched[3] = patterns, match_map |= 1 << i;
406 } while(0);
407 }
408 return match_map;
409 }
410
411 /******************************************************************************
412 * FUNCTION: ac_automata_search
413 * Search in the input text using the given automata. on match event it will
414 * call the call-back function. and the call-back function in turn after doing
415 * its job, will return an integer value to ac_automata_search(). 0 value means
416 * continue search, and non-0 value means stop search and return to the caller.
417 * PARAMS:
418 * AC_AUTOMATA_t * thiz: the pointer to the automata
419 * AC_TEXT_t * txt: the input text that must be searched
420 * void * param: this parameter will be send to call-back function. it is
421 * useful for sending parameter to call-back function from caller function.
422 * RETURN VALUE:
423 * -1: failed call; automata is not finalized
424 * 0: success; continue searching; call-back sent me a 0 value
425 * 1: success; stop searching; call-back sent me a non-0 value
426 ******************************************************************************/
ac_automata_search(AC_AUTOMATA_t * thiz,AC_TEXT_t * txt,AC_REP_t * param)427 int ac_automata_search (AC_AUTOMATA_t * thiz,
428 AC_TEXT_t * txt, AC_REP_t * param)
429 {
430 unsigned long position;
431 int icase = 0,i,debug=0;
432 AC_MATCH_t *match;
433 AC_NODE_t *curr;
434 AC_NODE_t *next;
435 AC_ALPHABET_t *apos;
436
437 if(thiz->automata_open)
438 /* you must call ac_automata_locate_failure() first */
439 return -1;
440 position = 0;
441 curr = thiz->root;
442 apos = txt->astring;
443 #ifndef __KERNEL__
444 if(thiz->debug && ac_automata_global_debug) debug = 1;
445 if(debug) {
446 txt->option = debug; /* for callback */
447 printf("aho %s: search %.*s\n", thiz->name[0] ? thiz->name:"unknown", txt->length, apos);
448 }
449 #endif
450 match = &txt->match;
451 memset((char*)match,0,sizeof(*match));
452
453 /* The 'txt->ignore_case' option is checked
454 * separately otherwise clang will detect
455 * uninitialized memory usage much later. */
456 if(txt->option & AC_FEATURE_LC) icase = 1;
457 /* This is the main search loop.
458 * it must be keep as lightweight as possible. */
459 while (position < txt->length) {
460 uint8_t alpha = (uint8_t)apos[position];
461 if(thiz->to_lc) alpha = aho_lc[alpha];
462 if(!(next = node_findbs_next_ac(curr, (uint8_t)alpha, icase))) {
463 if(curr->failure_node) /* we are not in the root node */
464 curr = curr->failure_node;
465 else
466 position++;
467 } else {
468 curr = next;
469 position++;
470 if(curr->final) {
471 /* select best match */
472 match->match_map = ac_automata_exact_match(curr->matched_patterns,position,txt);
473 if(match->match_map) {
474 match->match_counter++; /* we have a matching */
475 #ifndef __KERNEL__
476 if(debug) {
477 int i;
478 AC_PATTERN_t *patterns = curr->matched_patterns->patterns;
479 for(i=0; i < curr->matched_patterns->num; i++) {
480 if(!(match->match_map & (1 << i))) continue;
481 printf(" match%d: %c%.*s%c [%u]\n",i+1,
482 patterns[i].rep.from_start ? '^':' ',
483 patterns[i].length,patterns[i].astring,
484 patterns[i].rep.at_end ? '$':' ',
485 patterns[i].rep.number);
486 }
487 }
488 #endif
489 if(thiz->match_handler) {
490 /* We check 'next' to find out if we came here after a alphabet
491 * transition or due to a fail. in second case we should not report
492 * matching because it was reported in previous node */
493 match->position = position;
494 match->match_num = curr->matched_patterns->num;
495 match->patterns = curr->matched_patterns->patterns;
496 if (thiz->match_handler(match, txt, param))
497 return 1;
498 }
499 } /* match->match_map */
500 }
501 }
502 }
503 if(thiz->match_handler)
504 return match->match_counter > 0 ? 1:0;
505
506 for(i = 0; i < 4; i++)
507 if(txt->match.matched[i]) {
508 *param = (txt->match.matched[i])->rep;
509 #ifndef __KERNEL__
510 if(debug) {
511 AC_PATTERN_t *pattern = txt->match.matched[i];
512 printf("best match: %c%.*s%c [%u]\n",
513 pattern->rep.from_start ? '^':' ',
514 pattern->length,pattern->astring,
515 pattern->rep.at_end ? '$':' ',
516 pattern->rep.number);
517 }
518 #endif
519 return 1;
520 }
521 return 0;
522 }
523
524 /******************************************************************************
525 * FUNCTION: ac_automata_release
526 * Release all allocated memories to the automata
527 * PARAMS:
528 * AC_AUTOMATA_t * thiz: the pointer to the automata
529 * free_pattern:
530 * 0 - free all struct w/o free pattern
531 * 1 - free all struct and pattern
532 * 2 - clean struct w/o free pattern
533 ******************************************************************************/
534
ac_automata_release_node(AC_AUTOMATA_t * thiz,AC_NODE_t * n,int idx,void * data)535 static AC_ERROR_t ac_automata_release_node(AC_AUTOMATA_t * thiz,
536 AC_NODE_t *n, int idx, void *data) {
537
538 if(!n->outgoing || idx) {
539 if(n->outgoing) {
540 if(n->one) thiz->n_oc--;
541 else if(n->range) thiz->n_range--;
542 else thiz->n_find--;
543 }
544 node_release(n,data != NULL);
545 }
546
547 return ACERR_SUCCESS;
548 }
ac_automata_release(AC_AUTOMATA_t * thiz,uint8_t free_pattern)549 void ac_automata_release (AC_AUTOMATA_t * thiz, uint8_t free_pattern) {
550
551 ac_automata_walk(thiz,ac_automata_release_node,NULL,free_pattern ? (void *)1:NULL);
552
553 if(free_pattern <= 1) {
554 node_release(thiz->root,free_pattern | 0x4);
555 thiz->root = NULL;
556 acho_free(thiz);
557 } else {
558 AC_NODE_t *n;
559 thiz->all_nodes_num = 0;
560 thiz->total_patterns = 0;
561 thiz->max_str_len = 0;
562 thiz->automata_open = 1;
563
564 n = thiz->root;
565 n->failure_node = NULL;
566 n->id = 0;
567 n->final = 0;
568 n->depth = 0;
569 if(n->outgoing) {
570 acho_free(n->outgoing);
571 n->outgoing = NULL;
572 }
573 if(n->matched_patterns) {
574 acho_free(n->matched_patterns);
575 n->matched_patterns=NULL;
576 }
577 n->use = 0;
578 n->one = 0;
579 }
580 }
581
582 #ifndef __KERNEL__
583
dump_node_header(AC_NODE_t * n,struct aho_dump_info * ai)584 static void dump_node_header(AC_NODE_t * n, struct aho_dump_info *ai) {
585 char *c;
586 int i;
587 fprintf(ai->file,"%04d: ",n->id);
588 if(n->failure_node) fprintf(ai->file," failure %04d:",n->failure_node->id);
589 fprintf(ai->file," d:%d %c",n->depth, n->use ? '+':'-');
590 ai->memcnt += sizeof(*n);
591 if(n->matched_patterns) {
592 ai->memcnt += sizeof(n->matched_patterns) +
593 n->matched_patterns->max*sizeof(n->matched_patterns->patterns[0]);
594 }
595 if(!n->use) { fprintf(ai->file,"\n"); return; }
596 if(n->one) {
597 (ai->node_oc)++;
598 fprintf(ai->file," '%c' next->%d\n",n->one_alpha,
599 n->outgoing ? ((AC_NODE_t *)n->outgoing)->id : -1);
600 return;
601 }
602 if(!n->outgoing) {
603 fprintf(ai->file," BUG! !outgoing\n");
604 return;
605 }
606 fprintf(ai->file,"%s\n",n->range ? " RANGE":"");
607 c = (char *)edge_get_alpha(n->outgoing);
608 if(n->outgoing->degree <= 8)
609 (ai->node_8c)++;
610 else
611 (ai->node_xc)++;
612 if(n->range)
613 (ai->node_xr)++;
614 for(i=0; i < n->outgoing->degree; i++) {
615 fprintf(ai->file," %d: \"%c\" -> %d\n",i,c[i],
616 n->outgoing->next[i] ? n->outgoing->next[i]->id:-1);
617 }
618 ai->memcnt += sizeof(n->outgoing) + edge_data_size(n->outgoing->max);
619 }
620
dump_node_common(AC_AUTOMATA_t * thiz,AC_NODE_t * n,int idx,void * data)621 static AC_ERROR_t dump_node_common(AC_AUTOMATA_t * thiz,
622 AC_NODE_t * n, int idx, void *data) {
623 struct aho_dump_info *ai = (struct aho_dump_info *)data;
624 char *rstr = ai->bufstr;
625
626 if(idx) return ACERR_SUCCESS;
627 dump_node_header(n,ai);
628 if (n->matched_patterns && n->matched_patterns->num && n->final) {
629 char lbuf[512];
630 int nl = 0,j;
631
632 nl = snprintf(lbuf,sizeof(lbuf),"'%.100s' N:%d{",rstr,n->matched_patterns->num);
633 for (j=0; j<n->matched_patterns->num; j++) {
634 AC_PATTERN_t *sid = &n->matched_patterns->patterns[j];
635 if(j) nl += snprintf(&lbuf[nl],sizeof(lbuf)-nl-1,", ");
636 nl += snprintf(&lbuf[nl],sizeof(lbuf)-nl-1,"%d %c%.100s%c",
637 sid->rep.number & 0x3fff,
638 sid->rep.number & 0x8000 ? '^':' ',
639 sid->astring,
640 sid->rep.number & 0x4000 ? '$':' ');
641 }
642 fprintf(ai->file,"%s}\n",lbuf);
643 }
644 return ACERR_SUCCESS;
645 }
dump_node_str(AC_AUTOMATA_t * thiz,AC_NODE_t * node,AC_NODE_t * next,int idx,void * data)646 static void dump_node_str(AC_AUTOMATA_t * thiz, AC_NODE_t * node,
647 AC_NODE_t * next, int idx, void *data) {
648 struct aho_dump_info *ai = (struct aho_dump_info *)data;
649 ai->bufstr[node->depth] = thiz->ac_path[node->depth].l;
650 ai->bufstr[node->depth+1] = 0;
651 }
652
653 /******************************************************************************
654 * FUNCTION: ac_automata_dump
655 * Prints the automata to output in human readable form. it is useful for
656 * debugging purpose.
657 * PARAMS:
658 * AC_AUTOMATA_t * thiz: the pointer to the automata
659 * rstr: char[] buffer
660 * rstr_size: size of rstr buffser
661 * char repcast: 'n': print AC_REP_t as number, 's': print AC_REP_t as string
662 ******************************************************************************/
663
ac_automata_dump(AC_AUTOMATA_t * thiz,FILE * file)664 void ac_automata_dump(AC_AUTOMATA_t * thiz, FILE *file) {
665 struct aho_dump_info ai;
666
667 memset((char *)&ai,0,sizeof(ai));
668 ai.file = file ? file : stdout;
669 fprintf(ai.file,"---DUMP- all nodes %u - max strlen %u -%s---\n",
670 (unsigned int)thiz->all_nodes_num,
671 (unsigned int)thiz->max_str_len,
672 thiz->automata_open ? "open":"ready");
673
674 ai.bufstr = acho_malloc(AC_PATTRN_MAX_LENGTH+1);
675 ai.bufstr_len = AC_PATTRN_MAX_LENGTH;
676 if(!ai.bufstr) return;
677 ai.bufstr[0] = '\0';
678
679 ac_automata_walk(thiz,dump_node_common,dump_node_str,(void *)&ai);
680 fprintf(ai.file,"---\n mem size %zu avg node size %d, node one char %d, <=8c %d, >8c %d, range %d\n---DUMP-END-\n",
681 ai.memcnt,(int)ai.memcnt/(thiz->all_nodes_num+1),(int)ai.node_oc,(int)ai.node_8c,(int)ai.node_xc,(int)ai.node_xr);
682 }
683 #endif
684
685 /******************************************************************************
686 * FUNCTION: ac_automata_union_matchstrs
687 * Collect accepted patterns of the node. the accepted patterns consist of the
688 * node's own accepted pattern plus accepted patterns of its failure node.
689 ******************************************************************************/
ac_automata_union_matchstrs(AC_NODE_t * node)690 static int ac_automata_union_matchstrs (AC_NODE_t * node)
691 {
692 unsigned int i;
693 AC_NODE_t * m;
694
695 for (m = node; m; m = m->failure_node) {
696 if(!m->matched_patterns) continue;
697
698 for (i=0; i < m->matched_patterns->num; i++)
699 if(node_register_matchstr(node, &(m->matched_patterns->patterns[i]), 1))
700 return 1;
701
702 if (m->final)
703 node->final = 1;
704 }
705 return 0;
706 }
707
708 /******************************************************************************
709 * FUNCTION: ac_automata_set_failure
710 * find failure node for the given node.
711 ******************************************************************************/
ac_automata_set_failure(AC_AUTOMATA_t * thiz,AC_NODE_t * node,AC_NODE_t * next,int idx,void * data)712 static void ac_automata_set_failure
713 (AC_AUTOMATA_t * thiz, AC_NODE_t * node, AC_NODE_t * next, int idx, void *data)
714 {
715 unsigned int i, j;
716 AC_NODE_t * m;
717 struct ac_path * path = thiz->ac_path;
718
719 for (i=1; i < next->depth; i++) {
720 m = thiz->root;
721 for (j=i; j < next->depth && m; j++) {
722 m = node_find_next (m, path[j].l);
723 }
724 if (m) {
725 next->failure_node = m;
726 break;
727 }
728 }
729 if (!next->failure_node)
730 next->failure_node = thiz->root;
731 }
732
733 /******************************************************************************
734 * FUNCTION: ac_automata_traverse_setfailure
735 * Traverse all automata nodes using DFS (Depth First Search), meanwhile it set
736 * the failure node for every node it passes through. this function must be
737 * called after adding last pattern to automata. i.e. after calling this you
738 * can not add further pattern to automata.
739 ******************************************************************************/
ac_automata_traverse_setfailure(AC_AUTOMATA_t * thiz)740 static inline void ac_automata_traverse_setfailure (AC_AUTOMATA_t * thiz)
741 {
742 ac_automata_walk(thiz,NULL,ac_automata_set_failure,NULL);
743 }
744
745 /******************************************************************************
746 * FUNCTION: node_create
747 * Create the node
748 ******************************************************************************/
node_create(void)749 static inline AC_NODE_t * node_create(void)
750 {
751 return (AC_NODE_t *) acho_calloc (1,sizeof(AC_NODE_t));
752 }
753
754
node_release_pattern(AC_NODE_t * thiz)755 static void node_release_pattern(AC_NODE_t * thiz)
756 {
757 int i;
758 AC_PATTERN_t * str;
759
760 if(!thiz->matched_patterns) return;
761 str = thiz->matched_patterns->patterns;
762
763 for (i=0; i < thiz->matched_patterns->num; str++,i++)
764 {
765 if(!str->is_existing && str->astring) {
766 acho_free(str->astring);
767 str->astring = NULL;
768 }
769 }
770 }
771
772
773 /******************************************************************************
774 * FUNCTION: node_release
775 * Release node
776 ******************************************************************************/
node_release(AC_NODE_t * thiz,int free_pattern)777 static void node_release(AC_NODE_t * thiz, int free_pattern)
778 {
779 if(thiz->root && (free_pattern & 0x4) == 0) return;
780
781 if(free_pattern & 1) node_release_pattern(thiz);
782
783 if(thiz->matched_patterns) {
784 acho_free(thiz->matched_patterns);
785 thiz->matched_patterns = NULL;
786 }
787 if(!thiz->one && thiz->outgoing) {
788 acho_free(thiz->outgoing);
789 }
790 thiz->outgoing = NULL;
791 acho_free(thiz);
792 }
793
794 /* Nonzero if X is not aligned on a "long" boundary. */
795 #undef UNALIGNED /* Windows defined it but differently from what Aho expects */
796 #define UNALIGNED(X) ((long)X & (__SIZEOF_LONG__ - 1))
797
798 #define LBLOCKSIZE __SIZEOF_LONG__
799
800 #if __SIZEOF_LONG__ == 4
801 #define DETECTNULL(X) (((X) - 0x01010101UL) & ~(X) & 0x80808080UL)
802 #define DUPC 0x01010101UL
803
bsf(uint32_t bits)804 static inline size_t bsf(uint32_t bits)
805 {
806 #ifdef __GNUC__
807 return __builtin_ctz(bits);
808 #else
809 size_t i=0;
810 if(!bits) return i;
811 if((bits & 0xffff) == 0) { i+=16; bits >>=16; }
812 if((bits & 0xff) == 0) i+=8;
813 return i;
814 #endif
815 }
816
817 #else
818 #define DETECTNULL(X) (((X) - 0x0101010101010101ULL) & ~(X) & 0x8080808080808080ULL)
819 #define DUPC 0x0101010101010101UL
820
bsf(uint64_t bits)821 static inline size_t bsf(uint64_t bits)
822 {
823 #ifdef __GNUC__
824 return __builtin_ctzll(bits);
825 #else
826 size_t i=0;
827 if(!bits) return i;
828 if((bits & 0xffffffff) == 0) { i+=32; bits >>=32; }
829 if((bits & 0xffff) == 0) { i+=16; bits >>=16; }
830 if((bits & 0xff) == 0) i+=8;
831 return i;
832 #endif
833 }
834 #endif
835
836 static inline char *
xmemchr(char * s,char i,int n)837 xmemchr(char *s, char i,int n)
838 {
839 unsigned char c = (unsigned char)i;
840
841 while(n > 0) {
842 if (n >= LBLOCKSIZE && !UNALIGNED (s)) {
843 unsigned long int mask;
844 mask = c * DUPC;
845
846 while (n >= LBLOCKSIZE) {
847 unsigned long int nc = DETECTNULL((*(unsigned long int *)s) ^ mask);
848 if(nc)
849 return s + (bsf(nc) >> 3);
850 s += LBLOCKSIZE;
851 n -= LBLOCKSIZE;
852 }
853 if(!n) return NULL;
854 }
855 if (*s == c) return s;
856 s++;
857 n--;
858 }
859 return NULL;
860 }
861
862
863 /******************************************************************************
864 * FUNCTION: node_find_next
865 * Find out the next node for a given Alpha to move. this function is used in
866 * the pre-processing stage in which edge array is not sorted. so it uses
867 * linear search.
868 ******************************************************************************/
node_find_next(AC_NODE_t * thiz,AC_ALPHABET_t alpha)869 static AC_NODE_t * node_find_next(AC_NODE_t * thiz, AC_ALPHABET_t alpha)
870 {
871 AC_ALPHABET_t *alphas, *fc;
872
873 if(thiz->one) return alpha == thiz->one_alpha ? (AC_NODE_t *)thiz->outgoing:NULL;
874 if(!thiz->outgoing) return NULL;
875
876 alphas = edge_get_alpha(thiz->outgoing);
877 fc = xmemchr((char *)alphas,(char)alpha,thiz->outgoing->degree);
878 return fc ? thiz->outgoing->next[fc-alphas] : NULL;
879 }
880
881
882 /******************************************************************************
883 * FUNCTION: node_findbs_next
884 * Find out the next node for a given Alpha. this function is used after the
885 * pre-processing stage in which we sort edges. so it uses Binary Search.
886 ******************************************************************************/
887
node_findbs_next(AC_NODE_t * thiz,uint8_t alpha)888 static inline AC_NODE_t *node_findbs_next (AC_NODE_t * thiz, uint8_t alpha)
889 {
890
891 if(thiz->one)
892 return alpha == thiz->one_alpha ? (AC_NODE_t *)thiz->outgoing:NULL;
893
894 if(!(thiz->outgoing->cmap[(uint8_t)alpha >> 5] & (1u << (alpha & 0x1f))))
895 return NULL;
896
897 if(thiz->range)
898 return thiz->outgoing->next[(uint8_t)alpha - (uint8_t)thiz->one_alpha];
899
900 return thiz->outgoing->next[
901 xmemchr((char *)thiz->a_ptr,(char)alpha,thiz->outgoing->degree) - thiz->a_ptr];
902 }
903
node_findbs_next_ac(AC_NODE_t * thiz,uint8_t alpha,int icase)904 static AC_NODE_t *node_findbs_next_ac (AC_NODE_t * thiz, uint8_t alpha,int icase) {
905 AC_NODE_t *next;
906 uint8_t alpha_c;
907
908 if(!thiz->outgoing) return NULL;
909
910 next = node_findbs_next(thiz,alpha);
911 if(next || !icase) return next;
912
913 alpha_c = aho_xc[alpha];
914 if(!alpha_c) return NULL;
915 return node_findbs_next(thiz, alpha ^ alpha_c);
916 }
917
918 /******************************************************************************
919 * FUNCTION: node_has_matchstr
920 * Determine if a final node contains a pattern in its accepted pattern list
921 * or not. return values: 1 = it has, 0 = it hasn't
922 ******************************************************************************/
node_has_matchstr(AC_NODE_t * thiz,AC_PATTERN_t * newstr)923 static int node_has_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * newstr)
924 {
925 int i;
926
927 if(!thiz->matched_patterns) return 0;
928
929 for (i=0; i < thiz->matched_patterns->num; i++)
930 {
931 AC_PATTERN_t *str = &(thiz->matched_patterns->patterns[i]);
932
933 if (str->length != newstr->length)
934 continue;
935
936 if(!memcmp(str->astring,newstr->astring,str->length))
937 return 1;
938 }
939
940 return 0;
941 }
942
943 /******************************************************************************
944 * FUNCTION: node_create_next
945 * Create the next node for the given alpha.
946 ******************************************************************************/
node_create_next(AC_NODE_t * thiz,AC_ALPHABET_t alpha)947 static AC_NODE_t * node_create_next (AC_NODE_t * thiz, AC_ALPHABET_t alpha)
948 {
949 AC_NODE_t * next;
950 next = node_find_next (thiz, alpha);
951 if (next)
952 /* The edge already exists */
953 return NULL;
954 /* Otherwise register new edge */
955 next = node_create ();
956 if(next) {
957 if(node_register_outgoing(thiz, next, alpha)) {
958 node_release(next,0);
959 return NULL;
960 }
961 next->depth = thiz->depth+1;
962 }
963
964 return next;
965 }
966
mp_data_size(int n)967 static inline size_t mp_data_size(int n) {
968 return sizeof(AC_PATTERNS_t) + n*sizeof(AC_PATTERN_t);
969 }
970
node_resize_mp(AC_PATTERNS_t * m)971 static AC_PATTERNS_t * node_resize_mp(AC_PATTERNS_t *m) {
972 AC_PATTERNS_t *new_m;
973
974 if(!m) {
975 m = acho_calloc(1,mp_data_size(REALLOC_CHUNK_MATCHSTR));
976 if(!m) return m;
977 m->max = REALLOC_CHUNK_MATCHSTR;
978 return m;
979 }
980 new_m = acho_malloc(mp_data_size(m->max+REALLOC_CHUNK_MATCHSTR));
981 if(!new_m) return new_m;
982 memcpy((char *)new_m,(char *)m,mp_data_size(m->max));
983 new_m->max += REALLOC_CHUNK_MATCHSTR;
984 acho_free(m);
985 return new_m;
986 }
987
988 /******************************************************************************
989 * FUNCTION: node_register_matchstr
990 * Adds the pattern to the list of accepted pattern.
991 ******************************************************************************/
node_register_matchstr(AC_NODE_t * thiz,AC_PATTERN_t * str,int is_existing)992 static int node_register_matchstr (AC_NODE_t * thiz, AC_PATTERN_t * str,int is_existing)
993 {
994 AC_PATTERN_t *l;
995
996 if(!is_existing)
997 thiz->final = 1;
998 /* Check if the new pattern already exists in the node list */
999 if (thiz->matched_patterns && node_has_matchstr(thiz, str))
1000 return 0;
1001
1002 if(!thiz->matched_patterns)
1003 thiz->matched_patterns = node_resize_mp(thiz->matched_patterns);
1004
1005 /* Manage memory */
1006 if (thiz->matched_patterns->num >= thiz->matched_patterns->max) {
1007 AC_PATTERNS_t *new_mp = node_resize_mp(thiz->matched_patterns);
1008 if(!new_mp) return 1;
1009 thiz->matched_patterns = new_mp;
1010 }
1011 l = &thiz->matched_patterns->patterns[thiz->matched_patterns->num];
1012 l->astring = str->astring;
1013 l->length = str->length;
1014 l->is_existing = is_existing;
1015 l->rep = str->rep;
1016 thiz->matched_patterns->num++;
1017 return 0;
1018 }
1019
node_resize_outgoing(struct edge * e,size_t added)1020 static struct edge *node_resize_outgoing(struct edge * e,size_t added) {
1021 struct edge *new_e;
1022 int ds;
1023
1024 if(!added) added = REALLOC_CHUNK_OUTGOING;
1025 if(!e) {
1026 e = acho_calloc(1,sizeof(struct edge) + edge_data_size(REALLOC_CHUNK_OUTGOING));
1027 if(!e) return e;
1028 e->max = REALLOC_CHUNK_OUTGOING;
1029 return e;
1030 }
1031 ds = edge_data_size(e->max + added);
1032 new_e = acho_calloc(1,sizeof(struct edge) + ds);
1033 if(!new_e) return new_e;
1034 memcpy(new_e,e,sizeof(struct edge) + sizeof(AC_NODE_t *)*e->max);
1035 new_e->max += added;
1036
1037 if(e->degree)
1038 memcpy(edge_get_alpha(new_e),edge_get_alpha(e),e->degree);
1039
1040 acho_free(e);
1041 return new_e;
1042 }
1043
1044 /******************************************************************************
1045 * FUNCTION: node_register_outgoing
1046 * Establish an edge between two nodes
1047 ******************************************************************************/
node_register_outgoing(AC_NODE_t * thiz,AC_NODE_t * next,AC_ALPHABET_t alpha)1048 static int node_register_outgoing
1049 (AC_NODE_t * thiz, AC_NODE_t * next, AC_ALPHABET_t alpha)
1050 {
1051 struct edge *o;
1052 if(!thiz->use) {
1053 thiz->use = 1;
1054 thiz->one = 1;
1055 thiz->one_alpha = alpha;
1056 thiz->outgoing = (struct edge *)next;
1057 return 0;
1058 }
1059 if(thiz->one) {
1060 o = node_resize_outgoing(NULL,0);
1061 if(!o) return 1;
1062 o->next[0] = (AC_NODE_t *)thiz->outgoing;
1063 *edge_get_alpha(o) = thiz->one_alpha;
1064 o->degree = 1;
1065 thiz->one = 0;
1066 thiz->one_alpha = 0;
1067 thiz->outgoing = o;
1068 } else
1069 o = thiz->outgoing;
1070
1071 if(!o) return 1;
1072
1073 if(o->degree >= o->max)
1074 {
1075 struct edge *new_o = node_resize_outgoing(thiz->outgoing,0);
1076 if(!new_o) return 1;
1077
1078 thiz->outgoing = new_o;
1079 o = new_o;
1080 }
1081 edge_get_alpha(o)[o->degree] = alpha;
1082 o->next[o->degree] = next;
1083 o->degree++;
1084 return 0;
1085 }
1086
1087 /******************************************************************************
1088 * FUNCTION: node_edge_compare
1089 * Comparison function for qsort. see man qsort.
1090 ******************************************************************************/
node_edge_compare(struct edge * e,int a,int b)1091 static int node_edge_compare (struct edge * e, int a, int b) {
1092 AC_ALPHABET_t *c = edge_get_alpha(e);
1093 return c[a] >= c[b] ? 1:0;
1094 }
1095
node_edge_swap(struct edge * e,int a,int b)1096 static void node_edge_swap (struct edge * e, int a, int b)
1097 {
1098 AC_ALPHABET_t *c,tc;
1099 AC_NODE_t *tn;
1100 c = edge_get_alpha(e);
1101 tc = c[a]; c[a] = c[b]; c[b] = tc;
1102 tn = e->next[a]; e->next[a] = e->next[b]; e->next[b] = tn;
1103 }
1104
1105 /******************************************************************************
1106 * FUNCTION: acho_2range
1107 * Adds missing characters in the range low - high
1108 ******************************************************************************/
acho_2range(AC_NODE_t * thiz,uint8_t low,uint8_t high)1109 static void acho_2range(AC_NODE_t * thiz,uint8_t low, uint8_t high) {
1110 struct edge *e;
1111 int i;
1112 uint8_t *c = (uint8_t *)edge_get_alpha(thiz->outgoing);
1113
1114 thiz->range = 1;
1115 thiz->one_alpha = (AC_ALPHABET_t)low;
1116 e = thiz->outgoing;
1117 for (i=0; low <= high && i < e->max; i++,low++) {
1118 if(e->cmap[(low >> 5) & 0x7] & (1u << (low & 0x1f))) continue;
1119 c[e->degree] = low;
1120 e->next[e->degree] = NULL;
1121 e->degree++;
1122 }
1123 }
1124
1125 /******************************************************************************
1126 * FUNCTION: node_range_edges
1127 * Converts to a range if possible.
1128 ******************************************************************************/
node_range_edges(AC_AUTOMATA_t * thiz,AC_NODE_t * node)1129 static int node_range_edges (AC_AUTOMATA_t *thiz, AC_NODE_t * node)
1130 {
1131 struct edge *e = node->outgoing;
1132 uint8_t *c = (uint8_t *)edge_get_alpha(node->outgoing);
1133 uint8_t low = 0xff,high = 0;
1134 int i;
1135
1136 memset((char *)&e->cmap,0,sizeof(e->cmap));
1137 for(i = 0; i < e->degree; i++) {
1138 uint8_t cc = c[i];
1139 if(cc < low) low = cc;
1140 if(cc > high) high = cc;
1141 e->cmap[(cc >> 5) & 0x7] |= 1u << (cc & 0x1f);
1142 }
1143 if(high - low + 1 == e->degree) {
1144 node->range = 1;
1145 node->one_alpha = (AC_ALPHABET_t)low;
1146 return 1;
1147 }
1148 if(high - low + 1 < e->max) {
1149 acho_2range(node,low,high);
1150 return 1;
1151 }
1152
1153 i = (high - low)/8;
1154 if (i < thiz->add_to_range) i = thiz->add_to_range;
1155 i += REALLOC_CHUNK_OUTGOING-1;
1156 i -= i % REALLOC_CHUNK_OUTGOING;
1157
1158 if(high - low + 1 < e->max + i || (node->root && !thiz->no_root_range)) {
1159 int added = (high - low + 1) - e->max;
1160 struct edge *new_o = node_resize_outgoing(node->outgoing,added);
1161 if(new_o) {
1162 node->outgoing = new_o;
1163 acho_2range(node,low,high);
1164 return 1;
1165 }
1166 return 0;
1167 }
1168
1169 return 0;
1170 }
1171 /******************************************************************************
1172 * FUNCTION: node_sort_edges
1173 * sorts edges alphabets.
1174 ******************************************************************************/
node_sort_edges(AC_NODE_t * thiz)1175 static inline void node_sort_edges (AC_NODE_t * thiz)
1176 {
1177
1178 acho_sort (thiz->outgoing, thiz->outgoing->degree,
1179 node_edge_compare, node_edge_swap);
1180 }
1181
1182 /**
1183 * sort - sort an array of elements
1184 * @base: pointer to data to sort
1185 * @num: number of elements
1186 * @size: size of each element
1187 * @cmp_func: pointer to comparison function
1188 * @swap_func: pointer to swap function or NULL
1189 *
1190 * This function does a heapsort on the given array. You may provide a
1191 * swap_func function optimized to your element type.
1192 *
1193 * Sorting time is O(n log n) both on average and worst-case. While
1194 * qsort is about 20% faster on average, it suffers from exploitable
1195 * O(n*n) worst-case behavior and extra memory requirements that make
1196 * it less suitable for kernel use.
1197 */
1198
acho_sort(struct edge * e,size_t num,int (* cmp_func)(struct edge * e,int a,int b),void (* swap_func)(struct edge * e,int a,int b))1199 void acho_sort(struct edge *e, size_t num,
1200 int (*cmp_func)(struct edge *e, int a, int b),
1201 void (*swap_func)(struct edge *e, int a, int b))
1202 {
1203 /* pre-scale counters for performance */
1204 int i = (num/2 - 1) , n = num, c, r;
1205
1206 if (!swap_func) return;
1207 if (!cmp_func) return;
1208
1209 /* heapify */
1210 for ( ; i >= 0; i -= 1) {
1211 for (r = i; r * 2 + 1 < n; r = c) {
1212 c = r * 2 + 1;
1213 if (c < n - 1 && cmp_func(e, c, c + 1) == 0)
1214 c += 1;
1215 if (cmp_func(e, r, c) != 0)
1216 break;
1217 swap_func(e, r, c);
1218 }
1219 }
1220
1221 /* sort */
1222 for (i = n - 1; i > 0; i -= 1) {
1223 swap_func(e,0,i);
1224 for (r = 0; r * 2 + 1 < i; r = c) {
1225 c = r * 2 + 1;
1226 if (c < i - 1 && cmp_func(e, c, c + 1) == 0)
1227 c += 1;
1228 if (cmp_func(e, r, c) != 0)
1229 break;
1230 swap_func(e, r, c);
1231 }
1232 }
1233 }
1234
1235 /* vim: set ts=4 sw=4 et : */
1236
1237