1 /*
2 * Fast String Matcher Substring Matching Implementation
3 *
4 * Copyright (c) 2015-2017 by Farsight Security, Inc.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 #include "private.h"
20
21 /* \cond */
22 /*
23 * Calculating Aho-Corasick metadata efficiently involves a breadth-first
24 * trie traversal, which requires a queue of trie nodes to organize.
25 */
26 struct _fsmtrie_nodeq {
27 int size, head, tail;
28 fsmtrie_node_t **queue;
29 };
30 /* \endcond */
31
32 static bool
_fsmtrie_nodeq_init(struct _fsmtrie_nodeq * q,int size)33 _fsmtrie_nodeq_init(struct _fsmtrie_nodeq *q, int size)
34 {
35 q->queue = calloc(size, sizeof(fsmtrie_node_t *));
36 if (q->queue == NULL)
37 {
38 return false;
39 }
40 q->size = size;
41 q->head = q->tail = 0;
42 return true;
43 }
44
45 static void
_fsmtrie_nodeq_destroy(struct _fsmtrie_nodeq * q)46 _fsmtrie_nodeq_destroy(struct _fsmtrie_nodeq *q)
47 {
48 free(q->queue);
49 }
50
51 static bool
_fsmtrie_nodeq_empty(struct _fsmtrie_nodeq * q)52 _fsmtrie_nodeq_empty(struct _fsmtrie_nodeq *q)
53 {
54 return q->head == q->tail;
55 }
56
57 static bool
_fsmtrie_nodeq_enqueue(struct _fsmtrie_nodeq * q,fsmtrie_node_t * n)58 _fsmtrie_nodeq_enqueue(struct _fsmtrie_nodeq *q, fsmtrie_node_t *n) {
59 int next = (q->tail + 1) % q->size;
60 if (next == q->head) {
61 /* queue full */
62 return false;
63 }
64 q->queue[q->tail] = n;
65 q->tail = next;
66 return true;
67 }
68
69 static fsmtrie_node_t *
_fsmtrie_nodeq_dequeue(struct _fsmtrie_nodeq * q)70 _fsmtrie_nodeq_dequeue(struct _fsmtrie_nodeq *q) {
71 fsmtrie_node_t *n;
72 int next = (q->head + 1) % q->size;
73 if (q->head == q->tail)
74 return NULL;
75 n = q->queue[q->head];
76 q->head = next;
77 return n;
78 }
79
80 static void
_fsmtrie_ac_compile(struct fsmtrie * f)81 _fsmtrie_ac_compile(struct fsmtrie *f)
82 {
83 fsmtrie_node_t *node, *child, *suffix;
84 struct _fsmtrie_nodeq queue;
85 int c;
86
87 /*
88 * During the traversal, the queue will contain less than two
89 * levels of the trie. Each level of the trie contains at most
90 * a number of nodes equal to the leaf nodes (inserted strings)
91 * in the trie. This provides an upper bound for the queue length.
92 */
93 assert(_fsmtrie_nodeq_init(&queue, 2 * f->node_cnt));
94
95 /*
96 * The root node has no proper suffix. The single-character
97 * nodes have no nonempty proper suffixes, but the root node
98 * is their empty proper suffix.
99 */
100 f->root->suffix = NULL;
101 for (c = 0; c < f->nrnodes; c++)
102 {
103 if (f->root->nodes[c] == NULL)
104 continue;
105 f->root->nodes[c]->suffix = f->root;
106 assert(_fsmtrie_nodeq_enqueue(&queue, f->root->nodes[c]));
107 }
108
109 while (!_fsmtrie_nodeq_empty(&queue))
110 {
111 node = _fsmtrie_nodeq_dequeue(&queue);
112 assert(node != NULL);
113 for (c = 0; c < f->nrnodes; c++) {
114
115 if (node->nodes[c] == NULL)
116 continue;
117
118 child = node->nodes[c];
119 assert(_fsmtrie_nodeq_enqueue(&queue, child));
120
121 child->suffix = f->root;
122 if (child->type & FSMTRIE_NODE_LEAF)
123 child->type |= FSMTRIE_NODE_OUTPUT;
124 else
125 child->type &= ~FSMTRIE_NODE_OUTPUT;
126
127
128 /*
129 * Traverse the parent's suffixes to find the longest
130 * suffix for the child node.
131 */
132 for (suffix = node->suffix; suffix; suffix = suffix->suffix)
133 {
134 if (suffix->nodes[c] == NULL)
135 continue;
136 child->suffix = suffix->nodes[c];
137 if (child->suffix->type & FSMTRIE_NODE_OUTPUT)
138 child->type |= FSMTRIE_NODE_OUTPUT;
139 break;
140 }
141
142 }
143 }
144 _fsmtrie_nodeq_destroy(&queue);
145 f->flags |= FSMTRIE_AC_COMPILED;
146 }
147
148 int
fsmtrie_search_substring(struct fsmtrie * f,const char * str,void (* cb)(const char *,int,void *),void * cbdata)149 fsmtrie_search_substring(struct fsmtrie *f, const char *str,
150 void (*cb)(const char *, int, void *), void *cbdata)
151 {
152 fsmtrie_node_t *node, *next, *n;
153 const unsigned char *c;
154
155 if (f->mode == fsmtrie_mode_token)
156 {
157 snprintf(f->err_buf,
158 sizeof (f->err_buf),
159 "%s() is incompatible with %s mode fsmtrie",
160 __func__, _mode_to_str(f->mode));
161 return (-1);
162 }
163
164 assert(f->root);
165
166 if ((f->flags & FSMTRIE_AC_COMPILED) == 0)
167 _fsmtrie_ac_compile(f);
168
169 node = f->root;
170 for (c = (unsigned char *)str; *c; c++) {
171 next = node->nodes[(int)*c];
172
173 /*
174 * If our current path does not continue, walk the list of
175 * suffixes to find the next node. If no suffixes continue
176 * with the next character, restart at the root.
177 */
178 while (next == NULL)
179 {
180 node = node->suffix;
181 if (node == NULL)
182 next = f->root;
183 else
184 next = node->nodes[(int)*c];
185 }
186 node = next;
187
188 if (node->type & FSMTRIE_NODE_OUTPUT)
189 {
190 /*
191 * Traverse the suffix list. Any leaf node on this
192 * list is a match.
193 */
194 for (n = node; n; n = n->suffix)
195 {
196 if (n->type & FSMTRIE_NODE_LEAF) {
197 /*
198 * amoff is the offset in the subject
199 * string of the first character after
200 * the match. moff is the offset of
201 * the match string in the subject
202 * string.
203 */
204 int amoff = (int)(c - (unsigned char *)str) + 1;
205 int moff = amoff - strlen(n->str);
206 cb(n->str, moff, cbdata);
207 }
208 }
209 }
210 }
211 return (1);
212 }
213