1 /*
2  * Fast String Matcher Substring Matching Implementation
3  *
4  *  Copyright (c) 2015-2017 by Farsight Security, Inc.
5  *
6  *  Licensed under the Apache License, Version 2.0 (the "License");
7  *  you may not use this file except in compliance with the License.
8  *  You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  *  Unless required by applicable law or agreed to in writing, software
13  *  distributed under the License is distributed on an "AS IS" BASIS,
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  *  See the License for the specific language governing permissions and
16  *  limitations under the License.
17  */
18 
19 #include "private.h"
20 
21 /* \cond */
22 /*
23  * Calculating Aho-Corasick metadata efficiently involves a breadth-first
24  * trie traversal, which requires a queue of trie nodes to organize.
25  */
26 struct _fsmtrie_nodeq {
27         int size, head, tail;
28         fsmtrie_node_t **queue;
29 };
30 /* \endcond */
31 
32 static bool
_fsmtrie_nodeq_init(struct _fsmtrie_nodeq * q,int size)33 _fsmtrie_nodeq_init(struct _fsmtrie_nodeq *q, int size)
34 {
35         q->queue = calloc(size, sizeof(fsmtrie_node_t *));
36         if (q->queue == NULL)
37         {
38                 return false;
39         }
40         q->size = size;
41         q->head = q->tail = 0;
42         return true;
43 }
44 
45 static void
_fsmtrie_nodeq_destroy(struct _fsmtrie_nodeq * q)46 _fsmtrie_nodeq_destroy(struct _fsmtrie_nodeq *q)
47 {
48         free(q->queue);
49 }
50 
51 static bool
_fsmtrie_nodeq_empty(struct _fsmtrie_nodeq * q)52 _fsmtrie_nodeq_empty(struct _fsmtrie_nodeq *q)
53 {
54         return q->head == q->tail;
55 }
56 
57 static bool
_fsmtrie_nodeq_enqueue(struct _fsmtrie_nodeq * q,fsmtrie_node_t * n)58 _fsmtrie_nodeq_enqueue(struct _fsmtrie_nodeq *q, fsmtrie_node_t *n) {
59         int next = (q->tail + 1) % q->size;
60         if (next == q->head) {
61                 /* queue full */
62                 return false;
63         }
64         q->queue[q->tail] = n;
65         q->tail = next;
66         return true;
67 }
68 
69 static fsmtrie_node_t *
_fsmtrie_nodeq_dequeue(struct _fsmtrie_nodeq * q)70 _fsmtrie_nodeq_dequeue(struct _fsmtrie_nodeq *q) {
71         fsmtrie_node_t *n;
72         int next = (q->head + 1) % q->size;
73         if (q->head == q->tail)
74                 return NULL;
75         n = q->queue[q->head];
76         q->head = next;
77         return n;
78 }
79 
80 static void
_fsmtrie_ac_compile(struct fsmtrie * f)81 _fsmtrie_ac_compile(struct fsmtrie *f)
82 {
83         fsmtrie_node_t *node, *child, *suffix;
84         struct _fsmtrie_nodeq queue;
85         int c;
86 
87         /*
88          * During the traversal, the queue will contain less than two
89          * levels of the trie. Each level of the trie contains at most
90          * a number of nodes equal to the leaf nodes (inserted strings)
91          * in the trie. This provides an upper bound for the queue length.
92          */
93         assert(_fsmtrie_nodeq_init(&queue, 2 * f->node_cnt));
94 
95         /*
96          * The root node has no proper suffix. The single-character
97          * nodes have no nonempty proper suffixes, but the root node
98          * is their empty proper suffix.
99          */
100         f->root->suffix = NULL;
101         for (c = 0; c < f->nrnodes; c++)
102         {
103                 if (f->root->nodes[c] == NULL)
104                         continue;
105                 f->root->nodes[c]->suffix = f->root;
106                 assert(_fsmtrie_nodeq_enqueue(&queue, f->root->nodes[c]));
107         }
108 
109         while (!_fsmtrie_nodeq_empty(&queue))
110         {
111                 node = _fsmtrie_nodeq_dequeue(&queue);
112                 assert(node != NULL);
113                 for (c = 0; c < f->nrnodes; c++) {
114 
115                         if (node->nodes[c] == NULL)
116                                 continue;
117 
118                         child = node->nodes[c];
119                         assert(_fsmtrie_nodeq_enqueue(&queue, child));
120 
121                         child->suffix = f->root;
122                         if (child->type & FSMTRIE_NODE_LEAF)
123                                 child->type |= FSMTRIE_NODE_OUTPUT;
124                         else
125                                 child->type &= ~FSMTRIE_NODE_OUTPUT;
126 
127 
128                         /*
129                          *  Traverse the parent's suffixes to find the longest
130                          *  suffix for the child node.
131                          */
132                         for (suffix = node->suffix; suffix; suffix = suffix->suffix)
133                         {
134                                 if (suffix->nodes[c] == NULL)
135                                         continue;
136                                 child->suffix = suffix->nodes[c];
137                                 if (child->suffix->type & FSMTRIE_NODE_OUTPUT)
138                                         child->type |= FSMTRIE_NODE_OUTPUT;
139                                 break;
140                         }
141 
142                 }
143         }
144         _fsmtrie_nodeq_destroy(&queue);
145         f->flags |= FSMTRIE_AC_COMPILED;
146 }
147 
148 int
fsmtrie_search_substring(struct fsmtrie * f,const char * str,void (* cb)(const char *,int,void *),void * cbdata)149 fsmtrie_search_substring(struct fsmtrie *f, const char *str,
150                         void (*cb)(const char *, int, void *), void *cbdata)
151 {
152         fsmtrie_node_t *node, *next, *n;
153         const unsigned char *c;
154 
155 	if (f->mode == fsmtrie_mode_token)
156 	{
157 		snprintf(f->err_buf,
158 			sizeof (f->err_buf),
159 			"%s() is incompatible with %s mode fsmtrie",
160 			__func__, _mode_to_str(f->mode));
161 		return (-1);
162 	}
163 
164         assert(f->root);
165 
166         if ((f->flags & FSMTRIE_AC_COMPILED) == 0)
167                 _fsmtrie_ac_compile(f);
168 
169         node = f->root;
170         for (c = (unsigned char *)str; *c; c++) {
171                 next = node->nodes[(int)*c];
172 
173                 /*
174                  * If our current path does not continue, walk the list of
175                  * suffixes to find the next node. If no suffixes continue
176                  * with the next character, restart at the root.
177                  */
178                 while (next == NULL)
179                 {
180                         node = node->suffix;
181                         if (node == NULL)
182                                 next = f->root;
183                         else
184                                 next = node->nodes[(int)*c];
185                 }
186                 node = next;
187 
188                 if (node->type & FSMTRIE_NODE_OUTPUT)
189                 {
190                         /*
191                          *  Traverse the suffix list. Any leaf node on this
192                          *  list is a match.
193                          */
194                         for (n = node; n; n = n->suffix)
195                         {
196                                 if (n->type & FSMTRIE_NODE_LEAF) {
197 					/*
198 					 * amoff is the offset in the subject
199 					 * string of the first character after
200 					 * the match. moff is the offset of
201 					 * the match string in the subject
202 					 * string.
203 					 */
204 					int amoff = (int)(c - (unsigned char *)str) + 1;
205 					int moff = amoff - strlen(n->str);
206                                         cb(n->str, moff, cbdata);
207 				}
208                         }
209                 }
210         }
211 	return (1);
212 }
213