1 /* $Id$ */
2 /*
3 ** Copyright (C) 2002-2009 Sourcefire, Inc.
4 ** Copyright (C) 1998-2002 Martin Roesch <roesch@sourcefire.com>
5
6 ** This program is free software; you can redistribute it and/or modify
7 ** it under the terms of the GNU General Public License Version 2 as
8 ** published by the Free Software Foundation. You may not use, modify or
9 ** distribute this program under any other version of the GNU General
10 ** Public License.
11 **
12 ** This program is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ** GNU General Public License for more details.
16 **
17 ** You should have received a copy of the GNU General Public License
18 ** along with this program; if not, write to the Free Software
19 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 */
21
22 /***************************************************************************
23 *
24 * File: MSTRING.C
25 *
26 * Purpose: Provide a variety of string functions not included in libc. Makes
27 * up for the fact that the libstdc++ is hard to get reference
28 * material on and I don't want to write any more non-portable c++
29 * code until I have solid references and libraries to use.
30 *
31 * History:
32 *
33 * Date: Author: Notes:
34 * ---------- ------- ----------------------------------------------
35 * 08/19/98 MFR Initial coding begun
36 * 03/06/99 MFR Added Boyer-Moore pattern match routine, don't use
37 * mContainsSubstr() any more if you don't have to
38 * 12/31/99 JGW Added a full Boyer-Moore implementation to increase
39 * performance. Added a case insensitive version of mSearch
40 * 07/24/01 MFR Fixed Regex pattern matcher introduced by Fyodor
41 *
42 **************************************************************************/
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <ctype.h>
51 #include <sys/types.h>
52
53 #include "mstring.h"
54 #include "debug.h"
55 #include "plugbase.h" /* needed for fasthex() */
56 #include "util.h"
57
58 static char * mSplitAddTok(const char *, const int, const char *, const char);
59
60 #ifdef TEST_MSTRING
61
main()62 int main()
63 {
64 char test[] = "\0\0\0\0\0\0\0\0\0CKAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\0\0";
65 char find[] = "CKAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\0\0";
66
67 /* char test[] = "\x90\x90\x90\x90\x90\x90\xe8\xc0\xff\xff\xff/bin/sh\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90";
68 char find[] = "\xe8\xc0\xff\xff\xff/bin/sh"; */
69 int i;
70 int toks;
71 int *shift;
72 int *skip;
73
74 /* shift=make_shift(find,sizeof(find)-1);
75 skip=make_skip(find,sizeof(find)-1); */
76
77 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,"%d\n",
78 mSearch(test, sizeof(test) - 1, find,
79 sizeof(find) - 1, shift, skip)););
80
81 return 0;
82 }
83
84 #endif
85
86 /****************************************************************
87 *
88 * Function: mSplit()
89 *
90 * Purpose: Splits a string into tokens non-destructively.
91 *
92 * Parameters:
93 * char *
94 * The string to be split
95 * char *
96 * A string of token seperaters
97 * int
98 * The maximum number of tokens to be returned. A value
99 * of 0 means to get them all.
100 * int *
101 * Place to store the number of tokens returned
102 * char
103 * The "escape metacharacter", treat the character after
104 * this character as a literal and "escape" a seperator.
105 *
106 * Note if max_toks is reached, the last tok in the returned
107 * token array will possibly have separator characters in it.
108 *
109 * Returns:
110 * 2D char array with one token per "row" of the returned
111 * array.
112 *
113 ****************************************************************/
mSplit(const char * str,const char * sep_chars,const int max_toks,int * num_toks,const char meta_char)114 char ** mSplit(const char *str, const char *sep_chars, const int max_toks,
115 int *num_toks, const char meta_char)
116 {
117 size_t cur_tok = 0; /* current token index into array of strings */
118 size_t tok_start; /* index to start of token */
119 size_t i, j;
120 int escaped = 0;
121 /* It's rare we'll need more than this even if max_toks is set really
122 * high. Store toks here until finished, then allocate. If more than
123 * this is necessary, then allocate max toks */
124 char *toks_buf[TOKS_BUF_SIZE];
125 size_t toks_buf_size = TOKS_BUF_SIZE;
126 int toks_buf_size_increment = 10;
127 char **toks_alloc = NULL; /* Used if the static buf isn't enough */
128 char **toks = toks_buf; /* Pointer to one of the two above */
129 char **retstr;
130 char *whitespace = " \t";
131
132 if (num_toks == NULL)
133 return NULL;
134
135 *num_toks = 0;
136
137 if ((str == NULL) || (strlen(str) == 0) ||
138 ((sep_chars != NULL) && (strlen(sep_chars) == 0)))
139 {
140 return NULL;
141 }
142
143 if (sep_chars == NULL)
144 sep_chars = whitespace;
145
146 /* Meta char cannot also be a separator char */
147 for (i = 0; i < strlen(sep_chars); i++)
148 {
149 if (sep_chars[i] == meta_char)
150 return NULL;
151 }
152
153 /* Move past initial separator characters and whitespace */
154 for (i = 0; i < strlen(str); i++)
155 {
156 for (j = 0; j < strlen(sep_chars); j++)
157 {
158 if ((str[i] == sep_chars[j]) ||
159 isspace((int)str[i]))
160 {
161 break;
162 }
163 }
164
165 /* Not a separator character or whitespace */
166 if (j == strlen(sep_chars))
167 break;
168 }
169
170 if (i == strlen(str))
171 {
172 /* Nothing but separator characters or whitespace in string */
173 return NULL;
174 }
175
176 /* User only wanted one tok so return the rest of the string in
177 * one tok */
178 if ((cur_tok + 1) == (size_t)max_toks)
179 {
180 retstr = (char **)SnortAlloc(sizeof(char *));
181 retstr[cur_tok] = SnortStrndup(&str[i], strlen(str) - i);
182 if (retstr[cur_tok] == NULL)
183 {
184 mSplitFree(&retstr, cur_tok + 1);
185 return NULL;
186 }
187
188 *num_toks = cur_tok + 1;
189 return retstr;
190 }
191
192 /* Mark the beginning of the next tok */
193 tok_start = i;
194 for (; i < strlen(str); i++)
195 {
196 if (!escaped)
197 {
198 /* Got an escape character. Don't include it now, but
199 * must be a character after it. */
200 if (str[i] == meta_char)
201 {
202 escaped = 1;
203 continue;
204 }
205
206 /* See if the current character is a separator */
207 for (j = 0; j < strlen(sep_chars); j++)
208 {
209 if (str[i] == sep_chars[j])
210 break;
211 }
212
213 /* It's a normal character */
214 if (j == strlen(sep_chars))
215 continue;
216
217 /* Current character matched a separator character. Trim off
218 * whitespace previous to the separator. If we get here, there
219 * is at least one savable character */
220 for (j = i; j > tok_start; j--)
221 {
222 if (!isspace((int)str[j - 1]))
223 break;
224 }
225
226 /* Allocate a buffer. The length will not have included the
227 * meta char of escaped separators */
228 toks[cur_tok] = mSplitAddTok(&str[tok_start], j - tok_start, sep_chars, meta_char);
229
230 /* Increment current token index */
231 cur_tok++;
232
233 /* Move past any more separator characters or whitespace */
234 for (; i < strlen(str); i++)
235 {
236 for (j = 0; j < strlen(sep_chars); j++)
237 {
238 if ((str[i] == sep_chars[j]) ||
239 isspace((int)str[i]))
240 {
241 break;
242 }
243 }
244
245 /* Not a separator character or whitespace */
246 if (j == strlen(sep_chars))
247 break;
248 }
249
250 /* Nothing but separator characters or whitespace left in the string */
251 if (i == strlen(str))
252 {
253 *num_toks = cur_tok;
254
255 if (toks != toks_alloc)
256 {
257 retstr = (char **)SnortAlloc(sizeof(char *) * cur_tok);
258 memcpy(retstr, toks, (sizeof(char *) * cur_tok));
259 }
260 else
261 {
262 retstr = toks;
263 }
264
265 return retstr;
266 }
267
268 /* Reached the size of our current string buffer and need to
269 * allocate something bigger. Only get here once if max toks
270 * set to something other than 0 because we'll just allocate
271 * max toks in that case. */
272 if (cur_tok == toks_buf_size)
273 {
274 char **tmp;
275
276 if (toks_alloc != NULL)
277 tmp = toks_alloc;
278 else
279 tmp = toks_buf;
280
281 if (max_toks != 0)
282 toks_buf_size = max_toks;
283 else
284 toks_buf_size = cur_tok + toks_buf_size_increment;
285
286 toks_alloc = (char **)SnortAlloc(sizeof(char *) * toks_buf_size);
287 memcpy(toks_alloc, tmp, (sizeof(char *) * cur_tok));
288 toks = toks_alloc;
289
290 if (tmp != toks_buf)
291 free(tmp);
292 }
293
294 if ((max_toks != 0) && ((cur_tok + 1) == (size_t)max_toks))
295 {
296 /* Return rest of string as last tok */
297 *num_toks = cur_tok + 1;
298
299 /* Already got a ret string */
300 if (toks != toks_alloc)
301 {
302 retstr = (char **)SnortAlloc(sizeof(char *) * (cur_tok + 1));
303 memcpy(retstr, toks, (sizeof(char *) * (cur_tok + 1)));
304 }
305 else
306 {
307 retstr = toks;
308 }
309
310 /* Trim whitespace at end of last tok */
311 for (j = strlen(str); j > tok_start; j--)
312 {
313 if (!isspace((int)str[j - 1]))
314 break;
315 }
316
317 retstr[cur_tok] = SnortStrndup(&str[i], j - i);
318 if (retstr[cur_tok] == NULL)
319 {
320 mSplitFree(&retstr, cur_tok + 1);
321 return NULL;
322 }
323
324 return retstr;
325 }
326
327 tok_start = i;
328 }
329 else
330 {
331 /* This character is escaped with the meta char */
332 escaped = 0;
333 }
334 }
335
336 /* Last character was an escape character */
337 if (escaped)
338 {
339 for (i = 0; i < cur_tok; i++)
340 free(toks[i]);
341
342 if (toks == toks_alloc)
343 free(toks_alloc);
344
345 return NULL;
346 }
347
348 /* Trim whitespace at end of last tok */
349 for (j = i; j > tok_start; j--)
350 {
351 if (!isspace((int)str[j - 1]))
352 break;
353 }
354
355 /* Last character was not a separator character so we've got
356 * one more tok. Unescape escaped sepatator charactors */
357 if (toks != toks_alloc)
358 {
359 retstr = (char **)SnortAlloc(sizeof(char *) * (cur_tok + 1));
360 memcpy(retstr, toks, (sizeof(char *) * (cur_tok + 1)));
361 }
362 else
363 {
364 retstr = toks;
365 }
366
367 retstr[cur_tok] = mSplitAddTok(&str[tok_start], j - tok_start, sep_chars, meta_char);
368
369 /* Just add one to cur_tok index instead of incrementing
370 * since we're done */
371 *num_toks = cur_tok + 1;
372 return retstr;
373 }
374
375 /****************************************************************
376 *
377 * Function: mSplitSpecial()
378 *
379 * Purpose: Splits a string into tokens non-destructively. The token can be
380 * a multi-character token.
381 *
382 * Parameters:
383 * char *str => the string to be split
384 * char *sep => a string of token separator
385 * int max_toks => how many tokens should be returned
386 * int *toks => place to store the number of tokens found in str
387 * char meta => the "escape metacharacter", treat the character
388 * after this character as a literal and "escape" a
389 * seperator
390 *
391 * Returns:
392 * 2D char array with one token per "row" of the returned
393 * array.
394 *
395 ****************************************************************/
mSplitSpecial(char * str,const char * sep,int max_toks,int * toks,const char meta)396 char **mSplitSpecial(char *str, const char *sep, int max_toks,
397 int *toks, const char meta)
398 {
399 char **retstr; /* 2D array which is returned to caller */
400 char *idx; /* index pointer into str */
401 char *end; /* ptr to end of str */
402 char *last_match_idx; /* index pointer into str */
403 const char *sep_end;/* ptr to end of separator string */
404 const char *sep_idx;/* index ptr into separator string */
405 int len = 0; /* length of current token string */
406 int sep_len = 0; /* length of seperator string */
407 int curr_str = 0; /* current index into the 2D return array */
408 unsigned char last_char = 0xFF; /* initialize to something that won't be in meta */
409
410
411 int matched = 0;
412
413 if(!toks) return NULL;
414
415 *toks = 0;
416
417 if (!str || !*str) return NULL;
418
419 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
420 "[*] Splitting string: %s\n", str);
421 DebugMessage(DEBUG_PATTERN_MATCH, "curr_str = %d\n", curr_str););
422
423 /*
424 * find the ends of the respective passed strings so our while() loops
425 * know where to stop
426 */
427 sep_len = strlen(sep);
428 sep_end = sep + sep_len;
429 end = str + strlen(str);
430
431 /* remove trailing whitespace */
432 while(isspace((int) *(end - 1)) && ((end - 1) >= str))
433 *(--end) = '\0'; /* -1 because of NULL */
434
435 /* set our indexing pointers */
436 idx = str;
437
438 /*
439 * alloc space for the return string, this is where the pointers to the
440 * tokens will be stored
441 */
442 retstr = (char **) SnortAlloc( sizeof(char **) * max_toks );
443
444 max_toks--;
445
446 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
447 "max_toks = %d curr_str = %d\n",
448 max_toks, curr_str););
449
450 /* loop thru each letter in the string being tokenized */
451 while(idx < end)
452 {
453 /*
454 * if the current string-indexed char matches the initial
455 * seperator char...
456 */
457 if((*idx == *sep) && (last_char != meta))
458 {
459 /* assume we have a complete match and determine otherwise */
460 matched = 1;
461
462 /* check length of remaining string */
463 if (idx + sep_len < end)
464 {
465 /* we suspect a complete match and save pointer */
466 last_match_idx = idx;
467
468 /* check that the next chars match the remainder of the separator */
469 for ( sep_idx = sep; sep_idx < sep_end; sep_idx++, idx++ )
470 {
471 if (*idx != *sep_idx)
472 matched = 0;
473 }
474
475 /* reset the check pointer */
476 idx = last_match_idx;
477 }
478 else
479 matched = 0;
480
481 if ( matched )
482 {
483 /* if there's something to store... */
484 if(len > 0)
485 {
486 DEBUG_WRAP(
487 DebugMessage(DEBUG_PATTERN_MATCH,
488 "Allocating %d bytes for token ", len + 1););
489 if(curr_str <= max_toks)
490 {
491 /* allocate space for the new token */
492 retstr[curr_str] = (char *)SnortAlloc((len + 1) * sizeof(char));
493
494 /* copy the token into the return string array */
495 memcpy(retstr[curr_str], (idx - len), len);
496 retstr[curr_str][len] = 0;
497 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
498 "tok[%d]: %s\n", curr_str,
499 retstr[curr_str]););
500
501 /* twiddle the necessary pointers and vars */
502 len = 0;
503 curr_str++;
504 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
505 "curr_str = %d\n", curr_str);
506 DebugMessage(DEBUG_PATTERN_MATCH,
507 "max_toks = %d curr_str = %d\n",
508 max_toks, curr_str););
509
510 last_char = *idx;
511 idx+=sep_len;
512 }
513
514 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
515 "Checking if curr_str (%d) >= max_toks (%d)\n",
516 curr_str, max_toks););
517
518 /*
519 * if we've gotten all the tokens requested, return the
520 * list
521 */
522 if(curr_str >= max_toks)
523 {
524 while(isspace((int) *idx))
525 idx++;
526
527 len = end - idx;
528 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
529 "Finishing up...\n");
530 DebugMessage(DEBUG_PATTERN_MATCH,
531 "Allocating %d bytes "
532 "for last token ", len + 1););
533 fflush(stdout);
534
535 retstr[curr_str] = (char *)SnortAlloc((len + 1) * sizeof(char));
536
537 memcpy(retstr[curr_str], idx, len);
538 retstr[curr_str][len] = 0;
539
540 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
541 "tok[%d]: %s\n", curr_str,
542 retstr[curr_str]););
543
544 *toks = curr_str + 1;
545 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
546 "max_toks = %d curr_str = %d\n",
547 max_toks, curr_str);
548 DebugMessage(DEBUG_PATTERN_MATCH,
549 "mSplit got %d tokens!\n", *toks););
550
551 return retstr;
552 }
553 }
554 else
555 /*
556 * otherwise, the previous char was a seperator as well,
557 * and we should just continue
558 */
559 {
560 last_char = *idx;
561 idx+=sep_len;
562 /* make sure to reset this so we test all the sep. chars */
563 sep_idx = sep;
564 len = 0;
565 }
566 }
567 }
568
569 sep_idx = sep;
570 len++;
571 last_char = *idx;
572 idx++;
573 }
574
575 /* put the last string into the list */
576
577 if(len > 0)
578 {
579 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
580 "Allocating %d bytes for last token ", len + 1););
581
582 retstr[curr_str] = (char *)SnortAlloc((len + 1) * sizeof(char));
583
584 memcpy(retstr[curr_str], (idx - len), len);
585 retstr[curr_str][len] = 0;
586
587 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,"tok[%d]: %s\n", curr_str,
588 retstr[curr_str]););
589 *toks = curr_str + 1;
590 }
591
592 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
593 "mSplitSpecial got %d tokens!\n", *toks););
594
595 /* return the token list */
596 return retstr;
597 }
598
/* Builds the heap copy of one token for mSplit().
 *
 *  str        start of the raw token inside the string being split
 *  len        number of raw characters belonging to the token
 *  sep_chars  the separator characters in effect
 *  meta_char  the escape metacharacter
 *
 * A meta char followed by a separator is dropped so only the separator is
 * kept.  A meta char followed by any other character is kept together with
 * that character.  A meta char that is the very last raw character is
 * dropped.
 *
 * Will not return NULL.  SnortAlloc will fatal if it fails */
static char * mSplitAddTok(const char *str, const int len, const char *sep_chars, const char meta_char)
{
    const size_t src_len = (len > 0) ? (size_t)len : 0;
    const size_t sep_len = strlen(sep_chars);
    size_t in;
    size_t out = 0;
    int got_meta = 0;
    char *tok;

    /* Every kept character comes from a distinct raw character, so the
     * result can never be longer than the raw token.  Sizing for the raw
     * length lets the token be built in one pass with a single escape
     * state. */
    tok = (char *)SnortAlloc(src_len + 1);

    for (in = 0; in < src_len; in++)
    {
        const char c = str[in];

        if (!got_meta)
        {
            if (c == meta_char)
            {
                /* Hold the meta char back until the next character
                 * shows whether it escaped a separator */
                got_meta = 1;
                continue;
            }
        }
        else
        {
            /* It's a non-separator character, so include
             * the meta character in the return tok */
            if (memchr(sep_chars, (unsigned char)c, sep_len) == NULL)
                tok[out++] = meta_char;

            got_meta = 0;
        }

        tok[out++] = c;
    }

    /* Terminate explicitly rather than relying on the allocator */
    tok[out] = '\0';

    return tok;
}
674
/****************************************************************
 *
 * Function: mSplitFree()
 *
 * Purpose: Releases a token array and every token stored in it,
 *          then clears the caller's pointer.
 *
 * Parameters:
 *  pbuf     => address of the token array pointer
 *  num_toks => number of entries in the array
 *
 * Typical use:
 *
 *  char **toks = NULL;
 *  int num_toks = 0;
 *  toks = mSplit(str, " ", 2, &num_toks, 0);
 *  mSplitFree(&toks, num_toks);
 *
 * Afterwards toks is NULL again.  Nothing happens when pbuf or
 * the array it points to is NULL.
 *
 ****************************************************************/
void mSplitFree(char ***pbuf, int num_toks)
{
    char **list;
    int n = 0;

    if (pbuf == NULL)
        return;

    list = *pbuf;
    if (list == NULL)
        return;

    /* free(NULL) is a no-op, so empty slots need no special casing */
    while (n < num_toks)
    {
        free(list[n]);
        list[n] = NULL;
        n++;
    }

    free(list);
    *pbuf = NULL;
}
711
/****************************************************************
 *
 * Function: mContainsSubstr(char *, int, char *, int)
 *
 * Purpose: Determines if a buffer contains a (non-regex)
 *          substring using a straightforward scan.
 *
 * Parameters:
 *      buf => data buffer we want to find the data in
 *      b_len => data buffer length
 *      pat => pattern to find
 *      p_len => length of the data in the pattern buffer
 *
 * Returns:
 *      Integer value, 1 on success (buf contains pat), 0 on
 *      failure (pat not in buf).  Also 0 when either pointer is
 *      NULL or either length is not positive.
 *
 ****************************************************************/
int mContainsSubstr(const char *buf, int b_len, const char *pat, int p_len)
{
    const char *b_idx;  /* index ptr into the data buffer */
    const char *p_idx;  /* index ptr into the pattern buffer */
    const char *b_end;  /* ptr to the end of the data buffer */
    int m_cnt = 0;      /* number of pattern bytes matched so far */
#ifdef DEBUG
    unsigned long loopcnt = 0;
#endif

    /* Nothing may be dereferenced unless both buffers hold data */
    if((buf == NULL) || (pat == NULL) || (b_len <= 0) || (p_len <= 0))
        return 0;

    /* mark the end of the buffer */
    b_end = buf + b_len;

    /* init the index ptrs */
    b_idx = buf;
    p_idx = pat;

    while(b_idx < b_end)
    {
#ifdef DEBUG
        loopcnt++;
#endif

        if(*p_idx == *b_idx)
        {
            /* that was the last pattern byte, we're done */
            if(m_cnt == (p_len - 1))
            {
#ifdef DEBUG
                DebugMessage(DEBUG_PATTERN_MATCH,
                        "\n%ld compares for match\n", loopcnt);
#endif
                return 1;
            }

            m_cnt++;
            b_idx++;
            p_idx++;
        }
        else
        {
            /* Restart one byte past where the current attempt began.
             * With nothing matched that is simply the next byte. */
            if(m_cnt == 0)
                b_idx++;
            else
                b_idx = b_idx - (m_cnt - 1);

            p_idx = pat;
            m_cnt = 0;
        }
    }

    /* if we make it here we didn't find what we were looking for */
    return 0;
}
788
789
790
791
/****************************************************************
 *
 * Function: make_skip(char *, int)
 *
 * Purpose: Create a Boyer-Moore skip table for a given pattern
 *
 * Parameters:
 *      ptrn => pattern
 *      plen => length of the data in the pattern buffer
 *
 * Returns:
 *      int * - the skip table, 256 entries indexed by byte value.
 *              A byte that occurs in the pattern maps to plen
 *              minus the index of its last occurrence, every
 *              other byte maps to plen + 1.  The caller frees it.
 *
 ****************************************************************/
int *make_skip(char *ptrn, int plen)
{
    int *skip = (int *) SnortAlloc(256 * sizeof(int));
    int i;

    /* A negative length is handled like an empty pattern; counting a
     * negative value down to zero would never terminate */
    if (plen < 0)
        plen = 0;

    for ( i = 0; i < 256; i++ )
        skip[i] = plen + 1;

    /* Later occurrences overwrite earlier ones, so each byte ends up
     * with the distance belonging to its last position */
    for ( i = 0; i < plen; i++ )
        skip[(unsigned char) ptrn[i]] = plen - i;

    return skip;
}
819
820
821
/****************************************************************
 *
 * Function: make_shift(char *, int)
 *
 * Purpose: Create a Boyer-Moore shift table for a given pattern
 *
 * Parameters:
 *      ptrn => pattern
 *      plen => length of the data in the pattern buffer
 *
 * Returns:
 *      int * - the shift table with one entry per pattern
 *              position (a single entry holding 1 when plen is
 *              not positive).  The caller frees it.
 *
 ****************************************************************/
int *make_shift(char *ptrn, int plen)
{
    int *shift;
    int s;          /* table slot being filled */
    int tail;       /* lowest pattern index the suffix compare must reach */
    char c;         /* last character of the pattern */

    /* There is no last character to anchor on; hand back a minimal
     * table instead of indexing in front of both arrays */
    if (plen <= 0)
    {
        shift = (int *) SnortAlloc(sizeof(int));
        shift[0] = 1;
        return shift;
    }

    shift = (int *) SnortAlloc(plen * sizeof(int));

    c = ptrn[plen - 1];

    shift[plen - 1] = 1;
    tail = plen - 1;

    /* Fill the remaining slots from the back.  Indices are used instead
     * of pointers so that stepping to -1 never forms a pointer in front
     * of the pattern. */
    for (s = plen - 2; s >= 0; s--)
    {
        int p1 = plen - 2;
        int p2;
        int p3;

        do
        {
            /* step left to just past the next occurrence of the last
             * pattern character, or to -1 if there is none */
            while (p1 >= 0 && ptrn[p1--] != c)
                ;

            p2 = plen - 2;
            p3 = p1;

            /* compare leftwards from both positions until they differ,
             * the pattern start is passed, or tail is passed */
            while (p3 >= 0 && ptrn[p3--] == ptrn[p2--] && p2 >= tail)
                ;
        }
        while (p3 >= 0 && p2 >= tail);

        shift[s] = plen - s + p2 - p3;

        tail--;
    }

    return shift;
}
869
870
871
/****************************************************************
 *
 * Function: mSearch(char *, int, char *, int, int *, int *)
 *
 * Purpose: Determines if a buffer contains a (non-regex)
 *          substring, using the precomputed skip and shift
 *          tables to advance.
 *
 * Parameters:
 *      buf => data buffer we want to find the data in
 *      blen => data buffer length
 *      ptrn => pattern to find
 *      plen => length of the data in the pattern buffer
 *      skip => the B-M skip array
 *      shift => the B-M shift array
 *
 * Returns:
 *      Integer value, 1 on success (buf contains ptrn, or plen
 *      is 0), 0 on failure (ptrn not in buf)
 *
 ****************************************************************/
int mSearch(const char *buf, int blen, const char *ptrn, int plen, int *skip, int *shift)
{
    /* one past the buffer byte lined up with the end of the pattern */
    int b_idx = plen;

#ifdef DEBUG
    char *hexbuf;
    int cmpcnt = 0;

    DebugMessage(DEBUG_PATTERN_MATCH,"buf: %p blen: %d ptrn: %p "
                "plen: %d\n", buf, blen, ptrn, plen);

    hexbuf = fasthex((const u_char *)buf, blen);
    DebugMessage(DEBUG_PATTERN_MATCH,"buf: %s\n", hexbuf);
    free(hexbuf);
    hexbuf = fasthex((const u_char *)ptrn, plen);
    DebugMessage(DEBUG_PATTERN_MATCH,"ptrn: %s\n", hexbuf);
    free(hexbuf);
    DebugMessage(DEBUG_PATTERN_MATCH,"buf: %p blen: %d ptrn: %p "
                 "plen: %d\n", buf, blen, ptrn, plen);
#endif /* DEBUG */

    if(plen == 0)
        return 1;

    while(b_idx <= blen)
    {
        int p_idx = plen;
        int skip_stride;
        int shift_stride;

        /* compare right to left */
        for(;;)
        {
            /* Stepping back would leave the buffer.  Checked before
             * the access so buf[-1] is never read. */
            if(b_idx <= 0)
                return 0;

            --b_idx;
            --p_idx;

            if(buf[b_idx] != ptrn[p_idx])
                break;

#ifdef DEBUG
            cmpcnt++;
#endif

            if(p_idx == 0)
            {
#ifdef DEBUG
                DebugMessage(DEBUG_PATTERN_MATCH,
                             "match: compares = %d.\n", cmpcnt);
#endif
                return 1;
            }
        }

        /* advance by whichever table allows the larger step */
        skip_stride = skip[(unsigned char) buf[b_idx]];
        shift_stride = shift[p_idx];

        b_idx += (skip_stride > shift_stride) ? skip_stride : shift_stride;
    }

#ifdef DEBUG
    DebugMessage(DEBUG_PATTERN_MATCH,
                 "no match: compares = %d.\n", cmpcnt);
#endif

    return 0;
}
949
950
951
/****************************************************************
 *
 * Function: mSearchCI(char *, int, char *, int, int *, int *)
 *
 * Purpose: Determines if a buffer contains a (non-regex)
 *          substring, ignoring the case of the buffer data.
 *          Only the buffer bytes are passed through toupper(),
 *          the pattern bytes are compared as given, so a
 *          lower-case letter in the pattern can never match.
 *
 * Parameters:
 *      buf => data buffer we want to find the data in
 *      blen => data buffer length
 *      ptrn => pattern to find
 *      plen => length of the data in the pattern buffer
 *      skip => the B-M skip array, looked up with the
 *              upper-cased buffer byte
 *      shift => the B-M shift array
 *
 * Returns:
 *      Integer value, 1 on success (buf contains ptrn, or plen
 *      is 0), 0 on failure (ptrn not in buf)
 *
 ****************************************************************/
int mSearchCI(const char *buf, int blen, const char *ptrn, int plen, int *skip, int *shift)
{
    /* one past the buffer byte lined up with the end of the pattern */
    int window_end = plen;
#ifdef DEBUG
    int cmpcnt = 0;
#endif

    if(plen == 0)
        return 1;

    for(; window_end <= blen; )
    {
        int pat_pos = plen;
        int by_skip;
        int by_shift;

        /* walk both positions leftwards while the bytes agree */
        for(;;)
        {
            --pat_pos;
            --window_end;

            if((unsigned char) ptrn[pat_pos] !=
               toupper((unsigned char) buf[window_end]))
            {
                break;
            }

#ifdef DEBUG
            cmpcnt++;
#endif

            if(pat_pos == 0)
            {
#ifdef DEBUG
                DebugMessage(DEBUG_PATTERN_MATCH,
                             "match: compares = %d.\n",
                             cmpcnt);
#endif
                return 1;
            }
        }

        /* move on by the larger of the two table entries */
        by_skip = skip[toupper((unsigned char) buf[window_end])];
        by_shift = shift[pat_pos];

        if(by_skip > by_shift)
            window_end += by_skip;
        else
            window_end += by_shift;
    }

#ifdef DEBUG
    DebugMessage(DEBUG_PATTERN_MATCH, "no match: compares = %d.\n", cmpcnt);
#endif

    return 0;
}
1011
1012
/****************************************************************
 *
 * Function: mSearchREG(char *, int, char *, int, int *, int *)
 *
 * Purpose: Determines if a buffer contains a substring, where
 *          the pattern may use the characters '?', '*' and '\'
 *          as wildcards / escape.  The pattern is compared
 *          right to left:
 *            '?' is accepted against any buffer byte,
 *            '*' scans backwards through the buffer for the
 *                pattern character that precedes the run of
 *                '*' characters,
 *            '\' is accepted against any buffer byte and makes
 *                the next comparison (the pattern character to
 *                its left) literal.
 *
 * Parameters:
 *      buf => data buffer we want to find the data in
 *      blen => data buffer length
 *      ptrn => pattern to find
 *      plen => length of the data in the pattern buffer
 *      skip => the B-M skip array
 *      shift => the B-M shift array
 *
 * Returns:
 *      Integer value, 1 on success (buf contains ptrn, or plen
 *      is 0), 0 on failure.  A wildcard scan that reaches the
 *      start of the buffer without success yields 0.
 *
 ****************************************************************/
int mSearchREG(const char *buf, int blen, const char *ptrn, int plen, int *skip, int *shift)
{
    int b_idx = plen;
    int literal = 0;    /* set when the previous pattern char was '\' */
    int regexcomp = 0;  /* buffer bytes consumed by wildcard scans */
#ifdef DEBUG
    int cmpcnt = 0;

    DebugMessage(DEBUG_PATTERN_MATCH, "buf: %p blen: %d ptrn: %p "
                 " plen: %d b_idx: %d\n", buf, blen, ptrn, plen, b_idx);
    DebugMessage(DEBUG_PATTERN_MATCH, "packet data: \"%s\"\n", buf);
    DebugMessage(DEBUG_PATTERN_MATCH, "matching for \"%s\"\n", ptrn);
#endif /*DEBUG*/

    if(plen == 0)
        return 1;

    while(b_idx <= blen)
    {
        int p_idx = plen, skip_stride, shift_stride;

#ifdef DEBUG
        DebugMessage(DEBUG_PATTERN_MATCH, "Looping... "
                     "([%d]0x%X (%c) -> [%d]0x%X(%c))\n",
                     b_idx, buf[b_idx-1],
                     buf[b_idx-1],
                     p_idx, ptrn[p_idx-1], ptrn[p_idx-1]);
#endif

        while(buf[--b_idx] == ptrn[--p_idx]
              || (ptrn[p_idx] == '?' && !literal)
              || (ptrn[p_idx] == '*' && !literal)
              || (ptrn[p_idx] == '\\' && !literal))
        {
#ifdef DEBUG
            DebugMessage(DEBUG_PATTERN_MATCH, "comparing: b:%c -> p:%c\n",
                         buf[b_idx], ptrn[p_idx]);
            cmpcnt++;
#endif

            /* the literal flag lasts for exactly one comparison and is
             * re-armed whenever the current pattern char is a backslash */
            literal = (ptrn[p_idx] == '\\');

            if(ptrn[p_idx] == '*')
            {
#ifdef DEBUG
                DebugMessage(DEBUG_PATTERN_MATCH,"Checking wildcard matching...\n");
#endif
                while(p_idx != 0 && ptrn[--p_idx] == '*'); /* fool-proof */

                /* scan backwards for the character in front of the
                 * wildcard.  The start-of-buffer test comes before the
                 * step back so buf[-1] is never read. */
                for(;;)
                {
                    if(b_idx == 0)
                    {
#ifdef DEBUG
                        DebugMessage(DEBUG_PATTERN_MATCH,
                                     "b_idx went to 0, returning 0\n");
#endif
                        return 0;
                    }

                    if(buf[--b_idx] == ptrn[p_idx])
                        break;

#ifdef DEBUG
                    DebugMessage(DEBUG_PATTERN_MATCH, "comparing: b[%d]:%c -> p[%d]:%c\n",
                                 b_idx, buf[b_idx], p_idx, ptrn[p_idx]);
#endif

                    regexcomp++;
                }

#ifdef DEBUG
                DebugMessage(DEBUG_PATTERN_MATCH, "got wildcard final char match! (b[%d]: %c -> p[%d]: %c\n", b_idx, buf[b_idx], p_idx, ptrn[p_idx]);
#endif
            }

            if(p_idx == 0)
            {
#ifdef DEBUG
                DebugMessage(DEBUG_PATTERN_MATCH, "match: compares = %d.\n",
                             cmpcnt);
#endif
                return 1;
            }

            /* nothing left in the buffer to compare against */
            if(b_idx == 0)
                break;
        }

#ifdef DEBUG
        DebugMessage(DEBUG_PATTERN_MATCH, "skip-shifting...\n");
#endif
        skip_stride = skip[(unsigned char) buf[b_idx]];
        shift_stride = shift[p_idx];

        b_idx += (skip_stride > shift_stride) ? skip_stride : shift_stride;
#ifdef DEBUG
        DebugMessage(DEBUG_PATTERN_MATCH, "b_idx skip-shifted to %d\n", b_idx);
#endif

        /* give back the distance a wildcard scan walked backwards */
        b_idx += regexcomp;
#ifdef DEBUG
        DebugMessage(DEBUG_PATTERN_MATCH,
                     "b_idx regex compensated %d steps, to %d\n", regexcomp, b_idx);
#endif
        regexcomp = 0;
    }

#ifdef DEBUG
    DebugMessage(DEBUG_PATTERN_MATCH, "no match: compares = %d, b_idx = %d, "
                 "blen = %d\n", cmpcnt, b_idx, blen);
#endif

    return 0;
}
1127
1128