1 /* $Id$ */
2 /*
3 ** Copyright (C) 2002-2009 Sourcefire, Inc.
4 ** Copyright (C) 1998-2002 Martin Roesch <roesch@sourcefire.com>
5 
6 ** This program is free software; you can redistribute it and/or modify
7 ** it under the terms of the GNU General Public License Version 2 as
8 ** published by the Free Software Foundation.  You may not use, modify or
9 ** distribute this program under any other version of the GNU General
10 ** Public License.
11 **
12 ** This program is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 ** GNU General Public License for more details.
16 **
17 ** You should have received a copy of the GNU General Public License
18 ** along with this program; if not, write to the Free Software
19 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 */
21 
22 /***************************************************************************
23  *
24  * File: MSTRING.C
25  *
26  * Purpose: Provide a variety of string functions not included in libc.  Makes
27  *          up for the fact that the libstdc++ is hard to get reference
28  *          material on and I don't want to write any more non-portable c++
29  *          code until I have solid references and libraries to use.
30  *
31  * History:
32  *
33  * Date:      Author:  Notes:
34  * ---------- ------- ----------------------------------------------
35  *  08/19/98    MFR    Initial coding begun
36  *  03/06/99    MFR    Added Boyer-Moore pattern match routine, don't use
37  *                     mContainsSubstr() any more if you don't have to
38  *  12/31/99	JGW    Added a full Boyer-Moore implementation to increase
39  *                     performance. Added a case insensitive version of mSearch
40  *  07/24/01    MFR    Fixed Regex pattern matcher introduced by Fyodor
41  *
42  **************************************************************************/
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46 
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <ctype.h>
51 #include <sys/types.h>
52 
53 #include "mstring.h"
54 #include "debug.h"
55 #include "plugbase.h" /* needed for fasthex() */
56 #include "util.h"
57 
58 static char * mSplitAddTok(const char *, const int, const char *, const char);
59 
60 #ifdef TEST_MSTRING
61 
main()62 int main()
63 {
64     char test[] = "\0\0\0\0\0\0\0\0\0CKAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\0\0";
65     char find[] = "CKAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\0\0";
66 
67 /*   char test[] = "\x90\x90\x90\x90\x90\x90\xe8\xc0\xff\xff\xff/bin/sh\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90";
68      char find[] = "\xe8\xc0\xff\xff\xff/bin/sh";  */
69     int i;
70     int toks;
71     int *shift;
72     int *skip;
73 
74 /*   shift=make_shift(find,sizeof(find)-1);
75      skip=make_skip(find,sizeof(find)-1); */
76 
77     DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,"%d\n",
78 			    mSearch(test, sizeof(test) - 1, find,
79 				    sizeof(find) - 1, shift, skip)););
80 
81     return 0;
82 }
83 
84 #endif
85 
86 /****************************************************************
87  *
88  * Function: mSplit()
89  *
90  * Purpose: Splits a string into tokens non-destructively.
91  *
92  * Parameters:
93  *  char *
94  *      The string to be split
95  *  char *
 *      A string of token separators
97  *  int
98  *      The maximum number of tokens to be returned. A value
99  *      of 0 means to get them all.
100  *  int *
101  *      Place to store the number of tokens returned
102  *  char
103  *      The "escape metacharacter", treat the character after
 *      this character as a literal and "escape" a separator.
105  *
106  *  Note if max_toks is reached, the last tok in the returned
107  *  token array will possibly have separator characters in it.
108  *
109  *  Returns:
110  *      2D char array with one token per "row" of the returned
111  *      array.
112  *
113  ****************************************************************/
mSplit(const char * str,const char * sep_chars,const int max_toks,int * num_toks,const char meta_char)114 char ** mSplit(const char *str, const char *sep_chars, const int max_toks,
115                int *num_toks, const char meta_char)
116 {
117     size_t cur_tok = 0;  /* current token index into array of strings */
118     size_t tok_start;    /* index to start of token */
119     size_t i, j;
120     int escaped = 0;
121     /* It's rare we'll need more than this even if max_toks is set really
122      * high.  Store toks here until finished, then allocate.  If more than
123      * this is necessary, then allocate max toks */
124     char *toks_buf[TOKS_BUF_SIZE];
125     size_t toks_buf_size = TOKS_BUF_SIZE;
126     int toks_buf_size_increment = 10;
127     char **toks_alloc = NULL;   /* Used if the static buf isn't enough */
128     char **toks = toks_buf;     /* Pointer to one of the two above */
129     char **retstr;
130     char *whitespace = " \t";
131 
132     if (num_toks == NULL)
133         return NULL;
134 
135     *num_toks = 0;
136 
137     if ((str == NULL) || (strlen(str) == 0) ||
138         ((sep_chars != NULL) && (strlen(sep_chars) == 0)))
139     {
140         return NULL;
141     }
142 
143     if (sep_chars == NULL)
144         sep_chars = whitespace;
145 
146     /* Meta char cannot also be a separator char */
147     for (i = 0; i < strlen(sep_chars); i++)
148     {
149         if (sep_chars[i] == meta_char)
150             return NULL;
151     }
152 
153     /* Move past initial separator characters and whitespace */
154     for (i = 0; i < strlen(str); i++)
155     {
156         for (j = 0; j < strlen(sep_chars); j++)
157         {
158             if ((str[i] == sep_chars[j]) ||
159                 isspace((int)str[i]))
160             {
161                 break;
162             }
163         }
164 
165         /* Not a separator character or whitespace */
166         if (j == strlen(sep_chars))
167             break;
168     }
169 
170     if (i == strlen(str))
171     {
172         /* Nothing but separator characters or whitespace in string */
173         return NULL;
174     }
175 
176     /* User only wanted one tok so return the rest of the string in
177      * one tok */
178     if ((cur_tok + 1) == (size_t)max_toks)
179     {
180         retstr = (char **)SnortAlloc(sizeof(char *));
181         retstr[cur_tok] = SnortStrndup(&str[i], strlen(str) - i);
182         if (retstr[cur_tok] == NULL)
183         {
184             mSplitFree(&retstr, cur_tok + 1);
185             return NULL;
186         }
187 
188         *num_toks = cur_tok + 1;
189         return retstr;
190     }
191 
192     /* Mark the beginning of the next tok */
193     tok_start = i;
194     for (; i < strlen(str); i++)
195     {
196         if (!escaped)
197         {
198             /* Got an escape character.  Don't include it now, but
199              * must be a character after it. */
200             if (str[i] == meta_char)
201             {
202                 escaped = 1;
203                 continue;
204             }
205 
206             /* See if the current character is a separator */
207             for (j = 0; j < strlen(sep_chars); j++)
208             {
209                 if (str[i] == sep_chars[j])
210                     break;
211             }
212 
213             /* It's a normal character */
214             if (j == strlen(sep_chars))
215                 continue;
216 
217             /* Current character matched a separator character.  Trim off
218              * whitespace previous to the separator.  If we get here, there
219              * is at least one savable character */
220             for (j = i; j > tok_start; j--)
221             {
222                 if (!isspace((int)str[j - 1]))
223                     break;
224             }
225 
226             /* Allocate a buffer.  The length will not have included the
227              * meta char of escaped separators */
228             toks[cur_tok] = mSplitAddTok(&str[tok_start], j - tok_start, sep_chars, meta_char);
229 
230             /* Increment current token index */
231             cur_tok++;
232 
233             /* Move past any more separator characters or whitespace */
234             for (; i < strlen(str); i++)
235             {
236                 for (j = 0; j < strlen(sep_chars); j++)
237                 {
238                     if ((str[i] == sep_chars[j]) ||
239                         isspace((int)str[i]))
240                     {
241                         break;
242                     }
243                 }
244 
245                 /* Not a separator character or whitespace */
246                 if (j == strlen(sep_chars))
247                     break;
248             }
249 
250             /* Nothing but separator characters or whitespace left in the string */
251             if (i == strlen(str))
252             {
253                 *num_toks = cur_tok;
254 
255                 if (toks != toks_alloc)
256                 {
257                     retstr = (char **)SnortAlloc(sizeof(char *) * cur_tok);
258                     memcpy(retstr, toks, (sizeof(char *) * cur_tok));
259                 }
260                 else
261                 {
262                     retstr = toks;
263                 }
264 
265                 return retstr;
266             }
267 
268             /* Reached the size of our current string buffer and need to
269              * allocate something bigger.  Only get here once if max toks
270              * set to something other than 0 because we'll just allocate
271              * max toks in that case. */
272             if (cur_tok == toks_buf_size)
273             {
274                 char **tmp;
275 
276                 if (toks_alloc != NULL)
277                     tmp = toks_alloc;
278                 else
279                     tmp = toks_buf;
280 
281                 if (max_toks != 0)
282                     toks_buf_size = max_toks;
283                 else
284                     toks_buf_size = cur_tok + toks_buf_size_increment;
285 
286                 toks_alloc = (char **)SnortAlloc(sizeof(char *) * toks_buf_size);
287                 memcpy(toks_alloc, tmp, (sizeof(char *) * cur_tok));
288                 toks = toks_alloc;
289 
290                 if (tmp != toks_buf)
291                     free(tmp);
292             }
293 
294             if ((max_toks != 0) && ((cur_tok + 1) == (size_t)max_toks))
295             {
296                 /* Return rest of string as last tok */
297                 *num_toks = cur_tok + 1;
298 
299                 /* Already got a ret string */
300                 if (toks != toks_alloc)
301                 {
302                     retstr = (char **)SnortAlloc(sizeof(char *) * (cur_tok + 1));
303                     memcpy(retstr, toks, (sizeof(char *) * (cur_tok + 1)));
304                 }
305                 else
306                 {
307                     retstr = toks;
308                 }
309 
310                 /* Trim whitespace at end of last tok */
311                 for (j = strlen(str); j > tok_start; j--)
312                 {
313                     if (!isspace((int)str[j - 1]))
314                         break;
315                 }
316 
317                 retstr[cur_tok] = SnortStrndup(&str[i], j - i);
318                 if (retstr[cur_tok] == NULL)
319                 {
320                     mSplitFree(&retstr, cur_tok + 1);
321                     return NULL;
322                 }
323 
324                 return retstr;
325             }
326 
327             tok_start = i;
328         }
329         else
330         {
331             /* This character is escaped with the meta char */
332             escaped = 0;
333         }
334     }
335 
336     /* Last character was an escape character */
337     if (escaped)
338     {
339         for (i = 0; i < cur_tok; i++)
340             free(toks[i]);
341 
342         if (toks == toks_alloc)
343             free(toks_alloc);
344 
345         return NULL;
346     }
347 
348     /* Trim whitespace at end of last tok */
349     for (j = i; j > tok_start; j--)
350     {
351         if (!isspace((int)str[j - 1]))
352             break;
353     }
354 
355     /* Last character was not a separator character so we've got
356      * one more tok.  Unescape escaped sepatator charactors */
357     if (toks != toks_alloc)
358     {
359         retstr = (char **)SnortAlloc(sizeof(char *) * (cur_tok + 1));
360         memcpy(retstr, toks, (sizeof(char *) * (cur_tok + 1)));
361     }
362     else
363     {
364         retstr = toks;
365     }
366 
367     retstr[cur_tok] = mSplitAddTok(&str[tok_start], j - tok_start, sep_chars, meta_char);
368 
369     /* Just add one to cur_tok index instead of incrementing
370      * since we're done */
371     *num_toks = cur_tok + 1;
372     return retstr;
373 }
374 
375 /****************************************************************
376  *
377  *  Function: mSplitSpecial()
378  *
 *  Purpose: Splits a string into tokens.  The separator can be a
 *  multi-character string.  Note that trailing whitespace in str is
 *  overwritten with NULs in place, so str itself is modified.
381  *
382  *  Parameters:
383  *      char *str => the string to be split
384  *      char *sep => a string of token separator
385  *      int max_toks => how many tokens should be returned
386  *      int *toks => place to store the number of tokens found in str
387  *      char meta => the "escape metacharacter", treat the character
388  *                   after this character as a literal and "escape" a
 *                   separator
390  *
391  *  Returns:
392  *      2D char array with one token per "row" of the returned
393  *      array.
394  *
395  ****************************************************************/
mSplitSpecial(char * str,const char * sep,int max_toks,int * toks,const char meta)396 char **mSplitSpecial(char *str, const char *sep, int max_toks,
397                      int *toks, const char meta)
398 {
399     char **retstr;      /* 2D array which is returned to caller */
400     char *idx;          /* index pointer into str */
401     char *end;          /* ptr to end of str */
402     char *last_match_idx;          /* index pointer into str */
403     const char *sep_end;/* ptr to end of separator string */
404     const char *sep_idx;/* index ptr into separator string */
405     int len = 0;        /* length of current token string */
406     int sep_len = 0;        /* length of seperator string */
407     int curr_str = 0;       /* current index into the 2D return array */
408     unsigned char last_char = 0xFF;  /* initialize to something that won't be in meta */
409 
410 
411     int matched = 0;
412 
413     if(!toks) return NULL;
414 
415     *toks = 0;
416 
417     if (!str || !*str) return NULL;
418 
419     DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
420                 "[*] Splitting string: %s\n", str);
421             DebugMessage(DEBUG_PATTERN_MATCH, "curr_str = %d\n", curr_str););
422 
423     /*
424      * find the ends of the respective passed strings so our while() loops
425      * know where to stop
426      */
427     sep_len = strlen(sep);
428     sep_end = sep + sep_len;
429     end = str + strlen(str);
430 
431     /* remove trailing whitespace */
432     while(isspace((int) *(end - 1)) && ((end - 1) >= str))
433         *(--end) = '\0';    /* -1 because of NULL */
434 
435     /* set our indexing pointers */
436     idx = str;
437 
438     /*
439      * alloc space for the return string, this is where the pointers to the
440      * tokens will be stored
441      */
442     retstr = (char **) SnortAlloc( sizeof(char **) * max_toks );
443 
444     max_toks--;
445 
446     DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
447                 "max_toks = %d  curr_str = %d\n",
448                 max_toks, curr_str););
449 
450     /* loop thru each letter in the string being tokenized */
451     while(idx < end)
452     {
453         /*
454          * if the current string-indexed char matches the initial
455          * seperator char...
456          */
457         if((*idx == *sep) && (last_char != meta))
458         {
459             /* assume we have a complete match and determine otherwise */
460             matched = 1;
461 
462             /* check length of remaining string */
463             if (idx + sep_len < end)
464             {
465                 /* we suspect a complete match and save pointer */
466                 last_match_idx = idx;
467 
468                 /* check that the next chars match the remainder of the separator */
469                 for ( sep_idx = sep; sep_idx < sep_end; sep_idx++, idx++ )
470                 {
471                     if (*idx != *sep_idx)
472                         matched = 0;
473                 }
474 
475                 /* reset the check pointer */
476                 idx = last_match_idx;
477             }
478             else
479                 matched = 0;
480 
481             if ( matched )
482             {
483                 /* if there's something to store... */
484                 if(len > 0)
485                 {
486                     DEBUG_WRAP(
487                             DebugMessage(DEBUG_PATTERN_MATCH,
488                                 "Allocating %d bytes for token ", len + 1););
489                     if(curr_str <= max_toks)
490                     {
491                         /* allocate space for the new token */
492                         retstr[curr_str] = (char *)SnortAlloc((len + 1) * sizeof(char));
493 
494                         /* copy the token into the return string array */
495                         memcpy(retstr[curr_str], (idx - len), len);
496                         retstr[curr_str][len] = 0;
497                         DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
498                                     "tok[%d]: %s\n", curr_str,
499                                     retstr[curr_str]););
500 
501                         /* twiddle the necessary pointers and vars */
502                         len = 0;
503                         curr_str++;
504                         DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
505                                     "curr_str = %d\n", curr_str);
506                                 DebugMessage(DEBUG_PATTERN_MATCH,
507                                     "max_toks = %d  curr_str = %d\n",
508                                     max_toks, curr_str););
509 
510                         last_char = *idx;
511                         idx+=sep_len;
512                     }
513 
514                     DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
515                                 "Checking if curr_str (%d) >= max_toks (%d)\n",
516                                curr_str, max_toks););
517 
518                     /*
519                      * if we've gotten all the tokens requested, return the
520                      * list
521                      */
522                     if(curr_str >= max_toks)
523                     {
524                         while(isspace((int) *idx))
525                             idx++;
526 
527                         len = end - idx;
528                         DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
529                                     "Finishing up...\n");
530                                 DebugMessage(DEBUG_PATTERN_MATCH,
531                                     "Allocating %d bytes "
532                                     "for last token ", len + 1););
533                         fflush(stdout);
534 
535                         retstr[curr_str] = (char *)SnortAlloc((len + 1) * sizeof(char));
536 
537                         memcpy(retstr[curr_str], idx, len);
538                         retstr[curr_str][len] = 0;
539 
540                         DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
541                                     "tok[%d]: %s\n", curr_str,
542                                     retstr[curr_str]););
543 
544                         *toks = curr_str + 1;
545                         DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
546                                     "max_toks = %d  curr_str = %d\n",
547                                     max_toks, curr_str);
548                                 DebugMessage(DEBUG_PATTERN_MATCH,
549                                     "mSplit got %d tokens!\n", *toks););
550 
551                         return retstr;
552                     }
553                 }
554                 else
555                     /*
556                      * otherwise, the previous char was a seperator as well,
557                      * and we should just continue
558                      */
559                 {
560                     last_char = *idx;
561                     idx+=sep_len;
562                     /* make sure to reset this so we test all the sep. chars */
563                     sep_idx = sep;
564                     len = 0;
565                 }
566             }
567         }
568 
569         sep_idx = sep;
570         len++;
571         last_char = *idx;
572         idx++;
573     }
574 
575     /* put the last string into the list */
576 
577     if(len > 0)
578     {
579         DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
580                     "Allocating %d bytes for last token ", len + 1););
581 
582         retstr[curr_str] = (char *)SnortAlloc((len + 1) * sizeof(char));
583 
584         memcpy(retstr[curr_str], (idx - len), len);
585         retstr[curr_str][len] = 0;
586 
587         DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,"tok[%d]: %s\n", curr_str,
588                     retstr[curr_str]););
589         *toks = curr_str + 1;
590     }
591 
592     DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
593                 "mSplitSpecial got %d tokens!\n", *toks););
594 
595     /* return the token list */
596     return retstr;
597 }
598 
599 /* Will not return NULL.  SnortAlloc will fatal if it fails */
/*
 * Walk the first len bytes of str and produce the unescaped token.
 *
 * A meta character followed by a separator is dropped, leaving just the
 * separator.  A meta character followed by any other character is kept
 * together with that character.  A meta character that is the very last
 * byte is dropped.
 *
 * When out is NULL nothing is written and only the resulting length is
 * computed; otherwise the bytes are stored in out (no terminator is added).
 * Using the same routine for both passes keeps the measured and the written
 * length identical by construction.
 */
static size_t mSplitUnescape(const char *str, int len, const char *sep_chars,
                             char meta_char, char *out)
{
    const size_t sep_len = strlen(sep_chars);
    size_t n = 0;
    int got_meta = 0;
    int i;

    for (i = 0; i < len; i++)
    {
        const char c = str[i];

        if (!got_meta)
        {
            if (c == meta_char)
            {
                /* Hold the meta char back until we see what follows it */
                got_meta = 1;
                continue;
            }
        }
        else
        {
            /* It's a non-separator character, so include
             * the meta character in the return tok */
            if (memchr(sep_chars, c, sep_len) == NULL)
            {
                if (out != NULL)
                    out[n] = meta_char;
                n++;
            }

            got_meta = 0;
        }

        if (out != NULL)
            out[n] = c;
        n++;
    }

    return n;
}

/*
 * Build a freshly allocated, NUL-terminated token from the first len bytes
 * of str, unescaping separators that were protected by meta_char.
 *
 * The escape state now starts clean for both the counting pass and the fill
 * pass.  Previously a token ending in a meta character left the state set
 * after counting, so the fill pass wrote more bytes than had been allocated.
 */
static char * mSplitAddTok(const char *str, const int len, const char *sep_chars, const char meta_char)
{
    const size_t tok_len = mSplitUnescape(str, len, sep_chars, meta_char, NULL);
    char *tok = (char *)SnortAlloc(tok_len + 1);

    mSplitUnescape(str, len, sep_chars, meta_char, tok);

    /* Terminate explicitly instead of relying on the allocator */
    tok[tok_len] = '\0';

    return tok;
}
674 
675 /****************************************************************
676  *
677  * Free the buffer allocated by mSplit().
678  *
679  * char** toks = NULL;
680  * int num_toks = 0;
 * toks = mSplit(str, " ", 2, &num_toks, 0);
682  * mSplitFree(&toks, num_toks);
683  *
684  * At this point, toks is again NULL.
685  *
686  ****************************************************************/
/*
 * Release a token array: free each of the first num_toks strings, then the
 * array itself, and finally clear the caller's pointer.  Does nothing when
 * pbuf or the array it points at is NULL.
 */
void mSplitFree(char ***pbuf, int num_toks)
{
    char **rows;    /* the array of token strings being released */
    int row = 0;

    if (pbuf == NULL)
        return;

    rows = *pbuf;
    if (rows == NULL)
        return;

    while (row < num_toks)
    {
        char *tok = rows[row];

        if (tok != NULL)
        {
            free(tok);
            rows[row] = NULL;
        }

        row++;
    }

    free(rows);

    /* Leave the caller holding NULL rather than a dangling pointer */
    *pbuf = NULL;
}
711 
712 /****************************************************************
713  *
714  *  Function: mContainsSubstr(char *, int, char *, int)
715  *
716  *  Purpose: Determines if a string contains a (non-regex)
717  *           substring.
718  *
719  *  Parameters:
720  *      buf => data buffer we want to find the data in
721  *      b_len => data buffer length
722  *      pat => pattern to find
723  *      p_len => length of the data in the pattern buffer
724  *
725  *  Returns:
 *      Integer value, 1 on success (str contains substr), 0 on
727  *      failure (substr not in str)
728  *
729  ****************************************************************/
/*
 * Naive substring search over length-delimited buffers (no NUL terminator
 * is required in either buffer).
 *
 *  buf, b_len  data buffer and the number of valid bytes in it
 *  pat, p_len  pattern and the number of valid bytes in it
 *
 * Returns 1 when pat occurs in buf, 0 otherwise.  A NULL pointer or a
 * non-positive length on either side yields 0 without touching the buffers.
 */
int mContainsSubstr(const char *buf, int b_len, const char *pat, int p_len)
{
    const char *b_idx;  /* index ptr into the data buffer */
    const char *p_idx;  /* index ptr into the pattern buffer */
    const char *b_end;  /* ptr to the end of the data buffer */
    int m_cnt = 0;      /* number of pattern matches so far... */
#ifdef DEBUG
    unsigned long loopcnt = 0;
#endif

    /* An empty buffer or pattern can never produce a match, and must not
     * be dereferenced */
    if((buf == NULL) || (pat == NULL) || (b_len <= 0) || (p_len <= 0))
        return 0;

    /* mark the end of the strs */
    b_end = buf + b_len;

    /* init the index ptrs */
    b_idx = buf;
    p_idx = pat;

    while(b_idx < b_end)
    {
#ifdef DEBUG
        loopcnt++;
#endif

        if(*p_idx == *b_idx)
        {
            if(m_cnt == (p_len - 1))
            {
#ifdef DEBUG
                /* loopcnt only exists in DEBUG builds */
                DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
                            "\n%ld compares for match\n", loopcnt););
#endif
                return 1;
            }
            m_cnt++;
            b_idx++;
            p_idx++;
        }
        else
        {
            if(m_cnt == 0)
            {
                b_idx++;
            }
            else
            {
                /* partial match failed: resume one byte after the place
                 * where this attempt started */
                b_idx = b_idx - (m_cnt - 1);
            }

            p_idx = pat;

            m_cnt = 0;
        }
    }

    /* if we make it here we didn't find what we were looking for */
    return 0;
}
788 
789 
790 
791 
792 /****************************************************************
793  *
794  *  Function: make_skip(char *, int)
795  *
796  *  Purpose: Create a Boyer-Moore skip table for a given pattern
797  *
798  *  Parameters:
799  *      ptrn => pattern
800  *      plen => length of the data in the pattern buffer
801  *
802  *  Returns:
803  *      int * - the skip table
804  *
805  ****************************************************************/
/*
 * Build the 256-entry skip table used by mSearch()/mSearchCI().
 *
 * Every byte value starts at plen + 1.  Each byte of the pattern is then
 * given the distance from its position to the end of the pattern (plen for
 * the first byte down to 1 for the last); a byte that occurs more than once
 * keeps the value of its last occurrence.  The caller owns the table.
 */
int *make_skip(char *ptrn, int plen)
{
    int *table = (int *) SnortAlloc(256 * sizeof(int));
    int byte_val;

    for (byte_val = 0; byte_val < 256; byte_val++)
    {
        table[byte_val] = plen + 1;
    }

    /* plen doubles as the countdown and as the value stored */
    for (; plen != 0; plen--, ptrn++)
    {
        table[(unsigned char) *ptrn] = plen;
    }

    return table;
}
819 
820 
821 
822 /****************************************************************
823  *
824  *  Function: make_shift(char *, int)
825  *
826  *  Purpose: Create a Boyer-Moore shift table for a given pattern
827  *
828  *  Parameters:
829  *      ptrn => pattern
830  *      plen => length of the data in the pattern buffer
831  *
832  *  Returns:
833  *      int * - the shift table
834  *
835  ****************************************************************/
/*
 * Build the plen-entry shift table that mSearch()/mSearchCI() index with the
 * pattern position at which a comparison failed.  The table is allocated
 * with SnortAlloc() and returned to the caller.
 *
 * The last entry is set to 1; the remaining entries are filled from the end
 * of the table towards the front, one per pass of the outer loop.
 *
 * NOTE(review): with plen == 0 the code below reads ptrn[-1] and writes
 * shift[-1]; confirm callers never pass an empty pattern.
 * NOTE(review): the scanning pointers (p1, p3) can be decremented to one
 * element before ptrn; this relies on such pointers being comparable.
 */
int *make_shift(char *ptrn, int plen)
{
    int *shift = (int *) SnortAlloc(plen * sizeof(int));
    int *sptr = shift + plen - 1;   /* table entry being filled, starts at the last one */
    char *pptr = ptrn + plen - 1;   /* lower bound for the backwards compare, moves left each pass */
    char c;

     c = ptrn[plen - 1];            /* last character of the pattern */

    *sptr = 1;

    /* step sptr back one entry per pass until the first entry is done */
    while(sptr-- != shift)
    {
        /* p1 starts at the second-to-last pattern character */
        char *p1 = ptrn + plen - 2, *p2, *p3;

        do
        {
            /* scan backwards for the next earlier occurrence of c; p1 ends
             * up one position before the character that stopped the scan */
            while(p1 >= ptrn && *p1-- != c);

            p2 = ptrn + plen - 2;
            p3 = p1;

            /* compare backwards: p2 walks the pattern tail, p3 walks the
             * text in front of the occurrence found above */
            while(p3 >= ptrn && *p3-- == *p2-- && p2 >= pptr);
        }
        /* try again while neither the pattern start (p3) nor the current
         * lower bound (p2 vs. pptr) has been passed */
        while(p3 >= ptrn && p2 >= pptr);

        /* (plen - index of this entry) + (p2 - p3) */
        *sptr = shift + plen - sptr + p2 - p3;

        pptr--;
    }

    return shift;
}
869 
870 
871 
872 /****************************************************************
873  *
874  *  Function: mSearch(char *, int, char *, int)
875  *
876  *  Purpose: Determines if a string contains a (non-regex)
877  *           substring.
878  *
879  *  Parameters:
880  *      buf => data buffer we want to find the data in
881  *      blen => data buffer length
882  *      ptrn => pattern to find
883  *      plen => length of the data in the pattern buffer
884  *      skip => the B-M skip array
885  *      shift => the B-M shift array
886  *
887  *  Returns:
 *      Integer value, 1 on success (str contains substr), 0 on
889  *      failure (substr not in str)
890  *
891  ****************************************************************/
mSearch(const char * buf,int blen,const char * ptrn,int plen,int * skip,int * shift)892 int mSearch(const char *buf, int blen, const char *ptrn, int plen, int *skip, int *shift)
893 {
894     int b_idx = plen;
895 
896 #ifdef DEBUG
897     char *hexbuf;
898     int cmpcnt = 0;
899 #endif
900 
901     DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,"buf: %p  blen: %d  ptrn: %p  "
902                 "plen: %d\n", buf, blen, ptrn, plen););
903 
904 #ifdef DEBUG
905     hexbuf = fasthex((const u_char *)buf, blen);
906     DebugMessage(DEBUG_PATTERN_MATCH,"buf: %s\n", hexbuf);
907     free(hexbuf);
908     hexbuf = fasthex((const u_char *)ptrn, plen);
909     DebugMessage(DEBUG_PATTERN_MATCH,"ptrn: %s\n", hexbuf);
910     free(hexbuf);
911     DebugMessage(DEBUG_PATTERN_MATCH,"buf: %p  blen: %d  ptrn: %p  "
912                  "plen: %d\n", buf, blen, ptrn, plen);
913 #endif /* DEBUG */
914     if(plen == 0)
915         return 1;
916 
917     while(b_idx <= blen)
918     {
919         int p_idx = plen, skip_stride, shift_stride;
920 
921         while(buf[--b_idx] == ptrn[--p_idx])
922         {
923 #ifdef DEBUG
924             cmpcnt++;
925 #endif
926             if(b_idx < 0)
927                 return 0;
928 
929             if(p_idx == 0)
930             {
931                 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
932                             "match: compares = %d.\n", cmpcnt););
933 
934                 return 1;
935             }
936         }
937 
938         skip_stride = skip[(unsigned char) buf[b_idx]];
939         shift_stride = shift[p_idx];
940 
941         b_idx += (skip_stride > shift_stride) ? skip_stride : shift_stride;
942     }
943 
944     DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
945                 "no match: compares = %d.\n", cmpcnt););
946 
947     return 0;
948 }
949 
950 
951 
952 /****************************************************************
953  *
954  *  Function: mSearchCI(char *, int, char *, int)
955  *
956  *  Purpose: Determines if a string contains a (non-regex)
957  *           substring matching is case insensitive
958  *
959  *  Parameters:
960  *      buf => data buffer we want to find the data in
961  *      blen => data buffer length
962  *      ptrn => pattern to find
963  *      plen => length of the data in the pattern buffer
964  *      skip => the B-M skip array
965  *      shift => the B-M shift array
966  *
967  *  Returns:
968  *      Integer value, 1 on success (str constains substr), 0 on
969  *      failure (substr not in str)
970  *
971  ****************************************************************/
mSearchCI(const char * buf,int blen,const char * ptrn,int plen,int * skip,int * shift)972 int mSearchCI(const char *buf, int blen, const char *ptrn, int plen, int *skip, int *shift)
973 {
974     int b_idx = plen;
975 #ifdef DEBUG
976     int cmpcnt = 0;
977 #endif
978 
979     if(plen == 0)
980         return 1;
981 
982     while(b_idx <= blen)
983     {
984         int p_idx = plen, skip_stride, shift_stride;
985 
986         while((unsigned char) ptrn[--p_idx] ==
987                 toupper((unsigned char) buf[--b_idx]))
988         {
989 #ifdef DEBUG
990             cmpcnt++;
991 #endif
992             if(p_idx == 0)
993             {
994                 DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
995                             "match: compares = %d.\n",
996                             cmpcnt););
997                 return 1;
998             }
999         }
1000 
1001         skip_stride = skip[toupper((unsigned char) buf[b_idx])];
1002         shift_stride = shift[p_idx];
1003 
1004         b_idx += (skip_stride > shift_stride) ? skip_stride : shift_stride;
1005     }
1006 
1007     DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH, "no match: compares = %d.\n", cmpcnt););
1008 
1009     return 0;
1010 }
1011 
1012 
1013 /****************************************************************
1014  *
1015  *  Function: mSearchREG(char *, int, char *, int)
1016  *
1017  *  Purpose: Determines if a string contains a (regex)
1018  *           substring.
1019  *
1020  *  Parameters:
1021  *      buf => data buffer we want to find the data in
1022  *      blen => data buffer length
1023  *      ptrn => pattern to find
1024  *      plen => length of the data in the pattern buffer
1025  *      skip => the B-M skip array
1026  *      shift => the B-M shift array
1027  *
1028  *  Returns:
1029  *      Integer value, 1 on success (str constains substr), 0 on
1030  *      failure (substr not in str)
1031  *
1032  ****************************************************************/
mSearchREG(const char * buf,int blen,const char * ptrn,int plen,int * skip,int * shift)1033 int mSearchREG(const char *buf, int blen, const char *ptrn, int plen, int *skip, int *shift)
1034 {
1035     int b_idx = plen;
1036     int literal = 0;
1037     int regexcomp = 0;
1038 #ifdef DEBUG
1039     int cmpcnt = 0;
1040 #endif /*DEBUG*/
1041 
1042     DEBUG_WRAP(
1043 	       DebugMessage(DEBUG_PATTERN_MATCH, "buf: %p  blen: %d  ptrn: %p "
1044 			    " plen: %d b_idx: %d\n", buf, blen, ptrn, plen, b_idx);
1045 	       DebugMessage(DEBUG_PATTERN_MATCH, "packet data: \"%s\"\n", buf);
1046 	       DebugMessage(DEBUG_PATTERN_MATCH, "matching for \"%s\"\n", ptrn);
1047 	       );
1048 
1049     if(plen == 0)
1050         return 1;
1051 
1052     while(b_idx <= blen)
1053     {
1054         int p_idx = plen, skip_stride, shift_stride;
1055 
1056 	DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH, "Looping... "
1057 				"([%d]0x%X (%c) -> [%d]0x%X(%c))\n",
1058 				b_idx, buf[b_idx-1],
1059 				buf[b_idx-1],
1060 				p_idx, ptrn[p_idx-1], ptrn[p_idx-1]););
1061 
1062         while(buf[--b_idx] == ptrn[--p_idx]
1063               || (ptrn[p_idx] == '?' && !literal)
1064               || (ptrn[p_idx] == '*' && !literal)
1065               || (ptrn[p_idx] == '\\' && !literal))
1066         {
1067 	    DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH, "comparing: b:%c -> p:%c\n",
1068 				    buf[b_idx], ptrn[p_idx]););
1069 #ifdef DEBUG
1070             cmpcnt++;
1071 #endif
1072 
1073             if(literal)
1074                 literal = 0;
1075             if(!literal && ptrn[p_idx] == '\\')
1076                 literal = 1;
1077             if(ptrn[p_idx] == '*')
1078             {
1079 		DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,"Checking wildcard matching...\n"););
1080                 while(p_idx != 0 && ptrn[--p_idx] == '*'); /* fool-proof */
1081 
1082                 while(buf[--b_idx] != ptrn[p_idx])
1083                 {
1084 		    DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH, "comparing: b[%d]:%c -> p[%d]:%c\n",
1085 					    b_idx, buf[b_idx], p_idx, ptrn[p_idx]););
1086 
1087                    regexcomp++;
1088                     if(b_idx == 0)
1089                     {
1090 			DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
1091 						"b_idx went to 0, returning 0\n");)
1092                         return 0;
1093                     }
1094                 }
1095 
1096 		DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH, "got wildcard final char match! (b[%d]: %c -> p[%d]: %c\n", b_idx, buf[b_idx], p_idx, ptrn[p_idx]););
1097             }
1098 
1099             if(p_idx == 0)
1100             {
1101 		DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH, "match: compares = %d.\n",
1102 					cmpcnt););
1103                 return 1;
1104             }
1105 
1106             if(b_idx == 0)
1107                 break;
1108         }
1109 
1110 	DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH, "skip-shifting...\n"););
1111 	skip_stride = skip[(unsigned char) buf[b_idx]];
1112 	shift_stride = shift[p_idx];
1113 
1114 	b_idx += (skip_stride > shift_stride) ? skip_stride : shift_stride;
1115 	DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH, "b_idx skip-shifted to %d\n", b_idx););
1116 	b_idx += regexcomp;
1117 	DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH,
1118 				"b_idx regex compensated %d steps, to %d\n", regexcomp, b_idx););
1119 	regexcomp = 0;
1120     }
1121 
1122     DEBUG_WRAP(DebugMessage(DEBUG_PATTERN_MATCH, "no match: compares = %d, b_idx = %d, "
1123 			    "blen = %d\n", cmpcnt, b_idx, blen););
1124 
1125     return 0;
1126 }
1127 
1128