1*0a6a1f1dSLionel Sambuc /* $NetBSD: bm.c,v 1.13 2014/06/23 10:43:25 shm Exp $ */
22fe8fb19SBen Gras
32fe8fb19SBen Gras /*-
42fe8fb19SBen Gras * Copyright (c) 1994
52fe8fb19SBen Gras * The Regents of the University of California. All rights reserved.
62fe8fb19SBen Gras *
72fe8fb19SBen Gras * This code is derived from software contributed to Berkeley by
82fe8fb19SBen Gras * Andrew Hume of AT&T Bell Laboratories.
92fe8fb19SBen Gras *
102fe8fb19SBen Gras * Redistribution and use in source and binary forms, with or without
112fe8fb19SBen Gras * modification, are permitted provided that the following conditions
122fe8fb19SBen Gras * are met:
132fe8fb19SBen Gras * 1. Redistributions of source code must retain the above copyright
142fe8fb19SBen Gras * notice, this list of conditions and the following disclaimer.
152fe8fb19SBen Gras * 2. Redistributions in binary form must reproduce the above copyright
162fe8fb19SBen Gras * notice, this list of conditions and the following disclaimer in the
172fe8fb19SBen Gras * documentation and/or other materials provided with the distribution.
182fe8fb19SBen Gras * 3. Neither the name of the University nor the names of its contributors
192fe8fb19SBen Gras * may be used to endorse or promote products derived from this software
202fe8fb19SBen Gras * without specific prior written permission.
212fe8fb19SBen Gras *
222fe8fb19SBen Gras * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
232fe8fb19SBen Gras * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
242fe8fb19SBen Gras * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
252fe8fb19SBen Gras * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
262fe8fb19SBen Gras * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
272fe8fb19SBen Gras * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
282fe8fb19SBen Gras * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
292fe8fb19SBen Gras * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
302fe8fb19SBen Gras * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
312fe8fb19SBen Gras * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
322fe8fb19SBen Gras * SUCH DAMAGE.
332fe8fb19SBen Gras */
342fe8fb19SBen Gras
352fe8fb19SBen Gras #include <sys/cdefs.h>
362fe8fb19SBen Gras #if defined(LIBC_SCCS) && !defined(lint)
372fe8fb19SBen Gras #if 0
382fe8fb19SBen Gras static char sccsid[] = "@(#)bm.c 8.7 (Berkeley) 6/21/94";
392fe8fb19SBen Gras #else
40*0a6a1f1dSLionel Sambuc __RCSID("$NetBSD: bm.c,v 1.13 2014/06/23 10:43:25 shm Exp $");
412fe8fb19SBen Gras #endif
422fe8fb19SBen Gras #endif /* LIBC_SCCS && not lint */
432fe8fb19SBen Gras
442fe8fb19SBen Gras #include "namespace.h"
452fe8fb19SBen Gras #include <sys/types.h>
462fe8fb19SBen Gras
472fe8fb19SBen Gras #include <assert.h>
482fe8fb19SBen Gras #include <bm.h>
492fe8fb19SBen Gras #include <errno.h>
502fe8fb19SBen Gras #include <stdlib.h>
512fe8fb19SBen Gras #include <string.h>
522fe8fb19SBen Gras
532fe8fb19SBen Gras #ifdef __weak_alias
542fe8fb19SBen Gras __weak_alias(bm_comp,_bm_comp)
552fe8fb19SBen Gras __weak_alias(bm_exec,_bm_exec)
562fe8fb19SBen Gras __weak_alias(bm_free,_bm_free)
572fe8fb19SBen Gras #endif
582fe8fb19SBen Gras
592fe8fb19SBen Gras /*
602fe8fb19SBen Gras * XXX
612fe8fb19SBen Gras * The default frequency table starts at 99 and counts down. The default
622fe8fb19SBen Gras * table should probably be oriented toward text, and will necessarily be
632fe8fb19SBen Gras * locale specific. This one is for English. It was derived from the
642fe8fb19SBen Gras * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb
652fe8fb19SBen Gras * of email and random text. Change it if you can find something better.
662fe8fb19SBen Gras */
672fe8fb19SBen Gras static u_char const freq_def[256] = {
682fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
692fe8fb19SBen Gras 0, 77, 90, 0, 0, 0, 0, 0,
702fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
712fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
722fe8fb19SBen Gras 99, 28, 42, 27, 16, 14, 20, 51,
732fe8fb19SBen Gras 66, 65, 59, 24, 75, 76, 84, 56,
742fe8fb19SBen Gras 72, 74, 64, 55, 54, 47, 41, 37,
752fe8fb19SBen Gras 44, 61, 70, 43, 23, 53, 49, 22,
762fe8fb19SBen Gras 33, 58, 40, 46, 45, 57, 60, 26,
772fe8fb19SBen Gras 30, 63, 21, 12, 32, 50, 38, 39,
782fe8fb19SBen Gras 34, 11, 48, 67, 62, 35, 15, 29,
792fe8fb19SBen Gras 71, 18, 9, 17, 25, 13, 10, 52,
802fe8fb19SBen Gras 36, 95, 78, 86, 87, 98, 82, 80,
812fe8fb19SBen Gras 88, 94, 19, 68, 89, 83, 93, 96,
822fe8fb19SBen Gras 81, 7, 91, 92, 97, 85, 69, 73,
832fe8fb19SBen Gras 31, 79, 8, 5, 4, 6, 3, 0,
842fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
852fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
862fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
872fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
882fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
892fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
902fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
912fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
922fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
932fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
942fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
952fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
962fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
972fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
982fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
992fe8fb19SBen Gras 0, 0, 0, 0, 0, 0, 0, 0,
1002fe8fb19SBen Gras };
1012fe8fb19SBen Gras
1022fe8fb19SBen Gras bm_pat *
bm_comp(u_char const * pb,size_t len,u_char const * freq)103f14fb602SLionel Sambuc bm_comp(u_char const *pb, size_t len, u_char const *freq)
1042fe8fb19SBen Gras {
1052fe8fb19SBen Gras u_char const *pe, *p;
1062fe8fb19SBen Gras size_t *d, r;
1072fe8fb19SBen Gras int j;
1082fe8fb19SBen Gras int sv_errno;
1092fe8fb19SBen Gras bm_pat *pat;
1102fe8fb19SBen Gras
1112fe8fb19SBen Gras _DIAGASSERT(pb != NULL);
1122fe8fb19SBen Gras /* freq may be NULL */
1132fe8fb19SBen Gras
1142fe8fb19SBen Gras if (len == 0) {
1152fe8fb19SBen Gras errno = EINVAL;
1162fe8fb19SBen Gras return (NULL);
1172fe8fb19SBen Gras }
1182fe8fb19SBen Gras if ((pat = malloc(sizeof(*pat))) == NULL)
1192fe8fb19SBen Gras return (NULL);
1202fe8fb19SBen Gras pat->pat = NULL;
1212fe8fb19SBen Gras pat->delta = NULL;
1222fe8fb19SBen Gras
1232fe8fb19SBen Gras pat->patlen = len; /* copy pattern */
1242fe8fb19SBen Gras if ((pat->pat = malloc(pat->patlen)) == NULL)
1252fe8fb19SBen Gras goto mem;
1262fe8fb19SBen Gras memcpy(pat->pat, pb, pat->patlen);
1272fe8fb19SBen Gras /* get skip delta */
1282fe8fb19SBen Gras if ((pat->delta = malloc(256 * sizeof(*d))) == NULL)
1292fe8fb19SBen Gras goto mem;
1302fe8fb19SBen Gras for (j = 0, d = pat->delta; j < 256; j++)
1312fe8fb19SBen Gras d[j] = pat->patlen;
1322fe8fb19SBen Gras for (pe = pb + pat->patlen - 1; pb <= pe; pb++)
1332fe8fb19SBen Gras d[*pb] = pe - pb;
1342fe8fb19SBen Gras
1352fe8fb19SBen Gras if (freq == NULL) /* default freq table */
1362fe8fb19SBen Gras freq = freq_def;
1372fe8fb19SBen Gras r = 0; /* get guard */
1382fe8fb19SBen Gras for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++)
1392fe8fb19SBen Gras if (freq[*pb] < freq[pat->pat[r]])
1402fe8fb19SBen Gras r = pb - pat->pat;
1412fe8fb19SBen Gras pat->rarec = pat->pat[r];
1422fe8fb19SBen Gras pat->rareoff = r - (pat->patlen - 1);
1432fe8fb19SBen Gras
1442fe8fb19SBen Gras /* get md2 shift */
1452fe8fb19SBen Gras for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--)
1462fe8fb19SBen Gras if (*p == *pe)
1472fe8fb19SBen Gras break;
1482fe8fb19SBen Gras
1492fe8fb19SBen Gras /* *p is first leftward reoccurrence of *pe */
1502fe8fb19SBen Gras pat->md2 = pe - p;
1512fe8fb19SBen Gras return (pat);
1522fe8fb19SBen Gras
1532fe8fb19SBen Gras mem: sv_errno = errno;
1542fe8fb19SBen Gras bm_free(pat);
1552fe8fb19SBen Gras errno = sv_errno;
1562fe8fb19SBen Gras return (NULL);
1572fe8fb19SBen Gras }
1582fe8fb19SBen Gras
1592fe8fb19SBen Gras void
bm_free(bm_pat * pat)160f14fb602SLionel Sambuc bm_free(bm_pat *pat)
1612fe8fb19SBen Gras {
1622fe8fb19SBen Gras
1632fe8fb19SBen Gras _DIAGASSERT(pat != NULL);
1642fe8fb19SBen Gras
1652fe8fb19SBen Gras free(pat->pat);
1662fe8fb19SBen Gras free(pat->delta);
1672fe8fb19SBen Gras free(pat);
1682fe8fb19SBen Gras }
1692fe8fb19SBen Gras
1702fe8fb19SBen Gras u_char *
bm_exec(bm_pat * pat,u_char * base,size_t n)171f14fb602SLionel Sambuc bm_exec(bm_pat *pat, u_char *base, size_t n)
1722fe8fb19SBen Gras {
1732fe8fb19SBen Gras u_char *e, *ep, *p, *q, *s;
1742fe8fb19SBen Gras size_t *d0, k, md2, n1, ro;
1752fe8fb19SBen Gras int rc;
1762fe8fb19SBen Gras
1772fe8fb19SBen Gras _DIAGASSERT(pat != NULL);
1782fe8fb19SBen Gras _DIAGASSERT(base != NULL);
1792fe8fb19SBen Gras
1802fe8fb19SBen Gras if (n == 0)
1812fe8fb19SBen Gras return (NULL);
1822fe8fb19SBen Gras
1832fe8fb19SBen Gras d0 = pat->delta;
1842fe8fb19SBen Gras n1 = pat->patlen - 1;
1852fe8fb19SBen Gras md2 = pat->md2;
1862fe8fb19SBen Gras ro = pat->rareoff;
1872fe8fb19SBen Gras rc = pat->rarec;
1882fe8fb19SBen Gras ep = pat->pat + pat->patlen - 1;
1892fe8fb19SBen Gras s = base + (pat->patlen - 1);
1902fe8fb19SBen Gras
1912fe8fb19SBen Gras /* fast loop up to n - 3 * patlen */
1922fe8fb19SBen Gras e = base + n - 3 * pat->patlen;
1932fe8fb19SBen Gras while (s < e) {
1942fe8fb19SBen Gras k = d0[*s]; /* ufast skip loop */
195*0a6a1f1dSLionel Sambuc while (k && s < e) {
1962fe8fb19SBen Gras k = d0[*(s += k)];
1972fe8fb19SBen Gras k = d0[*(s += k)];
1982fe8fb19SBen Gras }
1992fe8fb19SBen Gras if (s >= e)
2002fe8fb19SBen Gras break;
2012fe8fb19SBen Gras if (s[ro] != rc) /* guard test */
2022fe8fb19SBen Gras goto mismatch1;
2032fe8fb19SBen Gras /* fwd match */
2042fe8fb19SBen Gras for (p = pat->pat, q = s - n1; p < ep;)
2052fe8fb19SBen Gras if (*q++ != *p++)
2062fe8fb19SBen Gras goto mismatch1;
2072fe8fb19SBen Gras return (s - n1);
2082fe8fb19SBen Gras
2092fe8fb19SBen Gras mismatch1: s += md2; /* md2 shift */
2102fe8fb19SBen Gras }
2112fe8fb19SBen Gras
2122fe8fb19SBen Gras /* slow loop up to end */
2132fe8fb19SBen Gras e = base + n;
2142fe8fb19SBen Gras while (s < e) {
2152fe8fb19SBen Gras s += d0[*s]; /* step */
2162fe8fb19SBen Gras if (s >= e)
2172fe8fb19SBen Gras break;
2182fe8fb19SBen Gras if (s[ro] != rc) /* guard test */
2192fe8fb19SBen Gras goto mismatch2;
2202fe8fb19SBen Gras /* fwd match */
2212fe8fb19SBen Gras for (p = pat->pat, q = s - n1; p <= ep;)
2222fe8fb19SBen Gras if (*q++ != *p++)
2232fe8fb19SBen Gras goto mismatch2;
2242fe8fb19SBen Gras return (s - n1);
2252fe8fb19SBen Gras
2262fe8fb19SBen Gras mismatch2: s += md2; /* md2 shift */
2272fe8fb19SBen Gras }
2282fe8fb19SBen Gras
2292fe8fb19SBen Gras return (NULL);
2302fe8fb19SBen Gras }
231