xref: /minix/lib/libc/string/bm.c (revision 0a6a1f1d)
1*0a6a1f1dSLionel Sambuc /*	$NetBSD: bm.c,v 1.13 2014/06/23 10:43:25 shm Exp $	*/
22fe8fb19SBen Gras 
32fe8fb19SBen Gras /*-
42fe8fb19SBen Gras  * Copyright (c) 1994
52fe8fb19SBen Gras  *	The Regents of the University of California.  All rights reserved.
62fe8fb19SBen Gras  *
72fe8fb19SBen Gras  * This code is derived from software contributed to Berkeley by
82fe8fb19SBen Gras  * Andrew Hume of AT&T Bell Laboratories.
92fe8fb19SBen Gras  *
102fe8fb19SBen Gras  * Redistribution and use in source and binary forms, with or without
112fe8fb19SBen Gras  * modification, are permitted provided that the following conditions
122fe8fb19SBen Gras  * are met:
132fe8fb19SBen Gras  * 1. Redistributions of source code must retain the above copyright
142fe8fb19SBen Gras  *    notice, this list of conditions and the following disclaimer.
152fe8fb19SBen Gras  * 2. Redistributions in binary form must reproduce the above copyright
162fe8fb19SBen Gras  *    notice, this list of conditions and the following disclaimer in the
172fe8fb19SBen Gras  *    documentation and/or other materials provided with the distribution.
182fe8fb19SBen Gras  * 3. Neither the name of the University nor the names of its contributors
192fe8fb19SBen Gras  *    may be used to endorse or promote products derived from this software
202fe8fb19SBen Gras  *    without specific prior written permission.
212fe8fb19SBen Gras  *
222fe8fb19SBen Gras  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
232fe8fb19SBen Gras  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
242fe8fb19SBen Gras  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
252fe8fb19SBen Gras  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
262fe8fb19SBen Gras  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
272fe8fb19SBen Gras  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
282fe8fb19SBen Gras  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
292fe8fb19SBen Gras  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
302fe8fb19SBen Gras  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
312fe8fb19SBen Gras  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
322fe8fb19SBen Gras  * SUCH DAMAGE.
332fe8fb19SBen Gras  */
342fe8fb19SBen Gras 
352fe8fb19SBen Gras #include <sys/cdefs.h>
362fe8fb19SBen Gras #if defined(LIBC_SCCS) && !defined(lint)
372fe8fb19SBen Gras #if 0
382fe8fb19SBen Gras static char sccsid[] = "@(#)bm.c	8.7 (Berkeley) 6/21/94";
392fe8fb19SBen Gras #else
40*0a6a1f1dSLionel Sambuc __RCSID("$NetBSD: bm.c,v 1.13 2014/06/23 10:43:25 shm Exp $");
412fe8fb19SBen Gras #endif
422fe8fb19SBen Gras #endif /* LIBC_SCCS && not lint */
432fe8fb19SBen Gras 
442fe8fb19SBen Gras #include "namespace.h"
452fe8fb19SBen Gras #include <sys/types.h>
462fe8fb19SBen Gras 
472fe8fb19SBen Gras #include <assert.h>
482fe8fb19SBen Gras #include <bm.h>
492fe8fb19SBen Gras #include <errno.h>
502fe8fb19SBen Gras #include <stdlib.h>
512fe8fb19SBen Gras #include <string.h>
522fe8fb19SBen Gras 
532fe8fb19SBen Gras #ifdef __weak_alias
542fe8fb19SBen Gras __weak_alias(bm_comp,_bm_comp)
552fe8fb19SBen Gras __weak_alias(bm_exec,_bm_exec)
562fe8fb19SBen Gras __weak_alias(bm_free,_bm_free)
572fe8fb19SBen Gras #endif
582fe8fb19SBen Gras 
592fe8fb19SBen Gras /*
602fe8fb19SBen Gras  * XXX
612fe8fb19SBen Gras  * The default frequency table starts at 99 and counts down.  The default
622fe8fb19SBen Gras  * table should probably be oriented toward text, and will necessarily be
632fe8fb19SBen Gras  * locale specific.  This one is for English.  It was derived from the
642fe8fb19SBen Gras  * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb
652fe8fb19SBen Gras  * of email and random text.  Change it if you can find something better.
662fe8fb19SBen Gras  */
672fe8fb19SBen Gras static u_char const freq_def[256] = {
682fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
692fe8fb19SBen Gras 	 0, 77, 90,  0,  0,  0,  0,  0,
702fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
712fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
722fe8fb19SBen Gras 	99, 28, 42, 27, 16, 14, 20, 51,
732fe8fb19SBen Gras 	66, 65, 59, 24, 75, 76, 84, 56,
742fe8fb19SBen Gras 	72, 74, 64, 55, 54, 47, 41, 37,
752fe8fb19SBen Gras 	44, 61, 70, 43, 23, 53, 49, 22,
762fe8fb19SBen Gras 	33, 58, 40, 46, 45, 57, 60, 26,
772fe8fb19SBen Gras 	30, 63, 21, 12, 32, 50, 38, 39,
782fe8fb19SBen Gras 	34, 11, 48, 67, 62, 35, 15, 29,
792fe8fb19SBen Gras 	71, 18,  9, 17, 25, 13, 10, 52,
802fe8fb19SBen Gras 	36, 95, 78, 86, 87, 98, 82, 80,
812fe8fb19SBen Gras 	88, 94, 19, 68, 89, 83, 93, 96,
822fe8fb19SBen Gras 	81,  7, 91, 92, 97, 85, 69, 73,
832fe8fb19SBen Gras 	31, 79,  8,  5,  4,  6,  3,  0,
842fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
852fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
862fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
872fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
882fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
892fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
902fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
912fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
922fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
932fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
942fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
952fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
962fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
972fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
982fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
992fe8fb19SBen Gras 	 0,  0,  0,  0,  0,  0,  0,  0,
1002fe8fb19SBen Gras };
1012fe8fb19SBen Gras 
1022fe8fb19SBen Gras bm_pat *
bm_comp(u_char const * pb,size_t len,u_char const * freq)103f14fb602SLionel Sambuc bm_comp(u_char const *pb, size_t len, u_char const *freq)
1042fe8fb19SBen Gras {
1052fe8fb19SBen Gras 	u_char const *pe, *p;
1062fe8fb19SBen Gras 	size_t *d, r;
1072fe8fb19SBen Gras 	int j;
1082fe8fb19SBen Gras 	int sv_errno;
1092fe8fb19SBen Gras 	bm_pat *pat;
1102fe8fb19SBen Gras 
1112fe8fb19SBen Gras 	_DIAGASSERT(pb != NULL);
1122fe8fb19SBen Gras 	/* freq may be NULL */
1132fe8fb19SBen Gras 
1142fe8fb19SBen Gras 	if (len == 0) {
1152fe8fb19SBen Gras 		errno = EINVAL;
1162fe8fb19SBen Gras 		return (NULL);
1172fe8fb19SBen Gras 	}
1182fe8fb19SBen Gras 	if ((pat = malloc(sizeof(*pat))) == NULL)
1192fe8fb19SBen Gras 		return (NULL);
1202fe8fb19SBen Gras 	pat->pat = NULL;
1212fe8fb19SBen Gras 	pat->delta = NULL;
1222fe8fb19SBen Gras 
1232fe8fb19SBen Gras 	pat->patlen = len;			/* copy pattern */
1242fe8fb19SBen Gras 	if ((pat->pat = malloc(pat->patlen)) == NULL)
1252fe8fb19SBen Gras 		goto mem;
1262fe8fb19SBen Gras 	memcpy(pat->pat, pb, pat->patlen);
1272fe8fb19SBen Gras 						/* get skip delta */
1282fe8fb19SBen Gras 	if ((pat->delta = malloc(256 * sizeof(*d))) == NULL)
1292fe8fb19SBen Gras 		goto mem;
1302fe8fb19SBen Gras 	for (j = 0, d = pat->delta; j < 256; j++)
1312fe8fb19SBen Gras 		d[j] = pat->patlen;
1322fe8fb19SBen Gras 	for (pe = pb + pat->patlen - 1; pb <= pe; pb++)
1332fe8fb19SBen Gras 		d[*pb] = pe - pb;
1342fe8fb19SBen Gras 
1352fe8fb19SBen Gras 	if (freq == NULL)			/* default freq table */
1362fe8fb19SBen Gras 		freq = freq_def;
1372fe8fb19SBen Gras 	r = 0;					/* get guard */
1382fe8fb19SBen Gras 	for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++)
1392fe8fb19SBen Gras 		if (freq[*pb] < freq[pat->pat[r]])
1402fe8fb19SBen Gras 			r = pb - pat->pat;
1412fe8fb19SBen Gras 	pat->rarec = pat->pat[r];
1422fe8fb19SBen Gras 	pat->rareoff = r - (pat->patlen - 1);
1432fe8fb19SBen Gras 
1442fe8fb19SBen Gras 						/* get md2 shift */
1452fe8fb19SBen Gras 	for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--)
1462fe8fb19SBen Gras 		if (*p == *pe)
1472fe8fb19SBen Gras 			break;
1482fe8fb19SBen Gras 
1492fe8fb19SBen Gras 	/* *p is first leftward reoccurrence of *pe */
1502fe8fb19SBen Gras 	pat->md2 = pe - p;
1512fe8fb19SBen Gras 	return (pat);
1522fe8fb19SBen Gras 
1532fe8fb19SBen Gras mem:	sv_errno = errno;
1542fe8fb19SBen Gras 	bm_free(pat);
1552fe8fb19SBen Gras 	errno = sv_errno;
1562fe8fb19SBen Gras 	return (NULL);
1572fe8fb19SBen Gras }
1582fe8fb19SBen Gras 
1592fe8fb19SBen Gras void
bm_free(bm_pat * pat)160f14fb602SLionel Sambuc bm_free(bm_pat *pat)
1612fe8fb19SBen Gras {
1622fe8fb19SBen Gras 
1632fe8fb19SBen Gras 	_DIAGASSERT(pat != NULL);
1642fe8fb19SBen Gras 
1652fe8fb19SBen Gras 	free(pat->pat);
1662fe8fb19SBen Gras 	free(pat->delta);
1672fe8fb19SBen Gras 	free(pat);
1682fe8fb19SBen Gras }
1692fe8fb19SBen Gras 
1702fe8fb19SBen Gras u_char *
bm_exec(bm_pat * pat,u_char * base,size_t n)171f14fb602SLionel Sambuc bm_exec(bm_pat *pat, u_char *base, size_t n)
1722fe8fb19SBen Gras {
1732fe8fb19SBen Gras 	u_char *e, *ep, *p, *q, *s;
1742fe8fb19SBen Gras 	size_t *d0, k, md2, n1, ro;
1752fe8fb19SBen Gras 	int rc;
1762fe8fb19SBen Gras 
1772fe8fb19SBen Gras 	_DIAGASSERT(pat != NULL);
1782fe8fb19SBen Gras 	_DIAGASSERT(base != NULL);
1792fe8fb19SBen Gras 
1802fe8fb19SBen Gras 	if (n == 0)
1812fe8fb19SBen Gras 		return (NULL);
1822fe8fb19SBen Gras 
1832fe8fb19SBen Gras 	d0 = pat->delta;
1842fe8fb19SBen Gras 	n1 = pat->patlen - 1;
1852fe8fb19SBen Gras 	md2 = pat->md2;
1862fe8fb19SBen Gras 	ro = pat->rareoff;
1872fe8fb19SBen Gras 	rc = pat->rarec;
1882fe8fb19SBen Gras 	ep = pat->pat + pat->patlen - 1;
1892fe8fb19SBen Gras 	s = base + (pat->patlen - 1);
1902fe8fb19SBen Gras 
1912fe8fb19SBen Gras 	/* fast loop up to n - 3 * patlen */
1922fe8fb19SBen Gras 	e = base + n - 3 * pat->patlen;
1932fe8fb19SBen Gras 	while (s < e) {
1942fe8fb19SBen Gras 		k = d0[*s];		/* ufast skip loop */
195*0a6a1f1dSLionel Sambuc 		while (k && s < e) {
1962fe8fb19SBen Gras 			k = d0[*(s += k)];
1972fe8fb19SBen Gras 			k = d0[*(s += k)];
1982fe8fb19SBen Gras 		}
1992fe8fb19SBen Gras 		if (s >= e)
2002fe8fb19SBen Gras 			break;
2012fe8fb19SBen Gras 		if (s[ro] != rc)	/* guard test */
2022fe8fb19SBen Gras 			goto mismatch1;
2032fe8fb19SBen Gras 					/* fwd match */
2042fe8fb19SBen Gras 		for (p = pat->pat, q = s - n1; p < ep;)
2052fe8fb19SBen Gras 			if (*q++ != *p++)
2062fe8fb19SBen Gras 				goto mismatch1;
2072fe8fb19SBen Gras 		return (s - n1);
2082fe8fb19SBen Gras 
2092fe8fb19SBen Gras mismatch1:	s += md2;		/* md2 shift */
2102fe8fb19SBen Gras 	}
2112fe8fb19SBen Gras 
2122fe8fb19SBen Gras 	/* slow loop up to end */
2132fe8fb19SBen Gras 	e = base + n;
2142fe8fb19SBen Gras 	while (s < e) {
2152fe8fb19SBen Gras 		s += d0[*s];		/* step */
2162fe8fb19SBen Gras 		if (s >= e)
2172fe8fb19SBen Gras 			break;
2182fe8fb19SBen Gras 		if (s[ro] != rc)	/* guard test */
2192fe8fb19SBen Gras 			goto mismatch2;
2202fe8fb19SBen Gras 					/* fwd match */
2212fe8fb19SBen Gras 		for (p = pat->pat, q = s - n1; p <= ep;)
2222fe8fb19SBen Gras 			if (*q++ != *p++)
2232fe8fb19SBen Gras 				goto mismatch2;
2242fe8fb19SBen Gras 		return (s - n1);
2252fe8fb19SBen Gras 
2262fe8fb19SBen Gras mismatch2:	s += md2;		/* md2 shift */
2272fe8fb19SBen Gras 	}
2282fe8fb19SBen Gras 
2292fe8fb19SBen Gras 	return (NULL);
2302fe8fb19SBen Gras }
231