xref: /netbsd/lib/libc/string/bm.c (revision bf9ec67e)
1 /*	$NetBSD: bm.c,v 1.10 2000/01/22 22:19:20 mycroft Exp $	*/
2 
3 /*-
4  * Copyright (c) 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Andrew Hume of AT&T Bell Laboratories.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 #if defined(LIBC_SCCS) && !defined(lint)
41 #if 0
42 static char sccsid[] = "@(#)bm.c	8.7 (Berkeley) 6/21/94";
43 #else
44 __RCSID("$NetBSD: bm.c,v 1.10 2000/01/22 22:19:20 mycroft Exp $");
45 #endif
46 #endif /* LIBC_SCCS && not lint */
47 
48 #include "namespace.h"
49 #include <sys/types.h>
50 
51 #include <assert.h>
52 #include <bm.h>
53 #include <errno.h>
54 #include <stdlib.h>
55 #include <string.h>
56 
57 #ifdef __weak_alias
58 __weak_alias(bm_comp,_bm_comp)
59 __weak_alias(bm_exec,_bm_exec)
60 __weak_alias(bm_free,_bm_free)
61 #endif
62 
63 /*
64  * XXX
65  * The default frequency table starts at 99 and counts down.  The default
66  * table should probably be oriented toward text, and will necessarily be
67  * locale specific.  This one is for English.  It was derived from the
68  * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb
69  * of email and random text.  Change it if you can find something better.
70  */
71 static u_char const freq_def[256] = {
72 	 0,  0,  0,  0,  0,  0,  0,  0,
73 	 0, 77, 90,  0,  0,  0,  0,  0,
74 	 0,  0,  0,  0,  0,  0,  0,  0,
75 	 0,  0,  0,  0,  0,  0,  0,  0,
76 	99, 28, 42, 27, 16, 14, 20, 51,
77 	66, 65, 59, 24, 75, 76, 84, 56,
78 	72, 74, 64, 55, 54, 47, 41, 37,
79 	44, 61, 70, 43, 23, 53, 49, 22,
80 	33, 58, 40, 46, 45, 57, 60, 26,
81 	30, 63, 21, 12, 32, 50, 38, 39,
82 	34, 11, 48, 67, 62, 35, 15, 29,
83 	71, 18,  9, 17, 25, 13, 10, 52,
84 	36, 95, 78, 86, 87, 98, 82, 80,
85 	88, 94, 19, 68, 89, 83, 93, 96,
86 	81,  7, 91, 92, 97, 85, 69, 73,
87 	31, 79,  8,  5,  4,  6,  3,  0,
88 	 0,  0,  0,  0,  0,  0,  0,  0,
89 	 0,  0,  0,  0,  0,  0,  0,  0,
90 	 0,  0,  0,  0,  0,  0,  0,  0,
91 	 0,  0,  0,  0,  0,  0,  0,  0,
92 	 0,  0,  0,  0,  0,  0,  0,  0,
93 	 0,  0,  0,  0,  0,  0,  0,  0,
94 	 0,  0,  0,  0,  0,  0,  0,  0,
95 	 0,  0,  0,  0,  0,  0,  0,  0,
96 	 0,  0,  0,  0,  0,  0,  0,  0,
97 	 0,  0,  0,  0,  0,  0,  0,  0,
98 	 0,  0,  0,  0,  0,  0,  0,  0,
99 	 0,  0,  0,  0,  0,  0,  0,  0,
100 	 0,  0,  0,  0,  0,  0,  0,  0,
101 	 0,  0,  0,  0,  0,  0,  0,  0,
102 	 0,  0,  0,  0,  0,  0,  0,  0,
103 	 0,  0,  0,  0,  0,  0,  0,  0,
104 };
105 
106 bm_pat *
107 bm_comp(pb, len, freq)
108 	u_char const *pb;
109 	size_t len;
110 	u_char const *freq;
111 {
112 	u_char const *pe, *p;
113 	size_t *d, r;
114 	int j;
115 	int sv_errno;
116 	bm_pat *pat;
117 
118 	_DIAGASSERT(pb != NULL);
119 	/* freq may be NULL */
120 
121 	if (len == 0) {
122 		errno = EINVAL;
123 		return (NULL);
124 	}
125 	if ((pat = malloc(sizeof(*pat))) == NULL)
126 		return (NULL);
127 	pat->pat = NULL;
128 	pat->delta = NULL;
129 
130 	pat->patlen = len;			/* copy pattern */
131 	if ((pat->pat = malloc(pat->patlen)) == NULL)
132 		goto mem;
133 	memcpy(pat->pat, pb, pat->patlen);
134 						/* get skip delta */
135 	if ((pat->delta = malloc(256 * sizeof(*d))) == NULL)
136 		goto mem;
137 	for (j = 0, d = pat->delta; j < 256; j++)
138 		d[j] = pat->patlen;
139 	for (pe = pb + pat->patlen - 1; pb <= pe; pb++)
140 		d[*pb] = pe - pb;
141 
142 	if (freq == NULL)			/* default freq table */
143 		freq = freq_def;
144 	r = 0;					/* get guard */
145 	for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++)
146 		if (freq[*pb] < freq[pat->pat[r]])
147 			r = pb - pat->pat;
148 	pat->rarec = pat->pat[r];
149 	pat->rareoff = r - (pat->patlen - 1);
150 
151 						/* get md2 shift */
152 	for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--)
153 		if (*p == *pe)
154 			break;
155 
156 	/* *p is first leftward reoccurrence of *pe */
157 	pat->md2 = pe - p;
158 	return (pat);
159 
160 mem:	sv_errno = errno;
161 	bm_free(pat);
162 	errno = sv_errno;
163 	return (NULL);
164 }
165 
166 void
167 bm_free(pat)
168 	bm_pat *pat;
169 {
170 
171 	_DIAGASSERT(pat != NULL);
172 
173 	if (pat->pat != NULL)
174 		free(pat->pat);
175 	if (pat->delta != NULL)
176 		free(pat->delta);
177 	free(pat);
178 }
179 
180 u_char *
181 bm_exec(pat, base, n)
182 	bm_pat *pat;
183 	u_char *base;
184 	size_t n;
185 {
186 	u_char *e, *ep, *p, *q, *s;
187 	size_t *d0, k, md2, n1, ro;
188 	int rc;
189 
190 	_DIAGASSERT(pat != NULL);
191 	_DIAGASSERT(base != NULL);
192 
193 	if (n == 0)
194 		return (NULL);
195 
196 	d0 = pat->delta;
197 	n1 = pat->patlen - 1;
198 	md2 = pat->md2;
199 	ro = pat->rareoff;
200 	rc = pat->rarec;
201 	ep = pat->pat + pat->patlen - 1;
202 	s = base + (pat->patlen - 1);
203 
204 	/* fast loop up to n - 3 * patlen */
205 	e = base + n - 3 * pat->patlen;
206 	while (s < e) {
207 		k = d0[*s];		/* ufast skip loop */
208 		while (k) {
209 			k = d0[*(s += k)];
210 			k = d0[*(s += k)];
211 		}
212 		if (s >= e)
213 			break;
214 		if (s[ro] != rc)	/* guard test */
215 			goto mismatch1;
216 					/* fwd match */
217 		for (p = pat->pat, q = s - n1; p < ep;)
218 			if (*q++ != *p++)
219 				goto mismatch1;
220 		return (s - n1);
221 
222 mismatch1:	s += md2;		/* md2 shift */
223 	}
224 
225 	/* slow loop up to end */
226 	e = base + n;
227 	while (s < e) {
228 		s += d0[*s];		/* step */
229 		if (s >= e)
230 			break;
231 		if (s[ro] != rc)	/* guard test */
232 			goto mismatch2;
233 					/* fwd match */
234 		for (p = pat->pat, q = s - n1; p <= ep;)
235 			if (*q++ != *p++)
236 				goto mismatch2;
237 		return (s - n1);
238 
239 mismatch2:	s += md2;		/* md2 shift */
240 	}
241 
242 	return (NULL);
243 }
244