1 /* $NetBSD: bm.c,v 1.10 2000/01/22 22:19:20 mycroft Exp $ */ 2 3 /*- 4 * Copyright (c) 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Andrew Hume of AT&T Bell Laboratories. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #if defined(LIBC_SCCS) && !defined(lint) 41 #if 0 42 static char sccsid[] = "@(#)bm.c 8.7 (Berkeley) 6/21/94"; 43 #else 44 __RCSID("$NetBSD: bm.c,v 1.10 2000/01/22 22:19:20 mycroft Exp $"); 45 #endif 46 #endif /* LIBC_SCCS && not lint */ 47 48 #include "namespace.h" 49 #include <sys/types.h> 50 51 #include <assert.h> 52 #include <bm.h> 53 #include <errno.h> 54 #include <stdlib.h> 55 #include <string.h> 56 57 #ifdef __weak_alias 58 __weak_alias(bm_comp,_bm_comp) 59 __weak_alias(bm_exec,_bm_exec) 60 __weak_alias(bm_free,_bm_free) 61 #endif 62 63 /* 64 * XXX 65 * The default frequency table starts at 99 and counts down. The default 66 * table should probably be oriented toward text, and will necessarily be 67 * locale specific. This one is for English. It was derived from the 68 * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb 69 * of email and random text. Change it if you can find something better. 70 */ 71 static u_char const freq_def[256] = { 72 0, 0, 0, 0, 0, 0, 0, 0, 73 0, 77, 90, 0, 0, 0, 0, 0, 74 0, 0, 0, 0, 0, 0, 0, 0, 75 0, 0, 0, 0, 0, 0, 0, 0, 76 99, 28, 42, 27, 16, 14, 20, 51, 77 66, 65, 59, 24, 75, 76, 84, 56, 78 72, 74, 64, 55, 54, 47, 41, 37, 79 44, 61, 70, 43, 23, 53, 49, 22, 80 33, 58, 40, 46, 45, 57, 60, 26, 81 30, 63, 21, 12, 32, 50, 38, 39, 82 34, 11, 48, 67, 62, 35, 15, 29, 83 71, 18, 9, 17, 25, 13, 10, 52, 84 36, 95, 78, 86, 87, 98, 82, 80, 85 88, 94, 19, 68, 89, 83, 93, 96, 86 81, 7, 91, 92, 97, 85, 69, 73, 87 31, 79, 8, 5, 4, 6, 3, 0, 88 0, 0, 0, 0, 0, 0, 0, 0, 89 0, 0, 0, 0, 0, 0, 0, 0, 90 0, 0, 0, 0, 0, 0, 0, 0, 91 0, 0, 0, 0, 0, 0, 0, 0, 92 0, 0, 0, 0, 0, 0, 0, 0, 93 0, 0, 0, 0, 0, 0, 0, 0, 94 0, 0, 0, 0, 0, 0, 0, 0, 95 0, 0, 0, 0, 0, 0, 0, 0, 96 0, 0, 0, 0, 0, 0, 0, 0, 97 0, 0, 0, 0, 0, 0, 0, 0, 98 0, 0, 0, 0, 0, 0, 0, 0, 99 0, 0, 0, 0, 0, 0, 0, 0, 100 0, 0, 0, 0, 0, 0, 0, 0, 101 0, 0, 0, 0, 0, 0, 0, 0, 102 0, 0, 0, 0, 0, 0, 0, 0, 103 0, 0, 0, 0, 0, 0, 0, 0, 104 }; 105 106 bm_pat * 107 bm_comp(pb, len, freq) 108 u_char const *pb; 109 size_t len; 110 u_char const *freq; 111 { 112 u_char const *pe, *p; 113 size_t *d, r; 114 int j; 115 int sv_errno; 116 bm_pat *pat; 117 118 _DIAGASSERT(pb != NULL); 119 /* freq may be NULL */ 120 121 if (len == 0) { 122 errno = EINVAL; 123 return (NULL); 124 } 125 if ((pat = malloc(sizeof(*pat))) == NULL) 126 return (NULL); 127 pat->pat = NULL; 128 pat->delta = NULL; 129 130 pat->patlen = len; /* copy pattern */ 131 if ((pat->pat = malloc(pat->patlen)) == NULL) 132 goto mem; 133 memcpy(pat->pat, pb, pat->patlen); 134 /* get skip delta */ 135 if ((pat->delta = malloc(256 * sizeof(*d))) == NULL) 136 goto mem; 137 for (j = 0, d = pat->delta; j < 256; j++) 138 d[j] = pat->patlen; 139 for (pe = pb + pat->patlen - 1; pb <= pe; pb++) 140 d[*pb] = pe - pb; 141 142 if (freq == NULL) /* default freq table */ 143 freq = freq_def; 144 r = 0; /* get guard */ 145 for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++) 146 if (freq[*pb] < freq[pat->pat[r]]) 147 r = pb - pat->pat; 148 pat->rarec = pat->pat[r]; 149 pat->rareoff = r - (pat->patlen - 1); 150 151 /* get md2 shift */ 152 for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--) 153 if (*p == *pe) 154 break; 155 156 /* *p is first leftward reoccurrence of *pe */ 157 pat->md2 = pe - p; 158 return (pat); 159 160 mem: sv_errno = errno; 161 bm_free(pat); 162 errno = sv_errno; 163 return (NULL); 164 } 165 166 void 167 bm_free(pat) 168 bm_pat *pat; 169 { 170 171 _DIAGASSERT(pat != NULL); 172 173 if (pat->pat != NULL) 174 free(pat->pat); 175 if (pat->delta != NULL) 176 free(pat->delta); 177 free(pat); 178 } 179 180 u_char * 181 bm_exec(pat, base, n) 182 bm_pat *pat; 183 u_char *base; 184 size_t n; 185 { 186 u_char *e, *ep, *p, *q, *s; 187 size_t *d0, k, md2, n1, ro; 188 int rc; 189 190 _DIAGASSERT(pat != NULL); 191 _DIAGASSERT(base != NULL); 192 193 if (n == 0) 194 return (NULL); 195 196 d0 = pat->delta; 197 n1 = pat->patlen - 1; 198 md2 = pat->md2; 199 ro = pat->rareoff; 200 rc = pat->rarec; 201 ep = pat->pat + pat->patlen - 1; 202 s = base + (pat->patlen - 1); 203 204 /* fast loop up to n - 3 * patlen */ 205 e = base + n - 3 * pat->patlen; 206 while (s < e) { 207 k = d0[*s]; /* ufast skip loop */ 208 while (k) { 209 k = d0[*(s += k)]; 210 k = d0[*(s += k)]; 211 } 212 if (s >= e) 213 break; 214 if (s[ro] != rc) /* guard test */ 215 goto mismatch1; 216 /* fwd match */ 217 for (p = pat->pat, q = s - n1; p < ep;) 218 if (*q++ != *p++) 219 goto mismatch1; 220 return (s - n1); 221 222 mismatch1: s += md2; /* md2 shift */ 223 } 224 225 /* slow loop up to end */ 226 e = base + n; 227 while (s < e) { 228 s += d0[*s]; /* step */ 229 if (s >= e) 230 break; 231 if (s[ro] != rc) /* guard test */ 232 goto mismatch2; 233 /* fwd match */ 234 for (p = pat->pat, q = s - n1; p <= ep;) 235 if (*q++ != *p++) 236 goto mismatch2; 237 return (s - n1); 238 239 mismatch2: s += md2; /* md2 shift */ 240 } 241 242 return (NULL); 243 } 244