1 /*
2 mxbmse -- Fast Boyer Moore Search Algorithm (Version 0.9)
3
4 The implementation is reentrant and thread safe. While the
5 general ideas behind the Boyer Moore algorithm are in the public
6 domain, this implementation falls under the following copyright:
7
8 Copyright (c) 1997-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
9 Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com
10
11 All Rights Reserved
12
13 See the documentation for copying information or contact the author
14 (mal@lemburg.com).
15 */
16
17 /* to turn on the debugging printfs (DPRINTF):*/
18 /* #define MAL_DEBUG */
19
20 /* Logging file used by debugging facility */
21 #ifndef MAL_DEBUG_OUTPUTFILE
22 # define MAL_DEBUG_OUTPUTFILE "mxTextSearch.log"
23 #endif
24
25 #ifdef MAL_DEBUG_WITH_PYTHON
26 # include "mx.h"
27 #endif
28
29 #include "mxstdlib.h"
30 #include "mxbmse.h"
31
32 /* --- Fast Boyer-Moore Implementation (8-bit) ---------------------------- */
33
bm_init(char * match,int match_len)34 mxbmse_data *bm_init(char *match,
35 int match_len)
36 {
37 mxbmse_data *c;
38 int i;
39 BM_SHIFT_TYPE *shift;
40 char *m;
41
42 c = newstruct(mxbmse_data);
43 c->match = match;
44 c->match_len = match_len;
45 c->eom = match + match_len - 1;
46
47 /* Length 1 matching does not use a shift table */
48 if (match_len == 1)
49 return c;
50
51 /* Init shift table */
52 for ( shift = c->shift, i = 256; i > 0; i--, shift++ )
53 *shift = (BM_SHIFT_TYPE) match_len;
54
55 DPRINTF("shift table for match='%s'\n",match);
56 for ( shift = c->shift, m = match, i = match_len - 1;
57 i >= 0;
58 i--, m++ ) {
59 shift[ (unsigned char) *m ] = (BM_SHIFT_TYPE) i;
60 DPRINTF(" char = '%c' shift = %i\n", *m, i);
61 }
62
63 return c;
64 }
65
bm_free(mxbmse_data * c)66 void bm_free(mxbmse_data *c)
67 {
68 if (c)
69 free(c);
70 }
71
bm_search(mxbmse_data * c,char * text,int start,int text_len)72 int bm_search(mxbmse_data *c,
73 char *text,
74 int start,
75 int text_len)
76 {
77 register char *pt;
78 register char *eot = text + text_len;
79
80 /* Error check */
81 if (c == NULL)
82 return -1;
83
84 /* Init text pointer */
85 pt = text + start + c->match_len - 1;
86
87 DPRINTF("Init : %2i %20.20s \t text: %2i %20.20s\n",
88 c->match_len,c->match,start,text+start);
89
90 if (c->match_len > 1)
91 for (;;) {
92 register char *pm;
93
94 pm = c->eom;
95
96 for (;pt < eot && *pt != *pm;
97 pt += c->shift[(unsigned char) *pt]);
98
99 if (pt >= eot)
100 break;
101
102 /* First char matches.. what about the others ? */
103 {
104 register int im = c->match_len;
105
106 do {
107 DPRINTF("=match: %2i '%20.20s' \t text: '%20.20s'\n",
108 im,pm,pt);
109 if (--im == 0)
110 /* Match */
111 return pt - text + c->match_len;
112 pt--;
113 pm--;
114 } while (*pt == *pm);
115
116 /* Mismatch after match: use shift-table */
117 {
118 register int a,b;
119
120 a = c->shift[(unsigned char) *pt];
121 b = c->match_len - im + 1;
122 DPRINTF("!match: %2i '%20.20s' \t text: '%20.20s' "
123 "(sh=%i)\n",
124 im,pm,pt,max(a,b));
125 pt += (a > b) ? a : b;
126 }
127 }
128
129 }
130
131 /* Special case: matching string has length 1 */
132 else {
133 register char m = *c->eom;
134
135 for (;pt < eot; pt++)
136 if (*pt == m)
137 /* Match */
138 return pt - text + 1;
139 }
140
141 return start; /* no match */
142 }
143
144 /* bm search using the translate table -- 45% slower */
145
bm_tr_search(mxbmse_data * c,char * text,int start,int text_len,char * tr)146 int bm_tr_search(mxbmse_data *c,
147 char *text,
148 int start,
149 int text_len,
150 char *tr)
151 {
152 register char *pt;
153 register char *eot = text + text_len;
154
155 /* Error check */
156 if (c == NULL)
157 return -1;
158
159 /* Init text pointer */
160 pt = text + start + c->match_len - 1;
161
162 DPRINTF("Init : %2i '%20.20s' \t text: %2i '%20.20s'\n",
163 c->match_len,c->match,start,text+start);
164
165 if (c->match_len > 1)
166 for (;;) {
167 register char *pm;
168
169 pm = c->eom;
170
171 for (;pt < eot && tr[(unsigned char) *pt] != *pm;
172 pt += c->shift[(unsigned char) tr[(unsigned char) *pt]]);
173
174 if (pt >= eot)
175 break;
176
177 /* First char matches.. what about the others ? */
178 {
179 register int im = c->match_len;
180
181 do {
182 DPRINTF("=match: %2i '%20.20s' \t text: '%20.20s'\n",
183 im,pm,pt);
184 if (--im == 0)
185 /* Match */
186 return pt - text + c->match_len;
187 pt--;
188 pm--;
189 } while (tr[(unsigned char) *pt] == *pm);
190
191 /* Mismatch after match: use shift-table */
192 {
193 register int a,b;
194
195 a = c->shift[(unsigned char) tr[(unsigned char) *pt]];
196 b = c->match_len - im + 1;
197 DPRINTF("!match: %2i '%20.20s' \t text: '%20.20s' "
198 "(sh=%i)\n",
199 im,pm,pt,max(a,b));
200 pt += (a > b)?a:b;
201 }
202 }
203
204 }
205
206 /* Special case: matching string has length 1 */
207 else {
208 register char m = *c->eom;
209
210 for (;pt < eot; pt++)
211 if (*pt == m)
212 /* Match */
213 return pt - text + 1;
214 }
215
216 return start; /* no match */
217 }
218
219