1 static char rcsid[] = "$Id: boyer-moore.c 198071 2016-09-21 00:19:07Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #ifndef STANDALONE
7 #include "boyer-moore.h"
8 #endif
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #ifdef STANDALONE
14 #define CALLOC calloc
15 #define FREE free
16 #else
17 #include "mem.h"
18 #endif
19 #include "bool.h"
20 #include "complement.h"
21 #include "genome.h"
22
23
24 #define ASIZE 5 /* In genomic sequence: A, C, G, T, other */
25 #define MAX(a,b) ((a) > (b) ? (a) : (b))
26
27 #ifdef DEBUG
28 #define debug(x) x
29 #else
30 #define debug(x)
31 #endif
32
33 /* Successes */
34 #ifdef DEBUG1
35 #define debug1(x) x
36 #else
37 #define debug1(x)
38 #endif
39
40 /* PMAP good suffix calculation */
41 #ifdef DEBUG2
42 #define debug2(x) x
43 #else
44 #define debug2(x)
45 #endif
46
47 #ifdef PMAP
48 /* Same as in dynprog.c */
49 /* Handle only cases in iupac table in dynprog.c */
50 static bool matchtable[26][26] =
51 /* A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z */
52 {{1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0}, /* A */
53 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* B */
54 {0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0}, /* C */
55 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* D */
56 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* E */
57 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* F */
58 {0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0}, /* G */
59 {1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* H = [ACT] */
60 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* I */
61 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* J */
62 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* K */
63 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* L */
64 {1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,1,0}, /* M = [AC] */
65 {1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* N = [ACGT] */
66 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* O */
67 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* P */
68 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* Q */
69 {1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0}, /* R = [AG] */
70 {0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0}, /* S = [CG] */
71 {0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0}, /* T */
72 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* U */
73 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* V */
74 {1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0}, /* W = [AT] */
75 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* X */
76 {0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0}, /* Y = [CT] */
77 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}}; /* Z */
78
79 static bool inverse_matchtable[26][26] =
80 /* A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z */
81 {{0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* not A = [CGT] */
82 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* B */
83 {1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* not C = [AGT] */
84 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* D */
85 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* E */
86 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* F */
87 {1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* not G = [ACT] */
88 {0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0}, /* not H = [G] */
89 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* I */
90 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* J */
91 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* K */
92 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* L */
93 {0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* not M = [GT] */
94 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* not N = [] */
95 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* O */
96 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* P */
97 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* Q */
98 {0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0}, /* not R = [CT] */
99 {1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0}, /* not S = [AT] */
100 {1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,1,0}, /* not T = [ACG] */
101 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* U */
102 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* V */
103 {0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0}, /* not W = [CG] */
104 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* X */
105 {1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0}, /* not Y = [AG] */
106 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}}; /* Z */
107 #endif
108
109
110 static int
na_index(char c)111 na_index (char c) {
112 switch (c) {
113 case 'A': return 0;
114 case 'C': return 1;
115 case 'G': return 2;
116 case 'T': return 3;
117 default: return 4;
118 }
119 }
120
121 static void
precompute_bad_char_shift(int * bad_char_shift,char * query,int querylen)122 precompute_bad_char_shift (int *bad_char_shift, char *query, int querylen) {
123 int i;
124 char *p;
125
126 for (i = 0; i < ASIZE; i++) {
127 bad_char_shift[i] = querylen;
128 }
129 for (i = 0, p = query; i < querylen - 1; i++, p++) {
130 #ifdef PMAP
131 if (matchtable[*p-'A']['A'-'A'] == true) {
132 bad_char_shift[na_index('A')] = querylen - i - 1;
133 }
134 if (matchtable[*p-'A']['C'-'A'] == true) {
135 bad_char_shift[na_index('C')] = querylen - i - 1;
136 }
137 if (matchtable[*p-'A']['G'-'A'] == true) {
138 bad_char_shift[na_index('G')] = querylen - i - 1;
139 }
140 if (matchtable[*p-'A']['T'-'A'] == true) {
141 bad_char_shift[na_index('T')] = querylen - i - 1;
142 }
143 #else
144 bad_char_shift[na_index(*p)] = querylen - i - 1;
145 #endif
146 }
147
148 return;
149 }
150
151
152 #ifdef PMAP
153 static bool
suffixp(char * query,int querylen,int subseqlen,int j)154 suffixp (char *query, int querylen, int subseqlen, int j) {
155 int i;
156
157 debug2(printf("Entering suffixp at subseqlen = %d, j = %d\n",subseqlen,j));
158 for (i = querylen - 1; i >= querylen - 1 - subseqlen + 1 && j >= 0; i--, j--) {
159 if (matchtable[query[i]-'A'][query[j]-'A'] == false) {
160 debug2(printf("Returning false at i = %d\n",i));
161 return false;
162 }
163 }
164 if (j >= 0) {
165 if (inverse_matchtable[query[i]-'A'][query[j]-'A'] == false) {
166 debug2(printf("Returning false at i = %d\n",i));
167 return false;
168 }
169 }
170 debug2(printf("Returning true\n"));
171 return true;
172 }
173
174 static int
suffix_jump(char * query,int querylen,int subseqlen)175 suffix_jump (char *query, int querylen, int subseqlen) {
176 int j;
177
178 for (j = querylen - 1; j >= 0; j--) {
179 if (suffixp(query,querylen,subseqlen,j) == true) {
180 return querylen - 1 - j;
181 }
182 }
183 return querylen;
184 }
185
186 static void
precompute_good_suffix_shift(int * good_suffix_shift,char * query,int querylen)187 precompute_good_suffix_shift (int *good_suffix_shift, char *query, int querylen) {
188 int subseqlen;
189
190 for (subseqlen = 0; subseqlen < querylen; subseqlen++) {
191 good_suffix_shift[querylen - 1 - subseqlen] = suffix_jump(query,querylen,subseqlen);
192 debug2(printf("Assigning %d to position %d\n",good_suffix_shift[querylen - 1 - subseqlen],querylen-1-subseqlen));
193 }
194
195 return;
196 }
197
198 #else
199
200 static void
precompute_suffix(int * suffix,char * query,int querylen)201 precompute_suffix (int *suffix, char *query, int querylen) {
202 /* Note: initialization of f not necessary, since i < g in first iteration */
203 int f, g, i;
204
205 suffix[querylen - 1] = querylen;
206 g = querylen - 1;
207 for (i = querylen - 2; i >= 0; i--) {
208 if (i > g && suffix[i + querylen - 1 - f] < i - g) {
209 suffix[i] = suffix[i + querylen - 1 - f];
210 } else {
211 if (i < g) {
212 g = i;
213 }
214 f = i;
215 while (g >= 0 && query[g] == query[g + querylen - 1 - f]) {
216 g--;
217 }
218 suffix[i] = f - g;
219 }
220 }
221 debug(
222 printf("suffix:\n");
223 for (i = 0; i < querylen; i++) {
224 printf("%d %d\n",i,suffix[i]);
225 }
226 printf("\n");
227 );
228
229 return;
230 }
231
232 static void
precompute_good_suffix_shift(int * good_suffix_shift,char * query,int querylen)233 precompute_good_suffix_shift (int *good_suffix_shift, char *query, int querylen) {
234 int i, j, *suffix;
235
236 suffix = (int *) MALLOCA(querylen * sizeof(int));
237 precompute_suffix(suffix,query,querylen);
238
239 for (i = 0; i < querylen; i++) {
240 good_suffix_shift[i] = querylen;
241 }
242 j = 0;
243 for (i = querylen - 1; i >= -1; i--) {
244 if (i == -1 || suffix[i] == i + 1) {
245 for ( ; j < querylen - 1 - i; j++) {
246 if (good_suffix_shift[j] == querylen) {
247 good_suffix_shift[j] = querylen - 1 - i;
248 }
249 }
250 }
251 }
252 for (i = 0; i <= querylen - 2; i++) {
253 good_suffix_shift[querylen - 1 - suffix[i]] = querylen - 1 - i;
254 }
255
256 FREEA(suffix);
257
258 return;
259 }
260 #endif
261
262 static bool
query_okay(char * query,int querylen)263 query_okay (char *query, int querylen) {
264 int i;
265 char *p, c;
266
267 #ifdef PMAP
268 for (i = 0, p = query; i < querylen; i++, p++) {
269 c = *p;
270 if (c < 'A' || c > 'Y') {
271 return false;
272 }
273 }
274 #else
275 for (i = 0, p = query; i < querylen; i++, p++) {
276 c = *p;
277 if (c != 'A' && c != 'C' && c != 'G' && c != 'T') {
278 return false;
279 }
280 }
281 #endif
282 return true;
283 }
284
285 #ifdef STANDALONE
286 static void
287 #else
288 Intlist_T
289 #endif
BoyerMoore(char * query,int querylen,char * text,int textlen)290 BoyerMoore (char *query, int querylen, char *text, int textlen) {
291 #ifndef STANDALONE
292 Intlist_T hits = NULL;
293 #endif
294 int i, j, *good_suffix_shift;
295 int bad_char_shift[ASIZE];
296
297 if (query_okay(query,querylen)) {
298 good_suffix_shift = (int *) MALLOCA(querylen * sizeof(int));
299 precompute_good_suffix_shift(good_suffix_shift,query,querylen);
300 precompute_bad_char_shift(bad_char_shift,query,querylen);
301
302 debug(
303 printf("bad_char_shift:\n");
304 for (i = 0; i < ASIZE; i++) {
305 printf("%d %d\n",i,bad_char_shift[i]);
306 }
307 printf("\n");
308 printf("good_suffix_shift:\n");
309 for (i = 0; i < querylen; i++) {
310 printf("%d %d\n",i,good_suffix_shift[i]);
311 }
312 );
313
314 j = 0;
315 while (j <= textlen - querylen) {
316 #ifdef PMAP
317 for (i = querylen - 1; i >= 0 && matchtable[query[i]-'A'][text[i+j]-'A'] == true; i--) ;
318 #else
319 for (i = querylen - 1; i >= 0 && query[i] == text[i+j]; i--) ;
320 #endif
321 if (i < 0) {
322 #ifndef STANDALONE
323 hits = Intlist_push(hits,j);
324 #endif
325
326 debug1(printf("Success at %d\n",j));
327 debug(printf("Shift by %d (Gs[0])\n",good_suffix_shift[0]));
328 j += good_suffix_shift[0];
329 } else {
330 debug(
331 if (good_suffix_shift[i] >
332 bad_char_shift[na_index(text[i+j])] - querylen + 1 + i) {
333 printf("Shift by %d (Gs[%d])\n",
334 good_suffix_shift[i],i);
335 } else {
336 printf("Shift by %d (Gs[%d] == Bc[%c] - %d + %d)\n",
337 bad_char_shift[na_index(text[i+j])] - querylen + 1 + i,
338 i,text[i+j],querylen,i+1);
339 }
340 );
341 j += MAX(good_suffix_shift[i],
342 bad_char_shift[na_index(text[i+j])] - querylen + 1 + i);
343 }
344 }
345
346 FREEA(good_suffix_shift);
347 }
348
349 #ifndef STANDALONE
350 return hits;
351 #endif
352 }
353
354
355 #if 0
356 static char complCode[128] = COMPLEMENT_LC;
357
358 static char
359 get_genomic_nt (char *g_alt, int genomicpos,
360 Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp) {
361 char c2, c2_alt;
362
363 if (watsonp) {
364 #if 0
365 if (genome) {
366 return Genome_get_char(genome,chroffset + genomicpos);
367 } else {
368 #endif
369 return Genome_get_char_blocks(&(*g_alt),chroffset + genomicpos);
370 #if 0
371 }
372 #endif
373
374 } else {
375 #if 0
376 if (genome) {
377 c2 = Genome_get_char(genome,chrhigh - genomicpos);
378 } else {
379 #endif
380 c2 = Genome_get_char_blocks(&c2_alt,chrhigh - genomicpos);
381 #if 0
382 }
383 #endif
384 *g_alt = complCode[(int) c2_alt];
385 return complCode[(int) c2];
386 }
387 }
388 #endif
389
390
391 Intlist_T
BoyerMoore_nt(char * query,int querylen,int textoffset,int textlen,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp)392 BoyerMoore_nt (char *query, int querylen, int textoffset, int textlen,
393 Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp) {
394 Intlist_T hits = NULL;
395 int i, j, *good_suffix_shift;
396 int bad_char_shift[ASIZE];
397 char *text, *text_alt;
398
399 debug(printf("Entered BoyerMoore_nt\n"));
400 if (query_okay(query,querylen)) {
401 good_suffix_shift = (int *) MALLOCA(querylen * sizeof(int));
402 text = (char *) MALLOC((textlen+querylen+1) * sizeof(char)); /* alloca could cause stack overflow */
403 text_alt = (char *) MALLOC((textlen+querylen+1) * sizeof(char)); /* alloca could cause stack overflow */
404
405 precompute_good_suffix_shift(good_suffix_shift,query,querylen);
406 precompute_bad_char_shift(bad_char_shift,query,querylen);
407 if (watsonp) {
408 Genome_get_segment_blocks_right(text,text_alt,/*left*/chroffset+textoffset,/*length*/textlen+querylen,chrhigh,/*revcomp*/false);
409 } else {
410 Genome_get_segment_blocks_left(text,text_alt,/*right*/chrhigh-textoffset+1,/*length*/textlen+querylen,chroffset,/*revcomp*/true);
411 }
412 if (text[0] == '\0') {
413 FREE(text_alt);
414 FREE(text);
415 FREEA(good_suffix_shift);
416 return hits;
417 }
418
419 /* This makes text[i+j] == get_genomic_nt(&g_alt,textoffset+i+j,chroffset,chrhigh,watsonp) */
420
421 debug(
422 printf("bad_char_shift:\n");
423 for (i = 0; i < ASIZE; i++) {
424 printf("%d %d\n",i,bad_char_shift[i]);
425 }
426 printf("\n");
427 printf("good_suffix_shift:\n");
428 for (i = 0; i < querylen; i++) {
429 printf("%d %d\n",i,good_suffix_shift[i]);
430 }
431 );
432
433 j = 0;
434 while (j <= textlen - querylen) {
435 #ifdef PMAP
436 for (i = querylen - 1; i >= 0 && matchtable[query[i]-'A'][text[i+j]-'A'] == true; i--) ;
437 #else
438 for (i = querylen - 1; i >= 0 && query[i] == text[i+j]; i--) ;
439 #endif
440 if (i < 0) {
441 hits = Intlist_push(hits,j);
442
443 debug1(printf("Success at %d\n",j));
444 debug(printf("Shift by %d (Gs[0])\n",good_suffix_shift[0]));
445 j += good_suffix_shift[0];
446 } else {
447 debug(
448 if (good_suffix_shift[i] >
449 bad_char_shift[na_index(text[i+j])] - querylen + 1 + i) {
450 printf("Shift by %d (Gs[%d])\n",
451 good_suffix_shift[i],i);
452 } else {
453 printf("Shift by %d (Gs[%d] == Bc[%c] - %d + %d)\n",
454 bad_char_shift[na_index(text[i+j])] - querylen + 1 + i,
455 i,text[i+j],querylen,i+1);
456 }
457 );
458 j += MAX(good_suffix_shift[i],
459 bad_char_shift[na_index(text[i+j])] - querylen + 1 + i);
460 }
461 }
462
463 FREE(text_alt);
464 FREE(text);
465 FREEA(good_suffix_shift);
466 }
467
468 return hits;
469 }
470
471
472 void
BoyerMoore_bad_char_shift(int * bad_char_shift,char * query,int querylen)473 BoyerMoore_bad_char_shift (int *bad_char_shift, char *query, int querylen) {
474 #ifdef DEBUG
475 int i;
476 #endif
477
478 if (query_okay(query,querylen)) {
479 precompute_bad_char_shift(bad_char_shift,query,querylen);
480 } else {
481 fprintf(stderr,"Query cannot have bad characters\n");
482 abort();
483 }
484
485 debug(
486 printf("bad_char_shift:\n");
487 for (i = 0; i < ASIZE; i++) {
488 printf("%d %d\n",i,bad_char_shift[i]);
489 }
490 printf("\n");
491 );
492
493 return;
494 }
495
496
497 int
BoyerMoore_maxprefix(char * query,int querylen,char * text,int textlen,int * bad_char_shift)498 BoyerMoore_maxprefix (char *query, int querylen, char *text, int textlen,
499 int *bad_char_shift) {
500 int maxprefix = 0;
501 int i, j;
502
503 debug(printf("Query: %s\n",query));
504 debug(printf("Text: %s\n",text));
505
506 j = -querylen + 1;
507 while (j <= textlen - querylen) {
508 #ifdef PMAP
509 for (i = querylen - 1; i >= 0 && matchtable[query[i]-'A'][text[i+j]-'A'] == true; i--) ;
510 #else
511 for (i = querylen - 1; i+j >= 0 && query[i] == text[i+j]; i--) ;
512 #endif
513 if (i < 0 || i+j < 0) {
514 maxprefix = querylen+j;
515 debug1(printf("Success at %d (length %d)\n",j,querylen+j));
516 debug(printf("Shift by 1\n"));
517 j += 1;
518 } else {
519 debug1(printf("Failure at j=%d, i=%d\n",j,i));
520 debug(
521 printf("Shift by %d (Gs[%d] == Bc[%c] - %d + %d)\n",
522 bad_char_shift[na_index(text[i+j])],
523 i,text[i+j],querylen,i+1);
524 );
525 j += bad_char_shift[na_index(text[i+j])];
526 }
527 }
528
529 return maxprefix;
530 }
531
532
533
534 #if 0
535 static char *
536 string_reverse (char *original, int length) {
537 char *reverse;
538 int i, j;
539
540 reverse = (char *) CALLOC(length,sizeof(char));
541
542 for (i = 0, j = length-1; i < length; i++, j--) {
543 reverse[i] = original[j];
544 }
545 return reverse;
546 }
547 #endif
548
549
550 #ifdef STANDALONE
551 int
main(int argc,char * argv[])552 main (int argc, char *argv[]) {
553 char *query, *text;
554 char *query_rev, *text_rev;
555 int querylen, textlen;
556 int bad_char_shift[ASIZE];
557
558 #ifdef PMAP
559 int i, j;
560 for (i = 0; i < 26; i++) {
561 for (j = 0; j < 26; j++) {
562 if (matchtable[i][j] != matchtable[j][i]) {
563 fprintf(stderr,"Assymetry problem at %d,%d\n",i,j);
564 }
565 }
566 }
567 #endif
568
569 query = argv[1];
570 text = argv[2];
571 querylen = strlen(query);
572 textlen = strlen(text);
573
574 #if 0
575 BoyerMoore(query,querylen,text,textlen);
576 #else
577 /* Test of maxprefix */
578 query_rev = string_reverse(query,querylen);
579 text_rev = string_reverse(text,textlen);
580
581 printf("Query rev: %s\n",query_rev);
582 printf("Text rev: %s\n",text_rev);
583 BoyerMoore_bad_char_shift(bad_char_shift,query_rev,querylen);
584 BoyerMoore_maxprefix(query_rev,querylen,text_rev,textlen,bad_char_shift);
585 #endif
586
587 return 0;
588 }
589 #endif
590