1 static char rcsid[] = "$Id: boyer-moore.c 198071 2016-09-21 00:19:07Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #ifndef STANDALONE
7 #include "boyer-moore.h"
8 #endif
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #ifdef STANDALONE
14 #define CALLOC calloc
15 #define FREE free
16 #else
17 #include "mem.h"
18 #endif
19 #include "bool.h"
20 #include "complement.h"
21 #include "genome.h"
22 
23 
24 #define ASIZE 5			/* In genomic sequence: A, C, G, T, other */
25 #define MAX(a,b) ((a) > (b) ? (a) : (b))
26 
27 #ifdef DEBUG
28 #define debug(x) x
29 #else
30 #define debug(x)
31 #endif
32 
33 /* Successes */
34 #ifdef DEBUG1
35 #define debug1(x) x
36 #else
37 #define debug1(x)
38 #endif
39 
40 /* PMAP good suffix calculation */
41 #ifdef DEBUG2
42 #define debug2(x) x
43 #else
44 #define debug2(x)
45 #endif
46 
47 #ifdef PMAP
48 /* Same as in dynprog.c */
49 /* Handle only cases in iupac table in dynprog.c */
50 static bool matchtable[26][26] =
51 /*  A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z */
52   {{1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0}, /* A */
53    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* B */
54    {0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0}, /* C */
55    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* D */
56    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* E */
57    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* F */
58    {0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0}, /* G */
59    {1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* H = [ACT] */
60    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* I */
61    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* J */
62    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* K */
63    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* L */
64    {1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,1,0}, /* M = [AC] */
65    {1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* N = [ACGT] */
66    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* O */
67    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* P */
68    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* Q */
69    {1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0}, /* R = [AG] */
70    {0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0}, /* S = [CG] */
71    {0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0}, /* T */
72    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* U */
73    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* V */
74    {1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0}, /* W = [AT] */
75    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* X */
76    {0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0}, /* Y = [CT] */
77    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}}; /* Z */
78 
79 static bool inverse_matchtable[26][26] =
80 /*  A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z */
81   {{0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* not A = [CGT] */
82    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* B */
83    {1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* not C = [AGT] */
84    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* D */
85    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* E */
86    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* F */
87    {1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* not G = [ACT] */
88    {0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0}, /* not H = [G] */
89    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* I */
90    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* J */
91    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* K */
92    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* L */
93    {0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,1,0}, /* not M = [GT] */
94    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* not N = [] */
95    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* O */
96    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* P */
97    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* Q */
98    {0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,0}, /* not R = [CT] */
99    {1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,0,0,1,0,1,0}, /* not S = [AT] */
100    {1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,1,0}, /* not T = [ACG] */
101    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* U */
102    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* V */
103    {0,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,1,0}, /* not W = [CG] */
104    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, /* X */
105    {1,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,1,0,0,0}, /* not Y = [AG] */
106    {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}}; /* Z */
107 #endif
108 
109 
110 static int
na_index(char c)111 na_index (char c) {
112   switch (c) {
113   case 'A': return 0;
114   case 'C': return 1;
115   case 'G': return 2;
116   case 'T': return 3;
117   default: return 4;
118   }
119 }
120 
121 static void
precompute_bad_char_shift(int * bad_char_shift,char * query,int querylen)122 precompute_bad_char_shift (int *bad_char_shift, char *query, int querylen) {
123   int i;
124   char *p;
125 
126   for (i = 0; i < ASIZE; i++) {
127     bad_char_shift[i] = querylen;
128   }
129   for (i = 0, p = query; i < querylen - 1; i++, p++) {
130 #ifdef PMAP
131     if (matchtable[*p-'A']['A'-'A'] == true) {
132       bad_char_shift[na_index('A')] = querylen - i - 1;
133     }
134     if (matchtable[*p-'A']['C'-'A'] == true) {
135       bad_char_shift[na_index('C')] = querylen - i - 1;
136     }
137     if (matchtable[*p-'A']['G'-'A'] == true) {
138       bad_char_shift[na_index('G')] = querylen - i - 1;
139     }
140     if (matchtable[*p-'A']['T'-'A'] == true) {
141       bad_char_shift[na_index('T')] = querylen - i - 1;
142     }
143 #else
144     bad_char_shift[na_index(*p)] = querylen - i - 1;
145 #endif
146   }
147 
148   return;
149 }
150 
151 
152 #ifdef PMAP
153 static bool
suffixp(char * query,int querylen,int subseqlen,int j)154 suffixp (char *query, int querylen, int subseqlen, int j) {
155   int i;
156 
157   debug2(printf("Entering suffixp at subseqlen = %d, j = %d\n",subseqlen,j));
158   for (i = querylen - 1; i >= querylen - 1 - subseqlen + 1 && j >= 0; i--, j--) {
159     if (matchtable[query[i]-'A'][query[j]-'A'] == false) {
160       debug2(printf("Returning false at i = %d\n",i));
161       return false;
162     }
163   }
164   if (j >= 0) {
165     if (inverse_matchtable[query[i]-'A'][query[j]-'A'] == false) {
166       debug2(printf("Returning false at i = %d\n",i));
167       return false;
168     }
169   }
170   debug2(printf("Returning true\n"));
171   return true;
172 }
173 
174 static int
suffix_jump(char * query,int querylen,int subseqlen)175 suffix_jump (char *query, int querylen, int subseqlen) {
176   int j;
177 
178   for (j = querylen - 1; j >= 0; j--) {
179     if (suffixp(query,querylen,subseqlen,j) == true) {
180       return querylen - 1 - j;
181     }
182   }
183   return querylen;
184 }
185 
186 static void
precompute_good_suffix_shift(int * good_suffix_shift,char * query,int querylen)187 precompute_good_suffix_shift (int *good_suffix_shift, char *query, int querylen) {
188   int subseqlen;
189 
190   for (subseqlen = 0; subseqlen < querylen; subseqlen++) {
191     good_suffix_shift[querylen - 1 - subseqlen] = suffix_jump(query,querylen,subseqlen);
192     debug2(printf("Assigning %d to position %d\n",good_suffix_shift[querylen - 1 - subseqlen],querylen-1-subseqlen));
193   }
194 
195   return;
196 }
197 
198 #else
199 
200 static void
precompute_suffix(int * suffix,char * query,int querylen)201 precompute_suffix (int *suffix, char *query, int querylen) {
202   /* Note: initialization of f not necessary, since i < g in first iteration */
203   int f, g, i;
204 
205   suffix[querylen - 1] = querylen;
206   g = querylen - 1;
207   for (i = querylen - 2; i >= 0; i--) {
208     if (i > g && suffix[i + querylen - 1 - f] < i - g) {
209       suffix[i] = suffix[i + querylen - 1 - f];
210     } else {
211       if (i < g) {
212 	g = i;
213       }
214       f = i;
215       while (g >= 0 && query[g] == query[g + querylen - 1 - f]) {
216 	g--;
217       }
218       suffix[i] = f - g;
219     }
220   }
221   debug(
222 	printf("suffix:\n");
223 	for (i = 0; i < querylen; i++) {
224 	  printf("%d %d\n",i,suffix[i]);
225 	}
226 	printf("\n");
227 	);
228 
229   return;
230 }
231 
232 static void
precompute_good_suffix_shift(int * good_suffix_shift,char * query,int querylen)233 precompute_good_suffix_shift (int *good_suffix_shift, char *query, int querylen) {
234   int i, j, *suffix;
235 
236   suffix = (int *) MALLOCA(querylen * sizeof(int));
237   precompute_suffix(suffix,query,querylen);
238 
239   for (i = 0; i < querylen; i++) {
240     good_suffix_shift[i] = querylen;
241   }
242   j = 0;
243   for (i = querylen - 1; i >= -1; i--) {
244     if (i == -1 || suffix[i] == i + 1) {
245       for ( ; j < querylen - 1 - i; j++) {
246 	if (good_suffix_shift[j] == querylen) {
247 	  good_suffix_shift[j] = querylen - 1 - i;
248 	}
249       }
250     }
251   }
252   for (i = 0; i <= querylen - 2; i++) {
253     good_suffix_shift[querylen - 1 - suffix[i]] = querylen - 1 - i;
254   }
255 
256   FREEA(suffix);
257 
258   return;
259 }
260 #endif
261 
262 static bool
query_okay(char * query,int querylen)263 query_okay (char *query, int querylen) {
264   int i;
265   char *p, c;
266 
267 #ifdef PMAP
268   for (i = 0, p = query; i < querylen; i++, p++) {
269     c = *p;
270     if (c < 'A' || c > 'Y') {
271       return false;
272     }
273   }
274 #else
275   for (i = 0, p = query; i < querylen; i++, p++) {
276     c = *p;
277     if (c != 'A' && c != 'C' && c != 'G' && c != 'T') {
278       return false;
279     }
280   }
281 #endif
282   return true;
283 }
284 
285 #ifdef STANDALONE
286 static void
287 #else
288 Intlist_T
289 #endif
BoyerMoore(char * query,int querylen,char * text,int textlen)290 BoyerMoore (char *query, int querylen, char *text, int textlen) {
291 #ifndef STANDALONE
292   Intlist_T hits = NULL;
293 #endif
294   int i, j, *good_suffix_shift;
295   int bad_char_shift[ASIZE];
296 
297   if (query_okay(query,querylen)) {
298     good_suffix_shift = (int *) MALLOCA(querylen * sizeof(int));
299     precompute_good_suffix_shift(good_suffix_shift,query,querylen);
300     precompute_bad_char_shift(bad_char_shift,query,querylen);
301 
302     debug(
303 	  printf("bad_char_shift:\n");
304 	  for (i = 0; i < ASIZE; i++) {
305 	    printf("%d %d\n",i,bad_char_shift[i]);
306 	  }
307 	  printf("\n");
308 	  printf("good_suffix_shift:\n");
309 	  for (i = 0; i < querylen; i++) {
310 	    printf("%d %d\n",i,good_suffix_shift[i]);
311 	  }
312 	  );
313 
314     j = 0;
315     while (j <= textlen - querylen) {
316 #ifdef PMAP
317       for (i = querylen - 1; i >= 0 && matchtable[query[i]-'A'][text[i+j]-'A'] == true; i--) ;
318 #else
319       for (i = querylen - 1; i >= 0 && query[i] == text[i+j]; i--) ;
320 #endif
321       if (i < 0) {
322 #ifndef STANDALONE
323 	hits = Intlist_push(hits,j);
324 #endif
325 
326 	debug1(printf("Success at %d\n",j));
327 	debug(printf("Shift by %d (Gs[0])\n",good_suffix_shift[0]));
328 	j += good_suffix_shift[0];
329       } else {
330 	debug(
331 	      if (good_suffix_shift[i] >
332 		  bad_char_shift[na_index(text[i+j])] - querylen + 1 + i) {
333 		printf("Shift by %d (Gs[%d])\n",
334 		       good_suffix_shift[i],i);
335 	      } else {
336 		printf("Shift by %d (Gs[%d] == Bc[%c] - %d + %d)\n",
337 		       bad_char_shift[na_index(text[i+j])] - querylen + 1 + i,
338 		       i,text[i+j],querylen,i+1);
339 	      }
340 	      );
341 	j += MAX(good_suffix_shift[i],
342 		 bad_char_shift[na_index(text[i+j])] - querylen + 1 + i);
343       }
344     }
345 
346     FREEA(good_suffix_shift);
347   }
348 
349 #ifndef STANDALONE
350   return hits;
351 #endif
352 }
353 
354 
355 #if 0
356 static char complCode[128] = COMPLEMENT_LC;
357 
358 static char
359 get_genomic_nt (char *g_alt, int genomicpos,
360 		Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp) {
361   char c2, c2_alt;
362 
363   if (watsonp) {
364 #if 0
365     if (genome) {
366       return Genome_get_char(genome,chroffset + genomicpos);
367     } else {
368 #endif
369       return Genome_get_char_blocks(&(*g_alt),chroffset + genomicpos);
370 #if 0
371     }
372 #endif
373 
374   } else {
375 #if 0
376     if (genome) {
377       c2 = Genome_get_char(genome,chrhigh - genomicpos);
378     } else {
379 #endif
380       c2 = Genome_get_char_blocks(&c2_alt,chrhigh - genomicpos);
381 #if 0
382     }
383 #endif
384     *g_alt = complCode[(int) c2_alt];
385     return complCode[(int) c2];
386   }
387 }
388 #endif
389 
390 
391 Intlist_T
BoyerMoore_nt(char * query,int querylen,int textoffset,int textlen,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp)392 BoyerMoore_nt (char *query, int querylen, int textoffset, int textlen,
393 	       Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp) {
394   Intlist_T hits = NULL;
395   int i, j, *good_suffix_shift;
396   int bad_char_shift[ASIZE];
397   char *text, *text_alt;
398 
399   debug(printf("Entered BoyerMoore_nt\n"));
400   if (query_okay(query,querylen)) {
401     good_suffix_shift = (int *) MALLOCA(querylen * sizeof(int));
402     text = (char *) MALLOC((textlen+querylen+1) * sizeof(char)); /* alloca could cause stack overflow */
403     text_alt = (char *) MALLOC((textlen+querylen+1) * sizeof(char)); /* alloca could cause stack overflow */
404 
405     precompute_good_suffix_shift(good_suffix_shift,query,querylen);
406     precompute_bad_char_shift(bad_char_shift,query,querylen);
407     if (watsonp) {
408       Genome_get_segment_blocks_right(text,text_alt,/*left*/chroffset+textoffset,/*length*/textlen+querylen,chrhigh,/*revcomp*/false);
409     } else {
410       Genome_get_segment_blocks_left(text,text_alt,/*right*/chrhigh-textoffset+1,/*length*/textlen+querylen,chroffset,/*revcomp*/true);
411     }
412     if (text[0] == '\0') {
413       FREE(text_alt);
414       FREE(text);
415       FREEA(good_suffix_shift);
416       return hits;
417     }
418 
419     /* This makes text[i+j] == get_genomic_nt(&g_alt,textoffset+i+j,chroffset,chrhigh,watsonp) */
420 
421     debug(
422 	  printf("bad_char_shift:\n");
423 	  for (i = 0; i < ASIZE; i++) {
424 	    printf("%d %d\n",i,bad_char_shift[i]);
425 	  }
426 	  printf("\n");
427 	  printf("good_suffix_shift:\n");
428 	  for (i = 0; i < querylen; i++) {
429 	    printf("%d %d\n",i,good_suffix_shift[i]);
430 	  }
431 	  );
432 
433     j = 0;
434     while (j <= textlen - querylen) {
435 #ifdef PMAP
436       for (i = querylen - 1; i >= 0 && matchtable[query[i]-'A'][text[i+j]-'A'] == true; i--) ;
437 #else
438       for (i = querylen - 1; i >= 0 && query[i] == text[i+j]; i--) ;
439 #endif
440       if (i < 0) {
441 	hits = Intlist_push(hits,j);
442 
443 	debug1(printf("Success at %d\n",j));
444 	debug(printf("Shift by %d (Gs[0])\n",good_suffix_shift[0]));
445 	j += good_suffix_shift[0];
446       } else {
447 	debug(
448 	      if (good_suffix_shift[i] >
449 		  bad_char_shift[na_index(text[i+j])] - querylen + 1 + i) {
450 		printf("Shift by %d (Gs[%d])\n",
451 		       good_suffix_shift[i],i);
452 	      } else {
453 		printf("Shift by %d (Gs[%d] == Bc[%c] - %d + %d)\n",
454 		       bad_char_shift[na_index(text[i+j])] - querylen + 1 + i,
455 		       i,text[i+j],querylen,i+1);
456 	      }
457 	      );
458 	j += MAX(good_suffix_shift[i],
459 		 bad_char_shift[na_index(text[i+j])] - querylen + 1 + i);
460       }
461     }
462 
463     FREE(text_alt);
464     FREE(text);
465     FREEA(good_suffix_shift);
466   }
467 
468   return hits;
469 }
470 
471 
472 void
BoyerMoore_bad_char_shift(int * bad_char_shift,char * query,int querylen)473 BoyerMoore_bad_char_shift (int *bad_char_shift, char *query, int querylen) {
474 #ifdef DEBUG
475   int i;
476 #endif
477 
478   if (query_okay(query,querylen)) {
479     precompute_bad_char_shift(bad_char_shift,query,querylen);
480   } else {
481     fprintf(stderr,"Query cannot have bad characters\n");
482     abort();
483   }
484 
485   debug(
486 	printf("bad_char_shift:\n");
487 	for (i = 0; i < ASIZE; i++) {
488 	  printf("%d %d\n",i,bad_char_shift[i]);
489 	}
490 	printf("\n");
491 	);
492 
493   return;
494 }
495 
496 
497 int
BoyerMoore_maxprefix(char * query,int querylen,char * text,int textlen,int * bad_char_shift)498 BoyerMoore_maxprefix (char *query, int querylen, char *text, int textlen,
499 		      int *bad_char_shift) {
500   int maxprefix = 0;
501   int i, j;
502 
503   debug(printf("Query: %s\n",query));
504   debug(printf("Text:  %s\n",text));
505 
506   j = -querylen + 1;
507   while (j <= textlen - querylen) {
508 #ifdef PMAP
509     for (i = querylen - 1; i >= 0 && matchtable[query[i]-'A'][text[i+j]-'A'] == true; i--) ;
510 #else
511     for (i = querylen - 1; i+j >= 0 && query[i] == text[i+j]; i--) ;
512 #endif
513     if (i < 0 || i+j < 0) {
514       maxprefix = querylen+j;
515       debug1(printf("Success at %d (length %d)\n",j,querylen+j));
516       debug(printf("Shift by 1\n"));
517       j += 1;
518     } else {
519       debug1(printf("Failure at j=%d, i=%d\n",j,i));
520       debug(
521 	    printf("Shift by %d (Gs[%d] == Bc[%c] - %d + %d)\n",
522 		   bad_char_shift[na_index(text[i+j])],
523 		   i,text[i+j],querylen,i+1);
524 	    );
525       j += bad_char_shift[na_index(text[i+j])];
526     }
527   }
528 
529   return maxprefix;
530 }
531 
532 
533 
534 #if 0
535 static char *
536 string_reverse (char *original, int length) {
537   char *reverse;
538   int i, j;
539 
540   reverse = (char *) CALLOC(length,sizeof(char));
541 
542   for (i = 0, j = length-1; i < length; i++, j--) {
543     reverse[i] = original[j];
544   }
545   return reverse;
546 }
547 #endif
548 
549 
550 #ifdef STANDALONE
551 int
main(int argc,char * argv[])552 main (int argc, char *argv[]) {
553   char *query, *text;
554   char *query_rev, *text_rev;
555   int querylen, textlen;
556   int bad_char_shift[ASIZE];
557 
558 #ifdef PMAP
559   int i, j;
560   for (i = 0; i < 26; i++) {
561     for (j = 0; j < 26; j++) {
562       if (matchtable[i][j] != matchtable[j][i]) {
563 	fprintf(stderr,"Assymetry problem at %d,%d\n",i,j);
564       }
565     }
566   }
567 #endif
568 
569   query = argv[1];
570   text = argv[2];
571   querylen = strlen(query);
572   textlen = strlen(text);
573 
574 #if 0
575   BoyerMoore(query,querylen,text,textlen);
576 #else
577   /* Test of maxprefix */
578   query_rev = string_reverse(query,querylen);
579   text_rev = string_reverse(text,textlen);
580 
581   printf("Query rev: %s\n",query_rev);
582   printf("Text rev: %s\n",text_rev);
583   BoyerMoore_bad_char_shift(bad_char_shift,query_rev,querylen);
584   BoyerMoore_maxprefix(query_rev,querylen,text_rev,textlen,bad_char_shift);
585 #endif
586 
587   return 0;
588 }
589 #endif
590