1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <search/extern.h>
28 #include <compiler.h>
29 #include <os-native.h>
30 #include <sysalloc.h>
31 
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 
38 #include "search-priv.h"
39 
40 
41 /*
42    We don't use longs here because the addition means we need to keep
43    some bits in reserve to calculate carries.
44 */
45 
46 typedef int32_t schunk;
47 typedef uint32_t uchunk;
48 
49 typedef struct CHUNK {
50     int32_t size;
51     uchunk *chunks;
52 } CHUNK;
53 
54 struct MyersUnlimitedSearch {
55     int32_t m;
56     CHUNK *PEq[256];
57     CHUNK *PEq_R[256];
58 };
59 
60 static const char NA2KEY [] = "ACGT";
61 static const char NA4KEY [] = " ACMGRSVTWYHKDBN";
62 
63 static
any_non_4na_chars(const char * pattern)64 int32_t any_non_4na_chars(const char *pattern)
65 {
66     int32_t len = strlen(pattern);
67     int32_t i;
68     char *p;
69 
70     for ( i=0; i<len; i++ )
71     {
72         p = strchr( NA4KEY, pattern[i] );
73         if ( p == NULL )
74             return 1;
75     }
76     return 0;
77 }
78 
79 static
na4key_matches(AgrepFlags mode,char na4,char acgt)80 int32_t na4key_matches(AgrepFlags mode, char na4, char acgt)
81 {
82     char *p;
83     int32_t pos4, pos2;
84     p = strchr( NA4KEY, na4 );
85     if ( p == NULL )
86     {
87         p = strchr( NA4KEY, 'N' );
88     }
89     pos4 = p - NA4KEY;
90     pos2 = strchr( NA2KEY, acgt ) - NA2KEY;
91     if ( pos4 & ( 1 << pos2 ) )
92         return 1;
93     return 0;
94 }
95 
96 
97 
98 
chunksize(int32_t size)99 int32_t chunksize(int32_t size) {
100     int32_t ret;
101     ret = 1 + (size / (8*sizeof(uchunk)));
102     return ret;
103 }
104 
chunk_or_in(CHUNK * chunk,CHUNK * or)105 void chunk_or_in(CHUNK *chunk, CHUNK *or)
106 {
107     int32_t i;
108     int32_t size = chunk->size;
109     for (i=0; i<size; i++)
110         chunk->chunks[i] |= or->chunks[i];
111 }
112 
chunk_xor_in(CHUNK * chunk,CHUNK * xor)113 void chunk_xor_in(CHUNK *chunk, CHUNK *xor)
114 {
115     int32_t i;
116     int32_t size = chunk->size;
117     for (i=0; i<size; i++)
118         chunk->chunks[i] ^= xor->chunks[i];
119 }
120 
121 
chunk_and_in(CHUNK * chunk,CHUNK * and)122 void chunk_and_in(CHUNK *chunk, CHUNK *and)
123 {
124     int32_t i;
125     int32_t size = chunk->size;
126     for (i=0; i<size; i++)
127         chunk->chunks[i] &= and->chunks[i];
128 }
129 
chunk_add_in(CHUNK * chunk,CHUNK * add)130 void chunk_add_in(CHUNK *chunk, CHUNK *add)
131 {
132     int32_t i;
133     int32_t size = chunk->size;
134     uint64_t carry = 0;
135     uint64_t newcarry;
136     for (i=size-1; i>=0; i--) {
137         newcarry =
138             ((uint64_t)chunk->chunks[i] + (uint64_t)add->chunks[i]
139              + carry)
140             >> (8*sizeof(uchunk));
141         chunk->chunks[i] += add->chunks[i] + carry;
142         carry = newcarry;
143     }
144 }
145 
146 
chunk_set(CHUNK * chunk,CHUNK * copy)147 void chunk_set(CHUNK *chunk, CHUNK *copy)
148 {
149     int32_t i;
150     for (i=0; i<chunk->size; i++) {
151         chunk->chunks[i] = copy->chunks[i];
152     }
153 }
154 
155 
chunk_isbit_set(CHUNK * chunk,int32_t bit)156 int32_t chunk_isbit_set(CHUNK *chunk, int32_t bit)
157 {
158     int32_t cn = chunk->size - 1 - (bit / (8*sizeof(uchunk)));
159     int32_t chunkbit = bit % (8*sizeof(uchunk));
160     return chunk->chunks[cn] & (1 << chunkbit);
161 }
162 
163 
chunk_set_bit(CHUNK * chunk,int32_t bit)164 void chunk_set_bit(CHUNK *chunk, int32_t bit)
165 {
166     int32_t cn = chunk->size - 1 - (bit / (8*sizeof(uchunk)));
167     int32_t chunkbit = bit % (8*sizeof(uchunk));
168     chunk->chunks[cn] |= (1 << chunkbit);
169 }
170 
171 
chunk_negate(CHUNK * chunk)172 void chunk_negate(CHUNK *chunk)
173 {
174     int32_t i;
175     for (i=0; i<chunk->size; i++) {
176         chunk->chunks[i] = ~chunk->chunks[i];
177     }
178 }
179 
alloc_chunk_and_zero(CHUNK * chunk,int32_t size)180 void alloc_chunk_and_zero(CHUNK *chunk, int32_t size)
181 {
182     chunk->chunks = malloc(size * sizeof(uchunk));
183     memset(chunk->chunks, 0, size * sizeof(uchunk));
184 }
185 
free_chunk_parts(CHUNK * chunk)186 void free_chunk_parts(CHUNK *chunk)
187 {
188     free(chunk->chunks);
189     chunk->chunks = NULL;
190 }
191 
free_chunk(CHUNK * chunk)192 void free_chunk(CHUNK *chunk)
193 {
194     free(chunk->chunks);
195     free(chunk);
196 }
197 
198 
alloc_chunk(int32_t size)199 CHUNK *alloc_chunk(int32_t size)
200 {
201     CHUNK *ret;
202     ret = malloc(sizeof(CHUNK));
203     ret->size = size;
204     ret->chunks = malloc(size * sizeof(uchunk));
205     memset(ret->chunks, 0, size * sizeof(uchunk));
206     return ret;
207 }
208 
chunk_set_minusone(CHUNK * src)209 void chunk_set_minusone(CHUNK *src)
210 {
211     int32_t i;
212     for (i=0; i<src->size; i++) {
213         src->chunks[i] = (uchunk)-1;
214     }
215 }
216 
chunk_zero(CHUNK * src)217 void chunk_zero(CHUNK *src)
218 {
219     int32_t i;
220     for (i=0; i<src->size; i++) {
221         src->chunks[i] = (uchunk)0;
222     }
223 }
224 
print_chunk(CHUNK * src)225 void print_chunk(CHUNK *src)
226 {
227     uchunk chunk;
228     unsigned char byte;
229     char buf[9];
230     int32_t i, j, k;
231     buf[8] = '\0';
232     for (i=0; i<src->size; i++) {
233         chunk = src->chunks[i];
234         for (j=0; j<sizeof(uchunk); j++) {
235             byte = chunk >> (8*(sizeof(uchunk) - j - 1));
236             for (k=0; k<8; k++) {
237                 buf[k] = "01"[1&(byte>>(7-k))];
238             }
239             printf("%s ", buf);
240         }
241     }
242     printf("\n");
243 }
244 
245 
246 /*
247   This currently does not preserve the sign bit.
248 */
lshift_chunk(CHUNK * dest,CHUNK * src,int32_t n)249 void lshift_chunk(CHUNK *dest, CHUNK *src, int32_t n)
250 {
251     int32_t i, j;
252     uchunk slop;
253     int32_t chunkshift = n / (8*sizeof(uchunk));
254     int32_t rem = n % (8*sizeof(uchunk));
255     /* Assumes they're both the same size. */
256     int32_t size = src->size;
257 
258 
259     /* i is the destination chunk */
260     slop = 0;
261     for (i=size-1; i>=0; i--) {
262         j = i+chunkshift;
263         if (j >= size) {
264             dest->chunks[i] = 0;
265         } else {
266             dest->chunks[i] = slop | src->chunks[j] << rem;
267             slop = src->chunks[j] >> (8*sizeof(uchunk) - rem);
268         }
269     }
270 }
271 
chunk_lshift_one_inplace(CHUNK * dest)272 void chunk_lshift_one_inplace(CHUNK *dest)
273 {
274     int32_t i;
275     uchunk slop, newslop;
276     int32_t size = dest->size;
277 
278     slop = 0;
279     for (i=size-1; i>=0; i--) {
280         newslop = dest->chunks[i] >> (8*sizeof(uchunk) - 1);
281         dest->chunks[i] = slop | dest->chunks[i] << 1;
282         slop = newslop;
283     }
284 }
285 
286 
chunk_lshift_one(CHUNK * dest,CHUNK * src)287 void chunk_lshift_one(CHUNK *dest, CHUNK *src)
288 {
289     int32_t i;
290     uchunk slop;
291     int32_t size = src->size;
292 
293     slop = 0;
294     for (i=size-1; i>=0; i--) {
295         dest->chunks[i] = slop | src->chunks[i] << 1;
296         slop = src->chunks[i] >> (8*sizeof(uchunk) - 1);
297     }
298 }
299 
300 
301 
302 #ifdef UNUSED_MAY_NOT_WORK
303 
304 /*
305  * In this, chunks are big-endian, meaning 0 is the most significant chunk.
306  */
rshift_uchunk(CHUNK * dest,CHUNK * src,int32_t n)307 void rshift_uchunk(CHUNK *dest, CHUNK *src, int32_t n)
308 {
309     int32_t size = src->size;
310     uchunk slop;
311     int32_t chunkshift = n / (8*sizeof(uchunk));
312     int32_t rem = n % (8*sizeof(uchunk));
313     int32_t i, j;
314 
315     /* i is the destination chunk */
316     slop = 0;
317     for (i=0; i<size; i++) {
318         j = i-chunkshift;
319         if (j < 0) {
320             dest->chunks[i] = 0;
321         } else {
322             dest->chunks[i] = slop | src->chunks[j] >> rem;
323             slop = src->chunks[j] << (8*sizeof(uchunk) - rem);
324         }
325     }
326 }
327 
rshift_schunk(CHUNK * dest,CHUNK * src,int32_t n)328 void rshift_schunk(CHUNK *dest, CHUNK *src, int32_t n)
329 {
330     uchunk slop;
331     int32_t chunkshift = n / (8*sizeof(uchunk));
332     int32_t rem = n % (8*sizeof(uchunk));
333     int32_t i, j, size;
334 
335     /* i is the destination chunk */
336     slop = -1;
337     for (i=0; i<size; i++) {
338         j = i-chunkshift;
339         if (j < 0) {
340             dest->chunks[i] = -1;
341         } else {
342             dest->chunks[i] = slop | src->chunks[j] >> rem;
343             slop = src->chunks[j] << (8*sizeof(uchunk) - rem);
344         }
345     }
346 }
347 
348 #endif
349 
350 
MyersUnlimitedFree(MyersUnlimitedSearch * self)351 void MyersUnlimitedFree ( MyersUnlimitedSearch *self )
352 {
353     int32_t j;
354     for (j=0; j<256; j++) {
355         free_chunk(self->PEq[j]);
356         free_chunk(self->PEq_R[j]);
357     }
358     free(self);
359 }
360 
361 
MyersUnlimitedMake(MyersUnlimitedSearch ** self,AgrepFlags mode,const char * pattern)362 rc_t MyersUnlimitedMake ( MyersUnlimitedSearch **self,
363         AgrepFlags mode, const char *pattern )
364 {
365     const unsigned char *upattern = (const unsigned char *)pattern;
366     int32_t rc;
367     int32_t len = strlen(pattern);
368     int32_t plen = len;
369     int32_t i, j;
370     int32_t m;
371     int32_t chunks;
372 
373     if (!(mode & AGREP_ANYTHING_ELSE_IS_N) &&
374         any_non_4na_chars(pattern))
375         /* This should be a return code. */
376         return RC( rcText, rcString, rcSearching, rcParam, rcUnrecognized);
377 
378 
379     *self = malloc(sizeof(MyersUnlimitedSearch));
380     m = (*self)->m = len;
381     chunks = chunksize(m);
382     for (j=0; j<256; j++) {
383         (*self)->PEq[j] = alloc_chunk(chunks);
384         (*self)->PEq_R[j] = alloc_chunk(chunks);
385     }
386 
387     for(j = 0; j < m; j++) {
388         chunk_set_bit((*self)->PEq[upattern[j]], j);
389         if( pattern[j] == 'a' ) {
390             chunk_set_bit((*self)->PEq['t'], j); /* t == a */
391         }
392     }
393 
394     for(j = 0; j < m; j++) {
395         chunk_set_bit((*self)->PEq_R[upattern[m-j-1]], j);
396         if( pattern[m-j-1] == 'a' ) {
397             chunk_set_bit((*self)->PEq_R['t'], j); /* t == a */
398         }
399     }
400 
401     for (i=0; i<4; i++) {
402         unsigned char acgt = NA2KEY[i];
403         for (j=0; j<plen; j++) {
404             if (na4key_matches(mode, pattern[j], acgt)) {
405                 /* bits |= (unsigned long)1<<j; */
406                 chunk_set_bit((*self)->PEq[acgt], j);
407                 if (mode & AGREP_TEXT_EXPANDED_2NA)
408                     chunk_set_bit((*self)->PEq[i], j);
409             }
410         }
411     }
412     for (i=0; i<4; i++) {
413         unsigned char acgt = NA2KEY[i];
414         for (j=0; j<plen; j++) {
415             if (na4key_matches(mode, pattern[plen-j-1], acgt)) {
416                 chunk_set_bit((*self)->PEq_R[acgt], j);
417                 if (mode & AGREP_TEXT_EXPANDED_2NA)
418                     chunk_set_bit((*self)->PEq_R[i], j);
419             }
420         }
421     }
422     return 0;
423 
424     for (j=0; j<256; j++) {
425         free_chunk((*self)->PEq[j]);
426         free_chunk((*self)->PEq_R[j]);
427     }
428     free(*self);
429     *self = NULL;
430     return rc;
431 
432 }
433 
434 
435 /*
436    This finds the first match forward in the string less than or equal
437    the threshold.  If nonzero, this will be a prefix of any exact match.
438 */
MyersUnlimitedFindFirst(MyersUnlimitedSearch * self,int32_t threshold,const char * text,size_t n,AgrepMatch * match)439 uint32_t MyersUnlimitedFindFirst ( MyersUnlimitedSearch *self,
440         int32_t threshold, const char* text, size_t n, AgrepMatch *match )
441 {
442     const unsigned char *utext = (const unsigned char *)text;
443     CHUNK *Pv;
444     CHUNK *Mv;
445     CHUNK *Xv, *Xh, *Ph, *Mh;
446 
447     int32_t m = self->m;
448     int32_t csize = chunksize(m);
449     int32_t Score;
450     int32_t BestScore = m;
451     int32_t from = 0;
452     int32_t to = -1;
453 
454     int32_t j;
455 
456     CHUNK *Eq;
457 
458     Pv = alloc_chunk(csize);
459     Mv = alloc_chunk(csize);
460     Xv = alloc_chunk(csize);
461     Xh = alloc_chunk(csize);
462     Ph = alloc_chunk(csize);
463     Mh = alloc_chunk(csize);
464 
465 
466     Score = m;
467     chunk_set_minusone(Pv);
468     chunk_zero(Mv);
469 
470     for(j = 0; j < n; j++) {
471 #ifdef DEBUG
472         printf("%d j loop\n", j);
473 #endif
474         Eq = self->PEq[utext[j]];
475 #ifdef DEBUG
476         printf("Eq: "); print_chunk(Eq); printf("\n");
477 #endif
478         /* Xv = Eq | Mv; */
479         chunk_set(Xv, Eq);
480         chunk_or_in(Xv, Mv);
481 #ifdef DEBUG
482         printf("Xv: "); print_chunk(Xv); printf("\n");
483 #endif
484         /* Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq; */
485         chunk_set(Xh, Eq);
486         chunk_and_in(Xh, Pv);
487         chunk_add_in(Xh, Pv);
488         chunk_xor_in(Xh, Pv);
489         chunk_or_in(Xh, Eq);
490 #ifdef DEBUG
491         printf("Xh: "); print_chunk(Xh); printf("\n");
492 #endif
493         /* Ph = Mv | ~ (Xh | Pv); */
494         chunk_set(Ph, Xh);
495         chunk_or_in(Ph, Pv);
496         chunk_negate(Ph);
497         chunk_or_in(Ph, Mv);
498 #ifdef DEBUG
499         printf("Ph: "); print_chunk(Ph); printf("\n");
500 #endif
501         /* Mh = Pv & Xh; */
502         chunk_set(Mh, Pv);
503         chunk_and_in(Mh, Xh);
504 #ifdef DEBUG
505         printf("Mh: "); print_chunk(Mh); printf("\n");
506 #endif
507         /* Ph & (1 << (m - 1)) */
508         if (chunk_isbit_set(Ph, m-1)) {
509             Score++;
510             /* Mh & (1 << (m - 1)) */
511         } else if (chunk_isbit_set(Mh, m-1)) {
512             Score--;
513         }
514         /* Ph <<= 1; */
515         chunk_lshift_one_inplace(Ph);
516 #ifdef DEBUG
517         printf("Ph: "); print_chunk(Ph); printf("\n");
518 #endif
519         /* Mh <<= 1; */
520         chunk_lshift_one_inplace(Mh);
521 #ifdef DEBUG
522         printf("Mh: "); print_chunk(Mh); printf("\n");
523 #endif
524         /* Pv = Mh | ~(Xv | Ph); */
525         chunk_set(Pv, Xv);
526         chunk_or_in(Pv, Ph);
527         chunk_negate(Pv);
528         chunk_or_in(Pv, Mh);
529 #ifdef DEBUG
530         printf("Pv: "); print_chunk(Pv); printf("\n");
531 #endif
532         /* Mv = Ph & Xv; */
533         chunk_set(Mv, Ph);
534         chunk_and_in(Mv, Xv);
535 #ifdef DEBUG
536         printf("Mv: "); print_chunk(Mv); printf("\n");
537 #endif
538 #ifdef DEBUG
539         printf("%3d. score %d\n", j, Score);
540 #endif
541         if (Score <= threshold) {
542             BestScore = Score;
543             to = j;
544             break;
545         }
546     }
547 
548     /* Continue while score decreases under the threshold */
549     for(j++; j < n; j++) {
550         Eq = self->PEq[utext[j]];
551         /* Xv = Eq | Mv; */
552         chunk_set(Xv, Eq);
553         chunk_or_in(Xv, Mv);
554         /* Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq; */
555         chunk_set(Xh, Eq);
556         chunk_and_in(Xh, Pv);
557         chunk_add_in(Xh, Pv);
558         chunk_xor_in(Xh, Pv);
559         chunk_or_in(Xh, Eq);
560         /* Ph = Mv | ~ (Xh | Pv); */
561         chunk_set(Ph, Xh);
562         chunk_or_in(Ph, Pv);
563         chunk_negate(Ph);
564         chunk_or_in(Ph, Mv);
565         /* Mh = Pv & Xh; */
566         chunk_set(Mh, Pv);
567         chunk_and_in(Mh, Xh);
568         /* Ph & (1 << (m - 1)) */
569         if (chunk_isbit_set(Ph, m-1)) {
570             Score++;
571             /* Mh & (1 << (m - 1)) */
572         } else if (chunk_isbit_set(Mh, m-1)) {
573             Score--;
574         }
575         /* Ph <<= 1; */
576         chunk_lshift_one_inplace(Ph);
577         /* Mh <<= 1; */
578         chunk_lshift_one_inplace(Mh);
579         /* Pv = Mh | ~(Xv | Ph); */
580         chunk_set(Pv, Xv);
581         chunk_or_in(Pv, Ph);
582         chunk_negate(Pv);
583         chunk_or_in(Pv, Mh);
584         /* Mv = Ph & Xv; */
585         chunk_set(Mv, Ph);
586         chunk_and_in(Mv, Xv);
587 
588 #ifdef DEBUG
589         printf("%3d. score %d\n", j, Score);
590 #endif
591         if (Score < BestScore) {
592             BestScore = Score;
593             to = j;
594         } else {
595             break;
596         }
597     }
598 
599     /* Re-initialize for next scan! */
600     Score = m;
601     chunk_set_minusone(Pv);
602     chunk_zero(Mv);
603 
604     for(j = to; j >= 0; j--) {
605         Eq = self->PEq_R[utext[j]]; 	/* This line is different. */
606         /* Xv = Eq | Mv; */
607         chunk_set(Xv, Eq);
608         chunk_or_in(Xv, Mv);
609         /* Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq; */
610         chunk_set(Xh, Eq);
611         chunk_and_in(Xh, Pv);
612         chunk_add_in(Xh, Pv);
613         chunk_xor_in(Xh, Pv);
614         chunk_or_in(Xh, Eq);
615         /* Ph = Mv | ~ (Xh | Pv); */
616         chunk_set(Ph, Xh);
617         chunk_or_in(Ph, Pv);
618         chunk_negate(Ph);
619         chunk_or_in(Ph, Mv);
620         /* Mh = Pv & Xh; */
621         chunk_set(Mh, Pv);
622         chunk_and_in(Mh, Xh);
623         /* Ph & (1 << (m - 1)) */
624         if (chunk_isbit_set(Ph, m-1)) {
625             Score++;
626             /* Mh & (1 << (m - 1)) */
627         } else if (chunk_isbit_set(Mh, m-1)) {
628             Score--;
629         }
630         /* Ph <<= 1; */
631         chunk_lshift_one_inplace(Ph);
632         /* Mh <<= 1; */
633         chunk_lshift_one_inplace(Mh);
634         /* Pv = Mh | ~(Xv | Ph); */
635         chunk_set(Pv, Xv);
636         chunk_or_in(Pv, Ph);
637         chunk_negate(Pv);
638         chunk_or_in(Pv, Mh);
639         /* Mv = Ph & Xv; */
640         chunk_set(Mv, Ph);
641         chunk_and_in(Mv, Xv);
642 
643 #ifdef DEBUG
644         printf("%3d. score %d\n", j, Score);
645 #endif
646         if(Score <= BestScore) {
647 #ifdef DEBUG
648             printf("rev match at position %d\n", j);
649 #endif
650             from = j;
651             break;
652         }
653     }
654 
655     free_chunk(Pv);
656     free_chunk(Mv);
657     free_chunk(Xv);
658     free_chunk(Xh);
659     free_chunk(Ph);
660     free_chunk(Mh);
661 
662     if (BestScore <= threshold) {
663         match->position = from;
664         match->length = to-from+1;
665         match->score = BestScore;
666         return 1;
667     } else {
668         return 0;
669     }
670 }
671 
672 /*
673    This finds the first match forward in the string less than or equal
674    the threshold.  If nonzero, this will be a prefix of any exact match.
675 */
MyersUnlimitedFindAll(const AgrepCallArgs * args)676 void MyersUnlimitedFindAll ( const AgrepCallArgs *args )
677 {
678     AgrepFlags mode = args->self->mode;
679     MyersUnlimitedSearch *self = args->self->myersunltd;
680     int32_t threshold = args->threshold;
681     const unsigned char *utext = (const unsigned char *)args->buf;
682     int32_t n = args->buflen;
683     AgrepMatchCallback cb = dp_end_callback;
684     const void *cbinfo = args;
685 
686     AgrepMatch match;
687     AgrepContinueFlag cont;
688 
689     CHUNK *Pv;
690     CHUNK *Mv;
691     CHUNK *Xv, *Xh, *Ph, *Mh;
692 
693     int32_t m = self->m;
694     int32_t csize = chunksize(m);
695     int32_t Score;
696 
697     int32_t curscore = 0;
698     int32_t curlast = 0;
699     int32_t continuing = 0;
700 
701     int32_t j;
702 
703     CHUNK *Eq;
704 
705 
706 
707     Pv = alloc_chunk(csize);
708     Mv = alloc_chunk(csize);
709     Xv = alloc_chunk(csize);
710     Xh = alloc_chunk(csize);
711     Ph = alloc_chunk(csize);
712     Mh = alloc_chunk(csize);
713 
714     Score = m;
715     chunk_set_minusone(Pv);
716     chunk_zero(Mv);
717 
718     for(j = 0; j < n; j++) {
719 #ifdef DEBUG
720         printf("%d j loop\n", j);
721 #endif
722         Eq = self->PEq[utext[j]];
723 #ifdef DEBUG
724         printf("Eq: "); print_chunk(Eq); printf("\n");
725 #endif
726         /* Xv = Eq | Mv; */
727         chunk_set(Xv, Eq);
728         chunk_or_in(Xv, Mv);
729 #ifdef DEBUG
730         printf("Xv: "); print_chunk(Xv); printf("\n");
731 #endif
732         /* Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq; */
733         chunk_set(Xh, Eq);
734         chunk_and_in(Xh, Pv);
735         chunk_add_in(Xh, Pv);
736         chunk_xor_in(Xh, Pv);
737         chunk_or_in(Xh, Eq);
738 #ifdef DEBUG
739         printf("Xh: "); print_chunk(Xh); printf("\n");
740 #endif
741         /* Ph = Mv | ~ (Xh | Pv); */
742         chunk_set(Ph, Xh);
743         chunk_or_in(Ph, Pv);
744         chunk_negate(Ph);
745         chunk_or_in(Ph, Mv);
746 #ifdef DEBUG
747         printf("Ph: "); print_chunk(Ph); printf("\n");
748 #endif
749         /* Mh = Pv & Xh; */
750         chunk_set(Mh, Pv);
751         chunk_and_in(Mh, Xh);
752 #ifdef DEBUG
753         printf("Mh: "); print_chunk(Mh); printf("\n");
754 #endif
755         /* Ph & (1 << (m - 1)) */
756         if (chunk_isbit_set(Ph, m-1)) {
757             Score++;
758             /* Mh & (1 << (m - 1)) */
759         } else if (chunk_isbit_set(Mh, m-1)) {
760             Score--;
761         }
762         /* Ph <<= 1; */
763         chunk_lshift_one_inplace(Ph);
764 #ifdef DEBUG
765         printf("Ph: "); print_chunk(Ph); printf("\n");
766 #endif
767         /* Mh <<= 1; */
768         chunk_lshift_one_inplace(Mh);
769 #ifdef DEBUG
770         printf("Mh: "); print_chunk(Mh); printf("\n");
771 #endif
772         /* Pv = Mh | ~(Xv | Ph); */
773         chunk_set(Pv, Xv);
774         chunk_or_in(Pv, Ph);
775         chunk_negate(Pv);
776         chunk_or_in(Pv, Mh);
777 #ifdef DEBUG
778         printf("Pv: "); print_chunk(Pv); printf("\n");
779 #endif
780         /* Mv = Ph & Xv; */
781         chunk_set(Mv, Ph);
782         chunk_and_in(Mv, Xv);
783 #ifdef DEBUG
784         printf("Mv: "); print_chunk(Mv); printf("\n");
785 #endif
786 #ifdef DEBUG
787         printf("%3d. score %d\n", j, Score);
788 #endif
789         if (Score <= threshold) {
790             if (continuing) {
791                 if (Score < curscore &&
792                     ((mode & AGREP_EXTEND_BETTER) ||
793                      (mode & AGREP_EXTEND_SAME))) {
794                     curscore = Score;
795                     curlast = j;
796                 } else if (Score == curscore &&
797                            ((mode & AGREP_EXTEND_BETTER) ||
798                             (mode & AGREP_EXTEND_SAME))) {
799                     if (mode & AGREP_EXTEND_SAME) {
800                         curlast = j;
801                     }
802                 } else {
803                     continuing = 0;
804                     match.score = curscore;
805                     match.position = curlast;
806                     match.length = -1;
807                     cont = AGREP_CONTINUE;
808                     (*cb)(cbinfo, &match, &cont);
809                     if (cont != AGREP_CONTINUE)
810                         goto EXIT;
811                 }
812             } else if ((mode & AGREP_EXTEND_SAME) ||
813                        (mode & AGREP_EXTEND_BETTER)) {
814                 curscore = Score;
815                 curlast = j;
816                 continuing = 1;
817             } else {
818                 match.score = Score;
819                 match.position = j;
820                 match.length = -1;
821                 cont = AGREP_CONTINUE;
822                 (*cb)(cbinfo, &match, &cont);
823                 if (cont != AGREP_CONTINUE)
824                     goto EXIT;
825             }
826             /* If we're no longer under the threshold, we might
827                have been moving forward looking for a better match
828             */
829         } else if (continuing) {
830             continuing = 0;
831             match.score = curscore;
832             match.position = curlast;
833             match.length = -1;
834             cont = AGREP_CONTINUE;
835             (*cb)(cbinfo, &match, &cont);
836             if (cont != AGREP_CONTINUE)
837                 goto EXIT;
838         }
839         /* print_col_as_row(nxt, plen); */
840     }
841     if (continuing) {
842         continuing = 0;
843         match.score = curscore;
844         match.position = curlast;
845         match.length = -1;
846         (*cb)(cbinfo, &match, &cont);
847     }
848 
849 EXIT:
850 
851     free_chunk(Pv);
852     free_chunk(Mv);
853     free_chunk(Xv);
854     free_chunk(Xh);
855     free_chunk(Ph);
856     free_chunk(Mh);
857 }
858 
859 
MyersUnlimitedFindBest(MyersUnlimitedSearch * self,const char * text,size_t n,int32_t * pos,int32_t * len)860 LIB_EXPORT int32_t CC MyersUnlimitedFindBest ( MyersUnlimitedSearch *self,
861         const char* text, size_t n, int32_t *pos, int32_t *len )
862 {
863     const unsigned char *utext = (const unsigned char *)text;
864     CHUNK *Pv;
865     CHUNK *Mv;
866     CHUNK *Xv, *Xh, *Ph, *Mh;
867 
868     int32_t m = self->m;
869     int32_t csize = chunksize(m);
870     int32_t Score;
871     int32_t BestScore = m;
872     int32_t from = 0;
873     int32_t to = -1;
874 
875     int32_t j;
876 
877     CHUNK *Eq;
878 
879     Pv = alloc_chunk(csize);
880     Mv = alloc_chunk(csize);
881     Xv = alloc_chunk(csize);
882     Xh = alloc_chunk(csize);
883     Ph = alloc_chunk(csize);
884     Mh = alloc_chunk(csize);
885 
886 
887     Score = m;
888     chunk_set_minusone(Pv);
889     chunk_zero(Mv);
890 
891     for(j = 0; j < n; j++) {
892 #ifdef DEBUG
893         printf("%d j loop\n", j);
894 #endif
895         Eq = self->PEq[utext[j]];
896 #ifdef DEBUG
897         printf("Eq: "); print_chunk(Eq); printf("\n");
898 #endif
899         /* Xv = Eq | Mv; */
900         chunk_set(Xv, Eq);
901         chunk_or_in(Xv, Mv);
902 #ifdef DEBUG
903         printf("Xv: "); print_chunk(Xv); printf("\n");
904 #endif
905         /* Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq; */
906         chunk_set(Xh, Eq);
907         chunk_and_in(Xh, Pv);
908         chunk_add_in(Xh, Pv);
909         chunk_xor_in(Xh, Pv);
910         chunk_or_in(Xh, Eq);
911 #ifdef DEBUG
912         printf("Xh: "); print_chunk(Xh); printf("\n");
913 #endif
914         /* Ph = Mv | ~ (Xh | Pv); */
915         chunk_set(Ph, Xh);
916         chunk_or_in(Ph, Pv);
917         chunk_negate(Ph);
918         chunk_or_in(Ph, Mv);
919 #ifdef DEBUG
920         printf("Ph: "); print_chunk(Ph); printf("\n");
921 #endif
922         /* Mh = Pv & Xh; */
923         chunk_set(Mh, Pv);
924         chunk_and_in(Mh, Xh);
925 #ifdef DEBUG
926         printf("Mh: "); print_chunk(Mh); printf("\n");
927 #endif
928         /* Ph & (1 << (m - 1)) */
929         if (chunk_isbit_set(Ph, m-1)) {
930             Score++;
931             /* Mh & (1 << (m - 1)) */
932         } else if (chunk_isbit_set(Mh, m-1)) {
933             Score--;
934         }
935         /* Ph <<= 1; */
936         chunk_lshift_one_inplace(Ph);
937 #ifdef DEBUG
938         printf("Ph: "); print_chunk(Ph); printf("\n");
939 #endif
940         /* Mh <<= 1; */
941         chunk_lshift_one_inplace(Mh);
942 #ifdef DEBUG
943         printf("Mh: "); print_chunk(Mh); printf("\n");
944 #endif
945         /* Pv = Mh | ~(Xv | Ph); */
946         chunk_set(Pv, Xv);
947         chunk_or_in(Pv, Ph);
948         chunk_negate(Pv);
949         chunk_or_in(Pv, Mh);
950 #ifdef DEBUG
951         printf("Pv: "); print_chunk(Pv); printf("\n");
952 #endif
953         /* Mv = Ph & Xv; */
954         chunk_set(Mv, Ph);
955         chunk_and_in(Mv, Xv);
956 #ifdef DEBUG
957         printf("Mv: "); print_chunk(Mv); printf("\n");
958 #endif
959 #ifdef DEBUG
960         printf("%3d. score %d\n", j, Score);
961 #endif
962         if (Score < BestScore) {
963             BestScore = Score;
964             to = j;
965         }
966     }
967 
968     /* Re-initialize for next scan! */
969     Score = m;
970     chunk_set_minusone(Pv);
971     chunk_zero(Mv);
972 
973     for(j = to; j >= 0; j--) {
974         Eq = self->PEq_R[utext[j]]; 	/* This line is different. */
975         /* Xv = Eq | Mv; */
976         chunk_set(Xv, Eq);
977         chunk_or_in(Xv, Mv);
978         /* Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq; */
979         chunk_set(Xh, Eq);
980         chunk_and_in(Xh, Pv);
981         chunk_add_in(Xh, Pv);
982         chunk_xor_in(Xh, Pv);
983         chunk_or_in(Xh, Eq);
984         /* Ph = Mv | ~ (Xh | Pv); */
985         chunk_set(Ph, Xh);
986         chunk_or_in(Ph, Pv);
987         chunk_negate(Ph);
988         chunk_or_in(Ph, Mv);
989         /* Mh = Pv & Xh; */
990         chunk_set(Mh, Pv);
991         chunk_and_in(Mh, Xh);
992         /* Ph & (1 << (m - 1)) */
993         if (chunk_isbit_set(Ph, m-1)) {
994             Score++;
995             /* Mh & (1 << (m - 1)) */
996         } else if (chunk_isbit_set(Mh, m-1)) {
997             Score--;
998         }
999         /* Ph <<= 1; */
1000         chunk_lshift_one_inplace(Ph);
1001         /* Mh <<= 1; */
1002         chunk_lshift_one_inplace(Mh);
1003         /* Pv = Mh | ~(Xv | Ph); */
1004         chunk_set(Pv, Xv);
1005         chunk_or_in(Pv, Ph);
1006         chunk_negate(Pv);
1007         chunk_or_in(Pv, Mh);
1008         /* Mv = Ph & Xv; */
1009         chunk_set(Mv, Ph);
1010         chunk_and_in(Mv, Xv);
1011 
1012 #ifdef DEBUG
1013         printf("%3d. score %d\n", j, Score);
1014 #endif
1015         if(Score <= BestScore) {
1016 #ifdef DEBUG
1017             printf("rev match at position %d\n", j);
1018 #endif
1019             from = j;
1020             break;
1021         }
1022     }
1023 
1024     free_chunk(Pv);
1025     free_chunk(Mv);
1026     free_chunk(Xv);
1027     free_chunk(Xh);
1028     free_chunk(Ph);
1029     free_chunk(Mh);
1030 
1031     *pos = from;
1032     *len = to-from+1;
1033 
1034     return BestScore;
1035 
1036     /* printf("Found [%d,%d]\n", from, to);
1037        printf("Found '%.*s'\n", to - from + 1, &text[from]); */
1038 }
1039 
1040