1
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5
6 #include "simlib.h"
7 #define maxi(x, y) (((x) > (y)) ? x: y)
8
9 extern int have_stats;
10 extern double s_to_E4(int, int, int, int);
11
12 /* extern char name0[], name1[]; */
13 /* extern int match, mismh; */
14 extern char *sq, sqnam[], *seqc0, *seqc1;
15 extern char ttitle[], ltitle[];
16 extern int min0,min1,max0,max1;
17 extern int smin0, smin1;
18 extern int markx;
19 int gscore;
20 #ifdef TPLOT
21 int pmirror=0;
22 #endif
23
24 extern void opnline(long n0, long n1, int score, double e_val, double percent, int nc);
25
26 #define min(x,y) ((x)<=(y) ? (x) : (y))
27
28 extern FILE *outfd;
29
30
31 typedef struct ONE { int COL ; struct ONE *NEXT ;} pair, *pairptr;
32 static pairptr *row, z; /* for saving used aligned pairs */
33
34 #define PAIRNULL (pairptr)NULL
35
36 static int tt;
37
38 typedef struct NODE
39 { int SCORE;
40 int STARI;
41 int STARJ;
42 int ENDI;
43 int ENDJ;
44 int TOP;
45 int BOT;
46 int LEFT;
47 int RIGHT;
48 struct NODE *next;
49 } vertex, *vertexptr;
50
51 vertexptr LIST; /* an array for saving best scores */
52 vertexptr /*@null@*/ low = 0; /* lowest score node in LIST */
53 vertexptr /*@null@*/ most = 0; /* latestly accessed node in LIST */
54 static int numnode; /* the number of nodes in LIST */
55
56 /*@only@*/ static int *CC, *DD; /* saving matrix scores */
57 static int *RR, *SS, *EE, *FF; /* saving start-points */
58 static int *HH, *WW; /* saving matrix scores */
59 static int *II, *JJ, *XX, *YY; /* saving start-points */
60 static int m1, mm, n1, nn; /* boundaries of recomputed area */
61 static int rl, cl; /* left and top boundaries */
62 static int min; /* minimum score in LIST */
63 static bool flag; /* indicate if recomputation necessary*/
64 static int q, r; /* gap penalties */
65 static int qr; /* qr = q + r */
66
67 static bool tt;
68
69 void SIM(uchar *A,uchar *B,int M,int N,int K,int V[][32], int Q,
70 int R, int nseq, int max_count, int z_size);
71
72 static void big_pass(uchar *A,uchar *B,int M,int N,int mini_score,
73 int ss[][32], int Q, int R, int nseq);
74 static void locate(uchar *A,uchar *B,int ss[][32], int Q, int R, int nseq);
75 static void small_pass(uchar *A,uchar *B,int count,int ss[][32], int Q, int R, int nseq);
76 static void addnode(int c, int ci, int cj, int i, int j);
77 static bool no_cross();
78 static int diff(uchar *A,uchar *B,int M,int N,int tb,int te, int ss[][32], int q, int r);
79 static vertexptr findmax();
80
81 /* DIAG() assigns value to x if (ii,jj) is never used before */
82 #define DIAG(ii, jj, x, value) \
83 { for ( z = row[(ii)]; z != 0 && z->COL != (jj); z = z->NEXT ) ; \
84 if ( !z ) x = ( value ); \
85 }
86
87 /* replace (ss1, xx1, yy1) by (ss2, xx2, yy2) if the latter is large */
88
89 #define ORDER(ss1, xx1, yy1, ss2, xx2, yy2) \
90 { if ( ss1 < ss2 ) \
91 { ss1 = ss2; xx1 = xx2; yy1 = yy2; } \
92 else \
93 if ( ss1 == ss2 ) \
94 { if ( xx1 < xx2 ) \
95 { xx1 = xx2; yy1 = yy2; } \
96 else \
97 if ( xx1 == xx2 && yy1 < yy2 ) \
98 yy1 = yy2; \
99 } \
100 }
101
102 #define ORDER1(ss1, xx1, yy1, ss2, xx2, yy2) \
103 { if (ss1 <= ss2) { \
104 if (ss1 == ss2) { \
105 if (xx1 < xx2) { \
106 xx1 = xx2; yy1 = yy2; \
107 } else { \
108 if (xx1 == xx2 && yy1 < yy2) \
109 yy1 = yy2; \
110 } \
111 } else { \
112 ss1 = ss2; xx1 = xx2; yy1 = yy2; \
113 } \
114 } \
115 }
116
117 #define ORDER2(ss1, xx1, ss2, xx2) \
118 { \
119 if (ss1 <= ss2) { \
120 if (ss1 == ss2) { \
121 if (xx1 < xx2) xx1 = xx2; \
122 } else { \
123 ss1 = ss2; xx1 = xx2; \
124 } \
125 } \
126 }
127
128 /* The following definitions are for function diff() */
129
130 #define gap(k) ((k) <= 0 ? 0 : q+r*(k)) /* k-symbol indel score */
131
132 static int *sapp; /* Current script append ptr */
133 static int last; /* Last script op appended */
134
135 static int I, J; /* current positions of A ,B */
136 static int no_mat; /* number of matches */
137 static int no_mis; /* number of mismatches */
138 static int al_len; /* length of alignment */
139 /* Append "Delete k" op */
140 static int NL;
141 #define DEL(k) \
142 { I += k; \
143 al_len += k; \
144 if (last < 0) \
145 last = sapp[-1] -= (k); \
146 else \
147 last = *sapp++ = -(k); \
148 }
149 /* Append "Insert k" op */
150 #define INS(k) \
151 { J += k; \
152 al_len += k; \
153 if (last < 0) \
154 { sapp[-1] = (k); *sapp++ = last; } \
155 else \
156 last = *sapp++ = (k); \
157 }
158
159 /* Append "Replace" op */
160 #define REP \
161 { last = *sapp++ = 0; \
162 al_len += 1; \
163 }
164
165 typedef struct spa *space_ptr;
166
167 typedef struct spa {
168 int CC, RR, EE, DD, SS, FF;
169 } space;
170
171 space_ptr CCC;
172
173 /* SIM(A,B,M,N,K,Q,R) reports best non-intersecting alignments with
174 score >= K of the segments of A and B in order of similarity
175 scores, where ss[a][b] is the score of aligning a and b, and
176 -(Q+R*i) is the score of an i-symbol indel. */
177
SIM(uchar * A,uchar * B,int M,int N,int K,int V[][32],int Q,int R,int nseq,int max_count,int z_size)178 void SIM(uchar *A,uchar *B,int M,int N,int K,int V[][32], int Q,int R,
179 int nseq,int max_count, int z_size)
180 { int endi, endj, stari, starj; /* endpoint and startpoint */
181 int score; /* the max score in LIST */
182 register int i; /* row and column indices */
183 bool first_pass;
184 int *S; /* saving operations for diff */
185 int (*ss)[32];
186 int count; /* maximum size of list */
187 int nc, nd, ns, nident; /* for display */
188 long a_min0, a_max0, a_min1, a_max1; /* alignment coordinates */
189 int tmp; /* for switching min0,min1 */
190 vertexptr cur; /* temporary pointer */
191 double percent;
192 double e_val;
193
194 /* allocate space for all vectors */
195 { size_t i, j;
196 j = (N + 1) * sizeof(int); NL = N+1;
197 CCC = ( space_ptr ) ckalloc((N+1)*sizeof(space));
198 CC = ( int * ) ckalloc(j);
199 DD = ( int * ) ckalloc(j);
200 RR = ( int * ) ckalloc(j);
201 SS = ( int * ) ckalloc(j);
202 EE = ( int * ) ckalloc(j);
203 FF = ( int * ) ckalloc(j);
204 i = (M + 1) * sizeof(int);
205 HH = ( int * ) ckalloc(i);
206 WW = ( int * ) ckalloc(i);
207 II = ( int * ) ckalloc(i);
208 JJ = ( int * ) ckalloc(i);
209 XX = ( int * ) ckalloc(i);
210 YY = ( int * ) ckalloc(i);
211 S = ( int * ) ckalloc(i + j);
212 row = ( pairptr * ) ckalloc( (M + 1) * sizeof(pairptr));
213 }
214
215 /* set up list for each row */
216 for ( i = 1; i <= M; i++ )
217 if ( nseq == 2 )
218 row[i] = 0;
219 else
220 { row[i] = z = ( pairptr ) ckalloc(sizeof(pair));
221 z->COL = i;
222 z->NEXT = 0;
223 }
224
225 ss = V;
226 q = Q; r = R; qr = q + r;
227
228 LIST = NULL;
229
230 numnode = 0;
231 big_pass(A,B,M,N,K,V,Q,R,nseq);
232 first_pass= 1;
233
234 /* Report the K best alignments one by one. After each alignment is
235 output, recompute part of the matrix. First determine the size
236 of the area to be recomputed, then do the recomputation */
237
238 count = 0;
239 while (count < max_count) {
240 if ( numnode == 0 ) break;
241 #ifdef __MWERKS__
242 ChkEvent();
243 #endif
244 cur = findmax(); /* Return a pointer to a node with max score*/
245 score = cur->SCORE;
246 stari = ++cur->STARI;
247 starj = ++cur->STARJ;
248 endi = cur->ENDI;
249 endj = cur->ENDJ;
250 m1 = cur->TOP;
251 mm = cur->BOT;
252 n1 = cur->LEFT;
253 nn = cur->RIGHT;
254 rl = endi - stari + 1;
255 cl = endj - starj + 1;
256 I = stari - 1;
257 J = starj - 1;
258 sapp = S;
259 last = 0;
260 al_len = 0;
261 no_mat = 0;
262 no_mis = 0;
263 (void) diff(&A[stari]-1, &B[starj]-1,rl,cl,q,q,ss,q,r);
264 #ifdef __MWERKS__
265 ChkEvent();
266 #endif
267 /* Output the best alignment */
268 min0 = stari;
269 min1 = starj;
270 max0 = stari+rl-1;
271 max1 = starj+cl-1;
272 ns=calcons(A+1,M,B+1,N,S,&nc,&nident);
273 percent = (double)nident*100.0/(double)nc;
274 cal_coord(M,N,&a_min0,&a_max0,&a_min1,&a_max1);
275 #ifdef __MWERKS__
276 ChkEvent();
277 #endif
278
279 if (have_stats) e_val = s_to_E4(score,M,N,z_size);
280 #ifndef TPLOT
281 if (markx < 10) {
282 if (have_stats)
283 printf("\n %5.1f%% identity in %d %s overlap (%d-%d:%d-%d); score: %4d E(%d): %6.2g\n",
284 percent,nc,sqnam,a_min0,a_max0,a_min1,a_max1,score,z_size,e_val);
285 else
286 printf("\n %5.1f%% identity in %d %s overlap (%d-%d:%d-%d); score: %4d\n",
287 percent,nc,sqnam,a_min0,a_max0,a_min1,a_max1,score);
288 }
289 else if (markx==10) {
290 printf(">>#%d\n",K-count);
291 printf("; sw_score: %d\n",score);
292 printf("; sw_ident: %5.3f\n",percent/100.0);
293 printf("; sw_overlap: %d\n",nc);
294 if (have_stats) printf("; sw_expect: %6.2g\n",e_val);
295 }
296 #endif
297 gscore=score;
298 opnline((long)smin0,(long)smin1,score,e_val,percent,nc);
299 if (markx == 5 || markx == 6) {
300 disgraph(M,N,percent,score,min0,min1,max0,max1);
301 printf("\n");
302 }
303 discons(seqc0,seqc1,ns);
304 clsline((long)smin0,(long)smin1,score);
305 #ifdef TPLOT
306 if (nseq==1) {
307 pmirror = 1;
308 tmp = smin0;
309 smin0 = smin1;
310 smin1 = tmp;
311 opnline((long)smin0,(long)smin1,score,e_val,percent,nc);
312 discons(seqc1,seqc0,ns);
313 clsline((long)smin0,(long)smin1,score);
314 pmirror = 0;
315 }
316 #endif
317
318 #ifndef TPLOT
319 if (markx < 10) printf("\n----------\n");
320 #endif
321 fflush(stdout);
322 /*
323 print_align(score, stari, endi, starj, endj, S);
324 (void)fflush(stdout);
325 */
326 free(cur);
327
328 flag = 0;
329 /* if ((double) (mm-maxi(0,stari-score/R))/ (double)mm * (double)
330 (nn-maxi(0, starj-score/R))/(double) nn > 0.6) {
331 */
332 if (first_pass && maxi(rl, cl) > maxi(M,N)/4) {
333 /*printf("no locate\n");*/
334 flag = 1; n1 = m1 = 0;
335 } else locate(A,B,ss,Q,R,nseq);
336 if ( flag ) {
337 /*printf("small pass\n");*/
338 small_pass(A,B,0,ss,Q,R,nseq);
339 }
340 first_pass= 0;
341 count++;
342 }
343 }
344
345 /* A big pass to compute classes scoring over K */
346
big_pass(uchar * A,uchar * B,int M,int N,int mini_score,int ss[][32],int Q,int R,int nseq)347 static void big_pass(uchar *A,uchar *B,int M,int N,int mini_score,
348 int ss[][32], int Q, int R, int nseq)
349 { register int i, j; /* row and column indices */
350 register int c; /* best score at current point */
351 register int f; /* best score ending with insertion */
352 register int d; /* best score ending with deletion */
353 register int p; /* best score at (i-1, j-1) */
354 register int ci, cj; /* end-point associated with c */
355 register int fi, fj; /* end-point associated with f */
356 register int pi, pj; /* end-point associated with p */
357 register space_ptr sp;
358 int q, r, qr;
359 int *va; /* pointer to v(A[i], B[j]) */
360
361 q = Q; r=R; qr = q+r;
362
363 /* Compute the matrix and save the best scores in LIST
364 CC : the scores of the current row
365 RR and EE : the starting point that leads to score CC
366 DD : the scores of the current row, ending with deletion
367 SS and FF : the starting point that leads to score DD */
368 /* Initialize the 0 th row */
369
370 min = mini_score;
371 for ( sp = &CCC[1], j = 1; sp <= &CCC[N] ; sp++, j++ ) {
372 sp->CC = 0;
373 sp->RR = 0;
374 sp->EE = j;
375 sp->DD = - (qr);
376 sp->SS = 1;
377 sp->FF = j;
378 }
379 for ( i = 1; i <= M; i++) {
380 c = 0; /* Initialize column 0 */
381 f = - (qr);
382 va = ss[A[i]];
383 ci = fi = i;
384 if ( nseq == 2 )
385 { p = 0;
386 pi = (i - 1);
387 cj = fj = pj = 0;
388 }
389 else
390 { p = CCC[i].CC;
391 pi = CCC[i].RR;
392 pj = CCC[i].EE;
393 cj = fj = i;
394 }
395 j = (nseq == 2 ? 1: i+1);
396 for ( sp = &CCC[j]; sp <= &CCC[N]; j++) {
397
398 d = sp->DD;
399 c = -1;
400 DIAG(i, j, c, p+va[B[j]]) /* diagonal */
401 if (c < 0) {
402 p = sp->CC; pi = sp->RR; pj = sp->EE;
403 if (f >= 0) {
404 c = f; ci = fi; cj = fj;
405 ORDER1(c, ci, cj, d, sp->SS, sp->FF)
406 sp->CC = c; sp->RR = ci; sp->EE = cj;
407 sp++->DD -= r; f-=r;
408 } else if (d >= 0) {
409 sp->CC = d; sp->RR = sp->SS; sp->EE = sp->FF;
410 sp++->DD -= r;
411 } else {
412 sp->CC = 0; sp->RR=i; sp++->EE = j;
413 }
414 } else {
415 ci = pi; cj = pj;
416 ORDER1(c, ci, cj, f, fi, fj)
417 ORDER1(c, ci, cj, d, sp->SS, sp->FF)
418 p = sp->CC;
419 sp->CC = c;
420 pi = sp->RR;
421 sp->RR = ci;
422 pj = sp->EE;
423 sp->EE = cj;
424 f-=r;
425 if (c >= qr) {
426 if ( c > min) /* add the score into list */
427 addnode(c, ci, cj, i, j);
428 d -= r; c-=qr;
429 ORDER1(f, fi, fj, c, ci, cj)
430 ORDER1(d, sp->SS, sp->FF, c, ci, cj)
431 sp++->DD = d;
432 } else {
433 sp++->DD -= r;
434 }
435 }
436 }
437 }
438 }
439
440 /* Determine the left and top boundaries of the recomputed area */
441
locate(uchar * A,uchar * B,int ss[][32],int Q,int R,int nseq)442 static void locate(uchar *A,uchar *B,int ss[][32], int Q, int R, int nseq)
443 { register int i, j; /* row and column indices */
444 register int c; /* best score at current point */
445 register int f; /* best score ending with insertion */
446 register int d; /* best score ending with deletion */
447 register int p; /* best score at (i-1, j-1) */
448 register int ci, cj; /* end-point associated with c */
449 register int di, dj;
450 register int fi, fj; /* end-point associated with f */
451 register int pi, pj; /* end-point associated with p */
452 register space_ptr sp;
453 bool cflag, rflag; /* for recomputation */
454 int *va; /* pointer to v(A[i], B[j]) */
455 int limit; /* the bound on j */
456 int q, r, qr;
457
458 q = Q; r = R; qr = q + r;
459
460 /* Reverse pass
461 rows
462 CC : the scores on the current row
463 RR and EE : the endpoints that lead to CC
464 DD : the deletion scores
465 SS and FF : the endpoints that lead to DD
466
467 columns
468 HH : the scores on the current columns
469 II and JJ : the endpoints that lead to HH
470 WW : the deletion scores
471 XX and YY : the endpoints that lead to WW
472 */
473
474 for ( j = nn; j >= n1 ; j-- ) {
475 CCC[j].CC = 0;
476 CCC[j].EE = j;
477 CCC[j].DD = - (q);
478 CCC[j].FF = j;
479 if ( nseq == 2 || j > mm )
480 CCC[j].RR = CCC[j].SS = mm + 1;
481 else
482 CCC[j].RR = CCC[j].SS = j;
483 }
484
485 for ( i = mm; i >= m1; i-- ) {
486 c = p = 0;
487 f = - (q);
488 ci = fi = i;
489 pi = i + 1;
490 cj = fj = pj = nn + 1;
491 va = ss[A[i]];
492 if ( nseq == 2 || n1 > i )
493 limit = n1;
494 else
495 limit = i + 1;
496 for ( j = nn, sp = &CCC[j]; j >= limit ; j-- )
497 { f = f - r;
498 c = c - qr;
499 ORDER(f, fi, fj, c, ci, cj)
500 c = sp->CC - qr;
501 d = sp->DD - r;
502 ORDER(d, sp->SS, sp->FF, c, sp->RR, sp->EE)
503 c = 0;
504 DIAG(i, j, c, p+va[B[j]]) /* diagonal */
505 if ( c <= 0 )
506 { c = 0; ci = i; cj = j; }
507 else
508 { ci = pi; cj = pj; }
509 ORDER1(c, ci, cj, d, sp->SS, sp->FF)
510 ORDER1(c, ci, cj, f, fi, fj)
511 p = sp->CC;
512 sp->CC = c;
513 pi = sp->RR;
514 pj = sp->EE;
515 sp->RR = ci;
516 sp->EE = cj;
517 sp--->DD = d;
518 if ( c > min )
519 flag = 1;
520 }
521 if ( nseq == 2 || i < n1 )
522 { HH[i] = CCC[n1].CC;
523 II[i] = CCC[n1].RR;
524 JJ[i] = CCC[n1].EE;
525 WW[i] = f;
526 XX[i] = fi;
527 YY[i] = fj;
528 }
529 }
530
531 for ( rl = m1, cl = n1; ; ) {
532 for ( rflag = cflag = 1; ( rflag && m1 > 1 ) || ( cflag && n1 > 1 ) ; ) {
533 if ( rflag && m1 > 1 ) { /* Compute one row */
534 rflag = 0;
535 m1--;
536 c = p = 0;
537 f = - (q);
538 ci = fi = m1;
539 pi = m1 + 1;
540 cj = fj = pj = nn + 1;
541 va = ss[A[m1]];
542 for ( j = nn, sp = &CCC[j]; j >= n1 ; j-- )
543 { f = f - r;
544 c = c - qr;
545 ORDER(f, fi, fj, c, ci, cj)
546 c = sp->CC - qr;
547 ci = sp->RR;
548 cj = sp->EE;
549 d = sp->DD - r;
550 di = sp->SS;
551 dj = sp->FF;
552 ORDER(d, di, dj, c, ci, cj)
553 c = 0;
554 DIAG(m1, j, c, p+va[B[j]]) /* diagonal */
555 if ( c <= 0 )
556 { c = 0; ci = m1; cj = j; }
557 else
558 { ci = pi; cj = pj; }
559 ORDER1(c, ci, cj, d, di, dj)
560 ORDER1(c, ci, cj, f, fi, fj)
561 sp->SS = di;
562 sp->FF = dj;
563 p = sp->CC;
564 sp->CC = c;
565 pi = sp->RR;
566 pj = sp->EE;
567 sp->RR = ci;
568 sp->EE = cj;
569 sp--->DD = d;
570 if ( c > min )
571 flag = 1;
572 if ( ! rflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl) || (fi > rl && fj > cl) ) )
573 rflag = 1;
574 }
575 HH[m1] = CCC[n1].CC;
576 II[m1] = CCC[n1].RR;
577 JJ[m1] = CCC[n1].EE;
578 WW[m1] = f;
579 XX[m1] = fi;
580 YY[m1] = fj;
581 if ( ! cflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl) || (fi > rl && fj > cl ) ))
582 cflag = 1;
583 }
584
585 if ( nseq == 1 && n1 == (m1 + 1) && ! rflag )
586 cflag = 0;
587 if ( cflag && n1 > 1 ) /* Compute one column */
588 { cflag = 0;
589 n1--;
590 c = 0;
591 f = - (q);
592 cj = fj = n1;
593 va = ss[B[n1]];
594 if ( nseq == 2 || mm < n1 )
595 { p = 0;
596 ci = fi = pi = mm + 1;
597 pj = n1 + 1;
598 limit = mm;
599 }
600 else
601 { p = HH[n1];
602 pi = II[n1];
603 pj = JJ[n1];
604 ci = fi = n1;
605 limit = n1 - 1;
606 }
607 for ( i = limit; i >= m1 ; i-- )
608 { f = f - r;
609 c = c - qr;
610 ORDER(f, fi, fj, c, ci, cj)
611 c = HH[i] - qr;
612 ci = II[i];
613 cj = JJ[i];
614 d = WW[i] - r;
615 di = XX[i];
616 dj = YY[i];
617 ORDER(d, di, dj, c, ci, cj)
618 c = 0;
619 DIAG(i, n1, c, p+va[A[i]])
620 if ( c <= 0 )
621 { c = 0; ci = i; cj = n1; }
622 else
623 { ci = pi; cj = pj; }
624 ORDER1(c, ci, cj, d, di, dj)
625 ORDER1(c, ci, cj, f, fi, fj)
626 p = HH[i];
627 HH[i] = c;
628 pi = II[i];
629 pj = JJ[i];
630 II[i] = ci;
631 JJ[i] = cj;
632 WW[i] = d;
633 XX[i] = di;
634 YY[i] = dj;
635 if ( c > min )
636 flag = 1;
637 if ( ! cflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl)
638 || (fi > rl && fj > cl) ) )
639 cflag = 1;
640 }
641 CCC[n1].CC = HH[m1];
642 CCC[n1].RR = II[m1];
643 CCC[n1].EE = JJ[m1];
644 CCC[n1].DD = f;
645 CCC[n1].SS = fi;
646 CCC[n1].FF = fj;
647 if ( ! rflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl)
648 || (fi > rl && fj > cl )) )
649 rflag = 1;
650 }
651 }
652 if (( m1 == 1 && n1 == 1) || no_cross() )
653 break;
654 }
655 m1--;
656 n1--;
657 }
658
659 /* recompute the area on forward pass */
small_pass(uchar * A,uchar * B,int count,int ss[][32],int Q,int R,int nseq)660 static void small_pass(uchar *A,uchar *B,int count,int ss[][32], int Q, int R, int nseq)
661 { register int i, j; /* row and column indices */
662 register int c; /* best score at current point */
663 register int f; /* best score ending with insertion */
664 register int d; /* best score ending with deletion */
665 register int p; /* best score at (i-1, j-1) */
666 register int ci, cj; /* end-point associated with c */
667 register int fi, fj; /* end-point associated with f */
668 register int pi, pj; /* end-point associated with p */
669 register space_ptr sp;
670 int *va; /* pointer to v(A[i], B[j]) */
671
672 int limit; /* lower bound on j */
673
674 q = Q; r = R; qr = q + r;
675
676 for ( sp = &CCC[n1 + 1], j = n1+1; sp <= &CCC[nn] ; sp++, j++ )
677 { sp->CC = 0;
678 sp->RR = m1;
679 sp->EE = j;
680 sp->DD = - (qr);
681 sp->SS = m1+1;
682 sp->FF = j;
683 }
684
685 for ( i = m1 + 1; i <= mm; i++)
686 { c = 0; /* Initialize column 0 */
687 f = - (qr);
688 ci = fi = i;
689 va = ss[A[i]];
690 if ( nseq == 2 || i <= n1 )
691 { p = 0;
692 pi = i - 1;
693 cj = fj = pj = n1;
694 limit = n1 + 1;
695 }
696 else
697 { p = CCC[i].CC;
698 pi = CCC[i].RR;
699 pj = CCC[i].EE;
700 cj = fj = i;
701 limit = i + 1;
702 }
703 for ( j = limit, sp = &CCC[j] ; j <= nn ; j++ )
704 {
705 d = sp->DD;
706 c = -1;
707 DIAG(i, j, c, p+va[B[j]]) /* diagonal */
708 if (c < 0) {
709 p = sp->CC; pi = sp->RR; pj = sp->EE;
710 if (f >= 0) {
711 c = f; ci = fi; cj = fj;
712 ORDER1(c, ci, cj, d, sp->SS, sp->FF)
713 sp->CC = c; sp->RR = ci; sp->EE = cj;
714 sp++->DD -= r; f-=r;
715 } else if (d >= 0) {
716 sp->CC = d; sp->RR = sp->SS; sp->EE = sp->FF;
717 sp++->DD -= r;
718 } else {
719 sp->CC = 0; sp->RR=i; sp++->EE = j;
720 }
721 } else {
722 ci = pi; cj = pj;
723 ORDER1(c, ci, cj, f, fi, fj)
724 ORDER1(c, ci, cj, d, sp->SS, sp->FF)
725 p = sp->CC;
726 sp->CC = c;
727 pi = sp->RR;
728 sp->RR = ci;
729 pj = sp->EE;
730 sp->EE = cj;
731 f-=r;
732 if (c >= qr) {
733 if ( c > min ) /* add the score into list */
734 addnode(c, ci, cj, i, j);
735 d -= r; c-=qr;
736 ORDER1(f, fi, fj, c, ci, cj)
737 ORDER1(d, sp->SS, sp->FF, c, ci, cj)
738 sp++->DD = d;
739 } else {
740 sp++->DD -= r;
741 }
742 }
743 }
744 }
745 }
746
747 /* Add a new node into list. */
748
addnode(int c,int ci,int cj,int i,int j)749 static void addnode(int c, int ci, int cj, int i, int j)
750 { bool found; /* 1 if the node is in LIST */
751
752 found = 0;
753 if ( most != 0 && most->STARI == ci && most->STARJ == cj)
754 found = 1;
755 else
756 for ( most = LIST; most; most = most->next )
757 {
758 if ( most->STARI == ci && most->STARJ == cj)
759 { found = 1;
760 break;
761 }
762 }
763 if ( found )
764 { if ( most->SCORE < c )
765 { most->SCORE = c;
766 most->ENDI = i;
767 most->ENDJ = j;
768 }
769 if ( most->TOP > i ) most->TOP = i;
770 if ( most->BOT < i ) most->BOT = i;
771 if ( most->LEFT > j ) most->LEFT = j;
772 if ( most->RIGHT < j ) most->RIGHT = j;
773 }
774 else
775 {
776 numnode++;
777 most = (vertexptr) ckalloc(sizeof(vertex));
778 most->SCORE = c;
779 most->STARI = ci;
780 most->STARJ = cj;
781 most->ENDI = i;
782 most->ENDJ = j;
783 most->TOP = most->BOT = i;
784 most->LEFT = most->RIGHT = j;
785 most->next = LIST;
786 LIST = most;
787 }
788 /*
789 if ( numnode == K )
790 { if ( low == most || ! low )
791 { for ( low = LIST[0], d = 1; d < numnode ; d++ )
792 if ( LIST[d]->SCORE < low->SCORE )
793 low = LIST[d];
794 }
795 return ( low->SCORE ) ;
796 }
797 else
798 return cost;
799 */
800 }
801
802 /* Find and remove the largest score in list */
803
findmax()804 static vertexptr findmax()
805 { vertexptr ap, cur;
806 register int i;
807
808 for ( i = LIST->SCORE, cur = NULL, ap = LIST; ap->next; ap = ap->next)
809 if ( ap->next->SCORE > i ) {
810 cur = ap; i = ap->next->SCORE;
811 }
812 if (cur) {ap = cur->next; cur->next = ap->next; }
813 else { ap = LIST; LIST = LIST->next;}
814 numnode--;
815 most = LIST;
816 if ( low == ap ) low = LIST;
817 return ( ap );
818 }
819
820 /* return 1 if no node in LIST share vertices with the area */
821
no_cross()822 static bool no_cross()
823 { vertexptr cur;
824
825 for ( cur = LIST; cur; cur = cur->next )
826 {
827 if ( cur->STARI <= mm && cur->STARJ <= nn && cur->BOT >= m1-1 &&
828 cur->RIGHT >= n1-1 && (cur->STARI < rl || cur->STARJ < cl)) {
829 if ( cur->STARI < rl ) rl = cur->STARI;
830 if ( cur->STARJ < cl ) cl = cur->STARJ;
831 flag = 1;
832 break;
833 }
834 }
835 return !cur;
836 }
837
838 /* diff(A,B,M,N,tb,te) returns the score of an optimum conversion between
839 A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
840 and appends such a conversion to the current script. */
841
diff(uchar * A,uchar * B,int M,int N,int tb,int te,int ss[][32],int Q,int R)842 static int diff(uchar *A,uchar *B,int M,int N,int tb,int te, int ss[][32], int Q, int R)
843
844 { int midi, midj, type; /* Midpoint, type, and cost */
845 int midc;
846
847 { register int i, j;
848 register int c, e, d, s;
849 int t;
850 int *va;
851 int q, r, qr;
852
853 q = Q; r = R; qr = q + r;
854
855 /* Boundary cases: M <= 1 or N == 0 */
856
857 if (N <= 0)
858 { if (M > 0) DEL(M)
859 return - gap(M);
860 }
861 if (M <= 1)
862 { if (M <= 0)
863 { INS(N);
864 return - gap(N);
865 }
866 if (tb > te) tb = te;
867 midc = - (tb + r + gap(N) );
868 midj = 0;
869 va = ss[A[1]];
870 for (j = 1; j <= N; j++)
871 { for ( tt = 1, z = row[I+1]; z != 0; z = z->NEXT )
872 if ( z->COL == j+J )
873 { tt = 0; break; }
874 if ( tt )
875 { c = va[B[j]] - ( gap(j-1) + gap(N-j) );
876 if (c > midc)
877 { midc = c;
878 midj = j;
879 }
880 }
881 }
882 if (midj == 0)
883 { INS(N) DEL(1) }
884 else
885 { if (midj > 1) INS(midj-1)
886 REP
887 if ( A[1] == B[midj] )
888 no_mat += 1;
889 else
890 no_mis += 1;
891 /* mark (A[I],B[J]) as used: put J into list row[I] */
892 I++; J++;
893 z = ( pairptr ) ckalloc(sizeof(pair));
894 z->COL = J;
895 z->NEXT = row[I];
896 row[I] = z;
897 if (midj < N) INS(N-midj)
898 }
899 return midc;
900 }
901
902 /* Divide: Find optimum midpoint (midi,midj) of cost midc */
903
904 midi = M/2; /* Forward phase: */
905 CC[0] = 0; /* Compute C(M/2,k) & D(M/2,k) for all k */
906 t = -q;
907 for (j = 1; j <= N; j++)
908 { CC[j] = t = t-r;
909 DD[j] = t-q;
910 }
911 t = -tb;
912 for (i = 1; i <= midi; i++)
913 { s = CC[0];
914 CC[0] = c = t = t-r;
915 e = t-q;
916 va = ss[A[i]];
917 for (j = 1; j <= N; j++)
918 { if ((c = c - qr) > (e = e - r)) e = c;
919 if ((c = CC[j] - qr) > (d = DD[j] - r)) d = c;
920 DIAG(i+I, j+J, c, s+va[B[j]])
921 if (c < d) c = d;
922 if (c < e) c = e;
923 s = CC[j];
924 CC[j] = c;
925 DD[j] = d;
926 }
927 }
928 DD[0] = CC[0];
929
930 RR[N] = 0; /* Reverse phase: */
931 t = -q; /* Compute R(M/2,k) & S(M/2,k) for all k */
932 for (j = N-1; j >= 0; j--)
933 { RR[j] = t = t-r;
934 SS[j] = t-q;
935 }
936 t = -te;
937 for (i = M-1; i >= midi; i--)
938 { s = RR[N];
939 RR[N] = c = t = t-r;
940 e = t-q;
941 va = ss[A[i+1]];
942 for (j = N-1; j >= 0; j--)
943 { if ((c = c - qr) > (e = e - r)) e = c;
944 if ((c = RR[j] - qr) > (d = SS[j] - r)) d = c;
945 DIAG(i+1+I, j+1+J, c, s+va[B[j+1]])
946 if (c < d) c = d;
947 if (c < e) c = e;
948 s = RR[j];
949 RR[j] = c;
950 SS[j] = d;
951 }
952 }
953 SS[N] = RR[N];
954
955 midc = CC[0]+RR[0]; /* Find optimal midpoint */
956 midj = 0;
957 type = 1;
958 for (j = 0; j <= N; j++)
959 if ((c = CC[j] + RR[j]) >= midc)
960 if (c > midc || (CC[j] != DD[j] && RR[j] == SS[j]))
961 { midc = c;
962 midj = j;
963 }
964 for (j = N; j >= 0; j--)
965 if ((c = DD[j] + SS[j] + q) > midc)
966 { midc = c;
967 midj = j;
968 type = 2;
969 }
970 }
971
972 /* Conquer: recursively around midpoint */
973
974 if (type == 1)
975 { (void)diff(A,B,midi,midj,tb,q,ss,q,r);
976 (void)diff(A+midi,B+midj,M-midi,N-midj,q,te,ss,q,r);
977 }
978 else
979 { (void)diff(A,B,midi-1,midj,tb,0,ss,q,r);
980 DEL(2);
981 (void)diff(A+midi+1,B+midj,M-midi-1,N-midj,0,te,ss,q,r);
982 }
983 return midc;
984 }
985
986 /* ckalloc - allocate space; check for success */
ckalloc(size_t amount)987 void *ckalloc(size_t amount)
988 {
989 char *p;
990 static long mtotal;
991
992 mtotal += (long)amount;
993
994 if ((p = malloc( (unsigned) amount)) == NULL) {
995 fprintf(stderr,"Ran out of near memory: %d/%ld\n",amount,mtotal);
996 exit(1);
997 }
998 return(p);
999 }
1000
1001