1 
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include "simlib.h"
7 #define maxi(x, y)  (((x) > (y)) ? x: y)
8 
9 extern int have_stats;
10 extern double s_to_E4(int, int, int, int);
11 
12 /* extern char name0[], name1[]; */
13 /* extern int match, mismh; */
14 extern char *sq, sqnam[], *seqc0, *seqc1;
15 extern char ttitle[], ltitle[];
16 extern int min0,min1,max0,max1;
17 extern int smin0, smin1;
18 extern int markx;
19 int gscore;
20 #ifdef TPLOT
21 int pmirror=0;
22 #endif
23 
24 extern void opnline(long n0, long n1, int score, double e_val, double percent, int nc);
25 
26 #define min(x,y) ((x)<=(y) ? (x) : (y))
27 
28 extern FILE *outfd;
29 
30 
31 typedef struct ONE { int COL ;  struct ONE  *NEXT ;} pair, *pairptr;
32 static pairptr *row, z; 		/* for saving used aligned pairs */
33 
34 #define PAIRNULL (pairptr)NULL
35 
36 static int tt;
37 
38 typedef struct NODE
39 { int  SCORE;
40   int  STARI;
41   int  STARJ;
42   int  ENDI;
43   int  ENDJ;
44   int  TOP;
45   int  BOT;
46   int  LEFT;
47   int  RIGHT;
48   struct NODE *next;
49 }  vertex, *vertexptr;
50 
51 vertexptr  LIST;			/* an array for saving best scores */
52 vertexptr  /*@null@*/ low = 0;			/* lowest score node in LIST */
53 vertexptr  /*@null@*/ most = 0;			/* latestly accessed node in LIST */
54 static int numnode;			/* the number of nodes in LIST */
55 
56 /*@only@*/ static int *CC, *DD;			/* saving matrix scores */
57 static int *RR, *SS, *EE, *FF; 	/* saving start-points */
58 static int *HH, *WW;		 	/* saving matrix scores */
59 static int *II, *JJ, *XX, *YY; 	/* saving start-points */
60 static int  m1, mm, n1, nn;		/* boundaries of recomputed area */
61 static int  rl, cl;			/* left and top boundaries */
62 static int  min;			/* minimum score in LIST */
63 static bool flag;			/* indicate if recomputation necessary*/
64 static int q, r;			/* gap penalties */
65 static int qr;				/* qr = q + r */
66 
67 static bool tt;
68 
69 void SIM(uchar *A,uchar *B,int M,int N,int K,int V[][32], int Q,
70 	 int R, int nseq, int max_count, int z_size);
71 
72 static void big_pass(uchar *A,uchar *B,int M,int N,int mini_score,
73 		     int ss[][32], int Q, int R, int nseq);
74 static void locate(uchar *A,uchar *B,int ss[][32], int Q, int R, int nseq);
75 static void small_pass(uchar *A,uchar *B,int count,int ss[][32], int Q, int R, int nseq);
76 static void addnode(int c, int ci, int cj, int i, int j);
77 static bool no_cross();
78 static int diff(uchar *A,uchar *B,int M,int N,int tb,int te, int ss[][32], int q, int r);
79 static vertexptr findmax();
80 
81 /* DIAG() assigns value to x if (ii,jj) is never used before */
82 #define DIAG(ii, jj, x, value)				\
83 { for ( z = row[(ii)]; z != 0 && z->COL != (jj); z = z->NEXT ) ;	\
84 									  if ( !z )   x = ( value );					\
85 																	  }
86 
87 /* replace (ss1, xx1, yy1) by (ss2, xx2, yy2) if the latter is large */
88 
89 #define ORDER(ss1, xx1, yy1, ss2, xx2, yy2)             \
90 { if ( ss1 < ss2 )                                      \
91   { ss1 = ss2; xx1 = xx2; yy1 = yy2; }                \
92 							else                                                  \
93 														if ( ss1 == ss2 )                                   \
94   { if ( xx1 < xx2 )                                \
95     { xx1 = xx2; yy1 = yy2; }                     \
96 						    else                                            \
97 												      if ( xx1 == xx2 && yy1 < yy2 )                \
98 																		      yy1 = yy2;                                  \
99 																								    }                                                 \
100 																															}
101 
102 #define ORDER1(ss1, xx1, yy1, ss2, xx2, yy2)  \
103 { if (ss1 <= ss2) {                                     \
104 							  if (ss1 == ss2) {                                   \
105 														if (xx1 < xx2) {                                \
106 																				  xx1 = xx2; yy1 = yy2;                       \
107 																										} else {                           \
108 																														     if (xx1 == xx2 && yy1 < yy2)  \
109 																																		     yy1 = yy2; \
110 																																				  } \
111 																																				      } else {  \
112 																																						  ss1 = ss2; xx1 = xx2; yy1 = yy2;  \
113 																																										      } \
114 																																											  } \
115 																																											      }
116 
117 #define ORDER2(ss1, xx1, ss2, xx2) \
118 {  \
119      if (ss1 <= ss2) { \
120 			 if (ss1 == ss2) {  \
121 					      if (xx1 < xx2) xx1 = xx2; \
122 									  } else { \
123 										     ss1 = ss2; xx1 = xx2; \
124 													     } \
125 														 } \
126 														     }
127 
128 /* The following definitions are for function diff() */
129 
130 #define gap(k)  ((k) <= 0 ? 0 : q+r*(k))	/* k-symbol indel score */
131 
132 static int *sapp;				/* Current script append ptr */
133 static int  last;				/* Last script op appended */
134 
135 static int I, J;				/* current positions of A ,B */
136 static int no_mat; 				/* number of matches */
137 static int no_mis; 				/* number of mismatches */
138 static int al_len; 				/* length of alignment */
139 /* Append "Delete k" op */
140 static int NL;
141 #define DEL(k)				\
142 { I += k;				\
143 					  al_len += k;				\
144 										  if (last < 0)				\
145 															  last = sapp[-1] -= (k);		\
146 																				  else					\
147 																									  last = *sapp++ = -(k);		\
148 																														  }
149 /* Append "Insert k" op */
150 #define INS(k)				\
151 { J += k;				\
152 					  al_len += k;				\
153 										  if (last < 0)				\
154 					    { sapp[-1] = (k); *sapp++ = last; }	\
155 										  else					\
156 															  last = *sapp++ = (k);		\
157 																			  }
158 
159 /* Append "Replace" op */
160 #define REP 				\
161 { last = *sapp++ = 0; 			\
162 					  al_len += 1;				\
163 										  }
164 
165 typedef struct spa *space_ptr;
166 
167 typedef struct spa {
168   int CC, RR, EE, DD, SS, FF;
169 } space;
170 
171 space_ptr CCC;
172 
173 /* SIM(A,B,M,N,K,Q,R) reports best non-intersecting alignments with
174    score >= K of the segments of A and B in order of similarity
175    scores, where ss[a][b] is the score of aligning a and b, and
176    -(Q+R*i) is the score of an i-symbol indel.  */
177 
SIM(uchar * A,uchar * B,int M,int N,int K,int V[][32],int Q,int R,int nseq,int max_count,int z_size)178 void SIM(uchar *A,uchar *B,int M,int N,int K,int V[][32], int Q,int R,
179 	 int nseq,int max_count, int z_size)
180 { int endi, endj, stari, starj;	/* endpoint and startpoint */
181  int  score;   			/* the max score in LIST */
182  register  int  i;			/* row and column indices */
183  bool first_pass;
184  int  *S;				/* saving operations for diff */
185  int (*ss)[32];
186  int count;				/* maximum size of list */
187  int nc, nd, ns, nident;		/* for display */
188  long a_min0, a_max0, a_min1, a_max1;	/* alignment coordinates */
189  int tmp;				/* for switching min0,min1 */
190  vertexptr cur; 			/* temporary pointer */
191  double percent;
192  double e_val;
193 
194 	/* allocate space for all vectors */
195  { size_t i, j;
196   j = (N + 1) * sizeof(int); NL = N+1;
197   CCC = ( space_ptr ) ckalloc((N+1)*sizeof(space));
198   CC = ( int * ) ckalloc(j);
199   DD = ( int * ) ckalloc(j);
200   RR = ( int * ) ckalloc(j);
201   SS = ( int * ) ckalloc(j);
202   EE = ( int * ) ckalloc(j);
203   FF = ( int * ) ckalloc(j);
204   i = (M + 1) * sizeof(int);
205   HH = ( int * ) ckalloc(i);
206   WW = ( int * ) ckalloc(i);
207   II = ( int * ) ckalloc(i);
208   JJ = ( int * ) ckalloc(i);
209   XX = ( int * ) ckalloc(i);
210   YY = ( int * ) ckalloc(i);
211   S = ( int * ) ckalloc(i + j);
212   row = ( pairptr * ) ckalloc( (M + 1) * sizeof(pairptr));
213   }
214 
215   /* set up list for each row */
216 	for ( i = 1; i <= M; i++ )
217 	  if ( nseq == 2 )
218 	     row[i] = 0;
219 	  else
220 	    { row[i] = z = ( pairptr ) ckalloc(sizeof(pair));
221               z->COL = i;
222               z->NEXT = 0;
223 	    }
224 
225 	ss = V;
226 	q = Q; r = R; qr = q + r;
227 
228 	LIST = NULL;
229 
230 	numnode = 0;
231 	big_pass(A,B,M,N,K,V,Q,R,nseq);
232 	first_pass= 1;
233 
234         /* Report the K best alignments one by one. After each alignment is
235            output, recompute part of the matrix. First determine the size
236 	   of the area to be recomputed, then do the recomputation         */
237 
238 	count = 0;
239 	while (count < max_count) {
240 	  if ( numnode == 0 ) break;
241 #ifdef __MWERKS__
242 	  ChkEvent();
243 #endif
244             cur = findmax();	/* Return a pointer to a node with max score*/
245             score = cur->SCORE;
246       	    stari = ++cur->STARI;
247             starj = ++cur->STARJ;
248             endi = cur->ENDI;
249             endj = cur->ENDJ;
250             m1 = cur->TOP;
251             mm = cur->BOT;
252             n1 = cur->LEFT;
253             nn = cur->RIGHT;
254             rl = endi - stari + 1;
255             cl = endj - starj + 1;
256             I = stari - 1;
257             J = starj - 1;
258             sapp = S;
259             last = 0;
260             al_len = 0;
261             no_mat = 0;
262 	    no_mis = 0;
263             (void) diff(&A[stari]-1, &B[starj]-1,rl,cl,q,q,ss,q,r);
264 #ifdef __MWERKS__
265 	    ChkEvent();
266 #endif
267             /* Output the best alignment */
268 	    min0 = stari;
269 	    min1 = starj;
270 	    max0 = stari+rl-1;
271 	    max1 = starj+cl-1;
272 	    ns=calcons(A+1,M,B+1,N,S,&nc,&nident);
273 	    percent = (double)nident*100.0/(double)nc;
274 	    cal_coord(M,N,&a_min0,&a_max0,&a_min1,&a_max1);
275 #ifdef __MWERKS__
276 	    ChkEvent();
277 #endif
278 
279 	    if (have_stats) e_val = s_to_E4(score,M,N,z_size);
280 #ifndef TPLOT
281 	    if (markx < 10) {
282 	      if (have_stats)
283 		printf("\n %5.1f%% identity in %d %s overlap (%d-%d:%d-%d); score: %4d E(%d): %6.2g\n",
284 		       percent,nc,sqnam,a_min0,a_max0,a_min1,a_max1,score,z_size,e_val);
285 	      else
286 		printf("\n %5.1f%% identity in %d %s overlap (%d-%d:%d-%d); score: %4d\n",
287 		       percent,nc,sqnam,a_min0,a_max0,a_min1,a_max1,score);
288 	    }
289 	    else if (markx==10) {
290 	      printf(">>#%d\n",K-count);
291 	      printf("; sw_score: %d\n",score);
292 	      printf("; sw_ident: %5.3f\n",percent/100.0);
293 	      printf("; sw_overlap: %d\n",nc);
294 	      if (have_stats) printf("; sw_expect: %6.2g\n",e_val);
295 	    }
296 #endif
297 	    gscore=score;
298 	    opnline((long)smin0,(long)smin1,score,e_val,percent,nc);
299 	    if (markx == 5 || markx == 6) {
300 	      disgraph(M,N,percent,score,min0,min1,max0,max1);
301 	      printf("\n");
302 	    }
303 	    discons(seqc0,seqc1,ns);
304 	    clsline((long)smin0,(long)smin1,score);
305 #ifdef TPLOT
306 	    if (nseq==1) {
307 	      pmirror = 1;
308 	      tmp = smin0;
309 	      smin0 = smin1;
310 	      smin1 = tmp;
311 	      opnline((long)smin0,(long)smin1,score,e_val,percent,nc);
312 	      discons(seqc1,seqc0,ns);
313 	      clsline((long)smin0,(long)smin1,score);
314 	      pmirror = 0;
315 	    }
316 #endif
317 
318 #ifndef TPLOT
319 	    if (markx < 10) printf("\n----------\n");
320 #endif
321 	    fflush(stdout);
322 	    /*
323 	    print_align(score, stari, endi, starj, endj, S);
324 	    (void)fflush(stdout);
325 	    */
326 	    free(cur);
327 
328 	    flag = 0;
329 	/*    if ((double) (mm-maxi(0,stari-score/R))/ (double)mm * (double)
330 		(nn-maxi(0, starj-score/R))/(double) nn > 0.6) {
331 	*/
332 	    if (first_pass && maxi(rl, cl) > maxi(M,N)/4) {
333 		/*printf("no locate\n");*/
334 		flag = 1; n1 = m1 = 0;
335 	    } else locate(A,B,ss,Q,R,nseq);
336 	    if ( flag ) {
337 		/*printf("small pass\n");*/
338 		small_pass(A,B,0,ss,Q,R,nseq);
339 	    }
340 	    first_pass= 0;
341 	    count++;
342 	}
343 }
344 
345 /* A big pass to compute classes scoring over K */
346 
big_pass(uchar * A,uchar * B,int M,int N,int mini_score,int ss[][32],int Q,int R,int nseq)347 static void big_pass(uchar *A,uchar *B,int M,int N,int mini_score,
348 		     int ss[][32], int Q, int R, int nseq)
349 { register  int  i, j;			/* row and column indices */
350   register  int  c;			/* best score at current point */
351   register  int  f;			/* best score ending with insertion */
352   register  int  d;			/* best score ending with deletion */
353   register  int  p;			/* best score at (i-1, j-1) */
354   register  int  ci, cj;		/* end-point associated with c */
355   register  int  fi, fj;		/* end-point associated with f */
356   register  int  pi, pj;		/* end-point associated with p */
357   register space_ptr sp;
358   int q, r, qr;
359   int  *va;				/* pointer to v(A[i], B[j]) */
360 
361   q = Q; r=R; qr = q+r;
362 
363   /* Compute the matrix and save the best scores in LIST
364 	   CC : the scores of the current row
365 	   RR and EE : the starting point that leads to score CC
366 	   DD : the scores of the current row, ending with deletion
367 	   SS and FF : the starting point that leads to score DD        */
368  	/* Initialize the 0 th row */
369 
370   min = mini_score;
371   for ( sp = &CCC[1], j = 1; sp <= &CCC[N] ; sp++, j++ ) {
372     sp->CC = 0;
373     sp->RR = 0;
374     sp->EE = j;
375     sp->DD = - (qr);
376     sp->SS = 1;
377     sp->FF = j;
378   }
379   for ( i = 1; i <= M; i++) {
380     c = 0;				/* Initialize column 0 */
381     f = - (qr);
382     va = ss[A[i]];
383     ci = fi = i;
384     if ( nseq == 2 )
385       { p = 0;
386       pi = (i - 1);
387       cj = fj = pj = 0;
388       }
389     else
390       { p = CCC[i].CC;
391       pi = CCC[i].RR;
392       pj = CCC[i].EE;
393       cj = fj = i;
394       }
395     j = (nseq == 2 ? 1: i+1);
396     for ( sp = &CCC[j]; sp <= &CCC[N]; j++) {
397 
398       d = sp->DD;
399       c = -1;
400       DIAG(i, j, c, p+va[B[j]])		/* diagonal */
401 	if (c < 0) {
402 	  p = sp->CC; pi = sp->RR; pj = sp->EE;
403 	  if (f >= 0) {
404 	    c = f; ci = fi; cj = fj;
405 	    ORDER1(c, ci, cj,  d, sp->SS, sp->FF)
406 	      sp->CC = c; sp->RR = ci; sp->EE = cj;
407 	      sp++->DD -= r; f-=r;
408 	  } else if (d >= 0) {
409 	    sp->CC = d; sp->RR = sp->SS; sp->EE = sp->FF;
410 	    sp++->DD -= r;
411 	  } else {
412 	    sp->CC = 0; sp->RR=i; sp++->EE = j;
413 	  }
414 	} else {
415 	  ci = pi; cj = pj;
416 	  ORDER1(c, ci, cj,  f, fi, fj)
417 	    ORDER1(c, ci, cj,  d, sp->SS, sp->FF)
418 	    p = sp->CC;
419 	  sp->CC = c;
420 	  pi = sp->RR;
421 	  sp->RR = ci;
422 	  pj = sp->EE;
423 	  sp->EE = cj;
424 	  f-=r;
425 	  if (c >= qr) {
426 	    if ( c > min)	/* add the score into list */
427 	      addnode(c, ci, cj, i, j);
428 	    d -= r; c-=qr;
429 	    ORDER1(f, fi, fj, c, ci, cj)
430 	      ORDER1(d, sp->SS, sp->FF, c, ci, cj)
431 	      sp++->DD = d;
432 	  } else {
433 	    sp++->DD -= r;
434 	  }
435 	}
436     }
437   }
438 }
439 
440 /* Determine the left and top boundaries of the recomputed area */
441 
locate(uchar * A,uchar * B,int ss[][32],int Q,int R,int nseq)442 static void locate(uchar *A,uchar *B,int ss[][32], int Q, int R, int nseq)
443 { register  int  i, j;			/* row and column indices */
444   register  int  c;			/* best score at current point */
445   register  int  f;			/* best score ending with insertion */
446   register  int  d;			/* best score ending with deletion */
447   register  int  p;			/* best score at (i-1, j-1) */
448   register  int  ci, cj;		/* end-point associated with c */
449   register int di, dj;
450   register  int  fi, fj;		/* end-point associated with f */
451   register  int  pi, pj;		/* end-point associated with p */
452   register space_ptr sp;
453   bool  cflag, rflag;			/* for recomputation */
454   int  *va;				/* pointer to v(A[i], B[j]) */
455   int  limit;				/* the bound on j */
456   int q, r, qr;
457 
458   q = Q; r = R; qr = q + r;
459 
460   /* Reverse pass
461      rows
462      CC : the scores on the current row
463      RR and EE : the endpoints that lead to CC
464      DD : the deletion scores
465      SS and FF : the endpoints that lead to DD
466 
467      columns
468      HH : the scores on the current columns
469      II and JJ : the endpoints that lead to HH
470      WW : the deletion scores
471      XX and YY : the endpoints that lead to WW
472   */
473 
474   for ( j = nn; j >= n1 ; j-- ) {
475     CCC[j].CC = 0;
476     CCC[j].EE = j;
477     CCC[j].DD = - (q);
478     CCC[j].FF = j;
479     if ( nseq == 2 || j > mm )
480       CCC[j].RR = CCC[j].SS = mm + 1;
481     else
482       CCC[j].RR = CCC[j].SS = j;
483   }
484 
485   for ( i = mm; i >= m1; i-- )  {
486     c = p = 0;
487     f = - (q);
488     ci = fi = i;
489     pi = i + 1;
490     cj = fj = pj = nn + 1;
491     va = ss[A[i]];
492     if ( nseq == 2 || n1 > i )
493       limit = n1;
494     else
495       limit = i + 1;
496     for ( j = nn, sp = &CCC[j]; j >= limit ; j-- )
497       {  f = f - r;
498       c = c - qr;
499       ORDER(f, fi, fj, c, ci, cj)
500 	c = sp->CC - qr;
501       d = sp->DD - r;
502       ORDER(d, sp->SS, sp->FF, c, sp->RR, sp->EE)
503 	c = 0;
504       DIAG(i, j, c, p+va[B[j]])		/* diagonal */
505 	if ( c <= 0 )
506 	  { c = 0; ci = i; cj = j; }
507 	else
508 	  { ci = pi; cj = pj; }
509       ORDER1(c, ci, cj, d, sp->SS, sp->FF)
510 	ORDER1(c, ci, cj, f, fi, fj)
511 	p = sp->CC;
512       sp->CC = c;
513       pi = sp->RR;
514       pj = sp->EE;
515       sp->RR = ci;
516       sp->EE = cj;
517       sp--->DD = d;
518       if ( c > min )
519 	flag = 1;
520       }
521     if ( nseq == 2 || i < n1 )
522       { HH[i] = CCC[n1].CC;
523       II[i] = CCC[n1].RR;
524       JJ[i] = CCC[n1].EE;
525       WW[i] = f;
526       XX[i] = fi;
527       YY[i] = fj;
528       }
529   }
530 
531   for ( rl = m1, cl = n1; ; ) {
532     for ( rflag = cflag = 1; ( rflag && m1 > 1 ) || ( cflag && n1 > 1 ) ;  ) {
533       if ( rflag && m1 > 1 ) {	/* Compute one row */
534 	rflag = 0;
535 	m1--;
536 	c = p = 0;
537 	f = - (q);
538 	ci = fi = m1;
539 	pi = m1 + 1;
540 	cj = fj = pj = nn + 1;
541 	va = ss[A[m1]];
542 	for ( j = nn, sp = &CCC[j]; j >= n1 ; j-- )
543 	  { f = f - r;
544 	  c = c - qr;
545 	  ORDER(f, fi, fj, c, ci, cj)
546 	    c = sp->CC - qr;
547 	  ci = sp->RR;
548 	  cj = sp->EE;
549 	  d = sp->DD - r;
550 	  di = sp->SS;
551 	  dj = sp->FF;
552 	  ORDER(d, di, dj, c, ci, cj)
553 	    c = 0;
554 	  DIAG(m1, j, c, p+va[B[j]])		/* diagonal */
555 	    if ( c <= 0 )
556 	      { c = 0; ci = m1; cj = j; }
557 	    else
558 	      { ci = pi; cj = pj; }
559 	  ORDER1(c, ci, cj, d, di, dj)
560 	    ORDER1(c, ci, cj, f, fi, fj)
561 	    sp->SS = di;
562 	  sp->FF = dj;
563 	  p = sp->CC;
564 	  sp->CC = c;
565 	  pi = sp->RR;
566 	  pj = sp->EE;
567 	  sp->RR = ci;
568 	  sp->EE = cj;
569 	  sp--->DD = d;
570 	  if ( c > min )
571 	    flag = 1;
572 	  if ( ! rflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl) || (fi > rl && fj > cl) ) )
573 	    rflag = 1;
574 	  }
575 	HH[m1] = CCC[n1].CC;
576 	II[m1] = CCC[n1].RR;
577 	JJ[m1] = CCC[n1].EE;
578 	WW[m1] = f;
579 	XX[m1] = fi;
580 	YY[m1] = fj;
581 	if ( ! cflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl)  || (fi > rl && fj > cl ) ))
582 	  cflag = 1;
583       }
584 
585       if ( nseq == 1 && n1 == (m1 + 1) && ! rflag )
586 	cflag = 0;
587       if ( cflag && n1 > 1 )	/* Compute one column */
588 	{ cflag = 0;
589 	n1--;
590 	c = 0;
591 	f = - (q);
592 	cj = fj = n1;
593 	va = ss[B[n1]];
594 	if ( nseq == 2 || mm < n1 )
595 	  { p = 0;
596 	  ci = fi = pi = mm + 1;
597 	  pj = n1 + 1;
598 	  limit = mm;
599 	  }
600 	else
601 	  { p = HH[n1];
602 	  pi = II[n1];
603 	  pj = JJ[n1];
604 	  ci = fi = n1;
605 	  limit = n1 - 1;
606 	  }
607 	for ( i = limit; i >= m1 ; i-- )
608 	  { f = f - r;
609 	  c = c - qr;
610 	  ORDER(f, fi, fj, c, ci, cj)
611 	    c = HH[i] - qr;
612 	  ci = II[i];
613 	  cj = JJ[i];
614 	  d = WW[i] - r;
615 	  di = XX[i];
616 	  dj = YY[i];
617 	  ORDER(d, di, dj, c, ci, cj)
618 	    c = 0;
619 	  DIAG(i, n1, c, p+va[A[i]])
620 	    if ( c <= 0 )
621 	      { c = 0; ci = i; cj = n1; }
622 	    else
623 	      { ci = pi; cj = pj; }
624 	  ORDER1(c, ci, cj, d, di, dj)
625 	    ORDER1(c, ci, cj, f, fi, fj)
626 	    p = HH[i];
627 	  HH[i] = c;
628 	  pi = II[i];
629 	  pj = JJ[i];
630 	  II[i] = ci;
631 	  JJ[i] = cj;
632 	  WW[i] = d;
633 	  XX[i] = di;
634 	  YY[i] = dj;
635 	  if ( c > min )
636 	    flag = 1;
637 	  if ( ! cflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl)
638 			    || (fi > rl && fj > cl) ) )
639 	    cflag = 1;
640 	  }
641 	CCC[n1].CC = HH[m1];
642 	CCC[n1].RR = II[m1];
643 	CCC[n1].EE = JJ[m1];
644 	CCC[n1].DD = f;
645 	CCC[n1].SS = fi;
646 	CCC[n1].FF = fj;
647 	if ( ! rflag && ( (ci > rl && cj > cl) || (di > rl && dj > cl)
648 			  || (fi > rl && fj > cl )) )
649 	  rflag = 1;
650 	}
651     }
652     if (( m1 == 1 && n1 == 1) || no_cross() )
653       break;
654   }
655   m1--;
656   n1--;
657 }
658 
659 /* recompute the area on forward pass */
small_pass(uchar * A,uchar * B,int count,int ss[][32],int Q,int R,int nseq)660 static void small_pass(uchar *A,uchar *B,int count,int ss[][32], int Q, int R, int nseq)
661 { register  int  i, j;			/* row and column indices */
662   register  int  c;			/* best score at current point */
663   register  int  f;			/* best score ending with insertion */
664   register  int  d;			/* best score ending with deletion */
665   register  int  p;			/* best score at (i-1, j-1) */
666   register  int  ci, cj;		/* end-point associated with c */
667   register  int  fi, fj;		/* end-point associated with f */
668   register  int  pi, pj;		/* end-point associated with p */
669   register space_ptr sp;
670   int  *va;				/* pointer to v(A[i], B[j]) */
671 
672   int  limit;				/* lower bound on j */
673 
674   q = Q; r = R; qr = q + r;
675 
676   for ( sp = &CCC[n1 + 1], j = n1+1; sp <= &CCC[nn] ; sp++, j++ )
677     {  sp->CC = 0;
678     sp->RR = m1;
679     sp->EE = j;
680     sp->DD = - (qr);
681     sp->SS = m1+1;
682     sp->FF = j;
683     }
684 
685   for ( i = m1 + 1; i <= mm; i++)
686     {  c = 0;				/* Initialize column 0 */
687     f = - (qr);
688     ci = fi = i;
689     va = ss[A[i]];
690     if ( nseq == 2 || i <= n1 )
691       { p = 0;
692       pi = i - 1;
693       cj = fj = pj = n1;
694       limit = n1 + 1;
695       }
696     else
697       { p = CCC[i].CC;
698       pi = CCC[i].RR;
699       pj = CCC[i].EE;
700       cj = fj = i;
701       limit = i + 1;
702       }
703     for ( j = limit, sp = &CCC[j] ; j <= nn ; j++ )
704       {
705 	d = sp->DD;
706 	c = -1;
707 	DIAG(i, j, c, p+va[B[j]])		/* diagonal */
708 	  if (c < 0) {
709 	    p = sp->CC; pi = sp->RR; pj = sp->EE;
710 	    if (f >= 0) {
711 	      c = f; ci = fi; cj = fj;
712 	      ORDER1(c, ci, cj,  d, sp->SS, sp->FF)
713 		sp->CC = c; sp->RR = ci; sp->EE = cj;
714 		sp++->DD -= r; f-=r;
715 	    } else if (d >= 0) {
716 	      sp->CC = d; sp->RR = sp->SS; sp->EE = sp->FF;
717 	      sp++->DD -= r;
718 	    } else {
719 	      sp->CC = 0; sp->RR=i; sp++->EE = j;
720 	    }
721 	  } else {
722 	    ci = pi; cj = pj;
723 	    ORDER1(c, ci, cj,  f, fi, fj)
724 	      ORDER1(c, ci, cj,  d, sp->SS, sp->FF)
725 	      p = sp->CC;
726 	    sp->CC = c;
727 	    pi = sp->RR;
728 	    sp->RR = ci;
729 	    pj = sp->EE;
730 	    sp->EE = cj;
731 	    f-=r;
732 	    if (c >= qr) {
733 	      if ( c > min )	/* add the score into list */
734 		addnode(c, ci, cj, i, j);
735 	      d -= r; c-=qr;
736 	      ORDER1(f, fi, fj, c, ci, cj)
737 		ORDER1(d, sp->SS, sp->FF, c, ci, cj)
738 		sp++->DD = d;
739 	    } else {
740 	      sp++->DD -= r;
741 	    }
742 	  }
743       }
744     }
745 }
746 
747 /* Add a new node into list.  */
748 
addnode(int c,int ci,int cj,int i,int j)749 static void addnode(int c, int ci, int cj, int i, int j)
750 { bool found;				/* 1 if the node is in LIST */
751 
752   found = 0;
753   if ( most != 0 && most->STARI == ci && most->STARJ == cj)
754     found = 1;
755   else
756      for ( most = LIST; most; most = most->next )
757 	{
758 	  if ( most->STARI == ci && most->STARJ == cj)
759 	    { found = 1;
760 	      break;
761 	    }
762         }
763   if ( found )
764     { if ( most->SCORE < c )
765         { most->SCORE = c;
766           most->ENDI = i;
767           most->ENDJ = j;
768         }
769       if ( most->TOP > i ) most->TOP = i;
770       if ( most->BOT < i ) most->BOT = i;
771       if ( most->LEFT > j ) most->LEFT = j;
772       if ( most->RIGHT < j ) most->RIGHT = j;
773     }
774   else
775     {
776       numnode++;
777       most = (vertexptr) ckalloc(sizeof(vertex));
778       most->SCORE = c;
779       most->STARI = ci;
780       most->STARJ = cj;
781       most->ENDI = i;
782       most->ENDJ = j;
783       most->TOP = most->BOT = i;
784       most->LEFT = most->RIGHT = j;
785       most->next = LIST;
786       LIST = most;
787     }
788 /*
789   if ( numnode == K )
790     { if ( low == most || ! low )
791         { for ( low = LIST[0], d = 1; d < numnode ; d++ )
792             if ( LIST[d]->SCORE < low->SCORE )
793               low = LIST[d];
794 	}
795       return ( low->SCORE ) ;
796     }
797   else
798     return cost;
799     */
800 }
801 
802 /* Find and remove the largest score in list */
803 
findmax()804 static vertexptr findmax()
805 { vertexptr  ap, cur;
806   register int i;
807 
808   for ( i = LIST->SCORE, cur = NULL, ap = LIST; ap->next; ap = ap->next)
809       if ( ap->next->SCORE > i ) {
810 	  cur = ap; i = ap->next->SCORE;
811       }
812   if (cur) {ap = cur->next; cur->next = ap->next; }
813   else { ap = LIST; LIST = LIST->next;}
814   numnode--;
815   most = LIST;
816   if ( low == ap ) low = LIST;
817   return ( ap );
818 }
819 
820 /* return 1 if no node in LIST share vertices with the area */
821 
no_cross()822 static bool no_cross()
823 { vertexptr  cur;
824 
825       for ( cur = LIST; cur; cur = cur->next )
826 	{
827 	    if ( cur->STARI <= mm && cur->STARJ <= nn && cur->BOT >= m1-1 &&
828 		 cur->RIGHT >= n1-1 && (cur->STARI < rl || cur->STARJ < cl)) {
829 		if ( cur->STARI < rl ) rl = cur->STARI;
830 		if ( cur->STARJ < cl ) cl = cur->STARJ;
831 		flag = 1;
832 		break;
833 	    }
834 	}
835       return !cur;
836 }
837 
838 /* diff(A,B,M,N,tb,te) returns the score of an optimum conversion between
839    A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
840    and appends such a conversion to the current script.                   */
841 
diff(uchar * A,uchar * B,int M,int N,int tb,int te,int ss[][32],int Q,int R)842 static int diff(uchar *A,uchar *B,int M,int N,int tb,int te, int ss[][32], int Q, int R)
843 
844 { int   midi, midj, type;	/* Midpoint, type, and cost */
845   int midc;
846 
847 { register int   i, j;
848   register int c, e, d, s;
849   int t;
850   int *va;
851   int q, r, qr;
852 
853   q = Q; r = R; qr = q + r;
854 
855 /* Boundary cases: M <= 1 or N == 0 */
856 
857   if (N <= 0)
858     { if (M > 0) DEL(M)
859       return - gap(M);
860     }
861   if (M <= 1)
862     { if (M <= 0)
863         { INS(N);
864           return - gap(N);
865         }
866       if (tb > te) tb = te;
867       midc = - (tb + r + gap(N) );
868       midj = 0;
869       va = ss[A[1]];
870       for (j = 1; j <= N; j++)
871         {  for ( tt = 1, z = row[I+1]; z != 0; z = z->NEXT )
872               if ( z->COL == j+J )
873 	         { tt = 0; break; }
874            if ( tt )
875             { c = va[B[j]] - ( gap(j-1) + gap(N-j) );
876               if (c > midc)
877                { midc = c;
878                  midj = j;
879                }
880 	    }
881 	}
882       if (midj == 0)
883         { INS(N) DEL(1) }
884       else
885         { if (midj > 1) INS(midj-1)
886           REP
887 	  if ( A[1] == B[midj] )
888 	     no_mat += 1;
889 	  else
890 	     no_mis += 1;
891 	  /* mark (A[I],B[J]) as used: put J into list row[I] */
892           I++; J++;
893 	  z = ( pairptr ) ckalloc(sizeof(pair));
894           z->COL = J;
895           z->NEXT = row[I];
896 	  row[I] = z;
897           if (midj < N) INS(N-midj)
898         }
899       return midc;
900     }
901 
902 /* Divide: Find optimum midpoint (midi,midj) of cost midc */
903 
904   midi = M/2;			/* Forward phase:                          */
905   CC[0] = 0;			/*   Compute C(M/2,k) & D(M/2,k) for all k */
906   t = -q;
907   for (j = 1; j <= N; j++)
908     { CC[j] = t = t-r;
909       DD[j] = t-q;
910     }
911   t = -tb;
912   for (i = 1; i <= midi; i++)
913     { s = CC[0];
914       CC[0] = c = t = t-r;
915       e = t-q;
916       va = ss[A[i]];
917       for (j = 1; j <= N; j++)
918         { if ((c = c - qr) > (e = e - r)) e = c;
919           if ((c = CC[j] - qr) > (d = DD[j] - r)) d = c;
920 	  DIAG(i+I, j+J, c, s+va[B[j]])
921           if (c < d) c = d;
922           if (c < e) c = e;
923           s = CC[j];
924           CC[j] = c;
925           DD[j] = d;
926         }
927     }
928   DD[0] = CC[0];
929 
930   RR[N] = 0;			/* Reverse phase:                          */
931   t = -q;			/*   Compute R(M/2,k) & S(M/2,k) for all k */
932   for (j = N-1; j >= 0; j--)
933     { RR[j] = t = t-r;
934       SS[j] = t-q;
935     }
936   t = -te;
937   for (i = M-1; i >= midi; i--)
938     { s = RR[N];
939       RR[N] = c = t = t-r;
940       e = t-q;
941       va = ss[A[i+1]];
942       for (j = N-1; j >= 0; j--)
943         { if ((c = c - qr) > (e = e - r)) e = c;
944           if ((c = RR[j] - qr) > (d = SS[j] - r)) d = c;
945 	  DIAG(i+1+I, j+1+J, c, s+va[B[j+1]])
946           if (c < d) c = d;
947           if (c < e) c = e;
948           s = RR[j];
949           RR[j] = c;
950           SS[j] = d;
951         }
952     }
953   SS[N] = RR[N];
954 
955   midc = CC[0]+RR[0];		/* Find optimal midpoint */
956   midj = 0;
957   type = 1;
958   for (j = 0; j <= N; j++)
959     if ((c = CC[j] + RR[j]) >= midc)
960       if (c > midc || (CC[j] != DD[j] && RR[j] == SS[j]))
961         { midc = c;
962           midj = j;
963         }
964   for (j = N; j >= 0; j--)
965     if ((c = DD[j] + SS[j] + q) > midc)
966       { midc = c;
967         midj = j;
968         type = 2;
969       }
970 }
971 
972 /* Conquer: recursively around midpoint */
973 
974   if (type == 1)
975     { (void)diff(A,B,midi,midj,tb,q,ss,q,r);
976       (void)diff(A+midi,B+midj,M-midi,N-midj,q,te,ss,q,r);
977     }
978   else
979     { (void)diff(A,B,midi-1,midj,tb,0,ss,q,r);
980       DEL(2);
981       (void)diff(A+midi+1,B+midj,M-midi-1,N-midj,0,te,ss,q,r);
982     }
983   return midc;
984 }
985 
986 /* ckalloc - allocate space; check for success */
ckalloc(size_t amount)987 void *ckalloc(size_t amount)
988 {
989   char *p;
990   static long mtotal;
991 
992   mtotal += (long)amount;
993 
994   if ((p = malloc( (unsigned) amount)) == NULL) {
995     fprintf(stderr,"Ran out of near memory: %d/%ld\n",amount,mtotal);
996     exit(1);
997   }
998   return(p);
999 }
1000 
1001