1 
2 /* A PACKAGE FOR SEQUENCE COMPARISON WITH AFFINE WEIGHTS */
3 /* Maximizes a similarity score and doesn't penalize end-gaps */
4 
5 /* Globally passed params and macros */
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 
10 #ifdef BIGMEM
11 #define NMAX 80000
12 #else
13 #define NMAX 3000
14 #endif
15 
16 static int CHECK_SCORE();
17 
18 static int **w;				/* w = W */
19 static int g, h, m;				/* g = G, h = H, m = g+h */
20 
21 #define gap(k)  ((k) <= 0 ? 0 : g+h*(k))	/* k-symbol indel cost */
22 
23 static int *sapp;				/* Current script append ptr */
24 static int  last;				/* Last script op appended */
25 
26 						/* Append "Delete k" op */
27 #define DEL(k)				\
28 { if (last < 0)				\
29     last = sapp[-1] -= (k);		\
30   else					\
31     last = *sapp++ = -(k);		\
32 }
33 						/* Append "Insert k" op */
34 #define INS(k)				\
35 { if (last < 0)				\
36     { sapp[-1] = (k); *sapp++ = last; }	\
37   else					\
38     last = *sapp++ = (k);		\
39 }
40 
41 #define REP { last = *sapp++ = 0; }		/* Append "Replace" op */
42 
43 static int CC[NMAX+1], DD[NMAX+1];	/* Forward cost-only vectors */
44 static int RR[NMAX+1], SS[NMAX+1];	/* Reverse cost-only vectors */
45 
46 /* align(A,B,M,N,tb,te) returns the cost of an optimum conversion between
47    A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
48    and appends such a conversion to the current script.                   */
49 
align(A,B,M,N,tb,te,topr,botr,lc,rc)50 static int align(A,B,M,N,tb,te,topr,botr,lc,rc) char *A, *B; int M, N;
51 int tb, te; char topr, botr, lc, rc;
52 
53 {        int   midi, midj, type;	/* Midpoint, type, and cost */
54          int midc;
55 	 int c1, c2;
56 
57 { register int   i, j;
58   register int c, e, d, s;
59            int t, *wa;
60 
61 /* Boundary cases: M <= 1 or N == 0 */
62 
63   if (N <= 0)
64     { if (M > 0) DEL(M)
65       if (topr || botr) return 0;
66       else return -gap(M);
67     }
68   if (M <= 1)
69     { if (M <= 0)
70         { INS(N);
71 	  if (topr || botr) return 0;
72           else return -gap(N);
73         }
74       if (topr) {
75 	 if (rc) midc = 0;
76 	 else midc = te-h;
77 	 midj = 0;
78 	 wa = w[A[1]];
79 	 for (j = 1; j <= N; j++)
80 	   { c = wa[B[j]] - gap(N-j);
81              if (c > midc)
82                { midc = c;
83                  midj = j;
84                }
85            }
86       } else if (botr) {
87 	 if (lc) midc = 0;
88 	 else midc = tb-h;
89 	 midj = 0;
90 	 wa = w[A[1]];
91 	 for (j = 1; j <= N; j++)
92 	   { c = -gap(j-1) + wa[B[j]];
93              if (c > midc)
94                { midc = c;
95                  midj = j;
96                }
97            }
98       } else {
99          if (tb < te) tb = te;
100 	 if (lc || rc) midc = -gap(N);
101          else midc = (tb-h) - gap(N);
102          midj = 0;
103          wa = w[A[1]];
104          for (j = 1; j <= N; j++)
105            { c = -gap(j-1) + wa[B[j]] - gap(N-j);
106              if (c > midc)
107                { midc = c;
108                  midj = j;
109                }
110            }
111       }
112       if (midj == 0)
113         { INS(N) DEL(1) }
114       else
115         { if (midj > 1) INS(midj-1)
116           REP
117           if (midj < N) INS(N-midj)
118         }
119       return midc;
120     }
121 
122 /* Divide: Find optimum midpoint (midi,midj) of cost midc */
123 
124   midi = M/2;			/* Forward phase:                          */
125   CC[0] = 0;			/*   Compute C(M/2,k) & D(M/2,k) for all k */
126   if (topr) {
127 	for (j = 1; j <= N; j++)
128 	  { CC[j] = 0;
129 	    DD[j] = -g;
130 	  }
131   } else {
132      	t = -g;
133      	for (j = 1; j <= N; j++)
134        	{ CC[j] = t = t-h;
135        	  DD[j] = t-g;
136         }
137   }
138   t = tb;
139   for (i = 1; i <= midi; i++)
140     { s = CC[0];
141       if (lc) {
142 	CC[0] = c = 0;
143 	e = -g;
144       } else {
145         CC[0] = c = t = t-h;
146         e = t-g;
147       }
148       wa = w[A[i]];
149       for (j = 1; j <= N; j++)
150         { if ((c =   c   - m) > (e =   e   - h)) e = c;
151 	  if ((j == N) && rc) {
152              if ((c = CC[j]) > (d = DD[j])) d = c;
153 	  } else {
154              if ((c = CC[j] - m) > (d = DD[j] - h)) d = c;
155 	  }
156           c = s + wa[B[j]];
157           if (e > c) c = e;
158           if (d > c) c = d;
159           s = CC[j];
160           CC[j] = c;
161           DD[j] = d;
162         }
163     }
164   DD[0] = CC[0];
165 
166   RR[N] = 0;			/* Reverse phase:                          */
167   				/*   Compute R(M/2,k) & S(M/2,k) for all k */
168   if (botr) {
169 	for (j = N-1; j >= 0; j--)
170 	  { RR[j] = 0;
171 	    SS[j] = -g;
172           }
173   } else {
174   	t = -g;
175   	for (j = N-1; j >= 0; j--)
176     	{ RR[j] = t = t-h;
177       	  SS[j] = t-g;
178     	}
179   }
180   t = te;
181   for (i = M-1; i >= midi; i--)
182     { s = RR[N];
183       if (rc) {
184 	RR[N] = c = 0;
185 	e = -g;
186       } else {
187       	RR[N] = c = t = t-h;
188       	e = t-g;
189       }
190       wa = w[A[i+1]];
191       for (j = N-1; j >= 0; j--)
192         { if ((c =   c   - m) > (e =   e   - h)) e = c;
193 	  if ((j == 0) && lc) {
194              if ((c = RR[j]) > (d = SS[j])) d = c;
195 	  } else {
196              if ((c = RR[j] - m) > (d = SS[j] - h)) d = c;
197 	  }
198           c = s + wa[B[j+1]];
199           if (e > c) c = e;
200           if (d > c) c = d;
201           s = RR[j];
202           RR[j] = c;
203           SS[j] = d;
204         }
205     }
206   SS[N] = RR[N];
207 
208   midc = CC[0]+RR[0];		/* Find optimal midpoint */
209   midj = 0;
210   type = 1;
211   for (j = 0; j <= N; j++)
212     if ((c = CC[j] + RR[j]) >= midc)
213       if (c > midc || CC[j] != DD[j] && RR[j] == SS[j])
214         { midc = c;
215           midj = j;
216         }
217   if (rc) {
218     if ((c = DD[N] + SS[N]) > midc)
219       { midc = c;
220         midj = N;
221         type = 2;
222       }
223   } else {
224     if ((c = DD[N] + SS[N] + g) > midc)
225       { midc = c;
226         midj = N;
227         type = 2;
228       }
229   }
230   for (j = N-1; j > 0; j--)
231     if ((c = DD[j] + SS[j] + g) > midc)
232       { midc = c;
233         midj = j;
234         type = 2;
235       }
236   if (lc) {
237     if ((c = DD[0] + SS[0]) > midc)
238       { midc = c;
239         midj = 0;
240         type = 2;
241       }
242   } else {
243     if ((c = DD[0] + SS[0] + g) > midc)
244       { midc = c;
245         midj = 0;
246         type = 2;
247       }
248   }
249 }
250 
251 /* Conquer: recursively around midpoint */
252 
253   if (midj == 0 || midj == N) {
254      if (type == 1)
255        { align(A,B,midi,midj,tb,-g,topr,0,lc,rc);
256          align(A+midi,B+midj,M-midi,N-midj,-g,te,0,botr,lc,rc);
257        }
258      else
259        { align(A,B,midi-1,midj,tb,0,topr,0,lc,rc);
260          DEL(2);
261          align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,0,botr,lc,rc);
262        }
263   } else {
264      if (type == 1)
265        { align(A,B,midi,midj,tb,-g,topr,0,lc,0);
266          align(A+midi,B+midj,M-midi,N-midj,-g,te,0,botr,0,rc);
267        }
268      else
269        { align(A,B,midi-1,midj,tb,0,topr,0,lc,0);
270          DEL(2);
271          align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,0,botr,0,rc);
272        }
273   }
274   return midc;
275 }
276 
277 /* Interface and top level of comparator */
278 
ALIGN(A,B,M,N,W,G,H,S)279 int ALIGN(A,B,M,N,W,G,H,S)
280      char A[],B[];
281      int M,N;
282      int **W,G,H;
283      int S[];
284 {
285   int c, ck;
286   int t;
287 
288   if (N > NMAX) return -1;	/* Error check */
289 
290   w = W;			/* Setup global parameters */
291   g = G;
292   h = H;
293   m = g+h;
294   sapp = S;
295   last = 0;
296 
297   c = align(A,B,M,N,-g,-g,1,1,1,1);   /* OK, do it */
298   if ((abs(S[0]) < abs(S[1])) && S[0] != 0) {
299      t = S[1];
300      S[1] = S[0];
301      S[0] = t;
302   }
303   if ((abs(sapp[0]) < abs(sapp[-1])) && sapp[0] != 0) {
304      t = sapp[-1];
305      sapp[-1] = sapp[0];
306      sapp[0] = t;
307   }
308   ck = CHECK_SCORE(A,B,M,N,S);
309   if (c != ck) printf("Check_score error. c=%d, ck=%d\n",c,ck);
310   return c;
311 }
312 
313 /* Alignment display routine */
314 
315 static char ALINE[51], BLINE[51], CLINE[51];
316 
317 void
DISPLAY(unsigned char * A,unsigned char * B,int M,int N,int * S,int AP,int BP)318 DISPLAY(unsigned char *A, unsigned char *B,
319 	    int M, int N,
320 	    int *S, int AP, int BP) {
321   register char *a, *b, *c;
322   register int   i,  j, op;
323   int   lines, ap, bp;
324 
325   i = j = op = lines = 0;
326   ap = AP;
327   bp = BP;
328   a = ALINE;
329   b = BLINE;
330   c = CLINE;
331   while (i < M || j < N) {
332     if (op == 0 && *S == 0) {
333       op = *S++;
334       *a = A[++i];
335       *b = B[++j];
336       *c++ = (*a++ == *b++) ? '|' : ' ';
337       }
338     else {
339       if (op == 0) op = *S++;
340       if (op > 0) {
341 	*a++ = ' ';
342 	*b++ = B[++j];
343 	op--;
344       }
345       else {
346 	*a++ = A[++i];
347 	*b++ = ' ';
348 	op++;
349       }
350       *c++ = '-';
351     }
352     if (a >= ALINE+50 || (i >= M && j >= N)) {
353       *a = *b = *c = '\0';
354       printf("\n%5d ",50*lines++);
355       for (b = ALINE+10; b <= a; b += 10) printf("    .    :");
356       if (b <= a+5) printf("    .");
357       printf("\n%5d %s\n      %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
358       ap = AP + i;
359       bp = BP + j;
360       a = ALINE;
361       b = BLINE;
362       c = CLINE;
363     }
364   }
365 }
366 
367 /* CHECK_SCORE - return the score of the alignment stored in S */
368 
CHECK_SCORE(A,B,M,N,S)369 static int CHECK_SCORE(A,B,M,N,S) char A[], B[]; int M, N; int S[];
370 {
371   register int   i,  j, op;
372   int score;
373 
374   score = i = j = op = 0;
375   while (i < M || j < N) {
376 	op = *S++;
377 	if (i == 0 && j == 0 && op != 0) {
378 		if (op > 0) j = j+op;
379 		else i = i-op;
380 	} else if (i == M || j == N) {
381 		i = M;
382 		j = N;
383 	} else if (op == 0)
384 		score = w[A[++i]][B[++j]] + score;
385 	else if (op > 0) {
386 		score = score - (g+op*h);
387 		j = j+op;
388 	} else {
389 		score = score - (g-op*h);
390 		i = i-op;
391 	}
392   }
393   return(score);
394 }
395 
396 /* lib.c - library of C procedures. */
397 
398 /* fatal - print message and die */
399 void
fatal(char * msg)400 fatal(char *msg) {
401 
402   fprintf(stderr, "%s\n", msg);
403   exit(1);
404 }
405 
406 /* fatalf - format message, print it, and die */
407 void
fatalf(char * msg,char * val)408 fatalf(char *msg, char *val) {
409   fprintf(stderr, msg, val);
410   putc('\n', stderr);
411   exit(1);
412 }
413 
414 /* ckalloc - allocate space; check for success */
ckalloc(int amount)415 char *ckalloc(int amount) {
416   char *p;
417 
418   if ((p = malloc((size_t) amount)) == NULL)
419     fatal("Ran out of memory.");
420   return(p);
421 }
422 
423