1 /* A PACKAGE FOR SEQUENCE COMPARISON WITH AFFINE WEIGHTS */
2 /* Here we maximize the similarity score */
3 
4 /* Globally passed params and macros */
5 
6 #include <stdio.h>
7 #include <stdlib.h>
8 
9 #ifdef BIGMEM
10 #define NMAX 80000
11 #else
12 #define NMAX 3000
13 #endif
14 
15 static int CHECK_SCORE();
16 
17 static int **w;				/* w = W */
18 static int g, hh, m;				/* g = G, hh = H, m = g+h */
19 
20 #define gap(k)  ((k) <= 0 ? 0 : g+hh*(k))	/* k-symbol indel cost */
21 
22 static int *sapp;				/* Current script append ptr */
23 static int  last;				/* Last script op appended */
24 
25 						/* Append "Delete k" op */
26 #define DEL(k)				\
27 { if (last < 0)				\
28     last = sapp[-1] -= (k);		\
29   else					\
30     last = *sapp++ = -(k);		\
31 }
32 						/* Append "Insert k" op */
33 #define INS(k)				\
34 { if (last < 0)				\
35     { sapp[-1] = (k); *sapp++ = last; }	\
36   else					\
37     last = *sapp++ = (k);		\
38 }
39 
40 #define REP { last = *sapp++ = 0; }		/* Append "Replace" op */
41 
42 static int *CC, *DD;	/* Forward cost-only vectors */
43 static int *RR, *SS;	/* Reverse cost-only vectors */
44 
45 /* align(A,B,M,N,tb,te) returns the cost of an optimum conversion between
46    A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
47    and appends such a conversion to the current script.                   */
48 
49 static int
align(unsigned char * A,unsigned char * B,int M,int N,int tb,int te)50 align(unsigned char *A, unsigned char *B,
51       int M, int N, int tb, int te) {
52 
53   int   midi, midj, type;	/* Midpoint, type, and cost */
54   int midc;
55   int c1, c2;
56 
57 { register int   i, j;
58   register int c, e, d, s;
59            int t, *wa;
60 
61 /* Boundary cases: M <= 1 or N == 0 */
62 
63   if (N <= 0)
64     { if (M > 0) DEL(M)
65       return -gap(M);
66     }
67   if (M <= 1)
68     { if (M <= 0)
69         { INS(N);
70           return -gap(N);
71         }
72       if (tb < te) tb = te;
73       midc = (tb-hh) - gap(N);
74       midj = 0;
75       wa = w[A[1]];
76       for (j = 1; j <= N; j++)
77         { c = -gap(j-1) + wa[B[j]] - gap(N-j);
78           if (c > midc)
79             { midc = c;
80               midj = j;
81             }
82         }
83       if (midj == 0)
84         { INS(N) DEL(1) }
85       else
86         { if (midj > 1) INS(midj-1)
87           REP
88           if (midj < N) INS(N-midj)
89         }
90       return midc;
91     }
92 
93 /* Divide: Find optimum midpoint (midi,midj) of cost midc */
94 
95   midi = M/2;			/* Forward phase:                          */
96   CC[0] = 0;			/*   Compute C(M/2,k) & D(M/2,k) for all k */
97   t = -g;
98   for (j = 1; j <= N; j++)
99     { CC[j] = t = t-hh;
100       DD[j] = t-g;
101     }
102   t = tb;
103   for (i = 1; i <= midi; i++)
104     { s = CC[0];
105       CC[0] = c = t = t-hh;
106       e = t-g;
107       wa = w[A[i]];
108       for (j = 1; j <= N; j++)
109         { if ((c =   c   - m) > (e =   e   - hh)) e = c;
110           if ((c = CC[j] - m) > (d = DD[j] - hh)) d = c;
111           c = s + wa[B[j]];
112           if (e > c) c = e;
113           if (d > c) c = d;
114           s = CC[j];
115           CC[j] = c;
116           DD[j] = d;
117         }
118     }
119   DD[0] = CC[0];
120 
121   RR[N] = 0;			/* Reverse phase:                          */
122   t = -g;			/*   Compute R(M/2,k) & S(M/2,k) for all k */
123   for (j = N-1; j >= 0; j--)
124     { RR[j] = t = t-hh;
125       SS[j] = t-g;
126     }
127   t = te;
128   for (i = M-1; i >= midi; i--)
129     { s = RR[N];
130       RR[N] = c = t = t-hh;
131       e = t-g;
132       wa = w[A[i+1]];
133       for (j = N-1; j >= 0; j--)
134         { if ((c =   c   - m) > (e =   e   - hh)) e = c;
135           if ((c = RR[j] - m) > (d = SS[j] - hh)) d = c;
136           c = s + wa[B[j+1]];
137           if (e > c) c = e;
138           if (d > c) c = d;
139           s = RR[j];
140           RR[j] = c;
141           SS[j] = d;
142         }
143     }
144   SS[N] = RR[N];
145 
146   midc = CC[0]+RR[0];		/* Find optimal midpoint */
147   midj = 0;
148   type = 1;
149   for (j = 0; j <= N; j++)
150     if ((c = CC[j] + RR[j]) >= midc)
151       if (c > midc || (CC[j] != DD[j] && RR[j] == SS[j]))
152         { midc = c;
153           midj = j;
154         }
155   for (j = N; j >= 0; j--)
156     if ((c = DD[j] + SS[j] + g) > midc)
157       { midc = c;
158         midj = j;
159         type = 2;
160       }
161 }
162 
163 /* Conquer: recursively around midpoint */
164 
165   if (type == 1)
166     { c1 = align(A,B,midi,midj,tb,-g);
167       c2 = align(A+midi,B+midj,M-midi,N-midj,-g,te);
168     }
169   else
170     { align(A,B,midi-1,midj,tb,0);
171       DEL(2);
172       align(A+midi+1,B+midj,M-midi-1,N-midj,0,te);
173     }
174   return midc;
175 }
176 
177 /* Interface and top level of comparator */
178 
179 static int nmax=0;
180 
ALIGN(unsigned char * A,unsigned char * B,int M,int N,int ** W,int G,int H,int * S,int * NC)181 int ALIGN(unsigned char *A, unsigned char *B,
182 	  int M, int N,
183 	  int **W,
184 	  int G, int H, int *S, int *NC)
185 {
186   int c, ck;
187 
188   if (N > NMAX) return -1;	/* Error check */
189 
190   w = W;			/* Setup global parameters */
191   g = G;
192   hh = H;
193   m = g+hh;
194   sapp = S;
195   last = 0;
196 
197   if (CC==NULL) {
198     nmax = N;
199   	CC=(int *)calloc((size_t)(nmax+1),sizeof(int));
200   	DD=(int *)calloc((size_t)(nmax+1),sizeof(int));
201   	RR=(int *)calloc((size_t)(nmax+1),sizeof(int));
202   	SS=(int *)calloc((size_t)(nmax+1),sizeof(int));
203   	}
204   else if (N > nmax ) {
205     nmax = N;
206   	CC=(int *)realloc(CC,(size_t)(nmax+1)*sizeof(int));
207   	DD=(int *)realloc(DD,(size_t)(nmax+1)*sizeof(int));
208   	RR=(int *)realloc(RR,(size_t)(nmax+1)*sizeof(int));
209   	SS=(int *)realloc(SS,(size_t)(nmax+1)*sizeof(int));
210   	}
211 
212   if (CC==NULL || DD==NULL || RR==NULL || SS==NULL) {
213   		fprintf(stderr," cannot allocate llmax arrays\n");
214   		exit(1);
215   }
216 
217   c = align(A,B,M,N,-g,-g);	/* OK, do it */
218   ck = CHECK_SCORE(A,B,M,N,S,NC);
219   if (c != ck) fprintf(stderr,"** Check_score error %d/%d.\n",c,ck);
220   return c;
221 }
222 
223 /* Alignment display routine */
224 
225 static char ALINE[51], BLINE[51], CLINE[51];
226 extern char *sq;
227 
228 void
DISPLAY(unsigned char * A,unsigned char * B,int M,int N,int * S,int AP,int BP)229 DISPLAY(unsigned char *A, unsigned char *B,
230 	int M, int N, int *S, int AP, int BP) {
231 
232   register char *a, *b, *c;
233   register int   i,  j, op;
234   int   lines, ap, bp;
235 
236   i = j = op = lines = 0;
237   ap = AP;
238   bp = BP;
239   a = ALINE;
240   b = BLINE;
241   c = CLINE;
242   while (i < M || j < N)
243     { if (op == 0 && *S == 0)
244       { op = *S++;
245       *a = sq[A[++i]];
246       *b = sq[B[++j]];
247       *c++ = (*a++ == *b++) ? '|' : ' ';
248       }
249     else
250       { if (op == 0)
251 	op = *S++;
252       if (op > 0)
253 	{ *a++ = ' ';
254 	*b++ = sq[B[++j]];
255 	op--;
256 	}
257       else
258 	{ *a++ = sq[A[++i]];
259 	*b++ = ' ';
260 	op++;
261 	}
262       *c++ = '-';
263       }
264     if (a >= ALINE+50 || (i >= M && j >= N))
265       { *a = *b = *c = '\0';
266       printf("\n%5d ",50*lines++);
267       for (b = ALINE+10; b <= a; b += 10)
268 	printf("    .    :");
269       if (b <= a+5)
270 	printf("    .");
271       printf("\n%5d %s\n      %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
272       ap = AP + i;
273       bp = BP + j;
274       a = ALINE;
275       b = BLINE;
276       c = CLINE;
277       }
278     }
279 }
280 
281 /* CHECK_SCORE - return the score of the alignment stored in S */
282 
CHECK_SCORE(unsigned char * A,unsigned char * B,int M,int N,int * S,int * NC)283 static int CHECK_SCORE(unsigned char *A, unsigned char *B,
284 		       int M, int N, int *S, int *NC) {
285 
286   register int   i,  j, op, nc;
287   int score;
288 
289   score = i = j = op = nc = 0;
290   while (i < M || j < N) {
291     op = *S++;
292     if (op == 0) {
293       score = w[A[++i]][B[++j]] + score;
294       nc++;
295     }
296     else if (op > 0) {
297       score = score - (g+op*hh);
298       j = j+op;
299       nc += op;
300     } else {
301       score = score - (g-op*hh);
302       i = i-op;
303       nc -= op;
304     }
305   }
306   *NC = nc;
307   return(score);
308 }
309 
310 /* lib.c - library of C procedures. */
311 
312 /* fatal - print message and die */
313 void
fatal(char * msg)314 fatal(char *msg) {
315 
316   fprintf(stderr, "%s\n", msg);
317   exit(1);
318 }
319 
320 /* fatalf - format message, print it, and die */
321 void
fatalf(char * msg,char * val)322 fatalf(char *msg, char *val) {
323 
324   fprintf(stderr, msg, val);
325   putc('\n', stderr);
326   exit(1);
327 }
328 
329 /* ckalloc - allocate space; check for success */
ckalloc(int amount)330 char *ckalloc(int amount) {
331 
332   char *p;
333 
334   if ((p = malloc((size_t)amount)) == NULL) fatal("Ran out of memory.");
335   return(p);
336 }
337