1 /* A PACKAGE FOR SEQUENCE COMPARISON WITH AFFINE WEIGHTS */
2 /* Here we maximize the similarity score */
3
4 /* Globally passed params and macros */
5
6 #include <stdio.h>
7 #include <stdlib.h>
8
9 #ifdef BIGMEM
10 #define NMAX 80000
11 #else
12 #define NMAX 3000
13 #endif
14
15 static int CHECK_SCORE();
16
17 static int **w; /* w = W */
18 static int g, hh, m; /* g = G, hh = H, m = g+h */
19
20 #define gap(k) ((k) <= 0 ? 0 : g+hh*(k)) /* k-symbol indel cost */
21
22 static int *sapp; /* Current script append ptr */
23 static int last; /* Last script op appended */
24
25 /* Append "Delete k" op */
26 #define DEL(k) \
27 { if (last < 0) \
28 last = sapp[-1] -= (k); \
29 else \
30 last = *sapp++ = -(k); \
31 }
32 /* Append "Insert k" op */
33 #define INS(k) \
34 { if (last < 0) \
35 { sapp[-1] = (k); *sapp++ = last; } \
36 else \
37 last = *sapp++ = (k); \
38 }
39
40 #define REP { last = *sapp++ = 0; } /* Append "Replace" op */
41
42 static int *CC, *DD; /* Forward cost-only vectors */
43 static int *RR, *SS; /* Reverse cost-only vectors */
44
45 /* align(A,B,M,N,tb,te) returns the cost of an optimum conversion between
46 A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
47 and appends such a conversion to the current script. */
48
49 static int
align(unsigned char * A,unsigned char * B,int M,int N,int tb,int te)50 align(unsigned char *A, unsigned char *B,
51 int M, int N, int tb, int te) {
52
53 int midi, midj, type; /* Midpoint, type, and cost */
54 int midc;
55 int c1, c2;
56
57 { register int i, j;
58 register int c, e, d, s;
59 int t, *wa;
60
61 /* Boundary cases: M <= 1 or N == 0 */
62
63 if (N <= 0)
64 { if (M > 0) DEL(M)
65 return -gap(M);
66 }
67 if (M <= 1)
68 { if (M <= 0)
69 { INS(N);
70 return -gap(N);
71 }
72 if (tb < te) tb = te;
73 midc = (tb-hh) - gap(N);
74 midj = 0;
75 wa = w[A[1]];
76 for (j = 1; j <= N; j++)
77 { c = -gap(j-1) + wa[B[j]] - gap(N-j);
78 if (c > midc)
79 { midc = c;
80 midj = j;
81 }
82 }
83 if (midj == 0)
84 { INS(N) DEL(1) }
85 else
86 { if (midj > 1) INS(midj-1)
87 REP
88 if (midj < N) INS(N-midj)
89 }
90 return midc;
91 }
92
93 /* Divide: Find optimum midpoint (midi,midj) of cost midc */
94
95 midi = M/2; /* Forward phase: */
96 CC[0] = 0; /* Compute C(M/2,k) & D(M/2,k) for all k */
97 t = -g;
98 for (j = 1; j <= N; j++)
99 { CC[j] = t = t-hh;
100 DD[j] = t-g;
101 }
102 t = tb;
103 for (i = 1; i <= midi; i++)
104 { s = CC[0];
105 CC[0] = c = t = t-hh;
106 e = t-g;
107 wa = w[A[i]];
108 for (j = 1; j <= N; j++)
109 { if ((c = c - m) > (e = e - hh)) e = c;
110 if ((c = CC[j] - m) > (d = DD[j] - hh)) d = c;
111 c = s + wa[B[j]];
112 if (e > c) c = e;
113 if (d > c) c = d;
114 s = CC[j];
115 CC[j] = c;
116 DD[j] = d;
117 }
118 }
119 DD[0] = CC[0];
120
121 RR[N] = 0; /* Reverse phase: */
122 t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */
123 for (j = N-1; j >= 0; j--)
124 { RR[j] = t = t-hh;
125 SS[j] = t-g;
126 }
127 t = te;
128 for (i = M-1; i >= midi; i--)
129 { s = RR[N];
130 RR[N] = c = t = t-hh;
131 e = t-g;
132 wa = w[A[i+1]];
133 for (j = N-1; j >= 0; j--)
134 { if ((c = c - m) > (e = e - hh)) e = c;
135 if ((c = RR[j] - m) > (d = SS[j] - hh)) d = c;
136 c = s + wa[B[j+1]];
137 if (e > c) c = e;
138 if (d > c) c = d;
139 s = RR[j];
140 RR[j] = c;
141 SS[j] = d;
142 }
143 }
144 SS[N] = RR[N];
145
146 midc = CC[0]+RR[0]; /* Find optimal midpoint */
147 midj = 0;
148 type = 1;
149 for (j = 0; j <= N; j++)
150 if ((c = CC[j] + RR[j]) >= midc)
151 if (c > midc || (CC[j] != DD[j] && RR[j] == SS[j]))
152 { midc = c;
153 midj = j;
154 }
155 for (j = N; j >= 0; j--)
156 if ((c = DD[j] + SS[j] + g) > midc)
157 { midc = c;
158 midj = j;
159 type = 2;
160 }
161 }
162
163 /* Conquer: recursively around midpoint */
164
165 if (type == 1)
166 { c1 = align(A,B,midi,midj,tb,-g);
167 c2 = align(A+midi,B+midj,M-midi,N-midj,-g,te);
168 }
169 else
170 { align(A,B,midi-1,midj,tb,0);
171 DEL(2);
172 align(A+midi+1,B+midj,M-midi-1,N-midj,0,te);
173 }
174 return midc;
175 }
176
177 /* Interface and top level of comparator */
178
179 static int nmax=0;
180
ALIGN(unsigned char * A,unsigned char * B,int M,int N,int ** W,int G,int H,int * S,int * NC)181 int ALIGN(unsigned char *A, unsigned char *B,
182 int M, int N,
183 int **W,
184 int G, int H, int *S, int *NC)
185 {
186 int c, ck;
187
188 if (N > NMAX) return -1; /* Error check */
189
190 w = W; /* Setup global parameters */
191 g = G;
192 hh = H;
193 m = g+hh;
194 sapp = S;
195 last = 0;
196
197 if (CC==NULL) {
198 nmax = N;
199 CC=(int *)calloc((size_t)(nmax+1),sizeof(int));
200 DD=(int *)calloc((size_t)(nmax+1),sizeof(int));
201 RR=(int *)calloc((size_t)(nmax+1),sizeof(int));
202 SS=(int *)calloc((size_t)(nmax+1),sizeof(int));
203 }
204 else if (N > nmax ) {
205 nmax = N;
206 CC=(int *)realloc(CC,(size_t)(nmax+1)*sizeof(int));
207 DD=(int *)realloc(DD,(size_t)(nmax+1)*sizeof(int));
208 RR=(int *)realloc(RR,(size_t)(nmax+1)*sizeof(int));
209 SS=(int *)realloc(SS,(size_t)(nmax+1)*sizeof(int));
210 }
211
212 if (CC==NULL || DD==NULL || RR==NULL || SS==NULL) {
213 fprintf(stderr," cannot allocate llmax arrays\n");
214 exit(1);
215 }
216
217 c = align(A,B,M,N,-g,-g); /* OK, do it */
218 ck = CHECK_SCORE(A,B,M,N,S,NC);
219 if (c != ck) fprintf(stderr,"** Check_score error %d/%d.\n",c,ck);
220 return c;
221 }
222
223 /* Alignment display routine */
224
225 static char ALINE[51], BLINE[51], CLINE[51];
226 extern char *sq;
227
228 void
DISPLAY(unsigned char * A,unsigned char * B,int M,int N,int * S,int AP,int BP)229 DISPLAY(unsigned char *A, unsigned char *B,
230 int M, int N, int *S, int AP, int BP) {
231
232 register char *a, *b, *c;
233 register int i, j, op;
234 int lines, ap, bp;
235
236 i = j = op = lines = 0;
237 ap = AP;
238 bp = BP;
239 a = ALINE;
240 b = BLINE;
241 c = CLINE;
242 while (i < M || j < N)
243 { if (op == 0 && *S == 0)
244 { op = *S++;
245 *a = sq[A[++i]];
246 *b = sq[B[++j]];
247 *c++ = (*a++ == *b++) ? '|' : ' ';
248 }
249 else
250 { if (op == 0)
251 op = *S++;
252 if (op > 0)
253 { *a++ = ' ';
254 *b++ = sq[B[++j]];
255 op--;
256 }
257 else
258 { *a++ = sq[A[++i]];
259 *b++ = ' ';
260 op++;
261 }
262 *c++ = '-';
263 }
264 if (a >= ALINE+50 || (i >= M && j >= N))
265 { *a = *b = *c = '\0';
266 printf("\n%5d ",50*lines++);
267 for (b = ALINE+10; b <= a; b += 10)
268 printf(" . :");
269 if (b <= a+5)
270 printf(" .");
271 printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
272 ap = AP + i;
273 bp = BP + j;
274 a = ALINE;
275 b = BLINE;
276 c = CLINE;
277 }
278 }
279 }
280
281 /* CHECK_SCORE - return the score of the alignment stored in S */
282
CHECK_SCORE(unsigned char * A,unsigned char * B,int M,int N,int * S,int * NC)283 static int CHECK_SCORE(unsigned char *A, unsigned char *B,
284 int M, int N, int *S, int *NC) {
285
286 register int i, j, op, nc;
287 int score;
288
289 score = i = j = op = nc = 0;
290 while (i < M || j < N) {
291 op = *S++;
292 if (op == 0) {
293 score = w[A[++i]][B[++j]] + score;
294 nc++;
295 }
296 else if (op > 0) {
297 score = score - (g+op*hh);
298 j = j+op;
299 nc += op;
300 } else {
301 score = score - (g-op*hh);
302 i = i-op;
303 nc -= op;
304 }
305 }
306 *NC = nc;
307 return(score);
308 }
309
310 /* lib.c - library of C procedures. */
311
312 /* fatal - print message and die */
313 void
fatal(char * msg)314 fatal(char *msg) {
315
316 fprintf(stderr, "%s\n", msg);
317 exit(1);
318 }
319
320 /* fatalf - format message, print it, and die */
321 void
fatalf(char * msg,char * val)322 fatalf(char *msg, char *val) {
323
324 fprintf(stderr, msg, val);
325 putc('\n', stderr);
326 exit(1);
327 }
328
329 /* ckalloc - allocate space; check for success */
ckalloc(int amount)330 char *ckalloc(int amount) {
331
332 char *p;
333
334 if ((p = malloc((size_t)amount)) == NULL) fatal("Ran out of memory.");
335 return(p);
336 }
337