1 /*
2 zzlgmata.c Needleman-Wunsch nucleotide mapper to find overlaps
3
4 copyright (c) 1983,1986,1987 William R. Pearson
5
6 August, 1994 - corrected error in alignments for ssearch.
7
8 July, 1994 - improved running time of smatch() by 30%
9
10 Aug, 1991 - completely dchanged match() to use Miller/Chao linear
11 space in band algorithm.
12
13 July 27, 1988 - improved output from consens, so that some context
14 of the match is shown. Put in showall == -1 briefly, then removed it.
15
16 June 29, 1988 - fixed bug in call to initmat();
17
18 September 24, 1987 - combined zzlgmata.c and zggmata.c with
19 -DGLOBAL
20
21 */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <math.h>
26
27 #ifndef max
28 #define max(a,b) (((a)>(b))?(a):(b))
29 #define min(a,b) (((a)<(b))?(a):(b))
30 #endif
31
32 #include "zzgmata.gbl"
33 #define XTERNAL
34 #include "upam.gbl"
35
36 extern int n0, n1; /* length of sequences */
37 extern int dnaseq;
38 extern unsigned char *aa0, *aa1; /* sequence arrays */
39 #ifdef TFASTX
40 extern unsigned char *aa1y;
41 #else
42 extern unsigned char *aa0y;
43 #endif
44 extern FILE *outfd;
45 extern int showall; /* show complete sequences, not just overlaps */
46 extern int llen;
47 extern int lnsflg;
48 extern int optwid;
49
50 int smin0, smin1, smins; /* set bounds for discons */
51 extern int markx;
52
53 #ifdef LFASTA
54 extern int oneseq;
55 extern int lcrc0[];
56 extern int lcrc1[];
57 extern int ncrc;
58 extern int iscore, gscore;
59 int pmirror;
60 #endif
61
62 int window;
63 int nident;
64 static int *res=NULL;
65 static int nres;
66
dmatch(hoff,display)67 dmatch(hoff,display)
68 int hoff, display;
69 {
70 int s0, s1;
71 int nc, ns;
72 float percent;
73 unsigned int i,j;
74 int low, up;
75 int score;
76
77 hoff -= optwid/2;
78
79 if (!display) {
80 #ifdef TFASTX
81 score = lx_align(aa0, n0, aa1y, n1, pam2, -(gdelval-ggapval),
82 -ggapval,-gshift,hoff,optwid);
83 #else
84 score = lx_align(aa1, n1, aa0y, n0, pam2, -(gdelval-ggapval),
85 -ggapval,-gshift,hoff,optwid);
86 #endif
87 if (score < 0) return 0;
88 return score;
89 }
90
91 /* score=LOCAL_ALIGN(aa0-1,aa1-1,n0,n1, low, up,
92 pam2,-(gdelval-ggapval),-ggapval,1,
93 &min0,&min1,&max0,&max1,optwid);
94
95 if (score <=0) return 0;
96 */
97 return 0;
98
99 if (showall==1) {
100 if (hoff>0) maxc=optwid+((n0-hoff>=n1) ? n0 : n1+hoff);
101 else maxc=optwid+((n1+hoff>=n0) ? n1 : n0-hoff);
102 }
103 else maxc = min(n0,n1)+2+4*optwid+2*llen;
104 initres(n0+2+4*optwid);
105
106 /* DISPLAY(aa0-1+min0-1,aa1-1+min1-1,max0-min0+1,max1-min1+1,
107 res,min0,min1);
108 */
109 initseq(maxc);
110
111 ns = calcons(aa0,n0,aa1,n1,res,nres,&nc);
112 percent = (float)nident*100.0/nc;
113
114 #ifndef LFASTA
115 if (markx == 4)
116 disgraph(n0,n1,percent,score,min0,min1,max0,max1);
117 else {
118 if (markx==5) {
119 fputc('\n',outfd);
120 disgraph(n0, n1, percent, score, min0, min1, max0, max1);
121 }
122 fprintf(outfd," %6.3f%% identity in %d %s overlap\n",
123 percent,nc,sqnam);
124 discons(seqc0,seqc1,ns);
125 }
126 freeseq();
127 #else
128 if (crcknew(seqc0,seqc1,ns)) {
129 if (display) fprintf(outfd,
130 "\n %5.1f%% identity in %d %s overlap; init: %4d, opt: %4d\n",
131 percent,nc,sqnam,iscore,maxv);
132 gscore = maxv;
133 opnline((long)smin0,(long)smin1,gscore);
134 discons(seqc0,seqc1,ns);
135 clsline((long)smin0,(long)smin1,gscore);
136 #ifdef TPLOT
137 if (oneseq) {
138 pmirror = 1;
139 i = smin0;
140 smin0 = smin1;
141 smin1 = i;
142 opnline((long)smin0,(long)smin1,gscore);
143 discons(seqc1,seqc0,ns);
144 clsline((long)smin0,(long)smin1,gscore);
145 pmirror = 0;
146 }
147 #endif
148 }
149 else maxv = -1;
150 #endif
151 return maxv;
152 }
153
154 static struct swstr {
155 int H;
156 int E;
157 } *ss;
158
159 /* pmatch provides an interface to pro_dna() in lx_align.c */
160
161 /* aa0 has DNA sequence, aa1 has prot sequence */
162
pmatch(aa0,n0,aa0t,n0t,aa1,n1,flag)163 pmatch(aa0,n0,aa0t,n0t,aa1,n1,flag)
164 unsigned char *aa0, *aa0t, *aa1;
165 int n0, n0t, n1;
166 int flag;
167 {
168 int nc, minc, maxc, lc;
169 float percent;
170 int score;
171
172 #ifndef TFASTX
173 initres(n0*3/2);
174 #else
175 initres(n0*3);
176 #endif
177
178 score = pro_dna(aa1,n1,aa0t,n0,pam2,-(gdelval-ggapval), -ggapval, -gshift,
179 res,&nres);
180
181 /* display_alig(res,aa0t,aa1,nres,n0); */
182
183
184 /* fprintf(stderr,"old: %d, nres: %d\n",min(n0,n1)*5/4, nres); */
185
186 if (showall==1) initseq(max(n0,n1)*5/4);
187 else initseq(max(min(n0,n1)*5/4,nres)+2*llen);
188
189
190 nc=calcons(aa0t,n0,aa1,n1,res,nres,&lc);
191
192 percent = (100.0*(float)nident)/(float)lc;
193
194 if (markx == 4)
195 disgraph(n0,n1,percent,score,min0,min1,max0,max1);
196 else {
197 fprintf(outfd,
198 "Smith-Waterman score: %d; %5.1f%% identity in %d %s overlap\n",
199 score, percent,lc,sqnam);
200 discons(seqc0,seqc1,nc);
201 }
202 freeseq();
203
204 return score;
205 }
206
initres(rsiz)207 initres(rsiz) /* initialize results array */
208 int rsiz;
209 {
210 if (res==NULL) res = (int *)calloc((size_t)rsiz,sizeof(int));
211 if (res==NULL)
212 {fprintf(stderr,"cannot allocate alignment results array %d\n",rsiz);
213 exit(1);}
214 }
215
initseq(seqsiz)216 initseq(seqsiz) /* initialize arrays */
217 int seqsiz;
218 {
219 seqc0=calloc((size_t)seqsiz,sizeof(char));
220 seqc1=calloc((size_t)seqsiz,sizeof(char));
221 if (seqc0==NULL || seqc1==NULL)
222 {fprintf(stderr,"cannot allocate consensus arrays %d\n",seqsiz);
223 exit(1);}
224 salloc = 1;
225 }
226
freeseq()227 freeseq()
228 {
229 free(seqc0); free(seqc1);
230 }
231
232 /*
233 this function builds a consensus sequence in place by
234 going to the maximum match and moving left and up
235 */
236
237
calcons(aa0,n0,aa1,n1,res,nres,nc)238 calcons(aa0,n0,aa1,n1,res,nres,nc)
239 char *aa0, *aa1;
240 int n0, n1;
241 int *res;
242 int nres;
243 int *nc;
244 {
245 int i0, i1;
246 int op, lenc, not_c, nd, ns, itmp;
247 char *sp0, *sp1;
248 int *rp, *rpmax;
249
250 /* don't fill in the ends */
251
252 smins = mins = 0;
253
254 #ifndef TFASTX
255 smin1 = min1= *res++;
256 smin0 = min0= *res++;
257 sp0 = seqc0;
258 sp1 = seqc1;
259 #else
260 smin0 = min1= *res++;
261 smin1 = min0= *res++;
262 sp1 = seqc0;
263 sp0 = seqc1;
264 #endif
265
266 rp = res;
267 rpmax = &res[nres-2];
268
269 lenc = not_c = nident = op = 0;
270 i0 = min0;
271 i1 = min1;
272
273 while (rp < rpmax ) {
274 switch (*rp++) {
275 case 0:
276 *sp0++ = '-';
277 *sp1++ = sq[aa1[i1++]];
278 lenc++;
279 break;
280 case 2:
281 *sp0++ = '/';
282 i0 -= 1;
283 *sp1++ = '-';
284 not_c++;
285 *sp0 = sq[aa0[i0]];
286 i0 += 3;
287 *sp1 = sq[aa1[i1++]];
288 if (*sp0 == *sp1) nident++;
289 sp0++; sp1++;
290 lenc++;
291 break;
292 case 3:
293 *sp0 = sq[aa0[i0]];
294 i0 += 3;
295 *sp1 = sq[aa1[i1++]];
296 if (*sp0 == *sp1) nident++;
297 sp0++; sp1++;
298 lenc++;
299 break;
300 case 4:
301 *sp0++ = '\\';
302 i0 += 1;
303 *sp1++ = '-';
304 not_c++;
305 *sp0 = sq[aa0[i0]];
306 i0 += 3;
307 *sp1 = sq[aa1[i1++]];
308 if (*sp0 == *sp1) nident++;
309 sp0++; sp1++;
310 lenc++;
311 break;
312 case 5:
313 *sp0++ = sq[aa0[i0]];
314 i0 += 3;
315 *sp1++ = '-';
316 lenc++;
317 break;
318 }
319 }
320 #ifndef TFASTX
321 max0 = i0;
322 max1 = i1;
323 #else
324 max1 = i0;
325 max0 = i1;
326 min1 = smin1;
327 min0 = smin0;
328 #endif
329
330 if (lenc < 0) lenc = 1;
331
332 *nc = lenc;
333 /* now we have the middle, get the right end */
334
335 return lenc+not_c;
336 }
337
338
339