1 /*
2 	zzlgmata.c Needleman-Wunsch nucleotide mapper to find overlaps
3 
4 	copyright (c) 1983,1986,1987 William R. Pearson
5 
6 	August, 1994 - corrected error in alignments for ssearch.
7 
8 	July, 1994 - improved running time of smatch() by 30%
9 
10 	Aug, 1991 - completely dchanged match() to use Miller/Chao linear
11 space in band algorithm.
12 
13 	July 27, 1988 - improved output from consens, so that some context
14 of the match is shown.  Put in showall == -1 briefly, then removed it.
15 
16 	June 29, 1988 - fixed bug in call to initmat();
17 
18 	September 24, 1987 - combined zzlgmata.c and zggmata.c with
19 -DGLOBAL
20 
21 */
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <math.h>
26 
27 #ifndef max
28 #define max(a,b) (((a)>(b))?(a):(b))
29 #define min(a,b) (((a)<(b))?(a):(b))
30 #endif
31 
32 #include "zzgmata.gbl"
33 #define XTERNAL
34 #include "upam.gbl"
35 
36 extern int n0, n1;		/* length of sequences */
37 extern int dnaseq;
38 extern unsigned char *aa0, *aa1;		/* sequence arrays */
39 #ifdef TFASTX
40 extern unsigned char *aa1y;
41 #else
42 extern unsigned char *aa0y;
43 #endif
44 extern FILE *outfd;
45 extern int showall;	/* show complete sequences, not just overlaps */
46 extern int llen;
47 extern int lnsflg;
48 extern int optwid;
49 
50 int smin0, smin1, smins;	/* set bounds for discons */
51 extern int markx;
52 
53 #ifdef LFASTA
54 extern int oneseq;
55 extern int lcrc0[];
56 extern int lcrc1[];
57 extern int ncrc;
58 extern int iscore, gscore;
59 int pmirror;
60 #endif
61 
62 int window;
63 int nident;
64 static int *res=NULL;
65 static int nres;
66 
dmatch(hoff,display)67 dmatch(hoff,display)
68   int hoff, display;
69 {
70   int s0, s1;
71   int nc, ns;
72   float percent;
73   unsigned int i,j;
74   int low, up;
75   int score;
76 
77   hoff -= optwid/2;
78 
79   if (!display) {
80 #ifdef TFASTX
81     score = lx_align(aa0, n0, aa1y, n1, pam2, -(gdelval-ggapval),
82 		     -ggapval,-gshift,hoff,optwid);
83 #else
84     score = lx_align(aa1, n1, aa0y, n0, pam2, -(gdelval-ggapval),
85 		     -ggapval,-gshift,hoff,optwid);
86 #endif
87     if (score < 0) return 0;
88     return score;
89   }
90 
91   /*  score=LOCAL_ALIGN(aa0-1,aa1-1,n0,n1, low, up,
92 		    pam2,-(gdelval-ggapval),-ggapval,1,
93 		    &min0,&min1,&max0,&max1,optwid);
94 
95   if (score <=0) return 0;
96   */
97   return 0;
98 
99   if (showall==1) {
100     if (hoff>0) maxc=optwid+((n0-hoff>=n1) ? n0 : n1+hoff);
101     else maxc=optwid+((n1+hoff>=n0) ? n1 : n0-hoff);
102   }
103   else maxc = min(n0,n1)+2+4*optwid+2*llen;
104   initres(n0+2+4*optwid);
105 
106   /* 	DISPLAY(aa0-1+min0-1,aa1-1+min1-1,max0-min0+1,max1-min1+1,
107 	res,min0,min1);
108 	*/
109   initseq(maxc);
110 
111   ns = calcons(aa0,n0,aa1,n1,res,nres,&nc);
112   percent = (float)nident*100.0/nc;
113 
114 #ifndef LFASTA
115   if (markx == 4)
116     disgraph(n0,n1,percent,score,min0,min1,max0,max1);
117   else {
118     if (markx==5) {
119       fputc('\n',outfd);
120       disgraph(n0, n1, percent, score, min0, min1, max0, max1);
121     }
122     fprintf(outfd," %6.3f%% identity in %d %s overlap\n",
123 	    percent,nc,sqnam);
124     discons(seqc0,seqc1,ns);
125   }
126   freeseq();
127 #else
128   if (crcknew(seqc0,seqc1,ns)) {
129     if (display) fprintf(outfd,
130 			 "\n %5.1f%% identity in %d %s overlap; init: %4d, opt: %4d\n",
131 			 percent,nc,sqnam,iscore,maxv);
132     gscore = maxv;
133     opnline((long)smin0,(long)smin1,gscore);
134     discons(seqc0,seqc1,ns);
135     clsline((long)smin0,(long)smin1,gscore);
136 #ifdef TPLOT
137     if (oneseq) {
138       pmirror = 1;
139       i = smin0;
140       smin0 = smin1;
141       smin1 = i;
142       opnline((long)smin0,(long)smin1,gscore);
143       discons(seqc1,seqc0,ns);
144       clsline((long)smin0,(long)smin1,gscore);
145       pmirror = 0;
146     }
147 #endif
148   }
149   else maxv = -1;
150 #endif
151   return maxv;
152 }
153 
154 static struct swstr {
155   int H;
156   int E;
157 } *ss;
158 
159 /* pmatch provides an interface to pro_dna() in lx_align.c */
160 
161 /* aa0 has DNA sequence, aa1 has prot sequence */
162 
pmatch(aa0,n0,aa0t,n0t,aa1,n1,flag)163 pmatch(aa0,n0,aa0t,n0t,aa1,n1,flag)
164      unsigned char *aa0, *aa0t, *aa1;
165      int n0, n0t, n1;
166      int flag;
167 {
168   int nc, minc, maxc, lc;
169   float percent;
170   int score;
171 
172 #ifndef TFASTX
173   initres(n0*3/2);
174 #else
175   initres(n0*3);
176 #endif
177 
178   score = pro_dna(aa1,n1,aa0t,n0,pam2,-(gdelval-ggapval), -ggapval, -gshift,
179 		 res,&nres);
180 
181   /*    display_alig(res,aa0t,aa1,nres,n0); */
182 
183 
184 /*  fprintf(stderr,"old: %d, nres: %d\n",min(n0,n1)*5/4, nres); */
185 
186   if (showall==1) initseq(max(n0,n1)*5/4);
187   else initseq(max(min(n0,n1)*5/4,nres)+2*llen);
188 
189 
190   nc=calcons(aa0t,n0,aa1,n1,res,nres,&lc);
191 
192   percent = (100.0*(float)nident)/(float)lc;
193 
194   if (markx == 4)
195     disgraph(n0,n1,percent,score,min0,min1,max0,max1);
196   else {
197     fprintf(outfd,
198 	    "Smith-Waterman score: %d;   %5.1f%% identity in %d %s overlap\n",
199 	    score, percent,lc,sqnam);
200     discons(seqc0,seqc1,nc);
201   }
202   freeseq();
203 
204   return score;
205 }
206 
initres(rsiz)207 initres(rsiz)		/* initialize results array */
208      int rsiz;
209 {
210   if (res==NULL) res = (int *)calloc((size_t)rsiz,sizeof(int));
211   if (res==NULL)
212     {fprintf(stderr,"cannot allocate alignment results array %d\n",rsiz);
213      exit(1);}
214 }
215 
initseq(seqsiz)216 initseq(seqsiz)		/* initialize arrays */
217 	int seqsiz;
218 {
219   seqc0=calloc((size_t)seqsiz,sizeof(char));
220   seqc1=calloc((size_t)seqsiz,sizeof(char));
221   if (seqc0==NULL || seqc1==NULL)
222     {fprintf(stderr,"cannot allocate consensus arrays %d\n",seqsiz);
223      exit(1);}
224   salloc = 1;
225 }
226 
freeseq()227 freeseq()
228 {
229   free(seqc0); free(seqc1);
230 }
231 
232 /*
233 	this function builds a consensus sequence in place by
234 	going to the maximum match and moving left and up
235 */
236 
237 
calcons(aa0,n0,aa1,n1,res,nres,nc)238 calcons(aa0,n0,aa1,n1,res,nres,nc)
239      char *aa0, *aa1;
240      int n0, n1;
241      int *res;
242      int nres;
243      int *nc;
244 {
245   int i0, i1;
246   int op, lenc, not_c, nd, ns, itmp;
247   char *sp0, *sp1;
248   int *rp, *rpmax;
249 
250   /* don't fill in the ends */
251 
252   smins = mins = 0;
253 
254 #ifndef TFASTX
255   smin1 = min1= *res++;
256   smin0 = min0= *res++;
257   sp0 = seqc0;
258   sp1 = seqc1;
259 #else
260   smin0 = min1= *res++;
261   smin1 = min0= *res++;
262   sp1 = seqc0;
263   sp0 = seqc1;
264 #endif
265 
266   rp = res;
267   rpmax = &res[nres-2];
268 
269   lenc = not_c = nident = op = 0;
270   i0 = min0;
271   i1 = min1;
272 
273   while (rp < rpmax ) {
274     switch (*rp++) {
275     case 0:
276       *sp0++ = '-';
277       *sp1++ = sq[aa1[i1++]];
278       lenc++;
279       break;
280     case 2:
281       *sp0++ = '/';
282       i0 -= 1;
283       *sp1++ = '-';
284       not_c++;
285       *sp0 = sq[aa0[i0]];
286       i0 += 3;
287       *sp1 = sq[aa1[i1++]];
288       if (*sp0 == *sp1) nident++;
289       sp0++; sp1++;
290       lenc++;
291       break;
292     case 3:
293       *sp0 = sq[aa0[i0]];
294       i0 += 3;
295       *sp1 = sq[aa1[i1++]];
296       if (*sp0 == *sp1) nident++;
297       sp0++; sp1++;
298       lenc++;
299       break;
300     case 4:
301       *sp0++ = '\\';
302       i0 += 1;
303       *sp1++ = '-';
304       not_c++;
305       *sp0 = sq[aa0[i0]];
306       i0 += 3;
307       *sp1 = sq[aa1[i1++]];
308       if (*sp0 == *sp1) nident++;
309       sp0++; sp1++;
310       lenc++;
311       break;
312     case 5:
313       *sp0++ = sq[aa0[i0]];
314       i0 += 3;
315       *sp1++ = '-';
316       lenc++;
317       break;
318     }
319   }
320 #ifndef TFASTX
321   max0 = i0;
322   max1 = i1;
323 #else
324   max1 = i0;
325   max0 = i1;
326   min1 = smin1;
327   min0 = smin0;
328 #endif
329 
330   if (lenc < 0) lenc = 1;
331 
332   *nc = lenc;
333 /*	now we have the middle, get the right end */
334 
335   return lenc+not_c;
336 }
337 
338 
339