1 /*	chofas.c	an adaptation of Kanehisa's fortran program
2 */
3 
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 
8 #include "upam.gbl"
9 #define XTERNAL
10 #include "uascii.gbl"
11 
12 int getseq(char *filen, char *seq, int maxs, int *dnaseq);
13 int gettitle(char *filen, char * title, int len);
14 void makemap(char *input, int *map, int n);
15 void predi(char *iseq, int n);
16 
17 char *refstr="\nPlease cite:\n Chou and Fasman (1974) Biochem., 13:222-245\n";
18 char *verstr="version 2.0u66 September 1998";
19 char *progstr="CHOFAS";
20 
21 char *iprompt0=" CHOFAS predicts protein secondary structure\n";
22 char *iprompt1=" protein sequence name: ";
23 
24 #ifdef __MWERKS__
25 #include <Types.h>
26 #include <StandardFile.h>
27 SFReply freply;
28 Point wpos;
29 int tval;
30 char prompt[256];
31 #include <sioux.h>
32 #define getenv mgetenv
33 short glvRef,anvRef, sqvRef, ouvRef, q0vRef, q1vRef;
34 #define IntroDID 400	/* LFASTA */
35 #endif
36 
37 char amino[]={'A','R','N','D','C','Q','E','G','H','I','L','K','M',
38 		'F','P','S','T','W','Y','V','X',' ',' '};
39 
40 char charge[]={' ','+',' ','-',' ',' ','-',' ','.',' ',' ','+',' ',
41 		' ',' ',' ',' ',' ',' ',' ',' '};
42 
43 float hydro[]={0.5,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.5,1.8,1.8,0.0,1.3,
44 		2.5,0.0,0.0,0.4,3.4,2.3,1.5,0.0};
45 
46 int amap[23];
47 int dnaseq= -1;
48 
49 #define MAXSEQ 1000
50 char	iseq[MAXSEQ];
51 int	n0;
52 long sq0off=1;
53 #define MAXT 60
54 char title[MAXT];
55 
56 char	cph[MAXSEQ], cps[MAXSEQ], cpt[MAXSEQ];
57 int	iph[MAXSEQ], ips[MAXSEQ];
58 float	ph[MAXSEQ], ps[MAXSEQ], hyd[MAXSEQ], smd[MAXSEQ];
59 int iarr[3];
60 
61 int
main(int argc,char * argv[])62 main(int argc, char *argv[]) {
63 
64   float ya, yb, yc, da, dab, dbc, db, fn0;
65   int i, l0, l1;
66   char fname[256];
67 
68 #ifdef __MWERKS__
69   SIOUXSettings.asktosaveonclose=FALSE;
70   SIOUXSettings.showstatusline=FALSE;
71   SIOUXSettings.autocloseonquit=TRUE;
72 
73   argc = ccommand(&argv);
74   if (GetResource('DLOG',IntroDID)==(Handle)0 &&
75       OpenResFile("\pFASTA.rsrc")<0) {
76     SysBeep(100);
77     fprintf(stderr," WARNING FASTA.rsrc file could not be found\n");
78   }
79   GetVol((StringPtr)prompt,&ouvRef);
80   wpos.h=50; wpos.v=100;
81 #endif
82 
83   if (argc < 2) {
84 #ifndef __MWERKS__
85     fputs(iprompt0,stdout);
86     fprintf(stdout," %s%s\n",verstr,refstr);
87 
88   l1: fputs(iprompt1,stdout);
89     fflush(stdout);
90     if (fgets(fname,sizeof(fname),stdin)==NULL) exit(0);
91     if (fname[strlen(fname)-1]=='\n') fname[strlen(fname)-1]='\0';
92     if (fname[0]=='\0') goto l1;
93 #else
94     NIntroDlog(IntroDID,iprompt0,verstr,refstr,"\0");
95 
96   l1:	FileDlog(iprompt1,&freply);
97     if (freply.good==TRUE) {
98       PtoCstr(freply.fName);
99       strcpy(fname,(char *)freply.fName);
100       q0vRef=freply.vRefNum;
101       SetVol("\p\0",q0vRef);
102     }
103     else exit(0);
104 #endif
105   }
106   else {
107     fputs(iprompt0,stderr);
108     fprintf(stderr," %s%s\n",verstr,refstr);
109     strncpy(fname,argv[1],sizeof(fname));
110   }
111 
112 
113   sascii = aascii;
114   if ((n0=getseq(fname,iseq,MAXSEQ,&dnaseq))<=0) {
115     fprintf(stderr," could not read %s\n",fname);
116     exit(1);
117   }
118   gettitle(fname,title,MAXT);
119 
120   makemap(amino,amap,naa);
121 
122   for (i=0; i<n0; i++) {
123     iseq[i]=amap[iseq[i]];
124     hyd[i]=hydro[iseq[i]];
125   }
126   ya=hyd[3];
127   yb=hyd[0];
128   yc=hyd[1];
129   dbc=yb-yc+ya-yc+ya-hyd[4];
130   db=(dbc+dbc+ya+yb+yb)/3.0-yc;
131   dbc=dbc/2.0;
132   for (i=2; i<n0; i++) {
133     ya=yb;  yb=yc;  yc=hyd[i];
134     da=db;  db=(ya-yb)-(yb-yc);
135     dab=dbc;  dbc=da-db;
136     smd[i-2]=ya-(dab-dbc)*6.0/70.0;
137   }
138 
139   da=(dab-dbc)/35.0;
140   smd[n0-1]=yb+da+da;
141   smd[n0]=yc-da/2.0;
142 
143   for (i=0; i<n0; i++) if (smd[i]<0.0) {cpt[i]='T'; iarr[2]++;}
144   else cpt[i]=' ';
145   predi(iseq, n0);
146 
147   printf(" Chou-Fasman plot of %s, %3d aa;\n",fname,n0);
148   printf("%-s\n\n",title);
149 
150   l1 = n0/60 + 1;
151   for (l0=0; l0<l1; l0++) {
152     printf("       ");
153     for (i=l0*60; i<n0 && i<(l0+1)*60; i++)
154       printf("%c",(i%10 == 9)?'.':' ');
155     printf("\n       ");
156     for (i=l0*60; i<n0 && i<(l0+1)*60; i++)	printf("%c",amino[iseq[i]]);
157     printf("\n helix ");
158     for (i=l0*60; i<n0 && i<(l0+1)*60; i++) printf("%c",cph[i]);
159     printf("\n sheet ");
160     for (i=l0*60; i<n0 && i<(l0+1)*60; i++) printf("%c",cps[i]);
161     printf("\n turns ");
162     for (i=l0*60; i<n0 && i<(l0+1)*60; i++) printf("%c",cpt[i]);
163     printf("\n\n");
164   }
165   printf(" Residue totals: H:%3d   E:%3d   T:%3d\n",
166 	 iarr[0],iarr[1],iarr[2]);
167   fn0 = (float)(n0)/100.0;
168   printf("        percent: H: %4.1f E: %4.1f T: %4.1f\n\n",
169 	 (float)iarr[0]/fn0,(float)iarr[1]/fn0,(float)iarr[2]/fn0);
170   exit(0);
171 }
172 
173 int mh[MAXSEQ],ms[MAXSEQ];
174 
175 struct ps {
176   float helix;
177   int nh;
178   int lh;
179   float sheet;
180   int ns;
181   int ls;
182 } parr[]= {
183   1.42, 2, 1, 0.83, 0, 0,	/* A */
184   0.98, 0, 0, 0.93, 0, 0,	/* R */
185   0.67, 0,-2, 0.89, 0, 0,	/* N */
186   1.01, 1, 0, 0.54, 0,-2,	/* D */
187   0.70, 0, 0, 1.19, 1, 1,	/* C */
188   1.11, 2, 1, 1.10, 1, 1,	/* Q */
189   1.51, 2, 1, 0.37, 0,-2,	/* E */
190   0.57, 0,-2, 0.75, 0,-2,	/* G */
191   1.00, 1, 0, 0.87, 0, 0,	/* H */
192   1.08, 2, 1, 1.60, 1, 1,	/* I */
193   1.21, 2, 1, 1.30, 1, 1,	/* L */
194   1.16, 2, 1, 0.74, 0,-2,	/* K */
195   1.45, 2, 1, 1.05, 1, 1,	/* M */
196   1.13, 2, 1, 1.38, 1, 1,	/* F */
197   0.57, 0,-2, 0.55, 0,-2,	/* P */
198   0.77, 0, 0, 0.75, 0,-2,	/* S */
199   0.83, 0, 0, 1.19, 1, 1,	/* T */
200   1.08, 2, 1, 1.37, 1, 1,	/* W */
201   0.69, 0,-2, 1.47, 1, 1,	/* Y */
202   1.06, 2, 1, 1.70, 1, 1,	/* V */
203   1.00, 0, 0, 1.00, 0, 0};/* X */
204 
205 int icharge[]={0,1,0,-1,0,0,-2,0,1,0,0,1,0,0,-3,0,0,0,0,0,0};
206 
207 void
predi(char * iseq,int n0)208 predi(char *iseq, int n0) {
209   int i, i1, i2, j, k, l, l1;
210   int nch, ncs,n3, ipr;
211 
212   /*	helix nucleation ... sum of .nh >= 8 out of six residues	*/
213 
214   for (i=0; i<n0; i++) {iph[i]=0; ips[i]=0;}
215   iseq[n0]=7;
216 
217   nch = 0;
218   for (i=0; i<6; i++) nch += parr[iseq[i]].nh;
219 
220   for (i=0; i<n0-5; i++) {
221     if (nch >= 8) {
222       for (i1=i; i1<n0 && parr[iseq[i1]].nh == 0; i1++);
223       for (i2 = i+5; i2>=0 && parr[iseq[i2]].nh == 0; i2--);
224       for (k=i1; k<i2; k++) iph[k]=1;
225     }
226     nch += parr[iseq[i+6]].nh - parr[iseq[i]].nh;
227   }
228 
229   /*    ---Sheet nucleation ... sum of NS .GE. 3 out of five residues---	*/
230   ncs = 0;
231   for (i=0; i<5; i++) ncs += parr[iseq[i]].ns;
232 
233   for (i=0; i<n0-5; i++) {
234     if (ncs >= 3) {
235       for (i1=i; i1<n0 && (parr[iseq[i1]].ns == 0); i1++);
236       for (i2=i+4; i2>=0 && (parr[iseq[i2]].ns == 0); i2--);
237       for (k=i1; k<i2; k++) ips[k]=1;
238     }
239     ncs += parr[iseq[i+6]].ns - parr[iseq[i]].ns;
240   }
241 
242   /*    ---Alpha and beta potentials---	*/
243 
244   n3 = n0 - 3;
245   for (i=0; i<n3; i++) {
246     ph[i] = ps[i] = 0.0;
247     mh[i] = ms[i] = 0;
248     for (j=0; j<4; j++) {
249       ph[i] += parr[iseq[i+j]].helix;
250       ps[i] += parr[iseq[i+j]].sheet;
251       mh[i] += parr[iseq[i+j]].lh;
252       ms[i] += parr[iseq[i+j]].ls;
253     }
254     ph[i] *= 0.25;
255     ps[i] *= 0.25;
256   }
257 
258   /*	Helix propagation and termination */
259 
260   for (i=0; i<n3; i++) {
261     if (iph[i]==0) continue;
262     i1 = i+1;
263     for (; i<n3; i++) {
264       if (iph[i]!=0) continue;
265       if (ph[i] < 1.0 && mh[i]<=0) break;
266       iph[i]=1;
267     }
268     i2 = i;
269     for (i=i1-1; i>=4; i--) {
270       if (iph[i]!=0) continue;
271       if (ph[i-4] < 1.0 && mh[i-4] < 0) break;
272       iph[i] = 1;
273     }
274     i = i2-1;
275   }
276 
277   /*    ---Sheet propagation and termination---	*/
278 
279   for(i=0; i<n3; i++) {
280     if (ips[i]==0) continue;
281     for (i1 = i; i<n3; i++) {
282       if (ips[i]!=0) continue;
283       if (ps[i]<1.0 && ms[i]<= 0.0) break;
284       ips[i]=1;
285     }
286     i2 = i;
287     for (i = i1-1; i>=3; i--) {
288       if (ips[i]!=0) continue;
289       if (ps[i-4]<1.0 && ms[i-4]<=0) break;
290       ips[i]=1;
291     }
292     i = i2-1;
293   }
294 
295   /*	helix boundaries	*/
296 
297   k=0;
298   ipr=0;
299   for (i=0; i<n0; i++) {
300     if (iph[i]==0) goto l57;
301     if (ipr!=0) goto l52;
302     ipr = 1;
303   l51:	k++;
304     l=0;
305   l52:	if (icharge[iseq[i]] < -3 && l > 2) goto l55;
306   l53:	iph[i]=k;
307     l++;
308     goto l60;
309   l55:	if (l>5) goto l51;
310     l1 = l-1;
311     for (j=0; j<l1; j++) iph[i-j]=0;
312     k--;
313     goto l51;
314   l57:	if (ipr==0) goto l60;
315     ipr = 0;
316     if (l>5) goto l60;
317     l1 = l-1;
318     for (j=0; j<l1; j++) iph[i-j]=0;
319     k--;
320   l60:	continue;
321   }
322 
323   /*	Convert to annotation symbols	*/
324 
325   ipr = 0;
326   iph[n0]=0;
327   for (i=0; i<n0; i++) {
328     if (iph[i]==0) cph[i]=' ';
329     else if (iph[i]!=iph[i+1]) {cph[i]='>'; ipr=0; iarr[0]++;}
330     else if (iph[i]!=0)
331       if (ipr==0) {cph[i]='<'; ipr=1; iarr[0]++;}
332       else {cph[i]='-'; iarr[0]++;}
333   }
334   for (i=0; i<n0; i++)
335     if (ips[i]==0) cps[i]=' ';
336     else {cps[i]='E'; iarr[1]++;}
337 
338 }
339 
340 void
makemap(char * input,int * map,int n)341 makemap(char *input, int *map, int n) {
342   int i;
343 
344   for (i=0; i<n; i++) map[aascii[input[i]]]=i;
345 }
346