1 /* chofas.c an adaptation of Kanehisa's fortran program
2 */
3
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7
8 #include "upam.gbl"
9 #define XTERNAL
10 #include "uascii.gbl"
11
12 int getseq(char *filen, char *seq, int maxs, int *dnaseq);
13 int gettitle(char *filen, char * title, int len);
14 void makemap(char *input, int *map, int n);
15 void predi(char *iseq, int n);
16
17 char *refstr="\nPlease cite:\n Chou and Fasman (1974) Biochem., 13:222-245\n";
18 char *verstr="version 2.0u66 September 1998";
19 char *progstr="CHOFAS";
20
21 char *iprompt0=" CHOFAS predicts protein secondary structure\n";
22 char *iprompt1=" protein sequence name: ";
23
24 #ifdef __MWERKS__
25 #include <Types.h>
26 #include <StandardFile.h>
27 SFReply freply;
28 Point wpos;
29 int tval;
30 char prompt[256];
31 #include <sioux.h>
32 #define getenv mgetenv
33 short glvRef,anvRef, sqvRef, ouvRef, q0vRef, q1vRef;
34 #define IntroDID 400 /* LFASTA */
35 #endif
36
37 char amino[]={'A','R','N','D','C','Q','E','G','H','I','L','K','M',
38 'F','P','S','T','W','Y','V','X',' ',' '};
39
40 char charge[]={' ','+',' ','-',' ',' ','-',' ','.',' ',' ','+',' ',
41 ' ',' ',' ',' ',' ',' ',' ',' '};
42
43 float hydro[]={0.5,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.5,1.8,1.8,0.0,1.3,
44 2.5,0.0,0.0,0.4,3.4,2.3,1.5,0.0};
45
46 int amap[23];
47 int dnaseq= -1;
48
49 #define MAXSEQ 1000
50 char iseq[MAXSEQ];
51 int n0;
52 long sq0off=1;
53 #define MAXT 60
54 char title[MAXT];
55
56 char cph[MAXSEQ], cps[MAXSEQ], cpt[MAXSEQ];
57 int iph[MAXSEQ], ips[MAXSEQ];
58 float ph[MAXSEQ], ps[MAXSEQ], hyd[MAXSEQ], smd[MAXSEQ];
59 int iarr[3];
60
61 int
main(int argc,char * argv[])62 main(int argc, char *argv[]) {
63
64 float ya, yb, yc, da, dab, dbc, db, fn0;
65 int i, l0, l1;
66 char fname[256];
67
68 #ifdef __MWERKS__
69 SIOUXSettings.asktosaveonclose=FALSE;
70 SIOUXSettings.showstatusline=FALSE;
71 SIOUXSettings.autocloseonquit=TRUE;
72
73 argc = ccommand(&argv);
74 if (GetResource('DLOG',IntroDID)==(Handle)0 &&
75 OpenResFile("\pFASTA.rsrc")<0) {
76 SysBeep(100);
77 fprintf(stderr," WARNING FASTA.rsrc file could not be found\n");
78 }
79 GetVol((StringPtr)prompt,&ouvRef);
80 wpos.h=50; wpos.v=100;
81 #endif
82
83 if (argc < 2) {
84 #ifndef __MWERKS__
85 fputs(iprompt0,stdout);
86 fprintf(stdout," %s%s\n",verstr,refstr);
87
88 l1: fputs(iprompt1,stdout);
89 fflush(stdout);
90 if (fgets(fname,sizeof(fname),stdin)==NULL) exit(0);
91 if (fname[strlen(fname)-1]=='\n') fname[strlen(fname)-1]='\0';
92 if (fname[0]=='\0') goto l1;
93 #else
94 NIntroDlog(IntroDID,iprompt0,verstr,refstr,"\0");
95
96 l1: FileDlog(iprompt1,&freply);
97 if (freply.good==TRUE) {
98 PtoCstr(freply.fName);
99 strcpy(fname,(char *)freply.fName);
100 q0vRef=freply.vRefNum;
101 SetVol("\p\0",q0vRef);
102 }
103 else exit(0);
104 #endif
105 }
106 else {
107 fputs(iprompt0,stderr);
108 fprintf(stderr," %s%s\n",verstr,refstr);
109 strncpy(fname,argv[1],sizeof(fname));
110 }
111
112
113 sascii = aascii;
114 if ((n0=getseq(fname,iseq,MAXSEQ,&dnaseq))<=0) {
115 fprintf(stderr," could not read %s\n",fname);
116 exit(1);
117 }
118 gettitle(fname,title,MAXT);
119
120 makemap(amino,amap,naa);
121
122 for (i=0; i<n0; i++) {
123 iseq[i]=amap[iseq[i]];
124 hyd[i]=hydro[iseq[i]];
125 }
126 ya=hyd[3];
127 yb=hyd[0];
128 yc=hyd[1];
129 dbc=yb-yc+ya-yc+ya-hyd[4];
130 db=(dbc+dbc+ya+yb+yb)/3.0-yc;
131 dbc=dbc/2.0;
132 for (i=2; i<n0; i++) {
133 ya=yb; yb=yc; yc=hyd[i];
134 da=db; db=(ya-yb)-(yb-yc);
135 dab=dbc; dbc=da-db;
136 smd[i-2]=ya-(dab-dbc)*6.0/70.0;
137 }
138
139 da=(dab-dbc)/35.0;
140 smd[n0-1]=yb+da+da;
141 smd[n0]=yc-da/2.0;
142
143 for (i=0; i<n0; i++) if (smd[i]<0.0) {cpt[i]='T'; iarr[2]++;}
144 else cpt[i]=' ';
145 predi(iseq, n0);
146
147 printf(" Chou-Fasman plot of %s, %3d aa;\n",fname,n0);
148 printf("%-s\n\n",title);
149
150 l1 = n0/60 + 1;
151 for (l0=0; l0<l1; l0++) {
152 printf(" ");
153 for (i=l0*60; i<n0 && i<(l0+1)*60; i++)
154 printf("%c",(i%10 == 9)?'.':' ');
155 printf("\n ");
156 for (i=l0*60; i<n0 && i<(l0+1)*60; i++) printf("%c",amino[iseq[i]]);
157 printf("\n helix ");
158 for (i=l0*60; i<n0 && i<(l0+1)*60; i++) printf("%c",cph[i]);
159 printf("\n sheet ");
160 for (i=l0*60; i<n0 && i<(l0+1)*60; i++) printf("%c",cps[i]);
161 printf("\n turns ");
162 for (i=l0*60; i<n0 && i<(l0+1)*60; i++) printf("%c",cpt[i]);
163 printf("\n\n");
164 }
165 printf(" Residue totals: H:%3d E:%3d T:%3d\n",
166 iarr[0],iarr[1],iarr[2]);
167 fn0 = (float)(n0)/100.0;
168 printf(" percent: H: %4.1f E: %4.1f T: %4.1f\n\n",
169 (float)iarr[0]/fn0,(float)iarr[1]/fn0,(float)iarr[2]/fn0);
170 exit(0);
171 }
172
173 int mh[MAXSEQ],ms[MAXSEQ];
174
175 struct ps {
176 float helix;
177 int nh;
178 int lh;
179 float sheet;
180 int ns;
181 int ls;
182 } parr[]= {
183 1.42, 2, 1, 0.83, 0, 0, /* A */
184 0.98, 0, 0, 0.93, 0, 0, /* R */
185 0.67, 0,-2, 0.89, 0, 0, /* N */
186 1.01, 1, 0, 0.54, 0,-2, /* D */
187 0.70, 0, 0, 1.19, 1, 1, /* C */
188 1.11, 2, 1, 1.10, 1, 1, /* Q */
189 1.51, 2, 1, 0.37, 0,-2, /* E */
190 0.57, 0,-2, 0.75, 0,-2, /* G */
191 1.00, 1, 0, 0.87, 0, 0, /* H */
192 1.08, 2, 1, 1.60, 1, 1, /* I */
193 1.21, 2, 1, 1.30, 1, 1, /* L */
194 1.16, 2, 1, 0.74, 0,-2, /* K */
195 1.45, 2, 1, 1.05, 1, 1, /* M */
196 1.13, 2, 1, 1.38, 1, 1, /* F */
197 0.57, 0,-2, 0.55, 0,-2, /* P */
198 0.77, 0, 0, 0.75, 0,-2, /* S */
199 0.83, 0, 0, 1.19, 1, 1, /* T */
200 1.08, 2, 1, 1.37, 1, 1, /* W */
201 0.69, 0,-2, 1.47, 1, 1, /* Y */
202 1.06, 2, 1, 1.70, 1, 1, /* V */
203 1.00, 0, 0, 1.00, 0, 0};/* X */
204
205 int icharge[]={0,1,0,-1,0,0,-2,0,1,0,0,1,0,0,-3,0,0,0,0,0,0};
206
207 void
predi(char * iseq,int n0)208 predi(char *iseq, int n0) {
209 int i, i1, i2, j, k, l, l1;
210 int nch, ncs,n3, ipr;
211
212 /* helix nucleation ... sum of .nh >= 8 out of six residues */
213
214 for (i=0; i<n0; i++) {iph[i]=0; ips[i]=0;}
215 iseq[n0]=7;
216
217 nch = 0;
218 for (i=0; i<6; i++) nch += parr[iseq[i]].nh;
219
220 for (i=0; i<n0-5; i++) {
221 if (nch >= 8) {
222 for (i1=i; i1<n0 && parr[iseq[i1]].nh == 0; i1++);
223 for (i2 = i+5; i2>=0 && parr[iseq[i2]].nh == 0; i2--);
224 for (k=i1; k<i2; k++) iph[k]=1;
225 }
226 nch += parr[iseq[i+6]].nh - parr[iseq[i]].nh;
227 }
228
229 /* ---Sheet nucleation ... sum of NS .GE. 3 out of five residues--- */
230 ncs = 0;
231 for (i=0; i<5; i++) ncs += parr[iseq[i]].ns;
232
233 for (i=0; i<n0-5; i++) {
234 if (ncs >= 3) {
235 for (i1=i; i1<n0 && (parr[iseq[i1]].ns == 0); i1++);
236 for (i2=i+4; i2>=0 && (parr[iseq[i2]].ns == 0); i2--);
237 for (k=i1; k<i2; k++) ips[k]=1;
238 }
239 ncs += parr[iseq[i+6]].ns - parr[iseq[i]].ns;
240 }
241
242 /* ---Alpha and beta potentials--- */
243
244 n3 = n0 - 3;
245 for (i=0; i<n3; i++) {
246 ph[i] = ps[i] = 0.0;
247 mh[i] = ms[i] = 0;
248 for (j=0; j<4; j++) {
249 ph[i] += parr[iseq[i+j]].helix;
250 ps[i] += parr[iseq[i+j]].sheet;
251 mh[i] += parr[iseq[i+j]].lh;
252 ms[i] += parr[iseq[i+j]].ls;
253 }
254 ph[i] *= 0.25;
255 ps[i] *= 0.25;
256 }
257
258 /* Helix propagation and termination */
259
260 for (i=0; i<n3; i++) {
261 if (iph[i]==0) continue;
262 i1 = i+1;
263 for (; i<n3; i++) {
264 if (iph[i]!=0) continue;
265 if (ph[i] < 1.0 && mh[i]<=0) break;
266 iph[i]=1;
267 }
268 i2 = i;
269 for (i=i1-1; i>=4; i--) {
270 if (iph[i]!=0) continue;
271 if (ph[i-4] < 1.0 && mh[i-4] < 0) break;
272 iph[i] = 1;
273 }
274 i = i2-1;
275 }
276
277 /* ---Sheet propagation and termination--- */
278
279 for(i=0; i<n3; i++) {
280 if (ips[i]==0) continue;
281 for (i1 = i; i<n3; i++) {
282 if (ips[i]!=0) continue;
283 if (ps[i]<1.0 && ms[i]<= 0.0) break;
284 ips[i]=1;
285 }
286 i2 = i;
287 for (i = i1-1; i>=3; i--) {
288 if (ips[i]!=0) continue;
289 if (ps[i-4]<1.0 && ms[i-4]<=0) break;
290 ips[i]=1;
291 }
292 i = i2-1;
293 }
294
295 /* helix boundaries */
296
297 k=0;
298 ipr=0;
299 for (i=0; i<n0; i++) {
300 if (iph[i]==0) goto l57;
301 if (ipr!=0) goto l52;
302 ipr = 1;
303 l51: k++;
304 l=0;
305 l52: if (icharge[iseq[i]] < -3 && l > 2) goto l55;
306 l53: iph[i]=k;
307 l++;
308 goto l60;
309 l55: if (l>5) goto l51;
310 l1 = l-1;
311 for (j=0; j<l1; j++) iph[i-j]=0;
312 k--;
313 goto l51;
314 l57: if (ipr==0) goto l60;
315 ipr = 0;
316 if (l>5) goto l60;
317 l1 = l-1;
318 for (j=0; j<l1; j++) iph[i-j]=0;
319 k--;
320 l60: continue;
321 }
322
323 /* Convert to annotation symbols */
324
325 ipr = 0;
326 iph[n0]=0;
327 for (i=0; i<n0; i++) {
328 if (iph[i]==0) cph[i]=' ';
329 else if (iph[i]!=iph[i+1]) {cph[i]='>'; ipr=0; iarr[0]++;}
330 else if (iph[i]!=0)
331 if (ipr==0) {cph[i]='<'; ipr=1; iarr[0]++;}
332 else {cph[i]='-'; iarr[0]++;}
333 }
334 for (i=0; i<n0; i++)
335 if (ips[i]==0) cps[i]=' ';
336 else {cps[i]='E'; iarr[1]++;}
337
338 }
339
340 void
makemap(char * input,int * map,int n)341 makemap(char *input, int *map, int n) {
342 int i;
343
344 for (i=0; i<n; i++) map[aascii[input[i]]]=i;
345 }
346