1 /* aacomp.c calculate the molecular wt and aa composition
2 of a protein sequence
3 */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8
9 unsigned char *aa0;
10 int n0;
11 #define MAXSEQ 80000
12
13 int nnt=17;
14 char nt[]={"\0ACGTURYMWSKDHVBNX"};
15
16 FILE *ntfd;
17 char fname[120];
18
main(argc,argv)19 main(argc,argv)
20 int argc; char **argv;
21 {
22 int ia, i;
23 char libstr[120];
24
25 if (argc>1) strncpy(fname,argv[1],sizeof(fname));
26 else {
27 fprintf(stderr," usage - revcomp filename\n");
28 exit(1);
29 }
30
31 if ((aa0=(unsigned char *)calloc((size_t)MAXSEQ,sizeof(char)))==NULL) {
32 printf(" cannot allocate %d array\n",MAXSEQ);
33 exit(1);
34 }
35
36 initmat(nt,nnt);
37
38 if (strlen(fname)>0) {
39 if ((ntfd=fopen(fname,"r"))==NULL) {
40 printf(" cannot open %s\n",fname);
41 exit(1);
42 }
43 }
44
45 else ntfd = stdin;
46
47 if ((n0=fgetseq(aa0,MAXSEQ-1,libstr,ntfd))<=0) exit(0);
48
49 revcomp(aa0,n0);
50
51 printf(">-%s\n",libstr);
52 for (i=0; i<n0; i++) {
53 fputc(nt[aa0[i]],stdout);
54 if (i%60==59) fputc('\n',stdout);
55 }
56 if ((n0-1)%60 != 59) fputc('\n',stdout);
57 }
58
59 #define AAMASK 127
60 int nascii[128];
61
initmat(aa,naa)62 initmat(aa,naa)
63 char *aa; int naa;
64 {
65 int i, iaa;
66
67 /* clear out nascii */
68 for (i=0; i<=AAMASK; i++) nascii[i]=0;
69
70 /* set end of line stop */
71 nascii[0]=nascii['\r']=nascii['\n']= -1;
72
73 /* initialize nascii */
74 for (iaa=0; iaa<=naa; iaa++) {
75 nascii[aa[iaa]]=iaa;
76 if (nascii[aa[iaa]]>0 && aa[iaa]>='A' && aa[iaa]<='Z')
77 nascii[aa[iaa]-'A'+'a']=nascii[aa[iaa]];
78 }
79 nascii['U'] = nascii['u'] = nascii['T'];
80 }
81
fgetseq(unsigned char * seq,int maxs,char * libstr,FILE * fptr)82 fgetseq(unsigned char *seq, int maxs,char *libstr, FILE *fptr)
83 {
84 char line[512],*bp;
85 int i, n;
86 int ic;
87
88 i=0;
89 n=0;
90 while(fgets(line,sizeof(line),fptr)!=0) {
91 if (line[0]!='>'&& line[0]!=';')
92 for (i=0; (n<maxs)&&((ic=nascii[line[i]&AAMASK])>=0); i++) {
93 if (ic>0) seq[n++]= ic;
94 }
95 else strncpy(libstr,line+1,120);
96 }
97 if (n==maxs) printf(" sequence may be truncated\n %d %d",n,maxs);
98
99 if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
100
101 fclose(fptr);
102
103 return n;
104 }
105
106
revcomp(unsigned char * seq,int n)107 revcomp(unsigned char *seq, int n)
108 {
109 unsigned char tmp;
110 int i, ni;
111
112 for (i=0; i< n; i++) {
113 if (nt[seq[i]]=='A') seq[i] = nascii['T'];
114 else if (nt[seq[i]]=='C') seq[i] = nascii['G'];
115 else if (nt[seq[i]]=='G') seq[i] = nascii['C'];
116 else if (nt[seq[i]]=='T') seq[i] = nascii['A'];
117 else if (nt[seq[i]]=='R') seq[i] = nascii['Y'];
118 else if (nt[seq[i]]=='Y') seq[i] = nascii['R'];
119 else if (nt[seq[i]]=='M') seq[i] = nascii['K'];
120 else if (nt[seq[i]]=='K') seq[i] = nascii['M'];
121 else if (nt[seq[i]]=='D') seq[i] = nascii['H'];
122 else if (nt[seq[i]]=='H') seq[i] = nascii['D'];
123 else if (nt[seq[i]]=='V') seq[i] = nascii['B'];
124 else if (nt[seq[i]]=='B') seq[i] = nascii['V'];
125 }
126
127 for (i=0, ni = n-1; i< n/2; i++,ni--) {
128 tmp = seq[i];
129 seq[i] = seq[ni];
130 seq[ni] = tmp;
131 }
132 }
133