1 /*	aacomp.c	calculate the molecular wt and aa composition
2 			of a protein sequence
3 */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 
9 unsigned char *aa0;
10 int n0;
11 #define MAXSEQ 80000
12 
13 int nnt=17;
14 char nt[]={"\0ACGTURYMWSKDHVBNX"};
15 
16 FILE *ntfd;
17 char fname[120];
18 
main(argc,argv)19 main(argc,argv)
20      int argc; char **argv;
21 {
22   int ia, i;
23   char libstr[120];
24 
25   if (argc>1) strncpy(fname,argv[1],sizeof(fname));
26   else {
27     fprintf(stderr," usage - revcomp filename\n");
28     exit(1);
29   }
30 
31   if ((aa0=(unsigned char *)calloc((size_t)MAXSEQ,sizeof(char)))==NULL) {
32     printf(" cannot allocate %d array\n",MAXSEQ);
33     exit(1);
34   }
35 
36   initmat(nt,nnt);
37 
38   if (strlen(fname)>0) {
39     if ((ntfd=fopen(fname,"r"))==NULL) {
40       printf(" cannot open %s\n",fname);
41       exit(1);
42     }
43   }
44 
45   else ntfd = stdin;
46 
47   if ((n0=fgetseq(aa0,MAXSEQ-1,libstr,ntfd))<=0) exit(0);
48 
49   revcomp(aa0,n0);
50 
51   printf(">-%s\n",libstr);
52   for (i=0; i<n0; i++) {
53     fputc(nt[aa0[i]],stdout);
54     if (i%60==59) fputc('\n',stdout);
55   }
56   if ((n0-1)%60 != 59) fputc('\n',stdout);
57 }
58 
59 #define AAMASK 127
60 int nascii[128];
61 
initmat(aa,naa)62 initmat(aa,naa)
63      char *aa; int naa;
64 {
65   int i, iaa;
66 
67   /*	clear out nascii	*/
68   for (i=0; i<=AAMASK; i++) nascii[i]=0;
69 
70   /*	set end of line stop	*/
71   nascii[0]=nascii['\r']=nascii['\n']= -1;
72 
73   /* initialize nascii */
74   for (iaa=0; iaa<=naa; iaa++) {
75     nascii[aa[iaa]]=iaa;
76     if (nascii[aa[iaa]]>0 && aa[iaa]>='A' && aa[iaa]<='Z')
77       nascii[aa[iaa]-'A'+'a']=nascii[aa[iaa]];
78   }
79   nascii['U'] = nascii['u'] = nascii['T'];
80 }
81 
fgetseq(unsigned char * seq,int maxs,char * libstr,FILE * fptr)82 fgetseq(unsigned char *seq, int maxs,char *libstr, FILE *fptr)
83 {
84   char line[512],*bp;
85   int i, n;
86   int ic;
87 
88   i=0;
89   n=0;
90   while(fgets(line,sizeof(line),fptr)!=0) {
91     if (line[0]!='>'&& line[0]!=';')
92       for (i=0; (n<maxs)&&((ic=nascii[line[i]&AAMASK])>=0); i++) {
93 	if (ic>0) seq[n++]= ic;
94       }
95     else strncpy(libstr,line+1,120);
96   }
97   if (n==maxs) printf(" sequence may be truncated\n %d %d",n,maxs);
98 
99   if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
100 
101   fclose(fptr);
102 
103   return n;
104 }
105 
106 
revcomp(unsigned char * seq,int n)107 revcomp(unsigned char *seq, int n)
108 {
109   unsigned char tmp;
110   int i, ni;
111 
112   for (i=0; i< n; i++) {
113     if (nt[seq[i]]=='A') seq[i] = nascii['T'];
114     else if (nt[seq[i]]=='C') seq[i] = nascii['G'];
115     else if (nt[seq[i]]=='G') seq[i] = nascii['C'];
116     else if (nt[seq[i]]=='T') seq[i] = nascii['A'];
117     else if (nt[seq[i]]=='R') seq[i] = nascii['Y'];
118     else if (nt[seq[i]]=='Y') seq[i] = nascii['R'];
119     else if (nt[seq[i]]=='M') seq[i] = nascii['K'];
120     else if (nt[seq[i]]=='K') seq[i] = nascii['M'];
121     else if (nt[seq[i]]=='D') seq[i] = nascii['H'];
122     else if (nt[seq[i]]=='H') seq[i] = nascii['D'];
123     else if (nt[seq[i]]=='V') seq[i] = nascii['B'];
124     else if (nt[seq[i]]=='B') seq[i] = nascii['V'];
125   }
126 
127   for (i=0, ni = n-1; i< n/2; i++,ni--) {
128     tmp = seq[i];
129     seq[i] = seq[ni];
130     seq[ni] = tmp;
131   }
132 }
133