1 /* fromgb.c convert from Genbank (and IBI gel reader) format */
2 /* copyright (C) 1988 William R. Pearson */
3 /* converts files of the following format:
4
5 LOCUS oD001primer 184 BP ENTERED 1/28/86
6 ORIGIN genomic between exons 3a and 3b of dros MHC gene
7 1 ACAAAAATTA AACATAACCA ATCGAACgAA TCCGACACAC CGAACGAAAC TGATATACAG
8 61 ACACGACTTT gGAAAGATCT GCTCCAGCAA GTGACCCCCC GACTACGAAA AAGCCGTGGA
9 121 TATGTCCACT TGACATACTT ACGTGCTCTG TGCTCATACT GGCGGTACTA CACAGCTGAT
10 181 CTAC
11 //
12
13 to standard Pearson FASTA format:
14
15 >oD001primer genomic between exons 3a and 3b of dros MHC gene
16 ACAAAAATTA AACATAACCA ATCGAACgAA TCCGACACAC CGAACGAAAC TGATATACAG
17 ACACGACTTT gGAAAGATCT GCTCCAGCAA GTGACCCCCC GACTACGAAA AAGCCGTGGA
18 TATGTCCACT TGACATACTT ACGTGCTCTG TGCTCATACT GGCGGTACTA CACAGCTGAT
19 CTAC
20
21 */
22
23 /* check for arguments on the command line, write a file that has
24 the same prefix but the 'nt' suffix, if no arguments on the command
25 line, then prompt for input/output file names */
26
27 /* the program looks for a LOCUS line, then looks for an origin line, then
28 copies starting at column 11 until a // is found */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33
34 #define MAXNAME 80
35 #define MAXLINE 128
36
37 char lline[MAXLINE];
38 char filin[MAXNAME], filout[MAXNAME];
39
40 FILE *fin, *fout;
41
main(argc,argv)42 main(argc,argv)
43 int argc; char *argv[];
44 {
45 char *bp;
46 int argi;
47
48 if (argc < 2) {
49 fprintf(stderr," fromgb file1 ...\n");
50 fprintf(stderr," enter name of file to be converted: ");
51 if (fgets(filin,sizeof(filin),stdin)==NULL) exit(0);
52 if ((bp=strchr(filin,'\n'))!=NULL) *bp='\0';
53 fprintf(stderr," enter name for converted file: ");
54 if (fgets(filout,sizeof(filout),stdin)==NULL) exit(0);
55 if ((bp=strchr(filout,'\n'))!=NULL) *bp='\0';
56
57
58 l1: if ((fin=fopen(filin,"r"))==NULL) {
59 fprintf(stderr," cannot open input file: %s\n",filin);
60 fprintf(stderr," enter name of file to be converted: ");
61 if (fgets(filin,sizeof(filin),stdin)==NULL) exit(0);
62 if ((bp=strchr(filin,'\n'))!=NULL) *bp='\0';
63 goto l1;
64 }
65
66 l2: if (strcmp(filin,filout)==0) {
67 fprintf(stderr,
68 " your input and output file names are identical\n");
69 fprintf(stderr," choose a new output file name: ");
70 if (fgets(filout,sizeof(filout),stdin)==NULL) exit(0);
71 if ((bp=strchr(filout,'\n'))!=NULL) *bp='\0';
72 goto l2;
73 }
74
75
76 l3: if ((fout=fopen(filout,"w"))==NULL) {
77 fprintf(stderr," cannot open output file: %s",filout);
78 fprintf(stderr," enter name for converted file: ");
79 if (fgets(filout,sizeof(filout),stdin)==NULL) exit(0);
80 if ((bp=strchr(filout,'\n'))!=NULL) *bp='\0';
81 goto l3;
82 }
83
84 convert();
85 }
86 else /* file names are on the command line */
87
88 for (argi=1; argi<argc; argi++) {
89 strncpy(filin,argv[argi],sizeof(filin));
90 newname(filout,filin,sizeof(filout));
91 if ((fin=fopen(filin,"r"))==NULL) {
92 fprintf(stderr," cannot open: %s - skipping\n",filin);
93 continue;
94 }
95 if ((fout=fopen(filout,"w"))==NULL) {
96 fprintf(stderr," cannot open: %s - skipping\n",filout);
97 fclose(fin);
98 continue;
99 }
100 convert();
101 fclose(fin);
102 fclose(fout);
103 }
104 }
105
/*
 * newname - derive the output file name from an input file name:
 * take sequence.dat, make sequence.nt
 *
 *   new   destination buffer
 *   old   input file name (NUL-terminated)
 *   size  total size of `new` in bytes, including the terminator
 *
 * The extension (everything from the last '.') is replaced by ".nt".
 * A '.' is only treated as an extension separator when it comes after the
 * last '/', so "../seq" becomes "../seq.nt" rather than "..nt".  If the
 * result would not fit, the stem is shortened so that stem + ".nt" + NUL
 * always stays within `size` bytes.
 *
 * Returns 0 on success, -1 if the arguments are unusable (NULL pointer or
 * a buffer too small to hold even ".nt"); in the latter case `new` is set
 * to the empty string when possible.
 */
int newname(char *new, char *old, int size)
{
  static const char suffix[] = ".nt";
  const int suffix_len = (int)sizeof(suffix) - 1;
  const char *slash, *dot;
  size_t stem_len, stem_max;

  if (new == NULL || size <= 0) return -1;
  if (old == NULL || size <= suffix_len) {
    new[0] = '\0';
    return -1;
  }

  /* room left for the stem once the suffix and the terminator are reserved */
  stem_max = (size_t)(size - suffix_len - 1);

  slash = strrchr(old, '/');
  dot = strrchr(old, '.');
  if (dot != NULL && (slash == NULL || dot > slash))
    stem_len = (size_t)(dot - old);
  else
    stem_len = strlen(old);
  if (stem_len > stem_max) stem_len = stem_max;

  memmove(new, old, stem_len);
  memcpy(new + stem_len, suffix, sizeof(suffix));  /* copies the NUL too */
  return 0;
}
116
convert()117 convert() /* convert genbank file to FASTA type */
118 {
119 char locus[MAXNAME];
120 char def[MAXLINE];
121 char *strchr(), *bp;
122 int dflag;
123
124 /* first look for LOCUS, then for ORIGIN */
125
126 while (fgets(lline,sizeof(lline),fin)!=NULL)
127 if (strncmp(lline,"LOCUS",5)==0) { /* we have an entry */
128 if ((bp=strchr(&lline[12],' '))!=NULL) *bp='\0';
129 strncpy(locus,&lline[12],sizeof(locus));
130 locus[MAXNAME-1]='\0';
131 while (fgets(lline,sizeof(lline),fin)!=NULL &&
132 strncmp(lline,"ORIGIN",6)!=0 &&
133 (dflag=strncmp(lline,"DEFINITION",10))!=0) ;
134 if (feof(fin)) break;
135 if ((bp=strchr(&lline[12],'\n'))!=NULL) *bp='\0';
136 fprintf(fout,">%s %s\n",locus,&lline[12]);
137 if (dflag==0) { /* we have a definition line */
138 while (fgets(lline,sizeof(lline),fin)!=NULL &&
139 strncmp(lline,"ORIGIN",6)!=0) ;
140 if (feof(fin)) break;
141 }
142
143 while (fgets(lline,sizeof(lline),fin)!=NULL &&
144 strncmp(lline,"//",2)!=0) {
145 fprintf(fout,&lline[10]);
146 }
147 if (feof(fin)) break;
148 }
149 }
150