1 /*	fromgb.c	convert from Genbank (and IBI gel reader) format */
2 /*	copyright (C) 1988 William R. Pearson */
/* 	converts files of the following format:
4 
5 LOCUS       oD001primer   184 BP                      ENTERED  1/28/86
6 ORIGIN      genomic between exons 3a and 3b of dros MHC gene
7         1 ACAAAAATTA AACATAACCA ATCGAACgAA TCCGACACAC CGAACGAAAC TGATATACAG
8        61 ACACGACTTT gGAAAGATCT GCTCCAGCAA GTGACCCCCC GACTACGAAA AAGCCGTGGA
9       121 TATGTCCACT TGACATACTT ACGTGCTCTG TGCTCATACT GGCGGTACTA CACAGCTGAT
10       181 CTAC
11 //
12 
13 to standard Pearson FASTA format:
14 
15 >oD001primer genomic between exons 3a and 3b of dros MHC gene
16 ACAAAAATTA AACATAACCA ATCGAACgAA TCCGACACAC CGAACGAAAC TGATATACAG
17 ACACGACTTT gGAAAGATCT GCTCCAGCAA GTGACCCCCC GACTACGAAA AAGCCGTGGA
18 TATGTCCACT TGACATACTT ACGTGCTCTG TGCTCATACT GGCGGTACTA CACAGCTGAT
19 CTAC
20 
21 */
22 
/* check for arguments on the command line, write a file that has
the same prefix but the 'nt' suffix; if there are no arguments on the
command line, then prompt for input/output file names */
26 
27 /* the program looks for a LOCUS line, then looks for an origin line, then
28 copies starting at column 11 until a // is found */
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 
#define MAXNAME 80	/* size of the file-name and locus-name buffers */
#define MAXLINE 128	/* size of the input line buffer */

/* current input line; filled by fgets() in convert() */
char lline[MAXLINE];
/* names of the input file and of the converted output file */
char filin[MAXNAME], filout[MAXNAME];

/* streams opened by main() and read/written by convert() */
FILE *fin, *fout;
41 
main(argc,argv)42 main(argc,argv)
43 	int argc; char *argv[];
44 {
45 	char *bp;
46 	int argi;
47 
48 	if (argc < 2) {
49 		fprintf(stderr," fromgb file1 ...\n");
50 		fprintf(stderr," enter name of file to be converted: ");
51 		if (fgets(filin,sizeof(filin),stdin)==NULL) exit(0);
52 		if ((bp=strchr(filin,'\n'))!=NULL) *bp='\0';
53 		fprintf(stderr," enter name for converted file: ");
54 		if (fgets(filout,sizeof(filout),stdin)==NULL) exit(0);
55 		if ((bp=strchr(filout,'\n'))!=NULL) *bp='\0';
56 
57 
58 l1:		if ((fin=fopen(filin,"r"))==NULL) {
59 		    fprintf(stderr," cannot open input file: %s\n",filin);
60 		    fprintf(stderr," enter name of file to be converted: ");
61 		    if (fgets(filin,sizeof(filin),stdin)==NULL) exit(0);
62 		    if ((bp=strchr(filin,'\n'))!=NULL) *bp='\0';
63 		    goto l1;
64 		    }
65 
66 l2:		if (strcmp(filin,filout)==0) {
67 		    fprintf(stderr,
68 			 " your input and output file names are identical\n");
69 		    fprintf(stderr," choose a new output file name: ");
70 		    if (fgets(filout,sizeof(filout),stdin)==NULL) exit(0);
71 		    if ((bp=strchr(filout,'\n'))!=NULL) *bp='\0';
72 		    goto l2;
73 		    }
74 
75 
76 l3:		if ((fout=fopen(filout,"w"))==NULL) {
77 		    fprintf(stderr," cannot open output file: %s",filout);
78 		    fprintf(stderr," enter name for converted file: ");
79 		    if (fgets(filout,sizeof(filout),stdin)==NULL) exit(0);
80 		    if ((bp=strchr(filout,'\n'))!=NULL) *bp='\0';
81 		    goto l3;
82 		    }
83 
84 		convert();
85 		}
86 	else 		/* file names are on the command line */
87 
88 		for (argi=1; argi<argc; argi++) {
89 		    strncpy(filin,argv[argi],sizeof(filin));
90 		    newname(filout,filin,sizeof(filout));
91 		    if ((fin=fopen(filin,"r"))==NULL) {
92 			fprintf(stderr," cannot open: %s - skipping\n",filin);
93 			continue;
94 			}
95 		    if ((fout=fopen(filout,"w"))==NULL) {
96 		       fprintf(stderr," cannot open: %s - skipping\n",filout);
97 		        fclose(fin);
98 			continue;
99 			}
100 		    convert();
101 		    fclose(fin);
102 		    fclose(fout);
103 		    }
104     }
105 
/*
 * newname - derive the output file name: take sequence.dat, make sequence.nt
 *
 *	new	buffer that receives the result
 *	old	input file name
 *	size	size of the buffer `new` in bytes
 *
 * The extension (the text from the last '.' on) is removed and ".nt"
 * is appended.  A '.' is only treated as the start of an extension
 * when it follows the last '/', so "../seq" becomes "../seq.nt".
 * The stem is truncated so that the result, including ".nt" and the
 * terminating NUL, always fits in `size` bytes.  If `size` is too
 * small to hold even ".nt", `new` is set to the empty string.
 *
 * Always returns 0.
 */
int newname(char *new, char *old, int size)
{
	char *dot, *slash;
	size_t room, len;

	if (size <= 0) return 0;
	if (size < 4) {			/* no room for ".nt" + NUL */
		new[0] = '\0';
		return 0;
	}

	/* reserve 4 bytes: three for ".nt" and one for the terminator */
	room = (size_t)size - 4;
	len = strlen(old);
	if (len > room) len = room;
	memcpy(new, old, len);
	new[len] = '\0';

	dot = strrchr(new, '.');
	slash = strrchr(new, '/');
	if (dot != NULL && (slash == NULL || dot > slash)) *dot = '\0';

	strcat(new, ".nt");
	return 0;
}
116 
convert()117 convert()	/* convert genbank file to FASTA type */
118 {
119 	char locus[MAXNAME];
120 	char def[MAXLINE];
121 	char *strchr(), *bp;
122 	int dflag;
123 
124 /* first look for LOCUS, then for ORIGIN */
125 
126 	while (fgets(lline,sizeof(lline),fin)!=NULL)
127 	    if (strncmp(lline,"LOCUS",5)==0) {	/* we have an entry */
128 		if ((bp=strchr(&lline[12],' '))!=NULL) *bp='\0';
129 		strncpy(locus,&lline[12],sizeof(locus));
130 		locus[MAXNAME-1]='\0';
131 		while (fgets(lline,sizeof(lline),fin)!=NULL &&
132 		    strncmp(lline,"ORIGIN",6)!=0 &&
133 		    (dflag=strncmp(lline,"DEFINITION",10))!=0) ;
134 		if (feof(fin)) break;
135 		if ((bp=strchr(&lline[12],'\n'))!=NULL) *bp='\0';
136 		fprintf(fout,">%s %s\n",locus,&lline[12]);
137 		if (dflag==0) {		/* we have a definition line */
138 		  while (fgets(lline,sizeof(lline),fin)!=NULL &&
139 		    strncmp(lline,"ORIGIN",6)!=0) ;
140 		  if (feof(fin)) break;
141 		  }
142 
143 		while (fgets(lline,sizeof(lline),fin)!=NULL &&
144 		    strncmp(lline,"//",2)!=0) {
145 		    fprintf(fout,&lline[10]);
146 		    }
147 		if (feof(fin)) break;
148 		}
149 	}
150