1 /*****************************************************************************
2 *
3 *   asn2fast.c
4 *
5 *****************************************************************************/
6 #include <tofasta.h>
7 #include <subutil.h>
8 #include <sqnutils.h>
9 #include <accid1.h>
10 #include <lsqfetch.h>
11 
12 #define NUMARG 18
13 Args myargs[NUMARG] = {
14 	{"Filename for asn.1 input","stdin",NULL,NULL,TRUE,'a',ARG_FILE_IN,0.0,0,NULL},
15 	{"Input is a Seq-entry","F", NULL ,NULL ,TRUE,'e',ARG_BOOLEAN,0.0,0,NULL},
16 	{"Input asnfile in binary mode","F",NULL,NULL,TRUE,'b',ARG_BOOLEAN,0.0,0,NULL},
17 	{"Output Protein Filename","fasta.aa", NULL,NULL,TRUE,'p',ARG_FILE_OUT,0.0,0,NULL},
18 	{"Output DNA Filename","fasta.na", NULL,NULL,TRUE,'n',ARG_FILE_OUT,0.0,0,NULL},
19 	{"Log errors to file named:",NULL,NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL},
20 	{"Combine segmented or delta sequences","F",NULL,NULL,TRUE,'c',ARG_BOOLEAN,0.0,0,NULL},
21 	{"Produce Protein File","T",NULL,NULL,TRUE,'x',ARG_BOOLEAN,0.0,0,NULL},
22 	{"Produce DNA File","T",NULL,NULL,TRUE,'d',ARG_BOOLEAN,0.0,0,NULL},
23 	{"Limit to GenBank","F",NULL,NULL,TRUE,'g',ARG_BOOLEAN,0.0,0,NULL},
24 	{"Instantiate virtual sequences","F",NULL,NULL,TRUE,'v',ARG_BOOLEAN,0.0,0,NULL},
25 	{"Input is a Seq-submit","F", NULL ,NULL ,TRUE,'s',ARG_BOOLEAN,0.0,0,NULL},
26 	{"Produce output file of Quality Scores (DNA sequences only)","F",NULL,NULL,TRUE,'q',ARG_BOOLEAN,0.0,0,NULL},
27 	{"Output Filename for Quality Scores (DNA sequences only)","scores.ql", NULL,NULL,TRUE,'y',ARG_FILE_OUT,0.0,0,NULL},
28 	{"Far Genomic Contig function for Quality Scores","F",NULL,NULL,TRUE,'f',ARG_BOOLEAN,0.0,0,NULL},
29 	{"Remote fetching", "F", NULL, NULL, FALSE, 'r', ARG_BOOLEAN, 0.0, 0, NULL},
30 	{"Local fetching", "F", NULL, NULL, FALSE, 'k', ARG_BOOLEAN, 0.0, 0, NULL},
31 	{"Print Quality Score Gap as -1, false prints as 0", "F", NULL, NULL, FALSE, 'z', ARG_BOOLEAN, 0.0, 0, NULL},
32 };
33 
/*
 * Callback passed to PrintQualityScoresToBuffer: writes the text in buf to
 * the FILE stream supplied through userdata.  buflen is not used.
 */
static void PrintQualProc (CharPtr buf, Uint4 buflen, Pointer userdata)

{
  fprintf ((FILE *) userdata, "%s", buf);
}
42 
/*
 * SeqEntryExplore callback: writes quality scores for one Bioseq to the
 * FILE stream passed in data, via PrintQualityScoresToBuffer and
 * PrintQualProc.  Entries that are not Bioseqs are skipped.  index and
 * indent are not used.
 */
static void PrintQualScores (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
	BioseqPtr  seq;
	FILE       *out;
	Boolean    gap_flag;

	if (! IS_Bioseq (sep)) {
		return;
	}
	seq = (BioseqPtr) sep->data.ptrvalue;

	/* Quality-score output is assumed to be DNA-centric, so protein
	   Bioseqs are ignored here. */
	if (ISA_aa (seq->mol)) {
		return;
	}

	out = (FILE *) data;

	/* The -z option (myargs[17]) is inverted before being handed on:
	   option set -> FALSE, option clear -> TRUE.
	   NOTE(review): presumably the parameter means "print gaps as 0" --
	   confirm against PrintQualityScoresToBuffer. */
	gap_flag = myargs [17].intvalue ? FALSE : TRUE;
	PrintQualityScoresToBuffer (seq, gap_flag, out, PrintQualProc);
}
67 
/*
 * SeqEntryExplore callback: writes quality scores for one Bioseq to the
 * FILE stream passed in data, using PrintQualityScoresForContig.  Entries
 * that are not Bioseqs are skipped.  index and indent are not used.
 */
static void PrintFarQualScores (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
	BioseqPtr  seq;
	FILE       *out;
	Boolean    gap_flag;

	if (! IS_Bioseq (sep)) {
		return;
	}
	seq = (BioseqPtr) sep->data.ptrvalue;

	/* Quality-score output is assumed to be DNA-centric, so protein
	   Bioseqs are ignored in this callback. */
	if (ISA_aa (seq->mol)) {
		return;
	}

	out = (FILE *) data;

	/* The -z option (myargs[17]) is inverted before being handed on:
	   option set -> FALSE, option clear -> TRUE.
	   NOTE(review): presumably the parameter means "print gaps as 0" --
	   confirm against PrintQualityScoresForContig. */
	gap_flag = myargs [17].intvalue ? FALSE : TRUE;
	PrintQualityScoresForContig (seq, gap_flag, out);
}
92 
93 
94 Boolean CheckIsGenBank(SeqEntryPtr sep);
95 
Main(void)96 Int2 Main(void)
97 {
98 	AsnIoPtr aip;
99 	FILE * aa = NULL, * na = NULL, * ql = NULL;
100 	SeqEntryPtr sep;
101 	SeqSubmitPtr ssp;
102 	AsnTypePtr atp, atp2;
103 	AsnModulePtr amp;
104 	Uint1 group_segs = 0;
105 	Boolean limit_to_genbank,
106 		make_dna,
107 		make_protein,
108 		make_quality,
109 		far_quality,
110 		do_it;
111 
112 
113 					/* check command line arguments */
114 
115 	if ( ! GetArgs("asn2fast",NUMARG, myargs))
116 		return 1;
117 
118 					/* load the sequence alphabets  */
119 					/* (and sequence parse trees)   */
120 	if (! SeqEntryLoad())
121 	{
122 		ErrShow();
123 		return 1;
124 	}
125 				    /* get pointer to all loaded ASN.1 modules */
126 	amp = AsnAllModPtr();
127 	if (amp == NULL)
128 	{
129 		ErrShow();
130 		return 1;
131 	}
132 
133 	if (myargs[11].intvalue) {
134 		if (! SubmitAsnLoad())
135 			Message(MSG_FATAL, "Unable to load parse trees.");
136 
137 		atp2 = AsnFind("Seq-submit");
138 		if (atp2 == NULL)
139 			Message(MSG_FATAL, "Unable to find Seq-submit");
140 		atp = AsnFind("Seq-submit");
141 		if (atp == NULL)
142 			Message(MSG_FATAL, "Unable to find Seq-submit");
143 
144 	} else {
145 		atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
146 		if (atp == NULL)
147 		{
148 			ErrShow();
149 			return 1;
150 		}
151 
152 		atp2 = AsnFind("Bioseq-set.seq-set.E");
153 		if (atp2 == NULL)
154 		{
155 			ErrShow();
156 			return 1;
157 		}
158 	}
159 
160 	make_protein = (Boolean)(myargs[7].intvalue);
161 	make_dna = (Boolean)(myargs[8].intvalue);
162 	make_quality = (Boolean)(myargs[12].intvalue);
163 	far_quality = (Boolean)(myargs[14].intvalue);
164 
165 					/* open the ASN.1 input file in the right mode */
166 
167 	if ((aip = AsnIoOpen (myargs[0].strvalue, myargs[2].intvalue?"rb":"r"))
168           == NULL)
169 	{
170 		ErrShow();
171 		return 1;
172 	}
173 
174 				  				/* open the output file */
175 
176 	if ((myargs[3].strvalue != NULL) && (make_protein))
177 	{
178 		if ( (aa = FileOpen (myargs[3].strvalue, "w")) == NULL)
179 		{
180 			ErrShow();
181 			return 1;
182 		}
183 	}
184 
185 	if ((myargs[4].strvalue != NULL) && (make_dna))
186 	{
187 		if ( (na = FileOpen (myargs[4].strvalue, "w")) == NULL)
188 		{
189 			ErrShow();
190 			return 1;
191 		}
192 	}
193 
194 	if ((myargs[13].strvalue != NULL) && (make_quality))
195 	{
196 		if ( (ql = FileOpen (myargs[13].strvalue, "w")) == NULL)
197 		{
198 			ErrShow();
199 			return 1;
200 		}
201 	}
202 
203                                 /* log errors instead of die */
204     if (myargs[5].strvalue != NULL)
205     {
206         if (! ErrSetLog (myargs[5].strvalue))
207             ErrShow();
208         else
209             ErrSetOpts (ERR_CONTINUE, ERR_LOG_ON);
210    }
211 
212 	if (myargs[6].intvalue)  /* combine segmented seqs */
213 	{
214 		group_segs = 1;
215 		if (myargs[10].intvalue)
216 			group_segs = 3;       /* and instantiate virtuals */
217 	}
218 
219 	limit_to_genbank = (Boolean)(myargs[9].intvalue);
220 
221 	if (myargs [15].intvalue) {
222 		ID1BioseqFetchEnable ("asn2fast", FALSE);
223 	}
224 	if (myargs [16].intvalue) {
225 		LocalSeqFetchInit (FALSE);
226 	}
227 
228 	if ( myargs[1].intvalue)   /* read one Seq-entry */
229 	{
230 
231 		sep = SeqEntryAsnRead(aip, NULL);
232 		do_it = TRUE;
233 		if (limit_to_genbank)
234 			do_it = CheckIsGenBank(sep);
235 		if (do_it)
236 		{
237 			if (make_protein)
238 				SeqEntrysToFasta(sep, aa, FALSE, group_segs);
239 			if (make_dna)
240 				SeqEntrysToFasta(sep, na, TRUE, group_segs);
241 			if (make_quality) {
242 				if (far_quality) {
243 					SeqEntryExplore (sep, (Pointer) ql, PrintFarQualScores);
244 				} else {
245 					SeqEntryExplore (sep, (Pointer) ql, PrintQualScores);
246 				}
247 			}
248 		}
249 		SeqEntryFree(sep);
250 	}
251 	else if ( myargs[11].intvalue)   /* read Seq-submit's */
252 	{
253 		while ((atp = AsnReadId(aip, amp, atp)) != NULL)
254 		{
255 			if (atp == atp2)    /* top level Seq-entry */
256 			{
257 				ssp = SeqSubmitAsnRead(aip, atp);
258 				if (ssp->datatype == 1)
259 				{
260 					sep = (SeqEntryPtr) ssp->data;
261 					do_it = TRUE;
262 					if (limit_to_genbank)
263 						do_it = CheckIsGenBank(sep);
264 					if (do_it)
265 					{
266 						if (make_protein)
267 							SeqEntrysToFasta(sep, aa, FALSE, group_segs);
268 						if (make_dna)
269 							SeqEntrysToFasta(sep, na, TRUE, group_segs);
270 						if (make_quality) {
271 							if (far_quality) {
272 								SeqEntryExplore (sep, (Pointer) ql, PrintFarQualScores);
273 							} else {
274 								SeqEntryExplore (sep, (Pointer) ql, PrintQualScores);
275 							}
276 						}
277 					}
278 				}
279 				SeqSubmitFree(ssp);
280 			}
281 			else
282 			{
283 				AsnReadVal(aip, atp, NULL);
284 			}
285 		}
286 	}
287 	else                      /* read Seq-entry's from a Bioseq-set */
288 	{
289 		while ((atp = AsnReadId(aip, amp, atp)) != NULL)
290 		{
291 			if (atp == atp2)    /* top level Seq-entry */
292 			{
293 				sep = SeqEntryAsnRead(aip, atp);
294 				do_it = TRUE;
295 				if (limit_to_genbank)
296 					do_it = CheckIsGenBank(sep);
297 				if (do_it)
298 				{
299 					if (make_protein)
300 						SeqEntrysToFasta(sep, aa, FALSE, group_segs);
301 					if (make_dna)
302 						SeqEntrysToFasta(sep, na, TRUE, group_segs);
303 					if (make_quality) {
304 						if (far_quality) {
305 							SeqEntryExplore (sep, (Pointer) ql, PrintFarQualScores);
306 						} else {
307 							SeqEntryExplore (sep, (Pointer) ql, PrintQualScores);
308 						}
309 					}
310 				}
311 				SeqEntryFree(sep);
312 			}
313 			else
314 			{
315 				AsnReadVal(aip, atp, NULL);
316 			}
317 		}
318 	}
319 
320 	AsnIoClose(aip);
321 	if (make_protein)
322 		FileClose(aa);
323 	if (make_dna)
324 		FileClose(na);
325 	if (make_quality)
326 		FileClose (ql);
327 
328 	if (myargs [16].intvalue) {
329 		LocalSeqFetchDisable ();
330 	}
331 	if (myargs [15].intvalue) {
332 		ID1BioseqFetchDisable ();
333 	}
334 
335 	return(0);
336 }
337 
338 void FindGenBank (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent);
339 
CheckIsGenBank(SeqEntryPtr sep)340 Boolean CheckIsGenBank(SeqEntryPtr sep)
341 {
342 	Boolean retval = FALSE;
343 
344 	SeqEntryExplore(sep, (Pointer)(&retval), FindGenBank);
345 
346 	return retval;
347 }
348 
/*
 * SeqEntryExplore callback used by CheckIsGenBank.  data points to a
 * Boolean that is set to TRUE when sep is a nucleic-acid Bioseq with raw
 * or delta representation that carries a GenBank, EMBL or DDBJ Seq-id.
 * The flag is never reset to FALSE here.  index and indent are not used.
 */
void FindGenBank (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
	BoolPtr    found;
	BioseqPtr  seq;
	ValNodePtr id;

	found = (BoolPtr)data;
	if (*found == TRUE)   /* an earlier entry already matched */
		return;

	if (! IS_Bioseq(sep))
		return;

	seq = (BioseqPtr)(sep->data.ptrvalue);

	/* GenBank is a limited view of the world */
	if (! ISA_na(seq->mol))
		return;
	if ((seq->repr != Seq_repr_raw) && (seq->repr != Seq_repr_delta))
		return;

	for (id = seq->id; id != NULL; id = id->next)
	{
		if ((id->choice == SEQID_GENBANK) ||
		    (id->choice == SEQID_EMBL) ||
		    (id->choice == SEQID_DDBJ))
		{
			*found = TRUE;
			return;
		}
	}
}
384 
385