1  /*****************************************************************************
2 *
3 *   asn2ff.c
4 *   	convert an ASN.1 entry to flat file format, using the FFPrintArrayPtrs.
5 *
6 *****************************************************************************/
7 #include <accentr.h>
8 #include "asn2ff.h"
9 #include "asn2ffp.h"
10 #include "ffprint.h"
11 #include <subutil.h>
12 #include <objall.h>
13 #include <objcode.h>
14 #include <lsqfetch.h>
15 #include <explore.h>
16 
17 #ifdef ENABLE_ID1
18 #include <accid1.h>
19 #endif
20 
/* Stream that myHook writes toolkit messages to; Main opens it from the -r option. */
FILE *fpl = NULL;
22 
23 Args myargs[] = {
24 	{"Filename for asn.1 input","stdin",NULL,NULL,TRUE,'a',ARG_FILE_IN,0.0,0,NULL},
25 	{"Input is a Seq-entry","F", NULL ,NULL ,TRUE,'e',ARG_BOOLEAN,0.0,0,NULL},
26 	{"Input asnfile in binary mode","F",NULL,NULL,TRUE,'b',ARG_BOOLEAN,0.0,0,NULL},
27 	{"Output Filename","stdout", NULL,NULL,TRUE,'o',ARG_FILE_OUT,0.0,0,NULL},
28 	{"Show Sequence?","T", NULL ,NULL ,TRUE,'h',ARG_BOOLEAN,0.0,0,NULL},
29 	{"Log errors to file named:",NULL,NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL},
30 	{"Output Format?: b for GenBank, p for GenPept, e for EMBL, s for PseudoEMBL, x for   GenBankSelect, z for EMBLPEPT",
31 	"b", NULL,NULL,TRUE,'f',ARG_STRING,0.0,0,NULL},
32 	{"Output mode?: r for release, d for dump, s for Sequin, c for Chromoscope, k for dir-sub-debug, l for dir-sub, e for revise, p for partial report","r", NULL ,NULL ,TRUE,'m',ARG_STRING,0.0,0,NULL},
33 	{"Input is a Seq-submit","F", NULL ,NULL ,TRUE,'s',ARG_BOOLEAN,0.0,0,NULL},
34 	{"Show gi numbers?","F", NULL ,NULL ,TRUE,'g',ARG_BOOLEAN,0.0,0,NULL},
35 	{"Non-Strict gene_binding","T", NULL ,NULL ,TRUE,'n',ARG_BOOLEAN,0.0,0,NULL},
36 	{"Show error messages","T", NULL ,NULL ,TRUE,'v',ARG_BOOLEAN,0.0,0,NULL},
37 	{"Show verbose message text","F", NULL ,NULL ,TRUE,'t',ARG_BOOLEAN,0.0,0,NULL},
38 	{"Use HTML output format?","F", NULL,NULL,TRUE,'w',ARG_BOOLEAN,0.0,0,NULL},
39 	{"Output is one top bioseq only","F", NULL ,NULL ,TRUE,'q',
40 	ARG_BOOLEAN,0.0,0,NULL},
41 	{"Output is one top bioseq only in genome view","F", NULL ,NULL ,TRUE,'G',
42 	ARG_BOOLEAN,0.0,0,NULL},
43 	{"Output is map bioseqs only ","F", NULL ,NULL ,TRUE,'M',
44 	ARG_BOOLEAN,0.0,0,NULL},
45 	{"Output error logfile","stderr", NULL,NULL,TRUE,'r',ARG_FILE_OUT,0.0,0,NULL},
46 	{"Show new gene features?","T",NULL,NULL,TRUE,'p',ARG_BOOLEAN,0.0,0,NULL},
47 	{"New algorithm for orgnames?",
48 	"F",NULL,NULL,TRUE,'z',ARG_BOOLEAN,0.0,0,NULL},
49 	{"Print help format only?",
50 	"F",NULL,NULL,TRUE,'y',ARG_BOOLEAN,0.0,0,NULL},
51 	{"From to show a region", "0", NULL, NULL, TRUE, 'A', ARG_FLOAT, 0.0, 0, NULL},
52 	{"To to show a region", "0", NULL, NULL, TRUE, 'B', ARG_FLOAT, 0.0, 0, NULL},
53 	{"Complex sets (phy-set,mut-set, pop-set)?",
54 	"T",NULL,NULL,TRUE,'k',ARG_BOOLEAN,0.0,0,NULL},
55 	{"Use SeqMgr indexing?","F",NULL,NULL,TRUE,'d',ARG_BOOLEAN,0.0,0,NULL},
56 	{"Use VERSION?","T",NULL,NULL,TRUE,'V',ARG_BOOLEAN,0.0,0,NULL},
57 	{"Show Bankit comments?","F",NULL,NULL,TRUE,'C',ARG_BOOLEAN,0.0,0,NULL},
58 	{"For GenBank Release?","F",NULL,NULL,TRUE,'R',ARG_BOOLEAN,0.0,0,NULL},
59 	{"New LOCUS line format?","T",NULL,NULL,TRUE,'L',ARG_BOOLEAN,0.0,0,NULL}
60 	};
61 
62 
myHook(MsgKey key,ErrSev sev,const char * caption,const char * message)63 static MsgAnswer LIBCALLBACK myHook (MsgKey key, ErrSev sev, const char *caption, const char *message)
64 {
65 	fprintf(fpl, "%s\n", message);
66 	return ANS_OK;
67 }
68 
69 /*static void FindNuc(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
70 {
71     BioseqPtr PNTR bp;
72     BioseqPtr local_bsp;
73 
74     bp = (BioseqPtr PNTR) data;
75     if (IS_Bioseq(sep))
76     {
77         local_bsp = (BioseqPtr) sep->data.ptrvalue;
78         if (ISA_na(local_bsp->mol))
79           *bp = local_bsp;
80     }
81 }
82 */
/*
 * SeqEntryExplore callback.  mydata points at a Boolean; it is set to FALSE
 * when the visited entry is a Bioseq whose repr is neither Seq_repr_raw nor
 * Seq_repr_seg.  The flag is never set back to TRUE here.  Entries that are
 * not Bioseqs, or that carry no Bioseq pointer, are ignored.  index and
 * indent are not used.
 */
static void CheckForCookedBioseqs (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
{
  BoolPtr    rawOrSegFlag = (BoolPtr) mydata;
  BioseqPtr  bioseq;

  if (sep == NULL || ! IS_Bioseq (sep) || rawOrSegFlag == NULL) return;

  bioseq = (BioseqPtr) sep->data.ptrvalue;
  if (bioseq == NULL) return;

  if (! (bioseq->repr == Seq_repr_raw || bioseq->repr == Seq_repr_seg)) {
    *rawOrSegFlag = FALSE;
  }
}
99 
/*
 * When useSeqMgrIndexes is set, walks sep and its ->next siblings.  Each
 * entry is explored with CheckForCookedBioseqs and, while the shared flag is
 * still TRUE, SeqMgrIndexFeatures is called on that entry's entity ID.
 * Does nothing when sep is NULL or useSeqMgrIndexes is FALSE.
 *
 * NOTE(review): the flag is initialised once, before the loop, so once one
 * entry clears it no later sibling is indexed -- confirm this is intended.
 */
static void IndexASeqEntry (SeqEntryPtr sep, Boolean useSeqMgrIndexes)
{
  Boolean      onlyRawOrSeg = TRUE;
  SeqEntryPtr  cur;
  Uint2        entityID;

  if (! useSeqMgrIndexes) return;

  for (cur = sep; cur != NULL; cur = cur->next) {
    SeqEntryExplore (cur, (Pointer) &onlyRawOrSeg, CheckForCookedBioseqs);
    if (! onlyRawOrSeg) continue;
    entityID = ObjMgrGetEntityIDForChoice (cur);
    SeqMgrIndexFeatures (entityID, NULL);
  }
}
116 
Main(void)117 Int2 Main(void)
118 {
119 
120 	AsnIoPtr aip;
121 	AsnTypePtr atp = NULL, atp1, atp2;
122 	AsnModulePtr amp;
123 	Boolean error_msgs=TRUE, show_gi= TRUE, show_seq = TRUE, non_strict=TRUE;
124 	SeqEntryPtr the_set;
125 	SeqSubmitPtr ssp;
126 	StdPrintOptionsPtr Spop = NULL;
127 	FILE *fp;
128 	Uint1 format, mode;
129 	Boolean good = FALSE;
130 	Asn2ffJobPtr		ajp;
131 	Uint2 entityID;
132 	Int4 num, total;
133 	SeqLocPtr slp;
134 	SeqIntPtr sip;
135 	BioseqPtr bsp;
136 	Boolean useSeqMgrIndexes;
137 	/*
138 	ValNode v;
139 	LinkStrPtr lsp;
140 	*/
141 
142 	if ( ! GetArgs("asn2ff", sizeof(myargs)/sizeof(Args), myargs))
143 		return 1;
144 	ErrSetMessageLevel(SEV_NONE);
145 	ErrSetOptFlags(EO_SHOW_CODES);
146 	ErrSetOptFlags(EO_XLATE_CODES);
147 	if (myargs[12].intvalue)   /* show the verbose error messages? */
148 		ErrSetOptFlags(EO_MSG_MSGTEXT);
149 
150 	if (myargs[13].intvalue)   /* use HTML format for output? */
151 		init_www();
152 	if (! SeqEntryLoad())
153 		ErrShow();
154 
155 	if (myargs[8].intvalue) {
156 		if (! SubmitAsnLoad())
157 			Message(MSG_FATAL, "Unable to load parse trees.");
158 
159 		atp1 = AsnFind("Seq-submit");
160 		if (atp1 == NULL)
161 			Message(MSG_FATAL, "Unable to find Seq-submit");
162 		atp = AsnFind("Seq-submit");
163 		if (atp == NULL)
164 			Message(MSG_FATAL, "Unable to find Seq-submit");
165 
166 	} else {
167 		atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
168 		if (atp == NULL)
169 			ErrShow();
170 
171 		atp2 = AsnFind("Bioseq-set.seq-set.E");
172 		if (atp2 == NULL)
173 			ErrShow();
174 	}
175 
176 			/* open the i/o files in the right mode */
177 
178 	if ((aip =
179 		AsnIoOpen (myargs[0].strvalue, myargs[2].intvalue?"rb":"r")) == NULL)
180 		exit (1);
181 
182 	if ( (fp = FileOpen (myargs[3].strvalue, "w")) == NULL) {
183 		ErrPostEx(SEV_ERROR,0,0, "Can't open %s", myargs[3].strvalue);
184 		exit (1);
185 	}
186 	if ( (fpl = FileOpen (myargs[17].strvalue, "w")) == NULL) {
187 		ErrPostEx(SEV_ERROR,0,0, "Can't open %s", myargs[17].strvalue);
188 		exit (1);
189 	}
190 
191 	SetMessageHook(myHook);
192 
193     if (myargs[5].strvalue != NULL) { 			/* log errors instead of die */
194         if (! ErrSetLog (myargs[5].strvalue))
195             ErrShow();
196         else
197             ErrSetOpts (ERR_TEE, ERR_LOG_ON);
198     }
199 
200 	if (! myargs[4].intvalue)   /* show the sequence */
201 		show_seq = FALSE;
202 
203 	if (! myargs[9].intvalue)   /* show the gi numbers? */
204 		show_gi = FALSE;
205 
206 	if (!myargs[10].intvalue)   /* Non-Strict binding of genes to feats */
207 		non_strict = FALSE;
208 
209 	if (! myargs[11].intvalue)   /* Show validator error messages */
210 		error_msgs = FALSE;
211 
212 	format = GENBANK_FMT;
213 	if (StringICmp(myargs[6].strvalue, "b") == 0) {
214 		format = GENBANK_FMT;
215 	} else if (StringICmp(myargs[6].strvalue, "p") == 0) {
216 		format = GENPEPT_FMT;
217 	} else if (StringICmp(myargs[6].strvalue, "e") == 0) {
218 		format = EMBL_FMT;
219 	} else if (StringICmp(myargs[6].strvalue, "s") == 0) {
220 		format = PSEUDOEMBL_FMT;
221 	} else if (StringICmp(myargs[6].strvalue, "x") == 0) {
222 		format = SELECT_FMT;
223 	} else if (StringICmp(myargs[6].strvalue, "z") == 0) {
224 		format = EMBLPEPT_FMT;
225 	}
226 	if (format == GENPEPT_FMT) {
227 		if (!PrintTemplateSetLoad ("asn2ff.prt")) {
228 			ErrPostEx(SEV_WARNING, 1, 1, "PrintTemplateSetLoad failed");
229 		}
230 		if ((Spop = StdPrintOptionsNew(NULL)) != NULL) {
231 			Spop->newline = "~";
232 			Spop->indent = "";
233 		} else {
234 			ErrPostEx (SEV_FATAL, 1, 1, "StdPrintOptionsNew failed");
235 		}
236 	}
237 
238 	mode = RELEASE_MODE;
239 	if (StringICmp(myargs[7].strvalue, "r") == 0)
240 		mode = RELEASE_MODE;
241 	if (StringICmp(myargs[7].strvalue, "l") == 0)
242 		mode = DIRSUB_MODE;
243 	if (StringICmp(myargs[7].strvalue, "k") == 0)
244 		mode = DIRSUB_DEBUG_MODE;
245 	if (StringICmp(myargs[7].strvalue, "e") == 0)
246 		mode = REVISE_MODE;
247 	if (StringICmp(myargs[7].strvalue, "d") == 0)
248 		mode = DUMP_MODE;
249 	if (StringICmp(myargs[7].strvalue, "s") == 0)
250 		mode = SEQUIN_MODE;
251 	if (StringICmp(myargs[7].strvalue, "c") == 0)
252 		mode = CHROMO_MODE;
253 	if (StringICmp(myargs[7].strvalue, "p") == 0)
254 		mode = PARTIAL_MODE;
255 
256 #ifdef ENABLE_ENTREZ
257 	EntrezBioseqFetchEnable ("asn2ff", FALSE);
258 #endif
259 #ifdef ENABLE_ID1
260 	ID1BioseqFetchEnable ("asn2ff", FALSE);
261 #endif
262 #ifdef ENABLE_LOCAL
263 	BioseqFetchInit(FALSE);
264 #endif
265 	ajp = (Asn2ffJobPtr) MemNew(sizeof(Asn2ffJob));
266 	ajp->show_gene = myargs[18].intvalue;
267 	ajp->show_seq = show_seq;
268 	ajp->show_gi = show_gi;
269 	ajp->error_msgs = error_msgs;
270 	ajp->non_strict = non_strict;
271 	ajp->null_str = FALSE;
272 	ajp->format = format;
273 	ajp->mode = mode;
274 	ajp->fp = fp;
275 	ajp->Spop = Spop;
276 	ajp->gb_style = TRUE;  /* show only non_right_truncated features */
277 
278 	if (myargs[25].intvalue) {
279 		ajp->show_version = TRUE;
280 	}
281 	if (myargs[14].intvalue) {
282 		ajp->gb_style = FALSE;
283 		ajp->only_one = TRUE;
284 		ajp->ignore_top = FALSE;
285 	}
286 	if (myargs[15].intvalue) {
287 		ajp->ignore_top = TRUE;
288 		ajp->genome_view = TRUE;
289 	}
290 	if (myargs[16].intvalue) {
291 		ajp->map_view = TRUE;
292 	}
293 	if (myargs[19].intvalue) {
294 		ajp->orgname = TRUE;
295 	}
296 	if (myargs[20].intvalue) {
297 		ajp->help = TRUE;
298 	}
299 	if (myargs[26].intvalue) {
300 		ajp->bankit = TRUE;
301 	}
302 	if (myargs[27].intvalue) {
303 		ajp->forgbrel = TRUE;
304 	}
305 	if (myargs[28].intvalue) {
306 		ajp->old_locus_fmt = FALSE;
307 	} else {
308 		ajp->old_locus_fmt = TRUE;
309 	}
310 	useSeqMgrIndexes = (Boolean)(myargs[24].intvalue);
311 	/* get pointer to all loaded ASN.1 modules */
312 	amp = AsnAllModPtr();
313 	if (amp == NULL)
314 		ErrShow();
315 
316 	total = 0;
317 	num = 0;
318 	if (myargs[22].floatvalue) {
319 		if (myargs[1].intvalue) {
320 			the_set = SeqEntryAsnRead(aip, NULL);
321 			bsp = NULL;
322 			SeqEntryExplore(the_set, &bsp, FindNuc);
323 			if (bsp == NULL) {
324 				ErrPostEx(SEV_WARNING, 1, 1, "Couldn't find valid bioseq\n");
325 				SeqEntryFree(the_set);
326 				exit (1);
327 			}
328 			num = 1;
329 			slp = ValNodeNew(NULL);
330 			sip = SeqIntNew();
331 			slp->choice = SEQLOC_INT;
332 			slp->data.ptrvalue = sip;
333 			sip->from = 0;
334 			if (myargs[21].floatvalue > 0) {
335 				sip->from = myargs[21].floatvalue-1;
336 			}
337 			if (myargs[22].floatvalue > bsp->length) {
338 				sip->to = bsp->length-1;
339 			} else {
340 				sip->to = myargs[22].floatvalue-1;
341 			}
342 			sip->id = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
343 			IndexASeqEntry (the_set, useSeqMgrIndexes);
344 			SeqLocToFlat(slp, fp, format, ajp->mode);
345 		}
346 	} else if (myargs[8].intvalue) {
347 		if (mode == DUMP_MODE) {
348 		    aip->scan_for_start = TRUE;  /* scan past any garbage */
349 		}
350 		while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
351 			if (atp == atp1) {
352 				ssp = SeqSubmitAsnRead(aip, atp);
353 				if (ssp->datatype == 1) {
354 					IndexASeqEntry ((SeqEntryPtr) ssp->data, useSeqMgrIndexes);
355 	 		if (SeqEntryToFlatAjp (ajp, (SeqEntryPtr) ssp->data, fp, ajp->format, ajp->mode)) {
356 				num++;
357 			}
358 				/*	if ((SeqSubmitToFlat(ssp, fp, mode, FALSE, format,
359 							myargs[18].intvalue)) == TRUE)
360 					{
361 						num++;
362 					}*/
363 				}
364 				SeqSubmitFree(ssp);
365 
366 			} else {
367 				AsnReadVal(aip, atp, NULL);
368 			}
369 		}
370 	} else if (myargs[1].intvalue) {
371 		the_set = SeqEntryAsnRead(aip, NULL);
372 		total++;
373 /*********TEST*******
374 	v.choice = SEQID_GI;
375 	v.data.intvalue = 455854;
376 	SeqEntryToFlatEx (the_set, fp, ajp->format, ajp->mode, &v, 0);
377 			if (mode == PARTIAL_MODE) {
378 			SeqEntryToPartRpt(the_set, stdout);
379 	for (lsp=SeqEntryToStrArrayEx(the_set,  ajp->format, 5866992, TRUE); lsp;
380 		lsp=lsp->next) {
381 		printf ("%s", lsp->line);
382 	}
383 	exit (0);
384  *********TEST*******/
385    		if (myargs[23].intvalue) { /* complex sets */
386 			IndexASeqEntry (the_set, useSeqMgrIndexes);
387 	 		if (SeqEntryToFlatAjp (ajp, the_set, fp, ajp->format, ajp->mode)) {
388 				num++;
389 			}
390 		} else {
391 			if ((entityID = ObjMgrGetEntityIDForPointer(the_set)) == 0) {
392 				ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
393 			}
394 			ajp->entityID = entityID;
395 			ajp->sep = the_set;
396 			IndexASeqEntry (the_set, useSeqMgrIndexes);
397 			if (asn2ff_print(ajp)) {
398 				num++;
399 			}
400 		}
401 		SeqEntryFree(the_set);
402 	} else {
403 		while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
404 			if (atp == atp2) {  /* top level Seq-entry */
405 				the_set = SeqEntryAsnRead(aip, atp);
406 				total++;
407 				if (myargs[23].intvalue) { /* complex sets */
408 					IndexASeqEntry (the_set, useSeqMgrIndexes);
409 	 				if (SeqEntryToFlatAjp (ajp, the_set, fp, ajp->format, ajp->mode))
410 						num++;
411 	 			} else {
412 					if ((entityID=ObjMgrGetEntityIDForPointer(the_set)) == 0) {
413 						ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
414 					}
415 					ajp->entityID = entityID;
416 					ajp->sep = the_set;
417 					IndexASeqEntry (the_set, useSeqMgrIndexes);
418 					if (asn2ff_print(ajp))
419 						num++;
420 				}
421 				SeqEntryFree(the_set);
422 			} else {
423 				AsnReadVal(aip, atp, NULL);
424 			}
425 		}
426 	}
427 
428 #ifdef ENABLE_ENTREZ
429 	EntrezBioseqFetchDisable ();
430 #endif
431 #ifdef ENABLE_LOCAL
432 	BioseqFetchDisable();
433 #endif
434 	if (num == 0) {
435 		ErrPostStr(SEV_WARNING, 1, 1, "No valid entries found");
436 	} else if (num < total) {
437 		ErrPostEx(SEV_WARNING, 1, 1, "[%ld] entries have been processed [total - %ld]\n", num, total);
438 	}
439 	if (format == GENPEPT_FMT && Spop != NULL) {
440 		Spop = StdPrintOptionsFree(Spop);
441 	}
442 	MemFree(ajp);
443 	AsnIoClose(aip);
444 	FileClose(fp);
445 	return(0);
446 }
447