1 /*****************************************************************************
2 *
3 * asn2ff.c
4 * convert an ASN.1 entry to flat file format, using the FFPrintArrayPtrs.
5 *
6 *****************************************************************************/
7 #include <accentr.h>
8 #include "asn2ff.h"
9 #include "asn2ffp.h"
10 #include "ffprint.h"
11 #include <subutil.h>
12 #include <objall.h>
13 #include <objcode.h>
14 #include <lsqfetch.h>
15 #include <explore.h>
16
17 #ifdef ENABLE_ID1
18 #include <accid1.h>
19 #endif
20
21 FILE *fpl;
22
23 Args myargs[] = {
24 {"Filename for asn.1 input","stdin",NULL,NULL,TRUE,'a',ARG_FILE_IN,0.0,0,NULL},
25 {"Input is a Seq-entry","F", NULL ,NULL ,TRUE,'e',ARG_BOOLEAN,0.0,0,NULL},
26 {"Input asnfile in binary mode","F",NULL,NULL,TRUE,'b',ARG_BOOLEAN,0.0,0,NULL},
27 {"Output Filename","stdout", NULL,NULL,TRUE,'o',ARG_FILE_OUT,0.0,0,NULL},
28 {"Show Sequence?","T", NULL ,NULL ,TRUE,'h',ARG_BOOLEAN,0.0,0,NULL},
29 {"Log errors to file named:",NULL,NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL},
30 {"Output Format?: b for GenBank, p for GenPept, e for EMBL, s for PseudoEMBL, x for GenBankSelect, z for EMBLPEPT",
31 "b", NULL,NULL,TRUE,'f',ARG_STRING,0.0,0,NULL},
32 {"Output mode?: r for release, d for dump, s for Sequin, c for Chromoscope, k for dir-sub-debug, l for dir-sub, e for revise, p for partial report","r", NULL ,NULL ,TRUE,'m',ARG_STRING,0.0,0,NULL},
33 {"Input is a Seq-submit","F", NULL ,NULL ,TRUE,'s',ARG_BOOLEAN,0.0,0,NULL},
34 {"Show gi numbers?","F", NULL ,NULL ,TRUE,'g',ARG_BOOLEAN,0.0,0,NULL},
35 {"Non-Strict gene_binding","T", NULL ,NULL ,TRUE,'n',ARG_BOOLEAN,0.0,0,NULL},
36 {"Show error messages","T", NULL ,NULL ,TRUE,'v',ARG_BOOLEAN,0.0,0,NULL},
37 {"Show verbose message text","F", NULL ,NULL ,TRUE,'t',ARG_BOOLEAN,0.0,0,NULL},
38 {"Use HTML output format?","F", NULL,NULL,TRUE,'w',ARG_BOOLEAN,0.0,0,NULL},
39 {"Output is one top bioseq only","F", NULL ,NULL ,TRUE,'q',
40 ARG_BOOLEAN,0.0,0,NULL},
41 {"Output is one top bioseq only in genome view","F", NULL ,NULL ,TRUE,'G',
42 ARG_BOOLEAN,0.0,0,NULL},
43 {"Output is map bioseqs only ","F", NULL ,NULL ,TRUE,'M',
44 ARG_BOOLEAN,0.0,0,NULL},
45 {"Output error logfile","stderr", NULL,NULL,TRUE,'r',ARG_FILE_OUT,0.0,0,NULL},
46 {"Show new gene features?","T",NULL,NULL,TRUE,'p',ARG_BOOLEAN,0.0,0,NULL},
47 {"New algorithm for orgnames?",
48 "F",NULL,NULL,TRUE,'z',ARG_BOOLEAN,0.0,0,NULL},
49 {"Print help format only?",
50 "F",NULL,NULL,TRUE,'y',ARG_BOOLEAN,0.0,0,NULL},
51 {"From to show a region", "0", NULL, NULL, TRUE, 'A', ARG_FLOAT, 0.0, 0, NULL},
52 {"To to show a region", "0", NULL, NULL, TRUE, 'B', ARG_FLOAT, 0.0, 0, NULL},
53 {"Complex sets (phy-set,mut-set, pop-set)?",
54 "T",NULL,NULL,TRUE,'k',ARG_BOOLEAN,0.0,0,NULL},
55 {"Use SeqMgr indexing?","F",NULL,NULL,TRUE,'d',ARG_BOOLEAN,0.0,0,NULL},
56 {"Use VERSION?","T",NULL,NULL,TRUE,'V',ARG_BOOLEAN,0.0,0,NULL},
57 {"Show Bankit comments?","F",NULL,NULL,TRUE,'C',ARG_BOOLEAN,0.0,0,NULL},
58 {"For GenBank Release?","F",NULL,NULL,TRUE,'R',ARG_BOOLEAN,0.0,0,NULL},
59 {"New LOCUS line format?","T",NULL,NULL,TRUE,'L',ARG_BOOLEAN,0.0,0,NULL}
60 };
61
62
myHook(MsgKey key,ErrSev sev,const char * caption,const char * message)63 static MsgAnswer LIBCALLBACK myHook (MsgKey key, ErrSev sev, const char *caption, const char *message)
64 {
65 fprintf(fpl, "%s\n", message);
66 return ANS_OK;
67 }
68
69 /*static void FindNuc(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
70 {
71 BioseqPtr PNTR bp;
72 BioseqPtr local_bsp;
73
74 bp = (BioseqPtr PNTR) data;
75 if (IS_Bioseq(sep))
76 {
77 local_bsp = (BioseqPtr) sep->data.ptrvalue;
78 if (ISA_na(local_bsp->mol))
79 *bp = local_bsp;
80 }
81 }
82 */
/*
 * Callback passed to SeqEntryExplore() by IndexASeqEntry.
 *
 * mydata points at a Boolean owned by the caller.  For an entry that is
 * a Bioseq whose repr is neither Seq_repr_raw nor Seq_repr_seg, the
 * Boolean is set to FALSE; in every other case it is left untouched.
 * The index and indent arguments are not used.
 */
static void CheckForCookedBioseqs (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
{
	BoolPtr   rawOrSegOnly;
	BioseqPtr seq;

	if (sep == NULL || ! IS_Bioseq (sep)) {
		return;
	}

	rawOrSegOnly = (BoolPtr) mydata;
	if (rawOrSegOnly == NULL) {
		return;
	}

	seq = (BioseqPtr) sep->data.ptrvalue;
	if (seq == NULL) {
		return;
	}

	if (! (seq->repr == Seq_repr_raw || seq->repr == Seq_repr_seg)) {
		*rawOrSegOnly = FALSE;
	}
}
99
/*
 * Optionally builds SeqMgr feature indexes for a chain of Seq-entries.
 *
 * Does nothing unless useSeqMgrIndexes is TRUE.  Otherwise walks sep and
 * its ->next siblings; each entry is scanned with CheckForCookedBioseqs
 * and, while the flag is still TRUE, SeqMgrIndexFeatures() is called on
 * the entry's entity ID.
 *
 * NOTE(review): the flag is initialised once, before the loop, so after
 * one entry clears it no later sibling is indexed either.  This matches
 * the existing behaviour; confirm whether a per-entry reset was intended.
 */
static void IndexASeqEntry (SeqEntryPtr sep, Boolean useSeqMgrIndexes)
{
	Boolean     onlyRawOrSeg = TRUE;
	SeqEntryPtr cur;

	if (! useSeqMgrIndexes) {
		return;
	}

	for (cur = sep; cur != NULL; cur = cur->next) {
		SeqEntryExplore (cur, (Pointer) &onlyRawOrSeg, CheckForCookedBioseqs);
		if (! onlyRawOrSeg) {
			continue;
		}
		SeqMgrIndexFeatures (ObjMgrGetEntityIDForChoice (cur), NULL);
	}
}
116
Main(void)117 Int2 Main(void)
118 {
119
120 AsnIoPtr aip;
121 AsnTypePtr atp = NULL, atp1, atp2;
122 AsnModulePtr amp;
123 Boolean error_msgs=TRUE, show_gi= TRUE, show_seq = TRUE, non_strict=TRUE;
124 SeqEntryPtr the_set;
125 SeqSubmitPtr ssp;
126 StdPrintOptionsPtr Spop = NULL;
127 FILE *fp;
128 Uint1 format, mode;
129 Boolean good = FALSE;
130 Asn2ffJobPtr ajp;
131 Uint2 entityID;
132 Int4 num, total;
133 SeqLocPtr slp;
134 SeqIntPtr sip;
135 BioseqPtr bsp;
136 Boolean useSeqMgrIndexes;
137 /*
138 ValNode v;
139 LinkStrPtr lsp;
140 */
141
142 if ( ! GetArgs("asn2ff", sizeof(myargs)/sizeof(Args), myargs))
143 return 1;
144 ErrSetMessageLevel(SEV_NONE);
145 ErrSetOptFlags(EO_SHOW_CODES);
146 ErrSetOptFlags(EO_XLATE_CODES);
147 if (myargs[12].intvalue) /* show the verbose error messages? */
148 ErrSetOptFlags(EO_MSG_MSGTEXT);
149
150 if (myargs[13].intvalue) /* use HTML format for output? */
151 init_www();
152 if (! SeqEntryLoad())
153 ErrShow();
154
155 if (myargs[8].intvalue) {
156 if (! SubmitAsnLoad())
157 Message(MSG_FATAL, "Unable to load parse trees.");
158
159 atp1 = AsnFind("Seq-submit");
160 if (atp1 == NULL)
161 Message(MSG_FATAL, "Unable to find Seq-submit");
162 atp = AsnFind("Seq-submit");
163 if (atp == NULL)
164 Message(MSG_FATAL, "Unable to find Seq-submit");
165
166 } else {
167 atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
168 if (atp == NULL)
169 ErrShow();
170
171 atp2 = AsnFind("Bioseq-set.seq-set.E");
172 if (atp2 == NULL)
173 ErrShow();
174 }
175
176 /* open the i/o files in the right mode */
177
178 if ((aip =
179 AsnIoOpen (myargs[0].strvalue, myargs[2].intvalue?"rb":"r")) == NULL)
180 exit (1);
181
182 if ( (fp = FileOpen (myargs[3].strvalue, "w")) == NULL) {
183 ErrPostEx(SEV_ERROR,0,0, "Can't open %s", myargs[3].strvalue);
184 exit (1);
185 }
186 if ( (fpl = FileOpen (myargs[17].strvalue, "w")) == NULL) {
187 ErrPostEx(SEV_ERROR,0,0, "Can't open %s", myargs[17].strvalue);
188 exit (1);
189 }
190
191 SetMessageHook(myHook);
192
193 if (myargs[5].strvalue != NULL) { /* log errors instead of die */
194 if (! ErrSetLog (myargs[5].strvalue))
195 ErrShow();
196 else
197 ErrSetOpts (ERR_TEE, ERR_LOG_ON);
198 }
199
200 if (! myargs[4].intvalue) /* show the sequence */
201 show_seq = FALSE;
202
203 if (! myargs[9].intvalue) /* show the gi numbers? */
204 show_gi = FALSE;
205
206 if (!myargs[10].intvalue) /* Non-Strict binding of genes to feats */
207 non_strict = FALSE;
208
209 if (! myargs[11].intvalue) /* Show validator error messages */
210 error_msgs = FALSE;
211
212 format = GENBANK_FMT;
213 if (StringICmp(myargs[6].strvalue, "b") == 0) {
214 format = GENBANK_FMT;
215 } else if (StringICmp(myargs[6].strvalue, "p") == 0) {
216 format = GENPEPT_FMT;
217 } else if (StringICmp(myargs[6].strvalue, "e") == 0) {
218 format = EMBL_FMT;
219 } else if (StringICmp(myargs[6].strvalue, "s") == 0) {
220 format = PSEUDOEMBL_FMT;
221 } else if (StringICmp(myargs[6].strvalue, "x") == 0) {
222 format = SELECT_FMT;
223 } else if (StringICmp(myargs[6].strvalue, "z") == 0) {
224 format = EMBLPEPT_FMT;
225 }
226 if (format == GENPEPT_FMT) {
227 if (!PrintTemplateSetLoad ("asn2ff.prt")) {
228 ErrPostEx(SEV_WARNING, 1, 1, "PrintTemplateSetLoad failed");
229 }
230 if ((Spop = StdPrintOptionsNew(NULL)) != NULL) {
231 Spop->newline = "~";
232 Spop->indent = "";
233 } else {
234 ErrPostEx (SEV_FATAL, 1, 1, "StdPrintOptionsNew failed");
235 }
236 }
237
238 mode = RELEASE_MODE;
239 if (StringICmp(myargs[7].strvalue, "r") == 0)
240 mode = RELEASE_MODE;
241 if (StringICmp(myargs[7].strvalue, "l") == 0)
242 mode = DIRSUB_MODE;
243 if (StringICmp(myargs[7].strvalue, "k") == 0)
244 mode = DIRSUB_DEBUG_MODE;
245 if (StringICmp(myargs[7].strvalue, "e") == 0)
246 mode = REVISE_MODE;
247 if (StringICmp(myargs[7].strvalue, "d") == 0)
248 mode = DUMP_MODE;
249 if (StringICmp(myargs[7].strvalue, "s") == 0)
250 mode = SEQUIN_MODE;
251 if (StringICmp(myargs[7].strvalue, "c") == 0)
252 mode = CHROMO_MODE;
253 if (StringICmp(myargs[7].strvalue, "p") == 0)
254 mode = PARTIAL_MODE;
255
256 #ifdef ENABLE_ENTREZ
257 EntrezBioseqFetchEnable ("asn2ff", FALSE);
258 #endif
259 #ifdef ENABLE_ID1
260 ID1BioseqFetchEnable ("asn2ff", FALSE);
261 #endif
262 #ifdef ENABLE_LOCAL
263 BioseqFetchInit(FALSE);
264 #endif
265 ajp = (Asn2ffJobPtr) MemNew(sizeof(Asn2ffJob));
266 ajp->show_gene = myargs[18].intvalue;
267 ajp->show_seq = show_seq;
268 ajp->show_gi = show_gi;
269 ajp->error_msgs = error_msgs;
270 ajp->non_strict = non_strict;
271 ajp->null_str = FALSE;
272 ajp->format = format;
273 ajp->mode = mode;
274 ajp->fp = fp;
275 ajp->Spop = Spop;
276 ajp->gb_style = TRUE; /* show only non_right_truncated features */
277
278 if (myargs[25].intvalue) {
279 ajp->show_version = TRUE;
280 }
281 if (myargs[14].intvalue) {
282 ajp->gb_style = FALSE;
283 ajp->only_one = TRUE;
284 ajp->ignore_top = FALSE;
285 }
286 if (myargs[15].intvalue) {
287 ajp->ignore_top = TRUE;
288 ajp->genome_view = TRUE;
289 }
290 if (myargs[16].intvalue) {
291 ajp->map_view = TRUE;
292 }
293 if (myargs[19].intvalue) {
294 ajp->orgname = TRUE;
295 }
296 if (myargs[20].intvalue) {
297 ajp->help = TRUE;
298 }
299 if (myargs[26].intvalue) {
300 ajp->bankit = TRUE;
301 }
302 if (myargs[27].intvalue) {
303 ajp->forgbrel = TRUE;
304 }
305 if (myargs[28].intvalue) {
306 ajp->old_locus_fmt = FALSE;
307 } else {
308 ajp->old_locus_fmt = TRUE;
309 }
310 useSeqMgrIndexes = (Boolean)(myargs[24].intvalue);
311 /* get pointer to all loaded ASN.1 modules */
312 amp = AsnAllModPtr();
313 if (amp == NULL)
314 ErrShow();
315
316 total = 0;
317 num = 0;
318 if (myargs[22].floatvalue) {
319 if (myargs[1].intvalue) {
320 the_set = SeqEntryAsnRead(aip, NULL);
321 bsp = NULL;
322 SeqEntryExplore(the_set, &bsp, FindNuc);
323 if (bsp == NULL) {
324 ErrPostEx(SEV_WARNING, 1, 1, "Couldn't find valid bioseq\n");
325 SeqEntryFree(the_set);
326 exit (1);
327 }
328 num = 1;
329 slp = ValNodeNew(NULL);
330 sip = SeqIntNew();
331 slp->choice = SEQLOC_INT;
332 slp->data.ptrvalue = sip;
333 sip->from = 0;
334 if (myargs[21].floatvalue > 0) {
335 sip->from = myargs[21].floatvalue-1;
336 }
337 if (myargs[22].floatvalue > bsp->length) {
338 sip->to = bsp->length-1;
339 } else {
340 sip->to = myargs[22].floatvalue-1;
341 }
342 sip->id = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
343 IndexASeqEntry (the_set, useSeqMgrIndexes);
344 SeqLocToFlat(slp, fp, format, ajp->mode);
345 }
346 } else if (myargs[8].intvalue) {
347 if (mode == DUMP_MODE) {
348 aip->scan_for_start = TRUE; /* scan past any garbage */
349 }
350 while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
351 if (atp == atp1) {
352 ssp = SeqSubmitAsnRead(aip, atp);
353 if (ssp->datatype == 1) {
354 IndexASeqEntry ((SeqEntryPtr) ssp->data, useSeqMgrIndexes);
355 if (SeqEntryToFlatAjp (ajp, (SeqEntryPtr) ssp->data, fp, ajp->format, ajp->mode)) {
356 num++;
357 }
358 /* if ((SeqSubmitToFlat(ssp, fp, mode, FALSE, format,
359 myargs[18].intvalue)) == TRUE)
360 {
361 num++;
362 }*/
363 }
364 SeqSubmitFree(ssp);
365
366 } else {
367 AsnReadVal(aip, atp, NULL);
368 }
369 }
370 } else if (myargs[1].intvalue) {
371 the_set = SeqEntryAsnRead(aip, NULL);
372 total++;
373 /*********TEST*******
374 v.choice = SEQID_GI;
375 v.data.intvalue = 455854;
376 SeqEntryToFlatEx (the_set, fp, ajp->format, ajp->mode, &v, 0);
377 if (mode == PARTIAL_MODE) {
378 SeqEntryToPartRpt(the_set, stdout);
379 for (lsp=SeqEntryToStrArrayEx(the_set, ajp->format, 5866992, TRUE); lsp;
380 lsp=lsp->next) {
381 printf ("%s", lsp->line);
382 }
383 exit (0);
384 *********TEST*******/
385 if (myargs[23].intvalue) { /* complex sets */
386 IndexASeqEntry (the_set, useSeqMgrIndexes);
387 if (SeqEntryToFlatAjp (ajp, the_set, fp, ajp->format, ajp->mode)) {
388 num++;
389 }
390 } else {
391 if ((entityID = ObjMgrGetEntityIDForPointer(the_set)) == 0) {
392 ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
393 }
394 ajp->entityID = entityID;
395 ajp->sep = the_set;
396 IndexASeqEntry (the_set, useSeqMgrIndexes);
397 if (asn2ff_print(ajp)) {
398 num++;
399 }
400 }
401 SeqEntryFree(the_set);
402 } else {
403 while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
404 if (atp == atp2) { /* top level Seq-entry */
405 the_set = SeqEntryAsnRead(aip, atp);
406 total++;
407 if (myargs[23].intvalue) { /* complex sets */
408 IndexASeqEntry (the_set, useSeqMgrIndexes);
409 if (SeqEntryToFlatAjp (ajp, the_set, fp, ajp->format, ajp->mode))
410 num++;
411 } else {
412 if ((entityID=ObjMgrGetEntityIDForPointer(the_set)) == 0) {
413 ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
414 }
415 ajp->entityID = entityID;
416 ajp->sep = the_set;
417 IndexASeqEntry (the_set, useSeqMgrIndexes);
418 if (asn2ff_print(ajp))
419 num++;
420 }
421 SeqEntryFree(the_set);
422 } else {
423 AsnReadVal(aip, atp, NULL);
424 }
425 }
426 }
427
428 #ifdef ENABLE_ENTREZ
429 EntrezBioseqFetchDisable ();
430 #endif
431 #ifdef ENABLE_LOCAL
432 BioseqFetchDisable();
433 #endif
434 if (num == 0) {
435 ErrPostStr(SEV_WARNING, 1, 1, "No valid entries found");
436 } else if (num < total) {
437 ErrPostEx(SEV_WARNING, 1, 1, "[%ld] entries have been processed [total - %ld]\n", num, total);
438 }
439 if (format == GENPEPT_FMT && Spop != NULL) {
440 Spop = StdPrintOptionsFree(Spop);
441 }
442 MemFree(ajp);
443 AsnIoClose(aip);
444 FileClose(fp);
445 return(0);
446 }
447