1 /*   asn2all.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  asn2all.c
27 *
28 * Author:  Jonathan Kans
29 *
30 * Version Creation Date:   7/26/04
31 *
32 * $Revision: 1.167 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * ==========================================================================
39 */
40 
41 #include <ncbi.h>
42 #include <objall.h>
43 #include <objsset.h>
44 #include <objsub.h>
45 #include <objfdef.h>
46 #include <objgbseq.h>
47 #include <objtseq.h>
48 #include <sequtil.h>
49 #include <sqnutils.h>
50 #include <explore.h>
51 #include <asn2gnbi.h>
52 #include <tofasta.h>
53 #include <pmfapi.h>
54 #include <lsqfetch.h>
55 #include <connect/ncbi_gnutls.h>
56 
/* Version string for this application; used to initialize ASN2ALL_APPLICATION. */
57 #define ASN2ALL_APP_VER "14.3"
58 
/* Global (non-static) pointer to the version string defined by ASN2ALL_APP_VER. */
59 CharPtr ASN2ALL_APPLICATION = ASN2ALL_APP_VER;
60 
DoLockFarComponents(SeqEntryPtr sep,Boolean useThreads)61 static ValNodePtr DoLockFarComponents (
62   SeqEntryPtr sep,
63   Boolean useThreads
64 )
65 
66 {
67   ValNodePtr  rsult;
68   time_t      start_time, stop_time;
69 
70   start_time = GetSecs ();
71 
72   if (NlmThreadsAvailable () && useThreads) {
73     rsult = AdvcLockFarComponents (sep, TRUE, FALSE, FALSE, NULL, TRUE);
74   } else if (useThreads) {
75     Message (MSG_POST, "Threads not available in this executable");
76     rsult = AdvcLockFarComponents (sep, TRUE, FALSE, FALSE, NULL, FALSE);
77   } else {
78     rsult = AdvcLockFarComponents (sep, TRUE, FALSE, FALSE, NULL, FALSE);
79   }
80 
81   stop_time = GetSecs ();
82 
83   return rsult;
84 }
85 
/*
 * Output formats selectable for a run.  Values start at 1 and increase by
 * one in declaration order; FormatRecord switches on this value.
 */
86 typedef enum {
87   FLATFILE_FORMAT = 1,
88   FASTA_FORMAT,
89   CDS_FORMAT,
90   GENE_FORMAT,
91   DEFLINE_FORMAT,
92   TABLE_FORMAT,
93   TINY_FORMAT,
94   INSDSEQ_FORMAT,
95   ASN_FORMAT,
96   XML_FORMAT,
97   CACHE_COMPONENTS
98 } AppFormat;
99 
/*
 * AppFlagData carries the options and working state that are handed to the
 * record-processing functions and to the visitor callbacks as "userdata".
 * The field notes below describe only the uses visible in this part of the
 * file; fields without a note are not referenced in the visible code.
 */
100 typedef struct appflags {
  /* output format that FormatRecord dispatches on */
101   AppFormat     format;
102   Boolean       automatic;
103   Boolean       catenated;
104   Boolean       piped;
105   Boolean       batch;
  /* selects "rb" instead of "r" when ProcessSingleRecord opens ASN.1 input */
106   Boolean       binary;
107   Boolean       compressed;
  /* when set, far components are locked before formatting */
108   Boolean       lock;
  /* forwarded to DoLockFarComponents as the threading request */
109   Boolean       useThreads;
  /* input type: 1 = ReadAsnFastaOrFlatFile, 2 = Seq-entry, 3 = Bioseq,
     4 = Bioseq-set, 5 = Seq-submit (see ProcessSingleRecord) */
110   Int2          type;
  /* line length passed to the FASTA stream writers */
111   Int2          linelen;
  /* compared against 1, 2 and 3 when deciding near/far handling */
112   Int2          nearpolicy;
  /* FormatRecord skips segmented records when this string contains 'w' */
113   CharPtr       sourcedb;
  /* mode and style arguments for SeqEntryToGnbk / BioseqToGnbk */
114   ModType       mode;
115   StlType       style;
  /* adds REFSEQ_CONVENTIONS, SHOW_TRANCRIPTION, SHOW_PEPTIDE and tagged deflines */
116   Boolean       extended;
  /* adds RELAXED_MAPPING to the flat file flags */
117   Boolean       relaxed;
118   Boolean       failed;
  /* running feature counters used to build FASTA id suffixes */
119   Uint4         cdsID;
120   Uint4         geneID;
  /* accession filter: element count and array searched by IdInFilter
     (binary search, so presumably sorted case-insensitively - confirm) */
121   Int4          filterLen;
122   ValNodePtr    filterList;
123   CharPtr PNTR  filterArray;
  /* scratch results written by CheckFilter and CheckForSegSeq */
124   Boolean       go_on;
125   Boolean       is_segmented;
  /* optional sub-range, 1-based, consumed by AfpToSeqLoc */
126   Int4          from;
127   Int4          to;
128   Uint1         strand;
  /* text outputs for nucleotide and protein results */
129   FILE          *nt;
130   FILE          *aa;
  /* ASN.1 outputs for nucleotide and protein results */
131   AsnIoPtr      an;
132   AsnIoPtr      ap;
133   AsnModulePtr  amp;
134   AsnTypePtr    atp_bss;
135   AsnTypePtr    atp_bsss;
  /* type passed to SeqEntryAsnWrite for CACHE_COMPONENTS output */
136   AsnTypePtr    atp_se;
137   AsnTypePtr    atp_bsc;
138   AsnTypePtr    bssp_atp;
139   AsnTypePtr    atp_inst;
140   AsnTypePtr    atp_insd;
141   AsnTypePtr    atp_insde;
142   AsnTypePtr    atp_sbp;
143   AsnTypePtr    atp_ssp;
144   AsnTypePtr    atp_tss;
  /* type passed to BioseqAsnWriteAsTSeq by the TinySeq writers */
145   AsnTypePtr    atp_tsse;
146   BioseqSet     bss;
147   GBSeq         gbsq;
148   GBSet         gbst;
  /* extra blocks handed to SeqEntryToGnbk for INSDSEQ_FORMAT (nuc / prot) */
149   XtraBlock     xtran;
150   XtraBlock     xtrap;
151   TSeqSet       tss;
  /* Bioseq currently being processed by the CDS/translation/gene writers */
152   BioseqPtr     parent;
153 } AppFlagData, PNTR AppFlagPtr;
154 
/*
 * Prototype for AsnPrintNewLine, which is defined outside the visible part
 * of this file.  No live code in the visible part calls it.
 */
155 NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
156 
/*
 * DoProtFtables
 *
 * Bioseq visitor callback: for each protein Bioseq, writes a feature table
 * (FTABLE_FMT with SHOW_PROT_FTABLE) to the afp->aa file.  NULL Bioseqs and
 * non-protein Bioseqs are ignored.  userdata is the AppFlagData block.
 */
static void DoProtFtables (
  BioseqPtr bsp,
  Pointer userdata
)

{
  AppFlagPtr  opts = (AppFlagPtr) userdata;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;

  BioseqToGnbk (bsp, NULL, FTABLE_FMT, opts->mode, opts->style,
                0, 0, SHOW_PROT_FTABLE, NULL, opts->aa);
}
170 
/*
 * SaveTinyNucStreams
 *
 * Bioseq visitor callback: writes each nucleotide Bioseq to the afp->an
 * ASN.1 stream with BioseqAsnWriteAsTSeq, passing afp->atp_tsse as the
 * ASN.1 type.  NULL Bioseqs and non-nucleotides are ignored.  userdata is
 * the AppFlagData block.
 */
static void SaveTinyNucStreams (
  BioseqPtr bsp,
  Pointer userdata
)

{
  AppFlagPtr  opts = (AppFlagPtr) userdata;

  if (bsp == NULL || ! ISA_na (bsp->mol)) return;

  BioseqAsnWriteAsTSeq (bsp, opts->an, opts->atp_tsse);
  /* disabled:
  AsnPrintNewLine (opts->an);
  AsnIoFlush (opts->an);
  */
}
189 
/*
 * SaveTinyPrtStreams
 *
 * Bioseq visitor callback: writes each protein Bioseq to the afp->ap ASN.1
 * stream with BioseqAsnWriteAsTSeq, passing afp->atp_tsse as the ASN.1
 * type.  NULL Bioseqs and non-proteins are ignored.  userdata is the
 * AppFlagData block.
 */
static void SaveTinyPrtStreams (
  BioseqPtr bsp,
  Pointer userdata
)

{
  AppFlagPtr  opts = (AppFlagPtr) userdata;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;

  BioseqAsnWriteAsTSeq (bsp, opts->ap, opts->atp_tsse);
  /* disabled:
  AsnPrintNewLine (opts->ap);
  AsnIoFlush (opts->ap);
  */
}
208 
A2ADeltaLitOnly(BioseqPtr bsp)209 static Boolean A2ADeltaLitOnly (
210   BioseqPtr bsp
211 )
212 
213 {
214   ValNodePtr  vnp;
215 
216   if (bsp == NULL || bsp->repr != Seq_repr_delta) return FALSE;
217   for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
218     if (vnp->choice == 1) return FALSE;
219   }
220   return TRUE;
221 }
222 
A2ASegHasParts(BioseqPtr bsp)223 static Boolean A2ASegHasParts (
224   BioseqPtr bsp
225 )
226 
227 {
228   BioseqSetPtr  bssp;
229   SeqEntryPtr   sep;
230 
231   if (bsp == NULL || bsp->repr != Seq_repr_seg) return FALSE;
232   sep = bsp->seqentry;
233   if (sep == NULL) return FALSE;
234   sep = sep->next;
235   if (sep == NULL || (! IS_Bioseq_set (sep))) return FALSE;
236   bssp = (BioseqSetPtr) sep->data.ptrvalue;
237   if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts) return TRUE;
238   return FALSE;
239 }
240 
/*
 * IsItFar
 *
 * Bioseq visitor callback: sets the Boolean that userdata points to when
 * bsp is a segmented Bioseq without a parts set (A2ASegHasParts) or a delta
 * Bioseq that is not literal-only (A2ADeltaLitOnly).  The flag is only ever
 * set, never cleared, so it accumulates over a whole visit.
 */
static void IsItFar (
  BioseqPtr bsp,
  Pointer userdata
)

{
  BoolPtr  found = (BoolPtr) userdata;

  if (bsp == NULL || found == NULL) return;

  if ((bsp->repr == Seq_repr_seg && ! A2ASegHasParts (bsp)) ||
      (bsp->repr == Seq_repr_delta && ! A2ADeltaLitOnly (bsp))) {
    *found = TRUE;
  }
}
258 
/*
 * MapLocationOntoDeltaParent
 *
 * Returns a newly allocated copy of location whose point and interval
 * coordinates have been shifted using the segment context:
 *
 *   minus-strand segment:  cumOffset + to   - coordinate
 *   otherwise:             cumOffset - from + coordinate
 *
 * Only the coordinates are changed; the Seq-ids in the copy are untouched.
 * Other location types are copied unchanged.  Returns NULL when any
 * argument is NULL or the copy fails.  The caller frees the result.
 */
static SeqLocPtr MapLocationOntoDeltaParent (
  SeqLocPtr location,
  BioseqPtr parent,
  SeqMgrSegmentContextPtr scontext
)

{
  SeqLocPtr  copy, piece;
  SeqIntPtr  ival;
  Boolean    minus;
  SeqPntPtr  pnt;

  if (location == NULL || parent == NULL || scontext == NULL) return NULL;

  copy = (SeqLocPtr) AsnIoMemCopy (location, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
  if (copy == NULL) return NULL;

  minus = (Boolean) (scontext->strand == Seq_strand_minus);

  for (piece = SeqLocFindNext (copy, NULL);
       piece != NULL;
       piece = SeqLocFindNext (copy, piece)) {

    if (piece->choice == SEQLOC_PNT) {

      pnt = (SeqPntPtr) piece->data.ptrvalue;
      if (pnt == NULL) continue;
      pnt->point = minus ? scontext->cumOffset + scontext->to - pnt->point
                         : scontext->cumOffset - scontext->from + pnt->point;

    } else if (piece->choice == SEQLOC_INT) {

      ival = (SeqIntPtr) piece->data.ptrvalue;
      if (ival == NULL) continue;
      if (minus) {
        ival->from = scontext->cumOffset + scontext->to - ival->from;
        ival->to = scontext->cumOffset + scontext->to - ival->to;
      } else {
        ival->from = scontext->cumOffset - scontext->from + ival->from;
        ival->to = scontext->cumOffset - scontext->from + ival->to;
      }
    }
  }

  return copy;
}
310 
DoCDSSeg(SeqLocPtr slp,SeqMgrSegmentContextPtr scontext)311 static Boolean LIBCALLBACK DoCDSSeg (
312   SeqLocPtr slp,
313   SeqMgrSegmentContextPtr scontext
314 )
315 
316 {
317   AppFlagPtr         afp;
318   BioseqPtr          bsp;
319   Char               buf [128];
320   Uint2              entityID;
321   SeqMgrFeatContext  fcontext;
322   SeqLocPtr          mappedloc;
323   SeqFeatPtr         sfp;
324   SeqIdPtr           sip;
325 
326   if (slp == NULL || scontext == NULL) return TRUE;
327   afp = (AppFlagPtr) scontext->userdata;
328   if (afp == NULL) return TRUE;
329 
330   sip = SeqLocId (slp);
331   if (sip == NULL) return TRUE;
332   bsp = BioseqLockById (sip);
333   if (bsp == NULL) return TRUE;
334 
335   entityID = ObjMgrGetEntityIDForPointer (bsp);
336   SeqMgrIndexFeatures (entityID, NULL);
337 
338   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
339   while (sfp != NULL) {
340     afp->cdsID++;
341     buf [0] = '\0';
342     MakeFastaStreamIdSuffix (sfp, afp->cdsID, "_cds", buf, TRUE, TRUE);
343 
344     mappedloc = MapLocationOntoDeltaParent (sfp->location, afp->parent, scontext);
345     CdRegionFastaStreamEx (sfp, afp->nt,
346                            STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
347                            afp->linelen, 0, 0, TRUE, buf, mappedloc, afp->parent);
348     SeqLocFree (mappedloc);
349 
350     sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext);
351   }
352 
353   BioseqUnlock (bsp);
354 
355   return TRUE;
356 }
357 
/*
 * DoCDSFasta
 *
 * Bioseq visitor callback: for a nucleotide Bioseq, writes the nucleotide
 * FASTA of each coding region feature to afp->nt, numbering them with
 * afp->cdsID.  The Bioseq is recorded in afp->parent.  For delta Bioseqs,
 * unless nearpolicy is 2, the segments are then explored with DoCDSSeg.
 */
static void DoCDSFasta (
  BioseqPtr bsp,
  Pointer userdata
)

{
  SeqFeatPtr         cds;
  SeqMgrFeatContext  fcontext;
  AppFlagPtr         opts = (AppFlagPtr) userdata;
  Char               suffix [128];

  if (bsp == NULL || ! ISA_na (bsp->mol)) return;
  if (opts == NULL) return;

  opts->parent = bsp;

  for (cds = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
       cds != NULL;
       cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &fcontext)) {

    opts->cdsID++;
    suffix [0] = '\0';
    MakeFastaStreamIdSuffix (cds, opts->cdsID, "_cds", suffix, TRUE, TRUE);
    CdRegionFastaStream (cds, opts->nt,
                         STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
                         opts->linelen, 0, 0, TRUE, suffix);
  }

  if (bsp->repr != Seq_repr_delta || opts->nearpolicy == 2) return;

  SeqMgrExploreSegments (bsp, (Pointer) opts, DoCDSSeg);
}
390 
DoTransSeg(SeqLocPtr slp,SeqMgrSegmentContextPtr scontext)391 static Boolean LIBCALLBACK DoTransSeg (
392   SeqLocPtr slp,
393   SeqMgrSegmentContextPtr scontext
394 )
395 
396 {
397   AppFlagPtr         afp;
398   BioseqPtr          bsp;
399   Char               buf [128];
400   Uint2              entityID;
401   SeqMgrFeatContext  fcontext;
402   SeqLocPtr          mappedloc;
403   SeqFeatPtr         sfp;
404   SeqIdPtr           sip;
405 
406   if (slp == NULL || scontext == NULL) return TRUE;
407   afp = (AppFlagPtr) scontext->userdata;
408   if (afp == NULL) return TRUE;
409 
410   sip = SeqLocId (slp);
411   if (sip == NULL) return TRUE;
412   bsp = BioseqLockById (sip);
413   if (bsp == NULL) return TRUE;
414 
415   entityID = ObjMgrGetEntityIDForPointer (bsp);
416   SeqMgrIndexFeatures (entityID, NULL);
417 
418   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
419   while (sfp != NULL) {
420     afp->cdsID++;
421     buf [0] = '\0';
422     MakeFastaStreamIdSuffix (sfp, afp->cdsID, "_prot", buf, TRUE, TRUE);
423 
424     mappedloc = MapLocationOntoDeltaParent (sfp->location, afp->parent, scontext);
425     TranslationFastaStreamEx (sfp, afp->aa,
426                               STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
427                               afp->linelen, 0, 0, TRUE, buf, mappedloc, afp->parent);
428     SeqLocFree (mappedloc);
429 
430     sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, 0, &fcontext);
431   }
432 
433   BioseqUnlock (bsp);
434 
435   return TRUE;
436 }
437 
/*
 * DoTransFasta
 *
 * Bioseq visitor callback: for a nucleotide Bioseq, writes the translation
 * FASTA of each coding region feature to afp->aa, numbering them with
 * afp->cdsID.  The Bioseq is recorded in afp->parent.  For delta Bioseqs,
 * unless nearpolicy is 2, the segments are then explored with DoTransSeg.
 */
static void DoTransFasta (
  BioseqPtr bsp,
  Pointer userdata
)

{
  SeqFeatPtr         cds;
  SeqMgrFeatContext  fcontext;
  AppFlagPtr         opts = (AppFlagPtr) userdata;
  Char               suffix [128];

  if (bsp == NULL || ! ISA_na (bsp->mol)) return;
  if (opts == NULL) return;

  opts->parent = bsp;

  for (cds = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
       cds != NULL;
       cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &fcontext)) {

    opts->cdsID++;
    suffix [0] = '\0';
    MakeFastaStreamIdSuffix (cds, opts->cdsID, "_prot", suffix, TRUE, TRUE);
    TranslationFastaStream (cds, opts->aa,
                            STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
                            opts->linelen, 0, 0, TRUE, suffix);
  }

  if (bsp->repr != Seq_repr_delta || opts->nearpolicy == 2) return;

  SeqMgrExploreSegments (bsp, (Pointer) opts, DoTransSeg);
}
470 
DoGeneSeg(SeqLocPtr slp,SeqMgrSegmentContextPtr scontext)471 static Boolean LIBCALLBACK DoGeneSeg (
472   SeqLocPtr slp,
473   SeqMgrSegmentContextPtr scontext
474 )
475 
476 {
477   AppFlagPtr         afp;
478   BioseqPtr          bsp;
479   Char               buf [128];
480   Uint2              entityID;
481   SeqMgrFeatContext  fcontext;
482   SeqLocPtr          mappedloc;
483   SeqFeatPtr         sfp;
484   SeqIdPtr           sip;
485 
486   if (slp == NULL || scontext == NULL) return TRUE;
487   afp = (AppFlagPtr) scontext->userdata;
488   if (afp == NULL) return TRUE;
489 
490   sip = SeqLocId (slp);
491   if (sip == NULL) return TRUE;
492   bsp = BioseqLockById (sip);
493   if (bsp == NULL) return TRUE;
494 
495   entityID = ObjMgrGetEntityIDForPointer (bsp);
496   SeqMgrIndexFeatures (entityID, NULL);
497 
498   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &fcontext);
499   while (sfp != NULL) {
500     afp->cdsID++;
501     buf [0] = '\0';
502     MakeFastaStreamIdSuffix (sfp, afp->cdsID, "_gene", buf, FALSE, FALSE);
503 
504     mappedloc = MapLocationOntoDeltaParent (sfp->location, afp->parent, scontext);
505     GeneFastaStreamEx (sfp, afp->nt,
506                        STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
507                        afp->linelen, 0, 0, TRUE, buf, mappedloc, afp->parent);
508     SeqLocFree (mappedloc);
509 
510     sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_GENE, 0, &fcontext);
511   }
512 
513   BioseqUnlock (bsp);
514 
515   return TRUE;
516 }
517 
/*
 * DoGeneFasta
 *
 * Bioseq visitor callback: for a nucleotide Bioseq, writes the nucleotide
 * FASTA of each gene feature to afp->nt.  The id suffix is "_gene_<n>",
 * where n is afp->geneID after incrementing.  The Bioseq is recorded in
 * afp->parent.  For delta Bioseqs, unless nearpolicy is 2, the segments are
 * then explored with DoGeneSeg.
 */
static void DoGeneFasta (
  BioseqPtr bsp,
  Pointer userdata
)

{
  SeqMgrFeatContext  fcontext;
  SeqFeatPtr         gene;
  AppFlagPtr         opts = (AppFlagPtr) userdata;
  Char               suffix [32];

  if (bsp == NULL || ! ISA_na (bsp->mol)) return;
  if (opts == NULL) return;

  opts->parent = bsp;

  for (gene = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &fcontext);
       gene != NULL;
       gene = SeqMgrGetNextFeature (bsp, gene, SEQFEAT_GENE, 0, &fcontext)) {

    opts->geneID++;
    sprintf (suffix, "_gene_%ld", (long) opts->geneID);
    GeneFastaStream (gene, opts->nt,
                     STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
                     opts->linelen, 0, 0, TRUE, suffix);
  }

  if (bsp->repr != Seq_repr_delta || opts->nearpolicy == 2) return;

  SeqMgrExploreSegments (bsp, (Pointer) opts, DoGeneSeg);
}
549 
/*
 * DoNucDefline
 *
 * Bioseq visitor callback: for a nucleotide Bioseq, builds a definition
 * line with NewCreateDefLine and prints ">id defline" to afp->nt, where id
 * is the Bioseq's id written in PRINTID_FASTA_LONG form.  Nothing is
 * printed when no definition line could be created.
 */
static void DoNucDefline (
  BioseqPtr bsp,
  Pointer userdata
)

{
  CharPtr     defline;
  Char        idbuf [256];
  AppFlagPtr  opts = (AppFlagPtr) userdata;

  if (bsp == NULL || ! ISA_na (bsp->mol)) return;
  if (opts == NULL) return;

  defline = NewCreateDefLine (NULL, bsp, TRUE, FALSE);
  if (defline == NULL) return;

  SeqIdWrite (bsp->id, idbuf, PRINTID_FASTA_LONG, sizeof (idbuf));
  fprintf (opts->nt, ">%s %s\n", idbuf, defline);

  MemFree (defline);
}
573 
/*
 * DoProtDefline
 *
 * Bioseq visitor callback: for a protein Bioseq, builds a definition line
 * with NewCreateDefLine and prints ">id defline" to afp->aa, where id is
 * the Bioseq's id written in PRINTID_FASTA_LONG form.  Nothing is printed
 * when no definition line could be created.
 */
static void DoProtDefline (
  BioseqPtr bsp,
  Pointer userdata
)

{
  CharPtr     defline;
  Char        idbuf [256];
  AppFlagPtr  opts = (AppFlagPtr) userdata;

  if (bsp == NULL || ! ISA_aa (bsp->mol)) return;
  if (opts == NULL) return;

  defline = NewCreateDefLine (NULL, bsp, TRUE, FALSE);
  if (defline == NULL) return;

  SeqIdWrite (bsp->id, idbuf, PRINTID_FASTA_LONG, sizeof (idbuf));
  fprintf (opts->aa, ">%s %s\n", idbuf, defline);

  MemFree (defline);
}
597 
IdInFilter(CharPtr id,AppFlagPtr afp)598 static Boolean IdInFilter (
599   CharPtr id,
600   AppFlagPtr afp
601 )
602 
603 {
604   CharPtr PNTR  array;
605   Int2          L, R, mid;
606 
607   if (StringHasNoText (id) || afp == NULL) return FALSE;
608 
609   array = afp->filterArray;
610   if (array == NULL) return FALSE;
611 
612   L = 0;
613   R = afp->filterLen - 1;
614 
615   while (L < R) {
616     mid = (L + R) / 2;
617     if (StringICmp (array [mid], id) < 0) {
618       L = mid + 1;
619     } else {
620       R = mid;
621     }
622   }
623 
624   if (StringICmp (array [R], id) == 0) return TRUE;
625 
626   return FALSE;
627 }
628 
/*
 * CheckFilter
 *
 * Bioseq visitor callback: sets afp->go_on to TRUE when any Seq-id of bsp
 * is present in the filter (IdInFilter).  Each id is written in
 * PRINTID_REPORT form and cut at its first '.' before the lookup.  go_on is
 * never cleared here; the caller resets it before the visit.
 */
static void CheckFilter (
  BioseqPtr bsp,
  Pointer userdata
)

{
  CharPtr     dot;
  Char        label [64];
  AppFlagPtr  opts = (AppFlagPtr) userdata;
  SeqIdPtr    sip;

  if (bsp == NULL || opts == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    SeqIdWrite (sip, label, PRINTID_REPORT, sizeof (label));

    dot = StringChr (label, '.');
    if (dot != NULL) {
      *dot = '\0';
    }

    if (! IdInFilter (label, opts)) continue;

    opts->go_on = TRUE;
    break;
  }
}
655 
/*
 * CheckForSegSeq
 *
 * Bioseq visitor callback: sets afp->is_segmented to TRUE when bsp is a
 * segmented Bioseq.  The flag is never cleared here; the caller resets it
 * before the visit.
 */
static void CheckForSegSeq (
  BioseqPtr bsp,
  Pointer userdata
)

{
  if (bsp == NULL || userdata == NULL) return;

  if (bsp->repr == Seq_repr_seg) {
    ((AppFlagPtr) userdata)->is_segmented = TRUE;
  }
}
669 
/*
 * GetFirstGoodBioseq
 *
 * Bioseq visitor callback: stores bsp in the BioseqPtr that userdata points
 * to, but only while that slot is still NULL.  Over a visit the slot thus
 * keeps the first Bioseq presented.  userdata is dereferenced without a
 * NULL check.
 */
static void GetFirstGoodBioseq (
  BioseqPtr bsp,
  Pointer userdata
)

{
  BioseqPtr PNTR  slot = (BioseqPtr PNTR) userdata;

  if (*slot == NULL) {
    *slot = bsp;
  }
}
682 
AfpToSeqLoc(SeqEntryPtr sep,AppFlagPtr afp)683 static SeqLocPtr AfpToSeqLoc (
684   SeqEntryPtr sep,
685   AppFlagPtr afp
686 )
687 
688 {
689   BioseqPtr   bsp = NULL;
690   Int4        from;
691   SeqIntPtr   sintp;
692   SeqLocPtr   slp = NULL;
693   Uint1       strand;
694   Int4        to;
695 
696   if (sep == NULL || afp == NULL) return NULL;
697 
698   if ((afp->from < 1 || afp->to < 1) && afp->strand == Seq_strand_plus) return NULL;
699 
700   if (afp->nt != NULL && afp->aa == NULL) {
701     VisitSequencesInSep (sep, (Pointer) &bsp, VISIT_NUCS, GetFirstGoodBioseq);
702   } else if (afp->aa != NULL && afp->nt == NULL) {
703     VisitSequencesInSep (sep, (Pointer) &bsp, VISIT_PROTS, GetFirstGoodBioseq);
704   }
705   if (bsp == NULL) return NULL;
706 
707   from = afp->from;
708   to = afp->to;
709   strand = afp->strand;
710 
711   if (strand == Seq_strand_minus && from == 0 && to == 0) {
712     from = 1;
713     to = bsp->length;
714   }
715   if (from < 0) {
716     from = 1;
717   } else if (from > bsp->length) {
718     from = bsp->length;
719   }
720   if (to < 0) {
721     to = 1;
722   } else if (to > bsp->length) {
723     to = bsp->length;
724   }
725 
726   sintp = SeqIntNew ();
727   if (sintp == NULL) return NULL;
728 
729   sintp->from = from - 1;
730   sintp->to = to - 1;
731   sintp->strand = strand;
732   sintp->id = SeqIdFindBest (bsp->id, 0);
733 
734   slp = ValNodeNew (NULL);
735   if (slp == NULL) return NULL;
736 
737   slp->choice = SEQLOC_INT;
738   slp->data.ptrvalue = (Pointer) sintp;
739 
740   return slp;
741 }
742 
/*
 * A2AIndexedTopEntry
 *
 * Finds the top SeqEntry of the entity that sep belongs to and, when it
 * exists, indexes its features.  Returns the top entry or NULL.
 */
static SeqEntryPtr A2AIndexedTopEntry (
  SeqEntryPtr sep
)

{
  SeqEntryPtr  top;

  top = GetTopSeqEntryForEntityID (ObjMgrGetEntityIDForChoice (sep));
  if (top != NULL) {
    SeqMgrIndexFeatures (0, top->data.ptrvalue);
  }
  return top;
}

/*
 * FormatRecord
 *
 * Writes one record (sep) in the format selected by afp->format.
 *
 *   sep      record to format
 *   afp      options, counters and output streams
 *   bsplist  Bioseq list used only for CACHE_COMPONENTS output
 *
 * Steps, in order:
 *   1. If an accession filter is present, return unless some Bioseq id
 *      matches it.
 *   2. If afp->sourcedb contains 'w', return when the record holds a
 *      segmented Bioseq.
 *   3. Run BasicSeqEntryCleanup and detect far components (IsItFar).
 *   4. Derive flat file flags, lock policy and stream flags from the
 *      options, and build the optional sub-range location (AfpToSeqLoc).
 *   5. Dispatch on the format; each output is produced only when the
 *      corresponding stream is open.
 */
static void FormatRecord (
  SeqEntryPtr sep,
  AppFlagPtr afp,
  ValNodePtr bsplist
)

{
  BioseqPtr      cached;
  CstType        custom = 0;
  Uint2          entityID;
  FlgType        flags;
  Boolean        has_far = FALSE;
  LckType        locks = 0;
  Boolean        nuc_wanted;
  SeqLocPtr      range;
  StreamFlgType  streams = STREAM_EXPAND_GAPS;
  SeqEntryPtr    top;
  ValNodePtr     vnp;

  if (sep == NULL || afp == NULL) return;

  if (afp->filterArray != NULL) {
    afp->go_on = FALSE;
    VisitBioseqsInSep (sep, (Pointer) afp, CheckFilter);
    if (! afp->go_on) return;
  }

  if (StringChr (afp->sourcedb, 'w') != NULL) {
    afp->is_segmented = FALSE;
    VisitBioseqsInSep (sep, (Pointer) afp, CheckForSegSeq);
    if (afp->is_segmented) return;
  }

  BasicSeqEntryCleanup (sep);

  VisitBioseqsInSep (sep, (Pointer) &has_far, IsItFar);

  flags = SHOW_CONTIG_FEATURES;
  if (afp->nearpolicy == 2 && has_far) {
    flags |= ONLY_NEAR_FEATURES;
  }
  if (has_far && (! afp->lock)) {
    locks = LOOKUP_FAR_COMPONENTS;
  }
  if (afp->extended) {
    flags |= REFSEQ_CONVENTIONS | SHOW_TRANCRIPTION | SHOW_PEPTIDE;
    streams |= STREAM_TAGGED_DEFLINE;
  }
  if (afp->relaxed) {
    flags |= RELAXED_MAPPING;
  }

  range = AfpToSeqLoc (sep, afp);

  switch (afp->format) {

    case FLATFILE_FORMAT :
      if (afp->nt != NULL) {
        SeqEntryToGnbk (sep, range, GENBANK_FMT, afp->mode, afp->style,
                        flags, locks, custom, NULL, afp->nt);
      }
      if (afp->aa != NULL) {
        SeqEntryToGnbk (sep, range, GENPEPT_FMT, afp->mode, afp->style,
                        flags, 0, custom, NULL, afp->aa);
      }
      break;

    case FASTA_FORMAT :
      if (afp->nt != NULL) {
        /* nucleotide output depends on the near policy and on far content */
        switch (afp->nearpolicy) {
          case 1 :
            nuc_wanted = TRUE;
            break;
          case 2 :
            nuc_wanted = (Boolean) (! has_far);
            break;
          case 3 :
            nuc_wanted = has_far;
            break;
          default :
            nuc_wanted = FALSE;
            break;
        }
        if (nuc_wanted) {
          if (range != NULL) {
            SeqLocFastaStream (range, afp->nt, streams, afp->linelen, 0, 0);
          } else {
            SeqEntryFastaStream (sep, afp->nt, streams, afp->linelen,
                                 0, 0, TRUE, FALSE, FALSE);
          }
        }
      }
      if (afp->aa != NULL) {
        if (range != NULL) {
          SeqLocFastaStream (range, afp->aa, streams, afp->linelen, 0, 0);
        } else {
          SeqEntryFastaStream (sep, afp->aa, streams, afp->linelen,
                               0, 0, FALSE, TRUE, FALSE);
        }
      }
      break;

    case CDS_FORMAT :
      if (afp->nt != NULL) {
        top = A2AIndexedTopEntry (sep);
        if (top != NULL) {
          afp->cdsID = 0;
          VisitBioseqsInSep (top, (Pointer) afp, DoCDSFasta);
        }
      }
      if (afp->aa != NULL) {
        top = A2AIndexedTopEntry (sep);
        if (top != NULL) {
          afp->cdsID = 0;
          VisitBioseqsInSep (top, (Pointer) afp, DoTransFasta);
        }
      }
      break;

    case GENE_FORMAT :
      if (afp->nt != NULL) {
        top = A2AIndexedTopEntry (sep);
        if (top != NULL) {
          afp->geneID = 0;
          VisitBioseqsInSep (top, (Pointer) afp, DoGeneFasta);
        }
      }
      break;

    case DEFLINE_FORMAT :
      if (afp->nt != NULL) {
        top = A2AIndexedTopEntry (sep);
        if (top != NULL) {
          VisitBioseqsInSep (top, (Pointer) afp, DoNucDefline);
        }
      }
      if (afp->aa != NULL) {
        top = A2AIndexedTopEntry (sep);
        if (top != NULL) {
          VisitBioseqsInSep (top, (Pointer) afp, DoProtDefline);
        }
      }
      break;

    case TABLE_FORMAT :
      if (afp->nt != NULL) {
        SeqEntryToGnbk (sep, range, FTABLE_FMT, afp->mode, afp->style,
                        flags, locks, 0, NULL, afp->nt);
      }
      if (afp->aa != NULL) {
        VisitBioseqsInSep (sep, (Pointer) afp, DoProtFtables);
      }
      break;

    case TINY_FORMAT :
      if (afp->an != NULL) {
        VisitBioseqsInSep (sep, (Pointer) afp, SaveTinyNucStreams);
      }
      if (afp->ap != NULL) {
        VisitBioseqsInSep (sep, (Pointer) afp, SaveTinyPrtStreams);
      }
      break;

    case INSDSEQ_FORMAT :
      if (afp->an != NULL) {
        SeqEntryToGnbk (sep, range, GENBANK_FMT, afp->mode, afp->style,
                        flags, locks, custom, &(afp->xtran), NULL);
      }
      if (afp->ap != NULL) {
        SeqEntryToGnbk (sep, range, GENPEPT_FMT, afp->mode, afp->style,
                        flags, 0, custom, &(afp->xtrap), NULL);
      }
      break;

    case ASN_FORMAT :
    case XML_FORMAT :
      SeqEntryAsnWrite (sep, afp->an, NULL);
      break;

    case CACHE_COMPONENTS :
      if (afp->an == NULL) break;
      /* write the top entry of every listed Bioseq that has one */
      for (vnp = bsplist; vnp != NULL; vnp = vnp->next) {
        cached = (BioseqPtr) vnp->data.ptrvalue;
        if (cached == NULL) continue;
        entityID = ObjMgrGetEntityIDForPointer (cached);
        if (entityID < 1) continue;
        top = GetTopSeqEntryForEntityID (entityID);
        if (top == NULL) continue;
        SeqEntryAsnWrite (top, afp->an, afp->atp_se);
      }
      break;

    default :
      break;
  }

  SeqLocFree (range);
}
929 
/*
 * ProcessSingleRecord
 *
 * Reads one record from filename, formats it, and releases it again.
 *
 *   filename  input file; nothing happens when it is empty
 *   afp       options and output streams; nothing happens when it is NULL
 *
 * afp->type selects the reader: 1 uses ReadAsnFastaOrFlatFile on a text
 * file; 2 to 5 read a Seq-entry, Bioseq, Bioseq-set or Seq-submit through
 * AsnIo (binary when afp->binary is set).  Any other type is reported and
 * ignored.
 *
 * Sequence data is wrapped in a SeqEntry if it lacks one, optionally has
 * its far components locked (afp->lock), and is handed to FormatRecord.  A
 * feature-table Seq-annot is accepted only for TABLE_FORMAT: it is attached
 * to a temporary virtual DNA Bioseq carrying the first Seq-id found in its
 * feature locations, written as a feature table to afp->nt, and detached
 * again before the temporary entry is freed.
 *
 * Failures are reported with Message (MSG_POSTERR, ...).  The failure paths
 * before data has been registered return without the final cleanup.
 */
static void ProcessSingleRecord (
  CharPtr filename,
  AppFlagPtr afp
)

{
  AsnIoPtr      aip;
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  Pointer       dataptr = NULL;
  Uint2         datatype = 0, entityID = 0;
  ValNodePtr    farlist;
  FILE          *fp;
  Uint2         parenttype;
  Pointer       parentptr;
  SeqAnnotPtr   sap;
  SeqEntryPtr   sep;
  SeqFeatPtr    sfp;
  SeqIdPtr      sip;
  SeqLocPtr     slp;

  if (afp == NULL || StringHasNoText (filename)) return;

  /* read the input according to the declared type */

  switch (afp->type) {

    case 1 :
      fp = FileOpen (filename, "r");
      if (fp == NULL) {
        Message (MSG_POSTERR, "Failed to open '%s'", filename);
        return;
      }

      dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);

      FileClose (fp);
      break;

    case 2 :
    case 3 :
    case 4 :
    case 5 :
      aip = AsnIoOpen (filename, afp->binary? "rb" : "r");
      if (aip == NULL) {
        Message (MSG_POSTERR, "AsnIoOpen failed for input file '%s'", filename);
        return;
      }

      SeqMgrHoldIndexing (TRUE);
      if (afp->type == 2) {
        dataptr = (Pointer) SeqEntryAsnRead (aip, NULL);
        datatype = OBJ_SEQENTRY;
      } else if (afp->type == 3) {
        dataptr = (Pointer) BioseqAsnRead (aip, NULL);
        datatype = OBJ_BIOSEQ;
      } else if (afp->type == 4) {
        dataptr = (Pointer) BioseqSetAsnRead (aip, NULL);
        datatype = OBJ_BIOSEQSET;
      } else if (afp->type == 5) {
        dataptr = (Pointer) SeqSubmitAsnRead (aip, NULL);
        datatype = OBJ_SEQSUB;
      }
      SeqMgrHoldIndexing (FALSE);

      AsnIoClose (aip);
      break;

    default :
      Message (MSG_POSTERR, "Input format type '%d' unrecognized", (int) afp->type);
      return;
  }

  entityID = ObjMgrRegister (datatype, dataptr);

  if (entityID < 1 || dataptr == NULL) {
    Message (MSG_POSTERR, "Data read failed for input file '%s'", filename);
    return;
  }

  if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
      datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {

    sep = GetTopSeqEntryForEntityID (entityID);

    if (sep == NULL) {
      /* a bare Bioseq or Bioseq-set needs a SeqEntry wrapper first */
      sep = SeqEntryNew ();
      if (sep != NULL) {
        switch (datatype) {
          case OBJ_BIOSEQ :
            bsp = (BioseqPtr) dataptr;
            sep->choice = 1;
            sep->data.ptrvalue = bsp;
            SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
            break;
          case OBJ_BIOSEQSET :
            bssp = (BioseqSetPtr) dataptr;
            sep->choice = 2;
            sep->data.ptrvalue = bssp;
            SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
            break;
          default :
            sep = SeqEntryFree (sep);
            break;
        }
      }
      sep = GetTopSeqEntryForEntityID (entityID);
    }

    if (sep != NULL) {
      farlist = afp->lock ? DoLockFarComponents (sep, afp->useThreads) : NULL;

      FormatRecord (sep, afp, farlist);

      UnlockFarComponents (farlist);
    }

  } else if (datatype == OBJ_SEQANNOT && afp->format == TABLE_FORMAT) {

    sap = (SeqAnnotPtr) dataptr;
    if (sap != NULL && sap->type == 1) {

      /* take the first Seq-id found in any feature location */
      sip = NULL;
      for (sfp = (SeqFeatPtr) sap->data;
           sfp != NULL && sip == NULL;
           sfp = sfp->next) {
        for (slp = SeqLocFindNext (sfp->location, NULL);
             slp != NULL && sip == NULL;
             slp = SeqLocFindNext (sfp->location, slp)) {
          sip = SeqLocId (slp);
        }
      }

      if (sip != NULL) {
        sep = SeqEntryNew ();
        bsp = (sep != NULL) ? BioseqNew () : NULL;
        if (bsp != NULL) {
          sep->choice = 1;
          sep->data.ptrvalue = (Pointer) bsp;
          bsp->id = SeqIdDup (sip);
          bsp->repr = Seq_repr_virtual;
          bsp->mol = Seq_mol_dna;
          bsp->length = INT4_MAX;
          bsp->annot = sap;
          GetSeqEntryParent (sep, &parentptr, &parenttype);
          SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
          entityID = ObjMgrGetEntityIDForPointer (bsp);
          SeqMgrIndexFeatures (entityID, NULL);
          if (afp->nt != NULL) {
            BioseqToGnbk (bsp, NULL, FTABLE_FMT, afp->mode, afp->style, 0, 0, 0, NULL, afp->nt);
          }
          /* detach the annot so freeing the temporary entry leaves it alone */
          bsp->annot = NULL;
        }
        SeqEntryFree (sep);
      }
    }

  } else {

    Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
  }

  /* release the record and clear per-record caches */

  ObjMgrFree (datatype, dataptr);

  ObjMgrReapOne (ObjMgrGet ());
  SeqMgrClearBioseqIndex ();
  ObjMgrFreeCache (0);
  FreeSeqIdGiCache ();

  SeqEntrySetScope (NULL);
}
1104 
/*
 * ProcessMultipleRecord
 *
 * Streams a batch input file element by element instead of loading it whole.
 * The outer ASN.1 type is chosen from afp->type (4 selects afp->atp_bss,
 * 5 selects afp->atp_ssp); any other value is reported and nothing is read.
 *
 *   filename  input file; when afp->compressed is set (UNIX only) it is read
 *             through a gzcat/zcat pipe, or through the program named by the
 *             NCBI_UNCOMPRESS_BINARY environment variable
 *   afp       application flags, ASN.1 type pointers and output streams
 *
 * Three processing modes, selected by afp->format:
 *   XML_FORMAT   every element is copied to afp->an; Bioseq.inst elements are
 *                round-tripped through BioseqInstAsnRead/BioseqInstAsnWrite
 *   ASN_FORMAT   every element is copied to afp->an; Seq-entry elements are
 *                read as objects and written only if they pass the optional
 *                accession filter
 *   otherwise    each Seq-entry element is read and handed to FormatRecord,
 *                all other elements are skipped
 *
 * Failures are reported with Message (MSG_POSTERR, ...).  The input stream
 * and the AsnIo wrapper are released on every exit path once they exist.
 */
static void ProcessMultipleRecord (
  CharPtr filename,
  AppFlagPtr afp
)

{
  AsnIoPtr       aip = NULL, aop = NULL;
  AsnTypePtr     atp = NULL;
  BioseqPtr      bsp;
  ValNodePtr     bsplist;
  DataVal        dv;
  FILE           *fp;
  Boolean        io_failure = FALSE;
  ObjMgrPtr      omp;
  SeqEntryPtr    sep;
#ifdef OS_UNIX
  Char           cmmd [256];
  CharPtr        gzcatprog;
  int            ret;
  Boolean        usedPopen = FALSE;
#endif

  if (afp == NULL) return;

  if (StringHasNoText (filename)) return;

#ifndef OS_UNIX
  if (afp->compressed) {
    Message (MSG_POSTERR, "Can only decompress on-the-fly on UNIX machines");
    return;
  }
#endif

#ifdef OS_UNIX
  if (afp->compressed) {
    /* pick the decompression program: explicit override, then gzcat, then zcat */
    gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
    if (gzcatprog == NULL) {
      ret = system ("gzcat -h >/dev/null 2>&1");
      if (ret == 0) {
        gzcatprog = "gzcat";
      } else if (ret == -1) {
        Message (MSG_POSTERR, "Unable to fork or exec gzcat in ProcessMultipleRecord");
        return;
      } else {
        ret = system ("zcat -h >/dev/null 2>&1");
        if (ret == 0) {
          gzcatprog = "zcat";
        } else if (ret == -1) {
          Message (MSG_POSTERR, "Unable to fork or exec zcat in ProcessMultipleRecord");
          return;
        } else {
          Message (MSG_POSTERR, "Unable to find zcat or gzcat in ProcessMultipleRecord - please edit your PATH environment variable");
          return;
        }
      }
    }

    /* program + space + filename + terminator must fit in cmmd */
    if (StringLen (gzcatprog) + StringLen (filename) + 2 > sizeof (cmmd)) {
      Message (MSG_POSTERR, "Decompression command too long for input file '%s'", filename);
      return;
    }

    /*
     * NOTE(review): filename is interpolated unquoted into a shell command,
     * so names containing shell metacharacters are interpreted by the shell.
     * Left as is to keep existing invocations working - confirm that callers
     * only pass trusted paths.
     */
    sprintf (cmmd, "%s %s", gzcatprog, filename);

    /* the pipe is always opened in "r" mode, even for binary ASN.1 */
    fp = popen (cmmd, "r");
    usedPopen = TRUE;
  } else {
    fp = FileOpen (filename, afp->binary? "rb" : "r");
  }
#else
  fp = FileOpen (filename, afp->binary? "rb" : "r");
#endif
  if (fp == NULL) {
    Message (MSG_POSTERR, "FileOpen failed for input file '%s'", filename);
    return;
  }

  aip = AsnIoNew (afp->binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
  if (aip == NULL) {
    Message (MSG_POSTERR, "AsnIoNew failed for input file '%s'", filename);
    goto cleanup;
  }

  /* only the ASN.1 and XML pass-through formats write to an AsnIo stream here */
  if (afp->format == ASN_FORMAT || afp->format == XML_FORMAT) {
    aop = afp->an;
  }

  if (afp->type == 4) {
    atp = afp->atp_bss;
  } else if (afp->type == 5) {
    atp = afp->atp_ssp;
  }

  if (atp == NULL) {
    Message (MSG_POSTERR, "Batch processing type not set properly");
    goto cleanup;
  }

  if (aop != NULL) {

    if (afp->format == XML_FORMAT) {
      while ((! io_failure) && (atp = AsnReadId (aip, afp->amp, atp)) != NULL) {
        if (aip->io_failure) {
          io_failure = TRUE;
          aip->io_failure = FALSE;
        }
        if (atp == afp->atp_inst) {
          /* converts compressed sequences to iupac like asn2xml */
          bsp = BioseqNew ();
          BioseqInstAsnRead (bsp, aip, atp);
          BioseqInstAsnWrite (bsp, aop, atp);
          bsp = BioseqFree (bsp);
        } else {
          AsnReadVal (aip, atp, &dv);
          AsnWrite (aop, atp, &dv);
          AsnKillValue (atp, &dv);
        }
        if (aip->io_failure) {
          io_failure = TRUE;
          aip->io_failure = FALSE;
        }
      }
    } else {
      while ((! io_failure) && (atp = AsnReadId (aip, afp->amp, atp)) != NULL) {
        if (aip->io_failure) {
          io_failure = TRUE;
          aip->io_failure = FALSE;
        }

        if (atp == afp->atp_se) {

          SeqMgrHoldIndexing (TRUE);
          sep = SeqEntryAsnRead (aip, atp);
          SeqMgrHoldIndexing (FALSE);

          if (afp->filterArray != NULL) {
            /* CheckFilter is expected to set afp->go_on when a Bioseq matches */
            afp->go_on = FALSE;
            VisitBioseqsInSep (sep, (Pointer) afp, CheckFilter);
            if (afp->go_on) {
              SeqEntryAsnWrite (sep, aop, atp);
            }
          } else {
            SeqEntryAsnWrite (sep, aop, atp);
          }

          /* release the record and per-record caches before reading the next one */
          SeqEntryFree (sep);
          omp = ObjMgrGet ();
          ObjMgrReapOne (omp);
          SeqMgrClearBioseqIndex ();
          ObjMgrFreeCache (0);
          FreeSeqIdGiCache ();

          SeqEntrySetScope (NULL);

        } else {

          AsnReadVal (aip, atp, &dv);
          AsnWrite (aop, atp, &dv);
          AsnKillValue (atp, &dv);
        }

        if (aip->io_failure) {
          io_failure = TRUE;
          aip->io_failure = FALSE;
        }
      }
    }

  } else {

    while ((! io_failure) && (atp = AsnReadId (aip, afp->amp, atp)) != NULL) {
      if (aip->io_failure) {
        io_failure = TRUE;
        aip->io_failure = FALSE;
      }
      if (atp == afp->atp_se) {

        SeqMgrHoldIndexing (TRUE);
        sep = SeqEntryAsnRead (aip, atp);
        SeqMgrHoldIndexing (FALSE);

        if (sep != NULL) {
          bsplist = NULL;
          if (afp->lock) {
            bsplist = DoLockFarComponents (sep, afp->useThreads);
          }

          FormatRecord (sep, afp, bsplist);

          bsplist = UnlockFarComponents (bsplist);
        }

        /* release the record and per-record caches before reading the next one */
        SeqEntryFree (sep);
        omp = ObjMgrGet ();
        ObjMgrReapOne (omp);
        SeqMgrClearBioseqIndex ();
        ObjMgrFreeCache (0);
        FreeSeqIdGiCache ();

        SeqEntrySetScope (NULL);

      } else {

        /* not a Seq-entry: consume and discard the value */
        AsnReadVal (aip, atp, NULL);
      }

      if (aip->io_failure) {
        io_failure = TRUE;
        aip->io_failure = FALSE;
      }
    }
  }

  if (aip->io_failure) {
    io_failure = TRUE;
  }

  if (io_failure) {
    Message (MSG_POSTERR, "Asn io_failure for input file '%s'", filename);
  }

cleanup:

  /* the stream is closed separately below, so AsnIoFree must not close it */
  if (aip != NULL) {
    AsnIoFree (aip, FALSE);
  }

#ifdef OS_UNIX
  if (usedPopen) {
    pclose (fp);
  } else {
    FileClose (fp);
  }
#else
  FileClose (fp);
#endif
}
1345 
/*
 * FormatWrapper
 *
 * Callback that formats one Seq-entry.  userdata carries the AppFlagPtr.
 * When afp->lock is set, far components are locked before formatting and
 * unlocked afterwards.  Does nothing if either argument is NULL.
 */
static void FormatWrapper (
  SeqEntryPtr sep,
  Pointer userdata
)

{
  AppFlagPtr  flags = (AppFlagPtr) userdata;
  ValNodePtr  locked = NULL;

  if (sep == NULL || flags == NULL) return;

  if (flags->lock) {
    locked = DoLockFarComponents (sep, flags->useThreads);
  }

  FormatRecord (sep, flags, locked);

  UnlockFarComponents (locked);
}
1368 
/*
 * ProcessOneRecord
 *
 * Per-file entry point (also passed to DirExplore as a callback).  userdata
 * carries the AppFlagPtr.  Dispatches on the input mode flags:
 *
 *   automatic          ReadSequenceAsnFile with FormatWrapper as callback
 *   catenated / piped  read successive objects with ReadAsnFastaOrFlatFile;
 *                      piped input is first copied to a temporary file
 *   batch              ProcessMultipleRecord
 *   otherwise          ProcessSingleRecord
 */
static void ProcessOneRecord (
  CharPtr filename,
  Pointer userdata
)

{
  AppFlagPtr   afp = (AppFlagPtr) userdata;
  Char         chunk [8192];
  Pointer      dataptr;
  Uint2        datatype;
  FILE         *dst;
  Uint2        entityID;
  FILE         *fp;
  size_t       got;
  FILE         *src;
  Char         path [PATH_MAX];

  if (StringHasNoText (filename) || afp == NULL) return;

  if (afp->automatic) {
    ReadSequenceAsnFile (filename, afp->binary, afp->compressed, (Pointer) afp, FormatWrapper);
    return;
  }

  if (! afp->catenated && ! afp->piped) {
    if (afp->batch) {
      ProcessMultipleRecord (filename, afp);
    } else {
      ProcessSingleRecord (filename, afp);
    }
    return;
  }

  /* catenated or piped input from here on */

  if (afp->piped) {
    /* spool the piped input into a temporary file, then read from that copy */
    src = FileOpen (filename, "r");
    TmpNam (path);
    dst = FileOpen (path, "w");
    if (src != NULL && dst != NULL) {
      for (;;) {
        got = FileRead (chunk, 1, sizeof (chunk), src);
        if (got == 0) break;
        if (! FileWrite (chunk, 1, got, dst)) {
          /* write failed: abandon the copy and remove the partial file */
          FileClose (dst);
          FileClose (src);
          FileRemove (path);
          return;
        }
      }
    }
    FileClose (dst);
    FileClose (src);
    filename = path;
  }

  fp = FileOpen (filename, "r");
  if (fp != NULL) {
    for (;;) {
      SeqMgrHoldIndexing (TRUE);
      dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
      SeqMgrHoldIndexing (FALSE);

      if (dataptr == NULL) break;

      FormatWrapper (GetTopSeqEntryForEntityID (entityID), afp);

      /* release the object and per-record caches before reading the next one */
      ObjMgrFree (datatype, dataptr);

      ObjMgrReapOne (ObjMgrGet ());
      SeqMgrClearBioseqIndex ();
      ObjMgrFreeCache (0);
      FreeSeqIdGiCache ();

      SeqEntrySetScope (NULL);
    }
    FileClose (fp);
  }

  if (afp->piped) {
    FileRemove (path);
  }
}
1458 
SeqEntryFromAccnOrGi(CharPtr str,AppFlagPtr afp)1459 static SeqEntryPtr SeqEntryFromAccnOrGi (
1460   CharPtr str,
1461   AppFlagPtr afp
1462 )
1463 
1464 {
1465   CharPtr      accn;
1466   BioseqPtr    bsp;
1467   Char         buf [64];
1468   Int4         flags = 0;
1469   Int2         retcode = 0;
1470   SeqEntryPtr  sep = NULL;
1471   SeqIdPtr     sip;
1472   CharPtr      tmp1 = NULL;
1473   CharPtr      tmp2 = NULL;
1474   long int     val;
1475 
1476   if (StringHasNoText (str)) return NULL;
1477   StringNCpy_0 (buf, str, sizeof (buf));
1478   TrimSpacesAroundString (buf);
1479 
1480   accn = buf;
1481   tmp1 = StringChr (accn, ',');
1482   if (tmp1 != NULL) {
1483     *tmp1 = '\0';
1484     tmp1++;
1485     tmp2 = StringChr (tmp1, ',');
1486     if (tmp2 != NULL) {
1487       *tmp2 = '\0';
1488       tmp2++;
1489       if (StringDoesHaveText (tmp2) && sscanf (tmp2, "%ld", &val) == 1) {
1490         flags = (Int4) val;
1491       }
1492     }
1493     if (StringDoesHaveText (tmp1) && sscanf (tmp1, "%ld", &val) == 1) {
1494       retcode = (Int2) val;
1495     }
1496   }
1497 
1498   sip = SeqIdFromPubSeqString (accn);
1499   sep = PubSeqSynchronousQueryId (sip, retcode, flags);
1500 
1501   if (sep != NULL) {
1502     bsp = BioseqFind (sip);
1503     sip = SeqIdFree (sip);
1504     if (bsp != NULL) {
1505       if (afp != NULL) {
1506         if (afp->format == ASN_FORMAT || afp->format == XML_FORMAT) return sep;
1507       }
1508       sep = SeqMgrGetSeqEntryForData ((Pointer) bsp);
1509     }
1510   }
1511   sip = SeqIdFree (sip);
1512 
1513   return sep;
1514 }
1515 
/*
 * ReadFilterFile
 *
 * Loads an accession filter from a text file, one identifier per line.
 * Each line is trimmed, cut at the first '.', and blank results are
 * skipped.  The remaining strings are sorted, de-duplicated and stored in
 * afp as:
 *
 *   afp->filterList   the ValNode list that owns the strings
 *   afp->filterArray  array of pointers into that list, in sorted order
 *   afp->filterLen    number of entries stored in filterArray
 *
 * afp is left untouched if the file cannot be opened, yields no usable
 * entries, or the array cannot be allocated.
 */
static void ReadFilterFile (
  CharPtr filterfile,
  AppFlagPtr afp
)

{
  CharPtr PNTR  array;
  FileCache     fc;
  FILE          *fp;
  ValNodePtr    head = NULL;
  Int4          i;
  ValNodePtr    last = NULL;
  Int4          len;
  Char          line [1023];
  CharPtr       ptr;
  CharPtr       str;
  ValNodePtr    vnp;

  if (StringHasNoText (filterfile) || afp == NULL) return;

  fp = FileOpen (filterfile, "r");
  if (fp == NULL) return;

  if (FileCacheSetup (&fc, fp)) {
    for (str = FileCacheGetString (&fc, line, sizeof (line));
         str != NULL;
         str = FileCacheGetString (&fc, line, sizeof (line))) {
      TrimSpacesAroundString (str);
      if (StringHasNoText (str)) continue;

      /* drop everything from the first '.' onward */
      ptr = StringChr (str, '.');
      if (ptr != NULL) {
        *ptr = '\0';
      }
      /* a line such as ".1" is empty after the cut; do not store it */
      if (StringHasNoText (str)) continue;

      vnp = ValNodeCopyStr (&last, 0, str);
      /* on allocation failure keep the existing tail so the list stays linked */
      if (vnp == NULL) continue;
      if (head == NULL) {
        head = vnp;
      }
      last = vnp;
    }
  }

  FileClose (fp);

  if (head == NULL) return;

  if (! ValNodeIsSorted (head, SortVnpByString)) {
    head = ValNodeSort (head, SortVnpByString);
  }
  ValNodeUnique (&head, SortVnpByString, ValNodeFreeData);

  len = ValNodeLen (head);
  /* one spare slot beyond len; presumably left NULL by MemNew - confirm */
  array = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (len + 1));
  if (array == NULL) {
    /* nothing will reference the list, so release it rather than leak it */
    ValNodeFreeData (head);
    return;
  }

  /* pack the array densely so filterLen counts only populated slots */
  i = 0;
  for (vnp = head; vnp != NULL && i < len; vnp = vnp->next) {
    str = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (str)) continue;
    array [i] = str;
    i++;
  }

  afp->filterLen = i;
  afp->filterList = head;
  afp->filterArray = array;
}
1587 
1588 static CharPtr helpLines [] = {
1589   "asn2all is primarily intended for generating reports from the binary",
1590   "ASN.1 Bioseq-set release files downloaded from the NCBI ftp site",
1591   "(ncbi-asn1 directory). It can produce GenBank and GenPept flatfiles,",
1592   "FASTA sequence files, INSDSet structured XML, TinySeq XML, and 5-column",
1593   "feature table format.",
1594   "",
1595   "The release files (which have .aso.gz suffix), should be uncompressed",
1596   "with gunzip, resulting in files with suffix .aso. For example,",
1597   "gbpri1.aso is the first file in the primate division, so the command",
1598   "",
1599   "  gunzip gbpri1.aso.gz",
1600   "",
1601   "will result in gbpri1.aso being created. The original gbpri1.aso.gz",
1602   "file is removed after successful decompression.",
1603   "",
1604   "In asn2all, the name of the file to be processed is specified by the -i",
1605   "command line argument. Use -a t to indicate that it is a release file",
1606   "and -b to indicate that it is binary ASN.1. A text ASN.1 file obtained",
1607   "from Entrez can be processed by using -a a instead of -a t -b.",
1608   "",
1609   "Nucleotide and protein records can be processed simultaneously. Use the",
1610   "-o argument to indicate the nucleotide output file, and the -v argument",
1611   "for the protein output file.",
1612   "",
1613   "The -f argument determines the format to be generated. Legal values of",
1614   "-f and the resulting formats are:",
1615   "",
1616   "  g  GenBank (nucleotide) or GenPept (protein)",
1617   "  f  FASTA",
1618   "  d  CDS FASTA (nucleotide) or Translated FASTA (protein)",
1619   "  t  5-column feature table",
1620   "  y  TinySet XML",
1621   "  s  INSDSet XML",
1622   "  a  ASN.1 of entire record",
1623   "  x  XML version of entire record",
1624   "",
1625   "The command",
1626   "",
1627   "  asn2all -i gbpri1.aso -a t -b -f g -o gbpri1.nuc -v gbpri1.prt",
1628   "",
1629   "will generate GenBank and GenPept reports from gbpri1.aso.",
1630   NULL
1631 };
1632 
DisplayHelpText(void)1633 static void DisplayHelpText (
1634   void
1635 )
1636 
1637 {
1638   Int2  i;
1639 
1640   for (i = 0; helpLines [i] != NULL; i++) {
1641     printf ("%s\n", helpLines [i]);
1642   }
1643   printf ("\n");
1644 }
1645 
1646 /* Args structure contains command-line arguments */
1647 
/*
 * Index of each command-line argument within myargs.  The enumerators must
 * stay in the same order as the myargs entries, because Main looks up
 * values with myargs [<enumerator>].
 */
typedef enum {
  p_argInputPath = 0,   /* -p  Path to Files */
  i_argInputFile,       /* -i  Input File Name */
  o_argNtOutFile,       /* -o  Nucleotide Output File Name */
  v_argAaOutFile,       /* -v  Protein Output File Name */
  x_argSuffix,          /* -x  File Selection Suffix */
  f_argFormat,          /* -f  Format */
  a_argType,            /* -a  ASN.1 Type */
  b_argBinary,          /* -b  Bioseq-set is Binary */
  c_argCompressed,      /* -c  Bioseq-set is Compressed */
  r_argRemote,          /* -r  Remote Fetching */
  k_argLocal,           /* -k  Local Fetching */
  d_argAsnIdx,          /* -d  Path to Indexed Binary ASN.1 Data */
  l_argLockFar,         /* -l  Lock Components in Advance */
  T_argThreads,         /* -T  Use Threads */
  n_argNear,            /* -n  Near Fasta Policy */
  s_argSourceDb,        /* -s  Source Database */
  X_argExtended,        /* -X  Extended Qualifier Output */
  G_argRelaxed,         /* -G  Relaxed Genome Mapping */
  A_argAccession,       /* -A  Accession to Fetch */
  F_argFilterFile,      /* -F  Accession Filter File */
  h_argHelp,            /* -h  Display Help Message */
  J_argFrom,            /* -J  SeqLoc From */
  K_argTo,              /* -K  SeqLoc To */
  M_argStrand           /* -M  SeqLoc Minus Strand */
} Arguments;
1674 
1675 
1676 Args myargs [] = {
1677   {"Path to Files", NULL, NULL, NULL,
1678     TRUE, 'p', ARG_STRING, 0.0, 0, NULL},
1679   {"Input File Name", "stdin", NULL, NULL,
1680     TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
1681   {"Nucleotide Output File Name", NULL, NULL, NULL,
1682     TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
1683   {"Protein Output File Name", NULL, NULL, NULL,
1684     TRUE, 'v', ARG_FILE_OUT, 0.0, 0, NULL},
1685   {"File Selection Suffix", ".aso", NULL, NULL,
1686     TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
1687   {"Format\n"
1688    "      g GenBank/GenPept\n"
1689    "      m GenBank Master Style\n"
1690    "      f FASTA\n"
1691    "      d CDS FASTA\n"
1692    "      e Gene FASTA\n"
1693    "      r Regenerated Defline\n"
1694    "      t Feature Table\n"
1695    "      y TinySet XML\n"
1696    "      s INSDSet XML\n"
1697    "      a ASN.1\n"
1698    "      x XML\n"
1699    "      c Cache Components\n", NULL, NULL, NULL,
1700     TRUE, 'f', ARG_STRING, 0.0, 0, NULL},
1701   {"ASN.1 Type\n"
1702    "      a Automatic\n"
1703    "      c Catenated\n"
1704    "      p Piped\n"
1705    "      z Any\n"
1706    "      e Seq-entry\n"
1707    "      b Bioseq\n"
1708    "      s Bioseq-set\n"
1709    "      m Seq-submit\n"
1710    "      t Batch Bioseq-set\n"
1711    "      u Batch Seq-submit\n", "a", NULL, NULL,
1712     TRUE, 'a', ARG_STRING, 0.0, 0, NULL},
1713   {"Bioseq-set is Binary", "F", NULL, NULL,
1714     TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
1715   {"Bioseq-set is Compressed", "F", NULL, NULL,
1716     TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
1717   {"Remote Fetching", "F", NULL, NULL,
1718     TRUE, 'r', ARG_BOOLEAN, 0.0, 0, NULL},
1719   {"Local Fetching", "F", NULL, NULL,
1720     TRUE, 'k', ARG_BOOLEAN, 0.0, 0, NULL},
1721   {"Path to Indexed Binary ASN.1 Data", NULL, NULL, NULL,
1722     TRUE, 'd', ARG_STRING, 0.0, 0, NULL},
1723   {"Lock Components in Advance", "F", NULL, NULL,
1724     TRUE, 'l', ARG_BOOLEAN, 0.0, 0, NULL},
1725   {"Use Threads", "F", NULL, NULL,
1726     TRUE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
1727   {"Near Fasta Policy\n"
1728    "      a All\n"
1729    "      n Near Only\n"
1730    "      f Far Only\n", "n", NULL, NULL,
1731     TRUE, 'n', ARG_STRING, 0.0, 0, NULL},
1732   {"Source Database\n"
1733    "      a Any\n"
1734    "      w Exclude Segmented Sequences\n", "a", NULL, NULL,
1735     TRUE, 's', ARG_STRING, 0.0, 0, NULL},
1736   {"Extended Qualifier Output", "F", NULL, NULL,
1737     TRUE, 'X', ARG_BOOLEAN, 0.0, 0, NULL},
1738   {"Relaxed Genome Mapping", "F", NULL, NULL,
1739     TRUE, 'G', ARG_BOOLEAN, 0.0, 0, NULL},
1740   {"Accession to Fetch (or Accession,retcode,flags where flags -1 fetches external features)", NULL, NULL, NULL,
1741     TRUE, 'A', ARG_STRING, 0.0, 0, NULL},
1742   {"Accession Filter File", NULL, NULL, NULL,
1743     TRUE, 'F', ARG_FILE_IN, 0.0, 0, NULL},
1744   {"Display Help Message", "F", NULL, NULL,
1745     TRUE, 'h', ARG_BOOLEAN, 0.0, 0, NULL},
1746   {"SeqLoc From", "0", NULL, NULL,
1747     TRUE, 'J', ARG_INT, 0.0, 0, NULL},
1748   {"SeqLoc To", "0", NULL, NULL,
1749     TRUE, 'K', ARG_INT, 0.0, 0, NULL},
1750   {"SeqLoc Minus Strand", "F", NULL, NULL,
1751     TRUE, 'M', ARG_BOOLEAN, 0.0, 0, NULL},
1752 };
1753 
Main(void)1754 Int2 Main (void)
1755 
1756 {
1757   CharPtr      asnin, aaout, directory, suffix, ntout, accn, filterfile, asnidx, str;
1758   AppFlagData  afd;
1759   Char         app [64], format, nearpolicy, type, xmlbuf [128];
1760   DataVal      av;
1761   ValNodePtr   bsplist;
1762   Boolean      help, indexed, local, remote;
1763   SeqEntryPtr  sep;
1764 
1765   /* standard setup */
1766 
1767   ErrSetFatalLevel (SEV_MAX);
1768   ErrClearOptFlags (EO_SHOW_USERSTR);
1769   ErrSetLogfile ("stderr", ELOG_APPEND);
1770   UseLocalAsnloadDataAndErrMsg ();
1771   ErrPathReset ();
1772 
1773   SOCK_SetupSSL(NcbiSetupGnuTls);
1774 
1775   if (! AllObjLoad ()) {
1776     Message (MSG_FATAL, "AllObjLoad failed");
1777     return 1;
1778   }
1779   if (! SubmitAsnLoad ()) {
1780     Message (MSG_FATAL, "SubmitAsnLoad failed");
1781     return 1;
1782   }
1783   if (! FeatDefSetLoad ()) {
1784     Message (MSG_FATAL, "FeatDefSetLoad failed");
1785     return 1;
1786   }
1787   if (! SeqCodeSetLoad ()) {
1788     Message (MSG_FATAL, "SeqCodeSetLoad failed");
1789     return 1;
1790   }
1791   if (! GeneticCodeTableLoad ()) {
1792     Message (MSG_FATAL, "GeneticCodeTableLoad failed");
1793     return 1;
1794   }
1795 
1796   /* process command line arguments */
1797 
1798   sprintf (app, "asn2all %s", ASN2ALL_APPLICATION);
1799   if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
1800     return 0;
1801   }
1802 
1803   /* additional setup modifications */
1804 
1805   help = (Boolean) myargs [h_argHelp].intvalue;
1806   if (help) {
1807     DisplayHelpText ();
1808     return 0;
1809   }
1810 
1811   if (! objgbseqAsnLoad ()) {
1812     Message (MSG_POSTERR, "objgbseqAsnLoad failed");
1813     return 1;
1814   }
1815   if (! objinsdseqAsnLoad ()) {
1816     Message (MSG_POSTERR, "objinsdseqAsnLoad failed");
1817     return 1;
1818   }
1819 
1820   if (GetAppParam ("NCBI", "SETTINGS", "XMLPREFIX", NULL, xmlbuf, sizeof (xmlbuf))) {
1821     AsnSetXMLmodulePrefix (StringSave (xmlbuf));
1822   }
1823 
1824   MemSet ((Pointer) &afd, 0, sizeof (AppFlagData));
1825 
1826   remote = (Boolean ) myargs [r_argRemote].intvalue;
1827   local = (Boolean) myargs [k_argLocal].intvalue;
1828   asnidx = (CharPtr) myargs [d_argAsnIdx].strvalue;
1829   indexed = (Boolean) StringDoesHaveText (asnidx);
1830   accn = (CharPtr) myargs [A_argAccession].strvalue;
1831   filterfile = (CharPtr) myargs [F_argFilterFile].strvalue;
1832 
1833   directory = (CharPtr) myargs [p_argInputPath].strvalue;
1834   asnin = (CharPtr) myargs [i_argInputFile].strvalue;
1835   ntout = (CharPtr) myargs [o_argNtOutFile].strvalue;
1836   aaout = (CharPtr) myargs [v_argAaOutFile].strvalue;
1837   suffix = (CharPtr) myargs [x_argSuffix].strvalue;
1838 
1839   /* default to stdout for nucleotide output if nothing specified */
1840 
1841   if (StringHasNoText (ntout) &&
1842       StringHasNoText (aaout)) {
1843     ntout = "stdout";
1844   }
1845 
1846   /* populate parameter structure */
1847 
1848   afd.automatic = FALSE;
1849   afd.catenated = FALSE;
1850   afd.piped = FALSE;
1851   afd.batch = FALSE;
1852   afd.binary = (Boolean) myargs [b_argBinary].intvalue;
1853   afd.compressed = (Boolean) myargs [c_argCompressed].intvalue;
1854   afd.lock = (Boolean) myargs [l_argLockFar].intvalue;
1855   afd.useThreads = (Boolean) myargs [T_argThreads].intvalue;
1856   afd.type = 1;
1857   afd.linelen = 70;
1858   afd.nearpolicy = 1;
1859   afd.mode = ENTREZ_MODE;
1860   afd.style = NORMAL_STYLE;
1861   afd.extended = (Boolean) myargs [X_argExtended].intvalue;
1862   afd.relaxed = (Boolean) myargs [G_argRelaxed].intvalue;
1863   afd.failed = FALSE;
1864 
1865   str = myargs [f_argFormat].strvalue;
1866   TrimSpacesAroundString (str);
1867   if (StringDoesHaveText (str)) {
1868     format = str [0];
1869   } else {
1870     Message (MSG_POSTERR, "You must indicate a format with the -f parameter");
1871     return 1;
1872   }
1873 
1874   format = TO_LOWER (format);
1875   switch (format) {
1876     case 'g' :
1877       afd.format = FLATFILE_FORMAT;
1878       break;
1879     case 'm' :
1880       afd.format = FLATFILE_FORMAT;
1881       afd.style = MASTER_STYLE;
1882       break;
1883     case 'f' :
1884       afd.format = FASTA_FORMAT;
1885       break;
1886     case 'd' :
1887       afd.format = CDS_FORMAT;
1888       break;
1889     case 'e' :
1890       afd.format = GENE_FORMAT;
1891       break;
1892     case 'r' :
1893       afd.format = DEFLINE_FORMAT;
1894       break;
1895     case 't' :
1896       afd.format = TABLE_FORMAT;
1897       break;
1898     case 'y' :
1899       afd.format = TINY_FORMAT;
1900       break;
1901     case 's' :
1902       afd.format = INSDSEQ_FORMAT;
1903       break;
1904     case 'a' :
1905       afd.format = ASN_FORMAT;
1906       break;
1907     case 'x' :
1908       afd.format = XML_FORMAT;
1909       break;
1910     case 'c' :
1911       afd.format = CACHE_COMPONENTS;
1912       break;
1913     default :
1914       afd.format = FLATFILE_FORMAT;
1915       break;
1916   }
1917 
1918   str = myargs [a_argType].strvalue;
1919   TrimSpacesAroundString (str);
1920   if (StringDoesHaveText (str)) {
1921     type = str [0];
1922   } else {
1923     type = 'a';
1924   }
1925 
1926   type = TO_LOWER (type);
1927   switch (type) {
1928     case 'a' :
1929       afd.type = 1;
1930       afd.automatic = TRUE;
1931       break;
1932     case 'c' :
1933       afd.type = 1;
1934       afd.catenated = TRUE;
1935       break;
1936     case 'p' :
1937       afd.type = 1;
1938       afd.piped = TRUE;
1939       break;
1940     case 'z' :
1941       afd.type = 1;
1942       break;
1943     case 'e' :
1944       afd.type = 2;
1945       break;
1946     case 'b' :
1947       afd.type = 3;
1948       break;
1949     case 's' :
1950       afd.type = 4;
1951       break;
1952     case 'm' :
1953       afd.type = 5;
1954       break;
1955     case 't' :
1956       afd.type = 4;
1957       afd.batch = TRUE;
1958       afd.mode = RELEASE_MODE;
1959       break;
1960     case 'u' :
1961       afd.type = 5;
1962       afd.batch = TRUE;
1963       break;
1964     default :
1965       afd.type = 1;
1966       break;
1967   }
1968 
1969   afd.from = myargs [J_argFrom].intvalue;
1970   afd.to = myargs [K_argTo].intvalue;
1971   if (myargs [M_argStrand].intvalue) {
1972     afd.strand = Seq_strand_minus;
1973   } else {
1974     afd.strand = Seq_strand_plus;
1975   }
1976 
1977   str = myargs [n_argNear].strvalue;
1978   TrimSpacesAroundString (str);
1979   if (StringDoesHaveText (str)) {
1980     nearpolicy = str [0];
1981   } else {
1982     nearpolicy = 'a';
1983   }
1984 
1985   nearpolicy = TO_LOWER (nearpolicy);
1986   switch (nearpolicy) {
1987     case 'a' :
1988       afd.nearpolicy = 1;
1989       break;
1990     case 'n' :
1991       afd.nearpolicy = 2;
1992       break;
1993     case 'f' :
1994       afd.nearpolicy = 3;
1995       break;
1996     default :
1997       afd.nearpolicy = 1;
1998       break;
1999   }
2000 
2001   afd.sourcedb = myargs [s_argSourceDb].strvalue;;
2002 
2003   afd.nt = NULL;
2004   afd.aa = NULL;
2005   afd.an = NULL;
2006   afd.ap = NULL;
2007 
2008   afd.amp = AsnAllModPtr ();
2009   afd.atp_ssp = AsnFind ("Seq-submit");
2010   afd.atp_sbp = AsnFind ("Seq-submit.sub");
2011   afd.atp_bss = AsnFind ("Bioseq-set");
2012   afd.atp_bsss = AsnFind ("Bioseq-set.seq-set");
2013   afd.atp_se = AsnFind ("Bioseq-set.seq-set.E");
2014   afd.atp_inst = AsnFind ("Bioseq.inst");
2015   afd.atp_bsc = AsnFind ("Bioseq-set.class");
2016   afd.bssp_atp = AsnLinkType (NULL, afd.atp_bss);
2017   afd.atp_insd = AsnLinkType (NULL, AsnFind ("INSDSet"));
2018   afd.atp_insde = AsnLinkType (NULL, AsnFind ("INSDSet.E"));
2019   afd.atp_tss = AsnLinkType (NULL, AsnFind ("TSeqSet"));
2020   afd.atp_tsse = AsnLinkType (NULL, AsnFind ("TSeqSet.E"));
2021 
2022   /* open output files */
2023 
2024   switch (afd.format) {
2025     case FLATFILE_FORMAT :
2026     case FASTA_FORMAT :
2027     case CDS_FORMAT :
2028     case GENE_FORMAT :
2029     case DEFLINE_FORMAT :
2030     case TABLE_FORMAT :
2031       if (! StringHasNoText (ntout)) {
2032         afd.nt = FileOpen (ntout, "w");
2033         if (afd.nt == NULL) {
2034           Message (MSG_FATAL, "Unable to open nucleotide output file");
2035           return 1;
2036         }
2037       }
2038       if (! StringHasNoText (aaout)) {
2039         afd.aa = FileOpen (aaout, "w");
2040         if (afd.aa == NULL) {
2041           Message (MSG_FATAL, "Unable to open protein output file");
2042           return 1;
2043         }
2044       }
2045       break;
2046     case TINY_FORMAT :
2047     case INSDSEQ_FORMAT :
2048       if (! StringHasNoText (ntout)) {
2049         afd.an = AsnIoOpen (ntout, "wx");
2050         if (afd.an == NULL) {
2051           Message (MSG_FATAL, "Unable to open nucleotide output file");
2052           return 1;
2053         }
2054       }
2055       if (! StringHasNoText (aaout)) {
2056         afd.ap = AsnIoOpen (aaout, "wx");
2057         if (afd.ap == NULL) {
2058           Message (MSG_FATAL, "Unable to open protein output file");
2059           return 1;
2060         }
2061       }
2062       break;
2063     case ASN_FORMAT :
2064       if (! StringHasNoText (ntout)) {
2065         afd.an = AsnIoOpen (ntout, "w");
2066         if (afd.an == NULL) {
2067           Message (MSG_FATAL, "Unable to open output file");
2068           return 1;
2069         }
2070       }
2071       break;
2072     case XML_FORMAT :
2073       if (! StringHasNoText (ntout)) {
2074         afd.an = AsnIoOpen (ntout, "wx");
2075         if (afd.an == NULL) {
2076           Message (MSG_FATAL, "Unable to open output file");
2077           return 1;
2078         }
2079       }
2080       break;
2081     case CACHE_COMPONENTS :
2082       if (! StringHasNoText (ntout)) {
2083         afd.an = AsnIoOpen (ntout, "wb");
2084         if (afd.an == NULL) {
2085           Message (MSG_FATAL, "Unable to open output file");
2086           return 1;
2087         }
2088       }
2089       break;
2090     default :
2091       break;
2092   }
2093 
2094   /* register fetch functions */
2095 
2096   if (remote) {
2097     PubSeqFetchEnable ();
2098     PubMedFetchEnable ();
2099   }
2100 
2101   if (local) {
2102     LocalSeqFetchInit (FALSE);
2103   }
2104 
2105   if (indexed) {
2106     AsnIndexedLibFetchEnable (asnidx, TRUE);
2107   }
2108 
2109   /* open output structures */
2110 
2111   switch (afd.format) {
2112     case TINY_FORMAT :
2113       if (afd.an != NULL) {
2114         AsnOpenStruct (afd.an, afd.atp_tss, (Pointer) &(afd.tss));
2115       }
2116       if (afd.ap != NULL) {
2117         AsnOpenStruct (afd.ap, afd.atp_tss, (Pointer) &(afd.tss));
2118       }
2119       break;
2120     case INSDSEQ_FORMAT :
2121       if (afd.an != NULL) {
2122         afd.xtran.gbseq  = &(afd.gbsq);
2123         afd.xtran.aip = afd.an;
2124         afd.xtran.atp = afd.atp_insde;
2125         AsnOpenStruct (afd.an, afd.atp_insd, (Pointer) &(afd.gbst));
2126       }
2127       if (afd.ap != NULL) {
2128         afd.xtrap.gbseq  = &(afd.gbsq);
2129         afd.xtrap.aip = afd.ap;
2130         afd.xtrap.atp = afd.atp_insde;
2131         AsnOpenStruct (afd.ap, afd.atp_insd, (Pointer) &(afd.gbst));
2132       }
2133       break;
2134     case CACHE_COMPONENTS :
2135       if (afd.an != NULL) {
2136         AsnOpenStruct (afd.an, afd.bssp_atp, (Pointer) &(afd.bss));
2137         av.intvalue = 7;
2138         AsnWrite (afd.an, afd.atp_bsc, &av);
2139         AsnOpenStruct (afd.an, afd.atp_bsss, (Pointer) &(afd.bss.seq_set));
2140       }
2141       break;
2142     default :
2143       break;
2144   }
2145 
2146   if (StringDoesHaveText (filterfile)) {
2147     ReadFilterFile (filterfile, &afd);
2148   }
2149 
2150   /* process input file or download accession */
2151 
2152   if (StringDoesHaveText (accn)) {
2153 
2154     if (remote) {
2155       sep = SeqEntryFromAccnOrGi (accn, &afd);
2156       if (sep != NULL) {
2157         bsplist = NULL;
2158         if (afd.lock) {
2159           bsplist = DoLockFarComponents (sep, afd.useThreads);
2160         }
2161 
2162         FormatRecord (sep, &afd, bsplist);
2163 
2164         bsplist = UnlockFarComponents (bsplist);
2165 
2166         SeqEntryFree (sep);
2167       }
2168     }
2169 
2170   } else if (StringDoesHaveText (directory)) {
2171 
2172     DirExplore (directory, NULL, suffix, TRUE, ProcessOneRecord, (Pointer) &afd);
2173 
2174   } else {
2175 
2176     ProcessOneRecord (asnin, &afd);
2177   }
2178 
2179   /* close output structures */
2180 
2181   switch (afd.format) {
2182     case TINY_FORMAT :
2183       if (afd.an != NULL) {
2184         AsnCloseStruct (afd.an, afd.atp_tss, NULL);
2185         AsnPrintNewLine (afd.an);
2186       }
2187       if (afd.ap != NULL) {
2188         AsnCloseStruct (afd.ap, afd.atp_tss, NULL);
2189         AsnPrintNewLine (afd.ap);
2190       }
2191       break;
2192     case INSDSEQ_FORMAT :
2193       if (afd.an != NULL) {
2194         AsnCloseStruct (afd.an, afd.atp_insd, NULL);
2195         AsnPrintNewLine (afd.an);
2196       }
2197       if (afd.ap != NULL) {
2198         AsnCloseStruct (afd.ap, afd.atp_insd, NULL);
2199         AsnPrintNewLine (afd.ap);
2200       }
2201       break;
2202     case CACHE_COMPONENTS :
2203       if (afd.an != NULL) {
2204         AsnCloseStruct (afd.an, afd.atp_bsss, (Pointer) &(afd.bss.seq_set));
2205         AsnCloseStruct (afd.an, afd.bssp_atp, (Pointer) &(afd.bss));
2206         AsnPrintNewLine (afd.an);
2207       }
2208       break;
2209     default :
2210       break;
2211   }
2212 
2213   /* close output files */
2214 
2215   if (afd.nt != NULL) {
2216     FileClose (afd.nt);
2217   }
2218   if (afd.aa != NULL) {
2219     FileClose (afd.aa);
2220   }
2221 
2222   if (afd.an != NULL) {
2223     AsnIoClose (afd.an);
2224   }
2225 
2226   if (afd.ap != NULL) {
2227     AsnIoClose (afd.ap);
2228   }
2229 
2230   if (afd.format == CACHE_COMPONENTS) {
2231     CreateAsnIndex (ntout, NULL, TRUE);
2232   }
2233 
2234   if (afd.filterList != NULL) {
2235     ValNodeFreeData (afd.filterList);
2236   }
2237 
2238   /* close fetch functions */
2239 
2240   if (indexed) {
2241     AsnIndexedLibFetchDisable ();
2242   }
2243 
2244   if (local) {
2245     LocalSeqFetchDisable ();
2246   }
2247 
2248   if (remote) {
2249     PubMedFetchDisable ();
2250     PubSeqFetchDisable ();
2251   }
2252 
2253   if (afd.failed) {
2254     return 1;
2255   }
2256 
2257   return 0;
2258 }
2259 
2260