1 /*   cdscan.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  cdscan.c
27 *
28 * Author:  Kans, Schuler, Ostell
29 *
30 * Version Creation Date:   2/26/95
31 *
32 * $Revision: 6.1 $
33 *
34 * File Description:
35 *       scans through sequence records on the Entrez discs
36 *       This program is meant to serve as a model for programs that scan
37 *       all the files on the Entrez CDROM disks. Basically it does some
38 *       setup in the main routine to find the disks and get a list of files
39 *       on them for nucleic acid or protein sequences. It then goes through
40 *       each file and reads each Seq-entry from them. On the Entrez CDROMs
41 *       the Seq-entries are Huffman compressed, so the Casn calls decompress
42 *       them for you. Once you have a Seq-entry, you can do whatever you
43 *       want with it. In this program we give the options of printing as
44 *       FASTA file or as GenBank or GenPept file. You can modify this part
45 *       to do whatever you want. The function that receives the SeqEntry is
46 *       called "ProcessSeqEntry".
47 *
48 *   This function is passed one SeqEntry at a time by the CDROM scanning
49 *     Routines. Depending on the global variables it will call routines to
50 *     make fasta, genbank format, or to call a custom routine. The default
51 *     custom routine (CustomRoutine) just prints the SeqIds of the sequences
52 *     to the outputfile and to the progress monitor.
53 *
54 *   At the end of the file are some custom routines written for various
55 *     purposes that could be substituted or modified for other purposes.
56 *     To activate, call them instead of "CustomRoutine"
57 *
58 *      The first locates all GenBank entries and prints out a short summary
59 *      of their citations. It is called GenBankPubs()
60 *
61 *      The second prints the sequence of all CdRegion features in the entry.
62 *      It is called SeqEntryToFeat()
63 *
64 *
65 *       Before trying any of this, be sure you have installed the Entrez
66 *       application itself and ensured that it works. This program uses the
67 *       same configuration file and will not run if Entrez has not been
68 *       properly installed.
69 *
70 * Modifications:
71 * --------------------------------------------------------------------------
72 * Date     Name        Description of modification
73 * -------  ----------  -----------------------------------------------------
74 *
75 *
76 * ==========================================================================
77 */
78 
79 #ifndef _NEW_CdEntrez_
80 #define _NEW_CdEntrez_
81 #endif
82 
83 #include <ncbi.h>
84 #include <casn.h>
85 #include <accentr.h>
86 #include <cdromlib.h>
87 #include <seqport.h>
88 #include <asn2ff.h>
89 #include <tofasta.h>
90 
91 /*****************************************************************************
92 *
93 *   structs used by main routines
94 *
95 *****************************************************************************/
96 
97 typedef struct filelist {
98   Int2                  cdnum;
99   CharPtr               fdir;
100   CharPtr               fname;
101   struct filelist PNTR  next;
102 } FileList, PNTR FileListPtr;
103 
104 /*****************************************************************************
105 *
106 *   Function prototypes for routines in this module
107 *
108 *****************************************************************************/
109 static Boolean LIBCALLBACK EnumerateFiles PROTO((int cdnum, const char *fdir,
110                                            const char *fname, long fsize,
111                                            void *opaque_data));
112 
113 static FileListPtr FileListNew PROTO((FileListPtr flp, Int2 cdnum,
114                                 CharPtr fdir, CharPtr fname));
115 
116 static void ProcessFileList PROTO((FileListPtr flp, CharPtr outputfile));
117 
118 static void ProcessFile PROTO((FileListPtr flp, CharPtr root, CharPtr outputfile));
119 
120 static void ProcessSeqEntry PROTO((SeqEntryPtr sep, FILE *fp));
121 
122 static void CustomRoutine PROTO((SeqEntryPtr sep, FILE * fp));
123 
124 static void PrintIdDefLine PROTO((SeqEntryPtr sep, Pointer data,
125                          Int4 index, Int2 indent));
126 
127 /*****************************************************************************
128 *
129 *   Static Data used by the main routines
130 *
131 *****************************************************************************/
132 
133 static Char root [PATH_MAX];
134 static EntrezInfoPtr eip;
135 static EntrezDivInfo *div_info;
136 
137 static Int2 format;    /* 1 = GenBank, 2 = FASTA */
138 static Boolean is_na,  /* TRUE = nucleic acids, FALSE = proteins */
139 	is_custom;  /* call custom process instead of std ones */
140 static MonitorPtr pmon = NULL;   /* progress monitor */
141 
142 #define NUMARGS 5
143 
144 Args myargs [NUMARGS] = {
145   {"Scan DNA (1) or Protein (2)", "1", "1", "2", FALSE, 's', ARG_INT, 0.0, 0, NULL},
146   {"Output format: GenBank (1) or FASTA (2)", "1", "1", "2", TRUE, 'f', ARG_INT, 0.0, 0, NULL},
147   {"Call custom process", "F", NULL, NULL, TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
148   {"Show progress monitor", "F", NULL, NULL, TRUE, 'p', ARG_BOOLEAN, 0.0, 0, NULL},
149   {"Output File", "stdout", NULL, NULL, FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}
150 };
151 
152 /*****************************************************************************
153 *
154 *   This is the main program.
155 *     It reads command line or initial dialogue arguments
156 *     It initializes parse trees, entrez
157 *     It makes a list of the relevant files to search
158 *     It then calls ProcessFileList to open each file and process it
159 *
160 *****************************************************************************/
Main(void)161 Int2 Main (void)
162 {
163   Char         div [8];
164   FileListPtr  flp = NULL;
165   FILE         *fp;
166   Int2         i;
167   Boolean      is_network, did_init=FALSE;
168   FileListPtr  next;
169   CharPtr      outputfile;
170 
171   if (! GetArgs ("CdScan", NUMARGS, myargs))   /* get input args */
172 	goto ret;
173 
174 											   /* process input args */
175   if (myargs[0].intvalue == 1)
176   	is_na = TRUE;      /* scan nucleic acids */
177   else
178   	is_na = FALSE;     /* scan proteins */
179   format = (Int2)(myargs[1].intvalue);
180   is_custom = (Boolean)(myargs[2].intvalue);
181   if (myargs[3].intvalue)   /* show progress */
182   {
183   	pmon = MonitorStrNew("CdScan", 40);
184   }
185   outputfile = myargs[4].strvalue;   /* output file name */
186 
187   if (pmon != NULL)
188   	MonitorStrValue(pmon, "Reading Parse Trees");
189 
190   if (! SeqEntryLoad() || ! SubmitAsnLoad())   /* read ASN.1 parse trees */
191   {
192   	Message(MSG_ERROR, "Can't open parse trees");
193   	goto ret;
194   }
195 
196   if (! PrintTemplateSetLoad ("asn2ff.prt"))
197   {
198     Message(MSG_ERROR, "Can't load print templates");
199 	goto ret;
200   }
201 
202   if (pmon != NULL)
203   	MonitorStrValue(pmon, "Initializing Entrez");
204 
205   if (! EntrezInit ("cdscan", FALSE, &is_network)) /* init Entrez */
206   {
207   	Message(MSG_ERROR, "Can't initialize Entrez");
208   	goto ret;
209   }
210 
211   did_init = TRUE;
212   if (is_network)
213   {
214       Message (MSG_ERROR, "Network service does not allow scanning");
215   	goto ret;
216   }
217 
218   if (pmon != NULL)
219   	MonitorStrValue(pmon, "Building File List");
220 
221   eip = EntrezGetInfo ();                        /* set up the file lists */
222   if ((eip == NULL) || (eip->div_info == NULL))
223   {
224   	Message(MSG_ERROR, "Can't find Entrez file info");
225   	goto ret;
226   }
227 
228 
229   flp = FileListNew (NULL, INT2_MIN, NULL, NULL);
230   if (flp == NULL)
231   {
232   	Message(MSG_ERROR, "Can't allocate file list");
233   	goto ret;
234   }
235 
236   div_info = eip->div_info;
237   for (i = 0; i < eip->div_count; i++)
238   {
239   	StringNCpy (div, div_info [i].tag, sizeof (div) - 1);
240   	if (! is_na)
241   	{
242       	CdEnumFiles (CdDir_rec, TYP_AA, div, EnumerateFiles, &flp);
243       }
244   	else
245   	{
246       	CdEnumFiles (CdDir_rec, TYP_NT, div, EnumerateFiles, &flp);
247   	}
248   }
249 
250   fp = FileOpen (outputfile, "w");   /* test that we can open output file */
251   if (fp == NULL)
252   {
253   	Message(MSG_ERROR, "Can't open [%s]", outputfile);
254   	goto ret;
255   }
256 
257   FileClose (fp);     /* will be reopened for each input file */
258 
259   ProcessFileList (flp, outputfile);              /* process the file list */
260 
261 ret:                                                         /* clean up */
262 
263   if (pmon != NULL)     /* close the progress monitor */
264 	MonitorFree(pmon);
265 
266   if (did_init)
267 	  EntrezFini();         /* close entrez */
268 
269   while (flp != NULL)       /* free file list */
270   {
271     next = flp->next;
272     MemFree (flp->fdir);
273     MemFree (flp->fname);
274     MemFree (flp);
275     flp = next;
276   }
277 
278   return 0;
279 }
280 
281 /*****************************************************************************
282 *
283 *   ProcessSeqEntry (sep, fp)
284 *
285 *
286 *   This function is passed one SeqEntry at a time by the CDROM scanning
287 *     Routines. Depending on the global variables it will call routines to
288 *     make fasta, genbank format, or to call a custom routine. The default
289 *     custom routine (CustomRoutine) just prints the SeqIds of the sequences
290 *     to the outputfile and to the progress monitor.
291 *
292 *   At the end of the file are some custom routines written for various
293 *     purposes that could be substituted or modified for other purposes.
294 *     To activate, call them instead of "CustomRoutine"
295 *
296 *****************************************************************************/
/*
 * ProcessSeqEntry
 *   Writes one Seq-entry to fp in the way selected by the file-level
 *   settings: the custom routine when is_custom is set, otherwise a flat
 *   file (format 1) or FASTA (format 2).  Any other format value, or a
 *   NULL sep or fp, produces no output.
 */
static void ProcessSeqEntry (SeqEntryPtr sep, FILE *fp)
{
  Uint1 flatfmt;

  if (sep == NULL || fp == NULL) {
    return;
  }

  if (is_custom) {
    CustomRoutine (sep, fp);    /* this is the one you modify */
    return;
  }

  switch (format) {
    case 1:                     /* genbank format */
      /* both constants are defined in asn2ff.h */
      flatfmt = is_na ? GENBANK_FMT : GENPEPT_FMT;
      SeqEntryToFlat (sep, fp, flatfmt, RELEASE_MODE);   /* dump like entrez does */
      break;
    case 2:                     /* fasta format */
      SeqEntryToFasta (sep, fp, is_na);   /* defined in tofasta.h */
      break;
    default:
      break;
  }
}
320 
321 /*****************************************************************************
322 *
323 *   CustomRoutine (sep, fp)
324 *     This is just a little model of a customized routine
325 *     Normally you would replace this with one of your own design
326 *     Some examples follow below. In this routine, it prints the
327 *     SeqId and definition line of each entry it finds using SeqEntryExplore.
328 *
329 *****************************************************************************/
/*
 * CustomRoutine
 *   Runs PrintIdDefLine over sep via SeqEntryExplore, passing fp along as
 *   the callback's user data.
 */
static void CustomRoutine (SeqEntryPtr sep, FILE * fp)
{
  Pointer userdata = (Pointer) fp;

  SeqEntryExplore (sep, userdata, PrintIdDefLine);
}
335 
336 
337 /*****************************************************************************
338 *
339 *   PrintIdDefLine
340 *     SeqEntryExplore callback routine that prints the seqids and definition
341 *       lines.
342 *
343 *****************************************************************************/
/*
 * PrintIdDefLine
 *   SeqEntryExplore callback.  For a Bioseq entry it writes one line
 *   ">id[ title]" to the FILE passed in data and shows the id in the
 *   progress monitor when one is active.  Entries that are not a Bioseq
 *   are ignored.  index and indent are not used.
 */
static void PrintIdDefLine (SeqEntryPtr sep, Pointer data,
                            Int4 index, Int2 indent)
{
  BioseqPtr  bsp;
  FILE       *fp;
  Char       buf [256];
  CharPtr    title;

  if (sep == NULL || ! IS_Bioseq (sep))
    return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  fp = (FILE *) data;
  if (bsp == NULL || fp == NULL)
    return;

  title = BioseqGetTitle (bsp);   /* this does not deal with all cases */

  /* SeqIdPrint takes no buffer length, so a long id could run past the
     end of buf; SeqIdWrite is told how large buf is. */
  buf [0] = '\0';
  SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));

  if (pmon != NULL)
    MonitorStrValue (pmon, buf);

  fprintf (fp, ">%s", buf);
  if (title != NULL)
    fprintf (fp, " %s", title);
  fprintf (fp, "\n");
}
368 
369 
370 /*****************************************************************************
371 *
372 *   Other possible Custom routines follow below.
373 *      The first locates all GenBank entries and prints out a short summary
374 *      of their citations. It is called GenBankPubs
375 *
376 *      The second prints the sequence of all CdRegion features in the entry.
377 *      It is called SeqEntryToFeat()
378 *
379 *****************************************************************************/
380 
381 
382 /*****************************************************************************
383 *
384 *   GenBankPubs(sep, fp)
385 *     This set of routines finds GenBank entries and prints a summary of
386 *     their citations
387 *
388 *****************************************************************************/
389 static void GetPubFromGenBank PROTO((SeqEntryPtr sep, Pointer data,
390                          Int4 index, Int2 indent));
391 
GenBankPubs(SeqEntryPtr sep,FILE * fp)392 static void GenBankPubs (SeqEntryPtr sep, FILE * fp)
393 {
394 
395 	SeqEntryExplore(sep, (Pointer)(fp), GetPubFromGenBank);
396 }
397 
398 
/*
 * PubWrite
 *   Writes a one-line summary "accession - journal (year) volume:pages"
 *   to fp for a journal article, and shows it in the progress monitor
 *   when one is active.
 *
 *   accession  text printed at the start of the line
 *   vnp        the Pub to summarize; a PUB_Equiv is handled by recursing
 *              into each member, every choice other than PUB_Article is
 *              ignored
 *   fp         output file
 *
 *   Articles that are not from a journal (cap->from != 1) print nothing.
 */
static void PubWrite(CharPtr accession, ValNodePtr vnp, FILE * fp)
{
	CitArtPtr cap;
	CitJourPtr cjp;
	ValNodePtr tvnp;
	ImprintPtr ip;
	Int2 year = 0;
	CharPtr jta = NULL,
		volume = NULL,
		pages = NULL;
	Char buf[250];

	if (vnp == NULL || fp == NULL)
		return;

	switch (vnp->choice)
	{
		case PUB_Equiv:
			for (tvnp = (ValNodePtr)(vnp->data.ptrvalue); tvnp != NULL;
				tvnp = tvnp->next)
			{
				PubWrite(accession, tvnp, fp);
			}
			break;
		case PUB_Article:
			cap = (CitArtPtr)(vnp->data.ptrvalue);
			if (cap == NULL || cap->from != 1)  /* only journal articles */
				break;
			cjp = (CitJourPtr)(cap->fromptr);
			if (cjp == NULL)
				break;
			ip = cjp->imp;

			/* take the first journal title abbreviation found */
			for (tvnp = cjp->title; tvnp != NULL && jta == NULL;
				tvnp = tvnp->next)
			{
				switch (tvnp->choice)
				{
					case Cit_title_jta:
					case Cit_title_iso_jta:
					case Cit_title_ml_jta:
						jta = (CharPtr)(tvnp->data.ptrvalue);
						break;
					default:
						break;
				}
			}

			if (ip != NULL)
			{
				if (ip->date != NULL && ip->date->data[0] == 1)  /* std date */
					year = (Int2)ip->date->data[1] + 1900;
				volume = ip->volume;
				pages = ip->pages;
			}
			if (volume == NULL)
				volume = "(no volume)";
			if (pages == NULL)
				pages = "(no pages)";
			if (jta == NULL)
				jta = "(no jta)";

			/* The file gets the full text, written directly so that no
			   fixed-size buffer limits the field lengths. */
			fprintf(fp, "%s - %s (%d) %s:%s\n", accession, jta, (int)year,
					volume, pages);

			/* The monitor copy goes through buf; the precisions cap it
			   at 30+3+80+2+6+2+40+1+40 = 204 characters. */
			if (pmon != NULL)
			{
				sprintf(buf, "%.30s - %.80s (%d) %.40s:%.40s", accession,
						jta, (int)year, volume, pages);
				MonitorStrValue(pmon, buf);
			}
			break;
		default:
			break;
	}
	return;
}
466 
/*
 * GetPubFromGenBank
 *   SeqEntryExplore callback.  For a Bioseq that carries a GenBank, EMBL
 *   or DDBJ id with an accession, it passes every publication reachable
 *   from the Bioseq to PubWrite: pub descriptors, pub features, and the
 *   citations attached to any feature.  Output goes to the FILE passed in
 *   data.  Entries that are not a Bioseq, or have no such accession, are
 *   skipped.  index and indent are not used.
 */
static void GetPubFromGenBank (SeqEntryPtr sep, Pointer data,
                               Int4 index, Int2 indent)
{
  BioseqPtr         bsp;
  BioseqContextPtr  bcp;
  SeqFeatPtr        sfp;
  ValNodePtr        vnp;
  FILE              *fp;
  CharPtr           accession = NULL;
  ValNode           tpub;
  TextSeqIdPtr      tsip;
  PubdescPtr        pdp;
  Char              buf [256];

  if (sep == NULL || ! IS_Bioseq (sep))
    return;

  fp = (FILE *) data;   /* get the output file pointer */

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL)
    return;

  if (pmon != NULL)
  {
    /* SeqIdPrint takes no buffer length, so a long id could run past the
       end of buf; SeqIdWrite is told how large buf is. */
    buf [0] = '\0';
    SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
    MonitorStrValue (pmon, buf);
  }

  /* use the first accession found on a GenBank/EMBL/DDBJ id */
  for (vnp = bsp->id; ((vnp != NULL) && (accession == NULL)); vnp = vnp->next)
  {
    switch (vnp->choice)
    {
      case SEQID_GENBANK:
      case SEQID_EMBL:
      case SEQID_DDBJ:
        tsip = (TextSeqIdPtr) (vnp->data.ptrvalue);
        if (tsip != NULL && tsip->accession != NULL)
          accession = tsip->accession;
        break;
      default:
        break;
    }
  }

  if (accession == NULL)
    return;

  bcp = BioseqContextNew (bsp);
  if (bcp == NULL)
    return;

  /* tpub is a stack ValNode that presents a Pubdesc's pub chain to
     PubWrite as a single PUB_Equiv */
  tpub.choice = PUB_Equiv;
  tpub.next = NULL;

                                 /* get any pub descriptors */
  vnp = NULL;
  while ((vnp = BioseqContextGetSeqDescr (bcp, (Int2) Seq_descr_pub, vnp, NULL)) != NULL)
  {
    pdp = (PubdescPtr) (vnp->data.ptrvalue);   /* it's a Pubdesc */
    if (pdp != NULL)
    {
      tpub.data.ptrvalue = pdp->pub;           /* make Pub-equiv into a Pub */
      PubWrite (accession, &tpub, fp);
    }
  }

  sfp = NULL;
  while ((sfp = BioseqContextGetSeqFeat (bcp, 0, sfp, NULL, 0)) != NULL)
  {
    if (sfp->data.choice == 6)   /* a pub feature */
    {
      pdp = (PubdescPtr) (sfp->data.value.ptrvalue);
      if (pdp != NULL)
      {
        tpub.data.ptrvalue = pdp->pub;
        PubWrite (accession, &tpub, fp);
      }
    }
                                 /* get any feature citations */
    if (sfp->cit != NULL)
    {
      for (vnp = sfp->cit->data.ptrvalue; vnp != NULL; vnp = vnp->next)
        PubWrite (accession, vnp, fp);
    }
  }

  BioseqContextFree (bcp);
}
552 
553 
554 /*****************************************************************************
555 *
556 *   SeqEntryToFeat()
557 *     This set of routines finds features (in this case CdRegion) and prints
558 *     the part of the sequence that they cover
559 *
560 *****************************************************************************/
561 
562 
563 #define CHARSPERLINE  50
564 
565 typedef struct expstruct {
566   FILE      *fp;	  /* file to write sequence to */
567   AsnIoPtr  aip;
568   Boolean   is_na;	  /* target sequence nucleic acid? */
569   Uint1     feat;     /* type of feature to find */
570 } ExpStruct, PNTR ExpStructPtr;
571 
572 static void PrintSequence PROTO((BioseqPtr bsp, SeqFeatPtr sfp,
573                            FILE *fp, Boolean is_na));
574 static void LIBCALLBACK GetSeqFeat PROTO((AsnExpOptStructPtr aeosp));
575 
/*
 * SeqEntryToFeat
 *   Alternative custom routine.  Writes sep to a null AsnIo stream with
 *   GetSeqFeat registered on "Seq-feat", so GetSeqFeat is called for each
 *   Seq-feat in the entry and prints the matching ones to fp.  Does
 *   nothing when sep or fp is NULL or when an allocation fails.
 */
static void SeqEntryToFeat (SeqEntryPtr sep, FILE *fp)
{
  AsnExpOptPtr  aeop;
  AsnIoPtr      aip;
  ExpStructPtr  esp;

  if (sep == NULL || fp == NULL)
    return;

  esp = (ExpStructPtr) MemNew (sizeof (ExpStruct));
  if (esp == NULL)
    return;

  aip = AsnIoNullOpen ();
  if (aip != NULL) {
    esp->fp = fp;
    esp->aip = AsnIoNew (ASNIO_TEXT_OUT, fp, NULL, NULL, NULL);
    esp->is_na = is_na;
    esp->feat = 3;  /* look for CdRegion SeqFeat */
    aeop = AsnExpOptNew (aip, "Seq-feat", (Pointer) esp, GetSeqFeat);
    if (aeop != NULL) {
      SeqEntryAsnWrite (sep, aip, NULL);
      fflush (fp);
      AsnExpOptFree (aip, aeop);
    }
    /* Release the text stream created above; it used to be leaked on
       every call.  FALSE: fp belongs to the caller and must stay open. */
    if (esp->aip != NULL) {
      AsnIoFree (esp->aip, FALSE);
      esp->aip = NULL;
    }
    AsnIoClose (aip);
  }
  MemFree (esp);
}
604 
605 
/*
 * PrintSequence
 *   Writes a ">id title" header followed by the residues, CHARSPERLINE
 *   per line, to fp.
 *
 *   bsp    Bioseq that supplies the id and title; only raw or const
 *          Bioseqs whose molecule type agrees with is_na are printed
 *   sfp    when not NULL, only the residues of sfp->location are
 *          printed; otherwise the whole of bsp
 *   fp     output file
 *   is_na  TRUE for nucleic acid (IUPACna), FALSE for protein (IUPACaa)
 *
 *   Values from the SeqPort that are not residues end the current line
 *   and are reported as [Gap], [EOS] or [Invalid Residue].
 */
static void PrintSequence (BioseqPtr bsp, SeqFeatPtr sfp,
                           FILE *fp, Boolean is_na)
{
  /* header holds '>' (1) + id (at most 254) + ' ' (1) + title (at most
     200) + terminator (1) = 457 bytes at most */
  Char        header [512];
  Char        line [CHARSPERLINE + 1];
  Uint1       code;
  Int2        count;
  Uint1       repr;
  Uint1       residue;
  SeqPortPtr  spp;
  CharPtr     title;
  CharPtr     tmp;

  if (bsp == NULL || fp == NULL)
    return;
  if ((Boolean) ISA_na (bsp->mol) != is_na)
    return;
  repr = Bioseq_repr (bsp);
  if (repr != Seq_repr_raw && repr != Seq_repr_const)
    return;

  title = BioseqGetTitle (bsp);
  tmp = StringMove (header, ">");
  /* bounded replacement for SeqIdPrint, which takes no buffer length */
  SeqIdWrite (bsp->id, tmp, PRINTID_FASTA_LONG, 255);
  tmp += StringLen (tmp);
  tmp = StringMove (tmp, " ");
  StringNCpy (tmp, title, 200);
  tmp [200] = '\0';   /* terminate even when the title fills all 200 bytes */
  fprintf (fp, "%s\n", header);
  if (pmon != NULL)
    MonitorStrValue (pmon, header);

  if (is_na) {
    code = Seq_code_iupacna;
  } else {
    code = Seq_code_iupacaa;
  }
  if (sfp != NULL) {
    spp = SeqPortNewByLoc (sfp->location, code);
  } else {
    spp = SeqPortNew (bsp, 0, -1, 0, code);
  }
  if (spp == NULL)
    return;

  count = 0;
  while ((residue = SeqPortGetResidue (spp)) != SEQPORT_EOF) {
    if (! IS_residue (residue)) {
      /* write out what has been collected, then name the special value */
      line [count] = '\0';
      fprintf (fp, "%s\n", line);
      count = 0;
      switch (residue) {
        case SEQPORT_VIRT :
          fprintf (fp, "[Gap]\n");
          break;
        case SEQPORT_EOS :
          fprintf (fp, "[EOS]\n");
          break;
        default :
          fprintf (fp, "[Invalid Residue]\n");
          break;
      }
    } else {
      line [count] = residue;
      count++;
      if (count >= CHARSPERLINE) {
        line [count] = '\0';
        fprintf (fp, "%s\n", line);
        count = 0;
      }
    }
  }
  if (count != 0) {
    line [count] = '\0';
    fprintf (fp, "%s\n", line);
  }
  SeqPortFree (spp);
}
679 
GetSeqFeat(AsnExpOptStructPtr aeosp)680 static void LIBCALLBACK GetSeqFeat (AsnExpOptStructPtr aeosp)
681 
682 {
683   BioseqPtr     bsp;
684   ExpStructPtr  esp;
685   SeqFeatPtr    sfp;
686 
687   if (aeosp->dvp->intvalue == START_STRUCT) {
688     esp = (ExpStructPtr) aeosp->data;
689     sfp = (SeqFeatPtr) aeosp->the_struct;
690     if (esp != NULL && esp->fp != NULL && sfp != NULL &&
691         sfp->data.choice == esp->feat) {
692       bsp = BioseqFind (SeqLocId (sfp->location));
693       if (bsp != NULL) {
694         PrintSequence (bsp, sfp, esp->fp, esp->is_na);
695       }
696     }
697   }
698 }
699 
700 /*****************************************************************************
701 *
702 *   These are the rest of the utility routines for reading the CDROM.
703 *
704 *****************************************************************************/
705 
706 
707 /*****************************************************************************
708 *
709 *   opens a file and reads SeqEntrys
710 *   calls ProcessSeqEntry to do the actual work on it
711 *
712 *****************************************************************************/
/*
 * ProcessFile
 *   Opens the data file described by flp below root, reads every
 *   Seq-entry from it and passes each one to ProcessSeqEntry, appending
 *   the output to outputfile.
 *
 *   flp         list node naming the file (fdir / fname); NULL is a no-op
 *   root        path the file's directory is appended to
 *   outputfile  name of the output file, opened in append mode
 *
 *   Files whose CASN document type does not match the requested sequence
 *   type are opened and closed without being read.
 */
static void ProcessFile (FileListPtr flp, CharPtr root, CharPtr outputfile)
{
  CASN_Handle  casnh;
  FILE         *fp;
  Char         path [PATH_MAX];
  /* Monitor text.  The longest format below is "Processing " (11) +
     name (at most 64) + " Entry " (7) + a long (at most 20) + 1. */
  Char         msg [128];
  CharPtr      shown;
  SeqEntryPtr  sep;
  CASN_Type    type;
  Int4         ctr = 0;

  if (flp == NULL)
    return;

  fp = FileOpen (outputfile, "a");
  if (fp == NULL) {
    Message (MSG_FATAL, "Unable to reopen output file [%s]", outputfile);
    return;
  }

  /* fname can be NULL (FileListNew leaves it so for an empty name);
     never hand NULL to %s */
  shown = (flp->fname != NULL) ? flp->fname : (CharPtr) "";

  if (pmon != NULL) {
    sprintf (msg, "Opening [%.64s]", shown);
    MonitorStrValue (pmon, msg);
  }

  StringCpy (path, root);
  FileBuildPath (path, flp->fdir, NULL);
  FileBuildPath (path, NULL, flp->fname);

  casnh = CASN_Open (path);
  if (casnh != NULL) {
    if (! is_na) {
      type = CASN_Type_aa;
    } else {
      type = CASN_Type_nt;
    }
    if (CASN_DocType (casnh) == type) {
      while ((sep = CASN_NextSeqEntry (casnh)) != NULL) {
        if (pmon != NULL) {
          ctr++;
          /* the precision keeps the text inside msg whatever the name
             length; the old 40-byte buffer could be overrun */
          sprintf (msg, "Processing %.64s Entry %ld", shown, (long) ctr);
          MonitorStrValue (pmon, msg);
        }
        ProcessSeqEntry (sep, fp);
        SeqEntryFree (sep);
      }
    }
    CASN_Close (casnh);
  } else {
    Message (MSG_ERROR, "Can't open [%s]", path);
  }

  FileClose (fp);
}
762 
763 
764 /*****************************************************************************
765 *
766 *  Mounts the appropriate cdrom
767 *  Calls ProcessFile to Open and read through the file
768 *
769 *****************************************************************************/
/*
 * ProcessFileList
 *   Walks the file list, skipping its first node, and calls ProcessFile
 *   for every following node.  CdMountEntrezVolume is called whenever a
 *   node's CD number differs from that of the node before it; if that
 *   call fails a fatal message is posted and the root path is emptied.
 */
static void ProcessFileList (FileListPtr flp, CharPtr outputfile)
{
  Int2         current_cd;
  FileListPtr  node;
  FileListPtr  following;
  Char         volroot [PATH_MAX];

  if (flp == NULL)
    return;

  volroot [0] = '\0';
  current_cd = flp->cdnum;

  for (node = flp->next; node != NULL; node = following) {
    following = node->next;
    if (current_cd != node->cdnum &&
        ! CdMountEntrezVolume (node->cdnum, volroot, sizeof (volroot))) {
      Message (MSG_FATAL, "CdMountEntrezVolume failed");
      volroot [0] = '\0';
    }
    ProcessFile (node, volroot, outputfile);
    current_cd = node->cdnum;
  }
}
795 
796 /*****************************************************************************
797 *
798 *   Add a new file list element
799 *
800 *****************************************************************************/
/*
 * FileListNew
 *   Allocates a list node for (cdnum, fdir, fname).  When flp is not
 *   NULL the node is linked in after flp, behind every following node
 *   whose cdnum is less than or equal to cdnum.  fdir and fname are
 *   copied with StringSave when non-empty and left NULL otherwise.
 *
 *   Returns the new node, or NULL when the allocation fails.
 */
static FileListPtr FileListNew (FileListPtr flp, Int2 cdnum,
                                CharPtr fdir, CharPtr fname)
{
  FileListPtr  node;
  FileListPtr  prev;

  node = (FileListPtr) MemNew (sizeof (FileList));
  if (node == NULL)
    return NULL;

  node->cdnum = cdnum;
  if (fdir != NULL && fdir [0] != '\0')
    node->fdir = StringSave (fdir);
  if (fname != NULL && fname [0] != '\0')
    node->fname = StringSave (fname);

  if (flp != NULL) {
    /* find the last node that should come before the new one */
    prev = flp;
    while (prev->next != NULL && prev->next->cdnum <= cdnum)
      prev = prev->next;
    node->next = prev->next;
    prev->next = node;
  }

  return node;
}
826 
827 /*****************************************************************************
828 *
829 *   Get all appropriate files to search
830 *
831 *****************************************************************************/
EnumerateFiles(int cdnum,const char * fdir,const char * fname,long fsize,void * opaque_data)832 static Boolean LIBCALLBACK EnumerateFiles (int cdnum, const char *fdir,
833                                            const char *fname, long fsize,
834                                            void *opaque_data)
835 
836 {
837   FileListPtr      flp;
838   FileListPtr PNTR head;
839 
840   head = (FileListPtr PNTR) opaque_data;
841   flp = NULL;
842   if (head != NULL) {
843     flp = FileListNew (*head, (Int2) cdnum, (CharPtr) fdir, (CharPtr) fname);
844     if (*head == NULL) {
845       *head = flp;
846     }
847   } else {
848     flp = FileListNew (NULL, (Int2) cdnum, (CharPtr) fdir, (CharPtr) fname);
849   }
850   return TRUE;
851 }
852