1 /* cdscan.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: cdscan.c
27 *
28 * Author: Kans, Schuler, Ostell
29 *
30 * Version Creation Date: 2/26/95
31 *
32 * $Revision: 6.1 $
33 *
34 * File Description:
35 * scans through sequence records on the Entrez discs
36 * This program is meant to serve as a model for programs that scan
37 * all the files on the Entrez CDROM disks. Basically it does some
38 * setup in the main routine to find the disks and get a list of files
39 * on them for nucleic acid or protein sequences. It then goes through
40 * each file and reads each Seq-entry from them. On the Entrez CDROMs
41 * the Seq-entries are Huffman compressed, so the Casn calls decompress
42 * them for you. Once you have a Seq-entry, you can do whatever you
43 * want with it. In this program we give the options of printing as
44 * FASTA file or as GenBank or GenPept file. You can modify this part
45 * to do whatever you want. The function that receives the SeqEntry is
46 * called "ProcessSeqEntry".
47 *
48 * This function is passed one SeqEntry at a time by the CDROM scanning
49 * Routines. Depending on the global variables it will call routines to
50 * make fasta, genbank format, or to call a custom routine. The default
51 * custom routine (CustomRoutine) just prints the SeqIds of the sequences
52 * to the outputfile and to the progress monitor.
53 *
54 * At the end of the file are some custom routines written for various
55 * purposes that could be substituted or modified for other purposes.
56 * To activate, call them instead of "CustomRoutine"
57 *
58 * The first locates all GenBank entries and prints out a short summary
59 * of their citations. It is called GenBankPubs()
60 *
61 * The second prints the sequence of all CdRegion features in the entry.
62 * It is called SeqEntryToFeat()
63 *
64 *
65 * Before trying any of this, be sure you have installed the Entrez
* application itself and ensured that it works. This program uses the
* same configuration file and will not run if Entrez has not been
68 * properly installed.
69 *
70 * Modifications:
71 * --------------------------------------------------------------------------
72 * Date Name Description of modification
73 * ------- ---------- -----------------------------------------------------
74 *
75 *
76 * ==========================================================================
77 */
78
79 #ifndef _NEW_CdEntrez_
80 #define _NEW_CdEntrez_
81 #endif
82
83 #include <ncbi.h>
84 #include <casn.h>
85 #include <accentr.h>
86 #include <cdromlib.h>
87 #include <seqport.h>
88 #include <asn2ff.h>
89 #include <tofasta.h>
90
91 /*****************************************************************************
92 *
93 * structs used by main routines
94 *
95 *****************************************************************************/
96
97 typedef struct filelist {
98 Int2 cdnum;
99 CharPtr fdir;
100 CharPtr fname;
101 struct filelist PNTR next;
102 } FileList, PNTR FileListPtr;
103
104 /*****************************************************************************
105 *
106 * Function prototypes for routines in this module
107 *
108 *****************************************************************************/
109 static Boolean LIBCALLBACK EnumerateFiles PROTO((int cdnum, const char *fdir,
110 const char *fname, long fsize,
111 void *opaque_data));
112
113 static FileListPtr FileListNew PROTO((FileListPtr flp, Int2 cdnum,
114 CharPtr fdir, CharPtr fname));
115
116 static void ProcessFileList PROTO((FileListPtr flp, CharPtr outputfile));
117
118 static void ProcessFile PROTO((FileListPtr flp, CharPtr root, CharPtr outputfile));
119
120 static void ProcessSeqEntry PROTO((SeqEntryPtr sep, FILE *fp));
121
122 static void CustomRoutine PROTO((SeqEntryPtr sep, FILE * fp));
123
124 static void PrintIdDefLine PROTO((SeqEntryPtr sep, Pointer data,
125 Int4 index, Int2 indent));
126
127 /*****************************************************************************
128 *
129 * Static Data used by the main routines
130 *
131 *****************************************************************************/
132
133 static Char root [PATH_MAX];
134 static EntrezInfoPtr eip;
135 static EntrezDivInfo *div_info;
136
137 static Int2 format; /* 1 = GenBank, 2 = FASTA */
138 static Boolean is_na, /* TRUE = nucleic acids, FALSE = proteins */
139 is_custom; /* call custom process instead of std ones */
140 static MonitorPtr pmon = NULL; /* progress monitor */
141
142 #define NUMARGS 5
143
144 Args myargs [NUMARGS] = {
145 {"Scan DNA (1) or Protein (2)", "1", "1", "2", FALSE, 's', ARG_INT, 0.0, 0, NULL},
146 {"Output format: GenBank (1) or FASTA (2)", "1", "1", "2", TRUE, 'f', ARG_INT, 0.0, 0, NULL},
147 {"Call custom process", "F", NULL, NULL, TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
148 {"Show progress monitor", "F", NULL, NULL, TRUE, 'p', ARG_BOOLEAN, 0.0, 0, NULL},
149 {"Output File", "stdout", NULL, NULL, FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}
150 };
151
152 /*****************************************************************************
153 *
154 * This is the main program.
155 * It reads command line or initial dialogue arguments
156 * It initializes parse trees, entrez
157 * It makes a list of the relevant files to search
158 * It then calls ProcessFileList to open each file and process it
159 *
160 *****************************************************************************/
Main(void)161 Int2 Main (void)
162 {
163 Char div [8];
164 FileListPtr flp = NULL;
165 FILE *fp;
166 Int2 i;
167 Boolean is_network, did_init=FALSE;
168 FileListPtr next;
169 CharPtr outputfile;
170
171 if (! GetArgs ("CdScan", NUMARGS, myargs)) /* get input args */
172 goto ret;
173
174 /* process input args */
175 if (myargs[0].intvalue == 1)
176 is_na = TRUE; /* scan nucleic acids */
177 else
178 is_na = FALSE; /* scan proteins */
179 format = (Int2)(myargs[1].intvalue);
180 is_custom = (Boolean)(myargs[2].intvalue);
181 if (myargs[3].intvalue) /* show progress */
182 {
183 pmon = MonitorStrNew("CdScan", 40);
184 }
185 outputfile = myargs[4].strvalue; /* output file name */
186
187 if (pmon != NULL)
188 MonitorStrValue(pmon, "Reading Parse Trees");
189
190 if (! SeqEntryLoad() || ! SubmitAsnLoad()) /* read ASN.1 parse trees */
191 {
192 Message(MSG_ERROR, "Can't open parse trees");
193 goto ret;
194 }
195
196 if (! PrintTemplateSetLoad ("asn2ff.prt"))
197 {
198 Message(MSG_ERROR, "Can't load print templates");
199 goto ret;
200 }
201
202 if (pmon != NULL)
203 MonitorStrValue(pmon, "Initializing Entrez");
204
205 if (! EntrezInit ("cdscan", FALSE, &is_network)) /* init Entrez */
206 {
207 Message(MSG_ERROR, "Can't initialize Entrez");
208 goto ret;
209 }
210
211 did_init = TRUE;
212 if (is_network)
213 {
214 Message (MSG_ERROR, "Network service does not allow scanning");
215 goto ret;
216 }
217
218 if (pmon != NULL)
219 MonitorStrValue(pmon, "Building File List");
220
221 eip = EntrezGetInfo (); /* set up the file lists */
222 if ((eip == NULL) || (eip->div_info == NULL))
223 {
224 Message(MSG_ERROR, "Can't find Entrez file info");
225 goto ret;
226 }
227
228
229 flp = FileListNew (NULL, INT2_MIN, NULL, NULL);
230 if (flp == NULL)
231 {
232 Message(MSG_ERROR, "Can't allocate file list");
233 goto ret;
234 }
235
236 div_info = eip->div_info;
237 for (i = 0; i < eip->div_count; i++)
238 {
239 StringNCpy (div, div_info [i].tag, sizeof (div) - 1);
240 if (! is_na)
241 {
242 CdEnumFiles (CdDir_rec, TYP_AA, div, EnumerateFiles, &flp);
243 }
244 else
245 {
246 CdEnumFiles (CdDir_rec, TYP_NT, div, EnumerateFiles, &flp);
247 }
248 }
249
250 fp = FileOpen (outputfile, "w"); /* test that we can open output file */
251 if (fp == NULL)
252 {
253 Message(MSG_ERROR, "Can't open [%s]", outputfile);
254 goto ret;
255 }
256
257 FileClose (fp); /* will be reopened for each input file */
258
259 ProcessFileList (flp, outputfile); /* process the file list */
260
261 ret: /* clean up */
262
263 if (pmon != NULL) /* close the progress monitor */
264 MonitorFree(pmon);
265
266 if (did_init)
267 EntrezFini(); /* close entrez */
268
269 while (flp != NULL) /* free file list */
270 {
271 next = flp->next;
272 MemFree (flp->fdir);
273 MemFree (flp->fname);
274 MemFree (flp);
275 flp = next;
276 }
277
278 return 0;
279 }
280
281 /*****************************************************************************
282 *
283 * ProcessSeqEntry (sep, fp)
284 *
285 *
286 * This function is passed one SeqEntry at a time by the CDROM scanning
287 * Routines. Depending on the global variables it will call routines to
288 * make fasta, genbank format, or to call a custom routine. The default
289 * custom routine (CustomRoutine) just prints the SeqIds of the sequences
290 * to the outputfile and to the progress monitor.
291 *
292 * At the end of the file are some custom routines written for various
293 * purposes that could be substituted or modified for other purposes.
* To activate, call them instead of "CustomRoutine"
295 *
296 *****************************************************************************/
/*
 * Dispatches one Seq-entry to the output routine selected by the globals
 * is_custom, format and is_na.  Does nothing when sep or fp is NULL, or
 * when format is neither 1 nor 2 and no custom routine was requested.
 */
static void ProcessSeqEntry (SeqEntryPtr sep, FILE *fp)
{
  Uint1  flat_style;

  if (sep == NULL)
    return;
  if (fp == NULL)
    return;

  if (is_custom) {
    CustomRoutine (sep, fp);           /* this is the one you modify */
    return;
  }

  switch (format) {
    case 1 :                           /* genbank format */
      /* both constants are defined in asn2ff.h */
      flat_style = (Uint1) (is_na ? GENBANK_FMT : GENPEPT_FMT);
      SeqEntryToFlat (sep, fp, flat_style, RELEASE_MODE);
      break;
    case 2 :                           /* fasta format, see tofasta.h */
      SeqEntryToFasta (sep, fp, is_na);
      break;
    default :
      break;
  }
}
320
321 /*****************************************************************************
322 *
323 * CustomRoutine (sep, fp)
324 * This is just a little model of a customized routine
325 * Normally you would replace this with one of your own design
326 * Some examples follow below. In this routine, it prints the
327 * SeqId and definition line of each entry it finds using SeqEntryExplore.
328 *
329 *****************************************************************************/
/*
 * Sample custom handler: walks the Seq-entry with SeqEntryExplore and lets
 * PrintIdDefLine report each element, passing the output file as user data.
 */
static void CustomRoutine (SeqEntryPtr sep, FILE * fp)
{
  Pointer  userdata;

  userdata = (Pointer) fp;
  SeqEntryExplore (sep, userdata, PrintIdDefLine);
}
335
336
337 /*****************************************************************************
338 *
339 * PrintIdDefLine
340 * SeqEntryExplore callback routine that prints the seqids and definition
341 * lines.
342 *
343 *****************************************************************************/
/*
 * SeqEntryExplore callback.  For a Bioseq element it writes one line of the
 * form ">id title" to the FILE passed in data and, when the progress
 * monitor is active, shows the id there as well.  Non-Bioseq elements are
 * ignored.  index and indent are not used.
 */
static void PrintIdDefLine (SeqEntryPtr sep, Pointer data,
                            Int4 index, Int2 indent)
{
  BioseqPtr  bsp;
  FILE       *fp;
  Char       buf [256];
  CharPtr    title;

  if (! IS_Bioseq (sep))
    return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  fp = (FILE *) data;
  if (bsp == NULL || fp == NULL)
    return;

  buf [0] = '\0';
  title = BioseqGetTitle (bsp);        /* this does not deal with all cases */

  /*
   * SeqIdPrint takes no buffer length, and a long-form FASTA id (all ids of
   * the Bioseq joined together) is not guaranteed to fit in a small fixed
   * buffer.  SeqIdWrite is given the available space explicitly; one byte
   * is held back so the bound is safe whether or not the length argument
   * counts the terminator.
   */
  SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);

  if (pmon != NULL)
    MonitorStrValue (pmon, buf);

  fprintf (fp, ">%s", buf);
  if (title != NULL)
    fprintf (fp, " %s", title);
  fprintf (fp, "\n");
}
368
369
370 /*****************************************************************************
371 *
372 * Other possible Custom routines follow below.
373 * The first locates all GenBank entries and prints out a short summary
374 * of their citations. It is called GenBankPubs
375 *
376 * The second prints the sequence of all CdRegion features in the entry.
377 * It is called SeqEntryToFeat()
378 *
379 *****************************************************************************/
380
381
382 /*****************************************************************************
383 *
384 * GenBankPubs(sep, fp)
385 * This set of routines finds GenBank entries and prints a summary of
386 * their citations
387 *
388 *****************************************************************************/
389 static void GetPubFromGenBank PROTO((SeqEntryPtr sep, Pointer data,
390 Int4 index, Int2 indent));
391
/*
 * Alternative custom handler: explores the Seq-entry and lets
 * GetPubFromGenBank report the citations of each element, with the output
 * file passed along as user data.
 */
static void GenBankPubs (SeqEntryPtr sep, FILE * fp)
{
  Pointer  userdata;

  userdata = (Pointer) fp;
  SeqEntryExplore (sep, userdata, GetPubFromGenBank);
}
397
398
/*
 * Writes a one-line summary "accession - journal (year) volume:pages" for
 * a publication.
 *
 *   accession  text printed in front of every summary line
 *   vnp        the Pub to report; a PUB_Equiv is expanded recursively,
 *              a PUB_Article taken from a journal (from == 1) is printed,
 *              every other kind is skipped
 *   fp         output file
 *
 * Missing journal title, volume or pages are replaced by "(no ...)"
 * placeholders; a date that is not a std date is printed as year 0.
 */
static void PubWrite(CharPtr accession, ValNodePtr vnp, FILE * fp)
{
  CitArtPtr   cap;
  CitJourPtr  cjp;
  ValNodePtr  tvnp;
  ImprintPtr  ip;
  Int2        year = 0;
  CharPtr     jta = NULL,
              volume = NULL,
              pages = NULL;
  Char        buf [250];

  if (vnp == NULL || fp == NULL)
    return;

  switch (vnp->choice)
  {
    case PUB_Equiv:
      for (tvnp = (ValNodePtr) (vnp->data.ptrvalue); tvnp != NULL;
           tvnp = tvnp->next)
      {
        PubWrite (accession, tvnp, fp);
      }
      break;

    case PUB_Article:
      cap = (CitArtPtr) (vnp->data.ptrvalue);
      if (cap == NULL || cap->from != 1)     /* only articles from a journal */
        break;
      cjp = (CitJourPtr) (cap->fromptr);
      if (cjp == NULL)
        break;
      ip = cjp->imp;

      /* take the first journal title abbreviation of a known kind */
      for (tvnp = cjp->title; tvnp != NULL && jta == NULL; tvnp = tvnp->next)
      {
        switch (tvnp->choice)
        {
          case Cit_title_jta:
          case Cit_title_iso_jta:
          case Cit_title_ml_jta:
            jta = (CharPtr) (tvnp->data.ptrvalue);
            break;
          default:
            break;
        }
      }

      /* the imprint and its date are dereferenced only when present */
      if (ip != NULL)
      {
        if (ip->date != NULL && ip->date->data [0] == 1)   /* std date */
          year = (Int2) ip->date->data [1] + 1900;
        volume = ip->volume;
        pages = ip->pages;
      }
      if (volume == NULL)
        volume = "(no volume)";
      if (pages == NULL)
        pages = "(no pages)";
      if (jta == NULL)
        jta = "(no jta)";

      /*
       * The file gets the full text straight from fprintf, so no
       * intermediate buffer can overflow regardless of field lengths.
       */
      fprintf (fp, "%s - %s (%d) %s:%s\n", accession, jta, (int) year,
               volume, pages);

      if (pmon != NULL)
      {
        /*
         * The monitor needs a single string.  Precisions cap every %s so
         * the worst case (30+3+100+2+6+2+40+1+40 characters plus the
         * terminator) stays inside buf.
         */
        sprintf (buf, "%.30s - %.100s (%d) %.40s:%.40s", accession, jta,
                 (int) year, volume, pages);
        MonitorStrValue (pmon, buf);
      }
      break;

    default:
      break;
  }
}
466
/*
 * SeqEntryExplore callback.  For a Bioseq that carries a GenBank, EMBL or
 * DDBJ id with an accession, it reports through PubWrite:
 *   - every pub descriptor that applies to the Bioseq,
 *   - every pub feature (data.choice == 6) found through the context,
 *   - every citation attached to any feature.
 * data is the output FILE; index and indent are not used.
 */
static void GetPubFromGenBank (SeqEntryPtr sep, Pointer data,
                               Int4 index, Int2 indent)
{
  BioseqPtr         bsp;
  BioseqContextPtr  bcp;
  SeqFeatPtr        sfp;
  ValNodePtr        vnp;
  FILE              *fp;
  CharPtr           accession = NULL;
  ValNode           tpub;
  TextSeqIdPtr      tsip;
  PubdescPtr        pdp;
  Char              buf [256];

  if (! IS_Bioseq (sep))
    return;

  fp = (FILE *) data;                  /* get the output file pointer */
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL || fp == NULL)
    return;

  if (pmon != NULL)
  {
    /*
     * SeqIdPrint takes no buffer length; SeqIdWrite is told how much room
     * there is so a long FASTA-style id cannot run past buf.
     */
    buf [0] = '\0';
    SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
    MonitorStrValue (pmon, buf);
  }

  /* first GenBank/EMBL/DDBJ id that actually has an accession */
  for (vnp = bsp->id; ((vnp != NULL) && (accession == NULL)); vnp = vnp->next)
  {
    switch (vnp->choice)
    {
      case SEQID_GENBANK:
      case SEQID_EMBL:
      case SEQID_DDBJ:
        tsip = (TextSeqIdPtr) (vnp->data.ptrvalue);
        if (tsip != NULL && tsip->accession != NULL)
          accession = tsip->accession;
        break;
      default:
        break;
    }
  }

  if (accession == NULL)
    return;

  bcp = BioseqContextNew (bsp);
  if (bcp == NULL)
    return;

  /* tpub wraps a Pub-equiv chain so PubWrite can treat it as one Pub */
  tpub.choice = PUB_Equiv;
  tpub.next = NULL;
  tpub.data.ptrvalue = NULL;

  /* get any pub descriptors */
  vnp = NULL;
  while ((vnp = BioseqContextGetSeqDescr (bcp, (Int2) Seq_descr_pub, vnp, NULL)) != NULL)
  {
    pdp = (PubdescPtr) (vnp->data.ptrvalue);       /* it's a Pubdesc */
    if (pdp != NULL)
    {
      tpub.data.ptrvalue = pdp->pub;               /* make Pub-equiv into a Pub */
      PubWrite (accession, &tpub, fp);
    }
  }

  sfp = NULL;
  while ((sfp = BioseqContextGetSeqFeat (bcp, 0, sfp, NULL, 0)) != NULL)
  {
    if (sfp->data.choice == 6)                     /* a pub feature */
    {
      pdp = (PubdescPtr) (sfp->data.value.ptrvalue);
      if (pdp != NULL)
      {
        tpub.data.ptrvalue = pdp->pub;
        PubWrite (accession, &tpub, fp);
      }
    }
    /* get any feature citations */
    if (sfp->cit != NULL)
    {
      for (vnp = sfp->cit->data.ptrvalue; vnp != NULL; vnp = vnp->next)
        PubWrite (accession, vnp, fp);
    }
  }

  BioseqContextFree (bcp);
}
552
553
554 /*****************************************************************************
555 *
556 * SeqEntryToFeat()
557 * This set of routines finds features (in this case CdRegion) and prints
558 * the part of the sequence that they cover
559 *
560 *****************************************************************************/
561
562
563 #define CHARSPERLINE 50
564
565 typedef struct expstruct {
566 FILE *fp; /* file to write sequence to */
567 AsnIoPtr aip;
568 Boolean is_na; /* target sequence nucleic acid? */
569 Uint1 feat; /* type of feature to find */
570 } ExpStruct, PNTR ExpStructPtr;
571
572 static void PrintSequence PROTO((BioseqPtr bsp, SeqFeatPtr sfp,
573 FILE *fp, Boolean is_na));
574 static void LIBCALLBACK GetSeqFeat PROTO((AsnExpOptStructPtr aeosp));
575
/*
 * Prints the sequence covered by every CdRegion feature of a Seq-entry.
 *
 * The entry is "written" to a null ASN.1 stream that has an export option
 * registered for "Seq-feat"; GetSeqFeat is invoked for each feature on the
 * way through and does the printing to fp.  Nothing happens when sep or fp
 * is NULL or when a required allocation fails.
 */
static void SeqEntryToFeat (SeqEntryPtr sep, FILE *fp)
{
  AsnExpOptPtr  aeop;
  AsnIoPtr      aip;
  ExpStructPtr  esp;

  if (sep == NULL || fp == NULL)
    return;

  esp = MemNew (sizeof (ExpStruct));
  if (esp == NULL)
    return;

  aip = AsnIoNullOpen ();
  if (aip != NULL) {
    esp->fp = fp;
    /*
     * No text AsnIo is created on fp here: the callback writes through
     * esp->fp only, and a stream stored in esp->aip was never released,
     * so one was lost for every Seq-entry processed.
     */
    esp->aip = NULL;
    esp->is_na = is_na;
    esp->feat = 3;                     /* look for CdRegion SeqFeat */
    aeop = AsnExpOptNew (aip, "Seq-feat", (Pointer) esp, GetSeqFeat);
    if (aeop != NULL) {
      SeqEntryAsnWrite (sep, aip, NULL);
      fflush (fp);
      AsnExpOptFree (aip, aeop);
    }
    AsnIoClose (aip);
  }
  MemFree (esp);
}
604
605
/*
 * Writes a FASTA-like record for a Bioseq: a ">id title" header followed
 * by the residues in lines of CHARSPERLINE characters.
 *
 *   bsp    Bioseq supplying id and title (and the residues when sfp is NULL)
 *   sfp    when not NULL, only the feature's location is printed
 *   fp     output file
 *   is_na  the record is printed only if the Bioseq's molecule class
 *          matches this flag
 *
 * Only raw and constructed Bioseqs are printed.  Non-residue values from
 * the SeqPort end the current line and are reported as "[Gap]", "[EOS]"
 * or "[Invalid Residue]".
 */
static void PrintSequence (BioseqPtr bsp, SeqFeatPtr sfp,
                           FILE *fp, Boolean is_na)
{
  Char        header [512];
  Char        line [CHARSPERLINE + 1];
  Uint1       code;
  Int2        count;
  Uint1       repr;
  Uint1       residue;
  SeqPortPtr  spp;
  CharPtr     title;
  CharPtr     tmp;

  if (bsp == NULL || fp == NULL)
    return;
  if ((Boolean) ISA_na (bsp->mol) != is_na)
    return;
  repr = Bioseq_repr (bsp);
  if (repr != Seq_repr_raw && repr != Seq_repr_const)
    return;

  /*
   * Header layout: '>' + id (at most 255 characters) + ' ' + at most 200
   * characters of title + terminator = 458 bytes worst case, which fits
   * in header.  The id is written with an explicit limit because
   * SeqIdPrint has no length argument, and the title copy is terminated by
   * hand because a bounded copy that hits its limit leaves no '\0'.
   */
  title = BioseqGetTitle (bsp);
  tmp = StringMove (header, ">");
  SeqIdWrite (bsp->id, tmp, PRINTID_FASTA_LONG, 255);
  tmp = header + StringLen (header);
  tmp = StringMove (tmp, " ");
  if (title != NULL) {
    StringNCpy (tmp, title, 200);
    tmp [200] = '\0';
  }
  fprintf (fp, "%s\n", header);
  if (pmon != NULL)
    MonitorStrValue (pmon, header);

  if (is_na) {
    code = Seq_code_iupacna;
  } else {
    code = Seq_code_iupacaa;
  }
  if (sfp != NULL) {
    spp = SeqPortNewByLoc (sfp->location, code);
  } else {
    spp = SeqPortNew (bsp, 0, -1, 0, code);
  }
  if (spp == NULL)
    return;

  count = 0;
  while ((residue = SeqPortGetResidue (spp)) != SEQPORT_EOF) {
    if (! IS_residue (residue)) {
      /* flush what has been collected, then name the special value */
      line [count] = '\0';
      fprintf (fp, "%s\n", line);
      count = 0;
      switch (residue) {
        case SEQPORT_VIRT :
          fprintf (fp, "[Gap]\n");
          break;
        case SEQPORT_EOS :
          fprintf (fp, "[EOS]\n");
          break;
        default :
          fprintf (fp, "[Invalid Residue]\n");
          break;
      }
    } else {
      line [count] = residue;
      count++;
      if (count >= CHARSPERLINE) {
        line [count] = '\0';
        fprintf (fp, "%s\n", line);
        count = 0;
      }
    }
  }
  if (count != 0) {                    /* last, partially filled line */
    line [count] = '\0';
    fprintf (fp, "%s\n", line);
  }
  SeqPortFree (spp);
}
679
GetSeqFeat(AsnExpOptStructPtr aeosp)680 static void LIBCALLBACK GetSeqFeat (AsnExpOptStructPtr aeosp)
681
682 {
683 BioseqPtr bsp;
684 ExpStructPtr esp;
685 SeqFeatPtr sfp;
686
687 if (aeosp->dvp->intvalue == START_STRUCT) {
688 esp = (ExpStructPtr) aeosp->data;
689 sfp = (SeqFeatPtr) aeosp->the_struct;
690 if (esp != NULL && esp->fp != NULL && sfp != NULL &&
691 sfp->data.choice == esp->feat) {
692 bsp = BioseqFind (SeqLocId (sfp->location));
693 if (bsp != NULL) {
694 PrintSequence (bsp, sfp, esp->fp, esp->is_na);
695 }
696 }
697 }
698 }
699
700 /*****************************************************************************
701 *
702 * These are the rest of the utility routines for reading the CDROM.
703 *
704 *****************************************************************************/
705
706
707 /*****************************************************************************
708 *
709 * opens a file and reads SeqEntrys
710 * calls ProcessSeqEntry to do the actual work on it
711 *
712 *****************************************************************************/
/*
 * Reads every Seq-entry from one record file and passes each to
 * ProcessSeqEntry.
 *
 *   flp         list node naming the file (fdir / fname); NULL is ignored
 *   root        path prefix of the mounted volume; fdir and fname are
 *               appended to it with FileBuildPath
 *   outputfile  name of the output file, reopened here in append mode
 *
 * The file is skipped silently when its CASN document type does not match
 * the sequence class selected by is_na.  Failure to open the input file is
 * reported with MSG_ERROR, failure to reopen the output with MSG_FATAL.
 */
static void ProcessFile (FileListPtr flp, CharPtr root, CharPtr outputfile)
{
  CASN_Handle  casnh;
  FILE         *fp;
  Char         path [PATH_MAX];
  Char         msg [128];
  CharPtr      shown;
  SeqEntryPtr  sep;
  CASN_Type    type;
  Int4         ctr = 0;

  if (flp == NULL)
    return;

  fp = FileOpen (outputfile, "a");
  if (fp == NULL) {
    Message (MSG_FATAL, "Unable to reopen output file [%s]", outputfile);
    return;
  }

  /*
   * Progress text goes through a fixed buffer, so the file name is capped
   * with a precision and a missing name is shown as empty instead of being
   * handed to %s as NULL.
   */
  shown = (flp->fname != NULL) ? flp->fname : "";

  if (pmon != NULL)
  {
    sprintf (msg, "Opening [%.100s]", shown);
    MonitorStrValue (pmon, msg);
  }

  StringCpy (path, root);
  FileBuildPath (path, flp->fdir, NULL);
  FileBuildPath (path, NULL, flp->fname);

  casnh = CASN_Open (path);
  if (casnh != NULL) {
    if (! is_na) {
      type = CASN_Type_aa;
    } else {
      type = CASN_Type_nt;
    }
    if (CASN_DocType (casnh) == type) {
      while ((sep = CASN_NextSeqEntry (casnh)) != NULL) {
        if (pmon != NULL)
        {
          ctr++;
          /* 11 + 80 + 7 + up to 20 digits + '\0' fits in msg */
          sprintf (msg, "Processing %.80s Entry %ld", shown, (long) ctr);
          MonitorStrValue (pmon, msg);
        }
        ProcessSeqEntry (sep, fp);
        SeqEntryFree (sep);
      }
    }
    CASN_Close (casnh);
  }
  else
    Message (MSG_ERROR, "Can't open [%s]", path);

  FileClose (fp);
}
762
763
764 /*****************************************************************************
765 *
766 * Mounts the appropriate cdrom
767 * Calls ProcessFile to Open and read through the file
768 *
769 *****************************************************************************/
/*
 * Walks the file list behind its head node and runs ProcessFile on every
 * entry.  Whenever an entry's CD number differs from that of the entry
 * before it (the head node counts as the first "previous" entry),
 * CdMountEntrezVolume is called to obtain the root path for that CD; on
 * failure a fatal message is posted and the root path is emptied.
 */
static void ProcessFileList (FileListPtr flp, CharPtr outputfile)
{
  Char         mount_root [PATH_MAX];
  Int2         current_cd;
  FileListPtr  node;
  FileListPtr  following;

  if (flp == NULL)
    return;

  mount_root [0] = '\0';
  current_cd = flp->cdnum;

  for (node = flp->next; node != NULL; node = following) {
    following = node->next;            /* fetched before the node is used */

    if (node->cdnum != current_cd &&
        ! CdMountEntrezVolume (node->cdnum, mount_root, sizeof (mount_root))) {
      Message (MSG_FATAL, "CdMountEntrezVolume failed");
      mount_root [0] = '\0';
    }

    ProcessFile (node, mount_root, outputfile);
    current_cd = node->cdnum;
  }
}
795
796 /*****************************************************************************
797 *
798 * Add a new file list element
799 *
800 *****************************************************************************/
/*
 * Allocates a file list node and, when flp is not NULL, links it into the
 * list that starts at flp: the node goes after the last existing entry
 * whose cdnum is less than or equal to the new one, so the list stays
 * ordered by CD number and equal numbers keep their insertion order.
 * fdir and fname are copied only when they are non-empty strings.
 *
 * Returns the new node, or NULL when the allocation fails.
 */
static FileListPtr FileListNew (FileListPtr flp, Int2 cdnum,
                                CharPtr fdir, CharPtr fname)
{
  FileListPtr  node;
  FileListPtr  prev;

  node = (FileListPtr) MemNew (sizeof (FileList));
  if (node == NULL)
    return NULL;

  if (flp != NULL) {
    for (prev = flp;
         prev->next != NULL && prev->next->cdnum <= cdnum;
         prev = prev->next) {
      continue;
    }
    node->next = prev->next;
    prev->next = node;
  }

  node->cdnum = cdnum;
  if (fdir != NULL && fdir [0] != '\0') {
    node->fdir = StringSave (fdir);
  }
  if (fname != NULL && fname [0] != '\0') {
    node->fname = StringSave (fname);
  }
  return node;
}
826
827 /*****************************************************************************
828 *
829 * Get all appropriate files to search
830 *
831 *****************************************************************************/
EnumerateFiles(int cdnum,const char * fdir,const char * fname,long fsize,void * opaque_data)832 static Boolean LIBCALLBACK EnumerateFiles (int cdnum, const char *fdir,
833 const char *fname, long fsize,
834 void *opaque_data)
835
836 {
837 FileListPtr flp;
838 FileListPtr PNTR head;
839
840 head = (FileListPtr PNTR) opaque_data;
841 flp = NULL;
842 if (head != NULL) {
843 flp = FileListNew (*head, (Int2) cdnum, (CharPtr) fdir, (CharPtr) fname);
844 if (*head == NULL) {
845 *head = flp;
846 }
847 } else {
848 flp = FileListNew (NULL, (Int2) cdnum, (CharPtr) fdir, (CharPtr) fname);
849 }
850 return TRUE;
851 }
852