1 /*   entrcmd.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  entrcmd.c
27 *
28 * Author:  Epstein
29 *
30 * Version Creation Date:   1/4/94
31 *
32 * $Revision: 6.5 $
33 *
34 * File Description:
35 *       non-interactive command line interface for Entrez
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * Date     Name        Description of modification
40 * -------  ----------  -----------------------------------------------------
41 *
42 * $Log: entrcmd.c,v $
43 * Revision 6.5  2011/12/19 18:40:17  gouriano
44 * Corrected printf formatting. NOJIRA
45 *
46 * Revision 6.4  1999/08/11 18:58:09  kans
47 * changed FindNuc and FindProt to avoid collision with sequtil functions
48 *
49 * Revision 6.3  1998/08/24 20:43:42  kans
50 * fixed -v -fd warnings
51 *
52 * Revision 6.2  1997/12/10 13:48:44  kans
53 * removed call to SeqEntryToFile
54 *
55 * Revision 6.1  1997/11/04 21:02:44  epstein
56 * change stray stdout to master_fp
57 *
58 * Revision 6.0  1997/08/25 18:19:34  madden
59 * Revision changed to 6.0
60 *
61 * Revision 5.7  1997/07/21 16:12:39  epstein
62 * CONVERT to new format for exported list of identifiers
63 *
64 * Revision 5.6  1997/07/14 18:24:20  epstein
65 * add complexity argument 'y'
66 *
67 * Revision 5.5  1997/03/21 18:41:46  epstein
68 * retrieve correct genome sequences
69 *
70  * Revision 5.4  1997/03/10  19:33:45  epstein
71  * add Genomes support
72  *
73  * Revision 5.3  1996/10/24  15:49:27  epstein
74  * add -r option to fetch entries from ID
75  *
76  * Revision 5.2  1996/06/11  15:16:54  epstein
77  * remove another artificial 32K boundary
78  *
79  * Revision 5.1  1996/05/31  19:27:46  epstein
80  * eradicate 32K UID limitations, as much as possible
81  *
82  * Revision 4.4  1996/03/19  17:08:41  epstein
83  * remove stray printfs
84  *
85  * Revision 4.3  1996/02/21  22:09:16  epstein
86  * add EntrezBioseqFetchEnable/Disable() to fix GBFF outputs
87  *
88  * Revision 4.2  1995/09/18  18:18:52  epstein
89  * add GenPept format
90  *
91  * Revision 4.1  1995/08/21  19:41:14  epstein
92  * add cluster analysis
93  *
94  * Revision 4.0  1995/07/26  13:54:26  ostell
95  * force revision to 4.0
96  *
97  * Revision 1.30  1995/07/20  18:58:15  epstein
98  * use new SeqIdWrite function
99  *
100  * Revision 1.29  1995/06/19  21:42:11  kans
101  * changed asn2ff_entrez to SeqEntryToFlat
102  *
103  * Revision 1.28  1995/05/15  01:29:58  ostell
104  * added newline to end of file
105  *
106  * Revision 1.27  1995/05/15  01:28:44  ostell
107  * Fixed Callbacks prototypes to SeqEntryExplore
108  *
109 *
110 * ==========================================================================
111 */
112 
113 #include <ncbi.h>
114 #include <accentr.h>
115 #include <accutils.h>
116 #include <tofasta.h>
117 #include <tomedlin.h>
118 #include <asn2ff.h>
119 
/*
 * Command-line argument table for the program.
 *
 * Each entry starts with a description string and an optional default value
 * string, and carries a single-character switch ('d', 'e', 'u', ...) plus an
 * ARG_* type code.
 *
 * NOTE(review): the field order follows the toolkit's Args structure, which
 * is declared outside this file -- confirm it before adding or reordering
 * fields.  Only the 'p' and 'o' entries have FALSE in the fifth field;
 * presumably that field is the "optional" flag -- TODO confirm.
 */
120 Args myargs[] = {
121         {"Initial database", "m",NULL, NULL, TRUE,'d',ARG_STRING,0.0,0,NULL},
122         {"Boolean expression", NULL, NULL, NULL, TRUE, 'e', ARG_STRING, 0.0,0,NULL},
123         {"Comma-delimited list of UIDs", NULL, NULL, NULL, TRUE, 'u', ARG_STRING, 0.0,0,NULL},
124         {"Program of commands", NULL, NULL, NULL, FALSE, 'p', ARG_STRING, 0.0,0,NULL},
125         {"Display status report", "F", NULL, NULL, TRUE, 's', ARG_BOOLEAN, 0.0,0,NULL},
126         {"Produce WWW/HTML formatted output (recommended value is /htbin)", NULL, NULL, NULL, TRUE, 'w', ARG_STRING, 0.0,0,NULL},
127         {"Detailed help", "F", NULL, NULL, TRUE, 'h', ARG_BOOLEAN, 0.0,0,NULL},
128         {"For WWW output, use Forms", "F", NULL, NULL, TRUE, 'f', ARG_BOOLEAN, 0.0,0,NULL},
129         {"'Check' WWW output Forms", "F", NULL, NULL, TRUE, 'c', ARG_BOOLEAN, 0.0,0,NULL},
130         {"Name of export file for named UID list", NULL, NULL, NULL, TRUE, 'x', ARG_STRING,0.0,0,NULL},
131         {"Comma-delimited list of files to import for named UID list", NULL, NULL, NULL, TRUE, 'i', ARG_STRING,0.0,0,NULL},
132         {"Produce a list of terms (term)", NULL, NULL, NULL, TRUE, 't', ARG_STRING, 0.0,0,NULL},
133         {"Taxonomy lookup", NULL, NULL, NULL, TRUE, 'l', ARG_STRING, 0.0,0,NULL},
134         {"On-the-fly neighboring", NULL, NULL, NULL, TRUE, 'n', ARG_FILE_IN, 0.0,0,NULL},
135         {"Output file", "stdout", NULL, NULL, FALSE, 'o', ARG_FILE_OUT, 0.0,0,NULL},
136         {"Use WWW-style encoding for special input characters", "T", NULL, NULL, TRUE, 'g', ARG_BOOLEAN, 0.0,0,NULL},
137         {"Get sequences from ID Repository", "F", NULL, NULL, TRUE, 'r', ARG_BOOLEAN, 0.0,0,NULL},
138         {"Complexity (1=bioseq only, 2=bioseq set, 3=nuc-prot set)", "3", NULL, NULL, TRUE, 'y', ARG_INT, 0.0,0,NULL}
139 };
140 
/* NOTE(review): neither of the next two constants is referenced in the
 * visible part of the file; their names suggest a field-mnemonic length and
 * a default term-list length -- confirm against their users. */
141 #define ENTREZ_FLD_MNEMONIC_LENGTH 4
142 #define DEFAULT_TERMLIST_LEN      40
143 
/* Values for the file-scope termDisplay setting, which is initialised to
 * DISPLAY_TERM_ONLY. */
144 #define DISPLAY_SPECIAL_AND_TOTAL 1
145 #define DISPLAY_TOTAL_ONLY        2
146 #define DISPLAY_TERM_ONLY         3
147 
/*
 * A named list of UIDs belonging to one database.
 * NOTE(review): no use of this type is visible in this part of the file;
 * the per-field notes below are read off the field names and types only.
 */
148 typedef struct savlist {
149     CharPtr name;   /* name of the list */
150     Int4Ptr uids;   /* array of UIDs */
151     DocType db;     /* database the UIDs belong to */
152     Int2 num;       /* presumably the number of entries in uids -- TODO confirm */
153 } SavList, PNTR SavListPtr;
154 
/*
 * A term string together with two counts.
 * NOTE(review): no use of this type is visible in this part of the file;
 * the meaning of "special" versus "total" should be confirmed at the users.
 */
155 typedef struct term_and_counts {
156     CharPtr term;
157     Int4 special;
158     Int4 total;
159 } TermAndCounts, PNTR TermAndCountsPtr;
160 
/*
 * Pointer to a file-private copy of a link set.  Instances are created by
 * LocalLinkSetNew(), filled from a LinkSetPtr by LinkSetToLocalLinkSet(),
 * and released by LocalLinkSetFree().
 */
161 typedef struct {
162     Uint4 num;          /* number of entries in uids / weights */
163     DocUidPtr uids;     /* array of document UIDs */
164     Int4Ptr weights;    /* per-UID weights; may be NULL */
165 } * LocalLinkSetPtr;
166 
167 
/*
 * File-scope state shared by the output routines.
 * NOTE(review): theTerm, numTerms, termsBefore and termDisplay are not used
 * in the visible part of the file, so they are left undescribed here.
 */
168 static CharPtr wwwPrefix = NULL;        /* when non-NULL, HTML printers are used and this prefixes every link */
169 static CharPtr theTerm = NULL;
170 static Boolean useForms = FALSE;        /* emit a checkbox per document summary in HTML output */
171 static Boolean checkForms = FALSE;      /* emit those checkboxes pre-checked */
172 static Int2    numTerms;
173 static Int2    termsBefore;
174 static FILE *  exportFilePtr = NULL;    /* when non-NULL, UID lists are written here and the file is then closed */
175 static LocalLinkSetPtr pubLsp = NULL;   /* link set the HTML summary printers consult for similarity weights */
176 static Int2    termDisplay = DISPLAY_TERM_ONLY;
177 static FILE *  master_fp = NULL;        /* stream that all report output is written to */
178 static Int2    seqEntryRetval = 3;      /* retrieval code passed to EntrezSeqEntryGet for non-genome databases */
179 
LocalLinkSetNew(void)180 static LocalLinkSetPtr LocalLinkSetNew(void)
181 {
182     LocalLinkSetPtr lsp;
183 
184     lsp = MemNew(sizeof(*lsp));
185     lsp->num = 0;
186     lsp->uids = NULL;
187     lsp->weights = NULL;
188 
189     return lsp;
190 }
191 
/*
 * LocalLinkSetFree
 *
 * Releases a local link set together with its uids and weights arrays.
 * A NULL argument is accepted and ignored.
 *
 * Always returns NULL so callers can write "lsp = LocalLinkSetFree(lsp);".
 */
static LocalLinkSetPtr LocalLinkSetFree(LocalLinkSetPtr lsp)
{
    if (lsp == NULL)
        return NULL;    /* nothing to release */

    MemFree(lsp->uids);
    MemFree(lsp->weights);
    MemFree(lsp);

    return NULL;
}
200 
LinkSetToLocalLinkSet(LinkSetPtr newlsp)201 static LocalLinkSetPtr LinkSetToLocalLinkSet(LinkSetPtr newlsp)
202 {
203     LocalLinkSetPtr lsp;
204 
205     if (newlsp != NULL)
206     {
207         lsp = LocalLinkSetNew();
208         lsp->num = (Uint4) newlsp->num;
209         lsp->uids = (DocUidPtr) MemDup(newlsp->uids, sizeof(DocUid) * lsp->num);
210         lsp->weights = (DocUidPtr) MemDup(newlsp->weights, sizeof(Int4) * lsp->num);
211     }
212     return lsp;
213 }
214 
215 static void
DoOutput(CharPtr term,Int2 depth,Boolean showTerminal)216 DoOutput(CharPtr term, Int2 depth, Boolean showTerminal)
217 {
218     Int2 i;
219 
220     for (i = 1; i <= depth; i++)
221         fprintf(master_fp, i < depth ? "-" : ( showTerminal ? "*" : "-"));
222     fprintf (master_fp, "%s\n", term);
223 }
224 
225 static void
PreOrderTaxTraversal(EntrezHierarchyPtr ehp,Int2 depth,DocType db,DocField fld,Int2 maxDepth)226 PreOrderTaxTraversal(EntrezHierarchyPtr ehp, Int2 depth, DocType db, DocField fld, Int2 maxDepth)
227 {
228     Int2 i;
229     EntrezHierarchyPtr child;
230 
231     DoOutput(ehp->term, depth, depth >= maxDepth);
232 
233     if (depth >= maxDepth)
234         return;
235 
236     for (i = 0; i < ehp->numChildren; i++)
237     {
238         if (ehp->children[i].isLeafNode)
239         { /* no need to move down tree, since all information is here */
240             DoOutput(ehp->children[i].name, depth + 1, FALSE);
241         } else {
242             child = EntrezHierarchyGet(ehp->children[i].name, db,
243                                        fld);
244             if (child != NULL)
245             {
246                 PreOrderTaxTraversal(child, depth + 1, db, fld, maxDepth);
247                 EntrezHierarchyFree(child);
248             }
249         }
250     }
251 }
252 
253 
254 /* find the last nucleotide bioseq in the bioseqset */
FindANuc(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)255 static void FindANuc(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
256 {
257   BioseqPtr PNTR bp;
258   BioseqPtr local_bsp;
259 
260   bp = (BioseqPtr PNTR) data;
261   if (IS_Bioseq(sep))
262   {
263     local_bsp = (BioseqPtr) sep->data.ptrvalue;
264     if (ISA_na(local_bsp->mol))
265       *bp = local_bsp;
266   }
267 }
268 
269 /* find the last protein bioseq in the bioseqset */
FindAProt(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)270 static void FindAProt(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
271 {
272   BioseqPtr PNTR bp;
273   BioseqPtr local_bsp;
274 
275   bp = (BioseqPtr PNTR) data;
276   if (IS_Bioseq(sep))
277   {
278     local_bsp = (BioseqPtr) sep->data.ptrvalue;
279     if (ISA_aa(local_bsp->mol))
280       *bp = local_bsp;
281   }
282 }
283 
IsGenBank(SeqEntryPtr sep)284 static Boolean IsGenBank (SeqEntryPtr sep)
285 {
286     BioseqPtr  bsp;
287     Uint1      repr;
288     Boolean    rsult;
289 
290     rsult = FALSE;
291     if (sep->choice == 1) {
292         bsp = (BioseqPtr) sep->data.ptrvalue;
293         repr = Bioseq_repr (bsp);
294         if (repr == Seq_repr_raw || repr == Seq_repr_const) {
295             if (ISA_na (bsp->mol)) {
296                 rsult = TRUE;
297             } else {
298                 Message (MSG_ERROR, "Protein record cannot be viewed in GenBank form.");
299             }
300         } else {
301             Message (MSG_ERROR, "Bad sequence repr %d", (int) repr);
302         }
303     } else {
304         rsult = TRUE;
305     }
306     return rsult;
307 }
308 
309 
310 static void
PrintGenbank(SeqEntryPtr sep,Boolean isprot)311 PrintGenbank(SeqEntryPtr sep, Boolean isprot)
312 {
313     SeqEntryToFlat(sep, master_fp, isprot ? GENPEPT_FMT : GENBANK_FMT,
314                    RELEASE_MODE);
315 
316     FilePuts ("\n\n", master_fp);
317 }
318 
PrintDSP(DocSumPtr dsp,DocUid uid)319 static Boolean PrintDSP(DocSumPtr dsp, DocUid uid)
320 {
321 
322     Int2 titleLen;
323     Int2 size;
324     CharPtr pEnd;
325     CharPtr pStart;
326 
327     if (dsp == NULL)
328         return TRUE;
329 
330     pStart = dsp->title;
331     titleLen = StrLen(pStart);
332 
333     if ( titleLen <= 55 ) {
334       size = titleLen;
335     } else {
336         pEnd = pStart + 55;
337         while (IS_WHITESP(*pEnd) == FALSE)
338             pEnd -=1;
339         size = pEnd - pStart;
340     }
341 
342     fprintf (master_fp, "%-20.20s %-*.*s\n",dsp->caption,size,size,pStart);
343     fprintf (master_fp, "%c",(dsp->no_abstract ? ' ' : '*'));
344     fprintf (master_fp, "                    ");
345 
346     pStart = pStart + size;
347 
348     while ((titleLen = StrLen(pStart)) > 0 ) {
349         if ( titleLen <= 55 ) {
350             size = titleLen;
351         } else {
352             pEnd = pStart + 55;
353             while (IS_WHITESP(*pEnd) == FALSE)
354                 pEnd -=1;
355             size = pEnd - pStart;
356         }
357 
358         fprintf (master_fp, "%-*.*s\n",size,size,pStart+1);
359         fprintf (master_fp, "%-21.21s","");
360         pStart = pStart + size;
361     }
362 
363     fprintf (master_fp, "\n");
364 
365     DocSumFree(dsp);
366     return TRUE;
367 }
368 
PrintDSPMwww(DocSumPtr dsp,DocUid uid)369 static Boolean PrintDSPMwww(DocSumPtr dsp, DocUid uid)
370 {
371     CharPtr p;
372     Boolean noNeighbors = FALSE;
373     LocalLinkSetPtr lsp;
374     Int2 medNeighbors;
375     Int2 protNeighbors;
376     Int2 nucNeighbors;
377 
378     if (dsp == NULL)
379         return TRUE;
380 
381     fprintf (master_fp, "<DL>\n<DT>\n");
382     if (useForms)
383     {
384         fprintf (master_fp, "<inPUT TYPE=\"checkbox\" NAME=\"nei\" VALUE=\"%d\"%s>\n", uid,
385                 checkForms ? " CHECKED" : "");
386     }
387     if (TRUE /* used to be non-FORMS only */ )
388     {
389         LinkSetPtr lsp;
390 
391         lsp = NULL;
392         EntrezLinkUidList(&lsp, TYP_ML, TYP_ML, 1, &uid, FALSE);
393         if (lsp != NULL)
394         {
395             medNeighbors = lsp->num;
396             LinkSetFree(lsp);
397             lsp = NULL;
398         } else {
399             medNeighbors = 0;
400         }
401         EntrezLinkUidList(&lsp, TYP_ML, TYP_AA, 1, &uid, FALSE);
402         if (lsp != NULL)
403         {
404             protNeighbors = lsp->num;
405             LinkSetFree(lsp);
406             lsp = NULL;
407         } else {
408             protNeighbors = 0;
409         }
410         EntrezLinkUidList(&lsp, TYP_ML, TYP_NT, 1, &uid, FALSE);
411         if (lsp != NULL)
412         {
413             nucNeighbors = lsp->num;
414             LinkSetFree(lsp);
415             lsp = NULL;
416         } else {
417             nucNeighbors = 0;
418         }
419         noNeighbors = !medNeighbors && !protNeighbors && !nucNeighbors;
420     }
421 
422     fprintf (master_fp, "%s\n<BR><DD>\n", dsp->caption);
423     for (p = dsp->title; *p; p++)
424     {
425         switch (*p)
426         {
427         case '&': fprintf (master_fp, "&amp;"); break;
428         case '<': fprintf (master_fp, "&lt;"); break;
429         case '>': fprintf (master_fp, "&gt;"); break;
430         default: fprintf (master_fp, "%c", *p);
431         }
432     }
433     fprintf (master_fp, "<I>");
434     if (dsp->no_abstract)
435     {
436         fprintf (master_fp, " (no abstract available)");
437     }
438     fprintf (master_fp, " (View ");
439     fprintf (master_fp, "<A HREF=\"%s/entrezmr?%d\">Report format</A>,\n", wwwPrefix, uid);
440     fprintf (master_fp, "<A HREF=\"%s/entrezml?%d\">MEDLARS format</A>,\n", wwwPrefix, uid);
441     if (noNeighbors)
442     {
443         fprintf (master_fp, "or ");
444     }
445     fprintf (master_fp, "<A HREF=\"%s/entrezma?%d\">ASN.1 format</A>", wwwPrefix, uid);
446     if (! noNeighbors)
447     {
448         if (medNeighbors != 0)
449             fprintf (master_fp, ", %s<A HREF=\"%s/entrezmmnei?%d\">%d MEDLINE neighbor%s</A>\n", !protNeighbors && !nucNeighbors ? "or " : "", wwwPrefix, uid, medNeighbors, medNeighbors == 1 ? "" : "s");
450         if (protNeighbors != 0)
451             fprintf (master_fp, ", %s<A HREF=\"%s/entrezmpnei?%d\">%d Protein link%s</A>\n", !nucNeighbors ? "or " : "", wwwPrefix, uid, protNeighbors, protNeighbors == 1 ? "" : "s");
452         if (nucNeighbors != 0)
453             fprintf (master_fp, ", or <A HREF=\"%s/entrezmnnei?%d\">%d Nucleotide link%s</A>\n", wwwPrefix, uid, nucNeighbors, nucNeighbors == 1 ? "" : "s");
454     }
455     fprintf (master_fp, ")\n<P></I></DL>\n");
456 
457     DocSumFree(dsp);
458     return TRUE;
459 }
460 
PrintDSPNwww(DocSumPtr dsp,DocUid uid)461 static Boolean PrintDSPNwww(DocSumPtr dsp, DocUid uid)
462 {
463     CharPtr p;
464     Boolean noNeighbors = FALSE;
465     LocalLinkSetPtr lsp;
466     Int2 medNeighbors;
467     Int2 protNeighbors;
468     Int2 nucNeighbors;
469     Int4 weight;
470     Int2 i;
471 
472     if (dsp == NULL)
473         return TRUE;
474 
475     fprintf (master_fp, "<DL>\n<DT>\n");
476     if (useForms)
477     {
478         fprintf (master_fp, "<inPUT TYPE=\"checkbox\" NAME=\"nei\" VALUE=\"%d\"%s>\n", uid,
479                 checkForms ? " CHECKED" : "");
480     }
481     if (TRUE /* used to be non-FORMS only */ )
482     {
483         LinkSetPtr lsp;
484 
485         lsp = NULL;
486         EntrezLinkUidList(&lsp, TYP_NT, TYP_ML, 1, &uid, FALSE);
487         if (lsp != NULL)
488         {
489             medNeighbors = lsp->num;
490             LinkSetFree(lsp);
491             lsp = NULL;
492         } else {
493             medNeighbors = 0;
494         }
495         EntrezLinkUidList(&lsp, TYP_NT, TYP_AA, 1, &uid, FALSE);
496         if (lsp != NULL)
497         {
498             protNeighbors = lsp->num;
499             LinkSetFree(lsp);
500             lsp = NULL;
501         } else {
502             protNeighbors = 0;
503         }
504         EntrezLinkUidList(&lsp, TYP_NT, TYP_NT, 1, &uid, FALSE);
505         if (lsp != NULL)
506         {
507             nucNeighbors = lsp->num;
508             LinkSetFree(lsp);
509             lsp = NULL;
510         } else {
511             nucNeighbors = 0;
512         }
513         noNeighbors = !medNeighbors && !protNeighbors && !nucNeighbors;
514     }
515 
516     fprintf (master_fp, "%s\n<BR><DD>\n", dsp->caption);
517     for (p = dsp->title; *p; p++)
518     {
519         switch (*p)
520         {
521         case '&': fprintf (master_fp, "&amp;"); break;
522         case '<': fprintf (master_fp, "&lt;"); break;
523         case '>': fprintf (master_fp, "&gt;"); break;
524         default: fprintf (master_fp, "%c", *p);
525         }
526     }
527     fprintf (master_fp, "<I>");
528     weight = -1;
529     if (pubLsp != NULL && pubLsp->weights != NULL)
530     {
531         for (i = 0; i < pubLsp->num; i++)
532         {
533             if (pubLsp->uids[i] == uid)
534                 weight = pubLsp->weights[i];
535         }
536     }
537     if (weight > 1)
538     {
539         fprintf (master_fp, " (Similarity score %d)", weight);
540     }
541     fprintf (master_fp, " (View ");
542     fprintf (master_fp, "<A HREF=\"%s/entreznr?%d\">Report format</A>,\n", wwwPrefix, uid);
543     fprintf (master_fp, "<A HREF=\"%s/entrezng?%d\">GenBank format</A>,\n", wwwPrefix, uid);
544     fprintf (master_fp, "<A HREF=\"%s/entreznf?%d\">FASTA format</A>,\n", wwwPrefix, uid);
545     if (noNeighbors)
546     {
547         fprintf (master_fp, "or ");
548     }
549     fprintf (master_fp, "<A HREF=\"%s/entrezna?%d\">ASN.1 format</A>", wwwPrefix, uid);
550     if (! noNeighbors)
551     {
552         if (medNeighbors != 0)
553             fprintf (master_fp, ", %s<A HREF=\"%s/entreznmnei?%d\">%d MEDLINE link%s</A>\n", !protNeighbors && !nucNeighbors ? "or " : "", wwwPrefix, uid, medNeighbors, medNeighbors == 1 ? "" : "s");
554         if (protNeighbors != 0)
555             fprintf (master_fp, ", %s<A HREF=\"%s/entreznpnei?%d\">%d Protein link%s</A>\n", !nucNeighbors ? "or " : "", wwwPrefix, uid, protNeighbors, protNeighbors == 1 ? "" : "s");
556         if (nucNeighbors != 0)
557             fprintf (master_fp, ", or <A HREF=\"%s/entreznnnei?%d\">%d Nucleotide neighbor%s</A>\n", wwwPrefix, uid, nucNeighbors, nucNeighbors == 1 ? "" : "s");
558     }
559     fprintf (master_fp, ")<P></I></DL>\n");
560 
561     DocSumFree(dsp);
562     return TRUE;
563 }
564 
PrintDSPPwww(DocSumPtr dsp,DocUid uid)565 static Boolean PrintDSPPwww(DocSumPtr dsp, DocUid uid)
566 {
567     CharPtr p;
568     Boolean noNeighbors = FALSE;
569     LocalLinkSetPtr lsp;
570     Int2 medNeighbors;
571     Int2 protNeighbors;
572     Int2 nucNeighbors;
573     Int4 weight;
574     Int2 i;
575 
576     if (dsp == NULL)
577         return TRUE;
578 
579     fprintf (master_fp, "<DL>\n<DT>\n");
580     if (useForms)
581     {
582         fprintf (master_fp, "<inPUT TYPE=\"checkbox\" NAME=\"nei\" VALUE=\"%d\"%s>\n", uid,
583                 checkForms ? " CHECKED" : "");
584     }
585     if (TRUE /* used to be non-FORMS only */ )
586     {
587         LinkSetPtr lsp;
588 
589         lsp = NULL;
590         EntrezLinkUidList(&lsp, TYP_AA, TYP_ML, 1, &uid, FALSE);
591         if (lsp != NULL)
592         {
593             medNeighbors = lsp->num;
594             LinkSetFree(lsp);
595             lsp = NULL;
596         } else {
597             medNeighbors = 0;
598         }
599         EntrezLinkUidList(&lsp, TYP_AA, TYP_AA, 1, &uid, FALSE);
600         if (lsp != NULL)
601         {
602             protNeighbors = lsp->num;
603             LinkSetFree(lsp);
604             lsp = NULL;
605         } else {
606             protNeighbors = 0;
607         }
608         EntrezLinkUidList(&lsp, TYP_AA, TYP_NT, 1, &uid, FALSE);
609         if (lsp != NULL)
610         {
611             nucNeighbors = lsp->num;
612             LinkSetFree(lsp);
613             lsp = NULL;
614         } else {
615             nucNeighbors = 0;
616         }
617         noNeighbors = !medNeighbors && !protNeighbors && !nucNeighbors;
618     }
619 
620     fprintf (master_fp, "%s\n<BR><DD>\n", dsp->caption);
621     for (p = dsp->title; *p; p++)
622     {
623         switch (*p)
624         {
625         case '&': fprintf (master_fp, "&amp;"); break;
626         case '<': fprintf (master_fp, "&lt;"); break;
627         case '>': fprintf (master_fp, "&gt;"); break;
628         default: fprintf (master_fp, "%c", *p);
629         }
630     }
631     fprintf (master_fp, "<I>");
632     weight = -1;
633     if (pubLsp != NULL && pubLsp->weights != NULL)
634     {
635         for (i = 0; i < pubLsp->num; i++)
636         {
637             if (pubLsp->uids[i] == uid)
638                 weight = pubLsp->weights[i];
639         }
640     }
641     if (weight > 1)
642     {
643         fprintf (master_fp, " (Similarity score %d)", weight);
644     }
645     fprintf (master_fp, " (View ");
646     fprintf (master_fp, "<A HREF=\"%s/entrezpr?%d\">Report format</A>,\n", wwwPrefix, uid);
647     fprintf (master_fp, "<A HREF=\"%s/entrezpf?%d\">FASTA format</A>,\n", wwwPrefix, uid);
648     if (noNeighbors)
649     {
650         fprintf (master_fp, "or ");
651     }
652     fprintf (master_fp, "<A HREF=\"%s/entrezpa?%d\">ASN.1 format</A>", wwwPrefix, uid);
653     if (! noNeighbors)
654     {
655         if (medNeighbors != 0)
656             fprintf (master_fp, ", %s<A HREF=\"%s/entrezpmnei?%d\">%d MEDLINE link%s</A>\n", !protNeighbors && !nucNeighbors ? "or " : "", wwwPrefix, uid, medNeighbors, medNeighbors == 1 ? "" : "s");
657         if (protNeighbors != 0)
658             fprintf (master_fp, ", %s<A HREF=\"%s/entrezppnei?%d\">%d Protein neighbor%s</A>\n", !nucNeighbors ? "or " : "", wwwPrefix, uid, protNeighbors, protNeighbors == 1 ? "" : "s");
659         if (nucNeighbors != 0)
660             fprintf (master_fp, ", or <A HREF=\"%s/entrezpnnei?%d\">%d Nucleotide link%s</A>\n", wwwPrefix, uid, nucNeighbors, nucNeighbors == 1 ? "" : "s");
661     }
662     fprintf (master_fp, ")<P></I></DL>\n");
663 
664     DocSumFree(dsp);
665     return TRUE;
666 }
667 
668 static void
ReportBadType(DocType db,CharPtr outputSpec)669 ReportBadType (DocType db, CharPtr outputSpec)
670 {
671     Message(MSG_POST, "Invalid output format \"%s\" for database \"%s\"",
672             outputSpec, db == TYP_ML ? "MEDLINE" : (db == TYP_NT ?
673             "Nucleotide" : (db == TYP_AA ? "Protein" : (db == TYP_CH ?
674             "Genome" : "unknown"))));
675 }
676 
677 static Boolean
ProcessOutput(LocalLinkSetPtr lsp,DocType db,CharPtr outputSpec,long processingCount,long totalCount,Boolean parseOnly)678 ProcessOutput(LocalLinkSetPtr lsp, DocType db, CharPtr outputSpec, long processingCount, long totalCount, Boolean parseOnly)
679 {
680     long i;
681     AsnIoPtr aip;
682     MedlineEntryPtr mep;
683     SeqEntryPtr sep;
684     SeqIdPtr sip;
685     Char seqIdBuf[256];
686 
687     if (StringCmp(outputSpec, "") == 0 || StringCmp(outputSpec, "no") == 0)
688         return TRUE;
689     if (StringCmp(outputSpec, "mc") == 0)
690     {
691         if (db != TYP_ML)
692         {
693             ReportBadType(db, outputSpec);
694             return FALSE;
695         }
696         if (! parseOnly)
697         {
698            fprintf (master_fp, "%ld\n", totalCount);
699         }
700         return TRUE;
701     }
702     if (StringCmp(outputSpec, "mu") == 0)
703     {
704         if (db != TYP_ML)
705         {
706             ReportBadType(db, outputSpec);
707             return FALSE;
708         }
709         if (! parseOnly && lsp != NULL)
710         {
711             if (exportFilePtr != NULL)
712             {
713 	        CharPtr str = "garbage";
714 
715 		switch(db) {
716 		case TYP_ML: str = "MEDLINE"; break;
717 		case TYP_AA: str = "protein"; break;
718 		case TYP_NT: str = "nucleotide"; break;
719 		case TYP_CH: str = "genome"; break;
720 		}
721                 fprintf(exportFilePtr, ">%s\n", str);
722 
723                 for (i = 0; i < processingCount; i++)
724                 {
725                     fprintf(exportFilePtr, "%d\n", lsp->uids[i]);
726                 }
727                 FileClose(exportFilePtr);
728                 exportFilePtr = NULL;
729             } else {
730                 fprintf (master_fp, "\n");
731                 for (i = 0; i < processingCount; i++)
732                 {
733                     fprintf (master_fp, "%d\n", lsp->uids[i]);
734                 }
735                 fprintf (master_fp, "\n");
736                 fflush(master_fp);
737             }
738         }
739         return TRUE;
740     }
741     if (StringCmp(outputSpec, "mz") == 0)
742     { /* analyze */
743         if (db != TYP_ML)
744         {
745             ReportBadType(db, outputSpec);
746             return FALSE;
747         }
748         if (! parseOnly && lsp != NULL)
749         {
750             CharPtr terms[20];
751             Int4 termTotals[20];
752             Int4 count;
753 
754             count = EntrezClusterAnalysis(lsp->uids, lsp->num, FLD_WORD, 0, INT2_MAX, 20, terms, termTotals);
755             fprintf (master_fp, "Analysis resulted in %d terms\n\n", (int) count);
756             for (i = 0; i < count; i++)
757             {
758                 fprintf (master_fp, "%s %ld\n", terms[i], (long) termTotals[i]);
759                 MemFree (terms[i]);
760             }
761         }
762         return TRUE;
763     }
764     if (StringCmp(outputSpec, "md") == 0)
765     {
766         if (db != TYP_ML)
767         {
768             ReportBadType(db, outputSpec);
769             return FALSE;
770         }
771         if (! parseOnly && lsp != NULL)
772         {
773             if (wwwPrefix != NULL && processingCount < totalCount)
774             {
775                 fprintf (master_fp, "Warning: only %ld document summaries are being displayed\n", processingCount);
776                 fprintf (master_fp, "out of %ld total entries.<P>\n", totalCount);
777             }
778             EntrezDocSumListGet((Int2) processingCount, db, lsp->uids,
779                                 wwwPrefix == NULL ? PrintDSP : PrintDSPMwww);
780         }
781         return TRUE;
782     }
783     if (StringCmp(outputSpec, "mr") == 0)
784     {
785         if (db != TYP_ML)
786         {
787             ReportBadType(db, outputSpec);
788             return FALSE;
789         }
790         if (! parseOnly && lsp != NULL)
791         {
792             for (i = 0; i < processingCount; i++)
793             {
794                 mep = EntrezMedlineEntryGet(lsp->uids[i]);
795                 if (mep != NULL)
796                 {
797                     MedlineEntryToDocFile(mep, master_fp);
798                     MedlineEntryFree(mep);
799                     fprintf (master_fp, "\n\n");
800                 }
801             }
802             fflush(master_fp);
803         }
804         return TRUE;
805     }
806     if (StringCmp(outputSpec, "ma") == 0)
807     {
808         if (db != TYP_ML)
809         {
810             ReportBadType(db, outputSpec);
811             return FALSE;
812         }
813         if (! parseOnly && lsp != NULL)
814         {
815             aip = AsnIoNew(ASNIO_TEXT_OUT, master_fp, NULL, NULL, NULL);
816             for (i = 0; i < processingCount; i++)
817             {
818                 mep = EntrezMedlineEntryGet(lsp->uids[i]);
819                 if (mep != NULL)
820                 {
821                     MedlineEntryAsnWrite(mep, aip, NULL);
822                     AsnIoReset(aip);
823                     MedlineEntryFree(mep);
824                 }
825             }
826             AsnIoClose(aip);
827         }
828         return TRUE;
829     }
830     if (StringCmp(outputSpec, "ml") == 0)
831     {
832         if (db != TYP_ML)
833         {
834             ReportBadType(db, outputSpec);
835             return FALSE;
836         }
837         if (! parseOnly && lsp != NULL)
838         {
839             for (i = 0; i < processingCount; i++)
840             {
841                 mep = EntrezMedlineEntryGet(lsp->uids[i]);
842                 if (mep != NULL)
843                 {
844                     MedlineEntryToDataFile(mep, master_fp);
845                     fprintf (master_fp, "\n");
846                     MedlineEntryFree(mep);
847                 }
848             }
849         }
850         return TRUE;
851     }
852 
853 
854     if (StringCmp(outputSpec, "sc") == 0)
855     {
856         if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
857         {
858             ReportBadType(db, outputSpec);
859             return FALSE;
860         }
861         if (! parseOnly)
862         {
863             fprintf (master_fp, "%ld\n", totalCount);
864         }
865         return TRUE;
866     }
867     if (StringCmp(outputSpec, "su") == 0)
868     {
869         if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
870         {
871             ReportBadType(db, outputSpec);
872             return FALSE;
873         }
874         if (! parseOnly && lsp != NULL)
875         {
876             if (exportFilePtr != NULL)
877             {
878 	        CharPtr str = "garbage";
879 
880 		switch(db) {
881 		case TYP_ML: str = "MEDLINE"; break;
882 		case TYP_AA: str = "protein"; break;
883 		case TYP_NT: str = "nucleotide"; break;
884 		case TYP_CH: str = "genome"; break;
885 		}
886                 fprintf(exportFilePtr, ">%s\n", str);
887 
888                 for (i = 0; i < processingCount; i++)
889                 {
890                     fprintf(exportFilePtr, "%d\n", lsp->uids[i]);
891                 }
892                 FileClose(exportFilePtr);
893                 exportFilePtr = NULL;
894             } else {
895                 fprintf (master_fp, "\n");
896                 for (i = 0; i < processingCount; i++)
897                 {
898                     fprintf (master_fp, "%d\n", lsp->uids[i]);
899                 }
900                 fprintf (master_fp, "\n");
901                 fflush(master_fp);
902             }
903         }
904         return TRUE;
905     }
906     if (StringCmp(outputSpec, "sd") == 0)
907     {
908         if (db != TYP_NT && db != TYP_AA)
909         {
910             ReportBadType(db, outputSpec);
911             return FALSE;
912         }
913         if (! parseOnly && lsp != NULL)
914         {
915             if (wwwPrefix != NULL && processingCount < totalCount)
916             {
917                 fprintf (master_fp, "Warning: only %ld document summaries are being displayed\n", processingCount);
918                 fprintf (master_fp, "out of %ld total entries.<P>\n", totalCount);
919             }
920             pubLsp = lsp;
921             EntrezDocSumListGet((Int2) processingCount, db, lsp->uids,
922                                 wwwPrefix == NULL ? PrintDSP : (db == TYP_NT ?
923                                 PrintDSPNwww : PrintDSPPwww));
924             pubLsp = NULL;
925         }
926         return TRUE;
927     }
928     if (StringCmp(outputSpec, "sa") == 0)
929     {
930         if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
931         {
932             ReportBadType(db, outputSpec);
933             return FALSE;
934         }
935         if (! parseOnly && lsp != NULL)
936         {
937             aip = AsnIoNew(ASNIO_TEXT_OUT, master_fp, NULL, NULL, NULL);
938             for (i = 0; i < processingCount; i++)
939             {
940                 sep = EntrezSeqEntryGet(lsp->uids[i], db == TYP_CH ? -1 : seqEntryRetval);
941                 if (sep != NULL)
942                 {
943                     SeqEntryAsnWrite(sep, aip, NULL);
944                     AsnIoReset(aip);
945                     SeqEntryFree(sep);
946                 }
947             }
948             AsnIoClose(aip);
949         }
950         return TRUE;
951     }
952     if (StringCmp(outputSpec, "sg") == 0 || StringCmp(outputSpec, "sr") == 0)
953     {
954         if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
955         {
956             ReportBadType(db, outputSpec);
957             return FALSE;
958         }
959         if (! parseOnly && lsp != NULL)
960         {
961             for (i = 0; i < processingCount; i++)
962             {
963                 sep = EntrezSeqEntryGet(lsp->uids[i], db == TYP_CH ? -1 : seqEntryRetval);
964                 if (sep != NULL)
965                 {
966                     PrintGenbank(sep, db == TYP_AA);
967                     SeqEntryFree(sep);
968                 }
969             }
970         }
971         return TRUE;
972     }
973     if (StringCmp(outputSpec, "sf") == 0)
974     {
975         if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
976         {
977             ReportBadType(db, outputSpec);
978             return FALSE;
979         }
980         if (! parseOnly && lsp != NULL)
981         {
982             for (i = 0; i < processingCount; i++)
983             {
984                 sep = EntrezSeqEntryGet(lsp->uids[i], db == TYP_CH ? -1 : seqEntryRetval);
985                 if (sep != NULL)
986                 {
987                     SeqEntryConvert (sep, Seq_code_iupacna);
988                     SeqEntryToFasta (sep, master_fp, db == TYP_NT);
989                     SeqEntryFree(sep);
990                 }
991                 fprintf (master_fp, "\n");
992             }
993 
994         }
995         return TRUE;
996     }
997     if (StringCmp(outputSpec, "si") == 0)
998     {
999         if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
1000         {
1001             ReportBadType(db, outputSpec);
1002             return FALSE;
1003         }
1004         if (! parseOnly && lsp != NULL)
1005         {
1006             for (i = 0; i < processingCount; i++)
1007             {
1008                 sip = EntrezSeqIdForGI(lsp->uids[i]);
1009                 if (sip != NULL)
1010                 {
1011                     SeqIdWrite(sip, seqIdBuf, PRINTID_FASTA_LONG, sizeof seqIdBuf);
1012 
1013                     fprintf (master_fp, "%s\n", seqIdBuf);
1014                 }
1015             }
1016         }
1017         return TRUE;
1018     }
1019 
1020 
1021     Message(MSG_POST, "Unknown output format \"%s\"", outputSpec);
1022     return FALSE;
1023 }
1024 
1025 static Int2
RunProgram(CharPtr programStr,LocalLinkSetPtr lsp,DocType db,Boolean parseOnly)1026 RunProgram(CharPtr programStr, LocalLinkSetPtr lsp, DocType db, Boolean parseOnly)
1027 {
1028     Int1 wrongDelim = '.';
1029     DocType newdb;
1030     Int2 len;
1031     CharPtr c;
1032     Char outputSpec[3];
1033     long processingCount;
1034     Char numStr[12];
1035     Int2 count;
1036     Int2 numToCopy;
1037     LinkSetPtr newlsp;
1038 
1039     if (programStr == NULL)
1040     {
1041         return -1;
1042     }
1043 
1044     if (lsp == NULL && !parseOnly)
1045     {
1046         return -1;
1047     }
1048 
1049     len = StrLen(programStr);
1050     outputSpec[2] = '\0';
1051     c = programStr;
1052 
1053     for (c = programStr; c < programStr + len; c += count + 1)
1054     {
1055         count = StrCSpn(c, ",.");
1056         if (c[count] == wrongDelim)
1057         {
1058             Message(MSG_POST, "Invalid delimiter");
1059             /* offset to offending delimeter */
1060             return (count + 1 + c - programStr);
1061         }
1062 
1063         processingCount = INT4_MAX;
1064 
1065         if (wrongDelim == '.')
1066         { /* process output */
1067             switch (count) {
1068             case 0:
1069                 outputSpec[0] = '\0';
1070                 break;
1071             case 1:
1072                 Message(MSG_POST, "Invalid output specification \"%c\"", c[1]);
1073                 return ( 2 + c - programStr);
1074             case 2:
1075                 outputSpec[0] = c[0];
1076                 outputSpec[1] = c[1];
1077                 break;
1078             default:
1079                 outputSpec[0] = c[0];
1080                 outputSpec[1] = c[1];
1081                 numToCopy = MIN(count - 2, sizeof(numStr) - 1);
1082                 StrNCpy(numStr, c + 2, numToCopy);
1083                 numStr[numToCopy] = '\0';
1084                 if ((int) StrSpn(numStr, "0123456789") != (int) numToCopy)
1085                 {
1086                   Message(MSG_POST, "Non-numeric character detected");
1087                   return ( count + c - programStr);
1088                 }
1089                 sscanf(numStr, "%ld", &processingCount);
1090                 break;
1091             }
1092             if (lsp != NULL && !parseOnly)
1093             {
1094                 processingCount = MIN(processingCount, lsp->num);
1095             }
1096             if (! ProcessOutput(lsp, db, outputSpec, processingCount,
1097                                 lsp != NULL ? lsp->num : processingCount,
1098                                 parseOnly))
1099             {
1100                 /* note that error will be posted by ProcessOutput() */
1101                 return ( 3 + c - programStr);
1102             }
1103         } else { /* process neighboring */
1104             if (count == 0)
1105             {
1106                 Message(MSG_POST, "Null neighboring specification");
1107                 return ( 1 + c - programStr);
1108             }
1109             if (count > 1)
1110             {
1111                 numToCopy = MIN(count - 1, sizeof(numStr) - 1);
1112                 StrNCpy(numStr, c + 1, numToCopy);
1113                 numStr[numToCopy] = '\0';
1114                 if ((int) StrSpn(numStr, "0123456789") != (int) numToCopy)
1115                 {
1116                   Message(MSG_POST, "Non-numeric character detected");
1117                   return ( count + c - programStr);
1118                 }
1119                 sscanf(numStr, "%ld", &processingCount);
1120             }
1121             switch (*c) {
1122             case 'p':
1123                 newdb = TYP_AA;
1124                 break;
1125             case 'm':
1126                 newdb = TYP_ML;
1127                 break;
1128             case 'n':
1129                 newdb = TYP_NT;
1130                 break;
1131             case 'g':
1132                 newdb = TYP_CH;
1133                 break;
1134             default:
1135                 Message(MSG_POST, "Invalid neighboring specification <%s>", *c);
1136                 return ( 1 + c - programStr);
1137             }
1138 
1139             if (lsp != NULL && !parseOnly)
1140             {
1141                 processingCount = MIN(processingCount, lsp->num);
1142                 newlsp = NULL;
1143                 EntrezLinkUidList(&newlsp, db, newdb, (Int2) processingCount, lsp->uids, FALSE);
1144                 LocalLinkSetFree(lsp);
1145                 lsp = LinkSetToLocalLinkSet(newlsp);
1146                 LinkSetFree(newlsp);
1147             }
1148             db = newdb;
1149         }
1150 
1151         wrongDelim = wrongDelim == '.' ? ',' : '.';
1152     }
1153 
1154     if (lsp != NULL && !parseOnly)
1155         LocalLinkSetFree(lsp);
1156 
1157     return 0;
1158 }
1159 
1160 static void
DumpTerm(CharPtr term,Int4 special,Int4 total)1161 DumpTerm (CharPtr term, Int4 special, Int4 total)
1162 {
1163     switch (termDisplay) {
1164     case DISPLAY_SPECIAL_AND_TOTAL:
1165         fprintf (master_fp, "%s\t%ld\t%ld\n", term, (long) special, (long) total);
1166         break;
1167     case DISPLAY_TOTAL_ONLY:
1168         fprintf (master_fp, "%s\t%ld\n", term, (long) total);
1169         break;
1170     case DISPLAY_TERM_ONLY:
1171         fprintf (master_fp, "%s\n", term);
1172         break;
1173     }
1174 }
1175 
1176 static Boolean
beginTermProc(CharPtr term,Int4 special,Int4 total)1177 beginTermProc(CharPtr term, Int4 special, Int4 total)
1178 {
1179     if (term != NULL)
1180     {
1181         DumpTerm(term, special, total);
1182         MemFree (term);
1183         return TRUE;
1184     } else {
1185         return FALSE;
1186     }
1187 }
1188 
1189 static Boolean
findOneTermProc(CharPtr term,Int4 special,Int4 total)1190 findOneTermProc(CharPtr term, Int4 special, Int4 total)
1191 {
1192     if (term != NULL)
1193     {
1194         MemFree (term);
1195         return TRUE;
1196     } else {
1197         return FALSE;
1198     }
1199 }
1200 
1201 static Boolean
collectNumTermsProc(CharPtr term,Int4 special,Int4 total)1202 collectNumTermsProc(CharPtr term, Int4 special, Int4 total)
1203 {
1204     static Boolean inited = FALSE;
1205     static TermAndCounts PNTR arrayOfTerm = NULL;
1206     static Int2 head;
1207     static Boolean sawOurTerm;
1208     Int4 i;
1209 
1210     if (special == -1)
1211     { /* flag indicating reset */
1212         for (i = 0; i < termsBefore; i++)
1213         {
1214             MemFree(arrayOfTerm[i].term);
1215         }
1216         MemFree(arrayOfTerm);
1217         arrayOfTerm = NULL;
1218         inited = FALSE;
1219         return TRUE;
1220     }
1221 
1222     if (term == NULL)
1223     {
1224         return FALSE;
1225     }
1226 
1227     if (! inited)
1228     {
1229         inited = TRUE;
1230         arrayOfTerm = MemNew(termsBefore * sizeof(TermAndCounts));
1231         for (i = 0; i < termsBefore; i++)
1232         {
1233             arrayOfTerm[i].term = NULL;
1234         }
1235         head = 0;
1236         sawOurTerm = FALSE;
1237     }
1238 
1239     if (sawOurTerm)
1240     {
1241         DumpTerm (term, special, total);
1242         MemFree (term);
1243         if (--head <= 0)
1244             return FALSE; /* no more terms, please */
1245         else
1246             return TRUE;
1247     } else {
1248         if (StringICmp(term, theTerm) >= 0)
1249         {
1250             sawOurTerm = TRUE;
1251             for (i = 0; i < termsBefore; i++)
1252             {
1253                 if (arrayOfTerm[i].term == NULL)
1254                 {
1255                     head = 0; /* didn't wrap around */
1256                     break;
1257                 }
1258             }
1259 
1260             /* print out the queue */
1261             i = head;
1262             do {
1263                 if (arrayOfTerm[i].term == NULL)
1264                     break;
1265                 DumpTerm(arrayOfTerm[i].term, arrayOfTerm[i].special,
1266                          arrayOfTerm[i].total);
1267                 MemFree (arrayOfTerm[i].term);
1268                 arrayOfTerm[i].term = NULL;
1269                 i = (i + 1) % termsBefore;
1270             } while (i != head);
1271             /* number of remaining records to be displayed after this one */
1272             head = numTerms - (termsBefore + 1);
1273             DumpTerm (term, special, total);
1274             MemFree (term);
1275             return TRUE;
1276         }
1277     }
1278 
1279     if (arrayOfTerm[head].term != NULL)
1280     {
1281         MemFree(arrayOfTerm[head].term);
1282     }
1283     arrayOfTerm[head].term = term;
1284     arrayOfTerm[head].special = special;
1285     arrayOfTerm[head].total = total;
1286     head = (head + 1) % termsBefore;
1287     return TRUE;
1288 }
1289 
1290 
1291 static Boolean
TermProcessing(CharPtr programStr,CharPtr termString,DocType db,Boolean parseOnly)1292 TermProcessing(CharPtr programStr, CharPtr termString, DocType db, Boolean parseOnly)
1293 {
1294     Boolean centerOnTerm = FALSE;
1295     Boolean beginWithTerm = FALSE;
1296     Boolean endWithTerm = FALSE;
1297     Boolean inclusive;
1298     Char    fldStr[ENTREZ_FLD_MNEMONIC_LENGTH+1];
1299     DocField fld;
1300     Int2    firstPage;
1301     CharPtr countIndex;
1302     Int2    ratio;
1303     CharPtr localTermString;
1304 
1305     theTerm = termString;
1306 
1307     if (programStr == NULL || termString == NULL || (int) StrLen(programStr) <
1308         (3 + ENTREZ_FLD_MNEMONIC_LENGTH))
1309         return FALSE;
1310 
1311     switch (programStr[0])
1312     {
1313     case 's': /* output with special+total, tab-delimeted */
1314         termDisplay = DISPLAY_SPECIAL_AND_TOTAL; break;
1315     case 't':
1316         termDisplay = DISPLAY_TOTAL_ONLY; break;
1317     case 'o':
1318         termDisplay = DISPLAY_TERM_ONLY; break;
1319     default:
1320         return FALSE;
1321     }
1322     switch (programStr[1])
1323     {
1324     case '3':
1325     case '4':
1326     case '5':
1327     case '6':
1328     case '7':
1329     case '8':
1330     case '9':
1331         ratio = programStr[1] - '0';
1332         centerOnTerm = TRUE; break;
1333     case 'c': /* center on the term */
1334         ratio = 4;
1335         centerOnTerm = TRUE; break;
1336     case 'b':
1337         beginWithTerm = TRUE; break;
1338     case 'e':
1339         endWithTerm = TRUE; break;
1340     default:
1341         return FALSE;
1342     }
1343 
1344     switch (programStr[2])
1345     { /* ignored for centerOnTerm cases, above */
1346     case 'n': /* non-inclusive */
1347         inclusive = FALSE; break;
1348     case 'i': /* inclusive */
1349         inclusive = TRUE; break;
1350     default:
1351         return FALSE;
1352     }
1353 
1354     StrNCpy (fldStr, &programStr[3], ENTREZ_FLD_MNEMONIC_LENGTH);
1355     fldStr[ENTREZ_FLD_MNEMONIC_LENGTH] = '\0';
1356     if ((fld = EntrezStringToField(db, fldStr)) < 0)
1357         return FALSE;
1358     countIndex = &programStr[3+ENTREZ_FLD_MNEMONIC_LENGTH];
1359     numTerms = DEFAULT_TERMLIST_LEN;
1360     if (*countIndex != '\0' &&
1361         StrSpn(countIndex, "0123456789") == StrLen(countIndex))
1362     {
1363         numTerms = atoi(countIndex);
1364     }
1365 
1366     if (parseOnly)
1367         return TRUE;
1368 
1369     if (termString[0] == '"' && termString[StrLen(termString)-1] == '"')
1370     {
1371         localTermString = MemNew(StrLen(termString));
1372         StrCpy (localTermString, &termString[1]);
1373         localTermString[StrLen(localTermString)-1] = '\0';
1374         theTerm = localTermString;
1375     } else {
1376         localTermString = termString;
1377     }
1378 
1379 
1380     if (centerOnTerm)
1381     {
1382         EntrezTermListByTerm(db, fld, localTermString, 1, findOneTermProc, &firstPage);
1383         if (firstPage > 0)
1384         {
1385             firstPage--;
1386         }
1387         termsBefore = (Int2) (numTerms / ((float) ratio / 2));
1388         EntrezTermListByPage(db, fld, firstPage, 4, collectNumTermsProc);
1389         collectNumTermsProc(NULL, -1, -1); /* reset */
1390     } else {
1391         EntrezTermListByTerm(db, fld, localTermString, numTerms, beginTermProc, &firstPage);
1392     }
1393 
1394     if (localTermString != termString)
1395     {
1396         MemFree (localTermString);
1397     }
1398 
1399     return TRUE;
1400 }
1401 
1402 static Boolean
TaxProcessing(CharPtr taxString,DocType db,CharPtr progString)1403 TaxProcessing(CharPtr taxString, DocType db, CharPtr progString)
1404 {
1405     EntrezHierarchyPtr ehp;
1406     EntrezHierarchyPtr ehp2;
1407     EntrezHierarchyChildPtr ecp;
1408     Int2 i;
1409     DocField fld;
1410     int maxDepth;
1411 
1412     if (db != TYP_AA && db != TYP_NT && db != TYP_ML && db != TYP_CH)
1413     {
1414         fprintf (master_fp, "Invalid database type %d\n", db);
1415         return FALSE;
1416     }
1417     fld = db == TYP_ML ? FLD_MESH_HIER : FLD_ORGN_HIER;
1418     ehp = EntrezHierarchyGet(taxString, db, fld);
1419     if (ehp == NULL)
1420     {
1421         fprintf (master_fp, "Term %s not found\n", taxString);
1422         return FALSE;
1423     }
1424 
1425     if (StrNCmp(progString, "dump", 4) == 0)
1426     {
1427         sscanf(&progString[4], "%d", &maxDepth);
1428         if (maxDepth <= 0)
1429             maxDepth = INT2_MAX;
1430         PreOrderTaxTraversal(ehp, 0, db, fld, (Int2) maxDepth);
1431         EntrezHierarchyFree(ehp);
1432     } else {
1433         fprintf (master_fp, "term %s\nLineage:\n", ehp->term);
1434         for (i = 0; i < ehp->numInLineage; i++)
1435             fprintf (master_fp, " %s\n", ehp->lineage[i]);
1436         fprintf (master_fp, " %s\n", taxString);
1437         if (ehp->numInLineage > 0)
1438         {
1439             ehp2 = EntrezHierarchyGet(ehp->lineage[ehp->numInLineage - 1], db,
1440                                       fld);
1441             if (ehp2 != NULL && ehp2->numChildren > 1)
1442             {
1443                 fprintf (master_fp, "Siblings:\n");
1444                 for (i = 0; i < ehp2->numChildren; i++)
1445                 {
1446                     ecp = &ehp2->children[i];
1447                     if (StrICmp(ecp->name, taxString) != 0)
1448                         fprintf (master_fp, " %s\n", ecp->name);
1449                 }
1450                 EntrezHierarchyFree(ehp2);
1451             }
1452         }
1453 
1454         if (ehp->numChildren > 0)
1455         {
1456             fprintf (master_fp, "Children:\n");
1457             for (i = 0; i < ehp->numChildren; i++)
1458             {
1459                 ecp = &ehp->children[i];
1460                 fprintf (master_fp, " %s\n", ecp->name);
1461             }
1462         }
1463         EntrezHierarchyFree(ehp);
1464     }
1465 
1466     return TRUE;
1467 }
1468 
1469 static ValNodePtr
ParseImportedFiles(CharPtr str)1470 ParseImportedFiles(CharPtr str)
1471 {
1472     CharPtr localStr;
1473     CharPtr token;
1474     FILE *fp;
1475     Char s[100];
1476     DocType db;
1477     ValNodePtr head = NULL;
1478     ValNodePtr node;
1479     SavListPtr slp;
1480     Int2 linesread;
1481     Int4Ptr uids;
1482     CharPtr p;
1483 
1484     localStr = StringSave(str);
1485     token = StrTok(localStr, ", ");
1486 
1487     while (token != NULL)
1488     {
1489         if ((fp = FileOpen(token, "r")) == NULL)
1490         {
1491             Message(MSG_POST, "Error opening file %s", token);
1492             MemFree(localStr);
1493             return NULL;
1494         }
1495         linesread = 0;
1496         while (FileGets(s, (sizeof s) - 1, fp) != NULL)
1497         {
1498 	    Boolean nonNumeric = StrSpn(s, "0123456789 \n\r") != StrLen(s);
1499 
1500             linesread++;
1501             if (linesread == 1)
1502             {
1503                 if(nonNumeric)
1504 		  {
1505 		    db = -1;
1506 
1507 		    if(StrICmp(s,">MEDLINE") == 0)
1508 		      db = TYP_ML;
1509 		    else if (StrICmp(s,">protein") == 0)
1510 		      db = TYP_AA;
1511 		    else if (StrICmp(s,">nucleotide") == 0)
1512 		      db = TYP_NT;
1513 		    else if (StrICmp(s,">genome") == 0)
1514 		      db = TYP_CH;
1515 		}
1516 		db = atoi(s);
1517                 if (db != TYP_ML && db != TYP_AA && db != TYP_NT && db != TYP_CH)
1518                 {
1519                     Message(MSG_POST, "Invalid database type %d in file %s", db, token);
1520                     FileClose(fp);
1521                     MemFree(localStr);
1522                     return NULL;
1523                 }
1524 		continue;
1525             }
1526             if (nonNumeric)
1527             {
1528                 Message(MSG_POST, "Invalid character at line %d of file %s", linesread, token);
1529                 FileClose(fp);
1530                 MemFree(localStr);
1531                 return NULL;
1532             }
1533         }
1534         fseek(fp, 0, SEEK_SET); /* rewind to beginning */
1535         uids = (Int4Ptr) MemNew(sizeof(Int4) * linesread);
1536         linesread = -1; /* skip over db this time */
1537         while (FileGets(s, (sizeof s) - 1, fp) != NULL)
1538         {
1539             if (linesread >= 0)
1540             {
1541                 uids[linesread] = atoi(s);
1542             }
1543             linesread++;
1544         }
1545         FileClose(fp);
1546         slp = (SavListPtr) MemNew(sizeof(SavList));
1547         slp->uids = uids;
1548         slp->db = db;
1549         slp->num = linesread;
1550         if ((p = StringRChr(token, DIRDELIMCHR)) == NULL)
1551         {
1552             slp->name = MemNew(StrLen(token) + 2);
1553             StrCpy(&slp->name[1], token);
1554         } else {
1555             slp->name = StringSave(p);
1556         }
1557         slp->name[0] = '*'; /* to make the name unique, like in Entrez */
1558         if (head == NULL)
1559         {
1560             head = ValNodeNew(NULL);
1561             node = head;
1562         } else {
1563             node = ValNodeNew(head);
1564         }
1565         node->data.ptrvalue = (Pointer) slp;
1566 
1567         token = StrTok(NULL, ", ");
1568     }
1569 
1570     return head;
1571 }
1572 
1573 
1574 static LocalLinkSetPtr
ParseUidList(CharPtr str)1575 ParseUidList(CharPtr str)
1576 {
1577     CharPtr localStr;
1578     CharPtr token;
1579     long uid;
1580     int i;
1581     Int4 count = 0;
1582     Int4Ptr vector;
1583     LocalLinkSetPtr lsp;
1584 
1585     /* loop through twice ... the first time count, the second time, store values */
1586     for (i = 0; i < 2; i++)
1587     {
1588         localStr = StringSave(str);
1589         token = StrTok(localStr, ", ");
1590         count = 0;
1591         while (token != NULL)
1592         {
1593             if (StrSpn(token, "0123456789") != StrLen(token))
1594             {
1595                 Message(MSG_POST, "parsing error at position %d", ((long) token) - ((long) localStr));
1596                 MemFree(localStr);
1597                 return NULL;
1598             }
1599             if (i == 1)
1600             {
1601                 sscanf(token, "%ld", &uid);
1602                 vector[count] = (Int4) uid;
1603             }
1604             count++;
1605             token = StrTok(NULL, ", ");
1606         }
1607         if (i == 0)
1608         {
1609             vector = MemNew(count * sizeof(Int4));
1610         }
1611         MemFree(localStr);
1612     }
1613 
1614     if (count == 0)
1615     {
1616         return NULL;
1617     }
1618     lsp = LocalLinkSetNew();
1619     lsp->num = count;
1620     lsp->uids = vector;
1621     return lsp;
1622 }
1623 
1624 static CharPtr
FormatPositionalErr(Int2 beginErr,Int2 endErr,Int2 startLen)1625 FormatPositionalErr(Int2 beginErr, Int2 endErr, Int2 startLen)
1626 {
1627     int i;
1628     CharPtr str;
1629 
1630     /* prepare text describing where error occurred */
1631     str = MemNew(endErr + startLen + 2);
1632     for (i = 0; i < endErr + startLen - 1; i++)
1633     {
1634         str[i] = ' ';
1635     }
1636     str[i++] =  '^';
1637     str[beginErr + startLen] = '^';
1638     str[i] =  '\0';
1639 
1640     return str;
1641 }
1642 
/* True when x is a hexadecimal digit: IS_DIGIT(x), or a letter in a-f / A-F.
   The argument is evaluated more than once, so it must be free of side
   effects. */
#define IS_HEX(x)   (IS_DIGIT(x) || \
                     ('a' <= (x) && (x) <= 'f') || \
                     ('A' <= (x) && (x) <= 'F'))
1645 
1646 static CharPtr
WWWStyleDecoding(CharPtr string,Boolean doEncoding)1647 WWWStyleDecoding(CharPtr string, Boolean doEncoding)
1648 { /* decoding in-place, assuming that decoded string is always smaller than
1649      original */
1650     CharPtr p, q, maxchar;
1651     Char str[3];
1652     int newchar;
1653 
1654     if (! doEncoding)
1655         return string;
1656 
1657     maxchar = string + (int) StrLen(string);
1658 
1659     for (p = string; p < maxchar - 2; p++)
1660     {
1661         if (*p == '%' && IS_HEX(p[1]) && IS_HEX(p[2]))
1662         {
1663             str[0] = p[1];
1664             str[1] = p[2];
1665             str[2] = '\0';
1666             sscanf(str, "%x", &newchar);
1667             *p = (Char) newchar;
1668             maxchar -= 2;
1669             for (q = p + 1; q <= maxchar; q++)
1670                 *q = q[2];
1671         }
1672     }
1673 
1674     return string;
1675 }
1676 static int LIBCALLBACK
compUidsDescending(VoidPtr a,VoidPtr b)1677 compUidsDescending(VoidPtr a, VoidPtr b)
1678 {
1679     Int4Ptr x = (Int4Ptr) a;
1680     Int4Ptr y = (Int4Ptr) b;
1681 
1682     return (*y - *x);  /* note descending order */
1683 }
1684 
1685 static void
SortUidsDescending(LocalLinkSetPtr lsp)1686 SortUidsDescending(LocalLinkSetPtr lsp)
1687 {
1688     Boolean sorted;
1689     int k;
1690     Int4 temp;
1691 
1692     if (lsp == NULL)
1693         return;
1694 
1695     /* try to sort uids in descending order */
1696 
1697     for (sorted = TRUE, k = 1; k < lsp->num; k++)
1698     {
1699         if (lsp->uids[k-1] < lsp->uids[k])
1700         {
1701             sorted = FALSE;
1702             break;
1703         }
1704     }
1705 
1706     if (! sorted)
1707     {   /* assume that the existing order is reversed */
1708         for (k = (lsp->num / 2) - 1; k >= 0; k--)
1709         {
1710             temp = lsp->uids[k];
1711             lsp->uids[k] = lsp->uids[lsp->num - 1 - k];
1712             lsp->uids[lsp->num - 1 - k] = temp;
1713         }
1714 
1715         /* now check that it's sorted */
1716         for (sorted = TRUE, k = 1; k < lsp->num; k++)
1717         {
1718             if (lsp->uids[k-1] < lsp->uids[k])
1719             {
1720                 sorted = FALSE;
1721                 break;
1722             }
1723         }
1724 
1725         if (! sorted)
1726         { /* as a last resort, sort them using heapsort */
1727             HeapSort(lsp->uids, lsp->num, sizeof(Int4), compUidsDescending);
1728         }
1729     }
1730 }
1731 
1732 static void
PrintHelp(void)1733 PrintHelp(void)
1734 {
1735     fprintf (master_fp, "Entrcmd is a non-interactive command-line interface which allows a user to\n");
1736     fprintf (master_fp, "perform a series of neighboring and output operations, based upon an initial\n");
1737     fprintf (master_fp, "set of UIDs or a boolean expression which describes a set of UIDs.\n");
1738     fprintf (master_fp, "Alternatively, it can be used to display an alphabetically sorted list of\n");
1739     fprintf (master_fp, "terms near an initial term.\n");
1740     fprintf (master_fp, "\n");
1741     fprintf (master_fp, "Type 'entrcmd' with no arguments for a brief summary of command-line options.\n");
1742     fprintf (master_fp, "\n");
1743     fprintf (master_fp, "    EXPRESSION SYNTAX (-e option)\n");
1744     fprintf (master_fp, "\n");
1745     fprintf (master_fp, "The following grammar is based upon Backus-Naur form.  Braces ({}) are used to\n");
1746     fprintf (master_fp, "specify optional fields, and ellipses (...) represents an arbitrary number\n");
1747     fprintf (master_fp, "of repititions.  In most Backus-Naur forms, the vertical bar (|) and brackets\n");
1748     fprintf (master_fp, "([]) are used as meta-symbols.  However, in the following grammar, the\n");
1749     fprintf (master_fp, "vertical bar and brackets are terminal symbols, and three stacked vertical\n");
1750     fprintf (master_fp, "bars are used to represent alternation.\n");
1751     fprintf (master_fp, "\n");
1752     fprintf (master_fp, "expression ::= diff { - diff ... }\n");
1753     fprintf (master_fp, "diff ::= term { | term ... }\n");
1754     fprintf (master_fp, "term ::= factor { & factor ... }\n");
1755     fprintf (master_fp, "                     |\n");
1756     fprintf (master_fp, "factor ::= qualtoken | ( expression )\n");
1757     fprintf (master_fp, "                     |\n");
1758     fprintf (master_fp, "qualtoken ::= token { [ fld { ,S } ] }\n");
1759     fprintf (master_fp, "\n");
1760     fprintf (master_fp, "\n");
1761     fprintf (master_fp, "token is a string of characters which either contains no special characters,\n");
1762     fprintf (master_fp, "or which is delimited by double-quotes (\").  Double-quote marks and\n");
1763     fprintf (master_fp, "backslashes (\\) which appear with a quoted token must be quoted by an\n");
1764     fprintf (master_fp, "additional backslash.\n");
1765     fprintf (master_fp, "\n");
1766     fprintf (master_fp, "fld is an appropriate string describing a field.  The possible values are\n");
1767     fprintf (master_fp, "described in the following table.  For all databases, an asterisk(*) is a\n");
1768     fprintf (master_fp, "possible value for fld, signifying the union of all possible fields for that\n");
1769     fprintf (master_fp, "database.  '*' is also the default field, if no field qualifier is specified.\n");
1770     fprintf (master_fp, "\n");
1771     fprintf (master_fp, "  | fld| Databases and descriptions\n");
1772     fprintf (master_fp, "  +----+--------------------------------------------------------------------\n");
1773     fprintf (master_fp, "  |WORD| For MEDLINE, \"Abstract or Title\"; for Sequences, \"Text Terms\"\n");
1774     fprintf (master_fp, "  |MESH| MEDLINE only, \"MeSH term\"\n");
1775     fprintf (master_fp, "  |AUTH| For all databases, \"Author Name\"\n");
1776     fprintf (master_fp, "  |JOUR| For all databases, \"Journal Title\"\n");
1777     fprintf (master_fp, "  |GENE| For all databases, \"Gene Name\"\n");
1778     fprintf (master_fp, "  |KYWD| For MEDLINE, \"Substance\", for Sequences \"Keyword\"\n");
1779     fprintf (master_fp, "  |ECNO| For MEDLINE and protein, \"E.C. number\"\n");
1780     fprintf (master_fp, "  |ORGN| For all databases, \"Organism\"\n");
1781     fprintf (master_fp, "  |ACCN| For Sequence databases, \"Accession\"\n");
1782     fprintf (master_fp, "  |PROT| For protein, \"Protein Name\"\n");
1783     fprintf (master_fp, "\n");
1784     fprintf (master_fp, "The presence of \",S\"  after a field specifier implies the same semantics\n");
1785     fprintf (master_fp, "as \"special\" in Entrez.  Entrez \"total\" semantics are the default.\n");
1786     fprintf (master_fp, "\n");
1787     fprintf (master_fp, "\n");
1788     fprintf (master_fp, "    PROGRAM OF COMMANDS (-p option)\n");
1789     fprintf (master_fp, "\n");
1790     fprintf (master_fp, "For the \"-e\" and \"-u\" options, the program of commands consists of a sequence of\n");
1791     fprintf (master_fp, "neighboring operations alternated with optional output commands.  All output\n");
1792     fprintf (master_fp, "commands, except the first, must be preceded by a period (.), and all\n");
1793     fprintf (master_fp, "neighboring commands must be preceded by a comma (,).\n");
1794     fprintf (master_fp, "\n");
1795     fprintf (master_fp, "The output commands are:\n");
1796     fprintf (master_fp, "   no    None (default)             sg    Sequence GenBank/GenPept flat file format\n");
1797     fprintf (master_fp, "   ma    MEDLINE ASN.1 format       sa    Sequence ASN.1 format\n");
1798     fprintf (master_fp, "   md    MEDLINE docsums            sd    Sequence docsums\n");
1799     fprintf (master_fp, "   ml    MEDLARS format             sf    Sequence FASTA format\n");
1800     fprintf (master_fp, "   mr    MEDLINE report format      sr    Sequence report format\n");
1801     fprintf (master_fp, "   mu    MEDLINE UIDs               su    Sequence UIDs\n");
1802     fprintf (master_fp, "                                    si    Sequence IDs\n");
1803     fprintf (master_fp, "Each output command may be followed by an optional count indicating how\n");
1804     fprintf (master_fp, "many articles to display.  The default is to display all the articles.\n");
1805     fprintf (master_fp, "\n");
1806     fprintf (master_fp, "If the \"-x\" command line option appears (\"export to a saved UID list\"), then\n");
1807     fprintf (master_fp, "the first \"mu\" or \"su\" command results in those UIDs being written to that\n");
1808     fprintf (master_fp, "\"saved UID list\" file, rather than being written to the standard output.\n");
1809     fprintf (master_fp, "\n");
1810     fprintf (master_fp, "Neighboring commands indicate the database to neighbor \"to\", and\n");
1811     fprintf (master_fp, "consists of the first letter of each of the possible databases:\n");
1812     fprintf (master_fp, "(medline, protein, nucleotide) followed by an optional count of\n");
1813     fprintf (master_fp, "how many of the current set of articles should be included in the\n");
1814     fprintf (master_fp, "neighboring operation.\n");
1815     fprintf (master_fp, "\n");
1816     fprintf (master_fp, "Example:\n");
1817     fprintf (master_fp, "  Find the articles written by \"Kay LE\", but not by \"Forman-Kay JD\".  Find\n");
1818     fprintf (master_fp, "  their MEDLINE neighbors.  Print document summaries for all of these\n");
1819     fprintf (master_fp, "  neighbors.  Of these neighbors, neighbor the first 5 entries to the protein\n");
1820     fprintf (master_fp, "  database.  Print up to 10 of these sequences in Sequence Report format.\n");
1821     fprintf (master_fp, "\n");
1822     fprintf (master_fp, "    entrcmd -e '\"Kay LE\" [AUTH] - \"Forman-Kay JD\" [AUTH]' -p ,m.md,p5.sr10\n");
1823     fprintf (master_fp, "\n");
1824     fprintf (master_fp, "\n");
1825     fprintf (master_fp, "If the \"-t\" option is used, then the program of commands is different from\n");
1826     fprintf (master_fp, "what is described above.  Rather, it consists of a seven character string,\n");
1827     fprintf (master_fp, "optionally followed by the number of terms which should be displayed.\n");
1828     fprintf (master_fp, "The default number of terms is 40.\n");
1829     fprintf (master_fp, "\n");
1830     fprintf (master_fp, "The string is of the form '123FLDD', where 1, 2, and 3 are as follows,\n");
1831     fprintf (master_fp, "and FLDD is one of the field specifications described above (AUTH, etc.).\n");
1832     fprintf (master_fp, "\n");
1833     fprintf (master_fp, "1 - one of 't', 's', or 'o', where 't' means that the total term counts\n");
1834     fprintf (master_fp, "    should be displayed after the term, 's' means that the special and\n");
1835     fprintf (master_fp, "    total term counts should be displayed after the term, and 'o' means\n");
1836     fprintf (master_fp, "    that only the term itself should be displayed\n");
1837     fprintf (master_fp, "2 - one of 'b', 'c', 'e', or an integer from 3 to 9, where:\n");
1838     fprintf (master_fp, "    'b' - display terms beginning with the specified term\n");
1839     fprintf (master_fp, "    'c' - \"center\" terms; i.e., display half the terms before the specified\n");
1840     fprintf (master_fp, "          term, and half the terms after the specified term\n");
1841     fprintf (master_fp, "    'e' - display terms ending with the specified term\n");
1842     fprintf (master_fp, "    k   - an integer from 3 to 9, indicating that (2/k)ths of the terms\n");
1843     fprintf (master_fp, "          should be alphabetically before the specified term.  Note that\n");
1844     fprintf (master_fp, "          '4' is the same as 'c'.  The value '9' is recommended for\n");
1845     fprintf (master_fp, "          scrolled displays.\n");
1846     fprintf (master_fp, "3 - One of 'i' or 'n', indicating for the 'b' and 'e' options above whether\n");
1847     fprintf (master_fp, "    the specified term is to be included in the output, where 'i' means\n");
1848     fprintf (master_fp, "    inclusive, and 'n' means non-inclusive.  This value is ignored for\n");
1849     fprintf (master_fp, "    other values of the previous character, but must be present as a\n");
1850     fprintf (master_fp, "    place-holder.\n");
1851     fprintf (master_fp, "\n");
1852     fprintf (master_fp, "[ WARNING: SOME OF THESE TERM SPECIFICATIONS OPTIONS (COMBINATIONS OF 1,\n");
1853     fprintf (master_fp, "2, AND 3 ABOVE) ARE CURRENTLY UNIMPLEMENTED ]\n");
1854     fprintf (master_fp, "\n");
1855     fprintf (master_fp, "\n");
1856     fprintf (master_fp, "    WORLD WIDE WEB STYLE OUTPUT (-w option)\n");
1857     fprintf (master_fp, "\n");
1858     fprintf (master_fp, "The entrcmd program can also generate output which is appropriate for\n");
1859     fprintf (master_fp, "display in an HTML document, to be \"served\" by a WWW server.  In particular,\n");
1860     fprintf (master_fp, "some output text contains HTML hypertext links to other data, as well as\n");
1861     fprintf (master_fp, "HTML formatting information.  The parameter to the -w option is the\n");
1862     fprintf (master_fp, "directory prefix for the linked hypertext items; \"/htbin\" is recommended.\n");
1863     fprintf (master_fp, "\n");
1864     fprintf (master_fp, "If the \"-w\" option is selected, then the \"-f\" option may also be selected.\n");
1865     fprintf (master_fp, "This indicates that the HTML output should be of a form which is\n");
1866     fprintf (master_fp, "appropriate for a HTML \"FORM\".  This output can only be processed by\n");
1867     fprintf (master_fp, "advanced WWW clients, but potentially provides a nicer interface, where\n");
1868     fprintf (master_fp, "each document summary has an associated checkbox, resulting in a display\n");
1869     fprintf (master_fp, "which is similar to the Entrez CD-ROM application.  The \"-c\" option, if used\n");
1870     fprintf (master_fp, "in conjunction with \"-f\", indicates that these checkboxes should be\n");
1871     fprintf (master_fp, "\"pre-checked\", i.e., selected.  This potentially provides the equivalent\n");
1872     fprintf (master_fp, "of the Entrez \"select all\" operation for neighboring.\n");
1873 }
1874 
1875 
BSPtoLSP(ByteStorePtr bsp)1876 static LocalLinkSetPtr BSPtoLSP(ByteStorePtr bsp)
1877 {
1878     LocalLinkSetPtr lsp;
1879 
1880     if (bsp == NULL)
1881         return NULL;
1882 
1883     lsp = LocalLinkSetNew();
1884 
1885     lsp->num = BSLen(bsp) / sizeof(DocUid);
1886     if ((lsp->uids = MemNew(BSLen(bsp))) == NULL)
1887     { /* platforms which can't allocate this are out of luck */
1888         lsp = LocalLinkSetFree(lsp);
1889     } else {
1890         BSSeek (bsp, 0L, 0);
1891         BSRead (bsp, lsp->uids, lsp->num * sizeof (DocUid));
1892     }
1893 
1894     return lsp;
1895 }
1896 
Main(void)1897 Int2 Main(void)
1898 {
1899     int Numarg = sizeof(myargs)/sizeof(Args);
1900     DocType db = TYP_ML;
1901     Boolean exprSpecified = FALSE;
1902     Boolean uidsSpecified = FALSE;
1903     Boolean termSpecified = FALSE;
1904     Boolean taxSpecified = FALSE;
1905     Boolean neighborSpecified = FALSE;
1906     CharPtr boolString;
1907     short erract;
1908     ErrDesc err;
1909     Int2 beginErr;
1910     Int2 endErr;
1911     CharPtr str;
1912     LocalLinkSetPtr lsp = NULL;
1913     LinkSetPtr oldstylelsp;
1914     ByteStorePtr bsp;
1915     CharPtr programStr;
1916     CharPtr termString;
1917     CharPtr taxString;
1918     CharPtr neighborString;
1919     CharPtr neighborFile;
1920     FILE *neighborFp;
1921     CharPtr exportFile;
1922     CharPtr importFileList;
1923     Int2 progErr;
1924     ValNodePtr savlist = NULL;
1925     ValNodePtr np;
1926     SavListPtr slp;
1927     Char param[6];
1928     EntrezNeighborTextPtr entp;
1929     Boolean useWWWEncoding;
1930 
1931     if ( ! GetArgs("Entrez command-line $Revision: 6.5 $", Numarg, myargs))
1932         return 1;
1933 
1934     if (myargs[14].strvalue)
1935     {
1936         if ((master_fp = FileOpen(myargs[14].strvalue, "w")) == NULL)
1937         {
1938             Message(MSG_POST, "Unable to open output file <%s>", myargs[14].strvalue);
1939             return 9;
1940         }
1941     }
1942 
1943     if (myargs[6].intvalue)
1944     {
1945         PrintHelp();
1946         FileClose(master_fp);
1947         return 0;
1948     }
1949 
1950     if (myargs[0].strvalue != NULL)
1951     {
1952         switch(myargs[0].strvalue[0]) {
1953         case 'm': db = TYP_ML; break;
1954         case 'n': db = TYP_NT; break;
1955         case 'g': db = TYP_CH; break;
1956         case 'p': db = TYP_AA; break;
1957         default:
1958             Message(MSG_POST /* MSG_FATAL */, "Invalid database type <%s>", myargs[0].strvalue);
1959             FileClose(master_fp);
1960             return 1;
1961         }
1962     }
1963 
1964     useWWWEncoding = myargs[15].intvalue;
1965 
1966     if (myargs[16].intvalue)
1967       seqEntryRetval = -2;
1968     else
1969       seqEntryRetval = myargs[17].intvalue;
1970 
1971     if (myargs[1].strvalue != NULL && myargs[1].strvalue[0] != '\0')
1972     {
1973         exprSpecified = TRUE;
1974         boolString = WWWStyleDecoding(myargs[1].strvalue, useWWWEncoding);
1975     }
1976     if (myargs[2].strvalue != NULL && myargs[2].strvalue[0] != '\0')
1977         uidsSpecified = TRUE;
1978 
1979     if (myargs[11].strvalue != NULL && myargs[11].strvalue[0] != '\0')
1980     {
1981         termSpecified = TRUE;
1982         termString = WWWStyleDecoding(myargs[11].strvalue, useWWWEncoding);
1983     }
1984 
1985     if (myargs[12].strvalue != NULL && myargs[12].strvalue[0] != '\0')
1986     {
1987         taxSpecified = TRUE;
1988         taxString = WWWStyleDecoding(myargs[12].strvalue, useWWWEncoding);
1989     }
1990 
1991     if (myargs[13].strvalue != NULL && myargs[13].strvalue[0] != '\0')
1992     {
1993         neighborSpecified = TRUE;
1994         neighborFile = myargs[13].strvalue;
1995         if ((neighborFp = FileOpen(neighborFile, "r")) == NULL)
1996         {
1997             Message(MSG_POST /* MSG_FATAL */, "Unable to open neighboring-file %s", neighborFile);
1998             FileClose(master_fp);
1999             return 1;
2000         }
2001         FileClose(neighborFp);
2002     }
2003 
2004     if (((exprSpecified != 0) + (uidsSpecified != 0) + (termSpecified != 0) +
2005          (taxSpecified != 0) + (neighborSpecified != 0)) != 1)
2006     {
2007         Message(MSG_POST /* MSG_FATAL */, "Exactly one of the -e, -l, -n, -t and -u options must be specified");
2008         FileClose(master_fp);
2009         return 1;
2010     }
2011 
2012     if (uidsSpecified)
2013     {
2014         lsp = ParseUidList(myargs[2].strvalue);
2015         if (lsp == NULL)
2016         {
2017             Message(MSG_POST /* MSG_FATAL */, "Syntax error on UID list");
2018             FileClose(master_fp);
2019             return 1;
2020         }
2021     }
2022 
2023     if (exprSpecified)
2024     {
2025         ErrGetOpts(&erract, NULL);
2026         ErrSetOpts(ERR_CONTINUE, 0);
2027         ErrFetch(&err);
2028         if (! EntrezTLParseString(boolString, db, -1, &beginErr, &endErr))
2029         {
2030             ErrShow();
2031             Message(MSG_POST, "Syntax error: %s", boolString);
2032             if (endErr < 0)
2033             {
2034                 endErr = 0;
2035             }
2036 
2037             str = FormatPositionalErr(beginErr, endErr, StrLen("Syntax error: "));
2038 
2039             Message(MSG_POST, str);
2040             MemFree(str);
2041             FileClose(master_fp);
2042             return 2;
2043         }
2044         ErrSetOpts(erract, 0);
2045     }
2046 
2047     wwwPrefix = myargs[5].strvalue;
2048 
2049     programStr = myargs[3].strvalue;
2050 
2051     useForms = myargs[7].intvalue;
2052     checkForms = myargs[8].intvalue;
2053     exportFile = myargs[9].strvalue;
2054     importFileList = myargs[10].strvalue;
2055 
2056     if (exportFile != NULL && exportFile[0] != '\0') {
2057         GetAppParam("ENTREZ", "ENTRCMD", "EXPORT_OK", "FALSE", param,
2058                     sizeof param);
2059         if (StrICmp(param, "TRUE") != 0)
2060         {
2061             Message(MSG_POST, "Export option is disabled");
2062             FileClose(master_fp);
2063             return 6;
2064         }
2065     }
2066 
2067     if (termSpecified)
2068     {
2069         if (! TermProcessing(programStr, termString, db, TRUE))
2070         {
2071             Message(MSG_POST, "Invalid term program specification %s", programStr);
2072             FileClose(master_fp);
2073             return 3;
2074         }
2075     } else if (taxSpecified) {
2076         /* no action */
2077     } else {
2078         if ((progErr = RunProgram(programStr, NULL, db, TRUE)) != 0)
2079         {
2080             Message(MSG_POST, "Program error: %s", programStr);
2081             if (progErr > 0)
2082                 str = FormatPositionalErr(progErr - 1, progErr - 1,
2083                                           StrLen("Program error: "));
2084             else
2085                 str = StringSave("Validation error");
2086 
2087             Message(MSG_POST, str);
2088             MemFree(str);
2089             FileClose(master_fp);
2090             return 4;
2091         }
2092         if (exportFile != NULL)
2093         {
2094             exportFilePtr = FileOpen(exportFile, "w");
2095         }
2096         if (importFileList != NULL)
2097         {
2098             if ((savlist = ParseImportedFiles(importFileList)) == NULL)
2099             {
2100                 Message(MSG_POST, "Fatal error processing imported files");
2101             }
2102         }
2103     }
2104 
2105     /* note that we defer EntrezInit() until we're sure that there are no */
2106     /* parsing errors                                                     */
2107     if (! EntrezInit("entrcmd", FALSE, NULL))
2108     {
2109         Message(MSG_POST, "Unable to access Entrez dataset");
2110         FileClose(master_fp);
2111         return 5;
2112     }
2113 
2114     EntrezBioseqFetchEnable("entrcmd", TRUE);
2115 
2116     if (myargs[4].intvalue)
2117     {
2118         str = EntrezDetailedInfo();
2119         fprintf (master_fp, "                               STATUS REPORT\n\n\n%s\n\n", str);
2120         fflush(master_fp);
2121     }
2122 
2123     while (savlist != NULL)
2124     { /* create named UID lists, as needed */
2125         slp = (SavListPtr) savlist->data.ptrvalue;
2126         EntrezCreateNamedUidList(slp->name, slp->db, 0, slp->num, slp->uids);
2127         MemFree(slp->name);
2128         MemFree(slp->uids);
2129         MemFree(slp);
2130         np = savlist->next;
2131         MemFree(savlist);
2132         savlist = np;
2133     }
2134 
2135     if (exprSpecified)
2136     { /* note that we deferred evaluation until after EntrezInit() */
2137         if ((bsp = EntrezTLEvalXString(boolString, db, -1, NULL, NULL)) != NULL)
2138         {
2139             lsp = BSPtoLSP(bsp);
2140             BSFree(bsp);
2141         }
2142         if (db == TYP_ML && lsp != NULL)
2143         {
2144             SortUidsDescending(lsp);
2145         }
2146     }
2147 
2148     if (neighborSpecified)
2149     {
2150         if (db == TYP_ML)
2151         {
2152             size_t neighborLen;
2153             int k;
2154             Int4 temp;
2155 
2156             if (! EntrezCanNeighborText())
2157             {
2158                 Message(MSG_POST, "Unable to perform on-the-fly neighboring\n");
2159                 FileClose(master_fp);
2160                 return 5;
2161             }
2162             /* create text object here . */
2163             neighborLen = FileLength(neighborFile);
2164             if ((neighborString = MemNew(neighborLen+1)) == NULL)
2165             {
2166                 Message(MSG_POST, "Unable to allocate memory for on-the-fly neighboring\n");
2167                 FileClose(master_fp);
2168                 return 5;
2169             }
2170             neighborFp = FileOpen(neighborFile, "r");
2171             FileRead(neighborString, neighborLen, 1, neighborFp);
2172             FileClose (neighborFp);
2173             neighborString[neighborLen] = 0;
2174             entp = EntrezNeighborTextNew();
2175             entp->percent_terms_to_use = 100;
2176             entp->max_neighbors = 0;
2177             entp->min_score = 0;
2178             entp->fld = FLD_WORD;
2179             entp->normalText = neighborString;
2180             entp->specialText = StringSave("");
2181 	    oldstylelsp = EntrezDoNeighborText(entp);
2182             lsp = LinkSetToLocalLinkSet(oldstylelsp);
2183 	    LinkSetFree(oldstylelsp);
2184             EntrezNeighborTextFree(entp);
2185 
2186             /* reverse the order since they are received in backwards order */
2187             if (lsp != NULL)
2188             {
2189                 for (k = (lsp->num / 2) - 1; k >= 0; k--)
2190                 {
2191                     temp = lsp->uids[k];
2192                     lsp->uids[k] = lsp->uids[lsp->num - 1 - k];
2193                     lsp->uids[lsp->num - 1 - k] = temp;
2194                 }
2195             }
2196         } else {
2197             Boolean isprot = db == TYP_AA;
2198             SeqEntryPtr sep;
2199             BioseqPtr bsp;
2200 
2201             if (! EntrezCanBlast())
2202             {
2203                 Message(MSG_POST, "Unable to perform on-the-fly BLAST\n");
2204                 FileClose(master_fp);
2205                 return 5;
2206             }
2207             neighborFp = FileOpen(neighborFile, "r");
2208             sep = FastaToSeqEntry(neighborFp, !isprot);
2209             FileClose (neighborFp);
2210             if (sep == NULL)
2211             {
2212                 Message (MSG_OK, "Error encountered while parsing sequence data");
2213                 return 8;
2214             }
2215             bsp = NULL;
2216             SeqEntryExplore(sep, &bsp, isprot? FindAProt : FindANuc);
2217             if (bsp == NULL)
2218             {
2219                 Message (MSG_OK, "Error encountered while parsing sequence data for Bioseq");
2220                 /* ? SeqEntryFree(sep); */
2221                 return 9;
2222             }
2223             oldstylelsp = EntrezBlastBioseq(bsp, db, NULL, NULL, NULL, FALSE);
2224             lsp = LinkSetToLocalLinkSet(oldstylelsp);
2225 	    LinkSetFree(oldstylelsp);
2226         }
2227     }
2228 
2229     if (termSpecified)
2230     {
2231         TermProcessing(programStr, termString, db, FALSE);
2232     } else if (taxSpecified) {
2233         TaxProcessing(taxString, db, programStr);
2234     } else {
2235         RunProgram(programStr, lsp, db, FALSE);
2236     }
2237     EntrezFini();
2238     EntrezBioseqFetchDisable();
2239 
2240     FileClose(master_fp);
2241     return 0;
2242 }
2243