1 /* entrcmd.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: entrcmd.c
27 *
28 * Author: Epstein
29 *
30 * Version Creation Date: 1/4/94
31 *
32 * $Revision: 6.5 $
33 *
34 * File Description:
35 * non-interactive command line interface for Entrez
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * Date Name Description of modification
40 * ------- ---------- -----------------------------------------------------
41 *
42 * $Log: entrcmd.c,v $
43 * Revision 6.5 2011/12/19 18:40:17 gouriano
44 * Corrected printf formatting. NOJIRA
45 *
46 * Revision 6.4 1999/08/11 18:58:09 kans
47 * changed FindNuc and FindProt to avoid collision with sequtil functions
48 *
49 * Revision 6.3 1998/08/24 20:43:42 kans
50 * fixed -v -fd warnings
51 *
52 * Revision 6.2 1997/12/10 13:48:44 kans
53 * removed call to SeqEntryToFile
54 *
55 * Revision 6.1 1997/11/04 21:02:44 epstein
56 * change stray stdout to master_fp
57 *
58 * Revision 6.0 1997/08/25 18:19:34 madden
59 * Revision changed to 6.0
60 *
61 * Revision 5.7 1997/07/21 16:12:39 epstein
62 * CONVERT to new format for exported list of identifiers
63 *
64 * Revision 5.6 1997/07/14 18:24:20 epstein
65 * add complexity argument 'y'
66 *
67 * Revision 5.5 1997/03/21 18:41:46 epstein
68 * retrieve correct genome sequences
69 *
70 * Revision 5.4 1997/03/10 19:33:45 epstein
71 * add Genomes support
72 *
73 * Revision 5.3 1996/10/24 15:49:27 epstein
74 * add -r option to fetch entries from ID
75 *
76 * Revision 5.2 1996/06/11 15:16:54 epstein
77 * remove another artificial 32K boundary
78 *
79 * Revision 5.1 1996/05/31 19:27:46 epstein
80 * eradicate 32K UID limitations, as much as possible
81 *
82 * Revision 4.4 1996/03/19 17:08:41 epstein
83 * remove stray printfs
84 *
85 * Revision 4.3 1996/02/21 22:09:16 epstein
86 * add EntrezBioseqFetchEnable/Disable() to fix GBFF outputs
87 *
88 * Revision 4.2 1995/09/18 18:18:52 epstein
89 * add GenPept format
90 *
91 * Revision 4.1 1995/08/21 19:41:14 epstein
92 * add cluster analysis
93 *
94 * Revision 4.0 1995/07/26 13:54:26 ostell
95 * force revision to 4.0
96 *
97 * Revision 1.30 1995/07/20 18:58:15 epstein
98 * use new SeqIdWrite function
99 *
100 * Revision 1.29 1995/06/19 21:42:11 kans
101 * changed asn2ff_entrez to SeqEntryToFlat
102 *
103 * Revision 1.28 1995/05/15 01:29:58 ostell
104 * added newline to end of file
105 *
106 * Revision 1.27 1995/05/15 01:28:44 ostell
107 * Fixed Callbacks prototypes to SeqEntryExplore
108 *
109 *
110 * ==========================================================================
111 */
112
113 #include <ncbi.h>
114 #include <accentr.h>
115 #include <accutils.h>
116 #include <tofasta.h>
117 #include <tomedlin.h>
118 #include <asn2ff.h>
119
120 Args myargs[] = {
121 {"Initial database", "m",NULL, NULL, TRUE,'d',ARG_STRING,0.0,0,NULL},
122 {"Boolean expression", NULL, NULL, NULL, TRUE, 'e', ARG_STRING, 0.0,0,NULL},
123 {"Comma-delimited list of UIDs", NULL, NULL, NULL, TRUE, 'u', ARG_STRING, 0.0,0,NULL},
124 {"Program of commands", NULL, NULL, NULL, FALSE, 'p', ARG_STRING, 0.0,0,NULL},
125 {"Display status report", "F", NULL, NULL, TRUE, 's', ARG_BOOLEAN, 0.0,0,NULL},
126 {"Produce WWW/HTML formatted output (recommended value is /htbin)", NULL, NULL, NULL, TRUE, 'w', ARG_STRING, 0.0,0,NULL},
127 {"Detailed help", "F", NULL, NULL, TRUE, 'h', ARG_BOOLEAN, 0.0,0,NULL},
128 {"For WWW output, use Forms", "F", NULL, NULL, TRUE, 'f', ARG_BOOLEAN, 0.0,0,NULL},
129 {"'Check' WWW output Forms", "F", NULL, NULL, TRUE, 'c', ARG_BOOLEAN, 0.0,0,NULL},
130 {"Name of export file for named UID list", NULL, NULL, NULL, TRUE, 'x', ARG_STRING,0.0,0,NULL},
131 {"Comma-delimited list of files to import for named UID list", NULL, NULL, NULL, TRUE, 'i', ARG_STRING,0.0,0,NULL},
132 {"Produce a list of terms (term)", NULL, NULL, NULL, TRUE, 't', ARG_STRING, 0.0,0,NULL},
133 {"Taxonomy lookup", NULL, NULL, NULL, TRUE, 'l', ARG_STRING, 0.0,0,NULL},
134 {"On-the-fly neighboring", NULL, NULL, NULL, TRUE, 'n', ARG_FILE_IN, 0.0,0,NULL},
135 {"Output file", "stdout", NULL, NULL, FALSE, 'o', ARG_FILE_OUT, 0.0,0,NULL},
136 {"Use WWW-style encoding for special input characters", "T", NULL, NULL, TRUE, 'g', ARG_BOOLEAN, 0.0,0,NULL},
137 {"Get sequences from ID Repository", "F", NULL, NULL, TRUE, 'r', ARG_BOOLEAN, 0.0,0,NULL},
138 {"Complexity (1=bioseq only, 2=bioseq set, 3=nuc-prot set)", "3", NULL, NULL, TRUE, 'y', ARG_INT, 0.0,0,NULL}
139 };
140
/* Sizing constants; their users lie outside this excerpt.
 * NOTE(review): names suggest a field-mnemonic length and a default
 * term-list length -- confirm against the code that uses them. */
#define ENTREZ_FLD_MNEMONIC_LENGTH  4
#define DEFAULT_TERMLIST_LEN        40

/* Values of termDisplay: which columns DumpTerm() prints for each term. */
#define DISPLAY_SPECIAL_AND_TOTAL   1   /* term, special count, total count */
#define DISPLAY_TOTAL_ONLY          2   /* term, total count */
#define DISPLAY_TERM_ONLY           3   /* term only */
147
148 typedef struct savlist {
149 CharPtr name;
150 Int4Ptr uids;
151 DocType db;
152 Int2 num;
153 } SavList, PNTR SavListPtr;
154
155 typedef struct term_and_counts {
156 CharPtr term;
157 Int4 special;
158 Int4 total;
159 } TermAndCounts, PNTR TermAndCountsPtr;
160
161 typedef struct {
162 Uint4 num;
163 DocUidPtr uids;
164 Int4Ptr weights;
165 } * LocalLinkSetPtr;
166
167
168 static CharPtr wwwPrefix = NULL;
169 static CharPtr theTerm = NULL;
170 static Boolean useForms = FALSE;
171 static Boolean checkForms = FALSE;
172 static Int2 numTerms;
173 static Int2 termsBefore;
174 static FILE * exportFilePtr = NULL;
175 static LocalLinkSetPtr pubLsp = NULL;
176 static Int2 termDisplay = DISPLAY_TERM_ONLY;
177 static FILE * master_fp = NULL;
178 static Int2 seqEntryRetval = 3;
179
LocalLinkSetNew(void)180 static LocalLinkSetPtr LocalLinkSetNew(void)
181 {
182 LocalLinkSetPtr lsp;
183
184 lsp = MemNew(sizeof(*lsp));
185 lsp->num = 0;
186 lsp->uids = NULL;
187 lsp->weights = NULL;
188
189 return lsp;
190 }
191
/*
 * Release a local link set together with its uid and weight arrays.
 *
 * A NULL argument is accepted and ignored.  Always returns NULL so that the
 * result can be assigned back to the caller's pointer.
 */
static LocalLinkSetPtr LocalLinkSetFree(LocalLinkSetPtr lsp)
{
    if (lsp != NULL)
    {
        MemFree(lsp->uids);
        MemFree(lsp->weights);
        MemFree(lsp);
    }

    return NULL;
}
200
LinkSetToLocalLinkSet(LinkSetPtr newlsp)201 static LocalLinkSetPtr LinkSetToLocalLinkSet(LinkSetPtr newlsp)
202 {
203 LocalLinkSetPtr lsp;
204
205 if (newlsp != NULL)
206 {
207 lsp = LocalLinkSetNew();
208 lsp->num = (Uint4) newlsp->num;
209 lsp->uids = (DocUidPtr) MemDup(newlsp->uids, sizeof(DocUid) * lsp->num);
210 lsp->weights = (DocUidPtr) MemDup(newlsp->weights, sizeof(Int4) * lsp->num);
211 }
212 return lsp;
213 }
214
215 static void
DoOutput(CharPtr term,Int2 depth,Boolean showTerminal)216 DoOutput(CharPtr term, Int2 depth, Boolean showTerminal)
217 {
218 Int2 i;
219
220 for (i = 1; i <= depth; i++)
221 fprintf(master_fp, i < depth ? "-" : ( showTerminal ? "*" : "-"));
222 fprintf (master_fp, "%s\n", term);
223 }
224
225 static void
PreOrderTaxTraversal(EntrezHierarchyPtr ehp,Int2 depth,DocType db,DocField fld,Int2 maxDepth)226 PreOrderTaxTraversal(EntrezHierarchyPtr ehp, Int2 depth, DocType db, DocField fld, Int2 maxDepth)
227 {
228 Int2 i;
229 EntrezHierarchyPtr child;
230
231 DoOutput(ehp->term, depth, depth >= maxDepth);
232
233 if (depth >= maxDepth)
234 return;
235
236 for (i = 0; i < ehp->numChildren; i++)
237 {
238 if (ehp->children[i].isLeafNode)
239 { /* no need to move down tree, since all information is here */
240 DoOutput(ehp->children[i].name, depth + 1, FALSE);
241 } else {
242 child = EntrezHierarchyGet(ehp->children[i].name, db,
243 fld);
244 if (child != NULL)
245 {
246 PreOrderTaxTraversal(child, depth + 1, db, fld, maxDepth);
247 EntrezHierarchyFree(child);
248 }
249 }
250 }
251 }
252
253
254 /* find the last nucleotide bioseq in the bioseqset */
FindANuc(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)255 static void FindANuc(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
256 {
257 BioseqPtr PNTR bp;
258 BioseqPtr local_bsp;
259
260 bp = (BioseqPtr PNTR) data;
261 if (IS_Bioseq(sep))
262 {
263 local_bsp = (BioseqPtr) sep->data.ptrvalue;
264 if (ISA_na(local_bsp->mol))
265 *bp = local_bsp;
266 }
267 }
268
269 /* find the last protein bioseq in the bioseqset */
FindAProt(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)270 static void FindAProt(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
271 {
272 BioseqPtr PNTR bp;
273 BioseqPtr local_bsp;
274
275 bp = (BioseqPtr PNTR) data;
276 if (IS_Bioseq(sep))
277 {
278 local_bsp = (BioseqPtr) sep->data.ptrvalue;
279 if (ISA_aa(local_bsp->mol))
280 *bp = local_bsp;
281 }
282 }
283
IsGenBank(SeqEntryPtr sep)284 static Boolean IsGenBank (SeqEntryPtr sep)
285 {
286 BioseqPtr bsp;
287 Uint1 repr;
288 Boolean rsult;
289
290 rsult = FALSE;
291 if (sep->choice == 1) {
292 bsp = (BioseqPtr) sep->data.ptrvalue;
293 repr = Bioseq_repr (bsp);
294 if (repr == Seq_repr_raw || repr == Seq_repr_const) {
295 if (ISA_na (bsp->mol)) {
296 rsult = TRUE;
297 } else {
298 Message (MSG_ERROR, "Protein record cannot be viewed in GenBank form.");
299 }
300 } else {
301 Message (MSG_ERROR, "Bad sequence repr %d", (int) repr);
302 }
303 } else {
304 rsult = TRUE;
305 }
306 return rsult;
307 }
308
309
310 static void
PrintGenbank(SeqEntryPtr sep,Boolean isprot)311 PrintGenbank(SeqEntryPtr sep, Boolean isprot)
312 {
313 SeqEntryToFlat(sep, master_fp, isprot ? GENPEPT_FMT : GENBANK_FMT,
314 RELEASE_MODE);
315
316 FilePuts ("\n\n", master_fp);
317 }
318
PrintDSP(DocSumPtr dsp,DocUid uid)319 static Boolean PrintDSP(DocSumPtr dsp, DocUid uid)
320 {
321
322 Int2 titleLen;
323 Int2 size;
324 CharPtr pEnd;
325 CharPtr pStart;
326
327 if (dsp == NULL)
328 return TRUE;
329
330 pStart = dsp->title;
331 titleLen = StrLen(pStart);
332
333 if ( titleLen <= 55 ) {
334 size = titleLen;
335 } else {
336 pEnd = pStart + 55;
337 while (IS_WHITESP(*pEnd) == FALSE)
338 pEnd -=1;
339 size = pEnd - pStart;
340 }
341
342 fprintf (master_fp, "%-20.20s %-*.*s\n",dsp->caption,size,size,pStart);
343 fprintf (master_fp, "%c",(dsp->no_abstract ? ' ' : '*'));
344 fprintf (master_fp, " ");
345
346 pStart = pStart + size;
347
348 while ((titleLen = StrLen(pStart)) > 0 ) {
349 if ( titleLen <= 55 ) {
350 size = titleLen;
351 } else {
352 pEnd = pStart + 55;
353 while (IS_WHITESP(*pEnd) == FALSE)
354 pEnd -=1;
355 size = pEnd - pStart;
356 }
357
358 fprintf (master_fp, "%-*.*s\n",size,size,pStart+1);
359 fprintf (master_fp, "%-21.21s","");
360 pStart = pStart + size;
361 }
362
363 fprintf (master_fp, "\n");
364
365 DocSumFree(dsp);
366 return TRUE;
367 }
368
PrintDSPMwww(DocSumPtr dsp,DocUid uid)369 static Boolean PrintDSPMwww(DocSumPtr dsp, DocUid uid)
370 {
371 CharPtr p;
372 Boolean noNeighbors = FALSE;
373 LocalLinkSetPtr lsp;
374 Int2 medNeighbors;
375 Int2 protNeighbors;
376 Int2 nucNeighbors;
377
378 if (dsp == NULL)
379 return TRUE;
380
381 fprintf (master_fp, "<DL>\n<DT>\n");
382 if (useForms)
383 {
384 fprintf (master_fp, "<inPUT TYPE=\"checkbox\" NAME=\"nei\" VALUE=\"%d\"%s>\n", uid,
385 checkForms ? " CHECKED" : "");
386 }
387 if (TRUE /* used to be non-FORMS only */ )
388 {
389 LinkSetPtr lsp;
390
391 lsp = NULL;
392 EntrezLinkUidList(&lsp, TYP_ML, TYP_ML, 1, &uid, FALSE);
393 if (lsp != NULL)
394 {
395 medNeighbors = lsp->num;
396 LinkSetFree(lsp);
397 lsp = NULL;
398 } else {
399 medNeighbors = 0;
400 }
401 EntrezLinkUidList(&lsp, TYP_ML, TYP_AA, 1, &uid, FALSE);
402 if (lsp != NULL)
403 {
404 protNeighbors = lsp->num;
405 LinkSetFree(lsp);
406 lsp = NULL;
407 } else {
408 protNeighbors = 0;
409 }
410 EntrezLinkUidList(&lsp, TYP_ML, TYP_NT, 1, &uid, FALSE);
411 if (lsp != NULL)
412 {
413 nucNeighbors = lsp->num;
414 LinkSetFree(lsp);
415 lsp = NULL;
416 } else {
417 nucNeighbors = 0;
418 }
419 noNeighbors = !medNeighbors && !protNeighbors && !nucNeighbors;
420 }
421
422 fprintf (master_fp, "%s\n<BR><DD>\n", dsp->caption);
423 for (p = dsp->title; *p; p++)
424 {
425 switch (*p)
426 {
427 case '&': fprintf (master_fp, "&"); break;
428 case '<': fprintf (master_fp, "<"); break;
429 case '>': fprintf (master_fp, ">"); break;
430 default: fprintf (master_fp, "%c", *p);
431 }
432 }
433 fprintf (master_fp, "<I>");
434 if (dsp->no_abstract)
435 {
436 fprintf (master_fp, " (no abstract available)");
437 }
438 fprintf (master_fp, " (View ");
439 fprintf (master_fp, "<A HREF=\"%s/entrezmr?%d\">Report format</A>,\n", wwwPrefix, uid);
440 fprintf (master_fp, "<A HREF=\"%s/entrezml?%d\">MEDLARS format</A>,\n", wwwPrefix, uid);
441 if (noNeighbors)
442 {
443 fprintf (master_fp, "or ");
444 }
445 fprintf (master_fp, "<A HREF=\"%s/entrezma?%d\">ASN.1 format</A>", wwwPrefix, uid);
446 if (! noNeighbors)
447 {
448 if (medNeighbors != 0)
449 fprintf (master_fp, ", %s<A HREF=\"%s/entrezmmnei?%d\">%d MEDLINE neighbor%s</A>\n", !protNeighbors && !nucNeighbors ? "or " : "", wwwPrefix, uid, medNeighbors, medNeighbors == 1 ? "" : "s");
450 if (protNeighbors != 0)
451 fprintf (master_fp, ", %s<A HREF=\"%s/entrezmpnei?%d\">%d Protein link%s</A>\n", !nucNeighbors ? "or " : "", wwwPrefix, uid, protNeighbors, protNeighbors == 1 ? "" : "s");
452 if (nucNeighbors != 0)
453 fprintf (master_fp, ", or <A HREF=\"%s/entrezmnnei?%d\">%d Nucleotide link%s</A>\n", wwwPrefix, uid, nucNeighbors, nucNeighbors == 1 ? "" : "s");
454 }
455 fprintf (master_fp, ")\n<P></I></DL>\n");
456
457 DocSumFree(dsp);
458 return TRUE;
459 }
460
PrintDSPNwww(DocSumPtr dsp,DocUid uid)461 static Boolean PrintDSPNwww(DocSumPtr dsp, DocUid uid)
462 {
463 CharPtr p;
464 Boolean noNeighbors = FALSE;
465 LocalLinkSetPtr lsp;
466 Int2 medNeighbors;
467 Int2 protNeighbors;
468 Int2 nucNeighbors;
469 Int4 weight;
470 Int2 i;
471
472 if (dsp == NULL)
473 return TRUE;
474
475 fprintf (master_fp, "<DL>\n<DT>\n");
476 if (useForms)
477 {
478 fprintf (master_fp, "<inPUT TYPE=\"checkbox\" NAME=\"nei\" VALUE=\"%d\"%s>\n", uid,
479 checkForms ? " CHECKED" : "");
480 }
481 if (TRUE /* used to be non-FORMS only */ )
482 {
483 LinkSetPtr lsp;
484
485 lsp = NULL;
486 EntrezLinkUidList(&lsp, TYP_NT, TYP_ML, 1, &uid, FALSE);
487 if (lsp != NULL)
488 {
489 medNeighbors = lsp->num;
490 LinkSetFree(lsp);
491 lsp = NULL;
492 } else {
493 medNeighbors = 0;
494 }
495 EntrezLinkUidList(&lsp, TYP_NT, TYP_AA, 1, &uid, FALSE);
496 if (lsp != NULL)
497 {
498 protNeighbors = lsp->num;
499 LinkSetFree(lsp);
500 lsp = NULL;
501 } else {
502 protNeighbors = 0;
503 }
504 EntrezLinkUidList(&lsp, TYP_NT, TYP_NT, 1, &uid, FALSE);
505 if (lsp != NULL)
506 {
507 nucNeighbors = lsp->num;
508 LinkSetFree(lsp);
509 lsp = NULL;
510 } else {
511 nucNeighbors = 0;
512 }
513 noNeighbors = !medNeighbors && !protNeighbors && !nucNeighbors;
514 }
515
516 fprintf (master_fp, "%s\n<BR><DD>\n", dsp->caption);
517 for (p = dsp->title; *p; p++)
518 {
519 switch (*p)
520 {
521 case '&': fprintf (master_fp, "&"); break;
522 case '<': fprintf (master_fp, "<"); break;
523 case '>': fprintf (master_fp, ">"); break;
524 default: fprintf (master_fp, "%c", *p);
525 }
526 }
527 fprintf (master_fp, "<I>");
528 weight = -1;
529 if (pubLsp != NULL && pubLsp->weights != NULL)
530 {
531 for (i = 0; i < pubLsp->num; i++)
532 {
533 if (pubLsp->uids[i] == uid)
534 weight = pubLsp->weights[i];
535 }
536 }
537 if (weight > 1)
538 {
539 fprintf (master_fp, " (Similarity score %d)", weight);
540 }
541 fprintf (master_fp, " (View ");
542 fprintf (master_fp, "<A HREF=\"%s/entreznr?%d\">Report format</A>,\n", wwwPrefix, uid);
543 fprintf (master_fp, "<A HREF=\"%s/entrezng?%d\">GenBank format</A>,\n", wwwPrefix, uid);
544 fprintf (master_fp, "<A HREF=\"%s/entreznf?%d\">FASTA format</A>,\n", wwwPrefix, uid);
545 if (noNeighbors)
546 {
547 fprintf (master_fp, "or ");
548 }
549 fprintf (master_fp, "<A HREF=\"%s/entrezna?%d\">ASN.1 format</A>", wwwPrefix, uid);
550 if (! noNeighbors)
551 {
552 if (medNeighbors != 0)
553 fprintf (master_fp, ", %s<A HREF=\"%s/entreznmnei?%d\">%d MEDLINE link%s</A>\n", !protNeighbors && !nucNeighbors ? "or " : "", wwwPrefix, uid, medNeighbors, medNeighbors == 1 ? "" : "s");
554 if (protNeighbors != 0)
555 fprintf (master_fp, ", %s<A HREF=\"%s/entreznpnei?%d\">%d Protein link%s</A>\n", !nucNeighbors ? "or " : "", wwwPrefix, uid, protNeighbors, protNeighbors == 1 ? "" : "s");
556 if (nucNeighbors != 0)
557 fprintf (master_fp, ", or <A HREF=\"%s/entreznnnei?%d\">%d Nucleotide neighbor%s</A>\n", wwwPrefix, uid, nucNeighbors, nucNeighbors == 1 ? "" : "s");
558 }
559 fprintf (master_fp, ")<P></I></DL>\n");
560
561 DocSumFree(dsp);
562 return TRUE;
563 }
564
PrintDSPPwww(DocSumPtr dsp,DocUid uid)565 static Boolean PrintDSPPwww(DocSumPtr dsp, DocUid uid)
566 {
567 CharPtr p;
568 Boolean noNeighbors = FALSE;
569 LocalLinkSetPtr lsp;
570 Int2 medNeighbors;
571 Int2 protNeighbors;
572 Int2 nucNeighbors;
573 Int4 weight;
574 Int2 i;
575
576 if (dsp == NULL)
577 return TRUE;
578
579 fprintf (master_fp, "<DL>\n<DT>\n");
580 if (useForms)
581 {
582 fprintf (master_fp, "<inPUT TYPE=\"checkbox\" NAME=\"nei\" VALUE=\"%d\"%s>\n", uid,
583 checkForms ? " CHECKED" : "");
584 }
585 if (TRUE /* used to be non-FORMS only */ )
586 {
587 LinkSetPtr lsp;
588
589 lsp = NULL;
590 EntrezLinkUidList(&lsp, TYP_AA, TYP_ML, 1, &uid, FALSE);
591 if (lsp != NULL)
592 {
593 medNeighbors = lsp->num;
594 LinkSetFree(lsp);
595 lsp = NULL;
596 } else {
597 medNeighbors = 0;
598 }
599 EntrezLinkUidList(&lsp, TYP_AA, TYP_AA, 1, &uid, FALSE);
600 if (lsp != NULL)
601 {
602 protNeighbors = lsp->num;
603 LinkSetFree(lsp);
604 lsp = NULL;
605 } else {
606 protNeighbors = 0;
607 }
608 EntrezLinkUidList(&lsp, TYP_AA, TYP_NT, 1, &uid, FALSE);
609 if (lsp != NULL)
610 {
611 nucNeighbors = lsp->num;
612 LinkSetFree(lsp);
613 lsp = NULL;
614 } else {
615 nucNeighbors = 0;
616 }
617 noNeighbors = !medNeighbors && !protNeighbors && !nucNeighbors;
618 }
619
620 fprintf (master_fp, "%s\n<BR><DD>\n", dsp->caption);
621 for (p = dsp->title; *p; p++)
622 {
623 switch (*p)
624 {
625 case '&': fprintf (master_fp, "&"); break;
626 case '<': fprintf (master_fp, "<"); break;
627 case '>': fprintf (master_fp, ">"); break;
628 default: fprintf (master_fp, "%c", *p);
629 }
630 }
631 fprintf (master_fp, "<I>");
632 weight = -1;
633 if (pubLsp != NULL && pubLsp->weights != NULL)
634 {
635 for (i = 0; i < pubLsp->num; i++)
636 {
637 if (pubLsp->uids[i] == uid)
638 weight = pubLsp->weights[i];
639 }
640 }
641 if (weight > 1)
642 {
643 fprintf (master_fp, " (Similarity score %d)", weight);
644 }
645 fprintf (master_fp, " (View ");
646 fprintf (master_fp, "<A HREF=\"%s/entrezpr?%d\">Report format</A>,\n", wwwPrefix, uid);
647 fprintf (master_fp, "<A HREF=\"%s/entrezpf?%d\">FASTA format</A>,\n", wwwPrefix, uid);
648 if (noNeighbors)
649 {
650 fprintf (master_fp, "or ");
651 }
652 fprintf (master_fp, "<A HREF=\"%s/entrezpa?%d\">ASN.1 format</A>", wwwPrefix, uid);
653 if (! noNeighbors)
654 {
655 if (medNeighbors != 0)
656 fprintf (master_fp, ", %s<A HREF=\"%s/entrezpmnei?%d\">%d MEDLINE link%s</A>\n", !protNeighbors && !nucNeighbors ? "or " : "", wwwPrefix, uid, medNeighbors, medNeighbors == 1 ? "" : "s");
657 if (protNeighbors != 0)
658 fprintf (master_fp, ", %s<A HREF=\"%s/entrezppnei?%d\">%d Protein neighbor%s</A>\n", !nucNeighbors ? "or " : "", wwwPrefix, uid, protNeighbors, protNeighbors == 1 ? "" : "s");
659 if (nucNeighbors != 0)
660 fprintf (master_fp, ", or <A HREF=\"%s/entrezpnnei?%d\">%d Nucleotide link%s</A>\n", wwwPrefix, uid, nucNeighbors, nucNeighbors == 1 ? "" : "s");
661 }
662 fprintf (master_fp, ")<P></I></DL>\n");
663
664 DocSumFree(dsp);
665 return TRUE;
666 }
667
668 static void
ReportBadType(DocType db,CharPtr outputSpec)669 ReportBadType (DocType db, CharPtr outputSpec)
670 {
671 Message(MSG_POST, "Invalid output format \"%s\" for database \"%s\"",
672 outputSpec, db == TYP_ML ? "MEDLINE" : (db == TYP_NT ?
673 "Nucleotide" : (db == TYP_AA ? "Protein" : (db == TYP_CH ?
674 "Genome" : "unknown"))));
675 }
676
677 static Boolean
ProcessOutput(LocalLinkSetPtr lsp,DocType db,CharPtr outputSpec,long processingCount,long totalCount,Boolean parseOnly)678 ProcessOutput(LocalLinkSetPtr lsp, DocType db, CharPtr outputSpec, long processingCount, long totalCount, Boolean parseOnly)
679 {
680 long i;
681 AsnIoPtr aip;
682 MedlineEntryPtr mep;
683 SeqEntryPtr sep;
684 SeqIdPtr sip;
685 Char seqIdBuf[256];
686
687 if (StringCmp(outputSpec, "") == 0 || StringCmp(outputSpec, "no") == 0)
688 return TRUE;
689 if (StringCmp(outputSpec, "mc") == 0)
690 {
691 if (db != TYP_ML)
692 {
693 ReportBadType(db, outputSpec);
694 return FALSE;
695 }
696 if (! parseOnly)
697 {
698 fprintf (master_fp, "%ld\n", totalCount);
699 }
700 return TRUE;
701 }
702 if (StringCmp(outputSpec, "mu") == 0)
703 {
704 if (db != TYP_ML)
705 {
706 ReportBadType(db, outputSpec);
707 return FALSE;
708 }
709 if (! parseOnly && lsp != NULL)
710 {
711 if (exportFilePtr != NULL)
712 {
713 CharPtr str = "garbage";
714
715 switch(db) {
716 case TYP_ML: str = "MEDLINE"; break;
717 case TYP_AA: str = "protein"; break;
718 case TYP_NT: str = "nucleotide"; break;
719 case TYP_CH: str = "genome"; break;
720 }
721 fprintf(exportFilePtr, ">%s\n", str);
722
723 for (i = 0; i < processingCount; i++)
724 {
725 fprintf(exportFilePtr, "%d\n", lsp->uids[i]);
726 }
727 FileClose(exportFilePtr);
728 exportFilePtr = NULL;
729 } else {
730 fprintf (master_fp, "\n");
731 for (i = 0; i < processingCount; i++)
732 {
733 fprintf (master_fp, "%d\n", lsp->uids[i]);
734 }
735 fprintf (master_fp, "\n");
736 fflush(master_fp);
737 }
738 }
739 return TRUE;
740 }
741 if (StringCmp(outputSpec, "mz") == 0)
742 { /* analyze */
743 if (db != TYP_ML)
744 {
745 ReportBadType(db, outputSpec);
746 return FALSE;
747 }
748 if (! parseOnly && lsp != NULL)
749 {
750 CharPtr terms[20];
751 Int4 termTotals[20];
752 Int4 count;
753
754 count = EntrezClusterAnalysis(lsp->uids, lsp->num, FLD_WORD, 0, INT2_MAX, 20, terms, termTotals);
755 fprintf (master_fp, "Analysis resulted in %d terms\n\n", (int) count);
756 for (i = 0; i < count; i++)
757 {
758 fprintf (master_fp, "%s %ld\n", terms[i], (long) termTotals[i]);
759 MemFree (terms[i]);
760 }
761 }
762 return TRUE;
763 }
764 if (StringCmp(outputSpec, "md") == 0)
765 {
766 if (db != TYP_ML)
767 {
768 ReportBadType(db, outputSpec);
769 return FALSE;
770 }
771 if (! parseOnly && lsp != NULL)
772 {
773 if (wwwPrefix != NULL && processingCount < totalCount)
774 {
775 fprintf (master_fp, "Warning: only %ld document summaries are being displayed\n", processingCount);
776 fprintf (master_fp, "out of %ld total entries.<P>\n", totalCount);
777 }
778 EntrezDocSumListGet((Int2) processingCount, db, lsp->uids,
779 wwwPrefix == NULL ? PrintDSP : PrintDSPMwww);
780 }
781 return TRUE;
782 }
783 if (StringCmp(outputSpec, "mr") == 0)
784 {
785 if (db != TYP_ML)
786 {
787 ReportBadType(db, outputSpec);
788 return FALSE;
789 }
790 if (! parseOnly && lsp != NULL)
791 {
792 for (i = 0; i < processingCount; i++)
793 {
794 mep = EntrezMedlineEntryGet(lsp->uids[i]);
795 if (mep != NULL)
796 {
797 MedlineEntryToDocFile(mep, master_fp);
798 MedlineEntryFree(mep);
799 fprintf (master_fp, "\n\n");
800 }
801 }
802 fflush(master_fp);
803 }
804 return TRUE;
805 }
806 if (StringCmp(outputSpec, "ma") == 0)
807 {
808 if (db != TYP_ML)
809 {
810 ReportBadType(db, outputSpec);
811 return FALSE;
812 }
813 if (! parseOnly && lsp != NULL)
814 {
815 aip = AsnIoNew(ASNIO_TEXT_OUT, master_fp, NULL, NULL, NULL);
816 for (i = 0; i < processingCount; i++)
817 {
818 mep = EntrezMedlineEntryGet(lsp->uids[i]);
819 if (mep != NULL)
820 {
821 MedlineEntryAsnWrite(mep, aip, NULL);
822 AsnIoReset(aip);
823 MedlineEntryFree(mep);
824 }
825 }
826 AsnIoClose(aip);
827 }
828 return TRUE;
829 }
830 if (StringCmp(outputSpec, "ml") == 0)
831 {
832 if (db != TYP_ML)
833 {
834 ReportBadType(db, outputSpec);
835 return FALSE;
836 }
837 if (! parseOnly && lsp != NULL)
838 {
839 for (i = 0; i < processingCount; i++)
840 {
841 mep = EntrezMedlineEntryGet(lsp->uids[i]);
842 if (mep != NULL)
843 {
844 MedlineEntryToDataFile(mep, master_fp);
845 fprintf (master_fp, "\n");
846 MedlineEntryFree(mep);
847 }
848 }
849 }
850 return TRUE;
851 }
852
853
854 if (StringCmp(outputSpec, "sc") == 0)
855 {
856 if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
857 {
858 ReportBadType(db, outputSpec);
859 return FALSE;
860 }
861 if (! parseOnly)
862 {
863 fprintf (master_fp, "%ld\n", totalCount);
864 }
865 return TRUE;
866 }
867 if (StringCmp(outputSpec, "su") == 0)
868 {
869 if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
870 {
871 ReportBadType(db, outputSpec);
872 return FALSE;
873 }
874 if (! parseOnly && lsp != NULL)
875 {
876 if (exportFilePtr != NULL)
877 {
878 CharPtr str = "garbage";
879
880 switch(db) {
881 case TYP_ML: str = "MEDLINE"; break;
882 case TYP_AA: str = "protein"; break;
883 case TYP_NT: str = "nucleotide"; break;
884 case TYP_CH: str = "genome"; break;
885 }
886 fprintf(exportFilePtr, ">%s\n", str);
887
888 for (i = 0; i < processingCount; i++)
889 {
890 fprintf(exportFilePtr, "%d\n", lsp->uids[i]);
891 }
892 FileClose(exportFilePtr);
893 exportFilePtr = NULL;
894 } else {
895 fprintf (master_fp, "\n");
896 for (i = 0; i < processingCount; i++)
897 {
898 fprintf (master_fp, "%d\n", lsp->uids[i]);
899 }
900 fprintf (master_fp, "\n");
901 fflush(master_fp);
902 }
903 }
904 return TRUE;
905 }
906 if (StringCmp(outputSpec, "sd") == 0)
907 {
908 if (db != TYP_NT && db != TYP_AA)
909 {
910 ReportBadType(db, outputSpec);
911 return FALSE;
912 }
913 if (! parseOnly && lsp != NULL)
914 {
915 if (wwwPrefix != NULL && processingCount < totalCount)
916 {
917 fprintf (master_fp, "Warning: only %ld document summaries are being displayed\n", processingCount);
918 fprintf (master_fp, "out of %ld total entries.<P>\n", totalCount);
919 }
920 pubLsp = lsp;
921 EntrezDocSumListGet((Int2) processingCount, db, lsp->uids,
922 wwwPrefix == NULL ? PrintDSP : (db == TYP_NT ?
923 PrintDSPNwww : PrintDSPPwww));
924 pubLsp = NULL;
925 }
926 return TRUE;
927 }
928 if (StringCmp(outputSpec, "sa") == 0)
929 {
930 if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
931 {
932 ReportBadType(db, outputSpec);
933 return FALSE;
934 }
935 if (! parseOnly && lsp != NULL)
936 {
937 aip = AsnIoNew(ASNIO_TEXT_OUT, master_fp, NULL, NULL, NULL);
938 for (i = 0; i < processingCount; i++)
939 {
940 sep = EntrezSeqEntryGet(lsp->uids[i], db == TYP_CH ? -1 : seqEntryRetval);
941 if (sep != NULL)
942 {
943 SeqEntryAsnWrite(sep, aip, NULL);
944 AsnIoReset(aip);
945 SeqEntryFree(sep);
946 }
947 }
948 AsnIoClose(aip);
949 }
950 return TRUE;
951 }
952 if (StringCmp(outputSpec, "sg") == 0 || StringCmp(outputSpec, "sr") == 0)
953 {
954 if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
955 {
956 ReportBadType(db, outputSpec);
957 return FALSE;
958 }
959 if (! parseOnly && lsp != NULL)
960 {
961 for (i = 0; i < processingCount; i++)
962 {
963 sep = EntrezSeqEntryGet(lsp->uids[i], db == TYP_CH ? -1 : seqEntryRetval);
964 if (sep != NULL)
965 {
966 PrintGenbank(sep, db == TYP_AA);
967 SeqEntryFree(sep);
968 }
969 }
970 }
971 return TRUE;
972 }
973 if (StringCmp(outputSpec, "sf") == 0)
974 {
975 if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
976 {
977 ReportBadType(db, outputSpec);
978 return FALSE;
979 }
980 if (! parseOnly && lsp != NULL)
981 {
982 for (i = 0; i < processingCount; i++)
983 {
984 sep = EntrezSeqEntryGet(lsp->uids[i], db == TYP_CH ? -1 : seqEntryRetval);
985 if (sep != NULL)
986 {
987 SeqEntryConvert (sep, Seq_code_iupacna);
988 SeqEntryToFasta (sep, master_fp, db == TYP_NT);
989 SeqEntryFree(sep);
990 }
991 fprintf (master_fp, "\n");
992 }
993
994 }
995 return TRUE;
996 }
997 if (StringCmp(outputSpec, "si") == 0)
998 {
999 if (db != TYP_NT && db != TYP_AA && db != TYP_CH)
1000 {
1001 ReportBadType(db, outputSpec);
1002 return FALSE;
1003 }
1004 if (! parseOnly && lsp != NULL)
1005 {
1006 for (i = 0; i < processingCount; i++)
1007 {
1008 sip = EntrezSeqIdForGI(lsp->uids[i]);
1009 if (sip != NULL)
1010 {
1011 SeqIdWrite(sip, seqIdBuf, PRINTID_FASTA_LONG, sizeof seqIdBuf);
1012
1013 fprintf (master_fp, "%s\n", seqIdBuf);
1014 }
1015 }
1016 }
1017 return TRUE;
1018 }
1019
1020
1021 Message(MSG_POST, "Unknown output format \"%s\"", outputSpec);
1022 return FALSE;
1023 }
1024
1025 static Int2
RunProgram(CharPtr programStr,LocalLinkSetPtr lsp,DocType db,Boolean parseOnly)1026 RunProgram(CharPtr programStr, LocalLinkSetPtr lsp, DocType db, Boolean parseOnly)
1027 {
1028 Int1 wrongDelim = '.';
1029 DocType newdb;
1030 Int2 len;
1031 CharPtr c;
1032 Char outputSpec[3];
1033 long processingCount;
1034 Char numStr[12];
1035 Int2 count;
1036 Int2 numToCopy;
1037 LinkSetPtr newlsp;
1038
1039 if (programStr == NULL)
1040 {
1041 return -1;
1042 }
1043
1044 if (lsp == NULL && !parseOnly)
1045 {
1046 return -1;
1047 }
1048
1049 len = StrLen(programStr);
1050 outputSpec[2] = '\0';
1051 c = programStr;
1052
1053 for (c = programStr; c < programStr + len; c += count + 1)
1054 {
1055 count = StrCSpn(c, ",.");
1056 if (c[count] == wrongDelim)
1057 {
1058 Message(MSG_POST, "Invalid delimiter");
1059 /* offset to offending delimeter */
1060 return (count + 1 + c - programStr);
1061 }
1062
1063 processingCount = INT4_MAX;
1064
1065 if (wrongDelim == '.')
1066 { /* process output */
1067 switch (count) {
1068 case 0:
1069 outputSpec[0] = '\0';
1070 break;
1071 case 1:
1072 Message(MSG_POST, "Invalid output specification \"%c\"", c[1]);
1073 return ( 2 + c - programStr);
1074 case 2:
1075 outputSpec[0] = c[0];
1076 outputSpec[1] = c[1];
1077 break;
1078 default:
1079 outputSpec[0] = c[0];
1080 outputSpec[1] = c[1];
1081 numToCopy = MIN(count - 2, sizeof(numStr) - 1);
1082 StrNCpy(numStr, c + 2, numToCopy);
1083 numStr[numToCopy] = '\0';
1084 if ((int) StrSpn(numStr, "0123456789") != (int) numToCopy)
1085 {
1086 Message(MSG_POST, "Non-numeric character detected");
1087 return ( count + c - programStr);
1088 }
1089 sscanf(numStr, "%ld", &processingCount);
1090 break;
1091 }
1092 if (lsp != NULL && !parseOnly)
1093 {
1094 processingCount = MIN(processingCount, lsp->num);
1095 }
1096 if (! ProcessOutput(lsp, db, outputSpec, processingCount,
1097 lsp != NULL ? lsp->num : processingCount,
1098 parseOnly))
1099 {
1100 /* note that error will be posted by ProcessOutput() */
1101 return ( 3 + c - programStr);
1102 }
1103 } else { /* process neighboring */
1104 if (count == 0)
1105 {
1106 Message(MSG_POST, "Null neighboring specification");
1107 return ( 1 + c - programStr);
1108 }
1109 if (count > 1)
1110 {
1111 numToCopy = MIN(count - 1, sizeof(numStr) - 1);
1112 StrNCpy(numStr, c + 1, numToCopy);
1113 numStr[numToCopy] = '\0';
1114 if ((int) StrSpn(numStr, "0123456789") != (int) numToCopy)
1115 {
1116 Message(MSG_POST, "Non-numeric character detected");
1117 return ( count + c - programStr);
1118 }
1119 sscanf(numStr, "%ld", &processingCount);
1120 }
1121 switch (*c) {
1122 case 'p':
1123 newdb = TYP_AA;
1124 break;
1125 case 'm':
1126 newdb = TYP_ML;
1127 break;
1128 case 'n':
1129 newdb = TYP_NT;
1130 break;
1131 case 'g':
1132 newdb = TYP_CH;
1133 break;
1134 default:
1135 Message(MSG_POST, "Invalid neighboring specification <%s>", *c);
1136 return ( 1 + c - programStr);
1137 }
1138
1139 if (lsp != NULL && !parseOnly)
1140 {
1141 processingCount = MIN(processingCount, lsp->num);
1142 newlsp = NULL;
1143 EntrezLinkUidList(&newlsp, db, newdb, (Int2) processingCount, lsp->uids, FALSE);
1144 LocalLinkSetFree(lsp);
1145 lsp = LinkSetToLocalLinkSet(newlsp);
1146 LinkSetFree(newlsp);
1147 }
1148 db = newdb;
1149 }
1150
1151 wrongDelim = wrongDelim == '.' ? ',' : '.';
1152 }
1153
1154 if (lsp != NULL && !parseOnly)
1155 LocalLinkSetFree(lsp);
1156
1157 return 0;
1158 }
1159
1160 static void
DumpTerm(CharPtr term,Int4 special,Int4 total)1161 DumpTerm (CharPtr term, Int4 special, Int4 total)
1162 {
1163 switch (termDisplay) {
1164 case DISPLAY_SPECIAL_AND_TOTAL:
1165 fprintf (master_fp, "%s\t%ld\t%ld\n", term, (long) special, (long) total);
1166 break;
1167 case DISPLAY_TOTAL_ONLY:
1168 fprintf (master_fp, "%s\t%ld\n", term, (long) total);
1169 break;
1170 case DISPLAY_TERM_ONLY:
1171 fprintf (master_fp, "%s\n", term);
1172 break;
1173 }
1174 }
1175
1176 static Boolean
beginTermProc(CharPtr term,Int4 special,Int4 total)1177 beginTermProc(CharPtr term, Int4 special, Int4 total)
1178 {
1179 if (term != NULL)
1180 {
1181 DumpTerm(term, special, total);
1182 MemFree (term);
1183 return TRUE;
1184 } else {
1185 return FALSE;
1186 }
1187 }
1188
1189 static Boolean
findOneTermProc(CharPtr term,Int4 special,Int4 total)1190 findOneTermProc(CharPtr term, Int4 special, Int4 total)
1191 {
1192 if (term != NULL)
1193 {
1194 MemFree (term);
1195 return TRUE;
1196 } else {
1197 return FALSE;
1198 }
1199 }
1200
1201 static Boolean
collectNumTermsProc(CharPtr term,Int4 special,Int4 total)1202 collectNumTermsProc(CharPtr term, Int4 special, Int4 total)
1203 {
1204 static Boolean inited = FALSE;
1205 static TermAndCounts PNTR arrayOfTerm = NULL;
1206 static Int2 head;
1207 static Boolean sawOurTerm;
1208 Int4 i;
1209
1210 if (special == -1)
1211 { /* flag indicating reset */
1212 for (i = 0; i < termsBefore; i++)
1213 {
1214 MemFree(arrayOfTerm[i].term);
1215 }
1216 MemFree(arrayOfTerm);
1217 arrayOfTerm = NULL;
1218 inited = FALSE;
1219 return TRUE;
1220 }
1221
1222 if (term == NULL)
1223 {
1224 return FALSE;
1225 }
1226
1227 if (! inited)
1228 {
1229 inited = TRUE;
1230 arrayOfTerm = MemNew(termsBefore * sizeof(TermAndCounts));
1231 for (i = 0; i < termsBefore; i++)
1232 {
1233 arrayOfTerm[i].term = NULL;
1234 }
1235 head = 0;
1236 sawOurTerm = FALSE;
1237 }
1238
1239 if (sawOurTerm)
1240 {
1241 DumpTerm (term, special, total);
1242 MemFree (term);
1243 if (--head <= 0)
1244 return FALSE; /* no more terms, please */
1245 else
1246 return TRUE;
1247 } else {
1248 if (StringICmp(term, theTerm) >= 0)
1249 {
1250 sawOurTerm = TRUE;
1251 for (i = 0; i < termsBefore; i++)
1252 {
1253 if (arrayOfTerm[i].term == NULL)
1254 {
1255 head = 0; /* didn't wrap around */
1256 break;
1257 }
1258 }
1259
1260 /* print out the queue */
1261 i = head;
1262 do {
1263 if (arrayOfTerm[i].term == NULL)
1264 break;
1265 DumpTerm(arrayOfTerm[i].term, arrayOfTerm[i].special,
1266 arrayOfTerm[i].total);
1267 MemFree (arrayOfTerm[i].term);
1268 arrayOfTerm[i].term = NULL;
1269 i = (i + 1) % termsBefore;
1270 } while (i != head);
1271 /* number of remaining records to be displayed after this one */
1272 head = numTerms - (termsBefore + 1);
1273 DumpTerm (term, special, total);
1274 MemFree (term);
1275 return TRUE;
1276 }
1277 }
1278
1279 if (arrayOfTerm[head].term != NULL)
1280 {
1281 MemFree(arrayOfTerm[head].term);
1282 }
1283 arrayOfTerm[head].term = term;
1284 arrayOfTerm[head].special = special;
1285 arrayOfTerm[head].total = total;
1286 head = (head + 1) % termsBefore;
1287 return TRUE;
1288 }
1289
1290
1291 static Boolean
TermProcessing(CharPtr programStr,CharPtr termString,DocType db,Boolean parseOnly)1292 TermProcessing(CharPtr programStr, CharPtr termString, DocType db, Boolean parseOnly)
1293 {
1294 Boolean centerOnTerm = FALSE;
1295 Boolean beginWithTerm = FALSE;
1296 Boolean endWithTerm = FALSE;
1297 Boolean inclusive;
1298 Char fldStr[ENTREZ_FLD_MNEMONIC_LENGTH+1];
1299 DocField fld;
1300 Int2 firstPage;
1301 CharPtr countIndex;
1302 Int2 ratio;
1303 CharPtr localTermString;
1304
1305 theTerm = termString;
1306
1307 if (programStr == NULL || termString == NULL || (int) StrLen(programStr) <
1308 (3 + ENTREZ_FLD_MNEMONIC_LENGTH))
1309 return FALSE;
1310
1311 switch (programStr[0])
1312 {
1313 case 's': /* output with special+total, tab-delimeted */
1314 termDisplay = DISPLAY_SPECIAL_AND_TOTAL; break;
1315 case 't':
1316 termDisplay = DISPLAY_TOTAL_ONLY; break;
1317 case 'o':
1318 termDisplay = DISPLAY_TERM_ONLY; break;
1319 default:
1320 return FALSE;
1321 }
1322 switch (programStr[1])
1323 {
1324 case '3':
1325 case '4':
1326 case '5':
1327 case '6':
1328 case '7':
1329 case '8':
1330 case '9':
1331 ratio = programStr[1] - '0';
1332 centerOnTerm = TRUE; break;
1333 case 'c': /* center on the term */
1334 ratio = 4;
1335 centerOnTerm = TRUE; break;
1336 case 'b':
1337 beginWithTerm = TRUE; break;
1338 case 'e':
1339 endWithTerm = TRUE; break;
1340 default:
1341 return FALSE;
1342 }
1343
1344 switch (programStr[2])
1345 { /* ignored for centerOnTerm cases, above */
1346 case 'n': /* non-inclusive */
1347 inclusive = FALSE; break;
1348 case 'i': /* inclusive */
1349 inclusive = TRUE; break;
1350 default:
1351 return FALSE;
1352 }
1353
1354 StrNCpy (fldStr, &programStr[3], ENTREZ_FLD_MNEMONIC_LENGTH);
1355 fldStr[ENTREZ_FLD_MNEMONIC_LENGTH] = '\0';
1356 if ((fld = EntrezStringToField(db, fldStr)) < 0)
1357 return FALSE;
1358 countIndex = &programStr[3+ENTREZ_FLD_MNEMONIC_LENGTH];
1359 numTerms = DEFAULT_TERMLIST_LEN;
1360 if (*countIndex != '\0' &&
1361 StrSpn(countIndex, "0123456789") == StrLen(countIndex))
1362 {
1363 numTerms = atoi(countIndex);
1364 }
1365
1366 if (parseOnly)
1367 return TRUE;
1368
1369 if (termString[0] == '"' && termString[StrLen(termString)-1] == '"')
1370 {
1371 localTermString = MemNew(StrLen(termString));
1372 StrCpy (localTermString, &termString[1]);
1373 localTermString[StrLen(localTermString)-1] = '\0';
1374 theTerm = localTermString;
1375 } else {
1376 localTermString = termString;
1377 }
1378
1379
1380 if (centerOnTerm)
1381 {
1382 EntrezTermListByTerm(db, fld, localTermString, 1, findOneTermProc, &firstPage);
1383 if (firstPage > 0)
1384 {
1385 firstPage--;
1386 }
1387 termsBefore = (Int2) (numTerms / ((float) ratio / 2));
1388 EntrezTermListByPage(db, fld, firstPage, 4, collectNumTermsProc);
1389 collectNumTermsProc(NULL, -1, -1); /* reset */
1390 } else {
1391 EntrezTermListByTerm(db, fld, localTermString, numTerms, beginTermProc, &firstPage);
1392 }
1393
1394 if (localTermString != termString)
1395 {
1396 MemFree (localTermString);
1397 }
1398
1399 return TRUE;
1400 }
1401
1402 static Boolean
TaxProcessing(CharPtr taxString,DocType db,CharPtr progString)1403 TaxProcessing(CharPtr taxString, DocType db, CharPtr progString)
1404 {
1405 EntrezHierarchyPtr ehp;
1406 EntrezHierarchyPtr ehp2;
1407 EntrezHierarchyChildPtr ecp;
1408 Int2 i;
1409 DocField fld;
1410 int maxDepth;
1411
1412 if (db != TYP_AA && db != TYP_NT && db != TYP_ML && db != TYP_CH)
1413 {
1414 fprintf (master_fp, "Invalid database type %d\n", db);
1415 return FALSE;
1416 }
1417 fld = db == TYP_ML ? FLD_MESH_HIER : FLD_ORGN_HIER;
1418 ehp = EntrezHierarchyGet(taxString, db, fld);
1419 if (ehp == NULL)
1420 {
1421 fprintf (master_fp, "Term %s not found\n", taxString);
1422 return FALSE;
1423 }
1424
1425 if (StrNCmp(progString, "dump", 4) == 0)
1426 {
1427 sscanf(&progString[4], "%d", &maxDepth);
1428 if (maxDepth <= 0)
1429 maxDepth = INT2_MAX;
1430 PreOrderTaxTraversal(ehp, 0, db, fld, (Int2) maxDepth);
1431 EntrezHierarchyFree(ehp);
1432 } else {
1433 fprintf (master_fp, "term %s\nLineage:\n", ehp->term);
1434 for (i = 0; i < ehp->numInLineage; i++)
1435 fprintf (master_fp, " %s\n", ehp->lineage[i]);
1436 fprintf (master_fp, " %s\n", taxString);
1437 if (ehp->numInLineage > 0)
1438 {
1439 ehp2 = EntrezHierarchyGet(ehp->lineage[ehp->numInLineage - 1], db,
1440 fld);
1441 if (ehp2 != NULL && ehp2->numChildren > 1)
1442 {
1443 fprintf (master_fp, "Siblings:\n");
1444 for (i = 0; i < ehp2->numChildren; i++)
1445 {
1446 ecp = &ehp2->children[i];
1447 if (StrICmp(ecp->name, taxString) != 0)
1448 fprintf (master_fp, " %s\n", ecp->name);
1449 }
1450 EntrezHierarchyFree(ehp2);
1451 }
1452 }
1453
1454 if (ehp->numChildren > 0)
1455 {
1456 fprintf (master_fp, "Children:\n");
1457 for (i = 0; i < ehp->numChildren; i++)
1458 {
1459 ecp = &ehp->children[i];
1460 fprintf (master_fp, " %s\n", ecp->name);
1461 }
1462 }
1463 EntrezHierarchyFree(ehp);
1464 }
1465
1466 return TRUE;
1467 }
1468
1469 static ValNodePtr
ParseImportedFiles(CharPtr str)1470 ParseImportedFiles(CharPtr str)
1471 {
1472 CharPtr localStr;
1473 CharPtr token;
1474 FILE *fp;
1475 Char s[100];
1476 DocType db;
1477 ValNodePtr head = NULL;
1478 ValNodePtr node;
1479 SavListPtr slp;
1480 Int2 linesread;
1481 Int4Ptr uids;
1482 CharPtr p;
1483
1484 localStr = StringSave(str);
1485 token = StrTok(localStr, ", ");
1486
1487 while (token != NULL)
1488 {
1489 if ((fp = FileOpen(token, "r")) == NULL)
1490 {
1491 Message(MSG_POST, "Error opening file %s", token);
1492 MemFree(localStr);
1493 return NULL;
1494 }
1495 linesread = 0;
1496 while (FileGets(s, (sizeof s) - 1, fp) != NULL)
1497 {
1498 Boolean nonNumeric = StrSpn(s, "0123456789 \n\r") != StrLen(s);
1499
1500 linesread++;
1501 if (linesread == 1)
1502 {
1503 if(nonNumeric)
1504 {
1505 db = -1;
1506
1507 if(StrICmp(s,">MEDLINE") == 0)
1508 db = TYP_ML;
1509 else if (StrICmp(s,">protein") == 0)
1510 db = TYP_AA;
1511 else if (StrICmp(s,">nucleotide") == 0)
1512 db = TYP_NT;
1513 else if (StrICmp(s,">genome") == 0)
1514 db = TYP_CH;
1515 }
1516 db = atoi(s);
1517 if (db != TYP_ML && db != TYP_AA && db != TYP_NT && db != TYP_CH)
1518 {
1519 Message(MSG_POST, "Invalid database type %d in file %s", db, token);
1520 FileClose(fp);
1521 MemFree(localStr);
1522 return NULL;
1523 }
1524 continue;
1525 }
1526 if (nonNumeric)
1527 {
1528 Message(MSG_POST, "Invalid character at line %d of file %s", linesread, token);
1529 FileClose(fp);
1530 MemFree(localStr);
1531 return NULL;
1532 }
1533 }
1534 fseek(fp, 0, SEEK_SET); /* rewind to beginning */
1535 uids = (Int4Ptr) MemNew(sizeof(Int4) * linesread);
1536 linesread = -1; /* skip over db this time */
1537 while (FileGets(s, (sizeof s) - 1, fp) != NULL)
1538 {
1539 if (linesread >= 0)
1540 {
1541 uids[linesread] = atoi(s);
1542 }
1543 linesread++;
1544 }
1545 FileClose(fp);
1546 slp = (SavListPtr) MemNew(sizeof(SavList));
1547 slp->uids = uids;
1548 slp->db = db;
1549 slp->num = linesread;
1550 if ((p = StringRChr(token, DIRDELIMCHR)) == NULL)
1551 {
1552 slp->name = MemNew(StrLen(token) + 2);
1553 StrCpy(&slp->name[1], token);
1554 } else {
1555 slp->name = StringSave(p);
1556 }
1557 slp->name[0] = '*'; /* to make the name unique, like in Entrez */
1558 if (head == NULL)
1559 {
1560 head = ValNodeNew(NULL);
1561 node = head;
1562 } else {
1563 node = ValNodeNew(head);
1564 }
1565 node->data.ptrvalue = (Pointer) slp;
1566
1567 token = StrTok(NULL, ", ");
1568 }
1569
1570 return head;
1571 }
1572
1573
1574 static LocalLinkSetPtr
ParseUidList(CharPtr str)1575 ParseUidList(CharPtr str)
1576 {
1577 CharPtr localStr;
1578 CharPtr token;
1579 long uid;
1580 int i;
1581 Int4 count = 0;
1582 Int4Ptr vector;
1583 LocalLinkSetPtr lsp;
1584
1585 /* loop through twice ... the first time count, the second time, store values */
1586 for (i = 0; i < 2; i++)
1587 {
1588 localStr = StringSave(str);
1589 token = StrTok(localStr, ", ");
1590 count = 0;
1591 while (token != NULL)
1592 {
1593 if (StrSpn(token, "0123456789") != StrLen(token))
1594 {
1595 Message(MSG_POST, "parsing error at position %d", ((long) token) - ((long) localStr));
1596 MemFree(localStr);
1597 return NULL;
1598 }
1599 if (i == 1)
1600 {
1601 sscanf(token, "%ld", &uid);
1602 vector[count] = (Int4) uid;
1603 }
1604 count++;
1605 token = StrTok(NULL, ", ");
1606 }
1607 if (i == 0)
1608 {
1609 vector = MemNew(count * sizeof(Int4));
1610 }
1611 MemFree(localStr);
1612 }
1613
1614 if (count == 0)
1615 {
1616 return NULL;
1617 }
1618 lsp = LocalLinkSetNew();
1619 lsp->num = count;
1620 lsp->uids = vector;
1621 return lsp;
1622 }
1623
1624 static CharPtr
FormatPositionalErr(Int2 beginErr,Int2 endErr,Int2 startLen)1625 FormatPositionalErr(Int2 beginErr, Int2 endErr, Int2 startLen)
1626 {
1627 int i;
1628 CharPtr str;
1629
1630 /* prepare text describing where error occurred */
1631 str = MemNew(endErr + startLen + 2);
1632 for (i = 0; i < endErr + startLen - 1; i++)
1633 {
1634 str[i] = ' ';
1635 }
1636 str[i++] = '^';
1637 str[beginErr + startLen] = '^';
1638 str[i] = '\0';
1639
1640 return str;
1641 }
1642
1643 #define IS_HEX(x) (IS_DIGIT(x) || ((x) >= 'a' && ((x) <= 'f')) || \
1644 ((x) >= 'A' && ((x) <= 'F')))
1645
1646 static CharPtr
WWWStyleDecoding(CharPtr string,Boolean doEncoding)1647 WWWStyleDecoding(CharPtr string, Boolean doEncoding)
1648 { /* decoding in-place, assuming that decoded string is always smaller than
1649 original */
1650 CharPtr p, q, maxchar;
1651 Char str[3];
1652 int newchar;
1653
1654 if (! doEncoding)
1655 return string;
1656
1657 maxchar = string + (int) StrLen(string);
1658
1659 for (p = string; p < maxchar - 2; p++)
1660 {
1661 if (*p == '%' && IS_HEX(p[1]) && IS_HEX(p[2]))
1662 {
1663 str[0] = p[1];
1664 str[1] = p[2];
1665 str[2] = '\0';
1666 sscanf(str, "%x", &newchar);
1667 *p = (Char) newchar;
1668 maxchar -= 2;
1669 for (q = p + 1; q <= maxchar; q++)
1670 *q = q[2];
1671 }
1672 }
1673
1674 return string;
1675 }
1676 static int LIBCALLBACK
compUidsDescending(VoidPtr a,VoidPtr b)1677 compUidsDescending(VoidPtr a, VoidPtr b)
1678 {
1679 Int4Ptr x = (Int4Ptr) a;
1680 Int4Ptr y = (Int4Ptr) b;
1681
1682 return (*y - *x); /* note descending order */
1683 }
1684
1685 static void
SortUidsDescending(LocalLinkSetPtr lsp)1686 SortUidsDescending(LocalLinkSetPtr lsp)
1687 {
1688 Boolean sorted;
1689 int k;
1690 Int4 temp;
1691
1692 if (lsp == NULL)
1693 return;
1694
1695 /* try to sort uids in descending order */
1696
1697 for (sorted = TRUE, k = 1; k < lsp->num; k++)
1698 {
1699 if (lsp->uids[k-1] < lsp->uids[k])
1700 {
1701 sorted = FALSE;
1702 break;
1703 }
1704 }
1705
1706 if (! sorted)
1707 { /* assume that the existing order is reversed */
1708 for (k = (lsp->num / 2) - 1; k >= 0; k--)
1709 {
1710 temp = lsp->uids[k];
1711 lsp->uids[k] = lsp->uids[lsp->num - 1 - k];
1712 lsp->uids[lsp->num - 1 - k] = temp;
1713 }
1714
1715 /* now check that it's sorted */
1716 for (sorted = TRUE, k = 1; k < lsp->num; k++)
1717 {
1718 if (lsp->uids[k-1] < lsp->uids[k])
1719 {
1720 sorted = FALSE;
1721 break;
1722 }
1723 }
1724
1725 if (! sorted)
1726 { /* as a last resort, sort them using heapsort */
1727 HeapSort(lsp->uids, lsp->num, sizeof(Int4), compUidsDescending);
1728 }
1729 }
1730 }
1731
1732 static void
PrintHelp(void)1733 PrintHelp(void)
1734 {
1735 fprintf (master_fp, "Entrcmd is a non-interactive command-line interface which allows a user to\n");
1736 fprintf (master_fp, "perform a series of neighboring and output operations, based upon an initial\n");
1737 fprintf (master_fp, "set of UIDs or a boolean expression which describes a set of UIDs.\n");
1738 fprintf (master_fp, "Alternatively, it can be used to display an alphabetically sorted list of\n");
1739 fprintf (master_fp, "terms near an initial term.\n");
1740 fprintf (master_fp, "\n");
1741 fprintf (master_fp, "Type 'entrcmd' with no arguments for a brief summary of command-line options.\n");
1742 fprintf (master_fp, "\n");
1743 fprintf (master_fp, " EXPRESSION SYNTAX (-e option)\n");
1744 fprintf (master_fp, "\n");
1745 fprintf (master_fp, "The following grammar is based upon Backus-Naur form. Braces ({}) are used to\n");
1746 fprintf (master_fp, "specify optional fields, and ellipses (...) represents an arbitrary number\n");
1747 fprintf (master_fp, "of repititions. In most Backus-Naur forms, the vertical bar (|) and brackets\n");
1748 fprintf (master_fp, "([]) are used as meta-symbols. However, in the following grammar, the\n");
1749 fprintf (master_fp, "vertical bar and brackets are terminal symbols, and three stacked vertical\n");
1750 fprintf (master_fp, "bars are used to represent alternation.\n");
1751 fprintf (master_fp, "\n");
1752 fprintf (master_fp, "expression ::= diff { - diff ... }\n");
1753 fprintf (master_fp, "diff ::= term { | term ... }\n");
1754 fprintf (master_fp, "term ::= factor { & factor ... }\n");
1755 fprintf (master_fp, " |\n");
1756 fprintf (master_fp, "factor ::= qualtoken | ( expression )\n");
1757 fprintf (master_fp, " |\n");
1758 fprintf (master_fp, "qualtoken ::= token { [ fld { ,S } ] }\n");
1759 fprintf (master_fp, "\n");
1760 fprintf (master_fp, "\n");
1761 fprintf (master_fp, "token is a string of characters which either contains no special characters,\n");
1762 fprintf (master_fp, "or which is delimited by double-quotes (\"). Double-quote marks and\n");
1763 fprintf (master_fp, "backslashes (\\) which appear with a quoted token must be quoted by an\n");
1764 fprintf (master_fp, "additional backslash.\n");
1765 fprintf (master_fp, "\n");
1766 fprintf (master_fp, "fld is an appropriate string describing a field. The possible values are\n");
1767 fprintf (master_fp, "described in the following table. For all databases, an asterisk(*) is a\n");
1768 fprintf (master_fp, "possible value for fld, signifying the union of all possible fields for that\n");
1769 fprintf (master_fp, "database. '*' is also the default field, if no field qualifier is specified.\n");
1770 fprintf (master_fp, "\n");
1771 fprintf (master_fp, " | fld| Databases and descriptions\n");
1772 fprintf (master_fp, " +----+--------------------------------------------------------------------\n");
1773 fprintf (master_fp, " |WORD| For MEDLINE, \"Abstract or Title\"; for Sequences, \"Text Terms\"\n");
1774 fprintf (master_fp, " |MESH| MEDLINE only, \"MeSH term\"\n");
1775 fprintf (master_fp, " |AUTH| For all databases, \"Author Name\"\n");
1776 fprintf (master_fp, " |JOUR| For all databases, \"Journal Title\"\n");
1777 fprintf (master_fp, " |GENE| For all databases, \"Gene Name\"\n");
1778 fprintf (master_fp, " |KYWD| For MEDLINE, \"Substance\", for Sequences \"Keyword\"\n");
1779 fprintf (master_fp, " |ECNO| For MEDLINE and protein, \"E.C. number\"\n");
1780 fprintf (master_fp, " |ORGN| For all databases, \"Organism\"\n");
1781 fprintf (master_fp, " |ACCN| For Sequence databases, \"Accession\"\n");
1782 fprintf (master_fp, " |PROT| For protein, \"Protein Name\"\n");
1783 fprintf (master_fp, "\n");
1784 fprintf (master_fp, "The presence of \",S\" after a field specifier implies the same semantics\n");
1785 fprintf (master_fp, "as \"special\" in Entrez. Entrez \"total\" semantics are the default.\n");
1786 fprintf (master_fp, "\n");
1787 fprintf (master_fp, "\n");
1788 fprintf (master_fp, " PROGRAM OF COMMANDS (-p option)\n");
1789 fprintf (master_fp, "\n");
1790 fprintf (master_fp, "For the \"-e\" and \"-u\" options, the program of commands consists of a sequence of\n");
1791 fprintf (master_fp, "neighboring operations alternated with optional output commands. All output\n");
1792 fprintf (master_fp, "commands, except the first, must be preceded by a period (.), and all\n");
1793 fprintf (master_fp, "neighboring commands must be preceded by a comma (,).\n");
1794 fprintf (master_fp, "\n");
1795 fprintf (master_fp, "The output commands are:\n");
1796 fprintf (master_fp, " no None (default) sg Sequence GenBank/GenPept flat file format\n");
1797 fprintf (master_fp, " ma MEDLINE ASN.1 format sa Sequence ASN.1 format\n");
1798 fprintf (master_fp, " md MEDLINE docsums sd Sequence docsums\n");
1799 fprintf (master_fp, " ml MEDLARS format sf Sequence FASTA format\n");
1800 fprintf (master_fp, " mr MEDLINE report format sr Sequence report format\n");
1801 fprintf (master_fp, " mu MEDLINE UIDs su Sequence UIDs\n");
1802 fprintf (master_fp, " si Sequence IDs\n");
1803 fprintf (master_fp, "Each output command may be followed by an optional count indicating how\n");
1804 fprintf (master_fp, "many articles to display. The default is to display all the articles.\n");
1805 fprintf (master_fp, "\n");
1806 fprintf (master_fp, "If the \"-x\" command line option appears (\"export to a saved UID list\"), then\n");
1807 fprintf (master_fp, "the first \"mu\" or \"su\" command results in those UIDs being written to that\n");
1808 fprintf (master_fp, "\"saved UID list\" file, rather than being written to the standard output.\n");
1809 fprintf (master_fp, "\n");
1810 fprintf (master_fp, "Neighboring commands indicate the database to neighbor \"to\", and\n");
1811 fprintf (master_fp, "consists of the first letter of each of the possible databases:\n");
1812 fprintf (master_fp, "(medline, protein, nucleotide) followed by an optional count of\n");
1813 fprintf (master_fp, "how many of the current set of articles should be included in the\n");
1814 fprintf (master_fp, "neighboring operation.\n");
1815 fprintf (master_fp, "\n");
1816 fprintf (master_fp, "Example:\n");
1817 fprintf (master_fp, " Find the articles written by \"Kay LE\", but not by \"Forman-Kay JD\". Find\n");
1818 fprintf (master_fp, " their MEDLINE neighbors. Print document summaries for all of these\n");
1819 fprintf (master_fp, " neighbors. Of these neighbors, neighbor the first 5 entries to the protein\n");
1820 fprintf (master_fp, " database. Print up to 10 of these sequences in Sequence Report format.\n");
1821 fprintf (master_fp, "\n");
1822 fprintf (master_fp, " entrcmd -e '\"Kay LE\" [AUTH] - \"Forman-Kay JD\" [AUTH]' -p ,m.md,p5.sr10\n");
1823 fprintf (master_fp, "\n");
1824 fprintf (master_fp, "\n");
1825 fprintf (master_fp, "If the \"-t\" option is used, then the program of commands is different from\n");
1826 fprintf (master_fp, "what is described above. Rather, it consists of a seven character string,\n");
1827 fprintf (master_fp, "optionally followed by the number of terms which should be displayed.\n");
1828 fprintf (master_fp, "The default number of terms is 40.\n");
1829 fprintf (master_fp, "\n");
1830 fprintf (master_fp, "The string is of the form '123FLDD', where 1, 2, and 3 are as follows,\n");
1831 fprintf (master_fp, "and FLDD is one of the field specifications described above (AUTH, etc.).\n");
1832 fprintf (master_fp, "\n");
1833 fprintf (master_fp, "1 - one of 't', 's', or 'o', where 't' means that the total term counts\n");
1834 fprintf (master_fp, " should be displayed after the term, 's' means that the special and\n");
1835 fprintf (master_fp, " total term counts should be displayed after the term, and 'o' means\n");
1836 fprintf (master_fp, " that only the term itself should be displayed\n");
1837 fprintf (master_fp, "2 - one of 'b', 'c', 'e', or an integer from 3 to 9, where:\n");
1838 fprintf (master_fp, " 'b' - display terms beginning with the specified term\n");
1839 fprintf (master_fp, " 'c' - \"center\" terms; i.e., display half the terms before the specified\n");
1840 fprintf (master_fp, " term, and half the terms after the specified term\n");
1841 fprintf (master_fp, " 'e' - display terms ending with the specified term\n");
1842 fprintf (master_fp, " k - an integer from 3 to 9, indicating that (2/k)ths of the terms\n");
1843 fprintf (master_fp, " should be alphabetically before the specified term. Note that\n");
1844 fprintf (master_fp, " '4' is the same as 'c'. The value '9' is recommended for\n");
1845 fprintf (master_fp, " scrolled displays.\n");
1846 fprintf (master_fp, "3 - One of 'i' or 'n', indicating for the 'b' and 'e' options above whether\n");
1847 fprintf (master_fp, " the specified term is to be included in the output, where 'i' means\n");
1848 fprintf (master_fp, " inclusive, and 'n' means non-inclusive. This value is ignored for\n");
1849 fprintf (master_fp, " other values of the previous character, but must be present as a\n");
1850 fprintf (master_fp, " place-holder.\n");
1851 fprintf (master_fp, "\n");
1852 fprintf (master_fp, "[ WARNING: SOME OF THESE TERM SPECIFICATIONS OPTIONS (COMBINATIONS OF 1,\n");
1853 fprintf (master_fp, "2, AND 3 ABOVE) ARE CURRENTLY UNIMPLEMENTED ]\n");
1854 fprintf (master_fp, "\n");
1855 fprintf (master_fp, "\n");
1856 fprintf (master_fp, " WORLD WIDE WEB STYLE OUTPUT (-w option)\n");
1857 fprintf (master_fp, "\n");
1858 fprintf (master_fp, "The entrcmd program can also generate output which is appropriate for\n");
1859 fprintf (master_fp, "display in an HTML document, to be \"served\" by a WWW server. In particular,\n");
1860 fprintf (master_fp, "some output text contains HTML hypertext links to other data, as well as\n");
1861 fprintf (master_fp, "HTML formatting information. The parameter to the -w option is the\n");
1862 fprintf (master_fp, "directory prefix for the linked hypertext items; \"/htbin\" is recommended.\n");
1863 fprintf (master_fp, "\n");
1864 fprintf (master_fp, "If the \"-w\" option is selected, then the \"-f\" option may also be selected.\n");
1865 fprintf (master_fp, "This indicates that the HTML output should be of a form which is\n");
1866 fprintf (master_fp, "appropriate for a HTML \"FORM\". This output can only be processed by\n");
1867 fprintf (master_fp, "advanced WWW clients, but potentially provides a nicer interface, where\n");
1868 fprintf (master_fp, "each document summary has an associated checkbox, resulting in a display\n");
1869 fprintf (master_fp, "which is similar to the Entrez CD-ROM application. The \"-c\" option, if used\n");
1870 fprintf (master_fp, "in conjunction with \"-f\", indicates that these checkboxes should be\n");
1871 fprintf (master_fp, "\"pre-checked\", i.e., selected. This potentially provides the equivalent\n");
1872 fprintf (master_fp, "of the Entrez \"select all\" operation for neighboring.\n");
1873 }
1874
1875
BSPtoLSP(ByteStorePtr bsp)1876 static LocalLinkSetPtr BSPtoLSP(ByteStorePtr bsp)
1877 {
1878 LocalLinkSetPtr lsp;
1879
1880 if (bsp == NULL)
1881 return NULL;
1882
1883 lsp = LocalLinkSetNew();
1884
1885 lsp->num = BSLen(bsp) / sizeof(DocUid);
1886 if ((lsp->uids = MemNew(BSLen(bsp))) == NULL)
1887 { /* platforms which can't allocate this are out of luck */
1888 lsp = LocalLinkSetFree(lsp);
1889 } else {
1890 BSSeek (bsp, 0L, 0);
1891 BSRead (bsp, lsp->uids, lsp->num * sizeof (DocUid));
1892 }
1893
1894 return lsp;
1895 }
1896
Main(void)1897 Int2 Main(void)
1898 {
1899 int Numarg = sizeof(myargs)/sizeof(Args);
1900 DocType db = TYP_ML;
1901 Boolean exprSpecified = FALSE;
1902 Boolean uidsSpecified = FALSE;
1903 Boolean termSpecified = FALSE;
1904 Boolean taxSpecified = FALSE;
1905 Boolean neighborSpecified = FALSE;
1906 CharPtr boolString;
1907 short erract;
1908 ErrDesc err;
1909 Int2 beginErr;
1910 Int2 endErr;
1911 CharPtr str;
1912 LocalLinkSetPtr lsp = NULL;
1913 LinkSetPtr oldstylelsp;
1914 ByteStorePtr bsp;
1915 CharPtr programStr;
1916 CharPtr termString;
1917 CharPtr taxString;
1918 CharPtr neighborString;
1919 CharPtr neighborFile;
1920 FILE *neighborFp;
1921 CharPtr exportFile;
1922 CharPtr importFileList;
1923 Int2 progErr;
1924 ValNodePtr savlist = NULL;
1925 ValNodePtr np;
1926 SavListPtr slp;
1927 Char param[6];
1928 EntrezNeighborTextPtr entp;
1929 Boolean useWWWEncoding;
1930
1931 if ( ! GetArgs("Entrez command-line $Revision: 6.5 $", Numarg, myargs))
1932 return 1;
1933
1934 if (myargs[14].strvalue)
1935 {
1936 if ((master_fp = FileOpen(myargs[14].strvalue, "w")) == NULL)
1937 {
1938 Message(MSG_POST, "Unable to open output file <%s>", myargs[14].strvalue);
1939 return 9;
1940 }
1941 }
1942
1943 if (myargs[6].intvalue)
1944 {
1945 PrintHelp();
1946 FileClose(master_fp);
1947 return 0;
1948 }
1949
1950 if (myargs[0].strvalue != NULL)
1951 {
1952 switch(myargs[0].strvalue[0]) {
1953 case 'm': db = TYP_ML; break;
1954 case 'n': db = TYP_NT; break;
1955 case 'g': db = TYP_CH; break;
1956 case 'p': db = TYP_AA; break;
1957 default:
1958 Message(MSG_POST /* MSG_FATAL */, "Invalid database type <%s>", myargs[0].strvalue);
1959 FileClose(master_fp);
1960 return 1;
1961 }
1962 }
1963
1964 useWWWEncoding = myargs[15].intvalue;
1965
1966 if (myargs[16].intvalue)
1967 seqEntryRetval = -2;
1968 else
1969 seqEntryRetval = myargs[17].intvalue;
1970
1971 if (myargs[1].strvalue != NULL && myargs[1].strvalue[0] != '\0')
1972 {
1973 exprSpecified = TRUE;
1974 boolString = WWWStyleDecoding(myargs[1].strvalue, useWWWEncoding);
1975 }
1976 if (myargs[2].strvalue != NULL && myargs[2].strvalue[0] != '\0')
1977 uidsSpecified = TRUE;
1978
1979 if (myargs[11].strvalue != NULL && myargs[11].strvalue[0] != '\0')
1980 {
1981 termSpecified = TRUE;
1982 termString = WWWStyleDecoding(myargs[11].strvalue, useWWWEncoding);
1983 }
1984
1985 if (myargs[12].strvalue != NULL && myargs[12].strvalue[0] != '\0')
1986 {
1987 taxSpecified = TRUE;
1988 taxString = WWWStyleDecoding(myargs[12].strvalue, useWWWEncoding);
1989 }
1990
1991 if (myargs[13].strvalue != NULL && myargs[13].strvalue[0] != '\0')
1992 {
1993 neighborSpecified = TRUE;
1994 neighborFile = myargs[13].strvalue;
1995 if ((neighborFp = FileOpen(neighborFile, "r")) == NULL)
1996 {
1997 Message(MSG_POST /* MSG_FATAL */, "Unable to open neighboring-file %s", neighborFile);
1998 FileClose(master_fp);
1999 return 1;
2000 }
2001 FileClose(neighborFp);
2002 }
2003
2004 if (((exprSpecified != 0) + (uidsSpecified != 0) + (termSpecified != 0) +
2005 (taxSpecified != 0) + (neighborSpecified != 0)) != 1)
2006 {
2007 Message(MSG_POST /* MSG_FATAL */, "Exactly one of the -e, -l, -n, -t and -u options must be specified");
2008 FileClose(master_fp);
2009 return 1;
2010 }
2011
2012 if (uidsSpecified)
2013 {
2014 lsp = ParseUidList(myargs[2].strvalue);
2015 if (lsp == NULL)
2016 {
2017 Message(MSG_POST /* MSG_FATAL */, "Syntax error on UID list");
2018 FileClose(master_fp);
2019 return 1;
2020 }
2021 }
2022
2023 if (exprSpecified)
2024 {
2025 ErrGetOpts(&erract, NULL);
2026 ErrSetOpts(ERR_CONTINUE, 0);
2027 ErrFetch(&err);
2028 if (! EntrezTLParseString(boolString, db, -1, &beginErr, &endErr))
2029 {
2030 ErrShow();
2031 Message(MSG_POST, "Syntax error: %s", boolString);
2032 if (endErr < 0)
2033 {
2034 endErr = 0;
2035 }
2036
2037 str = FormatPositionalErr(beginErr, endErr, StrLen("Syntax error: "));
2038
2039 Message(MSG_POST, str);
2040 MemFree(str);
2041 FileClose(master_fp);
2042 return 2;
2043 }
2044 ErrSetOpts(erract, 0);
2045 }
2046
2047 wwwPrefix = myargs[5].strvalue;
2048
2049 programStr = myargs[3].strvalue;
2050
2051 useForms = myargs[7].intvalue;
2052 checkForms = myargs[8].intvalue;
2053 exportFile = myargs[9].strvalue;
2054 importFileList = myargs[10].strvalue;
2055
2056 if (exportFile != NULL && exportFile[0] != '\0') {
2057 GetAppParam("ENTREZ", "ENTRCMD", "EXPORT_OK", "FALSE", param,
2058 sizeof param);
2059 if (StrICmp(param, "TRUE") != 0)
2060 {
2061 Message(MSG_POST, "Export option is disabled");
2062 FileClose(master_fp);
2063 return 6;
2064 }
2065 }
2066
2067 if (termSpecified)
2068 {
2069 if (! TermProcessing(programStr, termString, db, TRUE))
2070 {
2071 Message(MSG_POST, "Invalid term program specification %s", programStr);
2072 FileClose(master_fp);
2073 return 3;
2074 }
2075 } else if (taxSpecified) {
2076 /* no action */
2077 } else {
2078 if ((progErr = RunProgram(programStr, NULL, db, TRUE)) != 0)
2079 {
2080 Message(MSG_POST, "Program error: %s", programStr);
2081 if (progErr > 0)
2082 str = FormatPositionalErr(progErr - 1, progErr - 1,
2083 StrLen("Program error: "));
2084 else
2085 str = StringSave("Validation error");
2086
2087 Message(MSG_POST, str);
2088 MemFree(str);
2089 FileClose(master_fp);
2090 return 4;
2091 }
2092 if (exportFile != NULL)
2093 {
2094 exportFilePtr = FileOpen(exportFile, "w");
2095 }
2096 if (importFileList != NULL)
2097 {
2098 if ((savlist = ParseImportedFiles(importFileList)) == NULL)
2099 {
2100 Message(MSG_POST, "Fatal error processing imported files");
2101 }
2102 }
2103 }
2104
2105 /* note that we defer EntrezInit() until we're sure that there are no */
2106 /* parsing errors */
2107 if (! EntrezInit("entrcmd", FALSE, NULL))
2108 {
2109 Message(MSG_POST, "Unable to access Entrez dataset");
2110 FileClose(master_fp);
2111 return 5;
2112 }
2113
2114 EntrezBioseqFetchEnable("entrcmd", TRUE);
2115
2116 if (myargs[4].intvalue)
2117 {
2118 str = EntrezDetailedInfo();
2119 fprintf (master_fp, " STATUS REPORT\n\n\n%s\n\n", str);
2120 fflush(master_fp);
2121 }
2122
2123 while (savlist != NULL)
2124 { /* create named UID lists, as needed */
2125 slp = (SavListPtr) savlist->data.ptrvalue;
2126 EntrezCreateNamedUidList(slp->name, slp->db, 0, slp->num, slp->uids);
2127 MemFree(slp->name);
2128 MemFree(slp->uids);
2129 MemFree(slp);
2130 np = savlist->next;
2131 MemFree(savlist);
2132 savlist = np;
2133 }
2134
2135 if (exprSpecified)
2136 { /* note that we deferred evaluation until after EntrezInit() */
2137 if ((bsp = EntrezTLEvalXString(boolString, db, -1, NULL, NULL)) != NULL)
2138 {
2139 lsp = BSPtoLSP(bsp);
2140 BSFree(bsp);
2141 }
2142 if (db == TYP_ML && lsp != NULL)
2143 {
2144 SortUidsDescending(lsp);
2145 }
2146 }
2147
2148 if (neighborSpecified)
2149 {
2150 if (db == TYP_ML)
2151 {
2152 size_t neighborLen;
2153 int k;
2154 Int4 temp;
2155
2156 if (! EntrezCanNeighborText())
2157 {
2158 Message(MSG_POST, "Unable to perform on-the-fly neighboring\n");
2159 FileClose(master_fp);
2160 return 5;
2161 }
2162 /* create text object here . */
2163 neighborLen = FileLength(neighborFile);
2164 if ((neighborString = MemNew(neighborLen+1)) == NULL)
2165 {
2166 Message(MSG_POST, "Unable to allocate memory for on-the-fly neighboring\n");
2167 FileClose(master_fp);
2168 return 5;
2169 }
2170 neighborFp = FileOpen(neighborFile, "r");
2171 FileRead(neighborString, neighborLen, 1, neighborFp);
2172 FileClose (neighborFp);
2173 neighborString[neighborLen] = 0;
2174 entp = EntrezNeighborTextNew();
2175 entp->percent_terms_to_use = 100;
2176 entp->max_neighbors = 0;
2177 entp->min_score = 0;
2178 entp->fld = FLD_WORD;
2179 entp->normalText = neighborString;
2180 entp->specialText = StringSave("");
2181 oldstylelsp = EntrezDoNeighborText(entp);
2182 lsp = LinkSetToLocalLinkSet(oldstylelsp);
2183 LinkSetFree(oldstylelsp);
2184 EntrezNeighborTextFree(entp);
2185
2186 /* reverse the order since they are received in backwards order */
2187 if (lsp != NULL)
2188 {
2189 for (k = (lsp->num / 2) - 1; k >= 0; k--)
2190 {
2191 temp = lsp->uids[k];
2192 lsp->uids[k] = lsp->uids[lsp->num - 1 - k];
2193 lsp->uids[lsp->num - 1 - k] = temp;
2194 }
2195 }
2196 } else {
2197 Boolean isprot = db == TYP_AA;
2198 SeqEntryPtr sep;
2199 BioseqPtr bsp;
2200
2201 if (! EntrezCanBlast())
2202 {
2203 Message(MSG_POST, "Unable to perform on-the-fly BLAST\n");
2204 FileClose(master_fp);
2205 return 5;
2206 }
2207 neighborFp = FileOpen(neighborFile, "r");
2208 sep = FastaToSeqEntry(neighborFp, !isprot);
2209 FileClose (neighborFp);
2210 if (sep == NULL)
2211 {
2212 Message (MSG_OK, "Error encountered while parsing sequence data");
2213 return 8;
2214 }
2215 bsp = NULL;
2216 SeqEntryExplore(sep, &bsp, isprot? FindAProt : FindANuc);
2217 if (bsp == NULL)
2218 {
2219 Message (MSG_OK, "Error encountered while parsing sequence data for Bioseq");
2220 /* ? SeqEntryFree(sep); */
2221 return 9;
2222 }
2223 oldstylelsp = EntrezBlastBioseq(bsp, db, NULL, NULL, NULL, FALSE);
2224 lsp = LinkSetToLocalLinkSet(oldstylelsp);
2225 LinkSetFree(oldstylelsp);
2226 }
2227 }
2228
2229 if (termSpecified)
2230 {
2231 TermProcessing(programStr, termString, db, FALSE);
2232 } else if (taxSpecified) {
2233 TaxProcessing(taxString, db, programStr);
2234 } else {
2235 RunProgram(programStr, lsp, db, FALSE);
2236 }
2237 EntrezFini();
2238 EntrezBioseqFetchDisable();
2239
2240 FileClose(master_fp);
2241 return 0;
2242 }
2243