1 /* tax3api.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: tax3api.c
27 *
28 * Author: Jonathan Kans
29 *
30 * Version Creation Date: 7/8/04
31 *
32 * $Revision: 1.94 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44
45 #include <ncbi.h>
46 #include <objseq.h>
47 #include <objsset.h>
48 #include <tax3api.h>
49 #include <sqnutils.h>
50 #include <subutil.h>
51 #include <findrepl.h>
52 #define NLM_GENERATED_CODE_PROTO
53 #include <objmacro.h>
54 #include <macroapi.h>
55
56 /* low-level connection functions */
57
/* text_tax_asn: when TRUE, Tax3OpenConnection selects the "TaxService3Text"
   service and the ASN.1 streams are opened in text mode ("r"/"w") instead of
   binary mode ("rb"/"wb").  It is set from the TEXT_TAX_ASN environment
   variable (OS_UNIX builds only).
   text_tax_set: TRUE once TEXT_TAX_ASN has been examined, so the environment
   is consulted only once. */
58 static Boolean text_tax_asn = FALSE;
59 static Boolean text_tax_set = FALSE;
60
/* test_tax_asn: when TRUE, Tax3OpenConnection connects to "TaxService3Test".
   It is set from the TEST_TAX_ASN environment variable (OS_UNIX builds only).
   test_tax_set: TRUE once TEST_TAX_ASN has been examined. */
61 static Boolean test_tax_asn = FALSE;
62 static Boolean test_tax_set = FALSE;
63
/* Default service name used when neither flag above is set; flip the #if to
   build against the test service by default. */
64 #if 1
65 static const CharPtr tax3servicename = "TaxService3";
66 #else
67 static const CharPtr tax3servicename = "TaxService3Test";
68 #endif
69
/*
 * Tax3ReplyFixup
 *
 * Post-processes a taxonomy reply in place.  For every data element whose
 * OrgRef carries an OrgName with pgcode == 0, the plastid genetic code is
 * replaced by the value GetSpecialPlastidGenCode returns for the organism's
 * taxname and lineage.  Error elements and incomplete data are left alone.
 * A NULL reply is ignored.
 */
static void Tax3ReplyFixup (
  Taxon3ReplyPtr t3ry
)

{
  T3DataPtr   data;
  OrgNamePtr  name;
  OrgRefPtr   org;
  T3ReplyPtr  reply;

  if (t3ry == NULL) return;

  reply = t3ry->reply;
  while (reply != NULL) {
    if (reply->choice == T3Reply_data) {
      data = (T3DataPtr) reply->data.ptrvalue;
      org = (data != NULL) ? (OrgRefPtr) data->org : NULL;
      name = (org != NULL) ? org->orgname : NULL;
      /* only fill in a code that has not been assigned yet */
      if (name != NULL && name->pgcode == 0) {
        name->pgcode = GetSpecialPlastidGenCode (org->taxname, name->lineage);
      }
    }
    reply = reply->next;
  }
}
94
Tax3OpenConnection(void)95 NLM_EXTERN CONN Tax3OpenConnection (
96 void
97 )
98
99 {
100 #ifdef OS_UNIX
101 CharPtr str;
102
103 if (! text_tax_set) {
104 str = (CharPtr) getenv ("TEXT_TAX_ASN");
105 if (StringDoesHaveText (str)) {
106 if (StringICmp (str, "TRUE") == 0) {
107 text_tax_asn = TRUE;
108 }
109 }
110 text_tax_set = TRUE;
111 }
112
113 if (! test_tax_set) {
114 str = (CharPtr) getenv ("TEST_TAX_ASN");
115 if (StringDoesHaveText (str)) {
116 if (StringICmp (str, "TRUE") == 0) {
117 test_tax_asn = TRUE;
118 }
119 }
120 test_tax_set = TRUE;
121 }
122 #endif
123
124 if (test_tax_asn) {
125 return QUERY_OpenServiceQuery ("TaxService3Test", NULL, 30);
126 }
127
128 return QUERY_OpenServiceQuery (text_tax_asn ? "TaxService3Text" : tax3servicename, NULL, 30);
129 }
130
131 #ifdef OS_MAC
132 #include <Events.h>
133 #endif
134
Tax3WaitForReply(CONN conn)135 NLM_EXTERN Taxon3ReplyPtr Tax3WaitForReply (
136 CONN conn
137 )
138
139 {
140 AsnIoConnPtr aicp;
141 time_t currtime, starttime;
142 time_t max = 0;
143 EIO_Status status;
144 STimeout timeout;
145 Taxon3ReplyPtr t3ry = NULL;
146 #ifdef OS_MAC
147 EventRecord currEvent;
148 #endif
149
150 if (conn == NULL) return NULL;
151
152 #ifdef OS_MAC
153 timeout.sec = 0;
154 timeout.usec = 0;
155 #else
156 timeout.sec = 300;
157 timeout.usec = 0;
158 #endif
159
160 starttime = GetSecs ();
161 while ((status = CONN_Wait (conn, eIO_Read, &timeout)) == eIO_Timeout && max < 300) {
162 currtime = GetSecs ();
163 max = currtime - starttime;
164 #ifdef OS_MAC
165 WaitNextEvent (0, &currEvent, 0, NULL);
166 #endif
167 }
168 if (status == eIO_Success) {
169 aicp = QUERY_AsnIoConnOpen (text_tax_asn ? "r" : "rb", conn);
170 t3ry = Taxon3ReplyAsnRead (aicp->aip, NULL);
171 Tax3ReplyFixup (t3ry);
172 QUERY_AsnIoConnClose (aicp);
173 }
174 CONN_Close (conn);
175
176 return t3ry;
177 }
178
179 /* high-level connection functions */
180
Tax3SynchronousQuery(Taxon3RequestPtr t3rq)181 NLM_EXTERN Taxon3ReplyPtr Tax3SynchronousQuery (
182 Taxon3RequestPtr t3rq
183 )
184
185 {
186 AsnIoConnPtr aicp;
187 CONN conn;
188 Taxon3ReplyPtr t3ry;
189 time_t t1, t2, t3;
190
191 if (t3rq == NULL) return NULL;
192
193 conn = Tax3OpenConnection ();
194
195 if (conn == NULL) return NULL;
196
197 aicp = QUERY_AsnIoConnOpen (text_tax_asn ? "w" : "wb", conn);
198
199 Taxon3RequestAsnWrite (t3rq, aicp->aip, NULL);
200
201 AsnIoFlush (aicp->aip);
202 QUERY_AsnIoConnClose (aicp);
203
204 QUERY_SendQuery (conn);
205
206 t1 = time(NULL);
207 t3ry = Tax3WaitForReply (conn);
208 t2 = time(NULL);
209 t3 = t2 - t1;
210
211 return t3ry;
212 }
213
Tax3AsynchronousQuery(Taxon3RequestPtr t3rq,QUEUE * queue,QueryResultProc resultproc,VoidPtr userdata)214 NLM_EXTERN Boolean Tax3AsynchronousQuery (
215 Taxon3RequestPtr t3rq,
216 QUEUE* queue,
217 QueryResultProc resultproc,
218 VoidPtr userdata
219 )
220
221 {
222 AsnIoConnPtr aicp;
223 CONN conn;
224
225 if (t3rq == NULL) return FALSE;
226
227 conn = Tax3OpenConnection ();
228
229 if (conn == NULL) return FALSE;
230
231 aicp = QUERY_AsnIoConnOpen (text_tax_asn ? "w" : "wb", conn);
232
233 Taxon3RequestAsnWrite (t3rq, aicp->aip, NULL);
234
235 AsnIoFlush (aicp->aip);
236 QUERY_AsnIoConnClose (aicp);
237
238 QUERY_SendQuery (conn);
239
240 QUERY_AddToQueue (queue, conn, resultproc, userdata, TRUE);
241
242 return TRUE;
243 }
244
Tax3CheckQueue(QUEUE * queue)245 NLM_EXTERN Int4 Tax3CheckQueue (
246 QUEUE* queue
247 )
248
249 {
250 return QUERY_CheckQueue (queue);
251 }
252
Tax3ReadReply(CONN conn,EIO_Status status)253 NLM_EXTERN Taxon3ReplyPtr Tax3ReadReply (
254 CONN conn,
255 EIO_Status status
256 )
257
258 {
259 AsnIoConnPtr aicp;
260 Taxon3ReplyPtr t3ry = NULL;
261
262 if (conn != NULL && status == eIO_Success) {
263 aicp = QUERY_AsnIoConnOpen (text_tax_asn ? "r" : "rb", conn);
264 t3ry = Taxon3ReplyAsnRead (aicp->aip, NULL);
265 Tax3ReplyFixup (t3ry);
266 QUERY_AsnIoConnClose (aicp);
267 }
268 return t3ry;
269 }
270
CreateTaxon3Request(Int4 taxid,CharPtr name,OrgRefPtr orp)271 NLM_EXTERN Taxon3RequestPtr CreateTaxon3Request (
272 Int4 taxid,
273 CharPtr name,
274 OrgRefPtr orp
275 )
276
277 {
278 Taxon3RequestPtr t2rp;
279
280 t2rp = Taxon3RequestNew ();
281 if (t2rp == NULL) return NULL;
282
283 if (StringDoesHaveText (name)) {
284 ValNodeCopyStr (&(t2rp->request), 2, name);
285 } else if (taxid > 0) {
286 ValNodeAddInt (&(t2rp->request), 1, taxid);
287 } else if (orp != NULL) {
288 orp = AsnIoMemCopy ((Pointer) orp,
289 (AsnReadFunc) OrgRefAsnRead,
290 (AsnWriteFunc) OrgRefAsnWrite);
291 ValNodeAddPointer (&(t2rp->request), 3, (Pointer) orp);
292 }
293
294 return t2rp;
295 }
296
297
/*
 * SaveTaxon3Request
 *
 * Debugging aid: writes the request to the file at path (opened with mode
 * "w").  Does nothing when the request is NULL or the file cannot be
 * opened.
 */
static void SaveTaxon3Request (Taxon3RequestPtr t3rp, CharPtr path)
{
  AsnIoPtr  out;

  if (t3rp == NULL) return;

  out = AsnIoOpen (path, "w");
  if (out == NULL) return;

  Taxon3RequestAsnWrite (t3rp, out, NULL);
  AsnIoClose (out);
}
310
CreateMultiTaxon3Request(ValNodePtr org_list)311 NLM_EXTERN Taxon3RequestPtr CreateMultiTaxon3Request (ValNodePtr org_list)
312 {
313 ValNodePtr vnp;
314 Taxon3RequestPtr t3rp;
315 OrgRefPtr orp;
316
317 t3rp = Taxon3RequestNew ();
318 if (t3rp == NULL) return NULL;
319
320 for (vnp = org_list; vnp != NULL; vnp = vnp->next)
321 {
322 switch (vnp->choice)
323 {
324 case T3Request_taxid:
325 ValNodeAddInt (&(t3rp->request), T3Request_taxid, vnp->data.intvalue);
326 break;
327 case T3Request_name:
328 ValNodeCopyStr (&(t3rp->request), T3Request_name, vnp->data.ptrvalue);
329 break;
330 case T3Request_org:
331 orp = AsnIoMemCopy (vnp->data.ptrvalue,
332 (AsnReadFunc) OrgRefAsnRead,
333 (AsnWriteFunc) OrgRefAsnWrite);
334 ValNodeAddPointer (&(t3rp->request), T3Request_org, (Pointer) orp);
335 break;
336 }
337 }
338
339 /* SaveTaxon3Request(t3rp, "request.txt"); */
340 return t3rp;
341 }
342
343 /* takes ValNode list of integers, creates request */
CreateJoinRequest(ValNodePtr taxon_list)344 NLM_EXTERN Taxon3RequestPtr CreateJoinRequest (ValNodePtr taxon_list)
345 {
346 Taxon3RequestPtr t3rp;
347 ValNodePtr vnp, data = NULL;
348
349 t3rp = Taxon3RequestNew();
350 if (t3rp == NULL) return NULL;
351
352 for (vnp = taxon_list; vnp != NULL; vnp = vnp->next) {
353 ValNodeAddInt (&data, T3Request_join, vnp->data.intvalue);
354 }
355 ValNodeAddPointer ((&t3rp->request), T3Request_join, data);
356
357 /* SaveTaxon3Request(t3rp, "join_request.txt"); */
358 return t3rp;
359 }
360
361
HasMisspellingFlag(T3DataPtr t)362 static Boolean HasMisspellingFlag (T3DataPtr t)
363 {
364 T3StatusFlagsPtr status;
365
366 if (t == NULL) return FALSE;
367 status = t->status;
368 while (status != NULL) {
369 if (StringCmp (status->property, "misspelled_name") == 0) {
370 return TRUE;
371 }
372 status = status->next;
373 }
374 return FALSE;
375 }
376
377
GetStatusFlags(T3DataPtr t)378 static Uint1 GetStatusFlags (T3DataPtr t)
379 {
380 Uint1 flags = 0;
381 T3StatusFlagsPtr status;
382 ValNodePtr vnp;
383
384 if (t == NULL) return FALSE;
385 status = t->status;
386 while (status != NULL) {
387 if (StringCmp (status->property, "unpublished_name") == 0) {
388 flags |= eReturnedOrgFlag_unpublished;
389 } else if (StringCmp (status->property, "misspelled_name") == 0) {
390 flags |= eReturnedOrgFlag_misspelled;
391 } else if (StringCmp (status->property, "old_name_class") == 0) {
392 for (vnp = status->Value_value; vnp != NULL; vnp = vnp->next) {
393 if (vnp->choice == Value_value_str) {
394 if (StringCmp ((CharPtr)(vnp->data.ptrvalue), "common name") == 0
395 || StringCmp ((CharPtr)(vnp->data.ptrvalue), "genbank common name")) {
396 flags |= eReturnedOrgFlag_common_name;
397 }
398 }
399 }
400 }
401
402 status = status->next;
403 }
404 if (flags == 0) {
405 flags = eReturnedOrgFlag_normal;
406 }
407 return flags;
408 }
409
410
ObjectIdCompare(ObjectIdPtr a,ObjectIdPtr b)411 NLM_EXTERN int LIBCALL ObjectIdCompare (ObjectIdPtr a, ObjectIdPtr b)
412 {
413 int rval = 0;
414 Char buf[30];
415
416 if (a == b) {
417 rval = 0;
418 } else if (a == NULL) {
419 rval = -1;
420 } else if (b == NULL) {
421 rval = 1;
422 } else if (a->str == NULL && b->str == NULL) {
423 if (a->id < b->id) {
424 rval = -1;
425 } else if (a->id > b->id) {
426 rval = 1;
427 }
428 } else if (a->str == NULL) {
429 sprintf (buf, "%d", a->id);
430 rval = StringCmp (buf, b->str);
431 } else if (b->str == NULL) {
432 sprintf (buf, "%d", b->id);
433 rval = StringCmp (a->str, buf);
434 } else {
435 rval = StringCmp (a->str, b->str);
436 }
437 return rval;
438 }
439
440
441 /*****************************************************************************
442 *
443 * DbtagCompare(a, b)
444 *
445 *****************************************************************************/
DbtagCompare(DbtagPtr a,DbtagPtr b)446 NLM_EXTERN int LIBCALL DbtagCompare (DbtagPtr a, DbtagPtr b)
447 {
448 int rval = 0;
449
450 if (a == b) {
451 rval = 0;
452 } else if (a == NULL) {
453 rval = -1;
454 } else if (b == NULL) {
455 rval = 1;
456 } else if ((rval = StringICmp (a->db, b->db)) == 0) {
457 rval = ObjectIdCompare (a->tag, b->tag);
458 }
459 return rval;
460 }
461
462
SortVnpByDbtag(VoidPtr ptr1,VoidPtr ptr2)463 static int LIBCALLBACK SortVnpByDbtag (VoidPtr ptr1, VoidPtr ptr2)
464
465 {
466 ValNodePtr vnp1;
467 ValNodePtr vnp2;
468
469 if (ptr1 != NULL && ptr2 != NULL) {
470 vnp1 = *((ValNodePtr PNTR) ptr1);
471 vnp2 = *((ValNodePtr PNTR) ptr2);
472 if (vnp1 != NULL && vnp2 != NULL) {
473 return DbtagCompare (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
474 }
475 }
476 return 0;
477 }
478
OrgModSetCompare(OrgModPtr mod1,OrgModPtr mod2)479 NLM_EXTERN int LIBCALL OrgModSetCompare (OrgModPtr mod1, OrgModPtr mod2)
480 {
481 int rval = 0;
482
483 while (mod1 != NULL && mod2 != NULL && rval == 0)
484 {
485 if (mod1->subtype < mod2->subtype)
486 {
487 rval = -1;
488 }
489 else if (mod1->subtype > mod2->subtype)
490 {
491 rval = 1;
492 }
493 else if ((rval = StringCmp (mod1->subname, mod2->subname)) == 0
494 && (rval = StringCmp (mod1->attrib, mod2->attrib)) == 0)
495 {
496 mod1 = mod1->next;
497 mod2 = mod2->next;
498 }
499 }
500
501 if (rval == 0)
502 {
503 if (mod1 == NULL && mod2 == NULL)
504 {
505 rval = 0;
506 }
507 else if (mod1 == NULL)
508 {
509 rval = -1;
510 }
511 else if (mod2 == NULL)
512 {
513 rval = 1;
514 }
515 }
516 return rval;
517 }
518
519
OrgNameCompare(OrgNamePtr onp1,OrgNamePtr onp2)520 NLM_EXTERN int LIBCALL OrgNameCompare (OrgNamePtr onp1, OrgNamePtr onp2)
521 {
522 int rval = 0;
523
524 while (onp1 != NULL && onp2 != NULL && rval == 0)
525 {
526 if ((rval = OrgModSetCompare(onp1->mod, onp2->mod)) != 0
527 || (rval = StringCmp (onp1->lineage, onp2->lineage)) != 0
528 || (rval = StringCmp (onp1->div, onp2->div)) != 0
529 || (rval = StringCmp (onp1->attrib, onp2->attrib)) != 0)
530 {
531 /* no further processing */
532 }
533 else if (onp1->choice < onp2->choice)
534 {
535 rval = -1;
536 }
537 else if (onp1->choice > onp2->choice)
538 {
539 rval = 1;
540 }
541 else if (onp1->gcode < onp2->gcode)
542 {
543 rval = -1;
544 }
545 else if (onp1->gcode > onp2->gcode)
546 {
547 rval = 1;
548 }
549 else if (onp1->mgcode < onp2->mgcode)
550 {
551 rval = -1;
552 }
553 else if (onp1->mgcode > onp2->mgcode)
554 {
555 rval = 1;
556 }
557 else if (onp1->pgcode < onp2->pgcode)
558 {
559 rval = -1;
560 }
561 else if (onp1->pgcode > onp2->pgcode)
562 {
563 rval = 1;
564 }
565 onp1 = onp1->next;
566 onp2 = onp2->next;
567 }
568 if (rval == 0)
569 {
570 if (onp1 == NULL && onp2 == NULL)
571 {
572 rval = 0;
573 }
574 else if (onp1 == NULL)
575 {
576 rval = -1;
577 }
578 else if (onp2 == NULL)
579 {
580 rval = 1;
581 }
582 }
583 return rval;
584 }
585
586
587 /*****************************************************************************
588 *
589 * OrgRefCompare (orp1, orp2)
590 *
591 *****************************************************************************/
OrgRefCompare(OrgRefPtr orp1,OrgRefPtr orp2)592 NLM_EXTERN int LIBCALL OrgRefCompare (OrgRefPtr orp1, OrgRefPtr orp2)
593 {
594 int rval = 0;
595 if (orp1 == NULL && orp2 == NULL)
596 {
597 return 0;
598 }
599 else if (orp1 == NULL)
600 {
601 return -1;
602 }
603 else if (orp2 == NULL)
604 {
605 return 1;
606 }
607 else if ((rval = StringCmp (orp1->taxname, orp2->taxname)) != 0)
608 {
609 return rval;
610 }
611 else if ((rval = StringCmp (orp1->common, orp2->common)) != 0)
612 {
613 return rval;
614 }
615 else if ((rval = ValNodeCompare (orp1->syn, orp2->syn, SortVnpByString)) != 0)
616 {
617 return rval;
618 }
619 else if ((rval = ValNodeCompare (orp1->db, orp2->db, SortVnpByDbtag)) != 0)
620 {
621 return rval;
622 }
623 else
624 {
625 rval = OrgNameCompare (orp1->orgname, orp2->orgname);
626 }
627 return rval;
628 }
629
630
SortVnpByOrgRef(VoidPtr ptr1,VoidPtr ptr2)631 static int LIBCALLBACK SortVnpByOrgRef (VoidPtr ptr1, VoidPtr ptr2)
632
633 {
634 ValNodePtr vnp1;
635 ValNodePtr vnp2;
636
637 if (ptr1 != NULL && ptr2 != NULL) {
638 vnp1 = *((ValNodePtr PNTR) ptr1);
639 vnp2 = *((ValNodePtr PNTR) ptr2);
640 if (vnp1 != NULL && vnp2 != NULL) {
641 return OrgRefCompare (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
642 }
643 }
644 return 0;
645 }
646
647
Taxon3GetOrgRefList(ValNodePtr org_list)648 NLM_EXTERN ValNodePtr Taxon3GetOrgRefList (ValNodePtr org_list)
649 {
650 Taxon3RequestPtr t3rq;
651 Taxon3ReplyPtr t3ry;
652 T3DataPtr tdp;
653 OrgRefPtr t3orp = NULL;
654 T3ReplyPtr trp;
655 T3ErrorPtr tep;
656 ValNodePtr uniq_list, response_list = NULL, next_org_list, last_org;
657 Int4 request_num, max_requests = 2000;
658 ValNodePtr PNTR ptr_array;
659 ValNodePtr vnp, vnp_rq, vnp_rp;
660 Int4 i, num_orgs;
661 Uint1 choice;
662 TextFsaPtr tags;
663
664 if (org_list == NULL) {
665 return NULL;
666 }
667
668 tags = GetOrgModSearch();
669
670 /* make a copy of the original list - we will prepare the response list by substituting the OrgRef */
671 org_list = ValNodeCopyPtr (org_list);
672
673 /* make array to show original order of ValNodes, so that we can restore after sorting */
674 num_orgs = ValNodeLen (org_list);
675 ptr_array = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * num_orgs);
676 for (vnp = org_list, i = 0; vnp != NULL; vnp = vnp->next, i++) {
677 ptr_array[i] = vnp;
678 }
679
680 org_list = ValNodeSort (org_list, SortVnpByOrgRef);
681
682 /* now make a list of just the unique requests */
683 uniq_list = ValNodeCopyPtr (org_list);
684 ValNodeUnique (&uniq_list, SortVnpByOrgRef, ValNodeFree);
685
686 /* now break large lists into manageable chunks */
687 vnp = uniq_list;
688 while (vnp != NULL) {
689 next_org_list = vnp->next;
690 last_org = vnp;
691 request_num = 1;
692 while (next_org_list != NULL && request_num < max_requests) {
693 last_org = next_org_list;
694 next_org_list = next_org_list->next;
695 request_num++;
696 }
697 if (last_org != NULL) {
698 last_org->next = NULL;
699 }
700
701 /* now create the request */
702
703 t3rq = CreateMultiTaxon3Request (vnp);
704 if (t3rq == NULL) return NULL;
705 t3ry = Tax3SynchronousQuery (t3rq);
706 Taxon3RequestFree (t3rq);
707 if (t3ry != NULL) {
708 for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
709 switch (trp->choice) {
710 case T3Reply_error :
711 tep = (T3ErrorPtr) trp->data.ptrvalue;
712 if (tep != NULL) {
713 ErrPostEx (SEV_ERROR, 0, 0, tep->message);
714 }
715 if (tep != NULL && StringStr (tep->message, "ambiguous") != NULL) {
716 ValNodeAddPointer (&response_list, eReturnedOrgFlag_ambiguous, NULL);
717 } else {
718 ValNodeAddPointer (&response_list, eReturnedOrgFlag_error, NULL);
719 }
720 break;
721 case T3Reply_data :
722 tdp = (T3DataPtr) trp->data.ptrvalue;
723 if (tdp != NULL) {
724 t3orp = (OrgRefPtr)(tdp->org);
725 choice = GetStatusFlags (tdp);
726 /* disable
727 ParseTaxNameToQuals(t3orp, tags); */
728 ValNodeAddPointer (&response_list, choice, (Pointer) t3orp);
729 tdp->org = NULL;
730 }
731 break;
732 default :
733 break;
734 }
735 }
736 Taxon3ReplyFree (t3ry);
737 }
738
739 if (last_org != NULL) {
740 last_org->next = next_org_list;
741 }
742 vnp = next_org_list;
743 }
744
745 /* now put responses in list */
746 vnp = uniq_list;
747 vnp_rq = org_list;
748 vnp_rp = response_list;
749
750 while (vnp != NULL && vnp_rq != NULL && vnp_rp != NULL) {
751 while (vnp_rq != NULL && OrgRefCompare (vnp->data.ptrvalue, vnp_rq->data.ptrvalue) == 0) {
752 vnp_rq->data.ptrvalue = AsnIoMemCopy (vnp_rp->data.ptrvalue, (AsnReadFunc) OrgRefAsnRead, (AsnWriteFunc) OrgRefAsnWrite);
753 vnp_rq->choice = vnp_rp->choice;
754 vnp_rq = vnp_rq->next;
755 }
756 vnp_rp->data.ptrvalue = OrgRefFree (vnp_rp->data.ptrvalue);
757 vnp_rp = vnp_rp->next;
758 vnp = vnp->next;
759 }
760 /* if there were more requests than responses, set responses to NULL */
761 while (vnp_rq != NULL) {
762 vnp_rq->data.ptrvalue = NULL;
763 vnp_rq = vnp_rq->next;
764 }
765 /* if there were more responses than requests, free extra responses */
766 while (vnp_rp != NULL) {
767 vnp_rp->data.ptrvalue = OrgRefFree (vnp_rp->data.ptrvalue);
768 vnp_rp = vnp_rp->next;
769 }
770 response_list = ValNodeFree (response_list);
771 uniq_list = ValNodeFree (uniq_list);
772
773 /* now restore original order */
774 for (i = 0; i < num_orgs - 1; i++) {
775 ptr_array[i]->next = ptr_array[i + 1];
776 }
777 ptr_array[num_orgs - 1]->next = NULL;
778 org_list = ptr_array[0];
779 ptr_array = MemFree (ptr_array);
780 tags = TextFsaFree (tags);
781
782 return org_list;
783 }
784
785
TaxFixItemNew(void)786 NLM_EXTERN TaxFixItemPtr TaxFixItemNew (void)
787 {
788 TaxFixItemPtr t;
789
790 t = (TaxFixItemPtr) MemNew (sizeof (TaxFixItemData));
791 MemSet (t, 0, sizeof (TaxFixItemData));
792 return t;
793 }
794
795
TaxFixItemFree(TaxFixItemPtr t)796 NLM_EXTERN TaxFixItemPtr TaxFixItemFree (TaxFixItemPtr t)
797 {
798 if (t != NULL) {
799 t->response_org = OrgRefFree (t->response_org);
800 t->taxname = MemFree (t->taxname);
801 t->suggested_fix = MemFree (t->suggested_fix);
802 t->rank = MemFree (t->rank);
803 t = MemFree (t);
804 }
805 return t;
806 }
807
808
TaxFixItemListFree(ValNodePtr vnp)809 NLM_EXTERN ValNodePtr LIBCALLBACK TaxFixItemListFree (ValNodePtr vnp)
810 {
811 ValNodePtr vnp_next;
812
813 while (vnp != NULL) {
814 vnp_next = vnp->next;
815 vnp->next = NULL;
816 vnp->data.ptrvalue = TaxFixItemFree (vnp->data.ptrvalue);
817 vnp = ValNodeFree (vnp);
818 vnp = vnp_next;
819 }
820 return vnp;
821 }
822
823
/*
 * StringSum
 *
 * Returns a newly allocated concatenation of str1 and str2.  A NULL
 * argument is treated as absent: the result is then a copy of the other
 * string, or NULL when both are NULL.
 */
static CharPtr StringSum (CharPtr str1, CharPtr str2)
{
  CharPtr joined;

  if (str1 == NULL) {
    return (str2 == NULL) ? NULL : StringSave (str2);
  }
  if (str2 == NULL) {
    return StringSave (str1);
  }

  joined = (CharPtr) MemNew (sizeof (Char) * (StringLen (str1) + StringLen (str2) + 1));
  sprintf (joined, "%s%s", str1, str2);
  return joined;
}
840
841
/*
 * MakeUnculturedName
 *
 * Builds "uncultured <taxname><suffix>" in newly allocated memory.
 *   - "uncultured " is not added when taxname already starts with it
 *     (case-sensitive)
 *   - suffix is not added when it is NULL or taxname already ends with it
 *
 * Returns the new string, or NULL when taxname is NULL or the allocation
 * fails.
 */
static CharPtr MakeUnculturedName (CharPtr taxname, CharPtr suffix)
{
  CharPtr uncultured = "uncultured ";
  Int4    taxname_len, suffix_len = 0, uncultured_len;
  Int4    len;
  CharPtr name = NULL;
  Boolean add_suffix = FALSE, add_uncultured = FALSE;

  if (taxname == NULL) {
    return NULL;
  }

  taxname_len = StringLen (taxname);
  len = taxname_len + 1;                      /* + 1 for the terminator */

  if (suffix != NULL) {
    suffix_len = StringLen (suffix);
    /* Compare against the real tail of taxname.  The old code used the
       length including the terminator as the offset, which started the
       comparison one character too late, so an existing suffix was never
       recognized and was appended a second time. */
    if (taxname_len < suffix_len
        || StringCmp (taxname + taxname_len - suffix_len, suffix) != 0) {
      add_suffix = TRUE;
      len += suffix_len;
    }
  }

  uncultured_len = StringLen (uncultured);
  if (StringNCmp (taxname, uncultured, uncultured_len) != 0) {
    add_uncultured = TRUE;
    len += uncultured_len;
  }

  name = (CharPtr) MemNew (sizeof (Char) * len);
  if (name == NULL) {
    return NULL;
  }
  name[0] = 0;
  if (add_uncultured) {
    StringCat(name, uncultured);
  }
  StringCat(name, taxname);
  if (add_suffix) {
    StringCat(name, suffix);
  }
  return name;
}
881
882
IsArchaea(OrgRefPtr response_org)883 static Boolean IsArchaea(OrgRefPtr response_org)
884 {
885 if (response_org != NULL && response_org->orgname != NULL
886 && StringISearch (response_org->orgname->lineage, "archaea") != NULL) {
887 return TRUE;
888 } else {
889 return FALSE;
890 }
891 }
892
893
IsBacteria(OrgRefPtr response_org)894 static Boolean IsBacteria(OrgRefPtr response_org)
895 {
896 if (response_org != NULL && response_org->orgname != NULL
897 && StringISearch (response_org->orgname->lineage, "bacteria") != NULL) {
898 return TRUE;
899 } else {
900 return FALSE;
901 }
902 }
903
904
IsFungi(OrgRefPtr response_org)905 static Boolean IsFungi(OrgRefPtr response_org)
906 {
907 if (response_org != NULL && response_org->orgname != NULL
908 && StringISearch (response_org->orgname->lineage, " Fungi;") != NULL) {
909 return TRUE;
910 } else {
911 return FALSE;
912 }
913 }
914
915
SuggestedTaxNameFixFromOrgAndRank(TaxFixItemPtr t)916 static CharPtr SuggestedTaxNameFixFromOrgAndRank (TaxFixItemPtr t)
917 {
918 CharPtr fix = NULL;
919 CharPtr stop;
920
921 if (t == NULL || t->response_org == NULL) {
922 return NULL;
923 }
924
925 if (StringICmp (t->rank, "species") == 0) {
926 if (t->is_species_specific) {
927 fix = StringSave (t->response_org->taxname);
928 } else {
929 /* truncate binomial, need to check again */
930 stop = StringChr (t->taxname, ' ');
931 if (stop != NULL) {
932 *stop = 0;
933 t->truncate_binomial = TRUE;
934 }
935 }
936 } else if (t->response_org->orgname != NULL) {
937 if (StringICmp (t->rank, "genus") == 0) {
938 if (IsArchaea(t->response_org) || IsBacteria(t->response_org)) {
939 fix = MakeUnculturedName (t->response_org->taxname, " sp.");
940 } else if (IsFungi(t->response_org)) {
941 fix = MakeUnculturedName (t->response_org->taxname, NULL);
942 }
943 } else {
944 if (IsArchaea(t->response_org)) {
945 fix = MakeUnculturedName (t->response_org->taxname, " archaeon");
946 } else if (IsBacteria(t->response_org)) {
947 fix = MakeUnculturedName (t->response_org->taxname, " bacterium");
948 } else if (IsFungi(t->response_org)) {
949 fix = MakeUnculturedName (t->response_org->taxname, NULL);
950 }
951 }
952 if (t->is_species_specific) {
953 t->remove_species_specific = TRUE;
954 }
955 }
956 return fix;
957 }
958
959
/*
 * StripCommas
 *
 * Removes commas from str in place.  A comma that is the last character,
 * or is followed by a space, is dropped; any other comma is replaced by a
 * single space.  NULL is accepted and ignored.
 */
static void StripCommas (CharPtr str)
{
  CharPtr rd, wr;
  Char    following;

  if (str == NULL) {
    return;
  }

  wr = str;
  for (rd = str; *rd != 0; rd++) {
    if (*rd != ',') {
      *wr++ = *rd;
      continue;
    }
    following = *(rd + 1);
    if (following != 0 && following != ' ') {
      /* comma between two non-space characters becomes a separator */
      *wr++ = ' ';
    }
  }
  *wr = 0;
}
988
989
/*
 * MakeTaxFixRequestList
 *
 * Builds a taxonomy request list from a list of objects that carry a
 * BioSource (resolved with GetBioSourceFromObject).  For every input node
 * one node with choice 3 is produced, holding a private copy of the
 * source's OrgRef whose taxname has been normalized:
 *   - a trailing " sp" gets a period appended (" sp.")
 *   - commas are removed (see StripCommas)
 *
 * Objects without a BioSource or OrgRef used to cause a NULL dereference.
 * They now produce a node with a NULL OrgRef, so the output keeps one node
 * per input node.
 * NOTE(review): confirm that the consumers of this list accept NULL
 * entries.
 *
 * Returns the new list (NULL for an empty input).  The input objects are
 * not modified.
 */
static ValNodePtr MakeTaxFixRequestList (ValNodePtr biop_list)
{
  ValNodePtr   rq_list = NULL, prev = NULL;
  BioSourcePtr biop;
  OrgRefPtr    org;
  CharPtr      new_name;
  Int4         len;

  while (biop_list != NULL) {
    org = NULL;
    biop = GetBioSourceFromObject (biop_list->choice, biop_list->data.ptrvalue);
    if (biop != NULL) {
      org = AsnIoMemCopy (biop->org, (AsnReadFunc) OrgRefAsnRead, (AsnWriteFunc) OrgRefAsnWrite);
    }
    if (org != NULL) {
      /* add period to end of sp */
      if ((len = StringLen (org->taxname)) > 3 && StringCmp (org->taxname + len - 3, " sp") == 0) {
        new_name = StringSum (org->taxname, ".");
        org->taxname = MemFree (org->taxname);
        org->taxname = new_name;
      }
      /* strip commas */
      StripCommas (org->taxname);
    }

    ValNodeAddPointer (&prev, 3, org);
    if (rq_list == NULL) {
      rq_list = prev;
    }
    biop_list = biop_list->next;
  }
  return rq_list;
}
1018
1019
/*
 * CheckSuggestedFixes
 *
 * Validates the suggested_fix of every TaxFixItem in tax_fix_list against
 * the taxonomy service.  Each suggestion is sent as a name lookup (in
 * chunks of at most 2000).  Responses are paired with suggestions by
 * position; each paired suggestion is replaced by the taxname the server
 * returned when the reply is flagged with rank "species", and by NULL
 * otherwise (error reply, other rank, missing organism).
 *
 * Items without a suggestion are untouched.  Suggestions for which no
 * response was received are left unchanged.  If a request cannot be built,
 * nothing is modified.
 *
 * NOTE(review): pairing is positional, so a chunk whose query returns NULL
 * shifts all later responses -- confirm whether that can occur.
 */
static void CheckSuggestedFixes (ValNodePtr tax_fix_list)
{
  ValNodePtr       rq_list = NULL, rp_list = NULL, prev, vnp_rq, vnp_rp;
  ValNodePtr       vnp, next_org_list, last_org;
  Int4             request_num, max_requests = 2000;
  TaxFixItemPtr    t;
  Taxon3RequestPtr t3rq;
  Taxon3ReplyPtr   t3ry;
  T3DataPtr        tdp;
  T3ReplyPtr       trp;
  T3StatusFlagsPtr tfp;
  OrgRefPtr        org;
  Boolean          is_species;

  /* collect the suggestions to verify, in list order (choice 2 = name) */
  prev = NULL;
  for (vnp = tax_fix_list; vnp != NULL; vnp = vnp->next) {
    t = (TaxFixItemPtr) vnp->data.ptrvalue;
    if (t != NULL && t->suggested_fix != NULL) {
      ValNodeAddPointer (&prev, 2, StringSave (t->suggested_fix));
    }
    if (rq_list == NULL) {
      rq_list = prev;
    }
  }

  /* now break large lists into manageable chunks */
  vnp = rq_list;
  while (vnp != NULL) {
    next_org_list = vnp->next;
    last_org = vnp;
    request_num = 1;
    while (next_org_list != NULL && request_num < max_requests) {
      last_org = next_org_list;
      next_org_list = next_org_list->next;
      request_num++;
    }
    /* temporarily cut the chunk out of the list */
    if (last_org != NULL) {
      last_org->next = NULL;
    }

    /* now create the request */

    t3rq = CreateMultiTaxon3Request (vnp);
    if (t3rq == NULL) {
      /* relink the chunk so the whole request list is released, and drop
         the responses gathered so far */
      if (last_org != NULL) {
        last_org->next = next_org_list;
      }
      rq_list = ValNodeFreeData (rq_list);
      rp_list = ValNodeFreeData (rp_list);
      return;
    }
    t3ry = Tax3SynchronousQuery (t3rq);
    Taxon3RequestFree (t3rq);
    if (t3ry != NULL) {
      for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
        switch (trp->choice) {
          case T3Reply_error :
            ValNodeAddPointer (&rp_list, 0, NULL);
            break;
          case T3Reply_data :
            tdp = (T3DataPtr) trp->data.ptrvalue;
            is_species = FALSE;
            org = NULL;
            if (tdp != NULL) {
              org = (OrgRefPtr) tdp->org;
              for (tfp = tdp->status; tfp != NULL; tfp = tfp->next) {
                if (StringICmp (tfp->property, "rank") == 0
                    && tfp->Value_value != NULL
                    && tfp->Value_value->choice == Value_value_str
                    && StringICmp (tfp->Value_value->data.ptrvalue, "species") == 0) {
                  is_species = TRUE;
                }
              }
            }
            /* a species reply without an organism counts as "no fix" */
            if (is_species && org != NULL) {
              ValNodeAddPointer (&rp_list, 0, StringSave (org->taxname));
            } else {
              ValNodeAddPointer (&rp_list, 0, NULL);
            }
            break;
          default :
            /* keep one response per reply element, as TryNewSuggestedFixes does */
            ValNodeAddPointer (&rp_list, 0, NULL);
            break;
        }
      }
      Taxon3ReplyFree (t3ry);
    }

    /* splice the chunk back in and move on to the next one */
    if (last_org != NULL) {
      last_org->next = next_org_list;
    }
    vnp = next_org_list;
  }
  rq_list = ValNodeFreeData (rq_list);

  /* adjust suggested fixes: the n-th response belongs to the n-th item
     that had a suggestion */
  vnp_rq = tax_fix_list;
  vnp_rp = rp_list;

  while (vnp_rq != NULL && vnp_rp != NULL) {
    t = (TaxFixItemPtr) vnp_rq->data.ptrvalue;
    if (t != NULL && t->suggested_fix != NULL) {
      t->suggested_fix = MemFree (t->suggested_fix);
      /* the string moves from the response list to the item */
      t->suggested_fix = vnp_rp->data.ptrvalue;
      vnp_rp->data.ptrvalue = NULL;
      vnp_rp = vnp_rp->next;
    }
    vnp_rq = vnp_rq->next;
  }
  rp_list = ValNodeFreeData (rp_list);
}
1128
1129
/* Callback used by TryNewSuggestedFixes: given an item's taxname, returns a
   candidate replacement name, or NULL when it has nothing to propose.
   The returned string is stored in a list that is later released with
   ValNodeFreeData, so it is presumably expected to be heap-allocated --
   TODO confirm against the callbacks. */
1130 typedef CharPtr (*TryTaxFixChangeFunc) PROTO ((CharPtr orig));
1131
/*
 * TryNewSuggestedFixes
 *
 * For every TaxFixItem that has no suggested_fix yet, asks func for a
 * candidate name derived from the item's taxname and looks the candidates
 * up in the taxonomy service (in chunks of at most 2000).  Responses are
 * paired with candidates by position; an item receives the server's
 * taxname as its suggested_fix when the reply is flagged with rank
 * "species", and keeps a NULL suggestion otherwise.
 *
 * Parameters:
 *   tax_fix_list - ValNode list of TaxFixItems; NULL entries are skipped
 *   func         - candidate generator; with NULL nothing is done
 *
 * If a request cannot be built, no item is modified.
 *
 * NOTE(review): pairing is positional, so a chunk whose query returns NULL
 * shifts all later responses -- confirm whether that can occur.
 */
static void TryNewSuggestedFixes (ValNodePtr tax_fix_list, TryTaxFixChangeFunc func)
{
  ValNodePtr       rq_list = NULL, rp_list = NULL, prev, vnp_rq, vnp_rp;
  ValNodePtr       vnp, next_org_list, last_org;
  Int4             request_num, max_requests = 2000;
  TaxFixItemPtr    t;
  Taxon3RequestPtr t3rq;
  Taxon3ReplyPtr   t3ry;
  T3DataPtr        tdp;
  T3ReplyPtr       trp;
  T3StatusFlagsPtr tfp;
  OrgRefPtr        org;
  Boolean          is_species;
  ValNodePtr       fix_copy = NULL, fix_prev = NULL;
  CharPtr          tmp;

  if (func == NULL) {
    return;
  }

  /* build two parallel lists: candidate names to query (choice 2 = name)
     and the items they belong to */
  prev = NULL;
  for (vnp = tax_fix_list; vnp != NULL; vnp = vnp->next) {
    t = (TaxFixItemPtr) vnp->data.ptrvalue;
    if (t != NULL && t->suggested_fix == NULL) {
      tmp = func(t->taxname);
      if (tmp != NULL) {
        ValNodeAddPointer (&prev, 2, tmp);
        if (rq_list == NULL) {
          rq_list = prev;
        }
        ValNodeAddPointer (&fix_prev, 0, t);
        if (fix_copy == NULL) {
          fix_copy = fix_prev;
        }
      }
    }
  }

  /* now break large lists into manageable chunks */
  vnp = rq_list;
  while (vnp != NULL) {
    next_org_list = vnp->next;
    last_org = vnp;
    request_num = 1;
    while (next_org_list != NULL && request_num < max_requests) {
      last_org = next_org_list;
      next_org_list = next_org_list->next;
      request_num++;
    }
    /* temporarily cut the chunk out of the list */
    if (last_org != NULL) {
      last_org->next = NULL;
    }

    /* now create the request */

    t3rq = CreateMultiTaxon3Request (vnp);
    if (t3rq == NULL) {
      /* relink the chunk so the whole request list is released, and drop
         the other working lists (fix_copy only borrows the items) */
      if (last_org != NULL) {
        last_org->next = next_org_list;
      }
      rq_list = ValNodeFreeData (rq_list);
      rp_list = ValNodeFreeData (rp_list);
      fix_copy = ValNodeFree (fix_copy);
      return;
    }
    t3ry = Tax3SynchronousQuery (t3rq);
    Taxon3RequestFree (t3rq);
    if (t3ry != NULL) {
      for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
        switch (trp->choice) {
          case T3Reply_error :
            ValNodeAddPointer (&rp_list, 0, NULL);
            break;
          case T3Reply_data :
            tdp = (T3DataPtr) trp->data.ptrvalue;
            is_species = FALSE;
            org = NULL;
            if (tdp != NULL) {
              org = (OrgRefPtr) tdp->org;
              for (tfp = tdp->status; tfp != NULL; tfp = tfp->next) {
                if (StringICmp (tfp->property, "rank") == 0
                    && tfp->Value_value != NULL
                    && tfp->Value_value->choice == Value_value_str
                    && StringICmp (tfp->Value_value->data.ptrvalue, "species") == 0) {
                  is_species = TRUE;
                }
              }
            }
            /* a species reply without an organism counts as "no fix" */
            if (is_species && org != NULL) {
              ValNodeAddPointer (&rp_list, 0, StringSave (org->taxname));
            } else {
              ValNodeAddPointer (&rp_list, 0, NULL);
            }
            break;
          default :
            ValNodeAddPointer (&rp_list, 0, NULL);
            break;
        }
      }
      Taxon3ReplyFree (t3ry);
    }

    /* splice the chunk back in and move on to the next one */
    if (last_org != NULL) {
      last_org->next = next_org_list;
    }
    vnp = next_org_list;
  }
  rq_list = ValNodeFreeData (rq_list);

  /* adjust suggested fixes: the n-th response belongs to the n-th item */
  vnp_rq = fix_copy;
  vnp_rp = rp_list;

  while (vnp_rq != NULL && vnp_rp != NULL) {
    t = (TaxFixItemPtr) vnp_rq->data.ptrvalue;
    t->suggested_fix = MemFree (t->suggested_fix);
    /* the string moves from the response list to the item */
    t->suggested_fix = vnp_rp->data.ptrvalue;
    vnp_rp->data.ptrvalue = NULL;
    vnp_rq = vnp_rq->next;
    vnp_rp = vnp_rp->next;
  }
  rp_list = ValNodeFreeData (rp_list);
  /* note - fix_copy points to data that will be freed elsewhere */
  fix_copy = ValNodeFree (fix_copy);
}
1246
1247 static const CharPtr SUncultured = "uncultured ";
1248 const Int4 SUnculturedLen = 11;
1249
/* StandardFixes
 *
 * Returns a newly allocated, normalized copy of a taxname:
 *   - a leading "uncultured " (compared case-insensitively) is dropped;
 *   - a trailing " sp" or " sp." (compared case-insensitively) is dropped;
 *   - every comma is removed, and a comma that is not directly followed by
 *     a space is replaced by a space so adjacent words stay separated.
 *
 * The result comes from StringSave, so the caller is responsible for it.
 */
static CharPtr StandardFixes (CharPtr orig)
{
  CharPtr fixed;
  CharPtr reader, writer;
  Int4    fixed_len;

  /* start from a private copy, skipping the "uncultured " prefix if present */
  if (StringNICmp (orig, SUncultured, SUnculturedLen) != 0) {
    fixed = StringSave (orig);
  } else {
    fixed = StringSave (orig + SUnculturedLen);
  }

  /* remove trailing sp. */
  fixed_len = StringLen (fixed);
  if (fixed_len > 3 && StringICmp (fixed + fixed_len - 3, " sp") == 0) {
    fixed[fixed_len - 3] = 0;
  } else if (fixed_len > 4 && StringICmp (fixed + fixed_len - 4, " sp.") == 0) {
    fixed[fixed_len - 4] = 0;
  }

  /* remove commas - nothing to do when the name has none */
  if (StringChr (fixed, ',') == NULL) {
    return fixed;
  }

  /* compact the string in place; writer never gets ahead of reader */
  writer = fixed;
  for (reader = fixed; *reader != 0; reader++) {
    if (*reader != ',') {
      *writer = *reader;
      writer++;
    } else if (*(reader + 1) != ' ') {
      *writer = ' ';
      writer++;
    }
  }
  *writer = 0;

  return fixed;
}
1290
1291
/* AddUnculturedIfNotPresent
 *
 * Returns a newly allocated "uncultured <orig>" string, or NULL when orig
 * has no text or already starts with "uncultured " (case-insensitive).
 */
static CharPtr AddUnculturedIfNotPresent (CharPtr orig)
{
  CharPtr with_prefix;

  if (StringHasNoText (orig)) {
    return NULL;
  }
  if (StringNICmp (orig, "uncultured ", 11) == 0) {
    return NULL;
  }

  /* 11 characters of prefix plus the terminating NUL */
  with_prefix = MemNew (sizeof (Char) * (StringLen (orig) + 12));
  sprintf (with_prefix, "uncultured %s", orig);
  return with_prefix;
}
1301
1302
/* TryUnculturedAndSp
 *
 * Builds the candidate name "uncultured <orig> sp.", adding only the parts
 * that are missing: the "uncultured " prefix is added unless orig already
 * starts with it, and the " sp." suffix is added unless orig already ends
 * with it (both compared case-insensitively).
 *
 * Returns a newly allocated string, or NULL when orig has no text, when orig
 * already carries both parts, or when allocation fails.
 */
static CharPtr TryUnculturedAndSp (CharPtr orig)
{
  CharPtr val = NULL;
  Int4    len;
  Boolean prefix = FALSE, suffix = FALSE;

  if (StringHasNoText (orig)) {
    return NULL;
  }
  len = StringLen (orig);

  /* A name shorter than four characters cannot end in " sp.", so it needs
   * the suffix as well; only look at the tail when it is long enough.
   */
  if (len < 4 || StringICmp (orig + len - 4, " sp.") != 0) {
    suffix = TRUE;
    len += 4;
  }
  if (StringNICmp (orig, "uncultured ", 11) != 0) {
    prefix = TRUE;
    len += 11;
  }

  if (prefix || suffix) {
    /* len now counts orig plus every part being added; +1 for the NUL */
    val = MemNew (sizeof (Char) * (len + 1));
    if (val != NULL) {
      sprintf (val, "%s%s%s", prefix ? "uncultured " : "", orig, suffix ? " sp." : "");
    }
  }
  return val;
}
1329
1330 static const CharPtr sAmplifiedSpeciesSpecific[] = {
1331 "[BankIt_uncultured16S_wizard]; [species_specific primers]; [tgge]",
1332 "[BankIt_uncultured16S_wizard]; [species_specific primers]; [dgge]",
1333 "[BankIt_uncultured16S_wizard]; [species_specific primers]",
1334 "[uncultured (with species-specific primers)]",
1335 "[uncultured]; [amplified with species-specific primers]",
1336 "[uncultured (using species-specific primers) bacterial source]",
1337 "amplified with species-specific primers",
1338 NULL
1339 };
1340
1341
IsSpeciesSpecificNote(CharPtr note)1342 static Boolean IsSpeciesSpecificNote(CharPtr note)
1343 {
1344 Int4 i;
1345 Boolean rval = FALSE;
1346
1347 if (note == NULL || StringHasNoText(note)) {
1348 return FALSE;
1349 }
1350 for (i = 0; sAmplifiedSpeciesSpecific[i] != NULL && !rval; i++) {
1351 if (StringISearch (note, sAmplifiedSpeciesSpecific[i]) != NULL) {
1352 rval = TRUE;
1353 }
1354 }
1355 return rval;
1356 }
1357
1358
IsSpeciesSpecific(BioSourcePtr biop)1359 static Boolean IsSpeciesSpecific (BioSourcePtr biop)
1360 {
1361 Boolean rval = FALSE;
1362 SubSourcePtr ssp;
1363
1364 if (biop != NULL) {
1365 for (ssp = biop->subtype; ssp != NULL && !rval; ssp = ssp->next) {
1366 if (ssp->subtype == SUBSRC_other
1367 && IsSpeciesSpecificNote (ssp->name)) {
1368 rval = TRUE;
1369 }
1370 }
1371 }
1372 return rval;
1373 }
1374
1375
RemoveSpeciesSpecificFromNote(CharPtr note)1376 static Boolean RemoveSpeciesSpecificFromNote(CharPtr note)
1377 {
1378 Int4 i;
1379 CharPtr cp;
1380 Boolean rval = FALSE;
1381
1382 if (note == NULL || StringHasNoText (note)) {
1383 return rval;
1384 }
1385 for (i = 0; sAmplifiedSpeciesSpecific[i] != NULL; i++) {
1386 if ((cp = StringISearch (note, sAmplifiedSpeciesSpecific[i])) != NULL) {
1387 StringCpy (cp, cp + StringLen (sAmplifiedSpeciesSpecific[i]));
1388 rval = TRUE;
1389 }
1390 }
1391 return rval;
1392 }
1393
1394
RemoveSpeciesSpecific(BioSourcePtr biop)1395 NLM_EXTERN void RemoveSpeciesSpecific (BioSourcePtr biop)
1396 {
1397 SubSourcePtr ssp, ssp_prev = NULL, ssp_next;
1398 CharPtr cp;
1399
1400 if (biop != NULL) {
1401 for (ssp = biop->subtype; ssp != NULL; ssp = ssp_next) {
1402 ssp_next = ssp->next;
1403 if (ssp->subtype == SUBSRC_other
1404 && RemoveSpeciesSpecificFromNote(ssp->name)
1405 && StringHasNoText (ssp->name)) {
1406 ssp = SubSourceFree (ssp);
1407 if (ssp_prev == NULL) {
1408 biop->subtype = ssp_next;
1409 } else {
1410 ssp_prev->next = ssp_next;
1411 }
1412 } else {
1413 ssp_prev = ssp;
1414 }
1415 }
1416 }
1417 }
1418
1419
1420 static CharPtr sUnfixable[] = {
1421 "rickettsia",
1422 "candidatus",
1423 "endosymbiont",
1424 "phytoplasma",
1425 "wolbachia",
1426 NULL
1427 };
1428
OkToTaxFix(CharPtr orgname)1429 NLM_EXTERN Boolean OkToTaxFix(CharPtr orgname)
1430 {
1431 Int4 i;
1432 Boolean rval = TRUE;
1433
1434 if (orgname == NULL || StringHasNoText (orgname)) {
1435 return FALSE;
1436 }
1437
1438 for (i = 0; sUnfixable[i] != NULL && rval; i++) {
1439 if (StringISearch(orgname, sUnfixable[i]) != NULL) {
1440 rval = FALSE;
1441 }
1442 }
1443 return rval;
1444 }
1445
1446
/* BuildBlankTaxFixList
 *
 * Creates one TaxFixItem per node of biop_list, in the same order, and
 * returns them as a ValNode list (choice 0).  Each item records the node's
 * choice and data pointer.  When the node resolves to a BioSource with a
 * taxname, the item also records the original org (not copied) and a
 * StandardFixes-normalized copy of the taxname.  is_species_specific is set
 * from the BioSource's notes.
 */
static ValNodePtr BuildBlankTaxFixList (ValNodePtr biop_list)
{
  ValNodePtr    head = NULL, add_point = NULL;
  ValNodePtr    src;
  BioSourcePtr  biop;
  TaxFixItemPtr item;

  for (src = biop_list; src != NULL; src = src->next) {
    item = TaxFixItemNew ();
    item->data_choice = src->choice;
    item->data = src->data.ptrvalue;

    biop = GetBioSourceFromObject (src->choice, src->data.ptrvalue);
    if (biop != NULL && biop->org != NULL && biop->org->taxname != NULL) {
      item->orig_org = biop->org;
      item->taxname = StandardFixes (biop->org->taxname);
    }
    item->is_species_specific = IsSpeciesSpecific (biop);

    ValNodeAddPointer (&add_point, 0, item);
    if (head == NULL) {
      /* first node created: remember it as the list to return */
      head = add_point;
    }
  }
  return head;
}
1471
1472
/* BuildRequestFromTaxnameList
 *
 * Returns a ValNode list (choice 2) holding one freshly copied name per
 * TaxFixItem in taxfix_list, in the same order: the item's suggested_fix
 * when it has one, otherwise its taxname.
 */
static ValNodePtr BuildRequestFromTaxnameList (ValNodePtr taxfix_list)
{
  ValNodePtr    head = NULL, add_point = NULL;
  ValNodePtr    src;
  TaxFixItemPtr item;
  CharPtr       name;

  for (src = taxfix_list; src != NULL; src = src->next) {
    item = (TaxFixItemPtr) src->data.ptrvalue;
    name = (item->suggested_fix == NULL) ? item->taxname : item->suggested_fix;
    ValNodeAddPointer (&add_point, 2, StringSave (name));
    if (head == NULL) {
      head = add_point;
    }
  }
  return head;
}
1492
1493
1494 /* what is passed in is a list of names (request_list), a list of fixes (to be filled in),
1495 * and the number of names to include per server request.
1496 */
GetSuggestedNamesFromRank(ValNodePtr request_list,ValNodePtr taxfix_list,Int4 max_requests)1497 static void GetSuggestedNamesFromRank (ValNodePtr request_list, ValNodePtr taxfix_list, Int4 max_requests)
1498 {
1499 ValNodePtr start_request, vnp_rp;
1500 ValNodePtr next_org_list, last_org;
1501 Int4 request_num;
1502 TaxFixItemPtr t;
1503 Taxon3RequestPtr t3rq;
1504 Taxon3ReplyPtr t3ry;
1505 T3DataPtr tdp;
1506 T3ReplyPtr trp;
1507 T3ErrorPtr tep;
1508 T3StatusFlagsPtr tfp;
1509
1510 /* break large lists into manageable chunks */
1511 start_request = request_list;
1512 vnp_rp = taxfix_list;
1513 while (start_request != NULL && vnp_rp != NULL) {
1514 next_org_list = start_request->next;
1515 last_org = start_request;
1516 request_num = 1;
1517 while (next_org_list != NULL && request_num < max_requests) {
1518 last_org = next_org_list;
1519 next_org_list = next_org_list->next;
1520 request_num++;
1521 }
1522 if (last_org != NULL) {
1523 last_org->next = NULL;
1524 }
1525
1526 /* now create the request */
1527
1528 t3rq = CreateMultiTaxon3Request (start_request);
1529 if (t3rq == NULL) return;
1530 t3ry = Tax3SynchronousQuery (t3rq);
1531 Taxon3RequestFree (t3rq);
1532 if (t3ry != NULL) {
1533 for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
1534 switch (trp->choice) {
1535 case T3Reply_error :
1536 tep = (T3ErrorPtr) trp->data.ptrvalue;
1537 t = (TaxFixItemPtr) vnp_rp->data.ptrvalue;
1538 if (tep != NULL && StringCmp (tep->message, "Taxname is ambiguous") == 0) {
1539 t->is_ambiguous = TRUE;
1540 }
1541 vnp_rp = vnp_rp->next;
1542 break;
1543 case T3Reply_data :
1544 tdp = (T3DataPtr) trp->data.ptrvalue;
1545 if (tdp != NULL) {
1546 t = (TaxFixItemPtr) vnp_rp->data.ptrvalue;
1547 if (t->suggested_fix == NULL) {
1548 t->response_org = (OrgRefPtr)(tdp->org);
1549 tdp->org = NULL;
1550 for (tfp = tdp->status; tfp != NULL; tfp = tfp->next) {
1551 if (StringICmp (tfp->property, "rank") == 0
1552 && tfp->Value_value != NULL
1553 && tfp->Value_value->choice == Value_value_str) {
1554 t->rank = StringSave (tfp->Value_value->data.ptrvalue);
1555 }
1556 }
1557 t->suggested_fix = SuggestedTaxNameFixFromOrgAndRank (t);
1558 }
1559 vnp_rp = vnp_rp->next;
1560 }
1561 break;
1562 default :
1563 vnp_rp = vnp_rp->next;
1564 break;
1565 }
1566 }
1567 Taxon3ReplyFree (t3ry);
1568 }
1569
1570 if (last_org != NULL) {
1571 last_org->next = next_org_list;
1572 }
1573 start_request = next_org_list;
1574 }
1575 }
1576
1577
/* AddAmbiguousRequests
 *
 * For every item of taxfix_list that is flagged ambiguous and has no
 * suggested fix yet, sets suggested_fix to "uncultured <taxname> <domain>".
 * The "uncultured " prefix is not repeated when the taxname already starts
 * with it (case-insensitive).
 */
static void AddAmbiguousRequests (ValNodePtr taxfix_list, CharPtr domain)
{
  CharPtr       prefix = "uncultured ";
  Int4          prefix_len;
  Int4          needed;
  ValNodePtr    vnp;
  TaxFixItemPtr item;

  prefix_len = StringLen (prefix);
  for (vnp = taxfix_list; vnp != NULL; vnp = vnp->next) {
    item = (TaxFixItemPtr) vnp->data.ptrvalue;
    if (!item->is_ambiguous || item->suggested_fix != NULL) {
      continue;
    }

    /* prefix + taxname + one separating space + domain + NUL */
    needed = StringLen (item->taxname) + StringLen (domain) + prefix_len + 2;
    item->suggested_fix = (CharPtr) MemNew (sizeof (Char) * needed);
    if (StringNICmp (item->taxname, prefix, prefix_len) != 0) {
      sprintf(item->suggested_fix, "%s%s %s", prefix, item->taxname, domain);
    } else {
      sprintf(item->suggested_fix, "%s %s", item->taxname, domain);
    }
  }
}
1600
1601
/* TryAmbiguousFixes
 *
 * Collects the items of taxfix_list that are flagged ambiguous and tries two
 * disambiguated names for them in turn: first with "bacterium", then with
 * "archaeon".  CheckSuggestedFixes is run after each attempt.  The temporary
 * list only borrows the items, so only its nodes are freed.
 */
static void TryAmbiguousFixes(ValNodePtr taxfix_list)
{
  ValNodeBlock  candidates;
  ValNodePtr    cursor;
  TaxFixItemPtr item;

  /* try ambiguous values */
  InitValNodeBlock (&candidates, NULL);
  cursor = taxfix_list;
  while (cursor != NULL) {
    item = cursor->data.ptrvalue;
    if (item->is_ambiguous) {
      ValNodeAddPointerToEnd (&candidates, 0, item);
    }
    cursor = cursor->next;
  }

  if (candidates.head == NULL) {
    return;
  }

  AddAmbiguousRequests (candidates.head, "bacterium");
  CheckSuggestedFixes (candidates.head);
  AddAmbiguousRequests (candidates.head, "archaeon");
  CheckSuggestedFixes (candidates.head);
  candidates.head = ValNodeFree (candidates.head);
}
1624
1625
/* TryRankFix
 *
 * Looks up the current name of every item in taxfix_list (2000 names per
 * server request), lets GetSuggestedNamesFromRank fill in fixes, and then
 * runs CheckSuggestedFixes over the list.
 */
static void TryRankFix (ValNodePtr taxfix_list)
{
  ValNodePtr names;

  names = BuildRequestFromTaxnameList (taxfix_list);
  GetSuggestedNamesFromRank (names, taxfix_list, 2000);
  /* the name copies were only needed for the request */
  ValNodeFreeData (names);

  CheckSuggestedFixes (taxfix_list);
}
1636
1637
/* TryBinomalTruncations
 *
 * Re-runs TryRankFix on just those items of taxfix_list whose
 * truncate_binomial flag is set.  The temporary list only borrows the items,
 * so only its nodes are freed.
 */
static void TryBinomalTruncations(ValNodePtr taxfix_list)
{
  ValNodeBlock  to_truncate;
  ValNodePtr    cursor;
  TaxFixItemPtr item;

  /* only fill in binomial truncations */
  InitValNodeBlock (&to_truncate, NULL);
  cursor = taxfix_list;
  while (cursor != NULL) {
    item = cursor->data.ptrvalue;
    if (item->truncate_binomial) {
      ValNodeAddPointerToEnd (&to_truncate, 0, item);
    }
    cursor = cursor->next;
  }

  if (to_truncate.head == NULL) {
    return;
  }
  TryRankFix (to_truncate.head);
  to_truncate.head = ValNodeFree (to_truncate.head);
}
1659
1660
/* ProvideDefaultTaxFixes
 *
 * Gives every item of taxfix_list that still has no (or only a blank)
 * suggested fix the name produced by MakeUnculturedName for its taxname.
 */
static void ProvideDefaultTaxFixes(ValNodePtr taxfix_list)
{
  ValNodePtr    cursor;
  TaxFixItemPtr item;

  cursor = taxfix_list;
  while (cursor != NULL) {
    item = (TaxFixItemPtr) cursor->data.ptrvalue;
    cursor = cursor->next;
    if (item == NULL || !StringHasNoText (item->suggested_fix)) {
      continue;
    }
    /* release a blank placeholder before installing the default */
    MemFree (item->suggested_fix);
    item->suggested_fix = MakeUnculturedName(item->taxname, NULL);
  }
}
1674
1675
Taxon3GetTaxFixList(ValNodePtr biop_list)1676 NLM_EXTERN ValNodePtr Taxon3GetTaxFixList (ValNodePtr biop_list)
1677 {
1678 ValNodePtr response_list = NULL;
1679
1680 if (biop_list == NULL) {
1681 return NULL;
1682 }
1683
1684 response_list = BuildBlankTaxFixList(biop_list);
1685
1686 TryRankFix (response_list);
1687 TryBinomalTruncations (response_list);
1688 TryAmbiguousFixes (response_list);
1689
1690 TryNewSuggestedFixes (response_list, AddUnculturedIfNotPresent);
1691 TryNewSuggestedFixes (response_list, TryUnculturedAndSp);
1692 ProvideDefaultTaxFixes (response_list);
1693
1694 return response_list;
1695 }
1696
1697
Taxon3GetOrg(OrgRefPtr orp)1698 NLM_EXTERN OrgRefPtr Taxon3GetOrg (OrgRefPtr orp)
1699
1700 {
1701 Taxon3RequestPtr t3rq;
1702 Taxon3ReplyPtr t3ry;
1703 T3DataPtr tdp;
1704 OrgRefPtr t3orp = NULL;
1705 T3ReplyPtr trp;
1706 T3ErrorPtr tep;
1707
1708 if (orp == NULL) return NULL;
1709
1710 t3rq = CreateTaxon3Request (0, NULL, orp);
1711 if (t3rq == NULL) return NULL;
1712 t3ry = Tax3SynchronousQuery (t3rq);
1713 Taxon3RequestFree (t3rq);
1714 if (t3ry != NULL) {
1715 for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
1716 switch (trp->choice) {
1717 case T3Reply_error :
1718 tep = (T3ErrorPtr) trp->data.ptrvalue;
1719 if (tep != NULL) {
1720 ErrPostEx (SEV_ERROR, 0, 0, tep->message);
1721 }
1722 break;
1723 case T3Reply_data :
1724 tdp = (T3DataPtr) trp->data.ptrvalue;
1725 if (tdp != NULL) {
1726 t3orp = (OrgRefPtr)(tdp->org);
1727 tdp->org = NULL;
1728 }
1729 break;
1730 default :
1731 break;
1732 }
1733 }
1734 Taxon3ReplyFree (t3ry);
1735 }
1736
1737 return t3orp;
1738 }
1739
DoOrgIdsMatch(BioSourcePtr b1,BioSourcePtr b2)1740 static Boolean DoOrgIdsMatch(BioSourcePtr b1, BioSourcePtr b2)
1741 {
1742 DbtagPtr d1 = NULL, d2 = NULL;
1743 ValNodePtr vnp;
1744
1745 if (b1 == NULL || b2 == NULL)
1746 {
1747 return FALSE;
1748 }
1749 if (b1->org == NULL || b2->org == NULL)
1750 {
1751 return FALSE;
1752 }
1753 for (vnp = b1->org->db; vnp; vnp = vnp->next)
1754 {
1755 d1 = (DbtagPtr) vnp->data.ptrvalue;
1756 if (StringCmp(d1->db, "taxon") == 0)
1757 {
1758 break;
1759 }
1760 }
1761 for (vnp = b2->org->db; vnp; vnp = vnp->next)
1762 {
1763 d2 = (DbtagPtr) vnp->data.ptrvalue;
1764 if (StringCmp(d2->db, "taxon") == 0)
1765 {
1766 break;
1767 }
1768 }
1769 if (d1 && d2)
1770 {
1771 if (d1->tag->id == d2->tag->id)
1772 {
1773 return TRUE;
1774 }
1775 }
1776 else if (StringICmp(b1->org->taxname, b2->org->taxname) == 0)
1777 {
1778 return TRUE;
1779 }
1780 return FALSE;
1781 }
1782
/* Tax3BioSourceMerge
 *
 * Merges guest into host and returns host:
 *   - genome and origin are taken from guest only where host has none (0);
 *   - copies of all guest subsources are appended to host->subtype;
 *   - copies of all guest org modifiers are appended to
 *     host->org->orgname->mod (an OrgName is created for host if needed).
 * guest itself is never modified or freed.
 *
 * Special cases: returns NULL when both are NULL, a full copy of guest when
 * only host is NULL, and host unchanged when only guest is NULL.
 */
static BioSourcePtr Tax3BioSourceMerge(BioSourcePtr host, BioSourcePtr guest)
{
  SubSourcePtr ssp, sp, last_ssp;
  OrgModPtr    omp, homp, last_omp;
  OrgNamePtr   onp;

  if (host == NULL && guest == NULL)
  {
    return NULL;
  }
  if (host == NULL)
  {
    host = AsnIoMemCopy(guest, (AsnReadFunc) BioSourceAsnRead,
                        (AsnWriteFunc) BioSourceAsnWrite);
    return host;
  }
  if (guest == NULL)
  {
    return host;
  }

  if (host->genome == 0 && guest->genome != 0)
  {
    host->genome = guest->genome;
  }
  if (host->origin == 0 && guest->origin != 0)
  {
    host->origin = guest->origin;
  }

  /* append copies of the guest subsources after the host's last one */
  last_ssp = host->subtype;
  while (last_ssp != NULL && last_ssp->next != NULL)
  {
    last_ssp = last_ssp->next;
  }
  for (ssp = guest->subtype; ssp != NULL; ssp = ssp->next)
  {
    sp = AsnIoMemCopy(ssp, (AsnReadFunc) SubSourceAsnRead,
                      (AsnWriteFunc) SubSourceAsnWrite);
    if (sp == NULL)
    {
      continue;
    }
    if (last_ssp == NULL)
    {
      host->subtype = sp;
    }
    else
    {
      last_ssp->next = sp;
    }
    /* always advance the tail, also for the very first element, so the
     * next copy is appended instead of replacing the head
     */
    last_ssp = sp;
  }

  /* append copies of the guest org modifiers the same way */
  if (guest->org != NULL && guest->org->orgname != NULL && host->org != NULL)
  {
    if ((onp = host->org->orgname) == NULL)
    {
      onp = OrgNameNew();
      host->org->orgname = onp;
    }
    if (onp != NULL)
    {
      last_omp = onp->mod;
      while (last_omp != NULL && last_omp->next != NULL)
      {
        last_omp = last_omp->next;
      }
      for (omp = guest->org->orgname->mod; omp != NULL; omp = omp->next)
      {
        homp = AsnIoMemCopy(omp, (AsnReadFunc) OrgModAsnRead,
                            (AsnWriteFunc) OrgModAsnWrite);
        if (homp == NULL)
        {
          continue;
        }
        if (last_omp == NULL)
        {
          onp->mod = homp;
        }
        else
        {
          last_omp->next = homp;
        }
        last_omp = homp;
      }
    }
  }
  return host;
}
1859
1860
1861 /**************************************************************************
1862 * Compare BioSources in one bioseq->descr using Taxonomy to find
1863 * their join parent
1864 * merge if organisms are the same or create a feature if different
1865 *
1866 **************************************************************************/
Tax3MergeSourceDescr(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)1867 NLM_EXTERN void Tax3MergeSourceDescr (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
1868 {
1869 BioseqPtr bsp = NULL;
1870 ValNodePtr vnp, newlist;
1871 SeqFeatPtr sfp;
1872 BioSourcePtr first_biop = NULL;
1873 BioSourcePtr other_biop;
1874 BioSourcePtr tmp_biop;
1875 ObjValNodePtr ovp;
1876
1877 if (!IS_Bioseq(sep)) {
1878 return;
1879 }
1880 newlist = (ValNodePtr) data;
1881 bsp = (BioseqPtr) sep->data.ptrvalue;
1882 if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)
1883 && (bsp->repr != Seq_repr_delta))
1884 return;
1885
1886 if (! ISA_na(bsp->mol))
1887 return;
1888
1889 /* add the descriptors in newlist to the end of the list in bsp->descr*/
1890 if (bsp->descr == NULL)
1891 {
1892 bsp->descr = newlist;
1893 }
1894 else
1895 {
1896 for (vnp = bsp->descr; vnp->next != NULL; vnp = vnp->next)
1897 {
1898 }
1899 vnp->next = newlist;
1900 }
1901
1902 /* now find the first source descriptor in bsp->descr that has an org*/
1903 /* note - we can't use SeqMgrGetNextDescriptor here because we have just
1904 * added to the descriptors, so they are not indexed. */
1905 for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next)
1906 {
1907 if (vnp->choice != Seq_descr_source) continue;
1908 if (vnp->data.ptrvalue == NULL)
1909 {
1910 ErrPostStr(SEV_WARNING, 0, 0, "Source descriptor missing data");
1911 if (vnp->extended)
1912 {
1913 ovp = (ObjValNodePtr) vnp;
1914 ovp->idx.deleteme = TRUE;
1915 }
1916 }
1917 if (first_biop == NULL)
1918 {
1919 first_biop = vnp->data.ptrvalue;
1920 }
1921 else
1922 {
1923 other_biop = vnp->data.ptrvalue;
1924 /* detach biosource pointer from descr, so that it will not be freed
1925 * when the descriptor is deleted.
1926 */
1927 vnp->data.ptrvalue = NULL;
1928 if (vnp->extended)
1929 {
1930 ovp = (ObjValNodePtr) vnp;
1931 ovp->idx.deleteme = TRUE;
1932 }
1933 if (DoOrgIdsMatch(first_biop, other_biop))
1934 {
1935 /* merge the two sources */
1936 tmp_biop = Tax3BioSourceMerge(first_biop, other_biop);
1937 if (tmp_biop == NULL)
1938 {
1939 ErrPostStr (SEV_WARNING, 0, 0, "Failed to merge biosources");
1940 }
1941 else
1942 {
1943 first_biop = tmp_biop;
1944 }
1945 other_biop = BioSourceFree (other_biop);
1946 } else {
1947 /* create a source feature */
1948 sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_BIOSRC, NULL);
1949 if (sfp != NULL)
1950 {
1951 sfp->data.value.ptrvalue = other_biop;
1952 }
1953 }
1954 }
1955 }
1956 return;
1957 }
1958
GetTaxIdFromOrgRef(OrgRefPtr orp)1959 static Int4 GetTaxIdFromOrgRef (OrgRefPtr orp)
1960 {
1961 Int4 tax_id = -1;
1962 ValNodePtr vnp;
1963 DbtagPtr d;
1964
1965 if (orp != NULL)
1966 {
1967 for (vnp = orp->db; vnp != NULL; vnp = vnp->next)
1968 {
1969 d = (DbtagPtr) vnp->data.ptrvalue;
1970 if (StringCmp(d->db, "taxon") == 0)
1971 {
1972 tax_id = d->tag->id;
1973 break;
1974 }
1975 }
1976 }
1977 return tax_id;
1978 }
1979
Taxon3GetTaxIdByOrgRef(OrgRefPtr orp)1980 NLM_EXTERN Int4 Taxon3GetTaxIdByOrgRef (OrgRefPtr orp)
1981 {
1982 OrgRefPtr orp_repl;
1983 Int4 tax_id = -1;
1984
1985 if (orp == NULL) return -1;
1986
1987 orp_repl = Taxon3GetOrg (orp);
1988 tax_id = GetTaxIdFromOrgRef (orp_repl);
1989 OrgRefFree (orp_repl);
1990
1991 return tax_id;
1992 }
1993
Taxon3GetOrgRefByName(CharPtr orgname)1994 NLM_EXTERN OrgRefPtr Taxon3GetOrgRefByName (CharPtr orgname)
1995 {
1996 OrgRefPtr request, org;
1997
1998 request = OrgRefNew ();
1999 if (request == NULL) return NULL;
2000 request->taxname = orgname;
2001 org = Taxon3GetOrg (request);
2002 request->taxname = NULL;
2003 OrgRefFree (request);
2004 return org;
2005 }
2006
Taxon3GetTaxIdByName(CharPtr orgname)2007 NLM_EXTERN Int4 Taxon3GetTaxIdByName (CharPtr orgname)
2008 {
2009 OrgRefPtr orp;
2010 Int4 tax_id;
2011
2012 orp = Taxon3GetOrgRefByName (orgname);
2013 tax_id = GetTaxIdFromOrgRef (orp);
2014
2015 OrgRefFree(orp);
2016 return tax_id;
2017 }
2018
/* AddBioSourceToList
 *
 * Visitor callback: adds biop (choice 4, not copied) to the ValNode list
 * that userdata points to.  Does nothing when either argument is NULL.
 */
static void AddBioSourceToList (BioSourcePtr biop, Pointer userdata)
{
  if (biop != NULL && userdata != NULL) {
    ValNodeAddPointer ((ValNodePtr PNTR) userdata, 4, (Pointer) biop);
  }
}
2027
Taxon3ReplaceOrgInSeqEntryEx(SeqEntryPtr sep,Boolean keep_syn,Boolean replace_unpub)2028 NLM_EXTERN void Taxon3ReplaceOrgInSeqEntryEx (SeqEntryPtr sep, Boolean keep_syn, Boolean replace_unpub)
2029 {
2030 ValNodePtr biop_list = NULL;
2031 ValNodePtr request_list = NULL;
2032 ValNodePtr response_list = NULL;
2033 ValNodePtr biop_vnp, response_vnp;
2034 BioSourcePtr biop;
2035 OrgRefPtr swap_org, response_org;
2036
2037 VisitBioSourcesInSep (sep, &biop_list, AddBioSourceToList);
2038
2039 for (biop_vnp = biop_list; biop_vnp != NULL; biop_vnp = biop_vnp->next)
2040 {
2041 biop = (BioSourcePtr) biop_vnp->data.ptrvalue;
2042 ValNodeAddPointer (&request_list, 3, biop->org);
2043 }
2044 response_list = Taxon3GetOrgRefList (request_list);
2045
2046 if (ValNodeLen (response_list) != ValNodeLen (request_list))
2047 {
2048 Message (MSG_POST, "Unable to retrieve information from tax server");
2049 return;
2050 }
2051
2052 for (biop_vnp = biop_list, response_vnp = response_list;
2053 biop_vnp != NULL && response_vnp != NULL;
2054 biop_vnp = biop_vnp->next, response_vnp = response_vnp->next)
2055 {
2056 biop = (BioSourcePtr) biop_vnp->data.ptrvalue;
2057 swap_org = biop->org;
2058 response_org = response_vnp->data.ptrvalue;
2059 if (response_org != NULL
2060 && (replace_unpub || !(response_vnp->choice & eReturnedOrgFlag_unpublished)))
2061 {
2062 biop->org = response_org;
2063 response_vnp->data.ptrvalue = NULL;
2064 OrgRefFree (swap_org);
2065 if (! keep_syn)
2066 {
2067 biop->org->syn = ValNodeFreeData(biop->org->syn);
2068 }
2069 }
2070 }
2071 ValNodeFree (request_list);
2072 ValNodeFree (response_list);
2073 ValNodeFree (biop_list);
2074 }
2075
2076
Taxon3ReplaceOrgInSeqEntry(SeqEntryPtr sep,Boolean keep_syn)2077 NLM_EXTERN void Taxon3ReplaceOrgInSeqEntry (SeqEntryPtr sep, Boolean keep_syn)
2078 {
2079 Taxon3ReplaceOrgInSeqEntryEx (sep, keep_syn, TRUE);
2080 }
2081
2082
/* GetBioSourceFeaturesForCheck
 *
 * Visitor callback: adds sfp (tagged OBJ_SEQFEAT) to the ValNode list that
 * userdata points to, provided sfp is a BioSource feature with data.
 */
static void GetBioSourceFeaturesForCheck (SeqFeatPtr sfp, Pointer userdata)
{
  ValNodePtr PNTR out = (ValNodePtr PNTR) userdata;

  if (sfp != NULL
      && sfp->data.choice == SEQFEAT_BIOSRC
      && out != NULL
      && sfp->data.value.ptrvalue != NULL) {
    ValNodeAddPointer (out, OBJ_SEQFEAT, sfp);
  }
}
2092
2093
/* GetBioSourceDescriptorsForCheck
 *
 * Visitor callback: adds sdp (tagged OBJ_SEQDESC) to the ValNode list that
 * userdata points to, provided sdp is a source descriptor with data.
 */
static void GetBioSourceDescriptorsForCheck (SeqDescrPtr sdp, Pointer userdata)
{
  ValNodePtr PNTR out = (ValNodePtr PNTR) userdata;

  if (sdp != NULL
      && sdp->choice == Seq_descr_source
      && out != NULL
      && sdp->data.ptrvalue != NULL) {
    ValNodeAddPointer (out, OBJ_SEQDESC, sdp);
  }
}
2103
2104
GetTaxonXref(OrgRefPtr org)2105 static DbtagPtr GetTaxonXref (OrgRefPtr org)
2106 {
2107 ValNodePtr vnp;
2108 DbtagPtr dbt = NULL;
2109
2110 if (org == NULL) return NULL;
2111 vnp = org->db;
2112 while (vnp != NULL && dbt == NULL) {
2113 dbt = (DbtagPtr) vnp->data.ptrvalue;
2114 if (dbt != NULL && StringICmp ((CharPtr) dbt->db, "taxon") != 0) {
2115 dbt = NULL;
2116 }
2117 vnp = vnp->next;
2118 }
2119 return dbt;
2120 }
2121
DoTaxonIdsMatch(OrgRefPtr org1,OrgRefPtr org2)2122 static Boolean DoTaxonIdsMatch (OrgRefPtr org1, OrgRefPtr org2)
2123 {
2124 DbtagPtr dbt1 = NULL, dbt2 = NULL;
2125
2126 if (org1 == NULL || org2 == NULL) return FALSE;
2127
2128 dbt1 = GetTaxonXref (org1);
2129 if (dbt1 == NULL) return FALSE;
2130 dbt2 = GetTaxonXref (org2);
2131 if (dbt2 == NULL) return FALSE;
2132
2133 return DbtagMatch(dbt1, dbt2);
2134 }
2135
2136
Taxon3CheckOrgInSeqEntry(SeqEntryPtr sep,ValNodePtr PNTR not_found,ValNodePtr PNTR bad_match)2137 NLM_EXTERN void Taxon3CheckOrgInSeqEntry (SeqEntryPtr sep, ValNodePtr PNTR not_found, ValNodePtr PNTR bad_match)
2138 {
2139 ValNodePtr request_list = NULL;
2140 ValNodePtr response_list = NULL;
2141 ValNodePtr biop_vnp, response_vnp;
2142 BioSourcePtr biop;
2143 OrgRefPtr orig_org, response_org;
2144 ValNodePtr item_list = NULL;
2145 SeqFeatPtr sfp;
2146 SeqDescrPtr sdp;
2147
2148 VisitFeaturesInSep (sep, &item_list, GetBioSourceFeaturesForCheck);
2149 VisitDescriptorsInSep (sep, &item_list, GetBioSourceDescriptorsForCheck);
2150
2151 for (biop_vnp = item_list; biop_vnp != NULL; biop_vnp = biop_vnp->next) {
2152 biop = NULL;
2153 if (biop_vnp->choice == OBJ_SEQFEAT) {
2154 sfp = (SeqFeatPtr) biop_vnp->data.ptrvalue;
2155 if (sfp != NULL) {
2156 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
2157 }
2158 } else if (biop_vnp->choice == OBJ_SEQDESC) {
2159 sdp = (SeqDescrPtr) biop_vnp->data.ptrvalue;
2160 if (sdp != NULL) {
2161 biop = (BioSourcePtr) sdp->data.ptrvalue;
2162 }
2163 }
2164 if (biop != NULL) {
2165 ValNodeAddPointer (&request_list, 3, biop->org);
2166 }
2167 }
2168
2169 response_list = Taxon3GetOrgRefList (request_list);
2170
2171 if (ValNodeLen (response_list) != ValNodeLen (request_list))
2172 {
2173 Message (MSG_POST, "Unable to retrieve information from tax server");
2174 ValNodeFree (request_list);
2175 ValNodeFree (item_list);
2176 return;
2177 }
2178
2179 for (biop_vnp = item_list, response_vnp = response_list;
2180 biop_vnp != NULL && response_vnp != NULL;
2181 biop_vnp = biop_vnp->next, response_vnp = response_vnp->next)
2182 {
2183 response_org = response_vnp->data.ptrvalue;
2184 biop = NULL;
2185 orig_org = NULL;
2186 if (biop_vnp->choice == OBJ_SEQFEAT) {
2187 sfp = (SeqFeatPtr) biop_vnp->data.ptrvalue;
2188 if (sfp != NULL) {
2189 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
2190 }
2191 } else if (biop_vnp->choice == OBJ_SEQDESC) {
2192 sdp = (SeqDescrPtr) biop_vnp->data.ptrvalue;
2193 if (sdp != NULL) {
2194 biop = (BioSourcePtr) sdp->data.ptrvalue;
2195 }
2196 }
2197 if (biop == NULL) {
2198 Message (MSG_POST, "Error collecting data");
2199 ValNodeFree (request_list);
2200 ValNodeFree (item_list);
2201 return;
2202 } else {
2203 orig_org = biop->org;
2204 if (orig_org != NULL) {
2205 if (response_org == NULL) {
2206 ValNodeAddPointer (not_found, biop_vnp->choice, biop_vnp->data.ptrvalue);
2207 } else if (StringCmp (orig_org->taxname, response_org->taxname) != 0) {
2208 ValNodeAddPointer (bad_match, biop_vnp->choice, biop_vnp->data.ptrvalue);
2209 } else if (!DoTaxonIdsMatch(orig_org, response_org)) {
2210 ValNodeAddPointer (bad_match, biop_vnp->choice, biop_vnp->data.ptrvalue);
2211 }
2212 }
2213 }
2214 OrgRefFree (response_org);
2215 }
2216 ValNodeFree (request_list);
2217 ValNodeFree (response_list);
2218 ValNodeFree (item_list);
2219 }
2220
2221
CheckTaxNamesAgainstTaxDatabase(ValNodePtr PNTR discrepancy_list,ValNodePtr sep_list)2222 NLM_EXTERN void CheckTaxNamesAgainstTaxDatabase (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
2223 {
2224 ValNodePtr vnp;
2225 SeqEntryPtr sep;
2226 SeqEntryPtr orig_scope;
2227 ValNodePtr not_found = NULL, bad_match = NULL;
2228 CharPtr bad_match_fmt = "%d tax names do not match taxonomy lookup.";
2229 CharPtr no_match_fmt = "%d organisms are not found in taxonomy lookup.";
2230 ClickableItemPtr dip;
2231
2232 if (discrepancy_list == NULL) return;
2233
2234
2235 orig_scope = SeqEntryGetScope ();
2236 for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
2237 sep = vnp->data.ptrvalue;
2238 SeqEntrySetScope (sep);
2239 Taxon3CheckOrgInSeqEntry (sep, ¬_found, &bad_match);
2240 }
2241 SeqEntrySetScope (orig_scope);
2242 if (not_found != NULL) {
2243 dip = NewClickableItem (DISC_NO_TAXLOOKUP, no_match_fmt, not_found);
2244 dip->subcategories = NULL;
2245 ValNodeAddPointer (discrepancy_list, 0, dip);
2246 }
2247 if (bad_match != NULL) {
2248 dip = NewClickableItem (DISC_BAD_TAXLOOKUP, bad_match_fmt, bad_match);
2249 dip->subcategories = NULL;
2250 ValNodeAddPointer (discrepancy_list, 0, dip);
2251 }
2252 }
2253
2254
/* FreeOrgRefValNodeList
 *
 * Frees a ValNode list whose data pointers are OrgRefs: each OrgRef is
 * released with OrgRefFree and each node with ValNodeFree.
 * Always returns NULL.
 */
static ValNodePtr FreeOrgRefValNodeList (ValNodePtr vnp)
{
  ValNodePtr following;

  for (; vnp != NULL; vnp = following)
  {
    following = vnp->next;
    /* isolate the node so that ValNodeFree releases only this one */
    vnp->next = NULL;
    vnp->data.ptrvalue = OrgRefFree ((OrgRefPtr) vnp->data.ptrvalue);
    ValNodeFree (vnp);
  }
  return NULL;
}
2271
2272
EndsWithSp(CharPtr str)2273 static Boolean EndsWithSp (CharPtr str)
2274 {
2275 Int4 len;
2276
2277 if (StringHasNoText (str)) return FALSE;
2278 len = StringLen (str);
2279 if (len < 4) return FALSE;
2280 if (StringCmp (str + len - 4, " sp.") == 0) return TRUE;
2281 return FALSE;
2282 }
2283
2284
/* RemoveSp
 *
 * When orig ends in " sp." (case-sensitive), returns a newly allocated copy
 * of orig without that suffix; the caller must free it.  Returns NULL when
 * orig does not end in " sp." or when allocation fails.
 */
static CharPtr RemoveSp (CharPtr orig)
{
  CharPtr cpy = NULL;
  Int4    len;

  len = StringLen (orig);
  if (len >= 4 && StringCmp (orig + len - 4, " sp.") == 0) {
    /* room for the len - 4 characters that are kept plus the NUL */
    cpy = (CharPtr) MemNew (sizeof (Char) * (len - 3));
    if (cpy != NULL) {
      StringNCpy (cpy, orig, len - 4);
      cpy[len - 4] = 0;
    }
  }
  return cpy;
}
2298
2299
/* AddRequestOrgForString
 *
 * Queues taxonomy lookups for one specific-host string.
 *
 * str            candidate name; anything from the first ';' on is ignored
 * host           the original specific-host value the lookups belong to
 * request_list   receives one new OrgRef (choice 3) per queued lookup
 * req_host_list  receives a copy of host for each queued lookup, so both
 *                lists stay parallel
 *
 * If the candidate ends in " sp.", only the name without that suffix is
 * queued.  Otherwise the candidate itself is queued and, when it has more
 * than one word, the function recurses on the candidate with its last word
 * removed (to see whether an abbreviated name looks up).
 *
 * Does nothing when str has no text or any other argument is NULL.
 */
static void AddRequestOrgForString (CharPtr str, CharPtr host, ValNodePtr PNTR request_list, ValNodePtr PNTR req_host_list)
{
  OrgRefPtr request_org;
  CharPtr   cp, cpy;
  CharPtr   truncated_host;

  if (StringHasNoText (str) || host == NULL || request_list == NULL || req_host_list == NULL)
  {
    return;
  }
  truncated_host = StringSave (str);
  cp = StringChr(truncated_host, ';');
  if (cp != NULL) {
    *cp = 0;
  }

  /* if ends with " sp.", remove " sp." */
  cpy = RemoveSp (truncated_host);
  if (cpy != NULL) {
    request_org = OrgRefNew();
    if (request_org != NULL) {
      /* RemoveSp already returned a private copy: hand it to the org
       * instead of copying it again and losing the original
       */
      request_org->taxname = cpy;
      ValNodeAddPointer (request_list, 3, request_org);
      ValNodeAddPointer (req_host_list, 0, StringSave (host));
    } else {
      cpy = MemFree (cpy);
    }
  } else {
    request_org = OrgRefNew();
    if (request_org != NULL) {
      request_org->taxname = StringSave (truncated_host);
      ValNodeAddPointer (request_list, 3, request_org);
      ValNodeAddPointer (req_host_list, 0, StringSave (host));

      /* if more than one word, try chopping off last to see if abbreviated name looks up */
      cp = StringRChr (truncated_host, ' ');
      if (cp != NULL)
      {
        /* truncated_host is our own buffer and the org above holds its
         * own copy, so the last word can be cut off in place
         */
        *cp = 0;
        AddRequestOrgForString (truncated_host, host, request_list, req_host_list);
      }
    }
  }
  truncated_host = MemFree (truncated_host);
}
2346
2347 typedef struct specifichostcheck {
2348 CharPtr spec_host;
2349 ValNodePtr request_list; /* ValNodeList of orgs */
2350 ValNodePtr response_list; /* ValNodeList of orgs */
2351 ValNodePtr biop_list; /* ValNodeList of sources with this spec_host value */
2352 } SpecificHostCheckData, PNTR SpecificHostCheckPtr;
2353
2354
/* Releases a ValNode list of SpecificHostCheckData records.  For each
 * record the request and response OrgRef lists, the spec_host string and
 * the biop_list nodes are released, then the node is passed to
 * ValNodeFreeData.  Returns the list pointer after the walk (NULL).
 */
static ValNodePtr SpecificHostCheckListFree (ValNodePtr vnp)
{
  ValNodePtr           following;
  SpecificHostCheckPtr entry;

  for (; vnp != NULL; vnp = following) {
    following = vnp->next;
    vnp->next = NULL;
    entry = (SpecificHostCheckPtr) vnp->data.ptrvalue;
    if (entry != NULL) {
      entry->request_list = FreeOrgRefValNodeList (entry->request_list);
      entry->response_list = FreeOrgRefValNodeList (entry->response_list);
      entry->spec_host = MemFree (entry->spec_host);
      /* only the nodes are released here, not what they point at */
      entry->biop_list = ValNodeFree (entry->biop_list);
    }
    ValNodeFreeData (vnp);
  }
  return vnp;
}
2377
2378
/* Walks host_list, request_list and response_list in parallel and groups
 * the request/response pairs by host string.  A new SpecificHostCheckData
 * record is started whenever the host differs from the previous node's
 * host, so only consecutive equal hosts share a record.
 *
 * The data pointers of the request and response nodes are moved into the
 * records and the source nodes are set to NULL; the host strings are
 * copied.  The walk stops as soon as any of the three lists runs out.
 *
 * Returns the new list of records (NULL when nothing was grouped).
 */
static ValNodePtr SortSpecificHostOrgs (ValNodePtr host_list, ValNodePtr request_list, ValNodePtr response_list)
{
  ValNodePtr           check_list = NULL;
  SpecificHostCheckPtr p = NULL;
  CharPtr              host, prev_host = NULL;

  while (host_list != NULL
         && request_list != NULL
         && response_list != NULL)
  {
    host = (CharPtr) host_list->data.ptrvalue;
    /* always start a record for the first node: p is dereferenced below
     * and must not depend on how the comparison treats the initial
     * NULL prev_host */
    if (p == NULL || StringCmp (host, prev_host) != 0)
    {
      p = (SpecificHostCheckPtr) MemNew (sizeof (SpecificHostCheckData));
      p->spec_host = StringSave (host);
      ValNodeAddPointer (&check_list, 0, p);
      prev_host = host;
    }
    ValNodeAddPointer (&(p->request_list), request_list->choice, request_list->data.ptrvalue);
    ValNodeAddPointer (&(p->response_list), response_list->choice, response_list->data.ptrvalue);
    /* the record now holds these pointers; clear the originals so they
     * are not reachable from two lists */
    request_list->data.ptrvalue = NULL;
    response_list->data.ptrvalue = NULL;
    host_list = host_list->next;
    request_list = request_list->next;
    response_list = response_list->next;
  }
  return check_list;
}
2407
2408
StringAlreadyInValNodeList(CharPtr str,ValNodePtr list)2409 static Boolean StringAlreadyInValNodeList (CharPtr str, ValNodePtr list)
2410 {
2411 if (StringHasNoText (str))
2412 {
2413 return TRUE;
2414 }
2415
2416 while (list != NULL)
2417 {
2418 if (StringCmp (str, list->data.ptrvalue) == 0)
2419 {
2420 return TRUE;
2421 }
2422 list = list->next;
2423 }
2424 return FALSE;
2425 }
2426
2427
GetBioSourceFromValNode(ValNodePtr vnp)2428 static BioSourcePtr GetBioSourceFromValNode (ValNodePtr vnp)
2429 {
2430 SeqFeatPtr sfp;
2431 SeqDescrPtr sdp;
2432 BioSourcePtr biop = NULL;
2433
2434 if (vnp == NULL || vnp->data.ptrvalue == NULL) return NULL;
2435
2436 if (vnp->choice == OBJ_SEQFEAT)
2437 {
2438 sfp = (SeqFeatPtr) vnp->data.ptrvalue;
2439 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
2440 }
2441 else if (vnp->choice == OBJ_SEQDESC)
2442 {
2443 sdp = (SeqDescrPtr) vnp->data.ptrvalue;
2444 biop = (BioSourcePtr) sdp->data.ptrvalue;
2445 }
2446 return biop;
2447 }
2448
2449
2450 static CharPtr extract_list[] = {
2451 "cf.",
2452 "cf ",
2453 "aff ",
2454 "aff.",
2455 "near",
2456 "nr.",
2457 "nr ",
2458 NULL};
2459
AdjustSpecificHostForTaxServer(CharPtr spec_host)2460 static void AdjustSpecificHostForTaxServer (CharPtr spec_host)
2461 {
2462 CharPtr cp, src, dst;
2463 Int4 i;
2464
2465 /* ignore separator words */
2466 for (i = 0; extract_list[i] != NULL; i++) {
2467 if ((cp = StringSearch (spec_host, extract_list[i])) != NULL && cp > spec_host && isspace (*(cp - 1))) {
2468 src = cp + StringLen (extract_list[i]);
2469 dst = cp;
2470 while (isspace (*src)) {
2471 src++;
2472 }
2473 while (*src != 0) {
2474 *dst = *src;
2475 dst++;
2476 src++;
2477 }
2478 *dst = 0;
2479 }
2480 }
2481 }
2482
2483
/* For every nat_host modifier with text on every source in biop_list,
 * finds the SpecificHostCheckData record in check_list whose spec_host
 * equals the modifier text (after AdjustSpecificHostForTaxServer) and
 * appends the source node's choice and data pointer to that record's
 * biop_list.
 *
 * The search position is remembered between lookups: each lookup resumes
 * at the node where the previous one stopped and, if it runs off the end,
 * wraps around to scan from the head of check_list up to that node.
 */
static void AddBioSourcesToSpecificHostChecklist (ValNodePtr biop_list, ValNodePtr check_list)
{
  ValNodePtr           src_vnp, cursor = NULL, wrap_limit;
  BioSourcePtr         biop;
  OrgModPtr            mod;
  SpecificHostCheckPtr entry;
  CharPtr              adjusted;

  if (biop_list == NULL || check_list == NULL) {
    return;
  }

  for (src_vnp = biop_list; src_vnp != NULL; src_vnp = src_vnp->next) {
    biop = GetBioSourceFromValNode (src_vnp);
    if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) {
      continue;
    }

    for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) {
      if (mod->subtype != ORGMOD_nat_host || StringHasNoText (mod->subname)) {
        continue;
      }

      /* remember where this lookup starts; NULL means "start at the head,
       * no wrap-around needed" */
      wrap_limit = cursor;
      if (cursor == NULL) {
        cursor = check_list;
      }

      adjusted = StringSave (mod->subname);
      AdjustSpecificHostForTaxServer (adjusted);

      /* first pass: from the remembered position to the end of the list */
      entry = NULL;
      while (cursor != NULL) {
        entry = (SpecificHostCheckPtr) cursor->data.ptrvalue;
        if (entry == NULL || StringCmp (entry->spec_host, adjusted) == 0) {
          break;
        }
        entry = NULL;
        cursor = cursor->next;
      }

      /* second pass: from the head up to where the first pass began */
      if (entry == NULL && wrap_limit != NULL) {
        cursor = check_list;
        while (cursor != wrap_limit) {
          entry = (SpecificHostCheckPtr) cursor->data.ptrvalue;
          if (entry == NULL || StringCmp (entry->spec_host, adjusted) == 0) {
            break;
          }
          entry = NULL;
          cursor = cursor->next;
        }
      }

      adjusted = MemFree (adjusted);
      if (entry != NULL) {
        ValNodeAddPointer (&(entry->biop_list), src_vnp->choice, src_vnp->data.ptrvalue);
      }
    }
  }
}
2547
2548
ShouldCheckSpecificHostValueForValidator(CharPtr spec_host)2549 static Boolean ShouldCheckSpecificHostValueForValidator (CharPtr spec_host)
2550 {
2551 CharPtr semicolon, space;
2552 if (StringHasNoText (spec_host) || !isupper (*spec_host) || (space = StringChr(spec_host, ' ')) == NULL) {
2553 return FALSE;
2554 } else {
2555 semicolon = StringChr(spec_host, ';');
2556 if (semicolon != NULL && semicolon < space) {
2557 return FALSE;
2558 } else {
2559 return TRUE;
2560 }
2561 }
2562 }
2563
/* Builds the name that the validator submits for a specific-host value.
 *
 * Returns NULL when ShouldCheckSpecificHostValueForValidator rejects the
 * value.  Otherwise returns a newly allocated, space-trimmed prefix of
 * spec_host made of the first word and, unless the next word starts with
 * '(' or "sp." or is missing, the second word.  A "hybrid " or "x " that
 * follows the first word is kept and the word after it is taken as the
 * second word.  The prefix never extends past the first ';'.
 * The caller owns the returned string.
 */
static CharPtr GetSpecificHostValueToCheckForValidator (CharPtr spec_host)
{
  CharPtr cp, check_val;
  Int4    len;

  if (!ShouldCheckSpecificHostValueForValidator (spec_host)) {
    return NULL;
  }

  /* characters are cast to unsigned char before isspace: a negative char
   * value is undefined for the <ctype.h> functions */
  cp = spec_host;
  /* skip first word */
  while (*cp != 0 && !isspace ((unsigned char) *cp)) {
    cp++;
  }
  while (isspace ((unsigned char) *cp)) {
    cp++;
  }

  /* if next word is "hybrid" or "x" then want this word plus the third word */
  if (StringNCmp (cp, "hybrid ", 7) == 0) {
    cp += 7;
  } else if (StringNCmp (cp, "x ", 2) == 0) {
    cp += 2;
  }

  if (*cp != '(' && StringNCmp (cp, "sp.", 3) != 0 && *cp != 0) {
    /* collect second word */
    while (*cp != 0 && !isspace ((unsigned char) *cp)) {
      cp++;
    }
  }

  /* the prefix length is simply how far the scan advanced */
  len = (Int4) (cp - spec_host);

  cp = StringChr (spec_host, ';');
  if (cp != NULL && cp - spec_host < len) {
    len = (Int4) (cp - spec_host);
  }

  check_val = (CharPtr) MemNew (sizeof (Char) * (len + 1));
  if (check_val != NULL) {
    StringNCpy (check_val, spec_host, len);
    check_val[len] = 0;
    TrimSpacesAroundString (check_val);
  }
  return check_val;
}
2610
ShouldCheckSpecificHostInBioSource(BioSourcePtr biop)2611 static Boolean ShouldCheckSpecificHostInBioSource (BioSourcePtr biop)
2612 {
2613 OrgModPtr mod;
2614 Boolean rval = FALSE;
2615
2616 if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) {
2617 return FALSE;
2618 }
2619 for (mod = biop->org->orgname->mod; mod != NULL && !rval; mod = mod->next) {
2620 if (mod->subtype == ORGMOD_nat_host) {
2621 rval = ShouldCheckSpecificHostValueForValidator (mod->subname);
2622 }
2623 }
2624 return rval;
2625 }
2626
2627
2628
/* Feature callback: userdata is a ValNodePtr PNTR list head.  A BioSource
 * feature whose source passes ShouldCheckSpecificHostInBioSource is
 * appended to that list with choice OBJ_SEQFEAT.
 */
static void AddValidatorSpecificHostBioSourceFeatToList (SeqFeatPtr sfp, Pointer userdata)
{
  ValNodePtr PNTR list_head;

  if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC && userdata != NULL) {
    list_head = (ValNodePtr PNTR) userdata;
    if (ShouldCheckSpecificHostInBioSource (sfp->data.value.ptrvalue)) {
      ValNodeAddPointer (list_head, OBJ_SEQFEAT, sfp);
    }
  }
}
2638
2639
/* Descriptor callback: userdata is a ValNodePtr PNTR list head.  A source
 * descriptor whose source passes ShouldCheckSpecificHostInBioSource is
 * appended to that list with choice OBJ_SEQDESC.
 */
static void AddValidatorSpecificHostBioSourceDescToList (SeqDescrPtr sdp, Pointer userdata)
{
  ValNodePtr PNTR list_head;

  if (sdp != NULL && sdp->choice == Seq_descr_source && userdata != NULL) {
    list_head = (ValNodePtr PNTR) userdata;
    if (ShouldCheckSpecificHostInBioSource (sdp->data.ptrvalue)) {
      ValNodeAddPointer (list_head, OBJ_SEQDESC, sdp);
    }
  }
}
2649
2650
GetValidatorSpecificHostBioSourceList(SeqEntryPtr sep)2651 static ValNodePtr GetValidatorSpecificHostBioSourceList (SeqEntryPtr sep)
2652 {
2653 ValNodePtr list = NULL;
2654
2655 VisitFeaturesInSep (sep, &list, AddValidatorSpecificHostBioSourceFeatToList);
2656 VisitDescriptorsInSep (sep, &list, AddValidatorSpecificHostBioSourceDescToList);
2657 return list;
2658 }
2659
2660
2661 static void
FormatValidatorSpecificHostRequests(ValNodePtr spec_host_list,ValNodePtr PNTR request_list,ValNodePtr PNTR req_host_list)2662 FormatValidatorSpecificHostRequests
2663 (ValNodePtr spec_host_list,
2664 ValNodePtr PNTR request_list,
2665 ValNodePtr PNTR req_host_list)
2666 {
2667 ValNodePtr vnp;
2668 CharPtr orig;
2669 OrgRefPtr request_org;
2670
2671 /* now format requests for unique specific_host values */
2672 for (vnp = spec_host_list; vnp != NULL; vnp = vnp->next)
2673 {
2674 orig = (CharPtr) vnp->data.ptrvalue;
2675 request_org = OrgRefNew();
2676 request_org->taxname = GetSpecificHostValueToCheckForValidator (orig);
2677 ValNodeAddPointer (request_list, 3, request_org);
2678 ValNodeAddPointer (req_host_list, 0, StringSave (orig));
2679 }
2680 }
2681
MatchesSynonym(CharPtr txt,OrgRefPtr response_org)2682 static Boolean MatchesSynonym (CharPtr txt, OrgRefPtr response_org)
2683 {
2684 ValNodePtr syn;
2685 Boolean rval = FALSE;
2686 if (StringHasNoText (txt) || response_org == NULL) return FALSE;
2687
2688 for (syn = response_org->syn; syn != NULL && !rval; syn = syn->next)
2689 {
2690 if (StringCmp (txt, syn->data.ptrvalue) == 0)
2691 {
2692 rval = TRUE;
2693 }
2694 }
2695 return rval;
2696 }
2697
2698
MatchesGenBankSynonym(CharPtr txt,OrgRefPtr response_org)2699 static Boolean MatchesGenBankSynonym (CharPtr txt, OrgRefPtr response_org)
2700 {
2701 OrgModPtr mod;
2702 Boolean rval = FALSE;
2703
2704 if (StringHasNoText (txt) || response_org == NULL || response_org->orgname == NULL) return FALSE;
2705 mod = response_org->orgname->mod;
2706 while (mod != NULL)
2707 {
2708 if ((mod->subtype == ORGMOD_gb_synonym || mod->subtype == ORGMOD_old_name) && StringCmp (txt, mod->subname) == 0)
2709 {
2710 rval = TRUE;
2711 }
2712 mod = mod->next;
2713 }
2714 return rval;
2715 }
2716
2717
/* Gathers the text of every non-empty nat_host modifier on the sources in
 * biop_list, each passed through AdjustSpecificHostForTaxServer, then sorts
 * the strings and removes duplicates.  Returns a list of newly allocated
 * strings.
 */
static ValNodePtr GetListOfUniqueSpecificHostValues (ValNodePtr biop_list)
{
  ValNodePtr   src_vnp;
  ValNodePtr   hosts = NULL;
  BioSourcePtr biop;
  OrgModPtr    mod;
  CharPtr      adjusted;

  /* get a list of unique specific_host values */
  for (src_vnp = biop_list; src_vnp != NULL; src_vnp = src_vnp->next) {
    if (src_vnp->data.ptrvalue == NULL) {
      continue;
    }
    biop = GetBioSourceFromValNode (src_vnp);
    if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) {
      continue;
    }

    for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) {
      if (mod->subtype != ORGMOD_nat_host || StringHasNoText (mod->subname)) {
        continue;
      }
      adjusted = StringSave (mod->subname);
      AdjustSpecificHostForTaxServer (adjusted);
      ValNodeAddPointer (&hosts, 0, adjusted);
    }
  }

  hosts = ValNodeSort (hosts, SortVnpByString);
  ValNodeUnique (&hosts, SortVnpByString, ValNodeFreeData);
  return hosts;
}
2749
2750
/* Looks for a name in org that equals str ignoring case (StringICmp).
 * Candidates are tried in this order: taxname, common name, the synonym
 * list, then gb_synonym / old_name modifiers.  Returns a pointer to the
 * matching string inside org (not a copy), or NULL when str has no text,
 * org is NULL or nothing matches.
 */
static CharPtr FindMatchInOrgRef (CharPtr str, OrgRefPtr org)
{
  ValNodePtr syn;
  OrgModPtr  mod;

  if (StringHasNoText (str) || org == NULL) {
    return NULL;
  }
  if (StringICmp (org->taxname, str) == 0) {
    return org->taxname;
  }
  if (StringICmp (org->common, str) == 0) {
    return org->common;
  }

  for (syn = org->syn; syn != NULL; syn = syn->next) {
    if (StringICmp (str, (CharPtr) (syn->data.ptrvalue)) == 0) {
      return (CharPtr) (syn->data.ptrvalue);
    }
  }

  if (org->orgname != NULL) {
    for (mod = org->orgname->mod; mod != NULL; mod = mod->next) {
      if ((mod->subtype == ORGMOD_gb_synonym || mod->subtype == ORGMOD_old_name)
          && StringICmp (str, mod->subname) == 0) {
        return mod->subname;
      }
    }
  }
  return NULL;
}
2780
2781
2782 /* Want to check that specific host names are valid */
2783 NLM_EXTERN void
Taxon3ValidateSpecificHostsInSeqEntry(SeqEntryPtr sep,ValNodePtr PNTR misspelled_list,ValNodePtr PNTR bad_caps_list,ValNodePtr PNTR ambiguous_list,ValNodePtr PNTR unrecognized_list)2784 Taxon3ValidateSpecificHostsInSeqEntry
2785 (SeqEntryPtr sep,
2786 ValNodePtr PNTR misspelled_list,
2787 ValNodePtr PNTR bad_caps_list,
2788 ValNodePtr PNTR ambiguous_list,
2789 ValNodePtr PNTR unrecognized_list)
2790 {
2791 ValNodePtr biop_list = NULL;
2792 ValNodePtr req_host_list = NULL, spec_host_list = NULL;
2793 ValNodePtr request_list = NULL;
2794 ValNodePtr response_list = NULL;
2795 ValNodePtr response_vnp, request_vnp;
2796 ValNodePtr check_list, check_vnp;
2797 OrgRefPtr request_org, response_org;
2798 SpecificHostCheckPtr p;
2799 Boolean has_match;
2800 ErrSev level;
2801 Boolean misspelled_flag;
2802 Boolean bad_caps_flag;
2803 Boolean ambiguous_flag;
2804 CharPtr match;
2805
2806 biop_list = GetValidatorSpecificHostBioSourceList (sep);
2807
2808 /* get a list of unique specific_host values */
2809 spec_host_list = GetListOfUniqueSpecificHostValues (biop_list);
2810
2811 /* now format requests for unique specific_host values */
2812 FormatValidatorSpecificHostRequests (spec_host_list, &request_list, &req_host_list);
2813
2814 spec_host_list = ValNodeFreeData (spec_host_list);
2815
2816 level = ErrSetMessageLevel (SEV_MAX);
2817 response_list = Taxon3GetOrgRefList (request_list);
2818 ErrSetMessageLevel (level);
2819
2820 if (ValNodeLen (response_list) != ValNodeLen (request_list))
2821 {
2822 Message (MSG_POST, "Unable to retrieve information from tax server");
2823 }
2824 else
2825 {
2826 /* resort requests so that we can check all responses for the same BioSource together */
2827 check_list = SortSpecificHostOrgs (req_host_list, request_list, response_list);
2828 AddBioSourcesToSpecificHostChecklist (biop_list, check_list);
2829
2830 /* now look at responses */
2831 check_vnp = check_list;
2832 while (check_vnp != NULL)
2833 {
2834 p = (SpecificHostCheckPtr) check_vnp->data.ptrvalue;
2835 if (p != NULL)
2836 {
2837 has_match = FALSE;
2838 misspelled_flag = FALSE;
2839 bad_caps_flag = FALSE;
2840 ambiguous_flag = FALSE;
2841
2842 request_vnp = p->request_list;
2843 response_vnp = p->response_list;
2844 while (!has_match && request_vnp != NULL && response_vnp != NULL)
2845 {
2846 request_org = (OrgRefPtr) request_vnp->data.ptrvalue;
2847 response_org = (OrgRefPtr) response_vnp->data.ptrvalue;
2848 if (response_vnp->choice & eReturnedOrgFlag_misspelled)
2849 {
2850 misspelled_flag = TRUE;
2851 }
2852 else if (response_vnp->choice & eReturnedOrgFlag_ambiguous)
2853 {
2854 ambiguous_flag = TRUE;
2855 }
2856 else
2857 {
2858 match = FindMatchInOrgRef (request_org->taxname, response_org);
2859 if (StringCmp (match, request_org->taxname) == 0)
2860 {
2861 has_match = TRUE;
2862 }
2863 else if (StringICmp (match, request_org->taxname) == 0)
2864 {
2865 if (response_vnp->choice & eReturnedOrgFlag_common_name) {
2866 has_match = TRUE;
2867 } else {
2868 bad_caps_flag = TRUE;
2869 }
2870 }
2871 }
2872 request_vnp = request_vnp->next;
2873 response_vnp = response_vnp->next;
2874 }
2875 if (!has_match)
2876 {
2877 /* add to the list of bad */
2878 if (misspelled_flag) {
2879 if (misspelled_list != NULL) {
2880 ValNodeLink (misspelled_list, p->biop_list);
2881 p->biop_list = NULL;
2882 }
2883 } else if (bad_caps_flag) {
2884 if (bad_caps_list != NULL) {
2885 ValNodeLink (bad_caps_list, p->biop_list);
2886 p->biop_list = NULL;
2887 }
2888 } else if (ambiguous_flag) {
2889 if (ambiguous_list != NULL) {
2890 ValNodeLink (ambiguous_list, p->biop_list);
2891 p->biop_list = NULL;
2892 }
2893 } else {
2894 if (unrecognized_list != NULL) {
2895 ValNodeLink (unrecognized_list, p->biop_list);
2896 p->biop_list = NULL;
2897 }
2898 }
2899 }
2900 }
2901 check_vnp = check_vnp->next;
2902 }
2903 check_list = SpecificHostCheckListFree (check_list);
2904 }
2905
2906 biop_list = ValNodeFree (biop_list);
2907 request_list = FreeOrgRefValNodeList (request_list);
2908 response_list = FreeOrgRefValNodeList (response_list);
2909 req_host_list = ValNodeFreeData (req_host_list);
2910 }
2911
2912
2913 typedef struct spechostgather {
2914 ValNodePtr list;
2915 Boolean caps; /* if true, check only when first letter of first word is capitalized */
2916 Boolean paren; /* if true, check portion inside parentheses as separate string */
2917 } SpecHostGatherData, PNTR SpecHostGatherPtr;
2918
2919
ShouldCheckSpecificHostString(CharPtr str,SpecHostGatherPtr p)2920 static Boolean ShouldCheckSpecificHostString (CharPtr str, SpecHostGatherPtr p)
2921 {
2922 CharPtr cp_start;
2923 Boolean rval = FALSE;
2924
2925 if (StringHasNoText (str) || p == NULL) {
2926 return FALSE;
2927 }
2928
2929 if (!p->caps) {
2930 rval = TRUE;
2931 } else if (isupper (*str)) {
2932 rval = TRUE;
2933 } else if (p->paren) {
2934 cp_start = StringChr (str, '(');
2935 if (cp_start != NULL && ShouldCheckSpecificHostString (cp_start + 1, p)) {
2936 rval = TRUE;
2937 }
2938 }
2939 return rval;
2940 }
2941
2942
HasSpecificHostToBeChecked(BioSourcePtr biop,SpecHostGatherPtr p)2943 static Boolean HasSpecificHostToBeChecked (BioSourcePtr biop, SpecHostGatherPtr p)
2944 {
2945 OrgModPtr mod;
2946 Boolean rval = FALSE;
2947
2948 if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL || p == NULL) return FALSE;
2949
2950 for (mod = biop->org->orgname->mod; mod != NULL && !rval; mod = mod->next) {
2951 if (mod->subtype == ORGMOD_nat_host && ShouldCheckSpecificHostString (mod->subname, p)) {
2952 rval = TRUE;
2953 }
2954 }
2955 return rval;
2956 }
2957
2958
/* Feature callback: userdata is a SpecHostGatherPtr.  A BioSource feature
 * that passes HasSpecificHostToBeChecked is appended to the gather list
 * with choice OBJ_SEQFEAT.
 */
static void AddSpecificHostBioSourceFeatToList (SeqFeatPtr sfp, Pointer userdata)
{
  SpecHostGatherPtr gather;

  if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC && userdata != NULL) {
    gather = (SpecHostGatherPtr) userdata;
    if (HasSpecificHostToBeChecked (sfp->data.value.ptrvalue, gather)) {
      ValNodeAddPointer (&(gather->list), OBJ_SEQFEAT, sfp);
    }
  }
}
2971
2972
/* Descriptor callback: userdata is a SpecHostGatherPtr.  A source
 * descriptor that passes HasSpecificHostToBeChecked is appended to the
 * gather list with choice OBJ_SEQDESC.
 */
static void AddSpecificHostBioSourceDescToList (SeqDescrPtr sdp, Pointer userdata)
{
  SpecHostGatherPtr gather;

  if (sdp != NULL && sdp->choice == Seq_descr_source && userdata != NULL) {
    gather = (SpecHostGatherPtr) userdata;
    if (HasSpecificHostToBeChecked (sdp->data.ptrvalue, gather)) {
      ValNodeAddPointer (&(gather->list), OBJ_SEQDESC, sdp);
    }
  }
}
2985
2986
GetSpecificHostBioSourceList(SeqEntryPtr sep,Boolean caps,Boolean paren)2987 static ValNodePtr GetSpecificHostBioSourceList (SeqEntryPtr sep, Boolean caps, Boolean paren)
2988 {
2989 SpecHostGatherData d;
2990
2991 d.caps = caps;
2992 d.paren = paren;
2993 d.list = NULL;
2994 VisitFeaturesInSep (sep, &d, AddSpecificHostBioSourceFeatToList);
2995 VisitDescriptorsInSep (sep, &d, AddSpecificHostBioSourceDescToList);
2996 return d.list;
2997 }
2998
2999
3000 static void
FormatSpecificHostRequests(ValNodePtr spec_host_list,ValNodePtr PNTR request_list,ValNodePtr PNTR req_host_list,Boolean caps,Boolean paren)3001 FormatSpecificHostRequests
3002 (ValNodePtr spec_host_list,
3003 ValNodePtr PNTR request_list,
3004 ValNodePtr PNTR req_host_list,
3005 Boolean caps,
3006 Boolean paren)
3007 {
3008 ValNodePtr vnp;
3009 CharPtr orig, cp, str, cp2 = NULL;
3010
3011 /* now format requests for unique specific_host values */
3012 for (vnp = spec_host_list; vnp != NULL; vnp = vnp->next)
3013 {
3014 orig = (CharPtr) vnp->data.ptrvalue;
3015 /* if we have a value in parentheses, submit it separately */
3016 cp = StringChr (orig, '(');
3017 if (cp != NULL)
3018 {
3019 cp2 = StringChr (cp, ')');
3020 }
3021 if (cp != NULL && cp2 != NULL
3022 && ((cp > orig && orig[StringLen (orig) - 1] == ')') /* ends with paren */
3023 || (cp == orig))) /* starts with paren */
3024 {
3025 if (cp > orig && orig[StringLen (orig) - 1] == ')')
3026 {
3027 str = StringSave (orig);
3028 /* remove trailing parenthesis */
3029 str [StringLen(str) - 1] = 0;
3030
3031 cp = str + (cp - orig);
3032
3033 /* remove opening parenthesis */
3034 *cp = 0;
3035 cp++;
3036 }
3037 else
3038 {
3039 str = StringSave (orig);
3040 /* remove leading parenthesis */
3041 str[0] = ' ';
3042 cp = str + (cp2 - orig);
3043 /* remove trailing parenthesis */
3044 *cp = 0;
3045 cp++;
3046 }
3047 TrimSpacesAroundString (cp);
3048 TrimSpacesAroundString (str);
3049 if (paren && (!caps || isupper (*cp))) {
3050 AddRequestOrgForString (cp, orig, request_list, req_host_list);
3051 }
3052 if (!caps || isupper (*str)) {
3053 AddRequestOrgForString (str, orig, request_list, req_host_list);
3054 }
3055 }
3056 else
3057 {
3058 if (!caps || isupper (*orig)) {
3059 AddRequestOrgForString (orig, orig, request_list, req_host_list);
3060 }
3061 }
3062 }
3063 }
3064
3065
3066 typedef struct replacementpair {
3067 CharPtr find;
3068 CharPtr repl;
3069 } ReplacementPairData, PNTR ReplacementPairPtr;
3070
ReplacementPairNew(CharPtr find,CharPtr repl)3071 static ReplacementPairPtr ReplacementPairNew (CharPtr find, CharPtr repl)
3072 {
3073 ReplacementPairPtr r;
3074
3075 r = (ReplacementPairPtr) MemNew (sizeof (ReplacementPairData));
3076 r->find = StringSave (find);
3077 r->repl = StringSave (repl);
3078 return r;
3079 }
3080
/* Releases a ReplacementPairData together with its two strings.
 * A NULL argument is returned unchanged; otherwise the result of MemFree
 * on the record is returned.
 */
static ReplacementPairPtr ReplacementPairFree (ReplacementPairPtr r)
{
  if (r == NULL) {
    return r;
  }

  MemFree (r->find);
  MemFree (r->repl);
  return MemFree (r);
}
3090
/* Releases a ValNode list of ReplacementPairData records: each record goes
 * to ReplacementPairFree and each node to ValNodeFree.  Returns the list
 * pointer after the walk (NULL).
 */
static ValNodePtr ReplacementPairListFree (ValNodePtr list)
{
  ValNodePtr following;

  for (; list != NULL; list = following) {
    following = list->next;
    list->next = NULL;
    list->data.ptrvalue = ReplacementPairFree (list->data.ptrvalue);
    ValNodeFree (list);
  }
  return list;
}
3104
3105
3106 static SpecificHostFixPtr
SpecificHostFixNew(ValNodePtr feat_or_desc,CharPtr bad_host,CharPtr old_taxname,CharPtr new_taxname,Uint1 fix_type)3107 SpecificHostFixNew
3108 (ValNodePtr feat_or_desc,
3109 CharPtr bad_host,
3110 CharPtr old_taxname,
3111 CharPtr new_taxname,
3112 Uint1 fix_type)
3113 {
3114 SpecificHostFixPtr s;
3115
3116 s = (SpecificHostFixPtr) MemNew (sizeof (SpecificHostFixData));
3117 if (feat_or_desc != NULL)
3118 {
3119 s->feat_or_desc = ValNodeNew(NULL);
3120 s->feat_or_desc->choice = feat_or_desc->choice;
3121 s->feat_or_desc->data.ptrvalue = feat_or_desc->data.ptrvalue;
3122 }
3123 s->bad_specific_host = StringSave (bad_host);
3124 s->old_taxname = StringSave (old_taxname);
3125 s->new_taxname = StringSave (new_taxname);
3126 s->fix_type = fix_type;
3127 return s;
3128 }
3129
3130
/* Releases a SpecificHostFixData: its feat_or_desc node (ValNodeFree) and
 * its three strings, then the record itself.  A NULL argument is returned
 * unchanged; otherwise the result of MemFree on the record is returned.
 */
static SpecificHostFixPtr SpecificHostFixFree (SpecificHostFixPtr s)
{
  if (s == NULL) {
    return s;
  }

  ValNodeFree (s->feat_or_desc);
  MemFree (s->bad_specific_host);
  MemFree (s->old_taxname);
  MemFree (s->new_taxname);
  return MemFree (s);
}
3143
3144
/* Releases a ValNode list of SpecificHostFixData records: each record goes
 * to SpecificHostFixFree and each node to ValNodeFree.  Returns the list
 * pointer after the walk (NULL).
 */
extern ValNodePtr SpecificHostFixListFree (ValNodePtr vnp)
{
  ValNodePtr following;

  for (; vnp != NULL; vnp = following) {
    following = vnp->next;
    vnp->next = NULL;
    vnp->data.ptrvalue = SpecificHostFixFree (vnp->data.ptrvalue);
    ValNodeFree (vnp);
  }
  return vnp;
}
3159
3160
GetFixesForOneSpecificHostValue(SpecificHostCheckPtr p)3161 static ValNodePtr GetFixesForOneSpecificHostValue (SpecificHostCheckPtr p)
3162 {
3163 CharPtr prev_success = NULL, new_val, prev_fail = NULL;
3164 Boolean fix_needed = FALSE;
3165 ValNodePtr suggested_fixes = NULL;
3166 OrgRefPtr request_org, response_org;
3167 ValNodePtr biop_vnp, response_vnp, request_vnp, vnp;
3168 SpecificHostFixPtr s;
3169 ValNodePtr fix_list = NULL;
3170 ReplacementPairPtr r;
3171 Uint1 fix_type;
3172 Boolean add_nontrunc_fix;
3173 Boolean ambiguous = FALSE;
3174
3175 if (p == NULL) return NULL;
3176
3177 request_vnp = p->request_list;
3178 response_vnp = p->response_list;
3179
3180 while (request_vnp != NULL && response_vnp != NULL)
3181 {
3182 request_org = (OrgRefPtr) request_vnp->data.ptrvalue;
3183 response_org = (OrgRefPtr) response_vnp->data.ptrvalue;
3184 if (prev_success != NULL
3185 && StringNCmp (request_org->taxname, prev_success, StringLen (request_org->taxname)) == 0) {
3186 /* we don't need to check this one */
3187 } else if (response_org == NULL) {
3188 fix_needed = TRUE;
3189 if (response_vnp->choice & eReturnedOrgFlag_ambiguous) {
3190 ambiguous = TRUE;
3191 }
3192 if (prev_fail == NULL) {
3193 prev_fail = request_org->taxname;
3194 } else if (StringNCmp (prev_fail, request_org->taxname, StringLen (request_org->taxname)) != 0) {
3195 if (response_vnp->choice & eReturnedOrgFlag_ambiguous) {
3196 ValNodeAddPointer (&suggested_fixes, eSpecificHostFix_ambiguous, ReplacementPairNew (request_org->taxname, NULL));
3197 } else {
3198 ValNodeAddPointer (&suggested_fixes, eSpecificHostFix_unrecognized, ReplacementPairNew (request_org->taxname, NULL));
3199 }
3200 prev_fail = request_org->taxname;
3201 }
3202 } else {
3203 prev_success = request_org->taxname;
3204 add_nontrunc_fix = FALSE;
3205 if (response_vnp->choice & eReturnedOrgFlag_misspelled) {
3206 fix_needed = TRUE;
3207 fix_type = eSpecificHostFix_spelling;
3208 new_val = response_org->taxname;
3209 add_nontrunc_fix = TRUE;
3210 } else {
3211 new_val = FindMatchInOrgRef (request_org->taxname, response_org);
3212 if (new_val == NULL) {
3213 fix_needed = TRUE;
3214 fix_type = eSpecificHostFix_replacement;
3215 new_val = response_org->taxname;
3216 add_nontrunc_fix = TRUE;
3217 } else if (StringCmp (new_val, request_org->taxname) != 0) {
3218 fix_needed = TRUE;
3219 fix_type = eSpecificHostFix_capitalization;
3220 add_nontrunc_fix = TRUE;
3221 }
3222 }
3223
3224 /* add fix to truncate and correct spelling and capitalization first */
3225 /* this way the truncation won't fail when it looks for the old version that's already been corrected */
3226 if (prev_fail != NULL) {
3227 if (StringNCmp (prev_fail, request_org->taxname, StringLen (request_org->taxname)) == 0) {
3228 if (new_val != NULL) {
3229 ValNodeAddPointer (&suggested_fixes, eSpecificHostFix_truncation, ReplacementPairNew (prev_fail, new_val));
3230 fix_needed = TRUE;
3231 }
3232 } else {
3233 ValNodeAddPointer (&suggested_fixes, eSpecificHostFix_unrecognized, ReplacementPairNew (prev_fail, NULL));
3234 }
3235 }
3236 /* add fix for just spelling and capitalization after */
3237 if (add_nontrunc_fix) {
3238 ValNodeAddPointer (&suggested_fixes, fix_type, ReplacementPairNew (request_org->taxname, new_val));
3239 }
3240
3241 prev_fail = NULL;
3242 }
3243 request_vnp = request_vnp->next;
3244 response_vnp = response_vnp->next;
3245 }
3246
3247 if (fix_needed) {
3248 for (biop_vnp = p->biop_list; biop_vnp != NULL; biop_vnp = biop_vnp->next) {
3249 if (suggested_fixes == NULL) {
3250 s = SpecificHostFixNew (biop_vnp, p->spec_host, p->spec_host, NULL, ambiguous ? eSpecificHostFix_ambiguous : eSpecificHostFix_unrecognized);
3251 ValNodeAddPointer (&fix_list, 0, s);
3252 } else {
3253 for (vnp = suggested_fixes; vnp != NULL; vnp = vnp->next) {
3254 r = (ReplacementPairPtr) vnp->data.ptrvalue;
3255 s = SpecificHostFixNew (biop_vnp, p->spec_host, r->find, r->repl, vnp->choice);
3256 ValNodeAddPointer (&fix_list, 0, s);
3257 }
3258 }
3259 }
3260 }
3261 suggested_fixes = ReplacementPairListFree (suggested_fixes);
3262 return fix_list;
3263 }
3264
3265
Taxon3GetSpecificHostFixesInSeqEntry(SeqEntryPtr sep,Boolean caps,Boolean paren)3266 NLM_EXTERN ValNodePtr Taxon3GetSpecificHostFixesInSeqEntry (SeqEntryPtr sep, Boolean caps, Boolean paren)
3267 {
3268 ValNodePtr biop_list = NULL;
3269 ValNodePtr req_host_list = NULL, spec_host_list = NULL;
3270 ValNodePtr request_list = NULL;
3271 ValNodePtr response_list = NULL;
3272 ValNodePtr check_list, check_vnp;
3273 SpecificHostCheckPtr p;
3274 ErrSev level;
3275 ValNodePtr fix_list = NULL;
3276
3277 biop_list = GetSpecificHostBioSourceList (sep, caps, paren);
3278
3279 /* get a list of unique specific_host values */
3280 spec_host_list = GetListOfUniqueSpecificHostValues (biop_list);
3281
3282 /* now format requests for unique specific_host values */
3283 FormatSpecificHostRequests (spec_host_list, &request_list, &req_host_list, caps, paren);
3284
3285 spec_host_list = ValNodeFreeData (spec_host_list);
3286
3287 level = ErrSetMessageLevel (SEV_MAX);
3288 response_list = Taxon3GetOrgRefList (request_list);
3289 ErrSetMessageLevel (level);
3290
3291 if (ValNodeLen (response_list) != ValNodeLen (request_list))
3292 {
3293 Message (MSG_POST, "Unable to retrieve information from tax server");
3294 }
3295 else
3296 {
3297 /* resort requests so that we can check all responses for the same BioSource together */
3298 check_list = SortSpecificHostOrgs (req_host_list, request_list, response_list);
3299 AddBioSourcesToSpecificHostChecklist (biop_list, check_list);
3300
3301 /* now look at responses */
3302 check_vnp = check_list;
3303 while (check_vnp != NULL)
3304 {
3305 p = (SpecificHostCheckPtr) check_vnp->data.ptrvalue;
3306 ValNodeLink (&fix_list, GetFixesForOneSpecificHostValue (p));
3307 check_vnp = check_vnp->next;
3308 }
3309 check_list = SpecificHostCheckListFree (check_list);
3310 }
3311
3312 biop_list = ValNodeFree (biop_list);
3313 request_list = FreeOrgRefValNodeList (request_list);
3314 response_list = FreeOrgRefValNodeList (response_list);
3315 req_host_list = ValNodeFreeData (req_host_list);
3316
3317 return fix_list;
3318 }
3319
3320
ApplyOneSpecificHostFix(SpecificHostFixPtr s)3321 extern Boolean ApplyOneSpecificHostFix (SpecificHostFixPtr s)
3322 {
3323 BioSourcePtr biop = NULL;
3324 Boolean rval = FALSE;
3325 CharPtr new_spec_host;
3326 ValNode vn;
3327
3328 if (s == NULL || s->feat_or_desc == NULL
3329 || StringHasNoText (s->bad_specific_host)
3330 || StringHasNoText (s->old_taxname)
3331 || StringHasNoText (s->new_taxname)) {
3332 return rval;
3333 }
3334 biop = GetBioSourceFromValNode (s->feat_or_desc);
3335 if (biop == NULL) return rval;
3336
3337 vn.choice = SourceQualChoice_textqual;
3338 vn.data.intvalue = Source_qual_nat_host;
3339 vn.next = NULL;
3340
3341 new_spec_host = GetSourceQualFromBioSource (biop, &vn, NULL);
3342 FindReplaceString (&new_spec_host, s->old_taxname, s->new_taxname, TRUE, TRUE);
3343 if (StringCmp (new_spec_host, s->bad_specific_host) != 0)
3344 {
3345 rval = SetSourceQualInBioSource (biop, &vn, NULL, new_spec_host, ExistingTextOption_replace_old);
3346 }
3347 new_spec_host = MemFree (new_spec_host);
3348 return rval;
3349 }
3350
/* Feature-visitor callback (used with VisitFeaturesInSep): when sfp is a
 * BioSource feature, append it to the ValNode list whose address is passed
 * in userdata, tagged with choice OBJ_SEQFEAT.  The feature itself is not
 * copied; only its pointer is stored.
 */
static void AddBioSourceFeatToList (SeqFeatPtr sfp, Pointer userdata)
{
  ValNodePtr PNTR list_ptr = (ValNodePtr PNTR) userdata;

  if (sfp != NULL && list_ptr != NULL && sfp->data.choice == SEQFEAT_BIOSRC) {
    ValNodeAddPointer (list_ptr, OBJ_SEQFEAT, sfp);
  }
}
3357
3358
/* Descriptor-visitor callback (used with VisitDescriptorsInSep): when sdp
 * is a source descriptor, append it to the ValNode list whose address is
 * passed in userdata, tagged with choice OBJ_SEQDESC.  The descriptor
 * itself is not copied; only its pointer is stored.
 */
static void AddBioSourceDescToList (SeqDescrPtr sdp, Pointer userdata)
{
  ValNodePtr PNTR list_ptr = (ValNodePtr PNTR) userdata;

  if (sdp != NULL && list_ptr != NULL && sdp->choice == Seq_descr_source) {
    ValNodeAddPointer (list_ptr, OBJ_SEQDESC, sdp);
  }
}
3366
3367
GetBioSourceList(SeqEntryPtr sep)3368 static ValNodePtr GetBioSourceList (SeqEntryPtr sep)
3369 {
3370 ValNodePtr list = NULL;
3371
3372 VisitFeaturesInSep (sep, &list, AddBioSourceFeatToList);
3373 VisitDescriptorsInSep (sep, &list, AddBioSourceDescToList);
3374 return list;
3375 }
3376
3377
/* Returns a list of the distinct, non-blank organism taxnames found in the
 * BioSources referenced by biop_list (a list of feature/descriptor nodes).
 *
 * Each returned node has choice 0 and points directly at the taxname string
 * inside the corresponding OrgRef -- the strings are NOT copied, so the
 * result must not be released with a routine that frees the node data.
 */
static ValNodePtr GetListOfOrganismNames (ValNodePtr biop_list)
{
  ValNodePtr   names = NULL;
  ValNodePtr   item = biop_list;
  BioSourcePtr src;

  while (item != NULL) {
    if (item->data.ptrvalue != NULL) {
      src = GetBioSourceFromValNode (item);
      /* keep only named organisms that have not been seen yet */
      if (src != NULL && src->org != NULL
          && !StringHasNoText (src->org->taxname)
          && !StringAlreadyInValNodeList (src->org->taxname, names)) {
        ValNodeAddPointer (&names, 0, src->org->taxname);
      }
    }
    item = item->next;
  }

  return names;
}
3397
3398
/* For every BioSource holder in biop_list, finds the entry of check_list
 * (a list of SpecificHostCheckPtr) whose spec_host string equals the
 * BioSource's organism taxname, and appends the holder (same choice and
 * pointer) to that entry's biop_list.
 *
 * BioSources without an org or without an org->orgname are skipped.
 *
 * The search through check_list resumes at the position where the previous
 * search stopped and, if nothing is found from there to the end, wraps
 * around to the head of the list and continues up to that resume position.
 * A check_list node with NULL data ends the current scan.
 */
static void AddBioSourcesToChecklist (ValNodePtr biop_list, ValNodePtr check_list)
{
  ValNodePtr           item;
  ValNodePtr           cursor = NULL;   /* where the last search stopped */
  ValNodePtr           wrap_limit;      /* where a wrapped search must stop */
  BioSourcePtr         src;
  SpecificHostCheckPtr hit;
  CharPtr              name;

  if (biop_list == NULL || check_list == NULL) {
    return;
  }

  for (item = biop_list; item != NULL; item = item->next) {
    src = GetBioSourceFromValNode (item);
    if (src == NULL || src->org == NULL || src->org->orgname == NULL) {
      continue;
    }
    name = src->org->taxname;

    if (cursor == NULL) {
      /* start (or restart) at the head; no wrap-around needed */
      cursor = check_list;
      wrap_limit = NULL;
    } else {
      wrap_limit = cursor;
    }

    /* scan forward from the resume position */
    hit = NULL;
    for (; cursor != NULL; cursor = cursor->next) {
      hit = (SpecificHostCheckPtr) cursor->data.ptrvalue;
      if (hit == NULL || StringCmp (hit->spec_host, name) == 0) {
        break;
      }
      hit = NULL;
    }

    /* nothing found: wrap around and scan the part that was skipped */
    if (hit == NULL && wrap_limit != NULL) {
      for (cursor = check_list; cursor != wrap_limit; cursor = cursor->next) {
        hit = (SpecificHostCheckPtr) cursor->data.ptrvalue;
        if (hit == NULL || StringCmp (hit->spec_host, name) == 0) {
          break;
        }
        hit = NULL;
      }
    }

    if (hit != NULL) {
      ValNodeAddPointer (&(hit->biop_list), item->choice, item->data.ptrvalue);
    }
  }
}
3449
3450
/* Returns a new list containing those entries of biop_list whose BioSource
 * has an organism taxname exactly equal to taxname.
 *
 * biop_list entries are expected to be BioSource features (OBJ_SEQFEAT) or
 * source descriptors (OBJ_SEQDESC); entries of any other kind are ignored.
 * The returned nodes repeat the choice and pointer of the matching entries;
 * the features/descriptors themselves are not copied.
 * Returns NULL when taxname has no text, biop_list is empty, or nothing
 * matches.
 */
static ValNodePtr GetBioSourcesWithTaxName (CharPtr taxname, ValNodePtr biop_list)
{
  ValNodePtr   matches = NULL;
  ValNodePtr   node;
  SeqFeatPtr   feat;
  SeqDescrPtr  desc;
  BioSourcePtr src;

  if (biop_list == NULL || StringHasNoText (taxname)) {
    return NULL;
  }

  for (node = biop_list; node != NULL; node = node->next) {
    src = NULL;
    switch (node->choice) {
      case OBJ_SEQFEAT :
        feat = (SeqFeatPtr) node->data.ptrvalue;
        if (feat != NULL && feat->data.choice == SEQFEAT_BIOSRC) {
          src = (BioSourcePtr) feat->data.value.ptrvalue;
        }
        break;
      case OBJ_SEQDESC :
        desc = (SeqDescrPtr) node->data.ptrvalue;
        if (desc != NULL && desc->choice == Seq_descr_source) {
          src = (BioSourcePtr) desc->data.ptrvalue;
        }
        break;
      default :
        break;
    }

    if (src == NULL || src->org == NULL) {
      continue;
    }
    if (StringCmp (taxname, src->org->taxname) == 0) {
      ValNodeAddPointer (&matches, node->choice, node->data.ptrvalue);
    }
  }

  return matches;
}
3479
3480
GetOrganismTaxLookupFailuresInSeqEntry(SeqEntryPtr sep)3481 NLM_EXTERN ValNodePtr GetOrganismTaxLookupFailuresInSeqEntry (SeqEntryPtr sep)
3482 {
3483 ValNodePtr biop_list = NULL;
3484 ValNodePtr unique_list = NULL;
3485 ValNodePtr request_list = NULL;
3486 ValNodePtr response_list = NULL;
3487 ValNodePtr req_vnp, resp_vnp;
3488 ErrSev level;
3489 ValNodePtr failed_list = NULL, vnp;
3490 OrgRefPtr request_org;
3491
3492 biop_list = GetBioSourceList (sep);
3493
3494 /* get a list of unique specific_host values */
3495 unique_list = GetListOfOrganismNames (biop_list);
3496
3497 /* now format requests for unique taxname values */
3498 for (vnp = unique_list; vnp != NULL; vnp = vnp->next)
3499 {
3500 request_org = OrgRefNew();
3501 request_org->taxname = StringSave (vnp->data.ptrvalue);
3502 ValNodeAddPointer (&request_list, 3, request_org);
3503 }
3504
3505 unique_list = ValNodeFree (unique_list);
3506
3507 level = ErrSetMessageLevel (SEV_MAX);
3508 response_list = Taxon3GetOrgRefList (request_list);
3509 ErrSetMessageLevel (level);
3510
3511 if (ValNodeLen (response_list) != ValNodeLen (request_list))
3512 {
3513 Message (MSG_POST, "Unable to retrieve information from tax server");
3514 }
3515 else
3516 {
3517 for (req_vnp = request_list, resp_vnp = response_list;
3518 req_vnp != NULL && resp_vnp != NULL;
3519 req_vnp = req_vnp->next, resp_vnp = resp_vnp->next)
3520 {
3521 if (resp_vnp->data.ptrvalue == NULL)
3522 {
3523 request_org = (OrgRefPtr) req_vnp->data.ptrvalue;
3524 vnp = GetBioSourcesWithTaxName (request_org->taxname, biop_list);
3525 if (vnp != NULL) {
3526 ValNodeAddPointer (&failed_list, 0, StringSave (request_org->taxname));
3527 ValNodeLink (&failed_list, vnp);
3528 }
3529 }
3530 }
3531 }
3532
3533 biop_list = ValNodeFree (biop_list);
3534 request_list = FreeOrgRefValNodeList (request_list);
3535 response_list = FreeOrgRefValNodeList (response_list);
3536
3537 return failed_list;
3538 }
3539
3540
/* BioSource-visitor callback (used with VisitBioSourcesInSep): for every
 * db xref of the BioSource's organism whose database name is "taxon" and
 * whose integer id is positive, appends that id (choice 0) to the ValNode
 * list whose address is passed in data.
 *
 * A Dbtag whose tag (object id) pointer is NULL is skipped rather than
 * dereferenced.
 */
static void CollectTaxIds (BioSourcePtr biop, Pointer data)
{
  ValNodePtr vnp;
  DbtagPtr   dbtag;

  if (biop == NULL || biop->org == NULL || data == NULL) {
    return;
  }
  for (vnp = biop->org->db; vnp != NULL; vnp = vnp->next) {
    dbtag = (DbtagPtr) vnp->data.ptrvalue;
    /* guard against a malformed Dbtag that carries no object id */
    if (dbtag == NULL || dbtag->tag == NULL) {
      continue;
    }
    if (StringCmp ("taxon", dbtag->db) == 0 && dbtag->tag->id > 0) {
      ValNodeAddInt ((ValNodePtr PNTR) data, 0, dbtag->tag->id);
    }
  }
}
3556
3557
GetCommonOrgRefForSeqEntry(SeqEntryPtr sep)3558 NLM_EXTERN OrgRefPtr GetCommonOrgRefForSeqEntry (SeqEntryPtr sep)
3559 {
3560 ValNodePtr list = NULL;
3561 Taxon3RequestPtr t3rq;
3562 T3ReplyPtr trp;
3563 Taxon3ReplyPtr t3ry;
3564 T3DataPtr tdp;
3565 T3ErrorPtr tep;
3566 OrgRefPtr org = NULL;
3567
3568 VisitBioSourcesInSep (sep, &list, CollectTaxIds);
3569 if (list == NULL) {
3570 ErrPostEx (SEV_ERROR, 0, 0, "No tax IDs found - cannot create PopSet Title");
3571 return NULL;
3572 }
3573 ValNodeUnique (&list, SortByIntvalue, ValNodeFree);
3574
3575 t3rq = CreateJoinRequest (list);
3576 list = ValNodeFree (list);
3577
3578 t3ry = Tax3SynchronousQuery (t3rq);
3579 Taxon3RequestFree (t3rq);
3580 if (t3ry != NULL) {
3581 for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
3582 switch (trp->choice) {
3583 case T3Reply_error :
3584 tep = (T3ErrorPtr) trp->data.ptrvalue;
3585 if (tep != NULL) {
3586 ErrPostEx (SEV_ERROR, 0, 0, tep->message);
3587 }
3588 break;
3589 case T3Reply_data :
3590 tdp = (T3DataPtr) trp->data.ptrvalue;
3591 if (tdp != NULL) {
3592 org = (OrgRefPtr)(tdp->org);
3593 tdp->org = NULL;
3594 }
3595 break;
3596 default :
3597 break;
3598 }
3599 }
3600 Taxon3ReplyFree (t3ry);
3601 }
3602 if (org == NULL) {
3603 org = OrgRefNew ();
3604 org->taxname = StringSave ("Mixed organisms");
3605 }
3606 return org;
3607 }
3608
3609
SeqDescrFromBioSample(CharPtr number)3610 NLM_EXTERN SeqDescrPtr SeqDescrFromBioSample (CharPtr number)
3611
3612 {
3613 SeqDescrPtr sdp = NULL;
3614 CONN conn;
3615 AsnIoConnPtr aicp;
3616 size_t n_written;
3617 CharPtr i_query_fmt = "id=%s&format=asn1";
3618 CharPtr a_query_fmt = "accession=%s&format=asn1raw";
3619 CharPtr query_fmt;
3620 CharPtr query;
3621 EIO_Status status;
3622 CharPtr host = "api-int";
3623 CharPtr url = "/biosample/fetch/";
3624
3625 if (StringHasNoText (number)) return NULL;
3626 if (isalpha (*number)) {
3627 query_fmt = a_query_fmt;
3628 } else {
3629 query_fmt = i_query_fmt;
3630 }
3631 query = (CharPtr) MemNew (sizeof (Char) * (StringLen (query_fmt) + StringLen (number)));
3632 sprintf (query, query_fmt, number);
3633 conn = QUERY_OpenUrlQuery (host, 0, url,
3634 query, "Sequin", 30, eMIME_T_NcbiData,
3635 eMIME_Fasta, eENCOD_None, 0);
3636 query = MemFree (query);
3637 if (conn == NULL) return NULL;
3638 status = CONN_Write (conn, (const void *) query, StringLen (query),
3639 &n_written, eIO_WritePersist);
3640 if (status != eIO_Success) return NULL;
3641 QUERY_SendQuery (conn);
3642 aicp = QUERY_AsnIoConnOpen ("r", conn);
3643 sdp = SeqDescrAsnRead (aicp->aip, NULL);
3644 if (sdp == NULL) {
3645 if (aicp->aip->buf != NULL) {
3646 Message (MSG_POSTERR, "%s [%s]", aicp->aip->buf, number);
3647 } else {
3648 Message (MSG_POSTERR, "Unable to retrieve BioSample Data for %s", number);
3649 }
3650 }
3651 QUERY_AsnIoConnClose (aicp);
3652
3653 return sdp;
3654 }
3655
3656
3657