1 /* sqnutil1.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: sqnutil1.c
27 *
28 * Author: Jonathan Kans
29 *
30 * Version Creation Date: 9/2/97
31 *
32 * $Revision: 6.913 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44
45 #include <sqnutils.h>
46 #include <gather.h>
47 #include <subutil.h>
48 #include <objfdef.h>
49 #include <seqport.h>
50 #include <objproj.h>
51 /* #include <objmmdb1.h> */
52 #include <gbfeat.h>
53 #include <gbftdef.h>
54 #include <edutil.h>
55 #include <tofasta.h>
56 #include <parsegb.h>
57 #include <utilpars.h>
58 #include <validatr.h>
59 #include <explore.h>
60 #include <subutil.h>
61 #include <asn2gnbi.h>
62 #include <salpacc.h>
63 #include <alignmgr2.h>
64 #include <valid.h>
65 #include <objvalid.h>
66 #include <valapi.h>
67 #include <findrepl.h>
68
69
70 #define NLM_GENERATED_CODE_PROTO
71 #include <objmacro.h>
72 #include <macroapi.h>
73
74 static int descr_insert_order [] = {
75 Seq_descr_title,
76 Seq_descr_source,
77 Seq_descr_molinfo,
78 Seq_descr_het,
79 Seq_descr_pub,
80 Seq_descr_comment,
81 Seq_descr_name,
82 Seq_descr_user,
83 Seq_descr_maploc,
84 Seq_descr_region,
85 Seq_descr_num,
86 Seq_descr_dbxref,
87 Seq_descr_mol_type,
88 Seq_descr_modif,
89 Seq_descr_method,
90 Seq_descr_org,
91 Seq_descr_sp,
92 Seq_descr_pir,
93 Seq_descr_prf,
94 Seq_descr_pdb,
95 Seq_descr_embl,
96 Seq_descr_genbank,
97 Seq_descr_modelev,
98 Seq_descr_create_date,
99 Seq_descr_update_date,
100 0
101 };
102
/*
 * NormalizeDescriptorProc
 *
 * SeqEntryExplore callback that rewrites the descriptor chain of a single
 * Bioseq or BioseqSet so that the descriptors follow descr_insert_order.
 * Descriptors sharing the same choice keep their original relative order.
 *
 * Descriptors whose choice falls outside 1 .. SEQDESCR_MAX - 1 are gathered
 * in the extra last bucket.  That bucket (and any other bucket that the
 * order table does not mention) is re-attached after the ordered
 * descriptors, so no descriptor is lost from the chain.
 *
 *   sep     entry whose descriptor chain is reordered; NULL is ignored
 *   data    unused
 *   index   unused
 *   indent  unused
 */
static void NormalizeDescriptorProc (
  SeqEntryPtr sep,
  Pointer data,
  Int4 index,
  Int2 indent
)

{
  BioseqPtr         bsp;
  BioseqSetPtr      bssp;
  /* arrays are SEQDESCR_MAX + 1, last slot stores unexpected descriptor numbers */
  SeqDescrPtr       first [SEQDESCR_MAX + 1];
  SeqDescrPtr       last [SEQDESCR_MAX + 1];
  SeqDescrPtr PNTR  head = NULL;
  SeqDescrPtr PNTR  tail;
  int               i;
  int               idx;
  SeqDescrPtr       next;
  SeqDescrPtr       sdp;

  if (sep == NULL) return;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    head = &(bsp->descr);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    head = &(bssp->descr);
  }
  if (head == NULL) return;

  MemSet ((Pointer) first, 0, sizeof (first));
  MemSet ((Pointer) last, 0, sizeof (last));

  /* detach the whole chain and distribute the nodes into per-choice buckets */
  sdp = *head;
  *head = NULL;
  while (sdp != NULL) {
    next = sdp->next;
    sdp->next = NULL;

    idx = (int) sdp->choice;
    /* unexpected descriptor numbers go into last slot */
    if (idx <= 0 || idx >= SEQDESCR_MAX) {
      idx = SEQDESCR_MAX;
    }

    if (first [idx] == NULL) {
      first [idx] = sdp;
    } else {
      (last [idx])->next = sdp;
    }
    last [idx] = sdp;

    sdp = next;
  }

  /* splice the buckets back in table order; tail always addresses the
     next pointer that terminates the chain built so far */
  tail = head;
  for (i = 0; descr_insert_order [i] != 0; i++) {
    idx = descr_insert_order [i];
    if (first [idx] == NULL) continue;
    *tail = first [idx];
    tail = &((last [idx])->next);
    first [idx] = NULL;   /* mark bucket as consumed */
  }

  /* anything still sitting in a bucket was not named in the order table
     (in particular the "unexpected" last slot); keep it at the end of the
     chain instead of dropping it */
  for (idx = 0; idx <= SEQDESCR_MAX; idx++) {
    if (first [idx] == NULL) continue;
    *tail = first [idx];
    tail = &((last [idx])->next);
    first [idx] = NULL;
  }
}
172
NormalizeDescriptorOrder(SeqEntryPtr sep)173 NLM_EXTERN void NormalizeDescriptorOrder (
174 SeqEntryPtr sep
175 )
176
177 {
178 SeqEntryExplore (sep, NULL, NormalizeDescriptorProc);
179 }
180
181 typedef struct orgscan {
182 ObjMgrPtr omp;
183 Int2 nuclCode;
184 Int2 mitoCode;
185 Int2 pstdCode;
186 Boolean mito;
187 Boolean plastid;
188 Char taxname [196];
189 BioSourcePtr biop;
190 } OrgScan, PNTR OrgScanPtr;
191
OrgScanGatherFunc(GatherContextPtr gcp)192 static Boolean OrgScanGatherFunc (GatherContextPtr gcp)
193
194 {
195 BioSourcePtr biop;
196 Boolean doCodes = FALSE;
197 Boolean doMito = FALSE;
198 Boolean doTaxname = FALSE;
199 Boolean mito = FALSE;
200 Int2 mitoCode = 0;
201 Int2 nuclCode = 0;
202 Int2 pstdCode = 0;
203 ObjMgrTypePtr omtp;
204 OrgNamePtr onp;
205 OrgRefPtr orp;
206 OrgScanPtr osp;
207 ValNodePtr sdp;
208 SeqFeatPtr sfp;
209 Uint2 subtype;
210 CharPtr taxname = NULL;
211 Int2 val;
212 ValNodePtr vnp;
213
214 if (gcp == NULL || gcp->thisitem == NULL) return TRUE;
215 if (gcp->thistype != OBJ_SEQFEAT && gcp->thistype != OBJ_SEQDESC) return TRUE;
216
217 osp = (OrgScanPtr) gcp->userdata;
218 if (osp == NULL) return TRUE;
219
220 subtype = 0;
221 if (gcp->thistype == OBJ_SEQFEAT || gcp->thistype == OBJ_SEQDESC) {
222 omtp = ObjMgrTypeFind (osp->omp, gcp->thistype, NULL, NULL);
223 if (omtp == NULL) {
224 return TRUE;
225 }
226 if (omtp->subtypefunc != NULL) {
227 subtype = (*(omtp->subtypefunc)) (gcp->thisitem);
228 }
229 }
230
231 orp = NULL;
232 biop = NULL;
233 switch (gcp->thistype) {
234 case OBJ_SEQFEAT :
235 sfp = (SeqFeatPtr) gcp->thisitem;
236 switch (subtype) {
237 case FEATDEF_ORG :
238 //LCOV_EXCL_START
239 //org features are converted to biosrc features in BasicCleanup
240 orp = (OrgRefPtr) sfp->data.value.ptrvalue;
241 break;
242 //LCOV_EXCL_STOP
243 case FEATDEF_BIOSRC :
244 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
245 break;
246 default :
247 break;
248 }
249 break;
250 case OBJ_SEQDESC :
251 sdp = (ValNodePtr) gcp->thisitem;
252 switch (subtype) {
253 case Seq_descr_modif :
254 vnp = (ValNodePtr) sdp->data.ptrvalue;
255 while (vnp != NULL) {
256 val = (Int2) vnp->data.intvalue;
257 if (val == MODIF_mitochondrial || val == MODIF_kinetoplast) {
258 mito = TRUE;
259 doMito = TRUE;
260 /* osp->mito = TRUE; */
261 }
262 vnp = vnp->next;
263 }
264 break;
265 case Seq_descr_org :
266 //LCOV_EXCL_START
267 // org descriptors are converted to biosrc descriptors in basiccleanup
268 orp = (OrgRefPtr) sdp->data.ptrvalue;
269 break;
270 //LCOV_EXCL_STOP
271 case Seq_descr_source :
272 biop = (BioSourcePtr) sdp->data.ptrvalue;
273 break;
274 default :
275 break;
276 }
277 break;
278 default :
279 break;
280 }
281
282 if (orp == NULL && biop != NULL) {
283 orp = biop->org;
284 mito = (Boolean) (biop->genome == GENOME_kinetoplast ||
285 biop->genome == GENOME_mitochondrion ||
286 biop->genome == GENOME_hydrogenosome);
287 doMito = TRUE;
288 /* osp->mito = (Boolean) (biop->genome == 4 || biop->genome == 5); */
289 }
290 if (orp != NULL) {
291 taxname = orp->taxname;
292 doTaxname = TRUE;
293 /* StringNCpy_0 (osp->taxname, orp->taxname, sizeof (osp->taxname)); */
294 onp = orp->orgname;
295 if (onp != NULL) {
296 nuclCode = onp->gcode;
297 mitoCode = onp->mgcode;
298 pstdCode = onp->pgcode;
299 doCodes = TRUE;
300 /* osp->nuclCode = onp->gcode;
301 osp->mitoCode = onp->mgcode; */
302 }
303 }
304 if (biop != NULL) {
305 if (osp->biop == NULL || biop->is_focus) {
306 osp->biop = biop;
307 if (doMito) {
308 osp->mito = mito;
309 }
310 osp->plastid = (Boolean) (biop->genome == GENOME_chloroplast ||
311 biop->genome == GENOME_chromoplast ||
312 biop->genome == GENOME_plastid ||
313 biop->genome == GENOME_cyanelle ||
314 biop->genome == GENOME_apicoplast ||
315 biop->genome == GENOME_leucoplast ||
316 biop->genome == GENOME_proplastid ||
317 biop->genome == GENOME_chromatophore);
318 if (doCodes) {
319 osp->nuclCode = nuclCode;
320 osp->mitoCode = mitoCode;
321 osp->pstdCode = pstdCode;
322 }
323 if (doTaxname) {
324 StringNCpy_0 (osp->taxname, taxname, sizeof (osp->taxname));
325 }
326 }
327 }
328
329 return TRUE;
330 }
331
332 //LCOV_EXCL_START
SeqEntryOrEntityIDToGeneticCode(SeqEntryPtr sep,Uint2 entityID,BoolPtr mito,CharPtr taxname,size_t maxsize,BioSourcePtr PNTR biopp)333 static Int2 SeqEntryOrEntityIDToGeneticCode (SeqEntryPtr sep, Uint2 entityID, BoolPtr mito,
334 CharPtr taxname, size_t maxsize,
335 BioSourcePtr PNTR biopp)
336
337 {
338 GatherScope gs;
339 OrgScan osp;
340
341 if (mito != NULL) {
342 *mito = FALSE;
343 }
344 if (taxname != NULL && maxsize > 0) {
345 *taxname = '\0';
346 }
347 osp.mito = FALSE;
348 osp.plastid = FALSE;
349 osp.nuclCode = 0;
350 osp.mitoCode = 0;
351 osp.pstdCode = 0;
352 osp.omp = ObjMgrGet ();
353 osp.taxname [0] = '\0';
354 osp.biop = NULL;
355 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
356 gs.seglevels = 1;
357 gs.get_feats_location = TRUE;
358 MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
359 gs.ignore[OBJ_BIOSEQ] = FALSE;
360 gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
361 gs.ignore[OBJ_SEQFEAT] = FALSE;
362 gs.ignore[OBJ_SEQANNOT] = FALSE;
363 gs.ignore[OBJ_SEQDESC] = FALSE;
364 if (sep != NULL) {
365 gs.scope = sep;
366 GatherSeqEntry (sep, (Pointer) &osp, OrgScanGatherFunc, &gs);
367 } else if (entityID > 0) {
368 GatherEntity (entityID, (Pointer) &osp, OrgScanGatherFunc, &gs);
369 }
370 if (mito != NULL) {
371 *mito = osp.mito;
372 }
373 if (taxname != NULL && maxsize > 0) {
374 StringNCpy_0 (taxname, osp.taxname, maxsize);
375 }
376 if (biopp != NULL) {
377 *biopp = osp.biop;
378 }
379 if (osp.plastid) {
380 if (osp.pstdCode > 0) {
381 return osp.pstdCode;
382 } else {
383 return 11;
384 }
385 } else if (osp.mito) {
386 return osp.mitoCode;
387 } else {
388 return osp.nuclCode;
389 }
390 }
391
EntityIDToGeneticCode(Uint2 entityID,BoolPtr mito,CharPtr taxname,size_t maxsize)392 NLM_EXTERN Int2 EntityIDToGeneticCode (Uint2 entityID, BoolPtr mito, CharPtr taxname, size_t maxsize)
393
394 {
395 return SeqEntryOrEntityIDToGeneticCode (NULL, entityID, mito, taxname, maxsize, NULL);
396 }
397
SeqEntryToGeneticCode(SeqEntryPtr sep,BoolPtr mito,CharPtr taxname,size_t maxsize)398 NLM_EXTERN Int2 SeqEntryToGeneticCode (SeqEntryPtr sep, BoolPtr mito, CharPtr taxname, size_t maxsize)
399
400 {
401 return SeqEntryOrEntityIDToGeneticCode (sep, 0, mito, taxname, maxsize, NULL);
402 }
403
SeqEntryToBioSource(SeqEntryPtr sep,BoolPtr mito,CharPtr taxname,size_t maxsize,BioSourcePtr PNTR biopp)404 NLM_EXTERN Int2 SeqEntryToBioSource (SeqEntryPtr sep, BoolPtr mito, CharPtr taxname, size_t maxsize, BioSourcePtr PNTR biopp)
405
406 {
407 return SeqEntryOrEntityIDToGeneticCode (sep, 0, mito, taxname, maxsize, biopp);
408 }
409
410
BioseqToGeneticCode(BioseqPtr bsp,Int2Ptr gencodep,BoolPtr mitop,BoolPtr plastidp,CharPtr taxnamep,size_t maxsize,BioSourcePtr PNTR biopp)411 NLM_EXTERN Boolean BioseqToGeneticCode (
412 BioseqPtr bsp,
413 Int2Ptr gencodep,
414 BoolPtr mitop,
415 BoolPtr plastidp,
416 CharPtr taxnamep,
417 size_t maxsize,
418 BioSourcePtr PNTR biopp
419 )
420
421 {
422 BioSourcePtr biop = NULL;
423 SeqMgrDescContext dcontext;
424 SeqMgrFeatContext fcontext;
425 Int2 gencode = 0;
426 Boolean mito = FALSE;
427 Int2 mitoCode = 0;
428 Int2 nuclCode = 0;
429 Int2 pstdCode = 0;
430 OrgNamePtr onp;
431 OrgRefPtr orp;
432 Boolean plastid = FALSE;
433 SeqDescrPtr sdp;
434 SeqFeatPtr sfp;
435 CharPtr taxname = NULL;
436
437 if (bsp == NULL) return FALSE;
438
439 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
440 if (sdp != NULL) {
441 biop = (BioSourcePtr) sdp->data.ptrvalue;
442 }
443
444 if (biop == NULL) {
445 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
446 if (sfp != NULL) {
447 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
448 }
449 }
450
451 if (biop == NULL) return FALSE;
452 orp = biop->org;
453 if (orp == NULL) return FALSE;
454
455 taxname = orp->taxname;
456 if (StringHasNoText (taxname)) return FALSE;
457
458 onp = orp->orgname;
459 if (onp != NULL) {
460 nuclCode = onp->gcode;
461 mitoCode = onp->mgcode;
462 pstdCode = onp->pgcode;
463 }
464
465 mito = (Boolean) (biop->genome == GENOME_kinetoplast ||
466 biop->genome == GENOME_mitochondrion ||
467 biop->genome == GENOME_hydrogenosome);
468
469 plastid = (Boolean) (biop->genome == GENOME_chloroplast ||
470 biop->genome == GENOME_chromoplast ||
471 biop->genome == GENOME_plastid ||
472 biop->genome == GENOME_cyanelle ||
473 biop->genome == GENOME_apicoplast ||
474 biop->genome == GENOME_leucoplast ||
475 biop->genome == GENOME_proplastid ||
476 biop->genome == GENOME_chromatophore);
477
478 if (plastid) {
479 if (pstdCode > 0) {
480 gencode = pstdCode;
481 } else {
482 gencode = 11;
483 }
484 } else if (mito) {
485 gencode = mitoCode;
486 } else {
487 gencode = nuclCode;
488 }
489
490 if (gencodep != NULL) {
491 *gencodep = gencode;
492 }
493 if (mitop != NULL) {
494 *mitop = mito;
495 }
496 if (plastidp != NULL) {
497 *plastidp = plastid;
498 }
499 if (taxnamep != NULL && maxsize > 0) {
500 StringNCpy_0 (taxnamep, taxname, maxsize);
501 }
502 if (biopp != NULL) {
503 *biopp = biop;
504 }
505
506 return TRUE;
507 }
508
509
510 typedef struct commontitle {
511 BioseqPtr bsp;
512 SeqDescPtr sdp;
513 } CommonTitleData, PNTR CommonTitlePtr;
514
515
CommonTitleNew(BioseqPtr bsp,SeqDescPtr sdp)516 static CommonTitlePtr CommonTitleNew (BioseqPtr bsp, SeqDescPtr sdp)
517 {
518 CommonTitlePtr c = (CommonTitlePtr) MemNew (sizeof (CommonTitleData));
519 c->bsp = bsp;
520 c->sdp = sdp;
521 return c;
522 }
523
524
/*
 * CommonTitleFree
 *
 * Releases the record itself (not the Bioseq or descriptor it refers to).
 * A NULL argument is returned unchanged; otherwise the result of MemFree
 * is returned.
 */
static CommonTitlePtr CommonTitleFree (CommonTitlePtr c)
{
  if (c == NULL) return NULL;
  return (CommonTitlePtr) MemFree (c);
}
532
533
/*
 * CommonTitleListFree
 *
 * Frees a ValNode list whose data pointers are CommonTitle records: each
 * record goes through CommonTitleFree and each node, unlinked from its
 * successor first, through ValNodeFree.
 *
 * Returns NULL.
 */
static ValNodePtr CommonTitleListFree (ValNodePtr vnp)
{
  ValNodePtr  node;
  ValNodePtr  rest;

  for (node = vnp; node != NULL; node = rest) {
    rest = node->next;
    node->next = NULL;   /* hand ValNodeFree exactly one node */
    node->data.ptrvalue = CommonTitleFree (node->data.ptrvalue);
    ValNodeFree (node);
  }
  return NULL;
}
547
548
/*
 * RemoveCommonTitles
 *
 * Walks a list of CommonTitle records and flags for deletion every title
 * descriptor whose text equals common_title.  Only descriptors with a
 * non-zero 'extended' field are flagged, because the flag lives in the
 * ObjValNode the descriptor is then cast to.
 *
 * List entries without a record or without a descriptor are skipped, in
 * line with the checks made in SortCommonTitle.
 */
static void RemoveCommonTitles (ValNodePtr vnp, CharPtr common_title)
{
  CommonTitlePtr  c;
  ObjValNodePtr   ovp;

  for (; vnp != NULL; vnp = vnp->next) {
    c = (CommonTitlePtr) vnp->data.ptrvalue;
    if (c == NULL || c->sdp == NULL) continue;

    if (StringCmp (c->sdp->data.ptrvalue, common_title) == 0 && c->sdp->extended > 0) {
      ovp = (ObjValNodePtr) c->sdp;
      ovp->idx.deleteme = TRUE;
    }
  }
}
563
564
SortCommonTitle(VoidPtr ptr1,VoidPtr ptr2)565 static int LIBCALLBACK SortCommonTitle (VoidPtr ptr1, VoidPtr ptr2)
566
567 {
568 CommonTitlePtr c1;
569 CommonTitlePtr c2;
570 ValNodePtr vnp1;
571 ValNodePtr vnp2;
572
573 if (ptr1 != NULL && ptr2 != NULL) {
574 vnp1 = *((ValNodePtr PNTR) ptr1);
575 vnp2 = *((ValNodePtr PNTR) ptr2);
576 if (vnp1 != NULL && vnp2 != NULL) {
577 c1 = (CommonTitlePtr) vnp1->data.ptrvalue;
578 c2 = (CommonTitlePtr) vnp2->data.ptrvalue;
579 if (c1 != NULL && c2 != NULL && c1->sdp != NULL && c2->sdp != NULL
580 && c1->sdp->data.ptrvalue != NULL && c2->sdp->data.ptrvalue != NULL) {
581 return StringCmp (c1->sdp->data.ptrvalue, c2->sdp->data.ptrvalue);
582 }
583 }
584 }
585 return 0;
586 }
587
588
/*
 * CollectCommonTitle
 *
 * Bioseq visitor.  'data' is the address of a ValNode list head; one
 * CommonTitle record is appended to that list for every title descriptor
 * found directly on bsp.  Bioseqs for which ISA_aa (bsp->mol) holds are
 * skipped.
 */
static void CollectCommonTitle (BioseqPtr bsp, Pointer data)
{
  SeqDescPtr       desc;
  ValNodePtr PNTR  listp;

  if (bsp == NULL) return;
  if (ISA_aa (bsp->mol)) return;
  if (data == NULL) return;

  listp = (ValNodePtr PNTR) data;
  for (desc = bsp->descr; desc != NULL; desc = desc->next) {
    if (desc->choice != Seq_descr_title) continue;
    ValNodeAddPointer (listp, 0, CommonTitleNew (bsp, desc));
  }
}
605
606
FindCommonTitleFromList(ValNodePtr list)607 static CharPtr FindCommonTitleFromList (ValNodePtr list)
608 {
609 ValNodePtr vnp;
610 CommonTitlePtr c;
611 Int4 num_common = 0, num_total, num_expected;
612 CharPtr common_title;
613
614 if (list == NULL) {
615 return NULL;
616 }
617 num_total = ValNodeLen (list);
618 if (num_total % 2 != 0 || num_total < 4) {
619 return NULL;
620 }
621 num_expected = num_total / 2;
622
623 c = list->data.ptrvalue;
624 common_title = c->sdp->data.ptrvalue;
625 num_common = 1;
626
627 for (vnp = list->next; vnp != NULL; vnp = vnp->next) {
628 c = (CommonTitlePtr) vnp->data.ptrvalue;
629 if (StringCmp (common_title, c->sdp->data.ptrvalue) == 0) {
630 num_common++;
631 } else {
632 num_common = 1;
633 common_title = c->sdp->data.ptrvalue;
634 }
635 }
636 if (num_common == num_expected) {
637 return StringSave (common_title);
638 }
639
640 return NULL;
641 }
642
643
/*
 * PromoteCommonTitlesSetCallback
 *
 * BioseqSet visitor.  For a set whose class passes GetsDocsumTitle, the
 * title descriptors of the Bioseqs in the set are collected and examined
 * with FindCommonTitleFromList.  If a common title results, a new title
 * descriptor holding it is put at the front of the set's descriptors and
 * the matching Bioseq titles are flagged for deletion by
 * RemoveCommonTitles.
 *
 * Exception: when the number of collected titles equals the number of
 * direct set members and the set already has a title with different
 * text, nothing is promoted.
 *
 * 'data' is unused.
 */
static void PromoteCommonTitlesSetCallback (BioseqSetPtr bssp, Pointer data)
{
  SeqDescrPtr  desc;
  CharPtr      existing = NULL;
  SeqEntryPtr  member;
  Int4         memberCount = 0;
  CharPtr      shared;
  ValNodePtr   titles = NULL;

  if (bssp == NULL) return;
  if (! GetsDocsumTitle (bssp->_class)) return;

  VisitBioseqsInSet (bssp, &titles, CollectCommonTitle);
  titles = ValNodeSort (titles, SortCommonTitle);

  shared = FindCommonTitleFromList (titles);
  if (shared != NULL) {
    for (member = bssp->seq_set; member != NULL; member = member->next) {
      memberCount++;
    }
    if (ValNodeLen (titles) == memberCount) {
      /* pick up the first title on the set that actually has text */
      desc = bssp->descr;
      while (desc != NULL && existing == NULL) {
        if (desc->choice == Seq_descr_title) {
          existing = desc->data.ptrvalue;
        }
        desc = desc->next;
      }
      if (existing != NULL && StringCmp (existing, shared) != 0) {
        /* don't remove, the seq titles just happen to be identical */
        shared = MemFree (shared);
      }
    }
  }

  if (shared != NULL) {
    /* the new descriptor takes ownership of the string */
    desc = SeqDescrNew (NULL);
    desc->choice = Seq_descr_title;
    desc->data.ptrvalue = shared;
    desc->next = bssp->descr;
    bssp->descr = desc;
    RemoveCommonTitles (titles, shared);
  }

  titles = CommonTitleListFree (titles);
}
690
691
PromoteCommonTitlesToSet(SeqEntryPtr sep)692 NLM_EXTERN void PromoteCommonTitlesToSet (SeqEntryPtr sep)
693 {
694 VisitSetsInSep (sep, NULL, PromoteCommonTitlesSetCallback);
695 }
696 //LCOV_EXCL_STOP
697
DeleteMultipleTitles(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)698 NLM_EXTERN void DeleteMultipleTitles (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
699
700 {
701 BioseqPtr bsp;
702 BioseqSetPtr bssp;
703 Boolean hastitle;
704 ValNodePtr nextsdp;
705 Pointer PNTR prevsdp;
706 ValNodePtr sdp;
707
708 if (IS_Bioseq (sep)) {
709 bsp = (BioseqPtr) sep->data.ptrvalue;
710 sdp = bsp->descr;
711 prevsdp = (Pointer PNTR) &(bsp->descr);
712 } else if (IS_Bioseq_set (sep)) {
713 //LCOV_EXCL_START
714 //cleanup functions only call this during RenormalizeNucProtSets,
715 //and only for Bioseqs
716 bssp = (BioseqSetPtr) sep->data.ptrvalue;
717 sdp = bssp->descr;
718 prevsdp = (Pointer PNTR) &(bssp->descr);
719 //LCOV_EXCL_STOP
720 } else return;
721 hastitle = FALSE;
722 while (sdp != NULL) {
723 nextsdp = sdp->next;
724 if (sdp->choice == Seq_descr_title) {
725 if (hastitle) {
726 //LCOV_EXCL_START
727 //when called from RenormalizeNucProtSets,
728 //extra titles are already gone
729 *(prevsdp) = sdp->next;
730 sdp->next = NULL;
731 SeqDescFree (sdp);
732 //LCOV_EXCL_STOP
733 } else {
734 hastitle = TRUE;
735 prevsdp = (Pointer PNTR) &(sdp->next);
736 }
737 } else {
738 prevsdp = (Pointer PNTR) &(sdp->next);
739 }
740 sdp = nextsdp;
741 }
742 }
743
RenormalizeNucProtSets(SeqEntryPtr sep,Boolean relink)744 NLM_EXTERN Int4 RenormalizeNucProtSets (SeqEntryPtr sep, Boolean relink)
745
746 {
747 SeqAnnotPtr annot;
748 BioseqPtr bsp;
749 BioseqSetPtr bssp;
750 ValNodePtr descr;
751 ObjMgrDataPtr omdptop;
752 ObjMgrData omdata;
753 Uint2 parenttype;
754 Pointer parentptr;
755 SeqAnnotPtr sap, tmp_sap;
756 SeqEntryPtr seqentry;
757 Int4 num_renormalized = 0;
758
759 if (sep == NULL) return 0;
760 if (IS_Bioseq_set (sep)) {
761 bssp = (BioseqSetPtr) sep->data.ptrvalue;
762 if (bssp != NULL && (bssp->_class == 7 ||
763 (bssp->_class >= 13 && bssp->_class <= 16) ||
764 bssp->_class == BioseqseqSet_class_wgs_set ||
765 bssp->_class == BioseqseqSet_class_gen_prod_set ||
766 bssp->_class == BioseqseqSet_class_small_genome_set)) {
767 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
768 num_renormalized += RenormalizeNucProtSets (sep, relink);
769 }
770 return num_renormalized;
771 }
772 if (bssp != NULL && bssp->_class == 1) {
773 seqentry = bssp->seq_set;
774 if (seqentry != NULL && seqentry->next == NULL) {
775
776 if (relink) {
777 SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
778 GetSeqEntryParent (sep, &parentptr, &parenttype);
779 }
780
781 descr = bssp->descr;
782 bssp->descr = NULL;
783 annot = bssp->annot;
784 bssp->annot = NULL;
785
786 sep->choice = seqentry->choice;
787 sep->data.ptrvalue = seqentry->data.ptrvalue;
788 seqentry->data.ptrvalue = NULL;
789 bssp->seq_set = NULL;
790 bssp->seqentry = NULL;
791 MemFree (seqentry);
792 BioseqSetFree (bssp);
793
794 sap = NULL;
795 if (IS_Bioseq (sep)) {
796 bsp = (BioseqPtr) sep->data.ptrvalue;
797 ValNodeLink (&(bsp->descr), descr);
798 if (bsp->annot == NULL) {
799 bsp->annot = annot;
800 annot = NULL;
801 } else {
802 sap = bsp->annot;
803 }
804 } else if (IS_Bioseq_set (sep)) {
805 //LCOV_EXCL_START
806 //should not have set inside nuc-prot set
807 bssp = (BioseqSetPtr) sep->data.ptrvalue;
808 ValNodeLink (&(bssp->descr), descr);
809 if (bssp->annot == NULL) {
810 bssp->annot = annot;
811 annot = NULL;
812 } else {
813 sap = bssp->annot;
814 }
815 //LCOV_EXCL_STOP
816 }
817 if (sap != NULL) {
818 tmp_sap = sap;
819 while (tmp_sap->next != NULL) {
820 tmp_sap = tmp_sap->next;
821 }
822 tmp_sap->next = annot;
823 MergeAdjacentAnnotsInList (sap);
824 }
825
826 DeleteMultipleTitles (sep, NULL, 0, 0);
827
828 if (relink) {
829 SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
830 RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
831 }
832 num_renormalized++;
833 }
834 }
835 }
836 return num_renormalized;
837 }
838
839
840 //LCOV_EXCL_START
841 //only used by RemoveSingleItemSet, which is not used by cleanup
SetHasAlignments(BioseqSetPtr bssp)842 static Boolean SetHasAlignments (BioseqSetPtr bssp)
843 {
844 SeqAnnotPtr sap;
845 Boolean rval = FALSE;
846
847 if (bssp == NULL) {
848 return FALSE;
849 }
850 for (sap = bssp->annot; sap != NULL && !rval; sap = sap->next) {
851 if (sap->type == 2) {
852 rval = TRUE;
853 }
854 }
855 return rval;
856 }
857
858
859 //not used by cleanup
RemoveSingleItemSet(SeqEntryPtr sep,Boolean relink)860 NLM_EXTERN Int4 RemoveSingleItemSet(SeqEntryPtr sep, Boolean relink)
861 {
862 SeqAnnotPtr annot;
863 BioseqPtr bsp;
864 BioseqSetPtr bssp;
865 ValNodePtr descr;
866 ObjMgrDataPtr omdptop;
867 ObjMgrData omdata;
868 Uint2 parenttype;
869 Pointer parentptr;
870 SeqAnnotPtr sap, tmp_sap;
871 SeqEntryPtr seqentry, sep_next;
872 Int4 num_renormalized = 0;
873
874 if (sep == NULL
875 || !IS_Bioseq_set (sep)
876 || (bssp = (BioseqSetPtr) sep->data.ptrvalue) == NULL) {
877 return 0;
878 }
879
880 if ((bssp->_class == BioseqseqSet_class_pop_set
881 || bssp->_class == BioseqseqSet_class_phy_set
882 || bssp->_class == BioseqseqSet_class_mut_set
883 || bssp->_class == BioseqseqSet_class_eco_set)
884 && bssp->seq_set != NULL
885 && bssp->seq_set->next == NULL
886 && !SetHasAlignments(bssp)) {
887
888 seqentry = bssp->seq_set;
889
890 if (relink) {
891 SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
892 GetSeqEntryParent (sep, &parentptr, &parenttype);
893 }
894
895 descr = bssp->descr;
896 bssp->descr = NULL;
897 annot = bssp->annot;
898 bssp->annot = NULL;
899
900 sep->choice = seqentry->choice;
901 sep->data.ptrvalue = seqentry->data.ptrvalue;
902 seqentry->data.ptrvalue = NULL;
903 bssp->seq_set = NULL;
904 bssp->seqentry = NULL;
905 MemFree (seqentry);
906 BioseqSetFree (bssp);
907
908 sap = NULL;
909 if (IS_Bioseq (sep)) {
910 bsp = (BioseqPtr) sep->data.ptrvalue;
911 ValNodeLink (&(bsp->descr), descr);
912 if (bsp->annot == NULL) {
913 bsp->annot = annot;
914 annot = NULL;
915 } else {
916 sap = bsp->annot;
917 }
918 } else if (IS_Bioseq_set (sep)) {
919 bssp = (BioseqSetPtr) sep->data.ptrvalue;
920 ValNodeLink (&(bssp->descr), descr);
921 if (bssp->annot == NULL) {
922 bssp->annot = annot;
923 annot = NULL;
924 } else {
925 sap = bssp->annot;
926 }
927 }
928 if (sap != NULL) {
929 tmp_sap = sap;
930 while (tmp_sap->next != NULL) {
931 tmp_sap = tmp_sap->next;
932 }
933 tmp_sap->next = annot;
934 MergeAdjacentAnnotsInList (sap);
935 }
936
937 DeleteMultipleTitles (sep, NULL, 0, 0);
938
939 if (relink) {
940 SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
941 RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
942 }
943 num_renormalized++;
944 } else {
945 for (sep = bssp->seq_set; sep != NULL; sep = sep_next) {
946 sep_next = sep->next;
947 num_renormalized += RemoveSingleItemSet (sep, relink);
948 }
949 }
950
951 return num_renormalized;
952 }
953
954
IsExtractableDescriptor(SeqDescPtr sdp)955 static Boolean IsExtractableDescriptor (SeqDescPtr sdp)
956 {
957 if (sdp == NULL) {
958 return FALSE;
959 }
960 if (sdp->choice == Seq_descr_pub || sdp->choice == Seq_descr_source) {
961 return TRUE;
962 } else if (sdp->choice == Seq_descr_user && IsDBLinkObject(sdp->data.ptrvalue)) {
963 return TRUE;
964 } else {
965 return FALSE;
966 }
967 }
968
969
ExtractBioSourceAndPubs(SeqEntryPtr sep)970 NLM_EXTERN ValNodePtr ExtractBioSourceAndPubs (SeqEntryPtr sep)
971
972 {
973 BioseqPtr bsp;
974 BioseqSetPtr bssp;
975 ValNodePtr descr;
976 ValNodePtr last;
977 ValNodePtr nextsdp;
978 Pointer PNTR prevsdp;
979 ValNodePtr sdp;
980
981 if (sep == NULL || sep->data.ptrvalue == NULL) return NULL;
982 descr = NULL;
983 last = NULL;
984 sdp = NULL;
985 if (IS_Bioseq (sep)) {
986 bsp = (BioseqPtr) sep->data.ptrvalue;
987 sdp = bsp->descr;
988 prevsdp = (Pointer PNTR) &(bsp->descr);
989 } else if (IS_Bioseq_set (sep)) {
990 bssp = (BioseqSetPtr) sep->data.ptrvalue;
991 sdp = bssp->descr;
992 prevsdp = (Pointer PNTR) &(bssp->descr);
993 } else return NULL;
994 while (sdp != NULL) {
995 nextsdp = sdp->next;
996 if (IsExtractableDescriptor(sdp)) {
997 *(prevsdp) = sdp->next;
998 sdp->next = NULL;
999 if (descr == NULL) {
1000 descr = sdp;
1001 last = descr;
1002 } else if (last != NULL) {
1003 last->next = sdp;
1004 last = last->next;
1005 }
1006 } else {
1007 prevsdp = (Pointer PNTR) &(sdp->next);
1008 }
1009 sdp = nextsdp;
1010 }
1011 return descr;
1012 }
1013
ReplaceBioSourceAndPubs(SeqEntryPtr sep,ValNodePtr descr)1014 NLM_EXTERN void ReplaceBioSourceAndPubs (SeqEntryPtr sep, ValNodePtr descr)
1015
1016 {
1017 BioseqPtr bsp;
1018 BioseqSetPtr bssp;
1019 ValNodePtr last;
1020 Pointer PNTR prevsdp;
1021 ValNodePtr sdp;
1022
1023 if (sep == NULL || descr == NULL) return;
1024 if (IS_Bioseq (sep)) {
1025 bsp = (BioseqPtr) sep->data.ptrvalue;
1026 sdp = bsp->descr;
1027 prevsdp = (Pointer PNTR) &(bsp->descr);
1028 } else if (IS_Bioseq_set (sep)) {
1029 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1030 sdp = bssp->descr;
1031 prevsdp = (Pointer PNTR) &(bssp->descr);
1032 } else return;
1033 last = descr;
1034 while (last->next != NULL) {
1035 last = last->next;
1036 }
1037 last->next = sdp;
1038 *(prevsdp) = descr;
1039 }
1040
1041 typedef struct targetdata {
1042 BioseqPtr bsp;
1043 SeqEntryPtr nps;
1044 Boolean skipGenProdSet;
1045 } TargetData, PNTR TargetDataPtr;
1046
ReturnStackToItem(GatherContextPtr gcp)1047 static Boolean ReturnStackToItem (GatherContextPtr gcp)
1048
1049 {
1050 BioseqSetPtr bssp;
1051 Int2 i;
1052 Uint2 itemtype;
1053 TargetDataPtr tdp;
1054
1055 if (gcp == NULL) return TRUE;
1056 tdp = (TargetDataPtr) gcp->userdata;
1057 if (tdp == NULL) return TRUE;
1058 if (gcp->gatherstack != NULL && gcp->numstack > 0) {
1059 for (i = 0; i < gcp->numstack; i++) {
1060 itemtype = gcp->gatherstack [i].itemtype;
1061 if (itemtype == OBJ_BIOSEQ || itemtype == OBJ_BIOSEQSET) {
1062 tdp->nps = SeqMgrGetSeqEntryForData (gcp->gatherstack [i].thisitem);
1063 if (gcp->gatherstack [i].itemtype == OBJ_BIOSEQSET) {
1064 bssp = (BioseqSetPtr) gcp->gatherstack [i].thisitem;
1065 if (bssp->_class != BioseqseqSet_class_genbank &&
1066 bssp->_class != BioseqseqSet_class_mut_set &&
1067 bssp->_class != BioseqseqSet_class_pop_set &&
1068 bssp->_class != BioseqseqSet_class_phy_set &&
1069 bssp->_class != BioseqseqSet_class_eco_set &&
1070 bssp->_class != BioseqseqSet_class_wgs_set &&
1071 bssp->_class != BioseqseqSet_class_small_genome_set &&
1072 (bssp->_class != BioseqseqSet_class_gen_prod_set ||
1073 (! tdp->skipGenProdSet))) {
1074 return FALSE;
1075 }
1076 } else if (gcp->gatherstack [i].itemtype == OBJ_BIOSEQ) {
1077 return FALSE;
1078 }
1079 }
1080 }
1081 }
1082 return FALSE;
1083 }
1084
GetStackToTarget(GatherContextPtr gcp)1085 static Boolean GetStackToTarget (GatherContextPtr gcp)
1086
1087 {
1088 TargetDataPtr tdp;
1089
1090 if (gcp == NULL) return TRUE;
1091 tdp = (TargetDataPtr) gcp->userdata;
1092 if (tdp == NULL) return TRUE;
1093 if (gcp->thistype == OBJ_BIOSEQ) {
1094 if (tdp->bsp == (BioseqPtr) gcp->thisitem) {
1095 return ReturnStackToItem (gcp);
1096 }
1097 }
1098 return TRUE;
1099 }
1100
GetBestTopParentForDataEx(Uint2 entityID,BioseqPtr bsp,Boolean skipGenProdSet)1101 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForDataEx (Uint2 entityID, BioseqPtr bsp, Boolean skipGenProdSet)
1102
1103 {
1104 BioseqSetPtr bssp;
1105 BioseqSetPtr parent;
1106 GatherScope gs;
1107 TargetData td;
1108
1109 td.bsp = bsp;
1110 td.nps = NULL;
1111 td.skipGenProdSet = skipGenProdSet;
1112 if (entityID > 0 && bsp != NULL) {
1113 if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
1114 bssp = (BioseqSetPtr) bsp->idx.parentptr;
1115 if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts && bssp->idx.parenttype == OBJ_BIOSEQSET) {
1116 parent = (BioseqSetPtr) bssp->idx.parentptr;
1117 if (parent != NULL && parent->_class == BioseqseqSet_class_segset) {
1118 bssp = parent;
1119 }
1120 }
1121 if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset && bssp->idx.parenttype == OBJ_BIOSEQSET) {
1122 parent = (BioseqSetPtr) bssp->idx.parentptr;
1123 if (parent != NULL && parent->_class == BioseqseqSet_class_nuc_prot) {
1124 bssp = parent;
1125 }
1126 }
1127 if (bssp != NULL && bssp->seqentry != NULL) {
1128 if (bssp->_class == BioseqseqSet_class_nuc_prot ||
1129 bssp->_class == BioseqseqSet_class_segset ||
1130 bssp->_class == BioseqseqSet_class_parts) {
1131 return bssp->seqentry;
1132 }
1133 }
1134 if (bsp->seqentry != NULL) {
1135 return bsp->seqentry;
1136 }
1137 }
1138 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
1139 gs.seglevels = 1;
1140 MemSet ((Pointer) (gs.ignore), (int) (TRUE), (size_t) (OBJ_MAX * sizeof (Boolean)));
1141 gs.ignore[OBJ_BIOSEQ] = FALSE;
1142 gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
1143 GatherEntity (entityID, (Pointer) &td, GetStackToTarget, &gs);
1144 }
1145 return td.nps;
1146 }
1147
GetBestTopParentForData(Uint2 entityID,BioseqPtr bsp)1148 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForData (Uint2 entityID, BioseqPtr bsp)
1149
1150 {
1151 return GetBestTopParentForDataEx (entityID, bsp, FALSE);
1152 }
1153
GetBestTopParentForItemIDEx(Uint2 entityID,Uint4 itemID,Uint2 itemtype,Boolean skipGenProdSet)1154 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForItemIDEx (Uint2 entityID, Uint4 itemID, Uint2 itemtype, Boolean skipGenProdSet)
1155
1156 {
1157 TargetData td;
1158
1159 td.bsp = NULL;
1160 td.nps = NULL;
1161 td.skipGenProdSet = skipGenProdSet;
1162 if (entityID > 0 && itemID > 0 && itemtype > 0) {
1163 GatherItem (entityID, itemID, itemtype, (Pointer) &td, ReturnStackToItem);
1164 }
1165 return td.nps;
1166 }
1167
GetBestTopParentForItemID(Uint2 entityID,Uint4 itemID,Uint2 itemtype)1168 NLM_EXTERN SeqEntryPtr LIBCALL GetBestTopParentForItemID (Uint2 entityID, Uint4 itemID, Uint2 itemtype)
1169
1170 {
1171 return GetBestTopParentForItemIDEx (entityID, itemID, itemtype, FALSE);
1172 }
1173
GetTopSeqEntryForEntityID(Uint2 entityID)1174 NLM_EXTERN SeqEntryPtr LIBCALL GetTopSeqEntryForEntityID (Uint2 entityID)
1175
1176 {
1177 ObjMgrDataPtr omdp;
1178 SeqSubmitPtr ssp;
1179
1180 omdp = ObjMgrGetData (entityID);
1181 if (omdp != NULL) {
1182 switch (omdp->datatype) {
1183 case OBJ_SEQSUB :
1184 ssp = (SeqSubmitPtr) omdp->dataptr;
1185 if (ssp != NULL && ssp->datatype == 1) {
1186 return (SeqEntryPtr) ssp->data;
1187 }
1188 break;
1189 case OBJ_BIOSEQ :
1190 return (SeqEntryPtr) omdp->choice;
1191 case OBJ_BIOSEQSET :
1192 return (SeqEntryPtr) omdp->choice;
1193 default :
1194 break;
1195 }
1196 }
1197 return NULL;
1198 }
1199 //LCOV_EXCL_STOP
1200
CheckSeqLocForPartialEx(SeqLocPtr location,BoolPtr p5ptr,BoolPtr p3ptr,Int4Ptr limptr)1201 NLM_EXTERN Boolean CheckSeqLocForPartialEx (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr, Int4Ptr limptr)
1202
1203 {
1204 SeqLocPtr firstSlp;
1205 IntFuzzPtr ifp;
1206 SeqLocPtr lastSlp;
1207 Int4 lim;
1208 Boolean partial5;
1209 Boolean partial3;
1210 SeqIntPtr sip;
1211 SeqLocPtr slp;
1212 SeqPntPtr spp;
1213
1214 partial5 = FALSE;
1215 partial3 = FALSE;
1216 lim = -1;
1217 if (location != NULL) {
1218 firstSlp = NULL;
1219 lastSlp = NULL;
1220 slp = SeqLocFindNext (location, NULL);
1221 while (slp != NULL) {
1222 if (firstSlp == NULL) {
1223 firstSlp = slp;
1224 }
1225 lastSlp = slp;
1226 slp = SeqLocFindNext (location, slp);
1227 }
1228 if (firstSlp != NULL) {
1229 if (firstSlp->choice == SEQLOC_INT && firstSlp->data.ptrvalue != NULL) {
1230 sip = (SeqIntPtr) firstSlp->data.ptrvalue;
1231 if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1232 ifp = sip->if_to;
1233 if (ifp != NULL && ifp->choice == 4 && ifp->a == 1) {
1234 partial5 = TRUE;
1235 }
1236 } else {
1237 ifp = sip->if_from;
1238 if (ifp != NULL && ifp->choice == 4 && ifp->a == 2) {
1239 partial5 = TRUE;
1240 }
1241 }
1242 } else if (firstSlp->choice == SEQLOC_PNT && firstSlp->data.ptrvalue != NULL) {
1243 spp = (SeqPntPtr) firstSlp->data.ptrvalue;
1244 if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1245 ifp = spp->fuzz;
1246 if (ifp != NULL && ifp->choice == 4 && ifp->a == 1) {
1247 partial5 = TRUE;
1248 }
1249 } else {
1250 ifp = spp->fuzz;
1251 if (ifp != NULL && ifp->choice == 4 && ifp->a == 2) {
1252 partial5 = TRUE;
1253 }
1254 }
1255 ifp = spp->fuzz;
1256 if (ifp != NULL && ifp->choice == 4) {
1257 lim = ifp->a;
1258 }
1259 }
1260 }
1261 if (lastSlp != NULL) {
1262 if (lastSlp->choice == SEQLOC_INT && lastSlp->data.ptrvalue != NULL) {
1263 sip = (SeqIntPtr) lastSlp->data.ptrvalue;
1264 if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1265 ifp = sip->if_from;
1266 if (ifp != NULL && ifp->choice == 4 && ifp->a == 2) {
1267 partial3 = TRUE;
1268 }
1269 } else {
1270 ifp = sip->if_to;
1271 if (ifp != NULL && ifp->choice == 4 && ifp->a == 1) {
1272 partial3 = TRUE;
1273 }
1274 }
1275 } else if (lastSlp->choice == SEQLOC_PNT && lastSlp->data.ptrvalue != NULL) {
1276 spp = (SeqPntPtr) lastSlp->data.ptrvalue;
1277 if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1278 ifp = spp->fuzz;
1279 if (ifp != NULL && ifp->choice == 4 && ifp->a == 2) {
1280 partial3 = TRUE;
1281 }
1282 } else {
1283 ifp = spp->fuzz;
1284 if (ifp != NULL && ifp->choice == 4 && ifp->a == 1) {
1285 partial3 = TRUE;
1286 }
1287 }
1288 ifp = spp->fuzz;
1289 if (ifp != NULL && ifp->choice == 4) {
1290 lim = ifp->a;
1291 }
1292 }
1293 }
1294 }
1295 if (p5ptr != NULL) {
1296 *p5ptr = partial5;
1297 }
1298 if (p3ptr != NULL) {
1299 *p3ptr = partial3;
1300 }
1301 if (limptr != NULL) {
1302 *limptr = lim;
1303 }
1304 return (Boolean) (partial5 || partial3 || lim == 3 || lim == 4);
1305 }
1306
CheckSeqLocForPartial(SeqLocPtr location,BoolPtr p5ptr,BoolPtr p3ptr)1307 NLM_EXTERN Boolean CheckSeqLocForPartial (SeqLocPtr location, BoolPtr p5ptr, BoolPtr p3ptr)
1308
1309 {
1310 return CheckSeqLocForPartialEx (location, p5ptr, p3ptr, NULL);
1311 }
1312
/*
 * ConvertWholeToIntLoc
 *
 * Rewrites a SEQLOC_WHOLE location in place as a SEQLOC_INT covering
 * positions 0 .. length-1 of the Bioseq found for the location's SeqId.
 * The interval takes a duplicate of the Bioseq's best id (SeqIdFindBest)
 * and the Bioseq's strand; the original SeqId is freed.
 *
 * Nothing is changed when slp is NULL, is not a whole location, has no id,
 * the Bioseq cannot be found, or the interval cannot be allocated.
 */
static void ConvertWholeToIntLoc (SeqLocPtr slp)
{
  SeqIntPtr  interval;
  BioseqPtr  target;

  if (slp == NULL) return;
  if (slp->choice != SEQLOC_WHOLE) return;
  if (slp->data.ptrvalue == NULL) return;

  target = BioseqFind (slp->data.ptrvalue);
  if (target == NULL) return;

  interval = SeqIntNew ();
  if (interval == NULL) return;

  interval->from = 0;
  interval->to = target->length - 1;
  interval->id = SeqIdDup (SeqIdFindBest (target->id, 0));
  interval->strand = target->strand;

  /* release the old id before the data pointer is reused for the interval */
  slp->data.ptrvalue = SeqIdFree (slp->data.ptrvalue);
  slp->data.ptrvalue = interval;
  slp->choice = SEQLOC_INT;
}
1340
SetSeqLocPartialEx(SeqLocPtr location,Boolean partial5,Boolean partial3,Int4 lim)1341 NLM_EXTERN void SetSeqLocPartialEx (SeqLocPtr location, Boolean partial5, Boolean partial3, Int4 lim)
1342
1343 {
1344 SeqLocPtr firstSlp;
1345 IntFuzzPtr ifp;
1346 SeqLocPtr lastSlp;
1347 SeqIntPtr sip;
1348 SeqLocPtr slp;
1349 SeqPntPtr spp;
1350
1351 if (location != NULL) {
1352 /* if whole, need to convert to int */
1353 if (partial5 || partial3)
1354 {
1355 ConvertWholeToIntLoc (location);
1356 }
1357
1358 firstSlp = NULL;
1359 lastSlp = NULL;
1360 slp = SeqLocFindNext (location, NULL);
1361 while (slp != NULL) {
1362 if (firstSlp == NULL) {
1363 firstSlp = slp;
1364 }
1365 lastSlp = slp;
1366 slp = SeqLocFindNext (location, slp);
1367 }
1368 if (firstSlp != NULL) {
1369 if (firstSlp->choice == SEQLOC_INT && firstSlp->data.ptrvalue != NULL) {
1370 sip = (SeqIntPtr) firstSlp->data.ptrvalue;
1371 if (partial5) {
1372 ifp = IntFuzzNew ();
1373 if (ifp != NULL) {
1374 ifp->choice = 4;
1375 if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1376 sip->if_to = IntFuzzFree (sip->if_to);
1377 sip->if_to = ifp;
1378 ifp->a = 1;
1379 } else {
1380 sip->if_from = IntFuzzFree (sip->if_from);
1381 sip->if_from = ifp;
1382 ifp->a = 2;
1383 }
1384 }
1385 } else {
1386 if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1387 sip->if_to = IntFuzzFree (sip->if_to);
1388 } else {
1389 sip->if_from = IntFuzzFree (sip->if_from);
1390 }
1391 }
1392 } else if (firstSlp->choice == SEQLOC_PNT && firstSlp->data.ptrvalue != NULL) {
1393 spp = (SeqPntPtr) firstSlp->data.ptrvalue;
1394 if (partial5) {
1395 ifp = IntFuzzNew ();
1396 if (ifp != NULL) {
1397 ifp->choice = 4;
1398 if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1399 spp->fuzz = IntFuzzFree (spp->fuzz);
1400 spp->fuzz = ifp;
1401 ifp->a = 1;
1402 } else {
1403 spp->fuzz = IntFuzzFree (spp->fuzz);
1404 spp->fuzz = ifp;
1405 ifp->a = 2;
1406 }
1407 }
1408 } else if (lim == 3 || lim == 4) {
1409 ifp = IntFuzzNew ();
1410 if (ifp != NULL) {
1411 ifp->choice = 4;
1412 spp->fuzz = IntFuzzFree (spp->fuzz);
1413 spp->fuzz = ifp;
1414 ifp->a = lim;
1415 }
1416 } else {
1417 if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1418 spp->fuzz = IntFuzzFree (spp->fuzz);
1419 } else {
1420 spp->fuzz = IntFuzzFree (spp->fuzz);
1421 }
1422 }
1423 }
1424 }
1425 if (lastSlp != NULL) {
1426 if (lastSlp->choice == SEQLOC_INT && lastSlp->data.ptrvalue != NULL) {
1427 sip = (SeqIntPtr) lastSlp->data.ptrvalue;
1428 if (partial3) {
1429 ifp = IntFuzzNew ();
1430 if (ifp != NULL) {
1431 ifp->choice = 4;
1432 if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1433 sip->if_from = IntFuzzFree (sip->if_from);
1434 sip->if_from = ifp;
1435 ifp->a = 2;
1436 } else {
1437 sip->if_to = IntFuzzFree (sip->if_to);
1438 sip->if_to = ifp;
1439 ifp->a = 1;
1440 }
1441 }
1442 } else {
1443 if (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev) {
1444 sip->if_from = IntFuzzFree (sip->if_from);
1445 } else {
1446 sip->if_to = IntFuzzFree (sip->if_to);
1447 }
1448 }
1449 } else if (lastSlp->choice == SEQLOC_PNT && lastSlp->data.ptrvalue != NULL) {
1450 spp = (SeqPntPtr) lastSlp->data.ptrvalue;
1451 if (partial3) {
1452 ifp = IntFuzzNew ();
1453 if (ifp != NULL) {
1454 ifp->choice = 4;
1455 if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1456 spp->fuzz = IntFuzzFree (spp->fuzz);
1457 spp->fuzz = ifp;
1458 ifp->a = 2;
1459 } else {
1460 spp->fuzz = IntFuzzFree (spp->fuzz);
1461 spp->fuzz = ifp;
1462 ifp->a = 1;
1463 }
1464 }
1465 } else if (lim == 3 || lim == 4) {
1466 ifp = IntFuzzNew ();
1467 if (ifp != NULL) {
1468 ifp->choice = 4;
1469 spp->fuzz = IntFuzzFree (spp->fuzz);
1470 spp->fuzz = ifp;
1471 ifp->a = lim;
1472 }
1473 } else {
1474 if (spp->strand == Seq_strand_minus || spp->strand == Seq_strand_both_rev) {
1475 spp->fuzz = IntFuzzFree (spp->fuzz);
1476 } else {
1477 spp->fuzz = IntFuzzFree (spp->fuzz);
1478 }
1479 }
1480 }
1481 }
1482 }
1483 }
1484
SetSeqLocPartial(SeqLocPtr location,Boolean partial5,Boolean partial3)1485 NLM_EXTERN void SetSeqLocPartial (SeqLocPtr location, Boolean partial5, Boolean partial3)
1486
1487 {
1488 SetSeqLocPartialEx (location, partial5, partial3, -1);
1489 }
1490
1491 //LCOV_EXCL_START
GetSeqLocPartialSet(SeqLocPtr location)1492 NLM_EXTERN ValNodePtr GetSeqLocPartialSet (SeqLocPtr location)
1493
1494 {
1495 ValNodePtr head = NULL, last = NULL, vnp;
1496 Int4 lim;
1497 Boolean noLeft;
1498 Boolean noRight;
1499 SeqLocPtr slp;
1500 Int4 val;
1501
1502 if (location == NULL) return NULL;
1503
1504 slp = SeqLocFindNext (location, NULL);
1505 while (slp != NULL) {
1506 CheckSeqLocForPartialEx (slp, &noLeft, &noRight, &lim);
1507 val = 0;
1508 if (noLeft) {
1509 val |= 2;
1510 }
1511 if (noRight) {
1512 val |= 1;
1513 }
1514 if (lim == 3) {
1515 val |= 4;
1516 } else if (lim == 4) {
1517 val |= 8;
1518 }
1519 vnp = ValNodeAddInt (&last, 0, val);
1520 if (head == NULL) {
1521 head = vnp;
1522 }
1523 last = vnp;
1524 slp = SeqLocFindNext (location, slp);
1525 }
1526
1527 return head;
1528 }
1529
SetSeqLocPartialSet(SeqLocPtr location,ValNodePtr vnp)1530 NLM_EXTERN void SetSeqLocPartialSet (SeqLocPtr location, ValNodePtr vnp)
1531
1532 {
1533 Int4 lim;
1534 Boolean noLeft;
1535 Boolean noRight;
1536 SeqLocPtr slp;
1537 Int4 val;
1538
1539 if (location == NULL || vnp == NULL) return;
1540
1541 slp = SeqLocFindNext (location, NULL);
1542 while (slp != NULL && vnp != NULL) {
1543 val = (Int4) vnp->data.intvalue;
1544 noLeft = (Boolean) ((val & 2) != 0);
1545 noRight = (Boolean) ((val & 1) != 0);
1546 lim = -1;
1547 if ((val & 4) != 0) {
1548 lim = 3;
1549 } else if ((val & 8) != 0) {
1550 lim = 4;
1551 }
1552 SetSeqLocPartialEx (slp, noLeft, noRight, lim);
1553 slp = SeqLocFindNext (location, slp);
1554 vnp = vnp->next;
1555 }
1556 }
1557
1558 /* KeyTag section */
1559
SortVnpByString(VoidPtr ptr1,VoidPtr ptr2)1560 NLM_EXTERN int LIBCALLBACK SortVnpByString (VoidPtr ptr1, VoidPtr ptr2)
1561
1562 {
1563 CharPtr str1;
1564 CharPtr str2;
1565 ValNodePtr vnp1;
1566 ValNodePtr vnp2;
1567
1568 if (ptr1 != NULL && ptr2 != NULL) {
1569 vnp1 = *((ValNodePtr PNTR) ptr1);
1570 vnp2 = *((ValNodePtr PNTR) ptr2);
1571 if (vnp1 != NULL && vnp2 != NULL) {
1572 str1 = (CharPtr) vnp1->data.ptrvalue;
1573 str2 = (CharPtr) vnp2->data.ptrvalue;
1574 if (str1 != NULL && str2 != NULL) {
1575 return StringICmp (str1, str2);
1576 }
1577 }
1578 }
1579 return 0;
1580 }
1581
SortVnpByStringCS(VoidPtr ptr1,VoidPtr ptr2)1582 NLM_EXTERN int LIBCALLBACK SortVnpByStringCS (VoidPtr ptr1, VoidPtr ptr2)
1583
1584 {
1585 CharPtr str1;
1586 CharPtr str2;
1587 ValNodePtr vnp1;
1588 ValNodePtr vnp2;
1589
1590 if (ptr1 != NULL && ptr2 != NULL) {
1591 vnp1 = *((ValNodePtr PNTR) ptr1);
1592 vnp2 = *((ValNodePtr PNTR) ptr2);
1593 if (vnp1 != NULL && vnp2 != NULL) {
1594 str1 = (CharPtr) vnp1->data.ptrvalue;
1595 str2 = (CharPtr) vnp2->data.ptrvalue;
1596 if (str1 != NULL && str2 != NULL) {
1597 return StringCmp (str1, str2);
1598 }
1599 }
1600 }
1601 return 0;
1602 }
1603
SortVnpByStringCI(VoidPtr ptr1,VoidPtr ptr2)1604 NLM_EXTERN int LIBCALLBACK SortVnpByStringCI (VoidPtr ptr1, VoidPtr ptr2)
1605
1606 {
1607 CharPtr str1;
1608 CharPtr str2;
1609 ValNodePtr vnp1;
1610 ValNodePtr vnp2;
1611
1612 if (ptr1 != NULL && ptr2 != NULL) {
1613 vnp1 = *((ValNodePtr PNTR) ptr1);
1614 vnp2 = *((ValNodePtr PNTR) ptr2);
1615 if (vnp1 != NULL && vnp2 != NULL) {
1616 str1 = (CharPtr) vnp1->data.ptrvalue;
1617 str2 = (CharPtr) vnp2->data.ptrvalue;
1618 if (str1 != NULL && str2 != NULL) {
1619 return StringCmp (str1, str2);
1620 }
1621 }
1622 }
1623 return 0;
1624 }
1625
SortVnpByStringCIUCFirst(VoidPtr ptr1,VoidPtr ptr2)1626 NLM_EXTERN int LIBCALLBACK SortVnpByStringCIUCFirst (VoidPtr ptr1, VoidPtr ptr2)
1627
1628 {
1629 int comp;
1630 CharPtr str1;
1631 CharPtr str2;
1632 ValNodePtr vnp1;
1633 ValNodePtr vnp2;
1634
1635 if (ptr1 != NULL && ptr2 != NULL) {
1636 vnp1 = *((ValNodePtr PNTR) ptr1);
1637 vnp2 = *((ValNodePtr PNTR) ptr2);
1638 if (vnp1 != NULL && vnp2 != NULL) {
1639 str1 = (CharPtr) vnp1->data.ptrvalue;
1640 str2 = (CharPtr) vnp2->data.ptrvalue;
1641 if (str1 != NULL && str2 != NULL) {
1642 comp = StringICmp (str1, str2);
1643 if (comp != 0) return comp;
1644 return StringCmp (str1, str2);
1645 }
1646 }
1647 }
1648 return 0;
1649 }
1650
SortVnpByStringCILCFirst(VoidPtr ptr1,VoidPtr ptr2)1651 NLM_EXTERN int LIBCALLBACK SortVnpByStringCILCFirst (VoidPtr ptr1, VoidPtr ptr2)
1652
1653 {
1654 int comp;
1655 CharPtr str1;
1656 CharPtr str2;
1657 ValNodePtr vnp1;
1658 ValNodePtr vnp2;
1659
1660 if (ptr1 != NULL && ptr2 != NULL) {
1661 vnp1 = *((ValNodePtr PNTR) ptr1);
1662 vnp2 = *((ValNodePtr PNTR) ptr2);
1663 if (vnp1 != NULL && vnp2 != NULL) {
1664 str1 = (CharPtr) vnp1->data.ptrvalue;
1665 str2 = (CharPtr) vnp2->data.ptrvalue;
1666 if (str1 != NULL && str2 != NULL) {
1667 comp = StringICmp (str1, str2);
1668 if (comp != 0) return comp;
1669 return StringCmp (str2, str1);
1670 }
1671 }
1672 }
1673 return 0;
1674 }
1675
SortVnpByNaturalCS(VoidPtr ptr1,VoidPtr ptr2)1676 NLM_EXTERN int LIBCALLBACK SortVnpByNaturalCS (VoidPtr ptr1, VoidPtr ptr2)
1677
1678 {
1679 CharPtr str1, str2;
1680 ValNodePtr vnp1, vnp2;
1681
1682 if (ptr1 == NULL || ptr2 == NULL) return 0;
1683
1684 vnp1 = *((ValNodePtr PNTR) ptr1);
1685 vnp2 = *((ValNodePtr PNTR) ptr2);
1686 if (vnp1 == NULL || vnp2 == NULL) return 0;
1687
1688 str1 = (CharPtr) vnp1->data.ptrvalue;
1689 str2 = (CharPtr) vnp2->data.ptrvalue;
1690 if (str1 == NULL || str2 == NULL) return 0;
1691
1692 return NaturalStringCmp (str1, str2);
1693 }
1694
SortVnpByNaturalCI(VoidPtr ptr1,VoidPtr ptr2)1695 NLM_EXTERN int LIBCALLBACK SortVnpByNaturalCI (VoidPtr ptr1, VoidPtr ptr2)
1696
1697 {
1698 CharPtr str1, str2;
1699 ValNodePtr vnp1, vnp2;
1700
1701 if (ptr1 == NULL || ptr2 == NULL) return 0;
1702
1703 vnp1 = *((ValNodePtr PNTR) ptr1);
1704 vnp2 = *((ValNodePtr PNTR) ptr2);
1705 if (vnp1 == NULL || vnp2 == NULL) return 0;
1706
1707 str1 = (CharPtr) vnp1->data.ptrvalue;
1708 str2 = (CharPtr) vnp2->data.ptrvalue;
1709 if (str1 == NULL || str2 == NULL) return 0;
1710
1711 return NaturalStringICmp (str1, str2);
1712 }
1713 //LCOV_EXCL_STOP
1714
UniqueValNode(ValNodePtr list)1715 NLM_EXTERN ValNodePtr UniqueValNode (ValNodePtr list)
1716
1717 {
1718 CharPtr last;
1719 ValNodePtr next;
1720 Pointer PNTR prev;
1721 CharPtr str;
1722 ValNodePtr vnp;
1723
1724 if (list == NULL) return NULL;
1725 last = (CharPtr) list->data.ptrvalue;
1726 vnp = list->next;
1727 prev = (Pointer PNTR) &(list->next);
1728 while (vnp != NULL) {
1729 next = vnp->next;
1730 str = (CharPtr) vnp->data.ptrvalue;
1731 if (StringICmp (last, str) == 0) {
1732 vnp->next = NULL;
1733 *prev = next;
1734 ValNodeFreeData (vnp);
1735 } else {
1736 last = (CharPtr) vnp->data.ptrvalue;
1737 prev = (Pointer PNTR) &(vnp->next);
1738 }
1739 vnp = next;
1740 }
1741
1742 return list;
1743 }
1744
1745 //LCOV_EXCL_START
UniqueStringValNodeCS(ValNodePtr list)1746 NLM_EXTERN ValNodePtr UniqueStringValNodeCS (ValNodePtr list)
1747
1748 {
1749 CharPtr last;
1750 ValNodePtr next;
1751 Pointer PNTR prev;
1752 CharPtr str;
1753 ValNodePtr vnp;
1754
1755 if (list == NULL) return NULL;
1756 last = (CharPtr) list->data.ptrvalue;
1757 vnp = list->next;
1758 prev = (Pointer PNTR) &(list->next);
1759 while (vnp != NULL) {
1760 next = vnp->next;
1761 str = (CharPtr) vnp->data.ptrvalue;
1762 if (StringCmp (last, str) == 0) {
1763 vnp->next = NULL;
1764 *prev = next;
1765 ValNodeFreeData (vnp);
1766 } else {
1767 last = (CharPtr) vnp->data.ptrvalue;
1768 prev = (Pointer PNTR) &(vnp->next);
1769 }
1770 vnp = next;
1771 }
1772
1773 return list;
1774 }
1775
UniqueStringValNodeCI(ValNodePtr list)1776 NLM_EXTERN ValNodePtr UniqueStringValNodeCI (ValNodePtr list)
1777
1778 {
1779 CharPtr last;
1780 ValNodePtr next;
1781 Pointer PNTR prev;
1782 CharPtr str;
1783 ValNodePtr vnp;
1784
1785 if (list == NULL) return NULL;
1786 last = (CharPtr) list->data.ptrvalue;
1787 vnp = list->next;
1788 prev = (Pointer PNTR) &(list->next);
1789 while (vnp != NULL) {
1790 next = vnp->next;
1791 str = (CharPtr) vnp->data.ptrvalue;
1792 if (StringICmp (last, str) == 0) {
1793 vnp->next = NULL;
1794 *prev = next;
1795 ValNodeFreeData (vnp);
1796 } else {
1797 last = (CharPtr) vnp->data.ptrvalue;
1798 prev = (Pointer PNTR) &(vnp->next);
1799 }
1800 vnp = next;
1801 }
1802
1803 return list;
1804 }
1805
SortByChoice(VoidPtr ptr1,VoidPtr ptr2)1806 NLM_EXTERN int LIBCALLBACK SortByChoice (VoidPtr ptr1, VoidPtr ptr2)
1807
1808 {
1809 Uint1 chs1;
1810 Uint1 chs2;
1811 ValNodePtr vnp1;
1812 ValNodePtr vnp2;
1813
1814 if (ptr1 == NULL || ptr2 == NULL) return 0;
1815 vnp1 = *((ValNodePtr PNTR) ptr1);
1816 vnp2 = *((ValNodePtr PNTR) ptr2);
1817 if (vnp1 == NULL || vnp2 == NULL) return 0;
1818 chs1 = (Uint1) vnp1->choice;
1819 chs2 = (Uint1) vnp2->choice;
1820 if (chs1 > chs2) {
1821 return 1;
1822 } else if (chs1 < chs2) {
1823 return -1;
1824 }
1825 return 0;
1826 }
1827
SortByIntvalue(VoidPtr ptr1,VoidPtr ptr2)1828 NLM_EXTERN int LIBCALLBACK SortByIntvalue (VoidPtr ptr1, VoidPtr ptr2)
1829
1830 {
1831 Int4 val1;
1832 Int4 val2;
1833 ValNodePtr vnp1;
1834 ValNodePtr vnp2;
1835
1836 if (ptr1 == NULL || ptr2 == NULL) return 0;
1837 vnp1 = *((ValNodePtr PNTR) ptr1);
1838 vnp2 = *((ValNodePtr PNTR) ptr2);
1839 if (vnp1 == NULL || vnp2 == NULL) return 0;
1840 val1 = (Int4) vnp1->data.intvalue;
1841 val2 = (Int4) vnp2->data.intvalue;
1842 if (val1 > val2) {
1843 return 1;
1844 } else if (val1 < val2) {
1845 return -1;
1846 }
1847 return 0;
1848 }
1849
UniqueIntValNode(ValNodePtr list)1850 NLM_EXTERN ValNodePtr UniqueIntValNode (ValNodePtr list)
1851
1852 {
1853 Int4 curr, last;
1854 ValNodePtr next;
1855 Pointer PNTR prev;
1856 ValNodePtr vnp;
1857
1858 if (list == NULL) return NULL;
1859 last = (Int4) list->data.intvalue;
1860 vnp = list->next;
1861 prev = (Pointer PNTR) &(list->next);
1862 while (vnp != NULL) {
1863 next = vnp->next;
1864 curr = (Int4) vnp->data.intvalue;
1865 if (last == curr) {
1866 vnp->next = NULL;
1867 *prev = next;
1868 ValNodeFree (vnp);
1869 } else {
1870 last = (Int4) vnp->data.intvalue;
1871 prev = (Pointer PNTR) &(vnp->next);
1872 }
1873 vnp = next;
1874 }
1875
1876 return list;
1877 }
1878
SortByPtrvalue(VoidPtr ptr1,VoidPtr ptr2)1879 NLM_EXTERN int LIBCALLBACK SortByPtrvalue (VoidPtr ptr1, VoidPtr ptr2)
1880
1881 {
1882 VoidPtr val1;
1883 VoidPtr val2;
1884 ValNodePtr vnp1;
1885 ValNodePtr vnp2;
1886
1887 if (ptr1 == NULL || ptr2 == NULL) return 0;
1888 vnp1 = *((ValNodePtr PNTR) ptr1);
1889 vnp2 = *((ValNodePtr PNTR) ptr2);
1890 if (vnp1 == NULL || vnp2 == NULL) return 0;
1891 val1 = (VoidPtr) vnp1->data.ptrvalue;
1892 val2 = (VoidPtr) vnp2->data.ptrvalue;
1893 if (val1 > val2) {
1894 return 1;
1895 } else if (val1 < val2) {
1896 return -1;
1897 }
1898 return 0;
1899 }
1900
UniquePtrValNode(ValNodePtr list)1901 NLM_EXTERN ValNodePtr UniquePtrValNode (ValNodePtr list)
1902
1903 {
1904 VoidPtr curr, last;
1905 ValNodePtr next;
1906 Pointer PNTR prev;
1907 ValNodePtr vnp;
1908
1909 if (list == NULL) return NULL;
1910 last = (VoidPtr) list->data.ptrvalue;
1911 vnp = list->next;
1912 prev = (Pointer PNTR) &(list->next);
1913 while (vnp != NULL) {
1914 next = vnp->next;
1915 curr = (VoidPtr) vnp->data.ptrvalue;
1916 if (last == curr) {
1917 vnp->next = NULL;
1918 *prev = next;
1919 ValNodeFree (vnp);
1920 } else {
1921 last = (VoidPtr) vnp->data.ptrvalue;
1922 prev = (Pointer PNTR) &(vnp->next);
1923 }
1924 vnp = next;
1925 }
1926
1927 return list;
1928 }
1929
KeyTagInit(KeyTag PNTR ktp,ValNodePtr list)1930 NLM_EXTERN void KeyTagInit (KeyTag PNTR ktp, ValNodePtr list)
1931
1932 {
1933 Int2 i;
1934 CharPtr PNTR index;
1935 Int2 num;
1936 ValNodePtr vnp;
1937
1938 if (ktp == NULL || list == NULL) return;
1939 list = ValNodeSort (list, SortVnpByString);
1940 list = UniqueValNode (list);
1941 num = ValNodeLen (list);
1942 index = MemNew (sizeof (CharPtr) * (num + 1));
1943
1944 for (vnp = list, i = 0; vnp != NULL && i < num; vnp = vnp->next, i++) {
1945 index [i] = (CharPtr) vnp->data.ptrvalue;
1946 }
1947
1948 ktp->num = num;
1949 ktp->list = list;
1950 ktp->index = index;
1951 }
1952
KeyTagClear(KeyTag PNTR ktp)1953 NLM_EXTERN void KeyTagClear (KeyTag PNTR ktp)
1954
1955 {
1956 if (ktp == NULL) return;
1957 ktp->num = 0;
1958 ktp->list = ValNodeFreeData (ktp->list);
1959 ktp->index = MemFree (ktp->index);
1960 }
1961
KeyFromTag(KeyTag PNTR ktp,CharPtr tag)1962 NLM_EXTERN Int2 KeyFromTag (KeyTag PNTR ktp, CharPtr tag)
1963
1964 {
1965 Int2 L, R, mid, compare;
1966
1967 if (ktp == NULL || ktp->list == NULL || ktp->index == NULL) return 0;
1968 if (tag == NULL) return 0;
1969
1970 L = 0;
1971 R = ktp->num - 1;
1972 while (L < R) {
1973 mid = (L + R) / 2;
1974 compare = StringICmp (ktp->index [mid], tag);
1975 if (compare < 0) {
1976 L = mid + 1;
1977 } else {
1978 R = mid;
1979 }
1980 }
1981 if (StringICmp (ktp->index [R], tag) == 0) {
1982 return (R + 1);
1983 }
1984
1985 return 0;
1986 }
1987
TagFromKey(KeyTag PNTR ktp,Int2 key)1988 NLM_EXTERN CharPtr TagFromKey (KeyTag PNTR ktp, Int2 key)
1989
1990 {
1991 if (ktp == NULL || ktp->list == NULL || ktp->index == NULL) return 0;
1992 if (key < 1 || key > ktp->num) return 0;
1993 key--;
1994 return ktp->index [key];
1995 }
1996
1997 /* begin PromoteXrefs section */
1998
1999 typedef struct geneextendlist {
2000 GeneRefPtr grp;
2001 SeqLocPtr slp;
2002 ObjMgrPtr omp;
2003 Boolean rsult;
2004 Char label [41];
2005 } GeneExtendList, PNTR GeneExtendPtr;
2006
GeneExtendFunc(GatherContextPtr gcp)2007 static Boolean GeneExtendFunc (GatherContextPtr gcp)
2008
2009 {
2010 BioseqPtr bsp;
2011 GeneExtendPtr gep;
2012 GeneRefPtr grp;
2013 Boolean hasNulls;
2014 ObjMgrTypePtr omtp;
2015 SeqFeatPtr sfp;
2016 SeqLocPtr slp;
2017 Char thislabel [41];
2018
2019 if (gcp == NULL) return TRUE;
2020
2021 gep = (GeneExtendPtr) gcp->userdata;
2022 if (gep == NULL ) return TRUE;
2023
2024 thislabel [0] = '\0';
2025
2026 if (gcp->thistype == OBJ_SEQFEAT) {
2027 sfp = (SeqFeatPtr) gcp->thisitem;
2028 if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE && sfp->data.value.ptrvalue != NULL) {
2029 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
2030 omtp = ObjMgrTypeFind (gep->omp, gcp->thistype, NULL, NULL);
2031 if (omtp == NULL) {
2032 return TRUE;
2033 }
2034 if (omtp->labelfunc != NULL) {
2035 (*(omtp->labelfunc)) (gcp->thisitem, thislabel, 40, OM_LABEL_CONTENT);
2036 }
2037 if (thislabel [0] != '\0') {
2038 if (StringICmp (thislabel, gep->label) == 0) {
2039 if (SeqLocCompare (gep->slp, sfp->location) != SLC_NO_MATCH) {
2040 bsp = GetBioseqGivenSeqLoc (sfp->location, gcp->entityID);
2041 if (bsp != NULL) {
2042 slp = SeqLocMerge (bsp, sfp->location, gep->slp, TRUE, FALSE, FALSE);
2043 if (slp != NULL) {
2044 sfp->location = SeqLocFree (sfp->location);
2045 sfp->location = slp;
2046 if (bsp->repr == Seq_repr_seg) {
2047 slp = SegLocToPartsEx (bsp, sfp->location, TRUE);
2048 sfp->location = SeqLocFree (sfp->location);
2049 sfp->location = slp;
2050 hasNulls = LocationHasNullsBetween (sfp->location);
2051 sfp->partial = (sfp->partial || hasNulls);
2052 }
2053 FreeAllFuzz (slp);
2054 gep->rsult = TRUE;
2055 }
2056 }
2057 }
2058 return FALSE;
2059 }
2060 }
2061 }
2062 }
2063 return TRUE;
2064 }
2065
2066 /*
2067 static Boolean ExtendGene (GeneRefPtr grp, SeqEntryPtr nsep, SeqLocPtr slp)
2068
2069 {
2070 GeneExtendList gel;
2071 GatherScope gs;
2072 ObjMgrTypePtr omtp;
2073 SeqFeatPtr sfp;
2074
2075 if (grp == NULL || nsep == NULL || slp == NULL) return FALSE;
2076 gel.grp = grp;
2077 gel.slp = slp;
2078 gel.omp = ObjMgrGet ();
2079 gel.label [0] = '\0';
2080 gel.rsult = FALSE;
2081 omtp = ObjMgrTypeFind (gel.omp, OBJ_SEQFEAT, NULL, NULL);
2082 if (omtp != NULL && omtp->labelfunc != NULL) {
2083 sfp = SeqFeatNew ();
2084 if (sfp != NULL) {
2085 sfp->data.choice = SEQFEAT_GENE;
2086 sfp->data.value.ptrvalue = (Pointer) grp;
2087 (*(omtp->labelfunc)) ((Pointer) sfp, gel.label, 40, OM_LABEL_CONTENT);
2088 sfp->data.value.ptrvalue = NULL;
2089 SeqFeatFree (sfp);
2090 }
2091 }
2092 MemSet ((Pointer)(&gs), 0, sizeof (GatherScope));
2093 gs.seglevels = 1;
2094 gs.get_feats_location = TRUE;
2095 MemSet((Pointer)(gs.ignore), (int)(TRUE), (size_t)(OBJ_MAX * sizeof(Boolean)));
2096 gs.ignore[OBJ_BIOSEQ] = FALSE;
2097 gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
2098 gs.ignore[OBJ_SEQFEAT] = FALSE;
2099 gs.ignore[OBJ_SEQANNOT] = FALSE;
2100 GatherSeqEntry (nsep, (Pointer) &gel, GeneExtendFunc, &gs);
2101 return gel.rsult;
2102 }
2103 */
2104
SetEmptyGeneticCodes(SeqAnnotPtr sap,Int2 genCode)2105 NLM_EXTERN void SetEmptyGeneticCodes (SeqAnnotPtr sap, Int2 genCode)
2106
2107 {
2108 CdRegionPtr crp;
2109 GeneticCodePtr gc;
2110 SeqFeatPtr sfp;
2111 ValNodePtr vnp;
2112
2113 if (sap == NULL || sap->type != 1) return;
2114 for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
2115 if (sfp->data.choice == SEQFEAT_CDREGION) {
2116 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
2117 if (crp != NULL) {
2118 gc = crp->genetic_code;
2119 if (gc != NULL) {
2120 vnp = gc->data.ptrvalue;
2121 if (vnp != NULL && vnp->choice == 2) {
2122 vnp->data.intvalue = (Int4) genCode;
2123 /*
2124 if (vnp->data.intvalue == 0) {
2125 vnp->data.intvalue = (Int4) genCode;
2126 }
2127 */
2128 }
2129 }
2130 }
2131 }
2132 }
2133 }
2134
PromoteXrefsExEx(SeqFeatPtr sfp,BioseqPtr bsp,Uint2 entityID,Boolean include_stop,Boolean remove_trailingX,Boolean gen_prod_set,Boolean force_local_id,BoolPtr seq_fetch_failP)2135 NLM_EXTERN void PromoteXrefsExEx (
2136 SeqFeatPtr sfp,
2137 BioseqPtr bsp,
2138 Uint2 entityID,
2139 Boolean include_stop,
2140 Boolean remove_trailingX,
2141 Boolean gen_prod_set,
2142 Boolean force_local_id,
2143 BoolPtr seq_fetch_failP
2144 )
2145
2146 {
2147 Int2 adv;
2148 ByteStorePtr bs;
2149 BioseqSetPtr bssp;
2150 Char ch;
2151 CharPtr comment;
2152 CdRegionPtr crp;
2153 Int2 ctr = 1;
2154 ValNodePtr descr;
2155 SeqFeatPtr first;
2156 GBQualPtr gbq;
2157 Int4 i;
2158 Char id [128];
2159 SeqEntryPtr last;
2160 Char lcl [128];
2161 BioseqPtr mbsp;
2162 MolInfoPtr mip;
2163 SeqEntryPtr msep;
2164 SeqFeatXrefPtr next;
2165 GBQualPtr nextqual;
2166 SeqEntryPtr old;
2167 ObjMgrDataPtr omdptop;
2168 ObjMgrData omdata;
2169 Uint2 parenttype;
2170 Pointer parentptr;
2171 Boolean partial5;
2172 Boolean partial3;
2173 BioseqPtr pbsp;
2174 SeqFeatXrefPtr PNTR prev;
2175 GBQualPtr PNTR prevqual;
2176 SeqFeatPtr prot;
2177 CharPtr protseq;
2178 ProtRefPtr prp, prp2;
2179 SeqEntryPtr psep;
2180 CharPtr ptr;
2181 CharPtr rnaseq;
2182 SeqEntryPtr sep;
2183 SeqHistPtr shp;
2184 SeqIdPtr sip;
2185 SeqEntryPtr target = NULL;
2186 Uint4 version = 0;
2187 long int val;
2188 ValNodePtr vnp;
2189 SeqFeatXrefPtr xref;
2190 Boolean ok_to_remove;
2191 /*
2192 DbtagPtr dbt;
2193 SeqFeatPtr gene;
2194 GeneRefPtr grp;
2195 */
2196
2197 if (seq_fetch_failP != NULL) {
2198 *seq_fetch_failP = FALSE;
2199 }
2200
2201 if (sfp == NULL || bsp == NULL) return;
2202
2203 /* set subtypes, used to find mRNA features for genomic product sets */
2204
2205 first = sfp;
2206 while (sfp != NULL) {
2207 if (sfp->idx.subtype == 0) {
2208 sfp->idx.subtype = FindFeatDefType (sfp);
2209 }
2210 sfp = sfp->next;
2211 }
2212
2213 /* no longer expand genes specified by qualifiers on other features (except repeat_region) */
2214
2215 /*
2216 sfp = first;
2217 while (sfp != NULL) {
2218 prev = &(sfp->xref);
2219 xref = sfp->xref;
2220 while (xref != NULL) {
2221 next = xref->next;
2222 if (xref->data.choice == SEQFEAT_GENE &&
2223 sfp->data.choice != SEQFEAT_GENE &&
2224 sfp->idx.subtype != FEATDEF_repeat_region) {
2225 grp = (GeneRefPtr) xref->data.value.ptrvalue;
2226 if (grp != NULL && SeqMgrGeneIsSuppressed (grp)) {
2227 } else {
2228 xref->data.value.ptrvalue = NULL;
2229 if (grp != NULL) {
2230 sep = SeqMgrGetSeqEntryForData (bsp);
2231 if (ExtendGene (grp, sep, sfp->location)) {
2232 GeneRefFree (grp);
2233 } else {
2234 gene = CreateNewFeature (sep, NULL, SEQFEAT_GENE, NULL);
2235 if (gene != NULL) {
2236 gene->data.value.ptrvalue = (Pointer) grp;
2237 gene->location = SeqLocFree (gene->location);
2238 gene->location = AsnIoMemCopy (sfp->location,
2239 (AsnReadFunc) SeqLocAsnRead,
2240 (AsnWriteFunc) SeqLocAsnWrite);
2241 for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
2242 dbt = (DbtagPtr) vnp->data.ptrvalue;
2243 if (dbt == NULL) continue;
2244 ValNodeAddPointer (&(gene->dbxref), 0, (Pointer) DbtagDup (dbt));
2245 }
2246 }
2247 }
2248 }
2249 *(prev) = next;
2250 xref->next = NULL;
2251 xref->data.choice = 0;
2252 SeqFeatXrefFree (xref);
2253 }
2254 } else {
2255 prev = &(xref->next);
2256 }
2257 xref = next;
2258 }
2259 sfp = sfp->next;
2260 }
2261 */
2262
2263 /* expand mRNA features into cDNA product sequences */
2264
2265 bssp = NULL;
2266 sep = NULL;
2267 last = NULL;
2268 if (gen_prod_set) {
2269 sep = GetTopSeqEntryForEntityID (entityID);
2270 if (IS_Bioseq_set (sep)) {
2271 bssp = (BioseqSetPtr) sep->data.ptrvalue;
2272 if (bssp != NULL && bssp->seq_set != NULL) {
2273 last = bssp->seq_set;
2274 while (last->next != NULL) {
2275 last = last->next;
2276 }
2277 }
2278 }
2279 }
2280
2281 if (gen_prod_set && sep != NULL && bssp != NULL && last != NULL) {
2282 target = sep;
2283 SaveSeqEntryObjMgrData (target, &omdptop, &omdata);
2284 GetSeqEntryParent (target, &parentptr, &parenttype);
2285 sfp = first;
2286 while (sfp != NULL) {
2287 if (sfp->data.choice == SEQFEAT_RNA &&
2288 /* sfp->idx.subtype != FEATDEF_tRNA && */
2289 sfp->product == NULL && (! sfp->pseudo)) {
2290 gbq = sfp->qual;
2291 prevqual = (GBQualPtr PNTR) &(sfp->qual);
2292 id [0] = '\0';
2293 sip = NULL;
2294 comment = NULL;
2295 while (gbq != NULL) {
2296 nextqual = gbq->next;
2297 if (StringICmp (gbq->qual, "transcript_id") == 0) {
2298 if (StringDoesHaveText (id) && StringDoesHaveText (gbq->val)) {
2299 ErrPostEx (SEV_WARNING, ERR_FEATURE_QualWrongThisFeat,
2300 "RNA transcript_id %s replacing %s", gbq->val, id);
2301 }
2302 *(prevqual) = gbq->next;
2303 gbq->next = NULL;
2304 StringNCpy_0 (id, gbq->val, sizeof (id));
2305 GBQualFree (gbq);
2306 } else if (StringICmp (gbq->qual, "comment") == 0 &&
2307 StringDoesHaveText (gbq->val)) {
2308 *(prevqual) = gbq->next;
2309 gbq->next = NULL;
2310 comment = StringSave (gbq->val);
2311 GBQualFree (gbq);
2312 } else {
2313 prevqual = (GBQualPtr PNTR) &(gbq->next);
2314 }
2315 gbq = nextqual;
2316 }
2317 if (! StringHasNoText (id)) {
2318 if (StringChr (id, '|') != NULL) {
2319 sip = SeqIdParse (id);
2320 } else if (force_local_id) {
2321 sprintf (lcl, "lcl|%s", id);
2322 sip = SeqIdParse (lcl);
2323 } else {
2324 adv = ValidateAccnDotVer (id);
2325 if (adv == 0 || adv == -5) {
2326 ptr = StringChr (id, '.');
2327 if (ptr != NULL) {
2328 *ptr = '\0';
2329 ptr++;
2330 if (sscanf (ptr, "%ld", &val) == 1) {
2331 version = (Uint4) val;
2332 }
2333 }
2334 sip = SeqIdFromAccession (id, version, NULL);
2335 } else {
2336 sprintf (lcl, "lcl|%s", id);
2337 sip = SeqIdParse (lcl);
2338 }
2339 }
2340 }
2341 if (sip != NULL || sfp->idx.subtype == FEATDEF_mRNA) {
2342 rnaseq = GetSequenceByFeature (sfp);
2343 if (rnaseq == NULL && seq_fetch_failP != NULL) {
2344 *seq_fetch_failP = TRUE;
2345 }
2346 if (rnaseq != NULL) {
2347 i = (Int4) StringLen (rnaseq);
2348 bs = BSNew (i + 2);
2349 if (bs != NULL) {
2350 BSWrite (bs, (VoidPtr) rnaseq, (Int4) StringLen (rnaseq));
2351 mbsp = BioseqNew ();
2352 if (mbsp != NULL) {
2353 mbsp->repr = Seq_repr_raw;
2354 mbsp->mol = Seq_mol_rna;
2355 mbsp->seq_data_type = Seq_code_iupacna;
2356 mbsp->seq_data = (SeqDataPtr) bs;
2357 mbsp->length = BSLen (bs);
2358 BioseqPack (mbsp);
2359 bs = NULL;
2360 /*
2361 sep = GetTopSeqEntryForEntityID (entityID);
2362 */
2363 old = SeqEntrySetScope (sep);
2364 if (sip != NULL) {
2365 mbsp->id = sip;
2366 } else if (sfp->idx.subtype == FEATDEF_mRNA) {
2367 /* actually just making rapid unique ID for mRNA */
2368 mbsp->id = MakeNewProteinSeqIdEx (sfp->location, NULL, NULL, &ctr);
2369 }
2370 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
2371 SeqMgrAddToBioseqIndex (mbsp);
2372 SeqEntrySetScope (old);
2373 msep = SeqEntryNew ();
2374 if (msep != NULL) {
2375 msep->choice = 1;
2376 msep->data.ptrvalue = (Pointer) mbsp;
2377 SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) mbsp, msep);
2378 mip = MolInfoNew ();
2379 if (mip != NULL) {
2380 switch (sfp->idx.subtype) {
2381 case FEATDEF_preRNA :
2382 mip->biomol = MOLECULE_TYPE_PRE_MRNA;
2383 break;
2384 case FEATDEF_mRNA :
2385 mip->biomol = MOLECULE_TYPE_MRNA;
2386 break;
2387 case FEATDEF_tRNA :
2388 mip->biomol = MOLECULE_TYPE_TRNA;
2389 break;
2390 case FEATDEF_rRNA :
2391 mip->biomol = MOLECULE_TYPE_RRNA;
2392 break;
2393 case FEATDEF_snRNA :
2394 mip->biomol = MOLECULE_TYPE_SNRNA;
2395 break;
2396 case FEATDEF_scRNA :
2397 mip->biomol = MOLECULE_TYPE_SCRNA;
2398 break;
2399 case FEATDEF_otherRNA :
2400 mip->biomol = MOLECULE_TYPE_TRANSCRIBED_RNA;
2401 break;
2402 case FEATDEF_snoRNA :
2403 mip->biomol = MOLECULE_TYPE_SNORNA;
2404 break;
2405 case FEATDEF_ncRNA :
2406 mip->biomol = MOLECULE_TYPE_NCRNA;
2407 break;
2408 case FEATDEF_tmRNA :
2409 mip->biomol = MOLECULE_TYPE_TMRNA;
2410 break;
2411 default :
2412 mip->biomol = 0;
2413 break;
2414 }
2415 if (partial5 && partial3) {
2416 mip->completeness = 5;
2417 } else if (partial5) {
2418 mip->completeness = 3;
2419 } else if (partial3) {
2420 mip->completeness = 4;
2421 }
2422 vnp = CreateNewDescriptor (msep, Seq_descr_molinfo);
2423 if (vnp != NULL) {
2424 vnp->data.ptrvalue = (Pointer) mip;
2425 }
2426 }
2427 if (comment != NULL) {
2428 vnp = CreateNewDescriptor (msep, Seq_descr_comment);
2429 if (vnp != NULL) {
2430 vnp->data.ptrvalue = (Pointer) comment;
2431 }
2432 }
2433 /* add mRNA sequence to genomic product set */
2434 last->next = msep;
2435 last = msep;
2436 SetSeqFeatProduct (sfp, mbsp);
2437 }
2438 }
2439 }
2440 rnaseq = MemFree (rnaseq);
2441 }
2442 }
2443 }
2444 sfp = sfp->next;
2445 }
2446 SeqMgrLinkSeqEntry (target, parenttype, parentptr);
2447 RestoreSeqEntryObjMgrData (target, omdptop, &omdata);
2448 }
2449
2450 /* expand coding region features into protein product sequences */
2451
2452 last = NULL;
2453 sfp = first;
2454 while (sfp != NULL) {
2455 prev = &(sfp->xref);
2456 xref = sfp->xref;
2457 while (xref != NULL) {
2458 next = xref->next;
2459 if (xref->data.choice == SEQFEAT_PROT &&
2460 sfp->data.choice == SEQFEAT_CDREGION &&
2461 sfp->product == NULL && (! sfp->pseudo)) {
2462 prp = (ProtRefPtr) xref->data.value.ptrvalue;
2463 ok_to_remove = TRUE;
2464 if (prp != NULL) {
2465 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
2466 if (crp != NULL) {
2467 /**
2468 crp->frame = 0;
2469 **/
2470 bs = ProteinFromCdRegionEx (sfp, include_stop, remove_trailingX);
2471 if (bs == NULL && seq_fetch_failP != NULL) {
2472 *seq_fetch_failP = TRUE;
2473 }
2474 if (bs != NULL) {
2475 protseq = BSMerge (bs, NULL);
2476 bs = BSFree (bs);
2477 if (protseq != NULL) {
2478 ptr = protseq;
2479 ch = *ptr;
2480 while (ch != '\0') {
2481 *ptr = TO_UPPER (ch);
2482 ptr++;
2483 ch = *ptr;
2484 }
2485 i = (Int4) StringLen (protseq);
2486 if (i > 0 && protseq [i - 1] == '*') {
2487 protseq [i - 1] = '\0';
2488 }
2489 bs = BSNew (i + 2);
2490 if (bs != NULL) {
2491 ptr = protseq;
2492 /*
2493 if (protseq [0] == '-') {
2494 ptr++;
2495 }
2496 */
2497 BSWrite (bs, (VoidPtr) ptr, (Int4) StringLen (ptr));
2498 }
2499 protseq = MemFree (protseq);
2500 }
2501 pbsp = BioseqNew ();
2502 if (pbsp != NULL) {
2503 pbsp->repr = Seq_repr_raw;
2504 pbsp->mol = Seq_mol_aa;
2505 pbsp->seq_data_type = Seq_code_ncbieaa;
2506 pbsp->seq_data = (SeqDataPtr) bs;
2507 pbsp->length = BSLen (bs);
2508 bs = NULL;
2509 sep = NULL;
2510 mbsp = NULL;
2511 if (gen_prod_set) {
2512 gbq = sfp->qual;
2513 prevqual = (GBQualPtr PNTR) &(sfp->qual);
2514 id [0] = '\0';
2515 sip = NULL;
2516 while (gbq != NULL) {
2517 nextqual = gbq->next;
2518 if (StringICmp (gbq->qual, "transcript_id") == 0) {
2519 if (StringDoesHaveText (id) && StringDoesHaveText (gbq->val)) {
2520 ErrPostEx (SEV_WARNING, ERR_FEATURE_QualWrongThisFeat,
2521 "CDS transcript_id %s replacing %s", gbq->val, id);
2522 }
2523 *(prevqual) = gbq->next;
2524 gbq->next = NULL;
2525 StringNCpy_0 (id, gbq->val, sizeof (id));
2526 GBQualFree (gbq);
2527 } else if (StringICmp (gbq->qual, "secondary_accession") == 0) {
2528 *(prevqual) = gbq->next;
2529 gbq->next = NULL;
2530 shp = ParseStringIntoSeqHist (NULL, gbq->val);
2531 if (shp != NULL) {
2532 pbsp->hist = shp;
2533 }
2534 GBQualFree (gbq);
2535 } else {
2536 prevqual = (GBQualPtr PNTR) &(gbq->next);
2537 }
2538 gbq = nextqual;
2539 }
2540 if (StringHasNoText (id)) {
2541 Message (MSG_POSTERR, "No transcript_id on CDS - unable to create nuc-prot set");
2542 } else {
2543 if (StringChr (id, '|') != NULL) {
2544 sip = SeqIdParse (id);
2545 } else if (force_local_id) {
2546 sprintf (lcl, "lcl|%s", id);
2547 sip = SeqIdParse (lcl);
2548 } else {
2549 adv = ValidateAccnDotVer (id);
2550 if (adv == 0 || adv == -5) {
2551 ptr = StringChr (id, '.');
2552 if (ptr != NULL) {
2553 *ptr = '\0';
2554 ptr++;
2555 if (sscanf (ptr, "%ld", &val) == 1) {
2556 version = (Uint4) val;
2557 }
2558 }
2559 sip = SeqIdFromAccession (id, version, NULL);
2560 } else {
2561 sprintf (lcl, "lcl|%s", id);
2562 sip = SeqIdParse (lcl);
2563 }
2564 }
2565 }
2566 mbsp = BioseqFind (sip);
2567 SeqIdFree (sip);
2568 if (mbsp != NULL) {
2569 sep = SeqMgrGetSeqEntryForData (mbsp);
2570 /*
2571 } else {
2572 sep = GetBestTopParentForDataEx (entityID, bsp, TRUE);
2573 */
2574 }
2575 } else {
2576 sep = GetBestTopParentForData (entityID, bsp);
2577 }
2578 if (sep == NULL) {
2579 Message (MSG_POSTERR, "No location for nuc-prot set for CDS - unable to create nuc-prot set");
2580 pbsp = BioseqFree (pbsp);
2581 ok_to_remove = FALSE;
2582 } else {
2583 old = SeqEntrySetScope (sep);
2584 gbq = sfp->qual;
2585 prevqual = (GBQualPtr PNTR) &(sfp->qual);
2586 id [0] = '\0';
2587 sip = NULL;
2588 while (gbq != NULL) {
2589 nextqual = gbq->next;
2590 if (StringICmp (gbq->qual, "protein_id") == 0) {
2591 if (StringDoesHaveText (id) && StringDoesHaveText (gbq->val)) {
2592 ErrPostEx (SEV_WARNING, ERR_FEATURE_QualWrongThisFeat,
2593 "CDS protein_id %s replacing %s", gbq->val, id);
2594 }
2595 *(prevqual) = gbq->next;
2596 gbq->next = NULL;
2597 StringNCpy_0 (id, gbq->val, sizeof (id));
2598 GBQualFree (gbq);
2599 } else {
2600 prevqual = (GBQualPtr PNTR) &(gbq->next);
2601 }
2602 gbq = nextqual;
2603 }
2604 if (! StringHasNoText (id)) {
2605 if (StringChr (id, '|') != NULL) {
2606 sip = SeqIdParse (id);
2607 } else if (force_local_id) {
2608 sprintf (lcl, "lcl|%s", id);
2609 sip = SeqIdParse (lcl);
2610 } else {
2611 adv = ValidateAccnDotVer (id);
2612 if (adv == 0 || adv == -5) {
2613 ptr = StringChr (id, '.');
2614 if (ptr != NULL) {
2615 *ptr = '\0';
2616 ptr++;
2617 if (sscanf (ptr, "%ld", &val) == 1) {
2618 version = (Uint4) val;
2619 }
2620 }
2621 sip = SeqIdFromAccession (id, version, NULL);
2622 } else {
2623 sprintf (lcl, "lcl|%s", id);
2624 sip = SeqIdParse (lcl);
2625 }
2626 }
2627 }
2628 if (sip != NULL) {
2629 pbsp->id = sip;
2630 } else {
2631 pbsp->id = MakeNewProteinSeqIdEx (sfp->location, NULL, NULL, &ctr);
2632 }
2633 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
2634 SeqMgrAddToBioseqIndex (pbsp);
2635 SeqEntrySetScope (old);
2636 psep = SeqEntryNew ();
2637 if (psep != NULL) {
2638 psep->choice = 1;
2639 psep->data.ptrvalue = (Pointer) pbsp;
2640 SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) pbsp, psep);
2641 mip = MolInfoNew ();
2642 if (mip != NULL) {
2643 mip->biomol = 8;
2644 mip->tech = 8;
2645 if (partial5 && partial3) {
2646 mip->completeness = 5;
2647 } else if (partial5) {
2648 mip->completeness = 3;
2649 } else if (partial3) {
2650 mip->completeness = 4;
2651 }
2652 vnp = CreateNewDescriptor (psep, Seq_descr_molinfo);
2653 if (vnp != NULL) {
2654 vnp->data.ptrvalue = (Pointer) mip;
2655 }
2656 }
2657 /* the first protein may change the set/seq structure,
2658 so goes through AddSeqEntryToSeqEntry */
2659
2660 if (gen_prod_set || last == NULL) {
2661 descr = ExtractBioSourceAndPubs (sep);
2662 AddSeqEntryToSeqEntry (sep, psep, TRUE);
2663 ReplaceBioSourceAndPubs (sep, descr);
2664 last = psep;
2665 } else {
2666 last->next = psep;
2667 last = psep;
2668 }
2669 if (target == NULL) {
2670 target = sep;
2671 SaveSeqEntryObjMgrData (target, &omdptop, &omdata);
2672 GetSeqEntryParent (target, &parentptr, &parenttype);
2673 }
2674 SetSeqFeatProduct (sfp, pbsp);
2675 psep = SeqMgrGetSeqEntryForData (pbsp);
2676 if (psep != NULL) {
2677 last = psep;
2678 prot = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
2679 if (prot != NULL) {
2680 prot->data.value.ptrvalue = (Pointer) prp;
2681 SetSeqLocPartial (prot->location, partial5, partial3);
2682 prot->partial = (Boolean) (partial5 || partial3);
2683 }
2684 }
2685 }
2686 }
2687 }
2688 }
2689 }
2690 }
2691 if (ok_to_remove) {
2692 xref->data.value.ptrvalue = NULL;
2693 *(prev) = next;
2694 xref->next = NULL;
2695 xref->data.choice = 0;
2696 SeqFeatXrefFree (xref);
2697 } else {
2698 prev = &(xref->next);
2699 }
2700 } else {
2701 prev = &(xref->next);
2702 }
2703 xref = next;
2704 }
2705 sfp = sfp->next;
2706 }
2707
2708 /* expand mat_peptide features with protein_id qualifiers into protein product sequences */
2709
2710 last = NULL;
2711 sfp = first;
2712 while (sfp != NULL) {
2713 if (sfp->data.choice == SEQFEAT_PROT && sfp->product == NULL) {
2714 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
2715 gbq = sfp->qual;
2716 prevqual = (GBQualPtr PNTR) &(sfp->qual);
2717 id [0] = '\0';
2718 sip = NULL;
2719 while (gbq != NULL) {
2720 nextqual = gbq->next;
2721 if (StringICmp (gbq->qual, "protein_id") == 0) {
2722 if (StringDoesHaveText (id) && StringDoesHaveText (gbq->val)) {
2723 ErrPostEx (SEV_WARNING, ERR_FEATURE_QualWrongThisFeat,
2724 "Protein protein_id %s replacing %s",
2725 gbq->val, id);
2726 }
2727 *(prevqual) = gbq->next;
2728 gbq->next = NULL;
2729 StringNCpy_0 (id, gbq->val, sizeof (id));
2730 GBQualFree (gbq);
2731 } else {
2732 prevqual = (GBQualPtr PNTR) &(gbq->next);
2733 }
2734 gbq = nextqual;
2735 }
2736 if (! StringHasNoText (id)) {
2737 if (StringChr (id, '|') != NULL) {
2738 sip = SeqIdParse (id);
2739 } else if (force_local_id) {
2740 sprintf (lcl, "lcl|%s", id);
2741 sip = SeqIdParse (lcl);
2742 } else {
2743 adv = ValidateAccnDotVer (id);
2744 if (adv == 0 || adv == -5) {
2745 ptr = StringChr (id, '.');
2746 if (ptr != NULL) {
2747 *ptr = '\0';
2748 ptr++;
2749 if (sscanf (ptr, "%ld", &val) == 1) {
2750 version = (Uint4) val;
2751 }
2752 }
2753 sip = SeqIdFromAccession (id, version, NULL);
2754 } else {
2755 sprintf (lcl, "lcl|%s", id);
2756 sip = SeqIdParse (lcl);
2757 }
2758 }
2759 }
2760 if (sip != NULL) {
2761 protseq = GetSequenceByFeature (sfp);
2762 if (protseq == NULL && seq_fetch_failP != NULL) {
2763 *seq_fetch_failP = TRUE;
2764 }
2765 if (protseq != NULL) {
2766 i = (Int4) StringLen (protseq);
2767 bs = BSNew (i + 2);
2768 if (bs != NULL) {
2769 BSWrite (bs, (VoidPtr) protseq, (Int4) StringLen (protseq));
2770 pbsp = BioseqNew ();
2771 if (pbsp != NULL) {
2772 pbsp->repr = Seq_repr_raw;
2773 pbsp->mol = Seq_mol_aa;
2774 pbsp->seq_data_type = Seq_code_ncbieaa;
2775 pbsp->seq_data = (SeqDataPtr) bs;
2776 pbsp->length = BSLen (bs);
2777 bs = NULL;
2778 /*
2779 sep = GetTopSeqEntryForEntityID (entityID);
2780 */
2781 sep = GetBestTopParentForData (entityID, bsp);
2782 old = SeqEntrySetScope (sep);
2783 if (sip != NULL) {
2784 pbsp->id = sip;
2785 } else {
2786 pbsp->id = MakeNewProteinSeqIdEx (sfp->location, NULL, NULL, &ctr);
2787 }
2788 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
2789 SeqMgrAddToBioseqIndex (pbsp);
2790 SeqEntrySetScope (old);
2791 psep = SeqEntryNew ();
2792 if (psep != NULL) {
2793 psep->choice = 1;
2794 psep->data.ptrvalue = (Pointer) pbsp;
2795 SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) pbsp, psep);
2796 mip = MolInfoNew ();
2797 if (mip != NULL) {
2798 mip->biomol = MOLECULE_TYPE_PEPTIDE;
2799 if (partial5 && partial3) {
2800 mip->completeness = 5;
2801 } else if (partial5) {
2802 mip->completeness = 3;
2803 } else if (partial3) {
2804 mip->completeness = 4;
2805 }
2806 vnp = CreateNewDescriptor (psep, Seq_descr_molinfo);
2807 if (vnp != NULL) {
2808 vnp->data.ptrvalue = (Pointer) mip;
2809 }
2810 }
2811 if (last == NULL) {
2812 AddSeqEntryToSeqEntry (sep, psep, TRUE);
2813 last = psep;
2814 } else {
2815 last->next = psep;
2816 last = psep;
2817 }
2818 SetSeqFeatProduct (sfp, pbsp);
2819 if (prp != NULL) {
2820 prp2 = AsnIoMemCopy ((Pointer) prp,
2821 (AsnReadFunc) ProtRefAsnRead,
2822 (AsnWriteFunc) ProtRefAsnWrite);
2823 if (prp2 != NULL) {
2824 psep = SeqMgrGetSeqEntryForData (pbsp);
2825 if (psep != NULL) {
2826 prot = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
2827 if (prot != NULL) {
2828 prot->data.value.ptrvalue = prp2;
2829 SetSeqLocPartial (prot->location, partial5, partial3);
2830 prot->partial = (Boolean) (partial5 || partial3);
2831 }
2832 }
2833 }
2834 }
2835 }
2836 }
2837 }
2838 protseq = MemFree (protseq);
2839 }
2840 }
2841 }
2842 sfp = sfp->next;
2843 }
2844
2845 if (target != NULL) {
2846 SeqMgrLinkSeqEntry (target, parenttype, parentptr);
2847 RestoreSeqEntryObjMgrData (target, omdptop, &omdata);
2848 }
2849 }
2850
PromoteXrefsEx(SeqFeatPtr sfp,BioseqPtr bsp,Uint2 entityID,Boolean include_stop,Boolean remove_trailingX,Boolean gen_prod_set)2851 NLM_EXTERN void PromoteXrefsEx (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID, Boolean include_stop,
2852 Boolean remove_trailingX, Boolean gen_prod_set)
2853
2854 {
2855 PromoteXrefsExEx (sfp, bsp, entityID, include_stop, remove_trailingX, gen_prod_set, FALSE, NULL);
2856 }
2857
PromoteXrefs(SeqFeatPtr sfp,BioseqPtr bsp,Uint2 entityID)2858 NLM_EXTERN void PromoteXrefs (SeqFeatPtr sfp, BioseqPtr bsp, Uint2 entityID)
2859
2860 {
2861 PromoteXrefsExEx (sfp, bsp, entityID, TRUE, FALSE, FALSE, FALSE, NULL);
2862 }
2863 //LCOV_EXCL_STOP
2864
2865 /* begin BasicSeqEntryCleanup section */
2866
HasNoText(CharPtr str)2867 static Boolean HasNoText (CharPtr str)
2868
2869 {
2870 Uchar ch; /* to use 8bit characters in multibyte languages */
2871
2872 if (str != NULL) {
2873 ch = *str;
2874 while (ch != '\0') {
2875 if (ch > ' ') {
2876 return FALSE;
2877 }
2878 str++;
2879 ch = *str;
2880 }
2881 }
2882 return TRUE;
2883 }
2884
AlreadyInVnpList(ValNodePtr head,ValNodePtr curr)2885 static Boolean AlreadyInVnpList (ValNodePtr head, ValNodePtr curr)
2886
2887 {
2888 if (head == NULL || curr == NULL) return FALSE;
2889 /* since we cannot sort these lists, must check against all previous entries */
2890 while (head != curr && head != NULL) {
2891 if (StringICmp (head->data.ptrvalue, curr->data.ptrvalue) == 0) return TRUE;
2892 head = head->next;
2893 }
2894 return FALSE;
2895 }
2896
2897 //LCOV_EXCL_START
TrimSpacesAndSemicolons(CharPtr str)2898 NLM_EXTERN CharPtr TrimSpacesAndSemicolons (CharPtr str)
2899
2900 {
2901 CharPtr amp;
2902 Uchar ch; /* to use 8bit characters in multibyte languages */
2903 CharPtr dst;
2904 CharPtr ptr;
2905
2906 if (str != NULL && str [0] != '\0') {
2907 dst = str;
2908 ptr = str;
2909 ch = *ptr;
2910 if (ch != '\0' && (ch <= ' ' || ch == ';')) {
2911 while (ch != '\0' && (ch <= ' ' || ch == ';')) {
2912 ptr++;
2913 ch = *ptr;
2914 }
2915 while (ch != '\0') {
2916 *dst = ch;
2917 dst++;
2918 ptr++;
2919 ch = *ptr;
2920 }
2921 *dst = '\0';
2922 }
2923 amp = NULL;
2924 dst = NULL;
2925 ptr = str;
2926 ch = *ptr;
2927 while (ch != '\0') {
2928 if (ch == '&') {
2929 amp = ptr;
2930 dst = NULL;
2931 } else if (ch <= ' ') {
2932 if (dst == NULL) {
2933 dst = ptr;
2934 }
2935 amp = NULL;
2936 } else if (ch == ';') {
2937 if (dst == NULL && amp == NULL) {
2938 dst = ptr;
2939 }
2940 } else {
2941 dst = NULL;
2942 }
2943 ptr++;
2944 ch = *ptr;
2945 }
2946 if (dst != NULL) {
2947 *dst = '\0';
2948 }
2949 }
2950 return str;
2951 }
2952 //LCOV_EXCL_STOP
2953
TrimSpacesAndJunkFromEnds(CharPtr str,Boolean allowEllipsis)2954 NLM_EXTERN CharPtr TrimSpacesAndJunkFromEnds (
2955 CharPtr str,
2956 Boolean allowEllipsis
2957 )
2958
2959 {
2960 Uchar ch; /* to use 8bit characters in multibyte languages */
2961 CharPtr dst;
2962 Boolean isPeriod;
2963 Boolean isTilde;
2964 CharPtr ptr;
2965
2966 if (str != NULL && str [0] != '\0') {
2967 dst = str;
2968 ptr = str;
2969 ch = *ptr;
2970 if (ch != '\0' && (ch <= ' ' || ch == ',' || ch == ';')) {
2971 while (ch != '\0' && (ch <= ' ' || ch == ',' || ch == ';')) {
2972 ptr++;
2973 ch = *ptr;
2974 }
2975 while (ch != '\0') {
2976 *dst = ch;
2977 dst++;
2978 ptr++;
2979 ch = *ptr;
2980 }
2981 *dst = '\0';
2982 }
2983 dst = NULL;
2984 ptr = str;
2985 ch = *ptr;
2986 isPeriod = FALSE;
2987 isTilde = FALSE;
2988 while (ch != '\0') {
2989 if (ch <= ' ' || ch == '.' || ch == ',' || ch == '~' || ch == ';') {
2990 if (dst == NULL) {
2991 dst = ptr;
2992 }
2993 isPeriod = (Boolean) (isPeriod || ch == '.');
2994 isTilde = (Boolean) (isTilde || ch == '~');
2995 } else {
2996 dst = NULL;
2997 isPeriod = FALSE;
2998 isTilde = FALSE;
2999 }
3000 ptr++;
3001 ch = *ptr;
3002 }
3003 if (dst != NULL) {
3004 /* allow one period at end */
3005 if (isPeriod) {
3006 *dst = '.';
3007 dst++;
3008 /* ellipsis are now okay */
3009 if (allowEllipsis && *dst == '.' && dst [1] == '.') {
3010 dst += 2;
3011 }
3012 } else if (isTilde) {
3013 /* allow double tilde at end */
3014 if (*dst == '~' && dst [1] == '~') {
3015 dst += 2;
3016 }
3017 }
3018 *dst = '\0';
3019 }
3020 }
3021 return str;
3022 }
3023
/*
 * TrimSpacesSemicolonsAndCommas
 *
 * Edits str in place and returns it.  NULL or empty input is returned
 * untouched.
 *
 *  1. Leading bytes that are <= ' ', ';' or ',' are removed by shifting
 *     the rest of the string forward.
 *  2. The trailing run of bytes that are <= ' ', ';' or ',' is cut off.
 *     A ';' does not start that run while an '&' has been seen with no
 *     whitespace or comma since, so a semicolon following "&..." is kept.
 *
 * Bytes are examined as Uchar so 8-bit characters are never treated as
 * blanks.
 */
static CharPtr TrimSpacesSemicolonsAndCommas (CharPtr str)

{
  CharPtr  entity;  /* last '&' not yet cancelled by whitespace or comma */
  CharPtr  rd;
  CharPtr  tail;    /* first byte of the current trailing run, or NULL */
  Uchar    uc;
  CharPtr  wr;

  if (str == NULL || str [0] == '\0') return str;

  /* leading trim */
  rd = str;
  uc = (Uchar) *rd;
  while (uc != '\0' && (uc <= ' ' || uc == ';' || uc == ',')) {
    rd++;
    uc = (Uchar) *rd;
  }
  if (rd != str) {
    wr = str;
    while (*rd != '\0') {
      *wr = *rd;
      wr++;
      rd++;
    }
    *wr = '\0';
  }

  /* trailing trim */
  entity = NULL;
  tail = NULL;
  for (rd = str; *rd != '\0'; rd++) {
    uc = (Uchar) *rd;
    if (uc == '&') {
      entity = rd;
      tail = NULL;
    } else if (uc <= ' ' || uc == ',') {
      /* whitespace and commas behave alike: extend the run, forget the '&' */
      if (tail == NULL) {
        tail = rd;
      }
      entity = NULL;
    } else if (uc == ';') {
      if (tail == NULL && entity == NULL) {
        tail = rd;
      }
    } else {
      tail = NULL;
    }
  }
  if (tail != NULL) {
    *tail = '\0';
  }

  return str;
}
3083
/*
 * TrimFlankingQuotes
 *
 * Edits str in place and returns it.  While the first and last characters
 * are a matching pair of double quotes or a matching pair of single
 * quotes, both are overwritten with a space and TrimSpacesAroundString is
 * applied; the test is then repeated on the shortened string.  NULL or
 * empty input is returned untouched.
 */
static CharPtr TrimFlankingQuotes (CharPtr str)

{
  char    first;
  char    final;
  size_t  len;

  if (str == NULL || str [0] == '\0') return str;

  for (len = StringLen (str); len > 0; len = StringLen (str)) {
    first = str [0];
    final = str [len - 1];
    if (first != final || (first != '"' && first != '\'')) break;
    str [0] = ' ';
    str [len - 1] = ' ';
    TrimSpacesAroundString (str);
  }
  return str;
}
3107
/*
 * RemoveFlankingQuotes
 *
 * Applies TrimFlankingQuotes to *strp in place.  If HasNoText then reports
 * that nothing is left, the buffer is handed to MemFree and *strp receives
 * MemFree's return value.  Does nothing when strp or *strp is NULL.
 */
static void RemoveFlankingQuotes (CharPtr PNTR strp)

{
  if (strp != NULL && *strp != NULL) {
    TrimFlankingQuotes (*strp);
    if (HasNoText (*strp)) {
      *strp = MemFree (*strp);
    }
  }
}
3118
/*
 * RemoveFlankingQuotesList
 *
 * Runs TrimFlankingQuotes over the string of every node in the list at
 * *vnpp.  A node is unlinked and released with ValNodeFreeData when its
 * string then has no text (HasNoText) or when AlreadyInVnpList finds an
 * equal string in an earlier node.  *vnpp is updated if the head node is
 * removed.  Does nothing when vnpp is NULL.
 */
static void RemoveFlankingQuotesList (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;  /* address of the pointer that leads to node */
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TrimFlankingQuotes (node->data.ptrvalue);
    if (HasNoText (node->data.ptrvalue) || AlreadyInVnpList (*vnpp, node)) {
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
3142
/*
 * CleanVisString
 *
 * Trims *strp in place with TrimSpacesSemicolonsAndCommas.  If HasNoText
 * then reports that nothing is left, the buffer is handed to MemFree and
 * *strp receives MemFree's return value.  Does nothing when strp or *strp
 * is NULL.
 */
static void CleanVisString (CharPtr PNTR strp)

{
  CharPtr  text;

  if (strp == NULL) return;
  text = *strp;
  if (text == NULL) return;

  TrimSpacesSemicolonsAndCommas (text);
  if (! HasNoText (text)) return;

  *strp = MemFree (text);
}
3153
/*
 * CleanVisStringAndCompress
 *
 * Applies TrimSpacesSemicolonsAndCommas and then Asn2gnbkCompressSpaces to
 * *strp in place.  If HasNoText then reports that nothing is left, the
 * buffer is handed to MemFree and *strp receives MemFree's return value.
 * Does nothing when strp or *strp is NULL.
 */
static void CleanVisStringAndCompress (CharPtr PNTR strp)

{
  CharPtr  text;

  if (strp == NULL) return;
  text = *strp;
  if (text == NULL) return;

  TrimSpacesSemicolonsAndCommas (text);
  Asn2gnbkCompressSpaces (text);
  if (! HasNoText (text)) return;

  *strp = MemFree (text);
}
3165
/*
 * CleanVisStringJunk
 *
 * Applies TrimSpacesAndJunkFromEnds (with allowEllipsis TRUE) to *strp in
 * place.  If HasNoText then reports that nothing is left, the buffer is
 * handed to MemFree and *strp receives MemFree's return value.  Does
 * nothing when strp or *strp is NULL.
 */
static void CleanVisStringJunk (CharPtr PNTR strp)

{
  if (strp != NULL && *strp != NULL) {
    TrimSpacesAndJunkFromEnds (*strp, TRUE);
    if (HasNoText (*strp)) {
      *strp = MemFree (*strp);
    }
  }
}
3176
/*
 * CleanVisStringJunkAndCompress
 *
 * Applies TrimSpacesAndJunkFromEnds (with allowEllipsis TRUE) and then
 * Asn2gnbkCompressSpaces to *strp in place.  If HasNoText then reports
 * that nothing is left, the buffer is handed to MemFree and *strp
 * receives MemFree's return value.  Does nothing when strp or *strp is
 * NULL.
 */
static void CleanVisStringJunkAndCompress (CharPtr PNTR strp)

{
  if (strp != NULL && *strp != NULL) {
    TrimSpacesAndJunkFromEnds (*strp, TRUE);
    Asn2gnbkCompressSpaces (*strp);
    if (HasNoText (*strp)) {
      *strp = MemFree (*strp);
    }
  }
}
3188
/*
 * CleanDoubleQuote
 *
 * Replaces every double-quote character in str with a single-quote
 * character, in place.  Does nothing when str is NULL.
 */
static void CleanDoubleQuote (CharPtr str)

{
  CharPtr  scan;

  if (str == NULL) return;

  for (scan = str; *scan != '\0'; scan++) {
    if (*scan == '"') {
      *scan = '\'';
    }
  }
}
3204
/*
 * RemoveSpacesBetweenTildes
 *
 * Edits str in place and returns it.  Whenever a '~' is followed by a run
 * of bytes that are <= ' ' (space and control characters) and that run is
 * itself followed by another '~', the run is dropped so the two tildes
 * become adjacent.  Runs not followed by a tilde are left alone.  NULL or
 * empty input is returned untouched.
 *
 * Bytes are examined as Uchar: with a plain (possibly signed) char, bytes
 * >= 0x80 compare as negative, would pass the "<= ' '" test, and would be
 * deleted between tildes as if they were blanks.
 */
static CharPtr RemoveSpacesBetweenTildes (CharPtr str)

{
  Uchar    ch;  /* to use 8bit characters in multibyte languages */
  CharPtr  dst;
  CharPtr  ptr;
  CharPtr  tmp;

  if (str == NULL || str [0] == '\0') return str;

  dst = str;
  ptr = str;
  ch = (Uchar) *ptr;
  while (ch != '\0') {
    *dst = (Char) ch;
    dst++;
    ptr++;
    if (ch == '~') {
      /* look ahead past blanks; only commit the skip if a tilde follows */
      tmp = ptr;
      ch = (Uchar) *tmp;
      while (ch != '\0' && ch <= ' ') {
        tmp++;
        ch = (Uchar) *tmp;
      }
      if (ch == '~') {
        ptr = tmp;
      }
    }
    ch = (Uchar) *ptr;
  }
  *dst = '\0';

  return str;
}
3239
/*
 * CleanVisStringList
 *
 * Runs TrimSpacesSemicolonsAndCommas over the string of every node in the
 * list at *vnpp.  A node is unlinked and released with ValNodeFreeData
 * when its string then has no text (HasNoText) or when AlreadyInVnpList
 * finds an equal string (StringICmp) in an earlier node.  *vnpp is
 * updated if the head node is removed.  Does nothing when vnpp is NULL.
 */
static void CleanVisStringList (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;  /* address of the pointer that leads to node */
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TrimSpacesSemicolonsAndCommas (node->data.ptrvalue);
    if (HasNoText (node->data.ptrvalue) || AlreadyInVnpList (*vnpp, node)) {
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
3263
/*
 * CleanVisStringJunkListAndCompress
 *
 * For every node in the list at *vnpp, applies to its string, in order:
 * TrimSpacesSemicolonsAndCommas, TrimSpacesAndJunkFromEnds (allowEllipsis
 * TRUE) and Asn2gnbkCompressSpaces.  A node is unlinked and released with
 * ValNodeFreeData when its string then has no text (HasNoText) or when
 * AlreadyInVnpList finds an equal string in an earlier node.  *vnpp is
 * updated if the head node is removed.  Does nothing when vnpp is NULL.
 */
static void CleanVisStringJunkListAndCompress (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;  /* address of the pointer that leads to node */
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TrimSpacesSemicolonsAndCommas (node->data.ptrvalue);
    TrimSpacesAndJunkFromEnds (node->data.ptrvalue, TRUE);
    Asn2gnbkCompressSpaces (node->data.ptrvalue);
    if (HasNoText (node->data.ptrvalue) || AlreadyInVnpList (*vnpp, node)) {
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
3289
/*
 * CleanVisStringListAndCompress
 *
 * For every node in the list at *vnpp, applies TrimSpacesSemicolonsAndCommas
 * and then Asn2gnbkCompressSpaces to its string.  A node is unlinked and
 * released with ValNodeFreeData when its string then has no text
 * (HasNoText) or when AlreadyInVnpList finds an equal string in an earlier
 * node.  *vnpp is updated if the head node is removed.  Does nothing when
 * vnpp is NULL.
 */
static void CleanVisStringListAndCompress (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;  /* address of the pointer that leads to node */
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TrimSpacesSemicolonsAndCommas (node->data.ptrvalue);
    Asn2gnbkCompressSpaces (node->data.ptrvalue);
    if (HasNoText (node->data.ptrvalue) || AlreadyInVnpList (*vnpp, node)) {
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
3314
AlreadyInVnpListCaseSensitive(ValNodePtr head,ValNodePtr curr)3315 static Boolean AlreadyInVnpListCaseSensitive (ValNodePtr head, ValNodePtr curr)
3316
3317 {
3318 if (head == NULL || curr == NULL) return FALSE;
3319 /* since we cannot sort these lists, must check against all previous entries */
3320 while (head != curr && head != NULL) {
3321 if (StringCmp (head->data.ptrvalue, curr->data.ptrvalue) == 0) return TRUE;
3322 head = head->next;
3323 }
3324 return FALSE;
3325 }
3326
/*
 * CleanVisStringListCaseSensitive
 *
 * Runs TrimSpacesSemicolonsAndCommas over the string of every node in the
 * list at *vnpp.  A node is unlinked and released with ValNodeFreeData
 * when its string then has no text (HasNoText) or when
 * AlreadyInVnpListCaseSensitive finds an equal string (StringCmp) in an
 * earlier node.  *vnpp is updated if the head node is removed.  Does
 * nothing when vnpp is NULL.
 */
static void CleanVisStringListCaseSensitive (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;  /* address of the pointer that leads to node */
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TrimSpacesSemicolonsAndCommas (node->data.ptrvalue);
    if (HasNoText (node->data.ptrvalue) ||
        AlreadyInVnpListCaseSensitive (*vnpp, node)) {
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
3350
/*
 * CleanDoubleQuoteList
 *
 * Calls CleanDoubleQuote on the string held by each node of the list
 * starting at vnp.  An empty (NULL) list is a no-op.
 */
static void CleanDoubleQuoteList (ValNodePtr vnp)

{
  ValNodePtr  node;

  for (node = vnp; node != NULL; node = node->next) {
    CleanDoubleQuote ((CharPtr) node->data.ptrvalue);
  }
}
3359
HandledGBQualOnGene(SeqFeatPtr sfp,GBQualPtr gbq)3360 static Boolean HandledGBQualOnGene (SeqFeatPtr sfp, GBQualPtr gbq)
3361
3362 {
3363 Int2 choice = 0;
3364 GeneRefPtr grp;
3365
3366 if (StringICmp (gbq->qual, "map") == 0) {
3367 choice = 2;
3368 } else if (StringICmp (gbq->qual, "allele") == 0) {
3369 choice = 3;
3370 } else if (StringICmp (gbq->qual, "locus_tag") == 0) {
3371 choice = 4;
3372 } else if (StringICmp (gbq->qual, "old_locus_tag") == 0) {
3373 choice = 5;
3374 } else if (StringICmp (gbq->qual, "gene_synonym") == 0) {
3375 choice = 6;
3376 }
3377 if (choice > 0) {
3378 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
3379 if (grp == NULL) return FALSE;
3380 switch (choice) {
3381 case 2 :
3382 if (grp->maploc != NULL) return FALSE;
3383 if (StringHasNoText (gbq->val)) return FALSE;
3384 grp->maploc = StringSave (gbq->val);
3385 break;
3386 case 3 :
3387 if (StringHasNoText (gbq->val)) return FALSE;
3388 if (grp->allele != NULL) {
3389 if (StringICmp (gbq->val, grp->allele) == 0) return TRUE;
3390 return FALSE;
3391 }
3392 grp->allele = StringSave (gbq->val);
3393 break;
3394 case 4 :
3395 if (grp->locus_tag != NULL) return FALSE;
3396 if (StringHasNoText (gbq->val)) return FALSE;
3397 grp->locus_tag = StringSave (gbq->val);
3398 break;
3399 case 5 :
3400 /* removed by indexer request */
3401 /* if (StringHasNoText (gbq->val)) return FALSE;
3402 * if (grp->locus_tag != NULL) {
3403 * if (StringICmp (gbq->val, grp->locus_tag) == 0) return TRUE;
3404 * return FALSE;
3405 * }
3406 */
3407 return FALSE;
3408 break;
3409 case 6 :
3410 if (StringHasNoText (gbq->val)) return FALSE;
3411 ValNodeCopyStr (&(grp->syn), 0, gbq->val);
3412 default :
3413 break;
3414 }
3415 return TRUE;
3416 }
3417 return FALSE;
3418 }
3419
3420 /* code break parser functions from the flatfile parser */
3421
GetQualValueAa(CharPtr qval)3422 static Uint1 GetQualValueAa (CharPtr qval)
3423
3424 {
3425 CharPtr str, eptr, ptr;
3426 Uint1 aa;
3427
3428 str = StringStr(qval, "aa:");
3429 if (str != NULL) {
3430 str += 3;
3431 } else {
3432 ErrPostEx (SEV_WARNING, ERR_QUALIFIER_InvalidDataFormat,
3433 "bad transl_except %s", qval);
3434 str = StringStr(qval, ",");
3435 if (str != NULL) {
3436 str = StringStr(str, ":");
3437 if (str != NULL) {
3438 str++;
3439 }
3440 }
3441 }
3442
3443 if (str == NULL) return (Uint1) 'X';
3444
3445 while (*str == ' ')
3446 ++str;
3447 for (eptr = str; *eptr != ')' && *eptr != ' ' && *eptr != '\0'; eptr++) continue;
3448
3449 ptr = TextSave(str, eptr-str);
3450 aa = ValidAminoAcid(ptr);
3451 MemFree(ptr);
3452
3453 return (aa);
3454 }
3455
/*
 * SimpleValuePos
 *
 * Returns a copy, made with TextSave, of the position text of a qualifier
 * value of the form "(pos:<position>,aa:...)".  The text starts after
 * "(pos:" (leading spaces skipped) and ends just before ",aa:"; if ",aa:"
 * is not present it ends at the first ',' or at the end of the string.
 * Returns NULL when "(pos:" is not found.  ParseAnticodon in this file
 * releases the result with MemFree.
 */
static CharPtr SimpleValuePos (CharPtr qval)

{
  CharPtr  start;
  CharPtr  stop;

  start = StringStr (qval, "(pos:");
  if (start == NULL) return NULL;

  start += 5;
  while (*start == ' ') {
    start++;
  }

  stop = StringStr (start, ",aa:");
  if (stop == NULL) {
    /* no ",aa:" marker - fall back to the first comma or end of string */
    stop = start;
    while (*stop != ',' && *stop != '\0') {
      stop++;
    }
  }

  return TextSave (start, stop - start);
}
3476
3477 //LCOV_EXCL_START
3478 extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset);
ParseAnticodon(SeqFeatPtr sfp,CharPtr val,Int4 offset)3479 extern Boolean ParseAnticodon (SeqFeatPtr sfp, CharPtr val, Int4 offset)
3480
3481 {
3482 Int4 diff;
3483 Int2 j;
3484 Boolean locmap;
3485 int num_errs;
3486 CharPtr pos;
3487 Boolean pos_range = FALSE;
3488 RnaRefPtr rrp;
3489 SeqIntPtr sintp;
3490 SeqIdPtr sip;
3491 Boolean sitesmap;
3492 SeqLocPtr slp;
3493 SeqPntPtr spp;
3494 Uint1 strand;
3495 Int4 temp;
3496 tRNAPtr trp;
3497
3498 if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return FALSE;
3499 if (StringHasNoText (val)) return FALSE;
3500
3501 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
3502 if (rrp == NULL) return FALSE;
3503
3504 if (rrp->ext.choice == 0 && rrp->ext.value.ptrvalue == NULL) {
3505 rrp->ext.choice = 2;
3506 trp = (tRNAPtr) MemNew (sizeof (tRNA));
3507 rrp->ext.value.ptrvalue = (Pointer) trp;
3508 if (trp != NULL) {
3509 trp->aatype = 2;
3510 for (j = 0; j < 6; j++) {
3511 trp->codon [j] = 255;
3512 }
3513 }
3514 }
3515 if (rrp->ext.choice != 2) return FALSE;
3516
3517 trp = (tRNAPtr) rrp->ext.value.ptrvalue;
3518 if (trp == NULL) return FALSE;
3519
3520 /* find SeqId to use */
3521 sip = SeqLocId (sfp->location);
3522 if (sip == NULL) {
3523 slp = SeqLocFindNext (sfp->location, NULL);
3524 if (slp != NULL) {
3525 sip = SeqLocId (slp);
3526 }
3527 }
3528 if (sip == NULL) return FALSE;
3529
3530 /* parse location */
3531 pos = SimpleValuePos (val);
3532 if (pos == NULL) {
3533 ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3534 "anticodon parsing failed, %s, drop the anticodon", val);
3535 return FALSE;
3536 }
3537
3538 trp->anticodon = Nlm_gbparseint (pos, &locmap, &sitesmap, &num_errs, sip);
3539 if (trp->anticodon == NULL) {
3540 ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3541 "anticodon parsing failed, %s, drop the anticodon", pos);
3542 MemFree (pos);
3543 return FALSE;
3544 }
3545
3546 if (trp->anticodon->choice == SEQLOC_PNT) {
3547 /* allow a single point */
3548 spp = trp->anticodon->data.ptrvalue;
3549 if (spp != NULL) {
3550 spp->point += offset;
3551 }
3552 }
3553 if (trp->anticodon->choice == SEQLOC_INT) {
3554 sintp = trp->anticodon->data.ptrvalue;
3555 if (sintp == NULL) {
3556 MemFree (pos);
3557 return FALSE;
3558 }
3559 sintp->from += offset;
3560 sintp->to += offset;
3561 if (sintp->from > sintp->to) {
3562 temp = sintp->from;
3563 sintp->from = sintp->to;
3564 sintp->to = temp;
3565 }
3566 sintp->strand = SeqLocStrand (sfp->location);
3567 strand = sintp->strand;
3568 diff = SeqLocStop(trp->anticodon) - SeqLocStart(trp->anticodon); /* SeqLocStop/Start does not do what you think */
3569 /*
3570 if ((diff != 2 && (strand != Seq_strand_minus)) ||
3571 (diff != -2 && (strand == Seq_strand_minus))) {
3572 pos_range = TRUE;
3573 }
3574 */
3575 if (diff != 2) {
3576 pos_range = TRUE;
3577 }
3578 if (num_errs > 0 || pos_range) {
3579 ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3580 "anticodon range is wrong, %s, drop the anticodon", pos);
3581 MemFree (pos);
3582 return FALSE;
3583 }
3584 if (SeqLocCompare (sfp->location, trp->anticodon) != SLC_B_IN_A) {
3585 ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3586 "/anticodon not in tRNA: %s", val);
3587 MemFree (pos);
3588 return FALSE;
3589 }
3590 }
3591
3592 MemFree (pos);
3593
3594 return TRUE;
3595 }
3596 //LCOV_EXCL_STOP
3597
3598 extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset);
ParseCodeBreak(SeqFeatPtr sfp,CharPtr val,Int4 offset)3599 extern Boolean ParseCodeBreak (SeqFeatPtr sfp, CharPtr val, Int4 offset)
3600
3601 {
3602 CodeBreakPtr cbp;
3603 CdRegionPtr crp;
3604 Int4 diff;
3605 CodeBreakPtr lastcbp;
3606 Boolean locmap;
3607 int num_errs;
3608 Boolean packed_int = TRUE;
3609 CharPtr pos;
3610 Boolean pos_range = FALSE;
3611 SeqIntPtr sintp;
3612 SeqIdPtr sip;
3613 Boolean sitesmap;
3614 SeqLocPtr slp;
3615 SeqLocPtr slp1, slp2;
3616 SeqPntPtr spp;
3617 Uint1 strand;
3618 Int4 temp;
3619 CharPtr tmp;
3620
3621 if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return FALSE;
3622 if (StringHasNoText (val)) return FALSE;
3623 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
3624 if (crp == NULL) return FALSE;
3625
3626 /* find SeqId to use */
3627 sip = SeqLocId (sfp->location);
3628 if (sip == NULL) {
3629 slp = SeqLocFindNext (sfp->location, NULL);
3630 if (slp != NULL) {
3631 sip = SeqLocId (slp);
3632 }
3633 }
3634 if (sip == NULL) return FALSE;
3635
3636 cbp = CodeBreakNew ();
3637 if (cbp == NULL) return FALSE;
3638 cbp->aa.choice = 1; /* ncbieaa */
3639 cbp->aa.value.intvalue = (Int4) GetQualValueAa (val);
3640
3641 /* parse location */
3642 pos = SimpleValuePos (val);
3643 if (pos == NULL) {
3644 ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3645 "transl_except parsing failed, %s, drop the transl_except", val);
3646 return FALSE;
3647 }
3648 if (StringChr (pos, ',') != NULL) {
3649 tmp = (CharPtr) MemNew ((StringLen (pos) + 10) * sizeof (Char));
3650 if (tmp != NULL) {
3651 sprintf (tmp, "join(%s)", pos);
3652 MemFree (pos);
3653 pos = tmp;
3654 }
3655 }
3656 cbp->loc = Nlm_gbparseint (pos, &locmap, &sitesmap, &num_errs, sip);
3657 if (cbp->loc == NULL) {
3658 CodeBreakFree (cbp);
3659 ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3660 "transl_except parsing failed, %s, drop the transl_except", pos);
3661 MemFree (pos);
3662 return FALSE;
3663 }
3664 if (cbp->loc->choice == SEQLOC_PNT) {
3665 /* allow a single point */
3666 spp = cbp->loc->data.ptrvalue;
3667 if (spp != NULL) {
3668 spp->point += offset;
3669 }
3670 } else if (cbp->loc->choice == SEQLOC_INT) {
3671 sintp = cbp->loc->data.ptrvalue;
3672 if (sintp == NULL) {
3673 MemFree (pos);
3674 return FALSE;
3675 }
3676 sintp->from += offset;
3677 sintp->to += offset;
3678 if (sintp->from > sintp->to) {
3679 temp = sintp->from;
3680 sintp->from = sintp->to;
3681 sintp->to = temp;
3682 }
3683 sintp->strand = SeqLocStrand (sfp->location);
3684 strand = sintp->strand;
3685 diff = SeqLocStop(cbp->loc) - SeqLocStart(cbp->loc); /* SeqLocStop/Start does not do what you think */
3686 /*
3687 if ((diff != 2 && (strand != Seq_strand_minus)) ||
3688 (diff != -2 && (strand == Seq_strand_minus))) {
3689 pos_range = TRUE;
3690 }
3691 */
3692 if (diff != 2) {
3693 pos_range = TRUE;
3694 }
3695 if (num_errs > 0 || pos_range) {
3696 CodeBreakFree (cbp);
3697 ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3698 "transl_except range is wrong, %s, drop the transl_except", pos);
3699 MemFree (pos);
3700 return FALSE;
3701 }
3702 if (SeqLocCompare (sfp->location, cbp->loc) != SLC_B_IN_A) {
3703 CodeBreakFree (cbp);
3704 ErrPostEx (SEV_WARNING, ERR_FEATURE_LocationParsing,
3705 "/transl_except not in CDS: %s", val);
3706 MemFree (pos);
3707 return FALSE;
3708 }
3709 } else {
3710 slp1 = dnaLoc_to_aaLoc (sfp, cbp->loc, TRUE, NULL, TRUE);
3711 if (slp1 != NULL) {
3712 slp2 = aaLoc_to_dnaLoc (sfp, slp1);
3713 if (slp2 != NULL) {
3714 SeqLocFree (cbp->loc);
3715 cbp->loc = slp2;
3716 }
3717 SeqLocFree (slp1);
3718 }
3719 slp = SeqLocFindNext (cbp->loc, NULL);
3720 while (slp != NULL) {
3721 if (slp->choice == SEQLOC_PNT) {
3722 spp = slp->data.ptrvalue;
3723 if (spp != NULL) {
3724 sintp = SeqIntNew();
3725 if (sintp != NULL) {
3726 sintp->id = SeqIdDup (spp->id);
3727 sintp->from = spp->point;
3728 sintp->to = spp->point;
3729 sintp->strand = SeqLocStrand (sfp->location);
3730 slp->choice = SEQLOC_INT;
3731 slp->data.ptrvalue = sintp;
3732 SeqPntFree (spp);
3733 }
3734 }
3735 }
3736 if (slp->choice == SEQLOC_INT) {
3737 sintp = slp->data.ptrvalue;
3738 if (sintp == NULL) {
3739 MemFree (pos);
3740 return FALSE;
3741 }
3742 sintp->from += offset;
3743 sintp->to += offset;
3744 if (sintp->from > sintp->to) {
3745 temp = sintp->from;
3746 sintp->from = sintp->to;
3747 sintp->to = temp;
3748 }
3749 sintp->strand = SeqLocStrand (sfp->location);
3750 } else {
3751 packed_int = FALSE;
3752 }
3753 slp = SeqLocFindNext (cbp->loc, slp);
3754 }
3755 slp = cbp->loc;
3756 if (packed_int && slp->choice == SEQLOC_MIX) {
3757 slp->choice = SEQLOC_PACKED_INT;
3758 }
3759 }
3760
3761 /* add to code break list */
3762 lastcbp = crp->code_break;
3763 if (lastcbp == NULL) {
3764 crp->code_break = cbp;
3765 } else {
3766 while (lastcbp->next != NULL) {
3767 lastcbp = lastcbp->next;
3768 }
3769 lastcbp->next = cbp;
3770 }
3771 MemFree (pos);
3772 return TRUE;
3773 }
3774
CodonsAlreadyInOrder(tRNAPtr trp)3775 static Boolean CodonsAlreadyInOrder (tRNAPtr trp)
3776
3777 {
3778 Int2 i, j;
3779
3780 if (trp == NULL) return TRUE;
3781 for (i = 0, j = 1; i < 5; i++, j++) {
3782 if (trp->codon [i] > trp->codon [j]) return FALSE;
3783 }
3784 return TRUE;
3785 }
3786
SortCodons(VoidPtr ptr1,VoidPtr ptr2)3787 static int LIBCALLBACK SortCodons (VoidPtr ptr1, VoidPtr ptr2)
3788
3789 {
3790 Uint1 codon1, codon2;
3791
3792 if (ptr1 == NULL || ptr2 == NULL) return 0;
3793 codon1 = *((Uint1Ptr) ptr1);
3794 codon2 = *((Uint1Ptr) ptr2);
3795 if (codon1 > codon2) {
3796 return 1;
3797 } else if (codon1 < codon2) {
3798 return -1;
3799 }
3800 return 0;
3801 }
3802
/* Compacts the six codon slots of trp in place: a value equal to the one  */
/* kept just before it is skipped, as are leading 255 values (the running  */
/* "previous" value starts at 255).  Freed slots at the end are set to 255. */
static void UniqueCodons (tRNAPtr trp)

{
  Uint1  curr;
  Uint1  prev = 255;
  Int2   rd;
  Int2   wr = 0;

  if (trp == NULL) return;

  for (rd = 0; rd < 6; rd++) {
    curr = trp->codon [rd];
    if (curr == prev) continue;
    trp->codon [wr] = curr;
    wr++;
    prev = curr;
  }
  for (; wr < 6; wr++) {
    trp->codon [wr] = 255;
  }
}
3824
3825 static CharPtr codonLetterExpand [] =
3826 {
3827 "?", "A", "C", "AC",
3828 "G", "AG", "CG", "ACG",
3829 "T", "AT", "CT", "ACT",
3830 "GT", "AGT", "CGT", "ACGT",
3831 NULL
3832 };
3833
ParseDegenerateCodon(tRNAPtr trp,Uint1Ptr codon)3834 NLM_EXTERN Boolean ParseDegenerateCodon (tRNAPtr trp, Uint1Ptr codon)
3835
3836 {
3837 Uint1 ch;
3838 Uint1 chrToInt [256];
3839 Int2 k;
3840 Uint1 i, j;
3841 Uint1 idx;
3842 CharPtr intToChr = "?ACMGRSVTWYHKDBN";
3843 CharPtr ptr, str;
3844
3845 if (trp == NULL || codon == NULL) return FALSE;
3846
3847 for (i = 0; i < 2; i++) {
3848 ch = codon [i];
3849 if (ch != 'A' && ch != 'C' && ch != 'G' && ch != 'T') return FALSE;
3850 }
3851
3852 for (k = 0; k < 256; k++) {
3853 chrToInt [k] = 0;
3854 }
3855 for (i = 1; i < 16; i++) {
3856 ch = intToChr [i];
3857 chrToInt [(int) ch] = i;
3858 }
3859
3860 idx = chrToInt [(int) codon [2]];
3861 if (idx > 15) return FALSE;
3862
3863 str = codonLetterExpand [idx];
3864 ptr = str;
3865 ch = *ptr;
3866 j = 0;
3867 codon [3] = '\0';
3868 while (ch != '\0' && j < 6) {
3869 codon [2] = ch;
3870 trp->codon [j] = IndexForCodon (codon, Seq_code_iupacna);
3871 ptr++;
3872 ch = *ptr;
3873 j++;
3874 }
3875
3876 return TRUE;
3877 }
3878
CleanupTrna(SeqFeatPtr sfp,tRNAPtr trp)3879 static void CleanupTrna (SeqFeatPtr sfp, tRNAPtr trp)
3880
3881 {
3882 Uint1 aa = 0;
3883 Uint1 curraa;
3884 Uint1 from = 0;
3885 Int2 j;
3886 Boolean justTrnaText;
3887 SeqMapTablePtr smtp;
3888 Uint1 trpcodon [6];
3889 /*
3890 Char codon [16];
3891 Int2 i;
3892 Boolean okayToFree = TRUE;
3893 CharPtr str;
3894 */
3895
3896 /* look for tRNA-OTHER with actual amino acid in comment */
3897
3898 if (trp == NULL) return;
3899
3900 /*
3901 if (sfp != NULL && sfp->comment != NULL && trp->codon [0] == 255) {
3902 codon [0] = '\0';
3903 if (StringNICmp (sfp->comment, "codon recognized: ", 18) == 0) {
3904 StringNCpy_0 (codon, sfp->comment + 18, sizeof (codon));
3905 } else if (StringNICmp (sfp->comment, "codons recognized: ", 19) == 0) {
3906 StringNCpy_0 (codon, sfp->comment + 19, sizeof (codon));
3907 }
3908 if (StringDoesHaveText (codon)) {
3909 if (StringLen (codon) > 3 && codon [3] == ';') {
3910 codon [3] = '\0';
3911 okayToFree = FALSE;
3912 }
3913 if (StringLen (codon) == 3) {
3914 for (i = 0; i < 3; i++) {
3915 if (codon [i] == 'U') {
3916 codon [i] = 'T';
3917 }
3918 }
3919 if (ParseDegenerateCodon (trp, (Uint1Ptr) codon)) {
3920 if (okayToFree) {
3921 sfp->comment = MemFree (sfp->comment);
3922 } else {
3923 str = StringSave (sfp->comment + 22);
3924 TrimSpacesAroundString (str);
3925 sfp->comment = MemFree (sfp->comment);
3926 if (StringHasNoText (str)) {
3927 str = MemFree (str);
3928 }
3929 sfp->comment = str;
3930 }
3931 }
3932 }
3933 }
3934 }
3935 */
3936
3937 if (! CodonsAlreadyInOrder (trp)) {
3938 StableMergeSort ((VoidPtr) &(trp->codon), 6, sizeof (Uint1), SortCodons);
3939 }
3940 UniqueCodons (trp);
3941
3942 /* now always switch iupacaa to ncbieaa (was just for selenocysteine) */
3943
3944 if (trp->aatype == 1 /* && trp->aa == 'U' */) {
3945 trp->aatype = 2;
3946 }
3947
3948 if (sfp == NULL || sfp->comment == NULL) return;
3949
3950 if (trp->aatype == 2) {
3951 aa = trp->aa;
3952 } else {
3953 switch (trp->aatype) {
3954 case 0 :
3955 from = 0;
3956 break;
3957 case 1 :
3958 from = Seq_code_iupacaa;
3959 break;
3960 case 2 :
3961 from = Seq_code_ncbieaa;
3962 break;
3963 case 3 :
3964 from = Seq_code_ncbi8aa;
3965 break;
3966 case 4 :
3967 from = Seq_code_ncbistdaa;
3968 break;
3969 default:
3970 break;
3971 }
3972 smtp = SeqMapTableFind (Seq_code_ncbieaa, from);
3973 if (smtp != NULL) {
3974 aa = SeqMapTableConvert (smtp, trp->aa);
3975 }
3976 }
3977 if (aa != 'X') {
3978 curraa = ParseTRnaString (sfp->comment, &justTrnaText, trpcodon, TRUE);
3979 if (aa == 0 && curraa != 0) {
3980 aa = curraa;
3981 trp->aa = curraa;
3982 trp->aatype = 2;
3983 }
3984 if (aa != 0 && aa == curraa) {
3985 if (justTrnaText) {
3986 for (j = 0; j < 6; j++) {
3987 if (trp->codon [j] == 255) {
3988 trp->codon [j] = trpcodon [j];
3989 }
3990 }
3991 if (StringCmp (sfp->comment, "fMet") != 0 && StringCmp (sfp->comment, "iMet") != 0) {
3992 sfp->comment = MemFree (sfp->comment);
3993 }
3994 }
3995 }
3996 return;
3997 }
3998 aa = ParseTRnaString (sfp->comment, &justTrnaText, trpcodon, TRUE);
3999 if (aa == 0) return;
4000 trp->aa = aa;
4001 trp->aatype = 2;
4002 if (justTrnaText) {
4003 for (j = 0; j < 6; j++) {
4004 if (trp->codon [j] == 255) {
4005 trp->codon [j] = trpcodon [j];
4006 }
4007 }
4008 if (StringCmp (sfp->comment, "fMet") != 0 && StringCmp (sfp->comment, "iMet") != 0) {
4009 sfp->comment = MemFree (sfp->comment);
4010 }
4011 }
4012 }
4013
GetBestProteinFeatureUnindexed(SeqLocPtr product)4014 NLM_EXTERN SeqFeatPtr LIBCALL GetBestProteinFeatureUnindexed (SeqLocPtr product)
4015
4016 {
4017 BioseqPtr bsp;
4018 SeqFeatPtr prot = NULL;
4019 SeqAnnotPtr sap;
4020 SeqFeatPtr tmp;
4021 ValNode vn;
4022
4023 if (product == NULL) return NULL;
4024 bsp = BioseqFindFromSeqLoc (product);
4025 if (bsp == NULL || bsp->repr != Seq_repr_raw) return NULL;
4026 vn.choice = SEQLOC_WHOLE;
4027 vn.data.ptrvalue = (Pointer) SeqIdFindBest (bsp->id, 0);
4028 vn.next = NULL;
4029 for (sap = bsp->annot; sap != NULL && prot == NULL; sap = sap->next) {
4030 if (sap->type == 1) {
4031 for (tmp = (SeqFeatPtr) sap->data; tmp != NULL && prot == NULL; tmp = tmp->next) {
4032 if (tmp->data.choice == SEQFEAT_PROT) {
4033 if (SeqLocCompare (tmp->location, &vn)) {
4034 /* find first protein feature packaged on and located on bioseq */
4035 prot = tmp;
4036 }
4037 }
4038 }
4039 }
4040 }
4041 return prot;
4042 }
4043
/* Tidies an EC number string in place: a trailing '.' and a leading "EC"   */
/* (followed by a space) or "EC:" prefix, matched without regard to case,   */
/* are blanked out, then surrounding spaces are trimmed.                    */
static void CleanupECNumber (CharPtr str)

{
  size_t  blank = 0;
  size_t  i;
  size_t  len;

  len = StringLen (str);
  if (len < 1) return;

  if (str [len - 1] == '.') {
    str [len - 1] = ' ';
  }

  /* number of leading characters to overwrite with spaces */
  if (StringNICmp (str, "EC ", 3) == 0) {
    blank = 2;
  } else if (StringNICmp (str, "EC:", 3) == 0) {
    blank = 3;
  }
  for (i = 0; i < blank; i++) {
    str [i] = ' ';
  }

  TrimSpacesAroundString (str);
}
4064
ECNumberCanBeSplit(CharPtr str)4065 static Boolean ECNumberCanBeSplit (CharPtr str)
4066
4067 {
4068 Char ch;
4069 CharPtr ptr;
4070
4071 if (StringHasNoText (str)) return FALSE;
4072
4073 ptr = str;
4074 ch = *ptr;
4075 while (ch != '\0') {
4076 if ((! IS_DIGIT (ch)) && ch != '.' && ch !='-' && ch !='n' && ch != ' ' && ch !=';') return FALSE;
4077 ptr++;
4078 ch = *ptr;
4079 }
4080
4081 return TRUE;
4082 }
4083
HandledGBQualOnCDS(SeqFeatPtr sfp,GBQualPtr gbq,ValNodePtr PNTR afterMe)4084 static Boolean HandledGBQualOnCDS (SeqFeatPtr sfp, GBQualPtr gbq, ValNodePtr PNTR afterMe)
4085
4086 {
4087 Int2 choice = 0;
4088 CdRegionPtr crp;
4089 Uint1 frame;
4090 ValNodePtr gcp;
4091 ValNodePtr prev;
4092 SeqFeatPtr prot;
4093 ProtRefPtr prp = NULL;
4094 Char str [16];
4095 Int4 transl_table;
4096 int val;
4097 ValNodePtr vnp;
4098 SeqFeatXrefPtr xref;
4099
4100 if (StringICmp (gbq->qual, "product") == 0) {
4101 choice = 1;
4102 } else if (StringICmp (gbq->qual, "function") == 0) {
4103 choice = 2;
4104 } else if (StringICmp (gbq->qual, "EC_number") == 0) {
4105 choice = 3;
4106 } else if (StringICmp (gbq->qual, "prot_note") == 0) {
4107 choice = 4;
4108 }
4109 if (choice > 0) {
4110 prot = GetBestProteinFeatureUnindexed (sfp->product);
4111 if (prot != NULL) {
4112 prp = (ProtRefPtr) prot->data.value.ptrvalue;
4113 }
4114 if (prp == NULL) {
4115 /* otherwise make cross reference */
4116 xref = sfp->xref;
4117 while (xref != NULL && xref->data.choice != SEQFEAT_PROT) {
4118 xref = xref->next;
4119 }
4120 if (xref == NULL) {
4121 prp = ProtRefNew ();
4122 if (prp == NULL) return FALSE;
4123 xref = SeqFeatXrefNew ();
4124 if (xref == NULL) return FALSE;
4125 xref->data.choice = SEQFEAT_PROT;
4126 xref->data.value.ptrvalue = (Pointer) prp;
4127 xref->next = sfp->xref;
4128 sfp->xref = xref;
4129 }
4130 if (xref != NULL) {
4131 prp = (ProtRefPtr) xref->data.value.ptrvalue;
4132 }
4133 }
4134 if (prp == NULL) return FALSE;
4135 switch (choice) {
4136 case 1 :
4137 if (prot != NULL && prot->data.value.ptrvalue != NULL) {
4138 if (*afterMe == NULL) {
4139 /* if protein product exists, product gbqual becomes first name */
4140 vnp = ValNodeCopyStr (NULL, 0, gbq->val);
4141 if (vnp != NULL) {
4142 vnp->next = prp->name;
4143 prp->name = vnp;
4144 }
4145 *afterMe = vnp;
4146 } else {
4147 vnp = ValNodeCopyStr (NULL, 0, gbq->val);
4148 prev = *afterMe;
4149 if (vnp != NULL) {
4150 vnp->next = prev->next;
4151 prev->next = vnp;
4152 }
4153 *afterMe = vnp;
4154 }
4155 } else {
4156 /* if local xref, append to name */
4157 ValNodeCopyStr (&(prp->name), 0, gbq->val);
4158 }
4159 break;
4160 case 2 :
4161 ValNodeCopyStr (&(prp->activity), 0, gbq->val);
4162 break;
4163 case 3 :
4164 ValNodeCopyStr (&(prp->ec), 0, gbq->val);
4165 break;
4166 case 4 :
4167 if (prot == NULL) {
4168 return FALSE;
4169 } else {
4170 prot->comment = StringSave (gbq->val);
4171 }
4172 break;
4173 default :
4174 break;
4175 }
4176 return TRUE;
4177 }
4178
4179 if (StringICmp (gbq->qual, "transl_except") == 0) {
4180 return ParseCodeBreak (sfp, gbq->val, 0);
4181 }
4182
4183 if (StringICmp (gbq->qual, "codon_start") == 0) {
4184 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
4185 if (crp != NULL) {
4186 frame = crp->frame;
4187 if (frame == 0) {
4188 StringNCpy_0 (str, gbq->val, sizeof (str));
4189 if (sscanf (str, "%d", &val) == 1) {
4190 if (val > 0 && val < 4) {
4191 crp->frame = (Uint1) val;
4192 return TRUE;
4193 }
4194 }
4195 frame = 1;
4196 }
4197 sprintf (str, "%d", (int) frame);
4198 if (StringICmp (str, gbq->val) == 0) {
4199 return TRUE;
4200 } else if (sfp->pseudo && sfp->product == NULL) {
4201 StringNCpy_0 (str, gbq->val, sizeof (str));
4202 if (sscanf (str, "%d", &val) == 1) {
4203 if (val > 0 && val < 4) {
4204 crp->frame = (Uint1) val;
4205 return TRUE;
4206 }
4207 }
4208 }
4209 }
4210 }
4211
4212 if (StringICmp (gbq->qual, "transl_table") == 0) {
4213 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
4214 if (crp != NULL) {
4215 transl_table = 0;
4216 gcp = crp->genetic_code;
4217 if (gcp != NULL) {
4218 for (vnp = gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
4219 if (vnp->choice == 2 && vnp->data.intvalue != 0) {
4220 transl_table = vnp->data.intvalue;
4221 }
4222 }
4223 if (transl_table == 0) {
4224 transl_table = 1;
4225 }
4226 sprintf (str, "%ld", (long) transl_table);
4227 if (StringICmp (str, gbq->val) == 0) {
4228 return TRUE;
4229 }
4230 } else {
4231 StringNCpy_0 (str, gbq->val, sizeof (str));
4232 if (sscanf (str, "%d", &val) == 1) {
4233 vnp = ValNodeNew (NULL);
4234 if (vnp != NULL) {
4235 vnp->choice = 2;
4236 vnp->data.intvalue = (Int4) val;
4237 gcp = GeneticCodeNew ();
4238 if (gcp != NULL) {
4239 gcp->data.ptrvalue = vnp;
4240 crp->genetic_code = gcp;
4241 return TRUE;
4242 }
4243 }
4244 }
4245 }
4246 }
4247 }
4248
4249 if (StringICmp (gbq->qual, "translation") == 0) {
4250 return TRUE;
4251 }
4252
4253 return FALSE;
4254 }
4255
4256
HandledGBQualOnRNA(SeqFeatPtr sfp,GBQualPtr gbq,Boolean isEmblOrDdbj)4257 static Boolean HandledGBQualOnRNA (SeqFeatPtr sfp, GBQualPtr gbq, Boolean isEmblOrDdbj)
4258
4259 {
4260 Uint1 aa;
4261 BioseqPtr bsp;
4262 Uint1 codon [6];
4263 Boolean emptyRNA;
4264 Int4 from;
4265 Boolean is_fMet = FALSE;
4266 Boolean is_iMet = FALSE;
4267 Boolean is_std_name = FALSE;
4268 Int2 j;
4269 Boolean justTrnaText;
4270 size_t len;
4271 CharPtr name;
4272 CharPtr ptr;
4273 RNAGenPtr rgp;
4274 RnaRefPtr rrp;
4275 SeqIntPtr sintp;
4276 SeqIdPtr sip;
4277 CharPtr str;
4278 Char tmp [64];
4279 Int4 to;
4280 tRNAPtr trp;
4281 long int val;
4282
4283 is_std_name = (Boolean) (StringICmp (gbq->qual, "standard_name") == 0);
4284 if (StringICmp (gbq->qual, "product") == 0 ||
4285 (is_std_name && (! isEmblOrDdbj) )) {
4286 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
4287 if (rrp == NULL) return FALSE;
4288 if (rrp->type == 0) {
4289 rrp->type = 255;
4290 }
4291 if (rrp->type == 255 && is_std_name) return FALSE;
4292 if (rrp->ext.choice == 1) {
4293 name = (CharPtr) rrp->ext.value.ptrvalue;
4294 if (StringHasNoText (name)) {
4295 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
4296 rrp->ext.choice = 0;
4297 }
4298 }
4299 if (rrp->ext.choice == 2) {
4300 trp = (tRNAPtr) rrp->ext.value.ptrvalue;
4301 if (trp != NULL) {
4302 if (trp->aatype == 0 && trp->aa == 0 && trp->anticodon == NULL) {
4303 emptyRNA = TRUE;
4304 for (j = 0; j < 6; j++) {
4305 if (trp->codon [j] != 255) {
4306 emptyRNA = FALSE;
4307 }
4308 }
4309 if (emptyRNA) {
4310 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
4311 rrp->ext.choice = 0;
4312 }
4313 }
4314 }
4315 }
4316 if (rrp->type == 3 && rrp->ext.choice == 1) {
4317 name = (CharPtr) rrp->ext.value.ptrvalue;
4318 aa = ParseTRnaString (name, &justTrnaText, codon, FALSE);
4319 if (aa != 0) {
4320 is_fMet = (Boolean) (StringStr (name, "fMet") != NULL);
4321 is_iMet = (Boolean) (StringStr (name, "iMet") != NULL);
4322 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
4323 trp = (tRNAPtr) MemNew (sizeof (tRNA));
4324 if (trp != NULL) {
4325 trp->aatype = 2;
4326 for (j = 0; j < 6; j++) {
4327 trp->codon [j] = 255;
4328 }
4329 if (justTrnaText) {
4330 for (j = 0; j < 6; j++) {
4331 trp->codon [j] = codon [j];
4332 }
4333 }
4334 trp->aa = aa;
4335 rrp->ext.choice = 2;
4336 rrp->ext.value.ptrvalue = (Pointer) trp;
4337 if (aa == 'M') {
4338 if (is_fMet) {
4339 if (sfp->comment == NULL) {
4340 sfp->comment = StringSave ("fMet");
4341 } else {
4342 len = StringLen (sfp->comment) + StringLen ("fMet") + 5;
4343 str = MemNew (sizeof (Char) * len);
4344 StringCpy (str, sfp->comment);
4345 StringCat (str, "; ");
4346 StringCat (str, "fMet");
4347 sfp->comment = MemFree (sfp->comment);
4348 sfp->comment = str;
4349 }
4350 }
4351 if (is_iMet) {
4352 if (sfp->comment == NULL) {
4353 sfp->comment = StringSave ("iMet");
4354 } else {
4355 len = StringLen (sfp->comment) + StringLen ("iMet") + 5;
4356 str = MemNew (sizeof (Char) * len);
4357 StringCpy (str, sfp->comment);
4358 StringCat (str, "; ");
4359 StringCat (str, "iMet");
4360 sfp->comment = MemFree (sfp->comment);
4361 sfp->comment = str;
4362 }
4363 }
4364 }
4365 CleanupTrna (sfp, trp);
4366 }
4367 }
4368 }
4369 if (rrp->type == 3 && rrp->ext.choice == 0) {
4370 AddQualifierToFeature (sfp, "product", gbq->val);
4371 return TRUE;
4372 }
4373 if (rrp->type == 3 && rrp->ext.choice == 2) {
4374 trp = (tRNAPtr) rrp->ext.value.ptrvalue;
4375 if (trp != NULL && trp->aatype == 2) {
4376 if (trp->aa == 77) {
4377 if (StringICmp (gbq->val, "tRNA-fMet") == 0 || StringICmp (gbq->val, "tRNA-iMet") == 0) return FALSE;
4378 }
4379 if (trp->aa == ParseTRnaString (gbq->val, NULL, NULL, FALSE)) {
4380 return TRUE;
4381 }
4382 }
4383 }
4384 if (rrp->ext.choice == 3) {
4385 rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
4386 if (rgp == NULL) return FALSE;
4387 if (StringHasNoText (rgp->product)) {
4388 rgp->product = StringSave (gbq->val);
4389 return TRUE;
4390 }
4391 return FALSE;
4392 }
4393 if (rrp->ext.choice != 0 && rrp->ext.choice != 1) return FALSE;
4394 name = (CharPtr) rrp->ext.value.ptrvalue;
4395 if (! HasNoText (name)) {
4396 if (StringICmp (name, gbq->val) == 0) {
4397 return TRUE;
4398 }
4399 str = StringStr (gbq->val, "rDNA");
4400 if (str != NULL) {
4401 str [1] = 'R';
4402 if (StringICmp (name, gbq->val) == 0) {
4403 return TRUE;
4404 }
4405 }
4406 if (rrp->type == 255 || rrp->type == 8 || rrp->type == 9 || rrp->type == 10) {
4407 /* new convention follows ASN.1 spec comments, allows new RNA types */
4408 return FALSE;
4409 }
4410 /* subsequent /product now added to comment */
4411 if (sfp->comment == NULL) {
4412 sfp->comment = gbq->val;
4413 gbq->val = NULL;
4414 } else if (StringStr (gbq->val, sfp->comment) == NULL) {
4415 len = StringLen (sfp->comment) + StringLen (gbq->val) + 5;
4416 str = MemNew (sizeof (Char) * len);
4417 StringCpy (str, sfp->comment);
4418 StringCat (str, "; ");
4419 StringCat (str, gbq->val);
4420 sfp->comment = MemFree (sfp->comment);
4421 sfp->comment = str;
4422 }
4423 /* return FALSE; */
4424 return TRUE;
4425 }
4426 if (rrp->type == 8 || rrp->type == 9 || rrp->type == 10) {
4427 /* new convention follows ASN.1 spec comments, allows new RNA types */
4428 return FALSE;
4429 }
4430 if (rrp->ext.choice == 1 && rrp->ext.value.ptrvalue != NULL) {
4431 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
4432 }
4433 if (rrp->ext.choice == 0 || rrp->ext.choice == 1) {
4434 rrp->ext.choice = 1;
4435 rrp->ext.value.ptrvalue = StringSave (gbq->val);
4436 return TRUE;
4437 }
4438 } else if (StringICmp (gbq->qual, "anticodon") == 0) {
4439 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
4440 if (rrp == NULL) return FALSE;
4441 if (rrp->type == 0) {
4442 rrp->type = 255;
4443 }
4444 if (rrp->type == 3 && rrp->ext.choice == 0) {
4445 trp = (tRNAPtr) MemNew (sizeof (tRNA));
4446 if (trp != NULL) {
4447 rrp->ext.choice = 2;
4448 rrp->ext.value.ptrvalue = trp;
4449 for (j = 0; j < 6; j++) {
4450 trp->codon [j] = 255;
4451 }
4452 }
4453 }
4454 if (rrp->type == 3 && rrp->ext.choice == 2) {
4455 trp = (tRNAPtr) rrp->ext.value.ptrvalue;
4456 if (trp != NULL) {
4457 StringNCpy_0 (tmp, gbq->val, sizeof (tmp));
4458 ptr = StringStr (tmp, "(");
4459 if (ptr != NULL) {
4460 ptr = StringStr (ptr + 1, "pos");
4461 if (ptr != NULL) {
4462 ptr = StringStr (ptr + 3, ":");
4463 }
4464 }
4465 if (ptr != NULL) {
4466 str = ptr + 1;
4467 ptr = StringStr (str, "..");
4468 if (ptr != NULL) {
4469 *ptr = '\0';
4470 if (sscanf (str, "%ld", &val) == 1) {
4471 from = val - 1;
4472 str = ptr + 2;
4473 ptr = StringStr (str, ",");
4474 if (ptr != NULL) {
4475 *ptr = '\0';
4476 if (sscanf (str, "%ld", &val) == 1) {
4477 to = val - 1;
4478 sip = SeqLocId (sfp->location);
4479 if (sip != NULL) {
4480 bsp = BioseqFind (sip);
4481 if (bsp != NULL) {
4482 if (from >= 0 && from < bsp->length - 1) {
4483 if (to >= 0 && to < bsp->length - 1) {
4484 sintp = SeqIntNew ();
4485 if (sintp != NULL) {
4486 if (from > to) {
4487 sintp->from = to;
4488 sintp->to = from;
4489 sintp->strand = Seq_strand_minus;
4490 } else {
4491 sintp->from = from;
4492 sintp->to = to;
4493 sintp->strand = Seq_strand_plus;
4494 }
4495 sintp->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
4496 trp->anticodon = ValNodeAddPointer (NULL, SEQLOC_INT, (Pointer) sintp);
4497 if (trp->aatype == 0 && trp->aa == 0) {
4498 ptr = StringStr (ptr + 1, "aa:");
4499 if (ptr != NULL) {
4500 str = ptr + 3;
4501 ptr = StringStr (str, ")");
4502 if (ptr != NULL) {
4503 *ptr = '\0';
4504 trp->aa = ParseTRnaString (str, NULL, NULL, FALSE);
4505 if (trp->aa != 0) {
4506 trp->aatype = 2;
4507 }
4508 }
4509 }
4510 }
4511 return TRUE;
4512 }
4513 }
4514 }
4515 }
4516 }
4517 }
4518 }
4519 }
4520 }
4521 }
4522 }
4523 }
4524 }
4525 return FALSE;
4526 }
4527
HandledGBQualOnProt(SeqFeatPtr sfp,GBQualPtr gbq)4528 static Boolean HandledGBQualOnProt (SeqFeatPtr sfp, GBQualPtr gbq)
4529
4530 {
4531 Int2 choice = 0;
4532 ProtRefPtr prp;
4533 ValNodePtr vnp;
4534
4535 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
4536 if (prp == NULL) return FALSE;
4537 if (StringICmp (gbq->qual, "product") == 0) {
4538 choice = 1;
4539 } else if (StringICmp (gbq->qual, "function") == 0) {
4540 choice = 2;
4541 } else if (StringICmp (gbq->qual, "EC_number") == 0) {
4542 choice = 3;
4543 } else if (StringICmp (gbq->qual, "standard_name") == 0) {
4544 choice = 4;
4545 } else if (StringICmp (gbq->qual, "label") == 0) {
4546 choice = 5;
4547 } else if (StringICmp (gbq->qual, "allele") == 0) {
4548 choice = 6;
4549 }
4550 if (choice == 1 || choice == 4) {
4551 vnp = prp->name;
4552 if (vnp != NULL && (! HasNoText (vnp->data.ptrvalue))) return FALSE;
4553 ValNodeCopyStr (&(prp->name), 0, gbq->val);
4554 /*
4555 vnp = prp->name;
4556 if (vnp != NULL && prp->desc != NULL) {
4557 if (StringICmp (vnp->data.ptrvalue, prp->desc) == 0) {
4558 prp->desc = MemFree (prp->desc);
4559 }
4560 }
4561 */
4562 return TRUE;
4563 } else if (choice == 2) {
4564 ValNodeCopyStr (&(prp->activity), 0, gbq->val);
4565 return TRUE;
4566 } else if (choice == 3) {
4567 ValNodeCopyStr (&(prp->ec), 0, gbq->val);
4568 return TRUE;
4569 } else if (choice == 5) {
4570 return FALSE; /* keep label gbqual only */
4571 } else if (choice == 6) {
4572 return FALSE;
4573 }
4574
4575 if (StringICmp (gbq->qual, "experiment") == 0 ||
4576 StringICmp (gbq->qual, "inference") == 0) {
4577 return FALSE;
4578 }
4579
4580 if (StringICmp (gbq->qual, "UniProtKB_evidence") == 0) {
4581 return FALSE;
4582 }
4583
4584 return TRUE; /* all other gbquals not appropriate on protein features */
4585 }
4586
HandledGBQualOnImp(SeqFeatPtr sfp,GBQualPtr gbq)4587 static Boolean HandledGBQualOnImp (SeqFeatPtr sfp, GBQualPtr gbq)
4588
4589 {
4590 Char ch;
4591 ImpFeatPtr ifp;
4592 Int4 len;
4593 CharPtr ptr;
4594
4595 if (StringICmp (gbq->qual, "rpt_unit") == 0) {
4596 if (HasNoText (gbq->val)) return FALSE;
4597 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4598 if (ifp == NULL) return FALSE;
4599 if (StringICmp (ifp->key, "repeat_region") != 0) return FALSE;
4600 len = SeqLocLen (sfp->location);
4601 if (len != (Int4) StringLen (gbq->val)) return FALSE;
4602 ptr = gbq->val;
4603 ch = *ptr;
4604 while (ch != '\0') {
4605 if (StringChr ("ACGTNacgtn", ch) == NULL) return FALSE;
4606 ptr++;
4607 ch = *ptr;
4608 }
4609 /* return TRUE; */
4610 }
4611 return FALSE;
4612 }
4613
CleanupRptUnit(GBQualPtr gbq)4614 static void CleanupRptUnit (GBQualPtr gbq)
4615
4616 {
4617 Char ch;
4618 size_t len;
4619 CharPtr ptr;
4620 CharPtr str;
4621 CharPtr tmp;
4622
4623 if (gbq == NULL) return;
4624 if (StringHasNoText (gbq->val)) return;
4625 len = StringLen (gbq->val) * 2 + 1;
4626 str = MemNew (sizeof (Char) * len);
4627 if (str == NULL) return;
4628 ptr = str;
4629 tmp = gbq->val;
4630 ch = *tmp;
4631 while (ch != '\0') {
4632 while (ch == '(' || ch == ')' || ch == ',') {
4633 *ptr = ch;
4634 ptr++;
4635 tmp++;
4636 ch = *tmp;
4637 }
4638 while (IS_WHITESP (ch)) {
4639 tmp++;
4640 ch = *tmp;
4641 }
4642 while (IS_DIGIT (ch)) {
4643 *ptr = ch;
4644 ptr++;
4645 tmp++;
4646 ch = *tmp;
4647 }
4648 if (ch == '.' || ch == '-') {
4649 while (ch == '.' || ch == '-') {
4650 tmp++;
4651 ch = *tmp;
4652 }
4653 *ptr = '.';
4654 ptr++;
4655 *ptr = '.';
4656 ptr++;
4657 }
4658 while (IS_WHITESP (ch)) {
4659 tmp++;
4660 ch = *tmp;
4661 }
4662 while (IS_DIGIT (ch)) {
4663 *ptr = ch;
4664 ptr++;
4665 tmp++;
4666 ch = *tmp;
4667 }
4668 while (IS_WHITESP (ch)) {
4669 tmp++;
4670 ch = *tmp;
4671 }
4672 if (ch == '\0' || ch == '(' || ch == ')' || ch == ',' || ch == '.' || IS_WHITESP (ch) || IS_DIGIT (ch)) {
4673 } else {
4674 MemFree (str);
4675 /* lower case the contents */
4676 ptr = gbq->val;
4677 ch = *ptr;
4678 while (ch != '\0') {
4679 if (IS_UPPER (ch)) {
4680 *ptr = TO_LOWER (ch);
4681 }
4682 ptr++;
4683 ch = *ptr;
4684 }
4685 return;
4686 }
4687 }
4688 *ptr = '\0';
4689 gbq->val = MemFree (gbq->val);
4690 gbq->val = str;
4691 /* and lower case the contents */
4692 ptr = str;
4693 ch = *ptr;
4694 while (ch != '\0') {
4695 if (IS_UPPER (ch)) {
4696 *ptr = TO_LOWER (ch);
4697 }
4698 ptr++;
4699 ch = *ptr;
4700 }
4701 }
4702
/* Normalizes a rpt_unit_seq value in place: when it consists solely of the */
/* letters A, C, G, T and U (either case) it is lower-cased and every 'u'   */
/* becomes 't'.  Any other character leaves the value untouched.            */
static void CleanupRptUnitSeq (GBQualPtr gbq)

{
  CharPtr  cp;
  Char     letter;

  if (gbq == NULL) return;
  if (StringHasNoText (gbq->val)) return;

  /* do not clean if val contains non-sequence characters */
  for (cp = gbq->val; *cp != '\0'; cp++) {
    if (StringChr ("ACGTUacgtu", *cp) == NULL) return;
  }

  /* lower case, and convert U to T */
  for (cp = gbq->val; *cp != '\0'; cp++) {
    letter = *cp;
    if (IS_UPPER (letter)) {
      letter = TO_LOWER (letter);
    }
    if (letter == 'u') {
      letter = 't';
    }
    if (letter != *cp) {
      *cp = letter;
    }
  }
}
4737
/* Normalizes a rpt_unit_range value written with dashes.                   */
/*                                                                          */
/* Only a value made up entirely of digits, '-' and '.' is considered, and  */
/* it is changed only when it has at least one dash and no dot: every '-'   */
/* is then replaced by "..".  The rewritten string replaces gbq->val.       */
/*                                                                          */
/* The new buffer is sized as the old length plus one extra character per   */
/* dash.  The former computation took the length of the string starting     */
/* "dashes" characters in, which under-allocated and overflowed the buffer  */
/* once a value held three or more dashes.  If the buffer cannot be         */
/* allocated the value is left unchanged.                                   */
static void CleanupRptUnitRange (GBQualPtr gbq)

{
  Char     ch;
  size_t   dashes = 0;
  size_t   dots = 0;
  size_t   len;
  CharPtr  ptr;
  CharPtr  str;
  CharPtr  tmp;

  if (gbq == NULL) return;
  if (StringHasNoText (gbq->val)) return;

  /* count separators; give up on anything other than digits, '-' and '.' */
  ptr = gbq->val;
  ch = *ptr;
  while (ch != '\0') {
    if (ch == '-') {
      dashes++;
    } else if (ch == '.') {
      dots++;
    } else if (IS_DIGIT (ch)) {
      /* okay */
    } else return;
    ptr++;
    ch = *ptr;
  }

  if (dashes > 0 && dots == 0) {
    len = StringLen (gbq->val) + dashes; /* each '-' grows into ".." */
    str = (CharPtr) MemNew (sizeof (Char) * (len + 1));
    if (str == NULL) return;
    tmp = str;
    ptr = gbq->val;
    ch = *ptr;
    while (ch != '\0') {
      if (ch == '-') {
        *tmp = '.';
        tmp++;
        *tmp = '.';
        tmp++;
      } else {
        *tmp = ch;
        tmp++;
      }
      ptr++;
      ch = *ptr;
    }
    *tmp = '\0';
    gbq->val = MemFree (gbq->val);
    gbq->val = str;
  }
}
4788
/* If the qualifier value is made up only of the letters ACGTU (either case),
   converts it in place to lower case and replaces u with t.  Any other
   content, including an empty value, is left as it is. */

static void CleanupReplace (GBQualPtr gbq)

{
  Char     base;
  CharPtr  scan;

  if (gbq == NULL) return;
  if (StringHasNoText (gbq->val)) return;

  /* reject the value as soon as a non-nucleotide character is seen */
  scan = gbq->val;
  while (*scan != '\0') {
    if (StringChr ("ACGTUacgtu", *scan) == NULL) return;
    scan++;
  }

  /* lower case, and convert U to T */
  scan = gbq->val;
  while (*scan != '\0') {
    base = *scan;
    if (IS_UPPER (base)) {
      base = TO_LOWER (base);
      *scan = base;
    }
    if (base == 'u') {
      *scan = 't';
    }
    scan++;
  }
}
4820
/* Category prefixes with the colon followed by a single space.  The table
   begins with an empty string and ends with NULL.  CleanupInference uses it
   to find a leading prefix, and RepairInference indexes it in parallel with
   evCategoryNoSpace, so the two tables must keep the same order. */
static CharPtr evCategoryPfx [] = {
  "",
  "COORDINATES: ",
  "DESCRIPTION: ",
  "EXISTENCE: ",
  NULL
};
4828
/* Tidies an inference qualifier value in place.  Two passes are made over
   the text that follows any leading category prefix from evCategoryPfx:
   the first removes spaces directly before and directly after each colon,
   the second removes a "?|" pair that immediately follows a colon or comma.
   The string can only get shorter, so no reallocation is needed. */

static void CleanupInference (GBQualPtr gbq)

{
  Char     ch;
  CharPtr  colon;
  CharPtr  dst;
  Int2     j;
  size_t   len;
  CharPtr  ptr;
  CharPtr  skip;
  CharPtr  space;
  CharPtr  str;

  if (gbq == NULL) return;
  if (StringHasNoText (gbq->val)) return;

  str = gbq->val;
  space = NULL;
  colon = NULL;

  /* find a leading category prefix; there is no break, so the last matching
     table entry decides where processing starts */
  skip = NULL;
  for (j = 0; evCategoryPfx [j] != NULL; j++) {
    len = StringLen (evCategoryPfx [j]);
    if (StringNICmp (str, evCategoryPfx [j], len) != 0) continue;
    skip = str + len;
  }
  /* start after the prefix so that the space following its colon is kept */
  if (skip != NULL) {
    str = skip;
  }

  /* pass 1: ptr reads, dst writes; space marks the first blank of the current
     run of blanks, colon marks the most recently written colon */
  dst = str;
  ptr = str;
  ch = *ptr;
  while (ch != '\0') {
    *dst = ch;
    if (ch == ' ') {
      if (space == NULL) {
        space = dst;
      }
    } else if (ch == ':') {
      /* blanks preceded this colon: back up and overwrite them */
      if (space != NULL) {
        dst = space;
        *dst = ch;
      }
      space = NULL;
      colon = dst;
    } else {
      /* blanks followed a colon: place this character right after the colon */
      if (space != NULL && colon != NULL) {
        colon++;
        dst = colon;
        *dst = ch;
      }
      space = NULL;
      colon = NULL;
    }
    dst++;
    ptr++;
    ch = *ptr;
  }
  *dst = '\0';

  /* pass 2: after copying a ':' or ',', skip over an immediately following "?|" */
  dst = str;
  ptr = str;
  ch = *ptr;
  while (ch != '\0') {
    *dst = ch;
    if ((ch == ':' || ch == ',') && *(ptr + 1) == '?' && *(ptr + 2) == '|') {
      ptr += 2;
    }
    dst++;
    ptr++;
    ch = *ptr;
  }
  *dst = '\0';
}
4904
/* The same category prefixes as evCategoryPfx but without the space after
   the colon.  RepairInference walks this table and uses the same index into
   evCategoryPfx, so the entries must stay in the same order. */
static CharPtr evCategoryNoSpace [] = {
  "",
  "COORDINATES:",
  "DESCRIPTION:",
  "EXISTENCE:",
  NULL
};
4912
/* Restores the space after the colon of a leading category prefix.  When the
   value starts with an entry of evCategoryNoSpace but not with the matching
   entry of evCategoryPfx (the same prefix plus one space), the value is
   replaced by a new string that has the spaced prefix in front of the
   remaining text.  Nothing is changed if no prefix matches, if the prefix is
   already followed by a space, or if allocation fails. */

static void RepairInference (GBQualPtr gbq)

{
  Int2     j;
  size_t   len;
  CharPtr  ptr;
  CharPtr  skip;
  CharPtr  str;

  if (gbq == NULL) return;
  if (StringHasNoText (gbq->val)) return;

  str = gbq->val;
  for (j = 0; evCategoryNoSpace [j] != NULL; j++) {
    len = StringLen (evCategoryNoSpace [j]);
    /* the tables start with an empty placeholder entry, which has nothing to
       repair; skip it explicitly instead of depending on how StringNICmp
       treats a zero-length comparison, so the real prefixes are always tried */
    if (len == 0) continue;
    if (StringNICmp (str, evCategoryNoSpace [j], len) != 0) continue;
    /* prefix is already followed by a space */
    if (StringNICmp (str, evCategoryPfx [j], len + 1) == 0) continue;
    /* need to repair */
    skip = str + len;
    ptr = MemNew (StringLen (skip) + 20);
    if (ptr == NULL) return;
    StringCpy (ptr, evCategoryPfx [j]);
    StringCat (ptr, skip);
    gbq->val = MemFree (gbq->val);
    gbq->val = ptr;
    return;
  }
}
4941
/* Inserts a space after the comma that separates the two site entries of a
   value beginning with "(5'site:" and containing ",3'site:", so that the
   separator reads ", 3'site:".  Values of any other shape are left alone, as
   is the value when the replacement buffer cannot be allocated. */

static void CleanupConsSplice (GBQualPtr gbq)

{
  size_t   len;
  CharPtr  ptr;
  CharPtr  str;

  /* guard against a missing qualifier or value, as the sibling cleaners do */
  if (gbq == NULL || gbq->val == NULL) return;

  if (StringNICmp (gbq->val, "(5'site:", 8) != 0) return;
  ptr = StringStr (gbq->val, ",3'site:");
  if (ptr == NULL) return;

  /* allocate before modifying val so that a failure leaves it intact */
  len = StringLen (gbq->val) + 5;
  str = (CharPtr) MemNew (len);
  if (str == NULL) return;

  /* cut val at the comma, then rebuild as head + ", " + tail */
  *ptr = '\0';
  ptr++;
  StringCpy (str, gbq->val);
  StringCat (str, ", ");
  StringCat (str, ptr);
  gbq->val = MemFree (gbq->val);
  gbq->val = str;
}
4963
/* Splits the comma-separated contents of a parenthesized qualifier value
   into separate qualifiers.  Works on a private copy of headgbq->val with
   its first character (expected to be the opening parenthesis) skipped and
   a trailing ')' removed.  Each piece becomes a new GBQual carrying a copy of
   headgbq->qual, and the new nodes are linked in order directly after
   headgbq, ahead of its former successor.  headgbq itself is not modified
   apart from its next pointer; the callers in this file remove it when TRUE
   is returned.
   Returns FALSE, without touching the list, when the copied text contains a
   ')' that has no matching '(' before it; otherwise returns TRUE. */

static Boolean ExpandParenGroup (GBQualPtr headgbq)

{
  Char       ch;
  GBQualPtr  lastgbq;
  size_t     len;
  Int2       nesting;
  GBQualPtr  newgbq;
  GBQualPtr  nextqual;
  CharPtr    ptr;
  CharPtr    str;
  CharPtr    tmp;

  nextqual = headgbq->next;
  lastgbq = headgbq;
  ptr = headgbq->val;
  /* copy everything after the first character, then drop a closing parenthesis */
  tmp = StringSave (ptr + 1);
  len = StringLen (tmp);
  if (len > 0 && tmp [len - 1] == ')') {
    tmp [len - 1] = '\0';
  }
  str = tmp;
  /* validation pass: track parenthesis depth inside the copied text */
  nesting = 0;
  ptr = str;
  ch = *ptr;
  while (ch != '\0') {
    if (ch == '(') {
      nesting++;
    } else if (ch == ')') {
      nesting--;
      if (nesting < 0) {
        MemFree (tmp);
        return FALSE;
      }
    } else if (ch == ',') {
      /* NOTE(review): nesting cannot be negative here because the branch above
         already returned in that case, so this test never fires; possibly
         nesting > 0 was intended - confirm before changing */
      if (nesting < 0) {
        MemFree (tmp);
        return FALSE;
      }
    }
    ptr++;
    ch = *ptr;
  }
  /* splitting pass: cut at each comma; when no comma remains, cut at the last
     ')' if there is one; ptr becomes NULL when neither is found, which ends
     the loop through str after the final piece has been emitted */
  while (! StringHasNoText (str)) {
    ptr = StringChr (str, ',');
    if (ptr == NULL) {
      ptr = StringRChr (str, ')');
    }
    if (ptr != NULL) {
      *ptr = '\0';
      ptr++;
    }
    TrimSpacesAroundString (str);
    newgbq = GBQualNew ();
    if (newgbq != NULL) {
      newgbq->qual = StringSave (headgbq->qual);
      newgbq->val = StringSave (str);
      /* append after the previously added node, keeping the old successor last */
      newgbq->next = nextqual;
      lastgbq->next = newgbq;
      lastgbq = newgbq;
    }
    str = ptr;
  }
  MemFree (tmp);
  return TRUE;
}
5030
IsBaseRange(CharPtr str)5031 static Boolean IsBaseRange (CharPtr str)
5032
5033 {
5034 CharPtr ptr;
5035 Char tmp [32];
5036 long int val;
5037
5038 if (StringLen (str) > 25) return FALSE;
5039 StringNCpy_0 (tmp, str, sizeof (tmp));
5040 ptr = StringStr (tmp, "..");
5041 if (ptr == NULL) return FALSE;
5042 *ptr = '\0';
5043 if (StringHasNoText (tmp)) return FALSE;
5044 if (sscanf (tmp, "%ld", &val) != 1 || val < 1) return FALSE;
5045 ptr += 2;
5046 if (StringHasNoText (ptr)) return FALSE;
5047 if (sscanf (ptr, "%ld", &val) != 1 || val < 1) return FALSE;
5048 return TRUE;
5049 }
5050
/* Walks the GBQual list of a feature and, for the multi-valued qualifiers
   named below, replaces a value of the form (a,b,c) or {a,b,c} by one
   qualifier per element.  Every qualifier first has its name and value
   cleaned with CleanVisString, a value that is just quotes discarded, and
   NULL strings replaced by empty strings.  For rpt_unit_seq a value with
   inner parentheses is still offered to ExpandParenGroup; for the other
   qualifiers a value containing a further '(' is left unexpanded. */

static void ModernizeFeatureGBQuals (SeqFeatPtr sfp)

{
  static CharPtr  multiValued [] = {
    "rpt_type",
    "rpt_unit",
    "rpt_unit_range",
    "replace",
    "compare",
    "old_locus_tag",
    "usedin",
    NULL
  };

  Boolean         allowNested;
  Boolean         candidate;
  GBQualPtr       curr;
  Boolean         expanded;
  Int2            k;
  GBQualPtr PNTR  link;
  size_t          n;
  GBQualPtr       upcoming;
  CharPtr         val;

  if (sfp == NULL) return;

  /* link always addresses the pointer that leads to curr */
  link = (GBQualPtr PNTR) &(sfp->qual);
  for (curr = sfp->qual; curr != NULL; curr = upcoming) {
    CleanVisString (&(curr->qual));
    CleanVisString (&(curr->val));
    if (curr->qual == NULL) {
      curr->qual = StringSave ("");
    }
    if (StringIsJustQuotes (curr->val)) {
      curr->val = MemFree (curr->val);
    }
    if (curr->val == NULL) {
      curr->val = StringSave ("");
    }
    upcoming = curr->next;

    /* decide whether this qualifier may carry a parenthesized list */
    allowNested = (Boolean) (StringICmp (curr->qual, "rpt_unit_seq") == 0);
    candidate = allowNested;
    for (k = 0; (! candidate) && multiValued [k] != NULL; k++) {
      if (StringICmp (curr->qual, multiValued [k]) == 0) {
        candidate = TRUE;
      }
    }

    expanded = FALSE;
    if (candidate) {
      val = curr->val;
      n = StringLen (val);
      /* braces are treated as parentheses */
      if (n > 1 && val [0] == '{' && val [n - 1] == '}') {
        val [0] = '(';
        val [n - 1] = ')';
      }
      if (n > 1 && val [0] == '(' && val [n - 1] == ')' &&
          (allowNested || StringChr (val + 1, '(') == NULL)) {
        expanded = ExpandParenGroup (curr);
      }
    }

    if (expanded) {
      /* any qualifiers parsed out of (xxx,xxx) now follow curr and are
         processed next; the original is removed from the list and freed */
      upcoming = curr->next;
      *link = curr->next;
      curr->next = NULL;
      curr->qual = MemFree (curr->qual);
      curr->val = MemFree (curr->val);
      GBQualFree (curr);
    } else {
      link = (GBQualPtr PNTR) &(curr->next);
    }
  }
}
5136
5137
/* Brings a satellite qualifier value into the form type:name.  A value that
   does not begin with microsatellite, minisatellite or satellite (compared
   case-sensitively) is replaced by a new string with "satellite:" in front
   of it; a value that has a space directly after the type word gets that
   space turned into a colon.  Finally any spaces immediately following the
   first colon are removed.  *satellite may be reallocated. */
static void MendSatelliteQualifier (CharPtr PNTR satellite)
{
  static CharPtr  kinds [] = { "microsatellite", "minisatellite", "satellite", NULL };
  Int4     candLen;
  Int4     kindLen = 0;
  Int2     k;
  CharPtr  fixed, from, sep, to, val;

  if (satellite == NULL || StringHasNoText (*satellite)) {
    return;
  }
  val = *satellite;

  /* the first type word that prefixes the value wins */
  for (k = 0; kinds [k] != NULL && kindLen == 0; k++) {
    candLen = StringLen (kinds [k]);
    if (StringNCmp (val, kinds [k], candLen) == 0) {
      kindLen = candLen;
    }
  }

  if (kindLen == 0) {
    fixed = (CharPtr) MemNew (sizeof (Char) * (StringLen (val) + StringLen ("satellite") + 3));
    sprintf (fixed, "satellite:%s", val);
    MemFree (val);
    *satellite = fixed;
    val = fixed;
  } else if (val [kindLen] == ' ') {
    val [kindLen] = ':';
  }

  /* remove spaces after colon */
  sep = StringChr (val, ':');
  if (sep == NULL) {
    return;
  }
  to = sep + 1;
  from = to;
  while (*from == ' ') {
    from++;
  }
  for (; *from != 0; from++, to++) {
    *to = *from;
  }
  *to = 0;
}
5183
5184
/* Walks the GBQual list of a feature, moving qualifiers that have a
   dedicated home on the feature (partial, exception, comment, dbxref, gene
   xref, pseudo ...) into that field and normalizing the ones that stay.

   For every qualifier the name and value strings are cleaned first and NULL
   strings are replaced by empty strings.  The unlink flag starts out TRUE:
   a branch that absorbs the qualifier (or that has an empty body) leaves it
   TRUE so the node is removed and freed at the bottom of the loop, while a
   branch that keeps the qualifier sets unlink to FALSE.  A qualifier whose
   name and value are both blank is always removed.

   sfp           feature whose qual list is processed; NULL is ignored
   isEmblOrDdbj  passed through unchanged to HandledGBQualOnRNA */

static void CleanupFeatureGBQuals (SeqFeatPtr sfp, Boolean isEmblOrDdbj)

{
  ValNodePtr      afterMe = NULL;
  Boolean         all_digits;
  Char            ch;
  DbtagPtr        db;
  GBQualPtr       gbq;
  GeneRefPtr      grp;
  ImpFeatPtr      ifp;
  size_t          len;
  GBQualPtr       nextqual;
  ObjectIdPtr     oip;
  GBQualPtr PNTR  prevqual;
  CharPtr         ptr;
  GBQualPtr       rpt_unit_range = NULL;
  GBQualPtr       rpt_unit_seq = NULL;
  CharPtr         str;
  CharPtr         tag;
  Boolean         unlink;
  ValNodePtr      vnp;
  SeqFeatXrefPtr  xref;

  if (sfp == NULL) return;
  gbq = sfp->qual;
  /* prevqual addresses the pointer that leads to gbq, so a node can be unlinked in place */
  prevqual = (GBQualPtr PNTR) &(sfp->qual);
  while (gbq != NULL) {
    CleanVisString (&(gbq->qual));
    CleanVisStringAndCompress (&(gbq->val));
    if (gbq->qual == NULL) {
      gbq->qual = StringSave ("");
    }
    if (StringIsJustQuotes (gbq->val)) {
      gbq->val = MemFree (gbq->val);
    }
    if (gbq->val == NULL) {
      gbq->val = StringSave ("");
    }
    /* replace on a variation import feature is lower cased before the main dispatch */
    if (StringICmp (gbq->qual, "replace") == 0) {
      if (sfp->data.choice == SEQFEAT_IMP) {
        ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
        if (ifp != NULL) {
          if (StringICmp (ifp->key, "variation") == 0 && gbq->val != NULL) {
            ptr = gbq->val;
            ch = *ptr;
            while (ch != '\0') {
              *ptr = TO_LOWER (ch);
              ptr++;
              ch = *ptr;
            }
          }
        }
      }
    }
    nextqual = gbq->next;
    unlink = TRUE;
    if (StringICmp (gbq->qual, "partial") == 0) {
      sfp->partial = TRUE;
    } else if (StringICmp (gbq->qual, "evidence") == 0) {
      /* the handling below is disabled, so an evidence qualifier is simply removed */
      /*
      if (StringICmp (gbq->val, "experimental") == 0) {
        if (sfp->exp_ev != 2) {
          sfp->exp_ev = 1;
        }
      } else if (StringICmp (gbq->val, "not_experimental") == 0) {
        sfp->exp_ev = 2;
      }
      */
    } else if (StringICmp (gbq->qual, "exception") == 0) {
      sfp->excpt = TRUE;
      /* a value other than TRUE becomes the exception text unless one is already set */
      if (! HasNoText (gbq->val)) {
        if (StringICmp (gbq->val, "TRUE") != 0) {
          if (sfp->except_text == NULL) {
            sfp->except_text = StringSaveNoNull (gbq->val);
          }
        }
      }
    } else if (StringICmp (gbq->qual, "note") == 0 ||
               StringICmp (gbq->qual, "notes") == 0 ||
               StringICmp (gbq->qual, "comment") == 0) {
      if (sfp->comment == NULL) {
        /* hand the string over to the feature instead of copying it */
        sfp->comment = gbq->val;
        gbq->val = NULL;
      } else {
        len = StringLen (sfp->comment) + StringLen (gbq->val) + 5;
        str = MemNew (sizeof (Char) * len);
        StringCpy (str, sfp->comment);
        StringCat (str, "; ");
        StringCat (str, gbq->val);
        sfp->comment = MemFree (sfp->comment);
        sfp->comment = str;
      }
    } else if (StringICmp (gbq->qual, "label") == 0) {
      if (StringICmp (gbq->val, FindKeyFromFeatDefType (sfp->idx.subtype, FALSE)) == 0) {
        /* skip label that is simply the feature key */
      /* NOTE(review): the search below looks for gbq->qual (the qualifier name)
         in the comment, not gbq->val; confirm which one is intended */
      } else if (sfp->comment == NULL || StringISearch (sfp->comment, gbq->qual) == NULL) {
        /* if label is not already in comment, append */
        len = StringLen (sfp->comment) + StringLen (gbq->val) + StringLen ("label: ") + 5;
        str = MemNew (sizeof (Char) * len);
        if (sfp->comment == NULL) {
          StringCpy (str, "label: ");
          StringCat (str, gbq->val);
          sfp->comment = str;
        } else {
          StringCpy (str, sfp->comment);
          StringCat (str, "; ");
          StringCat (str, "label: ");
          StringCat (str, gbq->val);
          sfp->comment = MemFree (sfp->comment);
          sfp->comment = str;
        }
      }
    } else if (StringICmp (gbq->qual, "db_xref") == 0) {
      tag = gbq->val;
      ptr = StringChr (tag, ':');
      if (ptr != NULL) {
        /* split db:tag at the first colon and prepend a new Dbtag to sfp->dbxref */
        vnp = ValNodeNew (NULL);
        db = DbtagNew ();
        vnp->data.ptrvalue = db;
        *ptr = '\0';
        ptr++;
        db->db = StringSave (tag);
        oip = ObjectIdNew ();
        oip->str = StringSave (ptr);
        db->tag = oip;
        vnp->next = sfp->dbxref;
        sfp->dbxref = vnp;
      } else {
        /*
        db->db = StringSave ("?");
        oip = ObjectIdNew ();
        oip->str = StringSave (tag);
        db->tag = oip;
        vnp->next = sfp->dbxref;
        sfp->dbxref = vnp;
        */
        /* no colon: keep the qualifier as it is */
        unlink = FALSE;
      }
    } else if (StringICmp (gbq->qual, "gdb_xref") == 0) {
      vnp = ValNodeNew (NULL);
      db = DbtagNew ();
      vnp->data.ptrvalue = db;
      db->db = StringSave ("GDB");
      oip = ObjectIdNew ();
      oip->str = StringSave (gbq->val);
      db->tag = oip;
      vnp->next = sfp->dbxref;
      sfp->dbxref = vnp;
    } else if (StringICmp (gbq->qual, "cons_splice") == 0) {
      /* the cleanup call is disabled, so a cons_splice qualifier is removed */
      /*
      CleanupConsSplice (gbq);
      unlink = FALSE;
      */
    } else if (StringICmp (gbq->qual, "replace") == 0) {
      CleanupReplace (gbq);
      unlink = FALSE;
    } else if (StringICmp (gbq->qual, "rpt_unit_seq") == 0) {
      /* a value that looks like a base range is renamed to rpt_unit_range */
      if (IsBaseRange (gbq->val)) {
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("rpt_unit_range");
        CleanupRptUnitRange (gbq);
      } else {
        CleanupRptUnitSeq (gbq);
      }
      unlink = FALSE;
    } else if (StringICmp (gbq->qual, "rpt_unit_range") == 0) {
      /* a value that does not look like a base range is renamed to rpt_unit_seq */
      if (! IsBaseRange (gbq->val)) {
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("rpt_unit_seq");
        CleanupRptUnitSeq (gbq);
      } else {
        CleanupRptUnitRange (gbq);
      }
      unlink = FALSE;
    /* when a type-specific handler returns TRUE the body is empty, unlink stays TRUE and the qualifier is removed */
    } else if (sfp->data.choice == SEQFEAT_GENE && HandledGBQualOnGene (sfp, gbq)) {
    } else if (sfp->data.choice == SEQFEAT_CDREGION && HandledGBQualOnCDS (sfp, gbq, &afterMe)) {
    } else if (sfp->data.choice == SEQFEAT_RNA && HandledGBQualOnRNA (sfp, gbq, isEmblOrDdbj)) {
    } else if (sfp->data.choice == SEQFEAT_PROT && HandledGBQualOnProt (sfp, gbq)) {
    } else if (sfp->data.choice == SEQFEAT_IMP && HandledGBQualOnImp (sfp, gbq)) {
    } else if (StringICmp (gbq->qual, "rpt_unit") == 0) {
      /* rpt_unit is renamed to rpt_unit_range or rpt_unit_seq depending on its value */
      if (IsBaseRange (gbq->val)) {
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("rpt_unit_range");
        unlink = FALSE;
      } else {
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("rpt_unit_seq");
        unlink = FALSE;
      }
    } else if (StringICmp (gbq->qual, "EC_number") == 0) {
      CleanupECNumber (gbq->val);
      unlink = FALSE;
    } else if (StringICmp (gbq->qual, "pseudo") == 0) {
      sfp->pseudo = TRUE;
    } else if (StringICmp (gbq->qual, "pseudogene") == 0) {
      /* a recognized pseudogene value sets the pseudo flag and is lower cased; the qualifier is kept either way */
      str = gbq->val;
      if (StringICmp (str, "processed") == 0 ||
          StringICmp (str, "unprocessed") == 0 ||
          StringICmp (str, "unitary") == 0 ||
          StringICmp (str, "allelic") == 0 ||
          StringICmp (str, "unknown") == 0) {
        sfp->pseudo = TRUE;
        ptr = str;
        ch = *ptr;
        while (ch != '\0') {
          if (IS_UPPER (ch)) {
            *ptr = TO_LOWER (ch);
          }
          ptr++;
          ch = *ptr;
        }
      }
      unlink = FALSE;
    } else if (StringICmp (gbq->qual, "ribosomal_slippage") == 0 ||
               StringICmp (gbq->qual, "ribosomal-slippage") == 0 ||
               StringICmp (gbq->qual, "ribosomal slippage") == 0) {
      sfp->excpt = TRUE;
      if (HasNoText (gbq->val)) {
        if (sfp->except_text == NULL) {
          sfp->except_text = StringSaveNoNull ("ribosomal slippage");
        }
      }
    } else if (StringICmp (gbq->qual, "trans_splicing") == 0 ||
               StringICmp (gbq->qual, "trans-splicing") == 0 ||
               StringICmp (gbq->qual, "trans splicing") == 0) {
      sfp->excpt = TRUE;
      if (HasNoText (gbq->val)) {
        if (sfp->except_text == NULL) {
          sfp->except_text = StringSaveNoNull ("trans-splicing");
        }
      }
    } else if (StringICmp (gbq->qual, "artificial_location") == 0 ||
               StringICmp (gbq->qual, "artificial-location") == 0 ||
               StringICmp (gbq->qual, "artificial location") == 0) {
      sfp->excpt = TRUE;
      if (HasNoText (gbq->val)) {
        if (sfp->except_text == NULL) {
          sfp->except_text = StringSaveNoNull ("artificial location");
        }
      }
    } else if (StringICmp (gbq->qual, "gene") == 0 && (! StringHasNoText (gbq->val))) {
      /* turn the gene qualifier into a gene xref placed at the head of sfp->xref */
      grp = GeneRefNew ();
      grp->locus = StringSave (gbq->val);
      xref = SeqFeatXrefNew ();
      xref->data.choice = SEQFEAT_GENE;
      xref->data.value.ptrvalue = (Pointer) grp;
      xref->specialCleanupFlag = TRUE; /* flag to test for overlapping gene later */
      xref->next = sfp->xref;
      sfp->xref = xref;
    } else if (sfp->data.choice != SEQFEAT_CDREGION && StringICmp (gbq->qual, "codon_start") == 0) {
      /* not legal on anything but CDS, so remove it */
    } else if (StringICmp (gbq->qual, "experiment") == 0 &&
               StringICmp (gbq->val, "experimental evidence, no additional details recorded") == 0) {
      /* remove default experiment string if instantiated */
    } else if (StringICmp (gbq->qual, "inference") == 0) {
      if (StringICmp (gbq->val, "non-experimental evidence, no additional details recorded") == 0) {
        /* remove default inference string if instantiated */
      } else {
        CleanupInference (gbq);
        RepairInference (gbq);
        unlink = FALSE;
      }
    } else if (StringICmp (gbq->qual, "transposon") == 0) {
      /* transposon becomes mobile_element, with the value prefixed by integron: or transposon: */
      if (StringICmp (gbq->val, "class I integron") == 0 ||
          StringICmp (gbq->val, "class II integron") == 0 ||
          StringICmp (gbq->val, "class III integron") == 0 ||
          StringICmp (gbq->val, "class 1 integron") == 0 ||
          StringICmp (gbq->val, "class 2 integron") == 0 ||
          StringICmp (gbq->val, "class 3 integron") == 0) {
        len = StringLen ("integron") + StringLen (gbq->val) + 5;
        str = MemNew (sizeof (Char) * len);
        StringCpy (str, "integron");
        StringCat (str, ":");
        /* cut the trailing " integron" so that only the class part follows the colon */
        ptr = StringStr (gbq->val, " integron");
        if (ptr != NULL) {
          *ptr = '\0';
        }
        StringCat (str, gbq->val);
        gbq->val = MemFree (gbq->val);
        gbq->val = str;
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("mobile_element");
        unlink = FALSE;
      } else {
        len = StringLen ("transposon") + StringLen (gbq->val) + 5;
        str = MemNew (sizeof (Char) * len);
        StringCpy (str, "transposon");
        StringCat (str, ":");
        StringCat (str, gbq->val);
        gbq->val = MemFree (gbq->val);
        gbq->val = str;
        gbq->qual = MemFree (gbq->qual);
        gbq->qual = StringSave ("mobile_element");
        unlink = FALSE;
      }
    } else if (StringICmp (gbq->qual, "insertion_seq") == 0) {
      /* insertion_seq becomes mobile_element with the value prefixed by "insertion sequence:" */
      len = StringLen ("insertion sequence") + StringLen (gbq->val) + 5;
      str = MemNew (sizeof (Char) * len);
      StringCpy (str, "insertion sequence");
      StringCat (str, ":");
      StringCat (str, gbq->val);
      gbq->val = MemFree (gbq->val);
      gbq->val = str;
      gbq->qual = MemFree (gbq->qual);
      gbq->qual = StringSave ("mobile_element");
      unlink = FALSE;
    } else if (StringCmp (gbq->qual, "satellite") == 0) {
      /* note: this name comparison is case-sensitive, unlike the others */
      MendSatelliteQualifier(&(gbq->val));
      unlink = FALSE;
    } else {
      unlink = FALSE;
    }

    /* mobile_element on a repeat_region import feature also converts the feature itself */
    if (StringICmp (gbq->qual, "mobile_element") == 0) {
      if (sfp->data.choice == SEQFEAT_IMP) {
        ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
        if (ifp != NULL) {
          if (StringICmp (ifp->key, "repeat_region") == 0 && gbq->val != NULL) {
            gbq->qual = MemFree (gbq->qual);
            gbq->qual = StringSave ("mobile_element_type");
            ifp->key = MemFree (ifp->key);
            ifp->key = StringSave ("mobile_element");
            sfp->idx.subtype = FEATDEF_mobile_element;
          }
        }
      }
    }
    /* any mobile_element qualifier still left is renamed to mobile_element_type */
    if (StringICmp (gbq->qual, "mobile_element") == 0) {
      gbq->qual = MemFree (gbq->qual);
      gbq->qual = StringSave ("mobile_element_type");
    }
    /* rebuild the value without spaces around its first colon */
    if (StringICmp (gbq->qual, "mobile_element_type") == 0) {
      if (StringStr (gbq->val, " :") != NULL || StringStr (gbq->val, ": ") != NULL) {
        len = StringLen (gbq->val) + 5;
        ptr = StringChr (gbq->val, ':');
        if (ptr != NULL) {
          *ptr = '\0';
          ptr++;
          TrimSpacesAroundString (gbq->val);
          TrimSpacesAroundString (ptr);
          str = MemNew (sizeof (Char) * len);
          StringCpy (str, gbq->val);
          StringCat (str, ":");
          StringCat (str, ptr);
          gbq->val = MemFree (gbq->val);
          gbq->val = str;
        }
      }
    }

    /* estimated_length must be all digits; any other value is replaced by unknown */
    if (StringICmp (gbq->qual, "estimated_length") == 0) {
      all_digits = TRUE;
      ptr = gbq->val;
      if (ptr != NULL) {
        ch = *ptr;
        while (ch != '\0') {
          if (! IS_DIGIT (ch)) {
            all_digits = FALSE;
          }
          ptr++;
          ch = *ptr;
        }
      }
      if (! all_digits) {
        if (StringICmp (gbq->val, "unknown") != 0) {
          MemFree (gbq->val);
          gbq->val = StringSave ("unknown");
        }
      }
    }

    /* a conflict import feature is converted to misc_difference with "conflict"
       placed at the front of the comment; because this sits inside the
       qualifier loop it only happens for features with at least one qualifier,
       and the changed key keeps it from repeating on later iterations */
    if (sfp->data.choice == SEQFEAT_IMP) {
      ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
      if (ifp != NULL) {
        if (StringICmp (ifp->key, "conflict") == 0 ) {
          ifp->key = MemFree (ifp->key);
          ifp->key = StringSave ("misc_difference");
          sfp->idx.subtype = FEATDEF_misc_difference;
          len = StringLen (sfp->comment) + StringLen ("conflict") + 5;
          str = MemNew (sizeof (Char) * len);
          if (sfp->comment == NULL) {
            StringCpy (str, "conflict");
            sfp->comment = str;
          } else {
            StringCpy (str, "conflict; ");
            StringCat (str, sfp->comment);
            sfp->comment = MemFree (sfp->comment);
            sfp->comment = str;
          }
        }
      }
    }

    /* NOTE(review): rpt_unit_seq and rpt_unit_range are initialized to NULL and
       never assigned in this function, so these two calls are never reached */
    if (rpt_unit_seq != NULL) {
      CleanupRptUnit (rpt_unit_seq);
    }
    if (rpt_unit_range != NULL) {
      CleanupRptUnit (rpt_unit_range);
    }

    /* a qualifier with neither name nor value is dropped regardless of the branch taken */
    if (StringHasNoText (gbq->qual) && StringHasNoText (gbq->val)) {
      unlink = TRUE;
    }

    if (unlink) {
      *(prevqual) = gbq->next;
      gbq->next = NULL;
      gbq->qual = MemFree (gbq->qual);
      gbq->val = MemFree (gbq->val);
      GBQualFree (gbq);
    } else {
      prevqual = (GBQualPtr PNTR) &(gbq->next);
    }
    gbq = nextqual;
  }
}
5601
SortByGBQualKeyAndVal(VoidPtr ptr1,VoidPtr ptr2)5602 static int LIBCALLBACK SortByGBQualKeyAndVal (VoidPtr ptr1, VoidPtr ptr2)
5603
5604 {
5605 int compare;
5606 GBQualPtr gbq1;
5607 GBQualPtr gbq2;
5608 CharPtr str1;
5609 CharPtr str2;
5610
5611 if (ptr1 == NULL || ptr2 == NULL) return 0;
5612 gbq1 = *((GBQualPtr PNTR) ptr1);
5613 gbq2 = *((GBQualPtr PNTR) ptr2);
5614 if (gbq1 == NULL || gbq2 == NULL) return 0;
5615 str1 = (CharPtr) gbq1->qual;
5616 str2 = (CharPtr) gbq2->qual;
5617 if (str1 == NULL || str2 == NULL) return 0;
5618 compare = StringICmp (str1, str2);
5619 if (compare != 0) return compare;
5620 str1 = (CharPtr) gbq1->val;
5621 str2 = (CharPtr) gbq2->val;
5622 if (str1 == NULL || str2 == NULL) return 0;
5623 compare = StringICmp (str1, str2);
5624 return compare;
5625 }
5626
GBQualsAlreadyInOrder(GBQualPtr list)5627 static Boolean GBQualsAlreadyInOrder (GBQualPtr list)
5628
5629 {
5630 int compare;
5631 GBQualPtr curr;
5632 GBQualPtr next;
5633
5634 if (list == NULL || list->next == NULL) return TRUE;
5635 curr = list;
5636 next = curr->next;
5637 while (next != NULL) {
5638 compare = StringICmp (curr->qual, next->qual);
5639 if (compare > 0) return FALSE;
5640 if (compare == 0) {
5641 compare = StringICmp (curr->val, next->val);
5642 if (compare > 0) return FALSE;
5643 }
5644 curr = next;
5645 next = curr->next;
5646 }
5647 return TRUE;
5648 }
5649
SortFeatureGBQuals(GBQualPtr list)5650 NLM_EXTERN GBQualPtr SortFeatureGBQuals (GBQualPtr list)
5651
5652 {
5653 size_t count, i;
5654 GBQualPtr gbq, PNTR head;
5655
5656 if (list == NULL) return NULL;
5657 if (GBQualsAlreadyInOrder (list)) return list;
5658
5659 for (gbq = list, count = 0; gbq != NULL; gbq = gbq->next, count++) continue;
5660 head = MemNew (sizeof (GBQualPtr) * (count + 1));
5661
5662 for (gbq = list, i = 0; gbq != NULL && i < count; i++) {
5663 head [i] = gbq;
5664 gbq = gbq->next;
5665 }
5666
5667 StableMergeSort (head, count, sizeof (GBQualPtr), SortByGBQualKeyAndVal);
5668
5669 for (i = 0; i < count; i++) {
5670 gbq = head [i];
5671 gbq->next = head [i + 1];
5672 }
5673
5674 list = head [0];
5675 MemFree (head);
5676
5677 return list;
5678 }
5679
CleanupDuplicateGBQuals(GBQualPtr PNTR prevgbq)5680 NLM_EXTERN void CleanupDuplicateGBQuals (GBQualPtr PNTR prevgbq)
5681
5682 {
5683 GBQualPtr gbq;
5684 GBQualPtr last = NULL;
5685 GBQualPtr next;
5686 Boolean unlink;
5687
5688 if (prevgbq == NULL) return;
5689 gbq = *prevgbq;
5690 while (gbq != NULL) {
5691 next = gbq->next;
5692 unlink = FALSE;
5693 if (last != NULL) {
5694 if (StringICmp (last->qual, gbq->qual) == 0 &&
5695 StringICmp (last->val, gbq->val) == 0) {
5696 unlink = TRUE;
5697 }
5698 } else {
5699 last = gbq;
5700 }
5701 if (unlink) {
5702 *prevgbq = gbq->next;
5703 gbq->next = NULL;
5704 GBQualFree (gbq);
5705 } else {
5706 last = gbq;
5707 prevgbq = (GBQualPtr PNTR) &(gbq->next);
5708 }
5709 gbq = next;
5710 }
5711 }
5712
5713 /* this identifies gbquals that should have been placed into special fields */
5714
/* number of entries in illegalGbqualList; must match the initializer below */
#define NUM_ILLEGAL_QUALS 14

/* StringICmp use of TO_UPPER means translation should go before transl_XXX */

/* QualifierIsIllegal binary-searches this table with StringICmp, so the
   entries must be kept in the order that StringICmp produces */
static CharPtr illegalGbqualList [NUM_ILLEGAL_QUALS] = {
  "anticodon",
  "citation",
  "codon_start",
  "db_xref",
  "evidence",
  "exception",
  "gene",
  "note",
  "protein_id",
  "pseudo",
  "transcript_id",
  "translation",
  "transl_except",
  "transl_table",
};
5735
QualifierIsIllegal(CharPtr qualname)5736 static Int2 QualifierIsIllegal (CharPtr qualname)
5737
5738 {
5739 Int2 L, R, mid;
5740
5741 if (qualname == NULL || *qualname == '\0') return FALSE;
5742
5743 L = 0;
5744 R = NUM_ILLEGAL_QUALS - 1;
5745
5746 while (L < R) {
5747 mid = (L + R) / 2;
5748 if (StringICmp (illegalGbqualList [mid], qualname) < 0) {
5749 L = mid + 1;
5750 } else {
5751 R = mid;
5752 }
5753 }
5754
5755 if (StringICmp (illegalGbqualList [R], qualname) == 0) {
5756 return TRUE;
5757 }
5758
5759 return FALSE;
5760 }
5761
/* Appends qual (a single node or a whole chain) to the end of the list whose
   head pointer is *head; an empty list simply takes qual as its head.  Does
   nothing when head or qual is NULL. */

static void GbqualLink (GBQualPtr PNTR head, GBQualPtr qual)

{
  GBQualPtr PNTR  slot;

  if (head == NULL || qual == NULL) return;

  /* advance to the NULL link that ends the chain (this is *head itself when the list is empty) */
  for (slot = head; *slot != NULL; slot = (GBQualPtr PNTR) &((*slot)->next)) continue;
  *slot = qual;
}
5778
/* Reorders a GBQual list so that every qualifier for which
   QualifierIsIllegal returns TRUE comes after all the others.  The relative
   order inside each of the two groups is preserved.  Returns the new head. */

static GBQualPtr SortIllegalGBQuals (GBQualPtr list)

{
  GBQualPtr       curr;
  GBQualPtr PNTR  dest;
  GBQualPtr       following;
  GBQualPtr       keepers = NULL;
  GBQualPtr       misplaced = NULL;

  /* detach each node and append it to the chain it belongs to */
  for (curr = list; curr != NULL; curr = following) {
    following = curr->next;
    curr->next = NULL;
    dest = QualifierIsIllegal (curr->qual) ? &misplaced : &keepers;
    GbqualLink (dest, curr);
  }

  /* the illegal group goes behind the legal one */
  GbqualLink (&keepers, misplaced);
  return keepers;
}
5798
IsSubString(CharPtr str1,CharPtr str2)5799 static Boolean IsSubString (CharPtr str1, CharPtr str2)
5800
5801 {
5802 Char ch;
5803 size_t len1, len2;
5804
5805 len1 = StringLen (str1);
5806 len2 = StringLen (str2);
5807 if (len1 >= len2) return FALSE;
5808 if (StringNICmp (str1, str2, len1) != 0) return FALSE;
5809 ch = str2 [len1];
5810 if (IS_ALPHANUM (ch)) return FALSE;
5811 return TRUE;
5812 }
5813
SortByOrgModSubtype(VoidPtr ptr1,VoidPtr ptr2)5814 static int LIBCALLBACK SortByOrgModSubtype (VoidPtr ptr1, VoidPtr ptr2)
5815
5816 {
5817 int compare;
5818 OrgModPtr omp1;
5819 OrgModPtr omp2;
5820 CharPtr str1;
5821 CharPtr str2;
5822
5823 if (ptr1 == NULL || ptr2 == NULL) return 0;
5824 omp1 = *((OrgModPtr PNTR) ptr1);
5825 omp2 = *((OrgModPtr PNTR) ptr2);
5826 if (omp1 == NULL || omp2 == NULL) return 0;
5827 if (omp1->subtype > omp2->subtype) {
5828 return 1;
5829 } else if (omp1->subtype < omp2->subtype) {
5830 return -1;
5831 }
5832 str1 = (CharPtr) omp1->subname;
5833 str2 = (CharPtr) omp2->subname;
5834 if (str1 == NULL || str2 == NULL) return 0;
5835 compare = StringICmp (str1, str2);
5836 return compare;
5837 }
5838
OrgModsAlreadyInOrder(OrgModPtr list)5839 static Boolean OrgModsAlreadyInOrder (OrgModPtr list)
5840
5841 {
5842 int compare;
5843 OrgModPtr curr;
5844 OrgModPtr next;
5845 CharPtr str1;
5846 CharPtr str2;
5847
5848 if (list == NULL || list->next == NULL) return TRUE;
5849 curr = list;
5850 next = curr->next;
5851 while (next != NULL) {
5852 if (curr->subtype > next->subtype) return FALSE;
5853 str1 = (CharPtr) curr->subname;
5854 str2 = (CharPtr) next->subname;
5855 compare = StringICmp (str1, str2);
5856 if (compare > 0) return FALSE;
5857 curr = next;
5858 next = curr->next;
5859 }
5860 return TRUE;
5861 }
5862
/* Sorts an OrgMod list by subtype and subname (see SortByOrgModSubtype) and
   returns the new head of the list.  A list that is already in order is
   returned untouched.  If the temporary pointer array cannot be allocated
   the list is returned unsorted rather than dereferencing a NULL array. */

static OrgModPtr SortOrgModList (OrgModPtr list)

{
  size_t     count, i;
  OrgModPtr  omp, PNTR head;

  if (list == NULL) return NULL;
  if (OrgModsAlreadyInOrder (list)) return list;

  for (omp = list, count = 0; omp != NULL; omp = omp->next, count++) continue;

  /* one extra slot holds the NULL that terminates the relinked list */
  head = MemNew (sizeof (OrgModPtr) * (count + 1));
  if (head == NULL) return list;

  for (omp = list, i = 0; omp != NULL && i < count; i++) {
    head [i] = omp;
    omp = omp->next;
  }
  /* set the terminator explicitly instead of relying on the allocator's fill */
  head [count] = NULL;

  StableMergeSort (head, count, sizeof (OrgModPtr), SortByOrgModSubtype);

  /* rebuild the next links in sorted order */
  for (i = 0; i < count; i++) {
    omp = head [i];
    omp->next = head [i + 1];
  }

  list = head [0];
  MemFree (head);

  return list;
}
5892
5893
/* Removes, in place, all whitespace that directly precedes or directly
   follows each colon in str.  The string can only get shorter.  Characters
   are cast to unsigned char before being handed to isspace, because passing
   a negative char value to the ctype functions is undefined, and the scan
   backwards never steps to an address before the start of the string. */
static void RemoveSpaceBeforeAndAfterColon (CharPtr str)
{
  CharPtr pColon, cp, src, dst;

  if (StringHasNoText (str)) {
    return;
  }

  pColon = StringChr (str, ':');
  while (pColon != NULL) {
    /* move back to the first of the whitespace characters that lead up to
       the colon; cp stays on the colon itself when there are none */
    cp = pColon;
    while (cp > str && isspace ((unsigned char) *(cp - 1))) {
      cp--;
    }
    *cp = ':';
    dst = cp + 1;

    /* skip the whitespace that follows the original colon */
    src = pColon + 1;
    while (isspace ((unsigned char) *src)) {
      src++;
    }

    /* close the gap, if any, by shifting the rest of the string down */
    if (src != dst) {
      while (*src != 0) {
        *dst = *src;
        dst++; src++;
      }
      *dst = 0;
    }

    /* continue after the colon at its new position */
    pColon = StringChr (cp + 1, ':');
  }
}
5929
/*
 * CorrectTildes
 *
 * Runs a fixed list of literal find/replace corrections over *str via
 * FindReplaceString (both Boolean options FALSE).  Most entries double a
 * lone '~'; the others swap a '~' for a blank, drop it, or restore a
 * missing author name.  The entries are applied in table order.
 * The whole body is compiled out when OS_MSWIN is defined.
 *
 * NOTE(review): the doubling presumably protects a literal tilde from being
 * interpreted as a line break by downstream formatting - confirm.
 */
static void CorrectTildes (
  CharPtr PNTR str
)

{
#ifndef OS_MSWIN
  static struct {
    CharPtr  find;
    CharPtr  replace;
  } fixes [] = {
    {"were ~25 cm in height (~3 weeks)", "were ~~25 cm in height (~~3 weeks)"},
    {"generally ~3 weeks", "generally ~~3 weeks"},
    {"sequencing (~4 96-well plates)", "sequencing (~~4 96-well plates)"},
    {"size distribution (~2 kb)", "size distribution (~~2 kb)"},
    {"sequencing (~3 96-well plates)", "sequencing (~~3 96-well plates)"},
    {"vector. 1~2 ul of ligated", "vector. 1~~2 ul of ligated"},
    /* disabled:
    {"Lambda FLC I.~Islet cells were provided", "Lambda FLC I.~~Islet cells were provided"},
    */
    {"different strains~of mice", "different strains of mice"},
    {"oligo-dT-NotI primer~(5'-biotin", "oligo-dT-NotI primer (5'-biotin"},
    {"sizes of 200~800 bp were purified", "sizes of 200~~800 bp were purified"},
    {"Tween 20 (~50 ml per tree)", "Tween 20 (~~50 ml per tree)"},
    {"the SMART approach (~http://www.evrogen.com", "the SMART approach (http://www.evrogen.com"},
    {"the morning (~10 am) with", "the morning (~~10 am) with"},
    {"(host) sequences (~10%)", "(host) sequences (~~10%)"},
    /* disabled:
    {"unidirectionally.~ High quality", "unidirectionally. High quality"},
    {"onlysubmitted.~ Average", "onlysubmitted. Average"},
    */
    {"Plasmid; ~The F03-1270", "Plasmid; The F03-1270"},
    {"using STS-PCR~from Eb", "using STS-PCR from Eb"},
    {"specific to~the Eb", "specific to the Eb"},
    {"side of insert); , M.F., Lennon", "side of insert); Bonaldo, M.F., Lennon"},
    {"Uni-ZAP XR vector. 1~2 ul of", "Uni-ZAP XR vector. 1~~2 ul of"},
    {"from diploid~Secale montanum", "from diploid Secale montanum"},
    {"homology with~U43516,", "homology with U43516,"},
    /* disabled:
    {"from http//www.biobase.dk/~ddbase", "from http//www.biobase.dk/~~ddbase"},
    */
    {"plasmid; ~Assembled EST", "plasmid; Assembled EST"},
    {"databases.~Different cDNA", "databases. Different cDNA"},
    {"enzyme PstI.~DH5-alpha", "enzyme PstI. DH5-alpha"},
    {"as they~were prepared", "as they were prepared"},
    {"loci in~the genome", "loci in the genome"},
    {"P{CaSpeR}Cp1~50C (FBti0004219)", "P{CaSpeR}Cp1~~50C (FBti0004219)"},
    {"seedlings with 2~4 leaves", "seedlings with 2~~4 leaves"},
    {"tween 20 (~50mLs per tree)", "tween 20 (~~50mLs per tree)"}
  };
  size_t  idx;

  for (idx = 0; idx < sizeof (fixes) / sizeof (fixes [0]); idx++) {
    FindReplaceString (str, fixes [idx].find, fixes [idx].replace, FALSE, FALSE);
  }
#endif
}
5976
/*
 * FixStrainForPrefix
 *
 * Normalizes culture-collection style strain values.  Only acts on an
 * OrgMod whose subtype is ORGMOD_strain and whose subname has text.  The
 * subname is split with SplitStringAtSemicolon; every piece that starts
 * (case-insensitively) with "ATCC" or "DSM", optionally followed by ':' or
 * '/', and whose remainder passes StringIsAllDigits after trimming, is
 * rewritten as "<PREFIX> <digits>".  The pieces are then rejoined with "; "
 * and replace omp->subname.
 *
 * The temporary ValNode list and, on allocation failure, the digit copy are
 * now released; previously both were leaked.  This assumes, as the old code
 * implicitly did by continuing to own the pieces, that ValNodeMergeStrsEx
 * builds a fresh string and does not consume the list.
 */
static void FixStrainForPrefix (OrgModPtr omp)

{
  Char        ch;
  CharPtr     cpy;
  ValNodePtr  head = NULL;
  size_t      len;
  CharPtr     pfx;
  CharPtr     sfx;
  CharPtr     str;
  CharPtr     tmp;
  ValNodePtr  vnp;

  if (omp == NULL || omp->subtype != ORGMOD_strain) return;
  str = omp->subname;
  if (StringHasNoText (str)) return;

  head = SplitStringAtSemicolon (str);
  if (head == NULL) return;

  for (vnp = head; vnp != NULL; vnp = vnp->next) {
    str = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (str)) continue;
    TrimSpacesAroundString (str);

    pfx = NULL;
    sfx = NULL;
    if (StringNICmp (str, "ATCC", 4) == 0) {
      pfx = "ATCC";
      sfx = str + 4;
    } else if (StringNICmp (str, "DSM", 3) == 0) {
      pfx = "DSM";
      sfx = str + 3;
    }
    if (pfx == NULL || sfx == NULL) continue;

    /* tolerate a single ':' or '/' between prefix and number */
    ch = *sfx;
    if (ch == ':' || ch == '/') {
      sfx++;
    }
    cpy = StringSave (sfx);
    TrimSpacesAroundString (cpy);
    if (! StringIsAllDigits (cpy)) {
      cpy = MemFree (cpy);
      continue;
    }

    /* prefix + blank + digits + terminator (one spare byte, as before) */
    len = StringLen (pfx) + StringLen (cpy) + 3;
    tmp = (CharPtr) MemNew (len);
    if (tmp == NULL) {
      cpy = MemFree (cpy);
      continue;
    }
    StringCpy (tmp, pfx);
    StringCat (tmp, " ");
    StringCat (tmp, cpy);
    vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
    vnp->data.ptrvalue = tmp;
    cpy = MemFree (cpy);
  }

  tmp = ValNodeMergeStrsEx (head, "; ");

  /* the split pieces are not needed once merged, whether or not it worked */
  head = ValNodeFreeData (head);

  if (tmp == NULL) return;

  omp->subname = MemFree (omp->subname);
  omp->subname = tmp;
}
6041
/*
 * CleanOrgModListEx
 *
 * Tidies the OrgMod chain at *ompp in four steps:
 *
 *  1. Clean each subname/attrib, apply CorrectTildes to ORGMOD_other text,
 *     and drop entries that are empty or that repeat the entry kept just
 *     before them (same subtype and case-insensitively equal text; for
 *     ORGMOD_other also when one text is found inside the other).
 *     An ORGMOD_common entry whose subname equals orpcommon is always kept.
 *  2. For specimen_voucher / culture_collection / bio_material, strip
 *     whitespace around colons and turn the first "::" into ":".
 *  3. Rewrite nat_host "human" as "Homo sapiens" and run
 *     FixStrainForPrefix on strain entries.
 *  4. Remove the last ORGMOD_other entry when it reads "anamorph:<x>" and
 *     <x> equals the subname of the anamorph entry (or, if there is none,
 *     of the gb_anamorph entry).
 *
 * ompp      address of the list head; updated when the first node is removed
 * orpcommon common name to compare ORGMOD_common entries against (may be NULL)
 */
static void CleanOrgModListEx (OrgModPtr PNTR ompp, CharPtr orpcommon)

{
  OrgModPtr       anamorph = NULL;
  Boolean         discard;
  CharPtr         dst;
  OrgModPtr       following;
  OrgModPtr       gb_anamorph = NULL;
  OrgModPtr       kept = NULL;
  OrgModPtr PNTR  link;
  OrgModPtr       omp;
  OrgModPtr       other = NULL;
  Boolean         redundant = FALSE;
  CharPtr         src;
  CharPtr         val;

  if (ompp == NULL) return;

  /* step 1: clean text, drop empty and repeated entries */

  link = ompp;
  for (omp = *ompp; omp != NULL; omp = following) {
    following = omp->next;
    discard = FALSE;

    CleanVisStringAndCompress (&(omp->subname));
    TrimSpacesAndJunkFromEnds (omp->subname, FALSE);
    RemoveFlankingQuotes (&(omp->subname));
    CleanVisStringAndCompress (&(omp->attrib));
    if (omp->subtype == ORGMOD_other && StringDoesHaveText (omp->subname)) {
      CorrectTildes (&(omp->subname));
    }

    if (omp->subtype == ORGMOD_common && StringICmp (omp->subname, orpcommon) == 0) {
      /* matches the organism's common name: deliberately left in place */
    } else if (kept == NULL) {
      /* nothing kept yet: also reject a lone parenthesis */
      if (HasNoText (omp->subname) ||
          StringCmp (omp->subname, ")") == 0 ||
          StringCmp (omp->subname, "(") == 0) {
        discard = TRUE;
      }
    } else if (HasNoText (omp->subname)) {
      discard = TRUE;
    } else if (kept->subtype == omp->subtype) {
      if (StringICmp (kept->subname, omp->subname) == 0) {
        discard = TRUE;
      } else if (omp->subtype == ORGMOD_other) {
        if (StringStr (kept->subname, omp->subname) != NULL) {
          /* this note is already contained in the previous one */
          discard = TRUE;
        } else if (IsSubString (kept->subname, omp->subname)) {
          /* previous note is covered by this one: keep the newer text */
          kept->subname = MemFree (kept->subname);
          kept->subname = omp->subname;
          omp->subname = NULL;
          discard = TRUE;
        }
      }
    }

    if (discard) {
      *link = following;
      omp->next = NULL;
      OrgModFree (omp);
    } else {
      kept = omp;
      link = &(omp->next);
    }
  }

  /* step 2: colon clean-up in structured voucher-style values */

  for (omp = *ompp; omp != NULL; omp = omp->next) {
    if (omp->subtype != ORGMOD_specimen_voucher &&
        omp->subtype != ORGMOD_culture_collection &&
        omp->subtype != ORGMOD_bio_material) continue;
    if (StringHasNoText (omp->subname)) continue;

    RemoveSpaceBeforeAndAfterColon (omp->subname);

    dst = StringStr (omp->subname, "::");
    if (dst == NULL) continue;

    /* overwrite the second colon by shifting the tail one place left */
    dst++;
    for (src = dst + 1; *src != '\0'; src++) {
      *dst = *src;
      dst++;
    }
    *dst = '\0';
  }

  /* step 3: per-subtype fixes, remembering the last of each anamorph kind */

  for (omp = *ompp; omp != NULL; omp = omp->next) {
    switch (omp->subtype) {
      case ORGMOD_anamorph :
        anamorph = omp;
        break;
      case ORGMOD_gb_anamorph :
        gb_anamorph = omp;
        break;
      case ORGMOD_other :
        other = omp;
        break;
      case ORGMOD_nat_host :
        if (StringICmp (omp->subname, "human") == 0) {
          omp->subname = MemFree (omp->subname);
          omp->subname = StringSave ("Homo sapiens");
        }
        break;
      case ORGMOD_strain :
        FixStrainForPrefix (omp);
        break;
      default :
        break;
    }
  }

  /* step 4: is the trailing "anamorph:" note a duplicate of a real qualifier? */

  if (other != NULL && StringNICmp (other->subname, "anamorph:", 9) == 0) {
    val = other->subname + 9;
    while (*val == ' ') {
      val++;
    }
    if (anamorph != NULL) {
      redundant = (Boolean) (StringCmp (val, anamorph->subname) == 0);
    } else if (gb_anamorph != NULL) {
      redundant = (Boolean) (StringCmp (val, gb_anamorph->subname) == 0);
    }
  }

  if (redundant) {
    link = ompp;
    for (omp = *ompp; omp != NULL; omp = omp->next) {
      if (omp == other) {
        *link = omp->next;
        omp->next = NULL;
        OrgModFree (omp);
        break;
      }
      link = &(omp->next);
    }
  }
}
6191
CleanOrgModList(OrgModPtr PNTR ompp)6192 NLM_EXTERN void CleanOrgModList (OrgModPtr PNTR ompp)
6193
6194 {
6195 CleanOrgModListEx (ompp, NULL);
6196 }
6197
IsNoNameSubSource(SubSourcePtr ssp)6198 static Boolean IsNoNameSubSource (SubSourcePtr ssp)
6199
6200 {
6201 if (ssp == NULL) return FALSE;
6202
6203 return (Boolean) (ssp->subtype == SUBSRC_germline ||
6204 ssp->subtype == SUBSRC_rearranged ||
6205 ssp->subtype == SUBSRC_transgenic ||
6206 ssp->subtype == SUBSRC_environmental_sample ||
6207 ssp->subtype == SUBSRC_metagenomic);
6208 }
6209
SortBySubSourceSubtype(VoidPtr ptr1,VoidPtr ptr2)6210 static int LIBCALLBACK SortBySubSourceSubtype (VoidPtr ptr1, VoidPtr ptr2)
6211
6212 {
6213 int compare;
6214 SubSourcePtr ssp1;
6215 SubSourcePtr ssp2;
6216 CharPtr str1;
6217 CharPtr str2;
6218
6219 if (ptr1 == NULL || ptr2 == NULL) return 0;
6220 ssp1 = *((SubSourcePtr PNTR) ptr1);
6221 ssp2 = *((SubSourcePtr PNTR) ptr2);
6222 if (ssp1 == NULL || ssp2 == NULL) return 0;
6223 if (ssp1->subtype > ssp2->subtype) {
6224 return 1;
6225 } else if (ssp1->subtype < ssp2->subtype) {
6226 return -1;
6227 }
6228 if (IsNoNameSubSource (ssp1)) return 0;
6229 str1 = (CharPtr) ssp1->name;
6230 str2 = (CharPtr) ssp2->name;
6231 if (str1 == NULL || str2 == NULL) return 0;
6232 compare = StringICmp (str1, str2);
6233 return compare;
6234 }
6235
SubSourceAlreadyInOrder(SubSourcePtr list)6236 static Boolean SubSourceAlreadyInOrder (SubSourcePtr list)
6237
6238 {
6239 int compare;
6240 SubSourcePtr curr;
6241 SubSourcePtr next;
6242 CharPtr str1;
6243 CharPtr str2;
6244
6245 if (list == NULL || list->next == NULL) return TRUE;
6246 curr = list;
6247 next = curr->next;
6248 while (next != NULL) {
6249 if (curr->subtype > next->subtype) return FALSE;
6250 if (curr->subtype == next->subtype) {
6251 if (! IsNoNameSubSource (curr)) {
6252 str1 = (CharPtr) curr->name;
6253 str2 = (CharPtr) next->name;
6254 compare = StringICmp (str1, str2);
6255 if (compare > 0) return FALSE;
6256 }
6257 }
6258 curr = next;
6259 next = curr->next;
6260 }
6261 return TRUE;
6262 }
6263
/*
 * SortSubSourceList
 *
 * Reorders a singly linked SubSource chain with StableMergeSort, using
 * SortBySubSourceSubtype as the comparator, and returns the new first
 * node.  A NULL list yields NULL; a list that SubSourceAlreadyInOrder
 * accepts is returned untouched.  If the scratch array cannot be
 * allocated, the list is returned in its original order instead of
 * dereferencing NULL.
 */
static SubSourcePtr SortSubSourceList (SubSourcePtr list)

{
  size_t             count;
  SubSourcePtr PNTR  head;
  size_t             i;
  SubSourcePtr       ssp;

  if (list == NULL) return NULL;
  if (SubSourceAlreadyInOrder (list)) return list;

  count = 0;
  for (ssp = list; ssp != NULL; ssp = ssp->next) {
    count++;
  }

  /*
   * One slot more than there are nodes: the relinking loop below reads
   * head [count] as the terminator of the rebuilt chain, so that slot must
   * be NULL (this relies on MemNew handing back zero-filled memory, exactly
   * as the previous implementation did).
   */
  head = (SubSourcePtr PNTR) MemNew (sizeof (SubSourcePtr) * (count + 1));
  if (head == NULL) return list;

  for (ssp = list, i = 0; ssp != NULL && i < count; i++) {
    head [i] = ssp;
    ssp = ssp->next;
  }

  StableMergeSort (head, count, sizeof (SubSourcePtr), SortBySubSourceSubtype);

  /* rebuild the next pointers in sorted order */
  for (i = 0; i < count; i++) {
    head [i]->next = head [i + 1];
  }

  list = head [0];
  MemFree (head);

  return list;
}
6293
6294 //LCOV_EXCL_START
/*
 * TrimParenthesesAndCommasAroundString
 *
 * In-place trim.  From the front it removes every character that is below
 * ' ' (compared as an unsigned value), '(' or ','; from the back it removes
 * the trailing run of ')' and ',' characters.  Returns str itself; NULL and
 * empty strings are returned unchanged.
 */
static CharPtr TrimParenthesesAndCommasAroundString (CharPtr str)

{
  Uchar    ch;    /* unsigned so 8-bit characters are never seen as < ' ' */
  CharPtr  from;
  CharPtr  tail;
  CharPtr  to;

  if (str == NULL || str [0] == '\0') return str;

  /* locate the first character that survives the leading trim */
  from = str;
  ch = *from;
  while (ch != '\0' && (ch < ' ' || ch == '(' || ch == ',')) {
    from++;
    ch = *from;
  }

  /* slide the remainder down to the start of the buffer */
  for (to = str; *from != '\0'; from++, to++) {
    *to = *from;
  }
  *to = '\0';

  /* tail tracks the start of the current run of ')' / ',' characters */
  tail = NULL;
  for (from = str; *from != '\0'; from++) {
    if (*from == ')' || *from == ',') {
      if (tail == NULL) {
        tail = from;
      }
    } else {
      tail = NULL;
    }
  }
  if (tail != NULL) {
    *tail = '\0';
  }

  return str;
}
6335
/*
 * CombineSplitQual
 *
 * Merges two qualifier values into "(orig,new)".  When newval already
 * occurs inside origval, or the result buffer cannot be allocated, origval
 * is handed back unchanged.  Otherwise both inputs are trimmed in place
 * with TrimParenthesesAndCommasAroundString, origval is freed, and the
 * newly allocated combined string is returned; the caller is expected to
 * store the return value where origval used to live.
 */
static CharPtr CombineSplitQual (CharPtr origval, CharPtr newval)

{
  CharPtr  merged;
  size_t   needed;

  if (StringStr (origval, newval) != NULL) return origval;

  /* both texts plus "(", ",", ")" and the terminator, with one to spare */
  needed = StringLen (origval) + StringLen (newval) + 5;
  merged = MemNew (sizeof (Char) * needed);
  if (merged == NULL) return origval;

  StringCpy (merged, "(");
  StringCat (merged, TrimParenthesesAndCommasAroundString (origval));
  StringCat (merged, ",");
  StringCat (merged, TrimParenthesesAndCommasAroundString (newval));
  StringCat (merged, ")");

  /* the old value is replaced by the return value, so release it here */
  MemFree (origval);

  return merged;
}
6357 //LCOV_EXCL_STOP
6358
LocationForPlastidText(CharPtr plastid_name)6359 static Uint1 LocationForPlastidText (CharPtr plastid_name)
6360 {
6361 if (StringICmp (plastid_name, "chloroplast") == 0) {
6362 return GENOME_chloroplast;
6363 } else if (StringICmp (plastid_name, "chromoplast") == 0) {
6364 return GENOME_chromoplast;
6365 } else if (StringICmp (plastid_name, "kinetoplast") == 0) {
6366 return GENOME_kinetoplast;
6367 } else if (StringICmp (plastid_name, "plastid") == 0) {
6368 return GENOME_plastid;
6369 } else if (StringICmp (plastid_name, "apicoplast") == 0) {
6370 return GENOME_apicoplast;
6371 } else if (StringICmp (plastid_name, "leucoplast") == 0) {
6372 return GENOME_leucoplast;
6373 } else if (StringICmp (plastid_name, "proplastid") == 0) {
6374 return GENOME_proplastid;
6375 } else if (StringICmp (plastid_name, "chromatophore") == 0) {
6376 return GENOME_chromatophore;
6377 } else {
6378 return 0;
6379 }
6380 }
6381
6382 //LCOV_EXCL_START
StringToLower(CharPtr str)6383 NLM_EXTERN void StringToLower (CharPtr str)
6384
6385 {
6386 Char ch;
6387
6388 if (str == NULL) return;
6389 ch = *str;
6390 while (ch != '\0') {
6391 *str = TO_LOWER (ch);
6392 str++;
6393 ch = *str;
6394 }
6395 }
6396 //LCOV_EXCL_STOP
6397
6398
/*
 * CleanPCRPrimerSeq
 *
 * Normalizes a primer sequence string in place:
 *   - upper-case letters become lower case;
 *   - blanks are removed, except those between a '<' and the next '>';
 *   - every "<other>" is then rewritten as "<OTHER>".
 * Strings for which StringHasNoText is true (including NULL, judging by how
 * callers use it) are left untouched.
 */
static void CleanPCRPrimerSeq (CharPtr seq)
{
  Char     ch;
  CharPtr  hit;
  Boolean  inside = FALSE;   /* TRUE while between '<' and '>' */
  Int4     k;
  CharPtr  rd;
  CharPtr  wr;

  if (StringHasNoText (seq)) {
    return;
  }

  /* single sweep: lower-case each character and squeeze out stray blanks */
  wr = seq;
  for (rd = seq; *rd != '\0'; rd++) {
    ch = *rd;
    if (IS_UPPER (ch)) {
      ch = TO_LOWER (ch);
    }
    if (ch == '<') {
      inside = TRUE;
    } else if (ch == '>') {
      inside = FALSE;
    }
    if (ch != ' ' || inside) {
      *wr = ch;
      wr++;
    }
  }
  *wr = '\0';

  /* the modified-base marker is spelled in capitals: <other> -> <OTHER> */
  for (hit = StringStr (seq, "<other>"); hit != NULL; hit = StringStr (hit + 7, "<other>")) {
    for (k = 1; k < 6; k++) {
      ch = hit [k];
      hit [k] = TO_UPPER (ch);
    }
  }
}
6456
6457
/*
 * CleanupPCRPrimers
 *
 * Cleans the primer chain at *pppp:
 *   1. normalize every seq and name;
 *   2. blank out later primers that duplicate an earlier one (same name and
 *      seq), and for equal names with one empty seq fold the pair together;
 *   3. re-clean and unlink primers left with neither seq nor name;
 *   4. if exactly two primers remain and one has only a name while the
 *      other has only a seq, merge them into the first.
 *
 * NOTE(review): StringToLower is applied to seq after CleanPCRPrimerSeq, so
 * the "<OTHER>" capitalisation done there appears to be undone again -
 * confirm whether that is intended.
 */
static void CleanupPCRPrimers (PCRPrimerPtr PNTR pppp)

{
  PCRPrimerPtr       earlier;
  PCRPrimerPtr       first;
  PCRPrimerPtr       later;
  PCRPrimerPtr PNTR  link;
  PCRPrimerPtr       ppp;
  PCRPrimerPtr       second;
  PCRPrimerPtr       upcoming;

  if (pppp == NULL) return;

  /* step 1: normalize text */

  for (ppp = *pppp; ppp != NULL; ppp = ppp->next) {
    CleanVisString (&(ppp->seq));
    CleanPCRPrimerSeq (ppp->seq);
    CleanVisString (&(ppp->name));
    Asn2gnbkCompressSpaces (ppp->name);
    StringToLower (ppp->seq);
  }

  /* step 2: compare every primer with each one after it */

  for (earlier = *pppp; earlier != NULL; earlier = earlier->next) {
    for (later = earlier->next; later != NULL; later = later->next) {
      if (StringCmp (earlier->name, later->name) != 0) continue;

      if (StringCmp (earlier->seq, later->seq) == 0) {
        /* exact duplicate: empty it so step 3 removes it */
        later->seq = MemFree (later->seq);
        later->name = MemFree (later->name);
      } else if (StringHasNoText (earlier->seq)) {
        /* earlier one lacks a sequence: take over the later one's */
        earlier->seq = MemFree (earlier->seq);
        earlier->seq = later->seq;
        later->seq = NULL;
      } else if (StringHasNoText (later->seq)) {
        later->seq = MemFree (later->seq);
        later->name = MemFree (later->name);
      }
    }
  }

  /* step 3: clean again and drop primers that ended up empty */

  link = pppp;
  for (ppp = *pppp; ppp != NULL; ppp = upcoming) {
    upcoming = ppp->next;

    CleanVisString (&(ppp->seq));
    CleanPCRPrimerSeq (ppp->seq);
    CleanVisString (&(ppp->name));

    if (ppp->seq == NULL && ppp->name == NULL) {
      *link = upcoming;
      ppp->next = NULL;
      PCRPrimerFree (ppp);
    } else {
      StringToLower (ppp->seq);
      link = &(ppp->next);
    }
  }

  /* step 4: fix artifact caused by fwd/rev-primer-seq starting with colon,
     which separates name and seq into two primers */

  first = *pppp;
  if (first == NULL) return;
  second = first->next;
  if (second == NULL) return;
  if (second->next != NULL) return;

  if (first->name != NULL && first->seq == NULL &&
      second->name == NULL && second->seq != NULL) {
    first->seq = second->seq;
    second->seq = NULL;
    first->next = NULL;
    PCRPrimerFree (second);
  } else if (first->seq != NULL && first->name == NULL &&
             second->seq == NULL && second->name != NULL) {
    first->name = second->name;
    second->name = NULL;
    first->next = NULL;
    PCRPrimerFree (second);
  }
}
6539
PCRPrimersMatch(PCRPrimerPtr ppp1,PCRPrimerPtr ppp2)6540 static Boolean PCRPrimersMatch (PCRPrimerPtr ppp1, PCRPrimerPtr ppp2)
6541
6542 {
6543 Int2 len1 = 0, len2 = 0, matches = 0;
6544 PCRPrimerPtr pr1, pr2;
6545
6546 if (ppp1 == NULL || ppp2 == NULL) return FALSE;
6547
6548 for (pr1 = ppp1; pr1 != NULL; pr1 = pr1->next) {
6549 len1++;
6550 }
6551 for (pr2 = ppp2; pr2 != NULL; pr2 = pr2->next) {
6552 len2++;
6553 }
6554 if (len1 != len2) return FALSE;
6555
6556 for (pr1 = ppp1; pr1 != NULL; pr1 = pr1->next) {
6557 for (pr2 = ppp2; pr2 != NULL; pr2 = pr2->next) {
6558 if (StringCmp (pr1->seq, pr2->seq) == 0 && StringCmp (pr1->name, pr2->name) == 0) {
6559 matches++;
6560 }
6561 }
6562 }
6563
6564 if (matches == len1) return TRUE;
6565
6566 return FALSE;
6567 }
6568
PCRReactionSetsMatch(PCRReactionSetPtr prp1,PCRReactionSetPtr prp2)6569 static Boolean PCRReactionSetsMatch (PCRReactionSetPtr prp1, PCRReactionSetPtr prp2)
6570
6571 {
6572 if (prp1 == NULL || prp2 == NULL) return FALSE;
6573
6574 if (! PCRPrimersMatch (prp1->forward, prp2->forward)) return FALSE;
6575 if (! PCRPrimersMatch (prp1->reverse, prp2->reverse)) return FALSE;
6576
6577 return TRUE;
6578 }
6579
/*
 * CleanupPCRReactionSet
 *
 * Cleans the reaction chain at *prpp.  First every reaction has its forward
 * and reverse primer chains run through CleanupPCRPrimers.  Then, walking
 * front to back, each reaction empties the primers of any later reaction
 * that PCRReactionSetsMatch considers identical, and reactions that have
 * neither forward nor reverse primers are unlinked and freed.
 */
static void CleanupPCRReactionSet (PCRReactionSetPtr PNTR prpp)

{
  PCRReactionSetPtr       later;
  PCRReactionSetPtr PNTR  link;
  PCRReactionSetPtr       prp;
  PCRReactionSetPtr       upcoming;

  if (prpp == NULL) return;

  for (prp = *prpp; prp != NULL; prp = prp->next) {
    CleanupPCRPrimers (&(prp->forward));
    CleanupPCRPrimers (&(prp->reverse));
  }

  link = prpp;
  for (prp = *prpp; prp != NULL; prp = upcoming) {
    upcoming = prp->next;

    /* strip duplicates of this reaction further down the chain */
    for (later = upcoming; later != NULL; later = later->next) {
      if (PCRReactionSetsMatch (prp, later)) {
        later->forward = PCRPrimerFree (later->forward);
        later->reverse = PCRPrimerFree (later->reverse);
      }
    }

    if (prp->forward == NULL && prp->reverse == NULL) {
      *link = upcoming;
      prp->next = NULL;
      PCRReactionFree (prp);
    } else {
      link = &(prp->next);
    }
  }

}
6623
/*
 * CleanupAltitude
 *
 * Rewrites an altitude value of the form
 *     [+|-]digits[.digits] followed by "m", "m.", " m", " meters" or " metres"
 * as "<number> m".  A trailing '.' is removed first when the value is
 * longer than two characters (this happens even if the value is not
 * rewritten afterwards).  Values that do not fit the pattern are otherwise
 * left alone.
 */
static void CleanupAltitude (SubSourcePtr ssp)

{
  CharPtr  cp;
  CharPtr  fixed;
  size_t   len;

  if (ssp == NULL || StringHasNoText (ssp->name)) return;
  len = StringLen (ssp->name);
  if (len < 1) return;

  cp = ssp->name;

  if (len > 2 && cp [len - 1] == '.') {
    cp [len - 1] = '\0';
  }

  /* optional sign */
  if (*cp == '+' || *cp == '-') {
    cp++;
  }

  /* integer part: at least one digit required */
  if (! IS_DIGIT (*cp)) return;
  while (IS_DIGIT (*cp)) {
    cp++;
  }

  /* optional fraction: a '.' must be followed by at least one digit */
  if (*cp == '.') {
    cp++;
    if (! IS_DIGIT (*cp)) return;
    while (IS_DIGIT (*cp)) {
      cp++;
    }
  }

  /* cp now sits on the unit text; only these spellings are accepted */
  if (StringCmp (cp, "m") != 0 &&
      StringCmp (cp, "m.") != 0 &&
      StringCmp (cp, " m") != 0 &&
      StringCmp (cp, " meters") != 0 &&
      StringCmp (cp, " metres") != 0) return;

  /* cut the unit off, then rebuild as number + " m" */
  *cp = '\0';
  fixed = (CharPtr) MemNew (len + 5);
  if (fixed == NULL) return;
  StringCpy (fixed, ssp->name);
  StringCat (fixed, " m");
  ssp->name = MemFree (ssp->name);
  ssp->name = fixed;
}
6682
6683 static CharPtr coll_date_month_abbrevs [12] =
6684 {
6685 "-Jan-", "-Feb-", "-Mar-", "-Apr-", "-May-", "-Jun-",
6686 "-Jul-", "-Aug-", "-Sep-", "-Oct-", "-Nov-", "-Dec-"
6687 };
6688
CorrectMonthCapitalization(CharPtr str)6689 static void CorrectMonthCapitalization (CharPtr str)
6690
6691 {
6692 Int2 i;
6693 Int2 j;
6694 CharPtr month;
6695 CharPtr ptr;
6696
6697 for (i = 0; i < 12; i++) {
6698 month = coll_date_month_abbrevs [i];
6699 ptr = StringISearch (str, month);
6700 if (ptr == NULL) continue;
6701 for (j = 0; j < 5; j++) {
6702 ptr [j] = month [j];
6703 }
6704 return;
6705 }
6706 }
6707
6708 typedef struct stringpair {
6709 CharPtr from;
6710 CharPtr to;
6711 } StringPair, PNTR StringPairPtr;
6712
6713 static StringPair sex_conv[] = {
6714 { "asexual female", "asexual and female" },
6715 { "asexual male", "asexual and male" },
6716 { "dioecious female", "dioecious and female" },
6717 { "dioecious male", "dioecious and male" },
6718 { "f and m mixed", "female, male, and mixed" },
6719 { "f", "female" },
6720 { "f/m", "female and male" },
6721 { "female,male", "female and male" },
6722 { "female/hermaphrodite", "female and hermaphrodite" },
6723 { "female/male mixed", "female, male, and mixed" },
6724 { "female/male", "female and male" },
6725 { "m and f mixed", "male, female, and mixed" },
6726 { "m", "male" },
6727 { "m/f", "male and female" },
6728 { "male,female", "male and female" },
6729 { "male/female mixed", "male, female, and mixed" },
6730 { "male/female", "male and female" },
6731 { "male/hermaphrodite", "male and hermaphrodite" },
6732 { "mixed female and male", "mixed, female, and male" },
6733 { "mixed female/male", "mixed, female, and male" },
6734 { "mixed male and female", "mixed, male, and female" },
6735 { "mixed male/female", "mixed, male, and female" },
6736 { NULL, NULL }
6737 };
6738
/*
 * CleanSubSourceList
 *
 * Normalizes every SubSource in the chain at *sspp and removes entries that
 * are empty or redundant.
 *
 * Per entry:
 *   - no-name subtypes (see IsNoNameSubSource) get their name set to "";
 *     all others have the name cleaned, trimmed and unquoted;
 *   - country: junk and a trailing ':' are removed, and the spellings
 *     "United States", "United States of America", "U.S.A." and a leading
 *     "United States:" are rewritten to use "USA";
 *   - clone: junk removed;
 *   - altitude: values AltitudeIsValid rejects go through CleanupAltitude;
 *   - other: CorrectTildes;
 *   - sex: lower-cased, then expanded through the sex_conv table;
 *   - collection_date: month abbreviation capitalisation fixed;
 *   - fwd/rev primer seq: normalized by CleanPCRPrimerSeq.
 *
 * An entry is then unlinked and freed when it has no text (unless it is a
 * no-name subtype), when it repeats the previously kept entry (same subtype
 * and: a no-name subtype, equal name ignoring case, or - for SUBSRC_other -
 * one note found inside the other), or when it is a plastid_name whose text
 * maps to the supplied location.
 *
 * sspp     address of the list head; updated if the first node is removed
 * location genome location of the source; 0 disables the plastid check
 *
 * The primer clean-up used to be a private copy of CleanPCRPrimerSeq whose
 * "inside <...>" flag lived at function scope and was never reset, so an
 * unterminated '<' in one primer changed how blanks were stripped from the
 * next.  Calling the helper gives each value a fresh flag.  Merging of
 * split primer qualifiers is not done here; see CleanSubSourcePrimers.
 */
extern void CleanSubSourceList (SubSourcePtr PNTR sspp, Uint1 location)

{
  Char               ch;
  Boolean            discard;
  Int2               i;
  SubSourcePtr       last = NULL;
  size_t             len;
  SubSourcePtr       next;
  SubSourcePtr PNTR  prev;
  CharPtr            ptr;
  SubSourcePtr       ssp;
  CharPtr            str;

  if (sspp == NULL) return;
  prev = sspp;
  ssp = *sspp;
  while (ssp != NULL) {
    next = ssp->next;
    discard = FALSE;

    if (! IsNoNameSubSource (ssp)) {
      CleanVisStringAndCompress (&(ssp->name));
      TrimSpacesAndJunkFromEnds (ssp->name, FALSE);
      RemoveFlankingQuotes (&(ssp->name));
    } else {
      /* presence of the qualifier is the whole message: keep an empty name */
      ssp->name = MemFree (ssp->name);
      ssp->name = StringSave ("");
    }

    if (ssp->subtype == SUBSRC_country) {
      CleanVisStringJunk (&(ssp->name));
      len = StringLen (ssp->name);
      if (len > 2) {
        str = ssp->name;
        if (str [len - 1] == ':') {
          str [len - 1] = '\0';
        }
      }
      if (StringICmp (ssp->name, "United States") == 0 ||
          StringICmp (ssp->name, "United States of America") == 0 ||
          StringICmp (ssp->name, "U.S.A.") == 0) {
        ssp->name = MemFree (ssp->name);
        ssp->name = StringSave ("USA");
      }
      if (StringNICmp (ssp->name, "United States:", 14) == 0) {
        /*
         * Overwrite the 13 letters of "United States" with ten blanks
         * followed by "USA", leaving the ':' and the rest in place, then
         * let the trim remove the blanks: "United States:x" -> "USA:x".
         */
        str = ssp->name;
        for (i = 0; i < 10; i++) {
          str [i] = ' ';
        }
        str [10] = 'U';
        str [11] = 'S';
        str [12] = 'A';
        TrimSpacesAroundString (ssp->name);
      }
    } else if (ssp->subtype == SUBSRC_clone) {
      CleanVisStringJunk (&(ssp->name));
    } else if (ssp->subtype == SUBSRC_altitude) {
      if (ssp->name != NULL && (! AltitudeIsValid (ssp->name))) {
        CleanupAltitude (ssp);
      }
    } else if (ssp->subtype == SUBSRC_lat_lon) {
      /* no lat_lon normalization is applied here */
    } else if (ssp->subtype == SUBSRC_other && StringDoesHaveText (ssp->name)) {
      CorrectTildes (&(ssp->name));
    } else if (ssp->subtype == SUBSRC_sex) {
      ptr = ssp->name;
      if (StringDoesHaveText (ptr)) {
        for (; *ptr != '\0'; ptr++) {
          ch = *ptr;
          *ptr = TO_LOWER (ch);
        }
        for (i = 0; sex_conv[i].from != NULL; i++) {
          if (StringCmp (ssp->name, sex_conv[i].from) == 0) {
            ssp->name = MemFree (ssp->name);
            ssp->name = StringSave (sex_conv[i].to);
            break;
          }
        }
      }
    } else if (ssp->subtype == SUBSRC_collection_date) {
      ptr = ssp->name;
      if (StringDoesHaveText (ptr)) {
        CorrectMonthCapitalization (ptr);
      }
    }

    if (ssp->subtype == SUBSRC_fwd_primer_seq ||
        ssp->subtype == SUBSRC_rev_primer_seq) {
      /* lower-case, strip blanks outside <...>, capitalise <OTHER> */
      CleanPCRPrimerSeq (ssp->name);
    }

    CleanVisString (&(ssp->attrib));

    if (last != NULL) {
      if (HasNoText (ssp->name) && (! IsNoNameSubSource (ssp))) {
        discard = TRUE;
      } else if (last->subtype == ssp->subtype &&
                 (IsNoNameSubSource (ssp) ||
                  StringICmp (last->name, ssp->name) == 0 ||
                  (last->subtype == SUBSRC_other &&
                   StringStr (last->name, ssp->name) != NULL))) {
        discard = TRUE;
      } else if (last->subtype == ssp->subtype &&
                 last->subtype == SUBSRC_other &&
                 IsSubString (last->name, ssp->name)) {
        /* the previous note is covered by this one: keep the newer text */
        last->name = MemFree (last->name);
        last->name = ssp->name;
        ssp->name = NULL;
        discard = TRUE;
      } else if (ssp->subtype == SUBSRC_plastid_name &&
                 location != 0 &&
                 location == LocationForPlastidText (ssp->name)) {
        discard = TRUE;
      }
    } else if (HasNoText (ssp->name) && (! IsNoNameSubSource (ssp))) {
      discard = TRUE;
    } else if (ssp->subtype == SUBSRC_plastid_name &&
               location != 0 &&
               location == LocationForPlastidText (ssp->name)) {
      discard = TRUE;
    }

    if (discard) {
      *prev = ssp->next;
      ssp->next = NULL;
      SubSourceFree (ssp);
    } else {
      last = ssp;
      prev = &(ssp->next);
    }
    ssp = next;
  }
}
7065
7066 //LCOV_EXCL_START
/*
 * Helper for CleanSubSourcePrimers.  When the value of a primer qualifier
 * contains a comma and is not already bracketed by '(' ... ')', the value is
 * passed through TrimParenthesesAndCommasAroundString and then re-stored as
 * "(" + value + ")".  If the new buffer cannot be allocated, the trimmed
 * value is left in place.
 */
static void ParenthesizeMergedPrimer (SubSourcePtr ssp)

{
  size_t   origlen;
  CharPtr  wrapped;

  if (ssp == NULL) return;
  if (StringChr (ssp->name, ',') == NULL) return;

  /* the length is taken before trimming, so the buffer is never too small */
  origlen = StringLen (ssp->name);
  if (ssp->name [0] == '(' && ssp->name [origlen - 1] == ')') return;

  TrimParenthesesAndCommasAroundString (ssp->name);
  wrapped = MemNew (sizeof (Char) * (origlen + 4));
  if (wrapped == NULL) return;

  StringCpy (wrapped, "(");
  StringCat (wrapped, ssp->name);
  StringCat (wrapped, ")");
  ssp->name = MemFree (ssp->name);
  ssp->name = wrapped;
}

/*
 * Collapses repeated primer qualifiers in a SubSource chain.
 *
 * For each of the four primer subtypes (fwd/rev sequence, fwd/rev name) the
 * first node encountered is kept; the name of every later node of the same
 * subtype is folded into the kept node with CombineSplitQual, and the later
 * node is unlinked and freed.  Afterwards each kept node whose value contains
 * a comma is wrapped in parentheses if it is not already.
 *
 * sspp - address of the head pointer of the chain; may be NULL (no-op).
 */
extern void CleanSubSourcePrimers (SubSourcePtr PNTR sspp)

{
  SubSourcePtr       first_fwd_seq = NULL, first_rev_seq = NULL;
  SubSourcePtr       first_fwd_name = NULL, first_rev_name = NULL;
  SubSourcePtr       curr, following;
  SubSourcePtr PNTR  link;
  SubSourcePtr PNTR  slot;
  Boolean            merged;

  if (sspp == NULL) return;

  link = sspp;
  for (curr = *sspp; curr != NULL; curr = following) {
    following = curr->next;

    /* pick the "first seen" slot that matches this node's subtype, if any */
    switch (curr->subtype) {
      case SUBSRC_fwd_primer_seq :
        slot = &first_fwd_seq;
        break;
      case SUBSRC_rev_primer_seq :
        slot = &first_rev_seq;
        break;
      case SUBSRC_fwd_primer_name :
        slot = &first_fwd_name;
        break;
      case SUBSRC_rev_primer_name :
        slot = &first_rev_name;
        break;
      default :
        slot = NULL;
        break;
    }

    merged = FALSE;
    if (slot != NULL) {
      if (*slot == NULL) {
        *slot = curr;
      } else {
        (*slot)->name = CombineSplitQual ((*slot)->name, curr->name);
        merged = TRUE;
      }
    }

    if (merged) {
      /* splice the absorbed node out of the chain and release it */
      *link = curr->next;
      curr->next = NULL;
      SubSourceFree (curr);
    } else {
      link = &(curr->next);
    }
  }

  ParenthesizeMergedPrimer (first_fwd_seq);
  ParenthesizeMergedPrimer (first_rev_seq);
  ParenthesizeMergedPrimer (first_fwd_name);
  ParenthesizeMergedPrimer (first_rev_name);
}
7195 //LCOV_EXCL_STOP
7196
/*
 * Moves string entries that StringHasOrgModPrefix recognizes out of a
 * ValNode list and into an OrgMod chain.
 *
 * An entry is moved only when a value is reported for it and the whole
 * string holds at most four spaces and no commas.  Each moved entry becomes
 * a new OrgMod pushed onto the front of *ompp, and its ValNode is unlinked
 * and freed (also when OrgModNew fails).  Other entries stay in place.
 *
 * vnpp - address of the ValNode list head; ompp - address of the OrgMod
 * chain head.  Either being NULL makes the call a no-op.
 */
static void OrpModToOrgMod (ValNodePtr PNTR vnpp, OrgModPtr PNTR ompp)

{
  Int2        commas;
  ValNodePtr  curr;
  ValNodePtr  following;
  OrgModPtr   newmod;
  CharPtr     scan;
  Int2        spaces;
  Uint1       subtype;
  CharPtr     text;
  CharPtr     value;

  if (vnpp == NULL || ompp == NULL) return;

  for (curr = *vnpp; curr != NULL; curr = following) {
    following = curr->next;
    text = (CharPtr) curr->data.ptrvalue;
    value = NULL;
    subtype = 0;
    StringHasOrgModPrefix (text, &value, &subtype, TRUE);

    if (value != NULL) {
      /* reject long free text: more than four spaces, or any comma */
      spaces = 0;
      commas = 0;
      for (scan = text; *scan != '\0'; scan++) {
        if (*scan == ' ') {
          spaces++;
        } else if (*scan == ',') {
          commas++;
        }
      }
      if (spaces > 4 || commas > 0) {
        value = NULL;
      }
    }

    if (value == NULL) {
      vnpp = &(curr->next);
      continue;
    }

    newmod = OrgModNew ();
    if (newmod != NULL) {
      newmod->subtype = (Uint1) subtype;
      newmod->subname = StringSave (value);
      newmod->next = *ompp;
      *ompp = newmod;
    }
    *vnpp = curr->next;
    curr->next = NULL;
    ValNodeFreeData (curr);
  }
}
7254
/*
 * Looks for a SubSource qualifier name at the start of str.
 *
 * current_subsource_subtype_alist is searched first (by .name); if no entry
 * yields a non-zero subtype, subsource_aliases is searched (by .alias).  The
 * Boolean handed to StringHasPrefix is TRUE for the germline, rearranged,
 * transgenic, environmental_sample and metagenomic subtypes.  The search
 * stops at the first entry that matches and has a non-zero value.
 *
 * pval         - if non-NULL, receives the result of the last StringHasPrefix
 *                call made (NULL when nothing matched).
 * p_subtypeval - if non-NULL, receives the matching subtype, or 0.
 * skippref     - forwarded unchanged to StringHasPrefix.
 */
static void StringHasSubSourcePrefix (CharPtr str, CharPtr PNTR pval, Uint1Ptr p_subtypeval, Boolean skippref)
{
  Uint1    found = 0;
  Int2     idx;
  Boolean  is_flag;
  CharPtr  match = NULL;

  for (idx = 0; current_subsource_subtype_alist [idx].name != NULL && found == 0; idx++) {
    is_flag = (Boolean) (current_subsource_subtype_alist [idx].value == SUBSRC_germline ||
                         current_subsource_subtype_alist [idx].value == SUBSRC_rearranged ||
                         current_subsource_subtype_alist [idx].value == SUBSRC_transgenic ||
                         current_subsource_subtype_alist [idx].value == SUBSRC_environmental_sample ||
                         current_subsource_subtype_alist [idx].value == SUBSRC_metagenomic);
    match = StringHasPrefix (str, current_subsource_subtype_alist [idx].name, is_flag, skippref);
    if (match != NULL) {
      found = current_subsource_subtype_alist [idx].value;
    }
  }

  /* fall back to the alias table only when the primary table gave nothing */
  if (found == 0) {
    for (idx = 0; subsource_aliases [idx].name != NULL && found == 0; idx++) {
      is_flag = (Boolean) (subsource_aliases [idx].value == SUBSRC_germline ||
                           subsource_aliases [idx].value == SUBSRC_rearranged ||
                           subsource_aliases [idx].value == SUBSRC_transgenic ||
                           subsource_aliases [idx].value == SUBSRC_environmental_sample ||
                           subsource_aliases [idx].value == SUBSRC_metagenomic);
      match = StringHasPrefix (str, subsource_aliases [idx].alias, is_flag, skippref);
      if (match != NULL) {
        found = subsource_aliases [idx].value;
      }
    }
  }

  if (pval != NULL) {
    *pval = match;
  }
  if (p_subtypeval != NULL) {
    *p_subtypeval = found;
  }
}
7294
/*
 * Moves string entries that StringHasSubSourcePrefix recognizes out of a
 * ValNode list and into a SubSource chain.
 *
 * An entry is moved only when a value is reported for it and the whole
 * string holds at most four spaces and no commas.  Each moved entry becomes
 * a new SubSource pushed onto the front of *sspp, and its ValNode is
 * unlinked and freed (also when SubSourceNew fails).  Other entries stay.
 *
 * vnpp - address of the ValNode list head; sspp - address of the SubSource
 * chain head.  Either being NULL makes the call a no-op.
 */
static void OrpModToSubSource (ValNodePtr PNTR vnpp, SubSourcePtr PNTR sspp)

{
  Int2          commas;
  ValNodePtr    curr;
  ValNodePtr    following;
  SubSourcePtr  newsub;
  CharPtr       scan;
  Int2          spaces;
  Uint1         subtype_val = 0;
  CharPtr       text;
  CharPtr       value;

  if (vnpp == NULL || sspp == NULL) return;

  for (curr = *vnpp; curr != NULL; curr = following) {
    following = curr->next;
    text = (CharPtr) curr->data.ptrvalue;
    value = NULL;
    subtype_val = 0;
    StringHasSubSourcePrefix (text, &value, &subtype_val, TRUE);

    if (value != NULL) {
      /* reject long free text: more than four spaces, or any comma */
      spaces = 0;
      commas = 0;
      for (scan = text; *scan != '\0'; scan++) {
        if (*scan == ' ') {
          spaces++;
        } else if (*scan == ',') {
          commas++;
        }
      }
      if (spaces > 4 || commas > 0) {
        value = NULL;
      }
    }

    if (value == NULL) {
      vnpp = &(curr->next);
      continue;
    }

    newsub = SubSourceNew ();
    if (newsub != NULL) {
      newsub->subtype = subtype_val;
      newsub->name = StringSave (value);
      newsub->next = *sspp;
      *sspp = newsub;
    }
    *vnpp = curr->next;
    curr->next = NULL;
    ValNodeFreeData (curr);
  }
}
7353
/*
 * Converts GBQuals whose qualifier name is recognized by
 * StringHasOrgModPrefix or StringHasSubSourcePrefix into "name=value"
 * strings appended to a ValNode list, removing the converted GBQuals from
 * their chain.
 *
 * prevgbq - address of the head pointer of the GBQual chain.
 * vnpp    - address of the ValNode list that receives the new strings.
 * Either being NULL makes the call a no-op (without a destination list the
 * qualifier would be removed and its text lost).
 *
 * The output buffer is sized from the two strings actually copied into it.
 * It was previously sized from gbq->val plus a fixed 64-byte allowance,
 * which a longer matched name could overrun.
 */
static void GbqualToOrpMod (GBQualPtr PNTR prevgbq, ValNodePtr PNTR vnpp)

{
  GBQualPtr  gbq;
  size_t     len;
  GBQualPtr  next;
  CharPtr    str;
  Boolean    unlink;
  CharPtr    val;
  Uint1      subtype_val;

  if (prevgbq == NULL || vnpp == NULL) return;
  gbq = *prevgbq;
  while (gbq != NULL) {
    next = gbq->next;
    unlink = FALSE;
    val = NULL;
    if (gbq->qual != NULL) {
      subtype_val = 0;
      StringHasOrgModPrefix (gbq->qual, &val, &subtype_val, FALSE);
      if (val == NULL) {
        subtype_val = 0;
        StringHasSubSourcePrefix (gbq->qual, &val, &subtype_val, FALSE);
      }
    }
    if (val != NULL) {
      /* room for val, '=', gbq->val and the terminating NUL */
      len = StringLen (val) + StringLen (gbq->val) + 2;
      str = MemNew (sizeof (Char) * len);
      if (str != NULL) {
        StringCpy (str, val);
        StringCat (str, "=");
        StringCat (str, gbq->val);
        ValNodeAddStr (vnpp, 0, str);
        unlink = TRUE;
      }
    }
    if (unlink) {
      *prevgbq = gbq->next;
      gbq->next = NULL;
      GBQualFree (gbq);
    } else {
      prevgbq = (GBQualPtr PNTR) &(gbq->next);
    }
    gbq = next;
  }
}
7402
7403 #define IS_WHITESP(c) (((c) == ' ') || ((c) == '\n') || ((c) == '\r') || ((c) == '\t'))
7404
IsStringSingleToken(CharPtr str)7405 static Boolean IsStringSingleToken (CharPtr str)
7406
7407 {
7408 Char ch;
7409
7410 if (StringHasNoText (str)) return FALSE;
7411
7412 ch = *str;
7413 while (ch != '\0') {
7414 if (IS_WHITESP (ch)) return FALSE;
7415 str++;
7416 ch = *str;
7417 }
7418
7419 return TRUE;
7420 }
7421
FindAnOrgMod(OrgNamePtr onp,Uint1 subtype)7422 static CharPtr FindAnOrgMod (OrgNamePtr onp, Uint1 subtype)
7423
7424 {
7425 OrgModPtr omp;
7426
7427 if (onp == NULL || subtype == 0) return NULL;
7428
7429 for (omp = onp->mod; omp != NULL; omp = omp->next) {
7430 if (omp->subtype != subtype) continue;
7431 if (StringHasNoText (omp->subname)) continue;
7432 return omp->subname;
7433 }
7434
7435 return NULL;
7436 }
7437
FindASubSource(BioSourcePtr biop,Uint1 subtype)7438 static CharPtr FindASubSource (BioSourcePtr biop, Uint1 subtype)
7439
7440 {
7441 SubSourcePtr ssp;
7442
7443 if (biop == NULL || subtype == 0) return NULL;
7444
7445 for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
7446 if (ssp->subtype != subtype) continue;
7447 if (StringHasNoText (ssp->name)) continue;
7448 return ssp->name;
7449 }
7450
7451 return NULL;
7452 }
7453
/*
 * Returns a pointer to the first '~' in str that is not immediately followed
 * by another '~', skipping over
 *   - any run of tildes that directly follows a space, and
 *   - any run of two or more consecutive tildes.
 * Returns NULL if there is no such tilde or if str has no text.
 */
static CharPtr FindNextSingleTilde (CharPtr str)

{
  CharPtr  cursor;

  if (StringHasNoText (str)) return NULL;

  cursor = str;
  while (*cursor != '\0') {
    if (*cursor == ' ' && cursor [1] == '~') {
      /* tildes right after a space are never reported */
      cursor++;
      while (*cursor == '~') {
        cursor++;
      }
    } else if (*cursor == '~') {
      if (cursor [1] != '~') return cursor;
      /* two or more in a row: step over the whole run */
      while (*cursor == '~') {
        cursor++;
      }
    } else {
      cursor++;
    }
  }

  return NULL;
}
7491
SplitAtSingleTilde(CharPtr strs)7492 static ValNodePtr SplitAtSingleTilde (CharPtr strs)
7493
7494 {
7495 ValNodePtr head = NULL;
7496 CharPtr ptr, str, tmp;
7497
7498 if (StringHasNoText (strs)) return NULL;
7499
7500 tmp = StringSave (strs);
7501 str = tmp;
7502
7503 while (StringDoesHaveText (str)) {
7504 ptr = FindNextSingleTilde (str);
7505 if (ptr != NULL) {
7506 *ptr = '\0';
7507 ptr++;
7508 }
7509 TrimSpacesAroundString (str);
7510 ValNodeCopyStr (&head, 0, str);
7511 str = ptr;
7512 }
7513
7514 MemFree (tmp);
7515 return head;
7516 }
7517
MergeTildeStrings(ValNodePtr head)7518 static CharPtr MergeTildeStrings (ValNodePtr head)
7519
7520 {
7521 size_t len = 0;
7522 CharPtr prefix = "", ptr, str;
7523 ValNodePtr vnp;
7524
7525 if (head == NULL) return NULL;
7526
7527 for (vnp = head; vnp != NULL; vnp = vnp->next) {
7528 str = (CharPtr) vnp->data.ptrvalue;
7529 if (StringHasNoText (str)) continue;
7530 len += StringLen (str) + 1;
7531 }
7532 if (len < 1) return NULL;
7533
7534 ptr = MemNew (sizeof (Char) * (len + 2));
7535 if (ptr == NULL) return NULL;
7536
7537 for (vnp = head; vnp != NULL; vnp = vnp->next) {
7538 str = (CharPtr) vnp->data.ptrvalue;
7539 if (StringHasNoText (str)) continue;
7540 StringCat (ptr, prefix);
7541 StringCat (ptr, str);
7542 prefix = "~";
7543 }
7544
7545 return ptr;
7546 }
7547
7548
/*
 * Removes redundant pieces from ORGMOD_other notes.
 *
 * Each ORGMOD_other subname is split with SplitAtSingleTilde.  A piece is
 * dropped when it carries an OrgMod prefix whose value equals (ignoring
 * case) the subname FindAnOrgMod returns for that subtype on onp, or,
 * failing an OrgMod prefix, a SubSource prefix whose value equals the name
 * FindASubSource returns for that subtype on biop.  The surviving pieces are
 * rejoined with MergeTildeStrings and stored back; an OrgMod left with no
 * text is unlinked and freed.
 *
 * Dropped pieces are released with MemFree.  Previously the pointer was only
 * set to NULL, which leaked the copy allocated by SplitAtSingleTilde.
 *
 * Does nothing when biop or onp is NULL.
 */
static void CleanupOrgModOther (BioSourcePtr biop, OrgNamePtr onp)

{
  ValNodePtr      head, vnp;
  OrgModPtr       next;
  OrgModPtr       omp;
  OrgModPtr PNTR  prev;
  CharPtr         str;
  Uint1           subtype_val;
  CharPtr         tmp;
  Boolean         unlink;
  CharPtr         val;

  if (biop == NULL || onp == NULL) return;

  prev = &(onp->mod);
  omp = onp->mod;
  while (omp != NULL) {
    next = omp->next;
    unlink = FALSE;
    if (omp->subtype == ORGMOD_other) {
      head = SplitAtSingleTilde (omp->subname);
      for (vnp = head; vnp != NULL; vnp = vnp->next) {
        str = (CharPtr) vnp->data.ptrvalue;
        if (StringHasNoText (str)) continue;
        val = NULL;
        subtype_val = 0;
        StringHasOrgModPrefix (str, &val, &subtype_val, TRUE);
        if (val != NULL) {
          tmp = FindAnOrgMod (onp, subtype_val);
        } else {
          tmp = NULL;
          subtype_val = 0;
          StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE);
          if (val != NULL) {
            tmp = FindASubSource (biop, subtype_val);
          }
        }
        if (val != NULL && tmp != NULL && StringICmp (tmp, val) == 0) {
          /* redundant piece: free it; val is not used after this point */
          vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
        }
      }
      str = MergeTildeStrings (head);
      ValNodeFreeData (head);
      omp->subname = MemFree (omp->subname);
      omp->subname = str;
      if (StringHasNoText (str)) {
        unlink = TRUE;
      }
    }
    /*
     * bio_material / culture_collection / specimen_voucher modifiers are
     * deliberately left untouched here (the FixOrgModVoucher call is
     * disabled).
     */
    if (unlink) {
      *prev = omp->next;
      omp->next = NULL;
      OrgModFree (omp);
    } else {
      prev = &(omp->next);
    }
    omp = next;
  }
}
7618
/*
 * Removes redundant pieces from SUBSRC_other notes.
 *
 * Each SUBSRC_other name is split with SplitAtSingleTilde.  A piece is
 * dropped when it carries an OrgMod prefix whose value equals (ignoring
 * case) the subname FindAnOrgMod returns for that subtype on onp, or,
 * failing an OrgMod prefix, a SubSource prefix whose value equals the name
 * FindASubSource returns for that subtype on biop.  The surviving pieces are
 * rejoined with MergeTildeStrings and stored back; a SubSource left with no
 * text is unlinked and freed.
 *
 * Dropped pieces are released with MemFree.  Previously the pointer was only
 * set to NULL, which leaked the copy allocated by SplitAtSingleTilde.
 *
 * Does nothing when biop is NULL.  onp may be NULL; FindAnOrgMod then finds
 * nothing.
 */
static void CleanupSubSourceOther (BioSourcePtr biop, OrgNamePtr onp)

{
  ValNodePtr         head, vnp;
  SubSourcePtr       next;
  SubSourcePtr PNTR  prev;
  SubSourcePtr       ssp;
  CharPtr            str;
  Uint1              subtype_val;
  CharPtr            tmp;
  Boolean            unlink;
  CharPtr            val;

  if (biop == NULL /* || onp == NULL */ ) return;

  prev = &(biop->subtype);
  ssp = biop->subtype;
  while (ssp != NULL) {
    next = ssp->next;
    unlink = FALSE;
    if (ssp->subtype == SUBSRC_other) {
      head = SplitAtSingleTilde (ssp->name);
      for (vnp = head; vnp != NULL; vnp = vnp->next) {
        str = (CharPtr) vnp->data.ptrvalue;
        if (StringHasNoText (str)) continue;
        val = NULL;
        subtype_val = 0;
        StringHasOrgModPrefix (str, &val, &subtype_val, TRUE);
        if (val != NULL) {
          tmp = FindAnOrgMod (onp, subtype_val);
        } else {
          tmp = NULL;
          subtype_val = 0;
          StringHasSubSourcePrefix (str, &val, &subtype_val, TRUE);
          if (val != NULL) {
            tmp = FindASubSource (biop, subtype_val);
          }
        }
        if (val != NULL && tmp != NULL && StringICmp (tmp, val) == 0) {
          /* redundant piece: free it; val is not used after this point */
          vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
        }
      }
      str = MergeTildeStrings (head);
      ValNodeFreeData (head);
      ssp->name = MemFree (ssp->name);
      ssp->name = str;
      if (StringHasNoText (str)) {
        unlink = TRUE;
      }
    }
    if (unlink) {
      *prev = ssp->next;
      ssp->next = NULL;
      SubSourceFree (ssp);
    } else {
      prev = &(ssp->next);
    }
    ssp = next;
  }
}
7682
SortDbxref(VoidPtr ptr1,VoidPtr ptr2)7683 static int LIBCALLBACK SortDbxref (VoidPtr ptr1, VoidPtr ptr2)
7684
7685 {
7686 int compare;
7687 DbtagPtr dbt1;
7688 DbtagPtr dbt2;
7689 ObjectIdPtr oip1;
7690 ObjectIdPtr oip2;
7691 CharPtr str1;
7692 CharPtr str2;
7693 ValNodePtr vnp1;
7694 ValNodePtr vnp2;
7695
7696 if (ptr1 == NULL || ptr2 == NULL) return 0;
7697 vnp1 = *((ValNodePtr PNTR) ptr1);
7698 vnp2 = *((ValNodePtr PNTR) ptr2);
7699 if (vnp1 == NULL || vnp2 == NULL) return 0;
7700 dbt1 = (DbtagPtr) vnp1->data.ptrvalue;
7701 dbt2 = (DbtagPtr) vnp2->data.ptrvalue;
7702 if (dbt1 == NULL || dbt2 == NULL) return 0;
7703 str1 = (CharPtr) dbt1->db;
7704 str2 = (CharPtr) dbt2->db;
7705 if (str1 == NULL || str2 == NULL) return 0;
7706 compare = StringICmp (str1, str2);
7707 if (compare != 0) return compare;
7708 oip1 = dbt1->tag;
7709 oip2 = dbt2->tag;
7710 if (oip1 == NULL || oip2 == NULL) return 0;
7711 str1 = oip1->str;
7712 str2 = oip2->str;
7713 if (str1 != NULL && str2 != NULL) {
7714 return StringICmp (str1, str2);
7715 } else if (str1 == NULL && str2 == NULL) {
7716 if (oip1->id > oip2->id) {
7717 return 1;
7718 } else if (oip1->id < oip2->id) {
7719 return -1;
7720 }
7721 } else if (str1 != NULL) {
7722 return 1;
7723 } else if (str2 != NULL) {
7724 return -1;
7725 }
7726 return 0;
7727 }
7728
/*
 * Converts a dbxref tag stored as a digit string into a numeric id.
 *
 * The conversion happens only when the string is non-NULL, does not start
 * with '0', passes StringIsAllDigits, and is no larger than 2147483647
 * (fewer than 10 characters, or 10 characters comparing <= that literal).
 * On success oip->id is set and oip->str is freed and cleared.
 */
static void FixNumericDbxref (DbtagPtr dbt)

{
  CharPtr      digits;
  size_t       ndigits;
  long         number;
  ObjectIdPtr  oip;

  if (dbt == NULL) return;
  oip = dbt->tag;
  if (oip == NULL) return;

  digits = oip->str;
  if (digits == NULL || *digits == '0' || ! StringIsAllDigits (digits)) return;

  /* keep only values that fit in the literal bound "2147483647" */
  ndigits = StringLen (digits);
  if (ndigits > 10) return;
  if (ndigits == 10 && StringCmp (digits, "2147483647") > 0) return;

  if (sscanf (oip->str, "%ld", &number) != 1) return;
  oip->id = (Int4) number;
  oip->str = MemFree (oip->str);
}
7753
/*
 * Applies FixNumericDbxref to every non-NULL Dbtag in a ValNode list.
 */
static void FixNumericDbxrefs (ValNodePtr vnp)

{
  DbtagPtr    tag;
  ValNodePtr  node;

  for (node = vnp; node != NULL; node = node->next) {
    tag = (DbtagPtr) node->data.ptrvalue;
    if (tag == NULL) continue;
    FixNumericDbxref (tag);
  }
}
7767
FixOldDbxref(DbtagPtr dbt)7768 static void FixOldDbxref (DbtagPtr dbt)
7769
7770 {
7771 Boolean all_digits;
7772 Char buf [32];
7773 Char ch;
7774 CharPtr ident;
7775 size_t len;
7776 ObjectIdPtr oip;
7777 CharPtr ptr;
7778 CharPtr str;
7779
7780 if (dbt != NULL) {
7781
7782 TrimSpacesAroundString (dbt->db);
7783 oip = dbt->tag;
7784 if (oip != NULL && oip->str != NULL) {
7785 /*
7786 TrimSpacesAroundString (oip->str);
7787 */
7788 TrimSpacesSemicolonsAndCommas (oip->str);
7789 }
7790
7791 if (StringICmp (dbt->db, "SWISS-PROT") == 0 &&
7792 StringCmp (dbt->db, "Swiss-Prot") != 0) {
7793 dbt->db = MemFree (dbt->db);
7794 dbt->db = StringSave ("Swiss-Prot");
7795 } else if (StringICmp (dbt->db, "SPTREMBL") == 0) {
7796 dbt->db = MemFree (dbt->db);
7797 dbt->db = StringSave ("TrEMBL");
7798 } else if (StringICmp (dbt->db, "SUBTILIS") == 0) {
7799 dbt->db = MemFree (dbt->db);
7800 dbt->db = StringSave ("SubtiList");
7801 } else if (StringICmp (dbt->db, "MGD") == 0) {
7802 dbt->db = MemFree (dbt->db);
7803 dbt->db = StringSave ("MGI");
7804 } else if (StringCmp (dbt->db, "cdd") == 0) {
7805 dbt->db = MemFree (dbt->db);
7806 dbt->db = StringSave ("CDD");
7807 } else if (StringCmp (dbt->db, "FlyBase") == 0) {
7808 dbt->db = MemFree (dbt->db);
7809 dbt->db = StringSave ("FLYBASE");
7810 } else if (StringCmp (dbt->db, "GENEDB") == 0) {
7811 dbt->db = MemFree (dbt->db);
7812 dbt->db = StringSave ("GeneDB");
7813 } else if (StringCmp (dbt->db, "GreengenesID") == 0) {
7814 dbt->db = MemFree (dbt->db);
7815 dbt->db = StringSave ("Greengenes");
7816 } else if (StringCmp (dbt->db, "HMPID") == 0) {
7817 dbt->db = MemFree (dbt->db);
7818 dbt->db = StringSave ("HMP");
7819 }
7820 if (StringICmp (dbt->db, "HPRD") == 0) {
7821 oip = dbt->tag;
7822 if (oip != NULL && StringDoesHaveText (oip->str)) {
7823 str = oip->str;
7824 if (str != NULL && StringNICmp (str, "HPRD_", 5) == 0) {
7825 str [0] = ' ';
7826 str [1] = ' ';
7827 str [2] = ' ';
7828 str [3] = ' ';
7829 str [4] = ' ';
7830 TrimSpacesAroundString (str);
7831 }
7832 }
7833 } else if (StringICmp (dbt->db, "MGI") == 0) {
7834 oip = dbt->tag;
7835 if (oip != NULL && oip->str != NULL && StringDoesHaveText (oip->str)) {
7836 str = oip->str;
7837 if (StringNICmp (str, "MGI:", 4) == 0 || StringNICmp (str, "MGD:", 4) == 0) {
7838 str [0] = ' ';
7839 str [1] = ' ';
7840 str [2] = ' ';
7841 str [3] = ' ';
7842 TrimSpacesAroundString (str);
7843 } else if (StringNICmp (str, "J:", 2) == 0) {
7844 ptr = str + 2;
7845 ch = *ptr;
7846 all_digits = TRUE;
7847 while (ch != '\0') {
7848 if (! IS_DIGIT (ch)) {
7849 all_digits = FALSE;
7850 }
7851 ptr++;
7852 ch = *ptr;
7853 }
7854 if (all_digits) {
7855 oip->str = MemFree (oip->str);
7856 oip->str = StringSave ("");
7857 }
7858 }
7859 }
7860 }
7861 if (StringICmp (dbt->db, "Swiss-Prot") == 0 ||
7862 StringICmp (dbt->db, "SWISSPROT") == 0) {
7863 dbt->db = MemFree (dbt->db);
7864 dbt->db = StringSave ("UniProt/Swiss-Prot");
7865 } else if (StringICmp (dbt->db, "TrEMBL") == 0) {
7866 dbt->db = MemFree (dbt->db);
7867 dbt->db = StringSave ("UniProt/TrEMBL");
7868 } else if (StringICmp (dbt->db, "LocusID") == 0) {
7869 dbt->db = MemFree (dbt->db);
7870 dbt->db = StringSave ("GeneID");
7871 } else if (StringICmp (dbt->db, "MaizeDB") == 0) {
7872 dbt->db = MemFree (dbt->db);
7873 dbt->db = StringSave ("MaizeGDB");
7874 }
7875 if (StringICmp (dbt->db, "UniProt/Swiss-Prot") == 0) {
7876 dbt->db = MemFree (dbt->db);
7877 dbt->db = StringSave ("UniProtKB/Swiss-Prot");
7878 } else if (StringICmp (dbt->db, "UniProt/TrEMBL") == 0) {
7879 dbt->db = MemFree (dbt->db);
7880 dbt->db = StringSave ("UniProtKB/TrEMBL");
7881 } else if (StringICmp (dbt->db, "Genew") == 0) {
7882 dbt->db = MemFree (dbt->db);
7883 dbt->db = StringSave ("HGNC");
7884 } else if (StringICmp (dbt->db, "IFO") == 0) {
7885 dbt->db = MemFree (dbt->db);
7886 dbt->db = StringSave ("NBRC");
7887 } else if (StringICmp (dbt->db, "BHB") == 0 ||
7888 StringICmp (dbt->db, "BioHealthBase") == 0) {
7889 dbt->db = MemFree (dbt->db);
7890 dbt->db = StringSave ("IRD");
7891 }
7892
7893 oip = dbt->tag;
7894 if (oip != NULL && oip->str != NULL) {
7895 ident = oip->str;
7896 if (StringCmp (dbt->db, "HGNC") == 0 && StringNCmp (ident, "HGNC:", 5) == 0 ) {
7897 ident += 5;
7898 ptr = StringSave (ident);
7899 oip->str = MemFree (oip->str);
7900 oip->str = ptr;
7901 } else if (StringCmp (dbt->db, "VGNC") == 0 && StringNCmp (ident, "VGNC:", 5) == 0 ) {
7902 ident += 5;
7903 ptr = StringSave (ident);
7904 oip->str = MemFree (oip->str);
7905 oip->str = ptr;
7906 } else if (StringCmp (dbt->db, "MGI") == 0 && StringNCmp (ident, "MGI:", 4) == 0 ) {
7907 ident += 4;
7908 ptr = StringSave (ident);
7909 oip->str = MemFree (oip->str);
7910 oip->str = ptr;
7911 } else if (StringCmp (dbt->db, "RGD") == 0 && StringNCmp (ident, "RGD:", 4) == 0 ) {
7912 ident += 4;
7913 ptr = StringSave (ident);
7914 oip->str = MemFree (oip->str);
7915 oip->str = ptr;
7916 }
7917 }
7918 if (oip != NULL) {
7919 if (StringCmp (dbt->db, "HGNC") == 0 || StringCmp (dbt->db, "VGNC") == 0 || StringCmp (dbt->db, "MGI") == 0) {
7920 if (oip->str == NULL && oip->id > 0) {
7921 sprintf (buf, "%ld", (long) oip->id);
7922 ptr = StringSave (buf);
7923 oip->id = 0;
7924 oip->str = ptr;
7925 }
7926 ident = oip->str;
7927 if (ident != NULL) {
7928 if (StringChr (ident, ':') == NULL) {
7929 len = StringLen (dbt->db) + StringLen (ident) + 5;
7930 ptr = (CharPtr) MemNew (sizeof (Char) * len);
7931 if (ptr != NULL) {
7932 sprintf (ptr, "%s:%s", dbt->db, ident);
7933 oip->str = MemFree (oip->str);
7934 oip->str = ptr;
7935 }
7936 }
7937 }
7938 }
7939 }
7940 }
7941 }
7942
/*
 * Runs FixOldDbxref on every Dbtag in a ValNode list and, unless
 * isEmblOrDdbj is set, expands string tags that contain a colon.
 *
 * Expansion applies to every db except HGNC, VGNC and MGI.  If nothing but
 * whitespace follows the first colon, the tag is simply cut at the colon.
 * Otherwise the tag is cut at the colon and the trimmed remainder becomes
 * the tag of a new Dbtag with the same db name, inserted right after the
 * current node.  The inserted node is visited on the next iteration, so
 * further colons are expanded in turn.
 *
 * If any allocation for the new node fails, the partially built Dbtag is
 * released (previously it leaked) and the current tag is left uncut.
 */
static void FixOldDbxrefs (ValNodePtr vnp, Boolean isEmblOrDdbj)

{
  CharPtr      colon;
  DbtagPtr     dbt;
  DbtagPtr     newdbt;
  ValNodePtr   newvnp;
  ObjectIdPtr  oip;

  for (; vnp != NULL; vnp = vnp->next) {
    dbt = (DbtagPtr) vnp->data.ptrvalue;
    if (dbt == NULL) continue;

    FixOldDbxref (dbt);

    if (isEmblOrDdbj) continue;
    if (StringCmp (dbt->db, "HGNC") == 0 ||
        StringCmp (dbt->db, "VGNC") == 0 ||
        StringCmp (dbt->db, "MGI") == 0) continue;

    /* expand db_xrefs with colons inside tags */
    oip = dbt->tag;
    if (oip == NULL || oip->str == NULL) continue;
    colon = StringChr (oip->str, ':');
    if (colon == NULL) continue;

    if (StringHasNoText (colon + 1)) {
      *colon = '\0';
      continue;
    }

    newdbt = DbtagNew ();
    if (newdbt == NULL) continue;
    /* the tag is attached at once so DbtagFree can release it on failure */
    newdbt->tag = ObjectIdNew ();
    newvnp = (newdbt->tag != NULL) ? ValNodeNew (NULL) : NULL;
    if (newvnp == NULL) {
      DbtagFree (newdbt);
      continue;
    }

    *colon = '\0';
    colon++;
    TrimSpacesAroundString (colon);
    newdbt->db = StringSave (dbt->db);
    newdbt->tag->str = StringSave (colon);
    newvnp->data.ptrvalue = (Pointer) newdbt;
    newvnp->next = vnp->next;
    vnp->next = newvnp;
  }
}
7996
/*
 * Removes each dbxref that duplicates the previously kept one.
 *
 * Two Dbtags are duplicates when their db names match (case-insensitive)
 * and either both tags have strings that match (case-insensitive) or neither
 * has a string and the numeric ids are equal.  Only a Dbtag and the last
 * kept Dbtag are compared, so the list is expected to be sorted first.
 * Duplicate nodes are unlinked and freed together with their Dbtag.
 *
 * Nodes whose data pointer is NULL are left in the list.  The link pointer
 * is now advanced past them; previously it was not, so removing a later
 * duplicate also dropped (and leaked) the empty nodes in between.
 */
static void CleanupDuplicateDbxrefs (ValNodePtr PNTR prevvnp)

{
  DbtagPtr     dbt;
  DbtagPtr     last = NULL;
  ValNodePtr   nextvnp;
  ObjectIdPtr  oip1;
  ObjectIdPtr  oip2;
  Boolean      unlink;
  ValNodePtr   vnp;

  if (prevvnp == NULL) return;
  vnp = *prevvnp;
  while (vnp != NULL) {
    nextvnp = vnp->next;
    dbt = (DbtagPtr) vnp->data.ptrvalue;
    if (dbt == NULL) {
      /* keep the empty node, but keep the link pointer in step with it */
      prevvnp = (ValNodePtr PNTR) &(vnp->next);
      vnp = nextvnp;
      continue;
    }

    unlink = FALSE;
    if (last != NULL &&
        dbt->db != NULL && last->db != NULL &&
        StringICmp ((CharPtr) dbt->db, (CharPtr) last->db) == 0) {
      oip1 = dbt->tag;
      oip2 = last->tag;
      if (oip1 != NULL && oip2 != NULL) {
        if (oip1->str != NULL && oip2->str != NULL) {
          if (StringICmp (oip1->str, oip2->str) == 0) {
            unlink = TRUE;
          }
        } else if (oip1->str == NULL && oip2->str == NULL) {
          if (oip1->id == oip2->id) {
            unlink = TRUE;
          }
        }
      }
    }

    if (unlink) {
      *prevvnp = vnp->next;
      vnp->next = NULL;
      DbtagFree (dbt);
      ValNodeFree (vnp);
    } else {
      last = dbt;
      prevvnp = (ValNodePtr PNTR) &(vnp->next);
    }
    vnp = nextvnp;
  }
}
8053
/*
 * Removes dbxrefs that are obsolete or empty.
 *
 * A Dbtag is removed when its db name has no text or is PID, PIDg or NID
 * (case-insensitive), when it has no tag, when its string tag has no text,
 * or when it has no string tag and a numeric id of 0.  Removed nodes are
 * unlinked and freed together with their Dbtag.
 *
 * Nodes whose data pointer is NULL are left in the list.  The link pointer
 * is now advanced past them; previously it was not, so removing a later
 * entry also dropped (and leaked) the empty nodes in between.
 */
static void CleanupObsoleteDbxrefs (ValNodePtr PNTR prevvnp)

{
  DbtagPtr     dbt;
  ValNodePtr   nextvnp;
  ObjectIdPtr  oip;
  CharPtr      str;
  Boolean      unlink;
  ValNodePtr   vnp;

  if (prevvnp == NULL) return;
  vnp = *prevvnp;
  while (vnp != NULL) {
    nextvnp = vnp->next;
    dbt = (DbtagPtr) vnp->data.ptrvalue;
    if (dbt == NULL) {
      /* keep the empty node, but keep the link pointer in step with it */
      prevvnp = (ValNodePtr PNTR) &(vnp->next);
      vnp = nextvnp;
      continue;
    }

    unlink = FALSE;
    str = (CharPtr) dbt->db;
    /* PIDe, PIDd and GI are intentionally not treated as obsolete */
    if (StringHasNoText (str) ||
        StringICmp (str, "PID") == 0 ||
        StringICmp (str, "PIDg") == 0 ||
        StringICmp (str, "NID") == 0) {
      unlink = TRUE;
    }

    oip = dbt->tag;
    if (oip == NULL) {
      unlink = TRUE;
    } else if (oip->str != NULL) {
      if (StringHasNoText (oip->str)) {
        unlink = TRUE;
      }
    } else if (oip->id == 0) {
      unlink = TRUE;
    }

    if (unlink) {
      *prevvnp = vnp->next;
      vnp->next = NULL;
      DbtagFree (dbt);
      ValNodeFree (vnp);
    } else {
      prevvnp = (ValNodePtr PNTR) &(vnp->next);
    }
    vnp = nextvnp;
  }
}
8107
/*
 * Normalizes the tags of "GO" dbxrefs (db name compared case-insensitively).
 *
 * A positive numeric id is first converted to its decimal string.  A string
 * tag that passes StringIsAllDigits and is shorter than seven characters is
 * then left-padded with '0' to exactly seven characters.
 */
static void CleanupGoDbxrefs (ValNodePtr vnp)

{
  DbtagPtr     dbt;
  CharPtr      digits;
  size_t       ndigits;
  ObjectIdPtr  oip;
  size_t       pad;
  size_t       pos;
  Char         tmp [32];

  for (; vnp != NULL; vnp = vnp->next) {
    dbt = (DbtagPtr) vnp->data.ptrvalue;
    if (dbt == NULL) continue;
    if (StringICmp (dbt->db, "GO") != 0) continue;
    oip = dbt->tag;
    if (oip == NULL) continue;

    if (oip->str == NULL && oip->id > 0) {
      sprintf (tmp, "%ld", (long) oip->id);
      oip->str = StringSave (tmp);
      oip->id = 0;
    }

    digits = oip->str;
    if (digits == NULL || ! StringIsAllDigits (digits)) continue;
    ndigits = StringLen (digits);
    if (ndigits >= 7) continue;

    /* write the leading zeros, then the digits right after them */
    pad = 7 - ndigits;
    for (pos = 0; pos < pad; pos++) {
      tmp [pos] = '0';
    }
    StringCpy (tmp + pad, digits);
    oip->str = MemFree (oip->str);
    oip->str = StringSave (tmp);
  }
}
8147
SortCits(VoidPtr ptr1,VoidPtr ptr2)8148 static int LIBCALLBACK SortCits (VoidPtr ptr1, VoidPtr ptr2)
8149
8150 {
8151 int compare;
8152 Char label1 [128], label2 [128];
8153 ValNodePtr ppr1, ppr2;
8154
8155 if (ptr1 == NULL || ptr2 == NULL) return 0;
8156 ppr1 = *((ValNodePtr PNTR) ptr1);
8157 ppr2 = *((ValNodePtr PNTR) ptr2);
8158 if (ppr1 == NULL || ppr2 == NULL) return 0;
8159 PubLabel (ppr1, label1, 127, OM_LABEL_CONTENT);
8160 PubLabel (ppr2, label2, 127, OM_LABEL_CONTENT);
8161 compare = StringICmp (label1, label2);
8162 return compare;
8163 }
8164
CitGenTitlesMatch(ValNodePtr pub1,ValNodePtr pub2)8165 static Boolean CitGenTitlesMatch (ValNodePtr pub1, ValNodePtr pub2)
8166
8167 {
8168 CitGenPtr cgp1, cgp2;
8169
8170 if (pub1->choice == PUB_Gen) {
8171 cgp1 = (CitGenPtr) pub1->data.ptrvalue;
8172 if (cgp1->serial_number != -1 && pub1->next != NULL) {
8173 pub1 = pub1->next;
8174 }
8175 }
8176 if (pub2->choice == PUB_Gen) {
8177 cgp2 = (CitGenPtr) pub2->data.ptrvalue;
8178 if (cgp2->serial_number != -1 && pub2->next != NULL) {
8179 pub2 = pub2->next;
8180 }
8181 }
8182
8183 if (pub1->choice != PUB_Gen || pub2->choice != PUB_Gen) return TRUE;
8184 cgp1 = (CitGenPtr) pub1->data.ptrvalue;
8185 cgp2 = (CitGenPtr) pub2->data.ptrvalue;
8186 if (cgp1->title == NULL || cgp2->title == NULL) return TRUE;
8187 if (StringCmp (cgp1->title, cgp2->title) != 0) return FALSE;
8188 return TRUE;
8189 }
8190
/*
 * Removes each citation that duplicates the previously kept one.
 *
 * A citation is a duplicate when its PubLabelUnique label (OM_LABEL_CONTENT,
 * unique flag TRUE) equals that of the last kept citation (case-sensitive)
 * and CitGenTitlesMatch agrees.  Duplicates are unlinked and released with
 * PubFree.  Only a node and the last kept node are compared, so the list is
 * expected to be sorted first.
 */
static void CleanupDuplicateCits (ValNodePtr PNTR prevvnp)

{
  Boolean     duplicate;
  ValNodePtr  kept = NULL;
  Char        keptlabel [128], thislabel [128];
  ValNodePtr  following;
  ValNodePtr  vnp;

  if (prevvnp == NULL) return;

  for (vnp = *prevvnp; vnp != NULL; vnp = following) {
    following = vnp->next;

    duplicate = FALSE;
    if (kept != NULL) {
      PubLabelUnique (kept, keptlabel, 127, OM_LABEL_CONTENT, TRUE);
      PubLabelUnique (vnp, thislabel, 127, OM_LABEL_CONTENT, TRUE);
      if (StringCmp (keptlabel, thislabel) == 0 && CitGenTitlesMatch (kept, vnp)) {
        duplicate = TRUE;
      }
    }

    if (duplicate) {
      *prevvnp = vnp->next;
      vnp->next = NULL;
      PubFree (vnp);
    } else {
      kept = vnp;
      prevvnp = (ValNodePtr PNTR) &(vnp->next);
    }
  }
}
8225
8226 /* name processing code from Sequin editors */
8227
FirstNameToInitials(CharPtr first,CharPtr inits,size_t maxsize)8228 NLM_EXTERN void FirstNameToInitials (CharPtr first, CharPtr inits, size_t maxsize)
8229
8230 {
8231 Char ch;
8232 Uint2 i;
8233
8234 if (inits != NULL && maxsize > 0) {
8235 inits [0] = '\0';
8236 if (first != NULL) {
8237 i = 0;
8238 ch = *first;
8239 while (ch != '\0' && i < maxsize) {
8240 while (ch != '\0' && (ch <= ' ' || ch == '-')) {
8241 first++;
8242 ch = *first;
8243 }
8244 if (IS_ALPHA (ch)) {
8245 inits [i] = ch;
8246 i++;
8247 first++;
8248 ch = *first;
8249 }
8250 while (ch != '\0' && ch > ' ' && ch != '-') {
8251 first++;
8252 ch = *first;
8253 }
8254 if (ch == '-') {
8255 inits [i] = ch;
8256 i++;
8257 first++;
8258 ch = *first;
8259 }
8260 }
8261 inits [i] = '\0';
8262 }
8263 }
8264 }
8265
/*
 * Removes every '.' from str in place, closing up the remaining characters.
 * A NULL str is ignored.
 */
static void StripPeriods (CharPtr str)

{
  CharPtr  src;
  CharPtr  dst;

  if (str == NULL) return;

  dst = str;
  for (src = str; *src != '\0'; src++) {
    if (*src == '.') continue;
    *dst = *src;
    dst++;
  }
  *dst = '\0';
}
8286
/*
 * Shifts str left, in place, past any leading characters whose value is
 * <= ' '.  NULL and empty strings are left untouched.
 */
static void TrimLeadingSpaces (CharPtr str)

{
  CharPtr  from;
  CharPtr  to;

  if (str == NULL || *str == '\0') return;

  /* locate the first character that is kept */
  from = str;
  while (*from != '\0' && *from <= ' ') {
    from++;
  }

  /* slide the remainder down to the start of the buffer */
  to = str;
  while (*from != '\0') {
    *to = *from;
    to++;
    from++;
  }
  *to = '\0';
}
8309
/*
 * If the initials string (nsp->names [4]) contains a period and ends with
 * one of a few known suffix spellings, that ending is cut off the initials
 * and the corresponding suffix is stored in nsp->names [5].
 *
 * The caller must pass a non-NULL nsp with non-NULL names [4]; names [5] is
 * overwritten without being freed, so it is expected to be NULL on entry.
 */
static void ExtractSuffixFromInitials (NameStdPtr nsp)

{
  /* tried in order; the ending must be preceded by at least one character */
  static struct {
    CharPtr  tail;
    CharPtr  suffix;
  } rules [] = {
    { "III",  "III" },
    { "III.", "III" },
    { "Jr",   "Jr"  },
    { "2nd",  "II"  },
    { "IV",   "IV"  },
    { "IV.",  "IV"  }
  };
  Boolean  has_period = FALSE;
  size_t   i;
  CharPtr  inits;
  size_t   len;
  CharPtr  scan;
  size_t   taillen;

  inits = nsp->names [4];
  for (scan = inits; *scan != '\0'; scan++) {
    if (*scan == '.') {
      has_period = TRUE;
    }
  }
  if (! has_period) return;

  len = StringLen (inits);
  for (i = 0; i < sizeof (rules) / sizeof (rules [0]); i++) {
    taillen = StringLen (rules [i].tail);
    if (len >= taillen + 1 && StringCmp (inits + len - taillen, rules [i].tail) == 0) {
      inits [len - taillen] = '\0';
      nsp->names [5] = StringSave (rules [i].suffix);
      return;
    }
  }
}
8350
NameStdPtrToTabbedString(NameStdPtr nsp,Boolean fixInitials)8351 static CharPtr NameStdPtrToTabbedString (NameStdPtr nsp, Boolean fixInitials)
8352
8353 {
8354 Char first [256];
8355 Char frstinits [64];
8356 Char initials [64];
8357 Int2 j;
8358 Char last [256];
8359 Char middle [128];
8360 Char str [512];
8361 Char suffix [64];
8362
8363 if (nsp == NULL) return NULL;
8364 if (nsp->names [5] == NULL && nsp->names [4] != NULL) {
8365 ExtractSuffixFromInitials (nsp);
8366 }
8367 str [0] = '\0';
8368 StringNCpy_0 (first, nsp->names [1], sizeof (first));
8369 TrimSpacesAroundString (first);
8370 StringNCpy_0 (initials, nsp->names [4], sizeof (initials));
8371 StripPeriods (initials);
8372 TrimLeadingSpaces (initials);
8373 StringNCpy_0 (last, nsp->names [0], sizeof (last));
8374 TrimLeadingSpaces (last);
8375 StringNCpy_0 (middle, nsp->names [2], sizeof (middle));
8376 TrimLeadingSpaces (middle);
8377 if (StringCmp (initials, "al") == 0 &&
8378 StringCmp (last, "et") == 0 &&
8379 first [0] == '\0') {
8380 initials [0] = '\0';
8381 StringCpy (last, "et al.");
8382 }
8383 /*
8384 if (first [0] == '\0') {
8385 StringNCpy_0 (first, initials, sizeof (first));
8386 if (IS_ALPHA (first [0])) {
8387 if (first [1] == '-') {
8388 first [3] = '\0';
8389 } else {
8390 first [1] = '\0';
8391 }
8392 } else {
8393 first [0] = '\0';
8394 }
8395 }
8396 */
8397 frstinits [0] = '\0';
8398 FirstNameToInitials (first, frstinits, sizeof (frstinits) - 1);
8399 StripPeriods (first);
8400 TrimLeadingSpaces (first);
8401 if (first [0] != '\0') {
8402 StringCat (str, first);
8403 } else {
8404 /*
8405 StringCat (str, " ");
8406 */
8407 }
8408 StringCat (str, "\t");
8409 if (fixInitials) {
8410 j = 0;
8411 while (initials [j] != '\0' && TO_UPPER (initials [j]) == TO_UPPER (frstinits [j])) {
8412 j++;
8413 }
8414 if (initials [j] != '\0') {
8415 StringCat (str, initials + j);
8416 } else {
8417 /*
8418 StringCat (str, " ");
8419 */
8420 }
8421 } else if (initials [0] != '\0') {
8422 StringCat (str, initials);
8423 } else if (frstinits [0] != '\0') {
8424 StringCat (str, frstinits);
8425 }
8426 StringCat (str, "\t");
8427 StringCat (str, last);
8428 StringNCpy_0 (suffix, nsp->names [5], sizeof (suffix));
8429 StringCat (str, "\t");
8430 StripPeriods (suffix);
8431 TrimLeadingSpaces (suffix);
8432 if (suffix [0] != '\0') {
8433 StringCat (str, suffix);
8434 } else {
8435 /*
8436 StringCat (str, " ");
8437 */
8438 }
8439 StringCat (str, "\t");
8440 StringCat (str, middle);
8441 StringCat (str, "\n");
8442 return StringSave (str);
8443 }
8444
/*
 * Returns a newly allocated copy of column col (0-based) from the first
 * line of a tab-delimited string.  Columns end at '\t', '\n' or '\0'.
 * A column past the end of the line yields an empty string.  Returns NULL
 * for a NULL or empty source, a negative col, or when MemNew fails.
 */
static CharPtr XtractTagListColumn (CharPtr source, Int2 col)

{
  CharPtr  cell;
  CharPtr  field;
  size_t   width;

  if (source == NULL || source [0] == '\0' || col < 0) return NULL;

  /* advance over col columns, never moving past the end of the line */
  field = source;
  for (; col > 0 && *field != '\n' && *field != '\0'; col--) {
    while (*field != '\t' && *field != '\n' && *field != '\0') {
      field++;
    }
    if (*field == '\t') {
      field++;
    }
  }

  /* measure the requested column */
  width = 0;
  while (field [width] != '\t' && field [width] != '\n' && field [width] != '\0') {
    width++;
  }

  /* one extra byte is requested for the terminator; only width bytes are copied into it */
  cell = (CharPtr) MemNew (width + 1);
  if (cell != NULL) {
    MemCpy (cell, field, width);
  }
  return cell;
}
8481
TabbedStringToNameStdPtr(CharPtr txt,Boolean fixInitials)8482 static NameStdPtr TabbedStringToNameStdPtr (CharPtr txt, Boolean fixInitials)
8483
8484 {
8485 Char ch;
8486 CharPtr first;
8487 Char initials [64];
8488 Int2 j;
8489 Int2 k;
8490 Char last;
8491 Int2 len;
8492 NameStdPtr nsp;
8493 Char periods [128];
8494 CharPtr str;
8495 Char str1 [64];
8496 Char suffix [80];
8497
8498 if (txt == NULL) return NULL;
8499 nsp = NameStdNew ();
8500 if (nsp == NULL) return NULL;
8501 nsp->names [0] = XtractTagListColumn (txt, 2);
8502 TrimLeadingSpaces (nsp->names [0]);
8503 first = XtractTagListColumn (txt, 0);
8504 StripPeriods (first);
8505 nsp->names [1] = StringSave (first);
8506 TrimLeadingSpaces (nsp->names [1]);
8507 str1 [0] = '\0';
8508 if (fixInitials) {
8509 FirstNameToInitials (first, str1, sizeof (str1) - 1);
8510 }
8511 str = XtractTagListColumn (txt, 1);
8512 StringNCat (str1, str, sizeof (str1) - 1);
8513 MemFree (str);
8514 j = 0;
8515 k = 0;
8516 ch = str1 [j];
8517 while (ch != '\0') {
8518 if (ch != ' ') {
8519 initials [k] = ch;
8520 k++;
8521 }
8522 j++;
8523 ch = str1 [j];
8524 }
8525 initials [k] = '\0';
8526 periods [0] = '\0';
8527 j = 0;
8528 ch = initials [j];
8529 while (ch != '\0') {
8530 if (ch == ',') {
8531 initials [j] = '.';
8532 }
8533 j++;
8534 ch = initials [j];
8535 }
8536 str = StringStr (initials, ".ST.");
8537 if (str != NULL) {
8538 *(str + 2) = 't';
8539 }
8540 j = 0;
8541 k = 0;
8542 ch = initials [j];
8543 while (ch != '\0') {
8544 if (ch == '-') {
8545 periods [k] = ch;
8546 k++;
8547 j++;
8548 ch = initials [j];
8549 } else if (ch == '.') {
8550 j++;
8551 ch = initials [j];
8552 } else if (ch == ' ') {
8553 j++;
8554 ch = initials [j];
8555 } else {
8556 periods [k] = ch;
8557 last = ch;
8558 k++;
8559 j++;
8560 ch = initials [j];
8561 if (ch == '\0') {
8562 if (! (IS_LOWER (last))) {
8563 periods [k] = '.';
8564 k++;
8565 }
8566 /* } else if (ch == '.' && initials [j + 1] == '\0') { */
8567 } else if (! (IS_LOWER (ch))) {
8568 periods [k] = '.';
8569 k++;
8570 }
8571 }
8572 }
8573 if (k > 0 && periods [k - 1] != '.') {
8574 periods [k] = '.';
8575 k++;
8576 }
8577 periods [k] = '\0';
8578 nsp->names [4] = StringSave (periods);
8579 TrimLeadingSpaces (nsp->names [4]);
8580 str = XtractTagListColumn (txt, 3);
8581 StringNCpy_0 (str1, str, sizeof (str1));
8582 MemFree (str);
8583 j = 0;
8584 k = 0;
8585 ch = str1 [j];
8586 while (ch != '\0') {
8587 if (ch != ' ') {
8588 suffix [k] = ch;
8589 k++;
8590 }
8591 j++;
8592 ch = str1 [j];
8593 }
8594 suffix [k] = '\0';
8595 if (suffix [0] != '\0') {
8596 len = StringLen (suffix);
8597 if (len > 0 && suffix [len - 1] == '.') {
8598 suffix [len - 1] = '\0';
8599 }
8600 if (StringICmp (suffix, "1d") == 0) {
8601 StringCpy (suffix, "I");
8602 } else if (StringICmp (suffix, "1st") == 0) {
8603 StringCpy (suffix, "I");
8604 } else if (StringICmp (suffix, "2d") == 0) {
8605 StringCpy (suffix, "2nd");
8606 } else if (StringICmp (suffix, "3d") == 0) {
8607 StringCpy (suffix, "3rd");
8608 } else if (StringICmp (suffix, "Sr") == 0) {
8609 StringCpy (suffix, "Sr.");
8610 } else if (StringICmp (suffix, "Jr") == 0) {
8611 StringCpy (suffix, "Jr.");
8612 }
8613 /*
8614 len = StringLen (suffix);
8615 if (len > 0 && suffix [len - 1] != '.') {
8616 StringCat (suffix, ".");
8617 }
8618 */
8619 nsp->names [5] = StringSave (suffix);
8620 TrimLeadingSpaces (nsp->names [5]);
8621 }
8622 if (StringCmp (nsp->names [0], "et al") == 0) {
8623 nsp->names [0] = MemFree (nsp->names [0]);
8624 nsp->names [0] = StringSave ("et al.");
8625 }
8626 nsp->names [2] = XtractTagListColumn (txt, 4);
8627 TrimLeadingSpaces (nsp->names [2]);
8628 if (StringHasNoText (nsp->names [2])) {
8629 nsp->names [2] = MemFree (nsp->names [2]);
8630 }
8631 MemFree (first);
8632 return nsp;
8633 }
8634
/*
 * Cleans the strings of an affiliation.  The affil field is always cleaned;
 * the remaining fields only when afp->choice is 2.  For choice 2 the country
 * spellings "U.S.A." and any-case "USA" are rewritten to "USA", and for USA
 * entries periods are stripped from the sub field.
 *
 * Returns afp, or NULL (after freeing afp) when every string field is NULL.
 */
static AffilPtr CleanAffil (AffilPtr afp)

{
  if (afp == NULL) return NULL;

  CleanVisStringJunkAndCompress (&(afp->affil));

  if (afp->choice == 2) {
    CleanVisStringJunkAndCompress (&(afp->div));
    CleanVisStringJunkAndCompress (&(afp->city));
    CleanVisStringJunkAndCompress (&(afp->sub));
    CleanVisStringJunkAndCompress (&(afp->country));
    CleanVisStringJunkAndCompress (&(afp->street));
    CleanVisStringJunkAndCompress (&(afp->email));
    CleanVisStringJunkAndCompress (&(afp->fax));
    CleanVisStringJunkAndCompress (&(afp->phone));
    CleanVisStringJunkAndCompress (&(afp->postal_code));
    TrimSpacesSemicolonsAndCommas (afp->postal_code);

    /* canonical spelling is exactly "USA" */
    if (StringICmp (afp->country, "U.S.A.") == 0 ||
        (StringICmp (afp->country, "USA") == 0 &&
         StringCmp (afp->country, "USA") != 0)) {
      afp->country = MemFree (afp->country);
      afp->country = StringSave ("USA");
    }

    if (afp->sub != NULL && StringCmp (afp->country, "USA") == 0) {
      StripPeriods (afp->sub);
      TrimSpacesAroundString (afp->sub);
    }
  }

  /* anything left?  then keep the structure */
  if (afp->affil != NULL || afp->div != NULL || afp->city != NULL ||
      afp->sub != NULL || afp->country != NULL || afp->street != NULL ||
      afp->email != NULL || afp->fax != NULL || afp->phone != NULL ||
      afp->postal_code != NULL) {
    return afp;
  }

  afp = MemFree (afp);
  return afp;
}
8678
/*
 * Normalizes an author list in place.
 *
 *   alp          - author list to clean; NULL is ignored.
 *   fixInitials  - passed through to NameStdPtrToTabbedString and
 *                  TabbedStringToNameStdPtr.
 *
 * The affiliation is always run through CleanAffil.  When alp->choice is 2
 * or 3 the name nodes are treated as plain strings and only trimmed.  When
 * alp->choice is 1 each node is treated as an Author: structured names
 * (pid->choice 2) are round-tripped through the tabbed-string form, cleaned,
 * checked for "et al" and for a trailing Jr./Sr., and authors that end up
 * without a last name are removed.  String-valued person ids (pid->choice
 * 3, 4 or 5) are trimmed and removed when blank.  If no author remains, a
 * single "?" string entry is installed and alp->choice becomes 3.
 *
 * NOTE(review): the result of TabbedStringToNameStdPtr is dereferenced
 * without a NULL check - confirm it cannot fail here.
 */
static void NormalizeAuthors (AuthListPtr alp, Boolean fixInitials)

{
  AuthorPtr        ap;
  CharPtr          initials;
  size_t           len;
  ValNodePtr       names;
  ValNodePtr       next;
  NameStdPtr       nsp;
  PersonIdPtr      pid;
  ValNodePtr PNTR  prev;
  CharPtr          str;
  Boolean          upcaseinits;
  ValNodePtr       vnp;
  Boolean          zap;

  if (alp == NULL) return;
  alp->affil = CleanAffil (alp->affil);

  /* string-valued name lists: just tidy each string */
  if (alp->choice == 2 || alp->choice == 3) {
    for (vnp = alp->names; vnp != NULL; vnp = vnp->next) {
      str = (CharPtr) vnp->data.ptrvalue;
      TrimSpacesAroundString (str);
      TrimSpacesAndJunkFromEnds (str, FALSE);
      Asn2gnbkCompressSpaces (str);
    }
  }
  if (alp->choice != 1) return;

  /* prev always addresses the link that points at the current node */
  prev = &(alp->names);
  names = alp->names;
  while (names != NULL) {
    next = names->next;
    zap = FALSE;
    ap = names->data.ptrvalue;
    if (ap != NULL) {
      pid = ap->name;
      if (pid == NULL) {
        /* continue */
      } else if (pid->choice == 2) {
        nsp = pid->data;
        if (nsp != NULL /* && nsp->names [4] != NULL */) {
          /* remember whether the initials started with an upper-case letter */
          upcaseinits = FALSE;
          initials = nsp->names [4];
          if (StringLen (initials) > 0) {
            if (IS_UPPER (initials [0])) {
              upcaseinits = TRUE;
            }
          }
          /* rebuild the name by converting to the tabbed form and back */
          str = NameStdPtrToTabbedString (nsp, fixInitials);
          pid->data = NameStdFree (nsp);
          nsp = TabbedStringToNameStdPtr (str, fixInitials);
          /* restore the upper-case first initial if the rebuild lowered it */
          if (upcaseinits) {
            initials = nsp->names [4];
            if (StringLen (initials) > 0) {
              if (IS_LOWER (initials [0])) {
                initials [0] = TO_UPPER (initials [0]);
              }
            }
          }
          pid->data = nsp;
          MemFree (str);
          CleanVisString (&(nsp->names [0]));
          CleanVisString (&(nsp->names [1]));
          CleanVisString (&(nsp->names [2]));
          CleanVisString (&(nsp->names [3]));
          CleanVisString (&(nsp->names [4]));
          CleanVisString (&(nsp->names [5]));
          CleanVisString (&(nsp->names [6]));
          /* last name "et" with initials "al": collapse to last name "et al." */
          if (StringCmp (nsp->names [0], "et") == 0 &&
              (StringCmp (nsp->names [4], "al") == 0 ||
               StringCmp (nsp->names [4], "al.") == 0 ||
               StringCmp (nsp->names [4], "Al.") == 0) &&
              (StringHasNoText (nsp->names [1]) ||
               StringCmp (nsp->names [1], "a") == 0)) {
            nsp->names [4] = MemFree (nsp->names [4]);
            nsp->names [1] = MemFree (nsp->names [1]);
            nsp->names [0] = MemFree (nsp->names [0]);
            nsp->names [0] = StringSave ("et al.");
          }
          /* last name ending in " Jr." or " Sr.": move it to the suffix */
          str = nsp->names [0];
          len = StringLen (str);
          if (len > 4 && StringHasNoText (nsp->names [5])) {
            if (StringCmp (str + len - 4, " Jr.") == 0 ||
                StringCmp (str + len - 4, " Sr.") == 0) {
              nsp->names [5] = StringSave (str + len - 3);
              str [len - 4] = '\0';
              TrimSpacesAroundString (str);
            }
          }
          /*
           * initials ending in ".Jr." or ".Sr.": move it to the suffix;
           * the period in front of Jr/Sr stays with the initials
           */
          str = nsp->names [4];
          len = StringLen (str);
          if (len > 4 && StringHasNoText (nsp->names [5])) {
            if (StringCmp (str + len - 4, ".Jr.") == 0 ||
                StringCmp (str + len - 4, ".Sr.") == 0) {
              nsp->names [5] = StringSave (str + len - 3);
              str [len - 3] = '\0';
              TrimSpacesAroundString (str);
            }
          }
          /* completely empty name */
          if (StringHasNoText (nsp->names [0]) &&
              StringHasNoText (nsp->names [1]) &&
              StringHasNoText (nsp->names [2]) &&
              StringHasNoText (nsp->names [3]) &&
              StringHasNoText (nsp->names [4]) &&
              StringHasNoText (nsp->names [5]) &&
              StringHasNoText (nsp->names [6])) {
            zap = TRUE;
          }
          /* last name is required, so zap if not present */
          if (StringHasNoText (nsp->names [0])) {
            zap = TRUE;
          }
        }
      } else if (pid->choice == 3 || pid->choice == 4 || pid->choice == 5) {
        /* for these choices pid->data is used as a plain string */
        TrimSpacesAroundString ((CharPtr) pid->data);
        if (StringHasNoText ((CharPtr) pid->data)) {
          zap = TRUE;
        }
      }
    }
    if (zap) {
      /* remove empty authors */
      *prev = names->next;
      names->next = NULL;
      AuthorFree (ap);
      ValNodeFree (names);
    } else {
      prev = &(names->next);
    }
    names = next;
  }
  /* if no remaining authors, put in default author for legal ASN.1 */
  if (alp->names == NULL) {
    names = ValNodeNew (NULL);
    if (names != NULL) {
      /*
      ap = AuthorNew ();
      if (ap != NULL) {
        pid = PersonIdNew ();
        if (pid != NULL) {
          pid->choice = 4;
          pid->data = (Pointer) StringSave ("?");
          ap->name = pid;
          names->choice = 1;
          names->data.ptrvalue = ap;
          alp->names = names;
        }
      }
      */
      names->choice = 3;
      names->data.ptrvalue = StringSave ("?");
      alp->names = names;
      alp->choice = 3;
    }
  }
}
8836
/*
 * Compresses blank runs in str, in place.  After a space, tab or '(' all
 * following spaces and tabs are dropped.  If the run is followed by ')' or
 * ',' the character in front of the run is dropped as well, unless that
 * character is '(' (so "( )" becomes "()").  NULL is ignored.
 */
static void StrStripSpaces (
  CharPtr str
)

{
  Char     ch;
  CharPtr  rd;
  CharPtr  wr;

  if (str == NULL) return;

  rd = str;
  wr = str;
  while ((ch = *rd) != '\0') {
    *wr = ch;
    wr++;
    rd++;
    if (ch != ' ' && ch != '\t' && ch != '(') continue;

    /* swallow the blanks that follow */
    while (*rd == ' ' || *rd == '\t') {
      rd++;
    }
    /* no blank in front of ')' or ',' - but never remove a '(' */
    if ((*rd == ')' || *rd == ',') && ch != '(') {
      wr--;
    }
  }
  *wr = '\0';
}
8862
8863 /* from utilpub.c */
empty_citgen(CitGenPtr cit)8864 static Boolean empty_citgen(CitGenPtr cit)
8865 {
8866 if (cit == NULL)
8867 return TRUE;
8868 if (cit->cit)
8869 return FALSE;
8870 if (cit->authors)
8871 return FALSE;
8872 if (cit->muid > 0)
8873 return FALSE;
8874 if (cit->journal)
8875 return FALSE;
8876 if (cit->volume)
8877 return FALSE;
8878 if (cit->issue)
8879 return FALSE;
8880 if (cit->pages)
8881 return FALSE;
8882 if (cit->date)
8883 return FALSE;
8884 if (cit->serial_number > 0)
8885 return FALSE;
8886 if (cit->title)
8887 return FALSE;
8888 if (cit->pmid > 0)
8889 return FALSE;
8890 return TRUE;
8891 }
8892
/*
 * Runs NormalizeAuthors on every author list reachable from one pub node.
 * PMid and Muid nodes and nodes without data are ignored.  For PUB_Man the
 * authors are only normalized when othertype is 2 and let_type is 3.
 * stripSerial is accepted but not used here.
 */
static void NormalizePubAuthors (ValNodePtr vnp, Boolean stripSerial, Boolean fixInitials)

{
  CitArtPtr   article;
  CitBookPtr  book;
  CitPatPtr   patent;

  if (vnp == NULL) return;
  if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) return;
  if (vnp->data.ptrvalue == NULL) return;

  switch (vnp->choice) {
    case PUB_Gen :
      NormalizeAuthors (((CitGenPtr) vnp->data.ptrvalue)->authors, fixInitials);
      break;
    case PUB_Sub :
      NormalizeAuthors (((CitSubPtr) vnp->data.ptrvalue)->authors, fixInitials);
      break;
    case PUB_Article :
      article = (CitArtPtr) vnp->data.ptrvalue;
      NormalizeAuthors (article->authors, fixInitials);
      /* from values 2 and 3 carry a CitBook in fromptr */
      if ((article->from == 2 || article->from == 3) && article->fromptr != NULL) {
        book = (CitBookPtr) article->fromptr;
        NormalizeAuthors (book->authors, fixInitials);
      }
      break;
    case PUB_Book :
    case PUB_Man :
      book = (CitBookPtr) vnp->data.ptrvalue;
      if (vnp->choice == PUB_Book || (book->othertype == 2 && book->let_type == 3)) {
        NormalizeAuthors (book->authors, fixInitials);
      }
      break;
    case PUB_Patent :
      patent = (CitPatPtr) vnp->data.ptrvalue;
      NormalizeAuthors (patent->authors, fixInitials);
      NormalizeAuthors (patent->applicants, fixInitials);
      NormalizeAuthors (patent->assignees, fixInitials);
      break;
    default :
      break;
  }
}
8944
/*
 * Cleans the non-author parts of a single pub node in place.
 *
 *   vnp          - pub node; NULL, PMid/Muid nodes and nodes without data
 *                  are ignored.
 *   stripSerial  - for PUB_Gen, resets serial_number to -1.
 *   fixInitials  - accepted but not used here.
 *
 * PUB_Gen:     capitalizes a leading "unpublished", drops volume, issue and
 *              pages of unpublished entries without a journal, trims cit
 *              and compresses blanks in the title.
 * PUB_Sub:     moves imprint publisher and date to the author affiliation
 *              and the submission date when those are missing, frees an
 *              imprint that has no date, strips a leading
 *              "to the <17 characters> databases" phrase from a plain
 *              affiliation string and cleans the affiliation.
 * PUB_Article: compresses blanks in name-type titles.
 * PUB_Man:     cleans a plain publisher affiliation string.
 * PUB_Patent:  rewrites country "USA" to "US".
 *
 * Any imprint found along the way has its string fields cleaned.
 */
static void NormalizeAPub (ValNodePtr vnp, Boolean stripSerial, Boolean fixInitials)

{
  AffilPtr     affil;
  AuthListPtr  alp;
  CitArtPtr    cap;
  CitBookPtr   cbp;
  CitGenPtr    cgp;
  CitJourPtr   cjp;
  CitPatPtr    cpp;
  CitSubPtr    csp;
  ImprintPtr   imp;
  CharPtr      str;
  CharPtr      tmp;
  ValNodePtr   ttl;

  if (vnp == NULL) return;
  if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) return;
  if (vnp->data.ptrvalue == NULL) return;
  imp = NULL;
  switch (vnp->choice) {
    case PUB_Gen :
      cgp = (CitGenPtr) vnp->data.ptrvalue;
      if (stripSerial) {
        cgp->serial_number = -1; /* but does not remove if empty */
      }
      if (StringNICmp (cgp->cit, "unpublished", 11) == 0) {
        cgp->cit [0] = 'U';
        if (cgp->journal == NULL) {
          cgp->volume = MemFree (cgp->volume);
          cgp->issue = MemFree (cgp->issue);
          cgp->pages = MemFree (cgp->pages);
        }
      }
      TrimSpacesAroundString (cgp->cit);
      if (StringDoesHaveText (cgp->title)) {
        StrStripSpaces (cgp->title);
      }
      break;
    case PUB_Sub :
      csp = (CitSubPtr) vnp->data.ptrvalue;
      alp = csp->authors;
      imp = csp->imp;
      /* hand publisher and date over from the imprint where they are missing */
      if (alp != NULL && alp->affil == NULL && imp != NULL && imp->pub != NULL) {
        alp->affil = imp->pub;
        imp->pub = NULL;
      }
      if (csp->date == NULL && imp != NULL && imp->date != NULL) {
        csp->date = imp->date;
        imp->date = NULL;
      }
      if (imp != NULL && imp->date == NULL) {
        csp->imp = ImprintFree (csp->imp);
      }
      if (alp != NULL && alp->affil != NULL) {
        affil = alp->affil;
        if (affil->choice == 1) {
          str = affil->affil;
          /*
           * Prefix layout: "to the " (7) + 17 characters + " databases" (10)
           * = 34.  The length is checked first so that str + 24 is never
           * read past the end of a shorter string.
           */
          if (StringLen (str) >= 34 &&
              StringNICmp (str, "to the ", 7) == 0 &&
              StringNICmp (str + 24, " databases", 10) == 0) {
            str += 34;
            if (*str == '.') {
              str++;
            }
            tmp = StringSaveNoNull (TrimSpacesAroundString (str));
            affil->affil = MemFree (affil->affil);
            affil->affil = tmp;
          }
        }
        alp->affil = CleanAffil (alp->affil);
      }
      /* re-read: the imprint may have been freed above */
      imp = csp->imp;
      break;
    case PUB_Article :
      cap = (CitArtPtr) vnp->data.ptrvalue;
      if (cap != NULL) {
        if (cap->from == 1) {
          cjp = (CitJourPtr) cap->fromptr;
          if (cjp != NULL) {
            imp = cjp->imp;
          }
        } else if (cap->from == 2 || cap->from == 3) {
          cbp = (CitBookPtr) cap->fromptr;
          if (cbp != NULL) {
            imp = cbp->imp;
          }
        }
        for (ttl = cap->title; ttl != NULL; ttl = ttl->next) {
          if (ttl->choice == Cit_title_name) {
            str = (CharPtr) ttl->data.ptrvalue;
            if (StringHasNoText (str)) continue;
            StrStripSpaces (str);
          }
        }
      }
      break;
    case PUB_Book :
      cbp = (CitBookPtr) vnp->data.ptrvalue;
      if (cbp != NULL) {
        imp = cbp->imp;
      }
      break;
    case PUB_Man :
      cbp = (CitBookPtr) vnp->data.ptrvalue;
      if (cbp != NULL) {
        imp = cbp->imp;
        if (imp != NULL) {
          affil = imp->pub;
          if (affil != NULL && affil->choice == 1) {
            CleanVisStringJunkAndCompress (&(affil->affil));
          }
        }
      }
      break;
    case PUB_Patent :
      cpp = (CitPatPtr) vnp->data.ptrvalue;
      if (cpp != NULL) {
        if (StringCmp (cpp->country, "USA") == 0) {
          cpp->country = MemFree (cpp->country);
          cpp->country = StringSave ("US");
        }
      }
      break;
    default :
      break;
  }
  if (imp != NULL) {
    CleanVisStringAndCompress (&(imp->volume));
    CleanVisStringAndCompress (&(imp->issue));
    CleanVisStringAndCompress (&(imp->pages));
    CleanVisStringAndCompress (&(imp->section));
    CleanVisStringAndCompress (&(imp->part_sup));
    CleanVisStringAndCompress (&(imp->language));
    CleanVisStringAndCompress (&(imp->part_supi));
  }
}
9083
9084 //LCOV_EXCL_START
CleanUpPubdescAuthors(PubdescPtr pdp)9085 NLM_EXTERN void CleanUpPubdescAuthors (PubdescPtr pdp)
9086
9087 {
9088 Char buf1 [121];
9089 Boolean fixInitials = TRUE;
9090 Boolean hasArt = FALSE;
9091 Boolean hasUid = FALSE;
9092 ValNodePtr next;
9093 ValNodePtr PNTR prev;
9094 ValNodePtr vnp;
9095
9096 if (pdp == NULL) return;
9097 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
9098 if (vnp->choice == PUB_Muid || vnp->choice == PUB_PMid) {
9099 if (vnp->data.intvalue > 0) {
9100 hasUid = TRUE;
9101 }
9102 } else if (vnp->choice == PUB_Article) {
9103 hasArt = TRUE;
9104 }
9105 }
9106 if (hasArt && hasUid) {
9107 fixInitials = FALSE;
9108 }
9109 prev = &(pdp->pub);
9110 vnp = pdp->pub;
9111 while (vnp != NULL) {
9112 next = vnp->next;
9113 PubLabelUnique (vnp, buf1, sizeof (buf1) - 1, OM_LABEL_CONTENT, TRUE);
9114 NormalizePubAuthors (vnp, TRUE, fixInitials);
9115 vnp = next;
9116 }
9117 }
9118 //LCOV_EXCL_STOP
9119
/*
 * Sort rank for each pub choice value 0..13: the array index is the choice
 * value, the element is the position that choice takes after sorting.
 * Choices 4 and 13 are moved to the front (ranks 2 and 1) and choice 3 to
 * the very end (rank 13); all other choices keep their relative order.
 */
static int pub_order [] = {
  0,  3,  4, 13,   /* choices  0 ..  3 */
  2,  5,  6,  7,   /* choices  4 ..  7 */
  8,  9, 10, 11,   /* choices  8 .. 11 */
  12, 1            /* choices 12 .. 13 */
};
9136
SortByPubType(VoidPtr ptr1,VoidPtr ptr2)9137 static int LIBCALLBACK SortByPubType (VoidPtr ptr1, VoidPtr ptr2)
9138
9139 {
9140 Uint1 chs1;
9141 Uint1 chs2;
9142 ValNodePtr vnp1;
9143 ValNodePtr vnp2;
9144
9145 if (ptr1 == NULL || ptr2 == NULL) return 0;
9146 vnp1 = *((ValNodePtr PNTR) ptr1);
9147 vnp2 = *((ValNodePtr PNTR) ptr2);
9148 if (vnp1 == NULL || vnp2 == NULL) return 0;
9149 chs1 = (Uint1) vnp1->choice;
9150 chs2 = (Uint1) vnp2->choice;
9151 if (chs1 < 14 && chs2 < 14) {
9152 chs1 = pub_order [chs1];
9153 chs2 = pub_order [chs2];
9154 }
9155 if (chs1 > chs2) {
9156 return 1;
9157 } else if (chs1 < chs2) {
9158 return -1;
9159 }
9160 return 0;
9161 }
9162
/*
 * Normalizes a Pubdesc in place.
 *
 *   pdp          - descriptor to clean; NULL is ignored.
 *   stripSerial  - reset Cit-gen serial numbers to -1.
 *   doAuthors    - also normalize author lists.
 *   publist      - optional list that receives, for every pub whose
 *                  PubLabelUnique label changed, the old label (choice 1)
 *                  followed by the new label (choice 2).  May be NULL, in
 *                  which case nothing is recorded.  Previously a NULL
 *                  publist was still handed to ValNodeCopyStr, which had no
 *                  list to attach the new nodes to.
 *
 * Steps: clean the comment, sort the pubs by type, drop zero Muids when a
 * non-zero Muid exists, normalize each pub, adjust the prepub flag of
 * journal imprints according to pubstatus, remove adjacent duplicate PubMed
 * article ids, remove empty Cit-gens (except when the Cit-gen is the only
 * pub), and finally copy a PMID between the pub list and the article ids
 * when only one of the two has it.
 */
static void NormalizePubdesc (PubdescPtr pdp, Boolean stripSerial, Boolean doAuthors, ValNodePtr PNTR publist)

{
  ArticleIdPtr       aip;
  Int4               artpmid = 0;
  Char               buf1 [121];
  Char               buf2 [121];
  CitArtPtr          cap = NULL;
  CitGenPtr          cgp;
  CitJourPtr         cjp;
  Boolean            fixInitials = TRUE;
  Boolean            hasArt = FALSE;
  Boolean            hasUid = FALSE;
  ImprintPtr         imp;
  Int4               lastartpmid = 0;
  Int4               muid = 0;
  ValNodePtr         next;
  ArticleIdPtr       nextaip;
  Int4               pmid = 0;
  ValNodePtr PNTR    prev;
  ArticleIdPtr PNTR  prevaip;
  ValNodePtr         vnp;

  if (pdp == NULL) return;
  CleanVisString (&(pdp->comment));

  /* survey the pubs: remember a positive muid and whether uid and article coexist */
  for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice == PUB_Muid) {
      if (vnp->data.intvalue > 0) {
        muid = vnp->data.intvalue;
      }
    }
    if (vnp->choice == PUB_Muid || vnp->choice == PUB_PMid) {
      if (vnp->data.intvalue > 0) {
        hasUid = TRUE;
      }
    } else if (vnp->choice == PUB_Article) {
      hasArt = TRUE;
    }
  }
  if (hasArt && hasUid) {
    fixInitials = FALSE;
  }
  if (pdp->pub != NULL) {
    pdp->pub = ValNodeSort (pdp->pub, SortByPubType);
  }

  /* remove zero muid where there is also a non-zero muid */
  prev = &(pdp->pub);
  vnp = pdp->pub;
  while (vnp != NULL) {
    next = vnp->next;
    if (vnp->choice == PUB_Muid && vnp->data.intvalue == 0 && muid != 0) {
      *prev = vnp->next;
      vnp->next = NULL;
      PubFree (vnp);
    } else {
      prev = &(vnp->next);
    }
    vnp = next;
  }

  prev = &(pdp->pub);
  vnp = pdp->pub;

  /* a lone Cit-gen is cleaned here and kept even if it ends up empty */
  if (vnp != NULL && vnp->next == NULL && vnp->choice == PUB_Gen) {
    cgp = (CitGenPtr) vnp->data.ptrvalue;
    buf1 [0] = '\0';
    PubLabelUnique (vnp, buf1, sizeof (buf1) - 1, OM_LABEL_CONTENT, TRUE);
    if (doAuthors) {
      NormalizeAuthors (cgp->authors, fixInitials);
    }
    if (stripSerial) {
      cgp->serial_number = -1;
    }
    if (StringNICmp (cgp->cit, "unpublished", 11) == 0) {
      cgp->cit [0] = 'U';
      if (cgp->journal == NULL) {
        cgp->volume = MemFree (cgp->volume);
        cgp->issue = MemFree (cgp->issue);
        cgp->pages = MemFree (cgp->pages);
      }
    }
    TrimSpacesAroundString (cgp->cit);
    if (StringDoesHaveText (cgp->title)) {
      StrStripSpaces (cgp->title);
    }
    buf2 [0] = '\0';
    PubLabelUnique (vnp, buf2, sizeof (buf2) - 1, OM_LABEL_CONTENT, TRUE);
    /* report the label change only when the caller supplied a list */
    if (publist != NULL && StringCmp (buf1, buf2) != 0) {
      ValNodeCopyStr (publist, 1, buf1);
      ValNodeCopyStr (publist, 2, buf2);
    }
    return; /* but does not remove if empty and only element of Pub */
  }

  while (vnp != NULL) {
    next = vnp->next;
    buf1 [0] = '\0';
    PubLabelUnique (vnp, buf1, sizeof (buf1) - 1, OM_LABEL_CONTENT, TRUE);
    if (doAuthors) {
      NormalizePubAuthors (vnp, stripSerial, fixInitials);
    }
    NormalizeAPub (vnp, stripSerial, fixInitials);
    if (vnp->choice == PUB_Article) {
      cap = (CitArtPtr) vnp->data.ptrvalue;
      if (cap != NULL && cap->from == 1) {
        cjp = (CitJourPtr) cap->fromptr;
        if (cjp != NULL) {
          imp = cjp->imp;
          if (imp != NULL) {
            /* ahead-of-print: prepub is 2 exactly while volume or pages is missing */
            if (imp->pubstatus == PUBSTATUS_aheadofprint && imp->prepub != 2) {
              if (StringHasNoText (imp->volume) || StringHasNoText (imp->pages)) {
                imp->prepub = 2;
              }
            }
            if (imp->pubstatus == PUBSTATUS_aheadofprint && imp->prepub == 2) {
              if (StringDoesHaveText (imp->volume) && StringDoesHaveText (imp->pages)) {
                imp->prepub = 0;
              }
            }
            if (imp->pubstatus == PUBSTATUS_epublish && imp->prepub == 2) {
              imp->prepub = 0;
            }
          }
        }
      }
      if (cap != NULL) {
        /* drop a PubMed id that repeats the previous PubMed id in the list */
        aip = cap->ids;
        prevaip = (ArticleIdPtr PNTR) &(cap->ids);
        lastartpmid = 0;
        while (aip != NULL) {
          nextaip = aip->next;
          if (aip->choice == ARTICLEID_PUBMED) {
            artpmid = aip->data.intvalue;
            if (lastartpmid != 0 && lastartpmid == artpmid) {
              aip->next = NULL;
              *prevaip = nextaip;
              ArticleIdFree (aip);
            } else {
              prevaip = (ArticleIdPtr PNTR) &(aip->next);
            }
            lastartpmid = artpmid;
          } else {
            prevaip = (ArticleIdPtr PNTR) &(aip->next);
          }
          aip = nextaip;
        }
      }
    } else if (vnp->choice == PUB_PMid) {
      pmid = vnp->data.intvalue;
    }
    if (vnp->choice == PUB_Gen && empty_citgen ((CitGenPtr) vnp->data.ptrvalue)) {
      /* an empty Cit-gen next to other pubs carries no information */
      *prev = vnp->next;
      vnp->next = NULL;
      PubFree (vnp);
    } else {
      prev = &(vnp->next);
      buf2 [0] = '\0';
      PubLabelUnique (vnp, buf2, sizeof (buf2) - 1, OM_LABEL_CONTENT, TRUE);
      /* report the label change only when the caller supplied a list */
      if (publist != NULL && StringCmp (buf1, buf2) != 0) {
        ValNodeCopyStr (publist, 1, buf1);
        ValNodeCopyStr (publist, 2, buf2);
      }
    }
    vnp = next;
  }

  /* keep the pub-level PMID and the article-level PubMed id in step */
  if (pmid == 0 && artpmid > 0) {
    ValNodeAddInt (&(pdp->pub), PUB_PMid, artpmid);
  } else if (pmid > 0 && artpmid == 0 && cap != NULL) {
    ValNodeAddInt (&(cap->ids), ARTICLEID_PUBMED, pmid);
  }
}
9334
9335 //LCOV_EXCL_START
CleanUpPubdescBody(PubdescPtr pdp,Boolean stripSerial)9336 NLM_EXTERN void CleanUpPubdescBody (PubdescPtr pdp, Boolean stripSerial)
9337
9338 {
9339 if (pdp == NULL) return;
9340 NormalizePubdesc (pdp, stripSerial, FALSE, NULL);
9341 }
9342 //LCOV_EXCL_STOP
9343
KeywordAlreadyInList(ValNodePtr head,CharPtr kwd)9344 static Boolean KeywordAlreadyInList (ValNodePtr head, CharPtr kwd)
9345
9346 {
9347 ValNodePtr vnp;
9348
9349 if (head == NULL || kwd == NULL) return FALSE;
9350
9351 for (vnp = head; vnp != NULL; vnp = vnp->next) {
9352 if (StringICmp ((CharPtr) vnp->data.ptrvalue, kwd) == 0) return TRUE;
9353 }
9354
9355 return FALSE;
9356 }
9357
CopyGeneXrefToGeneFeat(GeneRefPtr grp,GeneRefPtr grx)9358 static Boolean CopyGeneXrefToGeneFeat (GeneRefPtr grp, GeneRefPtr grx)
9359
9360 {
9361 if (grp == NULL || grx == NULL) return FALSE;
9362 if (grx->db != NULL) {
9363 ValNodeLink (&(grp->db), grx->db);
9364 grx->db = NULL;
9365 }
9366 if (grx->locus == NULL && grx->allele == NULL &&
9367 grx->desc == NULL && grx->maploc == NULL &&
9368 grx->locus_tag == NULL && grx->db == NULL &&
9369 grx->syn == NULL) return TRUE;
9370 return FALSE;
9371 }
9372
/*
 * For a gene feature, folds every gene xref into the feature's own GeneRef
 * via CopyGeneXrefToGeneFeat and removes those xrefs for which that call
 * returns TRUE.  Other xrefs are left in place.  Does nothing unless sfp is
 * a gene feature with a GeneRef.
 */
static void HandleXrefOnGene (SeqFeatPtr sfp)

{
  GeneRefPtr           grp;
  SeqFeatXrefPtr PNTR  link;
  SeqFeatXrefPtr       xref;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
  grp = (GeneRefPtr) sfp->data.value.ptrvalue;
  if (grp == NULL) return;

  /* link addresses the pointer that currently refers to xref */
  link = &(sfp->xref);
  while ((xref = *link) != NULL) {
    if (xref->data.choice == SEQFEAT_GENE &&
        CopyGeneXrefToGeneFeat (grp, (GeneRefPtr) xref->data.value.ptrvalue)) {
      *link = xref->next;
      xref->next = NULL;
      SeqFeatXrefFree (xref);
    } else {
      link = &(xref->next);
    }
  }
}
9404
/*
 * Merges the contents of a protein xref (prx) into a protein reference (prp).
 *
 *   db       - the whole prx list is appended to prp->db and detached from prx
 *   name, ec - every string not already present in the prp list is copied
 *              there and its node removed from prx; duplicates stay on prx
 *   desc     - moved if prp has none; otherwise, if the two differ, prp->desc
 *              becomes "<prp desc>; <prx desc>"
 *   activity - strings not already present are copied to prp; every node is
 *              removed from prx regardless
 *
 * ValNodeCopyStr stores its own copy of the string, so nodes removed from
 * prx are released together with their data (ValNodeFreeData).  Clearing the
 * data pointer and calling ValNodeFree, as was done before, leaked the
 * original string.
 */
static void CopyProtXrefToProtFeat (ProtRefPtr prp, ProtRefPtr prx)

{
  ValNodePtr       curr;
  size_t           len;
  ValNodePtr       next;
  ValNodePtr PNTR  prev;
  CharPtr          str;

  if (prp == NULL || prx == NULL) return;

  if (prx->db != NULL) {
    ValNodeLink (&(prp->db), prx->db);
    prx->db = NULL;
  }

  prev = &(prx->name);
  curr = prx->name;
  while (curr != NULL) {
    next = curr->next;
    str = (CharPtr) curr->data.ptrvalue;
    if (! KeywordAlreadyInList (prp->name, str)) {
      ValNodeCopyStr (&(prp->name), 0, str);
      *(prev) = next;
      curr->next = NULL;
      ValNodeFreeData (curr);
    } else {
      prev = &(curr->next);
    }
    curr = next;
  }

  if (prp->desc == NULL) {
    prp->desc = prx->desc;
    prx->desc = NULL;
  } else if (prx->desc != NULL) {
    if (StringCmp (prx->desc, prp->desc) != 0) {
      len = StringLen (prp->desc) + StringLen (prx->desc) + 6;
      str = MemNew (len);
      if (str != NULL) {
        StringCpy (str, prp->desc);
        StringCat (str, "; ");
        StringCat (str, prx->desc);
        prp->desc = MemFree (prp->desc);
        prp->desc = str;
      }
    }
  }

  prev = &(prx->ec);
  curr = prx->ec;
  while (curr != NULL) {
    next = curr->next;
    str = (CharPtr) curr->data.ptrvalue;
    if (! KeywordAlreadyInList (prp->ec, str)) {
      ValNodeCopyStr (&(prp->ec), 0, str);
      *(prev) = next;
      curr->next = NULL;
      ValNodeFreeData (curr);
    } else {
      prev = &(curr->next);
    }
    curr = next;
  }

  /* activity nodes are always consumed, whether or not they were copied */
  prev = &(prx->activity);
  curr = prx->activity;
  while (curr != NULL) {
    next = curr->next;
    str = (CharPtr) curr->data.ptrvalue;
    if (! KeywordAlreadyInList (prp->activity, str)) {
      ValNodeCopyStr (&(prp->activity), 0, str);
    }
    *(prev) = next;
    curr->next = NULL;
    ValNodeFreeData (curr);
    curr = next;
  }
}
9488
InGpsGenomic(SeqFeatPtr sfp)9489 static Boolean InGpsGenomic (SeqFeatPtr sfp)
9490
9491 {
9492 BioseqPtr bsp;
9493 BioseqSetPtr bssp;
9494
9495 if (sfp == NULL) return FALSE;
9496 bsp = BioseqFindFromSeqLoc (sfp->location);
9497 if (bsp == NULL) return FALSE;
9498 if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
9499 bssp = (BioseqSetPtr) bsp->idx.parentptr;
9500 while (bssp != NULL) {
9501 if (bssp->_class == BioseqseqSet_class_nuc_prot) return FALSE;
9502 if (bssp->_class == BioseqseqSet_class_gen_prod_set) return TRUE;
9503 if (bssp->idx.parenttype != OBJ_BIOSEQSET) return FALSE;
9504 bssp = (BioseqSetPtr) bssp->idx.parentptr;
9505 }
9506 }
9507 return FALSE;
9508 }
9509
/*
 * For a feature with a product, folds every protein xref into the ProtRef of
 * the feature returned by GetBestProteinFeatureUnindexed for that product,
 * then removes and frees the xref.  Nothing is done when InGpsGenomic
 * reports TRUE for the feature, or when no protein feature / ProtRef is
 * available.
 */
static void HandleXrefOnCDS (SeqFeatPtr sfp)

{
  SeqFeatXrefPtr PNTR  link;
  SeqFeatXrefPtr       node;
  SeqFeatPtr           protFeat;
  ProtRefPtr           protRef;

  if (sfp == NULL || sfp->product == NULL) return;
  if (InGpsGenomic (sfp)) return;

  protFeat = GetBestProteinFeatureUnindexed (sfp->product);
  if (protFeat == NULL) return;
  protRef = (ProtRefPtr) protFeat->data.value.ptrvalue;
  if (protRef == NULL) return;

  link = &(sfp->xref);
  while ((node = *link) != NULL) {
    if (node->data.choice == SEQFEAT_PROT) {
      CopyProtXrefToProtFeat (protRef, (ProtRefPtr) node->data.value.ptrvalue);
      *link = node->next;
      node->next = NULL;
      SeqFeatXrefFree (node);
    } else {
      link = &(node->next);
    }
  }
}
9545
/*
 * User-field visitor callback: tidies the label string and the string
 * payload of one field.  A field with choice 1 is treated as a single
 * string; a field with choice 7 as an array of ufp->num strings.  Other
 * choices only get their label cleaned.  userdata is unused.
 */
static void CleanUserStrings (
  UserFieldPtr ufp,
  Pointer userdata
)

{
  Int4          idx;
  ObjectIdPtr   label;
  CharPtr PNTR  strs;

  label = ufp->label;
  if (label != NULL && label->str != NULL && ! StringHasNoText (label->str)) {
    CleanVisString (&(label->str));
  }

  switch (ufp->choice) {
    case 1 :
      if (! StringHasNoText ((CharPtr) ufp->data.ptrvalue)) {
        CleanVisStringAndCompress ((CharPtr PNTR) &(ufp->data.ptrvalue));
      }
      break;
    case 7 :
      strs = (CharPtr PNTR) ufp->data.ptrvalue;
      if (strs == NULL) break;
      for (idx = 0; idx < ufp->num; idx++) {
        TrimSpacesSemicolonsAndCommas (strs [idx]);
        Asn2gnbkCompressSpaces (strs [idx]);
      }
      break;
    default :
      break;
  }
}
9576
/*
 * User-field visitor callback: cleans the label of the given field, then
 * runs CleanUserStrings over the fields reachable from it via
 * VisitUserFieldsInUfp.  userdata is passed through unchanged.
 */
static void CleanUserFields (
  UserFieldPtr ufp,
  Pointer userdata
)

{
  ObjectIdPtr  label = ufp->label;

  if (label != NULL && label->str != NULL && ! StringHasNoText (label->str)) {
    CleanVisString (&(label->str));
  }

  VisitUserFieldsInUfp (ufp, userdata, CleanUserStrings);
}
9593
9594 //LCOV_EXCL_START
UserFieldSort(UserFieldPtr list,int (LIBCALLBACK * compar)PROTO ((VoidPtr,VoidPtr)))9595 NLM_EXTERN UserFieldPtr LIBCALL UserFieldSort (UserFieldPtr list, int (LIBCALLBACK *compar ) PROTO((VoidPtr, VoidPtr)))
9596
9597 {
9598 Int4 count, i;
9599 UserFieldPtr PNTR head;
9600 UserFieldPtr tmp;
9601
9602 if (list == NULL) return NULL;
9603
9604 count = 0;
9605 for (tmp = list; tmp != NULL; tmp = tmp->next) {
9606 count++;
9607 }
9608
9609 head = (UserFieldPtr *) MemNew (((size_t) count + 1) * sizeof (UserFieldPtr));
9610
9611 for (tmp = list, i = 0; tmp != NULL && i < count; i++) {
9612 head [i] = tmp;
9613 tmp = tmp->next;
9614 }
9615
9616 HeapSort (head, (size_t) count, sizeof (UserFieldPtr), compar);
9617
9618 for (i = 0; i < count; i++) {
9619 tmp = head [i];
9620 tmp->next = head [i + 1];
9621 }
9622 list = head [0];
9623
9624 MemFree (head);
9625
9626 return list;
9627 }
9628 //LCOV_EXCL_STOP
9629
9630 /*
9631 static CharPtr barcodeOrder [] = {
9632 "",
9633 "StructuredCommentPrefix",
9634 "Barcode Index Number",
9635 "Order Assignment",
9636 "iBOL Working Group",
9637 "iBOL Release Status",
9638 "Tentative Name",
9639 "StructuredCommentSuffix",
9640 NULL
9641 };
9642
9643 static Int2 GetBarcodeOrder (CharPtr str)
9644
9645 {
9646 Int2 i;
9647
9648 if (StringHasNoText (str)) return 0;
9649
9650 for (i = 1; barcodeOrder [i] != NULL; i++) {
9651 if (StringCmp (str, barcodeOrder [i]) == 0) return i;
9652 }
9653
9654 return 0;
9655 }
9656
9657 static int LIBCALLBACK ReorderBarcodeFields (VoidPtr ptr1, VoidPtr ptr2)
9658
9659 {
9660 Int2 idx1, idx2;
9661 ObjectIdPtr lbl1, lbl2;
9662 CharPtr str1, str2;
9663 UserFieldPtr ufp1, ufp2;
9664
9665 if (ptr1 == NULL || ptr2 == NULL) return 0;
9666
9667 ufp1 = *((UserFieldPtr PNTR) ptr1);
9668 ufp2 = *((UserFieldPtr PNTR) ptr2);
9669 if (ufp1 == NULL || ufp2 == NULL) return 0;
9670
9671 lbl1 = (ObjectIdPtr) ufp1->label;
9672 lbl2 = (ObjectIdPtr) ufp2->label;
9673 if (lbl1 == NULL || lbl2 == NULL) return 0;
9674
9675 str1 = (CharPtr) lbl1->str;
9676 str2 = (CharPtr) lbl2->str;
9677 if (str1 == NULL || str2 == NULL) return 0;
9678
9679 idx1 = GetBarcodeOrder (str1);
9680 idx2 = GetBarcodeOrder (str2);
9681
9682 if (idx1 > idx2) return 1;
9683 if (idx1 < idx2) return -1;
9684
9685 return 0;
9686 }
9687 */
9688
CleanStructuredComment(UserObjectPtr uop)9689 NLM_EXTERN void CleanStructuredComment (
9690 UserObjectPtr uop
9691 )
9692
9693 {
9694 Boolean genome_assembly_data = FALSE, ibol_data = FALSE;
9695 UserFieldPtr ufp;
9696 CharPtr str, core, new_str;
9697
9698 if (uop == NULL || uop->type == NULL
9699 || StringCmp (uop->type->str, "StructuredComment") != 0) {
9700 return;
9701 }
9702
9703 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
9704 if (ufp->label != NULL
9705 && ufp->choice == 1
9706 && (str = (CharPtr) ufp->data.ptrvalue) != NULL) {
9707 if (StringCmp (ufp->label->str, "StructuredCommentPrefix") == 0) {
9708 core = StructuredCommentDbnameFromString(str);
9709 new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (core) + 15));
9710 sprintf (new_str, "##%s-START##", core);
9711 str = MemFree (str);
9712 ufp->data.ptrvalue = new_str;
9713 if (StringCmp (core, "Genome-Assembly-Data") == 0) {
9714 genome_assembly_data = TRUE;
9715 } else if (StringCmp (core, "International Barcode of Life (iBOL)Data") == 0) {
9716 ibol_data = TRUE;
9717 }
9718 core = MemFree (core);
9719 } else if (StringCmp (ufp->label->str, "StructuredCommentSuffix") == 0) {
9720 core = StructuredCommentDbnameFromString(str);
9721 new_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (core) + 15));
9722 sprintf (new_str, "##%s-END##", core);
9723 str = MemFree (str);
9724 ufp->data.ptrvalue = new_str;
9725 if (StringCmp (core, "Genome-Assembly-Data") == 0) {
9726 genome_assembly_data = TRUE;
9727 } else if (StringCmp (core, "International Barcode of Life (iBOL)Data") == 0) {
9728 ibol_data = TRUE;
9729 }
9730 core = MemFree (core);
9731 }
9732 }
9733 }
9734
9735 if (genome_assembly_data) {
9736 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
9737 if (ufp->label != NULL
9738 && ufp->choice == 1
9739 && (str = (CharPtr) ufp->data.ptrvalue) != NULL) {
9740 if (StringCmp (ufp->label->str, "Finishing Goal") == 0 ||
9741 StringCmp (ufp->label->str, "Current Finishing Status") == 0) {
9742 if (StringCmp (str, "High Quality Draft") == 0) {
9743 ufp->data.ptrvalue = StringSave ("High-Quality Draft");
9744 str = MemFree (str);
9745 } else if (StringCmp (str, "Improved High Quality Draft") == 0) {
9746 ufp->data.ptrvalue = StringSave ("Improved High-Quality Draft");
9747 str = MemFree (str);
9748 } else if (StringCmp (str, "Annotation Directed") == 0) {
9749 ufp->data.ptrvalue = StringSave ("Annotation-Directed Improvement");
9750 str = MemFree (str);
9751 } else if (StringCmp (str, "Non-contiguous Finished") == 0) {
9752 ufp->data.ptrvalue = StringSave ("Noncontiguous Finished");
9753 str = MemFree (str);
9754 }
9755 } else if (StringCmp(ufp->label->str, "Assembly Date") == 0) {
9756 str = (CharPtr) ufp->data.ptrvalue;
9757 ReformatAssemblyDate(&str);
9758 ufp->data.ptrvalue = str;
9759 }
9760 }
9761 }
9762 }
9763
9764 if (ibol_data) {
9765 /*
9766 uop->data = UserFieldSort (uop->data, ReorderBarcodeFields);
9767 */
9768 ReorderStructuredCommentFields (uop);
9769 }
9770 }
9771
9772
9773 //LCOV_EXCL_START
9774 // change made as a result of SQD-2399, which will not be implemented for the C++ Toolkit
9775 // going forward. bad data was generated internally, production process has been fixed.
/*
 * Repairs the "Assembly" field of a "RefGeneTracking" user object.
 *
 * The first field labelled "Assembly" is examined.  If it has choice 11 and
 * its first child does not itself have choice 11, the children are wrapped
 * in a newly created field (choice 11, label id 0) which becomes the only
 * child of "Assembly".  Nothing is changed if either allocation fails.
 */
static void CleanRefGeneTrackingUserObject (
  UserObjectPtr uop
)

{
  UserFieldPtr  asmbly = NULL, entry, tmp, ufp;
  ObjectIdPtr   oip;

  if (uop == NULL) return;
  oip = uop->type;
  if (oip == NULL) return;
  if (StringCmp (oip->str, "RefGeneTracking") != 0) return;

  for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
    oip = ufp->label;
    if (oip == NULL) continue;
    if (StringCmp (oip->str, "Assembly") != 0) continue;
    asmbly = ufp;
    break;
  }

  if (asmbly == NULL || asmbly->choice != 11) return;
  tmp = asmbly->data.ptrvalue;
  /* children already nested one level down: nothing to repair */
  if (tmp == NULL || tmp->choice == 11) return;

  entry = UserFieldNew ();
  if (entry == NULL) return;
  oip = ObjectIdNew ();
  if (oip == NULL) {
    /* entry is still empty here; release it instead of leaking it */
    UserFieldFree (entry);
    return;
  }

  entry->data.ptrvalue = (Pointer) tmp;
  entry->choice = 11;
  entry->label = oip;
  oip->id = 0;

  asmbly->data.ptrvalue = (Pointer) entry;
  asmbly->choice = 11;
}
9814 //LCOV_EXCL_STOP
9815
/*
 * User-object visitor callback: cleans the object's type string, cleans all
 * of its fields via CleanUserFields, then applies the StructuredComment and
 * RefGeneTracking specific fix-ups.  userdata is passed through to the field
 * visitor.
 */
static void CleanUserObject (
  UserObjectPtr uop,
  Pointer userdata
)

{
  ObjectIdPtr  typeId = uop->type;

  if (typeId != NULL && typeId->str != NULL && ! StringHasNoText (typeId->str)) {
    CleanVisString (&(typeId->str));
  }

  VisitUserFieldsInUop (uop, userdata, CleanUserFields);
  CleanStructuredComment (uop);
  CleanRefGeneTrackingUserObject (uop);
}
9834
/*
 * Site keywords, NULL-terminated.  CleanupFeatureStrings compares each entry
 * as a case-insensitive prefix of a SEQFEAT_SITE feature's comment and
 * stores the index of the matching entry as the feature's site value, so the
 * order of this list is significant.  Entry 0 is an empty placeholder.
 */
static CharPtr bsecSiteList [] = {
  "", "active", "binding", "cleavage", "inhibit", "modifi",
  "glycosylation", "myristoylation", "mutagenized", "metal-binding",
  "phosphorylation", "acetylation", "amidation", "methylation",
  "hydroxylation", "sulfatation", "oxidative-deamination",
  "pyrrolidone-carboxylic-acid", "gamma-carboxyglutamic-acid",
  "blocked", "lipid-binding", "np-binding", "DNA-binding",
  "signal-peptide", "transit-peptide", "transmembrane-region",
  "nitrosylation", NULL
};
9845
/*
 * Strings that InformativeString rejects as carrying no information.  Each
 * is compared against the whole candidate string, ignoring case.  The list
 * is NULL-terminated.
 */
static CharPtr uninfStrings [] = {
  "signal",
  "transit",
  "peptide",
  "signal peptide",
  "signal-peptide",
  "signal_peptide",
  "transit peptide",
  "transit-peptide",
  "transit_peptide",
  "unnamed",
  "unknown",
  "putative",
  NULL
};
9861
InformativeString(CharPtr str)9862 static Boolean InformativeString (CharPtr str)
9863
9864 {
9865 Int2 i;
9866
9867 if (StringHasNoText (str)) return FALSE;
9868
9869 for (i = 0; uninfStrings [i] != NULL; i++) {
9870 if (StringICmp (str, uninfStrings [i]) == 0) return FALSE;
9871 }
9872
9873 return TRUE;
9874 }
9875
/*
 * Modernizes legacy exception phrases in sfp->except_text.
 *
 * The text is only touched if it contains at least one legacy phrase.  It is
 * then split at commas, each piece is trimmed, pieces that are exactly a
 * legacy phrase are replaced by the current spelling, and the non-empty
 * pieces are rejoined with ", ".
 *
 * Splitting is done on a private copy so that sfp->except_text stays intact
 * until the replacement string has been built; if an allocation fails the
 * feature is left unchanged and the temporary list is released.
 */
static void CleanUpExceptText (SeqFeatPtr sfp)

{
  static CharPtr  legacyTerms [] = {
    "ribosome slippage",
    "trans splicing",
    "alternate processing",
    "non-consensus splice site",
    "adjusted for low quality genome",
    NULL
  };
  static CharPtr  currentTerms [] = {
    "ribosomal slippage",
    "trans-splicing",
    "alternative processing",
    "nonconsensus splice site",
    "adjusted for low-quality genome",
    NULL
  };
  Boolean     found;
  ValNodePtr  head, vnp;
  Int2        i;
  size_t      len;
  CharPtr     prefix, ptr, str, tmp, work;

  if (sfp == NULL || sfp->except_text == NULL) return;

  found = FALSE;
  for (i = 0; legacyTerms [i] != NULL && ! found; i++) {
    if (StringStr (sfp->except_text, legacyTerms [i]) != NULL) {
      found = TRUE;
    }
  }
  if (! found) return;

  work = StringSave (sfp->except_text);
  if (work == NULL) return;

  /* split the copy at commas; each piece is stored as its own string */
  head = NULL;
  tmp = work;
  while (! StringHasNoText (tmp)) {
    ptr = StringChr (tmp, ',');
    if (ptr != NULL) {
      *ptr = '\0';
      ptr++;
    }
    TrimSpacesAroundString (tmp);
    ValNodeCopyStr (&head, 0, tmp);
    tmp = ptr;
  }
  work = MemFree (work);

  /* only pieces that are exactly a legacy phrase are rewritten */
  for (vnp = head; vnp != NULL; vnp = vnp->next) {
    tmp = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (tmp)) continue;
    for (i = 0; legacyTerms [i] != NULL; i++) {
      if (StringCmp (tmp, legacyTerms [i]) == 0) {
        vnp->data.ptrvalue = MemFree (tmp);
        vnp->data.ptrvalue = StringSave (currentTerms [i]);
        break;
      }
    }
  }

  len = 0;
  for (vnp = head; vnp != NULL; vnp = vnp->next) {
    tmp = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (tmp)) continue;
    len += StringLen (tmp) + 2;
  }

  str = (CharPtr) MemNew (len + 2);
  if (str == NULL) {
    ValNodeFreeData (head);
    return;
  }

  prefix = "";
  for (vnp = head; vnp != NULL; vnp = vnp->next) {
    tmp = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (tmp)) continue;
    StringCat (str, prefix);
    StringCat (str, tmp);
    prefix = ", ";
  }

  sfp->except_text = MemFree (sfp->except_text);
  sfp->except_text = str;

  ValNodeFreeData (head);
}
9948
ExpandGeneSynCom(ValNodePtr headsyn)9949 static Boolean ExpandGeneSynCom (ValNodePtr headsyn)
9950
9951 {
9952 ValNodePtr lastsyn;
9953 ValNodePtr newsyn;
9954 ValNodePtr nextsyn;
9955 CharPtr ptr;
9956 CharPtr str;
9957 CharPtr tmp;
9958
9959 str = (CharPtr) headsyn->data.ptrvalue;
9960 if (StringHasNoText (str)) return TRUE;
9961 if (StringChr (str, ',') == NULL) return FALSE;
9962
9963 nextsyn = headsyn->next;
9964 lastsyn = headsyn;
9965 tmp = StringSave ((CharPtr) headsyn->data.ptrvalue);
9966 str = tmp;
9967
9968 while (! StringHasNoText (str)) {
9969 ptr = StringChr (str, ',');
9970 if (ptr != NULL) {
9971 *ptr = '\0';
9972 ptr++;
9973 }
9974 TrimSpacesAroundString (str);
9975 newsyn = ValNodeNew (NULL);
9976 if (newsyn != NULL) {
9977 newsyn->data.ptrvalue = StringSave (str);
9978 newsyn->next = nextsyn;
9979 lastsyn->next = newsyn;
9980 lastsyn = newsyn;
9981 }
9982 str = ptr;
9983 }
9984
9985 MemFree (tmp);
9986 return TRUE;
9987 }
9988
ExpandGeneSynSem(ValNodePtr headsyn)9989 static Boolean ExpandGeneSynSem (ValNodePtr headsyn)
9990
9991 {
9992 ValNodePtr lastsyn;
9993 ValNodePtr newsyn;
9994 ValNodePtr nextsyn;
9995 CharPtr ptr;
9996 CharPtr str;
9997 CharPtr tmp;
9998
9999 str = (CharPtr) headsyn->data.ptrvalue;
10000 if (StringHasNoText (str)) return TRUE;
10001 if (StringStr (str, "; ") == NULL) return FALSE;
10002
10003 nextsyn = headsyn->next;
10004 lastsyn = headsyn;
10005 tmp = StringSave ((CharPtr) headsyn->data.ptrvalue);
10006 str = tmp;
10007
10008 while (! StringHasNoText (str)) {
10009 ptr = StringStr (str, "; ");
10010 if (ptr != NULL) {
10011 ptr++;
10012 *ptr = '\0';
10013 ptr++;
10014 }
10015 TrimSpacesAroundString (str);
10016 newsyn = ValNodeNew (NULL);
10017 if (newsyn != NULL) {
10018 newsyn->data.ptrvalue = StringSave (str);
10019 newsyn->next = nextsyn;
10020 lastsyn->next = newsyn;
10021 lastsyn = newsyn;
10022 }
10023 str = ptr;
10024 }
10025
10026 MemFree (tmp);
10027 return TRUE;
10028 }
10029
/*
 * Expands compound entries in a gene's synonym list.  The list is walked
 * twice: first splitting at commas (ExpandGeneSynCom), then at "; "
 * (ExpandGeneSynSem).  Whenever a splitter returns TRUE the examined node is
 * unlinked and freed with its data; the nodes the splitter inserted after it
 * are visited next.
 */
static void ExpandGeneSynList (GeneRefPtr grp)

{
  ValNodePtr PNTR  link;
  ValNodePtr       node;
  Int2             pass;
  Boolean          (*splitters [2]) (ValNodePtr);

  if (grp == NULL || grp->syn == NULL) return;

  splitters [0] = ExpandGeneSynCom;
  splitters [1] = ExpandGeneSynSem;

  for (pass = 0; pass < 2; pass++) {
    link = &(grp->syn);
    while ((node = *link) != NULL) {
      if (splitters [pass] (node)) {
        *link = node->next;
        node->next = NULL;
        ValNodeFreeData (node);
      } else {
        link = &(node->next);
      }
    }
  }
}
10069
/*
 * Sort/dedupe key for one GO term entry, filled in by SortGoTerms.  The
 * string pointers refer to data owned by the entry's nested user fields;
 * only goid is held as a copy.
 */
typedef struct gosstruc {
  CharPtr       term;      /* value of the "text string" field */
  Char          goid [32]; /* "go id" field as text, truncated to fit */
  CharPtr       evidence;  /* value of the "evidence" field, may be NULL */
  Int4          pmid;      /* "pubmed id" field, 0 when not present */
  CharPtr       goref;     /* value of the "go ref" field, may be NULL */
  UserFieldPtr  ufp;       /* the entry this key was built from */
} GosStruc, PNTR GosStrucPtr;
10078
SortVnpByGssp(VoidPtr ptr1,VoidPtr ptr2)10079 static int LIBCALLBACK SortVnpByGssp (VoidPtr ptr1, VoidPtr ptr2)
10080
10081 {
10082 int compare;
10083 GosStrucPtr gsp1, gsp2;
10084 ValNodePtr vnp1, vnp2;
10085
10086 if (ptr1 == NULL || ptr2 == NULL) return 0;
10087 vnp1 = *((ValNodePtr PNTR) ptr1);
10088 vnp2 = *((ValNodePtr PNTR) ptr2);
10089 if (vnp1 == NULL || vnp2 == NULL) return 0;
10090 gsp1 = (GosStrucPtr) vnp1->data.ptrvalue;
10091 gsp2 = (GosStrucPtr) vnp2->data.ptrvalue;
10092 if (gsp1 == NULL || gsp2 == NULL) return 0;
10093
10094 compare = StringICmp (gsp1->goid, gsp2->goid);
10095 if (compare > 0) {
10096 return 1;
10097 } else if (compare < 0) {
10098 return -1;
10099 }
10100
10101 compare = StringICmp (gsp1->term, gsp2->term);
10102 if (compare > 0) {
10103 return 1;
10104 } else if (compare < 0) {
10105 return -1;
10106 }
10107
10108 compare = StringICmp (gsp1->evidence, gsp2->evidence);
10109 if (compare > 0) {
10110 return 1;
10111 } else if (compare < 0) {
10112 return -1;
10113 }
10114
10115 if (gsp1->pmid == 0) return 1;
10116 if (gsp2->pmid == 0) return -1;
10117 if (gsp1->pmid > gsp2->pmid) {
10118 return 1;
10119 } else if (gsp1->pmid < gsp2->pmid) {
10120 return -1;
10121 }
10122
10123 return 0;
10124 }
10125
/*
 * Labels of the GeneOntology qualifier fields whose entries are processed
 * (matched ignoring case).  NULL-terminated; entry 0 is an empty string.
 */
static CharPtr bsecGoQualType [] = {
  "", "Process", "Component", "Function", NULL
};

/*
 * Labels of the fields inside one GO term entry (matched ignoring case).
 * The index of the matching label is used in switch statements, so the order
 * matters: 1 text string, 2 go id, 3 pubmed id, 4 go ref, 5 evidence.
 */
static CharPtr bsecGoFieldType [] = {
  "", "text string", "go id", "pubmed id", "go ref", "evidence", NULL
};
10133
/*
 * Sorts a list of GO term entries and drops adjacent duplicates.
 *
 *   entryhead - first entry; every entry is expected to have choice 11 and
 *               to hold the term's fields as children.  Scanning stops at
 *               the first entry with a different choice.
 *
 * Returns the head of the rebuilt list (entryhead unchanged if no entry
 * yielded a key).
 *
 * A key is built for each entry that has a non-empty "text string" field.
 * Keys are ordered with SortVnpByGssp; an entry is treated as a duplicate of
 * the previously kept one when term or go id match and pmid, go ref and
 * evidence all match, and is then freed.
 *
 * The key strings point into the entry's own fields, so a freed duplicate
 * must never serve as the reference for the next comparison; lastgsp is
 * therefore only advanced when an entry is kept.
 *
 * NOTE(review): entries for which no key is built are not linked into the
 * returned list and are not freed here - confirm whether that is intended.
 */
static UserFieldPtr SortGoTerms (
  UserFieldPtr entryhead
)

{
  UserFieldPtr  entry, topufp, ufp, lastufp;
  CharPtr       evidence, goid, goref, textstr;
  Char          gid [32];
  GosStrucPtr   gsp, lastgsp;
  ValNodePtr    head = NULL, vnp;
  Int2          j;
  ObjectIdPtr   oip;
  Int4          pmid;

  if (entryhead == NULL) return entryhead;

  for (entry = entryhead; entry != NULL; entry = entry->next) {
    if (entry->choice != 11) break;
    topufp = (UserFieldPtr) entry->data.ptrvalue;
    if (topufp == NULL) continue;

    textstr = NULL;
    evidence = NULL;
    goid = NULL;
    goref = NULL;
    pmid = 0;
    for (ufp = topufp; ufp != NULL; ufp = ufp->next) {
      oip = ufp->label;
      if (oip == NULL) continue;
      for (j = 0; bsecGoFieldType [j] != NULL; j++) {
        if (StringICmp (oip->str, bsecGoFieldType [j]) == 0) break;
      }
      if (bsecGoFieldType [j] == NULL) continue;
      switch (j) {
        case 1 :
          if (ufp->choice == 1) {
            textstr = (CharPtr) ufp->data.ptrvalue;
          }
          break;
        case 2 :
          if (ufp->choice == 1) {
            goid = (CharPtr) ufp->data.ptrvalue;
          } else if (ufp->choice == 2) {
            /* integer go id: render as text; copied into the key below */
            sprintf (gid, "%ld", (long) (Int4) ufp->data.intvalue);
            goid = (CharPtr) gid;
          }
          break;
        case 3 :
          if (ufp->choice == 2) {
            pmid = (Int4) ufp->data.intvalue;
          }
          break;
        case 4 :
          if (ufp->choice == 1) {
            goref = (CharPtr) ufp->data.ptrvalue;
          }
          break;
        case 5 :
          if (ufp->choice == 1) {
            evidence = (CharPtr) ufp->data.ptrvalue;
          }
          break;
        default :
          break;
      }
    }

    if (StringDoesHaveText (textstr)) {
      gsp = (GosStrucPtr) MemNew (sizeof (GosStruc));
      if (gsp != NULL) {
        gsp->term = textstr;
        StringNCpy_0 (gsp->goid, goid, sizeof (gsp->goid));
        gsp->evidence = evidence;
        gsp->pmid = pmid;
        gsp->goref = goref;
        gsp->ufp = entry;
        ValNodeAddPointer (&head, 0, (Pointer) gsp);
      }
    }
  }

  if (head == NULL) return entryhead;
  head = ValNodeSort (head, SortVnpByGssp);

  entryhead = NULL;
  lastgsp = NULL;
  lastufp = NULL;
  for (vnp = head; vnp != NULL; vnp = vnp->next) {
    gsp = (GosStrucPtr) vnp->data.ptrvalue;
    if (gsp == NULL || gsp->ufp == NULL) continue;
    if (lastgsp != NULL &&
        (StringICmp (gsp->term, lastgsp->term) == 0 || StringICmp (gsp->goid, lastgsp->goid) == 0) &&
        (gsp->pmid == lastgsp->pmid &&
         StringICmp (gsp->goref, lastgsp->goref) == 0 &&
         StringICmp (gsp->evidence, lastgsp->evidence) == 0)) {
      /* duplicate of the last kept entry: free it, keep lastgsp as it is */
      gsp->ufp->next = NULL;
      UserFieldFree (gsp->ufp);
      gsp->ufp = NULL;
    } else {
      if (lastufp != NULL) {
        lastufp->next = gsp->ufp;
      } else {
        entryhead = gsp->ufp;
      }
      lastufp = gsp->ufp;
      lastufp->next = NULL;
      lastgsp = gsp;
    }
  }

  ValNodeFreeData (head);

  return entryhead;
}
10247
/*
 * User-field visitor callback: when the field has choice 11, carries one of
 * the bsecGoQualType labels (ignoring case), and its first child also has
 * choice 11, replaces its child list with the result of SortGoTerms.
 * userdata is unused.
 */
static void SortGoTermsUfp (
  UserFieldPtr ufp,
  Pointer userdata
)

{
  UserFieldPtr  firstEntry;
  ObjectIdPtr   label;
  CharPtr PNTR  qual;

  if (ufp == NULL || ufp->choice != 11) return;
  label = ufp->label;
  if (label == NULL) return;

  for (qual = bsecGoQualType; *qual != NULL; qual++) {
    if (StringICmp (label->str, *qual) == 0) break;
  }
  if (*qual == NULL) return;

  firstEntry = ufp->data.ptrvalue;
  if (firstEntry == NULL || firstEntry->choice != 11) return;

  ufp->data.ptrvalue = SortGoTerms (firstEntry);
}
10271
/*
 * User-object visitor callback: runs SortGoTermsUfp over the fields of a
 * user object whose type string is exactly "GeneOntology".  Other objects
 * are ignored.
 */
static void SortGoTermsSfp (
  UserObjectPtr uop,
  Pointer userdata
)

{
  ObjectIdPtr  typeId;

  if (uop == NULL || uop->type == NULL) return;
  typeId = uop->type;
  if (StringCmp (typeId->str, "GeneOntology") != 0) return;

  VisitUserFieldsInUop (uop, userdata, SortGoTermsUfp);
}
10287
/*
 * Strips redundant prefixes from GO term entries: a leading "GO:" from
 * string-valued "go id" fields and a leading "GO_REF:" from string-valued
 * "go ref" fields (prefix matched ignoring case).  Scanning stops at the
 * first entry whose choice is not 11.
 */
static void CleanupGoTerms (
  UserFieldPtr entryhead
)

{
  UserFieldPtr  entry, field;
  Int2          kind;
  ObjectIdPtr   label;
  size_t        plen;
  CharPtr       prefix, value;

  if (entryhead == NULL) return;

  for (entry = entryhead; entry != NULL; entry = entry->next) {
    if (entry->choice != 11) break;

    for (field = (UserFieldPtr) entry->data.ptrvalue; field != NULL; field = field->next) {
      label = field->label;
      if (label == NULL) continue;

      for (kind = 0; bsecGoFieldType [kind] != NULL; kind++) {
        if (StringICmp (label->str, bsecGoFieldType [kind]) == 0) break;
      }
      if (bsecGoFieldType [kind] == NULL) continue;

      /* only "go id" (2) and "go ref" (4) carry a prefix to remove */
      switch (kind) {
        case 2 :
          prefix = "GO:";
          break;
        case 4 :
          prefix = "GO_REF:";
          break;
        default :
          prefix = NULL;
          break;
      }
      if (prefix == NULL || field->choice != 1) continue;

      value = (CharPtr) field->data.ptrvalue;
      if (value == NULL || *value == '\0') continue;

      plen = StringLen (prefix);
      if (StringNICmp (value, prefix, plen) != 0) continue;

      field->data.ptrvalue = (Pointer) StringSave (value + plen);
      MemFree (value);
    }
  }
}
10345
/*
 * User-field visitor callback: when the field has choice 11, carries one of
 * the bsecGoQualType labels (ignoring case), and its first child also has
 * choice 11, passes the child list to CleanupGoTerms.  userdata is unused.
 */
static void CleanupGoTermsUfp (
  UserFieldPtr ufp,
  Pointer userdata
)

{
  UserFieldPtr  firstEntry;
  ObjectIdPtr   label;
  CharPtr PNTR  qual;

  if (ufp == NULL || ufp->choice != 11) return;
  label = ufp->label;
  if (label == NULL) return;

  for (qual = bsecGoQualType; *qual != NULL; qual++) {
    if (StringICmp (label->str, *qual) == 0) break;
  }
  if (*qual == NULL) return;

  firstEntry = ufp->data.ptrvalue;
  if (firstEntry == NULL || firstEntry->choice != 11) return;

  CleanupGoTerms (firstEntry);
}
10369
/*
 * User-object visitor callback: runs CleanupGoTermsUfp over the fields of a
 * user object whose type string is exactly "GeneOntology".  Other objects
 * are ignored.
 */
static void CleanupGoTermsSfp (
  UserObjectPtr uop,
  Pointer userdata
)

{
  ObjectIdPtr  typeId;

  if (uop == NULL || uop->type == NULL) return;
  typeId = uop->type;
  if (StringCmp (typeId->str, "GeneOntology") != 0) return;

  VisitUserFieldsInUop (uop, userdata, CleanupGoTermsUfp);
}
10385
/*
 * Converts SGML entities in str to ASCII via Sgml2Ascii.
 *
 * Returns a newly allocated, space-normalized string, or NULL when there is
 * nothing to do: str has no text or no '&', the converted text would not fit
 * the 256-byte work buffer, or the conversion is empty or identical to str.
 * The first '<' and the first '>' in the converted text are replaced by
 * spaces before trimming.
 */
static CharPtr CleanUpSgml (
  CharPtr str
)

{
  Char     decoded [256];
  CharPtr  hit;
  Int2     needed;

  if (StringHasNoText (str) || StringChr (str, '&') == NULL) return NULL;

  needed = Sgml2AsciiLen (str);
  if (needed + 2 >= sizeof (decoded)) return NULL;

  decoded [0] = '\0';
  Sgml2Ascii (str, decoded, needed + 1);
  if (StringHasNoText (decoded) || StringCmp (str, decoded) == 0) return NULL;

  hit = StringChr (decoded, '<');
  if (hit != NULL) {
    *hit = ' ';
  }
  hit = StringChr (decoded, '>');
  if (hit != NULL) {
    *hit = ' ';
  }

  TrimSpacesAroundString (decoded);
  Asn2gnbkCompressSpaces (decoded);

  return StringSave (decoded);
}
10419
10420 /* special exception for genome pipeline rRNA names */
10421
NotExceptedRibosomalName(CharPtr name)10422 static Boolean NotExceptedRibosomalName (
10423 CharPtr name
10424 )
10425
10426 {
10427 Char ch;
10428 CharPtr str;
10429
10430 str = StringStr (name, " ribosomal");
10431 if (str == NULL) return FALSE;
10432
10433 str += 10;
10434 ch = *str;
10435 while (ch != '\0') {
10436 if (ch == ' ' || IS_DIGIT (ch)) {
10437 /* okay */
10438 } else {
10439 return TRUE;
10440 }
10441 str++;
10442 ch = *str;
10443 }
10444
10445 return FALSE;
10446 }
10447
10448 //LCOV_EXCL_START
CleanupSubSourceOrgModOtherFeat(SeqFeatPtr sfp,Pointer userdata)10449 NLM_EXTERN void CleanupSubSourceOrgModOtherFeat (
10450 SeqFeatPtr sfp,
10451 Pointer userdata
10452 )
10453
10454 {
10455 BioSourcePtr biop;
10456 OrgNamePtr onp = NULL;
10457 OrgRefPtr orp;
10458
10459 if (sfp == NULL) return;
10460 if (sfp->data.choice != SEQFEAT_BIOSRC) return;
10461 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
10462 if (biop == NULL) return;
10463 orp = biop->org;
10464 if (orp != NULL) {
10465 onp = orp->orgname;
10466 if (orp != NULL) {
10467 CleanupOrgModOther (biop, onp);
10468 }
10469 }
10470 CleanupSubSourceOther (biop, onp);
10471 }
10472
CleanupSubSourceOrgModOtherDesc(SeqDescrPtr sdp,Pointer userdata)10473 NLM_EXTERN void CleanupSubSourceOrgModOtherDesc (
10474 SeqDescrPtr sdp,
10475 Pointer userdata
10476 )
10477
10478 {
10479 BioSourcePtr biop;
10480 OrgNamePtr onp = NULL;
10481 OrgRefPtr orp;
10482
10483 if (sdp == NULL) return;
10484 if (sdp->choice != Seq_descr_source) return;
10485 biop = (BioSourcePtr) sdp->data.ptrvalue;
10486 if (biop == NULL) return;
10487 orp = biop->org;
10488 if (orp != NULL) {
10489 onp = orp->orgname;
10490 if (orp != NULL) {
10491 CleanupOrgModOther (biop, onp);
10492 }
10493 }
10494 CleanupSubSourceOther (biop, onp);
10495 }
10496 //LCOV_EXCL_STOP
10497
10498
/* One row of the entity table consulted by BSECDecodeXml. */
typedef struct xmltable {
  CharPtr  code;   /* entity text to match, without the trailing ';' */
  size_t   len;    /* number of characters of code that are compared */
  CharPtr  letter; /* replacement text; may be empty */
} XmlTable, PNTR XmlTablePtr;
10504
10505 static XmlTable xmlunicodes [] = {
10506 { "&", 4, "&"},
10507 { "&apos", 5, "\'"},
10508 { ">", 3, ">"},
10509 { "<", 3, "<"},
10510 { """, 5, "\""},
10511 { "
", 8, ""},
10512 { "Δ", 5, "Delta"},
10513 { "α", 5, "alpha"},
10514 { "β", 5, "beta"},
10515 { "γ", 5, "gamma"},
10516 { "θ", 5, "theta"},
10517 { "λ", 5, "lambda"},
10518 { "μ", 5, "mu"},
10519 { "ν", 5, "nu"},
10520 { " ", 6, " "},
10521 { "‎", 6, ""},
10522 { "′", 6, "'"},
10523 { "→", 6, "->"},
10524 { "−", 6, "-"},
10525 { "∆", 6, "delta"},
10526 { "fi", 7, "fi"},
10527 { "fl", 7, "fl"},
10528 { ",", 7, ","},
10529 { NULL, 0, ""}
10530 };
10531
/*
 * Replaces the entities listed in xmlunicodes with their plain-text
 * equivalents, in place.
 *
 *   str - string to decode; returned unchanged if it has no text
 *
 * Returns str.
 *
 * An entity is recognised at '&' when a table code matches (ignoring case)
 * and is followed by ';' (which is consumed), a space, or the end of the
 * string.  Unrecognised '&' sequences are copied through unchanged.
 *
 * Because decoding happens in the caller's buffer, the write position must
 * never pass the read position.  A replacement that is longer than the room
 * freed so far (e.g. "lambda" for a 5-character code with no ';') is
 * therefore not applied and the text is copied literally instead; writing it
 * anyway would overwrite unread input or the terminator.
 */
static CharPtr BSECDecodeXml (
  CharPtr str
)

{
  Char         ch, nxt;
  CharPtr      dst, ptr, src;
  Int2         i;
  size_t       len;
  XmlTablePtr  xtp;

  if (StringHasNoText (str)) return str;

  src = str;
  dst = str;
  ch = *src;
  while (ch != '\0') {
    xtp = NULL;
    len = 1;
    if (ch == '&') {
      for (i = 0; xmlunicodes [i].code != NULL; i++) {
        if (StringNICmp (src, xmlunicodes [i].code, xmlunicodes [i].len) != 0) continue;
        nxt = *(src + xmlunicodes [i].len);
        if (nxt == ';') {
          xtp = &(xmlunicodes [i]);
          len = xtp->len + 1;
          break;
        } else if (nxt == ' ' || nxt == '\0') {
          xtp = &(xmlunicodes [i]);
          len = xtp->len;
          break;
        }
      }
      /* room available = slack already gained (src - dst) + span consumed */
      if (xtp != NULL && StringLen (xtp->letter) > (size_t) (src - dst) + len) {
        xtp = NULL;
        len = 1;
      }
    }
    if (xtp != NULL) {
      for (ptr = xtp->letter; *ptr != '\0'; ptr++) {
        *dst = *ptr;
        dst++;
      }
      src += len;
    } else {
      *dst = ch;
      dst++;
      src++;
    }
    ch = *src;
  }
  *dst = '\0';

  return str;
}
10594
/*
 * CleanupFeatureStrings
 *
 * Normalizes, in place, the string content of a single feature:
 *
 *   - decodes XML entities and tidies sfp->comment, sfp->title and
 *     sfp->except_text, and cleans GO terms in sfp->ext;
 *   - cleans protein names/descriptions carried on feature xrefs;
 *   - returns early for bond, secondary-structure and comment features;
 *   - for site features, tries to derive the site type from the comment
 *     by matching it against bsecSiteList;
 *   - then applies choice-specific cleanup (gene, protein, RNA, pub,
 *     import, region, user object, biosource) and finally cleans any
 *     OrgRef / OrgName reached through an org or biosource feature.
 *
 * Parameters:
 *   sfp            feature to clean; NULL is ignored.
 *   isJscan        not referenced by this function.
 *   isEmblOrDdbj   forwarded to FixOldDbxrefs.
 *   stripSerial    forwarded to NormalizePubdesc.
 *   modernizeFeats when TRUE, ModernizePCRPrimers is run on biosources.
 *   publist        forwarded to NormalizePubdesc.
 *
 * Ownership notes: several branches move sfp->comment into another field
 * (protein name, RNA product name) and then clear sfp->comment, and the
 * gene/protein dbxref lists are unlinked from the reference and appended
 * to sfp->dbxref.
 */
static void CleanupFeatureStrings (
  SeqFeatPtr sfp,
  Boolean isJscan,
  Boolean isEmblOrDdbj,
  Boolean stripSerial,
  Boolean modernizeFeats,
  ValNodePtr PNTR publist
)

{
  Uint1                aa;
  BioSourcePtr         biop;
  Char                 ch;
  Uint1                codon [6];
  GeneNomenclaturePtr  gnp;
  GeneRefPtr           grp;
  ImpFeatPtr           ifp;
  Boolean              is_fMet = FALSE;
  Boolean              is_iMet = FALSE;
  Int2                 j;
  Boolean              justTrnaText;
  size_t               len;
  CharPtr              metnote;
  CharPtr              name;
  ObjectIdPtr          oip;
  OrgNamePtr           onp = NULL;
  OrgRefPtr            orp;
  PubdescPtr           pdp;
  ProtRefPtr           prp;
  CharPtr              ptr;
  RNAGenPtr            rgp;
  RNAQualPtr           rqp;
  RnaRefPtr            rrp;
  SubSourcePtr         ssp;
  CharPtr              str;
  CharPtr              suff;
  CharPtr              temp;
  Char                 tmp [64];
  Boolean              trimming_junk;
  tRNAPtr              trp;
  UserFieldPtr         ufp;
  UserObjectPtr        uop;
  CharPtr              val;
  ValNodePtr           vnp, vnp2;
  SeqFeatXrefPtr       xref;

  if (sfp == NULL) return;

  /* ---- fields common to every feature ---- */

  BSECDecodeXml (sfp->comment);
  CleanVisStringAndCompress (&(sfp->comment));
  len = StringLen (sfp->comment);
  if (len > 4) {
    /* a trailing mix of two periods and a comma, or ",.,", becomes "..." */
    if (StringCmp (sfp->comment + len - 3, ",..") == 0 ||
        StringCmp (sfp->comment + len - 3, ".,.") == 0 ||
        StringCmp (sfp->comment + len - 3, "..,") == 0 ||
        StringCmp (sfp->comment + len - 3, ",.,") == 0) {
      sfp->comment [len - 3] = '.';
      sfp->comment [len - 2] = '.';
      sfp->comment [len - 1] = '.';
    }
  }
  BSECDecodeXml (sfp->title);
  CleanVisString (&(sfp->title));
  CleanVisString (&(sfp->except_text));
  if (StringDoesHaveText (sfp->except_text)) {
    CleanUpExceptText (sfp);
  }
  CleanDoubleQuote (sfp->comment);
  if (StringCmp (sfp->comment, ".") == 0) {
    sfp->comment = MemFree (sfp->comment);
  }
  if (sfp->ext != NULL) {
    VisitUserObjectsInUop (sfp->ext, NULL, CleanupGoTermsSfp);
  }

  /* protein references carried on xrefs */
  for (xref = sfp->xref; xref != NULL; xref = xref->next) {
    if (xref->data.choice != SEQFEAT_PROT) continue;
    prp = (ProtRefPtr) xref->data.value.ptrvalue;
    if (prp == NULL) continue;
    RemoveFlankingQuotes (&(prp->desc));
    RemoveFlankingQuotesList (&(prp->name));
    CleanVisStringAndCompress (&(prp->desc));
    CleanVisStringListAndCompress (&(prp->name));
  }

  /* ---- features whose data is not a pointer ---- */

  switch (sfp->data.choice) {
    case SEQFEAT_BOND :
    case SEQFEAT_PSEC_STR :
    case SEQFEAT_COMMENT:
      return;
    case SEQFEAT_SITE :
      /*
       * If the comment starts with a known site name (as listed, or with
       * hyphens replaced by spaces) and the site type is still 0 or 255,
       * set the type to the list index; drop the comment when nothing but
       * whitespace or " site" follows the name.
       */
      for (j = 0; bsecSiteList [j] != NULL; j++) {
        StringNCpy_0 (tmp, bsecSiteList [j], sizeof (tmp));
        len = StringLen (tmp);
        if (StringNICmp (sfp->comment, tmp, len) == 0) {
          if (sfp->data.value.intvalue == 0 || sfp->data.value.intvalue == 255) {
            sfp->data.value.intvalue = j;
            if (StringHasNoText (sfp->comment + len) || StringICmp (sfp->comment + len, " site") == 0) {
              sfp->comment = MemFree (sfp->comment);
            }
          }
        } else {
          val = tmp;
          ch = *val;
          while (ch != '\0') {
            if (ch == '-') {
              *val = ' ';
            }
            val++;
            ch = *val;
          }
          if (StringNICmp (sfp->comment, tmp, len) == 0) {
            if (sfp->data.value.intvalue == 0 || sfp->data.value.intvalue == 255) {
              sfp->data.value.intvalue = j;
              if (StringHasNoText (sfp->comment + len) || StringICmp (sfp->comment + len, " site") == 0) {
                sfp->comment = MemFree (sfp->comment);
              }
            }
          }
        }
      }
      break;
    default :
      break;
  }
  if (sfp->data.value.ptrvalue == NULL) return;

  /* ---- move gbquals into OrgRef modifiers for org / biosource ---- */

  biop = NULL;
  orp = NULL;
  switch (sfp->data.choice) {
    case SEQFEAT_ORG :
      orp = (OrgRefPtr) sfp->data.value.ptrvalue;
      break;
    case SEQFEAT_BIOSRC :
      biop = (BioSourcePtr) sfp->data.value.ptrvalue;
      if (biop != NULL) {
        orp = biop->org;
      }
      break;
    default :
      break;
  }
  if (orp != NULL && sfp->qual != NULL) {
    GbqualToOrpMod (&(sfp->qual), &(orp->mod));
  }

  /* ---- choice-specific cleanup ---- */

  biop = NULL;
  orp = NULL;
  switch (sfp->data.choice) {
    case SEQFEAT_GENE :
      grp = (GeneRefPtr) sfp->data.value.ptrvalue;
      if (sfp->xref != NULL) {
        HandleXrefOnGene (sfp);
      }
      BSECDecodeXml (grp->locus);
      CleanVisStringAndCompress (&(grp->locus));
      CleanVisString (&(grp->allele));
      CleanVisStringAndCompress (&(grp->desc));
      CleanVisString (&(grp->maploc));
      CleanVisString (&(grp->locus_tag));
      ExpandGeneSynList (grp);
      for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
        str = (CharPtr) vnp->data.ptrvalue;
        if (StringHasNoText (str)) continue;
        BSECDecodeXml (str);
      }
      CleanVisStringListCaseSensitive (&(grp->syn));
      grp->syn = ValNodeSort (grp->syn, SortVnpByStringCS);
      grp->syn = UniqueStringValNodeCS (grp->syn);
      grp->syn = ValNodeSort (grp->syn, SortVnpByStringCILCFirst);
      CleanDoubleQuote (grp->locus);
      CleanDoubleQuote (grp->allele);
      CleanDoubleQuote (grp->desc);
      CleanDoubleQuote (grp->maploc);
      CleanDoubleQuote (grp->locus_tag);
      CleanDoubleQuoteList (grp->syn);
      FixOldDbxrefs (grp->db, isEmblOrDdbj);
      FixNumericDbxrefs (grp->db);
      grp->db = ValNodeSort (grp->db, SortDbxref);
      CleanupDuplicateDbxrefs (&(grp->db));
      CleanupObsoleteDbxrefs (&(grp->db));
      CleanupGoDbxrefs (grp->db);
      /* now move grp->dbxref to sfp->dbxref */
      vnp = grp->db;
      grp->db = NULL;
      ValNodeLink ((&sfp->dbxref), vnp);
      /* a synonym identical to the locus is blanked, then the list is re-cleaned */
      if (grp->locus != NULL && grp->syn != NULL) {
        for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
          str = (CharPtr) vnp->data.ptrvalue;
          if (StringCmp (grp->locus, str) == 0) {
            vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
          }
        }
        CleanVisStringListCaseSensitive (&(grp->syn));
      }
      gnp = grp->formal_name;
      if (gnp != NULL) {
        FixOldDbxref (gnp->source);
        FixNumericDbxref (gnp->source);
      }
      break;
    case SEQFEAT_ORG :
      orp = (OrgRefPtr) sfp->data.value.ptrvalue;
      break;
    case SEQFEAT_CDREGION :
      if (sfp->xref != NULL && sfp->product != NULL) {
        HandleXrefOnCDS (sfp);
      }
      break;
    case SEQFEAT_PROT :
      prp = (ProtRefPtr) sfp->data.value.ptrvalue;
      /*
       * Split an EC entry at its first space or semicolon when
       * ECNumberCanBeSplit allows it; the remainder is inserted as the
       * next node and is therefore examined by the next loop iteration.
       */
      for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
        str = (CharPtr) vnp->data.ptrvalue;
        if (StringHasNoText (str)) continue;
        CleanupECNumber (str);
        if (ECNumberCanBeSplit (str)) {
          ptr = str;
          ch = *ptr;
          while (ch != '\0' && ch != ' ' && ch != ';') {
            ptr++;
            ch = *ptr;
          }
          if (ch != '\0') {
            *ptr = '\0';
            ptr++;
            vnp2 = ValNodeCopyStr (NULL, 0, ptr);
            if (vnp2 != NULL) {
              vnp2->next = vnp->next;
              vnp->next = vnp2;
            }
          }
        }
      }
      for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
        str = (CharPtr) vnp->data.ptrvalue;
        if (StringHasNoText (str)) continue;
        BSECDecodeXml (str);
      }
      BSECDecodeXml (prp->desc);
      CleanVisStringAndCompress (&(prp->desc));
      CleanVisStringJunkListAndCompress (&(prp->name));
      CleanVisStringList (&(prp->ec));
      CleanVisStringJunkListAndCompress (&(prp->activity));
      CleanDoubleQuote (prp->desc);
      CleanDoubleQuoteList (prp->name);
      CleanDoubleQuoteList (prp->ec);
      CleanDoubleQuoteList (prp->activity);
      RemoveFlankingQuotes (&(prp->desc));
      RemoveFlankingQuotesList (&(prp->name));
      FixOldDbxrefs (prp->db, isEmblOrDdbj);
      FixNumericDbxrefs (prp->db);
      prp->db = ValNodeSort (prp->db, SortDbxref);
      CleanupDuplicateDbxrefs (&(prp->db));
      CleanupObsoleteDbxrefs (&(prp->db));
      CleanupGoDbxrefs (prp->db);
      /* now move prp->dbxref to sfp->dbxref */
      vnp = prp->db;
      prp->db = NULL;
      ValNodeLink ((&sfp->dbxref), vnp);
      /*
       * For processed values other than 3, 4 and 5, a nameless protein
       * takes over the feature comment as its name (unless the comment is
       * just "putative"); ownership of the string moves to the name list.
       */
      if (prp->processed != 3 && prp->processed != 4 && prp->processed != 5 &&
          prp->name == NULL && sfp->comment != NULL) {
        if (StringICmp (sfp->comment, "putative") != 0) {
          ValNodeAddStr (&(prp->name), 0, sfp->comment);
          sfp->comment = NULL;
        }
      }
      if (prp->processed == 3 || prp->processed == 4 || prp->processed == 5) {
        if (prp->name != NULL) {
          str = (CharPtr) prp->name->data.ptrvalue;
          if ((StringStr (str, "putative") != NULL ||
               StringStr (str, "put. ") != NULL) &&
              sfp->comment == NULL) {
            sfp->comment = StringSave ("putative");
          }
          if (! InformativeString (str)) {
            prp->name = ValNodeFreeData (prp->name);
          }
        }
      }
      if ((prp->processed == 1 || prp->processed == 2) && prp->name == NULL) {
        ValNodeCopyStr (&(prp->name), 0, "unnamed");
      }
      /* expand rubisco abbreviations to the full product name */
      for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
        str = (CharPtr) vnp->data.ptrvalue;
        if (StringICmp (str, "RbcL") == 0 || StringICmp (str, "rubisco large subunit") == 0) {
          vnp->data.ptrvalue = StringSave ("ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit");
          MemFree (str);
        } else if (StringICmp (str, "RbcS") == 0 || StringICmp (str, "rubisco small subunit") == 0) {
          vnp->data.ptrvalue = StringSave ("ribulose-1,5-bisphosphate carboxylase/oxygenase small subunit");
          MemFree (str);
        }
      }
      break;
    case SEQFEAT_RNA :
      rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
      if (rrp->ext.choice == 1) {
        /* ext is a plain name string */
        BSECDecodeXml ((CharPtr) rrp->ext.value.ptrvalue);
        CleanVisStringAndCompress ((CharPtr PNTR) &(rrp->ext.value.ptrvalue));
        CleanDoubleQuote ((CharPtr) rrp->ext.value.ptrvalue);
        RemoveFlankingQuotes ((CharPtr PNTR) &(rrp->ext.value.ptrvalue));
        if (rrp->ext.value.ptrvalue == NULL) {
          rrp->ext.choice = 0;
        } else if (rrp->type == 4) {
          name = (CharPtr) rrp->ext.value.ptrvalue;
          len = StringLen (name);
          if (len > 5) {
            /* " ribosomal RNA" followed by two trailing characters: drop those two */
            if (len > 16 && StringNICmp (name + len - 16, " ribosomal RNA .", 14) == 0) {
              name [len-2] = '\0';
              len = StringLen (name);
            }
            /*
             * " ribosomal rRNA" is 15 characters long, so the tail must be
             * compared at len - 15 for 15 characters; a name that already
             * says "ribosomal" is left for the later rRNA normalization
             * instead of having " rRNA" expanded a second time here.
             */
            if (len > 15 && StringNICmp (name + len - 15, " ribosomal rRNA", 15) == 0) {
            } else if (StringNICmp (name + len - 5, " rRNA", 5) == 0) {
              str = MemNew (len + 10);
              if (str != NULL) {
                StringNCpy (str, name, len - 5);
                StringCat (str, " ribosomal RNA");
                rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
                rrp->ext.value.ptrvalue = (Pointer) str;
              }
            } else if (StringNICmp (name + len - 5, "_rRNA", 5) == 0) {
              str = MemNew (len + 10);
              if (str != NULL) {
                StringNCpy (str, name, len - 5);
                StringCat (str, " ribosomal RNA");
                rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
                rrp->ext.value.ptrvalue = (Pointer) str;
              }
            }
          }
        } else if (rrp->type == 3) {
          /* a name that parses as a tRNA is converted into a tRNA ext */
          name = (CharPtr) rrp->ext.value.ptrvalue;
          aa = ParseTRnaString (name, &justTrnaText, codon, FALSE);
          if (aa != 0) {
            /* must be read before the name string is freed */
            is_fMet = (Boolean) (StringStr (name, "fMet") != NULL);
            is_iMet = (Boolean) (StringStr (name, "iMet") != NULL);
            rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
            trp = (tRNAPtr) MemNew (sizeof (tRNA));
            if (trp != NULL) {
              trp->aatype = 2;
              for (j = 0; j < 6; j++) {
                trp->codon [j] = 255;
              }
              if (justTrnaText) {
                for (j = 0; j < 6; j++) {
                  trp->codon [j] = codon [j];
                }
              }
              trp->aa = aa;
              rrp->ext.choice = 2;
              rrp->ext.value.ptrvalue = (Pointer) trp;
              CleanupTrna (sfp, trp);
            }
            if (is_fMet) {
              if (sfp->comment == NULL) {
                sfp->comment = StringSave ("fMet");
              } else {
                len = StringLen (sfp->comment) + StringLen ("fMet") + 5;
                str = MemNew (sizeof (Char) * len);
                /* keep the existing comment if the new buffer cannot be allocated */
                if (str != NULL) {
                  StringCpy (str, sfp->comment);
                  StringCat (str, "; ");
                  StringCat (str, "fMet");
                  sfp->comment = MemFree (sfp->comment);
                  sfp->comment = str;
                }
              }
            }
            if (is_iMet) {
              if (sfp->comment == NULL) {
                sfp->comment = StringSave ("iMet");
              } else {
                len = StringLen (sfp->comment) + StringLen ("iMet") + 5;
                str = MemNew (sizeof (Char) * len);
                /* keep the existing comment if the new buffer cannot be allocated */
                if (str != NULL) {
                  StringCpy (str, sfp->comment);
                  StringCat (str, "; ");
                  StringCat (str, "iMet");
                  sfp->comment = MemFree (sfp->comment);
                  sfp->comment = str;
                }
              }
            }
          }
        }
      } else if (rrp->ext.choice == 2) {
        trp = (tRNAPtr) rrp->ext.value.ptrvalue;
        CleanupTrna (sfp, trp);
      } else if (rrp->type == 3 && (! StringHasNoText (sfp->comment))) {
        /* no ext at all: try to build the tRNA ext from the comment */
        aa = ParseTRnaString (sfp->comment, &justTrnaText, codon, TRUE);
        if (aa != 0) {
          trp = (tRNAPtr) MemNew (sizeof (tRNA));
          if (trp != NULL) {
            trp->aatype = 2;
            for (j = 0; j < 6; j++) {
              trp->codon [j] = 255;
            }
            if (justTrnaText) {
              for (j = 0; j < 6; j++) {
                trp->codon [j] = codon [j];
              }
            }
            trp->aa = aa;
            rrp->ext.choice = 2;
            rrp->ext.value.ptrvalue = (Pointer) trp;
            if (justTrnaText) {
              /*
               * The comment is now redundant with the tRNA ext, except
               * that an fMet / iMet designation is kept as a short note.
               * Decide which note survives BEFORE freeing the comment;
               * testing the iMet spellings after the comment has already
               * been replaced would always discard it.
               */
              metnote = NULL;
              if (StringCmp (sfp->comment, "tRNA-fMet") == 0 ||
                  StringCmp (sfp->comment, "fMet") == 0 ||
                  StringCmp (sfp->comment, "fMet tRNA") == 0 ||
                  StringCmp (sfp->comment, "fMet-tRNA") == 0) {
                metnote = "fMet";
              } else if (StringCmp (sfp->comment, "tRNA-iMet") == 0 ||
                         StringCmp (sfp->comment, "iMet") == 0 ||
                         StringCmp (sfp->comment, "iMet tRNA") == 0 ||
                         StringCmp (sfp->comment, "iMet-tRNA") == 0) {
                metnote = "iMet";
              }
              sfp->comment = MemFree (sfp->comment);
              if (metnote != NULL) {
                sfp->comment = StringSave (metnote);
              }
            }
          }
        }
      }
      if (rrp->ext.choice == 3) {
        rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
        if (rgp != NULL) {
          CleanVisStringAndCompress (&(rgp->product));
          CleanDoubleQuote (rgp->product);
          RemoveFlankingQuotes (&(rgp->product));
          if (StringICmp (rgp->product, "internal transcribed spacer 1 (ITS1)") == 0) {
            rgp->product = MemFree (rgp->product);
            rgp->product = StringSave ("internal transcribed spacer 1");
          } else if (StringICmp (rgp->product, "internal transcribed spacer 2 (ITS2)") == 0) {
            rgp->product = MemFree (rgp->product);
            rgp->product = StringSave ("internal transcribed spacer 2");
          } else if (StringICmp (rgp->product, "internal transcribed spacer 3 (ITS3)") == 0) {
            rgp->product = MemFree (rgp->product);
            rgp->product = StringSave ("internal transcribed spacer 3");
          }
          CleanVisStringAndCompress (&(rgp->_class));
          CleanDoubleQuote (rgp->_class);
          for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
            CleanVisStringAndCompress (&(rqp->qual));
            CleanDoubleQuote (rqp->qual);
            CleanVisStringAndCompress (&(rqp->val));
            CleanDoubleQuote (rqp->val);
          }
        }
      }
      /* a short comment such as "16S ribosomal RNA" / "16S rRNA" becomes the name */
      if (rrp->ext.choice == 0 && sfp->comment != NULL && rrp->type == 4) {
        len = StringLen (sfp->comment);
        if (len > 15 && len < 20) {
          if (StringNICmp (sfp->comment + len - 15, "S ribosomal RNA", 15) == 0) {
            rrp->ext.choice = 1;
            rrp->ext.value.ptrvalue = sfp->comment;
            sfp->comment = NULL;
          }
        } else if (len > 6 && len < 20) {
          if (StringNICmp (sfp->comment + len - 6, "S rRNA", 6) == 0) {
            rrp->ext.choice = 1;
            rrp->ext.value.ptrvalue = sfp->comment;
            sfp->comment = NULL;
          }
        }
      }
      /*
       * A comment that merely duplicates the RNA name is deliberately NOT
       * removed here: that was disabled based on a request by DeAnne
       * Cravaritis because it caused unexpected results when RNA comments
       * are copied to the product name or vice versa.
       */
      if (rrp->type == 4 && rrp->ext.choice == 1 ) {
        /* rewrite "<prefix> ribosomal ..." / "<prefix> rRNA ..." as "<prefix> ribosomal RNA <suffix>" */
        name = (CharPtr) rrp->ext.value.ptrvalue;
        len = StringLen (name);
        if (len > 5 && NotExceptedRibosomalName (name)) {
          suff = NULL;
          str = StringStr (name, " ribosomal");
          if (str != NULL) {
            suff = str + 10;
            ch = *suff;
            /* only accept the match when it ends on a word boundary */
            if (ch != '\0' && ch != ' ') {
              suff = NULL;
              str = NULL;
            }
          }
          if (str == NULL) {
            str = StringStr (name, " rRNA");
            if (str != NULL) {
              suff = str + 5;
              ch = *suff;
              if (ch != '\0' && ch != ' ') {
                suff = NULL;
                str = NULL;
              }
            }
          }
          if (suff != NULL && StringNICmp (suff, " RNA", 4) == 0) {
            suff += 4;
          }
          if (suff != NULL && StringNICmp (suff, " DNA", 4) == 0) {
            suff += 4;
          }
          if (suff != NULL && StringNICmp (suff, " ribosomal", 10) == 0) {
            suff += 10;
          }
          TrimSpacesAroundString (suff);
          if (str != NULL) {
            /* cut the name at the match; suff still points into the same buffer */
            *str = '\0';
            len = StringLen (name);
            if (StringHasNoText (suff)) {
              suff = NULL;
            }
            if (suff != NULL) {
              len += StringLen (suff) + 2;
            }
            str = MemNew (len + 15);
            if (str != NULL) {
              StringCpy (str, name);
              StringCat (str, " ribosomal RNA");
              if (suff != NULL) {
                ch = *suff;
                if (ch != ',' && ch != ';') {
                  StringCat (str, " ");
                }
                StringCat (str, suff);
              }
              rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
              rrp->ext.value.ptrvalue = (Pointer) str;
            }
          }
        }
        /* capitalize the 's' in a leading size such as "16s " */
        name = (CharPtr) rrp->ext.value.ptrvalue;
        len = StringLen (name);
        if (len > 5) {
          ch = *name;
          while (ch != '\0' && (ch == '.' || (IS_DIGIT (ch)))) {
            name++;
            ch = *name;
          }
          if (ch == 's' && name [1] == ' ') {
            *name = 'S';
          }
        }
        StrStripSpaces ((CharPtr) rrp->ext.value.ptrvalue);
        name = (CharPtr) rrp->ext.value.ptrvalue;
        len = StringLen (name);
        if (len > 17) {
          if (StringNICmp (name + len - 17, "ribosomal RNA RNA", 17) == 0) {
            *(name + len - 4) = '\0';
          }
        }
        /* repeatedly collapse doubled words until none of the patterns is found */
        trimming_junk = TRUE;
        while (trimming_junk) {
          StrStripSpaces ((CharPtr) rrp->ext.value.ptrvalue);
          name = (CharPtr) rrp->ext.value.ptrvalue;
          ptr = StringStr (name, "ribosomal ribosomal");
          if (ptr != NULL) {
            suff = ptr + 19;
            *(ptr + 10) = '\0';
            temp = MemNew (StringLen (name) + StringLen (suff) + 2);
            TrimSpacesAroundString (suff);
            StringCpy (temp, name);
            if (suff [0] != ' ' && suff [0] != '\0') {
              StringCat (temp, " ");
            }
            StringCat (temp, suff);
            rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
            rrp->ext.value.ptrvalue = (Pointer) temp;
          } else {
            ptr = StringStr (name, "RNA RNA");
            if (ptr != NULL) {
              suff = ptr + 7;
              *(ptr + 4) = '\0';
              temp = MemNew (StringLen (name) + StringLen (suff) + 2);
              TrimSpacesAroundString (suff);
              StringCpy (temp, name);
              if (suff [0] != ' ' && suff [0] != '\0') {
                StringCat (temp, " ");
              }
              StringCat (temp, suff);
              rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
              rrp->ext.value.ptrvalue = (Pointer) temp;
            } else {
              ptr = StringStr (name, "ribosomal RNA ribosomal");
              if (ptr != NULL) {
                suff = ptr + 23;
                *(ptr + 14) = '\0';
                temp = MemNew (StringLen (name) + StringLen (suff) + 2);
                TrimSpacesAroundString (suff);
                StringCpy (temp, name);
                if (suff [0] != ' ' && suff [0] != '\0') {
                  StringCat (temp, " ");
                }
                StringCat (temp, suff);
                rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
                rrp->ext.value.ptrvalue = (Pointer) temp;
              } else {
                ptr = StringStr (name, "ribosomal rRNA");
                if (ptr != NULL) {
                  suff = ptr + 14;
                  *(ptr + 10) = '\0';
                  temp = MemNew (StringLen (name) + StringLen (" RNA") + StringLen (suff) + 2);
                  TrimSpacesAroundString (suff);
                  StringCpy (temp, name);
                  StringCat (temp, " RNA");
                  if (suff [0] != ' ' && suff [0] != '\0') {
                    StringCat (temp, " ");
                  }
                  StringCat (temp, suff);
                  rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
                  rrp->ext.value.ptrvalue = (Pointer) temp;
                } else {
                  ptr = StringStr (name, "RNA rRNA");
                  if (ptr != NULL) {
                    suff = ptr + 8;
                    *(ptr + 3) = '\0';
                    temp = MemNew (StringLen (name) + StringLen (suff) + 2);
                    TrimSpacesAroundString (suff);
                    StringCpy (temp, name);
                    if (suff [0] != ' ' && suff [0] != '\0') {
                      StringCat (temp, " ");
                    }
                    StringCat (temp, suff);
                    rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
                    rrp->ext.value.ptrvalue = (Pointer) temp;
                  } else {
                    trimming_junk = FALSE;
                  }
                }
              }
            }
          }
        }
        TrimSpacesAroundString ((CharPtr) rrp->ext.value.ptrvalue);
      }
      /* type 2 with no name: a comment ending in " RNA" / " mRNA" becomes the name */
      if (rrp->type == 2 && rrp->ext.choice == 0 && sfp->comment != NULL) {
        len = StringLen (sfp->comment);
        if (len > 5) {
          if (StringNICmp (sfp->comment + len - 4, " RNA", 4) == 0 ||
              StringNICmp (sfp->comment + len - 5, " mRNA", 5) == 0) {
            rrp->ext.choice = 1;
            rrp->ext.value.ptrvalue = sfp->comment;
            sfp->comment = NULL;
          }
        }
      }
      /* types 255 and 10: spell out ITS abbreviations in the product */
      if (rrp->type == 255 || rrp->type == 10) {
        name = GetRNARefProductString (rrp, NULL);
        if (StringICmp (name, "its1") == 0 || StringICmp (name, "its 1") == 0) {
          SetRNARefProductString (rrp, NULL, "internal transcribed spacer 1", ExistingTextOption_replace_old);
        } else if (StringICmp (name, "its2") == 0 || StringICmp (name, "its 2") == 0) {
          SetRNARefProductString (rrp, NULL, "internal transcribed spacer 2", ExistingTextOption_replace_old);
        } else if (StringICmp (name, "its3") == 0 || StringICmp (name, "its 3") == 0) {
          SetRNARefProductString (rrp, NULL, "internal transcribed spacer 3", ExistingTextOption_replace_old);
        }
        name = MemFree (name);
      }
      /* ... and promote an ITS comment to the name when there is no ext yet */
      if ((rrp->type == 255 || rrp->type == 10) && rrp->ext.choice == 0 && sfp->comment != NULL) {
        if (StringICmp (sfp->comment, "internal transcribed spacer 1") == 0 ||
            StringICmp (sfp->comment, "internal transcribed spacer 2") == 0 ||
            StringICmp (sfp->comment, "internal transcribed spacer 3") == 0) {
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = sfp->comment;
          sfp->comment = NULL;
        } else if (StringICmp (sfp->comment, "internal transcribed spacer 1 (ITS1)") == 0 ||
                   StringICmp (sfp->comment, "internal transcribed spacer 2 (ITS2)") == 0 ||
                   StringICmp (sfp->comment, "internal transcribed spacer 3 (ITS3)") == 0) {
          /* strip the parenthesized abbreviation before reusing the string */
          ptr = StringStr (sfp->comment, " (");
          if (ptr != NULL) {
            *ptr = '\0';
          }
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = sfp->comment;
          sfp->comment = NULL;
        } else if (StringICmp (sfp->comment, "ITS1") == 0 || StringICmp (sfp->comment, "ITS 1") == 0) {
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 1");
          sfp->comment = MemFree (sfp->comment);
        } else if (StringICmp (sfp->comment, "ITS2") == 0 || StringICmp (sfp->comment, "ITS 2") == 0) {
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 2");
          sfp->comment = MemFree (sfp->comment);
        } else if (StringICmp (sfp->comment, "ITS3") == 0 || StringICmp (sfp->comment, "ITS 3") == 0) {
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = StringSave ("internal transcribed spacer 3");
          sfp->comment = MemFree (sfp->comment);
        }
      }
      break;
    case SEQFEAT_PUB :
      pdp = (PubdescPtr) sfp->data.value.ptrvalue;
      CleanDoubleQuote (pdp->comment);
      NormalizePubdesc (pdp, stripSerial, TRUE, publist);
      break;
    case SEQFEAT_SEQ :
      break;
    case SEQFEAT_IMP :
      ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
      CleanVisString (&(ifp->key));
      CleanVisString (&(ifp->loc));
      CleanVisString (&(ifp->descr));
      break;
    case SEQFEAT_REGION :
      CleanVisStringAndCompress ((CharPtr PNTR) &(sfp->data.value.ptrvalue));
      CleanDoubleQuote ((CharPtr) sfp->data.value.ptrvalue);
      if (sfp->data.value.ptrvalue == NULL) {
        /* a region whose name cleaned away to nothing becomes a comment feature */
        sfp->data.choice = SEQFEAT_COMMENT;
      } else {
        if (sfp->ext != NULL) {
          uop = FindUopByTag (sfp->ext, "cddScoreData");
          if (uop != NULL) {
            for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
              if (ufp->choice != 1) continue;
              oip = ufp->label;
              if (oip == NULL) continue;
              if (StringICmp (oip->str, "definition") == 0) {
                CleanVisStringAndCompress ((CharPtr PNTR) &(ufp->data.ptrvalue));
                CleanDoubleQuote ((CharPtr) ufp->data.ptrvalue);
              }
            }
          }
        }
      }
      break;
    case SEQFEAT_COMMENT :
      break;
    case SEQFEAT_BOND :
      break;
    case SEQFEAT_SITE :
      break;
    case SEQFEAT_RSITE :
      break;
    case SEQFEAT_USER :
      VisitAllUserObjectsInUop ((UserObjectPtr) sfp->data.value.ptrvalue, NULL, CleanUserObject);
      break;
    case SEQFEAT_TXINIT :
      break;
    case SEQFEAT_NUM :
      break;
    case SEQFEAT_PSEC_STR :
      break;
    case SEQFEAT_NON_STD_RESIDUE :
      break;
    case SEQFEAT_HET :
      break;
    case SEQFEAT_BIOSRC :
      biop = (BioSourcePtr) sfp->data.value.ptrvalue;
      if (biop != NULL) {
        if (biop->genome == GENOME_virion) {
          biop->genome = GENOME_unknown;
        }
        orp = biop->org;
        if (orp != NULL) {
          CleanVisStringListAndCompress (&(orp->mod));
          OrpModToSubSource (&(orp->mod), &(biop->subtype));
          onp = orp->orgname;
          if (onp != NULL) {
            CleanupOrgModOther (biop, onp);
          }
        }
        biop->subtype = SortSubSourceList (biop->subtype);
        CleanSubSourceList (&(biop->subtype), biop->genome);
        CleanupSubSourceOther (biop, onp);
        biop->subtype = SortSubSourceList (biop->subtype);
        if (modernizeFeats) {
          ModernizePCRPrimers (biop);
        }
        CleanupPCRReactionSet (&(biop->pcr_primers));
        /* a plasmid-name subsource turns an unknown/genomic location into plasmid */
        if (biop->genome == GENOME_unknown || biop->genome == GENOME_genomic) {
          for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
            if (ssp->subtype == SUBSRC_plasmid_name) {
              biop->genome = GENOME_plasmid;
            }
          }
        }
      }
      break;
    default :
      break;
  }

  /* ---- OrgRef / OrgName reached through an org or biosource feature ---- */

  if (orp != NULL) {
    CleanVisStringAndCompress (&(orp->taxname));
    CleanVisStringAndCompress (&(orp->common));
    CleanVisStringList (&(orp->mod));
    CleanVisStringList (&(orp->syn));
    FixOldDbxrefs (orp->db, isEmblOrDdbj);
    FixNumericDbxrefs (orp->db);
    orp->db = ValNodeSort (orp->db, SortDbxref);
    orp->syn = ValNodeSort (orp->syn, SortVnpByString);
    orp->syn = UniqueValNode (orp->syn);
    CleanupDuplicateDbxrefs (&(orp->db));
    CleanupObsoleteDbxrefs (&(orp->db));
    CleanupGoDbxrefs (orp->db);
    onp = orp->orgname;
    while (onp != NULL) {
      CleanVisString (&(onp->attrib));
      CleanVisString (&(onp->lineage));
      CleanVisString (&(onp->div));
      OrpModToOrgMod (&(orp->mod), &(onp->mod));
      onp->mod = SortOrgModList (onp->mod);
      CleanOrgModListEx (&(onp->mod), orp->common);
      onp->mod = SortOrgModList (onp->mod);
      onp = onp->next;
    }
  }
}
11495
/*
 * SplitStringsAtSemicolon
 *
 * Walks the string list at *head and breaks every entry at its first
 * semicolon: the entry is terminated there and the text after the
 * semicolon is inserted as a new node immediately behind it.  Because the
 * walk then moves on to that new node, any further semicolons in the
 * remainder are split in turn.
 *
 * Returns *head, or NULL when head or *head is NULL.
 */
static ValNodePtr SplitStringsAtSemicolon (ValNodePtr PNTR head)

{
  ValNodePtr  node, rest;
  CharPtr     semi;

  if (head == NULL || *head == NULL) return NULL;

  for (node = *head; node != NULL; node = node->next) {
    semi = StringChr ((CharPtr) node->data.ptrvalue, ';');
    if (semi == NULL) continue;

    /* terminate the current entry at the semicolon */
    *semi = '\0';

    /* copy what followed the semicolon into a node of its own */
    rest = ValNodeCopyStr (NULL, 0, semi + 1);
    if (rest == NULL) continue;

    /* splice it in right after the current node */
    rest->next = node->next;
    node->next = rest;
  }

  return *head;
}
11522
11523
/*
 * CleanupDescriptorStrings
 *
 * Applies in-place string and list cleanup to a single descriptor, choosing
 * the work to do from sdp->choice.
 *
 *   sdp            descriptor to clean; NULL is ignored
 *   stripSerial    forwarded to NormalizePubdesc for pub descriptors
 *   modernizeFeats when TRUE, ModernizePCRPrimers is run on source descriptors
 *   publist        forwarded to NormalizePubdesc for pub descriptors
 *   isEmblOrDdbj   selects the case-sensitive keyword cleanup for GenBank
 *                  blocks and is forwarded to FixOldDbxrefs
 *
 * Org and source descriptors additionally share the OrgRef/OrgName cleanup
 * at the bottom of the function.  The order of the individual cleanup calls
 * is significant (sorting precedes uniquing, splitting precedes trimming),
 * so statements should not be reordered casually.
 */
static void CleanupDescriptorStrings (
  ValNodePtr sdp,
  Boolean stripSerial,
  Boolean modernizeFeats,
  ValNodePtr PNTR publist,
  Boolean isEmblOrDdbj
)

{
  BioSourcePtr  biop;
  EMBLBlockPtr  ebp;
  GBBlockPtr    gbp;
  OrgNamePtr    onp = NULL;
  OrgRefPtr     orp;
  PubdescPtr    pdp;
  PirBlockPtr   pir;
  PrfBlockPtr   prf;
  SPBlockPtr    sp;
  SubSourcePtr  ssp;
  CharPtr       str;
  ValNodePtr    vnp;

  if (sdp == NULL) return;
  /* mol_type and method are skipped before data.ptrvalue is examined;
     presumably they carry an integer rather than a pointer - TODO confirm */
  switch (sdp->choice) {
    case Seq_descr_mol_type :
    case Seq_descr_method :
      return;
    default :
      break;
  }
  if (sdp->data.ptrvalue == NULL) return;

  biop = NULL;
  orp = NULL;
  switch (sdp->choice) {
    case Seq_descr_mol_type :
      /* not reachable: already returned above */
      break;
    case Seq_descr_modif :
      break;
    case Seq_descr_method :
      /* not reachable: already returned above */
      break;
    case Seq_descr_name :
      CleanVisString ((CharPtr PNTR) &sdp->data.ptrvalue);
      /* keep a non-NULL (empty) string if the cleanup removed everything */
      if (sdp->data.ptrvalue == NULL) {
        sdp->data.ptrvalue = StringSave ("");
      }
      break;
    case Seq_descr_title :
      BSECDecodeXml ((CharPtr) sdp->data.ptrvalue);
      /* str is assigned here but not read again in this case */
      str = (CharPtr) sdp->data.ptrvalue;
      CleanVisStringAndCompress ((CharPtr PNTR) &sdp->data.ptrvalue);
      if (sdp->data.ptrvalue == NULL) {
        sdp->data.ptrvalue = StringSave ("");
      }
      break;
    case Seq_descr_org :
      /* cleaned by the shared OrgRef section below */
      orp = (OrgRefPtr) sdp->data.ptrvalue;
      break;
    case Seq_descr_comment :
      BSECDecodeXml ((CharPtr) sdp->data.ptrvalue);
      CleanVisStringJunk ((CharPtr PNTR) &sdp->data.ptrvalue);
      RemoveSpacesBetweenTildes ((CharPtr) sdp->data.ptrvalue);
      if (sdp->data.ptrvalue == NULL) {
        sdp->data.ptrvalue = StringSave ("");
      }
      break;
    case Seq_descr_num :
      break;
    case Seq_descr_maploc :
      break;
    case Seq_descr_pir :
      pir = (PirBlockPtr) sdp->data.ptrvalue;
      SplitStringsAtSemicolon (&(pir->keywords));
      break;
    case Seq_descr_genbank :
      gbp = (GBBlockPtr) sdp->data.ptrvalue;
      SplitStringsAtSemicolon (&(gbp->keywords));
      /* normalize the older TPA keyword spellings to "TPA:assembly" */
      for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) {
        str = (CharPtr) vnp->data.ptrvalue;
        if (StringICmp (str, "TPA:reassembly") == 0) {
          vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
          vnp->data.ptrvalue = StringSave ("TPA:assembly");
        } else if (StringICmp (str, "TPA_reassembly") == 0) {
          vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
          vnp->data.ptrvalue = StringSave ("TPA:assembly");
        } else if (StringICmp (str, "TPA_assembly") == 0) {
          vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
          vnp->data.ptrvalue = StringSave ("TPA:assembly");
        }
      }
      /* extra accessions: clean, sort, then drop duplicates */
      CleanVisStringList (&(gbp->extra_accessions));
      gbp->extra_accessions = ValNodeSort (gbp->extra_accessions, SortVnpByString);
      gbp->extra_accessions = UniqueValNode (gbp->extra_accessions);
      if (isEmblOrDdbj) {
        CleanVisStringListCaseSensitive (&(gbp->keywords));
      } else {
        CleanVisStringList (&(gbp->keywords));
      }
      /* a source or origin consisting of just "." is discarded */
      CleanVisStringJunk (&(gbp->source));
      if (StringCmp (gbp->source, ".") == 0) {
        gbp->source = MemFree (gbp->source);
      }
      CleanVisStringJunk (&(gbp->origin));
      if (StringCmp (gbp->origin, ".") == 0) {
        gbp->origin = MemFree (gbp->origin);
      }
      CleanVisString (&(gbp->date));
      CleanVisString (&(gbp->div));
      CleanVisString (&(gbp->taxonomy));
      break;
    case Seq_descr_pub :
      pdp = (PubdescPtr) sdp->data.ptrvalue;
      CleanDoubleQuote (pdp->comment);
      NormalizePubdesc (pdp, stripSerial, TRUE, publist);
      break;
    case Seq_descr_region :
      CleanVisString ((CharPtr PNTR) &sdp->data.ptrvalue);
      if (sdp->data.ptrvalue == NULL) {
        sdp->data.ptrvalue = StringSave ("");
      }
      break;
    case Seq_descr_user :
      VisitAllUserObjectsInUop ((UserObjectPtr) sdp->data.ptrvalue, NULL, CleanUserObject);
      break;
    case Seq_descr_sp :
      sp = (SPBlockPtr) sdp->data.ptrvalue;
      SplitStringsAtSemicolon (&(sp->keywords));
      break;
    case Seq_descr_dbxref :
      break;
    case Seq_descr_embl :
      ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
      CleanVisStringList (&(ebp->extra_acc));
      ebp->extra_acc = ValNodeSort (ebp->extra_acc, SortVnpByString);
      SplitStringsAtSemicolon (&(ebp->keywords));
      CleanVisStringListCaseSensitive (&(ebp->keywords));
      break;
    case Seq_descr_create_date :
      break;
    case Seq_descr_update_date :
      break;
    case Seq_descr_prf :
      prf = (PrfBlockPtr) sdp->data.ptrvalue;
      SplitStringsAtSemicolon (&(prf->keywords));
      break;
    case Seq_descr_pdb :
      break;
    case Seq_descr_het :
      break;
    case Seq_descr_source :
      biop = (BioSourcePtr) sdp->data.ptrvalue;
      if (biop != NULL) {
        /* a virion genome value is reset to unknown */
        if (biop->genome == GENOME_virion) {
          biop->genome = GENOME_unknown;
        }
        orp = biop->org;
        if (orp != NULL) {
          CleanVisStringList (&(orp->mod));
          OrpModToSubSource (&(orp->mod), &(biop->subtype));
          onp = orp->orgname;
          if (onp != NULL) {
            CleanupOrgModOther (biop, onp);
          }
        }
        /* onp is still NULL here when the source has no org or no orgname */
        biop->subtype = SortSubSourceList (biop->subtype);
        CleanSubSourceList (&(biop->subtype), biop->genome);
        CleanupSubSourceOther (biop, onp);
        biop->subtype = SortSubSourceList (biop->subtype);
        if (modernizeFeats) {
          ModernizePCRPrimers (biop);
        }
        CleanupPCRReactionSet (&(biop->pcr_primers));
        /* a plasmid-name subsource on an unknown/genomic source sets the genome to plasmid */
        if (biop->genome == GENOME_unknown || biop->genome == GENOME_genomic) {
          for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
            if (ssp->subtype == SUBSRC_plasmid_name) {
              biop->genome = GENOME_plasmid;
            }
          }
        }
      }
      break;
    case Seq_descr_molinfo :
      break;
    default :
      break;
  }
  /* shared OrgRef cleanup; orp is non-NULL only for org descriptors and for
     source descriptors that carry an org */
  if (orp != NULL) {
    CleanVisStringAndCompress (&(orp->taxname));
    CleanVisStringAndCompress (&(orp->common));
    CleanVisStringList (&(orp->mod));
    CleanVisStringList (&(orp->syn));
    FixOldDbxrefs (orp->db, isEmblOrDdbj);
    FixNumericDbxrefs (orp->db);
    orp->db = ValNodeSort (orp->db, SortDbxref);
    orp->syn = ValNodeSort (orp->syn, SortVnpByString);
    orp->syn = UniqueValNode (orp->syn);
    CleanupDuplicateDbxrefs (&(orp->db));
    CleanupObsoleteDbxrefs (&(orp->db));
    CleanupGoDbxrefs (orp->db);
    /* clean every OrgName in the chain starting at orp->orgname */
    onp = orp->orgname;
    while (onp != NULL) {
      CleanVisString (&(onp->attrib));
      CleanVisString (&(onp->lineage));
      CleanVisString (&(onp->div));
      OrpModToOrgMod (&(orp->mod), &(onp->mod));
      onp->mod = SortOrgModList (onp->mod);
      CleanOrgModListEx (&(onp->mod), orp->common);
      onp->mod = SortOrgModList (onp->mod);
      onp = onp->next;
    }
  }
}
11736
CheckForQual(GBQualPtr gbqual,CharPtr string_q,CharPtr string_v)11737 static Int2 CheckForQual (GBQualPtr gbqual, CharPtr string_q, CharPtr string_v)
11738
11739 {
11740 GBQualPtr curq;
11741
11742 for (curq = gbqual; curq; curq = curq->next) {
11743 if (StringCmp (string_q, curq->qual) == 0) {
11744 if (curq->val == NULL) {
11745 curq->val = StringSave (string_v);
11746 return 1;
11747 }
11748 if (StringCmp (string_v, curq->val) == 0) return 1;
11749 }
11750 }
11751 return 0;
11752 }
/*
 * AddGBQual
 *
 * Appends a qual/val pair to the end of the qualifier list unless
 * CheckForQual reports that the pair is already represented.
 *
 *   gbqual  head of the existing list, may be NULL
 *   qual    qualifier name
 *   val     qualifier value, may be NULL (the new entry then has no value)
 *
 * A "translation" qualifier with a NULL or empty value is never added.
 *
 * Returns the head of the list: the original head, or the new node when the
 * list was empty.  If GBQualNew fails the list is returned unchanged instead
 * of dereferencing a NULL node.
 */
static GBQualPtr AddGBQual (GBQualPtr gbqual, CharPtr qual, CharPtr val)

{
  GBQualPtr  newq;
  GBQualPtr  tail;

  if (StringCmp (qual, "translation") == 0) {
    if (val == NULL || *val == '\0') return gbqual;
  }

  if (gbqual != NULL && CheckForQual (gbqual, qual, val) == 1) return gbqual;

  newq = GBQualNew ();
  if (newq == NULL) return gbqual;

  newq->next = NULL;
  if (val != NULL) {
    newq->val = StringSave (val);
  }
  newq->qual = StringSave (qual);

  if (gbqual == NULL) return newq;

  /* append at the end so existing qualifier order is preserved */
  for (tail = gbqual; tail->next != NULL; tail = tail->next) continue;
  tail->next = newq;

  return gbqual;
}
11779
/*
 * AddReplaceQual
 *
 * Extracts the double-quoted text from a string of the form  ..."text")
 * and adds it to sfp->qual as a "replace" qualifier via AddGBQual.
 *
 *   sfp  feature receiving the qualifier; NULL is ignored
 *   p    text to parse; it must contain a '"' and end with ')'
 *
 * The closing quote is located by scanning backwards from the final ')'.
 * p is modified temporarily (the closing quote is overwritten with a
 * terminator while the value is copied) and restored before returning.
 * Nothing is added when no closing quote exists at or after the first
 * character of the value, e.g. for input ending in  ")  where the only
 * quote is the opening one.
 */
static void AddReplaceQual (SeqFeatPtr sfp, CharPtr p)

{
  CharPtr  closeq;
  CharPtr  val;

  if (sfp == NULL) return;

  val = StringChr (p, '\"');
  if (val == NULL) return;
  val++;

  closeq = p + StringLen (p) - 1;
  if (*closeq != ')') return;

  for (closeq--; closeq > val && *closeq != '\"'; closeq--) continue;
  /* closeq < val means the scan ran into the opening quote itself */
  if (closeq < val || *closeq != '\"') return;

  *closeq = '\0';
  sfp->qual = AddGBQual (sfp->qual, "replace", val);
  *closeq = '\"';
}
11796
11797 //LCOV_EXCL_START
SerialNumberInString(CharPtr str)11798 NLM_EXTERN Boolean SerialNumberInString (CharPtr str)
11799
11800 {
11801 Char ch;
11802 Boolean hasdigits;
11803 CharPtr ptr;
11804 Boolean suspicious = FALSE;
11805
11806 if (str == NULL || StringHasNoText (str)) return FALSE;
11807 ptr = StringChr (str, '[');
11808
11809 /* bail if first digit after bracket is 0 */
11810 if (ptr != NULL && ptr [1] == '0') return FALSE;
11811
11812 while ((! suspicious) && ptr != NULL) {
11813 hasdigits = FALSE;
11814 ptr++;
11815 ch = *ptr;
11816 while (IS_DIGIT (ch)) {
11817 hasdigits = TRUE;
11818 ptr++;
11819 ch = *ptr;
11820 }
11821 if (ch == ']' && hasdigits) {
11822 suspicious = TRUE;
11823 }
11824 if (! suspicious) {
11825 ptr = StringChr (ptr, '[');
11826 }
11827 }
11828 return suspicious;
11829 }
11830 //LCOV_EXCL_STOP
11831
11832 /* now only strips serials for local, general, refseq, and 2+6 genbank ids */
/*
 * CheckForSwissProtID
 *
 * Seq-entry callback.  mydata points to a Boolean "strip serial" flag which
 * is cleared (set to FALSE) when the Bioseq in sep carries an id of one of
 * the listed database types, or a GenBank/TPG id whose accession is exactly
 * six characters long.  The flag is never set to TRUE here.
 *
 * index and indent are part of the callback signature and are not used.
 */
static void CheckForSwissProtID (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr     bsp;
  BoolPtr       stripSerial;
  SeqIdPtr      sip;
  TextSeqIdPtr  tsip;

  if (sep == NULL) return;
  if (! IS_Bioseq (sep)) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;
  stripSerial = (BoolPtr) mydata;
  if (stripSerial == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    switch (sip->choice) {
      case SEQID_GIBBSQ :
      case SEQID_GIBBMT :
      case SEQID_EMBL :
      case SEQID_PIR :
      case SEQID_SWISSPROT :
      case SEQID_PATENT :
      case SEQID_DDBJ :
      case SEQID_PRF :
      case SEQID_PDB :
      case SEQID_TPE :
      case SEQID_TPD :
      case SEQID_GPIPE :
        *stripSerial = FALSE;
        break;
      case SEQID_GENBANK :
      case SEQID_TPG :
        tsip = (TextSeqIdPtr) sip->data.ptrvalue;
        if (tsip != NULL && StringLen (tsip->accession) == 6) {
          *stripSerial = FALSE;
        }
        break;
      default :
        /* SEQID_NOT_SET, SEQID_LOCAL, SEQID_OTHER, SEQID_GENERAL and any
           other id type leave the flag untouched */
        break;
    }
  }
}
11885
/*
 * CheckForEmblDdbjID
 *
 * Seq-entry callback.  mydata points to a Boolean which is set to TRUE when
 * the Bioseq in sep has at least one SEQID_EMBL or SEQID_DDBJ id.  The flag
 * is never cleared here.
 *
 * index and indent are part of the callback signature and are not used.
 */
static void CheckForEmblDdbjID (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr  bsp;
  BoolPtr    isEmblOrDdbj;
  SeqIdPtr   sip;

  if (sep == NULL) return;
  if (! IS_Bioseq (sep)) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;
  isEmblOrDdbj = (BoolPtr) mydata;
  if (isEmblOrDdbj == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) {
      *isEmblOrDdbj = TRUE;
    }
  }
}
11911
/*
 * CheckForJournalScanID
 *
 * Seq-entry callback.  mydata points to a Boolean which is set to TRUE when
 * the Bioseq in sep has at least one SEQID_GIBBSQ, SEQID_GIBBMT or
 * SEQID_GIIM id.  The flag is never cleared here.
 *
 * index and indent are part of the callback signature and are not used.
 */
static void CheckForJournalScanID (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr  bsp;
  BoolPtr    isJScan;
  SeqIdPtr   sip;

  if (sep == NULL) return;
  if (! IS_Bioseq (sep)) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;
  isJScan = (BoolPtr) mydata;
  if (isJScan == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_GIBBSQ ||
        sip->choice == SEQID_GIBBMT ||
        sip->choice == SEQID_GIIM) {
      *isJScan = TRUE;
    }
  }
}
11938
FixWrongFuzzOnPlusStrand(SeqLocPtr location)11939 NLM_EXTERN Boolean FixWrongFuzzOnPlusStrand (SeqLocPtr location)
11940
11941 {
11942 SeqLocPtr firstSlp;
11943 IntFuzzPtr ifp;
11944 SeqLocPtr lastSlp;
11945 Boolean res = FALSE;
11946 SeqIntPtr sip;
11947 SeqLocPtr slp;
11948 SeqPntPtr spp;
11949
11950 if (location == NULL) return FALSE;
11951
11952 firstSlp = NULL;
11953 lastSlp = NULL;
11954 slp = SeqLocFindNext (location, NULL);
11955 while (slp != NULL) {
11956 if (firstSlp == NULL) {
11957 firstSlp = slp;
11958 }
11959 lastSlp = slp;
11960 slp = SeqLocFindNext (location, slp);
11961 }
11962
11963 if (firstSlp != NULL && firstSlp->choice == SEQLOC_INT && firstSlp->data.ptrvalue != NULL) {
11964 sip = (SeqIntPtr) firstSlp->data.ptrvalue;
11965 if (sip != NULL && (sip->strand == Seq_strand_plus || sip->strand == Seq_strand_unknown)) {
11966 if (sip->if_to != NULL && sip->if_from == NULL) {
11967 sip->if_from = IntFuzzFree (sip->if_from);
11968 ifp = IntFuzzNew ();
11969 if (ifp != NULL) {
11970 ifp->choice = 4;
11971 sip->if_from = ifp;
11972 ifp->a = 2;
11973 res = TRUE;
11974 }
11975 }
11976 }
11977 }
11978
11979 if (lastSlp != NULL && lastSlp->choice == SEQLOC_INT && lastSlp->data.ptrvalue != NULL) {
11980 sip = (SeqIntPtr) lastSlp->data.ptrvalue;
11981 if (sip != NULL && (sip->strand == Seq_strand_plus || sip->strand == Seq_strand_unknown)) {
11982 if (sip->if_to == NULL && sip->if_from != NULL) {
11983 sip->if_to = IntFuzzFree (sip->if_to);
11984 ifp = IntFuzzNew ();
11985 if (ifp != NULL) {
11986 ifp->choice = 4;
11987 sip->if_to = ifp;
11988 ifp->a = 1;
11989 res = TRUE;
11990 }
11991 }
11992 }
11993 }
11994
11995 return res;
11996 }
11997
FixWrongFuzzOnMinusStrand(SeqLocPtr location)11998 NLM_EXTERN Boolean FixWrongFuzzOnMinusStrand (SeqLocPtr location)
11999
12000 {
12001 SeqLocPtr firstSlp;
12002 IntFuzzPtr ifp;
12003 SeqLocPtr lastSlp;
12004 Boolean res = FALSE;
12005 SeqIntPtr sip;
12006 SeqLocPtr slp;
12007 SeqPntPtr spp;
12008
12009 if (location == NULL) return FALSE;
12010
12011 firstSlp = NULL;
12012 lastSlp = NULL;
12013 slp = SeqLocFindNext (location, NULL);
12014 while (slp != NULL) {
12015 if (firstSlp == NULL) {
12016 firstSlp = slp;
12017 }
12018 lastSlp = slp;
12019 slp = SeqLocFindNext (location, slp);
12020 }
12021
12022 if (firstSlp != NULL && firstSlp->choice == SEQLOC_INT && firstSlp->data.ptrvalue != NULL) {
12023 sip = (SeqIntPtr) firstSlp->data.ptrvalue;
12024 if (sip != NULL && (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev)) {
12025 if (sip->if_to == NULL && sip->if_from != NULL) {
12026 sip->if_from = IntFuzzFree (sip->if_from);
12027 ifp = IntFuzzNew ();
12028 if (ifp != NULL) {
12029 ifp->choice = 4;
12030 sip->if_to = ifp;
12031 ifp->a = 1;
12032 res = TRUE;
12033 }
12034 }
12035 }
12036 }
12037
12038 if (lastSlp != NULL && lastSlp->choice == SEQLOC_INT && lastSlp->data.ptrvalue != NULL) {
12039 sip = (SeqIntPtr) lastSlp->data.ptrvalue;
12040 if (sip != NULL && (sip->strand == Seq_strand_minus || sip->strand == Seq_strand_both_rev)) {
12041 if (sip->if_to != NULL && sip->if_from == NULL) {
12042 sip->if_to = IntFuzzFree (sip->if_to);
12043 ifp = IntFuzzNew ();
12044 if (ifp != NULL) {
12045 ifp->choice = 4;
12046 sip->if_from = ifp;
12047 ifp->a = 2;
12048 res = TRUE;
12049 }
12050 }
12051 }
12052 }
12053
12054 return res;
12055 }
12056
CleanUpSeqLoc(SeqLocPtr slp)12057 NLM_EXTERN void CleanUpSeqLoc (SeqLocPtr slp)
12058
12059 {
12060 BioseqPtr bsp;
12061 SeqLocPtr curr;
12062 SeqLocPtr head;
12063 SeqLocPtr last;
12064 SeqLocPtr loc;
12065 SeqLocPtr next;
12066 SeqIdPtr sip;
12067 SeqIntPtr sintp;
12068 SeqPntPtr spp;
12069 Int4 swp;
12070 SeqLocPtr tail;
12071
12072 if (slp == NULL) return;
12073
12074 if (slp->choice == SEQLOC_WHOLE) {
12075 sip = (SeqIdPtr) slp->data.ptrvalue;
12076 if (sip != NULL) {
12077 bsp = BioseqFind (sip);
12078 if (bsp != NULL) {
12079 sintp = SeqIntNew ();
12080 if (sintp != NULL) {
12081 sintp->from = 0;
12082 sintp->to = bsp->length - 1;
12083 sintp->id = sip; /* reuse existing slp->data.ptrvalue, no need to free */
12084 slp->choice = SEQLOC_INT;
12085 slp->data.ptrvalue = (Pointer) sintp;
12086 }
12087 }
12088 }
12089 }
12090
12091 /* from < to for all intervals */
12092 loc = SeqLocFindNext (slp, NULL);
12093 while (loc != NULL) {
12094 if (loc->choice == SEQLOC_INT) {
12095 sintp = (SeqIntPtr) loc->data.ptrvalue;
12096 if (sintp != NULL) {
12097 if (sintp->from > sintp->to) {
12098 swp = sintp->from;
12099 sintp->from = sintp->to;
12100 sintp->to = swp;
12101 }
12102 if (sintp->strand == Seq_strand_both) {
12103 sintp->strand = Seq_strand_plus;
12104 } else if (sintp->strand == Seq_strand_both_rev) {
12105 sintp->strand = Seq_strand_minus;
12106 }
12107 }
12108 } else if (loc->choice == SEQLOC_PNT) {
12109 spp = (SeqPntPtr) loc->data.ptrvalue;
12110 if (spp != NULL) {
12111 if (spp->strand == Seq_strand_both) {
12112 spp->strand = Seq_strand_plus;
12113 } else if (spp->strand == Seq_strand_both_rev) {
12114 spp->strand = Seq_strand_minus;
12115 }
12116 }
12117 }
12118 loc = SeqLocFindNext (slp, loc);
12119 }
12120
12121 if (slp->choice == SEQLOC_PACKED_INT) {
12122 loc = (SeqLocPtr) slp->data.ptrvalue;
12123 if (loc == NULL || loc->next != NULL) return;
12124 /* here seqloc_packed_int points to a single location element, so no need for seqloc_packed_int parent */
12125 slp->choice = loc->choice;
12126 slp->data.ptrvalue = (Pointer) loc->data.ptrvalue;
12127 MemFree (loc);
12128 return;
12129 }
12130
12131 if (slp->choice != SEQLOC_MIX) return;
12132 loc = (SeqLocPtr) slp->data.ptrvalue;
12133 if (loc == NULL) return;
12134
12135 if (loc->next != NULL) {
12136 /* check for null NULL at beginning */
12137 if (loc->choice == SEQLOC_NULL) {
12138 slp->data.ptrvalue = (Pointer) loc->next;
12139 loc->next = NULL;
12140 ValNodeFree (loc);
12141 }
12142 /* check for null NULL at end */
12143 loc = (SeqLocPtr) slp->data.ptrvalue;
12144 last = NULL;
12145 while (loc->next != NULL) {
12146 last = loc;
12147 loc = loc->next;
12148 }
12149 if (loc->choice == SEQLOC_NULL && last != NULL) {
12150 last->next = NULL;
12151 ValNodeFree (loc);
12152 }
12153 }
12154
12155 loc = (SeqLocPtr) slp->data.ptrvalue;
12156 if (loc == NULL) return;
12157
12158 if (loc->next == NULL) {
12159 /* here seqloc_mix points to a single location element, so no need for seqloc_mix parent */
12160 slp->choice = loc->choice;
12161 slp->data.ptrvalue = (Pointer) loc->data.ptrvalue;
12162 MemFree (loc);
12163 return;
12164 }
12165
12166 /* check for nested seqloc_mix, remove nesting */
12167 curr = loc;
12168 last = NULL;
12169 while (curr != NULL) {
12170 next = curr->next;
12171 if (curr->choice == SEQLOC_MIX) {
12172 head = (SeqLocPtr) curr->data.ptrvalue;
12173 if (head != NULL) {
12174 tail = head;
12175 while (tail->next != NULL) {
12176 tail = tail->next;
12177 }
12178 if (last != NULL) {
12179 last->next = head;
12180 }
12181 tail->next = curr->next;
12182 curr->next = NULL;
12183 curr = MemFree (curr);
12184 }
12185 } else {
12186 last = curr;
12187 }
12188 curr = next;
12189 }
12190
12191 NormalizeNullsBetween (slp);
12192
12193 /*
12194 FixWrongFuzzOnPlusStrand (slp);
12195 FixWrongFuzzOnMinusStrand (slp);
12196 */
12197 }
12198
/* Pairs one code-break with an integer position so that a linked list of
   code-breaks can be copied into an array and sorted by that position
   (see SortCodeBreaks and SortByCodeBreakLoc). */
typedef struct cbloc {
  CodeBreakPtr  cbp;  /* the code-break list element */
  Int4          pos;  /* sort key assigned by SortCodeBreaks */
} CbLoc, PNTR CbLocPtr;
12203
SortByCodeBreakLoc(VoidPtr ptr1,VoidPtr ptr2)12204 static int LIBCALLBACK SortByCodeBreakLoc (VoidPtr ptr1, VoidPtr ptr2)
12205
12206 {
12207 CbLocPtr clp1;
12208 CbLocPtr clp2;
12209
12210 clp1 = (CbLocPtr) ptr1;
12211 clp2 = (CbLocPtr) ptr2;
12212 if (clp1 == NULL || clp2 == NULL) return 0;
12213 if (clp1->pos < clp2->pos) {
12214 return -1;
12215 } else if (clp1->pos > clp2->pos) {
12216 return 1;
12217 }
12218 return 0;
12219 }
12220
/*
 * SortCodeBreaks
 *
 * Reorders the code-break list of a feature by the position each break maps
 * to on the feature's product (dnaLoc_to_aaLoc followed by
 * GetOffsetInBioseq on the product Bioseq).
 *
 *   sfp   feature owning the code-breaks; its product locates the Bioseq
 *   list  head of the code-break list
 *
 * Returns the (possibly new) head of the list.  The list is returned
 * untouched when sfp or list is NULL, the product Bioseq cannot be found,
 * the list has fewer than two elements, the work array cannot be allocated,
 * or the positions are already in non-decreasing order.  The sort is done
 * with StableMergeSort.
 */
static CodeBreakPtr SortCodeBreaks (SeqFeatPtr sfp, CodeBreakPtr list)

{
  SeqLocPtr     aaloc;
  CodeBreakPtr  cbp;
  size_t        idx;
  Boolean       in_order = TRUE;
  BioseqPtr     protbsp;
  CbLocPtr      table;
  size_t        total;

  if (sfp == NULL || list == NULL) return list;
  protbsp = BioseqFindFromSeqLoc (sfp->product);
  if (protbsp == NULL) return list;

  total = 0;
  for (cbp = list; cbp != NULL; cbp = cbp->next) {
    total++;
  }
  if (total < 2) return list;

  /* one spare slot: its cbp is read below as the "next" of the last element,
     which relies on MemNew handing back zero-filled memory */
  table = (CbLocPtr) MemNew (sizeof (CbLoc) * (total + 1));
  if (table == NULL) return list;

  idx = 0;
  for (cbp = list; cbp != NULL && idx < total; cbp = cbp->next) {
    table [idx].cbp = cbp;
    aaloc = dnaLoc_to_aaLoc (sfp, cbp->loc, TRUE, NULL, TRUE);
    table [idx].pos = GetOffsetInBioseq (aaloc, protbsp, SEQLOC_START) + 1;
    SeqLocFree (aaloc);
    idx++;
  }

  /* relinking is only needed when some element precedes a smaller position */
  for (idx = 1; idx < total; idx++) {
    if (table [idx].pos < table [idx - 1].pos) {
      in_order = FALSE;
      break;
    }
  }

  if (! in_order) {
    StableMergeSort (table, total, sizeof (CbLoc), SortByCodeBreakLoc);

    /* rebuild the linked list in array order */
    for (idx = 0; idx < total; idx++) {
      table [idx].cbp->next = table [idx + 1].cbp;
    }

    list = table [0].cbp;
  }

  MemFree (table);

  return list;
}
12273
/*
 * CleanupDuplicatedCodeBreaks
 *
 * Removes code-breaks that duplicate the most recently kept element of the
 * list: same location (SeqLocCompare reports SLC_A_EQ_B), same aa.choice and
 * same aa.value.intvalue.  Only neighbouring duplicates are detected, so the
 * list is expected to be sorted beforehand.  Removed elements are unlinked
 * and released with CodeBreakFree.
 *
 *   prevcbp  address of the pointer to the first element; NULL is ignored
 */
static void CleanupDuplicatedCodeBreaks (CodeBreakPtr PNTR prevcbp)

{
  CodeBreakPtr  curr;
  CodeBreakPtr  following;
  CodeBreakPtr  kept = NULL;

  if (prevcbp == NULL) return;

  for (curr = *prevcbp; curr != NULL; curr = following) {
    following = curr->next;

    if (kept != NULL &&
        SeqLocCompare (curr->loc, kept->loc) == SLC_A_EQ_B &&
        curr->aa.choice == kept->aa.choice &&
        curr->aa.value.intvalue == kept->aa.value.intvalue) {
      /* duplicate of the last kept element: unlink and free it */
      *prevcbp = curr->next;
      curr->next = NULL;
      CodeBreakFree (curr);
      continue;
    }

    kept = curr;
    prevcbp = (CodeBreakPtr PNTR) &(curr->next);
  }
}
12307
12308 //LCOV_EXCL_START
/* NULL-terminated table of the strings IsStringInNcRNAClassList accepts
   (compared case-insensitively).  ConvertToNcRNA stores matching names as
   the value of an "ncRNA_class" qualifier.  "other" is the last string
   entry. */
CharPtr ncrnaClassList[] = {
"antisense_RNA",
"autocatalytically_spliced_intron",
"hammerhead_ribozyme",
"ribozyme",
"RNase_P_RNA",
"RNase_MRP_RNA",
"telomerase_RNA",
"guide_RNA",
"rasiRNA",
"scRNA",
"siRNA",
"miRNA",
"piRNA",
"snoRNA",
"snRNA",
"SRP_RNA",
"vault_RNA",
"Y_RNA",
"lncRNA",
"other",
NULL};

/* Element count of ncrnaClassList minus one.  Because the count includes the
   NULL terminator, this equals the number of string entries, i.e. the index
   of the terminating NULL.
   NOTE(review): the name suggests the index of "other", which is one less;
   confirm against the code that uses this value. */
Int4 NcrnaOTHER = sizeof (ncrnaClassList) / sizeof (CharPtr) - 1;
12333
IsStringInNcRNAClassList(CharPtr str)12334 extern Boolean IsStringInNcRNAClassList (CharPtr str)
12335 {
12336 CharPtr PNTR p;
12337
12338 if (StringHasNoText (str)) return FALSE;
12339 for (p = ncrnaClassList; *p != NULL; p++)
12340 {
12341 if (StringICmp (str, *p) == 0)
12342 {
12343 return TRUE;
12344 }
12345 }
12346 return FALSE;
12347 }
12348
12349
/* NULL-terminated table of the strings IsStringInRegulatoryClassList accepts
   (compared case-insensitively).  "other" is the last string entry. */
CharPtr regulatoryClassList[] = {
"attenuator",
"CAAT_signal",
"DNase_I_hypersensitive_site",
"enhancer_blocking_element",
"enhancer",
"GC_signal",
"imprinting_control_region",
"insulator",
"locus_control_region",
"matrix_attachment_region",
"minus_10_signal",
"minus_35_signal",
"polyA_signal_sequence",
"promoter",
"recoding_stimulatory_region",
"replication_regulatory_region",
"response_element",
"ribosome_binding_site",
"riboswitch",
"silencer",
"TATA_box",
"terminator",
"transcriptional_cis_regulatory_region",
"other",
NULL};

/* Element count of regulatoryClassList minus one.  Because the count
   includes the NULL terminator, this equals the number of string entries,
   i.e. the index of the terminating NULL.
   NOTE(review): the name suggests the index of "other", which is one less;
   confirm against the code that uses this value. */
Int4 RegulatoryOTHER = sizeof (regulatoryClassList) / sizeof (CharPtr) - 1;
12378
IsStringInRegulatoryClassList(CharPtr str)12379 extern Boolean IsStringInRegulatoryClassList (CharPtr str)
12380
12381 {
12382 CharPtr PNTR p;
12383
12384 if (StringHasNoText (str)) return FALSE;
12385 for (p = regulatoryClassList; *p != NULL; p++)
12386 {
12387 if (StringICmp (str, *p) == 0)
12388 {
12389 return TRUE;
12390 }
12391 }
12392 return FALSE;
12393 }
12394
/* NULL-terminated table of the strings IsStringInRecombinationClassList
   accepts (compared case-insensitively). */
CharPtr recombinationClassList[] = {
"chromosome_breakpoint",
"meiotic_recombination",
"mitotic_recombination",
"non_allelic_homologous_recombination",
"other",
NULL};
12402
IsStringInRecombinationClassList(CharPtr str)12403 extern Boolean IsStringInRecombinationClassList (CharPtr str)
12404
12405 {
12406 CharPtr PNTR p;
12407
12408 if (StringHasNoText (str)) return FALSE;
12409 for (p = recombinationClassList; *p != NULL; p++)
12410 {
12411 if (StringICmp (str, *p) == 0)
12412 {
12413 return TRUE;
12414 }
12415 }
12416 return FALSE;
12417 }
12418 //LCOV_EXCL_STOP
12419
/*
 * AddNonCopiedQual
 *
 * Adds a qual/class_val qualifier to the FRONT of sfp->qual unless an entry
 * with exactly that name and value (both compared case-sensitively) is
 * already present.
 *
 *   sfp        feature receiving the qualifier; NULL is ignored
 *   qual       qualifier name; ignored when it has no text
 *   class_val  qualifier value; ignored when it has no text
 *
 * Both strings are copied with StringSave.  If GBQualNew fails nothing is
 * added instead of dereferencing a NULL node.
 */
static void AddNonCopiedQual (SeqFeatPtr sfp, CharPtr qual, CharPtr class_val)
{
  GBQualPtr  gbq;

  if (sfp == NULL || StringHasNoText (qual) || StringHasNoText (class_val))
  {
    return;
  }

  /* nothing to do when the identical qualifier already exists */
  for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next)
  {
    if (StringCmp (gbq->qual, qual) == 0
        && StringCmp (gbq->val, class_val) == 0)
    {
      return;
    }
  }

  gbq = GBQualNew ();
  if (gbq == NULL)
  {
    return;
  }
  gbq->qual = StringSave (qual);
  gbq->val = StringSave (class_val);
  gbq->next = sfp->qual;
  sfp->qual = gbq;
}
12445
12446
/*
 * GetMiRNAProduct
 *
 * Derives a product name from a string that labels a microRNA:
 *   - "miRNA xxx" or "microRNA xxx"  ->  copy of "xxx"
 *   - "xxx miRNA" or "xxx microRNA"  ->  copy of "xxx", unless the string
 *     ends in "precursor miRNA" / "precursor microRNA" respectively
 * All comparisons are case-sensitive.
 *
 * Returns a newly allocated string the caller must free, or NULL when str
 * has no text or matches none of the patterns.
 */
static CharPtr GetMiRNAProduct (CharPtr str)
{
  Int4     keep;
  Int4     len;
  CharPtr  product;

  if (StringHasNoText (str)) return NULL;

  /* prefix forms: everything after the label is the product */
  if (StringNCmp (str, "miRNA ", 6) == 0)
  {
    return StringSave (str + 6);
  }
  if (StringNCmp (str, "microRNA ", 9) == 0)
  {
    return StringSave (str + 9);
  }

  /* suffix forms: everything before the label is the product */
  len = StringLen (str);
  if (len > 6 && StringCmp (str + len - 6, " miRNA") == 0
      && (len < 15 || StringCmp (str + len - 15, "precursor miRNA") != 0))
  {
    keep = len - 6;
  }
  else if (len > 9 && StringCmp (str + len - 9, " microRNA") == 0
           && (len < 18 || StringCmp (str + len - 18, "precursor microRNA") != 0))
  {
    keep = len - 9;
  }
  else
  {
    return NULL;
  }

  product = (CharPtr) MemNew (sizeof (Char) * (keep + 1));
  StringNCpy (product, str, keep);
  product[keep] = 0;
  return product;
}
12481
12482
ConvertToNcRNA(SeqFeatPtr sfp)12483 static Boolean ConvertToNcRNA (SeqFeatPtr sfp)
12484 {
12485 GBQualPtr gbq;
12486 RnaRefPtr rrp;
12487 Boolean was_converted = FALSE;
12488 CharPtr miRNAproduct = NULL;
12489
12490 if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL)
12491 {
12492 return FALSE;
12493 }
12494 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
12495 if (rrp->type == 5 || rrp->type == 6 || rrp->type == 7)
12496 {
12497 if (rrp->type == 5)
12498 {
12499 AddNonCopiedQual (sfp, "ncRNA_class", "snRNA");
12500 }
12501 else if (rrp->type == 6)
12502 {
12503 AddNonCopiedQual (sfp, "ncRNA_class", "scRNA");
12504 }
12505 else if (rrp->type == 7)
12506 {
12507 AddNonCopiedQual (sfp, "ncRNA_class", "snoRNA");
12508 }
12509 if (rrp->ext.choice == 1)
12510 {
12511 AddNonCopiedQual (sfp, "product", rrp->ext.value.ptrvalue);
12512 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12513 }
12514 rrp->ext.choice = 1;
12515 rrp->ext.value.ptrvalue = StringSave ("ncRNA");
12516 rrp->type = 255;
12517 was_converted = TRUE;
12518 }
12519 else if (rrp->type == 255 && rrp->ext.choice == 1)
12520 {
12521 if (IsStringInNcRNAClassList (rrp->ext.value.ptrvalue))
12522 {
12523 AddNonCopiedQual (sfp, "ncRNA_class", rrp->ext.value.ptrvalue);
12524 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12525 rrp->ext.value.ptrvalue = StringSave ("ncRNA");
12526 was_converted = TRUE;
12527 }
12528 else if ((miRNAproduct = GetMiRNAProduct (rrp->ext.value.ptrvalue)) != NULL)
12529 {
12530 AddNonCopiedQual (sfp, "ncRNA_class", "miRNA");
12531 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12532 rrp->ext.value.ptrvalue = StringSave ("ncRNA");
12533 AddNonCopiedQual (sfp, "product", miRNAproduct);
12534 miRNAproduct = MemFree (miRNAproduct);
12535 was_converted = TRUE;
12536 }
12537 else if (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0
12538 && StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0
12539 && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0)
12540 {
12541 AddNonCopiedQual (sfp, "product", rrp->ext.value.ptrvalue);
12542 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12543 rrp->ext.value.ptrvalue = StringSave ("misc_RNA");
12544 was_converted = TRUE;
12545 }
12546 }
12547 if (rrp->type == 255 && rrp->ext.choice == 0) {
12548 rrp->ext.choice = 1;
12549 rrp->ext.value.ptrvalue = StringSave ("misc_RNA");
12550 }
12551 if (rrp->type == 255 && rrp->ext.choice == 1 &&
12552 StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0) {
12553 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
12554 if (StringCmp (gbq->qual, "ncRNA_class") == 0) {
12555 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12556 rrp->ext.value.ptrvalue = StringSave ("ncRNA");
12557 was_converted = TRUE;
12558 } else if (StringCmp (gbq->qual, "tag_peptide") == 0) {
12559 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
12560 rrp->ext.value.ptrvalue = StringSave ("tmRNA");
12561 was_converted = TRUE;
12562 }
12563 }
12564 }
12565 return was_converted;
12566 }
12567
ModernizeFeatureStrings(SeqFeatPtr sfp,Boolean isEmblOrDdbj)12568 static void ModernizeFeatureStrings (SeqFeatPtr sfp, Boolean isEmblOrDdbj)
12569
12570 {
12571 CharPtr desc;
12572 GBQualPtr gbq;
12573 CharPtr name;
12574 ProtRefPtr prp;
12575 RnaRefPtr rrp;
12576 CharPtr str;
12577 ValNodePtr vnp;
12578
12579 if (sfp == NULL) return;
12580
12581 /* skip feature types that do not use data.value.ptrvalue */
12582 switch (sfp->data.choice) {
12583 case SEQFEAT_COMMENT:
12584 case SEQFEAT_BOND:
12585 case SEQFEAT_SITE:
12586 case SEQFEAT_PSEC_STR:
12587 return;
12588 default:
12589 break;
12590 }
12591
12592 if (sfp->data.value.ptrvalue == NULL) return;
12593
12594 switch (sfp->data.choice) {
12595 case SEQFEAT_PROT:
12596 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
12597 desc = prp->desc;
12598 if (! isEmblOrDdbj) {
12599 CleanVisStringList (&(prp->name));
12600 break;
12601 }
12602 for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
12603 str = (CharPtr) vnp->data.ptrvalue;
12604 if (StringHasNoText (str)) continue;
12605 if (StringICmp (str, "RbcL") == 0 || StringICmp (str, "rubisco large subunit") == 0) {
12606 vnp->data.ptrvalue = StringSave ("ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit");
12607 str = MemFree (str);
12608 if (StringICmp (desc, "RbcL") == 0 || StringICmp (desc, "rubisco large subunit") == 0) {
12609 prp->desc = MemFree (prp->desc);
12610 }
12611 } else if (StringICmp (str, "RbcS") == 0 || StringICmp (str, "rubisco small subunit") == 0) {
12612 vnp->data.ptrvalue = StringSave ("ribulose-1,5-bisphosphate carboxylase/oxygenase small subunit");
12613 str = MemFree (str);
12614 if (StringICmp (desc, "RbcS") == 0 || StringICmp (desc, "rubisco small subunit") == 0) {
12615 prp->desc = MemFree (prp->desc);
12616 }
12617 /*
12618 } else if (StringCmp (desc, str) == 0) {
12619 prp->desc = MemFree (prp->desc);
12620 */
12621 }
12622 if (StringStr (str, "ribulose") != NULL &&
12623 StringStr (str, "bisphosphate") != NULL &&
12624 StringStr (str, "methyltransferase") == NULL &&
12625 StringICmp (str, "ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit") != 0 &&
12626 StringICmp (str, "ribulose-1,5-bisphosphate carboxylase/oxygenase small subunit") != 0) {
12627 if (StringICmp (str, "ribulose 1,5-bisphosphate carboxylase/oxygenase large subunit") == 0 ||
12628 StringICmp (str, "ribulose 1,5-bisphosphate carboxylase large subunit") == 0 ||
12629 StringICmp (str, "ribulose bisphosphate carboxylase large subunit") == 0 ||
12630 StringICmp (str, "ribulose-bisphosphate carboxylase large subunit") == 0 ||
12631 StringICmp (str, "ribulose-1,5-bisphosphate carboxylase large subunit") == 0 ||
12632 StringICmp (str, "ribulose-1,5-bisphosphate carboxylase, large subunit") == 0 ||
12633 StringICmp (str, "large subunit of ribulose-1,5-bisphosphate carboxylase/oxygenase") == 0 ||
12634 StringICmp (str, "ribulose-1,5-bisphosphate carboxylase oxygenase large subunit") == 0 ||
12635 StringICmp (str, "ribulose bisphosphate carboxylase large chain") == 0 ||
12636 StringICmp (str, "ribulose 1,5-bisphosphate carboxylase-oxygenase large subunit") == 0 ||
12637 StringICmp (str, "ribulose bisphosphate carboxylase oxygenase large subunit") == 0 ||
12638 StringICmp (str, "ribulose 1,5 bisphosphate carboxylase large subunit") == 0 ||
12639 StringICmp (str, "ribulose-1,5-bisphosphate carboxylase/oxygenase, large subunit") == 0 ||
12640 StringICmp (str, "large subunit of ribulose-1,5-bisphosphate carboxylase/oxgenase") == 0 ||
12641 StringICmp (str, "ribulose bisphosphate carboxylase/oxygenase large subunit") == 0 ||
12642 StringICmp (str, "ribulose-1,5-bisphosphate carboxylase oxygenase, large subunit") == 0 ||
12643 StringICmp (str, "ribulose 5-bisphosphate carboxylase, large subunit") == 0 ||
12644 StringICmp (str, "ribulosebisphosphate carboxylase large subunit") == 0 ||
12645 StringICmp (str, "ribulose bisphosphate large subunit") == 0 ||
12646 StringICmp (str, "ribulose 1,5 bisphosphate carboxylase/oxygenase large subunit") == 0 ||
12647 StringICmp (str, "ribulose 1,5-bisphosphate carboxylase/oxygenase large chain") == 0 ||
12648 StringICmp (str, "large subunit ribulose-1,5-bisphosphate carboxylase/oxygenase") == 0 ||
12649 StringICmp (str, "ribulose-bisphosphate carboxylase, large subunit") == 0 ||
12650 StringICmp (str, "ribulose-1, 5-bisphosphate carboxylase/oxygenase large-subunit") == 0) {
12651 vnp->data.ptrvalue = StringSave ("ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit");
12652 str = MemFree (str);
12653 }
12654 }
12655 }
12656 CleanVisStringList (&(prp->name));
12657 break;
12658 case SEQFEAT_RNA :
12659 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
12660 if (rrp->type == 255 && rrp->ext.choice == 1) {
12661 name = (CharPtr) rrp->ext.value.ptrvalue;
12662 if (StringCmp (name, "misc_RNA") == 0) {
12663 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
12664 if (StringCmp (gbq->qual, "product") != 0) continue;
12665 name = gbq->val;
12666 if (StringHasNoText (name)) continue;
12667 if (StringICmp (name, "its1") == 0 || StringICmp (name, "its 1") == 0) {
12668 gbq->val = MemFree (gbq->val);
12669 gbq->val = StringSave ("internal transcribed spacer 1");
12670 } else if (StringICmp (name, "its2") == 0 || StringICmp (name, "its 2") == 0) {
12671 gbq->val = MemFree (gbq->val);
12672 gbq->val = StringSave ("internal transcribed spacer 2");
12673 } else if (StringICmp (name, "its3") == 0 || StringICmp (name, "its 3") == 0) {
12674 gbq->val = MemFree (gbq->val);
12675 gbq->val = StringSave ("internal transcribed spacer 3");
12676 } else if (StringICmp (name, "Ribosomal DNA internal transcribed spacer 1") == 0) {
12677 gbq->val = MemFree (gbq->val);
12678 gbq->val = StringSave ("internal transcribed spacer 1");
12679 } else if (StringICmp (name, "Ribosomal DNA internal transcribed spacer 2") == 0) {
12680 gbq->val = MemFree (gbq->val);
12681 gbq->val = StringSave ("internal transcribed spacer 2");
12682 } else if (StringICmp (name, "Ribosomal DNA internal transcribed spacer 3") == 0) {
12683 gbq->val = MemFree (gbq->val);
12684 gbq->val = StringSave ("internal transcribed spacer 3");
12685 } else if (StringICmp (name, "internal transcribed spacer 1 (ITS1)") == 0) {
12686 gbq->val = MemFree (gbq->val);
12687 gbq->val = StringSave ("internal transcribed spacer 1");
12688 } else if (StringICmp (name, "internal transcribed spacer 2 (ITS2)") == 0) {
12689 gbq->val = MemFree (gbq->val);
12690 gbq->val = StringSave ("internal transcribed spacer 2");
12691 } else if (StringICmp (name, "internal transcribed spacer 3 (ITS3)") == 0) {
12692 gbq->val = MemFree (gbq->val);
12693 gbq->val = StringSave ("internal transcribed spacer 3");
12694 }
12695 }
12696 }
12697 }
12698 break;
12699 default:
12700 break;
12701 }
12702 }
12703
IsFeatureCommentRedundant(SeqFeatPtr sfp)12704 static Boolean IsFeatureCommentRedundant (SeqFeatPtr sfp)
12705
12706 {
12707 Uint1 aa;
12708 Choice cbaa;
12709 CodeBreakPtr cbp;
12710 CharPtr comment;
12711 CdRegionPtr crp;
12712 SeqFeatPtr feat;
12713 Uint1 from;
12714 GBQualPtr gbq;
12715 GeneRefPtr grp;
12716 CharPtr name;
12717 BioseqPtr prod;
12718 ProtRefPtr prp;
12719 Uint1 residue;
12720 RNAGenPtr rgp;
12721 RNAQualPtr rqp;
12722 RnaRefPtr rrp;
12723 SeqAnnotPtr sap;
12724 SeqCodeTablePtr sctp;
12725 Uint1 seqcode;
12726 SeqIdPtr sip;
12727 SeqMapTablePtr smtp;
12728 CharPtr str;
12729 tRNAPtr trp;
12730 ValNodePtr vnp;
12731
12732 if (sfp == NULL) return FALSE;
12733 comment = sfp->comment;
12734 if (StringHasNoText (comment)) return FALSE;
12735
12736 if (sfp->excpt && StringDoesHaveText (sfp->except_text)) {
12737 if (StringCmp (comment, sfp->except_text) == 0) return TRUE;
12738 }
12739
12740 /* skip feature types that do not use data.value.ptrvalue */
12741 switch (sfp->data.choice) {
12742 case SEQFEAT_COMMENT:
12743 case SEQFEAT_BOND:
12744 case SEQFEAT_SITE:
12745 case SEQFEAT_PSEC_STR:
12746 return FALSE;
12747 default:
12748 break;
12749 }
12750
12751 if (sfp->data.value.ptrvalue == NULL) return FALSE;
12752
12753 switch (sfp->data.choice) {
12754 case SEQFEAT_GENE:
12755 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
12756 /*
12757 if (StringCmp (comment, grp->locus) == 0) return TRUE;
12758 if (StringCmp (comment, grp->desc) == 0) return TRUE;
12759 */
12760 if (StringCmp (comment, grp->locus_tag) == 0) return TRUE;
12761 for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
12762 str = (CharPtr) vnp->data.ptrvalue;
12763 if (StringHasNoText (str)) continue;
12764 if (StringCmp (comment, str) == 0) return TRUE;
12765 }
12766 break;
12767 case SEQFEAT_CDREGION:
12768 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
12769 for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
12770 seqcode = 0;
12771 sctp = NULL;
12772 cbaa = cbp->aa;
12773 switch (cbaa.choice) {
12774 case 1 :
12775 seqcode = Seq_code_ncbieaa;
12776 break;
12777 case 2 :
12778 seqcode = Seq_code_ncbi8aa;
12779 break;
12780 case 3 :
12781 seqcode = Seq_code_ncbistdaa;
12782 break;
12783 default :
12784 break;
12785 }
12786 if (seqcode != 0) {
12787 sctp = SeqCodeTableFind (seqcode);
12788 if (sctp != NULL) {
12789 residue = cbaa.value.intvalue;
12790 if (residue != 42) {
12791 if (seqcode != Seq_code_ncbieaa) {
12792 smtp = SeqMapTableFind (seqcode, Seq_code_ncbieaa);
12793 residue = SeqMapTableConvert (smtp, residue);
12794 }
12795 if (residue == 'U') {
12796 if (StringCmp (comment, "selenocysteine") == 0) return TRUE;
12797 } else if (residue == 'O') {
12798 if (StringCmp (comment, "pyrrolysine") == 0) return TRUE;
12799 }
12800 }
12801 }
12802 }
12803 }
12804 if (sfp->product != NULL) {
12805 sip = SeqLocId (sfp->product);
12806 if (sip != NULL) {
12807 prod = BioseqFind (sip);
12808 if (prod != NULL) {
12809 for (sap = prod->annot; sap != NULL; sap = sap->next) {
12810 if (sap->type != 1) continue;
12811 for (feat = (SeqFeatPtr) sap->data; feat != NULL; feat = feat->next) {
12812 if (feat->data.choice != SEQFEAT_PROT) continue;
12813 prp = (ProtRefPtr) feat->data.value.ptrvalue;
12814 if (prp == NULL) continue;
12815 for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
12816 str = (CharPtr) vnp->data.ptrvalue;
12817 if (StringHasNoText (str)) continue;
12818 if (StringCmp (comment, str) == 0) return TRUE;
12819 }
12820 }
12821 }
12822 }
12823 }
12824 }
12825 break;
12826 case SEQFEAT_PROT:
12827 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
12828 for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
12829 str = (CharPtr) vnp->data.ptrvalue;
12830 if (StringHasNoText (str)) continue;
12831 if (StringCmp (comment, str) == 0) return TRUE;
12832 }
12833 if (StringDoesHaveText (prp->desc)) {
12834 if (StringCmp (comment, prp->desc) == 0) return TRUE;
12835 }
12836 for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
12837 str = (CharPtr) vnp->data.ptrvalue;
12838 if (StringHasNoText (str)) continue;
12839 if (StringCmp (comment, str) == 0) return TRUE;
12840 }
12841 break;
12842 case SEQFEAT_RNA :
12843 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
12844 if (rrp->type == 255 && rrp->ext.choice == 1) {
12845 name = (CharPtr) rrp->ext.value.ptrvalue;
12846 if (StringCmp (name, "misc_RNA") == 0) {
12847 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
12848 if (StringCmp (gbq->qual, "product") != 0) continue;
12849 name = gbq->val;
12850 if (StringHasNoText (name)) continue;
12851 /*
12852 if (StringICmp (name, "internal transcribed spacer 1") == 0) {
12853 if (StringICmp (comment, "its1") == 0 || StringICmp (comment, "its 1") == 0) return TRUE;
12854 } else if (StringICmp (name, "internal transcribed spacer 2") == 0) {
12855 if (StringICmp (comment, "its2") == 0 || StringICmp (comment, "its 2") == 0) return TRUE;
12856 } else if (StringICmp (name, "internal transcribed spacer 3") == 0) {
12857 if (StringICmp (comment, "its3") == 0 || StringICmp (comment, "its 3") == 0) return TRUE;
12858 }
12859 */
12860 }
12861 }
12862 } else if (rrp->type == 3 && rrp->ext.choice == 2) {
12863 trp = (tRNAPtr) rrp->ext.value.ptrvalue;
12864 if (trp != NULL) {
12865 aa = 0;
12866 if (trp->aatype == 2) {
12867 aa = trp->aa;
12868 } else {
12869 from = 0;
12870 switch (trp->aatype) {
12871 case 0 :
12872 from = 0;
12873 break;
12874 case 1 :
12875 from = Seq_code_iupacaa;
12876 break;
12877 case 2 :
12878 from = Seq_code_ncbieaa;
12879 break;
12880 case 3 :
12881 from = Seq_code_ncbi8aa;
12882 break;
12883 case 4 :
12884 from = Seq_code_ncbistdaa;
12885 break;
12886 default:
12887 break;
12888 }
12889 seqcode = Seq_code_ncbieaa;
12890 smtp = SeqMapTableFind (seqcode, from);
12891 if (smtp != NULL) {
12892 aa = SeqMapTableConvert (smtp, trp->aa);
12893 if (aa == 255 && from == Seq_code_iupacaa) {
12894 if (trp->aa == 'U') {
12895 aa = 'U';
12896 } else if (trp->aa == 'O') {
12897 aa = 'O';
12898 }
12899 }
12900 }
12901 }
12902 if (aa > 0 && aa != 255) {
12903 if (StringNCmp (comment, "aa: ", 4) == 0) {
12904 comment += 4;
12905 }
12906 residue = FindTrnaAA3 (comment);
12907 if (residue == aa) {
12908 if (aa == 'M' && StringICmp ("fMet", comment) == 0) return FALSE;
12909 if (aa == 'M' && StringICmp ("iMet", comment) == 0) return FALSE;
12910 return TRUE;
12911 }
12912 residue = FindTrnaAA (comment);
12913 if (residue == aa) return TRUE;
12914 }
12915 }
12916 } else if (rrp->ext.choice == 3) {
12917 rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
12918 if (rgp != NULL) {
12919 if (StringCmp (comment, rgp->product) == 0) return TRUE;
12920 if (StringCmp (comment, rgp->_class) == 0) return TRUE;
12921 for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
12922 if (StringCmp (comment, rqp->val) == 0) return TRUE;
12923 }
12924 }
12925 }
12926 break;
12927 default:
12928 break;
12929 }
12930
12931 return FALSE;
12932 }
12933
12934
/*
 * ExtractSatelliteFromComment
 *
 * Looks for a leading "microsatellite", "minisatellite" or "satellite"
 * keyword in comment.  When the keyword is the whole comment, or is
 * immediately followed by ';', the keyword (and the ';') is removed from
 * comment in place and a newly saved copy of the keyword is returned.
 * When the keyword is followed by anything else, comment keeps its text and
 * NULL is returned.  In every case where a keyword was found, a single
 * leading '~' (one not followed by a second '~') left at the front of
 * comment is stripped as well.
 *
 *   comment   modifiable string, edited in place (may be NULL or blank)
 *
 * Returns a StringSave'd keyword, or NULL if nothing was extracted.
 */
static CharPtr ExtractSatelliteFromComment (CharPtr comment)
{
  static CharPtr  keywords [] = {
    "microsatellite",
    "minisatellite",
    "satellite",
    NULL
  };
  CharPtr  keyword = NULL;
  Int4     keylen = 0;
  Int4     k;
  CharPtr  qual = NULL;

  if (StringHasNoText (comment)) {
    return NULL;
  }

  /* find which keyword, if any, the comment starts with */
  for (k = 0; keywords [k] != NULL; k++) {
    keylen = StringLen (keywords [k]);
    if (StringNCmp (comment, keywords [k], keylen) == 0) {
      keyword = keywords [k];
      break;
    }
  }
  if (keyword == NULL) {
    return NULL;
  }

  switch (comment [keylen]) {
    case '\0' :
      /* the keyword is the entire comment */
      qual = StringSave (keyword);
      comment [0] = '\0';
      break;
    case ';' :
      /* blank out the keyword and its ';', then trim what is left */
      qual = StringSave (keyword);
      for (k = keylen; k >= 0; k--) {
        comment [k] = ' ';
      }
      TrimSpacesAroundString (comment);
      break;
    default :
      break;
  }

  if (comment [0] == '~' && comment [1] != '~') {
    comment [0] = ' ';
    TrimSpacesAroundString (comment);
  }

  return qual;
}
12978
/*
 * DoModernizeRNAFields
 *
 * Calls ModernizeRNAFields on an RNA feature and then tidies the RnaRef
 * extension:
 *   1. a type-10 RNA whose extension is still a plain name string (ext
 *      choice 1) gets that string moved into the product field of a new
 *      RNA-gen extension (ext choice 3);
 *   2. RNA-gen qualifiers with a blank name or value are unlinked and freed;
 *   3. a type-10 RNA with a product but no class has a leading ncRNA class
 *      name (from ncrnaClassList) split off the product into the class
 *      field, and the feature becomes type 8 / FEATDEF_ncRNA;
 *   4. if no qualifiers remain, an mRNA or rRNA (type 2 or 4) with only a
 *      product is collapsed back to a plain name string, and an RNA-gen
 *      with neither class nor product is removed altogether.
 *
 *   sfp   feature to update in place; ignored unless it is an RNA feature
 */
static void DoModernizeRNAFields (SeqFeatPtr sfp)

{
  RNAQualSetPtr       nextrqp;
  RNAQualSetPtr PNTR  prevrqp;
  RNAGenPtr           rgp;
  RNAQualSetPtr       rqp;
  RnaRefPtr           rrp;
  CharPtr             str;
  Int2                i;
  size_t              len;
  CharPtr             ncclass;
  CharPtr             product;
  CharPtr             tmp;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return;

  ModernizeRNAFields (sfp);
  rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
  if (rrp == NULL) return;

  if (rrp->ext.choice == 1 && rrp->type == 10) {
    str = rrp->ext.value.ptrvalue;
    if (StringHasNoText (str)) return;

    rgp = (RNAGenPtr) MemNew (sizeof (RNAGen));
    if (rgp == NULL) return;
    rrp->ext.choice = 3;
    rrp->ext.value.ptrvalue = (Pointer) rgp;
    /* the name string is handed over to the new RNA-gen, not copied */
    rgp->product = str;
  }

  if (rrp->ext.choice != 3) return;

  rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
  if (rgp == NULL) return;

  /* drop qualifiers that have a blank name or a blank value */
  rqp = rgp->quals;
  prevrqp = (RNAQualSetPtr PNTR) &(rgp->quals);
  while (rqp != NULL) {
    nextrqp = rqp->next;
    if (StringHasNoText (rqp->qual) || StringHasNoText (rqp->val)) {
      *(prevrqp) = rqp->next;
      rqp->next = NULL;
      RNAQualFree (rqp);
    } else {
      prevrqp = (RNAQualSetPtr PNTR) &(rqp->next);
    }
    rqp = nextrqp;
  }

  if (rrp->type == 10 && StringDoesHaveText (rgp->product) && rgp->_class == NULL) {
    ncclass = rgp->product;
    for (i = 0; ncrnaClassList [i] != NULL; i++) {
      str = ncrnaClassList [i];
      if (StringHasNoText (str)) continue;
      len = StringLen (str);
      if (len < 1) continue;
      /* product must read "<class> <non-blank remainder>" */
      if (StringNICmp (ncclass, str, len) != 0) continue;
      if (ncclass [len] != ' ') continue;
      tmp = ncclass + len + 1;
      if (StringHasNoText (tmp)) continue;
      ncclass [len] = '\0';
      rgp->_class = StringSave (ncclass);
      product = StringSave (tmp);
      rgp->product = MemFree (rgp->product);
      rgp->product = product;
      TrimSpacesAroundString (rgp->_class);
      TrimSpacesAroundString (rgp->product);
      rrp->type = 8;
      sfp->idx.subtype = FEATDEF_ncRNA;
      /* ncclass pointed into the buffer that was just freed, so stop here:
         another iteration would read freed memory and could overwrite
         (and leak) the class that was just assigned */
      break;
    }
  }

  if (rgp->quals != NULL) return;

  if (rrp->type == 2 || rrp->type == 4) {
    if (StringDoesHaveText (rgp->product) && StringHasNoText (rgp->_class)) {
      /* only a product is left: store it as a plain name string again */
      str = StringSave (rgp->product);
      rrp->ext.choice = 1;
      rrp->ext.value.ptrvalue = (Pointer) str;
      RNAGenFree (rgp);
      return;
    }
  }

  if (StringDoesHaveText (rgp->_class) || StringDoesHaveText (rgp->product)) return;

  /* nothing useful remains in the RNA-gen, so remove the extension */
  rrp->ext.value.ptrvalue = NULL;
  rrp->ext.choice = 0;
  RNAGenFree (rgp);
}
13076
13077
/*
 * FixncRNAClass
 *
 * For a feature whose indexed subtype is FEATDEF_ncRNA and whose RnaRef
 * carries an RNA-gen extension (ext choice 3), replaces a class of
 * "antisense" (compared without regard to case) with "antisense_RNA".
 * Does nothing for any other feature.
 *
 *   sfp   feature to update in place (may be NULL)
 */
static void FixncRNAClass (SeqFeatPtr sfp)
{
  RNAGenPtr  gen;
  RnaRefPtr  rna;

  if (sfp == NULL) return;
  if (sfp->idx.subtype != FEATDEF_ncRNA) return;

  rna = (RnaRefPtr) sfp->data.value.ptrvalue;
  if (rna == NULL) return;
  if (rna->ext.choice != 3) return;

  gen = (RNAGenPtr) rna->ext.value.ptrvalue;
  if (gen == NULL) return;

  if (StringICmp (gen->_class, "antisense") != 0) return;

  gen->_class = MemFree (gen->_class);
  gen->_class = StringSave ("antisense_RNA");
}
13096
13097
/*
 * MoveBioSourceFeatureNoteToSubSourceNote
 *
 * For a BioSource feature that has a non-blank comment, passes the comment
 * to SetSourceQualInBioSource for the subsource-note text qualifier with the
 * append-with-semicolon option, then frees the feature comment.
 *
 *   sfp   feature to update in place; ignored unless it is a BioSource
 *         feature with comment text
 */
static void MoveBioSourceFeatureNoteToSubSourceNote (SeqFeatPtr sfp)
{
  ValNode  note_field;

  if (sfp == NULL) return;
  if (sfp->data.choice != SEQFEAT_BIOSRC) return;
  if (StringHasNoText (sfp->comment)) return;

  /* stack-allocated selector naming the subsource-note qualifier */
  MemSet (&note_field, 0, sizeof (note_field));
  note_field.choice = SourceQualChoice_textqual;
  note_field.data.intvalue = Source_qual_subsource_note;

  SetSourceQualInBioSource (sfp->data.value.ptrvalue, &note_field, NULL,
                            sfp->comment, ExistingTextOption_append_semi);

  sfp->comment = MemFree (sfp->comment);
}
13113
13114
ConsolidateOneLikeSubSourceModifier(SubSourcePtr match_to,Boolean use_semicolon)13115 NLM_EXTERN void ConsolidateOneLikeSubSourceModifier (
13116 SubSourcePtr match_to,
13117 Boolean use_semicolon
13118 )
13119 {
13120 SubSourcePtr prev, index;
13121 Int4 len, num_matches;
13122 CharPtr new_value;
13123
13124 if (match_to == NULL) return;
13125 len = StringLen (match_to->name) + 1;
13126 num_matches = 0;
13127 prev = match_to;
13128 index = match_to->next;
13129 while (index != NULL)
13130 {
13131 if (index->subtype == match_to->subtype && index->name != NULL)
13132 {
13133 len += StringLen (index->name) + 2;
13134 num_matches++;
13135 }
13136 index = index->next;
13137 }
13138 if (num_matches == 0) return;
13139
13140 new_value = MemNew (len * sizeof (char));
13141 if (new_value == NULL) return;
13142
13143 StringCpy (new_value, match_to->name);
13144 index = match_to->next;
13145 while (index != NULL)
13146 {
13147 if (index->subtype == match_to->subtype && index->name != NULL)
13148 {
13149 if (use_semicolon)
13150 {
13151 StringCat (new_value, "; ");
13152 }
13153 else
13154 {
13155 StringCat (new_value, " ");
13156 }
13157 StringCat (new_value, index->name);
13158 prev->next = index->next;
13159 index->next = NULL;
13160 SubSourceFree (index);
13161 index = prev;
13162 }
13163 prev = index;
13164 index = index->next;
13165 }
13166 MemFree (match_to->name);
13167 match_to->name = new_value;
13168 }
13169
13170
ConsolidateOneLikeOrganismModifier(OrgModPtr match_to,Boolean use_semicolon)13171 NLM_EXTERN void ConsolidateOneLikeOrganismModifier (
13172 OrgModPtr match_to,
13173 Boolean use_semicolon
13174 )
13175 {
13176 OrgModPtr prev, index;
13177 Int4 len, num_matches;
13178 CharPtr new_value;
13179
13180 if (match_to == NULL) return;
13181 len = StringLen (match_to->subname) + 1;
13182 num_matches = 0;
13183 prev = match_to;
13184 index = match_to->next;
13185 while (index != NULL)
13186 {
13187 if (index->subtype == match_to->subtype && index->subname != NULL)
13188 {
13189 len += StringLen (index->subname) + 2;
13190 num_matches++;
13191 }
13192 index = index->next;
13193 }
13194 if (num_matches == 0) return;
13195
13196 new_value = MemNew (len * sizeof (char));
13197 if (new_value == NULL) return;
13198
13199 StringCpy (new_value, match_to->subname);
13200 index = match_to->next;
13201 while (index != NULL)
13202 {
13203 if (index->subtype == match_to->subtype && index->subname != NULL)
13204 {
13205 if (use_semicolon)
13206 {
13207 StringCat (new_value, "; ");
13208 }
13209 else
13210 {
13211 StringCat (new_value, " ");
13212 }
13213 StringCat (new_value, index->subname);
13214 prev->next = index->next;
13215 index->next = NULL;
13216 OrgModFree (index);
13217 index = prev;
13218 }
13219 prev = index;
13220 index = index->next;
13221 }
13222 MemFree (match_to->subname);
13223 match_to->subname = new_value;
13224 }
13225
13226 typedef struct reg_feat {
13227 CharPtr feat_key;
13228 CharPtr reg_class;
13229 } RegFeatData, PNTR RegFeatPtr;
13230
13231 static RegFeatData reg_feat_keys [] = {
13232 { "enhancer", "enhancer" },
13233 { "promoter", "promoter" },
13234 { "CAAT_signal", "CAAT_signal" },
13235 { "TATA_signal", "TATA_box" },
13236 { "-35_signal", "minus_35_signal" },
13237 { "-10_signal", "minus_10_signal" },
13238 { "GC_signal", "GC_signal" },
13239 { "RBS", "ribosome_binding_site" },
13240 { "polyA_signal", "polyA_signal_sequence" },
13241 { "attenuator", "attenuator" },
13242 { "terminator", "terminator" },
13243 { "misc_signal", "other" },
13244 { NULL, NULL }
13245 };
13246
ConsolidateBioSourceNotes(BioSourcePtr biop)13247 NLM_EXTERN void ConsolidateBioSourceNotes (BioSourcePtr biop)
13248 {
13249 SubSourcePtr ssp, note_ssp;
13250 OrgModPtr mod, note_mod;
13251
13252 if (biop == NULL) return;
13253
13254 for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next)
13255 {
13256 if (ssp->subtype == 255 && ssp->name != NULL)
13257 {
13258 ConsolidateOneLikeSubSourceModifier (ssp, TRUE);
13259 note_ssp = ssp;
13260 }
13261 }
13262
13263 if (biop->org == NULL || biop->org->orgname == NULL) return;
13264 for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next)
13265 {
13266 if (mod->subtype == 255 && mod->subname != NULL)
13267 {
13268 ConsolidateOneLikeOrganismModifier (mod, TRUE);
13269 note_mod = mod;
13270 }
13271 }
13272 }
13273
13274
CleanUpSeqFeat(SeqFeatPtr sfp,Boolean isEmblOrDdbj,Boolean isJscan,Boolean stripSerial,Boolean modernizeFeats,ValNodePtr PNTR publist)13275 NLM_EXTERN void CleanUpSeqFeat (
13276 SeqFeatPtr sfp,
13277 Boolean isEmblOrDdbj,
13278 Boolean isJscan,
13279 Boolean stripSerial,
13280 Boolean modernizeFeats,
13281 ValNodePtr PNTR publist
13282 )
13283
13284 {
13285 BioseqPtr bsp;
13286 CodeBreakPtr cbp;
13287 CdRegionPtr crp;
13288 GBQualPtr gbq;
13289 Boolean emptyRNA;
13290 IntFuzzPtr fuzz;
13291 GeneRefPtr grp;
13292 Boolean hasGibbsq;
13293 Boolean hasNulls;
13294 SeqIdPtr id;
13295 ImpFeatPtr ifp;
13296 Int2 j;
13297 MolInfoPtr mip;
13298 CharPtr name;
13299 CharPtr note;
13300 Boolean partial5;
13301 Boolean partial3;
13302 SeqPntPtr pntp;
13303 Uint1 processed;
13304 ProtRefPtr prp;
13305 ValNodePtr psp;
13306 RNAGenPtr rgp;
13307 RNAQualPtr rqp;
13308 RnaRefPtr rrp;
13309 Uint1 rrptype;
13310 CharPtr satellite_type;
13311 SeqDescrPtr sdp;
13312 SeqIntPtr sintp;
13313 SeqIdPtr sip;
13314 SeqLocPtr slp;
13315 CharPtr str;
13316 Uint1 strand;
13317 Boolean sync_mol_info;
13318 tRNAPtr trp;
13319 SeqFeatXrefPtr xref, next, PNTR prevlink;
13320
13321 if (sfp == NULL) return;
13322 crp = NULL;
13323 if (sfp->data.choice == SEQFEAT_IMP) {
13324 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
13325 if (ifp != NULL) {
13326 if (ifp->loc != NULL) {
13327 str = StringStr (ifp->loc, "replace");
13328 if (str != NULL) {
13329 AddReplaceQual (sfp, str);
13330 ifp->loc = MemFree (ifp->loc);
13331 }
13332 }
13333 if (StringCmp (ifp->key, "CDS") == 0) {
13334 if (! isEmblOrDdbj) {
13335 sfp->data.value.ptrvalue = ImpFeatFree (ifp);
13336 sfp->data.choice = SEQFEAT_CDREGION;
13337 crp = CdRegionNew ();
13338 sfp->data.value.ptrvalue = crp;
13339 sfp->idx.subtype = FEATDEF_CDS;
13340 }
13341 } else if (StringCmp (ifp->key, "allele") == 0 ||
13342 StringCmp (ifp->key, "mutation") == 0) {
13343 ifp->key = MemFree (ifp->key);
13344 ifp->key = StringSave ("variation");
13345 sfp->idx.subtype = FEATDEF_variation;
13346 } else if (StringCmp (ifp->key, "Import") == 0 ||
13347 StringCmp (ifp->key, "virion") == 0) {
13348 ifp->key = MemFree (ifp->key);
13349 ifp->key = StringSave ("misc_feature");
13350 sfp->idx.subtype = FEATDEF_misc_feature;
13351 } else if (StringCmp (ifp->key, "repeat_unit") == 0 ) {
13352 ifp->key = MemFree (ifp->key);
13353 ifp->key = StringSave ("repeat_region");
13354 sfp->idx.subtype = FEATDEF_repeat_region;
13355 } else if (StringCmp (ifp->key, "misc_bind") == 0) {
13356 ifp->key = MemFree (ifp->key);
13357 ifp->key = StringSave ("misc_binding");
13358 sfp->idx.subtype = FEATDEF_misc_binding;
13359 } else if (StringCmp (ifp->key, "satellite") == 0 && (! isEmblOrDdbj)) {
13360 ifp->key = MemFree (ifp->key);
13361 ifp->key = StringSave ("repeat_region");
13362 sfp->idx.subtype = FEATDEF_repeat_region;
13363 gbq = GBQualNew ();
13364 if (gbq != NULL) {
13365 gbq->qual = StringSave ("satellite");
13366 gbq->val = ExtractSatelliteFromComment (sfp->comment);
13367 if (gbq->val == NULL) {
13368 gbq->val = StringSave ("satellite");
13369 }
13370 gbq->next = sfp->qual;
13371 sfp->qual = gbq;
13372 }
13373 } else if (StringCmp (ifp->key, "LTR") == 0) {
13374 ifp->key = MemFree (ifp->key);
13375 ifp->key = StringSave ("repeat_region");
13376 sfp->idx.subtype = FEATDEF_repeat_region;
13377 gbq = GBQualNew ();
13378 if (gbq != NULL) {
13379 gbq->qual = StringSave ("rpt_type");
13380 gbq->val = StringSave ("long_terminal_repeat");
13381 gbq->next = sfp->qual;
13382 sfp->qual = gbq;
13383 }
13384 } else if (StringHasNoText (ifp->loc)) {
13385 rrptype = 0;
13386 if (StringCmp (ifp->key, "precursor_RNA") == 0) {
13387 rrptype = 1;
13388 } else if (StringCmp (ifp->key, "mRNA") == 0) {
13389 rrptype = 2;
13390 } else if (StringCmp (ifp->key, "tRNA") == 0) {
13391 rrptype = 3;
13392 } else if (StringCmp (ifp->key, "rRNA") == 0) {
13393 rrptype = 4;
13394 } else if (StringCmp (ifp->key, "snRNA") == 0) {
13395 rrptype = 5;
13396 } else if (StringCmp (ifp->key, "scRNA") == 0) {
13397 rrptype = 6;
13398 } else if (StringCmp (ifp->key, "snoRNA") == 0) {
13399 rrptype = 7;
13400 } else if (StringCmp (ifp->key, "misc_RNA") == 0) {
13401 rrptype = 255;
13402 }
13403 if (rrptype != 0) {
13404 sfp->data.value.ptrvalue = ImpFeatFree (ifp);
13405 sfp->data.choice = SEQFEAT_RNA;
13406 rrp = RnaRefNew ();
13407 sfp->data.value.ptrvalue = rrp;
13408 rrp->type = rrptype;
13409 sfp->idx.subtype = FindFeatDefType (sfp);
13410 } else {
13411 processed = 0;
13412 if (StringCmp (ifp->key, "proprotein") == 0 || StringCmp (ifp->key, "preprotein") == 0) {
13413 processed = 1;
13414 } else if (StringCmp (ifp->key, "mat_peptide") == 0) {
13415 processed = 2;
13416 } else if (StringCmp (ifp->key, "sig_peptide") == 0) {
13417 processed = 3;
13418 } else if (StringCmp (ifp->key, "transit_peptide") == 0) {
13419 processed = 4;
13420 } else if (StringCmp (ifp->key, "propeptide") == 0 || StringCmp (ifp->key, "pro_peptide") == 0) {
13421 processed = 5;
13422 }
13423 if (processed != 0 || StringCmp (ifp->key, "Protein") == 0) {
13424 bsp = BioseqFind (SeqLocId (sfp->location));
13425 if (bsp != NULL && ISA_aa (bsp->mol)) {
13426 sfp->data.value.ptrvalue = ImpFeatFree (ifp);
13427 sfp->data.choice = SEQFEAT_PROT;
13428 prp = ProtRefNew ();
13429 sfp->data.value.ptrvalue = prp;
13430 prp->processed = processed;
13431 sfp->idx.subtype = FindFeatDefType (sfp);
13432 }
13433 }
13434 }
13435 }
13436 if (sfp->data.choice == SEQFEAT_IMP && StringCmp (ifp->key, "repeat_region") == 0 && (! isEmblOrDdbj)) {
13437 satellite_type = ExtractSatelliteFromComment (sfp->comment);
13438 if (satellite_type != NULL) {
13439 gbq = GBQualNew ();
13440 if (gbq != NULL) {
13441 gbq->qual = StringSave ("satellite");
13442 gbq->val = satellite_type;
13443 gbq->next = sfp->qual;
13444 sfp->qual = gbq;
13445 }
13446 }
13447 }
13448 }
13449 }
13450 if (crp != NULL && crp->frame == 0 && (! sfp->pseudo)) {
13451 crp->frame = GetFrameFromLoc (sfp->location);
13452 }
13453 if (sfp->data.choice == SEQFEAT_IMP) {
13454 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
13455 if (ifp != NULL) {
13456 for (j = 0; reg_feat_keys [j].feat_key != NULL; j++) {
13457 if (StringICmp (ifp->key, reg_feat_keys [j].feat_key) == 0) {
13458 ifp->key = MemFree (ifp->key);
13459 ifp->key = StringSave ("regulatory");
13460 sfp->idx.subtype = FEATDEF_regulatory;
13461 gbq = GBQualNew ();
13462 if (gbq != NULL) {
13463 gbq->qual = StringSave ("regulatory_class");
13464 gbq->val = StringSave (reg_feat_keys [j].reg_class);
13465 gbq->next = sfp->qual;
13466 sfp->qual = gbq;
13467 }
13468 break;
13469 }
13470 }
13471 }
13472 }
13473 if (sfp->data.choice == SEQFEAT_IMP) {
13474 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
13475 if (ifp != NULL && StringCmp (ifp->key, "regulatory") == 0) {
13476 note = NULL;
13477 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
13478 if (StringCmp (gbq->qual, "regulatory_class") != 0) continue;
13479 str = StringChr (gbq->val, ':');
13480 if (str == NULL) continue;
13481 if (StringNCmp (gbq->val, "other:", 6) == 0) continue;
13482 *str = '\0';
13483 str++;
13484 TrimSpacesAroundString (str);
13485 if (StringHasNoText (str)) continue;
13486 note = str;
13487 }
13488 if (StringDoesHaveText (note)) {
13489 gbq = GBQualNew ();
13490 if (gbq != NULL) {
13491 gbq->qual = StringSave ("note");
13492 gbq->val = StringSave (note);
13493 gbq->next = sfp->qual;
13494 sfp->qual = gbq;
13495 }
13496 }
13497 }
13498 }
13499 if (sfp->data.choice == SEQFEAT_RNA) {
13500 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
13501 if (rrp != NULL) {
13502 if (rrp->ext.choice == 1) {
13503 name = (CharPtr) rrp->ext.value.ptrvalue;
13504 if (StringHasNoText (name)) {
13505 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
13506 rrp->ext.choice = 0;
13507 }
13508 } else if (rrp->ext.choice == 2) {
13509 trp = (tRNAPtr) rrp->ext.value.ptrvalue;
13510 if (trp != NULL) {
13511 if (trp->aatype == 0 && trp->aa == 0 && trp->anticodon == NULL) {
13512 emptyRNA = TRUE;
13513 for (j = 0; j < 6; j++) {
13514 if (trp->codon [j] != 255) {
13515 emptyRNA = FALSE;
13516 }
13517 }
13518 if (emptyRNA) {
13519 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
13520 rrp->ext.choice = 0;
13521 }
13522 }
13523 }
13524 } else if (rrp->ext.choice == 3) {
13525 rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
13526 if (rgp != NULL) {
13527 if (StringHasNoText (rgp->_class) && StringHasNoText (rgp->product)) {
13528 emptyRNA = TRUE;
13529 for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
13530 if (StringDoesHaveText (rqp->qual) && StringDoesHaveText (rqp->val)) {
13531 emptyRNA = FALSE;
13532 }
13533 }
13534 if (emptyRNA) {
13535 rrp->ext.value.ptrvalue = RNAGenFree (rrp->ext.value.ptrvalue);
13536 rrp->ext.choice = 0;
13537 }
13538 }
13539 }
13540 }
13541 }
13542 }
13543 ModernizeFeatureGBQuals (sfp);
13544 sfp->qual = SortFeatureGBQuals (sfp->qual);
13545 CleanupDuplicateGBQuals (&(sfp->qual));
13546 CleanupFeatureGBQuals (sfp, isEmblOrDdbj);
13547 sfp->qual = SortIllegalGBQuals (sfp->qual);
13548 CleanupFeatureStrings (sfp, isJscan, isEmblOrDdbj, stripSerial, modernizeFeats, publist);
13549 FixOldDbxrefs (sfp->dbxref, isEmblOrDdbj);
13550 FixNumericDbxrefs (sfp->dbxref);
13551 sfp->dbxref = ValNodeSort (sfp->dbxref, SortDbxref);
13552 CleanupDuplicateDbxrefs (&(sfp->dbxref));
13553 CleanupObsoleteDbxrefs (&(sfp->dbxref));
13554 CleanupGoDbxrefs (sfp->dbxref);
13555 psp = sfp->cit;
13556 if (psp != NULL && psp->data.ptrvalue) {
13557 psp->data.ptrvalue = ValNodeSort ((ValNodePtr) psp->data.ptrvalue, SortCits);
13558 CleanupDuplicateCits ((ValNodePtr PNTR) &(psp->data.ptrvalue));
13559 }
13560 CleanUpSeqLoc (sfp->location);
13561 strand = SeqLocStrand (sfp->location);
13562 id = SeqLocId (sfp->location);
13563 if (sfp->data.choice == SEQFEAT_GENE) {
13564 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
13565 if (grp != NULL) {
13566 if (grp->pseudo) {
13567 sfp->pseudo = TRUE;
13568 grp->pseudo = FALSE;
13569 }
13570 }
13571 }
13572 if (sfp->data.choice == SEQFEAT_CDREGION) {
13573 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
13574 if (crp != NULL) {
13575 crp->code_break = SortCodeBreaks (sfp, crp->code_break);
13576 CleanupDuplicatedCodeBreaks (&(crp->code_break));
13577 for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
13578 CleanUpSeqLoc (cbp->loc);
13579 if (strand == Seq_strand_minus && id != NULL) {
13580 slp = cbp->loc;
13581 if (slp != NULL && slp->choice == SEQLOC_INT) {
13582 sip = SeqLocId (slp);
13583 if (sip != NULL && SeqIdComp (id, sip) == SIC_YES) {
13584 sintp = (SeqIntPtr) slp->data.ptrvalue;
13585 if (sintp != NULL) {
13586 sintp->strand = Seq_strand_minus;
13587 }
13588 }
13589 }
13590 }
13591 }
13592 }
13593 }
13594 if (sfp->data.choice == SEQFEAT_RNA) {
13595 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
13596 if (rrp != NULL) {
13597 if (rrp->pseudo) {
13598 sfp->pseudo = TRUE;
13599 rrp->pseudo = FALSE;
13600 }
13601 }
13602 if (rrp != NULL && rrp->ext.choice == 2) {
13603 trp = (tRNAPtr) rrp->ext.value.ptrvalue;
13604 if (trp != NULL && trp->anticodon != NULL) {
13605 CleanUpSeqLoc (trp->anticodon);
13606 if (strand == Seq_strand_minus && id != NULL) {
13607 slp = trp->anticodon;
13608 if (slp != NULL && slp->choice == SEQLOC_INT) {
13609 sip = SeqLocId (slp);
13610 if (sip != NULL && SeqIdComp (id, sip) == SIC_YES) {
13611 sintp = (SeqIntPtr) slp->data.ptrvalue;
13612 if (sintp != NULL) {
13613 sintp->strand = Seq_strand_minus;
13614 }
13615 }
13616 }
13617 }
13618 }
13619 }
13620 if (ConvertToNcRNA (sfp)) {
13621 sfp->idx.subtype = FindFeatDefType (sfp);
13622 }
13623 if (sfp->idx.subtype == FEATDEF_ncRNA) {
13624 FixncRNAClass (sfp);
13625 }
13626 }
13627 if (sfp->data.choice == SEQFEAT_PROT) {
13628 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
13629 if (prp != NULL && sfp->partial) {
13630 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
13631 if (! partial5 && ! partial3) {
13632 bsp = BioseqFind (SeqLocId (sfp->location));
13633 if (bsp != NULL && ISA_aa (bsp->mol)) {
13634 hasGibbsq = FALSE;
13635 for (sip = bsp->id; sip != NULL; sip = sip->next) {
13636 if (sip->choice == SEQID_GIBBSQ) {
13637 hasGibbsq = TRUE;
13638 }
13639 }
13640 if (hasGibbsq) {
13641 sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_title, NULL);
13642 if (sdp != NULL && sdp->choice == Seq_descr_title) {
13643 str = (CharPtr) sdp->data.ptrvalue;
13644 if (StringDoesHaveText (str)) {
13645 sync_mol_info = FALSE;
13646 if (StringStr (str, "{N-terminal}") != NULL) {
13647 partial3 = TRUE;
13648 sync_mol_info = TRUE;
13649 } else if (StringStr (str, "{C-terminal}") != NULL) {
13650 partial5 = TRUE;
13651 sync_mol_info = TRUE;
13652 }
13653 if (sync_mol_info) {
13654 SetSeqLocPartial (sfp->location, partial5, partial3);
13655 sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
13656 if (sdp != NULL && sdp->choice == Seq_descr_molinfo) {
13657 mip = (MolInfoPtr) sdp->data.ptrvalue;
13658 if (mip != NULL) {
13659 if (partial5 && partial3) {
13660 mip->completeness = 5;
13661 } else if (partial5) {
13662 mip->completeness = 3;
13663 } else if (partial3) {
13664 mip->completeness = 4;
13665 } else if (sfp->partial) {
13666 mip->completeness = 2;
13667 } else {
13668 mip->completeness = 0;
13669 }
13670 }
13671 }
13672 }
13673 }
13674 }
13675 }
13676 }
13677 }
13678 }
13679 }
13680 if (sfp->data.choice == SEQFEAT_REGION ||
13681 sfp->data.choice == SEQFEAT_SITE ||
13682 sfp->data.choice == SEQFEAT_BOND ||
13683 sfp->data.choice == SEQFEAT_PROT) {
13684 bsp = BioseqFind (SeqLocId (sfp->location));
13685 if (bsp != NULL && ISA_aa (bsp->mol)) {
13686 slp = SeqLocFindNext (sfp->location, NULL);
13687 while (slp != NULL) {
13688 if (slp->choice == SEQLOC_INT) {
13689 sintp = (SeqIntPtr) slp->data.ptrvalue;
13690 if (sintp != NULL) {
13691 if (sintp->strand != Seq_strand_unknown) {
13692 sintp->strand = Seq_strand_unknown;
13693 }
13694 }
13695 } else if (slp->choice == SEQLOC_PNT) {
13696 pntp = (SeqPntPtr) slp->data.ptrvalue;
13697 if (pntp->strand != Seq_strand_unknown) {
13698 pntp->strand = Seq_strand_unknown;
13699 }
13700 }
13701 slp = SeqLocFindNext (sfp->location, slp);
13702 }
13703 }
13704 }
13705 if (sfp->data.choice == SEQFEAT_BIOSRC) {
13706 /* combine multiple orgmod or subsource note qualifiers */
13707 ConsolidateBioSourceNotes(sfp->data.value.ptrvalue);
13708 /* if a BioSource feature has a comment, move the comment to
13709 * a subsource note.
13710 */
13711 MoveBioSourceFeatureNoteToSubSourceNote(sfp);
13712 }
13713
13714 ModernizeFeatureStrings (sfp, isEmblOrDdbj);
13715
13716 if (sfp->data.choice == SEQFEAT_GENE) {
13717 if (modernizeFeats) {
13718 ModernizeGeneFields (sfp);
13719 }
13720 }
13721
13722 if (sfp->data.choice == SEQFEAT_RNA) {
13723 if (modernizeFeats) {
13724 DoModernizeRNAFields (sfp);
13725 }
13726 }
13727
13728 if (IsFeatureCommentRedundant (sfp)) {
13729 sfp->comment = MemFree (sfp->comment);
13730 }
13731
13732 /* sort and unique gbquals again after recent processing */
13733 sfp->qual = SortFeatureGBQuals (sfp->qual);
13734 CleanupDuplicateGBQuals (&(sfp->qual));
13735 sfp->qual = SortIllegalGBQuals (sfp->qual);
13736
13737 /* normalize Seq-point fuzz tl to tr and decrement position */
13738 slp = SeqLocFindNext (sfp->location, NULL);
13739 for (slp = SeqLocFindNext (sfp->location, NULL);
13740 slp != NULL;
13741 slp = SeqLocFindNext (sfp->location, slp)) {
13742 if (slp->choice != SEQLOC_PNT) continue;
13743 pntp = (SeqPntPtr) slp->data.ptrvalue;
13744 if (pntp == NULL) continue;
13745 fuzz = pntp->fuzz;
13746 if (fuzz == NULL) continue;
13747 if (fuzz->choice == 4 /* lim */ && fuzz->a == 4 /* tl */ && pntp->point > 0) {
13748 (pntp->point)--;
13749 fuzz->a = 3; /* tr */
13750 }
13751 }
13752
13753 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
13754 hasNulls = LocationHasNullsBetween (sfp->location);
13755 sfp->partial = (sfp->partial || partial5 || partial3 || (hasNulls && ! isEmblOrDdbj));
13756
13757 prevlink = (SeqFeatXrefPtr PNTR) &(sfp->xref);
13758 xref = sfp->xref;
13759 while (xref != NULL) {
13760 next = xref->next;
13761
13762 if (xref->id.choice == 0 && xref->data.choice == 0) {
13763 *prevlink = xref->next;
13764 xref->next = NULL;
13765 MemFree (xref);
13766 } else {
13767 prevlink = (SeqFeatXrefPtr PNTR) &(xref->next);
13768 }
13769
13770 xref = next;
13771 }
13772 }
13773
13774
/* Runs CleanUpSeqLoc on the location of a Seq-graph, when both exist. */
static void CleanUpSeqGraph (SeqGraphPtr sgp)

{
  if (sgp != NULL && sgp->loc != NULL) {
    CleanUpSeqLoc (sgp->loc);
  }
}
13783
/*
 * Unlinks and frees delta-sequence components that are empty literals.
 *
 * Only Bioseqs whose repr is Seq_repr_delta are touched.  A component is
 * removed when its choice is 2, its SeqLit is present with length 0 and
 * seq_data_type 1, and its seq_data pointer is non-NULL.  Every other
 * component is kept, in its original order.
 */
static void RemoveZeroLengthSeqLits (BioseqPtr bsp)
{
  DeltaSeqPtr  curr;
  DeltaSeqPtr  following;
  DeltaSeqPtr  kept = NULL;   /* last component that stayed in the chain */
  SeqLitPtr    lit;

  if (bsp == NULL) return;
  if (bsp->repr != Seq_repr_delta) return;

  curr = (DeltaSeqPtr) bsp->seq_ext;
  while (curr != NULL) {
    /* remember the successor first, curr may be freed below */
    following = curr->next;

    lit = NULL;
    if (curr->choice == 2) {
      lit = (SeqLitPtr) (curr->data.ptrvalue);
    }

    if (lit == NULL || lit->length != 0 || lit->seq_data_type != 1 || lit->seq_data == NULL) {
      kept = curr;
    } else {
      if (kept != NULL) {
        kept->next = following;
      } else {
        bsp->seq_ext = following;
      }
      /* detach before freeing so only this one node is released */
      curr->next = NULL;
      DeltaSeqFree (curr);
    }

    curr = following;
  }
}
13810
13811 /*
13812 static Boolean CleanUpObjId (ObjectIdPtr oip)
13813
13814 {
13815 size_t len;
13816 CharPtr ptr;
13817 Boolean rval = FALSE;
13818 long val;
13819
13820 if (oip == NULL) return FALSE;
13821 if (StringDoesHaveText (oip->str)) {
13822 if (isspace (oip->str[0]) || isspace (oip->str[StringLen (oip->str) - 1])) {
13823 TrimSpacesAroundString (oip->str);
13824 rval = TRUE;
13825 }
13826 }
13827 ptr = oip->str;
13828 if (ptr != NULL && *ptr != '0' && StringIsAllDigits(ptr)) {
13829 len = StringLen (ptr);
13830 if (len < 10 || (len == 10 && StringCmp (ptr, "2147483647") <= 0)) {
13831 if (sscanf (oip->str, "%ld", &val) == 1) {
13832 oip->id = (Int4) val;
13833 oip->str = MemFree (oip->str);
13834 rval = TRUE;
13835 }
13836 }
13837 }
13838 return rval;
13839 }
13840
13841 static Boolean CleanUpSeqIdText (SeqIdPtr sip)
13842 {
13843 DbtagPtr dbt;
13844 ObjectIdPtr oip;
13845 Boolean rval = FALSE;
13846
13847 if (sip == NULL) return FALSE;
13848 if (sip->choice == SEQID_LOCAL) {
13849 oip = (ObjectIdPtr) sip->data.ptrvalue;
13850 if (oip != NULL) {
13851 if (CleanUpObjId (oip)) {
13852 rval = TRUE;
13853 }
13854 }
13855 } else if (sip->choice == SEQID_GENERAL) {
13856 dbt = (DbtagPtr) sip->data.ptrvalue;
13857 if (dbt != NULL) {
13858 oip = dbt->tag;
13859 if (oip != NULL) {
13860 if (CleanUpObjId (oip)) {
13861 rval = TRUE;
13862 }
13863 }
13864 }
13865 }
13866 return rval;
13867 }
13868 */
13869
13870
CleanUpSeqIdText(SeqIdPtr sip)13871 static Boolean CleanUpSeqIdText (SeqIdPtr sip)
13872 {
13873 ObjectIdPtr oip;
13874 Boolean rval = FALSE;
13875
13876 if (sip == NULL) return FALSE;
13877 if (sip->choice == SEQID_LOCAL) {
13878 oip = (ObjectIdPtr) sip->data.ptrvalue;
13879 if (oip != NULL) {
13880 if (StringDoesHaveText (oip->str)) {
13881 if (isspace (oip->str[0]) || isspace (oip->str[StringLen (oip->str) - 1])) {
13882 TrimSpacesAroundString (oip->str);
13883 rval = TRUE;
13884 }
13885 }
13886 }
13887 }
13888 return rval;
13889 }
13890
/*
 * Adapter with the (SeqIdPtr, Pointer) signature passed to the
 * VisitSeqIdsIn... functions in this file.  It forwards to CleanUpSeqIdText
 * and discards the result; userdata is not used.
 */
static void CleanUpSeqId (SeqIdPtr sip, Pointer userdata)

{
  (void) CleanUpSeqIdText (sip);
}
13899
/*
 * Cleans the text of every Seq-id on a Bioseq with CleanUpSeqIdText and,
 * if any id was changed, calls SeqMgrReplaceInBioseqIndex for the Bioseq.
 *
 * A NULL bsp is ignored, matching the other Bioseq callbacks in this file.
 * userdata is not used.
 */
static void CleanSeqIdInBioseq (BioseqPtr bsp, Pointer userdata)

{
  Boolean   changed = FALSE;
  SeqIdPtr  sip;

  if (bsp == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (CleanUpSeqIdText (sip)) {
      changed = TRUE;
    }
  }

  /* the index is only refreshed when at least one id string was edited */
  if (changed) {
    SeqMgrReplaceInBioseqIndex (bsp);
  }
}
13915
/*
 * Hands the feature to VisitSeqIdsInSeqFeat with CleanUpSeqId as the
 * per-id callback.  userdata is not used; NULL is passed as the visitor's
 * user data.
 */
static void CleanSeqIdInSeqFeat (SeqFeatPtr sfp, Pointer userdata)

{
  VisitSeqIdsInSeqFeat (sfp, NULL, CleanUpSeqId);
}
13921
/*
 * Hands the alignment to VisitSeqIdsInSeqAlign with CleanUpSeqId as the
 * per-id callback.  userdata is not used; NULL is passed as the visitor's
 * user data.
 */
static void CleanSeqIdInSeqAlign (SeqAlignPtr sap, Pointer userdata)

{
  VisitSeqIdsInSeqAlign (sap, NULL, CleanUpSeqId);
}
13927
/*
 * Hands the graph to VisitSeqIdsInSeqGraph with CleanUpSeqId as the
 * per-id callback.  userdata is not used; NULL is passed as the visitor's
 * user data.
 */
static void CleanSeqIdInSeqGraph (SeqGraphPtr sgp, Pointer userdata)

{
  VisitSeqIdsInSeqGraph (sgp, NULL, CleanUpSeqId);
}
13933
/*
 * Hands the annotation to VisitSeqIdsInSeqAnnot with CleanUpSeqId as the
 * per-id callback.  userdata is not used; NULL is passed as the visitor's
 * user data.
 */
static void CleanSeqIdInSeqAnnot (SeqAnnotPtr annot, Pointer userdata)

{
  VisitSeqIdsInSeqAnnot (annot, NULL, CleanUpSeqId);
}
13939
/* Tallies collected by FixBadSetClass while choosing a class for a Bioseq-set */
typedef struct npcounts {
  Int4     nucs;          /* incremented by CountNucsAndProts for each ISA_na Bioseq */
  Int4     prots;         /* incremented by CountNucsAndProts for each ISA_aa Bioseq */
  Boolean  make_genbank;  /* set by CheckInnerSets for a visited set that is neither segset nor parts */
} NPCounts, PNTR NPCountsPtr;
13945
/*
 * Bioseq callback that bumps the nucleotide or protein tally in the
 * NPCounts structure passed through userdata.  Bioseqs that are neither
 * ISA_na nor ISA_aa are not counted; NULL arguments are ignored.
 */
static void CountNucsAndProts (BioseqPtr bsp, Pointer userdata)

{
  NPCountsPtr  tally;

  if (bsp == NULL) return;
  tally = (NPCountsPtr) userdata;
  if (tally == NULL) return;

  if (ISA_na (bsp->mol)) {
    tally->nucs += 1;
    return;
  }
  if (ISA_aa (bsp->mol)) {
    tally->prots += 1;
  }
}
13961
/*
 * Bioseq-set callback that raises the make_genbank flag in the NPCounts
 * structure passed through userdata whenever the visited set is neither a
 * segset nor a parts set.  NULL arguments are ignored.
 */
static void CheckInnerSets (BioseqSetPtr bssp, Pointer userdata)

{
  NPCountsPtr  tally;

  if (bssp == NULL) return;
  tally = (NPCountsPtr) userdata;
  if (tally == NULL) return;

  if (bssp->_class != BioseqseqSet_class_segset &&
      bssp->_class != BioseqseqSet_class_parts) {
    tally->make_genbank = TRUE;
  }
}
13974
/*
 * Assigns a class to a Bioseq-set whose class is not_set or other.
 *
 * Sequences are counted with CountNucsAndProts (VISIT_MAINS) and inner sets
 * are inspected with CheckInnerSets.  The set becomes nuc_prot when there is
 * exactly one nucleotide, at least one protein, and no inner set raised
 * make_genbank; in every other case it becomes genbank.  Sets that already
 * have any other class are left alone.  userdata is not used.
 */
static void FixBadSetClass (BioseqSetPtr bssp, Pointer userdata)

{
  NPCounts  counts;

  if (bssp == NULL) return;
  if (! (bssp->_class == BioseqseqSet_class_not_set ||
         bssp->_class == BioseqseqSet_class_other)) return;

  MemSet ((Pointer) &counts, 0, sizeof (NPCounts));
  VisitSequencesInSet (bssp, (Pointer) &counts, VISIT_MAINS, CountNucsAndProts);
  VisitSetsInSet (bssp, (Pointer) &counts, CheckInnerSets);

  if (counts.make_genbank || counts.nucs != 1 || counts.prots <= 0) {
    bssp->_class = BioseqseqSet_class_genbank;
  } else {
    bssp->_class = BioseqseqSet_class_nuc_prot;
  }
}
13992
/*
 * Removes repeated Seq-ids from a Bioseq's id chain.
 *
 * For each id, every later id for which SeqIdComp reports SIC_YES is
 * unlinked and freed, so the first occurrence of each id survives and the
 * relative order of the survivors is unchanged.
 */
static void RemoveDuplicateSeqIds (BioseqPtr bsp)

{
  SeqIdPtr  following;
  SeqIdPtr  keeper;
  SeqIdPtr  probe;
  SeqIdPtr  trail;    /* last surviving node before probe */

  if (bsp == NULL) return;

  for (keeper = bsp->id; keeper != NULL; keeper = keeper->next) {
    trail = keeper;
    probe = keeper->next;
    while (probe != NULL) {
      /* saved up front because probe may be freed */
      following = probe->next;
      if (SeqIdComp (keeper, probe) != SIC_YES) {
        trail = probe;
      } else {
        trail->next = following;
        probe->next = NULL;
        SeqIdFree (probe);
      }
      probe = following;
    }
  }
}
14016
14017
/*
 * Recursive worker that applies basic cleanup to one Seq-entry.
 *
 * For a Bioseq: removes duplicate Seq-ids, removes zero-length Seq-literals
 * from delta sequences, and repairs bsp->mol from the MolInfo biomol.
 * For a Bioseq-set: first recurses into every member of seq_set.
 * In both cases it then cleans every feature and graph in the entry's own
 * annotation chain, cleans every descriptor in its own descriptor chain,
 * and finally copies the GenBank block division into any OrgName whose div
 * has no text.
 *
 * sep          entry to clean; NULL or an entry that is neither Bioseq nor
 *              Bioseq-set is ignored
 * publist      passed through to CleanUpSeqFeat and CleanupDescriptorStrings
 * isEmblOrDdbj passed through to CleanUpSeqFeat and CleanupDescriptorStrings
 * isJscan      passed through to CleanUpSeqFeat
 * stripSerial  passed through to CleanUpSeqFeat and CleanupDescriptorStrings
 */
static void BasicSeqEntryCleanupInternal (
  SeqEntryPtr sep,
  ValNodePtr PNTR publist,
  Boolean isEmblOrDdbj,
  Boolean isJscan,
  Boolean stripSerial
)

{
  BioSourcePtr  biop;
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  SeqDescrPtr   desc;
  Char          div [10];
  GBBlockPtr    gbp;
  MolInfoPtr    mip;
  OrgNamePtr    onp;
  OrgRefPtr     orp;
  SeqAnnotPtr   sap = NULL;
  ValNodePtr    sdp = NULL;
  SeqFeatPtr    sfp;
  SeqGraphPtr   sgp;
  SeqEntryPtr   tmp;

  if (sep == NULL) return;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    /* remove duplicate SeqIds on the same Bioseq */
    RemoveDuplicateSeqIds (bsp);

    /* repair damaged delta sequences */
    RemoveZeroLengthSeqLits (bsp);

    /* remember this Bioseq's own annotation and descriptor chains for the loops below */
    sap = bsp->annot;
    sdp = bsp->descr;
    desc = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
    if (desc != NULL && desc->choice == Seq_descr_molinfo) {
      mip = (MolInfoPtr) desc->data.ptrvalue;
      if (mip != NULL) {
        /* repair if bsp.mol is not-set */
        if (bsp->mol == 0) {
          switch (mip->biomol) {
            case MOLECULE_TYPE_GENOMIC :
              bsp->mol = Seq_mol_na;
              break;
            case MOLECULE_TYPE_PRE_MRNA :
            case MOLECULE_TYPE_MRNA :
            case MOLECULE_TYPE_RRNA :
            case MOLECULE_TYPE_TRNA :
            case MOLECULE_TYPE_SNRNA :
            case MOLECULE_TYPE_SCRNA :
            case MOLECULE_TYPE_CRNA :
            case MOLECULE_TYPE_SNORNA :
            case MOLECULE_TYPE_TRANSCRIBED_RNA :
            case MOLECULE_TYPE_NCRNA :
            case MOLECULE_TYPE_TMRNA :
              bsp->mol = Seq_mol_rna;
              break;
            case MOLECULE_TYPE_PEPTIDE :
              bsp->mol = Seq_mol_aa;
              break;
            case MOLECULE_TYPE_OTHER_GENETIC_MATERIAL :
              bsp->mol = Seq_mol_other;
              break;
            case MOLECULE_TYPE_GENOMIC_MRNA_MIX :
              bsp->mol = Seq_mol_na;
              break;
            default :
              /* any other biomol leaves bsp->mol unset */
              break;
          }
        } else if (bsp->mol != Seq_mol_rna
                   && (mip->biomol == MOLECULE_TYPE_CRNA || mip->biomol == MOLECULE_TYPE_MRNA)) {
          /* a cRNA or mRNA biomol overrides any non-RNA mol already present */
          bsp->mol = Seq_mol_rna;
        }
      }
    }
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    /* members are cleaned before the set's own annotations and descriptors */
    for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
      BasicSeqEntryCleanupInternal (tmp, publist, isEmblOrDdbj, isJscan, stripSerial);
    }
    sap = bssp->annot;
    sdp = bssp->descr;
  } else return;
  biop = NULL;
  orp = NULL;
  gbp = NULL;
  div [0] = '\0';
  while (sap != NULL) {
    if (sap->type == 1) {
      /* type 1: sap->data is treated as a chain of features */
      sfp = (SeqFeatPtr) sap->data;
      while (sfp != NULL) {
        CleanUpSeqFeat (sfp, isEmblOrDdbj, isJscan, stripSerial, TRUE, publist);
        sfp = sfp->next;
      }
    } else if (sap->type == 3) {
      /* type 3: sap->data is treated as a chain of graphs */
      sgp = (SeqGraphPtr) sap->data;
      while (sgp != NULL) {
        CleanUpSeqGraph (sgp);
        sgp = sgp->next;
      }
    }
    sap = sap->next;
  }
  while (sdp != NULL) {
    /* remember the org-ref and GenBank block for the division copy below;
       when several descriptors qualify, the last one seen wins */
    switch (sdp->choice) {
      case Seq_descr_org :
        orp = (OrgRefPtr) sdp->data.ptrvalue;
        break;
      case Seq_descr_genbank :
        gbp = (GBBlockPtr) sdp->data.ptrvalue;
        break;
      case Seq_descr_source :
        biop = (BioSourcePtr) sdp->data.ptrvalue;
        if (biop != NULL) {
          orp = biop->org;
        }
        break;
      default :
        break;
    }
    CleanupDescriptorStrings (sdp, stripSerial, TRUE, publist, isEmblOrDdbj);
    sdp = sdp->next;
  }

  /* copy genbank block division into biosource, if necessary */

  if (orp != NULL && gbp != NULL) {
    /* div holds at most sizeof (div) bytes of the GenBank division */
    StringNCpy_0 (div, gbp->div, sizeof (div));
    if (StringHasNoText (div)) return;
    onp = orp->orgname;
    while (onp != NULL) {
      /* only OrgNames without a division of their own are filled in */
      if (StringHasNoText (onp->div)) {
        onp->div = MemFree (onp->div);
        onp->div = StringSaveNoNull (div);
      }
      onp = onp->next;
    }
  }
}
14160
/*
 * Replaces a Cit-gen's cit string using a list of (old, new) string pairs.
 *
 * publist is scanned for the first node with choice 1 whose string equals
 * cgp->cit.  If the node right after it has choice 2, cgp->cit is freed and
 * replaced with a copy of that node's string, and a leading
 * case-insensitive "unpublished" gets its first letter forced to 'U'.
 * Scanning stops at the first matching choice-1 node whether or not a
 * replacement was made.
 */
static void ReplaceCitOnFeat (CitGenPtr cgp, ValNodePtr publist)

{
  ValNodePtr  key;
  ValNodePtr  val;

  for (key = publist; key != NULL; key = key->next) {
    if (key->choice != 1) continue;
    if (StringCmp (cgp->cit, (CharPtr) key->data.ptrvalue) != 0) continue;

    val = key->next;
    if (val == NULL || val->choice != 2) return;

    cgp->cit = MemFree (cgp->cit);
    cgp->cit = StringSaveNoNull ((CharPtr) val->data.ptrvalue);
    if (cgp->cit != NULL && StringNICmp (cgp->cit, "unpublished", 11) == 0) {
      cgp->cit [0] = 'U';
    }
    return;
  }
}
14184
/*
 * Feature callback that runs ReplaceCitOnFeat on the Cit-gen citations of
 * a feature.
 *
 * Each pub in sfp->cit is examined: a PUB_Gen is used directly, and inside
 * a PUB_Equiv the first PUB_Gen member is used.  The Cit-gen is passed on
 * only when its cit string has text.  userdata is the ValNode list handed
 * to ReplaceCitOnFeat.
 */
static void ChangeCitsOnFeats (SeqFeatPtr sfp, Pointer userdata)

{
  CitGenPtr   cgp;
  ValNodePtr  gen;
  ValNodePtr  ppr;
  ValNodePtr  psp;

  psp = sfp->cit;
  if (psp == NULL || psp->data.ptrvalue == NULL) return;

  for (ppr = (ValNodePtr) psp->data.ptrvalue; ppr != NULL; ppr = ppr->next) {
    gen = NULL;
    if (ppr->choice == PUB_Gen) {
      gen = ppr;
    } else if (ppr->choice == PUB_Equiv) {
      /* stop on the first Cit-gen inside the equivalence set, if any */
      gen = (ValNodePtr) ppr->data.ptrvalue;
      while (gen != NULL && gen->choice != PUB_Gen) {
        gen = gen->next;
      }
    }
    if (gen == NULL) continue;

    cgp = (CitGenPtr) gen->data.ptrvalue;
    if (cgp == NULL || StringHasNoText (cgp->cit)) continue;
    ReplaceCitOnFeat (cgp, (ValNodePtr) userdata);
  }
}
14213
/*
 * Looks up a muid in a flat list laid out as muid, pmid, muid, pmid, ...
 *
 * Returns the integer stored in the node following the first muid node
 * that matches, or 0 when there is no match or the list ends before the
 * paired node.
 */
static Int4 GetPmidForMuid (ValNodePtr pairlist, Int4 muid)

{
  ValNodePtr  key;
  ValNodePtr  val;

  for (key = pairlist; key != NULL; key = val->next) {
    val = key->next;
    /* a trailing key with no partner ends the search either way */
    if (val == NULL) return 0;
    if (muid == key->data.intvalue) return val->data.intvalue;
  }

  return 0;
}
14234
/*
 * Feature callback that rewrites muid citations on a feature as pmids.
 *
 * Each pub in sfp->cit is examined: a PUB_Muid is used directly, and inside
 * a PUB_Equiv the first PUB_Muid member is used.  When the muid is non-zero
 * and GetPmidForMuid finds a non-zero pmid for it in the list passed as
 * userdata, the node's choice becomes PUB_PMid and its value the pmid.
 */
static void ChangeFeatCitsToPmid (SeqFeatPtr sfp, Pointer userdata)

{
  ValNodePtr  hit;
  Int4        muid = 0;
  Int4        pmid = 0;
  ValNodePtr  ppr;
  ValNodePtr  psp;

  psp = sfp->cit;
  if (psp == NULL || psp->data.ptrvalue == NULL) return;

  for (ppr = (ValNodePtr) psp->data.ptrvalue; ppr != NULL; ppr = ppr->next) {
    hit = NULL;
    if (ppr->choice == PUB_Muid) {
      hit = ppr;
    } else if (ppr->choice == PUB_Equiv) {
      /* stop on the first muid inside the equivalence set, if any */
      hit = (ValNodePtr) ppr->data.ptrvalue;
      while (hit != NULL && hit->choice != PUB_Muid) {
        hit = hit->next;
      }
    }
    if (hit == NULL) continue;

    muid = hit->data.intvalue;
    if (muid == 0) continue;
    pmid = GetPmidForMuid ((ValNodePtr) userdata, muid);
    if (pmid == 0) continue;

    hit->choice = PUB_PMid;
    hit->data.intvalue = pmid;
  }
}
14268
/*
 * Pubdesc callback that records a (muid, pmid) pair.
 *
 * The pub chain is scanned for PUB_Muid and PUB_PMid entries; when several
 * of one kind are present the last one wins.  If both values are non-zero
 * they are appended, muid first, to the ValNode list whose head pointer is
 * passed through userdata.
 */
static void GetMuidPmidPairs (PubdescPtr pdp, Pointer userdata)

{
  ValNodePtr PNTR  pairs;
  Int4             muid = 0;
  Int4             pmid = 0;
  ValNodePtr       vnp;

  for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice == PUB_Muid) {
      muid = vnp->data.intvalue;
    } else if (vnp->choice == PUB_PMid) {
      pmid = vnp->data.intvalue;
    }
  }

  if (muid != 0 && pmid != 0) {
    pairs = (ValNodePtr PNTR) userdata;
    ValNodeAddInt (pairs, 0, muid);
    ValNodeAddInt (pairs, 0, pmid);
  }
}
14292
/*
 * Collapses single-member Pub-equivs in a pub chain.
 *
 * For every PUB_Equiv node whose member list holds exactly one node, the
 * outer node takes over the member's choice and data (integer value for
 * PUB_Muid / PUB_PMid, pointer otherwise) and the member node itself is
 * released with ValNodeFree.  Other nodes are untouched.
 */
static void FlattenPubSet (ValNodePtr PNTR prev)

{
  ValNodePtr  only;
  ValNodePtr  ppr;

  if (prev == NULL || *prev == NULL) return;

  for (ppr = *prev; ppr != NULL; ppr = ppr->next) {
    if (ppr->choice != PUB_Equiv) continue;

    only = (ValNodePtr) ppr->data.ptrvalue;
    if (only == NULL || only->next != NULL) continue;

    ppr->choice = only->choice;
    if (only->choice == PUB_Muid || only->choice == PUB_PMid) {
      ppr->data.intvalue = only->data.intvalue;
    } else {
      ppr->data.ptrvalue = only->data.ptrvalue;
    }
    ValNodeFree (only);
  }
}
14325
/*
 * Drops the duplicate from two-member Pub-equivs that repeat one id.
 *
 * For every PUB_Equiv node whose member list is exactly two nodes of the
 * same choice, where that choice is PUB_Muid or PUB_PMid and both carry the
 * same integer, the second member is released with ValNodeFree and the
 * first becomes the only member.
 */
static void FlattenDupInPubSet (ValNodePtr PNTR prev)

{
  ValNodePtr  first;
  ValNodePtr  ppr;
  ValNodePtr  second;

  if (prev == NULL || *prev == NULL) return;

  for (ppr = *prev; ppr != NULL; ppr = ppr->next) {
    if (ppr->choice != PUB_Equiv) continue;

    first = (ValNodePtr) ppr->data.ptrvalue;
    if (first == NULL) continue;
    second = first->next;
    if (second == NULL || second->next != NULL) continue;
    if (first->choice != second->choice) continue;

    /* only integer ids are compared; other pub kinds are left as they are */
    if (first->choice != PUB_Muid && first->choice != PUB_PMid) continue;
    if (first->data.intvalue != second->data.intvalue) continue;

    first->next = ValNodeFree (second);
  }
}
14361
/*
 * Pubdesc callback that applies FlattenPubSet to the descriptor's pub
 * chain.  userdata is not used.
 */
static void FlattenPubdesc (PubdescPtr pdp, Pointer userdata)

{
  ValNodePtr PNTR  pubs;

  pubs = &(pdp->pub);
  FlattenPubSet (pubs);
}
14367
/*
 * Feature callback that tidies the pubs cited by a feature: duplicate ids
 * inside two-member Pub-equivs are dropped first (FlattenDupInPubSet), then
 * single-member Pub-equivs are collapsed (FlattenPubSet).  Features without
 * a cit are skipped; userdata is not used.
 */
static void FlattenSfpCit (SeqFeatPtr sfp, Pointer userdata)

{
  ValNodePtr PNTR  pubs;
  ValNodePtr       psp;

  psp = sfp->cit;
  if (psp != NULL) {
    pubs = (ValNodePtr PNTR) &(psp->data.ptrvalue);
    FlattenDupInPubSet (pubs);
    FlattenPubSet (pubs);
  }
}
14378
/* ValNode list tracked by both ends; GetCitGenLabels appends through tail and sets head on the first append */
typedef struct fastnode {
  ValNodePtr  head;   /* first node of the list, NULL while the list is empty */
  ValNodePtr  tail;   /* most recently appended node */
} FastNode, PNTR FastNodePtr;
14383
/*
 * Pubdesc callback that collects a label string for each Cit-gen.
 *
 * For every PUB_Gen in the pub chain a label is requested from
 * PubLabelUnique (OM_LABEL_CONTENT) and a copy is appended to the FastNode
 * list passed through userdata.  Cit-gens that have a non-zero serial
 * number but no cit, journal or date are skipped.  NULL arguments are
 * ignored.
 */
static void GetCitGenLabels (PubdescPtr pdp, Pointer userdata)

{
  Char         buf [121];
  CitGenPtr    cgp;
  FastNodePtr  labels;
  ValNodePtr   added;
  ValNodePtr   vnp;

  labels = (FastNodePtr) userdata;
  if (pdp == NULL || labels == NULL) return;

  for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice != PUB_Gen) continue;
    cgp = (CitGenPtr) vnp->data.ptrvalue;
    if (cgp == NULL) continue;
    if (cgp->serial_number != 0 && cgp->cit == NULL &&
        cgp->journal == NULL && cgp->date == NULL) continue;

    PubLabelUnique (vnp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT, TRUE);

    /* appending through the tail pointer avoids handing the whole list to ValNodeCopyStr */
    added = ValNodeCopyStr (&(labels->tail), 0, buf);
    if (labels->head == NULL) {
      labels->head = added;
    }
    labels->tail = added;
  }
}
14411
/*
 * Expands an abbreviated Cit-gen cit string to the full label it prefixes.
 *
 * cgp->cit is normalized into a local buffer: if it contains "Unpublished",
 * the spaces that directly follow that word are removed.  The normalized
 * text must end in '>'; everything before that '>' is the prefix.  The
 * first entry of labellist that has text, is longer than the prefix and
 * starts with it replaces cgp->cit (as a fresh copy), and a leading
 * case-insensitive "unpublished" gets its first letter forced to 'U'.
 *
 * Nothing happens when cgp is NULL, when cgp->cit is shorter than 2 or
 * longer than 120 characters, when the normalized text does not end in
 * '>', or when no label matches.
 *
 * The prefix depends only on cgp->cit, so it is computed once, before the
 * scan of labellist, rather than once per label.
 */
static void ReplaceShortCitGenOnFeat (CitGenPtr cgp, ValNodePtr labellist)

{
  Char        buf [128];
  size_t      label_len;
  size_t      prefix_len;
  CharPtr     ptr;
  CharPtr     str;
  CharPtr     tmp;
  ValNodePtr  vnp;

  if (cgp == NULL) return;

  /* the length limit keeps the copy below inside buf */
  prefix_len = StringLen (cgp->cit);
  if (prefix_len < 2 || prefix_len > 120) return;
  StringCpy (buf, cgp->cit);

  ptr = StringStr (buf, "Unpublished");
  if (ptr != NULL) {
    /* cut buf right after "Unpublished", then re-append the remainder of
       the original string with the spaces after that word skipped */
    ptr += 11;
    *ptr = '\0';
    tmp = StringStr (cgp->cit, "Unpublished");
    if (tmp != NULL) {
      tmp += 11;
      while (*tmp == ' ') {
        tmp++;
      }
      StringCat (buf, tmp);
    }
  }

  prefix_len = StringLen (buf);
  if (buf [prefix_len - 1] != '>') return;
  /* the trailing '>' marks the abbreviation and is not part of the prefix */
  prefix_len--;

  for (vnp = labellist; vnp != NULL; vnp = vnp->next) {
    str = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (str)) continue;
    label_len = StringLen (str);
    if (prefix_len >= label_len) continue;
    if (StringNCmp (str, buf, prefix_len) != 0) continue;

    cgp->cit = MemFree (cgp->cit);
    cgp->cit = StringSaveNoNull (str);
    if (cgp->cit != NULL && StringNICmp (cgp->cit, "unpublished", 11) == 0) {
      cgp->cit [0] = 'U';
    }
    return;
  }
}
14462
/*
 * Feature callback that runs ReplaceShortCitGenOnFeat on the Cit-gen
 * citations of a feature.
 *
 * Each pub in sfp->cit is examined: a PUB_Gen is used directly, and inside
 * a PUB_Equiv the first PUB_Gen member is used.  The Cit-gen is passed on
 * only when its cit string has text.  userdata is the label list handed to
 * ReplaceShortCitGenOnFeat.
 */
static void UpdateShortFeatCits (SeqFeatPtr sfp, Pointer userdata)

{
  CitGenPtr   cgp;
  ValNodePtr  gen;
  ValNodePtr  ppr;
  ValNodePtr  psp;

  psp = sfp->cit;
  if (psp == NULL || psp->data.ptrvalue == NULL) return;

  for (ppr = (ValNodePtr) psp->data.ptrvalue; ppr != NULL; ppr = ppr->next) {
    gen = NULL;
    if (ppr->choice == PUB_Gen) {
      gen = ppr;
    } else if (ppr->choice == PUB_Equiv) {
      /* stop on the first Cit-gen inside the equivalence set, if any */
      gen = (ValNodePtr) ppr->data.ptrvalue;
      while (gen != NULL && gen->choice != PUB_Gen) {
        gen = gen->next;
      }
    }
    if (gen == NULL) continue;

    cgp = (CitGenPtr) gen->data.ptrvalue;
    if (cgp == NULL || StringHasNoText (cgp->cit)) continue;
    ReplaceShortCitGenOnFeat (cgp, (ValNodePtr) userdata);
  }
}
14491
14492 //LCOV_EXCL_START
BasicSeqAnnotCleanup(SeqAnnotPtr sap)14493 NLM_EXTERN void BasicSeqAnnotCleanup (SeqAnnotPtr sap)
14494
14495 {
14496 SeqFeatPtr sfp;
14497 SeqGraphPtr sgp;
14498
14499 if (sap == NULL) return;
14500
14501 VisitSeqIdsInSeqAnnot (sap, NULL, CleanUpSeqId);
14502
14503 if (sap->type == 1) {
14504 sfp = (SeqFeatPtr) sap->data;
14505 while (sfp != NULL) {
14506 CleanUpSeqFeat (sfp, FALSE, FALSE, TRUE, TRUE, NULL);
14507 sfp = sfp->next;
14508 }
14509 } else if (sap->type == 3) {
14510 sgp = (SeqGraphPtr) sap->data;
14511 while (sgp != NULL) {
14512 CleanUpSeqGraph (sgp);
14513 sgp = sgp->next;
14514 }
14515 }
14516 }
14517 //LCOV_EXCL_STOP
14518
14519 /*
14520 static CharPtr proteinOrganellePrefix [] = {
14521 NULL,
14522 NULL,
14523 "chloroplast",
14524 "chromoplast",
14525 "kinetoplast",
14526 "mitochondrion",
14527 "plastid",
14528 "macronuclear",
14529 "extrachromosomal",
14530 "plasmid",
14531 NULL,
14532 NULL,
14533 "cyanelle",
14534 "proviral",
14535 "virus",
14536 "nucleomorph",
14537 "apicoplast",
14538 "leucoplast",
14539 "protoplast",
14540 "endogenous virus",
14541 "hydrogenosome",
14542 "chromosome",
14543 "chromatophore"
14544 };
14545 */
14546
/*
 * Organelle names looked up by BioSource genome value.  The code in this
 * file indexes the table only with values from GENOME_chloroplast through
 * GENOME_chromatophore.  NULL marks a genome value with no organelle
 * prefix; TitleEndsInOrganism skips those entries.
 * NOTE(review): the table has 23 entries (indexes 0..22), so this assumes
 * GENOME_chromatophore is 22 -- confirm against the GENOME_ definitions.
 */
static CharPtr proteinOrganellePrefix [] = {
  NULL,
  NULL,
  "chloroplast",
  "chromoplast",
  "kinetoplast",
  "mitochondrion",
  "plastid",
  "macronuclear",
  NULL,
  "plasmid",
  NULL,
  NULL,
  "cyanelle",
  NULL,
  NULL,
  "nucleomorph",
  "apicoplast",
  "leucoplast",
  "protoplast",
  "endogenous virus",
  "hydrogenosome",
  NULL,
  "chromatophore"
};
14572
TitleEndsInOrganism(CharPtr title,CharPtr organism,CharPtr organelle,CharPtr PNTR onlp,BoolPtr case_diffp)14573 static CharPtr TitleEndsInOrganism (
14574 CharPtr title,
14575 CharPtr organism,
14576 CharPtr organelle,
14577 CharPtr PNTR onlp,
14578 BoolPtr case_diffp
14579 )
14580
14581 {
14582 int genome;
14583 size_t len1, len2, len3;
14584 CharPtr onl, ptr, tmp;
14585
14586 if (onlp != NULL) {
14587 *onlp = NULL;
14588 }
14589 if (case_diffp != NULL) {
14590 *case_diffp = FALSE;
14591 }
14592 if (StringHasNoText (title) || StringHasNoText (organism)) return NULL;
14593 len1 = StringLen (title);
14594 len2 = StringLen (organism);
14595 if (len2 + 4 > len1) return NULL;
14596
14597 tmp = title + len1 - len2 - 3;
14598 if (tmp [0] != ' ' || tmp [1] != '[' || tmp [len2 + 2] != ']') return NULL;
14599 if (StringNICmp (tmp + 2, organism, len2) != 0) return NULL;
14600 if (StringNCmp (tmp + 2, organism, len2) != 0 && case_diffp != NULL) {
14601 *case_diffp = TRUE;
14602 }
14603
14604 if (onlp != NULL) {
14605 len3 = len1 - len2 - 3;
14606 for (genome = GENOME_chloroplast; genome <= GENOME_chromatophore; genome++) {
14607 ptr = proteinOrganellePrefix [genome];
14608 if (ptr == NULL) continue;
14609 len2 = StringLen (ptr);
14610 if (len2 + 4 >= len3) continue;
14611 onl = title + len3 - len2 - 3;
14612 if (onl [0] != ' ' || onl [1] != '(' || onl [len2 + 2] != ')') continue;
14613 if (StringNICmp (onl + 2, ptr, len2) != 0) continue;
14614 *onlp = onl;
14615 break;
14616 }
14617 }
14618
14619 return tmp;
14620 }
14621
RemoveOrgFromEndOfProtein(SeqFeatPtr sfp,Pointer userdata)14622 static void RemoveOrgFromEndOfProtein (SeqFeatPtr sfp, Pointer userdata)
14623
14624 {
14625 CharPtr cp;
14626 size_t len;
14627 ProtRefPtr prp;
14628 CharPtr str;
14629 CharPtr taxname;
14630 ValNodePtr vnp;
14631
14632 if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return;
14633 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
14634 if (prp == NULL) return;
14635
14636 taxname = (CharPtr) userdata;
14637 if (StringHasNoText (taxname)) return;
14638
14639 for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
14640 str = (CharPtr) vnp->data.ptrvalue;
14641 if (StringHasNoText (str)) continue;
14642 len = StringLen (str);
14643 if (len < 5) continue;
14644 if (str [len - 1] != ']') continue;
14645 cp = StringRChr (str, '[');
14646 if (cp == NULL) continue;
14647 if (StringNCmp (cp, "[NAD", 4) == 0) continue;
14648 len = StringLen (taxname);
14649 if (StringLen (cp) != len + 2) continue;
14650 if (StringNICmp (cp + 1, taxname, len - 1) != 0) continue;
14651 *cp = '\0';
14652 TrimSpacesAroundString (str);
14653 }
14654 }
14655
AddPartialToProteinTitle(BioseqPtr bsp,Pointer userdata)14656 static void AddPartialToProteinTitle (
14657 BioseqPtr bsp,
14658 Pointer userdata
14659 )
14660
14661 {
14662 CharPtr binomial = NULL;
14663 BioSourcePtr biop;
14664 BinomialOrgNamePtr bonp;
14665 Boolean case_difference = FALSE;
14666 CharPtr first_super_kingdom = NULL;
14667 int genome = 0;
14668 CharPtr genus = NULL;
14669 Boolean is_cross_kingdom = FALSE;
14670 Boolean is_wp = FALSE;
14671 size_t len;
14672 MolInfoPtr mip;
14673 Int2 num_super_kingdom = 0;
14674 CharPtr oldname = NULL;
14675 OrgModPtr omp;
14676 OrgNamePtr onp;
14677 CharPtr organelle = NULL;
14678 OrgRefPtr orp;
14679 Boolean partial = FALSE;
14680 CharPtr penult = NULL;
14681 CharPtr ptr;
14682 SeqDescrPtr sdp;
14683 CharPtr second_super_kingdom = NULL;
14684 SeqIdPtr sip;
14685 CharPtr species = NULL;
14686 CharPtr str;
14687 CharPtr suffix = NULL;
14688 Boolean super_kingdoms_different = FALSE;
14689 CharPtr taxname = NULL;
14690 TaxElementPtr tep;
14691 CharPtr title;
14692 CharPtr tmp;
14693 TextSeqIdPtr tsip;
14694 SeqDescrPtr ttl = NULL;
14695
14696 if (bsp == NULL) return;
14697 if (! ISA_aa (bsp->mol)) return;
14698
14699 for (sip = bsp->id; sip != NULL; sip = sip->next) {
14700 if (sip->choice == SEQID_SWISSPROT) return;
14701 if (sip->choice == SEQID_OTHER) {
14702 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
14703 if (tsip != NULL && tsip->accession != NULL) {
14704 if (StringNICmp (tsip->accession, "WP_", 3) == 0) {
14705 is_wp = TRUE;
14706 }
14707 }
14708 }
14709 }
14710
14711 sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
14712 if (sdp != NULL && sdp->choice == Seq_descr_molinfo) {
14713 mip = (MolInfoPtr) sdp->data.ptrvalue;
14714 if (mip != NULL && mip->completeness > 1 && mip->completeness < 6) {
14715 partial = TRUE;
14716 }
14717 }
14718
14719 sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
14720 if (sdp != NULL && sdp->choice == Seq_descr_source) {
14721 biop = (BioSourcePtr) sdp->data.ptrvalue;
14722 if (biop != NULL) {
14723 genome = biop->genome;
14724 if (genome >= GENOME_chloroplast && genome <= GENOME_chromatophore) {
14725 organelle = proteinOrganellePrefix [genome];
14726 }
14727 orp = biop->org;
14728 if (orp != NULL) {
14729 taxname = orp->taxname;
14730 /*
14731 if (StringNICmp (organelle, taxname, StringLen (organelle)) == 0) {
14732 organelle = NULL;
14733 }
14734 */
14735 onp = orp->orgname;
14736 if (onp != NULL) {
14737 if (onp->choice == 1) {
14738 bonp = (BinomialOrgNamePtr) onp->data;
14739 if (bonp != NULL) {
14740 genus = bonp->genus;
14741 species = bonp->species;
14742 }
14743 }
14744 for (omp = onp->mod; omp != NULL; omp = omp->next) {
14745 if (omp->subtype == ORGMOD_old_name) {
14746 oldname = omp->subname;
14747 }
14748 }
14749 }
14750 }
14751 }
14752 }
14753
14754 VisitFeaturesOnBsp (bsp, (Pointer) taxname, RemoveOrgFromEndOfProtein);
14755
14756 ttl = BioseqGetSeqDescr (bsp, Seq_descr_title, NULL);
14757 if (ttl == NULL || ttl->choice != Seq_descr_title) return;
14758 str = (CharPtr) ttl->data.ptrvalue;
14759 if (StringHasNoText (str)) return;
14760
14761 if (is_wp) {
14762 for (sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
14763 sdp != NULL;
14764 sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, sdp)) {
14765 if (sdp->choice != Seq_descr_source) continue;
14766 biop = (BioSourcePtr) sdp->data.ptrvalue;
14767 if (biop == NULL) continue;
14768 orp = biop->org;
14769 if (orp == NULL) continue;
14770 onp = orp->orgname;
14771 if (onp == NULL) continue;
14772 if (onp->choice != 5) continue;
14773 for (tep = (TaxElementPtr) onp->data; tep != NULL; tep = tep->next) {
14774 if (tep->fixed_level == 0 && StringICmp (tep->level, "superkingdom") == 0) {
14775 num_super_kingdom++;
14776 if (first_super_kingdom == NULL) {
14777 first_super_kingdom = tep->name;
14778 } else if (StringICmp (first_super_kingdom, tep->name) != 0) {
14779 second_super_kingdom = tep->name;
14780 super_kingdoms_different = TRUE;
14781 }
14782 if (num_super_kingdom > 1 && super_kingdoms_different) {
14783 is_cross_kingdom = TRUE;
14784 }
14785 }
14786 }
14787 }
14788 }
14789
14790 /* search for partial, must be just before parenthesized organelle or bracketed organism */
14791 tmp = StringSearch (str, ", partial [");
14792 if (tmp == NULL) {
14793 tmp = StringSearch (str, ", partial (");
14794 }
14795
14796 /* find oldname or taxname in brackets at end of protein title */
14797 if (oldname != NULL && taxname != NULL) {
14798 suffix = TitleEndsInOrganism (str, oldname, organelle, &penult, &case_difference);
14799 }
14800 if (suffix == NULL && taxname != NULL) {
14801 suffix = TitleEndsInOrganism (str, taxname, organelle, &penult, &case_difference);
14802 if (suffix == NULL && StringDoesHaveText (genus) && StringDoesHaveText (species)) {
14803 len = StringLen (genus) + StringLen (species) + 5;
14804 binomial = (CharPtr) MemNew (len);
14805 if (binomial != NULL) {
14806 StringCpy (binomial, genus);
14807 StringCat (binomial, " ");
14808 StringCat (binomial, species);
14809 suffix = TitleEndsInOrganism (str, binomial, organelle, &penult, &case_difference);
14810 }
14811 }
14812 if (suffix == NULL && is_cross_kingdom) {
14813 ptr = StringStr (str, "][");
14814 if (ptr != NULL) {
14815 *(ptr + 1) = '\0';
14816 suffix = TitleEndsInOrganism (str, taxname, organelle, &penult, &case_difference);
14817 }
14818 } else {
14819 if (organelle == NULL && penult != NULL) {
14820 } else if (organelle != NULL && penult == NULL) {
14821 } else if (StringCmp (organelle, penult) != 0) {
14822 } else if (binomial != NULL) {
14823 } else if (case_difference) {
14824 } else {
14825 /* bail if no need to change partial text (organelle) [organism name] */
14826 if (partial) {
14827 if (tmp != NULL) return;
14828 } else {
14829 if (tmp == NULL) return;
14830 }
14831 }
14832 }
14833 }
14834
14835 binomial = MemFree (binomial);
14836
14837 /* do not change unless [genus species] was at the end */
14838 if (suffix == NULL) return;
14839
14840 /* truncate bracketed info from end of title, will replace with current taxname */
14841 *suffix = '\0';
14842 suffix = taxname;
14843
14844 /* truncate parenthesized info from just before bracketed taxname, will replace with current organelle */
14845 if (penult != NULL) {
14846 *penult = '\0';
14847 }
14848
14849 /* if ", partial [/(" was indeed just before the [genus species] or (organelle), it will now be ", partial" */
14850 if (! partial && tmp != NULL && StringCmp (tmp, ", partial") == 0) {
14851 *tmp = '\0';
14852 }
14853 TrimSpacesAroundString (str);
14854
14855 len = StringLen (str) + StringLen (organelle) + StringLen (suffix) + StringLen (first_super_kingdom) + StringLen (second_super_kingdom) + 20;
14856 title = MemNew (sizeof (Char) * len);
14857 if (title == NULL) return;
14858
14859 StringCpy (title, str);
14860 if (partial && tmp == NULL) {
14861 StringCat (title, ", partial");
14862 }
14863 if (organelle != NULL) {
14864 StringCat (title, " (");
14865 StringCat (title, organelle);
14866 StringCat (title, ")");
14867 }
14868 if (is_cross_kingdom && StringDoesHaveText (first_super_kingdom) && StringDoesHaveText (second_super_kingdom)) {
14869 StringCat (title, " [");
14870 StringCat (title, first_super_kingdom);
14871 StringCat (title, "][");
14872 StringCat (title, second_super_kingdom);
14873 StringCat (title, "]");
14874 } else if (suffix != NULL) {
14875 StringCat (title, " [");
14876 StringCat (title, suffix);
14877 StringCat (title, "]");
14878 }
14879 MemFree (str);
14880 ttl->data.ptrvalue = title;
14881 }
14882
14883 //LCOV_EXCL_START
CleanUpProteinTitles(SeqEntryPtr sep)14884 NLM_EXTERN void CleanUpProteinTitles (SeqEntryPtr sep)
14885
14886 {
14887 if (sep == NULL) return;
14888 VisitBioseqsInSep (sep, NULL, AddPartialToProteinTitle);
14889 }
14890 //LCOV_EXCL_STOP
14891
/*
  BasicSeqEntryCleanupEx is the shared worker called by BasicSeqEntryCleanup
  (resync == FALSE) and AdvancedSeqEntryCleanup (resync == TRUE).

  In order, it:
    - assigns IDs in the entity and scopes lookups to sep,
    - cleans Seq-ids on Bioseqs, features, alignments, graphs and annots,
    - fixes bad set classes and flattens nested pubs,
    - runs BasicSeqEntryCleanupInternal and rewrites feature citations from
      the pub list, muid/pmid pairs and Cit-gen labels it collects,
    - restores the previous scope,
    - normalizes authors/affiliation on the submit block when the entity's
      top-level object is a Seq-submit,
    - optionally (resync) resynchronizes CDS, mRNA and protein partials,
    - finally runs AddPartialToProteinTitle over every Bioseq.

  A NULL sep is ignored.
*/
static void BasicSeqEntryCleanupEx (SeqEntryPtr sep, Boolean resync)

{
  AuthorPtr       contactAuthor;
  ContactInfoPtr  contactInfo;
  Uint2           entityID;
  Boolean         fromEmblOrDdbj = FALSE;
  Boolean         fromJscan = FALSE;
  FastNode        labels;
  ObjMgrDataPtr   omdp;
  ValNodePtr      pmidPairs = NULL;
  SeqEntryPtr     prevScope;
  ValNodePtr      pubs = NULL;
  SeqSubmitPtr    ssp;
  Boolean         stripSerial = TRUE;
  CitSubPtr       submitCit;
  SubmitBlockPtr  submitBlock = NULL;

  if (sep == NULL) return;

  /* InGpsGenomic needs idx fields assigned */

  entityID = SeqMgrGetEntityIDForSeqEntry (sep);
  AssignIDsInEntityEx (entityID, 0, NULL, NULL);

  /* HandleXrefOnCDS call to GetBestProteinFeatureUnindexed now scoped within record */

  prevScope = SeqEntrySetScope (sep);

  /* clean up spaces in local IDs */

  VisitBioseqsInSep (sep, NULL, CleanSeqIdInBioseq);
  VisitFeaturesInSep (sep, NULL, CleanSeqIdInSeqFeat);
  VisitAlignmentsInSep (sep, NULL, CleanSeqIdInSeqAlign);
  VisitGraphsInSep (sep, NULL, CleanSeqIdInSeqGraph);
  VisitAnnotsInSep (sep, NULL, CleanSeqIdInSeqAnnot);

  /* fix Bioseq-sets with class 0 */

  VisitSetsInSep (sep, NULL, FixBadSetClass);

  /* remove unnecessarily nested Pub-equivs */

  VisitPubdescsInSep (sep, NULL, FlattenPubdesc);
  VisitFeaturesInSep (sep, NULL, FlattenSfpCit);

  /* each explorer writes its verdict through the flag it is handed */

  SeqEntryExplore (sep, (Pointer) &stripSerial, CheckForSwissProtID);
  SeqEntryExplore (sep, (Pointer) &fromEmblOrDdbj, CheckForEmblDdbjID);
  SeqEntryExplore (sep, (Pointer) &fromJscan, CheckForJournalScanID);
#ifdef SUPPRESS_STRIP_SERIAL_DIFFERENCES
  stripSerial = FALSE;
#endif

  BasicSeqEntryCleanupInternal (sep, &pubs, fromEmblOrDdbj, fromJscan, stripSerial);
  if (pubs != NULL) {
    VisitFeaturesInSep (sep, (Pointer) pubs, ChangeCitsOnFeats);
  }
  ValNodeFreeData (pubs);

  /* now get muid/pmid pairs, update sfp->cits to pmids */

  VisitPubdescsInSep (sep, (Pointer) &pmidPairs, GetMuidPmidPairs);
  if (pmidPairs != NULL) {
    VisitFeaturesInSep (sep, (Pointer) pmidPairs, ChangeFeatCitsToPmid);
  }
  ValNodeFree (pmidPairs);

  /* collect Cit-gen labels and use them to update short feature citations */

  labels.head = NULL;
  labels.tail = NULL;
  VisitPubdescsInSep (sep, (Pointer) &labels, GetCitGenLabels);
  if (labels.head != NULL) {
    VisitFeaturesInSep (sep, (Pointer) labels.head, UpdateShortFeatCits);
  }
  ValNodeFreeData (labels.head);

  SeqEntrySetScope (prevScope);

  /* also normalize authors on submit block citation */

  entityID = SeqMgrGetEntityIDForSeqEntry (sep);
  omdp = ObjMgrGetData (entityID);
  if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
    ssp = (SeqSubmitPtr) omdp->dataptr;
    if (ssp != NULL && ssp->datatype == 1) {
      submitBlock = ssp->sub;
    }
  }
  if (submitBlock != NULL) {
    submitCit = submitBlock->cit;
    if (submitCit != NULL) {
      NormalizeAuthors (submitCit->authors, TRUE);
    }
    contactInfo = submitBlock->contact;
    if (contactInfo != NULL) {
      contactAuthor = contactInfo->contact;
      if (contactAuthor != NULL) {
        contactAuthor->affil = CleanAffil (contactAuthor->affil);
      }
    }
  }

  if (resync) {
    ResynchCodingRegionPartials (sep);
    ResynchMessengerRNAPartials (sep);
    ResynchProteinPartials (sep);
  }

  /*
    dynamically add missing partial to already instantiated protein
    titles, in between main title and bracketed organism name
  */

  VisitBioseqsInSep (sep, NULL, AddPartialToProteinTitle);
}
15006
BasicSeqEntryCleanup(SeqEntryPtr sep)15007 NLM_EXTERN void BasicSeqEntryCleanup (SeqEntryPtr sep)
15008
15009 {
15010 BasicSeqEntryCleanupEx (sep, FALSE);
15011 }
15012
15013 //LCOV_EXCL_START
AdvancedSeqEntryCleanup(SeqEntryPtr sep)15014 NLM_EXTERN void AdvancedSeqEntryCleanup (SeqEntryPtr sep)
15015
15016 {
15017 BasicSeqEntryCleanupEx (sep, TRUE);
15018 }
15019 //LCOV_EXCL_STOP
15020
15021 typedef struct bsecsmfedata {
15022 Int4 max;
15023 Int4 num_at_max;
15024 } BsecSmfeData, PNTR BsecSmfePtr;
15025
BsecSMFEProc(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)15026 static Boolean LIBCALLBACK BsecSMFEProc (
15027 SeqFeatPtr sfp,
15028 SeqMgrFeatContextPtr context
15029 )
15030
15031
15032 {
15033 BsecSmfePtr bsp;
15034 Int4 len;
15035
15036 if (sfp == NULL || context == NULL) return TRUE;
15037 bsp = context->userdata;
15038 if (bsp == NULL) return TRUE;
15039
15040 len = SeqLocLen (sfp->location);
15041 if (len < bsp->max) {
15042 bsp->max = len;
15043 bsp->num_at_max = 1;
15044 } else if (len == bsp->max) {
15045 (bsp->num_at_max)++;
15046 }
15047
15048 return TRUE;
15049 }
15050
RemoveUnnecessaryGeneXrefs(SeqFeatPtr sfp,Pointer userdata)15051 NLM_EXTERN void RemoveUnnecessaryGeneXrefs (
15052 SeqFeatPtr sfp,
15053 Pointer userdata
15054 )
15055
15056 {
15057 BsecSmfeData bsd;
15058 SeqFeatPtr cds;
15059 Int2 count;
15060 SeqFeatXrefPtr curr, next;
15061 SeqMgrFeatContext fcontext;
15062 GeneRefPtr grp, grpx;
15063 SeqFeatXrefPtr PNTR last;
15064 BioseqPtr prd;
15065 SeqFeatPtr sfpx;
15066 CharPtr syn1, syn2;
15067
15068 if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE) return;
15069 grp = SeqMgrGetGeneXref (sfp);
15070 if (grp == NULL || SeqMgrGeneIsSuppressed (grp)) return;
15071
15072 grpx = NULL;
15073 sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
15074 if (sfpx != NULL) {
15075 if (sfpx->data.choice != SEQFEAT_GENE) return;
15076 grpx = (GeneRefPtr) sfpx->data.value.ptrvalue;
15077 } else {
15078 prd = BioseqFindFromSeqLoc (sfp->location);
15079 if (prd != NULL && ISA_aa (prd->mol)) {
15080 cds = SeqMgrGetCDSgivenProduct (prd, NULL);
15081 if (cds != NULL) {
15082 grpx = SeqMgrGetGeneXref (cds);
15083 if (grpx == NULL) {
15084 sfpx = SeqMgrGetOverlappingGene (cds->location, &fcontext);
15085 if (sfpx != NULL && sfpx->data.choice == SEQFEAT_GENE) {
15086 grpx = (GeneRefPtr) sfpx->data.value.ptrvalue;
15087 }
15088 }
15089 }
15090 }
15091 }
15092 if (grpx == NULL || SeqMgrGeneIsSuppressed (grp)) return;
15093
15094 if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grpx->locus_tag)) {
15095 if (StringICmp (grp->locus_tag, grpx->locus_tag) != 0) return;
15096 } else if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grpx->locus)) {
15097 if (StringICmp (grp->locus, grpx->locus) != 0) return;
15098 } else if (grp->syn != NULL && grpx->syn != NULL) {
15099 syn1 = (CharPtr) grp->syn->data.ptrvalue;
15100 syn2 = (CharPtr) grpx->syn->data.ptrvalue;
15101 if (StringDoesHaveText (syn1) && StringDoesHaveText (syn2)) {
15102 if (StringICmp (syn1, syn2) != 0) return;
15103 }
15104 }
15105
15106 MemSet ((Pointer) &bsd, 0, sizeof (BsecSmfeData));
15107 bsd.max = INT4_MAX;
15108 bsd.num_at_max = 0;
15109 count = SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE,
15110 NULL, 0, LOCATION_SUBSET,
15111 (Pointer) &bsd, BsecSMFEProc);
15112
15113 if (bsd.num_at_max < 2) {
15114 last = (SeqFeatXrefPtr PNTR) &(sfp->xref);
15115 curr = sfp->xref;
15116 while (curr != NULL) {
15117 next = curr->next;
15118 if (curr->data.choice == SEQFEAT_GENE) {
15119 *last = next;
15120 curr->next = NULL;
15121 SeqFeatXrefFree (curr);
15122 } else {
15123 last = &(curr->next);
15124 }
15125 curr = next;
15126 }
15127 }
15128 }
15129
15130 //LCOV_EXCL_START
/*
  SortSeqFeatFields is a feature callback (userdata is unused) that puts the
  list-valued fields of one feature into sorted order: GenBank qualifiers
  (legal, then illegal), dbxrefs, the citation set, and, for coding regions,
  the code breaks.
*/
static void SortSeqFeatFields (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  CdRegionPtr  cdregion;
  ValNodePtr   citset;
  GBQualPtr    quals;

  if (sfp == NULL) return;

  quals = SortFeatureGBQuals (sfp->qual);
  sfp->qual = SortIllegalGBQuals (quals);

  sfp->dbxref = ValNodeSort (sfp->dbxref, SortDbxref);

  /* the citations hang off the data pointer of the sfp->cit node */
  citset = sfp->cit;
  if (citset != NULL && citset->data.ptrvalue != NULL) {
    citset->data.ptrvalue = ValNodeSort ((ValNodePtr) citset->data.ptrvalue, SortCits);
  }

  if (sfp->data.choice != SEQFEAT_CDREGION) return;
  cdregion = (CdRegionPtr) sfp->data.value.ptrvalue;
  if (cdregion == NULL) return;
  cdregion->code_break = SortCodeBreaks (sfp, cdregion->code_break);
}
15160
/*
  SortBioSourceFields is a BioSource callback (userdata is unused) that
  sorts the organism's dbxrefs, sorts and uniques its synonyms, sorts the
  OrgMod list of every OrgName in the chain, and sorts the SubSource list.
*/
static void SortBioSourceFields (
  BioSourcePtr biop,
  Pointer userdata
)

{
  OrgNamePtr  orgname;
  OrgRefPtr   org;

  if (biop == NULL) return;

  org = biop->org;
  if (org != NULL) {
    org->db = ValNodeSort (org->db, SortDbxref);

    /* sort first so that UniqueValNode sees the synonyms in sorted order */
    org->syn = UniqueValNode (ValNodeSort (org->syn, SortVnpByString));

    orgname = org->orgname;
    while (orgname != NULL) {
      orgname->mod = SortOrgModList (orgname->mod);
      orgname = orgname->next;
    }
  }

  biop->subtype = SortSubSourceList (biop->subtype);
}
15186
SortSeqEntryQualifiers(SeqEntryPtr sep)15187 NLM_EXTERN void SortSeqEntryQualifiers (
15188 SeqEntryPtr sep
15189 )
15190
15191 {
15192 if (sep == NULL) return;
15193
15194 VisitFeaturesInSep (sep, NULL, SortSeqFeatFields);
15195 VisitBioSourcesInSep (sep, NULL, SortBioSourceFields);
15196 }
15197 //LCOV_EXCL_STOP
15198
15199 /* end BasicSeqEntryCleanup section */
15200
CDSPartialsFromTranslation(SeqFeatPtr sfp,Pointer userdata)15201 NLM_EXTERN void CDSPartialsFromTranslation (SeqFeatPtr sfp, Pointer userdata)
15202
15203 {
15204 Int4 i;
15205 Int4 len;
15206 ByteStorePtr newprot;
15207 Boolean partial5 = FALSE;
15208 Boolean partial3 = TRUE;
15209 CharPtr protseq;
15210 Int2 residue;
15211
15212 if (sfp == NULL) return;
15213 if (sfp->data.choice != SEQFEAT_CDREGION) return;
15214
15215 newprot = ProteinFromCdRegionExEx (sfp, TRUE, FALSE, NULL, FALSE);
15216 if (newprot == NULL) return;
15217
15218 protseq = BSMerge (newprot, NULL);
15219 if (protseq != NULL) {
15220 len = StringLen (protseq);
15221
15222 for (i = 0; i < len; i++) {
15223 residue = protseq [i];
15224 if (i == 0 && residue == '-') {
15225 partial5 = TRUE;
15226 }
15227 if (i == len - 1 && residue == '*') {
15228 partial3 = FALSE;
15229 }
15230 }
15231
15232 MemFree (protseq);
15233
15234 SetSeqLocPartial (sfp->location, partial5, partial3);
15235 sfp->partial = (Boolean) (partial5 || partial3);
15236 }
15237
15238 BSFree (newprot);
15239 }
15240
CodingRegionPartialsFromTranslation(SeqEntryPtr sep)15241 NLM_EXTERN void CodingRegionPartialsFromTranslation (SeqEntryPtr sep)
15242
15243 {
15244 VisitFeaturesInSep (sep, NULL, CDSPartialsFromTranslation);
15245 }
15246
ImposeGenePartials(SeqFeatPtr sfp,Pointer userdata)15247 NLM_EXTERN void ImposeGenePartials (SeqFeatPtr sfp, Pointer userdata)
15248
15249 {
15250 BioseqPtr bsp;
15251 SeqMgrFeatContext fcontext, gcontext;
15252 SeqFeatPtr feat, longest = NULL;
15253 Int4 len, min = INT4_MAX;
15254 Boolean new_partial, partial5, partial3;
15255
15256 if (sfp == NULL) return;
15257 if (sfp->data.choice != SEQFEAT_GENE) return;
15258
15259 bsp = BioseqFindFromSeqLoc (sfp->location);
15260 if (bsp == NULL) return;
15261
15262 if (SeqMgrGetDesiredFeature (0, bsp, 0, 0, sfp, &gcontext) != sfp) return;
15263
15264 feat = SeqMgrGetDesiredFeature (0, bsp, 0, gcontext.index + 1, NULL, &fcontext);
15265 while (feat != NULL && gcontext.right >= fcontext.left) {
15266 len = TestFeatOverlap(feat, sfp, CONTAINED_WITHIN);
15267 if (len >= 0) {
15268 if (len < min) {
15269 min = len;
15270 longest = feat;
15271 }
15272 }
15273 feat = SeqMgrGetNextFeature (bsp, feat, 0, 0, &fcontext);
15274 }
15275
15276 if (longest != NULL) {
15277 CheckSeqLocForPartial (longest->location, &partial5, &partial3);
15278 new_partial = (Boolean) (longest->partial || partial5 || partial3);
15279 SetSeqLocPartial (sfp->location, partial5, partial3);
15280 sfp->partial = new_partial;
15281 }
15282 }
15283
ImposeCDSPartials(SeqFeatPtr sfp,Pointer userdata)15284 NLM_EXTERN void ImposeCDSPartials (SeqFeatPtr sfp, Pointer userdata)
15285
15286 {
15287 SeqFeatPtr mrna;
15288 Boolean new_partial, partial5, partial3;
15289
15290 if (sfp == NULL) return;
15291 if (sfp->data.choice != SEQFEAT_CDREGION) return;
15292
15293 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
15294 new_partial = (Boolean) (sfp->partial || partial5 || partial3);
15295 if (new_partial != sfp->partial) {
15296 sfp->partial = new_partial;
15297 }
15298
15299 mrna = GetmRNAforCDS (sfp);
15300 if (mrna != NULL) {
15301 SetSeqLocPartial (mrna->location, partial5, partial3);
15302 mrna->partial = new_partial;
15303 }
15304 }
15305
ImposeCodingRegionPartials(SeqEntryPtr sep)15306 NLM_EXTERN void ImposeCodingRegionPartials (SeqEntryPtr sep)
15307
15308 {
15309 VisitFeaturesInSep (sep, NULL, ImposeCDSPartials);
15310 VisitFeaturesInSep (sep, NULL, ImposeGenePartials);
15311 }
15312
ResynchCDSPartials(SeqFeatPtr sfp,Pointer userdata)15313 NLM_EXTERN void ResynchCDSPartials (SeqFeatPtr sfp, Pointer userdata)
15314
15315 {
15316 SeqFeatPtr bestprot;
15317 BioseqPtr bsp;
15318 MolInfoPtr mip;
15319 Boolean partial5;
15320 Boolean partial3;
15321 ProtRefPtr prp;
15322 SeqEntryPtr sep;
15323 SeqIdPtr sip;
15324 SeqLocPtr slp;
15325 ValNodePtr vnp;
15326 /* variables for logging */
15327 LogInfoPtr lip;
15328 CharPtr orig_loc = NULL, new_loc;
15329 Char id_buf[100];
15330 Boolean new_partial;
15331
15332 if (sfp->data.choice != SEQFEAT_CDREGION) return;
15333 lip = (LogInfoPtr) userdata;
15334 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
15335 new_partial = (Boolean) (sfp->partial || partial5 || partial3);
15336 if (new_partial != sfp->partial) {
15337 sfp->partial = new_partial;
15338 if (lip != NULL) {
15339 lip->data_in_log = TRUE;
15340 if (lip->fp != NULL) {
15341 fprintf (lip->fp, "Changed partial flag for coding region\n");
15342 }
15343 }
15344 }
15345
15346 /*
15347 slp = SeqLocFindNext (sfp->location, NULL);
15348 if (slp == NULL) return;
15349 */
15350 sip = SeqLocId (sfp->product);
15351 if (sip == NULL) return;
15352 bsp = BioseqFind (sip);
15353 if (bsp == NULL || !ISA_aa (bsp->mol) || bsp->repr != Seq_repr_raw) return;
15354
15355 bestprot = SeqMgrGetBestProteinFeature (bsp, NULL);
15356 if (bestprot == NULL) {
15357 bestprot = GetBestProteinFeatureUnindexed (sfp->product);
15358 }
15359
15360 sep = SeqMgrGetSeqEntryForData (bsp);
15361 if (sep == NULL) return;
15362
15363 /* only synchronize and extend best if unprocessed or preprotein, not mature/signal/transit peptide */
15364 if (bestprot != NULL && bestprot->location != NULL) {
15365 prp = (ProtRefPtr) bestprot->data.value.ptrvalue;
15366 slp = bestprot->location;
15367 if (prp != NULL && prp->processed < 2 && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE)) {
15368
15369 if (lip != NULL) {
15370 orig_loc = SeqLocPrintUseBestID (bestprot->location);
15371 }
15372 slp = NULL;
15373 sip = SeqLocId (bestprot->location);
15374 if (sip != NULL) {
15375 slp = WholeIntervalFromSeqId (sip);
15376 }
15377 if (slp == NULL) {
15378 slp = CreateWholeInterval (sep);
15379 }
15380 SetSeqLocPartial (slp, partial5, partial3);
15381 if (slp != NULL
15382 && (!AsnIoMemComp (slp, bestprot->location, (AsnWriteFunc) SeqLocAsnWrite) || bestprot->partial != sfp->partial)) {
15383 bestprot->location = SeqLocFree (bestprot->location);
15384 bestprot->location = slp;
15385
15386 bestprot->partial = sfp->partial;
15387 if (lip != NULL) {
15388 new_loc = SeqLocPrintUseBestID (bestprot->location);
15389 lip->data_in_log = TRUE;
15390 if (lip->fp != NULL) {
15391 fprintf (lip->fp, "Synchronized coding region partials for protein feature location at %s\n", orig_loc/*, new_loc*/);
15392 }
15393 new_loc = MemFree (new_loc);
15394 }
15395 } else {
15396 slp = SeqLocFree (slp);
15397 }
15398 orig_loc = MemFree (orig_loc);
15399 }
15400 }
15401
15402 vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
15403 id_buf[0] = 0;
15404 if (vnp == NULL) {
15405 vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
15406 if (vnp != NULL) {
15407 mip = MolInfoNew ();
15408 vnp->data.ptrvalue = (Pointer) mip;
15409 if (mip != NULL) {
15410 mip->biomol = 8; /* peptide */
15411 mip->tech = 13; /* concept-trans-author */
15412 if (lip != NULL) {
15413 if (lip->fp != NULL) {
15414 SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15415 fprintf (lip->fp, "Added MolInfo descriptor for %s\n", id_buf);
15416 }
15417 lip->data_in_log = TRUE;
15418 }
15419 }
15420 }
15421 }
15422
15423 if (vnp != NULL && (mip = (MolInfoPtr) vnp->data.ptrvalue) != NULL) {
15424 if (partial5 && partial3) {
15425 if (mip->completeness != 5) {
15426 mip->completeness = 5;
15427 if (lip != NULL) {
15428 if (lip->fp != NULL) {
15429 if (id_buf[0] == 0) {
15430 SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15431 }
15432 fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
15433 lip->data_in_log = TRUE;
15434 }
15435 }
15436 }
15437 } else if (partial5) {
15438 if (mip->completeness != 3) {
15439 mip->completeness = 3;
15440 if (lip != NULL) {
15441 if (lip->fp != NULL) {
15442 if (id_buf[0] == 0) {
15443 SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15444 }
15445 fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
15446 }
15447 lip->data_in_log = TRUE;
15448 }
15449 }
15450 } else if (partial3) {
15451 if (mip->completeness != 4) {
15452 mip->completeness = 4;
15453 if (lip != NULL) {
15454 if (lip->fp != NULL) {
15455 if (id_buf[0] == 0) {
15456 SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15457 }
15458 fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
15459 }
15460 lip->data_in_log = TRUE;
15461 }
15462 }
15463 } else if (sfp->partial) {
15464 if (mip->completeness != 2) {
15465 mip->completeness = 2;
15466 if (lip != NULL) {
15467 if (lip->fp != NULL) {
15468 if (id_buf[0] == 0) {
15469 SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15470 }
15471 fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
15472 }
15473 lip->data_in_log = TRUE;
15474 }
15475 }
15476 } else {
15477 if (mip->completeness != 0 && mip->completeness != 1) {
15478 mip->completeness = 0;
15479 if (lip != NULL) {
15480 if (lip->fp != NULL) {
15481 if (id_buf[0] == 0) {
15482 SeqIdWrite (SeqIdFindBest (bsp->id, SEQID_GENBANK), id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
15483 }
15484 fprintf (lip->fp, "Adjusted completeness for MolInfo descriptor on %s\n", id_buf);
15485 }
15486 lip->data_in_log = TRUE;
15487 }
15488 }
15489 }
15490 }
15491 }
15492
15493
ResynchCodingRegionPartialsEx(SeqEntryPtr sep,FILE * log_fp)15494 NLM_EXTERN Boolean ResynchCodingRegionPartialsEx (SeqEntryPtr sep, FILE *log_fp)
15495
15496 {
15497 LogInfoData lid;
15498 MemSet (&lid, 0, sizeof (LogInfoData));
15499 lid.fp = log_fp;
15500 VisitFeaturesInSep (sep, &lid, ResynchCDSPartials);
15501 return lid.data_in_log;
15502 }
15503
ResynchCodingRegionPartials(SeqEntryPtr sep)15504 NLM_EXTERN void ResynchCodingRegionPartials (SeqEntryPtr sep)
15505
15506 {
15507 ResynchCodingRegionPartialsEx (sep, NULL);
15508 }
15509
15510
ResynchMRNAPartials(SeqFeatPtr sfp,Pointer userdata)15511 NLM_EXTERN void ResynchMRNAPartials (SeqFeatPtr sfp, Pointer userdata)
15512
15513 {
15514 BioseqPtr bsp;
15515 MolInfoPtr mip;
15516 Boolean partial5;
15517 Boolean partial3;
15518 RnaRefPtr rrp;
15519 SeqEntryPtr sep;
15520 SeqIdPtr sip;
15521 SeqLocPtr slp;
15522 ValNodePtr vnp;
15523
15524 if (sfp->data.choice != SEQFEAT_RNA) return;
15525 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
15526 if (rrp == NULL || rrp->type != 2) return;
15527 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
15528 sfp->partial = (Boolean) (sfp->partial || partial5 || partial3);
15529 slp = SeqLocFindNext (sfp->location, NULL);
15530 if (slp == NULL) return;
15531 sip = SeqLocId (sfp->product);
15532 if (sip == NULL) return;
15533 bsp = BioseqFind (sip);
15534 if (bsp != NULL && ISA_na (bsp->mol) && bsp->repr == Seq_repr_raw) {
15535 sep = SeqMgrGetSeqEntryForData (bsp);
15536 if (sep == NULL) return;
15537 vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
15538 if (vnp == NULL) {
15539 vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
15540 if (vnp != NULL) {
15541 mip = MolInfoNew ();
15542 vnp->data.ptrvalue = (Pointer) mip;
15543 if (mip != NULL) {
15544 mip->biomol = 3; /* mRNA */
15545 mip->tech = 1; /* standard */
15546 }
15547 }
15548 }
15549 if (vnp != NULL) {
15550 mip = (MolInfoPtr) vnp->data.ptrvalue;
15551 if (mip != NULL) {
15552 if (partial5 && partial3) {
15553 mip->completeness = 5;
15554 } else if (partial5) {
15555 mip->completeness = 3;
15556 } else if (partial3) {
15557 mip->completeness = 4;
15558 } else if (sfp->partial) {
15559 mip->completeness = 2;
15560 } else {
15561 mip->completeness = 0;
15562 }
15563 }
15564 }
15565 }
15566 }
15567
ResynchMessengerRNAPartials(SeqEntryPtr sep)15568 NLM_EXTERN void ResynchMessengerRNAPartials (SeqEntryPtr sep)
15569
15570 {
15571 VisitFeaturesInSep (sep, NULL, ResynchMRNAPartials);
15572 }
15573
ResynchPeptidePartials(SeqFeatPtr sfp,Pointer userdata)15574 NLM_EXTERN void ResynchPeptidePartials (SeqFeatPtr sfp, Pointer userdata)
15575
15576 {
15577 SeqFeatPtr bestprot;
15578 BioseqPtr bsp;
15579 MolInfoPtr mip;
15580 Boolean partial5;
15581 Boolean partial3;
15582 ProtRefPtr prp;
15583 SeqEntryPtr sep;
15584 SeqIdPtr sip;
15585 SeqLocPtr slp;
15586 ValNodePtr vnp;
15587
15588 if (sfp->data.choice != SEQFEAT_PROT) return;
15589 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
15590 if (prp == NULL) return;
15591 if (prp->processed < 1 || prp->processed > 5) return;
15592 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
15593 sfp->partial = (Boolean) (sfp->partial || partial5 || partial3);
15594 /*
15595 slp = SeqLocFindNext (sfp->location, NULL);
15596 if (slp == NULL) return;
15597 */
15598 sip = SeqLocId (sfp->product);
15599 if (sip == NULL) return;
15600 bsp = BioseqFind (sip);
15601 if (bsp != NULL && ISA_aa (bsp->mol) && bsp->repr == Seq_repr_raw) {
15602 sep = SeqMgrGetSeqEntryForData (bsp);
15603 if (sep == NULL) return;
15604 bestprot = SeqMgrGetBestProteinFeature (bsp, NULL);
15605 if (bestprot == NULL) {
15606 bestprot = GetBestProteinFeatureUnindexed (sfp->product);
15607 }
15608 if (bestprot != NULL && bestprot->location != NULL) {
15609 /* only synchronize and extend best if unprocessed or preprotein, not mature/signal/transit peptide */
15610 prp = (ProtRefPtr) bestprot->data.value.ptrvalue;
15611 slp = bestprot->location;
15612 if (prp != NULL && prp->processed < 2 && (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE)) {
15613 slp = NULL;
15614 sip = SeqLocId (bestprot->location);
15615 if (sip != NULL) {
15616 slp = WholeIntervalFromSeqId (sip);
15617 }
15618 if (slp == NULL) {
15619 slp = CreateWholeInterval (sep);
15620 }
15621 if (slp != NULL) {
15622 bestprot->location = SeqLocFree (bestprot->location);
15623 bestprot->location = slp;
15624 }
15625 SetSeqLocPartial (bestprot->location, partial5, partial3);
15626 bestprot->partial = sfp->partial;
15627 }
15628 }
15629 vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
15630 if (vnp == NULL) {
15631 vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
15632 if (vnp != NULL) {
15633 mip = MolInfoNew ();
15634 vnp->data.ptrvalue = (Pointer) mip;
15635 if (mip != NULL) {
15636 mip->biomol = 8;
15637 mip->tech = 13;
15638 }
15639 }
15640 }
15641 if (vnp != NULL) {
15642 mip = (MolInfoPtr) vnp->data.ptrvalue;
15643 if (mip != NULL) {
15644 if (partial5 && partial3) {
15645 mip->completeness = 5;
15646 } else if (partial5) {
15647 mip->completeness = 3;
15648 } else if (partial3) {
15649 mip->completeness = 4;
15650 } else if (sfp->partial) {
15651 mip->completeness = 2;
15652 } else {
15653 mip->completeness = 0;
15654 }
15655 }
15656 }
15657 }
15658 }
15659
ResynchProteinPartials(SeqEntryPtr sep)15660 NLM_EXTERN void ResynchProteinPartials (SeqEntryPtr sep)
15661
15662 {
15663 VisitFeaturesInSep (sep, NULL, ResynchPeptidePartials);
15664 }
15665
15666 /* SeqIdStripLocus removes the SeqId.name field if accession is set */
15667
SeqIdStripLocus(SeqIdPtr sip)15668 NLM_EXTERN SeqIdPtr SeqIdStripLocus (SeqIdPtr sip)
15669
15670 {
15671 TextSeqIdPtr tip;
15672
15673 if (sip != NULL) {
15674 switch (sip->choice) {
15675 case SEQID_GENBANK :
15676 case SEQID_EMBL :
15677 case SEQID_DDBJ :
15678 case SEQID_OTHER :
15679 case SEQID_TPG:
15680 case SEQID_TPE:
15681 case SEQID_TPD:
15682 case SEQID_GPIPE:
15683 tip = (TextSeqIdPtr) sip->data.ptrvalue;
15684 if (tip != NULL) {
15685 if (! HasNoText (tip->accession)) {
15686 tip->name = MemFree (tip->name);
15687 }
15688 }
15689 break;
15690 default :
15691 break;
15692 }
15693 }
15694 return sip;
15695 }
15696
15697 //LCOV_EXCL_START
StripLocusFromSeqLoc(SeqLocPtr location)15698 NLM_EXTERN SeqLocPtr StripLocusFromSeqLoc (SeqLocPtr location)
15699
15700 {
15701 SeqLocPtr loc;
15702 SeqLocPtr next;
15703 PackSeqPntPtr psp;
15704 SeqBondPtr sbp;
15705 SeqIntPtr sinp;
15706 SeqIdPtr sip;
15707 SeqLocPtr slp;
15708 SeqPntPtr spp;
15709
15710 if (location == NULL) return NULL;
15711 slp = SeqLocFindNext (location, NULL);
15712 while (slp != NULL) {
15713 next = SeqLocFindNext (location, slp);
15714 switch (slp->choice) {
15715 case SEQLOC_EMPTY :
15716 case SEQLOC_WHOLE :
15717 sip = (SeqIdPtr) slp->data.ptrvalue;
15718 SeqIdStripLocus (sip);
15719 break;
15720 case SEQLOC_INT :
15721 sinp = (SeqIntPtr) slp->data.ptrvalue;
15722 if (sinp != NULL) {
15723 SeqIdStripLocus (sinp->id);
15724 }
15725 break;
15726 case SEQLOC_PACKED_INT :
15727 case SEQLOC_MIX :
15728 case SEQLOC_EQUIV :
15729 loc = (SeqLocPtr) slp->data.ptrvalue;
15730 while (loc != NULL) {
15731 sip = SeqLocId (loc);
15732 SeqIdStripLocus (sip);
15733 loc = loc->next;
15734 }
15735 break;
15736 case SEQLOC_BOND :
15737 sbp = (SeqBondPtr) slp->data.ptrvalue;
15738 if (sbp != NULL) {
15739 spp = sbp->a;
15740 if (spp != NULL) {
15741 SeqIdStripLocus (spp->id);
15742 }
15743 spp = sbp->b;
15744 if (spp != NULL) {
15745 SeqIdStripLocus (spp->id);
15746 }
15747 }
15748 break;
15749 case SEQLOC_PNT :
15750 spp = (SeqPntPtr) slp->data.ptrvalue;
15751 if (spp != NULL) {
15752 SeqIdStripLocus (spp->id);
15753 }
15754 break;
15755 case SEQLOC_PACKED_PNT :
15756 psp = (PackSeqPntPtr) slp->data.ptrvalue;
15757 if (psp != NULL) {
15758 SeqIdStripLocus (psp->id);
15759 }
15760 break;
15761 default :
15762 break;
15763 }
15764 slp = next;
15765 }
15766 return location;
15767 }
15768
/*
  GetRidOfLocusCallback is a SeqEntryExplore callback (mydata, index and
  indent are unused).  For the Bioseq or Bioseq-set held by sep it walks the
  annotation chain and, for every annot of type 1 with data, runs
  StripLocusFromSeqLoc on the location and product of each feature in it.
*/
static void GetRidOfLocusCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  SeqAnnotPtr  annot;
  SeqFeatPtr   feat;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;

  if (IS_Bioseq (sep)) {
    annot = ((BioseqPtr) sep->data.ptrvalue)->annot;
  } else if (IS_Bioseq_set (sep)) {
    annot = ((BioseqSetPtr) sep->data.ptrvalue)->annot;
  } else {
    return;
  }

  for (; annot != NULL; annot = annot->next) {
    /* type 1 annots hold a SeqFeat chain in data */
    if (annot->type != 1 || annot->data == NULL) continue;
    for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = feat->next) {
      StripLocusFromSeqLoc (feat->location);
      StripLocusFromSeqLoc (feat->product);
    }
  }
}
15798
GetRidOfLocusInSeqIds(Uint2 entityID,SeqEntryPtr sep)15799 NLM_EXTERN void GetRidOfLocusInSeqIds (Uint2 entityID, SeqEntryPtr sep)
15800
15801 {
15802 if (entityID < 1 && sep == NULL) return;
15803 if (entityID > 0 && sep == NULL) {
15804 sep = GetTopSeqEntryForEntityID (entityID);
15805 }
15806 if (sep == NULL) return;
15807 SeqEntryExplore (sep, NULL, GetRidOfLocusCallback);
15808 }
15809 //LCOV_EXCL_STOP
15810
15811 /* Mac can now use static parse tables by using
15812 Make Strings Read-Only and Store Static Data in TOC
15813 #ifdef OS_MAC
15814 #define ASNLOAD_NEEDED 1
15815 #endif
15816 */
15817 #if defined(OS_DOS) || defined(WIN16)
15818 #define ASNLOAD_NEEDED 1
15819 #endif
15820
FileExists(CharPtr dirname,CharPtr subname,CharPtr filename)15821 static Boolean FileExists (CharPtr dirname, CharPtr subname, CharPtr filename)
15822
15823 {
15824 Char path [PATH_MAX];
15825
15826 StringNCpy_0 (path, dirname, sizeof (path));
15827 FileBuildPath (path, subname, NULL);
15828 FileBuildPath (path, NULL, filename);
15829 return (Boolean) (FileLength (path) > 0);
15830 }
15831
15832 /*
15833 static Boolean CheckAsnloadPath (CharPtr dirname, CharPtr subdir)
15834
15835 {
15836 #ifdef ASNLOAD_NEEDED
15837 Char fname [16];
15838 int i;
15839
15840 for (i = 60; i <= 99; ++i) {
15841 sprintf (fname, "asnmedli.l%02d", (int) i);
15842 if (FileExists (dirname, subdir, fname)) {
15843 return TRUE;
15844 }
15845 }
15846 return FALSE;
15847 #else
15848 return TRUE;
15849 #endif
15850 }
15851 */
15852
CheckDataPath(CharPtr dirname,CharPtr subdir)15853 static Boolean CheckDataPath (CharPtr dirname, CharPtr subdir)
15854
15855 {
15856 if (FileExists (dirname, subdir, "seqcode.val")) return TRUE;
15857 return (Boolean) (FileExists (dirname, subdir, "objprt.prt"));
15858 }
15859
CheckErrMsgPath(CharPtr dirname,CharPtr subdir)15860 static Boolean CheckErrMsgPath (CharPtr dirname, CharPtr subdir)
15861
15862 {
15863 return (Boolean) (FileExists (dirname, subdir, "valid.msg"));
15864 }
15865
15866 //LCOV_EXCL_START
/* Joins dirname and subname with FileBuildPath and stores the resulting
   directory as the transient application parameter file/section/type. */
static void SetTransientPath (CharPtr dirname, CharPtr subname, CharPtr file,
                              CharPtr section, CharPtr type)

{
  Char  target [PATH_MAX];

  StringNCpy_0 (target, dirname, sizeof (target));
  FileBuildPath (target, subname, NULL);
  TransientSetAppParam (file, section, type, target);
}
15877
UseLocalAsnloadDataAndErrMsg(void)15878 NLM_EXTERN Boolean UseLocalAsnloadDataAndErrMsg (void)
15879
15880 {
15881 Boolean dataFound;
15882 Char path [PATH_MAX];
15883 Char appPath[PATH_MAX];
15884 CharPtr ptr;
15885
15886 ProgramPath (appPath, sizeof (appPath));
15887 StrCpy(path, appPath);
15888 /* data a sibling of our application? */
15889 ptr = StringRChr (path, DIRDELIMCHR);
15890 if (ptr != NULL) {
15891 ptr++;
15892 *ptr = '\0';
15893 }
15894 dataFound = CheckDataPath (path, "data");
15895 if (! (dataFound)) {
15896 /* data an uncle of our application? */
15897 if (ptr != NULL) {
15898 ptr--;
15899 *ptr = '\0';
15900 ptr = StringRChr (path, DIRDELIMCHR);
15901 if (ptr != NULL) {
15902 ptr++;
15903 *ptr = '\0';
15904 }
15905 dataFound = CheckDataPath (path, "data");
15906 }
15907 }
15908 #ifdef OS_UNIX_DARWIN
15909 if (! (dataFound) && IsApplicationPackage (appPath)) {
15910 /* is data inside our application within Contents/Resources? */
15911 StrCpy (path, appPath);
15912 FileBuildPath (path, "Contents", NULL);
15913 FileBuildPath (path, "Resources", NULL);
15914 dataFound = CheckDataPath (path, "data");
15915 if (! dataFound) {
15916 StrCpy (path, appPath);
15917 ptr = StringStr (path, "/ncbi/build/");
15918 if (ptr != NULL) {
15919 /* see if running under older Xcode 3 build environment */
15920 ptr [5] = '\0';
15921 dataFound = CheckDataPath (path, "data");
15922 }
15923 }
15924 if (! dataFound) {
15925 StrCpy (path, appPath);
15926 ptr = StringStr (path, "/ncbi/make/");
15927 if (ptr != NULL) {
15928 /* see if running under newer Xcode 3 build environment */
15929 ptr [5] = '\0';
15930 dataFound = CheckDataPath (path, "data");
15931 }
15932 }
15933 if (! dataFound) {
15934 StrCpy (path, appPath);
15935 ptr = StringStr (path, "/Library/Developer/");
15936 if (ptr != NULL) {
15937 /* see if running under Xcode 4 build environment */
15938 ptr [19] = '\0';
15939 dataFound = CheckDataPath (path, "data");
15940 }
15941 }
15942 }
15943 #endif
15944 if (dataFound) {
15945 SetTransientPath (path, "asnload", "NCBI", "NCBI", "ASNLOAD");
15946 SetTransientPath (path, "data", "NCBI", "NCBI", "DATA");
15947 if (CheckErrMsgPath (path, "errmsg")) {
15948 SetTransientPath (path, "errmsg", "NCBI", "ErrorProcessing", "MsgPath");
15949 TransientSetAppParam ("NCBI", "ErrorProcessing", "EO_BEEP", "No");
15950 }
15951 return TRUE;
15952 }
15953 return FALSE;
15954 }
15955
CreateWholeInterval(SeqEntryPtr sep)15956 NLM_EXTERN SeqLocPtr CreateWholeInterval (SeqEntryPtr sep)
15957
15958 {
15959 BioseqPtr bsp;
15960 SeqIntPtr sip;
15961 SeqLocPtr slp;
15962
15963 slp = NULL;
15964 if (sep != NULL && sep->choice == 1 && sep->data.ptrvalue != NULL) {
15965 bsp = (BioseqPtr) sep->data.ptrvalue;
15966 slp = ValNodeNew (NULL);
15967 if (slp != NULL) {
15968 sip = SeqIntNew ();
15969 if (sip != NULL) {
15970 slp->choice = SEQLOC_INT;
15971 slp->data.ptrvalue = (Pointer) sip;
15972 sip->from = 0;
15973 sip->to = bsp->length - 1;
15974 if (ISA_na (bsp->mol)) {
15975 sip->strand = Seq_strand_plus;
15976 }
15977 sip->id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
15978 }
15979 }
15980 }
15981 return slp;
15982 }
15983 //LCOV_EXCL_STOP
15984
15985
WholeIntervalFromSeqId(SeqIdPtr sip)15986 NLM_EXTERN SeqLocPtr WholeIntervalFromSeqId (SeqIdPtr sip)
15987
15988 {
15989 BioseqPtr bsp;
15990 SeqIntPtr sintp;
15991 SeqLocPtr slp;
15992
15993 if (sip == NULL) return NULL;
15994 bsp = BioseqFindCore (sip);
15995 if (bsp == NULL) return NULL;
15996 slp = ValNodeNew (NULL);
15997 if (slp == NULL) return NULL;
15998 sintp = SeqIntNew ();
15999 if (sintp == NULL) return NULL;
16000 slp->choice = SEQLOC_INT;
16001 slp->data.ptrvalue = (Pointer) sintp;
16002 sintp->from = 0;
16003 sintp->to = bsp->length - 1;
16004 if (ISA_na (bsp->mol)) {
16005 sintp->strand = Seq_strand_plus;
16006 }
16007 sintp->id = SeqIdStripLocus (SeqIdDup (sip));
16008 return slp;
16009 }
16010
16011 //LCOV_EXCL_START
FreeAllFuzz(SeqLocPtr location)16012 NLM_EXTERN void FreeAllFuzz (SeqLocPtr location)
16013
16014 {
16015 SeqIntPtr sip;
16016 SeqLocPtr slp;
16017
16018 if (location == NULL) return;
16019 slp = SeqLocFindNext (location, NULL);
16020 while (slp != NULL) {
16021 if (slp->choice == SEQLOC_INT) {
16022 sip = (SeqIntPtr) slp->data.ptrvalue;
16023 if (sip != NULL) {
16024 sip->if_to = IntFuzzFree (sip->if_to);
16025 sip->if_from = IntFuzzFree (sip->if_from);
16026 }
16027 }
16028 slp = SeqLocFindNext (location, slp);
16029 }
16030 }
16031 //LCOV_EXCL_STOP
16032
LocationHasNullsBetween(SeqLocPtr location)16033 NLM_EXTERN Boolean LocationHasNullsBetween (SeqLocPtr location)
16034
16035 {
16036 SeqLocPtr slp;
16037
16038 if (location == NULL) return FALSE;
16039 slp = SeqLocFindNext (location, NULL);
16040 while (slp != NULL) {
16041 if (slp->choice == SEQLOC_NULL) return TRUE;
16042 slp = SeqLocFindNext (location, slp);
16043 }
16044 return FALSE;
16045 }
16046
NormalizeNullsBetween(SeqLocPtr location)16047 NLM_EXTERN void NormalizeNullsBetween (SeqLocPtr location)
16048
16049 {
16050 SeqLocPtr next, tmp, vnp;
16051
16052 if (location == NULL) return;
16053 if (! LocationHasNullsBetween (location)) return;
16054
16055 if (location->choice != SEQLOC_MIX) return;
16056 vnp = (ValNodePtr) location->data.ptrvalue;
16057 if (vnp == NULL) return;
16058
16059 while (vnp != NULL && vnp->next != NULL) {
16060 next = vnp->next;
16061 if (vnp->choice != SEQLOC_NULL && next->choice != SEQLOC_NULL) {
16062 tmp = ValNodeNew (NULL);
16063 if (tmp != NULL) {
16064 tmp->choice = SEQLOC_NULL;
16065 tmp->next = vnp->next;
16066 vnp->next = tmp;
16067 }
16068 }
16069 vnp = next;
16070 }
16071 }
16072
FindFeatFromFeatDefType(Uint2 subtype)16073 NLM_EXTERN Uint1 FindFeatFromFeatDefType (Uint2 subtype)
16074
16075 {
16076 switch (subtype) {
16077 case FEATDEF_GENE :
16078 return SEQFEAT_GENE;
16079 case FEATDEF_ORG :
16080 return SEQFEAT_ORG;
16081 case FEATDEF_CDS :
16082 return SEQFEAT_CDREGION;
16083 case FEATDEF_PROT :
16084 return SEQFEAT_PROT;
16085 case FEATDEF_PUB :
16086 return SEQFEAT_PUB;
16087 case FEATDEF_SEQ :
16088 return SEQFEAT_SEQ;
16089 case FEATDEF_REGION :
16090 return SEQFEAT_REGION;
16091 case FEATDEF_COMMENT :
16092 return SEQFEAT_COMMENT;
16093 case FEATDEF_BOND :
16094 return SEQFEAT_BOND;
16095 case FEATDEF_SITE :
16096 return SEQFEAT_SITE;
16097 case FEATDEF_RSITE :
16098 return SEQFEAT_RSITE;
16099 case FEATDEF_USER :
16100 return SEQFEAT_USER;
16101 case FEATDEF_TXINIT :
16102 return SEQFEAT_TXINIT;
16103 case FEATDEF_NUM :
16104 return SEQFEAT_NUM;
16105 case FEATDEF_PSEC_STR :
16106 return SEQFEAT_PSEC_STR;
16107 case FEATDEF_NON_STD_RESIDUE :
16108 return SEQFEAT_NON_STD_RESIDUE;
16109 case FEATDEF_HET :
16110 return SEQFEAT_HET;
16111 case FEATDEF_BIOSRC :
16112 return SEQFEAT_BIOSRC;
16113 default :
16114 if (subtype >= FEATDEF_preRNA && subtype <= FEATDEF_otherRNA) {
16115 return SEQFEAT_RNA;
16116 }
16117 if (subtype == FEATDEF_snoRNA) {
16118 return SEQFEAT_RNA;
16119 }
16120 if (subtype >= FEATDEF_ncRNA && subtype <= FEATDEF_tmRNA) {
16121 return SEQFEAT_RNA;
16122 }
16123 if (subtype >= FEATDEF_preprotein && subtype <= FEATDEF_transit_peptide_aa) {
16124 return SEQFEAT_PROT;
16125 }
16126 if (subtype >= FEATDEF_IMP && subtype <= FEATDEF_site_ref) {
16127 return SEQFEAT_IMP;
16128 }
16129 if (subtype >= FEATDEF_gap && subtype <= FEATDEF_oriT) {
16130 return SEQFEAT_IMP;
16131 }
16132 if (subtype >= FEATDEF_mobile_element && subtype <= FEATDEF_propeptide) {
16133 return SEQFEAT_IMP;
16134 }
16135 if (subtype == FEATDEF_propeptide_aa) {
16136 return SEQFEAT_PROT;
16137 }
16138 }
16139 return 0;
16140 }
16141
16142 //LCOV_EXCL_START
MakeSeqID(CharPtr str)16143 NLM_EXTERN SeqIdPtr MakeSeqID(CharPtr str)
16144
16145 {
16146 CharPtr buf;
16147 Int4 len;
16148 SeqIdPtr sip;
16149
16150 sip = NULL;
16151 if (str != NULL && *str != '\0') {
16152 if (StringChr (str, '|') != NULL) {
16153 sip = SeqIdParse (str);
16154 } else {
16155 len = StringLen (str) + 5;
16156 buf = (CharPtr) MemNew (sizeof (Char) * len);
16157 sprintf (buf, "lcl|%s", str);
16158 sip = SeqIdParse (buf);
16159 buf = MemFree (buf);
16160 }
16161 }
16162 return sip;
16163 }
16164
MakeUniqueSeqID(CharPtr prefix)16165 NLM_EXTERN SeqIdPtr MakeUniqueSeqID (CharPtr prefix)
16166
16167 {
16168 Char buf[60];
16169 CharPtr tmp;
16170 Int2 ctr;
16171 ValNodePtr newid;
16172 ObjectIdPtr oid;
16173 ValNode vn;
16174 TextSeqId tsi;
16175 ValNodePtr altid;
16176 size_t len;
16177
16178 altid = &vn;
16179 vn.choice = SEQID_GENBANK;
16180 vn.next = NULL;
16181 vn.data.ptrvalue = &tsi;
16182 tsi.name = NULL;
16183 tsi.accession = NULL;
16184 tsi.release = NULL;
16185 tsi.version = INT2_MIN;
16186
16187 len = StringLen (prefix);
16188 if (len > 0 && len < 52) {
16189 tmp = StringMove(buf, prefix);
16190 } else {
16191 tmp = StringMove(buf, "tmpseq_");
16192 }
16193
16194 newid = ValNodeNew(NULL);
16195 oid = ObjectIdNew();
16196 oid->str = buf; /* allocate this later */
16197 newid->choice = SEQID_LOCAL;
16198 newid->data.ptrvalue = oid;
16199
16200 tsi.name = buf; /* check for alternative form */
16201
16202 for (ctr = 1; ctr < 32000; ctr++)
16203 {
16204 sprintf(tmp, "%d", (int)ctr);
16205 if ((BioseqFindCore(newid) == NULL) && (BioseqFindCore(altid) == NULL))
16206 {
16207 oid->str = StringSave(buf);
16208 return newid;
16209 }
16210 }
16211
16212 return NULL;
16213 }
16214
SeqIdFindWorst(SeqIdPtr sip)16215 NLM_EXTERN SeqIdPtr SeqIdFindWorst (SeqIdPtr sip)
16216
16217 {
16218 Uint1 order [NUM_SEQID];
16219
16220 SeqIdBestRank (order, NUM_SEQID);
16221 order [SEQID_LOCAL] = 10;
16222 order [SEQID_GENBANK] = 5;
16223 order [SEQID_EMBL] = 5;
16224 order [SEQID_PIR] = 5;
16225 order [SEQID_SWISSPROT] = 5;
16226 order [SEQID_DDBJ] = 5;
16227 order [SEQID_PRF] = 5;
16228 order [SEQID_PDB] = 5;
16229 order [SEQID_TPG] = 5;
16230 order [SEQID_TPE] = 5;
16231 order [SEQID_TPD] = 5;
16232 order [SEQID_GPIPE] = 9;
16233 order [SEQID_NAMED_ANNOT_TRACK] = 9;
16234 order [SEQID_PATENT] = 10;
16235 order [SEQID_OTHER] = 8;
16236 order [SEQID_GENERAL] = 15;
16237 order [SEQID_GIBBSQ] = 15;
16238 order [SEQID_GIBBMT] = 15;
16239 order [SEQID_GIIM] = 20;
16240 order [SEQID_GI] = 20;
16241 return SeqIdSelect (sip, order, NUM_SEQID);
16242 }
16243
CreateNewFeature(SeqEntryPtr sep,SeqEntryPtr placeHere,Uint1 choice,SeqFeatPtr useThis)16244 NLM_EXTERN SeqFeatPtr CreateNewFeature (SeqEntryPtr sep, SeqEntryPtr placeHere,
16245 Uint1 choice, SeqFeatPtr useThis)
16246
16247 {
16248 BioseqPtr bsp;
16249 BioseqSetPtr bssp;
16250 SeqFeatPtr prev;
16251 SeqAnnotPtr sap;
16252 SeqFeatPtr sfp;
16253
16254 if (sep == NULL || sep->choice != 1) return NULL;
16255 sfp = NULL;
16256 bsp = NULL;
16257 bssp = NULL;
16258 if (placeHere == NULL) {
16259 placeHere = sep;
16260 }
16261 if (placeHere != NULL && placeHere->data.ptrvalue != NULL) {
16262 if (placeHere->choice == 1) {
16263 bsp = (BioseqPtr) placeHere->data.ptrvalue;
16264 sap = bsp->annot;
16265 while (sap != NULL && (sap->name != NULL || sap->desc != NULL || sap->type != 1)) {
16266 sap = sap->next;
16267 }
16268 if (sap == NULL) {
16269 sap = SeqAnnotNew ();
16270 if (sap != NULL) {
16271 sap->type = 1;
16272 sap->next = bsp->annot;
16273 bsp->annot = sap;
16274 }
16275 sap = bsp->annot;
16276 }
16277 } else if (placeHere->choice == 2) {
16278 bssp = (BioseqSetPtr) placeHere->data.ptrvalue;
16279 sap = bssp->annot;
16280 while (sap != NULL && (sap->name != NULL || sap->desc != NULL || sap->type != 1)) {
16281 sap = sap->next;
16282 }
16283 if (sap == NULL) {
16284 sap = SeqAnnotNew ();
16285 if (sap != NULL) {
16286 sap->type = 1;
16287 sap->next = bssp->annot;
16288 bssp->annot = sap;
16289 }
16290 sap = bssp->annot;
16291 }
16292 } else {
16293 return NULL;
16294 }
16295 if (sap != NULL) {
16296 bsp = (BioseqPtr) sep->data.ptrvalue;
16297 if (useThis != NULL) {
16298 sfp = useThis;
16299 } else {
16300 sfp = SeqFeatNew ();
16301 }
16302 if (sap->data != NULL) {
16303 prev = sap->data;
16304 while (prev->next != NULL) {
16305 prev = prev->next;
16306 }
16307 prev->next = sfp;
16308 } else {
16309 sap->data = (Pointer) sfp;
16310 }
16311 if (sfp != NULL) {
16312 sfp->data.choice = choice;
16313 if (useThis == NULL) {
16314 sfp->location = CreateWholeInterval (sep);
16315 }
16316 }
16317 }
16318 }
16319 return sfp;
16320 }
16321
CreateNewFeatureOnBioseq(BioseqPtr bsp,Uint1 choice,SeqLocPtr slp)16322 NLM_EXTERN SeqFeatPtr CreateNewFeatureOnBioseq (BioseqPtr bsp, Uint1 choice, SeqLocPtr slp)
16323
16324 {
16325 SeqEntryPtr sep;
16326 SeqFeatPtr sfp;
16327
16328 if (bsp == NULL) return NULL;
16329 sep = SeqMgrGetSeqEntryForData (bsp);
16330 if (sep == NULL) return NULL;
16331 sfp = CreateNewFeature (sep, NULL, choice, NULL);
16332 if (sfp == NULL) return NULL;
16333 if (slp != NULL) {
16334 sfp->location = SeqLocFree (sfp->location);
16335 sfp->location = AsnIoMemCopy (slp, (AsnReadFunc) SeqLocAsnRead,
16336 (AsnWriteFunc) SeqLocAsnWrite);
16337 }
16338 return sfp;
16339 }
16340
CreateNewDescriptor(SeqEntryPtr sep,Uint1 choice)16341 NLM_EXTERN ValNodePtr CreateNewDescriptor (SeqEntryPtr sep, Uint1 choice)
16342
16343 {
16344 BioseqPtr bsp;
16345 BioseqSetPtr bssp;
16346 Uint1 _class;
16347 ValNodePtr descr;
16348 SeqEntryPtr seqentry;
16349 ValNodePtr vnp;
16350
16351 vnp = NULL;
16352 if (sep != NULL) {
16353 descr = NULL;
16354 vnp = NULL;
16355 bsp = NULL;
16356 bssp = NULL;
16357 seqentry = sep;
16358 while (seqentry != NULL) {
16359 if (seqentry->choice == 1) {
16360 bsp = (BioseqPtr) seqentry->data.ptrvalue;
16361 if (bsp != NULL) {
16362 descr = bsp->descr;
16363 vnp = SeqDescrNew (descr);
16364 if (descr == NULL) {
16365 bsp->descr = vnp;
16366 }
16367 }
16368 seqentry = NULL;
16369 } else if (seqentry->choice == 2) {
16370 bssp = (BioseqSetPtr) seqentry->data.ptrvalue;
16371 if (bssp != NULL) {
16372 _class = bssp->_class;
16373 if (_class == 7) {
16374 descr = bssp->descr;
16375 vnp = SeqDescrNew (descr);
16376 if (descr == NULL) {
16377 bssp->descr = vnp;
16378 }
16379 seqentry = NULL;
16380 } else if ((_class >= 5 && _class <= 8) || _class == 11 /* || _class == BioseqseqSet_class_gen_prod_set */) {
16381 seqentry = bssp->seq_set;
16382 } else {
16383 descr = bssp->descr;
16384 vnp = SeqDescrNew (descr);
16385 if (descr == NULL) {
16386 bssp->descr = vnp;
16387 }
16388 seqentry = NULL;
16389 }
16390 } else {
16391 seqentry = NULL;
16392 }
16393 } else {
16394 seqentry = NULL;
16395 }
16396 }
16397 if (vnp != NULL) {
16398 vnp->choice = choice;
16399 }
16400 }
16401 return vnp;
16402 }
16403
16404
CreateNewDescriptorOnBioseq(BioseqPtr bsp,Uint1 choice)16405 NLM_EXTERN ValNodePtr CreateNewDescriptorOnBioseq (BioseqPtr bsp, Uint1 choice)
16406
16407 {
16408 SeqEntryPtr sep;
16409
16410 if (bsp == NULL) return NULL;
16411 sep = SeqMgrGetSeqEntryForData (bsp);
16412 if (sep == NULL) return NULL;
16413 return CreateNewDescriptor (sep, choice);
16414 }
16415
16416
16417 /* common functions to scan binary ASN.1 file of entire release as Bioseq-set */
16418
VisitSeqIdList(SeqIdPtr sip,Pointer userdata,VisitSeqIdFunc callback)16419 static Int4 VisitSeqIdList (SeqIdPtr sip, Pointer userdata, VisitSeqIdFunc callback)
16420
16421 {
16422 Int4 index = 0;
16423
16424 while (sip != NULL) {
16425 if (callback != NULL) {
16426 callback (sip, userdata);
16427 }
16428 index++;
16429 sip = sip->next;
16430 }
16431 return index;
16432 }
16433
VisitSeqIdsInSeqLoc(SeqLocPtr slp,Pointer userdata,VisitSeqIdFunc callback)16434 NLM_EXTERN Int4 VisitSeqIdsInSeqLoc (SeqLocPtr slp, Pointer userdata, VisitSeqIdFunc callback)
16435
16436 {
16437 Int4 index = 0;
16438 SeqLocPtr loc;
16439 PackSeqPntPtr psp;
16440 SeqBondPtr sbp;
16441 SeqIntPtr sinp;
16442 SeqIdPtr sip;
16443 SeqPntPtr spp;
16444
16445 if (slp == NULL) return index;
16446
16447 while (slp != NULL) {
16448 switch (slp->choice) {
16449 case SEQLOC_NULL :
16450 break;
16451 case SEQLOC_EMPTY :
16452 case SEQLOC_WHOLE :
16453 sip = (SeqIdPtr) slp->data.ptrvalue;
16454 index += VisitSeqIdList (sip, userdata, callback);
16455 break;
16456 case SEQLOC_INT :
16457 sinp = (SeqIntPtr) slp->data.ptrvalue;
16458 if (sinp != NULL) {
16459 sip = sinp->id;
16460 index += VisitSeqIdList (sip, userdata, callback);
16461 }
16462 break;
16463 case SEQLOC_PNT :
16464 spp = (SeqPntPtr) slp->data.ptrvalue;
16465 if (spp != NULL) {
16466 sip = spp->id;
16467 index += VisitSeqIdList (sip, userdata, callback);
16468 }
16469 break;
16470 case SEQLOC_PACKED_PNT :
16471 psp = (PackSeqPntPtr) slp->data.ptrvalue;
16472 if (psp != NULL) {
16473 sip = psp->id;
16474 index += VisitSeqIdList (sip, userdata, callback);
16475 }
16476 break;
16477 case SEQLOC_PACKED_INT :
16478 case SEQLOC_MIX :
16479 case SEQLOC_EQUIV :
16480 loc = (SeqLocPtr) slp->data.ptrvalue;
16481 while (loc != NULL) {
16482 index += VisitSeqIdsInSeqLoc (loc, userdata, callback);
16483 loc = loc->next;
16484 }
16485 break;
16486 case SEQLOC_BOND :
16487 sbp = (SeqBondPtr) slp->data.ptrvalue;
16488 if (sbp != NULL) {
16489 spp = (SeqPntPtr) sbp->a;
16490 if (spp != NULL) {
16491 sip = spp->id;
16492 index += VisitSeqIdList (sip, userdata, callback);
16493 }
16494 spp = (SeqPntPtr) sbp->b;
16495 if (spp != NULL) {
16496 sip = spp->id;
16497 index += VisitSeqIdList (sip, userdata, callback);
16498 }
16499 }
16500 break;
16501 case SEQLOC_FEAT :
16502 break;
16503 default :
16504 break;
16505 }
16506 slp = slp->next;
16507 }
16508
16509 return index;
16510 }
16511
VisitSeqIdsInBioseq(BioseqPtr bsp,Pointer userdata,VisitSeqIdFunc callback)16512 NLM_EXTERN Int4 VisitSeqIdsInBioseq (BioseqPtr bsp, Pointer userdata, VisitSeqIdFunc callback)
16513
16514 {
16515 Int4 index = 0;
16516
16517 if (bsp == NULL) return index;
16518
16519 if (bsp->id != NULL) {
16520 index += VisitSeqIdList (bsp->id, userdata, callback);
16521 }
16522
16523 return index;
16524 }
16525
VisitSeqIdsInSeqFeat(SeqFeatPtr sfp,Pointer userdata,VisitSeqIdFunc callback)16526 NLM_EXTERN Int4 VisitSeqIdsInSeqFeat (SeqFeatPtr sfp, Pointer userdata, VisitSeqIdFunc callback)
16527
16528 {
16529 CodeBreakPtr cbp;
16530 CdRegionPtr crp;
16531 Int4 index = 0;
16532 RnaRefPtr rrp;
16533 tRNAPtr trp;
16534
16535 if (sfp == NULL) return index;
16536
16537 index += VisitSeqIdsInSeqLoc (sfp->location, userdata, callback);
16538 index += VisitSeqIdsInSeqLoc (sfp->product, userdata, callback);
16539
16540 switch (sfp->data.choice) {
16541 case SEQFEAT_CDREGION :
16542 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
16543 if (crp != NULL) {
16544 for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
16545 index += VisitSeqIdsInSeqLoc (cbp->loc, userdata, callback);
16546 }
16547 }
16548 break;
16549 case SEQFEAT_RNA :
16550 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
16551 if (rrp != NULL && rrp->ext.choice == 2) {
16552 trp = (tRNAPtr) rrp->ext.value.ptrvalue;
16553 if (trp != NULL && trp->anticodon != NULL) {
16554 index += VisitSeqIdsInSeqLoc (trp->anticodon, userdata, callback);
16555 }
16556 }
16557 break;
16558 default :
16559 break;
16560 }
16561
16562 return index;
16563 }
16564
VisitSeqIdsInSeqAlign(SeqAlignPtr sap,Pointer userdata,VisitSeqIdFunc callback)16565 NLM_EXTERN Int4 VisitSeqIdsInSeqAlign (SeqAlignPtr sap, Pointer userdata, VisitSeqIdFunc callback)
16566
16567 {
16568 DenseDiagPtr ddp;
16569 DenseSegPtr dsp;
16570 Int4 index = 0;
16571 SeqIdPtr sip;
16572 SeqLocPtr slp = NULL;
16573 StdSegPtr ssp;
16574
16575 if (sap == NULL) return index;
16576
16577 if (sap->bounds != NULL) {
16578 sip = SeqLocId (sap->bounds);
16579 index += VisitSeqIdList (sip, userdata, callback);
16580 }
16581
16582 if (sap->segs == NULL) return index;
16583
16584 switch (sap->segtype) {
16585 case SAS_DENDIAG :
16586 ddp = (DenseDiagPtr) sap->segs;
16587 if (ddp != NULL) {
16588 for (sip = ddp->id; sip != NULL; sip = sip->next) {
16589 index += VisitSeqIdList (sip, userdata, callback);
16590 }
16591 }
16592 break;
16593 case SAS_DENSEG :
16594 dsp = (DenseSegPtr) sap->segs;
16595 if (dsp != NULL) {
16596 for (sip = dsp->ids; sip != NULL; sip = sip->next) {
16597 index += VisitSeqIdList (sip, userdata, callback);
16598 }
16599 }
16600 break;
16601 case SAS_STD :
16602 ssp = (StdSegPtr) sap->segs;
16603 for (slp = ssp->loc; slp != NULL; slp = slp->next) {
16604 sip = SeqLocId (slp);
16605 index += VisitSeqIdList (sip, userdata, callback);
16606 }
16607 break;
16608 case SAS_DISC :
16609 /* recursive */
16610 for (sap = (SeqAlignPtr) sap->segs; sap != NULL; sap = sap->next) {
16611 index += VisitSeqIdsInSeqAlign (sap, userdata, callback);
16612 }
16613 break;
16614 default :
16615 break;
16616 }
16617
16618 return index;
16619 }
16620
VisitSeqIdsInSeqGraph(SeqGraphPtr sgp,Pointer userdata,VisitSeqIdFunc callback)16621 NLM_EXTERN Int4 VisitSeqIdsInSeqGraph (SeqGraphPtr sgp, Pointer userdata, VisitSeqIdFunc callback)
16622
16623 {
16624 Int4 index = 0;
16625 SeqIdPtr sip;
16626
16627 if (sgp == NULL) return index;
16628
16629 if (sgp->loc != NULL) {
16630 sip = SeqLocId (sgp->loc);
16631 index += VisitSeqIdList (sip, userdata, callback);
16632 }
16633
16634 return index;
16635 }
16636
VisitSeqIdsInSeqAnnot(SeqAnnotPtr annot,Pointer userdata,VisitSeqIdFunc callback)16637 NLM_EXTERN Int4 VisitSeqIdsInSeqAnnot (SeqAnnotPtr annot, Pointer userdata, VisitSeqIdFunc callback)
16638
16639 {
16640 Int4 index = 0;
16641 SeqAlignPtr sap;
16642 SeqFeatPtr sfp;
16643 SeqGraphPtr sgp;
16644
16645 if (annot == NULL || annot->data == NULL) return index;
16646
16647 switch (annot->type) {
16648
16649 case 1 :
16650 for (sfp = (SeqFeatPtr) annot->data; sfp != NULL; sfp = sfp->next) {
16651 index += VisitSeqIdsInSeqFeat (sfp, userdata, callback);
16652 }
16653 break;
16654
16655 case 2 :
16656 for (sap = (SeqAlignPtr) annot->data; sap != NULL; sap = sap->next) {
16657 index += VisitSeqIdsInSeqAlign (sap, userdata, callback);
16658 }
16659 break;
16660
16661 case 3 :
16662 for (sgp = (SeqGraphPtr) annot->data; sgp != NULL; sgp = sgp->next) {
16663 index += VisitSeqIdsInSeqGraph (sgp, userdata, callback);
16664 }
16665 break;
16666
16667 default :
16668 break;
16669 }
16670
16671 return index;
16672 }
16673
VisitUserFieldsInUfp(UserFieldPtr ufp,Pointer userdata,VisitUserFieldsFunc callback)16674 NLM_EXTERN Int4 VisitUserFieldsInUfp (UserFieldPtr ufp, Pointer userdata, VisitUserFieldsFunc callback)
16675
16676 {
16677 UserFieldPtr curr;
16678 Int4 index = 0;
16679 Boolean nested = FALSE;
16680
16681 if (ufp == NULL) return index;
16682 if (ufp->choice == 11) {
16683 for (curr = (UserFieldPtr) ufp->data.ptrvalue; curr != NULL; curr = curr->next) {
16684 index += VisitUserFieldsInUfp (curr, userdata,callback);
16685 nested = TRUE;
16686 }
16687 }
16688 if (! nested) {
16689 if (callback != NULL) {
16690 callback (ufp, userdata);
16691 }
16692 index++;
16693 }
16694 return index;
16695 }
16696
VisitUserFieldsInUop(UserObjectPtr uop,Pointer userdata,VisitUserFieldsFunc callback)16697 NLM_EXTERN Int4 VisitUserFieldsInUop (UserObjectPtr uop, Pointer userdata, VisitUserFieldsFunc callback)
16698
16699 {
16700 Int4 index = 0;
16701 UserFieldPtr ufp;
16702
16703 if (uop == NULL) return index;
16704 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
16705 if (callback != NULL) {
16706 callback (ufp, userdata);
16707 }
16708 index++;
16709 }
16710 return index;
16711 }
16712
16713 /* Visits only unnested nodes */
VisitUserObjectsInUop(UserObjectPtr uop,Pointer userdata,VisitUserObjectFunc callback)16714 NLM_EXTERN Int4 VisitUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback)
16715
16716 {
16717 Int4 index = 0;
16718 Boolean nested = FALSE;
16719 UserObjectPtr obj;
16720 UserFieldPtr ufp;
16721
16722 if (uop == NULL) return index;
16723 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
16724 if (ufp->choice == 6) {
16725 obj = (UserObjectPtr) ufp->data.ptrvalue;
16726 index += VisitUserObjectsInUop (obj, userdata, callback);
16727 nested = TRUE;
16728 } else if (ufp->choice == 12) {
16729 for (obj = (UserObjectPtr) ufp->data.ptrvalue; obj != NULL; obj = obj->next) {
16730 index += VisitUserObjectsInUop (obj, userdata, callback);
16731 }
16732 nested = TRUE;
16733 }
16734 }
16735 if (! nested) {
16736 if (callback != NULL) {
16737 callback (uop, userdata);
16738 }
16739 index++;
16740 }
16741 return index;
16742 }
16743
VisitAllUserObjectsInUop(UserObjectPtr uop,Pointer userdata,VisitUserObjectFunc callback)16744 NLM_EXTERN Int4 VisitAllUserObjectsInUop (UserObjectPtr uop, Pointer userdata, VisitUserObjectFunc callback)
16745
16746 {
16747 Int4 index = 0;
16748 UserObjectPtr obj;
16749 UserFieldPtr ufp;
16750
16751 if (uop == NULL) return index;
16752 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
16753 if (ufp->choice == 6) {
16754 obj = (UserObjectPtr) ufp->data.ptrvalue;
16755 index += VisitAllUserObjectsInUop (obj, userdata, callback);
16756 } else if (ufp->choice == 12) {
16757 for (obj = (UserObjectPtr) ufp->data.ptrvalue; obj != NULL; obj = obj->next) {
16758 index += VisitAllUserObjectsInUop (obj, userdata, callback);
16759 }
16760 }
16761 }
16762 if (callback != NULL) {
16763 callback (uop, userdata);
16764 }
16765 index++;
16766 return index;
16767 }
16768 //LCOV_EXCL_STOP
16769
/* Search state handed to FindUopProc through the visitor's userdata
   pointer. */
typedef struct uopdata {
  UserObjectPtr rsult; /* set by FindUopProc to a user object whose type string matches tag */
  CharPtr tag;         /* type string to match, compared with StringICmp */
} UopData, PNTR UopDataPtr;
16774
/* Visitor callback: when the type string of uop equals the tag in the
   UopData passed as userdata (StringICmp comparison), records uop in its
   rsult field.  A later match overwrites an earlier one. */
static void FindUopProc (
  UserObjectPtr uop,
  Pointer userdata
)

{
  UopDataPtr  search;

  if (uop == NULL || userdata == NULL || uop->type == NULL) return;

  search = (UopDataPtr) userdata;
  if (StringICmp (uop->type->str, search->tag) == 0) {
    search->rsult = uop;
  }
}
16791
FindUopByTag(UserObjectPtr top,CharPtr tag)16792 NLM_EXTERN UserObjectPtr FindUopByTag (UserObjectPtr top, CharPtr tag)
16793
16794 {
16795 UopData ud;
16796
16797 if (top == NULL || StringHasNoText (tag)) return NULL;
16798 ud.rsult = NULL;
16799 ud.tag = tag;
16800 VisitUserObjectsInUop (top, (Pointer) &ud, FindUopProc);
16801 return ud.rsult;
16802 }
16803
16804 //LCOV_EXCL_START
CombineUserObjects(UserObjectPtr origuop,UserObjectPtr newuop)16805 NLM_EXTERN UserObjectPtr CombineUserObjects (UserObjectPtr origuop, UserObjectPtr newuop)
16806
16807 {
16808 UserFieldPtr prev = NULL;
16809 ObjectIdPtr oip;
16810 UserFieldPtr ufp;
16811 UserObjectPtr uop;
16812
16813 if (newuop == NULL) return origuop;
16814 if (origuop == NULL) return newuop;
16815
16816 /* adding to an object that already chaperones at least two user objects */
16817
16818 oip = origuop->type;
16819 if (oip != NULL && StringICmp (oip->str, "CombinedFeatureUserObjects") == 0) {
16820
16821 for (ufp = origuop->data; ufp != NULL; ufp = ufp->next) {
16822 prev = ufp;
16823 }
16824
16825 ufp = UserFieldNew ();
16826 oip = ObjectIdNew ();
16827 oip->id = 0;
16828 ufp->label = oip;
16829 ufp->choice = 6; /* user object */
16830 ufp->data.ptrvalue = (Pointer) newuop;
16831
16832 /* link new set at end of list */
16833
16834 if (prev != NULL) {
16835 prev->next = ufp;
16836 } else {
16837 origuop->data = ufp;
16838 }
16839 return origuop;
16840 }
16841
16842 /* creating a new chaperone, link in first two user objects */
16843
16844 uop = UserObjectNew ();
16845 oip = ObjectIdNew ();
16846 oip->str = StringSave ("CombinedFeatureUserObjects");
16847 uop->type = oip;
16848
16849 ufp = UserFieldNew ();
16850 oip = ObjectIdNew ();
16851 oip->id = 0;
16852 ufp->label = oip;
16853 ufp->choice = 6; /* user object */
16854 ufp->data.ptrvalue = (Pointer) origuop;
16855 uop->data = ufp;
16856 prev = ufp;
16857
16858 ufp = UserFieldNew ();
16859 oip = ObjectIdNew ();
16860 oip->id = 0;
16861 ufp->label = oip;
16862 ufp->choice = 6; /* user object */
16863 ufp->data.ptrvalue = (Pointer) newuop;
16864 prev->next = ufp;
16865
16866 return uop;
16867 }
16868
16869
VisitDescriptorsProc(SeqDescrPtr descr,Pointer userdata,VisitDescriptorsFunc callback)16870 static Int4 VisitDescriptorsProc (SeqDescrPtr descr, Pointer userdata, VisitDescriptorsFunc callback)
16871
16872 {
16873 Int4 index = 0;
16874 SeqDescrPtr sdp;
16875
16876 for (sdp = descr; sdp != NULL; sdp = sdp->next) {
16877 if (callback != NULL) {
16878 callback (sdp, userdata);
16879 }
16880 index++;
16881 }
16882 return index;
16883 }
16884
VisitDescriptorsOnBsp(BioseqPtr bsp,Pointer userdata,VisitDescriptorsFunc callback)16885 NLM_EXTERN Int4 VisitDescriptorsOnBsp (BioseqPtr bsp, Pointer userdata, VisitDescriptorsFunc callback)
16886
16887 {
16888 Int4 index = 0;
16889
16890 if (bsp == NULL) return index;
16891 index += VisitDescriptorsProc (bsp->descr, userdata, callback);
16892 return index;
16893 }
16894
VisitDescriptorsOnSet(BioseqSetPtr bssp,Pointer userdata,VisitDescriptorsFunc callback)16895 NLM_EXTERN Int4 VisitDescriptorsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitDescriptorsFunc callback)
16896
16897 {
16898 Int4 index = 0;
16899
16900 if (bssp == NULL) return index;
16901 index += VisitDescriptorsProc (bssp->descr, userdata, callback);
16902 return index;
16903 }
16904
VisitDescriptorsInSet(BioseqSetPtr bssp,Pointer userdata,VisitDescriptorsFunc callback)16905 NLM_EXTERN Int4 VisitDescriptorsInSet (BioseqSetPtr bssp, Pointer userdata, VisitDescriptorsFunc callback)
16906
16907 {
16908 Int4 index = 0;
16909 SeqEntryPtr tmp;
16910
16911 if (bssp == NULL) return index;
16912 index += VisitDescriptorsProc (bssp->descr, userdata, callback);
16913 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
16914 index += VisitDescriptorsInSep (tmp, userdata, callback);
16915 }
16916 return index;
16917 }
16918
VisitDescriptorsOnSep(SeqEntryPtr sep,Pointer userdata,VisitDescriptorsFunc callback)16919 NLM_EXTERN Int4 VisitDescriptorsOnSep (SeqEntryPtr sep, Pointer userdata, VisitDescriptorsFunc callback)
16920
16921 {
16922 BioseqPtr bsp;
16923 BioseqSetPtr bssp;
16924 Int4 index = 0;
16925
16926 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
16927 if (IS_Bioseq (sep)) {
16928 bsp = (BioseqPtr) sep->data.ptrvalue;
16929 index += VisitDescriptorsOnBsp (bsp, userdata, callback);
16930 } else if (IS_Bioseq_set (sep)) {
16931 bssp = (BioseqSetPtr) sep->data.ptrvalue;
16932 index += VisitDescriptorsOnSet (bssp, userdata, callback);
16933 }
16934 return index;
16935 }
16936
VisitDescriptorsInSep(SeqEntryPtr sep,Pointer userdata,VisitDescriptorsFunc callback)16937 NLM_EXTERN Int4 VisitDescriptorsInSep (SeqEntryPtr sep, Pointer userdata, VisitDescriptorsFunc callback)
16938
16939 {
16940 BioseqPtr bsp;
16941 BioseqSetPtr bssp;
16942 Int4 index = 0;
16943
16944 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
16945 if (IS_Bioseq (sep)) {
16946 bsp = (BioseqPtr) sep->data.ptrvalue;
16947 index += VisitDescriptorsOnBsp (bsp, userdata, callback);
16948 } else if (IS_Bioseq_set (sep)) {
16949 bssp = (BioseqSetPtr) sep->data.ptrvalue;
16950 index += VisitDescriptorsInSet (bssp, userdata, callback);
16951 }
16952 return index;
16953 }
16954
16955
VisitFeaturesProc(SeqAnnotPtr annot,Pointer userdata,VisitFeaturesFunc callback)16956 static Int4 VisitFeaturesProc (SeqAnnotPtr annot, Pointer userdata, VisitFeaturesFunc callback)
16957
16958 {
16959 Int4 index = 0;
16960 SeqAnnotPtr sap;
16961 SeqFeatPtr sfp;
16962
16963 for (sap = annot; sap != NULL; sap = sap->next) {
16964 if (sap->type != 1) continue;
16965 for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
16966 if (callback != NULL) {
16967 callback (sfp, userdata);
16968 }
16969 index++;
16970 }
16971 }
16972 return index;
16973 }
16974
VisitFeaturesOnSap(SeqAnnotPtr sap,Pointer userdata,VisitFeaturesFunc callback)16975 NLM_EXTERN Int4 VisitFeaturesOnSap (SeqAnnotPtr sap, Pointer userdata, VisitFeaturesFunc callback)
16976
16977 {
16978 Int4 index = 0;
16979 SeqFeatPtr sfp;
16980
16981 if (sap == NULL) return index;
16982 if (sap->type != 1) return index;
16983 for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
16984 if (callback != NULL) {
16985 callback (sfp, userdata);
16986 }
16987 index++;
16988 }
16989 return index;
16990 }
16991
VisitFeaturesOnBsp(BioseqPtr bsp,Pointer userdata,VisitFeaturesFunc callback)16992 NLM_EXTERN Int4 VisitFeaturesOnBsp (BioseqPtr bsp, Pointer userdata, VisitFeaturesFunc callback)
16993
16994 {
16995 Int4 index = 0;
16996
16997 if (bsp == NULL) return index;
16998 index += VisitFeaturesProc (bsp->annot, userdata, callback);
16999 return index;
17000 }
17001
VisitFeaturesOnSet(BioseqSetPtr bssp,Pointer userdata,VisitFeaturesFunc callback)17002 NLM_EXTERN Int4 VisitFeaturesOnSet (BioseqSetPtr bssp, Pointer userdata, VisitFeaturesFunc callback)
17003
17004 {
17005 Int4 index = 0;
17006
17007 if (bssp == NULL) return index;
17008 index += VisitFeaturesProc (bssp->annot, userdata, callback);
17009 return index;
17010 }
17011
VisitFeaturesInSet(BioseqSetPtr bssp,Pointer userdata,VisitFeaturesFunc callback)17012 NLM_EXTERN Int4 VisitFeaturesInSet (BioseqSetPtr bssp, Pointer userdata, VisitFeaturesFunc callback)
17013
17014 {
17015 Int4 index = 0;
17016 SeqEntryPtr tmp;
17017
17018 if (bssp == NULL) return index;
17019 index += VisitFeaturesProc (bssp->annot, userdata, callback);
17020 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17021 index += VisitFeaturesInSep (tmp, userdata, callback);
17022 }
17023 return index;
17024 }
17025
VisitFeaturesOnSep(SeqEntryPtr sep,Pointer userdata,VisitFeaturesFunc callback)17026 NLM_EXTERN Int4 VisitFeaturesOnSep (SeqEntryPtr sep, Pointer userdata, VisitFeaturesFunc callback)
17027
17028 {
17029 BioseqPtr bsp;
17030 BioseqSetPtr bssp;
17031 Int4 index = 0;
17032
17033 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17034 if (IS_Bioseq (sep)) {
17035 bsp = (BioseqPtr) sep->data.ptrvalue;
17036 index += VisitFeaturesOnBsp (bsp, userdata, callback);
17037 } else if (IS_Bioseq_set (sep)) {
17038 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17039 index += VisitFeaturesOnSet (bssp, userdata, callback);
17040 }
17041 return index;
17042 }
17043
VisitFeaturesInSep(SeqEntryPtr sep,Pointer userdata,VisitFeaturesFunc callback)17044 NLM_EXTERN Int4 VisitFeaturesInSep (SeqEntryPtr sep, Pointer userdata, VisitFeaturesFunc callback)
17045
17046 {
17047 BioseqPtr bsp;
17048 BioseqSetPtr bssp;
17049 Int4 index = 0;
17050
17051 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17052 if (IS_Bioseq (sep)) {
17053 bsp = (BioseqPtr) sep->data.ptrvalue;
17054 index += VisitFeaturesOnBsp (bsp, userdata, callback);
17055 } else if (IS_Bioseq_set (sep)) {
17056 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17057 index += VisitFeaturesInSet (bssp, userdata, callback);
17058 }
17059 return index;
17060 }
17061
17062
VisitAlignmentsOnDisc(Pointer segs,Pointer userdata,VisitAlignmentsFunc callback)17063 static Int4 VisitAlignmentsOnDisc (Pointer segs, Pointer userdata, VisitAlignmentsFunc callback)
17064
17065 {
17066 Int4 index = 0;
17067 SeqAlignPtr salp;
17068
17069 for (salp = (SeqAlignPtr) segs; salp != NULL; salp = salp->next) {
17070 if (callback != NULL) {
17071 callback (salp, userdata);
17072 }
17073 index++;
17074 if (salp->segtype == SAS_DISC) {
17075 index += VisitAlignmentsOnDisc (salp->segs, userdata, callback);
17076 }
17077 }
17078 return index;
17079 }
17080
VisitAlignmentsProc(SeqAnnotPtr annot,Pointer userdata,VisitAlignmentsFunc callback)17081 static Int4 VisitAlignmentsProc (SeqAnnotPtr annot, Pointer userdata, VisitAlignmentsFunc callback)
17082
17083 {
17084 Int4 index = 0;
17085 SeqAlignPtr salp;
17086 SeqAnnotPtr sap;
17087
17088 for (sap = annot; sap != NULL; sap = sap->next) {
17089 if (sap->type != 2) continue;
17090 for (salp = (SeqAlignPtr) sap->data; salp != NULL; salp = salp->next) {
17091 if (callback != NULL) {
17092 callback (salp, userdata);
17093 }
17094 index++;
17095 if (salp->segtype == SAS_DISC) {
17096 index += VisitAlignmentsOnDisc (salp->segs, userdata, callback);
17097 }
17098 }
17099 }
17100 return index;
17101 }
17102
VisitAlignmentsOnSap(SeqAnnotPtr sap,Pointer userdata,VisitAlignmentsFunc callback)17103 NLM_EXTERN Int4 VisitAlignmentsOnSap (SeqAnnotPtr sap, Pointer userdata, VisitAlignmentsFunc callback)
17104
17105 {
17106 Int4 index = 0;
17107 SeqAlignPtr salp;
17108
17109 if (sap == NULL) return index;
17110 if (sap->type != 2) return index;
17111 for (salp = (SeqAlignPtr) sap->data; salp != NULL; salp = salp->next) {
17112 if (callback != NULL) {
17113 callback (salp, userdata);
17114 }
17115 index++;
17116 if (salp->segtype == SAS_DISC) {
17117 index += VisitAlignmentsOnDisc (salp->segs, userdata, callback);
17118 }
17119 }
17120 return index;
17121 }
17122
VisitAlignmentsOnBsp(BioseqPtr bsp,Pointer userdata,VisitAlignmentsFunc callback)17123 NLM_EXTERN Int4 VisitAlignmentsOnBsp (BioseqPtr bsp, Pointer userdata, VisitAlignmentsFunc callback)
17124
17125 {
17126 Int4 index = 0;
17127
17128 if (bsp == NULL) return index;
17129 index += VisitAlignmentsProc (bsp->annot, userdata, callback);
17130 return index;
17131 }
17132
VisitAlignmentsOnSet(BioseqSetPtr bssp,Pointer userdata,VisitAlignmentsFunc callback)17133 NLM_EXTERN Int4 VisitAlignmentsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitAlignmentsFunc callback)
17134
17135 {
17136 Int4 index = 0;
17137
17138 if (bssp == NULL) return index;
17139 index += VisitAlignmentsProc (bssp->annot, userdata, callback);
17140 return index;
17141 }
17142
VisitAlignmentsInSet(BioseqSetPtr bssp,Pointer userdata,VisitAlignmentsFunc callback)17143 NLM_EXTERN Int4 VisitAlignmentsInSet (BioseqSetPtr bssp, Pointer userdata, VisitAlignmentsFunc callback)
17144
17145 {
17146 Int4 index = 0;
17147 SeqEntryPtr tmp;
17148
17149 if (bssp == NULL) return index;
17150 index += VisitAlignmentsProc (bssp->annot, userdata, callback);
17151 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17152 index += VisitAlignmentsInSep (tmp, userdata, callback);
17153 }
17154 return index;
17155 }
17156
VisitAlignmentsOnSep(SeqEntryPtr sep,Pointer userdata,VisitAlignmentsFunc callback)17157 NLM_EXTERN Int4 VisitAlignmentsOnSep (SeqEntryPtr sep, Pointer userdata, VisitAlignmentsFunc callback)
17158
17159 {
17160 BioseqPtr bsp;
17161 BioseqSetPtr bssp;
17162 Int4 index = 0;
17163
17164 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17165 if (IS_Bioseq (sep)) {
17166 bsp = (BioseqPtr) sep->data.ptrvalue;
17167 index += VisitAlignmentsOnBsp (bsp, userdata, callback);
17168 } else if (IS_Bioseq_set (sep)) {
17169 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17170 index += VisitAlignmentsOnSet (bssp, userdata, callback);
17171 }
17172 return index;
17173 }
17174
VisitAlignmentsInSep(SeqEntryPtr sep,Pointer userdata,VisitAlignmentsFunc callback)17175 NLM_EXTERN Int4 VisitAlignmentsInSep (SeqEntryPtr sep, Pointer userdata, VisitAlignmentsFunc callback)
17176
17177 {
17178 BioseqPtr bsp;
17179 BioseqSetPtr bssp;
17180 Int4 index = 0;
17181
17182 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17183 if (IS_Bioseq (sep)) {
17184 bsp = (BioseqPtr) sep->data.ptrvalue;
17185 index += VisitAlignmentsOnBsp (bsp, userdata, callback);
17186 } else if (IS_Bioseq_set (sep)) {
17187 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17188 index += VisitAlignmentsInSet (bssp, userdata, callback);
17189 }
17190 return index;
17191 }
17192
17193
VisitGraphsProc(SeqAnnotPtr annot,Pointer userdata,VisitGraphsFunc callback)17194 static Int4 VisitGraphsProc (SeqAnnotPtr annot, Pointer userdata, VisitGraphsFunc callback)
17195
17196 {
17197 Int4 index = 0;
17198 SeqAnnotPtr sap;
17199 SeqGraphPtr sgp;
17200
17201 for (sap = annot; sap != NULL; sap = sap->next) {
17202 if (sap->type != 3) continue;
17203 for (sgp = (SeqGraphPtr) sap->data; sgp != NULL; sgp = sgp->next) {
17204 if (callback != NULL) {
17205 callback (sgp, userdata);
17206 }
17207 index++;
17208 }
17209 }
17210 return index;
17211 }
17212
VisitGraphsOnSap(SeqAnnotPtr sap,Pointer userdata,VisitGraphsFunc callback)17213 NLM_EXTERN Int4 VisitGraphsOnSap (SeqAnnotPtr sap, Pointer userdata, VisitGraphsFunc callback)
17214
17215 {
17216 Int4 index = 0;
17217 SeqGraphPtr sgp;
17218
17219 if (sap == NULL) return index;
17220 if (sap->type != 3) return index;
17221 for (sgp = (SeqGraphPtr) sap->data; sgp != NULL; sgp = sgp->next) {
17222 if (callback != NULL) {
17223 callback (sgp, userdata);
17224 }
17225 index++;
17226 }
17227 return index;
17228 }
17229
VisitGraphsOnBsp(BioseqPtr bsp,Pointer userdata,VisitGraphsFunc callback)17230 NLM_EXTERN Int4 VisitGraphsOnBsp (BioseqPtr bsp, Pointer userdata, VisitGraphsFunc callback)
17231
17232 {
17233 Int4 index = 0;
17234
17235 if (bsp == NULL) return index;
17236 index += VisitGraphsProc (bsp->annot, userdata, callback);
17237 return index;
17238 }
17239
VisitGraphsOnSet(BioseqSetPtr bssp,Pointer userdata,VisitGraphsFunc callback)17240 NLM_EXTERN Int4 VisitGraphsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitGraphsFunc callback)
17241
17242 {
17243 Int4 index = 0;
17244
17245 if (bssp == NULL) return index;
17246 index += VisitGraphsProc (bssp->annot, userdata, callback);
17247 return index;
17248 }
17249
VisitGraphsInSet(BioseqSetPtr bssp,Pointer userdata,VisitGraphsFunc callback)17250 NLM_EXTERN Int4 VisitGraphsInSet (BioseqSetPtr bssp, Pointer userdata, VisitGraphsFunc callback)
17251
17252 {
17253 Int4 index = 0;
17254 SeqEntryPtr tmp;
17255
17256 if (bssp == NULL) return index;
17257 index += VisitGraphsProc (bssp->annot, userdata, callback);
17258 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17259 index += VisitGraphsInSep (tmp, userdata, callback);
17260 }
17261 return index;
17262 }
17263
VisitGraphsOnSep(SeqEntryPtr sep,Pointer userdata,VisitGraphsFunc callback)17264 NLM_EXTERN Int4 VisitGraphsOnSep (SeqEntryPtr sep, Pointer userdata, VisitGraphsFunc callback)
17265
17266 {
17267 BioseqPtr bsp;
17268 BioseqSetPtr bssp;
17269 Int4 index = 0;
17270
17271 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17272 if (IS_Bioseq (sep)) {
17273 bsp = (BioseqPtr) sep->data.ptrvalue;
17274 index += VisitGraphsOnBsp (bsp, userdata, callback);
17275 } else if (IS_Bioseq_set (sep)) {
17276 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17277 index += VisitGraphsOnSet (bssp, userdata, callback);
17278 }
17279 return index;
17280 }
17281
VisitGraphsInSep(SeqEntryPtr sep,Pointer userdata,VisitGraphsFunc callback)17282 NLM_EXTERN Int4 VisitGraphsInSep (SeqEntryPtr sep, Pointer userdata, VisitGraphsFunc callback)
17283
17284 {
17285 BioseqPtr bsp;
17286 BioseqSetPtr bssp;
17287 Int4 index = 0;
17288
17289 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17290 if (IS_Bioseq (sep)) {
17291 bsp = (BioseqPtr) sep->data.ptrvalue;
17292 index += VisitGraphsOnBsp (bsp, userdata, callback);
17293 } else if (IS_Bioseq_set (sep)) {
17294 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17295 index += VisitGraphsInSet (bssp, userdata, callback);
17296 }
17297 return index;
17298 }
17299
17300
VisitAnnotsProc(SeqAnnotPtr annot,Pointer userdata,VisitAnnotsFunc callback)17301 static Int4 VisitAnnotsProc (SeqAnnotPtr annot, Pointer userdata, VisitAnnotsFunc callback)
17302
17303 {
17304 Int4 index = 0;
17305 SeqAnnotPtr sap;
17306
17307 for (sap = annot; sap != NULL; sap = sap->next) {
17308 if (callback != NULL) {
17309 callback (sap, userdata);
17310 }
17311 index++;
17312 }
17313 return index;
17314 }
17315
VisitAnnotsOnBsp(BioseqPtr bsp,Pointer userdata,VisitAnnotsFunc callback)17316 NLM_EXTERN Int4 VisitAnnotsOnBsp (BioseqPtr bsp, Pointer userdata, VisitAnnotsFunc callback)
17317
17318 {
17319 Int4 index = 0;
17320
17321 if (bsp == NULL) return index;
17322 index += VisitAnnotsProc (bsp->annot, userdata, callback);
17323 return index;
17324 }
17325
VisitAnnotsOnSet(BioseqSetPtr bssp,Pointer userdata,VisitAnnotsFunc callback)17326 NLM_EXTERN Int4 VisitAnnotsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitAnnotsFunc callback)
17327
17328 {
17329 Int4 index = 0;
17330
17331 if (bssp == NULL) return index;
17332 index += VisitAnnotsProc (bssp->annot, userdata, callback);
17333 return index;
17334 }
17335
VisitAnnotsInSet(BioseqSetPtr bssp,Pointer userdata,VisitAnnotsFunc callback)17336 NLM_EXTERN Int4 VisitAnnotsInSet (BioseqSetPtr bssp, Pointer userdata, VisitAnnotsFunc callback)
17337
17338 {
17339 Int4 index = 0;
17340 SeqEntryPtr tmp;
17341
17342 if (bssp == NULL) return index;
17343 index += VisitAnnotsProc (bssp->annot, userdata, callback);
17344 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17345 index += VisitAnnotsInSep (tmp, userdata, callback);
17346 }
17347 return index;
17348 }
17349
VisitAnnotsOnSep(SeqEntryPtr sep,Pointer userdata,VisitAnnotsFunc callback)17350 NLM_EXTERN Int4 VisitAnnotsOnSep (SeqEntryPtr sep, Pointer userdata, VisitAnnotsFunc callback)
17351
17352 {
17353 BioseqPtr bsp;
17354 BioseqSetPtr bssp;
17355 Int4 index = 0;
17356
17357 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17358 if (IS_Bioseq (sep)) {
17359 bsp = (BioseqPtr) sep->data.ptrvalue;
17360 index += VisitAnnotsOnBsp (bsp, userdata, callback);
17361 } else if (IS_Bioseq_set (sep)) {
17362 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17363 index += VisitAnnotsOnSet (bssp, userdata, callback);
17364 }
17365 return index;
17366 }
17367
VisitAnnotsInSep(SeqEntryPtr sep,Pointer userdata,VisitAnnotsFunc callback)17368 NLM_EXTERN Int4 VisitAnnotsInSep (SeqEntryPtr sep, Pointer userdata, VisitAnnotsFunc callback)
17369
17370 {
17371 BioseqPtr bsp;
17372 BioseqSetPtr bssp;
17373 Int4 index = 0;
17374
17375 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17376 if (IS_Bioseq (sep)) {
17377 bsp = (BioseqPtr) sep->data.ptrvalue;
17378 index += VisitAnnotsOnBsp (bsp, userdata, callback);
17379 } else if (IS_Bioseq_set (sep)) {
17380 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17381 index += VisitAnnotsInSet (bssp, userdata, callback);
17382 }
17383 return index;
17384 }
17385
17386
VisitAuthorsProc(AuthListPtr alp,Pointer userdata,VisitAuthorFunc callback)17387 static Int4 VisitAuthorsProc (AuthListPtr alp, Pointer userdata, VisitAuthorFunc callback)
17388
17389 {
17390 AuthorPtr ap;
17391 Int4 index = 0;
17392 ValNodePtr names;
17393 NameStdPtr nsp;
17394 PersonIdPtr pid;
17395
17396 if (alp == NULL || alp->choice != 1) return index;
17397
17398 for (names = alp->names; names != NULL; names = names->next) {
17399 ap = names->data.ptrvalue;
17400 if (ap == NULL) continue;
17401 pid = ap->name;
17402 if (pid == NULL || pid->choice != 2) continue;
17403 nsp = pid->data;
17404 if (nsp == NULL) continue;
17405 if (callback != NULL) {
17406 callback (nsp, userdata);
17407 }
17408 index++;
17409 }
17410
17411 return index;
17412 }
17413
VisitAuthorsInPub(PubdescPtr pdp,Pointer userdata,VisitAuthorFunc callback)17414 NLM_EXTERN Int4 VisitAuthorsInPub (PubdescPtr pdp, Pointer userdata, VisitAuthorFunc callback)
17415
17416 {
17417 CitArtPtr cap;
17418 CitBookPtr cbp;
17419 CitGenPtr cgp;
17420 CitPatPtr cpp;
17421 CitSubPtr csp;
17422 Int4 index = 0;
17423 ValNodePtr vnp;
17424
17425 if (pdp == NULL) return index;
17426
17427 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
17428 if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) continue;
17429 if (vnp->data.ptrvalue == NULL) continue;
17430 switch (vnp->choice) {
17431 case PUB_Gen :
17432 cgp = (CitGenPtr) vnp->data.ptrvalue;
17433 index += VisitAuthorsProc (cgp->authors, userdata, callback);
17434 break;
17435 case PUB_Sub :
17436 csp = (CitSubPtr) vnp->data.ptrvalue;
17437 index += VisitAuthorsProc (csp->authors, userdata, callback);
17438 break;
17439 case PUB_Article :
17440 cap = (CitArtPtr) vnp->data.ptrvalue;
17441 index += VisitAuthorsProc (cap->authors, userdata, callback);
17442 if (cap->from == 2 || cap->from == 3) {
17443 cbp = (CitBookPtr) cap->fromptr;
17444 if (cbp != NULL) {
17445 index += VisitAuthorsProc (cbp->authors, userdata, callback);
17446 }
17447 }
17448 break;
17449 case PUB_Book :
17450 cbp = (CitBookPtr) vnp->data.ptrvalue;
17451 index += VisitAuthorsProc (cbp->authors, userdata, callback);
17452 break;
17453 case PUB_Man :
17454 cbp = (CitBookPtr) vnp->data.ptrvalue;
17455 if (cbp->othertype == 2 && cbp->let_type == 3) {
17456 index += VisitAuthorsProc (cbp->authors, userdata, callback);
17457 }
17458 break;
17459 case PUB_Patent :
17460 cpp = (CitPatPtr) vnp->data.ptrvalue;
17461 index += VisitAuthorsProc (cpp->authors, userdata, callback);
17462 index += VisitAuthorsProc (cpp->applicants, userdata, callback);
17463 index += VisitAuthorsProc (cpp->assignees, userdata, callback);
17464 break;
17465 default :
17466 break;
17467 }
17468 }
17469
17470 return index;
17471 }
17472
17473
VisitPubdescsProc(SeqDescrPtr descr,SeqAnnotPtr annot,Pointer userdata,VisitPubdescsFunc callback)17474 static Int4 VisitPubdescsProc (SeqDescrPtr descr, SeqAnnotPtr annot, Pointer userdata, VisitPubdescsFunc callback)
17475
17476 {
17477 Int4 index = 0;
17478 PubdescPtr pdp;
17479 SeqAnnotPtr sap;
17480 SeqDescrPtr sdp;
17481 SeqFeatPtr sfp;
17482
17483 for (sdp = descr; sdp != NULL; sdp = sdp->next) {
17484 if (sdp->choice == Seq_descr_pub) {
17485 pdp = (PubdescPtr) sdp->data.ptrvalue;
17486 if (pdp != NULL) {
17487 if (callback != NULL) {
17488 callback (pdp, userdata);
17489 }
17490 index++;
17491 }
17492 }
17493 }
17494 for (sap = annot; sap != NULL; sap = sap->next) {
17495 if (sap->type != 1) continue;
17496 for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
17497 if (sfp->data.choice == SEQFEAT_PUB) {
17498 pdp = (PubdescPtr) sfp->data.value.ptrvalue;
17499 if (pdp != NULL) {
17500 if (callback != NULL) {
17501 callback (pdp, userdata);
17502 }
17503 index++;
17504 }
17505 }
17506 }
17507 }
17508 return index;
17509 }
17510
VisitPubdescsOnBsp(BioseqPtr bsp,Pointer userdata,VisitPubdescsFunc callback)17511 NLM_EXTERN Int4 VisitPubdescsOnBsp (BioseqPtr bsp, Pointer userdata, VisitPubdescsFunc callback)
17512
17513 {
17514 Int4 index = 0;
17515
17516 if (bsp == NULL) return index;
17517 index += VisitPubdescsProc (bsp->descr, bsp->annot, userdata, callback);
17518 return index;
17519 }
17520
VisitPubdescsOnSet(BioseqSetPtr bssp,Pointer userdata,VisitPubdescsFunc callback)17521 NLM_EXTERN Int4 VisitPubdescsOnSet (BioseqSetPtr bssp, Pointer userdata, VisitPubdescsFunc callback)
17522
17523 {
17524 Int4 index = 0;
17525
17526 if (bssp == NULL) return index;
17527 index += VisitPubdescsProc (bssp->descr, bssp->annot, userdata, callback);
17528 return index;
17529 }
17530
VisitPubdescsInSet(BioseqSetPtr bssp,Pointer userdata,VisitPubdescsFunc callback)17531 NLM_EXTERN Int4 VisitPubdescsInSet (BioseqSetPtr bssp, Pointer userdata, VisitPubdescsFunc callback)
17532
17533 {
17534 Int4 index = 0;
17535 SeqEntryPtr tmp;
17536
17537 if (bssp == NULL) return index;
17538 index += VisitPubdescsProc (bssp->descr, bssp->annot, userdata, callback);
17539 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17540 index += VisitPubdescsInSep (tmp, userdata, callback);
17541 }
17542 return index;
17543 }
17544
VisitPubdescsOnSep(SeqEntryPtr sep,Pointer userdata,VisitPubdescsFunc callback)17545 NLM_EXTERN Int4 VisitPubdescsOnSep (SeqEntryPtr sep, Pointer userdata, VisitPubdescsFunc callback)
17546
17547 {
17548 BioseqPtr bsp;
17549 BioseqSetPtr bssp;
17550 Int4 index = 0;
17551
17552 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17553 if (IS_Bioseq (sep)) {
17554 bsp = (BioseqPtr) sep->data.ptrvalue;
17555 index += VisitPubdescsOnBsp (bsp, userdata, callback);
17556 } else if (IS_Bioseq_set (sep)) {
17557 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17558 index += VisitPubdescsOnSet (bssp, userdata, callback);
17559 }
17560 return index;
17561 }
17562
VisitPubdescsInSep(SeqEntryPtr sep,Pointer userdata,VisitPubdescsFunc callback)17563 NLM_EXTERN Int4 VisitPubdescsInSep (SeqEntryPtr sep, Pointer userdata, VisitPubdescsFunc callback)
17564
17565 {
17566 BioseqPtr bsp;
17567 BioseqSetPtr bssp;
17568 Int4 index = 0;
17569
17570 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17571 if (IS_Bioseq (sep)) {
17572 bsp = (BioseqPtr) sep->data.ptrvalue;
17573 index += VisitPubdescsOnBsp (bsp, userdata, callback);
17574 } else if (IS_Bioseq_set (sep)) {
17575 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17576 index += VisitPubdescsInSet (bssp, userdata, callback);
17577 }
17578 return index;
17579 }
17580
17581
VisitBioSourcesProc(SeqDescrPtr descr,SeqAnnotPtr annot,Pointer userdata,VisitBioSourcesFunc callback)17582 static Int4 VisitBioSourcesProc (SeqDescrPtr descr, SeqAnnotPtr annot, Pointer userdata, VisitBioSourcesFunc callback)
17583
17584 {
17585 BioSourcePtr biop;
17586 Int4 index = 0;
17587 SeqAnnotPtr sap;
17588 SeqDescrPtr sdp;
17589 SeqFeatPtr sfp;
17590
17591 for (sdp = descr; sdp != NULL; sdp = sdp->next) {
17592 if (sdp->choice == Seq_descr_source) {
17593 biop = (BioSourcePtr) sdp->data.ptrvalue;
17594 if (biop != NULL) {
17595 if (callback != NULL) {
17596 callback (biop, userdata);
17597 }
17598 index++;
17599 }
17600 }
17601 }
17602 for (sap = annot; sap != NULL; sap = sap->next) {
17603 if (sap->type != 1) continue;
17604 for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
17605 if (sfp->data.choice == SEQFEAT_BIOSRC) {
17606 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
17607 if (biop != NULL) {
17608 if (callback != NULL) {
17609 callback (biop, userdata);
17610 }
17611 index++;
17612 }
17613 }
17614 }
17615 }
17616 return index;
17617 }
17618
VisitBioSourcesOnBsp(BioseqPtr bsp,Pointer userdata,VisitBioSourcesFunc callback)17619 NLM_EXTERN Int4 VisitBioSourcesOnBsp (BioseqPtr bsp, Pointer userdata, VisitBioSourcesFunc callback)
17620
17621 {
17622 Int4 index = 0;
17623
17624 if (bsp == NULL) return index;
17625 index += VisitBioSourcesProc (bsp->descr, bsp->annot, userdata, callback);
17626 return index;
17627 }
17628
VisitBioSourcesOnSet(BioseqSetPtr bssp,Pointer userdata,VisitBioSourcesFunc callback)17629 NLM_EXTERN Int4 VisitBioSourcesOnSet (BioseqSetPtr bssp, Pointer userdata, VisitBioSourcesFunc callback)
17630
17631 {
17632 Int4 index = 0;
17633
17634 if (bssp == NULL) return index;
17635 index += VisitBioSourcesProc (bssp->descr, bssp->annot, userdata, callback);
17636 return index;
17637 }
17638
VisitBioSourcesInSet(BioseqSetPtr bssp,Pointer userdata,VisitBioSourcesFunc callback)17639 NLM_EXTERN Int4 VisitBioSourcesInSet (BioseqSetPtr bssp, Pointer userdata, VisitBioSourcesFunc callback)
17640
17641 {
17642 Int4 index = 0;
17643 SeqEntryPtr tmp;
17644
17645 if (bssp == NULL) return index;
17646 index += VisitBioSourcesProc (bssp->descr, bssp->annot, userdata, callback);
17647 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17648 index += VisitBioSourcesInSep (tmp, userdata, callback);
17649 }
17650 return index;
17651 }
17652
VisitBioSourcesOnSep(SeqEntryPtr sep,Pointer userdata,VisitBioSourcesFunc callback)17653 NLM_EXTERN Int4 VisitBioSourcesOnSep (SeqEntryPtr sep, Pointer userdata, VisitBioSourcesFunc callback)
17654
17655 {
17656 BioseqPtr bsp;
17657 BioseqSetPtr bssp;
17658 Int4 index = 0;
17659
17660 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17661 if (IS_Bioseq (sep)) {
17662 bsp = (BioseqPtr) sep->data.ptrvalue;
17663 index += VisitBioSourcesOnBsp (bsp, userdata, callback);
17664 } else if (IS_Bioseq_set (sep)) {
17665 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17666 index += VisitBioSourcesOnSet (bssp, userdata, callback);
17667 }
17668 return index;
17669 }
17670
VisitBioSourcesInSep(SeqEntryPtr sep,Pointer userdata,VisitBioSourcesFunc callback)17671 NLM_EXTERN Int4 VisitBioSourcesInSep (SeqEntryPtr sep, Pointer userdata, VisitBioSourcesFunc callback)
17672
17673 {
17674 BioseqPtr bsp;
17675 BioseqSetPtr bssp;
17676 Int4 index = 0;
17677
17678 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17679 if (IS_Bioseq (sep)) {
17680 bsp = (BioseqPtr) sep->data.ptrvalue;
17681 index += VisitBioSourcesOnBsp (bsp, userdata, callback);
17682 } else if (IS_Bioseq_set (sep)) {
17683 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17684 index += VisitBioSourcesInSet (bssp, userdata, callback);
17685 }
17686 return index;
17687 }
17688
17689
VisitBioseqsInSet(BioseqSetPtr bssp,Pointer userdata,VisitBioseqsFunc callback)17690 NLM_EXTERN Int4 VisitBioseqsInSet (BioseqSetPtr bssp, Pointer userdata, VisitBioseqsFunc callback)
17691
17692 {
17693 Int4 index = 0;
17694 SeqEntryPtr tmp;
17695
17696 if (bssp == NULL) return index;
17697 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17698 index += VisitBioseqsInSep (tmp, userdata, callback);
17699 }
17700 return index;
17701 }
17702
VisitBioseqsInSep(SeqEntryPtr sep,Pointer userdata,VisitBioseqsFunc callback)17703 NLM_EXTERN Int4 VisitBioseqsInSep (SeqEntryPtr sep, Pointer userdata, VisitBioseqsFunc callback)
17704
17705 {
17706 BioseqPtr bsp;
17707 BioseqSetPtr bssp;
17708 Int4 index = 0;
17709
17710 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17711 if (IS_Bioseq (sep)) {
17712 bsp = (BioseqPtr) sep->data.ptrvalue;
17713 if (callback != NULL) {
17714 callback (bsp, userdata);
17715 }
17716 index++;
17717 } else if (IS_Bioseq_set (sep)) {
17718 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17719 index += VisitBioseqsInSet (bssp, userdata, callback);
17720 }
17721 return index;
17722 }
17723
VisitSequencesInSet(BioseqSetPtr bssp,Pointer userdata,Int2 filter,VisitSequencesFunc callback)17724 NLM_EXTERN Int4 VisitSequencesInSet (BioseqSetPtr bssp, Pointer userdata, Int2 filter, VisitSequencesFunc callback)
17725
17726 {
17727 Int4 index = 0;
17728 SeqEntryPtr tmp;
17729
17730 if (bssp == NULL) return index;
17731 if (bssp->_class == BioseqseqSet_class_parts) {
17732 if (filter != VISIT_PARTS) return index;
17733 filter = VISIT_MAINS;
17734 }
17735 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17736 index += VisitSequencesInSep (tmp, userdata, filter, callback);
17737 }
17738 return index;
17739 }
17740
VisitSequencesInSep(SeqEntryPtr sep,Pointer userdata,Int2 filter,VisitSequencesFunc callback)17741 NLM_EXTERN Int4 VisitSequencesInSep (SeqEntryPtr sep, Pointer userdata, Int2 filter, VisitSequencesFunc callback)
17742
17743 {
17744 BioseqPtr bsp;
17745 BioseqSetPtr bssp;
17746 Int4 index = 0;
17747
17748 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17749 if (IS_Bioseq (sep)) {
17750 bsp = (BioseqPtr) sep->data.ptrvalue;
17751 if (filter == VISIT_MAINS ||
17752 (filter == VISIT_NUCS && ISA_na (bsp->mol)) ||
17753 (filter == VISIT_PROTS && ISA_aa (bsp->mol))) {
17754 if (callback != NULL) {
17755 callback (bsp, userdata);
17756 }
17757 index++;
17758 }
17759 } else if (IS_Bioseq_set (sep)) {
17760 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17761 index += VisitSequencesInSet (bssp, userdata, filter, callback);
17762 }
17763 return index;
17764 }
17765
VisitSetsInSet(BioseqSetPtr bssp,Pointer userdata,VisitSetsFunc callback)17766 NLM_EXTERN Int4 VisitSetsInSet (BioseqSetPtr bssp, Pointer userdata, VisitSetsFunc callback)
17767
17768 {
17769 Int4 index = 0;
17770 SeqEntryPtr tmp;
17771
17772 if (bssp == NULL) return index;
17773 if (callback != NULL) {
17774 callback (bssp, userdata);
17775 }
17776 index++;
17777 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17778 index += VisitSetsInSep (tmp, userdata, callback);
17779 }
17780 return index;
17781 }
17782
VisitSetsInSep(SeqEntryPtr sep,Pointer userdata,VisitSetsFunc callback)17783 NLM_EXTERN Int4 VisitSetsInSep (SeqEntryPtr sep, Pointer userdata, VisitSetsFunc callback)
17784
17785 {
17786 BioseqSetPtr bssp;
17787 Int4 index = 0;
17788
17789 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17790 if (IS_Bioseq_set (sep)) {
17791 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17792 index += VisitSetsInSet (bssp, userdata, callback);
17793 }
17794 return index;
17795 }
17796
VisitElementsInSep(SeqEntryPtr sep,Pointer userdata,VisitElementsFunc callback)17797 NLM_EXTERN Int4 VisitElementsInSep (SeqEntryPtr sep, Pointer userdata, VisitElementsFunc callback)
17798
17799 {
17800 BioseqSetPtr bssp;
17801 Int4 index = 0;
17802 SeqEntryPtr tmp;
17803
17804 if (sep == NULL || sep->data.ptrvalue == NULL) return index;
17805 if (IS_Bioseq_set (sep)) {
17806 bssp = (BioseqSetPtr) sep->data.ptrvalue;
17807 if (bssp == NULL) return index;
17808 if (bssp->_class == 7 ||
17809 (bssp->_class >= 13 && bssp->_class <= 16) ||
17810 bssp->_class == BioseqseqSet_class_wgs_set ||
17811 bssp->_class == BioseqseqSet_class_gen_prod_set ||
17812 bssp->_class == BioseqseqSet_class_small_genome_set) {
17813 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
17814 index += VisitElementsInSep (tmp, userdata, callback);
17815 }
17816 return index;
17817 }
17818 }
17819 if (callback != NULL) {
17820 callback (sep, userdata);
17821 }
17822 index++;
17823 return index;
17824 }
17825
IsPopPhyEtcSet(Uint1 _class)17826 NLM_EXTERN Boolean IsPopPhyEtcSet (Uint1 _class)
17827
17828 {
17829 if (_class == BioseqseqSet_class_mut_set ||
17830 _class == BioseqseqSet_class_pop_set ||
17831 _class == BioseqseqSet_class_phy_set ||
17832 _class == BioseqseqSet_class_eco_set ||
17833 _class == BioseqseqSet_class_wgs_set ||
17834 _class == BioseqseqSet_class_small_genome_set) return TRUE;
17835 return FALSE;
17836 }
17837
17838
CleanupStringsForOneDescriptor(SeqDescPtr sdp,SeqEntryPtr sep)17839 NLM_EXTERN void CleanupStringsForOneDescriptor (SeqDescPtr sdp, SeqEntryPtr sep)
17840 {
17841 Boolean stripSerial = FALSE;
17842 Boolean isEmblOrDdbj = FALSE;
17843
17844 if (sdp == NULL) {
17845 return;
17846 }
17847 SeqEntryExplore (sep, (Pointer) &stripSerial, CheckForSwissProtID);
17848 SeqEntryExplore (sep, (Pointer) &isEmblOrDdbj, CheckForEmblDdbjID);
17849
17850 if (sdp->choice == Seq_descr_pub) {
17851 FlattenPubdesc (sdp->data.ptrvalue, NULL);
17852 }
17853
17854 CleanupDescriptorStrings (sdp, stripSerial, TRUE, NULL, isEmblOrDdbj);
17855 }
17856
17857
CleanupOneSeqFeat(SeqFeatPtr sfp)17858 NLM_EXTERN void CleanupOneSeqFeat (SeqFeatPtr sfp)
17859 {
17860 Boolean isEmblOrDdbj = FALSE;
17861 Boolean isJscan = FALSE;
17862 Boolean stripSerial = TRUE;
17863 ValNodePtr publist = NULL;
17864 SeqEntryPtr sep;
17865
17866 if (sfp->idx.entityID == 0) {
17867 return;
17868 }
17869 sep = GetTopSeqEntryForEntityID (sfp->idx.entityID);
17870
17871 SeqEntryExplore (sep, (Pointer) &stripSerial, CheckForSwissProtID);
17872 SeqEntryExplore (sep, (Pointer) &isEmblOrDdbj, CheckForEmblDdbjID);
17873 SeqEntryExplore (sep, (Pointer) &isJscan, CheckForJournalScanID);
17874 FlattenSfpCit (sfp, NULL);
17875 CleanUpSeqFeat (sfp, isEmblOrDdbj, isJscan, stripSerial, TRUE, &publist);
17876
17877 if (publist != NULL) {
17878 ChangeCitsOnFeats (sfp, publist);
17879 }
17880 ValNodeFreeData (publist);
17881 }
17882 //LCOV_EXCL_STOP
17883
RemoveFeatureLink(SeqFeatPtr sfp1,SeqFeatPtr sfp2)17884 NLM_EXTERN void RemoveFeatureLink (SeqFeatPtr sfp1, SeqFeatPtr sfp2)
17885 {
17886 SeqFeatXrefPtr xref, next, PNTR prevlink;
17887 ObjectIdPtr oip;
17888 SeqFeatPtr link_sfp;
17889 Char buf [32];
17890 CharPtr str = NULL;
17891
17892 if (sfp1 == NULL) return;
17893
17894 prevlink = (SeqFeatXrefPtr PNTR) &(sfp1->xref);
17895 xref = sfp1->xref;
17896 while (xref != NULL) {
17897 next = xref->next;
17898 link_sfp = NULL;
17899
17900 if (xref->id.choice == 3) {
17901 oip = (ObjectIdPtr) xref->id.value.ptrvalue;
17902 if (oip != NULL) {
17903 if (StringDoesHaveText (oip->str)) {
17904 str = oip->str;
17905 } else {
17906 sprintf (buf, "%ld", (long) oip->id);
17907 str = buf;
17908 }
17909 link_sfp = SeqMgrGetFeatureByFeatID (sfp1->idx.entityID, NULL, str, NULL, NULL);
17910 }
17911 }
17912 if (link_sfp == sfp2) {
17913 *prevlink = xref->next;
17914 xref->next = NULL;
17915 MemFree (xref);
17916 } else {
17917 prevlink = (SeqFeatXrefPtr PNTR) &(xref->next);
17918 }
17919
17920 xref = next;
17921 }
17922 }
17923
17924
LinkTwoFeatures(SeqFeatPtr dst,SeqFeatPtr sfp)17925 NLM_EXTERN void LinkTwoFeatures (SeqFeatPtr dst, SeqFeatPtr sfp)
17926
17927 {
17928 ChoicePtr cp;
17929 ObjectIdPtr oip;
17930 SeqFeatXrefPtr xref, prev_xref, next_xref;
17931 SeqFeatPtr old_match;
17932
17933 if (dst == NULL || sfp == NULL) return;
17934
17935 cp = &(dst->id);
17936 if (cp == NULL) return;
17937 if (cp->choice == 3) {
17938 /* don't create a duplicate xref, remove links to other features */
17939 xref = sfp->xref;
17940 prev_xref = NULL;
17941 while (xref != NULL) {
17942 next_xref = xref->next;
17943 if (xref->id.choice == 3 && xref->id.value.ptrvalue != NULL) {
17944 if (ObjectIdMatch (cp->value.ptrvalue, xref->id.value.ptrvalue)) {
17945 /* already have this xref */
17946 return;
17947 } else {
17948 old_match = SeqMgrGetFeatureByFeatID (sfp->idx.entityID, NULL, NULL, xref, NULL);
17949 RemoveFeatureLink (sfp, old_match);
17950 RemoveFeatureLink (old_match, sfp);
17951 }
17952 } else {
17953 prev_xref = xref;
17954 }
17955 xref = next_xref;
17956 }
17957
17958 oip = (ObjectIdPtr) cp->value.ptrvalue;
17959 if (oip != NULL) {
17960 oip = AsnIoMemCopy (oip, (AsnReadFunc) ObjectIdAsnRead,
17961 (AsnWriteFunc) ObjectIdAsnWrite);
17962 if (oip != NULL) {
17963 xref = SeqFeatXrefNew ();
17964 if (xref != NULL) {
17965 xref->id.choice = 3;
17966 xref->id.value.ptrvalue = (Pointer) oip;
17967 xref->next = sfp->xref;
17968 sfp->xref = xref;
17969 }
17970 }
17971 }
17972 }
17973 }
17974
17975 /* basic cleanup code from sqnutil3.c */
17976
/*
 * ConvertSourceFeatDescProc
 *
 * Feature-visit callback that turns a full-length BioSource feature into a
 * source descriptor on the Bioseq it is located on.
 *
 *   sfp      - candidate feature; ignored unless it is a BioSource feature
 *              whose location equals the whole Bioseq
 *   userdata - not used
 *
 * On conversion:
 *   - the BioSource pointer is moved from the feature to a newly created
 *     Seq_descr_source descriptor (the feature no longer owns it);
 *   - the feature is flagged with idx.deleteme for later removal;
 *   - the feature comment, if any, is moved into a new SUBSRC_other
 *     subsource appended to the end of the subsource list;
 *   - the feature dbxrefs, if any, are appended to biop->org->db, creating
 *     the OrgRef when necessary.
 *
 * The dbxref transfer is performed whether or not the feature has a
 * comment; previously an early return on a missing comment skipped it and
 * the dbxrefs were left on the feature that is about to be deleted.
 */
extern void ConvertSourceFeatDescProc (SeqFeatPtr sfp, Pointer userdata)

{
  BioSourcePtr   biop;
  BioseqPtr      bsp;
  SubSourcePtr   lastssp;
  ObjValNodePtr  ovp;
  SeqDescPtr     sdp;
  SeqEntryPtr    sep;
  SeqIdPtr       sip;
  SubSourcePtr   ssp;
  ValNode        vn;
  ValNodePtr     last_dbxref;

  /* look for biosource features */
  if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC) return;
  /* get bioseq by feature location */
  sip = SeqLocId (sfp->location);
  bsp = BioseqFind (sip);
  if (bsp == NULL) return;
  sip = SeqIdFindBest (bsp->id, 0);
  if (sip == NULL) return;
  /* stack-built whole-sequence location used only for the comparison */
  vn.choice = SEQLOC_WHOLE;
  vn.extended = 0;
  vn.data.ptrvalue = (Pointer) sip;
  vn.next = NULL;
  /* is feature full length? */
  if (SeqLocCompare (sfp->location, &vn) != SLC_A_EQ_B) return;
  sep = SeqMgrGetSeqEntryForData (bsp);
  if (sep == NULL) return;
  sdp = CreateNewDescriptor (sep, Seq_descr_source);
  if (sdp == NULL) return;
  /* move biosource from feature to descriptor */
  sdp->data.ptrvalue = sfp->data.value.ptrvalue;
  if (sdp->extended != 0) {
    ovp = (ObjValNodePtr) sdp;
    ovp->idx.subtype = Seq_descr_source;
  }
  sfp->data.value.ptrvalue = NULL;
  /* flag old feature for removal */
  sfp->idx.deleteme = TRUE;

  biop = (BioSourcePtr) sdp->data.ptrvalue;
  if (biop == NULL) return;

  /* move comment to subsource note */
  if (sfp->comment != NULL) {
    ssp = SubSourceNew ();
    if (ssp != NULL) {
      ssp->subtype = SUBSRC_other;
      ssp->name = sfp->comment;
      sfp->comment = NULL;
      /* link in at end, since BasicSeqEntry will have sorted this list */
      if (biop->subtype == NULL) {
        biop->subtype = ssp;
      } else {
        lastssp = biop->subtype;
        while (lastssp->next != NULL) {
          lastssp = lastssp->next;
        }
        lastssp->next = ssp;
      }
    }
  }

  /* move dbxrefs on feature to source */
  if (sfp->dbxref != NULL) {
    if (biop->org == NULL) {
      biop->org = OrgRefNew ();
    }
    /* if the OrgRef could not be allocated the dbxrefs stay on the feature */
    if (biop->org != NULL) {
      last_dbxref = biop->org->db;
      while (last_dbxref != NULL && last_dbxref->next != NULL) {
        last_dbxref = last_dbxref->next;
      }
      if (last_dbxref == NULL) {
        biop->org->db = sfp->dbxref;
      } else {
        last_dbxref->next = sfp->dbxref;
      }
      sfp->dbxref = NULL;
    }
  }
}
18055
/*
 * ExtendSingleGeneOnMRNA
 *
 * Bioseq-visit callback.  When an mRNA Bioseq carries exactly one gene
 * feature, the gene location is replaced by a single interval covering the
 * whole sequence (0 .. length-1) on the gene's original strand, keeping the
 * original 5'/3' partial flags.
 *
 *   bsp      - Bioseq to examine
 *   userdata - not used
 *
 * The Bioseq is left untouched when any of the following holds:
 *   - it is NULL, has length 0, or is not nucleic acid;
 *   - it has no molinfo descriptor with biomol MOLECULE_TYPE_MRNA;
 *   - its source descriptor has origin ORG_ARTIFICIAL and taxname
 *     "synthetic construct" (case-insensitive);
 *   - it has more than one gene, more than one mRNA feature, or more than
 *     one coding region (several coding regions are tolerated when the
 *     parent set has class BioseqseqSet_class_segset);
 *   - the gene location is not a single interval and the Bioseq has an EMBL
 *     or DDBJ Seq-id;
 *   - the gene location does not resolve to this Bioseq.
 *
 * When the parent set is a segset, MergeFeatureIntervalsToParts is called
 * on the gene after the location has been replaced.
 *
 * If the replacement interval cannot be created, the existing gene location
 * is kept instead of being freed and replaced with NULL.
 */
extern void ExtendSingleGeneOnMRNA (BioseqPtr bsp, Pointer userdata)

{
  BioSourcePtr       biop;
  BioseqSetPtr       bssp;
  SeqMgrFeatContext  context;
  SeqFeatPtr         gene = NULL;
  Boolean            has_nulls = FALSE;
  Boolean            is_master_seq = FALSE;
  MolInfoPtr         mip;
  Int4               num_cds = 0;
  Int4               num_mrna = 0;
  OrgRefPtr          orp;
  Boolean            partial5 = FALSE;
  Boolean            partial3 = FALSE;
  SeqDescrPtr        sdp;
  SeqFeatPtr         sfp;
  SeqIdPtr           sip;
  SeqLocPtr          slp;

  if (bsp == NULL || bsp->length == 0 || !ISA_na (bsp->mol)) {
    return;
  }

  /* only mRNA sequences are processed */
  sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
  if (sdp == NULL) return;
  mip = (MolInfoPtr) sdp->data.ptrvalue;
  if (mip == NULL || mip->biomol != MOLECULE_TYPE_MRNA) return;

  /* skip artificial "synthetic construct" sequences */
  sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
  if (sdp != NULL) {
    biop = (BioSourcePtr) sdp->data.ptrvalue;
    if (biop != NULL && biop->origin == ORG_ARTIFICIAL) {
      orp = biop->org;
      if (orp != NULL && StringICmp (orp->taxname, "synthetic construct") == 0) {
        return;
      }
    }
  }

  if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
    bssp = (BioseqSetPtr) bsp->idx.parentptr;
    if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset) {
      is_master_seq = TRUE;
    }
  }

  for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
       sfp != NULL;
       sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context)) {
    if (sfp->data.choice == SEQFEAT_GENE) {
      /* skip this sequence if it has more than one gene */
      if (gene != NULL) return;
      gene = sfp;
    } else if (sfp->data.choice == SEQFEAT_CDREGION) {
      num_cds++;
      /* skip this sequence if it has more than one coding region */
      if (num_cds > 1 && !is_master_seq) return;
    } else if (sfp->idx.subtype == FEATDEF_mRNA) {
      num_mrna++;
      /* skip this sequence if it has more than one mRNA */
      if (num_mrna > 1) return;
    }
  }

  if (gene == NULL) return;

  slp = gene->location;
  if (slp != NULL && slp->choice != SEQLOC_INT) {
    for (sip = bsp->id; sip != NULL; sip = sip->next) {
      /* skip this sequence if it is multi-interval and EMBL or DDBJ */
      if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) return;
    }
  }

  if (BioseqFindFromSeqLoc (gene->location) != bsp) return;

  /* capture properties of the old location before it is discarded */
  CheckSeqLocForPartial (gene->location, &partial5, &partial3);
  has_nulls = LocationHasNullsBetween (gene->location);

  /* gene should cover entire length of sequence */
  slp = SeqLocIntNew (0, bsp->length - 1, SeqLocStrand (gene->location), SeqIdFindBest (bsp->id, 0));
  if (slp == NULL) return;   /* keep the old location rather than losing it */

  SetSeqLocPartial (slp, partial5, partial3);
  gene->location = SeqLocFree (gene->location);
  gene->location = slp;
  if (is_master_seq) {
    MergeFeatureIntervalsToParts (gene, has_nulls);
  }
}
18156
18157 //LCOV_EXCL_START
DbtagParse(CharPtr str)18158 static DbtagPtr DbtagParse (
18159 CharPtr str
18160 )
18161
18162 {
18163 Boolean all_digits = TRUE;
18164 Char ch;
18165 DbtagPtr dbt;
18166 long num;
18167 Int2 num_digits = 0;
18168 ObjectIdPtr oip;
18169 CharPtr ptr;
18170 CharPtr tmp;
18171
18172 if (StringHasNoText (str)) return NULL;
18173 ptr = StringChr (str, ':');
18174 if (ptr == NULL) return NULL;
18175
18176 dbt = DbtagNew ();
18177 oip = ObjectIdNew ();
18178 if (dbt == NULL || oip == NULL) return NULL;
18179
18180 if (ptr != NULL) {
18181 *ptr = '\0';
18182 ptr++;
18183 }
18184
18185 dbt->db = StringSave (str);
18186 dbt->tag = oip;
18187
18188 tmp = ptr;
18189 ch = *tmp;
18190 while (ch != '\0') {
18191 if (IS_DIGIT (ch)) {
18192 num_digits++;
18193 } else {
18194 all_digits = FALSE;
18195 }
18196 tmp++;
18197 ch = *tmp;
18198 }
18199
18200 if (all_digits && *ptr != '0') {
18201 if (num_digits < 10 || (num_digits == 10 && StringCmp (ptr, "2147483647") <= 0)) {
18202 sscanf (ptr, "%ld", &num);
18203 oip->id = (Int4) num;
18204 return dbt;
18205 }
18206 }
18207
18208 oip->str = StringSave (ptr);
18209
18210 return dbt;
18211 }
18212 //LCOV_EXCL_STOP
18213
/*
 * GetNomenclatureUOP
 *
 * User-object visit callback.  When uop's type string is exactly
 * "OfficialNomenclature" (case-sensitive), uop is stored through userdata,
 * which must point to a UserObjectPtr.  A later match overwrites an earlier
 * one.  NULL arguments and user objects without a type are ignored.
 */
static void GetNomenclatureUOP (
  UserObjectPtr uop,
  Pointer userdata
)

{
  UserObjectPtr PNTR  result;

  if (uop == NULL || userdata == NULL) return;
  if (uop->type == NULL) return;

  if (StringCmp (uop->type->str, "OfficialNomenclature") == 0) {
    result = (UserObjectPtr PNTR) userdata;
    *result = uop;
  }
}
18230
18231
18232 //LCOV_EXCL_START
ModernizeGeneFields(SeqFeatPtr sfp)18233 NLM_EXTERN void ModernizeGeneFields (
18234 SeqFeatPtr sfp
18235 )
18236
18237 {
18238 GeneNomenclaturePtr gnp;
18239 GeneRefPtr grp;
18240 ObjectIdPtr oip;
18241 CharPtr str;
18242 CharPtr symbol = NULL, name = NULL, source = NULL;
18243 Uint2 status = 0;
18244 UserFieldPtr ufp;
18245 UserObjectPtr uop = NULL;
18246 UserObjectPtr curr, next;
18247 UserObjectPtr PNTR prev;
18248
18249 if (sfp == NULL) return;
18250 if (sfp->data.choice != SEQFEAT_GENE) return;
18251
18252 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
18253 if (grp == NULL) return;
18254
18255 if (grp->formal_name != NULL) return;
18256
18257 if (sfp->ext == NULL) return;
18258 VisitUserObjectsInUop (sfp->ext, (Pointer) &uop, GetNomenclatureUOP);
18259 if (uop == NULL) return;
18260
18261 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
18262 oip = ufp->label;
18263 if (oip == NULL || oip->str == NULL) continue;
18264 if (StringICmp (oip->str, "Symbol") == 0) {
18265 if (ufp->choice == 1) {
18266 str = (CharPtr) ufp->data.ptrvalue;
18267 if (str != NULL) {
18268 symbol = str;
18269 }
18270 }
18271 } else if (StringICmp (oip->str, "Name") == 0) {
18272 if (ufp->choice == 1) {
18273 str = (CharPtr) ufp->data.ptrvalue;
18274 if (str != NULL) {
18275 name = str;
18276 }
18277 }
18278 } else if (StringICmp (oip->str, "DataSource") == 0) {
18279 if (ufp->choice == 1) {
18280 str = (CharPtr) ufp->data.ptrvalue;
18281 if (str != NULL) {
18282 source = str;
18283 }
18284 }
18285 } else if (StringICmp (oip->str, "Status") == 0) {
18286 if (ufp->choice == 1) {
18287 str = (CharPtr) ufp->data.ptrvalue;
18288 if (str != NULL) {
18289 if (StringICmp (str, "Official") == 0) {
18290 status = 1;
18291 } else if (StringICmp (str, "Interim") == 0) {
18292 status = 2;
18293 }
18294 }
18295 }
18296 }
18297 }
18298 if (symbol == NULL && name == NULL && source == NULL && status == 0) return;
18299
18300 gnp = GeneNomenclatureNew ();
18301 if (gnp == NULL) return;
18302
18303 gnp->status = status;
18304 gnp->symbol = StringSaveNoNull (symbol);
18305 gnp->name = StringSaveNoNull (name);
18306 gnp->source = DbtagParse (source);
18307
18308 grp->formal_name = gnp;
18309
18310 prev = (UserObjectPtr PNTR) &(sfp->ext);
18311 curr = sfp->ext;
18312 while (curr != NULL) {
18313 next = curr->next;
18314 if (uop == curr) {
18315 *(prev) = curr->next;
18316 curr->next = NULL;
18317 UserObjectFree (curr);
18318 } else {
18319 prev = (UserObjectPtr PNTR) &(curr->next);
18320 }
18321 curr = next;
18322 }
18323 }
18324 //LCOV_EXCL_STOP
18325
18326
18327 /* PCR_primer manipulation functions */
18328
ParsePCRComponent(CharPtr strs)18329 static ValNodePtr ParsePCRComponent (
18330 CharPtr strs
18331 )
18332
18333 {
18334 ValNodePtr head = NULL;
18335 size_t len;
18336 CharPtr ptr, str, tmp;
18337
18338 if (StringHasNoText (strs)) return NULL;
18339
18340 tmp = StringSave (strs);
18341 if (tmp == NULL) return NULL;
18342
18343 str = tmp;
18344 len = StringLen (str);
18345 if (len > 1 && *str == '(' && str [len - 1] == ')' && StringChr (str + 1, '(') == NULL) {
18346 str [len - 1] = '\0';
18347 str++;
18348 }
18349
18350 while (StringDoesHaveText (str)) {
18351 ptr = StringChr (str, ',');
18352 if (ptr != NULL) {
18353 *ptr = '\0';
18354 ptr++;
18355 }
18356
18357 TrimSpacesAroundString (str);
18358 ValNodeCopyStr (&head, 0, str);
18359
18360 str = ptr;
18361 }
18362
18363 MemFree (tmp);
18364 return head;
18365 }
18366
ParsePCRStrings(CharPtr fwd_primer_seq,CharPtr rev_primer_seq,CharPtr fwd_primer_name,CharPtr rev_primer_name)18367 NLM_EXTERN ValNodePtr ParsePCRStrings (
18368 CharPtr fwd_primer_seq,
18369 CharPtr rev_primer_seq,
18370 CharPtr fwd_primer_name,
18371 CharPtr rev_primer_name
18372 )
18373
18374 {
18375 ValNodePtr curr_fwd_name;
18376 ValNodePtr curr_fwd_seq;
18377 ValNodePtr curr_rev_name;
18378 ValNodePtr curr_rev_seq;
18379 CharPtr fwd_name;
18380 CharPtr fwd_seq;
18381 CharPtr rev_name;
18382 CharPtr rev_seq;
18383 ValNodePtr fwd_name_list = NULL;
18384 ValNodePtr fwd_seq_list = NULL;
18385 ValNodePtr rev_name_list = NULL;
18386 ValNodePtr rev_seq_list = NULL;
18387 ValNodePtr head = NULL;
18388 Boolean okay;
18389 Int2 orig_order = 0;
18390 PcrSetPtr psp;
18391
18392 fwd_seq_list = ParsePCRComponent (fwd_primer_seq);
18393 rev_seq_list = ParsePCRComponent (rev_primer_seq);
18394 fwd_name_list = ParsePCRComponent (fwd_primer_name);
18395 rev_name_list = ParsePCRComponent (rev_primer_name);
18396
18397 curr_fwd_seq = fwd_seq_list;
18398 curr_rev_seq = rev_seq_list;
18399 curr_fwd_name = fwd_name_list;
18400 curr_rev_name = rev_name_list;
18401
18402 while (curr_fwd_seq != NULL || curr_rev_seq != NULL || curr_fwd_name != NULL || curr_rev_name != NULL) {
18403 fwd_seq = NULL;
18404 rev_seq = NULL;
18405 fwd_name = NULL;
18406 rev_name = NULL;
18407 okay = FALSE;
18408
18409 if (curr_fwd_seq != NULL) {
18410 fwd_seq = (CharPtr) curr_fwd_seq->data.ptrvalue;
18411 curr_fwd_seq = curr_fwd_seq->next;
18412 okay = TRUE;
18413 }
18414
18415 if (curr_rev_seq != NULL) {
18416 rev_seq = (CharPtr) curr_rev_seq->data.ptrvalue;
18417 curr_rev_seq = curr_rev_seq->next;
18418 okay = TRUE;
18419 }
18420
18421 if (curr_fwd_name != NULL) {
18422 fwd_name = (CharPtr) curr_fwd_name->data.ptrvalue;
18423 curr_fwd_name = curr_fwd_name->next;
18424 okay = TRUE;
18425 }
18426
18427 if (curr_rev_name != NULL) {
18428 rev_name = (CharPtr) curr_rev_name->data.ptrvalue;
18429 curr_rev_name = curr_rev_name->next;
18430 okay = TRUE;
18431 }
18432
18433 if (okay) {
18434 psp = (PcrSetPtr) MemNew (sizeof (PcrSet));
18435 if (psp != NULL) {
18436 psp->fwd_seq = StringSaveNoNull (fwd_seq);
18437 psp->rev_seq = StringSaveNoNull (rev_seq);
18438 psp->fwd_name = StringSaveNoNull (fwd_name);
18439 psp->rev_name = StringSaveNoNull (rev_name);
18440 orig_order++;
18441 psp->orig_order = orig_order;
18442 ValNodeAddPointer (&head, 0, (Pointer) psp);
18443 }
18444 }
18445 }
18446
18447 ValNodeFreeData (fwd_seq_list);
18448 ValNodeFreeData (rev_seq_list);
18449 ValNodeFreeData (fwd_name_list);
18450 ValNodeFreeData (rev_name_list);
18451
18452 return head;
18453 }
18454
ParsePCRSet(BioSourcePtr biop)18455 NLM_EXTERN ValNodePtr ParsePCRSet (
18456 BioSourcePtr biop
18457 )
18458
18459 {
18460 CharPtr fwd_primer_seq = NULL;
18461 CharPtr rev_primer_seq = NULL;
18462 CharPtr fwd_primer_name = NULL;
18463 CharPtr rev_primer_name = NULL;
18464 SubSourcePtr ssp;
18465
18466 if (biop == NULL) return NULL;
18467
18468 for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
18469 if (ssp->subtype == SUBSRC_fwd_primer_seq) {
18470 fwd_primer_seq = ssp->name;
18471 } else if (ssp->subtype == SUBSRC_rev_primer_seq) {
18472 rev_primer_seq = ssp->name;
18473 } else if (ssp->subtype == SUBSRC_fwd_primer_name) {
18474 fwd_primer_name = ssp->name;
18475 } else if (ssp->subtype == SUBSRC_rev_primer_name) {
18476 rev_primer_name = ssp->name;
18477 }
18478 }
18479
18480 return ParsePCRStrings (fwd_primer_seq, rev_primer_seq, fwd_primer_name, rev_primer_name);
18481 }
18482
ParsePCRColonString(CharPtr strs)18483 static ValNodePtr ParsePCRColonString (
18484 CharPtr strs
18485 )
18486
18487 {
18488 ValNodePtr head = NULL;
18489 size_t len;
18490 CharPtr ptr, str, tmp;
18491
18492 if (StringHasNoText (strs)) return NULL;
18493
18494 tmp = StringSave (strs);
18495 str = tmp;
18496 len = StringLen (str);
18497 if (len > 1 && StringChr (str, ':') != NULL) {
18498 while (StringDoesHaveText (str)) {
18499 ptr = StringChr (str, ':');
18500 if (ptr != NULL) {
18501 *ptr = '\0';
18502 ptr++;
18503 }
18504 TrimSpacesAroundString (str);
18505 ValNodeCopyStr (&head, 0, str);
18506 str = ptr;
18507 }
18508 } else {
18509 ValNodeCopyStr (&head, 0, str);
18510 }
18511
18512 MemFree (tmp);
18513 return head;
18514 }
18515
18516 //LCOV_EXCL_START
/*
 * FusePrimerNames
 *
 * Joins two primer names with a ':' separator.
 *
 *   first, second - names to join; either may be NULL
 *
 * Ownership of the result depends on the inputs, so callers must compare
 * the returned pointer with their arguments before freeing anything:
 *   - first == NULL  : second itself is returned (no allocation);
 *   - second == NULL : first itself is returned (no allocation);
 *   - otherwise      : a newly allocated "first:second" string is returned,
 *                      or NULL when the allocation fails.
 */
static CharPtr FusePrimerNames(
  CharPtr first,
  CharPtr second
)

{
  CharPtr  joined;
  size_t   needed;

  if (first == NULL) {
    return second;
  }
  if (second == NULL) {
    return first;
  }

  /* room for both names and the separator, plus slack */
  needed = StringLen (first) + StringLen (second) + 5;
  joined = MemNew (needed);
  if (joined != NULL) {
    StringCpy (joined, first);
    StringCat (joined, ":");
    StringCat (joined, second);
  }

  return joined;
}
18539
ModernizePCRPrimerHalf(CharPtr seq,CharPtr name)18540 static PCRPrimerPtr ModernizePCRPrimerHalf (
18541 CharPtr seq,
18542 CharPtr name
18543 )
18544
18545 {
18546 CharPtr curr_name = NULL, curr_seq = NULL, fused_name;
18547 PCRPrimerPtr curr_primer = NULL, last_primer = NULL, primer_set = NULL;
18548 ValNodePtr name_list, seq_list, name_vnp, seq_vnp;
18549
18550 seq_list = ParsePCRColonString (seq);
18551 name_list = ParsePCRColonString (name);
18552
18553 seq_vnp = seq_list;
18554 name_vnp = name_list;
18555
18556 while (seq_vnp != NULL /* || name_vnp != NULL */) {
18557 if (seq_vnp != NULL) {
18558 curr_seq = (CharPtr) seq_vnp->data.ptrvalue;
18559 seq_vnp = seq_vnp->next;
18560 }
18561 if (name_vnp != NULL) {
18562 curr_name = (CharPtr) name_vnp->data.ptrvalue;
18563 name_vnp = name_vnp->next;
18564 } else {
18565 curr_name = NULL;
18566 }
18567
18568 curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer));
18569 if (curr_primer != NULL) {
18570 curr_primer->seq = StringSaveNoNull (curr_seq);
18571 curr_primer->name = StringSaveNoNull (curr_name);
18572
18573 if (primer_set == NULL) {
18574 primer_set = curr_primer;
18575 }
18576 if (last_primer != NULL) {
18577 last_primer->next = curr_primer;
18578 }
18579 last_primer = curr_primer;
18580 }
18581 }
18582
18583 while (name_vnp != NULL && last_primer != NULL) {
18584 curr_name = (CharPtr) name_vnp->data.ptrvalue;
18585 fused_name = FusePrimerNames (last_primer->name, curr_name);
18586 MemFree (last_primer->name);
18587 last_primer->name = StringSaveNoNull (fused_name);
18588 name_vnp = name_vnp->next;
18589 }
18590
18591 while (name_vnp != NULL && last_primer == NULL) {
18592 curr_name = (CharPtr) name_vnp->data.ptrvalue;
18593 curr_primer = (PCRPrimerPtr) MemNew (sizeof (PCRPrimer));
18594 if (curr_primer != NULL) {
18595 curr_primer->name = StringSaveNoNull (curr_name);
18596
18597 if (primer_set == NULL) {
18598 primer_set = curr_primer;
18599 }
18600 if (last_primer != NULL) {
18601 last_primer->next = curr_primer;
18602 }
18603 last_primer = curr_primer;
18604 }
18605 name_vnp = name_vnp->next;
18606 }
18607
18608 ValNodeFreeData (seq_list);
18609 ValNodeFreeData (name_list);
18610
18611 return primer_set;
18612 }
18613
ModernizePCRPrimers(BioSourcePtr biop)18614 NLM_EXTERN void ModernizePCRPrimers (
18615 BioSourcePtr biop
18616 )
18617
18618 {
18619 PCRReactionSetPtr curr_reaction, last_reaction = NULL, reaction_set = NULL;
18620 PCRPrimerPtr forward, reverse;
18621 PcrSetPtr psp;
18622 ValNodePtr pset, vnp;
18623 SubSourcePtr nextssp;
18624 SubSourcePtr PNTR prevssp;
18625 SubSourcePtr ssp;
18626 Boolean unlink;
18627
18628 if (biop == NULL) return;
18629 /* if (biop->pcr_primers != NULL) return; */
18630
18631 pset = ParsePCRSet (biop);
18632 if (pset == NULL) return;
18633
18634 for (vnp = pset; vnp != NULL; vnp = vnp->next) {
18635 psp = (PcrSetPtr) vnp->data.ptrvalue;
18636 if (psp == NULL) continue;
18637
18638 forward = ModernizePCRPrimerHalf (psp->fwd_seq, psp->fwd_name);
18639 reverse = ModernizePCRPrimerHalf (psp->rev_seq, psp->rev_name);
18640
18641 if (forward != NULL || reverse != NULL) {
18642
18643 curr_reaction = (PCRReactionSetPtr) MemNew (sizeof (PCRReactionSet));
18644 if (curr_reaction != NULL) {
18645 curr_reaction->forward = forward;
18646 curr_reaction->reverse = reverse;
18647
18648 if (reaction_set == NULL) {
18649 reaction_set = curr_reaction;
18650 }
18651 if (last_reaction != NULL) {
18652 last_reaction->next = curr_reaction;
18653 }
18654 last_reaction = curr_reaction;
18655 }
18656 }
18657 }
18658
18659 FreePCRSet (pset);
18660
18661 if (reaction_set != NULL) {
18662 if (last_reaction != NULL) {
18663 /* merge with existing structured pcr_primers */
18664 last_reaction->next = biop->pcr_primers;
18665 }
18666 biop->pcr_primers = reaction_set;
18667
18668 ssp = biop->subtype;
18669 prevssp = (SubSourcePtr PNTR) &(biop->subtype);
18670 while (ssp != NULL) {
18671 nextssp = ssp->next;
18672 unlink= FALSE;
18673
18674 if (ssp->subtype == SUBSRC_fwd_primer_seq ||
18675 ssp->subtype == SUBSRC_rev_primer_seq ||
18676 ssp->subtype == SUBSRC_fwd_primer_name ||
18677 ssp->subtype == SUBSRC_rev_primer_name) {
18678 unlink = TRUE;
18679 }
18680
18681 if (unlink) {
18682 *prevssp = ssp->next;
18683 ssp->next = NULL;
18684 SubSourceFree (ssp);
18685 } else {
18686 prevssp = (SubSourcePtr PNTR) &(ssp->next);
18687 }
18688 ssp = nextssp;
18689 }
18690 }
18691 }
18692 //LCOV_EXCL_STOP
18693
PubIsEffectivelyEmpty(PubdescPtr pdp)18694 NLM_EXTERN Boolean PubIsEffectivelyEmpty (PubdescPtr pdp)
18695
18696 {
18697 ValNodePtr vnp;
18698
18699 if (pdp == NULL) return FALSE;
18700 vnp = pdp->pub;
18701 if (vnp != NULL && vnp->next == NULL && vnp->choice == PUB_Gen) {
18702 if (empty_citgen ((CitGenPtr) vnp->data.ptrvalue)) {
18703 return TRUE;
18704 }
18705 }
18706 return FALSE;
18707 }
18708
18709
18710
18711
18712