1 static char const rcsid[] = "$Id: toasn3.c,v 6.150 2016/11/02 23:36:09 kans Exp $";
2
3 /*****************************************************************************
4 *
5 * toasn3.c
6 * converts a Seq-entry or elements of a Bioseq-set to new Biosource style
7 *
8 *****************************************************************************/
9
10 #include <gather.h>
11 #include <toasn3.h>
12 #include <toporg.h>
13 #include <tfuns.h>
14 #include <terr.h>
15 #include <utilpub.h>
16 #include "ftusrstr.h"
17 #include <utilpars.h> /*ValidAminoAcid PROTO*/
18 #include <seqport.h> /*GetFrameFromLoc PROTO*/
19 #include <asn2ff6.h> /*AddGBQual PROTO*/
20 #include <sqnutils.h>
21 #include <explore.h>
22 #include <edutil.h>
23 #include <subutil.h>
24
/* File-local strings that THIS_FILE / THIS_MODULE are redirected to. */
static char *this_file = "toasn3";
static char *this_module = "toasn3";

/* #undef on a macro that is not defined is a no-op, so no #ifdef guard is
 * needed before redefining. */
#undef THIS_FILE
#define THIS_FILE this_file
#undef THIS_MODULE
#define THIS_MODULE this_module
35
36
37 #define num_bond 5
38 static CharPtr feat_bond[num_bond] = {NULL, "disulfide bond", "thiolester bond", "xlink bond", "thioether bond"};
39
40 #define num_site 27
41 static CharPtr feat_site[num_site] = {NULL,
42 "active",
43 "binding",
44 "cleavage",
45 "inhibit",
46 "modifi",
47 "glycosylation",
48 "myristoylation",
49 "mutagenized",
50 "metal-binding",
51 "phosphorylation",
52 "acetylation",
53 "amidation",
54 "methylation",
55 "hydroxylation",
56 "sulfatation",
57 "oxidative-deamination",
58 "pyrrolidone-carboxylic-acid",
59 "gamma-carboxyglutamic-acid",
60 "blocked",
61 "lipid-binding",
62 "np-binding",
63 "dna-binding",
64 "signal-peptide",
65 "transit-peptide",
66 "transmembrane-region",
67 "nitrosylation"
68 };
69
70 #define num_genome 15
71 static CharPtr genome[num_genome] = {"unknown", "genomic", "chloroplast", "chromoplast", "kinetoplast", "mitochondrion", "plastid", "macronuclear",
72 "extrachrom", "plasmid", "transposon", "insertion_seq", "cyanelle", "proviral", "virion"};
73
74 #define num_subtype 22
75 static CharPtr subtype[num_subtype] = {
76 "chromosome", "map", "clone", "sub_clone", "haplotype", "genotype", "sex",
77 "cell_line", "cell_type", "tissue_type", "clone_lib", "dev_stage",
78 "frequency", "germline", "rearranged", "lab_host", "pop_variant",
79 "tissue_lib", "plasmid", "transposon", "insertion_seq", "plastid"};
80
81 static ORGMOD orgmod_subtype[10] = {
82 { "strain", 2 }, {"sub_strain", 3}, {"variety", 6}, {"serotype",7}, {"cultivar",10}, {"isolate", 17}, {"specific_host", 21}, {"sub_species", 22}, {"note", 255}, { NULL, 0 }
83 };
84
85 #define num_bad_quals 3
86 static CharPtr bad_quals[num_bad_quals] = {
87 "label", "usedin", "citation"};
88
89 #define num_organelle 5
90 static ORGMOD organelle[num_organelle] = {
91 {"Mitochondrion ", 5}, {"Chloroplast ", 2},
92 {"Kinetoplast ", 4}, {"Cyanelle ", 12}, {"Plastid ", 6}
93 };
94
95 #define TOTAL_TECH 6
96 static ORGMOD check_tech[TOTAL_TECH] = {
97 {"EST", MI_TECH_est}, {"STS", MI_TECH_sts}, {"GSS", MI_TECH_survey},
98 {"HTG", MI_TECH_htgs_1 }, {"HTG", MI_TECH_htgs_2}, {"HTG", MI_TECH_htgs_3}
99 };
100 static void CheckGeneticCode(SeqEntryPtr sep);
101
/*
 * FindStr: looks for the first table entry that is a prefix of str.
 *
 *   array      table of names; NULL slots are skipped
 *   array_num  number of slots in array
 *   str        text to test
 *
 * Each entry is copied into a 64-byte buffer (longer names are truncated by
 * StringNCpy_0) and compared against the start of str twice: once verbatim
 * and once with every '-' in the entry replaced by a space.
 *
 * Returns the index of the first matching slot, or -1 when none matches.
 */
static Int2 FindStr(CharPtr PNTR array, Int2 array_num, CharPtr str)
{
  Char    name [64];
  CharPtr cp;
  Int2    slot;
  size_t  name_len;

  for (slot = 0; slot < array_num; slot++) {
    if (array[slot] != NULL) {
      StringNCpy_0 (name, array[slot], sizeof (name));
      name_len = StringLen (name);

      /* first try: entry exactly as written */
      if (StringNCmp (str, name, name_len) == 0) {
        return slot;
      }

      /* second try: hyphens in the entry written as blanks */
      for (cp = name; *cp != '\0'; cp++) {
        if (*cp == '-') {
          *cp = ' ';
        }
      }
      if (StringNCmp (str, name, name_len) == 0) {
        return slot;
      }
    }
  }
  return -1;
}
133
134 /*****************************************************************************
135 *
136 * ToAsn4(sep, isEmblOrDdbj)
137 * Converts pubs to asn.1 spec 4.0 within SeqEntryPtr - SeqEntryPubsAsn4
138 * move tax lineage from GBblock to BioSource
139 *****************************************************************************/
ToAsn4(SeqEntryPtr sep,Boolean isEmblOrDdbj)140 Int4 ToAsn4 (SeqEntryPtr sep, Boolean isEmblOrDdbj)
141 {
142 CharPtr lineage = NULL;
143
144 SeqEntryPubsAsn4(sep, isEmblOrDdbj);
145 SeqEntryExplore(sep, (Pointer) (&lineage), FindOldLineage);
146 if (lineage) {
147 SeqEntryExplore(sep, (Pointer) (&lineage), NewLineage);
148 MemFree(lineage);
149 }
150 return 0;
151 }
152
153 //LCOV_EXCL_START
154 // not used
/*
 * CopySfpId: copies the feature id of sfp into new.
 *
 * Nothing happens when sfp is NULL or has no id (choice 0).  Choice 1 is an
 * integer and is copied by value; choices 2, 3 and 4 are duplicated with
 * AsnIoMemCopy through the Giim, ObjectId and Dbtag readers/writers, so new
 * receives its own copy.  For any other choice only the choice value itself
 * is copied.
 */
static void CopySfpId(SeqFeatPtr new, SeqFeatPtr sfp)
{
  if (sfp == NULL || sfp->id.choice == 0) {
    return;
  }

  new->id.choice = sfp->id.choice;

  if (sfp->id.choice == 1) {
    new->id.value.intvalue = sfp->id.value.intvalue;
  } else if (sfp->id.choice == 2) {
    new->id.value.ptrvalue = AsnIoMemCopy ((Pointer) sfp->id.value.ptrvalue,
                                           (AsnReadFunc) GiimAsnRead,
                                           (AsnWriteFunc) GiimAsnWrite);
  } else if (sfp->id.choice == 3) {
    new->id.value.ptrvalue = AsnIoMemCopy ((Pointer) sfp->id.value.ptrvalue,
                                           (AsnReadFunc) ObjectIdAsnRead,
                                           (AsnWriteFunc) ObjectIdAsnWrite);
  } else if (sfp->id.choice == 4) {
    new->id.value.ptrvalue = AsnIoMemCopy ((Pointer) sfp->id.value.ptrvalue,
                                           (AsnReadFunc) DbtagAsnRead,
                                           (AsnWriteFunc) DbtagAsnWrite);
  }
}
192 //LCOV_EXCL_STOP
193
/*
 * toasn3_free: releases the OrgFix and MolFix work lists hanging off tap.
 *
 * For each OrgFix the private feature copy (built by AddOrgToFix /
 * AddImpToFix) is released: its feature id, location and citation set, then
 * the feature itself.  The id is freed here because AddOrgToFix fills it
 * via CopySfpId, which allocates copies for Giim / ObjectId / Dbtag ids.
 *
 * The bsp, orp, imp and molinfo members are not freed here.  tap itself is
 * not freed; its list heads are reset to NULL so that no dangling pointers
 * remain after the call.
 */
static void toasn3_free(ToAsn3Ptr tap)
{
  OrgFixPtr ofp, next_ofp;
  MolFixPtr mfp, next_mfp;

  if (tap == NULL) {
    return;
  }

  for (ofp = tap->ofp; ofp != NULL; ofp = next_ofp) {
    next_ofp = ofp->next;
    if (ofp->sfp != NULL) {
      SeqFeatIdFree (&(ofp->sfp->id));
      SeqLocFree (ofp->sfp->location);
      if (ofp->sfp->cit != NULL) {
        PubSetFree (ofp->sfp->cit);
      }
      MemFree (ofp->sfp);
    }
    MemFree (ofp);
  }
  tap->ofp = NULL;

  for (mfp = tap->mfp; mfp != NULL; mfp = next_mfp) {
    next_mfp = mfp->next;
    MemFree (mfp);
  }
  tap->mfp = NULL;
}
222
/*
 * vnp_psp_free: frees a ValNode chain whose data pointers are PubStructs.
 * Each non-NULL PubStruct is released with FreePubStruct, then the node
 * itself is freed.
 */
static void vnp_psp_free(ValNodePtr vnp)
{
  ValNodePtr node;
  ValNodePtr following;

  for (node = vnp; node != NULL; node = following) {
    following = node->next;
    if (node->data.ptrvalue != NULL) {
      FreePubStruct ((PubStructPtr) node->data.ptrvalue);
    }
    MemFree (node);
  }
}
238
/*
 * vnp_list_free: frees a ValNode chain whose data pointers are Pubdescs.
 * Each non-NULL Pubdesc is released with PubdescFree, then the node itself
 * is freed.
 */
static void vnp_list_free(ValNodePtr vnp)
{
  ValNodePtr node;
  ValNodePtr following;

  for (node = vnp; node != NULL; node = following) {
    following = node->next;
    if (node->data.ptrvalue != NULL) {
      PubdescFree ((PubdescPtr) node->data.ptrvalue);
    }
    MemFree (node);
  }
}
254
255
NOT_segment(SeqEntryPtr sep)256 static Boolean NOT_segment(SeqEntryPtr sep)
257 {
258 BioseqSetPtr bssp;
259 SeqEntryPtr seqsep;
260
261 if (IS_Bioseq(sep))
262 return TRUE;
263 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
264 if (bssp->_class == 1) { /* 1 - nucprot set */
265 seqsep = bssp->seq_set;
266 if (seqsep == NULL) {
267 return TRUE;
268 }
269 if (seqsep->choice == 1) {
270 return TRUE;
271 }
272 }
273 return FALSE;
274 }
275
OrgFixNew(void)276 static OrgFixPtr OrgFixNew(void)
277 {
278 OrgFixPtr ofp;
279
280 ofp = MemNew(sizeof(OrgFix));
281 ofp->contains = NULL;
282 ofp->desc = FALSE;
283 ofp->sfp = NULL;
284 ofp->imp = NULL;
285 ofp->orp = NULL;
286
287 return ofp;
288 }
289
290 //LCOV_EXCL_START
291 // used for rescuing biosource and molinfo from modif, mol-type, and method descriptors,
292 // which are obsolete
293 /*****************************************************************************/
/*
 * AddOrgToFix: records an Org-ref (from a descriptor or a feature) on
 * tap->ofp for later conversion.
 *
 *   orp    Org-ref to remember (stored by pointer, not copied)
 *   tap    conversion state; the new record is tied onto tap->ofp
 *   mod    when non-NULL, its data pointer is stored as the record's modif
 *   sep    entry the Org-ref was found in
 *   vnp    non-NULL marks the record as coming from a descriptor
 *   sfp    when non-NULL, a private feature holding copies of its id,
 *          location and citation set is attached to the record
 *   index  entry index stored on the record
 */
static void AddOrgToFix (OrgRefPtr orp, ToAsn3Ptr tap, ValNodePtr mod,
                         SeqEntryPtr sep, ValNodePtr vnp, SeqFeatPtr sfp, Int4 index)
{
  OrgFixPtr  entry;
  SeqFeatPtr copy;

  entry = OrgFixNew();
  entry->contains = sep;
  entry->desc = (vnp != NULL) ? TRUE : FALSE;
  entry->orp = orp;
  entry->index = index;
  if (mod != NULL) {
    entry->modif = mod->data.ptrvalue;
  }

  if (sfp != NULL) {
    copy = SeqFeatNew();
    if (sfp->id.choice) {
      CopySfpId(copy, sfp);
    }
    copy->location = AsnIoMemCopy(sfp->location,
                                  (AsnReadFunc) SeqLocAsnRead,
                                  (AsnWriteFunc) SeqLocAsnWrite);
    if (sfp->cit) {
      copy->cit = AsnIoMemCopy(sfp->cit,
                               (AsnReadFunc) PubSetAsnRead,
                               (AsnWriteFunc) PubSetAsnWrite);
    }
    entry->sfp = copy;
  }

  tap->ofp = tie_next_biosource(tap->ofp, entry);
}
330
331 // used for rescuing molinfo from modif, mol-type, and method descriptors,
332 // which are obsolete
333 /*****************************************************************************/
/*
 * AddMolToFix: records molecule information on tap->mfp for later
 * conversion to MolInfo.
 *
 *   tap    conversion state; the new record is tied onto tap->mfp
 *   sep    entry the values were found in
 *   mol    stored as the record's mol when non-zero
 *   mod    stored as the record's modif chain when non-NULL (not copied)
 *   meth   stored as the record's method when non-zero
 *   index  entry index stored on the record
 */
static void AddMolToFix (ToAsn3Ptr tap, SeqEntryPtr sep,
                         Uint1 mol, ValNodePtr mod, Uint1 meth, Int4 index)
{
  MolFixPtr entry;

  entry = MemNew(sizeof(MolFix));
  entry->contains = sep;
  entry->index = index;
  if (mol != 0) {
    entry->mol = mol;
  }
  if (meth != 0) {
    entry->method = meth;
  }
  if (mod != NULL) {
    entry->modif = mod;
  }

  tap->mfp = tie_next_mol(tap->mfp, entry);
}
353 //LCOV_EXCL_STOP
354 /*****************************************************************************/
355
/*
 * AddImpToFix: records an import feature on tap->ofp for later conversion
 * to a BioSource.
 *
 *   imp    feature to remember (stored by pointer, not copied)
 *   tap    conversion state; the new record is tied onto tap->ofp
 *   sep    entry the feature was found in
 *   vnp    non-NULL marks the record as descriptor-level
 *   sfp    when non-NULL, a private feature holding copies of its location
 *          and citation set is attached to the record (the id is not
 *          copied here)
 *   index  entry index stored on the record
 */
static void AddImpToFix (SeqFeatPtr imp, ToAsn3Ptr tap, SeqEntryPtr sep,
                         ValNodePtr vnp, SeqFeatPtr sfp, Int4 index)
{
  OrgFixPtr  entry;
  SeqFeatPtr copy;

  entry = OrgFixNew();
  entry->contains = sep;
  entry->desc = (vnp != NULL) ? TRUE : FALSE;
  entry->imp = imp;
  entry->index = index;

  if (sfp != NULL) {
    copy = SeqFeatNew();
    copy->location = AsnIoMemCopy(sfp->location,
                                  (AsnReadFunc) SeqLocAsnRead,
                                  (AsnWriteFunc) SeqLocAsnWrite);
    if (sfp->cit) {
      copy->cit = AsnIoMemCopy(sfp->cit,
                               (AsnReadFunc) PubSetAsnRead,
                               (AsnWriteFunc) PubSetAsnWrite);
    }
    entry->sfp = copy;
  }

  tap->ofp = tie_next_biosource(tap->ofp, entry);
}
386
387 /*****************************************************************************/
FixToAsn(SeqEntryPtr sep,ToAsn3Ptr tap)388 static void FixToAsn(SeqEntryPtr sep, ToAsn3Ptr tap)
389 {
390 OrgFixPtr ofp;
391 BioSourcePtr bsp;
392 MolFixPtr mfp;
393 MolInfoPtr mfi;
394 Uint1 mod;
395 ValNodePtr vnp;
396 OrgRefPtr orp;
397 GBQualPtr q;
398 CharPtr tmp;
399 Int2 i, len = 0;
400
401 //LCOV_EXCL_START
402 // used for rescuing molinfo from modif, mol-type, and method descriptors,
403 // which are obsolete
404 mfp = tap->mfp;
405 while (mfp != NULL) {
406
407 mfi = NULL;
408 if (mfp->mol != 0) {
409 mfi = new_info(mfi);
410 mfi->biomol = mfp->mol;
411 }
412 if (mfp->method != 0) {
413 mfi = new_info(mfi);
414 mfi->tech = mfp->method + 7;
415 }
416 for(vnp = mfp->modif; vnp != NULL; vnp=vnp->next) {
417 mod = vnp->data.intvalue;
418 mfi = ModToMolInfo(mfi, mod);
419 }
420 mfp->molinfo = mfi;
421 mfp = mfp->next;
422
423 }
424 //LCOV_EXCL_STOP
425
426 /* look for Org-refs (desc or feature) and create Biosource */
427 for (ofp = tap->ofp; ofp != NULL; ofp = ofp->next) {
428 if (ofp->orp != NULL && (ofp->desc != FALSE || ofp->sfp != NULL)) {
429 bsp = BioSourceNew();
430 bsp->org = AsnIoMemCopy(ofp->orp, (AsnReadFunc) OrgRefAsnRead,
431 (AsnWriteFunc) OrgRefAsnWrite);
432 tap->had_biosource = TRUE;
433 //LCOV_EXCL_START
434 // used for rescuing biosource from modif, mol-type, and method descriptors,
435 // which are obsolete
436 for (mfp = tap->mfp; mfp; mfp = mfp->next) {
437 if (ofp->index < mfp->index) {
438 continue;
439 }
440 for(vnp = mfp->modif; vnp != NULL; vnp=vnp->next) {
441 mod = vnp->data.intvalue;
442 if (bsp == NULL) {
443 bsp = BioSourceNew();
444 }
445 ModToBiosource(bsp, mod);
446 }
447 }
448 //LCOV_EXCL_STOP
449 ofp->bsp = bsp;
450 }
451 }
452 /* look for Impfeat: create new bsp for every "source" (desc or feature) */
453 for (ofp = tap->ofp; ofp != NULL; ofp = ofp->next) {
454 if (ofp->imp == NULL) {
455 continue;
456 }
457 if (ofp->desc == TRUE) {
458 bsp = BioSourceNew();
459 orp = OrgRefNew();
460 for(q = ofp->imp->qual; q != NULL; q = q->next) {
461 if (StringCmp(q->qual, "organism") == 0) {
462 tmp = MemNew(StringLen(q->val)+1);
463 StringCpy(tmp, q->val);
464 for (i = 0; i < num_organelle; i++) {
465 if (StringNCmp(tmp, organelle[i].name,
466 StringLen(organelle[i].name)) == 0) {
467 len = StringLen(organelle[i].name);
468 bsp->genome = organelle[i].num;
469 break;
470 }
471 }
472 orp->taxname = StringSave(tmp + len);
473 MemFree(tmp);
474 }
475 }
476 bsp->org = orp;
477 if (ofp->imp && ofp->imp->qual)
478 CheckQualsWithComm(bsp, ofp->imp);
479 ofp->bsp = bsp;
480 } else if(ofp->sfp != NULL) {
481 bsp = BioSourceNew();
482 orp = OrgRefNew();
483 for(q = ofp->imp->qual; q != NULL; q = q->next) {
484 if (StringCmp(q->qual, "organism") == 0) {
485 tmp = MemNew(StringLen(q->val)+1);
486 StringCpy(tmp, q->val);
487 for (i = 0; i < num_organelle; i++) {
488 if (StringNCmp(tmp, organelle[i].name,
489 StringLen(organelle[i].name)) == 0) {
490 len = StringLen(organelle[i].name);
491 bsp->genome = organelle[i].num;
492 break;
493 }
494 }
495 orp->taxname = StringSave(tmp + len);
496 MemFree(tmp);
497 }
498 }
499 bsp->org = orp;
500 if (ofp->imp && ofp->imp)
501 CheckQualsWithComm(bsp, ofp->imp);
502 ofp->bsp = bsp;
503 }
504 }
505 }
506
507 /*****************************************************************************
508 *
509 * Build MolInfo from GIBBmod and GIBBmol GIBBmethod
510 *
511 *****************************************************************************/
/*
 * FixMol: SeqEntryExplore callback that attaches prepared MolInfo objects
 * as molinfo descriptors.
 *
 *   sep     entry being visited; Bioseqs whose repr is not raw, const or
 *           delta are left alone
 *   data    head of the MolFix list
 *   index   index of the visited entry; only records with the same index
 *           and a non-NULL molinfo are used
 *   indent  not used
 *
 * The MolInfo pointer is handed to the new descriptor, not copied.  When
 * the entry has no descriptors yet, the new node becomes the head of the
 * chain (SeqDescrNew only links the node when given an existing chain).
 */
static void FixMol (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  ValNodePtr PNTR descr_head;
  ValNodePtr      node;
  BioseqPtr       bsp;
  BioseqSetPtr    bssp;
  MolFixPtr       mfp;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)
        && (bsp->repr != Seq_repr_delta)) {
      return;
    }
    descr_head = &(bsp->descr);
  } else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    descr_head = &(bssp->descr);
  }

  for (mfp = (MolFixPtr) data; mfp != NULL; mfp = mfp->next) {
    if (mfp->index != index || mfp->molinfo == NULL) {
      continue;
    }
    node = SeqDescrNew(*descr_head);
    if (node == NULL) {
      continue;
    }
    node->choice = Seq_descr_molinfo;
    node->data.ptrvalue = mfp->molinfo;
    if (*descr_head == NULL) {
      *descr_head = node;
    }
  }
}
545
/*
 * FixProtMolInfo: SeqEntryExplore callback that makes sure a protein
 * Bioseq carries a MolInfo with a biomol value.
 *
 * Every existing molinfo descriptor whose biomol is 0 gets biomol 8.  When
 * the last molinfo descriptor seen has no MolInfo (or there is no molinfo
 * descriptor at all), a new descriptor with biomol 8 is created.  If the
 * descriptor cannot be created, the freshly allocated MolInfo is released
 * rather than leaked.
 *
 * data, index and indent are not used.
 */
static void FixProtMolInfo (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
  BioseqPtr   bsp;
  MolInfoPtr  mip = NULL;
  ValNodePtr  vnp;

  if (! IS_Bioseq(sep)) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;
  if (! ISA_aa (bsp->mol)) return;

  for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice != Seq_descr_molinfo) continue;
    mip = (MolInfoPtr) vnp->data.ptrvalue;
    if (mip != NULL && mip->biomol == 0) {
      mip->biomol = 8;
    }
  }
  if (mip != NULL) return;

  mip = MolInfoNew ();
  if (mip == NULL) return;
  mip->biomol = 8;
  vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
  if (vnp == NULL) {
    MolInfoFree (mip);
    return;
  }
  vnp->data.ptrvalue = (Pointer) mip;
}
576
/*
 * FuseMolInfos: SeqEntryExplore callback that folds later molinfo
 * descriptors of a Bioseq into the first one.
 *
 * The first non-NULL MolInfo found is the keeper.  For each later MolInfo,
 * biomol, tech and completeness are copied into the keeper where the keeper
 * still has 0.  If all three fields then agree, the later descriptor is
 * unlinked and freed; its techexp string is handed to the keeper first when
 * the keeper has none.  Descriptors that still disagree are left in place.
 *
 * data, index and indent are not used.
 */
static void FuseMolInfos (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
  BioseqPtr        bsp;
  MolInfoPtr       keeper = NULL;
  MolInfoPtr       cur = NULL;
  ValNodePtr       node;
  ValNodePtr       following;
  ValNodePtr PNTR  link;
  Boolean          drop;

  if (! IS_Bioseq(sep)) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  link = &(bsp->descr);
  for (node = bsp->descr; node != NULL; node = following) {
    following = node->next;
    drop = FALSE;

    if (node->choice == Seq_descr_molinfo) {
      cur = (MolInfoPtr) node->data.ptrvalue;
      if (keeper == NULL) {
        keeper = cur;
      } else if (cur != NULL) {
        if (keeper->biomol == 0) {
          keeper->biomol = cur->biomol;
        }
        if (keeper->tech == 0) {
          keeper->tech = cur->tech;
        }
        if (keeper->completeness == 0) {
          keeper->completeness = cur->completeness;
        }
        if (keeper->biomol == cur->biomol &&
            keeper->tech == cur->tech &&
            keeper->completeness == cur->completeness) {
          if (keeper->techexp == NULL) {
            keeper->techexp = cur->techexp;
            cur->techexp = NULL;
          }
          drop = TRUE;
        }
      }
    }

    if (! drop) {
      link = &(node->next);
      continue;
    }

    /* unlink the redundant descriptor, then free its MolInfo and node */
    *link = node->next;
    node->next = NULL;
    MolInfoFree (cur);
    ValNodeFree (node);
  }
}
633
634 //LCOV_EXCL_START
635 // used for rescuing biosource from org desc and org feat, which
636 //are converted to biosource desc and biosource feat earlier
637 /*****************************************************************************
638 *
639 * Build Biosource from descr-org and features
640 *
641 *****************************************************************************/
/*
 * FixOrg: SeqEntryExplore callback that attaches prepared BioSources to
 * the visited entry as source descriptors and/or source features.
 *
 *   sep     entry being visited; Bioseqs whose repr is not raw, const or
 *           delta are left alone
 *   data    head of the OrgFix list
 *   index   index of the visited entry; only records with the same index
 *           and a non-NULL bsp are used
 *   indent  not used
 *
 * Descriptor records (desc == TRUE) get a new Seq_descr_source node; when
 * the entry has no descriptors yet the node becomes the head of the chain.
 * Records with a feature copy get a new SEQFEAT_BIOSRC feature carrying
 * copies of the id, location and citation set.  The feature is tied onto
 * the first feature-table annotation (type 1) of the entry; if the entry
 * has none, a new feature-table annotation is appended.
 *
 * NOTE(review): the BioSource pointer is handed over, not copied, so a
 * record with both desc == TRUE and a feature copy shares one BioSource
 * between descriptor and feature - confirm callers never build that.
 */
static void FixOrg (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  ValNodePtr PNTR  descr_head;
  SeqAnnotPtr PNTR annot_head;
  ValNodePtr       node;
  BioseqPtr        bsp;
  BioseqSetPtr     bssp;
  OrgFixPtr        ofp;
  SeqAnnotPtr      sap;
  SeqAnnotPtr      ftable;
  SeqFeatPtr       feat;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)
        && (bsp->repr != Seq_repr_delta)) {
      return;
    }
    descr_head = &(bsp->descr);
    annot_head = &(bsp->annot);
  } else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    descr_head = &(bssp->descr);
    annot_head = &(bssp->annot);
  }

  for (ofp = (OrgFixPtr) data; ofp != NULL; ofp = ofp->next) {
    if (ofp->index != index || ofp->bsp == NULL) {
      continue;
    }

    if (ofp->desc == TRUE) {
      node = SeqDescrNew(*descr_head);
      if (node != NULL) {
        node->choice = Seq_descr_source;
        node->data.ptrvalue = ofp->bsp;
        if (*descr_head == NULL) {
          *descr_head = node;
        }
      }
    }

    if (ofp->sfp == NULL) {
      continue;
    }

    /* locate the first feature table, creating one if the entry has none */
    ftable = NULL;
    for (sap = *annot_head; sap != NULL; sap = sap->next) {
      if (sap->type == 1) {
        ftable = sap;
        break;
      }
    }
    if (ftable == NULL) {
      ftable = SeqAnnotNew();
      if (ftable == NULL) {
        continue;
      }
      ftable->type = 1;
      if (*annot_head == NULL) {
        *annot_head = ftable;
      } else {
        for (sap = *annot_head; sap->next != NULL; sap = sap->next) {
          continue;
        }
        sap->next = ftable;
      }
    }

    feat = SeqFeatNew();
    if (ofp->sfp->id.choice) {
      CopySfpId(feat, ofp->sfp);
    }
    feat->data.choice = SEQFEAT_BIOSRC;
    feat->data.value.ptrvalue = ofp->bsp;
    feat->location = AsnIoMemCopy(ofp->sfp->location,
                                  (AsnReadFunc) SeqLocAsnRead,
                                  (AsnWriteFunc) SeqLocAsnWrite);
    if (ofp->sfp->cit) {
      feat->cit = AsnIoMemCopy(ofp->sfp->cit,
                               (AsnReadFunc) PubSetAsnRead,
                               (AsnWriteFunc) PubSetAsnWrite);
    }
    ftable->data = tie_feat(ftable->data, feat);
  }
}
694 //LCOV_EXCL_STOP
695
696 /*****************************************************************************
697 *
698 * HasSiteRef(sfp, userdata)
699 * Checks for Site-ref ImpFeat before unnecessarily rearranging pub descriptors
700 *****************************************************************************/
/*
 * HasSiteRef: feature visitor that sets the Boolean pointed to by userdata
 * to TRUE when sfp is an import feature with key "Site-ref" and a non-NULL
 * citation set.  The flag is never cleared here.
 */
static void HasSiteRef (SeqFeatPtr sfp, Pointer userdata)

{
  ImpFeatPtr ifp;

  if (sfp->cit == NULL || sfp->data.choice != SEQFEAT_IMP) return;

  ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
  if (ifp == NULL) return;
  if (StringCmp(ifp->key, "Site-ref") != 0) return;

  *((BoolPtr) userdata) = TRUE;
}
716
717 /*****************************************************************************
718 *
*   SeqEntryPubsAsn4Ex(sep, isEmblOrDdbj, uniqueOnBioseq)
720 * Converts pubs to asn.1 spec 4.0 within SeqEntryPtr
721 *****************************************************************************/
SeqEntryPubsAsn4Ex(SeqEntryPtr sep,Boolean isEmblOrDdbj,Boolean uniqueOnBioseq)722 Int4 SeqEntryPubsAsn4Ex (SeqEntryPtr sep, Boolean isEmblOrDdbj, Boolean uniqueOnBioseq)
723 {
724 BioseqPtr bsp = NULL;
725 BioseqSetPtr bioset = NULL;
726 ValNodePtr vnp = NULL, publist, tmp, v;
727 PubdescPtr pubdesc;
728 Boolean foundSitRef = FALSE;
729
730 if (IS_Bioseq(sep)) {
731 bsp = (BioseqPtr) (sep->data.ptrvalue);
732 } else if (IS_Bioseq_set(sep)) {
733 bioset = (BioseqSetPtr) (sep->data.ptrvalue); /* top level set */
734 }
735 SeqEntryExplore(sep, &vnp, FindCit);
736 SeqEntryExplore(sep, &vnp, ChangeCitQual);
737 vnp_psp_free(vnp);
738
739 VisitFeaturesInSep (sep, (Pointer) &foundSitRef, HasSiteRef);
740 if (foundSitRef) {
741 SeqEntryExplore(sep, NULL, NewPubs);
742 }
743 SeqEntryExplore(sep, NULL, DeleteSites);
744
745 /* move pubs in set to the top level */
746 if (bioset && bioset->_class != 9 && (! isEmblOrDdbj)) {
747 publist = NULL;
748 SeqEntryExplore(sep, (Pointer) NULL, MoveSegmPubs);
749 SeqEntryExplore(sep, (Pointer) NULL, MoveNPPubs);
750 /* unique pubs on the set level*/
751 tmp = ValNodeExtractList(&bioset->descr, Seq_descr_pub);
752 for (v = tmp; v; v = v->next) {
753 pubdesc = v->data.ptrvalue;
754 publist = AddToList(publist, NULL, pubdesc);
755 }
756 bioset->descr = ValNodeLink(&(bioset->descr), publist);
757 /* check pubs in Bioseqs, delete if they are already on the top */
758 for (v = publist; v; v = v->next) {
759 pubdesc = v->data.ptrvalue;
760 SeqEntryExplore(sep, pubdesc, DeletePubs);
761 }
762 vnp_list_free(tmp);
763 }
764 if (uniqueOnBioseq && bsp != NULL && (! isEmblOrDdbj)) {
765 /* unique pubs on the bioseq level*/
766 publist = NULL;
767 tmp = ValNodeExtractList(&bsp->descr, Seq_descr_pub);
768 for (v = tmp; v; v = v->next) {
769 pubdesc = v->data.ptrvalue;
770 publist = AddToList(publist, NULL, pubdesc);
771 }
772 bsp->descr = ValNodeLink(&(bsp->descr), publist);
773 vnp_list_free(tmp);
774 }
775 SeqEntryExplore(sep, NULL, ChangeCitSub);
776 return 0;
777 }
778
SeqEntryPubsAsn4(SeqEntryPtr sep,Boolean isEmblOrDdbj)779 Int4 SeqEntryPubsAsn4 (SeqEntryPtr sep, Boolean isEmblOrDdbj)
780 {
781 return SeqEntryPubsAsn4Ex(sep, isEmblOrDdbj, TRUE);
782 }
783
784 /*****************************************************************************
785 *
786 * Remove old (ver 2.0) asn.1 (with check for the new ver 3.0)
787 *
788 *****************************************************************************/
/*
 * StripOld: SeqEntryExplore callback that removes old-style descriptors and
 * features from the visited entry.
 *
 * Bioseqs whose repr is not raw, const or delta are left alone.  Otherwise:
 *   - modif, mol-type, method and org descriptors are extracted and freed
 *   - in every annotation of type 1, the features returned by
 *     ExtractSourceFeatList and all SEQFEAT_ORG features are freed
 *   - a type 1 annotation left with neither data nor a descriptor is
 *     removed
 *
 * data, index and indent are not used.
 */
void StripOld (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  ValNodePtr    descr = NULL;
  ValNodePtr    node = NULL;
  ValNodePtr    node_next;
  SeqFeatPtr    feat;
  SeqFeatPtr    feat_next;
  SeqAnnotPtr   annots;
  SeqAnnotPtr   annot;
  SeqAnnotPtr   annot_next;
  BioseqPtr     bsp = NULL;
  BioseqSetPtr  bssp = NULL;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)
        && (bsp->repr != Seq_repr_delta)) {
      return;
    }
    descr = bsp->descr;
    annots = bsp->annot;
  } else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    descr = bssp->descr;
    annots = bssp->annot;
  }

  if (descr != NULL) {
    /* modif descriptors own a ValNode chain */
    node = ValNodeExtractList(&descr, Seq_descr_modif);
    while (node != NULL) {
      node_next = node->next;
      ValNodeFree(node->data.ptrvalue);
      MemFree(node);
      node = node_next;
    }

    node = ValNodeExtractList(&descr, Seq_descr_mol_type);
    while (node != NULL) {
      node_next = node->next;
      MemFree(node);
      node = node_next;
    }

    node = ValNodeExtractList(&descr, Seq_descr_method);
    while (node != NULL) {
      node_next = node->next;
      MemFree(node);
      node = node_next;
    }

    node = ValNodeExtractList(&descr, Seq_descr_org);
    while (node != NULL) {
      node_next = node->next;
      OrgRefFree((OrgRefPtr) node->data.ptrvalue);
      MemFree(node);
      node = node_next;
    }

    /* the head may have changed, so store it back */
    if (bsp != NULL) {
      bsp->descr = descr;
    } else if (bssp != NULL) {
      bssp->descr = descr;
    }
  }

  annot = annots;
  while (annot != NULL) {
    annot_next = annot->next;
    if (annot->type == 1) {
      feat = ExtractSourceFeatList((SeqFeatPtr PNTR) &(annot->data));
      while (feat != NULL) {
        feat_next = feat->next;
        SeqFeatFree(feat);
        feat = feat_next;
      }

      feat = SeqFeatExtractList((SeqFeatPtr PNTR) &(annot->data), SEQFEAT_ORG);
      while (feat != NULL) {
        feat_next = feat->next;
        SeqFeatFree(feat);
        feat = feat_next;
      }

      /* now keep empty annot if annot_descr present */
      if (annot->data == NULL && annot->desc == NULL) {
        annots = remove_annot(annots, annot);
      }
    }
    annot = annot_next;
  }

  if (bsp != NULL) {
    bsp->annot = annots;
  } else if (bssp != NULL) {
    bssp->annot = annots;
  }
}
862
863 //LCOV_EXCL_START
864 /*****************************************************************************
865 * EMBL may have multiple OS lines that are parsed to multiple descr on
866 * the top level. In NCBI model only one Biosource descr is allowed, others
867 * should be moved to the feature table
868 *****************************************************************************/
GetMultBiosource(SeqEntryPtr sep)869 ValNodePtr GetMultBiosource(SeqEntryPtr sep)
870 {
871 ValNodePtr bvnp, vnp, retval;
872 BioseqPtr bsp = NULL;
873 BioseqSetPtr bssp = NULL;
874
875 if (sep == NULL)
876 return NULL;
877 if (IS_Bioseq(sep)) {
878 bsp = (BioseqPtr)(sep->data.ptrvalue);
879 vnp = bsp->descr;
880 } else {
881 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
882 vnp = bssp->descr;
883 }
884 bvnp = ValNodeExtractList(&vnp, Seq_descr_source);
885 if (bvnp == NULL) {
886 return NULL;
887 }
888 if (bvnp->next != NULL) {
889 retval = bvnp->next;
890 bvnp->next = NULL;
891 } else {
892 retval = NULL;
893 }
894 vnp = tie_next(vnp, bvnp);
895 if (bsp != NULL) {
896 bsp->descr = vnp;
897 } else if (bssp != NULL) {
898 bssp->descr = vnp;
899 }
900 return retval;
901 }
902 //LCOV_EXCL_STOP
903
904 /*****************************************************************************
*   RemoveEmptyTitleAndPubGenAsOnlyPub removes effectively empty pubs, e.g.
*   pub { pub { gen { } } }, (both features and descriptors) and title
*   descriptors that have no text
906 *****************************************************************************/
907
/*
 * RemoveEmptyTitleAndPubGenAsOnlyPub: recursively cleans sep.
 *
 * For a set, every member is processed first.  Then, on the entry itself:
 *   - pub features for which PubIsEffectivelyEmpty is true are unlinked
 *     from type 1 annotations and freed
 *   - annotations left with neither data nor a descriptor are unlinked and
 *     freed
 *   - pub descriptors for which PubIsEffectivelyEmpty is true, and title
 *     descriptors without text, are unlinked and freed
 */
static void RemoveEmptyTitleAndPubGenAsOnlyPub (SeqEntryPtr sep)

{
  BioseqPtr         bsp;
  BioseqSetPtr      bssp;
  SeqEntryPtr       member;
  SeqAnnotPtr       sap = NULL;
  SeqAnnotPtr       sap_next;
  SeqAnnotPtr PNTR  sap_link;
  SeqFeatPtr        sfp;
  SeqFeatPtr        sfp_next;
  SeqFeatPtr PNTR   sfp_link;
  ValNodePtr        sdp = NULL;
  ValNodePtr        sdp_next;
  ValNodePtr PNTR   sdp_link;
  Boolean           discard;

  if (sep == NULL) return;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    sap_link = &(bsp->annot);
    sdp_link = &(bsp->descr);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    for (member = bssp->seq_set; member != NULL; member = member->next) {
      RemoveEmptyTitleAndPubGenAsOnlyPub (member);
    }
    sap_link = &(bssp->annot);
    sdp_link = &(bssp->descr);
  } else {
    return;
  }
  sap = *sap_link;
  sdp = *sdp_link;

  for (; sap != NULL; sap = sap_next) {
    sap_next = sap->next;

    if (sap->type == 1) {
      sfp_link = (SeqFeatPtr PNTR) &(sap->data);
      for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp_next) {
        sfp_next = sfp->next;
        if (sfp->data.choice == SEQFEAT_PUB &&
            PubIsEffectivelyEmpty ((PubdescPtr) sfp->data.value.ptrvalue)) {
          *sfp_link = sfp->next;
          sfp->next = NULL;
          SeqFeatFree (sfp);
        } else {
          sfp_link = &(sfp->next);
        }
      }
    }

    /* now keep empty annot if annot_descr present */
    if (sap->data == NULL && sap->desc == NULL) {
      *sap_link = sap->next;
      sap->next = NULL;
      SeqAnnotFree (sap);
    } else {
      sap_link = &(sap->next);
    }
  }

  for (; sdp != NULL; sdp = sdp_next) {
    sdp_next = sdp->next;

    discard = FALSE;
    if (sdp->choice == Seq_descr_pub &&
        PubIsEffectivelyEmpty ((PubdescPtr) sdp->data.ptrvalue)) {
      discard = TRUE;
    } else if (sdp->choice == Seq_descr_title &&
               StringHasNoText ((CharPtr) sdp->data.ptrvalue)) {
      discard = TRUE;
    }

    if (discard) {
      *sdp_link = sdp->next;
      sdp->next = NULL;
      SeqDescFree (sdp);
    } else {
      sdp_link = &(sdp->next);
    }
  }
}
986
987 //LCOV_EXCL_START
988 // never called, always called with Ex
989 /*****************************************************************************
990 * SeqEntryToAsn3(sep)
991 * Converts a SeqEntry with old OrgRefs to SeqEntry with Biosource
992 * Does the Taxonomy lookup if taxserver = TRUE and taxfun != NULL
993 * Strips old stuff if strip_old=TRUE
994 * Moves /map from GeneRef, removes ProtRef xrefs and checks genetic
995 * code in CDSs
996 * RETURN:
997 * INFO_ASNOLD - if the entry is in spec 3.0 (has BioSource) already
998 * INFO_ASNNEW - if the entry is converted to new spec
999 * ERR_REJECT - if the entry has internal FATAL errors
1000 * ERR_INPUT - if input is NULL
1001 *
1002 *****************************************************************************/
SeqEntryToAsn3(SeqEntryPtr sep,Boolean strip_old,Boolean source_correct,Boolean taxserver,SeqEntryFunc taxfun)1003 Int4 SeqEntryToAsn3 (SeqEntryPtr sep, Boolean strip_old, Boolean source_correct, Boolean taxserver, SeqEntryFunc taxfun)
1004 {
1005 return SeqEntryToAsn3Ex(sep, strip_old, source_correct,
1006 taxserver, taxfun, NULL, FALSE, FALSE);
1007 }
1008 //LCOV_EXCL_STOP
1009
is_equiv(SeqEntryPtr sep)1010 static Boolean is_equiv(SeqEntryPtr sep)
1011 {
1012 BioseqSetPtr bssp;
1013
1014 if (IS_Bioseq(sep)) {
1015 return FALSE;
1016 }
1017 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
1018 if (bssp->_class != 10) { /* equiv */
1019 return FALSE;
1020 }
1021 return TRUE;
1022 }
1023
/*
 * RestoreUpdateDatePos: moves the update-date descriptor of a Bioseq-set
 * back to position update_date_pos (0-based) in the descriptor chain.
 *
 * Nothing is done for a negative position, for a non-set entry, or when no
 * update-date descriptor exists.  If the chain is shorter than the
 * requested position, the descriptor is appended at the end.
 */
static void RestoreUpdateDatePos (SeqEntryPtr sep, Int2 update_date_pos)

{
  BioseqSetPtr  bssp;
  ValNodePtr    anchor;
  ValNodePtr    date_node;
  Int2          steps;

  if (update_date_pos < 0 || ! IS_Bioseq_set (sep)) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  date_node = ValNodeExtractList (&(bssp->descr), Seq_descr_update_date);
  if (date_node == NULL) return;

  if (update_date_pos == 0) {
    /* becomes the new head */
    date_node->next = bssp->descr;
    bssp->descr = date_node;
    return;
  }

  /* walk to the node that should precede the update-date */
  anchor = bssp->descr;
  for (steps = update_date_pos; steps > 1 && anchor != NULL; steps--) {
    anchor = anchor->next;
  }

  if (anchor == NULL) {
    bssp->descr = ValNodeLink (&(bssp->descr), date_node);
    return;
  }
  date_node->next = anchor->next;
  anchor->next = date_node;
}
1055
GetUpdateDatePos(SeqEntryPtr sep)1056 static Int2 GetUpdateDatePos (SeqEntryPtr sep)
1057
1058 {
1059 BioseqSetPtr bssp;
1060 Int2 i;
1061 ValNodePtr vnp;
1062
1063 if (! IS_Bioseq_set (sep)) return -1;
1064 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1065 if (bssp == NULL) return -1;
1066
1067 for (vnp = bssp->descr, i = 0; vnp != NULL; vnp = vnp->next, i++) {
1068 if (vnp->choice == Seq_descr_update_date) return i;
1069 }
1070 return -1;
1071 }
1072
/*
 * CleanMiscFeatFields: feature visitor that drops redundant text.
 *
 *   gene features     the comment is freed when it equals the gene's locus
 *                     or its desc
 *   protein features  the desc is freed when it equals one of the protein
 *                     names (names without text are ignored)
 *
 * Other feature types are left untouched.  userdata is not used.
 */
static void CleanMiscFeatFields (SeqFeatPtr sfp, Pointer userdata)

{
  GeneRefPtr  grp;
  ProtRefPtr  prp;
  CharPtr     name;
  ValNodePtr  vnp;

  if (sfp == NULL) return;

  if (sfp->data.choice == SEQFEAT_GENE) {
    grp = (GeneRefPtr) sfp->data.value.ptrvalue;
    if (grp == NULL) return;
    if (sfp->comment != NULL && grp->locus != NULL &&
        StringCmp (sfp->comment, grp->locus) == 0) {
      sfp->comment = MemFree (sfp->comment);
    }
    if (sfp->comment != NULL && grp->desc != NULL &&
        StringCmp (sfp->comment, grp->desc) == 0) {
      sfp->comment = MemFree (sfp->comment);
    }
  } else if (sfp->data.choice == SEQFEAT_PROT) {
    prp = (ProtRefPtr) sfp->data.value.ptrvalue;
    if (prp == NULL) return;
    if (prp->desc == NULL) return;
    for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
      name = (CharPtr) vnp->data.ptrvalue;
      if (! StringHasNoText (name)) {
        if (StringCmp (prp->desc, name) == 0) {
          prp->desc = MemFree (prp->desc);
        }
      }
    }
  }
}
1111
/* Accumulator filled by FindOneDblink while descriptors are being visited. */
typedef struct dblinknpsdata {
  SeqDescrPtr dblinksdp;   /* first DBLink user-object descriptor seen, or NULL */
  Boolean morethanone;     /* set to TRUE when a further DBLink descriptor is seen */
} DblinkNpsData, PNTR DblinkNpsPtr;
1116
/* Descriptor visitor: records the first user-object descriptor whose type
   string is exactly "DBLink" in the DblinkNpsData passed as userdata, and
   raises its morethanone flag if another one is encountered afterwards.
   Descriptors of other kinds, or without a typed user object, are ignored. */
static void FindOneDblink (SeqDescrPtr sdp, Pointer userdata)

{
  DblinkNpsPtr   state;
  UserObjectPtr  uop;

  if (sdp == NULL) return;
  if (sdp->choice != Seq_descr_user) return;

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (uop == NULL || uop->type == NULL) return;
  if (StringCmp (uop->type->str, "DBLink") != 0) return;

  state = (DblinkNpsPtr) userdata;
  if (state == NULL) return;

  if (state->dblinksdp != NULL) {
    state->morethanone = TRUE;
    return;
  }
  state->dblinksdp = sdp;
}
1139
/* Set visitor: for a nuc-prot set that contains exactly one DBLink user
   object descriptor, and only when that descriptor is an extended
   (ObjValNode) descriptor whose recorded parent type is OBJ_BIOSEQ, the
   user object is detached from its descriptor and re-added as a new user
   descriptor on the set itself.  The emptied descriptor is flagged through
   idx.deleteme; this function does not delete it.  userdata is not used. */
static void MoveDBLinkToNPS (BioseqSetPtr bssp, Pointer userdata)

{
  DblinkNpsData  found;
  SeqDescrPtr    dblink;
  ObjValNodePtr  ovp;
  UserObjectPtr  payload;

  if (bssp == NULL || bssp->_class != BioseqseqSet_class_nuc_prot) return;

  MemSet ((Pointer) &found, 0, sizeof (DblinkNpsData));
  found.dblinksdp = NULL;
  found.morethanone = FALSE;

  VisitDescriptorsInSet (bssp, (Pointer) &found, FindOneDblink);

  /* ambiguous (several DBLinks) or nothing to move */
  if (found.morethanone) return;
  dblink = found.dblinksdp;
  if (dblink == NULL) return;

  /* idx fields exist only on extended descriptors */
  if (dblink->extended == 0) return;
  ovp = (ObjValNodePtr) dblink;
  if (ovp->idx.parenttype != OBJ_BIOSEQ) return;

  payload = (UserObjectPtr) dblink->data.ptrvalue;
  if (payload == NULL) return;

  /* transfer ownership of the user object to the new set-level descriptor */
  dblink->data.ptrvalue = NULL;
  ovp->idx.deleteme = TRUE;
  SeqDescrAddPointer (&(bssp->descr), Seq_descr_user, payload);
}
1172
/* Descriptor visitor: flags user-object descriptors for deletion.
 *
 * A user descriptor is kept when its user object has a type and either
 *   - the type string is "NcbiAutofix" or "Unverified" (case-insensitive), or
 *   - the object carries data fields.
 * Every other user descriptor (including one whose user object is NULL or
 * has no type) is flagged through idx.deleteme, provided it is an extended
 * descriptor.  When a descriptor is flagged and userdata is non-NULL, the
 * Boolean it points to is set to TRUE so the caller knows a deletion pass
 * is needed.
 *
 * A NULL sdp is ignored, matching the other descriptor visitors in this file. */
static void MarkEmptyUserObjects (SeqDescrPtr sdp, Pointer userdata)

{
  BoolPtr        bp;
  ObjectIdPtr    oip;
  ObjValNodePtr  ovp;
  UserObjectPtr  uop;

  if (sdp == NULL || sdp->choice != Seq_descr_user) return;
  uop = (UserObjectPtr) sdp->data.ptrvalue;

  if (uop != NULL) {
    oip = uop->type;
    if (oip != NULL) {
      if (StringICmp (oip->str, "NcbiAutofix") == 0) return;
      if (StringICmp (oip->str, "Unverified") == 0) return;
      if (uop->data != NULL) return;
    }
  }

  /* idx.deleteme exists only on extended descriptors */
  if (sdp->extended == 0) return;
  ovp = (ObjValNodePtr) sdp;
  ovp->idx.deleteme = TRUE;

  bp = (BoolPtr) userdata;
  if (bp != NULL) {
    *bp = TRUE;
  }
}
1202
1203 /*****************************************************************************
1204 * SeqEntryToAsn3Ex(sep)
1205 * Converts a SeqEntry with old OrgRefs to SeqEntry with Biosource
1206 * Does the Taxonomy lookup if taxserver = TRUE and taxfun != NULL
1207 * Strips old stuff if strip_old=TRUE
1208 * Moves /map from GeneRef, removes ProtRef xrefs and checks genetic
1209 * code in CDSs
1210 * RETURN:
1211 * INFO_ASNOLD - if the entry is in spec 3.0 (has BioSource) already
1212 * INFO_ASNNEW - if the entry is converted to new spec
1213 * ERR_REJECT - if the entry has internal FATAL errors
1214 * ERR_INPUT - if input is NULL
1215 *
1216 * New argument added SeqEntryFunc taxmerge
*   taxfun - Taxon3ReplaceOrgInSeqEntry
1218 * taxmerge - Tax3MergeSourceDescr
1219 *****************************************************************************/
SeqEntryToAsn3Ex(SeqEntryPtr sep,Boolean strip_old,Boolean source_correct,Boolean taxserver,SeqEntryFunc taxfun,SeqEntryFunc taxmerge,Boolean gpipeMode,Boolean isEmblOrDdbj)1220 Int4 SeqEntryToAsn3Ex (
1221 SeqEntryPtr sep,
1222 Boolean strip_old,
1223 Boolean source_correct,
1224 Boolean taxserver,
1225 SeqEntryFunc taxfun,
1226 SeqEntryFunc taxmerge,
1227 Boolean gpipeMode,
1228 Boolean isEmblOrDdbj
1229 )
1230 {
1231 ToAsn3 ta;
1232 OrgFixPtr ofp = NULL;
1233 MolFixPtr mfp = NULL;
1234 CharPtr porg = NULL;
1235 QualMap qm;
1236 BSMap bs;
1237 ValNodePtr mult = NULL;
1238 Int4 retval = INFO_ASNOLD, ret;
1239 Int2 update_date_pos;
1240 Boolean do_delete = FALSE;
1241
1242 ta.had_biosource = FALSE;
1243 ta.had_molinfo = FALSE;
1244 ta.ofp = NULL;
1245 ta.mfp = NULL;
1246 qm.name = NULL;
1247 qm.same = TRUE;
1248 bs.same = TRUE;
1249 bs.bsp = NULL;
1250
1251 if (sep == NULL) {
1252 return ERR_INPUT;
1253 }
1254
1255 RemoveAllNcbiCleanupUserObjects (sep);
1256
1257 VisitDescriptorsInSep (sep, (Pointer) &do_delete, MarkEmptyUserObjects);
1258 if (do_delete) {
1259 DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
1260 }
1261
1262 VisitSetsInSep (sep, NULL, MoveDBLinkToNPS);
1263
1264 update_date_pos = GetUpdateDatePos (sep);
1265 RemoveEmptyTitleAndPubGenAsOnlyPub (sep);
1266 if (source_correct) {
1267 SeqEntryExplore(sep, (Pointer)(&porg), CorrectSourceFeat);
1268 }
1269 toporg(sep);
1270 SeqEntryExplore(sep, (Pointer)(&ta), FindOrg);
1271
1272 VisitFeaturesInSep (sep, NULL, CleanMiscFeatFields);
1273
1274 if (ta.had_biosource) {
1275 /* entry is in asn.1 spec 3.0 already do the checks only */
1276 retval |= INFO_ASNNEW;
1277 if(strip_old) {
1278 SeqEntryExplore(sep, NULL, StripOld);
1279 }
1280 ToAsn4(sep, isEmblOrDdbj); /* move pubs and lineage */
1281 CombineBSFeat(sep);
1282 if (taxserver && taxfun != NULL) {
1283 SeqEntryExplore(sep, NULL, taxfun);
1284 }
1285 if (is_equiv(sep)) {
1286 /*do nothing*/
1287 }else if (NOT_segment(sep)) {
1288 if (taxserver && taxmerge != NULL) {
1289 SeqEntryExplore(sep, mult, taxmerge);
1290 } else {
1291 SeqEntryExplore(sep, mult, MergeBSinDescr);
1292 }
1293 } else {
1294 //LCOV_EXCL_START
1295 // Only for SegSets
1296 SeqEntryExplore(sep, (Pointer) (&bs), CheckBS);
1297 if (bs.same == TRUE) {
1298 SeqEntryExplore(sep, (Pointer) (&bs), StripBSfromParts);
1299 } else {
1300 SeqEntryExplore(sep, (Pointer) (&bs), StripBSfromTop);
1301 }
1302 //LCOV_EXCL_STOP
1303 }
1304 ret = FixNucProtSet(sep);
1305 retval |= ret;
1306 EntryChangeImpFeat(sep);
1307 EntryChangeGBSource(sep);
1308 SeqEntryExplore (sep, NULL, FixProtMolInfo);
1309 SeqEntryExplore (sep, NULL, FuseMolInfos);
1310 if (! gpipeMode) {
1311 SeqEntryExplore(sep, NULL, StripProtXref);
1312 }
1313 SeqEntryExplore(sep, (Pointer)(&qm), CheckMaps);
1314 /*
1315 if (qm.same == TRUE) {
1316 SeqEntryExplore(sep, (Pointer)(&qm), StripMaps);
1317 } else {
1318 SeqEntryExplore(sep, NULL, MapsToGenref);
1319 }
1320 */
1321 if (! isEmblOrDdbj) {
1322 SeqEntryExplore(sep, NULL, MapsToGenref);
1323 }
1324 CheckGeneticCode(sep);
1325 NormalizeSegSeqMolInfo (sep);
1326 toasn3_free(&ta);
1327 RestoreUpdateDatePos (sep, update_date_pos);
1328 if(qm.name != NULL)
1329 {
1330 MemFree(qm.name);
1331 }
1332
1333 return retval;
1334 }
1335 if (ta.ofp == NULL) {
1336 ErrPostStr(SEV_WARNING, ERR_ORGANISM_NotFound, "No information found to create BioSource");
1337 }
1338 if (ta.mfp == NULL) {
1339 ErrPostStr(SEV_WARNING, ERR_ORGANISM_NotFound, "No information found to create MolInfo");
1340 }
1341
1342 FixToAsn(sep, (Pointer)(&ta));
1343
1344 if (ta.ofp != NULL) {
1345 ofp = ta.ofp;
1346 SeqEntryExplore(sep, (Pointer)ofp, FixOrg);
1347 }
1348 if (ta.mfp != NULL) {
1349 mfp = ta.mfp;
1350 SeqEntryExplore(sep, (Pointer)mfp, FixMol);
1351 }
1352
1353 /* entry is converted to asn.1 spec 3.0, now do the checks */
1354 retval = INFO_ASNNEW;
1355 if(ta.had_biosource && strip_old) {
1356 SeqEntryExplore(sep, NULL, StripOld);
1357 }
1358 ToAsn4(sep, isEmblOrDdbj); /* move pubs and lineage */
1359 if (taxserver && taxfun != NULL) {
1360 SeqEntryExplore(sep, NULL, taxfun);
1361 }
1362 if (is_equiv(sep)) {
1363 /*do nothing*/
1364 } else if (NOT_segment(sep)) {
1365 if (taxserver && taxmerge != NULL) {
1366 SeqEntryExplore(sep, mult, taxmerge);
1367 } else {
1368 SeqEntryExplore(sep, mult, MergeBSinDescr);
1369 }
1370 } else {
1371 SeqEntryExplore(sep, (Pointer) (&bs), CheckBS);
1372 if (bs.same == TRUE) {
1373 SeqEntryExplore(sep, (Pointer) (&bs), StripBSfromParts);
1374 } else {
1375 SeqEntryExplore(sep, (Pointer) (&bs), StripBSfromTop);
1376 }
1377 }
1378 ret = FixNucProtSet(sep);
1379 retval |= ret;
1380 EntryChangeImpFeat(sep);
1381 EntryChangeGBSource(sep);
1382 SeqEntryExplore (sep, NULL, FixProtMolInfo);
1383 SeqEntryExplore (sep, NULL, FuseMolInfos);
1384 if (! gpipeMode) {
1385 SeqEntryExplore(sep, NULL, StripProtXref);
1386 }
1387 SeqEntryExplore(sep, (Pointer)(&qm), CheckMaps);
1388 /*
1389 if (qm.same == TRUE) {
1390 SeqEntryExplore(sep, (Pointer)(&qm), StripMaps);
1391 } else {
1392 SeqEntryExplore(sep, NULL, MapsToGenref);
1393 }
1394 */
1395 if (! isEmblOrDdbj) {
1396 SeqEntryExplore(sep, NULL, MapsToGenref);
1397 }
1398 CheckGeneticCode(sep);
1399 NormalizeSegSeqMolInfo (sep);
1400 toasn3_free(&ta);
1401 RestoreUpdateDatePos (sep, update_date_pos);
1402 if(qm.name)
1403 qm.name=MemFree(qm.name);
1404 return retval;
1405 }
1406
1407 //LCOV_EXCL_START
CheckLocWhole(BioseqPtr bsp,SeqLocPtr slp)1408 Boolean CheckLocWhole(BioseqPtr bsp, SeqLocPtr slp)
1409 {
1410 SeqIntPtr sip;
1411
1412 if (slp == NULL)
1413 return FALSE;
1414
1415 if (slp->choice == SEQLOC_WHOLE) {
1416 return TRUE;
1417 } else if (slp->choice == SEQLOC_INT) {
1418 sip = slp->data.ptrvalue;
1419 if (sip->from == 0 && sip->to == bsp->length-1) {
1420 return TRUE;
1421 }
1422 }
1423 return FALSE;
1424 }
1425 //LCOV_EXCL_STOP
1426 /*****************************************************************************
1427 *
1428 * Find all the OrgRefs
1429 *
1430 *****************************************************************************/
/* SeqEntryExplore callback.  data must point to a ToAsn3 record.
 *
 * Does nothing once a BioSource descriptor has been seen (had_biosource).
 * Otherwise it scans the descriptors of the Bioseq or Bioseq-set:
 *   - each Org-ref descriptor is registered with AddOrgToFix;
 *   - mol-type, method and modif descriptors are collected and, if any
 *     carried information, registered with AddMolToFix;
 *   - a source descriptor sets had_biosource and ends the scan at once.
 * It then scans the feature tables for Org-ref features and for "source"
 * import features that have an /organism qualifier.  Features that
 * check_whole reports as covering the whole sequence are registered against
 * the descriptor chain, the others against the feature itself.
 */
void FindOrg (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  ToAsn3Ptr     tap = (ToAsn3Ptr) data;
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  ValNodePtr    descr_head, vnp;
  ValNodePtr    mod = NULL;
  SeqAnnotPtr   annots, ap;
  SeqFeatPtr    sfp;
  OrgRefPtr     orp;
  ImpFeatPtr    imp;
  GBQualPtr     q;
  Uint1         mol = 0, meth = 0;
  Boolean       info = FALSE;
  Boolean       whole;

  if (tap->had_biosource) {
    return;
  }

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) (sep->data.ptrvalue);
    descr_head = bsp->descr;
    annots = bsp->annot;
  } else {
    bssp = (BioseqSetPtr) (sep->data.ptrvalue);
    descr_head = bssp->descr;
    annots = bssp->annot;
  }

  //LCOV_EXCL_START
  // used for rescuing biosource and molinfo from modif, mol-type, and method descriptors,
  // which are obsolete
  for (vnp = descr_head; vnp != NULL; vnp = vnp->next) {
    switch (vnp->choice) {
      case Seq_descr_org:
        /* mod holds whatever modif descriptor was seen earlier in the chain */
        orp = (OrgRefPtr) (vnp->data.ptrvalue);
        AddOrgToFix (orp, tap, mod, sep, vnp, NULL, index);
        break;
      case Seq_descr_mol_type:
        mol = vnp->data.intvalue;
        if (mol != 0) {
          info = TRUE;
        }
        break;
      case Seq_descr_method:
        meth = vnp->data.intvalue;
        if (meth != 0) {
          info = TRUE;
        }
        break;
      case Seq_descr_modif:
        mod = vnp->data.ptrvalue;
        info = TRUE;
        break;
      case Seq_descr_source:
        tap->had_biosource = TRUE;
        return;
      default:
        break;
    }
  }
  if (info) {
    AddMolToFix (tap, sep, mol, mod, meth, index);
  }
  //LCOV_EXCL_STOP

  for (ap = annots; ap != NULL; ap = ap->next) {
    if (ap->type != 1) { /* feature table */
      continue;
    }
    for (sfp = (SeqFeatPtr) (ap->data); sfp != NULL; sfp = sfp->next) {
      /* stays FALSE when the location's Bioseq cannot be found */
      whole = FALSE;

      if (sfp->data.choice == SEQFEAT_ORG) {
        orp = (OrgRefPtr) (sfp->data.value.ptrvalue);
        bsp = BioseqFind (SeqLocId (sfp->location));
        if (bsp != NULL) {
          whole = check_whole (sfp, bsp->length);
        }
        if (whole) {
          AddOrgToFix (orp, tap, NULL, sep, descr_head, NULL, index);
        } else {
          AddOrgToFix (orp, tap, NULL, sep, NULL, sfp, index);
        }
      }

      if (sfp->data.choice == SEQFEAT_IMP) {
        imp = (ImpFeatPtr) (sfp->data.value.ptrvalue);
        if (StringCmp (imp->key, "source") != 0) {
          continue;
        }
        /* only source features naming an organism are of interest */
        q = sfp->qual;
        while (q != NULL && StringCmp (q->qual, "organism") != 0) {
          q = q->next;
        }
        if (q == NULL) {
          continue;
        }
        bsp = BioseqFind (SeqLocId (sfp->location));
        if (bsp != NULL) {
          whole = check_whole (sfp, bsp->length);
        }
        if (whole) {
          AddImpToFix (sfp, tap, sep, descr_head, NULL, index);
        } else {
          AddImpToFix (sfp, tap, sep, NULL, sfp, index);
        }
      }
    }
  }
}
1542
1543 /***********************************************************************
1544 * 0 same organisms
1545 * -1 different organisms
1546 ************************************************************************/
1547
BSComparison(BioSourcePtr one,BioSourcePtr two)1548 Int4 BSComparison(BioSourcePtr one, BioSourcePtr two)
1549 {
1550 OrgRefPtr orp1, orp2;
1551 OrgNamePtr onp1, onp2;
1552 OrgModPtr omp1, omp2;
1553 SubSourcePtr ssp1, ssp2;
1554 CharPtr name1 = NULL, name2 = NULL;
1555 Int4 i, retval = -1;
1556
1557 if (one == NULL || two == NULL)
1558 return -1;
1559 if ((orp1 = one->org) == NULL)
1560 return -1;
1561 if ((orp2 = two->org) == NULL)
1562 return -1;
1563 if ((name1 = orp1->taxname) == NULL)
1564 return -1;
1565 if (*name1 == '\0') {
1566 ErrPostStr(SEV_WARNING, ERR_ORGANISM_Empty, "empty organism in source feature");
1567 return -1;
1568 }
1569 if ((name2 = orp2->taxname) == NULL)
1570 return -1;
1571 if (*name2 == '\0') {
1572 ErrPostStr(SEV_WARNING, ERR_ORGANISM_Empty, "empty organism in source feature");
1573 return -1;
1574 }
1575 /* Strip organelle from organism */
1576 for (i = 0; i < num_organelle; i++) {
1577 if (StringNCmp(name1, organelle[i].name,
1578 StringLen(organelle[i].name)) == 0) {
1579 name1 += StringLen(organelle[i].name);
1580 }
1581 if (StringNCmp(name2, organelle[i].name,
1582 StringLen(organelle[i].name)) == 0) {
1583 name2 += StringLen(organelle[i].name);
1584 }
1585 }
1586 for (; name1 != NULL && *name1 == ' '; name1++) continue;
1587 for (; name2 != NULL && *name2 == ' '; name2++) continue;
1588 if (StringICmp(name2, name1) == 0) {
1589 retval = 0;
1590 } else {
1591 ErrPostEx(SEV_ERROR, ERR_ORGANISM_Diff,
1592 "Different organisms in one entry: %s|%s", name2, name1);
1593 retval = -1;
1594 }
1595
1596 /* Compare clones - now all subsource and orgmod modifiers */
1597
1598 for (ssp1 = one->subtype, ssp2 = two->subtype;
1599 ssp1 != NULL && ssp2 != NULL;
1600 ssp1 = ssp1->next, ssp2 = ssp2->next) {
1601 if (ssp1->subtype != ssp2->subtype) return -1;
1602 if (StringICmp (ssp1->name, ssp2->name) != 0) return -1;
1603 }
1604 if (ssp1 != NULL || ssp2 != NULL) return -1;
1605
1606 onp1 = orp1->orgname;
1607 onp2 = orp2->orgname;
1608 if (onp1 == NULL || onp2 == NULL) return retval;
1609
1610 for (omp1 = onp1->mod, omp2 = onp2->mod;
1611 omp1 != NULL && omp2 != NULL;
1612 omp1 = omp1->next, omp2 = omp2->next) {
1613 if (omp1->subtype != omp2->subtype) return -1;
1614 if (StringICmp (omp1->subname, omp2->subname) != 0) return -1;
1615 }
1616 if (omp1 != NULL || omp2 != NULL) return -1;
1617
1618 return retval;
1619 }
1620
1621 //LCOV_EXCL_START
BSComparisonEx(BioSourcePtr one,BioSourcePtr two,Boolean clone)1622 Int4 BSComparisonEx(BioSourcePtr one, BioSourcePtr two, Boolean clone)
1623 {
1624 OrgRefPtr orp1, orp2;
1625 SubSourcePtr ssp1, ssp2;
1626 CharPtr name1 = NULL, name2 = NULL;
1627 CharPtr subname1 = NULL, subname2 = NULL;
1628 Int4 i, retval = -1;
1629
1630 if (one == NULL || two == NULL)
1631 return -1;
1632 if ((orp1 = one->org) == NULL)
1633 return -1;
1634 if ((orp2 = two->org) == NULL)
1635 return -1;
1636 if ((name1 = orp1->taxname) == NULL)
1637 return -1;
1638 if (*name1 == '\0') {
1639 ErrPostStr(SEV_WARNING, ERR_ORGANISM_Empty, "empty organism in source feature");
1640 return -1;
1641 }
1642 if ((name2 = orp2->taxname) == NULL)
1643 return -1;
1644 if (*name2 == '\0') {
1645 ErrPostStr(SEV_WARNING, ERR_ORGANISM_Empty, "empty organism in source feature");
1646 return -1;
1647 }
1648 /* Strip organelle from organism */
1649 for (i = 0; i < num_organelle; i++) {
1650 if (StringNCmp(name1, organelle[i].name,
1651 StringLen(organelle[i].name)) == 0) {
1652 name1 += StringLen(organelle[i].name);
1653 }
1654 if (StringNCmp(name2, organelle[i].name,
1655 StringLen(organelle[i].name)) == 0) {
1656 name2 += StringLen(organelle[i].name);
1657 }
1658 }
1659 for (; name1 != NULL && *name1 == ' '; name1++) continue;
1660 for (; name2 != NULL && *name2 == ' '; name2++) continue;
1661 if (StringICmp(name2, name1) == 0) {
1662 retval = 0;
1663 } else {
1664 ErrPostEx(SEV_ERROR, ERR_ORGANISM_Diff,
1665 "Different organisms in one entry: %s|%s", name2, name1);
1666 retval = -1;
1667 }
1668
1669 /* Compare clones */
1670 for (ssp1 = one->subtype; ssp1; ssp1= ssp1->next) {
1671 if (ssp1->subtype == 3) { /* clone */
1672 subname1 = ssp1->name;
1673 }
1674 }
1675 for (ssp2 = two->subtype; ssp2; ssp2= ssp2->next) {
1676 if (ssp2->subtype == 3) { /* clone */
1677 subname2 = ssp2->name;
1678 }
1679 }
1680 if (clone) {
1681 if (subname1 == NULL || subname2 == NULL) {
1682 return retval;
1683 }
1684 }
1685 if (StringCmp(subname1, subname2) != 0) {
1686 return -1;
1687 }
1688 /* Compare notes (that are kludged to subtype 'other' */
1689 for (ssp1 = one->subtype; ssp1; ssp1= ssp1->next) {
1690 if (ssp1->subtype == 255) { /* other */
1691 subname1 = ssp1->name;
1692 }
1693 }
1694 for (ssp2 = two->subtype; ssp2; ssp2= ssp2->next) {
1695 if (ssp2->subtype == 255) { /* other */
1696 subname2 = ssp2->name;
1697 }
1698 }
1699 if (clone) {
1700 if (subname1 == NULL || subname2 == NULL) {
1701 return retval;
1702 }
1703 }
1704 if (StringCmp(subname1, subname2) != 0) {
1705 return -1;
1706 }
1707 return retval;
1708 }
1709 //LCOV_EXCL_STOP
1710
/* Returns the value of the first qualifier in the gbqual chain whose name
   equals qual (StringCmp), or NULL when no such qualifier exists.  The
   returned pointer refers to the qualifier's own storage. */
static CharPtr GetQualValue(GBQualPtr gbqual, CharPtr qual)
{
  GBQualPtr cur = gbqual;

  while (cur != NULL && StringCmp (cur->qual, qual) != 0) {
    cur = cur->next;
  }
  if (cur == NULL) {
    return NULL;
  }
  return cur->val;
}
1724
1725 /* mapping from source feature qualifiers and comments */
/* Maps a source feature onto a BioSource: the feature's qualifiers are
   processed by CheckQuals, and the feature comment, if any, is appended to
   the organism's OrgMod list as a modifier of subtype 255 holding a copy of
   the comment text.  An OrgName is created when the Org-ref has none.
   Nothing is done when bsp, sfp or bsp->org is NULL. */
void CheckQualsWithComm(BioSourcePtr bsp, SeqFeatPtr sfp)
{
  OrgModPtr   omp;
  OrgNamePtr  onp;

  if (bsp == NULL)
    return;
  if (sfp == NULL)
    return;
  if (bsp->org == NULL)
    return;

  CheckQuals(bsp, sfp->qual);

  if (sfp->comment == NULL)
    return;

  /* read after CheckQuals, which may have attached an OrgName */
  onp = bsp->org->orgname;
  if (onp == NULL) {
    onp = OrgNameNew();
  }
  omp = OrgModNew();
  omp->subtype = 255;
  /* StringSave already makes the private copy; no scratch buffer needed */
  omp->subname = StringSave(sfp->comment);
  onp->mod = tie_next_OrgMod(onp->mod, omp);

  if (onp != NULL) {
    bsp->org->orgname = onp;
  }
  return;
}
1759
/* Transfers source-feature qualifiers onto a BioSource.
 *
 * For every qualifier except /organism:
 *   - /note is appended as an OrgMod of subtype 255;
 *   - a name listed in bad_quals posts ERR_SOURCE_UnwantedQualifiers
 *     (value truncated to 50 characters in the message);
 *   - a name found in the genome table sets bsp->genome to its index, but
 *     only if genome is still 0, or if genome is 5 and the index is 4;
 *   - a name found in the subtype table is appended as a SubSource with
 *     subtype index + 1 (a missing value is stored as "");
 *   - a name found in orgmod_subtype is appended as an OrgMod with the
 *     table's subtype number.
 * An OrgName is created on demand and attached to the Org-ref at the end.
 * Nothing is done when bsp or bsp->org is NULL.
 */
void CheckQuals(BioSourcePtr bsp, GBQualPtr qsfp)
{
  static Char   msg[51];   /* static: byte 50 is never written, so it stays '\0' */
  GBQualPtr     q;
  OrgNamePtr    onp;
  OrgModPtr     omp;
  SubSourcePtr  ssp;
  Uint1         i;

  if (bsp == NULL || bsp->org == NULL) {
    return;
  }
  onp = bsp->org->orgname;

  for (q = qsfp; q != NULL; q = q->next) {
    if (StringCmp(q->qual, "organism") == 0) {
      continue;
    }

    if (StringCmp(q->qual, "note") == 0) {
      if (onp == NULL) {
        onp = OrgNameNew();
      }
      omp = OrgModNew();
      omp->subtype = 255;
      omp->subname = StringSave(q->val);
      onp->mod = tie_next_OrgMod(onp->mod, omp);
    }

    if (q->qual != NULL) {
      /* NOTE(review): an unwanted qualifier is only reported here; it is
         still run through the genome/subtype/orgmod tables below.  The
         original code had a `continue` at this point that affected only
         this inner loop - confirm whether skipping the qualifier was
         intended. */
      for (i = 0; i < num_bad_quals; i++) {
        if (StringCmp(bad_quals[i], q->qual) == 0) {
          StringNCpy(msg, q->val, 50);
          ErrPostEx(SEV_WARNING, ERR_SOURCE_UnwantedQualifiers,
                    "Unwanted qualifier on source feature: %s=%s", q->qual, msg);
        }
      }

      for (i = 0; i < num_genome; i++) {
        if (StringCmp(genome[i], q->qual) != 0) {
          continue;
        }
        if (bsp->genome == 0 || (bsp->genome == 5 && i == 4)) {
          bsp->genome = i;
          break;
        }
      }

      for (i = 0; i < num_subtype; i++) {
        if (StringCmp(subtype[i], q->qual) != 0) {
          continue;
        }
        ssp = SubSourceNew();
        ssp->subtype = (Uint1) (i+1);
        ssp->name = StringSave((q->val == NULL) ? "" : q->val);
        bsp->subtype = tie_next_subtype(bsp->subtype, ssp);
        break;
      }
    }

    for (i = 0; orgmod_subtype[i].name != NULL; i++) {
      if (StringCmp(q->qual, orgmod_subtype[i].name) != 0) {
        continue;
      }
      if (onp == NULL) {
        onp = OrgNameNew();
      }
      /* the OrgName choice/data are left unset here; only the modifier
         list is filled in */
      omp = OrgModNew();
      omp->subtype = (Uint1) orgmod_subtype[i].num;
      omp->subname = StringSave(q->val);
      onp->mod = tie_next_OrgMod(onp->mod, omp);
      break;
    }
  }

  if (onp != NULL) {
    bsp->org->orgname = onp;
  }
  return;
}
1851
1852 //LCOV_EXCL_START
1853 // used for rescuing molinfo from modif, mol-type, and method descriptors,
1854 // which are obsolete
/* Returns mfi unchanged when it is non-NULL, otherwise the result of
   MolInfoNew(). */
MolInfoPtr new_info(MolInfoPtr mfi)
{
  if (mfi != NULL) {
    return mfi;
  }
  return MolInfoNew();
}
1859
1860 /*****************************************************************************/
1861 // used for rescuing molinfo from modif, mol-type, and method descriptors,
1862 // which are obsolete
/* Translates one modif code into MolInfo settings.
 *
 *   mod 10, 11, 16, 17  ->  completeness 2, 1, 3, 4
 *   mod 20, 21, 22      ->  tech 2, 3, 4
 *
 * For these codes a MolInfo is obtained through new_info (so one is created
 * when mfi is NULL) and the field is set.  Any other code returns mfi
 * untouched, which may be NULL.
 */
MolInfoPtr ModToMolInfo(MolInfoPtr mfi, Uint1 mod)
{
  Uint1 completeness = 0;
  Uint1 tech = 0;

  switch (mod) {
    case 10: completeness = 2; break;
    case 11: completeness = 1; break;
    case 16: completeness = 3; break;
    case 17: completeness = 4; break;
    case 20: tech = 2; break;
    case 21: tech = 3; break;
    case 22: tech = 4; break;
    default:
      /* code carries no MolInfo information */
      return mfi;
  }

  mfi = new_info(mfi);
  if (completeness != 0) {
    mfi->completeness = completeness;
  } else {
    mfi->tech = tech;
  }
  return mfi;
}
1900
1901 /*****************************************************************************/
1902 // used for rescuing BioSource from modif, mol-type, and method descriptors,
1903 // which are obsolete
/* Translates one modif code into a BioSource genome or origin value.
   Codes not listed below leave bsp untouched.  bsp must not be NULL. */
void ModToBiosource(BioSourcePtr bsp, Uint1 mod)
{
  Uint1 genome = 0;   /* 0 = this code does not set genome */
  Uint1 origin = 0;   /* 0 = this code does not set origin */

  switch (mod) {
    case 2:  genome = 8;  break;   /* extrachrom */
    case 3:  genome = 9;  break;   /* plasmid */
    case 4:  genome = 5;  break;   /* mitochondrion */
    case 5:  genome = 2;  break;   /* chloroplast */
    case 6:  genome = 4;  break;   /* kinetoplast */
    case 7:  genome = 12; break;   /* cyanelle */
    case 14: genome = 10; break;   /* transposon */
    case 15: genome = 11; break;   /* insertion-seq */
    case 18: genome = 7;  break;   /* macronuclear */
    case 19: genome = 13; break;   /* proviral */
    case 23: genome = 3;  break;   /* chromoplast */
    case 8:  origin = 5;  break;   /* synthetic */
    case 12: origin = 3;  break;   /* mutagen */
    case 13: origin = 2;  break;   /* natmut */
    default:
      break;
  }

  if (genome != 0) {
    bsp->genome = genome;
  }
  if (origin != 0) {
    bsp->origin = origin;
  }
  return;
}
1954
1955 /*****************************************************************************
1956 *
1957 * if no BioSource found on descr level and feature Biosource found
1958 * move it to the top
1959 * (stop using 05-09-96)
1960 *****************************************************************************/
/* SeqEntryExplore callback.  data must point to a ToAsn3 record.
 *
 * Acts only when had_biosource is set and the Bioseq / Bioseq-set has no
 * source descriptor of its own.  BioSource features are then extracted
 * from the first annotation (which must be a feature table); a copy of the
 * first extracted feature's BioSource becomes a new source descriptor and
 * that feature is freed.  If the feature table ends up empty, the
 * annotation is unlinked and freed.
 *
 * Fixes relative to the earlier version:
 *   - the new descriptor is added through the owner's descr pointer, so it
 *     is attached even when the descriptor chain was empty;
 *   - only the emptied annotation is removed; annotations that follow it
 *     are kept.
 */
void CkOrg (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  ToAsn3Ptr         tap;
  BioseqPtr         bsp;
  BioseqSetPtr      bssp;
  ValNodePtr PNTR   descr_head;
  SeqAnnotPtr PNTR  annot_head;
  ValNodePtr        tmp;
  SeqFeatPtr        sfp, feats;
  SeqAnnotPtr       sap;

  tap = (ToAsn3Ptr) data;
  if (!tap->had_biosource)
    return;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr) (sep->data.ptrvalue);
    descr_head = &(bsp->descr);
    annot_head = &(bsp->annot);
  } else {
    bssp = (BioseqSetPtr) (sep->data.ptrvalue);
    descr_head = &(bssp->descr);
    annot_head = &(bssp->annot);
  }

  /* a source descriptor is already present: nothing to move */
  for (tmp = *descr_head; tmp != NULL; tmp = tmp->next) {
    if (tmp->choice == Seq_descr_source) {
      return;
    }
  }

  sap = *annot_head;
  if (sap == NULL || sap->type != 1) {   /* 1 = feature table */
    return;
  }

  feats = (SeqFeatPtr) (sap->data);
  sfp = SeqFeatExtractList(&feats, SEQFEAT_BIOSRC);
  if (sfp != NULL) {
    tmp = SeqDescrAdd(descr_head);
    tmp->choice = Seq_descr_source;
    tmp->data.ptrvalue = AsnIoMemCopy(sfp->data.value.ptrvalue,
        (AsnReadFunc) BioSourceAsnRead, (AsnWriteFunc) BioSourceAsnWrite);
    SeqFeatFree(sfp);
  }
  sap->data = feats;

  if (feats == NULL) {
    /* drop just this now-empty feature table */
    *annot_head = sap->next;
    sap->next = NULL;
    SeqAnnotFree(sap);
  }
}
2013 //LCOV_EXCL_STOP
2014
2015 /**************************************************************************
2016 * Compare BioSources in one bioseq->descr,
2017 * merge if organisms are the same or create a feature if different
2018 *
2019 **************************************************************************/
/* SeqEntryExplore callback that leaves at most one source descriptor on a
 * nucleotide Bioseq of representation raw, const or delta.
 *
 * data is an optional descriptor chain that is first appended to the
 * Bioseq's descriptors.  The first source descriptor is the primary one;
 * nothing is done unless it has an Org-ref.  Every later source descriptor
 * is then removed after being handled as follows:
 *   - no Org-ref:               a warning is posted;
 *   - same organism (CmpOrgById): merged into the primary BioSource;
 *   - different organism:        copied into a new whole-sequence BioSource
 *                                feature in the Bioseq's first annotation
 *                                (created as a feature table if absent).
 */
void MergeBSinDescr (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr     bsp;
  ValNodePtr    first, cur, following;
  SeqAnnotPtr   sap;
  SeqIdPtr      sip;
  SeqFeatPtr    sfp;
  BioSourcePtr  primary = NULL, other;

  if (!IS_Bioseq(sep)) {
    return;
  }
  bsp = (BioseqPtr) sep->data.ptrvalue;

  switch (bsp->repr) {
    case Seq_repr_raw:
    case Seq_repr_const:
    case Seq_repr_delta:
      break;
    default:
      return;
  }
  if (! ISA_na(bsp->mol)) {
    return;
  }

  sap = bsp->annot;
  bsp->descr = tie_next(bsp->descr, (ValNodePtr) data);

  /* locate the primary (first) source descriptor */
  first = bsp->descr;
  while (first != NULL && first->choice != Seq_descr_source) {
    first = first->next;
  }
  if (first != NULL) {
    primary = first->data.ptrvalue;
  }
  if (primary == NULL || primary->org == NULL) {
    return;
  }

  cur = first->next;
  while (cur != NULL) {
    following = cur->next;   /* saved: cur may be removed below */
    if (cur->choice == Seq_descr_source) {
      other = cur->data.ptrvalue;
      if (other->org == NULL) {
        ErrPostStr(SEV_WARNING, ERR_ORGANISM_Empty, "Biosource missing Organism info");
      } else if (primary && CmpOrgById(primary, other) == TRUE) {
        primary = BioSourceMerge(primary, other);
      } else {
        /* different organism: keep it as a feature over the whole sequence */
        sfp = SeqFeatNew();
        sfp->location = ValNodeNew(NULL);
        sfp->location->choice = SEQLOC_WHOLE;
        sip = SeqIdDup(bsp->id);
        sfp->location->data.ptrvalue = sip;
        sfp->data.choice = SEQFEAT_BIOSRC;
        sfp->data.value.ptrvalue =
            AsnIoMemCopy(other, (AsnReadFunc) BioSourceAsnRead,
                         (AsnWriteFunc) BioSourceAsnWrite);
        if (sap == NULL) {
          sap = SeqAnnotNew();
          sap->type = 1;
          bsp->annot = sap;
        }
        sap->data = tie_feat(sap->data, sfp);
      }
      BioSourceFree(other);
      first->next = remove_node(first->next, cur);
    }
    cur = following;
  }
  return;
}
/*******************************************************************************
*  Move Biosource to nuc-prot set level
*******************************************************************************/
FixNucProtSet(SeqEntryPtr sep)2089 Int4 FixNucProtSet(SeqEntryPtr sep)
2090 {
2091 BioseqSetPtr bssp, bseg;
2092 BioseqPtr bsp = NULL, prot = NULL;
2093 ValNodePtr descr = NULL;
2094 ValNodePtr tmp, vnp, v, vnext;
2095 BioSourcePtr bsrc = NULL, bs;
2096 SeqEntryPtr seqsep, s;
2097 SeqAnnotPtr sap = NULL;
2098 SeqIdPtr sip;
2099 SeqFeatPtr sfp;
2100 Int4 retval = INFO_ASNOLD;
2101 Boolean bSingle = FALSE;
2102
2103
2104 if (IS_Bioseq(sep)) {
2105 return retval;
2106 }
2107 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
2108 if (bssp->_class != 1) { /* do the rest for nuc-prot only */
2109 return retval;
2110 }
2111 seqsep = bssp->seq_set;
2112 if (seqsep == NULL) {
2113 return retval;
2114 }
2115 if (seqsep->choice == 1) { /* single bioseq */
2116 bsp = (BioseqPtr) seqsep->data.ptrvalue;
2117 descr = bsp->descr;
2118 sap = bsp->annot;
2119 bSingle = TRUE;
2120 } else if (seqsep->choice == 2) { /* segmented set */
2121 bseg = (BioseqSetPtr) seqsep->data.ptrvalue;
2122 /* quick fix of core dump in segmented sets with multiple organisms
2123 BIOSOURCE feature is created on main segmeted bioseq (not parts) !*/
2124 s = bseg->seq_set;
2125 if (s != NULL) {
2126 bsp = (BioseqPtr) s->data.ptrvalue;
2127 }
2128 descr = bseg->descr;
2129 sap = bseg->annot;
2130 }
2131 if (descr == NULL) {
2132 return retval; /* nothing to move */
2133 }
2134 for (vnp = bssp->descr; vnp; vnp = vnp->next) { /* nucprot set level */
2135 if (vnp->choice == Seq_descr_source) {
2136 bsrc = vnp->data.ptrvalue;
2137 break;
2138 }
2139 }
2140 for (v = descr; v; v = vnext) { /* from bioseq or BioseqSet */
2141 vnext = v->next;
2142 if (v->choice != Seq_descr_source) {
2143 continue;
2144 }
2145 bs = v->data.ptrvalue;
2146 if (bsrc == NULL) {
2147 bsrc = BioSourceMerge(bsrc, bs);
2148 tmp = SeqDescrAdd(&(bssp->descr));
2149 tmp->choice = Seq_descr_source;
2150 tmp->data.ptrvalue = bsrc;
2151 } else if (CmpOrgById(bsrc, bs) == TRUE) {
2152 bsrc = BioSourceMerge(bsrc, bs);
2153 } else if (bsp != NULL) {
2154 sfp = SeqFeatNew();
2155 sfp->location = ValNodeNew(NULL);
2156 sfp->location->choice = SEQLOC_WHOLE;
2157 sip = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
2158 sfp->location->data.ptrvalue = sip;
2159 sfp->data.choice = SEQFEAT_BIOSRC;
2160 sfp->data.value.ptrvalue =
2161 AsnIoMemCopy(bs, (AsnReadFunc) BioSourceAsnRead,
2162 (AsnWriteFunc) BioSourceAsnWrite);
2163 if (sap == NULL) {
2164 sap = SeqAnnotNew();
2165 sap->type = 1;
2166 }
2167 sap->data = tie_feat(sap->data, sfp);
2168 }
2169 BioSourceFree(bs);
2170 descr = remove_node(descr, v);
2171
2172 }
2173 /* remove Biosource from protein sequence if it's there
2174 merging BioSource with the one on the top level*/
2175 for (s = seqsep->next; s; s = s->next) {
2176 prot = s->data.ptrvalue;
2177 vnp = ValNodeExtractList(&prot->descr, Seq_descr_source);
2178 if (vnp != NULL) {
2179 bs = vnp->data.ptrvalue;
2180 if (bsrc == NULL) {
2181 bsrc = BioSourceMerge(bsrc, bs);
2182 tmp = SeqDescrNew(bssp->descr);
2183 tmp->choice = Seq_descr_source;
2184 tmp->data.ptrvalue = bsrc;
2185 } else if (CmpOrgById(bsrc, bs) == TRUE) {
2186 bsrc = BioSourceMerge(bsrc, bs);
2187 } else {
2188 ErrPostStr(SEV_ERROR, ERR_ORGANISM_Diff,
2189 "ATTENTION: different organisms in nuc-prot set");
2190 retval = ERR_REJECT;
2191 }
2192 if (retval == ERR_REJECT) {
2193 prot->descr = ValNodeLink(&prot->descr, vnp);
2194 } else {
2195 BioSourceFree(bs);
2196 ValNodeFree(vnp);
2197 }
2198 }
2199 }
2200 if (bSingle) {
2201 bsp->descr = descr;
2202 bsp->annot = sap;
2203 } else {
2204 bseg->descr = descr;
2205 bseg->annot = sap;
2206 }
2207 return retval;
2208 }
2209
2210 /*****************************************************************************
2211 * check BioSource descr for the parts of segmented set,
2212 * if organisms are the same and no "clone" Biosources are the same
2213 * they would be deleted from parts in the next SeqEntryExplore
2214 * if different BioSource from the top would be deleted
2215 *****************************************************************************/
/*
 * SeqEntryExplore callback that updates the running verdict in a BSMap.
 *
 *   sep    - entry being visited
 *   data   - BSMapPtr; its `same` flag is read and possibly cleared
 *   index, indent - unused
 *
 * Behaviour:
 *   - nothing is done once `same` is already FALSE, or for Bioseq entries;
 *   - a set whose _class is not 2 (segset) clears `same` when _class >= 7
 *     and is otherwise ignored;
 *   - for a segset whose second member is itself a set, `same` becomes the
 *     result of CheckSegDescrChoice() on that set's members for
 *     Seq_descr_source.
 *
 * Empty sets (NULL seq_set / NULL set pointer) are skipped instead of being
 * dereferenced.
 */
void CheckBS (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqSetPtr bssp, tmp;
    SeqEntryPtr  segsep, parts;
    BSMapPtr     bmp;

    bmp = (BSMapPtr) data;
    if (bmp == NULL || bmp->same == FALSE) {
        return;
    }
    if (sep == NULL || IS_Bioseq(sep)) {
        return;
    }
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    if (bssp == NULL) {
        return;
    }
    if (bssp->_class != 2) {        /* do the rest for segset only */
        if (bssp->_class >= 7) {
            bmp->same = FALSE;      /* for other sets organisms are different */
        }
        return;
    }
    segsep = bssp->seq_set;
    if (segsep == NULL || segsep->next == NULL) {
        return;                     /* no parts entry to inspect */
    }
    if (IS_Bioseq(segsep->next)) {
        return;
    }
    tmp = (BioseqSetPtr) (segsep->next->data.ptrvalue);   /* segsep->next = parts */
    if (tmp == NULL) {
        return;
    }
    parts = tmp->seq_set;
    if (parts == NULL) {
        return;
    }
    bmp->same = CheckSegDescrChoice(parts, Seq_descr_source);
}
2250
2251 //LCOV_EXCL_START
seq_loc_compare(SeqLocPtr a,SeqLocPtr b)2252 Int2 seq_loc_compare( SeqLocPtr a, SeqLocPtr b)
2253 {
2254 Int2 retval = -1;
2255 Int4 a_strt, a_stop, b_strt, b_stop;
2256 SeqIdPtr a_sip, b_sip;
2257
2258 retval = SeqLocCompare(a, b);
2259 if (retval > 0) {
2260 return retval;
2261 }
2262 a_sip = SeqLocId(a);
2263 b_sip = SeqLocId(b);
2264 if (SeqIdForSameBioseq(a_sip, b_sip)) {
2265 a_strt = SeqLocStart(a);
2266 a_stop = SeqLocStop(a);
2267 b_strt = SeqLocStart(b);
2268 b_stop = SeqLocStop(b);
2269 if (a_stop+1 == b_strt || b_stop+1 == a_strt)
2270 retval = 5;
2271 }
2272 return retval;
2273 }
2274
/*
 * Remove from the list *qual2 every qualifier whose name and value both
 * match some qualifier in *qual1.  *qual1 is only read; *qual2 is updated
 * with whatever head remove_qual() returns.
 */
void compare_quals(GBQualPtr PNTR qual1, GBQualPtr PNTR qual2)
{
    GBQualPtr reference, candidate, following;

    reference = *qual1;
    while (reference != NULL) {
        candidate = *qual2;
        while (candidate != NULL) {
            /* grab the successor first: candidate may be removed below */
            following = candidate->next;
            if (StringCmp(reference->qual, candidate->qual) == 0 &&
                StringCmp(reference->val, candidate->val) == 0) {
                *qual2 = remove_qual(*qual2, candidate);
            }
            candidate = following;
        }
        reference = reference->next;
    }
}
2289
feat_join(SeqFeatPtr f1,SeqFeatPtr f2,SeqFeatPtr head)2290 Boolean feat_join(SeqFeatPtr f1, SeqFeatPtr f2, SeqFeatPtr head)
2291 {
2292 Boolean new = FALSE;
2293 Int2 comp;
2294 Int4 a_strt, a_stop, b_strt, b_stop, a, b;
2295 GBQualPtr q1, q2, fq, q1next, q2next;
2296 SeqFeatPtr f;
2297 ImpFeatPtr imp;
2298 SeqLocPtr slp;
2299 SeqIntPtr sip, f1_sip;
2300 Boolean nmatch = FALSE;
2301
2302 comp = seq_loc_compare(f1->location, f2->location);
2303 switch (comp)
2304 {
2305 case 0:
2306 break;
2307 case 1:
2308 for (q2 = f2->qual; q2 != NULL; q2 = q2->next) {
2309 for (q1 = f1->qual; q1 != NULL; q1 = q1next) {
2310 q1next = q1->next;
2311 if ((StringCmp(q1->qual, q2->qual) == 0) &&
2312 (StringCmp(q1->val, q2->val) == 0)) {
2313 f1->qual = remove_qual(f1->qual, q1);
2314 }
2315 }
2316 }
2317 break;
2318 case 3:
2319 for (q1 = f1->qual; q1 != NULL; q1 = q1->next) {
2320 for (q2 = f2->qual; q2 != NULL; q2 = q2next) {
2321 q2next = q2->next;
2322 if ((StringCmp(q1->qual, q2->qual) == 0) &&
2323 (StringCmp(q1->val, q2->val) == 0)) {
2324 continue;
2325 } else {
2326 nmatch = TRUE;
2327 break;
2328 }
2329 }
2330 }
2331 if (nmatch) {
2332 GBQualFree(f2->qual);
2333 ErrPostStr(SEV_WARNING, ERR_SOURCE_DiffQualifiers,
2334 "Identical source features with unmatching qualifiers");
2335 } else {
2336 GBQualFree(f2->qual);
2337 ErrPostStr(SEV_WARNING, ERR_SOURCE_Identical,
2338 "Identical source features: one is removed");
2339 }
2340 case 2:
2341 for (q1 = f1->qual; q1 != NULL; q1 = q1->next) {
2342 for (q2 = f2->qual; q2 != NULL; q2 = q2next) {
2343 q2next = q2->next;
2344 if ((StringCmp(q1->qual, q2->qual) == 0) &&
2345 (StringCmp(q1->val, q2->val) == 0)) {
2346 f2->qual = remove_qual(f2->qual, q2);
2347 }
2348 }
2349 }
2350 break;
2351 case 4:
2352 case 5:
2353 a_strt = SeqLocStart(f1->location);
2354 a_stop = SeqLocStop(f1->location);
2355 b_strt = SeqLocStart(f2->location);
2356 b_stop = SeqLocStop(f2->location);
2357 a = a_strt;
2358 if (b_strt < a_strt)
2359 a = b_strt;
2360 b = a_stop;
2361 if (b_stop > a_stop)
2362 b = b_stop;
2363 f = SeqFeatNew();
2364 imp = ImpFeatNew();
2365 imp->key = StringSave("source");
2366 slp = ValNodeNew(NULL);
2367 slp->choice = SEQLOC_INT;
2368 sip = SeqIntNew();
2369 f1_sip = (SeqIntPtr) (f1->location)->data.ptrvalue;
2370 sip->id = SeqIdDup(f1_sip->id);
2371 sip->from = a;
2372 sip->to = b;
2373 slp->data.ptrvalue = sip;
2374 f->location = slp;
2375 for (q1=f1->qual; q1 != NULL; q1 = q1next) {
2376 q1next = q1->next;
2377 for (q2=f2->qual; q2 != NULL; q2 = q2next) {
2378 q2next = q2->next;
2379 if ((StringCmp(q1->qual, q2->qual) == 0) &&
2380 (StringCmp(q1->val, q2->val) == 0)) {
2381 fq = GBQualNew();
2382 fq->qual = q1->qual;
2383 q1->qual = NULL;
2384 fq->val = q1->val;
2385 q1->val = NULL;
2386 f->qual = fq;
2387 f1->qual = remove_qual(f2->qual, q2);
2388 f2->qual = remove_qual(f1->qual, q1);
2389 }
2390 }
2391 }
2392 head = tie_feat(head, f);
2393 new = TRUE;
2394 break;
2395 default:
2396 break;
2397 }
2398 return new;
2399
2400 }
2401
/*
 * Report (via ErrPostEx) how the qualifiers of two source features compare
 * when seq_loc_compare() returns 3 for their locations; any other code
 * returns silently.
 *
 *   - different qualifier counts          -> ERR_SOURCE_DiffQualifiers
 *   - a qualifier of f1 missing from f2   -> ERR_SOURCE_DiffQualifiers
 *   - otherwise                           -> ERR_SOURCE_Identical
 *
 * Each message carries the first 50 characters of both printed locations.
 *
 * Fixes relative to the previous version: the strings returned by
 * SeqLocPrint() are released (another function in this file MemFree()s such
 * a string), and a qualifier mismatch is reported once and ends the check,
 * like the count mismatch does, instead of also being reported as
 * "Identical".
 *
 * NOTE(review): code 3 is treated as "identical locations" here; confirm
 * against the SLC_* values returned by SeqLocCompare().
 */
void count_join(SeqFeatPtr f1, SeqFeatPtr f2)
{
    Int2        nq1, nq2;
    GBQualPtr   q1, q2;
    CharPtr     printed;
    static Char msg1[51], msg2[51];

    if (seq_loc_compare(f1->location, f2->location) != 3) {
        return;
    }

    printed = SeqLocPrint(f1->location);
    StringNCpy(msg1, printed, 50);
    msg1[50] = '\0';
    MemFree(printed);

    printed = SeqLocPrint(f2->location);
    StringNCpy(msg2, printed, 50);
    msg2[50] = '\0';
    MemFree(printed);

    for (q1 = f1->qual, nq1 = 0; q1 != NULL; q1 = q1->next) {
        nq1++;
    }
    for (q2 = f2->qual, nq2 = 0; q2 != NULL; q2 = q2->next) {
        nq2++;
    }
    if (nq1 != nq2) {
        ErrPostEx(SEV_WARNING, ERR_SOURCE_DiffQualifiers,
            "Identical source features with unmatching number of qualifiers %s|%s",
            msg1, msg2);
        return;
    }

    for (q1 = f1->qual; q1 != NULL; q1 = q1->next) {
        for (q2 = f2->qual; q2 != NULL; q2 = q2->next) {
            if ((StringCmp(q1->qual, q2->qual) == 0) &&
                (StringCmp(q1->val, q2->val) == 0)) {
                break;
            }
        }
        if (q2 == NULL) {
            /* q1 has no counterpart in f2 */
            ErrPostEx(SEV_WARNING, ERR_SOURCE_DiffQualifiers,
                "Identical source features with unmatching qualifiers %s|%s",
                msg1, msg2);
            return;
        }
    }
    ErrPostEx(SEV_WARNING, ERR_SOURCE_Identical, "Identical source features; %s|%s",
        msg1, msg2);
}
2440
/*
 * SeqEntryExplore callback.  For a Bioseq whose mol is not Seq_mol_aa, every
 * BioSource feature for which check_whole() succeeds against the Bioseq
 * length is counted in the WholeFeat record passed through `data`, and a new
 * SeqFeat carrying the same data choice and the same data pointer (shared,
 * not copied) is appended to the record's feature list.
 * `index` and `indent` are unused.
 */
static void FindWholeBSFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    WholeFeatPtr collected = (WholeFeatPtr) data;
    BioseqPtr    bioseq;
    SeqAnnotPtr  annot;
    SeqFeatPtr   feat, alias;

    if (!IS_Bioseq(sep)) {
        return;
    }
    bioseq = (BioseqPtr)(sep->data.ptrvalue);
    if (bioseq->mol == Seq_mol_aa) {
        return;                         /* nucleotide sequences only */
    }

    annot = bioseq->annot;
    while (annot != NULL) {
        if (annot->type == 1) {         /* same annotation type the rest of the file inspects */
            feat = annot->data;
            while (feat != NULL) {
                if (feat->data.choice == SEQFEAT_BIOSRC &&
                    check_whole(feat, bioseq->length) != FALSE) {
                    collected->count++;
                    alias = SeqFeatNew();
                    alias->data.choice = feat->data.choice;
                    alias->data.value.ptrvalue = feat->data.value.ptrvalue;
                    collected->sfp = tie_feat(collected->sfp, alias);
                }
                feat = feat->next;
            }
        }
        annot = annot->next;
    }
}
/*
 * SeqEntryExplore callback.  For a Bioseq whose mol is not Seq_mol_aa, each
 * source descriptor is compared with the BioSource of the first feature in
 * the WholeFeat record passed through `data`.  When CmpOrgById() is TRUE and
 * BSComparisonEx(..., TRUE) returns 0, the feature's BioSource is merged into
 * the descriptor with BioSourceMerge().
 *
 * If at least one merge happened, the first BioSource feature of every
 * type-1 annotation on the Bioseq is removed with remove_feat(), clearing
 * the feature indexes for its entity first when they exist.
 * `index` and `indent` are unused.
 */
static void MergeWholeBSFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    WholeFeatPtr collected = (WholeFeatPtr) data;
    BioseqPtr    bioseq;
    ValNodePtr   descr;
    SeqAnnotPtr  annot;
    SeqFeatPtr   feat;
    BioSourcePtr descr_src = NULL, feat_src = NULL;
    Boolean      merged = FALSE;

    if (!IS_Bioseq(sep)) {
        return;
    }
    bioseq = (BioseqPtr)(sep->data.ptrvalue);
    if (bioseq->mol == Seq_mol_aa) {
        return;
    }

    for (descr = bioseq->descr; descr != NULL; descr = descr->next) {
        if (descr->choice != Seq_descr_source) {
            continue;
        }
        descr_src = (BioSourcePtr) descr->data.ptrvalue;
        if (collected->sfp != NULL) {
            feat_src = (BioSourcePtr) collected->sfp->data.value.ptrvalue;
        }
        if (descr_src == NULL || feat_src == NULL) {
            continue;
        }
        if (CmpOrgById(descr_src, feat_src) != TRUE) {
            continue;
        }
        if (BSComparisonEx(descr_src, feat_src, TRUE) == 0) {
            BioSourceMerge(descr_src, feat_src);
            merged = TRUE;
        }
    }
    if (!merged) {
        return;
    }

    for (annot = bioseq->annot; annot != NULL; annot = annot->next) {
        if (annot->type != 1) {
            continue;
        }
        /* locate the first BioSource feature of this annotation, if any */
        feat = annot->data;
        while (feat != NULL && feat->data.choice != SEQFEAT_BIOSRC) {
            feat = feat->next;
        }
        if (feat == NULL) {
            continue;
        }
        if (SeqMgrFeaturesAreIndexed (feat->idx.entityID) != 0) {
            SeqMgrClearFeatureIndexes(feat->idx.entityID, NULL);
        }
        annot->data = remove_feat(annot->data, feat);
    }
}
2536
2537 // NOTE: This never finds whole features because they were already cleaned up by
2538 // ConvertFullLenSourceFeatToDesc
/*
 * Collect the whole-length BioSource features found under `sep` and, when
 * exactly one was found, run the merge pass (MergeWholeBSFeat) over the same
 * entry.  The temporary WholeFeat record is released before returning.
 */
void CombineBSFeat(SeqEntryPtr sep)
{
    WholeFeatPtr collected = WholeFeatNew();

    SeqEntryExplore(sep, (Pointer) collected, FindWholeBSFeat);
    if (collected->count == 1) {
        SeqEntryExplore(sep, (Pointer) collected, MergeWholeBSFeat);
    }
    WholeFeatFree(collected);
}
2550
2551 /*****************************************************************************
2552 *
2553 * Count multiple source features print out error messages
2554 *
2555 *****************************************************************************/
/*
 * SeqEntryExplore callback: counts the "source" ImpFeats on a Bioseq whose
 * mol is not Seq_mol_aa and posts diagnostics about them.
 *
 *   sep   - entry being visited (anything but such a Bioseq is ignored)
 *   data  - Boolean PNTR; set to TRUE when no source feature exists, when
 *           the only one does not pass check_whole(), or when
 *           true_multiple() reports several
 *   index, indent - unused
 *
 * Before reporting, the accession/locus user strings used by the error
 * poster are installed from the first text Seq-id found ("SET_UP" when
 * there is none).
 *
 * Fix relative to the previous version: both annotation loops tested the
 * type of the first annotation (sap) instead of the one being visited (ap),
 * so the type filter was wrong whenever the first annotation differed from
 * a later one.
 */
void CountSourceFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    Boolean       whole = FALSE;
    Int2          count = 0;
    Int4          len;
    SeqFeatPtr    f, ff;
    SeqAnnotPtr   sap, ap;
    BioseqPtr     bsp;
    ImpFeatPtr    imp;
    CharPtr       f_org, ff_org;
    GBQualPtr     q;
    SeqIdPtr      sidp;
    TextSeqIdPtr  tsip = NULL;
    Boolean PNTR  pultiple;

    pultiple = (Boolean PNTR) data;
    if (!IS_Bioseq(sep)) {
        return;
    }
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if (bsp == NULL || bsp->mol == Seq_mol_aa) {
        return;
    }
    sap = bsp->annot;
    len = bsp->length;

    /* pick the first Seq-id that carries a TextSeqId */
    for (sidp = bsp->id; sidp != NULL; sidp = sidp->next) {
        switch (sidp->choice)
        {
            case SEQID_GENBANK:     /* genbank */
            case SEQID_EMBL:        /* embl */
            case SEQID_PIR:         /* pir */
            case SEQID_SWISSPROT:   /* swissprot */
            case SEQID_OTHER:       /* other */
            case SEQID_DDBJ:
            case SEQID_PRF:
                tsip = (TextSeqIdPtr) sidp->data.ptrvalue;
                break;
            default:                /* local, gibbsq, gibbmt, giim, patent, general, gi, pdb, ... */
                continue;
        }
        if (tsip != NULL) {
            flat2asn_install_accession_user_string(tsip->accession);
            flat2asn_install_locus_user_string(tsip->name);
            break;
        }
    }
    if (tsip == NULL) {
        flat2asn_install_accession_user_string("SET_UP");
        flat2asn_install_locus_user_string("SET_UP");
    }

    for (ap = sap; ap != NULL; ap = ap->next) {
        if (ap->type != 1) {
            continue;
        }
        for (f = ap->data; f != NULL; f = f->next) {
            if (f->data.choice != SEQFEAT_IMP) {
                continue;
            }
            imp = (ImpFeatPtr) f->data.value.ptrvalue;
            if (imp == NULL || StringCmp(imp->key, "source") != 0) {
                continue;
            }
            for (q = f->qual; q != NULL; q = q->next) {
                if (StringCmp(q->qual, "organism") == 0) {
                    break;
                }
            }
            if (q == NULL) {
                ErrPostStr(SEV_WARNING, ERR_SOURCE_MissingOrganism, "Missing /organism in 'source' feature");
            }
            count++;
            whole = check_whole(f, len);
        }
    }

    if (count == 0) {
        ErrPostStr(SEV_WARNING, ERR_SOURCE_NotFound, "NO SOURCE feature");
        *pultiple = TRUE;
    } else if (count == 1) {
        if (!whole) {
            ErrPostStr(SEV_WARNING, ERR_SOURCE_NotFoundWHole, "one NOT_WHOLE SOURCE feature");
            *pultiple = TRUE;
        }
    } else {
        /* check for /transposon and /insertion_seq and /clone */
        if (true_multiple(sap, len)) {
            ErrPostStr(SEV_WARNING, ERR_SOURCE_Multiple, "MULTIPLE SOURCE features");
            *pultiple = TRUE;
        }
        for (ap = sap; ap != NULL; ap = ap->next) {
            if (ap->type != 1) {
                continue;
            }
            /* pairwise comparison of every two features that carry qualifiers */
            for (f = ap->data; f != NULL; f = f->next) {
                if (f->qual == NULL) {
                    continue;
                }
                f_org = NULL;
                for (q = f->qual; q != NULL; q = q->next) {
                    if (StringCmp(q->qual, "organism") == 0) {
                        f_org = q->val;
                        break;
                    }
                }
                for (ff = f->next; ff != NULL; ff = ff->next) {
                    if (ff->qual == NULL) {
                        continue;
                    }
                    ff_org = NULL;
                    for (q = ff->qual; q != NULL; q = q->next) {
                        if (StringCmp(q->qual, "organism") == 0) {
                            ff_org = q->val;
                            break;
                        }
                    }
                    if (f_org && ff_org) {
                        if (StringCmp(f_org, ff_org) != 0) {
                            ErrPostEx(SEV_WARNING, ERR_SOURCE_Diff, "Different SOURCE features: %s|%s",
                                f_org, ff_org);
                        }
                        count_join(f, ff);
                    }
                }
            }
        }
    }
}
2695
2696 /*****************************************************************************
2697 *
2698 * Check multiple source features and try to correct them
2699 *
2700 *****************************************************************************/
2701 // NOTE that this is never called by the cleanup library
/*
 * SeqEntryExplore callback: tries to repair the "source" features of a
 * Bioseq whose mol is not Seq_mol_aa.
 *
 *   sep   - entry being visited
 *   data  - CharPtr PNTR; receives the taxname of the first Org-ref
 *           descriptor seen (on any visited entry) when it is still NULL,
 *           and supplies the organism name for a feature created here
 *   index, indent - unused
 *
 * For such a Bioseq, working on its first annotation only:
 *   - the source features are extracted with ExtractSourceFeatList();
 *   - with none, a whole-sequence "source" ImpFeat with an /organism
 *     qualifier is created;
 *   - with exactly one that does not pass check_whole(), its location is
 *     replaced by a whole-sequence location;
 *   - with several, features that have the same /organism are passed
 *     pairwise to feat_join();
 *   - features left without qualifiers are removed and the remaining source
 *     features are tied back onto the annotation's feature list.
 *
 * Fixes relative to the previous version:
 *   - a MemNew()'d copy of the organism name was made for whole features
 *     and never read or freed; it is gone;
 *   - f_org / ff_org are reset for every feature, so a feature without
 *     /organism is no longer compared using a previous feature's organism
 *     (CountSourceFeat() already resets them);
 *   - the SeqLocPrint() string and the location being replaced are freed;
 *   - the feature list is written back only into a feature-table annotation
 *     (type 1), not into whatever the first annotation happens to be.
 *
 * NOTE(review): when the first annotation is missing or is not a feature
 * table, a feature created here has nowhere to go and is leaked.
 * NOTE(review): the do/while repeats while the LAST feat_join() call of a
 * pass returned TRUE; termination for overlapping pairs deserves a check.
 */
void CorrectSourceFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    Boolean       whole = FALSE, new = FALSE;
    Int2          count = 0;
    Int4          len = 0;
    ValNodePtr    vnp;
    SeqFeatPtr    sfp = NULL, tmp_sfp = NULL, f, ff, fnext;
    SeqAnnotPtr   sap;
    BioseqPtr     bsp = NULL;
    BioseqSetPtr  bssp;
    ImpFeatPtr    imp;
    OrgRefPtr     orp;
    CharPtr       org_name, f_org, ff_org, printed;
    GBQualPtr     q;
    SeqLocPtr     slp;
    static Char   msg[51];
    CharPtr PNTR  pporg;
    Boolean       is_na = FALSE;

    pporg = (CharPtr PNTR) data;
    if (IS_Bioseq(sep))
    {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        vnp = bsp->descr;
        sap = bsp->annot;
        len = bsp->length;
        if (bsp->mol != Seq_mol_aa) {
            is_na = TRUE;
        }
    }
    else
    {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        vnp = bssp->descr;
        sap = bssp->annot;
    }

    /* remember the first organism name seen on any visited entry */
    for (; vnp != NULL; vnp = vnp->next) {
        if (vnp->choice != Seq_descr_org) {
            continue;
        }
        orp = vnp->data.ptrvalue;
        if (orp != NULL && *pporg == NULL) {
            *pporg = orp->taxname;
        }
    }
    org_name = *pporg;
    if (bsp == NULL || !is_na) {
        return;
    }

    if (sap != NULL && sap->type == 1) {
        tmp_sfp = (SeqFeatPtr) (sap->data);
        sfp = ExtractSourceFeatList(&(tmp_sfp));
    }
    for (f = sfp; f != NULL; f = f->next) {
        count++;
        if (f->data.choice != SEQFEAT_IMP) {
            continue;
        }
        imp = (ImpFeatPtr) f->data.value.ptrvalue;
        if (imp != NULL) {
            /* only consulted below when count <= 1, where f is the list head */
            whole = check_whole(f, len);
        }
    }

    if (!whole) {
        if (count == 0) {
            ErrPostStr(SEV_WARNING, ERR_SOURCE_NotFound, "NO ORGANISM feature! Create one");
            sfp = SeqFeatNew();
            imp = ImpFeatNew();
            sfp->data.choice = SEQFEAT_IMP;
            sfp->data.value.ptrvalue = imp;
            imp->key = StringSave("source");
            q = GBQualNew();
            q->qual = StringSave("organism");
            if (org_name) {
                q->val = StringSave(org_name);
            } else {
                q->val = StringSave("unknown");
            }
            sfp->qual = q;
            slp = ValNodeNew(NULL);
            slp->choice = SEQLOC_WHOLE;
            slp->data.ptrvalue = (SeqLocPtr) SeqIdDup(bsp->id);
            sfp->location = slp;
        } else if (count == 1) {
            printed = SeqLocPrint(sfp->location);
            StringNCpy(msg, printed, 50);
            msg[50] = '\0';
            MemFree(printed);
            ErrPostEx(SEV_WARNING, ERR_SOURCE_NotFoundWHole,
                "Convert source feature %s to whole", msg);
            slp = ValNodeNew(NULL);
            slp->choice = SEQLOC_WHOLE;
            slp->data.ptrvalue = (SeqLocPtr) SeqIdDup(bsp->id);
            SeqLocFree(sfp->location);      /* release the location being replaced */
            sfp->location = slp;
        }
    }

    if (count > 1) {
        do {
            for (f = sfp; f != NULL; f = f->next) {
                if (f->qual == NULL) {
                    continue;
                }
                f_org = NULL;
                for (q = f->qual; q != NULL; q = q->next) {
                    if (StringCmp(q->qual, "organism") == 0) {
                        f_org = q->val;
                        break;
                    }
                }
                for (ff = f->next; ff != NULL; ff = ff->next) {
                    if (ff->qual == NULL) {
                        continue;
                    }
                    ff_org = NULL;
                    for (q = ff->qual; q != NULL; q = q->next) {
                        if (StringCmp(q->qual, "organism") == 0) {
                            ff_org = q->val;
                            break;
                        }
                    }
                    if (StringCmp(f_org, ff_org) != 0) {
                        continue;
                    }
                    new = feat_join(f, ff, sfp);
                }
            }
        } while (new);
    }

    /* drop the features that feat_join() emptied */
    for (f = sfp; f != NULL; f = fnext) {
        fnext = f->next;
        if (f->qual == NULL) {
            sfp = remove_feat(sfp, f);
        }
    }

    tmp_sfp = tie_feat(tmp_sfp, sfp);
    if (sap != NULL && sap->type == 1) {
        sap->data = tmp_sfp;
        if (tmp_sfp == NULL) {
            bsp->annot = NULL;
        }
    }
}
2860 //LCOV_EXCL_STOP
2861
2862
BioSourceToGeneticCode(BioSourcePtr biop)2863 Int2 BioSourceToGeneticCode (BioSourcePtr biop)
2864 {
2865 OrgNamePtr onp;
2866 OrgRefPtr orp;
2867 Uint1 pgcode;
2868
2869 if (biop != NULL) {
2870 orp = biop->org;
2871 if (orp != NULL) {
2872 onp = orp->orgname;
2873 if (onp != NULL) {
2874 if (biop->genome == GENOME_kinetoplast ||
2875 biop->genome == GENOME_mitochondrion ||
2876 biop->genome == GENOME_hydrogenosome) {
2877 return onp->mgcode;
2878 } else if (biop->genome == GENOME_chloroplast ||
2879 biop->genome == GENOME_chromoplast ||
2880 biop->genome == GENOME_plastid ||
2881 biop->genome == GENOME_cyanelle ||
2882 biop->genome == GENOME_apicoplast ||
2883 biop->genome == GENOME_leucoplast ||
2884 biop->genome == GENOME_proplastid) {
2885 if (onp->pgcode > 0) {
2886 return onp->pgcode;
2887 } else {
2888 pgcode = GetSpecialPlastidGenCode (orp->taxname, onp->lineage);
2889 if (pgcode > 0) {
2890 return pgcode;
2891 }
2892 return 11;
2893 }
2894 } else {
2895 return onp->gcode;
2896 }
2897 }
2898 }
2899 }
2900 return 0;
2901 }
2902
/*
 * Descriptor visitor: stores the BioSource of the first source descriptor it
 * is called with into the BioSourcePtr that `userdata` points to.  Later
 * calls leave an already filled slot untouched.
 */
static void GetTopBiop (SeqDescrPtr sdp, Pointer userdata)

{
    BioSourcePtr PNTR  slot;

    if (sdp == NULL) return;
    if (sdp->choice != Seq_descr_source) return;

    slot = (BioSourcePtr PNTR) userdata;
    if (slot == NULL || *slot != NULL) return;

    *slot = (BioSourcePtr) sdp->data.ptrvalue;
}
2914
GetTopBioSourceFromSep(SeqEntryPtr sep)2915 static BioSourcePtr GetTopBioSourceFromSep (SeqEntryPtr sep)
2916
2917 {
2918 BioSourcePtr biop = NULL;
2919
2920 VisitDescriptorsInSep (sep, (Pointer) &biop, GetTopBiop);
2921 return biop;
2922 }
2923
/*
 * Rewrite "PIDe"/"PIDd" general Seq-ids in the chain *vnpp.
 *
 * For every SEQID_GENERAL node whose Dbtag db starts with "PIDe" or "PIDd",
 * db becomes "PID" and the numeric tag id N becomes the string tag "eN" or
 * "dN" (the id is reset to 0).
 *
 * Fixes relative to the previous version: nodes with a missing Dbtag tag are
 * skipped instead of dereferenced, a tag string that is already present is
 * released before being replaced, and the two identical branches are merged.
 */
static void FixPIDDbtag(ValNodePtr PNTR vnpp)
{
    ValNodePtr  vnp;
    DbtagPtr    db;
    char        prefix;
    Char        val[166];

    if (vnpp == NULL) {
        return;
    }
    for (vnp = *vnpp; vnp != NULL; vnp = vnp->next) {
        if (vnp->choice != SEQID_GENERAL) {
            continue;
        }
        db = vnp->data.ptrvalue;
        if (db == NULL || db->tag == NULL) {
            continue;
        }
        if (StringNCmp(db->db, "PIDe", 4) == 0) {
            prefix = 'e';
        } else if (StringNCmp(db->db, "PIDd", 4) == 0) {
            prefix = 'd';
        } else {
            continue;
        }
        MemFree(db->db);
        db->db = StringSave("PID");
        sprintf(val, "%c%ld", prefix, (long) db->tag->id);
        MemFree(db->tag->str);
        db->tag->str = StringSave(val);
        db->tag->id = 0;
    }
}
2955
GetProduct(ValNodePtr product,ValNodePtr location)2956 static CharPtr GetProduct(ValNodePtr product, ValNodePtr location)
2957 {
2958 CharPtr protein_seq=NULL, start_ptr=NULL;
2959 Int4 length;
2960 SeqPortPtr spp;
2961 Uint1 residue, code;
2962 BioseqPtr bsp;
2963 SeqIdPtr sip;
2964
2965 if (product == NULL)
2966 return NULL;
2967 sip = SeqLocId(product);
2968 bsp = BioseqFindCore(sip);
2969 if (bsp == NULL) /* Bioseq is (or has been) in memory */
2970 return NULL;
2971 code = Seq_code_ncbieaa;
2972 length = SeqLocLen(product);
2973 if (length <= 0) {
2974 return NULL;
2975 }
2976 start_ptr = protein_seq =
2977 (CharPtr) MemNew((size_t) (length*sizeof(CharPtr)));
2978 spp = SeqPortNewByLoc(product, code);
2979 spp->do_virtual = TRUE;
2980 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
2981 if ( !IS_residue(residue) && residue != INVALID_RESIDUE )
2982 continue;
2983 if (residue == INVALID_RESIDUE)
2984 residue = (Uint1) 'X';
2985 *protein_seq = residue;
2986 protein_seq++;
2987 }
2988 SeqPortFree(spp);
2989 return start_ptr;
2990 }
2991
/*
 * Delete the first occurrence of `str` from `base`, in place, by moving the
 * remainder of the string (terminator included) down over it.  Returns
 * `base`.
 *
 * Source and destination overlap, so memmove() is used; the previous
 * StringCpy() of a string onto itself is undefined behaviour.
 */
static CharPtr stripStr(CharPtr base, CharPtr str)
{
    CharPtr  bptr, eptr;

    bptr = StringStr(base, str);
    if (bptr != NULL) {
        eptr = bptr + StringLen(str);
        memmove(bptr, eptr, StringLen(eptr) + 1);
    }

    return base;
}
3004
/*
 * Tidy `str` in place and return a copy made with Nlm_StringSave().
 *
 *   - leading white space and commas are skipped;
 *   - every newline, together with the white space that follows it, is
 *     collapsed into a single blank;
 *   - trailing blanks, tabs, semicolons, commas and double quotes are
 *     removed.
 *
 * Returns NULL when `str` is NULL or nothing is left after the clean-up.
 *
 * Fixes relative to the previous version: the overlapping strcpy() is
 * replaced by memmove(), characters are converted to unsigned char before
 * being handed to isspace(), and the trailing trim works on an index so no
 * pointer before the start of the buffer is formed for an empty string.
 */
static CharPtr space_save(CharPtr str)
{
    CharPtr  s, ss;
    size_t   len;

    if (str == NULL) {
        return NULL;
    }
    while (isspace((unsigned char) *str) || *str == ',') {
        str++;
    }
    for (s = str; *s != '\0'; s++) {
        if (*s == '\n') {
            for (ss = s + 1; isspace((unsigned char) *ss); ss++) continue;
            *s = ' ';
            memmove(s + 1, ss, StringLen(ss) + 1);
        }
    }
    len = StringLen(str);
    while (len > 0 && (str[len - 1] == ' ' || str[len - 1] == ';' ||
                       str[len - 1] == ',' || str[len - 1] == '\"' ||
                       str[len - 1] == '\t')) {
        str[--len] = '\0';
    }

    if (*str == '\0') {
        return NULL;
    }
    return Nlm_StringSave(str);
}
3031
/*
 * Remove the two generated notes about the conceptual translation from the
 * feature comment.  The first occurrence of each note is cut out, the result
 * is tidied with space_save(), a trailing ';' is dropped, and the feature
 * comment is replaced by that new string (NULL when nothing remains).
 * A feature without a comment is returned untouched.
 */
static SeqFeatPtr StripCDSComment(SeqFeatPtr sfp)
{
    CharPtr  conflict_note = "Author-given protein sequence is in conflict with the conceptual translation.";
    CharPtr  method_note = "Method: conceptual translation supplied by author.";
    CharPtr  text, cleaned, last;

    text = sfp->comment;
    if (text != NULL) {
        text = stripStr(text, conflict_note);
        text = stripStr(text, method_note);

        cleaned = space_save(text);
        if (cleaned != NULL) {
            last = cleaned + StringLen(cleaned) - 1;
            if (*last == ';') {
                *last = '\0';
            }
        }
        MemFree(sfp->comment);
        sfp->comment = cleaned;
    }
    return sfp;
}
3055
CompareTranslation(ByteStorePtr bsp,CharPtr qval)3056 static Boolean CompareTranslation(ByteStorePtr bsp, CharPtr qval)
3057 {
3058 CharPtr ptr;
3059 Int2 residue /* , residue1, residue2 */ ;
3060 Int4 len, blen;
3061 /*
3062 Boolean done;
3063 */
3064
3065 if(qval == NULL || *qval == '\0')
3066 return(FALSE);
3067 len = StringLen(qval);
3068 BSSeek(bsp, 0, SEEK_SET);
3069
3070 blen = BSLen(bsp);
3071 #if 0
3072 done = FALSE;
3073 while ((! done) && (len)) {
3074 residue1 = qval[(len-1)];
3075 if (residue1 == 'X') /* remove terminal X */
3076 len--;
3077 else
3078 done = TRUE;
3079 }
3080 done = FALSE;
3081 while ((! done) && (blen)) {
3082 BSSeek(bsp, (blen-1), SEEK_SET);
3083 residue2 = BSGetByte(bsp);
3084 if (residue2 == 'X')
3085 blen--;
3086 else
3087 done = TRUE;
3088 }
3089 #endif
3090 BSSeek(bsp, 0, SEEK_SET);
3091 if (blen != len) {
3092 return FALSE;
3093 } else {
3094 for (ptr = qval; *ptr != '\0' &&
3095 (residue = BSGetByte(bsp)) != EOF; ptr++) {
3096
3097 if (residue != *ptr) {
3098 return FALSE;
3099 }
3100
3101 } /* for */
3102
3103 } /* compare two sequences */
3104 return TRUE;
3105 }
3106
/*
 * Feature visitor for coding regions.  `userdata` points to the Int2
 * genetic code obtained from the BioSource.
 *
 * For a CDS with a product:
 *   - every dbxref whose db starts with "PID" is removed;
 *   - "PIDe"/"PIDd" general ids are rewritten (FixPIDDbtag) on the product
 *     location, on the product Bioseq and on the locations of its protein
 *     features;
 *   - when the conflict flag is set, the product sequence is compared with
 *     the translation of the CDS: if they are equal the flag is cleared,
 *     otherwise the tech of the product's MolInfo descriptor is set to 13.
 * Then the generated translation notes are stripped from the comment and,
 * unless the feature is pseudo, an error is posted when the genetic code of
 * the CDS (1 when it has none) differs from the supplied one.
 *
 * Fixes relative to the previous version:
 *   - a dbxref without a Dbtag, a molinfo descriptor without data and a
 *     GeneticCode without nodes are no longer dereferenced;
 *   - the genetic code id is taken from the node whose choice is 2 (the
 *     choice this file uses when it stores an id) instead of from whatever
 *     node happens to be first.
 */
static void CheckGCode (SeqFeatPtr sfp, Pointer userdata)

{
    Int2Ptr         codep;
    Uint1           code;
    SeqFeatPtr      f;
    CdRegionPtr     cds;
    BioseqPtr       bsp = NULL;
    SeqAnnotPtr     ap;
    ValNodePtr      vnp, vnpnext;
    DbtagPtr        db;
    GeneticCodePtr  grp;
    Uint1           gcpvalue;
    CharPtr         protein_seq = NULL;
    CharPtr         str;
    ByteStorePtr    byte_sp;
    MolInfoPtr      mfp;

    if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return;
    codep = (Int2Ptr) userdata;
    if (codep == NULL) return;
    code = (Uint1) *codep;

    cds = (CdRegionPtr) sfp->data.value.ptrvalue;
    if (cds == NULL) return;

    if (sfp->product != NULL) {
        /* remove all PID dbxref */
        for (vnp = sfp->dbxref; vnp != NULL; vnp = vnpnext) {
            vnpnext = vnp->next;
            db = vnp->data.ptrvalue;
            if (db != NULL && db->db != NULL &&
                StringNCmp(db->db, "PID", 3) == 0) {
                sfp->dbxref = remove_node(sfp->dbxref, vnp);
            }
        }
        /* change PID in the SeqId GENERAL dbtag of the product location */
        vnp = SeqLocId(sfp->product);
        FixPIDDbtag(&vnp);
        /* change PID in protein SeqID GENERAL dbtag */
        bsp = BioseqFind(SeqLocId(sfp->product));
        if (bsp != NULL) {
            FixPIDDbtag(&(bsp->id));
            /* change SeqId GENERAL dbtag in ProtRef */
            for (ap = bsp->annot; ap != NULL; ap = ap->next) {
                if (ap->type != 1) {
                    continue;
                }
                for (f = ap->data; f != NULL; f = f->next) {
                    if (f->data.choice != SEQFEAT_PROT) {
                        continue;
                    }
                    vnp = SeqLocId(f->location);
                    FixPIDDbtag(&vnp);
                }
            }
        }
    }

    /* check the translation - skip if conflict flag not set */
    if (sfp->product && cds->conflict) {
        protein_seq = GetProduct(sfp->product, sfp->location);
        byte_sp = ProteinFromCdRegionEx (sfp, FALSE, FALSE);
        if (cds->conflict == TRUE) {
            if (CompareTranslation (byte_sp, protein_seq)) {
                cds->conflict = FALSE;
            } else if (bsp != NULL) {
                for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next) {
                    if (vnp->choice == Seq_descr_molinfo) {
                        break;
                    }
                }
                if (vnp != NULL && vnp->data.ptrvalue != NULL) {
                    mfp = vnp->data.ptrvalue;
                    mfp->tech = 13;     /* _concept_transl_a */
                }
            }
        }
        if (protein_seq)
            MemFree(protein_seq);
        if (byte_sp)
            BSFree(byte_sp);
    }

    /* remove asn2ff_generated comments */
    sfp = StripCDSComment(sfp);

    /* check genetic code */
    if (GBQualPresent("pseudo", sfp->qual) == TRUE) {
        return;
    }
    if (sfp->pseudo) return;

    gcpvalue = 1;                       /* default when the CDS names no code */
    grp = cds->genetic_code;
    if (grp != NULL) {
        for (vnp = grp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
            if (vnp->choice == 2) {     /* genetic code id */
                gcpvalue = (Uint1) vnp->data.intvalue;
                break;
            }
        }
    }
    if (gcpvalue != code) {
        str = SeqLocPrint(sfp->location);
        ErrPostEx(SEV_ERROR, ERR_TAXONOMY_GeneticCode,
            "Genetic code from Taxonomy server is different from the one in CDS %s: %d|%d", str, (int) code, (int) gcpvalue);
        MemFree(str);
    }
}
3215
/***************************************************************************
*  This function does three things:
*   1 - checks the genetic code of each CDS against the one from the
*       Taxonomy dbase (taken from the BioSource)
*   2 - removes the PID db_xrefs of a CDS if its product is present
*   3 - changes PIDe / PIDd to PID in dbtag
****************************************************************************/
/*
 * Take the genetic code from the first BioSource descriptor found for `sep`
 * and run CheckGCode() over every feature of the entry with it.  A warning
 * is posted, and nothing is checked, when that code is not positive; a NULL
 * entry or an entry without BioSource is ignored.
 */
static void CheckGeneticCode (SeqEntryPtr sep)

{
    BioSourcePtr  top_source;
    Int2          source_code;

    if (sep == NULL) return;

    top_source = GetTopBioSourceFromSep (sep);
    if (top_source == NULL) return;

    source_code = BioSourceToGeneticCode (top_source);
    if (source_code > 0) {
        VisitFeaturesInSep (sep, (Pointer) &source_code, CheckGCode);
    } else {
        ErrPostStr(SEV_WARNING, ERR_SOURCE_GeneticCode, "Genetic code in BioSource not found");
    }
}
3240
3241 //LCOV_EXCL_START
ParseRange(CharPtr pos,Int4 PNTR from,Int4 PNTR to)3242 static Boolean ParseRange(CharPtr pos, Int4 PNTR from, Int4 PNTR to)
3243 {
3244 CharPtr ptr, ptr1, ptr2;
3245
3246 *from = *to = -1;
3247
3248 if (!IS_DIGIT(*pos))
3249 return FALSE;
3250 /* 1st digit */
3251 for (ptr = pos; IS_DIGIT(*ptr) && *ptr != '\0'; ptr++) continue;
3252
3253 if (*ptr != '\0') {
3254 *from = (Int4) atoi(pos);
3255
3256 ptr1 = ptr;
3257 if (*ptr1 == '.')
3258 ++ptr1;
3259 else
3260 return FALSE;
3261
3262 if (*ptr1 == '.') {
3263 ++ptr1;
3264 } else {
3265 return FALSE;
3266 } /* 2nd digit */
3267 for (ptr2 = ptr1; IS_DIGIT(*ptr2) && *ptr2 != '\0'; ptr2++) continue;
3268
3269 if (*ptr2 != '\0') {
3270 return FALSE;
3271 } else {
3272 *to = (Int4) atoi(ptr1);
3273 return (TRUE);
3274 }
3275 } else {
3276 return FALSE;
3277 }
3278
3279 }
3280
SeqLocFromPos(SeqIdPtr sid,CharPtr pos)3281 static SeqLocPtr SeqLocFromPos(SeqIdPtr sid, CharPtr pos)
3282 {
3283 SeqLocPtr slp;
3284 SeqIntPtr sip;
3285 Int4 from, to;
3286
3287 if (ParseRange(pos, &from, &to)) {
3288
3289 sip = SeqIntNew();
3290 sip->from = from - 1;
3291 sip->to = to - 1;
3292
3293 slp = ValNodeNew(NULL);
3294 slp->choice = SEQLOC_INT;
3295 slp->data.ptrvalue = SeqIdDup(sid);
3296 } else {
3297 slp = ValNodeNew(NULL);
3298 slp->choice = SEQLOC_WHOLE;
3299 slp->data.ptrvalue = (SeqIdPtr) SeqIdDup(sid);
3300 }
3301 return (slp);
3302
3303 }
3304
/*
 * Extract the position part of a qualifier value: the text that follows
 * "(pos:" (after optional blanks and an optional "(complement)" marker) up
 * to the next comma or the end of the string.  Returns a copy made with
 * TextSave(), or NULL when "(pos:" does not occur in `qval`.
 */
static CharPtr GetQualValuePos(CharPtr qval)
{
    CharPtr  start, stop;

    start = StringStr(qval, "(pos:");
    if (start == NULL) {
        return NULL;
    }

    start += 5;                             /* length of "(pos:" */
    while (*start == ' ') {
        start++;
    }
    if (StringNCmp(start, "(complement)", 12) == 0) {
        start += 12;
    }
    while (*start == ' ') {
        start++;
    }

    stop = start;
    while (*stop != ',' && *stop != '\0') {
        stop++;
    }

    return (TextSave(start, stop - start));
}
3326
GetQualValueAa(CharPtr qval)3327 static Uint1 GetQualValueAa(CharPtr qval)
3328 {
3329 CharPtr str, eptr = NULL, ptr;
3330 Uint1 aa = 0;
3331
3332 str = StringStr(qval, "aa:");
3333 if (str != NULL) {
3334 str += 3;
3335 while (*str == ' ')
3336 ++str;
3337 for (eptr = str; *eptr != ')' && *eptr != ' ' && *eptr != '\0'; eptr++) continue;
3338 }
3339
3340 if (eptr != NULL && str != NULL) {
3341 ptr = TextSave(str, eptr-str);
3342 aa = ValidAminoAcid(ptr);
3343 MemFree(ptr);
3344 }
3345
3346 return (aa);
3347
3348 }
3349
3350 // Note: conversion of impfeat cds is handled by CleanUpSeqFeat,
3351 // which is called before this function is
ImpFeatToCdregion(SeqFeatPtr sfp)3352 Boolean ImpFeatToCdregion(SeqFeatPtr sfp)
3353 {
3354 ImpFeatPtr imp;
3355 GBQualPtr q, qnext;
3356 Int2 frame = -1;
3357 CdRegionPtr crp;
3358 CharPtr pos;
3359 GeneticCodePtr gcp;
3360 Uint1 gc;
3361 ValNodePtr vnp;
3362 Choice cp;
3363 CodeBreakPtr hcbp = NULL, cbp;
3364 SeqIntPtr sip;
3365 SeqLocPtr loc;
3366 BioseqPtr bsp;
3367 SeqIdPtr sidp = NULL;
3368
3369 if (sfp == NULL)
3370 return FALSE;
3371 if (sfp->data.choice != SEQFEAT_IMP)
3372 return FALSE;
3373 imp = sfp->data.value.ptrvalue;
3374 if (StringCmp(imp->key, "CDS") != 0)
3375 return FALSE;
3376
3377 /* do not convert ImpCDS if EMBL or DDBJ */
3378 bsp = BioseqFindFromSeqLoc (sfp->location);
3379 if (bsp != NULL) {
3380 for (sidp = bsp->id;
3381 sidp != NULL && sidp->choice != SEQID_EMBL && sidp->choice != SEQID_DDBJ;
3382 sidp = sidp->next) continue;
3383 }
3384 if (sidp != NULL) return FALSE;
3385
3386 sfp->data.choice = SEQFEAT_CDREGION;
3387 ImpFeatFree(imp);
3388 crp = CdRegionNew();
3389 sfp->data.value.ptrvalue = crp;
3390 for (q = sfp->qual; q; q = qnext) {
3391 qnext = q->next;
3392 if (StringCmp(q->qual, "transl_table") == 0) {
3393 gc = (Uint1) atoi(q->val);
3394 vnp = ValNodeNew(NULL);
3395 vnp->choice = 2;
3396 vnp->data.intvalue = gc;
3397 gcp = GeneticCodeNew();
3398 gcp->data.ptrvalue = vnp;
3399 crp->genetic_code = gcp;
3400 sfp->qual = remove_qual(sfp->qual, q);
3401 } else if (StringCmp(q->qual, "translation") == 0) {
3402 sfp->qual = remove_qual(sfp->qual, q);
3403 } else if (StringCmp(q->qual, "transl_except") == 0) {
3404 cp.choice = 1; /* ncbieaa */
3405 cp.value.intvalue = (Int4) GetQualValueAa(q->val);
3406 pos = GetQualValuePos(q->val);
3407 loc = SeqLocFromPos(SeqLocId(sfp->location), pos);
3408 if (loc->choice !=SEQLOC_INT) {
3409 ErrPostEx(SEV_WARNING, ERR_FEATURE_BadLocation,
3410 "Location error for code break [%s]", pos);
3411 MemFree(pos);
3412 continue;
3413 }
3414 cbp = CodeBreakNew();
3415 cbp->aa = cp;
3416 cbp->loc = loc;
3417 sip = cbp->loc->data.ptrvalue;
3418 sip->strand = SeqLocStrand(sfp->location);
3419 if (SeqLocCompare(sfp->location, cbp->loc) != SLC_B_IN_A) {
3420 CodeBreakFree(cbp);
3421 cbp = NULL;
3422 }
3423 MemFree(pos);
3424 hcbp = tie_next_cbp(hcbp, cbp);
3425 sfp->qual = remove_qual(sfp->qual, q);
3426 } else if (StringCmp(q->qual, "codon_start") == 0) {
3427 frame = (Uint1) atoi(q->val);
3428 sfp->qual = remove_qual(sfp->qual, q);
3429 crp->frame = (Uint1)frame;
3430 } else if (StringCmp(q->qual, "exception") == 0) {
3431 sfp->excpt = TRUE;
3432 }
3433
3434 }
3435 if (frame == -1) {
3436 frame = GetFrameFromLoc(sfp->location);
3437 crp->frame = (Uint1)frame;
3438 }
3439
3440 return TRUE;
3441 }
3442 //LCOV_EXCL_STOP
3443
/*
 * NoteToComment
 *   Moves every "note" qualifier (name compared case-insensitively) of sfp
 *   into sfp->comment.  The first note becomes the comment itself when no
 *   comment exists yet; later notes are appended after "; ".  Each handled
 *   qualifier is removed from the qualifier list.
 */
static void NoteToComment (SeqFeatPtr sfp)
{
  GBQualPtr  curr;
  GBQualPtr  following;
  CharPtr    merged;
  size_t     needed;

  curr = sfp->qual;
  while (curr != NULL) {
    /* remember the successor before the node can be unlinked */
    following = curr->next;
    if (StringICmp (curr->qual, "note") == 0) {
      if (sfp->comment != NULL) {
        needed = StringLen (sfp->comment) + StringLen (curr->val) + 5;
        merged = MemNew (sizeof (Char) * needed);
        StringCpy (merged, sfp->comment);
        StringCat (merged, "; ");
        StringCat (merged, curr->val);
        MemFree (sfp->comment);
        MemFree (curr->val);
        sfp->comment = merged;
      } else {
        /* the comment takes over the qualifier's string */
        sfp->comment = curr->val;
      }
      /* the value is either freed or now owned by the comment */
      curr->val = NULL;
      sfp->qual = remove_qual (sfp->qual, curr);
    }
    curr = following;
  }
}
/*
 * ChangeImpFeat
 *   SeqEntryExplore callback.  For every feature in the type-1 annotations of
 *   the Bioseq or Bioseq-set held by sep: folds "note" qualifiers into the
 *   comment, and for import features additionally runs ChangeReplaceToQual
 *   and ImpFeatToCdregion.  data, index and indent are not used.
 */
static void ChangeImpFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  SeqAnnotPtr  annots;
  SeqAnnotPtr  sap;
  SeqFeatPtr   feat;

  if (IS_Bioseq(sep)) {
    annots = ((BioseqPtr)(sep->data.ptrvalue))->annot;
  } else {
    annots = ((BioseqSetPtr)(sep->data.ptrvalue))->annot;
  }

  for (sap = annots; sap != NULL; sap = sap->next) {
    /* only annotations of type 1 are walked as feature lists */
    if (sap->type == 1) {
      for (feat = sap->data; feat != NULL; feat = feat->next) {
        if (feat->qual) {
          NoteToComment(feat);
        }
        if (feat->data.choice == SEQFEAT_IMP) {
          ChangeReplaceToQual(feat);
          ImpFeatToCdregion(feat);
        }
      }
    }
  }
}
3502
/*
 * ChangeReplaceToQual
 *   If the import feature's loc string contains "replace", passes the text
 *   from that point on to AddReplaceQual and then frees and clears the loc
 *   string.  Does nothing when the feature has no ImpFeat data or no loc.
 *   The feature data is read as an ImpFeat without checking the choice.
 */
void ChangeReplaceToQual(SeqFeatPtr sfp)
{
  ImpFeatPtr  imp;
  CharPtr     hit;

  imp = (ImpFeatPtr) sfp->data.value.ptrvalue;
  if (imp != NULL && imp->loc != NULL) {
    hit = StringStr(imp->loc, "replace");
    if (hit != NULL) {
      //LCOV_EXCL_START
      // This is never called, because BasicCleanup would have removed it already
      AddReplaceQual(sfp, hit);
      MemFree(imp->loc);
      imp->loc = NULL;
      //LCOV_EXCL_STOP
    }
  }
}
3521
3522 //LCOV_EXCL_START
3523 /**********************************************************/
/*
 * AddReplaceQual
 *   p is expected to look like  replace(...,"text")  and to end in ')'.
 *   The text between the first double quote and the last double quote before
 *   the closing ')' is added to sfp as a "replace" qualifier.  p is modified
 *   temporarily while the qualifier is added and restored before returning.
 *
 *   Nothing is added when sfp or p is NULL, when p has no double quote, does
 *   not end in ')', or contains only a single (unterminated) double quote.
 */
void AddReplaceQual(SeqFeatPtr sfp, CharPtr p)
{
  CharPtr  s, val;

  if (sfp == NULL || p == NULL)
    return;

  val = StringChr(p, '\"');
  if (val == NULL)
    return;
  val++;                        /* first character of the quoted text */

  s = p + StringLen(p) - 1;     /* p is non-empty here: it contains a quote */
  if (*s != ')')
    return;

  /* walk back from ')' to the closing quote, never past the text start */
  for (s--; s > val && *s != '\"'; s--) continue;

  /*
   * s < val means the only quote before ')' is the opening one (for example
   * replace(") ); treating it as the closing quote would store ")" as the
   * replacement text, so reject it.
   */
  if (s < val || *s != '\"')
    return;

  *s = '\0';
  sfp->qual = (GBQualPtr) AddGBQual(sfp->qual, "replace", val);
  *s = '\"';
}
3543 //LCOV_EXCL_STOP
3544
3545 /***************************************************************************
3546 * check and remove HTG keywords automaticly generated by asn2ff
3547 * HTG info is redundand in GBBlock
3548 ***************************************************************************/
3549
CheckKeywords(GBBlockPtr gbp,Uint1 tech)3550 static void CheckKeywords(GBBlockPtr gbp, Uint1 tech)
3551 {
3552 ValNodePtr vnp, vnpnext;
3553 CharPtr word;
3554
3555 if (gbp == NULL || gbp->keywords == NULL)
3556 return;
3557 for (vnp = gbp->keywords; vnp; vnp=vnpnext) {
3558 vnpnext = vnp->next;
3559 word = (CharPtr) vnp->data.ptrvalue;
3560 if (StringCmp(word, "HTG") == 0) {
3561 MemFree(word);
3562 gbp->keywords = remove_node(gbp->keywords, vnp);
3563 }
3564 else if (tech == MI_TECH_htgs_0 && StringCmp(word, "HTGS_PHASE0") == 0) {
3565 MemFree(word);
3566 gbp->keywords = remove_node(gbp->keywords, vnp);
3567 }
3568 else if (tech == MI_TECH_htgs_1 && StringCmp(word, "HTGS_PHASE1") == 0) {
3569 MemFree(word);
3570 gbp->keywords = remove_node(gbp->keywords, vnp);
3571 }
3572 else if (tech == MI_TECH_htgs_2 && StringCmp(word, "HTGS_PHASE2") == 0) {
3573 MemFree(word);
3574 gbp->keywords = remove_node(gbp->keywords, vnp);
3575 }
3576 else if (tech == MI_TECH_htgs_3 && StringCmp(word, "HTGS_PHASE3") == 0) {
3577 MemFree(word);
3578 gbp->keywords = remove_node(gbp->keywords, vnp);
3579 }
3580 else if (tech == MI_TECH_est && StringCmp(word, "EST") == 0) {
3581 MemFree(word);
3582 gbp->keywords = remove_node(gbp->keywords, vnp);
3583 }
3584 else if (tech == MI_TECH_sts && StringCmp(word, "STS") == 0) {
3585 MemFree(word);
3586 gbp->keywords = remove_node(gbp->keywords, vnp);
3587 }
3588 }
3589 return;
3590 }
3591
/*
 * ChangeGBDiv
 *   SeqEntryExplore callback that drops GBBlock division strings (and some
 *   keywords) that duplicate information held elsewhere.
 *
 *   data   - the organism division string (CharPtr); may be NULL
 *   index, indent - not used
 *
 *   For every GenBank descriptor on the Bioseq or Bioseq-set:
 *     - if the first MolInfo descriptor has an HTGS/EST/STS technique,
 *       CheckKeywords removes the matching keywords;
 *     - if gbp->div names an entry of check_tech and a MolInfo exists, the
 *       division is cleared when the technique accounts for it (see the
 *       chain below) and the callback RETURNS, so later GenBank descriptors
 *       on the same entry are not examined;
 *     - otherwise, when data is non-NULL, the division is cleared if it
 *       equals data (taxonomy is cleared too), is "UNA" or "UNC", or is
 *       "PAT" on a sequence that has a patent Seq-id.
 */
static void ChangeGBDiv (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr bsp = NULL;
  BioseqSetPtr bssp;
  ValNodePtr descr = NULL, vnp;
  CharPtr div;
  GBBlockPtr gbp;
  Int2 i;
  Boolean is_patent = FALSE;
  MolInfoPtr mfp = NULL;
  SeqIdPtr sip;

  div = (CharPtr) data;
  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    descr = bsp->descr;
    /* a patent Seq-id makes a "PAT" division redundant (used further down) */
    for (sip = bsp->id; sip != NULL; sip = sip->next) {
      if (sip->choice == SEQID_PATENT) {
        is_patent = TRUE;
      }
    }
  } else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    descr = bssp->descr;
  }
  /* only the first MolInfo descriptor is consulted */
  for (vnp = descr; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice == Seq_descr_molinfo) {
      mfp = (MolInfoPtr) vnp->data.ptrvalue;
      break;
    }
  }
  for (vnp = descr; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice == Seq_descr_genbank) {
      gbp = (GBBlockPtr) vnp->data.ptrvalue;
      if (gbp == NULL) continue;
      if (mfp) {
        if (mfp->tech == MI_TECH_htgs_0 ||
          mfp->tech == MI_TECH_htgs_1 ||
          mfp->tech == MI_TECH_htgs_2 ||
          mfp->tech == MI_TECH_htgs_3 ||
          mfp->tech == MI_TECH_est ||
          mfp->tech == MI_TECH_sts) {
          CheckKeywords(gbp, mfp->tech);
        }
      }
      if (gbp->div == NULL) continue;
      /* i == TOTAL_TECH afterwards means the division is not in check_tech */
      for (i=0; i < TOTAL_TECH; i++) {
        if (StringCmp(gbp->div, check_tech[i].name) == 0) {
          break;
        }
      }
      if (i != TOTAL_TECH) {
        if (mfp) {
          /*
           * "HTG"/"PRI" divisions are cleared only for HTGS phase 1-3;
           * because this is the head of the if/else-if chain, such a
           * division with any other technique is left untouched here.
           */
          if (StringCmp(gbp->div, "HTG") == 0
            || StringCmp(gbp->div, "PRI") == 0) {
            if (mfp->tech == MI_TECH_htgs_1
              || mfp->tech == MI_TECH_htgs_2 ||
              mfp->tech == MI_TECH_htgs_3) {
              gbp->div = MemFree(gbp->div);
              return;
            }
          } else if (mfp->tech == check_tech[i].num) {
            /* the technique already says what the division says */
            gbp->div = MemFree(gbp->div);
            return;
          } else if (mfp->tech == 0 && StringCmp (gbp->div, "STS") == 0) {
            /* no technique recorded: move the "STS" division into MolInfo */
            mfp->tech = MI_TECH_sts;
            gbp->div = MemFree(gbp->div);
            return;
          }
        }
      }
      /* gbp->div is still non-NULL whenever this point is reached */
      if (div != NULL) {
        if (StringCmp(gbp->div, div) == 0) {
          gbp->div = MemFree(gbp->div);
          gbp->taxonomy = MemFree(gbp->taxonomy);
        } else if (StringCmp(gbp->div, "UNA") == 0) {
          gbp->div = MemFree(gbp->div);
        } else if (StringCmp(gbp->div, "UNC") == 0) {
          gbp->div = MemFree(gbp->div);
        } else if (StringCmp(gbp->div, "PAT") == 0 && is_patent) {
          gbp->div = MemFree(gbp->div);
        }
      }
    }
  }
}
3678
/*
 * Organism names that EntryChangeGBSource collects from the top BioSource
 * and passes to ChangeGBSource / CanDeleteGBSource for comparison with a
 * GBBlock source line.  The four strings are private copies made and freed
 * by EntryChangeGBSource; onp is stored as a plain pointer to the existing
 * OrgName and is not freed there.
 */
typedef struct gbsrcdata {
  CharPtr taxname;    /* copy of OrgRef taxname, trimmed */
  CharPtr common;     /* copy of OrgRef common name, trimmed */
  CharPtr oldname;    /* copy of an ORGMOD_old_name subname, trimmed */
  CharPtr strain;     /* copy of an ORGMOD_strain subname, trimmed */
  OrgNamePtr onp;     /* the OrgRef's orgname (only referenced by disabled code) */
} GBSourceData, PNTR GBSourcePtr;
3686
AbbrevStrIEql(CharPtr str,CharPtr gbpsrc)3687 static Boolean AbbrevStrIEql (CharPtr str, CharPtr gbpsrc)
3688
3689 {
3690 Char buf [200];
3691 Char ch;
3692 CharPtr ptr;
3693
3694 if (StringLen (str) >= sizeof (buf)) return FALSE;
3695
3696 ch = *str;
3697 ptr = buf;
3698
3699 *ptr = ch;
3700 ptr++;
3701 str = StringChr (str, ' ');
3702 if (str == NULL) return FALSE;
3703 str++;
3704 ch = *str;
3705 while (ch == ' ') {
3706 str++;
3707 ch = *str;
3708 }
3709 *ptr = '.';
3710 ptr++;
3711 *ptr = '\0';
3712 StringCat (ptr, str);
3713
3714 return (Boolean) (StringICmp (buf, gbpsrc) == 0);
3715 }
3716
CanDeleteGBSource(GBSourcePtr gsp,CharPtr gbpsrc)3717 static Boolean CanDeleteGBSource (GBSourcePtr gsp, CharPtr gbpsrc)
3718
3719 {
3720 Char ch;
3721 Boolean foundStrain = FALSE;
3722 Boolean goOn = TRUE;
3723 /*
3724 OrgModPtr omp;
3725 OrgNamePtr onp;
3726 */
3727 CharPtr ptr;
3728 CharPtr str;
3729
3730 if (gsp == NULL || StringHasNoText (gbpsrc)) return FALSE;
3731
3732 str = StringStr (gbpsrc, "(strain");
3733 if (str != NULL) {
3734 ptr = str + 7;
3735 ch = *ptr;
3736 while (ch != '\0' && goOn) {
3737 if (ch == ')') {
3738 if (StringHasNoText (ptr + 1)) {
3739 *ptr = '\0';
3740 goOn = FALSE;
3741 foundStrain = TRUE;
3742 }
3743 } else if (ch == ',' || ch == ';') {
3744 goOn = FALSE;
3745 }
3746 ptr++;
3747 ch = *ptr;
3748 }
3749 } else {
3750 str = StringStr (gbpsrc, "strain)");
3751 if (str != NULL) return FALSE; /* do not handle this case for now */
3752 }
3753 if (foundStrain) {
3754 *str = '\0';
3755 str += 7;
3756 TrimSpacesAroundString (gbpsrc);
3757 TrimSpacesAroundString (str);
3758 if (StringDoesHaveText (gsp->strain)) {
3759 if (StringICmp (gsp->strain, str) != 0) return FALSE;
3760 /*
3761 } else if (gsp->onp != NULL) {
3762 omp = OrgModNew ();
3763 if (omp != NULL && gsp->onp != NULL) {
3764 onp = gsp->onp;
3765 omp->subtype = ORGMOD_strain;
3766 omp->subname = StringSave (str);
3767 omp->next = onp->mod;
3768 onp->mod = omp;
3769 }
3770 */
3771 } else {
3772 return FALSE; /* do not rescue strain at this point, so do not remove gbp->source */
3773 }
3774 }
3775
3776 if (StringDoesHaveText (gsp->taxname) && StringICmp (gsp->taxname, gbpsrc) == 0) return TRUE;
3777 if (StringDoesHaveText (gsp->common) && StringICmp (gsp->common, gbpsrc) == 0) return TRUE;
3778 if (StringDoesHaveText (gsp->oldname) && StringICmp (gsp->oldname, gbpsrc) == 0) return TRUE;
3779
3780 if (StringDoesHaveText (gsp->taxname) && AbbrevStrIEql (gsp->taxname, gbpsrc)) return TRUE;
3781 if (StringDoesHaveText (gsp->oldname) && AbbrevStrIEql (gsp->oldname, gbpsrc)) return TRUE;
3782
3783
3784 return FALSE;
3785 }
3786
/*
 * TrimPeriodFromEnd
 *   Removes one trailing '.' from str in place.  Strings shorter than two
 *   characters are left alone, so a lone "." is kept.
 */
static void TrimPeriodFromEnd (CharPtr str)

{
  size_t  last;

  last = StringLen (str);
  if (last >= 2) {
    last--;
    if (str [last] == '.') {
      str [last] = '\0';
    }
  }
}
3798
/*
 * ChangeGBSource
 *   SeqEntryExplore callback; acts on Bioseqs only.  data is the GBSourcePtr
 *   prepared by EntryChangeGBSource.  For each GenBank descriptor, a copy of
 *   gbp->source is stripped of a trailing " DNA." or " rRNA." and of a
 *   trailing period, and gbp->source is freed if CanDeleteGBSource accepts
 *   the stripped copy.  The walk stops at the first GenBank descriptor that
 *   has no block or no source string.  index and indent are not used.
 */
static void ChangeGBSource (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  ValNodePtr   sdp;
  GBBlockPtr   gbp;
  GBSourcePtr  gsp;
  CharPtr      copy;
  size_t       n;

  if (! IS_Bioseq(sep)) {
    return;
  }
  gsp = (GBSourcePtr) data;

  for (sdp = ((BioseqPtr)(sep->data.ptrvalue))->descr; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice != Seq_descr_genbank) continue;

    gbp = (GBBlockPtr) sdp->data.ptrvalue;
    if (gbp == NULL || gbp->source == NULL) {
      return;
    }

    /* work on a copy: CanDeleteGBSource edits its argument */
    copy = StringSave (gbp->source);
    n = StringLen (copy);
    if (n > 5 && StringCmp (copy + n - 5, " DNA.") == 0) {
      copy [n - 5] = '\0';
    } else if (n > 6 && StringCmp (copy + n - 6, " rRNA.") == 0) {
      copy [n - 6] = '\0';
    }
    TrimPeriodFromEnd (copy);

    if (CanDeleteGBSource (gsp, copy)) {
      gbp->source = MemFree (gbp->source);
    }
    MemFree (copy);
  }
}
3844
/*
 * EntryChangeGBSource
 *   Removes GBBlock source and division strings that merely repeat what the
 *   top BioSource of sep already says.
 *
 *   Trimmed copies of the organism's taxname, common name, strain and old
 *   name (plus a copy of the OrgName division) are collected from the
 *   BioSource returned by GetTopBioSourceFromSep.  ChangeGBSource and then
 *   ChangeGBDiv are run over sep with that information, after which all
 *   copies are released.  Both passes still run when no BioSource or OrgRef
 *   is found; they then receive empty data.  Does nothing when sep is NULL.
 *
 *   If several strain (or old-name) modifiers are present the last one
 *   wins; earlier copies are freed rather than leaked.
 */
void EntryChangeGBSource (SeqEntryPtr sep)
{
  OrgRefPtr     orp = NULL;
  OrgNamePtr    onp = NULL;
  OrgModPtr     omp;
  BioSourcePtr  biosp;
  CharPtr       div = NULL;
  GBSourceData  gsd;

  if (sep == NULL)
    return;

  MemSet ((Pointer) &gsd, 0, sizeof (GBSourceData));

  biosp = GetTopBioSourceFromSep (sep);
  if (biosp != NULL) {
    orp = biosp->org;
  }
  if (orp != NULL) {
    if (StringDoesHaveText (orp->taxname)) {
      gsd.taxname = StringSave (orp->taxname);
      TrimSpacesAndJunkFromEnds (gsd.taxname, FALSE);
      TrimPeriodFromEnd (gsd.taxname);
    }
    if (StringDoesHaveText (orp->common)) {
      gsd.common = StringSave (orp->common);
      TrimSpacesAndJunkFromEnds (gsd.common, FALSE);
      TrimPeriodFromEnd (gsd.common);
    }
    onp = orp->orgname;
    if (onp != NULL) {
      gsd.onp = onp;
      for (omp = onp->mod; omp != NULL; omp = omp->next) {
        if (StringHasNoText (omp->subname)) continue;
        if (omp->subtype == ORGMOD_strain) {
          /* release the copy taken from an earlier strain modifier */
          MemFree (gsd.strain);
          gsd.strain = StringSave (omp->subname);
          TrimSpacesAndJunkFromEnds (gsd.strain, FALSE);
          TrimPeriodFromEnd (gsd.strain);
        } else if (omp->subtype == ORGMOD_old_name) {
          /* release the copy taken from an earlier old-name modifier */
          MemFree (gsd.oldname);
          gsd.oldname = StringSave (omp->subname);
          TrimSpacesAndJunkFromEnds (gsd.oldname, FALSE);
          TrimPeriodFromEnd (gsd.oldname);
        }
      }
      if (StringDoesHaveText (onp->div)) {
        div = StringSave (onp->div);
      }
    }
  }

  SeqEntryExplore(sep, &gsd, ChangeGBSource);
  SeqEntryExplore(sep, div, ChangeGBDiv);

  if (div)
    MemFree(div);
  MemFree (gsd.taxname);
  MemFree (gsd.common);
  MemFree (gsd.oldname);
  MemFree (gsd.strain);
  return;
}
3944
/*
 * EntryChangeImpFeat
 *   Runs ChangeImpFeat over sep through SeqEntryExplore and then calls
 *   EntryChangeImpFeatToProt on the same entry.  A NULL sep is ignored.
 */
void EntryChangeImpFeat (SeqEntryPtr sep)
{
  if (sep != NULL) {
    SeqEntryExplore(sep, NULL, ChangeImpFeat);
    EntryChangeImpFeatToProt(sep);
  }
}
3953
/*
 * MergeDupBioSources
 *   SeqEntryExplore callback.  Within the descriptor list of one Bioseq or
 *   Bioseq-set, every later BioSource descriptor whose organism taxname is
 *   identical (StringCmp) to that of an earlier one is folded into the
 *   earlier descriptor and then freed.
 *
 *   Folding rules (1 = kept source, 2 = duplicate):
 *     - genome, origin, is_focus, gcode, mgcode: taken from 2 only when
 *       unset (0 / FALSE) in 1;
 *     - subtype list, OrgRef mod list, OrgName mod list: the list of 2 is
 *       appended to the list of 1 (or moved over when 1 has none);
 *     - OrgRef db and syn, OrgName lineage and div: moved from 2 only when
 *       1 has none.
 *   Every pointer that is moved is set to NULL in 2 so that SeqDescFree on
 *   the duplicate does not free it.  OrgName data is merged only when both
 *   sources have an OrgName.
 *
 *   data, index and indent are not used.
 */
static void MergeDupBioSources (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
  BioSourcePtr biop1, biop2;
  BioseqPtr bsp;
  BioseqSetPtr bssp;
  Boolean fuseanddelete;
  ValNodePtr mod;
  ValNodePtr nextvnp;
  OrgModPtr omp;
  Pointer PNTR prevvnp;
  ValNodePtr sdp;
  SubSourcePtr ssp;
  OrgNamePtr onp1, onp2;
  OrgRefPtr orp1, orp2;
  ValNodePtr vnp;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    sdp = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    sdp = bssp->descr;
  } else {
    return;
  }
  /* outer loop: each BioSource descriptor in turn is the one that is kept */
  while (sdp != NULL) {
    if (sdp->choice == Seq_descr_source && sdp->data.ptrvalue != NULL) {
      biop1 = (BioSourcePtr) sdp->data.ptrvalue;
      orp1 = biop1->org;
      if (orp1 != NULL) {
        /* inner loop: scan the descriptors after sdp for duplicates */
        vnp = sdp->next;
        /* prevvnp addresses the link that currently points at vnp */
        prevvnp = (Pointer PNTR) &(sdp->next);
        while (vnp != NULL) {
          nextvnp = vnp->next;
          fuseanddelete = FALSE;
          biop2 = NULL;
          orp2 = NULL;
          if (vnp->choice == Seq_descr_source && vnp->data.ptrvalue != NULL) {
            biop2 = (BioSourcePtr) vnp->data.ptrvalue;
            orp2 = biop2->org;
            if (orp2 != NULL) {
              /* duplicates are recognised by an exact taxname match only */
              if ((orp1->taxname != NULL) && (orp2->taxname != NULL) &&
                  StringCmp (orp1->taxname, orp2->taxname) == 0) {
                fuseanddelete = TRUE;
              }
            }
          }
          if (fuseanddelete) {
            /* unlink the duplicate; prevvnp stays where it is */
            *(prevvnp) = vnp->next;
            vnp->next = NULL;
            if (biop2 != NULL) {
              if (biop1->genome == 0) {
                biop1->genome = biop2->genome;
              }
              if (biop1->origin == 0) {
                biop1->origin = biop2->origin;
              }
              if (! biop1->is_focus) {
                biop1->is_focus = biop2->is_focus;
              }
              if (biop1->subtype == NULL) {
                biop1->subtype = biop2->subtype;
                biop2->subtype = NULL;
              } else {
                /* append the duplicate's subsources to the end of ours */
                ssp = biop1->subtype;
                while (ssp->next != NULL) {
                  ssp = ssp->next;
                }
                ssp->next = biop2->subtype;
                biop2->subtype = NULL;
              }
              if (orp1 != NULL && orp2 != NULL) {
                if (orp1->mod == NULL) {
                  orp1->mod = orp2->mod;
                  orp2->mod = NULL;
                } else {
                  mod = orp1->mod;
                  while (mod->next != NULL) {
                    mod = mod->next;
                  }
                  mod->next = orp2->mod;
                  orp2->mod = NULL;
                }
                if (orp1->db == NULL) {
                  orp1->db = orp2->db;
                  orp2->db = NULL;
                }
                if (orp1->syn == NULL) {
                  orp1->syn = orp2->syn;
                  orp2->syn = NULL;
                }
                onp1 = orp1->orgname;
                onp2 = orp2->orgname;
                if (onp1 != NULL && onp2 != NULL) {
                  if (onp1->mod == NULL) {
                    onp1->mod = onp2->mod;
                    onp2->mod = NULL;
                  } else {
                    omp = onp1->mod;
                    while (omp->next != NULL) {
                      omp = omp->next;
                    }
                    omp->next = onp2->mod;
                    onp2->mod = NULL;
                  }
                  if (onp1->gcode == 0) {
                    onp1->gcode = onp2->gcode;
                  }
                  if (onp1->mgcode == 0) {
                    onp1->mgcode = onp2->mgcode;
                  }
                  if (onp1->lineage == NULL) {
                    onp1->lineage = onp2->lineage;
                    onp2->lineage = NULL;
                  }
                  if (onp1->div == NULL) {
                    onp1->div = onp2->div;
                    onp2->div = NULL;
                  }
                }
              }
            }
            /* whatever was not moved over is released with the duplicate */
            SeqDescFree (vnp);
          } else {
            prevvnp = (Pointer PNTR) &(vnp->next);
          }
          vnp = nextvnp;
        }
      }
    }
    sdp = sdp->next;
  }
}
4088
/*
 * EntryMergeDupBioSources
 *   Hands sep to SeqEntryExplore with MergeDupBioSources as the callback and
 *   no user data, so that duplicate BioSource descriptors are merged.
 */
void EntryMergeDupBioSources (SeqEntryPtr sep)

{
  SeqEntryExplore (sep, NULL, MergeDupBioSources);
}
4094
/*
 * TASNTrimSpacesAndTrailingSemicolons
 *   Edits str in place and returns it.
 *     1. Leading characters with a value <= ' ' are removed.
 *     2. The trailing run made up of blanks and semicolons is cut off.
 *        A ';' that follows an '&' with no blank in between (as in an
 *        entity such as "&amp;") does not start such a run.
 *   NULL and empty strings are returned unchanged.
 */
static CharPtr TASNTrimSpacesAndTrailingSemicolons (CharPtr str)

{
  Boolean  afterAmp;
  Uchar    c;      /* to use 8bit characters in multibyte languages */
  CharPtr  from;
  CharPtr  tail;
  CharPtr  to;

  if (str == NULL || str [0] == '\0') return str;

  /* find the first character that is neither blank nor a control character */
  from = str;
  c = *from;
  while (c != '\0' && c <= ' ') {
    from++;
    c = *from;
  }

  /* slide the remaining text down to the start of the buffer */
  to = str;
  while (c != '\0') {
    *to = c;
    to++;
    from++;
    c = *from;
  }
  *to = '\0';

  /* tail marks where the current candidate trailing run begins */
  afterAmp = FALSE;
  tail = NULL;
  for (from = str; (c = *from) != '\0'; from++) {
    if (c == '&') {
      afterAmp = TRUE;
      tail = NULL;
    } else if (c == ' ') {
      if (tail == NULL) {
        tail = from;
      }
      afterAmp = FALSE;
    } else if (c == ';') {
      if (tail == NULL && ! afterAmp) {
        tail = from;
      }
    } else {
      tail = NULL;
    }
  }
  if (tail != NULL) {
    *tail = '\0';
  }

  return str;
}
4147
/*
 * TASNTrimInternalSemicolons
 *   Edits str in place and returns it.  After every ';' the following run
 *   of semicolons, blanks and tabs is collapsed: the run is dropped, and a
 *   single blank is written in its place if the run contained at least one
 *   blank.  NULL and empty strings are returned unchanged.
 */
static CharPtr TASNTrimInternalSemicolons (CharPtr str)

{
  Uchar    c;      /* to use 8bit characters in multibyte languages */
  CharPtr  rd;
  Boolean  sawSpace;
  CharPtr  wr;

  if (str == NULL || str [0] == '\0') return str;

  rd = str;
  wr = str;
  while ((c = *rd) != '\0') {
    /* every character, including the first ';' of a run, is kept */
    *wr = c;
    wr++;
    rd++;
    if (c != ';') continue;

    /* swallow what follows the ';' and note whether a blank was in it */
    sawSpace = FALSE;
    c = *rd;
    while (c == ';' || c == ' ' || c == '\t') {
      if (c == ' ') {
        sawSpace = TRUE;
      }
      rd++;
      c = *rd;
    }
    if (sawSpace) {
      *wr = ' ';
      wr++;
    }
  }
  *wr = '\0';

  return str;
}
4191
TASNStringHasNoText(CharPtr str)4192 static Boolean TASNStringHasNoText (CharPtr str)
4193
4194 {
4195 Uchar ch; /* to use 8bit characters in multibyte languages */
4196
4197 if (str != NULL) {
4198 ch = *str;
4199 while (ch != '\0') {
4200 if (ch > ' ') {
4201 return FALSE;
4202 }
4203 str++;
4204 ch = *str;
4205 }
4206 }
4207 return TRUE;
4208 }
4209
/*
 * CleanVisString
 *   Cleans the string that strp points at: leading blanks and trailing
 *   blanks/semicolons are trimmed, runs after internal semicolons are
 *   collapsed, and if nothing printable is left the string is freed and
 *   *strp is set to the value MemFree returns.  A NULL strp or NULL string
 *   is ignored.
 */
static void CleanVisString (CharPtr PNTR strp)

{
  CharPtr  text;

  if (strp == NULL || *strp == NULL) return;

  text = *strp;
  TASNTrimSpacesAndTrailingSemicolons (text);
  TASNTrimInternalSemicolons (text);
  if (TASNStringHasNoText (text)) {
    *strp = MemFree (text);
  }
}
4221
/*
 * CleanVisStringJunk
 *   Like CleanVisString, but the ends of the string are trimmed with
 *   TrimSpacesAndJunkFromEnds (called with TRUE) instead of
 *   TASNTrimSpacesAndTrailingSemicolons.  The string is freed and *strp
 *   reset when no printable text remains.
 */
static void CleanVisStringJunk (CharPtr PNTR strp)

{
  CharPtr  text;

  if (strp == NULL || *strp == NULL) return;

  text = *strp;
  TrimSpacesAndJunkFromEnds (text, TRUE);
  TASNTrimInternalSemicolons (text);
  if (TASNStringHasNoText (text)) {
    *strp = MemFree (text);
  }
}
4233
/*
 * CleanVisStringList
 *   Trims every string in the ValNode list at *vnpp with
 *   TASNTrimSpacesAndTrailingSemicolons and unlinks and frees
 *   (ValNodeFreeData) each node whose string is left without text.
 *   *vnpp is updated when the head node is removed.
 */
static void CleanVisStringList (ValNodePtr PNTR vnpp)

{
  ValNodePtr       after;
  ValNodePtr PNTR  link;
  ValNodePtr       node;

  if (vnpp == NULL) return;

  /* link always addresses the pointer that leads to the current node */
  link = vnpp;
  for (node = *vnpp; node != NULL; node = after) {
    after = node->next;
    TASNTrimSpacesAndTrailingSemicolons (node->data.ptrvalue);
    if (! TASNStringHasNoText (node->data.ptrvalue)) {
      link = &(node->next);
      continue;
    }
    *link = after;
    node->next = NULL;
    ValNodeFreeData (node);
  }
}
4257
/*
 * CheckGBBlock
 *   SeqEntryExplore callback that tidies the GenBank descriptors of one
 *   Bioseq or Bioseq-set.
 *
 *   data - optional BoolPtr; set to TRUE when a GenBank block still holds a
 *          source or taxonomy string, or (lacking both) a division other
 *          than "PAT" or "SYN".  The test is made before the strings are
 *          cleaned.
 *   index, indent - not used
 *
 *   The string fields of each block are cleaned, and a block left with no
 *   extra accessions, source, keywords, origin, date, entry date, division
 *   or taxonomy is unlinked from the descriptor list and freed.
 *
 *   The descriptor list is taken from the Bioseq or the Bioseq-set according
 *   to the entry's actual type.  Earlier code re-read it through a BioseqPtr
 *   in both cases, which accessed a Bioseq-set through the wrong struct type.
 */
static void CheckGBBlock (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  Boolean       empty;
  GBBlockPtr    gbp;
  BoolPtr       hasGB;
  ValNodePtr    nextsdp;
  Pointer PNTR  prevsdp;
  ValNodePtr    sdp;

  /* check before dereferencing, not after */
  if (sep == NULL || sep->data.ptrvalue == NULL) return;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    sdp = bsp->descr;
    prevsdp = (Pointer PNTR) &(bsp->descr);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    sdp = bssp->descr;
    prevsdp = (Pointer PNTR) &(bssp->descr);
  } else {
    return;
  }
  hasGB = (BoolPtr) data;

  while (sdp != NULL) {
    nextsdp = sdp->next;
    empty = FALSE;
    if (sdp->choice == Seq_descr_genbank && sdp->data.ptrvalue != NULL) {
      gbp = (GBBlockPtr) sdp->data.ptrvalue;
      if (gbp->source != NULL || gbp->taxonomy != NULL) {
        if (hasGB != NULL) {
          *hasGB = TRUE;
        }
      } else if (gbp->div != NULL) {
        if (StringCmp (gbp->div, "PAT") != 0 &&
            StringCmp (gbp->div, "SYN") != 0) {
          if (hasGB != NULL) {
            *hasGB = TRUE;
          }
        }
      }
      CleanVisStringList (&(gbp->extra_accessions));
      CleanVisStringList (&(gbp->keywords));
      CleanVisString (&(gbp->source));
      CleanVisString (&(gbp->origin));
      CleanVisString (&(gbp->date));
      CleanVisString (&(gbp->div));
      CleanVisString (&(gbp->taxonomy));
      if (gbp->extra_accessions == NULL && gbp->source == NULL &&
          gbp->keywords == NULL && gbp->origin == NULL &&
          gbp->date == NULL && gbp->entry_date == NULL &&
          gbp->div == NULL && gbp->taxonomy == NULL) {
        empty = TRUE;
        ObjMgrDeSelect (0, 0, 0, 0, NULL);
      }
    }
    if (empty) {
      /* unlink the descriptor; prevsdp keeps addressing the same link */
      *(prevsdp) = sdp->next;
      sdp->next = NULL;
      sdp = SeqDescFree (sdp);
    } else {
      prevsdp = (Pointer PNTR) &(sdp->next);
    }
    sdp = nextsdp;
  }
}
4328
EntryCheckGBBlock(SeqEntryPtr sep)4329 extern Boolean EntryCheckGBBlock (SeqEntryPtr sep)
4330
4331 {
4332 Boolean hasGBStuff;
4333
4334 hasGBStuff = FALSE;
4335 SeqEntryExplore (sep, (Pointer) &hasGBStuff, CheckGBBlock);
4336 return hasGBStuff;
4337 }
4338
4339
GetOriginalBeforeAdjustment(SeqLocPtr slp,Int4Ptr p_oldfrom,Int4Ptr p_oldto)4340 static SeqIntPtr GetOriginalBeforeAdjustment (SeqLocPtr slp, Int4Ptr p_oldfrom, Int4Ptr p_oldto)
4341 {
4342 SeqLocPtr curr = NULL, last = NULL;
4343 SeqIntPtr sip;
4344
4345 if (slp == NULL)
4346 {
4347 return NULL;
4348 }
4349 while ((curr = SeqLocFindNext(slp, curr)) != NULL)
4350 {
4351 last = curr;
4352 }
4353
4354 if (last != NULL && last->choice != SEQLOC_INT) /* this is too weird */
4355 {
4356 return NULL;
4357 }
4358 if (last == NULL || last->data.ptrvalue == NULL) return NULL;
4359 sip = (SeqIntPtr)(last->data.ptrvalue);
4360
4361 *p_oldfrom = sip->from;
4362 *p_oldto = sip->to;
4363
4364 return sip;
4365 }
4366
4367
/*
 * AdjustLocByRemainder
 *   Extends the last interval of slp at its 3' end: 'from' is lowered on the
 *   minus strand, 'to' is raised otherwise.  The extension is 3, 2 or 1
 *   bases for a remainder of 0, 1 or 2; any other remainder value is used
 *   as given.
 *
 *   even_if_partial - when FALSE, an interval with fuzz on the end being
 *                     moved (if_from / if_to) is not adjusted
 *   p_oldnum        - optional; receives the coordinate before adjustment
 *
 *   Returns FALSE without changing anything when the last interval cannot
 *   be obtained, its Bioseq is not found, the extension would run past the
 *   start (minus strand) or reach the sequence length, or the end is fuzzy
 *   and even_if_partial is FALSE.  Returns TRUE after a successful change.
 */
static Boolean AdjustLocByRemainder (SeqLocPtr slp, Int4 remainder, Boolean even_if_partial, Int4Ptr p_oldnum)
{
  Int4       from_before, to_before;
  SeqIntPtr  ival;
  BioseqPtr  nuc;
  Int4       previous;

  ival = GetOriginalBeforeAdjustment(slp, &from_before, &to_before);
  if (ival == NULL) return FALSE;

  nuc = BioseqFind(ival->id);
  if (nuc == NULL) return FALSE;

  /* 0 -> 3, 1 -> 2, 2 -> 1 */
  if (remainder >= 0 && remainder <= 2) {
    remainder = 3 - remainder;
  }

  if (ival->strand == Seq_strand_minus) {
    if (ival->from < remainder) return FALSE;
    if (ival->if_from != NULL && !even_if_partial) return FALSE;
    previous = ival->from;
    ival->from -= remainder;
  } else {
    if (ival->to >= (nuc->length - remainder)) return FALSE;
    if (ival->if_to != NULL && !even_if_partial) return FALSE;
    previous = ival->to;
    ival->to += remainder;
  }

  if (p_oldnum != NULL) {
    *p_oldnum = previous;
  }
  return TRUE;
}
4430
4431
4432 /*****************************************************************************
4433 *
4434 * CdEndCheck(sfp, fp)
4435 *
4436 *****************************************************************************/
CdEndCheck(SeqFeatPtr sfp,FILE * fp,Boolean also_adjust_mrna)4437 static void CdEndCheck(SeqFeatPtr sfp, FILE *fp, Boolean also_adjust_mrna)
4438 {
4439 ByteStorePtr newprot = NULL;
4440 BioseqPtr protseq, nucseq;
4441 Int4 len, remainder, aas, oldfrom, oldto, protlen, i, oldnum;
4442 Int4 m_oldfrom, m_oldto;
4443 CdRegionPtr crp;
4444 SeqIdPtr protid, tmp;
4445 SeqIntPtr sip, msip = NULL;
4446 Int2 residue, residue2;
4447 Char nuc[PATH_MAX];
4448 CodeBreakPtr cbp;
4449 Int4 pos1, pos2, pos;
4450 SeqLocPtr tmpslp;
4451 Int4 len2;
4452 SeqFeatPtr gene = NULL;
4453 SeqFeatPtr mrna = NULL;
4454 GeneRefPtr grp ;
4455 BioseqPtr bsp;
4456 SeqLocPtr slp;
4457 Boolean hasNulls;
4458 Boolean noLeft;
4459 Boolean noRight;
4460 Boolean noLeftFeat;
4461 Boolean noLeftGene;
4462 Boolean noRightFeat;
4463 Boolean noRightGene;
4464
4465
4466 grp = SeqMgrGetGeneXref (sfp);
4467 if (grp == NULL || (! SeqMgrGeneIsSuppressed (grp))) {
4468 gene = SeqMgrGetOverlappingGene (sfp->location, NULL);
4469 }
4470 if (also_adjust_mrna) {
4471 mrna = SeqMgrGetOverlappingmRNA (sfp->location, NULL);
4472 if (mrna != NULL) {
4473 if (SeqLocStrand (mrna->location) == Seq_strand_minus) {
4474 if (SeqLocStart (mrna->location) != SeqLocStart (sfp->location)) {
4475 mrna = NULL;
4476 }
4477 } else {
4478 if (SeqLocStop (mrna->location) != SeqLocStop (sfp->location)) {
4479 mrna = NULL;
4480 }
4481 }
4482 }
4483 }
4484
4485 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
4486 len = SeqLocLen(sfp->location);
4487 len2 = len;
4488 switch (crp->frame)
4489 {
4490 case 2:
4491 len -= 1;
4492 break;
4493 case 3:
4494 len -= 2;
4495 break;
4496 default:
4497 break;
4498 }
4499 remainder = len % 3;
4500 aas = len/3; /* total aas in protein if no stop codon */
4501 protid = SeqLocId(sfp->product);
4502 if (protid == NULL)
4503 return;
4504 /* protseq = BioseqFind(protid); */
4505 protseq = BioseqLockById (protid); /* tries BioseqFind, will fetch remotely if enabled */
4506 if (protseq == NULL) return;
4507 BioseqUnlock (protseq); /* unlock but do not cache out, easier than unlocking everywhere in code below */
4508 if (((protseq->length + 1) == aas) && (remainder == 0)) /* correct length with termination */
4509 return;
4510
4511 if (protseq->seq_data_type == Seq_code_gap) return;
4512
4513 cbp = crp->code_break;
4514 while (cbp != NULL)
4515 {
4516 pos1 = INT4_MAX;
4517 pos2 = -10;
4518 tmpslp = NULL;
4519 while ((tmpslp = SeqLocFindNext(cbp->loc, tmpslp)) != NULL)
4520 {
4521 pos = GetOffsetInLoc(tmpslp, sfp->location, SEQLOC_START);
4522 if (pos < pos1)
4523 pos1 = pos;
4524 pos = GetOffsetInLoc(tmpslp, sfp->location, SEQLOC_STOP);
4525 if (pos > pos2)
4526 pos2 = pos;
4527 }
4528 pos = pos2 - pos1; /* codon length */
4529 if (/* pos == 2 || */ (pos >= 0 && pos <= 1 && pos2 == len2 - 1)) /* a codon */
4530 /* allowing a partial codon at the end */
4531 {
4532 return;
4533 }
4534
4535 cbp = cbp->next;
4536 }
4537
4538 if (protseq->length == aas && remainder == 0)
4539 {
4540 /* do we already have a stop codon, but the translated protein includes it? */
4541 if (protseq->repr == Seq_repr_raw) {
4542 newprot = (ByteStorePtr) protseq->seq_data;
4543 if (newprot != NULL) {
4544 protlen = BSLen(newprot);
4545 BSSeek(newprot, (protlen - 1), SEEK_SET);
4546 residue = BSGetByte(newprot);
4547 while (residue == '*' && protlen == protseq->length && protlen > 0) {
4548 BSSeek (newprot, -1, SEEK_END);
4549 BSDelete (newprot, 1);
4550 BSSeek (newprot, -1, SEEK_END);
4551 protlen--;
4552 protseq->length = protlen;
4553 residue = BSGetByte (newprot);
4554 }
4555 }
4556 }
4557 }
4558
4559 sip = GetOriginalBeforeAdjustment (sfp->location, &oldfrom, &oldto);
4560 if (mrna != NULL) {
4561 msip = GetOriginalBeforeAdjustment (mrna->location, &m_oldfrom, &m_oldto);
4562 }
4563
4564 if (!AdjustLocByRemainder(sfp->location, remainder, FALSE, &oldnum)) {
4565 return;
4566 }
4567 if (mrna != NULL) {
4568 AdjustLocByRemainder (mrna->location, remainder, TRUE, NULL);
4569 }
4570
4571 nucseq = BioseqFind(sip->id);
4572 newprot = ProteinFromCdRegion(sfp, TRUE); /* include stop codons */
4573 if (newprot == NULL)
4574 {
4575 goto erret;
4576 }
4577
4578 protlen = BSLen(newprot);
4579 if (protlen != aas + 1)
4580 {
4581 goto erret;
4582 }
4583
4584 BSSeek(newprot, (protlen - 1), SEEK_SET);
4585 residue = BSGetByte(newprot);
4586 if (residue != '*')
4587 {
4588 goto erret;
4589 }
4590
4591 BSSeek(newprot, (protlen-1), SEEK_SET);
4592 BSDelete(newprot, 1); /* remove termination from protein */
4593 BSSeek(newprot, 0, SEEK_SET); /* check for internal termination */
4594 BSSeek((ByteStorePtr) protseq->seq_data, 0, SEEK_SET);
4595 protlen = BSLen(newprot);
4596 for (i = 0; i < protlen; i++)
4597 {
4598 residue = BSGetByte(newprot);
4599 residue2 = BSGetByte((ByteStorePtr) protseq->seq_data);
4600 if (residue != residue2)
4601 {
4602 goto erret;
4603 }
4604
4605 }
4606
4607 BSFree((ByteStorePtr) protseq->seq_data);
4608 protseq->seq_data = (SeqDataPtr) newprot;
4609 protseq->length = protlen;
4610 /****** to avoid killing asn2gnbk ***
4611 protseq->seq_data_type = Seq_code_ncbieaa;
4612 sfp->partial = FALSE;
4613
4614 ************************************/
4615 for (tmp = nucseq->id; tmp != NULL; tmp = tmp->next)
4616 {
4617 if ((tmp->choice == SEQID_GENBANK) ||
4618 (tmp->choice == SEQID_EMBL) ||
4619 (tmp->choice == SEQID_DDBJ))
4620 break;
4621 }
4622
4623 if (tmp == NULL)
4624 SeqIdWrite(nucseq->id, nuc, PRINTID_FASTA_LONG, sizeof (nuc) - 1);
4625 else
4626 SeqIdWrite(tmp, nuc, PRINTID_TEXTID_ACCESSION, sizeof (nuc) - 1);
4627
4628 if (fp != NULL)
4629 fprintf(fp, "%s %ld %d\n", nuc, (long)(oldnum+1), (int)remainder);
4630
4631 if (gene != NULL) {
4632 if (SeqLocAinB (sfp->location, gene->location) <= 0) {
4633 bsp = BioseqFindFromSeqLoc (gene->location);
4634 if (bsp != NULL) {
4635 hasNulls = LocationHasNullsBetween (gene->location);
4636 slp = SeqLocMerge (bsp, gene->location, sfp->location, TRUE, FALSE, hasNulls);
4637 if (slp != NULL) {
4638 CheckSeqLocForPartial (gene->location, &noLeftGene, &noRightGene);
4639 gene->location = SeqLocFree (gene->location);
4640 gene->location = slp;
4641 CheckSeqLocForPartial (sfp->location, &noLeftFeat, &noRightFeat);
4642 if (bsp->repr == Seq_repr_seg) {
4643 slp = SegLocToPartsEx (bsp, gene->location, TRUE);
4644 gene->location = SeqLocFree (gene->location);
4645 gene->location = slp;
4646 hasNulls = LocationHasNullsBetween (gene->location);
4647 gene->partial = (gene->partial || hasNulls);
4648 }
4649 FreeAllFuzz (gene->location);
4650 noLeft = (noLeftFeat || noLeftGene);
4651 noRight = (noRightFeat || noRightGene);
4652 SetSeqLocPartial (gene->location, noLeft, noRight);
4653 gene->partial = (gene->partial || noLeft || noRight);
4654 }
4655 }
4656 }
4657 }
4658
4659 return;
4660 erret:
4661 BSFree(newprot);
4662 sip->from = oldfrom;
4663 sip->to = oldto;
4664 if (msip != NULL) {
4665 msip->from = m_oldfrom;
4666 msip->to = m_oldto;
4667 }
4668 return;
4669 }
4670
4671
4672 typedef struct findcd {
4673 FILE *fp;
4674 Boolean also_adjust_mrna;
4675 } FindCdData, PNTR FindCdPtr;
4676
/*
 * SeqEntryExplore callback.  Walks the annotation chain attached to one
 * Seq-entry (a Bioseq or a Bioseq-set) and calls CdEndCheck on every
 * coding-region feature that is not flagged as an exception.
 *
 *   sep    - the entry being visited
 *   data   - optional FindCdPtr; when NULL, CdEndCheck receives fp == NULL
 *            and also_adjust_mrna == FALSE
 *   index, indent - not used
 */
static void FindCd (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  FindCdPtr    ctx = (FindCdPtr) data;
  FILE         *report = NULL;
  Boolean      adjust_mrna = FALSE;
  SeqAnnotPtr  annot;
  SeqFeatPtr   feat;

  if (ctx != NULL) {
    report = ctx->fp;
    adjust_mrna = ctx->also_adjust_mrna;
  }

  if (IS_Bioseq(sep)) {
    annot = ((BioseqPtr) (sep->data.ptrvalue))->annot;
  } else {
    annot = ((BioseqSetPtr) (sep->data.ptrvalue))->annot;
  }

  for (; annot != NULL; annot = annot->next) {
    if (annot->type != 1) {   /* only feature tables (ftable) */
      continue;
    }
    for (feat = (SeqFeatPtr) (annot->data); feat != NULL; feat = feat->next) {
      /* choice 3 is cdregion; features with excpt set are left untouched */
      if (feat->data.choice == 3 && ! feat->excpt) {
        CdEndCheck(feat, report, adjust_mrna);
      }
    }
  }
}
4723
/*
 * Builds a temporary stand-in for a bond location.
 *
 * The result is a SHALLOW copy of the top-level SeqLoc node: it shares the
 * component chain (data.ptrvalue) with slp.  For a SEQLOC_MIX, every
 * SEQLOC_NULL component is taken out of that shared chain via remove_node.
 *
 * Because the chain is shared, the (possibly new) chain head is stored back
 * into both the copy and slp; otherwise removing the first component would
 * leave both pointing at a node that is no longer part of the chain.
 * NOTE(review): remove_node is defined elsewhere; it is assumed to unlink
 * (and possibly free) the node and return the new head - confirm.
 *
 * Returns NULL when slp is NULL or the node cannot be allocated.  The caller
 * should release the returned node with MemFree, never SeqLocFree, since the
 * components still belong to slp.
 */
static SeqLocPtr fake_bond_loc(SeqLocPtr slp)
{
  SeqLocPtr loc, l, lnext, ldata;

  if (slp == NULL)
    return NULL;
  loc = MemNew(sizeof(SeqLoc));
  if (loc == NULL)
    return NULL;
  MemCopy(loc, slp, sizeof(SeqLoc));
  if (slp->choice != SEQLOC_MIX)
    return loc;

  ldata = (SeqLocPtr) loc->data.ptrvalue;
  for (l = ldata; l != NULL; l = lnext) {
    lnext = l->next;   /* saved first: l may be gone after remove_node */
    if (l->choice == SEQLOC_NULL) {
      ldata = remove_node(ldata, l);
    }
  }
  /* publish the surviving head to both owners of the shared chain */
  loc->data.ptrvalue = ldata;
  slp->data.ptrvalue = ldata;
  return loc;
}
4745
4746 /*****************************************************************************
4747 *
4748 * Check for CdRegion ending in middle base of codon
4749 *
4750 *****************************************************************************/
4751
CdCheckEx(SeqEntryPtr sep,FILE * fp,Boolean also_adjust_mrna)4752 NLM_EXTERN void CdCheckEx(SeqEntryPtr sep, FILE *fp, Boolean also_adjust_mrna)
4753 {
4754 FindCdData fcd;
4755
4756 MemSet (&fcd, 0, sizeof (FindCdData));
4757 fcd.fp = fp;
4758 fcd.also_adjust_mrna = also_adjust_mrna;
4759 SeqEntryExplore(sep, (Pointer)&fcd, FindCd);
4760 return;
4761 }
4762
CdCheck(SeqEntryPtr sep,FILE * fp)4763 NLM_EXTERN void CdCheck(SeqEntryPtr sep, FILE *fp)
4764 {
4765 CdCheckEx (sep, fp, FALSE);
4766 }
4767
4768
OutOfFramePeptideButEmblOrDdbj(SeqFeatPtr sfp,SeqFeatPtr cds)4769 static Boolean OutOfFramePeptideButEmblOrDdbj (SeqFeatPtr sfp, SeqFeatPtr cds)
4770
4771 {
4772 BioseqPtr bsp;
4773 CdRegionPtr crp;
4774 ImpFeatPtr ifp;
4775 SeqLocPtr first = NULL, last = NULL, slp = NULL;
4776 Boolean partial5, partial3;
4777 Int4 pos1, pos2, adjust = 0, mod1, mod2;
4778 SeqIdPtr sip;
4779
4780 if (sfp == NULL || cds == NULL || sfp->data.choice != SEQFEAT_IMP) return FALSE;
4781
4782 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4783 if (ifp == NULL) return FALSE;
4784 if (StringCmp (ifp->key, "mat_peptide") != 0 &&
4785 StringCmp (ifp->key, "sig_peptide") != 0 &&
4786 StringCmp (ifp->key, "transit_peptide") != 0 &&
4787 StringCmp (ifp->key, "propeptide") != 0) return FALSE;
4788
4789 crp = (CdRegionPtr) cds->data.value.ptrvalue;
4790 if (crp == NULL) return FALSE;
4791 if (crp->frame == 2) {
4792 adjust = 1;
4793 } else if (crp->frame == 3) {
4794 adjust = 2;
4795 }
4796
4797 while ((slp = SeqLocFindNext (sfp->location, slp)) != NULL) {
4798 last = slp;
4799 if (first == NULL) {
4800 first = slp;
4801 }
4802 }
4803 if (first == NULL || last == NULL) return FALSE;
4804
4805 pos1 = GetOffsetInLoc (first, cds->location, SEQLOC_START) - adjust;
4806 pos2 = GetOffsetInLoc (last, cds->location, SEQLOC_STOP) - adjust;
4807 mod1 = pos1 % 3;
4808 mod2 = pos2 % 3;
4809
4810 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
4811 if (partial5) {
4812 mod1 = 0;
4813 }
4814 if (partial3) {
4815 mod2 = 2;
4816 }
4817
4818 if (mod1 == 0 && mod2 == 2) return FALSE;
4819
4820 bsp = BioseqFindFromSeqLoc (sfp->location);
4821 if (bsp == NULL) return FALSE;
4822 for (sip = bsp->id;
4823 sip != NULL && sip->choice != SEQID_EMBL && sip->choice != SEQID_DDBJ;
4824 sip = sip->next) continue;
4825 if (sip != NULL) return TRUE;
4826
4827 return FALSE;
4828 }
4829
IncompatibleStrands(SeqLocPtr loc1,SeqLocPtr loc2)4830 static Boolean IncompatibleStrands (SeqLocPtr loc1, SeqLocPtr loc2)
4831
4832 {
4833 Boolean minus1 = FALSE, minus2 = FALSE;
4834 Uint1 strand1, strand2;
4835
4836 if (loc1 == NULL || loc2 == NULL) return FALSE;
4837
4838 strand1 = SeqLocStrand (loc1);
4839 strand2 = SeqLocStrand (loc2);
4840
4841 minus1 = (Boolean) (strand1 == Seq_strand_minus || strand1 == Seq_strand_both_rev);
4842 minus2 = (Boolean) (strand2 == Seq_strand_minus || strand2 == Seq_strand_both_rev);
4843
4844 if (minus1 != minus2) return TRUE;
4845
4846 return FALSE;
4847 }
4848
/*
 * Converts the collected nucleotide-level peptide / site / bond features
 * (sfa.pept) into protein-level features on the product of the CDS that
 * best contains them (chosen from sfa.cds).
 *
 * For each entry of sfa.pept:
 *   1. pick the containing CDS on a compatible strand with the smallest
 *      SeqLocAinB difference, using gene xrefs to decide between later
 *      candidates;
 *   2. skip the feature if no CDS is found (warning posted), if
 *      OutOfFramePeptideButEmblOrDdbj says so, or if its location cannot be
 *      mapped to protein coordinates (error posted);
 *   3. build a new SeqFeat that takes over comment, qual, title, ext, cit,
 *      xref, dbxref and except_text from the original, typed according to
 *      the ValNode choice (SEQFEAT_PROT, SEQFEAT_SITE or SEQFEAT_BOND);
 *   4. mark the original with the comment "FeatureToBeDeleted" and attach
 *      the new feature to the first annotation of the CDS product Bioseq.
 *
 * The entry's choice field is the target feature type set up by
 * GetCdRegionsWithPeptides; data.ptrvalue is the original SeqFeatPtr.
 */
static void ImpFeatToProtRef(SeqFeatArr sfa)
{
  SeqFeatPtr f1, f2, best_cds, sfp;
  SeqLocPtr loc, slp;
  ImpFeatPtr ifp;
  ProtRefPtr prot;
  BioseqPtr bsp;
  SeqAnnotPtr sap;
  Int4 diff_lowest, diff_current, frame;
  ValNodePtr tmp1, tmp2;
  Uint2 retval;
  Int2 i;
  Boolean lfree, partial5, partial3;
  CharPtr p, q;
  GBQualPtr qu, qunext;
  GeneRefPtr grp1, grp2;

  for (tmp1 = sfa.pept; tmp1; tmp1 = tmp1->next) {
    f1 = (SeqFeatPtr) tmp1->data.ptrvalue;
    loc = f1->location;
    lfree = FALSE;
    if (tmp1->choice == SEQFEAT_BOND) {
      /* bonds are matched through a copy with NULL components removed */
      loc = fake_bond_loc(f1->location);
      lfree = TRUE;
    }

    diff_lowest = -1;
    best_cds = NULL;
    for (tmp2 = sfa.cds; tmp2; tmp2 = tmp2->next) {
      f2 = tmp2->data.ptrvalue;
      if (IncompatibleStrands (loc, f2->location)) continue;
      diff_current = SeqLocAinB(loc, f2->location);
      if (diff_current < 0) continue;
      /* first candidate, or strictly tighter coverage: take it */
      if (diff_lowest == -1 || diff_current < diff_lowest) {
        diff_lowest = diff_current;
        best_cds = f2;
        continue;
      }
      /*
       * Otherwise the gene xrefs decide: the candidate replaces the current
       * best unless an xref is missing/suppressed or the comparable
       * locus_tag / locus values differ.
       * NOTE(review): this is also reached when the candidate's coverage is
       * looser than the current best, not only on exact ties - confirm that
       * is intended.
       */
      grp1 = SeqMgrGetGeneXref (f1);
      if (grp1 == NULL || SeqMgrGeneIsSuppressed (grp1)) continue;
      grp2 = SeqMgrGetGeneXref (f2);
      if (grp2 == NULL || SeqMgrGeneIsSuppressed (grp2)) continue;
      if (StringDoesHaveText (grp1->locus_tag) && StringDoesHaveText (grp2->locus_tag)) {
        if (StringICmp (grp1->locus_tag, grp2->locus_tag) != 0) continue;
      } else if (StringDoesHaveText (grp1->locus) && StringDoesHaveText (grp2->locus)) {
        if (StringICmp (grp1->locus, grp2->locus) != 0) continue;
      }
      diff_lowest = diff_current;
      best_cds = f2;
    }

    /*
     * The copy from fake_bond_loc is only a top-level node sharing its
     * components with f1->location, so release just that node (SeqLocFree
     * would destroy the shared components).
     */
    if (lfree) {
      MemFree(loc);
      loc = NULL;
    }

    if (best_cds == NULL) {
      p = SeqLocPrint(f1->location);
      ErrPostEx(SEV_WARNING, ERR_FEATURE_CDSNotFound,
                "CDS for the peptide feature [%s] not found", p);
      MemFree(p);
      continue;
    }
    if (OutOfFramePeptideButEmblOrDdbj (f1, best_cds))
      continue;

    CheckSeqLocForPartial (f1->location, &partial5, &partial3);
    slp = dnaLoc_to_aaLoc(best_cds, f1->location, TRUE, &frame, FALSE);
    if (slp == NULL) {
      p = SeqLocPrint(f1->location);
      q = SeqLocPrint(best_cds->location);
      ErrPostEx(SEV_ERROR, ERR_FEATURE_CannotMapDnaLocToAALoc, "peptide location:%s| CDS location:%s", p, q);
      MemFree(p);
      MemFree(q);
      continue;
    }
    SetSeqLocPartial (slp, partial5, partial3);

    ifp = (ImpFeatPtr) f1->data.value.ptrvalue;
    sfp = SeqFeatNew();
    if (sfp == NULL) {
      SeqLocFree(slp);
      continue;
    }
    sfp->location = slp;

    sfp->partial = (Boolean) (f1->partial || partial5 || partial3);
    sfp->excpt = f1->excpt;
    sfp->exp_ev = f1->exp_ev;
    sfp->pseudo = f1->pseudo;

    /* ownership of these fields moves from the old feature to the new one */
    sfp->comment = f1->comment;
    f1->comment = NULL;
    sfp->qual = f1->qual;
    f1->qual = NULL;
    sfp->title = f1->title;
    f1->title = NULL;
    sfp->ext = f1->ext;
    f1->ext = NULL;
    sfp->cit = f1->cit;
    f1->cit = NULL;
    sfp->xref = f1->xref;
    f1->xref = NULL;
    sfp->dbxref = f1->dbxref;
    f1->dbxref = NULL;
    sfp->except_text = f1->except_text;
    f1->except_text = NULL;

    if (tmp1->choice == SEQFEAT_PROT) {
      sfp->data.choice = SEQFEAT_PROT;
      prot = ProtRefNew();
      sfp->data.value.ptrvalue = prot;
      if (StringCmp(ifp->key, "mat_peptide") == 0) {
        prot->processed = 2;
        /* every /product qualifier becomes a protein name */
        for (qu = sfp->qual; qu; qu = qunext) {
          qunext = qu->next;
          if (StringCmp(qu->qual, "product") == 0) {
            ValNodeAddStr(&(prot->name), 0, StringSave(qu->val));
            sfp->qual = remove_qual(sfp->qual, qu);
          }
        }
      }
      if (StringCmp(ifp->key, "sig_peptide") == 0)
        prot->processed = 3;
      if (StringCmp(ifp->key, "transit_peptide") == 0)
        prot->processed = 4;
      if (StringCmp(ifp->key, "propeptide") == 0)
        prot->processed = 5;
      /* the comment, already moved above, stays a comment */
    } else if (tmp1->choice == SEQFEAT_SITE) {
      sfp->data.choice = SEQFEAT_SITE;
      /* the comment now lives on sfp, so the type is looked up there */
      i = FindStr(feat_site, num_site, sfp->comment);
      sfp->data.value.intvalue = (i != -1) ? i : 255;
    } else if (tmp1->choice == SEQFEAT_BOND) {
      sfp->data.choice = SEQFEAT_BOND;
      i = FindStr(feat_bond, num_bond, sfp->comment);
      sfp->data.value.intvalue = (i != -1) ? i : 255;
    }

    /* RemovePeptideImpFeats deletes features carrying this marker */
    f1->comment = StringSave("FeatureToBeDeleted");

    if (sfp->partial == FALSE) {
      retval = SeqLocPartialCheck(sfp->location);
      if (retval > SLP_COMPLETE && retval < SLP_NOSTART) {
        sfp->partial = TRUE;
      }
    }

    /*
     * NOTE(review): when the product Bioseq cannot be locked the new
     * feature is not attached anywhere, yet the original is already marked
     * for deletion - confirm whether that case can occur.
     */
    bsp = BioseqLockById(SeqLocId(best_cds->product));
    if (bsp) {
      if (bsp->annot == NULL) {
        sap = SeqAnnotNew();
        sap->type = 1;
        bsp->annot = sap;
      } else {
        sap = bsp->annot;
      }
      sap->data = tie_feat(sap->data, sfp);
      BioseqUnlock(bsp);
    }
  }
}
5062
PseudoGeneOverlap(SeqLocPtr slp)5063 static Boolean PseudoGeneOverlap (SeqLocPtr slp)
5064
5065 {
5066 SeqFeatPtr gene;
5067 GeneRefPtr grp;
5068
5069 gene = SeqMgrGetOverlappingGene (slp, NULL);
5070 if (gene == NULL) return FALSE;
5071 if (gene->pseudo) return TRUE;
5072 grp = (GeneRefPtr) gene->data.value.ptrvalue;
5073 if (grp == NULL) return FALSE;
5074 if (grp->pseudo) return TRUE;
5075 return FALSE;
5076 }
5077
/*
 * SeqEntryExplore callback that fills a SeqFeatArr (passed as data) from the
 * feature tables attached to one Seq-entry:
 *
 *   cds  - every coding region that is neither pseudo itself nor overlapped
 *          by a pseudo gene (PseudoGeneOverlap)
 *   pept - import features to be converted, each tagged through the
 *          ValNode choice with its target type:
 *            SEQFEAT_PROT  mat_peptide / sig_peptide / transit_peptide /
 *                          propeptide
 *            SEQFEAT_SITE  misc_feature whose comment matches feat_site
 *                          entry 23..25 (signal-peptide, transit-peptide,
 *                          transmembrane-region), unless the feature's
 *                          Bioseq carries an EMBL or DDBJ Seq-id
 *            SEQFEAT_BOND  misc_feature whose comment matches no feat_site
 *                          entry but does match a feat_bond entry
 *
 * Each list node only points at the feature; the features stay owned by
 * their annotations.  Entries without a collector, import features without
 * data, and failed node allocations are skipped.
 */
static void GetCdRegionsWithPeptides (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  SeqAnnotPtr annot, sap;
  BioseqPtr bsp, fbsp;
  BioseqSetPtr bssp;
  Int2 i;
  ImpFeatPtr ifp;
  Boolean okay;
  SeqFeatArrPtr sfap;
  SeqFeatPtr sfp;
  SeqIdPtr sip;
  ValNodePtr tmp;

  sfap = (SeqFeatArrPtr) data;
  if (sep == NULL || sfap == NULL) return;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    annot = bsp->annot;
  } else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    annot = bssp->annot;
  }

  for (sap = annot; sap != NULL; sap = sap->next) {
    if (sap->type != 1) {   /* feature tables only */
      continue;
    }
    for (sfp = sap->data; sfp != NULL; sfp = sfp->next) {
      if (sfp->data.choice == SEQFEAT_CDREGION) {
        if ((! sfp->pseudo) && (! (PseudoGeneOverlap (sfp->location)))) {
          tmp = ValNodeNew(NULL);
          if (tmp != NULL) {
            tmp->data.ptrvalue = sfp;
            sfap->cds = tie_next(sfap->cds, tmp);
          }
        }
        continue;
      }
      if (sfp->data.choice != SEQFEAT_IMP) {
        continue;
      }
      ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
      if (ifp == NULL) {
        continue;   /* malformed import feature: nothing to classify */
      }

      if (StringCmp(ifp->key, "mat_peptide") == 0 ||
          StringCmp(ifp->key, "sig_peptide") == 0 ||
          StringCmp(ifp->key, "transit_peptide") == 0 ||
          StringCmp(ifp->key, "propeptide") == 0) {
        tmp = ValNodeNew(NULL);
        if (tmp != NULL) {
          tmp->choice = SEQFEAT_PROT;
          tmp->data.ptrvalue = sfp;
          sfap->pept = tie_next(sfap->pept, tmp);
        }
      } else if (StringCmp(ifp->key, "misc_feature") == 0
                 && sfp->comment != NULL) {
        if ((i = FindStr(feat_site, num_site, sfp->comment)) != -1) {
          /* 23..25: signal-peptide, transit-peptide, transmembrane-region */
          if (i >= 23 && i <= 25) {
            okay = TRUE;
            fbsp = BioseqFindFromSeqLoc (sfp->location);
            if (fbsp != NULL) {
              for (sip = fbsp->id; sip != NULL; sip = sip->next) {
                if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) {
                  okay = FALSE;
                  break;
                }
              }
            }
            if (okay) {
              tmp = ValNodeNew(NULL);
              if (tmp != NULL) {
                tmp->choice = SEQFEAT_SITE;
                tmp->data.ptrvalue = sfp;
                sfap->pept = tie_next(sfap->pept, tmp);
              }
            }
          }
        } else if ((i =
                    FindStr(feat_bond, num_bond, sfp->comment)) != -1) {
          tmp = ValNodeNew(NULL);
          if (tmp != NULL) {
            tmp->choice = SEQFEAT_BOND;
            tmp->data.ptrvalue = sfp;
            sfap->pept = tie_next(sfap->pept, tmp);
          }
        }
      }
    }
  }
}
5153
/*
 * SeqEntryExplore callback.  In every feature table attached to the entry,
 * removes the import features whose comment is exactly
 * "FeatureToBeDeleted".  Afterwards any annotation (of any type) that has
 * neither data nor a descriptor is unlinked from the chain and freed.
 *
 * data, index and indent are not used.
 */
static void RemovePeptideImpFeats (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  SeqAnnotPtr       cur, following;
  SeqAnnotPtr PNTR  link;        /* slot that currently points at cur */
  SeqFeatPtr        feat, feat_after;

  if (IS_Bioseq(sep)) {
    link = (SeqAnnotPtr PNTR) &(((BioseqPtr)(sep->data.ptrvalue))->annot);
  } else {
    link = (SeqAnnotPtr PNTR) &(((BioseqSetPtr)(sep->data.ptrvalue))->annot);
  }

  for (cur = *link; cur != NULL; cur = following) {
    following = cur->next;

    if (cur->type == 1) {
      for (feat = cur->data; feat != NULL; feat = feat_after) {
        feat_after = feat->next;   /* saved before feat may be removed */
        if (feat->data.choice == SEQFEAT_IMP &&
            feat->comment &&
            StringCmp(feat->comment, "FeatureToBeDeleted") == 0) {
          cur->data = remove_feat(cur->data, feat);
        }
      }
    }

    /* an empty annotation is kept only when it still has a descriptor */
    if (cur->data == NULL && cur->desc == NULL) {
      *link = cur->next;
      cur->next = NULL;
      SeqAnnotFree (cur);
    } else {
      link = (SeqAnnotPtr PNTR) &(cur->next);
    }
  }
}
5198
/*
 * Releases the two ValNode lists held in a SeqFeatArr.  Only the list nodes
 * are freed; the features they point to are left alone.  Both list heads
 * are reset to NULL so the structure can be reused or cleaned up again
 * safely.  A NULL sfap is ignored.
 */
static void CleanUpTmpFeatStruct(SeqFeatArrPtr sfap)
{
  ValNodePtr tmp, tmpnext;

  if (sfap == NULL) return;

  for (tmp = sfap->cds; tmp; tmp = tmpnext) {
    tmpnext = tmp->next;
    MemFree(tmp);
  }
  sfap->cds = NULL;

  for (tmp = sfap->pept; tmp; tmp = tmpnext) {
    tmpnext = tmp->next;
    MemFree(tmp);
  }
  sfap->pept = NULL;
}
5212
ProtFeatOnNucToImpFeat(SeqFeatPtr sfp,Pointer userdata)5213 static void ProtFeatOnNucToImpFeat (SeqFeatPtr sfp, Pointer userdata)
5214
5215 {
5216 BioseqPtr bsp;
5217 GBQualPtr gbq, last;
5218 ImpFeatPtr ifp;
5219 CharPtr key = NULL;
5220 ProtRefPtr prp;
5221 CharPtr str = NULL;
5222 ValNodePtr vnp;
5223
5224 if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return;
5225 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
5226 if (prp == NULL) return;
5227 if (prp->processed < 1) return;
5228 bsp = BioseqFindFromSeqLoc (sfp->location);
5229 if (bsp == NULL) return;
5230 if (ISA_aa (bsp->mol)) return;
5231 ifp = ImpFeatNew ();
5232 if (ifp == NULL) return;
5233 switch (prp->processed) {
5234 case 1:
5235 key = "preprotein";
5236 break;
5237 case 2:
5238 key = "mat_peptide";
5239 break;
5240 case 3:
5241 key = "sig_peptide";
5242 break;
5243 case 4:
5244 key = "transit_peptide";
5245 break;
5246 case 5:
5247 key = "propeptide";
5248 break;
5249 default:
5250 return;
5251 }
5252 ifp->key = StringSave (key);
5253 for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
5254 str = (CharPtr) vnp->data.ptrvalue;
5255 if (StringHasNoText (str)) continue;
5256 gbq = GBQualNew ();
5257 if (gbq == NULL) continue;
5258 gbq->qual = StringSave ("product");
5259 gbq->val = StringSave (str);
5260 if (sfp->qual == NULL) {
5261 sfp->qual = gbq;
5262 } else {
5263 last = sfp->qual;
5264 while (last->next != NULL) {
5265 last = last->next;
5266 }
5267 last->next = gbq;
5268 }
5269 }
5270 if (StringDoesHaveText (prp->desc)) {
5271 gbq = GBQualNew ();
5272 if (gbq != NULL) {
5273 gbq->qual = StringSave ("prot_desc");
5274 gbq->val = StringSave (str);
5275 if (sfp->qual == NULL) {
5276 sfp->qual = gbq;
5277 } else {
5278 last = sfp->qual;
5279 while (last->next != NULL) {
5280 last = last->next;
5281 }
5282 last->next = gbq;
5283 }
5284 }
5285 }
5286 sfp->data.choice = SEQFEAT_IMP;
5287 sfp->data.value.ptrvalue = ifp;
5288 MemFree (prp);
5289 }
5290
/*
 * Driver for the peptide import-feature conversion on one Seq-entry tree.
 * Steps, in order:
 *   1. ProtFeatOnNucToImpFeat is applied to every feature;
 *   2. GetCdRegionsWithPeptides collects coding regions and candidate
 *      import features;
 *   3. ImpFeatToProtRef converts the candidates;
 *   4. RemovePeptideImpFeats removes the features marked for deletion;
 *   5. the temporary lists are released.
 * A NULL sep is ignored.
 */
void EntryChangeImpFeatToProt (SeqEntryPtr sep)
{
  SeqFeatArr collected;

  if (sep != NULL) {
    MemSet ((Pointer) (&collected), 0, sizeof (SeqFeatArr));
    VisitFeaturesInSep (sep, NULL, ProtFeatOnNucToImpFeat);
    SeqEntryExplore(sep, &collected, GetCdRegionsWithPeptides);
    ImpFeatToProtRef(collected);
    SeqEntryExplore(sep, NULL, RemovePeptideImpFeats);
    CleanUpTmpFeatStruct(&collected);
  }
}
5306
5307 //LCOV_EXCL_START
5308 /* functions moved from Sequin */
/*
 * Rewrites the initials (names [4]) of every structured author name in an
 * author list so that each initial is followed by exactly one period.
 *
 * Only lists with choice 1 and person ids with choice 2 are touched.
 * While rebuilding the string, existing periods and spaces are dropped,
 * hyphens are copied as they are, and every other character is copied
 * followed by a '.'.  Processing stops at the first allocation failure.
 */
static void NormalizeAuthors (AuthListPtr alp)

{
  AuthorPtr    author;
  Char         c;
  size_t       cap;
  CharPtr      dst;
  ValNodePtr   node;
  PersonIdPtr  person;
  Int2         rd;
  CharPtr      src;
  NameStdPtr   std_name;
  Int2         wr;

  if (alp == NULL || alp->choice != 1) return;

  for (node = alp->names; node != NULL; node = node->next) {
    author = node->data.ptrvalue;
    if (author == NULL) continue;
    person = author->name;
    if (person == NULL || person->choice != 2) continue;
    std_name = person->data;
    if (std_name == NULL || std_name->names [4] == NULL) continue;

    src = std_name->names [4];
    /* at most two output characters per input character, plus slack */
    cap = MAX ((size_t) (StringLen (src) * 2 + 4), (size_t) 64);
    dst = MemNew (cap);
    if (dst == NULL) return;

    wr = 0;
    for (rd = 0; (c = src [rd]) != '\0'; rd++) {
      if (c == '.' || c == ' ') continue;
      dst [wr] = c;
      wr++;
      if (c != '-') {
        dst [wr] = '.';
        wr++;
      }
    }
    dst [wr] = '\0';

    std_name->names [4] = MemFree (std_name->names [4]);
    std_name->names [4] = StringSave (dst);
    MemFree (dst);
  }
}
5369
/*
 * Applies NormalizeAuthors to the author list(s) of a single publication.
 *
 * PUB_PMid and PUB_Muid entries, and entries without data, are skipped.
 * For PUB_Man the authors are normalized only when othertype is 2 and
 * let_type is 3.  For PUB_Sub the imprint is additionally folded into the
 * citation: its publisher becomes the author affiliation if none is set,
 * its date becomes the citation date if none is set, and the imprint is
 * freed once it has no publisher left.
 */
static void NormalizeAPub (ValNodePtr vnp)

{
  AuthListPtr  authors;
  CitArtPtr    art;
  CitBookPtr   book;
  CitGenPtr    gen;
  CitPatPtr    pat;
  CitSubPtr    sub;
  ImprintPtr   imprint;

  if (vnp == NULL) return;
  if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) return;
  if (vnp->data.ptrvalue == NULL) return;

  switch (vnp->choice) {
    case PUB_Gen :
      gen = (CitGenPtr) vnp->data.ptrvalue;
      NormalizeAuthors (gen->authors);
      break;
    case PUB_Sub :
      sub = (CitSubPtr) vnp->data.ptrvalue;
      authors = sub->authors;
      NormalizeAuthors (authors);
      imprint = sub->imp;
      if (imprint != NULL) {
        if (authors != NULL && authors->affil == NULL && imprint->pub != NULL) {
          authors->affil = imprint->pub;
          imprint->pub = NULL;
        }
        if (sub->date == NULL && imprint->date != NULL) {
          sub->date = imprint->date;
          imprint->date = NULL;
        }
        if (imprint->pub == NULL) {
          sub->imp = ImprintFree (sub->imp);
        }
      }
      break;
    case PUB_Article :
      art = (CitArtPtr) vnp->data.ptrvalue;
      NormalizeAuthors (art->authors);
      break;
    case PUB_Book :
      book = (CitBookPtr) vnp->data.ptrvalue;
      NormalizeAuthors (book->authors);
      break;
    case PUB_Man :
      book = (CitBookPtr) vnp->data.ptrvalue;
      if (book->othertype == 2 && book->let_type == 3) {
        NormalizeAuthors (book->authors);
      }
      break;
    case PUB_Patent :
      pat = (CitPatPtr) vnp->data.ptrvalue;
      NormalizeAuthors (pat->authors);
      NormalizeAuthors (pat->applicants);
      NormalizeAuthors (pat->assignees);
      break;
    default :
      break;
  }
}
5430
NormalizePeriods(GatherContextPtr gcp)5431 static Boolean NormalizePeriods (GatherContextPtr gcp)
5432
5433 {
5434 PubdescPtr pdp;
5435 ValNodePtr sdp;
5436 SeqFeatPtr sfp;
5437 ValNodePtr vnp;
5438
5439 if (gcp == NULL) return TRUE;
5440 pdp = NULL;
5441 if (gcp->thistype == OBJ_SEQFEAT) {
5442 sfp = (SeqFeatPtr) gcp->thisitem;
5443 if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) {
5444 pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5445 }
5446 } else if (gcp->thistype == OBJ_SEQDESC) {
5447 sdp = (ValNodePtr) gcp->thisitem;
5448 if (sdp != NULL && sdp->choice == Seq_descr_pub) {
5449 pdp = (PubdescPtr) sdp->data.ptrvalue;
5450 }
5451 }
5452 if (pdp == NULL) return TRUE;
5453 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
5454 NormalizeAPub (vnp);
5455 }
5456 return TRUE;
5457 }
5458
/*
 * Runs NormalizePeriods over the Seq-entry with a gather scope that ignores
 * every object type except Bioseqs, Bioseq segments, features, annotations
 * and descriptors.  A NULL sep is ignored.
 */
void NormalizePeriodsOnInitials (SeqEntryPtr sep)

{
  GatherScope scope;

  if (sep == NULL) return;

  MemSet ((Pointer) (&scope), 0, sizeof (GatherScope));
  /* ignore everything first, then switch the wanted types back on */
  MemSet ((Pointer)(scope.ignore), (int)(TRUE), (size_t)(OBJ_MAX * sizeof(Boolean)));
  scope.ignore[OBJ_SEQDESC] = FALSE;
  scope.ignore[OBJ_SEQANNOT] = FALSE;
  scope.ignore[OBJ_SEQFEAT] = FALSE;
  scope.ignore[OBJ_BIOSEQ_SEG] = FALSE;
  scope.ignore[OBJ_BIOSEQ] = FALSE;
  scope.get_feats_location = FALSE;
  scope.seglevels = 1;

  GatherSeqEntry (sep, NULL, NormalizePeriods, &scope);
}
5476
NormalizeRnas(GatherContextPtr gcp)5477 static Boolean NormalizeRnas (GatherContextPtr gcp)
5478
5479 {
5480 GBQualPtr gbqual;
5481 GBQualPtr nextqual;
5482 GBQualPtr PNTR prevqual;
5483 RnaRefPtr rrp;
5484 SeqFeatPtr sfp;
5485 CharPtr str;
5486
5487 if (gcp == NULL) return TRUE;
5488 if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
5489 sfp = (SeqFeatPtr) gcp->thisitem;
5490 if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return TRUE;
5491 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
5492 if (rrp == NULL) return TRUE;
5493 if (rrp->type == 0) {
5494 rrp->type = 255;
5495 }
5496 if (rrp->ext.choice != 0 && rrp->ext.choice != 1) return TRUE;
5497 if (! TASNStringHasNoText (rrp->ext.value.ptrvalue)) return TRUE;
5498 str = NULL;
5499 gbqual = sfp->qual;
5500 prevqual = (GBQualPtr PNTR) &(sfp->qual);
5501 while (gbqual != NULL) {
5502 nextqual = gbqual->next;
5503 if (StringICmp (gbqual->qual, "product") == 0) {
5504 str = StringSave (gbqual->val);
5505 *(prevqual) = gbqual->next;
5506 gbqual->next = NULL;
5507 gbqual->qual = MemFree (gbqual->qual);
5508 gbqual->val = MemFree (gbqual->val);
5509 GBQualFree (gbqual);
5510 } else {
5511 prevqual = (GBQualPtr PNTR) &(gbqual->next);
5512 }
5513 gbqual = nextqual;
5514 }
5515 if (str == NULL) {
5516 gbqual = sfp->qual;
5517 prevqual = (GBQualPtr PNTR) &(sfp->qual);
5518 while (gbqual != NULL) {
5519 nextqual = gbqual->next;
5520 if (StringICmp (gbqual->qual, "standard_name") == 0) {
5521 str = StringSave (gbqual->val);
5522 *(prevqual) = gbqual->next;
5523 gbqual->next = NULL;
5524 gbqual->qual = MemFree (gbqual->qual);
5525 gbqual->val = MemFree (gbqual->val);
5526 GBQualFree (gbqual);
5527 } else {
5528 prevqual = (GBQualPtr PNTR) &(gbqual->next);
5529 }
5530 gbqual = nextqual;
5531 }
5532 }
5533 if (rrp->ext.choice == 1 && rrp->ext.value.ptrvalue != NULL) {
5534 rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
5535 }
5536 if (rrp->ext.choice == 0 || rrp->ext.choice == 1) {
5537 rrp->ext.choice = 1;
5538 rrp->ext.value.ptrvalue = str;
5539 str = NULL;
5540 }
5541 MemFree (str);
5542 return TRUE;
5543 }
5544
/*
 * Runs NormalizeRnas over the Seq-entry with a gather scope that ignores
 * every object type except Bioseqs, Bioseq segments, features and
 * annotations.  A NULL sep is ignored.
 */
void MoveRnaGBQualProductToName (SeqEntryPtr sep)

{
  GatherScope scope;

  if (sep == NULL) return;

  MemSet ((Pointer) (&scope), 0, sizeof (GatherScope));
  /* ignore everything first, then switch the wanted types back on */
  MemSet ((Pointer)(scope.ignore), (int)(TRUE), (size_t)(OBJ_MAX * sizeof(Boolean)));
  scope.ignore[OBJ_SEQANNOT] = FALSE;
  scope.ignore[OBJ_SEQFEAT] = FALSE;
  scope.ignore[OBJ_BIOSEQ_SEG] = FALSE;
  scope.ignore[OBJ_BIOSEQ] = FALSE;
  scope.get_feats_location = FALSE;
  scope.seglevels = 1;

  GatherSeqEntry (sep, NULL, NormalizeRnas, &scope);
}
5561
NormalizeProts(GatherContextPtr gcp)5562 static Boolean NormalizeProts (GatherContextPtr gcp)
5563
5564 {
5565 GBQualPtr gbqual;
5566 GBQualPtr nextqual;
5567 GBQualPtr PNTR prevqual;
5568 ProtRefPtr prp;
5569 SeqFeatPtr sfp;
5570 CharPtr str;
5571 ValNodePtr vnp;
5572
5573 if (gcp == NULL) return TRUE;
5574 if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
5575 sfp = (SeqFeatPtr) gcp->thisitem;
5576 if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return TRUE;
5577 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
5578 if (prp == NULL) return TRUE;
5579 vnp = prp->name;
5580 if (vnp == NULL || TASNStringHasNoText (vnp->data.ptrvalue)) {
5581 str = NULL;
5582 gbqual = sfp->qual;
5583 prevqual = (GBQualPtr PNTR) &(sfp->qual);
5584 while (gbqual != NULL) {
5585 nextqual = gbqual->next;
5586 if (StringICmp (gbqual->qual, "product") == 0) {
5587 str = StringSave (gbqual->val);
5588 *(prevqual) = gbqual->next;
5589 gbqual->next = NULL;
5590 gbqual->qual = MemFree (gbqual->qual);
5591 gbqual->val = MemFree (gbqual->val);
5592 GBQualFree (gbqual);
5593 } else {
5594 prevqual = (GBQualPtr PNTR) &(gbqual->next);
5595 }
5596 gbqual = nextqual;
5597 }
5598 if (vnp == NULL) {
5599 vnp = ValNodeNew (NULL);
5600 prp->name = vnp;
5601 }
5602 vnp = prp->name;
5603 if (vnp != NULL) {
5604 vnp->data.ptrvalue = str;
5605 str = NULL;
5606 }
5607 MemFree (str);
5608 }
5609 vnp = prp->name;
5610 if (vnp == NULL || TASNStringHasNoText (vnp->data.ptrvalue)) return TRUE;
5611 if (prp->desc == NULL) return TRUE;
5612 if (StringICmp (vnp->data.ptrvalue, prp->desc) == 0) {
5613 prp->desc = MemFree (prp->desc);
5614 }
5615 return TRUE;
5616 }
5617
/*
 * Runs NormalizeProts over the Seq-entry with a gather scope that ignores
 * every object type except Bioseqs, Bioseq segments, features and
 * annotations.  A NULL sep is ignored.
 */
void MoveProtGBQualProductToName (SeqEntryPtr sep)

{
  GatherScope scope;

  if (sep == NULL) return;

  MemSet ((Pointer) (&scope), 0, sizeof (GatherScope));
  /* ignore everything first, then switch the wanted types back on */
  MemSet ((Pointer)(scope.ignore), (int)(TRUE), (size_t)(OBJ_MAX * sizeof(Boolean)));
  scope.ignore[OBJ_SEQANNOT] = FALSE;
  scope.ignore[OBJ_SEQFEAT] = FALSE;
  scope.ignore[OBJ_BIOSEQ_SEG] = FALSE;
  scope.ignore[OBJ_BIOSEQ] = FALSE;
  scope.get_feats_location = FALSE;
  scope.seglevels = 1;

  GatherSeqEntry (sep, NULL, NormalizeProts, &scope);
}
5634
NormalizeCds(GatherContextPtr gcp)5635 static Boolean NormalizeCds (GatherContextPtr gcp)
5636
5637 {
5638 BioseqContextPtr bcp;
5639 BioseqPtr bsp;
5640 GBQualPtr gbqual;
5641 GBQualPtr nextqual;
5642 GBQualPtr PNTR prevqual;
5643 ProtRefPtr prp;
5644 SeqEntryPtr sep;
5645 SeqFeatPtr sfp;
5646 SeqFeatPtr sfp2;
5647 CharPtr str;
5648 ValNodePtr vnp;
5649
5650 if (gcp == NULL) return TRUE;
5651 if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
5652 sfp = (SeqFeatPtr) gcp->thisitem;
5653 if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return TRUE;
5654 bsp = BioseqFind (SeqLocId (sfp->product));
5655 if (bsp == NULL) return TRUE;
5656 str = NULL;
5657 gbqual = sfp->qual;
5658 prevqual = (GBQualPtr PNTR) &(sfp->qual);
5659 while (gbqual != NULL) {
5660 nextqual = gbqual->next;
5661 if (StringICmp (gbqual->qual, "product") == 0) {
5662 str = StringSave (gbqual->val);
5663 *(prevqual) = gbqual->next;
5664 gbqual->next = NULL;
5665 gbqual->qual = MemFree (gbqual->qual);
5666 gbqual->val = MemFree (gbqual->val);
5667 GBQualFree (gbqual);
5668 } else {
5669 prevqual = (GBQualPtr PNTR) &(gbqual->next);
5670 }
5671 gbqual = nextqual;
5672 }
5673 if (str == NULL) return TRUE;
5674
5675 sfp2 = NULL;
5676 bcp = BioseqContextNew (bsp);
5677 sfp2 = BioseqContextGetSeqFeat (bcp, SEQFEAT_PROT, NULL, NULL, 0);
5678 BioseqContextFree (bcp);
5679 if (sfp2 == NULL) {
5680 prp = CreateNewProtRef (str, NULL, NULL, NULL);
5681 if (prp != NULL) {
5682 sep = SeqMgrGetSeqEntryForData (bsp);
5683 if (sep != NULL) {
5684 sfp = CreateNewFeature (sep, NULL, SEQFEAT_PROT, NULL);
5685 if (sfp != NULL) {
5686 sfp->data.value.ptrvalue = (Pointer) prp;
5687 }
5688 }
5689 }
5690 return TRUE;
5691 }
5692
5693 prp = (ProtRefPtr) sfp2->data.value.ptrvalue;
5694 if (prp == NULL) return TRUE;
5695 vnp = prp->name;
5696 if (vnp != NULL && (! TASNStringHasNoText (vnp->data.ptrvalue))) return TRUE;
5697 if (vnp == NULL) {
5698 vnp = ValNodeNew (NULL);
5699 prp->name = vnp;
5700 }
5701 vnp = prp->name;
5702 if (vnp != NULL) {
5703 vnp->data.ptrvalue = str;
5704 str = NULL;
5705 }
5706 MemFree (str);
5707 return TRUE;
5708 }
5709
/*
 * Runs the NormalizeCds callback over sep via GatherSeqEntry.
 * The gather scope ignores every object type except Bioseqs, Bioseq
 * segments, Seq-annots and Seq-feats.  Does nothing when sep is NULL.
 */
void MoveCdsGBQualProductToName (SeqEntryPtr sep)

{
  GatherScope  scope;

  if (sep != NULL) {
    MemSet ((Pointer) (&scope), 0, sizeof (GatherScope));
    scope.seglevels = 1;
    scope.get_feats_location = FALSE;

    /* start with everything ignored, then re-enable the types of interest */
    MemSet ((Pointer) (scope.ignore), (int) (TRUE), (size_t) (OBJ_MAX * sizeof (Boolean)));
    scope.ignore[OBJ_SEQANNOT] = FALSE;
    scope.ignore[OBJ_SEQFEAT] = FALSE;
    scope.ignore[OBJ_BIOSEQ_SEG] = FALSE;
    scope.ignore[OBJ_BIOSEQ] = FALSE;

    GatherSeqEntry (sep, NULL, NormalizeCds, &scope);
  }
}
5726
NormalizeFeatGBQuals(GatherContextPtr gcp)5727 static Boolean NormalizeFeatGBQuals (GatherContextPtr gcp)
5728
5729 {
5730 GBQualPtr gbqual;
5731 size_t len;
5732 GBQualPtr nextqual;
5733 GBQualPtr PNTR prevqual;
5734 SeqFeatPtr sfp;
5735 CharPtr str;
5736
5737 if (gcp == NULL) return TRUE;
5738 if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
5739 sfp = (SeqFeatPtr) gcp->thisitem;
5740 if (sfp == NULL) return TRUE;
5741 gbqual = sfp->qual;
5742 prevqual = (GBQualPtr PNTR) &(sfp->qual);
5743 while (gbqual != NULL) {
5744 nextqual = gbqual->next;
5745 if (StringICmp (gbqual->qual, "partial") == 0) {
5746 *(prevqual) = gbqual->next;
5747 gbqual->next = NULL;
5748 gbqual->qual = MemFree (gbqual->qual);
5749 gbqual->val = MemFree (gbqual->val);
5750 GBQualFree (gbqual);
5751 sfp->partial = TRUE;
5752 } else if (StringICmp (gbqual->qual, "evidence") == 0) {
5753 if (StringICmp (gbqual->val, "experimental") == 0) {
5754 sfp->exp_ev = 1;
5755 } else if (StringICmp (gbqual->val, "not_experimental") == 0) {
5756 sfp->exp_ev = 2;
5757 }
5758 *(prevqual) = gbqual->next;
5759 gbqual->next = NULL;
5760 gbqual->qual = MemFree (gbqual->qual);
5761 gbqual->val = MemFree (gbqual->val);
5762 GBQualFree (gbqual);
5763 } else if (StringICmp (gbqual->qual, "exception") == 0) {
5764 sfp->excpt = TRUE;
5765 } else if (StringICmp (gbqual->qual, "note") == 0) {
5766 *(prevqual) = gbqual->next;
5767 gbqual->next = NULL;
5768 if (sfp->comment == NULL) {
5769 sfp->comment = gbqual->val;
5770 } else {
5771 len = StringLen (sfp->comment) + StringLen (gbqual->val) + 5;
5772 str = MemNew (sizeof (Char) * len);
5773 StringCpy (str, sfp->comment);
5774 StringCat (str, "; ");
5775 StringCat (str, gbqual->val);
5776 sfp->comment = MemFree (sfp->comment);
5777 gbqual->val = MemFree (gbqual->val);
5778 sfp->comment = str;
5779 }
5780 gbqual->val = NULL;
5781 GBQualFree (gbqual);
5782 } else {
5783 prevqual = (GBQualPtr PNTR) &(gbqual->next);
5784 }
5785 gbqual = nextqual;
5786 }
5787 return TRUE;
5788 }
5789
/*
 * Runs the NormalizeFeatGBQuals callback over sep via GatherSeqEntry.
 * The gather scope ignores every object type except Bioseqs, Bioseq
 * segments, Seq-annots and Seq-feats.  Does nothing when sep is NULL.
 */
void MoveFeatGBQualsToFields (SeqEntryPtr sep)

{
  GatherScope  scope;

  if (sep != NULL) {
    MemSet ((Pointer) (&scope), 0, sizeof (GatherScope));
    scope.seglevels = 1;
    scope.get_feats_location = FALSE;

    /* start with everything ignored, then re-enable the types of interest */
    MemSet ((Pointer) (scope.ignore), (int) (TRUE), (size_t) (OBJ_MAX * sizeof (Boolean)));
    scope.ignore[OBJ_SEQANNOT] = FALSE;
    scope.ignore[OBJ_SEQFEAT] = FALSE;
    scope.ignore[OBJ_BIOSEQ_SEG] = FALSE;
    scope.ignore[OBJ_BIOSEQ] = FALSE;

    GatherSeqEntry (sep, NULL, NormalizeFeatGBQuals, &scope);
  }
}
5806
/*
 * SeqEntryExplore callback.  For a protein Bioseq that has no Seq-id of
 * choice SEQID_OTHER, extracts the first title descriptor from the Bioseq's
 * descriptor chain and frees it.  mydata, index and indent are not used.
 */
static void StripTitleFromProteinProducts (SeqEntryPtr sep, Pointer mydata,
                                           Int4 index, Int2 indent)

{
  SeqIdPtr    id;
  BioseqPtr   prot;
  ValNodePtr  title;

  if (sep == NULL || (! IS_Bioseq (sep))) return;
  prot = (BioseqPtr) sep->data.ptrvalue;
  if (prot == NULL || (! ISA_aa (prot->mol))) return;

  /* leave the record alone as soon as a SEQID_OTHER identifier is seen */
  id = prot->id;
  while (id != NULL) {
    if (id->choice == SEQID_OTHER) return;
    id = id->next;
  }

  title = ValNodeExtract (&(prot->descr), Seq_descr_title);
  if (title != NULL) {
    ValNodeFreeData (title);
  }
}
5827
/*
 * Removes title descriptors from the protein Bioseqs of nuc-prot sets.
 *
 * For a genbank, mut/pop/phy/eco, wgs or small-genome set the function
 * recurses into each member.  For a nuc-prot set it explores the set with
 * StripTitleFromProteinProducts.  Any other input (NULL, a Bioseq, or a set
 * of another class) is ignored.
 *
 * The container classes are spelled with the BioseqseqSet_class_* names, the
 * same way move_cds_ex tests them, instead of bare numbers.
 */
void StripTitleFromProtsInNucProts (SeqEntryPtr sep)

{
  BioseqSetPtr  bssp;
  SeqEntryPtr   child;

  if (sep == NULL) return;
  if (! IS_Bioseq_set (sep)) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;
  if (bssp->_class == BioseqseqSet_class_genbank ||
      (bssp->_class >= BioseqseqSet_class_mut_set && bssp->_class <= BioseqseqSet_class_eco_set) ||
      bssp->_class == BioseqseqSet_class_wgs_set ||
      bssp->_class == BioseqseqSet_class_small_genome_set) {
    for (child = bssp->seq_set; child != NULL; child = child->next) {
      StripTitleFromProtsInNucProts (child);
    }
    return;
  }
  if (bssp->_class != BioseqseqSet_class_nuc_prot) return;
  SeqEntryExplore (sep, NULL, StripTitleFromProteinProducts);
}
5849 //LCOV_EXCL_STOP
5850
5851
/*
 * Passes the string fields of a feature through the CleanVis* helpers.
 *
 * The comment and title of the feature are always processed.  Bond, site,
 * secondary-structure and comment features are then skipped, as is any
 * feature whose data pointer is NULL.  For the remaining feature types the
 * strings of the type-specific payload are processed; gene, protein, RNA,
 * pub, import, region, org and biosource features have such fields.
 *
 * Side effects visible here: an RNA feature whose ext string ends up NULL
 * gets ext.choice reset to 0, and a region feature whose string ends up NULL
 * is converted to a comment feature.
 */
static void CleanFeatStrings (SeqFeatPtr sfp)

{
  GeneRefPtr    gene;
  ImpFeatPtr    imp;
  OrgRefPtr     org;
  OrgNamePtr    orgname;
  ProtRefPtr    prot;
  PubdescPtr    pub;
  RnaRefPtr     rna;
  BioSourcePtr  source;

  if (sfp == NULL) return;
  CleanVisString (&sfp->comment);
  CleanVisString (&sfp->title);

  /* these choices are handled as having no data pointer to clean */
  if (sfp->data.choice == SEQFEAT_BOND ||
      sfp->data.choice == SEQFEAT_SITE ||
      sfp->data.choice == SEQFEAT_PSEC_STR ||
      sfp->data.choice == SEQFEAT_COMMENT) {
    return;
  }
  if (sfp->data.value.ptrvalue == NULL) return;

  org = NULL;
  switch (sfp->data.choice) {
    case SEQFEAT_GENE :
      gene = (GeneRefPtr) sfp->data.value.ptrvalue;
      CleanVisString (&(gene->locus));
      CleanVisString (&(gene->allele));
      CleanVisString (&(gene->desc));
      CleanVisString (&(gene->maploc));
      CleanVisString (&(gene->locus_tag));
      CleanVisStringList (&(gene->syn));
      break;
    case SEQFEAT_ORG :
      /* cleaned by the shared Org-ref code below */
      org = (OrgRefPtr) sfp->data.value.ptrvalue;
      break;
    case SEQFEAT_PROT :
      prot = (ProtRefPtr) sfp->data.value.ptrvalue;
      CleanVisString (&(prot->desc));
      CleanVisStringList (&(prot->name));
      CleanVisStringList (&(prot->ec));
      CleanVisStringList (&(prot->activity));
      break;
    case SEQFEAT_RNA :
      rna = (RnaRefPtr) sfp->data.value.ptrvalue;
      if (rna->ext.choice == 1) {
        CleanVisString ((CharPtr PNTR) &(rna->ext.value.ptrvalue));
        if (rna->ext.value.ptrvalue == NULL) {
          rna->ext.choice = 0;
        }
      }
      break;
    case SEQFEAT_PUB :
      pub = (PubdescPtr) sfp->data.value.ptrvalue;
      CleanVisString (&(pub->comment));
      break;
    case SEQFEAT_IMP :
      imp = (ImpFeatPtr) sfp->data.value.ptrvalue;
      CleanVisString (&(imp->key));
      CleanVisString (&(imp->loc));
      CleanVisString (&(imp->descr));
      break;
    case SEQFEAT_REGION :
      CleanVisString ((CharPtr PNTR) &(sfp->data.value.ptrvalue));
      if (sfp->data.value.ptrvalue == NULL) {
        sfp->data.choice = SEQFEAT_COMMENT;
      }
      break;
    case SEQFEAT_BIOSRC :
      source = (BioSourcePtr) sfp->data.value.ptrvalue;
      org = source->org;
      CleanSubSourceList (&(source->subtype), source->genome);
      break;
    default :
      /* all remaining feature types have nothing cleaned here */
      break;
  }

  if (org == NULL) return;
  CleanVisString (&(org->taxname));
  CleanVisString (&(org->common));
  CleanVisStringList (&(org->mod));
  CleanVisStringList (&(org->syn));
  for (orgname = org->orgname; orgname != NULL; orgname = orgname->next) {
    CleanVisString (&(orgname->attrib));
    CleanVisString (&(orgname->lineage));
    CleanVisString (&(orgname->div));
    CleanOrgModList (&(orgname->mod));
  }
}
5974
OnlyPunctuation(CharPtr str)5975 static Boolean OnlyPunctuation (CharPtr str)
5976
5977 {
5978 Uchar ch; /* to use 8bit characters in multibyte languages */
5979
5980 if (str != NULL) {
5981 ch = *str;
5982 while (ch != '\0') {
5983 if (ch > ' ' && ch != '.' && ch != ',' && ch != '~' && ch != ';') {
5984 return FALSE;
5985 }
5986 str++;
5987 ch = *str;
5988 }
5989 }
5990 return TRUE;
5991 }
5992
/*
 * Passes the string fields of a descriptor through the CleanVis* helpers.
 *
 * mol-type and method descriptors are skipped, as is any descriptor whose
 * data pointer is NULL.  Name, title and region descriptors have their
 * string cleaned in place; a comment descriptor is cleaned with
 * CleanVisStringJunk and then freed (pointer set to NULL) when
 * OnlyPunctuation reports TRUE for it.  GenBank blocks, pubs, Org-refs
 * and BioSources have their individual string fields processed.  All
 * other descriptor types are left untouched.
 */
static void CleanDescStrings (ValNodePtr sdp)

{
  GBBlockPtr    gbblock;
  OrgRefPtr     org;
  OrgNamePtr    orgname;
  PubdescPtr    pub;
  BioSourcePtr  source;

  if (sdp == NULL) return;

  /* these choices are handled as having no data pointer to clean */
  if (sdp->choice == Seq_descr_mol_type || sdp->choice == Seq_descr_method) return;
  if (sdp->data.ptrvalue == NULL) return;

  org = NULL;
  switch (sdp->choice) {
    case Seq_descr_name :
    case Seq_descr_title :
    case Seq_descr_region :
      CleanVisString ((CharPtr PNTR) &sdp->data.ptrvalue);
      break;
    case Seq_descr_org :
      /* cleaned by the shared Org-ref code below */
      org = (OrgRefPtr) sdp->data.ptrvalue;
      break;
    case Seq_descr_comment :
      CleanVisStringJunk ((CharPtr PNTR) &sdp->data.ptrvalue);
      if (OnlyPunctuation ((CharPtr) sdp->data.ptrvalue)) {
        sdp->data.ptrvalue = MemFree (sdp->data.ptrvalue);
      }
      break;
    case Seq_descr_genbank :
      gbblock = (GBBlockPtr) sdp->data.ptrvalue;
      CleanVisStringList (&(gbblock->extra_accessions));
      CleanVisStringList (&(gbblock->keywords));
      CleanVisString (&(gbblock->source));
      CleanVisString (&(gbblock->origin));
      CleanVisString (&(gbblock->date));
      CleanVisString (&(gbblock->div));
      CleanVisString (&(gbblock->taxonomy));
      break;
    case Seq_descr_pub :
      pub = (PubdescPtr) sdp->data.ptrvalue;
      CleanVisString (&(pub->comment));
      break;
    case Seq_descr_source :
      source = (BioSourcePtr) sdp->data.ptrvalue;
      org = source->org;
      CleanSubSourceList (&(source->subtype), source->genome);
      break;
    default :
      /* all remaining descriptor types have nothing cleaned here */
      break;
  }

  if (org == NULL) return;
  CleanVisString (&(org->taxname));
  CleanVisString (&(org->common));
  CleanVisStringList (&(org->mod));
  CleanVisStringList (&(org->syn));
  for (orgname = org->orgname; orgname != NULL; orgname = orgname->next) {
    CleanVisString (&(orgname->attrib));
    CleanVisString (&(orgname->lineage));
    CleanVisString (&(orgname->div));
    CleanOrgModList (&(orgname->mod));
  }
}
6104
/*
 * SeqEntryExplore-style callback that cleans the strings of every feature
 * and descriptor attached directly to a Bioseq or Bioseq-set and removes
 * the ones that end up empty:
 *   - a feature is freed when its data pointer is NULL, unless it is a
 *     bond, site, secondary-structure or comment feature;
 *   - an annotation is freed when it has neither data nor a descriptor;
 *   - a descriptor is freed when its data pointer is NULL, unless it is a
 *     mol-type or method descriptor.
 * mydata, index and indent are not used.
 */
void GetRidOfEmptyFeatsDescCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  SeqAnnotPtr   annot;
  Pointer PNTR  annotLink;
  BioseqPtr     bioseq;
  BioseqSetPtr  bioseqSet;
  SeqDescrPtr   descr;
  Pointer PNTR  descrLink;
  SeqFeatPtr    feat;
  Pointer PNTR  featLink;
  SeqAnnotPtr   followingAnnot;
  SeqDescrPtr   followingDescr;
  SeqFeatPtr    followingFeat;
  Boolean       keep;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;

  /* pick up the annotation and descriptor chains of the entry; each
     *Link variable addresses the pointer that refers to the current node */
  if (IS_Bioseq (sep)) {
    bioseq = (BioseqPtr) sep->data.ptrvalue;
    annot = bioseq->annot;
    annotLink = (Pointer PNTR) &(bioseq->annot);
    descr = bioseq->descr;
    descrLink = (Pointer PNTR) &(bioseq->descr);
  } else if (IS_Bioseq_set (sep)) {
    bioseqSet = (BioseqSetPtr) sep->data.ptrvalue;
    annot = bioseqSet->annot;
    annotLink = (Pointer PNTR) &(bioseqSet->annot);
    descr = bioseqSet->descr;
    descrLink = (Pointer PNTR) &(bioseqSet->descr);
  } else {
    return;
  }

  for (; annot != NULL; annot = followingAnnot) {
    followingAnnot = annot->next;
    if (annot->type == 1 && annot->data != NULL) {
      featLink = (Pointer PNTR) &(annot->data);
      for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = followingFeat) {
        followingFeat = feat->next;
        CleanFeatStrings (feat);
        keep = (Boolean) (feat->data.choice == SEQFEAT_BOND ||
                          feat->data.choice == SEQFEAT_SITE ||
                          feat->data.choice == SEQFEAT_PSEC_STR ||
                          feat->data.choice == SEQFEAT_COMMENT ||
                          feat->data.value.ptrvalue != NULL);
        if (keep) {
          featLink = (Pointer PNTR) &(feat->next);
        } else {
          *(featLink) = feat->next;
          feat->next = NULL;
          SeqFeatFree (feat);
        }
      }
    }
    /* now keep empty annot if annot_descr present */
    if (annot->data != NULL || annot->desc != NULL) {
      annotLink = (Pointer PNTR) &(annot->next);
    } else {
      *(annotLink) = annot->next;
      annot->next = NULL;
      SeqAnnotFree (annot);
    }
  }

  for (; descr != NULL; descr = followingDescr) {
    followingDescr = descr->next;
    CleanDescStrings (descr);
    keep = (Boolean) (descr->choice == Seq_descr_mol_type ||
                      descr->choice == Seq_descr_method ||
                      descr->data.ptrvalue != NULL);
    if (keep) {
      descrLink = (Pointer PNTR) &(descr->next);
    } else {
      *(descrLink) = descr->next;
      descr->next = NULL;
      SeqDescrFree (descr);
    }
  }
}
6183
6184 /* move_cds from Serge Bazhin, modified by Kans */
6185
/* State shared between move_cds_ex and its move_cds_within_nucprot callback. */
typedef struct bool_bioseq_set {
  Uint2 found; /* incremented by the callback for each coding region it moves */
  BioseqSetPtr bssp; /* set handed to put_cds_on_nps as the destination */
  Boolean doPseudo; /* copied from the doPseudo argument of move_cds_ex */
} BoolBioseqSet, PNTR BoolBioseqSetPtr;
6191
6192 /**********************************************************/
/*
 * Appends sfp to a feature table on bssp.
 *
 * The destination is the first annotation of bssp that has no name, no
 * descriptor and type 1 (a feature table, as the SeqFeatPtr cast of its
 * data implies).  When no such annotation exists a new one is created and
 * placed at the head of bssp->annot.  The feature is linked after the last
 * feature already in that table.
 *
 * The feature always goes into the annotation selected above, not into
 * whatever annotation happens to be first on the set.
 *
 * Nothing is done when bssp or sfp is NULL, or when a needed annotation
 * cannot be allocated.
 */
static void put_cds_on_nps (BioseqSetPtr bssp, SeqFeatPtr sfp)

{
  SeqFeatPtr   prev;
  SeqAnnotPtr  sap;

  if (bssp == NULL || sfp == NULL) return;

  /* look for an unnamed, undescribed feature table */
  sap = bssp->annot;
  while (sap != NULL && (sap->name != NULL || sap->desc != NULL || sap->type != 1)) {
    sap = sap->next;
  }
  if (sap == NULL) {
    sap = SeqAnnotNew ();
    if (sap == NULL) return;
    sap->type = 1;
    sap->next = bssp->annot;
    bssp->annot = sap;
  }

  /* sap is the selected table; do not fall back to bssp->annot here */
  if (sap->data != NULL) {
    prev = (SeqFeatPtr) sap->data;
    while (prev->next != NULL) {
      prev = prev->next;
    }
    prev->next = sfp;
  } else {
    sap->data = (Pointer) sfp;
  }
}
6224
/**********************************************************
 *
 *  void move_cds_within_nucprot(sep, mydata, index, indent)
 *
 *  Runs through the components of a nuc-prot Bioseq-set, looks
 *  for cdregions in their Seq-entries, and moves the feature
 *  pointers to the nuc-prot Bioseq-set.
 *
 **********************************************************/
/*
 * SeqEntryExplore callback; mydata must point to a BoolBioseqSet.
 *
 * Every non-pseudo coding-region feature found in a feature table of the
 * entry, and which either has a product or a location of at least 6
 * residues, is unlinked from its table, counted in the found field and
 * handed to put_cds_on_nps together with the bssp field.  Annotations
 * left with neither data nor a descriptor are then freed.
 * index and indent are not used.
 */
static void move_cds_within_nucprot(SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  SeqAnnotPtr       annot;
  Pointer PNTR      annotLink;
  BioseqPtr         bioseq;
  BioseqSetPtr      bioseqSet;
  SeqFeatPtr        feat;
  Pointer PNTR      featLink;
  SeqAnnotPtr       followingAnnot;
  SeqFeatPtr        followingFeat;
  Boolean           movable;
  BoolBioseqSetPtr  state;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  state = (BoolBioseqSetPtr) mydata;
  if (state == NULL) return;

  /* annotLink addresses the pointer that refers to the current annotation */
  if (IS_Bioseq (sep)) {
    bioseq = (BioseqPtr) sep->data.ptrvalue;
    annot = bioseq->annot;
    annotLink = (Pointer PNTR) &(bioseq->annot);
  } else if (IS_Bioseq_set (sep)) {
    bioseqSet = (BioseqSetPtr) sep->data.ptrvalue;
    annot = bioseqSet->annot;
    annotLink = (Pointer PNTR) &(bioseqSet->annot);
  } else {
    return;
  }

  for (; annot != NULL; annot = followingAnnot) {
    followingAnnot = annot->next;
    if (annot->type == 1) {
      featLink = (Pointer PNTR) &(annot->data);
      for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = followingFeat) {
        followingFeat = feat->next;
        movable = (Boolean) (feat->data.choice == SEQFEAT_CDREGION && (! feat->pseudo) &&
                             (feat->product != NULL || SeqLocLen (feat->location) >= 6));
        if (! movable) {
          featLink = (Pointer PNTR) &(feat->next);
          continue;
        }
        /* detach from this table, then append to the set-level table */
        *(featLink) = feat->next;
        feat->next = NULL;
        state->found++;
        /* ErrPostEx(SEV_WARNING, 0, 0, "Moving cdregion from na Bioseq.annot to Bioseq-set.annot."); */
        put_cds_on_nps (state->bssp, feat);
      }
    }
    /* now keep empty annot if annot_descr present */
    if (annot->data != NULL || annot->desc != NULL) {
      annotLink = (Pointer PNTR) &(annot->next);
    } else {
      *(annotLink) = annot->next;
      annot->next = NULL;
      SeqAnnotFree (annot);
    }
  }
}
6290
6291 /**********************************************************
6292 *
6293 * Uint2 move_cds(sep)
6294 *
6295 * Moves cdregion features to nuc-prot set level
6296 *
6297 **********************************************************/
move_cds_ex(SeqEntryPtr sep,Boolean doPseudo)6298 Uint2 move_cds_ex (SeqEntryPtr sep, Boolean doPseudo)
6299 {
6300 BioseqSetPtr bssp;
6301 Uint2 found;
6302 BoolBioseqSet bbsp;
6303
6304 if (sep == NULL) return 0;
6305 if (! IS_Bioseq_set (sep)) return 0;
6306 bssp = (BioseqSetPtr) sep->data.ptrvalue;
6307 if (bssp == NULL) return 0;
6308 if (bssp->_class == BioseqseqSet_class_genbank ||
6309 (bssp->_class >= BioseqseqSet_class_mut_set && bssp->_class <= BioseqseqSet_class_eco_set) ||
6310 bssp->_class == BioseqseqSet_class_gen_prod_set ||
6311 bssp->_class == BioseqseqSet_class_wgs_set ||
6312 bssp->_class == BioseqseqSet_class_small_genome_set) {
6313 found = 0;
6314 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
6315 found += move_cds (sep);
6316 }
6317 return found;
6318 }
6319 if (bssp->_class != 1) return 0;
6320 bbsp.found = 0;
6321 bbsp.bssp = bssp;
6322 bbsp.doPseudo = doPseudo;
6323 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
6324 SeqEntryExplore (sep, (Pointer) &bbsp, move_cds_within_nucprot);
6325 }
6326 return(bbsp.found);
6327 }
6328
move_cds(SeqEntryPtr sep)6329 Uint2 move_cds(SeqEntryPtr sep)
6330 {
6331 return move_cds_ex (sep, TRUE);
6332 }
6333
MoveDbxrefs(GatherContextPtr gcp)6334 static Boolean MoveDbxrefs (GatherContextPtr gcp)
6335
6336 {
6337 GBQualPtr qual;
6338 GBQualPtr nextqual;
6339 SeqFeatPtr sfp;
6340 DbtagPtr db;
6341 ObjectIdPtr oip;
6342 ValNodePtr vnp;
6343 CharPtr tag, value, p;
6344
6345 if (gcp == NULL) return TRUE;
6346 if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
6347 sfp = (SeqFeatPtr) gcp->thisitem;
6348 for (qual=sfp->qual; qual; qual = nextqual) {
6349 nextqual = qual->next;
6350 if (StringICmp (qual->qual, "db_xref") == 0) {
6351 vnp = ValNodeNew(NULL);
6352 db = DbtagNew();
6353 vnp->data.ptrvalue = db;
6354 tag = qual->val;
6355 if ((p = StrChr(tag, ':')) != NULL) {
6356 value = p+1;
6357 *p = '\0';
6358 db->db = StringSave (tag);
6359 oip = ObjectIdNew();
6360 oip->str = StringSave (value);
6361 db->tag = oip;
6362 } else {
6363 db->db = StringSave ("?");
6364 oip = ObjectIdNew();
6365 oip->str = StringSave (tag);
6366 db->tag = oip;
6367 }
6368 sfp->dbxref = tie_next(sfp->dbxref, vnp);
6369 sfp->qual = remove_qual(sfp->qual, qual);
6370 }
6371 }
6372 return TRUE;
6373 }
6374
SeqEntryMoveDbxrefs(SeqEntryPtr sep)6375 Boolean SeqEntryMoveDbxrefs (SeqEntryPtr sep)
6376
6377 {
6378 GatherScope gs;
6379
6380 if (sep == NULL) return FALSE;
6381 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
6382 gs.seglevels = 1;
6383 gs.get_feats_location = FALSE;
6384 MemSet ((Pointer)(gs.ignore), (int)(TRUE), (size_t)(OBJ_MAX * sizeof(Boolean)));
6385 gs.ignore[OBJ_SEQFEAT] = FALSE;
6386 gs.ignore[OBJ_SEQANNOT] = FALSE;
6387 GatherSeqEntry (sep, NULL, MoveDbxrefs, &gs);
6388 return TRUE;
6389 }
6390