1 static char const rcsid[] = "$Id: toasn3.c,v 6.150 2016/11/02 23:36:09 kans Exp $";
2 
3 /*****************************************************************************
4 *
5 *   toasn3.c
6 *       converts a Seq-entry or elements of a Bioseq-set to new Biosource style
7 *
8 *****************************************************************************/
9 
10 #include <gather.h>
11 #include <toasn3.h>
12 #include <toporg.h>
13 #include <tfuns.h>
14 #include <terr.h>
15 #include <utilpub.h>
16 #include "ftusrstr.h"
17 #include <utilpars.h>   /*ValidAminoAcid PROTO*/
18 #include <seqport.h>    /*GetFrameFromLoc PROTO*/
19 #include <asn2ff6.h>    /*AddGBQual PROTO*/
20 #include <sqnutils.h>
21 #include <explore.h>
22 #include <edutil.h>
23 #include <subutil.h>
24 
25 static char *this_file = "toasn3";
26 #ifdef THIS_FILE
27 #undef THIS_FILE
28 #endif
29 #define THIS_FILE this_file
30 static char *this_module ="toasn3";
31 #ifdef THIS_MODULE
32 #undef THIS_MODULE
33 #endif
34 #define THIS_MODULE this_module
35 
36 
37 #define num_bond 5
38 static CharPtr feat_bond[num_bond] = {NULL, "disulfide bond", "thiolester bond", "xlink bond", "thioether bond"};
39 
40 #define num_site 27
41 static CharPtr feat_site[num_site] = {NULL,
42         "active",
43         "binding",
44         "cleavage",
45         "inhibit",
46         "modifi",
47         "glycosylation",
48         "myristoylation",
49         "mutagenized",
50         "metal-binding",
51         "phosphorylation",
52         "acetylation",
53         "amidation",
54         "methylation",
55         "hydroxylation",
56         "sulfatation",
57         "oxidative-deamination",
58         "pyrrolidone-carboxylic-acid",
59         "gamma-carboxyglutamic-acid",
60         "blocked",
61         "lipid-binding",
62         "np-binding",
63         "dna-binding",
64         "signal-peptide",
65         "transit-peptide",
66         "transmembrane-region",
67         "nitrosylation"
68 };
69 
70 #define num_genome 15
71 static CharPtr genome[num_genome] = {"unknown", "genomic", "chloroplast", "chromoplast", "kinetoplast", "mitochondrion", "plastid", "macronuclear",
72 "extrachrom", "plasmid", "transposon", "insertion_seq", "cyanelle", "proviral", "virion"};
73 
74 #define num_subtype 22
75 static CharPtr subtype[num_subtype] = {
76 "chromosome", "map", "clone", "sub_clone", "haplotype", "genotype", "sex",
77 "cell_line", "cell_type", "tissue_type", "clone_lib", "dev_stage",
78 "frequency", "germline", "rearranged", "lab_host", "pop_variant",
79 "tissue_lib", "plasmid", "transposon", "insertion_seq", "plastid"};
80 
81 static ORGMOD orgmod_subtype[10] = {
82     { "strain", 2 }, {"sub_strain", 3}, {"variety", 6}, {"serotype",7}, {"cultivar",10}, {"isolate", 17}, {"specific_host", 21}, {"sub_species", 22}, {"note", 255}, { NULL, 0 }
83 };
84 
85 #define num_bad_quals 3
86 static CharPtr bad_quals[num_bad_quals] = {
87 "label", "usedin", "citation"};
88 
89 #define num_organelle 5
90 static ORGMOD organelle[num_organelle] = {
91 {"Mitochondrion ", 5}, {"Chloroplast ", 2},
92 {"Kinetoplast ", 4},  {"Cyanelle ", 12}, {"Plastid ", 6}
93 };
94 
95 #define TOTAL_TECH 6
96 static ORGMOD check_tech[TOTAL_TECH] = {
97 {"EST", MI_TECH_est}, {"STS", MI_TECH_sts}, {"GSS", MI_TECH_survey},
98 {"HTG", MI_TECH_htgs_1 },  {"HTG", MI_TECH_htgs_2}, {"HTG", MI_TECH_htgs_3}
99 };
100 static void CheckGeneticCode(SeqEntryPtr sep);
101 
/*
 * Search a table of names for one that is a prefix of str.
 *
 *   array      table of names; NULL slots are skipped
 *   array_num  number of slots in array
 *   str        text to test
 *
 * Each name is first copied into a 64-byte scratch buffer.  str matches
 * when its leading characters equal the name, either as written or with
 * every '-' in the name replaced by a blank.
 *
 * Returns the index of the first matching slot, or -1 when none matches.
 */
static Int2 FindStr(CharPtr PNTR array, Int2 array_num, CharPtr str)
{
    Char    name [64];
    CharPtr cp;
    size_t  name_len;
    Int2    idx;

    for (idx = 0; idx < array_num; idx++) {
        if (array[idx] != NULL) {
            StringNCpy_0 (name, array[idx], sizeof (name));
            name_len = StringLen (name);

            /* first try the name exactly as stored */
            if (StringNCmp(str, name, name_len) == 0) {
                return idx;
            }

            /* then retry with hyphens turned into blanks */
            for (cp = name; *cp != '\0'; cp++) {
                if (*cp == '-') {
                    *cp = ' ';
                }
            }
            if (StringNCmp(str, name, name_len) == 0) {
                return idx;
            }
        }
    }
    return -1;
}
133 
134 /*****************************************************************************
135 *
136 *   ToAsn4(sep, isEmblOrDdbj)
137 *       Converts pubs to asn.1 spec 4.0 within SeqEntryPtr - SeqEntryPubsAsn4
138 *        move tax lineage from GBblock to BioSource
139 *****************************************************************************/
ToAsn4(SeqEntryPtr sep,Boolean isEmblOrDdbj)140 Int4 ToAsn4 (SeqEntryPtr sep, Boolean isEmblOrDdbj)
141 {
142     CharPtr lineage = NULL;
143 
144     SeqEntryPubsAsn4(sep, isEmblOrDdbj);
145     SeqEntryExplore(sep, (Pointer) (&lineage), FindOldLineage);
146     if (lineage) {
147         SeqEntryExplore(sep, (Pointer) (&lineage), NewLineage);
148         MemFree(lineage);
149     }
150     return 0;
151 }
152 
153 //LCOV_EXCL_START
// not exercised in practice: only the obsolete rescue paths (AddOrgToFix, FixOrg) call this
/*
 * Copy the feature id of sfp into new.
 *
 * Nothing happens when sfp is NULL or carries no id (choice 0).  Otherwise
 * the choice is copied and the value is duplicated according to it:
 *   1 - integer value, copied directly
 *   2 - copied through the Giim ASN.1 reader/writer
 *   3 - copied through the ObjectId ASN.1 reader/writer
 *   4 - copied through the Dbtag ASN.1 reader/writer
 * Any other choice copies the choice only.
 */
static void CopySfpId(SeqFeatPtr new, SeqFeatPtr sfp)
{
    if (sfp == NULL || sfp->id.choice == 0) {
        return;
    }

    new->id.choice = sfp->id.choice;
    switch (sfp->id.choice) {
        case 1:
            new->id.value.intvalue = sfp->id.value.intvalue;
            break;
        case 2:
            new->id.value.ptrvalue =
                AsnIoMemCopy((Pointer) sfp->id.value.ptrvalue,
                             (AsnReadFunc) GiimAsnRead,
                             (AsnWriteFunc) GiimAsnWrite);
            break;
        case 3:
            new->id.value.ptrvalue =
                AsnIoMemCopy((ObjectIdPtr) sfp->id.value.ptrvalue,
                             (AsnReadFunc) ObjectIdAsnRead,
                             (AsnWriteFunc) ObjectIdAsnWrite);
            break;
        case 4:
            new->id.value.ptrvalue =
                AsnIoMemCopy((DbtagPtr) sfp->id.value.ptrvalue,
                             (AsnReadFunc) DbtagAsnRead,
                             (AsnWriteFunc) DbtagAsnWrite);
            break;
        default:
            break;
    }
}
192 //LCOV_EXCL_STOP
193 
/*
 * Release the OrgFix and MolFix work lists hanging off tap.
 *
 * For every OrgFix node the stub feature (its location and citation set)
 * is freed, then the node itself.  MolFix nodes are freed as they are.
 * tap itself is not freed.  Both list heads are reset to NULL afterwards
 * so that a repeated call, or a later look at tap, never touches freed
 * memory.
 *
 * A NULL tap is ignored.
 */
static void toasn3_free(ToAsn3Ptr tap)
{
    OrgFixPtr ofp, next_ofp;
    MolFixPtr mfp, next_mfp;

    if (tap == NULL) {
        return;
    }

    for (ofp = tap->ofp; ofp != NULL; ofp = next_ofp) {
        next_ofp = ofp->next;
        if (ofp->sfp != NULL) {
            SeqLocFree(ofp->sfp->location);
            if (ofp->sfp->cit != NULL) {
                PubSetFree(ofp->sfp->cit);
            }
            /* NOTE(review): an id duplicated by CopySfpId() is not released
               here - confirm whether it should be freed as well. */
            MemFree(ofp->sfp);
        }
        MemFree(ofp);
    }
    tap->ofp = NULL;

    for (mfp = tap->mfp; mfp != NULL; mfp = next_mfp) {
        next_mfp = mfp->next;
        MemFree(mfp);
    }
    tap->mfp = NULL;
}
222 
/*
 * Free a ValNode chain whose nodes carry PubStruct payloads: each non-NULL
 * payload goes through FreePubStruct(), then the node is released.
 */
static void vnp_psp_free(ValNodePtr vnp)
{
    ValNodePtr   following;
    PubStructPtr payload;

    for (; vnp != NULL; vnp = following) {
        following = vnp->next;
        payload = (PubStructPtr) vnp->data.ptrvalue;
        if (payload != NULL) {
            FreePubStruct(payload);
        }
        MemFree(vnp);
    }
}
238 
/*
 * Free a ValNode chain whose nodes carry Pubdesc payloads: each non-NULL
 * payload goes through PubdescFree(), then the node is released.
 */
static void vnp_list_free(ValNodePtr vnp)
{
    ValNodePtr following;
    PubdescPtr payload;

    for (; vnp != NULL; vnp = following) {
        following = vnp->next;
        payload = (PubdescPtr) vnp->data.ptrvalue;
        if (payload != NULL) {
            PubdescFree(payload);
        }
        MemFree(vnp);
    }
}
254 
255 
NOT_segment(SeqEntryPtr sep)256 static Boolean NOT_segment(SeqEntryPtr sep)
257 {
258     BioseqSetPtr bssp;
259     SeqEntryPtr seqsep;
260 
261     if (IS_Bioseq(sep))
262         return TRUE;
263     bssp = (BioseqSetPtr)(sep->data.ptrvalue);
264     if (bssp->_class == 1) {    /*  1 - nucprot set  */
265         seqsep = bssp->seq_set;
266         if (seqsep == NULL) {
267             return TRUE;
268         }
269         if (seqsep->choice == 1) {
270             return TRUE;
271         }
272     }
273     return FALSE;
274 }
275 
OrgFixNew(void)276 static OrgFixPtr OrgFixNew(void)
277 {
278     OrgFixPtr ofp;
279 
280     ofp = MemNew(sizeof(OrgFix));
281     ofp->contains = NULL;
282     ofp->desc = FALSE;
283     ofp->sfp = NULL;
284     ofp->imp = NULL;
285     ofp->orp = NULL;
286 
287     return ofp;
288 }
289 
290 //LCOV_EXCL_START
291 // used for rescuing biosource and molinfo from modif, mol-type, and method descriptors,
292 // which are obsolete
293 /*****************************************************************************/
/*
 * Queue an OrgRef on tap's OrgFix list.
 *
 *   orp    OrgRef to remember (the pointer is stored, not copied)
 *   tap    conversion state that owns the list
 *   mod    optional node whose data pointer is stored as the modifier list
 *   sep    entry the OrgRef was found in
 *   vnp    non-NULL when the OrgRef came from a descriptor
 *   sfp    non-NULL when it came from a feature; id, location and citation
 *          set are duplicated into a fresh stub feature
 *   index  value stored in the record and compared later by the fix-up
 *          callbacks
 */
static void AddOrgToFix (OrgRefPtr orp, ToAsn3Ptr tap, ValNodePtr mod,
                    SeqEntryPtr sep, ValNodePtr vnp, SeqFeatPtr sfp, Int4 index)
{
    OrgFixPtr  entry;
    SeqFeatPtr stub;

    entry = OrgFixNew();
    entry->contains = sep;
    entry->orp = orp;
    entry->index = index;
    entry->desc = (vnp != NULL) ? TRUE : FALSE;
    if (mod != NULL) {
        entry->modif = mod->data.ptrvalue;
    }

    if (sfp != NULL) {
        stub = SeqFeatNew();
        if (sfp->id.choice) {
            CopySfpId(stub, sfp);
        }
        stub->location = AsnIoMemCopy(sfp->location,
                                      (AsnReadFunc) SeqLocAsnRead,
                                      (AsnWriteFunc) SeqLocAsnWrite);
        if (sfp->cit != NULL) {
            stub->cit = AsnIoMemCopy(sfp->cit,
                                     (AsnReadFunc) PubSetAsnRead,
                                     (AsnWriteFunc) PubSetAsnWrite);
        }
        entry->sfp = stub;
    }

    tap->ofp = tie_next_biosource(tap->ofp, entry);
}
330 
331 // used for rescuing molinfo from modif, mol-type, and method descriptors,
332 // which are obsolete
333 /*****************************************************************************/
/*
 * Queue a MolFix record on tap's list.
 *
 *   tap    conversion state that owns the list
 *   sep    entry the values were found in
 *   mol    molecule type; stored only when non-zero
 *   mod    modifier list; stored only when non-NULL
 *   meth   method; stored only when non-zero
 *   index  value stored in the record and compared later by FixMol()
 */
static void AddMolToFix (ToAsn3Ptr tap, SeqEntryPtr sep,
                         Uint1 mol, ValNodePtr mod, Uint1 meth, Int4 index)
{
    MolFixPtr entry = MemNew(sizeof(MolFix));

    entry->contains = sep;
    entry->index = index;
    if (mol != 0) {
        entry->mol = mol;
    }
    if (meth != 0) {
        entry->method = meth;
    }
    if (mod != NULL) {
        entry->modif = mod;
    }

    tap->mfp = tie_next_mol(tap->mfp, entry);
}
353 //LCOV_EXCL_STOP
354 /*****************************************************************************/
355 
/*
 * Queue an import feature on tap's OrgFix list.
 *
 *   imp    import feature to remember (the pointer is stored, not copied)
 *   tap    conversion state that owns the list
 *   sep    entry the feature was found in
 *   vnp    non-NULL marks the record as descriptor-based
 *   sfp    when non-NULL, its location and citation set are duplicated
 *          into a fresh stub feature kept in the record
 *   index  value stored in the record and compared later by the fix-up
 *          callbacks
 */
static void AddImpToFix (SeqFeatPtr imp, ToAsn3Ptr tap, SeqEntryPtr sep,
                                     ValNodePtr vnp, SeqFeatPtr sfp, Int4 index)
{
    OrgFixPtr  entry;
    SeqFeatPtr stub;

    entry = OrgFixNew();
    entry->contains = sep;
    entry->imp = imp;
    entry->index = index;
    entry->desc = (vnp != NULL) ? TRUE : FALSE;

    if (sfp != NULL) {
        stub = SeqFeatNew();
        stub->location = AsnIoMemCopy(sfp->location,
                                      (AsnReadFunc) SeqLocAsnRead,
                                      (AsnWriteFunc) SeqLocAsnWrite);
        if (sfp->cit != NULL) {
            stub->cit = AsnIoMemCopy(sfp->cit,
                                     (AsnReadFunc) PubSetAsnRead,
                                     (AsnWriteFunc) PubSetAsnWrite);
        }
        entry->sfp = stub;
    }

    tap->ofp = tie_next_biosource(tap->ofp, entry);
}
386 
387 /*****************************************************************************/
FixToAsn(SeqEntryPtr sep,ToAsn3Ptr tap)388 static void FixToAsn(SeqEntryPtr sep, ToAsn3Ptr tap)
389 {
390     OrgFixPtr    ofp;
391     BioSourcePtr bsp;
392     MolFixPtr     mfp;
393     MolInfoPtr    mfi;
394     Uint1         mod;
395     ValNodePtr    vnp;
396     OrgRefPtr    orp;
397     GBQualPtr    q;
398     CharPtr        tmp;
399     Int2        i, len = 0;
400 
401 //LCOV_EXCL_START
402 // used for rescuing molinfo from modif, mol-type, and method descriptors,
403 // which are obsolete
404     mfp = tap->mfp;
405     while (mfp != NULL) {
406 
407         mfi = NULL;
408         if (mfp->mol != 0) {
409             mfi = new_info(mfi);
410             mfi->biomol = mfp->mol;
411         }
412         if (mfp->method != 0) {
413             mfi = new_info(mfi);
414             mfi->tech = mfp->method + 7;
415         }
416         for(vnp = mfp->modif; vnp != NULL; vnp=vnp->next) {
417             mod = vnp->data.intvalue;
418             mfi = ModToMolInfo(mfi, mod);
419         }
420         mfp->molinfo = mfi;
421         mfp = mfp->next;
422 
423     }
424 //LCOV_EXCL_STOP
425 
426 /* look for Org-refs (desc or feature) and create Biosource */
427     for (ofp = tap->ofp; ofp != NULL; ofp = ofp->next) {
428         if (ofp->orp != NULL && (ofp->desc != FALSE || ofp->sfp != NULL)) {
429             bsp = BioSourceNew();
430             bsp->org = AsnIoMemCopy(ofp->orp, (AsnReadFunc) OrgRefAsnRead,
431                (AsnWriteFunc) OrgRefAsnWrite);
432             tap->had_biosource = TRUE;
433 //LCOV_EXCL_START
434 // used for rescuing biosource from modif, mol-type, and method descriptors,
435 // which are obsolete
436             for (mfp = tap->mfp; mfp; mfp = mfp->next) {
437                 if (ofp->index < mfp->index) {
438                     continue;
439                 }
440                  for(vnp = mfp->modif; vnp != NULL; vnp=vnp->next) {
441                     mod = vnp->data.intvalue;
442                     if (bsp == NULL) {
443                         bsp = BioSourceNew();
444                     }
445                     ModToBiosource(bsp, mod);
446                 }
447             }
448 //LCOV_EXCL_STOP
449             ofp->bsp = bsp;
450        }
451     }
452 /* look for Impfeat: create new bsp for every "source" (desc or feature) */
453     for (ofp = tap->ofp; ofp != NULL; ofp = ofp->next) {
454         if (ofp->imp == NULL) {
455             continue;
456         }
457         if (ofp->desc == TRUE) {
458             bsp = BioSourceNew();
459             orp = OrgRefNew();
460             for(q = ofp->imp->qual; q != NULL; q = q->next) {
461                 if (StringCmp(q->qual, "organism") == 0) {
462                     tmp = MemNew(StringLen(q->val)+1);
463                     StringCpy(tmp, q->val);
464                     for (i = 0; i < num_organelle; i++) {
465                         if (StringNCmp(tmp, organelle[i].name,
466                             StringLen(organelle[i].name)) == 0) {
467                             len = StringLen(organelle[i].name);
468                             bsp->genome = organelle[i].num;
469                             break;
470                         }
471                     }
472                     orp->taxname = StringSave(tmp + len);
473                     MemFree(tmp);
474                 }
475             }
476             bsp->org = orp;
477             if (ofp->imp && ofp->imp->qual)
478                 CheckQualsWithComm(bsp, ofp->imp);
479             ofp->bsp = bsp;
480         } else if(ofp->sfp != NULL) {
481             bsp = BioSourceNew();
482             orp = OrgRefNew();
483             for(q = ofp->imp->qual; q != NULL; q = q->next) {
484                 if (StringCmp(q->qual, "organism") == 0) {
485                     tmp = MemNew(StringLen(q->val)+1);
486                     StringCpy(tmp, q->val);
487                     for (i = 0; i < num_organelle; i++) {
488                         if (StringNCmp(tmp, organelle[i].name,
489                             StringLen(organelle[i].name)) == 0) {
490                             len = StringLen(organelle[i].name);
491                             bsp->genome = organelle[i].num;
492                             break;
493                         }
494                     }
495                     orp->taxname = StringSave(tmp + len);
496                     MemFree(tmp);
497                 }
498             }
499             bsp->org = orp;
500             if (ofp->imp && ofp->imp)
501                 CheckQualsWithComm(bsp, ofp->imp);
502             ofp->bsp = bsp;
503         }
504     }
505 }
506 
507 /*****************************************************************************
508 *
509 *  Build MolInfo from GIBBmod and GIBBmol GIBBmethod
510 *
511 *****************************************************************************/
static void FixMol (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    /*
     * Callback with the (sep, data, index, indent) signature.
     *
     *   sep     entry being visited
     *   data    head of the MolFix list
     *   index   position of sep; only records with the same index apply
     *   indent  not used
     *
     * Every matching record that has a MolInfo gets a new molinfo
     * descriptor on the entry.  Bioseqs whose repr is not raw, const or
     * delta are skipped.
     */
    ValNodePtr PNTR  descr_head;
    ValNodePtr       tmp;
    BioseqPtr        bsp;
    BioseqSetPtr     bssp;
    MolFixPtr        mfp;

    mfp = (MolFixPtr)data;

    if (IS_Bioseq(sep))
    {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        if (bsp == NULL)
            return;
        if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)
                 && (bsp->repr != Seq_repr_delta))
            return;
        descr_head = &(bsp->descr);
    }
    else
    {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        if (bssp == NULL)
            return;
        descr_head = &(bssp->descr);
    }

    for (; mfp != NULL; mfp = mfp->next) {
        if (mfp->index != index || mfp->molinfo == NULL) {
            continue;
        }
        tmp = SeqDescrNew(*descr_head);
        if (tmp == NULL) {
            continue;
        }
        /* With an empty descriptor list there is nothing to append to, so
           the new node has to become the list head or it would be lost. */
        if (*descr_head == NULL) {
            *descr_head = tmp;
        }
        tmp->choice = Seq_descr_molinfo;
        tmp->data.ptrvalue = mfp->molinfo;
    }
}
545 
/*
 * Callback with the (sep, data, index, indent) signature; data, index and
 * indent are not used.
 *
 * For an amino-acid Bioseq, make sure a MolInfo with a biomol value exists:
 *   - every existing molinfo descriptor whose biomol is 0 gets biomol 8;
 *   - when no usable MolInfo was seen, a new one with biomol 8 is added
 *     through CreateNewDescriptor().
 * Sets, NULL Bioseqs and non-protein Bioseqs are left alone.
 */
static void FixProtMolInfo (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
    BioseqPtr   bsp;
    MolInfoPtr  mip = NULL;
    ValNodePtr  vnp;

    if (! IS_Bioseq(sep)) return;
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    if (! ISA_aa (bsp->mol)) return;

    for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next) {
        if (vnp->choice != Seq_descr_molinfo) continue;
        mip = (MolInfoPtr) vnp->data.ptrvalue;
        if (mip != NULL && mip->biomol == 0) {
            /* 8 is presumably the peptide biomol code - TODO confirm */
            mip->biomol = 8;
        }
    }
    if (mip != NULL) return;

    mip = MolInfoNew ();
    if (mip == NULL) return;
    mip->biomol = 8;
    vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
    if (vnp == NULL) {
        /* no descriptor to own it: release instead of leaking */
        MolInfoFree (mip);
        return;
    }
    vnp->data.ptrvalue = (Pointer) mip;
}
576 
/*
 * Callback with the (sep, data, index, indent) signature; data, index and
 * indent are not used.
 *
 * Merges repeated molinfo descriptors on a Bioseq.  The first non-NULL
 * MolInfo is kept.  Each later one first fills any zero biomol, tech or
 * completeness of the kept one; if the three fields then agree, the later
 * descriptor is unlinked and freed (handing over its techexp when the kept
 * one has none).  Descriptors that still differ are left in place.
 */
static void FuseMolInfos (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
    BioseqPtr        bsp;
    ValNodePtr       cur;
    MolInfoPtr       keeper = NULL;
    ValNodePtr PNTR  link;
    MolInfoPtr       mip;
    Boolean          redundant;

    if (! IS_Bioseq(sep)) return;
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;

    /* link always addresses the pointer that leads to cur */
    link = &(bsp->descr);
    while ((cur = *link) != NULL) {
        redundant = FALSE;
        if (cur->choice == Seq_descr_molinfo) {
            mip = (MolInfoPtr) cur->data.ptrvalue;
            if (keeper == NULL) {
                keeper = mip;
            } else if (mip != NULL) {
                if (keeper->biomol == 0) {
                    keeper->biomol = mip->biomol;
                }
                if (keeper->tech == 0) {
                    keeper->tech = mip->tech;
                }
                if (keeper->completeness == 0) {
                    keeper->completeness = mip->completeness;
                }
                if (keeper->biomol == mip->biomol &&
                    keeper->tech == mip->tech &&
                    keeper->completeness == mip->completeness) {
                    if (keeper->techexp == NULL) {
                        keeper->techexp = mip->techexp;
                        mip->techexp = NULL;
                    }
                    redundant = TRUE;
                }
            }
        }
        if (redundant) {
            /* unlink before freeing so only this one node is released */
            *link = cur->next;
            cur->next = NULL;
            MolInfoFree (mip);
            ValNodeFree (cur);
        } else {
            link = &(cur->next);
        }
    }
}
633 
634 //LCOV_EXCL_START
635 // used for rescuing biosource from org desc and org feat, which
636 //are converted to biosource desc and biosource feat earlier
637 /*****************************************************************************
638 *
639 *  Build Biosource from descr-org and features
640 *
641 *****************************************************************************/
static void FixOrg (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    /*
     * Callback with the (sep, data, index, indent) signature.
     *
     *   sep     entry being visited
     *   data    head of the OrgFix list
     *   index   position of sep; only records with the same index apply
     *   indent  not used
     *
     * For every matching record that has a BioSource:
     *   - descriptor-based records get a new source descriptor on the entry;
     *   - records with a stub feature get a new BioSource feature (id,
     *     location and citation set copied from the stub) appended to the
     *     entry's feature table.
     * Bioseqs whose repr is not raw, const or delta are skipped.
     */
    ValNodePtr PNTR   descr_head;
    SeqAnnotPtr PNTR  annot_head;
    ValNodePtr        tmp;
    BioseqPtr         bsp;
    BioseqSetPtr      bssp;
    OrgFixPtr         ofp;
    SeqAnnotPtr       sap, last;
    SeqFeatPtr        new;

    ofp = (OrgFixPtr)data;

    if (IS_Bioseq(sep))
    {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        if (bsp == NULL)
            return;
        if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)
                    && (bsp->repr != Seq_repr_delta))
            return;
        descr_head = &(bsp->descr);
        annot_head = &(bsp->annot);
    }
    else
    {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        if (bssp == NULL)
            return;
        descr_head = &(bssp->descr);
        annot_head = &(bssp->annot);
    }

    for (; ofp != NULL; ofp = ofp->next) {
        if (ofp->index != index || ofp->bsp == NULL) {
            continue;
        }

        if (ofp->desc == TRUE) {
            tmp = SeqDescrNew(*descr_head);
            if (tmp != NULL) {
                /* With an empty descriptor list the new node must become
                   the list head, otherwise it would be lost. */
                if (*descr_head == NULL) {
                    *descr_head = tmp;
                }
                tmp->choice = Seq_descr_source;
                tmp->data.ptrvalue = ofp->bsp;
            }
        }

        if (ofp->sfp == NULL) {
            continue;
        }

        /* Find the first feature-table annot (type 1); create and append
           one when the entry has none, instead of dereferencing NULL or
           writing features into an annot of another type. */
        last = NULL;
        sap = *annot_head;
        while (sap != NULL && sap->type != 1) {
            last = sap;
            sap = sap->next;
        }
        if (sap == NULL) {
            sap = SeqAnnotNew();
            if (sap == NULL) {
                continue;
            }
            sap->type = 1;
            if (last == NULL) {
                *annot_head = sap;
            } else {
                last->next = sap;
            }
        }

        /* NOTE(review): when a record is both descriptor- and feature-based
           the same BioSource pointer is stored twice - confirm this cannot
           happen. */
        new = SeqFeatNew();
        if (ofp->sfp->id.choice) {
            CopySfpId(new, ofp->sfp);
        }
        new->data.choice = SEQFEAT_BIOSRC;
        new->data.value.ptrvalue = ofp->bsp;
        new->location =  AsnIoMemCopy(ofp->sfp->location,
                (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
        if (ofp->sfp->cit) {
            new->cit =  AsnIoMemCopy(ofp->sfp->cit,
                (AsnReadFunc) PubSetAsnRead, (AsnWriteFunc) PubSetAsnWrite);
        }
        sap->data = tie_feat(sap->data, new);
    }
}
694 //LCOV_EXCL_STOP
695 
696 /*****************************************************************************
697 *
698 *   HasSiteRef(sfp, userdata)
699 *       Checks for Site-ref ImpFeat before unnecessarily rearranging pub descriptors
700 *****************************************************************************/
static void HasSiteRef (SeqFeatPtr sfp, Pointer userdata)

{
  /* Feature callback: userdata points at a Boolean that is set to TRUE
     when sfp is an import feature with key "Site-ref" that carries a
     citation.  The flag is never cleared here. */
  ImpFeatPtr  imp;

  if (sfp->cit != NULL && sfp->data.choice == SEQFEAT_IMP) {
    imp = (ImpFeatPtr) sfp->data.value.ptrvalue;
    if (imp != NULL && StringCmp(imp->key, "Site-ref") == 0) {
      *((BoolPtr) userdata) = TRUE;
    }
  }
}
716 
/*****************************************************************************
*
*   SeqEntryPubsAsn4Ex(sep, isEmblOrDdbj, uniqueOnBioseq)
*       Converts pubs to asn.1 spec 4.0 within SeqEntryPtr
*****************************************************************************/
SeqEntryPubsAsn4Ex(SeqEntryPtr sep,Boolean isEmblOrDdbj,Boolean uniqueOnBioseq)722 Int4 SeqEntryPubsAsn4Ex (SeqEntryPtr sep, Boolean isEmblOrDdbj, Boolean uniqueOnBioseq)
723 {
724     BioseqPtr bsp = NULL;
725     BioseqSetPtr bioset = NULL;
726     ValNodePtr vnp = NULL, publist, tmp, v;
727     PubdescPtr pubdesc;
728     Boolean foundSitRef = FALSE;
729 
730     if (IS_Bioseq(sep)) {
731         bsp = (BioseqPtr) (sep->data.ptrvalue);
732     } else if (IS_Bioseq_set(sep)) {
733         bioset = (BioseqSetPtr) (sep->data.ptrvalue); /* top level set */
734     }
735     SeqEntryExplore(sep, &vnp, FindCit);
736     SeqEntryExplore(sep, &vnp, ChangeCitQual);
737     vnp_psp_free(vnp);
738 
739     VisitFeaturesInSep (sep, (Pointer) &foundSitRef, HasSiteRef);
740     if (foundSitRef) {
741         SeqEntryExplore(sep, NULL, NewPubs);
742     }
743     SeqEntryExplore(sep, NULL, DeleteSites);
744 
745 /* move pubs in set to the top level */
746     if (bioset && bioset->_class != 9 && (! isEmblOrDdbj)) {
747         publist = NULL;
748         SeqEntryExplore(sep, (Pointer) NULL, MoveSegmPubs);
749         SeqEntryExplore(sep, (Pointer) NULL, MoveNPPubs);
750 /*   unique pubs on the set level*/
751         tmp = ValNodeExtractList(&bioset->descr, Seq_descr_pub);
752         for (v = tmp; v; v = v->next) {
753             pubdesc = v->data.ptrvalue;
754             publist = AddToList(publist, NULL, pubdesc);
755         }
756         bioset->descr = ValNodeLink(&(bioset->descr), publist);
757 /* check pubs in Bioseqs, delete if they are already on the top */
758         for (v = publist; v; v = v->next) {
759             pubdesc = v->data.ptrvalue;
760             SeqEntryExplore(sep, pubdesc, DeletePubs);
761         }
762         vnp_list_free(tmp);
763     }
764     if (uniqueOnBioseq && bsp != NULL && (! isEmblOrDdbj)) {
765 /*   unique pubs on the bioseq level*/
766         publist = NULL;
767         tmp = ValNodeExtractList(&bsp->descr, Seq_descr_pub);
768         for (v = tmp; v; v = v->next) {
769             pubdesc = v->data.ptrvalue;
770             publist = AddToList(publist, NULL, pubdesc);
771         }
772         bsp->descr = ValNodeLink(&(bsp->descr), publist);
773         vnp_list_free(tmp);
774     }
775     SeqEntryExplore(sep, NULL, ChangeCitSub);
776     return 0;
777 }
778 
SeqEntryPubsAsn4(SeqEntryPtr sep,Boolean isEmblOrDdbj)779 Int4 SeqEntryPubsAsn4 (SeqEntryPtr sep, Boolean isEmblOrDdbj)
780 {
781     return SeqEntryPubsAsn4Ex(sep, isEmblOrDdbj, TRUE);
782 }
783 
784 /*****************************************************************************
785 *
786 *  Remove old (ver 2.0)  asn.1 (with check for the new ver 3.0)
787 *
788 *****************************************************************************/
void StripOld (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    /*
     * Callback with the (sep, data, index, indent) signature; data, index
     * and indent are not used.
     *
     * Removes and frees the modif, mol-type, method and org descriptors of
     * the entry, then removes source and org features from every feature
     * table (annot type 1).  A feature table left with neither data nor an
     * annot descriptor is dropped.  Bioseqs whose repr is not raw, const or
     * delta are left untouched.
     */
    ValNodePtr    descr = NULL, node = NULL, node_next;
    SeqFeatPtr    feat, feat_next;
    SeqAnnotPtr   annots, cur, cur_next;
    BioseqPtr     bsp = NULL;
    BioseqSetPtr  bssp = NULL;

    if (IS_Bioseq(sep)) {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        if (! (bsp->repr == Seq_repr_raw || bsp->repr == Seq_repr_const
                    || bsp->repr == Seq_repr_delta)) {
            return;
        }
        descr = bsp->descr;
        annots = bsp->annot;
    } else {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        descr = bssp->descr;
        annots = bssp->annot;
    }

    if (descr != NULL) {
        /* modif: the payload is itself a ValNode chain */
        node = ValNodeExtractList(&descr, Seq_descr_modif);
        while (node != NULL) {
            node_next = node->next;
            ValNodeFree(node->data.ptrvalue);
            MemFree(node);
            node = node_next;
        }

        /* mol-type and method: only the nodes are freed */
        node = ValNodeExtractList(&descr, Seq_descr_mol_type);
        while (node != NULL) {
            node_next = node->next;
            MemFree(node);
            node = node_next;
        }
        node = ValNodeExtractList(&descr, Seq_descr_method);
        while (node != NULL) {
            node_next = node->next;
            MemFree(node);
            node = node_next;
        }

        /* org: the payload is an OrgRef */
        node = ValNodeExtractList(&descr, Seq_descr_org);
        while (node != NULL) {
            node_next = node->next;
            OrgRefFree((OrgRefPtr) node->data.ptrvalue);
            MemFree(node);
            node = node_next;
        }

        /* the list head may have changed */
        if (bsp != NULL) {
            bsp->descr = descr;
        } else if (bssp != NULL) {
            bssp->descr = descr;
        }
    }

    cur = annots;
    while (cur != NULL) {
        cur_next = cur->next;
        if (cur->type == 1) {
            feat = ExtractSourceFeatList((SeqFeatPtr PNTR) &(cur->data));
            while (feat != NULL) {
                feat_next = feat->next;
                SeqFeatFree(feat);
                feat = feat_next;
            }
            feat = SeqFeatExtractList((SeqFeatPtr PNTR) &(cur->data), SEQFEAT_ORG);
            while (feat != NULL) {
                feat_next = feat->next;
                SeqFeatFree(feat);
                feat = feat_next;
            }
            /* now keep empty annot if annot_descr present */
            if (cur->data == NULL && cur->desc == NULL) {
                annots = remove_annot(annots, cur);
            }
        }
        cur = cur_next;
    }

    if (bsp != NULL) {
        bsp->annot = annots;
    } else if (bssp != NULL) {
        bssp->annot = annots;
    }
}
862 
863 //LCOV_EXCL_START
864 /*****************************************************************************
865 * EMBL may have multiple OS lines that are parsed to multiple descr on
866 * the top level. In NCBI model only one Biosource descr is allowed, others
867 * should be moved to the feature table
868 *****************************************************************************/
GetMultBiosource(SeqEntryPtr sep)869 ValNodePtr GetMultBiosource(SeqEntryPtr sep)
870 {
871     ValNodePtr bvnp, vnp, retval;
872     BioseqPtr bsp = NULL;
873     BioseqSetPtr bssp = NULL;
874 
875     if (sep == NULL)
876         return NULL;
877     if (IS_Bioseq(sep)) {
878         bsp = (BioseqPtr)(sep->data.ptrvalue);
879         vnp = bsp->descr;
880     } else {
881         bssp = (BioseqSetPtr)(sep->data.ptrvalue);
882         vnp = bssp->descr;
883     }
884     bvnp =     ValNodeExtractList(&vnp, Seq_descr_source);
885     if (bvnp == NULL) {
886         return NULL;
887     }
888     if (bvnp->next != NULL) {
889         retval = bvnp->next;
890         bvnp->next = NULL;
891     } else {
892         retval = NULL;
893     }
894     vnp = tie_next(vnp, bvnp);
895     if (bsp != NULL) {
896         bsp->descr = vnp;
897     } else if (bssp != NULL) {
898         bssp->descr = vnp;
899     }
900     return retval;
901 }
902 //LCOV_EXCL_STOP
903 
904 /*****************************************************************************
905 * RemoveEmptyTitleAndPubGenAsOnlyPub removes pub { pub { gen { } } empty pubs
906 *****************************************************************************/
907 
RemoveEmptyTitleAndPubGenAsOnlyPub(SeqEntryPtr sep)908 static void RemoveEmptyTitleAndPubGenAsOnlyPub (SeqEntryPtr sep)
909 
910 {
911   BioseqPtr     bsp;
912   BioseqSetPtr  bssp;
913   SeqAnnotPtr   nextsap;
914   ValNodePtr    nextsdp;
915   SeqFeatPtr    nextsfp;
916   Pointer PNTR  prevsap;
917   Pointer PNTR  prevsdp;
918   Pointer PNTR  prevsfp;
919   SeqAnnotPtr   sap = NULL;
920   ValNodePtr    sdp = NULL;
921   SeqFeatPtr    sfp;
922   SeqEntryPtr   tmp;
923 
924   if (sep == NULL) return;
925   if (IS_Bioseq (sep)) {
926     bsp = (BioseqPtr) sep->data.ptrvalue;
927     if (bsp == NULL) return;
928     sap = bsp->annot;
929     prevsap = (Pointer PNTR) &(bsp->annot);
930     sdp = bsp->descr;
931     prevsdp = (Pointer PNTR) &(bsp->descr);
932   } else if (IS_Bioseq_set (sep)) {
933     bssp = (BioseqSetPtr) sep->data.ptrvalue;
934     if (bssp == NULL) return;
935     for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
936       RemoveEmptyTitleAndPubGenAsOnlyPub (tmp);
937     }
938     sap = bssp->annot;
939     prevsap = (Pointer PNTR) &(bssp->annot);
940     sdp = bssp->descr;
941     prevsdp = (Pointer PNTR) &(bssp->descr);
942   } else return;
943   while (sap != NULL) {
944     nextsap = sap->next;
945     if (sap->type == 1) {
946       sfp = (SeqFeatPtr) sap->data;
947       prevsfp = (Pointer PNTR) &(sap->data);
948       while (sfp != NULL) {
949         nextsfp = sfp->next;
950         if (sfp->data.choice == SEQFEAT_PUB && PubIsEffectivelyEmpty ((PubdescPtr) sfp->data.value.ptrvalue)) {
951           *(prevsfp) = sfp->next;
952           sfp->next = NULL;
953           SeqFeatFree (sfp);
954         } else {
955           prevsfp = (Pointer PNTR) &(sfp->next);
956         }
957         sfp = nextsfp;
958       }
959     }
960     /* now keep empty annot if annot_descr present */
961     if (sap->data == NULL && sap->desc == NULL) {
962       *(prevsap) = sap->next;
963       sap->next = NULL;
964       SeqAnnotFree (sap);
965     } else {
966       prevsap = (Pointer PNTR) &(sap->next);
967     }
968     sap = nextsap;
969   }
970   while (sdp != NULL) {
971     nextsdp = sdp->next;
972     if (sdp->choice == Seq_descr_pub && PubIsEffectivelyEmpty ((PubdescPtr) sdp->data.ptrvalue)) {
973       *(prevsdp) = sdp->next;
974       sdp->next = NULL;
975       SeqDescFree (sdp);
976     } else if (sdp->choice == Seq_descr_title && StringHasNoText ((CharPtr) sdp->data.ptrvalue)) {
977       *(prevsdp) = sdp->next;
978       sdp->next = NULL;
979       SeqDescFree (sdp);
980     } else {
981       prevsdp = (Pointer PNTR) &(sdp->next);
982     }
983     sdp = nextsdp;
984   }
985 }
986 
987 //LCOV_EXCL_START
988 // never called, always called with Ex
989 /*****************************************************************************
990 *   SeqEntryToAsn3(sep)
991 *       Converts a SeqEntry with old OrgRefs to SeqEntry with Biosource
992 *        Does the Taxonomy lookup if taxserver = TRUE and taxfun != NULL
993 *        Strips old stuff if strip_old=TRUE
994 *        Moves /map from GeneRef, removes ProtRef xrefs and checks genetic
995 *        code in CDSs
996 *        RETURN:
997 *        INFO_ASNOLD - if the entry is in spec 3.0 (has BioSource) already
998 *        INFO_ASNNEW - if the entry is converted to new spec
999 *        ERR_REJECT -  if the entry has internal FATAL errors
1000 *        ERR_INPUT -   if input is NULL
1001 *
1002 *****************************************************************************/
SeqEntryToAsn3(SeqEntryPtr sep,Boolean strip_old,Boolean source_correct,Boolean taxserver,SeqEntryFunc taxfun)1003 Int4 SeqEntryToAsn3 (SeqEntryPtr sep, Boolean strip_old, Boolean source_correct, Boolean taxserver, SeqEntryFunc taxfun)
1004 {
1005     return SeqEntryToAsn3Ex(sep, strip_old, source_correct,
1006             taxserver, taxfun, NULL, FALSE, FALSE);
1007 }
1008 //LCOV_EXCL_STOP
1009 
is_equiv(SeqEntryPtr sep)1010 static Boolean is_equiv(SeqEntryPtr sep)
1011 {
1012     BioseqSetPtr bssp;
1013 
1014     if (IS_Bioseq(sep)) {
1015         return FALSE;
1016     }
1017     bssp = (BioseqSetPtr)(sep->data.ptrvalue);
1018     if (bssp->_class != 10) {    /*  equiv */
1019         return FALSE;
1020     }
1021     return TRUE;
1022 }
1023 
/* Moves the update-date descriptor(s) of a Bioseq-set back to position
 * update_date_pos (0-based) in the set's descriptor chain.
 *
 * Nothing happens when update_date_pos is negative, when sep is NULL or not
 * a Bioseq-set, or when the set has no update-date descriptor.  If the chain
 * is shorter than the requested position, the descriptors go to the end.
 *
 * ValNodeExtractList hands back every node of the requested choice as one
 * chain, so the chain is spliced in through its last node; overwriting the
 * first node's next pointer would drop any further update-date nodes.
 */
static void RestoreUpdateDatePos (SeqEntryPtr sep, Int2 update_date_pos)

{
  ValNodePtr    anchor;
  BioseqSetPtr  bssp;
  ValNodePtr    dates;
  ValNodePtr    last;

  if (sep == NULL) return;
  if (update_date_pos < 0) return;
  if (! IS_Bioseq_set (sep)) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  dates = ValNodeExtractList (&(bssp->descr), Seq_descr_update_date);
  if (dates == NULL) return;

  /* find the tail of the extracted chain (usually a single node) */
  last = dates;
  while (last->next != NULL) {
    last = last->next;
  }

  if (update_date_pos == 0) {
    last->next = bssp->descr;
    bssp->descr = dates;
    return;
  }

  /* walk to the node that should precede the update-date descriptor */
  anchor = bssp->descr;
  while (update_date_pos > 1 && anchor != NULL) {
    anchor = anchor->next;
    update_date_pos--;
  }

  if (anchor != NULL) {
    last->next = anchor->next;
    anchor->next = dates;
  } else {
    /* chain is too short (or empty): append at the end */
    bssp->descr = ValNodeLink (&(bssp->descr), dates);
  }
}
1055 
GetUpdateDatePos(SeqEntryPtr sep)1056 static Int2 GetUpdateDatePos (SeqEntryPtr sep)
1057 
1058 {
1059   BioseqSetPtr  bssp;
1060   Int2          i;
1061   ValNodePtr    vnp;
1062 
1063   if (! IS_Bioseq_set (sep)) return -1;
1064   bssp = (BioseqSetPtr) sep->data.ptrvalue;
1065   if (bssp == NULL) return -1;
1066 
1067   for (vnp = bssp->descr, i = 0; vnp != NULL; vnp = vnp->next, i++) {
1068     if (vnp->choice == Seq_descr_update_date) return i;
1069   }
1070   return -1;
1071 }
1072 
/* Feature callback that drops text duplicated elsewhere in the feature:
 *   - gene:    the comment is freed when it equals the gene locus or the
 *              gene description;
 *   - protein: the description is freed when it equals one of the
 *              (non-blank) protein names.
 * Other feature types are left alone.  userdata is not used. */
static void CleanMiscFeatFields (SeqFeatPtr sfp, Pointer userdata)

{
  GeneRefPtr  gene;
  CharPtr     name;
  ValNodePtr  name_node;
  ProtRefPtr  prot;

  if (sfp == NULL) return;

  if (sfp->data.choice == SEQFEAT_GENE) {
    gene = (GeneRefPtr) sfp->data.value.ptrvalue;
    if (gene == NULL) return;
    if (sfp->comment != NULL && gene->locus != NULL &&
        StringCmp (sfp->comment, gene->locus) == 0) {
      sfp->comment = MemFree (sfp->comment);
    }
    /* comment may have just been cleared, hence the repeated NULL test */
    if (sfp->comment != NULL && gene->desc != NULL &&
        StringCmp (sfp->comment, gene->desc) == 0) {
      sfp->comment = MemFree (sfp->comment);
    }
  } else if (sfp->data.choice == SEQFEAT_PROT) {
    prot = (ProtRefPtr) sfp->data.value.ptrvalue;
    if (prot == NULL) return;
    if (prot->desc == NULL) return;
    for (name_node = prot->name; name_node != NULL; name_node = name_node->next) {
      name = (CharPtr) name_node->data.ptrvalue;
      if (! StringHasNoText (name) && StringCmp (prot->desc, name) == 0) {
        prot->desc = MemFree (prot->desc);
      }
    }
  }
}
1111 
1112 typedef struct dblinknpsdata {
1113   SeqDescrPtr  dblinksdp;
1114   Boolean      morethanone;
1115 } DblinkNpsData, PNTR DblinkNpsPtr;
1116 
/* Descriptor callback: records the first user descriptor whose type string
 * is "DBLink" in the DblinkNpsData passed as userdata, and sets morethanone
 * when a further one is encountered. */
static void FindOneDblink (SeqDescrPtr sdp, Pointer userdata)

{
  DblinkNpsPtr   state = (DblinkNpsPtr) userdata;
  UserObjectPtr  uop;

  if (sdp == NULL) return;
  if (sdp->choice != Seq_descr_user) return;

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (uop == NULL || uop->type == NULL) return;
  if (StringCmp (uop->type->str, "DBLink") != 0) return;

  if (state == NULL) return;

  if (state->dblinksdp != NULL) {
    state->morethanone = TRUE;
  } else {
    state->dblinksdp = sdp;
  }
}
1139 
/* Set callback: for a nuc-prot set that contains exactly one DBLink user
 * descriptor, and only when that descriptor is an extended node whose parent
 * type is OBJ_BIOSEQ, hands the user object over to a new user descriptor on
 * the set and flags the emptied original with deleteme.  userdata is not
 * used. */
static void MoveDBLinkToNPS (BioseqSetPtr bssp, Pointer userdata)

{
  SeqDescrPtr    dblink;
  ObjValNodePtr  extended;
  DblinkNpsData  found;
  UserObjectPtr  payload;

  if (bssp == NULL || bssp->_class != BioseqseqSet_class_nuc_prot) return;

  MemSet ((Pointer) &found, 0, sizeof (found));
  found.morethanone = FALSE;
  found.dblinksdp = NULL;

  VisitDescriptorsInSet (bssp, (Pointer) &found, FindOneDblink);

  dblink = found.dblinksdp;
  if (found.morethanone || dblink == NULL) return;

  /* idx is only available on extended (ObjValNode) descriptors */
  if (dblink->extended == 0) return;
  extended = (ObjValNodePtr) dblink;
  if (extended->idx.parenttype != OBJ_BIOSEQ) return;

  payload = (UserObjectPtr) dblink->data.ptrvalue;
  if (payload == NULL) return;

  /* transfer ownership of the user object to the set-level descriptor */
  dblink->data.ptrvalue = NULL;
  extended->idx.deleteme = TRUE;
  SeqDescrAddPointer (&(bssp->descr), Seq_descr_user, payload);
}
1172 
/* Descriptor callback: flags user descriptors for deletion (idx.deleteme)
 * and reports that through the Boolean pointed to by userdata.
 *
 * A user descriptor is kept when its object has a type and either the type
 * string is "NcbiAutofix" or "Unverified" (case-insensitive) or the object
 * has data.  Non-extended descriptors are never flagged. */
static void MarkEmptyUserObjects (SeqDescrPtr sdp, Pointer userdata)

{
  BoolPtr        flagged = (BoolPtr) userdata;
  CharPtr        type_str;
  UserObjectPtr  uop;

  if (sdp->choice != Seq_descr_user) return;

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (uop != NULL && uop->type != NULL) {
    type_str = uop->type->str;
    if (StringICmp (type_str, "NcbiAutofix") == 0 ||
        StringICmp (type_str, "Unverified") == 0 ||
        uop->data != NULL) {
      return;
    }
  }

  if (sdp->extended == 0) return;
  ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;

  if (flagged != NULL) {
    *flagged = TRUE;
  }
}
1202 
1203 /*****************************************************************************
1204 *   SeqEntryToAsn3Ex(sep)
1205 *       Converts a SeqEntry with old OrgRefs to SeqEntry with Biosource
1206 *        Does the Taxonomy lookup if taxserver = TRUE and taxfun != NULL
1207 *        Strips old stuff if strip_old=TRUE
1208 *        Moves /map from GeneRef, removes ProtRef xrefs and checks genetic
1209 *        code in CDSs
1210 *        RETURN:
1211 *        INFO_ASNOLD - if the entry is in spec 3.0 (has BioSource) already
1212 *        INFO_ASNNEW - if the entry is converted to new spec
1213 *        ERR_REJECT -  if the entry has internal FATAL errors
1214 *        ERR_INPUT -   if input is NULL
1215 *
1216 *        New argument added SeqEntryFunc taxmerge
1217 *        txfun - Taxon3ReplaceOrgInSeqEntry
1218 *        taxmerge - Tax3MergeSourceDescr
1219 *****************************************************************************/
SeqEntryToAsn3Ex(SeqEntryPtr sep,Boolean strip_old,Boolean source_correct,Boolean taxserver,SeqEntryFunc taxfun,SeqEntryFunc taxmerge,Boolean gpipeMode,Boolean isEmblOrDdbj)1220 Int4 SeqEntryToAsn3Ex (
1221 SeqEntryPtr sep,
1222 Boolean strip_old,
1223 Boolean source_correct,
1224 Boolean taxserver,
1225 SeqEntryFunc taxfun,
1226 SeqEntryFunc taxmerge,
1227 Boolean gpipeMode,
1228 Boolean isEmblOrDdbj
1229 )
1230 {
1231     ToAsn3 ta;
1232     OrgFixPtr ofp = NULL;
1233     MolFixPtr mfp = NULL;
1234     CharPtr porg = NULL;
1235     QualMap qm;
1236     BSMap bs;
1237     ValNodePtr mult = NULL;
1238     Int4 retval = INFO_ASNOLD, ret;
1239     Int2 update_date_pos;
1240     Boolean  do_delete = FALSE;
1241 
1242     ta.had_biosource = FALSE;
1243     ta.had_molinfo = FALSE;
1244     ta.ofp = NULL;
1245     ta.mfp = NULL;
1246     qm.name = NULL;
1247     qm.same = TRUE;
1248     bs.same = TRUE;
1249     bs.bsp = NULL;
1250 
1251     if (sep == NULL) {
1252         return ERR_INPUT;
1253     }
1254 
1255     RemoveAllNcbiCleanupUserObjects (sep);
1256 
1257     VisitDescriptorsInSep (sep, (Pointer) &do_delete, MarkEmptyUserObjects);
1258     if (do_delete) {
1259       DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
1260     }
1261 
1262     VisitSetsInSep (sep, NULL, MoveDBLinkToNPS);
1263 
1264     update_date_pos = GetUpdateDatePos (sep);
1265     RemoveEmptyTitleAndPubGenAsOnlyPub (sep);
1266     if (source_correct) {
1267         SeqEntryExplore(sep, (Pointer)(&porg), CorrectSourceFeat);
1268     }
1269     toporg(sep);
1270     SeqEntryExplore(sep, (Pointer)(&ta), FindOrg);
1271 
1272     VisitFeaturesInSep (sep, NULL, CleanMiscFeatFields);
1273 
1274     if (ta.had_biosource) {
1275 /* entry is in asn.1 spec 3.0 already do the checks only */
1276         retval |= INFO_ASNNEW;
1277         if(strip_old) {
1278             SeqEntryExplore(sep, NULL, StripOld);
1279         }
1280         ToAsn4(sep, isEmblOrDdbj);               /* move pubs and lineage */
1281         CombineBSFeat(sep);
1282         if (taxserver && taxfun != NULL) {
1283             SeqEntryExplore(sep, NULL, taxfun);
1284         }
1285         if (is_equiv(sep)) {
1286             /*do nothing*/
1287         }else if (NOT_segment(sep)) {
1288             if (taxserver && taxmerge != NULL) {
1289                 SeqEntryExplore(sep, mult, taxmerge);
1290             } else {
1291                 SeqEntryExplore(sep, mult, MergeBSinDescr);
1292             }
1293         } else {
1294             //LCOV_EXCL_START
1295             // Only for SegSets
1296             SeqEntryExplore(sep, (Pointer) (&bs), CheckBS);
1297             if (bs.same == TRUE) {
1298                 SeqEntryExplore(sep, (Pointer) (&bs), StripBSfromParts);
1299             } else {
1300                 SeqEntryExplore(sep, (Pointer) (&bs), StripBSfromTop);
1301             }
1302             //LCOV_EXCL_STOP
1303         }
1304          ret = FixNucProtSet(sep);
1305          retval |= ret;
1306         EntryChangeImpFeat(sep);
1307         EntryChangeGBSource(sep);
1308         SeqEntryExplore (sep, NULL, FixProtMolInfo);
1309         SeqEntryExplore (sep, NULL, FuseMolInfos);
1310         if (! gpipeMode) {
1311           SeqEntryExplore(sep, NULL, StripProtXref);
1312         }
1313         SeqEntryExplore(sep, (Pointer)(&qm), CheckMaps);
1314         /*
1315         if (qm.same == TRUE) {
1316             SeqEntryExplore(sep, (Pointer)(&qm), StripMaps);
1317         } else {
1318             SeqEntryExplore(sep, NULL, MapsToGenref);
1319         }
1320         */
1321         if (! isEmblOrDdbj) {
1322           SeqEntryExplore(sep, NULL, MapsToGenref);
1323         }
1324         CheckGeneticCode(sep);
1325         NormalizeSegSeqMolInfo (sep);
1326         toasn3_free(&ta);
1327         RestoreUpdateDatePos (sep, update_date_pos);
1328         if(qm.name != NULL)
1329         {
1330             MemFree(qm.name);
1331         }
1332 
1333         return retval;
1334     }
1335     if (ta.ofp == NULL) {
1336         ErrPostStr(SEV_WARNING, ERR_ORGANISM_NotFound, "No information found to create BioSource");
1337     }
1338     if (ta.mfp == NULL) {
1339         ErrPostStr(SEV_WARNING, ERR_ORGANISM_NotFound, "No information found to create MolInfo");
1340     }
1341 
1342     FixToAsn(sep, (Pointer)(&ta));
1343 
1344     if (ta.ofp != NULL) {
1345         ofp = ta.ofp;
1346         SeqEntryExplore(sep, (Pointer)ofp, FixOrg);
1347     }
1348     if (ta.mfp != NULL) {
1349         mfp = ta.mfp;
1350         SeqEntryExplore(sep, (Pointer)mfp, FixMol);
1351     }
1352 
1353 /* entry  is converted to asn.1 spec 3.0, now do the checks */
1354     retval = INFO_ASNNEW;
1355     if(ta.had_biosource && strip_old) {
1356         SeqEntryExplore(sep, NULL, StripOld);
1357     }
1358     ToAsn4(sep, isEmblOrDdbj);          /* move pubs and lineage */
1359     if (taxserver && taxfun != NULL) {
1360         SeqEntryExplore(sep, NULL, taxfun);
1361     }
1362     if (is_equiv(sep)) {
1363             /*do nothing*/
1364     } else if (NOT_segment(sep)) {
1365         if (taxserver && taxmerge != NULL) {
1366             SeqEntryExplore(sep, mult, taxmerge);
1367         } else {
1368             SeqEntryExplore(sep, mult, MergeBSinDescr);
1369         }
1370     } else {
1371         SeqEntryExplore(sep, (Pointer) (&bs), CheckBS);
1372         if (bs.same == TRUE) {
1373             SeqEntryExplore(sep, (Pointer) (&bs), StripBSfromParts);
1374         } else {
1375             SeqEntryExplore(sep, (Pointer) (&bs), StripBSfromTop);
1376         }
1377     }
1378     ret = FixNucProtSet(sep);
1379     retval |= ret;
1380     EntryChangeImpFeat(sep);
1381     EntryChangeGBSource(sep);
1382     SeqEntryExplore (sep, NULL, FixProtMolInfo);
1383     SeqEntryExplore (sep, NULL, FuseMolInfos);
1384     if (! gpipeMode) {
1385       SeqEntryExplore(sep, NULL, StripProtXref);
1386     }
1387     SeqEntryExplore(sep, (Pointer)(&qm), CheckMaps);
1388     /*
1389     if (qm.same == TRUE) {
1390         SeqEntryExplore(sep, (Pointer)(&qm), StripMaps);
1391     } else {
1392         SeqEntryExplore(sep, NULL, MapsToGenref);
1393     }
1394     */
1395     if (! isEmblOrDdbj) {
1396       SeqEntryExplore(sep, NULL, MapsToGenref);
1397     }
1398     CheckGeneticCode(sep);
1399     NormalizeSegSeqMolInfo (sep);
1400     toasn3_free(&ta);
1401     RestoreUpdateDatePos (sep, update_date_pos);
1402     if(qm.name)
1403         qm.name=MemFree(qm.name);
1404     return retval;
1405 }
1406 
1407 //LCOV_EXCL_START
CheckLocWhole(BioseqPtr bsp,SeqLocPtr slp)1408 Boolean CheckLocWhole(BioseqPtr bsp, SeqLocPtr slp)
1409 {
1410     SeqIntPtr sip;
1411 
1412     if (slp == NULL)
1413         return FALSE;
1414 
1415     if (slp->choice == SEQLOC_WHOLE) {
1416         return TRUE;
1417     } else if (slp->choice == SEQLOC_INT) {
1418         sip = slp->data.ptrvalue;
1419         if (sip->from == 0 && sip->to == bsp->length-1) {
1420             return TRUE;
1421         }
1422     }
1423     return FALSE;
1424 }
1425 //LCOV_EXCL_STOP
1426 /*****************************************************************************
1427 *
1428 *   Find all the OrgRefs
1429 *
1430 *****************************************************************************/
FindOrg(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)1431 void FindOrg (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
1432 {
1433     ValNodePtr vnp, vnp0;
1434     BioseqPtr bsp;
1435     BioseqSetPtr bssp;
1436     OrgRefPtr orp;
1437     SeqAnnotPtr sap, ap;
1438     SeqFeatPtr sfp;
1439     ToAsn3Ptr tap;
1440     Uint1        mol = 0, meth = 0;
1441     ValNodePtr    mod = NULL, org;
1442     ImpFeatPtr    imp;
1443     Boolean        info = FALSE;
1444     Int4        len;
1445     Boolean     whole = FALSE;
1446     GBQualPtr    q;
1447 
1448     tap = (ToAsn3Ptr)data;
1449 
1450     if (tap->had_biosource)
1451         return;
1452 
1453     if (IS_Bioseq(sep))
1454     {
1455         bsp = (BioseqPtr)(sep->data.ptrvalue);
1456         vnp = bsp->descr;
1457         sap = bsp->annot;
1458         len = bsp->length;
1459     }
1460     else
1461     {
1462         bssp = (BioseqSetPtr)(sep->data.ptrvalue);
1463         vnp = bssp->descr;
1464         sap = bssp->annot;
1465     }
1466 
1467     vnp0 = vnp;
1468 //LCOV_EXCL_START
1469 // used for rescuing biosource and molinfo from modif, mol-type, and method descriptors,
1470 // which are obsolete
1471     while (vnp != NULL) {
1472         if (vnp->choice == Seq_descr_org) {
1473             org = vnp;
1474             orp = (OrgRefPtr)(vnp->data.ptrvalue);
1475             AddOrgToFix(orp, tap, mod, sep, org, NULL, index);
1476         }
1477         if (vnp->choice == Seq_descr_mol_type) {
1478             mol = vnp->data.intvalue;
1479             if (mol != 0) {
1480                 info = TRUE;
1481             }
1482         } else if (vnp->choice == Seq_descr_method) {
1483             meth = vnp->data.intvalue;
1484             if (meth != 0) {
1485                 info = TRUE;
1486             }
1487         } else if (vnp->choice == Seq_descr_modif) {
1488             mod = vnp->data.ptrvalue;
1489             info = TRUE;
1490         } else if (vnp->choice == Seq_descr_source) {
1491             tap->had_biosource = TRUE;
1492             return;
1493         }
1494         vnp = vnp->next;
1495     }
1496     if (info) {
1497         AddMolToFix(tap, sep, mol, mod, meth, index);
1498     }
1499 //LCOV_EXCL_STOP
1500     for (ap = sap; ap; ap = ap->next) {
1501         if (ap->type != 1) {  /* feature table */
1502             continue;
1503         }
1504         for (sfp = (SeqFeatPtr)(ap->data); sfp; sfp = sfp->next) {
1505             if (sfp->data.choice == SEQFEAT_ORG) {
1506                 orp = (OrgRefPtr)(sfp->data.value.ptrvalue);
1507                 if ((bsp = BioseqFind(SeqLocId(sfp->location))) != NULL) {
1508                     whole = check_whole(sfp, bsp->length);
1509                 }
1510                 if (whole) {
1511                     AddOrgToFix(orp, tap, NULL, sep, vnp0, NULL, index);
1512                 } else {
1513                     AddOrgToFix(orp, tap, NULL, sep, NULL, sfp, index);
1514                 }
1515             }
1516             if (sfp->data.choice == SEQFEAT_IMP) {
1517                 imp = (ImpFeatPtr)(sfp->data.value.ptrvalue);
1518                 if (StringCmp(imp->key, "source") == 0) {
1519                     for(q = sfp->qual; q != NULL; q = q->next) {
1520                         if (StringCmp(q->qual, "organism") == 0) {
1521                             break;
1522                         }
1523                     }
1524                     if (q == NULL) {
1525                         continue;
1526                     }
1527                     if ((bsp = BioseqFind(SeqLocId(sfp->location))) != NULL) {
1528                         whole = check_whole(sfp, bsp->length);
1529                     }
1530                     if (whole) {
1531                         AddImpToFix(sfp, tap, sep, vnp0, NULL, index);
1532                     } else {
1533                         AddImpToFix(sfp, tap, sep, NULL, sfp, index);
1534                     }
1535                 }
1536             }
1537             whole = FALSE;
1538         }
1539     }
1540     return;
1541 }
1542 
1543 /***********************************************************************
1544 *    0     same organisms
1545 *  -1    different organisms
1546 ************************************************************************/
1547 
BSComparison(BioSourcePtr one,BioSourcePtr two)1548 Int4 BSComparison(BioSourcePtr one, BioSourcePtr two)
1549 {
1550     OrgRefPtr orp1, orp2;
1551     OrgNamePtr onp1, onp2;
1552     OrgModPtr omp1, omp2;
1553     SubSourcePtr ssp1, ssp2;
1554     CharPtr name1 = NULL, name2 = NULL;
1555     Int4        i, retval = -1;
1556 
1557     if (one == NULL || two == NULL)
1558         return -1;
1559     if ((orp1 = one->org) == NULL)
1560         return -1;
1561     if ((orp2 = two->org) == NULL)
1562         return -1;
1563     if ((name1 = orp1->taxname) == NULL)
1564         return -1;
1565     if (*name1 == '\0') {
1566         ErrPostStr(SEV_WARNING, ERR_ORGANISM_Empty, "empty organism in source feature");
1567         return -1;
1568     }
1569     if ((name2 = orp2->taxname) == NULL)
1570         return -1;
1571     if (*name2 == '\0') {
1572         ErrPostStr(SEV_WARNING, ERR_ORGANISM_Empty, "empty organism in source feature");
1573         return -1;
1574     }
1575 /*  Strip organelle from organism */
1576     for (i = 0; i < num_organelle; i++) {
1577         if (StringNCmp(name1, organelle[i].name,
1578                 StringLen(organelle[i].name)) == 0) {
1579             name1 += StringLen(organelle[i].name);
1580         }
1581         if (StringNCmp(name2, organelle[i].name,
1582                 StringLen(organelle[i].name)) == 0) {
1583             name2 += StringLen(organelle[i].name);
1584         }
1585     }
1586     for (; name1 != NULL && *name1 == ' '; name1++) continue;
1587     for (; name2 != NULL && *name2 == ' '; name2++) continue;
1588     if (StringICmp(name2, name1) == 0) {
1589         retval = 0;
1590     } else {
1591             ErrPostEx(SEV_ERROR, ERR_ORGANISM_Diff,
1592      "Different organisms in one entry: %s|%s", name2, name1);
1593          retval = -1;
1594     }
1595 
1596 /* Compare clones - now all subsource and orgmod modifiers */
1597 
1598     for (ssp1 = one->subtype, ssp2 = two->subtype;
1599         ssp1 != NULL && ssp2 != NULL;
1600         ssp1 = ssp1->next, ssp2 = ssp2->next) {
1601         if (ssp1->subtype != ssp2->subtype) return -1;
1602         if (StringICmp (ssp1->name, ssp2->name) != 0) return -1;
1603     }
1604     if (ssp1 != NULL || ssp2 != NULL) return -1;
1605 
1606     onp1 = orp1->orgname;
1607     onp2 = orp2->orgname;
1608     if (onp1 == NULL || onp2 == NULL) return retval;
1609 
1610     for (omp1 = onp1->mod, omp2 = onp2->mod;
1611         omp1 != NULL && omp2 != NULL;
1612         omp1 = omp1->next, omp2 = omp2->next) {
1613         if (omp1->subtype != omp2->subtype) return -1;
1614         if (StringICmp (omp1->subname, omp2->subname) != 0) return -1;
1615     }
1616     if (omp1 != NULL || omp2 != NULL) return -1;
1617 
1618     return retval;
1619 }
1620 
1621 //LCOV_EXCL_START
BSComparisonEx(BioSourcePtr one,BioSourcePtr two,Boolean clone)1622 Int4 BSComparisonEx(BioSourcePtr one, BioSourcePtr two, Boolean clone)
1623 {
1624     OrgRefPtr orp1, orp2;
1625     SubSourcePtr ssp1, ssp2;
1626     CharPtr name1 = NULL, name2 = NULL;
1627     CharPtr subname1 = NULL, subname2 = NULL;
1628     Int4        i, retval = -1;
1629 
1630     if (one == NULL || two == NULL)
1631         return -1;
1632     if ((orp1 = one->org) == NULL)
1633         return -1;
1634     if ((orp2 = two->org) == NULL)
1635         return -1;
1636     if ((name1 = orp1->taxname) == NULL)
1637         return -1;
1638     if (*name1 == '\0') {
1639         ErrPostStr(SEV_WARNING, ERR_ORGANISM_Empty, "empty organism in source feature");
1640         return -1;
1641     }
1642     if ((name2 = orp2->taxname) == NULL)
1643         return -1;
1644     if (*name2 == '\0') {
1645         ErrPostStr(SEV_WARNING, ERR_ORGANISM_Empty, "empty organism in source feature");
1646         return -1;
1647     }
1648 /*  Strip organelle from organism */
1649     for (i = 0; i < num_organelle; i++) {
1650         if (StringNCmp(name1, organelle[i].name,
1651                 StringLen(organelle[i].name)) == 0) {
1652             name1 += StringLen(organelle[i].name);
1653         }
1654         if (StringNCmp(name2, organelle[i].name,
1655                 StringLen(organelle[i].name)) == 0) {
1656             name2 += StringLen(organelle[i].name);
1657         }
1658     }
1659     for (; name1 != NULL && *name1 == ' '; name1++) continue;
1660     for (; name2 != NULL && *name2 == ' '; name2++) continue;
1661     if (StringICmp(name2, name1) == 0) {
1662         retval = 0;
1663     } else {
1664             ErrPostEx(SEV_ERROR, ERR_ORGANISM_Diff,
1665      "Different organisms in one entry: %s|%s", name2, name1);
1666          retval = -1;
1667     }
1668 
1669 /* Compare clones */
1670     for (ssp1 = one->subtype; ssp1; ssp1= ssp1->next) {
1671         if (ssp1->subtype == 3) {  /* clone */
1672             subname1 = ssp1->name;
1673         }
1674     }
1675     for (ssp2 = two->subtype; ssp2; ssp2= ssp2->next) {
1676         if (ssp2->subtype == 3) {  /* clone */
1677             subname2 = ssp2->name;
1678         }
1679     }
1680     if (clone) {
1681         if (subname1 == NULL || subname2 == NULL) {
1682             return retval;
1683         }
1684     }
1685     if (StringCmp(subname1, subname2) != 0) {
1686         return -1;
1687     }
1688 /* Compare notes (that are kludged to subtype 'other' */
1689     for (ssp1 = one->subtype; ssp1; ssp1= ssp1->next) {
1690         if (ssp1->subtype == 255) {  /* other */
1691             subname1 = ssp1->name;
1692         }
1693     }
1694     for (ssp2 = two->subtype; ssp2; ssp2= ssp2->next) {
1695         if (ssp2->subtype == 255) {  /* other */
1696             subname2 = ssp2->name;
1697         }
1698     }
1699     if (clone) {
1700         if (subname1 == NULL || subname2 == NULL) {
1701             return retval;
1702         }
1703     }
1704     if (StringCmp(subname1, subname2) != 0) {
1705         return -1;
1706     }
1707     return retval;
1708 }
1709 //LCOV_EXCL_STOP
1710 
/* Returns the value of the first qualifier in the chain whose name equals
 * qual (case-sensitive), or NULL when no qualifier matches.  The returned
 * pointer is the qualifier's own val field, not a copy. */
static CharPtr GetQualValue(GBQualPtr gbqual, CharPtr qual)
{
    GBQualPtr cur = gbqual;

    while (cur != NULL) {
        if (StringCmp(cur->qual, qual) == 0) {
            return cur->val;
        }
        cur = cur->next;
    }
    return NULL;
}
1724 
1725 /*        mapping from source feature qualifiers and comments */
CheckQualsWithComm(BioSourcePtr bsp,SeqFeatPtr sfp)1726 void CheckQualsWithComm(BioSourcePtr bsp, SeqFeatPtr sfp)
1727 {
1728     CharPtr    tmp;
1729     OrgModPtr    omp = NULL;
1730     OrgNamePtr    onp = NULL;
1731     OrgRefPtr    orp;
1732 
1733     if (bsp == NULL)
1734         return;
1735     if (sfp == NULL)
1736         return;
1737     if (bsp->org == NULL)
1738         return;
1739     CheckQuals(bsp, sfp->qual);
1740     if (sfp->comment != NULL) {
1741         tmp  = MemNew(StringLen(sfp->comment) +1);
1742         StringCpy(tmp, sfp->comment);
1743         orp = (OrgRefPtr) bsp->org;
1744         onp = bsp->org->orgname;
1745         if (onp == NULL) {
1746             onp = OrgNameNew();
1747         }
1748         omp = OrgModNew();
1749         omp->subtype = 255;
1750         omp->subname = StringSave(tmp);
1751         onp->mod = tie_next_OrgMod(onp->mod, omp);
1752         MemFree(tmp);
1753     }
1754     if (onp != NULL) {
1755         bsp->org->orgname = onp;
1756     }
1757     return;
1758 }
1759 
/* Maps the qualifiers of a source feature onto a BioSource.
 *
 * For every qualifier other than "organism":
 *   - "note" is appended to the organism's OrgMod list with subtype 255;
 *   - a name listed in bad_quals triggers a warning that shows at most the
 *     first 50 characters of the value;
 *   - a name found in the genome table sets bsp->genome to the table index,
 *     but only when genome is still 0 or when index 4 replaces a current
 *     value of 5;
 *   - a name found in the subtype table adds a SubSource with subtype
 *     index + 1 and the qualifier value ("" when the value is NULL);
 *   - a name found in orgmod_subtype adds an OrgMod with the listed number.
 * An OrgName is created on demand and attached to bsp->org at the end.
 * Does nothing when bsp or bsp->org is NULL.
 */
void CheckQuals(BioSourcePtr bsp, GBQualPtr qsfp)
{
    /* static storage is zero-filled and byte 50 is never written below,
       so msg stays terminated after StringNCpy of 50 characters */
    static Char   msg[51];
    GBQualPtr     gbq;
    Uint1         i;
    OrgModPtr     omp;
    OrgNamePtr    onp;
    SubSourcePtr  ssp;

    if (bsp == NULL || bsp->org == NULL) {
        return;
    }

    onp = bsp->org->orgname;

    for (gbq = qsfp; gbq != NULL; gbq = gbq->next) {
        if (StringCmp(gbq->qual, "organism") == 0) {
            continue;
        }

        if (StringCmp(gbq->qual, "note") == 0) {
            if (onp == NULL) {
                onp = OrgNameNew();
            }
            omp = OrgModNew();
            omp->subtype = 255;
            omp->subname = StringSave(gbq->val);
            onp->mod = tie_next_OrgMod(onp->mod, omp);
        }

        if (gbq->qual != NULL) {
            /* NOTE(review): a match only produces the warning; the
               qualifier is still run through the tables below.  Confirm
               whether unwanted qualifiers were meant to be skipped. */
            for (i = 0; i < num_bad_quals; i++) {
                if (StringCmp(bad_quals[i], gbq->qual) != 0) {
                    continue;
                }
                StringNCpy(msg, gbq->val, 50);
                ErrPostEx(SEV_WARNING, ERR_SOURCE_UnwantedQualifiers,
         "Unwanted qualifier on source feature: %s=%s", gbq->qual, msg);
            }

            for (i = 0; i < num_genome; i++) {
                if (StringCmp(genome[i], gbq->qual) != 0) {
                    continue;
                }
                if (bsp->genome == 0 || (bsp->genome == 5 && i == 4)) {
                    bsp->genome = i;
                    break;
                }
            }

            for (i = 0; i < num_subtype; i++) {
                if (StringCmp(subtype[i], gbq->qual) != 0) {
                    continue;
                }
                ssp = SubSourceNew();
                ssp->subtype = (Uint1) (i+1);
                ssp->name = StringSave((gbq->val == NULL) ? "" : gbq->val);
                bsp->subtype = tie_next_subtype(bsp->subtype, ssp);
                break;
            }
        }

        for (i = 0; orgmod_subtype[i].name != NULL; i++) {
            if (StringCmp(gbq->qual, orgmod_subtype[i].name) != 0) {
                continue;
            }
            if (onp == NULL) {
                onp = OrgNameNew();
            }
            /* the OrgName choice/data are left unset here; they are optional */
            omp = OrgModNew();
            omp->subtype = (Uint1) orgmod_subtype[i].num;
            omp->subname = StringSave(gbq->val);
            onp->mod = tie_next_OrgMod(onp->mod, omp);
            break;
        }
    }

    if (onp != NULL) {
        bsp->org->orgname = onp;
    }
}
1851 
1852 //LCOV_EXCL_START
1853 // used for rescuing molinfo from modif, mol-type, and method descriptors,
1854 // which are obsolete
new_info(MolInfoPtr mfi)1855 MolInfoPtr new_info(MolInfoPtr mfi)
1856 {
1857     return (mfi == NULL) ? MolInfoNew() : mfi;
1858 }
1859 
1860 /*****************************************************************************/
1861 // used for rescuing molinfo from modif, mol-type, and method descriptors,
1862 // which are obsolete
ModToMolInfo(MolInfoPtr mfi,Uint1 mod)1863 MolInfoPtr ModToMolInfo(MolInfoPtr mfi, Uint1 mod)
1864 {
1865 
1866         switch(mod) {
1867             case 10:
1868                 mfi = new_info(mfi);
1869                 mfi->completeness = 2;
1870                 break;
1871             case 11:
1872                 mfi = new_info(mfi);
1873                 mfi->completeness = 1;
1874                 break;
1875             case 16:
1876                 mfi = new_info(mfi);
1877                 mfi->completeness = 3;
1878                 break;
1879             case 17:
1880                 mfi = new_info(mfi);
1881                 mfi->completeness = 4;
1882                 break;
1883             case 20:
1884                 mfi = new_info(mfi);
1885                 mfi->tech = 2;
1886                 break;
1887             case 21:
1888                 mfi = new_info(mfi);
1889                 mfi->tech = 3;
1890                 break;
1891             case 22:
1892                 mfi = new_info(mfi);
1893                 mfi->tech = 4;
1894                 break;
1895             default:
1896                 break;
1897         }
1898         return mfi;
1899 }
1900 
1901 /*****************************************************************************/
1902 // used for rescuing BioSource from modif, mol-type, and method descriptors,
1903 // which are obsolete
ModToBiosource(BioSourcePtr bsp,Uint1 mod)1904 void ModToBiosource(BioSourcePtr bsp, Uint1 mod)
1905 {
1906         switch(mod) {
1907             case 2:
1908                 bsp->genome = 8;   /* extrachrom */
1909                 break;
1910             case 3:
1911                 bsp->genome = 9;    /*plasmid */
1912                 break;
1913             case 4:
1914                 bsp->genome = 5;    /* mitochondrion */
1915                 break;
1916             case 5:
1917                 bsp->genome = 2;    /* chloroplast */
1918                 break;
1919             case 6:
1920                 bsp->genome = 4;    /* kinetoplast */
1921                 break;
1922             case 7:
1923                 bsp->genome = 12;    /* cyanelle */
1924                 break;
1925             case 8:
1926                 bsp->origin = 5;     /* synthetic */
1927                 break;
1928             case 12:
1929                 bsp->origin = 3;    /* mutagen */
1930                 break;
1931             case 13:
1932                 bsp->origin = 2;    /* natmut */
1933                 break;
1934             case 14:
1935                 bsp->genome = 10;    /*transposon */
1936                 break;
1937             case 15:
1938                 bsp->genome = 11;    /* insertion-seq */
1939                 break;
1940             case 18:
1941                 bsp->genome = 7;    /*macronuclear */
1942                 break;
1943             case 19:
1944                 bsp->genome = 13;    /* proviral*/
1945                 break;
1946             case 23:
1947                 bsp->genome = 3;    /* chromoplast */
1948                 break;
1949             default:
1950                 break;
1951         }
1952         return;
1953 }
1954 
1955 /*****************************************************************************
1956 *
1957 *  if no BioSource found on descr level and feature Biosource found
1958 *    move it to the top
1959 *    (stop using 05-09-96)
1960 *****************************************************************************/
CkOrg(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)1961 void CkOrg (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
1962 {
1963     ToAsn3Ptr    tap;
1964     ValNodePtr    vnp, tmp;
1965     SeqFeatPtr    sfp, tmp_sfp = NULL;
1966     SeqAnnotPtr    sap;
1967     BioseqPtr    bsp = NULL;
1968     BioseqSetPtr    bssp = NULL;
1969 
1970     tap = (ToAsn3Ptr)data;
1971     if (!tap->had_biosource)
1972         return;
1973 
1974     if (IS_Bioseq(sep)) {
1975         bsp = (BioseqPtr)(sep->data.ptrvalue);
1976         vnp = bsp->descr;
1977         sap = bsp->annot;
1978     } else {
1979         bssp = (BioseqSetPtr)(sep->data.ptrvalue);
1980         vnp = bssp->descr;
1981         sap = bssp->annot;
1982     }
1983 
1984     for (tmp = vnp; tmp != NULL; tmp = tmp->next) {
1985         if (tmp->choice == Seq_descr_source) {
1986             break;
1987         }
1988     }
1989     if (tmp != NULL) {
1990         return;
1991     }
1992     if (sap == NULL || sap->type != 1) {
1993         return;
1994     }
1995     tmp_sfp = (SeqFeatPtr) (sap->data);
1996     sfp = SeqFeatExtractList(&(tmp_sfp), SEQFEAT_BIOSRC);
1997     if (sfp != NULL) {
1998         tmp = SeqDescrNew(vnp);
1999         tmp->choice = Seq_descr_source;
2000         tmp->data.ptrvalue = AsnIoMemCopy(sfp->data.value.ptrvalue,
2001         (AsnReadFunc) BioSourceAsnRead, (AsnWriteFunc) BioSourceAsnWrite);
2002         SeqFeatFree(sfp);
2003     }
2004     sap->data = tmp_sfp;
2005     if (tmp_sfp == NULL) {
2006         if (bsp != NULL) {
2007             bsp->annot = NULL;
2008         } else if (bssp != NULL) {
2009             bssp->annot = NULL;
2010         }
2011     }
2012 }
2013 //LCOV_EXCL_STOP
2014 
2015 /**************************************************************************
2016 *    Compare BioSources in one bioseq->descr,
2017 *    merge if organisms are the same or create a feature if different
2018 *
2019 **************************************************************************/
MergeBSinDescr(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)2020 void MergeBSinDescr (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
2021 {
2022     BioseqPtr bsp = NULL;
2023     ValNodePtr vnp, v, vnext, mult;
2024     SeqAnnotPtr sap = NULL;
2025     SeqIdPtr        sip;
2026     SeqFeatPtr        sfp;
2027     BioSourcePtr bsrc = NULL, bs;
2028 
2029     if (!IS_Bioseq(sep)) {
2030         return;
2031     }
2032     mult = (ValNodePtr) data;
2033     bsp = (BioseqPtr) sep->data.ptrvalue;
2034     if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)
2035             && (bsp->repr != Seq_repr_delta))
2036         return;
2037 
2038     if (! ISA_na(bsp->mol))
2039         return;
2040 
2041     sap = bsp->annot;
2042     bsp->descr = tie_next(bsp->descr, mult);
2043     for (vnp = bsp->descr; vnp; vnp= vnp->next) {
2044         if (vnp->choice == Seq_descr_source) {
2045             bsrc = vnp->data.ptrvalue;
2046             break;
2047         }
2048     }
2049     if (bsrc == NULL || bsrc->org == NULL) {
2050         return;
2051     }
2052     for (v = vnp->next; v; v = vnext) {
2053         vnext = v->next;
2054         if (v->choice != Seq_descr_source) {
2055             continue;
2056         }
2057         bs = v->data.ptrvalue;
2058         if (bs->org != NULL) {
2059             if (bsrc && CmpOrgById(bsrc, bs) == TRUE) {
2060                 bsrc = BioSourceMerge(bsrc, bs);
2061             } else {
2062                 sfp = SeqFeatNew();
2063                 sfp->location = ValNodeNew(NULL);
2064                 sfp->location->choice = SEQLOC_WHOLE;
2065                 sip = SeqIdDup(bsp->id);
2066                 sfp->location->data.ptrvalue = sip ;
2067                 sfp->data.choice = SEQFEAT_BIOSRC;
2068                 sfp->data.value.ptrvalue =
2069                     AsnIoMemCopy(bs, (AsnReadFunc) BioSourceAsnRead,
2070                                        (AsnWriteFunc) BioSourceAsnWrite);
2071                    if (sap == NULL) {
2072                        sap = SeqAnnotNew();
2073                        sap->type = 1;
2074                        bsp->annot = sap;
2075                    }
2076                    sap->data = tie_feat(sap->data, sfp);
2077             }
2078         } else {
2079             ErrPostStr(SEV_WARNING, ERR_ORGANISM_Empty, "Biosource missing Organism info");
2080         }
2081         BioSourceFree(bs);
2082         vnp->next = remove_node(vnp->next, v);
2083 
2084     }
2085     return;
2086 }
2087 /********************************************************************************    Move Biosource to nuc-prot set level
2088 *******************************************************************************/
FixNucProtSet(SeqEntryPtr sep)2089 Int4 FixNucProtSet(SeqEntryPtr sep)
2090 {
2091     BioseqSetPtr bssp, bseg;
2092     BioseqPtr bsp = NULL, prot = NULL;
2093     ValNodePtr descr = NULL;
2094     ValNodePtr tmp, vnp, v, vnext;
2095     BioSourcePtr bsrc = NULL, bs;
2096     SeqEntryPtr seqsep, s;
2097     SeqAnnotPtr sap = NULL;
2098     SeqIdPtr        sip;
2099     SeqFeatPtr        sfp;
2100     Int4             retval = INFO_ASNOLD;
2101     Boolean            bSingle = FALSE;
2102 
2103 
2104     if (IS_Bioseq(sep)) {
2105         return retval;
2106     }
2107     bssp = (BioseqSetPtr)(sep->data.ptrvalue);
2108     if (bssp->_class != 1) {    /*  do the rest for nuc-prot only */
2109         return retval;
2110     }
2111     seqsep = bssp->seq_set;
2112     if (seqsep == NULL) {
2113         return retval;
2114     }
2115     if (seqsep->choice == 1) {   /* single bioseq */
2116         bsp = (BioseqPtr) seqsep->data.ptrvalue;
2117         descr = bsp->descr;
2118         sap = bsp->annot;
2119         bSingle = TRUE;
2120     } else if (seqsep->choice == 2) { /* segmented set */
2121         bseg = (BioseqSetPtr) seqsep->data.ptrvalue;
2122 /*   quick fix of core dump in segmented sets with multiple organisms
2123     BIOSOURCE feature is created on main segmeted bioseq (not parts) !*/
2124         s = bseg->seq_set;
2125         if (s != NULL) {
2126             bsp = (BioseqPtr) s->data.ptrvalue;
2127         }
2128         descr = bseg->descr;
2129         sap = bseg->annot;
2130     }
2131     if (descr == NULL) {
2132         return retval;        /* nothing to move */
2133     }
2134     for (vnp = bssp->descr; vnp; vnp = vnp->next) {  /* nucprot set level */
2135         if (vnp->choice == Seq_descr_source) {
2136             bsrc = vnp->data.ptrvalue;
2137             break;
2138         }
2139     }
2140     for (v = descr; v; v = vnext) {            /* from bioseq or BioseqSet */
2141         vnext = v->next;
2142         if (v->choice != Seq_descr_source) {
2143             continue;
2144         }
2145         bs = v->data.ptrvalue;
2146         if (bsrc == NULL) {
2147             bsrc = BioSourceMerge(bsrc, bs);
2148             tmp = SeqDescrAdd(&(bssp->descr));
2149             tmp->choice = Seq_descr_source;
2150             tmp->data.ptrvalue = bsrc;
2151         } else if (CmpOrgById(bsrc, bs) == TRUE) {
2152             bsrc = BioSourceMerge(bsrc, bs);
2153         } else if (bsp != NULL) {
2154             sfp = SeqFeatNew();
2155             sfp->location = ValNodeNew(NULL);
2156             sfp->location->choice = SEQLOC_WHOLE;
2157             sip = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
2158             sfp->location->data.ptrvalue = sip;
2159             sfp->data.choice = SEQFEAT_BIOSRC;
2160             sfp->data.value.ptrvalue =
2161                 AsnIoMemCopy(bs, (AsnReadFunc) BioSourceAsnRead,
2162                                    (AsnWriteFunc) BioSourceAsnWrite);
2163                if (sap == NULL) {
2164                    sap = SeqAnnotNew();
2165                    sap->type = 1;
2166                }
2167                sap->data = tie_feat(sap->data, sfp);
2168         }
2169         BioSourceFree(bs);
2170         descr = remove_node(descr, v);
2171 
2172     }
2173 /* remove Biosource from protein sequence if it's there
2174     merging BioSource with the one on the top level*/
2175     for (s = seqsep->next; s; s = s->next) {
2176         prot = s->data.ptrvalue;
2177         vnp = ValNodeExtractList(&prot->descr, Seq_descr_source);
2178         if (vnp != NULL) {
2179             bs = vnp->data.ptrvalue;
2180             if (bsrc == NULL) {
2181                 bsrc = BioSourceMerge(bsrc, bs);
2182                 tmp = SeqDescrNew(bssp->descr);
2183                 tmp->choice = Seq_descr_source;
2184                 tmp->data.ptrvalue = bsrc;
2185             } else if (CmpOrgById(bsrc, bs) == TRUE) {
2186                 bsrc = BioSourceMerge(bsrc, bs);
2187              } else {
2188                  ErrPostStr(SEV_ERROR, ERR_ORGANISM_Diff,
2189                  "ATTENTION: different organisms in nuc-prot set");
2190                  retval = ERR_REJECT;
2191              }
2192              if (retval == ERR_REJECT) {
2193                  prot->descr = ValNodeLink(&prot->descr, vnp);
2194              } else {
2195                  BioSourceFree(bs);
2196                  ValNodeFree(vnp);
2197              }
2198         }
2199     }
2200     if (bSingle) {
2201         bsp->descr = descr;
2202         bsp->annot = sap;
2203     } else {
2204         bseg->descr = descr;
2205         bseg->annot = sap;
2206     }
2207     return retval;
2208 }
2209 
2210 /*****************************************************************************
2211 *    check BioSource descr for the parts of segmented set,
2212 *    if organisms  are the same and no "clone" Biosources are the same
2213 *    they would be deleted from parts in the next SeqEntryExplore
2214 *    if different BioSource from the top would be deleted
2215 *****************************************************************************/
CheckBS(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)2216 void CheckBS (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
2217 {
2218     BioseqSetPtr    bssp, tmp;
2219     SeqEntryPtr         segsep, parts;
2220     BSMapPtr        bmp;
2221 
2222     bmp = data;
2223     if (bmp->same == FALSE) {
2224         return;
2225     }
2226     if (IS_Bioseq(sep)) {
2227         return;
2228     }
2229     bssp = (BioseqSetPtr)(sep->data.ptrvalue);
2230     if (bssp->_class != 2) {    /*  do the rest for segset only */
2231         if (bssp->_class >= 7) {
2232             bmp->same = FALSE;   /* for other sets organisms are different */
2233         }
2234         return;
2235     }
2236     segsep = bssp->seq_set;
2237     if (segsep->next == NULL) {
2238         return;
2239     }
2240     if (!IS_Bioseq(segsep->next)) {
2241         tmp = (BioseqSetPtr) (segsep->next->data.ptrvalue); /*segsep->next=parts*/
2242         parts = tmp->seq_set;
2243         if (parts == NULL) {
2244             return;
2245         }
2246         bmp->same = CheckSegDescrChoice(parts, Seq_descr_source);
2247     }
2248 
2249 }
2250 
2251 //LCOV_EXCL_START
seq_loc_compare(SeqLocPtr a,SeqLocPtr b)2252 Int2 seq_loc_compare( SeqLocPtr a, SeqLocPtr b)
2253 {
2254     Int2 retval = -1;
2255     Int4 a_strt, a_stop, b_strt, b_stop;
2256     SeqIdPtr    a_sip, b_sip;
2257 
2258     retval = SeqLocCompare(a, b);
2259     if (retval > 0) {
2260         return retval;
2261     }
2262     a_sip = SeqLocId(a);
2263     b_sip = SeqLocId(b);
2264     if (SeqIdForSameBioseq(a_sip, b_sip)) {
2265         a_strt = SeqLocStart(a);
2266         a_stop = SeqLocStop(a);
2267         b_strt = SeqLocStart(b);
2268         b_stop = SeqLocStop(b);
2269         if (a_stop+1 == b_strt || b_stop+1 == a_strt)
2270             retval = 5;
2271     }
2272     return retval;
2273 }
2274 
/*
 * Remove from the list at *qual2 every qualifier whose name and value both
 * equal (StringCmp) those of some qualifier in the list at *qual1.
 * *qual2 is updated through remove_qual(); *qual1 is only read.
 */
void compare_quals(GBQualPtr PNTR qual1, GBQualPtr PNTR qual2)
{
    GBQualPtr ref, cand, following;

    for (ref = *qual1; ref != NULL; ref = ref->next) {
        cand = *qual2;
        while (cand != NULL) {
            /* grab the successor first: cand may be removed below */
            following = cand->next;
            if (StringCmp(ref->qual, cand->qual) != 0 ||
                StringCmp(ref->val, cand->val) != 0) {
                cand = following;
                continue;
            }
            *qual2 = remove_qual(*qual2, cand);
            cand = following;
        }
    }
}
2289 
feat_join(SeqFeatPtr f1,SeqFeatPtr f2,SeqFeatPtr head)2290 Boolean feat_join(SeqFeatPtr f1, SeqFeatPtr f2, SeqFeatPtr head)
2291 {
2292     Boolean new = FALSE;
2293     Int2    comp;
2294     Int4 a_strt, a_stop, b_strt, b_stop, a, b;
2295     GBQualPtr q1, q2, fq, q1next, q2next;
2296     SeqFeatPtr f;
2297     ImpFeatPtr imp;
2298     SeqLocPtr slp;
2299     SeqIntPtr sip, f1_sip;
2300     Boolean nmatch = FALSE;
2301 
2302     comp = seq_loc_compare(f1->location, f2->location);
2303     switch (comp)
2304     {
2305         case 0:
2306             break;
2307         case 1:
2308             for (q2 = f2->qual; q2 != NULL; q2 = q2->next) {
2309                 for (q1 = f1->qual; q1 != NULL; q1 = q1next) {
2310                     q1next = q1->next;
2311                     if ((StringCmp(q1->qual, q2->qual) == 0) &&
2312                         (StringCmp(q1->val, q2->val) == 0)) {
2313                         f1->qual = remove_qual(f1->qual, q1);
2314                     }
2315                 }
2316             }
2317             break;
2318         case 3:
2319             for (q1 = f1->qual; q1 != NULL; q1 = q1->next) {
2320                 for (q2 = f2->qual; q2 != NULL; q2 = q2next) {
2321                     q2next = q2->next;
2322                     if ((StringCmp(q1->qual, q2->qual) == 0) &&
2323                         (StringCmp(q1->val, q2->val) == 0)) {
2324                         continue;
2325                     } else {
2326                         nmatch = TRUE;
2327                         break;
2328                     }
2329                 }
2330             }
2331             if (nmatch) {
2332                 GBQualFree(f2->qual);
2333             ErrPostStr(SEV_WARNING, ERR_SOURCE_DiffQualifiers,
2334          "Identical source features with unmatching qualifiers");
2335             } else {
2336                 GBQualFree(f2->qual);
2337             ErrPostStr(SEV_WARNING, ERR_SOURCE_Identical,
2338          "Identical source features: one is removed");
2339             }
2340         case 2:
2341             for (q1 = f1->qual; q1 != NULL; q1 = q1->next) {
2342                 for (q2 = f2->qual; q2 != NULL; q2 = q2next) {
2343                     q2next = q2->next;
2344                     if ((StringCmp(q1->qual, q2->qual) == 0) &&
2345                         (StringCmp(q1->val, q2->val) == 0)) {
2346                         f2->qual = remove_qual(f2->qual, q2);
2347                     }
2348                 }
2349             }
2350             break;
2351         case 4:
2352         case 5:
2353             a_strt = SeqLocStart(f1->location);
2354             a_stop = SeqLocStop(f1->location);
2355             b_strt = SeqLocStart(f2->location);
2356             b_stop = SeqLocStop(f2->location);
2357             a = a_strt;
2358             if (b_strt < a_strt)
2359                 a = b_strt;
2360             b = a_stop;
2361             if (b_stop > a_stop)
2362                 b = b_stop;
2363             f = SeqFeatNew();
2364             imp = ImpFeatNew();
2365             imp->key = StringSave("source");
2366             slp = ValNodeNew(NULL);
2367             slp->choice = SEQLOC_INT;
2368             sip = SeqIntNew();
2369             f1_sip = (SeqIntPtr) (f1->location)->data.ptrvalue;
2370             sip->id = SeqIdDup(f1_sip->id);
2371             sip->from = a;
2372             sip->to = b;
2373             slp->data.ptrvalue = sip;
2374             f->location = slp;
2375             for (q1=f1->qual; q1 != NULL; q1 = q1next) {
2376                 q1next = q1->next;
2377                 for (q2=f2->qual; q2 != NULL; q2 = q2next) {
2378                     q2next = q2->next;
2379                     if ((StringCmp(q1->qual, q2->qual) == 0) &&
2380                         (StringCmp(q1->val, q2->val) == 0)) {
2381                         fq = GBQualNew();
2382                         fq->qual = q1->qual;
2383                         q1->qual = NULL;
2384                         fq->val = q1->val;
2385                         q1->val = NULL;
2386                         f->qual = fq;
2387                         f1->qual = remove_qual(f2->qual, q2);
2388                         f2->qual = remove_qual(f1->qual, q1);
2389                     }
2390                 }
2391             }
2392             head = tie_feat(head, f);
2393             new = TRUE;
2394             break;
2395         default:
2396             break;
2397     }
2398     return new;
2399 
2400 }
2401 
/*
 * Post warnings about two source features whose locations compare as 3 in
 * seq_loc_compare() (the case the messages call "identical"); any other
 * comparison result returns silently.
 *
 *   - different number of qualifiers: one DiffQualifiers warning, return;
 *   - otherwise one DiffQualifiers warning for every qualifier of f1 that
 *     has no equal (name and value) on f2, followed in all cases by an
 *     Identical warning.
 *
 * Each message carries at most 50 characters of each printed location.
 *
 * Fix relative to the earlier version: the strings returned by SeqLocPrint
 * are released after being copied (they were leaked before), and the
 * message buffers are no longer function-static.
 */
void count_join(SeqFeatPtr f1, SeqFeatPtr f2)
{
    Int2      nq1, nq2;
    GBQualPtr q1, q2;
    CharPtr   loc_text;
    Char      msg1[51], msg2[51];

    if (seq_loc_compare(f1->location, f2->location) != 3) {
        return;
    }

    /* pre-clear and force-terminate: the copy is bounded to 50 chars */
    msg1[0] = '\0';
    loc_text = SeqLocPrint(f1->location);
    StringNCpy(msg1, loc_text, 50);
    msg1[50] = '\0';
    MemFree(loc_text);

    msg2[0] = '\0';
    loc_text = SeqLocPrint(f2->location);
    StringNCpy(msg2, loc_text, 50);
    msg2[50] = '\0';
    MemFree(loc_text);

    nq1 = 0;
    for (q1 = f1->qual; q1 != NULL; q1 = q1->next) {
        nq1++;
    }
    nq2 = 0;
    for (q2 = f2->qual; q2 != NULL; q2 = q2->next) {
        nq2++;
    }
    if (nq1 != nq2) {
        ErrPostEx(SEV_WARNING, ERR_SOURCE_DiffQualifiers,
     "Identical source features with unmatching number of qualifiers %s|%s",
             msg1, msg2);
        return;
    }

    for (q1 = f1->qual; q1 != NULL; q1 = q1->next) {
        for (q2 = f2->qual; q2 != NULL; q2 = q2->next) {
            if ((StringCmp(q1->qual, q2->qual) == 0) &&
                (StringCmp(q1->val, q2->val) == 0)) {
                break;
            }
        }
        if (q2 == NULL) {   /* ran off the end: q1 has no equal on f2 */
            ErrPostEx(SEV_WARNING, ERR_SOURCE_DiffQualifiers,
            "Identical source features with unmatching qualifiers %s|%s",
                msg1, msg2);
        }
    }
    ErrPostEx(SEV_WARNING, ERR_SOURCE_Identical, "Identical source features; %s|%s",
        msg1, msg2);
}
2440 
/*
 * SeqEntryExplore-style callback; `data` is a WholeFeatPtr accumulator.
 *
 * For a Bioseq that is not a protein (mol != Seq_mol_aa), every BioSource
 * feature in its feature tables that passes check_whole() against the
 * Bioseq length bumps wfp->count and is recorded on wfp->sfp as a new
 * SeqFeat.  The recorded SeqFeat carries the same data choice and the SAME
 * data pointer as the original feature - the BioSource is shared, not
 * copied.  BioseqSets and proteins are ignored.
 */
static void FindWholeBSFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    WholeFeatPtr acc = (WholeFeatPtr) data;
    BioseqPtr    bioseq;
    SeqAnnotPtr  annot;
    SeqFeatPtr   feat, shadow;

    if (!IS_Bioseq(sep)) {
        return;
    }
    bioseq = (BioseqPtr)(sep->data.ptrvalue);
    if (bioseq->mol == Seq_mol_aa) {
        return;
    }

    for (annot = bioseq->annot; annot; annot = annot->next) {
        if (annot->type == 1) {             /* feature tables only */
            feat = annot->data;
            while (feat != NULL) {
                if (feat->data.choice == SEQFEAT_BIOSRC &&
                    check_whole(feat, bioseq->length) != FALSE) {
                    acc->count++;
                    shadow = SeqFeatNew();
                    shadow->data.choice = feat->data.choice;
                    shadow->data.value.ptrvalue = feat->data.value.ptrvalue;
                    acc->sfp = tie_feat(acc->sfp, shadow);
                }
                feat = feat->next;
            }
        }
    }
}
/*
 * SeqEntryExplore-style callback; `data` is the WholeFeatPtr filled in by
 * FindWholeBSFeat.
 *
 * For a non-protein Bioseq, each source descriptor is compared with the
 * BioSource held by the first feature recorded in wfp->sfp.  When
 * CmpOrgById() reports the same organism and BSComparisonEx(.., TRUE)
 * returns 0, the feature's BioSource is merged into the descriptor.
 *
 * If at least one merge happened, the first BioSource feature of every
 * feature table on the Bioseq is removed (after clearing feature indexes
 * for its entity when SeqMgrFeaturesAreIndexed() says they exist).
 * NOTE(review): the removed feature is simply the first BioSource feature
 * in each table; it is not re-checked to be the whole-length one - confirm
 * against callers.
 */
static void MergeWholeBSFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    WholeFeatPtr acc = (WholeFeatPtr) data;
    BioseqPtr    bioseq;
    ValNodePtr   desc;
    SeqAnnotPtr  annot;
    SeqFeatPtr   feat;
    BioSourcePtr desc_src = NULL, feat_src = NULL;
    Boolean      merged = FALSE;

    if (!IS_Bioseq(sep)) {
        return;
    }
    bioseq = (BioseqPtr)(sep->data.ptrvalue);
    if (bioseq->mol == Seq_mol_aa) {
        return;
    }

    for (desc = bioseq->descr; desc != NULL; desc = desc->next) {
        if (desc->choice == Seq_descr_source) {
            desc_src = (BioSourcePtr) desc->data.ptrvalue;
            if (acc->sfp != NULL) {
                feat_src = (BioSourcePtr) acc->sfp->data.value.ptrvalue;
            }
            if (desc_src != NULL && feat_src != NULL &&
                CmpOrgById(desc_src, feat_src) == TRUE &&
                BSComparisonEx(desc_src, feat_src, TRUE) == 0) {
                BioSourceMerge(desc_src, feat_src);
                merged = TRUE;
            }
        }
    }
    if (!merged) {
        return;
    }

    for (annot = bioseq->annot; annot; annot = annot->next) {
        if (annot->type != 1) {
            continue;
        }
        /* advance to the first BioSource feature of this table, if any */
        feat = annot->data;
        while (feat != NULL && feat->data.choice != SEQFEAT_BIOSRC) {
            feat = feat->next;
        }
        if (feat == NULL) {
            continue;
        }
        if (SeqMgrFeaturesAreIndexed (feat->idx.entityID) != 0) {
            SeqMgrClearFeatureIndexes(feat->idx.entityID, NULL);
        }
        annot->data = remove_feat(annot->data, feat);
    }
}
2536 
2537 // NOTE: This never finds whole features because they were already cleaned up by
2538 // ConvertFullLenSourceFeatToDesc
CombineBSFeat(SeqEntryPtr sep)2539 void CombineBSFeat(SeqEntryPtr sep)
2540 {
2541     WholeFeatPtr wfp;
2542 
2543     wfp = WholeFeatNew();
2544     SeqEntryExplore(sep, (Pointer)wfp, FindWholeBSFeat);
2545     if (wfp->count == 1) {
2546         SeqEntryExplore(sep, (Pointer)wfp, MergeWholeBSFeat);
2547     }
2548     WholeFeatFree(wfp);
2549 }
2550 
2551 /*****************************************************************************
2552 *
2553 *  Count multiple source features print out error messages
2554 *
2555 *****************************************************************************/
CountSourceFeat(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)2556 void CountSourceFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
2557 {
2558     Boolean        whole = FALSE;
2559     Int2        count=0;
2560     Int4        len;
2561     ValNodePtr    vnp;
2562     SeqFeatPtr    f, ff;
2563     SeqAnnotPtr    sap, ap;
2564     BioseqPtr    bsp = NULL;
2565     ImpFeatPtr    imp;
2566     CharPtr        f_org, ff_org;
2567     GBQualPtr    q;
2568     SeqIdPtr    sidp;
2569     TextSeqIdPtr    tsip = NULL;
2570     Pointer pnt;
2571     Boolean PNTR pultiple;
2572     Boolean     is_na = FALSE;
2573 
2574     pultiple = (Boolean PNTR) data;
2575     if (IS_Bioseq(sep)) {
2576         bsp = (BioseqPtr)(sep->data.ptrvalue);
2577         vnp = bsp->descr;
2578         sap = bsp->annot;
2579         len = bsp->length;
2580         if (bsp->mol != Seq_mol_aa) {
2581             is_na = TRUE;
2582         }
2583     }
2584     if (bsp == NULL || !is_na) {
2585         return;
2586     }
2587     for (sidp = bsp->id; sidp != NULL; sidp = sidp->next) {
2588         pnt = sidp->data.ptrvalue;
2589         switch (sidp->choice)
2590         {
2591             case SEQID_LOCAL:      /* local */
2592             case SEQID_GIBBSQ:      /* gibbseq */
2593             case SEQID_GIBBMT:      /* gibbmt */
2594             case SEQID_GIIM:      /* giimid */
2595             case SEQID_PATENT:      /* patent seq id */
2596             case SEQID_GENERAL:     /* general */
2597             case SEQID_GI:     /* gi */
2598             case SEQID_PDB:
2599             continue;
2600             case SEQID_GENBANK:      /* genbank */
2601             case SEQID_EMBL:      /* embl */
2602             case SEQID_PIR:      /* pir   */
2603             case SEQID_SWISSPROT:      /* swissprot */
2604             case SEQID_OTHER:     /* other */
2605             case SEQID_DDBJ:
2606             case SEQID_PRF:
2607                  tsip = (TextSeqIdPtr) sidp->data.ptrvalue;
2608                 break;
2609             default:
2610                 continue;
2611         }
2612         if (tsip != NULL) {
2613             flat2asn_install_accession_user_string(tsip->accession);
2614             flat2asn_install_locus_user_string(tsip->name);
2615             break;
2616         }
2617     }
2618     if (tsip == NULL) {
2619         flat2asn_install_accession_user_string("SET_UP");
2620         flat2asn_install_locus_user_string("SET_UP");
2621     }
2622     for (ap = sap; ap; ap = ap->next) {
2623         if (sap->type != 1) {
2624             continue;
2625         }
2626         for (f = ap->data; f != NULL; f=f->next) {
2627             if (f->data.choice == SEQFEAT_IMP) {
2628                 imp = (ImpFeatPtr) f->data.value.ptrvalue;
2629                 if (imp != NULL && StringCmp(imp->key, "source") == 0) {
2630                     for(q = f->qual; q != NULL; q = q->next) {
2631                         if (StringCmp(q->qual, "organism") == 0) {
2632                             break;
2633                         }
2634                     }
2635                     if (q == NULL) {
2636         ErrPostStr(SEV_WARNING, ERR_SOURCE_MissingOrganism, "Missing /organism in 'source' feature");
2637                     }
2638                     count++;
2639                     whole = check_whole(f, len);
2640                 }
2641             }
2642         }
2643     }
2644     if (count == 0) {
2645             ErrPostStr(SEV_WARNING, ERR_SOURCE_NotFound, "NO SOURCE feature");
2646             *pultiple = TRUE;
2647     } else if (count == 1) {
2648         if (!whole) {
2649             ErrPostStr(SEV_WARNING, ERR_SOURCE_NotFoundWHole, "one NOT_WHOLE SOURCE feature");
2650             *pultiple = TRUE;
2651         }
2652     } else if (count > 1) {
2653 /* check for /transposon and /insertion_seq and /clone*/
2654         if (true_multiple(sap, len)) {
2655             ErrPostStr(SEV_WARNING, ERR_SOURCE_Multiple, "MULTIPLE SOURCE features");
2656             *pultiple = TRUE;
2657         }
2658         for (ap = sap; ap; ap = ap->next) {
2659             if (sap->type != 1) {
2660                 continue;
2661             }
2662             for (f = ap->data; f != NULL; f = f->next) {
2663                 if (f->qual == NULL)
2664                     continue;
2665                 f_org = NULL;
2666                 for(q = f->qual; q != NULL; q = q->next) {
2667                     if (StringCmp(q->qual, "organism") == 0) {
2668                         f_org = q->val;
2669                         break;
2670                     }
2671                 }
2672                 for (ff = f->next; ff != NULL; ff = ff->next) {
2673                     if (ff->qual == NULL)
2674                         continue;
2675                     ff_org = NULL;
2676                     for(q = ff->qual; q != NULL; q = q->next) {
2677                         if (StringCmp(q->qual, "organism") == 0) {
2678                             ff_org = q->val;
2679                             break;
2680                         }
2681                     }
2682                     if (f_org && ff_org) {
2683                         if (StringCmp(f_org, ff_org) != 0) {
2684             ErrPostEx(SEV_WARNING, ERR_SOURCE_Diff, "Different SOURCE features: %s|%s",
2685                             f_org, ff_org);
2686                         }
2687                         count_join(f, ff);
2688                     }
2689                 }
2690             }
2691         }
2692     }
2693 
2694 }
2695 
2696 /*****************************************************************************
2697 *
2698 *  Check multiple source features and try to correct them
2699 *
2700 *****************************************************************************/
2701 // NOTE that this is never called by the cleanup library
CorrectSourceFeat(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)2702 void CorrectSourceFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
2703 {
2704     Boolean        whole = FALSE, new = FALSE;
2705     Int2        count=0;
2706     Int4        len;
2707     ValNodePtr    vnp0, vnp;
2708     SeqFeatPtr    sfp = NULL, tmp_sfp = NULL, f, ff, fnext;
2709     SeqAnnotPtr    sap;
2710     BioseqPtr    bsp = NULL;
2711     BioseqSetPtr    bssp;
2712     ImpFeatPtr    imp;
2713     OrgRefPtr    orp;
2714     CharPtr        name, org_name, f_org = NULL, ff_org = NULL;
2715     GBQualPtr    q;
2716     SeqLocPtr     slp;
2717     static Char        msg[51];
2718     CharPtr PNTR pporg;
2719     Boolean     is_na = FALSE;
2720 
2721     pporg = (CharPtr PNTR) data;
2722     if (IS_Bioseq(sep))
2723     {
2724         bsp = (BioseqPtr)(sep->data.ptrvalue);
2725         vnp = bsp->descr;
2726         sap = bsp->annot;
2727         len = bsp->length;
2728         if (bsp->mol != Seq_mol_aa) {
2729             is_na = TRUE;
2730         }
2731     }
2732     else
2733     {
2734         bssp = (BioseqSetPtr)(sep->data.ptrvalue);
2735         vnp = bssp->descr;
2736         sap = bssp->annot;
2737     }
2738 
2739     for (vnp0 = vnp; vnp != NULL; vnp = vnp->next) {
2740         if (vnp->choice == Seq_descr_org) {
2741             if ((orp = vnp->data.ptrvalue) != NULL) {
2742                 if (*pporg == NULL) {
2743                     *pporg = orp->taxname;
2744                 }
2745             }
2746         }
2747     }
2748     org_name = *pporg;
2749     if (bsp == NULL || !is_na) {
2750         return;
2751     }
2752     if (sap != NULL && sap->type == 1) {
2753         tmp_sfp = (SeqFeatPtr) (sap->data);
2754         sfp = ExtractSourceFeatList(&(tmp_sfp));
2755     }
2756     for (f = sfp; f != NULL; f=f->next) {
2757         count++;
2758         if (f->data.choice != SEQFEAT_IMP) {
2759             continue;
2760         }
2761         imp = (ImpFeatPtr) f->data.value.ptrvalue;
2762         if (imp != NULL) {
2763             whole = check_whole(sfp, len);
2764             if (whole) {
2765                 for(q = f->qual; q != NULL; q = q->next) {
2766                     if (StringCmp(q->qual, "organism") == 0) {
2767                         name = MemNew(StringLen(q->val)+1);
2768                         StringCpy(name, q->val);
2769                     }
2770                 }
2771             }
2772         }
2773     }
2774     if (!whole) {
2775         if (count == 0) {
2776             ErrPostStr(SEV_WARNING, ERR_SOURCE_NotFound, "NO ORGANISM feature! Create one");
2777             sfp = SeqFeatNew();
2778             imp = ImpFeatNew();
2779             sfp->data.choice = SEQFEAT_IMP;
2780             sfp->data.value.ptrvalue = imp;
2781             imp->key = StringSave("source");
2782             q = GBQualNew();
2783             q->qual = StringSave("organism");
2784             if (org_name) {
2785                 q->val = StringSave(org_name);
2786             } else {
2787                 q->val = StringSave("unknown");
2788             }
2789             sfp->qual = q;
2790             slp = ValNodeNew(NULL);
2791             slp->choice = SEQLOC_WHOLE;
2792             slp->data.ptrvalue = (SeqLocPtr) SeqIdDup(bsp->id);
2793             sfp->location = slp;
2794 
2795         } else if (count == 1) {
2796             StringNCpy(msg, SeqLocPrint(sfp->location), 50);
2797             ErrPostEx(SEV_WARNING, ERR_SOURCE_NotFoundWHole,
2798             "Convert source feature %s to whole", msg);
2799             slp = ValNodeNew(NULL);
2800             slp->choice = SEQLOC_WHOLE;
2801             slp->data.ptrvalue = (SeqLocPtr) SeqIdDup(bsp->id);
2802             sfp->location = slp;
2803 
2804         }
2805 
2806     }
2807   if (count > 1) {
2808     do {
2809         for (f = sfp; f != NULL; f = f->next) {
2810             if (f->qual == NULL)
2811                 continue;
2812             for(q = f->qual; q != NULL; q = q->next) {
2813                 if (StringCmp(q->qual, "organism") == 0) {
2814                     f_org = q->val;
2815                     break;
2816                 }
2817             }
2818             for (ff = f->next; ff != NULL; ff = ff->next) {
2819                 if (ff->qual == NULL)
2820                     continue;
2821                 for(q = ff->qual; q != NULL; q = q->next) {
2822                     if (StringCmp(q->qual, "organism") == 0) {
2823                         ff_org = q->val;
2824                         break;
2825                     }
2826                 }
2827                 if (StringCmp(f_org, ff_org) != 0)
2828                     continue;
2829                 new = feat_join(f, ff, sfp);
2830             }
2831         }
2832     } while (new);
2833   }
2834     for (f = sfp; f != NULL; f = fnext) {
2835         fnext = f->next;
2836         if (f->qual == NULL) {
2837             sfp = remove_feat(sfp, f);
2838         }
2839     }
2840     tmp_sfp = tie_feat(tmp_sfp, sfp);
2841     if (sap) {
2842         sap->data = tmp_sfp;
2843         if (bsp)
2844         {
2845             bsp = (BioseqPtr)(sep->data.ptrvalue);
2846             bsp->descr = vnp0;
2847             if (tmp_sfp == NULL) {
2848                 bsp->annot = NULL;
2849             }
2850         }
2851         else
2852         {
2853             bssp->descr = vnp0;
2854             if (tmp_sfp == NULL) {
2855                 bssp->annot = NULL;
2856             }
2857         }
2858     }
2859 }
2860 //LCOV_EXCL_STOP
2861 
2862 
BioSourceToGeneticCode(BioSourcePtr biop)2863 Int2 BioSourceToGeneticCode (BioSourcePtr biop)
2864 {
2865   OrgNamePtr  onp;
2866   OrgRefPtr   orp;
2867   Uint1       pgcode;
2868 
2869   if (biop != NULL) {
2870     orp = biop->org;
2871     if (orp != NULL) {
2872       onp = orp->orgname;
2873       if (onp != NULL) {
2874         if (biop->genome == GENOME_kinetoplast ||
2875             biop->genome == GENOME_mitochondrion ||
2876             biop->genome == GENOME_hydrogenosome) {
2877           return onp->mgcode;
2878         } else if (biop->genome == GENOME_chloroplast ||
2879                    biop->genome == GENOME_chromoplast ||
2880                    biop->genome == GENOME_plastid ||
2881                    biop->genome == GENOME_cyanelle ||
2882                    biop->genome == GENOME_apicoplast ||
2883                    biop->genome == GENOME_leucoplast ||
2884                    biop->genome == GENOME_proplastid) {
2885           if (onp->pgcode > 0) {
2886             return onp->pgcode;
2887           } else {
2888             pgcode = GetSpecialPlastidGenCode (orp->taxname, onp->lineage);
2889             if (pgcode > 0) {
2890               return pgcode;
2891             }
2892             return 11;
2893           }
2894         } else {
2895           return onp->gcode;
2896         }
2897       }
2898     }
2899   }
2900   return 0;
2901 }
2902 
/* Descriptor visitor: stores the BioSource of the first Seq_descr_source
 * descriptor it is given into *userdata (a BioSourcePtr PNTR).  Once the
 * slot is non-NULL, later descriptors are ignored.
 */
static void GetTopBiop (SeqDescrPtr sdp, Pointer userdata)

{
  BioSourcePtr PNTR  slot;

  if (sdp != NULL && sdp->choice == Seq_descr_source) {
    slot = (BioSourcePtr PNTR) userdata;
    if (slot != NULL && *slot == NULL) {
      *slot = (BioSourcePtr) sdp->data.ptrvalue;
    }
  }
}
2914 
GetTopBioSourceFromSep(SeqEntryPtr sep)2915 static BioSourcePtr GetTopBioSourceFromSep (SeqEntryPtr sep)
2916 
2917 {
2918   BioSourcePtr  biop = NULL;
2919 
2920   VisitDescriptorsInSep (sep, (Pointer) &biop, GetTopBiop);
2921   return biop;
2922 }
2923 
/* Rewrites old-style PID general ids in a Seq-id chain.
 *
 * For every SEQID_GENERAL node whose Dbtag db starts with "PIDe" or "PIDd",
 * the db becomes "PID" and the numeric tag id N becomes the string tag
 * "eN" / "dN" (the numeric id is reset to 0).
 *
 *   vnpp - address of the head of the Seq-id chain; the chain itself is not
 *          relinked, only the Dbtags it points to are edited.
 */
static void FixPIDDbtag(ValNodePtr PNTR vnpp)
{
    ValNodePtr  vnp;
    DbtagPtr    db;
    Char        prefix;
    Char        val[166];

    if (vnpp == NULL) {
        return;
    }
    for (vnp = *vnpp; vnp != NULL; vnp = vnp->next) {
        if (vnp->choice != SEQID_GENERAL) {
            continue;
        }
        db = (DbtagPtr) vnp->data.ptrvalue;
        /* a Dbtag without an object id has nothing to convert */
        if (db == NULL || db->tag == NULL) {
            continue;
        }
        if (StringNCmp(db->db, "PIDe", 4) == 0) {
            prefix = 'e';
        } else if (StringNCmp(db->db, "PIDd", 4) == 0) {
            prefix = 'd';
        } else {
            continue;
        }
        MemFree(db->db);
        db->db = StringSave("PID");
        sprintf(val, "%c%ld", prefix, (long) db->tag->id);
        /* release any previous string tag before replacing it */
        MemFree(db->tag->str);
        db->tag->str = StringSave(val);
        db->tag->id = 0;
    }
}
2955 
GetProduct(ValNodePtr product,ValNodePtr location)2956 static CharPtr GetProduct(ValNodePtr product, ValNodePtr location)
2957 {
2958     CharPtr protein_seq=NULL, start_ptr=NULL;
2959     Int4 length;
2960     SeqPortPtr spp;
2961     Uint1 residue, code;
2962     BioseqPtr bsp;
2963     SeqIdPtr sip;
2964 
2965     if (product == NULL)
2966         return NULL;
2967     sip = SeqLocId(product);
2968     bsp = BioseqFindCore(sip);
2969     if (bsp == NULL)   /* Bioseq is (or has been) in memory */
2970         return NULL;
2971     code = Seq_code_ncbieaa;
2972     length = SeqLocLen(product);
2973     if (length <= 0) {
2974         return NULL;
2975     }
2976     start_ptr = protein_seq =
2977         (CharPtr) MemNew((size_t) (length*sizeof(CharPtr)));
2978     spp = SeqPortNewByLoc(product, code);
2979     spp->do_virtual = TRUE;
2980     while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
2981         if ( !IS_residue(residue) && residue != INVALID_RESIDUE )
2982             continue;
2983         if (residue == INVALID_RESIDUE)
2984             residue = (Uint1) 'X';
2985         *protein_seq = residue;
2986         protein_seq++;
2987     }
2988         SeqPortFree(spp);
2989     return start_ptr;
2990 }
2991 
/* Removes the first occurrence of `str` from `base`, in place.
 * The tail of the string is shifted left over the removed text.
 * Returns `base` (unchanged if `str` does not occur).
 */
static CharPtr stripStr(CharPtr base, CharPtr str)
{
    CharPtr bptr, eptr;

    bptr = StringStr(base, str);
    if (bptr != NULL) {
        eptr = bptr + StringLen(str);
        /* source and destination overlap, so a plain string copy is not allowed */
        memmove(bptr, eptr, StringLen(eptr) + 1);
    }

    return base;
}
3004 
/* Trims a string and returns a saved copy of the result.
 *
 *   - leading whitespace and commas are skipped
 *   - each newline plus the whitespace after it collapses to one space
 *   - trailing spaces, tabs, semicolons, commas and double quotes are cut
 *
 * NOTE: `str` itself is edited in place while this is done.
 * Returns an Nlm_StringSave copy, or NULL if `str` is NULL or nothing is left.
 */
static CharPtr  space_save(CharPtr str)
{
    CharPtr s, ss;
    size_t  len;

    if (str == NULL) {
        return NULL;
    }
    /* <ctype.h> classifiers need an unsigned char value */
    while (isspace((unsigned char) *str) || *str == ',') {
        str++;
    }
    for (s = str; *s != '\0'; s++) {
        if (*s == '\n') {
            for (ss = s+1; isspace((unsigned char) *ss); ss++) continue;
            *s = ' ';
            /* regions overlap: shift the tail with memmove */
            memmove(s+1, ss, StringLen(ss) + 1);
        }
    }
    /* trim from the end by length, never forming a pointer before str */
    len = StringLen(str);
    while (len > 0 && (str[len-1] == ' ' || str[len-1] == ';' ||
           str[len-1] == ',' || str[len-1] == '\"' || str[len-1] == '\t')) {
        str[--len] = '\0';
    }

    if (*str == '\0') {
        return NULL;
    } else {
        return Nlm_StringSave(str);
    }
}
3031 
/* Removes the two fixed translation notes from a feature comment, trims what
 * is left with space_save, and installs the trimmed copy as the new comment
 * (NULL when nothing remains).  Returns sfp.
 */
static SeqFeatPtr StripCDSComment(SeqFeatPtr sfp)
{
    CharPtr  conflictNote = "Author-given protein sequence is in conflict with the conceptual translation.";
    CharPtr  methodNote = "Method: conceptual translation supplied by author.";
    CharPtr  cleaned, last;

    if (sfp->comment == NULL) {
        return sfp;
    }

    /* both notes are cut out of the existing buffer in place */
    stripStr(sfp->comment, conflictNote);
    stripStr(sfp->comment, methodNote);

    cleaned = space_save(sfp->comment);
    if (cleaned != NULL) {
        last = cleaned + StringLen(cleaned) - 1;
        if (*last == ';') {
            *last = '\0';
        }
    }

    MemFree(sfp->comment);
    sfp->comment = cleaned;
    return sfp;
}
3055 
CompareTranslation(ByteStorePtr bsp,CharPtr qval)3056 static Boolean CompareTranslation(ByteStorePtr bsp, CharPtr qval)
3057 {
3058     CharPtr                 ptr;
3059     Int2             residue /* , residue1, residue2 */ ;
3060     Int4             len, blen;
3061     /*
3062     Boolean         done;
3063     */
3064 
3065     if(qval == NULL || *qval == '\0')
3066         return(FALSE);
3067     len = StringLen(qval);
3068     BSSeek(bsp, 0, SEEK_SET);
3069 
3070     blen = BSLen(bsp);
3071 #if 0
3072     done = FALSE;
3073     while ((! done) && (len)) {
3074           residue1 = qval[(len-1)];
3075           if (residue1 == 'X')    /* remove terminal X */
3076                 len--;
3077           else
3078                 done = TRUE;
3079      }
3080      done = FALSE;
3081      while ((! done) && (blen)) {
3082           BSSeek(bsp, (blen-1), SEEK_SET);
3083           residue2 = BSGetByte(bsp);
3084           if (residue2 == 'X')
3085                 blen--;
3086           else
3087                 done = TRUE;
3088      }
3089 #endif
3090         BSSeek(bsp, 0, SEEK_SET);
3091         if (blen != len) {
3092             return FALSE;
3093         } else {
3094             for (ptr = qval; *ptr != '\0' &&
3095                                 (residue = BSGetByte(bsp)) != EOF; ptr++) {
3096 
3097                  if (residue != *ptr) {
3098                     return FALSE;
3099                  }
3100 
3101              } /* for */
3102 
3103          } /* compare two sequences */
3104             return TRUE;
3105 }
3106 
/* Feature visitor for coding regions (used by CheckGeneticCode).
 *
 *   sfp      - feature being visited; anything but a CdRegion is ignored
 *   userdata - Int2Ptr holding the genetic code expected for the record
 *
 * For a CDS with a product it removes every "PID*" db_xref and converts
 * PIDe/PIDd general ids on the product location, the product Bioseq and its
 * protein features.  If the conflict flag is set, the stored product is
 * compared with the conceptual translation: on a match the flag is cleared,
 * otherwise the product's MolInfo tech is set to 13.  The asn2ff-generated
 * comment text is then stripped, and for non-pseudo features a genetic code
 * that differs from *userdata is reported.
 */
static void CheckGCode (SeqFeatPtr sfp, Pointer userdata)

{
    Int2Ptr         codep;
    Uint1           code;
    SeqFeatPtr      f;
    CdRegionPtr     cds;
    BioseqPtr       bsp = NULL;
    SeqAnnotPtr     ap;
    ValNodePtr      vnp, vnpnext;
    DbtagPtr        db;
    GeneticCodePtr  grp;
    Uint1           gcpvalue;
    CharPtr         protein_seq;
    CharPtr         str;
    ByteStorePtr    byte_sp;
    MolInfoPtr      mfp;

    if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return;
    codep = (Int2Ptr) userdata;
    if (codep == NULL) return;
    code = (Uint1) *codep;

    cds = (CdRegionPtr) sfp->data.value.ptrvalue;
    if (cds == NULL) return;

    if (sfp->product != NULL) {
/*  remove all PID dbxref  */
        for (vnp = sfp->dbxref; vnp != NULL; vnp = vnpnext) {
            vnpnext = vnp->next;
            db = (DbtagPtr) vnp->data.ptrvalue;
            if (db != NULL && db->db != NULL &&
                StringNCmp(db->db, "PID", 3) == 0) {
                sfp->dbxref = remove_node(sfp->dbxref, vnp);
            }
        }
/* change SeqId GENERAL dbtag in the product location */
        vnp = SeqLocId(sfp->product);
        FixPIDDbtag(&vnp);
/*  change PID in protein SeqID GENERAL dbtag  */
        bsp = BioseqFind(SeqLocId(sfp->product));
        if (bsp != NULL) {
            FixPIDDbtag(&(bsp->id));
/* change SeqId GENERAL dbtag in ProtRef */
            for (ap = bsp->annot; ap != NULL; ap = ap->next) {
                if (ap->type != 1) {
                    continue;
                }
                for (f = ap->data; f != NULL; f = f->next) {
                    if (f->data.choice != SEQFEAT_PROT) {
                        continue;
                    }
                    vnp = SeqLocId(f->location);
                    FixPIDDbtag(&vnp);
                }
            }
        }
    }

/* check the translation - skip if conflict flag not set */
    if (sfp->product != NULL && cds->conflict) {
        protein_seq = GetProduct(sfp->product, sfp->location);
        byte_sp = ProteinFromCdRegionEx (sfp, FALSE, FALSE);
        if (CompareTranslation (byte_sp, protein_seq)) {
            cds->conflict = FALSE;
        } else if (bsp != NULL) {
            for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next) {
                if (vnp->choice == Seq_descr_molinfo) {
                    break;
                }
            }
            if (vnp != NULL) {
                mfp = (MolInfoPtr) vnp->data.ptrvalue;
                if (mfp != NULL) {
                    mfp->tech = 13; /* _concept_transl_a */
                }
            }
        }
        MemFree(protein_seq);
        if (byte_sp != NULL) {
            BSFree(byte_sp);
        }
    }

/* remove asn2ff_generated comments */
    sfp = StripCDSComment(sfp);

/* check genetic code */
    if (GBQualPresent("pseudo", sfp->qual) == TRUE) {
        return;
    }
    if (sfp->pseudo) return;

    /* a CDS without an explicit code is treated as code 1 */
    gcpvalue = 1;
    grp = cds->genetic_code;
    if (grp != NULL) {
        vnp = (ValNodePtr) grp->data.ptrvalue;
        if (vnp != NULL) {
            gcpvalue = (Uint1) vnp->data.intvalue;
        }
    }
    if (gcpvalue != code) {
        str = SeqLocPrint(sfp->location);
        ErrPostEx(SEV_ERROR, ERR_TAXONOMY_GeneticCode,
    "Genetic code from Taxonomy server is different from the one in CDS %s: %d|%d", str, (int) code, (int) gcpvalue);
        MemFree(str);
    }
}
3215 
3216 /***************************************************************************
3217 *    This function is twofold
3218 *    1 - checks genetic code with Taxonomy dbase
3219 *    checks db_xref and removes them all if product is present
3220 *    changes PIDe to PID in dbtag
3221 ****************************************************************************/
CheckGeneticCode(SeqEntryPtr sep)3222 static void CheckGeneticCode (SeqEntryPtr sep)
3223 
3224 {
3225   BioSourcePtr  biop;
3226   Int2          code;
3227 
3228   if (sep == NULL) return;
3229   biop = GetTopBioSourceFromSep (sep);
3230   if (biop == NULL) return;
3231   code = BioSourceToGeneticCode (biop);
3232 
3233   if (code <= 0) {
3234     ErrPostStr(SEV_WARNING, ERR_SOURCE_GeneticCode, "Genetic code in BioSource not found");
3235     return;
3236   }
3237 
3238   VisitFeaturesInSep (sep, (Pointer) &code, CheckGCode);
3239 }
3240 
3241 //LCOV_EXCL_START
ParseRange(CharPtr pos,Int4 PNTR from,Int4 PNTR to)3242 static Boolean ParseRange(CharPtr pos, Int4 PNTR from, Int4 PNTR to)
3243 {
3244     CharPtr  ptr, ptr1, ptr2;
3245 
3246     *from = *to = -1;
3247 
3248     if (!IS_DIGIT(*pos))
3249         return FALSE;
3250                                         /* 1st digit */
3251     for (ptr = pos; IS_DIGIT(*ptr) && *ptr != '\0'; ptr++) continue;
3252 
3253     if (*ptr != '\0') {
3254         *from = (Int4) atoi(pos);
3255 
3256         ptr1 = ptr;
3257         if (*ptr1 == '.')
3258             ++ptr1;
3259         else
3260             return FALSE;
3261 
3262         if (*ptr1 == '.') {
3263             ++ptr1;
3264         } else {
3265             return FALSE;
3266         }                             /* 2nd digit */
3267         for (ptr2 = ptr1; IS_DIGIT(*ptr2) && *ptr2 != '\0'; ptr2++) continue;
3268 
3269         if (*ptr2 != '\0') {
3270             return FALSE;
3271         } else {
3272             *to = (Int4) atoi(ptr1);
3273             return (TRUE);
3274         }
3275     } else {
3276         return FALSE;
3277     }
3278 
3279 }
3280 
SeqLocFromPos(SeqIdPtr sid,CharPtr pos)3281 static SeqLocPtr SeqLocFromPos(SeqIdPtr sid, CharPtr pos)
3282 {
3283     SeqLocPtr  slp;
3284     SeqIntPtr  sip;
3285     Int4         from, to;
3286 
3287     if (ParseRange(pos, &from, &to)) {
3288 
3289         sip = SeqIntNew();
3290         sip->from = from - 1;
3291         sip->to = to - 1;
3292 
3293         slp = ValNodeNew(NULL);
3294         slp->choice = SEQLOC_INT;
3295         slp->data.ptrvalue = SeqIdDup(sid);
3296     } else {
3297         slp = ValNodeNew(NULL);
3298         slp->choice = SEQLOC_WHOLE;
3299         slp->data.ptrvalue = (SeqIdPtr) SeqIdDup(sid);
3300     }
3301     return (slp);
3302 
3303 }
3304 
/* Extracts the position text that follows "(pos:" in a qualifier value.
 * Blanks after "(pos:" are skipped, a literal "(complement)" is skipped if
 * present, and the text up to the next comma (or end of string) is returned
 * as a TextSave copy.  Returns NULL when "(pos:" is not found.
 */
static CharPtr GetQualValuePos(CharPtr qval)
{
   CharPtr  start, stop;

   start = StringStr(qval, "(pos:");
   if (start == NULL) {
       return NULL;
   }
   start += 5;

   for (; *start == ' '; start++) continue;
   if (StringNCmp(start, "(complement)", 12) == 0) {
       start += 12;
   }
   for (; *start == ' '; start++) continue;

   stop = start;
   while (*stop != ',' && *stop != '\0') {
       stop++;
   }

   return (TextSave(start, stop - start));
}
3326 
GetQualValueAa(CharPtr qval)3327 static Uint1 GetQualValueAa(CharPtr qval)
3328 {
3329    CharPtr  str, eptr = NULL, ptr;
3330    Uint1    aa = 0;
3331 
3332     str = StringStr(qval, "aa:");
3333     if (str != NULL) {
3334            str += 3;
3335            while (*str == ' ')
3336                ++str;
3337            for (eptr = str; *eptr != ')' && *eptr != ' ' && *eptr != '\0'; eptr++) continue;
3338     }
3339 
3340     if (eptr != NULL && str != NULL) {
3341       ptr = TextSave(str, eptr-str);
3342       aa = ValidAminoAcid(ptr);
3343       MemFree(ptr);
3344     }
3345 
3346     return (aa);
3347 
3348 }
3349 
3350 // Note: conversion of impfeat cds is handled by CleanUpSeqFeat,
3351 // which is called before this function is
ImpFeatToCdregion(SeqFeatPtr sfp)3352 Boolean ImpFeatToCdregion(SeqFeatPtr sfp)
3353 {
3354     ImpFeatPtr imp;
3355     GBQualPtr    q, qnext;
3356     Int2        frame = -1;
3357     CdRegionPtr crp;
3358     CharPtr        pos;
3359     GeneticCodePtr  gcp;
3360     Uint1        gc;
3361     ValNodePtr    vnp;
3362     Choice            cp;
3363     CodeBreakPtr    hcbp = NULL, cbp;
3364     SeqIntPtr        sip;
3365     SeqLocPtr         loc;
3366     BioseqPtr       bsp;
3367     SeqIdPtr        sidp = NULL;
3368 
3369     if (sfp == NULL)
3370         return FALSE;
3371     if (sfp->data.choice != SEQFEAT_IMP)
3372         return FALSE;
3373     imp = sfp->data.value.ptrvalue;
3374     if (StringCmp(imp->key, "CDS") != 0)
3375         return FALSE;
3376 
3377     /* do not convert ImpCDS if EMBL or DDBJ */
3378     bsp = BioseqFindFromSeqLoc (sfp->location);
3379     if (bsp != NULL) {
3380         for (sidp = bsp->id;
3381             sidp != NULL && sidp->choice != SEQID_EMBL && sidp->choice != SEQID_DDBJ;
3382             sidp = sidp->next) continue;
3383     }
3384     if (sidp != NULL) return FALSE;
3385 
3386     sfp->data.choice = SEQFEAT_CDREGION;
3387     ImpFeatFree(imp);
3388     crp = CdRegionNew();
3389     sfp->data.value.ptrvalue = crp;
3390     for (q = sfp->qual; q; q = qnext) {
3391         qnext = q->next;
3392         if (StringCmp(q->qual, "transl_table") == 0) {
3393             gc = (Uint1) atoi(q->val);
3394             vnp = ValNodeNew(NULL);
3395             vnp->choice = 2;
3396             vnp->data.intvalue = gc;
3397             gcp = GeneticCodeNew();
3398             gcp->data.ptrvalue = vnp;
3399             crp->genetic_code = gcp;
3400             sfp->qual = remove_qual(sfp->qual, q);
3401         } else if (StringCmp(q->qual, "translation") == 0) {
3402             sfp->qual = remove_qual(sfp->qual, q);
3403         } else if (StringCmp(q->qual, "transl_except") == 0) {
3404             cp.choice = 1;    /* ncbieaa */
3405             cp.value.intvalue = (Int4) GetQualValueAa(q->val);
3406             pos = GetQualValuePos(q->val);
3407              loc = SeqLocFromPos(SeqLocId(sfp->location), pos);
3408              if (loc->choice !=SEQLOC_INT) {
3409                 ErrPostEx(SEV_WARNING, ERR_FEATURE_BadLocation,
3410                 "Location error for code break [%s]", pos);
3411                  MemFree(pos);
3412                 continue;
3413              }
3414             cbp = CodeBreakNew();
3415             cbp->aa = cp;
3416              cbp->loc = loc;
3417             sip = cbp->loc->data.ptrvalue;
3418             sip->strand = SeqLocStrand(sfp->location);
3419              if (SeqLocCompare(sfp->location, cbp->loc) != SLC_B_IN_A) {
3420               CodeBreakFree(cbp);
3421               cbp = NULL;
3422              }
3423              MemFree(pos);
3424              hcbp = tie_next_cbp(hcbp, cbp);
3425             sfp->qual = remove_qual(sfp->qual, q);
3426         } else if (StringCmp(q->qual, "codon_start") == 0) {
3427             frame = (Uint1) atoi(q->val);
3428             sfp->qual = remove_qual(sfp->qual, q);
3429             crp->frame = (Uint1)frame;
3430         } else if (StringCmp(q->qual, "exception") == 0) {
3431             sfp->excpt = TRUE;
3432         }
3433 
3434     }
3435     if (frame == -1) {
3436         frame = GetFrameFromLoc(sfp->location);
3437         crp->frame = (Uint1)frame;
3438     }
3439 
3440     return TRUE;
3441 }
3442 //LCOV_EXCL_STOP
3443 
/* Moves every /note qualifier (case-insensitive match) of a feature into the
 * feature comment.  The first note becomes the comment when there is none;
 * later notes are appended after "; ".  Each note qualifier is then removed
 * from the feature with remove_qual.
 */
static void NoteToComment (SeqFeatPtr sfp)
{
    GBQualPtr  qual, following;
    size_t     needed;
    CharPtr    merged;

    qual = sfp->qual;
    while (qual != NULL) {
        following = qual->next;     /* qual may be unlinked below */
        if (StringICmp (qual->qual, "note") == 0) {
            if (sfp->comment != NULL) {
                needed = StringLen (sfp->comment) + StringLen (qual->val) + 5;
                merged = MemNew (sizeof (Char) * needed);
                StringCpy (merged, sfp->comment);
                StringCat (merged, "; ");
                StringCat (merged, qual->val);
                MemFree (sfp->comment);
                MemFree (qual->val);
                sfp->comment = merged;
            } else {
                /* the comment takes over the qualifier's string */
                sfp->comment = qual->val;
            }
            qual->val = NULL;
            sfp->qual = remove_qual(sfp->qual, qual);
        }
        qual = following;
    }
}
/* Per-entry pass over the feature tables of a Bioseq or Bioseq-set.
 * Every feature with qualifiers has its notes moved into the comment; every
 * ImpFeat is then passed to ChangeReplaceToQual and ImpFeatToCdregion.
 * data, index and indent are unused.
 */
static void ChangeImpFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    SeqAnnotPtr  annots, cur;
    SeqFeatPtr   feat;

    if (IS_Bioseq(sep)) {
        annots = ((BioseqPtr) (sep->data.ptrvalue))->annot;
    } else {
        annots = ((BioseqSetPtr) (sep->data.ptrvalue))->annot;
    }

    for (cur = annots; cur != NULL; cur = cur->next) {
        if (cur->type == 1) {       /* feature tables only */
            for (feat = cur->data; feat != NULL; feat = feat->next) {
                if (feat->qual) {
                    NoteToComment(feat);
                }
                if (feat->data.choice == SEQFEAT_IMP) {
                    ChangeReplaceToQual(feat);
                    ImpFeatToCdregion(feat);
                }
            }
        }
    }
}
3502 
/* If the ImpFeat location string of sfp contains "replace", the replacement
 * text is turned into a /replace qualifier and the location string is freed
 * and cleared.  Does nothing when the ImpFeat or its loc string is missing.
 */
void ChangeReplaceToQual(SeqFeatPtr sfp)
{
    ImpFeatPtr  imp;
    CharPtr     hit;

    imp = (ImpFeatPtr) sfp->data.value.ptrvalue;
    if (imp != NULL && imp->loc != NULL) {
        hit = StringStr(imp->loc, "replace");
        if (hit != NULL) {
//LCOV_EXCL_START
            // This is never called, because BasicCleanup would have removed it already
            AddReplaceQual(sfp, hit);
            MemFree(imp->loc);
            imp->loc = NULL;
//LCOV_EXCL_STOP
        }
    }
}
3521 
3522 //LCOV_EXCL_START
3523 /**********************************************************/
AddReplaceQual(SeqFeatPtr sfp,CharPtr p)3524 void AddReplaceQual(SeqFeatPtr sfp, CharPtr p)
3525 {
3526     CharPtr s, val;
3527 
3528     val = StringChr(p, '\"');
3529     if(val == NULL)
3530            return;
3531     val++;
3532     s = p + StringLen(p) - 1;
3533     if(*s != ')')
3534            return;
3535     for(s--; s > val && *s != '\"'; s--) continue;
3536         if(*s != '\"')
3537                return;
3538     *s = '\0';
3539     sfp->qual = (GBQualPtr) AddGBQual(sfp->qual, "replace", val);
3540     *s = '\"';
3541     return;
3542 }
3543 //LCOV_EXCL_STOP
3544 
3545 /***************************************************************************
3546 *    check and remove HTG keywords automaticly generated by asn2ff
3547 *    HTG info is redundand in GBBlock
3548 ***************************************************************************/
3549 
/*
 * Walks gbp->keywords and drops every keyword that merely repeats the
 * MolInfo technique passed in 'tech':
 *   "HTG"                          - always dropped
 *   "HTGS_PHASE0" .. "HTGS_PHASE3" - dropped when tech is the matching
 *                                    MI_TECH_htgs_N value
 *   "EST" / "STS"                  - dropped when tech is MI_TECH_est /
 *                                    MI_TECH_sts
 * The keyword string is freed here; the list node itself is handed to
 * remove_node (presumably it unlinks and frees the node - confirm).
 */
static void CheckKeywords(GBBlockPtr gbp, Uint1 tech)
{
    ValNodePtr  node;
    ValNodePtr  following;
    CharPtr     kw;
    Boolean     redundant;

    if (gbp == NULL || gbp->keywords == NULL)
        return;

    node = gbp->keywords;
    while (node != NULL) {
        /* remember the successor first: node may be removed below */
        following = node->next;
        kw = (CharPtr) node->data.ptrvalue;

        redundant = (Boolean) (
            StringCmp(kw, "HTG") == 0
            || (tech == MI_TECH_htgs_0 && StringCmp(kw, "HTGS_PHASE0") == 0)
            || (tech == MI_TECH_htgs_1 && StringCmp(kw, "HTGS_PHASE1") == 0)
            || (tech == MI_TECH_htgs_2 && StringCmp(kw, "HTGS_PHASE2") == 0)
            || (tech == MI_TECH_htgs_3 && StringCmp(kw, "HTGS_PHASE3") == 0)
            || (tech == MI_TECH_est && StringCmp(kw, "EST") == 0)
            || (tech == MI_TECH_sts && StringCmp(kw, "STS") == 0));

        if (redundant) {
            MemFree(kw);
            gbp->keywords = remove_node(gbp->keywords, node);
        }
        node = following;
    }
    return;
}
3591 
/*
 * SeqEntryExplore callback.  'data' is the organism division string (may be
 * NULL).  For every GenBank block descriptor on the entry:
 *   - keywords that repeat the MolInfo technique are removed (CheckKeywords);
 *   - gbp->div is cleared when it is implied by the MolInfo technique
 *     (looked up in check_tech); in those cases the function returns at once;
 *   - otherwise gbp->div is cleared when it equals the organism division
 *     (gbp->taxonomy is cleared too), is "UNA" or "UNC", or is "PAT" on a
 *     Bioseq that carries a patent Seq-id.
 * 'index' and 'indent' are unused.
 */
static void ChangeGBDiv (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqPtr     bsp;
    BioseqSetPtr  bssp;
    ValNodePtr    descr;
    ValNodePtr    sdp;
    CharPtr       org_div;
    GBBlockPtr    gbp;
    Int2          slot;
    Boolean       has_patent_id = FALSE;
    MolInfoPtr    molinfo = NULL;
    SeqIdPtr      sip;

    org_div = (CharPtr) data;

    if (IS_Bioseq(sep)) {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        descr = bsp->descr;
        for (sip = bsp->id; sip != NULL; sip = sip->next) {
            if (sip->choice == SEQID_PATENT) {
                has_patent_id = TRUE;
            }
        }
    } else {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        descr = bssp->descr;
    }

    /* only the first MolInfo descriptor is consulted */
    sdp = descr;
    while (sdp != NULL && sdp->choice != Seq_descr_molinfo) {
        sdp = sdp->next;
    }
    if (sdp != NULL) {
        molinfo = (MolInfoPtr) sdp->data.ptrvalue;
    }

    for (sdp = descr; sdp != NULL; sdp = sdp->next) {
        if (sdp->choice != Seq_descr_genbank) continue;
        gbp = (GBBlockPtr) sdp->data.ptrvalue;
        if (gbp == NULL) continue;

        if (molinfo != NULL &&
            (molinfo->tech == MI_TECH_htgs_0 ||
             molinfo->tech == MI_TECH_htgs_1 ||
             molinfo->tech == MI_TECH_htgs_2 ||
             molinfo->tech == MI_TECH_htgs_3 ||
             molinfo->tech == MI_TECH_est ||
             molinfo->tech == MI_TECH_sts)) {
            CheckKeywords(gbp, molinfo->tech);
        }

        if (gbp->div == NULL) continue;

        /* is the division one of the technique-derived ones? */
        slot = 0;
        while (slot < TOTAL_TECH &&
               StringCmp(gbp->div, check_tech[slot].name) != 0) {
            slot++;
        }

        if (slot != TOTAL_TECH && molinfo != NULL) {
            if (StringCmp(gbp->div, "HTG") == 0 ||
                StringCmp(gbp->div, "PRI") == 0) {
                if (molinfo->tech == MI_TECH_htgs_1 ||
                    molinfo->tech == MI_TECH_htgs_2 ||
                    molinfo->tech == MI_TECH_htgs_3) {
                    gbp->div = MemFree(gbp->div);
                    return;
                }
            } else if (molinfo->tech == check_tech[slot].num) {
                gbp->div = MemFree(gbp->div);
                return;
            } else if (molinfo->tech == 0 && StringCmp (gbp->div, "STS") == 0) {
                /* technique not set: take it from the division */
                molinfo->tech = MI_TECH_sts;
                gbp->div = MemFree(gbp->div);
                return;
            }
        }

        if (org_div == NULL) continue;

        if (StringCmp(gbp->div, org_div) == 0) {
            gbp->div = MemFree(gbp->div);
            gbp->taxonomy = MemFree(gbp->taxonomy);
        } else if (StringCmp(gbp->div, "UNA") == 0 ||
                   StringCmp(gbp->div, "UNC") == 0 ||
                   (StringCmp(gbp->div, "PAT") == 0 && has_patent_id)) {
            gbp->div = MemFree(gbp->div);
        }
    }
}
3678 
/*
 * Organism names collected by EntryChangeGBSource and passed to
 * ChangeGBSource / CanDeleteGBSource for comparison with GBBlock.source.
 */
typedef struct gbsrcdata {
  CharPtr     taxname;   /* trimmed copy of OrgRef.taxname, or NULL */
  CharPtr     common;    /* trimmed copy of OrgRef.common, or NULL */
  CharPtr     oldname;   /* trimmed copy of an ORGMOD_old_name subname, or NULL */
  CharPtr     strain;    /* trimmed copy of an ORGMOD_strain subname, or NULL */
  OrgNamePtr  onp;       /* the OrgRef's orgname; set by EntryChangeGBSource, not a copy */
} GBSourceData, PNTR GBSourcePtr;
3686 
AbbrevStrIEql(CharPtr str,CharPtr gbpsrc)3687 static Boolean AbbrevStrIEql (CharPtr str, CharPtr gbpsrc)
3688 
3689 {
3690   Char     buf [200];
3691   Char     ch;
3692   CharPtr  ptr;
3693 
3694   if (StringLen (str) >= sizeof (buf)) return FALSE;
3695 
3696   ch = *str;
3697   ptr = buf;
3698 
3699   *ptr = ch;
3700   ptr++;
3701   str = StringChr (str, ' ');
3702   if (str == NULL) return FALSE;
3703   str++;
3704   ch = *str;
3705   while (ch == ' ') {
3706     str++;
3707     ch = *str;
3708   }
3709   *ptr = '.';
3710   ptr++;
3711   *ptr = '\0';
3712   StringCat (ptr, str);
3713 
3714   return (Boolean) (StringICmp (buf, gbpsrc) == 0);
3715 }
3716 
CanDeleteGBSource(GBSourcePtr gsp,CharPtr gbpsrc)3717 static Boolean CanDeleteGBSource (GBSourcePtr gsp, CharPtr gbpsrc)
3718 
3719 {
3720   Char        ch;
3721   Boolean     foundStrain = FALSE;
3722   Boolean     goOn = TRUE;
3723   /*
3724   OrgModPtr   omp;
3725   OrgNamePtr  onp;
3726   */
3727   CharPtr     ptr;
3728   CharPtr     str;
3729 
3730   if (gsp == NULL || StringHasNoText (gbpsrc)) return FALSE;
3731 
3732   str = StringStr (gbpsrc, "(strain");
3733   if (str != NULL) {
3734     ptr = str + 7;
3735     ch = *ptr;
3736     while (ch != '\0' && goOn) {
3737       if (ch == ')') {
3738         if (StringHasNoText (ptr + 1)) {
3739           *ptr = '\0';
3740           goOn = FALSE;
3741           foundStrain = TRUE;
3742         }
3743       } else if (ch == ',' || ch == ';') {
3744         goOn = FALSE;
3745       }
3746       ptr++;
3747       ch = *ptr;
3748     }
3749   } else {
3750     str = StringStr (gbpsrc, "strain)");
3751     if (str != NULL) return FALSE; /* do not handle this case for now */
3752   }
3753   if (foundStrain) {
3754     *str = '\0';
3755     str += 7;
3756     TrimSpacesAroundString (gbpsrc);
3757     TrimSpacesAroundString (str);
3758     if (StringDoesHaveText (gsp->strain)) {
3759       if (StringICmp (gsp->strain, str) != 0) return FALSE;
3760     /*
3761     } else if (gsp->onp != NULL) {
3762       omp = OrgModNew ();
3763       if (omp != NULL && gsp->onp != NULL) {
3764         onp = gsp->onp;
3765         omp->subtype = ORGMOD_strain;
3766         omp->subname = StringSave (str);
3767         omp->next = onp->mod;
3768         onp->mod = omp;
3769       }
3770     */
3771     } else {
3772       return FALSE; /* do not rescue strain at this point, so do not remove gbp->source */
3773     }
3774   }
3775 
3776   if (StringDoesHaveText (gsp->taxname) && StringICmp (gsp->taxname, gbpsrc) == 0) return TRUE;
3777   if (StringDoesHaveText (gsp->common) && StringICmp (gsp->common, gbpsrc) == 0) return TRUE;
3778   if (StringDoesHaveText (gsp->oldname) && StringICmp (gsp->oldname, gbpsrc) == 0) return TRUE;
3779 
3780   if (StringDoesHaveText (gsp->taxname) && AbbrevStrIEql (gsp->taxname, gbpsrc)) return TRUE;
3781   if (StringDoesHaveText (gsp->oldname) && AbbrevStrIEql (gsp->oldname, gbpsrc)) return TRUE;
3782 
3783 
3784   return FALSE;
3785 }
3786 
/*
 * Removes a single trailing '.' from str, in place.  Strings shorter than
 * two characters are left untouched.
 */
static void TrimPeriodFromEnd (CharPtr str)

{
  size_t  last;

  last = StringLen (str);
  if (last < 2) return;

  last--;   /* index of the final character */
  if (str [last] == '.') {
    str [last] = '\0';
  }
}
3798 
/*
 * SeqEntryExplore callback; 'data' is a GBSourcePtr.  Only Bioseq entries
 * are processed.  For each GenBank block descriptor, a copy of gbp->source
 * is stripped of a trailing " DNA." or " rRNA." and of a trailing period,
 * then handed to CanDeleteGBSource; when that returns TRUE gbp->source is
 * freed and set to NULL.  Processing stops at the first GenBank descriptor
 * that has no block or no source.  'index' and 'indent' are unused.
 */
static void ChangeGBSource (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqPtr    bsp;
    ValNodePtr   sdp;
    CharPtr      copy;
    GBBlockPtr   gbp;
    GBSourcePtr  gsp;
    size_t       n;

    if (!(IS_Bioseq(sep))) {
        return;
    }
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    sdp = bsp->descr;
    gsp = (GBSourcePtr) data;

    for (; sdp != NULL; sdp = sdp->next) {
        if (sdp->choice != Seq_descr_genbank) continue;

        gbp = (GBBlockPtr) sdp->data.ptrvalue;
        if (gbp == NULL || gbp->source == NULL) {
            return;
        }

        /* work on a scratch copy: CanDeleteGBSource edits its argument */
        copy = StringSave (gbp->source);
        n = StringLen (copy);
        if (n > 5 && StringCmp (copy + n - 5, " DNA.") == 0) {
            copy [n - 5] = '\0';
        } else if (n > 6 && StringCmp (copy + n - 6, " rRNA.") == 0) {
            copy [n - 6] = '\0';
        }
        TrimPeriodFromEnd (copy);

        if (CanDeleteGBSource (gsp, copy)) {
            gbp->source = MemFree (gbp->source);
        }
        MemFree (copy);
    }
}
3844 
/*
 * EntryChangeGBSource
 *
 * Removes GenBank block information that duplicates the top-level BioSource
 * of the entry:
 *   1. collects trimmed copies of the organism's taxname, common name,
 *      strain and old name, plus its division;
 *   2. runs ChangeGBSource over the entry to drop redundant GBBlock.source;
 *   3. runs ChangeGBDiv over the entry to drop redundant GBBlock.div.
 * All temporary strings are released before returning.  A NULL sep is a
 * no-op.
 */
void EntryChangeGBSource (SeqEntryPtr sep)
{
    OrgRefPtr       orp = NULL;
    OrgNamePtr      onp = NULL;
    OrgModPtr       omp;
    BioSourcePtr    biosp;
    CharPtr         div = NULL;
    GBSourceData    gsd;

    if (sep == NULL)
        return;

    MemSet ((Pointer) &gsd, 0, sizeof (GBSourceData));

    biosp = GetTopBioSourceFromSep (sep);
    if (biosp != NULL) {
        orp = biosp->org;
    }
    if (orp != NULL) {
        if (StringDoesHaveText (orp->taxname)) {
          gsd.taxname = StringSave (orp->taxname);
          TrimSpacesAndJunkFromEnds (gsd.taxname, FALSE);
          TrimPeriodFromEnd (gsd.taxname);
        }
        if (StringDoesHaveText (orp->common)) {
          gsd.common = StringSave (orp->common);
          TrimSpacesAndJunkFromEnds (gsd.common, FALSE);
          TrimPeriodFromEnd (gsd.common);
        }
        onp = orp->orgname;
        if (onp != NULL) {
          gsd.onp = onp;
          for (omp = onp->mod; omp != NULL; omp = omp->next) {
            if (StringHasNoText (omp->subname)) continue;
            if (omp->subtype == ORGMOD_strain) {
              /* the last strain modifier wins; release any earlier copy
                 so that repeated modifiers do not leak */
              MemFree (gsd.strain);
              gsd.strain = StringSave (omp->subname);
              TrimSpacesAndJunkFromEnds (gsd.strain, FALSE);
              TrimPeriodFromEnd (gsd.strain);
            } else if (omp->subtype == ORGMOD_old_name) {
              /* same for repeated old-name modifiers */
              MemFree (gsd.oldname);
              gsd.oldname = StringSave (omp->subname);
              TrimSpacesAndJunkFromEnds (gsd.oldname, FALSE);
              TrimPeriodFromEnd (gsd.oldname);
            }
          }
          if (StringDoesHaveText (onp->div)) {
            div = StringSave (onp->div);
          }
        }
    }

    SeqEntryExplore(sep, &gsd, ChangeGBSource);
    SeqEntryExplore(sep, div, ChangeGBDiv);

    /* MemFree is already called on possibly-NULL pointers below, so no guards */
    MemFree (div);
    MemFree (gsd.taxname);
    MemFree (gsd.common);
    MemFree (gsd.oldname);
    MemFree (gsd.strain);
    return;
}
3944 
/*
 * Runs ChangeImpFeat over every element of the entry, then calls
 * EntryChangeImpFeatToProt on the entry.  A NULL sep is a no-op.
 */
void EntryChangeImpFeat (SeqEntryPtr sep)
{
    if (sep != NULL) {
        SeqEntryExplore(sep, NULL, ChangeImpFeat);
        EntryChangeImpFeatToProt(sep);
    }
}
3953 
/*
 * SeqEntryExplore callback.  Within the descriptor list of one Bioseq or
 * Bioseq-set, merges BioSource descriptors that carry the same organism
 * taxname (exact, case-sensitive StringCmp match) into the first such
 * descriptor, and deletes the later duplicates.
 *
 * Merge rules applied to the surviving BioSource (biop1):
 *   - genome, origin, is_focus, gcode, mgcode: taken from the duplicate
 *     only when unset (0 / FALSE) on the survivor;
 *   - subtype, org->mod, orgname->mod: the duplicate's list is appended
 *     to the survivor's list;
 *   - org->db, org->syn, orgname->lineage, orgname->div: moved from the
 *     duplicate only when the survivor has none.
 * Every pointer that is moved is set to NULL on the duplicate before the
 * duplicate descriptor is passed to SeqDescFree.
 *
 * 'data', 'index' and 'indent' are unused.
 */
static void MergeDupBioSources (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
  BioSourcePtr  biop1, biop2;
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  Boolean       fuseanddelete;
  ValNodePtr    mod;
  ValNodePtr    nextvnp;
  OrgModPtr     omp;
  Pointer PNTR  prevvnp;
  ValNodePtr    sdp;
  SubSourcePtr  ssp;
  OrgNamePtr    onp1, onp2;
  OrgRefPtr     orp1, orp2;
  ValNodePtr    vnp;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    sdp = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    sdp = bssp->descr;
  } else {
      return;
  }
  /* outer loop: each source descriptor in turn acts as the survivor */
  while (sdp != NULL) {
    if (sdp->choice == Seq_descr_source && sdp->data.ptrvalue != NULL) {
      biop1 = (BioSourcePtr) sdp->data.ptrvalue;
      orp1 = biop1->org;
      if (orp1 != NULL) {
        /* inner loop: scan the descriptors after sdp for duplicates;
           prevvnp always addresses the link that points at vnp */
        vnp = sdp->next;
        prevvnp = (Pointer PNTR) &(sdp->next);
        while (vnp != NULL) {
          nextvnp = vnp->next;
          fuseanddelete = FALSE;
          biop2 = NULL;
          orp2 = NULL;
          if (vnp->choice == Seq_descr_source && vnp->data.ptrvalue != NULL) {
            biop2 = (BioSourcePtr) vnp->data.ptrvalue;
            orp2 = biop2->org;
            if (orp2 != NULL) {
              if ((orp1->taxname != NULL) && (orp2->taxname != NULL) &&
                  StringCmp (orp1->taxname, orp2->taxname) == 0) {
                fuseanddelete = TRUE;
              }
            }
          }
          if (fuseanddelete) {
            /* unlink the duplicate from the list before merging it */
            *(prevvnp) = vnp->next;
            vnp->next = NULL;
            if (biop2 != NULL) {
              if (biop1->genome == 0) {
                biop1->genome = biop2->genome;
              }
              if (biop1->origin == 0) {
                biop1->origin = biop2->origin;
              }
              if (! biop1->is_focus) {
                biop1->is_focus = biop2->is_focus;
              }
              /* SubSource list: adopt or append, then detach from biop2 */
              if (biop1->subtype == NULL) {
                biop1->subtype = biop2->subtype;
                biop2->subtype = NULL;
              } else {
                ssp = biop1->subtype;
                while (ssp->next != NULL) {
                  ssp = ssp->next;
                }
                ssp->next = biop2->subtype;
                biop2->subtype = NULL;
              }
              if (orp1 != NULL && orp2 != NULL) {
                /* OrgRef modifier list: adopt or append */
                if (orp1->mod == NULL) {
                  orp1->mod = orp2->mod;
                  orp2->mod = NULL;
                } else {
                  mod = orp1->mod;
                  while (mod->next != NULL) {
                    mod = mod->next;
                  }
                  mod->next = orp2->mod;
                  orp2->mod = NULL;
                }
                /* db and syn are moved only when the survivor has none;
                   otherwise they stay on orp2 */
                if (orp1->db == NULL) {
                  orp1->db = orp2->db;
                  orp2->db = NULL;
                }
                if (orp1->syn == NULL) {
                  orp1->syn = orp2->syn;
                  orp2->syn = NULL;
                }
                onp1 = orp1->orgname;
                onp2 = orp2->orgname;
                /* OrgName data is merged only when both sides have one */
                if (onp1 != NULL && onp2 != NULL) {
                  if (onp1->mod == NULL) {
                    onp1->mod = onp2->mod;
                    onp2->mod = NULL;
                  } else {
                    omp = onp1->mod;
                    while (omp->next != NULL) {
                      omp = omp->next;
                    }
                    omp->next = onp2->mod;
                    onp2->mod = NULL;
                  }
                  if (onp1->gcode == 0) {
                    onp1->gcode = onp2->gcode;
                  }
                  if (onp1->mgcode == 0) {
                    onp1->mgcode = onp2->mgcode;
                  }
                  if (onp1->lineage == NULL) {
                    onp1->lineage = onp2->lineage;
                    onp2->lineage = NULL;
                  }
                  if (onp1->div == NULL) {
                    onp1->div = onp2->div;
                    onp2->div = NULL;
                  }
                }
              }
            }
            /* vnp is already unlinked (next == NULL) */
            SeqDescFree (vnp);
          } else {
            prevvnp = (Pointer PNTR) &(vnp->next);
          }
          vnp = nextvnp;
        }
      }
    }
    sdp = sdp->next;
  }
}
4088 
/*
 * Applies MergeDupBioSources to every element of the entry via
 * SeqEntryExplore.
 */
void EntryMergeDupBioSources (SeqEntryPtr sep)

{
  SeqEntryExplore (sep, NULL, MergeDupBioSources);
}
4094 
/*
 * Cleans a string in place and returns it:
 *   1. leading characters <= ' ' are removed (the text is shifted left);
 *   2. the trailing run of spaces and semicolons is cut off.  A ';' does
 *      not start that run while an '&' has been seen with no space after
 *      it, so text such as "&amp;" keeps its semicolon.
 * NULL and empty strings are returned unchanged.
 */
static CharPtr TASNTrimSpacesAndTrailingSemicolons (CharPtr str)

{
  Uchar    c;          /* unsigned so 8-bit characters compare above ' ' */
  CharPtr  cut;        /* start of the trailing run to drop, or NULL */
  Boolean  in_entity;  /* TRUE from an '&' until the next space */
  CharPtr  out;
  CharPtr  src;

  if (str == NULL || str [0] == '\0') return str;

  /* pass 1: drop leading blanks / control characters */
  src = str;
  c = (Uchar) *src;
  while (c != '\0' && c <= ' ') {
    src++;
    c = (Uchar) *src;
  }
  out = str;
  while (*src != '\0') {
    *out = *src;
    out++;
    src++;
  }
  *out = '\0';

  /* pass 2: locate where the trailing run of ' ' and ';' begins */
  cut = NULL;
  in_entity = FALSE;
  for (src = str; *src != '\0'; src++) {
    switch (*src) {
      case '&' :
        in_entity = TRUE;
        cut = NULL;
        break;
      case ' ' :
        if (cut == NULL) {
          cut = src;
        }
        in_entity = FALSE;
        break;
      case ';' :
        if (cut == NULL && (! in_entity)) {
          cut = src;
        }
        break;
      default :
        cut = NULL;
        break;
    }
  }
  if (cut != NULL) {
    *cut = '\0';
  }

  return str;
}
4147 
/*
 * Collapses, in place, every run of semicolons, spaces and tabs that
 * follows a ';' : the first ';' is kept and is followed by a single space
 * if (and only if) the run contained at least one space.  Returns str.
 * NULL and empty strings are returned unchanged.
 */
static CharPtr TASNTrimInternalSemicolons (CharPtr str)

{
  Char     c;
  CharPtr  rd;
  Boolean  saw_space;
  CharPtr  wr;

  if (str == NULL || str [0] == '\0') return str;

  rd = str;
  wr = str;
  while (*rd != '\0') {
    c = *rd;
    rd++;
    *wr = c;
    wr++;
    if (c != ';') continue;

    /* swallow everything that trails the semicolon just copied */
    saw_space = FALSE;
    for (; *rd == ';' || *rd == ' ' || *rd == '\t'; rd++) {
      if (*rd == ' ') {
        saw_space = TRUE;
      }
    }
    if (saw_space) {
      *wr = ' ';
      wr++;
    }
  }
  *wr = '\0';

  return str;
}
4191 
TASNStringHasNoText(CharPtr str)4192 static Boolean TASNStringHasNoText (CharPtr str)
4193 
4194 {
4195   Uchar  ch;    /* to use 8bit characters in multibyte languages */
4196 
4197   if (str != NULL) {
4198     ch = *str;
4199     while (ch != '\0') {
4200       if (ch > ' ') {
4201         return FALSE;
4202       }
4203       str++;
4204       ch = *str;
4205     }
4206   }
4207   return TRUE;
4208 }
4209 
/*
 * Cleans the string *strp in place (leading blanks, trailing spaces and
 * semicolons, runs after internal semicolons).  If nothing but blanks is
 * left, the string is freed and *strp is set to the value MemFree returns.
 */
static void CleanVisString (CharPtr PNTR strp)

{
  CharPtr  text;

  if (strp == NULL || *strp == NULL) return;

  text = *strp;
  TASNTrimSpacesAndTrailingSemicolons (text);
  TASNTrimInternalSemicolons (text);
  if (! TASNStringHasNoText (text)) return;

  *strp = MemFree (text);
}
4221 
/*
 * Like CleanVisString, but the ends of the string are cleaned with
 * TrimSpacesAndJunkFromEnds (second argument TRUE) instead of
 * TASNTrimSpacesAndTrailingSemicolons.  A string left without text is
 * freed and *strp is set to the value MemFree returns.
 */
static void CleanVisStringJunk (CharPtr PNTR strp)

{
  CharPtr  text;

  if (strp == NULL || *strp == NULL) return;

  text = *strp;
  TrimSpacesAndJunkFromEnds (text, TRUE);
  TASNTrimInternalSemicolons (text);
  if (! TASNStringHasNoText (text)) return;

  *strp = MemFree (text);
}
4233 
/*
 * Cleans every string in the ValNode list *vnpp with
 * TASNTrimSpacesAndTrailingSemicolons and removes (ValNodeFreeData) the
 * nodes whose string ends up with no text.  *vnpp is updated when the
 * head node is removed.
 */
static void CleanVisStringList (ValNodePtr PNTR vnpp)

{
  ValNodePtr PNTR  link;   /* address of the pointer that refers to node */
  ValNodePtr       node;

  if (vnpp == NULL) return;

  link = vnpp;
  while ((node = *link) != NULL) {
    TASNTrimSpacesAndTrailingSemicolons (node->data.ptrvalue);
    if (TASNStringHasNoText (node->data.ptrvalue)) {
      /* splice the node out; link now refers to its successor */
      *link = node->next;
      node->next = NULL;
      ValNodeFreeData (node);
    } else {
      link = &(node->next);
    }
  }
}
4257 
/*
 * SeqEntryExplore callback.  'data' is an optional BoolPtr.
 *
 * For every GenBank block descriptor on the Bioseq or Bioseq-set:
 *   - *data is set to TRUE when the block (before cleaning) has a source
 *     or taxonomy, or has a division other than "PAT" / "SYN";
 *   - the string fields and string lists of the block are cleaned;
 *   - a block left with no extra accessions, source, keywords, origin,
 *     date, entry date, division or taxonomy is unlinked from the
 *     descriptor list and freed.
 *
 * The descriptor list is taken from the structure that matches the entry
 * type: a Bioseq-set must not be read through a BioseqPtr.
 * 'index' and 'indent' are unused.
 */
static void CheckGBBlock (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  Boolean       empty;
  GBBlockPtr    gbp;
  BoolPtr       hasGB;
  ValNodePtr    nextsdp;
  Pointer PNTR  prevsdp;
  ValNodePtr    sdp;

  /* check before the first dereference, not after it */
  if (sep == NULL || sep->data.ptrvalue == NULL) return;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    sdp = bsp->descr;
    prevsdp = (Pointer PNTR) &(bsp->descr);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    sdp = bssp->descr;
    prevsdp = (Pointer PNTR) &(bssp->descr);
  } else {
      return;
  }
  hasGB = (BoolPtr) data;

  while (sdp != NULL) {
    nextsdp = sdp->next;
    empty = FALSE;
    if (sdp->choice == Seq_descr_genbank && sdp->data.ptrvalue != NULL) {
      gbp = (GBBlockPtr) sdp->data.ptrvalue;
      if (gbp->source != NULL || gbp->taxonomy != NULL) {
        if (hasGB != NULL) {
          *hasGB = TRUE;
        }
      } else if (gbp->div != NULL) {
        if (StringCmp (gbp->div, "PAT") != 0 &&
            StringCmp (gbp->div, "SYN") != 0) {
          if (hasGB != NULL) {
            *hasGB = TRUE;
          }
        }
      }
      CleanVisStringList (&(gbp->extra_accessions));
      CleanVisStringList (&(gbp->keywords));
      CleanVisString (&(gbp->source));
      CleanVisString (&(gbp->origin));
      CleanVisString (&(gbp->date));
      CleanVisString (&(gbp->div));
      CleanVisString (&(gbp->taxonomy));
      if (gbp->extra_accessions == NULL && gbp->source == NULL &&
          gbp->keywords == NULL && gbp->origin == NULL &&
          gbp->date == NULL && gbp->entry_date == NULL &&
          gbp->div == NULL && gbp->taxonomy == NULL) {
        empty = TRUE;
        ObjMgrDeSelect (0, 0, 0, 0, NULL);
      }
    }
    if (empty) {
      /* unlink the descriptor, then free it on its own */
      *(prevsdp) = sdp->next;
      sdp->next = NULL;
      sdp = SeqDescFree (sdp);
    } else {
      prevsdp = (Pointer PNTR) &(sdp->next);
    }
    sdp = nextsdp;
  }
}
4328 
EntryCheckGBBlock(SeqEntryPtr sep)4329 extern Boolean EntryCheckGBBlock (SeqEntryPtr sep)
4330 
4331 {
4332   Boolean  hasGBStuff;
4333 
4334   hasGBStuff = FALSE;
4335   SeqEntryExplore (sep, (Pointer) &hasGBStuff, CheckGBBlock);
4336   return hasGBStuff;
4337 }
4338 
4339 
GetOriginalBeforeAdjustment(SeqLocPtr slp,Int4Ptr p_oldfrom,Int4Ptr p_oldto)4340 static SeqIntPtr GetOriginalBeforeAdjustment (SeqLocPtr slp, Int4Ptr p_oldfrom, Int4Ptr p_oldto)
4341 {
4342     SeqLocPtr curr = NULL, last = NULL;
4343     SeqIntPtr sip;
4344 
4345     if (slp == NULL)
4346     {
4347         return NULL;
4348     }
4349     while ((curr = SeqLocFindNext(slp, curr)) != NULL)
4350     {
4351         last = curr;
4352     }
4353 
4354     if (last != NULL && last->choice != SEQLOC_INT)  /* this is too weird */
4355     {
4356         return NULL;
4357     }
4358     if (last == NULL || last->data.ptrvalue == NULL) return NULL;
4359     sip = (SeqIntPtr)(last->data.ptrvalue);
4360 
4361     *p_oldfrom = sip->from;
4362     *p_oldto = sip->to;
4363 
4364     return sip;
4365 }
4366 
4367 
/*
 * Extends the last interval of slp at its 3' end.  A remainder of 0, 1 or
 * 2 extends by 3, 2 or 1 bases; any other value is applied as given.
 * On the minus strand 'from' is decreased, otherwise 'to' is increased.
 *
 * Returns FALSE, changing nothing, when the last piece is not a usable
 * interval, its Bioseq cannot be found, the extension would not fit, or
 * the end being moved carries a fuzz (if_from / if_to) and
 * even_if_partial is FALSE.  On success the previous coordinate is stored
 * in *p_oldnum (if not NULL) and TRUE is returned.
 */
static Boolean AdjustLocByRemainder (SeqLocPtr slp, Int4 remainder, Boolean even_if_partial, Int4Ptr p_oldnum)
{
    SeqIntPtr  interval;
    BioseqPtr  nuc;
    Int4       unused_from, unused_to;
    Int4       previous;

    interval = GetOriginalBeforeAdjustment(slp, &unused_from, &unused_to);
    if (interval == NULL) return FALSE;

    nuc = BioseqFind(interval->id);
    if (nuc == NULL) return FALSE;

    /* 0 -> 3, 1 -> 2, 2 -> 1; other values pass through unchanged */
    if (remainder >= 0 && remainder <= 2) {
        remainder = 3 - remainder;
    }

    if (interval->strand == Seq_strand_minus) {
        if (interval->from < remainder ||
            (interval->if_from != NULL && !even_if_partial)) {
            return FALSE;
        }
        previous = interval->from;
        interval->from -= remainder;
    } else {
        if (interval->to >= (nuc->length - remainder) ||
            (interval->if_to != NULL && !even_if_partial)) {
            return FALSE;
        }
        previous = interval->to;
        interval->to += remainder;
    }

    if (p_oldnum != NULL) {
        *p_oldnum = previous;
    }
    return TRUE;
}
4430 
4431 
4432 /*****************************************************************************
4433 *
4434 *   CdEndCheck(sfp, fp)
4435 *
4436 *****************************************************************************/
CdEndCheck(SeqFeatPtr sfp,FILE * fp,Boolean also_adjust_mrna)4437 static void CdEndCheck(SeqFeatPtr sfp, FILE *fp, Boolean also_adjust_mrna)
4438 {
4439     ByteStorePtr newprot = NULL;
4440     BioseqPtr protseq, nucseq;
4441     Int4 len, remainder, aas, oldfrom, oldto, protlen, i, oldnum;
4442     Int4 m_oldfrom, m_oldto;
4443     CdRegionPtr crp;
4444     SeqIdPtr protid, tmp;
4445     SeqIntPtr sip, msip = NULL;
4446     Int2 residue, residue2;
4447     Char nuc[PATH_MAX];
4448     CodeBreakPtr cbp;
4449     Int4 pos1, pos2, pos;
4450     SeqLocPtr tmpslp;
4451     Int4 len2;
4452     SeqFeatPtr gene = NULL;
4453     SeqFeatPtr mrna = NULL;
4454     GeneRefPtr grp ;
4455     BioseqPtr bsp;
4456     SeqLocPtr slp;
4457     Boolean        hasNulls;
4458     Boolean        noLeft;
4459     Boolean        noRight;
4460     Boolean        noLeftFeat;
4461     Boolean        noLeftGene;
4462     Boolean        noRightFeat;
4463     Boolean        noRightGene;
4464 
4465 
4466     grp = SeqMgrGetGeneXref (sfp);
4467     if (grp == NULL || (! SeqMgrGeneIsSuppressed (grp))) {
4468       gene = SeqMgrGetOverlappingGene (sfp->location, NULL);
4469     }
4470     if (also_adjust_mrna) {
4471       mrna = SeqMgrGetOverlappingmRNA (sfp->location, NULL);
4472       if (mrna != NULL) {
4473         if (SeqLocStrand (mrna->location) == Seq_strand_minus) {
4474           if (SeqLocStart (mrna->location) != SeqLocStart (sfp->location)) {
4475             mrna = NULL;
4476           }
4477         } else {
4478           if (SeqLocStop (mrna->location) != SeqLocStop (sfp->location)) {
4479             mrna = NULL;
4480           }
4481         }
4482       }
4483     }
4484 
4485     crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
4486     len = SeqLocLen(sfp->location);
4487     len2 = len;
4488     switch (crp->frame)
4489     {
4490         case 2:
4491             len -= 1;
4492             break;
4493         case 3:
4494             len -= 2;
4495             break;
4496         default:
4497             break;
4498     }
4499     remainder = len % 3;
4500     aas = len/3;    /* total aas in protein if no stop codon */
4501     protid = SeqLocId(sfp->product);
4502     if (protid == NULL)
4503         return;
4504     /* protseq = BioseqFind(protid); */
4505     protseq = BioseqLockById (protid); /* tries BioseqFind, will fetch remotely if enabled */
4506     if (protseq == NULL) return;
4507     BioseqUnlock (protseq); /* unlock but do not cache out, easier than unlocking everywhere in code below */
4508     if (((protseq->length + 1) == aas) && (remainder == 0)) /* correct length with termination */
4509         return;
4510 
4511     if (protseq->seq_data_type == Seq_code_gap) return;
4512 
4513     cbp = crp->code_break;
4514     while (cbp != NULL)
4515     {
4516         pos1 = INT4_MAX;
4517         pos2 = -10;
4518         tmpslp = NULL;
4519         while ((tmpslp = SeqLocFindNext(cbp->loc, tmpslp)) != NULL)
4520         {
4521             pos = GetOffsetInLoc(tmpslp, sfp->location, SEQLOC_START);
4522             if (pos < pos1)
4523                 pos1 = pos;
4524             pos = GetOffsetInLoc(tmpslp, sfp->location, SEQLOC_STOP);
4525             if (pos > pos2)
4526                 pos2 = pos;
4527         }
4528         pos = pos2 - pos1; /* codon length */
4529         if (/* pos == 2 || */ (pos >= 0 && pos <= 1 && pos2 == len2 - 1))   /*  a codon */
4530         /* allowing a partial codon at the end */
4531         {
4532             return;
4533         }
4534 
4535         cbp = cbp->next;
4536     }
4537 
4538     if (protseq->length == aas && remainder == 0)
4539     {
4540       /* do we already have a stop codon, but the translated protein includes it? */
4541       if (protseq->repr == Seq_repr_raw) {
4542         newprot = (ByteStorePtr) protseq->seq_data;
4543         if (newprot != NULL) {
4544           protlen = BSLen(newprot);
4545           BSSeek(newprot, (protlen - 1), SEEK_SET);
4546           residue = BSGetByte(newprot);
4547           while (residue == '*' && protlen == protseq->length && protlen > 0) {
4548             BSSeek (newprot, -1, SEEK_END);
4549             BSDelete (newprot, 1);
4550             BSSeek (newprot, -1, SEEK_END);
4551             protlen--;
4552             protseq->length = protlen;
4553             residue = BSGetByte (newprot);
4554           }
4555         }
4556       }
4557     }
4558 
4559     sip = GetOriginalBeforeAdjustment (sfp->location, &oldfrom, &oldto);
4560     if (mrna != NULL) {
4561       msip = GetOriginalBeforeAdjustment (mrna->location, &m_oldfrom, &m_oldto);
4562     }
4563 
4564     if (!AdjustLocByRemainder(sfp->location, remainder, FALSE, &oldnum)) {
4565         return;
4566     }
4567     if (mrna != NULL) {
4568       AdjustLocByRemainder (mrna->location, remainder, TRUE, NULL);
4569     }
4570 
4571     nucseq = BioseqFind(sip->id);
4572     newprot = ProteinFromCdRegion(sfp, TRUE);   /* include stop codons */
4573     if (newprot == NULL)
4574     {
4575         goto erret;
4576     }
4577 
4578     protlen = BSLen(newprot);
4579     if (protlen != aas + 1)
4580     {
4581         goto erret;
4582     }
4583 
4584     BSSeek(newprot, (protlen - 1), SEEK_SET);
4585     residue = BSGetByte(newprot);
4586     if (residue != '*')
4587     {
4588         goto erret;
4589     }
4590 
4591     BSSeek(newprot, (protlen-1), SEEK_SET);
4592     BSDelete(newprot, 1);   /* remove termination from protein */
4593     BSSeek(newprot, 0, SEEK_SET);  /* check for internal termination */
4594     BSSeek((ByteStorePtr) protseq->seq_data, 0, SEEK_SET);
4595     protlen = BSLen(newprot);
4596     for (i = 0; i < protlen; i++)
4597     {
4598         residue = BSGetByte(newprot);
4599         residue2 = BSGetByte((ByteStorePtr) protseq->seq_data);
4600         if (residue != residue2)
4601         {
4602             goto erret;
4603         }
4604 
4605     }
4606 
4607     BSFree((ByteStorePtr) protseq->seq_data);
4608     protseq->seq_data = (SeqDataPtr) newprot;
4609     protseq->length = protlen;
4610     /****** to avoid killing asn2gnbk ***
4611     protseq->seq_data_type = Seq_code_ncbieaa;
4612     sfp->partial = FALSE;
4613 
4614     ************************************/
4615     for (tmp = nucseq->id; tmp != NULL; tmp = tmp->next)
4616     {
4617         if ((tmp->choice == SEQID_GENBANK) ||
4618             (tmp->choice == SEQID_EMBL) ||
4619             (tmp->choice == SEQID_DDBJ))
4620             break;
4621     }
4622 
4623     if (tmp == NULL)
4624         SeqIdWrite(nucseq->id, nuc, PRINTID_FASTA_LONG, sizeof (nuc) - 1);
4625     else
4626         SeqIdWrite(tmp, nuc, PRINTID_TEXTID_ACCESSION, sizeof (nuc) - 1);
4627 
4628     if (fp != NULL)
4629         fprintf(fp, "%s %ld %d\n", nuc, (long)(oldnum+1), (int)remainder);
4630 
4631     if (gene != NULL) {
4632         if (SeqLocAinB (sfp->location, gene->location) <= 0) {
4633             bsp = BioseqFindFromSeqLoc (gene->location);
4634             if (bsp != NULL) {
4635               hasNulls = LocationHasNullsBetween (gene->location);
4636               slp = SeqLocMerge (bsp, gene->location, sfp->location, TRUE, FALSE, hasNulls);
4637               if (slp != NULL) {
4638                 CheckSeqLocForPartial (gene->location, &noLeftGene, &noRightGene);
4639                 gene->location = SeqLocFree (gene->location);
4640                 gene->location = slp;
4641                 CheckSeqLocForPartial (sfp->location, &noLeftFeat, &noRightFeat);
4642                 if (bsp->repr == Seq_repr_seg) {
4643                   slp = SegLocToPartsEx (bsp, gene->location, TRUE);
4644                   gene->location = SeqLocFree (gene->location);
4645                   gene->location = slp;
4646                   hasNulls = LocationHasNullsBetween (gene->location);
4647                   gene->partial = (gene->partial || hasNulls);
4648                 }
4649                 FreeAllFuzz (gene->location);
4650                 noLeft = (noLeftFeat || noLeftGene);
4651                 noRight = (noRightFeat || noRightGene);
4652                 SetSeqLocPartial (gene->location, noLeft, noRight);
4653                 gene->partial = (gene->partial || noLeft || noRight);
4654               }
4655             }
4656         }
4657     }
4658 
4659     return;
4660 erret:
4661     BSFree(newprot);
4662     sip->from = oldfrom;
4663     sip->to = oldto;
4664     if (msip != NULL) {
4665       msip->from = m_oldfrom;
4666       msip->to = m_oldto;
4667     }
4668     return;
4669 }
4670 
4671 
/* Options handed to FindCd through the SeqEntryExplore user-data pointer. */
typedef struct findcd {
    FILE *fp;                  /* forwarded to CdEndCheck as its fp argument; may be NULL */
    Boolean also_adjust_mrna;  /* forwarded to CdEndCheck as its also_adjust_mrna argument */
} FindCdData, PNTR FindCdPtr;
4676 
/*****************************************************************************
*
*   FindCd(sep, data, index, indent)
*       SeqEntryExplore callback.  Walks the annotations hanging directly
*       off this Bioseq or Bioseq-set and calls CdEndCheck on every
*       cdregion feature that is not flagged as an exception.
*       data is either NULL or a FindCdPtr supplying the report file and
*       the also_adjust_mrna switch; index and indent are unused.
*
*****************************************************************************/
static void FindCd (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    FindCdPtr    opts = (FindCdPtr) data;
    FILE         *report = NULL;
    Boolean      fix_mrna = FALSE;
    SeqAnnotPtr  annot;
    SeqFeatPtr   feat;

    if (opts != NULL) {
        report = opts->fp;
        fix_mrna = opts->also_adjust_mrna;
    }

    if (IS_Bioseq(sep)) {
        annot = ((BioseqPtr) (sep->data.ptrvalue))->annot;
    } else {
        annot = ((BioseqSetPtr) (sep->data.ptrvalue))->annot;
    }

    for (; annot != NULL; annot = annot->next) {
        if (annot->type != 1) {         /* not an ftable */
            continue;
        }
        for (feat = (SeqFeatPtr) (annot->data); feat != NULL; feat = feat->next) {
            if (feat->data.choice != 3) {   /* not a cdregion */
                continue;
            }
            if (feat->excpt) {              /* biological exception: leave alone */
                continue;
            }
            CdEndCheck(feat, report, fix_mrna);
        }
    }
}
4723 
/*****************************************************************************
*
*   fake_bond_loc(slp)
*       Returns a newly allocated SeqLoc node that is a shallow copy of
*       slp (the copy shares slp's data pointer).  When slp is a
*       SEQLOC_MIX, every SEQLOC_NULL element is removed from the shared
*       element chain, so the caller's location is edited as well.
*       Returns NULL if slp is NULL or the node cannot be allocated.
*       Only the returned node itself belongs to the caller; the data it
*       points at is still owned by slp.
*
*****************************************************************************/
static SeqLocPtr fake_bond_loc(SeqLocPtr slp)
{
    SeqLocPtr loc, l, lnext, ldata;

    if (slp == NULL)
        return NULL;
    loc = MemNew(sizeof(SeqLoc));
    if (loc == NULL)
        return NULL;
    MemCopy(loc, slp, sizeof(SeqLoc));
    if (slp->choice != SEQLOC_MIX)
        return loc;

    ldata = (SeqLocPtr) loc->data.ptrvalue;
    for (l = ldata; l != NULL; l = lnext) {
        lnext = l->next;    /* fetch before l is possibly removed */
        if (l->choice == SEQLOC_NULL) {
            ldata = remove_node(ldata, l);
        }
    }
    /* remove_node hands back the chain head, which changes when the first
       element was a NULL location; store it in both the copy and the
       original so neither keeps pointing at a removed node */
    loc->data.ptrvalue = ldata;
    slp->data.ptrvalue = ldata;
    return loc;
}
4745 
4746 /*****************************************************************************
4747 *
4748 *   Check for CdRegion ending in middle base of codon
4749 *
4750 *****************************************************************************/
4751 
CdCheckEx(SeqEntryPtr sep,FILE * fp,Boolean also_adjust_mrna)4752 NLM_EXTERN void CdCheckEx(SeqEntryPtr sep, FILE *fp, Boolean also_adjust_mrna)
4753 {
4754     FindCdData fcd;
4755 
4756     MemSet (&fcd, 0, sizeof (FindCdData));
4757     fcd.fp = fp;
4758     fcd.also_adjust_mrna = also_adjust_mrna;
4759     SeqEntryExplore(sep, (Pointer)&fcd, FindCd);
4760     return;
4761 }
4762 
/* Convenience form of CdCheckEx that passes FALSE for also_adjust_mrna. */
NLM_EXTERN void CdCheck(SeqEntryPtr sep, FILE *fp)
{
    CdCheckEx (sep, fp, FALSE);
}
4767 
4768 
OutOfFramePeptideButEmblOrDdbj(SeqFeatPtr sfp,SeqFeatPtr cds)4769 static Boolean OutOfFramePeptideButEmblOrDdbj (SeqFeatPtr sfp, SeqFeatPtr cds)
4770 
4771 {
4772   BioseqPtr    bsp;
4773   CdRegionPtr  crp;
4774   ImpFeatPtr   ifp;
4775   SeqLocPtr    first = NULL, last = NULL, slp = NULL;
4776   Boolean      partial5, partial3;
4777   Int4         pos1, pos2, adjust = 0, mod1, mod2;
4778   SeqIdPtr     sip;
4779 
4780   if (sfp == NULL || cds == NULL || sfp->data.choice != SEQFEAT_IMP) return FALSE;
4781 
4782   ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4783   if (ifp == NULL) return FALSE;
4784   if (StringCmp (ifp->key, "mat_peptide") != 0 &&
4785       StringCmp (ifp->key, "sig_peptide") != 0 &&
4786       StringCmp (ifp->key, "transit_peptide") != 0 &&
4787       StringCmp (ifp->key, "propeptide") != 0) return FALSE;
4788 
4789   crp = (CdRegionPtr) cds->data.value.ptrvalue;
4790   if (crp == NULL) return FALSE;
4791   if (crp->frame == 2) {
4792     adjust = 1;
4793   } else if (crp->frame == 3) {
4794     adjust = 2;
4795   }
4796 
4797   while ((slp = SeqLocFindNext (sfp->location, slp)) != NULL) {
4798     last = slp;
4799     if (first == NULL) {
4800       first = slp;
4801     }
4802   }
4803   if (first == NULL || last == NULL) return FALSE;
4804 
4805   pos1 = GetOffsetInLoc (first, cds->location, SEQLOC_START) - adjust;
4806   pos2 = GetOffsetInLoc (last, cds->location, SEQLOC_STOP) - adjust;
4807   mod1 = pos1 % 3;
4808   mod2 = pos2 % 3;
4809 
4810   CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
4811   if (partial5) {
4812     mod1 = 0;
4813   }
4814   if (partial3) {
4815     mod2 = 2;
4816   }
4817 
4818   if (mod1 == 0 && mod2 == 2) return FALSE;
4819 
4820   bsp = BioseqFindFromSeqLoc (sfp->location);
4821   if (bsp == NULL) return FALSE;
4822   for (sip = bsp->id;
4823        sip != NULL && sip->choice != SEQID_EMBL && sip->choice != SEQID_DDBJ;
4824        sip = sip->next) continue;
4825   if (sip != NULL) return TRUE;
4826 
4827   return FALSE;
4828 }
4829 
IncompatibleStrands(SeqLocPtr loc1,SeqLocPtr loc2)4830 static Boolean IncompatibleStrands (SeqLocPtr loc1, SeqLocPtr loc2)
4831 
4832 {
4833   Boolean  minus1 = FALSE, minus2 = FALSE;
4834   Uint1    strand1, strand2;
4835 
4836   if (loc1 == NULL || loc2 == NULL) return FALSE;
4837 
4838   strand1 = SeqLocStrand (loc1);
4839   strand2 = SeqLocStrand (loc2);
4840 
4841   minus1 = (Boolean) (strand1 == Seq_strand_minus || strand1 == Seq_strand_both_rev);
4842   minus2 = (Boolean) (strand2 == Seq_strand_minus || strand2 == Seq_strand_both_rev);
4843 
4844   if (minus1 != minus2) return TRUE;
4845 
4846   return FALSE;
4847 }
4848 
/*****************************************************************************
*
*   ImpFeatToProtRef(sfa)
*       For every candidate in sfa.pept (node choice SEQFEAT_PROT,
*       SEQFEAT_SITE or SEQFEAT_BOND) this picks a containing coding
*       region from sfa.cds, maps the feature location onto the CDS
*       product with dnaLoc_to_aaLoc and ties an equivalent protein, site
*       or bond feature onto the product Bioseq.  Comment, quals, title,
*       ext, cit, xref, dbxref and except_text are moved (not copied) to
*       the new feature.  The source feature is then given the comment
*       "FeatureToBeDeleted".
*
*       Candidates with no containing CDS, with an unmappable location, or
*       rejected by OutOfFramePeptideButEmblOrDdbj are left untouched.
*
*****************************************************************************/
static void ImpFeatToProtRef(SeqFeatArr sfa)
{
    SeqFeatPtr f1, f2, best_cds, sfp;
    SeqLocPtr loc, slp;
    ImpFeatPtr ifp;
    ProtRefPtr prot;
    BioseqPtr bsp;
    SeqAnnotPtr sap, lastsap;
    Int4 diff_lowest, diff_current, frame;
    ValNodePtr tmp1, tmp2;
    Uint2 retval;
    Int2 i;
    Boolean lfree, partial5, partial3;
    CharPtr p, q;
    GBQualPtr qu, qunext;
    GeneRefPtr grp1, grp2;

    for (tmp1 = sfa.pept; tmp1 != NULL; tmp1 = tmp1->next) {
        f1 = (SeqFeatPtr) tmp1->data.ptrvalue;
        loc = f1->location;
        lfree = FALSE;
        if (tmp1->choice == SEQFEAT_BOND) {
            loc = fake_bond_loc(f1->location);
            lfree = TRUE;
        }

        /* ---- choose the coding region that contains the feature ---- */
        diff_lowest = -1;
        best_cds = NULL;
        for (tmp2 = sfa.cds; tmp2 != NULL; tmp2 = tmp2->next) {
            f2 = tmp2->data.ptrvalue;
            if (IncompatibleStrands (loc, f2->location)) continue;
            diff_current = SeqLocAinB(loc, f2->location);
            if (diff_current < 0) continue;
            /* first candidate, or tighter coverage than the current best */
            if (diff_lowest == -1 || diff_current < diff_lowest) {
                diff_lowest = diff_current;
                best_cds = f2;
                continue;
            }
            /* otherwise the candidate replaces the current best only if both
               features carry an unsuppressed gene xref and their locus_tags
               (or, failing that, their loci) do not disagree */
            grp1 = SeqMgrGetGeneXref (f1);
            if (grp1 == NULL || SeqMgrGeneIsSuppressed (grp1)) continue;
            grp2 = SeqMgrGetGeneXref (f2);
            if (grp2 == NULL || SeqMgrGeneIsSuppressed (grp2)) continue;
            if (StringDoesHaveText (grp1->locus_tag) && StringDoesHaveText (grp2->locus_tag)) {
              if (StringICmp (grp1->locus_tag, grp2->locus_tag) != 0) continue;
            } else if (StringDoesHaveText (grp1->locus) && StringDoesHaveText (grp2->locus)) {
              if (StringICmp (grp1->locus, grp2->locus) != 0) continue;
            }
            diff_lowest = diff_current;
            best_cds = f2;
        }

        /* fake_bond_loc returns a shallow copy: release only the node itself,
           the data it points to still belongs to f1->location */
        if (lfree) {
            MemFree(loc);
        }
        loc = NULL;

        if (best_cds == NULL) {
            p = SeqLocPrint(f1->location);
            ErrPostEx(SEV_WARNING, ERR_FEATURE_CDSNotFound,
            "CDS for the peptide feature [%s] not found", p);
            MemFree(p);
            continue;
        }
        if (OutOfFramePeptideButEmblOrDdbj (f1, best_cds))
            continue;

        /* ---- map the location; nothing has been taken from f1 yet ---- */
        CheckSeqLocForPartial (f1->location, &partial5, &partial3);
        slp = dnaLoc_to_aaLoc(best_cds, f1->location, TRUE, &frame, FALSE);
        if (slp == NULL) {
            p = SeqLocPrint(f1->location);
            q = SeqLocPrint(best_cds->location);
            ErrPostEx(SEV_ERROR, ERR_FEATURE_CannotMapDnaLocToAALoc, "peptide location:%s| CDS location:%s", p, q);
            MemFree(p);
            MemFree(q);
            continue;
        }
        SetSeqLocPartial (slp, partial5, partial3);

        /* ---- build the replacement feature ---- */
        ifp = (ImpFeatPtr) f1->data.value.ptrvalue;
        sfp = SeqFeatNew();
        sfp->location = slp;

        sfp->partial = (Boolean) (f1->partial || partial5 || partial3);
        sfp->excpt = f1->excpt;
        sfp->exp_ev = f1->exp_ev;
        sfp->pseudo = f1->pseudo;

        /* ownership of these members moves from f1 to sfp */
        sfp->comment = f1->comment;
        f1->comment = NULL;
        sfp->qual = f1->qual;
        f1->qual = NULL;
        sfp->title = f1->title;
        f1->title = NULL;
        sfp->ext = f1->ext;
        f1->ext = NULL;
        sfp->cit = f1->cit;
        f1->cit = NULL;
        sfp->xref = f1->xref;
        f1->xref = NULL;
        sfp->dbxref = f1->dbxref;
        f1->dbxref = NULL;
        sfp->except_text = f1->except_text;
        f1->except_text = NULL;

        if (tmp1->choice == SEQFEAT_PROT) {
            sfp->data.choice = SEQFEAT_PROT;
            prot = ProtRefNew();
            sfp->data.value.ptrvalue = prot;
            if (StringCmp(ifp->key, "mat_peptide") == 0) {
                prot->processed = 2;
                /* /product qualifiers become protein names */
                for (qu = sfp->qual; qu != NULL; qu = qunext) {
                    qunext = qu->next;
                    if (StringCmp(qu->qual, "product") == 0) {
                        ValNodeAddStr(&(prot->name), 0,StringSave(qu->val));
                        sfp->qual = remove_qual(sfp->qual, qu);
                    }
                }
            }
            if (StringCmp(ifp->key, "sig_peptide") == 0)
                prot->processed = 3;
            if (StringCmp(ifp->key, "transit_peptide") == 0)
                prot->processed = 4;
            if (StringCmp(ifp->key, "propeptide") == 0)
                prot->processed = 5;
            /* NOTE(review): a branch here used to promote f1->comment to a
               protein name, but f1->comment is already NULL at this point
               (moved to sfp above), so it never ran.  The comment simply
               stays on the new feature; confirm that this is the wanted
               outcome before reviving the promotion. */
        } else if (tmp1->choice == SEQFEAT_SITE) {
            sfp->data.choice = SEQFEAT_SITE;
            /* look the type up in the comment now held by sfp */
            if ((i = FindStr(feat_site, num_site, sfp->comment)) != -1) {
                sfp->data.value.intvalue = i;
            } else {
                sfp->data.value.intvalue = 255;
            }
        } else if (tmp1->choice == SEQFEAT_BOND) {
            sfp->data.choice = SEQFEAT_BOND;
            if ((i = FindStr(feat_bond, num_bond, sfp->comment)) != -1) {
                sfp->data.value.intvalue = i;
            } else {
                sfp->data.value.intvalue = 255;
            }
        }

        /* mark the source feature; its own comment was moved to sfp above */
        f1->comment = StringSave("FeatureToBeDeleted");

        if (sfp->partial == FALSE) {
            retval = SeqLocPartialCheck(sfp->location);
            if (retval > SLP_COMPLETE && retval < SLP_NOSTART) {
                sfp->partial = TRUE;
            }
        }

        /* ---- attach to the product Bioseq ---- */
        bsp = BioseqLockById(SeqLocId(best_cds->product));
        if (bsp == NULL) {
            /* nowhere to put the feature: release it instead of leaking it */
            SeqFeatFree(sfp);
            continue;
        }
        /* use the first feature table, creating one at the end of the annot
           chain when the Bioseq has none */
        lastsap = NULL;
        for (sap = bsp->annot; sap != NULL && sap->type != 1; sap = sap->next) {
            lastsap = sap;
        }
        if (sap == NULL) {
            sap = SeqAnnotNew();
            if (sap != NULL) {
                sap->type = 1;
                if (lastsap == NULL) {
                    bsp->annot = sap;
                } else {
                    lastsap->next = sap;
                }
            }
        }
        if (sap != NULL) {
            sap->data = tie_feat(sap->data, sfp);
        } else {
            SeqFeatFree(sfp);
        }
        BioseqUnlock(bsp);
    }
}
5062 
PseudoGeneOverlap(SeqLocPtr slp)5063 static Boolean PseudoGeneOverlap (SeqLocPtr slp)
5064 
5065 {
5066   SeqFeatPtr  gene;
5067   GeneRefPtr  grp;
5068 
5069   gene = SeqMgrGetOverlappingGene (slp, NULL);
5070   if (gene == NULL) return FALSE;
5071   if (gene->pseudo) return TRUE;
5072   grp = (GeneRefPtr) gene->data.value.ptrvalue;
5073   if (grp == NULL) return FALSE;
5074   if (grp->pseudo) return TRUE;
5075   return FALSE;
5076 }
5077 
GetCdRegionsWithPeptides(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)5078 static void GetCdRegionsWithPeptides (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
5079 {
5080     SeqAnnotPtr    annot, sap;
5081     BioseqPtr      bsp, fbsp;
5082     BioseqSetPtr   bssp;
5083     Int2           i;
5084     ImpFeatPtr     ifp;
5085     Boolean        okay;
5086     SeqFeatArrPtr  sfap;
5087     SeqFeatPtr     sfp;
5088     SeqIdPtr       sip;
5089     ValNodePtr     tmp;
5090 
5091     sfap = (SeqFeatArrPtr) data;
5092     if (IS_Bioseq(sep)) {
5093         bsp = (BioseqPtr)(sep->data.ptrvalue);
5094         annot = bsp->annot;
5095     } else {
5096         bssp = (BioseqSetPtr)(sep->data.ptrvalue);
5097         annot = bssp->annot;
5098     }
5099     for (sap = annot; sap != NULL; sap = sap->next) {
5100         if (sap->type != 1) {
5101             continue;
5102         }
5103         for (sfp = sap->data; sfp != NULL; sfp = sfp->next) {
5104             if (sfp->data.choice == SEQFEAT_CDREGION) {
5105                 if ((! sfp->pseudo) && (! (PseudoGeneOverlap (sfp->location)))) {
5106                     tmp = ValNodeNew(NULL);
5107                     tmp->data.ptrvalue = sfp;
5108                     sfap->cds = tie_next(sfap->cds, tmp);
5109                 }
5110             }
5111             if (sfp->data.choice == SEQFEAT_IMP) {
5112                 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
5113                 if (StringCmp(ifp->key, "mat_peptide") == 0 ||
5114                     StringCmp(ifp->key, "sig_peptide") == 0 ||
5115                     StringCmp(ifp->key, "transit_peptide") == 0 ||
5116                     StringCmp(ifp->key, "propeptide") == 0) {
5117                     tmp = ValNodeNew(NULL);
5118                     tmp->choice = SEQFEAT_PROT;
5119                     tmp->data.ptrvalue = sfp;
5120                     sfap->pept = tie_next(sfap->pept, tmp);
5121                 } else if (StringCmp(ifp->key, "misc_feature") == 0
5122                         && sfp->comment != NULL) {
5123                     if ((i = FindStr(feat_site, num_site, sfp->comment)) != -1){
5124                         if (i >= 23 && i <= 25) {
5125                             okay = TRUE;
5126                             fbsp = BioseqFindFromSeqLoc (sfp->location);
5127                             if (fbsp != NULL) {
5128                               for (sip = fbsp->id; sip != NULL; sip = sip->next) {
5129                                 if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ) {
5130                                   okay = FALSE;
5131                                 }
5132                               }
5133                             }
5134                             if (okay) {
5135                                 tmp = ValNodeNew(NULL);
5136                                 tmp->choice = SEQFEAT_SITE;
5137                                 tmp->data.ptrvalue = sfp;
5138                                 sfap->pept = tie_next(sfap->pept, tmp);
5139                             }
5140                         }
5141                     } else if ((i =
5142                             FindStr(feat_bond, num_bond, sfp->comment)) != -1){
5143                         tmp = ValNodeNew(NULL);
5144                         tmp->choice = SEQFEAT_BOND;
5145                         tmp->data.ptrvalue = sfp;
5146                         sfap->pept = tie_next(sfap->pept, tmp);
5147                     }
5148                 }
5149             }
5150         }
5151     }
5152 }
5153 
/*****************************************************************************
*
*   RemovePeptideImpFeats(sep, data, index, indent)
*       SeqEntryExplore callback.  Removes, from the feature tables
*       attached directly to this Bioseq or Bioseq-set, every import
*       feature whose comment is exactly "FeatureToBeDeleted".  An annot
*       that ends up with neither data nor a descriptor is unlinked and
*       freed.  data, index and indent are unused.
*
*****************************************************************************/
static void RemovePeptideImpFeats (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    SeqAnnotPtr PNTR  link;     /* slot that points at the annot being visited */
    SeqAnnotPtr       cur, following;
    SeqFeatPtr        feat, featnext;

    if (IS_Bioseq(sep)) {
        link = (SeqAnnotPtr PNTR) &(((BioseqPtr)(sep->data.ptrvalue))->annot);
    } else {
        link = (SeqAnnotPtr PNTR) &(((BioseqSetPtr)(sep->data.ptrvalue))->annot);
    }

    for (cur = *link; cur != NULL; cur = following) {
        following = cur->next;

        if (cur->type == 1) {
            for (feat = cur->data; feat != NULL; feat = featnext) {
                featnext = feat->next;  /* fetch before feat is possibly removed */
                if (feat->data.choice == SEQFEAT_IMP &&
                    feat->comment != NULL &&
                    StringCmp(feat->comment, "FeatureToBeDeleted") == 0) {
                    cur->data = remove_feat(cur->data, feat);
                }
            }
        }

        /* an empty annot is kept when it still carries an annot descriptor */
        if (cur->data != NULL || cur->desc != NULL) {
            link = (SeqAnnotPtr PNTR) &(cur->next);
            continue;
        }

        *link = cur->next;
        cur->next = NULL;
        SeqAnnotFree (cur);
    }
}
5198 
/* Frees the ValNode chains held in sfap->cds and sfap->pept (the nodes
   only; the features they point to are left alone) and resets both heads
   to NULL so the structure no longer refers to freed memory.  Does
   nothing when sfap is NULL. */
static void CleanUpTmpFeatStruct(SeqFeatArrPtr sfap)
{
    ValNodePtr tmp, tmpnext;

    if (sfap == NULL)
        return;

    for (tmp = sfap->cds; tmp != NULL; tmp = tmpnext) {
        tmpnext = tmp->next;
        MemFree(tmp);
    }
    sfap->cds = NULL;

    for (tmp = sfap->pept; tmp != NULL; tmp = tmpnext) {
        tmpnext = tmp->next;
        MemFree(tmp);
    }
    sfap->pept = NULL;
}
5212 
/*
 * VisitFeaturesInSep callback.  Turns a processed-protein feature
 * (Prot-ref with processed 1..5) that is located on a non-protein Bioseq
 * into an import feature keyed preprotein / mat_peptide / sig_peptide /
 * transit_peptide / propeptide.  Each non-empty protein name becomes a
 * "product" qualifier and a non-empty description becomes a "prot_desc"
 * qualifier, appended after any qualifiers the feature already has.  The
 * original Prot-ref is freed.  Any other feature is left untouched.
 * userdata is unused.
 */
static void ProtFeatOnNucToImpFeat (SeqFeatPtr sfp, Pointer userdata)

{
  BioseqPtr       bsp;
  GBQualPtr       gbq;
  GBQualPtr PNTR  tail;
  ImpFeatPtr      ifp;
  CharPtr         key = NULL;
  ProtRefPtr      prp;
  CharPtr         str;
  ValNodePtr      vnp;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return;
  prp = (ProtRefPtr) sfp->data.value.ptrvalue;
  if (prp == NULL) return;

  /* pick the key before allocating anything, so an unsupported processed
     value returns without leaving an ImpFeat behind */
  switch (prp->processed) {
    case 1:
      key = "preprotein";
      break;
    case 2:
      key = "mat_peptide";
      break;
    case 3:
      key = "sig_peptide";
      break;
    case 4:
      key = "transit_peptide";
      break;
    case 5:
      key = "propeptide";
      break;
    default:
      return;
  }

  bsp = BioseqFindFromSeqLoc (sfp->location);
  if (bsp == NULL) return;
  if (ISA_aa (bsp->mol)) return;

  ifp = ImpFeatNew ();
  if (ifp == NULL) return;
  ifp->key = StringSave (key);

  /* locate the end of the existing qualifier chain once */
  for (tail = &(sfp->qual); *tail != NULL; tail = &((*tail)->next)) continue;

  for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
    str = (CharPtr) vnp->data.ptrvalue;
    if (StringHasNoText (str)) continue;
    gbq = GBQualNew ();
    if (gbq == NULL) continue;
    gbq->qual = StringSave ("product");
    gbq->val = StringSave (str);
    *tail = gbq;
    tail = &(gbq->next);
  }

  if (StringDoesHaveText (prp->desc)) {
    gbq = GBQualNew ();
    if (gbq != NULL) {
      gbq->qual = StringSave ("prot_desc");
      gbq->val = StringSave (prp->desc);  /* the description itself, not the last name */
      *tail = gbq;
      tail = &(gbq->next);
    }
  }

  sfp->data.choice = SEQFEAT_IMP;
  sfp->data.value.ptrvalue = ifp;
  /* the strings used above were copied, so the whole Prot-ref can go */
  ProtRefFree (prp);
}
5290 
/*
 * Runs the peptide conversion passes over sep, in this order:
 *   1. ProtFeatOnNucToImpFeat on every feature,
 *   2. GetCdRegionsWithPeptides to collect coding regions and candidates,
 *   3. ImpFeatToProtRef to build the replacement features,
 *   4. RemovePeptideImpFeats to drop the features marked for deletion,
 *   5. CleanUpTmpFeatStruct to release the temporary lists.
 * Does nothing when sep is NULL.
 */
void EntryChangeImpFeatToProt (SeqEntryPtr sep)
{
    SeqFeatArr collected;

    if (sep == NULL) return;

    MemSet ((Pointer) (&collected), 0, sizeof (SeqFeatArr));

    VisitFeaturesInSep (sep, NULL, ProtFeatOnNucToImpFeat);
    SeqEntryExplore(sep, &collected, GetCdRegionsWithPeptides);
    ImpFeatToProtRef(collected);
    SeqEntryExplore(sep, NULL, RemovePeptideImpFeats);
    CleanUpTmpFeatStruct(&collected);
}
5306 
5307 //LCOV_EXCL_START
5308 /* functions moved from Sequin */
/*
 * Rewrites names[4] (the initials) of every author in alp whose person id
 * has choice 2, so that each character other than '-', '.' and ' ' is
 * followed by exactly one period.  Hyphens are kept as they are; existing
 * periods and spaces are dropped.  Does nothing unless alp->choice is 1.
 * Stops at the first author for which the work buffer cannot be
 * allocated.
 */
static void NormalizeAuthors (AuthListPtr alp)

{
  AuthorPtr    author;
  CharPtr      buf;
  size_t       bufsize;
  ValNodePtr   node;
  Int2         out;
  PersonIdPtr  person;
  CharPtr      src;
  NameStdPtr   stdname;

  if (alp == NULL || alp->choice != 1) return;

  for (node = alp->names; node != NULL; node = node->next) {
    author = node->data.ptrvalue;
    if (author == NULL) continue;
    person = author->name;
    if (person == NULL || person->choice != 2) continue;
    stdname = person->data;
    if (stdname == NULL || stdname->names [4] == NULL) continue;

    /* at most two output characters per input character, with headroom */
    bufsize = MAX ((size_t) (StringLen (stdname->names [4]) * 2 + 4), (size_t) 64);
    buf = MemNew (bufsize);
    if (buf == NULL) return;
    buf [0] = '\0';

    out = 0;
    for (src = stdname->names [4]; *src != '\0'; src++) {
      switch (*src) {
        case '.' :
        case ' ' :
          /* dropped */
          break;
        case '-' :
          buf [out] = *src;
          out++;
          break;
        default :
          buf [out] = *src;
          out++;
          buf [out] = '.';
          out++;
          break;
      }
    }
    buf [out] = '\0';

    stdname->names [4] = MemFree (stdname->names [4]);
    stdname->names [4] = StringSave (buf);
    MemFree (buf);
  }
}
5369 
/*
 * Applies NormalizeAuthors to the author list(s) of one publication node.
 * PMid and Muid nodes and nodes without data are skipped.  For a Cit-sub,
 * the imprint's publisher is moved to the author affiliation and the
 * imprint's date to the Cit-sub date when those slots are empty, and the
 * imprint is freed once it has no publisher.  For PUB_Man the authors are
 * normalized only when othertype is 2 and let_type is 3.
 */
static void NormalizeAPub (ValNodePtr vnp)

{
  CitArtPtr    article;
  AuthListPtr  authors;
  CitBookPtr   book;
  CitGenPtr    general;
  ImprintPtr   imprint;
  CitPatPtr    patent;
  CitSubPtr    submission;

  if (vnp == NULL ||
      vnp->choice == PUB_PMid ||
      vnp->choice == PUB_Muid ||
      vnp->data.ptrvalue == NULL) {
    return;
  }

  switch (vnp->choice) {
    case PUB_Gen :
      general = (CitGenPtr) vnp->data.ptrvalue;
      NormalizeAuthors (general->authors);
      break;
    case PUB_Sub :
      submission = (CitSubPtr) vnp->data.ptrvalue;
      NormalizeAuthors (submission->authors);
      authors = submission->authors;
      imprint = submission->imp;
      if (imprint != NULL) {
        if (authors != NULL && authors->affil == NULL && imprint->pub != NULL) {
          authors->affil = imprint->pub;
          imprint->pub = NULL;
        }
        if (submission->date == NULL && imprint->date != NULL) {
          submission->date = imprint->date;
          imprint->date = NULL;
        }
        if (imprint->pub == NULL) {
          submission->imp = ImprintFree (submission->imp);
        }
      }
      break;
    case PUB_Article :
      article = (CitArtPtr) vnp->data.ptrvalue;
      NormalizeAuthors (article->authors);
      break;
    case PUB_Book :
      book = (CitBookPtr) vnp->data.ptrvalue;
      NormalizeAuthors (book->authors);
      break;
    case PUB_Man :
      book = (CitBookPtr) vnp->data.ptrvalue;
      if (book->othertype == 2 && book->let_type == 3) {
        NormalizeAuthors (book->authors);
      }
      break;
    case PUB_Patent :
      patent = (CitPatPtr) vnp->data.ptrvalue;
      NormalizeAuthors (patent->authors);
      NormalizeAuthors (patent->applicants);
      NormalizeAuthors (patent->assignees);
      break;
    default :
      break;
  }
}
5430 
NormalizePeriods(GatherContextPtr gcp)5431 static Boolean NormalizePeriods (GatherContextPtr gcp)
5432 
5433 {
5434   PubdescPtr  pdp;
5435   ValNodePtr  sdp;
5436   SeqFeatPtr  sfp;
5437   ValNodePtr  vnp;
5438 
5439   if (gcp == NULL) return TRUE;
5440   pdp = NULL;
5441   if (gcp->thistype == OBJ_SEQFEAT) {
5442     sfp = (SeqFeatPtr) gcp->thisitem;
5443     if (sfp != NULL && sfp->data.choice == SEQFEAT_PUB) {
5444       pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5445     }
5446   } else if (gcp->thistype == OBJ_SEQDESC) {
5447     sdp = (ValNodePtr) gcp->thisitem;
5448     if (sdp != NULL && sdp->choice == Seq_descr_pub) {
5449       pdp = (PubdescPtr) sdp->data.ptrvalue;
5450     }
5451   }
5452   if (pdp == NULL) return TRUE;
5453   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
5454     NormalizeAPub (vnp);
5455   }
5456   return TRUE;
5457 }
5458 
/*
 *  NormalizePeriodsOnInitials
 *    Runs GatherSeqEntry over sep with NormalizePeriods as the callback.
 *    The gather scope ignores every object type except Bioseqs, Bioseq
 *    segments, Seq-annots, Seq-feats and Seq-descrs.  No-op for NULL sep.
 */
void NormalizePeriodsOnInitials (SeqEntryPtr sep)

{
  GatherScope  scope;

  if (sep == NULL) return;

  MemSet ((Pointer) &scope, 0, sizeof (GatherScope));
  /* ignore everything first, then re-enable only the types needed */
  MemSet ((Pointer) scope.ignore, (int) TRUE, (size_t) (OBJ_MAX * sizeof (Boolean)));
  scope.ignore [OBJ_SEQDESC] = FALSE;
  scope.ignore [OBJ_SEQANNOT] = FALSE;
  scope.ignore [OBJ_SEQFEAT] = FALSE;
  scope.ignore [OBJ_BIOSEQ_SEG] = FALSE;
  scope.ignore [OBJ_BIOSEQ] = FALSE;
  scope.get_feats_location = FALSE;
  scope.seglevels = 1;

  GatherSeqEntry (sep, NULL, NormalizePeriods, &scope);
}
5476 
NormalizeRnas(GatherContextPtr gcp)5477 static Boolean NormalizeRnas (GatherContextPtr gcp)
5478 
5479 {
5480   GBQualPtr       gbqual;
5481   GBQualPtr       nextqual;
5482   GBQualPtr PNTR  prevqual;
5483   RnaRefPtr       rrp;
5484   SeqFeatPtr      sfp;
5485   CharPtr         str;
5486 
5487   if (gcp == NULL) return TRUE;
5488   if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
5489   sfp = (SeqFeatPtr) gcp->thisitem;
5490   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return TRUE;
5491   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
5492   if (rrp == NULL) return TRUE;
5493   if (rrp->type == 0) {
5494     rrp->type = 255;
5495   }
5496   if (rrp->ext.choice != 0 && rrp->ext.choice != 1) return TRUE;
5497   if (! TASNStringHasNoText (rrp->ext.value.ptrvalue)) return TRUE;
5498   str = NULL;
5499   gbqual = sfp->qual;
5500   prevqual = (GBQualPtr PNTR) &(sfp->qual);
5501   while (gbqual != NULL) {
5502     nextqual = gbqual->next;
5503     if (StringICmp (gbqual->qual, "product") == 0) {
5504       str = StringSave (gbqual->val);
5505       *(prevqual) = gbqual->next;
5506       gbqual->next = NULL;
5507       gbqual->qual = MemFree (gbqual->qual);
5508       gbqual->val = MemFree (gbqual->val);
5509       GBQualFree (gbqual);
5510     } else {
5511       prevqual = (GBQualPtr PNTR) &(gbqual->next);
5512     }
5513     gbqual = nextqual;
5514   }
5515   if (str == NULL) {
5516     gbqual = sfp->qual;
5517     prevqual = (GBQualPtr PNTR) &(sfp->qual);
5518     while (gbqual != NULL) {
5519       nextqual = gbqual->next;
5520       if (StringICmp (gbqual->qual, "standard_name") == 0) {
5521         str = StringSave (gbqual->val);
5522         *(prevqual) = gbqual->next;
5523         gbqual->next = NULL;
5524         gbqual->qual = MemFree (gbqual->qual);
5525         gbqual->val = MemFree (gbqual->val);
5526         GBQualFree (gbqual);
5527       } else {
5528         prevqual = (GBQualPtr PNTR) &(gbqual->next);
5529       }
5530       gbqual = nextqual;
5531     }
5532   }
5533   if (rrp->ext.choice == 1 && rrp->ext.value.ptrvalue != NULL) {
5534     rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
5535   }
5536   if (rrp->ext.choice == 0 || rrp->ext.choice == 1) {
5537     rrp->ext.choice = 1;
5538     rrp->ext.value.ptrvalue = str;
5539     str = NULL;
5540   }
5541   MemFree (str);
5542   return TRUE;
5543 }
5544 
/*
 *  MoveRnaGBQualProductToName
 *    Runs GatherSeqEntry over sep with NormalizeRnas as the callback.
 *    The gather scope ignores every object type except Bioseqs, Bioseq
 *    segments, Seq-annots and Seq-feats.  No-op for NULL sep.
 */
void MoveRnaGBQualProductToName (SeqEntryPtr sep)

{
  GatherScope  scope;

  if (sep == NULL) return;

  MemSet ((Pointer) &scope, 0, sizeof (GatherScope));
  /* ignore everything first, then re-enable only the types needed */
  MemSet ((Pointer) scope.ignore, (int) TRUE, (size_t) (OBJ_MAX * sizeof (Boolean)));
  scope.ignore [OBJ_SEQANNOT] = FALSE;
  scope.ignore [OBJ_SEQFEAT] = FALSE;
  scope.ignore [OBJ_BIOSEQ_SEG] = FALSE;
  scope.ignore [OBJ_BIOSEQ] = FALSE;
  scope.get_feats_location = FALSE;
  scope.seglevels = 1;

  GatherSeqEntry (sep, NULL, NormalizeRnas, &scope);
}
5561 
NormalizeProts(GatherContextPtr gcp)5562 static Boolean NormalizeProts (GatherContextPtr gcp)
5563 
5564 {
5565   GBQualPtr       gbqual;
5566   GBQualPtr       nextqual;
5567   GBQualPtr PNTR  prevqual;
5568   ProtRefPtr      prp;
5569   SeqFeatPtr      sfp;
5570   CharPtr         str;
5571   ValNodePtr      vnp;
5572 
5573   if (gcp == NULL) return TRUE;
5574   if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
5575   sfp = (SeqFeatPtr) gcp->thisitem;
5576   if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return TRUE;
5577   prp = (ProtRefPtr) sfp->data.value.ptrvalue;
5578   if (prp == NULL) return TRUE;
5579   vnp = prp->name;
5580   if (vnp == NULL || TASNStringHasNoText (vnp->data.ptrvalue)) {
5581     str = NULL;
5582     gbqual = sfp->qual;
5583     prevqual = (GBQualPtr PNTR) &(sfp->qual);
5584     while (gbqual != NULL) {
5585       nextqual = gbqual->next;
5586       if (StringICmp (gbqual->qual, "product") == 0) {
5587         str = StringSave (gbqual->val);
5588         *(prevqual) = gbqual->next;
5589         gbqual->next = NULL;
5590         gbqual->qual = MemFree (gbqual->qual);
5591         gbqual->val = MemFree (gbqual->val);
5592         GBQualFree (gbqual);
5593       } else {
5594         prevqual = (GBQualPtr PNTR) &(gbqual->next);
5595       }
5596       gbqual = nextqual;
5597     }
5598     if (vnp == NULL) {
5599       vnp = ValNodeNew (NULL);
5600       prp->name = vnp;
5601     }
5602     vnp = prp->name;
5603     if (vnp != NULL) {
5604       vnp->data.ptrvalue = str;
5605       str = NULL;
5606     }
5607     MemFree (str);
5608   }
5609   vnp = prp->name;
5610   if (vnp == NULL || TASNStringHasNoText (vnp->data.ptrvalue)) return TRUE;
5611   if (prp->desc == NULL) return TRUE;
5612   if (StringICmp (vnp->data.ptrvalue, prp->desc) == 0) {
5613     prp->desc = MemFree (prp->desc);
5614   }
5615   return TRUE;
5616 }
5617 
/*
 *  MoveProtGBQualProductToName
 *    Runs GatherSeqEntry over sep with NormalizeProts as the callback.
 *    The gather scope ignores every object type except Bioseqs, Bioseq
 *    segments, Seq-annots and Seq-feats.  No-op for NULL sep.
 */
void MoveProtGBQualProductToName (SeqEntryPtr sep)

{
  GatherScope  scope;

  if (sep == NULL) return;

  MemSet ((Pointer) &scope, 0, sizeof (GatherScope));
  /* ignore everything first, then re-enable only the types needed */
  MemSet ((Pointer) scope.ignore, (int) TRUE, (size_t) (OBJ_MAX * sizeof (Boolean)));
  scope.ignore [OBJ_SEQANNOT] = FALSE;
  scope.ignore [OBJ_SEQFEAT] = FALSE;
  scope.ignore [OBJ_BIOSEQ_SEG] = FALSE;
  scope.ignore [OBJ_BIOSEQ] = FALSE;
  scope.get_feats_location = FALSE;
  scope.seglevels = 1;

  GatherSeqEntry (sep, NULL, NormalizeProts, &scope);
}
5634 
NormalizeCds(GatherContextPtr gcp)5635 static Boolean NormalizeCds (GatherContextPtr gcp)
5636 
5637 {
5638   BioseqContextPtr  bcp;
5639   BioseqPtr         bsp;
5640   GBQualPtr         gbqual;
5641   GBQualPtr         nextqual;
5642   GBQualPtr PNTR    prevqual;
5643   ProtRefPtr        prp;
5644   SeqEntryPtr       sep;
5645   SeqFeatPtr        sfp;
5646   SeqFeatPtr        sfp2;
5647   CharPtr           str;
5648   ValNodePtr        vnp;
5649 
5650   if (gcp == NULL) return TRUE;
5651   if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
5652   sfp = (SeqFeatPtr) gcp->thisitem;
5653   if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return TRUE;
5654   bsp = BioseqFind (SeqLocId (sfp->product));
5655   if (bsp == NULL) return TRUE;
5656   str = NULL;
5657   gbqual = sfp->qual;
5658   prevqual = (GBQualPtr PNTR) &(sfp->qual);
5659   while (gbqual != NULL) {
5660     nextqual = gbqual->next;
5661     if (StringICmp (gbqual->qual, "product") == 0) {
5662       str = StringSave (gbqual->val);
5663       *(prevqual) = gbqual->next;
5664       gbqual->next = NULL;
5665       gbqual->qual = MemFree (gbqual->qual);
5666       gbqual->val = MemFree (gbqual->val);
5667       GBQualFree (gbqual);
5668     } else {
5669       prevqual = (GBQualPtr PNTR) &(gbqual->next);
5670     }
5671     gbqual = nextqual;
5672   }
5673   if (str == NULL) return TRUE;
5674 
5675   sfp2 = NULL;
5676   bcp = BioseqContextNew (bsp);
5677   sfp2 = BioseqContextGetSeqFeat (bcp, SEQFEAT_PROT, NULL, NULL, 0);
5678   BioseqContextFree (bcp);
5679   if (sfp2 == NULL) {
5680     prp = CreateNewProtRef (str, NULL, NULL, NULL);
5681     if (prp != NULL) {
5682       sep = SeqMgrGetSeqEntryForData (bsp);
5683       if (sep != NULL) {
5684         sfp = CreateNewFeature (sep, NULL, SEQFEAT_PROT, NULL);
5685         if (sfp != NULL) {
5686           sfp->data.value.ptrvalue = (Pointer) prp;
5687         }
5688       }
5689     }
5690     return TRUE;
5691   }
5692 
5693   prp = (ProtRefPtr) sfp2->data.value.ptrvalue;
5694   if (prp == NULL) return TRUE;
5695   vnp = prp->name;
5696   if (vnp != NULL && (! TASNStringHasNoText (vnp->data.ptrvalue))) return TRUE;
5697   if (vnp == NULL) {
5698     vnp = ValNodeNew (NULL);
5699     prp->name = vnp;
5700   }
5701   vnp = prp->name;
5702   if (vnp != NULL) {
5703     vnp->data.ptrvalue = str;
5704     str = NULL;
5705   }
5706   MemFree (str);
5707   return TRUE;
5708 }
5709 
/*
 *  MoveCdsGBQualProductToName
 *    Runs GatherSeqEntry over sep with NormalizeCds as the callback.
 *    The gather scope ignores every object type except Bioseqs, Bioseq
 *    segments, Seq-annots and Seq-feats.  No-op for NULL sep.
 */
void MoveCdsGBQualProductToName (SeqEntryPtr sep)

{
  GatherScope  scope;

  if (sep == NULL) return;

  MemSet ((Pointer) &scope, 0, sizeof (GatherScope));
  /* ignore everything first, then re-enable only the types needed */
  MemSet ((Pointer) scope.ignore, (int) TRUE, (size_t) (OBJ_MAX * sizeof (Boolean)));
  scope.ignore [OBJ_SEQANNOT] = FALSE;
  scope.ignore [OBJ_SEQFEAT] = FALSE;
  scope.ignore [OBJ_BIOSEQ_SEG] = FALSE;
  scope.ignore [OBJ_BIOSEQ] = FALSE;
  scope.get_feats_location = FALSE;
  scope.seglevels = 1;

  GatherSeqEntry (sep, NULL, NormalizeCds, &scope);
}
5726 
NormalizeFeatGBQuals(GatherContextPtr gcp)5727 static Boolean NormalizeFeatGBQuals (GatherContextPtr gcp)
5728 
5729 {
5730   GBQualPtr       gbqual;
5731   size_t          len;
5732   GBQualPtr       nextqual;
5733   GBQualPtr PNTR  prevqual;
5734   SeqFeatPtr      sfp;
5735   CharPtr         str;
5736 
5737   if (gcp == NULL) return TRUE;
5738   if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
5739   sfp = (SeqFeatPtr) gcp->thisitem;
5740   if (sfp == NULL) return TRUE;
5741   gbqual = sfp->qual;
5742   prevqual = (GBQualPtr PNTR) &(sfp->qual);
5743   while (gbqual != NULL) {
5744     nextqual = gbqual->next;
5745     if (StringICmp (gbqual->qual, "partial") == 0) {
5746       *(prevqual) = gbqual->next;
5747       gbqual->next = NULL;
5748       gbqual->qual = MemFree (gbqual->qual);
5749       gbqual->val = MemFree (gbqual->val);
5750       GBQualFree (gbqual);
5751       sfp->partial = TRUE;
5752     } else if (StringICmp (gbqual->qual, "evidence") == 0) {
5753       if (StringICmp (gbqual->val, "experimental") == 0) {
5754         sfp->exp_ev = 1;
5755       } else if (StringICmp (gbqual->val, "not_experimental") == 0) {
5756         sfp->exp_ev = 2;
5757       }
5758       *(prevqual) = gbqual->next;
5759       gbqual->next = NULL;
5760       gbqual->qual = MemFree (gbqual->qual);
5761       gbqual->val = MemFree (gbqual->val);
5762       GBQualFree (gbqual);
5763     } else if (StringICmp (gbqual->qual, "exception") == 0) {
5764       sfp->excpt = TRUE;
5765     } else if (StringICmp (gbqual->qual, "note") == 0) {
5766       *(prevqual) = gbqual->next;
5767       gbqual->next = NULL;
5768       if (sfp->comment == NULL) {
5769         sfp->comment = gbqual->val;
5770       } else {
5771         len = StringLen (sfp->comment) + StringLen (gbqual->val) + 5;
5772         str = MemNew (sizeof (Char) * len);
5773         StringCpy (str, sfp->comment);
5774         StringCat (str, "; ");
5775         StringCat (str, gbqual->val);
5776         sfp->comment = MemFree (sfp->comment);
5777         gbqual->val = MemFree (gbqual->val);
5778         sfp->comment = str;
5779       }
5780       gbqual->val = NULL;
5781       GBQualFree (gbqual);
5782     } else {
5783       prevqual = (GBQualPtr PNTR) &(gbqual->next);
5784     }
5785     gbqual = nextqual;
5786   }
5787   return TRUE;
5788 }
5789 
/*
 *  MoveFeatGBQualsToFields
 *    Runs GatherSeqEntry over sep with NormalizeFeatGBQuals as the callback.
 *    The gather scope ignores every object type except Bioseqs, Bioseq
 *    segments, Seq-annots and Seq-feats.  No-op for NULL sep.
 */
void MoveFeatGBQualsToFields (SeqEntryPtr sep)

{
  GatherScope  scope;

  if (sep == NULL) return;

  MemSet ((Pointer) &scope, 0, sizeof (GatherScope));
  /* ignore everything first, then re-enable only the types needed */
  MemSet ((Pointer) scope.ignore, (int) TRUE, (size_t) (OBJ_MAX * sizeof (Boolean)));
  scope.ignore [OBJ_SEQANNOT] = FALSE;
  scope.ignore [OBJ_SEQFEAT] = FALSE;
  scope.ignore [OBJ_BIOSEQ_SEG] = FALSE;
  scope.ignore [OBJ_BIOSEQ] = FALSE;
  scope.get_feats_location = FALSE;
  scope.seglevels = 1;

  GatherSeqEntry (sep, NULL, NormalizeFeatGBQuals, &scope);
}
5806 
/*
 *  StripTitleFromProteinProducts
 *    SeqEntryExplore callback.  For an amino-acid Bioseq that has no
 *    SEQID_OTHER among its ids, extracts a title descriptor from the
 *    descriptor chain and frees it.  mydata, index and indent are unused.
 */
static void StripTitleFromProteinProducts (SeqEntryPtr sep, Pointer mydata,
                                           Int4 index, Int2 indent)

{
  SeqIdPtr    id;
  BioseqPtr   prot;
  ValNodePtr  title;

  if (sep == NULL || (! IS_Bioseq (sep))) return;
  prot = (BioseqPtr) sep->data.ptrvalue;
  if (prot == NULL || (! ISA_aa (prot->mol))) return;

  /* a Bioseq carrying a SEQID_OTHER id keeps its title */
  id = prot->id;
  while (id != NULL) {
    if (id->choice == SEQID_OTHER) return;
    id = id->next;
  }

  title = ValNodeExtract (&(prot->descr), Seq_descr_title);
  if (title != NULL) {
    ValNodeFreeData (title);
  }
}
5827 
/*
 *  StripTitleFromProtsInNucProts
 *    For a nuc-prot set, explores the set with StripTitleFromProteinProducts.
 *    For container sets (class 7, classes 13 through 16, wgs sets and small
 *    genome sets) recurses into each member instead.  Anything else,
 *    including a plain Bioseq entry, is ignored.
 *
 *    NOTE(review): the literals 7 and 13..16 are presumably the genbank and
 *    mut/pop/phy/eco set classes that move_cds_ex tests by name - confirm
 *    against the class definitions before replacing them.
 */
void StripTitleFromProtsInNucProts (SeqEntryPtr sep)

{
  BioseqSetPtr  bssp;
  SeqEntryPtr   child;
  Boolean       container;

  if (sep == NULL || (! IS_Bioseq_set (sep))) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  container = (Boolean) (bssp->_class == 7 ||
                         (bssp->_class >= 13 && bssp->_class <= 16) ||
                         bssp->_class == BioseqseqSet_class_wgs_set ||
                         bssp->_class == BioseqseqSet_class_small_genome_set);

  if (container) {
    child = bssp->seq_set;
    while (child != NULL) {
      StripTitleFromProtsInNucProts (child);
      child = child->next;
    }
  } else if (bssp->_class == BioseqseqSet_class_nuc_prot) {
    SeqEntryExplore (sep, NULL, StripTitleFromProteinProducts);
  }
}
5849 //LCOV_EXCL_STOP
5850 
5851 
/*
 *  CleanFeatStrings
 *    Runs the CleanVisString family over the string fields of one feature.
 *    The comment and title are always cleaned.  Bond, site, psec-str and
 *    comment features carry no data pointer and are not examined further;
 *    neither is any feature whose data pointer is NULL.
 *
 *    Side effects beyond string cleaning:
 *      - an RNA name (ext.choice 1) that ends up NULL resets ext.choice to 0;
 *      - a region whose string ends up NULL is turned into a comment feature.
 */
static void CleanFeatStrings (SeqFeatPtr sfp)

{
  BioSourcePtr  biop;
  GeneRefPtr    grp;
  ImpFeatPtr    ifp;
  OrgNamePtr    onp;
  OrgRefPtr     orp = NULL;
  PubdescPtr    pdp;
  ProtRefPtr    prp;
  RnaRefPtr     rrp;

  if (sfp == NULL) return;
  CleanVisString (&sfp->comment);
  CleanVisString (&sfp->title);

  /* these feature types do not use data.value.ptrvalue */
  if (sfp->data.choice == SEQFEAT_BOND ||
      sfp->data.choice == SEQFEAT_SITE ||
      sfp->data.choice == SEQFEAT_PSEC_STR ||
      sfp->data.choice == SEQFEAT_COMMENT) return;
  if (sfp->data.value.ptrvalue == NULL) return;

  /* feature types not listed here have no strings to clean */
  switch (sfp->data.choice) {
    case SEQFEAT_GENE :
      grp = (GeneRefPtr) sfp->data.value.ptrvalue;
      CleanVisString (&(grp->locus));
      CleanVisString (&(grp->allele));
      CleanVisString (&(grp->desc));
      CleanVisString (&(grp->maploc));
      CleanVisString (&(grp->locus_tag));
      CleanVisStringList (&(grp->syn));
      break;
    case SEQFEAT_ORG :
      orp = (OrgRefPtr) sfp->data.value.ptrvalue;
      break;
    case SEQFEAT_PROT :
      prp = (ProtRefPtr) sfp->data.value.ptrvalue;
      CleanVisString (&(prp->desc));
      CleanVisStringList (&(prp->name));
      CleanVisStringList (&(prp->ec));
      CleanVisStringList (&(prp->activity));
      break;
    case SEQFEAT_RNA :
      rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
      if (rrp->ext.choice == 1) {
        CleanVisString ((CharPtr PNTR) &(rrp->ext.value.ptrvalue));
        if (rrp->ext.value.ptrvalue == NULL) {
          rrp->ext.choice = 0;
        }
      }
      break;
    case SEQFEAT_PUB :
      pdp = (PubdescPtr) sfp->data.value.ptrvalue;
      CleanVisString (&(pdp->comment));
      break;
    case SEQFEAT_IMP :
      ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
      CleanVisString (&(ifp->key));
      CleanVisString (&(ifp->loc));
      CleanVisString (&(ifp->descr));
      break;
    case SEQFEAT_REGION :
      CleanVisString ((CharPtr PNTR) &(sfp->data.value.ptrvalue));
      if (sfp->data.value.ptrvalue == NULL) {
        sfp->data.choice = SEQFEAT_COMMENT;
      }
      break;
    case SEQFEAT_BIOSRC :
      biop = (BioSourcePtr) sfp->data.value.ptrvalue;
      orp = biop->org;
      CleanSubSourceList (&(biop->subtype), biop->genome);
      break;
    default :
      break;
  }

  /* shared organism clean-up for org and biosource features */
  if (orp == NULL) return;
  CleanVisString (&(orp->taxname));
  CleanVisString (&(orp->common));
  CleanVisStringList (&(orp->mod));
  CleanVisStringList (&(orp->syn));
  for (onp = orp->orgname; onp != NULL; onp = onp->next) {
    CleanVisString (&(onp->attrib));
    CleanVisString (&(onp->lineage));
    CleanVisString (&(onp->div));
    CleanOrgModList (&(onp->mod));
  }
}
5974 
OnlyPunctuation(CharPtr str)5975 static Boolean OnlyPunctuation (CharPtr str)
5976 
5977 {
5978   Uchar  ch;    /* to use 8bit characters in multibyte languages */
5979 
5980   if (str != NULL) {
5981     ch = *str;
5982     while (ch != '\0') {
5983       if (ch > ' ' && ch != '.' && ch != ',' && ch != '~' && ch != ';') {
5984         return FALSE;
5985       }
5986       str++;
5987       ch = *str;
5988     }
5989   }
5990   return TRUE;
5991 }
5992 
/*
 *  CleanDescStrings
 *    Runs the CleanVisString family over the string fields of one
 *    descriptor.  mol-type and method descriptors carry no data pointer and
 *    are skipped, as is any descriptor whose data pointer is NULL.
 *
 *    A comment descriptor is cleaned with CleanVisStringJunk and then freed
 *    (pointer set to NULL) when OnlyPunctuation reports nothing else is left.
 */
static void CleanDescStrings (ValNodePtr sdp)

{
  BioSourcePtr  biop;
  GBBlockPtr    gbp;
  OrgNamePtr    onp;
  OrgRefPtr     orp = NULL;
  PubdescPtr    pdp;

  if (sdp == NULL) return;

  /* these descriptor types do not use data.ptrvalue */
  if (sdp->choice == Seq_descr_mol_type || sdp->choice == Seq_descr_method) return;
  if (sdp->data.ptrvalue == NULL) return;

  /* descriptor types not listed here have no strings to clean */
  switch (sdp->choice) {
    case Seq_descr_name :
    case Seq_descr_title :
    case Seq_descr_region :
      CleanVisString ((CharPtr PNTR) &sdp->data.ptrvalue);
      break;
    case Seq_descr_org :
      orp = (OrgRefPtr) sdp->data.ptrvalue;
      break;
    case Seq_descr_comment :
      CleanVisStringJunk ((CharPtr PNTR) &sdp->data.ptrvalue);
      if (OnlyPunctuation ((CharPtr) sdp->data.ptrvalue)) {
        sdp->data.ptrvalue = MemFree (sdp->data.ptrvalue);
      }
      break;
    case Seq_descr_genbank :
      gbp = (GBBlockPtr) sdp->data.ptrvalue;
      CleanVisStringList (&(gbp->extra_accessions));
      CleanVisStringList (&(gbp->keywords));
      CleanVisString (&(gbp->source));
      CleanVisString (&(gbp->origin));
      CleanVisString (&(gbp->date));
      CleanVisString (&(gbp->div));
      CleanVisString (&(gbp->taxonomy));
      break;
    case Seq_descr_pub :
      pdp = (PubdescPtr) sdp->data.ptrvalue;
      CleanVisString (&(pdp->comment));
      break;
    case Seq_descr_source :
      biop = (BioSourcePtr) sdp->data.ptrvalue;
      orp = biop->org;
      CleanSubSourceList (&(biop->subtype), biop->genome);
      break;
    default :
      break;
  }

  /* shared organism clean-up for org and source descriptors */
  if (orp == NULL) return;
  CleanVisString (&(orp->taxname));
  CleanVisString (&(orp->common));
  CleanVisStringList (&(orp->mod));
  CleanVisStringList (&(orp->syn));
  for (onp = orp->orgname; onp != NULL; onp = onp->next) {
    CleanVisString (&(onp->attrib));
    CleanVisString (&(onp->lineage));
    CleanVisString (&(onp->div));
    CleanOrgModList (&(onp->mod));
  }
}
6104 
/*
 *  GetRidOfEmptyFeatsDescCallback
 *    SeqEntryExplore-style callback for one Bioseq or Bioseq-set.
 *      1. Every feature in every feature table (annot type 1) is passed to
 *         CleanFeatStrings; a feature that should carry a data pointer but
 *         has none afterwards is unlinked and freed.
 *      2. An annot left with neither data nor an annot descriptor is
 *         unlinked and freed.
 *      3. Every descriptor is passed to CleanDescStrings; a descriptor that
 *         should carry a data pointer but has none afterwards is unlinked
 *         and freed.
 *    mydata, index and indent are unused.
 */
void GetRidOfEmptyFeatsDescCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  SeqAnnotPtr   annot;
  Pointer PNTR  annotlink;
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  SeqDescrPtr   descr;
  Pointer PNTR  descrlink;
  SeqFeatPtr    feat;
  Pointer PNTR  featlink;
  Boolean       needsdata;
  SeqAnnotPtr   nextannot;
  SeqDescrPtr   nextdescr;
  SeqFeatPtr    nextfeat;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;

  /* each "link" always addresses the pointer that leads to the current
     element, so an element is removed by overwriting *link */
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    annot = bsp->annot;
    annotlink = (Pointer PNTR) &(bsp->annot);
    descr = bsp->descr;
    descrlink = (Pointer PNTR) &(bsp->descr);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    annot = bssp->annot;
    annotlink = (Pointer PNTR) &(bssp->annot);
    descr = bssp->descr;
    descrlink = (Pointer PNTR) &(bssp->descr);
  } else {
    return;
  }

  for (; annot != NULL; annot = nextannot) {
    nextannot = annot->next;
    if (annot->type == 1 && annot->data != NULL) {
      featlink = (Pointer PNTR) &(annot->data);
      for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = nextfeat) {
        nextfeat = feat->next;
        /* clean first: it may empty the data pointer or change the choice */
        CleanFeatStrings (feat);
        switch (feat->data.choice) {
          case SEQFEAT_BOND :
          case SEQFEAT_SITE :
          case SEQFEAT_PSEC_STR :
          case SEQFEAT_COMMENT :
            needsdata = FALSE;
            break;
          default :
            needsdata = TRUE;
            break;
        }
        if (needsdata && feat->data.value.ptrvalue == NULL) {
          *(featlink) = feat->next;
          feat->next = NULL;
          SeqFeatFree (feat);
        } else {
          featlink = (Pointer PNTR) &(feat->next);
        }
      }
    }
    /* now keep empty annot if annot_descr present */
    if (annot->data == NULL && annot->desc == NULL) {
      *(annotlink) = annot->next;
      annot->next = NULL;
      SeqAnnotFree (annot);
    } else {
      annotlink = (Pointer PNTR) &(annot->next);
    }
  }

  for (; descr != NULL; descr = nextdescr) {
    nextdescr = descr->next;
    CleanDescStrings (descr);
    needsdata = (Boolean) (descr->choice != Seq_descr_mol_type &&
                           descr->choice != Seq_descr_method);
    if (needsdata && descr->data.ptrvalue == NULL) {
      *(descrlink) = descr->next;
      descr->next = NULL;
      SeqDescrFree (descr);
    } else {
      descrlink = (Pointer PNTR) &(descr->next);
    }
  }
}
6183 
6184 /* move_cds from Serge Bazhin, modified by Kans */
6185 
6186 typedef struct bool_bioseq_set {
6187     Uint2        found;
6188     BioseqSetPtr bssp;
6189     Boolean      doPseudo;
6190 } BoolBioseqSet, PNTR BoolBioseqSetPtr;
6191 
6192 /**********************************************************/
/*
 *  put_cds_on_nps
 *    Appends sfp to a feature table on the given Bioseq-set.  The target is
 *    the first annot that is a feature table (type 1) with neither a name
 *    nor an annot descriptor; when no such annot exists a new one is
 *    created and placed at the head of bssp->annot.
 *
 *    Fix: the original located (or created) the target annot and then
 *    unconditionally reset its cursor to bssp->annot, so the feature was
 *    always chained onto the first annot of the set - even a named or
 *    described table, or an annot that is not a feature table at all, whose
 *    data would then be walked as a Seq-feat list.  The annot that was
 *    actually selected is now used.
 *
 *    If a new annot is needed and cannot be allocated the feature is left
 *    unattached; the function has no way to report this to its caller.
 */
static void put_cds_on_nps (BioseqSetPtr bssp, SeqFeatPtr sfp)

{
  SeqFeatPtr   last;
  SeqAnnotPtr  sap;

  if (bssp == NULL || sfp == NULL) return;

  for (sap = bssp->annot; sap != NULL; sap = sap->next) {
    if (sap->name == NULL && sap->desc == NULL && sap->type == 1) break;
  }
  if (sap == NULL) {
    sap = SeqAnnotNew ();
    if (sap == NULL) return;
    sap->type = 1;
    sap->next = bssp->annot;
    bssp->annot = sap;
  }

  if (sap->data == NULL) {
    sap->data = (Pointer) sfp;
    return;
  }
  /* append after the current last feature of the table */
  last = (SeqFeatPtr) sap->data;
  while (last->next != NULL) {
    last = last->next;
  }
  last->next = sfp;
}
6224 
6225 /**********************************************************
6226  *
6227  *   void move_cds_within_nucprot(sep, bbsp)
6228  *
6229  *      Runs through nuc-prot Bioseq-set components, looks for cdregions
6230  *   its Seq-entries, and moves their pointers to nuc-prot
6231  *   Bioseq-set.
6232  *
6233  **********************************************************/
move_cds_within_nucprot(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)6234 static void move_cds_within_nucprot(SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
6235 
6236 {
6237   BoolBioseqSetPtr  bbsp;
6238   BioseqPtr         bsp;
6239   BioseqSetPtr      bssp;
6240   SeqAnnotPtr       nextsap;
6241   SeqFeatPtr        nextsfp;
6242   Pointer PNTR      prevsap;
6243   Pointer PNTR      prevsfp;
6244   SeqAnnotPtr       sap;
6245   SeqFeatPtr        sfp;
6246 
6247   if (sep == NULL || sep->data.ptrvalue == NULL) return;
6248   bbsp = (BoolBioseqSetPtr) mydata;
6249   if (bbsp == NULL) return;
6250   if (IS_Bioseq (sep)) {
6251     bsp = (BioseqPtr) sep->data.ptrvalue;
6252     sap = bsp->annot;
6253     prevsap = (Pointer PNTR) &(bsp->annot);
6254   } else if (IS_Bioseq_set (sep)) {
6255     bssp = (BioseqSetPtr) sep->data.ptrvalue;
6256     sap = bssp->annot;
6257     prevsap = (Pointer PNTR) &(bssp->annot);
6258   } else return;
6259   while (sap != NULL) {
6260     nextsap = sap->next;
6261     if (sap->type == 1) {
6262       sfp = (SeqFeatPtr) sap->data;
6263       prevsfp = (Pointer PNTR) &(sap->data);
6264       while (sfp != NULL) {
6265         nextsfp = sfp->next;
6266         if (sfp->data.choice == SEQFEAT_CDREGION && (! sfp->pseudo) &&
6267             (sfp->product != NULL || SeqLocLen (sfp->location) >= 6)) {
6268           *(prevsfp) = sfp->next;
6269           sfp->next = NULL;
6270           bbsp->found++;
6271           /* ErrPostEx(SEV_WARNING, 0, 0, "Moving cdregion from na Bioseq.annot to Bioseq-set.annot."); */
6272           put_cds_on_nps (bbsp->bssp, sfp);
6273         } else {
6274           prevsfp = (Pointer PNTR) &(sfp->next);
6275         }
6276         sfp = nextsfp;
6277       }
6278     }
6279     /* now keep empty annot if annot_descr present */
6280     if (sap->data == NULL && sap->desc == NULL) {
6281       *(prevsap) = sap->next;
6282       sap->next = NULL;
6283       SeqAnnotFree (sap);
6284     } else {
6285       prevsap = (Pointer PNTR) &(sap->next);
6286     }
6287     sap = nextsap;
6288   }
6289 }
6290 
6291 /**********************************************************
6292  *
6293  *   Uint2 move_cds(sep)
6294  *
6295  *      Moves cdregion features to nuc-prot set level
6296  *
6297  **********************************************************/
move_cds_ex(SeqEntryPtr sep,Boolean doPseudo)6298 Uint2 move_cds_ex (SeqEntryPtr sep, Boolean doPseudo)
6299 {
6300     BioseqSetPtr   bssp;
6301     Uint2          found;
6302     BoolBioseqSet  bbsp;
6303 
6304     if (sep == NULL) return 0;
6305     if (! IS_Bioseq_set (sep)) return 0;
6306     bssp = (BioseqSetPtr) sep->data.ptrvalue;
6307     if (bssp == NULL) return 0;
6308     if (bssp->_class == BioseqseqSet_class_genbank ||
6309         (bssp->_class >= BioseqseqSet_class_mut_set && bssp->_class <= BioseqseqSet_class_eco_set) ||
6310         bssp->_class == BioseqseqSet_class_gen_prod_set ||
6311         bssp->_class == BioseqseqSet_class_wgs_set ||
6312         bssp->_class == BioseqseqSet_class_small_genome_set) {
6313         found = 0;
6314         for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
6315           found += move_cds (sep);
6316         }
6317         return found;
6318     }
6319     if (bssp->_class != 1) return 0;
6320     bbsp.found = 0;
6321     bbsp.bssp = bssp;
6322     bbsp.doPseudo = doPseudo;
6323     for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
6324         SeqEntryExplore (sep, (Pointer) &bbsp, move_cds_within_nucprot);
6325     }
6326     return(bbsp.found);
6327 }
6328 
move_cds(SeqEntryPtr sep)6329 Uint2 move_cds(SeqEntryPtr sep)
6330 {
6331   return move_cds_ex (sep, TRUE);
6332 }
6333 
MoveDbxrefs(GatherContextPtr gcp)6334 static Boolean MoveDbxrefs (GatherContextPtr gcp)
6335 
6336 {
6337     GBQualPtr    qual;
6338     GBQualPtr    nextqual;
6339     SeqFeatPtr   sfp;
6340     DbtagPtr     db;
6341     ObjectIdPtr  oip;
6342     ValNodePtr   vnp;
6343     CharPtr      tag, value, p;
6344 
6345     if (gcp == NULL) return TRUE;
6346     if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
6347     sfp = (SeqFeatPtr) gcp->thisitem;
6348     for (qual=sfp->qual; qual; qual = nextqual) {
6349         nextqual = qual->next;
6350         if (StringICmp (qual->qual, "db_xref") == 0) {
6351             vnp = ValNodeNew(NULL);
6352             db = DbtagNew();
6353             vnp->data.ptrvalue = db;
6354             tag = qual->val;
6355             if ((p = StrChr(tag, ':')) != NULL) {
6356                 value = p+1;
6357                 *p = '\0';
6358                 db->db = StringSave (tag);
6359                 oip = ObjectIdNew();
6360                 oip->str = StringSave (value);
6361                 db->tag = oip;
6362             } else {
6363                 db->db = StringSave ("?");
6364                 oip = ObjectIdNew();
6365                 oip->str = StringSave (tag);
6366                 db->tag = oip;
6367             }
6368             sfp->dbxref = tie_next(sfp->dbxref, vnp);
6369             sfp->qual = remove_qual(sfp->qual, qual);
6370         }
6371     }
6372   return TRUE;
6373 }
6374 
SeqEntryMoveDbxrefs(SeqEntryPtr sep)6375 Boolean SeqEntryMoveDbxrefs (SeqEntryPtr sep)
6376 
6377 {
6378   GatherScope  gs;
6379 
6380   if (sep == NULL) return FALSE;
6381   MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
6382   gs.seglevels = 1;
6383   gs.get_feats_location = FALSE;
6384   MemSet ((Pointer)(gs.ignore), (int)(TRUE), (size_t)(OBJ_MAX * sizeof(Boolean)));
6385   gs.ignore[OBJ_SEQFEAT] = FALSE;
6386   gs.ignore[OBJ_SEQANNOT] = FALSE;
6387   GatherSeqEntry (sep, NULL, MoveDbxrefs, &gs);
6388   return TRUE;
6389 }
6390