1 static char const rcsid[] = "$Id: toporg.c,v 6.251 2016/11/02 23:36:10 kans Exp $";
2 
3 #include <stdio.h>
4 #include <ncbi.h>
5 #include <sequtil.h>
6 #include <toasn3.h>
7 #include <toporg.h>
8 #include <tfuns.h>
9 #include <utilpub.h>
10 
11 /* includes for new cleanup functions from Sequin */
12 #include <sqnutils.h>
13 #include <gather.h>
14 #include <explore.h>
15 #include <subutil.h>
16 #include <tofasta.h>
17 #include <objfdef.h>
18 #include <valid.h>
19 
20 static ValNodePtr GetDescrNoTitles (ValNodePtr PNTR descr);
21 
22 SeqDescrPtr remove_descr PROTO((SeqDescrPtr head, SeqDescrPtr x));
23 //LCOV_EXCL_START
24 /****************************************************************************
25  *   move org-ref, modif, mol_type, date, title and pubs
26  *   to seg-set level in segmented set, if not there
27  *   move org-ref, modif, date, title to nuc-prot level, if not there
28  *   notice that mol_type and pub are not moved to nucprot level
29  *****************************************************************************/
/*
 * toporg - run the two descriptor-promotion passes over a Seq-entry tree.
 *
 * sep: root entry handed to SeqEntryExplore.
 *
 * ChkSegset is explored first and ChkNucProt second; neither pass receives
 * user data.
 */
void toporg(SeqEntryPtr sep)
{
  /* pass 1: segmented sets */
  SeqEntryExplore(sep, NULL, ChkSegset);

  /* pass 2: nuc-prot sets */
  SeqEntryExplore(sep, NULL, ChkNucProt);
}
38 
39 // Used for segsets
40 /****************************************************************************
41  *     check for backbone entry and expand (to whole) OrgRef feature
42  *    if it's not whole and the only one OrgRef in the entry
 *     whole features would be converted to descr later in FindOrg function
44  *
45  *     check if org-ref, modif, mol_type, date and title
46  *     are the same for all segments
47  *     move them to seg-set level in segmented set, if not already there
48  *****************************************************************************/
/*
 * ChkSegset - SeqEntryExplore callback used for segmented sets.
 *
 * Bioseq entries:
 *   Only raw/const, non-protein Bioseqs carrying a GIBBSQ or GIBBMT Seq-id
 *   ("backbone" entries) are examined.  The OrgRef features are pulled out
 *   of the first annot (when it is a feature table); if there is exactly
 *   one and check_whole() rejects it, a warning is posted and its interval
 *   location is stretched to 0..length-1.  The features are then tied back
 *   onto the feature table.
 *
 * Bioseq-set entries:
 *   Only class segset is handled.  The org, mol_type, modif and update_date
 *   descriptors obtained from SrchSegChoice() on the parts (presumably the
 *   values common to all parts - see the comment preceding this function)
 *   are appended to the set descriptors when the set does not already have
 *   one of that kind; an existing update_date is replaced by a copy of the
 *   parts' date.  SrchSegSeqMol() is called on the parts at the end.
 *
 * data, index and indent are not used.
 */
void ChkSegset (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqSetPtr  bssp, tmp;
  BioseqPtr     bsp;
  SeqEntryPtr   segsep, parts = NULL;
  ValNodePtr    set_vnp = NULL, upd_date_vnp = NULL;
  ValNodePtr    org, modif, mol, date, v;
  SeqAnnotPtr   sap = NULL;
  SeqFeatPtr    tmp_sfp, sfp0, sfp;
  SeqIdPtr      sidp;
  SeqLocPtr     slp;
  SeqIntPtr     sip;
  CharPtr       loc_str;
  Boolean       is_org = FALSE, is_modif = FALSE;
  Boolean       is_date = FALSE, is_mol = FALSE;
  Boolean       is_bb = FALSE, whole = FALSE;
  Int2          count = 0;
  Int4          len;
  static Char   msg[51];

  if (sep == NULL) {
    return;
  }

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    /* the NULL test has to come before any field of bsp is read */
    if (bsp == NULL) {
      return;
    }
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)) {
      return;
    }
    if (bsp->mol == Seq_mol_aa) {
      return;
    }
    sap = bsp->annot;
    len = bsp->length;

    /* a backbone entry is recognised by a gibbsq or gibbmt Seq-id */
    for (sidp = bsp->id; sidp != NULL && !is_bb; sidp = sidp->next) {
      if (sidp->choice == SEQID_GIBBSQ || sidp->choice == SEQID_GIBBMT) {
        is_bb = TRUE;
      }
    }
    if (!is_bb) {
      return;
    }

    if (sap != NULL && sap->type == 1)   /* feature table */
    {
      tmp_sfp = (SeqFeatPtr) (sap->data);
      sfp0 = SeqFeatExtractList(&(tmp_sfp), SEQFEAT_ORG);
      for (sfp = sfp0; sfp != NULL; sfp = sfp->next) {
        count++;
        if ((whole = check_whole(sfp, bsp->length)) == TRUE) {
          break;
        }
      }
      if (!whole && count == 1) {
        /* SeqLocPrint returns a heap string in the NCBI toolkit; release
           it once the bounded copy for the message has been taken */
        loc_str = SeqLocPrint(sfp0->location);
        StringNCpy(msg, loc_str, 50);
        MemFree(loc_str);
        ErrPostEx(SEV_WARNING, 0, 2,
                  "Backbone entry source with bad OrgRef feature: %s", msg);
        slp = sfp0->location;
        /* only an interval can be widened in place; any other location
           type does not hold a SeqInt behind data.ptrvalue */
        if (slp != NULL && slp->choice == SEQLOC_INT
            && slp->data.ptrvalue != NULL) {
          sip = (SeqIntPtr) slp->data.ptrvalue;
          sip->from = 0;
          sip->to = len-1;
        }
      }
      tmp_sfp = tie_feat(tmp_sfp, sfp0);
      sap->data = tmp_sfp;
    }
    return;
  }

  bssp = (BioseqSetPtr)(sep->data.ptrvalue);
  if (bssp == NULL || bssp->_class != BioseqseqSet_class_segset) {
    return;                   /*  do the rest for segset only */
  }
  segsep = bssp->seq_set;
  if (segsep == NULL || segsep->next == NULL) {
    return;
  }
  set_vnp = bssp->descr;
  if (!IS_Bioseq(segsep->next)) {
    tmp = (BioseqSetPtr) (segsep->next->data.ptrvalue); /*segsep->next=parts*/
    if (tmp == NULL) {
      return;
    }
    parts = tmp->seq_set;
    if (parts == NULL) {
      return;
    }
  }

  /* titles are deliberately not promoted here */
  org = SrchSegChoice(parts, Seq_descr_org);
  mol = SrchSegChoice(parts, Seq_descr_mol_type);
  modif = SrchSegChoice(parts, Seq_descr_modif);
  date = SrchSegChoice(parts, Seq_descr_update_date);

  /* find out which of these descriptor kinds the set already carries */
  for (v = set_vnp; v != NULL; v = v->next) {
    if (v->choice == Seq_descr_org) {
      is_org = TRUE;
    }
    if (v->choice == Seq_descr_mol_type) {
      if (mol && mol->data.intvalue == v->data.intvalue) {
        is_mol = TRUE;
        /* clear the pointer so a later mol_type descriptor cannot read
           or free the released node again */
        mol = ValNodeFree(mol);
      }
    }
    if (v->choice == Seq_descr_modif) {
      is_modif = TRUE;
    }
    if (v->choice == Seq_descr_update_date) {
      is_date = TRUE;
      upd_date_vnp = v;
    }
  }

  if (!is_modif) {
    if (set_vnp != NULL) {
      set_vnp = tie_next(set_vnp, modif);
    } else {
      ValNodeLink (&(bssp->descr), modif);
    }
  }
  if (!is_org) {
    if (set_vnp != NULL) {
      set_vnp = tie_next(set_vnp, org);
    } else {
      ValNodeLink (&(bssp->descr), org);
    }
  }
  if (!is_mol) {
    if (set_vnp != NULL) {
      set_vnp = tie_next(set_vnp, mol);
    } else {
      ValNodeLink (&(bssp->descr), mol);
    }
  }
  if (!is_date) {
    if (set_vnp != NULL) {
      set_vnp = tie_next(set_vnp, date);
    } else {
      ValNodeLink (&(bssp->descr), date);
    }
  } else if (upd_date_vnp != NULL && date != NULL) {
    /* the set already has an update date: overwrite it with the parts' */
    upd_date_vnp->data.ptrvalue = DateFree ((DatePtr) upd_date_vnp->data.ptrvalue);
    upd_date_vnp->data.ptrvalue = DateDup ((DatePtr) date->data.ptrvalue);
  }
  SrchSegSeqMol(parts);

  return;
}
225 //LCOV_EXCL_STOP
226 
227 
PubLabelMatchEx(ValNodePtr vnp1,ValNodePtr vnp2,Boolean checkdates)228 static Int2 PubLabelMatchEx (ValNodePtr vnp1, ValNodePtr vnp2, Boolean checkdates)
229 
230 {
231   AffilPtr     afp1, afp2;
232   AuthListPtr  alp1, alp2;
233   CitGenPtr    cgp1 = NULL, cgp2 = NULL;
234   CitSubPtr    csp1 = NULL, csp2 = NULL;
235   Int2         ret;
236 
237   if (vnp1 == NULL || vnp2 == NULL) return -1;
238   ret = PubLabelMatch (vnp1, vnp2);
239   if (ret != 0) return ret;
240 
241   while (vnp1 != NULL) {
242     if (vnp1->choice == PUB_Sub) {
243       csp1 = (CitSubPtr) vnp1->data.ptrvalue;
244     } else if (vnp1->choice == PUB_Gen) {
245       cgp1 = (CitGenPtr) vnp1->data.ptrvalue;
246     }
247     vnp1 = vnp1->next;
248   }
249 
250   while (vnp2 != NULL) {
251     if (vnp2->choice == PUB_Sub) {
252       csp2 = (CitSubPtr) vnp2->data.ptrvalue;
253     } else if (vnp2->choice == PUB_Gen) {
254       cgp2 = (CitGenPtr) vnp2->data.ptrvalue;
255     }
256     vnp2 = vnp2->next;
257   }
258 
259   if (csp1 != NULL && csp2 != NULL) {
260     if (checkdates && csp1->date != NULL && csp2->date != NULL && DateMatch (csp1->date, csp2->date, FALSE) != 0) return -1;
261     if (StringICmp (csp1->descr, csp2->descr) != 0) return -1;
262     alp1 = csp1->authors;
263     alp2 = csp2->authors;
264     if (alp1 == NULL || alp2 == NULL) return 0;
265     if (AuthListMatch (alp1, alp2, TRUE) != 0) return -1;
266     afp1 = alp1->affil;
267     afp2 = alp2->affil;
268     if (afp1 != NULL && afp2 != NULL) {
269       if (! AsnIoMemComp (afp1, afp2, (AsnWriteFunc) AffilAsnWrite)) return -1;
270     } else if (afp1 != NULL || afp2 != NULL) {
271       return -1;
272     }
273     return 0;
274   }
275 
276   if (cgp1 != NULL && cgp2 != NULL) {
277     if (checkdates && cgp1->date != NULL && cgp2->date != NULL && DateMatch (cgp1->date, cgp2->date, FALSE) != 0) return -1;
278     if (StringICmp (cgp1->cit, cgp2->cit) != 0) return -1;
279     alp1 = cgp1->authors;
280     alp2 = cgp2->authors;
281     if (alp1 == NULL || alp2 == NULL) return 0;
282     if (AuthListMatch (alp1, alp2, TRUE) != 0) return -1;
283     afp1 = alp1->affil;
284     afp2 = alp2->affil;
285     if (afp1 != NULL && afp2 != NULL) {
286       if (! AsnIoMemComp (afp1, afp2, (AsnWriteFunc) AffilAsnWrite)) return -1;
287     } else if (afp1 != NULL || afp2 != NULL) {
288       return -1;
289     }
290     return 0;
291   }
292 
293   return 0;
294 }
295 
296 //LCOV_EXCL_START
297 // Used for segsets
/*
 * RemovePubFromParts - drop from every part the pub descriptors that match
 * one of the pubs in the given list.
 *
 * sep: first entry of the chain of parts; each entry's data is treated as
 *      a Bioseq.
 * pub: chain of pub descriptors to look for.
 *
 * A part's descriptor is removed (via remove_descr) when PubLabelMatchEx,
 * with date checking on, reports a match and the list pub carries no name,
 * fig, num, maploc or comment.
 */
static void RemovePubFromParts(SeqEntryPtr sep, ValNodePtr pub)
{
  BioseqPtr    part;
  SeqEntryPtr  entry;
  ValNodePtr   wanted, desc, following;
  PubdescPtr   wanted_pdp, part_pdp;
  Boolean      has_qualifiers;

  for (wanted = pub; wanted != NULL; wanted = wanted->next) {
    wanted_pdp = wanted->data.ptrvalue;
    for (entry = sep; entry != NULL; entry = entry->next) {
      part = (BioseqPtr)(entry->data.ptrvalue);
      desc = part->descr;
      while (desc != NULL) {
        /* taken before desc can be removed from the chain */
        following = desc->next;
        if (desc->choice == Seq_descr_pub) {
          part_pdp = desc->data.ptrvalue;
          if (PubLabelMatchEx (wanted_pdp->pub, part_pdp->pub, TRUE) == 0) {
            has_qualifiers = (Boolean) (wanted_pdp->name != NULL
                                        || wanted_pdp->fig != NULL
                                        || wanted_pdp->num != NULL
                                        || wanted_pdp->maploc != NULL
                                        || wanted_pdp->comment != NULL);
            if (! has_qualifiers) {
              part->descr = remove_descr(part->descr, desc);
            }
          }
        }
        desc = following;
      }
    }
  }
}
328 //LCOV_EXCL_STOP
329 
330 
331 /***************************************************************************
332  *    0  match
333  *    1 no match
334  *    2 type unknown or not implemented
335  ***************************************************************************/
NumberingMatch(ValNodePtr num1,ValNodePtr num2)336 static Int2 NumberingMatch(ValNodePtr num1, ValNodePtr num2)
337 {
338   NumContPtr nc1, nc2;
339   NumEnumPtr ne1, ne2;
340   NumRealPtr nr1, nr2;
341 
342   if (num1 == NULL || num2 == NULL)
343     return 0;
344   if (num1->choice != num2->choice)
345     return 1;
346   switch(num1->choice)
347   {
348     case Numbering_cont:
349       nc1 = (NumContPtr) num1->data.ptrvalue;
350       nc2 = (NumContPtr) num2->data.ptrvalue;
351       if (nc1->refnum == nc2->refnum) {
352         return 0;
353       }
354       break;
355     case Numbering_enum:
356       ne1 = (NumEnumPtr) num1->data.ptrvalue;
357       ne2 = (NumEnumPtr) num2->data.ptrvalue;
358       if (ne1->num != ne2->num)
359         return 1;
360       return (StringCmp(ne1->buf, ne2->buf));
361     case Numbering_ref_source:
362       break;
363     case Numbering_ref_align:
364       break;
365     case Numbering_real:
366       nr1 = (NumRealPtr) num1->data.ptrvalue;
367       nr2 = (NumRealPtr) num2->data.ptrvalue;
368       return (StringCmp(nr1->units, nr2->units));
369     default:
370       break;
371   }
372   return 2;
373 }
374 
AlpFromPdp(PubdescPtr pdp,CitSubPtr csp)375 static AuthListPtr AlpFromPdp (PubdescPtr pdp, CitSubPtr csp)
376 
377 {
378   AuthListPtr  alp = NULL;
379   CitArtPtr    cap;
380   CitBookPtr   cbp;
381   CitGenPtr    cgp;
382   CitPatPtr    cpp;
383   ValNodePtr   vnp;
384 
385   if (csp != NULL) {
386     alp = csp->authors;
387     if (alp != NULL) return alp;
388   }
389   if (pdp == NULL) return NULL;
390 
391   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
392     switch (vnp->choice) {
393       case PUB_Gen :
394         cgp = (CitGenPtr) vnp->data.ptrvalue;
395         if (cgp != NULL) {
396           alp = cgp->authors;
397         }
398         break;
399       case PUB_Sub :
400         csp = (CitSubPtr) vnp->data.ptrvalue;
401         if (csp != NULL) {
402           alp = csp->authors;
403         }
404         break;
405       case PUB_Article :
406         cap = (CitArtPtr) vnp->data.ptrvalue;
407         if (cap != NULL) {
408           alp = cap->authors;
409         }
410         break;
411       case PUB_Book :
412       case PUB_Proc :
413       case PUB_Man :
414         cbp = (CitBookPtr) vnp->data.ptrvalue;
415         if (cbp != NULL) {
416           alp = cbp->authors;
417         }
418         break;
419       case PUB_Patent :
420         cpp = (CitPatPtr) vnp->data.ptrvalue;
421         if (cpp != NULL) {
422           alp = cpp->authors;
423         }
424         break;
425       default :
426         break;
427     }
428 
429     if (alp != NULL) return alp;
430   }
431 
432   return NULL;
433 }
434 
PubdescMatch(PubdescPtr p1,PubdescPtr p2)435 static Boolean PubdescMatch (PubdescPtr p1, PubdescPtr p2)
436 {
437   AuthListPtr alp1, alp2;
438 
439   if (p1 == NULL || p2 == NULL)
440     return TRUE;
441   if (p1->name && p2->name) {
442     if (StringCmp(p1->name, p2->name) != 0)
443       return FALSE;
444   }
445   if (p1->fig && p2->fig) {
446     if (StringCmp(p1->fig, p2->fig) != 0)
447       return FALSE;
448   }
449   if (p1->maploc && p2->maploc) {
450     if (StringCmp(p1->maploc, p2->maploc) != 0)
451       return FALSE;
452   }
453   if (p1->comment && p2->comment) {
454     if (StringCmp(p1->comment, p2->comment) != 0)
455       return FALSE;
456   }
457   if (p1->num && p2->num) {
458     if (NumberingMatch(p1->num, p2->num) != 0)
459       return FALSE;
460   }
461   /* do full author match */
462   alp1 = AlpFromPdp (p1, NULL);
463   alp2 = AlpFromPdp (p2, NULL);
464   if (alp1 != NULL && alp2 != NULL) {
465     if (AuthListMatch (alp1, alp2, TRUE) != 0)
466       return FALSE;
467   }
468   return TRUE;
469 }
470 
471 //LCOV_EXCL_START
472 // Used for segsets
/* return list of pubs that are the same in all segments */
CheckSegsForPub(SeqEntryPtr sep)474 static ValNodePtr CheckSegsForPub(SeqEntryPtr sep)
475 {
476   BioseqPtr    bsp, b;
477   ValNodePtr   vnp, v, list = NULL, vnpnext, new, next;
478   PubdescPtr   pdp, p, new_p;
479   Boolean      same;
480   SeqEntryPtr  s;
481 
482   if (sep == NULL) {
483     return NULL;
484   }
485   if (!IS_Bioseq(sep)) {
486     return NULL;
487   }
488   bsp = (BioseqPtr)(sep->data.ptrvalue);
489   /* first bioseq from parts */
490   for (vnp=bsp->descr; vnp; vnp=vnpnext) {
491     vnpnext = vnp->next;
492     if (vnp->choice != Seq_descr_pub)
493       continue;
494     pdp = vnp->data.ptrvalue;
495     for (s= sep->next, same = FALSE; s; s=s->next) {
496       b = (BioseqPtr)(s->data.ptrvalue);
497       /* added to skip virtual spacers */
498       if (b->repr == Seq_repr_virtual && b->descr == NULL) continue;
499       for (v=b->descr; v; v=next) {
500         next = v->next;
501         if (v->choice != Seq_descr_pub)
502           continue;
503         p = v->data.ptrvalue;
504         if (PubLabelMatchEx (pdp->pub, p->pub, FALSE) == 0) {
505           if (PubdescMatch(pdp, p) == TRUE) {
506             same = TRUE;
507             break;
508           }
509         }
510       }
511       if (v == NULL) {
512         same = FALSE;
513         break;
514       }
515     }
516     if (same == TRUE) {
517       new = SeqDescrNew(NULL);
518       new->choice = Seq_descr_pub;
519       new_p = AsnIoMemCopy(pdp, (AsnReadFunc) PubdescAsnRead,
520                            (AsnWriteFunc) PubdescAsnWrite);
521       new->data.ptrvalue = new_p;
522       list = tie_next(list, new);
523     }
524   }
525   return list;
526 }
527 
528 // used for segsets
529 /* move identical pubs in segmented set to the set level */
/*
 * MoveSegmPubs - SeqEntryExplore callback: move pubs shared by all parts of
 * a segmented set up to the set.
 *
 * Only Bioseq-sets of class segset with at least two members are handled.
 * The shared pubs are gathered with CheckSegsForPub, removed from the parts
 * with RemovePubFromParts, and then every gathered pub that already matches
 * a pub on the set (PubLabelMatchEx, dates ignored) is freed.  What remains
 * is appended to the set descriptors.
 *
 * data, index and indent are not used.
 */
void MoveSegmPubs (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{

  BioseqSetPtr  bssp, tmp;
  SeqEntryPtr   segsep, parts = NULL;
  ValNodePtr    v, pub, vv, next;
  PubdescPtr    pdp, pdpv;

  if (sep == NULL || IS_Bioseq(sep)) {
    return;
  }
  bssp = (BioseqSetPtr)(sep->data.ptrvalue);
  if (bssp == NULL || bssp->_class != BioseqseqSet_class_segset) {
    return;                   /*  do the rest for segset only */
  }
  segsep = bssp->seq_set;
  if (segsep == NULL || segsep->next == NULL) {
    return;
  }
  if (!IS_Bioseq(segsep->next)) {
    tmp = (BioseqSetPtr) (segsep->next->data.ptrvalue); /*segsep->next=parts*/
    if (tmp == NULL) {
      return;
    }
    parts = tmp->seq_set;
    if (parts == NULL) {
      return;
    }
  }

  /* find identical pubs in all segments */
  pub = CheckSegsForPub(parts);
  if (pub == NULL) {
    return;                   /* nothing shared, nothing to move */
  }
  RemovePubFromParts(parts, pub);

  /* check if pub is already on the set descr */
  for (v = bssp->descr; v != NULL && pub != NULL; v = v->next) {
    if (v->choice != Seq_descr_pub)
      continue;
    pdpv = v->data.ptrvalue;
    if (pdpv == NULL)
      continue;
    for (vv = pub; vv != NULL; vv = next) {
      next = vv->next;        /* vv may be removed below */
      pdp = vv->data.ptrvalue;
      if (pdp == NULL)
        continue;
      if (PubLabelMatchEx (pdp->pub, pdpv->pub, FALSE) == 0) {
        PubdescFree(pdp);
        pub = remove_node(pub, vv);
      }
    }
  }

  bssp->descr = tie_next(bssp->descr, pub);

  return;
}
580 
581 // Used for segsets
AllPartsHaveTitles(BioseqSetPtr bssp)582 static Boolean AllPartsHaveTitles(BioseqSetPtr bssp)
583 
584 {
585   BioseqPtr    bsp;
586   SeqEntryPtr  sep;
587   CharPtr      str;
588 
589   if (bssp == NULL) return FALSE;
590   for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
591     if (sep->choice != 1) return FALSE;
592     bsp = (BioseqPtr) sep->data.ptrvalue;
593     if (bsp == NULL) return FALSE;
594     str = BioseqGetTitle (bsp);
595     if (StringHasNoText (str)) return FALSE;
596   }
597 
598   return TRUE;
599 }
600 
ChkNucProt(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)601 void ChkNucProt (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
602 {
603   BioseqSetPtr  bssp, tmp, bssp2, bssp3;
604   BioseqPtr     bsp = NULL, bsp2, bsp3, bsp4;
605   SeqEntryPtr   seqsep, sep2, sep3, sep4;
606   ValNodePtr    descr = NULL, head = NULL, vnp, next;
607   Int2Ptr       ip;
608   Boolean       is_org = FALSE, is_modif = FALSE, is_title = FALSE;
609   Boolean       is_date = FALSE, is_nuc /* , is_pub = FALSE */;
610   CharPtr       npstitle = NULL, seqtitle = NULL;
611   ValNodePtr    PNTR prev;
612   Char          ch;
613 
614   if (IS_Bioseq(sep)) {
615     return;
616   }
617   bssp = (BioseqSetPtr)(sep->data.ptrvalue);
618   if (bssp->_class != BioseqseqSet_class_nuc_prot) {    /*  do the rest for nuc-prot only */
619     return;
620   }
621   seqsep = bssp->seq_set;
622   if (seqsep == NULL || seqsep->data.ptrvalue == NULL) return;
623 
624   /* if protein first, extract nucleotide, put first */
625   if (seqsep->choice == 1) {
626     bsp3 = seqsep->data.ptrvalue;
627     if (bsp3 != NULL && ISA_aa (bsp3->mol)) {
628       prev = (ValNodePtr PNTR) &(bssp->seq_set);
629       vnp = bssp->seq_set;
630       while (vnp != NULL) {
631         next = vnp->next;
632         is_nuc = FALSE;
633         if (IS_Bioseq (vnp)) {
634           bsp4 = (BioseqPtr) vnp->data.ptrvalue;
635           if (bsp4 != NULL && ISA_na (bsp4->mol)) {
636             is_nuc = TRUE;
637           }
638         }
639         if (is_nuc) {
640           *prev = vnp->next;
641           ValNodeLink (&head, vnp);
642           vnp->next = NULL;
643         } else {
644           prev = (ValNodePtr PNTR) &(vnp->next);
645         }
646         vnp = next;
647       }
648       if (head != NULL) {
649         vnp = bssp->seq_set;
650         bssp->seq_set = head;
651         ValNodeLink (&(bssp->seq_set), vnp);
652       }
653     }
654 
655     seqsep = bssp->seq_set;
656     if (seqsep == NULL || seqsep->data.ptrvalue == NULL) return;
657   }
658 
659   if (seqsep->choice == 1) {
660     bsp = seqsep->data.ptrvalue;
661     descr = bsp->descr;
662   } else if (seqsep->choice == 2) {
663     tmp = seqsep->data.ptrvalue;
664     descr = tmp->descr;
665   }
666   ip = (Int2Ptr) data;
667   if (bssp->descr == NULL) {
668     bssp->descr = GetDescrNoTitles(&descr);
669   } else {
670     for (vnp = bssp->descr; vnp!= NULL; vnp= vnp->next) {
671       if (vnp->choice == Seq_descr_title) {
672         is_title = TRUE;
673         npstitle = (CharPtr) vnp->data.ptrvalue;
674       }
675       if (vnp->choice == Seq_descr_org) {
676         is_org = TRUE;
677       }
678       if (vnp->choice == Seq_descr_modif) {
679         is_modif = TRUE;
680       }
681       if (vnp->choice == Seq_descr_update_date) {
682         is_date = TRUE;
683       }
684       /*    if (vnp->choice == Seq_descr_pub) {
685        is_pub = TRUE;
686        }
687        */
688     }
689     /* look for old style nps title, remove if based on nucleotide title, also remove exact duplicate */
690     if (npstitle != NULL) {
691       seqtitle = SeqEntryGetTitle (seqsep);
692       if (seqtitle != NULL) {
693         ch = *npstitle;
694         while (ch != '\0' && ch == *seqtitle) {
695           npstitle++;
696           ch = *npstitle;
697           seqtitle++;
698         }
699         if (ch == '\0' || StringCmp (npstitle, ", and translated products") == 0) {
700           vnp = ValNodeExtractList (&(bssp->descr), Seq_descr_title);
701           ValNodeFreeData (vnp);
702           if (ip != NULL) {
703             *ip = 1;
704           }
705         } else {
706           /* now removing any unrelated title */
707           vnp = ValNodeExtractList (&(bssp->descr), Seq_descr_title);
708           ValNodeFreeData (vnp);
709           if (ip != NULL) {
710             *ip = 2;
711           }
712         }
713       } else if (bsp != NULL) {
714         /* if no nucleotide title, move nps title to it */
715         vnp = ValNodeExtractList (&(bssp->descr), Seq_descr_title);
716         bsp->descr = ValNodeLink (&(bsp->descr), vnp);
717         descr = bsp->descr;
718         if (ip != NULL) {
719           *ip = 3;
720         }
721       } else if (seqsep->choice == 2) {
722         if (ip != NULL) {
723           *ip = 4;
724         }
725         /* get segmented sequence in segset */
726         sep2 = bssp->seq_set;
727         if (sep2 != NULL && sep2->choice == 2 && SeqEntryGetTitle (sep2) == NULL) {
728           bssp2 = sep2->data.ptrvalue;
729           if (bssp2 != NULL && bssp2->_class == BioseqseqSet_class_segset) {
730             sep3 = bssp2->seq_set;
731             if (sep3 != NULL && sep3->choice == 1) {
732               bsp2 = sep3->data.ptrvalue;
733               if (bsp2 != NULL && BioseqGetTitle (bsp2) == NULL) {
734                 sep4 = sep3->next;
735                 if (sep4 != NULL && sep4->choice == 2) {
736                   bssp3 = sep4->data.ptrvalue;
737                   if (bssp3 != NULL && bssp3->_class == BioseqseqSet_class_parts) {
738                     if (AllPartsHaveTitles (bssp3)) {
739                       /* if no segmented nucleotide bioseq title, move nps title to it */
740                       vnp = ValNodeExtractList (&(bssp->descr), Seq_descr_title);
741                       bsp2->descr = ValNodeLink (&(bsp2->descr), vnp);
742                       if (ip != NULL) {
743                         *ip = 5;
744                       }
745                     } else {
746                       if (ip != NULL) {
747                         *ip = 6;
748                       }
749                     }
750                   }
751                 }
752               }
753             }
754           }
755         }
756       } else {
757         if (ip != NULL) {
758           *ip = 7;
759         }
760       }
761     }
762     /*
763      if (!is_title) {
764      vnp = ValNodeExtractList(&descr, Seq_descr_title);
765      bssp->descr = ValNodeLink(&(bssp->descr), vnp);
766      }
767      */
768     if (!is_modif && check_GIBB(descr)) {
769       vnp = ValNodeExtractList(&descr, Seq_descr_modif);
770       if (vnp != NULL) {
771         bssp->descr = ValNodeLink(&(bssp->descr), vnp);
772       }
773     }
774     if (!is_org) {
775       vnp = ValNodeExtractList(&descr, Seq_descr_org);
776       if (vnp != NULL) {
777         bssp->descr = ValNodeLink(&(bssp->descr), vnp);
778       }
779     }
780     if (!is_date) {
781       vnp = ValNodeExtractList(&descr, Seq_descr_update_date);
782       if (vnp != NULL) {
783         bssp->descr = ValNodeLink(&(bssp->descr), vnp);
784       }
785     }
786     /*    vnp = ValNodeExtractList(&descr, Seq_descr_pub);
787      if (!is_pub)
788      bssp->descr = ValNodeLink(&(bssp->descr), vnp);
789      */
790   }
791   if (seqsep->choice == 1) {
792     bsp = seqsep->data.ptrvalue;
793     bsp->descr = descr;
794   }
795   if (seqsep->choice == 2) {
796     tmp = seqsep->data.ptrvalue;
797     tmp->descr = descr;
798   }
799   return;
800 }
801 //LCOV_EXCL_STOP
802 
803 
/*
 * MoveNPPubs - SeqEntryExplore callback: move pubs from the first member of
 * a nuc-prot set up to the set.
 *
 * Only Bioseq-sets of class nuc-prot with a populated first member are
 * handled.  If the set has no descriptors they are taken from the first
 * member through GetDescrNoTitles.  Otherwise all pub descriptors are
 * extracted from the first member; those carrying num, name, fig or
 * comment are put back on the member (as a fresh copy), the rest are
 * appended to the set descriptors.
 *
 * data, index and indent are not used.
 */
void MoveNPPubs (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{

  BioseqSetPtr  bssp, tmp;
  BioseqPtr     bsp;
  SeqEntryPtr   seqsep;
  ValNodePtr    descr = NULL, vnp = NULL, vnext, v, v_copy;
  PubdescPtr    pdp, pdp_copy;

  if (sep == NULL || IS_Bioseq(sep)) {
    return;
  }
  bssp = (BioseqSetPtr)(sep->data.ptrvalue);
  if (bssp == NULL || bssp->_class != BioseqseqSet_class_nuc_prot) {
    return;                   /*  do the rest for nuc-prot only */
  }
  seqsep = bssp->seq_set;
  /* same guard as ChkNucProt: the member must actually hold an object */
  if (seqsep == NULL || seqsep->data.ptrvalue == NULL) {
    return;
  }
  if (seqsep->choice == 1) {
    bsp = seqsep->data.ptrvalue;
    descr = bsp->descr;
  }
  if (seqsep->choice == 2) {
    tmp = seqsep->data.ptrvalue;
    descr = tmp->descr;
  }
  if (bssp->descr == NULL) {
    bssp->descr = GetDescrNoTitles(&descr);
  } else {
    /* move pubs to nuc-prot level */
    vnp = ValNodeExtractList(&descr, Seq_descr_pub);
    for (v = vnp; v != NULL; v = vnext) {
      vnext = v->next;        /* v may be removed below */
      pdp = (PubdescPtr) v->data.ptrvalue;
      if (pdp == NULL) {
        continue;
      }
      if (pdp->num == NULL && pdp->name == NULL && pdp->fig == NULL
          && pdp->comment == NULL) {
        continue;             /* plain pub: goes up to the set */
      }
      /* pub with member-specific fields stays on the member */
      pdp_copy = AsnIoMemCopy(pdp, (AsnReadFunc) PubdescAsnRead,
                              (AsnWriteFunc) PubdescAsnWrite);
      if (pdp_copy == NULL) {
        continue;             /* keep the original rather than lose it */
      }
      v_copy = SeqDescrNew(NULL);
      if (v_copy == NULL) {
        PubdescFree (pdp_copy);
        continue;
      }
      v_copy->choice = Seq_descr_pub;
      v_copy->data.ptrvalue = pdp_copy;
      descr = ValNodeLink(&(descr), v_copy);
      PubdescFree (pdp);
      vnp = remove_node(vnp, v);
    }
    if (vnp != NULL) {
      bssp->descr = ValNodeLink(&(bssp->descr), vnp);
    }
  }
  /* store the remaining descriptors back on the first member */
  if (seqsep->choice == 1) {
    bsp = seqsep->data.ptrvalue;
    bsp->descr = descr;
  }
  if (seqsep->choice == 2) {
    tmp = seqsep->data.ptrvalue;
    tmp->descr = descr;
  }
  return;
}
866 
GetSeqDescFromSeqEntry(SeqEntryPtr sep)867 static SeqDescrPtr GetSeqDescFromSeqEntry (SeqEntryPtr sep)
868 
869 {
870   BioseqPtr     bsp;
871   BioseqSetPtr  bssp;
872 
873   if (sep == NULL) return NULL;
874 
875   if (IS_Bioseq (sep)) {
876     bsp = (BioseqPtr) sep->data.ptrvalue;
877     if (bsp == NULL) return NULL;
878     return bsp->descr;
879   } else if (IS_Bioseq_set (sep)) {
880     bssp = (BioseqSetPtr) sep->data.ptrvalue;
881     if (bssp == NULL) return NULL;
882     return bssp->descr;
883   }
884 
885   return NULL;
886 }
887 
888 /* return list of pubs that are the same on all pop/phy/mut components */
CheckSegsForPopPhyMut(SeqEntryPtr sep)889 static SeqDescrPtr CheckSegsForPopPhyMut (SeqEntryPtr sep)
890 
891 {
892   SeqDescrPtr    descr;
893   ValNodePtr     head;
894   ValNodePtr     last;
895   SeqDescrPtr    list = NULL;
896   ObjValNodePtr  ovp;
897   PubdescPtr     pdp1, pdp2;
898   Boolean        same;
899   SeqDescrPtr    sdp1, sdp2;
900   SeqEntryPtr    tmp;
901   ValNodePtr     vnp, vnp1, vnp2;
902 
903   for (sdp1 = GetSeqDescFromSeqEntry (sep); sdp1 != NULL; sdp1 = sdp1->next) {
904     if (sdp1->choice != Seq_descr_pub) continue;
905     pdp1 = (PubdescPtr) sdp1->data.ptrvalue;
906     if (pdp1 == NULL) continue;
907     head = NULL;
908     last = NULL;
909     for (tmp = sep->next, same = FALSE; tmp != NULL; tmp = tmp->next) {
910       for (sdp2 = GetSeqDescFromSeqEntry (tmp); sdp2 != NULL; sdp2 = sdp2->next) {
911         if (sdp2->choice != Seq_descr_pub) continue;
912         pdp2 = (PubdescPtr) sdp2->data.ptrvalue;
913         if (pdp2 == NULL) continue;
914         if (PubLabelMatchEx (pdp1->pub, pdp2->pub, FALSE) == 0) {
915           if (PubdescMatch (pdp1, pdp2)) {
916             same = TRUE;
917             vnp = ValNodeAddPointer (&last, 0, (Pointer) sdp2);
918             if (head == NULL) {
919               head = vnp;
920             }
921             last = vnp;
922             break;
923           }
924         }
925       }
926       if (sdp2 == NULL) {
927         same = FALSE;
928         break;
929       }
930     }
931     if (same) {
932       descr = SeqDescrNew (NULL);
933       descr->choice = Seq_descr_pub;
934       descr->data.ptrvalue = AsnIoMemCopy (pdp1,
935                                            (AsnReadFunc) PubdescAsnRead,
936                                            (AsnWriteFunc) PubdescAsnWrite);
937       list = tie_next (list, descr);
938       /* mark original pubs for deletion */
939       if (sdp1->extended) {
940         ovp = (ObjValNodePtr) sdp1;
941         ovp->idx.deleteme = 1;
942       }
943       for (vnp1 = head; vnp1 != NULL; vnp1 = vnp1->next) {
944         vnp2 = (ValNodePtr) vnp1->data.ptrvalue;
945         if (vnp2->extended) {
946           ovp = (ObjValNodePtr) vnp2;
947           ovp->idx.deleteme = 1;
948         }
949       }
950     }
951     ValNodeFree (head);
952   }
953 
954   return list;
955 }
956 
957 /* move identical pubs in pop/phy/mut components to the set level */
/*
 * MovePopPhyMutPubsProc - SeqEntryExplore callback: lift pubs shared by all
 * components of a set up to the set.
 *
 * Applies to Bioseq-sets whose class lies in the range mut_set..eco_set or
 * is wgs_set or small_genome_set.  The shared pubs come from
 * CheckSegsForPopPhyMut; any of them that already matches a pub on the set
 * (PubLabelMatchEx, dates ignored) is freed, the rest are appended to the
 * set descriptors.
 *
 * data, index and indent are not used.
 */
static void MovePopPhyMutPubsProc (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)

{
  BioseqSetPtr  bssp;
  ValNodePtr    existing, shared, candidate, following;
  PubdescPtr    candidate_pdp, existing_pdp;
  Boolean       eligible;

  if (sep == NULL || ! IS_Bioseq_set (sep)) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  eligible = (Boolean) ((bssp->_class >= BioseqseqSet_class_mut_set &&
                         bssp->_class <= BioseqseqSet_class_eco_set) ||
                        bssp->_class == BioseqseqSet_class_wgs_set ||
                        bssp->_class == BioseqseqSet_class_small_genome_set);
  if (! eligible) return;

  shared = CheckSegsForPopPhyMut (bssp->seq_set);
  if (shared == NULL) return;

  /* check if pub is already on the set descr */
  for (existing = bssp->descr; existing != NULL; existing = existing->next) {
    if (existing->choice != Seq_descr_pub) continue;
    candidate = shared;
    while (candidate != NULL) {
      /* taken first: candidate may be removed from the chain */
      following = candidate->next;
      candidate_pdp = candidate->data.ptrvalue;
      existing_pdp = existing->data.ptrvalue;
      if (PubLabelMatchEx (candidate_pdp->pub, existing_pdp->pub, FALSE) == 0) {
        PubdescFree(candidate_pdp);
        shared = remove_node(shared, candidate);
      }
      candidate = following;
    }
  }

  bssp->descr = tie_next(bssp->descr, shared);
}
992 
/*
 * Run MovePopPhyMutPubsProc over every entry reachable from sep, then
 * call DeleteMarkedObjects on the same entry to remove whatever was
 * flagged for deletion.  A NULL sep is ignored.
 */
void MovePopPhyMutPubs (SeqEntryPtr sep)

{
  if (sep != NULL) {
    SeqEntryExplore (sep, (Pointer) NULL, MovePopPhyMutPubsProc);
    DeleteMarkedObjects (0, OBJ_SEQENTRY, sep);
  }
}
1000 
1001 //LCOV_EXCL_START
/*
 * Append feature sfp to a feature table on bsp.
 *
 * The target is the first annotation on bsp that has no name, no
 * annot-descr and type 1 (the same "type == 1" test this file uses
 * elsewhere to select feature tables).  If none exists, a new type-1
 * annotation is created and placed at the head of bsp->annot.
 *
 * Nothing is done when sfp or bsp is NULL, or when a new annotation is
 * needed but cannot be allocated (sfp is then left unattached).
 */
static void AddFeatToBioseq (SeqFeatPtr sfp, BioseqPtr bsp)

{
  SeqFeatPtr   prev;
  SeqAnnotPtr  sap;

  if (sfp == NULL || bsp == NULL) return;

  /* look for an existing unnamed, undescribed feature table */
  sap = bsp->annot;
  while (sap != NULL && (sap->name != NULL || sap->desc != NULL || sap->type != 1)) {
    sap = sap->next;
  }

  if (sap == NULL) {
    sap = SeqAnnotNew ();
    if (sap == NULL) return;
    sap->type = 1;
    sap->next = bsp->annot;
    bsp->annot = sap;
  }

  /*
   * Use the annotation selected above.  The previous code reset sap to
   * bsp->annot here, which discarded the search result and appended the
   * feature to whatever annotation happened to be first in the list.
   */
  if (sap->data != NULL) {
    prev = (SeqFeatPtr) sap->data;
    while (prev->next != NULL) {
      prev = prev->next;
    }
    prev->next = sfp;
  } else {
    sap->data = (Pointer) sfp;
  }
}
1034 
SSECNoGenomeAnnotInAnnotDescr(SeqAnnotPtr sap)1035 static Boolean SSECNoGenomeAnnotInAnnotDescr (SeqAnnotPtr sap)
1036 
1037 {
1038   AnnotDescrPtr  adp;
1039   ObjectIdPtr    oip;
1040   CharPtr        str;
1041   UserFieldPtr   ufp;
1042   UserObjectPtr  uop;
1043 
1044   if (sap == NULL) return TRUE;
1045 
1046   for (adp = sap->desc; adp != NULL; adp = adp->next) {
1047     if (adp->choice != Annot_descr_user) continue;
1048     uop = (UserObjectPtr) adp->data.ptrvalue;
1049     if (uop == NULL) continue;
1050     oip = uop->type;
1051     if (oip == NULL) continue;
1052     if (StringICmp (oip->str, "StructuredComment") != 0) continue;
1053     for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
1054       if (ufp->choice != 1) continue;
1055       oip = ufp->label;
1056       if (oip == NULL) continue;
1057       if (StringCmp (oip->str, "StructuredCommentPrefix") != 0) continue;
1058       str = (CharPtr) ufp->data.ptrvalue;
1059       if (StringCmp (str, "##Genome-Annotation-Data-START##") == 0) return FALSE;
1060     }
1061   }
1062 
1063   return TRUE;
1064 }
1065 
/*
 * VisitSetsInSep callback.  For a set of class parts, walk the
 * annotations attached to the set itself.  Every feature in a type-1
 * annotation whose location resolves to a Bioseq (BioseqFindFromSeqLoc)
 * is unlinked and handed to AddFeatToBioseq for that Bioseq.
 * Afterwards any annotation left with no data is unlinked and freed,
 * unless SSECNoGenomeAnnotInAnnotDescr reports FALSE for it.
 * userdata is not used.
 */
static void MoveFeatsOnPartsProc (BioseqSetPtr bssp, Pointer userdata)

{
  SeqAnnotPtr       annot;
  SeqAnnotPtr       annotAfter;
  SeqAnnotPtr PNTR  annotLink;
  SeqFeatPtr        feat;
  SeqFeatPtr        featAfter;
  Pointer PNTR      featLink;
  BioseqPtr         owner;

  if (bssp == NULL) return;
  if (bssp->_class != BioseqseqSet_class_parts) return;

  /* annotLink always addresses the pointer that leads to the current annot */
  annotLink = &(bssp->annot);

  for (annot = bssp->annot; annot != NULL; annot = annotAfter) {
    annotAfter = annot->next;

    if (annot->type == 1) {
      /* featLink plays the same role for the feature chain */
      featLink = (Pointer PNTR) &(annot->data);
      for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = featAfter) {
        featAfter = feat->next;
        /* target = GetBioseqGivenSeqLoc (sfp->location, sfp->idx.entityID); */
        owner = BioseqFindFromSeqLoc (feat->location);
        if (owner == NULL) {
          /* feature stays; later removals must patch its next pointer */
          featLink = (Pointer PNTR) &(feat->next);
          continue;
        }
        *featLink = feat->next;
        feat->next = NULL;
        AddFeatToBioseq (feat, owner);
      }
    }

    /* now keep empty annot if annot_descr present */
    if (annot->data != NULL || ! SSECNoGenomeAnnotInAnnotDescr (annot)) {
      annotLink = &(annot->next);
      continue;
    }
    *annotLink = annot->next;
    annot->next = NULL;
    SeqAnnotFree (annot);
  }
}
1112 
/*
 * Apply MoveFeatsOnPartsProc to every set that VisitSetsInSep visits
 * under sep.  That callback only acts on sets of class parts, moving
 * features packaged on the set onto the Bioseqs their locations resolve
 * to.  No user data is passed to the callback.
 */
extern void MoveFeatsFromPartsSet (SeqEntryPtr sep)

{
  VisitSetsInSep (sep, NULL, MoveFeatsOnPartsProc);
}
1118 //LCOV_EXCL_STOP
1119 
CmpOrgById(BioSourcePtr b1,BioSourcePtr b2)1120 Boolean CmpOrgById(BioSourcePtr b1, BioSourcePtr b2)
1121 {
1122   DbtagPtr    d1 = NULL, d2 = NULL;
1123   ValNodePtr  vnp;
1124 
1125   if (b1 == NULL || b2 == NULL) {
1126     return FALSE;
1127   }
1128   if (b1->org ==  NULL || b2->org == NULL) {
1129     return FALSE;
1130   }
1131   for (vnp = b1->org->db; vnp; vnp = vnp->next) {
1132     d1 = (DbtagPtr) vnp->data.ptrvalue;
1133     if (StringCmp(d1->db, "taxon") == 0) {
1134       break;
1135     }
1136   }
1137   for (vnp = b2->org->db; vnp; vnp = vnp->next) {
1138     d2 = (DbtagPtr) vnp->data.ptrvalue;
1139     if (StringCmp(d2->db, "taxon") == 0) {
1140       break;
1141     }
1142   }
1143   if (d1 && d2) {
1144     if (d1->tag->id == d2->tag->id) {
1145       return TRUE;
1146     } else {
1147     }
1148   } else if (StringICmp(b1->org->taxname, b2->org->taxname) == 0) {
1149     return TRUE;
1150   }
1151   return FALSE;
1152 }
1153 
/*
 * Merge guest into host and return host.
 *
 *  - guest == NULL: host is returned unchanged (NULL if host is NULL).
 *  - host == NULL:  a fresh copy of guest is returned.
 *  - otherwise:     genome and origin are taken from guest when host has
 *                   none (0); copies of all guest subsources are appended
 *                   to host->subtype; copies of all guest OrgMods are
 *                   appended to host's OrgName, which is created on
 *                   demand.
 *
 * guest is never modified.  OrgMods are only merged when both guest and
 * host have an org reference; previously a missing org on either side
 * was dereferenced.
 */
BioSourcePtr BioSourceMerge(BioSourcePtr host, BioSourcePtr guest)
{
  SubSourcePtr  ssp, sp;
  OrgModPtr     omp, homp;
  OrgNamePtr    onp;

  if (guest == NULL) {
    return host;
  }
  if (host == NULL) {
    return (BioSourcePtr) AsnIoMemCopy(guest, (AsnReadFunc) BioSourceAsnRead,
                                       (AsnWriteFunc) BioSourceAsnWrite);
  }

  if (host->genome == 0 && guest->genome != 0) {
    host->genome = guest->genome;
  }
  if (host->origin == 0 && guest->origin != 0) {
    host->origin = guest->origin;
  }

  for (ssp = guest->subtype; ssp; ssp = ssp->next) {
    sp = AsnIoMemCopy(ssp, (AsnReadFunc) SubSourceAsnRead,
                      (AsnWriteFunc) SubSourceAsnWrite);
    if (sp != NULL) {
      host->subtype = tie_next_subtype(host->subtype, sp);
    }
  }

  /* modifiers can only be merged when both sides have an org reference */
  if (guest->org == NULL || guest->org->orgname == NULL || host->org == NULL) {
    return host;
  }

  for (omp = guest->org->orgname->mod; omp; omp = omp->next) {
    homp = AsnIoMemCopy(omp, (AsnReadFunc) OrgModAsnRead,
                        (AsnWriteFunc) OrgModAsnWrite);
    if (homp == NULL) {
      continue;
    }
    /* the host OrgName is only created once there is a modifier to hold */
    onp = host->org->orgname;
    if (onp == NULL) {
      onp = OrgNameNew();
      if (onp == NULL) {
        OrgModFree(homp);
        break;
      }
      host->org->orgname = onp;
    }
    onp->mod = tie_next_OrgMod(onp->mod, homp);
  }
  return host;
}
1195 
/*
 * Reduce host to what it has in common with guest and return host.
 *
 *  - genome / origin are reset to 0 when they differ.
 *  - subsources of host with no guest subsource of equal subtype and name
 *    are removed.
 *  - if CmpOrgById says the organisms differ, host->org is freed and set
 *    to NULL and nothing else is compared.
 *  - otherwise the common name is dropped when it differs, host's OrgName
 *    is freed when guest has none, and host OrgMods with no guest OrgMod
 *    of equal subtype and subname are removed.
 *
 * guest is never modified.  host is returned unchanged when either
 * argument is NULL.
 *
 * Fixes relative to the previous version: the OrgMod removal was placed
 * after the outer loop and tested the wrong variable, so no modifier was
 * ever removed; the OrgName was released with MemFree, leaking its
 * contents.
 */
BioSourcePtr BioSourceCommon(BioSourcePtr host, BioSourcePtr guest)
{
  SubSourcePtr  ssp, sp, spnext;
  OrgModPtr     omp, om, ompnext;

  if (host == NULL || guest == NULL) {
    return host;
  }

  if (host->genome != guest->genome) {
    host->genome = 0;
  }
  if (host->origin != guest->origin) {
    host->origin = 0;
  }

  for (sp = host->subtype; sp; sp = spnext) {
    /* sp may be removed below, so fetch its successor first */
    spnext = sp->next;
    for (ssp = guest->subtype; ssp; ssp = ssp->next) {
      if (sp->subtype == ssp->subtype &&
          StringCmp(sp->name, ssp->name) == 0) {
        break;
      }
    }
    if (ssp == NULL) {
      host->subtype = remove_subtype(host->subtype, sp);
    }
  }

  /* CmpOrgById returns FALSE when either org is NULL, so both are
     non-NULL past this point */
  if (CmpOrgById(host, guest) == FALSE) {
    OrgRefFree(host->org);
    host->org = NULL;
    return host;
  }

  if (StringExtCmp(host->org->common, guest->org->common) != 0) {
    MemFree(host->org->common);
    host->org->common = NULL;
  }

  if (guest->org->orgname == NULL) {
    host->org->orgname = OrgNameFree(host->org->orgname);
  } else if (host->org->orgname != NULL) {
    for (omp = host->org->orgname->mod; omp; omp = ompnext) {
      ompnext = omp->next;
      for (om = guest->org->orgname->mod; om; om = om->next) {
        if (om->subtype == omp->subtype &&
            StringCmp(om->subname, omp->subname) == 0) {
          break;
        }
      }
      if (om == NULL) {
        /* no guest modifier matches this host modifier */
        host->org->orgname->mod =
          remove_OrgMod(host->org->orgname->mod, omp);
      }
    }
  }
  return host;
}
1250 
EmptyBioSource(BioSourcePtr bio)1251 static Boolean EmptyBioSource(BioSourcePtr bio)
1252 {
1253   if (bio == NULL || bio->org == NULL) return TRUE;
1254   if (bio->org->taxname == NULL && bio->org->common == NULL && bio->org->db == NULL) return TRUE;
1255   return FALSE;
1256 }
1257 
/*
 * SeqEntryExplore callback, acts on sets of class 2 only (segset, per
 * the original comment).
 *
 * The BioSource descriptor on the set is replaced by the information
 * common to the BioSources of all parts: if the set has a source
 * descriptor it is intersected (BioSourceCommon) with the first source
 * descriptor of every part; if it has none, a copy of the first part's
 * source is used as the starting point.  The result is appended to the
 * set's descriptors unless it is empty (EmptyBioSource), in which case
 * it is discarded.
 *
 * The set is left untouched when it has fewer than two members, when its
 * second member is a Bioseq rather than the parts set, or when the parts
 * set is empty.  Previously the source descriptor was extracted (or a
 * new one allocated) before these checks, so those early returns lost or
 * leaked it.
 *
 * data, index and indent are not used.
 */
void StripBSfromTop (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqSetPtr  bssp, partset;
  ValNodePtr    vnp, bio_vnp;
  SeqEntryPtr   segsep, parts, cur;
  BioseqPtr     bsp;
  BioSourcePtr  biotop = NULL, bio;

  if (sep == NULL || IS_Bioseq(sep)) {
    return;
  }
  bssp = (BioseqSetPtr)(sep->data.ptrvalue);
  if (bssp == NULL || bssp->_class != 2) {    /*  do the rest for segset only */
    return;
  }

  /* validate the structure before touching the descriptors */
  segsep = bssp->seq_set;
  if (segsep == NULL || segsep->next == NULL || IS_Bioseq(segsep->next)) {
    return;
  }
  partset = (BioseqSetPtr) (segsep->next->data.ptrvalue); /*segsep->next=parts*/
  if (partset == NULL || partset->seq_set == NULL) {
    return;
  }
  parts = partset->seq_set;

  bio_vnp = ValNodeExtractList(&(bssp->descr), Seq_descr_source);
  if (bio_vnp != NULL) {
    biotop = (BioSourcePtr) bio_vnp->data.ptrvalue;
  } else {
    bio_vnp = SeqDescrNew(NULL);
    if (bio_vnp == NULL) {
      return;
    }
    bio_vnp->choice = Seq_descr_source;
  }

  for (cur = parts; cur != NULL; cur = cur->next) {
    if (!IS_Bioseq(cur)) {
      continue;
    }
    bsp = (BioseqPtr) cur->data.ptrvalue;
    if (bsp == NULL) {
      continue;
    }
    /* start fresh for each part so a part without a source is skipped */
    bio = NULL;
    for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next) {
      if (vnp->choice == Seq_descr_source) {
        bio = (BioSourcePtr) vnp->data.ptrvalue;
        break;
      }
    }
    if (bio == NULL) {
      continue;
    }
    if (biotop == NULL) {
      biotop = AsnIoMemCopy(bio, (AsnReadFunc) BioSourceAsnRead,
                            (AsnWriteFunc) BioSourceAsnWrite);
      bio_vnp->data.ptrvalue = biotop;
    } else {
      biotop = BioSourceCommon(biotop, bio);
    }
  }

  if (EmptyBioSource(biotop)) {
    /* nothing in common (or no source anywhere): drop the descriptor */
    if (biotop != NULL) {
      BioSourceFree(biotop);
    }
    ValNodeFree(bio_vnp);
  } else {
    bssp->descr = tie_next(bssp->descr, bio_vnp);
  }
  return;
}
1321 
/*
 * SeqEntryExplore callback, acts on sets of class 2 only (segset, per
 * the original comment).
 *
 * The source descriptors are removed from every part and merged
 * (BioSourceMerge) into the set-level BioSource, which is then put back
 * on the set as a single source descriptor appended to its descriptor
 * list.  If the second member of the set is a Bioseq rather than a parts
 * set there is nothing to merge, and the set-level source is simply
 * re-appended.
 *
 * A set with fewer than two members is left untouched.  Previously the
 * set-level source was extracted before that check and was lost (and
 * leaked) on the early return.
 *
 * data, index and indent are not used.
 */
void StripBSfromParts (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqSetPtr  bssp, partset;
  BioseqPtr     bsp;
  SeqEntryPtr   segsep, parts = NULL, cur;
  ValNodePtr    descr, vnp;
  BioSourcePtr  biosp = NULL;

  if (sep == NULL || IS_Bioseq(sep)) {
    return;
  }
  bssp = (BioseqSetPtr)(sep->data.ptrvalue);
  if (bssp == NULL || bssp->_class != 2) {    /*  do the rest for segset only */
    return;
  }
  segsep = bssp->seq_set;
  if (segsep == NULL || segsep->next == NULL) {
    return;
  }

  /* take the set-level source off the set; only its BioSource is kept */
  vnp = ValNodeExtractList(&(bssp->descr), Seq_descr_source);
  if (vnp != NULL) {
    biosp = (BioSourcePtr) vnp->data.ptrvalue;
    ValNodeFree(vnp);
  }

  if (!IS_Bioseq(segsep->next)) {
    partset = (BioseqSetPtr) (segsep->next->data.ptrvalue); /*segsep->next=parts*/
    if (partset != NULL) {
      parts = partset->seq_set;
    }
  }

  for (cur = parts; cur != NULL; cur = cur->next) {
    if (!IS_Bioseq(cur)) {
      continue;
    }
    bsp = (BioseqPtr) cur->data.ptrvalue;
    if (bsp == NULL) {
      continue;
    }
    vnp = ValNodeExtractList(&(bsp->descr), Seq_descr_source);
    if (vnp != NULL) {
      /* BioSourceMerge copies what it needs, so the part's source is freed */
      biosp = BioSourceMerge(biosp, (BioSourcePtr) vnp->data.ptrvalue);
      BioSourceFree((BioSourcePtr) (vnp->data.ptrvalue));
      ValNodeFree(vnp);
    }
  }

  if (biosp != NULL) {
    descr = SeqDescrNew(NULL);
    if (descr != NULL) {
      descr->choice = Seq_descr_source;
      descr->data.ptrvalue = biosp;
      bssp->descr = ValNodeLink(&(bssp->descr), descr);
    } else {
      BioSourceFree(biosp);
    }
  }

  return;
}
1374 
1375 //LCOV_EXCL_START
1376 /*------------------------ GetDescr() --------------------------*/
1377 /*****************************************************************************
1378  *  GetDescr:
1379  *                                                                    8-12-93
1380  ******************************************************************************/
/*
 * Pull descriptors out of *descr and return them as one list, grouped in
 * this order: title, org, modif, comment, pub, update-date.  Modif
 * descriptors are only taken when check_GIBB (*descr) is true at the
 * moment their turn comes.  Returns NULL when nothing was extracted.
 */
ValNodePtr GetDescr(ValNodePtr PNTR descr)
{
  static const Uint1  extractOrder [] = {
    Seq_descr_title,
    Seq_descr_org,
    Seq_descr_modif,
    Seq_descr_comment,
    Seq_descr_pub,
    Seq_descr_update_date
  };
  ValNodePtr  collected = NULL;
  ValNodePtr  chunk;
  size_t      i;

  for (i = 0; i < sizeof (extractOrder) / sizeof (extractOrder [0]); i++) {
    /* modif is conditional; every other kind is always extracted */
    if (extractOrder [i] == Seq_descr_modif && ! check_GIBB (*descr)) {
      continue;
    }
    chunk = ValNodeExtractList (descr, extractOrder [i]);
    if (chunk != NULL) {
      collected = ValNodeLink (&collected, chunk);
    }
  }

  return collected;

} /* GetDescr */
1419 //LCOV_EXCL_STOP
1420 
/*
 * Same as GetDescr but leaves title descriptors in place: org, modif
 * (only when check_GIBB (*descr) is true), comment, pub and update-date
 * descriptors are extracted from *descr, in that order, and returned as
 * one list.  Returns NULL when nothing was extracted.
 */
static ValNodePtr GetDescrNoTitles (ValNodePtr PNTR descr)
{
  ValNodePtr  merged = NULL;
  ValNodePtr  piece;

  if ((piece = ValNodeExtractList (descr, Seq_descr_org)) != NULL) {
    merged = ValNodeLink (&merged, piece);
  }

  if (check_GIBB (*descr)) {
    //LCOV_EXCL_START
    //all Seq_descr_modif descriptors were removed upstream by StripOld
    if ((piece = ValNodeExtractList (descr, Seq_descr_modif)) != NULL) {
      merged = ValNodeLink (&merged, piece);
    }
    //LCOV_EXCL_STOP
  }

  if ((piece = ValNodeExtractList (descr, Seq_descr_comment)) != NULL) {
    merged = ValNodeLink (&merged, piece);
  }

  if ((piece = ValNodeExtractList (descr, Seq_descr_pub)) != NULL) {
    merged = ValNodeLink (&merged, piece);
  }

  if ((piece = ValNodeExtractList (descr, Seq_descr_update_date)) != NULL) {
    merged = ValNodeLink (&merged, piece);
  }

  return merged;

} /* GetDescrNoTitles */
1457 
1458 //LCOV_EXCL_START
1459 //All Seq_descr_modif descriptors were removed upstream by StripOld
1460 /*------------------------ check_GIBB() --------------------------*/
1461 /*****************************************************************************
1462  *  check_GIBB:
1463  *                                                                    8-12-93
1464  ******************************************************************************/
check_GIBB(ValNodePtr descr)1465 Boolean check_GIBB(ValNodePtr descr)
1466 {
1467   ValNodePtr  vnp, modif;
1468   Int4        gmod;
1469 
1470   if (descr == NULL) {
1471     return FALSE;
1472   }
1473   for (vnp = descr; vnp && vnp->choice != Seq_descr_modif; vnp = vnp->next)
1474     continue;
1475   if (vnp == NULL) {
1476     return FALSE;
1477   }
1478   modif = (ValNodePtr) vnp->data.ptrvalue;
1479   if (modif == NULL) {
1480     return FALSE;
1481   }
1482   gmod = modif->data.intvalue;
1483   if (gmod == Seq_descr_GIBB_mod_dna || gmod == Seq_descr_GIBB_mod_rna ||
1484       gmod == Seq_descr_GIBB_mod_est || gmod == Seq_descr_GIBB_mod_complete
1485       || gmod == Seq_descr_GIBB_mod_partial) {
1486     return FALSE;
1487   }
1488   return TRUE;
1489 }
1490 
1491 //used only by ChkSegset (segsets only)
1492 /*----------------------------- SrchSegChoice() --------------------------*/
1493 /*****************************************************************************
1494  *  SrchSegChoice:
1495  ******************************************************************************/
SrchSegChoice(SeqEntryPtr sep,Uint1 choice)1496 ValNodePtr SrchSegChoice(SeqEntryPtr sep, Uint1 choice)
1497 {
1498   BioseqPtr   bsp;
1499   ValNodePtr  hvnp = NULL;
1500 
1501   if (sep == NULL) {
1502     return NULL;
1503   }
1504   if (IS_Bioseq(sep)) {
1505     bsp = (BioseqPtr)(sep->data.ptrvalue);
1506     /* first bioseq from parts */
1507     if (CheckSegDescrChoice(sep, choice)) {     /*identical */
1508       hvnp = ValNodeExtractList(&(bsp->descr), choice);
1509       CleanUpSeqDescrChoice(sep->next, choice);
1510     }
1511   }
1512   return (hvnp);
1513 
1514 } /* SrchSegChoice */
1515 
1516 // Used for segsets
1517 /*---------------------------- SrchSegSeqMol() --------------------------*/
1518 /*************************************************************************
1519  *  SrchSegSeqMol:
1520  *                                                             5-14-93
1521  **************************************************************************/
/*
 * Compare the mol value of the first Bioseq with that of the Bioseqs
 * following it in the chain, stopping at the first difference.
 *
 * The function returns nothing and modifies nothing, so it has no
 * observable effect.
 *
 * NOTE(review): the guard below returns as soon as sep has a successor,
 * which means the comparison loop never runs.  This looks inverted but
 * is kept as is because the intent cannot be determined from here.
 */
void SrchSegSeqMol(SeqEntryPtr sep)
{
  BioseqPtr    bsp = NULL;
  SeqEntryPtr  cursep;
  Uint1        mol;

  if (sep == NULL || sep->next) {
    return;
  }
  if (IS_Bioseq(sep)) {
    bsp = sep->data.ptrvalue;
  }
  if (bsp == NULL) {
    return;
  }
  mol = bsp->mol;

  for (cursep = sep->next; cursep != NULL; cursep = cursep->next) {
    /* test the entry being visited; the old code tested sep, which is
       always a Bioseq here, so a set would have been read as a Bioseq */
    if (! IS_Bioseq(cursep)) {
      continue;
    }
    bsp = cursep->data.ptrvalue;
    if (bsp == NULL) {
      continue;
    }
    if (mol != bsp->mol) {
      break;
    }
  }

  return;

} /* SrchSegSeqMol */
1556 //LCOV_EXCL_STOP
1557 
1558 /*------------------------ CheckSegDescrChoice() -------------------------*/
1559 /*****************************************************************************
1560  *  CheckSegDescrChoice:
1561  *                                                                  5-18-93
1562  ******************************************************************************/
/*
 * Walk the chain of entries starting at sep and decide whether the first
 * descriptor of the given choice is the same on every entry.
 *
 * Each entry's data pointer is used as a Bioseq without an IS_Bioseq
 * check.  The value found on the first entry is remembered and every
 * later entry is compared against it with a choice-specific comparison
 * (taxname string, BSComparison, integer value, DateMatch, PubMatch or
 * title string).  The walk stops at the first entry that differs or that
 * lacks the descriptor.
 *
 * Returns TRUE when no difference was found, which includes an empty
 * chain.  An entry that lacks the descriptor also yields TRUE, except
 * for Seq_descr_update_date where it yields FALSE (see the final test).
 * An unsupported choice posts a warning and yields FALSE.
 */
Boolean CheckSegDescrChoice(SeqEntryPtr sep, Uint1 choice)
{
  BioseqPtr     bsp;
  SeqEntryPtr   cursep;
  ValNodePtr    vnp, mvnp;
  Boolean       same;
  Boolean       no_choice = TRUE;
  BioSourcePtr  biosp = NULL;
  OrgRefPtr     orp;
  CharPtr       title = NULL;
  DatePtr       dp = NULL;
  CharPtr       org = NULL;
  Int4          modif = -1, mol = -1;
  PubdescPtr    pdp = NULL;

  /* the loop ends as soon as same becomes FALSE */
  for (cursep = sep, same = TRUE;
       cursep != NULL && same; cursep = cursep->next) {
    bsp = cursep->data.ptrvalue;
    /* find the first descriptor of the requested choice on this entry */
    for (vnp = bsp->descr; vnp != NULL && vnp->choice != choice;
         vnp = vnp->next)
      continue;

    if (vnp == NULL) {
      /* descriptor missing on this entry: stop, remembered via no_choice */
      same = FALSE;
      no_choice = TRUE;
    } else if (choice == Seq_descr_org) {
      //LCOV_EXCL_START
      //only ever called with Seq_descr_source as choice
      no_choice = FALSE;
      orp = vnp->data.ptrvalue;

      if (org == NULL)
        org = orp->taxname;
      else if (StringCmp(org, orp->taxname) != 0)
        same = FALSE;
      //LCOV_EXCL_STOP
    } else if (choice == Seq_descr_source) {
      no_choice = FALSE;
      if (biosp == NULL) {
        biosp = vnp->data.ptrvalue;
      } else if (BSComparison(biosp,
                              (BioSourcePtr) vnp->data.ptrvalue) != 0) {
        same = FALSE;
      }
//LCOV_EXCL_START
//only ever called with Seq_descr_source as choice
    } else if (choice == Seq_descr_mol_type) {
      no_choice = FALSE;
      /* -1 marks "no value remembered yet" */
      if (mol == -1)
        mol = vnp->data.intvalue;
      else if (mol != vnp->data.intvalue)
        same = FALSE;
    } else if (choice == Seq_descr_modif) {
      no_choice = FALSE;
      mvnp = vnp->data.ptrvalue;

      /* only the first modifier of each list is compared */
      if (modif == -1)
        modif = mvnp->data.intvalue;
      else if (modif != mvnp->data.intvalue)
        same = FALSE;
    }else if (choice == Seq_descr_update_date) {
      no_choice = FALSE;
      if (dp == NULL)
        dp = vnp->data.ptrvalue;
      else if (DateMatch(dp, vnp->data.ptrvalue, TRUE) != 0)
        same = FALSE;
    } else if (choice == Seq_descr_pub) {
      no_choice = FALSE;
      if (pdp == NULL)
        pdp = vnp->data.ptrvalue;
      else if (PubMatch(pdp->pub,
                        ((PubdescPtr)(vnp->data.ptrvalue))->pub) != 0)
        same = FALSE;
    } else if (choice == Seq_descr_title) {
      no_choice = FALSE;
      if (title == NULL)
        title = vnp->data.ptrvalue;
      else if (StringCmp(title, (CharPtr) vnp->data.ptrvalue) != 0)
        same = FALSE;
    } else {
      no_choice = FALSE;
      ErrPostEx(SEV_WARNING, 0, 2,
                "Unrecognized choice: %d", choice);
      same = FALSE;
    }
  }
  /* a walk that stopped on a missing descriptor is reported as "same",
     except for update-date */
  if (same == FALSE && no_choice == TRUE && choice != Seq_descr_update_date) {
    same = TRUE;
  }
  //LCOV_EXCL_STOP
  return (same);

} /* CheckSegDescrChoice */
1656 
1657 
/*
 * SeqEntryExplore callback.  For every coding region with a product in
 * the feature tables attached directly to sep (Bioseq or Bioseq-set),
 * remove all Prot-ref xrefs from the coding region.
 *
 * If the product is a whole-location pointing at a Bioseq that
 * BioseqFind can locate, that Bioseq has an annotation, and the first
 * annotation holds no protein feature with a Prot-ref, a copy of the
 * xref's Prot-ref is stored there first: in the existing protein feature
 * if there is one, otherwise in a newly created protein feature covering
 * the whole product.  With several Prot-ref xrefs the last one wins.
 *
 * A Bioseq carrying a SEQID_OTHER id whose accession starts with NC_,
 * NG_, NT_, NW_ or AC_ is skipped entirely.
 *
 * Fixes relative to the previous version: the Bioseq/Bioseq-set pointer
 * was dereferenced before being checked for NULL; the Prot-ref copy made
 * for an earlier xref leaked when a later xref overwrote it; allocation
 * results are now checked.
 *
 * data, index and indent are not used.
 */
void StripProtXref (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr       bsp, prot;
  BioseqSetPtr    bssp;
  ValNodePtr      vnp;
  SeqAnnotPtr     sap, ap, pap;
  SeqFeatPtr      sfp, psfp, head;
  ProtRefPtr      prp, pprp;
  SeqFeatXrefPtr  xrp, xrpnext;
  SeqIdPtr        sid, prodid;
  SeqLocPtr       slp;
  TextSeqIdPtr    tsip;

  if (sep == NULL) return;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if (bsp == NULL) return;
    sap = bsp->annot;
    for (sid = bsp->id; sid != NULL; sid = sid->next) {
      if (sid->choice != SEQID_OTHER) continue;
      tsip = (TextSeqIdPtr) sid->data.ptrvalue;
      if (tsip == NULL) continue;
      if (StringNCmp (tsip->accession, "NC_", 3) == 0) return;
      if (StringNCmp (tsip->accession, "NG_", 3) == 0) return;
      if (StringNCmp (tsip->accession, "NT_", 3) == 0) return;
      if (StringNCmp (tsip->accession, "NW_", 3) == 0) return;
      if (StringNCmp (tsip->accession, "AC_", 3) == 0) return;
    }
  }
  else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    if (bssp == NULL) return;
    sap = bssp->annot;
  }

  for (ap = sap; ap != NULL; ap = ap->next) {
    if (ap->type != 1) continue;
    head = (SeqFeatPtr)(ap->data);
    for (sfp = head; sfp; sfp = sfp->next) {
      if (sfp->data.choice != SEQFEAT_CDREGION) continue;
      vnp = sfp->product;
      if (vnp == NULL) continue;    /* xrefs are only stripped when a product exists */

      prot = NULL;
      pap = NULL;
      psfp = NULL;
      pprp = NULL;
      prodid = NULL;

      if (vnp->choice == SEQLOC_WHOLE) {
        prodid = vnp->data.ptrvalue;
        prot = BioseqFind(prodid);
      }
      if (prot != NULL) {
        /* only the first annotation of the product is examined */
        pap = prot->annot;
        if (pap != NULL) {
          for (psfp = pap->data; psfp; psfp=psfp->next) {
            if (psfp->data.choice == SEQFEAT_PROT) {
              pprp = psfp->data.value.ptrvalue;
              break;
            }
          }
        }
      }

//LCOV_EXCL_START
//Prot-ref xrefs already removed upstream by basic cleanup
      for (xrp = sfp->xref; xrp != NULL; xrp = xrpnext) {
        /* xrp may be removed below, so fetch its successor first */
        xrpnext = xrp->next;
        if (xrp->data.choice == SEQFEAT_PROT) {
          prp = xrp->data.value.ptrvalue;
          if (pap != NULL && pprp == NULL) {
            if (psfp == NULL) {
              psfp = SeqFeatNew();
              if (psfp != NULL) {
                psfp->data.choice = SEQFEAT_PROT;
                slp = ValNodeNew(NULL);
                if (slp != NULL) {
                  slp->choice = SEQLOC_WHOLE;
                  slp->data.ptrvalue = SeqIdDup(prodid);
                }
                psfp->location = slp;
                pap->data = tie_feat(pap->data, psfp);
              }
            }
            if (psfp != NULL) {
              /* release the copy made for an earlier xref, if any */
              ProtRefFree((ProtRefPtr) psfp->data.value.ptrvalue);
              psfp->data.value.ptrvalue = AsnIoMemCopy(prp,
                                                       (AsnReadFunc) ProtRefAsnRead,
                                                       (AsnWriteFunc) ProtRefAsnWrite);
            }
          }
          sfp->xref = remove_xref(sfp->xref, xrp);
        }
      }
//LCOV_EXCL_STOP
    }
    ap->data = head;
  }

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    bsp->annot = sap;
  }
  else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    bssp->annot = sap;
  }
  return;
}
1757 
1758 //LCOV_EXCL_START
1759 //obsolete, and possibly dangerous
GetAnticodonFromObject(SeqFeatPtr sfp)1760 static SeqLocPtr GetAnticodonFromObject(SeqFeatPtr sfp)
1761 {
1762   UserObjectPtr  usop;
1763   UserFieldPtr   ufp;
1764   Int4Ptr        ints;
1765   SeqLocPtr      slp;
1766   SeqIntPtr      sip;
1767   Int4           from = 0, to = 0;
1768 
1769   if (sfp == NULL) {
1770     return NULL;
1771   }
1772   if ((usop = sfp->ext) == NULL) {
1773     return NULL;
1774   }
1775   if (StringICmp (usop->_class, "NCBI") != 0) {
1776     return NULL;
1777   }
1778   ufp = usop->data;
1779   if (ufp && ufp->choice == 8) {  /* ints */
1780     ints = (Int4Ptr) ufp->data.ptrvalue;
1781     from = ints[0];
1782     to = ints[1];
1783   }
1784   sip = SeqIntNew();
1785   sip->from = from;
1786   sip->to = to;
1787   sip->id = SeqIdDup(SeqLocId(sfp->location));
1788   slp = ValNodeNew(NULL);
1789   slp->choice = SEQLOC_INT;
1790   slp->data.ptrvalue = sip;
1791   sfp->ext = usop->next /* NULL */;
1792   UserObjectFree (usop);
1793   return slp;
1794 
1795 }
1796 //LCOV_EXCL_STOP
1797 
1798 /*--------------------------- CheckMaps() --------------------------*/
1799 /***************************************************************************
1800  *   CheckMaps:
1801  *   -- find all /map and Gene-ref
1802  *        if all maps are the same put it to Biosource and remove quals
1803  *   -- change User-Object anticodon in tRNA to SeqLoc
1804  ****************************************************************************/
/*
 * SeqEntryExplore callback.  data must point to a QualMap that
 * accumulates the single map value seen so far:
 *   qmp->name  first map value encountered (a StringSave copy), or NULL
 *   qmp->same  set to FALSE once a different map value is seen
 *
 * Nothing is done once qmp->same is FALSE, or for a Bioseq whose repr is
 * neither raw nor const.  Map values are taken from SUBSRC_map
 * subsources of the source descriptors, from "map" qualifiers on
 * features, and from the maploc of gene features (gene xrefs are not
 * examined here).  This function only collects; it does not remove any
 * map value.
 *
 * As a side job, an RNA feature with type 3 and ext choice 2 that has a
 * user object in sfp->ext and no anticodon gets its anticodon from
 * GetAnticodonFromObject, and its "anticodon" qualifiers are removed.
 * (presumably type 3 / choice 2 denote tRNA -- the ext value is used as
 * a tRNAPtr)
 *
 * index and indent are not used.
 */
void CheckMaps (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioSourcePtr   biop;
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  ValNodePtr     descr;
  SeqAnnotPtr    sap = NULL, ap;
  CharPtr        qval;
  SeqFeatPtr     sfp;
  GeneRefPtr     grp;
  QualMapPtr     qmp;
  RnaRefPtr      rrp;
  SubSourcePtr   ssp;
  tRNAPtr        trna;
  GBQualPtr      q, qnext;

  qmp = data;
  /* a mismatch was already found on an earlier entry */
  if (qmp->same == FALSE) {
    return;
  }
  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
      return;
    descr = bsp->descr;
    sap = bsp->annot;
  }
  else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    descr = bssp->descr;
    sap = bssp->annot;
  }

  /* map subsources on source descriptors; scanning continues after a
     mismatch */
  while (descr != NULL) {
    if (descr->choice == Seq_descr_source) {
      biop = (BioSourcePtr) descr->data.ptrvalue;
      if (biop != NULL) {
        for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
          if (ssp->subtype == SUBSRC_map && ssp->name != NULL) {
            if (qmp->name == NULL) {
              qmp->name = StringSave (ssp->name);
            } else if (StringCmp (qmp->name, ssp->name) != 0) {
              qmp->same = FALSE;
            }
          }
        }
      }
    }
    descr = descr->next;
  }

  /* look for all the same maploc and place it to SubSource*/
  for (ap = sap; ap != NULL; ap = ap->next) {
    if (ap->type != 1) {
      continue;
    }
    /* the breaks below leave only this feature loop; the remaining
       annotations are still visited */
    for (sfp = (SeqFeatPtr)(ap->data); sfp; sfp = sfp->next) {
      if ((qval = get_qvalue(sfp->qual, "map")) != NULL) {
        if (qmp->name == NULL) {
          qmp->name = StringSave(qval);
        } else if (StringCmp(qmp->name, qval) != 0) {
          qmp->same = FALSE;
          break;
        }
      }
      if (sfp->data.choice == SEQFEAT_GENE) {
        grp = sfp->data.value.ptrvalue;
        if ((qval = grp->maploc) != NULL) {
          if (qmp->name == NULL) {
            qmp->name = StringSave(qval);
          } else if (StringCmp(qmp->name, qval) != 0) {
            qmp->same = FALSE;
            break;
          }
        }
      }
      if (sfp->data.choice == SEQFEAT_RNA) {
        rrp = sfp->data.value.ptrvalue;
        if (rrp->type == 3 && rrp->ext.choice == 2) {
          trna = rrp->ext.value.ptrvalue;
          if (sfp->ext != NULL && trna->anticodon == NULL) {
            //LCOV_EXCL_START
            // obsolete and possibly dangerous
            trna->anticodon = GetAnticodonFromObject(sfp);
            /* qnext is saved first because q may be removed */
            for (q = sfp->qual; q; q = qnext) {
              qnext = q->next;
              if (StringCmp(q->qual, "anticodon") == 0) {
                sfp->qual = remove_qual(sfp->qual, q);
              }
            }
            //LCOV_EXCL_STOP
          }
        }
      }
    }
  }
  return;
}
1903 
1904 //LCOV_EXCL_START
/*
 * SeqEntryExplore callback, the counterpart of CheckMaps.  data must
 * point to the QualMap filled in by CheckMaps.
 *
 * Does nothing unless a single consistent map value was found
 * (qmp->same is TRUE and qmp->name is set), and skips Bioseqs whose repr
 * is neither raw nor const.  Otherwise, for the feature tables attached
 * directly to sep, the first "map" qualifier of every feature and the
 * maploc of every gene feature and gene xref are removed.  If sep has a
 * source descriptor, the map value is recorded on the first one as a
 * SUBSRC_map subsource.
 *
 * Changes relative to the previous version: the subsource type is
 * written as SUBSRC_map (as in CheckMaps) instead of a literal 2; no
 * second subsource is added when the BioSource already carries this map
 * value; NULL guards were added for qmp, sep and the gene references.
 *
 * index and indent are not used.
 */
void StripMaps(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr       bsp;
  BioseqSetPtr    bssp;
  BioSourcePtr    biosp = NULL;
  SubSourcePtr    ssp;
  ValNodePtr      descr = NULL, vnp;
  SeqAnnotPtr     sap = NULL, ap;
  SeqFeatPtr      sfp;
  GeneRefPtr      grp;
  QualMapPtr      qmp;
  SeqFeatXrefPtr  xrp;

  qmp = data;
  if (qmp == NULL || qmp->same == FALSE || qmp->name == NULL) {
    return;
  }
  if (sep == NULL) {
    return;
  }
  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if (bsp == NULL) {
      return;
    }
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
      return;
    descr = bsp->descr;
    sap = bsp->annot;
  }
  else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    if (bssp == NULL) {
      return;
    }
    descr = bssp->descr;
    sap = bssp->annot;
  }

  /* first source descriptor, if any, receives the map value */
  for ( vnp = descr; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice == Seq_descr_source) {
      biosp = vnp->data.ptrvalue;
      break;
    }
  }

  for (ap = sap; ap != NULL; ap = ap->next) {
    if (ap->type != 1) {
      continue;
    }
    for (sfp = (SeqFeatPtr)(ap->data); sfp; sfp = sfp->next) {
      /* the extracted qualifier value is not needed, only its removal */
      MemFree(qvalue_extract(&(sfp->qual), "map"));

      if (sfp->data.choice == SEQFEAT_GENE) {
        grp = sfp->data.value.ptrvalue;
        if (grp != NULL) {
          grp->maploc = MemFree(grp->maploc);
        }
      }
      for (xrp = sfp->xref; xrp != NULL; xrp = xrp->next) {
        if (xrp->data.choice == SEQFEAT_GENE) {
          grp = xrp->data.value.ptrvalue;
          if (grp != NULL) {
            grp->maploc = MemFree(grp->maploc);
          }
        }
      }
    }
  }

  if (biosp == NULL) {
    return;
  }

  /* CheckMaps also reads map subsources, so the value may already be
     present on this BioSource; do not add it a second time */
  for (ssp = biosp->subtype; ssp != NULL; ssp = ssp->next) {
    if (ssp->subtype == SUBSRC_map && StringCmp(ssp->name, qmp->name) == 0) {
      return;
    }
  }

  ssp = SubSourceNew();
  if (ssp == NULL) {
    return;
  }
  ssp->subtype = SUBSRC_map;
  ssp->name = StringSave(qmp->name);
  biosp->subtype = tie_next_subtype(biosp->subtype, ssp);
  return;
}
1982 //LCOV_EXCL_STOP
1983 
GBQualPresent(CharPtr ptr,GBQualPtr gbqual)1984 static Boolean GBQualPresent(CharPtr ptr, GBQualPtr gbqual)
1985 
1986 {
1987   Boolean    present=FALSE;
1988   GBQualPtr  qual;
1989 
1990   for (qual=gbqual; qual != NULL; qual=qual->next)
1991     if (StringCmp(ptr, qual->qual) == 0)
1992     {
1993       present = TRUE;
1994       break;
1995     }
1996 
1997   return present;
1998 }
1999 
/*
 * ExamineGBQual: returns the value of the first qualifier in `gbqual`
 * whose name equals `ptr`, or NULL if there is none.  The returned
 * pointer is the qualifier's own `val` field, not a copy.
 */
static CharPtr ExamineGBQual (CharPtr ptr, GBQualPtr gbqual)
{
  GBQualPtr  cur = gbqual;

  while (cur != NULL && StringCmp (ptr, cur->qual) != 0) {
    cur = cur->next;
  }

  return (cur != NULL) ? cur->val : NULL;
}
2011 
/*
 * MapsToGenref: SeqEntryExplore-style callback (`data`, `index` and
 * `indent` are not used).
 *
 * For every gene feature in a feature-table annotation (type 1) of a
 * raw/const Bioseq or of a BioseqSet:
 *   1. collect the features of the same annotation that carry a "map"
 *      qualifier and for which SeqLocAinB(feature, gene) is not negative;
 *   2. if all collected "map" values compare equal (case-insensitive) and
 *      there is a value, copy it into the gene's maploc (only when maploc
 *      is empty) and extract the "map" qualifier from each collected
 *      feature.
 * Gene features whose GeneRef pointer is NULL are skipped.
 */
void MapsToGenref (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  ValNodePtr    head, last, vnp;
  SeqAnnotPtr   sap = NULL, ap;
  CharPtr       qval, name;
  Boolean       same;
  SeqFeatPtr    sfp, cur;
  GeneRefPtr    grp;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
      return;
    sap = bsp->annot;
  }
  else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    sap = bssp->annot;
  }

  for (ap = sap; ap != NULL; ap = ap->next) {
    if (ap->type != 1) {   /* feature tables only */
      continue;
    }
    for (sfp = (SeqFeatPtr)(ap->data); sfp; sfp = sfp->next) {
      if (sfp->data.choice != SEQFEAT_GENE) {
        continue;
      }
      grp = sfp->data.value.ptrvalue;
      if (grp == NULL) {
        /* nothing to store the map location in */
        continue;
      }

      /* gather the features with a "map" qualifier that relate to this gene */
      head = NULL;
      last = NULL;
      for (cur = (SeqFeatPtr)(ap->data); cur; cur = cur->next) {
        if ((GBQualPresent("map", cur->qual)) == FALSE) {
          continue;
        }
        if (SeqLocAinB(cur->location, sfp->location) < 0 ) {
          continue;
        }
        vnp = ValNodeAddPointer (&last, 0, cur);
        if (vnp == NULL) {
          /* keep `last` intact so the nodes gathered so far stay linked */
          continue;
        }
        if (head == NULL) {
          head = vnp;
        }
        last = vnp;
      }

      /* do all gathered features agree on the map value? */
      name = NULL;
      same = TRUE;
      for (vnp = head; vnp != NULL; vnp = vnp->next) {
        cur = (SeqFeatPtr) vnp->data.ptrvalue;
        qval = ExamineGBQual("map", (cur->qual));
        if (name == NULL) {
          name = qval;
        } else if (StringICmp (name, qval) != 0) {
          same = FALSE;
          break;   /* one mismatch is enough */
        }
      }

      if (same && name != NULL) {
        /* copy before the qualifiers (which own `name`) are extracted */
        if (grp->maploc == NULL) {
          grp->maploc = StringSave(name);
        }
        for (vnp = head; vnp != NULL; vnp = vnp->next) {
          cur = (SeqFeatPtr) vnp->data.ptrvalue;
          if (cur == NULL) continue;
          qval = qvalue_extract(&(cur->qual), "map");
          MemFree (qval);
        }
      }
      ValNodeFree (head);
    }
  }
  return;
}
2089 
/*
 * CheckMinPub: decides whether the pub chain starting at `pub` counts as
 * a "minimum pub" (callers use a TRUE result to skip adding it).
 *
 *   - a NULL chain is TRUE;
 *   - a Muid/PMid node is skipped; when it is the last node the result is
 *     FALSE if the ref-seq-protein flag is still set, TRUE otherwise;
 *   - a Cit-gen node that has `cit` but no journal, authors, volume or
 *     pages is skipped; when it is the last node the result is TRUE, and
 *     the ref-seq-protein flag is cleared for the remaining nodes;
 *   - anything else makes the result FALSE.
 */
static Boolean CheckMinPub(ValNodePtr pub, Boolean is_ref_seq_prot)
{
  CitGenPtr   cgp;
  ValNodePtr  cur;
  Boolean     refseq_prot = is_ref_seq_prot;

  if (pub == NULL) {
    return TRUE;
  }

  for (cur = pub; ; cur = cur->next) {
    if (cur->choice == PUB_Muid || cur->choice == PUB_PMid) {
      if (cur->next == NULL) {
        return refseq_prot ? FALSE : TRUE;
      }
    } else if (cur->choice == PUB_Gen) {
      cgp = cur->data.ptrvalue;
      if (cgp->cit == NULL || cgp->journal != NULL || cgp->authors != NULL
          || cgp->volume != NULL || cgp->pages != NULL) {
        return FALSE;
      }
      if (cur->next == NULL) {
        return TRUE;
      }
      /* nodes after a bare Cit-gen are judged without the flag */
      refseq_prot = FALSE;
    } else {
      return FALSE;
    }
  }
}
2118 
OkayToFuseRemarks(CharPtr com1,CharPtr com2)2119 static Boolean OkayToFuseRemarks (CharPtr com1, CharPtr com2)
2120 
2121 {
2122   if (com1 != NULL && com2 != NULL) {
2123     if (StringICmp (com1, com2) != 0) return FALSE;
2124   }
2125 
2126   return TRUE;
2127 }
2128 
/*
 * AddToListEx: merge the publication `pdp` into `list`, a chain of nodes
 * whose data are Pubdesc pointers, and return the (possibly new) head.
 *
 *   list            - current chain; entries may be removed from it
 *   check           - chain of PubStruct nodes; if an entry with start == 2
 *                     label-matches pdp->pub, `list` is returned unchanged
 *   pdp             - candidate; it is copied (AsnIoMemCopy) when appended,
 *                     never stored directly.  Its reftype and comment may
 *                     be updated while merging with a matching entry.
 *   is_ref_seq_prot - forwarded to CheckMinPub
 *
 * A NULL `pdp`, or one without a pub, leaves `list` untouched and returns
 * it (the caller's list is never dropped).
 *
 * For each existing entry that label-matches `pdp` and whose comment can
 * be fused, reftype/comment are reconciled; then, if SelectBestPub() is
 * >= 0 the existing entry is kept and `list` is returned, otherwise the
 * existing entry is freed and removed.  If no entry was kept, a copy of
 * `pdp` is appended.
 */
static ValNodePtr AddToListEx (ValNodePtr list, ValNodePtr check, PubdescPtr pdp, Boolean is_ref_seq_prot)
{
  ValNodePtr    v, vnext;
  PubdescPtr    vpdp;
  PubStructPtr  psp;
  ValNodePtr    pubequ1 = NULL, pubequ2 = NULL;
  Boolean       is_1, is_2;
  Boolean       keep_existing = FALSE;

  if (pdp == NULL || pdp->pub == NULL) {
    return list;
  }
  for (v = check; v != NULL; v = v->next) {
    psp = v->data.ptrvalue;
    if (psp == NULL || psp->start != 2) {
      continue;
    }
    if (PubLabelMatchEx (psp->pub, pdp->pub, TRUE) == 0) {
      return list;
    }
  }
  /* NOTE(review): the original condition tested pdp->fig twice; another
     Pubdesc field may have been intended.  Behavior is left as it was. */
  if (pdp->name == NULL && pdp->fig == NULL) {
    if (CheckMinPub(pdp->pub, is_ref_seq_prot) == TRUE) {   /* do not add minimum pub */
      return list;
    }
  }

  /* a multi-node pub chain is compared through a temporary PUB_Equiv
     wrapper; the candidate's wrapper is the same for every entry */
  is_2 = (Boolean) (pdp->pub->next != NULL);
  if (is_2) {
    pubequ2 = SeqDescrNew(NULL);
    pubequ2->choice = PUB_Equiv;
    pubequ2->data.ptrvalue = pdp->pub;
  } else {
    pubequ2 = pdp->pub;
  }

  for (v = list; v != NULL && !keep_existing; v = vnext) {
    vnext = v->next;   /* v may be removed below */
    vpdp = v->data.ptrvalue;
    if (vpdp == NULL || vpdp->pub == NULL) {
      continue;
    }
    is_1 = (Boolean) (vpdp->pub->next != NULL);
    if (is_1) {
      pubequ1 = SeqDescrNew(NULL);
      pubequ1->choice = PUB_Equiv;
      pubequ1->data.ptrvalue = vpdp->pub;
    } else {
      pubequ1 = vpdp->pub;
    }
    if (PubLabelMatchEx (pubequ1, pubequ2, TRUE) == 0 && OkayToFuseRemarks (pdp->comment, vpdp->comment)) {
      if (pdp->reftype == 2 && vpdp->reftype == 1) {
        vpdp->reftype = 2;
      }
      if (pdp->reftype == 1 && vpdp->reftype == 2) {
        pdp->reftype = 2;
      }
      if (vpdp->comment != NULL && pdp->comment == NULL) {
        pdp->comment = StringSave (vpdp->comment);
      } else if (vpdp->comment == NULL && pdp->comment != NULL) {
        vpdp->comment = StringSave (pdp->comment);
      }
      if (SelectBestPub(pubequ1, pubequ2) >= 0) {
        keep_existing = TRUE;
      } else {
        PubdescFree(vpdp);
        list = remove_node(list, v);
      }
    }
    /* only the wrapper node is released; the wrapped pub is not owned by it */
    if (is_1) {
      ValNodeFree(pubequ1);
    }
  }
  if (is_2) {
    ValNodeFree(pubequ2);
  }
  if (keep_existing) {
    return list;
  }

  v = SeqDescrNew(NULL);
  v->choice = Seq_descr_pub;
  v->data.ptrvalue = AsnIoMemCopy(pdp, (AsnReadFunc) PubdescAsnRead,
                                  (AsnWriteFunc) PubdescAsnWrite);
  list = ValNodeLink(&list, v);
  /*  may be sort ???? */
  return list;
}
2214 
/*
 * AddToList: public entry point for AddToListEx with the
 * is_ref_seq_prot argument fixed to TRUE.
 */
ValNodePtr AddToList(ValNodePtr list, ValNodePtr check, PubdescPtr pdp)
{
  ValNodePtr  merged;

  merged = AddToListEx (list, check, pdp, TRUE);
  return merged;
}
2219 
2220 //LCOV_EXCL_START
2221 //this cleanup takes place in BasicCleanup
/*
 * CheckCitSubNew: for a PUB_Sub node, move data out of the Cit-sub's
 * imprint:
 *   - imp->pub becomes the author list's affil when the authors have none;
 *   - imp->date becomes the Cit-sub date when the Cit-sub has none;
 *   - the imprint is then freed if its pub field is (now) NULL.
 * Any other node, or a node without data or imprint, is left alone.
 */
void CheckCitSubNew(ValNodePtr vnp)
{
  CitSubPtr    sub;
  AuthListPtr  authors;
  ImprintPtr   imprint;

  if (vnp == NULL || vnp->choice != PUB_Sub) {
    return;
  }
  sub = (CitSubPtr) vnp->data.ptrvalue;
  if (sub == NULL) {
    return;
  }
  authors = sub->authors;
  imprint = sub->imp;
  if (imprint == NULL) {
    /* every step below needs the imprint */
    return;
  }

  if (authors != NULL && authors->affil == NULL && imprint->pub != NULL) {
    authors->affil = imprint->pub;
    imprint->pub = NULL;
  }
  if (sub->date == NULL && imprint->date != NULL) {
    sub->date = imprint->date;
    imprint->date = NULL;
  }
  if (imprint->pub == NULL) {
    sub->imp = ImprintFree (sub->imp);
  }
}
2251 //LCOV_EXCL_STOP
2252 
/*
 * ChangeCitSub: SeqEntryExplore-style callback (`data`, `index` and
 * `indent` are not used).  Runs CheckCitSubNew on every PUB_Sub found in
 * the pub descriptors of a raw/const Bioseq or of a BioseqSet.
 */
void ChangeCitSub (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  ValNodePtr    sdp, pub;
  PubdescPtr    pdp;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_const) {
      return;
    }
    sdp = bsp->descr;
  } else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    sdp = bssp->descr;
  }

  for (; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice != Seq_descr_pub) {
      continue;
    }
    pdp = sdp->data.ptrvalue;
    if (pdp == NULL) {
      continue;
    }
    pub = pdp->pub;
    while (pub != NULL) {
      if (pub->choice == PUB_Sub) {
        CheckCitSubNew(pub);
      }
      pub = pub->next;
    }
  }
}
2284 
2285 //LCOV_EXCL_START
2286 /***************************************************************************
2287  *   NewPubs:
2288  *   -- find all ImpFeat "sites"
2289  *        change to pubdesc with reftype 'sites'
2290  *    -- find all other sfp->cit
2291  *        change to pubdesc with reftype 'feats'
2292  *    -- pubs are moved from SeqAnnot to Seqdescr on the same level
2293  ****************************************************************************/
NewPubs(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)2294 void NewPubs (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
2295 {
2296   BioseqPtr     bsp = NULL;
2297   BioseqSetPtr  bssp = NULL;
2298   ValNodePtr    descr = NULL, pubset, tmp, pubequ;
2299   ValNodePtr    next_pubequ, pub, min_pub;
2300   SeqAnnotPtr   sap = NULL, ap, apnext;
2301   SeqFeatPtr    sfp, cur, curnext;
2302   ImpFeatPtr    ifp;
2303   PubdescPtr    pubdesc;
2304   ValNodePtr    check = NULL, np_list = NULL;
2305   SeqIdPtr      sip;
2306   Boolean       is_ref_seq_prot = FALSE;
2307 
2308   if (IS_Bioseq(sep)) {
2309     bsp = (BioseqPtr)(sep->data.ptrvalue);
2310     if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
2311       return;
2312     descr = bsp->descr;
2313     sap = bsp->annot;
2314     if (ISA_aa (bsp->mol)) {
2315       for (sip = bsp->id; sip != NULL; sip = sip->next) {
2316         if (sip->choice == SEQID_OTHER) {
2317           is_ref_seq_prot = TRUE;
2318         }
2319       }
2320     }
2321   }
2322   else {
2323     bssp = (BioseqSetPtr)(sep->data.ptrvalue);
2324     descr = bssp->descr;
2325     sap = bssp->annot;
2326   }
2327   tmp = ValNodeExtractList(&descr, Seq_descr_pub);
2328   if (tmp != NULL) {
2329     np_list = ValNodeLink(&np_list, tmp);
2330   }
2331   for (ap = sap; ap != NULL; ap = apnext) {
2332     apnext = ap->next;
2333     if (ap->type != 1) {
2334       continue;
2335     }
2336     sfp = (SeqFeatPtr)(ap->data);
2337     for (cur = sfp; cur; cur = curnext) {
2338       curnext = cur->next;
2339       if (cur->cit == NULL) {
2340         continue;
2341       }
2342       pubset = cur->cit;
2343       pub = NULL;
2344       min_pub = NULL;
2345       pubequ = pubset->data.ptrvalue;
2346       while (pubequ) {
2347         next_pubequ = pubequ->next;
2348         pubdesc = PubdescNew();
2349         if (pubequ->choice == PUB_Equiv) {
2350           pubdesc->pub = pubequ->data.ptrvalue;
2351         } else {
2352           pubdesc->pub = pubequ;
2353         }
2354         if (cur->data.choice == SEQFEAT_IMP) {
2355           ifp = cur->data.value.ptrvalue;
2356           if (StringCmp(ifp->key, "Site-ref") == 0) {
2357             pubdesc->reftype = 1; /* sites */
2358             np_list = AddToListEx (np_list, check, pubdesc, is_ref_seq_prot);
2359             min_pub = MinimizePub(pubequ);
2360             pub = tie_next(pub, min_pub);
2361             MemFree(pubdesc);
2362           } else {
2363             pubdesc->reftype = 2;
2364             np_list = AddToListEx (np_list, check, pubdesc, is_ref_seq_prot);
2365             min_pub = MinimizePub(pubequ);
2366             pub = tie_next(pub, min_pub);
2367             MemFree(pubdesc);
2368           }
2369         } else {
2370           pubdesc->reftype = 2;
2371           np_list = AddToListEx (np_list, check, pubdesc, is_ref_seq_prot);
2372           min_pub = MinimizePub(pubequ);
2373           pub = tie_next(pub, min_pub);
2374           MemFree(pubdesc);
2375         }
2376         PubFree(pubequ);
2377         pubequ = next_pubequ;
2378       }
2379       if (pub && pubset) {
2380         pubset->data.ptrvalue = pub;
2381       }
2382     }
2383     ap->data = sfp;
2384   }
2385 
2386   for (ap = sap; ap != NULL; ap = apnext) {
2387     apnext = ap->next;
2388     /* now keep empty annot if annot_descr present */
2389     if (ap->data == NULL && /* ap->desc == NULL */ SSECNoGenomeAnnotInAnnotDescr (ap)) {
2390       sap = remove_annot(sap, ap);
2391     }
2392   }
2393 
2394   if (bssp != NULL) {
2395     descr = tie_next(descr, np_list);
2396     bssp->descr = descr;
2397     bssp->annot = sap;
2398   } else {
2399     descr = tie_next(descr, np_list);
2400     bsp->descr = descr;
2401     bsp->annot = sap;
2402   }
2403   return;
2404 }
2405 
/*
 * CmpPub: SeqEntryExplore-style callback; `data` points to a PubdescPtr
 * (`index` and `indent` are not used).
 *
 * Ignored for a NULL pubdesc, for BioseqSets and for protein Bioseqs.
 * Otherwise, if none of the Bioseq's pub descriptors PubMatch()es the
 * pubdesc, the pubdesc is freed and the caller's pointer is set to NULL.
 */
void CmpPub (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  PubdescPtr PNTR  target = data;
  PubdescPtr       wanted, have;
  BioseqPtr        bsp;
  ValNodePtr       sdp, lhs, rhs;
  Boolean          lhs_wrapped, rhs_wrapped;
  Boolean          found = FALSE;

  wanted = *target;
  if (wanted == NULL) {
    return;
  }
  if (!IS_Bioseq(sep)) {
    return;
  }
  bsp = (BioseqPtr)(sep->data.ptrvalue);
  if (bsp->mol == Seq_mol_aa) {
    return;
  }

  for (sdp = bsp->descr; sdp != NULL && !found; sdp = sdp->next) {
    if (sdp->choice != Seq_descr_pub) {
      continue;
    }
    have = sdp->data.ptrvalue;

    /* multi-node pub chains are compared through a temporary
       PUB_Equiv wrapper node */
    if (have->pub->next != NULL) {
      lhs_wrapped = TRUE;
      lhs = SeqDescrNew(NULL);
      lhs->choice = PUB_Equiv;
      lhs->data.ptrvalue = have->pub;
    } else {
      lhs_wrapped = FALSE;
      lhs = have->pub;
    }
    if (wanted->pub->next != NULL) {
      rhs_wrapped = TRUE;
      rhs = SeqDescrNew(NULL);
      rhs->choice = PUB_Equiv;
      rhs->data.ptrvalue = wanted->pub;
    } else {
      rhs_wrapped = FALSE;
      rhs = wanted->pub;
    }

    if (PubMatch(lhs, rhs) == 0) {
      found = TRUE;
    }

    if (lhs_wrapped) {
      ValNodeFree(lhs);
    }
    if (rhs_wrapped) {
      ValNodeFree(rhs);
    }
  }

  if (!found) {
    PubdescFree(*target);
    *target = NULL;
  }
}
2470 //LCOV_EXCL_STOP
2471 
2472 /***********************************************************************
2473  *    delete pubs from Bioseqs if they are already on the top level
2474  *    don't delete Pubdesc if additional info (name fig, num etc) is present
2475  ************************************************************************/
DeletePubs(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)2476 void DeletePubs (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
2477 {
2478   BioseqPtr   bsp;
2479   PubdescPtr  pdp, vpdp;
2480   ValNodePtr  pubequ1, pubequ2, v, vnext, descr = NULL;
2481   Boolean     is_1, is_2;
2482 
2483 
2484   pdp = data;
2485   if (pdp == NULL) {
2486     return;
2487   }
2488   if (!IS_Bioseq(sep)) {
2489     return;
2490   }
2491   bsp = (BioseqPtr)(sep->data.ptrvalue);
2492   if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
2493     return;
2494   /*
2495    if (bsp->mol == Seq_mol_aa) {
2496    return;
2497    }
2498    */
2499   descr = bsp->descr;
2500   for(v = descr; v; v = vnext) {
2501     vnext = v->next;
2502     if (v->choice != Seq_descr_pub) {
2503       continue;
2504     }
2505     vpdp = v->data.ptrvalue;   /* from the Bioseq */
2506     if (vpdp->name != NULL || vpdp->fig != NULL
2507         || vpdp->num != NULL || vpdp->maploc != NULL
2508         || vpdp->comment != NULL) {
2509       continue;
2510     }
2511     if (vpdp->pub->next != NULL) {
2512       is_1 = TRUE;
2513       pubequ1 = ValNodeNew(NULL);
2514       pubequ1->choice = PUB_Equiv;
2515       pubequ1->data.ptrvalue = vpdp->pub;
2516     } else {
2517       is_1 = FALSE;
2518       pubequ1 = vpdp->pub;
2519     }
2520     if (pdp->pub->next != NULL) {  /* from the set */
2521       is_2 = TRUE;
2522       pubequ2 = ValNodeNew(NULL);
2523       pubequ2->choice = PUB_Equiv;
2524       pubequ2->data.ptrvalue = pdp->pub;
2525     } else {
2526       is_2 = FALSE;
2527       pubequ2 = pdp->pub;
2528     }
2529     if (PubMatch(pubequ1, pubequ2) == 0) {
2530       PubdescFree((PubdescPtr) (v->data.ptrvalue));
2531       descr = remove_node(descr, v);
2532     }
2533     if (is_1) {
2534       ValNodeFree(pubequ1);
2535     }
2536     if (is_2) {
2537       ValNodeFree(pubequ2);
2538     }
2539   }
2540   bsp->descr = descr;
2541   return;
2542 }
2543 
2544 //LCOV_EXCL_START
/*
 * MoveSetPubs: SeqEntryExplore-style callback; `data` is a PubSetListPtr
 * (`index` and `indent` are not used).
 *
 * Does nothing once psp->first is FALSE.  Otherwise the pub descriptors
 * of a reference Bioseq are examined: the entry itself when it is a
 * raw/const Bioseq, or the first member of the set when the entry is a
 * BioseqSet of class 4 with members.  Each pub that CmpPub leaves
 * non-NULL after exploring `sep` is appended (as a copy) to psp->list and
 * deleted from the Bioseqs under `sep` via DeletePubs; psp->first is then
 * cleared.
 */
void MoveSetPubs (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr      bsp = NULL;
  BioseqSetPtr   bssp;
  Boolean        first;
  PubSetListPtr  psp;
  ValNodePtr     descr = NULL, v, vnext, tmp, set_list;
  PubdescPtr     tmp_pdp, pdp;

  psp = data;
  if (psp == NULL || psp->first == FALSE) {
    /* Bail out before looking at sep: previously a Bioseq entry reaching
       here with first == FALSE was read through a BioseqSet pointer. */
    return;
  }
  set_list = psp->list;
  first = psp->first;

  if (IS_Bioseq(sep) && (first == TRUE)) {
    bsp = sep->data.ptrvalue;
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
      return;
    descr = bsp->descr;
  } else {
    bssp = sep->data.ptrvalue;
    if (bssp != NULL && bssp->_class == 4 && bssp->seq_set != NULL) {
      /* assumes the first member of such a set is a Bioseq - TODO confirm */
      bsp = (BioseqPtr) bssp->seq_set->data.ptrvalue;
      descr = bsp->descr;
    }
  }
  if (bsp == NULL) {
    return;
  }

  /* NOTE(review): DeletePubs below can remove descriptors from this same
     Bioseq, which may include the node saved in vnext - verify. */
  for (v = descr; v; v=vnext) {
    vnext = v->next;
    if (v->choice == Seq_descr_pub) {
      pdp = v->data.ptrvalue;
      tmp_pdp = AsnIoMemCopy(pdp,
                             (AsnReadFunc) PubdescAsnRead, (AsnWriteFunc) PubdescAsnWrite);
      /* CmpPub frees the copy and NULLs it when a Bioseq lacks this pub */
      SeqEntryExplore(sep, &tmp_pdp, CmpPub);
      if (tmp_pdp != NULL) {
        tmp = SeqDescrNew(NULL);
        tmp->choice = Seq_descr_pub;
        tmp->data.ptrvalue = tmp_pdp;
        set_list = tie_next(set_list, tmp);
        SeqEntryExplore(sep, tmp_pdp, DeletePubs);
        first = FALSE;
      }
    }
  }
  psp->list = set_list;
  psp->first = first;
  return;
}
2597 //LCOV_EXCL_STOP
2598 
/*
 * FindOldLineage: SeqEntryExplore-style callback; `data` points to a
 * CharPtr (`index` and `indent` are not used).
 *
 * Looks at the first GenBank block descriptor of a raw/const Bioseq or
 * of a BioseqSet.  If it has a taxonomy string, that string is moved
 * into *data (freeing any string already there) and the block's taxonomy
 * field is cleared.
 */
void FindOldLineage (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  CharPtr PNTR  out = (CharPtr PNTR) data;
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  ValNodePtr    sdp;
  GBBlockPtr    gbp;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_const) {
      return;
    }
    sdp = bsp->descr;
  } else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    sdp = bssp->descr;
  }

  /* only the first GenBank descriptor is considered */
  while (sdp != NULL && sdp->choice != Seq_descr_genbank) {
    sdp = sdp->next;
  }
  if (sdp == NULL) {
    return;
  }

  gbp = sdp->data.ptrvalue;
  if (gbp->taxonomy == NULL) {
    return;
  }
  if (*out != NULL) {
    MemFree(*out);
  }
  /* ownership of the string moves from the GenBank block to the caller */
  *out = gbp->taxonomy;
  gbp->taxonomy = NULL;
}
2633 
2634 //LCOV_EXCL_START
/*
 * NewLineage: SeqEntryExplore-style callback; `data` points to a CharPtr
 * holding a lineage string (`index` and `indent` are not used).
 *
 * Takes the OrgRef of the first source descriptor of a raw/const Bioseq
 * or of a BioseqSet.  When both the OrgRef and the lineage string exist,
 * an OrgName is created if missing and its lineage is replaced by a copy
 * of the string.
 */
void NewLineage (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  CharPtr       new_lineage = *((CharPtr PNTR) data);
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  ValNodePtr    sdp;
  BioSourcePtr  source;
  OrgRefPtr     org = NULL;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_const) {
      return;
    }
    sdp = bsp->descr;
  } else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    sdp = bssp->descr;
  }

  /* only the first source descriptor is considered */
  while (sdp != NULL && sdp->choice != Seq_descr_source) {
    sdp = sdp->next;
  }
  if (sdp != NULL) {
    source = sdp->data.ptrvalue;
    org = (OrgRefPtr) source->org;
  }

  if (org == NULL || new_lineage == NULL) {
    return;
  }
  if (org->orgname == NULL) {
    org->orgname = OrgNameNew();
  }
  if (org->orgname->lineage != NULL) {
    MemFree(org->orgname->lineage);
  }
  org->orgname->lineage = StringSave(new_lineage);
}
2678 
2679 //only for segsets
2680 /****************************************************************************
2681  *  delete_valnode:
2682  *****************************************************************************/
delete_valnode(ValNodePtr host,Uint1 choice)2683 static ValNodePtr delete_valnode(ValNodePtr host, Uint1 choice)
2684 {
2685   Boolean     first;
2686   ValNodePtr  curvnp, prevnp;
2687 
2688   for (curvnp = host, first = TRUE; curvnp != NULL
2689        && curvnp->choice != choice; curvnp = curvnp->next) {
2690 
2691     if (first) {
2692       prevnp = curvnp;
2693       first = FALSE;
2694     }
2695     else
2696       prevnp = prevnp->next;
2697   }
2698 
2699   if (curvnp == NULL) {
2700     return host;
2701   }
2702   if (first) {
2703     host = curvnp->next;
2704   } else {
2705     prevnp->next = curvnp->next;
2706   }
2707   curvnp->next = NULL;
2708 
2709   switch (choice) {
2710     case Seq_descr_org:
2711       OrgRefFree(curvnp->data.ptrvalue);
2712       break;
2713     case Seq_descr_modif:
2714       ValNodeFree(curvnp->data.ptrvalue);
2715       break;
2716     case Seq_descr_update_date:
2717       DateFree(curvnp->data.ptrvalue);
2718       break;
2719     case Seq_descr_mol_type:
2720       break;
2721     default:
2722       break;
2723   }
2724 
2725   ValNodeFree(curvnp);
2726 
2727   return host;
2728 }
2729 
2730 //only for segsets
2731 /*------------------- CleanUpSeqDescrChoice() -------------------------*/
2732 /****************************************************************************
2733  *  CleanUpSeqDescrChoice:
2734  *                                                                    5-21-93
2735  *****************************************************************************/
CleanUpSeqDescrChoice(SeqEntryPtr sep,Uint1 choice)2736 void CleanUpSeqDescrChoice(SeqEntryPtr sep, Uint1 choice)
2737 {
2738   BioseqPtr    bsp;
2739   SeqEntryPtr  cursep;
2740 
2741   for (cursep = sep; cursep != NULL; cursep = cursep->next) {
2742     bsp = cursep->data.ptrvalue;
2743 
2744     bsp->descr = delete_valnode(bsp->descr, choice);
2745   }
2746 
2747 } /* CleanUpSeqDescrChoice */
2748 
2749 //only for segsets
2750 /**********************************************************/
remove_descr(SeqDescrPtr head,SeqDescrPtr x)2751 SeqDescrPtr remove_descr(SeqDescrPtr head, SeqDescrPtr x)
2752 {
2753   SeqDescrPtr  v;
2754   SeqDescrPtr  p = NULL;
2755 
2756   if(head == NULL)
2757     return(NULL);
2758 
2759   if(x == head)
2760   {
2761     head = x->next;
2762     x->next = NULL;
2763     SeqDescFree(x);
2764     return(head);
2765   }
2766   for(v = head; v != NULL && v != x; v = v->next)
2767     p = v;
2768 
2769   if(v != NULL && p != NULL)
2770   {
2771     p->next = x->next;
2772     x->next = NULL;
2773     SeqDescFree(x);
2774   }
2775   return(head);
2776 }
2777 //LCOV_EXCL_STOP
2778 
2779 
2780 /* Cleanup functions originally from Sequin */
2781 
/*
 * FindConsistentMolInfo: recursively visits `sep` and, for a set, all of
 * its members.  The first MolInfo descriptor met is stored in *mipp;
 * every later one is compared with it (biomol, tech, completeness and,
 * ignoring case, techexp) and *consist is set to FALSE on a difference.
 * *consist is never set to TRUE here.
 */
static void FindConsistentMolInfo (SeqEntryPtr sep, MolInfoPtr PNTR mipp, BoolPtr consist)

{
  BioseqSetPtr  bssp = NULL;
  MolInfoPtr    cand;
  MolInfoPtr    ref;
  SeqEntryPtr   child;
  ValNodePtr    sdp;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;

  if (IS_Bioseq (sep)) {
    sdp = ((BioseqPtr) sep->data.ptrvalue)->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    sdp = bssp->descr;
  } else return;

  for (; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice != Seq_descr_molinfo) continue;
    cand = (MolInfoPtr) sdp->data.ptrvalue;
    if (cand == NULL) continue;

    ref = *mipp;
    if (ref == NULL) {
      /* first MolInfo seen becomes the reference */
      *mipp = cand;
      continue;
    }
    if (ref->biomol != cand->biomol ||
        ref->tech != cand->tech ||
        ref->completeness != cand->completeness ||
        StringICmp (ref->techexp, cand->techexp) != 0) {
      *consist = FALSE;
    }
  }

  if (bssp == NULL) return;
  for (child = bssp->seq_set; child != NULL; child = child->next) {
    FindConsistentMolInfo (child, mipp, consist);
  }
}
2821 
/*
 * RemoveMolInfoCallback: SeqEntryExplore callback (`mydata`, `index` and
 * `indent` are not used).  Unlinks and frees every MolInfo descriptor of
 * the Bioseq or BioseqSet in `sep`.
 */
static void RemoveMolInfoCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr        bsp;
  BioseqSetPtr     bssp;
  ValNodePtr PNTR  link;
  ValNodePtr       sdp;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    link = &(bsp->descr);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    link = &(bssp->descr);
  } else return;

  /* `link` always addresses the pointer that leads to the current node */
  while ((sdp = *link) != NULL) {
    if (sdp->choice == Seq_descr_molinfo) {
      *link = sdp->next;
      sdp->next = NULL;
      SeqDescFree (sdp);
    } else {
      link = &(sdp->next);
    }
  }
}
2852 
/*
 * NormalizeSegSeqMolInfo: walks down through BioseqSets until a segset
 * is reached.  If the MolInfo descriptors found in that segset are all
 * consistent (see FindConsistentMolInfo), they are all removed and a
 * single MolInfo with the same biomol, tech, completeness and techexp is
 * created through CreateNewDescriptor on the segset entry.
 * Entries that are not BioseqSets are ignored.
 */
extern void NormalizeSegSeqMolInfo (SeqEntryPtr sep)

{
  BioseqSetPtr  bssp;
  SeqEntryPtr   child;
  Boolean       consistent = TRUE;
  MolInfoPtr    found = NULL;
  MolInfoPtr    master;
  ValNodePtr    sdp;

  if (! IS_Bioseq_set (sep)) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  if (bssp->_class != BioseqseqSet_class_segset) {
    /* not a segset: keep looking among the members */
    for (child = bssp->seq_set; child != NULL; child = child->next) {
      NormalizeSegSeqMolInfo (child);
    }
    return;
  }

  FindConsistentMolInfo (sep, &found, &consistent);
  if (found == NULL || ! consistent) return;

  master = MolInfoNew ();
  if (master == NULL) return;
  /* copy the values first: `found` is freed by the removal pass below */
  master->biomol = found->biomol;
  master->tech = found->tech;
  master->completeness = found->completeness;
  master->techexp = StringSaveNoNull (found->techexp);

  SeqEntryExplore (sep, NULL, RemoveMolInfoCallback);

  sdp = CreateNewDescriptor (sep, Seq_descr_molinfo);
  if (sdp != NULL) {
    sdp->data.ptrvalue = (Pointer) master;
  }
}
2891 
/*
 * CollectPseudoCdsProducts: SeqEntryExplore callback; `mydata` is the
 * address of a ValNode list head (`index` and `indent` are not used).
 *
 * For every coding-region feature that is pseudo (pseudo flag set, or a
 * "pseudo" qualifier present) and whose product Bioseq can be found:
 *   - the product Bioseq is appended to the list in `mydata`;
 *   - the feature's product location is freed;
 *   - the product's protein name (first name, else desc) is stored in the
 *     feature comment, appended after "; " when a comment already exists.
 */
static void CollectPseudoCdsProducts (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqContextPtr  bcp;
  BioseqPtr         bsp;
  BioseqSetPtr      bssp;
  ValNodePtr PNTR   collected;
  CharPtr           combined;
  GBQualPtr         gbqual;
  CharPtr           label;
  size_t            len;
  BioseqPtr         product;
  SeqFeatPtr        prot;
  ProtRefPtr        prp;
  Boolean           pseudo;
  SeqAnnotPtr       sap;
  SeqFeatPtr        sfp;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  collected = (ValNodePtr PNTR) mydata;
  if (collected == NULL) return;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    sap = bsp->annot;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    sap = bssp->annot;
  } else return;

  for (; sap != NULL; sap = sap->next) {
    if (sap->type != 1) continue;

    for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
      if (sfp->data.choice != SEQFEAT_CDREGION) continue;

      /* pseudo either by flag or by qualifier */
      pseudo = sfp->pseudo;
      for (gbqual = sfp->qual; gbqual != NULL && ! pseudo; gbqual = gbqual->next) {
        if (StringICmp (gbqual->qual, "pseudo") == 0) {
          pseudo = TRUE;
        }
      }
      if (! pseudo) continue;

      product = BioseqFind (SeqLocId (sfp->product));
      if (product == NULL) continue;

      ValNodeAddPointer (collected, 0, (Pointer) product);
      sfp->product = SeqLocFree (sfp->product);

      /* find the protein feature that names the product */
      prot = SeqMgrGetBestProteinFeature (product, NULL);
      if (prot == NULL) {
        bcp = BioseqContextNew (product);
        prot = BioseqContextGetSeqFeat (bcp, SEQFEAT_PROT, NULL, NULL, 0);
        BioseqContextFree (bcp);
      }
      if (prot == NULL) continue;

      prp = (ProtRefPtr) prot->data.value.ptrvalue;
      if (prp == NULL) continue;

      label = NULL;
      if (prp->name != NULL) {
        label = prp->name->data.ptrvalue;
      } else if (prp->desc != NULL) {
        label = prp->desc;
      }
      if (label == NULL) continue;

      if (sfp->comment == NULL) {
        sfp->comment = StringSaveNoNull (label);
        continue;
      }

      /* existing comment + "; " + label */
      len = StringLen (sfp->comment) + StringLen (label) + 5;
      combined = MemNew (sizeof (Char) * len);
      StringCpy (combined, sfp->comment);
      StringCat (combined, "; ");
      StringCat (combined, label);
      MemFree (sfp->comment);
      sfp->comment = combined;
    }
  }
}
2979 
/*
 * SeqEntryExplore callback: sets the Boolean that mydata points at to TRUE
 * when the visited Bioseq carries a Seq-id whose choice is SEQID_EMBL,
 * SEQID_DDBJ, SEQID_TPE or SEQID_TPD.  The flag is never cleared here, so
 * the caller must initialise it.  Bioseq-sets are ignored.
 *
 *   sep     entry being visited
 *   mydata  BoolPtr receiving the result
 *   index, indent  unused
 */
static void CheckForEmblDdbjID (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr  bsp;
  BoolPtr    isEmblOrDdbj;
  SeqIdPtr   sip;

  if (sep == NULL || (! IS_Bioseq (sep))) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;
  isEmblOrDdbj = (BoolPtr) mydata;
  if (isEmblOrDdbj == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    switch (sip->choice) {
      case SEQID_EMBL :
      case SEQID_DDBJ :
      case SEQID_TPE :
      case SEQID_TPD :
        *isEmblOrDdbj = TRUE;
        return; /* one matching id settles it; no need to look further */
      default :
        break;
    }
  }
}
3008 
/*
 * Bioseq callback: clears the Boolean that mydata points at as soon as the
 * Bioseq has any Seq-id that is neither SEQID_LOCAL nor SEQID_GENERAL.
 * The flag is only ever set to FALSE here; the caller supplies its
 * starting value.
 */
static void CheckForLclGnlOnly (BioseqPtr bsp, Pointer mydata)

{
  SeqIdPtr  id;
  BoolPtr   onlyLocalOrGeneral;

  onlyLocalOrGeneral = (BoolPtr) mydata;
  if (bsp == NULL || onlyLocalOrGeneral == NULL) return;

  id = bsp->id;
  while (id != NULL) {
    if (id->choice != SEQID_LOCAL && id->choice != SEQID_GENERAL) {
      *onlyLocalOrGeneral = FALSE;
    }
    id = id->next;
  }
}
3025 
/*
 * Deals with the protein products of pseudo coding regions in an entity.
 *
 * Nothing is done when any Bioseq in sep has an id matched by
 * CheckForEmblDdbjID.  Otherwise the products gathered by
 * CollectPseudoCdsProducts are either detached through DetachDataForProc
 * (doPseudo TRUE) or merely reported with a warning (doPseudo FALSE).
 * Products for which no item ID can be found are skipped.
 *
 *   entityID  entity owning sep; 0 makes the call a no-op
 *   sep       top of the tree to process
 *   doPseudo  TRUE to detach the products, FALSE to only post a warning
 */
static void CleanUpPseudoProductsEx (Uint2 entityID, SeqEntryPtr sep, Boolean doPseudo)

{
  Char           accn [41];
  Boolean        foreign = FALSE;
  ValNodePtr     head = NULL;
  Uint4          item;
  ValNodePtr     node;
  OMProcControl  ompc;
  BioseqPtr      prod;

  if (sep == NULL || entityID == 0) return;

  SeqEntryExplore (sep, (Pointer) &foreign, CheckForEmblDdbjID);
  if (foreign) return;

  SeqEntryExplore (sep, &head, CollectPseudoCdsProducts);
  for (node = head; node != NULL; node = node->next) {
    prod = (BioseqPtr) node->data.ptrvalue;
    item = GetItemIDGivenPointer (entityID, OBJ_BIOSEQ, (Pointer) prod);
    if (item == 0) continue;

    if (! doPseudo) {
      SeqIdWrite (prod->id, accn, PRINTID_FASTA_LONG, sizeof (accn) - 1);
      ErrPostEx (SEV_WARNING, 0, 2, "Accession %s is product of pseudo CDS", accn);
      continue;
    }

    MemSet ((Pointer) (&ompc), 0, sizeof (OMProcControl));
    ompc.do_not_reload_from_cache = TRUE;
    ompc.input_entityID = entityID;
    ompc.input_itemID = item;
    ompc.input_itemtype = OBJ_BIOSEQ;
    if (! DetachDataForProc (&ompc, FALSE)) {
      Message (MSG_POSTERR, "DetachDataForProc failed");
    }
  }
  ValNodeFree (head);
}
3063 
3064 //LCOV_EXCL_START
/*
 * Public entry point for pseudo-CDS product cleanup: forwards to
 * CleanUpPseudoProductsEx with doPseudo fixed at TRUE.
 */
extern void CleanUpPseudoProducts (Uint2 entityID, SeqEntryPtr sep)

{
  Boolean  doPseudo = TRUE;

  CleanUpPseudoProductsEx (entityID, sep, doPseudo);
}
3070 //LCOV_EXCL_STOP
3071 
/*
 * SeqEntryExplore callback: for each GenBank-block descriptor on the
 * visited Bioseq or Bioseq-set, frees the taxonomy string and then unlinks
 * and frees the descriptor if no other GBBlock field is populated
 * (extra_accessions, source, keywords, origin, date, entry_date, div).
 * The source and origin strings themselves are left untouched.
 *
 *   sep  entry being visited
 *   mydata, index, indent  unused
 */
extern void CleanupGenbankCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  ValNodePtr     curr;
  ValNodePtr     following;
  GBBlockPtr     gbp;
  Pointer PNTR   link;     /* address of the pointer that leads to curr */
  Boolean        removeIt;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    curr = bsp->descr;
    link = (Pointer PNTR) &(bsp->descr);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    curr = bssp->descr;
    link = (Pointer PNTR) &(bssp->descr);
  } else {
    return;
  }

  for (; curr != NULL; curr = following) {
    following = curr->next;
    removeIt = FALSE;

    gbp = NULL;
    if (curr->choice == Seq_descr_genbank) {
      gbp = (GBBlockPtr) curr->data.ptrvalue;
    }
    if (gbp != NULL) {
      gbp->taxonomy = MemFree (gbp->taxonomy);
      removeIt = (Boolean) (gbp->extra_accessions == NULL &&
                            gbp->source == NULL &&
                            gbp->keywords == NULL &&
                            gbp->origin == NULL &&
                            gbp->date == NULL &&
                            gbp->entry_date == NULL &&
                            gbp->div == NULL &&
                            gbp->taxonomy == NULL);
    }

    if (! removeIt) {
      link = (Pointer PNTR) &(curr->next);
      continue;
    }

    /* splice the emptied descriptor out of the chain before freeing it */
    *link = curr->next;
    curr->next = NULL;
    SeqDescFree (curr);
  }
}
3118 
/*
 * Bioseq callback: when the first MolInfo descriptor found for the Bioseq
 * has tech == MI_TECH_barcode, makes sure the GenBank block carries a
 * "BARCODE" keyword (compared case-insensitively).  A GenBank descriptor
 * or GBBlock is created when missing; a new keyword is put at the head of
 * the keyword list.
 *
 *   bsp       Bioseq to examine
 *   userdata  unused
 */
static void BarCodeTechToKeyword (BioseqPtr bsp, Pointer userdata)

{
  GBBlockPtr   gbp;
  MolInfoPtr   mip;
  SeqDescrPtr  sdp;
  ValNodePtr   vnp;

  if (bsp == NULL) return;

  sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
  if (sdp == NULL || sdp->choice != Seq_descr_molinfo) return;

  mip = (MolInfoPtr) sdp->data.ptrvalue;
  if (mip == NULL || mip->tech != MI_TECH_barcode) return;

  sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_genbank, NULL);
  if (sdp == NULL) {
    gbp = GBBlockNew ();
    if (gbp == NULL) return;
    sdp = SeqDescrAddPointer (&(bsp->descr), Seq_descr_genbank, (Pointer) gbp);
    if (sdp == NULL) {
      /* descriptor could not be created, so the block has no owner */
      GBBlockFree (gbp);
      return;
    }
  }
  if (sdp->choice != Seq_descr_genbank) return;

  gbp = (GBBlockPtr) sdp->data.ptrvalue;
  if (gbp == NULL) {
    /* an existing descriptor without a payload: give it a fresh block */
    gbp = GBBlockNew ();
    if (gbp == NULL) return;
    sdp->data.ptrvalue = (Pointer) gbp;
  }

  for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) {
    if (StringICmp ((CharPtr) vnp->data.ptrvalue, "BARCODE") == 0) return;
  }

  vnp = ValNodeCopyStr (NULL, 0, "BARCODE");
  if (vnp == NULL) return;

  vnp->next = gbp->keywords;
  gbp->keywords = vnp;
}
3155 
EmptyOrNullString(CharPtr str)3156 static Boolean EmptyOrNullString (CharPtr str)
3157 
3158 {
3159   Char  ch;
3160 
3161   if (str == NULL) return TRUE;
3162   ch = *str;
3163   while (ch != '\0') {
3164     if (ch > ' ' && ch <= '~') return FALSE;
3165     str++;
3166     ch = *str;
3167   }
3168   return TRUE;
3169 }
3170 
/*
 * SeqEntryExplore callback: merges neighbouring feature-table annots
 * (type 1) on the visited Bioseq or Bioseq-set.  Two neighbours are merged
 * only when both have no id, name, db or desc and both hold data; the
 * second annot's features are appended to the first and the second annot
 * is freed.
 *
 * After a merge the scan continues with the annot that follows the merged
 * pair, so the surviving annot is not compared against its new neighbour.
 *
 *   sep  entry being visited
 *   mydata, index, indent  unused
 */
extern void MergeAdjacentAnnotsCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  SeqAnnotPtr   first;
  SeqAnnotPtr   second;
  SeqFeatPtr    tail;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    first = bsp->annot;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    first = bssp->annot;
  } else {
    return;
  }

  for (; first != NULL; first = second) {
    second = first->next;
    if (second == NULL) continue;
    if (first->type != 1 || second->type != 1) continue;
    if (first->id != NULL || second->id != NULL) continue;
    if (first->name != NULL || second->name != NULL) continue;
    if (first->db != 0 || second->db != 0) continue;
    if (first->desc != NULL || second->desc != NULL) continue;
    if (first->data == NULL || second->data == NULL) continue;

    /* hang the second table off the last feature of the first */
    tail = (SeqFeatPtr) first->data;
    while (tail->next != NULL) {
      tail = tail->next;
    }
    tail->next = (SeqFeatPtr) second->data;
    second->data = NULL;

    /* drop the now empty annot and step past it */
    first->next = second->next;
    SeqAnnotFree (second);
    second = first->next;
  }
}
3210 
HasEvidenceOrInferenceQual(SeqFeatPtr sfp)3211 static Boolean HasEvidenceOrInferenceQual (SeqFeatPtr sfp)
3212 
3213 {
3214   GBQualPtr  gbq;
3215 
3216   if (sfp == NULL) return FALSE;
3217   for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
3218     if (StringICmp (gbq->qual, "experiment") == 0) return TRUE;
3219     if (StringICmp (gbq->qual, "inference") == 0) return TRUE;
3220   }
3221   return FALSE;
3222 }
3223 
/*
 * SeqEntryExplore callback: removes features that carry no information
 * from the feature tables of the visited Bioseq or Bioseq-set, then
 * removes annots that end up with no data.
 *
 * Gene features: blank locus, allele, desc, maploc and locus_tag strings
 *   are freed.  A gene with none of those, no db, no syn, no pseudo flag
 *   (on feature or GeneRef), exp_ev == 0 and no experiment/inference
 *   qualifier is considered empty; if it still has a comment it is turned
 *   into a "misc_feature" import feature instead of being deleted.
 * Protein features: unless processed is 3, 4 or 5, a missing name is taken
 *   from the feature comment (except the comment "putative"); processed == 2
 *   with no name gets the name "unnamed"; a protein (again excluding
 *   processed 3, 4, 5) with blank first name, blank desc and no ec,
 *   activity or db is considered empty.
 * Comment features: empty when the comment is blank.
 *
 * An annot of any type whose data is NULL is unlinked and freed when
 * SSECNoGenomeAnnotInAnnotDescr returns TRUE for it.
 *
 *   sep  entry being visited
 *   mydata, index, indent  unused
 */
extern void CleanupEmptyFeatCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  SeqAnnotPtr    annot;
  Pointer PNTR   annotLink;  /* address of the pointer that leads to annot */
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  Boolean        discard;
  Boolean        exempt;
  SeqFeatPtr     feat;
  Pointer PNTR   featLink;   /* address of the pointer that leads to feat */
  GeneRefPtr     grp;
  ImpFeatPtr     ifp;
  ValNodePtr     name;
  SeqAnnotPtr    nextAnnot;
  SeqFeatPtr     nextFeat;
  ProtRefPtr     prp;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    annot = bsp->annot;
    annotLink = (Pointer PNTR) &(bsp->annot);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    annot = bssp->annot;
    annotLink = (Pointer PNTR) &(bssp->annot);
  } else {
    return;
  }

  for (; annot != NULL; annot = nextAnnot) {
    nextAnnot = annot->next;

    if (annot->type == 1) {
      featLink = (Pointer PNTR) &(annot->data);
      for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = nextFeat) {
        nextFeat = feat->next;
        discard = FALSE;

        switch (feat->data.choice) {
          case SEQFEAT_GENE :
            grp = (GeneRefPtr) feat->data.value.ptrvalue;
            if (grp == NULL) break;

            /* scrub blank strings so they no longer count as content */
            if (EmptyOrNullString (grp->locus)) {
              grp->locus = MemFree (grp->locus);
            }
            if (EmptyOrNullString (grp->allele)) {
              grp->allele = MemFree (grp->allele);
            }
            if (EmptyOrNullString (grp->desc)) {
              grp->desc = MemFree (grp->desc);
            }
            if (EmptyOrNullString (grp->maploc)) {
              grp->maploc = MemFree (grp->maploc);
            }
            if (EmptyOrNullString (grp->locus_tag)) {
              grp->locus_tag = MemFree (grp->locus_tag);
            }

            /* anything left over keeps the gene */
            if (! EmptyOrNullString (grp->locus)) break;
            if (! EmptyOrNullString (grp->allele)) break;
            if (! EmptyOrNullString (grp->desc)) break;
            if (! EmptyOrNullString (grp->maploc)) break;
            if (! EmptyOrNullString (grp->locus_tag)) break;
            if (grp->db != NULL || grp->syn != NULL) break;
            if (feat->pseudo || grp->pseudo) break;
            if (feat->exp_ev != 0) break;
            if (HasEvidenceOrInferenceQual (feat)) break;

            discard = TRUE;
            /* if it has a comment, convert to misc_feature */
            if (! EmptyOrNullString (feat->comment)) {
              ifp = (ImpFeatPtr) MemNew (sizeof (ImpFeat));
              if (ifp != NULL) {
                ifp->key = StringSave ("misc_feature");
                feat->data.choice = SEQFEAT_IMP;
                feat->data.value.ptrvalue = (Pointer) ifp;
                feat->idx.subtype = 0;
                GeneRefFree (grp);
                discard = FALSE;
              }
            }
            break;

          case SEQFEAT_PROT :
            prp = (ProtRefPtr) feat->data.value.ptrvalue;
            if (prp == NULL) break;

            /* processed values 3, 4 and 5 are neither renamed nor removed */
            exempt = (Boolean) (prp->processed == 3 ||
                                prp->processed == 4 ||
                                prp->processed == 5);

            if ((! exempt) && prp->name == NULL && feat->comment != NULL &&
                StringICmp (feat->comment, "putative") != 0) {
              /* the comment string itself becomes the name node's data */
              ValNodeAddStr (&(prp->name), 0, feat->comment);
              feat->comment = NULL;
            }
            if (prp->processed == 2 && prp->name == NULL) {
              ValNodeCopyStr (&(prp->name), 0, "unnamed");
            }
            if (! exempt) {
              name = prp->name;
              if ((name == NULL || EmptyOrNullString ((CharPtr) name->data.ptrvalue)) &&
                  EmptyOrNullString (prp->desc) &&
                  prp->ec == NULL && prp->activity == NULL && prp->db == NULL) {
                discard = TRUE;
              }
            }
            break;

          case SEQFEAT_COMMENT :
            if (EmptyOrNullString (feat->comment)) {
              discard = TRUE;
            }
            break;

          default :
            break;
        }

        if (discard) {
          *(featLink) = feat->next;
          feat->next = NULL;
          SeqFeatFree (feat);
        } else {
          featLink = (Pointer PNTR) &(feat->next);
        }
      }
    }

    /* now keep empty annot if annot_descr present */
    if (annot->data == NULL && SSECNoGenomeAnnotInAnnotDescr (annot)) {
      *(annotLink) = annot->next;
      annot->next = NULL;
      SeqAnnotFree (annot);
    } else {
      annotLink = (Pointer PNTR) &(annot->next);
    }
  }
}
3345 
/*
 * Pushes the BioSource of a pop/phy/mut-style set down onto its members
 * and removes it from the set.
 *
 * For a Bioseq-set whose class is 7, 13..16, wgs_set or small_genome_set
 * and that has a source descriptor with an OrgRef: every member is visited
 * recursively with that OrgRef as master, then the source descriptor is
 * extracted from the set and freed.  Such a set without a usable source
 * descriptor is left alone and its members are not visited.
 *
 * For any other entry: if master is given and the entry has no source
 * descriptor, a new one is created holding copies of master's taxname and
 * common name.
 *
 *   sep     entry to process (may be NULL)
 *   master  OrgRef inherited from the enclosing set, or NULL
 */
extern void RemoveBioSourceOnPopSet (SeqEntryPtr sep, OrgRefPtr master)

{
  BioSourcePtr  biop;
  BioseqSetPtr  bssp;
  SeqEntryPtr   child;
  OrgRefPtr     orp;
  ValNodePtr    sdp;

  if (sep == NULL) return;
  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    if (bssp->_class == 7 ||
        (bssp->_class >= 13 && bssp->_class <= 16) ||
        bssp->_class == BioseqseqSet_class_wgs_set ||
        bssp->_class == BioseqseqSet_class_small_genome_set) { /* now on phy and mut sets */
      sdp = SeqEntryGetSeqDescr (sep, Seq_descr_source, NULL);
      if (sdp == NULL) return;
      biop = (BioSourcePtr) sdp->data.ptrvalue;
      if (biop == NULL) return;
      orp = biop->org;
      if (orp == NULL) return;
      for (child = bssp->seq_set; child != NULL; child = child->next) {
        RemoveBioSourceOnPopSet (child, orp);
      }
      sdp = ValNodeExtract (&(bssp->descr), Seq_descr_source);
      SeqDescrFree (sdp);
      return;
    }
    /* a separate class 7 branch used to follow here; it could never run
       because class 7 always returns from the branch above */
  }

  if (master == NULL) return;
  sdp = SeqEntryGetSeqDescr (sep, Seq_descr_source, NULL);
  if (sdp != NULL) return;

  biop = BioSourceNew ();
  if (biop == NULL) return;
  orp = OrgRefNew ();
  if (orp == NULL) {
    BioSourceFree (biop);
    return;
  }
  biop->org = orp;
  orp->taxname = StringSave (master->taxname);
  orp->common = StringSave (master->common);

  sdp = CreateNewDescriptor (sep, Seq_descr_source);
  if (sdp == NULL) {
    /* nothing took ownership of the new BioSource, so release it */
    BioSourceFree (biop);
    return;
  }
  sdp->data.ptrvalue = (Pointer) biop;
}
3410 
/*
 * Pushes the MolInfo of a pop/phy/mut-style set down onto its members and
 * removes it from the set.
 *
 * For a Bioseq-set whose class is 7, 13..16, wgs_set or small_genome_set
 * and that has a MolInfo descriptor: every member is visited recursively
 * with that MolInfo as master, then the descriptor is extracted from the
 * set and freed.  Such a set without a MolInfo descriptor is left alone.
 *
 * For any other entry: if master is given and the entry has no MolInfo
 * descriptor, a new one is created copying biomol, tech, completeness and,
 * when they contain text, techexp and gbmoltype.
 *
 *   sep     entry to process (may be NULL)
 *   master  MolInfo inherited from the enclosing set, or NULL
 */
extern void RemoveMolInfoOnPopSet (SeqEntryPtr sep, MolInfoPtr master)

{
  BioseqSetPtr  bssp;
  SeqEntryPtr   child;
  MolInfoPtr    mip;
  ValNodePtr    sdp;

  if (sep == NULL) return;
  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    if (bssp->_class == 7 ||
        (bssp->_class >= 13 && bssp->_class <= 16) ||
        bssp->_class == BioseqseqSet_class_wgs_set ||
        bssp->_class == BioseqseqSet_class_small_genome_set) {
      sdp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
      if (sdp == NULL) return;
      mip = (MolInfoPtr) sdp->data.ptrvalue;
      if (mip == NULL) return;
      for (child = bssp->seq_set; child != NULL; child = child->next) {
        RemoveMolInfoOnPopSet (child, mip);
      }
      sdp = ValNodeExtract (&(bssp->descr), Seq_descr_molinfo);
      SeqDescrFree (sdp);
      return;
    }
  }

  if (master == NULL) return;
  sdp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
  if (sdp != NULL) return;

  mip = MolInfoNew ();
  if (mip == NULL) return;
  mip->biomol = master->biomol;
  mip->tech = master->tech;
  if (StringDoesHaveText (master->techexp)) {
    mip->techexp = StringSave (master->techexp);
  }
  mip->completeness = master->completeness;
  if (StringDoesHaveText (master->gbmoltype)) {
    mip->gbmoltype = StringSave (master->gbmoltype);
  }

  sdp = CreateNewDescriptor (sep, Seq_descr_molinfo);
  if (sdp == NULL) {
    /* nothing took ownership of the copy, so release it */
    MolInfoFree (mip);
    return;
  }
  sdp->data.ptrvalue = (Pointer) mip;
}
3456 
3457 /* NoBiosourceOrTaxonId also looks for lineage and division */
3458 //LCOV_EXCL_START
3459 // Not used for cleanup
NoBiosourceOrTaxonId(SeqEntryPtr sep)3460 extern Boolean NoBiosourceOrTaxonId (SeqEntryPtr sep)
3461 
3462 {
3463   BioSourcePtr  biop;
3464   BioseqSetPtr  bssp;
3465   DbtagPtr      dbt;
3466   Boolean       notaxid;
3467   ObjectIdPtr   oid;
3468   OrgNamePtr    onp;
3469   OrgRefPtr     orp;
3470   ValNodePtr    sdp;
3471   ValNodePtr    vnp;
3472 
3473   if (sep == NULL) return TRUE;
3474   if (IS_Bioseq_set (sep)) {
3475     bssp = (BioseqSetPtr) sep->data.ptrvalue;
3476     if (bssp != NULL && (bssp->_class == 7 ||
3477                          (bssp->_class >= 13 && bssp->_class <= 16) ||
3478                          bssp->_class == BioseqseqSet_class_wgs_set ||
3479                          bssp->_class == BioseqseqSet_class_small_genome_set)) {
3480       for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
3481         if (NoBiosourceOrTaxonId (sep)) return TRUE;
3482       }
3483       return FALSE;
3484     }
3485   }
3486   sdp = SeqEntryGetSeqDescr (sep, Seq_descr_source, NULL);
3487   if (sdp == NULL) return TRUE;
3488   biop = (BioSourcePtr) sdp->data.ptrvalue;
3489   if (biop == NULL) return TRUE;
3490   orp = biop->org;
3491   if (orp == NULL) return TRUE;
3492   vnp = orp->db;
3493   if (vnp == NULL) return TRUE;
3494   notaxid = TRUE;
3495   while (vnp != NULL) {
3496     dbt = (DbtagPtr) vnp->data.ptrvalue;
3497     if (dbt != NULL) {
3498       if (StringCmp (dbt->db, "taxon") == 0) {
3499         oid = dbt->tag;
3500         if (oid != NULL) {
3501           if (oid->str == NULL && oid->id > 0) {
3502             notaxid = FALSE;
3503           }
3504         }
3505       }
3506     }
3507     vnp = vnp->next;
3508   }
3509   if (notaxid) return TRUE;
3510   onp = orp->orgname;
3511   if (onp == NULL) return TRUE;
3512   if (StringHasNoText (onp->lineage) || StringHasNoText (onp->div)) return TRUE;
3513   return FALSE;
3514 }
3515 
/*
 * SeqEntryExplore callback: appends every gene feature found in the
 * feature tables (annot type 1) of the visited Bioseq or Bioseq-set to
 * the ValNode list that mydata points at (choice 0, data = SeqFeatPtr).
 *
 *   sep     entry being visited
 *   mydata  ValNodePtr PNTR receiving the gene features
 *   index, indent  unused
 */
static void CollectGeneFeatures(SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  SeqAnnotPtr      annot = NULL;
  SeqFeatPtr       feat;
  ValNodePtr PNTR  genes;

  if (sep == NULL || mydata == NULL) return;
  if (sep->data.ptrvalue == NULL) return;
  genes = (ValNodePtr PNTR) mydata;

  if (IS_Bioseq (sep)) {
    annot = ((BioseqPtr) sep->data.ptrvalue)->annot;
  } else if (IS_Bioseq_set (sep)) {
    annot = ((BioseqSetPtr) sep->data.ptrvalue)->annot;
  } else {
    return;
  }

  for (; annot != NULL; annot = annot->next) {
    if (annot->type != 1 || annot->data == NULL) continue;
    for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = feat->next) {
      if (feat->data.choice == SEQFEAT_GENE) {
        ValNodeAddPointer (genes, 0, (Pointer) feat);
      }
    }
  }
}
3548 
/*
 * Runs CorrectGeneFeatLocation over an entry that contains exactly one
 * gene feature.  Sets of class 7, 13..16, wgs_set or small_genome_set are
 * not examined as a whole; each of their members is handled separately.
 */
static void ExtendGeneWithinNucProt (SeqEntryPtr sep)

{
  BioseqSetPtr  bssp;
  SeqEntryPtr   child;
  ValNodePtr    genes = NULL;
  Boolean       singleGene;

  if (sep == NULL) return;

  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    if (bssp->_class == 7 ||
        (bssp->_class >= 13 && bssp->_class <= 16) ||
        bssp->_class == BioseqseqSet_class_wgs_set ||
        bssp->_class == BioseqseqSet_class_small_genome_set) {
      child = bssp->seq_set;
      while (child != NULL) {
        ExtendGeneWithinNucProt (child);
        child = child->next;
      }
      return;
    }
  }

  SeqEntryExplore (sep, (Pointer) (&genes), CollectGeneFeatures);
  singleGene = (Boolean) (genes != NULL && genes->next == NULL);
  if (singleGene) {
    SeqEntryExplore (sep, NULL, CorrectGeneFeatLocation);
  }
  ValNodeFree (genes);
}
3576 
/*
 * Public entry point for ExtendGeneWithinNucProt.  When sep is NULL the
 * top SeqEntry is looked up from entityID; the call is a no-op if neither
 * yields an entry.
 */
extern void ExtendGeneFeatIfOnMRNA (Uint2 entityID, SeqEntryPtr sep)

{
  if (sep == NULL) {
    if (entityID < 1) return;
    sep = GetTopSeqEntryForEntityID (entityID);
    if (sep == NULL) return;
  }
  ExtendGeneWithinNucProt (sep);
}
3587 //LCOV_EXCL_STOP
3588 
ConvertPubFeatDescProc(GatherObjectPtr gop)3589 static Boolean ConvertPubFeatDescProc (GatherObjectPtr gop)
3590 
3591 {
3592   BioseqPtr    bsp;
3593   size_t       len;
3594   PubdescPtr   pdp;
3595   SeqDescPtr   sdp;
3596   SeqEntryPtr  sep;
3597   SeqFeatPtr   sfp;
3598   SeqIdPtr     sip;
3599   CharPtr      str;
3600   ValNode      vn;
3601 
3602   if (gop->itemtype != OBJ_SEQFEAT) return TRUE;
3603   sfp = (SeqFeatPtr) gop->dataptr;
3604   /* look for publication features */
3605   if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB) return TRUE;
3606   /* get bioseq by feature location */
3607   sip = SeqLocId (sfp->location);
3608   bsp = BioseqFind (sip);
3609   if (bsp == NULL) return TRUE;
3610   sip = SeqIdFindBest(bsp->id, 0);
3611   if (sip == NULL) return TRUE;
3612   vn.choice = SEQLOC_WHOLE;
3613   vn.extended = 0;
3614   vn.data.ptrvalue = (Pointer) sip;
3615   vn.next = NULL;
3616   /* is feature full length? */
3617   if (SeqLocCompare (sfp->location, &vn) != SLC_A_EQ_B) return TRUE;
3618   sep = SeqMgrGetSeqEntryForData (bsp);
3619   if (sep == NULL) return TRUE;
3620   sdp = CreateNewDescriptor (sep, Seq_descr_pub);
3621   if (sdp == NULL) return TRUE;
3622   /* move publication from feature to descriptor */
3623   sdp->data.ptrvalue = sfp->data.value.ptrvalue;
3624   sfp->data.value.ptrvalue = NULL;
3625   /* flag old feature for removal */
3626   sfp->idx.deleteme = TRUE;
3627   /* move or append comment to pubdesc comment */
3628   if (sfp->comment == NULL) return TRUE;
3629   pdp = (PubdescPtr) sdp->data.ptrvalue;
3630   if (pdp == NULL) return TRUE;
3631   if (pdp->comment == NULL) {
3632     pdp->comment = sfp->comment;
3633   } else {
3634     len = StringLen (sfp->comment) + StringLen (pdp->comment) + 5;
3635     str = MemNew (sizeof (Char) * len);
3636     StringCpy (str, pdp->comment);
3637     StringCat (str, "; ");
3638     StringCat (str, sfp->comment);
3639     pdp->comment = MemFree (pdp->comment);
3640     pdp->comment = str;
3641   }
3642   sfp->comment = NULL;
3643   return TRUE;
3644 }
3645 
/*
 * Converts full-length publication features under sep into pub
 * descriptors.  Features are visited with ConvertPubFeatDescProc while
 * the scope is set to sep; the scope is then restored and the features
 * that were marked are deleted.
 */
extern void ConvertFullLenPubFeatToDesc (SeqEntryPtr sep)

{
  Boolean      featuresOnly [OBJ_MAX];
  SeqEntryPtr  savedScope;

  if (sep == NULL) return;

  /* restrict the gather to sequence features */
  MemSet ((Pointer) featuresOnly, FALSE, sizeof (featuresOnly));
  featuresOnly [OBJ_SEQFEAT] = TRUE;

  savedScope = SeqEntrySetScope (sep);
  GatherObjectsInEntity (0, OBJ_SEQENTRY, (Pointer) sep,
                         ConvertPubFeatDescProc, NULL, featuresOnly);
  SeqEntrySetScope (savedScope);

  DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
}
3664 
GatherConvertSourceFeatDescProc(GatherObjectPtr gop)3665 static Boolean GatherConvertSourceFeatDescProc (GatherObjectPtr gop)
3666 
3667 {
3668   SeqFeatPtr  sfp;
3669 
3670   if (gop->itemtype != OBJ_SEQFEAT) return TRUE;
3671   sfp = (SeqFeatPtr) gop->dataptr;
3672   ConvertSourceFeatDescProc (sfp, NULL);
3673   return TRUE;
3674 }
3675 
/*
 * Descriptor callback: sets the Boolean that userdata points at to TRUE
 * when a source descriptor has is_focus set or carries a SubSource of
 * subtype SUBSRC_transgenic.  The flag is never cleared here.
 *
 *   sdp       descriptor being visited
 *   userdata  BoolPtr receiving the result; ignored when NULL
 */
static void LookForTransgenic (SeqDescrPtr sdp, Pointer userdata)

{
  BioSourcePtr  biop;
  BoolPtr       is_trans_or_focus;
  SubSourcePtr  ssp;

  is_trans_or_focus = (BoolPtr) userdata;
  if (is_trans_or_focus == NULL) return;
  if (sdp == NULL || sdp->choice != Seq_descr_source) return;
  biop = (BioSourcePtr) sdp->data.ptrvalue;
  if (biop == NULL) return;

  if (biop->is_focus) {
    *is_trans_or_focus = TRUE;
    return;
  }
  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
    if (ssp->subtype == SUBSRC_transgenic) {
      *is_trans_or_focus = TRUE;
      return;
    }
  }
}
3699 
/*
 * Runs ConvertSourceFeatDescProc over every feature under sep and deletes
 * the features that end up marked.  Nothing is done when any source
 * descriptor under sep is flagged by LookForTransgenic (focus or
 * transgenic).
 */
extern void ConvertFullLenSourceFeatToDesc (SeqEntryPtr sep)

{
  Boolean      featuresOnly [OBJ_MAX];
  Boolean      keepFeatures = FALSE;
  SeqEntryPtr  savedScope;

  if (sep == NULL) return;

  VisitDescriptorsInSep (sep, (Pointer) &keepFeatures, LookForTransgenic);
  if (keepFeatures) return;

  /* restrict the gather to sequence features */
  MemSet ((Pointer) featuresOnly, FALSE, sizeof (featuresOnly));
  featuresOnly [OBJ_SEQFEAT] = TRUE;

  savedScope = SeqEntrySetScope (sep);
  GatherObjectsInEntity (0, OBJ_SEQENTRY, (Pointer) sep,
                         GatherConvertSourceFeatDescProc, NULL, featuresOnly);
  SeqEntrySetScope (savedScope);

  DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
}
3722 
LoopSeqEntryToAsn3(SeqEntryPtr sep,Boolean strip,Boolean correct,SeqEntryFunc taxfun,SeqEntryFunc taxmerge,Boolean gpipeMode,Boolean isEmblOrDdbj)3723 static Int4 LoopSeqEntryToAsn3 (
3724   SeqEntryPtr sep,
3725   Boolean strip,
3726   Boolean correct,
3727   SeqEntryFunc taxfun,
3728   SeqEntryFunc taxmerge,
3729   Boolean gpipeMode,
3730   Boolean isEmblOrDdbj
3731 )
3732 
3733 {
3734   BioseqSetPtr  bssp;
3735   SeqEntryPtr   oldscope;
3736   Int4          rsult;
3737   Boolean       taxserver;
3738 
3739   rsult = 0;
3740   if (IS_Bioseq_set (sep)) {
3741     bssp = (BioseqSetPtr) sep->data.ptrvalue;
3742     if (bssp != NULL && (bssp->_class == 7 ||
3743                          (bssp->_class >= 13 && bssp->_class <= 16) ||
3744                          bssp->_class == BioseqseqSet_class_wgs_set ||
3745                          bssp->_class == BioseqseqSet_class_small_genome_set)) {
3746       for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
3747         rsult += LoopSeqEntryToAsn3 (sep, strip, correct, taxfun, taxmerge, gpipeMode, isEmblOrDdbj);
3748       }
3749       return rsult;
3750     }
3751   }
3752   oldscope = SeqEntrySetScope (sep);
3753   taxserver = (Boolean) (taxfun != NULL || taxmerge != NULL);
3754   rsult = SeqEntryToAsn3Ex (sep, strip, correct, taxserver, taxfun, taxmerge, gpipeMode, isEmblOrDdbj);
3755   SeqEntrySetScope (oldscope);
3756   return rsult;
3757 }
3758 
3759 //LCOV_EXCL_START
3760 //This is never called; basic cleanup takes care of converting
3761 // gene quals to gene xrefs
DeleteBadMarkedGeneXrefs(GatherObjectPtr gop)3762 static Boolean DeleteBadMarkedGeneXrefs (GatherObjectPtr gop)
3763 
3764 {
3765   GeneRefPtr           grp;
3766   SeqFeatXrefPtr       nextxref;
3767   SeqFeatXrefPtr PNTR  prevxref;
3768   SeqFeatPtr           sfp;
3769   SeqFeatPtr           sfpx;
3770   Boolean              unlink;
3771   SeqFeatXrefPtr       xref;
3772 
3773   if (gop->itemtype != OBJ_SEQFEAT) return TRUE;
3774   sfp = (SeqFeatPtr) gop->dataptr;
3775   xref = sfp->xref;
3776   prevxref = (SeqFeatXrefPtr PNTR) &(sfp->xref);
3777   while (xref != NULL) {
3778     nextxref = xref->next;
3779     unlink = FALSE;
3780     if (xref->specialCleanupFlag && xref->data.choice == SEQFEAT_GENE) {
3781       grp = (GeneRefPtr) xref->data.value.ptrvalue;
3782       if (grp != NULL) {
3783         sfpx = SeqMgrGetOverlappingGene (sfp->location, NULL);
3784         if (sfpx != NULL && sfpx->data.choice == SEQFEAT_GENE) {
3785           unlink = TRUE;
3786         }
3787       }
3788     }
3789     xref->specialCleanupFlag = FALSE;
3790     if (unlink) {
3791       *(prevxref) = xref->next;
3792       xref->next = NULL;
3793       SeqFeatXrefFree (xref);
3794     } else {
3795       prevxref = (SeqFeatXrefPtr PNTR) &(xref->next);
3796     }
3797     xref = nextxref;
3798   }
3799   return TRUE;
3800 }
3801 //LCOV_EXCL_STOP
3802 
MarkMovedGeneGbquals(GatherObjectPtr gop)3803 static Boolean MarkMovedGeneGbquals (GatherObjectPtr gop)
3804 
3805 {
3806   GBQualPtr       gbq;
3807   GeneRefPtr      grp;
3808   BoolPtr         hasMarkedGenesP;
3809   GBQualPtr       nextqual;
3810   GBQualPtr PNTR  prevqual;
3811   SeqFeatPtr      sfp;
3812   SeqFeatXrefPtr  xref;
3813 
3814   if (gop->itemtype != OBJ_SEQFEAT) return TRUE;
3815   hasMarkedGenesP = (BoolPtr) gop->userdata;
3816   if (hasMarkedGenesP == NULL) return TRUE;
3817   sfp = (SeqFeatPtr) gop->dataptr;
3818   gbq = sfp->qual;
3819   prevqual = (GBQualPtr PNTR) &(sfp->qual);
3820   while (gbq != NULL) {
3821     TrimSpacesAroundString (gbq->qual);
3822     TrimSpacesAroundString (gbq->val);
3823     nextqual = gbq->next;
3824     if (StringICmp (gbq->qual, "gene") == 0 && (! StringHasNoText (gbq->val))) {
3825       //LCOV_EXCL_START
3826       //This is never called; basic cleanup takes care of converting
3827       // gene quals to gene xrefs
3828       grp = GeneRefNew ();
3829       grp->locus = StringSave (gbq->val);
3830       xref = SeqFeatXrefNew ();
3831       xref->data.choice = SEQFEAT_GENE;
3832       xref->data.value.ptrvalue = (Pointer) grp;
3833       xref->specialCleanupFlag = TRUE; /* flag to test for overlapping gene later */
3834       xref->next = sfp->xref;
3835       sfp->xref = xref;
3836       *(prevqual) = gbq->next;
3837       gbq->next = NULL;
3838       gbq->qual = MemFree (gbq->qual);
3839       gbq->val = MemFree (gbq->val);
3840       GBQualFree (gbq);
3841       *hasMarkedGenesP = TRUE;
3842       //LCOV_EXCL_STOP
3843     } else {
3844       prevqual = (GBQualPtr PNTR) &(gbq->next);
3845     }
3846     gbq = nextqual;
3847   }
3848   return TRUE;
3849 }
3850 
/* RemoveMultipleTitles marks every title descriptor except the LAST one in the chain for deletion */
3852 
/*
 * For the Bioseq or Bioseq-set held by sep, flags every title descriptor
 * that is followed by another title in the same chain for deletion, so only
 * the last title stays unflagged.  Only extended descriptor nodes can be
 * flagged; a non-extended earlier title is left alone.
 * mydata, index and indent are not used.
 */
static void RemoveMultipleTitles (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  SeqDescrPtr  chain = NULL;
  SeqDescrPtr  curr;
  SeqDescrPtr  prior = NULL;

  if (IS_Bioseq (sep)) {
    if (sep->data.ptrvalue == NULL) return;
    chain = ((BioseqPtr) sep->data.ptrvalue)->descr;
  } else if (IS_Bioseq_set (sep)) {
    if (sep->data.ptrvalue == NULL) return;
    chain = ((BioseqSetPtr) sep->data.ptrvalue)->descr;
  } else {
    return;
  }

  for (curr = chain; curr != NULL; curr = curr->next) {
    if (curr->choice == Seq_descr_title) {
      /* a newer title was found, so the previously seen one is superseded */
      if (prior != NULL && prior->extended != 0) {
        ((ObjValNodePtr) prior)->idx.deleteme = TRUE;
      }
      prior = curr;
    }
  }
}
3885 
GetCspFromPdp(PubdescPtr pdp)3886 static CitSubPtr GetCspFromPdp (PubdescPtr pdp)
3887 
3888 {
3889   ValNodePtr  vnp;
3890 
3891   if (pdp == NULL) return NULL;
3892   vnp = pdp->pub;
3893   if (vnp == NULL) return NULL;
3894   if (vnp->choice != PUB_Sub) return NULL;
3895   return (CitSubPtr) vnp->data.ptrvalue;
3896 }
3897 
CitSubsMatch(CitSubPtr csp1,CitSubPtr csp2)3898 static Boolean CitSubsMatch (CitSubPtr csp1, CitSubPtr csp2)
3899 
3900 {
3901   AffilPtr     afp1, afp2;
3902   AuthListPtr  alp1, alp2;
3903 
3904   if (csp1 == NULL || csp2 == NULL) return FALSE;
3905   if (DateMatch (csp1->date, csp2->date, FALSE) != 0) return FALSE;
3906   if (StringICmp (csp1->descr, csp2->descr) != 0) return FALSE;
3907   alp1 = csp1->authors;
3908   alp2 = csp2->authors;
3909   if (alp1 == NULL || alp2 == NULL) return FALSE;
3910   if (AuthListMatch (alp1, alp2, TRUE) != 0) return FALSE;
3911   afp1 = alp1->affil;
3912   afp2 = alp2->affil;
3913   if (afp1 != NULL && afp2 != NULL) {
3914     if (! AsnIoMemComp (afp1, afp2, (AsnWriteFunc) AffilAsnWrite)) return FALSE;
3915   }
3916   return TRUE;
3917 }
3918 
/*
 * For the Bioseq or Bioseq-set held by sep, merges equivalent Cit-sub
 * publication descriptors within its descriptor chain.
 *
 * The chain is scanned in order.  The most recent retained Cit-sub (csp1)
 * and the Pubdesc that owns it (lastpdp) are remembered.  Each later
 * Cit-sub pub that matches it (CitSubsMatch) and whose remark may be fused
 * (OkayToFuseRemarks) donates its affiliation and comment to the retained
 * pub, where those are missing, and is flagged for deletion if it is an
 * extended descriptor node.  A Cit-sub that does not match becomes the new
 * retained pub.
 *
 * lastpdp is updated only together with csp1.  Previously it was refreshed
 * on every pub descriptor, so the remark check and the comment transfer
 * could target an unrelated pub or a duplicate already flagged for deletion.
 *
 * mydata, index and indent are not used.
 */
static void MergeEquivCitSubs (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  AuthListPtr    alp1, alp2;
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  CitSubPtr      csp1 = NULL, csp2;
  SeqDescrPtr    descr = NULL;
  PubdescPtr     lastpdp = NULL;
  ObjValNodePtr  ovp;
  PubdescPtr     pdp;
  SeqDescrPtr    sdp;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    descr = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    descr = bssp->descr;
  } else return;

  for (sdp = descr; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice != Seq_descr_pub) continue;
    pdp = (PubdescPtr) sdp->data.ptrvalue;
    if (pdp == NULL) continue;

    /* pubs that are not Cit-subs neither merge nor change the retained pub */
    csp2 = GetCspFromPdp (pdp);
    if (csp2 == NULL) continue;

    if (csp1 != NULL && lastpdp != NULL &&
        CitSubsMatch (csp1, csp2) &&
        OkayToFuseRemarks (pdp->comment, lastpdp->comment)) {
      /* move data the retained pub lacks, transferring ownership */
      alp1 = csp1->authors;
      alp2 = csp2->authors;
      if (alp1 != NULL && alp2 != NULL) {
        if (alp1->affil == NULL && alp2->affil != NULL) {
          alp1->affil = alp2->affil;
          alp2->affil = NULL;
        }
      }
      if (lastpdp->comment == NULL && pdp->comment != NULL) {
        lastpdp->comment = pdp->comment;
        pdp->comment = NULL;
      }
      if (sdp->extended != 0) {
        ovp = (ObjValNodePtr) sdp;
        ovp->idx.deleteme = TRUE;
      }
    } else {
      /* first Cit-sub seen, or one that differs: it becomes the retained pub */
      csp1 = csp2;
      lastpdp = pdp;
    }
  }
}
3984 
MergeMultipleDates(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)3985 static void MergeMultipleDates (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
3986 
3987 {
3988   BioseqPtr      bsp;
3989   BioseqSetPtr   bssp;
3990   DatePtr        dp1, dp2;
3991   SeqDescrPtr    descr = NULL;
3992   SeqDescrPtr    lastdate;
3993   ObjValNodePtr  ovp;
3994   SeqDescrPtr    sdp;
3995   Int2           status;
3996 
3997   if (IS_Bioseq (sep)) {
3998     bsp = (BioseqPtr) sep->data.ptrvalue;
3999     if (bsp == NULL) return;
4000     descr = bsp->descr;
4001   } else if (IS_Bioseq_set (sep)) {
4002     bssp = (BioseqSetPtr) sep->data.ptrvalue;
4003     if (bssp == NULL) return;
4004     descr = bssp->descr;
4005   } else return;
4006 
4007   lastdate = NULL;
4008   for (sdp = descr; sdp != NULL; sdp = sdp->next) {
4009     if (sdp->choice != Seq_descr_create_date) continue;
4010     if (lastdate != NULL) {
4011       dp1 = (DatePtr) lastdate->data.ptrvalue;
4012       dp2 = (DatePtr) sdp->data.ptrvalue;
4013       status = DateMatch (dp1, dp2, FALSE);
4014       if (status == 1) {
4015         if (sdp->extended != 0) {
4016           ovp = (ObjValNodePtr) sdp;
4017           ovp->idx.deleteme = TRUE;
4018         }
4019       } else {
4020         if (lastdate->extended != 0) {
4021           ovp = (ObjValNodePtr) lastdate;
4022           ovp->idx.deleteme = TRUE;
4023         }
4024         lastdate = sdp;
4025       }
4026     } else {
4027       lastdate = sdp;
4028     }
4029   }
4030 
4031   lastdate = NULL;
4032   for (sdp = descr; sdp != NULL; sdp = sdp->next) {
4033     if (sdp->choice != Seq_descr_update_date) continue;
4034     if (lastdate != NULL) {
4035       dp1 = (DatePtr) lastdate->data.ptrvalue;
4036       dp2 = (DatePtr) sdp->data.ptrvalue;
4037       status = DateMatch (dp1, dp2, FALSE);
4038       if (status == 1) {
4039         if (sdp->extended != 0) {
4040           ovp = (ObjValNodePtr) sdp;
4041           ovp->idx.deleteme = TRUE;
4042         }
4043       } else {
4044         if (lastdate->extended != 0) {
4045           ovp = (ObjValNodePtr) lastdate;
4046           ovp->idx.deleteme = TRUE;
4047         }
4048         lastdate = sdp;
4049       }
4050     } else {
4051       lastdate = sdp;
4052     }
4053   }
4054 }
4055 
/*
 * Returns the next descriptor of the requested choice, searching first the
 * remainder of the current chain and then the descriptor chains of the
 * enclosing Bioseq-sets, walking outward through idx.parentptr links.
 *
 *   bsp, bssp  starting container; at least one must be non-NULL
 *   choice     descriptor choice to look for; 0 yields NULL
 *   curr       descriptor returned by the previous call, or NULL to start
 *              at the head of the starting container's chain
 *
 * When curr is an extended node, its own parent information decides which
 * container to climb from.  Otherwise the climb starts from the
 * caller-supplied bsp, or from bssp when bsp is NULL.  That last case used
 * to dereference the NULL bsp.
 *
 * Returns NULL when nothing further is found.
 */
static SeqDescrPtr GetNextBspBsspDescrUnindexed (
  BioseqPtr bsp,
  BioseqSetPtr bssp,
  Uint1 choice,
  SeqDescrPtr curr
)

{
  ObjValNodePtr  ovp;
  SeqDescrPtr    sdp = NULL;

  if (bsp == NULL && bssp == NULL) return NULL;
  if (choice == 0) return NULL;

  if (curr == NULL) {
    if (bsp != NULL) {
      sdp = bsp->descr;
    } else {
      sdp = bssp->descr;
    }
    curr = sdp;
  } else {
    sdp = curr->next;
  }

  /* first finish the chain we are currently on */
  while (sdp != NULL) {
    if (sdp->choice == choice) return sdp;
    sdp = sdp->next;
  }

  /* then work out which enclosing set to continue with */
  if (curr != NULL && curr->extended != 0) {
    ovp = (ObjValNodePtr) curr;
    if (ovp->idx.parenttype == OBJ_BIOSEQ) {
      bsp = (BioseqPtr) ovp->idx.parentptr;
      if (bsp == NULL) return NULL;
      if (bsp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
      bssp = (BioseqSetPtr) bsp->idx.parentptr;
    } else if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
      bssp = (BioseqSetPtr) ovp->idx.parentptr;
      if (bssp == NULL) return NULL;
      if (bssp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
      bssp = (BioseqSetPtr) bssp->idx.parentptr;
    } else {
      return NULL;
    }
  } else if (bsp != NULL) {
    if (bsp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
    bssp = (BioseqSetPtr) bsp->idx.parentptr;
  } else {
    /* only a Bioseq-set was supplied (bssp is non-NULL, checked on entry) */
    if (bssp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
    bssp = (BioseqSetPtr) bssp->idx.parentptr;
  }

  while (bssp != NULL) {
    for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) {
      if (sdp->choice == choice) return sdp;
    }
    if (bssp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
    bssp = (BioseqSetPtr) bssp->idx.parentptr;
  }
  return NULL;
}
4114 
/*
 * If the two pub descriptors carry identical publications (pdp->pub compared
 * through AsnIoMemComp), flags the FIRST descriptor (sdp1) for deletion.
 *
 * When the two Pubdescs are not identical as a whole, any optional field
 * that is unset on pdp2 but set on pdp1 is first carried over to pdp2.
 * Pointer fields change owner (the pdp1 field is cleared); scalar fields
 * are copied.
 *
 * Nothing is done unless sdp1 is an extended descriptor node, because only
 * those carry the idx.deleteme flag.  Previously the flag was written
 * without that check, unlike every other deletion site in this file.
 */
static void MarkFirstPubIfEquivalent (
  SeqDescrPtr sdp1,
  SeqDescrPtr sdp2
)

{
  ObjValNodePtr  ovp;
  PubdescPtr     pdp1, pdp2;

  if (sdp1 == NULL || sdp2 == NULL) return;
  if (sdp1->choice != Seq_descr_pub || sdp2->choice != Seq_descr_pub) return;

  /* cannot flag a plain ValNode; bail out before moving any data */
  if (sdp1->extended == 0) return;

  pdp1 = (PubdescPtr) sdp1->data.ptrvalue;
  pdp2 = (PubdescPtr) sdp2->data.ptrvalue;
  if (pdp1 == NULL || pdp2 == NULL) return;

  if (! AsnIoMemComp (pdp1->pub, pdp2->pub, (AsnWriteFunc) PubEquivAsnWrite)) return;

  if (! AsnIoMemComp (pdp1, pdp2, (AsnWriteFunc) PubdescAsnWrite)) {
    /* same publication, different annotations: keep pdp1's extras on pdp2 */
    if (pdp2->name == NULL && pdp1->name != NULL) {
      pdp2->name = pdp1->name;
      pdp1->name = NULL;
    }
    if (pdp2->fig == NULL && pdp1->fig != NULL) {
      pdp2->fig = pdp1->fig;
      pdp1->fig = NULL;
    }
    if (pdp2->num == NULL && pdp1->num != NULL) {
      pdp2->num = pdp1->num;
      pdp1->num = NULL;
    }
    if (! pdp2->numexc && pdp1->numexc) {
      pdp2->numexc = pdp1->numexc;
    }
    if (! pdp2->poly_a && pdp1->poly_a) {
      pdp2->poly_a = pdp1->poly_a;
    }
    if (pdp2->align_group == 0 && pdp1->align_group != 0) {
      pdp2->align_group = pdp1->align_group;
    }
    if (pdp2->maploc == NULL && pdp1->maploc != NULL) {
      pdp2->maploc = pdp1->maploc;
      pdp1->maploc = NULL;
    }
    if (pdp2->seq_raw == NULL && pdp1->seq_raw != NULL) {
      pdp2->seq_raw = pdp1->seq_raw;
      pdp1->seq_raw = NULL;
    }
    if (pdp2->comment == NULL && pdp1->comment != NULL) {
      pdp2->comment = pdp1->comment;
      pdp1->comment = NULL;
    }
    if (pdp2->reftype == 0 && pdp1->reftype != 0) {
      pdp2->reftype = pdp1->reftype;
    }
  }

  ovp = (ObjValNodePtr) sdp1;
  ovp->idx.deleteme = TRUE;
}
4174 
/*
 * For the Bioseq or Bioseq-set held by sep, compares each pub descriptor in
 * its own chain against every later pub descriptor reachable through
 * GetNextBspBsspDescrUnindexed (the rest of the chain and the enclosing
 * sets), and lets MarkFirstPubIfEquivalent flag the earlier one when a
 * duplicate exists.  mydata, index and indent are not used.
 */
static void RemoveIdenticalPubs (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr     bsp = NULL;
  BioseqSetPtr  bssp = NULL;
  SeqDescrPtr   candidate;
  SeqDescrPtr   chain = NULL;
  SeqDescrPtr   other;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    chain = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    chain = bssp->descr;
  } else {
    return;
  }

  for (candidate = chain; candidate != NULL; candidate = candidate->next) {
    if (candidate->choice != Seq_descr_pub) continue;
    for (other = GetNextBspBsspDescrUnindexed (bsp, bssp, Seq_descr_pub, candidate);
         other != NULL;
         other = GetNextBspBsspDescrUnindexed (bsp, bssp, Seq_descr_pub, other)) {
      MarkFirstPubIfEquivalent (candidate, other);
    }
  }
}
4201 
/*
 * For the Bioseq or Bioseq-set held by sep, folds the mol-type and method
 * descriptors into the MolInfo descriptor of the same chain and flags them
 * for deletion.
 *
 * The LAST MolInfo descriptor in the chain is the target; without one (or
 * with a NULL payload on it) nothing happens.  A mol-type value fills
 * MolInfo.biomol, and a method value fills MolInfo.tech, only when the
 * target field is still 0 and the value is one of the recognised constants.
 * Every mol-type and method descriptor that is an extended node is flagged
 * for deletion, whether or not its value was used.  Modif descriptors are
 * left untouched.  mydata, index and indent are not used.
 */
static void MolInfoUpdate (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  SeqDescrPtr   chain = NULL;
  SeqDescrPtr   curr;
  MolInfoPtr    info = NULL;
  Uint1         oldMethod;
  Uint1         oldMol;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    chain = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    chain = bssp->descr;
  } else {
    return;
  }

  /* the last MolInfo in the chain wins */
  for (curr = chain; curr != NULL; curr = curr->next) {
    if (curr->choice == Seq_descr_molinfo) {
      info = (MolInfoPtr) curr->data.ptrvalue;
    }
  }
  if (info == NULL) return;

  for (curr = chain; curr != NULL; curr = curr->next) {
    if (curr->choice == Seq_descr_mol_type) {
      oldMol = curr->data.intvalue;
      if (oldMol != 0 && info->biomol == 0) {
        switch (oldMol) {
          /* recognised mol-type values are stored in biomol unchanged */
          case MOLECULE_TYPE_GENOMIC :
          case MOLECULE_TYPE_PRE_MRNA :
          case MOLECULE_TYPE_MRNA :
          case MOLECULE_TYPE_RRNA :
          case MOLECULE_TYPE_TRNA :
          case MOLECULE_TYPE_SNRNA :
          case MOLECULE_TYPE_SCRNA :
          case MOLECULE_TYPE_PEPTIDE :
          case MOLECULE_TYPE_OTHER_GENETIC_MATERIAL :
          case MOLECULE_TYPE_GENOMIC_MRNA_MIX :
          case 255 :
            info->biomol = oldMol;
            break;
          default :
            break;
        }
      }
    } else if (curr->choice == Seq_descr_method) {
      oldMethod = curr->data.intvalue;
      if (oldMethod != 0 && info->tech == 0) {
        switch (oldMethod) {
          case METHOD_concept_transl :
            info->tech = MI_TECH_concept_trans;
            break;
          case METHOD_seq_pept :
            info->tech = MI_TECH_seq_pept;
            break;
          case METHOD_both :
            info->tech = MI_TECH_both;
            break;
          case METHOD_seq_pept_overlap :
            info->tech = MI_TECH_seq_pept_overlap;
            break;
          case METHOD_seq_pept_homol :
            info->tech = MI_TECH_seq_pept_homol;
            break;
          case METHOD_concept_transl_a :
            info->tech = MI_TECH_concept_trans_a;
            break;
          case METHOD_other :
            info->tech = MI_TECH_other;
            break;
          default :
            break;
        }
      }
    } else {
      /* every other descriptor type, including modif, is left alone */
      continue;
    }

    /* the mol-type or method descriptor itself is now obsolete */
    if (curr->extended != 0) {
      ((ObjValNodePtr) curr)->idx.deleteme = TRUE;
    }
  }
}
4319 
SSECGetAuthListPtr(PubdescPtr pdp)4320 static AuthListPtr SSECGetAuthListPtr (PubdescPtr pdp)
4321 
4322 {
4323   AuthListPtr  alp = NULL;
4324   CitArtPtr    cap;
4325   CitBookPtr   cbp;
4326   CitGenPtr    cgp;
4327   CitPatPtr    cpp;
4328   CitSubPtr    csp;
4329   ValNodePtr   vnp;
4330 
4331   if (pdp == NULL) return NULL;
4332 
4333   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4334     switch (vnp->choice) {
4335       case PUB_Gen :
4336         cgp = (CitGenPtr) vnp->data.ptrvalue;
4337         if (cgp != NULL) {
4338           alp = cgp->authors;
4339         }
4340         break;
4341       case PUB_Sub :
4342         csp = (CitSubPtr) vnp->data.ptrvalue;
4343         if (csp != NULL) {
4344           alp = csp->authors;
4345         }
4346         break;
4347       case PUB_Article :
4348         cap = (CitArtPtr) vnp->data.ptrvalue;
4349         if (cap != NULL) {
4350           alp = cap->authors;
4351         }
4352         break;
4353       case PUB_Book :
4354       case PUB_Proc :
4355       case PUB_Man :
4356         cbp = (CitBookPtr) vnp->data.ptrvalue;
4357         if (cbp != NULL) {
4358           alp = cbp->authors;
4359         }
4360         break;
4361       case PUB_Patent :
4362         cpp = (CitPatPtr) vnp->data.ptrvalue;
4363         if (cpp != NULL) {
4364           alp = cpp->authors;
4365         }
4366         break;
4367       default :
4368         break;
4369     }
4370 
4371     if (alp != NULL) return alp;
4372   }
4373 
4374   return NULL;
4375 }
4376 
JustMuid(ValNodePtr ppr)4377 static Boolean JustMuid (ValNodePtr ppr)
4378 
4379 {
4380   ValNodePtr  vnp;
4381 
4382   if (ppr == NULL) return FALSE;
4383   if (ppr->choice == PUB_Muid) return TRUE;
4384   if (ppr->choice == PUB_Equiv) {
4385     for (vnp = (ValNodePtr) ppr->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
4386       if (JustMuid (vnp)) return TRUE;
4387     }
4388   }
4389   return FALSE;
4390 }
4391 
4392 //LCOV_EXCL_START
4393 // this accomplishes nothing useful
/*
 * Overwrites the value of ppr with muid when ppr is a PUB_Muid, and applies
 * the same treatment recursively to every member of a PUB_Equiv.
 * Note that every PUB_Muid reached is overwritten, not only those whose
 * current value is 0.
 */
static void FixZeroMuid (ValNodePtr ppr, Int4 muid)

{
  ValNodePtr  member;

  if (ppr == NULL) return;

  switch (ppr->choice) {
    case PUB_Muid :
      ppr->data.intvalue = muid;
      break;
    case PUB_Equiv :
      member = (ValNodePtr) ppr->data.ptrvalue;
      while (member != NULL) {
        FixZeroMuid (member, muid);
        member = member->next;
      }
      break;
    default :
      break;
  }
}
4409 
/*
 * If every publication in pdp->pub satisfies JustMuid, stamps muid onto all
 * of them through FixZeroMuid.  If any entry fails JustMuid the Pubdesc is
 * left unchanged.  pdp must not be NULL.
 */
static void RepairBadBackbonePub (PubdescPtr pdp, Int4 muid)

{
  ValNodePtr  item;

  /* stop at the first entry that is something other than a muid */
  item = pdp->pub;
  while (item != NULL && JustMuid (item)) {
    item = item->next;
  }
  if (item != NULL) return;

  item = pdp->pub;
  while (item != NULL) {
    FixZeroMuid (item, muid);
    item = item->next;
  }
}
4422 //LCOV_EXCL_STOP
4423 
/*
 * Unlinks and frees every PUB_Muid node whose value is 0 from a publication
 * list.
 *
 *   ppr   first node of the list
 *   prev  address of the pointer that refers to ppr (e.g. &pdp->pub); it is
 *         updated when the head node is removed
 *
 * The list is never left empty.  A single-node list is returned untouched,
 * and so is a list that consists solely of zero muids.  Previously only the
 * single-node case was guarded, so two or more zero muids emptied the list.
 */
static void RemoveZeroMuids (ValNodePtr  ppr, ValNodePtr PNTR prev)

{
  ValNodePtr  next;
  Boolean     survivor = FALSE;
  ValNodePtr  vnp;

  /* if only muid 0, cannot leave empty pdp->pub */

  if (prev == NULL) return;
  if (ppr == NULL || ppr->next == NULL) return;

  for (vnp = ppr; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice != PUB_Muid || vnp->data.intvalue != 0) {
      survivor = TRUE;
      break;
    }
  }
  if (! survivor) return;

  while (ppr != NULL) {
    next = ppr->next;
    if (ppr->choice == PUB_Muid && ppr->data.intvalue == 0) {
      *prev = ppr->next;
      ppr->next = NULL;
      ValNodeFree (ppr);
    } else {
      prev = (ValNodePtr PNTR) &(ppr->next);
    }
    ppr = next;
  }
}
4445 
IsPatent(PubdescPtr pdp)4446 static Boolean IsPatent (PubdescPtr pdp)
4447 
4448 {
4449   ValNodePtr  vnp;
4450 
4451   if (pdp == NULL) return FALSE;
4452   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4453     if (vnp->choice == PUB_Patent) return TRUE;
4454   }
4455   return FALSE;
4456 }
4457 
/*
 * Decides whether a Pubdesc lacks the minimum content needed to be kept.
 * Returns TRUE when the pub is considered bad, FALSE otherwise (including
 * for a NULL pdp).
 *
 *   pdp     publication to examine
 *   strict  when TRUE, a pub with no author list at all is bad
 *
 * Rules, in order:
 *   - a pub with figure text is always kept;
 *   - in strict mode a missing author list is bad (checked before the
 *     patent exemption);
 *   - patents, and non-strict pubs without an author list, skip the author
 *     checks;
 *   - otherwise the FIRST author entry must carry a name with text
 *     (alp->choice 1 holds Author structures, 2 and 3 hold plain strings);
 *   - every Cit-art taken from a journal (cap->from == 1) must have a
 *     journal with at least one non-blank title and an imprint.
 *
 * The journal title is evaluated separately for each Cit-art.  Previously a
 * title found on an earlier article let a later article with only blank
 * titles pass.
 */
static Boolean IsPubContentBadEx (PubdescPtr pdp, Boolean strict)
{
  AuthListPtr  alp = NULL;
  AuthorPtr    ap;
  CitArtPtr    cap;
  CitJourPtr   cjp;
  ValNodePtr   names;
  NameStdPtr   nsp;
  PersonIdPtr  pid;
  CharPtr      title;
  ValNodePtr   ttl, vnp;

  if (pdp == NULL) return FALSE;
  /* keep anything with a figure - backbone entry */

  if (! StringHasNoText (pdp->fig)) return FALSE;

  /* look for at least one author name */

  alp = SSECGetAuthListPtr (pdp);
  if (alp == NULL && strict) return TRUE;
  if (IsPatent (pdp)) {
    /* patents can get away with no authors */
  } else if (alp == NULL) {
    /* if accession or GI assigned, can have no authors */
  } else if (alp->choice == 1) {
    names = alp->names;
    if (names == NULL) return TRUE;
    ap = (AuthorPtr) names->data.ptrvalue;
    if (ap == NULL) return TRUE;
    pid = ap->name;
    if (pid == NULL) return TRUE;
    if (pid->choice == 2) {
      nsp = (NameStdPtr) pid->data;
      if (nsp == NULL) return TRUE;
      if (StringHasNoText (nsp->names [0])) return TRUE;
    } else if (pid->choice == 3 || pid->choice == 4) {
      if (StringHasNoText ((CharPtr) pid->data)) return TRUE;
    }
  } else if (alp->choice == 2 || alp->choice == 3) {
    names = alp->names;
    if (names == NULL) return TRUE;
    if (StringHasNoText ((CharPtr) names->data.ptrvalue)) return TRUE;
  }

  /* look for CitArt journal */

  for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice != PUB_Article) continue;
    cap = (CitArtPtr) vnp->data.ptrvalue;
    if (cap == NULL) continue;
    if (cap->from != 1) continue;

    cjp = (CitJourPtr) cap->fromptr;
    if (cjp == NULL) return TRUE;
    if (cjp->title == NULL) return TRUE;

    /* each article must have a usable journal title of its own */
    title = NULL;
    for (ttl = cjp->title; ttl != NULL; ttl = ttl->next) {
      if (! StringHasNoText ((CharPtr) ttl->data.ptrvalue)) {
        title = (CharPtr) ttl->data.ptrvalue;
      }
    }
    if (title == NULL) return TRUE;
    if (cjp->imp == NULL) return TRUE;
  }

  return FALSE;
}
4537 
4538 //LCOV_EXCL_START
4539 //Not part of cleanup
IsPubContentBad(PubdescPtr pdp)4540 extern Boolean IsPubContentBad(PubdescPtr pdp)
4541 
4542 {
4543   return IsPubContentBadEx (pdp, TRUE);
4544 }
4545 //LCOV_EXCL_STOP
4546 
/*
 * Decides whether a Pubdesc should be removed, repairing muids on the way.
 * Returns TRUE when the pub is bad, FALSE otherwise (including NULL pdp).
 *
 *   pdp       publication to examine; its pub list may be modified
 *   userdata  optional pointer to an Int4 muid; values 0 and -1 are ignored
 *   strict    forwarded to IsPubContentBadEx
 *
 * A Pubdesc that consists of exactly one PUB_PMid is bad only when that
 * pmid is 0; no other check is applied to it here.  Otherwise, a usable
 * muid from userdata is handed to RepairBadBackbonePub, remaining zero
 * muids are stripped with RemoveZeroMuids, and the final verdict comes
 * from IsPubContentBadEx.
 */
static Boolean IsPubBad (PubdescPtr pdp, Pointer userdata, Boolean strict)

{
  ValNodePtr  first;
  Int4Ptr     knownMuid;

  if (pdp == NULL) return FALSE;

  /* single pmid not cleared here, left for CheckMinPub with RefSeq protein exception */

  first = pdp->pub;
  if (first != NULL && first->next == NULL && first->choice == PUB_PMid) {
    /* but a 0 pmid is marked for removal */
    return (Boolean) (first->data.intvalue == 0);
  }

  /* if single real muid, repair 0 muid backbone references */

  knownMuid = (Int4Ptr) userdata;
  if (knownMuid != NULL && *knownMuid != 0 && *knownMuid != -1) {
    RepairBadBackbonePub (pdp, *knownMuid);
  }

  /* remove remaining 0 muids */

  RemoveZeroMuids (pdp->pub, &(pdp->pub));

  return IsPubContentBadEx (pdp, strict);
}
4581 
/*
 * Feature callback: flags a publication feature for deletion when IsPubBad
 * (non-strict) rejects its Pubdesc.  userdata is forwarded to IsPubBad.
 * Features of any other type are ignored.
 */
static void RemoveBadPubFeat (SeqFeatPtr sfp, Pointer userdata)

{
  if (sfp->data.choice != SEQFEAT_PUB) return;
  if (! IsPubBad ((PubdescPtr) sfp->data.value.ptrvalue, userdata, FALSE)) return;
  sfp->idx.deleteme = TRUE;
}
4593 
/*
 * Descriptor callback: flags a pub descriptor for deletion when IsPubBad
 * (non-strict) rejects its Pubdesc.  userdata is forwarded to IsPubBad.
 * IsPubBad runs before the extended check, so its muid repairs are applied
 * even to nodes that cannot be flagged.
 */
static void RemoveBadPubDescr (SeqDescrPtr sdp, Pointer userdata)

{
  if (sdp->choice != Seq_descr_pub) return;
  if (! IsPubBad ((PubdescPtr) sdp->data.ptrvalue, userdata, FALSE)) return;
  /* only extended nodes carry the deleteme flag */
  if (sdp->extended == 0) return;
  ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
}
4609 
/*
 * Feature callback: flags a publication feature for deletion when IsPubBad
 * in STRICT mode rejects its Pubdesc.  userdata is forwarded to IsPubBad.
 * Features of any other type are ignored.
 */
static void RemoveBadPubFeatStrict (SeqFeatPtr sfp, Pointer userdata)

{
  if (sfp->data.choice != SEQFEAT_PUB) return;
  if (! IsPubBad ((PubdescPtr) sfp->data.value.ptrvalue, userdata, TRUE)) return;
  sfp->idx.deleteme = TRUE;
}
4621 
/*
 * Descriptor callback: flags a pub descriptor for deletion when IsPubBad in
 * STRICT mode rejects its Pubdesc.  userdata is forwarded to IsPubBad.
 * IsPubBad runs before the extended check, so its muid repairs are applied
 * even to nodes that cannot be flagged.
 */
static void RemoveBadPubDescrStrict (SeqDescrPtr sdp, Pointer userdata)

{
  if (sdp->choice != Seq_descr_pub) return;
  if (! IsPubBad ((PubdescPtr) sdp->data.ptrvalue, userdata, TRUE)) return;
  /* only extended nodes carry the deleteme flag */
  if (sdp->extended == 0) return;
  ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
}
4637 
/*
 * Descriptor callback: flags a user-object descriptor for deletion unless
 * the object has a type AND either that type string is "NcbiAutofix" or
 * "Unverified" (case-insensitive) or the object has data.  A NULL user
 * object, or one without a type, is therefore always flagged.
 * Only extended descriptor nodes can be flagged.  userdata is not used.
 */
static void RemoveEmptyUserObjects (SeqDescrPtr sdp, Pointer userdata)

{
  ObjectIdPtr    kind;
  UserObjectPtr  obj;

  if (sdp->choice != Seq_descr_user) return;

  obj = (UserObjectPtr) sdp->data.ptrvalue;
  kind = (obj != NULL) ? obj->type : NULL;
  if (kind != NULL) {
    if (StringICmp (kind->str, "NcbiAutofix") == 0 ||
        StringICmp (kind->str, "Unverified") == 0 ||
        obj->data != NULL) {
      return;
    }
  }

  if (sdp->extended != 0) {
    ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
  }
}
4661 
/*
 * Folds the muids found in ppr (a PUB_Muid, or recursively the members of a
 * PUB_Equiv) into the accumulator *muidp:
 *    0   no non-zero muid seen so far,
 *   -1   two different non-zero muids were seen (sticky; nothing more is
 *        examined once reached),
 *   else the single non-zero muid seen so far.
 * Muids equal to 0 are ignored.
 */
static void LookForUniqMuidProc (ValNodePtr ppr, Int4Ptr muidp)

{
  ValNodePtr  member;
  Int4        value;

  if (ppr == NULL || muidp == NULL) return;
  if (*muidp == -1) return;

  if (ppr->choice == PUB_Muid) {
    value = ppr->data.intvalue;
    if (value != 0) {
      if (*muidp == 0) {
        *muidp = value;
      } else if (*muidp != value) {
        *muidp = -1;
      }
    }
  } else if (ppr->choice == PUB_Equiv) {
    member = (ValNodePtr) ppr->data.ptrvalue;
    while (member != NULL) {
      LookForUniqMuidProc (member, muidp);
      member = member->next;
    }
  }
}
4689 
/*
 * Pubdesc callback: feeds every publication of pdp to LookForUniqMuidProc,
 * accumulating into the Int4 that userdata points to (see that function
 * for the meaning of 0, -1 and other values).
 *
 * Returns immediately when pdp or userdata is NULL, matching the NULL
 * handling of LookForUniqMuidProc, or when the accumulator already holds -1.
 */
static void LookForUniqueMuid (PubdescPtr pdp, Pointer userdata)

{
  Int4Ptr     muidp;
  ValNodePtr  vnp;

  muidp = (Int4Ptr) userdata;
  if (pdp == NULL || muidp == NULL) return;
  if (*muidp == -1) return;
  for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
    LookForUniqMuidProc (vnp, muidp);
  }
}
4702 
EmptyExceptSerialNumber(CitGenPtr cgp)4703 static Boolean EmptyExceptSerialNumber (CitGenPtr cgp)
4704 {
4705   if (cgp == NULL) return FALSE;
4706 
4707   if (!StringHasNoText (cgp->cit)
4708       || cgp->authors != NULL
4709       || cgp->muid != 0
4710       || cgp->journal != NULL
4711       || !StringHasNoText (cgp->volume)
4712       || !StringHasNoText (cgp->issue)
4713       || !StringHasNoText (cgp->pages)
4714       || cgp->date != NULL
4715       || !StringHasNoText (cgp->title)
4716       || cgp->pmid != 0) {
4717     return FALSE;
4718   } else {
4719     return TRUE;
4720   }
4721 }
4722 
ArePubsMergeableForFig(PubdescPtr fig,PubdescPtr nofig)4723 static Boolean ArePubsMergeableForFig(PubdescPtr fig, PubdescPtr nofig)
4724 {
4725   CitGenPtr  cgp_fig, cgp_nofig;
4726 
4727   if (fig == NULL || nofig == NULL) return FALSE;
4728 
4729   /* name */
4730   if (!StringHasNoText (nofig->name) && !StringHasNoText (fig->name)
4731       && !StringCmp (nofig->name, fig->name)) {
4732     return FALSE;
4733   }
4734   /* fig */
4735   if (!StringHasNoText (nofig->fig) && !StringHasNoText (fig->fig)
4736       && !StringCmp (nofig->fig, fig->fig)) {
4737     return FALSE;
4738   }
4739   /* num */
4740   if (NumberingMatch(nofig->num, fig->num) != 0) {
4741     return FALSE;
4742   }
4743 
4744   if ((nofig->numexc && !fig->numexc)
4745       || (!nofig->numexc && fig->numexc)) {
4746     return FALSE;
4747   }
4748   if ((nofig->poly_a && !fig->poly_a)
4749       || (!nofig->poly_a && fig->poly_a)) {
4750     return FALSE;
4751   }
4752 
4753   /* maploc */
4754   if (!StringHasNoText (nofig->maploc) && !StringHasNoText (fig->maploc)
4755       && !StringCmp (nofig->maploc, fig->maploc)) {
4756     return FALSE;
4757   }
4758 
4759   /* seq-raw */
4760   if (!StringHasNoText (nofig->seq_raw) && !StringHasNoText (fig->seq_raw)
4761       && !StringCmp (nofig->seq_raw, fig->seq_raw)) {
4762     return FALSE;
4763   }
4764 
4765   /* align-group */
4766   if (nofig->align_group > 0 && fig->align_group > 0
4767       && nofig->align_group != fig->align_group) {
4768     return FALSE;
4769   }
4770 
4771   /* comment */
4772   if (!StringHasNoText (nofig->comment) && !StringHasNoText (fig->comment)
4773       && !StringCmp (nofig->comment, fig->comment)) {
4774     return FALSE;
4775   }
4776 
4777   /* reftype */
4778   if (nofig->reftype > 0 && fig->reftype > 0
4779       && nofig->reftype != fig->reftype) {
4780     return FALSE;
4781   }
4782 
4783   if (nofig->pub != NULL && fig->pub != NULL) {
4784     if (nofig->pub->next != NULL
4785         || fig->pub->next != NULL
4786         || nofig->pub->choice != PUB_Gen
4787         || fig->pub->choice != PUB_Gen
4788         || nofig->pub->data.ptrvalue == NULL
4789         || fig->pub->data.ptrvalue == NULL) {
4790       return FALSE;
4791     }
4792     cgp_fig = fig->pub->data.ptrvalue;
4793     cgp_nofig = nofig->pub->data.ptrvalue;
4794     if (!EmptyExceptSerialNumber (cgp_fig)
4795         || !EmptyExceptSerialNumber (cgp_nofig)
4796         || !cgp_fig->serial_number != cgp_nofig->serial_number) {
4797       return FALSE;
4798     }
4799   }
4800 
4801   return TRUE;
4802 }
4803 
4804 /* rescue pub with just fig that is in same chain as real pub by merging data */
4805 
/*
 * SeqEntryExplore callback.  For every pub descriptor in the entry's own
 * descriptor chain that carries figure text, look for a pub descriptor in
 * the same chain that has no figure text and that ArePubsMergeableForFig
 * accepts, move the figure pub's data into it, and mark the figure
 * descriptor for deletion.
 *
 *   sep     Bioseq or Bioseq-set entry whose descriptor chain is examined
 *   mydata  pointer to an Int4; nothing is done when it holds -1
 *   index, indent  unused
 */
static void MergePubFigInChain (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  SeqDescrPtr    descr = NULL;
  PubdescPtr     hasfig = NULL;
  Int4Ptr        muidp;
  PubdescPtr     nofig = NULL;
  ObjValNodePtr  ovp = NULL;
  SeqDescrPtr    sdp_fig, sdp_nofig;

  muidp = (Int4Ptr) mydata;
  if (*muidp == -1) return;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    descr = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    descr = bssp->descr;
  } else return;

  /* attempt to combine any descriptor with a fig with
   * a different publication without a fig, but only if
   * they do not have conflicting data.
   */
  for (sdp_fig = descr; sdp_fig != NULL; sdp_fig = sdp_fig->next) {
    /* only extended nodes can be flagged for deletion afterwards */
    if (sdp_fig->choice != Seq_descr_pub
        || sdp_fig->extended == 0) continue;
    hasfig = (PubdescPtr) sdp_fig->data.ptrvalue;
    if (hasfig == NULL || StringHasNoText (hasfig->fig)) continue;
    ovp = (ObjValNodePtr) sdp_fig;

    for (sdp_nofig = descr; sdp_nofig != NULL; sdp_nofig = sdp_nofig->next) {
      if (sdp_nofig->choice != Seq_descr_pub) continue;
      /* never merge into a descriptor that is itself about to be deleted,
       * otherwise the rescued data would be thrown away with it */
      if (sdp_nofig->extended != 0
          && ((ObjValNodePtr) sdp_nofig)->idx.deleteme) continue;
      nofig = (PubdescPtr) sdp_nofig->data.ptrvalue;
      if (nofig == NULL || ! StringHasNoText (nofig->fig)) continue;
      if (! ArePubsMergeableForFig (hasfig, nofig)) continue;

      /* string fields: ownership moves from hasfig to nofig */
      if (StringHasNoText (nofig->name)) {
        MemFree (nofig->name);
        nofig->name = hasfig->name;
        hasfig->name = NULL;
      }
      /* nofig->fig was verified to hold no text above */
      MemFree (nofig->fig);
      nofig->fig = hasfig->fig;
      hasfig->fig = NULL;
      if (nofig->num == NULL) {
        nofig->num = hasfig->num;
        hasfig->num = NULL;
      }
      if (hasfig->numexc) {
        nofig->numexc = hasfig->numexc;
      }
      if (hasfig->poly_a) {
        nofig->poly_a = hasfig->poly_a;
      }
      if (hasfig->align_group > 0) {
        nofig->align_group = hasfig->align_group;
      }
      if (StringHasNoText (nofig->maploc)) {
        MemFree (nofig->maploc);
        nofig->maploc = hasfig->maploc;
        hasfig->maploc = NULL;
      }
      if (StringHasNoText (nofig->seq_raw)) {
        MemFree (nofig->seq_raw);
        nofig->seq_raw = hasfig->seq_raw;
        hasfig->seq_raw = NULL;
      }
      if (StringHasNoText (nofig->comment)) {
        MemFree (nofig->comment);
        nofig->comment = hasfig->comment;
        hasfig->comment = NULL;
      }
      if (hasfig->reftype > 0) {
        nofig->reftype = hasfig->reftype;
      }

      if (nofig->pub == NULL) {
        /* hand the pub chain over; leaving it on hasfig as well would let
         * both Pubdescs own (and later free) the same chain */
        nofig->pub = hasfig->pub;
        hasfig->pub = NULL;
      }

      ovp->idx.deleteme = TRUE;

      /* hasfig has been emptied and flagged; merging its remains into
       * further descriptors would only spread stale flags */
      break;
    }
  }
}
4903 
/*
 * Feature visitor: when the feature's except_text equals
 * "reasons cited in publication" (case-insensitive comparison), replace it
 * with "reasons given in citation".  userdata is unused.
 */
static void CorrectSfpExceptText (SeqFeatPtr sfp, Pointer userdata)

{
  if (sfp == NULL) return;
  if (StringHasNoText (sfp->except_text)) return;
  if (StringICmp (sfp->except_text, "reasons cited in publication") != 0) return;

  MemFree (sfp->except_text);
  sfp->except_text = StringSave ("reasons given in citation");
}
4913 
IsCodonCorrect(tRNAPtr trp,Uint1 taa)4914 static Boolean IsCodonCorrect (tRNAPtr trp, Uint1 taa)
4915 
4916 {
4917   Uint1           aa;
4918   Uint1           from;
4919   SeqMapTablePtr  smtp;
4920 
4921   if (trp == NULL) return TRUE;
4922   aa = 0;
4923   if (trp->aatype == 2) {
4924     aa = trp->aa;
4925   } else {
4926     from = 0;
4927     switch (trp->aatype) {
4928       case 0:
4929         from = 0;
4930         break;
4931       case 1:
4932         from = Seq_code_iupacaa;
4933         break;
4934       case 2:
4935         from = Seq_code_ncbieaa;
4936         break;
4937       case 3:
4938         from = Seq_code_ncbi8aa;
4939         break;
4940       case 4:
4941         from = Seq_code_ncbistdaa;
4942         break;
4943       default:
4944         break;
4945     }
4946     smtp = SeqMapTableFind (Seq_code_ncbieaa, from);
4947     if (smtp != NULL) {
4948       aa = SeqMapTableConvert (smtp, trp->aa);
4949     }
4950   }
4951   if (aa > 0 && aa != 255) {
4952     if (taa != aa && aa != 'U') {
4953       return FALSE;
4954     }
4955   }
4956   return TRUE;
4957 }
4958 
4959 static    Uint1 codon_xref [4] = {   /* mapping from NCBI2na to codon codes */
4960 2,  /* A */
4961 1,  /* C */
4962 3,  /* G */
4963 0 }; /* T */
4964 
CorrectTrnaCodons(SeqFeatPtr sfp,Pointer userdata)4965 static void CorrectTrnaCodons (SeqFeatPtr sfp, Pointer userdata)
4966 
4967 {
4968   Uint1           alt [4];
4969   Char            ch;
4970   CharPtr         codes;
4971   Uint1           codon [4];
4972   Int2            i, j, k;
4973   Uint1           index;
4974   RnaRefPtr       rrp;
4975   Uint1           residue;
4976   SeqMapTablePtr  smtp;
4977   Uint1           taa;
4978   tRNAPtr         trp;
4979 
4980   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return;
4981   rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
4982   if (rrp == NULL || rrp->type != 3 || rrp->ext.choice != 2) return;
4983   trp = (tRNAPtr) rrp->ext.value.ptrvalue;
4984   if (trp == NULL) return;
4985   if (trp->codon [0] == 255 || trp->codon [1] != 255 || trp->codon [0] >= 64) return;
4986   codes = (CharPtr) userdata;
4987   if (codes == NULL) return;
4988 
4989   /* see if existing codon is correct in current genetic code */
4990 
4991   index = trp->codon [0];
4992   taa = codes [index];
4993   if (IsCodonCorrect (trp, taa)) return;
4994 
4995   /* convert codon to string */
4996 
4997   smtp = SeqMapTableFind (Seq_code_iupacna, Seq_code_ncbi2na);
4998   if (smtp == NULL) return;
4999   for (i = 0, j = 16; i < 3; i++, j /= 4) {
5000     residue = (Uint1) ((Int2) index / j);
5001     index -= (Uint1) (residue * j);
5002     for (k = 0; k < 4; k++) {
5003       if (codon_xref[k] == residue) {
5004         residue = (Uint1) k;
5005         break;
5006       }
5007     }
5008     residue = SeqMapTableConvert (smtp, residue);
5009     codon [i] = residue;
5010   }
5011   codon [3] = 0;
5012 
5013   /* reverse complement */
5014 
5015   for (i = 0; i < 3; i++) {
5016     ch = (Char) codon [2 - i];
5017     if (ch == 'A') {
5018       ch = 'T';
5019     } else if (ch == 'C') {
5020       ch = 'G';
5021     } else if (ch == 'G') {
5022       ch = 'C';
5023     } else if (ch == 'T') {
5024       ch = 'A';
5025     }
5026     alt [i] = ch;
5027   }
5028   alt [3] = 0;
5029 
5030   /* see if revcomp codon is correct in current genetic code */
5031 
5032   index = IndexForCodon (alt, Seq_code_iupacna);
5033   taa = codes [index];
5034   if (IsCodonCorrect (trp, taa)) {
5035     trp->codon [0] = index;
5036     return;
5037   }
5038 
5039   /* just complement */
5040 
5041   for (i = 0; i < 3; i++) {
5042     ch = (Char) codon [i];
5043     if (ch == 'A') {
5044       ch = 'T';
5045     } else if (ch == 'C') {
5046       ch = 'G';
5047     } else if (ch == 'G') {
5048       ch = 'C';
5049     } else if (ch == 'T') {
5050       ch = 'A';
5051     }
5052     alt [i] = ch;
5053   }
5054   alt [3] = 0;
5055 
5056   /* see if complement codon is correct in current genetic code */
5057 
5058   index = IndexForCodon (alt, Seq_code_iupacna);
5059   taa = codes [index];
5060   if (IsCodonCorrect (trp, taa)) {
5061     trp->codon [0] = index;
5062     return;
5063   }
5064 
5065   /* just reverse */
5066 
5067   for (i = 0; i < 3; i++) {
5068     ch = (Char) codon [2 - i];
5069     alt [i] = ch;
5070   }
5071   alt [3] = 0;
5072 
5073   /* see if reverse codon is correct in current genetic code */
5074 
5075   index = IndexForCodon (alt, Seq_code_iupacna);
5076   taa = codes [index];
5077   if (IsCodonCorrect (trp, taa)) {
5078     trp->codon [0] = index;
5079     return;
5080   }
5081 }
5082 
/*
 * BioSource visitor: stores biop through the BioSourcePtr pointer passed
 * as userdata.  Each call overwrites the previously stored value.
 */
static void FindSingleBioSource (BioSourcePtr biop, Pointer userdata)

{
  BioSourcePtr PNTR  result = (BioSourcePtr PNTR) userdata;

  if (biop != NULL && result != NULL) {
    *result = biop;
  }
}
5092 
/*
 * BioSource visitor: unlinks and frees every old_name OrgMod whose subname
 * equals the organism's taxname and whose attrib has no text.
 * userdata is unused.
 */
static void CleanupOldName (BioSourcePtr biop, Pointer userdata)

{
  OrgModPtr PNTR  link;
  OrgModPtr       mod;
  OrgNamePtr      onp;
  OrgRefPtr       orp;

  if (biop == NULL) return;
  orp = biop->org;
  if (orp == NULL) return;
  if (StringHasNoText (orp->taxname)) return;
  onp = orp->orgname;
  if (onp == NULL) return;

  /* link always addresses the pointer that leads to the current node */
  link = &(onp->mod);
  while ((mod = *link) != NULL) {
    if (mod->subtype == ORGMOD_old_name
        && StringCmp (orp->taxname, mod->subname) == 0
        && StringHasNoText (mod->attrib)) {
      *link = mod->next;
      mod->next = NULL;
      OrgModFree (mod);
    } else {
      link = &(mod->next);
    }
  }
}
5120 
/*
 * BioSource visitor: unlinks and frees every ORGMOD_other modifier whose
 * non-empty subname duplicates the taxname or the subname of a gb_acronym,
 * gb_anamorph or gb_synonym modifier (the last one of each kind in the
 * list is the one compared against).  userdata is unused.
 */
static void CleanupOrgModNote (BioSourcePtr biop, Pointer userdata)

{
  CharPtr         acronym = NULL;
  CharPtr         anamorph = NULL;
  OrgModPtr PNTR  link;
  OrgModPtr       mod;
  OrgNamePtr      onp;
  OrgRefPtr       orp;
  CharPtr         synonym = NULL;
  CharPtr         taxname;

  if (biop == NULL) return;
  orp = biop->org;
  if (orp == NULL) return;
  taxname = orp->taxname;
  onp = orp->orgname;
  if (onp == NULL) return;

  /* first pass: remember the strings a note may duplicate */
  for (mod = onp->mod; mod != NULL; mod = mod->next) {
    switch (mod->subtype) {
      case ORGMOD_gb_acronym:
        acronym = mod->subname;
        break;
      case ORGMOD_gb_anamorph:
        anamorph = mod->subname;
        break;
      case ORGMOD_gb_synonym:
        synonym = mod->subname;
        break;
      default:
        break;
    }
  }

  /* second pass: drop the duplicating notes */
  link = &(onp->mod);
  while ((mod = *link) != NULL) {
    if (mod->subtype == ORGMOD_other
        && StringDoesHaveText (mod->subname)
        && (StringCmp (taxname, mod->subname) == 0
            || StringCmp (acronym, mod->subname) == 0
            || StringCmp (anamorph, mod->subname) == 0
            || StringCmp (synonym, mod->subname) == 0)) {
      *link = mod->next;
      mod->next = NULL;
      OrgModFree (mod);
    } else {
      link = &(mod->next);
    }
  }
}
5164 
GetUnambigOverlappingGene(BioseqPtr bsp,SeqLocPtr slp)5165 static SeqFeatPtr GetUnambigOverlappingGene (BioseqPtr bsp, SeqLocPtr slp)
5166 
5167 {
5168   SeqMgrFeatContext  context;
5169   SeqFeatPtr         gene;
5170   Int2               i;
5171   Int4Ptr            ivals;
5172   Int2               j;
5173   SeqFeatPtr         next;
5174   Int2               numivals;
5175 
5176   gene = SeqMgrGetOverlappingGene (slp, &context);
5177   if (gene == NULL) return NULL;
5178   numivals = context.numivals;
5179   ivals = context.ivals;
5180   next = SeqMgrGetNextFeature (bsp, gene, SEQFEAT_GENE, 0, &context);
5181   if (next == NULL) return gene;
5182   if (numivals != context.numivals) return gene;
5183   for (i = 0, j = 0; i < numivals; i++) {
5184     if (ivals [j] != context.ivals [j]) return gene;
5185     j++;
5186     if (ivals [j] != context.ivals [j]) return gene;
5187     j++;
5188   }
5189   return NULL;
5190 }
5191 
5192 //LCOV_EXCL_START
5193 //This is never called, because RemoveUnneededGeneXrefs is
5194 //only called when isEmblDdbj is false
/*
 * Feature visitor: sets the Boolean that userdata points to when a protein
 * feature has a processed value greater than 0.
 */
static void LookForPeptides (SeqFeatPtr sfp, Pointer userdata)

{
  BoolPtr     foundp = (BoolPtr) userdata;
  ProtRefPtr  prp;

  if (sfp == NULL) return;
  if (sfp->data.choice != SEQFEAT_PROT) return;
  if (foundp == NULL) return;

  prp = (ProtRefPtr) sfp->data.value.ptrvalue;
  if (prp != NULL && prp->processed > 0) {
    *foundp = TRUE;
  }
}
5211 //LCOV_EXCL_STOP
5212 
/*
 * Bioseq visitor: for every non-gene feature with an unsuppressed gene
 * xref, find the unambiguous overlapping gene and, if the xref names the
 * same gene (by locus_tag, else locus, else first synonym, compared
 * case-insensitively), remove all gene xrefs from the feature.
 *
 * userdata optionally points to a Boolean isEmblDdbj flag; when it is set,
 * a coding region whose product carries a processed protein feature keeps
 * its xref.
 */
static void RemoveUnneededGeneXrefs (BioseqPtr bsp, Pointer userdata)

{
  SeqMgrFeatContext    fcontext;
  BoolPtr              flagp;
  SeqFeatPtr           gene;
  GeneRefPtr           grp;
  GeneRefPtr           grpx;
  Boolean              hasPeptide;
  Boolean              isEmblDdbj = FALSE;
  SeqFeatXrefPtr PNTR  link;
  BioseqPtr            pbsp;
  Boolean              redundant;
  SeqFeatPtr           sfp;
  CharPtr              syn1;
  CharPtr              syn2;
  SeqFeatXrefPtr       xref;

  flagp = (BoolPtr) userdata;
  if (flagp != NULL) {
    isEmblDdbj = *flagp;
  }

  for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
       sfp != NULL;
       sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext)) {

    if (sfp->data.choice == SEQFEAT_GENE) continue;

    grp = SeqMgrGetGeneXref (sfp);
    if (grp == NULL || SeqMgrGeneIsSuppressed (grp)) continue;

    gene = GetUnambigOverlappingGene (bsp, sfp->location);
    if (gene == NULL || gene->data.choice != SEQFEAT_GENE) continue;
    grpx = (GeneRefPtr) gene->data.value.ptrvalue;
    if (grpx == NULL) continue;

    /* the first identifier kind present on both sides decides */
    redundant = FALSE;
    if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grpx->locus_tag)) {
      if (StringICmp (grp->locus_tag, grpx->locus_tag) == 0) {
        redundant = TRUE;
      }
    } else if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grpx->locus)) {
      if (StringICmp (grp->locus, grpx->locus) == 0) {
        redundant = TRUE;
      }
    } else if (grp->syn != NULL && grpx->syn != NULL) {
      syn1 = (CharPtr) grp->syn->data.ptrvalue;
      syn2 = (CharPtr) grpx->syn->data.ptrvalue;
      if (StringDoesHaveText (syn1) && StringDoesHaveText (syn2)
          && StringICmp (syn1, syn2) == 0) {
        redundant = TRUE;
      }
    }

    if (redundant && isEmblDdbj && sfp->data.choice == SEQFEAT_CDREGION) {
      //LCOV_EXCL_START
      //This is never called, because RemoveUnneededGeneXrefs is
      //only called when isEmblDdbj is false
      hasPeptide = FALSE;
      pbsp = BioseqFindFromSeqLoc (sfp->product);
      if (pbsp != NULL) {
        VisitFeaturesOnBsp (pbsp, (Pointer) &hasPeptide, LookForPeptides);
        if (hasPeptide) {
          redundant = FALSE;
        }
      }
      //LCOV_EXCL_STOP
    }

    if (! redundant) continue;

    /* unlink and free every gene xref, keeping the other xrefs in order */
    link = (SeqFeatXrefPtr PNTR) &(sfp->xref);
    while ((xref = *link) != NULL) {
      if (xref->data.choice == SEQFEAT_GENE) {
        *link = xref->next;
        xref->next = NULL;
        SeqFeatXrefFree (xref);
      } else {
        link = &(xref->next);
      }
    }
  }
}
5299 
/*
 * Bioseq visitor: for a protein Bioseq that has no SEQID_OTHER id, carries
 * a title descriptor, and sits (at any depth) inside a nuc-prot set, mark
 * the title for deletion when it differs, ignoring case, from the defline
 * that NewCreateDefLineBuf generates.  Titles stored on non-extended
 * ValNodes cannot be flagged and are left alone.  userdata is unused.
 */
static void StripBadTitleFromProteinProducts (BioseqPtr bsp, Pointer userdata)

{
  BioseqSetPtr   bssp;
  CharPtr        buf;
  size_t         buflen = 1001;
  ObjValNodePtr  ovp;
  SeqIdPtr       sip;
  CharPtr        title;
  ValNodePtr     vnp;

  if (bsp == NULL) return;
  if (! ISA_aa (bsp->mol)) return;
  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_OTHER) return;
  }
  vnp = BioseqGetSeqDescr (bsp, Seq_descr_title, NULL);
  if (vnp == NULL) return;

  if (bsp->idx.parenttype != OBJ_BIOSEQSET) return;

  /* climb the parent chain until a nuc-prot set is found or it runs out */
  bssp = (BioseqSetPtr) bsp->idx.parentptr;
  while (bssp != NULL && bssp->_class != BioseqseqSet_class_nuc_prot) {
    if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
      bssp = (BioseqSetPtr) bssp->idx.parentptr;
    } else {
      bssp = NULL;
    }
  }
  if (bssp == NULL) return;

  title = (CharPtr) vnp->data.ptrvalue;

  buf = MemNew (sizeof (Char) * (buflen + 1));
  if (buf == NULL) return;

  if (NewCreateDefLineBuf (NULL, bsp, buf, buflen, TRUE, FALSE)
      && StringICmp (buf, title) != 0
      && vnp->extended != 0) {
    ovp = (ObjValNodePtr) vnp;
    ovp->idx.deleteme = TRUE;
  }

  MemFree (buf);
}
5355 
/*
 * Recurses through genbank, mut..eco, wgs and small-genome sets and, for
 * each nuc-prot set reached, runs StripBadTitleFromProteinProducts on its
 * Bioseqs.  Other set classes and plain Bioseq entries are ignored.
 */
static void MarkBadProtTitlesInNucProts (SeqEntryPtr sep)

{
  BioseqSetPtr  bssp;
  SeqEntryPtr   child;
  Boolean       isContainer;

  if (sep == NULL || ! IS_Bioseq_set (sep)) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  isContainer = FALSE;
  if (bssp->_class == BioseqseqSet_class_genbank) {
    isContainer = TRUE;
  } else if (bssp->_class >= BioseqseqSet_class_mut_set && bssp->_class <= BioseqseqSet_class_eco_set) {
    isContainer = TRUE;
  } else if (bssp->_class == BioseqseqSet_class_wgs_set) {
    isContainer = TRUE;
  } else if (bssp->_class == BioseqseqSet_class_small_genome_set) {
    isContainer = TRUE;
  }

  if (isContainer) {
    for (child = bssp->seq_set; child != NULL; child = child->next) {
      MarkBadProtTitlesInNucProts (child);
    }
    return;
  }

  if (bssp->_class == BioseqseqSet_class_nuc_prot) {
    VisitBioseqsInSep (sep, NULL, StripBadTitleFromProteinProducts);
  }
}
5378 
/*
 * Adds a user descriptor to sep holding an NcbiCleanup user object that
 * records the cleanup method name, NCBI_CLEANUP_VERSION and the current
 * month, day and year.
 *
 *   sep        entry that receives the new descriptor
 *   gpipeMode  selects "GpipeSeqEntryCleanup" instead of
 *              "SeriousSeqEntryCleanup" as the method string
 *
 * Nothing is added, and nothing is leaked, when the date, the user object
 * or the descriptor cannot be created.
 */
static void MakeNcbiCleanupObject (SeqEntryPtr sep, Boolean gpipeMode)

{
  DatePtr        dp;
  ValNodePtr     sdp;
  UserObjectPtr  uop;

  dp = DateCurr ();
  if (dp == NULL) return;

  uop = CreateNcbiCleanupUserObject ();
  if (uop == NULL) {
    /* the date is owned here and must not outlive this early exit */
    DateFree (dp);
    return;
  }

  if (gpipeMode) {
//LCOV_EXCL_START
    // GPIPE doesn't use C Toolkit
    AddStringToNcbiCleanupUserObject (uop, "method", "GpipeSeqEntryCleanup");
//LCOV_EXCL_STOP
  } else {
    AddStringToNcbiCleanupUserObject (uop, "method", "SeriousSeqEntryCleanup");
  }
  AddIntegerToNcbiCleanupUserObject (uop, "version", NCBI_CLEANUP_VERSION);

  /* data [1] is stored relative to 1900 */
  AddIntegerToNcbiCleanupUserObject (uop, "month", dp->data [2]);
  AddIntegerToNcbiCleanupUserObject (uop, "day", dp->data [3]);
  AddIntegerToNcbiCleanupUserObject (uop, "year", dp->data [1] + 1900);

  DateFree (dp);

  sdp = NewDescrOnSeqEntry (sep, Seq_descr_user);
  if (sdp == NULL) {
    /* no descriptor took ownership, so release the user object */
    UserObjectFree (uop);
    return;
  }
  sdp->data.ptrvalue = uop;
}
5412 
5413 typedef struct ssecpseudo {
5414   CharPtr  genepseudo;
5415   CharPtr  pseudogene;
5416   Boolean inconsistent;
5417 } SsecPseudoData, PNTR SsecPseudoPtr;
5418 
SsecTestPseudoProc(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)5419 static Boolean LIBCALLBACK SsecTestPseudoProc (
5420   SeqFeatPtr sfp,
5421   SeqMgrFeatContextPtr context
5422 )
5423 
5424 
5425 {
5426   SsecPseudoPtr  bpp;
5427   GBQualPtr      gbq;
5428 
5429   if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE || context == NULL) return TRUE;
5430   bpp = context->userdata;
5431   if (bpp == NULL) return TRUE;
5432 
5433   if (! sfp->pseudo) return TRUE;
5434 
5435   for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
5436     if (StringCmp (gbq->qual, "pseudogene") != 0) continue;
5437     if (bpp->pseudogene == NULL) {
5438       bpp->pseudogene = gbq->val;
5439     } else if (StringCmp (gbq->val, bpp->pseudogene) != 0) {
5440       bpp->inconsistent = TRUE;
5441     }
5442   }
5443 
5444   return TRUE;
5445 }
5446 
SsecSetPseudoProc(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)5447 static Boolean LIBCALLBACK SsecSetPseudoProc (
5448   SeqFeatPtr sfp,
5449   SeqMgrFeatContextPtr context
5450 )
5451 
5452 
5453 {
5454   SsecPseudoPtr  bpp;
5455   GBQualPtr      gbq;
5456 
5457   if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE || context == NULL) return TRUE;
5458   bpp = context->userdata;
5459   if (bpp == NULL) return TRUE;
5460 
5461   if (! sfp->pseudo) return TRUE;
5462 
5463   gbq = GBQualNew ();
5464   if (gbq == NULL) return TRUE;
5465 
5466   gbq->qual = StringSave ("pseudogene");
5467   gbq->val = StringSave (bpp->pseudogene);
5468 
5469   gbq->next = sfp->qual;
5470   sfp->qual = gbq;
5471 
5472   return TRUE;
5473 }
5474 
/*
 * Feature visitor for pseudo gene features.  Collects the "pseudogene"
 * qualifier value from the gene and from the features explored within the
 * gene's location; when one consistent value exists, prepends a
 * "pseudogene" qualifier with that value to the gene and, through
 * SsecSetPseudoProc, to the explored pseudo features.
 * Gives up without changes on any inconsistency.  userdata is unused.
 */
static void SynchronizePseudogenesProc (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  GBQualPtr       added;
  BioseqPtr       bsp;
  GBQualPtr       qual;
  SsecPseudoData  state;

  if (sfp == NULL) return;
  if (sfp->data.choice != SEQFEAT_GENE) return;
  if (! sfp->pseudo) return;

  MemSet ((Pointer) &state, 0, sizeof (SsecPseudoData));

  /* value(s) recorded on the gene itself */

  for (qual = sfp->qual; qual != NULL; qual = qual->next) {
    if (StringCmp (qual->qual, "pseudogene") == 0) {
      if (state.genepseudo == NULL) {
        state.genepseudo = qual->val;
      } else if (StringCmp (qual->val, state.genepseudo) != 0) {
        state.inconsistent = TRUE;
      }
    }
  }
  if (state.inconsistent) return;

  bsp = BioseqFindFromSeqLoc (sfp->location);
  if (bsp == NULL) return;

  /* value(s) recorded on features within the gene location */

  SeqMgrExploreFeatures (bsp, (Pointer) &state, SsecTestPseudoProc, sfp->location, NULL, NULL);
  if (state.inconsistent) return;

  /* settle on a single value, or stop */

  if (state.pseudogene == NULL) {
    if (state.genepseudo == NULL) return;
    state.pseudogene = state.genepseudo;
  } else if (state.genepseudo != NULL) {
    if (StringCmp (state.pseudogene, state.genepseudo) != 0) return;
  }

  added = GBQualNew ();
  if (added == NULL) return;

  added->qual = StringSave ("pseudogene");
  added->val = StringSave (state.pseudogene);

  added->next = sfp->qual;
  sfp->qual = added;

  SeqMgrExploreFeatures (bsp, (Pointer) &state, SsecSetPseudoProc, sfp->location, NULL, NULL);
}
5531 
5532 typedef struct genexrefskew {
5533   Int4  gene_locus;
5534   Int4  gene_locus_tag;
5535   Int4  xref_locus;
5536   Int4  xref_locus_tag;
5537 } GeneSkew, PNTR GeneSkewPtr;
5538 
/*
 * Feature visitor: tallies into the GeneSkew passed as userdata how many
 * gene features, and how many unsuppressed gene xrefs, carry locus text
 * and locus_tag text.
 */
static void FindGeneXrefMismatch (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  GeneRefPtr      grp;
  GeneSkewPtr     tally = (GeneSkewPtr) userdata;
  SeqFeatXrefPtr  xref;

  if (sfp == NULL || tally == NULL) return;

  if (sfp->data.choice == SEQFEAT_GENE) {
    grp = (GeneRefPtr) sfp->data.value.ptrvalue;
    if (grp != NULL && StringDoesHaveText (grp->locus)) {
      tally->gene_locus++;
    }
    if (grp != NULL && StringDoesHaveText (grp->locus_tag)) {
      tally->gene_locus_tag++;
    }
  }

  for (xref = sfp->xref; xref != NULL; xref = xref->next) {
    if (xref->data.choice != SEQFEAT_GENE) continue;
    grp = (GeneRefPtr) xref->data.value.ptrvalue;
    if (grp == NULL) continue;
    if (SeqMgrGeneIsSuppressed (grp)) continue;
    if (StringDoesHaveText (grp->locus)) {
      tally->xref_locus++;
    }
    if (StringDoesHaveText (grp->locus_tag)) {
      tally->xref_locus_tag++;
    }
  }
}
5577 
/*
 * Feature visitor: in each unsuppressed gene xref that has locus_tag text
 * and a NULL locus, moves the locus_tag string into locus.
 * userdata is unused.
 */
static void ForceGeneXrefToLocus (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  GeneRefPtr      gene;
  SeqFeatXrefPtr  xr;

  if (sfp == NULL) return;

  for (xr = sfp->xref; xr != NULL; xr = xr->next) {
    if (xr->data.choice == SEQFEAT_GENE) {
      gene = (GeneRefPtr) xr->data.value.ptrvalue;
      if (gene != NULL && ! SeqMgrGeneIsSuppressed (gene)) {
        if (gene->locus == NULL && StringDoesHaveText (gene->locus_tag)) {
          gene->locus = gene->locus_tag;
          gene->locus_tag = NULL;
        }
      }
    }
  }
}
5599 
/*
 * Feature visitor: in each unsuppressed gene xref that has locus text and
 * a NULL locus_tag, moves the locus string into locus_tag.
 * userdata is unused.
 */
static void ForceGeneXrefToLocusTag (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  GeneRefPtr      gene;
  SeqFeatXrefPtr  xr;

  if (sfp == NULL) return;

  for (xr = sfp->xref; xr != NULL; xr = xr->next) {
    if (xr->data.choice == SEQFEAT_GENE) {
      gene = (GeneRefPtr) xr->data.value.ptrvalue;
      if (gene != NULL && ! SeqMgrGeneIsSuppressed (gene)) {
        if (gene->locus_tag == NULL && StringDoesHaveText (gene->locus)) {
          gene->locus_tag = gene->locus;
          gene->locus = NULL;
        }
      }
    }
  }
}
5621 
/*
 * When the genes in sep use only locus_tag while the gene xrefs use only
 * locus, converts the xrefs to locus_tag; in the mirrored situation
 * converts the xrefs to locus.  Any other mix is left untouched.
 */
static void FixGeneXrefSkew (
  SeqEntryPtr sep
)

{
  Boolean   genesUseLocusOnly;
  Boolean   genesUseTagOnly;
  GeneSkew  tally;
  Boolean   xrefsUseLocusOnly;
  Boolean   xrefsUseTagOnly;

  if (sep == NULL) return;

  MemSet ((Pointer) &tally, 0, sizeof (GeneSkew));
  VisitFeaturesInSep (sep, (Pointer) &tally, FindGeneXrefMismatch);

  genesUseTagOnly = (Boolean) (tally.gene_locus == 0 && tally.gene_locus_tag > 0);
  genesUseLocusOnly = (Boolean) (tally.gene_locus > 0 && tally.gene_locus_tag == 0);
  xrefsUseLocusOnly = (Boolean) (tally.xref_locus > 0 && tally.xref_locus_tag == 0);
  xrefsUseTagOnly = (Boolean) (tally.xref_locus == 0 && tally.xref_locus_tag > 0);

  if (genesUseTagOnly && xrefsUseLocusOnly) {
    VisitFeaturesInSep (sep, NULL, ForceGeneXrefToLocusTag);
  } else if (genesUseLocusOnly && xrefsUseTagOnly) {
    VisitFeaturesInSep (sep, NULL, ForceGeneXrefToLocus);
  }
}
5643 
/*
 * Descriptor visitor that checks whether a pub descriptor has a non-NULL
 * fig field.  The result is stored only in a local, so the function has
 * no observable effect.  userdata is unused.
 *
 * NOTE(review): looks like a leftover debugging probe (a place to set a
 * breakpoint) -- confirm before removing it and its call sites.
 */
static void FindPubWithFig(SeqDescPtr sdp, Pointer userdata)
{
    PubdescPtr pdp;
    Int4 a = 0;

    if (sdp == NULL || sdp->choice != Seq_descr_pub) return;

    /* a pub descriptor may carry no Pubdesc at all */
    pdp = (PubdescPtr)sdp->data.ptrvalue;
    if (pdp == NULL) return;

    if (pdp->fig != NULL) {
        a = 1;
    }
    (void) a;
}
5656 
SeriousSeqEntryCleanupEx(SeqEntryPtr sep,SeqEntryFunc taxfun,SeqEntryFunc taxmerge,Boolean doPseudo,Boolean gpipeMode)5657 static void SeriousSeqEntryCleanupEx (SeqEntryPtr sep, SeqEntryFunc taxfun, SeqEntryFunc taxmerge, Boolean doPseudo, Boolean gpipeMode)
5658 
5659 {
5660   BioSourcePtr    biop;
5661   BioseqSetPtr    bssp;
5662   Int2            code;
5663   CharPtr         codes;
5664   Uint2           entityID;
5665   GeneticCodePtr  gncp;
5666   Boolean         hasMarkedGenes = FALSE;
5667   Boolean         isEmblOrDdbj = FALSE;
5668   ErrSev          lsev;
5669   ErrSev          msev;
5670   Int4            muid = 0;
5671   Boolean         objMgrFilter [OBJ_MAX];
5672   SeqEntryPtr     oldscope;
5673   Boolean         lclGnlOnly = TRUE;
5674   SeqEntryPtr     tmp;
5675   ValNodePtr      vnp;
5676 
5677   if (sep == NULL) return;
5678   oldscope = SeqEntrySetScope (sep);
5679   msev = ErrSetMessageLevel (SEV_MAX);
5680   lsev = ErrSetLogLevel (SEV_MAX);
5681   entityID = SeqMgrGetEntityIDForSeqEntry (sep);
5682   /* clear indexes, since CleanupEmptyFeatCallback removes genes, etc. */
5683   SeqMgrClearFeatureIndexes (entityID, NULL);
5684   RemoveAllNcbiCleanupUserObjects (sep);
5685   RemoveDuplicateNestedSetsForEntityIDNoUpdate (entityID);
5686   SeqMgrClearFeatureIndexes (entityID, NULL);
5687   if (IS_Bioseq_set (sep)) {
5688     bssp = (BioseqSetPtr) sep->data.ptrvalue;
5689     if (bssp != NULL && bssp->_class == BioseqseqSet_class_genbank) {
5690       tmp = bssp->seq_set;
5691       if (tmp != NULL && tmp->next == NULL && (IS_Bioseq (tmp))) {
5692         /* coerce genbank set on top of single sequence to nuc-prot set for unnecessary set removal */
5693         bssp->_class = BioseqseqSet_class_nuc_prot;
5694         RenormalizeNucProtSets (sep, TRUE);
5695       }
5696     }
5697   }
5698   MemSet ((Pointer) objMgrFilter, FALSE, sizeof (objMgrFilter));
5699   objMgrFilter [OBJ_SEQFEAT] = TRUE;
5700   GatherObjectsInEntity (entityID, 0, NULL, MarkMovedGeneGbquals, (Pointer) &hasMarkedGenes, objMgrFilter);
5701   BasicSeqEntryCleanup (sep);
5702   SeqEntryExplore (sep, NULL, CleanupGenbankCallback);
5703   ConvertFullLenSourceFeatToDesc (sep);
5704   ConvertFullLenPubFeatToDesc (sep);
5705   SeqEntryExplore (sep, NULL, CleanupEmptyFeatCallback);
5706   SeqEntryExplore (sep, NULL, MergeAdjacentAnnotsCallback);
5707   /* reindex, since PseudoGeneOverlap gets best overlapping gene */
5708   SeqMgrIndexFeatures (entityID, NULL);
5709   EntryChangeImpFeat(sep);     /* change any CDS ImpFeat to real CdRegion */
5710   /* MoveRnaGBQualProductToName (sep); */ /* move rna gbqual product to rna-ref.ext.name */
5711   /* MoveProtGBQualProductToName (sep); */ /* move prot gbqual product to prot-ref.name */
5712   /* MoveCdsGBQualProductToName (sep); */ /* move cds gbqual product to prot-ref.name */
5713   /* MoveFeatGBQualsToFields (sep); */ /* move feature partial, exception to fields */
5714   /* ExtendGeneFeatIfOnMRNA (0, sep); */ /* gene on mRNA is full length */
5715 
5716   SeqEntryExplore (sep, (Pointer) &isEmblOrDdbj, CheckForEmblDdbjID);
5717   VisitBioseqsInSep (sep, (Pointer) &lclGnlOnly, CheckForLclGnlOnly);
5718   VisitBioseqsInSep (sep, NULL, ExtendSingleGeneOnMRNA);
5719 
5720   RemoveBioSourceOnPopSet (sep, NULL);
5721   RemoveMolInfoOnPopSet (sep, NULL);
5722   /*
5723   SeqEntryExplore (sep, NULL, DeleteMultipleTitles);
5724   */
5725   SeqEntryExplore (sep, NULL, RemoveMultipleTitles);
5726   SeqEntryExplore (sep, NULL, MergeMultipleDates);
5727   VisitPubdescsInSep (sep, (Pointer) &muid, LookForUniqueMuid);
5728   if (lclGnlOnly) {
5729     VisitDescriptorsInSep (sep, (Pointer) &muid, RemoveBadPubDescrStrict);
5730     VisitFeaturesInSep (sep, (Pointer) &muid, RemoveBadPubFeatStrict);
5731   } else {
5732     VisitDescriptorsInSep (sep, (Pointer) &muid, RemoveBadPubDescr);
5733     VisitFeaturesInSep (sep, (Pointer) &muid, RemoveBadPubFeat);
5734   }
5735   SeqEntryExplore (sep, (Pointer) &muid, MergePubFigInChain);
5736   VisitFeaturesInSep (sep, NULL, CorrectSfpExceptText);
5737   if (! isEmblOrDdbj) {
5738     SeqEntryExplore (sep, NULL, MergeEquivCitSubs);
5739   }
5740   VisitDescriptorsInSep (sep, NULL, RemoveEmptyUserObjects);
5741   DeleteMarkedObjects(0, OBJ_SEQENTRY, (Pointer)sep);
5742   VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
5743   EntryMergeDupBioSources (sep); /* do before and after SE2A3 */
5744   VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
5745   LoopSeqEntryToAsn3(sep, TRUE, FALSE, taxfun, taxmerge, gpipeMode, isEmblOrDdbj);
5746   VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
5747   /* EntryStripSerialNumber(sep); */ /* strip citation serial numbers */
5748   MovePopPhyMutPubs (sep);
5749   VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
5750   EntryChangeGBSource(sep);   /* at least remove redundant information in GBBlocks */
5751   VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
5752   EntryCheckGBBlock(sep);
5753   VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
5754   SeqEntryMoveDbxrefs(sep); /* db_xref gbqual to sfp->dbxref */
5755   VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
5756   EntryMergeDupBioSources(sep);
5757   SeqEntryExplore (sep, NULL, GetRidOfEmptyFeatsDescCallback);
5758   CleanUpPseudoProductsEx (entityID, sep, doPseudo);
5759   RenormalizeNucProtSets (sep, TRUE);
5760   /*
5761   StripTitleFromProtsInNucProts (sep);
5762   */
5763   MarkBadProtTitlesInNucProts (sep);
5764   MoveFeatsFromPartsSet (sep);
5765   move_cds_ex (sep, doPseudo);
5766   SeqEntryExplore (sep, NULL, MolInfoUpdate);
5767   DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
5768   /* do these again, since SE2A3 can create full length source feature */
5769   SeqEntryExplore (sep, NULL, CleanupGenbankCallback);
5770   ConvertFullLenSourceFeatToDesc (sep);
5771   ConvertFullLenPubFeatToDesc (sep);
5772   SeqEntryExplore (sep, NULL, CleanupEmptyFeatCallback);
5773   SeqEntryExplore (sep, NULL, MergeAdjacentAnnotsCallback);
5774   /* VisitBioseqsInSep (sep, NULL, BarCodeTechToKeyword); */
5775 
5776   /* tbl2asn now calls processes EC numbers with reporting before SSEC */
5777   UpdateReplacedECNumbersEx (sep, NULL, NULL, TRUE, FALSE);
5778 
5779   /*
5780    if (GetAppProperty ("NcbiTbl2Asn") != NULL) {
5781     DeleteBadECNumbers (sep);
5782   }
5783   */
5784 
5785   /* reindex, since CdEndCheck (from CdCheck) gets best overlapping gene */
5786   SeqMgrIndexFeatures (entityID, NULL);
5787   biop = NULL;
5788   if (VisitBioSourcesInSep (sep, (Pointer) &biop, FindSingleBioSource) == 1) {
5789     code = SeqEntryToGeneticCode (sep, NULL, NULL, 0);
5790     gncp = GeneticCodeFind (code, NULL);
5791     if (gncp == NULL) {
5792       gncp = GeneticCodeFind (1, NULL);
5793     }
5794     if (gncp != NULL) {
5795       codes = NULL;
5796       for (vnp = (ValNodePtr) gncp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
5797         if (vnp->choice == 3) {
5798           codes = (CharPtr) vnp->data.ptrvalue;
5799         }
5800       }
5801       if (codes != NULL) {
5802         VisitFeaturesInSep (sep, (Pointer) codes, CorrectTrnaCodons);
5803       }
5804     }
5805   }
5806   VisitBioSourcesInSep (sep, NULL, CleanupOldName);
5807   VisitBioSourcesInSep (sep, NULL, CleanupOrgModNote);
5808   CdCheck (sep, NULL);
5809   /* do again to catch occasional duplicate pub on some set components */
5810   SeqEntryPubsAsn4 (sep, isEmblOrDdbj);
5811   SeqMgrIndexFeatures (entityID, NULL);
5812   if (hasMarkedGenes) {
5813     //LCOV_EXCL_START
5814     //This is never called; basic cleanup takes care of converting
5815     // gene quals to gene xrefs
5816     MemSet ((Pointer) objMgrFilter, FALSE, sizeof (objMgrFilter));
5817     objMgrFilter [OBJ_SEQFEAT] = TRUE;
5818     GatherObjectsInEntity (entityID, 0, NULL, DeleteBadMarkedGeneXrefs, NULL, objMgrFilter);
5819     //LCOV_EXCL_STOP
5820   }
5821   if (! gpipeMode) {
5822     if (! isEmblOrDdbj) {   /* for now leave gene xrefs on EMBL and DDBJ */
5823       VisitBioseqsInSep (sep, (Pointer) &isEmblOrDdbj, RemoveUnneededGeneXrefs);
5824     }
5825   }
5826   ResynchCodingRegionPartials (sep);
5827   ResynchMessengerRNAPartials (sep);
5828   ResynchProteinPartials (sep);
5829   InstantiateProteinTitles (entityID, NULL);
5830   if (isEmblOrDdbj) {
5831     RemoveStructuredCommentKeywords (entityID);
5832   } else {
5833     RemoveAllStructuredCommentKeywords (entityID);
5834   }
5835   AddStructuredCommentKeywords (entityID);
5836   SeqMgrClearFeatureIndexes (entityID, NULL);
5837 
5838   SeqEntryExplore (sep, NULL, RemoveIdenticalPubs);
5839   DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
5840 
5841   FixGeneXrefSkew (sep);
5842 
5843   MakeNcbiCleanupObject (sep, gpipeMode);
5844   /*
5845   SeqMgrIndexFeatures (entityID, NULL);
5846   VisitFeaturesInSep (sep, NULL, SynchronizePseudogenesProc);
5847   */
5848   BasicSeqEntryCleanup (sep);
5849   if (! gpipeMode) {
5850     NormalizeDescriptorOrder (sep);
5851   }
5852   TransTableFreeAll ();
5853   ErrSetMessageLevel (msev);
5854   ErrSetLogLevel (lsev);
5855   SeqEntrySetScope (oldscope);
5856 }
5857 
/*
 * Public entry point for the serious cleanup pass.
 *
 *   sep       - entry to clean; forwarded unchanged
 *   taxfun    - taxonomy callback, forwarded unchanged
 *   taxmerge  - taxonomy merge callback, forwarded unchanged
 *
 * Delegates to SeriousSeqEntryCleanupEx with the two trailing flags fixed
 * at TRUE, FALSE (presumably doPseudo = TRUE, gpipeMode = FALSE - confirm
 * against the Ex prototype).
 */
extern void SeriousSeqEntryCleanup (
  SeqEntryPtr sep,
  SeqEntryFunc taxfun,
  SeqEntryFunc taxmerge
)

{
  SeriousSeqEntryCleanupEx (sep, taxfun, taxmerge, TRUE, FALSE);
}
5863 
5864 //LCOV_EXCL_START
/*
 * Bulk variant of the serious cleanup: no taxonomy callbacks are supplied
 * and both trailing flags of SeriousSeqEntryCleanupEx are FALSE
 * (presumably doPseudo = FALSE, gpipeMode = FALSE - confirm against the
 * Ex prototype).
 */
extern void SeriousSeqEntryCleanupBulk (
  SeqEntryPtr sep
)

{
  SeriousSeqEntryCleanupEx (sep, NULL, NULL, FALSE, FALSE);
}
5870 
5871 // GPIPE doesn't use cleanasn
/*
 * GPIPE variant of the serious cleanup: no taxonomy callbacks are supplied
 * and both trailing flags of SeriousSeqEntryCleanupEx are TRUE
 * (presumably doPseudo = TRUE, gpipeMode = TRUE - confirm against the
 * Ex prototype).
 */
extern void GpipeSeqEntryCleanup (
  SeqEntryPtr sep
)

{
  SeriousSeqEntryCleanupEx (sep, NULL, NULL, TRUE, TRUE);
}
5877 
5878 // used by ExtendedSeqEntryCleanup (not used)
5879 typedef struct dummysmfedata {
5880   Int4        max;
5881   Int4        num_at_max;
5882   Int4        num_trans_spliced;
5883   Boolean     equivalent_genes;
5884   GeneRefPtr  grp_at_max;
5885 } DummySmfeData, PNTR DummySmfePtr;
5886 
DummySMFEProc(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)5887 static Boolean LIBCALLBACK DummySMFEProc (
5888   SeqFeatPtr sfp,
5889   SeqMgrFeatContextPtr context
5890 )
5891 
5892 
5893 {
5894   DummySmfePtr  dsp;
5895   GeneRefPtr    grp, grpx;
5896   Int4          len;
5897   Boolean       redundantgenexref = FALSE;
5898   CharPtr       syn1, syn2;
5899 
5900   if (sfp == NULL || context == NULL) return TRUE;
5901   dsp = context->userdata;
5902   if (dsp == NULL) return TRUE;
5903   if (sfp->data.choice != SEQFEAT_GENE) return TRUE;
5904   grp = (GeneRefPtr) sfp->data.value.ptrvalue;
5905   if (grp == NULL) return TRUE;
5906 
5907   len = SeqLocLen (sfp->location);
5908   if (len < dsp->max) {
5909     dsp->max = len;
5910     dsp->num_at_max = 1;
5911     dsp->num_trans_spliced = 0;
5912     if (StringISearch (sfp->except_text, "trans-splicing") != NULL) {
5913       (dsp->num_trans_spliced)++;
5914     }
5915     dsp->equivalent_genes = FALSE;
5916     dsp->grp_at_max = grp;
5917   } else if (len == dsp->max) {
5918     (dsp->num_at_max)++;
5919     if (StringISearch (sfp->except_text, "trans-splicing") != NULL) {
5920       (dsp->num_trans_spliced)++;
5921     }
5922     grpx = dsp->grp_at_max;
5923     if (grpx != NULL) {
5924       redundantgenexref = FALSE;
5925       if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grpx->locus_tag)) {
5926         if (StringICmp (grp->locus_tag, grpx->locus_tag) == 0) {
5927           redundantgenexref = TRUE;
5928         }
5929       } else if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grpx->locus)) {
5930         if (StringICmp (grp->locus, grpx->locus) == 0) {
5931           redundantgenexref = TRUE;
5932         }
5933       } else if (grp->syn != NULL && grpx->syn != NULL) {
5934         syn1 = (CharPtr) grp->syn->data.ptrvalue;
5935         syn2 = (CharPtr) grpx->syn->data.ptrvalue;
5936         if (StringDoesHaveText (syn1) && StringDoesHaveText (syn2)) {
5937           if (StringICmp (syn1, syn2) == 0) {
5938             redundantgenexref = TRUE;
5939           }
5940         }
5941       }
5942     }
5943     if (redundantgenexref) {
5944       dsp->equivalent_genes = TRUE;
5945     }
5946   }
5947 
5948   return TRUE;
5949 }
5950 
5951 // used by ExtendedSeqEntryCleanup (not used)
AddMissingGeneXref(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)5952 static Boolean LIBCALLBACK AddMissingGeneXref (SeqFeatPtr sfp, SeqMgrFeatContextPtr context)
5953 
5954 {
5955   Int2               count;
5956   DummySmfeData      dsd;
5957   SeqMgrFeatContext  gcontext;
5958   SeqFeatPtr         gene;
5959   GeneRefPtr         grp;
5960   CharPtr            locus, locus_tag;
5961   SeqEntryPtr        sep, oldscope;
5962   Boolean            smallGenomeSet;
5963   BoolPtr            smallGenomeSetP;
5964   SeqFeatXrefPtr     xref;
5965 
5966   if (sfp == NULL || context == NULL) return TRUE;
5967   smallGenomeSetP = (BoolPtr) context->userdata;
5968   if (smallGenomeSetP == NULL) return FALSE;
5969   smallGenomeSet = *smallGenomeSetP;
5970 
5971   if (sfp->idx.subtype == FEATDEF_GENE ||
5972       sfp->idx.subtype == FEATDEF_operon ||
5973       sfp->idx.subtype == FEATDEF_gap ||
5974       sfp->idx.subtype == FEATDEF_repeat_region ||
5975       sfp->idx.subtype == FEATDEF_mobile_element ||
5976       sfp->idx.subtype == FEATDEF_centromere ||
5977       sfp->idx.subtype == FEATDEF_primer_bind ||
5978       sfp->idx.subtype == FEATDEF_telomere) return TRUE;
5979 
5980   grp = SeqMgrGetGeneXref (sfp);
5981   if (grp != NULL) return TRUE;
5982 
5983   MemSet ((Pointer) &dsd, 0, sizeof (DummySmfeData));
5984   dsd.max = INT4_MAX;
5985   dsd.num_at_max = 0;
5986   dsd.num_trans_spliced = 0;
5987   dsd.equivalent_genes = FALSE;
5988   dsd.grp_at_max = NULL;
5989   count = SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE, NULL, 0,
5990                                            LOCATION_SUBSET, (Pointer) &dsd, DummySMFEProc);
5991 
5992   if (dsd.num_at_max <= 1) return TRUE;
5993   if (smallGenomeSet && dsd.num_at_max == dsd.num_trans_spliced) return TRUE;
5994 
5995   sep = GetTopSeqEntryForEntityID (sfp->idx.entityID);
5996   oldscope = SeqEntrySetScope (sep);
5997   gene = SeqMgrGetOverlappingFeatureEx (sfp->location, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, &gcontext, TRUE);
5998   SeqEntrySetScope (oldscope);
5999   if (gene == NULL) return TRUE;
6000 
6001   grp = (GeneRefPtr) gene->data.value.ptrvalue;
6002   if (grp == NULL) return TRUE;
6003   locus = grp->locus;
6004   locus_tag = grp->locus_tag;
6005   if (StringHasNoText (locus) && StringHasNoText (locus_tag)) return TRUE;
6006 
6007   grp = GeneRefNew ();
6008   if (grp == NULL) return TRUE;
6009   grp->locus = StringSaveNoNull (locus);
6010   grp->locus_tag = StringSaveNoNull (locus_tag);
6011   xref = SeqFeatXrefNew ();
6012   if (xref == NULL) return TRUE;
6013   xref->data.choice = SEQFEAT_GENE;
6014   xref->data.value.ptrvalue = (Pointer) grp;
6015   xref->next = sfp->xref;
6016   sfp->xref = xref;
6017 
6018   return TRUE;
6019 }
6020 
6021 // used by ExtendedSeqEntryCleanup (not used)
/*
 * Bioseq visitor: runs AddMissingGeneXref over the features of every
 * bioseq that is not an amino-acid sequence.  userdata is handed through
 * to the feature callback untouched.
 */
static void BspMissingGeneRef (
  BioseqPtr bsp,
  Pointer userdata
)

{
  if (bsp != NULL && ! ISA_aa (bsp->mol)) {
    SeqMgrExploreFeatures (bsp, userdata, AddMissingGeneXref, NULL, NULL, NULL);
  }
}
6029 
6030 // used by ExtendedSeqEntryCleanup (not used)
/*
 * Bioseq-set visitor: sets the Boolean pointed to by userdata to TRUE when
 * the visited set has class small_genome_set.  The flag is never cleared
 * here, so it stays TRUE once any such set has been seen.
 */
static void IsSmallGenomeSet (
  BioseqSetPtr bssp,
  Pointer userdata
)

{
  BoolPtr  flagp = (BoolPtr) userdata;

  if (bssp == NULL) return;
  if (bssp->_class != BioseqseqSet_class_small_genome_set) return;

  if (flagp != NULL) {
    *flagp = TRUE;
  }
}
6044 
6045 // used by ExtendedSeqEntryCleanup (not used)
/*
 * Feature visitor: walks every component of the feature location and turns
 * a strand value of Seq_strand_other into Seq_strand_plus on interval and
 * point components.  Other component types are left untouched.
 *
 * userdata is not used.  A former restriction to gene, coding region,
 * protein and RNA features is disabled, so all feature types are processed.
 */
static void FixSeqStrandOther (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  SeqIntPtr  ival;
  SeqLocPtr  piece;
  SeqPntPtr  pnt;

  if (sfp == NULL || sfp->location == NULL) return;

  for (piece = SeqLocFindNext (sfp->location, NULL);
       piece != NULL;
       piece = SeqLocFindNext (sfp->location, piece)) {

    if (piece->choice == SEQLOC_INT) {
      ival = (SeqIntPtr) piece->data.ptrvalue;
      if (ival != NULL && ival->strand == Seq_strand_other) {
        ival->strand = Seq_strand_plus;
      }
    } else if (piece->choice == SEQLOC_PNT) {
      pnt = (SeqPntPtr) piece->data.ptrvalue;
      if (pnt != NULL && pnt->strand == Seq_strand_other) {
        pnt->strand = Seq_strand_plus;
      }
    }
  }
}
6086 
6087 // option not used
/*
 * Serious cleanup followed by two extra passes:
 *   1. FixSeqStrandOther on every feature;
 *   2. AddMissingGeneXref (via BspMissingGeneRef) on every non-protein
 *      bioseq, which needs the feature index built just before it.
 *
 * The feature indexes are cleared again before returning.  A NULL sep is
 * ignored.
 */
extern void ExtendedSeqEntryCleanup (
  SeqEntryPtr sep
)

{
  Uint2    eid;
  Boolean  hasSmallGenomeSet = FALSE;

  if (sep == NULL) return;

  SeriousSeqEntryCleanupEx (sep, NULL, NULL, TRUE, FALSE);

  /* remember whether any set in the record is a small genome set */
  VisitSetsInSep (sep, (Pointer) &hasSmallGenomeSet, IsSmallGenomeSet);

  eid = ObjMgrGetEntityIDForChoice (sep);

  VisitFeaturesInSep (sep, NULL, FixSeqStrandOther);

  /* the gene xref pass relies on indexed features */
  SeqMgrIndexFeatures (eid, NULL);
  VisitBioseqsInSep (sep, (Pointer) &hasSmallGenomeSet, BspMissingGeneRef);

  SeqMgrClearFeatureIndexes (eid, NULL);
}
6108 
/*
 * Cleans up a stand-alone Seq-annot and stamps it with a fresh NcbiCleanup
 * user object.
 *
 *   sap - annotation to clean; NULL is ignored
 *
 * Steps: remove any existing cleanup user objects, run BasicSeqAnnotCleanup,
 * then append an Annot_descr_user descriptor recording the method name,
 * NCBI_CLEANUP_VERSION and the current month / day / year.
 *
 * If the date, the user object or the descriptor cannot be allocated the
 * stamp is skipped; anything already allocated for it is released first.
 */
extern void SeriousSeqAnnotCleanup (SeqAnnotPtr sap)

{
  AnnotDescrPtr  adp;
  DatePtr        dp;
  AnnotDescrPtr  last;
  UserObjectPtr  uop;

  if (sap == NULL) return;

  RemoveAllSeqAnnotCleanupUserObjs (sap);

  BasicSeqAnnotCleanup (sap);

  dp = DateCurr ();
  if (dp == NULL) return;

  uop = CreateNcbiCleanupUserObject ();
  if (uop == NULL) {
    /* the date is owned by this function; release it on the failure path */
    DateFree (dp);
    return;
  }

  AddStringToNcbiCleanupUserObject (uop, "method", "SeriousSeqAnnotCleanup");
  AddIntegerToNcbiCleanupUserObject (uop, "version", NCBI_CLEANUP_VERSION);

  /* data [1] is stored as an offset from 1900, hence the adjustment */
  AddIntegerToNcbiCleanupUserObject (uop, "month", dp->data [2]);
  AddIntegerToNcbiCleanupUserObject (uop, "day", dp->data [3]);
  AddIntegerToNcbiCleanupUserObject (uop, "year", dp->data [1] + 1900);

  DateFree (dp);

  adp = AnnotDescrNew (NULL);
  if (adp == NULL) {
    /* nothing will ever own the user object; release it */
    UserObjectFree (uop);
    return;
  }

  adp->choice = Annot_descr_user;
  adp->data.ptrvalue = uop;

  /* append at the tail so existing descriptors keep their order */
  if (sap->desc == NULL) {
    sap->desc = adp;
  } else {
    last = sap->desc;
    while (last->next != NULL) {
      last = last->next;
    }
    last->next = adp;
  }
}
6154 
6155 
6156 /* ConvertSegSetToDeltaSeq section */
6157 // Only for SegSets
/*
 * SeqEntryExplore callback: stores sep into the SeqEntryPtr that mydata
 * points to whenever sep is a Bioseq-set of class segset.  Later matches
 * overwrite earlier ones.  index and indent are not used.
 */
static void FindSegSet (
  SeqEntryPtr sep,
  Pointer mydata,
  Int4 index,
  Int2 indent
)

{
  SeqEntryPtr PNTR  resultp;
  BioseqSetPtr      set;

  if (sep == NULL || ! IS_Bioseq_set (sep)) return;

  set = (BioseqSetPtr) sep->data.ptrvalue;
  if (set == NULL || set->_class != BioseqseqSet_class_segset) return;

  resultp = (SeqEntryPtr PNTR) mydata;
  if (resultp != NULL) {
    *resultp = sep;
  }
}
6173 
/*
 * Bioseq-set visitor: flags every set of class segset by setting its
 * idx.deleteme field.  userdata is not used.
 */
static void MarkSegSets (
  BioseqSetPtr bssp,
  Pointer userdata
)

{
  if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset) {
    bssp->idx.deleteme = TRUE;
  }
}
6181 
6182 // Only for SegSets
/*
 * Bioseq visitor: ORs bsp->mol into the Uint1 accumulator that userdata
 * points to, so the caller can tell afterwards whether bioseqs with
 * different mol values were visited.
 */
static void LookForMixedMols (
  BioseqPtr bsp,
  Pointer userdata
)

{
  Uint1Ptr  accum = (Uint1Ptr) userdata;

  if (bsp == NULL || accum == NULL) return;

  *accum = (Uint1) (*accum | bsp->mol);
}
6195 
6196 // Only for SegSets
/*
 * Bioseq visitor: sets the Boolean that userdata points to when a
 * non-virtual bioseq has no MolInfo descriptor according to
 * SeqMgrGetNextDescriptor.  The flag is never reset to FALSE here.
 */
static void CheckForMissingMolInfo (
  BioseqPtr bsp,
  Pointer userdata
)

{
  SeqMgrDescContext  dcontext;
  BoolPtr            missingp;

  if (bsp == NULL) return;
  missingp = (BoolPtr) userdata;
  if (missingp == NULL) return;

  /* virtual bioseqs are not checked */
  if (bsp->repr == Seq_repr_virtual) return;

  if (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext) == NULL) {
    *missingp = TRUE;
  }
}
6214 
6215 typedef struct gbdata {
6216   GBBlockPtr  gbp;
6217   CharPtr     source;
6218   CharPtr     origin;
6219   Boolean     mixedsources;
6220   Boolean     mixedorigins;
6221 } GBData, PNTR GBDataPtr;
6222 
6223 // Only for SegSets
/*
 * Descriptor visitor: compares the source and origin strings of every
 * GenBank block descriptor against those of the first one seen.
 *
 * userdata points to a GBData.  The first GenBank block is recorded as the
 * reference; each later block sets mixedsources / mixedorigins when its
 * string differs from the reference under StringICmp.
 */
static void FindGenBankDiffs (
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  GBBlockPtr  block;
  GBDataPtr   state;

  if (sdp == NULL || sdp->choice != Seq_descr_genbank) return;
  state = (GBDataPtr) userdata;
  if (state == NULL) return;

  block = (GBBlockPtr) sdp->data.ptrvalue;
  if (block == NULL) return;

  if (state->gbp != NULL) {
    /* a reference block exists: look for differences */
    if (StringICmp (block->source, state->source) != 0) {
      state->mixedsources = TRUE;
    }
    if (StringICmp (block->origin, state->origin) != 0) {
      state->mixedorigins = TRUE;
    }
    return;
  }

  /* first block seen becomes the reference */
  state->gbp = block;
  state->source = block->source;
  state->origin = block->origin;
}
6254 
6255 // Only for SegSets
/*
 * Descriptor visitor: stores an AsnIoMemCopy duplicate of the first
 * non-empty GenBank block descriptor into the GBBlockPtr that userdata
 * points to.  Once that slot is non-NULL further descriptors are ignored.
 */
static void CopyFirstGBBlock (
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  GBBlockPtr PNTR  destp;
  GBBlockPtr       src;

  if (sdp == NULL || sdp->choice != Seq_descr_genbank) return;

  src = (GBBlockPtr) sdp->data.ptrvalue;
  destp = (GBBlockPtr PNTR) userdata;
  if (src == NULL || destp == NULL) return;

  if (*destp == NULL) {
    *destp = (GBBlockPtr) AsnIoMemCopy (src, (AsnReadFunc) GBBlockAsnRead, (AsnWriteFunc) GBBlockAsnWrite);
  }
}
6276 
6277 // Only for SegSets
/*
 * Descriptor visitor: stores an AsnIoMemCopy duplicate of the first
 * non-empty MolInfo descriptor into the MolInfoPtr that userdata points
 * to.  Once that slot is non-NULL further descriptors are ignored.
 */
static void CopyFirstMolInfo (
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  MolInfoPtr PNTR  destp;
  MolInfoPtr       src;

  if (sdp == NULL || sdp->choice != Seq_descr_molinfo) return;

  src = (MolInfoPtr) sdp->data.ptrvalue;
  destp = (MolInfoPtr PNTR) userdata;
  if (src == NULL || destp == NULL) return;

  if (*destp == NULL) {
    *destp = (MolInfoPtr) AsnIoMemCopy (src, (AsnReadFunc) MolInfoAsnRead, (AsnWriteFunc) MolInfoAsnWrite);
  }
}
6298 
6299 // Only for SegSets
/*
 * Descriptor visitor: stores a StringSave copy of the first non-NULL title
 * descriptor into the CharPtr that userdata points to.  Once that slot is
 * non-NULL further titles are ignored.
 */
static void CopyFirstTitle (
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  CharPtr PNTR  destp;
  CharPtr       text;

  if (sdp == NULL || sdp->choice != Seq_descr_title) return;

  text = (CharPtr) sdp->data.ptrvalue;
  destp = (CharPtr PNTR) userdata;
  if (text == NULL || destp == NULL) return;

  if (*destp == NULL) {
    *destp = (CharPtr) StringSave (text);
  }
}
6320 
6321 // Only for SegSets
/*
 * Bioseq visitor: appends the accession text of each non-virtual bioseq to
 * the extra_accessions list of the GBBlock passed as userdata.
 *
 * Bioseqs for which SeqIdFindBestAccession finds nothing, or whose id
 * prints as blank text, are skipped.
 */
static void AddPartAccns (
  BioseqPtr bsp,
  Pointer userdata
)

{
  Char        accn [64];
  SeqIdPtr    best;
  GBBlockPtr  block;

  if (bsp == NULL) return;
  block = (GBBlockPtr) userdata;
  if (block == NULL) return;

  if (bsp->repr == Seq_repr_virtual) return;

  best = SeqIdFindBestAccession (bsp->id);
  if (best == NULL) return;

  SeqIdWrite (best, accn, PRINTID_TEXTID_ACCESSION, sizeof (accn));
  if (StringDoesHaveText (accn)) {
    ValNodeCopyStr (&(block->extra_accessions), 0, accn);
  }
}
6346 
6347 // Only for SegSets
/*
 * Bioseq visitor: feeds the accession text of each non-virtual bioseq to
 * ParseStringIntoSeqHist for the bioseq passed as userdata.
 *
 * If that bioseq still has no history after the call, the SeqHist returned
 * by ParseStringIntoSeqHist is installed as its history.
 */
static void AddPartHist (
  BioseqPtr bsp,
  Pointer userdata
)

{
  Char        accn [64];
  SeqIdPtr    best;
  SeqHistPtr  hist;
  BioseqPtr   target;

  if (bsp == NULL) return;
  target = (BioseqPtr) userdata;
  if (target == NULL) return;

  if (bsp->repr == Seq_repr_virtual) return;

  best = SeqIdFindBestAccession (bsp->id);
  if (best == NULL) return;

  SeqIdWrite (best, accn, PRINTID_TEXTID_ACCESSION, sizeof (accn));
  if (StringHasNoText (accn)) return;

  hist = ParseStringIntoSeqHist (target->hist, accn);
  if (target->hist == NULL) {
    target->hist = hist;
  }
}
6376 
6377 // Only for SegSets
AddSegToDeltaSeq(SeqLocPtr slp,SeqMgrSegmentContextPtr context)6378 static Boolean LIBCALLBACK AddSegToDeltaSeq (
6379   SeqLocPtr slp,
6380   SeqMgrSegmentContextPtr context
6381 )
6382 
6383 {
6384   CharPtr     bases;
6385   BioseqPtr   bsp;
6386   BioseqPtr   deltabsp;
6387   IntFuzzPtr  ifp;
6388   SeqLocPtr   loc;
6389   SeqIdPtr    sip;
6390   SeqLitPtr   slitp;
6391 
6392   if (slp == NULL || context == NULL) return FALSE;
6393   deltabsp = (BioseqPtr) context->userdata;
6394   if (deltabsp == NULL) return FALSE;
6395 
6396   sip = SeqLocId (slp);
6397   if (sip == NULL) {
6398     loc = SeqLocFindNext (slp, NULL);
6399     if (loc == NULL) return TRUE;
6400     sip = SeqLocId (loc);
6401   }
6402   if (sip == NULL) return TRUE;
6403 
6404   bsp = BioseqFind (sip);
6405   if (bsp == NULL) return TRUE;
6406 
6407   if (bsp->repr == Seq_repr_virtual) {
6408     if (deltabsp->seq_ext != NULL) {
6409       /* insert gap of unknown length (by convention, 100 bases) between the previous segment and this one. */
6410       slitp = (SeqLitPtr) MemNew (sizeof (SeqLit));
6411       if (slitp != NULL) {
6412         slitp->length = bsp->length;
6413         if (slitp->length == 100) {
6414           ifp = IntFuzzNew ();
6415           if (ifp != NULL) {
6416             ifp->choice = 4;
6417             slitp->fuzz = ifp;
6418           }
6419         }
6420         ValNodeAddPointer ((ValNodePtr PNTR) &(deltabsp->seq_ext), (Int2) 2, (Pointer) slitp);
6421         deltabsp->length += slitp->length;
6422       }
6423     }
6424     return TRUE;
6425   }
6426 
6427   bases = GetSequenceByBsp (bsp);
6428   if (bases == NULL) return TRUE;
6429 
6430   slitp = (SeqLitPtr) MemNew (sizeof (SeqLit));
6431   if (slitp != NULL) {
6432     slitp->length = StringLen (bases);
6433     ValNodeAddPointer ((ValNodePtr PNTR) &(deltabsp->seq_ext), (Int2) 2, (Pointer) slitp);
6434     slitp->seq_data = (SeqDataPtr) BSNew (slitp->length);
6435     slitp->seq_data_type = Seq_code_iupacna;
6436     AddBasesToByteStore ((ByteStorePtr) slitp->seq_data, bases);
6437     deltabsp->length += slitp->length;
6438   }
6439 
6440   return TRUE;
6441 }
6442 
6443 // Only for SegSets
/*
 * SeqEntryExplore callback: detaches every feature from the feature-table
 * annotations (type 1) on the visited bioseq or bioseq-set and hands each
 * one to AddFeatToBioseq for the delta bioseq passed as mydata.
 *
 * An annotation that ends up with no data is unlinked and freed when
 * SSECNoGenomeAnnotInAnnotDescr returns TRUE for it; otherwise it is kept.
 * index and indent are not used.
 */
static void MoveAnnotsToDeltaSeq (
  SeqEntryPtr sep,
  Pointer mydata,
  Int4 index,
  Int2 indent
)

{
  SeqAnnotPtr   annot;
  Pointer PNTR  annotLink;   /* slot holding the pointer to the current annot */
  BioseqPtr     deltabsp;
  SeqFeatPtr    feat;
  Pointer PNTR  featLink;    /* head slot of the current feature table */
  SeqAnnotPtr   followingAnnot;
  SeqFeatPtr    followingFeat;
  BioseqPtr     hostBsp;
  BioseqSetPtr  hostSet;

  if (sep == NULL) return;
  deltabsp = (BioseqPtr) mydata;
  if (deltabsp == NULL) return;

  if (IS_Bioseq (sep)) {
    hostBsp = (BioseqPtr) sep->data.ptrvalue;
    if (hostBsp == NULL) return;
    annot = hostBsp->annot;
    annotLink = (Pointer PNTR) &(hostBsp->annot);
  } else if (IS_Bioseq_set (sep)) {
    hostSet = (BioseqSetPtr) sep->data.ptrvalue;
    if (hostSet == NULL) return;
    annot = hostSet->annot;
    annotLink = (Pointer PNTR) &(hostSet->annot);
  } else {
    return;
  }

  for (; annot != NULL; annot = followingAnnot) {
    /* remember the successor now: annot may be freed below */
    followingAnnot = annot->next;

    if (annot->type == 1) {
      featLink = (Pointer PNTR) &(annot->data);
      feat = (SeqFeatPtr) annot->data;
      while (feat != NULL) {
        /* pop the head feature off the table, then give it to the delta bioseq */
        followingFeat = feat->next;
        *featLink = followingFeat;
        feat->next = NULL;
        AddFeatToBioseq (feat, deltabsp);
        feat = followingFeat;
      }
    }

    /* now keep empty annot if annot_descr present */
    if (annot->data != NULL || ! SSECNoGenomeAnnotInAnnotDescr (annot)) {
      annotLink = (Pointer PNTR) &(annot->next);
      continue;
    }

    *annotLink = annot->next;
    annot->next = NULL;
    SeqAnnotFree (annot);
  }
}
6497 
6498 // Only for SegSets
RptUnitInBaseRange(CharPtr str,Int4Ptr fromP,Int4Ptr toP)6499 static Boolean RptUnitInBaseRange (CharPtr str, Int4Ptr fromP, Int4Ptr toP)
6500 
6501 {
6502   CharPtr   ptr;
6503   Char      tmp [32];
6504   long int  val;
6505 
6506   if (StringLen (str) > 25) return FALSE;
6507   StringNCpy_0 (tmp, str, sizeof (tmp));
6508   ptr = StringStr (tmp, "..");
6509   if (ptr == NULL) return FALSE;
6510   *ptr = '\0';
6511   if (StringHasNoText (tmp)) return FALSE;
6512   if (sscanf (tmp, "%ld", &val) != 1 || val < 1) return FALSE;
6513   if (fromP != NULL) {
6514     *fromP = val - 1;
6515   }
6516   ptr += 2;
6517   if (StringHasNoText (ptr)) return FALSE;
6518   if (sscanf (ptr, "%ld", &val) != 1 || val < 1) return FALSE;
6519   if (toP != NULL) {
6520     *toP = val - 1;
6521   }
6522   return TRUE;
6523 }
6524 
6525 // Only for SegSets
FindFirstLocalBioseq(SeqLocPtr loc)6526 static BioseqPtr FindFirstLocalBioseq (SeqLocPtr loc)
6527 
6528 {
6529   BioseqPtr  bsp;
6530   SeqIdPtr   sip;
6531   SeqLocPtr  slp = NULL;
6532 
6533   if (loc == NULL) return NULL;
6534 
6535   while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
6536     sip = SeqLocId (slp);
6537     if (sip != NULL) {
6538       bsp = BioseqFindCore (sip);
6539       if (bsp != NULL) return bsp;
6540     }
6541   }
6542 
6543   return NULL;
6544 }
6545 
6546 // Only for SegSets
/*
 * Feature visitor: for a feature whose location resolves (through
 * BioseqFindFromSeqLoc) to a segmented, non-protein bioseq, replaces the
 * location with the result of merging it against that bioseq.
 * NOTE(review): presumably this re-expresses part coordinates on the
 * master sequence - confirm against SeqLocMergeExEx.
 *
 * The same remapping is applied to coding-region code-break locations, to
 * tRNA anticodon locations and to the text of "rpt_unit_range" qualifiers.
 * Partialness recorded on the old location is carried over to the new one.
 *
 * userdata is not used.
 */
static void MapSegFeatToMaster (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  BioseqPtr     bsp, ptbsp;
  Char          buf [64];
  CodeBreakPtr  cbp;
  CdRegionPtr   crp;
  Int4          from, to;
  GBQualPtr     gbq;
  Boolean       hasNulls;
  Int4          lim;
  Boolean       noLeft;
  Boolean       noRight;
  ValNodePtr    partiallist = NULL, emptypartials = NULL;
  RnaRefPtr     rrp;
  SeqInt        sint;
  SeqIntPtr     sintp;
  SeqIdPtr      sip;
  SeqLocPtr     slp = NULL;
  tRNAPtr       trna;
  ValNode       vn;

  if (sfp == NULL || sfp->location == NULL) return;

  /* only features located on a segmented nucleotide bioseq are handled */
  bsp = BioseqFindFromSeqLoc (sfp->location);
  if (bsp == NULL) return;
  if (ISA_aa (bsp->mol)) return;
  if (bsp->repr != Seq_repr_seg) return;

  /* capture partial / null-gap state before the location is replaced */
  partiallist = GetSeqLocPartialSet (sfp->location);
  CheckSeqLocForPartialEx (sfp->location, &noLeft, &noRight, &lim);
  hasNulls = LocationHasNullsBetween (sfp->location);

  /* the merge arguments differ by feature type; genes also drop their null and partial flags */
  if (sfp->data.choice == SEQFEAT_GENE) {
    slp = SeqLocMergeExEx (bsp, sfp->location, NULL, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
    hasNulls = FALSE;
    sfp->partial = FALSE;
  } else if (sfp->data.choice == SEQFEAT_CDREGION) {
    slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
  } else if (sfp->data.choice == SEQFEAT_RNA) {
    slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
  } else {
    slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE, FALSE);
  }
  if (slp == NULL) {
    ValNodeFree (partiallist);
    return;
  }

  /* must be looked up on the old location, before it is freed; used for rpt_unit_range below */
  ptbsp = FindFirstLocalBioseq (sfp->location);

  sfp->location = SeqLocFree (sfp->location);
  sfp->location = slp;
  /* strip fuzz from the new location, then restore the partial state saved above */
  emptypartials = GetSeqLocPartialSet (sfp->location);
  FreeAllFuzz (sfp->location);
  SetSeqLocPartialEx (sfp->location, noLeft, noRight, lim);
  /* the saved partial set is re-applied only when it has as many entries as the new location's set */
  if (ValNodeLen (partiallist) == ValNodeLen (emptypartials)) {
    SetSeqLocPartialSet (sfp->location, partiallist);
  }
  ValNodeFree (partiallist);
  ValNodeFree (emptypartials);
  sfp->partial = (sfp->partial || noLeft || noRight || hasNulls);

  /* remap locations embedded in the feature data */
  switch (sfp->data.choice) {
    case SEQFEAT_CDREGION :
      crp = (CdRegionPtr) sfp->data.value.ptrvalue;
      if (crp != NULL && crp->code_break != NULL) {
        for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
          slp = SeqLocMerge (bsp, cbp->loc, NULL, FALSE, TRUE, FALSE);
          if (slp != NULL) {
            cbp->loc = SeqLocFree (cbp->loc);
            cbp->loc = slp;
            FreeAllFuzz (cbp->loc);
          }
        }
      }
      break;
    case SEQFEAT_RNA :
      rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
      /* NOTE(review): type 3 / ext.choice 2 presumably select a tRNA with a tRNA extension - confirm */
      if (rrp != NULL && rrp->type == 3 && rrp->ext.choice == 2) {
        trna = rrp->ext.value.ptrvalue;
        if (trna != NULL && trna->anticodon != NULL) {
          slp = SeqLocMerge (bsp, trna->anticodon, NULL, FALSE, TRUE, FALSE);
          if (slp != NULL) {
            trna->anticodon = SeqLocFree (trna->anticodon);
            trna->anticodon = slp;
            FreeAllFuzz (trna->anticodon);
          }
        }
      }
      break;
    default :
      break;
  }

  /* rewrite rpt_unit_range qualifiers: parse, merge as an interval, print back as 1-based text */
  for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
    if (StringICmp (gbq->qual, "rpt_unit_range") != 0) continue;
    if (! RptUnitInBaseRange (gbq->val, &from, &to)) continue;
     if (ptbsp == NULL || ptbsp->repr != Seq_repr_raw) continue;
    sip = SeqIdFindBest (ptbsp->id, 0);
    if (sip == NULL) continue;
    /* build a temporary stack-allocated interval on ptbsp; it is never freed */
    MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
    MemSet ((Pointer) &vn, 0, sizeof (ValNode));
    /* a descending (or equal) range is stored as a minus-strand interval */
    if (from < to) {
      sint.from = from;
      sint.to = to;
      sint.strand = Seq_strand_plus;
    } else {
      sint.to = from;
      sint.from = to;
      sint.strand = Seq_strand_minus;
    }
    sint.id = sip;
    vn.choice = SEQLOC_INT;
    vn.data.ptrvalue = (Pointer) &sint;
    vn.next = NULL;
    slp = SeqLocMerge (bsp, &vn, NULL, FALSE, TRUE, FALSE);
    if (slp != NULL) {
      /* the qualifier is rewritten only when the merge yields a single interval */
      if (slp->choice == SEQLOC_INT) {
        sintp = (SeqIntPtr) slp->data.ptrvalue;
        if (sintp != NULL) {
          buf [0] = '\0';
          /* minus-strand results are printed high..low */
          if (sintp->strand == Seq_strand_minus) {
            sprintf (buf, "%ld..%ld", (long) sintp->to + 1, (long) sintp->from + 1);
          } else {
            sprintf (buf, "%ld..%ld", (long) sintp->from + 1, (long) sintp->to + 1);
          }
          if (StringDoesHaveText (buf)) {
            gbq->val = MemFree (gbq->val);
            gbq->val = StringSave (buf);
          }
        }
      }
      SeqLocFree (slp);
    }
  }
}
6687 
6688 // Only for SegSets
/*
 * Descriptor callback (segmented sets only).
 *
 * For a pub or source descriptor, creates a matching SEQFEAT_PUB or
 * SEQFEAT_BIOSRC feature on the Bioseq passed through userdata and gives
 * that feature its own AsnIoMemCopy duplicate of the descriptor payload.
 * The descriptor itself is left untouched.  All other descriptor kinds,
 * and descriptors with a NULL payload, are ignored.
 *
 *   sdp       descriptor being visited
 *   userdata  the BioseqPtr that receives the new feature
 */
static void PartDescToFeatProc(
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  SeqFeatPtr    feat;
  PubdescPtr    pub;
  BioSourcePtr  src;
  BioseqPtr     target;

  if (sdp == NULL) return;
  target = (BioseqPtr) userdata;
  if (target == NULL) return;

  if (sdp->choice == Seq_descr_pub) {
    pub = (PubdescPtr) sdp->data.ptrvalue;
    if (pub != NULL) {
      feat = CreateNewFeatureOnBioseq (target, SEQFEAT_PUB, NULL);
      if (feat != NULL) {
        feat->data.value.ptrvalue = AsnIoMemCopy (pub, (AsnReadFunc) PubdescAsnRead, (AsnWriteFunc) PubdescAsnWrite);
      }
    }
    return;
  }

  if (sdp->choice != Seq_descr_source) return;

  src = (BioSourcePtr) sdp->data.ptrvalue;
  if (src == NULL) return;

  feat = CreateNewFeatureOnBioseq (target, SEQFEAT_BIOSRC, NULL);
  if (feat == NULL) return;

  feat->data.value.ptrvalue = AsnIoMemCopy (src, (AsnReadFunc) BioSourceAsnRead, (AsnWriteFunc) BioSourceAsnWrite);
}
6720 
6721 // Only for SegSets
/*
 * Bioseq callback (segmented sets only).
 *
 * Runs PartDescToFeatProc over the descriptors of every non-virtual
 * Bioseq, passing the Bioseq itself as the callback's userdata so the
 * generated features land on the same sequence.  The incoming userdata
 * argument is not used.
 */
static void MovePartDescToFeat (
  BioseqPtr bsp,
  Pointer userdata
)

{
  if (bsp != NULL && bsp->repr != Seq_repr_virtual) {
    VisitDescriptorsOnBsp (bsp, (Pointer) bsp, PartDescToFeatProc);
  }
}
6732 
6733 // Only for SegSets
/*
 * Descriptor callback (segmented sets only).
 *
 * Scans the Cit-sub entries of a pub descriptor and tracks one
 * representative date in the DatePtr that userdata points to.  The first
 * dated Cit-sub seen is recorded; afterwards the recorded date is replaced
 * whenever DateMatch (recorded, candidate, TRUE) returns 1.
 * NOTE(review): presumably a result of 1 means the recorded date is later
 * than the candidate, so the earliest date wins - confirm against DateMatch.
 *
 * The stored pointer aliases the Cit-sub's own date; nothing is copied.
 *
 *   sdp       descriptor being visited
 *   userdata  DatePtr PNTR receiving the selected date
 */
static void PartCitSubDatesCompProc(
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  DatePtr PNTR  chosen;
  ValNodePtr    pub;
  PubdescPtr    pubdesc;
  CitSubPtr     sub;

  if (sdp == NULL || sdp->choice != Seq_descr_pub) return;

  chosen = (DatePtr PNTR) userdata;
  if (chosen == NULL) return;

  pubdesc = (PubdescPtr) sdp->data.ptrvalue;
  if (pubdesc == NULL) return;

  for (pub = pubdesc->pub; pub != NULL; pub = pub->next) {
    if (pub->choice != PUB_Sub) continue;

    sub = (CitSubPtr) pub->data.ptrvalue;
    if (sub == NULL || sub->date == NULL) continue;

    /* take the first date unconditionally, later ones only when DateMatch says 1 */
    if (*chosen == NULL || DateMatch (*chosen, sub->date, TRUE) == 1) {
      *chosen = sub->date;
    }
  }
}
6767 
6768 
6769 // Only for SegSets
/*
 * Bioseq callback (segmented sets only).
 *
 * Applies PartCitSubDatesCompProc to every descriptor on the Bioseq,
 * forwarding userdata (a DatePtr PNTR accumulator) unchanged.
 */
static void CompPartCitSubDates(
  BioseqPtr bsp,
  Pointer userdata
)

{
  if (bsp != NULL) {
    VisitDescriptorsOnBsp (bsp, userdata, PartCitSubDatesCompProc);
  }
}
6779 
6780 // Only for SegSets
/*
 * Descriptor callback (segmented sets only).
 *
 * For each dated Cit-sub in a pub descriptor, replaces the date with a
 * fresh DateDup copy of the reference date whenever DateMatch reports the
 * two as different (non-zero result).  Cit-subs without a date are left
 * alone.
 *
 *   sdp       descriptor being visited
 *   userdata  DatePtr holding the reference date; NULL disables the callback
 */
static void PartCitSubDatesSyncProc (
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  ValNodePtr  pub;
  PubdescPtr  pubdesc;
  DatePtr     reference;
  CitSubPtr   sub;

  if (sdp == NULL || sdp->choice != Seq_descr_pub) return;

  reference = (DatePtr) userdata;
  if (reference == NULL) return;

  pubdesc = (PubdescPtr) sdp->data.ptrvalue;
  if (pubdesc == NULL) return;

  for (pub = pubdesc->pub; pub != NULL; pub = pub->next) {
    if (pub->choice != PUB_Sub) continue;

    sub = (CitSubPtr) pub->data.ptrvalue;
    if (sub == NULL || sub->date == NULL) continue;

    if (DateMatch (sub->date, reference, TRUE) == 0) continue;

    /* dates differ: drop the old one and install a private copy of the reference */
    DateFree (sub->date);
    sub->date = DateDup (reference);
  }
}
6809 
6810 // Only for SegSets
/*
 * Bioseq callback (segmented sets only).
 *
 * Applies PartCitSubDatesSyncProc to every descriptor on the Bioseq,
 * forwarding userdata (the reference DatePtr) unchanged.  Unlike
 * CompPartCitSubDates there is no explicit NULL check on bsp here; the
 * pointer is handed straight to VisitDescriptorsOnBsp.
 */
static void SyncPartCitSubDates (BioseqPtr bsp, Pointer userdata)

{
  VisitDescriptorsOnBsp (bsp, userdata, PartCitSubDatesSyncProc);
}
6819 
6820 // Only for SegSets
/*
 * Replaces the NULL gaps of a segmented sequence with references to a
 * newly created virtual Bioseq of conventional length 100 (segmented
 * sets only).
 *
 * The conversion is skipped (the function returns without changes) when:
 *   - sep is NULL, is not a Bioseq-set, or has no set data;
 *   - the top-level set class is in the mut_set..eco_set range, or is a
 *     wgs set or small genome set;
 *   - FindSegSet locates no seg set, or the seg set is sep itself (no
 *     wrapping set);
 *   - LookForMixedMols leaves mol outside dna/rna/aa/na/other;
 *   - FindGenBankDiffs reports mixed sources or mixed origins;
 *   - CheckForMissingMolInfo reports missing MolInfo;
 *   - the seg set is not laid out as "segmented Bioseq, then parts set";
 *   - the segmented sequence already references virtual Bioseqs and has
 *     no NULL components.
 *
 * If the segmented sequence has neither NULLs nor virtual components, a
 * NULL is inserted after the first component and NormalizeNullsBetween
 * fills in the rest.  Every NULL component is then rewritten as a WHOLE
 * location on the new virtual Bioseq, the segmented Bioseq length grows by
 * 100 for each one, the virtual Bioseq is appended to the parts set, and
 * features are reindexed.
 *
 * Previously hasvirt was never set, so the "no nulls or virtuals" guard
 * always passed: a sequence that already used virtual gap parts had extra
 * NULLs inserted around them and each was turned into a second gap.  The
 * detection loop now mirrors the one in ForceSegSeqNullToVirtual.
 *
 *   sep  top-level Seq-entry to process
 */
extern void SegSeqNullToVirtual (SeqEntryPtr sep)

{
  BioseqSetPtr  bssp;
  Uint2         entityID;
  GBData        gbd;
  Boolean       hasnulls = FALSE;
  Boolean       hasvirt = FALSE;
  Boolean       missingMolInfo = FALSE;
  Uint1         mol = 0;
  SeqEntryPtr   partssep = NULL;
  BioseqSetPtr  partsset = NULL;
  BioseqPtr     segbsp = NULL;
  SeqEntryPtr   segsep = NULL;
  SeqEntryPtr   segseq = NULL;
  BioseqSetPtr  segset = NULL;
  SeqLocPtr     slp;
  BioseqPtr     vbsp;
  BioseqPtr     virtbsp;
  ValNode       vn;
  SeqLocPtr     vslp;

  if (sep == NULL) return;
  if (! IS_Bioseq_set (sep)) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  /* skip pop/phy/mut/eco/wgs sets for now */
  if (bssp->_class >= BioseqseqSet_class_mut_set && bssp->_class <= BioseqseqSet_class_eco_set) return;
  if (bssp->_class == BioseqseqSet_class_wgs_set) return;
  if (bssp->_class == BioseqseqSet_class_small_genome_set) return;

  /* find SeqEntryPtr parent of single seg set */
  SeqEntryExplore (sep, (Pointer) &segsep, FindSegSet);
  if (segsep == NULL) return;

  /* do not handle just segset without nucprot set wrapper for now */
  if (segsep == sep) return;

  /* skip the few cases of mixed molecule types */
  VisitBioseqsInSep (segsep, (Pointer) &mol, LookForMixedMols);
  if (mol != Seq_mol_dna && mol != Seq_mol_rna && mol != Seq_mol_aa && mol != Seq_mol_na && mol != Seq_mol_other) return;

  /* skip mixed gbblock source or origin fields for now */
  MemSet ((Pointer) &gbd, 0, sizeof (gbd));
  VisitDescriptorsInSep (sep, (Pointer) &gbd, FindGenBankDiffs);
  if (gbd.mixedsources || gbd.mixedorigins) return;

  /* avoid copying protein molinfo if nucleotide molinfo is missing */
  VisitBioseqsInSep (sep, (Pointer) &missingMolInfo, CheckForMissingMolInfo);
  if (missingMolInfo) return;

  /* expected layout: seg set -> [segmented Bioseq, parts set] */
  if (! IS_Bioseq_set (segsep)) return;
  segset = (BioseqSetPtr) segsep->data.ptrvalue;
  if (segset == NULL) return;
  segseq = segset->seq_set;
  if (segseq == NULL) return;
  if (! IS_Bioseq (segseq)) return;
  segbsp = (BioseqPtr) segseq->data.ptrvalue;
  if (segbsp == NULL) return;
  if (segbsp->repr != Seq_repr_seg) return;
  partssep = segseq->next;
  if (partssep == NULL) return;
  if (! IS_Bioseq_set (partssep)) return;
  partsset = (BioseqSetPtr) partssep->data.ptrvalue;
  if (partsset == NULL) return;

  entityID = segbsp->idx.entityID;

  /* check to see if it doesn't need conversion: look for existing nulls and virtual parts */
  MemSet ((Pointer) &vn, 0, sizeof (vn));
  vn.choice = SEQLOC_MIX;
  vn.extended = 0;
  vn.data.ptrvalue = segbsp->seq_ext;
  vn.next = NULL;
  slp = SeqLocFindNext (&vn, NULL);
  while (slp != NULL) {
    if (slp->choice == SEQLOC_NULL) {
      hasnulls = TRUE;
    } else {
      /* a component that resolves to a virtual Bioseq already acts as a gap */
      vbsp = BioseqFindFromSeqLoc (slp);
      if (vbsp != NULL && vbsp->repr == Seq_repr_virtual) {
        hasvirt = TRUE;
      }
    }
    slp = SeqLocFindNext (&vn, slp);
  }

  /* if no nulls or virtuals, add a null and then fill in between all components */
  if (! hasnulls) {
    if (! hasvirt) {
      slp = SeqLocFindNext (&vn, NULL);
      if (slp != NULL) {
        vslp = ValNodeNew (NULL);
        if (vslp != NULL) {
          vslp->choice = SEQLOC_NULL;
          vslp->next = slp->next;
          slp->next = vslp;
          NormalizeNullsBetween (&vn);
          hasnulls = TRUE;
        }
      }
    }
  }

  if (! hasnulls) return;

  /* virtual part of conventional 100 base gap length */
  virtbsp = BioseqNew ();
  if (virtbsp == NULL) return;
  virtbsp->id = MakeUniqueSeqID ("virtual_");
  virtbsp->repr = Seq_repr_virtual;
  virtbsp->mol = segbsp->mol;
  virtbsp->seq_data_type = 0;
  virtbsp->seq_ext_type = 0;
  virtbsp->seq_data = NULL;
  virtbsp->seq_ext = NULL;
  virtbsp->length = 100;

  /* put virtual segments between real segments */
  MemSet ((Pointer) &vn, 0, sizeof (vn));
  vn.choice = SEQLOC_MIX;
  vn.extended = 0;
  vn.data.ptrvalue = segbsp->seq_ext;
  vn.next = NULL;
  slp = SeqLocFindNext (&vn, NULL);
  while (slp != NULL) {
    if (slp->choice == SEQLOC_NULL) {
      /* each former NULL now points at the shared virtual Bioseq */
      slp->choice = SEQLOC_WHOLE;
      slp->data.ptrvalue = SeqIdDup (virtbsp->id);
      segbsp->length += virtbsp->length;
    }
    slp = SeqLocFindNext (&vn, slp);
  }

  /* package virtual bioseq in parts */
  ValNodeAddPointer (&(partsset->seq_set), 1, (Pointer) virtbsp);

  /* reindex for new segmented bioseq length */
  SeqMgrIndexFeatures (entityID, NULL);
}
6958 
6959 // Only for SegSets
/*
 * Unconditional variant of the NULL-to-virtual conversion (segmented sets
 * only): no set-class, molecule-type, GenBank-block or MolInfo screening is
 * performed.
 *
 * Locates the seg set via FindSegSet and requires the layout "segmented
 * Bioseq followed by a parts set".  If the segmented sequence contains
 * neither NULL components nor components resolving to a virtual Bioseq, a
 * NULL is inserted after the first component and NormalizeNullsBetween is
 * run.  Every NULL component is then turned into a WHOLE location on a new
 * 100-base virtual Bioseq, the segmented Bioseq length is increased by 100
 * per replaced NULL, the virtual Bioseq is appended to the parts set, and
 * features are reindexed.
 *
 *   sep  Seq-entry to search for a seg set
 */
static void ForceSegSeqNullToVirtual (SeqEntryPtr sep)

{
  SeqLocPtr     comp;
  BioseqPtr     compBsp;
  Uint2         entityID;
  BioseqPtr     gapBsp;
  BioseqPtr     master;
  SeqEntryPtr   masterSep;
  ValNode       mix;
  SeqLocPtr     nullLoc;
  SeqEntryPtr   partsSep;
  BioseqSetPtr  partsSet;
  Boolean       sawNull = FALSE;
  Boolean       sawVirtual = FALSE;
  BioseqSetPtr  segSet;
  SeqEntryPtr   segSetSep = NULL;

  if (sep == NULL) return;

  SeqEntryExplore (sep, (Pointer) &segSetSep, FindSegSet);
  if (segSetSep == NULL || ! IS_Bioseq_set (segSetSep)) return;

  segSet = (BioseqSetPtr) segSetSep->data.ptrvalue;
  if (segSet == NULL) return;

  /* first member must be the segmented master sequence */
  masterSep = segSet->seq_set;
  if (masterSep == NULL || ! IS_Bioseq (masterSep)) return;
  master = (BioseqPtr) masterSep->data.ptrvalue;
  if (master == NULL || master->repr != Seq_repr_seg) return;

  /* second member must be the parts set */
  partsSep = masterSep->next;
  if (partsSep == NULL || ! IS_Bioseq_set (partsSep)) return;
  partsSet = (BioseqSetPtr) partsSep->data.ptrvalue;
  if (partsSet == NULL) return;

  entityID = master->idx.entityID;

  /* wrap the segment list in a temporary mix location so it can be walked */
  MemSet ((Pointer) &mix, 0, sizeof (mix));
  mix.choice = SEQLOC_MIX;
  mix.data.ptrvalue = master->seq_ext;

  /* survey the components for existing nulls and virtual parts */
  for (comp = SeqLocFindNext (&mix, NULL); comp != NULL; comp = SeqLocFindNext (&mix, comp)) {
    if (comp->choice == SEQLOC_NULL) {
      sawNull = TRUE;
      continue;
    }
    compBsp = BioseqFindFromSeqLoc (comp);
    if (compBsp != NULL && compBsp->repr == Seq_repr_virtual) {
      sawVirtual = TRUE;
    }
  }

  /* nothing gap-like present: seed one null and let NormalizeNullsBetween spread them */
  if (! sawNull && ! sawVirtual) {
    comp = SeqLocFindNext (&mix, NULL);
    if (comp != NULL) {
      nullLoc = ValNodeNew (NULL);
      if (nullLoc != NULL) {
        nullLoc->choice = SEQLOC_NULL;
        nullLoc->next = comp->next;
        comp->next = nullLoc;
        NormalizeNullsBetween (&mix);
        sawNull = TRUE;
      }
    }
  }

  if (! sawNull) return;

  /* the single virtual Bioseq that stands in for every gap (conventional length 100) */
  gapBsp = BioseqNew ();
  if (gapBsp == NULL) return;
  gapBsp->id = MakeUniqueSeqID ("virtual_");
  gapBsp->repr = Seq_repr_virtual;
  gapBsp->mol = master->mol;
  gapBsp->seq_data_type = 0;
  gapBsp->seq_ext_type = 0;
  gapBsp->seq_data = NULL;
  gapBsp->seq_ext = NULL;
  gapBsp->length = 100;

  /* rebuild the wrapper from the master and point every null at the virtual Bioseq */
  MemSet ((Pointer) &mix, 0, sizeof (mix));
  mix.choice = SEQLOC_MIX;
  mix.data.ptrvalue = master->seq_ext;

  for (comp = SeqLocFindNext (&mix, NULL); comp != NULL; comp = SeqLocFindNext (&mix, comp)) {
    if (comp->choice != SEQLOC_NULL) continue;
    comp->choice = SEQLOC_WHOLE;
    comp->data.ptrvalue = SeqIdDup (gapBsp->id);
    master->length += gapBsp->length;
  }

  /* the virtual Bioseq lives in the parts set alongside the real segments */
  ValNodeAddPointer (&(partsSet->seq_set), 1, (Pointer) gapBsp);

  /* master length changed, so feature indexes must be rebuilt */
  SeqMgrIndexFeatures (entityID, NULL);
}
7072 
7073 // Only for SegSets
/*
 * Converts the single seg set found under sep into a delta sequence
 * (segmented sets only).  Worker for ConvertSegSetToDeltaSeqEx.
 *
 * Returns FALSE without converting when sep is not a usable Bioseq-set,
 * when its class is in the mut_set..eco_set range or is wgs / small genome,
 * when no seg set is found or the seg set is sep itself, when the molecule
 * type reported by LookForMixedMols is not one of dna/rna/aa/na/other,
 * when MolInfo is missing, when the seg set does not start with a
 * segmented Bioseq followed by another entry, or when allocation of the
 * delta Bioseq or its Seq-entry node fails.  Returns TRUE after a
 * completed conversion.
 *
 * NOTE(review): the two late failure returns (delta Bioseq or node
 * allocation) happen after the GenBank block, MolInfo and title copies
 * have been collected and after the entry has been modified; those copies
 * (and, in the second case, the delta Bioseq) appear to be leaked - confirm.
 *
 *   sep       Seq-entry containing the seg set
 *   entityID  entity whose feature indexes are cleared at the end
 *   cleanup   when TRUE, run SeriousSeqEntryCleanup and
 *             RemoveAllNcbiCleanupUserObjects on sep before returning
 */
static Boolean ConvertSegSetToDeltaSeqInt(SeqEntryPtr sep, Uint2 entityID, Boolean cleanup)

{
  DatePtr        chosenDate = NULL;
  DatePtr        dateCopy = NULL;
  BioseqPtr      deltabsp;
  GBBlockPtr     gbblock = NULL;
  GBData         gbd;
  BioseqPtr      master = NULL;
  SeqEntryPtr    masterSep = NULL;
  Boolean        missingMolInfo = FALSE;
  Uint1          mol = 0;
  MolInfoPtr     molinfo = NULL;
  ValNodePtr     newEntry;
  SeqEntryPtr    partssep = NULL;
  SeqEntryPtr    segsep = NULL;
  BioseqSetPtr   segset = NULL;
  SeqIdPtr       swapId;
  CharPtr        title = NULL;
  BioseqSetPtr   top;

  if (sep == NULL || ! IS_Bioseq_set (sep)) return FALSE;
  top = (BioseqSetPtr) sep->data.ptrvalue;
  if (top == NULL) return FALSE;

  /* pop/phy/mut/eco, wgs and small genome sets are not converted at this level */
  if (top->_class >= BioseqseqSet_class_mut_set && top->_class <= BioseqseqSet_class_eco_set) return FALSE;
  if (top->_class == BioseqseqSet_class_wgs_set || top->_class == BioseqseqSet_class_small_genome_set) return FALSE;

  /* locate the Seq-entry that holds the single seg set */
  SeqEntryExplore (sep, (Pointer) &segsep, FindSegSet);
  if (segsep == NULL) return FALSE;

  /* a bare seg set with no wrapping set is not handled */
  if (segsep == sep) return FALSE;

  /* only proceed when the parts agree on one recognised molecule type */
  VisitBioseqsInSep (segsep, (Pointer) &mol, LookForMixedMols);
  if (! (mol == Seq_mol_dna || mol == Seq_mol_rna || mol == Seq_mol_aa || mol == Seq_mol_na || mol == Seq_mol_other)) return FALSE;

  /* GenBank block differences are collected but currently not acted upon */
  MemSet ((Pointer) &gbd, 0, sizeof (gbd));
  VisitDescriptorsInSep (sep, (Pointer) &gbd, FindGenBankDiffs);

  /* do not risk copying protein molinfo when nucleotide molinfo is absent */
  VisitBioseqsInSep (sep, (Pointer) &missingMolInfo, CheckForMissingMolInfo);
  if (missingMolInfo) return FALSE;

  /* expected layout: seg set -> [segmented master Bioseq, parts entry] */
  if (! IS_Bioseq_set (segsep)) return FALSE;
  segset = (BioseqSetPtr) segsep->data.ptrvalue;
  if (segset == NULL) return FALSE;
  masterSep = segset->seq_set;
  if (masterSep == NULL || ! IS_Bioseq (masterSep)) return FALSE;
  master = (BioseqPtr) masterSep->data.ptrvalue;
  if (master == NULL || master->repr != Seq_repr_seg) return FALSE;
  partssep = masterSep->next;
  if (partssep == NULL) return FALSE;

  /* pick one cit-sub date across the parts, then stamp a copy of it on all of them */
  VisitBioseqsInSep (partssep, (Pointer) &chosenDate, CompPartCitSubDates);
  dateCopy = DateDup (chosenDate);
  VisitBioseqsInSep (partssep, (Pointer) dateCopy, SyncPartCitSubDates);
  DateFree (dateCopy);

  /* pubs move up from the parts before any virtual sequence is added */
  SeqEntryPubsAsn4 (sep, FALSE);

  /* whatever pub and source descriptors remain on parts become features */
  VisitBioseqsInSep (partssep, NULL, MovePartDescToFeat);

  /* conventional 100-base virtual segments go between the real segments */
  ForceSegSeqNullToVirtual (sep);

  /* feature locations are remapped onto the segmented master */
  VisitFeaturesInSep (sep, NULL, MapSegFeatToMaster);

  /* gather the first GenBank block, MolInfo and title for the delta sequence */
  VisitDescriptorsInSep (segsep, (Pointer) &gbblock, CopyFirstGBBlock);
  VisitDescriptorsInSep (segsep, (Pointer) &molinfo, CopyFirstMolInfo);
  VisitDescriptorsInSep (segsep, (Pointer) &title, CopyFirstTitle);

  if (gbblock == NULL) {
    gbblock = GBBlockNew ();
  }

  /* part accessions become secondary accessions */
  VisitBioseqsInSep (partssep, (Pointer) gbblock, AddPartAccns);

  /* empty delta Bioseq with a temporary unique ID */
  deltabsp = BioseqNew ();
  if (deltabsp == NULL) return FALSE;
  deltabsp->id = MakeUniqueSeqID ("delta_");
  deltabsp->repr = Seq_repr_delta;
  deltabsp->mol = master->mol;
  deltabsp->seq_data_type = 0;
  deltabsp->seq_ext_type = 4;
  deltabsp->seq_data = NULL;
  deltabsp->seq_ext = NULL;
  deltabsp->length = 0;

  /* Seq-hist.replaces is filled from the parts */
  VisitBioseqsInSep (partssep, (Pointer) deltabsp, AddPartHist);

  /* the delta sequence is assembled from the segments of the master */
  SeqMgrExploreSegments (master, (Pointer) deltabsp, AddSegToDeltaSeq);

  /* annotations in the seg set are moved onto the delta sequence */
  SeqEntryExplore (segsep, (Pointer) deltabsp, MoveAnnotsToDeltaSeq);

  /* link a new Bioseq entry (choice 1) for the delta sequence right after the seg set */
  newEntry = SeqDescrNew (NULL);
  if (newEntry == NULL) return FALSE;
  newEntry->choice = 1;
  newEntry->data.ptrvalue = (Pointer) deltabsp;
  newEntry->next = segsep->next;
  segsep->next = newEntry;

  /* swap IDs so the delta sequence keeps the segmented Bioseq's identifiers */
  swapId = master->id;
  master->id = deltabsp->id;
  deltabsp->id = swapId;

  SeqMgrDeleteFromBioseqIndex (master);
  SeqMgrReplaceInBioseqIndex (deltabsp);

  /* the old seg set is marked and removed */
  segset->idx.deleteme = TRUE;
  DeleteMarkedObjects (0, OBJ_SEQENTRY, sep);

  /* attach the collected descriptors to the delta sequence */
  if (title != NULL) {
    SeqDescrAddPointer (&(deltabsp->descr), Seq_descr_title, (Pointer) title);
  }
  if (molinfo != NULL) {
    SeqDescrAddPointer (&(deltabsp->descr), Seq_descr_molinfo, (Pointer) molinfo);
  }
  if (gbblock != NULL) {
    SeqDescrAddPointer (&(deltabsp->descr), Seq_descr_genbank, (Pointer) gbblock);
  }

  BioseqPack (deltabsp);

  /* feature indexes no longer reflect the record */
  SeqMgrClearFeatureIndexes (entityID, NULL);

  if (cleanup) {
    SeriousSeqEntryCleanup (sep, NULL, NULL);
    RemoveAllNcbiCleanupUserObjects (sep);
  }

  return TRUE;
}
7242 
7243 // Only for SegSets
/*
 * Converts seg sets under sep to delta sequences (segmented sets only).
 *
 * When the top-level set class is in the mut_set..eco_set range, or is a
 * wgs or small genome set, each direct member of the set is converted
 * individually and the individual results are ignored (the function then
 * returns TRUE).  Otherwise sep itself is converted and that result is
 * returned.  Returns FALSE if sep is NULL, is not a Bioseq-set, or has no
 * set data.
 *
 * Afterwards any remaining seg sets are marked via MarkSegSets and
 * deleted, feature indexes are cleared, and SeriousSeqEntryCleanup plus
 * RemoveAllNcbiCleanupUserObjects are run on sep.
 * NOTE(review): this final cleanup runs regardless of the cleanup flag,
 * which is only forwarded to the per-entry worker - confirm this is intended.
 *
 *   sep      top-level Seq-entry
 *   cleanup  forwarded to ConvertSegSetToDeltaSeqInt
 */
extern Boolean ConvertSegSetToDeltaSeqEx (SeqEntryPtr sep, Boolean cleanup)

{
  Boolean       converted = TRUE;
  Uint2         entityID;
  SeqEntryPtr   member;
  Boolean       perMember;
  BioseqSetPtr  top;

  if (sep == NULL || ! IS_Bioseq_set (sep)) return FALSE;
  top = (BioseqSetPtr) sep->data.ptrvalue;
  if (top == NULL) return FALSE;

  entityID = ObjMgrGetEntityIDForChoice (sep);

  /* container-style sets are handled one member at a time */
  perMember = (Boolean) ((top->_class >= BioseqseqSet_class_mut_set && top->_class <= BioseqseqSet_class_eco_set) ||
                         top->_class == BioseqseqSet_class_wgs_set ||
                         top->_class == BioseqseqSet_class_small_genome_set);

  if (perMember) {
    member = top->seq_set;
    while (member != NULL) {
      ConvertSegSetToDeltaSeqInt (member, entityID, cleanup);
      member = member->next;
    }
  } else {
    converted = ConvertSegSetToDeltaSeqInt (sep, entityID, cleanup);
  }

  /* catch seg sets the worker did not mark, then drop everything marked */
  VisitSetsInSep (sep, NULL, MarkSegSets);
  DeleteMarkedObjects (0, OBJ_SEQENTRY, sep);

  /* feature indexes no longer reflect the record */
  SeqMgrClearFeatureIndexes (entityID, NULL);

  SeriousSeqEntryCleanup (sep, NULL, NULL);
  RemoveAllNcbiCleanupUserObjects (sep);

  return converted;
}
7284 
ConvertSegSetToDeltaSeq(SeqEntryPtr sep)7285 extern Boolean ConvertSegSetToDeltaSeq (SeqEntryPtr sep)
7286 
7287 {
7288   return ConvertSegSetToDeltaSeqEx (sep, TRUE);
7289 }
7290 
7291 // Only for SegSets
/*
 * Bioseq-set callback (segmented sets only).
 *
 * For sets of class "parts", runs MovePartDescToFeat on every Bioseq in
 * the set.  Sets of any other class are ignored.  userdata is not used.
 */
static void ConvPartDescToFeatCallback(
  BioseqSetPtr bssp,
  Pointer userdata
)

{
  if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts) {
    VisitBioseqsInSet (bssp, NULL, MovePartDescToFeat);
  }
}
7303 
7304 // Only for SegSets
/*
 * Visits every Bioseq-set under sep with ConvPartDescToFeatCallback, which
 * turns pub and source descriptors on the members of "parts" sets into
 * features (segmented sets only).
 */
extern void ConvertPartDescToFeat (SeqEntryPtr sep)

{
  /* no userdata is needed; the callback works from the set it is handed */
  VisitSetsInSep (sep, NULL, ConvPartDescToFeatCallback);
}
7310 
7311 // Not used
/*
 * Builds definition lines for the record rooted at sep using default
 * feature requests and the required modifiers.
 *
 * Returns without doing anything when sep is NULL, is neither a Bioseq nor
 * a Bioseq-set, has no data, has an entity ID below 1, or when the
 * modifier table cannot be allocated.
 *
 * Steps, in order: initialise feature requests and modifier bookkeeping,
 * remove nuc-prot set titles, scope lookups to sep, build the feature
 * clause list, choose modifier indices (from the modifier list when the
 * clauses are already unique, otherwise via FindBestModifiers), build the
 * definition lines, release the working data, refresh protein titles, add
 * popset titles, and finally restore the previous scope.
 *
 *   sep  Seq-entry to process
 */
extern void SimpleAutoDef (SeqEntryPtr sep)

{
  ValNodePtr                    clauses = NULL;
  Uint2                         entityID = 0;
  Int4                          i;
  ValNodePtr                    indices = NULL;
  ModifierItemLocalPtr          modList;
  OrganismDescriptionModifiers  odmp;
  BioseqPtr                     oneSeq = NULL;
  BioseqSetPtr                  oneSet = NULL;
  SeqEntryPtr                   previousScope;
  DeflineFeatureRequestList     requests;

  if (sep == NULL) return;

  /* the entity ID comes from whichever object the entry wraps */
  if (IS_Bioseq (sep)) {
    oneSeq = (BioseqPtr) sep->data.ptrvalue;
    if (oneSeq == NULL) return;
    entityID = oneSeq->idx.entityID;
  } else if (IS_Bioseq_set (sep)) {
    oneSet = (BioseqSetPtr) sep->data.ptrvalue;
    if (oneSet == NULL) return;
    entityID = oneSet->idx.entityID;
  } else {
    return;
  }

  if (entityID < 1) return;

  /* one slot per definition-line modifier */
  modList = MemNew (NumDefLineModifiers () * sizeof (ModifierItemLocalData));
  if (modList == NULL) return;

  InitFeatureRequests (&requests);

  SetRequiredModifiers (modList);
  CountModifiers (modList, sep);

  InitOrganismDescriptionModifiers (&odmp, sep);

  RemoveNucProtSetTitles (sep);
  previousScope = SeqEntrySetScope (sep);

  BuildDefLineFeatClauseList (sep, entityID, &requests,
                              DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE,
                              &clauses);

  /* unique clauses need only the listed modifiers; otherwise search for the best set */
  indices = AreFeatureClausesUnique (clauses)
              ? GetModifierIndicesFromModList (modList)
              : FindBestModifiers (sep, modList);

  BuildDefinitionLinesFromFeatureClauseLists (clauses, modList,
                                              indices, &odmp);

  /* release the clause list, the per-modifier value lists, and the table itself */
  DefLineFeatClauseListFree (clauses);
  for (i = 0; i < NumDefLineModifiers (); i++) {
    ValNodeFree (modList [i].values_seen);
  }
  MemFree (modList);
  indices = ValNodeFree (indices);

  /* protein titles are regenerated; popset titles are added without removing old ones first */
  ClearProteinTitlesInNucProts (entityID, NULL);
  InstantiateProteinTitles (entityID, NULL);
  AddPopsetTitles (sep, &requests, DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE);

  SeqEntrySetScope (previousScope);
}
7381 //LCOV_EXCL_STOP
7382 
7383