1 static char const rcsid[] = "$Id: toporg.c,v 6.251 2016/11/02 23:36:10 kans Exp $";
2
3 #include <stdio.h>
4 #include <ncbi.h>
5 #include <sequtil.h>
6 #include <toasn3.h>
7 #include <toporg.h>
8 #include <tfuns.h>
9 #include <utilpub.h>
10
11 /* includes for new cleanup functions from Sequin */
12 #include <sqnutils.h>
13 #include <gather.h>
14 #include <explore.h>
15 #include <subutil.h>
16 #include <tofasta.h>
17 #include <objfdef.h>
18 #include <valid.h>
19
20 static ValNodePtr GetDescrNoTitles (ValNodePtr PNTR descr);
21
22 SeqDescrPtr remove_descr PROTO((SeqDescrPtr head, SeqDescrPtr x));
23 //LCOV_EXCL_START
24 /****************************************************************************
25 * move org-ref, modif, mol_type, date, title and pubs
26 * to seg-set level in segmented set, if not there
27 * move org-ref, modif, date, title to nuc-prot level, if not there
28 * notice that mol_type and pub are not moved to nucprot level
29 *****************************************************************************/
toporg(SeqEntryPtr sep)30 void toporg(SeqEntryPtr sep)
31 {
32
33 SeqEntryExplore(sep, NULL, ChkSegset);
34 SeqEntryExplore(sep, NULL, ChkNucProt);
35
36 return;
37 }
38
39 // Used for segsets
40 /****************************************************************************
41 * check for backbone entry and expand (to whole) OrgRef feature
42 * if it's not whole and the only one OrgRef in the entry
43 * whole fetures would be converted to descr later in FindOrg function
44 *
45 * check if org-ref, modif, mol_type, date and title
46 * are the same for all segments
47 * move them to seg-set level in segmented set, if not already there
48 *****************************************************************************/
ChkSegset(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)49 void ChkSegset (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
50 {
51
52 BioseqSetPtr bssp, tmp;
53 BioseqPtr bsp;
54 SeqEntryPtr segsep, parts = NULL;
55 ValNodePtr vnp = NULL, set_vnp = NULL, upd_date_vnp = NULL;
56 ValNodePtr org, modif, mol, date, v /*, title */;
57 SeqAnnotPtr sap = NULL;
58 SeqFeatPtr tmp_sfp, sfp0, sfp;
59 SeqIdPtr sidp;
60 Pointer pnt;
61 SeqLocPtr slp;
62 SeqIntPtr sip;
63 OrgRefPtr orp;
64 Boolean is_org = FALSE, is_modif = FALSE, is_title = FALSE;
65 Boolean is_date = FALSE, is_mol = FALSE;
66 Boolean is_na = FALSE, is_bb = FALSE, whole = FALSE;
67 Int2 count = 0;
68 Int4 len;
69 static Char msg[51];
70
71 if (IS_Bioseq(sep)) {
72 bsp = (BioseqPtr)(sep->data.ptrvalue);
73 if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
74 return;
75 vnp = bsp->descr;
76 sap = bsp->annot;
77 len = bsp->length;
78 if (bsp->mol != Seq_mol_aa) {
79 is_na = TRUE;
80 }
81 if (bsp == NULL || !is_na) {
82 return;
83 }
84 for (sidp = bsp->id; sidp != NULL; sidp = sidp->next) {
85 pnt = sidp->data.ptrvalue;
86 switch (sidp->choice)
87 {
88 case SEQID_LOCAL: /* local */
89 case SEQID_GIIM: /* giimid */
90 case SEQID_PATENT: /* patent seq id */
91 case SEQID_GENERAL: /* general */
92 case SEQID_GI: /* gi */
93 case SEQID_PDB:
94 case SEQID_GENBANK: /* genbank */
95 case SEQID_EMBL: /* embl */
96 case SEQID_PIR: /* pir */
97 case SEQID_SWISSPROT: /* swissprot */
98 case SEQID_OTHER: /* other */
99 case SEQID_DDBJ:
100 case SEQID_PRF:
101 case SEQID_TPG:
102 case SEQID_TPE:
103 case SEQID_TPD:
104 continue;
105 case SEQID_GIBBSQ: /* gibbseq */
106 case SEQID_GIBBMT: /* gibbmt */
107 is_bb = TRUE;
108 break;
109 default:
110 continue;
111 }
112 }
113 if (!is_bb) {
114 return;
115 }
116 if (sap != NULL && sap->type == 1) /* feature table */
117 {
118 tmp_sfp = (SeqFeatPtr) (sap->data);
119 sfp0 = SeqFeatExtractList(&(tmp_sfp), SEQFEAT_ORG);
120 for (sfp = sfp0; sfp != NULL; sfp = sfp->next) {
121 orp = (OrgRefPtr)(sfp->data.value.ptrvalue);
122 count++;
123 if ((whole = check_whole(sfp, bsp->length)) == TRUE) {
124 break;
125 }
126 }
127 if (!whole && count == 1) {
128 StringNCpy(msg, SeqLocPrint(sfp0->location), 50);
129 ErrPostEx(SEV_WARNING, 0, 2,
130 "Backbone entry source with bad OrgRef feature: %s", msg);
131 slp = sfp0->location;
132 sip = slp->data.ptrvalue;
133 sip->from = 0;
134 sip->to = len-1;
135 }
136 tmp_sfp = tie_feat(tmp_sfp, sfp0);
137 sap->data = tmp_sfp;
138 bsp->annot = sap;
139 }
140 return;
141 }
142
143 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
144 if (bssp->_class != 2) { /* do the rest for segset only */
145 return;
146 }
147 segsep = bssp->seq_set;
148 set_vnp = bssp->descr;
149 if (segsep->next == NULL) {
150 return;
151 }
152 if (!IS_Bioseq(segsep->next)) {
153 tmp = (BioseqSetPtr) (segsep->next->data.ptrvalue); /*segsep->next=parts*/
154 parts = tmp->seq_set;
155 if (parts == NULL) {
156 return;
157 }
158 }
159 /* title = SrchSegChoice(parts, Seq_descr_title);*/
160 org = SrchSegChoice(parts, Seq_descr_org);
161 mol = SrchSegChoice(parts, Seq_descr_mol_type);
162 modif = SrchSegChoice(parts, Seq_descr_modif);
163 date = SrchSegChoice(parts, Seq_descr_update_date);
164 for(v=set_vnp; v != NULL; v = v->next) {
165 if (v->choice == Seq_descr_org) {
166 is_org = TRUE;
167 }
168 if (v->choice == Seq_descr_org) {
169 is_title = TRUE;
170 }
171 if (v->choice == Seq_descr_mol_type) {
172 if (mol && mol->data.intvalue == v->data.intvalue) {
173 is_mol = TRUE;
174 ValNodeFree(mol);
175 }
176 }
177 if (v->choice == Seq_descr_modif) {
178 is_modif = TRUE;
179 }
180 if (v->choice == Seq_descr_update_date) {
181 is_date = TRUE;
182 upd_date_vnp = v;
183 }
184 }
185 /*
186 if (!is_title) {
187 set_vnp = tie_next(set_vnp, title);
188 }
189 */
190 if (!is_modif) {
191 if (set_vnp != NULL) {
192 set_vnp = tie_next(set_vnp, modif);
193 } else {
194 ValNodeLink (&(bssp->descr), modif);
195 }
196 }
197 if (!is_org) {
198 if (set_vnp != NULL) {
199 set_vnp = tie_next(set_vnp, org);
200 } else {
201 ValNodeLink (&(bssp->descr), org);
202 }
203 }
204 if (!is_mol) {
205 if (set_vnp != NULL) {
206 set_vnp = tie_next(set_vnp, mol);
207 } else {
208 ValNodeLink (&(bssp->descr), mol);
209 }
210 }
211 if (!is_date) {
212 if (set_vnp != NULL) {
213 set_vnp = tie_next(set_vnp, date);
214 } else {
215 ValNodeLink (&(bssp->descr), date);
216 }
217 } else if (upd_date_vnp != NULL && date != NULL) {
218 upd_date_vnp->data.ptrvalue = DateFree ((DatePtr) upd_date_vnp->data.ptrvalue);
219 upd_date_vnp->data.ptrvalue = DateDup ((DatePtr) date->data.ptrvalue);
220 }
221 SrchSegSeqMol(parts);
222
223 return;
224 }
225 //LCOV_EXCL_STOP
226
227
PubLabelMatchEx(ValNodePtr vnp1,ValNodePtr vnp2,Boolean checkdates)228 static Int2 PubLabelMatchEx (ValNodePtr vnp1, ValNodePtr vnp2, Boolean checkdates)
229
230 {
231 AffilPtr afp1, afp2;
232 AuthListPtr alp1, alp2;
233 CitGenPtr cgp1 = NULL, cgp2 = NULL;
234 CitSubPtr csp1 = NULL, csp2 = NULL;
235 Int2 ret;
236
237 if (vnp1 == NULL || vnp2 == NULL) return -1;
238 ret = PubLabelMatch (vnp1, vnp2);
239 if (ret != 0) return ret;
240
241 while (vnp1 != NULL) {
242 if (vnp1->choice == PUB_Sub) {
243 csp1 = (CitSubPtr) vnp1->data.ptrvalue;
244 } else if (vnp1->choice == PUB_Gen) {
245 cgp1 = (CitGenPtr) vnp1->data.ptrvalue;
246 }
247 vnp1 = vnp1->next;
248 }
249
250 while (vnp2 != NULL) {
251 if (vnp2->choice == PUB_Sub) {
252 csp2 = (CitSubPtr) vnp2->data.ptrvalue;
253 } else if (vnp2->choice == PUB_Gen) {
254 cgp2 = (CitGenPtr) vnp2->data.ptrvalue;
255 }
256 vnp2 = vnp2->next;
257 }
258
259 if (csp1 != NULL && csp2 != NULL) {
260 if (checkdates && csp1->date != NULL && csp2->date != NULL && DateMatch (csp1->date, csp2->date, FALSE) != 0) return -1;
261 if (StringICmp (csp1->descr, csp2->descr) != 0) return -1;
262 alp1 = csp1->authors;
263 alp2 = csp2->authors;
264 if (alp1 == NULL || alp2 == NULL) return 0;
265 if (AuthListMatch (alp1, alp2, TRUE) != 0) return -1;
266 afp1 = alp1->affil;
267 afp2 = alp2->affil;
268 if (afp1 != NULL && afp2 != NULL) {
269 if (! AsnIoMemComp (afp1, afp2, (AsnWriteFunc) AffilAsnWrite)) return -1;
270 } else if (afp1 != NULL || afp2 != NULL) {
271 return -1;
272 }
273 return 0;
274 }
275
276 if (cgp1 != NULL && cgp2 != NULL) {
277 if (checkdates && cgp1->date != NULL && cgp2->date != NULL && DateMatch (cgp1->date, cgp2->date, FALSE) != 0) return -1;
278 if (StringICmp (cgp1->cit, cgp2->cit) != 0) return -1;
279 alp1 = cgp1->authors;
280 alp2 = cgp2->authors;
281 if (alp1 == NULL || alp2 == NULL) return 0;
282 if (AuthListMatch (alp1, alp2, TRUE) != 0) return -1;
283 afp1 = alp1->affil;
284 afp2 = alp2->affil;
285 if (afp1 != NULL && afp2 != NULL) {
286 if (! AsnIoMemComp (afp1, afp2, (AsnWriteFunc) AffilAsnWrite)) return -1;
287 } else if (afp1 != NULL || afp2 != NULL) {
288 return -1;
289 }
290 return 0;
291 }
292
293 return 0;
294 }
295
296 //LCOV_EXCL_START
297 // Used for segsets
/* For every pub in the list `pub`, walk the chain of entries starting at
 * sep and drop matching pub descriptors from each entry's descr chain.
 * A descriptor is dropped only when PubLabelMatchEx (with date checking)
 * reports a match AND the list pub carries no name, fig, num, maploc or
 * comment.  Each entry's data pointer is used as a Bioseq. */
static void RemovePubFromParts(SeqEntryPtr sep, ValNodePtr pub)
{
	ValNodePtr  shared, cur, after;
	SeqEntryPtr part;
	BioseqPtr   part_bsp;
	PubdescPtr  shared_pdp, part_pdp;

	for (shared = pub; shared != NULL; shared = shared->next) {
		shared_pdp = shared->data.ptrvalue;
		for (part = sep; part != NULL; part = part->next) {
			part_bsp = (BioseqPtr)(part->data.ptrvalue);
			cur = part_bsp->descr;
			while (cur != NULL) {
				/* remember the successor: cur may be removed below */
				after = cur->next;
				if (cur->choice == Seq_descr_pub) {
					part_pdp = cur->data.ptrvalue;
					if (PubLabelMatchEx (shared_pdp->pub, part_pdp->pub, TRUE) == 0
					    && shared_pdp->name == NULL && shared_pdp->fig == NULL
					    && shared_pdp->num == NULL && shared_pdp->maploc == NULL
					    && shared_pdp->comment == NULL) {
						part_bsp->descr = remove_descr(part_bsp->descr, cur);
					}
				}
				cur = after;
			}
		}
	}
}
328 //LCOV_EXCL_STOP
329
330
331 /***************************************************************************
332 * 0 match
333 * 1 no match
334 * 2 type unknown or not implemented
335 ***************************************************************************/
NumberingMatch(ValNodePtr num1,ValNodePtr num2)336 static Int2 NumberingMatch(ValNodePtr num1, ValNodePtr num2)
337 {
338 NumContPtr nc1, nc2;
339 NumEnumPtr ne1, ne2;
340 NumRealPtr nr1, nr2;
341
342 if (num1 == NULL || num2 == NULL)
343 return 0;
344 if (num1->choice != num2->choice)
345 return 1;
346 switch(num1->choice)
347 {
348 case Numbering_cont:
349 nc1 = (NumContPtr) num1->data.ptrvalue;
350 nc2 = (NumContPtr) num2->data.ptrvalue;
351 if (nc1->refnum == nc2->refnum) {
352 return 0;
353 }
354 break;
355 case Numbering_enum:
356 ne1 = (NumEnumPtr) num1->data.ptrvalue;
357 ne2 = (NumEnumPtr) num2->data.ptrvalue;
358 if (ne1->num != ne2->num)
359 return 1;
360 return (StringCmp(ne1->buf, ne2->buf));
361 case Numbering_ref_source:
362 break;
363 case Numbering_ref_align:
364 break;
365 case Numbering_real:
366 nr1 = (NumRealPtr) num1->data.ptrvalue;
367 nr2 = (NumRealPtr) num2->data.ptrvalue;
368 return (StringCmp(nr1->units, nr2->units));
369 default:
370 break;
371 }
372 return 2;
373 }
374
AlpFromPdp(PubdescPtr pdp,CitSubPtr csp)375 static AuthListPtr AlpFromPdp (PubdescPtr pdp, CitSubPtr csp)
376
377 {
378 AuthListPtr alp = NULL;
379 CitArtPtr cap;
380 CitBookPtr cbp;
381 CitGenPtr cgp;
382 CitPatPtr cpp;
383 ValNodePtr vnp;
384
385 if (csp != NULL) {
386 alp = csp->authors;
387 if (alp != NULL) return alp;
388 }
389 if (pdp == NULL) return NULL;
390
391 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
392 switch (vnp->choice) {
393 case PUB_Gen :
394 cgp = (CitGenPtr) vnp->data.ptrvalue;
395 if (cgp != NULL) {
396 alp = cgp->authors;
397 }
398 break;
399 case PUB_Sub :
400 csp = (CitSubPtr) vnp->data.ptrvalue;
401 if (csp != NULL) {
402 alp = csp->authors;
403 }
404 break;
405 case PUB_Article :
406 cap = (CitArtPtr) vnp->data.ptrvalue;
407 if (cap != NULL) {
408 alp = cap->authors;
409 }
410 break;
411 case PUB_Book :
412 case PUB_Proc :
413 case PUB_Man :
414 cbp = (CitBookPtr) vnp->data.ptrvalue;
415 if (cbp != NULL) {
416 alp = cbp->authors;
417 }
418 break;
419 case PUB_Patent :
420 cpp = (CitPatPtr) vnp->data.ptrvalue;
421 if (cpp != NULL) {
422 alp = cpp->authors;
423 }
424 break;
425 default :
426 break;
427 }
428
429 if (alp != NULL) return alp;
430 }
431
432 return NULL;
433 }
434
PubdescMatch(PubdescPtr p1,PubdescPtr p2)435 static Boolean PubdescMatch (PubdescPtr p1, PubdescPtr p2)
436 {
437 AuthListPtr alp1, alp2;
438
439 if (p1 == NULL || p2 == NULL)
440 return TRUE;
441 if (p1->name && p2->name) {
442 if (StringCmp(p1->name, p2->name) != 0)
443 return FALSE;
444 }
445 if (p1->fig && p2->fig) {
446 if (StringCmp(p1->fig, p2->fig) != 0)
447 return FALSE;
448 }
449 if (p1->maploc && p2->maploc) {
450 if (StringCmp(p1->maploc, p2->maploc) != 0)
451 return FALSE;
452 }
453 if (p1->comment && p2->comment) {
454 if (StringCmp(p1->comment, p2->comment) != 0)
455 return FALSE;
456 }
457 if (p1->num && p2->num) {
458 if (NumberingMatch(p1->num, p2->num) != 0)
459 return FALSE;
460 }
461 /* do full author match */
462 alp1 = AlpFromPdp (p1, NULL);
463 alp2 = AlpFromPdp (p2, NULL);
464 if (alp1 != NULL && alp2 != NULL) {
465 if (AuthListMatch (alp1, alp2, TRUE) != 0)
466 return FALSE;
467 }
468 return TRUE;
469 }
470
471 //LCOV_EXCL_START
472 // Used for segsets
473 /* return list of pubs that are the same in all segmets */
CheckSegsForPub(SeqEntryPtr sep)474 static ValNodePtr CheckSegsForPub(SeqEntryPtr sep)
475 {
476 BioseqPtr bsp, b;
477 ValNodePtr vnp, v, list = NULL, vnpnext, new, next;
478 PubdescPtr pdp, p, new_p;
479 Boolean same;
480 SeqEntryPtr s;
481
482 if (sep == NULL) {
483 return NULL;
484 }
485 if (!IS_Bioseq(sep)) {
486 return NULL;
487 }
488 bsp = (BioseqPtr)(sep->data.ptrvalue);
489 /* first bioseq from parts */
490 for (vnp=bsp->descr; vnp; vnp=vnpnext) {
491 vnpnext = vnp->next;
492 if (vnp->choice != Seq_descr_pub)
493 continue;
494 pdp = vnp->data.ptrvalue;
495 for (s= sep->next, same = FALSE; s; s=s->next) {
496 b = (BioseqPtr)(s->data.ptrvalue);
497 /* added to skip virtual spacers */
498 if (b->repr == Seq_repr_virtual && b->descr == NULL) continue;
499 for (v=b->descr; v; v=next) {
500 next = v->next;
501 if (v->choice != Seq_descr_pub)
502 continue;
503 p = v->data.ptrvalue;
504 if (PubLabelMatchEx (pdp->pub, p->pub, FALSE) == 0) {
505 if (PubdescMatch(pdp, p) == TRUE) {
506 same = TRUE;
507 break;
508 }
509 }
510 }
511 if (v == NULL) {
512 same = FALSE;
513 break;
514 }
515 }
516 if (same == TRUE) {
517 new = SeqDescrNew(NULL);
518 new->choice = Seq_descr_pub;
519 new_p = AsnIoMemCopy(pdp, (AsnReadFunc) PubdescAsnRead,
520 (AsnWriteFunc) PubdescAsnWrite);
521 new->data.ptrvalue = new_p;
522 list = tie_next(list, new);
523 }
524 }
525 return list;
526 }
527
528 // used for segsets
529 /* move identical pubs in segmented set to the set level */
MoveSegmPubs(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)530 void MoveSegmPubs (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
531 {
532
533 BioseqSetPtr bssp, tmp;
534 SeqEntryPtr segsep, parts = NULL;
535 ValNodePtr v, pub, vv, next;
536 PubdescPtr pdp, pdpv;
537
538 if (IS_Bioseq(sep)) {
539 return;
540 }
541 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
542 if (bssp->_class != 2) { /* do the rest for segset only */
543 return;
544 }
545 segsep = bssp->seq_set;
546 if (segsep->next == NULL) {
547 return;
548 }
549 if (!IS_Bioseq(segsep->next)) {
550 tmp = (BioseqSetPtr) (segsep->next->data.ptrvalue); /*segsep->next=parts*/
551 parts = tmp->seq_set;
552 if (parts == NULL) {
553 return;
554 }
555 }
556 /* find identical pubs in all segments */
557 pub = CheckSegsForPub(parts);
558 if (pub) {
559 RemovePubFromParts(parts, pub);
560 }
561 /* check if pub is already on the set descr */
562 for(v=bssp->descr; v != NULL; v = v->next) {
563 if (v->choice != Seq_descr_pub)
564 continue;
565 for (vv = pub; vv; vv = next) {
566 next = vv->next;
567 pdp = vv->data.ptrvalue;
568 pdpv = v->data.ptrvalue;
569 if (PubLabelMatchEx (pdp->pub, pdpv->pub, FALSE) == 0) {
570 PubdescFree(pdp);
571 pub = remove_node(pub, vv);
572 }
573 }
574 }
575
576 bssp->descr = tie_next(bssp->descr, pub);
577
578 return;
579 }
580
581 // Used for segsets
AllPartsHaveTitles(BioseqSetPtr bssp)582 static Boolean AllPartsHaveTitles(BioseqSetPtr bssp)
583
584 {
585 BioseqPtr bsp;
586 SeqEntryPtr sep;
587 CharPtr str;
588
589 if (bssp == NULL) return FALSE;
590 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
591 if (sep->choice != 1) return FALSE;
592 bsp = (BioseqPtr) sep->data.ptrvalue;
593 if (bsp == NULL) return FALSE;
594 str = BioseqGetTitle (bsp);
595 if (StringHasNoText (str)) return FALSE;
596 }
597
598 return TRUE;
599 }
600
ChkNucProt(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)601 void ChkNucProt (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
602 {
603 BioseqSetPtr bssp, tmp, bssp2, bssp3;
604 BioseqPtr bsp = NULL, bsp2, bsp3, bsp4;
605 SeqEntryPtr seqsep, sep2, sep3, sep4;
606 ValNodePtr descr = NULL, head = NULL, vnp, next;
607 Int2Ptr ip;
608 Boolean is_org = FALSE, is_modif = FALSE, is_title = FALSE;
609 Boolean is_date = FALSE, is_nuc /* , is_pub = FALSE */;
610 CharPtr npstitle = NULL, seqtitle = NULL;
611 ValNodePtr PNTR prev;
612 Char ch;
613
614 if (IS_Bioseq(sep)) {
615 return;
616 }
617 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
618 if (bssp->_class != BioseqseqSet_class_nuc_prot) { /* do the rest for nuc-prot only */
619 return;
620 }
621 seqsep = bssp->seq_set;
622 if (seqsep == NULL || seqsep->data.ptrvalue == NULL) return;
623
624 /* if protein first, extract nucleotide, put first */
625 if (seqsep->choice == 1) {
626 bsp3 = seqsep->data.ptrvalue;
627 if (bsp3 != NULL && ISA_aa (bsp3->mol)) {
628 prev = (ValNodePtr PNTR) &(bssp->seq_set);
629 vnp = bssp->seq_set;
630 while (vnp != NULL) {
631 next = vnp->next;
632 is_nuc = FALSE;
633 if (IS_Bioseq (vnp)) {
634 bsp4 = (BioseqPtr) vnp->data.ptrvalue;
635 if (bsp4 != NULL && ISA_na (bsp4->mol)) {
636 is_nuc = TRUE;
637 }
638 }
639 if (is_nuc) {
640 *prev = vnp->next;
641 ValNodeLink (&head, vnp);
642 vnp->next = NULL;
643 } else {
644 prev = (ValNodePtr PNTR) &(vnp->next);
645 }
646 vnp = next;
647 }
648 if (head != NULL) {
649 vnp = bssp->seq_set;
650 bssp->seq_set = head;
651 ValNodeLink (&(bssp->seq_set), vnp);
652 }
653 }
654
655 seqsep = bssp->seq_set;
656 if (seqsep == NULL || seqsep->data.ptrvalue == NULL) return;
657 }
658
659 if (seqsep->choice == 1) {
660 bsp = seqsep->data.ptrvalue;
661 descr = bsp->descr;
662 } else if (seqsep->choice == 2) {
663 tmp = seqsep->data.ptrvalue;
664 descr = tmp->descr;
665 }
666 ip = (Int2Ptr) data;
667 if (bssp->descr == NULL) {
668 bssp->descr = GetDescrNoTitles(&descr);
669 } else {
670 for (vnp = bssp->descr; vnp!= NULL; vnp= vnp->next) {
671 if (vnp->choice == Seq_descr_title) {
672 is_title = TRUE;
673 npstitle = (CharPtr) vnp->data.ptrvalue;
674 }
675 if (vnp->choice == Seq_descr_org) {
676 is_org = TRUE;
677 }
678 if (vnp->choice == Seq_descr_modif) {
679 is_modif = TRUE;
680 }
681 if (vnp->choice == Seq_descr_update_date) {
682 is_date = TRUE;
683 }
684 /* if (vnp->choice == Seq_descr_pub) {
685 is_pub = TRUE;
686 }
687 */
688 }
689 /* look for old style nps title, remove if based on nucleotide title, also remove exact duplicate */
690 if (npstitle != NULL) {
691 seqtitle = SeqEntryGetTitle (seqsep);
692 if (seqtitle != NULL) {
693 ch = *npstitle;
694 while (ch != '\0' && ch == *seqtitle) {
695 npstitle++;
696 ch = *npstitle;
697 seqtitle++;
698 }
699 if (ch == '\0' || StringCmp (npstitle, ", and translated products") == 0) {
700 vnp = ValNodeExtractList (&(bssp->descr), Seq_descr_title);
701 ValNodeFreeData (vnp);
702 if (ip != NULL) {
703 *ip = 1;
704 }
705 } else {
706 /* now removing any unrelated title */
707 vnp = ValNodeExtractList (&(bssp->descr), Seq_descr_title);
708 ValNodeFreeData (vnp);
709 if (ip != NULL) {
710 *ip = 2;
711 }
712 }
713 } else if (bsp != NULL) {
714 /* if no nucleotide title, move nps title to it */
715 vnp = ValNodeExtractList (&(bssp->descr), Seq_descr_title);
716 bsp->descr = ValNodeLink (&(bsp->descr), vnp);
717 descr = bsp->descr;
718 if (ip != NULL) {
719 *ip = 3;
720 }
721 } else if (seqsep->choice == 2) {
722 if (ip != NULL) {
723 *ip = 4;
724 }
725 /* get segmented sequence in segset */
726 sep2 = bssp->seq_set;
727 if (sep2 != NULL && sep2->choice == 2 && SeqEntryGetTitle (sep2) == NULL) {
728 bssp2 = sep2->data.ptrvalue;
729 if (bssp2 != NULL && bssp2->_class == BioseqseqSet_class_segset) {
730 sep3 = bssp2->seq_set;
731 if (sep3 != NULL && sep3->choice == 1) {
732 bsp2 = sep3->data.ptrvalue;
733 if (bsp2 != NULL && BioseqGetTitle (bsp2) == NULL) {
734 sep4 = sep3->next;
735 if (sep4 != NULL && sep4->choice == 2) {
736 bssp3 = sep4->data.ptrvalue;
737 if (bssp3 != NULL && bssp3->_class == BioseqseqSet_class_parts) {
738 if (AllPartsHaveTitles (bssp3)) {
739 /* if no segmented nucleotide bioseq title, move nps title to it */
740 vnp = ValNodeExtractList (&(bssp->descr), Seq_descr_title);
741 bsp2->descr = ValNodeLink (&(bsp2->descr), vnp);
742 if (ip != NULL) {
743 *ip = 5;
744 }
745 } else {
746 if (ip != NULL) {
747 *ip = 6;
748 }
749 }
750 }
751 }
752 }
753 }
754 }
755 }
756 } else {
757 if (ip != NULL) {
758 *ip = 7;
759 }
760 }
761 }
762 /*
763 if (!is_title) {
764 vnp = ValNodeExtractList(&descr, Seq_descr_title);
765 bssp->descr = ValNodeLink(&(bssp->descr), vnp);
766 }
767 */
768 if (!is_modif && check_GIBB(descr)) {
769 vnp = ValNodeExtractList(&descr, Seq_descr_modif);
770 if (vnp != NULL) {
771 bssp->descr = ValNodeLink(&(bssp->descr), vnp);
772 }
773 }
774 if (!is_org) {
775 vnp = ValNodeExtractList(&descr, Seq_descr_org);
776 if (vnp != NULL) {
777 bssp->descr = ValNodeLink(&(bssp->descr), vnp);
778 }
779 }
780 if (!is_date) {
781 vnp = ValNodeExtractList(&descr, Seq_descr_update_date);
782 if (vnp != NULL) {
783 bssp->descr = ValNodeLink(&(bssp->descr), vnp);
784 }
785 }
786 /* vnp = ValNodeExtractList(&descr, Seq_descr_pub);
787 if (!is_pub)
788 bssp->descr = ValNodeLink(&(bssp->descr), vnp);
789 */
790 }
791 if (seqsep->choice == 1) {
792 bsp = seqsep->data.ptrvalue;
793 bsp->descr = descr;
794 }
795 if (seqsep->choice == 2) {
796 tmp = seqsep->data.ptrvalue;
797 tmp->descr = descr;
798 }
799 return;
800 }
801 //LCOV_EXCL_STOP
802
803
/* Move pubs from the first member of a nuc-prot set up to the set level.
 *
 * SeqEntryExplore-style callback; data, index and indent are unused.
 * Acts only on Bioseq-sets of class nuc-prot with a non-empty first
 * member.  When the set has no descriptors at all, it receives whatever
 * GetDescrNoTitles hands over from the first member.  Otherwise all pub
 * descriptors are extracted from the first member; those carrying num,
 * name, fig or comment are put back on the first member (as a fresh copy),
 * the rest are appended to the set. */
void MoveNPPubs (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
	BioseqSetPtr bssp, tmp;
	BioseqPtr    bsp;
	SeqEntryPtr  seqsep;
	ValNodePtr   descr = NULL, vnp = NULL, vnext, v, v_copy;
	PubdescPtr   pdp, pdp_copy;

	if (sep == NULL || IS_Bioseq(sep)) {
		return;
	}
	bssp = (BioseqSetPtr)(sep->data.ptrvalue);
	if (bssp == NULL || bssp->_class != BioseqseqSet_class_nuc_prot) {
		return;    /* do the rest for nuc-prot only */
	}
	seqsep = bssp->seq_set;
	/* same guard as ChkNucProt: an entry without data cannot be read */
	if (seqsep == NULL || seqsep->data.ptrvalue == NULL) {
		return;
	}
	if (seqsep->choice == 1) {
		bsp = seqsep->data.ptrvalue;
		descr = bsp->descr;
	}
	if (seqsep->choice == 2) {
		tmp = seqsep->data.ptrvalue;
		descr = tmp->descr;
	}

	if (bssp->descr == NULL) {
		bssp->descr = GetDescrNoTitles(&descr);
	} else {
		/* move pubs to nuc-prot level */
		vnp = ValNodeExtractList(&descr, Seq_descr_pub);
		for (v = vnp; v != NULL; v = vnext) {
			vnext = v->next;    /* v may be removed below */
			pdp = (PubdescPtr) v->data.ptrvalue;
			if (pdp == NULL) {
				continue;       /* empty descriptor: nothing to inspect */
			}
			if (pdp->num != NULL || pdp->name != NULL || pdp->fig != NULL
			    || pdp->comment != NULL) {
				/* sequence-specific pub: keep it on the first member */
				pdp_copy = AsnIoMemCopy(pdp, (AsnReadFunc) PubdescAsnRead,
				                        (AsnWriteFunc) PubdescAsnWrite);
				v_copy = SeqDescrNew(NULL);
				v_copy->choice = Seq_descr_pub;
				v_copy->data.ptrvalue = pdp_copy;
				descr = ValNodeLink(&(descr), v_copy);
				PubdescFree (pdp);
				vnp = remove_node(vnp, v);
			}
		}
		if (vnp != NULL) {
			bssp->descr = ValNodeLink(&(bssp->descr), vnp);
		}
	}

	/* store the remaining chain back on the first member */
	if (seqsep->choice == 1) {
		bsp = seqsep->data.ptrvalue;
		bsp->descr = descr;
	}
	if (seqsep->choice == 2) {
		tmp = seqsep->data.ptrvalue;
		tmp->descr = descr;
	}
}
866
GetSeqDescFromSeqEntry(SeqEntryPtr sep)867 static SeqDescrPtr GetSeqDescFromSeqEntry (SeqEntryPtr sep)
868
869 {
870 BioseqPtr bsp;
871 BioseqSetPtr bssp;
872
873 if (sep == NULL) return NULL;
874
875 if (IS_Bioseq (sep)) {
876 bsp = (BioseqPtr) sep->data.ptrvalue;
877 if (bsp == NULL) return NULL;
878 return bsp->descr;
879 } else if (IS_Bioseq_set (sep)) {
880 bssp = (BioseqSetPtr) sep->data.ptrvalue;
881 if (bssp == NULL) return NULL;
882 return bssp->descr;
883 }
884
885 return NULL;
886 }
887
888 /* return list of pubs that are the same on all pop/phy/mut components */
CheckSegsForPopPhyMut(SeqEntryPtr sep)889 static SeqDescrPtr CheckSegsForPopPhyMut (SeqEntryPtr sep)
890
891 {
892 SeqDescrPtr descr;
893 ValNodePtr head;
894 ValNodePtr last;
895 SeqDescrPtr list = NULL;
896 ObjValNodePtr ovp;
897 PubdescPtr pdp1, pdp2;
898 Boolean same;
899 SeqDescrPtr sdp1, sdp2;
900 SeqEntryPtr tmp;
901 ValNodePtr vnp, vnp1, vnp2;
902
903 for (sdp1 = GetSeqDescFromSeqEntry (sep); sdp1 != NULL; sdp1 = sdp1->next) {
904 if (sdp1->choice != Seq_descr_pub) continue;
905 pdp1 = (PubdescPtr) sdp1->data.ptrvalue;
906 if (pdp1 == NULL) continue;
907 head = NULL;
908 last = NULL;
909 for (tmp = sep->next, same = FALSE; tmp != NULL; tmp = tmp->next) {
910 for (sdp2 = GetSeqDescFromSeqEntry (tmp); sdp2 != NULL; sdp2 = sdp2->next) {
911 if (sdp2->choice != Seq_descr_pub) continue;
912 pdp2 = (PubdescPtr) sdp2->data.ptrvalue;
913 if (pdp2 == NULL) continue;
914 if (PubLabelMatchEx (pdp1->pub, pdp2->pub, FALSE) == 0) {
915 if (PubdescMatch (pdp1, pdp2)) {
916 same = TRUE;
917 vnp = ValNodeAddPointer (&last, 0, (Pointer) sdp2);
918 if (head == NULL) {
919 head = vnp;
920 }
921 last = vnp;
922 break;
923 }
924 }
925 }
926 if (sdp2 == NULL) {
927 same = FALSE;
928 break;
929 }
930 }
931 if (same) {
932 descr = SeqDescrNew (NULL);
933 descr->choice = Seq_descr_pub;
934 descr->data.ptrvalue = AsnIoMemCopy (pdp1,
935 (AsnReadFunc) PubdescAsnRead,
936 (AsnWriteFunc) PubdescAsnWrite);
937 list = tie_next (list, descr);
938 /* mark original pubs for deletion */
939 if (sdp1->extended) {
940 ovp = (ObjValNodePtr) sdp1;
941 ovp->idx.deleteme = 1;
942 }
943 for (vnp1 = head; vnp1 != NULL; vnp1 = vnp1->next) {
944 vnp2 = (ValNodePtr) vnp1->data.ptrvalue;
945 if (vnp2->extended) {
946 ovp = (ObjValNodePtr) vnp2;
947 ovp->idx.deleteme = 1;
948 }
949 }
950 }
951 ValNodeFree (head);
952 }
953
954 return list;
955 }
956
957 /* move identical pubs in pop/phy/mut components to the set level */
MovePopPhyMutPubsProc(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)958 static void MovePopPhyMutPubsProc (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
959
960 {
961 BioseqSetPtr bssp;
962 ValNodePtr v, pub, vv, next;
963 PubdescPtr pdp, pdpv;
964
965 if (sep == NULL) return;
966 if (! IS_Bioseq_set (sep)) return;
967 bssp = (BioseqSetPtr) sep->data.ptrvalue;
968 if (bssp == NULL) return;
969 if ((bssp->_class < BioseqseqSet_class_mut_set ||
970 bssp->_class > BioseqseqSet_class_eco_set) &&
971 bssp->_class != BioseqseqSet_class_wgs_set &&
972 bssp->_class != BioseqseqSet_class_small_genome_set) return;
973 pub = CheckSegsForPopPhyMut (bssp->seq_set);
974 if (pub == NULL) return;
975 /* check if pub is already on the set descr */
976 for(v=bssp->descr; v != NULL; v = v->next) {
977 if (v->choice != Seq_descr_pub)
978 continue;
979 for (vv = pub; vv; vv = next) {
980 next = vv->next;
981 pdp = vv->data.ptrvalue;
982 pdpv = v->data.ptrvalue;
983 if (PubLabelMatchEx (pdp->pub, pdpv->pub, FALSE) == 0) {
984 PubdescFree(pdp);
985 pub = remove_node(pub, vv);
986 }
987 }
988 }
989
990 bssp->descr = tie_next(bssp->descr, pub);
991 }
992
/* Entry point: run MovePopPhyMutPubsProc over every entry below sep, then
 * delete the objects that were marked for deletion.  Does nothing for a
 * NULL sep. */
void MovePopPhyMutPubs (SeqEntryPtr sep)
{
	if (sep != NULL) {
		SeqEntryExplore(sep, (Pointer) NULL, MovePopPhyMutPubsProc);
		DeleteMarkedObjects (0, OBJ_SEQENTRY, sep);
	}
}
1000
1001 //LCOV_EXCL_START
/* Append the feature sfp to a feature table of bsp.
 *
 * The target is the first annot of bsp that is a feature table (type 1)
 * with neither a name nor a description.  If there is none, a new feature
 * table is created and placed at the front of bsp->annot.  The feature is
 * appended to the end of the target's feature list.
 *
 * Previously the annot found by the search was discarded and the FIRST
 * annot of the Bioseq was used instead, so a feature could be chained onto
 * a named / described annot or onto the data of a non-feature annot.
 *
 * Nothing happens when sfp or bsp is NULL, or when a needed annot cannot
 * be allocated (in that case sfp is not attached anywhere). */
static void AddFeatToBioseq (SeqFeatPtr sfp, BioseqPtr bsp)
{
	SeqFeatPtr  tail;
	SeqAnnotPtr sap;

	if (sfp == NULL || bsp == NULL) return;

	/* look for an anonymous feature table */
	for (sap = bsp->annot; sap != NULL; sap = sap->next) {
		if (sap->name == NULL && sap->desc == NULL && sap->type == 1) {
			break;
		}
	}

	if (sap == NULL) {
		sap = SeqAnnotNew ();
		if (sap == NULL) return;
		sap->type = 1;
		sap->next = bsp->annot;
		bsp->annot = sap;
	}

	if (sap->data == NULL) {
		sap->data = (Pointer) sfp;
		return;
	}
	tail = (SeqFeatPtr) sap->data;
	while (tail->next != NULL) {
		tail = tail->next;
	}
	tail->next = sfp;
}
1034
SSECNoGenomeAnnotInAnnotDescr(SeqAnnotPtr sap)1035 static Boolean SSECNoGenomeAnnotInAnnotDescr (SeqAnnotPtr sap)
1036
1037 {
1038 AnnotDescrPtr adp;
1039 ObjectIdPtr oip;
1040 CharPtr str;
1041 UserFieldPtr ufp;
1042 UserObjectPtr uop;
1043
1044 if (sap == NULL) return TRUE;
1045
1046 for (adp = sap->desc; adp != NULL; adp = adp->next) {
1047 if (adp->choice != Annot_descr_user) continue;
1048 uop = (UserObjectPtr) adp->data.ptrvalue;
1049 if (uop == NULL) continue;
1050 oip = uop->type;
1051 if (oip == NULL) continue;
1052 if (StringICmp (oip->str, "StructuredComment") != 0) continue;
1053 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
1054 if (ufp->choice != 1) continue;
1055 oip = ufp->label;
1056 if (oip == NULL) continue;
1057 if (StringCmp (oip->str, "StructuredCommentPrefix") != 0) continue;
1058 str = (CharPtr) ufp->data.ptrvalue;
1059 if (StringCmp (str, "##Genome-Annotation-Data-START##") == 0) return FALSE;
1060 }
1061 }
1062
1063 return TRUE;
1064 }
1065
/* VisitSetsInSep callback (userdata unused); acts on parts sets only.
 *
 * Every feature in the set's feature tables whose location resolves to a
 * Bioseq (BioseqFindFromSeqLoc) is unlinked and handed to AddFeatToBioseq
 * for that Bioseq.  Afterwards an annot whose data pointer is NULL is
 * unlinked and freed, unless SSECNoGenomeAnnotInAnnotDescr returns FALSE
 * for it (i.e. it carries the genome-annotation structured comment). */
static void MoveFeatsOnPartsProc (BioseqSetPtr bssp, Pointer userdata)
{
	SeqAnnotPtr  annot, annot_after;
	SeqFeatPtr   feat, feat_after;
	Pointer PNTR annot_link;
	Pointer PNTR feat_link;
	BioseqPtr    owner;

	if (bssp == NULL || bssp->_class != BioseqseqSet_class_parts) return;

	annot_link = (Pointer PNTR) &(bssp->annot);
	for (annot = bssp->annot; annot != NULL; annot = annot_after) {
		annot_after = annot->next;    /* annot may be freed below */

		if (annot->type == 1) {
			feat_link = (Pointer PNTR) &(annot->data);
			for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = feat_after) {
				feat_after = feat->next;
				owner = BioseqFindFromSeqLoc (feat->location);
				if (owner == NULL) {
					/* stays here: it becomes the new predecessor */
					feat_link = (Pointer PNTR) &(feat->next);
					continue;
				}
				*feat_link = feat->next;
				feat->next = NULL;
				AddFeatToBioseq (feat, owner);
			}
		}

		/* now keep empty annot if the genome-annotation comment is present */
		if (annot->data == NULL && SSECNoGenomeAnnotInAnnotDescr (annot)) {
			*annot_link = annot->next;
			annot->next = NULL;
			SeqAnnotFree (annot);
		} else {
			annot_link = (Pointer PNTR) &(annot->next);
		}
	}
}
1112
/* Public entry point: apply MoveFeatsOnPartsProc to every Bioseq-set
 * reachable from sep (no user data is passed along). */
extern void MoveFeatsFromPartsSet (SeqEntryPtr sep)
{
	VisitSetsInSep (sep, NULL, MoveFeatsOnPartsProc);
}
1118 //LCOV_EXCL_STOP
1119
CmpOrgById(BioSourcePtr b1,BioSourcePtr b2)1120 Boolean CmpOrgById(BioSourcePtr b1, BioSourcePtr b2)
1121 {
1122 DbtagPtr d1 = NULL, d2 = NULL;
1123 ValNodePtr vnp;
1124
1125 if (b1 == NULL || b2 == NULL) {
1126 return FALSE;
1127 }
1128 if (b1->org == NULL || b2->org == NULL) {
1129 return FALSE;
1130 }
1131 for (vnp = b1->org->db; vnp; vnp = vnp->next) {
1132 d1 = (DbtagPtr) vnp->data.ptrvalue;
1133 if (StringCmp(d1->db, "taxon") == 0) {
1134 break;
1135 }
1136 }
1137 for (vnp = b2->org->db; vnp; vnp = vnp->next) {
1138 d2 = (DbtagPtr) vnp->data.ptrvalue;
1139 if (StringCmp(d2->db, "taxon") == 0) {
1140 break;
1141 }
1142 }
1143 if (d1 && d2) {
1144 if (d1->tag->id == d2->tag->id) {
1145 return TRUE;
1146 } else {
1147 }
1148 } else if (StringICmp(b1->org->taxname, b2->org->taxname) == 0) {
1149 return TRUE;
1150 }
1151 return FALSE;
1152 }
1153
/* Merge the BioSource guest into host and return host.
 *
 *  - guest NULL: host is returned untouched (NULL if host is NULL too).
 *  - host NULL:  a copy of guest is returned.
 *  - otherwise:  genome and origin are taken from guest where host has 0,
 *    a copy of every guest SubSource is appended to host->subtype, and a
 *    copy of every guest OrgMod is appended to host->org->orgname->mod
 *    (the OrgName is created on demand).  No check for duplicates is made.
 *
 * OrgMods are merged only when both sources have an org; a missing org, a
 * failed copy or a failed OrgName allocation is skipped instead of being
 * dereferenced. */
BioSourcePtr BioSourceMerge(BioSourcePtr host, BioSourcePtr guest)
{
	SubSourcePtr ssp, sp;
	OrgModPtr    omp, homp;
	OrgNamePtr   onp;

	if (guest == NULL) {
		return host;
	}
	if (host == NULL) {
		return AsnIoMemCopy(guest, (AsnReadFunc) BioSourceAsnRead,
		                    (AsnWriteFunc) BioSourceAsnWrite);
	}

	if (host->genome == 0 && guest->genome != 0) {
		host->genome = guest->genome;
	}
	if (host->origin == 0 && guest->origin != 0) {
		host->origin = guest->origin;
	}

	for (ssp = guest->subtype; ssp != NULL; ssp = ssp->next) {
		sp = AsnIoMemCopy(ssp, (AsnReadFunc) SubSourceAsnRead,
		                  (AsnWriteFunc) SubSourceAsnWrite);
		if (sp != NULL) {
			host->subtype = tie_next_subtype(host->subtype, sp);
		}
	}

	if (guest->org == NULL || guest->org->orgname == NULL || host->org == NULL) {
		return host;
	}
	for (omp = guest->org->orgname->mod; omp != NULL; omp = omp->next) {
		/* make sure there is an OrgName to receive the modifier */
		onp = host->org->orgname;
		if (onp == NULL) {
			onp = OrgNameNew();
			if (onp == NULL) {
				break;
			}
			host->org->orgname = onp;
		}
		homp = AsnIoMemCopy(omp, (AsnReadFunc) OrgModAsnRead,
		                    (AsnWriteFunc) OrgModAsnWrite);
		if (homp != NULL) {
			onp->mod = tie_next_OrgMod(onp->mod, homp);
		}
	}
	return host;
}
1195
/*
 * Reduce host to what it has in common with guest and return host.
 *
 *   - genome / origin are reset to 0 when they differ from guest's.
 *   - every host SubSource without a guest SubSource of the same subtype
 *     and name is removed with remove_subtype.
 *   - if CmpOrgById says the organisms differ, host's OrgRef is freed and
 *     cleared and the function returns at once.
 *   - the common name is dropped when StringExtCmp reports a difference.
 *   - if guest has no OrgName, host's is released; otherwise every host
 *     OrgMod without a guest OrgMod of the same subtype and subname is
 *     removed with remove_OrgMod.
 *
 * guest is only read.  A NULL host or guest leaves host untouched.
 *
 * The OrgMod pass tests the inner-loop cursor (om) inside the outer loop,
 * mirroring the SubSource pass.  Testing the outer cursor after the loop
 * had finished meant no OrgMod was ever removed.
 */
BioSourcePtr BioSourceCommon(BioSourcePtr host, BioSourcePtr guest)
{
    SubSourcePtr ssp, sp, spnext;
    OrgModPtr omp, om, ompnext;

    if (host == NULL || guest == NULL) {
        return host;
    }

    if (host->genome != guest->genome) {
        host->genome = 0;
    }
    if (host->origin != guest->origin) {
        host->origin = 0;
    }

    for (sp = host->subtype; sp != NULL; sp = spnext) {
        spnext = sp->next;      /* saved first: sp may be removed below */
        for (ssp = guest->subtype; ssp != NULL; ssp = ssp->next) {
            if (sp->subtype == ssp->subtype
                && StringCmp(sp->name, ssp->name) == 0) {
                break;
            }
        }
        if (ssp == NULL) {
            host->subtype = remove_subtype(host->subtype, sp);
        }
    }

    /* CmpOrgById returns FALSE when either OrgRef is NULL, so both are
       known to be non-NULL past this point */
    if (CmpOrgById(host, guest) == FALSE) {
        OrgRefFree(host->org);
        host->org = NULL;
        return host;
    }

    if (StringExtCmp(host->org->common, guest->org->common) != 0) {
        MemFree(host->org->common);
        host->org->common = NULL;
    }

    if (guest->org->orgname == NULL) {
        /* NOTE(review): MemFree releases only the OrgName struct itself;
           whatever it owns is presumably leaked - confirm whether the
           OrgName free routine should be used here instead */
        MemFree(host->org->orgname);
        host->org->orgname = NULL;
    } else if (host->org->orgname != NULL) {
        for (omp = host->org->orgname->mod; omp != NULL; omp = ompnext) {
            ompnext = omp->next;    /* saved first: omp may be removed */
            for (om = guest->org->orgname->mod; om != NULL; om = om->next) {
                if (om->subtype == omp->subtype
                    && StringCmp(om->subname, omp->subname) == 0) {
                    break;
                }
            }
            if (om == NULL) {
                host->org->orgname->mod =
                    remove_OrgMod(host->org->orgname->mod, omp);
            }
        }
    }
    return host;
}
1250
EmptyBioSource(BioSourcePtr bio)1251 static Boolean EmptyBioSource(BioSourcePtr bio)
1252 {
1253 if (bio == NULL || bio->org == NULL) return TRUE;
1254 if (bio->org->taxname == NULL && bio->org->common == NULL && bio->org->db == NULL) return TRUE;
1255 return FALSE;
1256 }
1257
/*
 * SeqEntryExplore-style callback (data, index and indent are unused).
 *
 * For a set of class 2 (segset) whose second entry is itself a set with a
 * non-empty seq_set (the parts), compute the BioSource that the set-level
 * source descriptor and the source descriptors of all parts have in
 * common (BioSourceCommon) and store it on the set:
 *
 *   - the set's existing source descriptor, if any, is the starting point;
 *     otherwise a copy of the first part BioSource found is used;
 *   - if the result is empty (EmptyBioSource) it is freed and the set ends
 *     up without a source descriptor;
 *   - otherwise the descriptor is linked back onto the set with tie_next.
 *
 * Descriptors on the parts themselves are only read here.
 *
 * The structure of the set is validated BEFORE the set-level descriptor
 * is extracted, so a segset that cannot be processed (no second entry,
 * second entry is a Bioseq, empty parts) keeps its descriptor instead of
 * silently losing and leaking it.  The placeholder node is also released
 * when no BioSource was found anywhere.
 */
void StripBSfromTop (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqSetPtr bssp, partset;
    ValNodePtr vnp, bio_vnp;
    SeqEntryPtr parts, cur;
    BioseqPtr bsp;
    BioSourcePtr biotop = NULL, bio;

    if (IS_Bioseq(sep)) {
        return;
    }
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    if (bssp == NULL || bssp->_class != 2) {    /* segset only */
        return;
    }
    if (bssp->seq_set == NULL || bssp->seq_set->next == NULL) {
        return;
    }
    if (IS_Bioseq(bssp->seq_set->next)) {
        return;                                 /* no parts set */
    }
    partset = (BioseqSetPtr)(bssp->seq_set->next->data.ptrvalue);
    if (partset == NULL || partset->seq_set == NULL) {
        return;
    }
    parts = partset->seq_set;

    /* from here on the set-level descriptor is either re-linked or freed */
    bio_vnp = ValNodeExtractList(&(bssp->descr), Seq_descr_source);
    if (bio_vnp != NULL) {
        biotop = (BioSourcePtr) bio_vnp->data.ptrvalue;
    } else {
        bio_vnp = SeqDescrNew(NULL);
        bio_vnp->choice = Seq_descr_source;
    }

    for (cur = parts; cur != NULL; cur = cur->next) {
        if (!IS_Bioseq(cur)) {
            continue;
        }
        bsp = (BioseqPtr) cur->data.ptrvalue;
        if (bsp == NULL) {
            continue;
        }
        bio = NULL;     /* reset so a part without a source is skipped */
        for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next) {
            if (vnp->choice == Seq_descr_source) {
                bio = (BioSourcePtr) vnp->data.ptrvalue;
                break;
            }
        }
        if (bio == NULL) {
            continue;
        }
        if (biotop == NULL) {
            biotop = AsnIoMemCopy(bio, (AsnReadFunc) BioSourceAsnRead,
                                  (AsnWriteFunc) BioSourceAsnWrite);
            bio_vnp->data.ptrvalue = biotop;
        } else {
            biotop = BioSourceCommon(biotop, bio);
        }
    }

    if (biotop == NULL) {
        ValNodeFree(bio_vnp);       /* nothing gathered: drop the node */
    } else if (EmptyBioSource(biotop)) {
        BioSourceFree(biotop);
        ValNodeFree(bio_vnp);
    } else {
        bssp->descr = tie_next(bssp->descr, bio_vnp);
    }
}
1321
/*
 * SeqEntryExplore-style callback (data, index and indent are unused).
 *
 * For a set of class 2 (segset) that has at least two entries: remove the
 * source descriptor from the set and from every part, merge all of them
 * with BioSourceMerge, and put the merged BioSource back on the set as a
 * single new source descriptor.  The parts are the seq_set of the second
 * entry when that entry is a set; if it is a Bioseq there are no parts
 * and only the set-level BioSource is re-attached.
 *
 * The set is validated BEFORE its source descriptor is extracted, so a
 * segset with fewer than two entries keeps its descriptor instead of
 * losing and leaking the BioSource.
 */
void StripBSfromParts (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqSetPtr bssp, partset;
    BioseqPtr bsp;
    SeqEntryPtr parts = NULL, cur;
    ValNodePtr descr, vnp;
    BioSourcePtr biosp = NULL;

    if (IS_Bioseq(sep)) {
        return;
    }
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    if (bssp == NULL || bssp->_class != 2) {    /* segset only */
        return;
    }
    if (bssp->seq_set == NULL || bssp->seq_set->next == NULL) {
        return;
    }

    vnp = ValNodeExtractList(&(bssp->descr), Seq_descr_source);
    if (vnp != NULL) {
        /* keep the BioSource, discard only the descriptor node */
        biosp = (BioSourcePtr) vnp->data.ptrvalue;
        ValNodeFree(vnp);
    }

    if (!IS_Bioseq(bssp->seq_set->next)) {
        partset = (BioseqSetPtr)(bssp->seq_set->next->data.ptrvalue);
        if (partset != NULL) {
            parts = partset->seq_set;
        }
    }

    for (cur = parts; cur != NULL; cur = cur->next) {
        if (!IS_Bioseq(cur)) {
            continue;
        }
        bsp = (BioseqPtr) cur->data.ptrvalue;
        if (bsp == NULL) {
            continue;
        }
        vnp = ValNodeExtractList(&(bsp->descr), Seq_descr_source);
        if (vnp != NULL) {
            /* BioSourceMerge copies from its second argument, so the
               part's own BioSource is released afterwards */
            biosp = BioSourceMerge(biosp, vnp->data.ptrvalue);
            BioSourceFree((BioSourcePtr) (vnp->data.ptrvalue));
            ValNodeFree(vnp);
        }
    }

    if (biosp != NULL) {
        descr = SeqDescrNew(NULL);
        descr->choice = Seq_descr_source;
        descr->data.ptrvalue = biosp;
        bssp->descr = ValNodeLink(&(bssp->descr), descr);
    }
}
1374
1375 //LCOV_EXCL_START
1376 /*------------------------ GetDescr() --------------------------*/
1377 /*****************************************************************************
1378 * GetDescr:
1379 * 8-12-93
1380 ******************************************************************************/
GetDescr(ValNodePtr PNTR descr)1381 ValNodePtr GetDescr(ValNodePtr PNTR descr)
1382 {
1383 ValNodePtr vnp, hvnp = NULL;
1384
1385 vnp = ValNodeExtractList(descr, Seq_descr_title);
1386 if (vnp != NULL) {
1387 hvnp = ValNodeLink(&hvnp, vnp);
1388 }
1389
1390 vnp = ValNodeExtractList(descr, Seq_descr_org);
1391 if (vnp != NULL) {
1392 hvnp = ValNodeLink(&hvnp, vnp);
1393 }
1394
1395 if ( check_GIBB(*descr)) {
1396 vnp = ValNodeExtractList(descr, Seq_descr_modif);
1397 if (vnp != NULL) {
1398 hvnp = ValNodeLink(&hvnp, vnp);
1399 }
1400 }
1401 vnp = ValNodeExtractList(descr, Seq_descr_comment);
1402 if (vnp != NULL) {
1403 hvnp = ValNodeLink(&hvnp, vnp);
1404 }
1405
1406 vnp = ValNodeExtractList(descr, Seq_descr_pub);
1407 if (vnp != NULL) {
1408 hvnp = ValNodeLink(&hvnp, vnp);
1409 }
1410
1411 vnp = ValNodeExtractList(descr, Seq_descr_update_date);
1412 if (vnp != NULL) {
1413 hvnp = ValNodeLink(&hvnp, vnp);
1414 }
1415
1416 return (hvnp);
1417
1418 } /* GetDescr */
1419 //LCOV_EXCL_STOP
1420
/*
 * Same job as GetDescr but leaves titles alone: extracts org, modif (only
 * when check_GIBB(*descr) is true), comment, pub and update-date
 * descriptors from *descr, in that order, and returns them chained
 * together (NULL if none were found).
 */
static ValNodePtr GetDescrNoTitles (ValNodePtr PNTR descr)
{
    ValNodePtr gathered = NULL;
    ValNodePtr chunk;

    if ((chunk = ValNodeExtractList(descr, Seq_descr_org)) != NULL) {
        gathered = ValNodeLink(&gathered, chunk);
    }

    if (check_GIBB(*descr)) {
        //LCOV_EXCL_START
        //all Seq_descr_modif descriptors were removed upstream by StripOld
        if ((chunk = ValNodeExtractList(descr, Seq_descr_modif)) != NULL) {
            gathered = ValNodeLink(&gathered, chunk);
        }
        //LCOV_EXCL_STOP
    }

    if ((chunk = ValNodeExtractList(descr, Seq_descr_comment)) != NULL) {
        gathered = ValNodeLink(&gathered, chunk);
    }
    if ((chunk = ValNodeExtractList(descr, Seq_descr_pub)) != NULL) {
        gathered = ValNodeLink(&gathered, chunk);
    }
    if ((chunk = ValNodeExtractList(descr, Seq_descr_update_date)) != NULL) {
        gathered = ValNodeLink(&gathered, chunk);
    }

    return gathered;

} /* GetDescrNoTitles */
1457
1458 //LCOV_EXCL_START
1459 //All Seq_descr_modif descriptors were removed upstream by StripOld
1460 /*------------------------ check_GIBB() --------------------------*/
1461 /*****************************************************************************
1462 * check_GIBB:
1463 * 8-12-93
1464 ******************************************************************************/
check_GIBB(ValNodePtr descr)1465 Boolean check_GIBB(ValNodePtr descr)
1466 {
1467 ValNodePtr vnp, modif;
1468 Int4 gmod;
1469
1470 if (descr == NULL) {
1471 return FALSE;
1472 }
1473 for (vnp = descr; vnp && vnp->choice != Seq_descr_modif; vnp = vnp->next)
1474 continue;
1475 if (vnp == NULL) {
1476 return FALSE;
1477 }
1478 modif = (ValNodePtr) vnp->data.ptrvalue;
1479 if (modif == NULL) {
1480 return FALSE;
1481 }
1482 gmod = modif->data.intvalue;
1483 if (gmod == Seq_descr_GIBB_mod_dna || gmod == Seq_descr_GIBB_mod_rna ||
1484 gmod == Seq_descr_GIBB_mod_est || gmod == Seq_descr_GIBB_mod_complete
1485 || gmod == Seq_descr_GIBB_mod_partial) {
1486 return FALSE;
1487 }
1488 return TRUE;
1489 }
1490
1491 //used only by ChkSegset (segsets only)
1492 /*----------------------------- SrchSegChoice() --------------------------*/
1493 /*****************************************************************************
1494 * SrchSegChoice:
1495 ******************************************************************************/
SrchSegChoice(SeqEntryPtr sep,Uint1 choice)1496 ValNodePtr SrchSegChoice(SeqEntryPtr sep, Uint1 choice)
1497 {
1498 BioseqPtr bsp;
1499 ValNodePtr hvnp = NULL;
1500
1501 if (sep == NULL) {
1502 return NULL;
1503 }
1504 if (IS_Bioseq(sep)) {
1505 bsp = (BioseqPtr)(sep->data.ptrvalue);
1506 /* first bioseq from parts */
1507 if (CheckSegDescrChoice(sep, choice)) { /*identical */
1508 hvnp = ValNodeExtractList(&(bsp->descr), choice);
1509 CleanUpSeqDescrChoice(sep->next, choice);
1510 }
1511 }
1512 return (hvnp);
1513
1514 } /* SrchSegChoice */
1515
1516 // Used for segsets
1517 /*---------------------------- SrchSegSeqMol() --------------------------*/
1518 /*************************************************************************
1519 * SrchSegSeqMol:
1520 * 5-14-93
1521 **************************************************************************/
SrchSegSeqMol(SeqEntryPtr sep)1522 void SrchSegSeqMol(SeqEntryPtr sep)
1523 {
1524 BioseqPtr bsp = NULL;
1525 SeqEntryPtr cursep;
1526 Uint1 mol;
1527 /*
1528 CharPtr str1, str2;
1529 */
1530
1531 if (sep == NULL || sep->next) {
1532 return;
1533 }
1534 if (IS_Bioseq(sep)) {
1535 bsp = sep->data.ptrvalue;
1536 }
1537 if (bsp == NULL) {
1538 return;
1539 }
1540 mol = bsp->mol;
1541
1542 for (cursep = sep->next; cursep != NULL; cursep = cursep->next) {
1543 if (IS_Bioseq(sep)) {
1544 bsp = cursep->data.ptrvalue;
1545 } else {
1546 continue;
1547 }
1548 if (mol != bsp->mol) {
1549 break;
1550 }
1551 }
1552
1553 return;
1554
1555 } /* SrchSegSeqMol */
1556 //LCOV_EXCL_STOP
1557
1558 /*------------------------ CheckSegDescrChoice() -------------------------*/
1559 /*****************************************************************************
1560 * CheckSegDescrChoice:
1561 * 5-18-93
1562 ******************************************************************************/
CheckSegDescrChoice(SeqEntryPtr sep,Uint1 choice)1563 Boolean CheckSegDescrChoice(SeqEntryPtr sep, Uint1 choice)
1564 {
1565 BioseqPtr bsp;
1566 SeqEntryPtr cursep;
1567 ValNodePtr vnp, mvnp;
1568 Boolean same;
1569 Boolean no_choice = TRUE;
1570 BioSourcePtr biosp = NULL;
1571 OrgRefPtr orp;
1572 CharPtr title = NULL;
1573 DatePtr dp = NULL;
1574 CharPtr org = NULL;
1575 Int4 modif = -1, mol = -1;
1576 PubdescPtr pdp = NULL;
1577
1578 for (cursep = sep, same = TRUE;
1579 cursep != NULL && same; cursep = cursep->next) {
1580 bsp = cursep->data.ptrvalue;
1581 for (vnp = bsp->descr; vnp != NULL && vnp->choice != choice;
1582 vnp = vnp->next)
1583 continue;
1584
1585 if (vnp == NULL) {
1586 same = FALSE;
1587 no_choice = TRUE;
1588 } else if (choice == Seq_descr_org) {
1589 //LCOV_EXCL_START
1590 //only ever called with Seq_descr_source as choice
1591 no_choice = FALSE;
1592 orp = vnp->data.ptrvalue;
1593
1594 if (org == NULL)
1595 org = orp->taxname;
1596 else if (StringCmp(org, orp->taxname) != 0)
1597 same = FALSE;
1598 //LCOV_EXCL_STOP
1599 } else if (choice == Seq_descr_source) {
1600 no_choice = FALSE;
1601 if (biosp == NULL) {
1602 biosp = vnp->data.ptrvalue;
1603 } else if (BSComparison(biosp,
1604 (BioSourcePtr) vnp->data.ptrvalue) != 0) {
1605 same = FALSE;
1606 }
1607 //LCOV_EXCL_START
1608 //only ever called with Seq_descr_source as choice
1609 } else if (choice == Seq_descr_mol_type) {
1610 no_choice = FALSE;
1611 if (mol == -1)
1612 mol = vnp->data.intvalue;
1613 else if (mol != vnp->data.intvalue)
1614 same = FALSE;
1615 } else if (choice == Seq_descr_modif) {
1616 no_choice = FALSE;
1617 mvnp = vnp->data.ptrvalue;
1618
1619 if (modif == -1)
1620 modif = mvnp->data.intvalue;
1621 else if (modif != mvnp->data.intvalue)
1622 same = FALSE;
1623 }else if (choice == Seq_descr_update_date) {
1624 no_choice = FALSE;
1625 if (dp == NULL)
1626 dp = vnp->data.ptrvalue;
1627 else if (DateMatch(dp, vnp->data.ptrvalue, TRUE) != 0)
1628 same = FALSE;
1629 } else if (choice == Seq_descr_pub) {
1630 no_choice = FALSE;
1631 if (pdp == NULL)
1632 pdp = vnp->data.ptrvalue;
1633 else if (PubMatch(pdp->pub,
1634 ((PubdescPtr)(vnp->data.ptrvalue))->pub) != 0)
1635 same = FALSE;
1636 } else if (choice == Seq_descr_title) {
1637 no_choice = FALSE;
1638 if (title == NULL)
1639 title = vnp->data.ptrvalue;
1640 else if (StringCmp(title, (CharPtr) vnp->data.ptrvalue) != 0)
1641 same = FALSE;
1642 } else {
1643 no_choice = FALSE;
1644 ErrPostEx(SEV_WARNING, 0, 2,
1645 "Unrecognized choice: %d", choice);
1646 same = FALSE;
1647 }
1648 }
1649 if (same == FALSE && no_choice == TRUE && choice != Seq_descr_update_date) {
1650 same = TRUE;
1651 }
1652 //LCOV_EXCL_STOP
1653 return (same);
1654
1655 } /* CheckSegDescrChoice */
1656
1657
/*
 * SeqEntryExplore-style callback (data, index and indent are unused).
 *
 * For every coding-region feature with a product in the feature tables
 * (annot type 1) of this entry, removes the Prot-ref xrefs from the
 * feature.  When the product is a whole-sequence location whose Bioseq is
 * found by BioseqFind, that Bioseq has an annot, and no Prot-ref is
 * attached to a protein feature in the first annot, the xref's Prot-ref
 * is copied there first (creating a whole-location protein feature if the
 * annot has none).
 *
 * A Bioseq with a SEQID_OTHER id whose accession starts with NC_, NG_,
 * NT_, NW_ or AC_ is left untouched.
 *
 * The Bioseq pointer is checked before it is dereferenced (previously
 * bsp->annot was read ahead of the NULL test), and a set entry with no
 * data is ignored.  The trailing write-backs of the unchanged annot
 * pointers were no-ops and are gone.
 */
void StripProtXref (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqPtr bsp, prot;
    BioseqSetPtr bssp;
    ValNodePtr vnp;
    SeqAnnotPtr sap, ap, pap;
    SeqFeatPtr sfp, psfp;
    ProtRefPtr prp, pprp;
    SeqFeatXrefPtr xrp, xrpnext;
    SeqIdPtr sid;
    SeqLocPtr slp;
    TextSeqIdPtr tsip;

    if (IS_Bioseq(sep)) {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        if (bsp == NULL) {
            return;
        }
        for (sid = bsp->id; sid != NULL; sid = sid->next) {
            if (sid->choice != SEQID_OTHER) continue;
            tsip = (TextSeqIdPtr) sid->data.ptrvalue;
            if (tsip == NULL) continue;
            if (StringNCmp (tsip->accession, "NC_", 3) == 0) return;
            if (StringNCmp (tsip->accession, "NG_", 3) == 0) return;
            if (StringNCmp (tsip->accession, "NT_", 3) == 0) return;
            if (StringNCmp (tsip->accession, "NW_", 3) == 0) return;
            if (StringNCmp (tsip->accession, "AC_", 3) == 0) return;
        }
        sap = bsp->annot;
    } else {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        if (bssp == NULL) {
            return;
        }
        sap = bssp->annot;
    }

    for (ap = sap; ap != NULL; ap = ap->next) {
        if (ap->type != 1) continue;            /* feature tables only */
        for (sfp = (SeqFeatPtr)(ap->data); sfp != NULL; sfp = sfp->next) {
            if (sfp->data.choice != SEQFEAT_CDREGION) continue;
            vnp = sfp->product;
            if (vnp == NULL) continue;          /* nothing to do without a product */

            prot = NULL;
            pap = NULL;
            psfp = NULL;
            pprp = NULL;
            sid = NULL;
            if (vnp->choice == SEQLOC_WHOLE) {
                sid = vnp->data.ptrvalue;
                prot = BioseqFind(sid);
            }
            if (prot != NULL) {
                pap = prot->annot;
            }
            if (pap != NULL) {
                /* first protein feature in the product's first annot */
                for (psfp = pap->data; psfp != NULL; psfp = psfp->next) {
                    if (psfp->data.choice == SEQFEAT_PROT) {
                        pprp = psfp->data.value.ptrvalue;
                        break;
                    }
                }
            }

            //LCOV_EXCL_START
            //Prot-ref xrefs already removed upstream by basic cleanup
            for (xrp = sfp->xref; xrp != NULL; xrp = xrpnext) {
                xrpnext = xrp->next;    /* xrp may be removed below */
                if (xrp->data.choice == SEQFEAT_PROT) {
                    prp = xrp->data.value.ptrvalue;
                    if (pap != NULL && pprp == NULL) {
                        if (psfp == NULL) {
                            /* pap != NULL implies sid was set above */
                            psfp = SeqFeatNew();
                            psfp->data.choice = SEQFEAT_PROT;
                            slp = ValNodeNew(NULL);
                            slp->choice = SEQLOC_WHOLE;
                            slp->data.ptrvalue = SeqIdDup(sid);
                            psfp->location = slp;
                            pap->data = tie_feat(pap->data, psfp);
                        }
                        psfp->data.value.ptrvalue = AsnIoMemCopy(prp,
                                        (AsnReadFunc) ProtRefAsnRead,
                                        (AsnWriteFunc) ProtRefAsnWrite);
                    }
                    sfp->xref = remove_xref(sfp->xref, xrp);
                }
            }
            //LCOV_EXCL_STOP
        }
    }
    return;
}
1757
1758 //LCOV_EXCL_START
1759 //obsolete, and possibly dangerous
GetAnticodonFromObject(SeqFeatPtr sfp)1760 static SeqLocPtr GetAnticodonFromObject(SeqFeatPtr sfp)
1761 {
1762 UserObjectPtr usop;
1763 UserFieldPtr ufp;
1764 Int4Ptr ints;
1765 SeqLocPtr slp;
1766 SeqIntPtr sip;
1767 Int4 from = 0, to = 0;
1768
1769 if (sfp == NULL) {
1770 return NULL;
1771 }
1772 if ((usop = sfp->ext) == NULL) {
1773 return NULL;
1774 }
1775 if (StringICmp (usop->_class, "NCBI") != 0) {
1776 return NULL;
1777 }
1778 ufp = usop->data;
1779 if (ufp && ufp->choice == 8) { /* ints */
1780 ints = (Int4Ptr) ufp->data.ptrvalue;
1781 from = ints[0];
1782 to = ints[1];
1783 }
1784 sip = SeqIntNew();
1785 sip->from = from;
1786 sip->to = to;
1787 sip->id = SeqIdDup(SeqLocId(sfp->location));
1788 slp = ValNodeNew(NULL);
1789 slp->choice = SEQLOC_INT;
1790 slp->data.ptrvalue = sip;
1791 sfp->ext = usop->next /* NULL */;
1792 UserObjectFree (usop);
1793 return slp;
1794
1795 }
1796 //LCOV_EXCL_STOP
1797
1798 /*--------------------------- CheckMaps() --------------------------*/
1799 /***************************************************************************
1800 * CheckMaps:
1801 * -- find all /map and Gene-ref
1802 * if all maps are the same put it to Biosource and remove quals
1803 * -- change User-Object anticodon in tRNA to SeqLoc
1804 ****************************************************************************/
CheckMaps(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)1805 void CheckMaps (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
1806 {
1807 BioSourcePtr biop;
1808 BioseqPtr bsp;
1809 BioseqSetPtr bssp;
1810 ValNodePtr descr;
1811 SeqAnnotPtr sap = NULL, ap;
1812 CharPtr qval;
1813 SeqFeatPtr sfp;
1814 GeneRefPtr grp;
1815 QualMapPtr qmp;
1816 RnaRefPtr rrp;
1817 SubSourcePtr ssp;
1818 tRNAPtr trna;
1819 GBQualPtr q, qnext;
1820
1821 qmp = data;
1822 if (qmp->same == FALSE) {
1823 return;
1824 }
1825 if (IS_Bioseq(sep)) {
1826 bsp = (BioseqPtr)(sep->data.ptrvalue);
1827 if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
1828 return;
1829 descr = bsp->descr;
1830 sap = bsp->annot;
1831 }
1832 else {
1833 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
1834 descr = bssp->descr;
1835 sap = bssp->annot;
1836 }
1837
1838 while (descr != NULL) {
1839 if (descr->choice == Seq_descr_source) {
1840 biop = (BioSourcePtr) descr->data.ptrvalue;
1841 if (biop != NULL) {
1842 for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
1843 if (ssp->subtype == SUBSRC_map && ssp->name != NULL) {
1844 if (qmp->name == NULL) {
1845 qmp->name = StringSave (ssp->name);
1846 } else if (StringCmp (qmp->name, ssp->name) != 0) {
1847 qmp->same = FALSE;
1848 }
1849 }
1850 }
1851 }
1852 }
1853 descr = descr->next;
1854 }
1855
1856 /* look for all the same maploc and place it to SubSource*/
1857 for (ap = sap; ap != NULL; ap = ap->next) {
1858 if (ap->type != 1) {
1859 continue;
1860 }
1861 for (sfp = (SeqFeatPtr)(ap->data); sfp; sfp = sfp->next) {
1862 if ((qval = get_qvalue(sfp->qual, "map")) != NULL) {
1863 if (qmp->name == NULL) {
1864 qmp->name = StringSave(qval);
1865 } else if (StringCmp(qmp->name, qval) != 0) {
1866 qmp->same = FALSE;
1867 break;
1868 }
1869 }
1870 if (sfp->data.choice == SEQFEAT_GENE) {
1871 grp = sfp->data.value.ptrvalue;
1872 if ((qval = grp->maploc) != NULL) {
1873 if (qmp->name == NULL) {
1874 qmp->name = StringSave(qval);
1875 } else if (StringCmp(qmp->name, qval) != 0) {
1876 qmp->same = FALSE;
1877 break;
1878 }
1879 }
1880 }
1881 if (sfp->data.choice == SEQFEAT_RNA) {
1882 rrp = sfp->data.value.ptrvalue;
1883 if (rrp->type == 3 && rrp->ext.choice == 2) {
1884 trna = rrp->ext.value.ptrvalue;
1885 if (sfp->ext != NULL && trna->anticodon == NULL) {
1886 //LCOV_EXCL_START
1887 // obsolete and possibly dangerous
1888 trna->anticodon = GetAnticodonFromObject(sfp);
1889 for (q = sfp->qual; q; q = qnext) {
1890 qnext = q->next;
1891 if (StringCmp(q->qual, "anticodon") == 0) {
1892 sfp->qual = remove_qual(sfp->qual, q);
1893 }
1894 }
1895 //LCOV_EXCL_STOP
1896 }
1897 }
1898 }
1899 }
1900 }
1901 return;
1902 }
1903
1904 //LCOV_EXCL_START
/*
 * SeqEntryExplore-style callback; data is the QualMapPtr accumulator
 * (index and indent are unused).
 *
 * Acts only when qmp->same is TRUE and qmp->name is set, and skips
 * Bioseqs that are neither raw nor const.  For every feature in the
 * feature tables (annot type 1) of this entry it removes the "map"
 * qualifier and clears maploc on the feature's Gene-ref and on Gene-ref
 * xrefs.  If the entry has a source descriptor, qmp->name is then stored
 * on its BioSource as a map SubSource (subtype 2).
 *
 * A NULL accumulator or NULL Gene-ref is tolerated, and the SubSource is
 * not added when the BioSource already carries an identical map
 * SubSource, so no duplicate is appended.
 */
void StripMaps(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqPtr bsp;
    BioseqSetPtr bssp;
    BioSourcePtr biosp = NULL;
    SubSourcePtr ssp;
    ValNodePtr descr = NULL, vnp;
    SeqAnnotPtr sap = NULL, ap;
    CharPtr qval;
    SeqFeatPtr sfp;
    GeneRefPtr grp;
    QualMapPtr qmp;
    SeqFeatXrefPtr xrp;

    qmp = data;
    if (qmp == NULL || qmp->same == FALSE || qmp->name == NULL) {
        return;
    }
    if (IS_Bioseq(sep)) {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)) {
            return;
        }
        descr = bsp->descr;
        sap = bsp->annot;
    } else {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        descr = bssp->descr;
        sap = bssp->annot;
    }

    for (vnp = descr; vnp != NULL; vnp = vnp->next) {
        if (vnp->choice == Seq_descr_source) {
            biosp = vnp->data.ptrvalue;
            break;
        }
    }

    for (ap = sap; ap != NULL; ap = ap->next) {
        if (ap->type != 1) {
            continue;
        }
        for (sfp = (SeqFeatPtr)(ap->data); sfp != NULL; sfp = sfp->next) {
            /* the extracted value is owned by us and not needed */
            qval = qvalue_extract(&(sfp->qual), "map");
            if (qval != NULL) {
                MemFree(qval);
            }
            if (sfp->data.choice == SEQFEAT_GENE) {
                grp = sfp->data.value.ptrvalue;
                if (grp != NULL && grp->maploc != NULL) {
                    MemFree(grp->maploc);
                    grp->maploc = NULL;
                }
            }
            for (xrp = sfp->xref; xrp != NULL; xrp = xrp->next) {
                if (xrp->data.choice != SEQFEAT_GENE) {
                    continue;
                }
                grp = xrp->data.value.ptrvalue;
                if (grp != NULL && grp->maploc != NULL) {
                    MemFree(grp->maploc);
                    grp->maploc = NULL;
                }
            }
        }
    }

    if (biosp != NULL) {    /* has biosource */
        /* look for an identical map SubSource before adding one */
        for (ssp = biosp->subtype; ssp != NULL; ssp = ssp->next) {
            if (ssp->subtype == 2 && StringCmp(ssp->name, qmp->name) == 0) {
                break;
            }
        }
        if (ssp == NULL) {
            ssp = SubSourceNew();
            ssp->subtype = 2;   /* map */
            ssp->name = StringSave(qmp->name);
            biosp->subtype = tie_next_subtype(biosp->subtype, ssp);
        }
    }
    return;
}
1982 //LCOV_EXCL_STOP
1983
GBQualPresent(CharPtr ptr,GBQualPtr gbqual)1984 static Boolean GBQualPresent(CharPtr ptr, GBQualPtr gbqual)
1985
1986 {
1987 Boolean present=FALSE;
1988 GBQualPtr qual;
1989
1990 for (qual=gbqual; qual != NULL; qual=qual->next)
1991 if (StringCmp(ptr, qual->qual) == 0)
1992 {
1993 present = TRUE;
1994 break;
1995 }
1996
1997 return present;
1998 }
1999
/*
 * Returns the val of the first entry in gbqual whose qual name equals ptr
 * (StringCmp), or NULL when there is no such entry.  The returned string
 * still belongs to the qualifier; nothing is copied or removed.
 */
static CharPtr ExamineGBQual (CharPtr ptr, GBQualPtr gbqual)
{
    GBQualPtr q = gbqual;

    while (q != NULL && StringCmp (ptr, q->qual) != 0) {
        q = q->next;
    }
    return (q != NULL) ? q->val : NULL;
}
2011
/*
 * SeqEntryExplore-style callback (data, index and indent are unused).
 * Skips Bioseqs that are neither raw nor const.
 *
 * For each gene feature in a feature table (annot type 1) of this entry:
 * collect the features of the same table that carry a "map" qualifier and
 * for which SeqLocAinB(feature location, gene location) is not negative.
 * If all their map values agree (case-insensitive) and the value is not
 * NULL, the value is copied into the gene's maploc when that is still
 * empty, and the map qualifier is removed from every collected feature.
 *
 * A gene feature without a Gene-ref, or an entry without data, is
 * skipped rather than dereferenced.  A failed list-node allocation stops
 * the collection instead of corrupting the tail pointer.
 */
void MapsToGenref (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqPtr bsp;
    BioseqSetPtr bssp;
    ValNodePtr head, last, vnp;
    SeqAnnotPtr sap, ap;
    CharPtr qval, name;
    Boolean same;
    SeqFeatPtr sfp, cur;
    GeneRefPtr grp;

    if (IS_Bioseq(sep)) {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        if (bsp == NULL) {
            return;
        }
        if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)) {
            return;
        }
        sap = bsp->annot;
    } else {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        if (bssp == NULL) {
            return;
        }
        sap = bssp->annot;
    }

    for (ap = sap; ap != NULL; ap = ap->next) {
        if (ap->type != 1) {        /* feature tables only */
            continue;
        }
        for (sfp = (SeqFeatPtr)(ap->data); sfp != NULL; sfp = sfp->next) {
            if (sfp->data.choice != SEQFEAT_GENE) {
                continue;
            }
            grp = sfp->data.value.ptrvalue;
            if (grp == NULL) {
                continue;
            }

            /* features with a map qualifier located within the gene */
            head = NULL;
            last = NULL;
            for (cur = (SeqFeatPtr)(ap->data); cur != NULL; cur = cur->next) {
                if (GBQualPresent("map", cur->qual) == FALSE) {
                    continue;
                }
                if (SeqLocAinB(cur->location, sfp->location) < 0) {
                    continue;
                }
                vnp = ValNodeAddPointer (&last, 0, cur);
                if (vnp == NULL) {
                    break;
                }
                if (head == NULL) {
                    head = vnp;
                }
                last = vnp;
            }

            /* do they all carry the same value? */
            name = NULL;
            same = TRUE;
            for (vnp = head; vnp != NULL && same; vnp = vnp->next) {
                cur = (SeqFeatPtr) vnp->data.ptrvalue;
                qval = ExamineGBQual("map", cur->qual);
                if (name == NULL) {
                    name = qval;
                } else if (StringICmp (name, qval) != 0) {
                    same = FALSE;
                }
            }

            if (same && name != NULL) {
                /* copy first: name points into a qualifier removed below */
                if (grp->maploc == NULL) {
                    grp->maploc = StringSave(name);
                }
                for (vnp = head; vnp != NULL; vnp = vnp->next) {
                    cur = (SeqFeatPtr) vnp->data.ptrvalue;
                    if (cur == NULL) continue;
                    qval = qvalue_extract(&(cur->qual), "map");
                    MemFree (qval);
                }
            }
            ValNodeFree (head);
        }
    }
    return;
}
2089
/*
 * TRUE when the pub chain is "minimal": it is empty, or every element is
 * either a Muid/PMid or a Cit-gen that has a cit string but no journal,
 * authors, volume or pages.
 *
 * A chain that ENDS on a Muid/PMid is reported as not minimal when
 * is_ref_seq_prot is still set; the flag is cleared once a minimal
 * Cit-gen has been passed.  Any other kind of pub gives FALSE.
 */
static Boolean CheckMinPub(ValNodePtr pub, Boolean is_ref_seq_prot)
{
    CitGenPtr gen;

    while (pub != NULL) {
        if (pub->choice == PUB_Muid || pub->choice == PUB_PMid) {
            if (pub->next == NULL) {
                return is_ref_seq_prot ? FALSE : TRUE;
            }
            pub = pub->next;
            continue;
        }

        if (pub->choice != PUB_Gen) {
            return FALSE;
        }
        gen = pub->data.ptrvalue;
        if (gen->cit == NULL || gen->journal != NULL || gen->authors != NULL
            || gen->volume != NULL || gen->pages != NULL) {
            return FALSE;
        }
        if (pub->next == NULL) {
            return TRUE;
        }
        /* the rest of the chain is judged without the RefSeq rule */
        is_ref_seq_prot = FALSE;
        pub = pub->next;
    }
    return TRUE;
}
2118
OkayToFuseRemarks(CharPtr com1,CharPtr com2)2119 static Boolean OkayToFuseRemarks (CharPtr com1, CharPtr com2)
2120
2121 {
2122 if (com1 != NULL && com2 != NULL) {
2123 if (StringICmp (com1, com2) != 0) return FALSE;
2124 }
2125
2126 return TRUE;
2127 }
2128
/*
 * Add a copy of pdp to list (a chain of pub descriptors) unless it is
 * redundant, and return the possibly changed list head.
 *
 * pdp is NOT added when
 *   - it matches (PubLabelMatchEx) a pub in check whose start field is 2;
 *   - it has no name and no fig and CheckMinPub says its pub is minimal;
 *   - list already holds a matching pub with a compatible comment and
 *     SelectBestPub(existing, new) is not negative.
 * In the last case, and also when the existing entry loses, reftype 1 is
 * promoted to 2 on whichever side has it if the other side is 2, and a
 * comment present on only one side is copied to the other.  An existing
 * entry that loses to pdp is freed and removed from list before pdp's
 * copy is appended.
 *
 * A NULL pdp, or one without a pub, returns list unchanged (a NULL pdp
 * used to return NULL, which made the caller drop its list).  List
 * entries without a Pubdesc or pub are skipped.  The temporary PUB_Equiv
 * wrapper for pdp is built once instead of once per list entry.
 *
 * NOTE(review): the minimal-pub test originally checked pdp->fig twice;
 * one of those was presumably meant to be another Pubdesc field -
 * confirm which.  The duplicate is dropped here, which does not change
 * the result.
 */
static ValNodePtr AddToListEx (ValNodePtr list, ValNodePtr check, PubdescPtr pdp, Boolean is_ref_seq_prot)
{
    ValNodePtr v, vnext;
    PubdescPtr vpdp;
    PubStructPtr psp;
    ValNodePtr pubequ1, pubequ2;
    Boolean wrapped1, wrapped2;
    Boolean keep_existing;

    if (pdp == NULL || pdp->pub == NULL) {
        return list;
    }

    for (v = check; v != NULL; v = v->next) {
        psp = v->data.ptrvalue;
        if (psp == NULL || psp->start != 2) {
            continue;
        }
        if (PubLabelMatchEx (psp->pub, pdp->pub, TRUE) == 0) {
            return list;
        }
    }

    if (pdp->name == NULL && pdp->fig == NULL) {
        if (CheckMinPub(pdp->pub, is_ref_seq_prot) == TRUE) {
            return list;        /* do not add minimum pub */
        }
    }

    /* a multi-element pub is compared through a temporary PUB_Equiv node */
    wrapped2 = (Boolean) (pdp->pub->next != NULL);
    if (wrapped2) {
        pubequ2 = SeqDescrNew(NULL);
        pubequ2->choice = PUB_Equiv;
        pubequ2->data.ptrvalue = pdp->pub;
    } else {
        pubequ2 = pdp->pub;
    }

    for (v = list; v != NULL; v = vnext) {
        vnext = v->next;        /* v may be removed below */
        vpdp = v->data.ptrvalue;
        if (vpdp == NULL || vpdp->pub == NULL) {
            continue;
        }

        wrapped1 = (Boolean) (vpdp->pub->next != NULL);
        if (wrapped1) {
            pubequ1 = SeqDescrNew(NULL);
            pubequ1->choice = PUB_Equiv;
            pubequ1->data.ptrvalue = vpdp->pub;
        } else {
            pubequ1 = vpdp->pub;
        }

        keep_existing = FALSE;
        if (PubLabelMatchEx (pubequ1, pubequ2, TRUE) == 0
            && OkayToFuseRemarks (pdp->comment, vpdp->comment)) {
            if (pdp->reftype == 2 && vpdp->reftype == 1) {
                vpdp->reftype = 2;
            }
            if (pdp->reftype == 1 && vpdp->reftype == 2) {
                pdp->reftype = 2;
            }
            if (vpdp->comment != NULL && pdp->comment == NULL) {
                pdp->comment = StringSave (vpdp->comment);
            } else if (vpdp->comment == NULL && pdp->comment != NULL) {
                vpdp->comment = StringSave (pdp->comment);
            }
            if (SelectBestPub(pubequ1, pubequ2) >= 0) {
                keep_existing = TRUE;
            } else {
                /* pdp is the better pub: drop the existing entry */
                PubdescFree((PubdescPtr) (v->data.ptrvalue));
                list = remove_node(list, v);
            }
        }

        /* only the wrapper node is released, never the wrapped pub */
        if (wrapped1) {
            ValNodeFree(pubequ1);
        }
        if (keep_existing) {
            if (wrapped2) {
                ValNodeFree(pubequ2);
            }
            return list;
        }
    }
    if (wrapped2) {
        ValNodeFree(pubequ2);
    }

    v = SeqDescrNew(NULL);
    v->choice = Seq_descr_pub;
    v->data.ptrvalue = AsnIoMemCopy(pdp, (AsnReadFunc) PubdescAsnRead,
                                    (AsnWriteFunc) PubdescAsnWrite);
    list = ValNodeLink(&list, v);
    return list;
}
2214
/*
 * Convenience form of AddToListEx that always passes TRUE for
 * is_ref_seq_prot.  Returns the resulting list head.
 */
ValNodePtr AddToList(ValNodePtr list, ValNodePtr check, PubdescPtr pdp)
{
    ValNodePtr updated;

    updated = AddToListEx (list, check, pdp, TRUE);
    return updated;
}
2219
2220 //LCOV_EXCL_START
2221 //this cleanup takes place in BasicCleanup
/*
 * Moves data out of the imprint of a Cit-sub.  Applies only when vnp is a
 * PUB_Sub with a Cit-sub that has an imprint:
 *   - the imprint's pub becomes the author list's affil, if the author
 *     list exists and has no affil yet;
 *   - the imprint's date becomes the Cit-sub date, if that is unset;
 *   - the imprint is freed once it no longer holds a pub.
 */
void CheckCitSubNew(ValNodePtr vnp)
{
    CitSubPtr csp;
    AuthListPtr alp;
    ImprintPtr imp;

    if (vnp == NULL || vnp->choice != PUB_Sub) {
        return;
    }
    csp = (CitSubPtr) vnp->data.ptrvalue;
    if (csp == NULL) {
        return;
    }
    imp = csp->imp;
    if (imp == NULL) {
        return;         /* every step below needs an imprint */
    }

    alp = csp->authors;
    if (imp->pub != NULL && alp != NULL && alp->affil == NULL) {
        alp->affil = imp->pub;
        imp->pub = NULL;
    }
    if (imp->date != NULL && csp->date == NULL) {
        csp->date = imp->date;
        imp->date = NULL;
    }
    if (imp->pub == NULL) {
        csp->imp = ImprintFree (csp->imp);
    }
    return;
}
2251 //LCOV_EXCL_STOP
2252
/*
 * SeqEntryExplore-style callback (data, index and indent are unused).
 * Applies CheckCitSubNew to every PUB_Sub inside the pub descriptors of
 * this entry.  Bioseqs that are neither raw nor const are skipped.
 */
void ChangeCitSub (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqPtr bsp;
    BioseqSetPtr bssp;
    ValNodePtr dsc, pub;
    PubdescPtr pdp;

    if (IS_Bioseq(sep)) {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)) {
            return;
        }
        dsc = bsp->descr;
    } else {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        dsc = bssp->descr;
    }

    while (dsc != NULL) {
        if (dsc->choice == Seq_descr_pub) {
            pdp = dsc->data.ptrvalue;
            if (pdp != NULL) {
                for (pub = pdp->pub; pub != NULL; pub = pub->next) {
                    if (pub->choice == PUB_Sub) {
                        CheckCitSubNew(pub);
                    }
                }
            }
        }
        dsc = dsc->next;
    }
}
2284
2285 //LCOV_EXCL_START
2286 /***************************************************************************
2287 * NewPubs:
2288 * -- find all ImpFeat "sites"
2289 * change to pubdesc with reftype 'sites'
2290 * -- find all other sfp->cit
2291 * change to pubdesc with reftype 'feats'
2292 * -- pubs are moved from SeqAnnot to Seqdescr on the same level
2293 ****************************************************************************/
/*
 * NewPubs: SeqEntryExplore callback that collects the publications of one
 * entry into its descriptor chain.
 *
 * sep - entry being visited; a Bioseq is processed only when its
 *       representation is raw or const.
 * data, index, indent - unused.
 *
 * Steps:
 *   1. Existing pub descriptors are extracted from the descriptor chain.
 *   2. For every feature (annotations of type 1) that has citations, each
 *      citation is wrapped in a temporary Pubdesc and offered to
 *      AddToListEx; the citation itself is replaced by its MinimizePub
 *      form.  reftype is 1 (sites) for an import feature whose key is
 *      "Site-ref" and 2 otherwise.
 *   3. Annotations with no data are removed when
 *      SSECNoGenomeAnnotInAnnotDescr allows it.
 *   4. The collected pub list is appended to the descriptor chain and the
 *      chain and annotation list are stored back on the entry.
 */
void NewPubs (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr     bsp = NULL;
  BioseqSetPtr  bssp = NULL;
  ValNodePtr    descr = NULL;
  ValNodePtr    old_pubs;
  ValNodePtr    pub_list = NULL;
  ValNodePtr    check = NULL;
  ValNodePtr    citset, entry, entry_next;
  ValNodePtr    short_pub, short_list;
  SeqAnnotPtr   annots = NULL, ap, ap_next;
  SeqFeatPtr    feat;
  ImpFeatPtr    ifp;
  PubdescPtr    scratch;
  SeqIdPtr      sip;
  Boolean       is_ref_seq_prot = FALSE;

  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_const) {
      return;
    }
    descr = bsp->descr;
    annots = bsp->annot;
    if (ISA_aa (bsp->mol)) {
      /* a protein carrying a SEQID_OTHER id is flagged for AddToListEx */
      for (sip = bsp->id; sip != NULL; sip = sip->next) {
        if (sip->choice == SEQID_OTHER) {
          is_ref_seq_prot = TRUE;
        }
      }
    }
  } else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    descr = bssp->descr;
    annots = bssp->annot;
  }

  old_pubs = ValNodeExtractList(&descr, Seq_descr_pub);
  if (old_pubs != NULL) {
    pub_list = ValNodeLink(&pub_list, old_pubs);
  }

  for (ap = annots; ap != NULL; ap = ap->next) {
    if (ap->type != 1) {
      continue;
    }
    for (feat = (SeqFeatPtr)(ap->data); feat != NULL; feat = feat->next) {
      citset = feat->cit;
      if (citset == NULL) {
        continue;
      }
      short_list = NULL;
      entry = citset->data.ptrvalue;
      while (entry != NULL) {
        entry_next = entry->next;

        /* temporary Pubdesc that only borrows the citation's pub chain */
        scratch = PubdescNew();
        if (entry->choice == PUB_Equiv) {
          scratch->pub = entry->data.ptrvalue;
        } else {
          scratch->pub = entry;
        }

        scratch->reftype = 2;
        if (feat->data.choice == SEQFEAT_IMP) {
          ifp = feat->data.value.ptrvalue;
          if (StringCmp(ifp->key, "Site-ref") == 0) {
            scratch->reftype = 1; /* sites */
          }
        }

        pub_list = AddToListEx (pub_list, check, scratch, is_ref_seq_prot);
        short_pub = MinimizePub(entry);
        short_list = tie_next(short_list, short_pub);

        /* only the shell is released here; the pub it pointed at goes
           away with the PubFree of the citation just below */
        MemFree(scratch);
        PubFree(entry);
        entry = entry_next;
      }
      if (short_list != NULL) {
        citset->data.ptrvalue = short_list;
      }
    }
  }

  for (ap = annots; ap != NULL; ap = ap_next) {
    ap_next = ap->next;
    /* now keep empty annot if annot_descr present */
    if (ap->data == NULL && SSECNoGenomeAnnotInAnnotDescr (ap)) {
      annots = remove_annot(annots, ap);
    }
  }

  descr = tie_next(descr, pub_list);
  if (bssp != NULL) {
    bssp->descr = descr;
    bssp->annot = annots;
  } else {
    bsp->descr = descr;
    bsp->annot = annots;
  }
}
2405
/*
 * CmpPub: SeqEntryExplore callback that checks whether a candidate Pubdesc
 * is present on a non-protein Bioseq.
 *
 * sep  - entry being visited; only Bioseqs whose mol is not Seq_mol_aa
 *        are examined.
 * data - address of the candidate PubdescPtr (PubdescPtr PNTR).
 * index, indent - unused.
 *
 * Each pub descriptor on the Bioseq is compared with the candidate using
 * PubMatch.  A pub chain with more than one element is wrapped in a
 * temporary PUB_Equiv node for the comparison.  If no descriptor matches,
 * the candidate is freed and *data is set to NULL, which also turns the
 * remaining visits of the same exploration into no-ops.
 */
void CmpPub (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr        bsp;
  PubdescPtr PNTR  candidate_ref;
  PubdescPtr       candidate, here;
  ValNodePtr       sdp, lhs, rhs;
  Boolean          lhs_wrapped, rhs_wrapped, matched;

  candidate_ref = data;
  candidate = *candidate_ref;
  if (candidate == NULL || !IS_Bioseq(sep)) {
    return;
  }
  bsp = (BioseqPtr)(sep->data.ptrvalue);
  if (bsp->mol == Seq_mol_aa) {
    return;
  }

  for (sdp = bsp->descr; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice != Seq_descr_pub) {
      continue;
    }
    here = sdp->data.ptrvalue;

    lhs_wrapped = (Boolean) (here->pub->next != NULL);
    if (lhs_wrapped) {
      lhs = SeqDescrNew(NULL);
      lhs->choice = PUB_Equiv;
      lhs->data.ptrvalue = here->pub;
    } else {
      lhs = here->pub;
    }

    rhs_wrapped = (Boolean) (candidate->pub->next != NULL);
    if (rhs_wrapped) {
      rhs = SeqDescrNew(NULL);
      rhs->choice = PUB_Equiv;
      rhs->data.ptrvalue = candidate->pub;
    } else {
      rhs = candidate->pub;
    }

    matched = (Boolean) (PubMatch(lhs, rhs) == 0);

    /* only the temporary wrapper nodes are released, not the pubs */
    if (lhs_wrapped) {
      ValNodeFree(lhs);
    }
    if (rhs_wrapped) {
      ValNodeFree(rhs);
    }
    if (matched) {
      break;
    }
  }

  if (sdp == NULL) {
    /* ran off the end of the chain: this Bioseq lacks the publication */
    PubdescFree(*candidate_ref);
    *candidate_ref = NULL;
  }
}
2470 //LCOV_EXCL_STOP
2471
2472 /***********************************************************************
2473 * delete pubs from Bioseqs if they are already on the top level
2474 * don't delete Pubdesc if additional info (name fig, num etc) is present
2475 ************************************************************************/
/*
 * DeletePubs: SeqEntryExplore callback that removes from a Bioseq every pub
 * descriptor matching the Pubdesc passed in data.
 *
 * sep  - entry being visited; only raw or const Bioseqs are processed
 *        (there is no restriction on molecule type).
 * data - PubdescPtr to compare against (the copy kept at set level).
 * index, indent - unused.
 *
 * A descriptor is left alone when its Pubdesc carries extra information
 * (name, fig, num, maploc or comment).  Pub chains with more than one
 * element are wrapped in a temporary PUB_Equiv node for PubMatch.
 */
void DeletePubs (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr   bsp;
  PubdescPtr  set_pdp, seq_pdp;
  ValNodePtr  chain, sdp, sdp_next;
  ValNodePtr  lhs, rhs;
  Boolean     lhs_wrapped, rhs_wrapped;

  set_pdp = data;
  if (set_pdp == NULL || !IS_Bioseq(sep)) {
    return;
  }
  bsp = (BioseqPtr)(sep->data.ptrvalue);
  if (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_const) {
    return;
  }

  chain = bsp->descr;
  for (sdp = chain; sdp != NULL; sdp = sdp_next) {
    sdp_next = sdp->next;          /* sdp may be removed below */
    if (sdp->choice != Seq_descr_pub) {
      continue;
    }
    seq_pdp = sdp->data.ptrvalue;  /* from the Bioseq */
    if (seq_pdp->name != NULL || seq_pdp->fig != NULL ||
        seq_pdp->num != NULL || seq_pdp->maploc != NULL ||
        seq_pdp->comment != NULL) {
      continue;
    }

    lhs_wrapped = (Boolean) (seq_pdp->pub->next != NULL);
    if (lhs_wrapped) {
      lhs = ValNodeNew(NULL);
      lhs->choice = PUB_Equiv;
      lhs->data.ptrvalue = seq_pdp->pub;
    } else {
      lhs = seq_pdp->pub;
    }

    rhs_wrapped = (Boolean) (set_pdp->pub->next != NULL);  /* from the set */
    if (rhs_wrapped) {
      rhs = ValNodeNew(NULL);
      rhs->choice = PUB_Equiv;
      rhs->data.ptrvalue = set_pdp->pub;
    } else {
      rhs = set_pdp->pub;
    }

    if (PubMatch(lhs, rhs) == 0) {
      PubdescFree((PubdescPtr) (sdp->data.ptrvalue));
      chain = remove_node(chain, sdp);
    }

    /* release the temporary wrapper nodes only */
    if (lhs_wrapped) {
      ValNodeFree(lhs);
    }
    if (rhs_wrapped) {
      ValNodeFree(rhs);
    }
  }
  bsp->descr = chain;
}
2543
2544 //LCOV_EXCL_START
/*
 * MoveSetPubs: SeqEntryExplore callback that promotes publications shared by
 * the Bioseqs below sep into the list held in a PubSetList.
 *
 * sep  - entry being visited.
 * data - PubSetListPtr; psp->list receives the promoted pub descriptors and
 *        psp->first is cleared once at least one pub has been promoted.
 * index, indent - unused.
 *
 * Nothing is done once psp->first is FALSE.  This is tested before sep is
 * inspected: the earlier code sent a Bioseq with first == FALSE through the
 * Bioseq-set branch and read set fields from it before returning.
 *
 * The source descriptors come from the Bioseq itself (raw or const only) or,
 * for a Bioseq-set of class 4 (presumably the "parts" set -- confirm against
 * the Bioseq-set class enumeration), from the first member of the set.
 * For each pub descriptor a copy is made and CmpPub is run over sep; CmpPub
 * frees the copy when some Bioseq lacks the pub.  A surviving copy is
 * appended to the set list and DeletePubs removes the matching descriptors
 * from the Bioseqs.
 */
void MoveSetPubs (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr      bsp = NULL;
  BioseqSetPtr   bssp;
  Boolean        first;
  PubSetListPtr  psp;
  ValNodePtr     descr = NULL, v, vnext, tmp, set_list;
  PubdescPtr     tmp_pdp, pdp;

  psp = data;
  if (psp == NULL) {
    return;
  }
  first = psp->first;
  if (first == FALSE) {
    /* already handled at an earlier entry; do not touch sep at all */
    return;
  }
  set_list = psp->list;

  if (IS_Bioseq(sep)) {
    bsp = sep->data.ptrvalue;
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
      return;
    descr = bsp->descr;
  } else {
    bssp = sep->data.ptrvalue;
    if (bssp != NULL && bssp->_class == 4 && bssp->seq_set != NULL) {
      bsp = (BioseqPtr) bssp->seq_set->data.ptrvalue;
      if (bsp != NULL) {
        descr = bsp->descr;
      }
    }
  }
  if (bsp == NULL) {
    return;
  }

  for (v = descr; v; v = vnext) {
    vnext = v->next;
    if (v->choice != Seq_descr_pub) {
      continue;
    }
    pdp = v->data.ptrvalue;
    tmp_pdp = AsnIoMemCopy(pdp,
        (AsnReadFunc) PubdescAsnRead, (AsnWriteFunc) PubdescAsnWrite);
    /* CmpPub frees the copy and NULLs tmp_pdp if any Bioseq lacks it */
    SeqEntryExplore(sep, &tmp_pdp, CmpPub);
    if (tmp_pdp != NULL) {
      tmp = SeqDescrNew(NULL);
      tmp->choice = Seq_descr_pub;
      tmp->data.ptrvalue = tmp_pdp;
      set_list = tie_next(set_list, tmp);
      SeqEntryExplore(sep, tmp_pdp, DeletePubs);
      first = FALSE;
    }
  }
  psp->list = set_list;
  psp->first = first;
}
2597 //LCOV_EXCL_STOP
2598
/*
 * FindOldLineage: SeqEntryExplore callback that takes the taxonomy string
 * out of the first GenBank block descriptor of an entry.
 *
 * sep  - entry being visited; a Bioseq is processed only when its
 *        representation is raw or const.
 * data - address of a CharPtr (CharPtr PNTR).  When a taxonomy string is
 *        found, any string already stored there is freed and replaced.
 * index, indent - unused.
 *
 * Ownership of the string moves to *data; gb->taxonomy is cleared.
 * Only the first Seq_descr_genbank descriptor is looked at.  A descriptor
 * with no GBBlock payload is skipped instead of being dereferenced.
 */
void FindOldLineage (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  ValNodePtr    descr = NULL, vnp;
  GBBlockPtr    gb;
  CharPtr PNTR  linp;

  linp = (CharPtr PNTR) data;
  if (linp == NULL) {
    return;
  }
  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
      return;
    descr = bsp->descr;
  } else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    descr = bssp->descr;
  }
  for (vnp = descr; vnp; vnp = vnp->next) {
    if (vnp->choice == Seq_descr_genbank) {
      gb = vnp->data.ptrvalue;
      if (gb != NULL && gb->taxonomy) {
        if (*linp) {
          MemFree(*linp);
        }
        *linp = gb->taxonomy;
        gb->taxonomy = NULL;
      }
      break;
    }
  }
}
2633
2634 //LCOV_EXCL_START
/*
 * NewLineage: SeqEntryExplore callback that stores a lineage string in the
 * OrgName of the first BioSource descriptor of an entry.
 *
 * sep  - entry being visited; a Bioseq is processed only when its
 *        representation is raw or const.
 * data - address of the lineage string (CharPtr PNTR).  The string is
 *        copied with StringSave, so the caller keeps ownership.
 * index, indent - unused.
 *
 * An OrgName is created when the OrgRef has none; an existing lineage is
 * freed before the new one is stored.  Nothing happens when there is no
 * lineage, no source descriptor, or no OrgRef.  A source descriptor with no
 * BioSource payload and a failed OrgNameNew are tolerated instead of being
 * dereferenced.
 */
void NewLineage (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  ValNodePtr    descr = NULL, vnp;
  BioSourcePtr  biosp;
  OrgRefPtr     orp = NULL;
  CharPtr PNTR  linp;
  CharPtr       lineage;

  linp = (CharPtr PNTR) data;
  if (linp == NULL) {
    return;
  }
  lineage = *linp;
  if (lineage == NULL) {
    return;
  }
  if (IS_Bioseq(sep)) {
    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const))
      return;
    descr = bsp->descr;
  }
  else {
    bssp = (BioseqSetPtr)(sep->data.ptrvalue);
    descr = bssp->descr;
  }
  for (vnp = descr; vnp; vnp = vnp->next) {
    if (vnp->choice == Seq_descr_source) {
      biosp = vnp->data.ptrvalue;
      if (biosp != NULL) {
        orp = (OrgRefPtr) biosp->org;
      }
      break;   /* only the first source descriptor is considered */
    }
  }
  if (orp == NULL) {
    return;
  }
  if (orp->orgname == NULL) {
    orp->orgname = OrgNameNew();
    if (orp->orgname == NULL) {
      return;
    }
  }
  if (orp->orgname->lineage != NULL) {
    MemFree(orp->orgname->lineage);
  }
  orp->orgname->lineage = StringSave(lineage);
}
2678
2679 //only for segsets
2680 /****************************************************************************
2681 * delete_valnode:
2682 *****************************************************************************/
/*
 * delete_valnode: remove the first node with the given choice from a
 * ValNode chain and free it.
 *
 * host   - head of the chain (may be NULL).
 * choice - descriptor choice to look for.
 *
 * Returns the (possibly new) head of the chain.  The node's payload is
 * freed for Seq_descr_org (OrgRefFree), Seq_descr_modif (ValNodeFree) and
 * Seq_descr_update_date (DateFree); for any other choice only the node
 * itself is released.
 */
static ValNodePtr delete_valnode(ValNodePtr host, Uint1 choice)
{
  ValNodePtr PNTR  link;    /* slot that points at the node under test */
  ValNodePtr       victim;

  link = &host;
  while (*link != NULL && (*link)->choice != choice) {
    link = &((*link)->next);
  }
  victim = *link;
  if (victim == NULL) {
    return host;
  }

  /* unlink; when the head itself matched, this updates host */
  *link = victim->next;
  victim->next = NULL;

  if (choice == Seq_descr_org) {
    OrgRefFree(victim->data.ptrvalue);
  } else if (choice == Seq_descr_modif) {
    ValNodeFree(victim->data.ptrvalue);
  } else if (choice == Seq_descr_update_date) {
    DateFree(victim->data.ptrvalue);
  }
  /* Seq_descr_mol_type and everything else: no payload release */

  ValNodeFree(victim);

  return host;
}
2729
2730 //only for segsets
2731 /*------------------- CleanUpSeqDescrChoice() -------------------------*/
2732 /****************************************************************************
2733 * CleanUpSeqDescrChoice:
2734 * 5-21-93
2735 *****************************************************************************/
/*
 * CleanUpSeqDescrChoice: for every entry in the sibling chain starting at
 * sep, delete the first descriptor of the given choice.
 *
 * sep    - first entry of the chain; each entry's payload is treated as a
 *          Bioseq (NOTE(review): no IS_Bioseq check is made -- callers
 *          presumably pass only Bioseq entries; confirm).
 * choice - descriptor choice handed to delete_valnode.
 */
void CleanUpSeqDescrChoice(SeqEntryPtr sep, Uint1 choice)
{
  SeqEntryPtr  entry = sep;
  BioseqPtr    bsp;

  while (entry != NULL) {
    bsp = entry->data.ptrvalue;
    bsp->descr = delete_valnode(bsp->descr, choice);
    entry = entry->next;
  }
} /* CleanUpSeqDescrChoice */
2748
2749 //only for segsets
2750 /**********************************************************/
/*
 * remove_descr: unlink descriptor x from the chain starting at head and
 * free it with SeqDescFree.
 *
 * head - first descriptor of the chain (may be NULL).
 * x    - descriptor to remove, identified by address.
 *
 * Returns the head of the chain after removal.  The chain is returned
 * unchanged when x is not part of it.
 */
SeqDescrPtr remove_descr(SeqDescrPtr head, SeqDescrPtr x)
{
  SeqDescrPtr PNTR  link;   /* slot currently pointing into the chain */

  if (head == NULL) {
    return(NULL);
  }

  link = &head;
  while (*link != NULL && *link != x) {
    link = &((*link)->next);
  }

  if (*link != NULL) {
    /* found x: bypass it, detach it, release it */
    *link = x->next;
    x->next = NULL;
    SeqDescFree(x);
  }
  return(head);
}
2777 //LCOV_EXCL_STOP
2778
2779
2780 /* Cleanup functions originally from Sequin */
2781
/*
 * FindConsistentMolInfo: walk an entry and everything below it, checking
 * that all MolInfo descriptors agree.
 *
 * sep     - entry to examine (Bioseq or Bioseq-set).
 * mipp    - in/out: the first MolInfo encountered is stored here and used
 *           as the reference for all later ones.
 * consist - out: set to FALSE when a later MolInfo differs from the
 *           reference in biomol, tech, completeness or (ignoring case)
 *           techexp.  Never set to TRUE here.
 */
static void FindConsistentMolInfo (SeqEntryPtr sep, MolInfoPtr PNTR mipp, BoolPtr consist)

{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp = NULL;
  SeqEntryPtr   child;
  MolInfoPtr    mip;
  MolInfoPtr    ref;
  ValNodePtr    sdp;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    sdp = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    sdp = bssp->descr;
  } else {
    return;
  }

  for (; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice != Seq_descr_molinfo) continue;
    mip = (MolInfoPtr) sdp->data.ptrvalue;
    if (mip == NULL) continue;
    ref = *mipp;
    if (ref == NULL) {
      *mipp = mip;
      continue;
    }
    if (ref->biomol != mip->biomol ||
        ref->tech != mip->tech ||
        ref->completeness != mip->completeness ||
        StringICmp (ref->techexp, mip->techexp) != 0) {
      *consist = FALSE;
    }
  }

  if (bssp == NULL) return;
  /* descend into the members of a set */
  for (child = bssp->seq_set; child != NULL; child = child->next) {
    FindConsistentMolInfo (child, mipp, consist);
  }
}
2821
/*
 * RemoveMolInfoCallback: SeqEntryExplore callback that deletes every
 * MolInfo descriptor from the descriptor chain of one entry.
 *
 * sep - entry being visited (Bioseq or Bioseq-set).
 * mydata, index, indent - unused.
 */
static void RemoveMolInfoCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr        bsp;
  BioseqSetPtr     bssp;
  ValNodePtr PNTR  link;   /* slot holding the descriptor under test */
  ValNodePtr       cur;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    link = &(bsp->descr);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    link = &(bssp->descr);
  } else {
    return;
  }

  while ((cur = *link) != NULL) {
    if (cur->choice == Seq_descr_molinfo) {
      /* bypass and free; link stays so the successor is tested next */
      *link = cur->next;
      cur->next = NULL;
      SeqDescFree (cur);
    } else {
      link = &(cur->next);
    }
  }
}
2852
/*
 * NormalizeSegSeqMolInfo: replace the MolInfo descriptors inside each
 * segmented set by a single MolInfo on the set, when they all agree.
 *
 * sep - entry to process.  Bioseqs are ignored; Bioseq-sets of any class
 *       other than segset are searched recursively for segsets.
 *
 * For a segset, FindConsistentMolInfo decides whether all MolInfo
 * descriptors at and below the set are consistent.  If so, a copy of the
 * common values is made, every MolInfo descriptor in the set is removed,
 * and the copy is attached to sep through CreateNewDescriptor.
 */
extern void NormalizeSegSeqMolInfo (SeqEntryPtr sep)

{
  BioseqSetPtr  bssp;
  SeqEntryPtr   child;
  Boolean       consistent = TRUE;
  MolInfoPtr    master;
  MolInfoPtr    found = NULL;
  ValNodePtr    sdp;

  if (! IS_Bioseq_set (sep)) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  if (bssp->_class != BioseqseqSet_class_segset) {
    for (child = bssp->seq_set; child != NULL; child = child->next) {
      NormalizeSegSeqMolInfo (child);
    }
    return;
  }

  FindConsistentMolInfo (sep, &found, &consistent);
  if (found == NULL || ! consistent) return;

  /* copy the values first: the descriptor owning found is removed below */
  master = MolInfoNew ();
  if (master == NULL) return;
  master->biomol = found->biomol;
  master->tech = found->tech;
  master->completeness = found->completeness;
  master->techexp = StringSaveNoNull (found->techexp);

  SeqEntryExplore (sep, NULL, RemoveMolInfoCallback);

  sdp = CreateNewDescriptor (sep, Seq_descr_molinfo);
  if (sdp != NULL) {
    sdp->data.ptrvalue = (Pointer) master;
  }
}
2891
/*
 * CollectPseudoCdsProducts: SeqEntryExplore callback that detaches protein
 * products from pseudo coding regions.
 *
 * sep    - entry being visited (Bioseq or Bioseq-set).
 * mydata - address of a ValNode list (ValNodePtr PNTR); each product Bioseq
 *          found is appended to it.
 * index, indent - unused.
 *
 * A CDS counts as pseudo when sfp->pseudo is set or it has a "pseudo"
 * qualifier (case-insensitive).  For such a CDS whose product Bioseq can be
 * found, the product is recorded, the CDS product location is freed, and
 * the protein name (or, failing that, description) is stored as the CDS
 * comment or appended to an existing comment after "; ".
 */
static void CollectPseudoCdsProducts (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqContextPtr  bcp;
  BioseqPtr         bsp;
  BioseqSetPtr      bssp;
  CharPtr           label;
  size_t            len;
  BioseqPtr         product;
  SeqFeatPtr        prot;
  ProtRefPtr        prp;
  Boolean           pseudo;
  GBQualPtr         gbqual;
  SeqAnnotPtr       sap;
  SeqFeatPtr        sfp;
  CharPtr           str;
  ValNodePtr PNTR   vnpp;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  vnpp = (ValNodePtr PNTR) mydata;
  if (vnpp == NULL) return;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    sap = bsp->annot;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    sap = bssp->annot;
  } else {
    return;
  }

  for (; sap != NULL; sap = sap->next) {
    if (sap->type != 1) continue;
    for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
      if (sfp->data.choice != SEQFEAT_CDREGION) continue;

      /* pseudo flag, or failing that a "pseudo" qualifier */
      pseudo = sfp->pseudo;
      for (gbqual = sfp->qual; (! pseudo) && gbqual != NULL; gbqual = gbqual->next) {
        if (StringICmp (gbqual->qual, "pseudo") == 0) {
          pseudo = TRUE;
        }
      }
      if (! pseudo) continue;

      product = BioseqFind (SeqLocId (sfp->product));
      if (product == NULL) continue;
      ValNodeAddPointer (vnpp, 0, (Pointer) product);
      sfp->product = SeqLocFree (sfp->product);

      prot = SeqMgrGetBestProteinFeature (product, NULL);
      if (prot == NULL) {
        /* fall back to the older context-based lookup */
        bcp = BioseqContextNew (product);
        prot = BioseqContextGetSeqFeat (bcp, SEQFEAT_PROT, NULL, NULL, 0);
        BioseqContextFree (bcp);
      }
      if (prot == NULL) continue;
      prp = (ProtRefPtr) prot->data.value.ptrvalue;
      if (prp == NULL) continue;

      label = NULL;
      if (prp->name != NULL) {
        label = prp->name->data.ptrvalue;
      } else if (prp->desc != NULL) {
        label = prp->desc;
      }
      if (label == NULL) continue;

      if (sfp->comment == NULL) {
        sfp->comment = StringSaveNoNull (label);
      } else {
        len = StringLen (sfp->comment) + StringLen (label) + 5;
        str = MemNew (sizeof (Char) * len);
        StringCpy (str, sfp->comment);
        StringCat (str, "; ");
        StringCat (str, label);
        sfp->comment = MemFree (sfp->comment);
        sfp->comment = str;
      }
    }
  }
}
2979
/*
 * CheckForEmblDdbjID: SeqEntryExplore callback that reports whether a
 * Bioseq carries an EMBL, DDBJ, TPE or TPD Seq-id.
 *
 * sep    - entry being visited; only Bioseqs are examined.
 * mydata - address of a Boolean (BoolPtr) that is set to TRUE when such an
 *          id is found.  It is never reset to FALSE here.
 * index, indent - unused.
 */
static void CheckForEmblDdbjID (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr  bsp;
  BoolPtr    found;
  SeqIdPtr   sip;

  if (sep == NULL || ! IS_Bioseq (sep)) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;
  found = (BoolPtr) mydata;
  if (found == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_EMBL || sip->choice == SEQID_DDBJ ||
        sip->choice == SEQID_TPE || sip->choice == SEQID_TPD) {
      *found = TRUE;
    }
  }
}
3008
/*
 * CheckForLclGnlOnly: clear a flag when a Bioseq has any Seq-id that is
 * neither local nor general.
 *
 * bsp    - Bioseq to examine (NULL is ignored).
 * mydata - address of a Boolean (BoolPtr); set to FALSE on the first id of
 *          another kind, otherwise left untouched.
 */
static void CheckForLclGnlOnly (BioseqPtr bsp, Pointer mydata)

{
  BoolPtr   flag;
  SeqIdPtr  sip;

  if (bsp == NULL) return;
  flag = (BoolPtr) mydata;
  if (flag == NULL) return;

  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice != SEQID_LOCAL && sip->choice != SEQID_GENERAL) {
      *flag = FALSE;
    }
  }
}
3025
/*
 * CleanUpPseudoProductsEx: find the protein products of pseudo coding
 * regions in an entry and either detach them or report them.
 *
 * entityID - object-manager entity that owns sep; 0 means do nothing.
 * sep      - top entry to process.
 * doPseudo - TRUE: detach each product Bioseq with DetachDataForProc and
 *            post an error message if that fails;
 *            FALSE: only post a warning naming the product.
 *
 * Nothing is done when any Bioseq in the entry has an EMBL, DDBJ, TPE or
 * TPD id.  CollectPseudoCdsProducts modifies the CDS features (product
 * removed, comment extended) in both modes.
 */
static void CleanUpPseudoProductsEx (Uint2 entityID, SeqEntryPtr sep, Boolean doPseudo)

{
  BioseqPtr      bsp;
  Char           id [41];
  Boolean        isEmblOrDdbj = FALSE;
  Uint4          itemID;
  ValNodePtr     products = NULL;
  OMProcControl  ompc;
  ValNodePtr     vnp;

  if (entityID == 0 || sep == NULL) return;

  SeqEntryExplore (sep, (Pointer) &isEmblOrDdbj, CheckForEmblDdbjID);
  if (isEmblOrDdbj) return;

  SeqEntryExplore (sep, &products, CollectPseudoCdsProducts);

  for (vnp = products; vnp != NULL; vnp = vnp->next) {
    bsp = (BioseqPtr) vnp->data.ptrvalue;
    itemID = GetItemIDGivenPointer (entityID, OBJ_BIOSEQ, (Pointer) bsp);
    if (itemID == 0) continue;

    if (! doPseudo) {
      SeqIdWrite (bsp->id, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
      ErrPostEx (SEV_WARNING, 0, 2, "Accession %s is product of pseudo CDS", id);
      continue;
    }

    MemSet ((Pointer) (&ompc), 0, sizeof (OMProcControl));
    ompc.do_not_reload_from_cache = TRUE;
    ompc.input_entityID = entityID;
    ompc.input_itemID = itemID;
    ompc.input_itemtype = OBJ_BIOSEQ;
    if (! DetachDataForProc (&ompc, FALSE)) {
      Message (MSG_POSTERR, "DetachDataForProc failed");
    }
  }

  /* the list only borrows the Bioseq pointers */
  ValNodeFree (products);
}
3063
3064 //LCOV_EXCL_START
/*
 * CleanUpPseudoProducts: public entry point that runs
 * CleanUpPseudoProductsEx with doPseudo set to TRUE, i.e. products of
 * pseudo coding regions are detached rather than merely reported.
 */
extern void CleanUpPseudoProducts (Uint2 entityID, SeqEntryPtr sep)

{
  const Boolean detach_products = TRUE;

  CleanUpPseudoProductsEx (entityID, sep, detach_products);
}
3070 //LCOV_EXCL_STOP
3071
/*
 * CleanupGenbankCallback: SeqEntryExplore callback that drops the taxonomy
 * string from GenBank block descriptors and deletes blocks left empty.
 *
 * sep - entry being visited (Bioseq or Bioseq-set).
 * mydata, index, indent - unused.
 *
 * A GenBank block is considered empty when extra_accessions, source,
 * keywords, origin, date, entry_date, div and taxonomy are all NULL.
 * Descriptors with no GBBlock payload are left in place.
 */
extern void CleanupGenbankCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr        bsp;
  BioseqSetPtr     bssp;
  Boolean          drop;
  GBBlockPtr       gbp;
  ValNodePtr PNTR  link;   /* slot holding the descriptor under test */
  ValNodePtr       cur;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    link = &(bsp->descr);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    link = &(bssp->descr);
  } else {
    return;
  }

  while ((cur = *link) != NULL) {
    drop = FALSE;
    if (cur->choice == Seq_descr_genbank && cur->data.ptrvalue != NULL) {
      gbp = (GBBlockPtr) cur->data.ptrvalue;
      /* source and origin are deliberately kept */
      gbp->taxonomy = MemFree (gbp->taxonomy);
      drop = (Boolean) (gbp->extra_accessions == NULL &&
                        gbp->source == NULL &&
                        gbp->keywords == NULL &&
                        gbp->origin == NULL &&
                        gbp->date == NULL &&
                        gbp->entry_date == NULL &&
                        gbp->div == NULL &&
                        gbp->taxonomy == NULL);
    }
    if (drop) {
      *link = cur->next;
      cur->next = NULL;
      SeqDescFree (cur);
    } else {
      link = &(cur->next);
    }
  }
}
3118
/*
 * BarCodeTechToKeyword: make sure a Bioseq whose MolInfo technique is
 * MI_TECH_barcode has the keyword "BARCODE" in its GenBank block.
 *
 * bsp      - Bioseq to process (NULL is ignored).
 * userdata - unused.
 *
 * A GenBank block descriptor is created when the Bioseq has none.  If a
 * GenBank descriptor exists but has no GBBlock payload, a block is created
 * for it (previously this case dereferenced a NULL pointer).  The keyword
 * is inserted at the front of the keyword list unless it is already
 * present (case-insensitive comparison).
 */
static void BarCodeTechToKeyword (BioseqPtr bsp, Pointer userdata)

{
  GBBlockPtr   gbp;
  MolInfoPtr   mip;
  SeqDescrPtr  sdp;
  ValNodePtr   vnp;

  if (bsp == NULL) return;

  sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_molinfo, NULL);
  if (sdp == NULL || sdp->choice != Seq_descr_molinfo) return;

  mip = (MolInfoPtr) sdp->data.ptrvalue;
  if (mip == NULL || mip->tech != MI_TECH_barcode) return;

  sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_genbank, NULL);
  if (sdp == NULL) {
    gbp = GBBlockNew ();
    if (gbp != NULL) {
      sdp = SeqDescrAddPointer (&(bsp->descr), Seq_descr_genbank, (Pointer) gbp);
    }
  }
  if (sdp == NULL || sdp->choice != Seq_descr_genbank) return;

  gbp = (GBBlockPtr) sdp->data.ptrvalue;
  if (gbp == NULL) {
    /* descriptor without a block: give it one so the keyword can be stored */
    gbp = GBBlockNew ();
    if (gbp == NULL) return;
    sdp->data.ptrvalue = (Pointer) gbp;
  }

  for (vnp = gbp->keywords; vnp != NULL; vnp = vnp->next) {
    if (StringICmp ((CharPtr) vnp->data.ptrvalue, "BARCODE") == 0) return;
  }

  vnp = ValNodeCopyStr (NULL, 0, "BARCODE");
  if (vnp == NULL) return;

  vnp->next = gbp->keywords;
  gbp->keywords = vnp;
}
3155
EmptyOrNullString(CharPtr str)3156 static Boolean EmptyOrNullString (CharPtr str)
3157
3158 {
3159 Char ch;
3160
3161 if (str == NULL) return TRUE;
3162 ch = *str;
3163 while (ch != '\0') {
3164 if (ch > ' ' && ch <= '~') return FALSE;
3165 str++;
3166 ch = *str;
3167 }
3168 return TRUE;
3169 }
3170
/*
 * MergeAdjacentAnnotsCallback: SeqEntryExplore callback that merges
 * neighbouring feature-table annotations of one entry.
 *
 * sep - entry being visited (Bioseq or Bioseq-set).
 * mydata, index, indent - unused.
 *
 * Two consecutive annotations are merged when both are of type 1, neither
 * has an id, name, db or desc, and both have data.  The features of the
 * second are appended to the first and the second annotation is freed.
 *
 * NOTE(review): after a merge the scan resumes at the annotation following
 * the removed one, so in a run of three mergeable annotations the third is
 * not folded into the first.  Kept as is; confirm whether that is intended.
 */
extern void MergeAdjacentAnnotsCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  SeqAnnotPtr   cur;
  SeqAnnotPtr   following;
  SeqFeatPtr    tail;
  Boolean       mergeable;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    cur = bsp->annot;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    cur = bssp->annot;
  } else {
    return;
  }

  while (cur != NULL) {
    following = cur->next;
    mergeable = (Boolean) (cur->type == 1 &&
                           following != NULL && following->type == 1 &&
                           cur->id == NULL && following->id == NULL &&
                           cur->name == NULL && following->name == NULL &&
                           cur->db == 0 && following->db == 0 &&
                           cur->desc == NULL && following->desc == NULL &&
                           cur->data != NULL && following->data != NULL);
    if (mergeable) {
      /* find the last feature of the first table */
      tail = (SeqFeatPtr) cur->data;
      while (tail->next != NULL) {
        tail = tail->next;
      }
      tail->next = (SeqFeatPtr) following->data;
      following->data = NULL;      /* features now belong to cur */
      cur->next = following->next;
      SeqAnnotFree (following);
      following = cur->next;
    }
    cur = following;
  }
}
3210
HasEvidenceOrInferenceQual(SeqFeatPtr sfp)3211 static Boolean HasEvidenceOrInferenceQual (SeqFeatPtr sfp)
3212
3213 {
3214 GBQualPtr gbq;
3215
3216 if (sfp == NULL) return FALSE;
3217 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
3218 if (StringICmp (gbq->qual, "experiment") == 0) return TRUE;
3219 if (StringICmp (gbq->qual, "inference") == 0) return TRUE;
3220 }
3221 return FALSE;
3222 }
3223
/*
 * CleanupEmptyFeatCallback: SeqEntryExplore callback that removes features
 * carrying no information and annotations left without data.
 *
 * sep - entry being visited (Bioseq or Bioseq-set).
 * mydata, index, indent - unused.
 *
 * Gene features: blank locus, allele, desc, maploc and locus_tag strings
 * are freed.  The gene is empty when all of those are blank, db and syn are
 * NULL, neither the feature nor the gene is pseudo, exp_ev is 0 and there
 * is no experiment/inference qualifier.  An empty gene that has a comment
 * is converted to a misc_feature import feature instead of being deleted.
 *
 * Protein features: unless processed is 3, 4 or 5, a comment other than
 * "putative" is moved into an absent name; a processed == 2 protein with
 * no name is named "unnamed"; the protein is empty when its first name and
 * its desc are blank and ec, activity and db are NULL.
 *
 * Comment features: empty when the comment is blank.
 *
 * An annotation with no data is removed when
 * SSECNoGenomeAnnotInAnnotDescr allows it.
 */
extern void CleanupEmptyFeatCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  Boolean       empty;
  Boolean       keeps_name;   /* processed is not 3, 4 or 5 */
  GeneRefPtr    grp;
  ImpFeatPtr    ifp;
  SeqAnnotPtr   nextsap;
  SeqFeatPtr    nextsfp;
  Pointer PNTR  prevsap;
  Pointer PNTR  prevsfp;
  ProtRefPtr    prp;
  SeqAnnotPtr   sap;
  SeqFeatPtr    sfp;
  ValNodePtr    vnp;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    sap = bsp->annot;
    prevsap = (Pointer PNTR) &(bsp->annot);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    sap = bssp->annot;
    prevsap = (Pointer PNTR) &(bssp->annot);
  } else {
    return;
  }

  for (; sap != NULL; sap = nextsap) {
    nextsap = sap->next;

    if (sap->type == 1) {
      prevsfp = (Pointer PNTR) &(sap->data);
      for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = nextsfp) {
        nextsfp = sfp->next;
        empty = FALSE;

        switch (sfp->data.choice) {
          case SEQFEAT_GENE :
            grp = (GeneRefPtr) sfp->data.value.ptrvalue;
            if (grp == NULL) break;
            /* normalize blank strings to NULL */
            if (EmptyOrNullString (grp->locus)) {
              grp->locus = MemFree (grp->locus);
            }
            if (EmptyOrNullString (grp->allele)) {
              grp->allele = MemFree (grp->allele);
            }
            if (EmptyOrNullString (grp->desc)) {
              grp->desc = MemFree (grp->desc);
            }
            if (EmptyOrNullString (grp->maploc)) {
              grp->maploc = MemFree (grp->maploc);
            }
            if (EmptyOrNullString (grp->locus_tag)) {
              grp->locus_tag = MemFree (grp->locus_tag);
            }
            if (EmptyOrNullString (grp->locus) &&
                EmptyOrNullString (grp->allele) &&
                EmptyOrNullString (grp->desc) &&
                EmptyOrNullString (grp->maploc) &&
                EmptyOrNullString (grp->locus_tag) &&
                grp->db == NULL && grp->syn == NULL &&
                (! sfp->pseudo) && (! grp->pseudo) &&
                (sfp->exp_ev == 0) &&
                (! HasEvidenceOrInferenceQual (sfp))) {
              empty = TRUE;
            }
            /* if it has a comment, convert to misc_feature */
            if (empty && (! EmptyOrNullString (sfp->comment))) {
              ifp = (ImpFeatPtr) MemNew (sizeof (ImpFeat));
              if (ifp != NULL) {
                ifp->key = StringSave ("misc_feature");
                sfp->data.choice = SEQFEAT_IMP;
                sfp->data.value.ptrvalue = (Pointer) ifp;
                sfp->idx.subtype = 0;
                GeneRefFree (grp);
                empty = FALSE;
              }
            }
            break;

          case SEQFEAT_PROT :
            prp = (ProtRefPtr) sfp->data.value.ptrvalue;
            if (prp == NULL) break;
            keeps_name = (Boolean) (prp->processed != 3 &&
                                    prp->processed != 4 &&
                                    prp->processed != 5);
            if (keeps_name && prp->name == NULL && sfp->comment != NULL) {
              if (StringICmp (sfp->comment, "putative") != 0) {
                /* the comment string itself becomes the name */
                ValNodeAddStr (&(prp->name), 0, sfp->comment);
                sfp->comment = NULL;
              }
            }
            if (prp->processed == 2 && prp->name == NULL) {
              ValNodeCopyStr (&(prp->name), 0, "unnamed");
            }
            if (keeps_name) {
              vnp = prp->name;
              if ((vnp == NULL || EmptyOrNullString ((CharPtr) vnp->data.ptrvalue)) &&
                  EmptyOrNullString (prp->desc) &&
                  prp->ec == NULL && prp->activity == NULL && prp->db == NULL) {
                empty = TRUE;
              }
            }
            break;

          case SEQFEAT_COMMENT :
            if (EmptyOrNullString (sfp->comment)) {
              empty = TRUE;
            }
            break;

          default :
            break;
        }

        if (empty) {
          *(prevsfp) = sfp->next;
          sfp->next = NULL;
          SeqFeatFree (sfp);
        } else {
          prevsfp = (Pointer PNTR) &(sfp->next);
        }
      }
    }

    /* now keep empty annot if annot_descr present */
    if (sap->data == NULL && SSECNoGenomeAnnotInAnnotDescr (sap)) {
      *(prevsap) = sap->next;
      sap->next = NULL;
      SeqAnnotFree (sap);
    } else {
      prevsap = (Pointer PNTR) &(sap->next);
    }
  }
}
3345
/*
 * RemoveBioSourceOnPopSet: push a set-level BioSource down to the members
 * of a set and remove it from the set.
 *
 * sep    - entry to process.
 * master - OrgRef inherited from an enclosing set, or NULL.
 *
 * For a Bioseq-set of class 7, 13 through 16, wgs_set or small_genome_set
 * (7 is the genbank set per the original comments; 13-16 are presumably the
 * mut/pop/phy/eco sets -- confirm against the Bioseq-set class
 * enumeration): if the set has a source descriptor with an OrgRef, every
 * member is processed recursively with that OrgRef as master, and then the
 * set's source descriptor is extracted and freed.  Such a set without a
 * usable source descriptor is left untouched.
 *
 * For any other entry: when master is given and the entry has no source
 * descriptor, a new BioSource carrying copies of master's taxname and
 * common name is attached to it.
 *
 * A former second branch for class 7 could never execute, because the
 * branch above already matches class 7 and returns on every path; it has
 * been removed.
 */
extern void RemoveBioSourceOnPopSet (SeqEntryPtr sep, OrgRefPtr master)

{
  BioSourcePtr  biop;
  BioseqSetPtr  bssp;
  SeqEntryPtr   member;
  OrgRefPtr     orp;
  ValNodePtr    sdp;

  if (sep == NULL) return;
  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    if (bssp->_class == 7 ||
        (bssp->_class >= 13 && bssp->_class <= 16) ||
        bssp->_class == BioseqseqSet_class_wgs_set ||
        bssp->_class == BioseqseqSet_class_small_genome_set) { /* now on phy and mut sets */
      sdp = SeqEntryGetSeqDescr (sep, Seq_descr_source, NULL);
      if (sdp == NULL) return;
      biop = (BioSourcePtr) sdp->data.ptrvalue;
      if (biop == NULL) return;
      orp = biop->org;
      if (orp == NULL) return;
      for (member = bssp->seq_set; member != NULL; member = member->next) {
        RemoveBioSourceOnPopSet (member, orp);
      }
      /* members now have their own source; drop the set-level one */
      sdp = ValNodeExtract (&(bssp->descr), Seq_descr_source);
      SeqDescrFree (sdp);
      return;
    }
  }

  if (master == NULL) return;
  sdp = SeqEntryGetSeqDescr (sep, Seq_descr_source, NULL);
  if (sdp != NULL) return;   /* entry already has its own source */
  biop = BioSourceNew ();
  if (biop == NULL) return;
  orp = OrgRefNew ();
  if (orp == NULL) return;
  biop->org = orp;
  orp->taxname = StringSave (master->taxname);
  orp->common = StringSave (master->common);
  sdp = CreateNewDescriptor (sep, Seq_descr_source);
  if (sdp == NULL) return;
  sdp->data.ptrvalue = (Pointer) biop;
}
3410
/*****************************************************************************
*  RemoveMolInfoOnPopSet
*
*  sep     entry to process (NULL is ignored)
*  master  MolInfo inherited from an enclosing set, or NULL at the top call
*
*  For a Bioseq-set of class 7, class 13 through 16, wgs or small-genome:
*  the set's MolInfo is passed to each member (recursively) and the
*  set-level molinfo descriptor is then extracted and freed.
*
*  For any other entry that has no molinfo descriptor, a new MolInfo is
*  created from master (biomol, tech, completeness, plus techexp and
*  gbmoltype when they contain text).
*****************************************************************************/
extern void RemoveMolInfoOnPopSet (SeqEntryPtr sep, MolInfoPtr master)

{
  BioseqSetPtr  bssp;
  SeqEntryPtr   member;
  MolInfoPtr    mip;
  ValNodePtr    sdp;

  if (sep == NULL) return;

  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    if (bssp->_class == 7 ||
        (bssp->_class >= 13 && bssp->_class <= 16) ||
        bssp->_class == BioseqseqSet_class_wgs_set ||
        bssp->_class == BioseqseqSet_class_small_genome_set) {
      sdp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
      if (sdp == NULL) return;
      mip = (MolInfoPtr) sdp->data.ptrvalue;
      if (mip == NULL) return;
      /* distribute to the members first, then drop the set-level copy */
      member = bssp->seq_set;
      while (member != NULL) {
        RemoveMolInfoOnPopSet (member, mip);
        member = member->next;
      }
      SeqDescrFree (ValNodeExtract (&(bssp->descr), Seq_descr_molinfo));
      return;
    }
  }

  /* nothing to inherit, or the entry already has its own molinfo */
  if (master == NULL) return;
  if (SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL) != NULL) return;

  mip = MolInfoNew ();
  if (mip == NULL) return;
  mip->biomol = master->biomol;
  mip->tech = master->tech;
  mip->completeness = master->completeness;
  if (StringDoesHaveText (master->techexp)) {
    mip->techexp = StringSave (master->techexp);
  }
  if (StringDoesHaveText (master->gbmoltype)) {
    mip->gbmoltype = StringSave (master->gbmoltype);
  }

  sdp = CreateNewDescriptor (sep, Seq_descr_molinfo);
  if (sdp == NULL) return;
  sdp->data.ptrvalue = (Pointer) mip;
}
3456
3457 /* NoBiosourceOrTaxonId also looks for lineage and division */
3458 //LCOV_EXCL_START
3459 // Not used for cleanup
NoBiosourceOrTaxonId(SeqEntryPtr sep)3460 extern Boolean NoBiosourceOrTaxonId (SeqEntryPtr sep)
3461
3462 {
3463 BioSourcePtr biop;
3464 BioseqSetPtr bssp;
3465 DbtagPtr dbt;
3466 Boolean notaxid;
3467 ObjectIdPtr oid;
3468 OrgNamePtr onp;
3469 OrgRefPtr orp;
3470 ValNodePtr sdp;
3471 ValNodePtr vnp;
3472
3473 if (sep == NULL) return TRUE;
3474 if (IS_Bioseq_set (sep)) {
3475 bssp = (BioseqSetPtr) sep->data.ptrvalue;
3476 if (bssp != NULL && (bssp->_class == 7 ||
3477 (bssp->_class >= 13 && bssp->_class <= 16) ||
3478 bssp->_class == BioseqseqSet_class_wgs_set ||
3479 bssp->_class == BioseqseqSet_class_small_genome_set)) {
3480 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
3481 if (NoBiosourceOrTaxonId (sep)) return TRUE;
3482 }
3483 return FALSE;
3484 }
3485 }
3486 sdp = SeqEntryGetSeqDescr (sep, Seq_descr_source, NULL);
3487 if (sdp == NULL) return TRUE;
3488 biop = (BioSourcePtr) sdp->data.ptrvalue;
3489 if (biop == NULL) return TRUE;
3490 orp = biop->org;
3491 if (orp == NULL) return TRUE;
3492 vnp = orp->db;
3493 if (vnp == NULL) return TRUE;
3494 notaxid = TRUE;
3495 while (vnp != NULL) {
3496 dbt = (DbtagPtr) vnp->data.ptrvalue;
3497 if (dbt != NULL) {
3498 if (StringCmp (dbt->db, "taxon") == 0) {
3499 oid = dbt->tag;
3500 if (oid != NULL) {
3501 if (oid->str == NULL && oid->id > 0) {
3502 notaxid = FALSE;
3503 }
3504 }
3505 }
3506 }
3507 vnp = vnp->next;
3508 }
3509 if (notaxid) return TRUE;
3510 onp = orp->orgname;
3511 if (onp == NULL) return TRUE;
3512 if (StringHasNoText (onp->lineage) || StringHasNoText (onp->div)) return TRUE;
3513 return FALSE;
3514 }
3515
/*****************************************************************************
*  CollectGeneFeatures
*
*  SeqEntryExplore callback.  mydata points to a ValNodePtr list head; a
*  node (choice 0) pointing at each gene feature found in the feature
*  tables (annot type 1) of this Bioseq or Bioseq-set is appended to it.
*  The features themselves are not copied.
*****************************************************************************/
static void CollectGeneFeatures(SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr        bsp;
  BioseqSetPtr     bssp;
  SeqAnnotPtr      sap;
  SeqFeatPtr       sfp;
  ValNodePtr PNTR  vnpp;

  if (sep == NULL || sep->data.ptrvalue == NULL || mydata == NULL) return;
  vnpp = (ValNodePtr PNTR) mydata;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    sap = bsp->annot;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    sap = bssp->annot;
  } else {
    return;
  }

  for (; sap != NULL; sap = sap->next) {
    /* only feature tables with content are of interest */
    if (sap->type != 1 || sap->data == NULL) continue;
    for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
      if (sfp->data.choice != SEQFEAT_GENE) continue;
      ValNodeAddPointer (vnpp, 0, (Pointer) sfp);
    }
  }
}
3548
/*****************************************************************************
*  ExtendGeneWithinNucProt
*
*  Descends through sets of class 7, class 13 through 16, wgs and
*  small-genome.  For every other entry the gene features are collected,
*  and if exactly one gene feature exists CorrectGeneFeatLocation is run
*  over the entry.
*****************************************************************************/
static void ExtendGeneWithinNucProt (SeqEntryPtr sep)

{
  BioseqSetPtr  bssp;
  ValNodePtr    genes = NULL;

  if (sep == NULL) return;

  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    if (bssp->_class == 7 ||
        (bssp->_class >= 13 && bssp->_class <= 16) ||
        bssp->_class == BioseqseqSet_class_wgs_set ||
        bssp->_class == BioseqseqSet_class_small_genome_set) {
      sep = bssp->seq_set;
      while (sep != NULL) {
        ExtendGeneWithinNucProt (sep);
        sep = sep->next;
      }
      return;
    }
  }

  SeqEntryExplore (sep, (Pointer) (&genes), CollectGeneFeatures);
  /* act only when the entry holds a single gene feature */
  if (genes != NULL && genes->next == NULL) {
    SeqEntryExplore (sep, NULL, CorrectGeneFeatLocation);
  }
  ValNodeFree (genes);
}
3576
/*****************************************************************************
*  ExtendGeneFeatIfOnMRNA
*
*  Public entry point for ExtendGeneWithinNucProt.  When sep is NULL the
*  top SeqEntry for entityID is looked up (entityID must be non-zero);
*  if no entry can be determined the call does nothing.
*****************************************************************************/
extern void ExtendGeneFeatIfOnMRNA (Uint2 entityID, SeqEntryPtr sep)

{
  if (sep == NULL) {
    if (entityID < 1) return;
    sep = GetTopSeqEntryForEntityID (entityID);
    if (sep == NULL) return;
  }
  ExtendGeneWithinNucProt (sep);
}
3587 //LCOV_EXCL_STOP
3588
ConvertPubFeatDescProc(GatherObjectPtr gop)3589 static Boolean ConvertPubFeatDescProc (GatherObjectPtr gop)
3590
3591 {
3592 BioseqPtr bsp;
3593 size_t len;
3594 PubdescPtr pdp;
3595 SeqDescPtr sdp;
3596 SeqEntryPtr sep;
3597 SeqFeatPtr sfp;
3598 SeqIdPtr sip;
3599 CharPtr str;
3600 ValNode vn;
3601
3602 if (gop->itemtype != OBJ_SEQFEAT) return TRUE;
3603 sfp = (SeqFeatPtr) gop->dataptr;
3604 /* look for publication features */
3605 if (sfp == NULL || sfp->data.choice != SEQFEAT_PUB) return TRUE;
3606 /* get bioseq by feature location */
3607 sip = SeqLocId (sfp->location);
3608 bsp = BioseqFind (sip);
3609 if (bsp == NULL) return TRUE;
3610 sip = SeqIdFindBest(bsp->id, 0);
3611 if (sip == NULL) return TRUE;
3612 vn.choice = SEQLOC_WHOLE;
3613 vn.extended = 0;
3614 vn.data.ptrvalue = (Pointer) sip;
3615 vn.next = NULL;
3616 /* is feature full length? */
3617 if (SeqLocCompare (sfp->location, &vn) != SLC_A_EQ_B) return TRUE;
3618 sep = SeqMgrGetSeqEntryForData (bsp);
3619 if (sep == NULL) return TRUE;
3620 sdp = CreateNewDescriptor (sep, Seq_descr_pub);
3621 if (sdp == NULL) return TRUE;
3622 /* move publication from feature to descriptor */
3623 sdp->data.ptrvalue = sfp->data.value.ptrvalue;
3624 sfp->data.value.ptrvalue = NULL;
3625 /* flag old feature for removal */
3626 sfp->idx.deleteme = TRUE;
3627 /* move or append comment to pubdesc comment */
3628 if (sfp->comment == NULL) return TRUE;
3629 pdp = (PubdescPtr) sdp->data.ptrvalue;
3630 if (pdp == NULL) return TRUE;
3631 if (pdp->comment == NULL) {
3632 pdp->comment = sfp->comment;
3633 } else {
3634 len = StringLen (sfp->comment) + StringLen (pdp->comment) + 5;
3635 str = MemNew (sizeof (Char) * len);
3636 StringCpy (str, pdp->comment);
3637 StringCat (str, "; ");
3638 StringCat (str, sfp->comment);
3639 pdp->comment = MemFree (pdp->comment);
3640 pdp->comment = str;
3641 }
3642 sfp->comment = NULL;
3643 return TRUE;
3644 }
3645
/*****************************************************************************
*  ConvertFullLenPubFeatToDesc
*
*  Runs ConvertPubFeatDescProc over every feature in sep with the scope
*  temporarily set to sep, then deletes the objects that were marked.
*  A NULL sep is ignored.
*****************************************************************************/
extern void ConvertFullLenPubFeatToDesc (SeqEntryPtr sep)

{
  Boolean      objMgrFilter [OBJ_MAX];
  SeqEntryPtr  oldscope;

  if (sep == NULL) return;

  /* visit features only */
  MemSet ((Pointer) objMgrFilter, FALSE, sizeof (objMgrFilter));
  objMgrFilter [OBJ_SEQFEAT] = TRUE;

  oldscope = SeqEntrySetScope (sep);
  GatherObjectsInEntity (0, OBJ_SEQENTRY, (Pointer) sep,
                         ConvertPubFeatDescProc, NULL, objMgrFilter);
  SeqEntrySetScope (oldscope);

  DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
}
3664
GatherConvertSourceFeatDescProc(GatherObjectPtr gop)3665 static Boolean GatherConvertSourceFeatDescProc (GatherObjectPtr gop)
3666
3667 {
3668 SeqFeatPtr sfp;
3669
3670 if (gop->itemtype != OBJ_SEQFEAT) return TRUE;
3671 sfp = (SeqFeatPtr) gop->dataptr;
3672 ConvertSourceFeatDescProc (sfp, NULL);
3673 return TRUE;
3674 }
3675
/*****************************************************************************
*  LookForTransgenic
*
*  Descriptor visitor.  userdata points to a Boolean that is set to TRUE
*  when a source descriptor has is_focus set or carries a transgenic
*  subsource.  The Boolean is never reset to FALSE here.
*****************************************************************************/
static void LookForTransgenic (SeqDescrPtr sdp, Pointer userdata)

{
  BioSourcePtr  biop;
  BoolPtr       is_trans_or_focus;
  SubSourcePtr  ssp;

  if (sdp == NULL || sdp->choice != Seq_descr_source) return;
  biop = (BioSourcePtr) sdp->data.ptrvalue;
  if (biop == NULL) return;

  if (! biop->is_focus) {
    /* no focus flag: search the subsource chain for a transgenic entry */
    ssp = biop->subtype;
    while (ssp != NULL && ssp->subtype != SUBSRC_transgenic) {
      ssp = ssp->next;
    }
    if (ssp == NULL) return;
  }

  is_trans_or_focus = (BoolPtr) userdata;
  *is_trans_or_focus = TRUE;
}
3699
/*****************************************************************************
*  ConvertFullLenSourceFeatToDesc
*
*  Does nothing if sep is NULL or if any source descriptor in sep has
*  focus set or is transgenic (see LookForTransgenic).  Otherwise runs
*  GatherConvertSourceFeatDescProc over every feature with the scope
*  temporarily set to sep, and then deletes the objects that were marked.
*****************************************************************************/
extern void ConvertFullLenSourceFeatToDesc (SeqEntryPtr sep)

{
  Boolean      is_trans_or_focus = FALSE;
  Boolean      objMgrFilter [OBJ_MAX];
  SeqEntryPtr  oldscope;

  if (sep == NULL) return;

  VisitDescriptorsInSep (sep, (Pointer) &is_trans_or_focus, LookForTransgenic);
  if (is_trans_or_focus) return;

  /* visit features only */
  MemSet ((Pointer) objMgrFilter, FALSE, sizeof (objMgrFilter));
  objMgrFilter [OBJ_SEQFEAT] = TRUE;

  oldscope = SeqEntrySetScope (sep);
  GatherObjectsInEntity (0, OBJ_SEQENTRY, (Pointer) sep,
                         GatherConvertSourceFeatDescProc, NULL, objMgrFilter);
  SeqEntrySetScope (oldscope);

  DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
}
3722
LoopSeqEntryToAsn3(SeqEntryPtr sep,Boolean strip,Boolean correct,SeqEntryFunc taxfun,SeqEntryFunc taxmerge,Boolean gpipeMode,Boolean isEmblOrDdbj)3723 static Int4 LoopSeqEntryToAsn3 (
3724 SeqEntryPtr sep,
3725 Boolean strip,
3726 Boolean correct,
3727 SeqEntryFunc taxfun,
3728 SeqEntryFunc taxmerge,
3729 Boolean gpipeMode,
3730 Boolean isEmblOrDdbj
3731 )
3732
3733 {
3734 BioseqSetPtr bssp;
3735 SeqEntryPtr oldscope;
3736 Int4 rsult;
3737 Boolean taxserver;
3738
3739 rsult = 0;
3740 if (IS_Bioseq_set (sep)) {
3741 bssp = (BioseqSetPtr) sep->data.ptrvalue;
3742 if (bssp != NULL && (bssp->_class == 7 ||
3743 (bssp->_class >= 13 && bssp->_class <= 16) ||
3744 bssp->_class == BioseqseqSet_class_wgs_set ||
3745 bssp->_class == BioseqseqSet_class_small_genome_set)) {
3746 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
3747 rsult += LoopSeqEntryToAsn3 (sep, strip, correct, taxfun, taxmerge, gpipeMode, isEmblOrDdbj);
3748 }
3749 return rsult;
3750 }
3751 }
3752 oldscope = SeqEntrySetScope (sep);
3753 taxserver = (Boolean) (taxfun != NULL || taxmerge != NULL);
3754 rsult = SeqEntryToAsn3Ex (sep, strip, correct, taxserver, taxfun, taxmerge, gpipeMode, isEmblOrDdbj);
3755 SeqEntrySetScope (oldscope);
3756 return rsult;
3757 }
3758
3759 //LCOV_EXCL_START
3760 //This is never called; basic cleanup takes care of converting
3761 // gene quals to gene xrefs
DeleteBadMarkedGeneXrefs(GatherObjectPtr gop)3762 static Boolean DeleteBadMarkedGeneXrefs (GatherObjectPtr gop)
3763
3764 {
3765 GeneRefPtr grp;
3766 SeqFeatXrefPtr nextxref;
3767 SeqFeatXrefPtr PNTR prevxref;
3768 SeqFeatPtr sfp;
3769 SeqFeatPtr sfpx;
3770 Boolean unlink;
3771 SeqFeatXrefPtr xref;
3772
3773 if (gop->itemtype != OBJ_SEQFEAT) return TRUE;
3774 sfp = (SeqFeatPtr) gop->dataptr;
3775 xref = sfp->xref;
3776 prevxref = (SeqFeatXrefPtr PNTR) &(sfp->xref);
3777 while (xref != NULL) {
3778 nextxref = xref->next;
3779 unlink = FALSE;
3780 if (xref->specialCleanupFlag && xref->data.choice == SEQFEAT_GENE) {
3781 grp = (GeneRefPtr) xref->data.value.ptrvalue;
3782 if (grp != NULL) {
3783 sfpx = SeqMgrGetOverlappingGene (sfp->location, NULL);
3784 if (sfpx != NULL && sfpx->data.choice == SEQFEAT_GENE) {
3785 unlink = TRUE;
3786 }
3787 }
3788 }
3789 xref->specialCleanupFlag = FALSE;
3790 if (unlink) {
3791 *(prevxref) = xref->next;
3792 xref->next = NULL;
3793 SeqFeatXrefFree (xref);
3794 } else {
3795 prevxref = (SeqFeatXrefPtr PNTR) &(xref->next);
3796 }
3797 xref = nextxref;
3798 }
3799 return TRUE;
3800 }
3801 //LCOV_EXCL_STOP
3802
MarkMovedGeneGbquals(GatherObjectPtr gop)3803 static Boolean MarkMovedGeneGbquals (GatherObjectPtr gop)
3804
3805 {
3806 GBQualPtr gbq;
3807 GeneRefPtr grp;
3808 BoolPtr hasMarkedGenesP;
3809 GBQualPtr nextqual;
3810 GBQualPtr PNTR prevqual;
3811 SeqFeatPtr sfp;
3812 SeqFeatXrefPtr xref;
3813
3814 if (gop->itemtype != OBJ_SEQFEAT) return TRUE;
3815 hasMarkedGenesP = (BoolPtr) gop->userdata;
3816 if (hasMarkedGenesP == NULL) return TRUE;
3817 sfp = (SeqFeatPtr) gop->dataptr;
3818 gbq = sfp->qual;
3819 prevqual = (GBQualPtr PNTR) &(sfp->qual);
3820 while (gbq != NULL) {
3821 TrimSpacesAroundString (gbq->qual);
3822 TrimSpacesAroundString (gbq->val);
3823 nextqual = gbq->next;
3824 if (StringICmp (gbq->qual, "gene") == 0 && (! StringHasNoText (gbq->val))) {
3825 //LCOV_EXCL_START
3826 //This is never called; basic cleanup takes care of converting
3827 // gene quals to gene xrefs
3828 grp = GeneRefNew ();
3829 grp->locus = StringSave (gbq->val);
3830 xref = SeqFeatXrefNew ();
3831 xref->data.choice = SEQFEAT_GENE;
3832 xref->data.value.ptrvalue = (Pointer) grp;
3833 xref->specialCleanupFlag = TRUE; /* flag to test for overlapping gene later */
3834 xref->next = sfp->xref;
3835 sfp->xref = xref;
3836 *(prevqual) = gbq->next;
3837 gbq->next = NULL;
3838 gbq->qual = MemFree (gbq->qual);
3839 gbq->val = MemFree (gbq->val);
3840 GBQualFree (gbq);
3841 *hasMarkedGenesP = TRUE;
3842 //LCOV_EXCL_STOP
3843 } else {
3844 prevqual = (GBQualPtr PNTR) &(gbq->next);
3845 }
3846 gbq = nextqual;
3847 }
3848 return TRUE;
3849 }
3850
3851 /* RemoveMultipleTitles currently removes FIRST title in chain */
3852
/*****************************************************************************
*  RemoveMultipleTitles
*
*  SeqEntryExplore callback.  Within the descriptor chain of one Bioseq
*  or Bioseq-set, every title descriptor that is followed by another
*  title is flagged for deletion (idx.deleteme), provided it is an
*  extended (ObjValNode) descriptor.  The last title is kept.
*****************************************************************************/
static void RemoveMultipleTitles (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  SeqDescrPtr    descr = NULL;
  SeqDescrPtr    earlier = NULL;
  ObjValNodePtr  ovp;
  SeqDescrPtr    sdp;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    descr = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    descr = bssp->descr;
  } else {
    return;
  }

  for (sdp = descr; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice != Seq_descr_title) continue;
    /* a newer title was found, so the one seen before it is superseded */
    if (earlier != NULL && earlier->extended != 0) {
      ovp = (ObjValNodePtr) earlier;
      ovp->idx.deleteme = TRUE;
    }
    earlier = sdp;
  }
}
3885
GetCspFromPdp(PubdescPtr pdp)3886 static CitSubPtr GetCspFromPdp (PubdescPtr pdp)
3887
3888 {
3889 ValNodePtr vnp;
3890
3891 if (pdp == NULL) return NULL;
3892 vnp = pdp->pub;
3893 if (vnp == NULL) return NULL;
3894 if (vnp->choice != PUB_Sub) return NULL;
3895 return (CitSubPtr) vnp->data.ptrvalue;
3896 }
3897
CitSubsMatch(CitSubPtr csp1,CitSubPtr csp2)3898 static Boolean CitSubsMatch (CitSubPtr csp1, CitSubPtr csp2)
3899
3900 {
3901 AffilPtr afp1, afp2;
3902 AuthListPtr alp1, alp2;
3903
3904 if (csp1 == NULL || csp2 == NULL) return FALSE;
3905 if (DateMatch (csp1->date, csp2->date, FALSE) != 0) return FALSE;
3906 if (StringICmp (csp1->descr, csp2->descr) != 0) return FALSE;
3907 alp1 = csp1->authors;
3908 alp2 = csp2->authors;
3909 if (alp1 == NULL || alp2 == NULL) return FALSE;
3910 if (AuthListMatch (alp1, alp2, TRUE) != 0) return FALSE;
3911 afp1 = alp1->affil;
3912 afp2 = alp2->affil;
3913 if (afp1 != NULL && afp2 != NULL) {
3914 if (! AsnIoMemComp (afp1, afp2, (AsnWriteFunc) AffilAsnWrite)) return FALSE;
3915 }
3916 return TRUE;
3917 }
3918
/*****************************************************************************
*  MergeEquivCitSubs
*
*  SeqEntryExplore callback.  Within the descriptor chain of one Bioseq
*  or Bioseq-set, a Cit-sub pub descriptor that matches the most recently
*  retained Cit-sub (CitSubsMatch) and whose remark may be fused with it
*  (OkayToFuseRemarks) is merged into the retained one: a missing
*  affiliation or comment on the retained pub is taken over from the
*  duplicate, and the duplicate is flagged for deletion when it is an
*  extended (ObjValNode) descriptor.  A Cit-sub that does not match
*  becomes the new retained one.
*
*  Fix relative to the earlier version: lastpdp used to be advanced on
*  every pub descriptor, including non-Cit-sub pubs and duplicates already
*  flagged for deletion.  Remarks were therefore compared against, and
*  moved into, a Pubdesc other than the retained Cit-sub's, so a comment
*  could end up on an unrelated pub or on one about to be deleted.
*  lastpdp now always belongs to lastcit.  csp1 is also initialized.
*****************************************************************************/
static void MergeEquivCitSubs (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  AuthListPtr    alp1, alp2;
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  CitSubPtr      csp1 = NULL, csp2;
  SeqDescrPtr    descr = NULL;
  SeqDescrPtr    lastcit = NULL;
  PubdescPtr     lastpdp = NULL;
  ObjValNodePtr  ovp;
  PubdescPtr     pdp;
  SeqDescrPtr    sdp;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    descr = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    descr = bssp->descr;
  } else return;

  for (sdp = descr; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice != Seq_descr_pub) continue;
    pdp = (PubdescPtr) sdp->data.ptrvalue;
    if (pdp == NULL) continue;
    csp2 = GetCspFromPdp (pdp);
    /* pubs that are not Cit-subs neither merge nor change the retained one */
    if (csp2 == NULL) continue;

    if (lastcit == NULL) {
      /* first Cit-sub seen: it becomes the retained one */
      lastcit = sdp;
      lastpdp = pdp;
      csp1 = csp2;
      continue;
    }

    if (CitSubsMatch (csp1, csp2) &&
        OkayToFuseRemarks (pdp->comment, lastpdp->comment)) {
      alp1 = csp1->authors;
      alp2 = csp2->authors;
      if (alp1 != NULL && alp2 != NULL) {
        if (alp1->affil == NULL && alp2->affil != NULL) {
          alp1->affil = alp2->affil;
          alp2->affil = NULL;
        }
      }
      if (lastpdp->comment == NULL && pdp->comment != NULL) {
        lastpdp->comment = pdp->comment;
        pdp->comment = NULL;
      }
      if (sdp->extended != 0) {
        ovp = (ObjValNodePtr) sdp;
        ovp->idx.deleteme = TRUE;
      }
      /* lastcit, lastpdp and csp1 keep referring to the retained pub */
    } else {
      lastcit = sdp;
      lastpdp = pdp;
      csp1 = csp2;
    }
  }
}
3984
MergeMultipleDates(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)3985 static void MergeMultipleDates (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
3986
3987 {
3988 BioseqPtr bsp;
3989 BioseqSetPtr bssp;
3990 DatePtr dp1, dp2;
3991 SeqDescrPtr descr = NULL;
3992 SeqDescrPtr lastdate;
3993 ObjValNodePtr ovp;
3994 SeqDescrPtr sdp;
3995 Int2 status;
3996
3997 if (IS_Bioseq (sep)) {
3998 bsp = (BioseqPtr) sep->data.ptrvalue;
3999 if (bsp == NULL) return;
4000 descr = bsp->descr;
4001 } else if (IS_Bioseq_set (sep)) {
4002 bssp = (BioseqSetPtr) sep->data.ptrvalue;
4003 if (bssp == NULL) return;
4004 descr = bssp->descr;
4005 } else return;
4006
4007 lastdate = NULL;
4008 for (sdp = descr; sdp != NULL; sdp = sdp->next) {
4009 if (sdp->choice != Seq_descr_create_date) continue;
4010 if (lastdate != NULL) {
4011 dp1 = (DatePtr) lastdate->data.ptrvalue;
4012 dp2 = (DatePtr) sdp->data.ptrvalue;
4013 status = DateMatch (dp1, dp2, FALSE);
4014 if (status == 1) {
4015 if (sdp->extended != 0) {
4016 ovp = (ObjValNodePtr) sdp;
4017 ovp->idx.deleteme = TRUE;
4018 }
4019 } else {
4020 if (lastdate->extended != 0) {
4021 ovp = (ObjValNodePtr) lastdate;
4022 ovp->idx.deleteme = TRUE;
4023 }
4024 lastdate = sdp;
4025 }
4026 } else {
4027 lastdate = sdp;
4028 }
4029 }
4030
4031 lastdate = NULL;
4032 for (sdp = descr; sdp != NULL; sdp = sdp->next) {
4033 if (sdp->choice != Seq_descr_update_date) continue;
4034 if (lastdate != NULL) {
4035 dp1 = (DatePtr) lastdate->data.ptrvalue;
4036 dp2 = (DatePtr) sdp->data.ptrvalue;
4037 status = DateMatch (dp1, dp2, FALSE);
4038 if (status == 1) {
4039 if (sdp->extended != 0) {
4040 ovp = (ObjValNodePtr) sdp;
4041 ovp->idx.deleteme = TRUE;
4042 }
4043 } else {
4044 if (lastdate->extended != 0) {
4045 ovp = (ObjValNodePtr) lastdate;
4046 ovp->idx.deleteme = TRUE;
4047 }
4048 lastdate = sdp;
4049 }
4050 } else {
4051 lastdate = sdp;
4052 }
4053 }
4054 }
4055
GetNextBspBsspDescrUnindexed(BioseqPtr bsp,BioseqSetPtr bssp,Uint1 choice,SeqDescrPtr curr)4056 static SeqDescrPtr GetNextBspBsspDescrUnindexed (
4057 BioseqPtr bsp,
4058 BioseqSetPtr bssp,
4059 Uint1 choice,
4060 SeqDescrPtr curr
4061 )
4062
4063 {
4064 ObjValNodePtr ovp;
4065 SeqDescrPtr sdp = NULL;
4066
4067 if (bsp == NULL && bssp == NULL) return NULL;
4068 if (choice == 0) return NULL;
4069
4070 if (curr == NULL) {
4071 if (bsp != NULL) {
4072 sdp = bsp->descr;
4073 } else if (bssp != NULL) {
4074 sdp = bssp->descr;
4075 }
4076 curr = sdp;
4077 } else {
4078 sdp = curr->next;
4079 }
4080 while (sdp != NULL) {
4081 if (sdp->choice == choice) return sdp;
4082 sdp = sdp->next;
4083 }
4084
4085 if (curr != NULL && curr->extended != 0) {
4086 ovp = (ObjValNodePtr) curr;
4087 if (ovp->idx.parenttype == OBJ_BIOSEQ) {
4088 bsp = (BioseqPtr) ovp->idx.parentptr;
4089 if (bsp == NULL) return NULL;
4090 if (bsp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
4091 bssp = (BioseqSetPtr) bsp->idx.parentptr;
4092 } else if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
4093 bssp = (BioseqSetPtr) ovp->idx.parentptr;
4094 if (bssp == NULL) return NULL;
4095 if (bssp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
4096 bssp = (BioseqSetPtr) bssp->idx.parentptr;
4097 } else {
4098 return NULL;
4099 }
4100 } else {
4101 if (bsp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
4102 bssp = (BioseqSetPtr) bsp->idx.parentptr;
4103 }
4104
4105 while (bssp != NULL) {
4106 for (sdp = bssp->descr; sdp != NULL; sdp = sdp->next) {
4107 if (sdp->choice == choice) return sdp;
4108 }
4109 if (bssp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
4110 bssp = (BioseqSetPtr) bssp->idx.parentptr;
4111 }
4112 return NULL;
4113 }
4114
/*****************************************************************************
*  MarkFirstPubIfEquivalent
*
*  If sdp1 and sdp2 are pub descriptors whose pub chains serialize
*  identically (PubEquivAsnWrite), sdp1 is flagged for deletion.  When the
*  two Pubdescs differ in their other fields, every field that is unset
*  in sdp2's Pubdesc but set in sdp1's is first carried over to sdp2
*  (pointer fields are moved, scalar fields copied).
*
*  Fix relative to the earlier version: sdp1 was cast to ObjValNodePtr
*  and written to without checking sdp1->extended, unlike the other
*  cleanup routines in this file.  A descriptor that is not extended
*  cannot be flagged, so it is now left completely untouched.
*****************************************************************************/
static void MarkFirstPubIfEquivalent (
  SeqDescrPtr sdp1,
  SeqDescrPtr sdp2
)

{
  ObjValNodePtr  ovp;
  PubdescPtr     pdp1, pdp2;

  if (sdp1 == NULL || sdp2 == NULL) return;
  if (sdp1->choice != Seq_descr_pub || sdp2->choice != Seq_descr_pub) return;

  /* without the ObjValNode extension there is no idx.deleteme to set */
  if (sdp1->extended == 0) return;

  pdp1 = (PubdescPtr) sdp1->data.ptrvalue;
  pdp2 = (PubdescPtr) sdp2->data.ptrvalue;
  if (pdp1 == NULL || pdp2 == NULL) return;

  if (! AsnIoMemComp (pdp1->pub, pdp2->pub, (AsnWriteFunc) PubEquivAsnWrite)) return;

  if (! AsnIoMemComp (pdp1, pdp2, (AsnWriteFunc) PubdescAsnWrite)) {
    /* same publication, different decoration: keep what only sdp1 has */
    if (pdp2->name == NULL && pdp1->name != NULL) {
      pdp2->name = pdp1->name;
      pdp1->name = NULL;
    }
    if (pdp2->fig == NULL && pdp1->fig != NULL) {
      pdp2->fig = pdp1->fig;
      pdp1->fig = NULL;
    }
    if (pdp2->num == NULL && pdp1->num != NULL) {
      pdp2->num = pdp1->num;
      pdp1->num = NULL;
    }
    if (! pdp2->numexc && pdp1->numexc) {
      pdp2->numexc = pdp1->numexc;
    }
    if (! pdp2->poly_a && pdp1->poly_a) {
      pdp2->poly_a = pdp1->poly_a;
    }
    if (pdp2->align_group == 0 && pdp1->align_group != 0) {
      pdp2->align_group = pdp1->align_group;
    }
    if (pdp2->maploc == NULL && pdp1->maploc != NULL) {
      pdp2->maploc = pdp1->maploc;
      pdp1->maploc = NULL;
    }
    if (pdp2->seq_raw == NULL && pdp1->seq_raw != NULL) {
      pdp2->seq_raw = pdp1->seq_raw;
      pdp1->seq_raw = NULL;
    }
    if (pdp2->comment == NULL && pdp1->comment != NULL) {
      pdp2->comment = pdp1->comment;
      pdp1->comment = NULL;
    }
    if (pdp2->reftype == 0 && pdp1->reftype != 0) {
      pdp2->reftype = pdp1->reftype;
    }
  }
  ovp = (ObjValNodePtr) sdp1;
  ovp->idx.deleteme = TRUE;
}
4174
/*****************************************************************************
*  RemoveIdenticalPubs
*
*  SeqEntryExplore callback.  Each pub descriptor on this Bioseq or
*  Bioseq-set is compared, through MarkFirstPubIfEquivalent, with every
*  pub descriptor that GetNextBspBsspDescrUnindexed yields after it.
*****************************************************************************/
static void RemoveIdenticalPubs (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr     bsp = NULL;
  BioseqSetPtr  bssp = NULL;
  SeqDescrPtr   descr = NULL, sdp1, sdp2;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    descr = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    descr = bssp->descr;
  } else {
    return;
  }

  for (sdp1 = descr; sdp1 != NULL; sdp1 = sdp1->next) {
    if (sdp1->choice != Seq_descr_pub) continue;
    for (sdp2 = GetNextBspBsspDescrUnindexed (bsp, bssp, Seq_descr_pub, sdp1);
         sdp2 != NULL;
         sdp2 = GetNextBspBsspDescrUnindexed (bsp, bssp, Seq_descr_pub, sdp2)) {
      MarkFirstPubIfEquivalent (sdp1, sdp2);
    }
  }
}
4201
/*****************************************************************************
*  MolInfoUpdate
*
*  SeqEntryExplore callback.  Uses the MolInfo of the last molinfo
*  descriptor on this Bioseq or Bioseq-set (nothing is done if there is
*  none).  Each mol_type descriptor fills in MolInfo.biomol when biomol
*  is still 0 and the value is one of the recognized molecule types; each
*  method descriptor fills in MolInfo.tech when tech is still 0.  Both
*  kinds of descriptor are then flagged for deletion when they are
*  extended (ObjValNode) descriptors.  Modif descriptors are left alone.
*****************************************************************************/
static void MolInfoUpdate (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  SeqDescrPtr    descr = NULL;
  Uint1          meth;
  MolInfoPtr     mip = NULL;
  Uint1          mol;
  ObjValNodePtr  ovp;
  SeqDescrPtr    sdp;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    descr = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    descr = bssp->descr;
  } else {
    return;
  }

  /* the last molinfo descriptor in the chain wins */
  for (sdp = descr; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice == Seq_descr_molinfo) {
      mip = (MolInfoPtr) sdp->data.ptrvalue;
    }
  }
  if (mip == NULL) return;

  for (sdp = descr; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice == Seq_descr_mol_type) {
      mol = sdp->data.intvalue;
      if (mol != 0 && mip->biomol == 0) {
        /* recognized values carry over unchanged, anything else is dropped */
        switch (mol) {
          case MOLECULE_TYPE_GENOMIC :
          case MOLECULE_TYPE_PRE_MRNA :
          case MOLECULE_TYPE_MRNA :
          case MOLECULE_TYPE_RRNA :
          case MOLECULE_TYPE_TRNA :
          case MOLECULE_TYPE_SNRNA :
          case MOLECULE_TYPE_SCRNA :
          case MOLECULE_TYPE_PEPTIDE :
          case MOLECULE_TYPE_OTHER_GENETIC_MATERIAL :
          case MOLECULE_TYPE_GENOMIC_MRNA_MIX :
          case 255 :
            mip->biomol = mol;
            break;
          default :
            break;
        }
      }
    } else if (sdp->choice == Seq_descr_method) {
      meth = sdp->data.intvalue;
      if (meth != 0 && mip->tech == 0) {
        switch (meth) {
          case METHOD_concept_transl :
            mip->tech = MI_TECH_concept_trans;
            break;
          case METHOD_seq_pept :
            mip->tech = MI_TECH_seq_pept;
            break;
          case METHOD_both :
            mip->tech = MI_TECH_both;
            break;
          case METHOD_seq_pept_overlap :
            mip->tech = MI_TECH_seq_pept_overlap;
            break;
          case METHOD_seq_pept_homol :
            mip->tech = MI_TECH_seq_pept_homol;
            break;
          case METHOD_concept_transl_a :
            mip->tech = MI_TECH_concept_trans_a;
            break;
          case METHOD_other :
            mip->tech = MI_TECH_other;
            break;
          default :
            break;
        }
      }
    } else {
      /* modif and every other descriptor type are not touched */
      continue;
    }

    /* the old-style descriptor is now obsolete */
    if (sdp->extended != 0) {
      ovp = (ObjValNodePtr) sdp;
      ovp->idx.deleteme = TRUE;
    }
  }
}
4319
SSECGetAuthListPtr(PubdescPtr pdp)4320 static AuthListPtr SSECGetAuthListPtr (PubdescPtr pdp)
4321
4322 {
4323 AuthListPtr alp = NULL;
4324 CitArtPtr cap;
4325 CitBookPtr cbp;
4326 CitGenPtr cgp;
4327 CitPatPtr cpp;
4328 CitSubPtr csp;
4329 ValNodePtr vnp;
4330
4331 if (pdp == NULL) return NULL;
4332
4333 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4334 switch (vnp->choice) {
4335 case PUB_Gen :
4336 cgp = (CitGenPtr) vnp->data.ptrvalue;
4337 if (cgp != NULL) {
4338 alp = cgp->authors;
4339 }
4340 break;
4341 case PUB_Sub :
4342 csp = (CitSubPtr) vnp->data.ptrvalue;
4343 if (csp != NULL) {
4344 alp = csp->authors;
4345 }
4346 break;
4347 case PUB_Article :
4348 cap = (CitArtPtr) vnp->data.ptrvalue;
4349 if (cap != NULL) {
4350 alp = cap->authors;
4351 }
4352 break;
4353 case PUB_Book :
4354 case PUB_Proc :
4355 case PUB_Man :
4356 cbp = (CitBookPtr) vnp->data.ptrvalue;
4357 if (cbp != NULL) {
4358 alp = cbp->authors;
4359 }
4360 break;
4361 case PUB_Patent :
4362 cpp = (CitPatPtr) vnp->data.ptrvalue;
4363 if (cpp != NULL) {
4364 alp = cpp->authors;
4365 }
4366 break;
4367 default :
4368 break;
4369 }
4370
4371 if (alp != NULL) return alp;
4372 }
4373
4374 return NULL;
4375 }
4376
JustMuid(ValNodePtr ppr)4377 static Boolean JustMuid (ValNodePtr ppr)
4378
4379 {
4380 ValNodePtr vnp;
4381
4382 if (ppr == NULL) return FALSE;
4383 if (ppr->choice == PUB_Muid) return TRUE;
4384 if (ppr->choice == PUB_Equiv) {
4385 for (vnp = (ValNodePtr) ppr->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
4386 if (JustMuid (vnp)) return TRUE;
4387 }
4388 }
4389 return FALSE;
4390 }
4391
4392 //LCOV_EXCL_START
4393 // this accomplishes nothing useful
/*****************************************************************************
*  FixZeroMuid
*
*  Stores muid in ppr when it is a PUB_Muid, and in every PUB_Muid nested
*  inside it when it is a PUB_Equiv.  The previous value is overwritten
*  whether or not it was zero.
*****************************************************************************/
static void FixZeroMuid (ValNodePtr ppr, Int4 muid)

{
  ValNodePtr  inner;

  if (ppr == NULL) return;

  if (ppr->choice == PUB_Muid) {
    ppr->data.intvalue = muid;
  }
  if (ppr->choice != PUB_Equiv) return;

  inner = (ValNodePtr) ppr->data.ptrvalue;
  while (inner != NULL) {
    FixZeroMuid (inner, muid);
    inner = inner->next;
  }
}
4409
/*****************************************************************************
*  RepairBadBackbonePub
*
*  When every element of pdp->pub satisfies JustMuid, applies FixZeroMuid
*  with the given muid to each of them; otherwise leaves pdp unchanged.
*  pdp must not be NULL.
*****************************************************************************/
static void RepairBadBackbonePub (PubdescPtr pdp, Int4 muid)

{
  ValNodePtr  vnp;

  /* bail out as soon as anything other than a muid is present */
  vnp = pdp->pub;
  while (vnp != NULL) {
    if (! JustMuid (vnp)) return;
    vnp = vnp->next;
  }

  vnp = pdp->pub;
  while (vnp != NULL) {
    FixZeroMuid (vnp, muid);
    vnp = vnp->next;
  }
}
4422 //LCOV_EXCL_STOP
4423
/*****************************************************************************
*  RemoveZeroMuids
*
*  ppr   head of a pub chain
*  prev  address of the pointer that refers to ppr
*
*  Unlinks and frees every PUB_Muid element whose value is 0.  A chain
*  with fewer than two elements is left untouched.
*****************************************************************************/
static void RemoveZeroMuids (ValNodePtr ppr, ValNodePtr PNTR prev)

{
  ValNodePtr  following;

  /* if only muid 0, cannot leave empty pdp->pub */

  if (ppr == NULL || ppr->next == NULL) return;

  for (; ppr != NULL; ppr = following) {
    following = ppr->next;
    if (ppr->choice != PUB_Muid || ppr->data.intvalue != 0) {
      /* element stays: its next field becomes the link to patch */
      prev = (ValNodePtr PNTR) &(ppr->next);
      continue;
    }
    *prev = ppr->next;
    ppr->next = NULL;
    ValNodeFree (ppr);
  }
}
4445
IsPatent(PubdescPtr pdp)4446 static Boolean IsPatent (PubdescPtr pdp)
4447
4448 {
4449 ValNodePtr vnp;
4450
4451 if (pdp == NULL) return FALSE;
4452 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4453 if (vnp->choice == PUB_Patent) return TRUE;
4454 }
4455 return FALSE;
4456 }
4457
/*****************************************************************************
*  IsPubContentBadEx
*
*  Returns TRUE when the Pubdesc content is considered unusable:
*    - strict is set and no author list is found;
*    - the pub is not a patent, has an author list, and the first author
*      entry is missing or blank;
*    - a journal article (Cit-art with from == 1) has no journal, no
*      title containing text, or no imprint.
*  Returns FALSE for a NULL pdp and for any Pubdesc whose fig field
*  contains text.
*****************************************************************************/
static Boolean IsPubContentBadEx (PubdescPtr pdp, Boolean strict)
{
  AuthListPtr  alp = NULL;
  AuthorPtr    ap;
  CitArtPtr    cap;
  CitJourPtr   cjp;
  ImprintPtr   imp;
  ValNodePtr   names;
  NameStdPtr   nsp;
  PersonIdPtr  pid;
  CharPtr      title = NULL;
  ValNodePtr   ttl, vnp;

  if (pdp == NULL) return FALSE;

  /* keep anything with a figure - backbone entry */
  if (! StringHasNoText (pdp->fig)) return FALSE;

  /* look for at least one author name */
  alp = SSECGetAuthListPtr (pdp);
  if (alp == NULL && strict) return TRUE;

  /* patents can get away with no authors; */
  /* if accession or GI assigned, can have no authors */
  if (! IsPatent (pdp) && alp != NULL) {
    switch (alp->choice) {
      case 1 :
        names = alp->names;
        if (names == NULL) return TRUE;
        ap = (AuthorPtr) names->data.ptrvalue;
        if (ap == NULL) return TRUE;
        pid = ap->name;
        if (pid == NULL) return TRUE;
        if (pid->choice == 2) {
          nsp = (NameStdPtr) pid->data;
          if (nsp == NULL) return TRUE;
          if (StringHasNoText (nsp->names [0])) return TRUE;
        } else if (pid->choice == 3 || pid->choice == 4) {
          if (StringHasNoText ((CharPtr) pid->data)) return TRUE;
        }
        break;
      case 2 :
      case 3 :
        names = alp->names;
        if (names == NULL) return TRUE;
        if (StringHasNoText ((CharPtr) names->data.ptrvalue)) return TRUE;
        break;
      default :
        break;
    }
  }

  /* look for CitArt journal */
  for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice != PUB_Article) continue;
    cap = (CitArtPtr) vnp->data.ptrvalue;
    if (cap == NULL) continue;
    if (cap->from != 1) continue;

    cjp = (CitJourPtr) cap->fromptr;
    if (cjp == NULL) return TRUE;
    if (cjp->title == NULL) return TRUE;
    /* remember the last title that contains text */
    for (ttl = cjp->title; ttl != NULL; ttl = ttl->next) {
      if (StringHasNoText ((CharPtr) ttl->data.ptrvalue)) continue;
      title = (CharPtr) ttl->data.ptrvalue;
    }
    if (title == NULL) return TRUE;
    imp = cjp->imp;
    if (imp == NULL) return TRUE;
  }

  return FALSE;
}
4537
4538 //LCOV_EXCL_START
4539 //Not part of cleanup
IsPubContentBad(PubdescPtr pdp)4540 extern Boolean IsPubContentBad(PubdescPtr pdp)
4541
4542 {
4543 return IsPubContentBadEx (pdp, TRUE);
4544 }
4545 //LCOV_EXCL_STOP
4546
/*
 * Decides whether a Pubdesc should be marked for removal, repairing it
 * in place on the way.
 *
 * userdata, when not NULL, points at an Int4 muid; a value other than 0
 * or -1 is forwarded to RepairBadBackbonePub.  strict is forwarded to
 * IsPubContentBadEx.  A NULL pdp yields FALSE.
 */
static Boolean IsPubBad (PubdescPtr pdp, Pointer userdata, Boolean strict)

{
  ValNodePtr  first;
  Int4Ptr     uniq;
  Int4        value;

  if (pdp == NULL) return FALSE;

  /* a lone pmid is left for CheckMinPub (RefSeq protein exception),
     except that a pmid of 0 is marked for removal right away */

  first = pdp->pub;
  if (first != NULL && first->next == NULL && first->choice == PUB_PMid) {
    return (first->data.intvalue == 0) ? TRUE : FALSE;
  }

  /* if single real muid, repair 0 muid backbone references */

  uniq = (Int4Ptr) userdata;
  if (uniq != NULL) {
    value = *uniq;
    if (value != 0 && value != -1) {
      RepairBadBackbonePub (pdp, value);
    }
  }

  /* drop whatever zero muids are still present, then judge the content */

  RemoveZeroMuids (pdp->pub, &(pdp->pub));

  return IsPubContentBadEx (pdp, strict);
}
4581
/*
 * Feature callback: marks a publication feature for deletion when
 * IsPubBad (non-strict) rejects its Pubdesc.  Other feature types are
 * ignored.  userdata is passed through to IsPubBad.
 */
static void RemoveBadPubFeat (SeqFeatPtr sfp, Pointer userdata)

{
  if (sfp->data.choice == SEQFEAT_PUB) {
    if (IsPubBad ((PubdescPtr) sfp->data.value.ptrvalue, userdata, FALSE)) {
      sfp->idx.deleteme = TRUE;
    }
  }
}
4593
/*
 * Descriptor callback: marks a publication descriptor for deletion when
 * IsPubBad (non-strict) rejects its Pubdesc.  The deleteme flag is only
 * set on extended descriptors; IsPubBad is called either way.
 */
static void RemoveBadPubDescr (SeqDescrPtr sdp, Pointer userdata)

{
  PubdescPtr  pub;

  if (sdp->choice != Seq_descr_pub) return;
  pub = (PubdescPtr) sdp->data.ptrvalue;
  if (! IsPubBad (pub, userdata, FALSE)) return;
  if (sdp->extended == 0) return;
  ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
}
4609
/*
 * Feature callback: same as RemoveBadPubFeat, but calls IsPubBad with
 * strict set to TRUE.
 */
static void RemoveBadPubFeatStrict (SeqFeatPtr sfp, Pointer userdata)

{
  if (sfp->data.choice == SEQFEAT_PUB) {
    if (IsPubBad ((PubdescPtr) sfp->data.value.ptrvalue, userdata, TRUE)) {
      sfp->idx.deleteme = TRUE;
    }
  }
}
4621
/*
 * Descriptor callback: same as RemoveBadPubDescr, but calls IsPubBad
 * with strict set to TRUE.  The deleteme flag is only set on extended
 * descriptors.
 */
static void RemoveBadPubDescrStrict (SeqDescrPtr sdp, Pointer userdata)

{
  PubdescPtr  pub;

  if (sdp->choice != Seq_descr_pub) return;
  pub = (PubdescPtr) sdp->data.ptrvalue;
  if (! IsPubBad (pub, userdata, TRUE)) return;
  if (sdp->extended == 0) return;
  ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
}
4637
/*
 * Descriptor callback: marks a user-object descriptor for deletion
 * unless the object has a type and is either named "NcbiAutofix" or
 * "Unverified" (case-insensitive) or carries data.  A NULL user object,
 * or one without a type, is always marked.  Only extended descriptors
 * can be marked.
 */
static void RemoveEmptyUserObjects (SeqDescrPtr sdp, Pointer userdata)

{
  ObjectIdPtr    type_id;
  UserObjectPtr  uop;

  if (sdp->choice != Seq_descr_user) return;

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  type_id = (uop != NULL) ? uop->type : NULL;
  if (type_id != NULL) {
    if (StringICmp (type_id->str, "NcbiAutofix") == 0 ||
        StringICmp (type_id->str, "Unverified") == 0 ||
        uop->data != NULL) {
      return;
    }
  }

  if (sdp->extended != 0) {
    ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
  }
}
4661
/*
 * Folds the muid of one pub (descending into PUB_Equiv members) into
 * *muidp, which acts as an accumulator:
 *    0  - no non-zero muid seen so far
 *   -1  - two different non-zero muids were seen (sticky)
 *   else - the single non-zero muid seen so far
 * Zero muids are ignored.
 */
static void LookForUniqMuidProc (ValNodePtr ppr, Int4Ptr muidp)

{
  Int4        found;
  ValNodePtr  member;

  if (ppr == NULL || muidp == NULL) return;
  if (*muidp == -1) return;

  if (ppr->choice == PUB_Muid) {
    found = ppr->data.intvalue;
    if (found == 0) return;
    if (*muidp == 0) {
      *muidp = found;
    } else if (*muidp != found) {
      *muidp = -1;
    }
  } else if (ppr->choice == PUB_Equiv) {
    member = (ValNodePtr) ppr->data.ptrvalue;
    while (member != NULL) {
      LookForUniqMuidProc (member, muidp);
      member = member->next;
    }
  }
}
4689
/*
 * Pubdesc callback: feeds every entry of pdp->pub to LookForUniqMuidProc.
 * userdata must point at the Int4 accumulator; nothing is done once it
 * holds -1.  Neither pdp nor userdata is checked for NULL.
 */
static void LookForUniqueMuid (PubdescPtr pdp, Pointer userdata)

{
  Int4Ptr     accum = (Int4Ptr) userdata;
  ValNodePtr  entry;

  if (*accum == -1) return;

  entry = pdp->pub;
  while (entry != NULL) {
    LookForUniqMuidProc (entry, accum);
    entry = entry->next;
  }
}
4702
EmptyExceptSerialNumber(CitGenPtr cgp)4703 static Boolean EmptyExceptSerialNumber (CitGenPtr cgp)
4704 {
4705 if (cgp == NULL) return FALSE;
4706
4707 if (!StringHasNoText (cgp->cit)
4708 || cgp->authors != NULL
4709 || cgp->muid != 0
4710 || cgp->journal != NULL
4711 || !StringHasNoText (cgp->volume)
4712 || !StringHasNoText (cgp->issue)
4713 || !StringHasNoText (cgp->pages)
4714 || cgp->date != NULL
4715 || !StringHasNoText (cgp->title)
4716 || cgp->pmid != 0) {
4717 return FALSE;
4718 } else {
4719 return TRUE;
4720 }
4721 }
4722
ArePubsMergeableForFig(PubdescPtr fig,PubdescPtr nofig)4723 static Boolean ArePubsMergeableForFig(PubdescPtr fig, PubdescPtr nofig)
4724 {
4725 CitGenPtr cgp_fig, cgp_nofig;
4726
4727 if (fig == NULL || nofig == NULL) return FALSE;
4728
4729 /* name */
4730 if (!StringHasNoText (nofig->name) && !StringHasNoText (fig->name)
4731 && !StringCmp (nofig->name, fig->name)) {
4732 return FALSE;
4733 }
4734 /* fig */
4735 if (!StringHasNoText (nofig->fig) && !StringHasNoText (fig->fig)
4736 && !StringCmp (nofig->fig, fig->fig)) {
4737 return FALSE;
4738 }
4739 /* num */
4740 if (NumberingMatch(nofig->num, fig->num) != 0) {
4741 return FALSE;
4742 }
4743
4744 if ((nofig->numexc && !fig->numexc)
4745 || (!nofig->numexc && fig->numexc)) {
4746 return FALSE;
4747 }
4748 if ((nofig->poly_a && !fig->poly_a)
4749 || (!nofig->poly_a && fig->poly_a)) {
4750 return FALSE;
4751 }
4752
4753 /* maploc */
4754 if (!StringHasNoText (nofig->maploc) && !StringHasNoText (fig->maploc)
4755 && !StringCmp (nofig->maploc, fig->maploc)) {
4756 return FALSE;
4757 }
4758
4759 /* seq-raw */
4760 if (!StringHasNoText (nofig->seq_raw) && !StringHasNoText (fig->seq_raw)
4761 && !StringCmp (nofig->seq_raw, fig->seq_raw)) {
4762 return FALSE;
4763 }
4764
4765 /* align-group */
4766 if (nofig->align_group > 0 && fig->align_group > 0
4767 && nofig->align_group != fig->align_group) {
4768 return FALSE;
4769 }
4770
4771 /* comment */
4772 if (!StringHasNoText (nofig->comment) && !StringHasNoText (fig->comment)
4773 && !StringCmp (nofig->comment, fig->comment)) {
4774 return FALSE;
4775 }
4776
4777 /* reftype */
4778 if (nofig->reftype > 0 && fig->reftype > 0
4779 && nofig->reftype != fig->reftype) {
4780 return FALSE;
4781 }
4782
4783 if (nofig->pub != NULL && fig->pub != NULL) {
4784 if (nofig->pub->next != NULL
4785 || fig->pub->next != NULL
4786 || nofig->pub->choice != PUB_Gen
4787 || fig->pub->choice != PUB_Gen
4788 || nofig->pub->data.ptrvalue == NULL
4789 || fig->pub->data.ptrvalue == NULL) {
4790 return FALSE;
4791 }
4792 cgp_fig = fig->pub->data.ptrvalue;
4793 cgp_nofig = nofig->pub->data.ptrvalue;
4794 if (!EmptyExceptSerialNumber (cgp_fig)
4795 || !EmptyExceptSerialNumber (cgp_nofig)
4796 || !cgp_fig->serial_number != cgp_nofig->serial_number) {
4797 return FALSE;
4798 }
4799 }
4800
4801 return TRUE;
4802 }
4803
4804 /* rescue pub with just fig that is in same chain as real pub by merging data */
4805
/*
 * SeqEntry callback: within the descriptor chain of one Bioseq or
 * Bioseq-set, moves the data of every extended pub descriptor that has
 * figure text into each pub descriptor without figure text that
 * ArePubsMergeableForFig accepts, and marks the figure descriptor for
 * deletion.
 *
 * mydata must point at an Int4 muid accumulator; nothing is done when
 * it holds -1.  index and indent are unused.
 *
 * Pointer fields are transferred, not copied: the donor field is set to
 * NULL after the move so that only one Pubdesc owns each allocation.
 * The pub list is now handled the same way; previously the donor kept
 * its pointer, leaving the list shared with a descriptor that is marked
 * for deletion.
 */
static void MergePubFigInChain (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  SeqDescrPtr    descr = NULL;
  PubdescPtr     hasfig = NULL;
  Int4Ptr        muidp;
  PubdescPtr     nofig = NULL;
  ObjValNodePtr  ovp = NULL;
  SeqDescrPtr    sdp_fig, sdp_nofig;

  muidp = (Int4Ptr) mydata;
  if (*muidp == -1) return;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    descr = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    descr = bssp->descr;
  } else return;

  /* attempt to combine any descriptor with a fig with
   * a different publication without a fig, but only if
   * they do not have conflicting data.
   */
  for (sdp_fig = descr; sdp_fig != NULL; sdp_fig = sdp_fig->next) {
    if (sdp_fig->choice != Seq_descr_pub
        || sdp_fig->extended == 0) continue;
    hasfig = (PubdescPtr) sdp_fig->data.ptrvalue;
    if (hasfig == NULL) continue;
    if (StringHasNoText (hasfig->fig)) continue;
    ovp = (ObjValNodePtr) sdp_fig;

    for (sdp_nofig = descr; sdp_nofig != NULL; sdp_nofig = sdp_nofig->next) {
      if (sdp_nofig->choice != Seq_descr_pub) continue;
      nofig = (PubdescPtr) sdp_nofig->data.ptrvalue;
      if (nofig == NULL || ! StringHasNoText (nofig->fig)) continue;
      if (! ArePubsMergeableForFig (hasfig, nofig)) continue;

      /* string fields: fill blanks in nofig by taking over the donor's string */
      if (StringHasNoText (nofig->name)) {
        MemFree (nofig->name);
        nofig->name = hasfig->name;
        hasfig->name = NULL;
      }
      if (StringHasNoText (nofig->fig)) {
        MemFree (nofig->fig);
        nofig->fig = hasfig->fig;
        hasfig->fig = NULL;
      }
      if (nofig->num == NULL) {
        nofig->num = hasfig->num;
        hasfig->num = NULL;
      }

      /* scalar fields: the donor's value wins when it is set */
      if (hasfig->numexc) {
        nofig->numexc = hasfig->numexc;
      }
      if (hasfig->poly_a) {
        nofig->poly_a = hasfig->poly_a;
      }
      if (hasfig->align_group > 0) {
        nofig->align_group = hasfig->align_group;
      }
      if (StringHasNoText (nofig->maploc)) {
        MemFree (nofig->maploc);
        nofig->maploc = hasfig->maploc;
        hasfig->maploc = NULL;
      }
      if (StringHasNoText (nofig->seq_raw)) {
        MemFree (nofig->seq_raw);
        nofig->seq_raw = hasfig->seq_raw;
        hasfig->seq_raw = NULL;
      }
      if (StringHasNoText (nofig->comment)) {
        MemFree (nofig->comment);
        nofig->comment = hasfig->comment;
        hasfig->comment = NULL;
      }
      if (hasfig->reftype > 0) {
        nofig->reftype = hasfig->reftype;
      }

      if (nofig->pub == NULL) {
        nofig->pub = hasfig->pub;
        /* ownership moved: the donor must not keep a second reference */
        hasfig->pub = NULL;
      }

      ovp->idx.deleteme = TRUE;
    }
  }
}
4903
/*
 * Feature callback: replaces an exception text equal (ignoring case) to
 * "reasons cited in publication" with "reasons given in citation".
 * Features with no exception text are left alone.
 */
static void CorrectSfpExceptText (SeqFeatPtr sfp, Pointer userdata)

{
  if (sfp == NULL) return;
  if (StringHasNoText (sfp->except_text)) return;
  if (StringICmp (sfp->except_text, "reasons cited in publication") != 0) return;

  MemFree (sfp->except_text);
  sfp->except_text = StringSave ("reasons given in citation");
}
4913
IsCodonCorrect(tRNAPtr trp,Uint1 taa)4914 static Boolean IsCodonCorrect (tRNAPtr trp, Uint1 taa)
4915
4916 {
4917 Uint1 aa;
4918 Uint1 from;
4919 SeqMapTablePtr smtp;
4920
4921 if (trp == NULL) return TRUE;
4922 aa = 0;
4923 if (trp->aatype == 2) {
4924 aa = trp->aa;
4925 } else {
4926 from = 0;
4927 switch (trp->aatype) {
4928 case 0:
4929 from = 0;
4930 break;
4931 case 1:
4932 from = Seq_code_iupacaa;
4933 break;
4934 case 2:
4935 from = Seq_code_ncbieaa;
4936 break;
4937 case 3:
4938 from = Seq_code_ncbi8aa;
4939 break;
4940 case 4:
4941 from = Seq_code_ncbistdaa;
4942 break;
4943 default:
4944 break;
4945 }
4946 smtp = SeqMapTableFind (Seq_code_ncbieaa, from);
4947 if (smtp != NULL) {
4948 aa = SeqMapTableConvert (smtp, trp->aa);
4949 }
4950 }
4951 if (aa > 0 && aa != 255) {
4952 if (taa != aa && aa != 'U') {
4953 return FALSE;
4954 }
4955 }
4956 return TRUE;
4957 }
4958
/*
 * Lookup table indexed by NCBI2na residue code; each entry is the digit
 * used for that base inside a codon index.  CorrectTrnaCodons searches
 * it in reverse (value to position) to turn a codon digit back into an
 * NCBI2na code.
 */
static Uint1 codon_xref [4] = { /* mapping from NCBI2na to codon codes */
  2, /* A */
  1, /* C */
  3, /* G */
  0 }; /* T */
4964
/*
 * Feature callback: repairs the recognized codon of a tRNA feature when
 * it does not agree with the tRNA's amino acid in the genetic code
 * passed through userdata (indexed by codon index).
 *
 * Only tRNAs with exactly one codon entry (codon [0] below 64 and
 * codon [1] equal to 255) are considered.  If the stored codon is wrong,
 * its reverse complement, complement and reverse are tried in that
 * order; the first variant IsCodonCorrect accepts replaces codon [0].
 * If none fits, the feature is left unchanged.
 */
static void CorrectTrnaCodons (SeqFeatPtr sfp, Pointer userdata)

{
  Uint1           attempt [4];
  Char            base;
  Uint1           bases [4];
  Int2            divisor, n, pos;
  CharPtr         gcode;
  Uint1           idx;
  Uint1           res;
  RnaRefPtr       rna;
  SeqMapTablePtr  table;
  tRNAPtr         trna;
  Uint1           xlated;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return;
  rna = (RnaRefPtr) sfp->data.value.ptrvalue;
  if (rna == NULL || rna->type != 3 || rna->ext.choice != 2) return;
  trna = (tRNAPtr) rna->ext.value.ptrvalue;
  if (trna == NULL) return;
  if (trna->codon [0] == 255 || trna->codon [1] != 255 || trna->codon [0] >= 64) return;
  gcode = (CharPtr) userdata;
  if (gcode == NULL) return;

  /* nothing to do when the stored codon already matches */

  idx = trna->codon [0];
  xlated = gcode [idx];
  if (IsCodonCorrect (trna, xlated)) return;

  /* expand the codon index into three IUPAC letters */

  table = SeqMapTableFind (Seq_code_iupacna, Seq_code_ncbi2na);
  if (table == NULL) return;
  divisor = 16;
  for (pos = 0; pos < 3; pos++) {
    res = (Uint1) ((Int2) idx / divisor);
    idx -= (Uint1) (res * divisor);
    for (n = 0; n < 4; n++) {
      if (codon_xref [n] == res) {
        res = (Uint1) n;
        break;
      }
    }
    bases [pos] = SeqMapTableConvert (table, res);
    divisor /= 4;
  }
  bases [3] = 0;

  /* candidate 1: reverse complement */

  for (pos = 0; pos < 3; pos++) {
    base = (Char) bases [2 - pos];
    switch (base) {
      case 'A' :
        base = 'T';
        break;
      case 'C' :
        base = 'G';
        break;
      case 'G' :
        base = 'C';
        break;
      case 'T' :
        base = 'A';
        break;
      default :
        break;
    }
    attempt [pos] = base;
  }
  attempt [3] = 0;

  idx = IndexForCodon (attempt, Seq_code_iupacna);
  xlated = gcode [idx];
  if (IsCodonCorrect (trna, xlated)) {
    trna->codon [0] = idx;
    return;
  }

  /* candidate 2: complement only */

  for (pos = 0; pos < 3; pos++) {
    base = (Char) bases [pos];
    switch (base) {
      case 'A' :
        base = 'T';
        break;
      case 'C' :
        base = 'G';
        break;
      case 'G' :
        base = 'C';
        break;
      case 'T' :
        base = 'A';
        break;
      default :
        break;
    }
    attempt [pos] = base;
  }
  attempt [3] = 0;

  idx = IndexForCodon (attempt, Seq_code_iupacna);
  xlated = gcode [idx];
  if (IsCodonCorrect (trna, xlated)) {
    trna->codon [0] = idx;
    return;
  }

  /* candidate 3: reverse only */

  for (pos = 0; pos < 3; pos++) {
    attempt [pos] = (Char) bases [2 - pos];
  }
  attempt [3] = 0;

  idx = IndexForCodon (attempt, Seq_code_iupacna);
  xlated = gcode [idx];
  if (IsCodonCorrect (trna, xlated)) {
    trna->codon [0] = idx;
  }
}
5082
/*
 * BioSource callback: stores biop into the BioSourcePtr that userdata
 * points at.  Each call overwrites the previous value.  Nothing happens
 * when either pointer is NULL.
 */
static void FindSingleBioSource (BioSourcePtr biop, Pointer userdata)

{
  BioSourcePtr PNTR  slot = (BioSourcePtr PNTR) userdata;

  if (biop != NULL && slot != NULL) {
    *slot = biop;
  }
}
5092
/*
 * BioSource callback: removes every old_name OrgMod whose subname equals
 * the organism's taxname and whose attrib has no text.  Nothing is done
 * when the BioSource has no org, no taxname text, or no modifiers.
 */
static void CleanupOldName (BioSourcePtr biop, Pointer userdata)

{
  OrgModPtr PNTR  link;
  OrgModPtr       mod, following;
  OrgNamePtr      onp;
  OrgRefPtr       orp;

  if (biop == NULL) return;
  orp = biop->org;
  if (orp == NULL || StringHasNoText (orp->taxname)) return;
  onp = orp->orgname;
  if (onp == NULL || onp->mod == NULL) return;

  link = &(onp->mod);
  for (mod = onp->mod; mod != NULL; mod = following) {
    following = mod->next;
    if (mod->subtype != ORGMOD_old_name ||
        StringCmp (orp->taxname, mod->subname) != 0 ||
        ! StringHasNoText (mod->attrib)) {
      /* modifier is kept; advance the link that a later removal patches */
      link = &(mod->next);
      continue;
    }
    *link = following;
    mod->next = NULL;
    OrgModFree (mod);
  }
}
5120
/*
 * BioSource callback: removes "other" OrgMods whose text merely repeats
 * the taxname or the subname of a gb_acronym, gb_anamorph or gb_synonym
 * modifier on the same organism.  When a subtype occurs several times
 * the last occurrence is the one compared against.
 */
static void CleanupOrgModNote (BioSourcePtr biop, Pointer userdata)

{
  CharPtr         acronym = NULL, anamorph = NULL, synonym = NULL, taxname = NULL;
  OrgModPtr PNTR  link;
  OrgModPtr       mod, following;
  OrgNamePtr      onp;
  OrgRefPtr       orp;

  if (biop == NULL) return;
  orp = biop->org;
  if (orp == NULL) return;
  taxname = orp->taxname;
  onp = orp->orgname;
  if (onp == NULL || onp->mod == NULL) return;

  /* pass 1: remember the reference strings */
  for (mod = onp->mod; mod != NULL; mod = mod->next) {
    switch (mod->subtype) {
      case ORGMOD_gb_acronym :
        acronym = mod->subname;
        break;
      case ORGMOD_gb_anamorph :
        anamorph = mod->subname;
        break;
      case ORGMOD_gb_synonym :
        synonym = mod->subname;
        break;
      default :
        break;
    }
  }

  /* pass 2: unlink notes that duplicate one of them */
  link = &(onp->mod);
  for (mod = onp->mod; mod != NULL; mod = following) {
    following = mod->next;
    if (mod->subtype == ORGMOD_other && StringDoesHaveText (mod->subname)) {
      if (StringCmp (taxname, mod->subname) == 0 ||
          StringCmp (acronym, mod->subname) == 0 ||
          StringCmp (anamorph, mod->subname) == 0 ||
          StringCmp (synonym, mod->subname) == 0) {
        *link = following;
        mod->next = NULL;
        OrgModFree (mod);
        continue;
      }
    }
    link = &(mod->next);
  }
}
5164
GetUnambigOverlappingGene(BioseqPtr bsp,SeqLocPtr slp)5165 static SeqFeatPtr GetUnambigOverlappingGene (BioseqPtr bsp, SeqLocPtr slp)
5166
5167 {
5168 SeqMgrFeatContext context;
5169 SeqFeatPtr gene;
5170 Int2 i;
5171 Int4Ptr ivals;
5172 Int2 j;
5173 SeqFeatPtr next;
5174 Int2 numivals;
5175
5176 gene = SeqMgrGetOverlappingGene (slp, &context);
5177 if (gene == NULL) return NULL;
5178 numivals = context.numivals;
5179 ivals = context.ivals;
5180 next = SeqMgrGetNextFeature (bsp, gene, SEQFEAT_GENE, 0, &context);
5181 if (next == NULL) return gene;
5182 if (numivals != context.numivals) return gene;
5183 for (i = 0, j = 0; i < numivals; i++) {
5184 if (ivals [j] != context.ivals [j]) return gene;
5185 j++;
5186 if (ivals [j] != context.ivals [j]) return gene;
5187 j++;
5188 }
5189 return NULL;
5190 }
5191
5192 //LCOV_EXCL_START
5193 //This is never called, because RemoveUnneededGeneXrefs is
5194 //only called when isEmblDdbj is false
/*
 * Feature callback: sets the Boolean that userdata points at to TRUE
 * when it meets a protein feature whose ProtRef has processed > 0.
 * The flag is never cleared here.
 */
static void LookForPeptides (SeqFeatPtr sfp, Pointer userdata)

{
  BoolPtr     flag;
  ProtRefPtr  prot;

  if (sfp == NULL) return;
  if (sfp->data.choice != SEQFEAT_PROT) return;
  flag = (BoolPtr) userdata;
  if (flag == NULL) return;

  prot = (ProtRefPtr) sfp->data.value.ptrvalue;
  if (prot != NULL && prot->processed > 0) {
    *flag = TRUE;
  }
}
5211 //LCOV_EXCL_STOP
5212
/*
 * Bioseq callback: for every non-gene feature that carries an
 * unsuppressed gene xref, drops all of its gene xrefs when the xref
 * names the same gene that GetUnambigOverlappingGene finds for the
 * feature's location.
 *
 * Sameness is judged case-insensitively on the first available pair of
 * locus_tag, locus, or first synonym.  userdata may point at a Boolean
 * (isEmblDdbj); when it is TRUE, a coding region whose product Bioseq
 * has a processed-peptide feature keeps its xref.
 */
static void RemoveUnneededGeneXrefs (BioseqPtr bsp, Pointer userdata)

{
  BoolPtr             flagp;
  SeqMgrFeatContext   fcontext;
  Boolean             hasPeptide;
  Boolean             isEmblDdbj = FALSE;
  SeqFeatXrefPtr PNTR link;
  GeneRefPtr          overlap_ref;
  SeqFeatPtr          overlap;
  BioseqPtr           product;
  Boolean             redundant;
  SeqFeatPtr          sfp;
  CharPtr             syn_a;
  CharPtr             syn_b;
  SeqFeatXrefPtr      xref, xref_next;
  GeneRefPtr          xref_ref;

  flagp = (BoolPtr) userdata;
  if (flagp != NULL) {
    isEmblDdbj = *flagp;
  }

  for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext);
       sfp != NULL;
       sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext)) {

    if (sfp->data.choice == SEQFEAT_GENE) continue;

    xref_ref = SeqMgrGetGeneXref (sfp);
    if (xref_ref == NULL || SeqMgrGeneIsSuppressed (xref_ref)) continue;

    overlap = GetUnambigOverlappingGene (bsp, sfp->location);
    if (overlap == NULL || overlap->data.choice != SEQFEAT_GENE) continue;
    overlap_ref = (GeneRefPtr) overlap->data.value.ptrvalue;
    if (overlap_ref == NULL) continue;

    /* compare on the first identifier kind that both sides provide */
    redundant = FALSE;
    if (StringDoesHaveText (xref_ref->locus_tag) && StringDoesHaveText (overlap_ref->locus_tag)) {
      if (StringICmp (xref_ref->locus_tag, overlap_ref->locus_tag) == 0) {
        redundant = TRUE;
      }
    } else if (StringDoesHaveText (xref_ref->locus) && StringDoesHaveText (overlap_ref->locus)) {
      if (StringICmp (xref_ref->locus, overlap_ref->locus) == 0) {
        redundant = TRUE;
      }
    } else if (xref_ref->syn != NULL && overlap_ref->syn != NULL) {
      syn_a = (CharPtr) xref_ref->syn->data.ptrvalue;
      syn_b = (CharPtr) overlap_ref->syn->data.ptrvalue;
      if (StringDoesHaveText (syn_a) && StringDoesHaveText (syn_b)) {
        if (StringICmp (syn_a, syn_b) == 0) {
          redundant = TRUE;
        }
      }
    }

    if (redundant && isEmblDdbj && sfp->data.choice == SEQFEAT_CDREGION) {
      //LCOV_EXCL_START
      //This is never called, because RemoveUnneededGeneXrefs is
      //only called when isEmblDdbj is false
      hasPeptide = FALSE;
      product = BioseqFindFromSeqLoc (sfp->product);
      if (product != NULL) {
        VisitFeaturesOnBsp (product, (Pointer) &hasPeptide, LookForPeptides);
        if (hasPeptide) {
          redundant = FALSE;
        }
      }
      //LCOV_EXCL_STOP
    }

    if (! redundant) continue;

    /* unlink and free every gene xref on this feature */
    link = (SeqFeatXrefPtr PNTR) &(sfp->xref);
    for (xref = sfp->xref; xref != NULL; xref = xref_next) {
      xref_next = xref->next;
      if (xref->data.choice != SEQFEAT_GENE) {
        link = &(xref->next);
        continue;
      }
      *link = xref_next;
      xref->next = NULL;
      SeqFeatXrefFree (xref);
    }
  }
}
5299
/*
 * Bioseq callback: marks the title descriptor of a protein Bioseq for
 * deletion when it differs (case-insensitively) from the definition
 * line that NewCreateDefLineBuf generates for that Bioseq.
 *
 * Skipped for non-protein Bioseqs, for Bioseqs carrying a SEQID_OTHER
 * id, for Bioseqs without a title, and for Bioseqs that are not inside
 * a nuc-prot set.  Only extended descriptors can be marked.
 *
 * The former molinfo "tech" lookup was removed: its result was never
 * read.
 */
static void StripBadTitleFromProteinProducts (BioseqPtr bsp, Pointer userdata)

{
  BioseqSetPtr   bssp;
  CharPtr        buf;
  size_t         buflen = 1001;
  ObjValNodePtr  ovp;
  SeqIdPtr       sip;
  CharPtr        title;
  ValNodePtr     vnp;

  if (bsp == NULL) return;
  if (! ISA_aa (bsp->mol)) return;
  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    if (sip->choice == SEQID_OTHER) return;
  }
  vnp = BioseqGetSeqDescr (bsp, Seq_descr_title, NULL);
  if (vnp == NULL) return;

  /* climb the parent chain until a nuc-prot set is found */
  if (bsp->idx.parenttype != OBJ_BIOSEQSET) return;
  bssp = (BioseqSetPtr) bsp->idx.parentptr;
  while (bssp != NULL && bssp->_class != BioseqseqSet_class_nuc_prot) {
    if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
      bssp = (BioseqSetPtr) bssp->idx.parentptr;
    } else {
      bssp = NULL;
    }
  }
  if (bssp == NULL) return;

  title = (CharPtr) vnp->data.ptrvalue;
  buf = MemNew (sizeof (Char) * (buflen + 1));
  if (buf != NULL && NewCreateDefLineBuf (NULL, bsp, buf, buflen, TRUE, FALSE)) {
    if (StringICmp (buf, title) != 0) {
      if (vnp->extended != 0) {
        ovp = (ObjValNodePtr) vnp;
        ovp->idx.deleteme = TRUE;
      }
    }
  }
  MemFree (buf);
}
5355
/*
 * Runs StripBadTitleFromProteinProducts over every Bioseq of each
 * nuc-prot set reachable from sep.  Container sets (genbank, the
 * mut_set..eco_set class range, wgs_set, small_genome_set) are descended
 * recursively; any other set class, and a bare Bioseq, is ignored.
 */
static void MarkBadProtTitlesInNucProts (SeqEntryPtr sep)

{
  BioseqSetPtr  bssp;
  SeqEntryPtr   member;

  if (sep == NULL || ! IS_Bioseq_set (sep)) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  if (bssp->_class == BioseqseqSet_class_genbank ||
      (bssp->_class >= BioseqseqSet_class_mut_set && bssp->_class <= BioseqseqSet_class_eco_set) ||
      bssp->_class == BioseqseqSet_class_wgs_set ||
      bssp->_class == BioseqseqSet_class_small_genome_set) {
    member = bssp->seq_set;
    while (member != NULL) {
      MarkBadProtTitlesInNucProts (member);
      member = member->next;
    }
    return;
  }

  if (bssp->_class == BioseqseqSet_class_nuc_prot) {
    VisitBioseqsInSep (sep, NULL, StripBadTitleFromProteinProducts);
  }
}
5378
/*
 * Adds a user-object descriptor to sep that records which cleanup was
 * run ("method"), NCBI_CLEANUP_VERSION ("version") and the current date
 * ("month", "day", "year").  gpipeMode selects the method string.
 *
 * Does nothing when the current date or the user object cannot be
 * created.  The date is now released on the path where the user object
 * is NULL; it used to leak there.
 */
static void MakeNcbiCleanupObject (SeqEntryPtr sep, Boolean gpipeMode)

{
  DatePtr        dp;
  ValNodePtr     sdp;
  UserObjectPtr  uop;

  dp = DateCurr ();
  if (dp == NULL) return;

  uop = CreateNcbiCleanupUserObject ();
  if (uop == NULL) {
    DateFree (dp);
    return;
  }

  if (gpipeMode) {
    //LCOV_EXCL_START
    // GPIPE doesn't use C Toolkit
    AddStringToNcbiCleanupUserObject (uop, "method", "GpipeSeqEntryCleanup");
    //LCOV_EXCL_STOP
  } else {
    AddStringToNcbiCleanupUserObject (uop, "method", "SeriousSeqEntryCleanup");
  }
  AddIntegerToNcbiCleanupUserObject (uop, "version", NCBI_CLEANUP_VERSION);

  /* the year is stored in data [1] as an offset from 1900 */
  AddIntegerToNcbiCleanupUserObject (uop, "month", dp->data [2]);
  AddIntegerToNcbiCleanupUserObject (uop, "day", dp->data [3]);
  AddIntegerToNcbiCleanupUserObject (uop, "year", dp->data [1] + 1900);

  DateFree (dp);

  sdp = NewDescrOnSeqEntry (sep, Seq_descr_user);
  /* NOTE(review): uop is not released when no descriptor could be added */
  if (sdp == NULL) return;
  sdp->data.ptrvalue = uop;
}
5412
/*
 * State shared between SynchronizePseudogenesProc and its feature
 * callbacks.  The strings are borrowed from GBQual values and are not
 * owned by this structure.
 */
typedef struct ssecpseudo {
  CharPtr genepseudo;    /* "pseudogene" qualifier value found on the gene feature */
  CharPtr pseudogene;    /* "pseudogene" qualifier value found on the explored features */
  Boolean inconsistent;  /* set when two differing values were seen */
} SsecPseudoData, PNTR SsecPseudoPtr;
5418
SsecTestPseudoProc(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)5419 static Boolean LIBCALLBACK SsecTestPseudoProc (
5420 SeqFeatPtr sfp,
5421 SeqMgrFeatContextPtr context
5422 )
5423
5424
5425 {
5426 SsecPseudoPtr bpp;
5427 GBQualPtr gbq;
5428
5429 if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE || context == NULL) return TRUE;
5430 bpp = context->userdata;
5431 if (bpp == NULL) return TRUE;
5432
5433 if (! sfp->pseudo) return TRUE;
5434
5435 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
5436 if (StringCmp (gbq->qual, "pseudogene") != 0) continue;
5437 if (bpp->pseudogene == NULL) {
5438 bpp->pseudogene = gbq->val;
5439 } else if (StringCmp (gbq->val, bpp->pseudogene) != 0) {
5440 bpp->inconsistent = TRUE;
5441 }
5442 }
5443
5444 return TRUE;
5445 }
5446
SsecSetPseudoProc(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)5447 static Boolean LIBCALLBACK SsecSetPseudoProc (
5448 SeqFeatPtr sfp,
5449 SeqMgrFeatContextPtr context
5450 )
5451
5452
5453 {
5454 SsecPseudoPtr bpp;
5455 GBQualPtr gbq;
5456
5457 if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE || context == NULL) return TRUE;
5458 bpp = context->userdata;
5459 if (bpp == NULL) return TRUE;
5460
5461 if (! sfp->pseudo) return TRUE;
5462
5463 gbq = GBQualNew ();
5464 if (gbq == NULL) return TRUE;
5465
5466 gbq->qual = StringSave ("pseudogene");
5467 gbq->val = StringSave (bpp->pseudogene);
5468
5469 gbq->next = sfp->qual;
5470 sfp->qual = gbq;
5471
5472 return TRUE;
5473 }
5474
/*
 * Feature callback for pseudo gene features: reconciles the "pseudogene"
 * qualifier value between the gene and the pseudo features explored over
 * the gene's location.
 *
 * Gives up when the gene or the explored features carry conflicting
 * values, when no value exists anywhere, or when the gene's value
 * differs from the features' value.  Otherwise a new "pseudogene"
 * qualifier with the agreed value is prepended to the gene and, through
 * SsecSetPseudoProc, to the explored pseudo features.
 */
static void SynchronizePseudogenesProc (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  BioseqPtr       bsp;
  GBQualPtr       qual;
  SsecPseudoData  state;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
  if (! sfp->pseudo) return;

  MemSet ((Pointer) &state, 0, sizeof (SsecPseudoData));

  /* value(s) recorded on the gene itself */

  qual = sfp->qual;
  while (qual != NULL) {
    if (StringCmp (qual->qual, "pseudogene") == 0) {
      if (state.genepseudo == NULL) {
        state.genepseudo = qual->val;
      } else if (StringCmp (qual->val, state.genepseudo) != 0) {
        state.inconsistent = TRUE;
      }
    }
    qual = qual->next;
  }
  if (state.inconsistent) return;

  bsp = BioseqFindFromSeqLoc (sfp->location);
  if (bsp == NULL) return;

  /* value(s) recorded on the features under the gene */

  SeqMgrExploreFeatures (bsp, (Pointer) &state, SsecTestPseudoProc, sfp->location, NULL, NULL);
  if (state.inconsistent) return;

  /* settle on one value, or stop if that is not possible */

  if (state.pseudogene == NULL) {
    if (state.genepseudo == NULL) return;
    state.pseudogene = state.genepseudo;
  } else if (state.genepseudo != NULL && StringCmp (state.pseudogene, state.genepseudo) != 0) {
    return;
  }

  qual = GBQualNew ();
  if (qual == NULL) return;

  qual->qual = StringSave ("pseudogene");
  qual->val = StringSave (state.pseudogene);

  qual->next = sfp->qual;
  sfp->qual = qual;

  SeqMgrExploreFeatures (bsp, (Pointer) &state, SsecSetPseudoProc, sfp->location, NULL, NULL);
}
5531
/*
 * Counters filled by FindGeneXrefMismatch and read by FixGeneXrefSkew to
 * detect records whose gene features use one identifier field while the
 * gene xrefs use the other.
 */
typedef struct genexrefskew {
  Int4 gene_locus;      /* gene features with text in locus */
  Int4 gene_locus_tag;  /* gene features with text in locus_tag */
  Int4 xref_locus;      /* unsuppressed gene xrefs with text in locus */
  Int4 xref_locus_tag;  /* unsuppressed gene xrefs with text in locus_tag */
} GeneSkew, PNTR GeneSkewPtr;
5538
/*
 * Feature callback: updates the GeneSkew counters that userdata points
 * at.  Gene features contribute to the gene_* counters, and every
 * unsuppressed gene xref on any feature contributes to the xref_*
 * counters, depending on which of locus / locus_tag has text.
 */
static void FindGeneXrefMismatch (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  GeneRefPtr      gene;
  GeneSkewPtr     tally;
  SeqFeatXrefPtr  xref;

  if (sfp == NULL) return;
  tally = (GeneSkewPtr) userdata;
  if (tally == NULL) return;

  if (sfp->data.choice == SEQFEAT_GENE) {
    gene = (GeneRefPtr) sfp->data.value.ptrvalue;
    if (gene != NULL) {
      if (StringDoesHaveText (gene->locus)) {
        tally->gene_locus += 1;
      }
      if (StringDoesHaveText (gene->locus_tag)) {
        tally->gene_locus_tag += 1;
      }
    }
  }

  xref = sfp->xref;
  while (xref != NULL) {
    if (xref->data.choice == SEQFEAT_GENE) {
      gene = (GeneRefPtr) xref->data.value.ptrvalue;
      if (gene != NULL && ! SeqMgrGeneIsSuppressed (gene)) {
        if (StringDoesHaveText (gene->locus)) {
          tally->xref_locus += 1;
        }
        if (StringDoesHaveText (gene->locus_tag)) {
          tally->xref_locus_tag += 1;
        }
      }
    }
    xref = xref->next;
  }
}
5577
/*
 * Feature callback: on every unsuppressed gene xref that has locus_tag
 * text and a NULL locus, moves the locus_tag string into locus and
 * clears locus_tag.  userdata is unused.
 */
static void ForceGeneXrefToLocus (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  GeneRefPtr      gene;
  SeqFeatXrefPtr  xref;

  if (sfp == NULL) return;

  xref = sfp->xref;
  while (xref != NULL) {
    if (xref->data.choice == SEQFEAT_GENE) {
      gene = (GeneRefPtr) xref->data.value.ptrvalue;
      if (gene != NULL && ! SeqMgrGeneIsSuppressed (gene)) {
        if (StringDoesHaveText (gene->locus_tag) && gene->locus == NULL) {
          /* the string changes field; no copy is made */
          gene->locus = gene->locus_tag;
          gene->locus_tag = NULL;
        }
      }
    }
    xref = xref->next;
  }
}
5599
/*
 * Feature callback: on every unsuppressed gene xref that has locus text
 * and a NULL locus_tag, moves the locus string into locus_tag and clears
 * locus.  userdata is unused.
 */
static void ForceGeneXrefToLocusTag (
  SeqFeatPtr sfp,
  Pointer userdata
)

{
  GeneRefPtr      gene;
  SeqFeatXrefPtr  xref;

  if (sfp == NULL) return;

  xref = sfp->xref;
  while (xref != NULL) {
    if (xref->data.choice == SEQFEAT_GENE) {
      gene = (GeneRefPtr) xref->data.value.ptrvalue;
      if (gene != NULL && ! SeqMgrGeneIsSuppressed (gene)) {
        if (StringDoesHaveText (gene->locus) && gene->locus_tag == NULL) {
          /* the string changes field; no copy is made */
          gene->locus_tag = gene->locus;
          gene->locus = NULL;
        }
      }
    }
    xref = xref->next;
  }
}
5621
/* Reconciles gene xrefs with gene features when the two consistently use
 * different fields.
 *
 * Counts locus / locus_tag usage over all features in sep.  If genes use
 * only locus_tag while xrefs use only locus, xrefs are switched to
 * locus_tag; if genes use only locus while xrefs use only locus_tag, xrefs
 * are switched to locus.  Any other combination leaves the entry untouched.
 *
 * sep - entry to process; ignored when NULL. */
static void FixGeneXrefSkew (SeqEntryPtr sep)
{
  GeneSkew  counts;
  Boolean   genesOnlyLocus;
  Boolean   genesOnlyTag;
  Boolean   xrefsOnlyLocus;
  Boolean   xrefsOnlyTag;

  if (sep == NULL) return;

  MemSet ((Pointer) &counts, 0, sizeof (GeneSkew));
  VisitFeaturesInSep (sep, (Pointer) &counts, FindGeneXrefMismatch);

  genesOnlyTag   = (Boolean) (counts.gene_locus == 0 && counts.gene_locus_tag > 0);
  genesOnlyLocus = (Boolean) (counts.gene_locus > 0 && counts.gene_locus_tag == 0);
  xrefsOnlyLocus = (Boolean) (counts.xref_locus > 0 && counts.xref_locus_tag == 0);
  xrefsOnlyTag   = (Boolean) (counts.xref_locus == 0 && counts.xref_locus_tag > 0);

  /* genesOnlyTag and genesOnlyLocus can never both hold, so at most one
     of the two conversions runs */
  if (genesOnlyTag && xrefsOnlyLocus) {
    VisitFeaturesInSep (sep, NULL, ForceGeneXrefToLocusTag);
  } else if (genesOnlyLocus && xrefsOnlyTag) {
    VisitFeaturesInSep (sep, NULL, ForceGeneXrefToLocus);
  }
}
5643
/* Descriptor visitor that inspects pub descriptors for a non-NULL fig field.
 *
 * The function has no observable effect: the flag it sets is local and is
 * never returned or stored.  NOTE(review): it looks like a debugging
 * breakpoint hook - confirm before removing the calls to it.
 *
 * sdp      - descriptor being visited; ignored when NULL.
 * userdata - unused. */
static void FindPubWithFig(SeqDescPtr sdp, Pointer userdata)
{
  PubdescPtr pdp;
  Int4 a = 0;

  if (sdp == NULL || sdp->choice != Seq_descr_pub) return;

  pdp = (PubdescPtr) sdp->data.ptrvalue;
  if (pdp == NULL) return;  /* pub descriptor with no payload: nothing to look at */

  if (pdp->fig != NULL) {
    a = 1;
  }
  (void) a;  /* flag is intentionally unused; silences set-but-unused warnings */
}
5656
/* Runs the full "serious" cleanup pipeline over a Seq-entry.
 *
 * sep       - entry to clean; the function returns immediately when NULL.
 * taxfun    - forwarded to LoopSeqEntryToAsn3.
 * taxmerge  - forwarded to LoopSeqEntryToAsn3.
 * doPseudo  - forwarded to CleanUpPseudoProductsEx and move_cds_ex.
 * gpipeMode - forwarded to LoopSeqEntryToAsn3 and MakeNcbiCleanupObject;
 *             when TRUE it also skips RemoveUnneededGeneXrefs and
 *             NormalizeDescriptorOrder.
 *
 * For the duration of the call the scope is set to sep and the message and
 * log levels are raised to SEV_MAX; all three are restored before returning.
 * The steps below are order-dependent (feature indexes are cleared and
 * rebuilt around steps that need or invalidate them), so do not reorder. */
static void SeriousSeqEntryCleanupEx (SeqEntryPtr sep, SeqEntryFunc taxfun, SeqEntryFunc taxmerge, Boolean doPseudo, Boolean gpipeMode)

{
  BioSourcePtr biop;
  BioseqSetPtr bssp;
  Int2 code;
  CharPtr codes;
  Uint2 entityID;
  GeneticCodePtr gncp;
  Boolean hasMarkedGenes = FALSE;
  Boolean isEmblOrDdbj = FALSE;
  ErrSev lsev;
  ErrSev msev;
  Int4 muid = 0;
  Boolean objMgrFilter [OBJ_MAX];
  SeqEntryPtr oldscope;
  Boolean lclGnlOnly = TRUE;
  SeqEntryPtr tmp;
  ValNodePtr vnp;

  if (sep == NULL) return;
  /* save scope and error levels so they can be restored at the end */
  oldscope = SeqEntrySetScope (sep);
  msev = ErrSetMessageLevel (SEV_MAX);
  lsev = ErrSetLogLevel (SEV_MAX);
  entityID = SeqMgrGetEntityIDForSeqEntry (sep);
  /* clear indexes, since CleanupEmptyFeatCallback removes genes, etc. */
  SeqMgrClearFeatureIndexes (entityID, NULL);
  RemoveAllNcbiCleanupUserObjects (sep);
  RemoveDuplicateNestedSetsForEntityIDNoUpdate (entityID);
  SeqMgrClearFeatureIndexes (entityID, NULL);
  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp != NULL && bssp->_class == BioseqseqSet_class_genbank) {
      tmp = bssp->seq_set;
      if (tmp != NULL && tmp->next == NULL && (IS_Bioseq (tmp))) {
        /* coerce genbank set on top of single sequence to nuc-prot set for unnecessary set removal */
        bssp->_class = BioseqseqSet_class_nuc_prot;
        RenormalizeNucProtSets (sep, TRUE);
      }
    }
  }
  /* visit only features; the callback reports through hasMarkedGenes */
  MemSet ((Pointer) objMgrFilter, FALSE, sizeof (objMgrFilter));
  objMgrFilter [OBJ_SEQFEAT] = TRUE;
  GatherObjectsInEntity (entityID, 0, NULL, MarkMovedGeneGbquals, (Pointer) &hasMarkedGenes, objMgrFilter);
  BasicSeqEntryCleanup (sep);
  SeqEntryExplore (sep, NULL, CleanupGenbankCallback);
  ConvertFullLenSourceFeatToDesc (sep);
  ConvertFullLenPubFeatToDesc (sep);
  SeqEntryExplore (sep, NULL, CleanupEmptyFeatCallback);
  SeqEntryExplore (sep, NULL, MergeAdjacentAnnotsCallback);
  /* reindex, since PseudoGeneOverlap gets best overlapping gene */
  SeqMgrIndexFeatures (entityID, NULL);
  EntryChangeImpFeat(sep); /* change any CDS ImpFeat to real CdRegion */
  /* MoveRnaGBQualProductToName (sep); */ /* move rna gbqual product to rna-ref.ext.name */
  /* MoveProtGBQualProductToName (sep); */ /* move prot gbqual product to prot-ref.name */
  /* MoveCdsGBQualProductToName (sep); */ /* move cds gbqual product to prot-ref.name */
  /* MoveFeatGBQualsToFields (sep); */ /* move feature partial, exception to fields */
  /* ExtendGeneFeatIfOnMRNA (0, sep); */ /* gene on mRNA is full length */

  /* the two flags collected here select between code paths further down */
  SeqEntryExplore (sep, (Pointer) &isEmblOrDdbj, CheckForEmblDdbjID);
  VisitBioseqsInSep (sep, (Pointer) &lclGnlOnly, CheckForLclGnlOnly);
  VisitBioseqsInSep (sep, NULL, ExtendSingleGeneOnMRNA);

  RemoveBioSourceOnPopSet (sep, NULL);
  RemoveMolInfoOnPopSet (sep, NULL);
  /*
  SeqEntryExplore (sep, NULL, DeleteMultipleTitles);
  */
  SeqEntryExplore (sep, NULL, RemoveMultipleTitles);
  SeqEntryExplore (sep, NULL, MergeMultipleDates);
  VisitPubdescsInSep (sep, (Pointer) &muid, LookForUniqueMuid);
  /* lclGnlOnly selects the "Strict" variants of the bad-pub removal callbacks */
  if (lclGnlOnly) {
    VisitDescriptorsInSep (sep, (Pointer) &muid, RemoveBadPubDescrStrict);
    VisitFeaturesInSep (sep, (Pointer) &muid, RemoveBadPubFeatStrict);
  } else {
    VisitDescriptorsInSep (sep, (Pointer) &muid, RemoveBadPubDescr);
    VisitFeaturesInSep (sep, (Pointer) &muid, RemoveBadPubFeat);
  }
  SeqEntryExplore (sep, (Pointer) &muid, MergePubFigInChain);
  VisitFeaturesInSep (sep, NULL, CorrectSfpExceptText);
  if (! isEmblOrDdbj) {
    SeqEntryExplore (sep, NULL, MergeEquivCitSubs);
  }
  VisitDescriptorsInSep (sep, NULL, RemoveEmptyUserObjects);
  DeleteMarkedObjects(0, OBJ_SEQENTRY, (Pointer)sep);
  /* NOTE(review): the interleaved FindPubWithFig visits below modify nothing
     (that callback only sets a local); presumably debugging leftovers -
     confirm before removing */
  VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
  EntryMergeDupBioSources (sep); /* do before and after SE2A3 */
  VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
  LoopSeqEntryToAsn3(sep, TRUE, FALSE, taxfun, taxmerge, gpipeMode, isEmblOrDdbj);
  VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
  /* EntryStripSerialNumber(sep); */ /* strip citation serial numbers */
  MovePopPhyMutPubs (sep);
  VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
  EntryChangeGBSource(sep); /* at least remove redundant information in GBBlocks */
  VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
  EntryCheckGBBlock(sep);
  VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
  SeqEntryMoveDbxrefs(sep); /* db_xref gbqual to sfp->dbxref */
  VisitDescriptorsInSep(sep, NULL, FindPubWithFig);
  EntryMergeDupBioSources(sep);
  SeqEntryExplore (sep, NULL, GetRidOfEmptyFeatsDescCallback);
  CleanUpPseudoProductsEx (entityID, sep, doPseudo);
  RenormalizeNucProtSets (sep, TRUE);
  /*
  StripTitleFromProtsInNucProts (sep);
  */
  MarkBadProtTitlesInNucProts (sep);
  MoveFeatsFromPartsSet (sep);
  move_cds_ex (sep, doPseudo);
  SeqEntryExplore (sep, NULL, MolInfoUpdate);
  DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);
  /* do these again, since SE2A3 can create full length source feature */
  SeqEntryExplore (sep, NULL, CleanupGenbankCallback);
  ConvertFullLenSourceFeatToDesc (sep);
  ConvertFullLenPubFeatToDesc (sep);
  SeqEntryExplore (sep, NULL, CleanupEmptyFeatCallback);
  SeqEntryExplore (sep, NULL, MergeAdjacentAnnotsCallback);
  /* VisitBioseqsInSep (sep, NULL, BarCodeTechToKeyword); */

  /* tbl2asn now calls processes EC numbers with reporting before SSEC */
  UpdateReplacedECNumbersEx (sep, NULL, NULL, TRUE, FALSE);

  /*
  if (GetAppProperty ("NcbiTbl2Asn") != NULL) {
    DeleteBadECNumbers (sep);
  }
  */

  /* reindex, since CdEndCheck (from CdCheck) gets best overlapping gene */
  SeqMgrIndexFeatures (entityID, NULL);
  biop = NULL;
  /* tRNA codon correction runs only when the visit reports exactly one BioSource */
  if (VisitBioSourcesInSep (sep, (Pointer) &biop, FindSingleBioSource) == 1) {
    code = SeqEntryToGeneticCode (sep, NULL, NULL, 0);
    gncp = GeneticCodeFind (code, NULL);
    if (gncp == NULL) {
      /* fall back to genetic code 1 when the entry's code is not found */
      gncp = GeneticCodeFind (1, NULL);
    }
    if (gncp != NULL) {
      codes = NULL;
      /* keep the string from the last choice-3 node in the genetic code's list */
      for (vnp = (ValNodePtr) gncp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
        if (vnp->choice == 3) {
          codes = (CharPtr) vnp->data.ptrvalue;
        }
      }
      if (codes != NULL) {
        VisitFeaturesInSep (sep, (Pointer) codes, CorrectTrnaCodons);
      }
    }
  }
  VisitBioSourcesInSep (sep, NULL, CleanupOldName);
  VisitBioSourcesInSep (sep, NULL, CleanupOrgModNote);
  CdCheck (sep, NULL);
  /* do again to catch occasional duplicate pub on some set components */
  SeqEntryPubsAsn4 (sep, isEmblOrDdbj);
  SeqMgrIndexFeatures (entityID, NULL);
  if (hasMarkedGenes) {
    //LCOV_EXCL_START
    //This is never called; basic cleanup takes care of converting
    // gene quals to gene xrefs
    MemSet ((Pointer) objMgrFilter, FALSE, sizeof (objMgrFilter));
    objMgrFilter [OBJ_SEQFEAT] = TRUE;
    GatherObjectsInEntity (entityID, 0, NULL, DeleteBadMarkedGeneXrefs, NULL, objMgrFilter);
    //LCOV_EXCL_STOP
  }
  if (! gpipeMode) {
    if (! isEmblOrDdbj) { /* for now leave gene xrefs on EMBL and DDBJ */
      VisitBioseqsInSep (sep, (Pointer) &isEmblOrDdbj, RemoveUnneededGeneXrefs);
    }
  }
  ResynchCodingRegionPartials (sep);
  ResynchMessengerRNAPartials (sep);
  ResynchProteinPartials (sep);
  InstantiateProteinTitles (entityID, NULL);
  if (isEmblOrDdbj) {
    RemoveStructuredCommentKeywords (entityID);
  } else {
    RemoveAllStructuredCommentKeywords (entityID);
  }
  AddStructuredCommentKeywords (entityID);
  SeqMgrClearFeatureIndexes (entityID, NULL);

  SeqEntryExplore (sep, NULL, RemoveIdenticalPubs);
  DeleteMarkedObjects (0, OBJ_SEQENTRY, (Pointer) sep);

  FixGeneXrefSkew (sep);

  MakeNcbiCleanupObject (sep, gpipeMode);
  /*
  SeqMgrIndexFeatures (entityID, NULL);
  VisitFeaturesInSep (sep, NULL, SynchronizePseudogenesProc);
  */
  BasicSeqEntryCleanup (sep);
  if (! gpipeMode) {
    NormalizeDescriptorOrder (sep);
  }
  TransTableFreeAll ();
  /* restore the error levels and scope saved at the top */
  ErrSetMessageLevel (msev);
  ErrSetLogLevel (lsev);
  SeqEntrySetScope (oldscope);
}
5857
/* Public entry point for the full cleanup: forwards sep and the two taxonomy
 * callbacks to SeriousSeqEntryCleanupEx with doPseudo on and gpipeMode off. */
extern void SeriousSeqEntryCleanup (SeqEntryPtr sep, SeqEntryFunc taxfun, SeqEntryFunc taxmerge)
{
  const Boolean  doPseudo = TRUE;
  const Boolean  gpipeMode = FALSE;

  SeriousSeqEntryCleanupEx (sep, taxfun, taxmerge, doPseudo, gpipeMode);
}
5863
5864 //LCOV_EXCL_START
/* Bulk variant of the full cleanup: no taxonomy callbacks, doPseudo off,
 * gpipeMode off. */
extern void SeriousSeqEntryCleanupBulk (SeqEntryPtr sep)
{
  const Boolean  doPseudo = FALSE;
  const Boolean  gpipeMode = FALSE;

  SeriousSeqEntryCleanupEx (sep, NULL, NULL, doPseudo, gpipeMode);
}
5870
5871 // GPIPE doesn't use cleanasn
/* GPIPE variant of the full cleanup: no taxonomy callbacks, doPseudo on,
 * gpipeMode on. */
extern void GpipeSeqEntryCleanup (SeqEntryPtr sep)
{
  const Boolean  doPseudo = TRUE;
  const Boolean  gpipeMode = TRUE;

  SeriousSeqEntryCleanupEx (sep, NULL, NULL, doPseudo, gpipeMode);
}
5877
5878 // used by ExtendedSeqEntryCleanup (not used)
/* Accumulator passed as userdata to DummySMFEProc while it scans the genes
 * overlapping a feature. */
typedef struct dummysmfedata {
  Int4 max;                  /* despite the name, the SMALLEST gene location length seen so far
                                (seeded with INT4_MAX by AddMissingGeneXref, lowered by DummySMFEProc) */
  Int4 num_at_max;           /* number of genes whose length equals max */
  Int4 num_trans_spliced;    /* of those, how many have "trans-splicing" in except_text */
  Boolean equivalent_genes;  /* TRUE once two genes at that length match on locus_tag, locus or first synonym */
  GeneRefPtr grp_at_max;     /* GeneRef of the first gene found at the current max length */
} DummySmfeData, PNTR DummySmfePtr;
5886
DummySMFEProc(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)5887 static Boolean LIBCALLBACK DummySMFEProc (
5888 SeqFeatPtr sfp,
5889 SeqMgrFeatContextPtr context
5890 )
5891
5892
5893 {
5894 DummySmfePtr dsp;
5895 GeneRefPtr grp, grpx;
5896 Int4 len;
5897 Boolean redundantgenexref = FALSE;
5898 CharPtr syn1, syn2;
5899
5900 if (sfp == NULL || context == NULL) return TRUE;
5901 dsp = context->userdata;
5902 if (dsp == NULL) return TRUE;
5903 if (sfp->data.choice != SEQFEAT_GENE) return TRUE;
5904 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
5905 if (grp == NULL) return TRUE;
5906
5907 len = SeqLocLen (sfp->location);
5908 if (len < dsp->max) {
5909 dsp->max = len;
5910 dsp->num_at_max = 1;
5911 dsp->num_trans_spliced = 0;
5912 if (StringISearch (sfp->except_text, "trans-splicing") != NULL) {
5913 (dsp->num_trans_spliced)++;
5914 }
5915 dsp->equivalent_genes = FALSE;
5916 dsp->grp_at_max = grp;
5917 } else if (len == dsp->max) {
5918 (dsp->num_at_max)++;
5919 if (StringISearch (sfp->except_text, "trans-splicing") != NULL) {
5920 (dsp->num_trans_spliced)++;
5921 }
5922 grpx = dsp->grp_at_max;
5923 if (grpx != NULL) {
5924 redundantgenexref = FALSE;
5925 if (StringDoesHaveText (grp->locus_tag) && StringDoesHaveText (grpx->locus_tag)) {
5926 if (StringICmp (grp->locus_tag, grpx->locus_tag) == 0) {
5927 redundantgenexref = TRUE;
5928 }
5929 } else if (StringDoesHaveText (grp->locus) && StringDoesHaveText (grpx->locus)) {
5930 if (StringICmp (grp->locus, grpx->locus) == 0) {
5931 redundantgenexref = TRUE;
5932 }
5933 } else if (grp->syn != NULL && grpx->syn != NULL) {
5934 syn1 = (CharPtr) grp->syn->data.ptrvalue;
5935 syn2 = (CharPtr) grpx->syn->data.ptrvalue;
5936 if (StringDoesHaveText (syn1) && StringDoesHaveText (syn2)) {
5937 if (StringICmp (syn1, syn2) == 0) {
5938 redundantgenexref = TRUE;
5939 }
5940 }
5941 }
5942 }
5943 if (redundantgenexref) {
5944 dsp->equivalent_genes = TRUE;
5945 }
5946 }
5947
5948 return TRUE;
5949 }
5950
5951 // used by ExtendedSeqEntryCleanup (not used)
AddMissingGeneXref(SeqFeatPtr sfp,SeqMgrFeatContextPtr context)5952 static Boolean LIBCALLBACK AddMissingGeneXref (SeqFeatPtr sfp, SeqMgrFeatContextPtr context)
5953
5954 {
5955 Int2 count;
5956 DummySmfeData dsd;
5957 SeqMgrFeatContext gcontext;
5958 SeqFeatPtr gene;
5959 GeneRefPtr grp;
5960 CharPtr locus, locus_tag;
5961 SeqEntryPtr sep, oldscope;
5962 Boolean smallGenomeSet;
5963 BoolPtr smallGenomeSetP;
5964 SeqFeatXrefPtr xref;
5965
5966 if (sfp == NULL || context == NULL) return TRUE;
5967 smallGenomeSetP = (BoolPtr) context->userdata;
5968 if (smallGenomeSetP == NULL) return FALSE;
5969 smallGenomeSet = *smallGenomeSetP;
5970
5971 if (sfp->idx.subtype == FEATDEF_GENE ||
5972 sfp->idx.subtype == FEATDEF_operon ||
5973 sfp->idx.subtype == FEATDEF_gap ||
5974 sfp->idx.subtype == FEATDEF_repeat_region ||
5975 sfp->idx.subtype == FEATDEF_mobile_element ||
5976 sfp->idx.subtype == FEATDEF_centromere ||
5977 sfp->idx.subtype == FEATDEF_primer_bind ||
5978 sfp->idx.subtype == FEATDEF_telomere) return TRUE;
5979
5980 grp = SeqMgrGetGeneXref (sfp);
5981 if (grp != NULL) return TRUE;
5982
5983 MemSet ((Pointer) &dsd, 0, sizeof (DummySmfeData));
5984 dsd.max = INT4_MAX;
5985 dsd.num_at_max = 0;
5986 dsd.num_trans_spliced = 0;
5987 dsd.equivalent_genes = FALSE;
5988 dsd.grp_at_max = NULL;
5989 count = SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE, NULL, 0,
5990 LOCATION_SUBSET, (Pointer) &dsd, DummySMFEProc);
5991
5992 if (dsd.num_at_max <= 1) return TRUE;
5993 if (smallGenomeSet && dsd.num_at_max == dsd.num_trans_spliced) return TRUE;
5994
5995 sep = GetTopSeqEntryForEntityID (sfp->idx.entityID);
5996 oldscope = SeqEntrySetScope (sep);
5997 gene = SeqMgrGetOverlappingFeatureEx (sfp->location, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, &gcontext, TRUE);
5998 SeqEntrySetScope (oldscope);
5999 if (gene == NULL) return TRUE;
6000
6001 grp = (GeneRefPtr) gene->data.value.ptrvalue;
6002 if (grp == NULL) return TRUE;
6003 locus = grp->locus;
6004 locus_tag = grp->locus_tag;
6005 if (StringHasNoText (locus) && StringHasNoText (locus_tag)) return TRUE;
6006
6007 grp = GeneRefNew ();
6008 if (grp == NULL) return TRUE;
6009 grp->locus = StringSaveNoNull (locus);
6010 grp->locus_tag = StringSaveNoNull (locus_tag);
6011 xref = SeqFeatXrefNew ();
6012 if (xref == NULL) return TRUE;
6013 xref->data.choice = SEQFEAT_GENE;
6014 xref->data.value.ptrvalue = (Pointer) grp;
6015 xref->next = sfp->xref;
6016 sfp->xref = xref;
6017
6018 return TRUE;
6019 }
6020
6021 // used by ExtendedSeqEntryCleanup (not used)
/* Bioseq visitor: runs AddMissingGeneXref over the features of every
 * non-protein Bioseq, passing userdata through unchanged.  NULL and amino
 * acid Bioseqs are skipped. */
static void BspMissingGeneRef (BioseqPtr bsp, Pointer userdata)
{
  if (bsp == NULL) return;
  if (ISA_aa (bsp->mol)) return;

  SeqMgrExploreFeatures (bsp, userdata, AddMissingGeneXref, NULL, NULL, NULL);
}
6029
6030 // used by ExtendedSeqEntryCleanup (not used)
/* Set visitor: stores TRUE through userdata (a BoolPtr) when bssp is a
 * small genome set.  The flag is never reset to FALSE here, so it stays
 * raised once any visited set qualifies. */
static void IsSmallGenomeSet (BioseqSetPtr bssp, Pointer userdata)
{
  BoolPtr  flag = (BoolPtr) userdata;

  if (bssp == NULL) return;
  if (bssp->_class != BioseqseqSet_class_small_genome_set) return;

  if (flag != NULL) {
    *flag = TRUE;
  }
}
6044
6045 // used by ExtendedSeqEntryCleanup (not used)
/* Feature visitor: walks every component of the feature location and
 * rewrites a strand of Seq_strand_other to Seq_strand_plus on intervals
 * and points.  Other location types are left alone.
 *
 * sfp      - feature being visited; ignored when NULL or without location.
 * userdata - unused. */
static void FixSeqStrandOther (SeqFeatPtr sfp, Pointer userdata)
{
  SeqIntPtr  ival;
  SeqLocPtr  piece;
  SeqPntPtr  pnt;

  if (sfp == NULL || sfp->location == NULL) return;

  /*
  if (sfp->data.choice != SEQFEAT_GENE &&
      sfp->data.choice != SEQFEAT_CDREGION &&
      sfp->data.choice != SEQFEAT_PROT &&
      sfp->data.choice != SEQFEAT_RNA) return;
  */

  for (piece = SeqLocFindNext (sfp->location, NULL);
       piece != NULL;
       piece = SeqLocFindNext (sfp->location, piece)) {
    if (piece->choice == SEQLOC_INT) {
      ival = (SeqIntPtr) piece->data.ptrvalue;
      if (ival != NULL && ival->strand == Seq_strand_other) {
        ival->strand = Seq_strand_plus;
      }
    } else if (piece->choice == SEQLOC_PNT) {
      pnt = (SeqPntPtr) piece->data.ptrvalue;
      if (pnt != NULL && pnt->strand == Seq_strand_other) {
        pnt->strand = Seq_strand_plus;
      }
    }
  }
}
6086
6087 // option not used
/* Full cleanup plus two extra passes.
 *
 * Runs SeriousSeqEntryCleanupEx (no taxonomy callbacks, doPseudo TRUE,
 * gpipeMode FALSE), then:
 *   - rewrites Seq_strand_other to plus on all feature locations, and
 *   - indexes features and adds missing gene xrefs on non-protein Bioseqs,
 *     telling that pass whether any small genome set was found.
 * Feature indexes are cleared again before returning.
 *
 * sep - entry to clean; the function returns immediately when NULL. */
extern void ExtendedSeqEntryCleanup (SeqEntryPtr sep)

{
  Uint2 entityID;
  Boolean smallGenomeSet = FALSE;

  if (sep == NULL) return;
  SeriousSeqEntryCleanupEx (sep, NULL, NULL, TRUE, FALSE);

  /* sets smallGenomeSet to TRUE if any set in sep has the small-genome class */
  VisitSetsInSep (sep, (Pointer) &smallGenomeSet, IsSmallGenomeSet);

  entityID = ObjMgrGetEntityIDForChoice (sep);

  VisitFeaturesInSep (sep, NULL, FixSeqStrandOther);

  /* index features first: BspMissingGeneRef explores them via SeqMgrExploreFeatures */
  SeqMgrIndexFeatures (entityID, NULL);
  VisitBioseqsInSep (sep, (Pointer) &smallGenomeSet, BspMissingGeneRef);

  SeqMgrClearFeatureIndexes (entityID, NULL);
}
6108
/* Cleans a stand-alone Seq-annot and stamps it with a cleanup record.
 *
 * Removes any existing cleanup user objects, runs BasicSeqAnnotCleanup,
 * then appends a new Annot_descr_user descriptor holding a cleanup user
 * object with the method name, NCBI_CLEANUP_VERSION and the current
 * month, day and year.
 *
 * sap - annotation to clean; the function returns immediately when NULL.
 *
 * If the date, the user object or the descriptor cannot be created the
 * annot is left cleaned but unstamped; intermediate objects are released
 * on those paths. */
extern void SeriousSeqAnnotCleanup (SeqAnnotPtr sap)

{
  AnnotDescrPtr  adp;
  DatePtr        dp;
  AnnotDescrPtr  last;
  UserObjectPtr  uop;

  if (sap == NULL) return;

  RemoveAllSeqAnnotCleanupUserObjs (sap);

  BasicSeqAnnotCleanup (sap);

  dp = DateCurr ();
  if (dp == NULL) return;

  uop = CreateNcbiCleanupUserObject ();
  if (uop == NULL) {
    DateFree (dp);   /* the date was allocated above; do not leak it */
    return;
  }

  AddStringToNcbiCleanupUserObject (uop, "method", "SeriousSeqAnnotCleanup");
  AddIntegerToNcbiCleanupUserObject (uop, "version", NCBI_CLEANUP_VERSION);

  /* data [1] is stored as an offset from 1900, hence the addition */
  AddIntegerToNcbiCleanupUserObject (uop, "month", dp->data [2]);
  AddIntegerToNcbiCleanupUserObject (uop, "day", dp->data [3]);
  AddIntegerToNcbiCleanupUserObject (uop, "year", dp->data [1] + 1900);

  DateFree (dp);

  adp = AnnotDescrNew (NULL);
  if (adp == NULL) {
    UserObjectFree (uop);   /* nothing will own the user object; release it */
    return;
  }

  adp->choice = Annot_descr_user;
  adp->data.ptrvalue = uop;

  /* append at the end of the existing descriptor chain */
  if (sap->desc == NULL) {
    sap->desc = adp;
  } else {
    last = sap->desc;
    while (last->next != NULL) {
      last = last->next;
    }
    last->next = adp;
  }
}
6154
6155
6156 /* ConvertSegSetToDeltaSeq section */
6157 // Only for SegSets
/* SeqEntryExplore callback: when sep is a Bioseq-set of class segset,
 * stores sep through mydata (a SeqEntryPtr PNTR).  A later match
 * overwrites an earlier one.  index and indent are unused. */
static void FindSegSet (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
{
  BioseqSetPtr      set;
  SeqEntryPtr PNTR  found = (SeqEntryPtr PNTR) mydata;

  if (sep == NULL || ! IS_Bioseq_set (sep)) return;

  set = (BioseqSetPtr) sep->data.ptrvalue;
  if (set == NULL || set->_class != BioseqseqSet_class_segset) return;

  if (found != NULL) {
    *found = sep;
  }
}
6173
/* Set visitor: flags every segset for deletion by setting idx.deleteme.
 * userdata is unused. */
static void MarkSegSets (BioseqSetPtr bssp, Pointer userdata)
{
  if (bssp != NULL && bssp->_class == BioseqseqSet_class_segset) {
    bssp->idx.deleteme = TRUE;
  }
}
6181
6182 // Only for SegSets
/* Bioseq visitor: bitwise-ORs bsp->mol into the Uint1 that userdata points
 * to, so the caller can detect a mixture of molecule types afterwards.
 * Does nothing when either argument is NULL. */
static void LookForMixedMols (BioseqPtr bsp, Pointer userdata)
{
  Uint1Ptr  acc = (Uint1Ptr) userdata;

  if (bsp == NULL || acc == NULL) return;

  /* accumulate the mol bits of every visited Bioseq */
  *acc = (Uint1) (*acc | bsp->mol);
}
6195
6196 // Only for SegSets
/* Bioseq visitor: stores TRUE through userdata (a BoolPtr) when a
 * non-virtual Bioseq has no MolInfo descriptor reachable through
 * SeqMgrGetNextDescriptor.  Virtual Bioseqs are not checked. */
static void CheckForMissingMolInfo(BioseqPtr bsp, Pointer userdata)
{
  SeqMgrDescContext  dctx;
  BoolPtr            missing = (BoolPtr) userdata;

  if (bsp == NULL || missing == NULL) return;
  if (bsp->repr == Seq_repr_virtual) return;

  if (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dctx) == NULL) {
    *missing = TRUE;
  }
}
6214
/* State carried across descriptors by FindGenBankDiffs.  The pointers are
 * borrowed from the first GenBank block seen; nothing here is copied. */
typedef struct gbdata {
  GBBlockPtr gbp;        /* first GenBank block encountered (NULL until then) */
  CharPtr source;        /* source string of that first block */
  CharPtr origin;        /* origin string of that first block */
  Boolean mixedsources;  /* set when a later block's source differs (case-insensitive) */
  Boolean mixedorigins;  /* set when a later block's origin differs (case-insensitive) */
} GBData, PNTR GBDataPtr;
6222
6223 // Only for SegSets
/* Descriptor visitor: compares every GenBank block with the first one seen.
 *
 * sdp      - descriptor being visited; only Seq_descr_genbank with a
 *            non-NULL block is considered.
 * userdata - GBDataPtr holding the reference block and the result flags.
 *
 * The first block is remembered as the reference.  Each later block raises
 * mixedsources / mixedorigins when its source / origin differs from the
 * reference under a case-insensitive comparison. */
static void FindGenBankDiffs(SeqDescrPtr sdp, Pointer userdata)
{
  GBDataPtr   state = (GBDataPtr) userdata;
  GBBlockPtr  block;

  if (sdp == NULL || sdp->choice != Seq_descr_genbank) return;
  if (state == NULL) return;

  block = (GBBlockPtr) sdp->data.ptrvalue;
  if (block == NULL) return;

  if (state->gbp != NULL) {
    /* compare against the reference block */
    if (StringICmp (block->source, state->source) != 0) {
      state->mixedsources = TRUE;
    }
    if (StringICmp (block->origin, state->origin) != 0) {
      state->mixedorigins = TRUE;
    }
    return;
  }

  /* first block: becomes the reference */
  state->gbp = block;
  state->source = block->source;
  state->origin = block->origin;
}
6254
6255 // Only for SegSets
/* Descriptor visitor: stores a deep copy (made with AsnIoMemCopy) of the
 * first non-NULL GenBank block into the GBBlockPtr that userdata points
 * to.  Once that slot is non-NULL, further descriptors are ignored. */
static void CopyFirstGBBlock(SeqDescrPtr sdp, Pointer userdata)
{
  GBBlockPtr       src;
  GBBlockPtr PNTR  slot = (GBBlockPtr PNTR) userdata;

  if (sdp == NULL || sdp->choice != Seq_descr_genbank) return;

  src = (GBBlockPtr) sdp->data.ptrvalue;
  if (src == NULL || slot == NULL) return;

  if (*slot == NULL) {
    *slot = (GBBlockPtr) AsnIoMemCopy (src, (AsnReadFunc) GBBlockAsnRead, (AsnWriteFunc) GBBlockAsnWrite);
  }
}
6276
6277 // Only for SegSets
/* Descriptor visitor: stores a deep copy (made with AsnIoMemCopy) of the
 * first non-NULL MolInfo into the MolInfoPtr that userdata points to.
 * Once that slot is non-NULL, further descriptors are ignored. */
static void CopyFirstMolInfo (SeqDescrPtr sdp, Pointer userdata)
{
  MolInfoPtr       src;
  MolInfoPtr PNTR  slot = (MolInfoPtr PNTR) userdata;

  if (sdp == NULL || sdp->choice != Seq_descr_molinfo) return;

  src = (MolInfoPtr) sdp->data.ptrvalue;
  if (src == NULL || slot == NULL) return;

  if (*slot == NULL) {
    *slot = (MolInfoPtr) AsnIoMemCopy (src, (AsnReadFunc) MolInfoAsnRead, (AsnWriteFunc) MolInfoAsnWrite);
  }
}
6298
6299 // Only for SegSets
/* Descriptor visitor: stores a StringSave copy of the first non-NULL title
 * into the CharPtr that userdata points to.  Once that slot is non-NULL,
 * further descriptors are ignored. */
static void CopyFirstTitle (SeqDescrPtr sdp, Pointer userdata)
{
  CharPtr       text;
  CharPtr PNTR  slot = (CharPtr PNTR) userdata;

  if (sdp == NULL || sdp->choice != Seq_descr_title) return;

  text = (CharPtr) sdp->data.ptrvalue;
  if (text == NULL || slot == NULL) return;

  if (*slot == NULL) {
    *slot = (CharPtr) StringSave (text);
  }
}
6320
6321 // Only for SegSets
/* Bioseq visitor: appends the accession of each non-virtual Bioseq to the
 * extra_accessions list of the GenBank block passed as userdata.
 *
 * Bioseqs without a best-accession Seq-id, or whose accession text comes
 * out empty, are skipped. */
static void AddPartAccns (BioseqPtr bsp, Pointer userdata)
{
  Char        accn [64];
  GBBlockPtr  block = (GBBlockPtr) userdata;
  SeqIdPtr    best;

  if (bsp == NULL || block == NULL) return;
  if (bsp->repr == Seq_repr_virtual) return;

  best = SeqIdFindBestAccession (bsp->id);
  if (best == NULL) return;

  SeqIdWrite (best, accn, PRINTID_TEXTID_ACCESSION, sizeof (accn));
  if (! StringHasNoText (accn)) {
    ValNodeCopyStr (&(block->extra_accessions), 0, accn);
  }
}
6346
6347 // Only for SegSets
/* Bioseq visitor: feeds the accession of each non-virtual Bioseq to
 * ParseStringIntoSeqHist for the delta Bioseq passed as userdata.
 *
 * When the delta Bioseq has no history yet, the Seq-hist returned by
 * ParseStringIntoSeqHist becomes its history.  Bioseqs without a
 * best-accession Seq-id, or with empty accession text, are skipped. */
static void AddPartHist (BioseqPtr bsp, Pointer userdata)
{
  Char        accn [64];
  BioseqPtr   delta = (BioseqPtr) userdata;
  SeqHistPtr  hist;
  SeqIdPtr    best;

  if (bsp == NULL || delta == NULL) return;
  if (bsp->repr == Seq_repr_virtual) return;

  best = SeqIdFindBestAccession (bsp->id);
  if (best == NULL) return;

  SeqIdWrite (best, accn, PRINTID_TEXTID_ACCESSION, sizeof (accn));
  if (StringHasNoText (accn)) return;

  hist = ParseStringIntoSeqHist (delta->hist, accn);
  if (delta->hist == NULL) {
    delta->hist = hist;
  }
}
6376
6377 // Only for SegSets
AddSegToDeltaSeq(SeqLocPtr slp,SeqMgrSegmentContextPtr context)6378 static Boolean LIBCALLBACK AddSegToDeltaSeq (
6379 SeqLocPtr slp,
6380 SeqMgrSegmentContextPtr context
6381 )
6382
6383 {
6384 CharPtr bases;
6385 BioseqPtr bsp;
6386 BioseqPtr deltabsp;
6387 IntFuzzPtr ifp;
6388 SeqLocPtr loc;
6389 SeqIdPtr sip;
6390 SeqLitPtr slitp;
6391
6392 if (slp == NULL || context == NULL) return FALSE;
6393 deltabsp = (BioseqPtr) context->userdata;
6394 if (deltabsp == NULL) return FALSE;
6395
6396 sip = SeqLocId (slp);
6397 if (sip == NULL) {
6398 loc = SeqLocFindNext (slp, NULL);
6399 if (loc == NULL) return TRUE;
6400 sip = SeqLocId (loc);
6401 }
6402 if (sip == NULL) return TRUE;
6403
6404 bsp = BioseqFind (sip);
6405 if (bsp == NULL) return TRUE;
6406
6407 if (bsp->repr == Seq_repr_virtual) {
6408 if (deltabsp->seq_ext != NULL) {
6409 /* insert gap of unknown length (by convention, 100 bases) between the previous segment and this one. */
6410 slitp = (SeqLitPtr) MemNew (sizeof (SeqLit));
6411 if (slitp != NULL) {
6412 slitp->length = bsp->length;
6413 if (slitp->length == 100) {
6414 ifp = IntFuzzNew ();
6415 if (ifp != NULL) {
6416 ifp->choice = 4;
6417 slitp->fuzz = ifp;
6418 }
6419 }
6420 ValNodeAddPointer ((ValNodePtr PNTR) &(deltabsp->seq_ext), (Int2) 2, (Pointer) slitp);
6421 deltabsp->length += slitp->length;
6422 }
6423 }
6424 return TRUE;
6425 }
6426
6427 bases = GetSequenceByBsp (bsp);
6428 if (bases == NULL) return TRUE;
6429
6430 slitp = (SeqLitPtr) MemNew (sizeof (SeqLit));
6431 if (slitp != NULL) {
6432 slitp->length = StringLen (bases);
6433 ValNodeAddPointer ((ValNodePtr PNTR) &(deltabsp->seq_ext), (Int2) 2, (Pointer) slitp);
6434 slitp->seq_data = (SeqDataPtr) BSNew (slitp->length);
6435 slitp->seq_data_type = Seq_code_iupacna;
6436 AddBasesToByteStore ((ByteStorePtr) slitp->seq_data, bases);
6437 deltabsp->length += slitp->length;
6438 }
6439
6440 return TRUE;
6441 }
6442
6443 // Only for SegSets
/* SeqEntryExplore callback: moves every feature found in the feature
 * tables of sep (a Bioseq or a Bioseq-set) onto the delta Bioseq passed in
 * mydata, using AddFeatToBioseq.
 *
 * Each feature is unlinked from its table before being handed over.  An
 * annot whose data is empty afterwards is unlinked and freed when
 * SSECNoGenomeAnnotInAnnotDescr returns TRUE for it; otherwise it stays in
 * the chain.  index and indent are unused. */
static void MoveAnnotsToDeltaSeq (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
{
  BioseqPtr      target = (BioseqPtr) mydata;
  BioseqPtr      seq = NULL;
  BioseqSetPtr   set = NULL;
  SeqAnnotPtr    annot, annotAfter;
  SeqFeatPtr     feat, featAfter;
  Pointer PNTR   link;   /* slot that currently points at 'annot' */

  if (sep == NULL || target == NULL) return;

  if (IS_Bioseq (sep)) {
    seq = (BioseqPtr) sep->data.ptrvalue;
    if (seq == NULL) return;
    annot = seq->annot;
    link = (Pointer PNTR) &(seq->annot);
  } else if (IS_Bioseq_set (sep)) {
    set = (BioseqSetPtr) sep->data.ptrvalue;
    if (set == NULL) return;
    annot = set->annot;
    link = (Pointer PNTR) &(set->annot);
  } else {
    return;
  }

  for (; annot != NULL; annot = annotAfter) {
    annotAfter = annot->next;

    if (annot->type == 1) {
      /* pop features off the head of the table one at a time */
      for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = featAfter) {
        featAfter = feat->next;
        annot->data = (Pointer) featAfter;
        feat->next = NULL;
        AddFeatToBioseq (feat, target);
      }
    }

    /* now keep empty annot if annot_descr present */
    if (annot->data == NULL && /* annot->desc == NULL */ SSECNoGenomeAnnotInAnnotDescr (annot)) {
      *link = annot->next;
      annot->next = NULL;
      SeqAnnotFree (annot);
    } else {
      link = (Pointer PNTR) &(annot->next);
    }
  }
}
6497
6498 // Only for SegSets
RptUnitInBaseRange(CharPtr str,Int4Ptr fromP,Int4Ptr toP)6499 static Boolean RptUnitInBaseRange (CharPtr str, Int4Ptr fromP, Int4Ptr toP)
6500
6501 {
6502 CharPtr ptr;
6503 Char tmp [32];
6504 long int val;
6505
6506 if (StringLen (str) > 25) return FALSE;
6507 StringNCpy_0 (tmp, str, sizeof (tmp));
6508 ptr = StringStr (tmp, "..");
6509 if (ptr == NULL) return FALSE;
6510 *ptr = '\0';
6511 if (StringHasNoText (tmp)) return FALSE;
6512 if (sscanf (tmp, "%ld", &val) != 1 || val < 1) return FALSE;
6513 if (fromP != NULL) {
6514 *fromP = val - 1;
6515 }
6516 ptr += 2;
6517 if (StringHasNoText (ptr)) return FALSE;
6518 if (sscanf (ptr, "%ld", &val) != 1 || val < 1) return FALSE;
6519 if (toP != NULL) {
6520 *toP = val - 1;
6521 }
6522 return TRUE;
6523 }
6524
6525 // Only for SegSets
FindFirstLocalBioseq(SeqLocPtr loc)6526 static BioseqPtr FindFirstLocalBioseq (SeqLocPtr loc)
6527
6528 {
6529 BioseqPtr bsp;
6530 SeqIdPtr sip;
6531 SeqLocPtr slp = NULL;
6532
6533 if (loc == NULL) return NULL;
6534
6535 while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
6536 sip = SeqLocId (slp);
6537 if (sip != NULL) {
6538 bsp = BioseqFindCore (sip);
6539 if (bsp != NULL) return bsp;
6540 }
6541 }
6542
6543 return NULL;
6544 }
6545
6546 // Only for SegSets
MapSegFeatToMaster(SeqFeatPtr sfp,Pointer userdata)6547 static void MapSegFeatToMaster (
6548 SeqFeatPtr sfp,
6549 Pointer userdata
6550 )
6551
6552 {
6553 BioseqPtr bsp, ptbsp;
6554 Char buf [64];
6555 CodeBreakPtr cbp;
6556 CdRegionPtr crp;
6557 Int4 from, to;
6558 GBQualPtr gbq;
6559 Boolean hasNulls;
6560 Int4 lim;
6561 Boolean noLeft;
6562 Boolean noRight;
6563 ValNodePtr partiallist = NULL, emptypartials = NULL;
6564 RnaRefPtr rrp;
6565 SeqInt sint;
6566 SeqIntPtr sintp;
6567 SeqIdPtr sip;
6568 SeqLocPtr slp = NULL;
6569 tRNAPtr trna;
6570 ValNode vn;
6571
6572 if (sfp == NULL || sfp->location == NULL) return;
6573
6574 bsp = BioseqFindFromSeqLoc (sfp->location);
6575 if (bsp == NULL) return;
6576 if (ISA_aa (bsp->mol)) return;
6577 if (bsp->repr != Seq_repr_seg) return;
6578
6579 partiallist = GetSeqLocPartialSet (sfp->location);
6580 CheckSeqLocForPartialEx (sfp->location, &noLeft, &noRight, &lim);
6581 hasNulls = LocationHasNullsBetween (sfp->location);
6582
6583 if (sfp->data.choice == SEQFEAT_GENE) {
6584 slp = SeqLocMergeExEx (bsp, sfp->location, NULL, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
6585 hasNulls = FALSE;
6586 sfp->partial = FALSE;
6587 } else if (sfp->data.choice == SEQFEAT_CDREGION) {
6588 slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
6589 } else if (sfp->data.choice == SEQFEAT_RNA) {
6590 slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
6591 } else {
6592 slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE, FALSE);
6593 }
6594 if (slp == NULL) {
6595 ValNodeFree (partiallist);
6596 return;
6597 }
6598
6599 ptbsp = FindFirstLocalBioseq (sfp->location);
6600
6601 sfp->location = SeqLocFree (sfp->location);
6602 sfp->location = slp;
6603 emptypartials = GetSeqLocPartialSet (sfp->location);
6604 FreeAllFuzz (sfp->location);
6605 SetSeqLocPartialEx (sfp->location, noLeft, noRight, lim);
6606 if (ValNodeLen (partiallist) == ValNodeLen (emptypartials)) {
6607 SetSeqLocPartialSet (sfp->location, partiallist);
6608 }
6609 ValNodeFree (partiallist);
6610 ValNodeFree (emptypartials);
6611 sfp->partial = (sfp->partial || noLeft || noRight || hasNulls);
6612
6613 switch (sfp->data.choice) {
6614 case SEQFEAT_CDREGION :
6615 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
6616 if (crp != NULL && crp->code_break != NULL) {
6617 for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
6618 slp = SeqLocMerge (bsp, cbp->loc, NULL, FALSE, TRUE, FALSE);
6619 if (slp != NULL) {
6620 cbp->loc = SeqLocFree (cbp->loc);
6621 cbp->loc = slp;
6622 FreeAllFuzz (cbp->loc);
6623 }
6624 }
6625 }
6626 break;
6627 case SEQFEAT_RNA :
6628 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
6629 if (rrp != NULL && rrp->type == 3 && rrp->ext.choice == 2) {
6630 trna = rrp->ext.value.ptrvalue;
6631 if (trna != NULL && trna->anticodon != NULL) {
6632 slp = SeqLocMerge (bsp, trna->anticodon, NULL, FALSE, TRUE, FALSE);
6633 if (slp != NULL) {
6634 trna->anticodon = SeqLocFree (trna->anticodon);
6635 trna->anticodon = slp;
6636 FreeAllFuzz (trna->anticodon);
6637 }
6638 }
6639 }
6640 break;
6641 default :
6642 break;
6643 }
6644
6645 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
6646 if (StringICmp (gbq->qual, "rpt_unit_range") != 0) continue;
6647 if (! RptUnitInBaseRange (gbq->val, &from, &to)) continue;
6648 if (ptbsp == NULL || ptbsp->repr != Seq_repr_raw) continue;
6649 sip = SeqIdFindBest (ptbsp->id, 0);
6650 if (sip == NULL) continue;
6651 MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
6652 MemSet ((Pointer) &vn, 0, sizeof (ValNode));
6653 if (from < to) {
6654 sint.from = from;
6655 sint.to = to;
6656 sint.strand = Seq_strand_plus;
6657 } else {
6658 sint.to = from;
6659 sint.from = to;
6660 sint.strand = Seq_strand_minus;
6661 }
6662 sint.id = sip;
6663 vn.choice = SEQLOC_INT;
6664 vn.data.ptrvalue = (Pointer) &sint;
6665 vn.next = NULL;
6666 slp = SeqLocMerge (bsp, &vn, NULL, FALSE, TRUE, FALSE);
6667 if (slp != NULL) {
6668 if (slp->choice == SEQLOC_INT) {
6669 sintp = (SeqIntPtr) slp->data.ptrvalue;
6670 if (sintp != NULL) {
6671 buf [0] = '\0';
6672 if (sintp->strand == Seq_strand_minus) {
6673 sprintf (buf, "%ld..%ld", (long) sintp->to + 1, (long) sintp->from + 1);
6674 } else {
6675 sprintf (buf, "%ld..%ld", (long) sintp->from + 1, (long) sintp->to + 1);
6676 }
6677 if (StringDoesHaveText (buf)) {
6678 gbq->val = MemFree (gbq->val);
6679 gbq->val = StringSave (buf);
6680 }
6681 }
6682 }
6683 SeqLocFree (slp);
6684 }
6685 }
6686 }
6687
6688 // Only for SegSets
/*
 * Descriptor callback used on the parts of a segmented set.
 *
 *   sdp      - descriptor being visited
 *   userdata - the BioseqPtr that should receive the new feature
 *
 * For a pub descriptor a SEQFEAT_PUB feature is created on the Bioseq, for a
 * source descriptor a SEQFEAT_BIOSRC feature; in both cases the feature gets
 * its own copy of the descriptor data made with AsnIoMemCopy.  Descriptors of
 * any other type, or with no data, are ignored.  The descriptor itself is not
 * changed or removed here.
 */
static void PartDescToFeatProc(
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  SeqFeatPtr    feat;
  PubdescPtr    pub;
  BioSourcePtr  src;
  BioseqPtr     target;

  if (sdp == NULL) return;
  target = (BioseqPtr) userdata;
  if (target == NULL) return;

  switch (sdp->choice) {
    case Seq_descr_pub :
      pub = (PubdescPtr) sdp->data.ptrvalue;
      if (pub == NULL) break;
      feat = CreateNewFeatureOnBioseq (target, SEQFEAT_PUB, NULL);
      if (feat == NULL) break;
      feat->data.value.ptrvalue = AsnIoMemCopy (pub, (AsnReadFunc) PubdescAsnRead, (AsnWriteFunc) PubdescAsnWrite);
      break;
    case Seq_descr_source :
      src = (BioSourcePtr) sdp->data.ptrvalue;
      if (src == NULL) break;
      feat = CreateNewFeatureOnBioseq (target, SEQFEAT_BIOSRC, NULL);
      if (feat == NULL) break;
      feat->data.value.ptrvalue = AsnIoMemCopy (src, (AsnReadFunc) BioSourceAsnRead, (AsnWriteFunc) BioSourceAsnWrite);
      break;
    default :
      break;
  }
}
6720
6721 // Only for SegSets
/*
 * Bioseq callback: runs PartDescToFeatProc over the descriptors of bsp,
 * passing bsp itself as the feature target.  Virtual Bioseqs are skipped.
 * The userdata argument is not used.
 */
static void MovePartDescToFeat (
  BioseqPtr bsp,
  Pointer userdata
)

{
  if (bsp != NULL && bsp->repr != Seq_repr_virtual) {
    VisitDescriptorsOnBsp (bsp, (Pointer) bsp, PartDescToFeatProc);
  }
}
6732
6733 // Only for SegSets
/*
 * Descriptor callback that tracks one cit-sub date across all pub descriptors.
 *
 *   sdp      - descriptor being visited; only pub descriptors are examined
 *   userdata - address of a DatePtr holding the date selected so far
 *
 * The first cit-sub date seen is stored in *userdata.  After that a date
 * replaces the stored one only when DateMatch (stored, candidate, TRUE)
 * returns 1 (presumably meaning the stored date is the later of the two -
 * TODO confirm against DateMatch).  The stored pointer aliases the date inside
 * the descriptor; nothing is copied here.
 */
static void PartCitSubDatesCompProc(
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  ValNodePtr    entry;
  PubdescPtr    pubdesc;
  DatePtr PNTR  selected;
  CitSubPtr     sub;

  if (sdp == NULL) return;
  if (sdp->choice != Seq_descr_pub) return;
  selected = (DatePtr PNTR) userdata;
  if (selected == NULL) return;

  pubdesc = (PubdescPtr) sdp->data.ptrvalue;
  if (pubdesc == NULL) return;

  entry = pubdesc->pub;
  while (entry != NULL) {
    if (entry->choice == PUB_Sub) {
      sub = (CitSubPtr) entry->data.ptrvalue;
      if (sub != NULL && sub->date != NULL) {
        /* DateMatch is only consulted once a date has already been stored */
        if (*selected == NULL || DateMatch (*selected, sub->date, TRUE) == 1) {
          *selected = sub->date;
        }
      }
    }
    entry = entry->next;
  }
}
6767
6768
6769 // Only for SegSets
/*
 * Bioseq callback: applies PartCitSubDatesCompProc to every descriptor on
 * bsp, forwarding userdata (the address of the DatePtr being tracked)
 * unchanged.  Does nothing for a NULL bsp.
 */
static void CompPartCitSubDates(
  BioseqPtr bsp,
  Pointer userdata
)

{
  if (bsp != NULL) {
    VisitDescriptorsOnBsp (bsp, userdata, PartCitSubDatesCompProc);
  }
}
6779
6780 // Only for SegSets
/*
 * Descriptor callback that forces every cit-sub date to a common value.
 *
 *   sdp      - descriptor being visited; only pub descriptors are examined
 *   userdata - the DatePtr to synchronize to (nothing is done when NULL)
 *
 * Each cit-sub that already has a date which DateMatch reports as different
 * from the reference gets its date freed and replaced by a DateDup copy of
 * the reference.  Cit-subs without a date are left alone.
 */
static void PartCitSubDatesSyncProc (
  SeqDescrPtr sdp,
  Pointer userdata
)

{
  ValNodePtr  entry;
  PubdescPtr  pubdesc;
  DatePtr     reference;
  CitSubPtr   sub;

  if (sdp == NULL) return;
  if (sdp->choice != Seq_descr_pub) return;
  reference = (DatePtr) userdata;
  if (reference == NULL) return;

  pubdesc = (PubdescPtr) sdp->data.ptrvalue;
  if (pubdesc == NULL) return;

  entry = pubdesc->pub;
  while (entry != NULL) {
    if (entry->choice == PUB_Sub) {
      sub = (CitSubPtr) entry->data.ptrvalue;
      if (sub != NULL && sub->date != NULL &&
          DateMatch (sub->date, reference, TRUE) != 0) {
        DateFree (sub->date);
        sub->date = DateDup (reference);
      }
    }
    entry = entry->next;
  }
}
6809
6810 // Only for SegSets
/*
 * Bioseq callback: applies PartCitSubDatesSyncProc to every descriptor on
 * bsp, forwarding userdata (the reference DatePtr) unchanged.
 *
 * A NULL bsp is rejected up front, matching the guard in the companion
 * callback CompPartCitSubDates, so this function no longer relies on the
 * visitor to tolerate a NULL Bioseq.
 */
static void SyncPartCitSubDates (
  BioseqPtr bsp,
  Pointer userdata
)

{
  if (bsp == NULL) return;
  VisitDescriptorsOnBsp (bsp, userdata, PartCitSubDatesSyncProc);
}
6819
6820 // Only for SegSets
/*
 * Replaces the NULL gaps of a segmented Bioseq with references to a single
 * 100-base virtual Bioseq, which is appended to the parts set.
 *
 *   sep - top-level entry; must be a Bioseq-set that wraps (and is not itself)
 *         the segmented set.
 *
 * Nothing is done when sep is not a set, is a mut..eco range / wgs /
 * small-genome set, contains no segset, has mixed molecule types, has mixed
 * GenBank source or origin fields, is missing MolInfo, or when the segset
 * does not have the expected layout (segmented Bioseq followed by a parts set).
 *
 * If the segmented Bioseq has no NULL gaps, one is inserted after the first
 * component and NormalizeNullsBetween is called to fill in between all
 * components.  Every NULL is then turned into a SEQLOC_WHOLE on the virtual
 * Bioseq, the segmented length grows by 100 per gap, and features are
 * reindexed.
 *
 * NOTE(review): the original carried a "hasvirt" flag that was never set, so
 * existing virtual parts are not detected here even though the comment speaks
 * of "no nulls or virtuals".  The flag is dropped without changing behavior;
 * confirm whether virtual detection should be added.
 */
extern void SegSeqNullToVirtual (SeqEntryPtr sep)

{
  BioseqSetPtr  bssp;
  Uint2         entityID;
  GBData        gbd;
  Boolean       hasnulls = FALSE;
  Boolean       missingMolInfo = FALSE;
  Uint1         mol = 0;
  SeqEntryPtr   partssep = NULL;
  BioseqSetPtr  partsset = NULL;
  BioseqPtr     segbsp = NULL;
  SeqEntryPtr   segsep = NULL;
  SeqEntryPtr   segseq = NULL;
  BioseqSetPtr  segset = NULL;
  SeqLocPtr     slp;
  BioseqPtr     virtbsp;
  ValNode       vn;
  SeqLocPtr     vslp;

  if (sep == NULL) return;
  if (! IS_Bioseq_set (sep)) return;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  /* skip pop/phy/mut/eco/wgs sets for now */
  if (bssp->_class >= BioseqseqSet_class_mut_set && bssp->_class <= BioseqseqSet_class_eco_set) return;
  if (bssp->_class == BioseqseqSet_class_wgs_set) return;
  if (bssp->_class == BioseqseqSet_class_small_genome_set) return;

  /* find SeqEntryPtr parent of single seg set */
  SeqEntryExplore (sep, (Pointer) &segsep, FindSegSet);
  if (segsep == NULL) return;

  /* do not handle just segset without nucprot set wrapper for now */
  if (segsep == sep) return;

  /* skip the few cases of mixed molecule types */
  VisitBioseqsInSep (segsep, (Pointer) &mol, LookForMixedMols);
  if (mol != Seq_mol_dna && mol != Seq_mol_rna && mol != Seq_mol_aa && mol != Seq_mol_na && mol != Seq_mol_other) return;

  /* skip mixed gbblock source or origin fields for now */
  MemSet ((Pointer) &gbd, 0, sizeof (gbd));
  VisitDescriptorsInSep (sep, (Pointer) &gbd, FindGenBankDiffs);
  if (gbd.mixedsources || gbd.mixedorigins) return;

  /* avoid copying protein molinfo if nucleotide molinfo is missing */
  VisitBioseqsInSep (sep, (Pointer) &missingMolInfo, CheckForMissingMolInfo);
  if (missingMolInfo) return;

  /* expected layout: segmented Bioseq first, parts set right after it */
  if (! IS_Bioseq_set (segsep)) return;
  segset = (BioseqSetPtr) segsep->data.ptrvalue;
  if (segset == NULL) return;
  segseq = segset->seq_set;
  if (segseq == NULL) return;
  if (! IS_Bioseq (segseq)) return;
  segbsp = (BioseqPtr) segseq->data.ptrvalue;
  if (segbsp == NULL) return;
  if (segbsp->repr != Seq_repr_seg) return;
  partssep = segseq->next;
  if (partssep == NULL) return;
  if (! IS_Bioseq_set (partssep)) return;
  partsset = (BioseqSetPtr) partssep->data.ptrvalue;
  if (partsset == NULL) return;

  entityID = segbsp->idx.entityID;

  /* wrap the segment list in a stack SEQLOC_MIX so SeqLocFindNext can walk it */
  MemSet ((Pointer) &vn, 0, sizeof (vn));
  vn.choice = SEQLOC_MIX;
  vn.extended = 0;
  vn.data.ptrvalue = segbsp->seq_ext;
  vn.next = NULL;

  /* check to see if it already has NULL gaps */
  slp = SeqLocFindNext (&vn, NULL);
  while (slp != NULL) {
    if (slp->choice == SEQLOC_NULL) {
      hasnulls = TRUE;
    }
    slp = SeqLocFindNext (&vn, slp);
  }

  /* if no nulls, add one after the first component and then fill in between all components */
  if (! hasnulls) {
    slp = SeqLocFindNext (&vn, NULL);
    if (slp != NULL) {
      vslp = ValNodeNew (NULL);
      if (vslp != NULL) {
        vslp->choice = SEQLOC_NULL;
        vslp->next = slp->next;
        slp->next = vslp;
        NormalizeNullsBetween (&vn);
        hasnulls = TRUE;
      }
    }
  }

  if (! hasnulls) return;

  /* virtual part of conventional 100 base gap length */
  virtbsp = BioseqNew ();
  if (virtbsp == NULL) return;
  virtbsp->id = MakeUniqueSeqID ("virtual_");
  virtbsp->repr = Seq_repr_virtual;
  virtbsp->mol = segbsp->mol;
  virtbsp->seq_data_type = 0;
  virtbsp->seq_ext_type = 0;
  virtbsp->seq_data = NULL;
  virtbsp->seq_ext = NULL;
  virtbsp->length = 100;

  /* put virtual segments between real segments (wrapper rebuilt from seq_ext) */
  MemSet ((Pointer) &vn, 0, sizeof (vn));
  vn.choice = SEQLOC_MIX;
  vn.extended = 0;
  vn.data.ptrvalue = segbsp->seq_ext;
  vn.next = NULL;
  slp = SeqLocFindNext (&vn, NULL);
  while (slp != NULL) {
    if (slp->choice == SEQLOC_NULL) {
      slp->choice = SEQLOC_WHOLE;
      slp->data.ptrvalue = SeqIdDup (virtbsp->id);
      segbsp->length += virtbsp->length;
    }
    slp = SeqLocFindNext (&vn, slp);
  }

  /* package virtual bioseq in parts */
  ValNodeAddPointer (&(partsset->seq_set), 1, (Pointer) virtbsp);

  /* reindex for new segmented bioseq length */
  SeqMgrIndexFeatures (entityID, NULL);
}
6958
6959 // Only for SegSets
/*
 * Unconditional variant of the NULL-to-virtual conversion: locates the
 * segmented set under sep and replaces every NULL gap of its segmented Bioseq
 * with a SEQLOC_WHOLE on one shared 100-base virtual Bioseq, which is added
 * to the parts set.
 *
 * Unlike the public variant there are no set-class, molecule, GenBank-block
 * or MolInfo checks; only the structural layout (segmented Bioseq followed by
 * a parts set) is verified.  If the segment list has neither NULL gaps nor
 * virtual parts, a NULL is inserted after the first component and
 * NormalizeNullsBetween is called before the conversion.  If it has virtual
 * parts but no NULLs, nothing is changed.
 */
static void ForceSegSeqNullToVirtual (SeqEntryPtr sep)

{
  Uint2         eid;
  Boolean       foundNull = FALSE;
  Boolean       foundVirtual = FALSE;
  SeqLocPtr     gap;
  SeqLocPtr     loc;
  BioseqPtr     master = NULL;
  SeqEntryPtr   masterEntry = NULL;
  ValNode       mix;
  BioseqPtr     partBsp;
  BioseqSetPtr  parts = NULL;
  SeqEntryPtr   partsEntry = NULL;
  BioseqSetPtr  segSet = NULL;
  SeqEntryPtr   segSetEntry = NULL;
  BioseqPtr     spacer;

  if (sep == NULL) return;

  SeqEntryExplore (sep, (Pointer) &segSetEntry, FindSegSet);
  if (segSetEntry == NULL) return;
  if (! IS_Bioseq_set (segSetEntry)) return;
  segSet = (BioseqSetPtr) segSetEntry->data.ptrvalue;
  if (segSet == NULL) return;

  /* first member must be the segmented Bioseq */
  masterEntry = segSet->seq_set;
  if (masterEntry == NULL) return;
  if (! IS_Bioseq (masterEntry)) return;
  master = (BioseqPtr) masterEntry->data.ptrvalue;
  if (master == NULL) return;
  if (master->repr != Seq_repr_seg) return;

  /* second member must be the parts set */
  partsEntry = masterEntry->next;
  if (partsEntry == NULL) return;
  if (! IS_Bioseq_set (partsEntry)) return;
  parts = (BioseqSetPtr) partsEntry->data.ptrvalue;
  if (parts == NULL) return;

  eid = master->idx.entityID;

  /* survey the segment list for existing NULL gaps and virtual parts */
  MemSet ((Pointer) &mix, 0, sizeof (mix));
  mix.choice = SEQLOC_MIX;
  mix.extended = 0;
  mix.data.ptrvalue = master->seq_ext;
  mix.next = NULL;
  for (loc = SeqLocFindNext (&mix, NULL); loc != NULL; loc = SeqLocFindNext (&mix, loc)) {
    if (loc->choice == SEQLOC_NULL) {
      foundNull = TRUE;
      continue;
    }
    partBsp = BioseqFindFromSeqLoc (loc);
    if (partBsp != NULL && partBsp->repr == Seq_repr_virtual) {
      foundVirtual = TRUE;
    }
  }

  /* neither nulls nor virtuals: seed one null, then fill in between all components */
  if (! foundNull && ! foundVirtual) {
    loc = SeqLocFindNext (&mix, NULL);
    if (loc != NULL) {
      gap = ValNodeNew (NULL);
      if (gap != NULL) {
        gap->choice = SEQLOC_NULL;
        gap->next = loc->next;
        loc->next = gap;
        NormalizeNullsBetween (&mix);
        foundNull = TRUE;
      }
    }
  }

  if (! foundNull) return;

  /* virtual part of conventional 100 base gap length */
  spacer = BioseqNew ();
  if (spacer == NULL) return;
  spacer->id = MakeUniqueSeqID ("virtual_");
  spacer->repr = Seq_repr_virtual;
  spacer->mol = master->mol;
  spacer->seq_data_type = 0;
  spacer->seq_ext_type = 0;
  spacer->seq_data = NULL;
  spacer->seq_ext = NULL;
  spacer->length = 100;

  /* rebuild the wrapper from seq_ext, then point every NULL at the spacer */
  MemSet ((Pointer) &mix, 0, sizeof (mix));
  mix.choice = SEQLOC_MIX;
  mix.extended = 0;
  mix.data.ptrvalue = master->seq_ext;
  mix.next = NULL;
  for (loc = SeqLocFindNext (&mix, NULL); loc != NULL; loc = SeqLocFindNext (&mix, loc)) {
    if (loc->choice != SEQLOC_NULL) continue;
    loc->choice = SEQLOC_WHOLE;
    loc->data.ptrvalue = SeqIdDup (spacer->id);
    master->length += spacer->length;
  }

  /* package virtual bioseq in parts */
  ValNodeAddPointer (&(parts->seq_set), 1, (Pointer) spacer);

  /* reindex for new segmented bioseq length */
  SeqMgrIndexFeatures (eid, NULL);
}
7072
7073 // Only for SegSets
/*
 * Converts the single segmented set found under sep into one delta Bioseq.
 *
 *   sep      - entry to convert; must be a Bioseq-set that wraps (and is not
 *              itself) the segmented set
 *   entityID - entity whose feature indexes are cleared after the rewrite
 *   cleanup  - when TRUE, SeriousSeqEntryCleanup and
 *              RemoveAllNcbiCleanupUserObjects are run on sep at the end
 *
 * Returns TRUE when the conversion was carried out.  Returns FALSE, leaving
 * the entry untouched, when sep is not a suitable set (wrong kind or class,
 * no segset, mixed molecule types, missing MolInfo, unexpected segset
 * layout).  Also returns FALSE if allocating the delta Bioseq or its entry
 * node fails, but by then the parts have already been modified.
 *
 * The statement order matters: descriptors and features are moved onto the
 * segmented Bioseq before the delta is built, and the first GenBank block,
 * MolInfo and title are captured before the old segset is deleted.
 *
 * NOTE(review): on the two allocation-failure returns gbp/mip/ttl (and
 * deltabsp on the second) are not released; they appear to be owned copies,
 * since they are later attached with SeqDescrAddPointer - confirm and free.
 */
static Boolean ConvertSegSetToDeltaSeqInt(SeqEntryPtr sep, Uint2 entityID, Boolean cleanup)

{
  BioseqSetPtr  bssp;
  GBData        gbd;
  DatePtr       cp = NULL;
  DatePtr       dp = NULL;
  BioseqPtr     deltabsp;
  GBBlockPtr    gbp = NULL;
  MolInfoPtr    mip = NULL;
  Boolean       missingMolInfo = FALSE;
  Uint1         mol = 0;
  SeqEntryPtr   partssep = NULL;
  BioseqPtr     segbsp = NULL;
  SeqEntryPtr   segsep = NULL;
  SeqEntryPtr   segseq = NULL;
  BioseqSetPtr  segset = NULL;
  SeqIdPtr      tmpid;
  CharPtr       ttl = NULL;
  ValNodePtr    vnp;

  if (sep == NULL) return FALSE;
  if (! IS_Bioseq_set (sep)) return FALSE;
  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return FALSE;

  /* skip pop/phy/mut/eco/wgs sets for now */
  if (bssp->_class >= BioseqseqSet_class_mut_set && bssp->_class <= BioseqseqSet_class_eco_set) return FALSE;
  if (bssp->_class == BioseqseqSet_class_wgs_set) return FALSE;
  if (bssp->_class == BioseqseqSet_class_small_genome_set) return FALSE;

  /* find SeqEntryPtr parent of single seg set */
  SeqEntryExplore (sep, (Pointer) &segsep, FindSegSet);
  if (segsep == NULL) return FALSE;

  /* do not handle just segset without nucprot set wrapper for now */
  if (segsep == sep) return FALSE;

  /* skip the few cases of mixed molecule types */
  VisitBioseqsInSep (segsep, (Pointer) &mol, LookForMixedMols);
  if (mol != Seq_mol_dna && mol != Seq_mol_rna && mol != Seq_mol_aa && mol != Seq_mol_na && mol != Seq_mol_other) return FALSE;

  /*
   * GenBank block differences are still collected, but the result is
   * deliberately not acted on here (the rejection below is disabled).
   * NOTE(review): the visit may be removable if FindGenBankDiffs only fills gbd.
   */
  MemSet ((Pointer) &gbd, 0, sizeof (gbd));
  VisitDescriptorsInSep (sep, (Pointer) &gbd, FindGenBankDiffs);
  /*
  if (gbd.mixedsources || gbd.mixedorigins) return FALSE;
  */

  /* avoid copying protein molinfo if nucleotide molinfo is missing */
  VisitBioseqsInSep (sep, (Pointer) &missingMolInfo, CheckForMissingMolInfo);
  if (missingMolInfo) return FALSE;

  /* expected layout: segmented Bioseq first, parts right after it */
  if (! IS_Bioseq_set (segsep)) return FALSE;
  segset = (BioseqSetPtr) segsep->data.ptrvalue;
  if (segset == NULL) return FALSE;
  segseq = segset->seq_set;
  if (segseq == NULL) return FALSE;
  if (! IS_Bioseq (segseq)) return FALSE;
  segbsp = (BioseqPtr) segseq->data.ptrvalue;
  if (segbsp == NULL) return FALSE;
  if (segbsp->repr != Seq_repr_seg) return FALSE;
  partssep = segseq->next;
  if (partssep == NULL) return FALSE;

  /* synchronize dates of cit-sub descriptors on parts; dp aliases a date in
     the parts, so a private copy is used while those dates are rewritten */
  VisitBioseqsInSep (partssep, (Pointer) &dp, CompPartCitSubDates);
  cp = DateDup (dp);
  VisitBioseqsInSep (partssep, (Pointer) cp, SyncPartCitSubDates);
  DateFree (cp);
  /* move pub descriptors up from parts before adding virtual sequence */
  SeqEntryPubsAsn4 (sep, FALSE);

  /* then convert any remaining pub and source descriptors on parts to features */
  VisitBioseqsInSep (partssep, NULL, MovePartDescToFeat);

  /* put virtual segments of conventional 100 bases between real segments */
  ForceSegSeqNullToVirtual (sep);

  /* map feature locations to segmented bioseq */
  VisitFeaturesInSep (sep, NULL, MapSegFeatToMaster);

  /* capture the descriptors that will be re-attached to the delta Bioseq */
  VisitDescriptorsInSep (segsep, (Pointer) &gbp, CopyFirstGBBlock);
  VisitDescriptorsInSep (segsep, (Pointer) &mip, CopyFirstMolInfo);
  VisitDescriptorsInSep (segsep, (Pointer) &ttl, CopyFirstTitle);

  if (gbp == NULL) {
    gbp = GBBlockNew ();
  }
  /* populate secondary accessions */
  VisitBioseqsInSep (partssep, (Pointer) gbp, AddPartAccns);

  deltabsp = BioseqNew ();
  if (deltabsp == NULL) return FALSE;
  deltabsp->id = MakeUniqueSeqID ("delta_");
  deltabsp->repr = Seq_repr_delta;
  deltabsp->mol = segbsp->mol;
  deltabsp->seq_data_type = 0;
  deltabsp->seq_ext_type = 4;
  deltabsp->seq_data = NULL;
  deltabsp->seq_ext = NULL;
  deltabsp->length = 0;

  /* populate Seq-hist.replaces */
  VisitBioseqsInSep (partssep, (Pointer) deltabsp, AddPartHist);

  /* construct delta seq from segmented parts */
  SeqMgrExploreSegments (segbsp, (Pointer) deltabsp, AddSegToDeltaSeq);

  /* move features in segset to delta seq */
  SeqEntryExplore (segsep, (Pointer) deltabsp, MoveAnnotsToDeltaSeq);

  /* insert delta sequence into chain, right after the segset entry */
  vnp = SeqDescrNew (NULL);
  if (vnp == NULL) return FALSE;
  vnp->choice = 1;
  vnp->data.ptrvalue = (Pointer) deltabsp;
  vnp->next = segsep->next;
  segsep->next = vnp;

  /* keep segmented bioseq IDs: swap them with the temporary delta ID */
  tmpid = segbsp->id;
  segbsp->id = deltabsp->id;
  deltabsp->id = tmpid;

  SeqMgrDeleteFromBioseqIndex (segbsp);
  SeqMgrReplaceInBioseqIndex (deltabsp);

  /* remove old segset; segsep, segset, segbsp and partssep must not be used after this */
  segset->idx.deleteme = TRUE;
  DeleteMarkedObjects (0, OBJ_SEQENTRY, sep);

  /* add descriptors */
  if (ttl != NULL) {
    SeqDescrAddPointer (&(deltabsp->descr), Seq_descr_title, (Pointer) ttl);
  }
  if (mip != NULL) {
    SeqDescrAddPointer (&(deltabsp->descr), Seq_descr_molinfo, (Pointer) mip);
  }
  if (gbp != NULL) {
    SeqDescrAddPointer (&(deltabsp->descr), Seq_descr_genbank, (Pointer) gbp);
  }

  BioseqPack (deltabsp);

  /* indexes are out of date */
  SeqMgrClearFeatureIndexes (entityID, NULL);

  if (cleanup) {
    SeriousSeqEntryCleanup (sep, NULL, NULL);
    RemoveAllNcbiCleanupUserObjects (sep);
  }

  return TRUE;
}
7242
7243 // Only for SegSets
/*
 * Public entry point for segset-to-delta conversion.
 *
 *   sep     - top-level entry; must be a Bioseq-set
 *   cleanup - forwarded to the internal converter
 *
 * For a set whose class is in the mut..eco range, or is wgs or small-genome,
 * each member of the set is converted separately and the individual results
 * are ignored (the function then returns TRUE).  For any other set the set
 * itself is converted and that result is returned.  Afterwards any segsets
 * still present are marked and deleted, feature indexes are cleared, and the
 * entry is cleaned up.
 *
 * NOTE(review): the final SeriousSeqEntryCleanup /
 * RemoveAllNcbiCleanupUserObjects run regardless of `cleanup`; confirm that
 * is intended.
 */
extern Boolean ConvertSegSetToDeltaSeqEx (SeqEntryPtr sep, Boolean cleanup)

{
  SeqEntryPtr   member;
  Uint2         eid;
  Boolean       perMember;
  Boolean       status = TRUE;
  BioseqSetPtr  top;

  if (sep == NULL) return FALSE;
  if (! IS_Bioseq_set (sep)) return FALSE;
  top = (BioseqSetPtr) sep->data.ptrvalue;
  if (top == NULL) return FALSE;

  eid = ObjMgrGetEntityIDForChoice (sep);

  /* container-style sets are converted one member at a time */
  perMember = FALSE;
  if (top->_class >= BioseqseqSet_class_mut_set && top->_class <= BioseqseqSet_class_eco_set) {
    perMember = TRUE;
  } else if (top->_class == BioseqseqSet_class_wgs_set) {
    perMember = TRUE;
  } else if (top->_class == BioseqseqSet_class_small_genome_set) {
    perMember = TRUE;
  }

  if (! perMember) {
    status = ConvertSegSetToDeltaSeqInt (sep, eid, cleanup);
  } else {
    member = top->seq_set;
    while (member != NULL) {
      ConvertSegSetToDeltaSeqInt (member, eid, cleanup);
      member = member->next;
    }
  }

  /* in case segset was not marked */
  VisitSetsInSep (sep, NULL, MarkSegSets);

  /* remove old segset */
  DeleteMarkedObjects (0, OBJ_SEQENTRY, sep);

  /* indexes are out of date */
  SeqMgrClearFeatureIndexes (eid, NULL);

  SeriousSeqEntryCleanup (sep, NULL, NULL);
  RemoveAllNcbiCleanupUserObjects (sep);

  return status;
}
7284
ConvertSegSetToDeltaSeq(SeqEntryPtr sep)7285 extern Boolean ConvertSegSetToDeltaSeq (SeqEntryPtr sep)
7286
7287 {
7288 return ConvertSegSetToDeltaSeqEx (sep, TRUE);
7289 }
7290
7291 // Only for SegSets
/*
 * Bioseq-set callback: for a set of class "parts", runs MovePartDescToFeat on
 * each Bioseq in the set.  Sets of any other class are ignored.  The userdata
 * argument is not used.
 */
static void ConvPartDescToFeatCallback(
  BioseqSetPtr bssp,
  Pointer userdata
)

{
  if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts) {
    VisitBioseqsInSet (bssp, NULL, MovePartDescToFeat);
  }
}
7303
7304 // Only for SegSets
/*
 * Visits every Bioseq-set under sep with ConvPartDescToFeatCallback, which
 * turns pub and source descriptors on the members of "parts" sets into
 * features on those Bioseqs.
 */
extern void ConvertPartDescToFeat (SeqEntryPtr sep)

{
  VisitSetsInSep (sep, NULL, ConvPartDescToFeatCallback);

  return;
}
7310
7311 // Not used
/*
 * Runs the automatic definition-line generator over sep with default feature
 * requests and the required modifiers.
 *
 *   sep - a Bioseq or Bioseq-set entry; its entityID must be at least 1
 *
 * Returns without doing anything when sep is NULL, is neither a Bioseq nor a
 * Bioseq-set, has no data or entity ID, or when the modifier table cannot be
 * allocated.  Otherwise nuc-prot set titles are removed, definition lines are
 * built from the feature clause list (using the modifier indices from the
 * table when the clauses are already unique, FindBestModifiers otherwise),
 * protein titles are cleared and re-instantiated, and popset titles are
 * added.  The SeqEntry scope is set to sep for the duration and restored to
 * its previous value at the end.
 */
extern void SimpleAutoDef (SeqEntryPtr sep)

{
  ValNodePtr                     clauses = NULL;
  Uint2                          eid = 0;
  Int4                           i;
  BioseqPtr                      oneSeq = NULL;
  BioseqSetPtr                   oneSet = NULL;
  ValNodePtr                     picked = NULL;
  SeqEntryPtr                    priorScope;
  DeflineFeatureRequestList      requests;
  OrganismDescriptionModifiers   orgMods;
  ModifierItemLocalPtr           table;

  if (sep == NULL) return;

  /* the entity ID comes from whichever object the entry holds */
  if (IS_Bioseq (sep)) {
    oneSeq = (BioseqPtr) sep->data.ptrvalue;
    if (oneSeq == NULL) return;
    eid = oneSeq->idx.entityID;
  } else if (IS_Bioseq_set (sep)) {
    oneSet = (BioseqSetPtr) sep->data.ptrvalue;
    if (oneSet == NULL) return;
    eid = oneSet->idx.entityID;
  } else {
    return;
  }

  if (eid < 1) return;

  table = MemNew (NumDefLineModifiers () * sizeof (ModifierItemLocalData));
  if (table == NULL) return;

  InitFeatureRequests (&requests);

  SetRequiredModifiers (table);
  CountModifiers (table, sep);

  InitOrganismDescriptionModifiers (&orgMods, sep);

  RemoveNucProtSetTitles (sep);
  priorScope = SeqEntrySetScope (sep);

  BuildDefLineFeatClauseList (sep, eid, &requests,
                              DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE,
                              &clauses);
  if (! AreFeatureClausesUnique (clauses)) {
    picked = FindBestModifiers (sep, table);
  } else {
    picked = GetModifierIndicesFromModList (table);
  }

  BuildDefinitionLinesFromFeatureClauseLists (clauses, table,
                                              picked, &orgMods);
  DefLineFeatClauseListFree (clauses);

  /* release the per-modifier value lists, then the table itself */
  i = 0;
  while (i < NumDefLineModifiers ()) {
    ValNodeFree (table [i].values_seen);
    i++;
  }
  MemFree (table);
  ValNodeFree (picked);

  ClearProteinTitlesInNucProts (eid, NULL);
  InstantiateProteinTitles (eid, NULL);
  /*
  RemovePopsetTitles (sep);
  */
  AddPopsetTitles (sep, &requests, DEFAULT_ORGANELLE_CLAUSE, FALSE, FALSE);

  SeqEntrySetScope (priorScope);
}
7381 //LCOV_EXCL_STOP
7382
7383