1 /* objsset.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: objsset.c
27 *
28 * Author: James Ostell
29 *
30 * Version Creation Date: 4/1/91
31 *
32 * $Revision: 6.19 $
33 *
34 * File Description: Object manager for module NCBI-Seqset
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44
45 /** for ErrPostEx() ****/
46
/* Module and file identifiers; THIS_MODULE / THIS_FILE expose them by macro name. */
static char *this_module = "ncbiobj";
static char *this_file = __FILE__;
#define THIS_MODULE this_module
#define THIS_FILE this_file
51
52 /**********************/
53
54 #include <objsset.h> /* the Bioseq-set interface */
55 #include <objmgr.h>
56 #include <sequtil.h> /* used by SeqEntryAsnGet() */
57 #include <asnsset.h> /* the AsnTool header */
58
59 static Boolean loaded = FALSE;
60
61 /*****************************************************************************
62 *
63 * BioseqSet ObjMgr Routines
64 *
65 *****************************************************************************/
66 static CharPtr bioseqsettypename = "BioseqSet";
67
BioseqSetNewFunc(void)68 static Pointer LIBCALLBACK BioseqSetNewFunc (void)
69 {
70 return (Pointer) BioseqSetNew();
71 }
72
BioseqSetFreeFunc(Pointer data)73 static Pointer LIBCALLBACK BioseqSetFreeFunc (Pointer data)
74 {
75 return (Pointer) BioseqSetFree ((BioseqSetPtr) data);
76 }
77
BioseqSetAsnWriteFunc(Pointer data,AsnIoPtr aip,AsnTypePtr atp)78 static Boolean LIBCALLBACK BioseqSetAsnWriteFunc (Pointer data, AsnIoPtr aip, AsnTypePtr atp)
79 {
80 return BioseqSetAsnWrite((BioseqSetPtr)data, aip, atp);
81 }
82
BioseqSetAsnReadFunc(AsnIoPtr aip,AsnTypePtr atp)83 static Pointer LIBCALLBACK BioseqSetAsnReadFunc (AsnIoPtr aip, AsnTypePtr atp)
84 {
85 return (Pointer) BioseqSetAsnRead (aip, atp);
86 }
87
88 typedef struct bestbioseqlabel {
89 BioseqPtr bsp;
90 Boolean is_na,
91 has_accession,
92 has_gb;
93 } BestBioseqLabel, PNTR BestBioseqLabelPtr;
94
/*
 * SeqEntryExplore() callback used by BioseqSetLabel() to choose the Bioseq
 * whose id will label the whole set.
 *
 *   sep    - entry being visited; anything that is not a Bioseq is ignored
 *   data   - BestBioseqLabelPtr holding the best candidate found so far
 *   index, indent - required by the callback signature, not used here
 *
 * A candidate replaces the current best when, checked in this order:
 *   1. there is no current best;
 *   2. the current best has an accession: candidate also has one AND is longer;
 *   3. otherwise the candidate has an accession;
 *   4. the same two-step rule applied to has_gb, then to is_na;
 *   5. finally, the candidate is simply longer.
 */
static void FindBestBioseqLabel (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqPtr bsp;
    BestBioseqLabelPtr bblp;
    SeqIdPtr sip;
    TextSeqIdPtr tsip;
    Boolean is_na,
            has_accession = FALSE,
            has_gb = FALSE,
            longer,
            takeit;

    if (sep == NULL || data == NULL)
        return;
    if (! IS_Bioseq(sep))
        return;

    bsp = (BioseqPtr)(sep->data.ptrvalue);
    if (bsp == NULL)        /* entry without a Bioseq attached: nothing to rank */
        return;
    bblp = (BestBioseqLabelPtr)data;

    is_na = ISA_na(bsp->mol);
    for (sip = bsp->id; sip != NULL; sip = sip->next)
    {
        switch (sip->choice)
        {
            case SEQID_GENBANK:
            case SEQID_EMBL:
            case SEQID_DDBJ:
            case SEQID_OTHER:
            case SEQID_TPG:
            case SEQID_TPE:
            case SEQID_TPD:
            case SEQID_GPIPE:
                has_gb = TRUE;
                /* fall through: these ids are also checked for an accession */
            case SEQID_PIR:
            case SEQID_SWISSPROT:
            case SEQID_PRF:
                tsip = (TextSeqIdPtr)(sip->data.ptrvalue);
                if (tsip != NULL && tsip->accession != NULL)
                    has_accession = TRUE;
                break;
            default:
                break;
        }
    }

    if (bblp->bsp == NULL)
    {
        takeit = TRUE;
    }
    else
    {
        longer = (Boolean)(bsp->length > bblp->bsp->length);

        if (bblp->has_accession)
            takeit = (Boolean)(has_accession && longer);
        else if (has_accession)
            takeit = TRUE;
        else if (bblp->has_gb)
            takeit = (Boolean)(has_gb && longer);
        else if (has_gb)
            takeit = TRUE;
        else if (bblp->is_na)
            takeit = (Boolean)(is_na && longer);
        else if (is_na)
            takeit = TRUE;
        else
            takeit = longer;
    }

    if (takeit)
    {
        bblp->bsp = bsp;
        bblp->has_accession = has_accession;
        bblp->has_gb = has_gb;
        bblp->is_na = is_na;
    }
}
190
BioseqSetLabelFunc(Pointer data,CharPtr buffer,Int2 buflen,Uint1 content)191 static Int2 LIBCALLBACK BioseqSetLabelFunc ( Pointer data, CharPtr buffer, Int2 buflen, Uint1 content)
192 {
193 return BioseqSetLabel ((BioseqSetPtr)data, buffer, buflen, content);
194 }
195
BioseqSetLabel(BioseqSetPtr bssp,CharPtr buffer,Int2 buflen,Uint1 content)196 NLM_EXTERN Int2 LIBCALL BioseqSetLabel (BioseqSetPtr bssp, CharPtr buffer, Int2 buflen, Uint1 content)
197 {
198 CharPtr set_name;
199 Int2 diff, len;
200 BestBioseqLabel bbl;
201 Char tbuf[40];
202
203 if ((bssp == NULL) || (buflen < 1))
204 return 0;
205
206 set_name = AsnEnumTypeStr(BIOSEQ_SET_class, (Int2)(bssp->_class));
207 len = buflen;
208
209 if (content == OM_LABEL_TYPE)
210 return LabelCopy(buffer, set_name, buflen);
211
212 if (content != OM_LABEL_CONTENT)
213 {
214 diff = LabelCopyExtra(buffer, set_name, buflen, NULL, ": ");
215 buflen -= diff;
216 buffer += diff;
217 }
218
219 MemSet(&bbl, 0, sizeof(BestBioseqLabel));
220 SeqEntryExplore(bssp->seq_set, (Pointer)&bbl, FindBestBioseqLabel);
221 tbuf[0] = '\0';
222
223 if (bbl.bsp == NULL)
224 StringMove(tbuf, "(No Bioseqs)");
225 else
226 SeqIdWrite(bbl.bsp->id, tbuf, PRINTID_FASTA_LONG, 39);
227
228 diff = LabelCopy(buffer, tbuf, buflen);
229 buflen -= diff;
230 buffer += diff;
231
232 return (len - buflen); /* no special SUMMARY yet */
233 }
234
BioseqSetSubTypeFunc(Pointer ptr)235 static Uint2 LIBCALLBACK BioseqSetSubTypeFunc (Pointer ptr)
236 {
237 if (ptr == NULL)
238 return 0;
239 return (Uint2)((BioseqSetPtr)ptr)->_class;
240 }
241
242 /*****************************************************************************
243 *
244 * SeqEntry ObjMgr Routines
245 *
246 *****************************************************************************/
247 static CharPtr seqentrytypename = "SeqEntry";
248
SeqEntryNewFunc(void)249 static Pointer LIBCALLBACK SeqEntryNewFunc (void)
250 {
251 return (Pointer) ValNodeNew(NULL);
252 }
253
SeqEntryFreeFunc(Pointer data)254 static Pointer LIBCALLBACK SeqEntryFreeFunc (Pointer data)
255 {
256 return (Pointer) SeqEntryFree ((SeqEntryPtr) data);
257 }
258
SeqEntryAsnWriteFunc(Pointer data,AsnIoPtr aip,AsnTypePtr atp)259 static Boolean LIBCALLBACK SeqEntryAsnWriteFunc (Pointer data, AsnIoPtr aip, AsnTypePtr atp)
260 {
261 return SeqEntryAsnWrite((SeqEntryPtr)data, aip, atp);
262 }
263
SeqEntryAsnReadFunc(AsnIoPtr aip,AsnTypePtr atp)264 static Pointer LIBCALLBACK SeqEntryAsnReadFunc (AsnIoPtr aip, AsnTypePtr atp)
265 {
266 return (Pointer) SeqEntryAsnRead (aip, atp);
267 }
268
SeqEntryLabelFunc(Pointer data,CharPtr buffer,Int2 buflen,Uint1 content)269 static Int2 LIBCALLBACK SeqEntryLabelFunc ( Pointer data, CharPtr buffer, Int2 buflen, Uint1 content)
270 {
271 return SeqEntryLabel((SeqEntryPtr)data, buffer, buflen, content);
272 }
273
SeqEntryLabel(SeqEntryPtr sep,CharPtr buffer,Int2 buflen,Uint1 content)274 NLM_EXTERN Int2 LIBCALL SeqEntryLabel (SeqEntryPtr sep, CharPtr buffer, Int2 buflen, Uint1 content)
275 {
276 if ((sep == NULL) || (buflen < 1))
277 return 0;
278
279 if (IS_Bioseq(sep))
280 return BioseqLabel((BioseqPtr)(sep->data.ptrvalue), buffer, buflen, content);
281 else
282 return BioseqSetLabel((BioseqSetPtr)(sep->data.ptrvalue), buffer, buflen, content);
283 }
284
SeqEntrySubTypeFunc(Pointer ptr)285 static Uint2 LIBCALLBACK SeqEntrySubTypeFunc (Pointer ptr)
286 {
287 if (ptr == NULL)
288 return 0;
289 return (Uint2)((SeqEntryPtr)ptr)->choice;
290 }
291
292 /*****************************************************************************
293 *
294 * SeqSetAsnLoad()
295 *
296 *****************************************************************************/
SeqSetAsnLoad(void)297 NLM_EXTERN Boolean LIBCALL SeqSetAsnLoad (void)
298 {
299 if (loaded)
300 return TRUE;
301 loaded = TRUE;
302
303 if (! GeneralAsnLoad())
304 {
305 loaded = FALSE;
306 return FALSE;
307 }
308 if (! SeqAsnLoad())
309 {
310 loaded = FALSE;
311 return FALSE;
312 }
313 if (! AsnLoad())
314 {
315 loaded = FALSE;
316 return FALSE;
317 }
318
319 ObjMgrTypeLoad(OBJ_BIOSEQSET, "Bioseq-set", bioseqsettypename, "Set of Biol. Sequences",
320 BIOSEQ_SET, BioseqSetNewFunc, BioseqSetAsnReadFunc, BioseqSetAsnWriteFunc,
321 BioseqSetFreeFunc, BioseqSetLabelFunc, BioseqSetSubTypeFunc);
322
323 ObjMgrTypeLoad(OBJ_SEQENTRY, "Seq-entry", seqentrytypename, "Sequence Entry",
324 SEQ_ENTRY, SeqEntryNewFunc, SeqEntryAsnReadFunc, SeqEntryAsnWriteFunc,
325 SeqEntryFreeFunc, SeqEntryLabelFunc, SeqEntrySubTypeFunc);
326
327 return TRUE;
328 }
329
330
331 /*****************************************************************************
332 *
333 * Bioseq-set Routines
334 *
335 *****************************************************************************/
336
337 /*****************************************************************************
338 *
339 * BioseqSetNew()
340 *
341 *****************************************************************************/
BioseqSetNew(void)342 NLM_EXTERN BioseqSetPtr LIBCALL BioseqSetNew (void)
343 {
344 BioseqSetPtr bsp;
345
346 bsp = (BioseqSetPtr)MemNew(sizeof(BioseqSet));
347 if (bsp == NULL) return bsp;
348 bsp->level = INT2_MIN;
349 SeqMgrAdd (SM_BIOSEQSET, (Pointer)bsp); /* add to BioseqSet list */
350 return bsp;
351 }
352
353 /*****************************************************************************
354 *
355 * BioseqSetFree(bsp)
356 * Frees one BioseqSet and associated data
357 *
358 *****************************************************************************/
BioseqSetFree(BioseqSetPtr bsp)359 NLM_EXTERN BioseqSetPtr LIBCALL BioseqSetFree (BioseqSetPtr bsp)
360 {
361 Boolean top = FALSE;
362
363 if (bsp == NULL)
364 return bsp;
365
366 if (bsp->idx.parentptr == NULL || bsp->idx.parenttype == OBJ_SEQSUB) {
367 if (bsp->seqentry != NULL) {
368 SeqMgrDeleteIndexesInRecord (bsp->seqentry);
369 top = TRUE;
370 }
371 }
372
373 BioseqSetFreeComponents(bsp, TRUE);
374
375 if (! SeqMgrDelete(SM_BIOSEQSET, (Pointer)bsp))
376 ErrPostEx(SEV_ERROR, 0,0, "BioseqSetFree: pointer not registered");
377
378 if (top) {
379 ObjMgrDeleteAllInRecord ();
380 }
381
382 return (BioseqSetPtr)MemFree(bsp);
383 }
384
385
/* Reverses a SeqEntry chain in place and returns the new head (NULL for an empty chain). */
static SeqEntryPtr ReverseSeqEntryList (SeqEntryPtr start)
{
    SeqEntryPtr reversed = NULL;
    SeqEntryPtr node, following;

    for (node = start; node != NULL; node = following)
    {
        following = node->next;     /* remember the rest before relinking */
        node->next = reversed;
        reversed = node;
    }

    return reversed;
}
399
400 /*****************************************************************************
401 *
402 * BioseqSetFreeComponents(bsp, parts)
403 * Frees associated data of a BioseqSet
404 * if (parts == FALSE)
405 * Calls SeqEntryFreeComponents() for seq-set
406 * else
407 * Calls SeqEntryFree()
408 * Does not free the BioseqSet itself
409 * Called by BioseqSetFree
410 *
411 *****************************************************************************/
BioseqSetFreeComponents(BioseqSetPtr bsp,Boolean parts)412 NLM_EXTERN BioseqSetPtr LIBCALL BioseqSetFreeComponents (BioseqSetPtr bsp, Boolean parts)
413 {
414 SeqAnnotPtr sp, spnext;
415 SeqEntryPtr sep, sepnext;
416
417 if (bsp == NULL)
418 return bsp;
419
420 bsp->id = ObjectIdFree(bsp->id);
421 bsp->coll = DbtagFree(bsp->coll);
422 bsp->release = MemFree(bsp->release);
423 bsp->date = DateFree(bsp->date);
424 bsp->descr = SeqDescrFree(bsp->descr);
425 sep = bsp->seq_set;
426 /* reverse seq-set, to increase speed of freeing it, since the elements will have
427 * probably been added to the index in order, and it's faster to take the last out first.
428 */
429 sep = ReverseSeqEntryList(sep);
430 bsp->seq_set = sep;
431 while (sep != NULL)
432 {
433 sepnext = sep->next;
434 if (parts)
435 {
436 SeqEntryFree(sep);
437 }
438 else
439 {
440 SeqEntryFreeComponents(sep);
441 }
442 sep = sepnext;
443 }
444 sp = bsp->annot;
445 bsp->annot = NULL;
446 while (sp != NULL)
447 {
448 spnext = sp->next;
449 SeqAnnotFree(sp);
450 sp = spnext;
451 }
452 return bsp;
453 }
454 /*****************************************************************************
455 *
456 * BioseqSetAsnWrite(bsp, aip, atp)
457 * atp is the current type (if identifier of a parent struct)
458 * if atp == NULL, then assumes it stands alone (BioseqSet ::=)
459 *
460 *****************************************************************************/
BioseqSetAsnWrite(BioseqSetPtr bsp,AsnIoPtr aip,AsnTypePtr orig)461 NLM_EXTERN Boolean LIBCALL BioseqSetAsnWrite (BioseqSetPtr bsp, AsnIoPtr aip, AsnTypePtr orig)
462 {
463 DataVal av;
464 AsnTypePtr atp;
465 SeqEntryPtr sep;
466 Boolean retval = FALSE;
467
468 if (! loaded)
469 {
470 if (! SeqSetAsnLoad())
471 return FALSE;
472 }
473
474 if (aip == NULL)
475 return FALSE;
476
477 atp = AsnLinkType(orig, BIOSEQ_SET); /* link local tree */
478 if (atp == NULL) return FALSE;
479
480 if (bsp == NULL) { AsnNullValueMsg(aip, atp); goto erret; }
481
482 MemSet ((Pointer) (&av), 0, sizeof (DataVal));
483
484 if (! AsnOpenStruct(aip, atp, (Pointer)bsp)) goto erret;
485
486 if (bsp->id != NULL)
487 {
488 if (! ObjectIdAsnWrite(bsp->id, aip, BIOSEQ_SET_id)) goto erret;
489 }
490 if (bsp->coll != NULL)
491 {
492 if (! DbtagAsnWrite(bsp->coll, aip, BIOSEQ_SET_coll)) goto erret;
493 }
494 if (bsp->level != INT2_MIN)
495 {
496 av.intvalue = bsp->level;
497 if (! AsnWrite(aip, BIOSEQ_SET_level, &av)) goto erret;
498 }
499 if (bsp->_class != 0)
500 {
501 av.intvalue = bsp->_class;
502 if (! AsnWrite(aip, BIOSEQ_SET_class, &av)) goto erret;
503 }
504 if (bsp->release != NULL)
505 {
506 av.ptrvalue = bsp->release;
507 if (! AsnWrite(aip, BIOSEQ_SET_release, &av)) goto erret;
508 }
509 if (bsp->date != NULL)
510 {
511 if (! DateAsnWrite(bsp->date, aip, BIOSEQ_SET_date)) goto erret;
512 }
513 if (bsp->descr != NULL) /* Seq-descr optional */
514 {
515 if (! SeqDescrAsnWrite(bsp->descr, aip, BIOSEQ_SET_descr)) goto erret;
516 }
517
518 if (! AsnOpenStruct(aip, BIOSEQ_SET_seq_set, (Pointer)bsp->seq_set)) goto erret;
519 sep = bsp->seq_set;
520 while (sep != NULL)
521 {
522 if (! SeqEntryAsnWrite(sep, aip, BIOSEQ_SET_seq_set_E)) goto erret;
523 sep = sep->next;
524 }
525 if (! AsnCloseStruct(aip, BIOSEQ_SET_seq_set, (Pointer)bsp->seq_set)) goto erret;
526 if (bsp->annot != NULL) /* annotation optional */
527 {
528 if (! SeqAnnotSetAsnWrite(bsp->annot, aip, BIOSEQ_SET_annot, BIOSEQ_SET_annot_E)) goto erret;
529 }
530
531 if (! AsnCloseStruct(aip, atp, (Pointer)bsp)) goto erret;
532 retval = TRUE;
533 erret:
534 AsnUnlinkType(orig); /* unlink local tree */
535 return retval;
536 }
537
538 static void SeqDescrPack PROTO((ValNodePtr PNTR to, ValNodePtr PNTR from));
539
540 /*****************************************************************************
541 *
542 * SeqDescrPack(to, from)
543 * address of asnodeptr at head of list to add to
544 * address of asnodeptr at head of list to add from.
545 * sets *from to NULL on completion.
546 * deletes extra titles
547 *****************************************************************************/
SeqDescrPack(ValNodePtr PNTR to,ValNodePtr PNTR from)548 static void SeqDescrPack (ValNodePtr PNTR to, ValNodePtr PNTR from)
549 {
550 ValNodePtr currdescr, newdescr, tmpdescr;
551 Boolean have_title = FALSE;
552
553 currdescr = *to;
554 if (currdescr == NULL) /* nothing to merge with */
555 {
556 *to = *from;
557 *from = NULL;
558 return;
559 }
560
561 while (currdescr->next != NULL)
562 {
563 if (currdescr->choice == Seq_descr_title)
564 have_title = TRUE;
565 currdescr = currdescr->next;
566 }
567 if (currdescr->choice == Seq_descr_title)
568 have_title = TRUE;
569 newdescr = *from;
570 while (newdescr != NULL)
571 {
572 if ((have_title) && (newdescr->choice == Seq_descr_title))
573 {
574 tmpdescr = newdescr;
575 newdescr = tmpdescr->next;
576 MemFree(tmpdescr->data.ptrvalue);
577 MemFree(tmpdescr);
578 }
579 else
580 {
581 currdescr->next = newdescr;
582 newdescr = newdescr->next;
583 currdescr = currdescr->next;
584 }
585 }
586 *from = NULL;
587
588 return;
589 }
590
591 /*****************************************************************************
592 *
593 * BioseqSetAsnRead(aip, atp)
594 * atp is the current type (if identifier of a parent struct)
595 * assumption is readIdent has occurred
596 * if atp == NULL, then assumes it stands alone and read ident
597 * has not occurred.
598 *
599 *****************************************************************************/
BioseqSetAsnRead(AsnIoPtr aip,AsnTypePtr orig)600 NLM_EXTERN BioseqSetPtr LIBCALL BioseqSetAsnRead (AsnIoPtr aip, AsnTypePtr orig)
601 {
602 DataVal av;
603 AsnTypePtr atp, oldatp;
604 BioseqSetPtr bsp=NULL, tmp;
605 SeqEntryPtr curr, next, hold = NULL;
606 SeqAnnotPtr sap;
607 AsnOptionPtr aop;
608 Op_objssetPtr oop = NULL;
609 Op_objseqPtr osp = NULL;
610 Boolean this_one = TRUE,
611 get_bioseq = FALSE,
612 check_set = FALSE,
613 got_it = FALSE,
614 old_in_right_set = FALSE;
615
616
617 if (! loaded)
618 {
619 if (! SeqSetAsnLoad())
620 return bsp;
621 }
622
623 if (aip == NULL)
624 return bsp;
625
626 if ((aop = AsnIoOptionGet(aip, OP_NCBIOBJSSET, SEQENTRY_OPTION_MAX_COMPLEX, NULL)) != NULL)
627 {
628 this_one = FALSE;
629 check_set = TRUE;
630 oop = (Op_objssetPtr) aop->data.ptrvalue;
631 old_in_right_set = oop->in_right_set;
632 if (oop->retcode == 1) /* get bioseq */
633 get_bioseq = TRUE;
634 else
635 {
636 if((aop = AsnIoOptionGet(aip, OP_NCBIOBJSEQ, BIOSEQ_CHECK_ID, NULL))!=NULL)
637 osp = (Op_objseqPtr)aop->data.ptrvalue;
638 }
639 }
640
641 if (orig == NULL) /* BioseqSet ::= (self contained) */
642 atp = AsnReadId(aip, amp, BIOSEQ_SET);
643 else
644 atp = AsnLinkType(orig, BIOSEQ_SET); /* link in local tree */
645 oldatp = atp;
646 if (atp == NULL) return bsp;
647
648 bsp = BioseqSetNew();
649 if (bsp == NULL) goto erret;
650
651 if (AsnReadVal(aip, atp, &av) <= 0) goto erret; /* read the start struct */
652 curr = NULL;
653
654 while ((atp = AsnReadId(aip, amp, atp)) != oldatp)
655 {
656 if (atp == NULL) goto erret;
657 if (atp == BIOSEQ_SET_id)
658 {
659 bsp->id = ObjectIdAsnRead(aip, atp);
660 if (bsp->id == NULL) goto erret;
661 }
662 else if (atp == BIOSEQ_SET_coll)
663 {
664 bsp->coll = DbtagAsnRead(aip, atp);
665 if (bsp->coll == NULL) goto erret;
666 }
667 else if (atp == BIOSEQ_SET_date)
668 {
669 bsp->date = DateAsnRead(aip, atp);
670 if (bsp->date == NULL) goto erret;
671 }
672 else if (atp == BIOSEQ_SET_descr)
673 {
674 bsp->descr = SeqDescrAsnRead(aip, atp);
675 if (bsp->descr == NULL) goto erret;
676 }
677 else if (atp == BIOSEQ_SET_seq_set_E)
678 {
679 if (got_it) /* already have the entry we want */
680 AsnSkipValue(aip, atp);
681 else
682 {
683 if ((next = SeqEntryAsnRead(aip, atp)) != NULL)
684 {
685 if (IS_Bioseq(next))
686 SeqMgrConnect(SM_BIOSEQ, next->data.ptrvalue,
687 SM_BIOSEQSET, (Pointer) bsp);
688 else
689 SeqMgrConnect(SM_BIOSEQSET, next->data.ptrvalue,
690 SM_BIOSEQSET, (Pointer) bsp);
691
692 if (get_bioseq) /* will only be here if got it */
693 {
694 got_it = TRUE;
695 hold = next;
696 }
697 else if (check_set)
698 {
699 if (oop->working_on_set == 2) /* found set in lower level */
700 got_it = TRUE;
701 else if ((oop->retcode == 2 || oop->retcode == 3) && (! oop->in_right_set))
702 {
703 if (osp->found_it) /* found the component Bioseq */
704 {
705 got_it = TRUE;
706 oop->working_on_set = 2; /* all done */
707 }
708 }
709
710 if (got_it)
711 {
712 hold = next;
713 curr = bsp->seq_set; /* free sibling sets - not needed */
714 while (curr != NULL)
715 {
716 next = curr->next;
717 SeqEntryFree(curr);
718 curr = next;
719 }
720 bsp->seq_set = NULL;
721 }
722 }
723
724 if (! got_it)
725 {
726 if (curr == NULL)
727 bsp->seq_set = next;
728 else
729 curr->next = next;
730 curr = next;
731 }
732 }
733 }
734 }
735 else if (atp == BIOSEQ_SET_annot)
736 {
737 if ((get_bioseq) && (! got_it))
738 AsnSkipValue(aip, atp);
739 else
740 {
741 bsp->annot = SeqAnnotSetAsnRead(aip, atp, BIOSEQ_SET_annot_E);
742 /* if (bsp->annot == NULL) goto erret;n*/
743 }
744 }
745 else
746 {
747 if (AsnReadVal(aip, atp, &av) <= 0) goto erret; /* takes care of everything else */
748 if (atp == BIOSEQ_SET_level)
749 bsp->level = (Int2)av.intvalue;
750 else if (atp == BIOSEQ_SET_class)
751 {
752 bsp->_class = (Uint1)av.intvalue;
753 if ((! get_bioseq) && (check_set)) /* looking for a set */
754 {
755 switch (oop->retcode)
756 {
757 case 2: /* seg-set */
758 if (bsp->_class == 2)
759 this_one = TRUE;
760 break;
761 case 3: /* nuc-prot */
762 if (bsp->_class == 1)
763 this_one = TRUE;
764 break;
765 case 4: /* pub-set */
766 if (bsp->_class == 9)
767 this_one = TRUE;
768 break;
769 }
770 if (this_one)
771 oop->in_right_set = TRUE;
772 }
773 }
774 else if (atp == BIOSEQ_SET_release)
775 bsp->release = (CharPtr)av.ptrvalue;
776 }
777 }
778 if (AsnReadVal(aip, atp, &av) <= 0) goto erret; /* end BioseqSet */
779
780 if (check_set) /* check sets */
781 {
782 if (! got_it)
783 {
784 if (get_bioseq) /* can't use anything at this level */
785 {
786 return BioseqSetFree(bsp);
787 }
788
789 if (osp->found_it) /* found the contained Bioseq */
790 {
791 if (! this_one)
792 oop->working_on_set = 1;
793 else
794 oop->working_on_set = 2;
795 oop->in_right_set = old_in_right_set;
796 return bsp;
797 }
798 }
799
800 if (got_it)
801 {
802 if ((this_one) && (oop->working_on_set != 2))
803 {
804 oop->working_on_set = 2;
805 bsp->seq_set = hold;
806 }
807 /* copy the annot and descr if lower level is BioseqSet */
808 else if (IS_Bioseq_set(hold))
809 { /* make this smarter */
810 ObjMgrConnect (OBJ_BIOSEQSET, (Pointer) hold->data.ptrvalue, 0, NULL); /* disconnect */
811 tmp = (BioseqSetPtr)hold->data.ptrvalue;
812 hold->data.ptrvalue = NULL;
813 hold = SeqEntryFree (hold);
814 if (tmp->annot == NULL)
815 {
816 tmp->annot = bsp->annot;
817 }
818 else
819 {
820 sap = tmp->annot;
821 while (sap->next != NULL)
822 {
823 sap = sap->next;
824 }
825 sap->next = bsp->annot;
826 }
827 bsp->annot = NULL;
828 SeqDescrPack(&tmp->descr, &bsp->descr);
829 BioseqSetFree(bsp);
830 bsp = tmp;
831 }
832 else
833 {
834 bsp->seq_set = hold;
835 }
836 }
837 }
838
839 if (check_set)
840 oop->in_right_set = old_in_right_set;
841 ret:
842 AsnUnlinkType(orig); /* unlink local tree */
843 return bsp;
844 erret:
845 aip->io_failure = TRUE;
846 bsp = BioseqSetFree(bsp);
847 goto ret;
848 }
849
850 /*****************************************************************************
851 *
852 * SeqEntryNew()
853 *
854 *****************************************************************************/
SeqEntryNew(void)855 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryNew (void)
856 {
857 SeqEntryPtr sep;
858
859 sep = ValNodeNew(NULL);
860 if (sep == NULL) return sep;
861
862 return sep;
863 }
864
865 /*****************************************************************************
866 *
867 * SeqEntryFree(sep)
868 * Frees one SeqEntry and associated data
869 *
870 *****************************************************************************/
SeqEntryFree(SeqEntryPtr sep)871 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryFree (SeqEntryPtr sep)
872 {
873 if (sep == NULL)
874 return sep;
875
876 if (sep->choice == 1)
877 BioseqFree((BioseqPtr)sep->data.ptrvalue);
878 else if (sep->choice == 2)
879 BioseqSetFree((BioseqSetPtr)sep->data.ptrvalue);
880
881 return (SeqEntryPtr)MemFree(sep);
882 }
883
884 /*****************************************************************************
885 *
886 * SeqEntryFreeComponents(sep)
887 * Frees components of elements associated with SeqEntry
888 * used by SeqMgr for caching out
889 *
890 *****************************************************************************/
SeqEntryFreeComponents(SeqEntryPtr sep)891 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryFreeComponents (SeqEntryPtr sep)
892 {
893 if (sep == NULL)
894 return sep;
895
896 if (sep->choice == 1)
897 BioseqFreeComponents((BioseqPtr)sep->data.ptrvalue);
898 else if (sep->choice == 2)
899 BioseqSetFreeComponents((BioseqSetPtr)sep->data.ptrvalue, FALSE);
900
901 return sep;
902 }
903
904 /*****************************************************************************
905 *
906 * SeqEntryAsnWrite(sep, aip, atp)
907 * atp is the current type (if identifier of a parent struct)
908 * if atp == NULL, then assumes it stands alone (SeqEntry ::=)
909 *
910 *****************************************************************************/
SeqEntryAsnWrite(SeqEntryPtr sep,AsnIoPtr aip,AsnTypePtr orig)911 NLM_EXTERN Boolean LIBCALL SeqEntryAsnWrite (SeqEntryPtr sep, AsnIoPtr aip, AsnTypePtr orig)
912 {
913 AsnTypePtr atp;
914 DataVal av;
915 Boolean retval = FALSE;
916
917 if (! loaded)
918 {
919 if (! SeqSetAsnLoad())
920 return FALSE;
921 }
922
923 if (aip == NULL)
924 return FALSE;
925
926 atp = AsnLinkType(orig, SEQ_ENTRY); /* link local tree */
927 if (atp == NULL) return FALSE;
928
929 if (sep == NULL) { AsnNullValueMsg(aip, atp); goto erret; }
930
931 MemSet ((Pointer) (&av), 0, sizeof (DataVal));
932
933 av.ptrvalue = (Pointer)sep;
934 if (! AsnWriteChoice(aip, atp, (Int2)sep->choice, &av)) goto erret;
935 if (sep->choice == 1)
936 {
937 if (! BioseqAsnWrite((BioseqPtr)sep->data.ptrvalue, aip, SEQ_ENTRY_seq))
938 goto erret;
939 }
940 else if (sep->choice == 2)
941 {
942 if (! BioseqSetAsnWrite((BioseqSetPtr)sep->data.ptrvalue, aip, SEQ_ENTRY_set))
943 goto erret;
944 }
945 retval = TRUE;
946 erret:
947 AsnUnlinkType(orig);
948 return retval;
949 }
950
951 /*****************************************************************************
952 *
953 * SeqEntryAsnRead(aip, atp)
954 * atp is the current type (if identifier of a parent struct)
955 * assumption is readIdent has occurred
956 * if atp == NULL, then assumes it stands alone and read ident
957 * has not occurred.
958 *
959 *****************************************************************************/
SeqEntryAsnRead(AsnIoPtr aip,AsnTypePtr orig)960 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryAsnRead (AsnIoPtr aip, AsnTypePtr orig)
961 {
962 DataVal av;
963 AsnTypePtr atp;
964 SeqEntryPtr sep=NULL;
965 Uint1 type = 0;
966
967 if (! loaded)
968 {
969 if (! SeqSetAsnLoad())
970 return sep;
971 }
972
973 if (aip == NULL)
974 return sep;
975
976 if (orig == NULL) /* SeqEntry ::= (self contained) */
977 atp = AsnReadId(aip, amp, SEQ_ENTRY);
978 else
979 atp = AsnLinkType(orig, SEQ_ENTRY); /* link in local tree */
980 if (atp == NULL) return sep;
981
982 sep = SeqEntryNew();
983 if (sep == NULL) goto erret;
984
985 if (AsnReadVal(aip, atp, &av) <= 0) goto erret; /* read the CHOICE */
986 atp = AsnReadId(aip, amp, atp); if (atp == NULL) goto erret; /* which choice? */
987 if (atp == SEQ_ENTRY_seq)
988 {
989 sep->choice = 1;
990 sep->data.ptrvalue = (Pointer) BioseqAsnRead(aip, atp);
991 type = (Uint1)SM_BIOSEQ;
992 }
993 else if (atp == SEQ_ENTRY_set)
994 {
995 sep->choice = 2;
996 sep->data.ptrvalue = (Pointer) BioseqSetAsnRead(aip, atp);
997 type = (Uint1)SM_BIOSEQSET;
998 }
999
1000 if (sep->data.ptrvalue == NULL)
1001 goto erret;
1002 else
1003 SeqMgrSeqEntry(type, sep->data.ptrvalue, sep);
1004 ret:
1005 AsnUnlinkType(orig); /* unlink local tree */
1006 return sep;
1007 erret:
1008 aip->io_failure = TRUE;
1009 sep = SeqEntryFree(sep);
1010 goto ret;
1011 }
1012
1013 /*****************************************************************************
1014 *
1015 * Used by SeqEntryAsnGet to clear unwanted features
1016 *
1017 *****************************************************************************/
1018
1019 typedef struct secf {
1020 Int2 numseqids;
1021 SeqIdPtr PNTR seqids;
1022 } Secf, PNTR SecfPtr;
1023
1024 static void SeqEntryGetIds PROTO((SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent));
1025 static void SeqEntryDumpFeats PROTO((SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent));
1026 static void SeqEntryClearFeatures PROTO((SeqEntryPtr sep));
1027
1028 /*****************************************************************************
1029 *
1030 * SeqEntryAsnGet(aip, atp, sip, retcode)
1031 * atp is the current type (if identifier of a parent struct)
1032 * assumption is readIdent has occurred
1033 * if atp == NULL, then assumes it stands alone and read ident
1034 * has not occurred.
1035 * looks for entry with sip=SeqId
1036 * returns a collection of maximum complexity = retcode
1037 * where retcode =
1038 entry (0) , -- the "natural" entry for this (nuc-prot)
1039 bioseq (1) , -- only the bioseq identified
1040 bioseq-set (2) , -- any seg-set it may be part of
1041 nuc-prot (3) , -- any nuc-prot it may be part of
1042 pub-set (4) } DEFAULT entry }
1043 *
1044 *****************************************************************************/
SeqEntryAsnGet(AsnIoPtr aip,AsnTypePtr orig,SeqIdPtr sip,Int2 retcode)1045 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryAsnGet (AsnIoPtr aip, AsnTypePtr orig, SeqIdPtr sip, Int2 retcode)
1046 {
1047 AsnOptionPtr aop;
1048 Op_objssetPtr oop;
1049 Op_objseqPtr osp;
1050 SeqEntryPtr sep=NULL;
1051 BioseqSetPtr bssp;
1052 BioseqPtr bsp;
1053 SeqAnnotPtr sap;
1054 DataVal av;
1055 Boolean pack_it = FALSE;
1056
1057 if ((aip == NULL) || (sip == NULL) || (retcode < 0) || (retcode > 4))
1058 return sep;
1059
1060 av.realvalue = 0.0; /* just zeros it out to prevent debugger fuss */
1061 oop = (Op_objssetPtr)MemNew(sizeof(Op_objsset));
1062 if (oop == NULL) return sep;
1063
1064 oop->sip = sip;
1065 oop->retcode = retcode;
1066 av.ptrvalue = (Pointer) oop;
1067 aop = AsnIoOptionNew(aip, (Int2)OP_NCBIOBJSSET, (Int2)SEQENTRY_OPTION_MAX_COMPLEX, av, DefAsnOptionFree);
1068 if (aop == NULL)
1069 return sep;
1070
1071 osp = (Op_objseqPtr)MemNew(sizeof(Op_objseq));
1072 if (osp == NULL) return sep;
1073
1074 osp->sip = sip;
1075 if (retcode == 1) /* just after a Bioseq */
1076 osp->load_by_id = TRUE;
1077 av.ptrvalue = (Pointer) osp;
1078 aop = AsnIoOptionNew(aip, OP_NCBIOBJSEQ, BIOSEQ_CHECK_ID, av, DefAsnOptionFree);
1079 if (aop == NULL)
1080 return sep;
1081
1082 sep = SeqEntryAsnRead(aip, orig);
1083 if (sep == NULL) goto erret;
1084
1085 if ((retcode == 1) && (IS_Bioseq_set(sep)))
1086 pack_it = TRUE;
1087 else if ((retcode == 2) && (IS_Bioseq_set(sep)))
1088 { /* got Bioseq, but not part of seg-set */
1089 bssp = (BioseqSetPtr)sep->data.ptrvalue;
1090 if (bssp->_class != 2) /* not a seg-set */
1091 pack_it = TRUE;
1092 }
1093 else if ((retcode == 3) && (IS_Bioseq_set(sep)))
1094 { /* got Bioseq, but not part of nuc-prot set */
1095 bssp = (BioseqSetPtr)sep->data.ptrvalue;
1096 if (bssp->_class != 1) /* not a nuc-prot set */
1097 pack_it = TRUE;
1098 }
1099
1100
1101 if (! osp->found_it)
1102 {
1103 sep = SeqEntryFree(sep);
1104 }
1105 else if (pack_it) /* want Bioseq out of Bioseq-set */
1106 {
1107 bssp = (BioseqSetPtr)sep->data.ptrvalue;
1108 sep->data.ptrvalue = NULL;
1109 SeqEntryFree(sep);
1110 sep = bssp->seq_set;
1111 bssp->seq_set = NULL;
1112 bsp = (BioseqPtr)sep->data.ptrvalue;
1113 ObjMgrConnect(OBJ_BIOSEQ, (Pointer)bsp, 0, NULL); /* disconnect */
1114 SeqDescrPack(&bsp->descr, &bssp->descr);
1115 if (bsp->annot == NULL)
1116 bsp->annot = bssp->annot;
1117 else
1118 {
1119 sap = bsp->annot;
1120 while (sap->next != NULL)
1121 sap = sap->next;
1122 sap->next = bssp->annot;
1123 }
1124 bssp->annot = NULL;
1125 BioseqSetFree(bssp);
1126 }
1127
1128 if (retcode)
1129 SeqEntryClearFeatures(sep); /* clear unwanted features */
1130
1131 erret:
1132 AsnIoOptionFree(aip, OP_NCBIOBJSSET, SEQENTRY_OPTION_MAX_COMPLEX);
1133 AsnIoOptionFree(aip, OP_NCBIOBJSEQ, BIOSEQ_CHECK_ID);
1134 return sep;
1135 }
1136
1137 /*****************************************************************************
1138 *
1139 * void SeqEntryClearFeatures(sep)
1140 * clears features which do not refer to a Bioseq in sep
1141 *
1142 *****************************************************************************/
SeqEntryClearFeatures(SeqEntryPtr sep)1143 static void SeqEntryClearFeatures (SeqEntryPtr sep)
1144 {
1145 SecfPtr sp;
1146 Int2 bioseq_count;
1147
1148 if (sep == NULL)
1149 return;
1150
1151 sp = (SecfPtr)MemNew(sizeof(Secf));
1152 if (sp == NULL) return;
1153 bioseq_count = (Int2) SeqEntryCount(sep); /* overestimate */
1154 sp->numseqids = 0;
1155 sp->seqids = (SeqIdPtr PNTR)MemNew(bioseq_count * sizeof(SeqIdPtr));
1156 if (sp->seqids == NULL) return;
1157
1158 SeqEntryExplore(sep, (Pointer)sp, SeqEntryGetIds);
1159 SeqEntryExplore(sep, (Pointer)sp, SeqEntryDumpFeats);
1160 MemFree(sp->seqids);
1161 MemFree(sp);
1162 return;
1163 }
1164
1165 /*****************************************************************************
1166 *
1167 * SeqEntryGetIds()
1168 * SeqEntryExplore function to make a list of seqids of all bioseqs in
1169 * a seqentry.
1170 *
1171 *****************************************************************************/
SeqEntryGetIds(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)1172 static void SeqEntryGetIds (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
1173 {
1174 SecfPtr sfp;
1175
1176 if (IS_Bioseq(sep))
1177 {
1178 sfp = (SecfPtr)data;
1179 sfp->seqids[sfp->numseqids] = ((BioseqPtr)sep->data.ptrvalue)->id;
1180 sfp->numseqids++;
1181 }
1182 return;
1183 }
1184
1185 /*****************************************************************************
1186 *
1187 * SeqEntryDumpFeats()
1188 * SeqEntryExplore function to clear features which do not match a
1189 * list of seqids.
1190 *
1191 *****************************************************************************/
SeqEntryDumpFeats(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)1192 static void SeqEntryDumpFeats (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
1193 {
1194 SecfPtr sp;
1195 Int2 numids, i;
1196 SeqIdPtr PNTR sip;
1197 SeqIdPtr cursip;
1198 ValNode an;
1199 SeqAnnotPtr sap, prevsap, sapnext;
1200 SeqFeatPtr curr, next, prev;
1201 Boolean got_it;
1202
1203 sp = (SecfPtr)data;
1204 numids = sp->numseqids;
1205 sip = sp->seqids;
1206 an.choice = SEQLOC_WHOLE;
1207 if (IS_Bioseq(sep))
1208 sap = ((BioseqPtr)sep->data.ptrvalue)->annot;
1209 else
1210 sap = ((BioseqSetPtr)sep->data.ptrvalue)->annot;
1211 prevsap = NULL;
1212 while (sap != NULL)
1213 {
1214 if (sap->type == 1) /* feature table */
1215 {
1216 prev = NULL;
1217 curr = (SeqFeatPtr)sap->data;
1218 while (curr != NULL)
1219 {
1220 got_it = FALSE;
1221 for (i = 0; i < numids; i++)
1222 {
1223 cursip = sip[i];
1224 while (cursip != NULL)
1225 {
1226 an.data.ptrvalue = (Pointer)cursip;
1227 if (SeqLocCompare(&an, curr->product))
1228 {
1229 got_it = TRUE;
1230 break;
1231 }
1232 if (SeqLocCompare(&an, curr->location))
1233 {
1234 got_it = TRUE;
1235 break;
1236 }
1237 cursip = cursip->next;
1238 }
1239 if (got_it)
1240 break;
1241 }
1242 next = curr->next;
1243 if (! got_it) /* delete it */
1244 {
1245 if (prev == NULL)
1246 sap->data = (Pointer)next;
1247 else
1248 prev->next = next;
1249 SeqFeatFree(curr);
1250 }
1251 else
1252 prev = curr;
1253 curr = next;
1254 }
1255 /* delete feature table if empty */
1256 if (sap->data == NULL) /* deleted them all */
1257 {
1258 sapnext = sap->next;
1259 SeqAnnotFree(sap);
1260 if (prevsap == NULL)
1261 {
1262 if (IS_Bioseq(sep))
1263 ((BioseqPtr)sep->data.ptrvalue)->annot = sapnext;
1264 else
1265 ((BioseqSetPtr)sep->data.ptrvalue)->annot = sapnext;
1266 }
1267 sap = sapnext;
1268 }
1269 else
1270 sap = sap->next;
1271 }
1272 else
1273 sap = sap->next;
1274 }
1275 return;
1276 }
1277
1278
1279 /*****************************************************************************
1280 *
1281 * Boolean SeqEntryLoad()
1282 *
1283 *****************************************************************************/
SeqEntryLoad(void)1284 NLM_EXTERN Boolean LIBCALL SeqEntryLoad (void)
1285 {
1286 if (! SeqSetAsnLoad())
1287 return FALSE;
1288
1289 return BioseqLoad(); /* need all the Bioseq stuff */
1290 }
1291
1292
1293
1294