1 /*  objsset.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  objsset.c
27 *
28 * Author:  James Ostell
29 *
30 * Version Creation Date: 4/1/91
31 *
32 * $Revision: 6.19 $
33 *
34 * File Description:  Object manager for module NCBI-Seqset
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date	   Name        Description of modification
39 * -------  ----------  -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44 
45 /** for ErrPostEx() ****/
46 
/* Module name string; THIS_MODULE expands to it (per the note above, these exist for ErrPostEx()). */
static char *this_module = "ncbiobj";
#define THIS_MODULE this_module
/* Source file name taken from __FILE__; THIS_FILE expands to it. */
static char *this_file = __FILE__;
#define THIS_FILE this_file
51 
52 /**********************/
53 
54 #include <objsset.h>		   /* the Bioseq-set interface */
55 #include <objmgr.h>
56 #include <sequtil.h>           /* used by SeqEntryAsnGet() */
57 #include <asnsset.h>        /* the AsnTool header */
58 
/* Set to TRUE by SeqSetAsnLoad() (and reset to FALSE if one of its load steps fails);
 * the AsnRead/AsnWrite routines in this file test it before doing any work. */
static Boolean loaded = FALSE;
60 
61 /*****************************************************************************
62 *
63 *   BioseqSet ObjMgr Routines
64 *
65 *****************************************************************************/
/* Type-name string handed to ObjMgrTypeLoad() when OBJ_BIOSEQSET is registered in SeqSetAsnLoad(). */
static CharPtr bioseqsettypename = "BioseqSet";
67 
BioseqSetNewFunc(void)68 static Pointer LIBCALLBACK BioseqSetNewFunc (void)
69 {
70 	return (Pointer) BioseqSetNew();
71 }
72 
BioseqSetFreeFunc(Pointer data)73 static Pointer LIBCALLBACK BioseqSetFreeFunc (Pointer data)
74 {
75 	return (Pointer) BioseqSetFree ((BioseqSetPtr) data);
76 }
77 
BioseqSetAsnWriteFunc(Pointer data,AsnIoPtr aip,AsnTypePtr atp)78 static Boolean LIBCALLBACK BioseqSetAsnWriteFunc (Pointer data, AsnIoPtr aip, AsnTypePtr atp)
79 {
80 	return BioseqSetAsnWrite((BioseqSetPtr)data, aip, atp);
81 }
82 
BioseqSetAsnReadFunc(AsnIoPtr aip,AsnTypePtr atp)83 static Pointer LIBCALLBACK BioseqSetAsnReadFunc (AsnIoPtr aip, AsnTypePtr atp)
84 {
85 	return (Pointer) BioseqSetAsnRead (aip, atp);
86 }
87 
88 typedef struct bestbioseqlabel {
89 	BioseqPtr bsp;
90 	Boolean is_na,
91 		has_accession,
92 		has_gb;
93 } BestBioseqLabel, PNTR BestBioseqLabelPtr;
94 
/*
 * SeqEntryExplore() callback used by BioseqSetLabel() to choose the Bioseq
 * whose id will label a whole Bioseq-set.
 *
 *   sep    - entry being visited; anything that is not a Bioseq is ignored
 *   data   - BestBioseqLabelPtr holding the best candidate found so far
 *   index, indent - part of the callback signature, not used here
 *
 * Preference order (first criterion that either candidate satisfies decides):
 *   1. has an accession   2. has a GenBank-family id   3. is nucleic acid
 * When both candidates satisfy the deciding criterion, or neither satisfies
 * any, the longer sequence wins.  The first Bioseq seen is always taken.
 *
 * NULL sep / data / Bioseq payload / TextSeqId payload are tolerated instead
 * of being dereferenced.
 */
static void FindBestBioseqLabel (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
	BioseqPtr bsp;
	Boolean is_na = FALSE,
			has_accession = FALSE,
			has_gb = FALSE,
			longer = FALSE;
	BestBioseqLabelPtr bblp;
	SeqIdPtr sip;
	TextSeqIdPtr tsip;
	Boolean takeit = FALSE;

	if ((sep == NULL) || (data == NULL))
		return;

	if (! IS_Bioseq(sep))
		return;

	bsp = (BioseqPtr)(sep->data.ptrvalue);
	if (bsp == NULL)     /* malformed entry: nothing to evaluate */
		return;
	bblp = (BestBioseqLabelPtr)data;

	is_na = ISA_na(bsp->mol);
	for (sip = bsp->id; sip != NULL; sip = sip->next)
	{
		switch (sip->choice)
		{
			case SEQID_GENBANK:
			case SEQID_EMBL:
			case SEQID_DDBJ:
			case SEQID_OTHER:
			case SEQID_TPG:
			case SEQID_TPE:
			case SEQID_TPD:
			case SEQID_GPIPE:
				has_gb = TRUE;
				/* FALLTHROUGH - these ids are also inspected for an accession */
			case SEQID_PIR:
			case SEQID_SWISSPROT:
			case SEQID_PRF:
				tsip = (TextSeqIdPtr)(sip->data.ptrvalue);
				if ((tsip != NULL) && (tsip->accession != NULL))
					has_accession = TRUE;
				break;
			default:
				break;
		}
	}

	if (bblp->bsp == NULL)
		takeit = TRUE;     /* first Bioseq seen */
	else
	{
		if (bsp->length > bblp->bsp->length)
			longer = TRUE;

		if (bblp->has_accession)
		{
			/* current best has an accession: only a longer one with an accession replaces it */
			if (has_accession)
			{
				if (longer)
					takeit = TRUE;
			}
		}
		else if (has_accession)
			takeit = TRUE;
		else if (bblp->has_gb)
		{
			if (has_gb)
			{
				if (longer)
					takeit = TRUE;
			}
		}
		else if (has_gb)
			takeit = TRUE;
		else if (bblp->is_na)
		{
			if (is_na)
			{
				if (longer)
					takeit = TRUE;
			}
		}
		else if (is_na)
			takeit = TRUE;
		else if (longer)
			takeit = TRUE;
	}

	if (takeit)
	{
		bblp->bsp = bsp;
		bblp->has_accession = has_accession;
		bblp->has_gb = has_gb;
		bblp->is_na = is_na;
	}

	return;
}
190 
BioseqSetLabelFunc(Pointer data,CharPtr buffer,Int2 buflen,Uint1 content)191 static Int2 LIBCALLBACK BioseqSetLabelFunc ( Pointer data, CharPtr buffer, Int2 buflen, Uint1 content)
192 {
193 	return BioseqSetLabel ((BioseqSetPtr)data, buffer, buflen, content);
194 }
195 
BioseqSetLabel(BioseqSetPtr bssp,CharPtr buffer,Int2 buflen,Uint1 content)196 NLM_EXTERN Int2 LIBCALL BioseqSetLabel (BioseqSetPtr bssp, CharPtr buffer, Int2 buflen, Uint1 content)
197 {
198 	CharPtr set_name;
199 	Int2 diff, len;
200 	BestBioseqLabel bbl;
201 	Char tbuf[40];
202 
203 	if ((bssp == NULL) || (buflen < 1))
204 		return 0;
205 
206 	set_name = AsnEnumTypeStr(BIOSEQ_SET_class, (Int2)(bssp->_class));
207 	len = buflen;
208 
209 	if (content == OM_LABEL_TYPE)
210 		return LabelCopy(buffer, set_name, buflen);
211 
212 	if (content != OM_LABEL_CONTENT)
213 	{
214 		diff = LabelCopyExtra(buffer, set_name, buflen, NULL, ": ");
215 		buflen -= diff;
216 		buffer += diff;
217 	}
218 
219 	MemSet(&bbl, 0, sizeof(BestBioseqLabel));
220 	SeqEntryExplore(bssp->seq_set, (Pointer)&bbl, FindBestBioseqLabel);
221 	tbuf[0] = '\0';
222 
223 	if (bbl.bsp == NULL)
224 		StringMove(tbuf, "(No Bioseqs)");
225 	else
226 		SeqIdWrite(bbl.bsp->id, tbuf, PRINTID_FASTA_LONG, 39);
227 
228 	diff = LabelCopy(buffer, tbuf, buflen);
229 	buflen -= diff;
230 	buffer += diff;
231 
232 	return (len - buflen);   /* no special SUMMARY yet */
233 }
234 
BioseqSetSubTypeFunc(Pointer ptr)235 static Uint2 LIBCALLBACK BioseqSetSubTypeFunc (Pointer ptr)
236 {
237 	if (ptr == NULL)
238 		return 0;
239 	return (Uint2)((BioseqSetPtr)ptr)->_class;
240 }
241 
242 /*****************************************************************************
243 *
244 *   SeqEntry ObjMgr Routines
245 *
246 *****************************************************************************/
/* Type-name string handed to ObjMgrTypeLoad() when OBJ_SEQENTRY is registered in SeqSetAsnLoad(). */
static CharPtr seqentrytypename = "SeqEntry";
248 
SeqEntryNewFunc(void)249 static Pointer LIBCALLBACK SeqEntryNewFunc (void)
250 {
251 	return (Pointer) ValNodeNew(NULL);
252 }
253 
SeqEntryFreeFunc(Pointer data)254 static Pointer LIBCALLBACK SeqEntryFreeFunc (Pointer data)
255 {
256 	return (Pointer) SeqEntryFree ((SeqEntryPtr) data);
257 }
258 
SeqEntryAsnWriteFunc(Pointer data,AsnIoPtr aip,AsnTypePtr atp)259 static Boolean LIBCALLBACK SeqEntryAsnWriteFunc (Pointer data, AsnIoPtr aip, AsnTypePtr atp)
260 {
261 	return SeqEntryAsnWrite((SeqEntryPtr)data, aip, atp);
262 }
263 
SeqEntryAsnReadFunc(AsnIoPtr aip,AsnTypePtr atp)264 static Pointer LIBCALLBACK SeqEntryAsnReadFunc (AsnIoPtr aip, AsnTypePtr atp)
265 {
266 	return (Pointer) SeqEntryAsnRead (aip, atp);
267 }
268 
SeqEntryLabelFunc(Pointer data,CharPtr buffer,Int2 buflen,Uint1 content)269 static Int2 LIBCALLBACK SeqEntryLabelFunc ( Pointer data, CharPtr buffer, Int2 buflen, Uint1 content)
270 {
271 	return SeqEntryLabel((SeqEntryPtr)data, buffer, buflen, content);
272 }
273 
SeqEntryLabel(SeqEntryPtr sep,CharPtr buffer,Int2 buflen,Uint1 content)274 NLM_EXTERN Int2 LIBCALL SeqEntryLabel (SeqEntryPtr sep, CharPtr buffer, Int2 buflen, Uint1 content)
275 {
276 	if ((sep == NULL) || (buflen < 1))
277 		return 0;
278 
279 	if (IS_Bioseq(sep))
280 		return BioseqLabel((BioseqPtr)(sep->data.ptrvalue), buffer, buflen, content);
281 	else
282 		return BioseqSetLabel((BioseqSetPtr)(sep->data.ptrvalue), buffer, buflen, content);
283 }
284 
SeqEntrySubTypeFunc(Pointer ptr)285 static Uint2 LIBCALLBACK SeqEntrySubTypeFunc (Pointer ptr)
286 {
287 	if (ptr == NULL)
288 		return 0;
289 	return (Uint2)((SeqEntryPtr)ptr)->choice;
290 }
291 
292 /*****************************************************************************
293 *
294 *   SeqSetAsnLoad()
295 *
296 *****************************************************************************/
SeqSetAsnLoad(void)297 NLM_EXTERN Boolean LIBCALL SeqSetAsnLoad (void)
298 {
299     if (loaded)
300         return TRUE;
301     loaded = TRUE;
302 
303     if (! GeneralAsnLoad())
304     {
305         loaded = FALSE;
306         return FALSE;
307     }
308     if (! SeqAsnLoad())
309     {
310         loaded = FALSE;
311         return FALSE;
312     }
313     if (! AsnLoad())
314     {
315         loaded = FALSE;
316         return FALSE;
317     }
318 
319 	ObjMgrTypeLoad(OBJ_BIOSEQSET, "Bioseq-set", bioseqsettypename, "Set of Biol. Sequences",
320 		BIOSEQ_SET, BioseqSetNewFunc, BioseqSetAsnReadFunc, BioseqSetAsnWriteFunc,
321 		BioseqSetFreeFunc, BioseqSetLabelFunc, BioseqSetSubTypeFunc);
322 
323 	ObjMgrTypeLoad(OBJ_SEQENTRY, "Seq-entry", seqentrytypename, "Sequence Entry",
324 		SEQ_ENTRY, SeqEntryNewFunc, SeqEntryAsnReadFunc, SeqEntryAsnWriteFunc,
325 		SeqEntryFreeFunc, SeqEntryLabelFunc, SeqEntrySubTypeFunc);
326 
327 	return TRUE;
328 }
329 
330 
331 /*****************************************************************************
332 *
333 *   Bioseq-set Routines
334 *
335 *****************************************************************************/
336 
337 /*****************************************************************************
338 *
339 *   BioseqSetNew()
340 *
341 *****************************************************************************/
BioseqSetNew(void)342 NLM_EXTERN BioseqSetPtr LIBCALL BioseqSetNew (void)
343 {
344     BioseqSetPtr bsp;
345 
346     bsp = (BioseqSetPtr)MemNew(sizeof(BioseqSet));
347 	if (bsp == NULL) return bsp;
348     bsp->level = INT2_MIN;
349 	SeqMgrAdd (SM_BIOSEQSET, (Pointer)bsp);   /* add to BioseqSet list */
350     return bsp;
351 }
352 
353 /*****************************************************************************
354 *
355 *   BioseqSetFree(bsp)
356 *       Frees one BioseqSet and associated data
357 *
358 *****************************************************************************/
BioseqSetFree(BioseqSetPtr bsp)359 NLM_EXTERN BioseqSetPtr LIBCALL BioseqSetFree (BioseqSetPtr bsp)
360 {
361 	Boolean top = FALSE;
362 
363     if (bsp == NULL)
364         return bsp;
365 
366 	if (bsp->idx.parentptr == NULL || bsp->idx.parenttype == OBJ_SEQSUB) {
367 		if (bsp->seqentry != NULL) {
368 			SeqMgrDeleteIndexesInRecord (bsp->seqentry);
369 			top = TRUE;
370 		}
371 	}
372 
373 	BioseqSetFreeComponents(bsp, TRUE);
374 
375 	if (! SeqMgrDelete(SM_BIOSEQSET, (Pointer)bsp))
376 	    ErrPostEx(SEV_ERROR, 0,0, "BioseqSetFree: pointer not registered");
377 
378 	if (top) {
379 		ObjMgrDeleteAllInRecord ();
380 	}
381 
382 	return (BioseqSetPtr)MemFree(bsp);
383 }
384 
385 
/* Reverses a singly linked SeqEntry chain in place by re-pointing each
 * node's next field; returns the new head (NULL for an empty chain). */
static SeqEntryPtr ReverseSeqEntryList (SeqEntryPtr start)
{
  SeqEntryPtr reversed, following;

  for (reversed = NULL; start != NULL; start = following) {
    following = start->next;     /* remember the rest before relinking */
    start->next = reversed;
    reversed = start;
  }

  return reversed;
}
399 
400 /*****************************************************************************
401 *
402 *   BioseqSetFreeComponents(bsp, parts)
403 *       Frees associated data of a BioseqSet
404 *   	if (parts == FALSE)
405 *   	      Calls SeqEntryFreeComponents() for seq-set
406 *       else
407 *             Calls SeqEntryFree()
408 *   	Does not free the BioseqSet itself
409 *   	Called by BioseqSetFree
410 *
411 *****************************************************************************/
BioseqSetFreeComponents(BioseqSetPtr bsp,Boolean parts)412 NLM_EXTERN BioseqSetPtr LIBCALL BioseqSetFreeComponents (BioseqSetPtr bsp, Boolean parts)
413 {
414     SeqAnnotPtr sp, spnext;
415     SeqEntryPtr sep, sepnext;
416 
417     if (bsp == NULL)
418         return bsp;
419 
420     bsp->id = ObjectIdFree(bsp->id);
421     bsp->coll = DbtagFree(bsp->coll);
422     bsp->release = MemFree(bsp->release);
423     bsp->date = DateFree(bsp->date);
424     bsp->descr = SeqDescrFree(bsp->descr);
425     sep = bsp->seq_set;
426     /* reverse seq-set, to increase speed of freeing it, since the elements will have
427      * probably been added to the index in order, and it's faster to take the last out first.
428      */
429     sep = ReverseSeqEntryList(sep);
430     bsp->seq_set = sep;
431     while (sep != NULL)
432     {
433         sepnext = sep->next;
434 		    if (parts)
435         {
436 	          SeqEntryFree(sep);
437         }
438 		    else
439         {
440 			      SeqEntryFreeComponents(sep);
441         }
442         sep = sepnext;
443     }
444     sp = bsp->annot;
445 	  bsp->annot = NULL;
446     while (sp != NULL)
447     {
448         spnext = sp->next;
449         SeqAnnotFree(sp);
450         sp = spnext;
451     }
452 	return bsp;
453 }
454 /*****************************************************************************
455 *
456 *   BioseqSetAsnWrite(bsp, aip, atp)
457 *   	atp is the current type (if identifier of a parent struct)
458 *       if atp == NULL, then assumes it stands alone (BioseqSet ::=)
459 *
460 *****************************************************************************/
BioseqSetAsnWrite(BioseqSetPtr bsp,AsnIoPtr aip,AsnTypePtr orig)461 NLM_EXTERN Boolean LIBCALL BioseqSetAsnWrite (BioseqSetPtr bsp, AsnIoPtr aip, AsnTypePtr orig)
462 {
463 	DataVal av;
464 	AsnTypePtr atp;
465     SeqEntryPtr sep;
466 	Boolean retval = FALSE;
467 
468 	if (! loaded)
469 	{
470 		if (! SeqSetAsnLoad())
471 			return FALSE;
472 	}
473 
474 	if (aip == NULL)
475 		return FALSE;
476 
477 	atp = AsnLinkType(orig, BIOSEQ_SET);   /* link local tree */
478 	if (atp == NULL) return FALSE;
479 
480 	if (bsp == NULL) { AsnNullValueMsg(aip, atp); goto erret; }
481 
482     MemSet ((Pointer) (&av), 0, sizeof (DataVal));
483 
484 	if (! AsnOpenStruct(aip, atp, (Pointer)bsp)) goto erret;
485 
486     if (bsp->id != NULL)
487 	{
488         if (! ObjectIdAsnWrite(bsp->id, aip, BIOSEQ_SET_id)) goto erret;
489 	}
490     if (bsp->coll != NULL)
491 	{
492         if (! DbtagAsnWrite(bsp->coll, aip, BIOSEQ_SET_coll)) goto erret;
493 	}
494     if (bsp->level != INT2_MIN)
495     {
496         av.intvalue = bsp->level;
497         if (! AsnWrite(aip, BIOSEQ_SET_level, &av)) goto erret;
498     }
499     if (bsp->_class != 0)
500     {
501         av.intvalue = bsp->_class;
502         if (! AsnWrite(aip, BIOSEQ_SET_class, &av)) goto erret;
503     }
504     if (bsp->release != NULL)
505     {
506         av.ptrvalue = bsp->release;
507         if (! AsnWrite(aip, BIOSEQ_SET_release, &av)) goto erret;
508     }
509     if (bsp->date != NULL)
510 	{
511         if (! DateAsnWrite(bsp->date, aip, BIOSEQ_SET_date)) goto erret;
512 	}
513     if (bsp->descr != NULL)              /* Seq-descr optional */
514 	{
515         if (! SeqDescrAsnWrite(bsp->descr, aip, BIOSEQ_SET_descr)) goto erret;
516 	}
517 
518     if (! AsnOpenStruct(aip, BIOSEQ_SET_seq_set, (Pointer)bsp->seq_set)) goto erret;
519     sep = bsp->seq_set;
520     while (sep != NULL)
521     {
522         if (! SeqEntryAsnWrite(sep, aip, BIOSEQ_SET_seq_set_E)) goto erret;
523         sep = sep->next;
524     }
525     if (! AsnCloseStruct(aip, BIOSEQ_SET_seq_set, (Pointer)bsp->seq_set)) goto erret;
526     if (bsp->annot != NULL)              /* annotation optional */
527 	{
528         if (! SeqAnnotSetAsnWrite(bsp->annot, aip, BIOSEQ_SET_annot, BIOSEQ_SET_annot_E)) goto erret;
529 	}
530 
531     if (! AsnCloseStruct(aip, atp, (Pointer)bsp)) goto erret;
532 	retval = TRUE;
533 erret:
534 	AsnUnlinkType(orig);        /* unlink local tree */
535 	return retval;
536 }
537 
/* Forward declaration; the definition follows directly below. */
static void SeqDescrPack PROTO((ValNodePtr PNTR to, ValNodePtr PNTR from));
539 
540 /*****************************************************************************
541 *
542 *   SeqDescrPack(to, from)
543 *       address of asnodeptr at head of list to add to
544 *       address of asnodeptr at head of list to add from.
545 *       sets *from to NULL on completion.
546 *       deletes extra titles
547 *****************************************************************************/
/*
 * Moves every descriptor from the list at *from onto the end of the list
 * at *to, leaving *from NULL.
 *
 *   to   - address of the head of the destination list (may hold NULL)
 *   from - address of the head of the list to drain
 *
 * If the destination already contains a title, titles coming from *from
 * are freed instead of being appended.  The merged list is always
 * NULL-terminated, even when the last incoming node was a discarded title.
 * NULL to/from addresses are ignored.
 */
static void SeqDescrPack (ValNodePtr PNTR to, ValNodePtr PNTR from)
{
    ValNodePtr currdescr, newdescr, tmpdescr;
    Boolean have_title = FALSE;

    if ((to == NULL) || (from == NULL))
        return;

    currdescr = *to;
    if (currdescr == NULL)   /* nothing to merge with */
    {
        *to = *from;
        *from = NULL;
        return;
    }

    /* walk to the tail of the destination, noting whether it has a title */
    while (currdescr->next != NULL)
    {
        if (currdescr->choice == Seq_descr_title)
            have_title = TRUE;
        currdescr = currdescr->next;
    }
    if (currdescr->choice == Seq_descr_title)
        have_title = TRUE;

    newdescr = *from;
    while (newdescr != NULL)
    {
        if ((have_title) && (newdescr->choice == Seq_descr_title))
        {
            /* extra title: discard it */
            tmpdescr = newdescr;
            newdescr = tmpdescr->next;
            MemFree(tmpdescr->data.ptrvalue);
            MemFree(tmpdescr);
        }
        else
        {
            currdescr->next = newdescr;
            newdescr = newdescr->next;
            currdescr = currdescr->next;
        }
    }

    /* The last appended node may still point at a title freed above;
     * terminate the merged list explicitly. */
    currdescr->next = NULL;
    *from = NULL;

    return;
}
590 
591 /*****************************************************************************
592 *
593 *   BioseqSetAsnRead(aip, atp)
594 *   	atp is the current type (if identifier of a parent struct)
595 *            assumption is readIdent has occurred
596 *       if atp == NULL, then assumes it stands alone and read ident
597 *            has not occurred.
598 *
599 *****************************************************************************/
BioseqSetAsnRead(AsnIoPtr aip,AsnTypePtr orig)600 NLM_EXTERN BioseqSetPtr LIBCALL BioseqSetAsnRead (AsnIoPtr aip, AsnTypePtr orig)
601 {
602   DataVal av;
603   AsnTypePtr atp, oldatp;
604   BioseqSetPtr bsp=NULL, tmp;
605   SeqEntryPtr curr, next, hold = NULL;
606   SeqAnnotPtr sap;
607   AsnOptionPtr aop;
608   Op_objssetPtr oop = NULL;
609   Op_objseqPtr osp = NULL;
610   Boolean this_one = TRUE,
611           get_bioseq = FALSE,
612           check_set = FALSE,
613           got_it = FALSE,
614           old_in_right_set = FALSE;
615 
616 
617 	if (! loaded)
618 	{
619 		if (! SeqSetAsnLoad())
620 			return bsp;
621 	}
622 
623 	if (aip == NULL)
624 		return bsp;
625 
626 	if ((aop = AsnIoOptionGet(aip, OP_NCBIOBJSSET, SEQENTRY_OPTION_MAX_COMPLEX, NULL)) != NULL)
627 	{
628 		this_one = FALSE;
629 		check_set = TRUE;
630 		oop = (Op_objssetPtr) aop->data.ptrvalue;
631 		old_in_right_set = oop->in_right_set;
632 		if (oop->retcode == 1)    /* get bioseq */
633 			get_bioseq = TRUE;
634 		else
635 		{
636 			if((aop = AsnIoOptionGet(aip, OP_NCBIOBJSEQ, BIOSEQ_CHECK_ID, NULL))!=NULL)
637 				osp = (Op_objseqPtr)aop->data.ptrvalue;
638 		}
639 	}
640 
641 	if (orig == NULL)           /* BioseqSet ::= (self contained) */
642 		atp = AsnReadId(aip, amp, BIOSEQ_SET);
643 	else
644 		atp = AsnLinkType(orig, BIOSEQ_SET);    /* link in local tree */
645     oldatp = atp;
646 	if (atp == NULL) return bsp;
647 
648 	bsp = BioseqSetNew();
649 	if (bsp == NULL) goto erret;
650 
651 	if (AsnReadVal(aip, atp, &av) <= 0) goto erret;    /* read the start struct */
652     curr = NULL;
653 
654     while ((atp = AsnReadId(aip, amp, atp)) != oldatp)
655     {
656 		if (atp == NULL) goto erret;
657         if (atp == BIOSEQ_SET_id)
658 		{
659             bsp->id = ObjectIdAsnRead(aip, atp);
660 			if (bsp->id == NULL) goto erret;
661 		}
662         else if (atp == BIOSEQ_SET_coll)
663 		{
664             bsp->coll = DbtagAsnRead(aip, atp);
665 			if (bsp->coll == NULL) goto erret;
666 		}
667         else if (atp == BIOSEQ_SET_date)
668 		{
669             bsp->date = DateAsnRead(aip, atp);
670 			if (bsp->date == NULL) goto erret;
671 		}
672         else if (atp == BIOSEQ_SET_descr)
673 		{
674             bsp->descr = SeqDescrAsnRead(aip, atp);
675 			if (bsp->descr == NULL) goto erret;
676 		}
677         else if (atp == BIOSEQ_SET_seq_set_E)
678         {
679 			if (got_it)    /* already have the entry we want */
680 				AsnSkipValue(aip, atp);
681             else
682 			{
683 				if ((next = SeqEntryAsnRead(aip, atp)) != NULL)
684 				{
685 					if (IS_Bioseq(next))
686 						SeqMgrConnect(SM_BIOSEQ, next->data.ptrvalue,
687 							SM_BIOSEQSET, (Pointer) bsp);
688 					else
689 						SeqMgrConnect(SM_BIOSEQSET, next->data.ptrvalue,
690 							SM_BIOSEQSET, (Pointer) bsp);
691 
692 					if (get_bioseq)      /* will only be here if got it */
693 					{
694 						got_it = TRUE;
695 						hold = next;
696 					}
697 					else if (check_set)
698 					{
699 						if (oop->working_on_set == 2)  /* found set in lower level */
700 							got_it = TRUE;
701 						else if ((oop->retcode == 2 || oop->retcode == 3) && (! oop->in_right_set))
702 						{
703 							if (osp->found_it)   /* found the component Bioseq */
704 							{
705 								got_it = TRUE;
706 								oop->working_on_set = 2;   /* all done */
707 							}
708 						}
709 
710 						if (got_it)
711 						{
712 							hold = next;
713 							curr = bsp->seq_set;      /* free sibling sets - not needed */
714 							while (curr != NULL)
715 							{
716 						 		next = curr->next;
717 								SeqEntryFree(curr);
718 								curr = next;
719 							}
720 							bsp->seq_set = NULL;
721 						}
722 					}
723 
724 					if (! got_it)
725 					{
726 			            if (curr == NULL)
727     			            bsp->seq_set = next;
728         			    else
729             			    curr->next = next;
730 	            		curr = next;
731 					}
732 				}
733 			}
734         }
735         else if (atp == BIOSEQ_SET_annot)
736         {
737 			if ((get_bioseq) && (! got_it))
738 				AsnSkipValue(aip, atp);
739 			else
740 			{
741 	            bsp->annot = SeqAnnotSetAsnRead(aip, atp, BIOSEQ_SET_annot_E);
742 				/* if (bsp->annot == NULL) goto erret;n*/
743 			}
744         }
745         else
746         {
747             if (AsnReadVal(aip, atp, &av) <= 0) goto erret;    /* takes care of everything else */
748             if (atp == BIOSEQ_SET_level)
749                 bsp->level = (Int2)av.intvalue;
750             else if (atp == BIOSEQ_SET_class)
751 			{
752                 bsp->_class = (Uint1)av.intvalue;
753 				if ((! get_bioseq) && (check_set))       /* looking for a set */
754 				{
755 					switch (oop->retcode)
756 					{
757 						case 2:   /* seg-set */
758 							if (bsp->_class == 2)
759 								this_one = TRUE;
760 							break;
761 						case 3:   /* nuc-prot */
762 							if (bsp->_class == 1)
763 								this_one = TRUE;
764 							break;
765 						case 4:   /* pub-set */
766 							if (bsp->_class == 9)
767 								this_one = TRUE;
768 							break;
769 					}
770 					if (this_one)
771 						oop->in_right_set = TRUE;
772 				}
773 			}
774             else if (atp == BIOSEQ_SET_release)
775                 bsp->release = (CharPtr)av.ptrvalue;
776         }
777     }
778     if (AsnReadVal(aip, atp, &av) <= 0) goto erret;   /* end BioseqSet */
779 
780   if (check_set)      /* check sets */
781   {
782     if (! got_it)
783     {
784       if (get_bioseq)   /* can't use anything at this level */
785       {
786         return BioseqSetFree(bsp);
787       }
788 
789       if (osp->found_it)    /* found the contained Bioseq */
790       {
791         if (! this_one)
792           oop->working_on_set = 1;
793         else
794           oop->working_on_set = 2;
795         oop->in_right_set = old_in_right_set;
796         return bsp;
797       }
798     }
799 
800     if (got_it)
801     {
802       if ((this_one) && (oop->working_on_set != 2))
803       {
804         oop->working_on_set = 2;
805         bsp->seq_set = hold;
806       }
807       /* copy the annot and descr if lower level is BioseqSet */
808       else if (IS_Bioseq_set(hold))
809       {                              /* make this smarter */
810         ObjMgrConnect (OBJ_BIOSEQSET, (Pointer) hold->data.ptrvalue, 0, NULL); /* disconnect */
811         tmp = (BioseqSetPtr)hold->data.ptrvalue;
812         hold->data.ptrvalue = NULL;
813         hold = SeqEntryFree (hold);
814         if (tmp->annot == NULL)
815         {
816           tmp->annot = bsp->annot;
817         }
818         else
819         {
820           sap = tmp->annot;
821           while (sap->next != NULL)
822           {
823             sap = sap->next;
824           }
825           sap->next = bsp->annot;
826         }
827         bsp->annot = NULL;
828         SeqDescrPack(&tmp->descr, &bsp->descr);
829         BioseqSetFree(bsp);
830         bsp = tmp;
831       }
832       else
833       {
834         bsp->seq_set = hold;
835       }
836     }
837   }
838 
839   if (check_set)
840     oop->in_right_set = old_in_right_set;
841 ret:
842   AsnUnlinkType(orig);     /*  unlink local tree */
843   return bsp;
844 erret:
845   aip->io_failure = TRUE;
846   bsp = BioseqSetFree(bsp);
847   goto ret;
848 }
849 
850 /*****************************************************************************
851 *
852 *   SeqEntryNew()
853 *
854 *****************************************************************************/
SeqEntryNew(void)855 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryNew (void)
856 {
857     SeqEntryPtr sep;
858 
859     sep = ValNodeNew(NULL);
860 	if (sep == NULL) return sep;
861 
862     return sep;
863 }
864 
865 /*****************************************************************************
866 *
867 *   SeqEntryFree(sep)
868 *       Frees one SeqEntry and associated data
869 *
870 *****************************************************************************/
SeqEntryFree(SeqEntryPtr sep)871 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryFree (SeqEntryPtr sep)
872 {
873     if (sep == NULL)
874         return sep;
875 
876     if (sep->choice == 1)
877         BioseqFree((BioseqPtr)sep->data.ptrvalue);
878     else if (sep->choice == 2)
879         BioseqSetFree((BioseqSetPtr)sep->data.ptrvalue);
880 
881 	return (SeqEntryPtr)MemFree(sep);
882 }
883 
884 /*****************************************************************************
885 *
886 *   SeqEntryFreeComponents(sep)
887 *       Frees components of elements associated with SeqEntry
888 *   	used by SeqMgr for caching out
889 *
890 *****************************************************************************/
SeqEntryFreeComponents(SeqEntryPtr sep)891 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryFreeComponents (SeqEntryPtr sep)
892 {
893     if (sep == NULL)
894         return sep;
895 
896     if (sep->choice == 1)
897         BioseqFreeComponents((BioseqPtr)sep->data.ptrvalue);
898     else if (sep->choice == 2)
899         BioseqSetFreeComponents((BioseqSetPtr)sep->data.ptrvalue, FALSE);
900 
901 	return sep;
902 }
903 
904 /*****************************************************************************
905 *
906 *   SeqEntryAsnWrite(sep, aip, atp)
907 *   	atp is the current type (if identifier of a parent struct)
908 *       if atp == NULL, then assumes it stands alone (SeqEntry ::=)
909 *
910 *****************************************************************************/
SeqEntryAsnWrite(SeqEntryPtr sep,AsnIoPtr aip,AsnTypePtr orig)911 NLM_EXTERN Boolean LIBCALL SeqEntryAsnWrite (SeqEntryPtr sep, AsnIoPtr aip, AsnTypePtr orig)
912 {
913     AsnTypePtr atp;
914 	DataVal av;
915 	Boolean retval = FALSE;
916 
917 	if (! loaded)
918 	{
919 		if (! SeqSetAsnLoad())
920 			return FALSE;
921 	}
922 
923 	if (aip == NULL)
924 		return FALSE;
925 
926 	atp = AsnLinkType(orig, SEQ_ENTRY);   /* link local tree */
927 	if (atp == NULL) return FALSE;
928 
929 	if (sep == NULL) { AsnNullValueMsg(aip, atp); goto erret; }
930 
931     MemSet ((Pointer) (&av), 0, sizeof (DataVal));
932 
933 	av.ptrvalue = (Pointer)sep;
934     if (! AsnWriteChoice(aip, atp, (Int2)sep->choice, &av)) goto erret;
935     if (sep->choice == 1)
936 	{
937         if (! BioseqAsnWrite((BioseqPtr)sep->data.ptrvalue, aip, SEQ_ENTRY_seq))
938 			goto erret;
939 	}
940     else if (sep->choice == 2)
941 	{
942         if (! BioseqSetAsnWrite((BioseqSetPtr)sep->data.ptrvalue, aip, SEQ_ENTRY_set))
943 			goto erret;
944 	}
945 	retval = TRUE;
946 erret:
947     AsnUnlinkType(orig);
948     return retval;
949 }
950 
951 /*****************************************************************************
952 *
953 *   SeqEntryAsnRead(aip, atp)
954 *   	atp is the current type (if identifier of a parent struct)
955 *            assumption is readIdent has occurred
956 *       if atp == NULL, then assumes it stands alone and read ident
957 *            has not occurred.
958 *
959 *****************************************************************************/
SeqEntryAsnRead(AsnIoPtr aip,AsnTypePtr orig)960 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryAsnRead (AsnIoPtr aip, AsnTypePtr orig)
961 {
962 	DataVal av;
963 	AsnTypePtr atp;
964     SeqEntryPtr sep=NULL;
965 	Uint1 type = 0;
966 
967 	if (! loaded)
968 	{
969 		if (! SeqSetAsnLoad())
970 			return sep;
971 	}
972 
973 	if (aip == NULL)
974 		return sep;
975 
976 	if (orig == NULL)           /* SeqEntry ::= (self contained) */
977 		atp = AsnReadId(aip, amp, SEQ_ENTRY);
978 	else
979 		atp = AsnLinkType(orig, SEQ_ENTRY);    /* link in local tree */
980 	if (atp == NULL) return sep;
981 
982 	sep = SeqEntryNew();
983 	if (sep == NULL) goto erret;
984 
985 	if (AsnReadVal(aip, atp, &av) <= 0) goto erret;    /* read the CHOICE */
986     atp = AsnReadId(aip, amp, atp); if (atp == NULL) goto erret;   /* which choice? */
987     if (atp == SEQ_ENTRY_seq)
988     {
989         sep->choice = 1;
990         sep->data.ptrvalue = (Pointer) BioseqAsnRead(aip, atp);
991         type = (Uint1)SM_BIOSEQ;
992     }
993     else if (atp == SEQ_ENTRY_set)
994     {
995         sep->choice = 2;
996         sep->data.ptrvalue = (Pointer) BioseqSetAsnRead(aip, atp);
997 	type = (Uint1)SM_BIOSEQSET;
998     }
999 
1000 	if (sep->data.ptrvalue == NULL)
1001 		goto erret;
1002         else
1003 		SeqMgrSeqEntry(type, sep->data.ptrvalue, sep);
1004 ret:
1005     AsnUnlinkType(orig);      /*  unlink local tree */
1006 	return sep;
1007 erret:
1008     aip->io_failure = TRUE;
1009 	sep = SeqEntryFree(sep);
1010 	goto ret;
1011 }
1012 
1013 /*****************************************************************************
1014 *
1015 *   Used by SeqEntryAsnGet to clear unwanted features
1016 *
1017 *****************************************************************************/
1018 
/* Helper record for the feature-clearing code used by SeqEntryAsnGet (see banner above). */
typedef struct secf {
	Int2 numseqids;          /* presumably the number of entries in seqids - TODO confirm against its users */
	SeqIdPtr PNTR seqids;    /* pointer to SeqIdPtr values; presumably an array - TODO confirm */
} Secf, PNTR SecfPtr;
1023 
/* Forward declarations of file-local helpers; the first two have the same
 * (sep, data, index, indent) signature as the FindBestBioseqLabel() callback
 * passed to SeqEntryExplore() earlier in this file. */
static void SeqEntryGetIds PROTO((SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent));
static void SeqEntryDumpFeats PROTO((SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent));
static void SeqEntryClearFeatures PROTO((SeqEntryPtr sep));
1027 
/*****************************************************************************
*
*   SeqEntryAsnGet(aip, orig, sip, retcode)
*       orig is the current type (if identifier of a parent struct);
*            the assumption is that readIdent has already occurred
*       if orig == NULL, the entry is assumed to stand alone and read ident
*            has not occurred.
*       looks for the entry with sip=SeqId
*       returns a collection of maximum complexity = retcode
*         where retcode =
*           entry (0)          -- the "natural" entry for this (nuc-prot)
*           bioseq (1)         -- only the bioseq identified
*           bioseq-set (2)     -- any seg-set it may be part of
*           nuc-prot (3)       -- any nuc-prot it may be part of
*           pub-set (4)
*         (in the ASN.1 definition the default is entry)
*
*****************************************************************************/
SeqEntryAsnGet(AsnIoPtr aip,AsnTypePtr orig,SeqIdPtr sip,Int2 retcode)1045 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryAsnGet (AsnIoPtr aip, AsnTypePtr orig, SeqIdPtr sip, Int2 retcode)
1046 {
1047 	AsnOptionPtr aop;
1048 	Op_objssetPtr oop;
1049 	Op_objseqPtr osp;
1050     SeqEntryPtr sep=NULL;
1051 	BioseqSetPtr bssp;
1052 	BioseqPtr bsp;
1053     SeqAnnotPtr sap;
1054     DataVal av;
1055 	Boolean pack_it = FALSE;
1056 
1057 	if ((aip == NULL) || (sip == NULL) || (retcode < 0) || (retcode > 4))
1058 		return sep;
1059 
1060 	av.realvalue = 0.0;    /* just zeros it out to prevent debugger fuss */
1061 	oop = (Op_objssetPtr)MemNew(sizeof(Op_objsset));
1062 	if (oop == NULL) return sep;
1063 
1064 	oop->sip = sip;
1065 	oop->retcode = retcode;
1066 	av.ptrvalue = (Pointer) oop;
1067 	aop = AsnIoOptionNew(aip, (Int2)OP_NCBIOBJSSET, (Int2)SEQENTRY_OPTION_MAX_COMPLEX, av, DefAsnOptionFree);
1068 	if (aop == NULL)
1069 		return sep;
1070 
1071 	osp = (Op_objseqPtr)MemNew(sizeof(Op_objseq));
1072 	if (osp == NULL) return sep;
1073 
1074 	osp->sip = sip;
1075     if (retcode == 1)   /* just after a Bioseq */
1076         osp->load_by_id = TRUE;
1077 	av.ptrvalue = (Pointer) osp;
1078 	aop = AsnIoOptionNew(aip, OP_NCBIOBJSEQ, BIOSEQ_CHECK_ID, av, DefAsnOptionFree);
1079 	if (aop == NULL)
1080 		return sep;
1081 
1082 	sep = SeqEntryAsnRead(aip, orig);
1083 	if (sep == NULL) goto erret;
1084 
1085 	if ((retcode == 1) && (IS_Bioseq_set(sep)))
1086 		pack_it = TRUE;
1087 	else if ((retcode == 2) && (IS_Bioseq_set(sep)))
1088 	{							/* got Bioseq, but not part of seg-set */
1089 		bssp = (BioseqSetPtr)sep->data.ptrvalue;
1090 		if (bssp->_class != 2)   /* not a seg-set */
1091 			pack_it = TRUE;
1092 	}
1093 	else if ((retcode == 3) && (IS_Bioseq_set(sep)))
1094 	{							/* got Bioseq, but not part of nuc-prot set */
1095 		bssp = (BioseqSetPtr)sep->data.ptrvalue;
1096 		if (bssp->_class != 1)   /* not a nuc-prot set */
1097 			pack_it = TRUE;
1098 	}
1099 
1100 
1101 	if (! osp->found_it)
1102 	{
1103 		sep = SeqEntryFree(sep);
1104 	}
1105 	else if (pack_it)  /* want Bioseq out of Bioseq-set */
1106 	{
1107 		bssp = (BioseqSetPtr)sep->data.ptrvalue;
1108 		sep->data.ptrvalue = NULL;
1109 		SeqEntryFree(sep);
1110 		sep = bssp->seq_set;
1111 		bssp->seq_set = NULL;
1112 		bsp = (BioseqPtr)sep->data.ptrvalue;
1113 		ObjMgrConnect(OBJ_BIOSEQ, (Pointer)bsp, 0, NULL); /* disconnect */
1114         SeqDescrPack(&bsp->descr, &bssp->descr);
1115 		if (bsp->annot == NULL)
1116 			bsp->annot = bssp->annot;
1117 		else
1118         {
1119     	    sap = bsp->annot;
1120         	while (sap->next != NULL)
1121             	sap = sap->next;
1122 	        sap->next = bssp->annot;
1123         }
1124 		bssp->annot = NULL;
1125 		BioseqSetFree(bssp);
1126 	}
1127 
1128 	if (retcode)
1129 		SeqEntryClearFeatures(sep);      /* clear unwanted features */
1130 
1131 erret:
1132 	AsnIoOptionFree(aip, OP_NCBIOBJSSET, SEQENTRY_OPTION_MAX_COMPLEX);
1133 	AsnIoOptionFree(aip, OP_NCBIOBJSEQ, BIOSEQ_CHECK_ID);
1134 	return sep;
1135 }
1136 
1137 /*****************************************************************************
1138 *
1139 *   void SeqEntryClearFeatures(sep)
1140 *      clears features which do not refer to a Bioseq in sep
1141 *
1142 *****************************************************************************/
/*
 * SeqEntryClearFeatures
 *   Removes from sep every feature that refers to no Bioseq contained in
 *   sep.  Runs two SeqEntryExplore passes over the entry: SeqEntryGetIds
 *   gathers the ids, SeqEntryDumpFeats then prunes the feature tables.
 *
 *   sep  entry to clean; NULL is ignored.
 *
 *   If either scratch allocation fails the entry is left untouched.
 */
static void SeqEntryClearFeatures (SeqEntryPtr sep)
{
	SecfPtr sp;
	Int2 bioseq_count;

	if (sep == NULL)
		return;

	sp = (SecfPtr)MemNew(sizeof(Secf));
	if (sp == NULL) return;

	/* NOTE(review): the count is narrowed to Int2 to match Secf.numseqids;
	   entries with more members than Int2 can hold are not handled */
	bioseq_count = (Int2) SeqEntryCount(sep);  /* overestimate */
	sp->numseqids = 0;
	sp->seqids = (SeqIdPtr PNTR)MemNew(bioseq_count * sizeof(SeqIdPtr));
	if (sp->seqids != NULL)
	{
		SeqEntryExplore(sep, (Pointer)sp, SeqEntryGetIds);
		SeqEntryExplore(sep, (Pointer)sp, SeqEntryDumpFeats);
		MemFree(sp->seqids);
	}

	/* released on every path, including the failed array allocation */
	MemFree(sp);
}
1164 
1165 /*****************************************************************************
1166 *
1167 *   SeqEntryGetIds()
1168 *   	SeqEntryExplore function to make a list of seqids of all bioseqs in
1169 *       a seqentry.
1170 *
1171 *****************************************************************************/
/*
 * SeqEntryGetIds
 *   SeqEntryExplore callback.  For a Bioseq entry, appends the Bioseq's id
 *   chain head to the Secf passed in data; other entries are ignored.
 *   index and indent are not used.
 */
static void SeqEntryGetIds (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
	SecfPtr collected;
	BioseqPtr bsp;

	if (! IS_Bioseq(sep))
		return;

	collected = (SecfPtr)data;
	bsp = (BioseqPtr)sep->data.ptrvalue;
	collected->seqids[collected->numseqids++] = bsp->id;
}
1184 
1185 /*****************************************************************************
1186 *
1187 *   SeqEntryDumpFeats()
1188 *   	SeqEntryExplore function to clear features which do not match a
1189 *       list of seqids.
1190 *
1191 *****************************************************************************/
/*
 * SeqEntryDumpFeats
 *   SeqEntryExplore callback.  Walks the annotation chain of the Bioseq or
 *   Bioseq-set held by sep.  In every feature table (type 1) it frees each
 *   feature for which SeqLocCompare reports no match between a whole-
 *   sequence location on any collected id and the feature's product or
 *   location.  A feature table left empty is unlinked and freed.
 *
 *   sep     entry being visited
 *   data    SecfPtr holding the ids gathered by SeqEntryGetIds
 *   index, indent  not used
 */
static void SeqEntryDumpFeats (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
	SecfPtr sp;
	Int2 numids, i;
	SeqIdPtr PNTR sip;
	SeqIdPtr cursip;
	ValNode an;
	SeqAnnotPtr sap, prevsap, sapnext;
	SeqFeatPtr curr, next, prev;
	Boolean got_it;

	sp = (SecfPtr)data;
	numids = sp->numseqids;
	sip = sp->seqids;

	/* stack-built "whole" location; its id is filled in per comparison */
	an.choice = SEQLOC_WHOLE;
	an.data.ptrvalue = NULL;
	an.next = NULL;              /* do not hand an indeterminate link to SeqLocCompare */

	if (IS_Bioseq(sep))
		sap = ((BioseqPtr)sep->data.ptrvalue)->annot;
	else
		sap = ((BioseqSetPtr)sep->data.ptrvalue)->annot;

	prevsap = NULL;              /* last annot kept in the chain so far */
	while (sap != NULL)
	{
		sapnext = sap->next;

		if (sap->type == 1)  /* feature table */
		{
			prev = NULL;
			curr = (SeqFeatPtr)sap->data;
			while (curr != NULL)
			{
				got_it = FALSE;
				for (i = 0; (i < numids) && (! got_it); i++)
				{
					for (cursip = sip[i]; cursip != NULL; cursip = cursip->next)
					{
						an.data.ptrvalue = (Pointer)cursip;
						if (SeqLocCompare(&an, curr->product) ||
						    SeqLocCompare(&an, curr->location))
						{
							got_it = TRUE;
							break;
						}
					}
				}

				next = curr->next;
				if (! got_it)    /* delete it */
				{
					if (prev == NULL)
						sap->data = (Pointer)next;
					else
						prev->next = next;
					SeqFeatFree(curr);
				}
				else
					prev = curr;
				curr = next;
			}
		}

		if ((sap->type == 1) && (sap->data == NULL))   /* deleted them all */
		{
			/* unlink from whatever precedes it: the owner's head pointer
			   only when nothing has been kept before this annot */
			if (prevsap != NULL)
				prevsap->next = sapnext;
			else if (IS_Bioseq(sep))
				((BioseqPtr)sep->data.ptrvalue)->annot = sapnext;
			else
				((BioseqSetPtr)sep->data.ptrvalue)->annot = sapnext;
			SeqAnnotFree(sap);
		}
		else
			prevsap = sap;

		sap = sapnext;
	}
}
1277 
1278 
1279 /*****************************************************************************
1280 *
1281 *   Boolean SeqEntryLoad()
1282 *
1283 *****************************************************************************/
SeqEntryLoad(void)1284 NLM_EXTERN Boolean LIBCALL SeqEntryLoad (void)
1285 {
1286     if (! SeqSetAsnLoad())
1287         return FALSE;
1288 
1289     return BioseqLoad();   /* need all the Bioseq stuff */
1290 }
1291 
1292 
1293 
1294