1 /* seqmgr.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: seqmgr.c
27 *
28 * Author: James Ostell
29 *
30 * Version Creation Date: 9/94
31 *
32 * $Revision: 6.344 $
33 *
34 * File Description: Manager for Bioseqs and BioseqSets
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 * ==========================================================================
42 */
43
/*
 * Module / file identification for ErrPostEx().
 * THIS_MODULE and THIS_FILE are defined before the headers below are
 * included and simply name the two file-local strings.
 */
static char *this_module = "ncbiapi";
static char *this_file = __FILE__;

#define THIS_MODULE this_module
#define THIS_FILE this_file
52
53 #include <explore.h> /* new public functions prototyped here */
54 #include <seqmgr.h> /* the interface */
55 #include <sequtil.h> /* CLEAN THIS UP LATER? */
56 #include <gather.h>
57 #include <subutil.h>
58 #include <ncbithr.h>
59 #include <objfdef.h>
60 #include <sqnutils.h>
61 #include <seqport.h>
62 #include <edutil.h>
63 #include <alignmgr2.h>
64
65 /*****************************************************************************
66 *
67 * Bioseq Management
68 *
69 *****************************************************************************/
70
71 static BioseqPtr LIBCALLBACK BSFetchFunc PROTO((SeqIdPtr sid, Uint1 ld_type));
72 static BioseqPtr NEAR BioseqFindFunc PROTO((SeqIdPtr sid, Boolean reload_from_cache, Boolean force_it, Boolean use_bioseq_cache));
73 static Boolean NEAR SeqMgrGenericSelect PROTO((SeqLocPtr region, Int2 type,
74 Uint1Ptr rgb));
75 static BioseqPtr NEAR BioseqReloadFunc PROTO((SeqIdPtr sid, ObjMgrDataPtr omdp));
76
77 static Boolean NEAR SeqMgrProcessNonIndexedBioseq PROTO((Boolean force_it));
78 static Boolean NEAR SeqMgrAddIndexElement PROTO((SeqMgrPtr smp, BioseqPtr bsp, CharPtr buf,
79 Boolean sort_now));
80 static void NEAR RevStringUpper PROTO((CharPtr str));
81 static BSFetchTop NEAR SeqMgrGetFetchTop (void);
82
83
84 /*****************************************************************************
85 *
86 * Return the current SeqMgr
87 * SeqMgrGet is obsolete
88 * SeqMgrReadLock, ReadUnlock, WriteLock, WriteUnlock are thread safe
89 *
90 *****************************************************************************/
91 static TNlmMutex smp_mutex = NULL;
92 static SeqMgrPtr global_smp = NULL;
93 static TNlmRWlock smp_RWlock = NULL;
94 static TNlmRWlock sgi_RWlock = NULL;
95
96 /*****************************************************************************
97 *
98 * Return the current SeqMgr
99 * Initialize if not done already
100 * This function will become obsolete
101 *
102 *****************************************************************************/
SeqMgrGet(void)103 NLM_EXTERN SeqMgrPtr LIBCALL SeqMgrGet (void)
104 {
105 Int4 ret;
106 SeqMgrPtr smp;
107
108 if (global_smp != NULL)
109 return global_smp;
110
111 ret = NlmMutexLockEx(&smp_mutex); /* protect this section */
112 if (ret) /* error */
113 {
114 ErrPostEx(SEV_FATAL,0,0,"SeqMgrGet failed [%ld]", (long)ret);
115 return NULL;
116 }
117
118 if (global_smp == NULL) /* check again after mutex */
119 {
120 /*** have to initialize it **/
121 smp = (SeqMgrPtr) MemNew (sizeof(SeqMgr));
122 smp->bsfetch = BSFetchFunc; /* BioseqFetch default */
123 smp->fetch_on_lock = TRUE; /* fetch when locking */
124 smp_RWlock = NlmRWinit(); /* initialize RW lock */
125 sgi_RWlock = NlmRWinit(); /* initialize RW lock */
126 global_smp = smp; /* do this last for mutex safety */
127 }
128
129 NlmMutexUnlock(smp_mutex);
130
131 return global_smp;
132 }
133
134 /*****************************************************************************
135 *
136 * SeqMgrReadLock()
137 * Initialize if not done already
138 * A thread can have only one read or write lock at a time
139 * Many threads can have read locks
140 * Only one thread can have a write lock
141 * No other threads may have read locks if a write lock is granted
142 * If another thread holds a write lock, this call blocks until write
143 * is unlocked.
144 *
145 *****************************************************************************/
SeqMgrReadLock(void)146 NLM_EXTERN SeqMgrPtr LIBCALL SeqMgrReadLock (void)
147 {
148 SeqMgrPtr smp;
149 Int4 ret;
150
151 smp = SeqMgrGet(); /* ensure initialization */
152
153 ret = NlmRWrdlock(smp_RWlock);
154 if (ret != 0)
155 {
156 ErrPostEx(SEV_ERROR,0,0,"SeqMgrReadLock: RWrdlock error [%ld]",
157 (long)ret);
158 return NULL;
159 }
160 return smp;
161 }
162
163 /*****************************************************************************
164 *
165 * SeqMgrWriteLock
166 * Initialize if not done already
167 * A thread can have only one read or write lock at a time
168 * Many threads can have read locks
169 * Only one thread can have a write lock
170 * No other threads may have read locks if a write lock is granted
171 * If another thread holds a read or write lock, this call blocks until write
172 * is unlocked.
173 *
174 *****************************************************************************/
SeqMgrWriteLock(void)175 NLM_EXTERN SeqMgrPtr LIBCALL SeqMgrWriteLock (void)
176 {
177 SeqMgrPtr smp;
178 Int4 ret;
179
180 smp = SeqMgrGet(); /* ensure initialization */
181
182 ret = NlmRWwrlock(smp_RWlock);
183 if (ret != 0)
184 {
185 ErrPostEx(SEV_ERROR,0,0,"SeqMgrWriteLock: RWwrlock error [%ld]",
186 (long)ret);
187 return NULL;
188 }
189 smp->is_write_locked = TRUE;
190 return smp;
191 }
192
193
194 /*****************************************************************************
195 *
196 * SeqMgrUnlock()
197 *
198 *****************************************************************************/
SeqMgrUnlock(void)199 NLM_EXTERN Boolean LIBCALL SeqMgrUnlock (void)
200 {
201 SeqMgrPtr smp;
202 Int4 ret;
203
204 smp = SeqMgrGet(); /* ensure initialization */
205
206 ret = NlmRWunlock(smp_RWlock);
207 if (ret != 0)
208 {
209 ErrPostEx(SEV_ERROR,0,0,"SeqMgrUnlock: RWunlock error [%ld]",
210 (long)ret);
211 return FALSE;
212 }
213 smp->is_write_locked = FALSE; /* can't be write locked */
214 return TRUE;
215 }
216
217 /****************************************************************************
218 *
219 * RevStringUpper(str)
220 * Up cases and reverses string
221 * to get different parts early for SeqId StringCmps
222 *
223 *****************************************************************************/
RevStringUpper(CharPtr str)224 static void NEAR RevStringUpper (CharPtr str)
225 {
226 CharPtr nd;
227 Char tmp;
228
229 if (str == NULL)
230 return;
231 nd = str;
232 while (*nd != '\0')
233 nd++;
234 nd--;
235
236 while (nd > str)
237 {
238 tmp = TO_UPPER(*nd);
239 *nd = TO_UPPER(*str);
240 *str = tmp;
241 nd--; str++;
242 }
243
244 if (nd == str)
245 *nd = TO_UPPER(*nd);
246 return;
247 }
248
MakeReversedSeqIdString(SeqIdPtr sid,CharPtr buf,size_t len)249 NLM_EXTERN Boolean MakeReversedSeqIdString (SeqIdPtr sid, CharPtr buf, size_t len)
250
251 {
252 Uint1 oldchoice;
253 CharPtr tmp;
254 TextSeqIdPtr tsip;
255
256 if (sid == NULL || buf == NULL || len < 1) return FALSE;
257 oldchoice = 0;
258 switch (sid->choice) {
259 case SEQID_GI:
260 sprintf (buf, "%ld", (long)(sid->data.intvalue));
261 break;
262 case SEQID_EMBL:
263 case SEQID_DDBJ:
264 oldchoice = sid->choice;
265 sid->choice = SEQID_GENBANK;
266 case SEQID_GENBANK:
267 case SEQID_PIR:
268 case SEQID_OTHER:
269 case SEQID_SWISSPROT:
270 case SEQID_PRF:
271 case SEQID_TPG:
272 case SEQID_TPE:
273 case SEQID_TPD:
274 case SEQID_GPIPE:
275 case SEQID_NAMED_ANNOT_TRACK:
276 tsip = (TextSeqIdPtr) (sid->data.ptrvalue);
277 if (tsip->accession != NULL) {
278 tmp = tsip->name;
279 tsip->name = NULL;
280 SeqIdWrite (sid, buf, PRINTID_FASTA_SHORT, len);
281 tsip->name = tmp;
282 } else {
283 SeqIdWrite (sid, buf, PRINTID_FASTA_SHORT, len);
284 }
285 if (oldchoice)
286 sid->choice = oldchoice;
287 break;
288 default:
289 SeqIdWrite (sid, buf, PRINTID_FASTA_SHORT, len);
290 break;
291 }
292 RevStringUpper (buf);
293 return TRUE;
294 }
295
296 /*****************************************************************************
297 *
298 * SeqEntrySetScope(sep)
299 * scopes global seqentry searches to sep
300 * setting sep=NULL, opens scope to all seqentries in memory
301 * returns the current scope
302 *
303 *****************************************************************************/
SeqEntrySetScope(SeqEntryPtr sep)304 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntrySetScope(SeqEntryPtr sep)
305 {
306 SeqEntryPtr curr = NULL;
307 SeqMgrPtr smp;
308 Int2 i, j;
309 SMScopePtr smsp;
310 TNlmThread thr;
311 Boolean found;
312
313 smp = SeqMgrWriteLock();
314 if (smp == NULL) goto ret;
315 thr = NlmThreadSelf();
316 found = FALSE;
317 for (i = 0, smsp = smp->scope; i < smp->num_scope; i++, smsp++)
318 {
319 if (NlmThreadCompare(thr, smsp->thr))
320 {
321 curr = smsp->SEscope;
322 smsp->SEscope = sep;
323 if (sep == NULL) /* removing one? */
324 {
325 smp->num_scope--;
326 j = smp->num_scope - i; /* number to move */
327 if (j) /* not last one */
328 MemCopy(smsp, (smsp+1), (size_t)(j * sizeof(SMScope)));
329 }
330 goto ret; /* all done */
331 }
332 }
333
334 /* thread not on list */
335 if (sep == NULL)
336 goto ret; /* nothing to do */
337
338 i = smp->num_scope;
339 j = smp->total_scope;
340 if (j == i) /* need more room */
341 {
342 j += 20; /* new size */
343 smsp = smp->scope;
344 smp->scope = MemNew((size_t)(j * sizeof(SMScope)));
345 MemCopy(smp->scope, smsp, (size_t)(i * sizeof(SMScope)));
346 smp->total_scope = j;
347 MemFree(smsp);
348 }
349
350 smp->scope[i].thr = thr;
351 smp->scope[i].SEscope = sep;
352 smp->num_scope++;
353
354 ret: SeqMgrUnlock();
355 return curr;
356 }
357
358 /*****************************************************************************
359 *
360 * SeqEntryGetScope(sep)
361 * returns the current scope or NULL if none set
362 *
363 *****************************************************************************/
SeqEntryGetScope(void)364 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryGetScope(void)
365 {
366 SeqMgrPtr smp;
367 SeqEntryPtr scope = NULL;
368 Int2 i;
369 SMScopePtr smsp;
370 TNlmThread thr;
371
372 smp = SeqMgrReadLock();
373 if (smp == NULL) return FALSE;
374 thr = NlmThreadSelf();
375 for (i = 0, smsp = smp->scope; i < smp->num_scope; i++, smsp++)
376 {
377 if (NlmThreadCompare(thr, smsp->thr))
378 {
379 scope = smsp->SEscope;
380 break;
381 }
382 }
383 SeqMgrUnlock();
384 return scope;
385 }
386
387 /*****************************************************************************
388 *
389 * BioseqFind(SeqIdPtr)
390 * Just checks in object loaded memory
391 * Will also restore a Bioseq that has been cached out
392 *
393 *****************************************************************************/
BioseqFind(SeqIdPtr sid)394 NLM_EXTERN BioseqPtr LIBCALL BioseqFind (SeqIdPtr sid)
395 {
396 return BioseqFindFunc(sid, TRUE, TRUE, TRUE);
397 }
398
399 /*****************************************************************************
400 *
401 * BioseqFindCore(sid)
402 * Finds a Bioseq in memory based on SeqId when only "core" elements needed
403 * Will NOT restore a Bioseq that has been cached out by SeqMgr
404 * This function is for use ONLY by functions that only need the parts
405 * of the Bioseq left when cached out. This includes the SeqId chain,
406 * and non-pointer components of the Bioseq.
407 *
408 *****************************************************************************/
BioseqFindCore(SeqIdPtr sip)409 NLM_EXTERN BioseqPtr LIBCALL BioseqFindCore (SeqIdPtr sip)
410 {
411 return BioseqFindFunc(sip, FALSE, TRUE, TRUE);
412 }
413
414 /*****************************************************************************
415 *
416 * BioseqFindSpecial(sid)
417 * Finds a Bioseq in memory based on SeqId when only "core" elements needed
418 * Will NOT restore a Bioseq that has been cached out by SeqMgr
419 * This function does not use the bioseq_cache mechanism, and is for
420 * the validator to check for IdOnMultipleBioseqs.
421 *
422 *****************************************************************************/
BioseqFindSpecial(SeqIdPtr sip)423 NLM_EXTERN BioseqPtr LIBCALL BioseqFindSpecial (SeqIdPtr sip)
424 {
425 return BioseqFindFunc(sip, FALSE, TRUE, FALSE);
426 }
427
428 /*****************************************************************************
429 *
430 * BioseqFindEntity(sid, itemIDptr)
431 * Finds a Bioseq in memory based on SeqId
432 * Will NOT restore a Bioseq that has been cached out by SeqMgr
433 * returns EntityID if found, otherwise 0
434 * itemIDptr is set to the value for itemID in ObjMgr functions
435 * itemtype is OBJ_BIOSEQ of course
436 *
437 *****************************************************************************/
BioseqFindEntity(SeqIdPtr sip,Uint4Ptr itemIDptr)438 NLM_EXTERN Uint2 LIBCALL BioseqFindEntity (SeqIdPtr sip, Uint4Ptr itemIDptr)
439 {
440 BioseqPtr bsp;
441 Uint2 entityID = 0;
442
443 *itemIDptr = 0;
444 bsp = BioseqFindCore(sip);
445 if (bsp == NULL) return entityID; /* not found */
446 entityID = ObjMgrGetEntityIDForPointer((Pointer)bsp);
447 if (! entityID)
448 return entityID;
449
450 *itemIDptr = GatherItemIDByData(entityID, OBJ_BIOSEQ, (Pointer)bsp);
451 return entityID;
452 }
453
454 /********************************************************************************
455 *
456 * BioseqReload (omdp, lockit)
457 * reloads the cached SeqEntry at top of omdp
458 * if (lockit) locks the record
459 *
460 *********************************************************************************/
461
BioseqReload(ObjMgrDataPtr omdp,Boolean lockit)462 NLM_EXTERN ObjMgrDataPtr LIBCALL BioseqReload(ObjMgrDataPtr omdp, Boolean lockit)
463 {
464 BioseqPtr bsp = NULL;
465 ObjMgrDataPtr retval = NULL;
466 Int4 j;
467 ObjMgrPtr omp;
468
469 if (omdp == NULL) return retval;
470 if (! ((omdp->datatype == OBJ_BIOSEQ) || (omdp->datatype == OBJ_BIOSEQSET)))
471 return retval;
472 if (omdp->parentptr != NULL)
473 {
474 omp = ObjMgrReadLock();
475 omdp = ObjMgrFindTop(omp, omdp);
476 ObjMgrUnlock();
477 if (omdp == NULL)
478 return retval;
479 }
480
481 if (omdp->tempload == TL_CACHED) /* only need to reload if cached */
482 {
483 bsp = BioseqReloadFunc (NULL, omdp);
484 if (bsp == NULL)
485 return retval;
486 omp = ObjMgrReadLock();
487 j = ObjMgrLookup(omp, (Pointer)bsp);
488 if (j < 0) {
489
490 Char tmpbuff[256];
491
492 SeqIdWrite(bsp->id, tmpbuff,
493 PRINTID_FASTA_LONG, sizeof(tmpbuff));
494
495 ErrPostEx(SEV_WARNING, 0, __LINE__,
496 "ObjMgrLookup() returned negative value "
497 "id = %s, totobj = %d, currobj = %d, "
498 "HighestEntityID = %d", tmpbuff, omp->totobj,
499 omp->currobj, omp->HighestEntityID);
500
501 ObjMgrUnlock();
502 return retval;
503 }
504
505 omdp = ObjMgrFindTop(omp, omp->datalist[j]);
506 ObjMgrUnlock();
507 }
508
509 if (lockit)
510 {
511 ObjMgrLock(omdp->datatype, omdp->dataptr, TRUE);
512 }
513
514 return omdp;
515 }
516
SeqMgrGetFetchTop(void)517 static BSFetchTop NEAR SeqMgrGetFetchTop (void)
518 {
519 SeqMgrPtr smp;
520 BSFetchTop bsftp=NULL;
521
522 smp = SeqMgrReadLock();
523 if (smp == NULL) return bsftp;
524 bsftp = smp->bsfetch;
525 SeqMgrUnlock();
526 return bsftp;
527 }
528
BioseqReloadFunc(SeqIdPtr sid,ObjMgrDataPtr omdp)529 static BioseqPtr NEAR BioseqReloadFunc (SeqIdPtr sid, ObjMgrDataPtr omdp)
530 {
531 Int4 j;
532 ObjMgrDataPtr oldomdp;
533 OMUserDataPtr omudp, next;
534 ObjMgrProcPtr ompp;
535 OMProcControl ompc;
536 BioseqPtr bsp= NULL;
537 Int2 ret;
538 ObjMgrPtr omp;
539 BSFetchTop bsftp=NULL;
540
541 ompp = NULL;
542 omp = ObjMgrReadLock();
543 for (omudp = omdp->userdata; omudp != NULL; omudp = omudp->next)
544 {
545 if (omudp->proctype == OMPROC_FETCH) /* caching function */
546 {
547 ompp = ObjMgrProcFind(omp, omudp->procid, NULL, 0);
548 if (ompp != NULL)
549 break;
550 }
551 }
552 ObjMgrUnlock();
553
554 if (ompp != NULL && ompp->outputtype != OBJ_BIOSEQ)
555 return bsp;
556
557 oldomdp = omdp;
558 omdp = NULL;
559 bsftp = SeqMgrGetFetchTop();
560 if (bsftp != NULL)
561 {
562 if (ompp != NULL) /* fetch proc left a signal */
563 { /* rerun fetch */
564 MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl));
565 ompc.input_data = sid;
566 ompc.input_entityID = oldomdp->EntityID;
567 ompc.proc = ompp;
568 ret = (* (ompp->func))((Pointer)&ompc);
569 switch (ret)
570 {
571 case OM_MSG_RET_ERROR:
572 ErrShow();
573 break;
574 case OM_MSG_RET_DEL:
575 break;
576 case OM_MSG_RET_OK:
577 break;
578 case OM_MSG_RET_DONE:
579 omp = ObjMgrWriteLock();
580 ObjMgrSetTempLoad (omp, ompc.output_data);
581 ObjMgrUnlock();
582 bsp = (BioseqPtr)(ompc.output_data);
583 break;
584 default:
585 break;
586 }
587 }
588
589 if (bsp == NULL) /* nope, try regular fetch */
590 {
591 bsp = (*(bsftp))(sid, BSFETCH_TEMP);
592 }
593
594 if (bsp != NULL)
595 {
596 omp = ObjMgrReadLock();
597 j = ObjMgrLookup(omp, (Pointer)bsp);
598 if (j < 0) {
599
600 Char tmpbuff[256];
601
602 SeqIdWrite(bsp->id, tmpbuff,
603 PRINTID_FASTA_LONG, sizeof(tmpbuff));
604
605 ErrPostEx(SEV_WARNING, 0, __LINE__,
606 "ObjMgrLookup() returned negative value "
607 "id = %s, totobj = %d, currobj = %d, "
608 "HighestEntityID = %d", tmpbuff,
609 omp->totobj,
610 omp->currobj, omp->HighestEntityID);
611 ObjMgrUnlock();
612 return bsp;
613 }
614 omdp = ObjMgrFindTop(omp, omp->datalist[j]);
615 ObjMgrUnlock();
616 ObjMgrDeleteIndexOnEntityID (omp, oldomdp->EntityID);
617 omdp->EntityID = oldomdp->EntityID;
618 oldomdp->EntityID = 0;
619 ObjMgrAddIndexOnEntityID (omp, omdp->EntityID, omdp);
620
621 omudp = omdp->userdata;
622 while (omudp != NULL)
623 {
624 next = omudp->next;
625 if (omudp->freefunc != NULL)
626 (*(omudp->freefunc))(omudp->userdata.ptrvalue);
627 MemFree(omudp);
628 omudp = next;
629 }
630 omdp->userdata = oldomdp->userdata;
631 oldomdp->userdata = NULL;
632
633 if (oldomdp->choice != NULL)
634 SeqEntryFree(oldomdp->choice);
635 else
636 {
637 switch(oldomdp->datatype)
638 {
639 case OBJ_BIOSEQ:
640 BioseqFree((BioseqPtr)(oldomdp->dataptr));
641 break;
642 case OBJ_BIOSEQSET:
643 BioseqSetFree((BioseqSetPtr)(oldomdp->dataptr));
644 break;
645 default:
646 ErrPostEx(SEV_ERROR,0,0,"BioseqReloadFunc: delete unknown type [%d]",
647 (int)(oldomdp->datatype));
648 break;
649 }
650 }
651 }
652 }
653 return bsp;
654 }
655 /** static func used internally **/
656
657 /*******************************************
658 *
659 * WARNING: if you change BIOSEQ_CACHE_NUM, you have to change the
660 * number of NULL in the initialization of the 2 static pointer arrays
661 * below
662 *
663 *******************************************/
664 /* nb: this cache is cleared in SeqMgrDeleteFromBioseqIndex() */
665 #define BIOSEQ_CACHE_NUM 3
666 static SeqEntryPtr se_cache[BIOSEQ_CACHE_NUM] = {
667 NULL, NULL, NULL}; /* for a few platforms */
668 static ObjMgrDataPtr omdp_cache[BIOSEQ_CACHE_NUM] = {
669 NULL, NULL, NULL}; /* for a few platforms */
670 static TNlmMutex smp_cache_mutex = NULL;
671
BioseqFindFunc(SeqIdPtr sid,Boolean reload_from_cache,Boolean force_it,Boolean use_bioseq_cache)672 static BioseqPtr NEAR BioseqFindFunc (SeqIdPtr sid, Boolean reload_from_cache, Boolean force_it, Boolean use_bioseq_cache)
673 {
674 Int4 i, j, num, imin, imax, retval;
675 SeqIdIndexElementPtr PNTR sipp;
676 CharPtr tmp;
677 Char buf[128];
678 Boolean do_return;
679 SeqMgrPtr smp;
680 ObjMgrPtr omp;
681 ObjMgrDataPtr omdp = NULL;
682 BioseqPtr bsp = NULL, tbsp;
683 SeqEntryPtr scope = NULL;
684
685 if (sid == NULL)
686 return NULL;
687
688 SeqMgrReadLock(); /* make sure no other thread is writing */
689 retval = NlmMutexLockEx(&smp_cache_mutex); /* protect this section */
690 SeqMgrUnlock();
691 if (retval) /* error */
692 {
693 ErrPostEx(SEV_FATAL,0,0,"BioseqFindFunc cache mutex failed [%ld]", (long)retval);
694 return NULL;
695 }
696
697 do_return = FALSE;
698 scope = SeqEntryGetScope(); /* first check the cache */
699 for (i = 0; i < BIOSEQ_CACHE_NUM && use_bioseq_cache; i++)
700 {
701 if (omdp_cache[i] == NULL)
702 break;
703 omdp = omdp_cache[i];
704 if (omdp->datatype == OBJ_BIOSEQ)
705 {
706 if ((scope == NULL) || (scope == se_cache[i]))
707 {
708 bsp = (BioseqPtr)(omdp->dataptr);
709
710 if (BioseqMatch(bsp, sid))
711 {
712 for (j = i; j > 0; j--) /* shift to top of cache */
713 {
714 omdp_cache[j] = omdp_cache[j-1];
715 se_cache[j] = se_cache[j-1];
716 }
717 omdp_cache[0] = omdp;
718 se_cache[0] = scope;
719
720 if (! reload_from_cache)
721 {
722 do_return = TRUE;
723 goto done_cache;
724 }
725
726 omp = ObjMgrReadLock();
727 omdp = ObjMgrFindTop(omp, omdp);
728 ObjMgrUnlock();
729 if (omdp == NULL || omdp->tempload != TL_CACHED)
730 {
731 do_return = TRUE;
732 goto done_cache;
733 }
734
735 bsp = BioseqReloadFunc(sid, omdp);
736
737 if (bsp == NULL)
738 {
739
740 ErrPostEx(SEV_ERROR,0,0,"BioseqFindFunc: couldn't uncache");
741 }
742 do_return = TRUE;
743 goto done_cache;
744 }
745 }
746 }
747 }
748 done_cache:
749 NlmMutexUnlock(smp_cache_mutex);
750 if (do_return) /* all done */
751 {
752 return bsp;
753 }
754
755 bsp = NULL; /* resetting it */
756
757 SeqMgrProcessNonIndexedBioseq(force_it); /* make sure all are indexed */
758
759 /* stringify as in SeqMgrAdd */
760
761 MakeReversedSeqIdString (sid, buf, sizeof (buf) - 1); /* common function to make id, call RevStringUpper */
762
763
764 imin = 0;
765 smp = SeqMgrReadLock();
766 imax = smp->BioseqIndexCnt - 1;
767 sipp = smp->BioseqIndex;
768
769 num = -1;
770
771 while (imax >= imin)
772 {
773 i = (imax + imin)/2;
774 tmp = sipp[i]->str;
775 if ((j = StringCmp(tmp, buf)) > 0)
776 imax = i - 1;
777 else if (j < 0)
778 imin = i + 1;
779 else
780 {
781 num = i;
782 break;
783 }
784 }
785
786 if (num < 0) /* couldn't find it */
787 {
788 /*
789 Message(MSG_ERROR, "[1] Couldn't find [%s]", buf);
790 */
791 bsp = NULL;
792 goto ret;
793 }
794
795
796 if (scope != NULL) /* check in scope */
797 {
798 tbsp = (BioseqPtr)(sipp[num]->omdp->dataptr);
799 if (ObjMgrIsChild(scope->data.ptrvalue, tbsp))
800 {
801 bsp = tbsp;
802 omdp = sipp[num]->omdp;
803 }
804 else
805 { /* not in scope, could be duplicate SeqId */
806 i = num-1;
807 while ((i >= 0) && (bsp == NULL) && (! StringCmp(sipp[i]->str, buf))) /* back up */
808 {
809 tbsp = (BioseqPtr)(sipp[i]->omdp->dataptr);
810 if (ObjMgrIsChild(scope->data.ptrvalue, tbsp))
811 {
812 bsp = tbsp;
813 omdp = sipp[i]->omdp;
814 }
815 i--;
816 }
817 i = num + 1;
818 imax = smp->BioseqIndexCnt - 1;
819 while ((bsp == NULL) && (i <= imax) && (! StringCmp(sipp[i]->str, buf)))
820 {
821 tbsp = (BioseqPtr)(sipp[i]->omdp->dataptr);
822 if (ObjMgrIsChild(scope->data.ptrvalue, tbsp))
823 {
824 bsp = tbsp;
825 omdp = sipp[i]->omdp;
826 }
827 i++;
828 }
829 }
830 }
831 else /* no scope set */
832 {
833 omdp = sipp[num]->omdp;
834 bsp = (BioseqPtr)(omdp->dataptr);
835 }
836
837
838 if (bsp == NULL) /* not found */
839 {
840 /*
841 Message(MSG_ERROR, "[2] Couldn't find [%s]", buf);
842 */
843 goto ret;
844 }
845
846 retval = NlmMutexLockEx(&smp_cache_mutex); /* protect this section */
847 if (retval) /* error */
848 {
849 ErrPostEx(SEV_FATAL,0,0,"BioseqFindFunc2 cache mutex failed [%ld]", (long)retval);
850 bsp = NULL;
851 goto ret;
852 }
853
854 for (j = (BIOSEQ_CACHE_NUM - 1); j > 0; j--) /* shift to top of cache */
855 {
856 omdp_cache[j] = omdp_cache[j-1];
857 se_cache[j] = se_cache[j-1];
858 }
859 omdp_cache[0] = omdp;
860 se_cache[0] = scope;
861
862 NlmMutexUnlock(smp_cache_mutex);
863
864 if (! reload_from_cache)
865 goto ret;
866
867 omp = ObjMgrReadLock();
868 omdp = ObjMgrFindTop(omp, omdp);
869 ObjMgrUnlock();
870 if (omdp == NULL)
871 {
872 bsp = NULL;
873 goto ret;
874 }
875 if (omdp->tempload == TL_CACHED)
876 {
877 SeqMgrUnlock();
878 bsp = BioseqReloadFunc(sid, omdp);
879 goto ret2;
880 }
881 ret:
882 SeqMgrUnlock();
883 ret2:
884 return bsp;
885 }
886
887 /*****************************************************************************
888 *
889 * ClearBioseqFindCache()
890 * frees internal omdp and se caches which can thwart detection of colliding IDs
891 *
892 *****************************************************************************/
ClearBioseqFindCache(void)893 NLM_EXTERN void ClearBioseqFindCache (void)
894
895 {
896 Int4 i;
897 SeqMgrPtr smp;
898
899 smp = SeqMgrWriteLock ();
900
901 for (i = 0; i < BIOSEQ_CACHE_NUM; i++) {
902 omdp_cache [i] = NULL;
903 se_cache [i] = NULL;
904 }
905
906 SeqMgrUnlock ();
907 }
908
909 /*****************************************************************************
910 *
911 * SeqMgrFreeCache()
912 * frees all cached SeqEntrys
913 * returns FALSE if any errors occurred
914 *
915 *****************************************************************************/
SeqMgrFreeCache(void)916 NLM_EXTERN Boolean LIBCALL SeqMgrFreeCache(void)
917 {
918 return ObjMgrFreeCache(OBJ_SEQENTRY);
919 }
920
921 /*****************************************************************************
922 *
923 * BioseqLockById(SeqIdPtr)
924 * Finds the Bioseq and locks it
925 * Makes sure appropriate BioseqContent is present
926 *
927 *****************************************************************************/
BioseqLockByIdEx(SeqIdPtr sid,Boolean force_it)928 static BioseqPtr LIBCALL BioseqLockByIdEx (SeqIdPtr sid, Boolean force_it)
929 {
930 BioseqPtr bsp = NULL;
931 SeqMgrPtr smp;
932 SeqEntryPtr oldscope = NULL;
933 BSFetchTop bsftp;
934 Boolean fetch_on_lock;
935 DbtagPtr dbt;
936
937 if (sid == NULL) return bsp;
938
939 /* special case for DeltaSeqsToSeqLoc fake IDs - ignore */
940 if (sid->choice == SEQID_GENERAL) {
941 dbt = (DbtagPtr) sid->data.ptrvalue;
942 if (dbt != NULL && StringCmp (dbt->db, "SeqLit") == 0) {
943 return NULL;
944 }
945 }
946
947 bsp = BioseqFindFunc(sid, TRUE, force_it, TRUE);
948 if (bsp == NULL)
949 {
950 smp = SeqMgrReadLock();
951 if (smp == NULL) return bsp;
952 fetch_on_lock = smp->fetch_on_lock;
953 bsftp = smp->bsfetch;
954 SeqMgrUnlock();
955
956 if (fetch_on_lock)
957 {
958 oldscope = SeqEntrySetScope (NULL);
959 if (oldscope != NULL) {
960 bsp = BioseqFindFunc(sid, TRUE, force_it, TRUE);
961 SeqEntrySetScope (oldscope);
962 }
963 if (bsp == NULL && bsftp != NULL)
964 bsp = (*(bsftp))(sid, BSFETCH_TEMP);
965 }
966 }
967
968 if (bsp == NULL) return NULL;
969
970 ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, TRUE);
971 return bsp;
972 }
973
BioseqLockById(SeqIdPtr sid)974 NLM_EXTERN BioseqPtr LIBCALL BioseqLockById (SeqIdPtr sid)
975 {
976 return BioseqLockByIdEx (sid, TRUE);
977 }
978
979 /*****************************************************************************
980 *
981 * BioseqUnlockById(SeqIdPtr sip)
982 * Frees a Bioseq to be dumped from memory if necessary
983 *
984 *****************************************************************************/
BioseqUnlockById(SeqIdPtr sip)985 NLM_EXTERN Boolean LIBCALL BioseqUnlockById (SeqIdPtr sip)
986 {
987 BioseqPtr bsp;
988
989 if (sip == NULL) return FALSE;
990
991 bsp = BioseqFindFunc(sip, FALSE, TRUE, TRUE);
992 if (bsp == NULL)
993 return FALSE;
994
995 ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, FALSE);
996 return TRUE;
997 }
998
999 /*****************************************************************************
1000 *
1001 * BioseqLock(BioseqPtr)
1002 * Locks a Bioseq
1003 * Any cached data is returned to memory
1004 *
1005 *****************************************************************************/
BioseqLock(BioseqPtr bsp)1006 NLM_EXTERN BioseqPtr LIBCALL BioseqLock (BioseqPtr bsp)
1007 {
1008 if (bsp == NULL) return NULL;
1009
1010 ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, TRUE);
1011
1012 return bsp;
1013 }
1014
1015 /*****************************************************************************
1016 *
1017 * BioseqUnlock(BioseqPtr)
1018 * Frees a Bioseq to be dumped from memory if necessary
1019 *
1020 *****************************************************************************/
BioseqUnlock(BioseqPtr bsp)1021 NLM_EXTERN Boolean LIBCALL BioseqUnlock (BioseqPtr bsp)
1022 {
1023 if (bsp == NULL) return FALSE;
1024
1025 if (ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, FALSE) >= 0)
1026 return TRUE;
1027 else
1028 return FALSE;
1029 }
1030
1031 /*****************************************************************************
1032 *
1033 * BioseqFetch(SeqIdPtr, flag)
1034 * loads bioseq into memory if possible
1035 * first trys LocalLoad
1036 * they trys EntrezLoad
1037 *
1038 *****************************************************************************/
BSFetchFunc(SeqIdPtr sid,Uint1 ld_type)1039 static BioseqPtr LIBCALLBACK BSFetchFunc (SeqIdPtr sid, Uint1 ld_type)
1040 {
1041 BioseqPtr bsp = NULL;
1042 ObjMgrProcPtr ompp;
1043 OMProcControl ompc;
1044 Int2 ret;
1045 ObjMgrPtr omp;
1046
1047 ompp = NULL;
1048 while ((ompp = ObjMgrProcFindNext(NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_BIOSEQ, ompp)) != NULL)
1049 {
1050 MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl));
1051 ompc.input_data = sid;
1052 ompc.proc = ompp;
1053 ret = (* (ompp->func))((Pointer)&ompc);
1054 switch (ret)
1055 {
1056 case OM_MSG_RET_ERROR:
1057 ErrShow();
1058 break;
1059 case OM_MSG_RET_DEL:
1060 break;
1061 case OM_MSG_RET_OK:
1062 break;
1063 case OM_MSG_RET_DONE:
1064 if (ld_type == BSFETCH_TEMP)
1065 {
1066 omp = ObjMgrWriteLock();
1067 ObjMgrSetTempLoad (omp, ompc.output_data);
1068 ObjMgrUnlock();
1069 }
1070 bsp = (BioseqPtr)(ompc.output_data);
1071 break;
1072 default:
1073 break;
1074 }
1075 if (bsp != NULL) /* got one */
1076 break;
1077 }
1078
1079 return bsp;
1080 }
1081
1082
BioseqFetch(SeqIdPtr sid,Uint1 ld_type)1083 NLM_EXTERN BioseqPtr LIBCALL BioseqFetch (SeqIdPtr sid, Uint1 ld_type)
1084 {
1085 BSFetchTop bsftp;
1086 BioseqPtr bsp;
1087
1088 bsp = BioseqFindFunc(sid, TRUE, TRUE, TRUE);
1089 if (bsp != NULL) return bsp;
1090
1091 bsftp = SeqMgrGetFetchTop();
1092 if (bsftp == NULL) return NULL;
1093
1094 return (*(bsftp))(sid, ld_type);
1095 }
1096
1097 /*****************************************************************************
1098 *
1099 * GetSeqIdForGI(BIG_ID)
1100 * returns the SeqId for a GI
1101 * returns NULL if can't find it
1102 * The returned SeqId is allocated. Caller must free it.
1103 *
1104 *****************************************************************************/
1105 typedef struct seqidblock {
1106 BIG_ID uid;
1107 time_t touch;
1108 SeqIdPtr sip;
1109 CharPtr revstr;
1110 } SeqIdBlock, PNTR SeqIdBlockPtr;
1111
1112 static ValNodePtr seqidgicache = NULL;
1113 static ValNodePtr PNTR seqidgiarray = NULL;
1114 static ValNodePtr PNTR giseqidarray = NULL;
1115 static Int2 seqidcount = 0;
1116 static Boolean seqidgiindexed = FALSE;
1117
1118 /* record first in small linear list so as not to sort main list after every addition */
1119 static ValNodePtr seqidgilatest = NULL;
1120 static Int2 seqidunidxcount = 0;
1121
1122 /*
1123 static TNlmRWlock sid_RWlock = NULL;
1124 */
1125
RecordInSeqIdGiCache(BIG_ID gi,SeqIdPtr sip)1126 NLM_EXTERN void RecordInSeqIdGiCache ( BIG_ID gi, SeqIdPtr sip)
1127
1128 {
1129 Char buf [128];
1130 ValNodePtr vnp;
1131 SeqIdBlockPtr sibp;
1132 Int4 retval;
1133
1134 /* if (sip == NULL) return; okay to cache NULL because we protect against SeqIdDup */
1135
1136 retval = NlmRWwrlock(sgi_RWlock);
1137 if (retval != 0)
1138 {
1139 ErrPostEx(SEV_ERROR,0,0,"RecSeqIdGi: RWwrlock error [%ld]",
1140 (long)retval);
1141 return;
1142 }
1143
1144
1145 vnp = ValNodeNew (NULL);
1146 if (vnp == NULL) goto ret;
1147 sibp = (SeqIdBlockPtr) MemNew (sizeof (SeqIdBlock));
1148 if (sibp == NULL) {
1149 MemFree (vnp);
1150 goto ret;
1151 }
1152
1153 sibp->uid = gi;
1154 if (sip != NULL) {
1155 sibp->sip = SeqIdDup (sip);
1156 sibp->touch = GetSecs ();
1157 if (MakeReversedSeqIdString (sibp->sip, buf, sizeof (buf) - 1)) {
1158 sibp->revstr = StringSave (buf);
1159 }
1160 }
1161 vnp->data.ptrvalue = (Pointer) sibp;
1162
1163 /* insert at head of unindexed list. */
1164
1165 vnp->next = seqidgilatest;
1166 seqidgilatest = vnp;
1167 seqidunidxcount++;
1168
1169 if (seqidunidxcount > 50 && seqidgilatest != NULL && seqidgicache != NULL) {
1170
1171 /* if over threshhold, insert unindexed list at head of main list (must
1172 already exist so as to allow bulk lookup recording prior to use) */
1173
1174 vnp = seqidgilatest;
1175 while (vnp->next != NULL) {
1176 vnp = vnp->next;
1177 }
1178
1179 vnp->next = seqidgicache;
1180 seqidgicache = seqidgilatest;
1181
1182 /* clear unindexed list pointer and reset count */
1183
1184 seqidgilatest = NULL;
1185 seqidunidxcount = 0;
1186
1187 /* null out sorted access arrays, will sort, unique, and index at next use */
1188
1189 seqidgiarray = MemFree (seqidgiarray);
1190 giseqidarray = MemFree (giseqidarray);
1191 seqidgiindexed = FALSE;
1192 }
1193
1194 ret:
1195 retval = NlmRWunlock(sgi_RWlock);
1196 if (retval != 0)
1197 {
1198 ErrPostEx(SEV_ERROR,0,0,"RecSeqIdGiUnlock: RWunlock error [%ld]",
1199 (long)retval);
1200 }
1201 }
1202
FreeSeqIdGiCache(void)1203 NLM_EXTERN void FreeSeqIdGiCache (void)
1204
1205 {
1206 Int4 ret;
1207 SeqIdBlockPtr sibp;
1208 ValNodePtr vnp;
1209
1210 ret = NlmRWwrlock(sgi_RWlock);
1211 if (ret != 0) {
1212 ErrPostEx(SEV_ERROR,0,0,"FreeSeqIdGiCache: RWwrlock error [%ld]", (long) ret);
1213 return;
1214 }
1215
1216 seqidgiindexed = FALSE;
1217 seqidcount = 0;
1218 seqidgiarray = MemFree (seqidgiarray);
1219 giseqidarray = MemFree (giseqidarray);
1220
1221 for (vnp = seqidgicache; vnp != NULL; vnp = vnp->next) {
1222 sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1223 if (sibp == NULL) continue;
1224 sibp->sip = SeqIdFree (sibp->sip);
1225 sibp->revstr = MemFree (sibp->revstr);
1226 }
1227 seqidgicache = ValNodeFreeData (seqidgicache);
1228
1229 /* also free unindexed list of most recent additions */
1230
1231 for (vnp = seqidgilatest; vnp != NULL; vnp = vnp->next) {
1232 sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1233 if (sibp == NULL) continue;
1234 sibp->sip = SeqIdFree (sibp->sip);
1235 sibp->revstr = MemFree (sibp->revstr);
1236 }
1237 seqidgilatest = ValNodeFreeData (seqidgilatest);
1238 seqidunidxcount = 0;
1239
1240 ret = NlmRWunlock(sgi_RWlock);
1241 if (ret != 0) {
1242 ErrPostEx(SEV_ERROR,0,0,"FreeSeqIdGiCache: RWwrlock error [%ld]", (long) ret);
1243 return;
1244 }
1245 }
1246
1247 /* trim list by sorting older nodes to end of list if list grew too large */
1248
SortSeqIdGiCacheTime(VoidPtr ptr1,VoidPtr ptr2)1249 static int LIBCALLBACK SortSeqIdGiCacheTime (VoidPtr ptr1, VoidPtr ptr2)
1250
1251 {
1252 SeqIdBlockPtr sibp1;
1253 SeqIdBlockPtr sibp2;
1254 ValNodePtr vnp1;
1255 ValNodePtr vnp2;
1256
1257 if (ptr1 == NULL || ptr2 == NULL) return 0;
1258 vnp1 = *((ValNodePtr PNTR) ptr1);
1259 vnp2 = *((ValNodePtr PNTR) ptr2);
1260 if (vnp1 == NULL || vnp2 == NULL) return 0;
1261 sibp1 = (SeqIdBlockPtr) vnp1->data.ptrvalue;
1262 sibp2 = (SeqIdBlockPtr) vnp2->data.ptrvalue;
1263 if (sibp1 == NULL || sibp2 == NULL) return 0;
1264 if (sibp1->touch > sibp2->touch) {
1265 return -1;
1266 } else if (sibp1->touch < sibp2->touch) {
1267 return 1;
1268 }
1269 return 0;
1270 }
1271
1272 /* sort valnode list by gi */
1273
SortSeqIdGiByUid(VoidPtr ptr1,VoidPtr ptr2)1274 static int LIBCALLBACK SortSeqIdGiByUid (VoidPtr ptr1, VoidPtr ptr2)
1275
1276 {
1277 SeqIdBlockPtr sibp1;
1278 SeqIdBlockPtr sibp2;
1279 ValNodePtr vnp1;
1280 ValNodePtr vnp2;
1281
1282 if (ptr1 == NULL || ptr2 == NULL) return 0;
1283 vnp1 = *((ValNodePtr PNTR) ptr1);
1284 vnp2 = *((ValNodePtr PNTR) ptr2);
1285 if (vnp1 == NULL || vnp2 == NULL) return 0;
1286 sibp1 = (SeqIdBlockPtr) vnp1->data.ptrvalue;
1287 sibp2 = (SeqIdBlockPtr) vnp2->data.ptrvalue;
1288 if (sibp1 == NULL || sibp2 == NULL) return 0;
1289 if (sibp1->uid < sibp2->uid) {
1290 return -1;
1291 } else if (sibp1->uid > sibp2->uid) {
1292 return 1;
1293 }
1294 return 0;
1295 }
1296
/* UniqueSeqIdGiByUid: remove adjacent nodes that carry the same uid.
   list  cache list, expected to be sorted by uid already so that equal
         uids are neighbours
   Of each run of equal uids the first node is kept and the others are
   unlinked and released.  Returns the (unchanged) head of the list, or NULL
   for an empty list. */
static ValNodePtr UniqueSeqIdGiByUid (ValNodePtr list)

{
  SeqIdBlockPtr    curr, last;
  ValNodePtr       next;
  ValNodePtr PNTR  prev;
  ValNodePtr       vnp;

  if (list == NULL) return NULL;

  last = (SeqIdBlockPtr) list->data.ptrvalue;
  prev = &(list->next);
  vnp = list->next;
  while (vnp != NULL) {
    next = vnp->next;
    curr = (SeqIdBlockPtr) vnp->data.ptrvalue;
    if (last != NULL && curr != NULL && last->uid == curr->uid) {
      /* duplicate: unlink it from the list ... */
      vnp->next = NULL;
      *prev = next;
      /* ... and release what the block owns before the block itself, the
         same way FreeSeqIdGiCache does; ValNodeFreeData alone would leave
         the SeqId copy and the reversed string allocated */
      curr->sip = SeqIdFree (curr->sip);
      curr->revstr = MemFree (curr->revstr);
      ValNodeFreeData (vnp);
    } else {
      last = curr;
      prev = &(vnp->next);
    }
    vnp = next;
  }

  return list;
}
1325
1326 /* sort valnode array by reversed seqid string */
1327
SortSeqIdGiByString(VoidPtr ptr1,VoidPtr ptr2)1328 static int LIBCALLBACK SortSeqIdGiByString (VoidPtr ptr1, VoidPtr ptr2)
1329
1330 {
1331 SeqIdBlockPtr sibp1;
1332 SeqIdBlockPtr sibp2;
1333 CharPtr str1;
1334 CharPtr str2;
1335 ValNodePtr vnp1;
1336 ValNodePtr vnp2;
1337
1338 if (ptr1 == NULL || ptr2 == NULL) return 0;
1339 vnp1 = *((ValNodePtr PNTR) ptr1);
1340 vnp2 = *((ValNodePtr PNTR) ptr2);
1341 if (vnp1 == NULL || vnp2 == NULL) return 0;
1342 sibp1 = (SeqIdBlockPtr) vnp1->data.ptrvalue;
1343 sibp2 = (SeqIdBlockPtr) vnp2->data.ptrvalue;
1344 if (sibp1 == NULL || sibp2 == NULL) return 0;
1345 str1 = sibp1->revstr;
1346 str2 = sibp2->revstr;
1347 if (str1 == NULL || str2 == NULL) return 0;
1348 return StringICmp (str1, str2);
1349 }
1350
UpdateSeqIdGiArrays(void)1351 static Boolean UpdateSeqIdGiArrays (void)
1352
1353 {
1354 Int2 i;
1355 Int4 ret;
1356 SeqIdBlockPtr sibp;
1357 ValNodePtr tmp, vnp;
1358
1359 if (seqidgicache == NULL && seqidgilatest == NULL) return FALSE;
1360
1361 if (! seqidgiindexed) {
1362 ret = NlmRWwrlock (sgi_RWlock);
1363 if (ret != 0) {
1364 ErrPostEx (SEV_ERROR, 0, 0, "SeqIdGi: RWwrlock error [%ld]", (long) ret);
1365 return FALSE;
1366 }
1367
1368 if (seqidunidxcount > 50 && seqidgilatest != NULL) {
1369
1370 /* if over threshhold, insert unindexed list at head of main list */
1371
1372 vnp = seqidgilatest;
1373 while (vnp->next != NULL) {
1374 vnp = vnp->next;
1375 }
1376
1377 vnp->next = seqidgicache;
1378 seqidgicache = seqidgilatest;
1379
1380 /* clear unindexed list pointer and reset count */
1381
1382 seqidgilatest = NULL;
1383 seqidunidxcount = 0;
1384
1385 /* null out sorted access arrays, will sort, unique, and index at next use */
1386
1387 seqidgiarray = MemFree (seqidgiarray);
1388 giseqidarray = MemFree (giseqidarray);
1389 seqidgiindexed = FALSE;
1390 }
1391
1392 if (! seqidgiindexed) {
1393
1394 /* if list is too large, sort by touch time, cut least recently used ids */
1395
1396 seqidcount = (Int2) ValNodeLen (seqidgicache);
1397 if (seqidcount > 32000) {
1398
1399 seqidgicache = ValNodeSort (seqidgicache, SortSeqIdGiCacheTime);
1400 for (vnp = seqidgicache; vnp != NULL && seqidcount > 24000; vnp = vnp->next) {
1401 seqidcount--;
1402 }
1403 if (vnp != NULL) {
1404 for (tmp = vnp->next; tmp != NULL; tmp = tmp->next) {
1405 sibp = (SeqIdBlockPtr) tmp->data.ptrvalue;
1406 if (sibp == NULL) continue;
1407 sibp->sip = SeqIdFree (sibp->sip);
1408 sibp->revstr = MemFree (sibp->revstr);
1409 }
1410 vnp->next = ValNodeFreeData (vnp->next);
1411 }
1412 }
1413
1414 /* sort list by gi */
1415
1416 seqidgicache = ValNodeSort (seqidgicache, SortSeqIdGiByUid);
1417 seqidgicache = UniqueSeqIdGiByUid (seqidgicache);
1418 seqidcount = (Int2) ValNodeLen (seqidgicache);
1419
1420 /* copy sorted list into both arrays */
1421
1422 if (seqidcount > 0) {
1423 seqidgiarray = MemNew (sizeof (ValNodePtr) * (size_t) (seqidcount + 1));
1424 giseqidarray = MemNew (sizeof (ValNodePtr) * (size_t) (seqidcount + 1));
1425 if (seqidgiarray != NULL && giseqidarray != NULL) {
1426 for (vnp = seqidgicache, i = 0; vnp != NULL; vnp = vnp->next, i++) {
1427 seqidgiarray [i] = vnp;
1428 giseqidarray [i] = vnp;
1429 }
1430
1431 /* now resort one array by seqid string */
1432
1433 StableMergeSort (giseqidarray, (size_t) seqidcount, sizeof (ValNodePtr), SortSeqIdGiByString);
1434 }
1435 }
1436
1437 /* finally, set indexed flag */
1438
1439 seqidgiindexed = TRUE;
1440 }
1441
1442 ret = NlmRWunlock (sgi_RWlock);
1443 if (ret != 0) {
1444 ErrPostEx (SEV_ERROR, 0, 0, "SeqIdGi: RWunlock error [%ld]", (long) ret);
1445 return FALSE;
1446 }
1447 }
1448
1449 return TRUE;
1450 }
1451
FetchFromSeqIdGiCache(BIG_ID gi,SeqIdPtr PNTR sipp)1452 NLM_EXTERN Boolean FetchFromSeqIdGiCache ( BIG_ID gi, SeqIdPtr PNTR sipp)
1453
1454 {
1455 ValNodePtr vnp;
1456 SeqIdBlockPtr sibp = NULL;
1457 Int2 left, right, mid;
1458 BIG_ID compare;
1459 Int4 ret;
1460 Boolean done = FALSE;
1461
1462
1463 if (sipp != NULL) {
1464 *sipp = NULL;
1465 }
1466 if (seqidgicache == NULL && seqidgilatest == NULL) return done;
1467
1468 if (! UpdateSeqIdGiArrays ()) {
1469 return done;
1470 }
1471
1472 ret = NlmRWrdlock(sgi_RWlock);
1473 if (ret != 0)
1474 {
1475 ErrPostEx(SEV_ERROR,0,0,"SeqIdGi: RWrdlock error [%ld]",
1476 (long)ret);
1477 return done;
1478 }
1479
1480 if (seqidgiarray != NULL) {
1481 left = 1;
1482 right = seqidcount;
1483 while (left <= right) {
1484 mid = (left + right) / 2;
1485 compare = 0;
1486 vnp = seqidgiarray [mid - 1];
1487 if (vnp != NULL) {
1488 sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1489 if (sibp != NULL) {
1490 compare = gi - sibp->uid;
1491 }
1492 }
1493 if (compare <= 0) {
1494 right = mid - 1;
1495 }
1496 if (compare >= 0) {
1497 left = mid + 1;
1498 }
1499 }
1500 if (left > right + 1 && sibp != NULL) {
1501 if (sibp->sip != NULL) {
1502 if (sipp != NULL) {
1503 *sipp = SeqIdDup (sibp->sip);
1504 }
1505 sibp->touch = GetSecs ();
1506 }
1507 done = TRUE;
1508 }
1509 }
1510
1511 if (! done) {
1512 for (vnp = seqidgilatest; vnp != NULL; vnp = vnp->next) {
1513 sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1514 if (sibp == NULL) continue;
1515 if (sibp->uid == gi) {
1516 if (sibp->sip != NULL) {
1517 if (sipp != NULL) {
1518 *sipp = SeqIdDup (sibp->sip);
1519 }
1520 sibp->touch = GetSecs ();
1521 done = TRUE;
1522 break;
1523 }
1524 }
1525 }
1526 }
1527
1528 ret = NlmRWunlock(sgi_RWlock);
1529 if (ret != 0)
1530 {
1531 ErrPostEx(SEV_ERROR,0,0,"SeqIdGi: RWunlock error [%ld]",
1532 (long)ret);
1533 }
1534
1535 return done;
1536 }
1537
GetSeqIdForGI(BIG_ID gi)1538 NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdForGI (BIG_ID gi)
1539 {
1540 BioseqPtr bsp = NULL;
1541 ObjMgrProcPtr ompp;
1542 OMProcControl ompc;
1543 Int2 ret;
1544 SeqIdPtr sip, sip2=NULL, otherh=NULL, otherl = NULL, otherp = NULL, gb=NULL;
1545 ValNode vn;
1546 SeqEntryPtr oldscope = NULL;
1547
1548
1549 if (gi <= 0)
1550 return sip2;
1551
1552 vn.choice = SEQID_GI;
1553 vn.data.intvalue = gi;
1554 vn.next = NULL;
1555
1556 oldscope = SeqEntrySetScope (NULL);
1557 bsp = BioseqFindCore(&vn);
1558 SeqEntrySetScope (oldscope);
1559
1560 if (bsp != NULL)
1561 {
1562 for (sip = bsp->id; sip != NULL; sip = sip->next)
1563 {
1564 switch (sip->choice)
1565 {
1566 case SEQID_LOCAL: /* object id */
1567 case SEQID_GIBBSQ:
1568 case SEQID_GIBBMT:
1569 case SEQID_PATENT:
1570 case SEQID_GENERAL:
1571 otherl = sip;
1572 break;
1573 case SEQID_GI:
1574 break;
1575 case SEQID_GENBANK:
1576 case SEQID_EMBL:
1577 case SEQID_PIR:
1578 case SEQID_SWISSPROT:
1579 case SEQID_DDBJ:
1580 case SEQID_PRF:
1581 case SEQID_PDB:
1582 case SEQID_OTHER:
1583 case SEQID_TPG:
1584 case SEQID_TPE:
1585 case SEQID_TPD:
1586 gb = sip;
1587 break;
1588 case SEQID_GPIPE:
1589 otherp = sip;
1590 break;
1591 default:
1592 if (otherh == NULL)
1593 otherh = sip;
1594 break;
1595 }
1596 }
1597 }
1598
1599
1600 if (gb != NULL)
1601 sip2 = gb;
1602 else if (otherp != NULL)
1603 sip2 = otherp;
1604 else if (otherh != NULL)
1605 sip2 = otherh;
1606 else if (otherl != NULL)
1607 sip2 = otherl;
1608
1609 if (sip2 != NULL)
1610 return SeqIdDup(sip2);
1611
1612 if (FetchFromSeqIdGiCache (gi, &sip2)) {
1613 return sip2;
1614 }
1615
1616 ompp = NULL;
1617 while ((ompp = ObjMgrProcFindNext(NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_SEQID, ompp)) != NULL)
1618 {
1619 if ((ompp->subinputtype == SEQID_GI) && (ompp->suboutputtype == 0))
1620 {
1621 MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl));
1622 ompc.input_data = &vn;
1623 ompc.proc = ompp;
1624 ret = (* (ompp->func))((Pointer)&ompc);
1625 switch (ret)
1626 {
1627 case OM_MSG_RET_ERROR:
1628 ErrShow();
1629 break;
1630 case OM_MSG_RET_DEL:
1631 break;
1632 case OM_MSG_RET_OK:
1633 break;
1634 case OM_MSG_RET_DONE:
1635 sip2 = (SeqIdPtr)(ompc.output_data);
1636 if (sip2 != NULL) {
1637 RecordInSeqIdGiCache (gi, sip2);
1638 return sip2;
1639 }
1640 break;
1641 default:
1642 break;
1643 }
1644 }
1645 }
1646
1647 RecordInSeqIdGiCache (gi, sip2);
1648 return sip2;
1649 }
1650
1651 /*****************************************************************************
1652 *
1653 * GetGIForSeqId(SeqIdPtr)
1654 * returns the GI for a SeqId
1655 * returns 0 if can't find it
1656 *
1657 *****************************************************************************/
FetchFromGiSeqIdCache(SeqIdPtr sip,BIG_ID_PNTR gip)1658 NLM_EXTERN Boolean FetchFromGiSeqIdCache (SeqIdPtr sip, BIG_ID_PNTR gip)
1659
1660 {
1661 Char buf [128];
1662 ValNodePtr vnp;
1663 SeqIdBlockPtr sibp = NULL;
1664 Int2 left, right, mid;
1665 Int4 compare, ret;
1666 Boolean done = FALSE;
1667
1668
1669 if (gip != NULL) {
1670 *gip = 0;
1671 }
1672 if (seqidgicache == NULL && seqidgilatest == NULL) return done;
1673 if (sip == NULL) return done;
1674 if (! MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1)) return done;
1675
1676 if (! UpdateSeqIdGiArrays ()) {
1677 return done;
1678 }
1679
1680 ret = NlmRWrdlock(sgi_RWlock);
1681 if (ret != 0)
1682 {
1683 ErrPostEx(SEV_ERROR,0,0,"SeqIdGi: RWrdlock error [%ld]",
1684 (long)ret);
1685 return done;
1686 }
1687
1688 if (giseqidarray != NULL) {
1689 left = 1;
1690 right = seqidcount;
1691 while (left <= right) {
1692 mid = (left + right) / 2;
1693 compare = 0;
1694 vnp = giseqidarray [mid - 1];
1695 if (vnp != NULL) {
1696 sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1697 if (sibp != NULL) {
1698 compare = StringCmp (buf, sibp->revstr);
1699 }
1700 }
1701 if (compare <= 0) {
1702 right = mid - 1;
1703 }
1704 if (compare >= 0) {
1705 left = mid + 1;
1706 }
1707 }
1708 if (left > right + 1 && sibp != NULL) {
1709 if (gip != NULL) {
1710 *gip = sibp->uid;
1711 }
1712 sibp->touch = GetSecs ();
1713 done = TRUE;
1714 }
1715 }
1716
1717 if (! done) {
1718 for (vnp = seqidgilatest; vnp != NULL; vnp = vnp->next) {
1719 sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1720 if (sibp == NULL) continue;
1721 if (StringCmp (buf, sibp->revstr) == 0) {
1722 if (gip != NULL) {
1723 *gip = sibp->uid;
1724 }
1725 sibp->touch = GetSecs ();
1726 done = TRUE;
1727 break;
1728 }
1729 }
1730 }
1731
1732 ret = NlmRWunlock(sgi_RWlock);
1733 if (ret != 0)
1734 {
1735 ErrPostEx(SEV_ERROR,0,0,"SeqIdGi: RWunlock error [%ld]",
1736 (long)ret);
1737 }
1738
1739 return done;
1740 }
1741
GetGIForSeqId(SeqIdPtr sid)1742 NLM_EXTERN BIG_ID LIBCALL GetGIForSeqId (SeqIdPtr sid)
1743 {
1744 BioseqPtr bsp = NULL;
1745 ObjMgrProcPtr ompp;
1746 OMProcControl ompc;
1747 Int2 ret;
1748 SeqIdPtr sip;
1749 BIG_ID gi = 0;
1750 SeqEntryPtr oldscope = NULL;
1751
1752
1753 if (sid == NULL)
1754 return gi;
1755
1756 if (sid->choice == SEQID_GI)
1757 return sid->data.intvalue;
1758
1759 oldscope = SeqEntrySetScope (NULL);
1760 bsp = BioseqFindCore(sid);
1761 SeqEntrySetScope (oldscope);
1762
1763 if (bsp != NULL)
1764 {
1765 for (sip = bsp->id; sip != NULL; sip = sip->next)
1766 {
1767 if (sip->choice == SEQID_GI)
1768 return sip->data.intvalue;
1769 }
1770 }
1771
1772 if (FetchFromGiSeqIdCache (sid, &gi)) {
1773 return gi;
1774 }
1775
1776 ompp = NULL;
1777 while ((ompp = ObjMgrProcFindNext(NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_SEQID, ompp)) != NULL)
1778 {
1779 if ((ompp->subinputtype == 0) && (ompp->suboutputtype == SEQID_GI))
1780 {
1781 MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl));
1782 ompc.input_data = sid;
1783 ompc.proc = ompp;
1784 ret = (* (ompp->func))((Pointer)&ompc);
1785 switch (ret)
1786 {
1787 case OM_MSG_RET_ERROR:
1788 ErrShow();
1789 break;
1790 case OM_MSG_RET_DEL:
1791 break;
1792 case OM_MSG_RET_OK:
1793 break;
1794 case OM_MSG_RET_DONE:
1795 sip = (SeqIdPtr)(ompc.output_data);
1796 if (sip != NULL)
1797 {
1798 if (sip->choice == SEQID_GI)
1799 {
1800 gi = (BIG_ID) sip->data.intvalue;
1801 SeqIdFree(sip);
1802 RecordInSeqIdGiCache (gi, sid);
1803 return gi;
1804 }
1805 SeqIdFree(sip);
1806 }
1807 break;
1808 default:
1809 break;
1810 }
1811 }
1812 }
1813
1814 return gi;
1815 }
1816
1817
1818 /*****************************************************************************
1819 *
1820 * SeqEntryFind(sip)
1821 * returns top level seqentry for sip
1822 *
1823 *****************************************************************************/
SeqEntryFind(SeqIdPtr sid)1824 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryFind (SeqIdPtr sid)
1825 {
1826 BioseqPtr bsp;
1827 ObjMgrDataPtr omdp;
1828 ObjMgrDataPtr PNTR omdpp;
1829 SeqEntryPtr result=NULL;
1830 SeqSubmitPtr ssp;
1831 Int4 i;
1832 ObjMgrPtr omp;
1833
1834 bsp = BioseqFind(sid);
1835 if (bsp == NULL) return result;
1836
1837 omp = ObjMgrReadLock();
1838 omdpp = omp->datalist;
1839
1840 i = ObjMgrLookup(omp, (Pointer)bsp);
1841 if (i < 0) {
1842 Char tmpbuff[256];
1843
1844 SeqIdWrite(bsp->id, tmpbuff,
1845 PRINTID_FASTA_LONG, sizeof(tmpbuff));
1846
1847 ErrPostEx(SEV_WARNING, 0, __LINE__,
1848 "ObjMgrLookup() returned negative value "
1849 "id = %s, totobj = %d, currobj = %d, "
1850 "HighestEntityID = %d", tmpbuff,
1851 omp->totobj,
1852 omp->currobj, omp->HighestEntityID);
1853 ObjMgrUnlock();
1854 return result;
1855 }
1856
1857 omdp = omdpp[i];
1858 while (omdp->parentptr != NULL)
1859 {
1860 i = ObjMgrLookup(omp, (omdp->parentptr));
1861 if (i < 0) {
1862 Char tmpbuff[256];
1863
1864 SeqIdWrite(bsp->id, tmpbuff,
1865 PRINTID_FASTA_LONG, sizeof(tmpbuff));
1866
1867 ErrPostEx(SEV_WARNING, 0, __LINE__,
1868 "ObjMgrLookup() returned negative value "
1869 "id = %s, totobj = %d, currobj = %d, "
1870 "HighestEntityID = %d", tmpbuff,
1871 omp->totobj,
1872 omp->currobj, omp->HighestEntityID);
1873 ObjMgrUnlock();
1874 return result;
1875 }
1876 omdp = omdpp[i];
1877 }
1878
1879 if (omdp->datatype == OBJ_SEQSUB) {
1880 ssp = (SeqSubmitPtr) omdp->dataptr;
1881 if (ssp != NULL && ssp->datatype == 1) {
1882 result = (SeqEntryPtr) ssp->data;
1883 }
1884 } else {
1885 result = omdp->choice;
1886 }
1887 ObjMgrUnlock();
1888 return result;
1889 }
1890
1891 /*****************************************************************************
1892 *
1893 * BioseqContextPtr BioseqContextNew (bsp)
1894 *
1895 *****************************************************************************/
BioseqContextNew(BioseqPtr bsp)1896 NLM_EXTERN BioseqContextPtr LIBCALL BioseqContextNew (BioseqPtr bsp)
1897 {
1898 ObjMgrDataPtr omdp;
1899 ObjMgrDataPtr PNTR omdpp;
1900 Int4 i;
1901 Int2 ctr=0;
1902 SeqEntryPtr seps[BIOSEQCONTEXTMAX];
1903 BioseqContextPtr bcp;
1904 ObjMgrPtr omp;
1905
1906 if (bsp == NULL)
1907 return NULL;
1908
1909
1910 bcp = MemNew(sizeof(BioseqContext));
1911 bcp->bsp = bsp;
1912 bcp->se.choice = 1; /* bioseq */
1913 bcp->se.data.ptrvalue = bsp;
1914
1915 omp = ObjMgrReadLock();
1916 if (omp == NULL) return BioseqContextFree(bcp);
1917 omdpp = omp->datalist;
1918
1919 i = ObjMgrLookup(omp, (Pointer)bsp);
1920 if (i < 0) {
1921 Char tmpbuff[256];
1922
1923 SeqIdWrite(bsp->id, tmpbuff,
1924 PRINTID_FASTA_LONG, sizeof(tmpbuff));
1925
1926 ErrPostEx(SEV_WARNING, 0, __LINE__,
1927 "ObjMgrLookup() returned negative value "
1928 "id = %s, totobj = %d, currobj = %d, "
1929 "HighestEntityID = %d", tmpbuff,
1930 omp->totobj,
1931 omp->currobj, omp->HighestEntityID);
1932 ObjMgrUnlock();
1933 return NULL;
1934 }
1935 omdp = omdpp[i];
1936
1937 if (omdp->choice != NULL)
1938 {
1939 seps[ctr] = omdp->choice;
1940 ctr++;
1941
1942 while (omdp->parentptr != NULL)
1943 {
1944 i = ObjMgrLookup(omp, (omdp->parentptr));
1945 if (i < 0) {
1946 Char tmpbuff[256];
1947
1948 SeqIdWrite(bsp->id, tmpbuff,
1949 PRINTID_FASTA_LONG, sizeof(tmpbuff));
1950
1951 ErrPostEx(SEV_WARNING, 0, __LINE__,
1952 "ObjMgrLookup() returned negative value "
1953 "id = %s, totobj = %d, currobj = %d, "
1954 "HighestEntityID = %d", tmpbuff,
1955 omp->totobj,
1956 omp->currobj, omp->HighestEntityID);
1957 ObjMgrUnlock();
1958 return NULL;
1959 }
1960 omdp = omdpp[i];
1961 if (omdp->choice != NULL)
1962 {
1963 if (ctr == BIOSEQCONTEXTMAX)
1964 ErrPostEx(SEV_ERROR, 0,0, "BioseqContextNew: more than %d levels",(int)ctr);
1965 else
1966 {
1967 seps[ctr] = omdp->choice;
1968 ctr++;
1969 }
1970 }
1971 }
1972
1973 bcp->count = ctr;
1974 for (i = 0; i < bcp->count; i++)
1975 {
1976 ctr--;
1977 bcp->context[i] = seps[ctr];
1978 }
1979 }
1980
1981 if (omdp->tempload == TL_CACHED)
1982 {
1983 ErrPostEx(SEV_ERROR,0,0,"BioseqContextNew: bsp is TL_CACHED");
1984 bcp = BioseqContextFree(bcp);
1985 }
1986
1987 ObjMgrUnlock();
1988
1989 return bcp;
1990 }
1991
1992 /*****************************************************************************
1993 *
1994 * BioseqContextFree(bcp)
1995 *
1996 *****************************************************************************/
BioseqContextFree(BioseqContextPtr bcp)1997 NLM_EXTERN BioseqContextPtr LIBCALL BioseqContextFree(BioseqContextPtr bcp)
1998 {
1999 return MemFree(bcp);
2000 }
2001
2002 /*****************************************************************************
2003 *
2004 * BioseqContextGetSeqDescr(bcp, type, curr, SeqEntryPtr PNTR sep)
2005 * returns pointer to the next SeqDescr of this type
2006 * type gives type of Seq-descr
2007 * if (type == 0)
2008 * get them all
2009 * curr is NULL or previous node of this type found
2010 * moves up from bsp
2011 * if (sep != NULL) sep set to SeqEntryPtr containing the SeqDescr.
2012 *
2013 *****************************************************************************/
BioseqContextGetSeqDescr(BioseqContextPtr bcp,Int2 type,ValNodePtr curr,SeqEntryPtr PNTR the_sep)2014 NLM_EXTERN ValNodePtr LIBCALL BioseqContextGetSeqDescr (BioseqContextPtr bcp, Int2 type, ValNodePtr curr, SeqEntryPtr PNTR the_sep) /* the last one you used */
2015 {
2016 Int2 i;
2017 ValNodePtr tmp = NULL;
2018 Boolean found = FALSE;
2019 BioseqPtr bsp;
2020 BioseqSetPtr bssp;
2021
2022 if (bcp == NULL) return NULL;
2023
2024 if (the_sep != NULL)
2025 *the_sep = NULL;
2026
2027 if (bcp->count == 0) /* just a Bioseq */
2028 {
2029 tmp = BioseqGetSeqDescr(bcp->bsp, type, curr);
2030 if (the_sep != NULL) *the_sep = bcp->context[1];
2031 return tmp;
2032 }
2033
2034 i = bcp->count - 1;
2035 if (curr != NULL) /* find where we are */
2036 {
2037 while ((i >= 0) && (! found))
2038 {
2039 if (IS_Bioseq(bcp->context[i]))
2040 {
2041 bsp = (BioseqPtr)((bcp->context[i])->data.ptrvalue);
2042 tmp = bsp->descr;
2043 }
2044 else
2045 {
2046 bssp = (BioseqSetPtr)((bcp->context[i])->data.ptrvalue);
2047 tmp = bssp->descr;
2048 }
2049 while ((tmp != curr) && (tmp != NULL))
2050 tmp = tmp->next;
2051 if (tmp == curr)
2052 {
2053 found = TRUE;
2054 tmp = tmp->next;
2055 }
2056 else
2057 i--;
2058 }
2059 if (! found) /* can't find it! */
2060 return NULL;
2061 }
2062 else /* get first one */
2063 {
2064 tmp = bcp->bsp->descr;
2065 }
2066
2067 while (i >= 0)
2068 {
2069 while (tmp != NULL)
2070 {
2071 if ((! type) || ((Int2)(tmp->choice) == type))
2072 {
2073 if (the_sep != NULL) *the_sep = bcp->context[i];
2074 return tmp;
2075 }
2076 tmp = tmp->next;
2077 }
2078 i--;
2079 if (i >= 0)
2080 {
2081 if (IS_Bioseq(bcp->context[i]))
2082 {
2083 bsp = (BioseqPtr)((bcp->context[i])->data.ptrvalue);
2084 tmp = bsp->descr;
2085 }
2086 else
2087 {
2088 bssp = (BioseqSetPtr)((bcp->context[i])->data.ptrvalue);
2089 tmp = bssp->descr;
2090 }
2091 }
2092 }
2093 return NULL;
2094 }
2095
2096 /*****************************************************************************
2097 *
2098 * BioseqContextGetSeqFeat(bcp, type, curr, sapp)
2099 * returns pointer to the next Seq-feat of this type
2100 * type gives type of Seq-descr
2101 * if (type == 0)
2102 * get them all
2103 * curr is NULL or previous node of this type found
2104 * moves up from bsp
2105 * if (sapp != NULL) is filled with SeqAnnotPtr containing the SeqFeat
2106 * in:
2107 * 0 = sfp->location only
2108 * 1 = sfp->product only
2109 * 2 = either of above
2110 *
2111 *****************************************************************************/
BioseqContextGetSeqFeat(BioseqContextPtr bcp,Int2 type,SeqFeatPtr curr,SeqAnnotPtr PNTR sapp,Int2 in)2112 NLM_EXTERN SeqFeatPtr LIBCALL BioseqContextGetSeqFeat (BioseqContextPtr bcp, Int2 type,
2113 SeqFeatPtr curr, SeqAnnotPtr PNTR sapp, Int2 in) /* the last one you used */
2114 {
2115 SeqEntryPtr sep;
2116
2117 if (bcp == NULL) return NULL;
2118
2119 if (sapp != NULL)
2120 *sapp = NULL;
2121
2122 if (bcp->count == 0) /* just a BioseqSeq */
2123 sep = &(bcp->se);
2124 else
2125 sep = bcp->context[0];
2126
2127 return SeqEntryGetSeqFeat (sep, type, curr, sapp, in, bcp->bsp);
2128 }
2129
/* search state shared between SeqEntryGetSeqFeat and GetSeqFeatCallback */
typedef struct smgetseqfeat {
  Boolean hit;             /* TRUE once the feature to resume after has been passed
                              (or from the start when there is none) */
  SeqFeatPtr last,         /* feature returned by the previous call, or NULL */
    this;                  /* feature found by this search; NULL until one is found */
  SeqAnnotPtr sap;         /* annotation that contains the found feature */
  SeqLocPtr slp1, slp2;    /* locations to match features against; slp1 == NULL
                              means no matching by Bioseq, slp2 is optional */
  Int2 in, type;           /* in: 0 = location only, 1 = product only, otherwise both;
                              type: feature choice wanted, 0 = any */
} SMGetSeqFeat, PNTR GetSeqFeatPtr;

/* callback handed to SeqEntryExplore by SeqEntryGetSeqFeat; data is a GetSeqFeatPtr */
NLM_EXTERN void GetSeqFeatCallback (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent);
2140
2141 /*****************************************************************************
2142 *
2143 * SeqEntryGetSeqFeat(sep, type, curr, sapp)
2144 * returns pointer to the next Seq-feat of this type
2145 * type gives type of SeqFeat
2146 * if (type == 0)
2147 * get them all
2148 * curr is NULL or previous node of this type found
2149 * moves up from bsp
2150 * if (sapp != NULL) is filled with SeqAnnotPtr containing the SeqFeat
2151 * if (bsp != NULL) then for that Bioseq match on location by "in"
2152 * in:
2153 * 0 = sfp->location only
2154 * 1 = sfp->product only
2155 * 2 = either of above
2156 *
2157 *****************************************************************************/
SeqEntryGetSeqFeat(SeqEntryPtr sep,Int2 type,SeqFeatPtr curr,SeqAnnotPtr PNTR sapp,Int2 in,BioseqPtr bsp)2158 NLM_EXTERN SeqFeatPtr LIBCALL SeqEntryGetSeqFeat (SeqEntryPtr sep, Int2 type,
2159 SeqFeatPtr curr, SeqAnnotPtr PNTR sapp, Int2 in, BioseqPtr bsp) /* the last one you used */
2160 {
2161 SMGetSeqFeat gsf;
2162 ValNode vn1, vn2;
2163
2164 if (sep == NULL) return NULL;
2165
2166 if (sapp != NULL)
2167 *sapp = NULL;
2168
2169 if (curr == NULL)
2170 gsf.hit = TRUE;
2171 else
2172 gsf.hit = FALSE;
2173 gsf.last = curr;
2174 gsf.this = NULL;
2175 gsf.sap = NULL;
2176 gsf.type = type;
2177 gsf.in = in;
2178 if (bsp != NULL) /* matching by Bioseq */
2179 {
2180 if ((bsp->repr == Seq_repr_seg) || (bsp->repr == Seq_repr_ref))
2181 {
2182 vn2.choice = SEQLOC_MIX;
2183 vn2.data.ptrvalue = bsp->seq_ext;
2184 gsf.slp2 = (SeqLocPtr)(&vn2);
2185 }
2186 else
2187 gsf.slp2 = NULL;
2188
2189 vn1.choice = SEQLOC_WHOLE;
2190 vn1.data.ptrvalue = (Pointer) SeqIdFindBest (bsp->id, 0);
2191 gsf.slp1 = (SeqLocPtr)(&vn1);
2192 }
2193 else
2194 gsf.slp1 = NULL;
2195
2196 SeqEntryExplore (sep, (Pointer)(&gsf), GetSeqFeatCallback);
2197
2198 if (sapp != NULL)
2199 *sapp = gsf.sap;
2200
2201 return gsf.this;
2202 }
2203
GetSeqFeatCallback(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)2204 NLM_EXTERN void GetSeqFeatCallback (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
2205 {
2206 GetSeqFeatPtr gsfp;
2207 BioseqPtr bsp;
2208 BioseqSetPtr bssp;
2209 SeqAnnotPtr sap;
2210 SeqFeatPtr sfp, last;
2211 Boolean hit, gotit = FALSE;
2212 Uint1 type;
2213 SeqLocPtr slp1, slp2 = NULL, slp;
2214 Int2 i, in = 0, retval;
2215
2216 gsfp = (GetSeqFeatPtr)data;
2217 if (gsfp->this != NULL) /* got it */
2218 return;
2219
2220 last = gsfp->last;
2221 hit = gsfp->hit;
2222 type = (Uint1)(gsfp->type);
2223 if (gsfp->slp1 != NULL) /* matching by Bioseq */
2224 {
2225 slp1 = gsfp->slp1;
2226 slp2 = gsfp->slp2;
2227 in = gsfp->in;
2228 }
2229 else
2230 slp1 = NULL;
2231
2232 if (IS_Bioseq(sep))
2233 {
2234 bsp = (BioseqPtr)(sep->data.ptrvalue);
2235 sap = bsp->annot;
2236 }
2237 else
2238 {
2239 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
2240 sap = bssp->annot;
2241 }
2242
2243 while (sap != NULL)
2244 {
2245 if (sap->type == 1) /* feature table */
2246 {
2247 for (sfp = (SeqFeatPtr)(sap->data); sfp != NULL; sfp = sfp->next)
2248 {
2249 if (! hit) /* still looking */
2250 {
2251 if (sfp == last)
2252 {
2253 hit = TRUE;
2254 gsfp->hit = TRUE;
2255 }
2256 }
2257 else
2258 {
2259 if ((! type) || (type == sfp->data.choice))
2260 {
2261 if (slp1 != NULL) /* look for feats on a bioseq */
2262 {
2263 for (i = 0; i < 2; i++)
2264 {
2265 if ((i == 0) && (in != 1))
2266 slp = sfp->location;
2267 else if ((i==1) && (in != 0))
2268 slp = sfp->product;
2269 else
2270 slp = NULL;
2271 if (slp != NULL)
2272 {
2273 retval = SeqLocCompare(slp, slp1);
2274 if (retval)
2275 {
2276 gotit = TRUE;
2277 break;
2278 }
2279
2280 if (slp2 != NULL)
2281 {
2282 retval = SeqLocCompare(slp, slp2);
2283 if (retval)
2284 {
2285 gotit = TRUE;
2286 break;
2287 }
2288 }
2289 }
2290 }
2291 }
2292 else
2293 gotit = TRUE;
2294 if (gotit)
2295 {
2296 gsfp->this = sfp;
2297 gsfp->sap = sap;
2298 return;
2299 }
2300 }
2301 }
2302 }
2303 }
2304
2305 sap = sap->next;
2306 }
2307
2308 return;
2309 }
2310
2311 /*****************************************************************************
2312 *
2313 * BioseqContextGetTitle(bcp)
2314 * returns first title for Bioseq in this context
2315 *
2316 *****************************************************************************/
BioseqContextGetTitle(BioseqContextPtr bcp)2317 NLM_EXTERN CharPtr LIBCALL BioseqContextGetTitle(BioseqContextPtr bcp)
2318 {
2319 CharPtr title = NULL;
2320 ValNodePtr vnp;
2321
2322 vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_title, NULL, NULL);
2323 if (vnp != NULL)
2324 title = (CharPtr)vnp->data.ptrvalue;
2325 return title;
2326 }
2327
2328 /*****************************************************************************
2329 *
2330 * SeqMgr Functions
2331 *
2332 *****************************************************************************/
2333
2334 /*****************************************************************************
2335 *
2336 * SeqMgrSeqEntry(type, data, sep)
2337 * Adds the SeqEntryPtr pointing directly to this Bioseq or BioseqSet
2338 *
2339 *****************************************************************************/
SeqMgrSeqEntry(Uint1 type,Pointer data,SeqEntryPtr sep)2340 NLM_EXTERN Boolean LIBCALL SeqMgrSeqEntry (Uint1 type, Pointer data, SeqEntryPtr sep)
2341 {
2342 return ObjMgrSetChoice (OBJ_SEQENTRY, sep, data);
2343 }
2344
2345 /*****************************************************************************
2346 *
2347 * SeqMgrGetSeqEntryForData(data)
2348 * returns SeqEntryPtr for a BioseqPtr or BioseqSetPtr
2349 * sep must have been put in SeqMgr using SeqMgrSeqEntry
2350 * the Bioseq/BioseqSets it is a part of must also be in SeqMgr
2351 * returns NULL on failure.
2352 *
2353 *****************************************************************************/
SeqMgrGetSeqEntryForData(Pointer data)2354 NLM_EXTERN SeqEntryPtr LIBCALL SeqMgrGetSeqEntryForData (Pointer data)
2355 {
2356 return ObjMgrGetChoiceForData(data);
2357 }
2358
2359 /*****************************************************************************
2360 *
2361 * SeqMgrGetEntityIDForSeqEntry(sep)
2362 * returns the EntityID for a SeqEntryPtr
2363 * sep must have been put in SeqMgr using SeqMgrSeqEntry
2364 * the Bioseq/BioseqSets it is a part of must also be in SeqMgr
2365 * This function will move up to the top of the SeqEntry tree it may be
2366 * in. If top level EntityID is 0, one is assigned at this point.
2367 * If an element is moved under a different hierarchy, its EntityID will
2368 * change.
2369 * returns 0 on failure.
2370 *
2371 *****************************************************************************/
SeqMgrGetEntityIDForSeqEntry(SeqEntryPtr sep)2372 NLM_EXTERN Int2 LIBCALL SeqMgrGetEntityIDForSeqEntry (SeqEntryPtr sep)
2373 {
2374 return ObjMgrGetEntityIDForChoice (sep);
2375 }
2376
2377 /*****************************************************************************
2378 *
2379 * SeqMgrGetSeqEntryForEntityID (id)
2380 *
2381 *****************************************************************************/
SeqMgrGetSeqEntryForEntityID(Int2 id)2382 NLM_EXTERN SeqEntryPtr LIBCALL SeqMgrGetSeqEntryForEntityID (Int2 id)
2383 {
2384 return ObjMgrGetChoiceForEntityID (id);
2385 }
2386
2387 /*****************************************************************************
2388 *
2389 * SeqMgrSetBSFetchTop (fetch, data)
2390 * sets the BSFetchTop routine to "fetch"
2391 * returns previous value
2392 * set to NULL to turn off all fetching for that type
2393 *
2394 * Current value can be called directly as BioseqFetch();
2395 * Default is
2396 * 1) looks in memory
2397 * 2) looks locally if LocalBSFetch is set
2398 * 3) looks remotely if RemoteBSFetch is set
2399 *
2400 *****************************************************************************/
SeqMgrSetBSFetchTop(BSFetchTop fetch,Pointer data)2401 NLM_EXTERN BSFetchTop LIBCALL SeqMgrSetBSFetchTop (BSFetchTop fetch, Pointer data)
2402 {
2403 SeqMgrPtr smp;
2404 BSFetchTop tmp = NULL;
2405
2406 smp = SeqMgrWriteLock();
2407 if (smp == NULL) return tmp;
2408
2409 tmp = smp->bsfetch;
2410 smp->bsfetch = fetch;
2411 smp->TopData = data;
2412 SeqMgrUnlock();
2413 return tmp;
2414 }
2415
2416 /*****************************************************************************
2417 *
2418 * SeqMgrSetFetchOnLock(value)
2419 * if value = TRUE, manager will try to fetch the bioseq if not in
2420 * memory, when BioseqLock is called
2421 * if FALSE, BioseqLock will only look in memory
2422 * returns previous value of fetch_on_lock
2423 * default is to fetch_on_lock
2424 *
2425 *****************************************************************************/
SeqMgrSetFetchOnLock(Boolean value)2426 NLM_EXTERN Boolean LIBCALL SeqMgrSetFetchOnLock (Boolean value)
2427 {
2428 Boolean tmp=FALSE;
2429 SeqMgrPtr smp;
2430
2431 smp = SeqMgrWriteLock();
2432 if (smp == NULL) return tmp;
2433
2434 tmp = smp->fetch_on_lock;
2435 smp->fetch_on_lock = value;
2436 SeqMgrUnlock();
2437 return tmp;
2438 }
2439
2440 /*****************************************************************************
2441 *
2442 * SeqMgrLinkSeqEntry(sep, parenttype, parentptr)
2443 * connects all component seq-entries by traversing the linked list
2444 * all calling SeqMgrConnect and SeqMgrSeqEntry appropriately
2445 * if parenttype != 0, then assumes seqentry is contained in parentptr
2446 * and should be connected to it
2447 *
2448 *****************************************************************************/
SeqMgrLinkSeqEntry(SeqEntryPtr sep,Uint2 parenttype,Pointer parentptr)2449 NLM_EXTERN Boolean LIBCALL SeqMgrLinkSeqEntry (SeqEntryPtr sep, Uint2 parenttype, Pointer parentptr)
2450 {
2451 SeqEntryPtr sep2;
2452 BioseqSetPtr bssp;
2453 Uint2 the_type;
2454
2455 if (sep == NULL)
2456 return FALSE;
2457
2458 if (IS_Bioseq(sep))
2459 the_type = OBJ_BIOSEQ;
2460 else
2461 the_type = OBJ_BIOSEQSET;
2462
2463 SeqMgrSeqEntry((Uint1)the_type, sep->data.ptrvalue, sep);
2464
2465 /**** if (parenttype != 0) ****/
2466 ObjMgrConnect(the_type, sep->data.ptrvalue, parenttype, parentptr);
2467
2468 if (! IS_Bioseq(sep))
2469 {
2470 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
2471 for (sep2 = bssp->seq_set; sep2 != NULL; sep2 = sep2->next)
2472 {
2473 SeqMgrLinkSeqEntry (sep2, the_type, sep->data.ptrvalue);
2474 }
2475 }
2476 return TRUE;
2477 }
2478 /*****************************************************************************
2479 *
2480 * Selection Functions for data objects based on SeqLoc
2481 * See also general selection in objmgr.h
2482 *
2483 *****************************************************************************/
2484
2485 /*****************************************************************************
2486 *
2487 * SeqMgrSelect(region)
2488 * region is a SeqLocPtr
2489 * It can only apply to one Bioseq
2490 * selected area will be extreme left and right ends
2491 * fuzziness is ignored
2492 * if something else selected, deselects it first, then selects requested
2493 * item
2494 * to select without deselecting something else, use SeqMgrAlsoSelect()
2495 * returns TRUE if item is now currently selected, FALSE if not
2496 * "region" is always copied. Caller is responsible for managment of
2497 * SeqLoc that is passed in.
2498 *
2499 *****************************************************************************/
SeqMgrSelect(SeqLocPtr region)2500 NLM_EXTERN Boolean LIBCALL SeqMgrSelect (SeqLocPtr region)
2501 {
2502 return SeqMgrGenericSelect(region, 1, NULL);
2503 }
2504
SeqMgrAlsoSelect(SeqLocPtr region)2505 NLM_EXTERN Boolean LIBCALL SeqMgrAlsoSelect (SeqLocPtr region)
2506 {
2507 return SeqMgrGenericSelect(region, 2, NULL);
2508 }
2509
2510 /*****************************************************************************
2511 *
2512 * SeqMgrDeSelect(region)
2513 * if this item was selected, then deselects and returns TRUE
2514 * else returns FALSE
2515 *
2516 *****************************************************************************/
SeqMgrDeSelect(SeqLocPtr region)2517 NLM_EXTERN Boolean LIBCALL SeqMgrDeSelect (SeqLocPtr region)
2518 {
2519 return SeqMgrGenericSelect(region, 3, NULL);
2520 }
2521
2522 /*****************************************************************************
2523 *
2524 * SeqMgrSetColor(region, rgb)
2525 *
2526 *****************************************************************************/
SeqMgrSetColor(SeqLocPtr region,Uint1Ptr rgb)2527 NLM_EXTERN Boolean LIBCALL SeqMgrSetColor (SeqLocPtr region, Uint1Ptr rgb)
2528 {
2529 if (rgb == NULL) return FALSE;
2530 return SeqMgrGenericSelect(region, 4, rgb);
2531 }
2532
SeqMgrGenericSelect(SeqLocPtr region,Int2 type,Uint1Ptr rgb)2533 static Boolean NEAR SeqMgrGenericSelect (SeqLocPtr region, Int2 type,
2534 Uint1Ptr rgb)
2535 {
2536 SeqInt si;
2537 ValNode vn;
2538 SeqIdPtr sip;
2539 Uint2 entityID;
2540 Uint4 itemID;
2541
2542 if (region == NULL) return FALSE;
2543
2544 sip = SeqLocId(region);
2545 if (sip == NULL) return FALSE;
2546
2547 entityID = BioseqFindEntity(sip, &itemID);
2548 if (entityID == 0) return FALSE;
2549
2550 MemSet((Pointer)(&si), 0, sizeof(SeqInt));
2551 MemSet((Pointer)(&vn), 0, sizeof(ValNode));
2552
2553 si.id = sip;
2554 si.from = SeqLocStart(region);
2555 si.to = SeqLocStop(region);
2556 si.strand = SeqLocStrand(region);
2557
2558 if ((si.from < 0) || (si.to < 0) || (si.from > si.to)) return FALSE;
2559
2560 vn.choice = SEQLOC_INT;
2561 vn.data.ptrvalue = &si;
2562
2563 switch (type)
2564 {
2565 case 1:
2566 return ObjMgrSelect(entityID, itemID, OBJ_BIOSEQ, OM_REGION_SEQLOC, &vn);
2567 case 2:
2568 return ObjMgrAlsoSelect(entityID, itemID, OBJ_BIOSEQ, OM_REGION_SEQLOC, &vn);
2569 case 3:
2570 return ObjMgrDeSelect(entityID, itemID, OBJ_BIOSEQ, OM_REGION_SEQLOC, &vn);
2571 case 4:
2572 return ObjMgrSetColor(entityID, itemID, OBJ_BIOSEQ,
2573 OM_REGION_SEQLOC, &vn, rgb);
2574 default:
2575 break;
2576 }
2577
2578 return FALSE;
2579 }
2580
2581 /*****************************************************************************
2582 *
2583 * SpreadGapsInDeltaSeq(BioseqPtr bsp)
2584 * bsp must be a delta seq
2585 * function counts deltas with known lengths ( = known_len)
2586 * counts deltas which are gaps of unknown length ( = unk_count)
2587 * these can delta of length 0, delta with fuzz = lim (unk),
2588 * or SEQLOC_NULL
2589 * converts all unknown gaps to delta with fuzz = lim(unk)
2590 * sets length of all unknown gaps to
2591 * (bsp->length - known_len)/unk_count
2592 * any reminder spread over first few gaps
2593 *
2594 *****************************************************************************/
SpreadGapsInDeltaSeq(BioseqPtr bsp)2595 NLM_EXTERN Boolean LIBCALL SpreadGapsInDeltaSeq (BioseqPtr bsp)
2596 {
2597 Boolean retval = FALSE;
2598 Int4 known_len = 0,
2599 total_gap, gap_len,
2600 unk_count = 0,
2601 remainder;
2602 DeltaSeqPtr dsp;
2603 SeqLocPtr slocp;
2604 SeqLitPtr slp;
2605 IntFuzzPtr ifp;
2606
2607 if (bsp == NULL) return retval;
2608 if ((bsp->repr != Seq_repr_delta) || (bsp->seq_ext == NULL))
2609 return retval;
2610
2611 retval = TRUE; /* can function */
2612
2613 for (dsp = (DeltaSeqPtr)(bsp->seq_ext); dsp != NULL; dsp = dsp->next)
2614 {
2615 switch (dsp->choice)
2616 {
2617 case 1: /* SeqLocPtr */
2618 slocp = (SeqLocPtr)(dsp->data.ptrvalue);
2619 if (slocp == NULL) break;
2620 if (slocp->choice == SEQLOC_NULL) /* convert it */
2621 {
2622 SeqLocFree(slocp);
2623 slp = SeqLitNew();
2624 dsp->choice = 2;
2625 dsp->data.ptrvalue = slp;
2626 ifp = IntFuzzNew();
2627 slp->fuzz = ifp;
2628 ifp->choice = 4; /* lim - type unk */
2629 unk_count++;
2630 }
2631 else /* count length */
2632 known_len += SeqLocLen(slocp);
2633 break;
2634 case 2: /* SeqLitPtr */
2635 slp = (SeqLitPtr)(dsp->data.ptrvalue);
2636 if (slp == NULL) break;
2637 if (slp->seq_data != NULL) /* not a gap */
2638 {
2639 known_len += slp->length;
2640 break;
2641 }
2642 ifp = slp->fuzz;
2643 if (slp->length == 0) /* unknown length */
2644 {
2645 unk_count++;
2646 if (ifp != NULL)
2647 {
2648 if (ifp->choice != 4) /* not lim */
2649 ifp = IntFuzzFree(ifp);
2650 else if (ifp->a != 0) /* not unk */
2651 ifp = IntFuzzFree(ifp);
2652 }
2653 if (ifp == NULL)
2654 {
2655 ifp = IntFuzzNew();
2656 ifp->choice = 4; /* lim - unk */
2657 slp->fuzz = ifp;
2658 }
2659 }
2660 else /* gap length was set */
2661 {
2662 if (ifp == NULL) /* no fuzz - count length */
2663 known_len += slp->length;
2664 else /* might be a guess */
2665 {
2666 if ((ifp->choice == 4) && (ifp->a == 0)) /* lim - unk */
2667 unk_count++;
2668 else
2669 known_len += slp->length;
2670 }
2671 }
2672 break;
2673 default:
2674 break;
2675 }
2676
2677 }
2678
2679 if (unk_count == 0) /* no unknown gaps */
2680 return retval;
2681
2682 total_gap = bsp->length - known_len;
2683 if (total_gap < 0)
2684 total_gap = 0;
2685 gap_len = total_gap / unk_count;
2686 remainder = total_gap - (gap_len * unk_count);
2687
2688 for (dsp = (DeltaSeqPtr)(bsp->seq_ext); dsp != NULL; dsp = dsp->next)
2689 {
2690 switch (dsp->choice)
2691 {
2692 case 1: /* SeqLocPtr */
2693 break;
2694 case 2: /* SeqLitPtr */
2695 slp = (SeqLitPtr)(dsp->data.ptrvalue);
2696 if (slp == NULL) break;
2697 if (slp->seq_data != NULL) break;
2698 ifp = slp->fuzz;
2699 if (ifp == NULL) break;
2700 if ((ifp->choice != 4) || (ifp->a != 0))
2701 break;
2702 slp->length = gap_len;
2703 if (remainder)
2704 {
2705 slp->length++;
2706 remainder--;
2707 }
2708 break;
2709 default:
2710 break;
2711 }
2712 }
2713
2714 return retval;
2715 }
2716
2717 /*****************************************************************************
2718 *
2719 * CountGapsInDeltaSeq(BioseqPtr bsp, &num_segs, &num_gaps, &known_residues, &num_gaps_faked)
2720 * bsp must be a delta seq
2721 * function counts deltas and returns a profile
2722 * num_segs = total number of segments
2723 * num_gaps = total number of segments representing gaps
2724 * known_residues = number of real residues in the sequence (not gaps)
2725 * num_gaps_faked = number of gaps where real length is not known, but where
2726 * a length was guessed by spreading the total gap length
2727 * out over all gaps evenly.
2728 *
2729 * NOTE: any of these pointers except bsp can be NULL
2730 *
2731 * returns TRUE if values in argument were set.
2732 *
2733 *****************************************************************************/
NextLitLength(DeltaSeqPtr next,Int4Ptr lenp)2734 static Boolean NextLitLength (DeltaSeqPtr next, Int4Ptr lenp)
2735
2736 {
2737 SeqLitPtr slp;
2738
2739 if (lenp == NULL) return FALSE;
2740 *lenp = 0;
2741 if (next == NULL || next->choice != 2) return FALSE;
2742 slp = (SeqLitPtr) next->data.ptrvalue;
2743 if (slp == NULL || slp->seq_data == NULL) return FALSE;
2744 if (slp->seq_data_type == Seq_code_gap) return FALSE;
2745 *lenp = slp->length;
2746 return TRUE;
2747 }
2748
CountGapsInDeltaSeq(BioseqPtr bsp,Int4Ptr num_segs,Int4Ptr num_gaps,Int4Ptr known_residues,Int4Ptr num_gaps_faked,CharPtr buf,Int4 buflen)2749 NLM_EXTERN Boolean LIBCALL CountGapsInDeltaSeq (BioseqPtr bsp, Int4Ptr num_segs, Int4Ptr num_gaps, Int4Ptr known_residues, Int4Ptr num_gaps_faked, CharPtr buf, Int4 buflen)
2750 {
2751 Boolean retval = FALSE;
2752 Int4 residues = 0,
2753 segs = 0,
2754 gaps = 0,
2755 len = 0,
2756 fake_gaps = 0,
2757 from = 0,
2758 tlen = 0,
2759 nxtlen;
2760 DeltaSeqPtr dsp, next;
2761 SeqLocPtr slocp;
2762 SeqLitPtr slp;
2763 IntFuzzPtr ifp;
2764 Boolean unk;
2765 static Char tmp[128];
2766 Int2 diff, blen;
2767
2768 if (bsp == NULL) return retval;
2769 if ((bsp->repr != Seq_repr_delta) || (bsp->seq_ext == NULL))
2770 return retval;
2771
2772 retval = TRUE; /* can function */
2773
2774
2775 for (dsp = (DeltaSeqPtr)(bsp->seq_ext); dsp != NULL; dsp = next)
2776 {
2777 next = dsp->next;
2778 segs++;
2779 from = len + 1;
2780 switch (dsp->choice)
2781 {
2782 case 1: /* SeqLocPtr */
2783 slocp = (SeqLocPtr)(dsp->data.ptrvalue);
2784 if (slocp == NULL) break;
2785 if (slocp->choice == SEQLOC_NULL) /* gap */
2786 {
2787 gaps++;
2788 sprintf(tmp, "* %ld %ld gap of unknown length~", (long) from, (long) len);
2789 blen = (Int2) MIN ((Int4) buflen, (Int4) sizeof (tmp));
2790 diff = LabelCopy(buf, tmp, blen);
2791 buflen -= diff;
2792 buf += diff;
2793 }
2794 else { /* count length */
2795 residues += SeqLocLen(slocp);
2796 if (buf != NULL) {
2797 tlen = SeqLocLen(slocp);
2798 len += tlen;
2799 sprintf(tmp, "* %8ld %8ld: contig of %ld bp in length~", (long) from, (long) len, (long) tlen);
2800 blen = (Int2) MIN ((Int4) buflen, (Int4) sizeof (tmp));
2801 diff = LabelCopy(buf, tmp, blen);
2802 buflen -= diff;
2803 buf += diff;
2804 }
2805 }
2806 break;
2807 case 2: /* SeqLitPtr */
2808 slp = (SeqLitPtr)(dsp->data.ptrvalue);
2809 if (slp == NULL) break;
2810 tlen = slp->length;
2811 len += tlen;
2812 if (slp->seq_data != NULL && slp->seq_data_type != Seq_code_gap)
2813 {
2814 residues += slp->length;
2815 while (NextLitLength (next, &nxtlen)) {
2816 tlen += nxtlen;
2817 len += nxtlen;
2818 residues += nxtlen;
2819 next = next->next;
2820 }
2821 if (buf) {
2822 sprintf(tmp, "* %8ld %8ld: contig of %ld bp in length~", (long) from, (long) len, (long) tlen);
2823 blen = (Int2) MIN ((Int4) buflen, (Int4) sizeof (tmp));
2824 diff = LabelCopy(buf, tmp, blen);
2825 buflen -= diff;
2826 buf += diff;
2827 }
2828 }
2829 else
2830 {
2831 unk = FALSE;
2832 gaps++;
2833 ifp = slp->fuzz;
2834 if (ifp != NULL)
2835 {
2836 if ((ifp->choice == 4) && (ifp->a == 0)) {
2837 unk = TRUE;
2838 fake_gaps++;
2839 if (buf) {
2840 if (from > len) {
2841 sprintf(tmp, "* gap of unknown length~");
2842 } else {
2843 sprintf(tmp, "* %8ld %8ld: gap of unknown length~", (long) from, (long) len);
2844 }
2845 blen = (Int2) MIN ((Int4) buflen, (Int4) sizeof (tmp));
2846 diff = LabelCopy(buf, tmp, blen);
2847 buflen -= diff;
2848 buf += diff;
2849 }
2850 }
2851 }
2852 if (!unk && buf) {
2853 sprintf(tmp, "* %8ld %8ld: gap of %ld bp~", (long) from, (long) len, (long) tlen);
2854 blen = (Int2) MIN ((Int4) buflen, (Int4) sizeof (tmp));
2855 diff = LabelCopy(buf, tmp, blen);
2856 buflen -= diff;
2857 buf += diff;
2858 }
2859 }
2860 break;
2861 default:
2862 break;
2863 }
2864 }
2865
2866 if (num_segs != NULL)
2867 *num_segs = segs;
2868 if (num_gaps != NULL)
2869 *num_gaps = gaps;
2870 if (known_residues != NULL)
2871 *known_residues = residues;
2872 if (num_gaps_faked != NULL)
2873 *num_gaps_faked = fake_gaps;
2874
2875 return retval;
2876 }
2877
2878
2879 /*****************************************************************************
2880 *
2881 * SeqMgrAdd(type, data)
2882 * adds a Bioseq or BioseqSet to the sequence manager
2883 *
2884 *****************************************************************************/
SeqMgrAdd(Uint2 type,Pointer data)2885 NLM_EXTERN Boolean LIBCALL SeqMgrAdd (Uint2 type, Pointer data)
2886 {
2887 Boolean retval;
2888
2889 SeqMgrWriteLock();
2890 retval = ObjMgrAdd(type, data);
2891 if (type != OBJ_BIOSEQ) {
2892 SeqMgrUnlock();
2893 return retval;
2894 }
2895 retval &= SeqMgrAddToBioseqIndex((BioseqPtr)data);
2896
2897 SeqMgrUnlock();
2898
2899 return retval;
2900 }
2901
2902
2903 /*****************************************************************************
2904 *
2905 * SeqMgrDelete(type, data)
2906 * deletes a Bioseq or BioseqSet from the sequence manager
2907 *
2908 *****************************************************************************/
SeqMgrDelete(Uint2 type,Pointer data)2909 NLM_EXTERN Boolean LIBCALL SeqMgrDelete (Uint2 type, Pointer data)
2910 {
2911 Boolean retval = FALSE;
2912
2913 SeqMgrWriteLock();
2914 if (type == OBJ_BIOSEQ) /* remove id indexes */
2915 SeqMgrDeleteFromBioseqIndex((BioseqPtr)data);
2916
2917 retval = ObjMgrDelete(type, data);
2918 SeqMgrUnlock();
2919 return retval;
2920 }
2921
2922
2923
SeqMgrAddIndexElement(SeqMgrPtr smp,BioseqPtr bsp,CharPtr buf,Boolean sort_now)2924 static Boolean NEAR SeqMgrAddIndexElement(SeqMgrPtr smp, BioseqPtr bsp, CharPtr buf, Boolean sort_now)
2925 {
2926 SeqIdIndexElementPtr sip, PNTR sipp;
2927 SeqIdIndexBlockPtr sibp, prev;
2928 Int4 imin, imax, i, j;
2929 CharPtr tmp, newstr;
2930 ObjMgrDataPtr omdp;
2931 ObjMgrPtr omp;
2932
2933 omp = ObjMgrReadLock();
2934 omdp = ObjMgrFindByData(omp, (Pointer)bsp); /* caching protection */
2935 ObjMgrUnlock();
2936 if (omdp == NULL)
2937 {
2938 return FALSE;
2939 }
2940
2941 sipp = smp->BioseqIndex;
2942 if (smp->BioseqIndexCnt >= smp->BioseqIndexNum) /* expand space */
2943 {
2944 prev = NULL;
2945 for (sibp = smp->BioseqIndexData; sibp != NULL; sibp = sibp->next)
2946 prev = sibp;
2947 sibp = MemNew(sizeof(SeqIdIndexBlock));
2948 if (prev != NULL)
2949 prev->next = sibp;
2950 else
2951 smp->BioseqIndexData = sibp;
2952
2953 smp->BioseqIndex = MemNew((smp->BioseqIndexNum + 100) *
2954 sizeof(SeqIdIndexElementPtr));
2955 MemCopy(smp->BioseqIndex, sipp, (smp->BioseqIndexNum *
2956 sizeof(SeqIdIndexElementPtr)));
2957 MemFree(sipp);
2958 smp->BioseqIndexNum += 100;
2959 sipp = smp->BioseqIndex;
2960 for (i = 0, j = smp->BioseqIndexCnt; i < 100; i++, j++)
2961 sipp[j] = &(sibp->sid[i]);
2962 }
2963
2964 i = smp->BioseqIndexCnt; /* empties are at the end */
2965 sip = sipp[i];
2966 sip->omdp = omdp; /* fill in the values */
2967 sip->str = StringSave(buf);
2968 newstr = sip->str;
2969 RevStringUpper(newstr); /* try to avoid case check */
2970
2971 if (! sort_now)
2972 {
2973 smp->BioseqIndexCnt++; /* got one more */
2974 return TRUE;
2975 }
2976
2977 imin = 0; /* find where it goes */
2978 imax = i-1;
2979 if (imax >= 0)
2980 tmp = sipp[imax]->str;
2981 if ((i) && (StringCmp(newstr, sipp[imax]->str) < 0))
2982 {
2983 i = (imax + imin) / 2;
2984 while (imax > imin)
2985 {
2986 tmp = sipp[i]->str;
2987 if ((j = StringCmp(newstr, tmp)) < 0)
2988 imax = i - 1;
2989 else if (j > 0)
2990 imin = i + 1;
2991 else
2992 break;
2993 i = (imax + imin)/2;
2994 }
2995
2996 if (StringCmp(newstr, sipp[i]->str) > 0) /* check for off by 1 */
2997 {
2998 i++;
2999 }
3000
3001
3002 imax = smp->BioseqIndexCnt - 1; /* open the array */
3003 while (imax >= i)
3004 {
3005 sipp[imax+1] = sipp[imax];
3006 imax--;
3007 }
3008 }
3009
3010 sipp[i] = sip; /* put in the pointer in order */
3011 smp->BioseqIndexCnt++; /* got one more */
3012 return TRUE;
3013 }
3014
3015 /*****************************************************************************
3016 *
3017 * SeqMgrHoldIndexing(Boolean hold)
3018 * stops sequence indexing to allow bulk loading if hold = TRUE
3019 * starts it when hold = FALSE;
3020 * uses a counter so you must call it the same number of times
3021 * with TRUE as with FALSE
3022 * when the counter decrements to 0, it will index what it has.
3023 *
3024 *****************************************************************************/
SeqMgrHoldIndexing(Boolean hold)3025 NLM_EXTERN void LIBCALL SeqMgrHoldIndexing (Boolean hold)
3026 {
3027 SeqMgrPtr smp;
3028
3029 smp = SeqMgrWriteLock();
3030 if (hold)
3031 smp->hold_indexing++;
3032 else
3033 smp->hold_indexing--;
3034 SeqMgrUnlock();
3035
3036 if (! smp->hold_indexing)
3037 SeqMgrProcessNonIndexedBioseq(FALSE);
3038
3039 return;
3040 }
3041
3042 int LIBCALLBACK SeqIdIndexElementCmp (VoidPtr a, VoidPtr b);
3043
SeqIdIndexElementCmp(VoidPtr a,VoidPtr b)3044 int LIBCALLBACK SeqIdIndexElementCmp (VoidPtr a, VoidPtr b)
3045 {
3046 return (int)(StringCmp((*(SeqIdIndexElementPtr PNTR)a)->str,
3047 (*(SeqIdIndexElementPtr PNTR)b)->str));
3048 }
3049
3050 /*****************************************************************************
3051 *
3052 * SeqMgrProcessNonIndexedBioseq(Boolean force_it)
3053 * Indexes a BioseqPtr by SeqId(s)
3054 * If ! force_it, respects the smp->don't index flag
3055 *
3056 *****************************************************************************/
SeqMgrProcessNonIndexedBioseq(Boolean force_it)3057 static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it)
3058 {
3059 BioseqPtr PNTR bspp, bsp;
3060 Int4 i, total, k, old_BioseqIndexCnt;
3061 SeqIdPtr sip;
3062 Char buf[128];
3063 /*
3064 CharPtr tmp;
3065 */
3066 Uint1 oldchoice;
3067 Boolean indexed;
3068 TextSeqIdPtr tsip;
3069 SeqMgrPtr smp;
3070 Int2 version;
3071 Boolean sort_now = TRUE;
3072 TextSeqId tsi;
3073 SeqId si;
3074
3075 smp = SeqMgrReadLock();
3076 if ((! smp->NonIndexedBioseqCnt) || /* nothing to index */
3077 ((! force_it) && (smp->hold_indexing))) /* holding off on indexing */
3078 {
3079 SeqMgrUnlock();
3080 return TRUE;
3081 }
3082 SeqMgrUnlock();
3083
3084 smp = SeqMgrWriteLock();
3085 if ((! smp->NonIndexedBioseqCnt) || /* nothing to index */
3086 ((! force_it) && (smp->hold_indexing))) /* holding off on indexing */
3087 {
3088 SeqMgrUnlock();
3089 return TRUE;
3090 }
3091
3092 total = smp->NonIndexedBioseqCnt;
3093 old_BioseqIndexCnt=smp->BioseqIndexCnt; /*** remember this one to do smart sort ****/
3094
3095 if (total > 100) /* heap sort is faster */
3096 sort_now = FALSE;
3097
3098 bspp = smp->NonIndexedBioseq;
3099 for (i = 0; i < total; i++)
3100 {
3101 indexed = FALSE;
3102 bsp = bspp[i];
3103 if (bsp != NULL)
3104 {
3105 if (bsp->id != NULL)
3106 {
3107 indexed = TRUE;
3108 version = 0;
3109 for (sip = bsp->id; sip != NULL; sip = sip->next)
3110 {
3111 oldchoice = 0;
3112 switch (sip->choice)
3113 {
3114 case SEQID_GI:
3115 sprintf(buf, "%ld", (long)(sip->data.intvalue));
3116 SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3117 break;
3118 case SEQID_EMBL:
3119 case SEQID_DDBJ:
3120 oldchoice = sip->choice;
3121 /*
3122 sip->choice = SEQID_GENBANK;
3123 */
3124 case SEQID_GENBANK:
3125 case SEQID_OTHER:
3126 case SEQID_TPG:
3127 case SEQID_TPE:
3128 case SEQID_TPD:
3129 case SEQID_GPIPE:
3130 tsip = (TextSeqIdPtr)(sip->data.ptrvalue);
3131 if (((tsip->version > 0) && (tsip->release == NULL))
3132 && SHOWVERSION)
3133 {
3134 version = tsip->version;
3135 }
3136 case SEQID_PIR:
3137 case SEQID_SWISSPROT:
3138 case SEQID_PRF:
3139 tsip = (TextSeqIdPtr)(sip->data.ptrvalue);
3140 /*
3141 if (tsip->name != NULL)
3142 {
3143 tmp = tsip->accession;
3144 tsip->accession = NULL;
3145 SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3146 SeqMgrAddIndexElement(smp, bsp, buf,sort_now);
3147 tsip->accession = tmp;
3148 }
3149 */
3150 /*
3151 tmp = tsip->name;
3152 tsip->name = NULL;
3153 SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3154 SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3155 */
3156
3157 MemSet ((Pointer) &tsi, 0, sizeof (TextSeqId));
3158 tsi.name = tsip->name;
3159 tsi.accession = tsip->accession;
3160 tsi.release = tsip->release;
3161 tsi.version = tsip->version;
3162 MemSet ((Pointer) &si, 0, sizeof (SeqId));
3163 si.choice = sip->choice;
3164 if (oldchoice != 0) {
3165 si.choice = SEQID_GENBANK;
3166 }
3167 si.data.ptrvalue = (Pointer) &tsi;
3168
3169 if (tsi.name != NULL) {
3170 tsi.accession = NULL;
3171 SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3172 SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3173 tsi.accession = tsip->accession;
3174 }
3175 tsi.name = NULL;
3176 SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3177 SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3178 if (version) {
3179 tsi.version = 0;
3180 SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3181 SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3182 /*
3183 tsip->version = 0;
3184 SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3185 SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3186 tsip->version = version;
3187 */
3188 }
3189 /*
3190 tsip->name = tmp;
3191 */
3192 /*
3193 if (oldchoice)
3194 sip->choice = oldchoice;
3195 */
3196 break;
3197 default:
3198 SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3199 SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3200 break;
3201 }
3202 }
3203 }
3204 }
3205 if (indexed)
3206 bspp[i] = NULL;
3207 }
3208
3209 /* faster single pass removal of NULLs */
3210 for (i = 0, k = 0; i < total; i++) {
3211 bsp = bspp [i];
3212 if (bsp != NULL) {
3213 bspp [k] = bsp;
3214 k++;
3215 }
3216 }
3217 total = k;
3218
3219 /*
3220 for (i = 0; i < total; i++)
3221 {
3222 if (bspp[i] == NULL)
3223 {
3224 total--;
3225 for (k = i; k < total; k++)
3226 bspp[k] = bspp[k+1];
3227 i--;
3228 }
3229 }
3230 */
3231
3232 smp->NonIndexedBioseqCnt = total;
3233
3234 if (! sort_now) /* sort at the end */
3235 {
3236 if( old_BioseqIndexCnt > 1000 /**** sorted part of the array is large ***/
3237 && (old_BioseqIndexCnt*1.1 > smp->BioseqIndexCnt ) ){ /*** unsorted part of the array is < 10% ***/
3238 SeqIdIndexElementPtr PNTR bsindex_buf;
3239 SeqIdIndexElementPtr stack_buf[1024];
3240 int i_o, i_n, i_w;
3241 int unsorted_size= smp->BioseqIndexCnt - old_BioseqIndexCnt;
3242
3243 #if 1
3244 /**** sort unsorted part ****/
3245 StableMergeSort((VoidPtr) (smp->BioseqIndex+old_BioseqIndexCnt), (size_t) unsorted_size,
3246 sizeof(SeqIdIndexElementPtr), SeqIdIndexElementCmp);
3247 /**** move new part to an array ****/
3248 if(unsorted_size > 1024){
3249 bsindex_buf=Nlm_Malloc(sizeof(*bsindex_buf)*unsorted_size);
3250 } else {
3251 bsindex_buf=stack_buf;
3252 }
3253 MemMove((VoidPtr)bsindex_buf,(VoidPtr)(smp->BioseqIndex+old_BioseqIndexCnt),
3254 sizeof(*bsindex_buf)*unsorted_size);
3255
3256 /**** merge both arrays from the end ****/
3257 i_n=unsorted_size-1; /**** new part index ****/
3258 i_o=old_BioseqIndexCnt-1; /**** old part index ***/
3259 i_w=smp->BioseqIndexCnt-1;/**** whole array index ***/
3260 i=0;
3261 while(i_n >= 0) {
3262 if( i_o < 0
3263 || SeqIdIndexElementCmp((VoidPtr)(bsindex_buf+i_n),
3264 (VoidPtr)(smp->BioseqIndex+i_o)) >= 0){
3265 /**** move new element ***/
3266 smp->BioseqIndex[i_w]=bsindex_buf[i_n];
3267 i_w--;i_n--;
3268 } else {
3269 /**** move old element ***/
3270 smp->BioseqIndex[i_w]=smp->BioseqIndex[i_o];
3271 i_w--;i_o--;
3272 }
3273 i++;
3274 }
3275 /*** cleanup *****/
3276 if(unsorted_size > 1024){
3277 MemFree(bsindex_buf);
3278 }
3279 #else
3280 StableMergeSort((VoidPtr) (smp->BioseqIndex), (size_t)(smp->BioseqIndexCnt),
3281 sizeof(SeqIdIndexElementPtr), SeqIdIndexElementCmp);
3282 #endif
3283 } else { /** Heap Sort should be faster ***/
3284 StableMergeSort((VoidPtr) (smp->BioseqIndex), (size_t)(smp->BioseqIndexCnt),
3285 sizeof(SeqIdIndexElementPtr), SeqIdIndexElementCmp);
3286 }
3287 }
3288
3289 SeqMgrUnlock();
3290
3291 return TRUE;
3292 }
3293
3294
3295
3296 /*****************************************************************************
3297 *
3298 * SeqMgrAddToBioseqIndex(bsp)
3299 * Indexes a BioseqPtr by SeqId(s)
3300 *
3301 *****************************************************************************/
SeqMgrAddToBioseqIndex(BioseqPtr bsp)3302 NLM_EXTERN Boolean LIBCALL SeqMgrAddToBioseqIndex (BioseqPtr bsp)
3303 {
3304 SeqMgrPtr smp;
3305 BioseqPtr PNTR bspp;
3306
3307 if (bsp == NULL)
3308 return FALSE;
3309
3310 smp = SeqMgrWriteLock();
3311
3312 /* if this bsp was the last one added, no need to add it again */
3313 if (smp->NonIndexedBioseqCnt > 0 && smp->NonIndexedBioseq [smp->NonIndexedBioseqCnt - 1] == bsp) {
3314 SeqMgrUnlock();
3315 return TRUE;
3316 }
3317
3318 /* increase array as needed */
3319 if (smp->NonIndexedBioseqCnt >= smp->NonIndexedBioseqNum)
3320 {
3321 bspp = smp->NonIndexedBioseq;
3322 smp->NonIndexedBioseq = MemNew((smp->NonIndexedBioseqNum + 10) * sizeof (BioseqPtr));
3323 if (smp->NonIndexedBioseq == NULL) {
3324 Message (MSG_POSTERR, "Unable to allocate memory for bioseq index");
3325 smp->NonIndexedBioseq = bspp;
3326 return FALSE;
3327 }
3328 MemCopy(smp->NonIndexedBioseq, bspp, (smp->NonIndexedBioseqNum * sizeof(BioseqPtr)));
3329 MemFree(bspp);
3330 smp->NonIndexedBioseqNum += 10;
3331 }
3332
3333 smp->NonIndexedBioseq[smp->NonIndexedBioseqCnt] = bsp;
3334 smp->NonIndexedBioseqCnt++;
3335
3336 SeqMgrUnlock();
3337
3338 SeqMgrProcessNonIndexedBioseq(FALSE);
3339
3340 return TRUE;
3341 }
3342
3343
3344 /*****************************************************************************
3345 *
3346 * SeqMgrDeleteDeleteFromBioseqIndex(bsp)
3347 * Removes index on BioseqPtr SeqIds
3348 *
3349 *****************************************************************************/
SeqMgrDeleteFromBioseqIndex(BioseqPtr bsp)3350 NLM_EXTERN Boolean LIBCALL SeqMgrDeleteFromBioseqIndex (BioseqPtr bsp)
3351 {
3352 SeqMgrPtr smp;
3353 SeqIdIndexElementPtr PNTR sipp, sip;
3354 Int4 i, j, num;
3355 BioseqPtr PNTR bspp;
3356 ObjMgrDataPtr omdp;
3357 ObjMgrPtr omp;
3358
3359 smp = SeqMgrWriteLock();
3360
3361 /* bail if in bulk deletion of large record */
3362 if (bsp != NULL) {
3363 omdp = SeqMgrGetOmdpForBioseq (bsp);
3364 if (omdp != NULL && omdp->bulkIndexFree) {
3365 SeqMgrUnlock();
3366 return FALSE;
3367 }
3368 }
3369 /* check if not indexed yet */
3370 if (smp->NonIndexedBioseqCnt > 0)
3371 {
3372 num = smp->NonIndexedBioseqCnt;
3373 bspp = smp->NonIndexedBioseq;
3374 for (i = 0; i < num; i++)
3375 {
3376 if (bspp[i] == bsp)
3377 {
3378 num--;
3379 for (j = i; j < num; j++)
3380 bspp[j] = bspp[j+1];
3381 i--;
3382 }
3383 }
3384 smp->NonIndexedBioseqCnt = num;
3385 }
3386
3387 num = smp->BioseqIndexCnt;
3388 sipp = smp->BioseqIndex;
3389
3390 /* omp = ObjMgrReadLock(); */
3391
3392 omp = ObjMgrGet();
3393 omdp = ObjMgrFindByData(omp, (Pointer)bsp);
3394
3395 /* ObjMgrUnlock(); */
3396
3397 for (i = 0; i < BIOSEQ_CACHE_NUM; i++) /* remove from BioseqFind cache */
3398 {
3399 if (omdp_cache[i] == omdp)
3400 {
3401 omdp_cache[i] = NULL;
3402 se_cache[i] = NULL;
3403 }
3404 }
3405
3406 for (i = 0; i < num; i++)
3407 {
3408 if (sipp[i]->omdp == omdp)
3409 {
3410 sipp[i]->omdp = NULL;
3411 sipp[i]->str = MemFree(sipp[i]->str);
3412 sip = sipp[i];
3413 for (j = i; j < (num-1); j++)
3414 sipp[j] = sipp[j+1];
3415 sipp[j] = sip;
3416 num--; i--;
3417 }
3418 }
3419
3420 smp->BioseqIndexCnt = num;
3421
3422 SeqMgrUnlock();
3423
3424 return TRUE;
3425 }
3426
3427 /*****************************************************************************
3428 *
3429 * SeqMgrDeleteIndexesInRecord (sep)
3430 * Bulk removal of SeqId index on entire entity prior to its deletion
3431 *
3432 *****************************************************************************/
/*
 * VisitBioseqsInSep callback: flags the ObjMgrData of a bioseq with
 * bulkIndexFree, unless the bioseq has no ObjMgrData or that record is
 * already marked being_freed.  userdata is not used.
 */
static void MarkSeqForBulkDeletion (
  BioseqPtr bsp,
  Pointer userdata
)

{
  ObjMgrDataPtr  record;

  if (bsp != NULL) {
    record = SeqMgrGetOmdpForBioseq (bsp);
    if (record != NULL && (! record->being_freed)) {
      record->bulkIndexFree = TRUE;
    }
  }
}
3446
/*
 * VisitSetsInSep callback: looks up the ObjMgrData of a bioseq set while
 * holding the object manager write lock and flags it with bulkIndexFree,
 * unless it is missing or already marked being_freed.  userdata is not used.
 */
static void MarkSetForBulkDeletion (
  BioseqSetPtr bssp,
  Pointer userdata
)

{
  ObjMgrDataPtr  record;

  if (bssp == NULL) return;

  record = ObjMgrFindByData (ObjMgrWriteLock (), bssp);
  if (! (record == NULL || record->being_freed)) {
    record->bulkIndexFree = TRUE;
  }
  ObjMgrUnlock ();
}
3464
SeqMgrDeleteIndexesInRecord(SeqEntryPtr sep)3465 NLM_EXTERN Boolean LIBCALL SeqMgrDeleteIndexesInRecord (SeqEntryPtr sep)
3466
3467 {
3468 BioseqPtr bsp;
3469 BioseqPtr PNTR bspp;
3470 Int4 i, j, k, num;
3471 ObjMgrDataPtr omdp;
3472 SeqIdIndexElementPtr PNTR sipp;
3473 SeqMgrPtr smp;
3474 SeqIdIndexElementPtr PNTR tmp;
3475
3476 if (sep == NULL) return FALSE;
3477
3478 smp = SeqMgrWriteLock ();
3479
3480 VisitBioseqsInSep (sep, NULL, MarkSeqForBulkDeletion);
3481 VisitSetsInSep (sep, NULL, MarkSetForBulkDeletion);
3482
3483 /* check if not indexed yet */
3484
3485 if (smp->NonIndexedBioseqCnt > 0) {
3486
3487 num = smp->NonIndexedBioseqCnt;
3488 bspp = smp->NonIndexedBioseq;
3489
3490 for (i = 0; i < num; i++) {
3491 bsp = bspp [i];
3492 if (bsp != NULL) {
3493 omdp = SeqMgrGetOmdpForBioseq (bsp);
3494 if (omdp != NULL && omdp->bulkIndexFree) {
3495 num--;
3496 for (j = i; j < num; j++) {
3497 bspp [j] = bspp [j + 1];
3498 }
3499 i--;
3500 }
3501 }
3502 }
3503
3504 smp->NonIndexedBioseqCnt = num;
3505 }
3506
3507 /* remove from BioseqFind cache */
3508
3509 for (i = 0; i < BIOSEQ_CACHE_NUM; i++) {
3510 omdp = omdp_cache [i];
3511 if (omdp != NULL && omdp->bulkIndexFree) {
3512 omdp_cache [i] = NULL;
3513 se_cache [i] = NULL;
3514 }
3515 }
3516
3517 /* bulk free of indexes from sipp list */
3518
3519 sipp = smp->BioseqIndex;
3520 if (sipp == NULL) {
3521 SeqMgrUnlock ();
3522 return FALSE;
3523 }
3524
3525 num = smp->BioseqIndexCnt;
3526 tmp = (SeqIdIndexElementPtr PNTR) MemNew (sizeof (SeqIdIndexElementPtr) * (size_t) (num + 1));
3527 if (tmp != NULL) {
3528
3529 /* null out dying indexes, compress list, move empties to end */
3530
3531 for (i = 0, j = 0, k = 0; i < num; i++) {
3532 omdp = sipp [i]->omdp;
3533 if (omdp != NULL && omdp->bulkIndexFree) {
3534 sipp [i]->omdp = NULL;
3535 sipp [i]->str = MemFree (sipp [i]->str);
3536 tmp [k] = sipp [i];
3537 k++;
3538 } else {
3539 sipp [j] = sipp [i];
3540 j++;
3541 }
3542 }
3543 /* update count of remaining indexes */
3544
3545 smp->BioseqIndexCnt = j;
3546 MemMove (sipp + j, tmp, sizeof (SeqIdIndexElementPtr) * (size_t) k);
3547 }
3548 MemFree (tmp);
3549
3550 SeqMgrUnlock ();
3551
3552 return TRUE;
3553 }
3554
3555 /*****************************************************************************
3556 *
3557 * SeqMgrClearBioseqIndex()
3558 * Clears entire SeqId index for all entities
3559 *
3560 *****************************************************************************/
SeqMgrClearBioseqIndex(void)3561 NLM_EXTERN void SeqMgrClearBioseqIndex (void)
3562
3563 {
3564 BioseqPtr PNTR bspp;
3565 Int4 i, num;
3566 SeqIdIndexBlockPtr sibp, next;
3567 SeqIdIndexElementPtr sip;
3568 SeqIdIndexElementPtr PNTR sipp;
3569 SeqMgrPtr smp;
3570
3571 smp = SeqMgrWriteLock ();
3572
3573 num = smp->NonIndexedBioseqCnt;
3574 bspp = smp->NonIndexedBioseq;
3575 if (bspp != NULL) {
3576 for (i = 0; i < num; i++) {
3577 bspp [i] = NULL;
3578 }
3579 }
3580 smp->NonIndexedBioseqCnt = 0;
3581 smp->NonIndexedBioseqNum = 0;
3582 smp->NonIndexedBioseq = MemFree (smp->NonIndexedBioseq);
3583
3584 num = smp->BioseqIndexCnt;
3585 sipp = smp->BioseqIndex;
3586 if (sipp != NULL) {
3587 for (i = 0; i < num; i++) {
3588 sip = sipp [i];
3589 if (sip != NULL) {
3590 sip->omdp = NULL;
3591 sip->str = MemFree (sip->str);
3592 }
3593 sipp [i] = NULL;
3594 }
3595 }
3596 smp->BioseqIndexCnt = 0;
3597 smp->BioseqIndexNum = 0;
3598 for (sibp = smp->BioseqIndexData; sibp != NULL; sibp = next) {
3599 next = sibp->next;
3600 MemFree (sibp);
3601 }
3602 smp->BioseqIndexData = NULL;
3603
3604 SeqMgrUnlock ();
3605 }
3606
3607 /*****************************************************************************
3608 *
3609 * SeqMgrReplaceInBioseqIndex(bsp)
3610 * Replaces index on BioseqPtr SeqIds
3611 *
3612 *****************************************************************************/
SeqMgrReplaceInBioseqIndex(BioseqPtr bsp)3613 NLM_EXTERN Boolean LIBCALL SeqMgrReplaceInBioseqIndex (BioseqPtr bsp)
3614 {
3615 SeqMgrDeleteFromBioseqIndex(bsp);
3616 return SeqMgrAddToBioseqIndex(bsp);
3617 }
3618
3619 /*****************************************************************************
3620 *
3621 * GetUniGeneIDForSeqId(SeqIdPtr)
3622 * returns the UniGene ID for a SeqId
3623 * returns 0 if can't find it, or not a legal unigene id
3624 * This only applies to genomes division of entrez
3625 *
3626 *****************************************************************************/
3627
3628 /*****************************************************************
3629 *
*  THIS IS A KLUDGE!!  A per-database offset (a multiple of 1,000,000) is added to the id
3631 *
3632 *****************************************************************/
3633 #define KLUDGE_UNIGENE 1000000 /*the kludge offset val add to unigene sequence*/
3634 #define KLUDGE_FlyBase 2000000 /*the kludge offset for FlyBase*/
3635 #define KLUDGE_JACKSON 3000000 /*the kludge offset for the Mouse data*/
3636 #define KLUDGE_JRGP 4000000 /*the kludge offset for the rice data*/
3637 #define KLUDGE_CESC 5000000 /*the kludge offset for the C. elegans data*/
3638 #define KLUDGE_BSNR 6000000 /*the kludge offset for the B. subtilis data*/
3639 #define KLUDGE_HUMGEN 7000000 /*the kludge offset for the Human genomic data*/
3640 #define KLUDGE_YGG 8000000 /*the kludge offset for the yeast data*/
3641 #define KLUDGE_NCBICG 9000000 /*the kludge offset for small genomes*/
3642 #define KLUDGE_MAIZE 10000000 /*the kludge offset for corn*/
3643
GetUniGeneIDForSeqId(SeqIdPtr sip)3644 NLM_EXTERN Int4 LIBCALL GetUniGeneIDForSeqId (SeqIdPtr sip)
3645 {
3646 DbtagPtr db_tag;
3647 ObjectIdPtr oip;
3648
3649 if (sip == NULL)
3650 return 0;
3651
3652
3653 if(sip->choice != SEQID_GENERAL)
3654 return 0;
3655
3656 db_tag = sip->data.ptrvalue;
3657 if(db_tag == NULL || db_tag->db == NULL)
3658 return 0;
3659
3660 oip = db_tag->tag;
3661 if(oip == NULL || oip->id == 0)
3662 return 0;
3663
3664 if(StringCmp(db_tag->db, "UNIGENE") == 0)
3665 return (KLUDGE_UNIGENE+ oip->id);
3666 if(StringCmp(db_tag->db, "UniGene") == 0)
3667 return (KLUDGE_UNIGENE+ oip->id);
3668 if(StringCmp(db_tag->db, "FlyBase") == 0)
3669 return (KLUDGE_FlyBase+ oip->id);
3670 if(StringCmp(db_tag->db, "JACKSON") == 0)
3671 return (KLUDGE_JACKSON+ oip->id);
3672 if(StringCmp(db_tag->db, "JRGP") == 0)
3673 return (KLUDGE_JRGP + oip->id);
3674 if(StringCmp(db_tag->db, "CESC") == 0)
3675 return (KLUDGE_CESC + oip->id);
3676 if(StringCmp(db_tag->db, "BSNR") == 0)
3677 return (KLUDGE_BSNR + oip->id);
3678 if(StringCmp(db_tag->db, "HUMGEN") == 0)
3679 return (KLUDGE_HUMGEN + oip->id);
3680 if(StringCmp(db_tag->db, "YGG") == 0)
3681 return (KLUDGE_YGG + oip->id);
3682 if(StringCmp(db_tag->db, "NCBICG") == 0)
3683 return (KLUDGE_NCBICG + oip->id);
3684 if(StringCmp(db_tag->db, "MAIZE") == 0)
3685 return (KLUDGE_MAIZE + oip->id);
3686 return 0;
3687
3688 }
3689
3690
3691 /*****************************************************************************
3692 *
3693 * BioseqExtra extensions to preindex for rapid retrieval
3694 *
3695 *****************************************************************************/
3696
3697 /*
3698 * remaining to be done are mapping tables for rapid coordinate conversion
3699 * between genome record and parts, genomic DNA and mRNA, and mRNA and protein
3700 */
3701
SeqMgrGetOmdpForPointer(Pointer ptr)3702 static ObjMgrDataPtr SeqMgrGetOmdpForPointer (Pointer ptr)
3703
3704 {
3705 ObjMgrDataPtr omdp;
3706 ObjMgrPtr omp;
3707
3708 if (ptr == NULL) return NULL;
3709 omp = ObjMgrWriteLock ();
3710 omdp = ObjMgrFindByData (omp, ptr);
3711 ObjMgrUnlock ();
3712 return omdp;
3713 }
3714
SeqMgrGetOmdpForBioseq(BioseqPtr bsp)3715 NLM_EXTERN ObjMgrDataPtr SeqMgrGetOmdpForBioseq (BioseqPtr bsp)
3716
3717 {
3718 ObjMgrDataPtr omdp = NULL;
3719 ObjMgrPtr omp;
3720
3721 if (bsp == NULL) return NULL;
3722 omp = ObjMgrWriteLock ();
3723 omdp = (ObjMgrDataPtr) bsp->omdp;
3724 if (omdp == NULL) {
3725 omdp = ObjMgrFindByData (omp, bsp);
3726 bsp->omdp = (Pointer) omdp;
3727 }
3728 ObjMgrUnlock ();
3729 return omdp;
3730 }
3731
SeqMgrGetExtraDataForOmdp(ObjMgrDataPtr omdp)3732 NLM_EXTERN Pointer SeqMgrGetExtraDataForOmdp (ObjMgrDataPtr omdp)
3733
3734 {
3735 Pointer extradata;
3736 ObjMgrPtr omp;
3737
3738 if (omdp == NULL) return NULL;
3739 omp = ObjMgrWriteLock ();
3740 extradata = (Pointer) omdp->extradata;
3741 ObjMgrUnlock ();
3742 return extradata;
3743 }
3744
SeqMgrGetTopSeqEntryForEntity(Uint2 entityID)3745 static SeqEntryPtr SeqMgrGetTopSeqEntryForEntity (Uint2 entityID)
3746
3747 {
3748 ObjMgrDataPtr omdp;
3749 SeqSubmitPtr ssp;
3750
3751 omdp = ObjMgrGetData (entityID);
3752 if (omdp == NULL) return FALSE;
3753 switch (omdp->datatype) {
3754 case OBJ_SEQSUB :
3755 ssp = (SeqSubmitPtr) omdp->dataptr;
3756 if (ssp != NULL && ssp->datatype == 1) {
3757 return (SeqEntryPtr) ssp->data;
3758 }
3759 break;
3760 case OBJ_BIOSEQ :
3761 case OBJ_BIOSEQSET :
3762 return (SeqEntryPtr) omdp->choice;
3763 default :
3764 break;
3765 }
3766 return NULL;
3767 }
3768
3769
SeqMgrClearBioseqExtraData(ObjMgrDataPtr omdp)3770 static Boolean SeqMgrClearBioseqExtraData (ObjMgrDataPtr omdp)
3771
3772 {
3773 BioseqExtraPtr bspextra;
3774 SMFeatBlockPtr currf;
3775 SMSeqIdxPtr currp;
3776 Int2 i;
3777 SMFeatItemPtr itemf;
3778 Int4 j;
3779 SMFeatBlockPtr nextf;
3780 SMSeqIdxPtr nextp;
3781 SMFidItemPtr sfip;
3782
3783 if (omdp == NULL) return FALSE;
3784 bspextra = (BioseqExtraPtr) omdp->extradata;
3785 if (bspextra == NULL) return FALSE;
3786
3787 /* free sorted arrays of pointers into data blocks */
3788
3789 bspextra->descrsByID = MemFree (bspextra->descrsByID);
3790 bspextra->descrsBySdp = MemFree (bspextra->descrsBySdp);
3791 bspextra->descrsByIndex = MemFree (bspextra->descrsByIndex);
3792
3793 bspextra->annotDescByID = MemFree (bspextra->annotDescByID);
3794
3795 bspextra->alignsByID = MemFree (bspextra->alignsByID);
3796
3797 bspextra->featsByID = MemFree (bspextra->featsByID);
3798 bspextra->featsBySfp = MemFree (bspextra->featsBySfp);
3799 bspextra->featsByPos = MemFree (bspextra->featsByPos);
3800 bspextra->featsByRev = MemFree (bspextra->featsByRev);
3801 bspextra->featsByLabel = MemFree (bspextra->featsByLabel);
3802
3803 bspextra->genesByPos = MemFree (bspextra->genesByPos);
3804 bspextra->mRNAsByPos = MemFree (bspextra->mRNAsByPos);
3805 bspextra->CDSsByPos = MemFree (bspextra->CDSsByPos);
3806 bspextra->pubsByPos = MemFree (bspextra->pubsByPos);
3807 bspextra->orgsByPos = MemFree (bspextra->orgsByPos);
3808 bspextra->operonsByPos = MemFree (bspextra->operonsByPos);
3809
3810 bspextra->genesByLocusTag = MemFree (bspextra->genesByLocusTag);
3811
3812 /* free list of descriptor information */
3813
3814 bspextra->desclisthead = ValNodeFreeData (bspextra->desclisthead);
3815
3816 /* free arrays to speed mapping from parts to segmented bioseq */
3817
3818 bspextra->partsByLoc = MemFree (bspextra->partsByLoc);
3819 bspextra->partsBySeqId = MemFree (bspextra->partsBySeqId);
3820
3821 /* free data blocks of feature information */
3822
3823 currf = bspextra->featlisthead;
3824 while (currf != NULL) {
3825 nextf = currf->next;
3826
3827 if (currf->data != NULL) {
3828
3829 /* free allocated label strings within block items */
3830
3831 for (i = 0; i < currf->index; i++) {
3832 itemf = &(currf->data [i]);
3833 MemFree (itemf->label);
3834 MemFree (itemf->ivals);
3835 }
3836
3837 /* free array of SMFeatItems */
3838
3839 MemFree (currf->data);
3840 }
3841
3842 MemFree (currf);
3843 currf = nextf;
3844 }
3845
3846 /* free data blocks of parts to segment mapping information */
3847
3848 currp = bspextra->segparthead;
3849 while (currp != NULL) {
3850 nextp = currp->next;
3851 SeqLocFree (currp->slp);
3852 MemFree (currp->seqIdOfPart);
3853 MemFree (currp);
3854 currp = nextp;
3855 }
3856
3857 /* free list of seqfeatptrs whose product points to the bioseq */
3858
3859 bspextra->prodlisthead = ValNodeFree (bspextra->prodlisthead);
3860
3861 if (bspextra->featsByFeatID != NULL) {
3862 for (j = 0; j < bspextra->numfids; j++) {
3863 sfip = bspextra->featsByFeatID [j];
3864 if (sfip == NULL) continue;
3865 MemFree (sfip->fid);
3866 MemFree (sfip);
3867 }
3868 bspextra->featsByFeatID = MemFree (bspextra->featsByFeatID);
3869 }
3870
3871 /* clean interval list once implemented */
3872
3873 bspextra->featlisthead = NULL;
3874 bspextra->featlisttail = NULL;
3875 bspextra->segparthead = NULL;
3876
3877 bspextra->numaligns = 0;
3878 bspextra->numfeats = 0;
3879 bspextra->numgenes = 0;
3880 bspextra->nummRNAs = 0;
3881 bspextra->numCDSs = 0;
3882 bspextra->numpubs = 0;
3883 bspextra->numorgs = 0;
3884 bspextra->numoperons = 0;
3885 bspextra->numfids = 0;
3886 bspextra->numsegs = 0;
3887
3888 bspextra->min = INT4_MAX;
3889 bspextra->processed = UINT1_MAX;
3890 bspextra->blocksize = 50;
3891
3892 bspextra->protFeat = NULL;
3893 bspextra->cdsOrRnaFeat = NULL;
3894
3895 /* free genome - parts mapping arrays when they are added */
3896
3897 return TRUE;
3898 }
3899
DoSeqMgrFreeBioseqExtraData(ObjMgrDataPtr omdp)3900 static Boolean DoSeqMgrFreeBioseqExtraData (ObjMgrDataPtr omdp)
3901
3902 {
3903 if (omdp == NULL) return FALSE;
3904 if (omdp->datatype != OBJ_BIOSEQ && omdp->datatype != OBJ_BIOSEQSET) return FALSE;
3905 if (omdp->extradata != NULL) {
3906 SeqMgrClearBioseqExtraData (omdp);
3907 omdp->extradata = MemFree (omdp->extradata);
3908 omdp->reapextra = NULL;
3909 omdp->reloadextra = NULL;
3910 omdp->freeextra = NULL;
3911 }
3912 return TRUE;
3913 }
3914
3915 /* object manager callbacks to reap, reload, and free extra bioseq data */
3916
SeqMgrReapBioseqExtraFunc(Pointer data)3917 NLM_EXTERN Pointer LIBCALLBACK SeqMgrReapBioseqExtraFunc (Pointer data)
3918
3919 {
3920 BioseqExtraPtr bspextra;
3921 SMFeatBlockPtr curr;
3922 Int2 i;
3923 SMFeatItemPtr item;
3924 ObjMgrDataPtr omdp;
3925 SMDescItemPtr sdip;
3926 ValNodePtr vnp;
3927
3928 omdp = (ObjMgrDataPtr) data;
3929 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
3930 bspextra = (BioseqExtraPtr) omdp->extradata;
3931 if (bspextra == NULL) return NULL;
3932
3933 /* loop through data blocks of feature information */
3934
3935 curr = bspextra->featlisthead;
3936 while (curr != NULL) {
3937
3938 /* NULL out pointers to cached out feature and annot */
3939
3940 if (curr->data != NULL) {
3941 for (i = 0; i < curr->index; i++) {
3942 item = &(curr->data [i]);
3943 item->sfp = NULL;
3944 item->sap = NULL;
3945 }
3946 }
3947
3948 curr = curr->next;
3949 }
3950
3951 /* these were originally only recorded if omdp->tempload == TL_NOT_TEMP */
3952
3953 bspextra->protFeat = NULL;
3954 bspextra->cdsOrRnaFeat = NULL;
3955
3956 /* NULL out pointers to cached out descriptors */
3957
3958 for (vnp = bspextra->desclisthead; vnp != NULL; vnp = vnp->next) {
3959 sdip = (SMDescItemPtr) vnp->data.ptrvalue;
3960 if (sdip != NULL) {
3961 sdip->sdp = NULL;
3962 sdip->sep = NULL;
3963 }
3964 }
3965
3966 return NULL;
3967 }
3968
3969 /* !!! SeqMgrReloadBioseqExtraFunc is not yet implemented !!! */
3970
SeqMgrReloadBioseqExtraFunc(Pointer data)3971 NLM_EXTERN Pointer LIBCALLBACK SeqMgrReloadBioseqExtraFunc (Pointer data)
3972
3973 {
3974 return NULL;
3975 }
3976
SeqMgrFreeBioseqExtraFunc(Pointer data)3977 NLM_EXTERN Pointer LIBCALLBACK SeqMgrFreeBioseqExtraFunc (Pointer data)
3978
3979 {
3980 DoSeqMgrFreeBioseqExtraData ((ObjMgrDataPtr) data);
3981 return NULL;
3982 }
3983
3984 /*****************************************************************************
3985 *
3986 * SeqMgrClearFeatureIndexes clears every bioseq in an entity
3987 *
3988 *****************************************************************************/
3989
/*
 * SeqEntryExplore callback used by SeqMgrClearFeatureIndexes.
 *
 * For a Bioseq entry it clears bsp->seqentry and frees the extra index data
 * attached to the bioseq's ObjMgrData.  If that call reports success and
 * mydata is not NULL, the Boolean that mydata points to is set to TRUE.
 * index and indent are not used.
 *
 * Entries that are not Bioseqs are skipped.  The earlier version also had a
 * Bioseq-set branch, but it sat behind the same "not a Bioseq" guard and
 * could never run, so it has been removed without changing behavior.
 * NOTE(review): extra data on nested Bioseq-sets is therefore not cleared
 * here - confirm that this is intended.
 *
 * TODO: seq-align indexes under bsp->annot are not cleaned up yet; the
 * earlier version only walked the alignment lists without touching them.
 */
static void SeqMgrClearIndexesProc (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr      bsp;
  ObjMgrDataPtr  omdp;
  BoolPtr        rsult;

  if (sep == NULL || (! IS_Bioseq (sep))) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  bsp->seqentry = NULL;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp != NULL && DoSeqMgrFreeBioseqExtraData (omdp)) {
    rsult = (BoolPtr) mydata;
    /* tolerate a caller that does not want the result */
    if (rsult != NULL) {
      *rsult = TRUE;
    }
  }
}
4029
SeqMgrClearFeatureIndexes(Uint2 entityID,Pointer ptr)4030 NLM_EXTERN Boolean LIBCALL SeqMgrClearFeatureIndexes (Uint2 entityID, Pointer ptr)
4031
4032 {
4033 ObjMgrDataPtr omdp;
4034 Boolean rsult = FALSE;
4035 SeqEntryPtr sep;
4036
4037 if (entityID == 0) {
4038 entityID = ObjMgrGetEntityIDForPointer (ptr);
4039 }
4040 if (entityID == 0) return FALSE;
4041 sep = SeqMgrGetTopSeqEntryForEntity (entityID);
4042 if (sep == NULL) return FALSE;
4043 SeqEntryExplore (sep, (Pointer) (&rsult), SeqMgrClearIndexesProc);
4044
4045 /* clear out object manager time of indexing flag and master feature itemID list */
4046
4047 omdp = ObjMgrGetData (entityID);
4048 if (omdp != NULL) {
4049 omdp->indexed = 0;
4050 SeqMgrClearBioseqExtraData (omdp);
4051 omdp->extradata = MemFree (omdp->extradata);
4052 omdp->reapextra = NULL;
4053 omdp->reloadextra = NULL;
4054 omdp->freeextra = NULL;
4055 }
4056 return rsult;
4057 }
4058
4059 /*****************************************************************************
4060 *
4061 * IsNonGappedLiteral(BioseqPtr bsp)
*       Returns TRUE if bsp is a delta seq that is composed only of Seq-lits with
4063 * actual sequence data. These are now made to allow optimal compression
4064 * of otherwise raw sequences with runs of ambiguous bases.
4065 *
4066 *****************************************************************************/
4067
IsNonGappedLiteral(BioseqPtr bsp)4068 NLM_EXTERN Boolean IsNonGappedLiteral (BioseqPtr bsp)
4069
4070 {
4071 DeltaSeqPtr dsp;
4072 SeqLitPtr slitp;
4073
4074 if (bsp == NULL || bsp->repr != Seq_repr_delta) return FALSE;
4075 if (bsp->seq_ext_type != 4 || bsp->seq_ext == NULL) return FALSE;
4076
4077 for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp != NULL; dsp = dsp->next) {
4078 if (dsp->choice != 2) return FALSE; /* not Seq-lit */
4079 slitp = (SeqLitPtr) dsp->data.ptrvalue;
4080 if (slitp == NULL) return FALSE;
4081 if (slitp->seq_data == NULL || slitp->length == 0 || slitp->seq_data_type == Seq_code_gap) return FALSE; /* gap */
4082 }
4083
4084 return TRUE;
4085 }
4086
4087 /*****************************************************************************
4088 *
4089 * FindAppropriateBioseq finds the segmented bioseq if location is join on parts
4090 *
4091 *****************************************************************************/
4092
/*
 * Finds the bioseq a location should be indexed on.
 *
 * *is_small_genome_set, when supplied, starts out FALSE.
 *
 * If the whole location has a single SeqId, the bioseq is tryfirst (when
 * the id is among its ids) or the result of BioseqFindCore.  For a raw or
 * delta bioseq whose extra data records a parentBioseq, that parent is
 * returned instead.
 *
 * Otherwise the bioseq of the first sublocation is looked up and its chain
 * of parent records is climbed.  The first segset found yields the first
 * non-NULL Bioseq among its members.  The first small genome set found
 * yields the part itself and sets *is_small_genome_set to TRUE.
 *
 * Returns NULL if nothing suitable is found.
 */
static BioseqPtr FindAppropriateBioseq (SeqLocPtr loc, BioseqPtr tryfirst, BoolPtr is_small_genome_set)

{
  BioseqPtr       bsp;
  BioseqExtraPtr  bspextra;
  BioseqSetPtr    bssp;
  SeqLocPtr       firstpiece;
  SeqEntryPtr     member;
  ObjMgrDataPtr   omdp;
  BioseqPtr       part;
  SeqIdPtr        sip;

  if (is_small_genome_set != NULL) {
    *is_small_genome_set = FALSE;
  }
  if (loc == NULL) return NULL;

  sip = SeqLocId (loc);
  if (sip != NULL) {

    /* location is on a single bioseq */

    if (tryfirst != NULL && SeqIdIn (sip, tryfirst->id)) {
      bsp = tryfirst;
    } else {
      bsp = BioseqFindCore (sip);
    }
    if (bsp == NULL) return NULL;

    /* only a raw or delta bioseq can be a local part of a segmented bioseq */
    /* (IsNonGappedLiteral (bsp) was once considered in place of the delta test) */

    if (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_delta) return bsp;

    omdp = SeqMgrGetOmdpForBioseq (bsp);
    if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return bsp;

    bspextra = (BioseqExtraPtr) omdp->extradata;
    if (bspextra != NULL && bspextra->parentBioseq != NULL) {
      return bspextra->parentBioseq;
    }
    return bsp;
  }

  /* otherwise assume location is on multiple parts of a segmented set (deprecated) or is in a small genome set */

  firstpiece = SeqLocFindNext (loc, NULL);
  if (firstpiece == NULL) return NULL;
  sip = SeqLocId (firstpiece);
  if (sip == NULL) return NULL;
  part = BioseqFindCore (sip);
  if (part == NULL) return NULL;

  /* climb from the part through its parent records */

  for (omdp = SeqMgrGetOmdpForBioseq (part);
       omdp != NULL;
       omdp = SeqMgrGetOmdpForPointer (omdp->parentptr)) {

    if (omdp->datatype != OBJ_BIOSEQSET) continue;
    bssp = (BioseqSetPtr) omdp->dataptr;
    if (bssp == NULL) continue;

    if (bssp->_class == BioseqseqSet_class_segset) {
      for (member = bssp->seq_set; member != NULL; member = member->next) {
        if (! IS_Bioseq (member)) continue;
        bsp = (BioseqPtr) member->data.ptrvalue;
        if (bsp != NULL) return bsp;
      }
    } else if (bssp->_class == BioseqseqSet_class_small_genome_set) {
      if (is_small_genome_set != NULL) {
        *is_small_genome_set = TRUE;
      }
      return part;
    }
  }

  return NULL;
}
4167
4168 /*****************************************************************************
4169 *
4170 * FindFirstLocalBioseq is called as a last resort if FindAppropriateBioseq
4171 * fails, and it scans the feature location to find the first local bioseq
4172 * referenced by a feature interval
4173 *
4174 *****************************************************************************/
4175
FindFirstLocalBioseq(SeqLocPtr loc)4176 static BioseqPtr FindFirstLocalBioseq (SeqLocPtr loc)
4177
4178 {
4179 BioseqPtr bsp;
4180 SeqIdPtr sip;
4181 SeqLocPtr slp = NULL;
4182
4183 if (loc == NULL) return NULL;
4184
4185 while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
4186 sip = SeqLocId (slp);
4187 if (sip != NULL) {
4188 bsp = BioseqFindCore (sip);
4189 if (bsp != NULL) return bsp;
4190 }
4191 }
4192
4193 return NULL;
4194 }
4195
4196 /*****************************************************************************
4197 *
4198 * BioseqFindFromSeqLoc finds the segmented bioseq if location is join on parts,
4199 * and does so even if some of the intervals are far accessions.
4200 *
4201 *****************************************************************************/
4202
BioseqFindFromSeqLoc(SeqLocPtr loc)4203 NLM_EXTERN BioseqPtr BioseqFindFromSeqLoc (SeqLocPtr loc)
4204
4205 {
4206 BioseqPtr bsp = NULL;
4207
4208 if (loc == NULL) return NULL;
4209 bsp = FindAppropriateBioseq (loc, NULL, NULL);
4210 if (bsp == NULL) {
4211 bsp = FindFirstLocalBioseq (loc);
4212 }
4213 return bsp;
4214 }
4215
4216 /*****************************************************************************
4217 *
4218 * SeqMgrGetParentOfPart returns the segmented bioseq parent of a part bioseq,
4219 * and fills in the context structure.
4220 *
4221 *****************************************************************************/
4222
SeqMgrGetParentOfPart(BioseqPtr bsp,SeqMgrSegmentContext PNTR context)4223 NLM_EXTERN BioseqPtr LIBCALL SeqMgrGetParentOfPart (BioseqPtr bsp,
4224 SeqMgrSegmentContext PNTR context)
4225
4226 {
4227 BioseqExtraPtr bspextra;
4228 Char buf [128];
4229 Int2 compare;
4230 Uint2 entityID;
4231 Int4 i;
4232 Int4 numsegs;
4233 ObjMgrDataPtr omdp;
4234 BioseqPtr parent;
4235 SMSeqIdxPtr PNTR partsByLoc;
4236 SMSeqIdxPtr PNTR partsBySeqId;
4237 SMSeqIdxPtr segpartptr;
4238 SeqIdPtr sip;
4239 SeqLocPtr slp;
4240 Int4 L, R, mid;
4241
4242 if (context != NULL) {
4243 MemSet ((Pointer) context, 0, sizeof (SeqMgrSegmentContext));
4244 }
4245 if (bsp == NULL) return NULL;
4246 omdp = SeqMgrGetOmdpForBioseq (bsp);
4247 if (omdp == NULL) return NULL;
4248 if (omdp->datatype != OBJ_BIOSEQ) return NULL;
4249 bspextra = (BioseqExtraPtr) omdp->extradata;
4250 if (bspextra == NULL) return NULL;
4251 entityID = bsp->idx.entityID;
4252 if (entityID < 1) {
4253 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
4254 }
4255
4256 parent = bspextra->parentBioseq;
4257 if (parent == NULL) return NULL;
4258
4259 /* now need parts list from extra data on parent */
4260
4261 omdp = SeqMgrGetOmdpForBioseq (parent);
4262 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return parent;
4263 bspextra = (BioseqExtraPtr) omdp->extradata;
4264 if (bspextra == NULL) return parent;
4265
4266 partsBySeqId = bspextra->partsBySeqId;
4267 numsegs = bspextra->numsegs;
4268 if (partsBySeqId == NULL || numsegs < 1) return parent;
4269
4270 sip = bsp->id;
4271 if (sip == NULL) return parent;
4272
4273 /* binary search into array on segmented bioseq sorted by part seqID (reversed) string */
4274
4275 while (sip != NULL) {
4276 if (MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1)) {
4277 L = 0;
4278 R = numsegs - 1;
4279 while (L < R) {
4280 mid = (L + R) / 2;
4281 segpartptr = partsBySeqId [mid];
4282 compare = StringCmp (segpartptr->seqIdOfPart, buf);
4283 if (compare < 0) {
4284 L = mid + 1;
4285 } else {
4286 R = mid;
4287 }
4288 }
4289 segpartptr = partsBySeqId [R];
4290 if (segpartptr != NULL && StringCmp (segpartptr->seqIdOfPart, buf) == 0) {
4291 if (context != NULL) {
4292 slp = segpartptr->slp;
4293 context->entityID = entityID;
4294 context->itemID = segpartptr->itemID;
4295 context->slp = slp;
4296 context->parent = segpartptr->parentBioseq;
4297 context->cumOffset = segpartptr->cumOffset;
4298 context->from = segpartptr->from;
4299 context->to = segpartptr->to;
4300 context->strand = segpartptr->strand;
4301 context->userdata = NULL;
4302 context->omdp = (Pointer) omdp;
4303 context->index = 0;
4304
4305 /* now find entry in partsByLoc list to set proper index */
4306
4307 partsByLoc = bspextra->partsByLoc;
4308 if (partsByLoc != NULL) {
4309 i = 0;
4310 while (i < numsegs) {
4311 if (segpartptr == partsByLoc [i]) {
4312 context->index = i + 1;
4313 }
4314 i++;
4315 }
4316 }
4317 }
4318 return parent;
4319 }
4320 }
4321 sip = sip->next;
4322 }
4323
4324 return parent;
4325 }
4326
4327 /*****************************************************************************
4328 *
4329 * SeqMgrGetBioseqContext fills in the context structure for any bioseq.
4330 *
4331 *****************************************************************************/
4332
SeqMgrGetBioseqContext(BioseqPtr bsp,SeqMgrBioseqContext PNTR context)4333 NLM_EXTERN Boolean LIBCALL SeqMgrGetBioseqContext (BioseqPtr bsp,
4334 SeqMgrBioseqContext PNTR context)
4335
4336 {
4337 BioseqExtraPtr bspextra;
4338 Uint2 entityID;
4339 ObjMgrDataPtr omdp;
4340 SeqEntryPtr sep;
4341
4342 if (context != NULL) {
4343 MemSet ((Pointer) context, 0, sizeof (SeqMgrBioseqContext));
4344 }
4345 if (bsp == NULL || context == NULL) return FALSE;
4346
4347 entityID = bsp->idx.entityID;
4348 if (entityID < 1) {
4349 entityID = ObjMgrGetEntityIDForPointer (bsp);
4350 }
4351 if (entityID == 0) return FALSE;
4352
4353 sep = SeqMgrGetTopSeqEntryForEntity (entityID);
4354 if (sep == NULL) return FALSE;
4355
4356 context->entityID = entityID;
4357 context->index = 0;
4358 context->userdata = NULL;
4359
4360 omdp = SeqMgrGetOmdpForBioseq (bsp);
4361 if (omdp != NULL && omdp->datatype == OBJ_BIOSEQ) {
4362 bspextra = (BioseqExtraPtr) omdp->extradata;
4363 if (bspextra != NULL) {
4364 context->itemID = bspextra->bspItemID;
4365 context->bsp = bsp;
4366 context->sep = bsp->seqentry;
4367 if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
4368 context->bssp = (BioseqSetPtr) bsp->idx.parentptr;
4369 }
4370 context->numsegs = bspextra->numsegs;
4371 context->omdp = omdp;
4372 context->index = bspextra->bspIndex;
4373 }
4374 }
4375
4376 return (Boolean) (context->index != 0);
4377 }
4378
4379 /*****************************************************************************
4380 *
4381 * GetOffsetInNearBioseq is called to get the intervals on near bioseqs
4382 *
4383 *****************************************************************************/
4384
GetOffsetInNearBioseq(SeqLocPtr loc,BioseqPtr in,Uint1 which_end)4385 static Int4 GetOffsetInNearBioseq (SeqLocPtr loc, BioseqPtr in, Uint1 which_end)
4386
4387 {
4388 BioseqPtr bsp;
4389 SeqLocPtr first = NULL, last = NULL, slp = NULL;
4390 SeqIdPtr sip;
4391 Uint1 strand;
4392 Int4 val;
4393
4394 if (loc == NULL) return -1;
4395
4396 /* first attempt should work if no far bioseqs */
4397
4398 val = GetOffsetInBioseq (loc, in, which_end);
4399 if (val != -1) return val;
4400
4401 /* now go through sublocs and find extremes that are not on far bioseqs */
4402
4403 while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
4404 sip = SeqLocId (slp);
4405 if (sip != NULL) {
4406 bsp = BioseqFind (sip);
4407 if (bsp != NULL) {
4408 last = slp;
4409 if (first == NULL) {
4410 first = slp;
4411 }
4412 }
4413 }
4414 }
4415 if (first == NULL) return -1;
4416 strand = SeqLocStrand (first);
4417
4418 switch (which_end) {
4419 case SEQLOC_LEFT_END:
4420 if (strand == Seq_strand_minus) {
4421 return GetOffsetInBioseq (last, in, which_end);
4422 } else {
4423 return GetOffsetInBioseq (first, in, which_end);
4424 }
4425 break;
4426 case SEQLOC_RIGHT_END:
4427 if (strand == Seq_strand_minus) {
4428 return GetOffsetInBioseq (first, in, which_end);
4429 } else {
4430 return GetOffsetInBioseq (last, in, which_end);
4431 }
4432 break;
4433 case SEQLOC_START:
4434 return GetOffsetInBioseq (first, in, which_end);
4435 break;
4436 case SEQLOC_STOP:
4437 return GetOffsetInBioseq (last, in, which_end);
4438 break;
4439 default :
4440 break;
4441 }
4442
4443 return -1;
4444 }
4445
4446
/*
 * Computes the left and right offsets of loc within bioseq in, ignoring
 * sublocations on bioseqs that BioseqFind cannot find.
 *
 * *left and *right (each optional) start out as -1.  When the bioseq of the
 * whole location is found, GetLeftAndRightOffsetsInBioseq is tried, and if
 * both of its values differ from -1 they are the result.
 *
 * Otherwise the first and last usable sublocations are identified.  With
 * small_genome_set TRUE only sublocations on in itself are usable.  The
 * offsets are taken from those two, trading places when the first one is
 * on the minus strand.  If no sublocation is usable the outputs stay -1.
 *
 * bad_order and mixed_strand are accepted but not used here.
 */
static void GetLeftAndRightOffsetsInNearBioseq (
  SeqLocPtr loc,
  BioseqPtr in,
  Int4Ptr left,
  Int4Ptr right,
  Boolean small_genome_set,
  Boolean bad_order,
  Boolean mixed_strand
)

{
  BioseqPtr  bsp;
  SeqLocPtr  first = NULL;
  SeqLocPtr  last = NULL;
  Boolean    left_flip = FALSE;
  SeqLocPtr  piece;
  Boolean    resolved = FALSE;
  Boolean    right_flip = FALSE;
  SeqIdPtr   sip;
  Int4       val_left = -1;
  Int4       val_right = -1;

  if (left != NULL) {
    *left = -1;
  }
  if (right != NULL) {
    *right = -1;
  }
  if (loc == NULL) return;

  /* first attempt should work if no far bioseqs */

  sip = SeqLocId (loc);
  if (in != NULL && SeqIdIn (sip, in->id)) {
    bsp = in;
  } else {
    bsp = BioseqFind (sip);
  }
  if (bsp != NULL) {
    GetLeftAndRightOffsetsInBioseq (loc, in, &val_left, &val_right, bsp->topology == TOPOLOGY_CIRCULAR, FALSE, &left_flip, &right_flip);
    resolved = (Boolean) (val_left != -1 && val_right != -1);
  }

  if (! resolved) {

    /* now go through sublocs and find extremes that are not on far bioseqs */

    for (piece = SeqLocFindNext (loc, NULL);
         piece != NULL;
         piece = SeqLocFindNext (loc, piece)) {
      sip = SeqLocId (piece);
      if (sip == NULL) continue;
      bsp = BioseqFind (sip);
      if (bsp == NULL) continue;
      if (small_genome_set && bsp != in) continue;
      if (first == NULL) {
        first = piece;
      }
      last = piece;
    }
    if (first == NULL) return;

    if (SeqLocStrand (first) == Seq_strand_minus) {
      val_left = GetOffsetInBioseq (last, in, SEQLOC_LEFT_END);
      val_right = GetOffsetInBioseq (first, in, SEQLOC_RIGHT_END);
    } else {
      val_left = GetOffsetInBioseq (first, in, SEQLOC_LEFT_END);
      val_right = GetOffsetInBioseq (last, in, SEQLOC_RIGHT_END);
    }
  }

  if (left != NULL) {
    *left = val_left;
  }
  if (right != NULL) {
    *right = val_right;
  }
}
4525
4526
4527 /*
4528 static Int4 GetOffsetInFirstLocalBioseq (SeqLocPtr loc, BioseqPtr in, Uint1 which_end)
4529
4530 {
4531 SeqLocPtr slp = NULL;
4532 Int4 val;
4533
4534 if (loc == NULL) return -1;
4535
4536 while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
4537 val = GetOffsetInBioseq (slp, in, which_end);
4538 if (val != -1) return val;
4539 }
4540
4541 return -1;
4542 }
4543 */
4544
4545 /*****************************************************************************
4546 *
4547 * SeqMgrFindSMFeatItemPtr and SeqMgrFindSMFeatItemByID return SMFeatItemPtr
4548 * to access internal fields
4549 * SeqMgrGetDesiredDescriptor and SeqMgrGetDesiredFeature take an itemID,
4550 * position index, or SeqDescPtr or SeqFeatPtr, return the SeqDescPtr or
4551 * SeqFeatPtr, and fill in the context structure
4552 *
4553 *****************************************************************************/
4554
SeqMgrFindSMFeatItemPtr(SeqFeatPtr sfp)4555 NLM_EXTERN SMFeatItemPtr LIBCALL SeqMgrFindSMFeatItemPtr (SeqFeatPtr sfp)
4556
4557 {
4558 SMFeatItemPtr PNTR array;
4559 BioseqPtr bsp;
4560 BioseqExtraPtr bspextra;
4561 SMFeatBlockPtr curr;
4562 Int2 i;
4563 SMFeatItemPtr item;
4564 Int4 L;
4565 Int4 mid;
4566 ObjMgrDataPtr omdp;
4567 Int4 R;
4568
4569 if (sfp == NULL) return NULL;
4570 bsp = FindAppropriateBioseq (sfp->location, NULL, NULL);
4571 if (bsp == NULL) {
4572 bsp = FindFirstLocalBioseq (sfp->location);
4573 }
4574 omdp = SeqMgrGetOmdpForBioseq (bsp);
4575 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
4576 bspextra = (BioseqExtraPtr) omdp->extradata;
4577 if (bspextra == NULL) return NULL;
4578
4579 /* first try array sorted by SeqFeatPtr value */
4580
4581 array = bspextra->featsBySfp;
4582 if (array != NULL && bspextra->numfeats > 0) {
4583 L = 0;
4584 R = bspextra->numfeats - 1;
4585 while (L < R) {
4586 mid = (L + R) / 2;
4587 item = array [mid];
4588 if (item != NULL && item->sfp < sfp) {
4589 L = mid + 1;
4590 } else {
4591 R = mid;
4592 }
4593 }
4594
4595 item = array [R];
4596 if (item->sfp == sfp) return item;
4597 }
4598
4599 /* now look in feature indices for cached feature information */
4600
4601 curr = bspextra->featlisthead;
4602 while (curr != NULL) {
4603
4604 if (curr->data != NULL) {
4605 for (i = 0; i < curr->index; i++) {
4606 item = &(curr->data [i]);
4607 if (item->sfp == sfp && (! item->ignore)) return item;
4608 }
4609 }
4610
4611 curr = curr->next;
4612 }
4613
4614 return NULL;
4615 }
4616
SeqMgrFindSMFeatItemByID(Uint2 entityID,BioseqPtr bsp,Uint4 itemID)4617 NLM_EXTERN SMFeatItemPtr LIBCALL SeqMgrFindSMFeatItemByID (Uint2 entityID, BioseqPtr bsp, Uint4 itemID)
4618
4619 {
4620 SMFeatItemPtr PNTR array;
4621 BioseqExtraPtr bspextra;
4622 SMFeatBlockPtr curr;
4623 Int2 i;
4624 SMFeatItemPtr item;
4625 Int4 L;
4626 Int4 mid;
4627 ObjMgrDataPtr omdp;
4628 Int4 R;
4629
4630 if (entityID > 0) {
4631 omdp = ObjMgrGetData (entityID);
4632 if (omdp == NULL) return NULL;
4633 } else {
4634 if (bsp == NULL) return NULL;
4635 omdp = SeqMgrGetOmdpForBioseq (bsp);
4636 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
4637 }
4638 bspextra = (BioseqExtraPtr) omdp->extradata;
4639 if (bspextra == NULL) return NULL;
4640
4641 /* first try array sorted by itemID value */
4642
4643 array = bspextra->featsByID;
4644 if (array != NULL && bspextra->numfeats > 0) {
4645 L = 0;
4646 R = bspextra->numfeats - 1;
4647 while (L < R) {
4648 mid = (L + R) / 2;
4649 item = array [mid];
4650 if (item != NULL && item->itemID < itemID) {
4651 L = mid + 1;
4652 } else {
4653 R = mid;
4654 }
4655 }
4656
4657 item = array [R];
4658 if (item->itemID == itemID) return item;
4659 }
4660
4661 /* now look in feature indices for cached feature information */
4662
4663 curr = bspextra->featlisthead;
4664 while (curr != NULL) {
4665
4666 if (curr->data != NULL) {
4667 for (i = 0; i < curr->index; i++) {
4668 item = &(curr->data [i]);
4669 if (item->itemID == itemID && (! item->ignore)) return item;
4670 }
4671 }
4672
4673 curr = curr->next;
4674 }
4675
4676 return NULL;
4677 }
4678
ItemIDfromAnnotDesc(AnnotDescPtr adp)4679 static Int4 ItemIDfromAnnotDesc (AnnotDescPtr adp)
4680
4681 {
4682 ObjValNodePtr ovp;
4683
4684 if (adp == NULL || adp->extended == 0) return 0;
4685 ovp = (ObjValNodePtr) adp;
4686 return ovp->idx.itemID;
4687 }
4688
SeqMgrFindAnnotDescByID(Uint2 entityID,Uint4 itemID)4689 NLM_EXTERN AnnotDescPtr LIBCALL SeqMgrFindAnnotDescByID (Uint2 entityID, Uint4 itemID)
4690
4691 {
4692 AnnotDescPtr PNTR array;
4693 BioseqExtraPtr bspextra;
4694 AnnotDescPtr item;
4695 Int4 L;
4696 Int4 mid;
4697 ObjMgrDataPtr omdp;
4698 Int4 R;
4699
4700 if (entityID < 1) return NULL;
4701 omdp = ObjMgrGetData (entityID);
4702 if (omdp == NULL) return NULL;
4703 bspextra = (BioseqExtraPtr) omdp->extradata;
4704 if (bspextra == NULL) return NULL;
4705
4706 array = bspextra->annotDescByID;
4707 if (array != NULL && bspextra->numannotdesc > 0) {
4708 L = 0;
4709 R = bspextra->numannotdesc - 1;
4710 while (L < R) {
4711 mid = (L + R) / 2;
4712 item = array [mid];
4713 if (item != NULL && ItemIDfromAnnotDesc (item) < itemID) {
4714 L = mid + 1;
4715 } else {
4716 R = mid;
4717 }
4718 }
4719
4720 item = array [R];
4721 if (ItemIDfromAnnotDesc (item) == itemID) return item;
4722 }
4723
4724 return NULL;
4725 }
4726
SeqMgrFindSeqAlignByID(Uint2 entityID,Uint4 itemID)4727 NLM_EXTERN SeqAlignPtr LIBCALL SeqMgrFindSeqAlignByID (Uint2 entityID, Uint4 itemID)
4728
4729 {
4730 BioseqExtraPtr bspextra;
4731 ObjMgrDataPtr omdp;
4732
4733 if (entityID < 1) return NULL;
4734 omdp = ObjMgrGetData (entityID);
4735 if (omdp == NULL) return NULL;
4736 bspextra = (BioseqExtraPtr) omdp->extradata;
4737 if (bspextra == NULL) return NULL;
4738
4739 if (bspextra->alignsByID != NULL && bspextra->numaligns > 0 &&
4740 itemID > 0 && itemID <= (Uint4) bspextra->numaligns) {
4741 return bspextra->alignsByID [itemID];
4742 }
4743
4744 return NULL;
4745 }
4746
SeqMgrGetDesiredFeature(Uint2 entityID,BioseqPtr bsp,Uint4 itemID,Uint4 index,SeqFeatPtr sfp,SeqMgrFeatContext PNTR context)4747 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetDesiredFeature (Uint2 entityID, BioseqPtr bsp,
4748 Uint4 itemID, Uint4 index, SeqFeatPtr sfp,
4749 SeqMgrFeatContext PNTR context)
4750
4751 {
4752 SMFeatItemPtr PNTR array;
4753 BioseqExtraPtr bspextra;
4754 SeqFeatPtr curr;
4755 SMFeatItemPtr item = NULL;
4756 ObjMgrDataPtr omdp;
4757
4758 if (context != NULL) {
4759 MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
4760 }
4761 if (entityID > 0) {
4762 omdp = ObjMgrGetData (entityID);
4763 if (omdp == NULL) return NULL;
4764 } else {
4765 if (bsp == NULL) return NULL;
4766 omdp = SeqMgrGetOmdpForBioseq (bsp);
4767 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
4768 }
4769 bspextra = (BioseqExtraPtr) omdp->extradata;
4770 if (bspextra == NULL) return NULL;
4771
4772 if (itemID > 0) {
4773 item = SeqMgrFindSMFeatItemByID (entityID, bsp, itemID);
4774 } else if (index > 0) {
4775 array = bspextra->featsByPos;
4776 if (array != NULL && bspextra->numfeats > 0 && index <= (Uint4) bspextra->numfeats) {
4777 item = array [index - 1];
4778 }
4779 } else if (sfp != NULL) {
4780 item = SeqMgrFindSMFeatItemPtr (sfp);
4781 }
4782 if (item == NULL) return NULL;
4783
4784 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
4785
4786 curr = item->sfp;
4787 if (curr != NULL && context != NULL && (! item->ignore)) {
4788 context->entityID = entityID;
4789 context->itemID = item->itemID;
4790 context->sfp = curr;
4791 context->sap = item->sap;
4792 context->bsp = item->bsp;
4793 context->label = item->label;
4794 context->left = item->left;
4795 context->right = item->right;
4796 context->dnaStop = item->dnaStop;
4797 context->partialL = item->partialL;
4798 context->partialR = item->partialR;
4799 context->external = item->external;
4800 context->farloc = item->farloc;
4801 context->bad_order = item->bad_order;
4802 context->mixed_strand = item->mixed_strand;
4803 context->ts_image = item->ts_image;
4804 context->strand = item->strand;
4805 if (curr != NULL) {
4806 context->seqfeattype = curr->data.choice;
4807 } else {
4808 context->seqfeattype = FindFeatFromFeatDefType (item->subtype);
4809 }
4810 context->featdeftype = item->subtype;
4811 context->numivals = item->numivals;
4812 context->ivals = item->ivals;
4813 context->userdata = NULL;
4814 context->omdp = (Pointer) omdp;
4815 context->index = item->index + 1;
4816 }
4817 return curr;
4818 }
4819
4820 /*
4821 static ValNodePtr DesiredDescriptorPerBioseq (SeqEntryPtr sep, BioseqPtr bsp,
4822 Uint2 itemID, Uint2 index, ValNodePtr sdp,
4823 SeqMgrDescContext PNTR context)
4824
4825 {
4826 BioseqSetPtr bssp;
4827 ValNodePtr curr = NULL;
4828 SeqEntryPtr tmp;
4829
4830 if (sep != NULL) {
4831 if (IS_Bioseq (sep)) {
4832 bsp = (BioseqPtr) sep->data.ptrvalue;
4833 if (bsp == NULL) return NULL;
4834 } else if (IS_Bioseq_set (sep)) {
4835 bssp = (BioseqSetPtr) sep->data.ptrvalue;
4836 if (bssp == NULL) return NULL;
4837 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
4838 curr = DesiredDescriptorPerBioseq (tmp, NULL, itemID, index, sdp, context);
4839 if (curr != NULL) return curr;
4840 }
4841 return NULL;
4842 }
4843 }
4844
4845 if (bsp == NULL) return NULL;
4846
4847 while ((curr = SeqMgrGetNextDescriptor (bsp, curr, 0, context)) != NULL) {
4848 if (itemID > 0 && itemID == context->itemID) return curr;
4849 if (index > 0 && index == context->index) return curr;
4850 if (sdp != NULL && sdp == curr) return curr;
4851 }
4852
4853 return NULL;
4854 }
4855
4856 NLM_EXTERN ValNodePtr LIBCALL SeqMgrGetDesiredDescriptor (Uint2 entityID, BioseqPtr bsp,
4857 Uint2 itemID, Uint2 index, ValNodePtr sdp,
4858 SeqMgrDescContext PNTR context)
4859
4860 {
4861 SeqMgrDescContext dfaultcontext;
4862 SeqEntryPtr sep;
4863
4864 if (context == NULL) {
4865 context = &dfaultcontext;
4866 }
4867 if (context != NULL) {
4868 MemSet ((Pointer) context, 0, sizeof (SeqMgrDescContext));
4869 }
4870
4871 if (entityID > 0) {
4872 sep = SeqMgrGetTopSeqEntryForEntity (entityID);
4873 if (sep == NULL) return NULL;
4874 return DesiredDescriptorPerBioseq (sep, NULL, itemID, index, sdp, context);
4875 } else if (bsp != NULL) {
4876 return DesiredDescriptorPerBioseq (NULL, bsp, itemID, index, sdp, context);
4877 }
4878
4879 return NULL;
4880 }
4881 */
4882
SeqMgrFindSMDescItemByID(BioseqExtraPtr bspextra,Uint4 itemID)4883 static SMDescItemPtr SeqMgrFindSMDescItemByID (BioseqExtraPtr bspextra, Uint4 itemID)
4884
4885 {
4886 SMDescItemPtr PNTR array;
4887 SMDescItemPtr item;
4888 Int4 L;
4889 Int4 mid;
4890 Int4 R;
4891
4892 if (bspextra == NULL) return NULL;
4893
4894 array = bspextra->descrsByID;
4895 if (array != NULL && bspextra->numdescs > 0) {
4896 L = 0;
4897 R = bspextra->numdescs - 1;
4898 while (L < R) {
4899 mid = (L + R) / 2;
4900 item = array [mid];
4901 if (item != NULL && item->itemID < itemID) {
4902 L = mid + 1;
4903 } else {
4904 R = mid;
4905 }
4906 }
4907
4908 item = array [R];
4909 if (item->itemID == itemID) return item;
4910 }
4911
4912 return NULL;
4913 }
4914
SeqMgrFindSMDescItemBySdp(BioseqExtraPtr bspextra,SeqDescrPtr sdp)4915 static SMDescItemPtr SeqMgrFindSMDescItemBySdp (BioseqExtraPtr bspextra, SeqDescrPtr sdp)
4916
4917 {
4918 SMDescItemPtr PNTR array;
4919 SMDescItemPtr item;
4920 Int4 L;
4921 Int4 mid;
4922 Int4 R;
4923
4924 if (bspextra == NULL) return NULL;
4925
4926 array = bspextra->descrsBySdp;
4927 if (array != NULL && bspextra->numdescs > 0) {
4928 L = 0;
4929 R = bspextra->numdescs - 1;
4930 while (L < R) {
4931 mid = (L + R) / 2;
4932 item = array [mid];
4933 if (item != NULL && item->sdp < sdp) {
4934 L = mid + 1;
4935 } else {
4936 R = mid;
4937 }
4938 }
4939
4940 item = array [R];
4941 if (item->sdp == sdp) return item;
4942 }
4943
4944 return NULL;
4945 }
4946
SeqMgrFindSMDescItemByIndex(BioseqExtraPtr bspextra,Uint4 index)4947 static SMDescItemPtr SeqMgrFindSMDescItemByIndex (BioseqExtraPtr bspextra, Uint4 index)
4948
4949 {
4950 SMDescItemPtr PNTR array;
4951 SMDescItemPtr item;
4952 Int4 L;
4953 Int4 mid;
4954 Int4 R;
4955
4956 if (bspextra == NULL) return NULL;
4957
4958 array = bspextra->descrsByIndex;
4959 if (array != NULL && bspextra->numdescs > 0) {
4960 L = 0;
4961 R = bspextra->numdescs - 1;
4962 while (L < R) {
4963 mid = (L + R) / 2;
4964 item = array [mid];
4965 if (item != NULL && item->index < index) {
4966 L = mid + 1;
4967 } else {
4968 R = mid;
4969 }
4970 }
4971
4972 item = array [R];
4973 if (item->index == index) return item;
4974 }
4975
4976 return NULL;
4977 }
4978
SeqMgrGetDesiredDescriptor(Uint2 entityID,BioseqPtr bsp,Uint4 itemID,Uint4 index,ValNodePtr sdp,SeqMgrDescContext PNTR context)4979 NLM_EXTERN ValNodePtr LIBCALL SeqMgrGetDesiredDescriptor (Uint2 entityID, BioseqPtr bsp,
4980 Uint4 itemID, Uint4 index, ValNodePtr sdp,
4981 SeqMgrDescContext PNTR context)
4982
4983 {
4984 BioseqExtraPtr bspextra;
4985 SeqMgrDescContext dfaultcontext;
4986 ObjMgrDataPtr omdp = NULL;
4987 SMDescItemPtr sdip = NULL;
4988 SeqEntryPtr sep;
4989
4990 if (context == NULL) {
4991 context = &dfaultcontext;
4992 }
4993 if (context != NULL) {
4994 MemSet ((Pointer) context, 0, sizeof (SeqMgrDescContext));
4995 }
4996
4997 if (entityID > 0) {
4998 sep = SeqMgrGetTopSeqEntryForEntity (entityID);
4999 if (sep != NULL) {
5000 omdp = SeqMgrGetOmdpForPointer (sep->data.ptrvalue);
5001 }
5002 } else if (bsp != NULL) {
5003 omdp = SeqMgrGetOmdpForBioseq (bsp);
5004 entityID = bsp->idx.entityID;
5005 if (entityID < 1) {
5006 entityID = ObjMgrGetEntityIDForPointer (bsp);
5007 }
5008 }
5009
5010 if (omdp == NULL) return NULL;
5011 bspextra = (BioseqExtraPtr) omdp->extradata;
5012 if (bspextra == NULL) return NULL;
5013
5014 if (itemID > 0) {
5015 sdip = SeqMgrFindSMDescItemByID (bspextra, itemID);
5016 } else if (index > 0) {
5017 sdip = SeqMgrFindSMDescItemByIndex (bspextra, index);
5018 } else if (sdp != NULL) {
5019 sdip = SeqMgrFindSMDescItemBySdp (bspextra, sdp);
5020 }
5021
5022 if (sdip == NULL) return NULL;
5023
5024 context->entityID = entityID;
5025 context->itemID = sdip->itemID;
5026 context->sdp = sdip->sdp;
5027 context->sep = sdip->sep;
5028 context->level = sdip->level;
5029 context->seqdesctype = sdip->seqdesctype;
5030 context->userdata = NULL;
5031 context->omdp = omdp;
5032 context->index = sdip->index;
5033
5034 return sdip->sdp;
5035 }
5036
SeqMgrGetDesiredAnnotDesc(Uint2 entityID,BioseqPtr bsp,Uint4 itemID,SeqMgrAndContext PNTR context)5037 NLM_EXTERN AnnotDescPtr LIBCALL SeqMgrGetDesiredAnnotDesc (
5038 Uint2 entityID,
5039 BioseqPtr bsp,
5040 Uint4 itemID,
5041 SeqMgrAndContext PNTR context
5042 )
5043
5044 {
5045 AnnotDescPtr adp = NULL;
5046 BioseqExtraPtr bspextra;
5047 SeqMgrAndContext dfaultcontext;
5048 ObjMgrDataPtr omdp = NULL;
5049 SeqEntryPtr sep;
5050
5051 if (context == NULL) {
5052 context = &dfaultcontext;
5053 }
5054 if (context != NULL) {
5055 MemSet ((Pointer) context, 0, sizeof (SeqMgrAndContext));
5056 }
5057
5058 if (entityID > 0) {
5059 sep = SeqMgrGetTopSeqEntryForEntity (entityID);
5060 if (sep != NULL) {
5061 omdp = SeqMgrGetOmdpForPointer (sep->data.ptrvalue);
5062 }
5063 } else if (bsp != NULL) {
5064 omdp = SeqMgrGetOmdpForBioseq (bsp);
5065 entityID = bsp->idx.entityID;
5066 if (entityID < 1) {
5067 entityID = ObjMgrGetEntityIDForPointer (bsp);
5068 }
5069 }
5070
5071 if (omdp == NULL) return NULL;
5072 bspextra = (BioseqExtraPtr) omdp->extradata;
5073 if (bspextra == NULL) return NULL;
5074
5075 if (itemID > 0) {
5076 adp = SeqMgrFindAnnotDescByID (entityID, itemID);
5077 }
5078
5079 if (adp == NULL) return NULL;
5080
5081 context->entityID = entityID;
5082 context->itemID = itemID;
5083 context->adp = adp;
5084 context->annotdesctype = adp->choice;
5085 context->userdata = NULL;
5086 context->omdp = omdp;
5087 context->index = itemID;
5088
5089 return adp;
5090 }
5091
5092 /*****************************************************************************
5093 *
5094 * RecordFeaturesInBioseqs callback explores bioseqs, bioseq sets, and features,
5095 * keeping a running total of the descriptor item counts, and records specific
5096 * information about features on each bioseq
5097 *
5098 *****************************************************************************/
5099
5100 typedef struct extraindex {
5101 SeqEntryPtr topsep;
5102 BioseqPtr lastbsp;
5103 SeqAnnotPtr lastsap;
5104 BioseqSetPtr lastbssp;
5105 ValNodePtr alignhead;
5106 ValNodePtr lastalign;
5107 ValNodePtr adphead;
5108 ValNodePtr lastadp;
5109 SMSeqIdxPtr segpartail;
5110 Int4 cumulative;
5111 Uint4 bspcount;
5112 Uint4 aligncount;
5113 Uint4 descrcount;
5114 Uint4 featcount;
5115 Uint4 adpcount;
5116 Int4 seqlitid;
5117 Boolean flip;
5118 } ExtraIndex, PNTR ExtraIndexPtr;
5119
/*
 * SetDescriptorCounts
 *
 * Stores the current running descriptor total in the lastDescrItemID field
 * of the object manager record for thisitem, then advances the running
 * total by the length of the sdp chain.  Does nothing when sdp or exindx is
 * NULL or when no record is found for thisitem.
 */
static void SetDescriptorCounts (ValNodePtr sdp, ExtraIndexPtr exindx, Pointer thisitem, Uint2 thistype)

{
  ValNodePtr     node;
  ObjMgrDataPtr  record;

  if (sdp == NULL || exindx == NULL) return;

  /* bioseqs have their own lookup, everything else goes by pointer */

  record = (thistype == OBJ_BIOSEQ)
             ? SeqMgrGetOmdpForBioseq ((BioseqPtr) thisitem)
             : SeqMgrGetOmdpForPointer (thisitem);
  if (record == NULL) return;

  /* snapshot the total before this item's own descriptors are added */

  record->lastDescrItemID = exindx->descrcount;

  for (node = sdp; node != NULL; node = node->next) {
    (exindx->descrcount)++;
  }
}
5143
/*
 * CreateBioseqExtraBlock
 *
 * Allocates a BioseqExtra block and attaches it to omdp->extradata, unless
 * omdp is NULL or already has extra data.  On success the reap, reload and
 * free callbacks are installed on omdp and the block is initialised with
 * back pointers to bsp and omdp, min = INT4_MAX and processed = UINT1_MAX.
 * If the allocation fails, extradata is left NULL and nothing else changes.
 */
static void CreateBioseqExtraBlock (ObjMgrDataPtr omdp, BioseqPtr bsp)

{
  BioseqExtraPtr  block;

  if (omdp == NULL) return;
  if (omdp->extradata != NULL) return;

  block = (BioseqExtraPtr) MemNew (sizeof (BioseqExtra));
  if (block == NULL) {
    omdp->extradata = NULL;
    return;
  }

  block->bsp = bsp;
  block->omdp = omdp;
  block->min = INT4_MAX;
  block->processed = UINT1_MAX;

  omdp->extradata = (Pointer) block;
  omdp->reapextra = SeqMgrReapBioseqExtraFunc;
  omdp->reloadextra = SeqMgrReloadBioseqExtraFunc;
  omdp->freeextra = SeqMgrFreeBioseqExtraFunc;
}
5164
CountAlignmentsProc(GatherObjectPtr gop)5165 static Boolean CountAlignmentsProc (GatherObjectPtr gop)
5166
5167 {
5168 ExtraIndexPtr exindx;
5169
5170 if (gop == NULL || gop->itemtype != OBJ_SEQALIGN) return TRUE;
5171 exindx = (ExtraIndexPtr) gop->userdata;
5172 if (exindx == NULL) return TRUE;
5173 (exindx->aligncount)++;
5174 return TRUE;
5175 }
5176
CollectAlignsProc(GatherObjectPtr gop)5177 static Boolean CollectAlignsProc (GatherObjectPtr gop)
5178
5179 {
5180 SeqAlignPtr PNTR alignsByID;
5181
5182 if (gop == NULL || gop->itemtype != OBJ_SEQALIGN) return TRUE;
5183 alignsByID = (SeqAlignPtr PNTR) gop->userdata;
5184 if (alignsByID == NULL) return TRUE;
5185 alignsByID [gop->itemID] = (SeqAlignPtr) gop->dataptr;
5186 return TRUE;
5187 }
5188
SeqMgrIndexAlignments(Uint2 entityID)5189 NLM_EXTERN void LIBCALL SeqMgrIndexAlignments (Uint2 entityID)
5190
5191 {
5192 SeqAlignPtr PNTR alignsByID;
5193 BioseqExtraPtr bspextra;
5194 ExtraIndex exind;
5195 Boolean objMgrFilter [OBJ_MAX];
5196 ObjMgrDataPtr omdp;
5197
5198 if (entityID == 0) return;
5199
5200 /* count alignments */
5201
5202 exind.topsep = NULL;
5203 exind.lastbsp = NULL;
5204 exind.lastsap = NULL;
5205 exind.lastbssp = NULL;
5206 exind.alignhead = NULL;
5207 exind.lastalign = NULL;
5208 exind.adphead = NULL;
5209 exind.lastadp = NULL;
5210 exind.segpartail = NULL;
5211 exind.bspcount = 0;
5212 exind.aligncount = 0;
5213 exind.descrcount = 0;
5214 exind.featcount = 0;
5215 exind.adpcount = 0;
5216 exind.seqlitid = 0;
5217
5218 MemSet ((Pointer) objMgrFilter, 0, sizeof (objMgrFilter));
5219 objMgrFilter [OBJ_SEQALIGN] = TRUE;
5220 GatherObjectsInEntity (entityID, 0, NULL, CountAlignmentsProc, (Pointer) &exind, objMgrFilter);
5221
5222 omdp = ObjMgrGetData (entityID);
5223 if (omdp != NULL) {
5224
5225 CreateBioseqExtraBlock (omdp, NULL);
5226 bspextra = (BioseqExtraPtr) omdp->extradata;
5227 if (bspextra != NULL) {
5228
5229 /* get rid of previous lookup array */
5230
5231 bspextra->alignsByID = MemFree (bspextra->alignsByID);
5232 bspextra->numaligns = 0;
5233
5234 /* alignment ID to SeqAlignPtr index always goes on top of entity */
5235
5236 if (exind.aligncount > 0) {
5237 alignsByID = (SeqAlignPtr PNTR) MemNew (sizeof (SeqAlignPtr) * (exind.aligncount + 2));
5238 if (alignsByID != NULL) {
5239
5240 /* copy SeqAlignPtr for each itemID */
5241
5242 GatherObjectsInEntity (entityID, 0, NULL, CollectAlignsProc, (Pointer) alignsByID, objMgrFilter);
5243
5244 bspextra->alignsByID = alignsByID;
5245 bspextra->numaligns = exind.aligncount;
5246 }
5247 }
5248 }
5249 }
5250 }
5251
SeqIdWithinBioseq(BioseqPtr bsp,SeqLocPtr slp)5252 static SeqIdPtr SeqIdWithinBioseq (BioseqPtr bsp, SeqLocPtr slp)
5253
5254 {
5255 SeqIdPtr a;
5256 SeqIdPtr b;
5257
5258 if (bsp == NULL || slp == NULL) return NULL;
5259 a = SeqLocId (slp);
5260 if (a == NULL) return NULL;
5261 for (b = bsp->id; b != NULL; b = b->next) {
5262 if (SeqIdComp (a, b) == SIC_YES) return b;
5263 }
5264 return NULL;
5265 }
5266
5267 /*
5268 static void FindGPS (BioseqSetPtr bssp, Pointer userdata)
5269
5270 {
5271 BoolPtr is_gpsP;
5272
5273 if (bssp == NULL || bssp->_class != BioseqseqSet_class_gen_prod_set) return;
5274 is_gpsP = (BoolPtr) userdata;
5275 *is_gpsP = TRUE;
5276 }
5277 */
5278
/*
 * ProcessFeatureProducts
 *
 * For a coding region, RNA or protein feature that has a product, records
 * the feature on the product bioseq's BioseqExtra block:
 *
 *   - cdsOrRnaFeat is set to sfp if it is not already set and the product
 *     bioseq's molecule type fits the feature type.  If it is already set
 *     to a different feature, a warning is posted unless the product bioseq
 *     sits inside a gen-prod-set.
 *   - sfp is appended to prodlisthead.
 *   - for coding regions only, and only if protFeat is not already set, the
 *     protein feature on the product that best covers the whole product is
 *     stored in protFeat.
 *
 * itemID is not referenced here.  gop supplies the entity/item/type values
 * used as the context of the warning.
 */
static void ProcessFeatureProducts (SeqFeatPtr sfp, Uint4 itemID, GatherObjectPtr gop)

{
  BioseqPtr         bsp;
  BioseqExtraPtr    bspextra;
  BioseqSetPtr      bssp;
  Char              buf [81];
  CharPtr           ctmp;
  Int4              diff;
  GatherContext     gc;
  GatherContextPtr  gcp;
  Boolean           is_gps;
  CharPtr           loclbl;
  Int4              min;
  ObjMgrDataPtr     omdp;
  Uint1             processed;
  CharPtr           prodlbl;
  ProtRefPtr        prp;
  SeqFeatPtr        prt;
  CharPtr           ptmp;
  SeqAnnotPtr       sap;
  SeqIdPtr          sip;
  SeqLocPtr         slp;
  ValNode           vn;

  if (sfp == NULL || sfp->product == NULL) return;
  if (sfp->data.choice != SEQFEAT_CDREGION &&
      sfp->data.choice != SEQFEAT_RNA &&
      sfp->data.choice != SEQFEAT_PROT) return;

  sip = SeqLocId (sfp->product);
  if (sip == NULL) return;
  bsp = BioseqFind (sip);
  if (bsp == NULL) return;
  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return;

  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) {
    CreateBioseqExtraBlock (omdp, bsp);
    bspextra = (BioseqExtraPtr) omdp->extradata;
  }
  if (bspextra == NULL) return;

  /* cds or rna reference stored in product bioseq's omdp.cdsOrRnaFeat */

  if (bspextra->cdsOrRnaFeat != NULL && bspextra->cdsOrRnaFeat != sfp) {

    /* a second progenitor is not reported inside a gen-prod-set, so look for one among the parent sets */

    is_gps = FALSE;
    if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
      bssp = (BioseqSetPtr) bsp->idx.parentptr;
      while (bssp != NULL) {
        if (bssp->_class == BioseqseqSet_class_gen_prod_set) {
          is_gps = TRUE;
        }
        if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
          bssp = (BioseqSetPtr) bssp->idx.parentptr;
        } else {
          bssp = NULL;
        }
      }
    }

    if (! is_gps) {

      /* labels are only needed for the message, so build them only here */

      buf [0] = '\0';
      FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);
      ctmp = SeqLocPrint (sfp->location);
      loclbl = ctmp;
      if (loclbl == NULL) {
        loclbl = "?";
      }
      ptmp = SeqLocPrint (sfp->product);
      prodlbl = ptmp;
      if (prodlbl == NULL) {
        prodlbl = "?";
      }

      /* NOTE(review): gcp is not read explicitly below; ErrPostItem presumably
         expands to code that reads gcp for the item context - confirm before
         removing gc or gcp */

      MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
      gcp = &gc;
      if (gop != NULL) {
        gc.entityID = gop->entityID;
        gc.itemID = gop->itemID;
        gc.thistype = gop->itemtype;
      }
      ErrPostItem (SEV_WARNING, 0, 0,
                   "SeqMgr indexing cds or rna progenitor already set - Feature: %s - Location [%s] - Product [%s]",
                   buf, loclbl, prodlbl);

      MemFree (ctmp);
      MemFree (ptmp);
    }
  }

  /* check bsp mol against feature type */

  if (bspextra->cdsOrRnaFeat != NULL) {
    /* do not override if set */
  } else if (sfp->data.choice == SEQFEAT_CDREGION && ISA_aa (bsp->mol)) {
    bspextra->cdsOrRnaFeat = sfp;
  } else if (sfp->data.choice == SEQFEAT_RNA && ISA_na (bsp->mol)) {
    bspextra->cdsOrRnaFeat = sfp;
  } else if (sfp->data.choice == SEQFEAT_PROT && ISA_aa (bsp->mol)) {
    bspextra->cdsOrRnaFeat = sfp;
  }

  /* add to prodlisthead list for gather by get_feats_product */

  ValNodeAddPointer (&(bspextra->prodlisthead), 0, (Pointer) sfp);

  if (sfp->data.choice == SEQFEAT_RNA || sfp->data.choice == SEQFEAT_PROT) return;

  /* if protFeat exists it was set by exhaustive gather on protein bioseq */

  if (bspextra->protFeat != NULL) return;

  /* calculate largest protein feature on cds's product bioseq */

  min = INT4_MAX;
  processed = UINT1_MAX;

  /* stack-built "whole bioseq" location used as the reference for SeqLocAinB */

  vn.choice = SEQLOC_WHOLE;
  vn.data.ptrvalue = (Pointer) bsp->id;
  vn.next = NULL;
  slp = (Pointer) (&vn);

  sap = bsp->annot;
  while (sap != NULL) {
    if (sap->type == 1) {
      prt = (SeqFeatPtr) sap->data;
      while (prt != NULL) {
        if (prt->data.choice == SEQFEAT_PROT) {
          prp = (ProtRefPtr) prt->data.value.ptrvalue;

          /* get SeqId in bioseq that matches SeqId used for location */

          vn.data.ptrvalue = SeqIdWithinBioseq (bsp, prt->location);

          /* smallest non-negative difference wins */

          diff = SeqLocAinB (prt->location, slp);
          if (diff >= 0 && prp != NULL) {
            if (diff < min) {
              min = diff;
              processed = prp->processed;
              bspextra->protFeat = prt;
            } else if (diff == min) {
              /* unprocessed 0 preferred over preprotein 1 preferred over mat peptide 2 */
              if (prp->processed < processed) {
                min = diff;
                processed = prp->processed;
                bspextra->protFeat = prt;
              }
            }
          }
        }
        prt = prt->next;
      }
    }
    sap = sap->next;
  }
}
5453
5454
/*
 * SimpleIvalsCalculation
 *
 * Fast path for the common case of a single-interval location on bsp when
 * no flipping is requested.  On a match it sets item->strand, allocates a
 * two-element interval array for item->ivals (start first, so "to" comes
 * before "from" on the minus strand), sets item->numivals to 1 and returns
 * TRUE.
 *
 * Returns FALSE, leaving item untouched, when flip is set, any pointer is
 * NULL, slp is not a SEQLOC_INT, or the interval's SeqId is not one of
 * bsp's ids; the caller then has to take the general path.
 *
 * If the interval array cannot be allocated, the item is left with no
 * intervals (numivals 0, ivals NULL) and TRUE is still returned so that
 * the caller does not retry through the more expensive general path.
 */
static Boolean SimpleIvalsCalculation (SeqLocPtr slp, BioseqPtr bsp, Boolean flip, SMFeatItemPtr item)
{
  Int4Ptr    ivals;
  SeqIntPtr  sint;

  if (flip || slp == NULL || bsp == NULL || item == NULL) return FALSE;
  if (slp->choice != SEQLOC_INT) return FALSE;

  sint = (SeqIntPtr) slp->data.ptrvalue;
  if (sint == NULL) return FALSE;
  if (! SeqIdIn (sint->id, bsp->id)) return FALSE;

  item->strand = sint->strand;

  ivals = (Int4Ptr) MemNew (sizeof (Int4) * 2);
  if (ivals == NULL) {
    item->numivals = 0;
    item->ivals = NULL;
    return TRUE;
  }

  if (item->strand == Seq_strand_minus) {
    ivals [0] = sint->to;
    ivals [1] = sint->from;
  } else {
    ivals [0] = sint->from;
    ivals [1] = sint->to;
  }
  item->ivals = ivals;
  item->numivals = 1;

  return TRUE;
}
5477
/*
 * RecordOneFeature
 *
 * Appends one feature to the block list hanging off bspextra and fills in
 * the cached item: feature/annot/bioseq pointers, label, left/right
 * extremes, partial flags, the boolean flags passed in, subtype, itemID and
 * the start/stop interval pairs on bsp.
 *
 * Nothing is recorded when any of bspextra, omdp, bsp, exindx or sfp is
 * NULL, when the block list has no tail, or when a new block was needed
 * but could not be allocated.
 *
 * When exindx->flip is set, left/right are mirrored against bsp->length and
 * the intervals are computed on the reverse complement.  A subtype of 0 is
 * replaced by FindFeatDefType (sfp).
 *
 * On success the block's fill count, bspextra->numfeats and
 * exindx->featcount are each incremented.  These counters are also
 * incremented when only the block's data array could not be allocated.
 */
static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp,
                              BioseqPtr bsp, ExtraIndexPtr exindx, SeqFeatPtr sfp,
                              Int4 left, Int4 right, Uint4 itemID, Uint2 subtype,
                              Boolean external, Boolean farloc, Boolean bad_order,
                              Boolean mixed_strand, Boolean ignore, Boolean ts_image)

{
  Char            buf [129];
  SMFeatBlockPtr  curr;
  Int4            from;
  Int2            i;
  SMFeatItemPtr   item;
  Int4Ptr         ivals;
  SeqLocPtr       loc;
  SMFeatBlockPtr  next;
  Int2            numivals = 0;
  CharPtr         ptr;
  SeqIdPtr        sip;
  SeqLocPtr       slp = NULL;
  Uint1           strand;
  Int4            swap;
  Int4            to;

  if (bspextra == NULL || omdp == NULL || bsp == NULL || exindx == NULL || sfp == NULL) return;

  if (bspextra->featlisttail != NULL) {

    /* just in case blocksize was not set for some reason */

    if (bspextra->blocksize < 1) {
      bspextra->blocksize = 5;
    }

    curr = bspextra->featlisttail;
    if (curr->index >= bspextra->blocksize) {

      /* allocate next chunk in linked list of blocks */

      next = (SMFeatBlockPtr) MemNew (sizeof (SMFeatBlock));
      curr->next = next;

      if (next != NULL) {
        bspextra->featlisttail = next;
        curr = next;
      }
    }

    /* false only when the tail is full and a new block could not be allocated */

    if (curr->index < bspextra->blocksize) {

      /* allocate data block if not yet done for this chunk */

      if (curr->data == NULL) {
        curr->data = (SMFeatItemPtr) MemNew (sizeof (SMFeatItem) * (size_t) (bspextra->blocksize));
      }

      /* now record desired information about current feature */

      if (curr->data != NULL) {
        item = &(curr->data [curr->index]);
        item->sfp = sfp;
        item->sap = exindx->lastsap;
        item->bsp = bsp;

        /* for RNA features the label starts after the "RNA-" marker, if present */

        FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);
        ptr = buf;
        if (sfp->data.choice == SEQFEAT_RNA) {
          ptr = StringStr (buf, "RNA-");
          if (ptr != NULL) {
            ptr += 4;
          } else {
            ptr = buf;
          }
        }
        item->label = StringSaveNoNull (ptr);

        item->left = left;
        item->right = right;
        if (exindx->flip) {
          item->left = bsp->length - right;
          item->right = bsp->length - left;
        }
        item->dnaStop = -1;
        CheckSeqLocForPartial (sfp->location, &(item->partialL), &(item->partialR));
        item->external = external;
        item->farloc = farloc;
        item->bad_order = bad_order;
        item->mixed_strand = mixed_strand;
        item->ts_image = ts_image;
        if (subtype == 0) {
          subtype = FindFeatDefType (sfp);
        }
        item->subtype = subtype;
        item->itemID = itemID;
        item->ignore = ignore;
        item->overlap = -1;

        /* record start/stop pairs of intervals on target bioseq */

        if (SimpleIvalsCalculation (sfp->location, bsp, exindx->flip, item)) {
          /* don't need to do complex merging to calculate intervals */
        } else {
          loc = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE);

          if (exindx->flip) {
            sip = SeqIdFindBest (bsp->id, 0);
            slp = SeqLocCopyRegion (sip, loc, bsp, 0, bsp->length - 1, Seq_strand_minus, FALSE);
            SeqLocFree (loc);
            loc = slp;
          }
          /* record strand relative to segmented parent */
          item->strand = SeqLocStrand (loc);
          if (exindx->flip) {
            item->strand = StrandCmp (item->strand);
          }
          strand = item->strand;

          /* first pass counts the intervals, second pass stores them */

          slp = NULL;
          while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
            numivals++;
          }
          if (numivals > 0) {
            ivals = (Int4Ptr) MemNew (sizeof (Int4) * (numivals * 2));

            /* only publish the array and its count if the allocation succeeded */

            if (ivals != NULL) {
              item->ivals = ivals;
              item->numivals = numivals;
              slp = NULL;
              i = 0;
              while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
                from = SeqLocStart (slp);
                to = SeqLocStop (slp);
                if (strand == Seq_strand_minus) {
                  swap = from;
                  from = to;
                  to = swap;
                }
                ivals [i] = from;
                i++;
                ivals [i] = to;
                i++;
              }
            }
          }
          SeqLocFree (loc);
        }
      }

      /* increment count on current block */

      (curr->index)++;

      /* count all features, per bioseq and per entity */

      (bspextra->numfeats)++;
      (exindx->featcount)++;

    }
  }
}
5645
5646
/*
 * CheckForTransSplice
 *
 *   Walks the pieces of sfp->location with SeqLocFindNext and, looking only
 *   at SEQLOC_INT pieces, compares each interval with the previous interval:
 *
 *     - if both are on the same bioseq (SeqIdForSameBioseq) and the 'to'
 *       coordinates run the wrong way for the strand of the later interval,
 *       the location is considered out of order (not checked when circular
 *       is TRUE)
 *     - if both are on the same bioseq and their strands differ, the
 *       location is considered mixed-strand, except that plus next to
 *       unknown is tolerated
 *
 *   sfp            feature to examine; nothing is done if it or its
 *                  location is NULL
 *   bad_orderP     set to TRUE when intervals are out of order
 *   mixed_strandP  set to TRUE when strands are mixed
 *   circular       TRUE suppresses the ordering test
 *
 *   The output flags are only ever set, never cleared, so the caller is
 *   expected to initialize them.  Several feature kinds override the result
 *   (see the end of the function).
 */
static void CheckForTransSplice (
  SeqFeatPtr sfp,
  BoolPtr bad_orderP,
  BoolPtr mixed_strandP,
  Boolean circular
)

{
  SeqIdPtr   currId;
  SeqIntPtr  currInt;
  Uint1      currStrand;
  Boolean    inOrder = TRUE;
  SeqIdPtr   lastId = NULL;
  SeqIntPtr  lastInt = NULL;
  Uint1      lastStrand = Seq_strand_other;
  SeqLocPtr  piece;
  Boolean    plusVsUnknown;
  Boolean    strandsDiffer = FALSE;
  Uint1      subtype;

  if (sfp == NULL || sfp->location == NULL) return;

  for (piece = SeqLocFindNext (sfp->location, NULL);
       piece != NULL;
       piece = SeqLocFindNext (sfp->location, piece)) {

    /* only plain intervals take part in the comparison */

    if (piece->choice != SEQLOC_INT) continue;

    currInt = (SeqIntPtr) (piece->data.ptrvalue);
    currStrand = currInt->strand;
    currId = currInt->id;

    /* ordering test against the previous interval, until first failure */

    if (lastInt != NULL && inOrder && (! circular) &&
        SeqIdForSameBioseq (lastInt->id, currInt->id)) {
      if (currStrand == Seq_strand_minus) {
        if (lastInt->to < currInt->to) {
          inOrder = FALSE;
        }
      } else if (lastInt->to > currInt->to) {
        inOrder = FALSE;
      }
    }
    lastInt = currInt;

    /* strand test against the previous interval */

    if (lastStrand != Seq_strand_other && currStrand != Seq_strand_other &&
        SeqIdForSameBioseq (lastId, currId) &&
        lastStrand != currStrand) {

      /* plus adjacent to unknown is treated as an unmarked strand, not mixed */

      plusVsUnknown = (Boolean)
        ((lastStrand == Seq_strand_plus && currStrand == Seq_strand_unknown) ||
         (lastStrand == Seq_strand_unknown && currStrand == Seq_strand_plus));
      if (! plusVsUnknown) {
        strandsDiffer = TRUE;
      }
    }

    lastStrand = currStrand;
    lastId = currId;
  }

  subtype = sfp->idx.subtype;

  /* interval order is not significant for publications or heterogen bonds,
     misc_recomb intervals SHOULD be in reverse order, and primer_bind
     intervals are exempt as well */

  if (subtype == FEATDEF_PUB ||
      sfp->data.choice == SEQFEAT_HET ||
      subtype == FEATDEF_misc_recomb ||
      subtype == FEATDEF_primer_bind) {
    inOrder = TRUE;
  }

  /* primer_bind intervals MAY be on opposite strands */

  if (subtype == FEATDEF_primer_bind) {
    strandsDiffer = FALSE;
  }

  if (! inOrder) {
    *bad_orderP = TRUE;
  }
  if (strandsDiffer) {
    *mixed_strandP = TRUE;
  }
}
5744
5745
/*
 * RecordFeatureOnBioseq
 *
 *   Adds one feature to the feature index kept in the BioseqExtra block of
 *   bsp.  Steps, in order:
 *
 *     1. obtain (creating if necessary) the BioseqExtra block for bsp
 *     2. run CheckForTransSplice on the feature to get the bad_order and
 *        mixed_strand flags
 *     3. compute the left and right extremes of the feature on bsp, using
 *        sfp->product instead of sfp->location when special_case is TRUE
 *     4. if either extreme is -1, post a warning and stop
 *     5. for protein features, update bspextra->protFeat / min / processed
 *        when this feature is a better "best protein" candidate
 *     6. on the first feature for this bioseq, allocate the first feature
 *        block and estimate a block size (capped at 50)
 *     7. normalize left/right and call RecordOneFeature, a second time with
 *        shifted coordinates for gene, pub, biosource and operon features
 *        that span the origin of a circular bioseq
 *
 *   gop            gather object for the feature; supplies entityID, itemID,
 *                  itemtype, subtype and external
 *   bsp            bioseq on which the feature is indexed
 *   sfp            feature being indexed
 *   exindx         indexing state shared across the gather
 *   usingLocalBsp  passed through to RecordOneFeature
 *   special_case   use sfp->product for extremes; also passed to
 *                  RecordOneFeature in the position after mixed_strand
 *   small_gen_set  passed through to GetLeftAndRightOffsetsInNearBioseq
 *   ts_image       passed through to RecordOneFeature
 *
 *   Returns FALSE only when gop, bsp, sfp or exindx is NULL; every other
 *   path, including the failure paths, returns TRUE.
 */
static Boolean RecordFeatureOnBioseq (
  GatherObjectPtr gop,
  BioseqPtr bsp,
  SeqFeatPtr sfp,
  ExtraIndexPtr exindx,
  Boolean usingLocalBsp,
  Boolean special_case,
  Boolean small_gen_set,
  Boolean ts_image
)

{
  Boolean         bad_order;
  BioseqExtraPtr  bspextra;
  Char            buf [128];
  Int2            count;
  CharPtr         ctmp;
  Int4            diff;
  Int4            left;
  CharPtr         loclbl;
  Boolean         mixed_strand;
  ObjMgrDataPtr   omdp;
  ProtRefPtr      prp;
  Int4            right;
  SeqAnnotPtr     sap;
  SeqLocPtr       slp;
  Int4            swap;
  SeqFeatPtr      tmp;
  ValNode         vn;

  if (gop == NULL || bsp == NULL || sfp == NULL || exindx == NULL) return FALSE;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return TRUE;

  /* now prepare for adding feature to index */

  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) {
    CreateBioseqExtraBlock (omdp, bsp);
    bspextra = (BioseqExtraPtr) omdp->extradata;
  }
  if (bspextra == NULL) return TRUE;

  /* get extreme left and right extents of feature location on near bioseqs */
  /* merge here to get correct extremes even in case of trans-splicing */

  /* but this broke the handling of genes spanning the origin, so cannot do */
  /*
  slp = SeqLocMergeEx (bsp, sfp->location, NULL, TRUE, TRUE, FALSE, FALSE);
  */
  if (special_case) {
    slp = sfp->product;
  } else {
    slp = sfp->location;
  }

  /* flags must be cleared here because CheckForTransSplice only ever sets them;
     note that it always examines sfp->location, and that the circular argument
     is hard-wired to FALSE (the topology test is commented out) */

  bad_order = FALSE;
  mixed_strand = FALSE;
  CheckForTransSplice (sfp, &bad_order, &mixed_strand, /* (Boolean) (bsp->topology == TOPOLOGY_CIRCULAR) */ FALSE);

#if 1
  GetLeftAndRightOffsetsInNearBioseq (slp, bsp, &left, &right, small_gen_set, bad_order, mixed_strand);
#else
  left = GetOffsetInNearBioseq (slp, bsp, SEQLOC_LEFT_END);
  right = GetOffsetInNearBioseq (slp, bsp, SEQLOC_RIGHT_END);
#endif
  /*
  SeqLocFree (slp);
  */

  /* -1 for either extreme is treated as a mapping failure: warn and skip the feature */

  if (left == -1 || right == -1) {
    GatherContext     gc;
    GatherContextPtr  gcp;
    Char              lastbspid [128];
    SeqIdPtr          sip;

    /* gcp is not referenced by name anywhere else in this block - presumably
       the ErrPostItem macro picks it up to tag the message with the item,
       TODO confirm against the macro definition */

    MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
    gcp = &gc;
    gc.entityID = gop->entityID;
    gc.itemID = gop->itemID;
    gc.thistype = gop->itemtype;

    /* identify the record by the best SeqId of the last bioseq seen, if any */

    lastbspid [0] = '\0';
    if (exindx->lastbsp != NULL) {
      sip = SeqIdFindBest (exindx->lastbsp->id, 0);
      if (sip != NULL) {
        SeqIdWrite (sip, lastbspid, PRINTID_FASTA_LONG, sizeof (lastbspid));
      }
    }
    FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);

    /* ctmp keeps the string returned by SeqLocPrint so it can be freed below,
       loclbl is what gets printed and falls back to a literal */

    ctmp = SeqLocPrint (sfp->location);
    loclbl = ctmp;
    if (loclbl == NULL) {
      loclbl = "?";
    }
    ErrPostItem (SEV_WARNING, 0, 0,
                 "SeqMgr indexing feature mapping problem - Feature: %s - Location [%s] - Record [%s]",
                 buf, loclbl, lastbspid);
    MemFree (ctmp);
    return TRUE;
  }

  /* if indexing protein bioseq, store largest protein feature */

  if (sfp->data.choice == SEQFEAT_PROT) {
    prp = (ProtRefPtr) sfp->data.value.ptrvalue;

    /* build a whole-bioseq location on the stack to compare the feature against */

    vn.choice = SEQLOC_WHOLE;
    vn.data.ptrvalue = (Pointer) bsp->id;
    vn.next = NULL;
    slp = (Pointer) &vn;

    /* get SeqId in bioseq that matches SeqId used for location */

    vn.data.ptrvalue = (Pointer) SeqIdWithinBioseq (bsp, sfp->location);

    /* smaller non-negative diff wins; on a tie the lower prp->processed value wins */

    diff = SeqLocAinB (sfp->location, slp);
    if (diff >= 0 && prp != NULL) {
      if (diff < bspextra->min) {
        bspextra->min = diff;
        bspextra->processed = prp->processed;
        /* if (omdp->tempload == TL_NOT_TEMP) { */
          bspextra->protFeat = sfp;
        /* } */
      } else if (diff == bspextra->min) {
        /* unprocessed 0 preferred over preprotein 1 preferred over mat peptide 2 */
        if ( /* prp != NULL && prp->processed == 0 */ prp->processed < bspextra->processed ) {
          bspextra->min = diff;
          bspextra->processed = prp->processed;
          bspextra->protFeat = sfp;
        }
      }
    }
  }

  /* add feature item to linked list of blocks */

  if (bspextra->featlisthead == NULL) {
    bspextra->featlisthead = (SMFeatBlockPtr) MemNew (sizeof (SMFeatBlock));

    /* for first feature indexed on this bioseq, quickly see if few or many
       additional features, since most features on a bioseq are packaged in
       the same list, and most proteins only have one bioseq */

    /* empty-bodied loop: counts sfp and the features chained after it, stopping at 50 */

    for (tmp = sfp, count = 0;
         tmp != NULL && count < 50;
         tmp = tmp->next, count++) continue;

    /* extend count if above features were packaged on a bioseq set (presumably CDS or mRNA) */

    if (exindx->lastbssp != NULL) {
      for (sap = bsp->annot; sap != NULL; sap = sap->next) {

        /* type 1 annotations are read as feature tables (data is cast to SeqFeatPtr) */

        if (sap->type == 1) {

          for (tmp = (SeqFeatPtr) sap->data;
               tmp != NULL && count < 50;
               tmp = tmp->next, count++) continue;

        }
      }
    }

    bspextra->blocksize = count;
  }
  if (bspextra->featlisttail == NULL) {
    bspextra->featlisttail = bspextra->featlisthead;
  }

  /* tail is still NULL only if the head block could not be allocated */

  if (bspextra->featlisttail != NULL) {

    /* if feature spans origin, record with left < 0 */

    if (left > right && bsp->topology == TOPOLOGY_CIRCULAR) {
      left -= bsp->length;
    }

    /* some trans-spliced locations can confound GetOffsetInNearBioseq, so normalize here */

    if (left > right) {
      swap = left;
      left = right;
      right = swap;
    }

    RecordOneFeature (bspextra, omdp, bsp, exindx, sfp, left,
                      right, gop->itemID, gop->subtype, gop->external, usingLocalBsp,
                      bad_order, mixed_strand, special_case, ts_image);

    /* record gene, publication, and biosource features twice if spanning the origin */

    if (left < 0 && bsp->topology == TOPOLOGY_CIRCULAR) {
      if (sfp->data.choice == SEQFEAT_GENE ||
          sfp->data.choice == SEQFEAT_PUB ||
          sfp->data.choice == SEQFEAT_BIOSRC ||
          sfp->idx.subtype == FEATDEF_operon) {

        /* second copy is shifted up by one bioseq length and passes TRUE where
           the first call passed special_case */

        RecordOneFeature (bspextra, omdp, bsp, exindx, sfp, left + bsp->length,
                          right + bsp->length, gop->itemID, gop->subtype, gop->external, usingLocalBsp,
                          bad_order, mixed_strand, TRUE, ts_image);

      }
    }
  }

  return TRUE;
}
5950
/* pairs an annotation descriptor with the bioseq chosen for it; one of these
   is allocated by RecordFeaturesInBioseqs for each OBJ_ANNOTDESC item */

typedef struct adpbspdata {
  AnnotDescPtr  adp;  /* annotation descriptor presented by the gather */
  BioseqPtr     bsp;  /* bioseq associated with the descriptor, may be NULL */
} AdpBspData, PNTR AdpBspPtr;
5955
5956 /* callback for recording features and descriptor, prot, and cdsOrRna information */
5957
/*
 * RecordFeaturesInBioseqs
 *
 *   Gather callback that dispatches on gop->itemtype and updates the
 *   ExtraIndex state passed in gop->userdata:
 *
 *     OBJ_BIOSEQ     counts the bioseq, remembers it as lastbsp, clears
 *                    lastbssp, stores itemID and running index in its
 *                    BioseqExtra block, then counts its descriptors
 *     OBJ_BIOSEQSET  remembers the set as lastbssp, then counts its
 *                    descriptors
 *     OBJ_SEQANNOT   remembers the annotation as lastsap
 *     OBJ_SEQALIGN   appends the alignment to the alignhead/lastalign list
 *     OBJ_ANNOTDESC  appends an AdpBspData record to the adphead/lastadp list
 *     OBJ_SEQFEAT    finds the bioseq the feature belongs on and indexes it
 *                    with RecordFeatureOnBioseq
 *
 *   Any other item type is ignored.
 *
 *   Returns FALSE only when gop->userdata is NULL; all other paths return
 *   TRUE.  The return value of RecordFeatureOnBioseq is not examined.
 */
static Boolean RecordFeaturesInBioseqs (GatherObjectPtr gop)

{
  AdpBspPtr       abp;
  AnnotDescPtr    adp = NULL;
  BioseqPtr       bsp = NULL;
  BioseqExtraPtr  bspextra;
  BioseqSetPtr    bssp = NULL;
  Char            buf [81];
  CharPtr         ctmp;
  ExtraIndexPtr   exindx;
  ValNodePtr      head = NULL;
  BioseqPtr       lbsp;
  CharPtr         loclbl;
  ObjMgrDataPtr   omdp;
  SeqAnnotPtr     sap = NULL;
  ValNodePtr      sdp = NULL;
  SeqFeatPtr      sfp = NULL;
  SeqAlignPtr     sal = NULL;
  SeqIdPtr        sip;
  SeqLocPtr       slp;
  Boolean         small_gen_set = FALSE;
  Boolean         special_case = FALSE;
  ValNodePtr      tail = NULL;
  Boolean         usingLocalBsp = FALSE;
  ValNodePtr      vnp;

  /* exactly one of bsp, bssp, sap, adp, sfp, sal is loaded from the item;
     the sections below test them in turn */

  switch (gop->itemtype) {
    case OBJ_BIOSEQ :
      bsp = (BioseqPtr) gop->dataptr;
      if (bsp == NULL) return TRUE;
      sdp = bsp->descr;
      break;
    case OBJ_BIOSEQSET :
      bssp = (BioseqSetPtr) gop->dataptr;
      if (bssp == NULL) return TRUE;
      sdp = bssp->descr;
      break;
    case OBJ_SEQANNOT :
      sap = (SeqAnnotPtr) gop->dataptr;
      break;
    case OBJ_ANNOTDESC :
      adp = (AnnotDescPtr) gop->dataptr;
      break;
    case OBJ_SEQFEAT :
      sfp = (SeqFeatPtr) gop->dataptr;
      break;
    case OBJ_SEQALIGN :
      sal = (SeqAlignPtr) gop->dataptr;
      break;
    default :
      return TRUE;
  }

  exindx = (ExtraIndexPtr) gop->userdata;
  if (exindx == NULL) return FALSE;

  /* save bspItemID to support bioseq explore functions */

  if (bsp != NULL) {

    (exindx->bspcount)++;

    /* save last BioseqPtr to check first for appropriate bioseq */

    exindx->lastbsp = bsp;

    /* blocksize for new block based only on features packaged on bioseq */

    exindx->lastbssp = NULL;

    omdp = SeqMgrGetOmdpForBioseq (bsp);
    if (omdp != NULL) {
      bspextra = (BioseqExtraPtr) omdp->extradata;
      if (bspextra == NULL) {
        CreateBioseqExtraBlock (omdp, bsp);
        bspextra = (BioseqExtraPtr) omdp->extradata;
      }
      if (bspextra != NULL) {
        bspextra->bspItemID = gop->itemID;
        bspextra->bspIndex = exindx->bspcount;
      }
    }
  }

  /* save last BioseqSetPtr to calculate blocksize from bioseq set and bioseq features,
     features on bioseq set presumably being CDS or mRNA and applying only to nucleotides */

  if (bssp != NULL) {
    exindx->lastbssp = bssp;
  }

  /* count bioseq or bioseq set descriptors, to calculate lastDescrItemID */

  /* a bioseq or bioseq set without descriptors falls through every remaining
     test and leaves by the (sfp == NULL) return further down */

  if (sdp != NULL) {
    SetDescriptorCounts (sdp, exindx, gop->dataptr, gop->itemtype);
    return TRUE;
  }

  /* save SeqAnnotPtr containing next features to be gathered */

  if (sap != NULL) {
    exindx->lastsap = sap;
    return TRUE;
  }

  /* record SeqAlignPtr in val node list - expects all itemIDs to be presented */

  if (sal != NULL) {

    /* lastalign is the tail of the list, alignhead is captured on first insertion */

    vnp = ValNodeAddPointer (&(exindx->lastalign), 0, (Pointer) sal);
    if (exindx->alignhead == NULL) {
      exindx->alignhead = exindx->lastalign;
    }
    exindx->lastalign = vnp;
    (exindx->aligncount)++;
    return TRUE;
  }

  /* record AnnotDescPtr and relevant BioseqPtr in val node list */

  if (adp != NULL) {
    abp = (AdpBspPtr) MemNew (sizeof (AdpBspData));
    if (abp != NULL) {
      abp->adp = adp;

      /* abp->bsp is assigned only when the last Seq-annot seen is a feature
         table (type 1); otherwise it keeps whatever MemNew left there */

      sap = exindx->lastsap;
      if (sap != NULL && sap->type == 1) {
        bsp = NULL;
        sfp = (SeqFeatPtr) sap->data;
        /* if empty Seq-annot with Seq-annot.descr, use last Bioseq */
        if (sfp == NULL) {
          bsp = exindx->lastbsp;
        }

        /* take the bioseq of the first feature whose location resolves,
           falling back to lastbsp for external annotations */

        while (sfp != NULL && bsp == NULL) {
          slp = sfp->location;
          if (slp != NULL) {
            bsp = BioseqFindFromSeqLoc (slp);
            if (bsp == NULL && gop->external) {
              bsp = exindx->lastbsp;
            }
          }
          sfp = sfp->next;
        }
        abp->bsp = bsp;
      }

      /* same tail-append pattern as the alignment list above */

      vnp = ValNodeAddPointer (&(exindx->lastadp), 0, (Pointer) abp);
      if (exindx->adphead == NULL) {
        exindx->adphead = exindx->lastadp;
      }
      exindx->lastadp = vnp;
      (exindx->adpcount)++;
    }
    return TRUE;
  }

  /* otherwise index features on every bioseq in entity */

  if (sfp == NULL) return TRUE;

  /* cds or rna reference stored in product bioseq's omdp.cdsOrRnaFeat,
     best protein feature in omdp.protFeat (do before adding CDS) */

  if (sfp->product != NULL) {
    ProcessFeatureProducts (sfp, gop->itemID, gop);
  }

  bsp = FindAppropriateBioseq (sfp->location, exindx->lastbsp, &small_gen_set);

  /* failure here can be due to SeqLoc that references far accession */

  if (bsp == NULL) {

    /* if far accession, find first local bioseq on any location interval */

    bsp = FindFirstLocalBioseq (sfp->location);

    /* report whether far accession was able to be handled */

    FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);

    /* ctmp keeps the string returned by SeqLocPrint so it can be freed below,
       loclbl is what gets printed and falls back to a literal */

    ctmp = SeqLocPrint (sfp->location);
    loclbl = ctmp;
    if (loclbl == NULL) {
      loclbl = "?";
    }

    if (bsp == NULL) {
      {
        GatherContext     gc;
        GatherContextPtr  gcp;
        Char              lastbspid [41];
        SeqIdPtr          sip;

        /* gcp is not referenced by name anywhere else in this block - presumably
           the ErrPostItem macro picks it up to tag the message with the item,
           TODO confirm against the macro definition */

        MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
        gcp = &gc;
        gc.entityID = gop->entityID;
        gc.itemID = gop->itemID;
        gc.thistype = gop->itemtype;
        lastbspid [0] = '\0';
        if (exindx->lastbsp != NULL) {
          sip = SeqIdFindBest (exindx->lastbsp->id, 0);
          if (sip != NULL) {
            SeqIdWrite (sip, lastbspid, PRINTID_FASTA_LONG, sizeof (lastbspid));
          }
        }
        ErrPostItem (SEV_WARNING, 0, 0,
                     "SeqMgr indexing feature location problem - Feature: %s - Location [%s] - Record [%s]",
                     buf, loclbl, lastbspid);
      }
    } else {
      /*
      ErrPostItem (SEV_INFO, 0, 0,
                   "SeqMgr indexing detected and handled far accession - Feature: %s - Location [%s]",
                   buf, loclbl);
      */
    }
    MemFree (ctmp);

    /* last resort for a coding region with a product when the top entry is a
       single protein bioseq: index the feature by its product location */

    if (bsp == NULL && sfp->product != NULL &&
        sfp->data.choice == SEQFEAT_CDREGION &&
        IS_Bioseq (exindx->topsep)) {
      bsp = (BioseqPtr) exindx->topsep->data.ptrvalue;
      if (bsp == NULL || (! ISA_aa (bsp->mol))) return TRUE;
      special_case = TRUE;
      bsp = FindAppropriateBioseq (sfp->product, exindx->lastbsp, &small_gen_set);
      if (bsp == NULL) return TRUE;
    } else {
      if (bsp == NULL) return TRUE;
      usingLocalBsp = TRUE;
    }
  }

  /* assume subsequent features will be on this bioseq */

  exindx->lastbsp = bsp;

  RecordFeatureOnBioseq (gop, bsp, sfp, exindx, usingLocalBsp, special_case, small_gen_set, FALSE);

  /* for small genome set, index mixed-chromosome features on other chromosomes as misc_features for visibility */

  /* only gene features go through the extra indexing below */

  if (sfp->data.choice != SEQFEAT_GENE) return TRUE;

  if (small_gen_set) {

    /* collect every other loaded bioseq referenced by a piece of the location */

    slp = SeqLocFindNext (sfp->location, NULL);
    while (slp != NULL) {
      sip = SeqLocId (slp);
      if (sip != NULL) {
        lbsp = BioseqFindCore (sip);
        if (lbsp != NULL) {
          if (lbsp != bsp) {
            ValNodeAddPointerEx (&head, &tail, 0, (Pointer) lbsp);
          }
        }
      }
      slp = SeqLocFindNext (sfp->location, slp);
    }
    if (head != NULL) {

      /* sort by pointer value and unique so each bioseq is indexed once */

      head = ValNodeSort (head, SortByPtrvalue);
      head = UniquePtrValNode (head);

      for (vnp = head; vnp != NULL; vnp = vnp->next) {
        bsp = (BioseqPtr) vnp->data.ptrvalue;
        if (bsp == NULL) continue;

        /*
          !!! need to add flag so that these features are only fetched by flatfile generator
          and with a distinct flag so that they show up as something like misc_feature instead
          of CDS !!!
        */

        /* note that lastbsp is left pointing at the last of these bioseqs;
           the final TRUE is the ts_image argument */

        exindx->lastbsp = bsp;
        RecordFeatureOnBioseq (gop, bsp, sfp, exindx, usingLocalBsp, special_case, small_gen_set, TRUE);
      }

      ValNodeFree (head);
    }
  }

  return TRUE;
}
6235
6236 /*****************************************************************************
6237 *
6238 * RecordSegmentsInBioseqs callback explores bioseq segments
6239 *
6240 *****************************************************************************/
6241
RecordSegmentsInBioseqs(GatherObjectPtr gop)6242 static Boolean RecordSegmentsInBioseqs (GatherObjectPtr gop)
6243
6244 {
6245 BioseqPtr bsp = NULL;
6246 BioseqExtraPtr bspextra;
6247 Char buf [128];
6248 Dbtag db;
6249 DeltaSeqPtr dsp;
6250 ExtraIndexPtr exindx;
6251 Int4 from;
6252 Boolean isSeg = FALSE;
6253 ObjectId oi;
6254 ObjMgrDataPtr omdp;
6255 SMSeqIdxPtr segpartptr;
6256 SeqId si;
6257 SeqIdPtr sid;
6258 SeqInt sint;
6259 SeqIntPtr sipp;
6260 SeqLoc sl;
6261 SeqLitPtr slitp;
6262 SeqLocPtr slp = NULL;
6263 Uint1 strand;
6264 Int4 to;
6265
6266 exindx = (ExtraIndexPtr) gop->userdata;
6267 if (exindx == NULL) return FALSE;
6268
6269 switch (gop->itemtype) {
6270 case OBJ_BIOSEQ :
6271 bsp = (BioseqPtr) gop->dataptr;
6272 if (bsp == NULL) return TRUE;
6273 break;
6274 case OBJ_BIOSEQ_SEG :
6275 isSeg = TRUE;
6276 slp = (SeqLocPtr) gop->dataptr;
6277 if (slp == NULL) return TRUE;
6278 break;
6279 case OBJ_BIOSEQ_DELTA :
6280 dsp = (DeltaSeqPtr) gop->dataptr;
6281 if (dsp == NULL) return TRUE;
6282 if (dsp->choice == 1) {
6283 slp = (SeqLocPtr) dsp->data.ptrvalue;
6284 } else if (dsp->choice == 2) {
6285 slitp = (SeqLitPtr) dsp->data.ptrvalue;
6286 if (slitp != NULL) {
6287 /* fake seqloc, same as in DeltaSeqsToSeqLocs */
6288 MemSet ((Pointer) &sl, 0, sizeof (SeqLoc));
6289 MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
6290 MemSet ((Pointer) &si, 0, sizeof (SeqId));
6291 MemSet ((Pointer) &db, 0, sizeof (Dbtag));
6292 MemSet ((Pointer) &oi, 0, sizeof (ObjectId));
6293 sl.choice = SEQLOC_INT;
6294 sl.data.ptrvalue = (Pointer) &sint;
6295 sint.from = 0;
6296 sint.to = slitp->length - 1;
6297 si.choice = SEQID_GENERAL;
6298 si.data.ptrvalue = (Pointer) &db;
6299 db.db = "SeqLit";
6300 db.tag = &oi;
6301 (exindx->seqlitid)++;
6302 oi.id = exindx->seqlitid;
6303 sint.id = &si;
6304 slp = &sl;
6305 }
6306 }
6307 break;
6308 default :
6309 return TRUE;
6310 }
6311
6312 if (bsp != NULL) {
6313 if (bsp->repr == Seq_repr_seg) {
6314 exindx->lastbsp = bsp;
6315 } else if (bsp->repr == Seq_repr_delta) {
6316 exindx->lastbsp = bsp;
6317 } else if (bsp->repr == Seq_repr_ref) {
6318 exindx->lastbsp = bsp;
6319 } else {
6320 exindx->lastbsp = NULL;
6321 }
6322 exindx->cumulative = 0;
6323 return TRUE;
6324 }
6325
6326 if (slp == NULL) return TRUE;
6327
6328 bsp = exindx->lastbsp;
6329 if (bsp == NULL) return TRUE;
6330
6331 omdp = SeqMgrGetOmdpForBioseq (bsp);
6332 if (omdp == NULL) return TRUE;
6333
6334 bspextra = (BioseqExtraPtr) omdp->extradata;
6335 if (bspextra == NULL) {
6336 CreateBioseqExtraBlock (omdp, bsp);
6337 bspextra = (BioseqExtraPtr) omdp->extradata;
6338 }
6339 if (bspextra == NULL) return TRUE;
6340
6341 if (slp->choice == SEQLOC_INT && slp->data.ptrvalue != NULL) {
6342 sipp = (SeqIntPtr) (slp->data.ptrvalue);
6343 from = sipp->from;
6344 to = sipp->to;
6345 strand = sipp->strand;
6346 } else {
6347 from = 0;
6348 to = SeqLocLen (slp) - 1;
6349 strand = SeqLocStrand (slp);
6350 }
6351
6352 if (to - from + 1 < 1) return TRUE;
6353
6354 /* create and fill in SMSeqIdx element */
6355
6356 segpartptr = MemNew (sizeof (SMSeqIdx));
6357 if (segpartptr != NULL) {
6358 sid = SeqLocId (slp);
6359 if (MakeReversedSeqIdString (sid, buf, sizeof (buf) - 1)) {
6360 segpartptr->slp = AsnIoMemCopy (slp,
6361 (AsnReadFunc) SeqLocAsnRead,
6362 (AsnWriteFunc) SeqLocAsnWrite);
6363 segpartptr->seqIdOfPart = StringSave (buf);
6364 if (isSeg) {
6365
6366 /* only annotate parentBioseq for segmented, not delta bioseq */
6367
6368 segpartptr->parentBioseq = bsp;
6369 } else {
6370 segpartptr->parentBioseq = NULL;
6371 }
6372 segpartptr->cumOffset = exindx->cumulative;
6373 segpartptr->from = from;
6374 segpartptr->to = to;
6375 segpartptr->strand = strand;
6376 segpartptr->itemID = gop->itemID;
6377 }
6378 }
6379
6380 exindx->cumulative += (to - from + 1);
6381
6382 /* link into segparthead list of parts IDs */
6383
6384 if (bspextra->segparthead == NULL) {
6385 bspextra->segparthead = segpartptr;
6386 exindx->segpartail = segpartptr;
6387 } else if (exindx->segpartail != NULL) {
6388 exindx->segpartail->next = segpartptr;
6389 exindx->segpartail = segpartptr;
6390 }
6391
6392 return TRUE;
6393 }
6394
6395 /*****************************************************************************
6396 *
6397 * SortFeatItemListByID callback sorts array into feature item table by itemID
6398 * SortFeatItemListBySfp sorts by feature pointer
6399 * SortFeatItemListByPos sorts by feature position
6400 * SortFeatItemListByRev sorts by reverse feature position
6401 *
6402 *****************************************************************************/
6403
SortFeatItemListByID(VoidPtr vp1,VoidPtr vp2)6404 static int LIBCALLBACK SortFeatItemListByID (VoidPtr vp1, VoidPtr vp2)
6405
6406 {
6407 SMFeatItemPtr PNTR spp1 = vp1;
6408 SMFeatItemPtr PNTR spp2 = vp2;
6409 SMFeatItemPtr sp1;
6410 SMFeatItemPtr sp2;
6411
6412 if (spp1 == NULL || spp2 == NULL) return 0;
6413 sp1 = *((SMFeatItemPtr PNTR) spp1);
6414 sp2 = *((SMFeatItemPtr PNTR) spp2);
6415 if (sp1 == NULL || sp2 == NULL) return 0;
6416
6417 /* sort by feature itemID */
6418
6419 if (sp1->itemID > sp2->itemID) {
6420 return 1;
6421 } else if (sp1->itemID < sp2->itemID) {
6422 return -1;
6423
6424 /* for duplicated genes, etc., that cross origin, put ignored item last for binary search */
6425
6426 } else if (sp1->ignore) {
6427 return 1;
6428 } else if (sp2->ignore) {
6429 return -1;
6430 }
6431
6432 return 0;
6433 }
6434
SortFeatItemListBySfp(VoidPtr vp1,VoidPtr vp2)6435 static int LIBCALLBACK SortFeatItemListBySfp (VoidPtr vp1, VoidPtr vp2)
6436
6437 {
6438 SMFeatItemPtr PNTR spp1 = vp1;
6439 SMFeatItemPtr PNTR spp2 = vp2;
6440 SMFeatItemPtr sp1;
6441 SMFeatItemPtr sp2;
6442
6443 if (spp1 == NULL || spp2 == NULL) return 0;
6444 sp1 = *((SMFeatItemPtr PNTR) spp1);
6445 sp2 = *((SMFeatItemPtr PNTR) spp2);
6446 if (sp1 == NULL || sp2 == NULL) return 0;
6447
6448 /* sort by SeqFeatPtr value */
6449
6450 if (sp1->sfp > sp2->sfp) {
6451 return 1;
6452 } else if (sp1->sfp < sp2->sfp) {
6453 return -1;
6454
6455 /* for duplicated genes, etc., that cross origin, put ignored item last for binary search */
6456
6457 } else if (sp1->ignore) {
6458 return 1;
6459 } else if (sp2->ignore) {
6460 return -1;
6461 }
6462
6463 return 0;
6464 }
6465
SortFeatItemListByLabel(VoidPtr vp1,VoidPtr vp2)6466 static int LIBCALLBACK SortFeatItemListByLabel (VoidPtr vp1, VoidPtr vp2)
6467
6468 {
6469 int compare;
6470 SMFeatItemPtr PNTR spp1 = vp1;
6471 SMFeatItemPtr PNTR spp2 = vp2;
6472 SMFeatItemPtr sp1;
6473 SMFeatItemPtr sp2;
6474
6475 if (spp1 == NULL || spp2 == NULL) return 0;
6476 sp1 = *((SMFeatItemPtr PNTR) spp1);
6477 sp2 = *((SMFeatItemPtr PNTR) spp2);
6478 if (sp1 == NULL || sp2 == NULL) return 0;
6479
6480 /* sort by label value */
6481
6482 compare = StringICmp (sp1->label, sp2->label);
6483 if (compare > 0) {
6484 return 1;
6485 } else if (compare < 0) {
6486 return -1;
6487 }
6488
6489 /* If they're case-insensitive the same, but case-sensitive different,
6490 then fall back to sort by case-sensitive
6491 (e.g. AJ344068.1 has genes korA and KorA ) */
6492 compare = StringCmp (sp1->label, sp2->label);
6493 if( compare > 0 ) {
6494 return 1;
6495 } else if( compare < 0 ) {
6496 return -1;
6497 }
6498
6499 /* for duplicated genes, etc., that cross origin, put ignored item last for binary search */
6500
6501 if (sp1->ignore) {
6502 return 1;
6503 } else if (sp2->ignore) {
6504 return -1;
6505 }
6506
6507 return 0;
6508 }
6509
SortFeatItemListByLocusTag(VoidPtr vp1,VoidPtr vp2)6510 static int LIBCALLBACK SortFeatItemListByLocusTag (VoidPtr vp1, VoidPtr vp2)
6511
6512 {
6513 int compare;
6514 GeneRefPtr grp1;
6515 GeneRefPtr grp2;
6516 SeqFeatPtr sfp1;
6517 SeqFeatPtr sfp2;
6518 SMFeatItemPtr PNTR spp1 = vp1;
6519 SMFeatItemPtr PNTR spp2 = vp2;
6520 SMFeatItemPtr sp1;
6521 SMFeatItemPtr sp2;
6522
6523 if (spp1 == NULL || spp2 == NULL) return 0;
6524 sp1 = *((SMFeatItemPtr PNTR) spp1);
6525 sp2 = *((SMFeatItemPtr PNTR) spp2);
6526 if (sp1 == NULL || sp2 == NULL) return 0;
6527
6528 sfp1 = sp1->sfp;
6529 sfp2 = sp2->sfp;
6530 if (sfp1 == NULL || sfp2 == NULL) return 0;
6531
6532 if (sfp1->data.choice != SEQFEAT_GENE || sfp2->data.choice != SEQFEAT_GENE) return 0;
6533 grp1 = (GeneRefPtr) sfp1->data.value.ptrvalue;
6534 grp2 = (GeneRefPtr) sfp2->data.value.ptrvalue;
6535 if (grp1 == NULL || grp2 == NULL) return 0;
6536
6537 /* sort by locus_tag */
6538
6539 compare = StringICmp (grp1->locus_tag, grp2->locus_tag);
6540 if (compare > 0) {
6541 return 1;
6542 } else if (compare < 0) {
6543 return -1;
6544 }
6545
6546 /* sort by locus if locus_tag is identical */
6547
6548 compare = StringICmp (grp1->locus, grp2->locus);
6549 if (compare > 0) {
6550 return 1;
6551 } else if (compare < 0) {
6552 return -1;
6553 }
6554
6555 /* for duplicated genes that cross origin, put ignored item last for binary search */
6556
6557 if (sp1->ignore) {
6558 return 1;
6559 } else if (sp2->ignore) {
6560 return -1;
6561 }
6562
6563 return 0;
6564 }
6565
SortFeatItemListByPos(VoidPtr vp1,VoidPtr vp2)6566 static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2)
6567
6568 {
6569 Int2 compare;
6570 CdRegionPtr crp1;
6571 CdRegionPtr crp2;
6572 Int2 i;
6573 Char id1 [128];
6574 Char id2 [128];
6575 Int2 j;
6576 Int2 numivals;
6577 SeqAnnotPtr sap1;
6578 SeqAnnotPtr sap2;
6579 SeqIdPtr sip1;
6580 SeqIdPtr sip2;
6581 SMFeatItemPtr PNTR spp1 = vp1;
6582 SMFeatItemPtr PNTR spp2 = vp2;
6583 SMFeatItemPtr sp1;
6584 SMFeatItemPtr sp2;
6585 SeqFeatPtr sfp1;
6586 SeqFeatPtr sfp2;
6587 SeqLocPtr slp1;
6588 SeqLocPtr slp2;
6589 Uint1 subtype1;
6590 Uint1 subtype2;
6591
6592 if (spp1 == NULL || spp2 == NULL) return 0;
6593 sp1 = *((SMFeatItemPtr PNTR) spp1);
6594 sp2 = *((SMFeatItemPtr PNTR) spp2);
6595 if (sp1 == NULL || sp2 == NULL) return 0;
6596
6597 /* feature with smallest left extreme is first */
6598
6599 if (sp1->left > sp2->left) {
6600 return 1;
6601 } else if (sp1->left < sp2->left) {
6602 return -1;
6603
6604 /* reversing order so that longest feature is first */
6605
6606 } else if (sp1->right > sp2->right) {
6607 return -1; /* was 1 */
6608 } else if (sp1->right < sp2->right) {
6609 return 1; /* was -1 */
6610 }
6611
6612 /* given identical extremes, put operon features first */
6613
6614 if (sp1->subtype == FEATDEF_operon && sp2->subtype != FEATDEF_operon) {
6615 return -1;
6616 } else if (sp2->subtype == FEATDEF_operon && sp1->subtype != FEATDEF_operon) {
6617 return 1;
6618 }
6619
6620 /* then gene features */
6621
6622 if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) {
6623 return -1;
6624 } else if (sp2->subtype == FEATDEF_GENE && sp1->subtype != FEATDEF_GENE) {
6625 return 1;
6626 }
6627
6628 /* then rna features */
6629
6630 subtype1 = FindFeatFromFeatDefType (sp1->subtype);
6631 subtype2 = FindFeatFromFeatDefType (sp2->subtype);
6632
6633 if (subtype1 == SEQFEAT_RNA && subtype2 != SEQFEAT_RNA) {
6634 return -1;
6635 } else if (subtype2 == SEQFEAT_RNA && subtype1 != SEQFEAT_RNA) {
6636 return 1;
6637 }
6638
6639 /* precursor RNA before non-coding RNA */
6640
6641 if (sp1->subtype == FEATDEF_preRNA && sp2->subtype == FEATDEF_ncRNA) {
6642 return -1;
6643 } else if (sp2->subtype == FEATDEF_preRNA && sp1->subtype == FEATDEF_ncRNA) {
6644 return 1;
6645 }
6646
6647 /* then cds features */
6648
6649 if (sp1->subtype == FEATDEF_CDS && sp2->subtype != FEATDEF_CDS) {
6650 return -1;
6651 } else if (sp2->subtype == FEATDEF_CDS && sp1->subtype != FEATDEF_CDS) {
6652 return 1;
6653 }
6654
6655 /* next compare internal intervals */
6656
6657 numivals = MIN (sp1->numivals, sp2->numivals);
6658 if (numivals > 0 && sp1->ivals != NULL && sp2->ivals != NULL) {
6659 for (i = 0, j = 0; i < numivals; i++) {
6660
6661 /* check biological start position */
6662
6663 if (sp1->ivals [j] > sp2->ivals [j]) {
6664 return 1;
6665 } else if (sp1->ivals [j] < sp2->ivals [j]) {
6666 return -1;
6667 }
6668 j++;
6669
6670 /* check biological stop position */
6671
6672 if (sp1->ivals [j] > sp2->ivals [j]) {
6673 return -1; /* was 1 */
6674 } else if (sp1->ivals [j] < sp2->ivals [j]) {
6675 return 1; /* was -1 */
6676 }
6677 j++;
6678 }
6679 }
6680
6681 /* one with fewer intervals goes first */
6682
6683 if (sp1->numivals > sp2->numivals) {
6684 return 1;
6685 } else if (sp1->numivals < sp2->numivals) {
6686 return -1;
6687 }
6688
6689 /* next compare other feature subtypes */
6690
6691 if (sp1->subtype < sp2->subtype) {
6692 return -1;
6693 } else if (sp1->subtype > sp2->subtype) {
6694 return 1;
6695 }
6696
6697 /* if identical gap ranges, use itemID to put flatfile-generated gap feature last */
6698
6699 if (sp1->subtype == FEATDEF_gap && sp2->subtype == FEATDEF_gap) {
6700 if (sp1->itemID > sp2->itemID) {
6701 return 1;
6702 } else if (sp1->itemID < sp2->itemID) {
6703 return -1;
6704 }
6705 }
6706
6707 /* if identical cds ranges, compare codon_start */
6708
6709 if (sp1->subtype == FEATDEF_CDS && sp2->subtype == FEATDEF_CDS) {
6710 sfp1 = sp1->sfp;
6711 sfp2 = sp2->sfp;
6712 if (sfp1 != NULL && sfp2 != NULL) {
6713
6714 crp1 = (CdRegionPtr) sfp1->data.value.ptrvalue;
6715 crp2 = (CdRegionPtr) sfp2->data.value.ptrvalue;
6716 if (crp1 != NULL && crp2 != NULL) {
6717 if (crp1->frame > 1 || crp2->frame > 1) {
6718 if (crp1->frame < crp2->frame) {
6719 return -1;
6720 } else if (crp1->frame < crp2->frame) {
6721 return 1;
6722 }
6723 }
6724 }
6725 }
6726 }
6727
6728 /* then compare cds or mRNA product identifiers */
6729
6730 sfp1 = sp1->sfp;
6731 sfp2 = sp2->sfp;
6732 if (sfp1 != NULL && sfp2 != NULL) {
6733 slp1 = (SeqLocPtr) sfp1->product;
6734 slp2 = (SeqLocPtr) sfp2->product;
6735 if (slp1 != NULL && slp2 == NULL) {
6736 return 1;
6737 } else if (slp1 == NULL && slp2 != NULL) {
6738 return -1;
6739 } else if (slp1 != NULL && slp2 != NULL) {
6740 sip1 = SeqLocId (slp1);
6741 sip2 = SeqLocId (slp2);
6742 if (sip1 != NULL && sip2 == NULL) {
6743 return 1;
6744 } else if (sip1 == NULL && sip2 != NULL) {
6745 return -1;
6746 } else if (sip1 != NULL && sip2 != NULL) {
6747 SeqIdWrite (sip1, id1, PRINTID_FASTA_LONG, sizeof (id1) - 1);
6748 SeqIdWrite (sip2, id2, PRINTID_FASTA_LONG, sizeof (id2) - 1);
6749 compare = StringCmp (id1, id2);
6750 if (compare > 0) {
6751 return 1;
6752 } else if (compare < 0) {
6753 return -1;
6754 }
6755 }
6756 }
6757 }
6758
6759 /* then compare feature label */
6760
6761 compare = StringCmp (sp1->label, sp2->label);
6762 if (compare > 0) {
6763 return 1;
6764 } else if (compare < 0) {
6765 return -1;
6766 }
6767
6768 /* compare parent seq-annot by itemID (was sap pointer value) */
6769
6770 sap1 = sp1->sap;
6771 sap2 = sp2->sap;
6772 if (sap1 != NULL && sap2 != NULL) {
6773 if (sap1->idx.itemID > sap2->idx.itemID) {
6774 return 1;
6775 } else if (sap1->idx.itemID < sap2->idx.itemID) {
6776 return -1;
6777 }
6778 }
6779
6780 /* last comparison to make it absolutely deterministic */
6781
6782 if (sp1->itemID > sp2->itemID) {
6783 return 1;
6784 } else if (sp1->itemID < sp2->itemID) {
6785 return -1;
6786 }
6787
6788 return 0;
6789 }
6790
SortFeatItemListByRev(VoidPtr vp1,VoidPtr vp2)6791 static int LIBCALLBACK SortFeatItemListByRev (VoidPtr vp1, VoidPtr vp2)
6792
6793 {
6794 Int2 compare;
6795 CdRegionPtr crp1;
6796 CdRegionPtr crp2;
6797 Int2 i;
6798 Int2 j;
6799 Int2 k;
6800 Int2 numivals;
6801 SeqAnnotPtr sap1;
6802 SeqAnnotPtr sap2;
6803 SMFeatItemPtr PNTR spp1 = vp1;
6804 SMFeatItemPtr PNTR spp2 = vp2;
6805 SMFeatItemPtr sp1;
6806 SMFeatItemPtr sp2;
6807 SeqFeatPtr sfp1;
6808 SeqFeatPtr sfp2;
6809 Uint1 subtype1;
6810 Uint1 subtype2;
6811
6812 if (spp1 == NULL || spp2 == NULL) return 0;
6813 sp1 = *((SMFeatItemPtr PNTR) spp1);
6814 sp2 = *((SMFeatItemPtr PNTR) spp2);
6815 if (sp1 == NULL || sp2 == NULL) return 0;
6816
6817 /* feature with largest right extreme is first */
6818
6819 if (sp1->right < sp2->right) {
6820 return 1;
6821 } else if (sp1->right > sp2->right) {
6822 return -1;
6823
6824 /* reversing order so that longest feature is first */
6825
6826 } else if (sp1->left < sp2->left) {
6827 return -1;
6828 } else if (sp1->left > sp2->left) {
6829 return 1;
6830 }
6831
6832 /* given identical extremes, put operon features first */
6833
6834 if (sp1->subtype == FEATDEF_operon && sp2->subtype != FEATDEF_operon) {
6835 return -1;
6836 } else if (sp2->subtype == FEATDEF_operon && sp1->subtype != FEATDEF_operon) {
6837 return 1;
6838 }
6839
6840 /* then gene features */
6841
6842 if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) {
6843 return -1;
6844 } else if (sp2->subtype == FEATDEF_GENE && sp1->subtype != FEATDEF_GENE) {
6845 return 1;
6846 }
6847
6848 /* then rna features */
6849
6850 subtype1 = FindFeatFromFeatDefType (sp1->subtype);
6851 subtype2 = FindFeatFromFeatDefType (sp2->subtype);
6852
6853 if (subtype1 == SEQFEAT_RNA && subtype2 != SEQFEAT_RNA) {
6854 return -1;
6855 } else if (subtype2 == SEQFEAT_RNA && subtype1 != SEQFEAT_RNA) {
6856 return 1;
6857 }
6858
6859 /* precursor RNA before non-coding RNA */
6860
6861 if (sp1->subtype == FEATDEF_preRNA && sp2->subtype == FEATDEF_ncRNA) {
6862 return -1;
6863 } else if (sp2->subtype == FEATDEF_preRNA && sp1->subtype == FEATDEF_ncRNA) {
6864 return 1;
6865 }
6866
6867 /* then cds features */
6868
6869 if (sp1->subtype == FEATDEF_CDS && sp2->subtype != FEATDEF_CDS) {
6870 return -1;
6871 } else if (sp2->subtype == FEATDEF_CDS && sp1->subtype != FEATDEF_CDS) {
6872 return 1;
6873 }
6874
6875 /* next compare internal intervals */
6876
6877 numivals = MIN (sp1->numivals, sp2->numivals);
6878 if (numivals > 0 && sp1->ivals != NULL && sp2->ivals != NULL) {
6879 for (i = 0, j = sp1->numivals * 2, k = sp2->numivals * 2; i < numivals; i++) {
6880
6881 /* check biological stop position */
6882
6883 k--;
6884 j--;
6885 if (sp1->ivals [j] < sp2->ivals [k]) {
6886 return 1;
6887 } else if (sp1->ivals [j] > sp2->ivals [k]) {
6888 return -1;
6889 }
6890
6891 /* check biological start position */
6892
6893 k--;
6894 j--;
6895 if (sp1->ivals [j] < sp2->ivals [k]) {
6896 return -1;
6897 } else if (sp1->ivals [j] > sp2->ivals [k]) {
6898 return 1;
6899 }
6900 }
6901 }
6902
6903 /* one with fewer intervals goes first */
6904
6905 if (sp1->numivals > sp2->numivals) {
6906 return 1;
6907 } else if (sp1->numivals < sp2->numivals) {
6908 return -1;
6909 }
6910
6911 /* next compare other feature subtypes */
6912
6913 if (sp1->subtype < sp2->subtype) {
6914 return -1;
6915 } else if (sp1->subtype > sp2->subtype) {
6916 return 1;
6917 }
6918
6919 /* if identical gap ranges, use itemID to put flatfile-generated gap feature last */
6920
6921 if (sp1->subtype == FEATDEF_gap && sp2->subtype == FEATDEF_gap) {
6922 if (sp1->itemID > sp2->itemID) {
6923 return 1;
6924 } else if (sp1->itemID < sp2->itemID) {
6925 return -1;
6926 }
6927 }
6928
6929 /* if identical cds ranges, compare codon_start */
6930
6931 if (sp1->subtype == FEATDEF_CDS && sp2->subtype == FEATDEF_CDS) {
6932 sfp1 = sp1->sfp;
6933 sfp2 = sp2->sfp;
6934 if (sfp1 != NULL && sfp2 != NULL) {
6935 crp1 = (CdRegionPtr) sfp1->data.value.ptrvalue;
6936 crp2 = (CdRegionPtr) sfp2->data.value.ptrvalue;
6937 if (crp1 != NULL && crp2 != NULL) {
6938 if (crp1->frame > 1 || crp2->frame > 1) {
6939 if (crp1->frame < crp2->frame) {
6940 return -1;
6941 } else if (crp1->frame < crp2->frame) {
6942 return 1;
6943 }
6944 }
6945 }
6946 }
6947 }
6948
6949 /* then compare feature label */
6950
6951 compare = StringCmp (sp1->label, sp2->label);
6952 if (compare > 0) {
6953 return 1;
6954 } else if (compare < 0) {
6955 return -1;
6956 }
6957
6958 /* compare parent seq-annot by itemID (was sap pointer value) */
6959
6960 sap1 = sp1->sap;
6961 sap2 = sp2->sap;
6962 if (sap1 != NULL && sap2 != NULL) {
6963 if (sap1->idx.itemID > sap2->idx.itemID) {
6964 return 1;
6965 } else if (sap1->idx.itemID < sap2->idx.itemID) {
6966 return -1;
6967 }
6968 }
6969
6970 /* last comparison to make it absolutely deterministic */
6971
6972 if (sp1->itemID > sp2->itemID) {
6973 return 1;
6974 } else if (sp1->itemID < sp2->itemID) {
6975 return -1;
6976 }
6977
6978 return 0;
6979 }
6980
SortFidListByFeatID(VoidPtr vp1,VoidPtr vp2)6981 static int LIBCALLBACK SortFidListByFeatID (VoidPtr vp1, VoidPtr vp2)
6982
6983 {
6984 int compare;
6985 SMFidItemPtr PNTR spp1 = vp1;
6986 SMFidItemPtr PNTR spp2 = vp2;
6987 SMFidItemPtr sp1;
6988 SMFidItemPtr sp2;
6989
6990 if (spp1 == NULL || spp2 == NULL) return 0;
6991 sp1 = *((SMFidItemPtr PNTR) spp1);
6992 sp2 = *((SMFidItemPtr PNTR) spp2);
6993 if (sp1 == NULL || sp2 == NULL) return 0;
6994
6995 /* sort by feature itemID label value */
6996
6997 compare = StringICmp (sp1->fid, sp2->fid);
6998 if (compare > 0) {
6999 return 1;
7000 } else if (compare < 0) {
7001 return -1;
7002 }
7003
7004 return 0;
7005 }
7006
7007 /*****************************************************************************
7008 *
7009 * IndexSegmentedParts callback builds index to speed up mapping
7010 * of parts to segmented bioseqs
7011 *
7012 *****************************************************************************/
7013
SortSeqIdxArray(VoidPtr ptr1,VoidPtr ptr2)7014 static int LIBCALLBACK SortSeqIdxArray (VoidPtr ptr1, VoidPtr ptr2)
7015
7016 {
7017 Int2 compare;
7018 SMSeqIdxPtr PNTR partp1 = ptr1;
7019 SMSeqIdxPtr PNTR partp2 = ptr2;
7020 SMSeqIdxPtr part1, part2;
7021
7022 if (partp1 == NULL || partp2 == NULL) return 0;
7023 part1 = *((SMSeqIdxPtr PNTR) partp1);
7024 part2 = *((SMSeqIdxPtr PNTR) partp2);
7025 if (part1 == NULL || part2 == NULL) return 0;
7026 compare = StringCmp (part1->seqIdOfPart, part2->seqIdOfPart);
7027 if (compare > 0) {
7028 return 1;
7029 } else if (compare < 0) {
7030 return -1;
7031 }
7032 if (part1->cumOffset > part2->cumOffset) {
7033 return 1;
7034 } else if (part1->cumOffset < part2->cumOffset) {
7035 return -1;
7036 }
7037 return 0;
7038 }
7039
WithinPartsSet(BioseqPtr bsp)7040 static Boolean WithinPartsSet (BioseqPtr bsp)
7041
7042 {
7043 BioseqSetPtr bssp;
7044
7045 if (bsp == NULL) return FALSE;
7046
7047 if (bsp->idx.parenttype == OBJ_BIOSEQSET && bsp->idx.parentptr != NULL) {
7048 bssp = (BioseqSetPtr) bsp->idx.parentptr;
7049 while (bssp != NULL) {
7050 if (bssp->_class == BioseqseqSet_class_parts) return TRUE;
7051 if (bssp->idx.parenttype != OBJ_BIOSEQSET) return FALSE;
7052 bssp = bssp->idx.parentptr;
7053 }
7054 }
7055
7056 return FALSE;
7057 }
7058
/*
 * IndexSegmentedParts
 *
 *   Recursively walks a SeqEntry.
 *
 *   sep        : entry to process (Bioseq or BioseqSet); NULL is ignored.
 *   lastsegbsp : in/out slot remembering the most recent segmented Bioseq
 *                seen; may be NULL, in which case parent recording is off.
 *
 *   For a BioseqSet, every member is visited, and after a segset has been
 *   processed *lastsegbsp is reset to NULL.
 *
 *   For a raw or delta Bioseq that lies within a parts set while
 *   *lastsegbsp is set, the Bioseq's extra block records *lastsegbsp as
 *   parentBioseq and nothing else is done.
 *
 *   For a seg, delta or ref Bioseq, numsegs is counted from segparthead and
 *   two arrays over the same SMSeqIdx items are stored in the extra block:
 *   partsByLoc (list order) and partsBySeqId (sorted with SortSeqIdxArray).
 */
static void IndexSegmentedParts (SeqEntryPtr sep, BioseqPtr PNTR lastsegbsp)

{
  BioseqPtr         bsp;
  SeqEntryPtr       child;
  BioseqExtraPtr    extra;
  Int4              idx;
  SMSeqIdxPtr PNTR  locOrder;
  SMSeqIdxPtr       node;
  ObjMgrDataPtr     omdp;
  BioseqSetPtr      parentSet;
  Boolean           rawOrDelta;
  Int4              segCount = 0;
  SMSeqIdxPtr PNTR  seqIdOrder;

  if (sep == NULL) return;

  if (IS_Bioseq_set (sep)) {
    parentSet = (BioseqSetPtr) sep->data.ptrvalue;
    if (parentSet == NULL) return;

    child = parentSet->seq_set;
    while (child != NULL) {
      IndexSegmentedParts (child, lastsegbsp);
      child = child->next;
    }

    /* leaving a segset, so forget the segmented bioseq it contained */

    if (parentSet->_class == BioseqseqSet_class_segset && lastsegbsp != NULL) {
      *lastsegbsp = NULL;
    }
    return;
  }

  if (! IS_Bioseq (sep)) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  /* check for raw part packaged with segmented bioseq */

  rawOrDelta = (Boolean) (bsp->repr == Seq_repr_raw || bsp->repr == Seq_repr_delta);
  if (rawOrDelta && lastsegbsp != NULL && *lastsegbsp != NULL && WithinPartsSet (bsp)) {
    omdp = SeqMgrGetOmdpForBioseq (bsp);
    if (omdp == NULL) return;

    extra = (BioseqExtraPtr) omdp->extradata;
    if (extra == NULL) {
      CreateBioseqExtraBlock (omdp, bsp);
      extra = (BioseqExtraPtr) omdp->extradata;
      if (extra == NULL) return;
    }

    /* now record segmented parent of raw part if all are packaged together */

    extra->parentBioseq = *lastsegbsp;
    return;
  }

  /* only seg, delta and ref representations carry a parts list to index */

  if (! (bsp->repr == Seq_repr_seg ||
         bsp->repr == Seq_repr_delta ||
         bsp->repr == Seq_repr_ref)) return;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return;

  extra = (BioseqExtraPtr) omdp->extradata;
  if (extra == NULL) {
    CreateBioseqExtraBlock (omdp, bsp);
    extra = (BioseqExtraPtr) omdp->extradata;
    if (extra == NULL) return;
  }

  if (bsp->repr == Seq_repr_seg && lastsegbsp != NULL) {
    *lastsegbsp = bsp;
  }

  /* count the recorded segment parts */

  node = extra->segparthead;
  while (node != NULL) {
    segCount++;
    node = node->next;
  }
  extra->numsegs = segCount;

  node = extra->segparthead;
  if (segCount < 1 || node == NULL) return;

  /* array in original list order */

  locOrder = (SMSeqIdxPtr PNTR) MemNew (sizeof (SMSeqIdxPtr) * (segCount + 1));
  extra->partsByLoc = locOrder;
  if (locOrder == NULL) return;

  for (idx = 0; idx < segCount && node != NULL; idx++) {
    locOrder [idx] = node;
    node = node->next;
  }

  /* second array over the same items, to be ordered by SeqId */

  seqIdOrder = (SMSeqIdxPtr PNTR) MemNew (sizeof (SMSeqIdxPtr) * (segCount + 1));
  extra->partsBySeqId = seqIdOrder;
  if (seqIdOrder == NULL) return;

  for (idx = 0; idx < segCount; idx++) {
    seqIdOrder [idx] = locOrder [idx];
  }

  /* sort array by SeqId for binary search */

  StableMergeSort ((Pointer) seqIdOrder, segCount, sizeof (SMSeqIdxPtr), SortSeqIdxArray);
}
7161
7162 /*****************************************************************************
7163 *
7164 * IndexRecordedFeatures callback builds sorted arrays of features and genes
7165 *
7166 *****************************************************************************/
7167
/*
 * IndexRecordedFeatures
 *
 *   Recursively walks a SeqEntry and, for each Bioseq that already has an
 *   extra block with recorded features, builds the lookup arrays stored in
 *   that block.
 *
 *   sep        : entry to process (Bioseq or BioseqSet); NULL is ignored.
 *   dorevfeats : if TRUE, also build featsByRev (SortFeatItemListByRev).
 *   baseItemID : if greater than 0, a gap feature whose itemID is at least
 *                baseItemID and whose left/right match the gap feature just
 *                before it in featsByPos is marked ignore.
 *
 *   Arrays built: featsByID (block traversal order), featsBySfp,
 *   featsByPos (each item's index field is set to its position there),
 *   the per-type arrays from SeqMgrBuildFeatureIndex, optionally
 *   featsByRev, featsByLabel and genesByLocusTag.
 *
 *   For amino acid Bioseqs with a CDS found by SeqMgrGetCDSgivenProduct,
 *   each feature location is additionally mapped to the nucleotide and the
 *   last mapped stop is stored in the item's dnaStop (-1 if none mapped).
 */
static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats, Uint4 baseItemID)

{
  BioseqPtr           bsp;
  BioseqExtraPtr      bspextra;
  BioseqSetPtr        bssp;
  SeqFeatPtr          cds;
  SMFeatBlockPtr      curr;
  SeqLocPtr           dnaloc;
  SMFeatItemPtr PNTR  featsByID;
  SMFeatItemPtr PNTR  featsBySfp;
  SMFeatItemPtr PNTR  featsByPos;
  SMFeatItemPtr PNTR  featsByRev;
  SMFeatItemPtr PNTR  featsByLabel;
  SMFeatItemPtr PNTR  genesByLocusTag;
  SMFeatItemPtr PNTR  genesByPos;
  Int4                i;
  Int4                j;
  SMFeatItemPtr       item;
  SMFeatItemPtr       last;
  BioseqPtr           nuc;
  Int4                numfeats;
  Int4                numgenes;
  ObjMgrDataPtr       omdp;
  Int4                pt;
  SeqLocPtr           segloc;
  SeqFeatPtr          sfp;
  SeqLocPtr           slp;
  Int4                stop;

  if (sep == NULL) return;
  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
      IndexRecordedFeatures (sep, dorevfeats, baseItemID);
    }
    return;
  }

  if (! IS_Bioseq (sep)) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return;
  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) return;

  numfeats = bspextra->numfeats;

  curr = bspextra->featlisthead;

  if (numfeats > 0 && curr != NULL) {

    /* build array of pointers into feature items */

    featsByID = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1));
    bspextra->featsByID = featsByID;

    if (featsByID != NULL) {
      i = 0;
      j = 0;
      while (i < numfeats && curr != NULL) {

        /* move to the next block once this one is used up; a block with no
           data array is also treated as used up, otherwise neither i nor j
           would advance and the loop would never terminate */

        if (j >= curr->index || j >= bspextra->blocksize || curr->data == NULL) {
          j = 0;
          curr = curr->next;
        }
        if (curr != NULL && j < curr->index && curr->data != NULL) {
          featsByID [i] = &(curr->data [j]);
          i++;
          j++;
        }
      }
      if (i < numfeats) {
        ErrPostEx (SEV_WARNING, 0, 0, "SeqMgr indexing feature table build problem");
      }

      featsBySfp = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1));
      bspextra->featsBySfp = featsBySfp;

      if (featsBySfp != NULL) {
        for (i = 0; i < numfeats; i++) {
          featsBySfp [i] = featsByID [i];
        }

        /* sort all features by SeqFeatPtr value */

        StableMergeSort ((VoidPtr) featsBySfp, (size_t) numfeats, sizeof (SMFeatItemPtr), SortFeatItemListBySfp);
      }

      featsByPos = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1));
      bspextra->featsByPos = featsByPos;

      if (featsByPos != NULL) {
        for (i = 0; i < numfeats; i++) {
          featsByPos [i] = featsByID [i];
        }

        /* sort all features by feature location on bioseq */

        StableMergeSort ((VoidPtr) featsByPos, (size_t) numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByPos);

        /* each item remembers its own position in the sorted array */

        for (i = 0; i < numfeats; i++) {
          item = featsByPos [i];
          if (item != NULL) {
            item->index = i;
          }
        }

        /* gap feature in record overrides flatfile-generated feature */

        if (baseItemID > 0) {
          last = featsByPos [0];
          for (i = 1; i < numfeats; i++) {
            item = featsByPos [i];
            if (item != NULL && last != NULL) {
              if (last->subtype == FEATDEF_gap && item->subtype == FEATDEF_gap) {
                if (last->left == item->left && last->right == item->right) {
                  if (item->itemID >= baseItemID) {
                    item->ignore = TRUE;
                  }
                }
              }
            }
            last = item;
          }
        }

        /* build arrays of sorted gene, mRNA, CDS, publication, biosource, and operon features for lookup by overlap */

        bspextra->genesByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numgenes), 0, FEATDEF_GENE);
        bspextra->mRNAsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->nummRNAs), 0, FEATDEF_mRNA);
        bspextra->CDSsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numCDSs), 0, FEATDEF_CDS);
        bspextra->pubsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numpubs), 0, FEATDEF_PUB);
        bspextra->orgsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numorgs), 0, FEATDEF_BIOSRC);
        bspextra->operonsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numoperons), 0, FEATDEF_operon);
      }

      if (dorevfeats) {
        featsByRev = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1));
        bspextra->featsByRev = featsByRev;

        if (featsByRev != NULL) {
          for (i = 0; i < numfeats; i++) {
            featsByRev [i] = featsByID [i];
          }

          /* optionally sort all features by feature reverse location on bioseq */

          StableMergeSort ((VoidPtr) featsByRev, (size_t) numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByRev);
        }
      }

      featsByLabel = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1));
      bspextra->featsByLabel = featsByLabel;

      if (featsByLabel != NULL) {
        for (i = 0; i < numfeats; i++) {
          featsByLabel [i] = featsByID [i];
        }

        /* sort all features by label value */

        StableMergeSort ((VoidPtr) featsByLabel, (size_t) numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByLabel);
      }

      genesByPos = bspextra->genesByPos;
      numgenes = bspextra->numgenes;
      if (genesByPos != NULL && numgenes > 0) {

        genesByLocusTag = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numgenes + 1));
        bspextra->genesByLocusTag = genesByLocusTag;

        if (genesByLocusTag != NULL) {
          for (i = 0; i < numgenes; i++) {
            genesByLocusTag [i] = genesByPos [i];
          }

          /* sort by locus_tag value */

          StableMergeSort ((VoidPtr) genesByLocusTag, (size_t) numgenes, sizeof (SMFeatItemPtr), SortFeatItemListByLocusTag);
        }
      }
    }
  }

  /* remaining work applies only to amino acid bioseqs that have features and a CDS */

  if (numfeats < 1 || (! ISA_aa (bsp->mol))) return;
  cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
  if (cds == NULL) return;
  nuc = BioseqFindFromSeqLoc (cds->location);
  if (nuc == NULL) return;

  featsByPos = bspextra->featsByPos;
  if (featsByPos != NULL) {
    for (i = 0; i < numfeats; i++) {
      item = featsByPos [i];
      if (item != NULL) {
        sfp = item->sfp;
        if (sfp != NULL) {

          /* map to dna (on parts if segmented) */

          dnaloc = aaFeatLoc_to_dnaFeatLoc (cds, sfp->location);
          if (dnaloc != NULL) {

            /* map to segmented bioseq coordinates if necessary */

            segloc = SeqLocMergeExEx (nuc, dnaloc, NULL, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE);

            SeqLocFree (dnaloc);
            if (segloc != NULL) {

              slp = NULL;
              stop = -1;

              /* now find where last point maps on nucleotide for flatfile */

              while ((slp = SeqLocFindNext (segloc, slp)) != NULL) {
                pt = SeqLocStop (slp);
                if (pt != -1) {
                  stop = pt;
                }
              }
              item->dnaStop = stop;

              SeqLocFree (segloc);
            }
          }
        }
      }
    }
  }
}
7402
7403 /*****************************************************************************
7404 *
7405 * IndexFeaturesOnEntity makes feature pointers across all Bioseqs in entity
7406 *
7407 *****************************************************************************/
7408
/*
 * IndexFeaturesOnEntity
 *
 *   Recursively walks a SeqEntry and appends every Bioseq's per-bioseq
 *   featsByID entries to one shared array.
 *
 *   sep       : entry to process (Bioseq or BioseqSet).
 *   featsByID : destination array, written starting at position *countP.
 *   countP    : in/out running count of entries written so far.
 *
 *   Does nothing if any argument is NULL.  The caller must supply an array
 *   large enough; no bound is checked here.
 */
static void IndexFeaturesOnEntity (SeqEntryPtr sep, SMFeatItemPtr PNTR featsByID, Int4Ptr countP)

{
  Int4                base;
  BioseqPtr           bsp;
  SeqEntryPtr         child;
  BioseqExtraPtr      extra;
  Int4                k;
  Int4                numfeats;
  ObjMgrDataPtr       omdp;
  BioseqSetPtr        parentSet;
  SMFeatItemPtr PNTR  src;

  if (sep == NULL || featsByID == NULL || countP == NULL) return;

  if (IS_Bioseq_set (sep)) {
    parentSet = (BioseqSetPtr) sep->data.ptrvalue;
    if (parentSet == NULL) return;
    child = parentSet->seq_set;
    while (child != NULL) {
      IndexFeaturesOnEntity (child, featsByID, countP);
      child = child->next;
    }
    return;
  }

  if (! IS_Bioseq (sep)) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return;
  extra = (BioseqExtraPtr) omdp->extradata;
  if (extra == NULL) return;

  numfeats = extra->numfeats;
  src = extra->featsByID;
  if (src == NULL || numfeats <= 0) return;

  /* append this bioseq's items after those already collected */

  base = *countP;
  for (k = 0; k < numfeats; k++) {
    featsByID [base + k] = src [k];
  }
  *countP = base + numfeats;
}
7450
7451 /*****************************************************************************
7452 *
7453 * SortDescItemListByID callback sorts by descriptor itemID
7454 * SortDescItemListBySdp sorts by descriptor pointer
7455 * SortDescItemListByIndex sorts by descriptor index
7456 *
7457 *****************************************************************************/
7458
SortDescItemListByID(VoidPtr vp1,VoidPtr vp2)7459 static int LIBCALLBACK SortDescItemListByID (VoidPtr vp1, VoidPtr vp2)
7460
7461 {
7462 SMDescItemPtr PNTR spp1 = vp1;
7463 SMDescItemPtr PNTR spp2 = vp2;
7464 SMDescItemPtr sp1;
7465 SMDescItemPtr sp2;
7466
7467 if (spp1 == NULL || spp2 == NULL) return 0;
7468 sp1 = *((SMDescItemPtr PNTR) spp1);
7469 sp2 = *((SMDescItemPtr PNTR) spp2);
7470 if (sp1 == NULL || sp2 == NULL) return 0;
7471
7472 /* sort by descriptor itemID */
7473
7474 if (sp1->itemID > sp2->itemID) {
7475 return 1;
7476 } else if (sp1->itemID < sp2->itemID) {
7477 return -1;
7478 }
7479
7480 return 0;
7481 }
7482
SortDescItemListBySdp(VoidPtr vp1,VoidPtr vp2)7483 static int LIBCALLBACK SortDescItemListBySdp (VoidPtr vp1, VoidPtr vp2)
7484
7485 {
7486 SMDescItemPtr PNTR spp1 = vp1;
7487 SMDescItemPtr PNTR spp2 = vp2;
7488 SMDescItemPtr sp1;
7489 SMDescItemPtr sp2;
7490
7491 if (spp1 == NULL || spp2 == NULL) return 0;
7492 sp1 = *((SMDescItemPtr PNTR) spp1);
7493 sp2 = *((SMDescItemPtr PNTR) spp2);
7494 if (sp1 == NULL || sp2 == NULL) return 0;
7495
7496 /* sort by SeqDescrPtr value */
7497
7498 if (sp1->sdp > sp2->sdp) {
7499 return 1;
7500 } else if (sp1->sdp < sp2->sdp) {
7501 return -1;
7502 }
7503
7504 return 0;
7505 }
7506
SortDescItemListByIndex(VoidPtr vp1,VoidPtr vp2)7507 static int LIBCALLBACK SortDescItemListByIndex (VoidPtr vp1, VoidPtr vp2)
7508
7509 {
7510 SMDescItemPtr PNTR spp1 = vp1;
7511 SMDescItemPtr PNTR spp2 = vp2;
7512 SMDescItemPtr sp1;
7513 SMDescItemPtr sp2;
7514
7515 if (spp1 == NULL || spp2 == NULL) return 0;
7516 sp1 = *((SMDescItemPtr PNTR) spp1);
7517 sp2 = *((SMDescItemPtr PNTR) spp2);
7518 if (sp1 == NULL || sp2 == NULL) return 0;
7519
7520 /* sort by descriptor index */
7521
7522 if (sp1->index > sp2->index) {
7523 return 1;
7524 } else if (sp1->index < sp2->index) {
7525 return -1;
7526 }
7527
7528 return 0;
7529 }
7530
7531 /*****************************************************************************
7532 *
7533 * RecordDescriptorsInBioseqs callback records list of relevant descriptors
7534 *
7535 *****************************************************************************/
7536
/*
 * RecordDescriptorsInBioseqs
 *
 *   Walks the descriptors returned by SeqMgrGetNextDescriptor for bsp and
 *   builds a ValNode list of newly allocated SMDescItem records, one per
 *   descriptor, filled from the iteration context.  The list head and the
 *   number of descriptors visited are stored in the Bioseq's extra block
 *   (desclisthead, numdescs).
 *
 *   bsp      : Bioseq to process; NULL is ignored.
 *   userdata : not used.
 *
 *   Does nothing if the Bioseq has no object manager record or no extra
 *   block.  numdescs counts every descriptor visited, including any for
 *   which the record allocation failed.
 */
static void RecordDescriptorsInBioseqs (BioseqPtr bsp, Pointer userdata)

{
  SeqMgrDescContext  context;
  Int4               count = 0;
  SMDescItemPtr      entry;
  BioseqExtraPtr     extra;
  ValNodePtr         listhead = NULL;
  ValNodePtr         node;
  ObjMgrDataPtr      omdp;
  SeqDescrPtr        sdp;
  ValNodePtr         tail = NULL;

  if (bsp == NULL) return;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return;
  extra = (BioseqExtraPtr) omdp->extradata;
  if (extra == NULL) return;

  for (sdp = SeqMgrGetNextDescriptor (bsp, NULL, 0, &context);
       sdp != NULL;
       sdp = SeqMgrGetNextDescriptor (bsp, sdp, 0, &context)) {

    count++;

    entry = (SMDescItemPtr) MemNew (sizeof (SMDescItem));
    if (entry == NULL) continue;

    /* copy what the iterator reported for this descriptor */

    entry->sdp = sdp;
    entry->sep = context.sep;
    entry->itemID = context.itemID;
    entry->index = context.index;
    entry->level = context.level;
    entry->seqdesctype = context.seqdesctype;

    /* append a node after the current tail and hang the record on it */

    node = ValNodeNew (tail);
    if (listhead == NULL) {
      listhead = node;
    }
    tail = node;
    if (node != NULL) {
      node->data.ptrvalue = (Pointer) entry;
    }
  }

  extra->desclisthead = listhead;
  extra->numdescs = count;
}
7585
7586 /*****************************************************************************
7587 *
7588 * RecordDescriptorsOnTopSet callback records list of all descriptors
7589 *
7590 *****************************************************************************/
7591
7592 typedef struct descindex {
7593 ValNodePtr deschead;
7594 ValNodePtr lastdesc;
7595 Int4 numdescs;
7596 } DescIndex, PNTR DescIndexPtr;
7597
/*
 * RecordAllDescsCallback
 *
 *   SeqEntryExplore callback.  For every descriptor directly attached to
 *   the Bioseq or BioseqSet in sep, bumps the running count in the
 *   DescIndex passed through mydata and appends a new SMDescItem record
 *   to its list.
 *
 *   sep    : entry being visited.
 *   mydata : DescIndexPtr accumulator.
 *   index  : not used.
 *   indent : stored as the record's level.
 *
 *   itemID is copied only from descriptors whose extended flag is nonzero;
 *   the record's index is always set to 0.
 */
static void RecordAllDescsCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  DescIndexPtr   acc;
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  SeqDescrPtr    desc = NULL;
  SMDescItemPtr  entry;
  ValNodePtr     node;
  ObjValNodePtr  ovp;

  if (sep == NULL || mydata == NULL) return;
  acc = (DescIndexPtr) mydata;

  /* pick the descriptor chain of whichever object this entry holds */

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    desc = bsp->descr;
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    desc = bssp->descr;
  } else {
    return;
  }

  for (; desc != NULL; desc = desc->next) {
    (acc->numdescs)++;

    entry = (SMDescItemPtr) MemNew (sizeof (SMDescItem));
    if (entry == NULL) continue;

    /* link a new node after the last one and attach the record */

    node = ValNodeNew (acc->lastdesc);
    if (acc->deschead == NULL) {
      acc->deschead = node;
    }
    acc->lastdesc = node;
    if (node != NULL) {
      node->data.ptrvalue = (Pointer) entry;
    }

    entry->sdp = desc;
    entry->sep = sep;
    if (desc->extended != 0) {
      ovp = (ObjValNodePtr) desc;
      entry->itemID = ovp->idx.itemID;
    }
    entry->index = 0;
    entry->level = indent;
    entry->seqdesctype = desc->choice;
  }
}
7647
/*
 * RecordDescriptorsOnTopSet
 *
 *   For a SeqEntry holding a BioseqSet, explores the whole entry with
 *   RecordAllDescsCallback and stores the resulting descriptor list and
 *   count in the set's extra block, creating that block if it is missing.
 *
 *   sep : entry to process; ignored if NULL or not a BioseqSet.
 */
static void RecordDescriptorsOnTopSet (SeqEntryPtr sep)

{
  DescIndex       collected;
  BioseqExtraPtr  extra;
  ObjMgrDataPtr   omdp;
  BioseqSetPtr    topSet;

  if (sep == NULL || ! IS_Bioseq_set (sep)) return;

  topSet = (BioseqSetPtr) sep->data.ptrvalue;
  if (topSet == NULL) return;

  omdp = SeqMgrGetOmdpForPointer (topSet);
  if (omdp == NULL) return;

  extra = (BioseqExtraPtr) omdp->extradata;
  if (extra == NULL) {
    CreateBioseqExtraBlock (omdp, NULL);
    extra = (BioseqExtraPtr) omdp->extradata;
    if (extra == NULL) return;
  }

  /* start from an empty accumulator */

  collected.numdescs = 0;
  collected.lastdesc = NULL;
  collected.deschead = NULL;

  SeqEntryExplore (sep, (Pointer) &collected, RecordAllDescsCallback);

  extra->desclisthead = collected.deschead;
  extra->numdescs = collected.numdescs;
}
7680
7681 /*****************************************************************************
7682 *
7683 * IndexRecordedDescriptors callback builds sorted arrays of descriptors
7684 *
7685 *****************************************************************************/
7686
/*
 * IndexRecordedDescriptors
 *
 *   Explore-style callback.  Takes the descriptor list already recorded in
 *   the extra block of the Bioseq or BioseqSet in sep and builds three
 *   arrays over the same SMDescItem records: descrsByID, descrsBySdp and
 *   descrsByIndex, each sorted with its matching comparison callback.
 *
 *   sep                   : entry being visited; NULL is ignored.
 *   mydata, index, indent : not used.
 *
 *   The three array pointers are stored in the extra block as allocated;
 *   filling and sorting happen only if all three allocations succeeded.
 */
static void IndexRecordedDescriptors (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr           bsp;
  BioseqSetPtr        bssp;
  SMDescItemPtr PNTR  byID;
  SMDescItemPtr PNTR  byIndex;
  SMDescItemPtr PNTR  bySdp;
  SMDescItemPtr       entry;
  BioseqExtraPtr      extra;
  Int4                k;
  ValNodePtr          listhead;
  ValNodePtr          node;
  Int4                total;
  ObjMgrDataPtr       omdp = NULL;

  if (sep == NULL) return;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    omdp = SeqMgrGetOmdpForBioseq (bsp);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    omdp = SeqMgrGetOmdpForPointer (bssp);
  } else {
    return;
  }
  if (omdp == NULL) return;

  extra = (BioseqExtraPtr) omdp->extradata;
  if (extra == NULL) return;

  listhead = extra->desclisthead;
  total = extra->numdescs;
  if (listhead == NULL || total <= 0) return;

  /* build arrays of pointers into descriptor items */

  byID = (SMDescItemPtr PNTR) MemNew (sizeof (SMDescItemPtr) * (total + 1));
  extra->descrsByID = byID;

  bySdp = (SMDescItemPtr PNTR) MemNew (sizeof (SMDescItemPtr) * (total + 1));
  extra->descrsBySdp = bySdp;

  byIndex = (SMDescItemPtr PNTR) MemNew (sizeof (SMDescItemPtr) * (total + 1));
  extra->descrsByIndex = byIndex;

  if (byID == NULL || bySdp == NULL || byIndex == NULL) return;

  /* slot k stays NULL when list node k carries no record */

  k = 0;
  node = listhead;
  while (k < total && node != NULL) {
    entry = (SMDescItemPtr) node->data.ptrvalue;
    if (entry != NULL) {
      byID [k] = entry;
      bySdp [k] = entry;
      byIndex [k] = entry;
    }
    k++;
    node = node->next;
  }

  /* sort all descriptors by itemID, SeqDescrPtr value, or index */

  StableMergeSort ((VoidPtr) byID, (size_t) total, sizeof (SMDescItemPtr), SortDescItemListByID);
  StableMergeSort ((VoidPtr) bySdp, (size_t) total, sizeof (SMDescItemPtr), SortDescItemListBySdp);
  StableMergeSort ((VoidPtr) byIndex, (size_t) total, sizeof (SMDescItemPtr), SortDescItemListByIndex);
}
7752
7753 /*****************************************************************************
7754 *
7755 * DoSegmentedProtein needed because SeqIdWithinBioseq may fail for segmented proteins
7756 *
7757 *****************************************************************************/
7758
/*
 * DoSegmentedProtein
 *
 *   Per-Bioseq callback that fills in protFeat in the extra block of amino
 *   acid Bioseqs that are either segmented or a part of a parent Bioseq.
 *
 *   bsp      : Bioseq to process; NULL is ignored.
 *   userdata : not used.
 *
 *   Nothing is changed if the Bioseq is not amino acid, is a non-segmented
 *   Bioseq with no parent (SeqMgrGetParentOfPart), has no extra block, or
 *   already has protFeat set.  A part takes the result of
 *   SeqMgrGetBestProteinFeature on its parent.  A segmented Bioseq takes
 *   the last SEQFEAT_PROT feature, in label iteration order, whose context
 *   spans 0 .. length - 1.
 */
static void DoSegmentedProtein (BioseqPtr bsp, Pointer userdata)

{
  BioseqExtraPtr     bspextra;
  SeqMgrFeatContext  context;
  ObjMgrDataPtr      omdp;
  BioseqPtr          parent = NULL;
  SeqFeatPtr         sfp;

  /* guard before dereferencing, as the other per-bioseq callbacks do */

  if (bsp == NULL) return;

  if (! ISA_aa (bsp->mol)) return;

  if (bsp->repr != Seq_repr_seg) {
    parent = SeqMgrGetParentOfPart (bsp, NULL);
    if (parent == NULL) return;
  }

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return;

  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) return;

  /* if it already has a best protein feature, return */

  if (bspextra->protFeat != NULL) return;

  /* part of parent inherits best protein from parent */

  if (bsp->repr != Seq_repr_seg && parent != NULL) {
    sfp = SeqMgrGetBestProteinFeature (parent, NULL);
    bspextra->protFeat = sfp;
    return;
  }

  /* now check for full-length proteins on segmented parent */

  sfp = SeqMgrGetNextFeatureByLabel (bsp, NULL, SEQFEAT_PROT, 0, &context);
  while (sfp != NULL) {
    if (context.left == 0 && context.right == bsp->length - 1) {
      bspextra->protFeat = sfp;
    }

    sfp = SeqMgrGetNextFeatureByLabel (bsp, sfp, SEQFEAT_PROT, 0, &context);
  }
}
7804
7805 /*****************************************************************************
7806 *
7807 * IndexAnnotDescsOnBioseqs
7808 *
7809 *****************************************************************************/
7810
SortAbpVnpByBsp(VoidPtr ptr1,VoidPtr ptr2)7811 static int LIBCALLBACK SortAbpVnpByBsp (VoidPtr ptr1, VoidPtr ptr2)
7812
7813 {
7814 AdpBspPtr abp1, abp2;
7815 AnnotDescPtr adp1, adp2;
7816 BioseqPtr bsp1, bsp2;
7817 ObjValNodePtr ovp1, ovp2;
7818 ValNodePtr vnp1, vnp2;
7819
7820 if (ptr1 == NULL || ptr2 == NULL) return 0;
7821 vnp1 = *((ValNodePtr PNTR) ptr1);
7822 vnp2 = *((ValNodePtr PNTR) ptr2);
7823 if (vnp1 == NULL || vnp2 == NULL) return 0;
7824 abp1 = (AdpBspPtr) vnp1->data.ptrvalue;
7825 abp2 = (AdpBspPtr) vnp2->data.ptrvalue;
7826 if (abp1 == NULL || abp2 == NULL) return 0;
7827 bsp1 = (BioseqPtr) abp1->bsp;
7828 bsp2 = (BioseqPtr) abp2->bsp;
7829 if (bsp1 > bsp2) {
7830 return 1;
7831 } else if (bsp1 < bsp2) {
7832 return -1;
7833 }
7834 adp1 = (AnnotDescPtr) abp1->adp;
7835 adp2 = (AnnotDescPtr) abp2->adp;
7836 if (adp1 == NULL || adp2 == NULL) return 0;
7837 if (adp1->extended != 0 && adp2->extended != 0) {
7838 ovp1 = (ObjValNodePtr) adp1;
7839 ovp2 = (ObjValNodePtr) adp2;
7840 if (ovp1->idx.itemID > ovp2->idx.itemID) {
7841 return 1;
7842 } else if (ovp1->idx.itemID < ovp2->idx.itemID) {
7843 return -1;
7844 }
7845 }
7846 return 0;
7847 }
7848
GetBspFromVnpAbpBsp(ValNodePtr vnp)7849 static BioseqPtr GetBspFromVnpAbpBsp (
7850 ValNodePtr vnp
7851 )
7852
7853 {
7854 AdpBspPtr abp;
7855
7856 if (vnp == NULL) return NULL;
7857 abp = (AdpBspPtr) vnp->data.ptrvalue;
7858 if (abp == NULL) return NULL;
7859 return abp->bsp;
7860 }
7861
/* Walks a list of AdpBsp records in which records for the same bioseq are
   adjacent.  For every run of records sharing one non-NULL bioseq whose
   object manager data is of type OBJ_BIOSEQ, allocates an array holding the
   adp pointers of that run and stores it, with the run length, in the
   bioseq's extra block (annotDescByID / numannotdesc). */
static void IndexAnnotDescsOnBioseqs (
  ValNodePtr adphead
)

{
  AdpBspPtr          abp;
  BioseqPtr          bsp;
  BioseqExtraPtr     bspextra;
  Int4               filled, runLength;
  ValNodePtr         node, runEnd, runStart;
  ObjMgrDataPtr      omdp;
  AnnotDescPtr PNTR  table;

  for (runStart = adphead; runStart != NULL; runStart = runEnd) {

    /* measure the run of consecutive records that point at the same bioseq */

    bsp = GetBspFromVnpAbpBsp (runStart);
    runLength = 1;
    runEnd = runStart->next;
    while (runEnd != NULL && GetBspFromVnpAbpBsp (runEnd) == bsp) {
      runLength++;
      runEnd = runEnd->next;
    }

    if (bsp == NULL) continue;

    omdp = SeqMgrGetOmdpForBioseq (bsp);
    if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) continue;

    CreateBioseqExtraBlock (omdp, NULL);
    bspextra = (BioseqExtraPtr) omdp->extradata;
    if (bspextra == NULL) continue;

    table = (AnnotDescPtr PNTR) MemNew (sizeof (AnnotDescPtr) * (runLength + 1));
    if (table == NULL) continue;

    /* a record that is NULL leaves its slot in the table empty */

    node = runStart;
    filled = 0;
    while (node != NULL && filled < runLength) {
      abp = (AdpBspPtr) node->data.ptrvalue;
      if (abp != NULL) {
        table [filled] = abp->adp;
      }
      node = node->next;
      filled++;
    }

    bspextra->annotDescByID = table;
    bspextra->numannotdesc = runLength;
  }
}
7913
/* Builds the feature-ID index stored in bspextra->featsByFeatID.

   Every non-ignored entry of bspextra->featsByID whose feature has an id of
   choice 3 (value read as an ObjectIdPtr) gets an SMFidItem record holding a
   saved copy of the ID text (the string form if it has text, otherwise the
   integer id printed in decimal) and a pointer back to the feature item.
   The records are collected, copied into an array, sorted with
   SortFidListByFeatID, and stored in bspextra together with their count.

   If a list node or the final array cannot be allocated, the records that
   can no longer be indexed are released instead of being leaked, and the
   chain built so far is kept intact. */
static void IndexFeatIDsOnEntity (
  BioseqExtraPtr bspextra
)

{
  Char                buf [32];
  SMFidItemPtr PNTR   featsByFeatID;
  SMFeatItemPtr PNTR  featsByID;
  ValNodePtr          head = NULL, last = NULL;
  SMFeatItemPtr       item;
  Int4                j;
  Int4                len;
  ObjectIdPtr         oip;
  SMFidItemPtr        sfip;
  SeqFeatPtr          sfp;
  ValNodePtr          vnp;

  if (bspextra == NULL || bspextra->numfeats < 1 || bspextra->featsByID == NULL) return;

  featsByID = bspextra->featsByID;
  for (j = 0; j < bspextra->numfeats; j++) {
    item = featsByID [j];
    if (item == NULL) continue;
    if (item->ignore) continue;
    sfp = item->sfp;
    if (sfp == NULL) continue;
    if (sfp->id.choice != 3) continue;
    oip = (ObjectIdPtr) sfp->id.value.ptrvalue;
    if (oip == NULL) continue;
    sfip = (SMFidItemPtr) MemNew (sizeof (SMFidItem));
    if (sfip == NULL) continue;
    if (StringDoesHaveText (oip->str)) {
      sfip->fid = StringSave (oip->str);
    } else {
      sprintf (buf, "%ld", (long) oip->id);
      sfip->fid = StringSave (buf);
    }
    sfip->feat = item;
    vnp = ValNodeAddPointer (&last, 0, (Pointer) sfip);
    if (vnp == NULL) {

      /* node allocation failed: drop this record and leave last untouched,
         so later records are still appended to the chain that head owns */

      MemFree (sfip->fid);
      MemFree (sfip);
      continue;
    }
    if (head == NULL) {
      head = vnp;
    }
    last = vnp;
  }

  len = ValNodeLen (head);
  if (len < 1) return;
  featsByFeatID = (SMFidItemPtr PNTR) MemNew (sizeof (SMFidItemPtr) * (len + 1));
  if (featsByFeatID != NULL) {
    for (vnp = head, j = 0; vnp != NULL; vnp = vnp->next, j++) {
      sfip = (SMFidItemPtr) vnp->data.ptrvalue;
      if (sfip == NULL) continue;
      featsByFeatID [j] = sfip;
    }

    /* sort the entity-wide feature ID records with SortFidListByFeatID */

    StableMergeSort ((VoidPtr) featsByFeatID, (size_t) len, sizeof (SMFidItemPtr), SortFidListByFeatID);

    bspextra->featsByFeatID = featsByFeatID;
    bspextra->numfids = len;
  } else {

    /* no array took ownership of the records, so free them here;
       ValNodeFree below releases only the list nodes */

    for (vnp = head; vnp != NULL; vnp = vnp->next) {
      sfip = (SMFidItemPtr) vnp->data.ptrvalue;
      if (sfip == NULL) continue;
      MemFree (sfip->fid);
      MemFree (sfip);
      vnp->data.ptrvalue = NULL;
    }
  }
  ValNodeFree (head);
}
7978
7979 /*****************************************************************************
7980 *
7981 * SeqMgrReindexBioseqExtraData refreshes internal indices for rapid retrieval
7982 *
7983 *****************************************************************************/
7984
s_DoSeqMgrIndexFeatures(Uint2 entityID,Pointer ptr,Boolean flip,Boolean dorevfeats,ValNodePtr extra)7985 static Uint2 LIBCALL s_DoSeqMgrIndexFeatures (
7986 Uint2 entityID,
7987 Pointer ptr,
7988 Boolean flip,
7989 Boolean dorevfeats,
7990 ValNodePtr extra
7991 )
7992
7993 {
7994 AdpBspPtr abp;
7995 AnnotDescPtr PNTR annotDescByID;
7996 Uint4 baseItemID = 0;
7997 BioseqPtr bsp;
7998 BioseqExtraPtr bspextra;
7999 Int4 count;
8000 ExtraIndex exind;
8001 SMFeatItemPtr PNTR featsByID;
8002 BioseqPtr lastsegbsp = NULL;
8003 Boolean objMgrFilter [OBJ_MAX];
8004 SeqEntryPtr oldscope;
8005 ObjMgrDataPtr omdp;
8006 ValNodePtr publist;
8007 SeqAnnotPtr sap;
8008 SeqEntryPtr sep;
8009 SeqFeatPtr sfp;
8010 ValNodePtr vnp;
8011
8012 if (entityID == 0) {
8013 entityID = ObjMgrGetEntityIDForPointer (ptr);
8014 }
8015 if (entityID == 0) return 0;
8016
8017 /* reset any existing index data on all bioseqs in entity */
8018
8019 SeqMgrClearFeatureIndexes (entityID, NULL);
8020
8021 /* want to scope to bioseqs within the entity, to allow for colliding IDs */
8022
8023 sep = SeqMgrGetTopSeqEntryForEntity (entityID);
8024
8025 /* make top SeqEntry if only Bioseq or BioseqSet was read */
8026
8027 if (sep == NULL) {
8028 omdp = ObjMgrGetData (entityID);
8029 if (omdp != NULL) {
8030 if (omdp->datatype == OBJ_BIOSEQ || omdp->datatype == OBJ_BIOSEQSET) {
8031 sep = SeqEntryNew ();
8032 if (sep != NULL) {
8033 if (omdp->datatype == OBJ_BIOSEQ) {
8034 sep->choice = 1;
8035 sep->data.ptrvalue = omdp->dataptr;
8036 SeqMgrSeqEntry (SM_BIOSEQ, omdp->dataptr, sep);
8037 } else {
8038 sep->choice = 2;
8039 sep->data.ptrvalue = omdp->dataptr;
8040 SeqMgrSeqEntry (SM_BIOSEQSET, omdp->dataptr, sep);
8041 }
8042 }
8043 sep = GetTopSeqEntryForEntityID (entityID);
8044 }
8045 }
8046 }
8047
8048 if (sep == NULL) return 0;
8049
8050 /* clean up many old-style ASN.1 problems without changing structure */
8051
8052 BasicSeqEntryCleanup (sep);
8053
8054 /* do the same cleanup to remotely fetched feature tables */
8055
8056 for (vnp = extra; vnp != NULL; vnp = vnp->next) {
8057 bsp = (BioseqPtr) vnp->data.ptrvalue;
8058 if (bsp == NULL) continue;
8059 for (sap = bsp->annot; sap != NULL; sap = sap->next) {
8060 if (sap->type != 1) continue;
8061 for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
8062 publist = NULL;
8063 CleanUpSeqFeat (sfp, FALSE, FALSE, TRUE, TRUE, &publist);
8064 ValNodeFreeData (publist);
8065 }
8066 }
8067 }
8068
8069 /* set gather/objmgr fields now present in several objects */
8070
8071 AssignIDsInEntityEx (entityID, 0, NULL, extra);
8072
8073 /* get first feature itemID in remote feature tables (including generated gaps) */
8074
8075 for (vnp = extra; vnp != NULL && baseItemID == 0; vnp = vnp->next) {
8076 bsp = (BioseqPtr) vnp->data.ptrvalue;
8077 if (bsp == NULL) continue;
8078 for (sap = bsp->annot; sap != NULL && baseItemID == 0; sap = sap->next) {
8079 if (sap->type != 1) continue;
8080 for (sfp = (SeqFeatPtr) sap->data; sfp != NULL && baseItemID == 0; sfp = sfp->next) {
8081 baseItemID = sfp->idx.itemID;
8082 }
8083 }
8084 }
8085
8086 /* set scope for FindAppropriateBioseq, FindFirstLocalBioseq */
8087
8088 oldscope = SeqEntrySetScope (sep);
8089
8090 /* gather all segmented locations */
8091
8092 exind.topsep = sep;
8093 exind.lastbsp = NULL;
8094 exind.lastsap = NULL;
8095 exind.lastbssp = NULL;
8096 exind.alignhead = NULL;
8097 exind.lastalign = NULL;
8098 exind.adphead = NULL;
8099 exind.lastadp = NULL;
8100 exind.segpartail = NULL;
8101 exind.bspcount = 0;
8102 exind.aligncount = 0;
8103 exind.descrcount = 0;
8104 exind.featcount = 0;
8105 exind.adpcount = 0;
8106 exind.seqlitid = 0;
8107 exind.flip = flip;
8108
8109 MemSet ((Pointer) objMgrFilter, 0, sizeof (objMgrFilter));
8110 objMgrFilter [OBJ_BIOSEQ] = TRUE;
8111 /* objMgrFilter [OBJ_BIOSEQSET] = TRUE not needed */
8112 objMgrFilter [OBJ_BIOSEQ_SEG] = TRUE;
8113 objMgrFilter [OBJ_BIOSEQ_DELTA] = TRUE;
8114 GatherObjectsInEntity (entityID, 0, NULL, RecordSegmentsInBioseqs, (Pointer) &exind, objMgrFilter);
8115
8116 /* build indexes to speed mapping of parts to segmented bioseq */
8117
8118 lastsegbsp = NULL;
8119
8120 IndexSegmentedParts (sep, &lastsegbsp);
8121
8122 /* now gather to get descriptor itemID counts on each bioseq or bioseq set,
8123 and record features on the bioseq indicated by the feature location */
8124
8125 exind.topsep = sep;
8126 exind.lastbsp = NULL;
8127 exind.lastsap = NULL;
8128 exind.lastbssp = NULL;
8129 exind.alignhead = NULL;
8130 exind.lastalign = NULL;
8131 exind.adphead = NULL;
8132 exind.lastadp = NULL;
8133 exind.segpartail = NULL;
8134 exind.bspcount = 0;
8135 exind.aligncount = 0;
8136 exind.descrcount = 0;
8137 exind.featcount = 0;
8138 exind.adpcount = 0;
8139 exind.seqlitid = 0;
8140 exind.flip = flip;
8141
8142 MemSet ((Pointer) objMgrFilter, 0, sizeof (objMgrFilter));
8143 objMgrFilter [OBJ_BIOSEQ] = TRUE;
8144 objMgrFilter [OBJ_BIOSEQSET] = TRUE;
8145 objMgrFilter [OBJ_SEQANNOT] = TRUE;
8146 objMgrFilter [OBJ_ANNOTDESC] = TRUE;
8147 objMgrFilter [OBJ_SEQFEAT] = TRUE;
8148 objMgrFilter [OBJ_SEQALIGN] = TRUE;
8149 GatherObjectsInEntityEx (entityID, 0, NULL, RecordFeaturesInBioseqs, (Pointer) &exind, objMgrFilter, extra);
8150
8151 /* finish building array of sorted features on each indexed bioseq */
8152
8153 IndexRecordedFeatures (sep, dorevfeats, baseItemID);
8154
8155 /* set best protein feature for segmented protein bioseqs and their parts */
8156
8157 VisitBioseqsInSep (sep, NULL, DoSegmentedProtein);
8158
8159 /* resetset scope used to limit FindAppropriateBioseq, FindFirstLocalBioseq */
8160
8161 SeqEntrySetScope (oldscope);
8162
8163 /* stamp top of entity with time of indexing */
8164
8165 omdp = ObjMgrGetData (entityID);
8166 if (omdp != NULL) {
8167 omdp->indexed = GetSecs ();
8168
8169 /* alignment ID to SeqAlignPtr index always goes on top of entity */
8170
8171 SeqMgrIndexAlignments (entityID);
8172
8173 /* master indexes if top of entity is not a Bioseq */
8174
8175 if (omdp->datatype != OBJ_BIOSEQ) {
8176
8177 CreateBioseqExtraBlock (omdp, NULL);
8178 bspextra = (BioseqExtraPtr) omdp->extradata;
8179 if (bspextra != NULL) {
8180
8181 /* make master index of features by itemID at top of entity */
8182
8183 if (exind.featcount > 0) {
8184 featsByID = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (exind.featcount + 1));
8185 if (featsByID != NULL) {
8186 count = 0;
8187 IndexFeaturesOnEntity (sep, featsByID, &count);
8188
8189 /* sort all features on entity-wide list by itemID */
8190
8191 StableMergeSort ((VoidPtr) featsByID, (size_t) count, sizeof (SMFeatItemPtr), SortFeatItemListByID);
8192
8193 bspextra->featsByID = featsByID;
8194 bspextra->numfeats = count;
8195 }
8196 }
8197
8198 /* make master index of annot descs by itemID at top of entity */
8199
8200 if (exind.adpcount > 0) {
8201 annotDescByID = (AnnotDescPtr PNTR) MemNew (sizeof (AnnotDescPtr) * (exind.adpcount + 1));
8202 if (annotDescByID != NULL) {
8203 for (vnp = exind.adphead, count = 0; vnp != NULL && count < (Int4) exind.adpcount; vnp = vnp->next, count++) {
8204 abp = (AdpBspPtr) vnp->data.ptrvalue;
8205 if (abp == NULL) continue;
8206 annotDescByID [count] = abp->adp;
8207 }
8208
8209 bspextra->annotDescByID = annotDescByID;
8210 bspextra->numannotdesc = exind.adpcount;
8211 }
8212 }
8213 }
8214 }
8215
8216 /* add feature ID indexto top of entity */
8217
8218 CreateBioseqExtraBlock (omdp, NULL);
8219 bspextra = (BioseqExtraPtr) omdp->extradata;
8220 if (bspextra != NULL) {
8221 IndexFeatIDsOnEntity (bspextra);
8222 }
8223 }
8224
8225 /* finish indexing list of descriptors on each indexed bioseq */
8226
8227 VisitBioseqsInSep (sep, NULL, RecordDescriptorsInBioseqs);
8228
8229 /* index annot descs on each target bioseq */
8230
8231 if (exind.adphead != NULL) {
8232 exind.adphead = ValNodeSort (exind.adphead, SortAbpVnpByBsp);
8233 IndexAnnotDescsOnBioseqs (exind.adphead);
8234 }
8235
8236 if (IS_Bioseq_set (sep)) {
8237 RecordDescriptorsOnTopSet (sep);
8238 }
8239
8240 SeqEntryExplore (sep, NULL, IndexRecordedDescriptors);
8241
8242 /* free chain of SeqAlignPtr now that index is built */
8243
8244 ValNodeFree (exind.alignhead);
8245
8246 /* free chain of AdpBspPtr (AnnotDescPtr and BioseqPtr) now that index is built */
8247
8248 ValNodeFreeData (exind.adphead);
8249
8250 return entityID;
8251 }
8252
8253 static TNlmMutex smp_feat_index_mutex = NULL;
8254
SeqMgrIndexFeaturesExEx(Uint2 entityID,Pointer ptr,Boolean flip,Boolean dorevfeats,ValNodePtr extra)8255 NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesExEx (
8256 Uint2 entityID,
8257 Pointer ptr,
8258 Boolean flip,
8259 Boolean dorevfeats,
8260 ValNodePtr extra
8261 )
8262
8263 {
8264 Uint2 eID;
8265 Int4 ret;
8266
8267 ret = NlmMutexLockEx (&smp_feat_index_mutex);
8268 if (ret) {
8269 ErrPostEx (SEV_FATAL, 0, 0, "SeqMgrIndexFeatures mutex failed [%ld]", (long) ret);
8270 return 0;
8271 }
8272
8273 eID = s_DoSeqMgrIndexFeatures (entityID, ptr, flip, dorevfeats, extra);
8274
8275 NlmMutexUnlock (smp_feat_index_mutex);
8276
8277 return eID;
8278 }
8279
SeqMgrIndexFeaturesEx(Uint2 entityID,Pointer ptr,Boolean flip,Boolean dorevfeats)8280 NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesEx (
8281 Uint2 entityID,
8282 Pointer ptr,
8283 Boolean flip,
8284 Boolean dorevfeats
8285 )
8286
8287 {
8288 return SeqMgrIndexFeaturesExEx (entityID, ptr, flip, dorevfeats, NULL);
8289 }
8290
SeqMgrIndexFeatures(Uint2 entityID,Pointer ptr)8291 NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeatures (
8292 Uint2 entityID,
8293 Pointer ptr
8294 )
8295
8296 {
8297 return SeqMgrIndexFeaturesExEx (entityID, ptr, FALSE, FALSE, NULL);
8298 }
8299
/*****************************************************************************
*
*   SeqMgrFeaturesAreIndexed checks for presence of time of indexing stamp
*
*****************************************************************************/
8305
SeqMgrFeaturesAreIndexed(Uint2 entityID)8306 NLM_EXTERN time_t LIBCALL SeqMgrFeaturesAreIndexed (Uint2 entityID)
8307
8308 {
8309 ObjMgrDataPtr omdp;
8310
8311 if (entityID == 0) return 0;
8312 omdp = ObjMgrGetData (entityID);
8313 if (omdp == NULL) return 0;
8314 return omdp->indexed;
8315 }
8316
8317 /*****************************************************************************
8318 *
8319 * SeqMgrGetBestProteinFeature and SeqMgrGetCDSgivenProduct take a protein
8320 * bioseq to get the best protein feature or encoding CDS
8321 * SeqMgrGetRNAgivenProduct takes an mRNA (cDNA) bioseq and gets encoding mRNA
8322 * feature on the genomic bioseq
8323 *
8324 *****************************************************************************/
8325
SeqMgrGetProtXref(SeqFeatPtr sfp)8326 NLM_EXTERN ProtRefPtr LIBCALL SeqMgrGetProtXref (SeqFeatPtr sfp)
8327
8328 {
8329 ProtRefPtr prp = NULL;
8330 SeqFeatXrefPtr xref;
8331
8332 if (sfp == NULL) return NULL;
8333 xref = sfp->xref;
8334 while (xref != NULL && xref->data.choice != SEQFEAT_PROT) {
8335 xref = xref->next;
8336 }
8337 if (xref != NULL) {
8338 prp = (ProtRefPtr) xref->data.value.ptrvalue;
8339 }
8340 return prp;
8341 }
8342
/* Fills *context from the indexed feature item that SeqMgrFindSMFeatItemPtr
   returns for sfp.  Does nothing if any argument is NULL or no item is
   found.  userdata is set to NULL and index is the item's index plus one. */
static void SetContextForFeature (SeqFeatPtr sfp, SeqMgrFeatContext PNTR context, ObjMgrDataPtr omdp)

{
  SeqFeatPtr     feat;
  SMFeatItemPtr  item;

  if (sfp == NULL || context == NULL || omdp == NULL) return;

  item = SeqMgrFindSMFeatItemPtr (sfp);
  if (item == NULL) return;
  feat = item->sfp;

  /* identity: use the entityID stored on the feature when it is set,
     otherwise look it up from the object manager data pointer */

  if (feat != NULL && feat->idx.entityID > 0) {
    context->entityID = feat->idx.entityID;
  } else {
    context->entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
  }
  context->itemID = item->itemID;
  context->index = item->index + 1;

  /* owning objects */

  context->sfp = feat;
  context->sap = item->sap;
  context->bsp = item->bsp;
  context->omdp = (Pointer) omdp;
  context->userdata = NULL;

  /* feature type: taken from the feature itself when available */

  if (feat != NULL) {
    context->seqfeattype = feat->data.choice;
  } else {
    context->seqfeattype = FindFeatFromFeatDefType (item->subtype);
  }
  context->featdeftype = item->subtype;
  context->label = item->label;

  /* position and flags copied straight from the index item */

  context->left = item->left;
  context->right = item->right;
  context->dnaStop = item->dnaStop;
  context->strand = item->strand;
  context->partialL = item->partialL;
  context->partialR = item->partialR;
  context->farloc = item->farloc;
  context->bad_order = item->bad_order;
  context->mixed_strand = item->mixed_strand;
  context->ts_image = item->ts_image;
  context->numivals = item->numivals;
  context->ivals = item->ivals;
}
8385
SeqMgrGetBestProteinFeature(BioseqPtr bsp,SeqMgrFeatContext PNTR context)8386 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetBestProteinFeature (BioseqPtr bsp,
8387 SeqMgrFeatContext PNTR context)
8388
8389 {
8390 BioseqExtraPtr bspextra;
8391 ObjMgrDataPtr omdp;
8392
8393 if (context != NULL) {
8394 MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
8395 }
8396 omdp = SeqMgrGetOmdpForBioseq (bsp);
8397 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8398 bspextra = (BioseqExtraPtr) omdp->extradata;
8399 if (bspextra == NULL) return NULL;
8400 SetContextForFeature (bspextra->protFeat, context, omdp);
8401 return bspextra->protFeat;
8402 }
8403
SeqMgrGetCDSgivenProduct(BioseqPtr bsp,SeqMgrFeatContext PNTR context)8404 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetCDSgivenProduct (BioseqPtr bsp,
8405 SeqMgrFeatContext PNTR context)
8406
8407 {
8408 BioseqExtraPtr bspextra;
8409 ObjMgrDataPtr omdp;
8410 SeqFeatPtr sfp;
8411
8412 if (context != NULL) {
8413 MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
8414 }
8415 omdp = SeqMgrGetOmdpForBioseq (bsp);
8416 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8417 bspextra = (BioseqExtraPtr) omdp->extradata;
8418 if (bspextra == NULL) return NULL;
8419 sfp = bspextra->cdsOrRnaFeat;
8420 if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return NULL;
8421 SetContextForFeature (sfp, context, omdp);
8422 return sfp;
8423 }
8424
SeqMgrGetRNAgivenProduct(BioseqPtr bsp,SeqMgrFeatContext PNTR context)8425 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetRNAgivenProduct (BioseqPtr bsp,
8426 SeqMgrFeatContext PNTR context)
8427
8428 {
8429 BioseqExtraPtr bspextra;
8430 ObjMgrDataPtr omdp;
8431 SeqFeatPtr sfp;
8432
8433 if (context != NULL) {
8434 MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
8435 }
8436 omdp = SeqMgrGetOmdpForBioseq (bsp);
8437 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8438 bspextra = (BioseqExtraPtr) omdp->extradata;
8439 if (bspextra == NULL) return NULL;
8440 sfp = bspextra->cdsOrRnaFeat;
8441 if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return NULL;
8442 SetContextForFeature (sfp, context, omdp);
8443 return sfp;
8444 }
8445
SeqMgrGetPROTgivenProduct(BioseqPtr bsp,SeqMgrFeatContext PNTR context)8446 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetPROTgivenProduct (BioseqPtr bsp,
8447 SeqMgrFeatContext PNTR context)
8448
8449 {
8450 BioseqExtraPtr bspextra;
8451 ObjMgrDataPtr omdp;
8452 SeqFeatPtr sfp;
8453
8454 if (context != NULL) {
8455 MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
8456 }
8457 omdp = SeqMgrGetOmdpForBioseq (bsp);
8458 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8459 bspextra = (BioseqExtraPtr) omdp->extradata;
8460 if (bspextra == NULL) return NULL;
8461 sfp = bspextra->cdsOrRnaFeat;
8462 if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return NULL;
8463 SetContextForFeature (sfp, context, omdp);
8464 return sfp;
8465 }
8466
SeqMgrGetSfpProductList(BioseqPtr bsp)8467 NLM_EXTERN ValNodePtr LIBCALL SeqMgrGetSfpProductList (BioseqPtr bsp)
8468
8469 {
8470 BioseqExtraPtr bspextra;
8471 ObjMgrDataPtr omdp;
8472
8473 omdp = SeqMgrGetOmdpForBioseq (bsp);
8474 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8475 bspextra = (BioseqExtraPtr) omdp->extradata;
8476 if (bspextra == NULL) return NULL;
8477 return bspextra->prodlisthead;
8478 }
8479
8480 /*****************************************************************************
8481 *
8482 * SeqMgrGetGeneXref, SeqMgrGeneIsSuppressed, SeqMgrGetProtXref,
8483 * SeqMgrGetOverlappingGene, and SeqMgrGetOverlappingPub
8484 *
8485 *****************************************************************************/
8486
HasNoText(CharPtr str)8487 static Boolean HasNoText (CharPtr str)
8488
8489 {
8490 Char ch;
8491
8492 if (str != NULL) {
8493 ch = *str;
8494 while (ch != '\0') {
8495 if (ch > ' ') {
8496 return FALSE;
8497 }
8498 str++;
8499 ch = *str;
8500 }
8501 }
8502 return TRUE;
8503 }
8504
SeqMgrGetGeneXref(SeqFeatPtr sfp)8505 NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXref (SeqFeatPtr sfp)
8506
8507 {
8508 GeneRefPtr grp = NULL;
8509 SeqFeatXrefPtr xref;
8510
8511 if (sfp == NULL) return NULL;
8512 xref = sfp->xref;
8513 while (xref != NULL && xref->data.choice != SEQFEAT_GENE) {
8514 xref = xref->next;
8515 }
8516 if (xref != NULL) {
8517 grp = (GeneRefPtr) xref->data.value.ptrvalue;
8518 }
8519 return grp;
8520 }
8521
SeqMgrGetGeneXrefEx(SeqFeatPtr sfp,ObjectIdPtr PNTR oipP)8522 NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXrefEx (SeqFeatPtr sfp, ObjectIdPtr PNTR oipP)
8523
8524 {
8525 GeneRefPtr grp = NULL;
8526 ObjectIdPtr oip;
8527 SeqFeatXrefPtr xref;
8528
8529 if (oipP != NULL) {
8530 *oipP = NULL;
8531 }
8532 if (sfp == NULL) return NULL;
8533
8534 /* Look for SeqFeatData xref, maybe with Feature ID as well */
8535 xref = sfp->xref;
8536 while (xref != NULL && xref->data.choice != SEQFEAT_GENE) {
8537 xref = xref->next;
8538 }
8539 if (xref != NULL) {
8540 grp = (GeneRefPtr) xref->data.value.ptrvalue;
8541 if (xref->id.choice == 3) {
8542 oip = (ObjectIdPtr) xref->id.value.ptrvalue;
8543 if (oip != NULL && oipP != NULL) {
8544 *oipP = oip;
8545 }
8546 }
8547 return grp;
8548 }
8549
8550 /* Look for Feature ID xref on its own */
8551 for (xref = sfp->xref; xref != NULL; xref = xref->next) {
8552 if (xref->id.choice == 3) {
8553 oip = (ObjectIdPtr) xref->id.value.ptrvalue;
8554 if (oip != NULL && oipP != NULL) {
8555 *oipP = oip;
8556 }
8557 }
8558 }
8559
8560 return NULL;
8561 }
8562
SeqMgrGeneIsSuppressed(GeneRefPtr grp)8563 NLM_EXTERN Boolean LIBCALL SeqMgrGeneIsSuppressed (GeneRefPtr grp)
8564
8565 {
8566 if (grp == NULL) return FALSE;
8567 if (grp != NULL && HasNoText (grp->locus) && HasNoText (grp->allele) &&
8568 HasNoText (grp->desc) && HasNoText (grp->maploc) &&
8569 HasNoText (grp->locus_tag) && grp->db == NULL &&
8570 grp->syn == NULL) return TRUE;
8571 return FALSE;
8572 }
8573
CheckInternalExonBoundaries(Int2 numivalsA,Int4Ptr ivalsA,Int2 numivalsB,Int4Ptr ivalsB)8574 static Boolean CheckInternalExonBoundaries (Int2 numivalsA, Int4Ptr ivalsA, Int2 numivalsB, Int4Ptr ivalsB)
8575
8576 {
8577 Int2 i;
8578 Int2 j;
8579
8580 if (numivalsA > numivalsB) return FALSE;
8581 if (ivalsA == NULL || ivalsB == NULL) return TRUE;
8582
8583 /* scan first exon-intron boundary against candidate start positions */
8584
8585 for (i = 0; i <= numivalsB - numivalsA; i++) {
8586 if (ivalsA [1] == ivalsB [2 * i + 1]) break;
8587 }
8588 if (i > numivalsB - numivalsA) return FALSE;
8589
8590 /* scan subsequent exon-intron and intron-exon boundaries */
8591
8592 for (j = 2; j <= 2 * numivalsA - 2; j++) {
8593 if (ivalsA [j] != ivalsB [2 * i + j]) return FALSE;
8594 }
8595
8596 return TRUE;
8597 }
8598
StrandsMatch(Uint1 featstrand,Uint1 locstrand)8599 static Boolean StrandsMatch (Uint1 featstrand, Uint1 locstrand)
8600
8601 {
8602 if (featstrand == locstrand) return TRUE;
8603 if (locstrand == Seq_strand_unknown && featstrand != Seq_strand_minus) return TRUE;
8604 if (featstrand == Seq_strand_unknown && locstrand != Seq_strand_minus) return TRUE;
8605 if (featstrand == Seq_strand_both && locstrand != Seq_strand_minus) return TRUE;
8606 if (locstrand == Seq_strand_both) return TRUE;
8607 return FALSE;
8608 }
8609
/* Tests one indexed feature against a location under the given overlap rule.

   feat         candidate feature item
   slp          the location being matched
   left, right  extreme positions of the location
   overlapType  which rule to apply (see branches below)
   numivals,
   ivals        interval endpoints of the location, used by CHECK_INTERVALS

   Returns a non-negative score when the rule is satisfied, or -1 when it is
   not or when overlapType is not one of the handled values.  For the
   range-based rules the score is the sum of the distances between the two
   left ends and the two right ends; for the subset rules it is the value
   returned by SeqLocAinB; for RANGE_MATCH it is 0. */
static Int4 TestForOverlap (SMFeatItemPtr feat, SeqLocPtr slp,
                            Int4 left, Int4 right, Int2 overlapType,
                            Int2 numivals, Int4Ptr ivals)

{
  Int2        cmp;
  SeqLocPtr   featPiece;
  Boolean     hit;
  SeqLocPtr   locPiece;
  Int4        score;
  SeqFeatPtr  sfp;

  if (overlapType == SIMPLE_OVERLAP) {

    /* the two ranges merely have to intersect */

    if (feat->right < left || feat->left > right) return -1;
    score = ABS (left - feat->left) + ABS (feat->right - right);
    return score;
  }

  if (overlapType == CONTAINED_WITHIN) {

    /* the location's range must lie completely inside the feature's range */

    if (feat->left > left || feat->right < right) return -1;
    score = (left - feat->left) + (feat->right - right);
    return score;
  }

  if (overlapType == LOCATION_SUBSET || overlapType == CHECK_INTERVALS) {

    /* SeqLocAinB must accept the location against the feature location;
       CHECK_INTERVALS additionally requires matching internal boundaries
       unless the location has exactly one interval */

    sfp = feat->sfp;
    if (sfp == NULL) return -1;
    score = SeqLocAinB (slp, sfp->location);
    if (score < 0) return -1;
    if (overlapType == LOCATION_SUBSET) return score;
    if (numivals == 1) return score;
    if (CheckInternalExonBoundaries (numivals, ivals, feat->numivals, feat->ivals)) return score;
    return -1;
  }

  if (overlapType == INTERVAL_OVERLAP || overlapType == COMMON_INTERVAL) {

    /* ranges must intersect, and some pair of pieces must either compare as
       anything but SLC_NO_MATCH (INTERVAL_OVERLAP) or compare as
       SLC_A_EQ_B (COMMON_INTERVAL) */

    if (feat->right < left || feat->left > right) return -1;
    sfp = feat->sfp;
    if (sfp == NULL) return -1;

    for (locPiece = SeqLocFindNext (slp, NULL);
         locPiece != NULL;
         locPiece = SeqLocFindNext (slp, locPiece)) {
      for (featPiece = SeqLocFindNext (sfp->location, NULL);
           featPiece != NULL;
           featPiece = SeqLocFindNext (sfp->location, featPiece)) {
        cmp = SeqLocCompare (locPiece, featPiece);
        if (overlapType == INTERVAL_OVERLAP) {
          hit = (Boolean) (cmp != SLC_NO_MATCH);
        } else {
          hit = (Boolean) (cmp == SLC_A_EQ_B);
        }
        if (hit) {
          score = ABS (left - feat->left) + ABS (feat->right - right);
          return score;
        }
      }
    }
    return -1;
  }

  if (overlapType == RANGE_MATCH) {

    /* left and right ends must match exactly */

    if (feat->right == right && feat->left == left) return 0;
  }

  return -1;
}
8697
/* Copies the fields of the chosen feature item into *context and records the
   caller's userdata and the object manager data pointer there.  Does nothing
   unless best, omdp and context are all non-NULL.  index is the item's index
   plus one. */
static void SeqMgrBestOverlapSetContext (
  SMFeatItemPtr best,
  ObjMgrDataPtr omdp,
  Pointer userdata,
  SeqMgrFeatContext PNTR context
)

{
  SeqFeatPtr  feat;

  if (best == NULL || omdp == NULL || context == NULL) return;

  feat = best->sfp;

  /* identity: use the entityID stored on the feature when it is set,
     otherwise look it up from the object manager data pointer */

  if (feat != NULL && feat->idx.entityID > 0) {
    context->entityID = feat->idx.entityID;
  } else {
    context->entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
  }
  context->itemID = best->itemID;
  context->index = best->index + 1;

  /* owning objects and caller data */

  context->sfp = best->sfp;
  context->sap = best->sap;
  context->bsp = best->bsp;
  context->omdp = (Pointer) omdp;
  context->userdata = userdata;

  /* feature type: taken from the feature itself when available */

  if (feat != NULL) {
    context->seqfeattype = feat->data.choice;
  } else {
    context->seqfeattype = FindFeatFromFeatDefType (best->subtype);
  }
  context->featdeftype = best->subtype;
  context->label = best->label;

  /* position and flags copied straight from the index item */

  context->left = best->left;
  context->right = best->right;
  context->dnaStop = best->dnaStop;
  context->strand = best->strand;
  context->partialL = best->partialL;
  context->partialR = best->partialR;
  context->farloc = best->farloc;
  context->bad_order = best->bad_order;
  context->mixed_strand = best->mixed_strand;
  context->ts_image = best->ts_image;
  context->numivals = best->numivals;
  context->ivals = best->ivals;
}
8743
TransSplicedStrandsMatch(Uint1 locstrand,SeqLocPtr slp,SMFeatItemPtr feat)8744 static Boolean TransSplicedStrandsMatch (Uint1 locstrand, SeqLocPtr slp, SMFeatItemPtr feat)
8745
8746 {
8747 Uint1 featstrand;
8748 SeqLocPtr loc;
8749 SeqFeatPtr sfp;
8750
8751 if (slp == NULL || feat == NULL) return FALSE;
8752 sfp = feat->sfp;
8753 if (sfp == NULL) return FALSE;
8754
8755 if (! sfp->excpt) return FALSE;
8756 if (StringISearch (sfp->except_text, "trans-splicing") == NULL) return FALSE;
8757
8758 loc = SeqLocFindNext (sfp->location, NULL);
8759 while (loc != NULL) {
8760 if (SeqLocAinB (slp, loc) >= 0) {
8761 featstrand = SeqLocStrand (loc);
8762 if (StrandsMatch (featstrand, locstrand)) return TRUE;
8763 }
8764 loc = SeqLocFindNext (sfp->location, loc);
8765 }
8766
8767 return FALSE;
8768 }
8769
SeqMgrGetBestOverlappingFeat(SeqLocPtr slp,Uint2 subtype,SMFeatItemPtr PNTR array,Int4 num,Int4Ptr pos,Int2 overlapType,SeqMgrFeatContext PNTR context,Int2Ptr count,Pointer userdata,SeqMgrFeatExploreProc userfunc,Boolean special)8770 static SeqFeatPtr SeqMgrGetBestOverlappingFeat (
8771 SeqLocPtr slp,
8772 Uint2 subtype,
8773 SMFeatItemPtr PNTR array,
8774 Int4 num,
8775 Int4Ptr pos,
8776 Int2 overlapType,
8777 SeqMgrFeatContext PNTR context,
8778 Int2Ptr count,
8779 Pointer userdata,
8780 SeqMgrFeatExploreProc userfunc,
8781 Boolean special
8782 )
8783
8784 {
8785 SMFeatItemPtr best = NULL;
8786 BioseqPtr bsp;
8787 BioseqExtraPtr bspextra;
8788 Int4 diff;
8789 Uint2 entityID;
8790 SMFeatItemPtr feat;
8791 Int4 from;
8792 Boolean goOn = TRUE;
8793 Int4 hier = -1;
8794 Int2 i;
8795 Uint4 index = 0;
8796 Int4Ptr ivals = NULL;
8797 Int4 L;
8798 Int4 left;
8799 SeqLocPtr loc;
8800 Int4 max = INT4_MAX;
8801 Boolean may_be_trans_spliced;
8802 Int4 mid;
8803 Int2 numivals = 0;
8804 SeqEntryPtr oldscope;
8805 ObjMgrDataPtr omdp;
8806 SMFeatItemPtr prev;
8807 Int4 R;
8808 Int4 right;
8809 SeqEntryPtr sep;
8810 Uint1 strand;
8811 Int4 swap;
8812 SeqLocPtr tmp;
8813 Int4 to;
8814
8815 if (context != NULL) {
8816 MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
8817 }
8818 if (pos != NULL) {
8819 *pos = 0;
8820 }
8821 if (count != NULL) {
8822 *count = 0;
8823 }
8824 if (slp == NULL) return NULL;
8825
8826 bsp = FindAppropriateBioseq (slp, NULL, NULL);
8827 if (bsp == NULL) {
8828 bsp = FindFirstLocalBioseq (slp);
8829 }
8830 if (bsp == NULL) return NULL;
8831 omdp = SeqMgrGetOmdpForBioseq (bsp);
8832 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8833
8834 bspextra = (BioseqExtraPtr) omdp->extradata;
8835 if (bspextra == NULL) return NULL;
8836
8837 switch (subtype) {
8838 case FEATDEF_GENE :
8839 array = bspextra->genesByPos;
8840 num = bspextra->numgenes;
8841 break;
8842 case FEATDEF_CDS :
8843 array = bspextra->CDSsByPos;
8844 num = bspextra->numCDSs;
8845 break;
8846 case FEATDEF_mRNA :
8847 array = bspextra->mRNAsByPos;
8848 num = bspextra->nummRNAs;
8849 break;
8850 case FEATDEF_PUB :
8851 array = bspextra->pubsByPos;
8852 num = bspextra->numpubs;
8853 break;
8854 case FEATDEF_BIOSRC :
8855 array = bspextra->orgsByPos;
8856 num = bspextra->numorgs;
8857 break;
8858 case FEATDEF_operon :
8859 array = bspextra->operonsByPos;
8860 num = bspextra->numoperons;
8861 default :
8862 break;
8863 }
8864
8865 if (array == NULL || num < 1) return NULL;
8866
8867 entityID = bsp->idx.entityID;
8868 if (entityID < 1) {
8869 entityID = ObjMgrGetEntityIDForPointer (bsp);
8870 }
8871 sep = SeqMgrGetTopSeqEntryForEntity (entityID);
8872 oldscope = SeqEntrySetScope (sep);
8873
8874 left = GetOffsetInNearBioseq (slp, bsp, SEQLOC_LEFT_END);
8875 right = GetOffsetInNearBioseq (slp, bsp, SEQLOC_RIGHT_END);
8876
8877 SeqEntrySetScope (oldscope);
8878
8879 if (left == -1 || right == -1) return NULL;
8880
8881 /* if feature spans origin, normalize with left < 0 */
8882
8883 if (left > right && bsp->topology == TOPOLOGY_CIRCULAR) {
8884 left -= bsp->length;
8885 }
8886
8887 /* some trans-spliced locations can confound GetOffsetInNearBioseq, so normalize here */
8888
8889 if (left > right) {
8890 swap = left;
8891 left = right;
8892 right = swap;
8893 }
8894
8895 /* binary search to leftmost candidate within the xxxByPos array */
8896
8897 L = 0;
8898 R = num - 1;
8899 while (L < R) {
8900 mid = (L + R) / 2;
8901 feat = array [mid];
8902 if (feat != NULL && feat->right < left) {
8903 L = mid + 1;
8904 } else {
8905 R = mid;
8906 }
8907 }
8908
8909 feat = array [R];
8910
8911 if (feat != NULL && feat->left > left && R > 0) {
8912
8913 /* if hit is already past location, location was in between local hits */
8914
8915 prev = array [R - 1];
8916 if (prev != NULL && prev->overlap != -1) {
8917
8918 /* backup R by one to check appropriate overlap hierarchy */
8919
8920 R--;
8921 feat = array [R];
8922 }
8923 }
8924
8925 if (feat != NULL) {
8926 hier = feat->overlap;
8927 }
8928
8929 loc = SeqLocMergeExEx (bsp, slp, NULL, FALSE, /* TRUE */ FALSE, FALSE, FALSE, TRUE, TRUE, FALSE);
8930 strand = SeqLocStrand (loc);
8931 if (overlapType == CHECK_INTERVALS) {
8932 tmp = NULL;
8933 while ((tmp = SeqLocFindNext (loc, tmp)) != NULL) {
8934 numivals++;
8935 }
8936 if (numivals > 0) {
8937 ivals = MemNew (sizeof (Int4) * (numivals * 2));
8938 if (ivals != NULL) {
8939 tmp = NULL;
8940 i = 0;
8941 while ((tmp = SeqLocFindNext (loc, tmp)) != NULL) {
8942 from = SeqLocStart (tmp);
8943 to = SeqLocStop (tmp);
8944 if (strand == Seq_strand_minus) {
8945 swap = from;
8946 from = to;
8947 to = swap;
8948 }
8949 ivals [i] = from;
8950 i++;
8951 ivals [i] = to;
8952 i++;
8953 }
8954 }
8955 }
8956 }
8957 SeqLocFree (loc);
8958
8959 /* linear scan to smallest covering gene, publication, biosource, etc. */
8960
8961 while (R < num && feat != NULL && feat->left <= right) {
8962
8963 if ((! feat->ignore) || userfunc == NULL) {
8964
8965 /* requires feature to be contained within gene, etc. */
8966
8967 may_be_trans_spliced = (Boolean) (special && (feat->bad_order || feat->mixed_strand));
8968 if (may_be_trans_spliced) {
8969 diff = TestForOverlap (feat, slp, left, right, LOCATION_SUBSET, numivals, ivals);
8970 } else {
8971 diff = TestForOverlap (feat, slp, left, right, overlapType, numivals, ivals);
8972 }
8973 if (diff >= 0) {
8974
8975 if (StrandsMatch (feat->strand, strand) || (may_be_trans_spliced && TransSplicedStrandsMatch (strand, slp, feat))) {
8976
8977 if (userfunc != NULL && context != NULL && goOn) {
8978 SeqMgrBestOverlapSetContext (feat, omdp, userdata, context);
8979 if (! userfunc (feat->sfp, context)) {
8980 goOn = FALSE;
8981 }
8982 if (count != NULL) {
8983 (*count)++;
8984 }
8985 }
8986
8987 /* diff = (left - feat->left) + (feat->right - right); */
8988 /* Don't need to check ties because in this loop we always hit the leftmost first */
8989 if ( diff < max )
8990 {
8991 best = feat;
8992 index = R;
8993 max = diff;
8994 }
8995 }
8996 }
8997 }
8998 R++;
8999 feat = array [R];
9000 }
9001
9002 /* also will go up gene overlap hierarchy pointers from original R hit */
9003
9004 while (hier != -1) {
9005
9006 feat = array [hier];
9007 if (feat != NULL && ((! feat->ignore) || userfunc == NULL)) {
9008
9009 may_be_trans_spliced = (Boolean) (special && (feat->bad_order || feat->mixed_strand));
9010 if (may_be_trans_spliced) {
9011 diff = TestForOverlap (feat, slp, left, right, LOCATION_SUBSET, numivals, ivals);
9012 } else {
9013 diff = TestForOverlap (feat, slp, left, right, overlapType, numivals, ivals);
9014 }
9015 if (diff >= 0) {
9016
9017 if (StrandsMatch (feat->strand, strand) || (may_be_trans_spliced && TransSplicedStrandsMatch (strand, slp, feat))) {
9018
9019 if (userfunc != NULL && context != NULL && goOn) {
9020 SeqMgrBestOverlapSetContext (feat, omdp, userdata, context);
9021 if (! userfunc (feat->sfp, context)) {
9022 goOn = FALSE;
9023 }
9024 if (count != NULL) {
9025 (*count)++;
9026 }
9027 }
9028
9029 /* diff = (left - feat->left) + (feat->right - right); */
9030 /* For ties, first wins */
9031 if (diff < max || ( diff == max && hier < index )) {
9032 best = feat;
9033 index = hier;
9034 max = diff;
9035 }
9036 }
9037 }
9038 hier = feat->overlap;
9039 } else {
9040 hier = -1;
9041 }
9042 }
9043
9044 if (ivals != NULL) {
9045 ivals = MemFree (ivals);
9046 }
9047
9048 if (best != NULL) {
9049 if (pos != NULL) {
9050 *pos = index + 1;
9051 }
9052 if (context != NULL) {
9053 SeqMgrBestOverlapSetContext (best, omdp, userdata, context);
9054 }
9055 return best->sfp;
9056 }
9057
9058 return NULL;
9059 }
9060
TestFeatOverlap(SeqFeatPtr sfpA,SeqFeatPtr sfpB,Int2 overlapType)9061 NLM_EXTERN Int4 TestFeatOverlap (SeqFeatPtr sfpA, SeqFeatPtr sfpB, Int2 overlapType)
9062
9063 {
9064 Int4 diff;
9065 SMFeatItemPtr sfipA, sfipB;
9066
9067 if (sfpA == NULL || sfpB == NULL) return -1;
9068 sfipA = SeqMgrFindSMFeatItemPtr (sfpA);
9069 sfipB = SeqMgrFindSMFeatItemPtr (sfpB);
9070 if (sfipA == NULL || sfipB == NULL) return -1;
9071
9072 diff = TestForOverlap (sfipB, sfpA->location, sfipA->left, sfipA->right,
9073 overlapType, sfipA->numivals, sfipA->ivals);
9074 if (diff < 0) return -1;
9075
9076 if (StrandsMatch (sfipB->strand, sfipA->strand)) {
9077 return diff;
9078 }
9079
9080 return -1;
9081 }
9082
SeqMgrGetOverlappingGene(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9083 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingGene (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9084
9085 {
9086 return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_GENE, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, TRUE);
9087 }
9088
SeqMgrGetOverlappingmRNA(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9089 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingmRNA (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9090
9091 {
9092 return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_mRNA, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
9093 }
9094
SeqMgrGetLocationSupersetmRNA(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9095 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetLocationSupersetmRNA (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9096
9097 {
9098 return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_mRNA, NULL, 0, NULL, LOCATION_SUBSET, context, NULL, NULL, NULL, FALSE);
9099 }
9100
SeqMgrGetOverlappingCDS(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9101 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingCDS (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9102
9103 {
9104 return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_CDS, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
9105 }
9106
SeqMgrGetOverlappingPub(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9107 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingPub (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9108
9109 {
9110 return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_PUB, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
9111 }
9112
SeqMgrGetOverlappingSource(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9113 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingSource (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9114
9115 {
9116 return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_BIOSRC, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
9117 }
9118
SeqMgrGetOverlappingOperon(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9119 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingOperon (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9120
9121 {
9122 return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_operon, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
9123 }
9124
9125 /*****************************************************************************
9126 *
9127 * SeqMgrGetFeatureByLabel returns a feature with the desired label
9128 * If desired, place a SeqMgrFeatContext data structure on the stack, and pass
9129 * in &context as the last parameter
9130 *
9131 *****************************************************************************/
9132
GetLabelOrLocusTag(SMFeatItemPtr feat,Boolean byLocusTag)9133 static CharPtr GetLabelOrLocusTag (SMFeatItemPtr feat, Boolean byLocusTag)
9134
9135 {
9136 GeneRefPtr grp;
9137 SeqFeatPtr sfp;
9138
9139 if (feat == NULL) return NULL;
9140 if (byLocusTag) {
9141 sfp = feat->sfp;
9142 if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return NULL;
9143 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
9144 if (grp == NULL) return NULL;
9145 return grp->locus_tag;
9146 }
9147 return feat->label;
9148 }
9149
SeqMgrGetFeatureByLabelEx(BioseqPtr bsp,CharPtr label,Uint1 seqFeatChoice,Uint1 featDefChoice,Boolean byLocusTag,SeqMgrFeatContext PNTR context)9150 static SeqFeatPtr LIBCALL SeqMgrGetFeatureByLabelEx (BioseqPtr bsp, CharPtr label,
9151 Uint1 seqFeatChoice, Uint1 featDefChoice,
9152 Boolean byLocusTag, SeqMgrFeatContext PNTR context)
9153
9154 {
9155 SMFeatItemPtr PNTR array;
9156 BioseqExtraPtr bspextra;
9157 Uint2 entityID;
9158 SMFeatItemPtr feat;
9159 Int4 L;
9160 Int4 mid;
9161 Int4 num;
9162 ObjMgrDataPtr omdp;
9163 Int4 R;
9164 Uint1 seqfeattype;
9165 SeqFeatPtr sfp;
9166
9167 if (context != NULL) {
9168 MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
9169 }
9170
9171 if (bsp == NULL || StringHasNoText (label)) return NULL;
9172
9173 omdp = SeqMgrGetOmdpForBioseq (bsp);
9174 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9175
9176 bspextra = (BioseqExtraPtr) omdp->extradata;
9177 if (bspextra == NULL) return NULL;
9178 if (byLocusTag) {
9179 array = bspextra->genesByLocusTag;
9180 num = bspextra->numgenes;
9181 } else {
9182 array = bspextra->featsByLabel;
9183 num = bspextra->numfeats;
9184 }
9185 if (array == NULL || num < 1) return NULL;
9186
9187 entityID = bsp->idx.entityID;
9188 if (entityID < 1) {
9189 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
9190 }
9191
9192 /* binary search to leftmost candidate within the featsByLabel array */
9193
9194 L = 0;
9195 R = num - 1;
9196 while (L < R) {
9197 mid = (L + R) / 2;
9198 feat = array [mid];
9199 if (feat != NULL && StringICmp (GetLabelOrLocusTag (feat, byLocusTag), label) < 0) {
9200 L = mid + 1;
9201 } else {
9202 R = mid;
9203 }
9204 }
9205
9206 feat = array [R];
9207
9208 /* linear scan to find desired label on desired feature type */
9209
9210 while (R < num && feat != NULL && StringICmp (GetLabelOrLocusTag (feat, byLocusTag), label) == 0) {
9211 sfp = feat->sfp;
9212 if (sfp != NULL) {
9213 seqfeattype = sfp->data.choice;
9214 if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) &&
9215 (featDefChoice == 0 || feat->subtype == featDefChoice) &&
9216 (! feat->ignore)) {
9217 if (context != NULL) {
9218 context->entityID = entityID;
9219 context->itemID = feat->itemID;
9220 context->sfp = sfp;
9221 context->sap = feat->sap;
9222 context->bsp = feat->bsp;
9223 context->label = GetLabelOrLocusTag (feat, byLocusTag);
9224 context->left = feat->left;
9225 context->right = feat->right;
9226 context->dnaStop = feat->dnaStop;
9227 context->partialL = feat->partialL;
9228 context->partialR = feat->partialR;
9229 context->farloc = feat->farloc;
9230 context->bad_order = feat->bad_order;
9231 context->mixed_strand = feat->mixed_strand;
9232 context->ts_image = feat->ts_image;
9233 context->strand = feat->strand;
9234 context->seqfeattype = seqfeattype;
9235 context->featdeftype = feat->subtype;
9236 context->numivals = feat->numivals;
9237 context->ivals = feat->ivals;
9238 context->userdata = NULL;
9239 context->omdp = (Pointer) omdp;
9240 context->index = R + 1;
9241 }
9242 return sfp;
9243 }
9244 }
9245
9246 R++;
9247 feat = array [R];
9248 }
9249
9250 return NULL;
9251 }
9252
SeqMgrGetFeatureByLabel(BioseqPtr bsp,CharPtr label,Uint1 seqFeatChoice,Uint1 featDefChoice,SeqMgrFeatContext PNTR context)9253 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureByLabel (BioseqPtr bsp, CharPtr label,
9254 Uint1 seqFeatChoice, Uint1 featDefChoice,
9255 SeqMgrFeatContext PNTR context)
9256
9257 {
9258 return SeqMgrGetFeatureByLabelEx (bsp, label, seqFeatChoice, featDefChoice, FALSE, context);
9259 }
9260
SeqMgrGetGeneByLocusTag(BioseqPtr bsp,CharPtr locusTag,SeqMgrFeatContext PNTR context)9261 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetGeneByLocusTag (BioseqPtr bsp, CharPtr locusTag,
9262 SeqMgrFeatContext PNTR context)
9263
9264 {
9265 return SeqMgrGetFeatureByLabelEx (bsp, locusTag, SEQFEAT_GENE, 0, TRUE, context);
9266 }
9267
SeqMgrGetFeatureByFeatID(Uint2 entityID,BioseqPtr bsp,CharPtr featid,SeqFeatXrefPtr xref,SeqMgrFeatContext PNTR context)9268 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureByFeatID (
9269 Uint2 entityID,
9270 BioseqPtr bsp,
9271 CharPtr featid,
9272 SeqFeatXrefPtr xref,
9273 SeqMgrFeatContext PNTR context
9274 )
9275
9276 {
9277 SMFidItemPtr PNTR array;
9278 BioseqExtraPtr bspextra;
9279 Char buf [32];
9280 SMFeatItemPtr feat;
9281 SMFidItemPtr item;
9282 Int4 L;
9283 Int4 mid;
9284 Int4 num;
9285 ObjectIdPtr oip;
9286 ObjMgrDataPtr omdp;
9287 Int4 R;
9288 SeqFeatPtr sfp;
9289
9290 if (context != NULL) {
9291 MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
9292 }
9293
9294 if (entityID > 0) {
9295 omdp = ObjMgrGetData (entityID);
9296 if (omdp == NULL) return NULL;
9297 } else {
9298 if (bsp == NULL) return NULL;
9299 omdp = SeqMgrGetOmdpForBioseq (bsp);
9300 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9301 }
9302 bspextra = (BioseqExtraPtr) omdp->extradata;
9303 if (bspextra == NULL) return NULL;
9304
9305 /* first try array sorted by itemID value */
9306
9307 array = bspextra->featsByFeatID;
9308 num = bspextra->numfids;
9309 if (array == NULL || num < 1) return NULL;
9310
9311 if (StringHasNoText (featid) && xref != NULL && xref->id.choice == 3) {
9312 oip = (ObjectIdPtr) xref->id.value.ptrvalue;
9313 if (oip != NULL) {
9314 if (StringDoesHaveText (oip->str)) {
9315 featid = oip->str;
9316 } else {
9317 sprintf (buf, "%ld", (long) oip->id);
9318 featid = buf;
9319 }
9320 }
9321 }
9322 if (StringHasNoText (featid)) return NULL;
9323
9324 L = 0;
9325 R = num - 1;
9326 while (L < R) {
9327 mid = (L + R) / 2;
9328 item = array [mid];
9329 if (item != NULL && StringICmp (item->fid, featid) < 0) {
9330 L = mid + 1;
9331 } else {
9332 R = mid;
9333 }
9334 }
9335
9336 item = array [R];
9337 if (StringICmp (item->fid, featid) == 0) {
9338 feat = item->feat;
9339 if (feat == NULL) return NULL;
9340 sfp = feat->sfp;
9341 if (sfp != NULL) {
9342 if (! feat->ignore) {
9343 if (context != NULL) {
9344 context->entityID = entityID;
9345 context->itemID = feat->itemID;
9346 context->sfp = sfp;
9347 context->sap = feat->sap;
9348 context->bsp = feat->bsp;
9349 context->label = feat->label;
9350 context->left = feat->left;
9351 context->right = feat->right;
9352 context->dnaStop = feat->dnaStop;
9353 context->partialL = feat->partialL;
9354 context->partialR = feat->partialR;
9355 context->farloc = feat->farloc;
9356 context->bad_order = feat->bad_order;
9357 context->mixed_strand = feat->mixed_strand;
9358 context->ts_image = feat->ts_image;
9359 context->strand = feat->strand;
9360 context->seqfeattype = sfp->data.choice;;
9361 context->featdeftype = feat->subtype;
9362 context->numivals = feat->numivals;
9363 context->ivals = feat->ivals;
9364 context->userdata = NULL;
9365 context->omdp = (Pointer) omdp;
9366 context->index = R + 1;
9367 }
9368 return sfp;
9369 }
9370 }
9371 }
9372
9373 return NULL;
9374 }
9375
9376 /*****************************************************************************
9377 *
9378 * SeqMgrBuildFeatureIndex builds a sorted array index for any feature type
9379 * (including gene, mRNA, CDS, publication, and biosource built-in arrays)
*   SeqMgrGetOverlappingFeature uses the array, or a feature subtype (choices are
*     FEATDEF_GENE, FEATDEF_CDS, FEATDEF_mRNA, FEATDEF_PUB, FEATDEF_BIOSRC, or FEATDEF_operon)
9382 * to find feature overlap, requiring either that the location be completely
9383 * contained within the feature intervals, contained within the feature extreme
9384 * range, or merely that it be overlapped by the feature, and returns the position
9385 * in the index
9386 * SeqMgrGetFeatureInIndex gets an arbitrary feature indexed by the array
9387 *
9388 *****************************************************************************/
9389
SeqMgrBuildFeatureIndex(BioseqPtr bsp,Int4Ptr num,Uint1 seqFeatChoice,Uint1 featDefChoice)9390 NLM_EXTERN VoidPtr LIBCALL SeqMgrBuildFeatureIndex (BioseqPtr bsp, Int4Ptr num,
9391 Uint1 seqFeatChoice, Uint1 featDefChoice)
9392
9393 {
9394 SMFeatItemPtr PNTR array;
9395 BioseqExtraPtr bspextra;
9396 SMFeatItemPtr PNTR featsByPos;
9397 Int4 i;
9398 Int4 j;
9399 Int4 k;
9400 SMFeatItemPtr item;
9401 Int4 numfeats;
9402 Int4 numitems;
9403 SMFeatItemPtr nxtitem;
9404 ObjMgrDataPtr omdp;
9405 Boolean overlaps;
9406 Uint1 seqfeattype;
9407
9408 if (num != NULL) {
9409 *num = 0;
9410 }
9411 if (bsp == NULL) return NULL;
9412 omdp = SeqMgrGetOmdpForBioseq (bsp);
9413 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9414
9415 bspextra = (BioseqExtraPtr) omdp->extradata;
9416 if (bspextra == NULL) return NULL;
9417
9418 featsByPos = bspextra->featsByPos;
9419 numfeats = bspextra->numfeats;
9420 if (featsByPos == NULL || numfeats < 1) return NULL;
9421
9422 for (i = 0, numitems = 0; i < numfeats; i++) {
9423 item = featsByPos [i];
9424 seqfeattype = FindFeatFromFeatDefType (item->subtype);
9425 if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) &&
9426 (featDefChoice == 0 || item->subtype == featDefChoice)) {
9427 numitems++;
9428 }
9429 }
9430 if (numitems < 1) return NULL;
9431
9432 array = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numitems + 1));
9433 if (array == NULL) return NULL;
9434
9435 i = 0;
9436 j = 0;
9437 while (i < numfeats && j < numitems) {
9438 item = featsByPos [i];
9439 seqfeattype = FindFeatFromFeatDefType (item->subtype);
9440 if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) &&
9441 (featDefChoice == 0 || item->subtype == featDefChoice)) {
9442 array [j] = item;
9443 j++;
9444 }
9445 i++;
9446 }
9447
9448 if (num != NULL) {
9449 *num = numitems;
9450 }
9451
9452 for (j = 0; j < numitems - 1; j++) {
9453 item = array [j];
9454 for (k = j + 1, overlaps = TRUE; k < numitems && overlaps; k++) {
9455 nxtitem = array [k];
9456 if ((item->left <= nxtitem->left && item->right > nxtitem->left) ||
9457 (item->left < nxtitem->right && item->right >= nxtitem->right)) {
9458
9459 /* after binary search, also go up the hierarchy chain to avoid traps */
9460
9461 nxtitem->overlap = j;
9462 } else {
9463 overlaps = FALSE;
9464 }
9465 }
9466 }
9467
9468 return (VoidPtr) array;
9469 }
9470
SeqMgrGetOverlappingFeature(SeqLocPtr slp,Uint2 subtype,VoidPtr featarray,Int4 numfeats,Int4Ptr position,Int2 overlapType,SeqMgrFeatContext PNTR context)9471 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeature (SeqLocPtr slp, Uint2 subtype,
9472 VoidPtr featarray, Int4 numfeats,
9473 Int4Ptr position, Int2 overlapType,
9474 SeqMgrFeatContext PNTR context)
9475
9476 {
9477 return SeqMgrGetBestOverlappingFeat (slp, subtype, (SMFeatItemPtr PNTR) featarray,
9478 numfeats, position, overlapType, context, NULL, NULL, NULL, FALSE);
9479 }
9480
SeqMgrGetOverlappingFeatureEx(SeqLocPtr slp,Uint2 subtype,VoidPtr featarray,Int4 numfeats,Int4Ptr position,Int2 overlapType,SeqMgrFeatContext PNTR context,Boolean special)9481 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeatureEx (SeqLocPtr slp, Uint2 subtype,
9482 VoidPtr featarray, Int4 numfeats,
9483 Int4Ptr position, Int2 overlapType,
9484 SeqMgrFeatContext PNTR context,
9485 Boolean special)
9486
9487 {
9488 return SeqMgrGetBestOverlappingFeat (slp, subtype, (SMFeatItemPtr PNTR) featarray,
9489 numfeats, position, overlapType, context, NULL, NULL, NULL, special);
9490 }
9491
SeqMgrGetAllOverlappingFeatures(SeqLocPtr slp,Uint2 subtype,VoidPtr featarray,Int4 numfeats,Int2 overlapType,Pointer userdata,SeqMgrFeatExploreProc userfunc)9492 NLM_EXTERN Int2 LIBCALL SeqMgrGetAllOverlappingFeatures (SeqLocPtr slp, Uint2 subtype,
9493 VoidPtr featarray,
9494 Int4 numfeats,
9495 Int2 overlapType,
9496 Pointer userdata,
9497 SeqMgrFeatExploreProc userfunc)
9498
9499 {
9500 SeqMgrFeatContext context;
9501 Int2 count;
9502
9503 SeqMgrGetBestOverlappingFeat (slp, subtype, (SMFeatItemPtr PNTR) featarray,
9504 numfeats, NULL, overlapType, &context, &count,
9505 userdata, userfunc, FALSE);
9506
9507 return count;
9508 }
9509
SeqMgrGetFeatureInIndex(BioseqPtr bsp,VoidPtr featarray,Int4 numfeats,Uint4 index,SeqMgrFeatContext PNTR context)9510 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureInIndex (BioseqPtr bsp, VoidPtr featarray,
9511 Int4 numfeats, Uint4 index,
9512 SeqMgrFeatContext PNTR context)
9513
9514 {
9515 SMFeatItemPtr PNTR array;
9516 SeqFeatPtr curr;
9517 Uint2 entityID;
9518 SMFeatItemPtr item = NULL;
9519 ObjMgrDataPtr omdp;
9520
9521 if (context != NULL) {
9522 MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
9523 }
9524 if (bsp == NULL || featarray == NULL || numfeats < 1) return NULL;
9525 if (index < 1 || index > (Uint4) numfeats) return NULL;
9526 omdp = SeqMgrGetOmdpForBioseq (bsp);
9527 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9528
9529 array = (SMFeatItemPtr PNTR) featarray;
9530 item = array [index - 1];
9531 if (item == NULL) return NULL;
9532
9533 entityID = bsp->idx.entityID;
9534 if (entityID < 1) {
9535 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
9536 }
9537
9538 curr = item->sfp;
9539 if (curr != NULL && context != NULL && (! item->ignore)) {
9540 context->entityID = entityID;
9541 context->itemID = item->itemID;
9542 context->sfp = curr;
9543 context->sap = item->sap;
9544 context->bsp = item->bsp;
9545 context->label = item->label;
9546 context->left = item->left;
9547 context->right = item->right;
9548 context->dnaStop = item->dnaStop;
9549 context->partialL = item->partialL;
9550 context->partialR = item->partialR;
9551 context->external = item->external;
9552 context->farloc = item->farloc;
9553 context->bad_order = item->bad_order;
9554 context->mixed_strand = item->mixed_strand;
9555 context->ts_image = item->ts_image;
9556 context->strand = item->strand;
9557 if (curr != NULL) {
9558 context->seqfeattype = curr->data.choice;
9559 } else {
9560 context->seqfeattype = FindFeatFromFeatDefType (item->subtype);
9561 }
9562 context->featdeftype = item->subtype;
9563 context->numivals = item->numivals;
9564 context->ivals = item->ivals;
9565 context->userdata = NULL;
9566 context->omdp = (Pointer) omdp;
9567 context->index = item->index + 1;
9568 }
9569 return curr;
9570 }
9571
9572 /*****************************************************************************
9573 *
9574 * SeqMgrGetNextDescriptor and SeqMgrGetNextFeature
9575 *
9576 *****************************************************************************/
9577
SeqMgrGetNextDescriptor(BioseqPtr bsp,ValNodePtr curr,Uint1 seqDescChoice,SeqMgrDescContext PNTR context)9578 NLM_EXTERN ValNodePtr LIBCALL SeqMgrGetNextDescriptor (BioseqPtr bsp, ValNodePtr curr,
9579 Uint1 seqDescChoice,
9580 SeqMgrDescContext PNTR context)
9581
9582 {
9583 BioseqSetPtr bssp;
9584 Uint2 entityID;
9585 ObjMgrDataPtr omdp;
9586 SeqEntryPtr sep;
9587 ValNode vn;
9588
9589 if (context == NULL) return NULL;
9590
9591 /* if curr is NULL, initialize context fields (in user's stack) */
9592
9593 if (curr == NULL) {
9594 if (bsp == NULL) return NULL;
9595 omdp = SeqMgrGetOmdpForBioseq (bsp);
9596 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9597
9598 context->omdp = (Pointer) omdp;
9599 context->itemID = omdp->lastDescrItemID;
9600 context->index = 0;
9601 context->level = 0;
9602
9603 /* start curr just before beginning of bioseq descriptor list */
9604
9605 curr = &vn;
9606 vn.choice = 0;
9607 vn.data.ptrvalue = 0;
9608 vn.next = bsp->descr;
9609 }
9610
9611 omdp = (ObjMgrDataPtr) context->omdp;
9612 if (omdp == NULL) return NULL;
9613
9614 if (bsp != NULL && bsp->idx.entityID > 0) {
9615 entityID = bsp->idx.entityID;
9616 } else {
9617 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
9618 }
9619
9620 if (bsp != NULL && bsp->seqentry != NULL) {
9621 sep = bsp->seqentry;
9622 } else {
9623 sep = ObjMgrGetChoiceForData (omdp->dataptr);
9624 }
9625
9626 /* now look for next appropriate descriptor after curr in current chain */
9627
9628 while (curr != NULL) {
9629 curr = curr->next;
9630 if (curr != NULL) {
9631 (context->itemID)++;
9632 (context->index)++;
9633 if (seqDescChoice == 0 || curr->choice == seqDescChoice) {
9634 context->entityID = entityID;
9635 context->sdp = curr;
9636 context->sep = sep;
9637 context->seqdesctype = curr->choice;
9638 context->userdata = NULL;
9639 context->omdp = (Pointer) omdp;
9640 return curr;
9641 }
9642 }
9643 }
9644
9645 /* now go up omdp chain looking for next descriptor */
9646
9647 while (curr == NULL) {
9648 omdp = SeqMgrGetOmdpForPointer (omdp->parentptr);
9649 if (omdp == NULL) return NULL;
9650
9651 /* update current omdp in context */
9652
9653 context->omdp = (Pointer) omdp;
9654 context->itemID = omdp->lastDescrItemID;
9655
9656 switch (omdp->datatype) {
9657 case OBJ_BIOSEQ :
9658 bsp = (BioseqPtr) omdp->dataptr;
9659 curr = bsp->descr;
9660 break;
9661 case OBJ_BIOSEQSET :
9662 bssp = (BioseqSetPtr) omdp->dataptr;
9663 curr = bssp->descr;
9664 break;
9665 default :
9666 break;
9667 }
9668
9669 if (omdp->datatype == OBJ_BIOSEQ && bsp != NULL && bsp->seqentry != NULL) {
9670 sep = bsp->seqentry;
9671 } else if (omdp->datatype == OBJ_BIOSEQSET && bssp != NULL && bssp->seqentry != NULL) {
9672 sep = bssp->seqentry;
9673 } else {
9674 sep = ObjMgrGetChoiceForData (omdp->dataptr);
9675 }
9676
9677 (context->level)++;
9678
9679 /* now look for first appropriate descriptor in current chain */
9680
9681 while (curr != NULL) {
9682 (context->itemID)++;
9683 (context->index)++;
9684 if (seqDescChoice == 0 || curr->choice == seqDescChoice) {
9685 context->entityID = entityID;
9686 context->sdp = curr;
9687 context->sep = sep;
9688 context->seqdesctype = curr->choice;
9689 context->userdata = NULL;
9690 context->omdp = (Pointer) omdp;
9691 return curr;
9692 }
9693 curr = curr->next;
9694 }
9695 }
9696
9697 return curr;
9698 }
9699
SeqMgrGetNextFeatureEx(BioseqPtr bsp,SeqFeatPtr curr,Uint1 seqFeatChoice,Uint1 featDefChoice,SeqMgrFeatContext PNTR context,Boolean byLabel,Boolean byLocusTag)9700 static SeqFeatPtr LIBCALL SeqMgrGetNextFeatureEx (BioseqPtr bsp, SeqFeatPtr curr,
9701 Uint1 seqFeatChoice, Uint1 featDefChoice,
9702 SeqMgrFeatContext PNTR context,
9703 Boolean byLabel, Boolean byLocusTag)
9704
9705 {
9706 SMFeatItemPtr PNTR array = NULL;
9707 BioseqExtraPtr bspextra;
9708 Uint2 entityID;
9709 Uint4 i;
9710 SMFeatItemPtr item;
9711 Int4 num = 0;
9712 ObjMgrDataPtr omdp;
9713 Uint1 seqfeattype;
9714
9715 if (context == NULL) return NULL;
9716
9717 /* if curr is NULL, initialize context fields (in user's stack) */
9718
9719
9720 if (curr == NULL) {
9721 if (bsp == NULL) return NULL;
9722 omdp = SeqMgrGetOmdpForBioseq (bsp);
9723 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9724
9725 context->omdp = (Pointer) omdp;
9726 context->index = 0;
9727 }
9728
9729 omdp = (ObjMgrDataPtr) context->omdp;
9730 if (omdp == NULL) return NULL;
9731 bspextra = (BioseqExtraPtr) omdp->extradata;
9732 if (bspextra == NULL) return NULL;
9733 if (byLocusTag) {
9734 array = bspextra->genesByLocusTag;
9735 num = bspextra->numgenes;
9736 } else if (byLabel) {
9737 array = bspextra->featsByLabel;
9738 num = bspextra->numfeats;
9739 } else {
9740 array = bspextra->featsByPos;
9741 num = bspextra->numfeats;
9742 }
9743 if (array == NULL || num < 1) return NULL;
9744
9745 if (bsp != NULL && bsp->idx.entityID > 0) {
9746 entityID = bsp->idx.entityID;
9747 } else {
9748 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
9749 }
9750
9751 i = context->index;
9752
9753 /* now look for next appropriate feature */
9754
9755 while (i < (Uint4) num) {
9756 item = array [i];
9757 if (item != NULL) {
9758 curr = item->sfp;
9759 i++;
9760 if (curr != NULL) {
9761 seqfeattype = curr->data.choice;
9762 if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) &&
9763 (featDefChoice == 0 || item->subtype == featDefChoice) &&
9764 (! item->ignore)) {
9765 context->entityID = entityID;
9766 context->itemID = item->itemID;
9767 context->sfp = curr;
9768 context->sap = item->sap;
9769 context->bsp = item->bsp;
9770 context->label = item->label;
9771 context->left = item->left;
9772 context->right = item->right;
9773 context->dnaStop = item->dnaStop;
9774 context->partialL = item->partialL;
9775 context->partialR = item->partialR;
9776 context->external = item->external;
9777 context->farloc = item->farloc;
9778 context->bad_order = item->bad_order;
9779 context->mixed_strand = item->mixed_strand;
9780 context->ts_image = item->ts_image;
9781 context->strand = item->strand;
9782 context->seqfeattype = seqfeattype;
9783 context->featdeftype = item->subtype;
9784 context->numivals = item->numivals;
9785 context->ivals = item->ivals;
9786 context->userdata = NULL;
9787 context->omdp = (Pointer) omdp;
9788 if (byLocusTag) {
9789 context->index = i;
9790 } else if (byLabel) {
9791 context->index = i;
9792 } else {
9793 context->index = item->index + 1;
9794 }
9795 return curr;
9796 }
9797 }
9798 }
9799 }
9800
9801 return NULL;
9802 }
9803
SeqMgrGetNextFeature(BioseqPtr bsp,SeqFeatPtr curr,Uint1 seqFeatChoice,Uint1 featDefChoice,SeqMgrFeatContext PNTR context)9804 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetNextFeature (BioseqPtr bsp, SeqFeatPtr curr,
9805 Uint1 seqFeatChoice, Uint1 featDefChoice,
9806 SeqMgrFeatContext PNTR context)
9807
9808 {
9809 return SeqMgrGetNextFeatureEx (bsp, curr, seqFeatChoice, featDefChoice, context, FALSE, FALSE);
9810 }
9811
SeqMgrGetNextFeatureByLabel(BioseqPtr bsp,SeqFeatPtr curr,Uint1 seqFeatChoice,Uint1 featDefChoice,SeqMgrFeatContext PNTR context)9812 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetNextFeatureByLabel (BioseqPtr bsp, SeqFeatPtr curr,
9813 Uint1 seqFeatChoice, Uint1 featDefChoice,
9814 SeqMgrFeatContext PNTR context)
9815
9816 {
9817 return SeqMgrGetNextFeatureEx (bsp, curr, seqFeatChoice, featDefChoice, context, TRUE, FALSE);
9818 }
9819
SeqMgrGetNextGeneByLocusTag(BioseqPtr bsp,SeqFeatPtr curr,SeqMgrFeatContext PNTR context)9820 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetNextGeneByLocusTag (BioseqPtr bsp, SeqFeatPtr curr,
9821 SeqMgrFeatContext PNTR context
9822 )
9823
9824 {
9825 return SeqMgrGetNextFeatureEx (bsp, curr, SEQFEAT_GENE, 0, context, FALSE, TRUE);
9826 }
9827
SeqMgrGetNextAnnotDesc(BioseqPtr bsp,AnnotDescPtr curr,Uint1 annotDescChoice,SeqMgrAndContext PNTR context)9828 NLM_EXTERN AnnotDescPtr LIBCALL SeqMgrGetNextAnnotDesc (
9829 BioseqPtr bsp,
9830 AnnotDescPtr curr,
9831 Uint1 annotDescChoice,
9832 SeqMgrAndContext PNTR context
9833 )
9834
9835 {
9836 Uint1 annotdesctype;
9837 AnnotDescPtr PNTR array = NULL;
9838 BioseqExtraPtr bspextra;
9839 Uint2 entityID;
9840 Uint4 i;
9841 AnnotDescPtr item;
9842 Int4 num = 0;
9843 ObjMgrDataPtr omdp;
9844 ObjValNodePtr ovp;
9845
9846 if (context == NULL) return NULL;
9847
9848 /* if curr is NULL, initialize context fields (in user's stack) */
9849
9850
9851 if (curr == NULL) {
9852 if (bsp == NULL) return NULL;
9853 /*
9854 entityID = ObjMgrGetEntityIDForPointer (bsp);
9855 if (entityID < 1) return NULL;
9856 omdp = ObjMgrGetData (entityID);
9857 if (omdp == NULL) return NULL;
9858 */
9859 omdp = SeqMgrGetOmdpForBioseq (bsp);
9860 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9861
9862 context->omdp = (Pointer) omdp;
9863 context->index = 0;
9864 }
9865
9866 omdp = (ObjMgrDataPtr) context->omdp;
9867 if (omdp == NULL) return NULL;
9868
9869 bspextra = (BioseqExtraPtr) omdp->extradata;
9870 if (bspextra == NULL) return NULL;
9871
9872 array = bspextra->annotDescByID;
9873 num = bspextra->numannotdesc;
9874 if (array == NULL || num < 1) return NULL;
9875
9876 if (bsp != NULL && bsp->idx.entityID > 0) {
9877 entityID = bsp->idx.entityID;
9878 } else {
9879 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
9880 }
9881
9882 i = context->index;
9883
9884 /* now look for next appropriate annotdesc */
9885
9886 while (i < (Uint4) num) {
9887 item = array [i];
9888 if (item != NULL && item->extended != 0) {
9889 ovp = (ObjValNodePtr) item;
9890 i++;
9891 annotdesctype = item->choice;
9892 if (annotDescChoice == 0 || annotdesctype == annotDescChoice) {
9893 context->entityID = entityID;
9894 context->itemID = ovp->idx.itemID;
9895 context->adp = item;
9896 context->annotdesctype = annotdesctype;
9897 context->userdata = NULL;
9898 context->omdp = (Pointer) omdp;
9899 context->index = i;
9900 return item;
9901 }
9902 }
9903 }
9904
9905 return NULL;
9906 }
9907
9908 /*****************************************************************************
9909 *
9910 * SeqMgrExploreBioseqs, SeqMgrExploreSegments, SeqMgrExploreDescriptors,
9911 * SeqMgrExploreFeatures, SeqMgrVisitDescriptors, and SeqMgrVisitFeatures
9912 *
9913 *****************************************************************************/
9914
/*
 * JustExamineBioseqs
 *
 * Recursive worker for SeqMgrExploreBioseqs.  For a Bioseq entry it applies
 * the nucs/prots molecule filter, fills in the shared context from the
 * bioseq's extra block, bumps context->index and *count (if count is given),
 * and calls userfunc.  For a BioseqSet entry it recurses over seq_set,
 * passing the set down as bssp.  A parts set is skipped unless parts is TRUE,
 * and inside it both nucs and prots are forced TRUE.
 *
 * Returns FALSE to unwind every level of recursion (NULL sep/context/userfunc,
 * or userfunc returned FALSE); TRUE means "keep going".
 */

static Boolean JustExamineBioseqs (SeqEntryPtr sep, BioseqSetPtr bssp,
                                   SeqMgrBioseqContextPtr context,
                                   SeqMgrBioseqExploreProc userfunc,
                                   Boolean nucs, Boolean prots, Boolean parts,
                                   Int4Ptr count)

{
  BioseqPtr       bsp;
  SeqEntryPtr     child;
  BioseqExtraPtr  extra;
  ObjMgrDataPtr   omdp;
  BioseqSetPtr    set;

  if (sep == NULL || context == NULL || userfunc == NULL) return FALSE;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return TRUE;

    /* molecule type filter */

    if ((ISA_na (bsp->mol) && (! nucs)) ||
        (ISA_aa (bsp->mol) && (! prots))) return TRUE;

    /* bioseqs without index information are silently passed over */

    omdp = SeqMgrGetOmdpForBioseq (bsp);
    if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return TRUE;
    extra = (BioseqExtraPtr) omdp->extradata;
    if (extra == NULL) return TRUE;

    context->itemID = extra->bspItemID;
    context->bsp = bsp;
    context->sep = sep;
    context->bssp = bssp;
    context->numsegs = extra->numsegs;
    context->omdp = omdp;
    (context->index)++;

    if (count != NULL) {
      (*count)++;
    }

    /* a FALSE answer from the callback stops the whole exploration */

    if (userfunc (bsp, context)) return TRUE;
    return FALSE;
  }

  if (! IS_Bioseq_set (sep)) return TRUE;

  set = (BioseqSetPtr) sep->data.ptrvalue;
  if (set == NULL) return TRUE;

  if (set->_class == BioseqseqSet_class_parts) {
    if (! parts) return TRUE;

    /* every component of an explored parts set is shown, whatever its type */

    nucs = TRUE;
    prots = TRUE;
  }

  for (child = set->seq_set; child != NULL; child = child->next) {
    if (! JustExamineBioseqs (child, set, context, userfunc, nucs, prots, parts, count)) {
      return FALSE;
    }
  }

  return TRUE;
}
9983
SeqMgrExploreBioseqs(Uint2 entityID,Pointer ptr,Pointer userdata,SeqMgrBioseqExploreProc userfunc,Boolean nucs,Boolean prots,Boolean parts)9984 NLM_EXTERN Int4 LIBCALL SeqMgrExploreBioseqs (Uint2 entityID, Pointer ptr, Pointer userdata,
9985 SeqMgrBioseqExploreProc userfunc,
9986 Boolean nucs, Boolean prots, Boolean parts)
9987
9988 {
9989 SeqMgrBioseqContext context;
9990 Int4 count = 0;
9991 SeqEntryPtr sep;
9992
9993 if (entityID == 0) {
9994 entityID = ObjMgrGetEntityIDForPointer (ptr);
9995 }
9996 if (entityID == 0) return 0;
9997 sep = SeqMgrGetTopSeqEntryForEntity (entityID);
9998 if (sep == NULL) return 0;
9999 if (userfunc == NULL) return 0;
10000
10001 context.entityID = entityID;
10002 context.index = 0;
10003 context.userdata = userdata;
10004
10005 /* recursive call to explore SeqEntry and pass appropriate bioseqs to user */
10006
10007 JustExamineBioseqs (sep, NULL, &context, userfunc, nucs, prots, parts, &count);
10008
10009 return count;
10010 }
10011
SeqMgrExploreSegments(BioseqPtr bsp,Pointer userdata,SeqMgrSegmentExploreProc userfunc)10012 NLM_EXTERN Int4 LIBCALL SeqMgrExploreSegments (BioseqPtr bsp, Pointer userdata,
10013 SeqMgrSegmentExploreProc userfunc)
10014
10015 {
10016 BioseqExtraPtr bspextra;
10017 SeqMgrSegmentContext context;
10018 Int4 count = 0;
10019 Uint2 entityID;
10020 Uint4 i;
10021 ObjMgrDataPtr omdp;
10022 SMSeqIdxPtr PNTR partsByLoc;
10023 SMSeqIdxPtr segpartptr;
10024 SeqLocPtr slp;
10025
10026 if (bsp == NULL) return 0;
10027 if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta && bsp->repr != Seq_repr_ref) return 0;
10028 omdp = SeqMgrGetOmdpForBioseq (bsp);
10029 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return 0;
10030 if (userfunc == NULL) return 0;
10031 entityID = bsp->idx.entityID;
10032 if (entityID < 1) {
10033 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
10034 }
10035
10036 bspextra = (BioseqExtraPtr) omdp->extradata;
10037 if (bspextra == NULL) return 0;
10038 partsByLoc = bspextra->partsByLoc;
10039 if (partsByLoc == NULL || bspextra->numsegs < 1) return 0;
10040
10041 for (i = 0; i < (Uint4) bspextra->numsegs; i++) {
10042 segpartptr = partsByLoc [i];
10043 if (segpartptr != NULL) {
10044 slp = segpartptr->slp;
10045 context.entityID = entityID;
10046 context.itemID = segpartptr->itemID;
10047 context.slp = slp;
10048 context.parent = segpartptr->parentBioseq;
10049 context.cumOffset = segpartptr->cumOffset;
10050 context.from = segpartptr->from;
10051 context.to = segpartptr->to;
10052 context.strand = segpartptr->strand;
10053 context.userdata = userdata;
10054 context.omdp = (Pointer) omdp;
10055 context.index = i + 1;
10056
10057 count++;
10058
10059 if (! userfunc (slp, &context)) return count;
10060 }
10061 }
10062
10063 return count;
10064 }
10065
SeqMgrExploreDescriptors(BioseqPtr bsp,Pointer userdata,SeqMgrDescExploreProc userfunc,BoolPtr seqDescFilter)10066 NLM_EXTERN Int4 LIBCALL SeqMgrExploreDescriptors (BioseqPtr bsp, Pointer userdata,
10067 SeqMgrDescExploreProc userfunc,
10068 BoolPtr seqDescFilter)
10069
10070 {
10071 BioseqSetPtr bssp;
10072 SeqMgrDescContext context;
10073 Int4 count = 0;
10074 Uint2 entityID;
10075 Uint4 itemID;
10076 ObjMgrDataPtr omdp;
10077 ValNodePtr sdp;
10078 SeqEntryPtr sep;
10079
10080 if (bsp == NULL) return 0;
10081 omdp = SeqMgrGetOmdpForBioseq (bsp);
10082 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return 0;
10083 if (userfunc == NULL) return 0;
10084 entityID = bsp->idx.entityID;
10085 if (entityID < 1) {
10086 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
10087 }
10088
10089 context.index = 0;
10090 context.level = 0;
10091 while (omdp != NULL) {
10092 itemID = omdp->lastDescrItemID;
10093 sdp = NULL;
10094 switch (omdp->datatype) {
10095 case OBJ_BIOSEQ :
10096 bsp = (BioseqPtr) omdp->dataptr;
10097 sdp = bsp->descr;
10098 break;
10099 case OBJ_BIOSEQSET :
10100 bssp = (BioseqSetPtr) omdp->dataptr;
10101 sdp = bssp->descr;
10102 break;
10103 default :
10104 break;
10105 }
10106
10107 sep = ObjMgrGetChoiceForData (omdp->dataptr);
10108
10109 /* call for every appropriate descriptor in current chain */
10110
10111 while (sdp != NULL) {
10112 itemID++;
10113 if (seqDescFilter == NULL || seqDescFilter [sdp->choice]) {
10114 context.entityID = entityID;
10115 context.itemID = itemID;
10116 context.sdp = sdp;
10117 context.sep = sep;
10118 context.seqdesctype = sdp->choice;
10119 context.userdata = userdata;
10120 context.omdp = (Pointer) omdp;
10121 (context.index)++;
10122
10123 count++;
10124
10125 if (! userfunc (sdp, &context)) return count;
10126 }
10127 sdp = sdp->next;
10128 }
10129
10130 /* now go up omdp chain looking for next descriptor */
10131
10132 omdp = SeqMgrGetOmdpForPointer (omdp->parentptr);
10133 (context.level)++;
10134 }
10135 return count;
10136 }
10137
SeqMgrExploreFeaturesInt(BioseqPtr bsp,Pointer userdata,SeqMgrFeatExploreProc userfunc,SeqLocPtr locationFilter,BoolPtr seqFeatFilter,BoolPtr featDefFilter,Boolean doreverse)10138 static Int4 LIBCALL SeqMgrExploreFeaturesInt (BioseqPtr bsp, Pointer userdata,
10139 SeqMgrFeatExploreProc userfunc,
10140 SeqLocPtr locationFilter,
10141 BoolPtr seqFeatFilter,
10142 BoolPtr featDefFilter,
10143 Boolean doreverse)
10144
10145 {
10146 BioseqExtraPtr bspextra;
10147 SeqMgrFeatContext context;
10148 Int4 count = 0;
10149 Uint2 entityID;
10150 SMFeatItemPtr PNTR featsByID;
10151 SMFeatItemPtr PNTR featsByPos;
10152 SMFeatItemPtr PNTR featsByRev;
10153 Uint4 i;
10154 SMFeatItemPtr item;
10155 Int4 left = INT4_MIN;
10156 ObjMgrDataPtr omdp;
10157 Int4 right = INT4_MAX;
10158 Uint1 seqfeattype;
10159 SeqFeatPtr sfp;
10160 Uint4 start = 0;
10161 Int4 tmp;
10162
10163 if (bsp == NULL) return 0;
10164 omdp = SeqMgrGetOmdpForBioseq (bsp);
10165 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return 0;
10166 if (userfunc == NULL) return 0;
10167 entityID = bsp->idx.entityID;
10168 if (entityID < 1) {
10169 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
10170 }
10171
10172 bspextra = (BioseqExtraPtr) omdp->extradata;
10173 if (bspextra == NULL) return 0;
10174
10175 if (doreverse) {
10176 if (bspextra->featsByRev == NULL) {
10177
10178 /* index by reverse position if not already done */
10179
10180 featsByRev = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (bspextra->numfeats + 1));
10181 bspextra->featsByRev = featsByRev;
10182
10183 if (featsByRev != NULL) {
10184 featsByID = bspextra->featsByID;
10185 for (i = 0; i < (Uint4) bspextra->numfeats; i++) {
10186 featsByRev [i] = featsByID [i];
10187 }
10188
10189 /* sort all features by feature reverse location on bioseq */
10190
10191 StableMergeSort ((VoidPtr) featsByRev, (size_t) bspextra->numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByRev);
10192 }
10193 }
10194
10195 featsByPos = bspextra->featsByRev;
10196 } else {
10197 featsByPos = bspextra->featsByPos;
10198 }
10199 if (featsByPos == NULL || bspextra->numfeats < 1) return 0;
10200
10201 if (locationFilter != NULL) {
10202 left = GetOffsetInBioseq (locationFilter, bsp, SEQLOC_LEFT_END);
10203 if (left == -1) left = INT4_MIN;
10204 right = GetOffsetInBioseq (locationFilter, bsp, SEQLOC_RIGHT_END);
10205 if (right == -1) right = INT4_MAX;
10206
10207 /* if far segmented or delta, and location (from explore
10208 segments) is minus strand, will need to swap */
10209
10210 if (left > right) {
10211 tmp = left;
10212 left = right;
10213 right = tmp;
10214 }
10215
10216 /*
10217 binary search to leftmost candidate would need featsByPos array
10218 variant sorted primarily by rightmost position, so comment this
10219 out for now, resurrect and add new array only if it turns out to
10220 be necessary when we support entrez fetch subrecord by location
10221 */
10222
10223 /*
10224 L = 0;
10225 R = bspextra->numfeats - 1;
10226 while (L < R) {
10227 mid = (L + R) / 2;
10228 item = featsByPos [mid];
10229 if (item != NULL && item->right < left) {
10230 L = mid + 1;
10231 } else {
10232 R = mid;
10233 }
10234 }
10235
10236 start = R;
10237 */
10238 }
10239
10240 /* call for every appropriate feature in sorted list */
10241
10242 for (i = start; i < (Uint4) bspextra->numfeats; i++) {
10243 item = featsByPos [i];
10244 if (item != NULL) {
10245
10246 /* can exit once past rightmost limit */
10247
10248 if (locationFilter != NULL && (! doreverse) && item->left > right) return count;
10249 if (locationFilter != NULL && (doreverse) && item->right < left) return count;
10250
10251 sfp = item->sfp;
10252 if (sfp != NULL) {
10253 seqfeattype = sfp->data.choice;
10254 } else {
10255 seqfeattype = FindFeatFromFeatDefType (item->subtype);
10256 }
10257 if ((seqFeatFilter == NULL || seqFeatFilter [seqfeattype]) &&
10258 (featDefFilter == NULL || featDefFilter [item->subtype]) &&
10259 (locationFilter == NULL || (item->right > left && item->left <= right)) &&
10260 (! item->ignore)) {
10261 context.entityID = entityID;
10262 context.itemID = item->itemID;
10263 context.sfp = sfp;
10264 context.sap = item->sap;
10265 context.bsp = item->bsp;
10266 context.label = item->label;
10267 context.left = item->left;
10268 context.right = item->right;
10269 context.dnaStop = item->dnaStop;
10270 context.partialL = item->partialL;
10271 context.partialR = item->partialR;
10272 context.external = item->external;
10273 context.farloc = item->farloc;
10274 context.bad_order = item->bad_order;
10275 context.mixed_strand = item->mixed_strand;
10276 context.strand = item->strand;
10277 context.seqfeattype = seqfeattype;
10278 context.featdeftype = item->subtype;
10279 context.numivals = item->numivals;
10280 context.ivals = item->ivals;
10281 context.userdata = userdata;
10282 context.omdp = (Pointer) omdp;
10283 context.index = item->index + 1;
10284
10285 count++;
10286
10287 if (! userfunc (sfp, &context)) return count;
10288 }
10289 }
10290 }
10291 return count;
10292 }
10293
SeqMgrExploreFeatures(BioseqPtr bsp,Pointer userdata,SeqMgrFeatExploreProc userfunc,SeqLocPtr locationFilter,BoolPtr seqFeatFilter,BoolPtr featDefFilter)10294 NLM_EXTERN Int4 LIBCALL SeqMgrExploreFeatures (BioseqPtr bsp, Pointer userdata,
10295 SeqMgrFeatExploreProc userfunc,
10296 SeqLocPtr locationFilter,
10297 BoolPtr seqFeatFilter,
10298 BoolPtr featDefFilter)
10299
10300 {
10301 return SeqMgrExploreFeaturesInt (bsp, userdata, userfunc, locationFilter, seqFeatFilter, featDefFilter, FALSE);
10302 }
10303
SeqMgrExploreFeaturesRev(BioseqPtr bsp,Pointer userdata,SeqMgrFeatExploreProc userfunc,SeqLocPtr locationFilter,BoolPtr seqFeatFilter,BoolPtr featDefFilter)10304 NLM_EXTERN Int4 LIBCALL SeqMgrExploreFeaturesRev (BioseqPtr bsp, Pointer userdata,
10305 SeqMgrFeatExploreProc userfunc,
10306 SeqLocPtr locationFilter,
10307 BoolPtr seqFeatFilter,
10308 BoolPtr featDefFilter)
10309
10310 {
10311 return SeqMgrExploreFeaturesInt (bsp, userdata, userfunc, locationFilter, seqFeatFilter, featDefFilter, TRUE);
10312 }
10313
VisitDescriptorsPerSeqEntry(Uint2 entityID,SeqEntryPtr sep,Pointer userdata,SeqMgrDescExploreProc userfunc,BoolPtr seqDescFilter)10314 static Int2 VisitDescriptorsPerSeqEntry (Uint2 entityID, SeqEntryPtr sep,
10315 Pointer userdata, SeqMgrDescExploreProc userfunc,
10316 BoolPtr seqDescFilter)
10317
10318 {
10319 BioseqPtr bsp;
10320 BioseqSetPtr bssp = NULL;
10321 Uint2 count = 0;
10322 SeqMgrDescContext context;
10323 Uint4 itemID;
10324 ObjMgrDataPtr omdp = NULL;
10325 ValNodePtr sdp = NULL;
10326 SeqEntryPtr tmp;
10327
10328 if (sep != NULL) {
10329 if (IS_Bioseq (sep)) {
10330 bsp = (BioseqPtr) sep->data.ptrvalue;
10331 if (bsp == NULL) return 0;
10332 omdp = SeqMgrGetOmdpForBioseq (bsp);
10333 sdp = bsp->descr;
10334 } else if (IS_Bioseq_set (sep)) {
10335 bssp = (BioseqSetPtr) sep->data.ptrvalue;
10336 if (bssp == NULL) return 0;
10337 omdp = SeqMgrGetOmdpForPointer (bssp);
10338 sdp = bssp->descr;
10339 }
10340 }
10341 if (omdp == NULL) return 0;
10342 itemID = omdp->lastDescrItemID;
10343
10344 context.index = 0;
10345 context.level = 0;
10346
10347 while (sdp != NULL) {
10348 itemID++;
10349 if (seqDescFilter == NULL || seqDescFilter [sdp->choice]) {
10350 context.entityID = entityID;
10351 context.itemID = itemID;
10352 context.sdp = sdp;
10353 context.sep = sep;
10354 context.seqdesctype = sdp->choice;
10355 context.userdata = userdata;
10356 context.omdp = (Pointer) omdp;
10357
10358 count++;
10359
10360 if (! userfunc (sdp, &context)) return count;
10361 }
10362 sdp = sdp->next;
10363 }
10364
10365 if (bssp != NULL) {
10366 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
10367 count += VisitDescriptorsPerSeqEntry (entityID, tmp, userdata, userfunc, seqDescFilter);
10368 }
10369 }
10370
10371 return count;
10372 }
10373
SeqMgrVisitDescriptors(Uint2 entityID,Pointer userdata,SeqMgrDescExploreProc userfunc,BoolPtr seqDescFilter)10374 NLM_EXTERN Int2 LIBCALL SeqMgrVisitDescriptors (Uint2 entityID, Pointer userdata,
10375 SeqMgrDescExploreProc userfunc,
10376 BoolPtr seqDescFilter)
10377
10378 {
10379 SeqEntryPtr sep;
10380
10381 if (entityID < 1 || userfunc == NULL) return 0;
10382 sep = SeqMgrGetTopSeqEntryForEntity (entityID);
10383 if (sep == NULL) return 0;
10384
10385 return VisitDescriptorsPerSeqEntry (entityID, sep, userdata, userfunc, seqDescFilter);
10386 }
10387
SeqMgrVisitFeatures(Uint2 entityID,Pointer userdata,SeqMgrFeatExploreProc userfunc,BoolPtr seqFeatFilter,BoolPtr featDefFilter)10388 NLM_EXTERN Int2 LIBCALL SeqMgrVisitFeatures (Uint2 entityID, Pointer userdata,
10389 SeqMgrFeatExploreProc userfunc,
10390 BoolPtr seqFeatFilter, BoolPtr featDefFilter)
10391
10392 {
10393 BioseqExtraPtr bspextra;
10394 SeqMgrFeatContext context;
10395 Int2 count = 0;
10396 SMFeatItemPtr PNTR featsByID;
10397 Uint2 i;
10398 SMFeatItemPtr item;
10399 ObjMgrDataPtr omdp;
10400 Uint1 seqfeattype;
10401 SeqFeatPtr sfp;
10402
10403 omdp = ObjMgrGetData (entityID);
10404 if (omdp == NULL) return 0;
10405 if (userfunc == NULL) return 0;
10406
10407 bspextra = (BioseqExtraPtr) omdp->extradata;
10408 if (bspextra == NULL) return 0;
10409 featsByID = bspextra->featsByID;
10410 if (featsByID == NULL || bspextra->numfeats < 1) return 0;
10411
10412 /* call for every appropriate feature in itemID order */
10413
10414 for (i = 0; i < bspextra->numfeats; i++) {
10415 item = featsByID [i];
10416 if (item != NULL) {
10417
10418 sfp = item->sfp;
10419 if (sfp != NULL) {
10420 seqfeattype = sfp->data.choice;
10421 } else {
10422 seqfeattype = FindFeatFromFeatDefType (item->subtype);
10423 }
10424 if ((seqFeatFilter == NULL || seqFeatFilter [seqfeattype]) &&
10425 (featDefFilter == NULL || featDefFilter [item->subtype]) &&
10426 (! item->ignore)) {
10427 context.entityID = entityID;
10428 context.itemID = item->itemID;
10429 context.sfp = sfp;
10430 context.sap = item->sap;
10431 context.bsp = item->bsp;
10432 context.label = item->label;
10433 context.left = item->left;
10434 context.right = item->right;
10435 context.dnaStop = item->dnaStop;
10436 context.partialL = item->partialL;
10437 context.partialR = item->partialR;
10438 context.external = item->external;
10439 context.farloc = item->farloc;
10440 context.bad_order = item->bad_order;
10441 context.mixed_strand = item->mixed_strand;
10442 context.strand = item->strand;
10443 context.seqfeattype = seqfeattype;
10444 context.featdeftype = item->subtype;
10445 context.numivals = item->numivals;
10446 context.ivals = item->ivals;
10447 context.userdata = userdata;
10448 context.omdp = (Pointer) omdp;
10449 context.index = 0;
10450
10451 count++;
10452
10453 if (! userfunc (sfp, &context)) return count;
10454 }
10455 }
10456 }
10457 return count;
10458 }
10459
10460 /*****************************************************************************
10461 *
10462 * SeqMgrMapPartToSegmentedBioseq can speed up sequtil's CheckPointInBioseq
10463 * for indexed part bioseq to segmented bioseq mapping
10464 *
10465 *****************************************************************************/
10466
BinarySearchPartToSegmentMap(BioseqPtr in,Int4 pos,BioseqPtr bsp,SeqIdPtr sip,Boolean relaxed,Int4 from,Int4 to)10467 static SMSeqIdxPtr BinarySearchPartToSegmentMap (BioseqPtr in, Int4 pos, BioseqPtr bsp, SeqIdPtr sip, Boolean relaxed, Int4 from, Int4 to)
10468
10469 {
10470 BioseqExtraPtr bspextra;
10471 Char buf [128];
10472 Int2 compare;
10473 ObjMgrDataPtr omdp;
10474 SMSeqIdxPtr PNTR partsBySeqId;
10475 SMSeqIdxPtr segpartptr;
10476 CharPtr seqIdOfPart;
10477 Int4 L, R, mid;
10478
10479 if (in == NULL) return NULL;
10480 omdp = SeqMgrGetOmdpForBioseq (in);
10481 if (omdp == NULL) return NULL;
10482 bspextra = (BioseqExtraPtr) omdp->extradata;
10483 if (bspextra == NULL) return NULL;
10484
10485 partsBySeqId = bspextra->partsBySeqId;
10486 if (partsBySeqId == NULL || bspextra->numsegs < 1) return NULL;
10487
10488 if (bsp != NULL) {
10489 sip = bsp->id;
10490 }
10491 if (sip == NULL) return NULL;
10492
10493 /* binary search into array on segmented bioseq sorted by part seqID (reversed) string */
10494
10495 while (sip != NULL) {
10496 if (MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1)) {
10497 L = 0;
10498 R = bspextra->numsegs - 1;
10499 while (L < R) {
10500 mid = (L + R) / 2;
10501 segpartptr = partsBySeqId [mid];
10502 compare = StringCmp (segpartptr->seqIdOfPart, buf);
10503 if (compare < 0) {
10504 L = mid + 1;
10505 } else {
10506 R = mid;
10507 }
10508 }
10509
10510 /* loop through all components with same seqID, get appropriate segment */
10511
10512 segpartptr = partsBySeqId [R];
10513 seqIdOfPart = segpartptr->seqIdOfPart;
10514 while (R < bspextra->numsegs && StringCmp (seqIdOfPart, buf) == 0) {
10515 if (relaxed) {
10516
10517 /* for genome mapping of portion not included in contig */
10518
10519 if ((from >= segpartptr->from && from <= segpartptr->to) ||
10520 (to >= segpartptr->from && to <= segpartptr->to) ||
10521 (from < segpartptr->from && to > segpartptr->to) ||
10522 (to < segpartptr->from && from > segpartptr->to)) {
10523
10524 return segpartptr;
10525 }
10526
10527 } else if (pos >= segpartptr->from && pos <= segpartptr->to) {
10528
10529 /* otherwise only map portion included in contig */
10530
10531 return segpartptr;
10532 }
10533
10534 R++;
10535 if (R < bspextra->numsegs) {
10536 segpartptr = partsBySeqId [R];
10537 seqIdOfPart = segpartptr->seqIdOfPart;
10538 } else {
10539 seqIdOfPart = NULL;
10540 }
10541 }
10542 }
10543 sip = sip->next;
10544 }
10545
10546 return NULL;
10547 }
10548
GenomePartToSegmentMap(BioseqPtr in,BioseqPtr bsp,Int4 from,Int4 to)10549 NLM_EXTERN SMSeqIdxPtr GenomePartToSegmentMap (BioseqPtr in, BioseqPtr bsp, Int4 from, Int4 to)
10550
10551 {
10552 return BinarySearchPartToSegmentMap (in, 0, bsp, NULL, TRUE, from, to);
10553 }
10554
SeqMgrMapPartToSegmentedBioseq(BioseqPtr in,Int4 pos,BioseqPtr bsp,SeqIdPtr sip,BoolPtr flip_strand,Boolean relaxed)10555 NLM_EXTERN Int4 LIBCALL SeqMgrMapPartToSegmentedBioseq (BioseqPtr in, Int4 pos, BioseqPtr bsp, SeqIdPtr sip, BoolPtr flip_strand, Boolean relaxed)
10556
10557 {
10558 BioseqExtraPtr bspextra;
10559 SMSeqIdxPtr currp;
10560 SMSeqIdxPtr nextp;
10561 ObjMgrDataPtr omdp;
10562 SMSeqIdxPtr segpartptr;
10563
10564 if (in == NULL) return -1;
10565 if (flip_strand != NULL) {
10566 *flip_strand = FALSE;
10567 }
10568
10569 /* first check to see if part has been loaded and single map up block installed */
10570
10571 if (bsp != NULL) {
10572 omdp = SeqMgrGetOmdpForBioseq (bsp);
10573 if (omdp != NULL) {
10574 bspextra = (BioseqExtraPtr) omdp->extradata;
10575 if (bspextra != NULL) {
10576
10577 /* no need for partsByLoc or partsBySeqId arrays, just use segparthead linked list */
10578
10579 for (segpartptr = bspextra->segparthead; segpartptr != NULL; segpartptr = segpartptr->next) {
10580 if (segpartptr->parentBioseq == in) {
10581 if (pos >= segpartptr->from && pos <= segpartptr->to) {
10582
10583 /* success, immediate return with mapped up value */
10584
10585 if (segpartptr->strand == Seq_strand_minus) {
10586 if (flip_strand != NULL) {
10587 *flip_strand = FALSE;
10588 }
10589 return segpartptr->cumOffset + (segpartptr->to - pos);
10590 } else {
10591 return segpartptr->cumOffset + (pos - segpartptr->from);
10592 }
10593 }
10594 }
10595 }
10596 }
10597 }
10598 }
10599
10600 /* otherwise do binary search on segmented bioseq mapping data */
10601
10602 segpartptr = BinarySearchPartToSegmentMap (in, pos, bsp, sip, relaxed, 0, 0);
10603 if (segpartptr == NULL) return -1;
10604
10605 if (relaxed || (pos >= segpartptr->from && pos <= segpartptr->to)) {
10606
10607 /* install map up block on part, if it has been loaded, to speed up next search */
10608
10609 if (bsp != NULL) {
10610 omdp = SeqMgrGetOmdpForBioseq (bsp);
10611 if (omdp != NULL) {
10612 bspextra = (BioseqExtraPtr) omdp->extradata;
10613 if (bspextra == NULL) {
10614 CreateBioseqExtraBlock (omdp, bsp);
10615 bspextra = (BioseqExtraPtr) omdp->extradata;
10616 }
10617 if (bspextra != NULL) {
10618
10619 /* clean up any old map up info on part */
10620
10621 for (currp = bspextra->segparthead; currp != NULL; currp = nextp) {
10622 nextp = currp->next;
10623 SeqLocFree (currp->slp);
10624 MemFree (currp->seqIdOfPart);
10625 MemFree (currp);
10626 }
10627 bspextra->segparthead = NULL;
10628 bspextra->numsegs = 0;
10629 bspextra->partsByLoc = MemFree (bspextra->partsByLoc);
10630 bspextra->partsBySeqId = MemFree (bspextra->partsBySeqId);
10631
10632 /* allocate single map up block */
10633
10634 currp = MemNew (sizeof (SMSeqIdx));
10635 if (currp != NULL) {
10636 currp->slp = AsnIoMemCopy (segpartptr->slp,
10637 (AsnReadFunc) SeqLocAsnRead,
10638 (AsnWriteFunc) SeqLocAsnWrite);
10639 currp->seqIdOfPart = StringSave (segpartptr->seqIdOfPart);
10640 currp->parentBioseq = segpartptr->parentBioseq;
10641 currp->cumOffset = segpartptr->cumOffset;
10642 currp->from = segpartptr->from;
10643 currp->to = segpartptr->to;
10644 currp->strand = segpartptr->strand;
10645 }
10646
10647 /* add new map up block to part */
10648
10649 bspextra->segparthead = currp;
10650 }
10651 }
10652 }
10653
10654 /* now return offset result */
10655
10656 if (segpartptr->strand == Seq_strand_minus) {
10657 if (flip_strand != NULL) {
10658 *flip_strand = TRUE;
10659 }
10660 return segpartptr->cumOffset + (segpartptr->to - pos);
10661 } else {
10662 return segpartptr->cumOffset + (pos - segpartptr->from);
10663 }
10664 }
10665 return -1;
10666 }
10667
10668 /*****************************************************************************
10669 *
* TrimLocInSegment takes a location on an indexed far segmented part and
* trims it to the region referred to by the parent segmented or delta bioseq.
10672 *
10673 * Only implemented for seqloc_int components, not seqloc_point
10674 *
10675 *****************************************************************************/
10676
TrimLocInSegment(BioseqPtr master,SeqLocPtr location,BoolPtr p5ptr,BoolPtr p3ptr)10677 NLM_EXTERN SeqLocPtr TrimLocInSegment (
10678 BioseqPtr master,
10679 SeqLocPtr location,
10680 BoolPtr p5ptr,
10681 BoolPtr p3ptr
10682 )
10683
10684 {
10685 BioseqPtr bsp;
10686 BioseqExtraPtr bspextra;
10687 Char buf [128];
10688 Int2 compare;
10689 ObjMgrDataPtr omdp;
10690 Boolean partial5;
10691 Boolean partial3;
10692 SMSeqIdxPtr PNTR partsBySeqId;
10693 SeqLocPtr rsult = NULL;
10694 SMSeqIdxPtr segpartptr;
10695 CharPtr seqIdOfPart;
10696 SeqIdPtr sip;
10697 SeqIntPtr sint;
10698 SeqLocPtr slp;
10699 Uint1 strand;
10700 Int4 L, R, mid;
10701 Int4 start, stop, swap;
10702
10703 if (master == NULL || location == NULL) return NULL;
10704
10705 omdp = SeqMgrGetOmdpForBioseq (master);
10706 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
10707 bspextra = (BioseqExtraPtr) omdp->extradata;
10708 if (bspextra == NULL) return NULL;
10709
10710 partsBySeqId = bspextra->partsBySeqId;
10711 if (partsBySeqId == NULL || bspextra->numsegs < 1) return NULL;
10712
10713 partial5 = FALSE;
10714 partial3 = FALSE;
10715
10716 if (p5ptr != NULL) {
10717 partial5 = *p5ptr;
10718 }
10719 if (p3ptr != NULL) {
10720 partial3 = *p3ptr;
10721 }
10722
10723 for (slp = SeqLocFindNext (location, NULL);
10724 slp != NULL;
10725 slp = SeqLocFindNext (location, slp)) {
10726 if (slp->choice != SEQLOC_INT) continue;
10727 sint = (SeqIntPtr) slp->data.ptrvalue;
10728 if (sint == NULL) continue;
10729 strand = sint->strand;
10730
10731 bsp = BioseqFind (sint->id);
10732 if (bsp == NULL) continue;
10733
10734 for (sip = bsp->id; sip != NULL; sip = sip->next) {
10735 if (! MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1)) continue;
10736
10737 L = 0;
10738 R = bspextra->numsegs - 1;
10739 while (L < R) {
10740 mid = (L + R) / 2;
10741 segpartptr = partsBySeqId [mid];
10742 compare = StringCmp (segpartptr->seqIdOfPart, buf);
10743 if (compare < 0) {
10744 L = mid + 1;
10745 } else {
10746 R = mid;
10747 }
10748 }
10749
10750 segpartptr = partsBySeqId [R];
10751 seqIdOfPart = segpartptr->seqIdOfPart;
10752
10753 while (R < bspextra->numsegs && StringCmp (seqIdOfPart, buf) == 0) {
10754
10755 start = sint->from;
10756 stop = sint->to;
10757
10758 if ((sint->from <= segpartptr->from && sint->to > segpartptr->from) ||
10759 (sint->from < segpartptr->to && sint->to >= segpartptr->to)) {
10760
10761 if (sint->from < segpartptr->from) {
10762 start = segpartptr->from;
10763 if (strand == Seq_strand_minus || strand == Seq_strand_both_rev) {
10764 partial3 = TRUE;
10765 } else {
10766 partial5 = TRUE;
10767 }
10768 }
10769 if (sint->to > segpartptr->to) {
10770 stop = segpartptr->to;
10771 if (strand == Seq_strand_minus || strand == Seq_strand_both_rev) {
10772 partial5 = TRUE;
10773 } else {
10774 partial3 = TRUE;
10775 }
10776 }
10777
10778 if (strand == Seq_strand_minus || strand == Seq_strand_both_rev) {
10779 swap = start;
10780 start = stop;
10781 stop = swap;
10782 }
10783
10784 rsult = AddIntervalToLocation (rsult, sint->id, start, stop, FALSE, FALSE);
10785 }
10786
10787 R++;
10788 if (R < bspextra->numsegs) {
10789 segpartptr = partsBySeqId [R];
10790 seqIdOfPart = segpartptr->seqIdOfPart;
10791 } else {
10792 seqIdOfPart = NULL;
10793 }
10794 }
10795 }
10796 }
10797
10798 if (p5ptr != NULL) {
10799 *p5ptr = partial5;
10800 }
10801 if (p3ptr != NULL) {
10802 *p3ptr = partial3;
10803 }
10804
10805 return rsult;
10806 }
10807
10808 /***************************/
10809
/* queue of requested uids (one per node, in data.intvalue): installed by
   LookupAndExtractBspListMT and popped from the head by RemoveUidFromQueue;
   every access in those functions is made while holding the mutex below */

static ValNodePtr smp_requested_uid_list = NULL;
static TNlmMutex smp_requested_uid_mutex = NULL;

/* list of bioseq pointers collected by AddBspToList (new entries go on the
   front) and detached as a whole by LookupAndExtractBspListMT; guarded by
   the mutex below in both functions */

static ValNodePtr smp_locked_bsp_list = NULL;
static TNlmMutex smp_locked_bsp_mutex = NULL;
10815
/* Pushes bsp onto the front of smp_locked_bsp_list while holding
   smp_locked_bsp_mutex.  A NULL bsp is ignored.  If the mutex cannot be
   taken a fatal error is posted and nothing is added; if the node cannot be
   allocated the list is left unchanged. */

static void AddBspToList (
  BioseqPtr bsp
)

{
  Int4        lockerr;
  ValNodePtr  node;

  if (bsp == NULL) return;

  lockerr = NlmMutexLockEx (&smp_locked_bsp_mutex);
  if (lockerr != 0) {
    ErrPostEx (SEV_FATAL, 0, 0, "AddBspToList mutex failed [%ld]", (long) lockerr);
    return;
  }

  /* make a free-standing node first, then link it in as the new head */

  node = ValNodeAddPointer (NULL, 0, (Pointer) bsp);
  if (node != NULL) {
    node->next = smp_locked_bsp_list;
    smp_locked_bsp_list = node;
  }

  NlmMutexUnlock (smp_locked_bsp_mutex);
}
10840
RemoveUidFromQueue(void)10841 static Int4 RemoveUidFromQueue (
10842 void
10843 )
10844
10845 {
10846 Int4 ret, uid = 0;
10847 ValNodePtr vnp;
10848
10849 ret = NlmMutexLockEx (&smp_requested_uid_mutex);
10850 if (ret) {
10851 ErrPostEx (SEV_FATAL, 0, 0, "RemoveUidFromQueue mutex failed [%ld]", (long) ret);
10852 return 0;
10853 }
10854
10855 /* extract next requested uid from queue */
10856
10857 if (smp_requested_uid_list != NULL) {
10858 vnp = smp_requested_uid_list;
10859 smp_requested_uid_list = vnp->next;
10860 vnp->next = NULL;
10861 uid = (Int4) vnp->data.intvalue;
10862 ValNodeFree (vnp);
10863 }
10864
10865 NlmMutexUnlock (smp_requested_uid_mutex);
10866
10867 return uid;
10868 }
10869
/* Thread body used by LookupAndExtractBspListMT.  Repeatedly takes a uid
   from the shared queue until RemoveUidFromQueue returns a value that is not
   positive.  Each uid is wrapped in a stack SEQID_GI node; if BioseqFindFunc
   does not find it, the bioseq is obtained with BioseqLockByIdEx and, when
   that succeeds, recorded with AddBspToList.  arg is unused; always returns
   NULL. */

static VoidPtr DoAsyncLookup (
  VoidPtr arg
)

{
  ValNode    gi;
  BioseqPtr  locked;
  Int4       uid;

  MemSet ((Pointer) &gi, 0, sizeof (ValNode));

  for (uid = RemoveUidFromQueue (); uid > 0; uid = RemoveUidFromQueue ()) {

    gi.choice = SEQID_GI;
    gi.data.intvalue = uid;
    gi.next = NULL;

    if (BioseqFindFunc (&gi, FALSE, FALSE, TRUE) != NULL) continue;

    locked = BioseqLockByIdEx (&gi, FALSE);
    if (locked != NULL) {
      AddBspToList (locked);
    }
  }

  return NULL;
}
10900
10901 #define NUM_ASYNC_LOOKUP_THREADS 5
10902
LookupAndExtractBspListMT(ValNodePtr PNTR uidlistP)10903 static ValNodePtr LookupAndExtractBspListMT (
10904 ValNodePtr PNTR uidlistP
10905 )
10906
10907 {
10908 Int2 i;
10909 Int4 ret;
10910 VoidPtr status;
10911 ValNodePtr sublist = NULL;
10912 TNlmThread thds [NUM_ASYNC_LOOKUP_THREADS];
10913
10914 if (uidlistP == NULL || *uidlistP == NULL) return NULL;
10915
10916 ret = NlmMutexLockEx (&smp_requested_uid_mutex);
10917 if (ret) {
10918 ErrPostEx (SEV_FATAL, 0, 0, "add uid mutex failed [%ld]", (long) ret);
10919 return NULL;
10920 }
10921
10922 smp_requested_uid_list = *uidlistP;
10923 *uidlistP = NULL;
10924
10925 NlmMutexUnlock (smp_requested_uid_mutex);
10926
10927 /* spawn several threads for individual lock requests */
10928
10929 for (i = 0; i < NUM_ASYNC_LOOKUP_THREADS; i++) {
10930 thds [i] = NlmThreadCreate (DoAsyncLookup, NULL);
10931 }
10932
10933 /* wait for all fetching threads to complete */
10934
10935 for (i = 0; i < NUM_ASYNC_LOOKUP_THREADS; i++) {
10936 NlmThreadJoin (thds [i], &status);
10937 }
10938
10939 ret = NlmMutexLockEx (&smp_locked_bsp_mutex);
10940 if (ret) {
10941 ErrPostEx (SEV_FATAL, 0, 0, "get bsp mutex failed [%ld]", (long) ret);
10942 return NULL;
10943 }
10944
10945 sublist = smp_locked_bsp_list;
10946 smp_locked_bsp_list = NULL;
10947
10948 NlmMutexUnlock (smp_locked_bsp_mutex);
10949
10950 return sublist;
10951 }
10952
/*
 * LookupAndExtractBspListST
 *
 * Single-threaded fetch.  Walks the uid list, skipping values below 1 and
 * uids that BioseqFindFunc already finds, and locks every other record
 * with BioseqLockByIdEx.  Each locked Bioseq is pushed on the head of the
 * returned list.
 *
 * When reindexIfBig is set and the top SeqEntry of a newly locked record
 * reports more than 2 from VisitBioseqsInSep, the indexing and object
 * manager holds are released and immediately re-established.
 *
 * The input list is freed and *uidlistP is set to the result of
 * ValNodeFree.  Returns the list of locked Bioseqs, or NULL.
 */
static ValNodePtr LookupAndExtractBspListST (
  ValNodePtr PNTR uidlistP,
  Boolean reindexIfBig
)

{
  SeqId        gi_id;
  BioseqPtr    locked;
  ValNodePtr   curr, node, result = NULL;
  SeqEntryPtr  top;
  Uint2        eID;
  Int4         gi;

  if (uidlistP == NULL) return NULL;
  if (*uidlistP == NULL) return NULL;

  MemSet ((Pointer) &gi_id, 0, sizeof (SeqId));

  /* record fetching loop */

  curr = *uidlistP;
  while (curr != NULL) {
    gi = (Int4) curr->data.intvalue;
    if (gi >= 1) {
      gi_id.choice = SEQID_GI;
      gi_id.data.intvalue = gi;

      if (BioseqFindFunc (&gi_id, FALSE, TRUE, TRUE) == NULL) {
        locked = BioseqLockByIdEx (&gi_id, FALSE);
        if (locked != NULL) {

          if (reindexIfBig) {
            eID = ObjMgrGetEntityIDForPointer (locked);
            top = GetTopSeqEntryForEntityID (eID);
            if (top != NULL && VisitBioseqsInSep (top, NULL, NULL) > 2) {
              /* drop both holds, then take them again */
              SeqMgrHoldIndexing (FALSE);
              ObjMgrClearHold ();
              ObjMgrSetHold ();
              SeqMgrHoldIndexing (TRUE);
            }
          }

          /* prepend to result list */

          node = ValNodeAddPointer (NULL, 0, (Pointer) locked);
          if (node != NULL) {
            node->next = result;
            result = node;
          }
        }
      }
    }
    curr = curr->next;
  }

  /* clean up input uidlist */

  *uidlistP = ValNodeFree (*uidlistP);

  return result;
}
11005
/*
 * LookupAndExtractBspList
 *
 * First pass, with the SeqEntry scope cleared: any positive uid that
 * BioseqFindFunc already finds has its value overwritten with 0.  The
 * previous scope is then restored.
 *
 * Second pass: the list is handed to LookupAndExtractBspListMT when
 * usethreads is set, otherwise to LookupAndExtractBspListST (which also
 * receives reindexIfBig).
 *
 * Returns whatever the chosen fetch routine returns; NULL if the argument
 * or the list is NULL.
 */
static ValNodePtr LookupAndExtractBspList (
  ValNodePtr PNTR uidlistP,
  Boolean usethreads,
  Boolean reindexIfBig
)

{
  SeqId        gi_id;
  ValNodePtr   curr;
  SeqEntryPtr  prevScope;
  Int4         gi;

  if (uidlistP == NULL) return NULL;
  if (*uidlistP == NULL) return NULL;

  MemSet ((Pointer) &gi_id, 0, sizeof (SeqId));

  /* exclude any records already loaded anywhere in memory */

  prevScope = SeqEntrySetScope (NULL);
  for (curr = *uidlistP; curr != NULL; curr = curr->next) {
    gi = (Int4) curr->data.intvalue;
    if (gi < 1) continue;

    gi_id.choice = SEQID_GI;
    gi_id.data.intvalue = gi;

    if (BioseqFindFunc (&gi_id, FALSE, FALSE, TRUE) != NULL) {
      curr->data.intvalue = 0;
    }
  }
  SeqEntrySetScope (prevScope);

  /* now do actual fetching */

  if (! usethreads) {
    return LookupAndExtractBspListST (uidlistP, reindexIfBig);
  }

  return LookupAndExtractBspListMT (uidlistP);
}
11046
/*
 * SortUniqueCleanseUidList
 *
 * Sorts the uid list with SortByIntvalue and passes it through
 * UniqueIntValNode.  If a Bioseq list is supplied, every uid that matches
 * the GI of a Bioseq already on that list is then set to 0.
 *
 * Matching uses a binary search over a temporary array of node pointers.
 * Matches are only flagged (choice = 1) during the search and zeroed in a
 * separate final pass, so the values being searched stay in sorted order
 * for the whole search phase.
 */
static void SortUniqueCleanseUidList (
  ValNodePtr PNTR uidlistP,
  ValNodePtr PNTR bsplistP
)

{
  ValNodePtr PNTR  table;
  ValNodePtr       node, probe;
  BioseqPtr        have;
  SeqIdPtr         id;
  Int4             count, gi, hi, idx, lo, mid;

  if (uidlistP == NULL) return;
  if (*uidlistP == NULL) return;

  /* sort and unique uids to download */

  *uidlistP = ValNodeSort (*uidlistP, SortByIntvalue);
  *uidlistP = UniqueIntValNode (*uidlistP);

  if (bsplistP == NULL) return;
  if (*bsplistP == NULL) return;

  /* zero out any uids already fetched in earlier loop */

  count = ValNodeLen (*uidlistP);
  if (count == 0) return;
  table = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * (count + 1));
  if (table == NULL) return;

  idx = 0;
  for (node = *uidlistP; node != NULL; node = node->next) {
    table [idx] = node;
    idx++;
  }

  for (node = *bsplistP; node != NULL; node = node->next) {
    have = (BioseqPtr) node->data.ptrvalue;
    if (have == NULL) continue;

    /* take the first non-zero GI among this Bioseq's ids */

    gi = 0;
    id = have->id;
    while (id != NULL && gi == 0) {
      if (id->choice == SEQID_GI) {
        gi = (Int4) id->data.intvalue;
      }
      id = id->next;
    }
    if (gi < 1) continue;

    /* lower-bound binary search for gi */

    lo = 0;
    hi = count - 1;

    while (lo < hi) {
      mid = (lo + hi) / 2;
      probe = table [mid];
      if (probe != NULL && probe->data.intvalue < gi) {
        lo = mid + 1;
      } else {
        hi = mid;
      }
    }

    probe = table [hi];
    if (probe != NULL && probe->data.intvalue == gi) {
      /* mark uid that is already loaded */
      probe->choice = 1;
    }
  }

  /* clear out marked uids */

  node = *uidlistP;
  while (node != NULL) {
    if (node->choice == 1) {
      node->data.intvalue = 0;
    }
    node = node->next;
  }

  MemFree (table);
}
11118
/*
 * IdLists - userdata block filled in by CollectAllSegments.
 * Both lists are built by linking new nodes in at the head.
 */
typedef struct iddata {
  ValNodePtr  uidlist;  /* nodes created with ValNodeAddBigInt, one per uid (> 0) */
  ValNodePtr  siplist;  /* nodes pointing at SeqIds for which no uid was obtained */
} IdLists, PNTR IdListsPtr;
11123
/*
 * CollectAllSegments
 *
 * Records the SeqId of one location in the IdLists passed as userdata.
 * The id is SeqLocId (slp), or failing that the id of the first
 * sub-location from SeqLocFindNext.
 *
 *   - GI id: its value is used as the uid.
 *   - GenBank/EMBL/DDBJ/Other/TPG/TPE/TPD id: ignored if BioseqFindCore
 *     finds it; otherwise, if the text id has a version above 0,
 *     GetGIForSeqId supplies the uid.
 *   - any non-GI id still without a uid: a node pointing at the id is
 *     linked in at the head of siplist.
 *
 * A uid of 1 or more is linked in at the head of uidlist.
 */
static void CollectAllSegments (SeqLocPtr slp, Pointer userdata)

{
  IdListsPtr    lists;
  SeqIdPtr      id;
  SeqLocPtr     first;
  ValNodePtr    node;
  TextSeqIdPtr  text;
  BIG_ID        gi = 0;

  if (slp == NULL) return;
  if (userdata == NULL) return;
  lists = (IdListsPtr) userdata;

  id = SeqLocId (slp);
  if (id == NULL) {
    first = SeqLocFindNext (slp, NULL);
    if (first == NULL) return;
    id = SeqLocId (first);
    if (id == NULL) return;
  }

  if (id->choice != SEQID_GI) {

    if (id->choice == SEQID_GENBANK ||
        id->choice == SEQID_EMBL ||
        id->choice == SEQID_DDBJ ||
        id->choice == SEQID_OTHER ||
        id->choice == SEQID_TPG ||
        id->choice == SEQID_TPE ||
        id->choice == SEQID_TPD) {

      /* if not gi number, first see if local accession */

      if (BioseqFindCore (id) != NULL) return;

      text = (TextSeqIdPtr) id->data.ptrvalue;
      if (text != NULL && text->version > 0) {
        gi = GetGIForSeqId (id);
      }
    }

    if (gi < 1) {

      /* if not resolvable to gi number, link in head of sip list */

      node = ValNodeAddPointer (NULL, 0, (Pointer) id);
      if (node != NULL) {
        node->next = lists->siplist;
        lists->siplist = node;
      }
      return;
    }

  } else {
    gi = (BIG_ID) id->data.intvalue;
  }

  if (gi < 1) return;

  /* link in head of uid list */

  node = ValNodeAddBigInt (NULL, 0, gi);
  if (node != NULL) {
    node->next = lists->uidlist;
    lists->uidlist = node;
  }
}
11198
/*
 * CollectAllBioseqs
 *
 * Bioseq visitor that feeds the component locations of a segmented,
 * delta or reference Bioseq to CollectAllSegments.  Locations of type
 * SEQLOC_NULL are skipped, as are delta elements whose choice is not 1.
 * Other representations are ignored.  userdata is passed through
 * unchanged.
 */
static void CollectAllBioseqs (BioseqPtr bsp, Pointer userdata)

{
  ValNode      mix;
  DeltaSeqPtr  delta;
  SeqLocPtr    part;

  if (bsp == NULL) return;
  if (userdata == NULL) return;

  switch (bsp->repr) {

    case Seq_repr_seg :
      /* wrap the segment chain in a temporary SEQLOC_MIX for iteration */
      mix.choice = SEQLOC_MIX;
      mix.extended = 0;
      mix.data.ptrvalue = bsp->seq_ext;
      mix.next = NULL;
      for (part = SeqLocFindNext (&mix, NULL);
           part != NULL;
           part = SeqLocFindNext (&mix, part)) {
        if (part->choice == SEQLOC_NULL) continue;
        CollectAllSegments (part, userdata);
      }
      break;

    case Seq_repr_delta :
      delta = (DeltaSeqPtr) (bsp->seq_ext);
      while (delta != NULL) {
        if (delta->choice == 1) {
          part = (SeqLocPtr) delta->data.ptrvalue;
          if (part != NULL && part->choice != SEQLOC_NULL) {
            CollectAllSegments (part, userdata);
          }
        }
        delta = delta->next;
      }
      break;

    case Seq_repr_ref :
      part = (SeqLocPtr) bsp->seq_ext;
      if (part != NULL && part->choice != SEQLOC_NULL) {
        CollectAllSegments (part, userdata);
      }
      break;

    default :
      break;
  }
}
11234
/*
 * CollectAllLocations
 *
 * Feature visitor: passes every sub-location of sfp->location, other than
 * SEQLOC_NULL ones, to CollectAllSegments together with userdata.
 */
static void CollectAllLocations (SeqFeatPtr sfp, Pointer userdata)

{
  SeqLocPtr  part;

  if (sfp == NULL) return;
  if (userdata == NULL) return;
  if (sfp->location == NULL) return;

  for (part = SeqLocFindNext (sfp->location, NULL);
       part != NULL;
       part = SeqLocFindNext (sfp->location, part)) {
    if (part->choice == SEQLOC_NULL) continue;
    CollectAllSegments (part, userdata);
  }
}
11248
/*
 * CollectAllProducts
 *
 * Feature visitor: passes every sub-location of sfp->product, other than
 * SEQLOC_NULL ones, to CollectAllSegments together with userdata.
 */
static void CollectAllProducts (SeqFeatPtr sfp, Pointer userdata)

{
  SeqLocPtr  part;

  if (sfp == NULL) return;
  if (userdata == NULL) return;
  if (sfp->product == NULL) return;

  for (part = SeqLocFindNext (sfp->product, NULL);
       part != NULL;
       part = SeqLocFindNext (sfp->product, part)) {
    if (part->choice == SEQLOC_NULL) continue;
    CollectAllSegments (part, userdata);
  }
}
11262
/*
 * CollectAllSublocs
 *
 * Passes every sub-location of loc, other than SEQLOC_NULL ones, to
 * CollectAllSegments together with userdata.
 */
static void CollectAllSublocs (SeqLocPtr loc, Pointer userdata)

{
  SeqLocPtr  part;

  if (loc == NULL) return;
  if (userdata == NULL) return;

  for (part = SeqLocFindNext (loc, NULL);
       part != NULL;
       part = SeqLocFindNext (loc, part)) {
    if (part->choice == SEQLOC_NULL) continue;
    CollectAllSegments (part, userdata);
  }
}
11276
/*
 * FetchFromUidList
 *
 * Fetches and locks the records named in *uidlistP and appends the locked
 * Bioseqs to *bsplistP.  The uid list is first sorted, uniqued and
 * cleansed against *bsplistP, then fetched via LookupAndExtractBspList.
 *
 * Each batch of newly locked Bioseqs is scanned for segmented, delta and
 * reference records; the uids of their components are collected and
 * fetched in turn, until a round locks nothing new.
 *
 * usethreads and reindexIfBig are passed through to
 * LookupAndExtractBspList.
 */
static void FetchFromUidList (
  ValNodePtr PNTR uidlistP,
  ValNodePtr PNTR bsplistP,
  Boolean usethreads,
  Boolean reindexIfBig
)

{
  BioseqPtr   bsp;
  IdLists     ils;
  ValNodePtr  sublist, vnp;

  if (uidlistP == NULL || bsplistP == NULL) return;

  SortUniqueCleanseUidList (uidlistP, bsplistP);
  sublist = LookupAndExtractBspList (uidlistP, usethreads, reindexIfBig);

  while (sublist != NULL) {

    /*
     * CollectAllBioseqs hands its userdata to CollectAllSegments, which
     * treats it as an IdListsPtr and touches both uidlist and siplist.
     * A complete IdLists must therefore be supplied; passing the address
     * of a lone ValNodePtr lets the siplist access run past that variable.
     */

    ils.uidlist = NULL;
    ils.siplist = NULL;

    /* recursively queue delta or segmented component uids */

    for (vnp = sublist; vnp != NULL; vnp = vnp->next) {

      bsp = (BioseqPtr) vnp->data.ptrvalue;
      if (bsp == NULL) continue;
      if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta && bsp->repr != Seq_repr_ref) continue;

      CollectAllBioseqs (bsp, (Pointer) &ils);
    }

    /*
     * Only uids are followed at this level.  The siplist nodes merely
     * point at SeqIds stored in the fetched records, so release the
     * nodes and leave the ids alone.
     */

    ils.siplist = ValNodeFree (ils.siplist);

    ValNodeLink (bsplistP, sublist);
    sublist = NULL;

    SortUniqueCleanseUidList (&ils.uidlist, bsplistP);
    sublist = LookupAndExtractBspList (&ils.uidlist, usethreads, reindexIfBig);
  }
}
11315
/*
 * FetchFromSipList
 *
 * For every SeqId on *siplistP that BioseqFindCore does not find, locks
 * the record with BioseqLockById and pushes the Bioseq on the head of
 * *bsplistP.  The sip list itself is not modified or freed here.
 */
static void FetchFromSipList (
  ValNodePtr PNTR siplistP,
  ValNodePtr PNTR bsplistP
)

{
  ValNodePtr  curr, node;
  BioseqPtr   locked;
  SeqIdPtr    id;

  if (siplistP == NULL) return;
  if (bsplistP == NULL) return;

  curr = *siplistP;
  while (curr != NULL) {
    id = (SeqIdPtr) curr->data.ptrvalue;
    if (id != NULL && BioseqFindCore (id) == NULL) {
      locked = BioseqLockById (id);
      if (locked != NULL) {
        node = ValNodeAddPointer (NULL, 0, (Pointer) locked);
        if (node != NULL) {
          node->next = *bsplistP;
          *bsplistP = node;
        }
      }
    }
    curr = curr->next;
  }
}
11341
/*
 * LookForNonGiSegments
 *
 * Sets the Boolean that userdata points to to TRUE when the id in
 * question is not a GI; the flag is never cleared here.  If sip is NULL
 * the id is taken from SeqLocId (slp), or failing that from the first
 * sub-location returned by SeqLocFindNext.
 */
static void LookForNonGiSegments (
  SeqLocPtr slp,
  SeqIdPtr sip,
  Pointer userdata
)

{
  BoolPtr    flag;
  SeqLocPtr  first;

  if (userdata == NULL || (slp == NULL && sip == NULL)) return;
  flag = (BoolPtr) userdata;

  if (sip == NULL) {
    sip = SeqLocId (slp);
  }
  if (sip == NULL) {
    first = SeqLocFindNext (slp, NULL);
    if (first == NULL) return;
    sip = SeqLocId (first);
    if (sip == NULL) return;
  }

  if (sip->choice == SEQID_GI) return;

  *flag = TRUE;
}
11371
/*
 * LookForNonGiBioseqs
 *
 * Bioseq visitor that feeds the component locations of a segmented,
 * delta or reference Bioseq to LookForNonGiSegments.  Locations of type
 * SEQLOC_NULL are skipped, as are delta elements whose choice is not 1.
 * userdata is passed through unchanged (it is checked by the callee).
 */
static void LookForNonGiBioseqs (
  BioseqPtr bsp,
  Pointer userdata
)

{
  ValNode      mix;
  DeltaSeqPtr  delta;
  SeqLocPtr    part;

  if (bsp == NULL) return;

  switch (bsp->repr) {

    case Seq_repr_seg :
      /* wrap the segment chain in a temporary SEQLOC_MIX for iteration */
      mix.choice = SEQLOC_MIX;
      mix.extended = 0;
      mix.data.ptrvalue = bsp->seq_ext;
      mix.next = NULL;
      for (part = SeqLocFindNext (&mix, NULL);
           part != NULL;
           part = SeqLocFindNext (&mix, part)) {
        if (part->choice == SEQLOC_NULL) continue;
        LookForNonGiSegments (part, NULL, userdata);
      }
      break;

    case Seq_repr_delta :
      delta = (DeltaSeqPtr) (bsp->seq_ext);
      while (delta != NULL) {
        if (delta->choice == 1) {
          part = (SeqLocPtr) delta->data.ptrvalue;
          if (part != NULL && part->choice != SEQLOC_NULL) {
            LookForNonGiSegments (part, NULL, userdata);
          }
        }
        delta = delta->next;
      }
      break;

    case Seq_repr_ref :
      part = (SeqLocPtr) bsp->seq_ext;
      if (part != NULL && part->choice != SEQLOC_NULL) {
        LookForNonGiSegments (part, NULL, userdata);
      }
      break;

    default :
      break;
  }
}
11410
/*
 * LookForNonGiLocations
 *
 * Feature visitor: passes every sub-location of sfp->location, other than
 * SEQLOC_NULL ones, to LookForNonGiSegments together with userdata.
 */
static void LookForNonGiLocations (SeqFeatPtr sfp, Pointer userdata)

{
  SeqLocPtr  part;

  if (sfp == NULL) return;
  if (userdata == NULL) return;
  if (sfp->location == NULL) return;

  for (part = SeqLocFindNext (sfp->location, NULL);
       part != NULL;
       part = SeqLocFindNext (sfp->location, part)) {
    if (part->choice == SEQLOC_NULL) continue;
    LookForNonGiSegments (part, NULL, userdata);
  }
}
11424
/*
 * LookForNonGiProducts
 *
 * Feature visitor: passes every sub-location of sfp->product, other than
 * SEQLOC_NULL ones, to LookForNonGiSegments together with userdata.
 */
static void LookForNonGiProducts (SeqFeatPtr sfp, Pointer userdata)

{
  SeqLocPtr  part;

  if (sfp == NULL) return;
  if (userdata == NULL) return;
  if (sfp->product == NULL) return;

  for (part = SeqLocFindNext (sfp->product, NULL);
       part != NULL;
       part = SeqLocFindNext (sfp->product, part)) {
    if (part->choice == SEQLOC_NULL) continue;
    LookForNonGiSegments (part, NULL, userdata);
  }
}
11438
/*
 * LookForNonGiSublocs
 *
 * Passes every sub-location of loc, other than SEQLOC_NULL ones, to
 * LookForNonGiSegments together with userdata.
 */
static void LookForNonGiSublocs (SeqLocPtr loc, Pointer userdata)

{
  SeqLocPtr  part;

  if (loc == NULL) return;
  if (userdata == NULL) return;

  for (part = SeqLocFindNext (loc, NULL);
       part != NULL;
       part = SeqLocFindNext (loc, part)) {
    if (part->choice == SEQLOC_NULL) continue;
    LookForNonGiSegments (part, NULL, userdata);
  }
}
11452
AdvcLockFarComponents(SeqEntryPtr sep,Boolean components,Boolean locations,Boolean products,SeqLocPtr loc,Boolean usethreads)11453 NLM_EXTERN ValNodePtr AdvcLockFarComponents (
11454 SeqEntryPtr sep,
11455 Boolean components,
11456 Boolean locations,
11457 Boolean products,
11458 SeqLocPtr loc,
11459 Boolean usethreads
11460 )
11461
11462 {
11463 ValNodePtr bsplist = NULL;
11464 IdLists ils;
11465 Boolean nonGi;
11466 SeqEntryPtr oldsep;
11467
11468 if (sep == NULL) return NULL;
11469 oldsep = SeqEntrySetScope (sep);
11470
11471 /* if non-GI components/locations/products, lookup in bulk first */
11472
11473 if (components) {
11474 nonGi = FALSE;
11475 VisitBioseqsInSep (sep, (Pointer) &nonGi, LookForNonGiBioseqs);
11476 if (nonGi) {
11477 LookupFarSeqIDs (sep, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE);
11478 }
11479 }
11480
11481 if (locations) {
11482 nonGi = FALSE;
11483 VisitFeaturesInSep (sep, (Pointer) &nonGi, LookForNonGiLocations);
11484 if (nonGi) {
11485 LookupFarSeqIDs (sep, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
11486 }
11487 }
11488
11489 if (products) {
11490 nonGi = FALSE;
11491 VisitFeaturesInSep (sep, (Pointer) &nonGi, LookForNonGiProducts);
11492 if (nonGi) {
11493 LookupFarSeqIDs (sep, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE);
11494 }
11495 }
11496
11497 if (loc != NULL) {
11498 nonGi = FALSE;
11499 LookForNonGiSublocs (loc, (Pointer) &nonGi);
11500 if (nonGi) {
11501 LookupFarSeqIDs (sep, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE);
11502 }
11503 }
11504
11505 /* now collect list of GI numbers, lock into memory */
11506
11507 ils.siplist = NULL;
11508
11509 if (components) {
11510 ObjMgrSetHold ();
11511 SeqMgrHoldIndexing (TRUE);
11512 ils.uidlist = NULL;
11513 VisitBioseqsInSep (sep, (Pointer) &ils, CollectAllBioseqs);
11514 FetchFromUidList (&ils.uidlist, &bsplist, usethreads, FALSE);
11515 SeqMgrHoldIndexing (FALSE);
11516 ObjMgrClearHold ();
11517 }
11518
11519 if (locations) {
11520 ObjMgrSetHold ();
11521 SeqMgrHoldIndexing (TRUE);
11522 ils.uidlist = NULL;
11523 VisitFeaturesInSep (sep, (Pointer) &ils, CollectAllLocations);
11524 FetchFromUidList (&ils.uidlist, &bsplist, usethreads, TRUE);
11525 SeqMgrHoldIndexing (FALSE);
11526 ObjMgrClearHold ();
11527 }
11528
11529 if (products) {
11530 ObjMgrSetHold ();
11531 SeqMgrHoldIndexing (TRUE);
11532 ils.uidlist = NULL;
11533 VisitFeaturesInSep (sep, (Pointer) &ils, CollectAllProducts);
11534 FetchFromUidList (&ils.uidlist, &bsplist, usethreads, TRUE);
11535 SeqMgrHoldIndexing (FALSE);
11536 ObjMgrClearHold ();
11537 }
11538
11539 if (loc != NULL) {
11540 ObjMgrSetHold ();
11541 SeqMgrHoldIndexing (TRUE);
11542 ils.uidlist = NULL;
11543 CollectAllSublocs (loc, (Pointer) &ils);
11544 FetchFromUidList (&ils.uidlist, &bsplist, usethreads, TRUE);
11545 SeqMgrHoldIndexing (FALSE);
11546 ObjMgrClearHold ();
11547 }
11548
11549 /* process list of non-GI sips, lock into memory */
11550
11551 if (ils.siplist != NULL) {
11552 FetchFromSipList (&ils.siplist, &bsplist);
11553
11554 ValNodeFree (ils.siplist);
11555 }
11556
11557 SeqEntrySetScope (oldsep);
11558 return bsplist;
11559 }
11560
11561 /***************************/
11562
LockFarComponentsEx(SeqEntryPtr sep,Boolean components,Boolean locations,Boolean products,SeqLocPtr loc)11563 NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, SeqLocPtr loc)
11564
11565 {
11566 #ifdef OS_UNIX
11567 CharPtr str;
11568 #endif
11569
11570 if (sep == NULL) return NULL;
11571
11572 #ifdef OS_UNIX
11573 str = getenv ("ADV_LOCK_FAR_COMPONENTS");
11574 if (str != NULL) {
11575 if (StringICmp (str, "Multi") == 0) {
11576 return AdvcLockFarComponents (sep, components, locations, products, loc, TRUE);
11577 }
11578 }
11579 #endif
11580
11581 return AdvcLockFarComponents (sep, components, locations, products, loc, FALSE);
11582 }
11583
LockFarComponents(SeqEntryPtr sep)11584 NLM_EXTERN ValNodePtr LockFarComponents (SeqEntryPtr sep)
11585
11586 {
11587 return LockFarComponentsEx (sep, TRUE, FALSE, FALSE, NULL);
11588 }
11589
UnlockFarComponents(ValNodePtr bsplist)11590 NLM_EXTERN ValNodePtr UnlockFarComponents (ValNodePtr bsplist)
11591
11592 {
11593 BioseqPtr bsp;
11594 ValNodePtr vnp;
11595
11596 if (bsplist == NULL) return NULL;
11597
11598 ObjMgrSetHold ();
11599
11600 for (vnp = bsplist; vnp != NULL; vnp = vnp->next) {
11601 bsp = (BioseqPtr) vnp->data.ptrvalue;
11602 if (bsp != NULL) {
11603 BioseqUnlock (bsp);
11604 }
11605 }
11606
11607 ObjMgrClearHold ();
11608
11609 return ValNodeFree (bsplist);
11610 }
11611
LockFarAlignmentBioseqs(SeqAlignPtr salp)11612 NLM_EXTERN ValNodePtr LockFarAlignmentBioseqs (SeqAlignPtr salp)
11613 {
11614 ValNodePtr bsplist = NULL;
11615 SeqAlignPtr tmp_salp;
11616 Int4 alnRows, seq_num, index_num;
11617 SeqIdPtr tmp_sip;
11618 BioseqPtr bsp;
11619 ObjMgrDataPtr omdp;
11620 ObjMgrPtr omp;
11621
11622 omp = ObjMgrWriteLock();
11623 if (omp == NULL) return NULL;
11624
11625 for (tmp_salp = salp; tmp_salp != NULL; tmp_salp = tmp_salp->next) {
11626 alnRows = AlnMgr2GetNumRows(tmp_salp); /* size of the alignment */
11627 for (seq_num = 1; seq_num < alnRows + 1; seq_num++) {
11628 tmp_sip = AlnMgr2GetNthSeqIdPtr(tmp_salp, seq_num);
11629 bsp = BioseqLockById(tmp_sip);
11630 if (bsp == NULL) continue;
11631 index_num = ObjMgrLookup(omp, (Pointer)bsp);
11632 if (index_num < 0) {
11633 ValNodeAddPointer (&bsplist, 0, bsp);
11634 } else {
11635 omdp = ObjMgrFindTop (omp, omp->datalist[index_num]);
11636 if (omdp != NULL && omdp->tempload == TL_NOT_TEMP) {
11637 BioseqUnlock (bsp);
11638 } else {
11639 ValNodeAddPointer (&bsplist, 0, bsp);
11640 }
11641 }
11642 }
11643 }
11644 ObjMgrUnlock();
11645 return bsplist;
11646 }
11647
11648 /*****************************************************************************
11649 *
11650 * SeqMgrSetPreCache
11651 * registers the GiToSeqID precache function
11652 * LookupFarSeqIDs
11653 * calls any registered function to preload the cache
11654 *
11655 *****************************************************************************/
11656
SeqMgrSetPreCache(SIDPreCacheFunc func)11657 NLM_EXTERN void LIBCALL SeqMgrSetPreCache (SIDPreCacheFunc func)
11658
11659 {
11660 SeqMgrPtr smp;
11661
11662 smp = SeqMgrWriteLock ();
11663 if (smp == NULL) return;
11664 smp->seq_id_precache_func = func;
11665 SeqMgrUnlock ();
11666 }
11667
LookupFarSeqIDs(SeqEntryPtr sep,Boolean components,Boolean locations,Boolean products,Boolean alignments,Boolean history,Boolean inference,Boolean others)11668 NLM_EXTERN Int4 LookupFarSeqIDs (
11669 SeqEntryPtr sep,
11670 Boolean components,
11671 Boolean locations,
11672 Boolean products,
11673 Boolean alignments,
11674 Boolean history,
11675 Boolean inference,
11676 Boolean others
11677 )
11678
11679 {
11680 SIDPreCacheFunc func;
11681 SeqMgrPtr smp;
11682
11683 smp = SeqMgrReadLock ();
11684 if (smp == NULL) return 0;
11685 func = smp->seq_id_precache_func;
11686 SeqMgrUnlock ();
11687 if (func == NULL) return 0;
11688 return (*func) (sep, components, locations, products, alignments, history, inference, others);
11689 }
11690
11691 /*****************************************************************************
11692 *
11693 * SeqMgrSetSeqIdSetFunc
11694 * registers the GiToSeqIdSet lookup function
11695 * GetSeqIdSetForGI
11696 * calls any registered function to lookup the set of SeqIds
11697 *
11698 *****************************************************************************/
11699
SeqMgrSetSeqIdSetFunc(SeqIdSetLookupFunc func)11700 NLM_EXTERN void LIBCALL SeqMgrSetSeqIdSetFunc (SeqIdSetLookupFunc func)
11701
11702 {
11703 SeqMgrPtr smp;
11704
11705 smp = SeqMgrWriteLock ();
11706 if (smp == NULL) return;
11707 smp->seq_id_set_lookup_func = func;
11708 SeqMgrUnlock ();
11709 }
11710
GetSeqIdSetForGI(BIG_ID gi)11711 NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdSetForGI (BIG_ID gi)
11712
11713 {
11714 SeqIdSetLookupFunc func;
11715 SeqMgrPtr smp;
11716
11717 smp = SeqMgrReadLock ();
11718 if (smp == NULL) return 0;
11719 func = smp->seq_id_set_lookup_func;
11720 SeqMgrUnlock ();
11721 if (func == NULL) return 0;
11722 return (*func) (gi);
11723 }
11724
11725 /*****************************************************************************
11726 *
11727 * SeqMgrSetLenFunc
11728 * registers the GiToSeqLen lookup function
11729 * SeqMgrSetAccnVerFunc
11730 * registers the GiToAccnVer lookup function
11731 *
11732 *****************************************************************************/
11733
SeqMgrSetLenFunc(SeqLenLookupFunc func)11734 NLM_EXTERN void LIBCALL SeqMgrSetLenFunc (SeqLenLookupFunc func)
11735
11736 {
11737 SeqMgrPtr smp;
11738
11739 smp = SeqMgrWriteLock ();
11740 if (smp == NULL) return;
11741 smp->seq_len_lookup_func = func;
11742 SeqMgrUnlock ();
11743 }
11744
SeqMgrSetAccnVerFunc(AccnVerLookupFunc func)11745 NLM_EXTERN void LIBCALL SeqMgrSetAccnVerFunc (AccnVerLookupFunc func)
11746
11747 {
11748 SeqMgrPtr smp;
11749
11750 smp = SeqMgrWriteLock ();
11751 if (smp == NULL) return;
11752 smp->accn_ver_lookup_func = func;
11753 SeqMgrUnlock ();
11754 }
11755
11756 /*******************************************************************
11757 *
11758 * SeqEntryAsnOut()
11759 *
11760 * dumps parts of SeqEntry from a memory object
11761 *
11762 *******************************************************************/
11763
/* working state for SeqEntryAsnOut */
typedef struct ext_pack_data {
  SeqEntryPtr  sep [5];       /* candidate SeqEntries indexed by retcode: 0 top of entity,
                                 1 the Bioseq's own entry, 2 segset, 3 nuc-prot set, 4 pub-set */
  Uint4        minSapItemID;  /* lowest SeqAnnot itemID seen by GetSapBounds */
  Uint4        maxSapItemID;  /* highest SeqAnnot itemID seen by GetSapBounds */
  ValNodePtr   descChain;     /* head of chain of pointers to descriptors on parent sets */
  ValNodePtr   featChain;     /* head of chain of pointers to features packaged outside the entry */
  ValNodePtr   lastVnp;       /* tail of the chain currently being built */
} ExtPackData, PNTR ExtPackPtr;
11772
/*
 * GetSapBounds
 *
 * SeqAnnot visitor that widens the [minSapItemID, maxSapItemID] range in
 * the ExtPackData passed as userdata so that it includes sap's itemID.
 * Does nothing if either argument is NULL, in line with the other visitor
 * callbacks in this file.
 */
static void GetSapBounds (SeqAnnotPtr sap, Pointer userdata)

{
  ExtPackPtr  epp;

  if (sap == NULL || userdata == NULL) return;

  epp = (ExtPackPtr) userdata;
  epp->minSapItemID = MIN (epp->minSapItemID, sap->idx.itemID);
  epp->maxSapItemID = MAX (epp->maxSapItemID, sap->idx.itemID);
}
11782
/*
 * SeqEntryAsnOut
 *
 * Writes one SeqEntry from the entity that contains sep to aipout.  While
 * writing, two AsnIo options point at extra material that applies to the
 * entry but is packaged outside it: descriptors on enclosing sets, and
 * features indexed on the Bioseq whose SeqAnnot lies outside the entry.
 *
 *   sep     - used only to find the entity ID and from it the top SeqEntry
 *   sip     - id of the Bioseq of interest; looked up with the scope set
 *             to the top SeqEntry
 *   retcode - index into epd.sep selecting what is written: 0 top of
 *             entity, 1 the Bioseq's own SeqEntry, 2 enclosing segset,
 *             3 enclosing nuc-prot set, 4 enclosing pub-set.  Values
 *             outside 0..4 are treated as 0.  If the selected slot is
 *             empty, lower indices are tried in turn.
 *   aipout  - output stream
 *
 * Returns FALSE if an argument is NULL, the entity or its top SeqEntry
 * cannot be found, the Bioseq is not found, or the selected SeqEntry has
 * no data pointer.  Otherwise returns TRUE; the result of
 * SeqEntryAsnWrite is not examined.
 */
NLM_EXTERN Boolean SeqEntryAsnOut (SeqEntryPtr sep, SeqIdPtr sip,
                                   Int2 retcode, AsnIoPtr aipout)

{
  BioseqPtr          bsp;
  BioseqSetPtr       bssp;
  SeqMgrFeatContext  context;
  Uint2              entityID;
  ExtPackData        epd;
  SeqEntryPtr        oldscope;
  BioseqSetPtr       parent;
  SeqAnnotPtr        sap;
  SeqDescrPtr        sdp;
  SeqFeatPtr         sfp;
  SeqEntryPtr        top;
  ValNodePtr         vnp;
  AsnOptionPtr       aopp_feat = NULL, aopp_desc = NULL;
  DataVal            dv;

  if (sep == NULL || sip == NULL || aipout == NULL) return FALSE;

  /* out-of-range retcode means "top of entity" */

  if (retcode > 4) {
    retcode = 0;
  }
  if (retcode < 0) {
    retcode = 0;
  }

  entityID = ObjMgrGetEntityIDForChoice (sep);
  if (entityID < 1) return FALSE;
  top = GetTopSeqEntryForEntityID (entityID);
  if (top == NULL) return FALSE;

  /* indexing sets idx fields, will find features outside of desired
     SeqEntry */

  if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
    SeqMgrIndexFeatures (entityID, NULL);
  }

  /* find Bioseq within entity given SeqId */

  oldscope = SeqEntrySetScope (top);
  bsp = BioseqFind (sip);
  SeqEntrySetScope (oldscope);
  if (bsp == NULL) return FALSE;

  MemSet ((Pointer) &epd, 0, sizeof (ExtPackData));

  /* get parent hierarchy */

  epd.sep [0] = top;
  epd.sep [1] = bsp->seqentry;

  /* walk up through enclosing sets, filling slots 2..4 by set class;
     a set further up of the same class overwrites an earlier one */

  if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
    parent = (BioseqSetPtr) bsp->idx.parentptr;
    while (parent != NULL) {
      switch (parent->_class) {
        case BioseqseqSet_class_nuc_prot :
          epd.sep [3] = parent->seqentry;
          break;
        case BioseqseqSet_class_segset :
          epd.sep [2] = parent->seqentry;
          break;
        case BioseqseqSet_class_pub_set :
          epd.sep [4] = parent->seqentry;
          break;
        default :
          break;
      }
      if (parent->idx.parenttype == OBJ_BIOSEQSET) {
        parent = (BioseqSetPtr) parent->idx.parentptr;
      } else {
        parent = NULL;
      }
    }
  }

  /* get desired SeqEntry given retcode parameter */

  /* the sep parameter is reused from here on for the entry to be written */

  sep = NULL;
  while (retcode >= 0 && sep == NULL) {
    sep = epd.sep [retcode];
    retcode --;
  }
  if (sep == NULL) return FALSE;

  /* get immediate parent of SeqEntry to be returned */

  /* note: when the chosen entry is a Bioseq, bsp is replaced by it, and
     the feature searches below use that Bioseq */

  parent = NULL;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return FALSE;
    if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
      parent = (BioseqSetPtr) bsp->idx.parentptr;
    }
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return FALSE;
    if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
      parent = (BioseqSetPtr) bssp->idx.parentptr;
    }
  }

  /* find itemID range of SeqAnnots within current SeqEntry */

  /* start with an empty range (min above max); GetSapBounds widens it */

  epd.minSapItemID = UINT4_MAX;
  epd.maxSapItemID = 0;
  VisitAnnotsInSep (sep, (Pointer) &epd, GetSapBounds);

  /* go up parent hierarchy, pointing to applicable descriptors */

  /* chain nodes point at the existing descriptors; nothing is copied.
     lastVnp tracks the tail so that each append is constant time */

  epd.lastVnp = NULL;
  while (parent != NULL) {
    for (sdp = parent->descr; sdp != NULL; sdp = sdp->next) {
      vnp = ValNodeAddPointer (&(epd.lastVnp), 0, (Pointer) sdp);
      if (epd.descChain == NULL) {
        epd.descChain = epd.lastVnp;
      }
      epd.lastVnp = vnp;
    }
    if (parent->idx.parenttype == OBJ_BIOSEQSET) {
      parent = (BioseqSetPtr) parent->idx.parentptr;
    } else {
      parent = NULL;
    }
  }

  /* find features indexed on Bioseq that are packaged outside
     current SeqEntry */

  /* "outside" is decided by the SeqAnnot's itemID falling outside the
     range recorded above */

  epd.lastVnp = NULL;
  sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
  while (sfp != NULL) {
    sap = context.sap;
    if (sap != NULL) {
      if (sap->idx.itemID < epd.minSapItemID || sap->idx.itemID >
          epd.maxSapItemID) {
        vnp = ValNodeAddPointer (&(epd.lastVnp), 0, (Pointer) sfp);
        if (epd.featChain == NULL) {
          epd.featChain = epd.lastVnp;
        }
        epd.lastVnp = vnp;
      }
    }
    sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context);
  }

  /* also need to get features whose products point to the Bioseq */

  /* nucleotide: RNA feature with this product; protein: CDS with this product */

  sfp = NULL;
  if (ISA_na (bsp->mol)) {
    sfp = SeqMgrGetRNAgivenProduct (bsp, &context);
  } else if (ISA_aa (bsp->mol)) {
    sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
  }
  if (sfp != NULL) {
    sap = context.sap;
    if (sap != NULL) {
      if (sap->idx.itemID < epd.minSapItemID || sap->idx.itemID >
          epd.maxSapItemID) {
        vnp = ValNodeAddPointer (&(epd.lastVnp), 0, (Pointer) sfp);
        if (epd.featChain == NULL) {
          epd.featChain = epd.lastVnp;
        }
        epd.lastVnp = vnp;
      }
    }
  }

  /* now write sep, adding descriptors from descChain and features
     from featChain */

  /* the chains are attached to aipout as options before the write;
     presumably the SeqEntry writer picks them up - confirm against
     SeqEntryAsnWrite */

  MemSet(&dv, 0, sizeof(DataVal));  /* zero it out */
  if (epd.descChain)   /* have extra descriptors */
  {
    dv.ptrvalue = (Pointer)(epd.descChain);
    aopp_desc = AsnIoOptionNew(aipout, OP_NCBIOBJSEQ, CHECK_EXTRA_DESC, dv, NULL);
  }

  if (epd.featChain)   /* have extra features */
  {
    dv.ptrvalue = (Pointer)(epd.featChain);
    aopp_feat = AsnIoOptionNew(aipout, OP_NCBIOBJSEQ, CHECK_EXTRA_FEAT, dv, NULL);
  }

  SeqEntryAsnWrite(sep, aipout, NULL);

  /* clean up valnode chains */

  /* NOTE(review): aopp_desc and aopp_feat are never used after being
     assigned, and nothing here removes the options from aipout, so they
     appear to keep pointing at the chains freed below - confirm whether
     they should be released before aipout is used again.  The result of
     SeqEntryAsnWrite is also discarded. */

  ValNodeFree (epd.descChain);
  ValNodeFree (epd.featChain);

  return TRUE;
}
11978
11979 /*
11980 static void SeqMgrReport (void)
11981
11982 {
11983 BioseqPtr bsp;
11984 BioseqPtr PNTR bspp;
11985 Int4 i, num;
11986 ObjMgrDataPtr omdp;
11987 ObjMgrPtr omp;
11988 SeqIdIndexElementPtr PNTR sipp;
11989 SeqMgrPtr smp;
11990 Char str [128];
11991
11992 omp = ObjMgrGet ();
11993 if (omp != NULL) {
11994 printf ("Currobj %d, totobj %d\n", (int) omp->currobj, (int) omp->totobj);
11995 fflush (stdout);
11996 }
11997 smp = SeqMgrGet ();
11998 if (smp != NULL) {
11999 num = smp->BioseqIndexCnt;
12000 sipp = smp->BioseqIndex;
12001 printf ("BioseqIndexCnt %ld\n", (long) num);
12002 fflush (stdout);
12003 if (sipp == NULL) {
12004 printf ("sipp is NULL\n");
12005 fflush (stdout);
12006 } else {
12007 for (i = 0; i < num; i++) {
12008 omdp = sipp [i]->omdp;
12009 if (omdp != NULL && omdp->bulkIndexFree) {
12010 printf ("omdp %ld bulkIndexFree flag set\n", (long) i);
12011 fflush (stdout);
12012 }
12013 StringNCpy_0 (str, sipp [i]->str, sizeof (str));
12014 RevStringUpper (str);
12015 printf (" %3ld - %s\n", (long) i, str);
12016 fflush (stdout);
12017 }
12018 printf ("-\n");
12019 fflush (stdout);
12020 for (i = smp->BioseqIndexCnt; i < smp->BioseqIndexNum; i++) {
12021 StringNCpy_0 (str, sipp [i]->str, sizeof (str));
12022 RevStringUpper (str);
12023 if (! StringHasNoText (str)) {
12024 printf (" %3ld - %s\n", (long) i, str);
12025 fflush (stdout);
12026 }
12027 }
12028 printf ("-\n");
12029 fflush (stdout);
12030 }
12031 num = smp->NonIndexedBioseqCnt;
12032 bspp = smp->NonIndexedBioseq;
12033 printf ("NonIndexedBioseqCnt %ld\n", (long) num);
12034 fflush (stdout);
12035 if (bspp == NULL) {
12036 printf ("bspp is NULL\n");
12037 fflush (stdout);
12038 return;
12039 }
12040 for (i = 0; i < num; i++) {
12041 bsp = bspp [i];
12042 if (bsp != NULL) {
12043 SeqIdWrite (bsp->id, str, PRINTID_FASTA_LONG, sizeof (str) - 1);
12044 printf (" %3ld - %s\n", (long) i, str);
12045 fflush (stdout);
12046 } else {
12047 printf (" %3ld - (null)\n", (long) i);
12048 fflush (stdout);
12049 }
12050 }
12051 }
12052 printf ("\n");
12053 fflush (stdout);
12054 }
12055 */
12056
12057 typedef int (*FeatureFindCompare) PROTO ((SMFeatItemPtr, CharPtr));
12058
/* FeatureFindCompare callback that compares an item's stored label with the
   search label using StringICmp.  A NULL item is reported as -1. */
static int FeatureFindCompareLabel (SMFeatItemPtr feat, CharPtr label)
{
  return (feat != NULL) ? StringICmp (feat->label, label) : -1;
}
12064
/* FeatureFindCompare callback that compares a gene's locus_tag with the
   search label using StringICmp.

   Returns -1 when the item cannot be compared: NULL item, no SeqFeat,
   subtype other than FEATDEF_GENE, or a gene feature whose GeneRef pointer
   is NULL. */
static int FeatureFindCompareLocusTag (SMFeatItemPtr feat, CharPtr label)
{
  GeneRefPtr  grp;

  if (feat == NULL || feat->sfp == NULL || feat->subtype != FEATDEF_GENE) {
    return -1;
  }
  grp = (GeneRefPtr) feat->sfp->data.value.ptrvalue;
  /* a gene feature without its GeneRef has no locus_tag to compare */
  if (grp == NULL) {
    return -1;
  }
  return StringICmp (grp->locus_tag, label);
}
12075
/* Binary search for the candidate position of the first entry matching label.

   array         sorted array of feature items to search
   compare_func  callback giving the ordering of an entry against label
   num           number of entries in array
   label         search key handed to compare_func
   seqFeatChoice, featDefChoice
                 accepted for signature compatibility; not consulted here

   Returns -1 if array or compare_func is NULL or num is less than 1.
   Otherwise returns a position in [0, num - 1]: the lowest position whose
   entry is NULL or does not compare below label, or num - 1 when every entry
   compares below it.  The entry at the returned position is NOT guaranteed
   to match; the caller must confirm with compare_func. */
static Int4 FindArrayPosForFirst
(SMFeatItemPtr PNTR array,
 FeatureFindCompare compare_func,
 Int4 num,
 CharPtr label,
 Uint1 seqFeatChoice,
 Uint1 featDefChoice)
{
  Int4           lo, hi, mid;
  SMFeatItemPtr  feat;

  if (array == NULL || compare_func == NULL || num < 1) return -1;

  lo = 0;
  hi = num - 1;
  while (lo < hi) {
    /* midpoint computed from the span so the sum cannot overflow Int4 */
    mid = lo + (hi - lo) / 2;
    feat = array [mid];
    if (feat != NULL && compare_func (feat, label) < 0) {
      lo = mid + 1;
    } else {
      /* NULL entries and entries at or above the key keep the search left */
      hi = mid;
    }
  }
  return hi;
}
12106
FindNthFeatureUseMultipleArrays(SMFeatItemPtr PNTR PNTR arrays,Int4Ptr array_sizes,FeatureFindCompare PNTR compare_funcs,Int4 num_arrays,CharPtr label,Uint2 entityID,BioseqPtr bsp,Uint1 seqFeatChoice,Uint1 featDefChoice,Int4 n,Int4 PNTR last_found,SeqMgrFeatContext PNTR context)12107 static SeqFeatPtr FindNthFeatureUseMultipleArrays
12108 (SMFeatItemPtr PNTR PNTR arrays,
12109 Int4Ptr array_sizes,
12110 FeatureFindCompare PNTR compare_funcs,
12111 Int4 num_arrays,
12112 CharPtr label,
12113 Uint2 entityID,
12114 BioseqPtr bsp,
12115 Uint1 seqFeatChoice,
12116 Uint1 featDefChoice,
12117 Int4 n,
12118 Int4 PNTR last_found,
12119 SeqMgrFeatContext PNTR context)
12120 {
12121 Int4Ptr firsts;
12122 Boolean found, already_found;
12123 SMFeatItemPtr feat;
12124 Int4 index = 0, k, leftmost, i2;
12125 SMFeatItemPtr PNTR found_list;
12126 SeqFeatPtr sfp = NULL;
12127 ObjMgrDataPtr omdp;
12128
12129 if (arrays == NULL || array_sizes == NULL || compare_funcs == NULL || num_arrays < 1) return NULL;
12130
12131 omdp = SeqMgrGetOmdpForBioseq (bsp);
12132 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
12133
12134 found_list = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * n);
12135
12136 /* set up pointers to first match in each array */
12137 firsts = (Int4Ptr) MemNew (num_arrays * sizeof (Int4));
12138 for (k = 0; k < num_arrays; k++) {
12139 firsts[k] = FindArrayPosForFirst (arrays[k],
12140 compare_funcs[k],
12141 array_sizes[k], label,
12142 seqFeatChoice, featDefChoice);
12143 found = FALSE;
12144 while (!found
12145 && firsts[k] >= 0 && firsts[k] < array_sizes[k]
12146 && compare_funcs[k] (arrays[k][firsts[k]], label) == 0) {
12147 feat = arrays[k][firsts[k]];
12148 if (feat->sfp != NULL
12149 && (seqFeatChoice == 0 || feat->sfp->data.choice == seqFeatChoice)
12150 && (featDefChoice == 0 || feat->subtype == featDefChoice)
12151 && (! feat->ignore)) {
12152 found = TRUE;
12153 } else {
12154 firsts[k]++;
12155 }
12156 }
12157 if (!found) {
12158 firsts[k] = -1;
12159 }
12160 }
12161 leftmost = 0;
12162 while (index < n && leftmost != -1) {
12163 /* find leftmost match first and increment */
12164 leftmost = -1;
12165 for (k = 0; k < num_arrays; k++) {
12166 if (firsts[k] > -1) {
12167 if (leftmost == -1 || SortFeatItemListByPos (arrays[k] + firsts[k], arrays[leftmost] + firsts[leftmost]) < 0) {
12168 leftmost = k;
12169 }
12170 }
12171 }
12172 if (leftmost > -1) {
12173 already_found = FALSE;
12174 for (i2 = 0; i2 < index && !already_found; i2++) {
12175 if (found_list[i2]->sfp == arrays[leftmost][firsts[leftmost]]->sfp) {
12176 already_found = TRUE;
12177 }
12178 }
12179 if (!already_found) {
12180 feat = arrays[leftmost][firsts[leftmost]];
12181 found_list[index] = feat;
12182 sfp = feat->sfp;
12183 if (context != NULL) {
12184 context->entityID = entityID;
12185 context->itemID = feat->itemID;
12186 context->sfp = feat->sfp;
12187 context->sap = feat->sap;
12188 context->bsp = feat->bsp;
12189 context->label = feat->label;
12190 context->left = feat->left;
12191 context->right = feat->right;
12192 context->dnaStop = feat->dnaStop;
12193 context->partialL = feat->partialL;
12194 context->partialR = feat->partialR;
12195 context->farloc = feat->farloc;
12196 context->strand = feat->strand;
12197 context->seqfeattype = sfp->data.choice;
12198 context->featdeftype = feat->subtype;
12199 context->numivals = feat->numivals;
12200 context->ivals = feat->ivals;
12201 context->userdata = NULL;
12202 context->omdp = (Pointer) omdp;
12203 context->index = firsts[leftmost] + 1;
12204 }
12205 index++;
12206 if (last_found != NULL) {
12207 *last_found = index;
12208 }
12209 }
12210 /* increment to next in leftmost array */
12211 firsts[leftmost]++;
12212 found = FALSE;
12213 while (!found
12214 && firsts[leftmost] >= 0 && firsts[leftmost] < array_sizes[leftmost]
12215 && compare_funcs[leftmost] (arrays[leftmost][firsts[leftmost]], label) == 0) {
12216 feat = arrays[leftmost][firsts[leftmost]];
12217 if (feat->sfp != NULL
12218 && (seqFeatChoice == 0 || feat->sfp->data.choice == seqFeatChoice)
12219 && (featDefChoice == 0 || feat->subtype == featDefChoice)
12220 && (! feat->ignore)) {
12221 found = TRUE;
12222 } else {
12223 firsts[leftmost]++;
12224 }
12225 }
12226 if (!found) {
12227 firsts[leftmost] = -1;
12228 }
12229 }
12230 }
12231 found_list = MemFree (found_list);
12232 if (index == n) {
12233 return sfp;
12234 } else {
12235 return NULL;
12236 }
12237 }
12238
FindNthGeneOnBspByLabelOrLocusTag(BioseqPtr bsp,CharPtr label,Int4 n,Int4 PNTR last_found,SeqMgrFeatContext PNTR context)12239 NLM_EXTERN SeqFeatPtr FindNthGeneOnBspByLabelOrLocusTag
12240 (BioseqPtr bsp,
12241 CharPtr label,
12242 Int4 n,
12243 Int4 PNTR last_found,
12244 SeqMgrFeatContext PNTR context)
12245 {
12246 ObjMgrDataPtr omdp;
12247 BioseqExtraPtr bspextra;
12248 Uint2 entityID;
12249 SMFeatItemPtr PNTR arrays[2];
12250 Int4 array_sizes[2];
12251 FeatureFindCompare compare_funcs[2];
12252 SeqFeatPtr sfp = NULL;
12253 Int4 num;
12254
12255 if (bsp == NULL || StringHasNoText (label)) return NULL;
12256
12257 omdp = SeqMgrGetOmdpForBioseq (bsp);
12258 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
12259
12260 bspextra = (BioseqExtraPtr) omdp->extradata;
12261 if (bspextra == NULL) return NULL;
12262 num = bspextra->numfeats;
12263
12264 if (num < 1 || bspextra->featsByLabel == NULL || bspextra->genesByLocusTag == NULL) return NULL;
12265
12266
12267 if (n < 0 || n > bspextra->numfeats) return NULL;
12268
12269 entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
12270
12271 arrays[0] = bspextra->featsByLabel;
12272 array_sizes[0] = bspextra->numfeats;
12273 compare_funcs[0] = FeatureFindCompareLabel;
12274 arrays[1] = bspextra->genesByLocusTag;
12275 array_sizes[1] = bspextra->numgenes;
12276 compare_funcs[1] = FeatureFindCompareLocusTag;
12277
12278 sfp = FindNthFeatureUseMultipleArrays (arrays, array_sizes, compare_funcs, 2, label, entityID,
12279 bsp, SEQFEAT_GENE, FEATDEF_GENE, n + 1, last_found,
12280 context);
12281
12282 return sfp;
12283 }
12284
12285
SeqMgrClearBioseqExtraDataDescriptors(ObjMgrDataPtr omdp)12286 static Boolean SeqMgrClearBioseqExtraDataDescriptors (ObjMgrDataPtr omdp)
12287 {
12288 BioseqExtraPtr bspextra;
12289
12290 if (omdp == NULL) return FALSE;
12291 bspextra = (BioseqExtraPtr) omdp->extradata;
12292 if (bspextra == NULL) return FALSE;
12293
12294 /* free sorted arrays of pointers into data blocks */
12295
12296 bspextra->descrsByID = MemFree (bspextra->descrsByID);
12297 bspextra->descrsBySdp = MemFree (bspextra->descrsBySdp);
12298 bspextra->descrsByIndex = MemFree (bspextra->descrsByIndex);
12299
12300 /* free list of descriptor information */
12301
12302 bspextra->desclisthead = ValNodeFreeData (bspextra->desclisthead);
12303
12304 return TRUE;
12305 }
12306
12307
/* SeqEntryExplore callback that clears the descriptor index of each Bioseq
   entry it visits.

   sep     entry being visited; anything other than a Bioseq is skipped
   mydata  optional BoolPtr; set to TRUE when an index was cleared
   index, indent
           supplied by the explorer; unused

   NOTE(review): Bioseq-set entries are never processed here because the
   first guard returns for everything that is not a Bioseq. */
static void SeqMgrClearDescriptorIndexesProc (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr      bsp;
  ObjMgrDataPtr  omdp;
  BoolPtr        rsult;

  if (sep == NULL || (! IS_Bioseq (sep))) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return;
  if (! SeqMgrClearBioseqExtraDataDescriptors (omdp)) return;

  /* the result flag is optional; do not write through a NULL pointer */
  rsult = (BoolPtr) mydata;
  if (rsult != NULL) {
    *rsult = TRUE;
  }
}
12331
12332
12333 /* NOTE - this function does NOT do basic seqentry cleanup;
12334 * it assumes that cleanup has been done already, probably
12335 * on just the descriptor that was changed.
12336 */
SeqMgrRedoDescriptorIndexes(Uint2 entityID,Pointer ptr)12337 NLM_EXTERN void SeqMgrRedoDescriptorIndexes (Uint2 entityID, Pointer ptr)
12338
12339 {
12340 Boolean rsult = FALSE;
12341 SeqEntryPtr sep;
12342
12343 if (entityID == 0) {
12344 entityID = ObjMgrGetEntityIDForPointer (ptr);
12345 }
12346 if (entityID == 0) return;
12347 sep = SeqMgrGetTopSeqEntryForEntity (entityID);
12348 if (sep == NULL) return;
12349 SeqEntryExplore (sep, (Pointer) (&rsult), SeqMgrClearDescriptorIndexesProc);
12350
12351 /* finish indexing list of descriptors on each indexed bioseq */
12352
12353 VisitBioseqsInSep (sep, NULL, RecordDescriptorsInBioseqs);
12354
12355 if (IS_Bioseq_set (sep)) {
12356 RecordDescriptorsOnTopSet (sep);
12357 }
12358
12359 SeqEntryExplore (sep, NULL, IndexRecordedDescriptors);
12360 }
12361
12362
/* Regenerates the stored label of every entry in the featsByLabel index of
   omdp's extra data and then re-sorts that index with
   SortFeatItemListByLabel.

   Each label is produced by FeatDefLabel with OM_LABEL_CONTENT.  For RNA
   features the label starts just after the first "RNA-" found in that text,
   when present.  Entries that are NULL or carry no SeqFeat keep their
   current label.

   Does nothing if omdp is NULL, has no extra data, or has no featsByLabel
   array. */
static void SeqMgrRedoFeatByLabel (ObjMgrDataPtr omdp)
{
  BioseqExtraPtr  bspextra;
  SMFeatItemPtr   item;
  SeqFeatPtr      sfp;
  Int4            i;
  Char            buf [129];
  CharPtr         ptr;

  if (omdp == NULL) return;
  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL || bspextra->featsByLabel == NULL) return;

  for (i = 0; i < bspextra->numfeats; i++) {
    item = bspextra->featsByLabel [i];
    /* nothing to relabel without an item and its feature */
    if (item == NULL || item->sfp == NULL) continue;
    sfp = item->sfp;

    /* start from an empty string in case the labeller writes nothing */
    buf [0] = '\0';
    FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);

    ptr = buf;
    if (sfp->data.choice == SEQFEAT_RNA) {
      ptr = StringStr (buf, "RNA-");
      ptr = (ptr != NULL) ? ptr + 4 : buf;
    }

    item->label = MemFree (item->label);
    item->label = StringSaveNoNull (ptr);
  }

  StableMergeSort ((VoidPtr) bspextra->featsByLabel, (size_t) bspextra->numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByLabel);
}
12394
/* SeqEntryExplore callback that rebuilds the feature-by-label index of each
   Bioseq entry it visits.

   sep     entry being visited; anything other than a Bioseq is skipped
   mydata, index, indent
           supplied by the explorer; unused

   The ObjMgrData lookup result is handed straight to SeqMgrRedoFeatByLabel,
   which ignores a NULL argument. */
static void SeqMgrRedoFeatByLabelProc (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr  bsp;

  if (sep == NULL || (! IS_Bioseq (sep))) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  SeqMgrRedoFeatByLabel (SeqMgrGetOmdpForBioseq (bsp));
}
12414
12415
SeqMgrRedoFeatByLabelIndexes(Uint2 entityID,Pointer ptr)12416 NLM_EXTERN void SeqMgrRedoFeatByLabelIndexes (Uint2 entityID, Pointer ptr)
12417 {
12418 Int4 ret;
12419 SeqEntryPtr sep;
12420
12421 ret = NlmMutexLockEx (&smp_feat_index_mutex);
12422 if (ret) {
12423 ErrPostEx (SEV_FATAL, 0, 0, "SeqMgrIndexFeatures mutex failed [%ld]", (long) ret);
12424 return;
12425 }
12426 if (entityID == 0) {
12427 entityID = ObjMgrGetEntityIDForPointer (ptr);
12428 }
12429 if (entityID != 0) {
12430 sep = GetTopSeqEntryForEntityID (entityID);
12431 SeqEntryExplore (sep, NULL, SeqMgrRedoFeatByLabelProc);
12432 }
12433
12434 NlmMutexUnlock (smp_feat_index_mutex);
12435 }
12436
12437