1 /*  seqmgr.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  seqmgr.c
27 *
28 * Author:  James Ostell
29 *
30 * Version Creation Date: 9/94
31 *
32 * $Revision: 6.344 $
33 *
34 * File Description:  Manager for Bioseqs and BioseqSets
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date       Name        Description of modification
39 * -------  ----------  -----------------------------------------------------
40 *
41 * ==========================================================================
42 */
43 
44 /** for ErrPostEx() ****/
45 
/* Module name exposed through THIS_MODULE for the ErrPostEx() diagnostics in this file. */
46 static char *this_module = "ncbiapi";
47 #define THIS_MODULE this_module
/* Source file name (from __FILE__) exposed through THIS_FILE for the same diagnostics. */
48 static char *this_file = __FILE__;
49 #define THIS_FILE this_file
50 
51 /**********************/
52 
53 #include <explore.h>       /* new public functions prototyped here */
54 #include <seqmgr.h>        /* the interface */
55 #include <sequtil.h>       /* CLEAN THIS UP LATER? */
56 #include <gather.h>
57 #include <subutil.h>
58 #include <ncbithr.h>
59 #include <objfdef.h>
60 #include <sqnutils.h>
61 #include <seqport.h>
62 #include <edutil.h>
63 #include <alignmgr2.h>
64 
65 /*****************************************************************************
66 *
67 *   Bioseq Management
68 *
69 *****************************************************************************/
70 
/* Forward declarations of file-local helpers used before their definitions. */
/* BSFetchFunc is installed as the default fetch callback by SeqMgrGet(). */
71 static BioseqPtr LIBCALL BSFetchFunc PROTO((SeqIdPtr sid, Uint1 ld_type));
/* Shared worker behind BioseqFind(), BioseqFindCore() and BioseqFindSpecial(). */
72 static BioseqPtr NEAR BioseqFindFunc PROTO((SeqIdPtr sid, Boolean reload_from_cache, Boolean force_it, Boolean use_bioseq_cache));
73 static Boolean NEAR SeqMgrGenericSelect PROTO((SeqLocPtr region, Int2 type,
74                                              Uint1Ptr rgb));
/* Re-fetches a record whose ObjMgrData is marked TL_CACHED. */
75 static BioseqPtr NEAR BioseqReloadFunc PROTO((SeqIdPtr sid, ObjMgrDataPtr omdp));
76 
/* Called by BioseqFindFunc() before it searches the SeqId index. */
77 static Boolean NEAR SeqMgrProcessNonIndexedBioseq PROTO((Boolean force_it));
78 static Boolean NEAR SeqMgrAddIndexElement PROTO((SeqMgrPtr smp, BioseqPtr bsp, CharPtr buf,
79                                                   Boolean sort_now));
/* Upper-cases and reverses a string in place. */
80 static void NEAR RevStringUpper PROTO((CharPtr str));
/* Returns the fetch callback currently stored in the SeqMgr. */
81 static BSFetchTop NEAR SeqMgrGetFetchTop (void);
82 
83 
84 /*****************************************************************************
85 *
86 *   Return the current SeqMgr
87 *       SeqMgrGet is obsolete
88 *       SeqMgrReadLock, ReadUnlock, WriteLock, WriteUnlock are thread safe
89 *
90 *****************************************************************************/
/* Mutex taken by SeqMgrGet() around the one-time creation of global_smp. */
91 static TNlmMutex smp_mutex = NULL;
/* The single SeqMgr instance; NULL until SeqMgrGet() creates it. */
92 static SeqMgrPtr global_smp = NULL;
/* Read/write lock acquired by SeqMgrReadLock()/SeqMgrWriteLock() and released by SeqMgrUnlock(). */
93 static TNlmRWlock smp_RWlock = NULL;
/* Second read/write lock created alongside smp_RWlock in SeqMgrGet(); its users are not in this section. */
94 static TNlmRWlock sgi_RWlock = NULL;
95 
96 /*****************************************************************************
97 *
98 *   Return the current SeqMgr
99 *       Initialize if not done already
100 *       This function will become obsolete
101 *
102 *****************************************************************************/
SeqMgrGet(void)103 NLM_EXTERN SeqMgrPtr LIBCALL SeqMgrGet (void)
104 {
105     Int4 ret;
106     SeqMgrPtr smp;
107 
108     if (global_smp != NULL)
109         return global_smp;
110 
111     ret = NlmMutexLockEx(&smp_mutex);  /* protect this section */
112     if (ret)  /* error */
113     {
114         ErrPostEx(SEV_FATAL,0,0,"SeqMgrGet failed [%ld]", (long)ret);
115         return NULL;
116     }
117 
118     if (global_smp == NULL)  /* check again after mutex */
119     {
120                                  /*** have to initialize it **/
121         smp = (SeqMgrPtr) MemNew (sizeof(SeqMgr));
122         smp->bsfetch = BSFetchFunc;  /* BioseqFetch default */
123         smp->fetch_on_lock = TRUE;     /* fetch when locking */
124         smp_RWlock = NlmRWinit();  /* initialize RW lock */
125         sgi_RWlock = NlmRWinit();  /* initialize RW lock */
126         global_smp = smp;       /* do this last for mutex safety */
127     }
128 
129     NlmMutexUnlock(smp_mutex);
130 
131     return global_smp;
132 }
133 
134 /*****************************************************************************
135 *
136 *   SeqMgrReadLock()
137 *       Initialize if not done already
138 *       A thread can have only one read or write lock at a time
139 *       Many threads can have read locks
140 *       Only one thread can have a write lock
141 *       No other threads may have read locks if a write lock is granted
142 *       If another thread holds a write lock, this call blocks until write
143 *          is unlocked.
144 *
145 *****************************************************************************/
SeqMgrReadLock(void)146 NLM_EXTERN SeqMgrPtr LIBCALL SeqMgrReadLock (void)
147 {
148     SeqMgrPtr smp;
149     Int4 ret;
150 
151     smp = SeqMgrGet();  /* ensure initialization */
152 
153     ret = NlmRWrdlock(smp_RWlock);
154     if (ret != 0)
155     {
156         ErrPostEx(SEV_ERROR,0,0,"SeqMgrReadLock: RWrdlock error [%ld]",
157             (long)ret);
158         return NULL;
159     }
160     return smp;
161 }
162 
163 /*****************************************************************************
164 *
165 *   SeqMgrWriteLock
166 *       Initialize if not done already
167 *       A thread can have only one read or write lock at a time
168 *       Many threads can have read locks
169 *       Only one thread can have a write lock
170 *       No other threads may have read locks if a write lock is granted
171 *       If another thread holds a read or write lock, this call blocks until
172 *          that lock is released.
173 *
174 *****************************************************************************/
SeqMgrWriteLock(void)175 NLM_EXTERN SeqMgrPtr LIBCALL SeqMgrWriteLock (void)
176 {
177     SeqMgrPtr smp;
178     Int4 ret;
179 
180     smp = SeqMgrGet();  /* ensure initialization */
181 
182     ret = NlmRWwrlock(smp_RWlock);
183     if (ret != 0)
184     {
185         ErrPostEx(SEV_ERROR,0,0,"SeqMgrWriteLock: RWwrlock error [%ld]",
186             (long)ret);
187         return NULL;
188     }
189     smp->is_write_locked = TRUE;
190     return smp;
191 }
192 
193 
194 /*****************************************************************************
195 *
196 *  SeqMgrUnlock()
197 *
198 *****************************************************************************/
SeqMgrUnlock(void)199 NLM_EXTERN Boolean LIBCALL SeqMgrUnlock (void)
200 {
201     SeqMgrPtr smp;
202     Int4 ret;
203 
204     smp = SeqMgrGet();  /* ensure initialization */
205 
206     ret = NlmRWunlock(smp_RWlock);
207     if (ret != 0)
208     {
209         ErrPostEx(SEV_ERROR,0,0,"SeqMgrUnlock: RWunlock error [%ld]",
210             (long)ret);
211         return FALSE;
212     }
213     smp->is_write_locked = FALSE;  /* can't be write locked */
214     return TRUE;
215 }
216 
217 /****************************************************************************
218 *
219 *  RevStringUpper(str)
220 *    Up cases and reverses string
221 *      to get different parts early for SeqId StringCmps
222 *
223 *****************************************************************************/
RevStringUpper(CharPtr str)224 static void NEAR RevStringUpper (CharPtr str)
225 {
226     CharPtr nd;
227     Char tmp;
228 
229         if (str == NULL)
230             return;
231     nd = str;
232     while (*nd != '\0')
233         nd++;
234     nd--;
235 
236     while (nd > str)
237     {
238         tmp = TO_UPPER(*nd);
239         *nd = TO_UPPER(*str);
240         *str = tmp;
241         nd--; str++;
242     }
243 
244     if (nd == str)
245         *nd = TO_UPPER(*nd);
246     return;
247 }
248 
MakeReversedSeqIdString(SeqIdPtr sid,CharPtr buf,size_t len)249 NLM_EXTERN Boolean MakeReversedSeqIdString (SeqIdPtr sid, CharPtr buf, size_t len)
250 
251 {
252   Uint1         oldchoice;
253   CharPtr       tmp;
254   TextSeqIdPtr  tsip;
255 
256   if (sid == NULL || buf == NULL || len < 1) return FALSE;
257   oldchoice = 0;
258   switch (sid->choice) {
259     case SEQID_GI:
260       sprintf (buf, "%ld", (long)(sid->data.intvalue));
261       break;
262     case SEQID_EMBL:
263     case SEQID_DDBJ:
264       oldchoice = sid->choice;
265       sid->choice = SEQID_GENBANK;
266     case SEQID_GENBANK:
267     case SEQID_PIR:
268     case SEQID_OTHER:
269     case SEQID_SWISSPROT:
270     case SEQID_PRF:
271     case SEQID_TPG:
272     case SEQID_TPE:
273     case SEQID_TPD:
274     case SEQID_GPIPE:
275     case SEQID_NAMED_ANNOT_TRACK:
276       tsip = (TextSeqIdPtr) (sid->data.ptrvalue);
277       if (tsip->accession != NULL) {
278         tmp = tsip->name;
279         tsip->name = NULL;
280         SeqIdWrite (sid, buf, PRINTID_FASTA_SHORT, len);
281         tsip->name = tmp;
282       } else {
283         SeqIdWrite (sid, buf, PRINTID_FASTA_SHORT, len);
284       }
285       if (oldchoice)
286         sid->choice = oldchoice;
287       break;
288     default:
289       SeqIdWrite (sid, buf, PRINTID_FASTA_SHORT, len);
290       break;
291   }
292   RevStringUpper (buf);
293   return TRUE;
294 }
295 
296 /*****************************************************************************
297 *
298 *   SeqEntrySetScope(sep)
299 *       scopes global seqentry searches to sep
300 *       setting sep=NULL, opens scope to all seqentries in memory
301 *       returns the current scope
302 *
303 *****************************************************************************/
SeqEntrySetScope(SeqEntryPtr sep)304 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntrySetScope(SeqEntryPtr sep)
305 {
306     SeqEntryPtr curr = NULL;
307     SeqMgrPtr smp;
308     Int2 i, j;
309     SMScopePtr smsp;
310     TNlmThread thr;
311     Boolean found;
312 
313     smp = SeqMgrWriteLock();
314     if (smp == NULL) goto ret;
315     thr = NlmThreadSelf();
316     found = FALSE;
317     for (i = 0, smsp = smp->scope; i < smp->num_scope; i++, smsp++)
318     {
319         if (NlmThreadCompare(thr, smsp->thr))
320         {
321             curr = smsp->SEscope;
322             smsp->SEscope = sep;
323             if (sep == NULL)  /* removing one? */
324             {
325                 smp->num_scope--;
326                 j = smp->num_scope - i;  /* number to move */
327                 if (j)  /* not last one */
328                     MemCopy(smsp, (smsp+1), (size_t)(j * sizeof(SMScope)));
329             }
330             goto ret;    /* all done */
331         }
332     }
333 
334                   /* thread not on list */
335     if (sep == NULL)
336         goto ret;       /* nothing to do */
337 
338     i = smp->num_scope;
339     j = smp->total_scope;
340     if (j == i)  /* need more room */
341     {
342         j += 20;   /* new size */
343         smsp = smp->scope;
344         smp->scope = MemNew((size_t)(j * sizeof(SMScope)));
345         MemCopy(smp->scope, smsp, (size_t)(i * sizeof(SMScope)));
346         smp->total_scope = j;
347         MemFree(smsp);
348     }
349 
350     smp->scope[i].thr = thr;
351     smp->scope[i].SEscope = sep;
352     smp->num_scope++;
353 
354 ret: SeqMgrUnlock();
355     return curr;
356 }
357 
358 /*****************************************************************************
359 *
360 *   SeqEntryGetScope()
361 *       returns the current scope or NULL if none set
362 *
363 *****************************************************************************/
SeqEntryGetScope(void)364 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryGetScope(void)
365 {
366     SeqMgrPtr smp;
367     SeqEntryPtr scope = NULL;
368     Int2 i;
369     SMScopePtr smsp;
370     TNlmThread thr;
371 
372     smp = SeqMgrReadLock();
373     if (smp == NULL) return FALSE;
374     thr = NlmThreadSelf();
375     for (i = 0, smsp = smp->scope; i < smp->num_scope; i++, smsp++)
376     {
377         if (NlmThreadCompare(thr, smsp->thr))
378         {
379             scope = smsp->SEscope;
380             break;
381         }
382     }
383     SeqMgrUnlock();
384     return scope;
385 }
386 
387 /*****************************************************************************
388 *
389 *   BioseqFind(SeqIdPtr)
390 *       Just checks in object loaded memory
391 *       Will also restore a Bioseq that has been cached out
392 *
393 *****************************************************************************/
BioseqFind(SeqIdPtr sid)394 NLM_EXTERN BioseqPtr LIBCALL BioseqFind (SeqIdPtr sid)
395 {
396     return BioseqFindFunc(sid, TRUE, TRUE, TRUE);
397 }
398 
399 /*****************************************************************************
400 *
401 *   BioseqFindCore(sid)
402 *       Finds a Bioseq in memory based on SeqId when only "core" elements needed
403 *       Will NOT restore a Bioseq that has been cached out by SeqMgr
404 *       This function is for use ONLY by functions that only need the parts
405 *         of the Bioseq left when cached out. This includes the SeqId chain,
406 *         and non-pointer components of the Bioseq.
407 *
408 *****************************************************************************/
BioseqFindCore(SeqIdPtr sip)409 NLM_EXTERN BioseqPtr LIBCALL BioseqFindCore (SeqIdPtr sip)
410 {
411     return BioseqFindFunc(sip, FALSE, TRUE, TRUE);
412 }
413 
414 /*****************************************************************************
415 *
416 *   BioseqFindSpecial(sid)
417 *       Finds a Bioseq in memory based on SeqId when only "core" elements needed
418 *       Will NOT restore a Bioseq that has been cached out by SeqMgr
419 *       This function does not use the bioseq_cache mechanism, and is for
420 *         the validator to check for IdOnMultipleBioseqs.
421 *
422 *****************************************************************************/
BioseqFindSpecial(SeqIdPtr sip)423 NLM_EXTERN BioseqPtr LIBCALL BioseqFindSpecial (SeqIdPtr sip)
424 {
425     return BioseqFindFunc(sip, FALSE, TRUE, FALSE);
426 }
427 
428 /*****************************************************************************
429 *
430 *   BioseqFindEntity(sid, itemIDptr)
431 *       Finds a Bioseq in memory based on SeqId
432 *       Will NOT restore a Bioseq that has been cached out by SeqMgr
433 *       returns EntityID if found, otherwise 0
434 *       itemIDptr is set to the value for itemID in ObjMgr functions
435 *       itemtype is OBJ_BIOSEQ of course
436 *
437 *****************************************************************************/
BioseqFindEntity(SeqIdPtr sip,Uint4Ptr itemIDptr)438 NLM_EXTERN Uint2 LIBCALL BioseqFindEntity (SeqIdPtr sip, Uint4Ptr itemIDptr)
439 {
440     BioseqPtr bsp;
441     Uint2 entityID = 0;
442 
443     *itemIDptr = 0;
444     bsp = BioseqFindCore(sip);
445     if (bsp == NULL) return entityID;  /* not found */
446     entityID = ObjMgrGetEntityIDForPointer((Pointer)bsp);
447     if (! entityID)
448         return entityID;
449 
450     *itemIDptr = GatherItemIDByData(entityID, OBJ_BIOSEQ, (Pointer)bsp);
451     return entityID;
452 }
453 
454 /********************************************************************************
455 *
456 *   BioseqReload (omdp, lockit)
457 *     reloads the cached SeqEntry at top of omdp
458 *     if (lockit) locks the record
459 *
460 *********************************************************************************/
461 
BioseqReload(ObjMgrDataPtr omdp,Boolean lockit)462 NLM_EXTERN ObjMgrDataPtr LIBCALL BioseqReload(ObjMgrDataPtr omdp, Boolean lockit)
463 {
464     BioseqPtr bsp = NULL;
465     ObjMgrDataPtr retval = NULL;
466     Int4 j;
467     ObjMgrPtr omp;
468 
469     if (omdp == NULL) return retval;
470     if (! ((omdp->datatype == OBJ_BIOSEQ) || (omdp->datatype == OBJ_BIOSEQSET)))
471         return retval;
472     if (omdp->parentptr != NULL)
473     {
474         omp = ObjMgrReadLock();
475         omdp = ObjMgrFindTop(omp, omdp);
476         ObjMgrUnlock();
477         if (omdp == NULL)
478             return retval;
479     }
480 
481     if (omdp->tempload == TL_CACHED)   /* only need to reload if cached */
482     {
483         bsp = BioseqReloadFunc (NULL, omdp);
484         if (bsp == NULL)
485             return retval;
486         omp = ObjMgrReadLock();
487         j = ObjMgrLookup(omp, (Pointer)bsp);
488         if (j < 0) {
489 
490                     Char tmpbuff[256];
491 
492                     SeqIdWrite(bsp->id, tmpbuff,
493                                PRINTID_FASTA_LONG, sizeof(tmpbuff));
494 
495                     ErrPostEx(SEV_WARNING, 0, __LINE__,
496                               "ObjMgrLookup() returned negative value "
497                               "id = %s, totobj = %d, currobj = %d, "
498                               "HighestEntityID = %d", tmpbuff, omp->totobj,
499                               omp->currobj, omp->HighestEntityID);
500 
501                     ObjMgrUnlock();
502                     return retval;
503                 }
504 
505         omdp = ObjMgrFindTop(omp, omp->datalist[j]);
506         ObjMgrUnlock();
507     }
508 
509     if (lockit)
510     {
511         ObjMgrLock(omdp->datatype, omdp->dataptr, TRUE);
512     }
513 
514     return omdp;
515 }
516 
SeqMgrGetFetchTop(void)517 static BSFetchTop NEAR SeqMgrGetFetchTop (void)
518 {
519     SeqMgrPtr smp;
520     BSFetchTop bsftp=NULL;
521 
522     smp = SeqMgrReadLock();
523     if (smp == NULL) return bsftp;
524     bsftp = smp->bsfetch;
525     SeqMgrUnlock();
526     return bsftp;
527 }
528 
BioseqReloadFunc(SeqIdPtr sid,ObjMgrDataPtr omdp)529 static BioseqPtr NEAR BioseqReloadFunc (SeqIdPtr sid, ObjMgrDataPtr omdp)
530 {
531     Int4 j;
532     ObjMgrDataPtr oldomdp;
533     OMUserDataPtr omudp, next;
534     ObjMgrProcPtr ompp;
535     OMProcControl ompc;
536     BioseqPtr bsp= NULL;
537     Int2 ret;
538     ObjMgrPtr omp;
539     BSFetchTop bsftp=NULL;
540 
541     ompp = NULL;
542     omp = ObjMgrReadLock();
543     for (omudp = omdp->userdata; omudp != NULL; omudp = omudp->next)
544     {
545         if (omudp->proctype == OMPROC_FETCH)  /* caching function */
546         {
547             ompp = ObjMgrProcFind(omp, omudp->procid, NULL, 0);
548             if (ompp != NULL)
549                 break;
550         }
551     }
552     ObjMgrUnlock();
553 
554     if (ompp != NULL && ompp->outputtype != OBJ_BIOSEQ)
555         return bsp;
556 
557     oldomdp = omdp;
558     omdp = NULL;
559     bsftp = SeqMgrGetFetchTop();
560     if (bsftp != NULL)
561     {
562         if (ompp != NULL)    /* fetch proc left a signal */
563         {                                 /* rerun fetch */
564             MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl));
565             ompc.input_data = sid;
566             ompc.input_entityID = oldomdp->EntityID;
567             ompc.proc = ompp;
568             ret = (* (ompp->func))((Pointer)&ompc);
569             switch (ret)
570             {
571                 case OM_MSG_RET_ERROR:
572                     ErrShow();
573                     break;
574                 case OM_MSG_RET_DEL:
575                     break;
576                 case OM_MSG_RET_OK:
577                     break;
578                 case OM_MSG_RET_DONE:
579                     omp = ObjMgrWriteLock();
580                     ObjMgrSetTempLoad (omp, ompc.output_data);
581                     ObjMgrUnlock();
582                     bsp = (BioseqPtr)(ompc.output_data);
583                     break;
584                 default:
585                     break;
586             }
587         }
588 
589         if (bsp == NULL)  /* nope, try regular fetch */
590         {
591             bsp = (*(bsftp))(sid, BSFETCH_TEMP);
592         }
593 
594         if (bsp != NULL)
595         {
596             omp = ObjMgrReadLock();
597             j = ObjMgrLookup(omp, (Pointer)bsp);
598             if (j < 0) {
599 
600                             Char tmpbuff[256];
601 
602                             SeqIdWrite(bsp->id, tmpbuff,
603                                        PRINTID_FASTA_LONG, sizeof(tmpbuff));
604 
605                             ErrPostEx(SEV_WARNING, 0, __LINE__,
606                                       "ObjMgrLookup() returned negative value "
607                                       "id = %s, totobj = %d, currobj = %d, "
608                                       "HighestEntityID = %d", tmpbuff,
609                                       omp->totobj,
610                                       omp->currobj, omp->HighestEntityID);
611                             ObjMgrUnlock();
612                             return bsp;
613                         }
614             omdp = ObjMgrFindTop(omp, omp->datalist[j]);
615             ObjMgrUnlock();
616             ObjMgrDeleteIndexOnEntityID (omp, oldomdp->EntityID);
617             omdp->EntityID = oldomdp->EntityID;
618             oldomdp->EntityID = 0;
619             ObjMgrAddIndexOnEntityID (omp, omdp->EntityID, omdp);
620 
621             omudp = omdp->userdata;
622             while (omudp != NULL)
623             {
624                 next = omudp->next;
625                 if (omudp->freefunc != NULL)
626                                  (*(omudp->freefunc))(omudp->userdata.ptrvalue);
627                 MemFree(omudp);
628                 omudp = next;
629             }
630             omdp->userdata = oldomdp->userdata;
631             oldomdp->userdata = NULL;
632 
633             if (oldomdp->choice != NULL)
634                 SeqEntryFree(oldomdp->choice);
635             else
636             {
637                 switch(oldomdp->datatype)
638                 {
639                     case OBJ_BIOSEQ:
640                         BioseqFree((BioseqPtr)(oldomdp->dataptr));
641                         break;
642                     case OBJ_BIOSEQSET:
643                         BioseqSetFree((BioseqSetPtr)(oldomdp->dataptr));
644                         break;
645                     default:
646                         ErrPostEx(SEV_ERROR,0,0,"BioseqReloadFunc: delete unknown type [%d]",
647                             (int)(oldomdp->datatype));
648                         break;
649                 }
650             }
651         }
652     }
653     return bsp;
654 }
655 /** static func used internally **/
656 
657 /*******************************************
658 *
659 *  WARNING: if you change BIOSEQ_CACHE_NUM, you have to change the
660 *   number of NULL in the initialization of the 2 static pointer arrays
661 *   below
662 *
663 *******************************************/
664 /* nb: this cache is cleared in SeqMgrDeleteFromBioseqIndex() */
/* Number of slots in the small lookup cache that BioseqFindFunc() checks first. */
665 #define BIOSEQ_CACHE_NUM 3
/* Scope that was active when each cached entry was stored; BioseqFindFunc() keeps the latest hit in slot 0. */
666 static SeqEntryPtr se_cache[BIOSEQ_CACHE_NUM] = {
667     NULL, NULL, NULL};   /* for a few platforms */
/* ObjMgrData of each cached Bioseq, parallel to se_cache; a NULL slot ends the cache scan. */
668 static ObjMgrDataPtr omdp_cache[BIOSEQ_CACHE_NUM] = {
669     NULL, NULL, NULL};   /* for a few platforms */
/* Mutex held by BioseqFindFunc() while it reads or reorders the two cache arrays. */
670 static TNlmMutex smp_cache_mutex = NULL;
671 
BioseqFindFunc(SeqIdPtr sid,Boolean reload_from_cache,Boolean force_it,Boolean use_bioseq_cache)672 static BioseqPtr NEAR BioseqFindFunc (SeqIdPtr sid, Boolean reload_from_cache, Boolean force_it, Boolean use_bioseq_cache)
673 {
674     Int4 i, j, num, imin, imax, retval;
675     SeqIdIndexElementPtr PNTR sipp;
676     CharPtr tmp;
677     Char buf[128];
678     Boolean do_return;
679     SeqMgrPtr smp;
680     ObjMgrPtr omp;
681     ObjMgrDataPtr omdp = NULL;
682     BioseqPtr bsp = NULL, tbsp;
683     SeqEntryPtr scope = NULL;
684 
685     if (sid == NULL)
686         return NULL;
687 
688     SeqMgrReadLock();    /* make sure no other thread is writing */
689     retval = NlmMutexLockEx(&smp_cache_mutex);  /* protect this section */
690     SeqMgrUnlock();
691     if (retval)  /* error */
692     {
693         ErrPostEx(SEV_FATAL,0,0,"BioseqFindFunc cache mutex failed [%ld]", (long)retval);
694         return NULL;
695     }
696 
697     do_return = FALSE;
698     scope = SeqEntryGetScope();       /* first check the cache */
699     for (i = 0; i < BIOSEQ_CACHE_NUM && use_bioseq_cache; i++)
700     {
701         if (omdp_cache[i] == NULL)
702             break;
703         omdp = omdp_cache[i];
704         if (omdp->datatype == OBJ_BIOSEQ)
705         {
706             if ((scope == NULL) || (scope == se_cache[i]))
707             {
708                 bsp = (BioseqPtr)(omdp->dataptr);
709 
710                 if (BioseqMatch(bsp, sid))
711                 {
712                     for (j = i; j > 0; j--)  /* shift to top of cache */
713                     {
714                         omdp_cache[j] = omdp_cache[j-1];
715                         se_cache[j] = se_cache[j-1];
716                     }
717                     omdp_cache[0] = omdp;
718                     se_cache[0] = scope;
719 
720                     if (! reload_from_cache)
721                     {
722                         do_return = TRUE;
723                         goto done_cache;
724                     }
725 
726                     omp = ObjMgrReadLock();
727                     omdp = ObjMgrFindTop(omp, omdp);
728                     ObjMgrUnlock();
729                     if (omdp == NULL || omdp->tempload != TL_CACHED)
730                     {
731                         do_return = TRUE;
732                         goto done_cache;
733                     }
734 
735                     bsp = BioseqReloadFunc(sid, omdp);
736 
737                     if (bsp == NULL)
738                     {
739 
740                         ErrPostEx(SEV_ERROR,0,0,"BioseqFindFunc: couldn't uncache");
741                     }
742                     do_return = TRUE;
743                     goto done_cache;
744                 }
745             }
746         }
747     }
748 done_cache:
749     NlmMutexUnlock(smp_cache_mutex);
750     if (do_return)  /* all done */
751     {
752         return bsp;
753     }
754 
755     bsp = NULL; /* resetting it */
756 
757     SeqMgrProcessNonIndexedBioseq(force_it);    /* make sure all are indexed */
758 
759         /* stringify as in SeqMgrAdd */
760 
761     MakeReversedSeqIdString (sid, buf, sizeof (buf) - 1); /* common function to make id, call RevStringUpper */
762 
763 
764     imin = 0;
765     smp = SeqMgrReadLock();
766     imax = smp->BioseqIndexCnt - 1;
767     sipp = smp->BioseqIndex;
768 
769     num = -1;
770 
771     while (imax >= imin)
772     {
773         i = (imax + imin)/2;
774         tmp = sipp[i]->str;
775         if ((j = StringCmp(tmp, buf)) > 0)
776             imax = i - 1;
777         else if (j < 0)
778             imin = i + 1;
779         else
780         {
781             num = i;
782             break;
783         }
784     }
785 
786     if (num < 0)  /* couldn't find it */
787     {
788         /*
789         Message(MSG_ERROR, "[1] Couldn't find [%s]", buf);
790         */
791         bsp = NULL;
792         goto ret;
793     }
794 
795 
796     if (scope != NULL)    /* check in scope */
797     {
798         tbsp = (BioseqPtr)(sipp[num]->omdp->dataptr);
799         if (ObjMgrIsChild(scope->data.ptrvalue, tbsp))
800         {
801             bsp = tbsp;
802             omdp = sipp[num]->omdp;
803         }
804         else
805         {                  /* not in scope, could be duplicate SeqId */
806             i = num-1;
807             while ((i >= 0) && (bsp == NULL) && (! StringCmp(sipp[i]->str, buf)))  /* back up */
808             {
809                tbsp = (BioseqPtr)(sipp[i]->omdp->dataptr);
810                if (ObjMgrIsChild(scope->data.ptrvalue, tbsp))
811                {
812                    bsp = tbsp;
813                     omdp = sipp[i]->omdp;
814                }
815                i--;
816             }
817             i = num + 1;
818             imax = smp->BioseqIndexCnt - 1;
819             while ((bsp == NULL) && (i <= imax) && (! StringCmp(sipp[i]->str, buf)))
820             {
821                tbsp = (BioseqPtr)(sipp[i]->omdp->dataptr);
822                if (ObjMgrIsChild(scope->data.ptrvalue, tbsp))
823                {
824                    bsp = tbsp;
825                     omdp = sipp[i]->omdp;
826                }
827                i++;
828             }
829         }
830     }
831     else  /* no scope set */
832     {
833         omdp = sipp[num]->omdp;
834         bsp = (BioseqPtr)(omdp->dataptr);
835     }
836 
837 
838     if (bsp == NULL)   /* not found */
839     {
840         /*
841         Message(MSG_ERROR, "[2] Couldn't find [%s]", buf);
842         */
843         goto ret;
844     }
845 
846     retval = NlmMutexLockEx(&smp_cache_mutex);  /* protect this section */
847     if (retval)  /* error */
848     {
849         ErrPostEx(SEV_FATAL,0,0,"BioseqFindFunc2 cache mutex failed [%ld]", (long)retval);
850         bsp = NULL;
851         goto ret;
852     }
853 
854     for (j = (BIOSEQ_CACHE_NUM - 1); j > 0; j--)  /* shift to top of cache */
855     {
856         omdp_cache[j] = omdp_cache[j-1];
857         se_cache[j] = se_cache[j-1];
858     }
859     omdp_cache[0] = omdp;
860     se_cache[0] = scope;
861 
862     NlmMutexUnlock(smp_cache_mutex);
863 
864     if (! reload_from_cache)
865         goto ret;
866 
867     omp = ObjMgrReadLock();
868     omdp = ObjMgrFindTop(omp, omdp);
869     ObjMgrUnlock();
870     if (omdp == NULL)
871     {
872         bsp = NULL;
873         goto ret;
874     }
875         if (omdp->tempload == TL_CACHED)
876         {
877                 SeqMgrUnlock();
878                 bsp = BioseqReloadFunc(sid, omdp);
879                 goto ret2;
880         }
881 ret:
882         SeqMgrUnlock();
883 ret2:
884         return bsp;
885 }
886 
887 /*****************************************************************************
888 *
889 *   ClearBioseqFindCache()
890 *       frees internal omdp and se caches which can thwart detection of colliding IDs
891 *
892 *****************************************************************************/
ClearBioseqFindCache(void)893 NLM_EXTERN void ClearBioseqFindCache (void)
894 
895 {
896   Int4       i;
897   SeqMgrPtr  smp;
898 
899   smp = SeqMgrWriteLock ();
900 
901   for (i = 0; i < BIOSEQ_CACHE_NUM; i++) {
902     omdp_cache [i] = NULL;
903     se_cache [i] = NULL;
904   }
905 
906   SeqMgrUnlock ();
907 }
908 
909 /*****************************************************************************
910 *
911 *   SeqMgrFreeCache()
912 *       frees all cached SeqEntrys
913 *       returns FALSE if any errors occurred
914 *
915 *****************************************************************************/
SeqMgrFreeCache(void)916 NLM_EXTERN Boolean LIBCALL SeqMgrFreeCache(void)
917 {
918     return ObjMgrFreeCache(OBJ_SEQENTRY);
919 }
920 
921 /*****************************************************************************
922 *
923 *   BioseqLockById(SeqIdPtr)
924 *       Finds the Bioseq and locks it
925 *       Makes sure appropriate BioseqContent is present
926 *
927 *****************************************************************************/
BioseqLockByIdEx(SeqIdPtr sid,Boolean force_it)928 static BioseqPtr LIBCALL BioseqLockByIdEx (SeqIdPtr sid, Boolean force_it)
929 {
930     BioseqPtr bsp = NULL;
931     SeqMgrPtr smp;
932     SeqEntryPtr oldscope = NULL;
933     BSFetchTop bsftp;
934     Boolean fetch_on_lock;
935     DbtagPtr dbt;
936 
937     if (sid == NULL) return bsp;
938 
939     /* special case for DeltaSeqsToSeqLoc fake IDs - ignore */
940     if (sid->choice == SEQID_GENERAL) {
941         dbt = (DbtagPtr) sid->data.ptrvalue;
942         if (dbt != NULL && StringCmp (dbt->db, "SeqLit") == 0) {
943             return NULL;
944         }
945     }
946 
947     bsp = BioseqFindFunc(sid, TRUE, force_it, TRUE);
948     if (bsp == NULL)
949     {
950         smp = SeqMgrReadLock();
951         if (smp == NULL) return bsp;
952         fetch_on_lock = smp->fetch_on_lock;
953         bsftp = smp->bsfetch;
954         SeqMgrUnlock();
955 
956         if (fetch_on_lock)
957         {
958             oldscope = SeqEntrySetScope (NULL);
959             if (oldscope != NULL) {
960                 bsp = BioseqFindFunc(sid, TRUE, force_it, TRUE);
961                 SeqEntrySetScope (oldscope);
962             }
963             if (bsp == NULL && bsftp != NULL)
964                 bsp = (*(bsftp))(sid, BSFETCH_TEMP);
965         }
966     }
967 
968     if (bsp == NULL) return NULL;
969 
970     ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, TRUE);
971     return bsp;
972 }
973 
BioseqLockById(SeqIdPtr sid)974 NLM_EXTERN BioseqPtr LIBCALL BioseqLockById (SeqIdPtr sid)
975 {
976     return BioseqLockByIdEx (sid, TRUE);
977 }
978 
979 /*****************************************************************************
980 *
981 *   BioseqUnlockById(SeqIdPtr sip)
982 *       Frees a Bioseq to be dumped from memory if necessary
983 *
984 *****************************************************************************/
BioseqUnlockById(SeqIdPtr sip)985 NLM_EXTERN Boolean LIBCALL BioseqUnlockById (SeqIdPtr sip)
986 {
987     BioseqPtr bsp;
988 
989     if (sip == NULL) return FALSE;
990 
991     bsp = BioseqFindFunc(sip, FALSE, TRUE, TRUE);
992     if (bsp == NULL)
993         return FALSE;
994 
995     ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, FALSE);
996     return TRUE;
997 }
998 
999 /*****************************************************************************
1000 *
1001 *   BioseqLock(BioseqPtr)
1002 *       Locks a Bioseq
1003 *       Any cached data is returned to memory
1004 *
1005 *****************************************************************************/
BioseqLock(BioseqPtr bsp)1006 NLM_EXTERN BioseqPtr LIBCALL BioseqLock (BioseqPtr bsp)
1007 {
1008     if (bsp == NULL) return NULL;
1009 
1010     ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, TRUE);
1011 
1012     return bsp;
1013 }
1014 
1015 /*****************************************************************************
1016 *
1017 *   BioseqUnlock(BioseqPtr)
1018 *       Frees a Bioseq to be dumped from memory if necessary
1019 *
1020 *****************************************************************************/
BioseqUnlock(BioseqPtr bsp)1021 NLM_EXTERN Boolean LIBCALL BioseqUnlock (BioseqPtr bsp)
1022 {
1023     if (bsp == NULL) return FALSE;
1024 
1025     if (ObjMgrLock(OBJ_BIOSEQ, (Pointer)bsp, FALSE) >= 0)
1026         return TRUE;
1027     else
1028         return FALSE;
1029 }
1030 
1031 /*****************************************************************************
1032 *
1033 *   BioseqFetch(SeqIdPtr, flag)
1034 *       loads bioseq into memory if possible
1035 *       first tries LocalLoad
1036 *       then tries EntrezLoad
1037 *
1038 *****************************************************************************/
BSFetchFunc(SeqIdPtr sid,Uint1 ld_type)1039 static BioseqPtr LIBCALLBACK BSFetchFunc (SeqIdPtr sid, Uint1 ld_type)
1040 {
1041     BioseqPtr bsp = NULL;
1042     ObjMgrProcPtr ompp;
1043     OMProcControl ompc;
1044     Int2 ret;
1045     ObjMgrPtr omp;
1046 
1047     ompp = NULL;
1048     while ((ompp = ObjMgrProcFindNext(NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_BIOSEQ, ompp)) != NULL)
1049     {
1050         MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl));
1051         ompc.input_data = sid;
1052         ompc.proc = ompp;
1053         ret = (* (ompp->func))((Pointer)&ompc);
1054         switch (ret)
1055         {
1056             case OM_MSG_RET_ERROR:
1057                 ErrShow();
1058                 break;
1059             case OM_MSG_RET_DEL:
1060                 break;
1061             case OM_MSG_RET_OK:
1062                 break;
1063             case OM_MSG_RET_DONE:
1064                 if (ld_type == BSFETCH_TEMP)
1065                 {
1066                     omp = ObjMgrWriteLock();
1067                     ObjMgrSetTempLoad (omp, ompc.output_data);
1068                     ObjMgrUnlock();
1069                 }
1070                 bsp = (BioseqPtr)(ompc.output_data);
1071                 break;
1072             default:
1073                 break;
1074         }
1075         if (bsp != NULL)  /* got one */
1076             break;
1077     }
1078 
1079     return bsp;
1080 }
1081 
1082 
BioseqFetch(SeqIdPtr sid,Uint1 ld_type)1083 NLM_EXTERN BioseqPtr LIBCALL BioseqFetch (SeqIdPtr sid, Uint1 ld_type)
1084 {
1085     BSFetchTop bsftp;
1086     BioseqPtr bsp;
1087 
1088     bsp = BioseqFindFunc(sid, TRUE, TRUE, TRUE);
1089     if (bsp != NULL) return bsp;
1090 
1091     bsftp = SeqMgrGetFetchTop();
1092     if (bsftp == NULL) return NULL;
1093 
1094     return (*(bsftp))(sid, ld_type);
1095 }
1096 
1097 /*****************************************************************************
1098 *
1099 *   GetSeqIdForGI(BIG_ID)
1100 *     returns the SeqId for a GI
1101 *     returns NULL if can't find it
1102 *     The returned SeqId is allocated. Caller must free it.
1103 *
1104 *****************************************************************************/
/* One entry of the GI <-> SeqId cache; stored as data.ptrvalue of a ValNode. */
1105 typedef struct seqidblock {
1106   BIG_ID     uid;      /* GI the entry was recorded under */
1107   time_t     touch;    /* GetSecs () value set when a non-NULL id is recorded and on lookup hits */
1108   SeqIdPtr   sip;      /* SeqIdDup copy of the recorded id; stays NULL when a NULL id was recorded */
1109   CharPtr    revstr;   /* StringSave copy of the MakeReversedSeqIdString text for sip, when one was produced */
1110 } SeqIdBlock, PNTR SeqIdBlockPtr;
1111 
1112 static ValNodePtr seqidgicache = NULL;        /* main list of entries; sorted and uniqued by uid when indexed */
1113 static ValNodePtr PNTR seqidgiarray = NULL;   /* array over the main list in uid order, for binary search by GI */
1114 static ValNodePtr PNTR giseqidarray = NULL;   /* same nodes re-sorted by revstr, for binary search by SeqId */
1115 static Int2 seqidcount = 0;                   /* number of nodes in the main list as of the last indexing */
1116 static Boolean seqidgiindexed = FALSE;        /* TRUE once the arrays reflect the current main list */
1117 
1118 /* record first in small linear list so as not to sort main list after every addition */
1119 static ValNodePtr seqidgilatest = NULL;       /* newest entries, most recent at the head, searched linearly */
1120 static Int2 seqidunidxcount = 0;              /* number of nodes currently on seqidgilatest */
1121 
1122 /*
1123 static TNlmRWlock sid_RWlock = NULL;
1124 */
1125 
RecordInSeqIdGiCache(BIG_ID gi,SeqIdPtr sip)1126 NLM_EXTERN void RecordInSeqIdGiCache ( BIG_ID gi, SeqIdPtr sip)
1127 
1128 {
1129     Char buf [128];
1130     ValNodePtr vnp;
1131     SeqIdBlockPtr sibp;
1132     Int4 retval;
1133 
1134     /* if (sip == NULL) return; okay to cache NULL because we protect against SeqIdDup */
1135 
1136     retval = NlmRWwrlock(sgi_RWlock);
1137     if (retval != 0)
1138     {
1139         ErrPostEx(SEV_ERROR,0,0,"RecSeqIdGi: RWwrlock error [%ld]",
1140             (long)retval);
1141         return;
1142     }
1143 
1144 
1145     vnp = ValNodeNew (NULL);
1146     if (vnp == NULL) goto ret;
1147     sibp = (SeqIdBlockPtr) MemNew (sizeof (SeqIdBlock));
1148     if (sibp == NULL) {
1149         MemFree (vnp);
1150         goto ret;
1151     }
1152 
1153     sibp->uid = gi;
1154     if (sip != NULL) {
1155         sibp->sip = SeqIdDup (sip);
1156         sibp->touch = GetSecs ();
1157         if (MakeReversedSeqIdString (sibp->sip, buf, sizeof (buf) - 1)) {
1158           sibp->revstr = StringSave (buf);
1159         }
1160     }
1161     vnp->data.ptrvalue = (Pointer) sibp;
1162 
1163     /* insert at head of unindexed list. */
1164 
1165     vnp->next = seqidgilatest;
1166     seqidgilatest = vnp;
1167     seqidunidxcount++;
1168 
1169     if (seqidunidxcount > 50 && seqidgilatest != NULL && seqidgicache != NULL) {
1170 
1171       /* if over threshhold, insert unindexed list at head of main list (must
1172          already exist so as to allow bulk lookup recording prior to use) */
1173 
1174       vnp = seqidgilatest;
1175       while (vnp->next != NULL) {
1176         vnp = vnp->next;
1177       }
1178 
1179       vnp->next = seqidgicache;
1180       seqidgicache = seqidgilatest;
1181 
1182       /* clear unindexed list pointer and reset count */
1183 
1184       seqidgilatest = NULL;
1185       seqidunidxcount = 0;
1186 
1187       /* null out sorted access arrays, will sort, unique, and index at next use */
1188 
1189       seqidgiarray = MemFree (seqidgiarray);
1190       giseqidarray = MemFree (giseqidarray);
1191       seqidgiindexed = FALSE;
1192     }
1193 
1194 ret:
1195     retval = NlmRWunlock(sgi_RWlock);
1196     if (retval != 0)
1197     {
1198         ErrPostEx(SEV_ERROR,0,0,"RecSeqIdGiUnlock: RWunlock error [%ld]",
1199             (long)retval);
1200     }
1201 }
1202 
FreeSeqIdGiCache(void)1203 NLM_EXTERN void FreeSeqIdGiCache (void)
1204 
1205 {
1206   Int4           ret;
1207   SeqIdBlockPtr  sibp;
1208   ValNodePtr     vnp;
1209 
1210   ret = NlmRWwrlock(sgi_RWlock);
1211   if (ret != 0) {
1212     ErrPostEx(SEV_ERROR,0,0,"FreeSeqIdGiCache: RWwrlock error [%ld]", (long) ret);
1213     return;
1214   }
1215 
1216   seqidgiindexed = FALSE;
1217   seqidcount = 0;
1218   seqidgiarray = MemFree (seqidgiarray);
1219   giseqidarray = MemFree (giseqidarray);
1220 
1221   for (vnp = seqidgicache; vnp != NULL; vnp = vnp->next) {
1222     sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1223     if (sibp == NULL) continue;
1224     sibp->sip = SeqIdFree (sibp->sip);
1225     sibp->revstr = MemFree (sibp->revstr);
1226   }
1227   seqidgicache = ValNodeFreeData (seqidgicache);
1228 
1229   /* also free unindexed list of most recent additions */
1230 
1231   for (vnp = seqidgilatest; vnp != NULL; vnp = vnp->next) {
1232     sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1233     if (sibp == NULL) continue;
1234     sibp->sip = SeqIdFree (sibp->sip);
1235     sibp->revstr = MemFree (sibp->revstr);
1236   }
1237   seqidgilatest = ValNodeFreeData (seqidgilatest);
1238   seqidunidxcount = 0;
1239 
1240   ret = NlmRWunlock(sgi_RWlock);
1241   if (ret != 0) {
1242     ErrPostEx(SEV_ERROR,0,0,"FreeSeqIdGiCache: RWwrlock error [%ld]", (long) ret);
1243     return;
1244   }
1245 }
1246 
1247 /* trim list by sorting older nodes to end of list if list grew too large */
1248 
SortSeqIdGiCacheTime(VoidPtr ptr1,VoidPtr ptr2)1249 static int LIBCALLBACK SortSeqIdGiCacheTime (VoidPtr ptr1, VoidPtr ptr2)
1250 
1251 {
1252   SeqIdBlockPtr  sibp1;
1253   SeqIdBlockPtr  sibp2;
1254   ValNodePtr     vnp1;
1255   ValNodePtr     vnp2;
1256 
1257   if (ptr1 == NULL || ptr2 == NULL) return 0;
1258   vnp1 = *((ValNodePtr PNTR) ptr1);
1259   vnp2 = *((ValNodePtr PNTR) ptr2);
1260   if (vnp1 == NULL || vnp2 == NULL) return 0;
1261   sibp1 = (SeqIdBlockPtr) vnp1->data.ptrvalue;
1262   sibp2 = (SeqIdBlockPtr) vnp2->data.ptrvalue;
1263   if (sibp1 == NULL || sibp2 == NULL) return 0;
1264   if (sibp1->touch > sibp2->touch) {
1265      return -1;
1266   } else if (sibp1->touch < sibp2->touch) {
1267     return 1;
1268   }
1269   return 0;
1270 }
1271 
1272 /* sort valnode list by gi */
1273 
SortSeqIdGiByUid(VoidPtr ptr1,VoidPtr ptr2)1274 static int LIBCALLBACK SortSeqIdGiByUid (VoidPtr ptr1, VoidPtr ptr2)
1275 
1276 {
1277   SeqIdBlockPtr  sibp1;
1278   SeqIdBlockPtr  sibp2;
1279   ValNodePtr     vnp1;
1280   ValNodePtr     vnp2;
1281 
1282   if (ptr1 == NULL || ptr2 == NULL) return 0;
1283   vnp1 = *((ValNodePtr PNTR) ptr1);
1284   vnp2 = *((ValNodePtr PNTR) ptr2);
1285   if (vnp1 == NULL || vnp2 == NULL) return 0;
1286   sibp1 = (SeqIdBlockPtr) vnp1->data.ptrvalue;
1287   sibp2 = (SeqIdBlockPtr) vnp2->data.ptrvalue;
1288   if (sibp1 == NULL || sibp2 == NULL) return 0;
1289   if (sibp1->uid < sibp2->uid) {
1290      return -1;
1291   } else if (sibp1->uid > sibp2->uid) {
1292     return 1;
1293   }
1294   return 0;
1295 }
1296 
/* Removes duplicate entries from a cache list that is already sorted by uid.
 * Of each run of nodes sharing a uid the first node is kept and the rest are
 * unlinked and freed.
 *
 * The duplicate's SeqId copy and reversed string are released before the
 * node goes to ValNodeFreeData, matching what the other places in this file
 * that free cache nodes do; freeing only the node and block would leak them.
 *
 * Returns the (unchanged) head of the list, or NULL for an empty list. */
static ValNodePtr UniqueSeqIdGiByUid (ValNodePtr list)

{
  SeqIdBlockPtr    curr, last;
  ValNodePtr       next;
  ValNodePtr PNTR  prev;
  ValNodePtr       vnp;

  if (list == NULL) return NULL;

  last = (SeqIdBlockPtr) list->data.ptrvalue;
  prev = &(list->next);

  for (vnp = list->next; vnp != NULL; vnp = next) {
    next = vnp->next;
    curr = (SeqIdBlockPtr) vnp->data.ptrvalue;
    if (last != NULL && curr != NULL && last->uid == curr->uid) {
      /* unlink the duplicate, then free everything it owns */
      *prev = next;
      vnp->next = NULL;
      curr->sip = SeqIdFree (curr->sip);
      curr->revstr = MemFree (curr->revstr);
      ValNodeFreeData (vnp);
    } else {
      last = curr;
      prev = &(vnp->next);
    }
  }

  return list;
}
1325 
1326 /* sort valnode array by reversed seqid string */
1327 
SortSeqIdGiByString(VoidPtr ptr1,VoidPtr ptr2)1328 static int LIBCALLBACK SortSeqIdGiByString (VoidPtr ptr1, VoidPtr ptr2)
1329 
1330 {
1331   SeqIdBlockPtr  sibp1;
1332   SeqIdBlockPtr  sibp2;
1333   CharPtr        str1;
1334   CharPtr        str2;
1335   ValNodePtr     vnp1;
1336   ValNodePtr     vnp2;
1337 
1338   if (ptr1 == NULL || ptr2 == NULL) return 0;
1339   vnp1 = *((ValNodePtr PNTR) ptr1);
1340   vnp2 = *((ValNodePtr PNTR) ptr2);
1341   if (vnp1 == NULL || vnp2 == NULL) return 0;
1342   sibp1 = (SeqIdBlockPtr) vnp1->data.ptrvalue;
1343   sibp2 = (SeqIdBlockPtr) vnp2->data.ptrvalue;
1344   if (sibp1 == NULL || sibp2 == NULL) return 0;
1345   str1 = sibp1->revstr;
1346   str2 = sibp2->revstr;
1347   if (str1 == NULL || str2 == NULL) return 0;
1348   return StringICmp (str1, str2);
1349 }
1350 
UpdateSeqIdGiArrays(void)1351 static Boolean UpdateSeqIdGiArrays (void)
1352 
1353 {
1354   Int2           i;
1355   Int4           ret;
1356   SeqIdBlockPtr  sibp;
1357   ValNodePtr     tmp, vnp;
1358 
1359   if (seqidgicache == NULL && seqidgilatest == NULL) return FALSE;
1360 
1361   if (! seqidgiindexed) {
1362     ret = NlmRWwrlock (sgi_RWlock);
1363     if (ret != 0) {
1364       ErrPostEx (SEV_ERROR, 0, 0, "SeqIdGi: RWwrlock error [%ld]", (long) ret);
1365       return FALSE;
1366     }
1367 
1368     if (seqidunidxcount > 50 && seqidgilatest != NULL) {
1369 
1370       /* if over threshhold, insert unindexed list at head of main list */
1371 
1372       vnp = seqidgilatest;
1373       while (vnp->next != NULL) {
1374         vnp = vnp->next;
1375       }
1376 
1377       vnp->next = seqidgicache;
1378       seqidgicache = seqidgilatest;
1379 
1380       /* clear unindexed list pointer and reset count */
1381 
1382       seqidgilatest = NULL;
1383       seqidunidxcount = 0;
1384 
1385       /* null out sorted access arrays, will sort, unique, and index at next use */
1386 
1387       seqidgiarray = MemFree (seqidgiarray);
1388       giseqidarray = MemFree (giseqidarray);
1389       seqidgiindexed = FALSE;
1390     }
1391 
1392     if (! seqidgiindexed) {
1393 
1394       /* if list is too large, sort by touch time, cut least recently used ids */
1395 
1396       seqidcount = (Int2) ValNodeLen (seqidgicache);
1397       if (seqidcount > 32000) {
1398 
1399         seqidgicache = ValNodeSort (seqidgicache, SortSeqIdGiCacheTime);
1400         for (vnp = seqidgicache; vnp != NULL && seqidcount > 24000; vnp = vnp->next) {
1401           seqidcount--;
1402         }
1403         if (vnp != NULL) {
1404           for (tmp = vnp->next; tmp != NULL; tmp = tmp->next) {
1405             sibp = (SeqIdBlockPtr) tmp->data.ptrvalue;
1406             if (sibp == NULL) continue;
1407             sibp->sip = SeqIdFree (sibp->sip);
1408             sibp->revstr = MemFree (sibp->revstr);
1409           }
1410           vnp->next = ValNodeFreeData (vnp->next);
1411         }
1412       }
1413 
1414       /* sort list by gi */
1415 
1416       seqidgicache = ValNodeSort (seqidgicache, SortSeqIdGiByUid);
1417       seqidgicache = UniqueSeqIdGiByUid (seqidgicache);
1418       seqidcount = (Int2) ValNodeLen (seqidgicache);
1419 
1420       /* copy sorted list into both arrays */
1421 
1422       if (seqidcount > 0) {
1423         seqidgiarray = MemNew (sizeof (ValNodePtr) * (size_t) (seqidcount + 1));
1424         giseqidarray = MemNew (sizeof (ValNodePtr) * (size_t) (seqidcount + 1));
1425         if (seqidgiarray != NULL && giseqidarray != NULL) {
1426           for (vnp = seqidgicache, i = 0; vnp != NULL; vnp = vnp->next, i++) {
1427             seqidgiarray [i] = vnp;
1428             giseqidarray [i] = vnp;
1429           }
1430 
1431           /* now resort one array by seqid string */
1432 
1433           StableMergeSort (giseqidarray, (size_t) seqidcount, sizeof (ValNodePtr), SortSeqIdGiByString);
1434         }
1435       }
1436 
1437       /* finally, set indexed flag */
1438 
1439       seqidgiindexed = TRUE;
1440     }
1441 
1442     ret = NlmRWunlock (sgi_RWlock);
1443     if (ret != 0) {
1444       ErrPostEx (SEV_ERROR, 0, 0, "SeqIdGi: RWunlock error [%ld]", (long) ret);
1445       return FALSE;
1446     }
1447   }
1448 
1449   return TRUE;
1450 }
1451 
FetchFromSeqIdGiCache(BIG_ID gi,SeqIdPtr PNTR sipp)1452 NLM_EXTERN Boolean FetchFromSeqIdGiCache ( BIG_ID gi, SeqIdPtr PNTR sipp)
1453 
1454 {
1455     ValNodePtr vnp;
1456     SeqIdBlockPtr sibp = NULL;
1457     Int2 left, right, mid;
1458     BIG_ID compare;
1459     Int4 ret;
1460     Boolean done = FALSE;
1461 
1462 
1463     if (sipp != NULL) {
1464       *sipp = NULL;
1465     }
1466     if (seqidgicache == NULL && seqidgilatest == NULL) return done;
1467 
1468     if (! UpdateSeqIdGiArrays ()) {
1469         return done;
1470     }
1471 
1472     ret = NlmRWrdlock(sgi_RWlock);
1473     if (ret != 0)
1474     {
1475         ErrPostEx(SEV_ERROR,0,0,"SeqIdGi: RWrdlock error [%ld]",
1476             (long)ret);
1477         return done;
1478     }
1479 
1480     if (seqidgiarray != NULL) {
1481         left = 1;
1482         right = seqidcount;
1483         while (left <= right) {
1484             mid = (left + right) / 2;
1485             compare = 0;
1486             vnp = seqidgiarray [mid - 1];
1487             if (vnp != NULL) {
1488                 sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1489                 if (sibp != NULL) {
1490                     compare = gi - sibp->uid;
1491                 }
1492             }
1493             if (compare <= 0) {
1494                 right = mid - 1;
1495             }
1496             if (compare >= 0) {
1497                 left = mid + 1;
1498             }
1499         }
1500         if (left > right + 1 && sibp != NULL) {
1501             if (sibp->sip != NULL) {
1502                 if (sipp != NULL) {
1503                     *sipp = SeqIdDup (sibp->sip);
1504                 }
1505                 sibp->touch = GetSecs ();
1506             }
1507             done = TRUE;
1508         }
1509     }
1510 
1511     if (! done) {
1512       for (vnp = seqidgilatest; vnp != NULL; vnp = vnp->next) {
1513         sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1514         if (sibp == NULL) continue;
1515         if (sibp->uid == gi) {
1516           if (sibp->sip != NULL) {
1517             if (sipp != NULL) {
1518               *sipp = SeqIdDup (sibp->sip);
1519             }
1520             sibp->touch = GetSecs ();
1521             done = TRUE;
1522             break;
1523           }
1524         }
1525       }
1526     }
1527 
1528     ret = NlmRWunlock(sgi_RWlock);
1529     if (ret != 0)
1530     {
1531         ErrPostEx(SEV_ERROR,0,0,"SeqIdGi: RWunlock error [%ld]",
1532             (long)ret);
1533     }
1534 
1535     return done;
1536 }
1537 
GetSeqIdForGI(BIG_ID gi)1538 NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdForGI (BIG_ID gi)
1539 {
1540     BioseqPtr bsp = NULL;
1541     ObjMgrProcPtr ompp;
1542     OMProcControl ompc;
1543     Int2 ret;
1544     SeqIdPtr sip, sip2=NULL, otherh=NULL, otherl = NULL, otherp = NULL, gb=NULL;
1545     ValNode vn;
1546     SeqEntryPtr oldscope = NULL;
1547 
1548 
1549     if (gi <= 0)
1550         return sip2;
1551 
1552     vn.choice = SEQID_GI;
1553     vn.data.intvalue = gi;
1554     vn.next = NULL;
1555 
1556     oldscope = SeqEntrySetScope (NULL);
1557     bsp = BioseqFindCore(&vn);
1558     SeqEntrySetScope (oldscope);
1559 
1560     if (bsp != NULL)
1561     {
1562         for (sip = bsp->id; sip != NULL; sip = sip->next)
1563         {
1564             switch (sip->choice)
1565             {
1566                 case SEQID_LOCAL:           /* object id */
1567                 case SEQID_GIBBSQ:
1568                 case SEQID_GIBBMT:
1569                 case SEQID_PATENT:
1570                 case SEQID_GENERAL:
1571                     otherl = sip;
1572                     break;
1573                 case SEQID_GI:
1574                     break;
1575                 case SEQID_GENBANK:
1576                 case SEQID_EMBL:
1577                 case SEQID_PIR:
1578                 case SEQID_SWISSPROT:
1579                 case SEQID_DDBJ:
1580                 case SEQID_PRF:
1581                 case SEQID_PDB:
1582                 case SEQID_OTHER:
1583                 case SEQID_TPG:
1584                 case SEQID_TPE:
1585                 case SEQID_TPD:
1586                     gb = sip;
1587                     break;
1588                 case SEQID_GPIPE:
1589                     otherp = sip;
1590                     break;
1591                 default:
1592                     if (otherh == NULL)
1593                         otherh = sip;
1594                     break;
1595             }
1596         }
1597     }
1598 
1599 
1600     if (gb != NULL)
1601         sip2 = gb;
1602     else if (otherp != NULL)
1603         sip2 = otherp;
1604     else if (otherh != NULL)
1605         sip2 = otherh;
1606     else if (otherl != NULL)
1607         sip2 = otherl;
1608 
1609     if (sip2 != NULL)
1610         return SeqIdDup(sip2);
1611 
1612     if (FetchFromSeqIdGiCache (gi, &sip2)) {
1613         return sip2;
1614     }
1615 
1616     ompp = NULL;
1617     while ((ompp = ObjMgrProcFindNext(NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_SEQID, ompp)) != NULL)
1618     {
1619         if ((ompp->subinputtype == SEQID_GI) && (ompp->suboutputtype == 0))
1620         {
1621             MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl));
1622             ompc.input_data = &vn;
1623             ompc.proc = ompp;
1624             ret = (* (ompp->func))((Pointer)&ompc);
1625             switch (ret)
1626             {
1627                 case OM_MSG_RET_ERROR:
1628                     ErrShow();
1629                     break;
1630                 case OM_MSG_RET_DEL:
1631                     break;
1632                 case OM_MSG_RET_OK:
1633                     break;
1634                 case OM_MSG_RET_DONE:
1635                     sip2 = (SeqIdPtr)(ompc.output_data);
1636                     if (sip2 != NULL) {
1637                          RecordInSeqIdGiCache (gi, sip2);
1638                         return sip2;
1639                     }
1640                     break;
1641                 default:
1642                     break;
1643             }
1644         }
1645     }
1646 
1647     RecordInSeqIdGiCache (gi, sip2);
1648     return sip2;
1649 }
1650 
1651 /*****************************************************************************
1652 *
1653 *   GetGIForSeqId(SeqIdPtr)
1654 *     returns the GI for a SeqId
1655 *     returns 0 if can't find it
1656 *
1657 *****************************************************************************/
FetchFromGiSeqIdCache(SeqIdPtr sip,BIG_ID_PNTR gip)1658 NLM_EXTERN Boolean FetchFromGiSeqIdCache (SeqIdPtr sip, BIG_ID_PNTR gip)
1659 
1660 {
1661     Char buf [128];
1662     ValNodePtr vnp;
1663     SeqIdBlockPtr sibp = NULL;
1664     Int2 left, right, mid;
1665     Int4 compare, ret;
1666     Boolean done = FALSE;
1667 
1668 
1669     if (gip != NULL) {
1670       *gip = 0;
1671     }
1672     if (seqidgicache == NULL && seqidgilatest == NULL) return done;
1673     if (sip == NULL) return done;
1674     if (! MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1)) return done;
1675 
1676     if (! UpdateSeqIdGiArrays ()) {
1677         return done;
1678     }
1679 
1680     ret = NlmRWrdlock(sgi_RWlock);
1681     if (ret != 0)
1682     {
1683         ErrPostEx(SEV_ERROR,0,0,"SeqIdGi: RWrdlock error [%ld]",
1684             (long)ret);
1685         return done;
1686     }
1687 
1688     if (giseqidarray != NULL) {
1689         left = 1;
1690         right = seqidcount;
1691         while (left <= right) {
1692             mid = (left + right) / 2;
1693             compare = 0;
1694             vnp = giseqidarray [mid - 1];
1695             if (vnp != NULL) {
1696                 sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1697                 if (sibp != NULL) {
1698                     compare = StringCmp (buf, sibp->revstr);
1699                 }
1700             }
1701             if (compare <= 0) {
1702                 right = mid - 1;
1703             }
1704             if (compare >= 0) {
1705                 left = mid + 1;
1706             }
1707         }
1708         if (left > right + 1 && sibp != NULL) {
1709             if (gip != NULL) {
1710                 *gip = sibp->uid;
1711             }
1712             sibp->touch = GetSecs ();
1713             done = TRUE;
1714         }
1715     }
1716 
1717     if (! done) {
1718       for (vnp = seqidgilatest; vnp != NULL; vnp = vnp->next) {
1719         sibp = (SeqIdBlockPtr) vnp->data.ptrvalue;
1720         if (sibp == NULL) continue;
1721         if (StringCmp (buf, sibp->revstr) == 0) {
1722           if (gip != NULL) {
1723             *gip = sibp->uid;
1724           }
1725           sibp->touch = GetSecs ();
1726           done = TRUE;
1727           break;
1728         }
1729       }
1730     }
1731 
1732     ret = NlmRWunlock(sgi_RWlock);
1733     if (ret != 0)
1734     {
1735         ErrPostEx(SEV_ERROR,0,0,"SeqIdGi: RWunlock error [%ld]",
1736             (long)ret);
1737     }
1738 
1739     return done;
1740 }
1741 
GetGIForSeqId(SeqIdPtr sid)1742 NLM_EXTERN BIG_ID LIBCALL GetGIForSeqId (SeqIdPtr sid)
1743 {
1744     BioseqPtr bsp = NULL;
1745     ObjMgrProcPtr ompp;
1746     OMProcControl ompc;
1747     Int2 ret;
1748     SeqIdPtr sip;
1749     BIG_ID gi = 0;
1750     SeqEntryPtr oldscope = NULL;
1751 
1752 
1753     if (sid == NULL)
1754         return gi;
1755 
1756     if (sid->choice == SEQID_GI)
1757         return sid->data.intvalue;
1758 
1759     oldscope = SeqEntrySetScope (NULL);
1760     bsp = BioseqFindCore(sid);
1761     SeqEntrySetScope (oldscope);
1762 
1763     if (bsp != NULL)
1764     {
1765         for (sip = bsp->id; sip != NULL; sip = sip->next)
1766         {
1767             if (sip->choice == SEQID_GI)
1768                 return sip->data.intvalue;
1769         }
1770     }
1771 
1772     if (FetchFromGiSeqIdCache (sid, &gi)) {
1773         return gi;
1774     }
1775 
1776     ompp = NULL;
1777     while ((ompp = ObjMgrProcFindNext(NULL, OMPROC_FETCH, OBJ_SEQID, OBJ_SEQID, ompp)) != NULL)
1778     {
1779         if ((ompp->subinputtype == 0) && (ompp->suboutputtype == SEQID_GI))
1780         {
1781             MemSet((Pointer)(&ompc), 0, sizeof(OMProcControl));
1782             ompc.input_data = sid;
1783             ompc.proc = ompp;
1784             ret = (* (ompp->func))((Pointer)&ompc);
1785             switch (ret)
1786             {
1787                 case OM_MSG_RET_ERROR:
1788                     ErrShow();
1789                     break;
1790                 case OM_MSG_RET_DEL:
1791                     break;
1792                 case OM_MSG_RET_OK:
1793                     break;
1794                 case OM_MSG_RET_DONE:
1795                     sip = (SeqIdPtr)(ompc.output_data);
1796                     if (sip != NULL)
1797                     {
1798                         if (sip->choice == SEQID_GI)
1799                         {
1800                             gi = (BIG_ID) sip->data.intvalue;
1801                             SeqIdFree(sip);
1802                             RecordInSeqIdGiCache (gi, sid);
1803                             return gi;
1804                         }
1805                         SeqIdFree(sip);
1806                     }
1807                     break;
1808                 default:
1809                     break;
1810             }
1811         }
1812     }
1813 
1814     return gi;
1815 }
1816 
1817 
1818 /*****************************************************************************
1819 *
1820 *   SeqEntryFind(sip)
1821 *       returns top level seqentry for sip
1822 *
1823 *****************************************************************************/
SeqEntryFind(SeqIdPtr sid)1824 NLM_EXTERN SeqEntryPtr LIBCALL SeqEntryFind (SeqIdPtr sid)
1825 {
1826     BioseqPtr bsp;
1827     ObjMgrDataPtr omdp;
1828     ObjMgrDataPtr PNTR omdpp;
1829     SeqEntryPtr result=NULL;
1830     SeqSubmitPtr ssp;
1831     Int4 i;
1832     ObjMgrPtr omp;
1833 
1834     bsp = BioseqFind(sid);
1835     if (bsp == NULL) return result;
1836 
1837     omp = ObjMgrReadLock();
1838     omdpp = omp->datalist;
1839 
1840     i = ObjMgrLookup(omp, (Pointer)bsp);
1841     if (i < 0) {
1842             Char tmpbuff[256];
1843 
1844             SeqIdWrite(bsp->id, tmpbuff,
1845                        PRINTID_FASTA_LONG, sizeof(tmpbuff));
1846 
1847             ErrPostEx(SEV_WARNING, 0, __LINE__,
1848                       "ObjMgrLookup() returned negative value "
1849                       "id = %s, totobj = %d, currobj = %d, "
1850                       "HighestEntityID = %d", tmpbuff,
1851                       omp->totobj,
1852                       omp->currobj, omp->HighestEntityID);
1853             ObjMgrUnlock();
1854             return result;
1855         }
1856 
1857     omdp = omdpp[i];
1858     while (omdp->parentptr != NULL)
1859     {
1860         i = ObjMgrLookup(omp, (omdp->parentptr));
1861         if (i < 0) {
1862                     Char tmpbuff[256];
1863 
1864                     SeqIdWrite(bsp->id, tmpbuff,
1865                                PRINTID_FASTA_LONG, sizeof(tmpbuff));
1866 
1867                     ErrPostEx(SEV_WARNING, 0, __LINE__,
1868                               "ObjMgrLookup() returned negative value "
1869                               "id = %s, totobj = %d, currobj = %d, "
1870                               "HighestEntityID = %d", tmpbuff,
1871                               omp->totobj,
1872                               omp->currobj, omp->HighestEntityID);
1873                     ObjMgrUnlock();
1874                     return result;
1875                 }
1876         omdp = omdpp[i];
1877     }
1878 
1879     if (omdp->datatype == OBJ_SEQSUB) {
1880         ssp = (SeqSubmitPtr) omdp->dataptr;
1881         if (ssp != NULL && ssp->datatype == 1) {
1882             result = (SeqEntryPtr) ssp->data;
1883         }
1884     } else {
1885         result = omdp->choice;
1886     }
1887     ObjMgrUnlock();
1888     return result;
1889 }
1890 
1891 /*****************************************************************************
1892 *
1893 *   BioseqContextPtr BioseqContextNew (bsp)
1894 *
1895 *****************************************************************************/
BioseqContextNew(BioseqPtr bsp)1896 NLM_EXTERN BioseqContextPtr LIBCALL BioseqContextNew (BioseqPtr bsp)
1897 {
1898     ObjMgrDataPtr omdp;
1899     ObjMgrDataPtr PNTR omdpp;
1900     Int4    i;
1901     Int2    ctr=0;
1902     SeqEntryPtr seps[BIOSEQCONTEXTMAX];
1903     BioseqContextPtr bcp;
1904     ObjMgrPtr omp;
1905 
1906     if (bsp == NULL)
1907         return NULL;
1908 
1909 
1910     bcp = MemNew(sizeof(BioseqContext));
1911     bcp->bsp = bsp;
1912     bcp->se.choice = 1;   /* bioseq */
1913     bcp->se.data.ptrvalue = bsp;
1914 
1915     omp = ObjMgrReadLock();
1916     if (omp == NULL) return BioseqContextFree(bcp);
1917     omdpp = omp->datalist;
1918 
1919     i = ObjMgrLookup(omp, (Pointer)bsp);
1920     if (i < 0) {
1921             Char tmpbuff[256];
1922 
1923             SeqIdWrite(bsp->id, tmpbuff,
1924                        PRINTID_FASTA_LONG, sizeof(tmpbuff));
1925 
1926             ErrPostEx(SEV_WARNING, 0, __LINE__,
1927                       "ObjMgrLookup() returned negative value "
1928                       "id = %s, totobj = %d, currobj = %d, "
1929                       "HighestEntityID = %d", tmpbuff,
1930                       omp->totobj,
1931                       omp->currobj, omp->HighestEntityID);
1932             ObjMgrUnlock();
1933             return NULL;
1934         }
1935     omdp = omdpp[i];
1936 
1937     if (omdp->choice != NULL)
1938     {
1939         seps[ctr] = omdp->choice;
1940         ctr++;
1941 
1942         while (omdp->parentptr != NULL)
1943         {
1944             i = ObjMgrLookup(omp, (omdp->parentptr));
1945             if (i < 0) {
1946                             Char tmpbuff[256];
1947 
1948                             SeqIdWrite(bsp->id, tmpbuff,
1949                                        PRINTID_FASTA_LONG, sizeof(tmpbuff));
1950 
1951                             ErrPostEx(SEV_WARNING, 0, __LINE__,
1952                                       "ObjMgrLookup() returned negative value "
1953                                       "id = %s, totobj = %d, currobj = %d, "
1954                                       "HighestEntityID = %d", tmpbuff,
1955                                       omp->totobj,
1956                                       omp->currobj, omp->HighestEntityID);
1957                             ObjMgrUnlock();
1958                             return NULL;
1959                         }
1960             omdp = omdpp[i];
1961             if (omdp->choice != NULL)
1962             {
1963                 if (ctr == BIOSEQCONTEXTMAX)
1964                     ErrPostEx(SEV_ERROR, 0,0, "BioseqContextNew: more than %d levels",(int)ctr);
1965                 else
1966                 {
1967                     seps[ctr] = omdp->choice;
1968                     ctr++;
1969                 }
1970             }
1971         }
1972 
1973         bcp->count = ctr;
1974         for (i = 0; i < bcp->count; i++)
1975         {
1976             ctr--;
1977             bcp->context[i] = seps[ctr];
1978         }
1979     }
1980 
1981     if (omdp->tempload == TL_CACHED)
1982     {
1983         ErrPostEx(SEV_ERROR,0,0,"BioseqContextNew: bsp is TL_CACHED");
1984         bcp = BioseqContextFree(bcp);
1985     }
1986 
1987     ObjMgrUnlock();
1988 
1989     return bcp;
1990 }
1991 
1992 /*****************************************************************************
1993 *
1994 *   BioseqContextFree(bcp)
1995 *
1996 *****************************************************************************/
BioseqContextFree(BioseqContextPtr bcp)1997 NLM_EXTERN BioseqContextPtr LIBCALL BioseqContextFree(BioseqContextPtr bcp)
1998 {
1999     return MemFree(bcp);
2000 }
2001 
2002 /*****************************************************************************
2003 *
2004 *   BioseqContextGetSeqDescr(bcp, type, curr, SeqEntryPtr PNTR sep)
2005 *       returns pointer to the next SeqDescr of this type
2006 *       type gives type of Seq-descr
2007 *       if (type == 0)
2008 *          get them all
2009 *       curr is NULL or previous node of this type found
2010 *       moves up from bsp
2011 *        if (sep != NULL) sep set to SeqEntryPtr containing the SeqDescr.
2012 *
2013 *****************************************************************************/
BioseqContextGetSeqDescr(BioseqContextPtr bcp,Int2 type,ValNodePtr curr,SeqEntryPtr PNTR the_sep)2014 NLM_EXTERN ValNodePtr LIBCALL BioseqContextGetSeqDescr (BioseqContextPtr bcp, Int2 type, ValNodePtr curr, SeqEntryPtr PNTR the_sep)    /* the last one you used */
2015 {
2016     Int2 i;
2017     ValNodePtr tmp = NULL;
2018     Boolean found = FALSE;
2019     BioseqPtr bsp;
2020     BioseqSetPtr bssp;
2021 
2022     if (bcp == NULL) return NULL;
2023 
2024     if (the_sep != NULL)
2025         *the_sep = NULL;
2026 
2027     if (bcp->count == 0)   /* just a Bioseq */
2028     {
2029         tmp = BioseqGetSeqDescr(bcp->bsp, type, curr);
2030         if (the_sep != NULL) *the_sep = bcp->context[1];
2031         return tmp;
2032     }
2033 
2034     i = bcp->count - 1;
2035     if (curr != NULL)   /* find where we are */
2036     {
2037         while ((i >= 0) && (! found))
2038         {
2039             if (IS_Bioseq(bcp->context[i]))
2040             {
2041                 bsp = (BioseqPtr)((bcp->context[i])->data.ptrvalue);
2042                 tmp = bsp->descr;
2043             }
2044             else
2045             {
2046                 bssp = (BioseqSetPtr)((bcp->context[i])->data.ptrvalue);
2047                 tmp = bssp->descr;
2048             }
2049             while ((tmp != curr) && (tmp != NULL))
2050                 tmp = tmp->next;
2051             if (tmp == curr)
2052             {
2053                 found = TRUE;
2054                 tmp = tmp->next;
2055             }
2056             else
2057                 i--;
2058         }
2059         if (! found)   /* can't find it! */
2060             return NULL;
2061     }
2062     else              /* get first one */
2063     {
2064         tmp = bcp->bsp->descr;
2065     }
2066 
2067     while (i >= 0)
2068     {
2069         while (tmp != NULL)
2070         {
2071             if ((! type) || ((Int2)(tmp->choice) == type))
2072             {
2073                 if (the_sep != NULL) *the_sep = bcp->context[i];
2074                 return tmp;
2075             }
2076             tmp = tmp->next;
2077         }
2078         i--;
2079         if (i >= 0)
2080         {
2081             if (IS_Bioseq(bcp->context[i]))
2082             {
2083                 bsp = (BioseqPtr)((bcp->context[i])->data.ptrvalue);
2084                 tmp = bsp->descr;
2085             }
2086             else
2087             {
2088                 bssp = (BioseqSetPtr)((bcp->context[i])->data.ptrvalue);
2089                 tmp = bssp->descr;
2090             }
2091         }
2092     }
2093     return NULL;
2094 }
2095 
2096 /*****************************************************************************
2097 *
2098 *   BioseqContextGetSeqFeat(bcp, type, curr, sapp)
2099 *       returns pointer to the next Seq-feat of this type
2100 *       type gives type of Seq-descr
2101 *       if (type == 0)
2102 *          get them all
2103 *       curr is NULL or previous node of this type found
2104 *       moves up from bsp
2105 *       if (sapp != NULL) is filled with SeqAnnotPtr containing the SeqFeat
2106 *       in:
2107 *           0 = sfp->location only
2108 *           1 = sfp->product only
2109 *           2 = either of above
2110 *
2111 *****************************************************************************/
BioseqContextGetSeqFeat(BioseqContextPtr bcp,Int2 type,SeqFeatPtr curr,SeqAnnotPtr PNTR sapp,Int2 in)2112 NLM_EXTERN SeqFeatPtr LIBCALL BioseqContextGetSeqFeat (BioseqContextPtr bcp, Int2 type,
2113     SeqFeatPtr curr, SeqAnnotPtr PNTR sapp, Int2 in)    /* the last one you used */
2114 {
2115     SeqEntryPtr sep;
2116 
2117     if (bcp == NULL) return NULL;
2118 
2119     if (sapp != NULL)
2120         *sapp = NULL;
2121 
2122     if (bcp->count == 0)    /* just a BioseqSeq */
2123         sep = &(bcp->se);
2124     else
2125         sep = bcp->context[0];
2126 
2127     return SeqEntryGetSeqFeat (sep, type, curr, sapp, in, bcp->bsp);
2128 }
2129 
2130 typedef struct smgetseqfeat {
2131     Boolean hit;
2132     SeqFeatPtr last,
2133         this;
2134     SeqAnnotPtr sap;
2135     SeqLocPtr slp1, slp2;
2136     Int2 in, type;
2137 } SMGetSeqFeat, PNTR GetSeqFeatPtr;
2138 
2139 NLM_EXTERN void GetSeqFeatCallback (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent);
2140 
2141 /*****************************************************************************
2142 *
2143 *   SeqEntryGetSeqFeat(sep, type, curr, sapp)
2144 *       returns pointer to the next Seq-feat of this type
2145 *       type gives type of SeqFeat
2146 *       if (type == 0)
2147 *          get them all
2148 *       curr is NULL or previous node of this type found
2149 *       moves up from bsp
2150 *       if (sapp != NULL) is filled with SeqAnnotPtr containing the SeqFeat
2151 *       if (bsp != NULL) then for that Bioseq match on location by "in"
2152 *       in:
2153 *           0 = sfp->location only
2154 *           1 = sfp->product only
2155 *           2 = either of above
2156 *
2157 *****************************************************************************/
SeqEntryGetSeqFeat(SeqEntryPtr sep,Int2 type,SeqFeatPtr curr,SeqAnnotPtr PNTR sapp,Int2 in,BioseqPtr bsp)2158 NLM_EXTERN SeqFeatPtr LIBCALL SeqEntryGetSeqFeat (SeqEntryPtr sep, Int2 type,
2159     SeqFeatPtr curr, SeqAnnotPtr PNTR sapp, Int2 in, BioseqPtr bsp)    /* the last one you used */
2160 {
2161     SMGetSeqFeat gsf;
2162     ValNode vn1, vn2;
2163 
2164     if (sep == NULL) return NULL;
2165 
2166     if (sapp != NULL)
2167         *sapp = NULL;
2168 
2169     if (curr == NULL)
2170         gsf.hit = TRUE;
2171     else
2172         gsf.hit = FALSE;
2173     gsf.last = curr;
2174     gsf.this = NULL;
2175     gsf.sap = NULL;
2176     gsf.type = type;
2177     gsf.in = in;
2178     if (bsp != NULL)   /* matching by Bioseq */
2179     {
2180         if ((bsp->repr == Seq_repr_seg) || (bsp->repr == Seq_repr_ref))
2181         {
2182             vn2.choice = SEQLOC_MIX;
2183             vn2.data.ptrvalue = bsp->seq_ext;
2184             gsf.slp2 = (SeqLocPtr)(&vn2);
2185         }
2186         else
2187             gsf.slp2 = NULL;
2188 
2189         vn1.choice = SEQLOC_WHOLE;
2190         vn1.data.ptrvalue = (Pointer) SeqIdFindBest (bsp->id, 0);
2191         gsf.slp1 = (SeqLocPtr)(&vn1);
2192     }
2193     else
2194         gsf.slp1 = NULL;
2195 
2196     SeqEntryExplore (sep, (Pointer)(&gsf), GetSeqFeatCallback);
2197 
2198     if (sapp != NULL)
2199         *sapp = gsf.sap;
2200 
2201     return gsf.this;
2202 }
2203 
GetSeqFeatCallback(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)2204 NLM_EXTERN void GetSeqFeatCallback (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
2205 {
2206     GetSeqFeatPtr gsfp;
2207     BioseqPtr bsp;
2208     BioseqSetPtr bssp;
2209     SeqAnnotPtr sap;
2210     SeqFeatPtr sfp, last;
2211     Boolean hit, gotit = FALSE;
2212     Uint1 type;
2213     SeqLocPtr slp1, slp2 = NULL, slp;
2214     Int2 i, in = 0, retval;
2215 
2216     gsfp = (GetSeqFeatPtr)data;
2217     if (gsfp->this != NULL)   /* got it */
2218         return;
2219 
2220     last = gsfp->last;
2221     hit = gsfp->hit;
2222     type = (Uint1)(gsfp->type);
2223     if (gsfp->slp1 != NULL)   /* matching by Bioseq */
2224     {
2225         slp1 = gsfp->slp1;
2226         slp2 = gsfp->slp2;
2227         in = gsfp->in;
2228     }
2229     else
2230         slp1 = NULL;
2231 
2232     if (IS_Bioseq(sep))
2233     {
2234         bsp = (BioseqPtr)(sep->data.ptrvalue);
2235         sap = bsp->annot;
2236     }
2237     else
2238     {
2239         bssp = (BioseqSetPtr)(sep->data.ptrvalue);
2240         sap = bssp->annot;
2241     }
2242 
2243     while (sap != NULL)
2244     {
2245         if (sap->type == 1)  /* feature table */
2246         {
2247             for (sfp = (SeqFeatPtr)(sap->data); sfp != NULL; sfp = sfp->next)
2248             {
2249                 if (! hit)       /* still looking */
2250                 {
2251                     if (sfp == last)
2252                     {
2253                         hit = TRUE;
2254                         gsfp->hit = TRUE;
2255                     }
2256                 }
2257                 else
2258                 {
2259                     if ((! type) || (type == sfp->data.choice))
2260                     {
2261                         if (slp1 != NULL)   /* look for feats on a bioseq */
2262                         {
2263                             for (i = 0; i < 2; i++)
2264                             {
2265                                 if ((i == 0) && (in != 1))
2266                                     slp = sfp->location;
2267                                 else if ((i==1) && (in != 0))
2268                                     slp = sfp->product;
2269                                 else
2270                                     slp = NULL;
2271                                 if (slp != NULL)
2272                                 {
2273                                     retval = SeqLocCompare(slp, slp1);
2274                                     if (retval)
2275                                     {
2276                                         gotit = TRUE;
2277                                         break;
2278                                     }
2279 
2280                                     if (slp2 != NULL)
2281                                     {
2282                                         retval = SeqLocCompare(slp, slp2);
2283                                         if (retval)
2284                                         {
2285                                             gotit = TRUE;
2286                                             break;
2287                                         }
2288                                     }
2289                                 }
2290                             }
2291                         }
2292                         else
2293                             gotit = TRUE;
2294                         if (gotit)
2295                         {
2296                             gsfp->this = sfp;
2297                             gsfp->sap = sap;
2298                             return;
2299                         }
2300                     }
2301                 }
2302             }
2303         }
2304 
2305         sap = sap->next;
2306     }
2307 
2308     return;
2309 }
2310 
2311 /*****************************************************************************
2312 *
2313 *   BioseqContextGetTitle(bcp)
2314 *     returns first title for Bioseq in this context
2315 *
2316 *****************************************************************************/
BioseqContextGetTitle(BioseqContextPtr bcp)2317 NLM_EXTERN CharPtr LIBCALL BioseqContextGetTitle(BioseqContextPtr bcp)
2318 {
2319     CharPtr title = NULL;
2320     ValNodePtr vnp;
2321 
2322     vnp = BioseqContextGetSeqDescr(bcp, Seq_descr_title, NULL, NULL);
2323     if (vnp != NULL)
2324         title = (CharPtr)vnp->data.ptrvalue;
2325     return title;
2326 }
2327 
2328 /*****************************************************************************
2329 *
2330 *   SeqMgr Functions
2331 *
2332 *****************************************************************************/
2333 
2334 /*****************************************************************************
2335 *
2336 *   SeqMgrSeqEntry(type, data, sep)
2337 *       Adds the SeqEntryPtr pointing directly to this Bioseq or BioseqSet
2338 *
2339 *****************************************************************************/
SeqMgrSeqEntry(Uint1 type,Pointer data,SeqEntryPtr sep)2340 NLM_EXTERN Boolean LIBCALL SeqMgrSeqEntry (Uint1 type, Pointer data, SeqEntryPtr sep)
2341 {
2342     return ObjMgrSetChoice (OBJ_SEQENTRY, sep, data);
2343 }
2344 
2345 /*****************************************************************************
2346 *
2347 *   SeqMgrGetSeqEntryForData(data)
2348 *       returns SeqEntryPtr for a BioseqPtr or BioseqSetPtr
2349 *       sep must have been put in SeqMgr using SeqMgrSeqEntry
2350 *       the Bioseq/BioseqSets it is a part of must also be in SeqMgr
2351 *       returns NULL on failure.
2352 *
2353 *****************************************************************************/
SeqMgrGetSeqEntryForData(Pointer data)2354 NLM_EXTERN SeqEntryPtr LIBCALL SeqMgrGetSeqEntryForData (Pointer data)
2355 {
2356     return ObjMgrGetChoiceForData(data);
2357 }
2358 
2359 /*****************************************************************************
2360 *
2361 *   SeqMgrGetEntityIDForSeqEntry(sep)
2362 *       returns the EntityID for a SeqEntryPtr
2363 *       sep must have been put in SeqMgr using SeqMgrSeqEntry
2364 *       the Bioseq/BioseqSets it is a part of must also be in SeqMgr
2365 *       This function will move up to the top of the SeqEntry tree it may be
2366 *          in. If top level EntityID is 0, one is assigned at this point.
2367 *       If an element is moved under a different hierarchy, its EntityID will
2368 *          change.
2369 *       returns 0 on failure.
2370 *
2371 *****************************************************************************/
SeqMgrGetEntityIDForSeqEntry(SeqEntryPtr sep)2372 NLM_EXTERN Int2 LIBCALL SeqMgrGetEntityIDForSeqEntry (SeqEntryPtr sep)
2373 {
2374     return ObjMgrGetEntityIDForChoice (sep);
2375 }
2376 
2377 /*****************************************************************************
2378 *
2379 *   SeqMgrGetSeqEntryForEntityID (id)
2380 *
2381 *****************************************************************************/
SeqMgrGetSeqEntryForEntityID(Int2 id)2382 NLM_EXTERN SeqEntryPtr LIBCALL SeqMgrGetSeqEntryForEntityID (Int2 id)
2383 {
2384     return ObjMgrGetChoiceForEntityID (id);
2385 }
2386 
2387 /*****************************************************************************
2388 *
2389 *   SeqMgrSetBSFetchTop (fetch, data)
2390 *       sets the BSFetchTop routine to "fetch"
2391 *       returns previous value
2392 *       set to NULL to turn off all fetching for that type
2393 *
2394 *       Current value can be called directly as BioseqFetch();
2395 *       Default is
2396 *           1) looks in memory
2397 *           2) looks locally if LocalBSFetch is set
2398 *            3) looks remotely if RemoteBSFetch is set
2399 *
2400 *****************************************************************************/
SeqMgrSetBSFetchTop(BSFetchTop fetch,Pointer data)2401 NLM_EXTERN BSFetchTop LIBCALL SeqMgrSetBSFetchTop (BSFetchTop fetch, Pointer data)
2402 {
2403     SeqMgrPtr smp;
2404     BSFetchTop tmp = NULL;
2405 
2406     smp = SeqMgrWriteLock();
2407     if (smp == NULL) return tmp;
2408 
2409     tmp = smp->bsfetch;
2410     smp->bsfetch = fetch;
2411     smp->TopData = data;
2412     SeqMgrUnlock();
2413     return tmp;
2414 }
2415 
2416 /*****************************************************************************
2417 *
2418 *   SeqMgrSetFetchOnLock(value)
2419 *       if value = TRUE, manager will try to fetch the bioseq if not in
2420 *          memory, when BioseqLock is called
2421 *       if FALSE, BioseqLock will only look in memory
2422 *       returns previous value of fetch_on_lock
2423 *       default is to fetch_on_lock
2424 *
2425 *****************************************************************************/
SeqMgrSetFetchOnLock(Boolean value)2426 NLM_EXTERN Boolean LIBCALL SeqMgrSetFetchOnLock (Boolean value)
2427 {
2428     Boolean tmp=FALSE;
2429     SeqMgrPtr smp;
2430 
2431     smp = SeqMgrWriteLock();
2432     if (smp == NULL) return tmp;
2433 
2434     tmp = smp->fetch_on_lock;
2435     smp->fetch_on_lock = value;
2436     SeqMgrUnlock();
2437     return tmp;
2438 }
2439 
2440 /*****************************************************************************
2441 *
2442 *   SeqMgrLinkSeqEntry(sep, parenttype, parentptr)
2443 *      connects all component seq-entries by traversing the linked list
2444 *        all calling SeqMgrConnect and SeqMgrSeqEntry appropriately
2445 *        if parenttype != 0, then assumes seqentry is contained in parentptr
2446 *           and should be connected to it
2447 *
2448 *****************************************************************************/
SeqMgrLinkSeqEntry(SeqEntryPtr sep,Uint2 parenttype,Pointer parentptr)2449 NLM_EXTERN Boolean LIBCALL SeqMgrLinkSeqEntry (SeqEntryPtr sep, Uint2 parenttype, Pointer parentptr)
2450 {
2451     SeqEntryPtr sep2;
2452     BioseqSetPtr bssp;
2453     Uint2 the_type;
2454 
2455     if (sep == NULL)
2456         return FALSE;
2457 
2458     if (IS_Bioseq(sep))
2459         the_type = OBJ_BIOSEQ;
2460     else
2461         the_type = OBJ_BIOSEQSET;
2462 
2463     SeqMgrSeqEntry((Uint1)the_type, sep->data.ptrvalue, sep);
2464 
2465     /**** if (parenttype != 0) ****/
2466     ObjMgrConnect(the_type, sep->data.ptrvalue, parenttype, parentptr);
2467 
2468     if (! IS_Bioseq(sep))
2469     {
2470         bssp = (BioseqSetPtr)(sep->data.ptrvalue);
2471         for (sep2 = bssp->seq_set; sep2 != NULL; sep2 = sep2->next)
2472         {
2473             SeqMgrLinkSeqEntry (sep2, the_type, sep->data.ptrvalue);
2474         }
2475     }
2476     return TRUE;
2477 }
2478 /*****************************************************************************
2479 *
2480 *   Selection Functions for data objects based on SeqLoc
2481 *      See also general selection in objmgr.h
2482 *
2483 *****************************************************************************/
2484 
2485 /*****************************************************************************
2486 *
2487 *   SeqMgrSelect(region)
2488 *      region is a SeqLocPtr
2489 *          It can only apply to one Bioseq
2490 *          selected area will be extreme left and right ends
2491 *          fuzziness is ignored
2492 *      if something else selected, deselects it first, then selects requested
2493 *        item
2494 *      to select without deselecting something else, use SeqMgrAlsoSelect()
2495 *      returns TRUE if item is now currently selected, FALSE if not
2496 *      "region" is always copied. Caller is responsible for managment of
2497 *         SeqLoc that is passed in.
2498 *
2499 *****************************************************************************/
SeqMgrSelect(SeqLocPtr region)2500 NLM_EXTERN Boolean LIBCALL SeqMgrSelect (SeqLocPtr region)
2501 {
2502     return SeqMgrGenericSelect(region, 1, NULL);
2503 }
2504 
SeqMgrAlsoSelect(SeqLocPtr region)2505 NLM_EXTERN Boolean LIBCALL SeqMgrAlsoSelect (SeqLocPtr region)
2506 {
2507     return SeqMgrGenericSelect(region, 2, NULL);
2508 }
2509 
2510 /*****************************************************************************
2511 *
2512 *   SeqMgrDeSelect(region)
2513 *       if this item was selected, then deselects and returns TRUE
2514 *       else returns FALSE
2515 *
2516 *****************************************************************************/
SeqMgrDeSelect(SeqLocPtr region)2517 NLM_EXTERN Boolean LIBCALL SeqMgrDeSelect (SeqLocPtr region)
2518 {
2519     return SeqMgrGenericSelect(region, 3, NULL);
2520 }
2521 
2522 /*****************************************************************************
2523 *
2524 *   SeqMgrSetColor(region, rgb)
2525 *
2526 *****************************************************************************/
SeqMgrSetColor(SeqLocPtr region,Uint1Ptr rgb)2527 NLM_EXTERN Boolean LIBCALL SeqMgrSetColor (SeqLocPtr region, Uint1Ptr rgb)
2528 {
2529     if (rgb == NULL) return FALSE;
2530         return SeqMgrGenericSelect(region, 4, rgb);
2531 }
2532 
SeqMgrGenericSelect(SeqLocPtr region,Int2 type,Uint1Ptr rgb)2533 static Boolean NEAR SeqMgrGenericSelect (SeqLocPtr region, Int2 type,
2534                                            Uint1Ptr rgb)
2535 {
2536     SeqInt si;
2537     ValNode vn;
2538     SeqIdPtr sip;
2539     Uint2 entityID;
2540     Uint4 itemID;
2541 
2542     if (region == NULL) return FALSE;
2543 
2544     sip = SeqLocId(region);
2545     if (sip == NULL) return FALSE;
2546 
2547     entityID = BioseqFindEntity(sip, &itemID);
2548     if (entityID == 0) return FALSE;
2549 
2550     MemSet((Pointer)(&si), 0, sizeof(SeqInt));
2551     MemSet((Pointer)(&vn), 0, sizeof(ValNode));
2552 
2553     si.id = sip;
2554     si.from = SeqLocStart(region);
2555     si.to = SeqLocStop(region);
2556     si.strand = SeqLocStrand(region);
2557 
2558     if ((si.from < 0) || (si.to < 0) || (si.from > si.to)) return FALSE;
2559 
2560     vn.choice = SEQLOC_INT;
2561     vn.data.ptrvalue = &si;
2562 
2563     switch (type)
2564     {
2565         case 1:
2566             return ObjMgrSelect(entityID, itemID, OBJ_BIOSEQ, OM_REGION_SEQLOC, &vn);
2567         case 2:
2568             return ObjMgrAlsoSelect(entityID, itemID, OBJ_BIOSEQ, OM_REGION_SEQLOC, &vn);
2569         case 3:
2570             return ObjMgrDeSelect(entityID, itemID, OBJ_BIOSEQ, OM_REGION_SEQLOC, &vn);
2571         case 4:
2572             return ObjMgrSetColor(entityID, itemID, OBJ_BIOSEQ,
2573                                  OM_REGION_SEQLOC, &vn, rgb);
2574         default:
2575             break;
2576     }
2577 
2578     return FALSE;
2579 }
2580 
2581 /*****************************************************************************
2582 *
2583 *   SpreadGapsInDeltaSeq(BioseqPtr bsp)
2584 *      bsp must be a delta seq
2585 *      function counts deltas with known lengths ( = known_len)
2586 *               counts deltas which are gaps of unknown length ( = unk_count)
2587 *                  these can delta of length 0, delta with fuzz = lim (unk),
2588 *                    or SEQLOC_NULL
2589 *               converts all unknown gaps to delta with fuzz = lim(unk)
2590 *               sets length of all unknown gaps to
2591 *                  (bsp->length - known_len)/unk_count
2592 *                  any reminder spread over first few gaps
2593 *
2594 *****************************************************************************/
SpreadGapsInDeltaSeq(BioseqPtr bsp)2595 NLM_EXTERN Boolean LIBCALL SpreadGapsInDeltaSeq (BioseqPtr bsp)
2596 {
2597     Boolean retval = FALSE;
2598     Int4 known_len = 0,
2599          total_gap, gap_len,
2600          unk_count = 0,
2601          remainder;
2602     DeltaSeqPtr dsp;
2603     SeqLocPtr slocp;
2604     SeqLitPtr slp;
2605     IntFuzzPtr ifp;
2606 
2607     if (bsp == NULL) return retval;
2608     if ((bsp->repr != Seq_repr_delta) || (bsp->seq_ext == NULL))
2609         return retval;
2610 
2611     retval = TRUE;  /* can function */
2612 
2613     for (dsp = (DeltaSeqPtr)(bsp->seq_ext); dsp != NULL; dsp = dsp->next)
2614     {
2615         switch (dsp->choice)
2616         {
2617             case 1:      /* SeqLocPtr */
2618                 slocp = (SeqLocPtr)(dsp->data.ptrvalue);
2619                 if (slocp == NULL) break;
2620                 if (slocp->choice == SEQLOC_NULL)  /* convert it */
2621                 {
2622                     SeqLocFree(slocp);
2623                     slp = SeqLitNew();
2624                     dsp->choice = 2;
2625                     dsp->data.ptrvalue = slp;
2626                     ifp = IntFuzzNew();
2627                     slp->fuzz = ifp;
2628                     ifp->choice = 4;   /* lim - type unk */
2629                     unk_count++;
2630                 }
2631                 else                               /* count length */
2632                     known_len += SeqLocLen(slocp);
2633                 break;
2634             case 2:   /* SeqLitPtr */
2635                 slp = (SeqLitPtr)(dsp->data.ptrvalue);
2636                 if (slp == NULL) break;
2637                 if (slp->seq_data != NULL)         /* not a gap */
2638                 {
2639                     known_len += slp->length;
2640                     break;
2641                 }
2642                 ifp = slp->fuzz;
2643                 if (slp->length == 0)  /* unknown length */
2644                 {
2645                     unk_count++;
2646                     if (ifp != NULL)
2647                     {
2648                         if (ifp->choice != 4)  /* not lim */
2649                             ifp = IntFuzzFree(ifp);
2650                         else if (ifp->a != 0)  /* not unk */
2651                             ifp = IntFuzzFree(ifp);
2652                     }
2653                     if (ifp == NULL)
2654                     {
2655                         ifp = IntFuzzNew();
2656                         ifp->choice = 4; /* lim - unk */
2657                         slp->fuzz = ifp;
2658                     }
2659                 }
2660                 else                      /* gap length was set */
2661                 {
2662                     if (ifp == NULL)  /* no fuzz - count length */
2663                         known_len += slp->length;
2664                     else              /* might be a guess */
2665                     {
2666                         if ((ifp->choice == 4) && (ifp->a == 0)) /* lim - unk */
2667                             unk_count++;
2668                         else
2669                             known_len += slp->length;
2670                     }
2671                 }
2672                 break;
2673             default:
2674                 break;
2675         }
2676 
2677     }
2678 
2679     if (unk_count == 0)   /* no unknown gaps */
2680         return retval;
2681 
2682     total_gap = bsp->length - known_len;
2683     if (total_gap < 0)
2684         total_gap = 0;
2685     gap_len = total_gap / unk_count;
2686     remainder = total_gap - (gap_len * unk_count);
2687 
2688     for (dsp = (DeltaSeqPtr)(bsp->seq_ext); dsp != NULL; dsp = dsp->next)
2689     {
2690         switch (dsp->choice)
2691         {
2692             case 1:      /* SeqLocPtr */
2693                 break;
2694             case 2:   /* SeqLitPtr */
2695                 slp = (SeqLitPtr)(dsp->data.ptrvalue);
2696                 if (slp == NULL) break;
2697                 if (slp->seq_data != NULL) break;
2698                 ifp = slp->fuzz;
2699                 if (ifp == NULL) break;
2700                 if ((ifp->choice != 4) || (ifp->a != 0))
2701                     break;
2702                 slp->length = gap_len;
2703                 if (remainder)
2704                 {
2705                     slp->length++;
2706                     remainder--;
2707                 }
2708                 break;
2709             default:
2710                 break;
2711         }
2712     }
2713 
2714     return retval;
2715 }
2716 
2717 /*****************************************************************************
2718 *
2719 *   CountGapsInDeltaSeq(BioseqPtr bsp, &num_segs, &num_gaps, &known_residues, &num_gaps_faked)
2720 *      bsp must be a delta seq
2721 *      function counts deltas and returns a profile
2722 *          num_segs = total number of segments
2723 *          num_gaps = total number of segments representing gaps
2724 *          known_residues = number of real residues in the sequence (not gaps)
2725 *          num_gaps_faked = number of gaps where real length is not known, but where
2726 *                           a length was guessed by spreading the total gap length
2727 *                           out over all gaps evenly.
2728 *
2729 *      NOTE: any of these pointers except bsp can be NULL
2730 *
2731 *      returns TRUE if values in argument were set.
2732 *
2733 *****************************************************************************/
NextLitLength(DeltaSeqPtr next,Int4Ptr lenp)2734 static Boolean NextLitLength (DeltaSeqPtr next, Int4Ptr lenp)
2735 
2736 {
2737   SeqLitPtr  slp;
2738 
2739   if (lenp == NULL) return FALSE;
2740   *lenp = 0;
2741   if (next == NULL || next->choice != 2) return FALSE;
2742   slp = (SeqLitPtr) next->data.ptrvalue;
2743   if (slp == NULL || slp->seq_data == NULL) return FALSE;
2744   if (slp->seq_data_type == Seq_code_gap) return FALSE;
2745   *lenp = slp->length;
2746   return TRUE;
2747 }
2748 
CountGapsInDeltaSeq(BioseqPtr bsp,Int4Ptr num_segs,Int4Ptr num_gaps,Int4Ptr known_residues,Int4Ptr num_gaps_faked,CharPtr buf,Int4 buflen)2749 NLM_EXTERN Boolean LIBCALL CountGapsInDeltaSeq (BioseqPtr bsp, Int4Ptr num_segs, Int4Ptr num_gaps, Int4Ptr known_residues, Int4Ptr num_gaps_faked, CharPtr buf, Int4 buflen)
2750 {
2751     Boolean retval = FALSE;
2752     Int4 residues = 0,
2753         segs = 0,
2754         gaps = 0,
2755         len = 0,
2756         fake_gaps = 0,
2757         from = 0,
2758         tlen = 0,
2759         nxtlen;
2760     DeltaSeqPtr dsp, next;
2761     SeqLocPtr slocp;
2762     SeqLitPtr slp;
2763     IntFuzzPtr ifp;
2764     Boolean unk;
2765     static Char tmp[128];
2766     Int2 diff, blen;
2767 
2768     if (bsp == NULL) return retval;
2769     if ((bsp->repr != Seq_repr_delta) || (bsp->seq_ext == NULL))
2770         return retval;
2771 
2772     retval = TRUE;  /* can function */
2773 
2774 
2775     for (dsp = (DeltaSeqPtr)(bsp->seq_ext); dsp != NULL; dsp = next)
2776     {
2777         next = dsp->next;
2778         segs++;
2779         from = len + 1;
2780         switch (dsp->choice)
2781         {
2782             case 1:      /* SeqLocPtr */
2783                 slocp = (SeqLocPtr)(dsp->data.ptrvalue);
2784                 if (slocp == NULL) break;
2785                 if (slocp->choice == SEQLOC_NULL)  /* gap */
2786                 {
2787                     gaps++;
2788                     sprintf(tmp, "* %ld %ld gap of unknown length~", (long) from, (long) len);
2789                     blen = (Int2) MIN ((Int4) buflen, (Int4) sizeof (tmp));
2790                     diff = LabelCopy(buf, tmp, blen);
2791                     buflen -= diff;
2792                     buf += diff;
2793                 }
2794                 else {                              /* count length */
2795                     residues += SeqLocLen(slocp);
2796                     if (buf != NULL) {
2797                         tlen =  SeqLocLen(slocp);
2798                         len  += tlen;
2799                         sprintf(tmp, "* %8ld %8ld: contig of %ld bp in length~", (long) from, (long) len, (long) tlen);
2800                         blen = (Int2) MIN ((Int4) buflen, (Int4) sizeof (tmp));
2801                         diff = LabelCopy(buf, tmp, blen);
2802                         buflen -= diff;
2803                         buf += diff;
2804                     }
2805                 }
2806                 break;
2807             case 2:   /* SeqLitPtr */
2808                 slp = (SeqLitPtr)(dsp->data.ptrvalue);
2809                 if (slp == NULL) break;
2810                 tlen =  slp->length;
2811                 len  += tlen;
2812                 if (slp->seq_data != NULL && slp->seq_data_type != Seq_code_gap)
2813                 {
2814                     residues += slp->length;
2815                     while (NextLitLength (next, &nxtlen)) {
2816                         tlen += nxtlen;
2817                         len  += nxtlen;
2818                         residues += nxtlen;
2819                         next = next->next;
2820                     }
2821                     if (buf) {
2822                         sprintf(tmp, "* %8ld %8ld: contig of %ld bp in length~", (long) from, (long) len, (long) tlen);
2823                         blen = (Int2) MIN ((Int4) buflen, (Int4) sizeof (tmp));
2824                         diff = LabelCopy(buf, tmp, blen);
2825                         buflen -= diff;
2826                         buf += diff;
2827                     }
2828                 }
2829                 else
2830                 {
2831                     unk = FALSE;
2832                     gaps++;
2833                     ifp = slp->fuzz;
2834                     if (ifp != NULL)
2835                     {
2836                         if ((ifp->choice == 4) && (ifp->a == 0)) {
2837                             unk = TRUE;
2838                             fake_gaps++;
2839                             if (buf) {
2840                                 if (from > len) {
2841                                 sprintf(tmp, "*                    gap of unknown length~");
2842                                 } else {
2843                                 sprintf(tmp, "* %8ld %8ld: gap of unknown length~", (long) from, (long) len);
2844                                 }
2845                                 blen = (Int2) MIN ((Int4) buflen, (Int4) sizeof (tmp));
2846                                 diff = LabelCopy(buf, tmp, blen);
2847                                 buflen -= diff;
2848                                 buf += diff;
2849                             }
2850                         }
2851                     }
2852                     if (!unk && buf) {
2853                         sprintf(tmp, "* %8ld %8ld: gap of %ld bp~", (long) from, (long) len, (long) tlen);
2854                         blen = (Int2) MIN ((Int4) buflen, (Int4) sizeof (tmp));
2855                         diff = LabelCopy(buf, tmp, blen);
2856                         buflen -= diff;
2857                         buf += diff;
2858                     }
2859                 }
2860                 break;
2861             default:
2862                 break;
2863         }
2864     }
2865 
2866     if (num_segs != NULL)
2867         *num_segs = segs;
2868     if (num_gaps != NULL)
2869         *num_gaps = gaps;
2870     if (known_residues != NULL)
2871         *known_residues = residues;
2872     if (num_gaps_faked != NULL)
2873         *num_gaps_faked = fake_gaps;
2874 
2875     return retval;
2876 }
2877 
2878 
2879 /*****************************************************************************
2880 *
2881 *   SeqMgrAdd(type, data)
2882 *       adds a Bioseq or BioseqSet to the sequence manager
2883 *
2884 *****************************************************************************/
SeqMgrAdd(Uint2 type,Pointer data)2885 NLM_EXTERN Boolean LIBCALL SeqMgrAdd (Uint2 type, Pointer data)
2886 {
2887     Boolean retval;
2888 
2889     SeqMgrWriteLock();
2890     retval = ObjMgrAdd(type, data);
2891     if (type != OBJ_BIOSEQ) {
2892         SeqMgrUnlock();
2893         return retval;
2894     }
2895     retval &= SeqMgrAddToBioseqIndex((BioseqPtr)data);
2896 
2897     SeqMgrUnlock();
2898 
2899     return retval;
2900 }
2901 
2902 
2903 /*****************************************************************************
2904 *
2905 *   SeqMgrDelete(type, data)
2906 *       deletes a Bioseq or BioseqSet from the sequence manager
2907 *
2908 *****************************************************************************/
SeqMgrDelete(Uint2 type,Pointer data)2909 NLM_EXTERN Boolean LIBCALL SeqMgrDelete (Uint2 type, Pointer data)
2910 {
2911     Boolean retval = FALSE;
2912 
2913     SeqMgrWriteLock();
2914     if (type == OBJ_BIOSEQ)  /* remove id indexes */
2915         SeqMgrDeleteFromBioseqIndex((BioseqPtr)data);
2916 
2917     retval = ObjMgrDelete(type, data);
2918     SeqMgrUnlock();
2919     return retval;
2920 }
2921 
2922 
2923 
SeqMgrAddIndexElement(SeqMgrPtr smp,BioseqPtr bsp,CharPtr buf,Boolean sort_now)2924 static Boolean NEAR SeqMgrAddIndexElement(SeqMgrPtr smp, BioseqPtr bsp, CharPtr buf, Boolean sort_now)
2925 {
2926     SeqIdIndexElementPtr sip, PNTR sipp;
2927     SeqIdIndexBlockPtr sibp, prev;
2928     Int4 imin, imax, i, j;
2929     CharPtr tmp, newstr;
2930     ObjMgrDataPtr omdp;
2931     ObjMgrPtr omp;
2932 
2933     omp = ObjMgrReadLock();
2934     omdp = ObjMgrFindByData(omp, (Pointer)bsp);  /* caching protection */
2935     ObjMgrUnlock();
2936     if (omdp == NULL)
2937     {
2938         return FALSE;
2939     }
2940 
2941     sipp = smp->BioseqIndex;
2942     if (smp->BioseqIndexCnt >= smp->BioseqIndexNum)  /* expand space */
2943     {
2944        prev = NULL;
2945        for (sibp = smp->BioseqIndexData; sibp != NULL; sibp = sibp->next)
2946            prev = sibp;
2947        sibp = MemNew(sizeof(SeqIdIndexBlock));
2948        if (prev != NULL)
2949            prev->next = sibp;
2950        else
2951            smp->BioseqIndexData = sibp;
2952 
2953        smp->BioseqIndex = MemNew((smp->BioseqIndexNum + 100) *
2954                      sizeof(SeqIdIndexElementPtr));
2955        MemCopy(smp->BioseqIndex, sipp, (smp->BioseqIndexNum *
2956                         sizeof(SeqIdIndexElementPtr)));
2957        MemFree(sipp);
2958        smp->BioseqIndexNum += 100;
2959        sipp = smp->BioseqIndex;
2960        for (i = 0, j = smp->BioseqIndexCnt; i < 100; i++, j++)
2961            sipp[j] = &(sibp->sid[i]);
2962     }
2963 
2964     i = smp->BioseqIndexCnt;   /* empties are at the end */
2965     sip = sipp[i];
2966     sip->omdp = omdp;       /* fill in the values */
2967     sip->str = StringSave(buf);
2968     newstr = sip->str;
2969     RevStringUpper(newstr);  /* try to avoid case check */
2970 
2971     if (! sort_now)
2972     {
2973         smp->BioseqIndexCnt++;     /* got one more */
2974         return TRUE;
2975     }
2976 
2977     imin = 0;                   /* find where it goes */
2978     imax = i-1;
2979     if (imax >= 0)
2980         tmp = sipp[imax]->str;
2981     if ((i) && (StringCmp(newstr, sipp[imax]->str) < 0))
2982     {
2983         i = (imax + imin) / 2;
2984         while (imax > imin)
2985         {
2986             tmp = sipp[i]->str;
2987             if ((j = StringCmp(newstr, tmp)) < 0)
2988                 imax = i - 1;
2989             else if (j > 0)
2990                 imin = i + 1;
2991             else
2992                 break;
2993             i = (imax + imin)/2;
2994         }
2995 
2996         if (StringCmp(newstr, sipp[i]->str) > 0) /* check for off by 1 */
2997         {
2998             i++;
2999         }
3000 
3001 
3002         imax = smp->BioseqIndexCnt - 1;     /* open the array */
3003         while (imax >= i)
3004         {
3005             sipp[imax+1] = sipp[imax];
3006             imax--;
3007         }
3008     }
3009 
3010     sipp[i] = sip;    /* put in the pointer in order */
3011     smp->BioseqIndexCnt++;     /* got one more */
3012     return TRUE;
3013 }
3014 
3015 /*****************************************************************************
3016 *
3017 *   SeqMgrHoldIndexing(Boolean hold)
3018 *       stops sequence indexing to allow bulk loading if hold = TRUE
3019 *       starts it when hold = FALSE;
3020 *       uses a counter so you must call it the same number of times
3021 *        with TRUE as with FALSE
3022 *       when the counter decrements to 0, it will index what it has.
3023 *
3024 *****************************************************************************/
SeqMgrHoldIndexing(Boolean hold)3025 NLM_EXTERN void LIBCALL SeqMgrHoldIndexing (Boolean hold)
3026 {
3027     SeqMgrPtr smp;
3028 
3029     smp = SeqMgrWriteLock();
3030     if (hold)
3031         smp->hold_indexing++;
3032     else
3033         smp->hold_indexing--;
3034     SeqMgrUnlock();
3035 
3036     if (! smp->hold_indexing)
3037         SeqMgrProcessNonIndexedBioseq(FALSE);
3038 
3039     return;
3040 }
3041 
3042 int LIBCALLBACK SeqIdIndexElementCmp (VoidPtr a, VoidPtr b);
3043 
SeqIdIndexElementCmp(VoidPtr a,VoidPtr b)3044 int LIBCALLBACK SeqIdIndexElementCmp (VoidPtr a, VoidPtr b)
3045 {
3046     return (int)(StringCmp((*(SeqIdIndexElementPtr PNTR)a)->str,
3047                    (*(SeqIdIndexElementPtr PNTR)b)->str));
3048 }
3049 
3050 /*****************************************************************************
3051 *
3052 *   SeqMgrProcessNonIndexedBioseq(Boolean force_it)
3053 *       Indexes a BioseqPtr by SeqId(s)
3054 *       If ! force_it, respects the smp->don't index flag
3055 *
3056 *****************************************************************************/
SeqMgrProcessNonIndexedBioseq(Boolean force_it)3057 static Boolean NEAR SeqMgrProcessNonIndexedBioseq(Boolean force_it)
3058 {
3059     BioseqPtr PNTR bspp, bsp;
3060     Int4 i, total, k, old_BioseqIndexCnt;
3061     SeqIdPtr sip;
3062     Char buf[128];
3063     /*
3064     CharPtr tmp;
3065     */
3066     Uint1 oldchoice;
3067     Boolean indexed;
3068     TextSeqIdPtr tsip;
3069     SeqMgrPtr smp;
3070     Int2 version;
3071     Boolean sort_now = TRUE;
3072     TextSeqId tsi;
3073     SeqId si;
3074 
3075     smp = SeqMgrReadLock();
3076     if ((! smp->NonIndexedBioseqCnt) ||           /* nothing to index */
3077         ((! force_it) && (smp->hold_indexing)))   /* holding off on indexing */
3078     {
3079         SeqMgrUnlock();
3080         return TRUE;
3081     }
3082     SeqMgrUnlock();
3083 
3084     smp = SeqMgrWriteLock();
3085         if ((! smp->NonIndexedBioseqCnt) ||           /* nothing to index */
3086             ((! force_it) && (smp->hold_indexing)))   /* holding off on indexing */
3087     {
3088         SeqMgrUnlock();
3089         return TRUE;
3090     }
3091 
3092     total = smp->NonIndexedBioseqCnt;
3093     old_BioseqIndexCnt=smp->BioseqIndexCnt; /*** remember this one to do smart sort ****/
3094 
3095     if (total > 100)   /* heap sort is faster */
3096         sort_now = FALSE;
3097 
3098     bspp = smp->NonIndexedBioseq;
3099     for (i = 0; i < total; i++)
3100     {
3101         indexed = FALSE;
3102         bsp = bspp[i];
3103         if (bsp != NULL)
3104         {
3105             if (bsp->id != NULL)
3106             {
3107                 indexed = TRUE;
3108                 version = 0;
3109                 for (sip = bsp->id; sip != NULL; sip = sip->next)
3110                 {
3111                     oldchoice = 0;
3112                     switch (sip->choice)
3113                     {
3114                     case SEQID_GI:
3115                         sprintf(buf, "%ld", (long)(sip->data.intvalue));
3116                         SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3117                         break;
3118                     case SEQID_EMBL:
3119                     case SEQID_DDBJ:
3120                         oldchoice = sip->choice;
3121                         /*
3122                         sip->choice = SEQID_GENBANK;
3123                         */
3124                     case SEQID_GENBANK:
3125                     case SEQID_OTHER:
3126                     case SEQID_TPG:
3127                     case SEQID_TPE:
3128                     case SEQID_TPD:
3129                     case SEQID_GPIPE:
3130                         tsip = (TextSeqIdPtr)(sip->data.ptrvalue);
3131                         if (((tsip->version > 0) && (tsip->release == NULL))
3132                             && SHOWVERSION)
3133                           {
3134                             version = tsip->version;
3135                           }
3136                     case SEQID_PIR:
3137                     case SEQID_SWISSPROT:
3138                     case SEQID_PRF:
3139                         tsip = (TextSeqIdPtr)(sip->data.ptrvalue);
3140                         /*
3141                         if (tsip->name != NULL)
3142                         {
3143                             tmp = tsip->accession;
3144                             tsip->accession = NULL;
3145                             SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3146                             SeqMgrAddIndexElement(smp, bsp, buf,sort_now);
3147                             tsip->accession = tmp;
3148                         }
3149                         */
3150                         /*
3151                         tmp = tsip->name;
3152                         tsip->name = NULL;
3153                         SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3154                         SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3155                         */
3156 
3157                         MemSet ((Pointer) &tsi, 0, sizeof (TextSeqId));
3158                         tsi.name = tsip->name;
3159                         tsi.accession = tsip->accession;
3160                         tsi.release = tsip->release;
3161                         tsi.version = tsip->version;
3162                         MemSet ((Pointer) &si, 0, sizeof (SeqId));
3163                         si.choice = sip->choice;
3164                         if (oldchoice != 0) {
3165                           si.choice = SEQID_GENBANK;
3166                         }
3167                         si.data.ptrvalue = (Pointer) &tsi;
3168 
3169                         if (tsi.name != NULL) {
3170                           tsi.accession = NULL;
3171                           SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3172                           SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3173                           tsi.accession = tsip->accession;
3174                         }
3175                         tsi.name = NULL;
3176                         SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3177                         SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3178                         if (version) {
3179                           tsi.version = 0;
3180                           SeqIdWrite(&si, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3181                           SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3182                           /*
3183                           tsip->version = 0;
3184                           SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3185                           SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3186                           tsip->version = version;
3187                           */
3188                         }
3189                         /*
3190                         tsip->name = tmp;
3191                         */
3192                         /*
3193                         if (oldchoice)
3194                             sip->choice = oldchoice;
3195                         */
3196                         break;
3197                     default:
3198                           SeqIdWrite(sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3199                         SeqMgrAddIndexElement(smp, bsp, buf, sort_now);
3200                         break;
3201                     }
3202                 }
3203             }
3204         }
3205         if (indexed)
3206             bspp[i] = NULL;
3207     }
3208 
3209     /* faster single pass removal of NULLs */
3210     for (i = 0, k = 0; i < total; i++) {
3211         bsp = bspp [i];
3212         if (bsp != NULL) {
3213             bspp [k] = bsp;
3214             k++;
3215         }
3216     }
3217     total = k;
3218 
3219     /*
3220     for (i = 0; i < total; i++)
3221     {
3222         if (bspp[i] == NULL)
3223         {
3224            total--;
3225            for (k = i; k < total; k++)
3226                bspp[k] = bspp[k+1];
3227            i--;
3228         }
3229     }
3230     */
3231 
3232     smp->NonIndexedBioseqCnt = total;
3233 
3234     if (! sort_now)   /* sort at the end */
3235     {
3236         if(   old_BioseqIndexCnt > 1000 /**** sorted part of the array is large ***/
3237                    && (old_BioseqIndexCnt*1.1 > smp->BioseqIndexCnt ) ){ /*** unsorted part of the array is < 10% ***/
3238             SeqIdIndexElementPtr PNTR    bsindex_buf;
3239             SeqIdIndexElementPtr        stack_buf[1024];
3240             int    i_o, i_n, i_w;
3241             int    unsorted_size= smp->BioseqIndexCnt - old_BioseqIndexCnt;
3242 
3243 #if 1
3244             /****  sort unsorted part ****/
3245             StableMergeSort((VoidPtr) (smp->BioseqIndex+old_BioseqIndexCnt), (size_t) unsorted_size,
3246                                   sizeof(SeqIdIndexElementPtr), SeqIdIndexElementCmp);
3247             /**** move new part to an array ****/
3248             if(unsorted_size > 1024){
3249                 bsindex_buf=Nlm_Malloc(sizeof(*bsindex_buf)*unsorted_size);
3250             } else {
3251                 bsindex_buf=stack_buf;
3252             }
3253             MemMove((VoidPtr)bsindex_buf,(VoidPtr)(smp->BioseqIndex+old_BioseqIndexCnt),
3254                 sizeof(*bsindex_buf)*unsorted_size);
3255 
3256             /**** merge both arrays from the end ****/
3257             i_n=unsorted_size-1;      /**** new part index ****/
3258             i_o=old_BioseqIndexCnt-1; /**** old part index ***/
3259             i_w=smp->BioseqIndexCnt-1;/**** whole array index ***/
3260             i=0;
3261             while(i_n >= 0) {
3262                 if(   i_o < 0
3263                                    || SeqIdIndexElementCmp((VoidPtr)(bsindex_buf+i_n),
3264                                (VoidPtr)(smp->BioseqIndex+i_o)) >= 0){
3265                     /**** move new element ***/
3266                     smp->BioseqIndex[i_w]=bsindex_buf[i_n];
3267                     i_w--;i_n--;
3268                 } else {
3269                     /**** move old element ***/
3270                     smp->BioseqIndex[i_w]=smp->BioseqIndex[i_o];
3271                                         i_w--;i_o--;
3272                 }
3273                 i++;
3274             }
3275             /*** cleanup *****/
3276             if(unsorted_size > 1024){
3277                 MemFree(bsindex_buf);
3278             }
3279 #else
3280             StableMergeSort((VoidPtr) (smp->BioseqIndex), (size_t)(smp->BioseqIndexCnt),
3281                                   sizeof(SeqIdIndexElementPtr), SeqIdIndexElementCmp);
3282 #endif
3283         } else { /** Heap Sort should be faster ***/
3284             StableMergeSort((VoidPtr) (smp->BioseqIndex), (size_t)(smp->BioseqIndexCnt),
3285                   sizeof(SeqIdIndexElementPtr), SeqIdIndexElementCmp);
3286         }
3287     }
3288 
3289     SeqMgrUnlock();
3290 
3291     return TRUE;
3292 }
3293 
3294 
3295 
3296 /*****************************************************************************
3297 *
3298 *   SeqMgrAddToBioseqIndex(bsp)
3299 *       Indexes a BioseqPtr by SeqId(s)
3300 *
3301 *****************************************************************************/
SeqMgrAddToBioseqIndex(BioseqPtr bsp)3302 NLM_EXTERN Boolean LIBCALL SeqMgrAddToBioseqIndex (BioseqPtr bsp)
3303 {
3304     SeqMgrPtr smp;
3305     BioseqPtr PNTR bspp;
3306 
3307     if (bsp == NULL)
3308         return FALSE;
3309 
3310     smp = SeqMgrWriteLock();
3311 
3312     /* if this bsp was the last one added, no need to add it again */
3313     if (smp->NonIndexedBioseqCnt > 0 && smp->NonIndexedBioseq [smp->NonIndexedBioseqCnt - 1] == bsp) {
3314         SeqMgrUnlock();
3315         return TRUE;
3316     }
3317 
3318                               /* increase array as needed */
3319     if (smp->NonIndexedBioseqCnt >= smp->NonIndexedBioseqNum)
3320     {
3321         bspp = smp->NonIndexedBioseq;
3322         smp->NonIndexedBioseq = MemNew((smp->NonIndexedBioseqNum + 10) * sizeof (BioseqPtr));
3323         if (smp->NonIndexedBioseq == NULL) {
3324           Message (MSG_POSTERR, "Unable to allocate memory for bioseq index");
3325           smp->NonIndexedBioseq = bspp;
3326           return FALSE;
3327         }
3328         MemCopy(smp->NonIndexedBioseq, bspp, (smp->NonIndexedBioseqNum * sizeof(BioseqPtr)));
3329         MemFree(bspp);
3330         smp->NonIndexedBioseqNum += 10;
3331     }
3332 
3333     smp->NonIndexedBioseq[smp->NonIndexedBioseqCnt] = bsp;
3334     smp->NonIndexedBioseqCnt++;
3335 
3336     SeqMgrUnlock();
3337 
3338     SeqMgrProcessNonIndexedBioseq(FALSE);
3339 
3340     return TRUE;
3341 }
3342 
3343 
3344 /*****************************************************************************
3345 *
3346 *   SeqMgrDeleteDeleteFromBioseqIndex(bsp)
3347 *       Removes index on BioseqPtr SeqIds
3348 *
3349 *****************************************************************************/
SeqMgrDeleteFromBioseqIndex(BioseqPtr bsp)3350 NLM_EXTERN Boolean LIBCALL SeqMgrDeleteFromBioseqIndex (BioseqPtr bsp)
3351 {
3352     SeqMgrPtr smp;
3353     SeqIdIndexElementPtr PNTR sipp, sip;
3354     Int4 i, j, num;
3355     BioseqPtr PNTR bspp;
3356     ObjMgrDataPtr omdp;
3357     ObjMgrPtr omp;
3358 
3359     smp = SeqMgrWriteLock();
3360 
3361     /* bail if in bulk deletion of large record */
3362     if (bsp != NULL) {
3363         omdp = SeqMgrGetOmdpForBioseq (bsp);
3364         if (omdp != NULL && omdp->bulkIndexFree) {
3365             SeqMgrUnlock();
3366             return FALSE;
3367         }
3368     }
3369                                 /* check if not indexed yet */
3370     if (smp->NonIndexedBioseqCnt > 0)
3371     {
3372         num = smp->NonIndexedBioseqCnt;
3373         bspp = smp->NonIndexedBioseq;
3374         for (i = 0; i < num; i++)
3375         {
3376             if (bspp[i] == bsp)
3377             {
3378                 num--;
3379                 for (j = i; j < num; j++)
3380                      bspp[j] = bspp[j+1];
3381                 i--;
3382             }
3383         }
3384         smp->NonIndexedBioseqCnt = num;
3385     }
3386 
3387     num = smp->BioseqIndexCnt;
3388     sipp = smp->BioseqIndex;
3389 
3390         /*    omp = ObjMgrReadLock();  */
3391 
3392         omp = ObjMgrGet();
3393     omdp = ObjMgrFindByData(omp, (Pointer)bsp);
3394 
3395     /* ObjMgrUnlock(); */
3396 
3397     for (i = 0; i < BIOSEQ_CACHE_NUM; i++)   /* remove from BioseqFind cache */
3398     {
3399         if (omdp_cache[i] == omdp)
3400         {
3401             omdp_cache[i] = NULL;
3402             se_cache[i] = NULL;
3403         }
3404     }
3405 
3406     for (i = 0; i < num; i++)
3407     {
3408        if (sipp[i]->omdp == omdp)
3409        {
3410            sipp[i]->omdp = NULL;
3411            sipp[i]->str = MemFree(sipp[i]->str);
3412            sip = sipp[i];
3413            for (j = i; j < (num-1); j++)
3414                sipp[j] = sipp[j+1];
3415            sipp[j] = sip;
3416            num--; i--;
3417        }
3418     }
3419 
3420     smp->BioseqIndexCnt = num;
3421 
3422     SeqMgrUnlock();
3423 
3424     return TRUE;
3425 }
3426 
3427 /*****************************************************************************
3428 *
3429 *   SeqMgrDeleteIndexesInRecord (sep)
3430 *       Bulk removal of SeqId index on entire entity prior to its deletion
3431 *
3432 *****************************************************************************/
/* VisitBioseqsInSep callback: sets bulkIndexFree on the object manager
 * record of bsp, unless there is no record or it is already being freed.
 * userdata is not used. */
static void MarkSeqForBulkDeletion (
  BioseqPtr bsp,
  Pointer userdata
)

{
  ObjMgrDataPtr  record;

  if (bsp != NULL) {
    record = SeqMgrGetOmdpForBioseq (bsp);
    if (record != NULL && ! record->being_freed) {
      record->bulkIndexFree = TRUE;
    }
  }
}
3446 
/* VisitSetsInSep callback: looks up the object manager record of bssp under
 * the ObjMgr write lock and sets bulkIndexFree on it, unless there is no
 * record or it is already being freed.  userdata is not used. */
static void MarkSetForBulkDeletion (
  BioseqSetPtr bssp,
  Pointer userdata
)

{
  ObjMgrPtr      mgr;
  ObjMgrDataPtr  record;

  if (bssp == NULL) return;

  mgr = ObjMgrWriteLock ();
  record = ObjMgrFindByData (mgr, bssp);
  if (record == NULL || record->being_freed) {
    ObjMgrUnlock ();
    return;
  }
  record->bulkIndexFree = TRUE;
  ObjMgrUnlock ();
}
3464 
SeqMgrDeleteIndexesInRecord(SeqEntryPtr sep)3465 NLM_EXTERN Boolean LIBCALL SeqMgrDeleteIndexesInRecord (SeqEntryPtr sep)
3466 
3467 {
3468   BioseqPtr                  bsp;
3469   BioseqPtr PNTR             bspp;
3470   Int4                       i, j, k, num;
3471   ObjMgrDataPtr              omdp;
3472   SeqIdIndexElementPtr PNTR  sipp;
3473   SeqMgrPtr                  smp;
3474   SeqIdIndexElementPtr PNTR  tmp;
3475 
3476   if (sep == NULL) return FALSE;
3477 
3478   smp = SeqMgrWriteLock ();
3479 
3480   VisitBioseqsInSep (sep, NULL, MarkSeqForBulkDeletion);
3481   VisitSetsInSep (sep, NULL, MarkSetForBulkDeletion);
3482 
3483   /* check if not indexed yet */
3484 
3485   if (smp->NonIndexedBioseqCnt > 0) {
3486 
3487     num = smp->NonIndexedBioseqCnt;
3488     bspp = smp->NonIndexedBioseq;
3489 
3490     for (i = 0; i < num; i++) {
3491       bsp = bspp [i];
3492       if (bsp != NULL) {
3493         omdp = SeqMgrGetOmdpForBioseq (bsp);
3494         if (omdp != NULL && omdp->bulkIndexFree) {
3495           num--;
3496           for (j = i; j < num; j++) {
3497              bspp [j] = bspp [j + 1];
3498           }
3499           i--;
3500         }
3501       }
3502     }
3503 
3504     smp->NonIndexedBioseqCnt = num;
3505   }
3506 
3507   /* remove from BioseqFind cache */
3508 
3509   for (i = 0; i < BIOSEQ_CACHE_NUM; i++) {
3510     omdp = omdp_cache [i];
3511     if (omdp != NULL && omdp->bulkIndexFree) {
3512       omdp_cache [i] = NULL;
3513       se_cache [i] = NULL;
3514     }
3515   }
3516 
3517   /* bulk free of indexes from sipp list */
3518 
3519   sipp = smp->BioseqIndex;
3520   if (sipp == NULL) {
3521     SeqMgrUnlock ();
3522     return FALSE;
3523   }
3524 
3525   num = smp->BioseqIndexCnt;
3526   tmp = (SeqIdIndexElementPtr PNTR) MemNew (sizeof (SeqIdIndexElementPtr) * (size_t) (num + 1));
3527   if (tmp != NULL) {
3528 
3529     /* null out dying indexes, compress list, move empties to end */
3530 
3531     for (i = 0, j = 0, k = 0; i < num; i++) {
3532       omdp = sipp [i]->omdp;
3533       if (omdp != NULL && omdp->bulkIndexFree) {
3534         sipp [i]->omdp = NULL;
3535         sipp [i]->str = MemFree (sipp [i]->str);
3536         tmp [k] = sipp [i];
3537         k++;
3538       } else {
3539         sipp [j] = sipp [i];
3540         j++;
3541       }
3542     }
3543     /* update count of remaining indexes */
3544 
3545     smp->BioseqIndexCnt = j;
3546     MemMove (sipp + j, tmp, sizeof (SeqIdIndexElementPtr) * (size_t) k);
3547   }
3548   MemFree (tmp);
3549 
3550   SeqMgrUnlock ();
3551 
3552   return TRUE;
3553 }
3554 
3555 /*****************************************************************************
3556 *
3557 *   SeqMgrClearBioseqIndex()
3558 *       Clears entire SeqId index for all entities
3559 *
3560 *****************************************************************************/
SeqMgrClearBioseqIndex(void)3561 NLM_EXTERN void SeqMgrClearBioseqIndex (void)
3562 
3563 {
3564   BioseqPtr PNTR             bspp;
3565   Int4                       i, num;
3566   SeqIdIndexBlockPtr         sibp, next;
3567   SeqIdIndexElementPtr       sip;
3568   SeqIdIndexElementPtr PNTR  sipp;
3569   SeqMgrPtr                  smp;
3570 
3571   smp = SeqMgrWriteLock ();
3572 
3573   num = smp->NonIndexedBioseqCnt;
3574   bspp = smp->NonIndexedBioseq;
3575   if (bspp != NULL) {
3576     for (i = 0; i < num; i++) {
3577       bspp [i] = NULL;
3578     }
3579   }
3580   smp->NonIndexedBioseqCnt = 0;
3581   smp->NonIndexedBioseqNum = 0;
3582   smp->NonIndexedBioseq = MemFree (smp->NonIndexedBioseq);
3583 
3584   num = smp->BioseqIndexCnt;
3585   sipp = smp->BioseqIndex;
3586   if (sipp != NULL) {
3587     for (i = 0; i < num; i++) {
3588       sip = sipp [i];
3589       if (sip != NULL) {
3590         sip->omdp = NULL;
3591         sip->str = MemFree (sip->str);
3592       }
3593       sipp [i] = NULL;
3594     }
3595   }
3596   smp->BioseqIndexCnt = 0;
3597   smp->BioseqIndexNum = 0;
3598   for (sibp = smp->BioseqIndexData; sibp != NULL; sibp = next) {
3599     next = sibp->next;
3600     MemFree (sibp);
3601   }
3602   smp->BioseqIndexData = NULL;
3603 
3604   SeqMgrUnlock ();
3605 }
3606 
3607 /*****************************************************************************
3608 *
3609 *   SeqMgrReplaceInBioseqIndex(bsp)
3610 *       Replaces index on BioseqPtr SeqIds
3611 *
3612 *****************************************************************************/
SeqMgrReplaceInBioseqIndex(BioseqPtr bsp)3613 NLM_EXTERN Boolean LIBCALL SeqMgrReplaceInBioseqIndex (BioseqPtr bsp)
3614 {
3615     SeqMgrDeleteFromBioseqIndex(bsp);
3616     return SeqMgrAddToBioseqIndex(bsp);
3617 }
3618 
3619 /*****************************************************************************
3620 *
3621 *   GetUniGeneIDForSeqId(SeqIdPtr)
3622 *     returns the UniGene ID for a SeqId
3623 *     returns 0 if can't find it, or not a legal unigene id
3624 *     This only applies to genomes division of entrez
3625 *
3626 *****************************************************************************/
3627 
/*****************************************************************
*
*    IT IS a KLUDGE!! Each source database has its own fixed
*    offset (1,000,000 for UniGene, 2,000,000 for FlyBase, ...)
*    that GetUniGeneIDForSeqId adds to the numeric tag id -
*    presumably so that ids from different databases stay apart.
*
*****************************************************************/
#define KLUDGE_UNIGENE 1000000    /* offset added to ids from db "UNIGENE" / "UniGene" */
#define KLUDGE_FlyBase 2000000    /* offset added to ids from db "FlyBase" */
#define KLUDGE_JACKSON 3000000  /* offset added to ids from db "JACKSON" (the Mouse data) */
#define KLUDGE_JRGP    4000000  /* offset added to ids from db "JRGP" (the rice data) */
#define KLUDGE_CESC    5000000  /* offset added to ids from db "CESC" (the C. elegans data) */
#define KLUDGE_BSNR    6000000  /* offset added to ids from db "BSNR" (the B. subtilis data) */
#define KLUDGE_HUMGEN  7000000  /* the kludge offset for the Human genomic data */
#define KLUDGE_YGG     8000000  /* the kludge offset for the yeast data */
#define KLUDGE_NCBICG  9000000  /* the kludge offset for small genomes */
#define KLUDGE_MAIZE  10000000  /* the kludge offset for corn */
3643 
GetUniGeneIDForSeqId(SeqIdPtr sip)3644 NLM_EXTERN Int4 LIBCALL GetUniGeneIDForSeqId (SeqIdPtr sip)
3645 {
3646     DbtagPtr db_tag;
3647     ObjectIdPtr oip;
3648 
3649     if (sip == NULL)
3650         return 0;
3651 
3652 
3653     if(sip->choice != SEQID_GENERAL)
3654         return 0;
3655 
3656     db_tag = sip->data.ptrvalue;
3657     if(db_tag == NULL || db_tag->db == NULL)
3658         return 0;
3659 
3660     oip = db_tag->tag;
3661     if(oip == NULL || oip->id == 0)
3662         return 0;
3663 
3664     if(StringCmp(db_tag->db, "UNIGENE") == 0)
3665         return (KLUDGE_UNIGENE+ oip->id);
3666     if(StringCmp(db_tag->db, "UniGene") == 0)
3667         return (KLUDGE_UNIGENE+ oip->id);
3668     if(StringCmp(db_tag->db, "FlyBase") == 0)
3669         return (KLUDGE_FlyBase+ oip->id);
3670     if(StringCmp(db_tag->db, "JACKSON") == 0)
3671         return (KLUDGE_JACKSON+ oip->id);
3672     if(StringCmp(db_tag->db, "JRGP") == 0)
3673         return (KLUDGE_JRGP + oip->id);
3674     if(StringCmp(db_tag->db, "CESC") == 0)
3675         return (KLUDGE_CESC + oip->id);
3676     if(StringCmp(db_tag->db, "BSNR") == 0)
3677         return (KLUDGE_BSNR + oip->id);
3678         if(StringCmp(db_tag->db, "HUMGEN") == 0)
3679                 return (KLUDGE_HUMGEN + oip->id);
3680         if(StringCmp(db_tag->db, "YGG") == 0)
3681                 return (KLUDGE_YGG + oip->id);
3682         if(StringCmp(db_tag->db, "NCBICG") == 0)
3683                 return (KLUDGE_NCBICG + oip->id);
3684         if(StringCmp(db_tag->db, "MAIZE") == 0)
3685                 return (KLUDGE_MAIZE + oip->id);
3686     return 0;
3687 
3688 }
3689 
3690 
3691 /*****************************************************************************
3692 *
3693 *   BioseqExtra extensions to preindex for rapid retrieval
3694 *
3695 *****************************************************************************/
3696 
3697 /*
3698 *  remaining to be done are mapping tables for rapid coordinate conversion
3699 *  between genome record and parts, genomic DNA and mRNA, and mRNA and protein
3700 */
3701 
SeqMgrGetOmdpForPointer(Pointer ptr)3702 static ObjMgrDataPtr SeqMgrGetOmdpForPointer (Pointer ptr)
3703 
3704 {
3705   ObjMgrDataPtr  omdp;
3706   ObjMgrPtr      omp;
3707 
3708   if (ptr == NULL) return NULL;
3709   omp = ObjMgrWriteLock ();
3710   omdp = ObjMgrFindByData (omp, ptr);
3711   ObjMgrUnlock ();
3712   return omdp;
3713 }
3714 
SeqMgrGetOmdpForBioseq(BioseqPtr bsp)3715 NLM_EXTERN ObjMgrDataPtr SeqMgrGetOmdpForBioseq (BioseqPtr bsp)
3716 
3717 {
3718   ObjMgrDataPtr  omdp = NULL;
3719   ObjMgrPtr      omp;
3720 
3721   if (bsp == NULL) return NULL;
3722   omp = ObjMgrWriteLock ();
3723   omdp = (ObjMgrDataPtr) bsp->omdp;
3724   if (omdp == NULL) {
3725     omdp = ObjMgrFindByData (omp, bsp);
3726     bsp->omdp = (Pointer) omdp;
3727   }
3728   ObjMgrUnlock ();
3729   return omdp;
3730 }
3731 
SeqMgrGetExtraDataForOmdp(ObjMgrDataPtr omdp)3732 NLM_EXTERN Pointer SeqMgrGetExtraDataForOmdp (ObjMgrDataPtr omdp)
3733 
3734 {
3735   Pointer    extradata;
3736   ObjMgrPtr  omp;
3737 
3738   if (omdp == NULL) return NULL;
3739   omp = ObjMgrWriteLock ();
3740   extradata = (Pointer) omdp->extradata;
3741   ObjMgrUnlock ();
3742   return extradata;
3743 }
3744 
SeqMgrGetTopSeqEntryForEntity(Uint2 entityID)3745 static SeqEntryPtr SeqMgrGetTopSeqEntryForEntity (Uint2 entityID)
3746 
3747 {
3748   ObjMgrDataPtr  omdp;
3749   SeqSubmitPtr   ssp;
3750 
3751   omdp = ObjMgrGetData (entityID);
3752   if (omdp == NULL) return FALSE;
3753   switch (omdp->datatype) {
3754     case OBJ_SEQSUB :
3755       ssp = (SeqSubmitPtr) omdp->dataptr;
3756       if (ssp != NULL && ssp->datatype == 1) {
3757         return (SeqEntryPtr) ssp->data;
3758       }
3759       break;
3760     case OBJ_BIOSEQ :
3761     case OBJ_BIOSEQSET :
3762       return (SeqEntryPtr) omdp->choice;
3763     default :
3764       break;
3765   }
3766   return NULL;
3767 }
3768 
3769 
SeqMgrClearBioseqExtraData(ObjMgrDataPtr omdp)3770 static Boolean SeqMgrClearBioseqExtraData (ObjMgrDataPtr omdp)
3771 
3772 {
3773   BioseqExtraPtr  bspextra;
3774   SMFeatBlockPtr  currf;
3775   SMSeqIdxPtr     currp;
3776   Int2            i;
3777   SMFeatItemPtr   itemf;
3778   Int4            j;
3779   SMFeatBlockPtr  nextf;
3780   SMSeqIdxPtr     nextp;
3781   SMFidItemPtr    sfip;
3782 
3783   if (omdp == NULL) return FALSE;
3784   bspextra = (BioseqExtraPtr) omdp->extradata;
3785   if (bspextra == NULL) return FALSE;
3786 
3787   /* free sorted arrays of pointers into data blocks */
3788 
3789   bspextra->descrsByID = MemFree (bspextra->descrsByID);
3790   bspextra->descrsBySdp = MemFree (bspextra->descrsBySdp);
3791   bspextra->descrsByIndex = MemFree (bspextra->descrsByIndex);
3792 
3793   bspextra->annotDescByID = MemFree (bspextra->annotDescByID);
3794 
3795   bspextra->alignsByID = MemFree (bspextra->alignsByID);
3796 
3797   bspextra->featsByID = MemFree (bspextra->featsByID);
3798   bspextra->featsBySfp = MemFree (bspextra->featsBySfp);
3799   bspextra->featsByPos = MemFree (bspextra->featsByPos);
3800   bspextra->featsByRev = MemFree (bspextra->featsByRev);
3801   bspextra->featsByLabel = MemFree (bspextra->featsByLabel);
3802 
3803   bspextra->genesByPos = MemFree (bspextra->genesByPos);
3804   bspextra->mRNAsByPos = MemFree (bspextra->mRNAsByPos);
3805   bspextra->CDSsByPos = MemFree (bspextra->CDSsByPos);
3806   bspextra->pubsByPos = MemFree (bspextra->pubsByPos);
3807   bspextra->orgsByPos = MemFree (bspextra->orgsByPos);
3808   bspextra->operonsByPos = MemFree (bspextra->operonsByPos);
3809 
3810   bspextra->genesByLocusTag = MemFree (bspextra->genesByLocusTag);
3811 
3812   /* free list of descriptor information */
3813 
3814   bspextra->desclisthead = ValNodeFreeData (bspextra->desclisthead);
3815 
3816   /* free arrays to speed mapping from parts to segmented bioseq */
3817 
3818   bspextra->partsByLoc = MemFree (bspextra->partsByLoc);
3819   bspextra->partsBySeqId = MemFree (bspextra->partsBySeqId);
3820 
3821   /* free data blocks of feature information */
3822 
3823   currf = bspextra->featlisthead;
3824   while (currf != NULL) {
3825     nextf = currf->next;
3826 
3827     if (currf->data != NULL) {
3828 
3829       /* free allocated label strings within block items */
3830 
3831       for (i = 0; i < currf->index; i++) {
3832         itemf = &(currf->data [i]);
3833         MemFree (itemf->label);
3834         MemFree (itemf->ivals);
3835       }
3836 
3837       /* free array of SMFeatItems */
3838 
3839       MemFree (currf->data);
3840     }
3841 
3842     MemFree (currf);
3843     currf = nextf;
3844   }
3845 
3846   /* free data blocks of parts to segment mapping information */
3847 
3848   currp = bspextra->segparthead;
3849   while (currp != NULL) {
3850     nextp = currp->next;
3851     SeqLocFree (currp->slp);
3852     MemFree (currp->seqIdOfPart);
3853     MemFree (currp);
3854     currp = nextp;
3855   }
3856 
3857   /* free list of seqfeatptrs whose product points to the bioseq */
3858 
3859   bspextra->prodlisthead = ValNodeFree (bspextra->prodlisthead);
3860 
3861   if (bspextra->featsByFeatID != NULL) {
3862     for (j = 0; j < bspextra->numfids; j++) {
3863       sfip = bspextra->featsByFeatID [j];
3864       if (sfip == NULL) continue;
3865       MemFree (sfip->fid);
3866       MemFree (sfip);
3867     }
3868     bspextra->featsByFeatID = MemFree (bspextra->featsByFeatID);
3869   }
3870 
3871   /* clean interval list once implemented */
3872 
3873   bspextra->featlisthead = NULL;
3874   bspextra->featlisttail = NULL;
3875   bspextra->segparthead = NULL;
3876 
3877   bspextra->numaligns = 0;
3878   bspextra->numfeats = 0;
3879   bspextra->numgenes = 0;
3880   bspextra->nummRNAs = 0;
3881   bspextra->numCDSs = 0;
3882   bspextra->numpubs = 0;
3883   bspextra->numorgs = 0;
3884   bspextra->numoperons = 0;
3885   bspextra->numfids = 0;
3886   bspextra->numsegs = 0;
3887 
3888   bspextra->min = INT4_MAX;
3889   bspextra->processed = UINT1_MAX;
3890   bspextra->blocksize = 50;
3891 
3892   bspextra->protFeat = NULL;
3893   bspextra->cdsOrRnaFeat = NULL;
3894 
3895   /* free genome - parts mapping arrays when they are added */
3896 
3897   return TRUE;
3898 }
3899 
DoSeqMgrFreeBioseqExtraData(ObjMgrDataPtr omdp)3900 static Boolean DoSeqMgrFreeBioseqExtraData (ObjMgrDataPtr omdp)
3901 
3902 {
3903   if (omdp == NULL) return FALSE;
3904   if (omdp->datatype != OBJ_BIOSEQ && omdp->datatype != OBJ_BIOSEQSET) return FALSE;
3905   if (omdp->extradata != NULL) {
3906     SeqMgrClearBioseqExtraData (omdp);
3907     omdp->extradata = MemFree (omdp->extradata);
3908     omdp->reapextra = NULL;
3909     omdp->reloadextra = NULL;
3910     omdp->freeextra = NULL;
3911   }
3912   return TRUE;
3913 }
3914 
3915 /* object manager callbacks to reap, reload, and free extra bioseq data */
3916 
SeqMgrReapBioseqExtraFunc(Pointer data)3917 NLM_EXTERN Pointer LIBCALLBACK SeqMgrReapBioseqExtraFunc (Pointer data)
3918 
3919 {
3920   BioseqExtraPtr  bspextra;
3921   SMFeatBlockPtr  curr;
3922   Int2            i;
3923   SMFeatItemPtr   item;
3924   ObjMgrDataPtr   omdp;
3925   SMDescItemPtr   sdip;
3926   ValNodePtr      vnp;
3927 
3928   omdp = (ObjMgrDataPtr) data;
3929   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
3930   bspextra = (BioseqExtraPtr) omdp->extradata;
3931   if (bspextra == NULL) return NULL;
3932 
3933   /* loop through data blocks of feature information */
3934 
3935   curr = bspextra->featlisthead;
3936   while (curr != NULL) {
3937 
3938     /* NULL out pointers to cached out feature and annot */
3939 
3940     if (curr->data != NULL) {
3941       for (i = 0; i < curr->index; i++) {
3942         item = &(curr->data [i]);
3943         item->sfp = NULL;
3944         item->sap = NULL;
3945       }
3946     }
3947 
3948     curr = curr->next;
3949   }
3950 
3951   /* these were originally only recorded if omdp->tempload == TL_NOT_TEMP */
3952 
3953   bspextra->protFeat = NULL;
3954   bspextra->cdsOrRnaFeat = NULL;
3955 
3956   /* NULL out pointers to cached out descriptors */
3957 
3958   for (vnp = bspextra->desclisthead; vnp != NULL; vnp = vnp->next) {
3959     sdip = (SMDescItemPtr) vnp->data.ptrvalue;
3960     if (sdip != NULL) {
3961       sdip->sdp = NULL;
3962       sdip->sep = NULL;
3963     }
3964   }
3965 
3966   return NULL;
3967 }
3968 
3969 /* !!! SeqMgrReloadBioseqExtraFunc is not yet implemented !!! */
3970 
SeqMgrReloadBioseqExtraFunc(Pointer data)3971 NLM_EXTERN Pointer LIBCALLBACK SeqMgrReloadBioseqExtraFunc (Pointer data)
3972 
3973 {
3974   return NULL;
3975 }
3976 
SeqMgrFreeBioseqExtraFunc(Pointer data)3977 NLM_EXTERN Pointer LIBCALLBACK SeqMgrFreeBioseqExtraFunc (Pointer data)
3978 
3979 {
3980   DoSeqMgrFreeBioseqExtraData ((ObjMgrDataPtr) data);
3981   return NULL;
3982 }
3983 
3984 /*****************************************************************************
3985 *
3986 *   SeqMgrClearFeatureIndexes clears every bioseq in an entity
3987 *
3988 *****************************************************************************/
3989 
/*
 *  SeqEntryExplore callback used by SeqMgrClearFeatureIndexes.  For a Bioseq
 *  entry it clears the bioseq's seqentry back pointer and frees the extra
 *  index data on its object manager record.  mydata points at a Boolean that
 *  is set to TRUE when a record was found and accepted by
 *  DoSeqMgrFreeBioseqExtraData.  index and indent are unused.
 */
static void SeqMgrClearIndexesProc (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr      bsp;
  ObjMgrDataPtr  omdp;
  BoolPtr        rsult;

  /* NOTE(review): only Bioseq entries are processed.  This guard previously
     sat in front of a Bioseq-set branch that could therefore never run;
     DoSeqMgrFreeBioseqExtraData also accepts OBJ_BIOSEQSET, so confirm
     whether sets were meant to be cleared here as well. */

  if (sep == NULL || (! IS_Bioseq (sep))) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;
  bsp->seqentry = NULL;
  omdp = SeqMgrGetOmdpForBioseq (bsp);

  /* TODO: seq-align indexes on alignment annotations (bsp->annot entries of
     type 2) are not cleaned up yet */

  if (omdp != NULL && DoSeqMgrFreeBioseqExtraData (omdp)) {
    rsult = (BoolPtr) mydata;
    if (rsult != NULL) {
      *rsult = TRUE;
    }
  }
}
4029 
SeqMgrClearFeatureIndexes(Uint2 entityID,Pointer ptr)4030 NLM_EXTERN Boolean LIBCALL SeqMgrClearFeatureIndexes (Uint2 entityID, Pointer ptr)
4031 
4032 {
4033   ObjMgrDataPtr  omdp;
4034   Boolean        rsult = FALSE;
4035   SeqEntryPtr    sep;
4036 
4037   if (entityID == 0) {
4038     entityID = ObjMgrGetEntityIDForPointer (ptr);
4039   }
4040   if (entityID == 0) return FALSE;
4041   sep = SeqMgrGetTopSeqEntryForEntity (entityID);
4042   if (sep == NULL) return FALSE;
4043   SeqEntryExplore (sep, (Pointer) (&rsult), SeqMgrClearIndexesProc);
4044 
4045   /* clear out object manager time of indexing flag and master feature itemID list */
4046 
4047   omdp = ObjMgrGetData (entityID);
4048   if (omdp != NULL) {
4049     omdp->indexed = 0;
4050     SeqMgrClearBioseqExtraData (omdp);
4051     omdp->extradata = MemFree (omdp->extradata);
4052     omdp->reapextra = NULL;
4053     omdp->reloadextra = NULL;
4054     omdp->freeextra = NULL;
4055   }
4056   return rsult;
4057 }
4058 
4059 /*****************************************************************************
4060 *
4061 *   IsNonGappedLiteral(BioseqPtr bsp)
*      Returns TRUE if bsp is a delta seq that is composed only of Seq-lits with
4063 *      actual sequence data.  These are now made to allow optimal compression
4064 *      of otherwise raw sequences with runs of ambiguous bases.
4065 *
4066 *****************************************************************************/
4067 
IsNonGappedLiteral(BioseqPtr bsp)4068 NLM_EXTERN Boolean IsNonGappedLiteral (BioseqPtr bsp)
4069 
4070 {
4071   DeltaSeqPtr  dsp;
4072   SeqLitPtr    slitp;
4073 
4074   if (bsp == NULL || bsp->repr != Seq_repr_delta) return FALSE;
4075   if (bsp->seq_ext_type != 4 || bsp->seq_ext == NULL) return FALSE;
4076 
4077   for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp != NULL; dsp = dsp->next) {
4078     if (dsp->choice != 2) return FALSE; /* not Seq-lit */
4079     slitp = (SeqLitPtr) dsp->data.ptrvalue;
4080     if (slitp == NULL) return FALSE;
4081     if (slitp->seq_data == NULL || slitp->length == 0 || slitp->seq_data_type == Seq_code_gap) return FALSE; /* gap */
4082   }
4083 
4084   return TRUE;
4085 }
4086 
4087 /*****************************************************************************
4088 *
4089 *   FindAppropriateBioseq finds the segmented bioseq if location is join on parts
4090 *
4091 *****************************************************************************/
4092 
/*
 *  tryfirst, if given, is used instead of a lookup when the location's id is
 *  among its ids.  is_small_genome_set, if given, is set to FALSE on entry
 *  and to TRUE only when the result comes from a small genome set.  Returns
 *  NULL when no suitable bioseq is found.
 */
static BioseqPtr FindAppropriateBioseq (SeqLocPtr loc, BioseqPtr tryfirst, BoolPtr is_small_genome_set)

{
  BioseqPtr       bsp;
  BioseqExtraPtr  bspextra;
  BioseqSetPtr    bssp;
  ObjMgrDataPtr   omdp;
  BioseqPtr       part;
  SeqEntryPtr     sep;
  SeqIdPtr        sip;
  SeqLocPtr       slp;

  if (is_small_genome_set != NULL) {
    *is_small_genome_set = FALSE;
  }
  if (loc == NULL) return NULL;

  sip = SeqLocId (loc);
  if (sip != NULL) {

    /* the whole location has a single id */

    if (tryfirst != NULL && SeqIdIn (sip, tryfirst->id)) {
      bsp = tryfirst;
    } else {
      bsp = BioseqFindCore (sip);
    }
    if (bsp == NULL) return NULL;

    /* a raw or delta bioseq may be a local part of a segmented bioseq, in
       which case the recorded parent is returned instead */

    if (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_delta) return bsp;

    omdp = SeqMgrGetOmdpForBioseq (bsp);
    if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return bsp;

    bspextra = (BioseqExtraPtr) omdp->extradata;
    if (bspextra != NULL && bspextra->parentBioseq != NULL) {
      return bspextra->parentBioseq;
    }
    return bsp;
  }

  /* otherwise assume location is on multiple parts of a segmented set (deprecated) or is in a small genome set */

  slp = SeqLocFindNext (loc, NULL);
  if (slp == NULL) return NULL;
  sip = SeqLocId (slp);
  if (sip == NULL) return NULL;
  part = BioseqFindCore (sip);
  if (part == NULL) return NULL;

  /* climb from the first part through its parent records looking for an
     enclosing segset or small genome set */

  for (omdp = SeqMgrGetOmdpForBioseq (part);
       omdp != NULL;
       omdp = SeqMgrGetOmdpForPointer (omdp->parentptr)) {

    if (omdp->datatype != OBJ_BIOSEQSET) continue;
    bssp = (BioseqSetPtr) omdp->dataptr;
    if (bssp == NULL) continue;

    if (bssp->_class == BioseqseqSet_class_segset) {

      /* the first non-NULL bioseq directly inside the segset is returned */

      for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
        if (! IS_Bioseq (sep)) continue;
        bsp = (BioseqPtr) sep->data.ptrvalue;
        if (bsp != NULL) return bsp;
      }

    } else if (bssp->_class == BioseqseqSet_class_small_genome_set) {

      /* in a small genome set the part itself is the answer */

      if (is_small_genome_set != NULL) {
        *is_small_genome_set = TRUE;
      }
      return part;
    }
  }

  return NULL;
}
4167 
4168 /*****************************************************************************
4169 *
4170 *   FindFirstLocalBioseq is called as a last resort if FindAppropriateBioseq
4171 *     fails, and it scans the feature location to find the first local bioseq
4172 *     referenced by a feature interval
4173 *
4174 *****************************************************************************/
4175 
FindFirstLocalBioseq(SeqLocPtr loc)4176 static BioseqPtr FindFirstLocalBioseq (SeqLocPtr loc)
4177 
4178 {
4179   BioseqPtr  bsp;
4180   SeqIdPtr   sip;
4181   SeqLocPtr  slp = NULL;
4182 
4183   if (loc == NULL) return NULL;
4184 
4185   while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
4186     sip = SeqLocId (slp);
4187     if (sip != NULL) {
4188       bsp = BioseqFindCore (sip);
4189       if (bsp != NULL) return bsp;
4190     }
4191   }
4192 
4193   return NULL;
4194 }
4195 
4196 /*****************************************************************************
4197 *
4198 *   BioseqFindFromSeqLoc finds the segmented bioseq if location is join on parts,
4199 *     and does so even if some of the intervals are far accessions.
4200 *
4201 *****************************************************************************/
4202 
BioseqFindFromSeqLoc(SeqLocPtr loc)4203 NLM_EXTERN BioseqPtr BioseqFindFromSeqLoc (SeqLocPtr loc)
4204 
4205 {
4206   BioseqPtr  bsp = NULL;
4207 
4208   if (loc == NULL) return NULL;
4209   bsp = FindAppropriateBioseq (loc, NULL, NULL);
4210   if (bsp == NULL) {
4211     bsp = FindFirstLocalBioseq (loc);
4212   }
4213   return bsp;
4214 }
4215 
4216 /*****************************************************************************
4217 *
4218 *   SeqMgrGetParentOfPart returns the segmented bioseq parent of a part bioseq,
4219 *     and fills in the context structure.
4220 *
4221 *****************************************************************************/
4222 
SeqMgrGetParentOfPart(BioseqPtr bsp,SeqMgrSegmentContext PNTR context)4223 NLM_EXTERN BioseqPtr LIBCALL SeqMgrGetParentOfPart (BioseqPtr bsp,
4224                                                     SeqMgrSegmentContext PNTR context)
4225 
4226 {
4227   BioseqExtraPtr    bspextra;
4228   Char              buf [128];
4229   Int2              compare;
4230   Uint2             entityID;
4231   Int4              i;
4232   Int4              numsegs;
4233   ObjMgrDataPtr     omdp;
4234   BioseqPtr         parent;
4235   SMSeqIdxPtr PNTR  partsByLoc;
4236   SMSeqIdxPtr PNTR  partsBySeqId;
4237   SMSeqIdxPtr       segpartptr;
4238   SeqIdPtr          sip;
4239   SeqLocPtr         slp;
4240   Int4              L, R, mid;
4241 
4242   if (context != NULL) {
4243     MemSet ((Pointer) context, 0, sizeof (SeqMgrSegmentContext));
4244   }
4245   if (bsp == NULL) return NULL;
4246   omdp = SeqMgrGetOmdpForBioseq (bsp);
4247   if (omdp == NULL) return NULL;
4248   if (omdp->datatype != OBJ_BIOSEQ) return NULL;
4249   bspextra = (BioseqExtraPtr) omdp->extradata;
4250   if (bspextra == NULL) return NULL;
4251   entityID = bsp->idx.entityID;
4252   if (entityID < 1) {
4253     entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
4254   }
4255 
4256   parent = bspextra->parentBioseq;
4257   if (parent == NULL) return NULL;
4258 
4259   /* now need parts list from extra data on parent */
4260 
4261   omdp = SeqMgrGetOmdpForBioseq (parent);
4262   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return parent;
4263   bspextra = (BioseqExtraPtr) omdp->extradata;
4264   if (bspextra == NULL) return parent;
4265 
4266   partsBySeqId = bspextra->partsBySeqId;
4267   numsegs = bspextra->numsegs;
4268   if (partsBySeqId == NULL || numsegs < 1) return parent;
4269 
4270   sip = bsp->id;
4271   if (sip == NULL) return parent;
4272 
4273   /* binary search into array on segmented bioseq sorted by part seqID (reversed) string */
4274 
4275   while (sip != NULL) {
4276     if (MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1)) {
4277       L = 0;
4278       R = numsegs - 1;
4279       while (L < R) {
4280         mid = (L + R) / 2;
4281         segpartptr = partsBySeqId [mid];
4282         compare = StringCmp (segpartptr->seqIdOfPart, buf);
4283         if (compare < 0) {
4284           L = mid + 1;
4285         } else {
4286           R = mid;
4287         }
4288       }
4289       segpartptr = partsBySeqId [R];
4290       if (segpartptr != NULL && StringCmp (segpartptr->seqIdOfPart, buf) == 0) {
4291         if (context != NULL) {
4292           slp = segpartptr->slp;
4293           context->entityID = entityID;
4294           context->itemID = segpartptr->itemID;
4295           context->slp = slp;
4296           context->parent = segpartptr->parentBioseq;
4297           context->cumOffset = segpartptr->cumOffset;
4298           context->from = segpartptr->from;
4299           context->to = segpartptr->to;
4300           context->strand = segpartptr->strand;
4301           context->userdata = NULL;
4302           context->omdp = (Pointer) omdp;
4303           context->index = 0;
4304 
4305           /* now find entry in partsByLoc list to set proper index */
4306 
4307           partsByLoc = bspextra->partsByLoc;
4308           if (partsByLoc != NULL) {
4309             i = 0;
4310             while (i < numsegs) {
4311               if (segpartptr == partsByLoc [i]) {
4312                 context->index = i + 1;
4313               }
4314               i++;
4315             }
4316           }
4317         }
4318         return parent;
4319       }
4320     }
4321     sip = sip->next;
4322   }
4323 
4324   return parent;
4325 }
4326 
4327 /*****************************************************************************
4328 *
4329 *   SeqMgrGetBioseqContext fills in the context structure for any bioseq.
4330 *
4331 *****************************************************************************/
4332 
SeqMgrGetBioseqContext(BioseqPtr bsp,SeqMgrBioseqContext PNTR context)4333 NLM_EXTERN Boolean LIBCALL SeqMgrGetBioseqContext (BioseqPtr bsp,
4334                                                    SeqMgrBioseqContext PNTR context)
4335 
4336 {
4337   BioseqExtraPtr  bspextra;
4338   Uint2           entityID;
4339   ObjMgrDataPtr   omdp;
4340   SeqEntryPtr     sep;
4341 
4342   if (context != NULL) {
4343     MemSet ((Pointer) context, 0, sizeof (SeqMgrBioseqContext));
4344   }
4345   if (bsp == NULL || context == NULL) return FALSE;
4346 
4347   entityID = bsp->idx.entityID;
4348   if (entityID < 1) {
4349     entityID = ObjMgrGetEntityIDForPointer (bsp);
4350   }
4351   if (entityID == 0) return FALSE;
4352 
4353   sep = SeqMgrGetTopSeqEntryForEntity (entityID);
4354   if (sep == NULL) return FALSE;
4355 
4356   context->entityID = entityID;
4357   context->index = 0;
4358   context->userdata = NULL;
4359 
4360   omdp = SeqMgrGetOmdpForBioseq (bsp);
4361   if (omdp != NULL && omdp->datatype == OBJ_BIOSEQ) {
4362     bspextra = (BioseqExtraPtr) omdp->extradata;
4363     if (bspextra != NULL) {
4364       context->itemID = bspextra->bspItemID;
4365       context->bsp = bsp;
4366       context->sep = bsp->seqentry;
4367       if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
4368         context->bssp = (BioseqSetPtr) bsp->idx.parentptr;
4369       }
4370       context->numsegs = bspextra->numsegs;
4371       context->omdp = omdp;
4372       context->index = bspextra->bspIndex;
4373     }
4374   }
4375 
4376   return (Boolean) (context->index != 0);
4377 }
4378 
4379 /*****************************************************************************
4380 *
4381 *   GetOffsetInNearBioseq is called to get the intervals on near bioseqs
4382 *
4383 *****************************************************************************/
4384 
GetOffsetInNearBioseq(SeqLocPtr loc,BioseqPtr in,Uint1 which_end)4385 static Int4 GetOffsetInNearBioseq (SeqLocPtr loc, BioseqPtr in, Uint1 which_end)
4386 
4387 {
4388   BioseqPtr  bsp;
4389   SeqLocPtr  first = NULL, last = NULL, slp = NULL;
4390   SeqIdPtr   sip;
4391   Uint1      strand;
4392   Int4       val;
4393 
4394   if (loc == NULL) return -1;
4395 
4396   /* first attempt should work if no far bioseqs */
4397 
4398   val = GetOffsetInBioseq (loc, in, which_end);
4399   if (val != -1) return val;
4400 
4401   /* now go through sublocs and find extremes that are not on far bioseqs */
4402 
4403   while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
4404     sip = SeqLocId (slp);
4405     if (sip != NULL) {
4406       bsp = BioseqFind (sip);
4407       if (bsp != NULL) {
4408         last = slp;
4409         if (first == NULL) {
4410           first = slp;
4411         }
4412       }
4413     }
4414   }
4415   if (first == NULL) return -1;
4416   strand = SeqLocStrand (first);
4417 
4418   switch (which_end) {
4419     case SEQLOC_LEFT_END:
4420       if (strand == Seq_strand_minus) {
4421         return GetOffsetInBioseq (last, in, which_end);
4422       } else {
4423         return GetOffsetInBioseq (first, in, which_end);
4424       }
4425       break;
4426     case SEQLOC_RIGHT_END:
4427       if (strand == Seq_strand_minus) {
4428         return GetOffsetInBioseq (first, in, which_end);
4429       } else {
4430         return GetOffsetInBioseq (last, in, which_end);
4431       }
4432       break;
4433     case SEQLOC_START:
4434       return GetOffsetInBioseq (first, in, which_end);
4435       break;
4436     case SEQLOC_STOP:
4437       return GetOffsetInBioseq (last, in, which_end);
4438       break;
4439     default :
4440       break;
4441   }
4442 
4443   return -1;
4444 }
4445 
4446 
/*
 *  Computes the left and right offsets of loc within in, storing them
 *  through left and right (either may be NULL).  Both outputs are preset to
 *  -1 and stay that way when loc is NULL or no near interval is found.  When
 *  small_genome_set is TRUE only intervals that resolve to in itself are
 *  considered near.  bad_order and mixed_strand are accepted but not
 *  consulted by this routine.
 */
static void GetLeftAndRightOffsetsInNearBioseq (
  SeqLocPtr loc,
  BioseqPtr in,
  Int4Ptr left,
  Int4Ptr right,
  Boolean small_genome_set,
  Boolean bad_order,
  Boolean mixed_strand
)

{
  BioseqPtr  bsp;
  SeqLocPtr  first = NULL;
  SeqLocPtr  last = NULL;
  SeqLocPtr  leftmost;
  Boolean    lflip = FALSE;
  Boolean    minus;
  Boolean    rflip = FALSE;
  SeqLocPtr  rightmost;
  SeqIdPtr   sip;
  SeqLocPtr  sub;
  Int4       vleft = -1;
  Int4       vright = -1;

  if (left != NULL) {
    *left = -1;
  }
  if (right != NULL) {
    *right = -1;
  }
  if (loc == NULL) return;

  /* first attempt should work if no far bioseqs */

  sip = SeqLocId (loc);
  bsp = (in != NULL && SeqIdIn (sip, in->id)) ? in : BioseqFind (sip);
  if (bsp != NULL) {
    GetLeftAndRightOffsetsInBioseq (loc, in, &vleft, &vright, bsp->topology == TOPOLOGY_CIRCULAR, FALSE, &lflip, &rflip);
    if (vleft != -1 && vright != -1) {
      if (left != NULL) {
        *left = vleft;
      }
      if (right != NULL) {
        *right = vright;
      }
      return;
    }
  }

  /* now go through sublocs and find extremes that are not on far bioseqs */

  for (sub = SeqLocFindNext (loc, NULL); sub != NULL; sub = SeqLocFindNext (loc, sub)) {
    sip = SeqLocId (sub);
    if (sip == NULL) continue;
    bsp = BioseqFind (sip);
    if (bsp == NULL) continue;
    if (small_genome_set && bsp != in) continue;
    if (first == NULL) {
      first = sub;
    }
    last = sub;
  }
  if (first == NULL) return;

  /* on the minus strand the left end comes from the last near interval and
     the right end from the first */

  minus = (Boolean) (SeqLocStrand (first) == Seq_strand_minus);
  leftmost = minus ? last : first;
  rightmost = minus ? first : last;

  vleft = GetOffsetInBioseq (leftmost, in, SEQLOC_LEFT_END);
  vright = GetOffsetInBioseq (rightmost, in, SEQLOC_RIGHT_END);

  if (left != NULL) {
    *left = vleft;
  }
  if (right != NULL) {
    *right = vright;
  }
}
4525 
4526 
4527 /*
4528 static Int4 GetOffsetInFirstLocalBioseq (SeqLocPtr loc, BioseqPtr in, Uint1 which_end)
4529 
4530 {
4531   SeqLocPtr  slp = NULL;
4532   Int4       val;
4533 
4534   if (loc == NULL) return -1;
4535 
4536   while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
4537     val = GetOffsetInBioseq (slp, in, which_end);
4538     if (val != -1) return val;
4539   }
4540 
4541   return -1;
4542 }
4543 */
4544 
4545 /*****************************************************************************
4546 *
4547 *   SeqMgrFindSMFeatItemPtr and SeqMgrFindSMFeatItemByID return SMFeatItemPtr
4548 *     to access internal fields
4549 *   SeqMgrGetDesiredDescriptor and SeqMgrGetDesiredFeature take an itemID,
4550 *     position index, or SeqDescPtr or SeqFeatPtr, return the SeqDescPtr or
4551 *     SeqFeatPtr, and fill in the context structure
4552 *
4553 *****************************************************************************/
4554 
SeqMgrFindSMFeatItemPtr(SeqFeatPtr sfp)4555 NLM_EXTERN SMFeatItemPtr LIBCALL SeqMgrFindSMFeatItemPtr (SeqFeatPtr sfp)
4556 
4557 {
4558   SMFeatItemPtr PNTR  array;
4559   BioseqPtr           bsp;
4560   BioseqExtraPtr      bspextra;
4561   SMFeatBlockPtr      curr;
4562   Int2                i;
4563   SMFeatItemPtr       item;
4564   Int4                L;
4565   Int4                mid;
4566   ObjMgrDataPtr       omdp;
4567   Int4                R;
4568 
4569   if (sfp == NULL) return NULL;
4570   bsp = FindAppropriateBioseq (sfp->location, NULL, NULL);
4571   if (bsp == NULL) {
4572     bsp = FindFirstLocalBioseq (sfp->location);
4573   }
4574   omdp = SeqMgrGetOmdpForBioseq (bsp);
4575   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
4576   bspextra = (BioseqExtraPtr) omdp->extradata;
4577   if (bspextra == NULL) return NULL;
4578 
4579   /* first try array sorted by SeqFeatPtr value */
4580 
4581   array = bspextra->featsBySfp;
4582   if (array != NULL && bspextra->numfeats > 0) {
4583     L = 0;
4584     R = bspextra->numfeats - 1;
4585     while (L < R) {
4586       mid = (L + R) / 2;
4587       item = array [mid];
4588       if (item != NULL && item->sfp < sfp) {
4589         L = mid + 1;
4590       } else {
4591         R = mid;
4592       }
4593     }
4594 
4595     item = array [R];
4596     if (item->sfp == sfp) return item;
4597   }
4598 
4599   /* now look in feature indices for cached feature information */
4600 
4601   curr = bspextra->featlisthead;
4602   while (curr != NULL) {
4603 
4604     if (curr->data != NULL) {
4605       for (i = 0; i < curr->index; i++) {
4606         item = &(curr->data [i]);
4607         if (item->sfp == sfp && (! item->ignore)) return item;
4608       }
4609     }
4610 
4611     curr = curr->next;
4612   }
4613 
4614   return NULL;
4615 }
4616 
SeqMgrFindSMFeatItemByID(Uint2 entityID,BioseqPtr bsp,Uint4 itemID)4617 NLM_EXTERN SMFeatItemPtr LIBCALL SeqMgrFindSMFeatItemByID (Uint2 entityID, BioseqPtr bsp, Uint4 itemID)
4618 
4619 {
4620   SMFeatItemPtr PNTR  array;
4621   BioseqExtraPtr      bspextra;
4622   SMFeatBlockPtr      curr;
4623   Int2                i;
4624   SMFeatItemPtr       item;
4625   Int4                L;
4626   Int4                mid;
4627   ObjMgrDataPtr       omdp;
4628   Int4                R;
4629 
4630   if (entityID > 0) {
4631     omdp = ObjMgrGetData (entityID);
4632     if (omdp == NULL) return NULL;
4633   } else {
4634     if (bsp == NULL) return NULL;
4635     omdp = SeqMgrGetOmdpForBioseq (bsp);
4636     if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
4637   }
4638   bspextra = (BioseqExtraPtr) omdp->extradata;
4639   if (bspextra == NULL) return NULL;
4640 
4641   /* first try array sorted by itemID value */
4642 
4643   array = bspextra->featsByID;
4644   if (array != NULL && bspextra->numfeats > 0) {
4645     L = 0;
4646     R = bspextra->numfeats - 1;
4647     while (L < R) {
4648       mid = (L + R) / 2;
4649       item = array [mid];
4650       if (item != NULL && item->itemID < itemID) {
4651         L = mid + 1;
4652       } else {
4653         R = mid;
4654       }
4655     }
4656 
4657     item = array [R];
4658     if (item->itemID == itemID) return item;
4659   }
4660 
4661   /* now look in feature indices for cached feature information */
4662 
4663   curr = bspextra->featlisthead;
4664   while (curr != NULL) {
4665 
4666     if (curr->data != NULL) {
4667       for (i = 0; i < curr->index; i++) {
4668         item = &(curr->data [i]);
4669         if (item->itemID == itemID && (! item->ignore)) return item;
4670       }
4671     }
4672 
4673     curr = curr->next;
4674   }
4675 
4676   return NULL;
4677 }
4678 
ItemIDfromAnnotDesc(AnnotDescPtr adp)4679 static Int4 ItemIDfromAnnotDesc (AnnotDescPtr adp)
4680 
4681 {
4682   ObjValNodePtr  ovp;
4683 
4684   if (adp == NULL || adp->extended == 0) return 0;
4685   ovp = (ObjValNodePtr) adp;
4686   return ovp->idx.itemID;
4687 }
4688 
SeqMgrFindAnnotDescByID(Uint2 entityID,Uint4 itemID)4689 NLM_EXTERN AnnotDescPtr LIBCALL SeqMgrFindAnnotDescByID (Uint2 entityID, Uint4 itemID)
4690 
4691 {
4692   AnnotDescPtr PNTR  array;
4693   BioseqExtraPtr     bspextra;
4694   AnnotDescPtr       item;
4695   Int4               L;
4696   Int4               mid;
4697   ObjMgrDataPtr      omdp;
4698   Int4               R;
4699 
4700   if (entityID < 1) return NULL;
4701   omdp = ObjMgrGetData (entityID);
4702   if (omdp == NULL) return NULL;
4703   bspextra = (BioseqExtraPtr) omdp->extradata;
4704   if (bspextra == NULL) return NULL;
4705 
4706   array = bspextra->annotDescByID;
4707   if (array != NULL && bspextra->numannotdesc > 0) {
4708     L = 0;
4709     R = bspextra->numannotdesc - 1;
4710     while (L < R) {
4711       mid = (L + R) / 2;
4712       item = array [mid];
4713       if (item != NULL && ItemIDfromAnnotDesc (item) < itemID) {
4714         L = mid + 1;
4715       } else {
4716         R = mid;
4717       }
4718     }
4719 
4720     item = array [R];
4721     if (ItemIDfromAnnotDesc (item) == itemID) return item;
4722   }
4723 
4724   return NULL;
4725 }
4726 
SeqMgrFindSeqAlignByID(Uint2 entityID,Uint4 itemID)4727 NLM_EXTERN SeqAlignPtr LIBCALL SeqMgrFindSeqAlignByID (Uint2 entityID, Uint4 itemID)
4728 
4729 {
4730   BioseqExtraPtr  bspextra;
4731   ObjMgrDataPtr   omdp;
4732 
4733   if (entityID < 1) return NULL;
4734   omdp = ObjMgrGetData (entityID);
4735   if (omdp == NULL) return NULL;
4736   bspextra = (BioseqExtraPtr) omdp->extradata;
4737   if (bspextra == NULL) return NULL;
4738 
4739   if (bspextra->alignsByID != NULL && bspextra->numaligns > 0 &&
4740       itemID > 0 && itemID <= (Uint4) bspextra->numaligns) {
4741     return bspextra->alignsByID [itemID];
4742   }
4743 
4744   return NULL;
4745 }
4746 
SeqMgrGetDesiredFeature(Uint2 entityID,BioseqPtr bsp,Uint4 itemID,Uint4 index,SeqFeatPtr sfp,SeqMgrFeatContext PNTR context)4747 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetDesiredFeature (Uint2 entityID, BioseqPtr bsp,
4748                                                        Uint4 itemID, Uint4 index, SeqFeatPtr sfp,
4749                                                        SeqMgrFeatContext PNTR context)
4750 
4751 {
4752   SMFeatItemPtr PNTR  array;
4753   BioseqExtraPtr      bspextra;
4754   SeqFeatPtr          curr;
4755   SMFeatItemPtr       item = NULL;
4756   ObjMgrDataPtr       omdp;
4757 
4758   if (context != NULL) {
4759     MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
4760   }
4761   if (entityID > 0) {
4762     omdp = ObjMgrGetData (entityID);
4763     if (omdp == NULL) return NULL;
4764   } else {
4765     if (bsp == NULL) return NULL;
4766     omdp = SeqMgrGetOmdpForBioseq (bsp);
4767     if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
4768   }
4769   bspextra = (BioseqExtraPtr) omdp->extradata;
4770   if (bspextra == NULL) return NULL;
4771 
4772   if (itemID > 0) {
4773     item = SeqMgrFindSMFeatItemByID (entityID, bsp, itemID);
4774   } else if (index > 0) {
4775     array = bspextra->featsByPos;
4776     if (array != NULL && bspextra->numfeats > 0 && index <= (Uint4) bspextra->numfeats) {
4777       item = array [index - 1];
4778     }
4779   } else if (sfp != NULL) {
4780     item = SeqMgrFindSMFeatItemPtr (sfp);
4781   }
4782   if (item == NULL) return NULL;
4783 
4784   entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
4785 
4786   curr = item->sfp;
4787   if (curr != NULL && context != NULL && (! item->ignore)) {
4788     context->entityID = entityID;
4789     context->itemID = item->itemID;
4790     context->sfp = curr;
4791     context->sap = item->sap;
4792     context->bsp = item->bsp;
4793     context->label = item->label;
4794     context->left = item->left;
4795     context->right = item->right;
4796     context->dnaStop = item->dnaStop;
4797     context->partialL = item->partialL;
4798     context->partialR = item->partialR;
4799     context->external = item->external;
4800     context->farloc = item->farloc;
4801     context->bad_order = item->bad_order;
4802     context->mixed_strand = item->mixed_strand;
4803     context->ts_image = item->ts_image;
4804     context->strand = item->strand;
4805     if (curr != NULL) {
4806       context->seqfeattype = curr->data.choice;
4807     } else {
4808       context->seqfeattype = FindFeatFromFeatDefType (item->subtype);
4809     }
4810     context->featdeftype = item->subtype;
4811     context->numivals = item->numivals;
4812     context->ivals = item->ivals;
4813     context->userdata = NULL;
4814     context->omdp = (Pointer) omdp;
4815     context->index = item->index + 1;
4816   }
4817   return curr;
4818 }
4819 
4820 /*
4821 static ValNodePtr DesiredDescriptorPerBioseq (SeqEntryPtr sep, BioseqPtr bsp,
4822                                               Uint2 itemID, Uint2 index, ValNodePtr sdp,
4823                                               SeqMgrDescContext PNTR context)
4824 
4825 {
4826   BioseqSetPtr  bssp;
4827   ValNodePtr    curr = NULL;
4828   SeqEntryPtr   tmp;
4829 
4830   if (sep != NULL) {
4831     if (IS_Bioseq (sep)) {
4832       bsp = (BioseqPtr) sep->data.ptrvalue;
4833       if (bsp == NULL) return NULL;
4834     } else if (IS_Bioseq_set (sep)) {
4835       bssp = (BioseqSetPtr) sep->data.ptrvalue;
4836       if (bssp == NULL) return NULL;
4837       for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
4838         curr = DesiredDescriptorPerBioseq (tmp, NULL, itemID, index, sdp, context);
4839         if (curr != NULL) return curr;
4840       }
4841       return NULL;
4842     }
4843   }
4844 
4845   if (bsp == NULL) return NULL;
4846 
4847   while ((curr = SeqMgrGetNextDescriptor (bsp, curr, 0, context)) != NULL) {
4848     if (itemID > 0 && itemID == context->itemID) return curr;
4849     if (index > 0 && index == context->index) return curr;
4850     if (sdp != NULL && sdp == curr) return curr;
4851   }
4852 
4853   return NULL;
4854 }
4855 
4856 NLM_EXTERN ValNodePtr LIBCALL SeqMgrGetDesiredDescriptor (Uint2 entityID, BioseqPtr bsp,
4857                                                           Uint2 itemID, Uint2 index, ValNodePtr sdp,
4858                                                           SeqMgrDescContext PNTR context)
4859 
4860 {
4861   SeqMgrDescContext  dfaultcontext;
4862   SeqEntryPtr        sep;
4863 
4864   if (context == NULL) {
4865     context = &dfaultcontext;
4866   }
4867   if (context != NULL) {
4868     MemSet ((Pointer) context, 0, sizeof (SeqMgrDescContext));
4869   }
4870 
4871   if (entityID > 0) {
4872     sep = SeqMgrGetTopSeqEntryForEntity (entityID);
4873     if (sep == NULL) return NULL;
4874     return DesiredDescriptorPerBioseq (sep, NULL, itemID, index, sdp, context);
4875   } else if (bsp != NULL) {
4876     return DesiredDescriptorPerBioseq (NULL, bsp, itemID, index, sdp, context);
4877   }
4878 
4879   return NULL;
4880 }
4881 */
4882 
SeqMgrFindSMDescItemByID(BioseqExtraPtr bspextra,Uint4 itemID)4883 static SMDescItemPtr SeqMgrFindSMDescItemByID (BioseqExtraPtr bspextra, Uint4 itemID)
4884 
4885 {
4886   SMDescItemPtr PNTR  array;
4887   SMDescItemPtr       item;
4888   Int4                L;
4889   Int4                mid;
4890   Int4                R;
4891 
4892   if (bspextra == NULL) return NULL;
4893 
4894   array = bspextra->descrsByID;
4895   if (array != NULL && bspextra->numdescs > 0) {
4896     L = 0;
4897     R = bspextra->numdescs - 1;
4898     while (L < R) {
4899       mid = (L + R) / 2;
4900       item = array [mid];
4901       if (item != NULL && item->itemID < itemID) {
4902         L = mid + 1;
4903       } else {
4904         R = mid;
4905       }
4906     }
4907 
4908     item = array [R];
4909     if (item->itemID == itemID) return item;
4910   }
4911 
4912   return NULL;
4913 }
4914 
SeqMgrFindSMDescItemBySdp(BioseqExtraPtr bspextra,SeqDescrPtr sdp)4915 static SMDescItemPtr SeqMgrFindSMDescItemBySdp (BioseqExtraPtr bspextra, SeqDescrPtr sdp)
4916 
4917 {
4918   SMDescItemPtr PNTR  array;
4919   SMDescItemPtr       item;
4920   Int4                L;
4921   Int4                mid;
4922   Int4                R;
4923 
4924   if (bspextra == NULL) return NULL;
4925 
4926   array = bspextra->descrsBySdp;
4927   if (array != NULL && bspextra->numdescs > 0) {
4928     L = 0;
4929     R = bspextra->numdescs - 1;
4930     while (L < R) {
4931       mid = (L + R) / 2;
4932       item = array [mid];
4933       if (item != NULL && item->sdp < sdp) {
4934         L = mid + 1;
4935       } else {
4936         R = mid;
4937       }
4938     }
4939 
4940     item = array [R];
4941     if (item->sdp == sdp) return item;
4942   }
4943 
4944   return NULL;
4945 }
4946 
SeqMgrFindSMDescItemByIndex(BioseqExtraPtr bspextra,Uint4 index)4947 static SMDescItemPtr SeqMgrFindSMDescItemByIndex (BioseqExtraPtr bspextra, Uint4 index)
4948 
4949 {
4950   SMDescItemPtr PNTR  array;
4951   SMDescItemPtr       item;
4952   Int4                L;
4953   Int4                mid;
4954   Int4                R;
4955 
4956   if (bspextra == NULL) return NULL;
4957 
4958   array = bspextra->descrsByIndex;
4959   if (array != NULL && bspextra->numdescs > 0) {
4960     L = 0;
4961     R = bspextra->numdescs - 1;
4962     while (L < R) {
4963       mid = (L + R) / 2;
4964       item = array [mid];
4965       if (item != NULL && item->index < index) {
4966         L = mid + 1;
4967       } else {
4968         R = mid;
4969       }
4970     }
4971 
4972     item = array [R];
4973     if (item->index == index) return item;
4974   }
4975 
4976   return NULL;
4977 }
4978 
SeqMgrGetDesiredDescriptor(Uint2 entityID,BioseqPtr bsp,Uint4 itemID,Uint4 index,ValNodePtr sdp,SeqMgrDescContext PNTR context)4979 NLM_EXTERN ValNodePtr LIBCALL SeqMgrGetDesiredDescriptor (Uint2 entityID, BioseqPtr bsp,
4980                                                           Uint4 itemID, Uint4 index, ValNodePtr sdp,
4981                                                           SeqMgrDescContext PNTR context)
4982 
4983 {
4984   BioseqExtraPtr     bspextra;
4985   SeqMgrDescContext  dfaultcontext;
4986   ObjMgrDataPtr      omdp = NULL;
4987   SMDescItemPtr      sdip = NULL;
4988   SeqEntryPtr        sep;
4989 
4990   if (context == NULL) {
4991     context = &dfaultcontext;
4992   }
4993   if (context != NULL) {
4994     MemSet ((Pointer) context, 0, sizeof (SeqMgrDescContext));
4995   }
4996 
4997   if (entityID > 0) {
4998     sep = SeqMgrGetTopSeqEntryForEntity (entityID);
4999     if (sep != NULL) {
5000       omdp = SeqMgrGetOmdpForPointer (sep->data.ptrvalue);
5001     }
5002   } else if (bsp != NULL) {
5003     omdp = SeqMgrGetOmdpForBioseq (bsp);
5004     entityID = bsp->idx.entityID;
5005     if (entityID < 1) {
5006       entityID = ObjMgrGetEntityIDForPointer (bsp);
5007     }
5008   }
5009 
5010   if (omdp == NULL) return NULL;
5011   bspextra = (BioseqExtraPtr) omdp->extradata;
5012   if (bspextra == NULL) return NULL;
5013 
5014   if (itemID > 0) {
5015     sdip = SeqMgrFindSMDescItemByID (bspextra, itemID);
5016   } else if (index > 0) {
5017     sdip = SeqMgrFindSMDescItemByIndex (bspextra, index);
5018   } else if (sdp != NULL) {
5019     sdip = SeqMgrFindSMDescItemBySdp (bspextra, sdp);
5020   }
5021 
5022   if (sdip == NULL) return NULL;
5023 
5024   context->entityID = entityID;
5025   context->itemID = sdip->itemID;
5026   context->sdp = sdip->sdp;
5027   context->sep = sdip->sep;
5028   context->level = sdip->level;
5029   context->seqdesctype = sdip->seqdesctype;
5030   context->userdata = NULL;
5031   context->omdp = omdp;
5032   context->index = sdip->index;
5033 
5034   return sdip->sdp;
5035 }
5036 
SeqMgrGetDesiredAnnotDesc(Uint2 entityID,BioseqPtr bsp,Uint4 itemID,SeqMgrAndContext PNTR context)5037 NLM_EXTERN AnnotDescPtr LIBCALL SeqMgrGetDesiredAnnotDesc (
5038   Uint2 entityID,
5039   BioseqPtr bsp,
5040   Uint4 itemID,
5041   SeqMgrAndContext PNTR context
5042 )
5043 
5044 {
5045   AnnotDescPtr      adp = NULL;
5046   BioseqExtraPtr    bspextra;
5047   SeqMgrAndContext  dfaultcontext;
5048   ObjMgrDataPtr     omdp = NULL;
5049   SeqEntryPtr       sep;
5050 
5051   if (context == NULL) {
5052     context = &dfaultcontext;
5053   }
5054   if (context != NULL) {
5055     MemSet ((Pointer) context, 0, sizeof (SeqMgrAndContext));
5056   }
5057 
5058   if (entityID > 0) {
5059     sep = SeqMgrGetTopSeqEntryForEntity (entityID);
5060     if (sep != NULL) {
5061       omdp = SeqMgrGetOmdpForPointer (sep->data.ptrvalue);
5062     }
5063   } else if (bsp != NULL) {
5064     omdp = SeqMgrGetOmdpForBioseq (bsp);
5065     entityID = bsp->idx.entityID;
5066     if (entityID < 1) {
5067       entityID = ObjMgrGetEntityIDForPointer (bsp);
5068     }
5069   }
5070 
5071   if (omdp == NULL) return NULL;
5072   bspextra = (BioseqExtraPtr) omdp->extradata;
5073   if (bspextra == NULL) return NULL;
5074 
5075   if (itemID > 0) {
5076     adp = SeqMgrFindAnnotDescByID (entityID, itemID);
5077   }
5078 
5079   if (adp == NULL) return NULL;
5080 
5081   context->entityID = entityID;
5082   context->itemID = itemID;
5083   context->adp = adp;
5084   context->annotdesctype = adp->choice;
5085   context->userdata = NULL;
5086   context->omdp = omdp;
5087   context->index = itemID;
5088 
5089   return adp;
5090 }
5091 
5092 /*****************************************************************************
5093 *
5094 *   RecordFeaturesInBioseqs callback explores bioseqs, bioseq sets, and features,
5095 *     keeping a running total of the descriptor item counts, and records specific
5096 *     information about features on each bioseq
5097 *
5098 *****************************************************************************/
5099 
/*
 * Scratch state carried through the indexing callbacks as gather userdata
 * (CountAlignmentsProc, for example, receives it via gop->userdata).
 * Only the fields whose use is visible nearby are annotated; the others
 * are presumably maintained by the indexing callback - TODO confirm.
 */
typedef struct extraindex {
  SeqEntryPtr     topsep;
  BioseqPtr       lastbsp;
  SeqAnnotPtr     lastsap;     /* copied into each recorded feature item as its sap */
  BioseqSetPtr    lastbssp;
  ValNodePtr      alignhead;
  ValNodePtr      lastalign;
  ValNodePtr      adphead;
  ValNodePtr      lastadp;
  SMSeqIdxPtr     segpartail;
  Int4            cumulative;
  Uint4           bspcount;
  Uint4           aligncount;  /* incremented once per OBJ_SEQALIGN item by CountAlignmentsProc */
  Uint4           descrcount;  /* running descriptor total, advanced by SetDescriptorCounts */
  Uint4           featcount;
  Uint4           adpcount;
  Int4            seqlitid;
  Boolean         flip;        /* when set, recorded feature left/right are mirrored against bsp->length */
} ExtraIndex, PNTR ExtraIndexPtr;
5119 
/*
 * Stamps the owning object's omdp->lastDescrItemID with the running
 * descriptor total seen so far, then advances that total by the length
 * of the sdp chain.  Does nothing when sdp or exindx is NULL, or when no
 * object manager record exists for thisitem.
 */
static void SetDescriptorCounts (ValNodePtr sdp, ExtraIndexPtr exindx, Pointer thisitem, Uint2 thistype)

{
  ObjMgrDataPtr  omdp;
  Uint4          total = 0;
  ValNodePtr     vnp;

  if (sdp == NULL || exindx == NULL) return;

  /* bioseqs have their own lookup; everything else goes through the generic one */

  if (thistype != OBJ_BIOSEQ) {
    omdp = SeqMgrGetOmdpForPointer (thisitem);
  } else {
    omdp = SeqMgrGetOmdpForBioseq ((BioseqPtr) thisitem);
  }
  if (omdp == NULL) return;

  omdp->lastDescrItemID = exindx->descrcount;

  for (vnp = sdp; vnp != NULL; vnp = vnp->next) {
    total++;
  }
  exindx->descrcount += total;
}
5143 
/*
 * Attaches a freshly allocated BioseqExtra block to omdp if it does not
 * already have extra data.  On successful allocation the reap, reload and
 * free callbacks are installed and the block's bsp, omdp, min and
 * processed fields are initialized.
 */
static void CreateBioseqExtraBlock (ObjMgrDataPtr omdp, BioseqPtr bsp)

{
  BioseqExtraPtr  extra;

  if (omdp == NULL) return;
  if (omdp->extradata != NULL) return;

  extra = (BioseqExtraPtr) MemNew (sizeof (BioseqExtra));
  omdp->extradata = (Pointer) extra;

  if (extra != NULL) {
    omdp->reapextra = SeqMgrReapBioseqExtraFunc;
    omdp->reloadextra = SeqMgrReloadBioseqExtraFunc;
    omdp->freeextra = SeqMgrFreeBioseqExtraFunc;

    extra->bsp = bsp;
    extra->omdp = omdp;
    extra->min = INT4_MAX;
    extra->processed = UINT1_MAX;
  }
}
5164 
CountAlignmentsProc(GatherObjectPtr gop)5165 static Boolean CountAlignmentsProc (GatherObjectPtr gop)
5166 
5167 {
5168   ExtraIndexPtr  exindx;
5169 
5170   if (gop == NULL || gop->itemtype != OBJ_SEQALIGN) return TRUE;
5171   exindx = (ExtraIndexPtr) gop->userdata;
5172   if (exindx == NULL) return TRUE;
5173   (exindx->aligncount)++;
5174   return TRUE;
5175 }
5176 
CollectAlignsProc(GatherObjectPtr gop)5177 static Boolean CollectAlignsProc (GatherObjectPtr gop)
5178 
5179 {
5180   SeqAlignPtr PNTR  alignsByID;
5181 
5182   if (gop == NULL || gop->itemtype != OBJ_SEQALIGN) return TRUE;
5183   alignsByID = (SeqAlignPtr PNTR) gop->userdata;
5184   if (alignsByID == NULL) return TRUE;
5185   alignsByID [gop->itemID] = (SeqAlignPtr) gop->dataptr;
5186   return TRUE;
5187 }
5188 
SeqMgrIndexAlignments(Uint2 entityID)5189 NLM_EXTERN void LIBCALL SeqMgrIndexAlignments (Uint2 entityID)
5190 
5191 {
5192   SeqAlignPtr PNTR  alignsByID;
5193   BioseqExtraPtr    bspextra;
5194   ExtraIndex        exind;
5195   Boolean           objMgrFilter [OBJ_MAX];
5196   ObjMgrDataPtr     omdp;
5197 
5198   if (entityID == 0) return;
5199 
5200   /* count alignments */
5201 
5202   exind.topsep = NULL;
5203   exind.lastbsp = NULL;
5204   exind.lastsap = NULL;
5205   exind.lastbssp = NULL;
5206   exind.alignhead = NULL;
5207   exind.lastalign = NULL;
5208   exind.adphead = NULL;
5209   exind.lastadp = NULL;
5210   exind.segpartail = NULL;
5211   exind.bspcount = 0;
5212   exind.aligncount = 0;
5213   exind.descrcount = 0;
5214   exind.featcount = 0;
5215   exind.adpcount = 0;
5216   exind.seqlitid = 0;
5217 
5218   MemSet ((Pointer) objMgrFilter, 0, sizeof (objMgrFilter));
5219   objMgrFilter [OBJ_SEQALIGN] = TRUE;
5220   GatherObjectsInEntity (entityID, 0, NULL, CountAlignmentsProc, (Pointer) &exind, objMgrFilter);
5221 
5222   omdp = ObjMgrGetData (entityID);
5223   if (omdp != NULL) {
5224 
5225     CreateBioseqExtraBlock (omdp, NULL);
5226     bspextra = (BioseqExtraPtr) omdp->extradata;
5227     if (bspextra != NULL) {
5228 
5229       /* get rid of previous lookup array */
5230 
5231       bspextra->alignsByID = MemFree (bspextra->alignsByID);
5232       bspextra->numaligns = 0;
5233 
5234       /* alignment ID to SeqAlignPtr index always goes on top of entity */
5235 
5236       if (exind.aligncount > 0) {
5237         alignsByID = (SeqAlignPtr PNTR) MemNew (sizeof (SeqAlignPtr) * (exind.aligncount + 2));
5238         if (alignsByID != NULL) {
5239 
5240           /* copy SeqAlignPtr for each itemID */
5241 
5242           GatherObjectsInEntity (entityID, 0, NULL, CollectAlignsProc, (Pointer) alignsByID, objMgrFilter);
5243 
5244           bspextra->alignsByID = alignsByID;
5245           bspextra->numaligns = exind.aligncount;
5246         }
5247       }
5248     }
5249   }
5250 }
5251 
SeqIdWithinBioseq(BioseqPtr bsp,SeqLocPtr slp)5252 static SeqIdPtr SeqIdWithinBioseq (BioseqPtr bsp, SeqLocPtr slp)
5253 
5254 {
5255   SeqIdPtr  a;
5256   SeqIdPtr  b;
5257 
5258   if (bsp == NULL || slp == NULL) return NULL;
5259   a = SeqLocId (slp);
5260   if (a == NULL) return NULL;
5261   for (b = bsp->id; b != NULL; b = b->next) {
5262     if (SeqIdComp (a, b) == SIC_YES) return b;
5263   }
5264   return NULL;
5265 }
5266 
5267 /*
5268 static void FindGPS (BioseqSetPtr bssp, Pointer userdata)
5269 
5270 {
5271   BoolPtr  is_gpsP;
5272 
5273   if (bssp == NULL || bssp->_class != BioseqseqSet_class_gen_prod_set) return;
5274   is_gpsP = (BoolPtr) userdata;
5275   *is_gpsP = TRUE;
5276 }
5277 */
5278 
/*
 * ProcessFeatureProducts
 *
 *   Records, on the product bioseq of a feature, information linking the
 *   product back to the feature.  Only CDS, RNA and protein features that
 *   have a product whose bioseq can be found with BioseqFind are handled.
 *
 *   Steps, in order:
 *     1. Ensure the product bioseq has a BioseqExtra block.
 *     2. If cdsOrRnaFeat is already set to a different feature, post a
 *        warning unless the product bioseq lies within a gen-prod-set.
 *     3. If cdsOrRnaFeat is not yet set and the product's molecule type
 *        fits the feature type, set it to sfp.
 *     4. Append sfp to the product's prodlisthead list.
 *     5. For CDS features only, and only if protFeat is not already set,
 *        scan the product bioseq's own annotations for the protein feature
 *        with the smallest SeqLocAinB result against the whole bioseq and
 *        store it as protFeat.
 *
 *   sfp    - feature being indexed; ignored if NULL or without product
 *   itemID - not referenced in this function
 *   gop    - supplies entityID, itemID and itemtype for the warning context
 */
static void ProcessFeatureProducts (SeqFeatPtr sfp, Uint4 itemID, GatherObjectPtr gop)

{
  BioseqPtr         bsp;
  BioseqExtraPtr    bspextra;
  BioseqSetPtr      bssp;
  Char              buf [81];
  CharPtr           ctmp;
  Int4              diff;
  GatherContext     gc;
  GatherContextPtr  gcp;
  Boolean           is_gps;
  CharPtr           loclbl;
  Int4              min;
  ObjMgrDataPtr     omdp;
  Uint1             processed;
  CharPtr           prodlbl;
  ProtRefPtr        prp;
  SeqFeatPtr        prt;
  CharPtr           ptmp;
  SeqAnnotPtr       sap;
  SeqIdPtr          sip;
  SeqLocPtr         slp;
  ValNode           vn;

  if (sfp == NULL || sfp->product == NULL) return;
  if (sfp->data.choice != SEQFEAT_CDREGION &&
      sfp->data.choice != SEQFEAT_RNA &&
      sfp->data.choice != SEQFEAT_PROT) return;

  /* locate the product bioseq and its object manager record */

  sip = SeqLocId (sfp->product);
  if (sip == NULL) return;
  bsp = BioseqFind (sip);
  if (bsp == NULL) return;
  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return;

  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) {
    CreateBioseqExtraBlock (omdp, bsp);
    bspextra = (BioseqExtraPtr) omdp->extradata;
  }
  if (bspextra == NULL) return;

  /* cds or rna reference stored in product bioseq's omdp.cdsOrRnaFeat */

  if (bspextra->cdsOrRnaFeat != NULL && bspextra->cdsOrRnaFeat != sfp) {

    /* a different feature already claims this product - prepare labels for the warning */

    FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);
    ctmp = SeqLocPrint (sfp->location);
    loclbl = ctmp;
    if (loclbl == NULL) {
      loclbl = "?";
    }
    ptmp = SeqLocPrint (sfp->product);
    prodlbl = ptmp;
    if (prodlbl == NULL) {
      prodlbl = "?";
    }
    /*
    {
      GatherContext     gc;
      GatherContextPtr  gcp;
      Boolean           is_gps = FALSE;
      SeqEntryPtr       sep;
      MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
      gcp = &gc;
      gc.entityID = gop->entityID;
      gc.itemID = gop->itemID;
      gc.thistype = gop->itemtype;
      sep = GetTopSeqEntryForEntityID (gop->entityID);
      VisitSetsInSep (sep, (Pointer) &is_gps, FindGPS);
      if (! is_gps) {
        ErrPostItem (SEV_WARNING, 0, 0,
                     "SeqMgr indexing cds or rna progenitor already set - Feature: %s - Location [%s] - Product [%s]",
                     buf, loclbl, prodlbl);
      }
    }
    */

    /* walk up the chain of parent sets looking for a gen-prod-set */

    is_gps = FALSE;
    if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
      bssp = (BioseqSetPtr) bsp->idx.parentptr;
      while (bssp != NULL) {
        if (bssp->_class == BioseqseqSet_class_gen_prod_set) {
          is_gps = TRUE;
        }
        if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
          bssp = (BioseqSetPtr) bssp->idx.parentptr;
        } else {
          bssp = NULL;
        }
      }
    }
    if (! is_gps) {
      /* NOTE(review): gcp is never referenced explicitly below; presumably the
         ErrPostItem macro reads it for item context - confirm before removing */
      MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
      gcp = &gc;
      gc.entityID = gop->entityID;
      gc.itemID = gop->itemID;
      gc.thistype = gop->itemtype;
      ErrPostItem (SEV_WARNING, 0, 0,
                   "SeqMgr indexing cds or rna progenitor already set - Feature: %s - Location [%s] - Product [%s]",
                   buf, loclbl, prodlbl);
    }
    MemFree (ctmp);
    MemFree (ptmp);
  }

  /* if (omdp->tempload == TL_NOT_TEMP) { */
  /* check bsp mol against feature type  */
  if (bspextra->cdsOrRnaFeat != NULL) {
    /* do not override if set */
  } else if (sfp->data.choice == SEQFEAT_CDREGION && ISA_aa (bsp->mol)) {
    bspextra->cdsOrRnaFeat = sfp;
  } else if (sfp->data.choice == SEQFEAT_RNA && ISA_na (bsp->mol)) {
    bspextra->cdsOrRnaFeat = sfp;
  } else if (sfp->data.choice == SEQFEAT_PROT && ISA_aa (bsp->mol)) {
    bspextra->cdsOrRnaFeat = sfp;
  }
  /* } */

  /* add to prodlisthead list for gather by get_feats_product */

  ValNodeAddPointer (&(bspextra->prodlisthead), 0, (Pointer) sfp);

  /* the remaining protein feature search applies to coding regions only */

  if (sfp->data.choice == SEQFEAT_RNA || sfp->data.choice == SEQFEAT_PROT) return;

  /* if protFeat exists it was set by exhaustive gather on protein bioseq */

  if (bspextra->protFeat != NULL) return;

  /* calculate largest protein feature on cds's product bioseq */

  /* vn is a stack-built whole-bioseq location used as the comparison target */

  min = INT4_MAX;
  processed = UINT1_MAX;
  vn.choice = SEQLOC_WHOLE;
  vn.data.ptrvalue = (Pointer) bsp->id;
  vn.next = NULL;
  slp = (Pointer) (&vn);

  sap = bsp->annot;
  while (sap != NULL) {
    /* type 1 annotations have their data treated as a SeqFeat list here */
    if (sap->type == 1) {
      prt = (SeqFeatPtr) sap->data;
      while (prt != NULL) {
        if (prt->data.choice == SEQFEAT_PROT) {
          prp = (ProtRefPtr) prt->data.value.ptrvalue;

          /* get SeqId in bioseq that matches SeqId used for location */

          vn.data.ptrvalue = SeqIdWithinBioseq (bsp, prt->location);

          /* negative results are skipped; among the rest the smallest value wins */

          diff = SeqLocAinB (prt->location, slp);
          if (diff >= 0 && prp != NULL) {
            if (diff < min) {
              min = diff;
              processed = prp->processed;
              /* if (omdp->tempload == TL_NOT_TEMP) { */
                bspextra->protFeat = prt;
              /* } */
            } else if (diff == min) {
              /* unprocessed 0 preferred over preprotein 1 preferred over mat peptide 2 */
              if ( /* prp != NULL && prp->processed == 0 */ prp->processed < processed ) {
                min = diff;
                processed = prp->processed;
                bspextra->protFeat = prt;
              }
            }
          }
        }
        prt = prt->next;
      }
    }
    sap = sap->next;
  }
}
5453 
5454 
/*
 * SimpleIvalsCalculation
 *
 *   Fast path for recording the interval of a feature whose location is a
 *   single SEQLOC_INT directly on bsp and which does not need flipping.
 *   On success item->strand, item->numivals (1) and item->ivals (a newly
 *   allocated start/stop pair, ordered stop-first for the minus strand)
 *   are set and TRUE is returned.
 *
 *   Returns FALSE, leaving item untouched, when the fast path does not
 *   apply or the interval array cannot be allocated.
 */
static Boolean SimpleIvalsCalculation (SeqLocPtr slp, BioseqPtr bsp, Boolean flip, SMFeatItemPtr item)
{
  Int4Ptr    ivals;
  SeqIntPtr  sint;

  if (flip || slp == NULL || bsp == NULL || item == NULL) return FALSE;
  if (slp->choice != SEQLOC_INT) return FALSE;

  sint = (SeqIntPtr) slp->data.ptrvalue;
  if (sint == NULL) return FALSE;
  if (! SeqIdIn (sint->id, bsp->id)) return FALSE;

  /* allocate before touching item so a failed allocation leaves it unchanged */

  ivals = (Int4Ptr) MemNew (sizeof (Int4) * 2);
  if (ivals == NULL) return FALSE;

  item->strand = sint->strand;
  item->numivals = 1;
  item->ivals = ivals;
  if (item->strand == Seq_strand_minus) {
    ivals [0] = sint->to;
    ivals [1] = sint->from;
  } else {
    ivals [0] = sint->from;
    ivals [1] = sint->to;
  }
  return TRUE;
}
5477 
/*****************************************************************************
*
*   RecordOneFeature stores one feature in the next free slot of the
*     bioseq's linked list of feature blocks (bspextra->featlisttail),
*     chaining a new block onto the list when the tail block is full
*
*   bspextra     per-bioseq index data; nothing is recorded unless its
*                  featlisttail has already been set by the caller
*   omdp         only checked against NULL here
*   bsp          bioseq the feature is being indexed on
*   exindx       supplies lastsap and flip, and receives the feature count
*   sfp          feature being recorded
*   left, right  extreme positions to store, mirrored about bsp->length
*                  when exindx->flip is set
*   itemID       stored as item->itemID
*   subtype      feature subtype, looked up with FindFeatDefType if 0
*   external, farloc, bad_order, mixed_strand, ignore, ts_image
*                flags copied unchanged into the stored item
*
*   Allocation failures are not reported to the caller
*
*****************************************************************************/

static void RecordOneFeature (BioseqExtraPtr bspextra, ObjMgrDataPtr omdp,
                              BioseqPtr bsp, ExtraIndexPtr exindx, SeqFeatPtr sfp,
                              Int4 left, Int4 right, Uint4 itemID, Uint2 subtype,
                              Boolean external, Boolean farloc, Boolean bad_order,
                              Boolean mixed_strand, Boolean ignore, Boolean ts_image)

{
  Char            buf [129];
  SMFeatBlockPtr  curr;
  Int4            from;
  Int2            i;
  SMFeatItemPtr   item;
  Int4Ptr         ivals;
  SeqLocPtr       loc;
  SMFeatBlockPtr  next;
  Int2            numivals = 0;
  CharPtr         ptr;
  SeqIdPtr        sip;
  SeqLocPtr       slp = NULL;
  Uint1           strand;
  Int4            swap;
  Int4            to;

  if (bspextra == NULL || omdp == NULL || bsp == NULL || exindx == NULL || sfp == NULL) return;
  if (bspextra->featlisttail == NULL) return;

  /* just in case blocksize was not set for some reason */

  if (bspextra->blocksize < 1) {
    bspextra->blocksize = 5;
  }

  curr = bspextra->featlisttail;
  if (curr->index >= bspextra->blocksize) {

    /* tail block is full, so allocate next chunk in linked list of blocks */

    next = (SMFeatBlockPtr) MemNew (sizeof (SMFeatBlock));
    curr->next = next;

    if (next != NULL) {
      bspextra->featlisttail = next;
      curr = next;
    }
  }

  /* no room in the tail block (e.g., the new chunk could not be allocated), feature is dropped */

  if (curr->index >= bspextra->blocksize) return;

  /* allocate data block if not yet done for this chunk */

  if (curr->data == NULL) {
    curr->data = (SMFeatItemPtr) MemNew (sizeof (SMFeatItem) * (size_t) (bspextra->blocksize));
  }

  /* now record desired information about current feature */

  if (curr->data != NULL) {
    item = &(curr->data [curr->index]);
    item->sfp = sfp;
    item->sap = exindx->lastsap;
    item->bsp = bsp;

    /* label is the content label, for RNA features starting just past the first "RNA-" */

    FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);
    ptr = buf;
    if (sfp->data.choice == SEQFEAT_RNA) {
      ptr = StringStr (buf, "RNA-");
      if (ptr != NULL) {
        ptr += 4;
      } else {
        ptr = buf;
      }
    }
    item->label = StringSaveNoNull (ptr);

    /* when flipped, positions are mirrored about the bioseq length */

    if (exindx->flip) {
      item->left = bsp->length - right;
      item->right = bsp->length - left;
    } else {
      item->left = left;
      item->right = right;
    }
    item->dnaStop = -1;
    CheckSeqLocForPartial (sfp->location, &(item->partialL), &(item->partialR));
    item->external = external;
    item->farloc = farloc;
    item->bad_order = bad_order;
    item->mixed_strand = mixed_strand;
    item->ts_image = ts_image;
    if (subtype == 0) {
      subtype = FindFeatDefType (sfp);
    }
    item->subtype = subtype;
    item->itemID = itemID;
    item->ignore = ignore;
    item->overlap = -1;

    /* record start/stop pairs of intervals on target bioseq */

    if (SimpleIvalsCalculation (sfp->location, bsp, exindx->flip, item)) {
      /* don't need to do complex merging to calculate intervals */
    } else {
      loc = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE);

      if (exindx->flip) {
        sip = SeqIdFindBest (bsp->id, 0);
        slp = SeqLocCopyRegion (sip, loc, bsp, 0, bsp->length - 1, Seq_strand_minus, FALSE);
        SeqLocFree (loc);
        loc = slp;
      }

      /* record strand relative to segmented parent */

      item->strand = SeqLocStrand (loc);
      if (exindx->flip) {
        item->strand = StrandCmp (item->strand);
      }
      strand = item->strand;

      /* first pass counts the intervals, second pass stores them */

      slp = NULL;
      while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
        numivals++;
      }
      if (numivals > 0) {
        ivals = MemNew (sizeof (Int4) * (size_t) numivals * 2);
        item->ivals = ivals;

        /* only advertise and fill the interval array if it was really allocated */

        if (ivals != NULL) {
          item->numivals = numivals;
          slp = NULL;
          i = 0;
          while ((slp = SeqLocFindNext (loc, slp)) != NULL) {
            from = SeqLocStart (slp);
            to = SeqLocStop (slp);

            /* minus strand pairs are stored stop first */

            if (strand == Seq_strand_minus) {
              swap = from;
              from = to;
              to = swap;
            }
            ivals [i] = from;
            i++;
            ivals [i] = to;
            i++;
          }
        }
      }
      SeqLocFree (loc);
    }
  }

  /* increment count on current block */
  /* NOTE(review): the slot is consumed and counted even if the data block could not be allocated */

  (curr->index)++;

  /* count all features, per bioseq and per entity */

  (bspextra->numfeats)++;
  (exindx->featcount)++;
}
5645 
5646 
/*****************************************************************************
*
*   CheckForTransSplice walks the SEQLOC_INT pieces of a feature location
*     and compares each interval with the previous interval seen
*
*   sfp            feature whose location is examined
*   bad_orderP     set to TRUE if two consecutive intervals on the same
*                    bioseq are out of order, otherwise left untouched
*   mixed_strandP  set to TRUE if two consecutive intervals on the same
*                    bioseq are on different strands (plus paired with
*                    unknown is tolerated), otherwise left untouched
*   circular       when TRUE the ordering test is skipped
*
*   Either output pointer may be NULL if that result is not wanted
*
*****************************************************************************/

static void CheckForTransSplice (
  SeqFeatPtr sfp,
  BoolPtr bad_orderP,
  BoolPtr mixed_strandP,
  Boolean circular
)

{
  Boolean    mixed_strand = FALSE, ordered = TRUE;
  SeqIdPtr   id1, id2;
  SeqLocPtr  tmp;
  SeqIntPtr  sip2, prevsip;
  Uint1      strand1, strand2;

  if (sfp == NULL || sfp->location == NULL) return;

  tmp = NULL;
  id1 = NULL;
  prevsip = NULL;

  /* Seq_strand_other marks that no previous interval has been seen yet */

  strand1 = Seq_strand_other;

  while ((tmp = SeqLocFindNext (sfp->location, tmp)) != NULL) {

    /* just check seqloc_interval */

    if (tmp->choice != SEQLOC_INT) continue;

    /* skip a malformed interval that carries no data */

    sip2 = (SeqIntPtr) (tmp->data.ptrvalue);
    if (sip2 == NULL) continue;

    strand2 = sip2->strand;
    id2 = sip2->id;

    /* ordering compares the stop positions, descending on minus strand and ascending otherwise */

    if ((prevsip != NULL) && (ordered) && (! circular)) {
      if (SeqIdForSameBioseq (prevsip->id, sip2->id)) {
        if (strand2 == Seq_strand_minus) {
          if (prevsip->to < sip2->to) {
            ordered = FALSE;
          }
        } else {
          if (prevsip->to > sip2->to) {
            ordered = FALSE;
          }
        }
      }
    }
    prevsip = sip2;

    /* strands that differ are mixed, unless one is plus and the other is unknown */

    if ((strand1 != Seq_strand_other) && (strand2 != Seq_strand_other)) {
      if (SeqIdForSameBioseq (id1, id2)) {
        if (strand1 != strand2) {
          if (strand1 == Seq_strand_plus && strand2 == Seq_strand_unknown) {
            /* unmarked strand, tolerated */
          } else if (strand1 == Seq_strand_unknown && strand2 == Seq_strand_plus) {
            /* unmarked strand, tolerated */
          } else {
            mixed_strand = TRUE;
          }
        }
      }
    }

    strand1 = strand2;
    id1 = id2;
  }

  /* Publication intervals ordering does not matter */

  if (sfp->idx.subtype == FEATDEF_PUB) {
    ordered = TRUE;
  }

  /* ignore ordering of heterogen bonds */

  if (sfp->data.choice == SEQFEAT_HET) {
    ordered = TRUE;
  }

  /* misc_recomb intervals SHOULD be in reverse order */

  if (sfp->idx.subtype == FEATDEF_misc_recomb) {
    ordered = TRUE;
  }

  /* primer_bind intervals MAY be on opposite strands */

  if (sfp->idx.subtype == FEATDEF_primer_bind) {
    mixed_strand = FALSE;
    ordered = TRUE;
  }

  /* results are only ever raised to TRUE, never cleared */

  if ((! ordered) && bad_orderP != NULL) {
    *bad_orderP = TRUE;
  }
  if (mixed_strand && mixed_strandP != NULL) {
    *mixed_strandP = TRUE;
  }
}
5744 
5745 
/*****************************************************************************
*
*   RecordFeatureOnBioseq indexes one gathered feature on one bioseq
*
*   It finds the feature's left and right extremes on the bioseq, keeps
*     track of the best protein feature for the bioseq, sets up the
*     feature block list on first use, and hands the feature to
*     RecordOneFeature (a second time, flagged as ignore, for gene, pub,
*     biosource and operon features that span the origin of a circular
*     bioseq)
*
*   special_case selects sfp->product instead of sfp->location for the
*     extremes, and is passed to RecordOneFeature as its ignore flag
*   usingLocalBsp is passed to RecordOneFeature as its farloc flag
*
*   Returns FALSE only when a required argument is NULL, TRUE otherwise,
*     including when the feature could not be mapped and was just reported
*
*****************************************************************************/

static Boolean RecordFeatureOnBioseq (
  GatherObjectPtr gop,
  BioseqPtr bsp,
  SeqFeatPtr sfp,
  ExtraIndexPtr exindx,
  Boolean usingLocalBsp,
  Boolean special_case,
  Boolean small_gen_set,
  Boolean ts_image
)

{
  SeqAnnotPtr     annot;
  BioseqExtraPtr  bspextra;
  Boolean         circular;
  Int4            contained;
  Char            featlbl [128];
  Int4            hold;
  Int4            left;
  SeqLocPtr       loc;
  Boolean         misordered = FALSE;
  Boolean         mixed_strand = FALSE;
  Int2            nfeats;
  ObjMgrDataPtr   omdp;
  CharPtr         printed;
  ProtRefPtr      prp;
  Int4            right;
  SeqFeatPtr      scan;
  CharPtr         shown;
  ValNode         whole;

  if (gop == NULL || bsp == NULL || sfp == NULL || exindx == NULL) return FALSE;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return TRUE;

  /* make sure the bioseq has an index block to add the feature to */

  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) {
    CreateBioseqExtraBlock (omdp, bsp);
    bspextra = (BioseqExtraPtr) omdp->extradata;
    if (bspextra == NULL) return TRUE;
  }

  /* extremes are taken from the unmerged location, since merging first
     broke the handling of genes spanning the origin */

  loc = special_case ? sfp->product : sfp->location;

  CheckForTransSplice (sfp, &misordered, &mixed_strand, FALSE);

  GetLeftAndRightOffsetsInNearBioseq (loc, bsp, &left, &right, small_gen_set, misordered, mixed_strand);

  if (left == -1 || right == -1) {

    /* could not map feature onto bioseq, so post a warning and move on */
    /* NOTE(review): gc and gcp are not used directly, presumably the ErrPostItem macro reads gcp - confirm */

    GatherContext     gc;
    GatherContextPtr  gcp;
    Char              lastbspid [128];
    SeqIdPtr          sip;

    MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
    gc.entityID = gop->entityID;
    gc.itemID = gop->itemID;
    gc.thistype = gop->itemtype;
    gcp = &gc;

    lastbspid [0] = '\0';
    if (exindx->lastbsp != NULL) {
      sip = SeqIdFindBest (exindx->lastbsp->id, 0);
      if (sip != NULL) {
        SeqIdWrite (sip, lastbspid, PRINTID_FASTA_LONG, sizeof (lastbspid));
      }
    }

    FeatDefLabel (sfp, featlbl, sizeof (featlbl) - 1, OM_LABEL_CONTENT);
    printed = SeqLocPrint (sfp->location);
    shown = printed;
    if (shown == NULL) {
      shown = "?";
    }
    ErrPostItem (SEV_WARNING, 0, 0,
                 "SeqMgr indexing feature mapping problem - Feature: %s - Location [%s] - Record [%s]",
                 featlbl, shown, lastbspid);
    MemFree (printed);
    return TRUE;
  }

  /* if indexing protein bioseq, store largest protein feature */

  if (sfp->data.choice == SEQFEAT_PROT) {
    prp = (ProtRefPtr) sfp->data.value.ptrvalue;

    /* whole-bioseq location, using the SeqId in bioseq that matches SeqId used for location */

    whole.choice = SEQLOC_WHOLE;
    whole.next = NULL;
    whole.data.ptrvalue = (Pointer) SeqIdWithinBioseq (bsp, sfp->location);

    contained = SeqLocAinB (sfp->location, (Pointer) &whole);

    /* smaller difference wins, and on a tie unprocessed 0 is preferred over
       preprotein 1, which is preferred over mat peptide 2 */

    if (contained >= 0 && prp != NULL) {
      if (contained < bspextra->min ||
          (contained == bspextra->min && prp->processed < bspextra->processed)) {
        bspextra->min = contained;
        bspextra->processed = prp->processed;
        bspextra->protFeat = sfp;
      }
    }
  }

  /* add feature item to linked list of blocks */

  if (bspextra->featlisthead == NULL) {
    bspextra->featlisthead = (SMFeatBlockPtr) MemNew (sizeof (SMFeatBlock));

    /* for first feature indexed on this bioseq, quickly see if few or many
       additional features (capped at 50) follow it in the same list, to
       choose the block size */

    nfeats = 0;
    scan = sfp;
    while (scan != NULL && nfeats < 50) {
      scan = scan->next;
      nfeats++;
    }

    /* extend count if above features were packaged on a bioseq set (presumably CDS or mRNA) */

    if (exindx->lastbssp != NULL) {
      for (annot = bsp->annot; annot != NULL; annot = annot->next) {
        if (annot->type != 1) continue;
        scan = (SeqFeatPtr) annot->data;
        while (scan != NULL && nfeats < 50) {
          scan = scan->next;
          nfeats++;
        }
      }
    }

    bspextra->blocksize = nfeats;
  }
  if (bspextra->featlisttail == NULL) {
    bspextra->featlisttail = bspextra->featlisthead;
  }
  if (bspextra->featlisttail == NULL) return TRUE;

  circular = (Boolean) (bsp->topology == TOPOLOGY_CIRCULAR);

  /* if feature spans origin, record with left < 0 */

  if (circular && left > right) {
    left -= bsp->length;
  }

  /* some trans-spliced locations can confound GetOffsetInNearBioseq, so normalize here */

  if (left > right) {
    hold = left;
    left = right;
    right = hold;
  }

  RecordOneFeature (bspextra, omdp, bsp, exindx, sfp, left,
                    right, gop->itemID, gop->subtype, gop->external, usingLocalBsp,
                    misordered, mixed_strand, special_case, ts_image);

  /* record gene, publication, biosource, and operon features twice if spanning the origin */

  if (circular && left < 0) {
    if (sfp->data.choice == SEQFEAT_GENE ||
        sfp->data.choice == SEQFEAT_PUB ||
        sfp->data.choice == SEQFEAT_BIOSRC ||
        sfp->idx.subtype == FEATDEF_operon) {

      RecordOneFeature (bspextra, omdp, bsp, exindx, sfp, left + bsp->length,
                        right + bsp->length, gop->itemID, gop->subtype, gop->external, usingLocalBsp,
                        misordered, mixed_strand, TRUE, ts_image);
    }
  }

  return TRUE;
}
5950 
5951 typedef struct adpbspdata {
5952   AnnotDescPtr  adp;
5953   BioseqPtr     bsp;
5954 } AdpBspData, PNTR AdpBspPtr;
5955 
5956 /* callback for recording features and descriptor, prot, and cdsOrRna information */
5957 
RecordFeaturesInBioseqs(GatherObjectPtr gop)5958 static Boolean RecordFeaturesInBioseqs (GatherObjectPtr gop)
5959 
5960 {
5961   AdpBspPtr       abp;
5962   AnnotDescPtr    adp = NULL;
5963   BioseqPtr       bsp = NULL;
5964   BioseqExtraPtr  bspextra;
5965   BioseqSetPtr    bssp = NULL;
5966   Char            buf [81];
5967   CharPtr         ctmp;
5968   ExtraIndexPtr   exindx;
5969   ValNodePtr      head = NULL;
5970   BioseqPtr       lbsp;
5971   CharPtr         loclbl;
5972   ObjMgrDataPtr   omdp;
5973   SeqAnnotPtr     sap = NULL;
5974   ValNodePtr      sdp = NULL;
5975   SeqFeatPtr      sfp = NULL;
5976   SeqAlignPtr     sal = NULL;
5977   SeqIdPtr        sip;
5978   SeqLocPtr       slp;
5979   Boolean         small_gen_set = FALSE;
5980   Boolean         special_case = FALSE;
5981   ValNodePtr      tail = NULL;
5982   Boolean         usingLocalBsp = FALSE;
5983   ValNodePtr      vnp;
5984 
5985   switch (gop->itemtype) {
5986     case OBJ_BIOSEQ :
5987       bsp = (BioseqPtr) gop->dataptr;
5988       if (bsp == NULL) return TRUE;
5989       sdp = bsp->descr;
5990       break;
5991     case OBJ_BIOSEQSET :
5992       bssp = (BioseqSetPtr) gop->dataptr;
5993       if (bssp == NULL) return TRUE;
5994       sdp = bssp->descr;
5995       break;
5996     case OBJ_SEQANNOT :
5997       sap = (SeqAnnotPtr) gop->dataptr;
5998       break;
5999     case OBJ_ANNOTDESC :
6000       adp = (AnnotDescPtr) gop->dataptr;
6001       break;
6002     case OBJ_SEQFEAT :
6003       sfp = (SeqFeatPtr) gop->dataptr;
6004       break;
6005     case OBJ_SEQALIGN :
6006       sal = (SeqAlignPtr) gop->dataptr;
6007       break;
6008     default :
6009       return TRUE;
6010   }
6011 
6012   exindx = (ExtraIndexPtr) gop->userdata;
6013   if (exindx == NULL) return FALSE;
6014 
6015   /* save bspItemID to support bioseq explore functions */
6016 
6017   if (bsp != NULL) {
6018 
6019     (exindx->bspcount)++;
6020 
6021     /* save last BioseqPtr to check first for appropriate bioseq */
6022 
6023     exindx->lastbsp = bsp;
6024 
6025     /* blocksize for new block based only on features packaged on bioseq */
6026 
6027     exindx->lastbssp = NULL;
6028 
6029     omdp = SeqMgrGetOmdpForBioseq (bsp);
6030     if (omdp != NULL) {
6031       bspextra = (BioseqExtraPtr) omdp->extradata;
6032       if (bspextra == NULL) {
6033         CreateBioseqExtraBlock (omdp, bsp);
6034         bspextra = (BioseqExtraPtr) omdp->extradata;
6035       }
6036       if (bspextra != NULL) {
6037         bspextra->bspItemID = gop->itemID;
6038         bspextra->bspIndex = exindx->bspcount;
6039       }
6040     }
6041   }
6042 
6043   /* save last BioseqSetPtr to calculate blocksize from bioseq set and bioseq features,
6044      features on bioseq set presumably being CDS or mRNA and applying only to nucleotides */
6045 
6046   if (bssp != NULL) {
6047     exindx->lastbssp = bssp;
6048   }
6049 
6050   /* count bioseq or bioseq set descriptors, to calculate lastDescrItemID */
6051 
6052   if (sdp != NULL) {
6053     SetDescriptorCounts (sdp, exindx, gop->dataptr, gop->itemtype);
6054     return TRUE;
6055   }
6056 
6057   /* save SeqAnnotPtr containing next features to be gathered */
6058 
6059   if (sap != NULL) {
6060     exindx->lastsap = sap;
6061     return TRUE;
6062   }
6063 
6064   /* record SeqAlignPtr in val node list - expects all itemIDs to be presented */
6065 
6066   if (sal != NULL) {
6067     vnp = ValNodeAddPointer (&(exindx->lastalign), 0, (Pointer) sal);
6068     if (exindx->alignhead == NULL) {
6069       exindx->alignhead = exindx->lastalign;
6070     }
6071     exindx->lastalign = vnp;
6072     (exindx->aligncount)++;
6073     return TRUE;
6074   }
6075 
6076   /* record AnnotDescPtr and relevant BioseqPtr in val node list */
6077 
6078   if (adp != NULL) {
6079     abp = (AdpBspPtr) MemNew (sizeof (AdpBspData));
6080     if (abp != NULL) {
6081       abp->adp = adp;
6082       sap = exindx->lastsap;
6083       if (sap != NULL && sap->type == 1) {
6084         bsp = NULL;
6085         sfp = (SeqFeatPtr) sap->data;
6086         /* if empty Seq-annot with Seq-annot.descr, use last Bioseq */
6087         if (sfp == NULL) {
6088           bsp = exindx->lastbsp;
6089         }
6090         while (sfp != NULL && bsp == NULL) {
6091           slp = sfp->location;
6092           if (slp != NULL) {
6093             bsp = BioseqFindFromSeqLoc (slp);
6094             if (bsp == NULL && gop->external) {
6095               bsp = exindx->lastbsp;
6096             }
6097           }
6098           sfp = sfp->next;
6099         }
6100         abp->bsp = bsp;
6101       }
6102       vnp = ValNodeAddPointer (&(exindx->lastadp), 0, (Pointer) abp);
6103       if (exindx->adphead == NULL) {
6104         exindx->adphead = exindx->lastadp;
6105       }
6106       exindx->lastadp = vnp;
6107       (exindx->adpcount)++;
6108     }
6109     return TRUE;
6110   }
6111 
6112   /* otherwise index features on every bioseq in entity */
6113 
6114   if (sfp == NULL) return TRUE;
6115 
6116   /* cds or rna reference stored in product bioseq's omdp.cdsOrRnaFeat,
6117      best protein feature in omdp.protFeat (do before adding CDS) */
6118 
6119   if (sfp->product != NULL) {
6120     ProcessFeatureProducts (sfp, gop->itemID, gop);
6121   }
6122 
6123   bsp = FindAppropriateBioseq (sfp->location, exindx->lastbsp, &small_gen_set);
6124 
6125   /* failure here can be due to SeqLoc that references far accession */
6126 
6127   if (bsp == NULL) {
6128 
6129     /* if far accession, find first local bioseq on any location interval */
6130 
6131     bsp = FindFirstLocalBioseq (sfp->location);
6132 
6133     /* report whether far accession was able to be handled */
6134 
6135     FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);
6136     ctmp = SeqLocPrint (sfp->location);
6137     loclbl = ctmp;
6138     if (loclbl == NULL) {
6139       loclbl = "?";
6140     }
6141 
6142     if (bsp == NULL) {
6143       {
6144         GatherContext     gc;
6145         GatherContextPtr  gcp;
6146         Char              lastbspid [41];
6147         SeqIdPtr          sip;
6148         MemSet ((Pointer) &gc, 0, sizeof (GatherContext));
6149         gcp = &gc;
6150         gc.entityID = gop->entityID;
6151         gc.itemID = gop->itemID;
6152         gc.thistype = gop->itemtype;
6153         lastbspid [0] = '\0';
6154         if (exindx->lastbsp != NULL) {
6155           sip = SeqIdFindBest (exindx->lastbsp->id, 0);
6156           if (sip != NULL) {
6157             SeqIdWrite (sip, lastbspid, PRINTID_FASTA_LONG, sizeof (lastbspid));
6158           }
6159         }
6160         ErrPostItem (SEV_WARNING, 0, 0,
6161                      "SeqMgr indexing feature location problem - Feature: %s - Location [%s] - Record [%s]",
6162                      buf, loclbl, lastbspid);
6163       }
6164     } else {
6165       /*
6166       ErrPostItem (SEV_INFO, 0, 0,
6167                    "SeqMgr indexing detected and handled far accession - Feature: %s - Location [%s]",
6168                    buf, loclbl);
6169       */
6170     }
6171     MemFree (ctmp);
6172 
6173     if (bsp == NULL && sfp->product != NULL &&
6174         sfp->data.choice == SEQFEAT_CDREGION &&
6175         IS_Bioseq (exindx->topsep)) {
6176       bsp = (BioseqPtr) exindx->topsep->data.ptrvalue;
6177       if (bsp == NULL || (! ISA_aa (bsp->mol))) return TRUE;
6178       special_case = TRUE;
6179       bsp = FindAppropriateBioseq (sfp->product, exindx->lastbsp, &small_gen_set);
6180       if (bsp == NULL) return TRUE;
6181     } else {
6182       if (bsp == NULL) return TRUE;
6183       usingLocalBsp = TRUE;
6184     }
6185   }
6186 
6187   /* assume subsequent features will be on this bioseq */
6188 
6189   exindx->lastbsp = bsp;
6190 
6191   RecordFeatureOnBioseq (gop, bsp, sfp, exindx, usingLocalBsp, special_case, small_gen_set, FALSE);
6192 
6193   /* for small genome set, index mixed-chromosome features on other chromosomes as misc_features for visibility */
6194 
6195   if (sfp->data.choice != SEQFEAT_GENE) return TRUE;
6196 
6197   if (small_gen_set) {
6198     slp = SeqLocFindNext (sfp->location, NULL);
6199     while (slp != NULL) {
6200       sip = SeqLocId (slp);
6201       if (sip != NULL) {
6202         lbsp = BioseqFindCore (sip);
6203         if (lbsp != NULL) {
6204           if (lbsp != bsp) {
6205             ValNodeAddPointerEx (&head, &tail, 0, (Pointer) lbsp);
6206           }
6207         }
6208       }
6209       slp = SeqLocFindNext (sfp->location, slp);
6210     }
6211     if (head != NULL) {
6212       head = ValNodeSort (head, SortByPtrvalue);
6213       head = UniquePtrValNode (head);
6214 
6215       for (vnp = head; vnp != NULL; vnp = vnp->next) {
6216         bsp = (BioseqPtr) vnp->data.ptrvalue;
6217         if (bsp == NULL) continue;
6218 
6219         /*
6220         !!! need to add flag so that these features are only fetched by flatfile generator
6221         and with a distinct flag so that they show up as something like misc_feature instead
6222         of CDS !!!
6223         */
6224 
6225         exindx->lastbsp = bsp;
6226         RecordFeatureOnBioseq (gop, bsp, sfp, exindx, usingLocalBsp, special_case, small_gen_set, TRUE);
6227       }
6228 
6229       ValNodeFree (head);
6230     }
6231   }
6232 
6233   return TRUE;
6234 }
6235 
6236 /*****************************************************************************
6237 *
6238 *   RecordSegmentsInBioseqs callback explores bioseq segments
6239 *
6240 *****************************************************************************/
6241 
RecordSegmentsInBioseqs(GatherObjectPtr gop)6242 static Boolean RecordSegmentsInBioseqs (GatherObjectPtr gop)
6243 
6244 {
6245   BioseqPtr       bsp = NULL;
6246   BioseqExtraPtr  bspextra;
6247   Char            buf [128];
6248   Dbtag           db;
6249   DeltaSeqPtr     dsp;
6250   ExtraIndexPtr   exindx;
6251   Int4            from;
6252   Boolean         isSeg = FALSE;
6253   ObjectId        oi;
6254   ObjMgrDataPtr   omdp;
6255   SMSeqIdxPtr     segpartptr;
6256   SeqId           si;
6257   SeqIdPtr        sid;
6258   SeqInt          sint;
6259   SeqIntPtr       sipp;
6260   SeqLoc          sl;
6261   SeqLitPtr       slitp;
6262   SeqLocPtr       slp = NULL;
6263   Uint1           strand;
6264   Int4            to;
6265 
6266   exindx = (ExtraIndexPtr) gop->userdata;
6267   if (exindx == NULL) return FALSE;
6268 
6269   switch (gop->itemtype) {
6270     case OBJ_BIOSEQ :
6271       bsp = (BioseqPtr) gop->dataptr;
6272       if (bsp == NULL) return TRUE;
6273       break;
6274     case OBJ_BIOSEQ_SEG :
6275       isSeg = TRUE;
6276       slp = (SeqLocPtr) gop->dataptr;
6277       if (slp == NULL) return TRUE;
6278       break;
6279     case OBJ_BIOSEQ_DELTA :
6280       dsp = (DeltaSeqPtr) gop->dataptr;
6281       if (dsp == NULL) return TRUE;
6282       if (dsp->choice == 1) {
6283         slp = (SeqLocPtr) dsp->data.ptrvalue;
6284       } else if (dsp->choice == 2) {
6285         slitp = (SeqLitPtr) dsp->data.ptrvalue;
6286         if (slitp != NULL) {
6287           /* fake seqloc, same as in DeltaSeqsToSeqLocs */
6288           MemSet ((Pointer) &sl, 0, sizeof (SeqLoc));
6289           MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
6290           MemSet ((Pointer) &si, 0, sizeof (SeqId));
6291           MemSet ((Pointer) &db, 0, sizeof (Dbtag));
6292           MemSet ((Pointer) &oi, 0, sizeof (ObjectId));
6293           sl.choice = SEQLOC_INT;
6294           sl.data.ptrvalue = (Pointer) &sint;
6295           sint.from = 0;
6296           sint.to = slitp->length - 1;
6297           si.choice = SEQID_GENERAL;
6298           si.data.ptrvalue = (Pointer) &db;
6299           db.db = "SeqLit";
6300           db.tag = &oi;
6301           (exindx->seqlitid)++;
6302           oi.id = exindx->seqlitid;
6303           sint.id = &si;
6304           slp = &sl;
6305         }
6306       }
6307       break;
6308     default :
6309       return TRUE;
6310   }
6311 
6312   if (bsp != NULL) {
6313     if (bsp->repr == Seq_repr_seg) {
6314       exindx->lastbsp = bsp;
6315     } else if (bsp->repr == Seq_repr_delta) {
6316       exindx->lastbsp = bsp;
6317     } else if (bsp->repr == Seq_repr_ref) {
6318       exindx->lastbsp = bsp;
6319     } else {
6320       exindx->lastbsp = NULL;
6321     }
6322     exindx->cumulative = 0;
6323     return TRUE;
6324   }
6325 
6326   if (slp == NULL) return TRUE;
6327 
6328   bsp = exindx->lastbsp;
6329   if (bsp == NULL) return TRUE;
6330 
6331   omdp = SeqMgrGetOmdpForBioseq (bsp);
6332   if (omdp == NULL) return TRUE;
6333 
6334   bspextra = (BioseqExtraPtr) omdp->extradata;
6335   if (bspextra == NULL) {
6336     CreateBioseqExtraBlock (omdp, bsp);
6337     bspextra = (BioseqExtraPtr) omdp->extradata;
6338   }
6339   if (bspextra == NULL) return TRUE;
6340 
6341   if (slp->choice == SEQLOC_INT && slp->data.ptrvalue != NULL) {
6342     sipp = (SeqIntPtr) (slp->data.ptrvalue);
6343     from = sipp->from;
6344     to = sipp->to;
6345     strand = sipp->strand;
6346   } else {
6347     from = 0;
6348     to = SeqLocLen (slp) - 1;
6349     strand = SeqLocStrand (slp);
6350   }
6351 
6352   if (to - from + 1 < 1) return TRUE;
6353 
6354   /* create and fill in SMSeqIdx element */
6355 
6356   segpartptr = MemNew (sizeof (SMSeqIdx));
6357   if (segpartptr != NULL) {
6358     sid = SeqLocId (slp);
6359     if (MakeReversedSeqIdString (sid, buf, sizeof (buf) - 1)) {
6360       segpartptr->slp = AsnIoMemCopy (slp,
6361                                       (AsnReadFunc) SeqLocAsnRead,
6362                                       (AsnWriteFunc) SeqLocAsnWrite);
6363       segpartptr->seqIdOfPart = StringSave (buf);
6364       if (isSeg) {
6365 
6366         /* only annotate parentBioseq for segmented, not delta bioseq */
6367 
6368         segpartptr->parentBioseq = bsp;
6369       } else {
6370         segpartptr->parentBioseq = NULL;
6371       }
6372       segpartptr->cumOffset = exindx->cumulative;
6373       segpartptr->from = from;
6374       segpartptr->to = to;
6375       segpartptr->strand = strand;
6376       segpartptr->itemID = gop->itemID;
6377     }
6378   }
6379 
6380   exindx->cumulative += (to - from + 1);
6381 
6382   /* link into segparthead list of parts IDs */
6383 
6384   if (bspextra->segparthead == NULL) {
6385     bspextra->segparthead = segpartptr;
6386     exindx->segpartail = segpartptr;
6387   } else if (exindx->segpartail != NULL) {
6388     exindx->segpartail->next = segpartptr;
6389     exindx->segpartail = segpartptr;
6390   }
6391 
6392   return TRUE;
6393 }
6394 
6395 /*****************************************************************************
6396 *
6397 *   SortFeatItemListByID callback sorts array into feature item table by itemID
6398 *   SortFeatItemListBySfp sorts by feature pointer
6399 *   SortFeatItemListByPos sorts by feature position
6400 *   SortFeatItemListByRev sorts by reverse feature position
6401 *
6402 *****************************************************************************/
6403 
SortFeatItemListByID(VoidPtr vp1,VoidPtr vp2)6404 static int LIBCALLBACK SortFeatItemListByID (VoidPtr vp1, VoidPtr vp2)
6405 
6406 {
6407   SMFeatItemPtr PNTR  spp1 = vp1;
6408   SMFeatItemPtr PNTR  spp2 = vp2;
6409   SMFeatItemPtr       sp1;
6410   SMFeatItemPtr       sp2;
6411 
6412   if (spp1 == NULL || spp2 == NULL) return 0;
6413   sp1 = *((SMFeatItemPtr PNTR) spp1);
6414   sp2 = *((SMFeatItemPtr PNTR) spp2);
6415   if (sp1 == NULL || sp2 == NULL) return 0;
6416 
6417   /* sort by feature itemID */
6418 
6419   if (sp1->itemID > sp2->itemID) {
6420     return 1;
6421   } else if (sp1->itemID < sp2->itemID) {
6422     return -1;
6423 
6424   /* for duplicated genes, etc., that cross origin, put ignored item last for binary search */
6425 
6426   } else if (sp1->ignore) {
6427     return 1;
6428   } else if (sp2->ignore) {
6429     return -1;
6430   }
6431 
6432   return 0;
6433 }
6434 
SortFeatItemListBySfp(VoidPtr vp1,VoidPtr vp2)6435 static int LIBCALLBACK SortFeatItemListBySfp (VoidPtr vp1, VoidPtr vp2)
6436 
6437 {
6438   SMFeatItemPtr PNTR  spp1 = vp1;
6439   SMFeatItemPtr PNTR  spp2 = vp2;
6440   SMFeatItemPtr       sp1;
6441   SMFeatItemPtr       sp2;
6442 
6443   if (spp1 == NULL || spp2 == NULL) return 0;
6444   sp1 = *((SMFeatItemPtr PNTR) spp1);
6445   sp2 = *((SMFeatItemPtr PNTR) spp2);
6446   if (sp1 == NULL || sp2 == NULL) return 0;
6447 
6448   /* sort by SeqFeatPtr value */
6449 
6450   if (sp1->sfp > sp2->sfp) {
6451     return 1;
6452   } else if (sp1->sfp < sp2->sfp) {
6453     return -1;
6454 
6455   /* for duplicated genes, etc., that cross origin, put ignored item last for binary search */
6456 
6457   } else if (sp1->ignore) {
6458     return 1;
6459   } else if (sp2->ignore) {
6460     return -1;
6461   }
6462 
6463   return 0;
6464 }
6465 
SortFeatItemListByLabel(VoidPtr vp1,VoidPtr vp2)6466 static int LIBCALLBACK SortFeatItemListByLabel (VoidPtr vp1, VoidPtr vp2)
6467 
6468 {
6469   int                 compare;
6470   SMFeatItemPtr PNTR  spp1 = vp1;
6471   SMFeatItemPtr PNTR  spp2 = vp2;
6472   SMFeatItemPtr       sp1;
6473   SMFeatItemPtr       sp2;
6474 
6475   if (spp1 == NULL || spp2 == NULL) return 0;
6476   sp1 = *((SMFeatItemPtr PNTR) spp1);
6477   sp2 = *((SMFeatItemPtr PNTR) spp2);
6478   if (sp1 == NULL || sp2 == NULL) return 0;
6479 
6480   /* sort by label value */
6481 
6482   compare = StringICmp (sp1->label, sp2->label);
6483   if (compare > 0) {
6484     return 1;
6485   } else if (compare < 0) {
6486     return -1;
6487   }
6488 
6489   /* If they're case-insensitive the same, but case-sensitive different,
6490      then fall back to sort by case-sensitive
6491      (e.g. AJ344068.1 has genes korA and KorA ) */
6492   compare = StringCmp (sp1->label, sp2->label);
6493   if( compare > 0 ) {
6494     return 1;
6495   } else if( compare < 0 ) {
6496     return -1;
6497   }
6498 
6499   /* for duplicated genes, etc., that cross origin, put ignored item last for binary search */
6500 
6501   if (sp1->ignore) {
6502     return 1;
6503   } else if (sp2->ignore) {
6504     return -1;
6505   }
6506 
6507   return 0;
6508 }
6509 
SortFeatItemListByLocusTag(VoidPtr vp1,VoidPtr vp2)6510 static int LIBCALLBACK SortFeatItemListByLocusTag (VoidPtr vp1, VoidPtr vp2)
6511 
6512 {
6513   int                 compare;
6514   GeneRefPtr          grp1;
6515   GeneRefPtr          grp2;
6516   SeqFeatPtr          sfp1;
6517   SeqFeatPtr          sfp2;
6518   SMFeatItemPtr PNTR  spp1 = vp1;
6519   SMFeatItemPtr PNTR  spp2 = vp2;
6520   SMFeatItemPtr       sp1;
6521   SMFeatItemPtr       sp2;
6522 
6523   if (spp1 == NULL || spp2 == NULL) return 0;
6524   sp1 = *((SMFeatItemPtr PNTR) spp1);
6525   sp2 = *((SMFeatItemPtr PNTR) spp2);
6526   if (sp1 == NULL || sp2 == NULL) return 0;
6527 
6528   sfp1 = sp1->sfp;
6529   sfp2 = sp2->sfp;
6530   if (sfp1 == NULL || sfp2 == NULL) return 0;
6531 
6532   if (sfp1->data.choice != SEQFEAT_GENE || sfp2->data.choice != SEQFEAT_GENE) return 0;
6533   grp1 = (GeneRefPtr) sfp1->data.value.ptrvalue;
6534   grp2 = (GeneRefPtr) sfp2->data.value.ptrvalue;
6535   if (grp1 == NULL || grp2 == NULL) return 0;
6536 
6537   /* sort by locus_tag */
6538 
6539   compare = StringICmp (grp1->locus_tag, grp2->locus_tag);
6540   if (compare > 0) {
6541     return 1;
6542   } else if (compare < 0) {
6543     return -1;
6544   }
6545 
6546   /* sort by locus if locus_tag is identical */
6547 
6548   compare = StringICmp (grp1->locus, grp2->locus);
6549   if (compare > 0) {
6550     return 1;
6551   } else if (compare < 0) {
6552     return -1;
6553   }
6554 
6555   /* for duplicated genes that cross origin, put ignored item last for binary search */
6556 
6557   if (sp1->ignore) {
6558     return 1;
6559   } else if (sp2->ignore) {
6560     return -1;
6561   }
6562 
6563   return 0;
6564 }
6565 
SortFeatItemListByPos(VoidPtr vp1,VoidPtr vp2)6566 static int LIBCALLBACK SortFeatItemListByPos (VoidPtr vp1, VoidPtr vp2)
6567 
6568 {
6569   Int2                compare;
6570   CdRegionPtr         crp1;
6571   CdRegionPtr         crp2;
6572   Int2                i;
6573   Char                id1 [128];
6574   Char                id2 [128];
6575   Int2                j;
6576   Int2                numivals;
6577   SeqAnnotPtr         sap1;
6578   SeqAnnotPtr         sap2;
6579   SeqIdPtr            sip1;
6580   SeqIdPtr            sip2;
6581   SMFeatItemPtr PNTR  spp1 = vp1;
6582   SMFeatItemPtr PNTR  spp2 = vp2;
6583   SMFeatItemPtr       sp1;
6584   SMFeatItemPtr       sp2;
6585   SeqFeatPtr          sfp1;
6586   SeqFeatPtr          sfp2;
6587   SeqLocPtr           slp1;
6588   SeqLocPtr           slp2;
6589   Uint1               subtype1;
6590   Uint1               subtype2;
6591 
6592   if (spp1 == NULL || spp2 == NULL) return 0;
6593   sp1 = *((SMFeatItemPtr PNTR) spp1);
6594   sp2 = *((SMFeatItemPtr PNTR) spp2);
6595   if (sp1 == NULL || sp2 == NULL) return 0;
6596 
6597   /* feature with smallest left extreme is first */
6598 
6599   if (sp1->left > sp2->left) {
6600     return 1;
6601   } else if (sp1->left < sp2->left) {
6602     return -1;
6603 
6604   /* reversing order so that longest feature is first */
6605 
6606   } else if (sp1->right > sp2->right) {
6607     return -1; /* was 1 */
6608   } else if (sp1->right < sp2->right) {
6609     return 1; /* was -1 */
6610   }
6611 
6612   /* given identical extremes, put operon features first */
6613 
6614   if (sp1->subtype == FEATDEF_operon && sp2->subtype != FEATDEF_operon) {
6615     return -1;
6616   } else if (sp2->subtype == FEATDEF_operon && sp1->subtype != FEATDEF_operon) {
6617     return 1;
6618   }
6619 
6620   /* then gene features */
6621 
6622   if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) {
6623     return -1;
6624   } else if (sp2->subtype == FEATDEF_GENE && sp1->subtype != FEATDEF_GENE) {
6625     return 1;
6626   }
6627 
6628   /* then rna features */
6629 
6630   subtype1 = FindFeatFromFeatDefType (sp1->subtype);
6631   subtype2 = FindFeatFromFeatDefType (sp2->subtype);
6632 
6633   if (subtype1 == SEQFEAT_RNA && subtype2 != SEQFEAT_RNA) {
6634     return -1;
6635   } else if (subtype2 == SEQFEAT_RNA && subtype1 != SEQFEAT_RNA) {
6636     return 1;
6637   }
6638 
6639   /* precursor RNA before non-coding RNA */
6640 
6641   if (sp1->subtype == FEATDEF_preRNA && sp2->subtype == FEATDEF_ncRNA) {
6642     return -1;
6643   } else if (sp2->subtype == FEATDEF_preRNA && sp1->subtype == FEATDEF_ncRNA) {
6644     return 1;
6645   }
6646 
6647   /* then cds features */
6648 
6649   if (sp1->subtype == FEATDEF_CDS && sp2->subtype != FEATDEF_CDS) {
6650     return -1;
6651   } else if (sp2->subtype == FEATDEF_CDS && sp1->subtype != FEATDEF_CDS) {
6652     return 1;
6653   }
6654 
6655   /* next compare internal intervals */
6656 
6657   numivals = MIN (sp1->numivals, sp2->numivals);
6658   if (numivals > 0 && sp1->ivals != NULL && sp2->ivals != NULL) {
6659     for (i = 0, j = 0; i < numivals; i++) {
6660 
6661       /* check biological start position */
6662 
6663       if (sp1->ivals [j] > sp2->ivals [j]) {
6664         return 1;
6665       } else if (sp1->ivals [j] < sp2->ivals [j]) {
6666         return -1;
6667       }
6668       j++;
6669 
6670       /* check biological stop position */
6671 
6672       if (sp1->ivals [j] > sp2->ivals [j]) {
6673         return -1; /* was 1 */
6674       } else if (sp1->ivals [j] < sp2->ivals [j]) {
6675         return 1; /* was -1 */
6676       }
6677       j++;
6678     }
6679   }
6680 
6681   /* one with fewer intervals goes first */
6682 
6683   if (sp1->numivals > sp2->numivals) {
6684     return 1;
6685   } else if (sp1->numivals < sp2->numivals) {
6686     return -1;
6687   }
6688 
6689   /* next compare other feature subtypes */
6690 
6691   if (sp1->subtype < sp2->subtype) {
6692     return -1;
6693   } else if (sp1->subtype > sp2->subtype) {
6694     return 1;
6695   }
6696 
6697   /* if identical gap ranges, use itemID to put flatfile-generated gap feature last */
6698 
6699   if (sp1->subtype == FEATDEF_gap && sp2->subtype == FEATDEF_gap) {
6700     if (sp1->itemID > sp2->itemID) {
6701       return 1;
6702     } else if (sp1->itemID < sp2->itemID) {
6703       return -1;
6704     }
6705   }
6706 
6707   /* if identical cds ranges, compare codon_start */
6708 
6709   if (sp1->subtype == FEATDEF_CDS && sp2->subtype == FEATDEF_CDS) {
6710     sfp1 = sp1->sfp;
6711     sfp2 = sp2->sfp;
6712     if (sfp1 != NULL && sfp2 != NULL) {
6713 
6714       crp1 = (CdRegionPtr) sfp1->data.value.ptrvalue;
6715       crp2 = (CdRegionPtr) sfp2->data.value.ptrvalue;
6716       if (crp1 != NULL && crp2 != NULL) {
6717         if (crp1->frame > 1 || crp2->frame > 1) {
6718           if (crp1->frame < crp2->frame) {
6719             return -1;
6720           } else if (crp1->frame < crp2->frame) {
6721             return 1;
6722           }
6723         }
6724       }
6725     }
6726   }
6727 
6728   /* then compare cds or mRNA product identifiers */
6729 
6730   sfp1 = sp1->sfp;
6731   sfp2 = sp2->sfp;
6732   if (sfp1 != NULL && sfp2 != NULL) {
6733     slp1 = (SeqLocPtr) sfp1->product;
6734     slp2 = (SeqLocPtr) sfp2->product;
6735     if (slp1 != NULL && slp2 == NULL) {
6736       return 1;
6737     } else if (slp1 == NULL && slp2 != NULL) {
6738       return -1;
6739     } else if (slp1 != NULL && slp2 != NULL) {
6740       sip1 = SeqLocId (slp1);
6741       sip2 = SeqLocId (slp2);
6742       if (sip1 != NULL && sip2 == NULL) {
6743         return 1;
6744       } else if (sip1 == NULL && sip2 != NULL) {
6745         return -1;
6746       } else if (sip1 != NULL && sip2 != NULL) {
6747         SeqIdWrite (sip1, id1, PRINTID_FASTA_LONG, sizeof (id1) - 1);
6748         SeqIdWrite (sip2, id2, PRINTID_FASTA_LONG, sizeof (id2) - 1);
6749         compare = StringCmp (id1, id2);
6750         if (compare > 0) {
6751           return 1;
6752         } else if (compare < 0) {
6753           return -1;
6754         }
6755       }
6756     }
6757   }
6758 
6759   /* then compare feature label */
6760 
6761   compare = StringCmp (sp1->label, sp2->label);
6762   if (compare > 0) {
6763     return 1;
6764   } else if (compare < 0) {
6765     return -1;
6766   }
6767 
6768   /* compare parent seq-annot by itemID (was sap pointer value) */
6769 
6770   sap1 = sp1->sap;
6771   sap2 = sp2->sap;
6772   if (sap1 != NULL && sap2 != NULL) {
6773     if (sap1->idx.itemID > sap2->idx.itemID) {
6774       return 1;
6775     } else if (sap1->idx.itemID < sap2->idx.itemID) {
6776       return -1;
6777     }
6778   }
6779 
6780   /* last comparison to make it absolutely deterministic */
6781 
6782   if (sp1->itemID > sp2->itemID) {
6783     return 1;
6784   } else if (sp1->itemID < sp2->itemID) {
6785     return -1;
6786   }
6787 
6788   return 0;
6789 }
6790 
SortFeatItemListByRev(VoidPtr vp1,VoidPtr vp2)6791 static int LIBCALLBACK SortFeatItemListByRev (VoidPtr vp1, VoidPtr vp2)
6792 
6793 {
6794   Int2                compare;
6795   CdRegionPtr         crp1;
6796   CdRegionPtr         crp2;
6797   Int2                i;
6798   Int2                j;
6799   Int2                k;
6800   Int2                numivals;
6801   SeqAnnotPtr         sap1;
6802   SeqAnnotPtr         sap2;
6803   SMFeatItemPtr PNTR  spp1 = vp1;
6804   SMFeatItemPtr PNTR  spp2 = vp2;
6805   SMFeatItemPtr       sp1;
6806   SMFeatItemPtr       sp2;
6807   SeqFeatPtr          sfp1;
6808   SeqFeatPtr          sfp2;
6809   Uint1               subtype1;
6810   Uint1               subtype2;
6811 
6812   if (spp1 == NULL || spp2 == NULL) return 0;
6813   sp1 = *((SMFeatItemPtr PNTR) spp1);
6814   sp2 = *((SMFeatItemPtr PNTR) spp2);
6815   if (sp1 == NULL || sp2 == NULL) return 0;
6816 
6817   /* feature with largest right extreme is first */
6818 
6819   if (sp1->right < sp2->right) {
6820     return 1;
6821   } else if (sp1->right > sp2->right) {
6822     return -1;
6823 
6824   /* reversing order so that longest feature is first */
6825 
6826   } else if (sp1->left < sp2->left) {
6827     return -1;
6828   } else if (sp1->left > sp2->left) {
6829     return 1;
6830   }
6831 
6832   /* given identical extremes, put operon features first */
6833 
6834   if (sp1->subtype == FEATDEF_operon && sp2->subtype != FEATDEF_operon) {
6835     return -1;
6836   } else if (sp2->subtype == FEATDEF_operon && sp1->subtype != FEATDEF_operon) {
6837     return 1;
6838   }
6839 
6840   /* then gene features */
6841 
6842   if (sp1->subtype == FEATDEF_GENE && sp2->subtype != FEATDEF_GENE) {
6843     return -1;
6844   } else if (sp2->subtype == FEATDEF_GENE && sp1->subtype != FEATDEF_GENE) {
6845     return 1;
6846   }
6847 
6848   /* then rna features */
6849 
6850   subtype1 = FindFeatFromFeatDefType (sp1->subtype);
6851   subtype2 = FindFeatFromFeatDefType (sp2->subtype);
6852 
6853   if (subtype1 == SEQFEAT_RNA && subtype2 != SEQFEAT_RNA) {
6854     return -1;
6855   } else if (subtype2 == SEQFEAT_RNA && subtype1 != SEQFEAT_RNA) {
6856     return 1;
6857   }
6858 
6859   /* precursor RNA before non-coding RNA */
6860 
6861   if (sp1->subtype == FEATDEF_preRNA && sp2->subtype == FEATDEF_ncRNA) {
6862     return -1;
6863   } else if (sp2->subtype == FEATDEF_preRNA && sp1->subtype == FEATDEF_ncRNA) {
6864     return 1;
6865   }
6866 
6867   /* then cds features */
6868 
6869   if (sp1->subtype == FEATDEF_CDS && sp2->subtype != FEATDEF_CDS) {
6870     return -1;
6871   } else if (sp2->subtype == FEATDEF_CDS && sp1->subtype != FEATDEF_CDS) {
6872     return 1;
6873   }
6874 
6875   /* next compare internal intervals */
6876 
6877   numivals = MIN (sp1->numivals, sp2->numivals);
6878   if (numivals > 0 && sp1->ivals != NULL && sp2->ivals != NULL) {
6879     for (i = 0, j = sp1->numivals * 2, k = sp2->numivals * 2; i < numivals; i++) {
6880 
6881       /* check biological stop position */
6882 
6883       k--;
6884       j--;
6885       if (sp1->ivals [j] < sp2->ivals [k]) {
6886         return 1;
6887       } else if (sp1->ivals [j] > sp2->ivals [k]) {
6888         return -1;
6889       }
6890 
6891       /* check biological start position */
6892 
6893       k--;
6894       j--;
6895       if (sp1->ivals [j] < sp2->ivals [k]) {
6896         return -1;
6897       } else if (sp1->ivals [j] > sp2->ivals [k]) {
6898         return 1;
6899       }
6900     }
6901   }
6902 
6903   /* one with fewer intervals goes first */
6904 
6905   if (sp1->numivals > sp2->numivals) {
6906     return 1;
6907   } else if (sp1->numivals < sp2->numivals) {
6908     return -1;
6909   }
6910 
6911   /* next compare other feature subtypes */
6912 
6913   if (sp1->subtype < sp2->subtype) {
6914     return -1;
6915   } else if (sp1->subtype > sp2->subtype) {
6916     return 1;
6917   }
6918 
6919   /* if identical gap ranges, use itemID to put flatfile-generated gap feature last */
6920 
6921   if (sp1->subtype == FEATDEF_gap && sp2->subtype == FEATDEF_gap) {
6922     if (sp1->itemID > sp2->itemID) {
6923       return 1;
6924     } else if (sp1->itemID < sp2->itemID) {
6925       return -1;
6926     }
6927   }
6928 
6929   /* if identical cds ranges, compare codon_start */
6930 
6931   if (sp1->subtype == FEATDEF_CDS && sp2->subtype == FEATDEF_CDS) {
6932     sfp1 = sp1->sfp;
6933     sfp2 = sp2->sfp;
6934     if (sfp1 != NULL && sfp2 != NULL) {
6935       crp1 = (CdRegionPtr) sfp1->data.value.ptrvalue;
6936       crp2 = (CdRegionPtr) sfp2->data.value.ptrvalue;
6937       if (crp1 != NULL && crp2 != NULL) {
6938         if (crp1->frame > 1 || crp2->frame > 1) {
6939           if (crp1->frame < crp2->frame) {
6940             return -1;
6941           } else if (crp1->frame < crp2->frame) {
6942             return 1;
6943           }
6944         }
6945       }
6946     }
6947   }
6948 
6949   /* then compare feature label */
6950 
6951   compare = StringCmp (sp1->label, sp2->label);
6952   if (compare > 0) {
6953     return 1;
6954   } else if (compare < 0) {
6955     return -1;
6956   }
6957 
6958   /* compare parent seq-annot by itemID (was sap pointer value) */
6959 
6960   sap1 = sp1->sap;
6961   sap2 = sp2->sap;
6962   if (sap1 != NULL && sap2 != NULL) {
6963     if (sap1->idx.itemID > sap2->idx.itemID) {
6964       return 1;
6965     } else if (sap1->idx.itemID < sap2->idx.itemID) {
6966       return -1;
6967     }
6968   }
6969 
6970   /* last comparison to make it absolutely deterministic */
6971 
6972   if (sp1->itemID > sp2->itemID) {
6973     return 1;
6974   } else if (sp1->itemID < sp2->itemID) {
6975     return -1;
6976   }
6977 
6978   return 0;
6979 }
6980 
SortFidListByFeatID(VoidPtr vp1,VoidPtr vp2)6981 static int LIBCALLBACK SortFidListByFeatID (VoidPtr vp1, VoidPtr vp2)
6982 
6983 {
6984   int                compare;
6985   SMFidItemPtr PNTR  spp1 = vp1;
6986   SMFidItemPtr PNTR  spp2 = vp2;
6987   SMFidItemPtr       sp1;
6988   SMFidItemPtr       sp2;
6989 
6990   if (spp1 == NULL || spp2 == NULL) return 0;
6991   sp1 = *((SMFidItemPtr PNTR) spp1);
6992   sp2 = *((SMFidItemPtr PNTR) spp2);
6993   if (sp1 == NULL || sp2 == NULL) return 0;
6994 
6995   /* sort by feature itemID label value */
6996 
6997   compare = StringICmp (sp1->fid, sp2->fid);
6998   if (compare > 0) {
6999     return 1;
7000   } else if (compare < 0) {
7001     return -1;
7002   }
7003 
7004   return 0;
7005 }
7006 
7007 /*****************************************************************************
7008 *
7009 *   IndexSegmentedParts callback builds index to speed up mapping
7010 *     of parts to segmented bioseqs
7011 *
7012 *****************************************************************************/
7013 
SortSeqIdxArray(VoidPtr ptr1,VoidPtr ptr2)7014 static int LIBCALLBACK SortSeqIdxArray (VoidPtr ptr1, VoidPtr ptr2)
7015 
7016 {
7017   Int2              compare;
7018   SMSeqIdxPtr PNTR  partp1 = ptr1;
7019   SMSeqIdxPtr PNTR  partp2 = ptr2;
7020   SMSeqIdxPtr       part1, part2;
7021 
7022   if (partp1 == NULL || partp2 == NULL) return 0;
7023   part1 = *((SMSeqIdxPtr PNTR) partp1);
7024   part2 = *((SMSeqIdxPtr PNTR) partp2);
7025   if (part1 == NULL || part2 == NULL) return 0;
7026   compare = StringCmp (part1->seqIdOfPart, part2->seqIdOfPart);
7027   if (compare > 0) {
7028     return 1;
7029   } else if (compare < 0) {
7030     return -1;
7031   }
7032   if (part1->cumOffset > part2->cumOffset) {
7033     return 1;
7034   } else if (part1->cumOffset < part2->cumOffset) {
7035     return -1;
7036   }
7037   return 0;
7038 }
7039 
WithinPartsSet(BioseqPtr bsp)7040 static Boolean WithinPartsSet (BioseqPtr bsp)
7041 
7042 {
7043   BioseqSetPtr  bssp;
7044 
7045   if (bsp == NULL) return FALSE;
7046 
7047   if (bsp->idx.parenttype == OBJ_BIOSEQSET && bsp->idx.parentptr != NULL) {
7048     bssp = (BioseqSetPtr) bsp->idx.parentptr;
7049     while (bssp != NULL) {
7050       if (bssp->_class == BioseqseqSet_class_parts) return TRUE;
7051       if (bssp->idx.parenttype != OBJ_BIOSEQSET) return FALSE;
7052       bssp = bssp->idx.parentptr;
7053     }
7054   }
7055 
7056   return FALSE;
7057 }
7058 
/*
 *  Recursively walks a SeqEntry and, for each Bioseq, does one of two things:
 *
 *  - A raw or delta Bioseq that lies within a parts set, while *lastsegbsp
 *    is set, gets *lastsegbsp recorded as its parentBioseq.
 *
 *  - A segmented, delta, or reference Bioseq gets two arrays built from its
 *    segparthead list: partsByLoc in list order, and partsBySeqId sorted
 *    with SortSeqIdxArray for binary search.  numsegs is set to the list
 *    length.  A segmented Bioseq is also remembered in *lastsegbsp.
 *
 *  After all members of a segset have been visited, *lastsegbsp is reset
 *  to NULL.  lastsegbsp itself may be NULL, in which case no parent
 *  tracking is done.
 */
static void IndexSegmentedParts (SeqEntryPtr sep, BioseqPtr PNTR lastsegbsp)

{
  BioseqPtr         bioseq;
  SMSeqIdxPtr PNTR  byLoc;
  SMSeqIdxPtr PNTR  bySeqId;
  SeqEntryPtr       child;
  BioseqExtraPtr    extra;
  Int4              idx;
  ObjMgrDataPtr     objdata;
  SMSeqIdxPtr       part;
  BioseqSetPtr      set;
  Int4              total;

  if (sep == NULL) return;

  if (IS_Bioseq_set (sep)) {
    set = (BioseqSetPtr) sep->data.ptrvalue;
    if (set == NULL) return;

    child = set->seq_set;
    while (child != NULL) {
      IndexSegmentedParts (child, lastsegbsp);
      child = child->next;
    }

    /* leaving a segset, so later raw Bioseqs must not inherit its master */

    if (lastsegbsp != NULL && set->_class == BioseqseqSet_class_segset) {
      *lastsegbsp = NULL;
    }
    return;
  }

  if (! IS_Bioseq (sep)) return;
  bioseq = (BioseqPtr) sep->data.ptrvalue;
  if (bioseq == NULL) return;

  /* raw (or delta) part packaged with a segmented bioseq */

  if ((bioseq->repr == Seq_repr_raw || bioseq->repr == Seq_repr_delta) &&
      lastsegbsp != NULL &&
      *lastsegbsp != NULL &&
      WithinPartsSet (bioseq)) {

    objdata = SeqMgrGetOmdpForBioseq (bioseq);
    if (objdata == NULL) return;

    extra = (BioseqExtraPtr) objdata->extradata;
    if (extra == NULL) {
      CreateBioseqExtraBlock (objdata, bioseq);
      extra = (BioseqExtraPtr) objdata->extradata;
      if (extra == NULL) return;
    }

    /* record segmented parent of the part, since all are packaged together */

    extra->parentBioseq = *lastsegbsp;
    return;
  }

  /* only segmented, delta, and reference bioseqs carry a parts list to index */

  if (! (bioseq->repr == Seq_repr_seg ||
         bioseq->repr == Seq_repr_delta ||
         bioseq->repr == Seq_repr_ref)) return;

  objdata = SeqMgrGetOmdpForBioseq (bioseq);
  if (objdata == NULL) return;

  extra = (BioseqExtraPtr) objdata->extradata;
  if (extra == NULL) {
    CreateBioseqExtraBlock (objdata, bioseq);
    extra = (BioseqExtraPtr) objdata->extradata;
    if (extra == NULL) return;
  }

  if (bioseq->repr == Seq_repr_seg && lastsegbsp != NULL) {
    *lastsegbsp = bioseq;
  }

  /* count the recorded parts */

  total = 0;
  for (part = extra->segparthead; part != NULL; part = part->next) {
    total++;
  }
  extra->numsegs = total;

  part = extra->segparthead;
  if (total < 1 || part == NULL) return;

  /* array of parts in original list (location) order */

  byLoc = (SMSeqIdxPtr PNTR) MemNew (sizeof (SMSeqIdxPtr) * (total + 1));
  extra->partsByLoc = byLoc;
  if (byLoc == NULL) return;

  for (idx = 0; idx < total && part != NULL; idx++) {
    byLoc [idx] = part;
    part = part->next;
  }

  /* second array with the same elements, sorted by SeqId for binary search */

  bySeqId = (SMSeqIdxPtr PNTR) MemNew (sizeof (SMSeqIdxPtr) * (total + 1));
  extra->partsBySeqId = bySeqId;
  if (bySeqId == NULL) return;

  for (idx = 0; idx < total; idx++) {
    bySeqId [idx] = byLoc [idx];
  }

  StableMergeSort ((Pointer) bySeqId, total, sizeof (SMSeqIdxPtr), SortSeqIdxArray);
}
7161 
7162 /*****************************************************************************
7163 *
7164 *   IndexRecordedFeatures callback builds sorted arrays of features and genes
7165 *
7166 *****************************************************************************/
7167 
/*
 *  Recursively walks a SeqEntry and, for each Bioseq that already has a
 *  BioseqExtra block with recorded features, builds the sorted lookup
 *  arrays stored in that block:
 *
 *    featsByID        features in the order found in the feature blocks
 *    featsBySfp       sorted by SeqFeatPtr value
 *    featsByPos       sorted by position; each item's index field is set
 *                     to its slot in this array
 *    featsByRev       sorted by reverse position (only if dorevfeats)
 *    featsByLabel     sorted by label
 *    genesByPos, mRNAsByPos, CDSsByPos, pubsByPos, orgsByPos, operonsByPos
 *                     built by SeqMgrBuildFeatureIndex
 *    genesByLocusTag  genesByPos re-sorted by locus_tag
 *
 *  If baseItemID is greater than 0, a gap feature with the same extremes as
 *  the gap feature just before it in featsByPos, and whose itemID is
 *  baseItemID or greater, is flagged as ignored.
 *
 *  For protein Bioseqs with a CDS, each feature location is additionally
 *  mapped onto the nucleotide and the last mapped stop point is stored in
 *  the item's dnaStop field.
 *
 *  Each array is allocated independently; if an allocation fails, that
 *  array is left NULL and the remaining ones are still attempted, except
 *  that nothing is built when featsByID itself cannot be allocated.
 */
static void IndexRecordedFeatures (SeqEntryPtr sep, Boolean dorevfeats, Uint4 baseItemID)

{
  BioseqPtr           bsp;
  BioseqExtraPtr      bspextra;
  BioseqSetPtr        bssp;
  SeqFeatPtr          cds;
  SMFeatBlockPtr      curr;
  SeqLocPtr           dnaloc;
  SMFeatItemPtr PNTR  featsByID;
  SMFeatItemPtr PNTR  featsBySfp;
  SMFeatItemPtr PNTR  featsByPos;
  SMFeatItemPtr PNTR  featsByRev;
  SMFeatItemPtr PNTR  featsByLabel;
  SMFeatItemPtr PNTR  genesByLocusTag;
  SMFeatItemPtr PNTR  genesByPos;
  Int4                i;
  Int4                j;
  SMFeatItemPtr       item;
  SMFeatItemPtr       last;
  BioseqPtr           nuc;
  Int4                numfeats;
  Int4                numgenes;
  ObjMgrDataPtr       omdp;
  Int4                pt;
  SeqLocPtr           segloc;
  SeqFeatPtr          sfp;
  SeqLocPtr           slp;
  Int4                stop;

  if (sep == NULL) return;
  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
      IndexRecordedFeatures (sep, dorevfeats, baseItemID);
    }
    return;
  }

  if (! IS_Bioseq (sep)) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return;
  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) return;

  numfeats = bspextra->numfeats;

  curr = bspextra->featlisthead;

  if (numfeats > 0 && curr != NULL) {

    /* build array of pointers into feature items */

    featsByID = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1));
    bspextra->featsByID = featsByID;

    if (featsByID != NULL) {
      i = 0;
      j = 0;
      while (i < numfeats && curr != NULL) {

        /* move to next block when the current one is exhausted, or has no
           data array (otherwise neither test would fire and the loop
           would never advance) */

        if (j >= curr->index || j >= bspextra->blocksize || curr->data == NULL) {
          j = 0;
          curr = curr->next;
        }
        if (curr != NULL && j < curr->index && curr->data != NULL) {
          featsByID [i] = &(curr->data [j]);
          i++;
          j++;
        }
      }
      if (i < numfeats) {
        ErrPostEx (SEV_WARNING, 0, 0, "SeqMgr indexing feature table build problem");
      }

      featsBySfp = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1));
      bspextra->featsBySfp = featsBySfp;

      if (featsBySfp != NULL) {
        for (i = 0; i < numfeats; i++) {
          featsBySfp [i] = featsByID [i];
        }

        /* sort all features by SeqFeatPtr value */

        StableMergeSort ((VoidPtr) featsBySfp, (size_t) numfeats, sizeof (SMFeatItemPtr), SortFeatItemListBySfp);
      }

      featsByPos = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1));
      bspextra->featsByPos = featsByPos;

      if (featsByPos != NULL) {
        for (i = 0; i < numfeats; i++) {
          featsByPos [i] = featsByID [i];
        }

        /* sort all features by feature location on bioseq */

        StableMergeSort ((VoidPtr) featsByPos, (size_t) numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByPos);

        /* record each item's slot in the position-sorted array */

        for (i = 0; i < numfeats; i++) {
          item = featsByPos [i];
          if (item != NULL) {
            item->index = i;
          }
        }

        /* gap feature in record overrides flatfile-generated feature */

        if (baseItemID > 0) {
          last = featsByPos [0];
          for (i = 1; i < numfeats; i++) {
            item = featsByPos [i];
            if (item != NULL && last != NULL) {
              if (last->subtype == FEATDEF_gap && item->subtype == FEATDEF_gap) {
                if (last->left == item->left && last->right == item->right) {
                  if (item->itemID >= baseItemID) {
                    item->ignore = TRUE;
                  }
                }
              }
            }
            last = item;
          }
        }

        /* build arrays of sorted gene, mRNA, CDS, publication, biosource, and operon features for lookup by overlap */

        bspextra->genesByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numgenes), 0, FEATDEF_GENE);
        bspextra->mRNAsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->nummRNAs), 0, FEATDEF_mRNA);
        bspextra->CDSsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numCDSs), 0, FEATDEF_CDS);
        bspextra->pubsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numpubs), 0, FEATDEF_PUB);
        bspextra->orgsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numorgs), 0, FEATDEF_BIOSRC);
        bspextra->operonsByPos = SeqMgrBuildFeatureIndex (bsp, &(bspextra->numoperons), 0, FEATDEF_operon);
      }

      if (dorevfeats) {
        featsByRev = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1));
        bspextra->featsByRev = featsByRev;

        if (featsByRev != NULL) {
          for (i = 0; i < numfeats; i++) {
            featsByRev [i] = featsByID [i];
          }

          /* optionally sort all features by feature reverse location on bioseq */

          StableMergeSort ((VoidPtr) featsByRev, (size_t) numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByRev);
        }
      }

      featsByLabel = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numfeats + 1));
      bspextra->featsByLabel = featsByLabel;

      if (featsByLabel != NULL) {
        for (i = 0; i < numfeats; i++) {
          featsByLabel [i] = featsByID [i];
        }

        /* sort all features by label value */

        StableMergeSort ((VoidPtr) featsByLabel, (size_t) numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByLabel);
      }

      /* genesByPos and numgenes were filled in above only if featsByPos was allocated */

      genesByPos = bspextra->genesByPos;
      numgenes = bspextra->numgenes;
      if (genesByPos != NULL && numgenes > 0) {

        genesByLocusTag = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numgenes + 1));
        bspextra->genesByLocusTag = genesByLocusTag;

        if (genesByLocusTag != NULL) {
          for (i = 0; i < numgenes; i++) {
            genesByLocusTag [i] = genesByPos [i];
          }

          /* sort by locus_tag value */

          StableMergeSort ((VoidPtr) genesByLocusTag, (size_t) numgenes, sizeof (SMFeatItemPtr), SortFeatItemListByLocusTag);
        }
      }
    }
  }

  /* remainder applies only to protein bioseqs that are the product of a CDS */

  if (numfeats < 1 || (! ISA_aa (bsp->mol))) return;
  cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
  if (cds == NULL) return;
  nuc = BioseqFindFromSeqLoc (cds->location);
  if (nuc == NULL) return;

  featsByPos = bspextra->featsByPos;
  if (featsByPos != NULL) {
    for (i = 0; i < numfeats; i++) {
      item = featsByPos [i];
      if (item != NULL) {
        sfp = item->sfp;
        if (sfp != NULL) {

          /* map to dna (on parts if segmented) */

          dnaloc = aaFeatLoc_to_dnaFeatLoc (cds, sfp->location);
          if (dnaloc != NULL) {

            /* map to segmented bioseq coordinates if necessary */

            segloc = SeqLocMergeExEx (nuc, dnaloc, NULL, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE);

            SeqLocFree (dnaloc);
            if (segloc != NULL) {

              slp = NULL;
              stop = -1;

              /* now find where last point maps on nucleotide for flatfile */

              while ((slp = SeqLocFindNext (segloc, slp)) != NULL) {
                pt = SeqLocStop (slp);
                if (pt != -1) {
                  stop = pt;
                }
              }
              item->dnaStop = stop;

              SeqLocFree (segloc);
            }
          }
        }
      }
    }
  }
}
7402 
7403 /*****************************************************************************
7404 *
7405 *   IndexFeaturesOnEntity makes feature pointers across all Bioseqs in entity
7406 *
7407 *****************************************************************************/
7408 
/*
 *  Recursively walks a SeqEntry and appends the contents of each Bioseq's
 *  own featsByID array to the caller-supplied featsByID array, starting at
 *  position *countP.  *countP is advanced by the number of pointers copied,
 *  so successive Bioseqs are laid out one after another.  Does nothing if
 *  any argument is NULL.  The caller is responsible for making featsByID
 *  large enough; no bounds are checked here.
 */
static void IndexFeaturesOnEntity (SeqEntryPtr sep, SMFeatItemPtr PNTR featsByID, Int4Ptr countP)

{
  Int4            base;
  BioseqPtr       bioseq;
  SeqEntryPtr     child;
  BioseqExtraPtr  extra;
  Int4            k;
  ObjMgrDataPtr   objdata;
  BioseqSetPtr    set;
  Int4            total;

  if (sep == NULL || featsByID == NULL || countP == NULL) return;

  if (IS_Bioseq_set (sep)) {
    set = (BioseqSetPtr) sep->data.ptrvalue;
    if (set == NULL) return;

    child = set->seq_set;
    while (child != NULL) {
      IndexFeaturesOnEntity (child, featsByID, countP);
      child = child->next;
    }
    return;
  }

  if (! IS_Bioseq (sep)) return;
  bioseq = (BioseqPtr) sep->data.ptrvalue;
  if (bioseq == NULL) return;

  objdata = SeqMgrGetOmdpForBioseq (bioseq);
  if (objdata == NULL) return;
  extra = (BioseqExtraPtr) objdata->extradata;
  if (extra == NULL) return;

  /* nothing to append unless this bioseq has its own indexed features */

  total = extra->numfeats;
  if (extra->featsByID == NULL || total <= 0) return;

  base = *countP;
  for (k = 0; k < total; k++) {
    featsByID [base + k] = extra->featsByID [k];
  }

  *countP = base + total;
}
7450 
7451 /*****************************************************************************
7452 *
7453 *   SortDescItemListByID callback sorts by descriptor itemID
7454 *   SortDescItemListBySdp sorts by descriptor pointer
7455 *   SortDescItemListByIndex sorts by descriptor index
7456 *
7457 *****************************************************************************/
7458 
SortDescItemListByID(VoidPtr vp1,VoidPtr vp2)7459 static int LIBCALLBACK SortDescItemListByID (VoidPtr vp1, VoidPtr vp2)
7460 
7461 {
7462   SMDescItemPtr PNTR  spp1 = vp1;
7463   SMDescItemPtr PNTR  spp2 = vp2;
7464   SMDescItemPtr       sp1;
7465   SMDescItemPtr       sp2;
7466 
7467   if (spp1 == NULL || spp2 == NULL) return 0;
7468   sp1 = *((SMDescItemPtr PNTR) spp1);
7469   sp2 = *((SMDescItemPtr PNTR) spp2);
7470   if (sp1 == NULL || sp2 == NULL) return 0;
7471 
7472   /* sort by descriptor itemID */
7473 
7474   if (sp1->itemID > sp2->itemID) {
7475     return 1;
7476   } else if (sp1->itemID < sp2->itemID) {
7477     return -1;
7478   }
7479 
7480   return 0;
7481 }
7482 
SortDescItemListBySdp(VoidPtr vp1,VoidPtr vp2)7483 static int LIBCALLBACK SortDescItemListBySdp (VoidPtr vp1, VoidPtr vp2)
7484 
7485 {
7486   SMDescItemPtr PNTR  spp1 = vp1;
7487   SMDescItemPtr PNTR  spp2 = vp2;
7488   SMDescItemPtr       sp1;
7489   SMDescItemPtr       sp2;
7490 
7491   if (spp1 == NULL || spp2 == NULL) return 0;
7492   sp1 = *((SMDescItemPtr PNTR) spp1);
7493   sp2 = *((SMDescItemPtr PNTR) spp2);
7494   if (sp1 == NULL || sp2 == NULL) return 0;
7495 
7496   /* sort by SeqDescrPtr value */
7497 
7498   if (sp1->sdp > sp2->sdp) {
7499     return 1;
7500   } else if (sp1->sdp < sp2->sdp) {
7501     return -1;
7502   }
7503 
7504   return 0;
7505 }
7506 
SortDescItemListByIndex(VoidPtr vp1,VoidPtr vp2)7507 static int LIBCALLBACK SortDescItemListByIndex (VoidPtr vp1, VoidPtr vp2)
7508 
7509 {
7510   SMDescItemPtr PNTR  spp1 = vp1;
7511   SMDescItemPtr PNTR  spp2 = vp2;
7512   SMDescItemPtr       sp1;
7513   SMDescItemPtr       sp2;
7514 
7515   if (spp1 == NULL || spp2 == NULL) return 0;
7516   sp1 = *((SMDescItemPtr PNTR) spp1);
7517   sp2 = *((SMDescItemPtr PNTR) spp2);
7518   if (sp1 == NULL || sp2 == NULL) return 0;
7519 
7520   /* sort by descriptor index */
7521 
7522   if (sp1->index > sp2->index) {
7523     return 1;
7524   } else if (sp1->index < sp2->index) {
7525     return -1;
7526   }
7527 
7528   return 0;
7529 }
7530 
7531 /*****************************************************************************
7532 *
7533 *   RecordDescriptorsInBioseqs callback records list of relevant descriptors
7534 *
7535 *****************************************************************************/
7536 
/*
 * Bioseq visitor callback (userdata is not used).  Walks the descriptors
 * returned by SeqMgrGetNextDescriptor for bsp, builds a ValNode chain of
 * SMDescItem records filled from the iteration context, and stores the
 * chain head and the number of descriptors seen on the Bioseq's extra block.
 */
static void RecordDescriptorsInBioseqs (BioseqPtr bsp, Pointer userdata)

{
  BioseqExtraPtr      bspextra;
  SeqDescrPtr         curr;
  SeqMgrDescContext   dcontext;
  SMDescItemPtr       item;
  ValNodePtr          listhead = NULL;
  ValNodePtr          listtail = NULL;
  ValNodePtr          node;
  ObjMgrDataPtr       omdp;
  Int4                total = 0;

  if (bsp == NULL) return;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return;
  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) return;

  for (curr = SeqMgrGetNextDescriptor (bsp, NULL, 0, &dcontext);
       curr != NULL;
       curr = SeqMgrGetNextDescriptor (bsp, curr, 0, &dcontext)) {

    /* every descriptor is counted, even if its record cannot be allocated */

    total++;
    item = (SMDescItemPtr) MemNew (sizeof (SMDescItem));
    if (item == NULL) continue;

    /* snapshot the iteration context into the record */

    item->sdp = curr;
    item->sep = dcontext.sep;
    item->itemID = dcontext.itemID;
    item->index = dcontext.index;
    item->level = dcontext.level;
    item->seqdesctype = dcontext.seqdesctype;

    /* append a node to the chain and hang the record on it */

    node = ValNodeNew (listtail);
    if (listhead == NULL) {
      listhead = node;
    }
    listtail = node;
    if (node != NULL) {
      node->data.ptrvalue = (Pointer) item;
    }
  }

  bspextra->desclisthead = listhead;
  bspextra->numdescs = total;
}
7585 
7586 /*****************************************************************************
7587 *
7588 *   RecordDescriptorsOnTopSet callback records list of all descriptors
7589 *
7590 *****************************************************************************/
7591 
7592 typedef struct descindex {
7593   ValNodePtr  deschead;
7594   ValNodePtr  lastdesc;
7595   Int4        numdescs;
7596 } DescIndex, PNTR DescIndexPtr;
7597 
RecordAllDescsCallback(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)7598 static void RecordAllDescsCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
7599 
7600 {
7601   BioseqPtr      bsp;
7602   BioseqSetPtr   bssp;
7603   DescIndexPtr   dxp;
7604   ObjValNodePtr  ovp;
7605   SMDescItemPtr  sdip;
7606   SeqDescrPtr    sdp = NULL;
7607   ValNodePtr     vnp;
7608 
7609   if (sep == NULL || mydata == NULL) return;
7610   dxp = (DescIndexPtr) mydata;
7611 
7612   if (IS_Bioseq (sep)) {
7613     bsp = (BioseqPtr) sep->data.ptrvalue;
7614     if (bsp == NULL) return;
7615     sdp = bsp->descr;
7616   } else if (IS_Bioseq_set (sep)) {
7617     bssp = (BioseqSetPtr) sep->data.ptrvalue;
7618     if (bssp == NULL) return;
7619     sdp = bssp->descr;
7620   } else return;
7621 
7622   while (sdp != NULL) {
7623     (dxp->numdescs)++;
7624     sdip = (SMDescItemPtr) MemNew (sizeof (SMDescItem));
7625     if (sdip != NULL) {
7626       vnp = ValNodeNew (dxp->lastdesc);
7627       if (dxp->deschead == NULL) {
7628         dxp->deschead = vnp;
7629       }
7630       dxp->lastdesc = vnp;
7631       if (vnp != NULL) {
7632         vnp->data.ptrvalue = (Pointer) sdip;
7633       }
7634       sdip->sdp = sdp;
7635       sdip->sep = sep;
7636       if (sdp->extended != 0) {
7637         ovp = (ObjValNodePtr) sdp;
7638         sdip->itemID = ovp->idx.itemID;
7639       }
7640       sdip->index = 0;
7641       sdip->level = indent;
7642       sdip->seqdesctype = sdp->choice;
7643     }
7644     sdp = sdp->next;
7645   }
7646 }
7647 
/*
 * For a SeqEntry holding a Bioseq-set, collects every descriptor found by
 * exploring the entry (via RecordAllDescsCallback) and stores the resulting
 * chain and count on the set's extra block, creating that block if absent.
 * Does nothing for non-set entries.
 */
static void RecordDescriptorsOnTopSet (SeqEntryPtr sep)

{
  BioseqExtraPtr  bspextra;
  BioseqSetPtr    bssp;
  DescIndex       collected;
  ObjMgrDataPtr   omdp;

  if (sep == NULL || ! IS_Bioseq_set (sep)) return;

  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return;

  omdp = SeqMgrGetOmdpForPointer (bssp);
  if (omdp == NULL) return;

  /* make the extra block on demand */

  if (omdp->extradata == NULL) {
    CreateBioseqExtraBlock (omdp, NULL);
  }
  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) return;

  collected.deschead = NULL;
  collected.lastdesc = NULL;
  collected.numdescs = 0;

  SeqEntryExplore (sep, (Pointer) &collected, RecordAllDescsCallback);

  bspextra->desclisthead = collected.deschead;
  bspextra->numdescs = collected.numdescs;
}
7680 
7681 /*****************************************************************************
7682 *
7683 *   IndexRecordedDescriptors callback builds sorted arrays of descriptors
7684 *
7685 *****************************************************************************/
7686 
/*
 * SeqEntryExplore callback (mydata, index and indent are not used).  Turns
 * the descriptor chain recorded on the extra block of the Bioseq or
 * Bioseq-set in sep into three pointer arrays over the same SMDescItem
 * records, sorted respectively by itemID, by SeqDescrPtr value and by index.
 * The arrays are stored on the extra block as soon as they are allocated;
 * they are filled and sorted only if all three allocations succeed.
 */
static void IndexRecordedDescriptors (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr           bsp;
  BioseqExtraPtr      bspextra;
  BioseqSetPtr        bssp;
  SMDescItemPtr PNTR  byID;
  SMDescItemPtr PNTR  byIndex;
  SMDescItemPtr PNTR  bySdp;
  ValNodePtr          chain;
  SMDescItemPtr       item;
  ValNodePtr          node;
  Int4                numdescs;
  ObjMgrDataPtr       omdp = NULL;
  Int4                slot;

  if (sep == NULL) return;
  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return;
    omdp = SeqMgrGetOmdpForBioseq (bsp);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp == NULL) return;
    omdp = SeqMgrGetOmdpForPointer (bssp);
  } else return;

  if (omdp == NULL) return;
  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) return;

  chain = bspextra->desclisthead;
  numdescs = bspextra->numdescs;
  if (chain == NULL || numdescs <= 0) return;

  /* one spare slot is allocated beyond numdescs in each array */

  byID = (SMDescItemPtr PNTR) MemNew (sizeof (SMDescItemPtr) * (numdescs + 1));
  bspextra->descrsByID = byID;

  bySdp = (SMDescItemPtr PNTR) MemNew (sizeof (SMDescItemPtr) * (numdescs + 1));
  bspextra->descrsBySdp = bySdp;

  byIndex = (SMDescItemPtr PNTR) MemNew (sizeof (SMDescItemPtr) * (numdescs + 1));
  bspextra->descrsByIndex = byIndex;

  if (byID == NULL || bySdp == NULL || byIndex == NULL) return;

  /* load all three arrays with the same records, in chain order */

  slot = 0;
  node = chain;
  while (slot < numdescs && node != NULL) {
    item = (SMDescItemPtr) node->data.ptrvalue;
    if (item != NULL) {
      byID [slot] = item;
      bySdp [slot] = item;
      byIndex [slot] = item;
    }
    slot++;
    node = node->next;
  }

  /* give each array its own ordering */

  StableMergeSort ((VoidPtr) byID, (size_t) numdescs, sizeof (SMDescItemPtr), SortDescItemListByID);
  StableMergeSort ((VoidPtr) bySdp, (size_t) numdescs, sizeof (SMDescItemPtr), SortDescItemListBySdp);
  StableMergeSort ((VoidPtr) byIndex, (size_t) numdescs, sizeof (SMDescItemPtr), SortDescItemListByIndex);
}
7752 
7753 /*****************************************************************************
7754 *
7755 *   DoSegmentedProtein needed because SeqIdWithinBioseq may fail for segmented proteins
7756 *
7757 *****************************************************************************/
7758 
/*
 * Bioseq visitor callback (userdata is not used).  Sets the best protein
 * feature (bspextra->protFeat) for protein Bioseqs that are either segmented
 * or a part of a segmented parent, when none has been set yet:
 *   - a part takes the best protein feature of its parent;
 *   - a segmented Bioseq takes a protein feature spanning its full length
 *     (the last such feature returned by the label-ordered iterator wins).
 * A NULL bsp is ignored.
 */
static void DoSegmentedProtein (BioseqPtr bsp, Pointer userdata)

{
  BioseqExtraPtr     bspextra;
  SeqMgrFeatContext  context;
  ObjMgrDataPtr      omdp;
  BioseqPtr          parent = NULL;
  SeqFeatPtr         sfp;

  /* guard before dereferencing, as the other Bioseq callbacks in this file do */

  if (bsp == NULL) return;

  if (! ISA_aa (bsp->mol)) return;

  /* a non-segmented protein is only of interest if it is a part of something */

  if (bsp->repr != Seq_repr_seg) {
    parent = SeqMgrGetParentOfPart (bsp, NULL);
    if (parent == NULL) return;
  }

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return;

  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL) return;

  /* if it already has a best protein feature, return */

  if (bspextra->protFeat != NULL) return;

  /* part of parent inherits best protein from parent */

  if (bsp->repr != Seq_repr_seg && parent != NULL) {
    sfp = SeqMgrGetBestProteinFeature (parent, NULL);
    bspextra->protFeat = sfp;
    return;
  }

  /* now check for full-length proteins on segmented parent */

  sfp = SeqMgrGetNextFeatureByLabel (bsp, NULL, SEQFEAT_PROT, 0, &context);
  while (sfp != NULL) {
    if (context.left == 0 && context.right == bsp->length - 1) {
      bspextra->protFeat = sfp;
    }

    sfp = SeqMgrGetNextFeatureByLabel (bsp, sfp, SEQFEAT_PROT, 0, &context);
  }
}
7804 
7805 /*****************************************************************************
7806 *
7807 *   IndexAnnotDescsOnBioseqs
7808 *
7809 *****************************************************************************/
7810 
SortAbpVnpByBsp(VoidPtr ptr1,VoidPtr ptr2)7811 static int LIBCALLBACK SortAbpVnpByBsp (VoidPtr ptr1, VoidPtr ptr2)
7812 
7813 {
7814   AdpBspPtr      abp1, abp2;
7815   AnnotDescPtr   adp1, adp2;
7816   BioseqPtr      bsp1, bsp2;
7817   ObjValNodePtr  ovp1, ovp2;
7818   ValNodePtr     vnp1, vnp2;
7819 
7820   if (ptr1 == NULL || ptr2 == NULL) return 0;
7821   vnp1 = *((ValNodePtr PNTR) ptr1);
7822   vnp2 = *((ValNodePtr PNTR) ptr2);
7823   if (vnp1 == NULL || vnp2 == NULL) return 0;
7824   abp1 = (AdpBspPtr) vnp1->data.ptrvalue;
7825   abp2 = (AdpBspPtr) vnp2->data.ptrvalue;
7826   if (abp1 == NULL || abp2 == NULL) return 0;
7827   bsp1 = (BioseqPtr) abp1->bsp;
7828   bsp2 = (BioseqPtr) abp2->bsp;
7829   if (bsp1 > bsp2) {
7830     return 1;
7831   } else if (bsp1 < bsp2) {
7832     return -1;
7833   }
7834   adp1 = (AnnotDescPtr) abp1->adp;
7835   adp2 = (AnnotDescPtr) abp2->adp;
7836   if (adp1 == NULL || adp2 == NULL) return 0;
7837   if (adp1->extended != 0 && adp2->extended != 0) {
7838     ovp1 = (ObjValNodePtr) adp1;
7839     ovp2 = (ObjValNodePtr) adp2;
7840     if (ovp1->idx.itemID > ovp2->idx.itemID) {
7841       return 1;
7842     } else if (ovp1->idx.itemID < ovp2->idx.itemID) {
7843       return -1;
7844     }
7845   }
7846   return 0;
7847 }
7848 
GetBspFromVnpAbpBsp(ValNodePtr vnp)7849 static BioseqPtr GetBspFromVnpAbpBsp (
7850   ValNodePtr vnp
7851 )
7852 
7853 {
7854   AdpBspPtr  abp;
7855 
7856   if (vnp == NULL) return NULL;
7857   abp = (AdpBspPtr) vnp->data.ptrvalue;
7858   if (abp == NULL) return NULL;
7859   return abp->bsp;
7860 }
7861 
/*
 * Walks a chain of AdpBsp records in which records for the same Bioseq are
 * adjacent (the caller sorts it with SortAbpVnpByBsp first).  For each run
 * that targets a non-NULL Bioseq whose object manager entry is OBJ_BIOSEQ,
 * allocates an array of the run's annot descriptors and stores it, with its
 * count, on that Bioseq's extra block.
 */
static void IndexAnnotDescsOnBioseqs (
  ValNodePtr adphead
)

{
  AdpBspPtr          abp;
  AnnotDescPtr PNTR  annotDescByID;
  BioseqPtr          bsp;
  BioseqExtraPtr     bspextra;
  ObjMgrDataPtr      omdp;
  ValNodePtr         runend, runstart, vnp;
  Int4               runlen, slot;

  for (runstart = adphead; runstart != NULL; runstart = runend) {

    /* measure the run of consecutive nodes that share this node's Bioseq */

    bsp = GetBspFromVnpAbpBsp (runstart);
    runlen = 1;
    for (runend = runstart->next;
         runend != NULL && GetBspFromVnpAbpBsp (runend) == bsp;
         runend = runend->next) {
      runlen++;
    }

    if (bsp == NULL) continue;

    omdp = SeqMgrGetOmdpForBioseq (bsp);
    if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) continue;

    CreateBioseqExtraBlock (omdp, NULL);
    bspextra = (BioseqExtraPtr) omdp->extradata;
    if (bspextra == NULL) continue;

    annotDescByID = (AnnotDescPtr PNTR) MemNew (sizeof (AnnotDescPtr) * (runlen + 1));
    if (annotDescByID == NULL) continue;

    /* a node without a record leaves its slot empty */

    for (vnp = runstart, slot = 0; vnp != NULL && slot < runlen; vnp = vnp->next, slot++) {
      abp = (AdpBspPtr) vnp->data.ptrvalue;
      if (abp == NULL) continue;
      annotDescByID [slot] = abp->adp;
    }

    bspextra->annotDescByID = annotDescByID;
    bspextra->numannotdesc = runlen;
  }
}
7913 
/*
 * Builds the featsByFeatID array on bspextra from its featsByID array.
 * A feature is included when it is not flagged ignore and its id has
 * choice 3 with a non-NULL ObjectId; the key string is the ObjectId's str
 * when that has text, otherwise its integer id printed in decimal.  The
 * resulting SMFidItem pointers are sorted with SortFidListByFeatID and
 * stored together with their count.
 */
static void IndexFeatIDsOnEntity (
  BioseqExtraPtr bspextra
)

{
  Char                buf [32];
  SMFidItemPtr PNTR   featsByFeatID;
  SMFeatItemPtr PNTR  featsByID;
  SMFidItemPtr        fid;
  ValNodePtr          first = NULL, tail = NULL;
  SMFeatItemPtr       item;
  Int4                j;
  Int4                len;
  ValNodePtr          node;
  ObjectIdPtr         oip;
  SeqFeatPtr          sfp;

  if (bspextra == NULL || bspextra->numfeats < 1 || bspextra->featsByID == NULL) return;

  /* first pass: collect one SMFidItem per qualifying feature on a chain */

  featsByID = bspextra->featsByID;
  for (j = 0; j < bspextra->numfeats; j++) {
    item = featsByID [j];
    if (item == NULL || item->ignore) continue;
    sfp = item->sfp;
    if (sfp == NULL || sfp->id.choice != 3) continue;
    oip = (ObjectIdPtr) sfp->id.value.ptrvalue;
    if (oip == NULL) continue;

    fid = (SMFidItemPtr) MemNew (sizeof (SMFidItem));
    if (fid == NULL) continue;

    if (! StringDoesHaveText (oip->str)) {
      sprintf (buf, "%ld", (long) oip->id);
      fid->fid = StringSave (buf);
    } else {
      fid->fid = StringSave (oip->str);
    }
    fid->feat = item;

    node = ValNodeAddPointer (&tail, 0, (Pointer) fid);
    if (first == NULL) {
      first = node;
    }
    tail = node;
  }

  len = ValNodeLen (first);
  if (len < 1) return;

  /* second pass: move the records into an array and sort it */

  featsByFeatID = (SMFidItemPtr PNTR) MemNew (sizeof (SMFidItemPtr) * (len + 1));
  if (featsByFeatID != NULL) {
    j = 0;
    node = first;
    while (node != NULL) {
      fid = (SMFidItemPtr) node->data.ptrvalue;
      if (fid != NULL) {
        featsByFeatID [j] = fid;
      }
      node = node->next;
      j++;
    }

    StableMergeSort ((VoidPtr) featsByFeatID, (size_t) len, sizeof (SMFidItemPtr), SortFidListByFeatID);

    bspextra->featsByFeatID = featsByFeatID;
    bspextra->numfids = len;
  }

  /* only the chain nodes are released here, not the records they carried */

  ValNodeFree (first);
}
7978 
7979 /*****************************************************************************
7980 *
7981 *   SeqMgrReindexBioseqExtraData refreshes internal indices for rapid retrieval
7982 *
7983 *****************************************************************************/
7984 
s_DoSeqMgrIndexFeatures(Uint2 entityID,Pointer ptr,Boolean flip,Boolean dorevfeats,ValNodePtr extra)7985 static Uint2 LIBCALL s_DoSeqMgrIndexFeatures (
7986   Uint2 entityID,
7987   Pointer ptr,
7988   Boolean flip,
7989   Boolean dorevfeats,
7990   ValNodePtr extra
7991 )
7992 
7993 {
7994   AdpBspPtr           abp;
7995   AnnotDescPtr PNTR   annotDescByID;
7996   Uint4               baseItemID = 0;
7997   BioseqPtr           bsp;
7998   BioseqExtraPtr      bspextra;
7999   Int4                count;
8000   ExtraIndex          exind;
8001   SMFeatItemPtr PNTR  featsByID;
8002   BioseqPtr           lastsegbsp = NULL;
8003   Boolean             objMgrFilter [OBJ_MAX];
8004   SeqEntryPtr         oldscope;
8005   ObjMgrDataPtr       omdp;
8006   ValNodePtr          publist;
8007   SeqAnnotPtr         sap;
8008   SeqEntryPtr         sep;
8009   SeqFeatPtr          sfp;
8010   ValNodePtr          vnp;
8011 
8012   if (entityID == 0) {
8013     entityID = ObjMgrGetEntityIDForPointer (ptr);
8014   }
8015   if (entityID == 0) return 0;
8016 
8017   /* reset any existing index data on all bioseqs in entity */
8018 
8019   SeqMgrClearFeatureIndexes (entityID, NULL);
8020 
8021   /* want to scope to bioseqs within the entity, to allow for colliding IDs */
8022 
8023   sep = SeqMgrGetTopSeqEntryForEntity (entityID);
8024 
8025   /* make top SeqEntry if only Bioseq or BioseqSet was read */
8026 
8027   if (sep == NULL) {
8028     omdp = ObjMgrGetData (entityID);
8029     if (omdp != NULL) {
8030       if (omdp->datatype == OBJ_BIOSEQ || omdp->datatype == OBJ_BIOSEQSET) {
8031         sep = SeqEntryNew ();
8032         if (sep != NULL) {
8033           if (omdp->datatype == OBJ_BIOSEQ) {
8034             sep->choice = 1;
8035             sep->data.ptrvalue = omdp->dataptr;
8036             SeqMgrSeqEntry (SM_BIOSEQ, omdp->dataptr, sep);
8037           } else {
8038             sep->choice = 2;
8039             sep->data.ptrvalue = omdp->dataptr;
8040             SeqMgrSeqEntry (SM_BIOSEQSET, omdp->dataptr, sep);
8041           }
8042         }
8043         sep = GetTopSeqEntryForEntityID (entityID);
8044       }
8045     }
8046   }
8047 
8048   if (sep == NULL) return 0;
8049 
8050   /* clean up many old-style ASN.1 problems without changing structure */
8051 
8052   BasicSeqEntryCleanup (sep);
8053 
8054   /* do the same cleanup to remotely fetched feature tables */
8055 
8056   for (vnp = extra; vnp != NULL; vnp = vnp->next) {
8057     bsp = (BioseqPtr) vnp->data.ptrvalue;
8058     if (bsp == NULL) continue;
8059     for (sap = bsp->annot; sap != NULL; sap = sap->next) {
8060       if (sap->type != 1) continue;
8061       for (sfp = (SeqFeatPtr) sap->data; sfp != NULL; sfp = sfp->next) {
8062         publist = NULL;
8063         CleanUpSeqFeat (sfp, FALSE, FALSE, TRUE, TRUE, &publist);
8064         ValNodeFreeData (publist);
8065       }
8066     }
8067   }
8068 
8069   /* set gather/objmgr fields now present in several objects */
8070 
8071   AssignIDsInEntityEx (entityID, 0, NULL, extra);
8072 
8073   /* get first feature itemID in remote feature tables (including generated gaps) */
8074 
8075   for (vnp = extra; vnp != NULL && baseItemID == 0; vnp = vnp->next) {
8076     bsp = (BioseqPtr) vnp->data.ptrvalue;
8077     if (bsp == NULL) continue;
8078     for (sap = bsp->annot; sap != NULL && baseItemID == 0; sap = sap->next) {
8079       if (sap->type != 1) continue;
8080       for (sfp = (SeqFeatPtr) sap->data; sfp != NULL && baseItemID == 0; sfp = sfp->next) {
8081         baseItemID = sfp->idx.itemID;
8082       }
8083     }
8084   }
8085 
8086   /* set scope for FindAppropriateBioseq, FindFirstLocalBioseq */
8087 
8088   oldscope = SeqEntrySetScope (sep);
8089 
8090   /* gather all segmented locations */
8091 
8092   exind.topsep = sep;
8093   exind.lastbsp = NULL;
8094   exind.lastsap = NULL;
8095   exind.lastbssp = NULL;
8096   exind.alignhead = NULL;
8097   exind.lastalign = NULL;
8098   exind.adphead = NULL;
8099   exind.lastadp = NULL;
8100   exind.segpartail = NULL;
8101   exind.bspcount = 0;
8102   exind.aligncount = 0;
8103   exind.descrcount = 0;
8104   exind.featcount = 0;
8105   exind.adpcount = 0;
8106   exind.seqlitid = 0;
8107   exind.flip = flip;
8108 
8109   MemSet ((Pointer) objMgrFilter, 0, sizeof (objMgrFilter));
8110   objMgrFilter [OBJ_BIOSEQ] = TRUE;
8111   /* objMgrFilter [OBJ_BIOSEQSET] = TRUE not needed */
8112   objMgrFilter [OBJ_BIOSEQ_SEG] = TRUE;
8113   objMgrFilter [OBJ_BIOSEQ_DELTA] = TRUE;
8114   GatherObjectsInEntity (entityID, 0, NULL, RecordSegmentsInBioseqs, (Pointer) &exind, objMgrFilter);
8115 
8116   /* build indexes to speed mapping of parts to segmented bioseq */
8117 
8118   lastsegbsp = NULL;
8119 
8120   IndexSegmentedParts (sep, &lastsegbsp);
8121 
8122   /* now gather to get descriptor itemID counts on each bioseq or bioseq set,
8123      and record features on the bioseq indicated by the feature location */
8124 
8125   exind.topsep = sep;
8126   exind.lastbsp = NULL;
8127   exind.lastsap = NULL;
8128   exind.lastbssp = NULL;
8129   exind.alignhead = NULL;
8130   exind.lastalign = NULL;
8131   exind.adphead = NULL;
8132   exind.lastadp = NULL;
8133   exind.segpartail = NULL;
8134   exind.bspcount = 0;
8135   exind.aligncount = 0;
8136   exind.descrcount = 0;
8137   exind.featcount = 0;
8138   exind.adpcount = 0;
8139   exind.seqlitid = 0;
8140   exind.flip = flip;
8141 
8142   MemSet ((Pointer) objMgrFilter, 0, sizeof (objMgrFilter));
8143   objMgrFilter [OBJ_BIOSEQ] = TRUE;
8144   objMgrFilter [OBJ_BIOSEQSET] = TRUE;
8145   objMgrFilter [OBJ_SEQANNOT] = TRUE;
8146   objMgrFilter [OBJ_ANNOTDESC] = TRUE;
8147   objMgrFilter [OBJ_SEQFEAT] = TRUE;
8148   objMgrFilter [OBJ_SEQALIGN] = TRUE;
8149   GatherObjectsInEntityEx (entityID, 0, NULL, RecordFeaturesInBioseqs, (Pointer) &exind, objMgrFilter, extra);
8150 
8151   /* finish building array of sorted features on each indexed bioseq */
8152 
8153   IndexRecordedFeatures (sep, dorevfeats, baseItemID);
8154 
8155   /* set best protein feature for segmented protein bioseqs and their parts */
8156 
8157   VisitBioseqsInSep (sep, NULL, DoSegmentedProtein);
8158 
8159   /* resetset scope used to limit FindAppropriateBioseq, FindFirstLocalBioseq */
8160 
8161   SeqEntrySetScope (oldscope);
8162 
8163   /* stamp top of entity with time of indexing */
8164 
8165   omdp = ObjMgrGetData (entityID);
8166   if (omdp != NULL) {
8167     omdp->indexed = GetSecs ();
8168 
8169     /* alignment ID to SeqAlignPtr index always goes on top of entity */
8170 
8171     SeqMgrIndexAlignments (entityID);
8172 
8173     /* master indexes if top of entity is not a Bioseq */
8174 
8175     if (omdp->datatype != OBJ_BIOSEQ) {
8176 
8177       CreateBioseqExtraBlock (omdp, NULL);
8178       bspextra = (BioseqExtraPtr) omdp->extradata;
8179       if (bspextra != NULL) {
8180 
8181         /* make master index of features by itemID at top of entity */
8182 
8183         if (exind.featcount > 0) {
8184           featsByID = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (exind.featcount + 1));
8185           if (featsByID != NULL) {
8186             count = 0;
8187             IndexFeaturesOnEntity (sep, featsByID, &count);
8188 
8189             /* sort all features on entity-wide list by itemID */
8190 
8191             StableMergeSort ((VoidPtr) featsByID, (size_t) count, sizeof (SMFeatItemPtr), SortFeatItemListByID);
8192 
8193             bspextra->featsByID = featsByID;
8194             bspextra->numfeats = count;
8195           }
8196         }
8197 
8198        /* make master index of annot descs by itemID at top of entity */
8199 
8200         if (exind.adpcount > 0) {
8201           annotDescByID = (AnnotDescPtr PNTR) MemNew (sizeof (AnnotDescPtr) * (exind.adpcount + 1));
8202           if (annotDescByID != NULL) {
8203             for (vnp = exind.adphead, count = 0; vnp != NULL && count < (Int4) exind.adpcount; vnp = vnp->next, count++) {
8204               abp = (AdpBspPtr) vnp->data.ptrvalue;
8205               if (abp == NULL) continue;
8206               annotDescByID [count] = abp->adp;
8207             }
8208 
8209             bspextra->annotDescByID = annotDescByID;
8210             bspextra->numannotdesc = exind.adpcount;
8211           }
8212         }
8213       }
8214     }
8215 
8216     /* add feature ID indexto top of entity */
8217 
8218     CreateBioseqExtraBlock (omdp, NULL);
8219     bspextra = (BioseqExtraPtr) omdp->extradata;
8220     if (bspextra != NULL) {
8221       IndexFeatIDsOnEntity (bspextra);
8222     }
8223   }
8224 
8225   /* finish indexing list of descriptors on each indexed bioseq */
8226 
8227   VisitBioseqsInSep (sep, NULL, RecordDescriptorsInBioseqs);
8228 
8229   /* index annot descs on each target bioseq */
8230 
8231   if (exind.adphead != NULL) {
8232     exind.adphead = ValNodeSort (exind.adphead, SortAbpVnpByBsp);
8233     IndexAnnotDescsOnBioseqs (exind.adphead);
8234   }
8235 
8236   if (IS_Bioseq_set (sep)) {
8237     RecordDescriptorsOnTopSet (sep);
8238   }
8239 
8240   SeqEntryExplore (sep, NULL, IndexRecordedDescriptors);
8241 
8242   /* free chain of SeqAlignPtr now that index is built */
8243 
8244   ValNodeFree (exind.alignhead);
8245 
8246   /* free chain of AdpBspPtr (AnnotDescPtr and BioseqPtr) now that index is built */
8247 
8248   ValNodeFreeData (exind.adphead);
8249 
8250   return entityID;
8251 }
8252 
8253 static TNlmMutex  smp_feat_index_mutex = NULL;
8254 
SeqMgrIndexFeaturesExEx(Uint2 entityID,Pointer ptr,Boolean flip,Boolean dorevfeats,ValNodePtr extra)8255 NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesExEx (
8256   Uint2 entityID,
8257   Pointer ptr,
8258   Boolean flip,
8259   Boolean dorevfeats,
8260   ValNodePtr extra
8261 )
8262 
8263 {
8264   Uint2  eID;
8265   Int4   ret;
8266 
8267   ret = NlmMutexLockEx (&smp_feat_index_mutex);
8268   if (ret) {
8269     ErrPostEx (SEV_FATAL, 0, 0, "SeqMgrIndexFeatures mutex failed [%ld]", (long) ret);
8270     return 0;
8271   }
8272 
8273   eID = s_DoSeqMgrIndexFeatures (entityID, ptr, flip, dorevfeats, extra);
8274 
8275   NlmMutexUnlock (smp_feat_index_mutex);
8276 
8277   return eID;
8278 }
8279 
SeqMgrIndexFeaturesEx(Uint2 entityID,Pointer ptr,Boolean flip,Boolean dorevfeats)8280 NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeaturesEx (
8281   Uint2 entityID,
8282   Pointer ptr,
8283   Boolean flip,
8284   Boolean dorevfeats
8285 )
8286 
8287 {
8288   return SeqMgrIndexFeaturesExEx (entityID, ptr, flip, dorevfeats, NULL);
8289 }
8290 
SeqMgrIndexFeatures(Uint2 entityID,Pointer ptr)8291 NLM_EXTERN Uint2 LIBCALL SeqMgrIndexFeatures (
8292   Uint2 entityID,
8293   Pointer ptr
8294 )
8295 
8296 {
8297   return SeqMgrIndexFeaturesExEx (entityID, ptr, FALSE, FALSE, NULL);
8298 }
8299 
8300 /*****************************************************************************
8301 *
*   SeqMgrFeaturesAreIndexed checks for presence of time of indexing stamp
8303 *
8304 *****************************************************************************/
8305 
SeqMgrFeaturesAreIndexed(Uint2 entityID)8306 NLM_EXTERN time_t LIBCALL SeqMgrFeaturesAreIndexed (Uint2 entityID)
8307 
8308 {
8309   ObjMgrDataPtr  omdp;
8310 
8311   if (entityID == 0) return 0;
8312   omdp = ObjMgrGetData (entityID);
8313   if (omdp == NULL) return 0;
8314   return omdp->indexed;
8315 }
8316 
8317 /*****************************************************************************
8318 *
8319 *   SeqMgrGetBestProteinFeature and SeqMgrGetCDSgivenProduct take a protein
8320 *     bioseq to get the best protein feature or encoding CDS
8321 *   SeqMgrGetRNAgivenProduct takes an mRNA (cDNA) bioseq and gets encoding mRNA
8322 *     feature on the genomic bioseq
8323 *
8324 *****************************************************************************/
8325 
SeqMgrGetProtXref(SeqFeatPtr sfp)8326 NLM_EXTERN ProtRefPtr LIBCALL SeqMgrGetProtXref (SeqFeatPtr sfp)
8327 
8328 {
8329   ProtRefPtr      prp = NULL;
8330   SeqFeatXrefPtr  xref;
8331 
8332   if (sfp == NULL) return NULL;
8333   xref = sfp->xref;
8334   while (xref != NULL && xref->data.choice != SEQFEAT_PROT) {
8335     xref = xref->next;
8336   }
8337   if (xref != NULL) {
8338     prp = (ProtRefPtr) xref->data.value.ptrvalue;
8339   }
8340   return prp;
8341 }
8342 
/*
 * Fills *context from the indexed SMFeatItem that SeqMgrFindSMFeatItemPtr
 * returns for sfp.  Leaves context untouched when any argument is NULL or no
 * indexed item is found.  The context index is the item's index plus one,
 * and userdata is cleared.
 */
static void SetContextForFeature (SeqFeatPtr sfp, SeqMgrFeatContext PNTR context, ObjMgrDataPtr omdp)

{
  SeqFeatPtr     feat;
  SMFeatItemPtr  item;

  if (sfp == NULL || context == NULL || omdp == NULL) return;
  item = SeqMgrFindSMFeatItemPtr (sfp);
  if (item == NULL) return;
  feat = item->sfp;

  /* prefer the entityID stamped on the feature; otherwise ask the object manager */

  if (feat == NULL || feat->idx.entityID <= 0) {
    context->entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
  } else {
    context->entityID = feat->idx.entityID;
  }

  /* identity of the feature and where it lives */

  context->itemID = item->itemID;
  context->sfp = feat;
  context->sap = item->sap;
  context->bsp = item->bsp;
  context->label = item->label;

  /* location summary copied from the index record */

  context->left = item->left;
  context->right = item->right;
  context->dnaStop = item->dnaStop;
  context->partialL = item->partialL;
  context->partialR = item->partialR;
  context->farloc = item->farloc;
  context->bad_order = item->bad_order;
  context->mixed_strand = item->mixed_strand;
  context->ts_image = item->ts_image;
  context->strand = item->strand;

  /* feature type comes from the feature itself when present, else from the subtype */

  if (feat == NULL) {
    context->seqfeattype = FindFeatFromFeatDefType (item->subtype);
  } else {
    context->seqfeattype = feat->data.choice;
  }
  context->featdeftype = item->subtype;

  context->numivals = item->numivals;
  context->ivals = item->ivals;
  context->userdata = NULL;
  context->omdp = (Pointer) omdp;
  context->index = item->index + 1;
}
8385 
SeqMgrGetBestProteinFeature(BioseqPtr bsp,SeqMgrFeatContext PNTR context)8386 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetBestProteinFeature (BioseqPtr bsp,
8387                                                            SeqMgrFeatContext PNTR context)
8388 
8389 {
8390   BioseqExtraPtr  bspextra;
8391   ObjMgrDataPtr   omdp;
8392 
8393   if (context != NULL) {
8394     MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
8395   }
8396   omdp = SeqMgrGetOmdpForBioseq (bsp);
8397   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8398   bspextra = (BioseqExtraPtr) omdp->extradata;
8399   if (bspextra == NULL) return NULL;
8400   SetContextForFeature (bspextra->protFeat, context, omdp);
8401   return bspextra->protFeat;
8402 }
8403 
SeqMgrGetCDSgivenProduct(BioseqPtr bsp,SeqMgrFeatContext PNTR context)8404 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetCDSgivenProduct (BioseqPtr bsp,
8405                                                         SeqMgrFeatContext PNTR context)
8406 
8407 {
8408   BioseqExtraPtr  bspextra;
8409   ObjMgrDataPtr   omdp;
8410   SeqFeatPtr      sfp;
8411 
8412   if (context != NULL) {
8413     MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
8414   }
8415   omdp = SeqMgrGetOmdpForBioseq (bsp);
8416   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8417   bspextra = (BioseqExtraPtr) omdp->extradata;
8418   if (bspextra == NULL) return NULL;
8419   sfp = bspextra->cdsOrRnaFeat;
8420   if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return NULL;
8421   SetContextForFeature (sfp, context, omdp);
8422   return sfp;
8423 }
8424 
SeqMgrGetRNAgivenProduct(BioseqPtr bsp,SeqMgrFeatContext PNTR context)8425 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetRNAgivenProduct (BioseqPtr bsp,
8426                                                         SeqMgrFeatContext PNTR context)
8427 
8428 {
8429   BioseqExtraPtr  bspextra;
8430   ObjMgrDataPtr   omdp;
8431   SeqFeatPtr      sfp;
8432 
8433   if (context != NULL) {
8434     MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
8435   }
8436   omdp = SeqMgrGetOmdpForBioseq (bsp);
8437   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8438   bspextra = (BioseqExtraPtr) omdp->extradata;
8439   if (bspextra == NULL) return NULL;
8440   sfp = bspextra->cdsOrRnaFeat;
8441   if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA) return NULL;
8442   SetContextForFeature (sfp, context, omdp);
8443   return sfp;
8444 }
8445 
SeqMgrGetPROTgivenProduct(BioseqPtr bsp,SeqMgrFeatContext PNTR context)8446 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetPROTgivenProduct (BioseqPtr bsp,
8447                                                          SeqMgrFeatContext PNTR context)
8448 
8449 {
8450   BioseqExtraPtr  bspextra;
8451   ObjMgrDataPtr   omdp;
8452   SeqFeatPtr      sfp;
8453 
8454   if (context != NULL) {
8455     MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
8456   }
8457   omdp = SeqMgrGetOmdpForBioseq (bsp);
8458   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8459   bspextra = (BioseqExtraPtr) omdp->extradata;
8460   if (bspextra == NULL) return NULL;
8461   sfp = bspextra->cdsOrRnaFeat;
8462   if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return NULL;
8463   SetContextForFeature (sfp, context, omdp);
8464   return sfp;
8465 }
8466 
SeqMgrGetSfpProductList(BioseqPtr bsp)8467 NLM_EXTERN ValNodePtr LIBCALL SeqMgrGetSfpProductList (BioseqPtr bsp)
8468 
8469 {
8470   BioseqExtraPtr  bspextra;
8471   ObjMgrDataPtr   omdp;
8472 
8473   omdp = SeqMgrGetOmdpForBioseq (bsp);
8474   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8475   bspextra = (BioseqExtraPtr) omdp->extradata;
8476   if (bspextra == NULL) return NULL;
8477   return bspextra->prodlisthead;
8478 }
8479 
8480 /*****************************************************************************
8481 *
8482 *   SeqMgrGetGeneXref, SeqMgrGeneIsSuppressed, SeqMgrGetProtXref,
8483 *     SeqMgrGetOverlappingGene, and SeqMgrGetOverlappingPub
8484 *
8485 *****************************************************************************/
8486 
HasNoText(CharPtr str)8487 static Boolean HasNoText (CharPtr str)
8488 
8489 {
8490   Char  ch;
8491 
8492   if (str != NULL) {
8493     ch = *str;
8494     while (ch != '\0') {
8495       if (ch > ' ') {
8496         return FALSE;
8497       }
8498       str++;
8499       ch = *str;
8500     }
8501   }
8502   return TRUE;
8503 }
8504 
SeqMgrGetGeneXref(SeqFeatPtr sfp)8505 NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXref (SeqFeatPtr sfp)
8506 
8507 {
8508   GeneRefPtr      grp = NULL;
8509   SeqFeatXrefPtr  xref;
8510 
8511   if (sfp == NULL) return NULL;
8512   xref = sfp->xref;
8513   while (xref != NULL && xref->data.choice != SEQFEAT_GENE) {
8514     xref = xref->next;
8515   }
8516   if (xref != NULL) {
8517     grp = (GeneRefPtr) xref->data.value.ptrvalue;
8518   }
8519   return grp;
8520 }
8521 
SeqMgrGetGeneXrefEx(SeqFeatPtr sfp,ObjectIdPtr PNTR oipP)8522 NLM_EXTERN GeneRefPtr LIBCALL SeqMgrGetGeneXrefEx (SeqFeatPtr sfp, ObjectIdPtr PNTR oipP)
8523 
8524 {
8525   GeneRefPtr      grp = NULL;
8526   ObjectIdPtr     oip;
8527   SeqFeatXrefPtr  xref;
8528 
8529   if (oipP != NULL) {
8530     *oipP = NULL;
8531   }
8532   if (sfp == NULL) return NULL;
8533 
8534   /* Look for SeqFeatData xref, maybe with Feature ID as well */
8535   xref = sfp->xref;
8536   while (xref != NULL && xref->data.choice != SEQFEAT_GENE) {
8537     xref = xref->next;
8538   }
8539   if (xref != NULL) {
8540     grp = (GeneRefPtr) xref->data.value.ptrvalue;
8541     if (xref->id.choice == 3) {
8542       oip = (ObjectIdPtr) xref->id.value.ptrvalue;
8543       if (oip != NULL && oipP != NULL) {
8544         *oipP = oip;
8545       }
8546     }
8547     return grp;
8548   }
8549 
8550   /* Look for Feature ID xref on its own */
8551   for (xref = sfp->xref; xref != NULL; xref = xref->next) {
8552     if (xref->id.choice == 3) {
8553       oip = (ObjectIdPtr) xref->id.value.ptrvalue;
8554       if (oip != NULL && oipP != NULL) {
8555         *oipP = oip;
8556       }
8557     }
8558   }
8559 
8560   return NULL;
8561 }
8562 
SeqMgrGeneIsSuppressed(GeneRefPtr grp)8563 NLM_EXTERN Boolean LIBCALL SeqMgrGeneIsSuppressed (GeneRefPtr grp)
8564 
8565 {
8566   if (grp == NULL) return FALSE;
8567   if (grp != NULL && HasNoText (grp->locus) && HasNoText (grp->allele) &&
8568       HasNoText (grp->desc) && HasNoText (grp->maploc) &&
8569       HasNoText (grp->locus_tag) && grp->db == NULL &&
8570       grp->syn == NULL) return TRUE;
8571   return FALSE;
8572 }
8573 
CheckInternalExonBoundaries(Int2 numivalsA,Int4Ptr ivalsA,Int2 numivalsB,Int4Ptr ivalsB)8574 static Boolean CheckInternalExonBoundaries (Int2 numivalsA, Int4Ptr ivalsA, Int2 numivalsB, Int4Ptr ivalsB)
8575 
8576 {
8577   Int2  i;
8578   Int2  j;
8579 
8580   if (numivalsA > numivalsB) return FALSE;
8581   if (ivalsA == NULL || ivalsB == NULL) return TRUE;
8582 
8583   /* scan first exon-intron boundary against candidate start positions */
8584 
8585   for (i = 0; i <= numivalsB - numivalsA; i++) {
8586     if (ivalsA [1] == ivalsB [2 * i + 1]) break;
8587   }
8588   if (i > numivalsB - numivalsA) return FALSE;
8589 
8590   /* scan subsequent exon-intron and intron-exon boundaries */
8591 
8592   for (j = 2; j <= 2 * numivalsA - 2; j++) {
8593     if (ivalsA [j] != ivalsB [2 * i + j]) return FALSE;
8594   }
8595 
8596   return TRUE;
8597 }
8598 
StrandsMatch(Uint1 featstrand,Uint1 locstrand)8599 static Boolean StrandsMatch (Uint1 featstrand, Uint1 locstrand)
8600 
8601 {
8602   if (featstrand == locstrand) return TRUE;
8603   if (locstrand == Seq_strand_unknown && featstrand != Seq_strand_minus) return TRUE;
8604   if (featstrand == Seq_strand_unknown && locstrand != Seq_strand_minus) return TRUE;
8605   if (featstrand == Seq_strand_both && locstrand != Seq_strand_minus) return TRUE;
8606   if (locstrand == Seq_strand_both) return TRUE;
8607   return FALSE;
8608 }
8609 
/* Tests one indexed feature against a location under the given overlap
   rule.  left/right are the extremes of the location; numivals/ivals are
   its interval boundaries and are only consulted for CHECK_INTERVALS.

   Returns a non-negative score on success (smaller means a tighter fit)
   or -1 when the rule is not satisfied or overlapType is not recognized.

     SIMPLE_OVERLAP    extremes overlap; score is the sum of the absolute
                       differences of the left and right ends
     CONTAINED_WITHIN  location extremes lie inside the feature extremes;
                       score is the total excess of the feature
     LOCATION_SUBSET   SeqLocAinB (slp, feature location) is >= 0; that
                       value is the score
     CHECK_INTERVALS   as LOCATION_SUBSET, and additionally either
                       numivals is 1 or CheckInternalExonBoundaries passes
     INTERVAL_OVERLAP  extremes overlap and some pair of intervals compares
                       as anything other than SLC_NO_MATCH
     COMMON_INTERVAL   extremes overlap and some pair of intervals compares
                       as SLC_A_EQ_B
     RANGE_MATCH       left and right ends are identical; score is 0 */

static Int4 TestForOverlap (SMFeatItemPtr feat, SeqLocPtr slp,
                            Int4 left, Int4 right, Int2 overlapType,
                            Int2 numivals, Int4Ptr ivals)

{
  SeqLocPtr   locIval, featIval;
  Boolean     hit;
  Int4        score;
  SeqFeatPtr  sfp;

  if (overlapType == SIMPLE_OVERLAP) {

    /* location must merely be overlapped by gene, etc., or either one inside the other */

    if (feat->right < left || feat->left > right) return -1;
    score = ABS (left - feat->left) + ABS (feat->right - right);
    return score;
  }

  if (overlapType == CONTAINED_WITHIN) {

    /* requires location to be completely contained within gene, etc. */

    if (feat->left > left || feat->right < right) return -1;
    score = (left - feat->left) + (feat->right - right);
    return score;
  }

  if (overlapType == LOCATION_SUBSET || overlapType == CHECK_INTERVALS) {

    /* requires individual intervals to be completely contained within gene, etc. */

    sfp = feat->sfp;
    if (sfp == NULL) return -1;
    score = SeqLocAinB (slp, sfp->location);
    if (score < 0) return -1;
    if (overlapType == LOCATION_SUBSET || numivals == 1 ||
        CheckInternalExonBoundaries (numivals, ivals, feat->numivals, feat->ivals)) {
      return score;
    }
    return -1;
  }

  if (overlapType == INTERVAL_OVERLAP || overlapType == COMMON_INTERVAL) {

    /* requires overlap between at least one pair of intervals (INTERVAL_OVERLAP) */
    /* or one complete shared interval (COMMON_INTERVAL) */

    if (feat->right < left || feat->left > right) return -1;
    sfp = feat->sfp;
    if (sfp == NULL) return -1;

    for (locIval = SeqLocFindNext (slp, NULL);
         locIval != NULL;
         locIval = SeqLocFindNext (slp, locIval)) {
      for (featIval = SeqLocFindNext (sfp->location, NULL);
           featIval != NULL;
           featIval = SeqLocFindNext (sfp->location, featIval)) {
        if (overlapType == INTERVAL_OVERLAP) {
          hit = (Boolean) (SeqLocCompare (locIval, featIval) != SLC_NO_MATCH);
        } else {
          hit = (Boolean) (SeqLocCompare (locIval, featIval) == SLC_A_EQ_B);
        }
        if (hit) {
          score = ABS (left - feat->left) + ABS (feat->right - right);
          return score;
        }
      }
    }
    return -1;
  }

  if (overlapType == RANGE_MATCH) {

    /* left and right ends must match exactly */

    if (feat->right == right && feat->left == left) return 0;
  }

  return -1;
}
8697 
/* Copies the fields of an indexed feature item into a caller-supplied
   SeqMgrFeatContext.  Does nothing if best, omdp or context is NULL.

   entityID comes from the feature itself when it has a positive one,
   otherwise it is looked up from omdp->dataptr.  seqfeattype comes from the
   feature's data choice, or is derived from the item subtype when the item
   has no feature pointer.  index is reported one-based (best->index + 1). */

static void SeqMgrBestOverlapSetContext (
  SMFeatItemPtr best,
  ObjMgrDataPtr omdp,
  Pointer userdata,
  SeqMgrFeatContext PNTR context
)

{
  SeqFeatPtr  sfp;

  if (best == NULL || omdp == NULL || context == NULL) return;

  sfp = best->sfp;

  /* identity of the feature */

  if (sfp != NULL && sfp->idx.entityID > 0) {
    context->entityID = sfp->idx.entityID;
  } else {
    context->entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
  }
  context->itemID = best->itemID;
  context->sfp = sfp;
  context->sap = best->sap;
  context->bsp = best->bsp;
  context->label = best->label;

  /* position and location flags */

  context->left = best->left;
  context->right = best->right;
  context->dnaStop = best->dnaStop;
  context->partialL = best->partialL;
  context->partialR = best->partialR;
  context->farloc = best->farloc;
  context->bad_order = best->bad_order;
  context->mixed_strand = best->mixed_strand;
  context->ts_image = best->ts_image;
  context->strand = best->strand;

  /* feature type information */

  if (sfp == NULL) {
    context->seqfeattype = FindFeatFromFeatDefType (best->subtype);
  } else {
    context->seqfeattype = sfp->data.choice;
  }
  context->featdeftype = best->subtype;

  /* intervals, caller data and bookkeeping */

  context->numivals = best->numivals;
  context->ivals = best->ivals;
  context->userdata = userdata;
  context->omdp = (Pointer) omdp;
  context->index = best->index + 1;
}
8743 
TransSplicedStrandsMatch(Uint1 locstrand,SeqLocPtr slp,SMFeatItemPtr feat)8744 static Boolean TransSplicedStrandsMatch (Uint1 locstrand, SeqLocPtr slp, SMFeatItemPtr feat)
8745 
8746 {
8747   Uint1       featstrand;
8748   SeqLocPtr   loc;
8749   SeqFeatPtr  sfp;
8750 
8751   if (slp == NULL || feat == NULL) return FALSE;
8752   sfp = feat->sfp;
8753   if (sfp == NULL) return FALSE;
8754 
8755   if (! sfp->excpt) return FALSE;
8756   if (StringISearch (sfp->except_text, "trans-splicing") == NULL) return FALSE;
8757 
8758   loc = SeqLocFindNext (sfp->location, NULL);
8759   while (loc != NULL) {
8760     if (SeqLocAinB (slp, loc) >= 0) {
8761       featstrand = SeqLocStrand (loc);
8762       if (StrandsMatch (featstrand, locstrand)) return TRUE;
8763     }
8764     loc = SeqLocFindNext (sfp->location, loc);
8765   }
8766 
8767   return FALSE;
8768 }
8769 
SeqMgrGetBestOverlappingFeat(SeqLocPtr slp,Uint2 subtype,SMFeatItemPtr PNTR array,Int4 num,Int4Ptr pos,Int2 overlapType,SeqMgrFeatContext PNTR context,Int2Ptr count,Pointer userdata,SeqMgrFeatExploreProc userfunc,Boolean special)8770 static SeqFeatPtr SeqMgrGetBestOverlappingFeat (
8771   SeqLocPtr slp,
8772   Uint2 subtype,
8773   SMFeatItemPtr PNTR array,
8774   Int4 num,
8775   Int4Ptr pos,
8776   Int2 overlapType,
8777   SeqMgrFeatContext PNTR context,
8778   Int2Ptr count,
8779   Pointer userdata,
8780   SeqMgrFeatExploreProc userfunc,
8781   Boolean special
8782 )
8783 
8784 {
8785   SMFeatItemPtr   best = NULL;
8786   BioseqPtr       bsp;
8787   BioseqExtraPtr  bspextra;
8788   Int4            diff;
8789   Uint2           entityID;
8790   SMFeatItemPtr   feat;
8791   Int4            from;
8792   Boolean         goOn = TRUE;
8793   Int4            hier = -1;
8794   Int2            i;
8795   Uint4           index = 0;
8796   Int4Ptr         ivals = NULL;
8797   Int4            L;
8798   Int4            left;
8799   SeqLocPtr       loc;
8800   Int4            max = INT4_MAX;
8801   Boolean         may_be_trans_spliced;
8802   Int4            mid;
8803   Int2            numivals = 0;
8804   SeqEntryPtr     oldscope;
8805   ObjMgrDataPtr   omdp;
8806   SMFeatItemPtr   prev;
8807   Int4            R;
8808   Int4            right;
8809   SeqEntryPtr     sep;
8810   Uint1           strand;
8811   Int4            swap;
8812   SeqLocPtr       tmp;
8813   Int4            to;
8814 
8815   if (context != NULL) {
8816     MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
8817   }
8818   if (pos != NULL) {
8819     *pos = 0;
8820   }
8821   if (count != NULL) {
8822     *count = 0;
8823   }
8824   if (slp == NULL) return NULL;
8825 
8826   bsp = FindAppropriateBioseq (slp, NULL, NULL);
8827   if (bsp == NULL) {
8828     bsp = FindFirstLocalBioseq (slp);
8829   }
8830   if (bsp == NULL) return NULL;
8831   omdp = SeqMgrGetOmdpForBioseq (bsp);
8832   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
8833 
8834   bspextra = (BioseqExtraPtr) omdp->extradata;
8835   if (bspextra == NULL) return NULL;
8836 
8837   switch (subtype) {
8838     case FEATDEF_GENE :
8839       array = bspextra->genesByPos;
8840       num = bspextra->numgenes;
8841       break;
8842     case FEATDEF_CDS :
8843       array = bspextra->CDSsByPos;
8844       num = bspextra->numCDSs;
8845       break;
8846     case FEATDEF_mRNA :
8847       array = bspextra->mRNAsByPos;
8848       num = bspextra->nummRNAs;
8849       break;
8850     case FEATDEF_PUB :
8851       array = bspextra->pubsByPos;
8852       num = bspextra->numpubs;
8853       break;
8854     case FEATDEF_BIOSRC :
8855       array = bspextra->orgsByPos;
8856       num = bspextra->numorgs;
8857       break;
8858       case FEATDEF_operon :
8859       array = bspextra->operonsByPos;
8860       num = bspextra->numoperons;
8861     default :
8862       break;
8863   }
8864 
8865   if (array == NULL || num < 1) return NULL;
8866 
8867   entityID = bsp->idx.entityID;
8868   if (entityID < 1) {
8869     entityID = ObjMgrGetEntityIDForPointer (bsp);
8870   }
8871   sep = SeqMgrGetTopSeqEntryForEntity (entityID);
8872   oldscope = SeqEntrySetScope (sep);
8873 
8874   left = GetOffsetInNearBioseq (slp, bsp, SEQLOC_LEFT_END);
8875   right = GetOffsetInNearBioseq (slp, bsp, SEQLOC_RIGHT_END);
8876 
8877   SeqEntrySetScope (oldscope);
8878 
8879   if (left == -1 || right == -1) return NULL;
8880 
8881   /* if feature spans origin, normalize with left < 0 */
8882 
8883   if (left > right && bsp->topology == TOPOLOGY_CIRCULAR) {
8884     left -= bsp->length;
8885   }
8886 
8887   /* some trans-spliced locations can confound GetOffsetInNearBioseq, so normalize here */
8888 
8889   if (left > right) {
8890     swap = left;
8891     left = right;
8892     right = swap;
8893   }
8894 
8895   /* binary search to leftmost candidate within the xxxByPos array */
8896 
8897   L = 0;
8898   R = num - 1;
8899   while (L < R) {
8900     mid = (L + R) / 2;
8901     feat = array [mid];
8902     if (feat != NULL && feat->right < left) {
8903       L = mid + 1;
8904     } else {
8905       R = mid;
8906     }
8907   }
8908 
8909   feat = array [R];
8910 
8911   if (feat != NULL && feat->left > left && R > 0) {
8912 
8913     /* if hit is already past location, location was in between local hits */
8914 
8915     prev = array [R - 1];
8916     if (prev != NULL && prev->overlap != -1) {
8917 
8918       /* backup R by one to check appropriate overlap hierarchy */
8919 
8920       R--;
8921       feat = array [R];
8922     }
8923   }
8924 
8925   if (feat != NULL) {
8926     hier = feat->overlap;
8927   }
8928 
8929   loc = SeqLocMergeExEx (bsp, slp, NULL, FALSE, /* TRUE */ FALSE, FALSE, FALSE, TRUE, TRUE, FALSE);
8930   strand = SeqLocStrand (loc);
8931   if (overlapType == CHECK_INTERVALS) {
8932     tmp = NULL;
8933     while ((tmp = SeqLocFindNext (loc, tmp)) != NULL) {
8934       numivals++;
8935     }
8936     if (numivals > 0) {
8937       ivals = MemNew (sizeof (Int4) * (numivals * 2));
8938       if (ivals != NULL) {
8939         tmp = NULL;
8940         i = 0;
8941         while ((tmp = SeqLocFindNext (loc, tmp)) != NULL) {
8942           from = SeqLocStart (tmp);
8943           to = SeqLocStop (tmp);
8944           if (strand == Seq_strand_minus) {
8945             swap = from;
8946             from = to;
8947             to = swap;
8948           }
8949           ivals [i] = from;
8950           i++;
8951           ivals [i] = to;
8952           i++;
8953         }
8954       }
8955     }
8956   }
8957   SeqLocFree (loc);
8958 
8959   /* linear scan to smallest covering gene, publication, biosource, etc. */
8960 
8961   while (R < num && feat != NULL && feat->left <= right) {
8962 
8963     if ((! feat->ignore) || userfunc == NULL) {
8964 
8965       /* requires feature to be contained within gene, etc. */
8966 
8967       may_be_trans_spliced = (Boolean) (special && (feat->bad_order || feat->mixed_strand));
8968       if (may_be_trans_spliced) {
8969         diff = TestForOverlap (feat, slp, left, right, LOCATION_SUBSET, numivals, ivals);
8970       } else {
8971         diff = TestForOverlap (feat, slp, left, right, overlapType, numivals, ivals);
8972       }
8973       if (diff >= 0) {
8974 
8975         if (StrandsMatch (feat->strand, strand) || (may_be_trans_spliced && TransSplicedStrandsMatch (strand, slp, feat))) {
8976 
8977           if (userfunc != NULL && context != NULL && goOn) {
8978             SeqMgrBestOverlapSetContext (feat, omdp, userdata, context);
8979             if (! userfunc (feat->sfp, context)) {
8980               goOn = FALSE;
8981             }
8982             if (count != NULL) {
8983               (*count)++;
8984             }
8985           }
8986 
8987           /* diff = (left - feat->left) + (feat->right - right); */
8988           /* Don't need to check ties because in this loop we always hit the leftmost first */
8989           if ( diff < max )
8990           {
8991             best = feat;
8992             index = R;
8993             max = diff;
8994           }
8995         }
8996       }
8997     }
8998     R++;
8999     feat = array [R];
9000   }
9001 
9002   /* also will go up gene overlap hierarchy pointers from original R hit */
9003 
9004   while (hier != -1) {
9005 
9006     feat = array [hier];
9007     if (feat != NULL && ((! feat->ignore) || userfunc == NULL)) {
9008 
9009       may_be_trans_spliced = (Boolean) (special && (feat->bad_order || feat->mixed_strand));
9010       if (may_be_trans_spliced) {
9011         diff = TestForOverlap (feat, slp, left, right, LOCATION_SUBSET, numivals, ivals);
9012       } else {
9013         diff = TestForOverlap (feat, slp, left, right, overlapType, numivals, ivals);
9014       }
9015       if (diff >= 0) {
9016 
9017         if (StrandsMatch (feat->strand, strand) || (may_be_trans_spliced && TransSplicedStrandsMatch (strand, slp, feat))) {
9018 
9019           if (userfunc != NULL && context != NULL && goOn) {
9020             SeqMgrBestOverlapSetContext (feat, omdp, userdata, context);
9021             if (! userfunc (feat->sfp, context)) {
9022               goOn = FALSE;
9023             }
9024             if (count != NULL) {
9025               (*count)++;
9026             }
9027           }
9028 
9029           /* diff = (left - feat->left) + (feat->right - right); */
9030           /* For ties, first wins */
9031           if (diff < max || ( diff == max && hier < index )) {
9032             best = feat;
9033             index = hier;
9034             max = diff;
9035           }
9036         }
9037       }
9038       hier = feat->overlap;
9039     } else {
9040       hier = -1;
9041     }
9042   }
9043 
9044   if (ivals != NULL) {
9045     ivals = MemFree (ivals);
9046   }
9047 
9048   if (best != NULL) {
9049     if (pos != NULL) {
9050       *pos = index + 1;
9051     }
9052     if (context != NULL) {
9053       SeqMgrBestOverlapSetContext (best, omdp, userdata, context);
9054     }
9055     return best->sfp;
9056   }
9057 
9058   return NULL;
9059 }
9060 
TestFeatOverlap(SeqFeatPtr sfpA,SeqFeatPtr sfpB,Int2 overlapType)9061 NLM_EXTERN Int4 TestFeatOverlap (SeqFeatPtr sfpA, SeqFeatPtr sfpB, Int2 overlapType)
9062 
9063 {
9064   Int4           diff;
9065   SMFeatItemPtr  sfipA, sfipB;
9066 
9067   if (sfpA == NULL || sfpB == NULL) return -1;
9068   sfipA = SeqMgrFindSMFeatItemPtr (sfpA);
9069   sfipB = SeqMgrFindSMFeatItemPtr (sfpB);
9070   if (sfipA == NULL || sfipB == NULL) return -1;
9071 
9072   diff = TestForOverlap (sfipB, sfpA->location, sfipA->left, sfipA->right,
9073                          overlapType, sfipA->numivals, sfipA->ivals);
9074   if (diff < 0) return -1;
9075 
9076   if (StrandsMatch (sfipB->strand, sfipA->strand)) {
9077     return diff;
9078   }
9079 
9080   return -1;
9081 }
9082 
SeqMgrGetOverlappingGene(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9083 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingGene (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9084 
9085 {
9086   return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_GENE, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, TRUE);
9087 }
9088 
SeqMgrGetOverlappingmRNA(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9089 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingmRNA (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9090 
9091 {
9092   return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_mRNA, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
9093 }
9094 
SeqMgrGetLocationSupersetmRNA(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9095 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetLocationSupersetmRNA (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9096 
9097 {
9098   return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_mRNA, NULL, 0, NULL, LOCATION_SUBSET, context, NULL, NULL, NULL, FALSE);
9099 }
9100 
SeqMgrGetOverlappingCDS(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9101 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingCDS (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9102 
9103 {
9104   return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_CDS, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
9105 }
9106 
SeqMgrGetOverlappingPub(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9107 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingPub (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9108 
9109 {
9110   return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_PUB, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
9111 }
9112 
SeqMgrGetOverlappingSource(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9113 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingSource (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9114 
9115 {
9116   return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_BIOSRC, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
9117 }
9118 
SeqMgrGetOverlappingOperon(SeqLocPtr slp,SeqMgrFeatContext PNTR context)9119 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingOperon (SeqLocPtr slp, SeqMgrFeatContext PNTR context)
9120 
9121 {
9122   return SeqMgrGetBestOverlappingFeat (slp, FEATDEF_operon, NULL, 0, NULL, CONTAINED_WITHIN, context, NULL, NULL, NULL, FALSE);
9123 }
9124 
9125 /*****************************************************************************
9126 *
9127 *   SeqMgrGetFeatureByLabel returns a feature with the desired label
9128 *   If desired, place a SeqMgrFeatContext data structure on the stack, and pass
9129 *     in &context as the last parameter
9130 *
9131 *****************************************************************************/
9132 
GetLabelOrLocusTag(SMFeatItemPtr feat,Boolean byLocusTag)9133 static CharPtr GetLabelOrLocusTag (SMFeatItemPtr feat, Boolean byLocusTag)
9134 
9135 {
9136   GeneRefPtr  grp;
9137   SeqFeatPtr  sfp;
9138 
9139   if (feat == NULL) return NULL;
9140   if (byLocusTag) {
9141     sfp = feat->sfp;
9142     if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return NULL;
9143     grp = (GeneRefPtr) sfp->data.value.ptrvalue;
9144     if (grp == NULL) return NULL;
9145     return grp->locus_tag;
9146   }
9147   return feat->label;
9148 }
9149 
SeqMgrGetFeatureByLabelEx(BioseqPtr bsp,CharPtr label,Uint1 seqFeatChoice,Uint1 featDefChoice,Boolean byLocusTag,SeqMgrFeatContext PNTR context)9150 static SeqFeatPtr LIBCALL SeqMgrGetFeatureByLabelEx (BioseqPtr bsp, CharPtr label,
9151                                                      Uint1 seqFeatChoice, Uint1 featDefChoice,
9152                                                      Boolean byLocusTag, SeqMgrFeatContext PNTR context)
9153 
9154 {
9155   SMFeatItemPtr PNTR  array;
9156   BioseqExtraPtr      bspextra;
9157   Uint2               entityID;
9158   SMFeatItemPtr       feat;
9159   Int4                L;
9160   Int4                mid;
9161   Int4                num;
9162   ObjMgrDataPtr       omdp;
9163   Int4                R;
9164   Uint1               seqfeattype;
9165   SeqFeatPtr          sfp;
9166 
9167   if (context != NULL) {
9168     MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
9169   }
9170 
9171   if (bsp == NULL || StringHasNoText (label)) return NULL;
9172 
9173   omdp = SeqMgrGetOmdpForBioseq (bsp);
9174   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9175 
9176   bspextra = (BioseqExtraPtr) omdp->extradata;
9177   if (bspextra == NULL) return NULL;
9178   if (byLocusTag) {
9179     array = bspextra->genesByLocusTag;
9180     num = bspextra->numgenes;
9181   } else {
9182     array = bspextra->featsByLabel;
9183     num = bspextra->numfeats;
9184   }
9185   if (array == NULL || num < 1) return NULL;
9186 
9187   entityID = bsp->idx.entityID;
9188   if (entityID < 1) {
9189     entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
9190   }
9191 
9192   /* binary search to leftmost candidate within the featsByLabel array */
9193 
9194   L = 0;
9195   R = num - 1;
9196   while (L < R) {
9197     mid = (L + R) / 2;
9198     feat = array [mid];
9199     if (feat != NULL && StringICmp (GetLabelOrLocusTag (feat, byLocusTag), label) < 0) {
9200       L = mid + 1;
9201     } else {
9202       R = mid;
9203     }
9204   }
9205 
9206   feat = array [R];
9207 
9208   /* linear scan to find desired label on desired feature type */
9209 
9210   while (R < num && feat != NULL && StringICmp (GetLabelOrLocusTag (feat, byLocusTag), label) == 0) {
9211     sfp = feat->sfp;
9212     if (sfp != NULL) {
9213       seqfeattype = sfp->data.choice;
9214       if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) &&
9215           (featDefChoice == 0 || feat->subtype == featDefChoice) &&
9216           (! feat->ignore)) {
9217         if (context != NULL) {
9218           context->entityID = entityID;
9219           context->itemID = feat->itemID;
9220           context->sfp = sfp;
9221           context->sap = feat->sap;
9222           context->bsp = feat->bsp;
9223           context->label = GetLabelOrLocusTag (feat, byLocusTag);
9224           context->left = feat->left;
9225           context->right = feat->right;
9226           context->dnaStop = feat->dnaStop;
9227           context->partialL = feat->partialL;
9228           context->partialR = feat->partialR;
9229           context->farloc = feat->farloc;
9230           context->bad_order = feat->bad_order;
9231           context->mixed_strand = feat->mixed_strand;
9232           context->ts_image = feat->ts_image;
9233           context->strand = feat->strand;
9234           context->seqfeattype = seqfeattype;
9235           context->featdeftype = feat->subtype;
9236           context->numivals = feat->numivals;
9237           context->ivals = feat->ivals;
9238           context->userdata = NULL;
9239           context->omdp = (Pointer) omdp;
9240           context->index = R + 1;
9241         }
9242         return sfp;
9243       }
9244     }
9245 
9246     R++;
9247     feat = array [R];
9248   }
9249 
9250   return NULL;
9251 }
9252 
SeqMgrGetFeatureByLabel(BioseqPtr bsp,CharPtr label,Uint1 seqFeatChoice,Uint1 featDefChoice,SeqMgrFeatContext PNTR context)9253 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureByLabel (BioseqPtr bsp, CharPtr label,
9254                                                        Uint1 seqFeatChoice, Uint1 featDefChoice,
9255                                                        SeqMgrFeatContext PNTR context)
9256 
9257 {
9258   return SeqMgrGetFeatureByLabelEx (bsp, label, seqFeatChoice, featDefChoice, FALSE, context);
9259 }
9260 
SeqMgrGetGeneByLocusTag(BioseqPtr bsp,CharPtr locusTag,SeqMgrFeatContext PNTR context)9261 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetGeneByLocusTag (BioseqPtr bsp, CharPtr locusTag,
9262                                                        SeqMgrFeatContext PNTR context)
9263 
9264 {
9265   return SeqMgrGetFeatureByLabelEx (bsp, locusTag, SEQFEAT_GENE, 0, TRUE, context);
9266 }
9267 
SeqMgrGetFeatureByFeatID(Uint2 entityID,BioseqPtr bsp,CharPtr featid,SeqFeatXrefPtr xref,SeqMgrFeatContext PNTR context)9268 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureByFeatID (
9269   Uint2 entityID,
9270   BioseqPtr bsp,
9271   CharPtr featid,
9272   SeqFeatXrefPtr xref,
9273   SeqMgrFeatContext PNTR context
9274 )
9275 
9276 {
9277   SMFidItemPtr PNTR  array;
9278   BioseqExtraPtr     bspextra;
9279   Char               buf [32];
9280   SMFeatItemPtr      feat;
9281   SMFidItemPtr       item;
9282   Int4               L;
9283   Int4               mid;
9284   Int4               num;
9285   ObjectIdPtr        oip;
9286   ObjMgrDataPtr      omdp;
9287   Int4               R;
9288   SeqFeatPtr         sfp;
9289 
9290   if (context != NULL) {
9291     MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
9292   }
9293 
9294   if (entityID > 0) {
9295     omdp = ObjMgrGetData (entityID);
9296     if (omdp == NULL) return NULL;
9297   } else {
9298     if (bsp == NULL) return NULL;
9299     omdp = SeqMgrGetOmdpForBioseq (bsp);
9300     if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9301   }
9302   bspextra = (BioseqExtraPtr) omdp->extradata;
9303   if (bspextra == NULL) return NULL;
9304 
9305   /* first try array sorted by itemID value */
9306 
9307   array = bspextra->featsByFeatID;
9308   num = bspextra->numfids;
9309   if (array == NULL || num < 1) return NULL;
9310 
9311   if (StringHasNoText (featid) && xref != NULL && xref->id.choice == 3) {
9312     oip = (ObjectIdPtr) xref->id.value.ptrvalue;
9313     if (oip != NULL) {
9314       if (StringDoesHaveText (oip->str)) {
9315         featid = oip->str;
9316       } else {
9317         sprintf (buf, "%ld", (long) oip->id);
9318         featid = buf;
9319       }
9320     }
9321   }
9322   if (StringHasNoText (featid)) return NULL;
9323 
9324   L = 0;
9325   R = num - 1;
9326   while (L < R) {
9327     mid = (L + R) / 2;
9328     item = array [mid];
9329     if (item != NULL && StringICmp (item->fid, featid) < 0) {
9330       L = mid + 1;
9331     } else {
9332       R = mid;
9333     }
9334   }
9335 
9336   item = array [R];
9337   if (StringICmp (item->fid, featid) == 0) {
9338     feat = item->feat;
9339     if (feat == NULL) return NULL;
9340     sfp = feat->sfp;
9341     if (sfp != NULL) {
9342       if (! feat->ignore) {
9343         if (context != NULL) {
9344           context->entityID = entityID;
9345           context->itemID = feat->itemID;
9346           context->sfp = sfp;
9347           context->sap = feat->sap;
9348           context->bsp = feat->bsp;
9349           context->label = feat->label;
9350           context->left = feat->left;
9351           context->right = feat->right;
9352           context->dnaStop = feat->dnaStop;
9353           context->partialL = feat->partialL;
9354           context->partialR = feat->partialR;
9355           context->farloc = feat->farloc;
9356           context->bad_order = feat->bad_order;
9357           context->mixed_strand = feat->mixed_strand;
9358           context->ts_image = feat->ts_image;
9359           context->strand = feat->strand;
9360           context->seqfeattype = sfp->data.choice;;
9361           context->featdeftype = feat->subtype;
9362           context->numivals = feat->numivals;
9363           context->ivals = feat->ivals;
9364           context->userdata = NULL;
9365           context->omdp = (Pointer) omdp;
9366           context->index = R + 1;
9367         }
9368         return sfp;
9369       }
9370     }
9371   }
9372 
9373   return NULL;
9374 }
9375 
9376 /*****************************************************************************
9377 *
9378 *   SeqMgrBuildFeatureIndex builds a sorted array index for any feature type
9379 *     (including gene, mRNA, CDS, publication, and biosource built-in arrays)
*   SeqMgrGetOverlappingFeature uses the array, or a feature subtype (choices
9381 *     are FEATDEF_GENE, FEATDEF_CDS, FEATDEF_mRNA, FEATDEF_PUB, or FEATDEF_BIOSRC)
9382 *     to find feature overlap, requiring either that the location be completely
9383 *     contained within the feature intervals, contained within the feature extreme
9384 *     range, or merely that it be overlapped by the feature, and returns the position
9385 *     in the index
9386 *   SeqMgrGetFeatureInIndex gets an arbitrary feature indexed by the array
9387 *
9388 *****************************************************************************/
9389 
SeqMgrBuildFeatureIndex(BioseqPtr bsp,Int4Ptr num,Uint1 seqFeatChoice,Uint1 featDefChoice)9390 NLM_EXTERN VoidPtr LIBCALL SeqMgrBuildFeatureIndex (BioseqPtr bsp, Int4Ptr num,
9391                                                     Uint1 seqFeatChoice, Uint1 featDefChoice)
9392 
9393 {
9394   SMFeatItemPtr PNTR  array;
9395   BioseqExtraPtr      bspextra;
9396   SMFeatItemPtr PNTR  featsByPos;
9397   Int4                i;
9398   Int4                j;
9399   Int4                k;
9400   SMFeatItemPtr       item;
9401   Int4                numfeats;
9402   Int4                numitems;
9403   SMFeatItemPtr       nxtitem;
9404   ObjMgrDataPtr       omdp;
9405   Boolean             overlaps;
9406   Uint1               seqfeattype;
9407 
9408   if (num != NULL) {
9409     *num = 0;
9410   }
9411   if (bsp == NULL) return NULL;
9412   omdp = SeqMgrGetOmdpForBioseq (bsp);
9413   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9414 
9415   bspextra = (BioseqExtraPtr) omdp->extradata;
9416   if (bspextra == NULL) return NULL;
9417 
9418   featsByPos = bspextra->featsByPos;
9419   numfeats = bspextra->numfeats;
9420   if (featsByPos == NULL || numfeats < 1) return NULL;
9421 
9422   for (i = 0, numitems = 0; i < numfeats; i++) {
9423     item = featsByPos [i];
9424     seqfeattype = FindFeatFromFeatDefType (item->subtype);
9425     if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) &&
9426         (featDefChoice == 0 || item->subtype == featDefChoice)) {
9427       numitems++;
9428     }
9429   }
9430   if (numitems < 1) return NULL;
9431 
9432   array = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (numitems + 1));
9433   if (array == NULL) return NULL;
9434 
9435   i = 0;
9436   j = 0;
9437   while (i < numfeats && j < numitems) {
9438     item = featsByPos [i];
9439     seqfeattype = FindFeatFromFeatDefType (item->subtype);
9440     if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) &&
9441         (featDefChoice == 0 || item->subtype == featDefChoice)) {
9442       array [j] = item;
9443       j++;
9444     }
9445     i++;
9446   }
9447 
9448   if (num != NULL) {
9449     *num = numitems;
9450   }
9451 
9452   for (j = 0; j < numitems - 1; j++) {
9453       item = array [j];
9454       for (k = j + 1, overlaps = TRUE; k < numitems && overlaps; k++) {
9455           nxtitem = array [k];
9456           if ((item->left <= nxtitem->left && item->right > nxtitem->left) ||
9457               (item->left < nxtitem->right && item->right >= nxtitem->right)) {
9458 
9459               /* after binary search, also go up the hierarchy chain to avoid traps */
9460 
9461               nxtitem->overlap = j;
9462           } else {
9463               overlaps = FALSE;
9464           }
9465       }
9466   }
9467 
9468   return (VoidPtr) array;
9469 }
9470 
SeqMgrGetOverlappingFeature(SeqLocPtr slp,Uint2 subtype,VoidPtr featarray,Int4 numfeats,Int4Ptr position,Int2 overlapType,SeqMgrFeatContext PNTR context)9471 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeature (SeqLocPtr slp, Uint2 subtype,
9472                                                            VoidPtr featarray, Int4 numfeats,
9473                                                            Int4Ptr position, Int2 overlapType,
9474                                                            SeqMgrFeatContext PNTR context)
9475 
9476 {
9477   return SeqMgrGetBestOverlappingFeat (slp, subtype, (SMFeatItemPtr PNTR) featarray,
9478                                        numfeats, position, overlapType, context, NULL, NULL, NULL, FALSE);
9479 }
9480 
SeqMgrGetOverlappingFeatureEx(SeqLocPtr slp,Uint2 subtype,VoidPtr featarray,Int4 numfeats,Int4Ptr position,Int2 overlapType,SeqMgrFeatContext PNTR context,Boolean special)9481 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetOverlappingFeatureEx (SeqLocPtr slp, Uint2 subtype,
9482                                                            VoidPtr featarray, Int4 numfeats,
9483                                                            Int4Ptr position, Int2 overlapType,
9484                                                            SeqMgrFeatContext PNTR context,
9485                                                            Boolean special)
9486 
9487 {
9488   return SeqMgrGetBestOverlappingFeat (slp, subtype, (SMFeatItemPtr PNTR) featarray,
9489                                        numfeats, position, overlapType, context, NULL, NULL, NULL, special);
9490 }
9491 
SeqMgrGetAllOverlappingFeatures(SeqLocPtr slp,Uint2 subtype,VoidPtr featarray,Int4 numfeats,Int2 overlapType,Pointer userdata,SeqMgrFeatExploreProc userfunc)9492 NLM_EXTERN Int2 LIBCALL SeqMgrGetAllOverlappingFeatures (SeqLocPtr slp, Uint2 subtype,
9493                                                          VoidPtr featarray,
9494                                                          Int4 numfeats,
9495                                                          Int2 overlapType,
9496                                                          Pointer userdata,
9497                                                          SeqMgrFeatExploreProc userfunc)
9498 
9499 {
9500   SeqMgrFeatContext  context;
9501   Int2               count;
9502 
9503   SeqMgrGetBestOverlappingFeat (slp, subtype, (SMFeatItemPtr PNTR) featarray,
9504                                 numfeats, NULL, overlapType, &context, &count,
9505                                 userdata, userfunc, FALSE);
9506 
9507   return count;
9508 }
9509 
SeqMgrGetFeatureInIndex(BioseqPtr bsp,VoidPtr featarray,Int4 numfeats,Uint4 index,SeqMgrFeatContext PNTR context)9510 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetFeatureInIndex (BioseqPtr bsp, VoidPtr featarray,
9511                                                        Int4 numfeats, Uint4 index,
9512                                                        SeqMgrFeatContext PNTR context)
9513 
9514 {
9515   SMFeatItemPtr PNTR  array;
9516   SeqFeatPtr          curr;
9517   Uint2               entityID;
9518   SMFeatItemPtr       item = NULL;
9519   ObjMgrDataPtr       omdp;
9520 
9521   if (context != NULL) {
9522     MemSet ((Pointer) context, 0, sizeof (SeqMgrFeatContext));
9523   }
9524   if (bsp == NULL || featarray == NULL || numfeats < 1) return NULL;
9525   if (index < 1 || index > (Uint4) numfeats) return NULL;
9526   omdp = SeqMgrGetOmdpForBioseq (bsp);
9527   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9528 
9529   array = (SMFeatItemPtr PNTR) featarray;
9530   item = array [index - 1];
9531   if (item == NULL) return NULL;
9532 
9533   entityID = bsp->idx.entityID;
9534   if (entityID < 1) {
9535     entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
9536   }
9537 
9538   curr = item->sfp;
9539   if (curr != NULL && context != NULL && (! item->ignore)) {
9540     context->entityID = entityID;
9541     context->itemID = item->itemID;
9542     context->sfp = curr;
9543     context->sap = item->sap;
9544     context->bsp = item->bsp;
9545     context->label = item->label;
9546     context->left = item->left;
9547     context->right = item->right;
9548     context->dnaStop = item->dnaStop;
9549     context->partialL = item->partialL;
9550     context->partialR = item->partialR;
9551     context->external = item->external;
9552     context->farloc = item->farloc;
9553     context->bad_order = item->bad_order;
9554     context->mixed_strand = item->mixed_strand;
9555     context->ts_image = item->ts_image;
9556     context->strand = item->strand;
9557     if (curr != NULL) {
9558       context->seqfeattype = curr->data.choice;
9559     } else {
9560       context->seqfeattype = FindFeatFromFeatDefType (item->subtype);
9561     }
9562     context->featdeftype = item->subtype;
9563     context->numivals = item->numivals;
9564     context->ivals = item->ivals;
9565     context->userdata = NULL;
9566     context->omdp = (Pointer) omdp;
9567     context->index = item->index + 1;
9568   }
9569   return curr;
9570 }
9571 
9572 /*****************************************************************************
9573 *
9574 *   SeqMgrGetNextDescriptor and SeqMgrGetNextFeature
9575 *
9576 *****************************************************************************/
9577 
SeqMgrGetNextDescriptor(BioseqPtr bsp,ValNodePtr curr,Uint1 seqDescChoice,SeqMgrDescContext PNTR context)9578 NLM_EXTERN ValNodePtr LIBCALL SeqMgrGetNextDescriptor (BioseqPtr bsp, ValNodePtr curr,
9579                                                        Uint1 seqDescChoice,
9580                                                        SeqMgrDescContext PNTR context)
9581 
9582 {
9583   BioseqSetPtr   bssp;
9584   Uint2          entityID;
9585   ObjMgrDataPtr  omdp;
9586   SeqEntryPtr    sep;
9587   ValNode        vn;
9588 
9589   if (context == NULL) return NULL;
9590 
9591   /* if curr is NULL, initialize context fields (in user's stack) */
9592 
9593   if (curr == NULL) {
9594     if (bsp == NULL) return NULL;
9595     omdp = SeqMgrGetOmdpForBioseq (bsp);
9596     if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9597 
9598     context->omdp = (Pointer) omdp;
9599     context->itemID = omdp->lastDescrItemID;
9600     context->index = 0;
9601     context->level = 0;
9602 
9603     /* start curr just before beginning of bioseq descriptor list */
9604 
9605     curr = &vn;
9606     vn.choice = 0;
9607     vn.data.ptrvalue = 0;
9608     vn.next = bsp->descr;
9609   }
9610 
9611   omdp = (ObjMgrDataPtr) context->omdp;
9612   if (omdp == NULL) return NULL;
9613 
9614   if (bsp != NULL && bsp->idx.entityID > 0) {
9615     entityID = bsp->idx.entityID;
9616   } else {
9617     entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
9618   }
9619 
9620   if (bsp != NULL && bsp->seqentry != NULL) {
9621     sep = bsp->seqentry;
9622   } else {
9623     sep = ObjMgrGetChoiceForData (omdp->dataptr);
9624   }
9625 
9626   /* now look for next appropriate descriptor after curr in current chain */
9627 
9628   while (curr != NULL) {
9629     curr = curr->next;
9630     if (curr != NULL) {
9631       (context->itemID)++;
9632       (context->index)++;
9633       if (seqDescChoice == 0 || curr->choice == seqDescChoice) {
9634         context->entityID = entityID;
9635         context->sdp = curr;
9636         context->sep = sep;
9637         context->seqdesctype = curr->choice;
9638         context->userdata = NULL;
9639         context->omdp = (Pointer) omdp;
9640         return curr;
9641       }
9642     }
9643   }
9644 
9645   /* now go up omdp chain looking for next descriptor */
9646 
9647   while (curr == NULL) {
9648     omdp = SeqMgrGetOmdpForPointer (omdp->parentptr);
9649     if (omdp == NULL) return NULL;
9650 
9651     /* update current omdp in context */
9652 
9653     context->omdp = (Pointer) omdp;
9654     context->itemID = omdp->lastDescrItemID;
9655 
9656     switch (omdp->datatype) {
9657       case OBJ_BIOSEQ :
9658         bsp = (BioseqPtr) omdp->dataptr;
9659         curr = bsp->descr;
9660         break;
9661       case OBJ_BIOSEQSET :
9662         bssp = (BioseqSetPtr) omdp->dataptr;
9663         curr = bssp->descr;
9664         break;
9665       default :
9666         break;
9667     }
9668 
9669     if (omdp->datatype == OBJ_BIOSEQ && bsp != NULL && bsp->seqentry != NULL) {
9670       sep = bsp->seqentry;
9671     } else if (omdp->datatype == OBJ_BIOSEQSET && bssp != NULL && bssp->seqentry != NULL) {
9672       sep = bssp->seqentry;
9673     } else {
9674       sep = ObjMgrGetChoiceForData (omdp->dataptr);
9675     }
9676 
9677     (context->level)++;
9678 
9679     /* now look for first appropriate descriptor in current chain */
9680 
9681     while (curr != NULL) {
9682       (context->itemID)++;
9683       (context->index)++;
9684       if (seqDescChoice == 0 || curr->choice == seqDescChoice) {
9685         context->entityID = entityID;
9686         context->sdp = curr;
9687         context->sep = sep;
9688         context->seqdesctype = curr->choice;
9689         context->userdata = NULL;
9690         context->omdp = (Pointer) omdp;
9691         return curr;
9692       }
9693       curr = curr->next;
9694     }
9695   }
9696 
9697   return curr;
9698 }
9699 
SeqMgrGetNextFeatureEx(BioseqPtr bsp,SeqFeatPtr curr,Uint1 seqFeatChoice,Uint1 featDefChoice,SeqMgrFeatContext PNTR context,Boolean byLabel,Boolean byLocusTag)9700 static SeqFeatPtr LIBCALL SeqMgrGetNextFeatureEx (BioseqPtr bsp, SeqFeatPtr curr,
9701                                                   Uint1 seqFeatChoice, Uint1 featDefChoice,
9702                                                   SeqMgrFeatContext PNTR context,
9703                                                   Boolean byLabel, Boolean byLocusTag)
9704 
9705 {
9706   SMFeatItemPtr PNTR  array = NULL;
9707   BioseqExtraPtr      bspextra;
9708   Uint2               entityID;
9709   Uint4               i;
9710   SMFeatItemPtr       item;
9711   Int4                num = 0;
9712   ObjMgrDataPtr       omdp;
9713   Uint1               seqfeattype;
9714 
9715   if (context == NULL) return NULL;
9716 
9717   /* if curr is NULL, initialize context fields (in user's stack) */
9718 
9719 
9720   if (curr == NULL) {
9721     if (bsp == NULL) return NULL;
9722     omdp = SeqMgrGetOmdpForBioseq (bsp);
9723     if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9724 
9725     context->omdp = (Pointer) omdp;
9726     context->index = 0;
9727   }
9728 
9729   omdp = (ObjMgrDataPtr) context->omdp;
9730   if (omdp == NULL) return NULL;
9731   bspextra = (BioseqExtraPtr) omdp->extradata;
9732   if (bspextra == NULL) return NULL;
9733   if (byLocusTag) {
9734     array = bspextra->genesByLocusTag;
9735     num = bspextra->numgenes;
9736   } else if (byLabel) {
9737     array = bspextra->featsByLabel;
9738     num = bspextra->numfeats;
9739   } else {
9740     array = bspextra->featsByPos;
9741     num = bspextra->numfeats;
9742   }
9743   if (array == NULL || num < 1) return NULL;
9744 
9745   if (bsp != NULL && bsp->idx.entityID > 0) {
9746     entityID = bsp->idx.entityID;
9747   } else {
9748     entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
9749   }
9750 
9751   i = context->index;
9752 
9753   /* now look for next appropriate feature */
9754 
9755   while (i < (Uint4) num) {
9756     item = array [i];
9757     if (item != NULL) {
9758       curr = item->sfp;
9759       i++;
9760       if (curr != NULL) {
9761         seqfeattype = curr->data.choice;
9762         if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) &&
9763             (featDefChoice == 0 || item->subtype == featDefChoice) &&
9764             (! item->ignore)) {
9765           context->entityID = entityID;
9766           context->itemID = item->itemID;
9767           context->sfp = curr;
9768           context->sap = item->sap;
9769           context->bsp = item->bsp;
9770           context->label = item->label;
9771           context->left = item->left;
9772           context->right = item->right;
9773           context->dnaStop = item->dnaStop;
9774           context->partialL = item->partialL;
9775           context->partialR = item->partialR;
9776           context->external = item->external;
9777           context->farloc = item->farloc;
9778           context->bad_order = item->bad_order;
9779           context->mixed_strand = item->mixed_strand;
9780           context->ts_image = item->ts_image;
9781           context->strand = item->strand;
9782           context->seqfeattype = seqfeattype;
9783           context->featdeftype = item->subtype;
9784           context->numivals = item->numivals;
9785           context->ivals = item->ivals;
9786           context->userdata = NULL;
9787           context->omdp = (Pointer) omdp;
9788           if (byLocusTag) {
9789             context->index = i;
9790           } else if (byLabel) {
9791             context->index = i;
9792           } else {
9793             context->index = item->index + 1;
9794           }
9795           return curr;
9796         }
9797       }
9798     }
9799   }
9800 
9801   return NULL;
9802 }
9803 
SeqMgrGetNextFeature(BioseqPtr bsp,SeqFeatPtr curr,Uint1 seqFeatChoice,Uint1 featDefChoice,SeqMgrFeatContext PNTR context)9804 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetNextFeature (BioseqPtr bsp, SeqFeatPtr curr,
9805                                                     Uint1 seqFeatChoice, Uint1 featDefChoice,
9806                                                     SeqMgrFeatContext PNTR context)
9807 
9808 {
9809   return SeqMgrGetNextFeatureEx (bsp, curr, seqFeatChoice, featDefChoice, context, FALSE, FALSE);
9810 }
9811 
SeqMgrGetNextFeatureByLabel(BioseqPtr bsp,SeqFeatPtr curr,Uint1 seqFeatChoice,Uint1 featDefChoice,SeqMgrFeatContext PNTR context)9812 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetNextFeatureByLabel (BioseqPtr bsp, SeqFeatPtr curr,
9813                                                            Uint1 seqFeatChoice, Uint1 featDefChoice,
9814                                                            SeqMgrFeatContext PNTR context)
9815 
9816 {
9817   return SeqMgrGetNextFeatureEx (bsp, curr, seqFeatChoice, featDefChoice, context, TRUE, FALSE);
9818 }
9819 
SeqMgrGetNextGeneByLocusTag(BioseqPtr bsp,SeqFeatPtr curr,SeqMgrFeatContext PNTR context)9820 NLM_EXTERN SeqFeatPtr LIBCALL SeqMgrGetNextGeneByLocusTag (BioseqPtr bsp, SeqFeatPtr curr,
9821                                                            SeqMgrFeatContext PNTR context
9822 )
9823 
9824 {
9825   return SeqMgrGetNextFeatureEx (bsp, curr, SEQFEAT_GENE, 0, context, FALSE, TRUE);
9826 }
9827 
SeqMgrGetNextAnnotDesc(BioseqPtr bsp,AnnotDescPtr curr,Uint1 annotDescChoice,SeqMgrAndContext PNTR context)9828 NLM_EXTERN AnnotDescPtr LIBCALL SeqMgrGetNextAnnotDesc (
9829   BioseqPtr bsp,
9830   AnnotDescPtr curr,
9831   Uint1 annotDescChoice,
9832   SeqMgrAndContext PNTR context
9833 )
9834 
9835 {
9836   Uint1              annotdesctype;
9837   AnnotDescPtr PNTR  array = NULL;
9838   BioseqExtraPtr     bspextra;
9839   Uint2              entityID;
9840   Uint4              i;
9841   AnnotDescPtr       item;
9842   Int4               num = 0;
9843   ObjMgrDataPtr      omdp;
9844   ObjValNodePtr      ovp;
9845 
9846   if (context == NULL) return NULL;
9847 
9848   /* if curr is NULL, initialize context fields (in user's stack) */
9849 
9850 
9851   if (curr == NULL) {
9852     if (bsp == NULL) return NULL;
9853     /*
9854     entityID = ObjMgrGetEntityIDForPointer (bsp);
9855     if (entityID < 1) return NULL;
9856     omdp = ObjMgrGetData (entityID);
9857     if (omdp == NULL) return NULL;
9858     */
9859     omdp = SeqMgrGetOmdpForBioseq (bsp);
9860     if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
9861 
9862     context->omdp = (Pointer) omdp;
9863     context->index = 0;
9864   }
9865 
9866   omdp = (ObjMgrDataPtr) context->omdp;
9867   if (omdp == NULL) return NULL;
9868 
9869   bspextra = (BioseqExtraPtr) omdp->extradata;
9870   if (bspextra == NULL) return NULL;
9871 
9872   array = bspextra->annotDescByID;
9873   num = bspextra->numannotdesc;
9874   if (array == NULL || num < 1) return NULL;
9875 
9876   if (bsp != NULL && bsp->idx.entityID > 0) {
9877     entityID = bsp->idx.entityID;
9878   } else {
9879     entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
9880   }
9881 
9882   i = context->index;
9883 
9884   /* now look for next appropriate annotdesc */
9885 
9886   while (i < (Uint4) num) {
9887     item = array [i];
9888     if (item != NULL && item->extended != 0) {
9889       ovp = (ObjValNodePtr) item;
9890       i++;
9891       annotdesctype = item->choice;
9892       if (annotDescChoice == 0 || annotdesctype == annotDescChoice) {
9893         context->entityID = entityID;
9894         context->itemID = ovp->idx.itemID;
9895         context->adp = item;
9896         context->annotdesctype = annotdesctype;
9897         context->userdata = NULL;
9898         context->omdp = (Pointer) omdp;
9899         context->index = i;
9900         return item;
9901       }
9902     }
9903   }
9904 
9905   return NULL;
9906 }
9907 
9908 /*****************************************************************************
9909 *
9910 *   SeqMgrExploreBioseqs, SeqMgrExploreSegments, SeqMgrExploreDescriptors,
9911 *     SeqMgrExploreFeatures, SeqMgrVisitDescriptors, and SeqMgrVisitFeatures
9912 *
9913 *****************************************************************************/
9914 
/* JustExamineBioseqs recursively walks a SeqEntry and calls userfunc for each
   Bioseq that passes the molecule filters and has indexing data.

   sep      - entry to examine (a Bioseq or a BioseqSet)
   bssp     - set that directly contains sep, or NULL at the top; stored in the
              context for Bioseqs
   context  - context handed to userfunc; index is incremented per callback
   userfunc - callback; a FALSE return stops the whole traversal
   nucs     - call userfunc for nucleotide Bioseqs
   prots    - call userfunc for protein Bioseqs
   parts    - descend into sets of class parts (inside which both molecule
              filters are switched on)
   count    - optional; incremented once per callback

   Returns FALSE if an argument is NULL or userfunc asked to stop; TRUE
   otherwise, including when a Bioseq is skipped. */

static Boolean JustExamineBioseqs (SeqEntryPtr sep, BioseqSetPtr bssp,
                                   SeqMgrBioseqContextPtr context,
                                   SeqMgrBioseqExploreProc userfunc,
                                   Boolean nucs, Boolean prots, Boolean parts,
                                   Int4Ptr count)

{
  BioseqPtr       bsp;
  BioseqExtraPtr  bspextra;
  SeqEntryPtr     child;
  ObjMgrDataPtr   omdp;

  if (sep == NULL || context == NULL || userfunc == NULL) return FALSE;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    if (bsp == NULL) return TRUE;

    /* skip molecule types the caller did not ask for */

    if ((ISA_na (bsp->mol) && (! nucs)) || (ISA_aa (bsp->mol) && (! prots))) return TRUE;

    /* skip Bioseqs without an object manager record or index data */

    omdp = SeqMgrGetOmdpForBioseq (bsp);
    if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return TRUE;
    bspextra = (BioseqExtraPtr) omdp->extradata;
    if (bspextra == NULL) return TRUE;

    context->bsp = bsp;
    context->bssp = bssp;
    context->sep = sep;
    context->omdp = omdp;
    context->itemID = bspextra->bspItemID;
    context->numsegs = bspextra->numsegs;
    (context->index)++;

    if (count != NULL) {
      (*count)++;
    }

    /* a FALSE answer from the callback unwinds every level of the recursion */

    if (userfunc (bsp, context)) return TRUE;
    return FALSE;
  }

  if (! IS_Bioseq_set (sep)) return TRUE;

  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  if (bssp == NULL) return TRUE;

  if (bssp->_class == BioseqseqSet_class_parts) {

    /* parts sets are entered only on request, and once inside every
       component Bioseq is shown regardless of molecule type */

    if (! parts) return TRUE;
    nucs = TRUE;
    prots = TRUE;
  }

  /* visit each member of the set, stopping as soon as one reports FALSE */

  for (child = bssp->seq_set; child != NULL; child = child->next) {
    if (! JustExamineBioseqs (child, bssp, context, userfunc, nucs, prots, parts, count)) return FALSE;
  }

  return TRUE;
}
9983 
SeqMgrExploreBioseqs(Uint2 entityID,Pointer ptr,Pointer userdata,SeqMgrBioseqExploreProc userfunc,Boolean nucs,Boolean prots,Boolean parts)9984 NLM_EXTERN Int4 LIBCALL SeqMgrExploreBioseqs (Uint2 entityID, Pointer ptr, Pointer userdata,
9985                                               SeqMgrBioseqExploreProc userfunc,
9986                                               Boolean nucs, Boolean prots, Boolean parts)
9987 
9988 {
9989   SeqMgrBioseqContext  context;
9990   Int4                 count = 0;
9991   SeqEntryPtr          sep;
9992 
9993   if (entityID == 0) {
9994     entityID = ObjMgrGetEntityIDForPointer (ptr);
9995   }
9996   if (entityID == 0) return 0;
9997   sep = SeqMgrGetTopSeqEntryForEntity (entityID);
9998   if (sep == NULL) return 0;
9999   if (userfunc == NULL) return 0;
10000 
10001   context.entityID = entityID;
10002   context.index = 0;
10003   context.userdata = userdata;
10004 
10005   /* recursive call to explore SeqEntry and pass appropriate bioseqs to user */
10006 
10007   JustExamineBioseqs (sep, NULL, &context, userfunc, nucs, prots, parts, &count);
10008 
10009   return count;
10010 }
10011 
SeqMgrExploreSegments(BioseqPtr bsp,Pointer userdata,SeqMgrSegmentExploreProc userfunc)10012 NLM_EXTERN Int4 LIBCALL SeqMgrExploreSegments (BioseqPtr bsp, Pointer userdata,
10013                                                SeqMgrSegmentExploreProc userfunc)
10014 
10015 {
10016   BioseqExtraPtr        bspextra;
10017   SeqMgrSegmentContext  context;
10018   Int4                  count = 0;
10019   Uint2                 entityID;
10020   Uint4                 i;
10021   ObjMgrDataPtr         omdp;
10022   SMSeqIdxPtr PNTR      partsByLoc;
10023   SMSeqIdxPtr           segpartptr;
10024   SeqLocPtr             slp;
10025 
10026   if (bsp == NULL) return 0;
10027   if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta && bsp->repr != Seq_repr_ref) return 0;
10028   omdp = SeqMgrGetOmdpForBioseq (bsp);
10029   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return 0;
10030   if (userfunc == NULL) return 0;
10031   entityID = bsp->idx.entityID;
10032   if (entityID < 1) {
10033     entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
10034   }
10035 
10036   bspextra = (BioseqExtraPtr) omdp->extradata;
10037   if (bspextra == NULL) return 0;
10038   partsByLoc = bspextra->partsByLoc;
10039   if (partsByLoc == NULL || bspextra->numsegs < 1) return 0;
10040 
10041   for (i = 0; i < (Uint4) bspextra->numsegs; i++) {
10042     segpartptr = partsByLoc [i];
10043     if (segpartptr != NULL) {
10044       slp = segpartptr->slp;
10045       context.entityID = entityID;
10046       context.itemID = segpartptr->itemID;
10047       context.slp = slp;
10048       context.parent = segpartptr->parentBioseq;
10049       context.cumOffset = segpartptr->cumOffset;
10050       context.from = segpartptr->from;
10051       context.to = segpartptr->to;
10052       context.strand = segpartptr->strand;
10053       context.userdata = userdata;
10054       context.omdp = (Pointer) omdp;
10055       context.index = i + 1;
10056 
10057       count++;
10058 
10059       if (! userfunc (slp, &context)) return count;
10060     }
10061   }
10062 
10063   return count;
10064 }
10065 
SeqMgrExploreDescriptors(BioseqPtr bsp,Pointer userdata,SeqMgrDescExploreProc userfunc,BoolPtr seqDescFilter)10066 NLM_EXTERN Int4 LIBCALL SeqMgrExploreDescriptors (BioseqPtr bsp, Pointer userdata,
10067                                                   SeqMgrDescExploreProc userfunc,
10068                                                   BoolPtr seqDescFilter)
10069 
10070 {
10071   BioseqSetPtr       bssp;
10072   SeqMgrDescContext  context;
10073   Int4               count = 0;
10074   Uint2              entityID;
10075   Uint4              itemID;
10076   ObjMgrDataPtr      omdp;
10077   ValNodePtr         sdp;
10078   SeqEntryPtr        sep;
10079 
10080   if (bsp == NULL) return 0;
10081   omdp = SeqMgrGetOmdpForBioseq (bsp);
10082   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return 0;
10083   if (userfunc == NULL) return 0;
10084   entityID = bsp->idx.entityID;
10085   if (entityID < 1) {
10086     entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
10087   }
10088 
10089   context.index = 0;
10090   context.level = 0;
10091   while (omdp != NULL) {
10092     itemID = omdp->lastDescrItemID;
10093     sdp = NULL;
10094     switch (omdp->datatype) {
10095       case OBJ_BIOSEQ :
10096         bsp = (BioseqPtr) omdp->dataptr;
10097         sdp = bsp->descr;
10098         break;
10099       case OBJ_BIOSEQSET :
10100         bssp = (BioseqSetPtr) omdp->dataptr;
10101         sdp = bssp->descr;
10102         break;
10103       default :
10104         break;
10105     }
10106 
10107     sep = ObjMgrGetChoiceForData (omdp->dataptr);
10108 
10109     /* call for every appropriate descriptor in current chain */
10110 
10111     while (sdp != NULL) {
10112       itemID++;
10113       if (seqDescFilter == NULL || seqDescFilter [sdp->choice]) {
10114         context.entityID = entityID;
10115         context.itemID = itemID;
10116         context.sdp = sdp;
10117         context.sep = sep;
10118         context.seqdesctype = sdp->choice;
10119         context.userdata = userdata;
10120         context.omdp = (Pointer) omdp;
10121         (context.index)++;
10122 
10123         count++;
10124 
10125         if (! userfunc (sdp, &context)) return count;
10126       }
10127       sdp = sdp->next;
10128     }
10129 
10130     /* now go up omdp chain looking for next descriptor */
10131 
10132     omdp = SeqMgrGetOmdpForPointer (omdp->parentptr);
10133     (context.level)++;
10134   }
10135   return count;
10136 }
10137 
SeqMgrExploreFeaturesInt(BioseqPtr bsp,Pointer userdata,SeqMgrFeatExploreProc userfunc,SeqLocPtr locationFilter,BoolPtr seqFeatFilter,BoolPtr featDefFilter,Boolean doreverse)10138 static Int4 LIBCALL SeqMgrExploreFeaturesInt (BioseqPtr bsp, Pointer userdata,
10139                                               SeqMgrFeatExploreProc userfunc,
10140                                               SeqLocPtr locationFilter,
10141                                               BoolPtr seqFeatFilter,
10142                                               BoolPtr featDefFilter,
10143                                               Boolean doreverse)
10144 
10145 {
10146   BioseqExtraPtr      bspextra;
10147   SeqMgrFeatContext   context;
10148   Int4                count = 0;
10149   Uint2               entityID;
10150   SMFeatItemPtr PNTR  featsByID;
10151   SMFeatItemPtr PNTR  featsByPos;
10152   SMFeatItemPtr PNTR  featsByRev;
10153   Uint4               i;
10154   SMFeatItemPtr       item;
10155   Int4                left = INT4_MIN;
10156   ObjMgrDataPtr       omdp;
10157   Int4                right = INT4_MAX;
10158   Uint1               seqfeattype;
10159   SeqFeatPtr          sfp;
10160   Uint4               start = 0;
10161   Int4                tmp;
10162 
10163   if (bsp == NULL) return 0;
10164   omdp = SeqMgrGetOmdpForBioseq (bsp);
10165   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return 0;
10166   if (userfunc == NULL) return 0;
10167   entityID = bsp->idx.entityID;
10168   if (entityID < 1) {
10169     entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
10170   }
10171 
10172   bspextra = (BioseqExtraPtr) omdp->extradata;
10173   if (bspextra == NULL) return 0;
10174 
10175   if (doreverse) {
10176     if (bspextra->featsByRev == NULL) {
10177 
10178       /* index by reverse position if not already done */
10179 
10180       featsByRev = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * (bspextra->numfeats + 1));
10181       bspextra->featsByRev = featsByRev;
10182 
10183       if (featsByRev != NULL) {
10184         featsByID = bspextra->featsByID;
10185         for (i = 0; i < (Uint4) bspextra->numfeats; i++) {
10186           featsByRev [i] = featsByID [i];
10187         }
10188 
10189         /* sort all features by feature reverse location on bioseq */
10190 
10191         StableMergeSort ((VoidPtr) featsByRev, (size_t) bspextra->numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByRev);
10192       }
10193     }
10194 
10195     featsByPos = bspextra->featsByRev;
10196   } else {
10197     featsByPos = bspextra->featsByPos;
10198   }
10199   if (featsByPos == NULL || bspextra->numfeats < 1) return 0;
10200 
10201   if (locationFilter != NULL) {
10202     left = GetOffsetInBioseq (locationFilter, bsp, SEQLOC_LEFT_END);
10203     if (left == -1) left = INT4_MIN;
10204     right = GetOffsetInBioseq (locationFilter, bsp, SEQLOC_RIGHT_END);
10205     if (right == -1) right = INT4_MAX;
10206 
10207     /* if far segmented or delta, and location (from explore
10208        segments) is minus strand, will need to swap */
10209 
10210     if (left > right) {
10211       tmp = left;
10212       left = right;
10213       right = tmp;
10214     }
10215 
10216     /*
10217     binary search to leftmost candidate would need featsByPos array
10218     variant sorted primarily by rightmost position, so comment this
10219     out for now, resurrect and add new array only if it turns out to
10220     be necessary when we support entrez fetch subrecord by location
10221     */
10222 
10223     /*
10224     L = 0;
10225     R = bspextra->numfeats - 1;
10226     while (L < R) {
10227       mid = (L + R) / 2;
10228       item = featsByPos [mid];
10229       if (item != NULL && item->right < left) {
10230         L = mid + 1;
10231       } else {
10232         R = mid;
10233       }
10234     }
10235 
10236     start = R;
10237     */
10238   }
10239 
10240   /* call for every appropriate feature in sorted list */
10241 
10242   for (i = start; i < (Uint4) bspextra->numfeats; i++) {
10243     item = featsByPos [i];
10244     if (item != NULL) {
10245 
10246       /* can exit once past rightmost limit */
10247 
10248       if (locationFilter != NULL && (! doreverse) && item->left > right) return count;
10249       if (locationFilter != NULL && (doreverse) && item->right < left) return count;
10250 
10251       sfp = item->sfp;
10252       if (sfp != NULL) {
10253         seqfeattype = sfp->data.choice;
10254       } else {
10255         seqfeattype = FindFeatFromFeatDefType (item->subtype);
10256       }
10257       if ((seqFeatFilter == NULL || seqFeatFilter [seqfeattype]) &&
10258           (featDefFilter == NULL || featDefFilter [item->subtype]) &&
10259           (locationFilter == NULL || (item->right > left && item->left <= right)) &&
10260           (! item->ignore)) {
10261         context.entityID = entityID;
10262         context.itemID = item->itemID;
10263         context.sfp = sfp;
10264         context.sap = item->sap;
10265         context.bsp = item->bsp;
10266         context.label = item->label;
10267         context.left = item->left;
10268         context.right = item->right;
10269         context.dnaStop = item->dnaStop;
10270         context.partialL = item->partialL;
10271         context.partialR = item->partialR;
10272         context.external = item->external;
10273         context.farloc = item->farloc;
10274         context.bad_order = item->bad_order;
10275         context.mixed_strand = item->mixed_strand;
10276         context.strand = item->strand;
10277         context.seqfeattype = seqfeattype;
10278         context.featdeftype = item->subtype;
10279         context.numivals = item->numivals;
10280         context.ivals = item->ivals;
10281         context.userdata = userdata;
10282         context.omdp = (Pointer) omdp;
10283         context.index = item->index + 1;
10284 
10285         count++;
10286 
10287         if (! userfunc (sfp, &context)) return count;
10288       }
10289     }
10290   }
10291   return count;
10292 }
10293 
SeqMgrExploreFeatures(BioseqPtr bsp,Pointer userdata,SeqMgrFeatExploreProc userfunc,SeqLocPtr locationFilter,BoolPtr seqFeatFilter,BoolPtr featDefFilter)10294 NLM_EXTERN Int4 LIBCALL SeqMgrExploreFeatures (BioseqPtr bsp, Pointer userdata,
10295                                                SeqMgrFeatExploreProc userfunc,
10296                                                SeqLocPtr locationFilter,
10297                                                BoolPtr seqFeatFilter,
10298                                                BoolPtr featDefFilter)
10299 
10300 {
10301   return SeqMgrExploreFeaturesInt (bsp, userdata, userfunc, locationFilter, seqFeatFilter, featDefFilter, FALSE);
10302 }
10303 
SeqMgrExploreFeaturesRev(BioseqPtr bsp,Pointer userdata,SeqMgrFeatExploreProc userfunc,SeqLocPtr locationFilter,BoolPtr seqFeatFilter,BoolPtr featDefFilter)10304 NLM_EXTERN Int4 LIBCALL SeqMgrExploreFeaturesRev (BioseqPtr bsp, Pointer userdata,
10305                                                   SeqMgrFeatExploreProc userfunc,
10306                                                   SeqLocPtr locationFilter,
10307                                                   BoolPtr seqFeatFilter,
10308                                                   BoolPtr featDefFilter)
10309 
10310 {
10311   return SeqMgrExploreFeaturesInt (bsp, userdata, userfunc, locationFilter, seqFeatFilter, featDefFilter, TRUE);
10312 }
10313 
VisitDescriptorsPerSeqEntry(Uint2 entityID,SeqEntryPtr sep,Pointer userdata,SeqMgrDescExploreProc userfunc,BoolPtr seqDescFilter)10314 static Int2 VisitDescriptorsPerSeqEntry (Uint2 entityID, SeqEntryPtr sep,
10315                                          Pointer userdata, SeqMgrDescExploreProc userfunc,
10316                                          BoolPtr seqDescFilter)
10317 
10318 {
10319   BioseqPtr          bsp;
10320   BioseqSetPtr       bssp = NULL;
10321   Uint2              count = 0;
10322   SeqMgrDescContext  context;
10323   Uint4              itemID;
10324   ObjMgrDataPtr      omdp = NULL;
10325   ValNodePtr         sdp = NULL;
10326   SeqEntryPtr        tmp;
10327 
10328   if (sep != NULL) {
10329     if (IS_Bioseq (sep)) {
10330       bsp = (BioseqPtr) sep->data.ptrvalue;
10331       if (bsp == NULL) return 0;
10332       omdp = SeqMgrGetOmdpForBioseq (bsp);
10333       sdp = bsp->descr;
10334     } else if (IS_Bioseq_set (sep)) {
10335       bssp = (BioseqSetPtr) sep->data.ptrvalue;
10336       if (bssp == NULL) return 0;
10337       omdp = SeqMgrGetOmdpForPointer (bssp);
10338       sdp = bssp->descr;
10339     }
10340   }
10341   if (omdp == NULL) return 0;
10342   itemID = omdp->lastDescrItemID;
10343 
10344   context.index = 0;
10345   context.level = 0;
10346 
10347   while (sdp != NULL) {
10348     itemID++;
10349     if (seqDescFilter == NULL || seqDescFilter [sdp->choice]) {
10350       context.entityID = entityID;
10351       context.itemID = itemID;
10352       context.sdp = sdp;
10353       context.sep = sep;
10354       context.seqdesctype = sdp->choice;
10355       context.userdata = userdata;
10356       context.omdp = (Pointer) omdp;
10357 
10358       count++;
10359 
10360       if (! userfunc (sdp, &context)) return count;
10361     }
10362     sdp = sdp->next;
10363   }
10364 
10365   if (bssp != NULL) {
10366     for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
10367       count += VisitDescriptorsPerSeqEntry (entityID, tmp, userdata, userfunc, seqDescFilter);
10368     }
10369   }
10370 
10371   return count;
10372 }
10373 
SeqMgrVisitDescriptors(Uint2 entityID,Pointer userdata,SeqMgrDescExploreProc userfunc,BoolPtr seqDescFilter)10374 NLM_EXTERN Int2 LIBCALL SeqMgrVisitDescriptors (Uint2 entityID, Pointer userdata,
10375                                                 SeqMgrDescExploreProc userfunc,
10376                                                 BoolPtr seqDescFilter)
10377 
10378 {
10379   SeqEntryPtr  sep;
10380 
10381   if (entityID < 1 || userfunc == NULL) return 0;
10382   sep = SeqMgrGetTopSeqEntryForEntity (entityID);
10383   if (sep == NULL) return 0;
10384 
10385   return VisitDescriptorsPerSeqEntry (entityID, sep, userdata, userfunc, seqDescFilter);
10386 }
10387 
SeqMgrVisitFeatures(Uint2 entityID,Pointer userdata,SeqMgrFeatExploreProc userfunc,BoolPtr seqFeatFilter,BoolPtr featDefFilter)10388 NLM_EXTERN Int2 LIBCALL SeqMgrVisitFeatures (Uint2 entityID, Pointer userdata,
10389                                              SeqMgrFeatExploreProc userfunc,
10390                                              BoolPtr seqFeatFilter, BoolPtr featDefFilter)
10391 
10392 {
10393   BioseqExtraPtr      bspextra;
10394   SeqMgrFeatContext   context;
10395   Int2                count = 0;
10396   SMFeatItemPtr PNTR  featsByID;
10397   Uint2               i;
10398   SMFeatItemPtr       item;
10399   ObjMgrDataPtr       omdp;
10400   Uint1               seqfeattype;
10401   SeqFeatPtr          sfp;
10402 
10403   omdp = ObjMgrGetData (entityID);
10404   if (omdp == NULL) return 0;
10405   if (userfunc == NULL) return 0;
10406 
10407   bspextra = (BioseqExtraPtr) omdp->extradata;
10408   if (bspextra == NULL) return 0;
10409   featsByID = bspextra->featsByID;
10410   if (featsByID == NULL || bspextra->numfeats < 1) return 0;
10411 
10412   /* call for every appropriate feature in itemID order */
10413 
10414   for (i = 0; i < bspextra->numfeats; i++) {
10415     item = featsByID [i];
10416     if (item != NULL) {
10417 
10418       sfp = item->sfp;
10419       if (sfp != NULL) {
10420         seqfeattype = sfp->data.choice;
10421       } else {
10422         seqfeattype = FindFeatFromFeatDefType (item->subtype);
10423       }
10424       if ((seqFeatFilter == NULL || seqFeatFilter [seqfeattype]) &&
10425           (featDefFilter == NULL || featDefFilter [item->subtype]) &&
10426           (! item->ignore)) {
10427         context.entityID = entityID;
10428         context.itemID = item->itemID;
10429         context.sfp = sfp;
10430         context.sap = item->sap;
10431         context.bsp = item->bsp;
10432         context.label = item->label;
10433         context.left = item->left;
10434         context.right = item->right;
10435         context.dnaStop = item->dnaStop;
10436         context.partialL = item->partialL;
10437         context.partialR = item->partialR;
10438         context.external = item->external;
10439         context.farloc = item->farloc;
10440         context.bad_order = item->bad_order;
10441         context.mixed_strand = item->mixed_strand;
10442         context.strand = item->strand;
10443         context.seqfeattype = seqfeattype;
10444         context.featdeftype = item->subtype;
10445         context.numivals = item->numivals;
10446         context.ivals = item->ivals;
10447         context.userdata = userdata;
10448         context.omdp = (Pointer) omdp;
10449         context.index = 0;
10450 
10451         count++;
10452 
10453         if (! userfunc (sfp, &context)) return count;
10454       }
10455     }
10456   }
10457   return count;
10458 }
10459 
10460 /*****************************************************************************
10461 *
10462 *   SeqMgrMapPartToSegmentedBioseq can speed up sequtil's CheckPointInBioseq
10463 *     for indexed part bioseq to segmented bioseq mapping
10464 *
10465 *****************************************************************************/
10466 
BinarySearchPartToSegmentMap(BioseqPtr in,Int4 pos,BioseqPtr bsp,SeqIdPtr sip,Boolean relaxed,Int4 from,Int4 to)10467 static SMSeqIdxPtr BinarySearchPartToSegmentMap (BioseqPtr in, Int4 pos, BioseqPtr bsp, SeqIdPtr sip, Boolean relaxed, Int4 from, Int4 to)
10468 
10469 {
10470   BioseqExtraPtr    bspextra;
10471   Char              buf [128];
10472   Int2              compare;
10473   ObjMgrDataPtr     omdp;
10474   SMSeqIdxPtr PNTR  partsBySeqId;
10475   SMSeqIdxPtr       segpartptr;
10476   CharPtr           seqIdOfPart;
10477   Int4              L, R, mid;
10478 
10479   if (in == NULL) return NULL;
10480   omdp = SeqMgrGetOmdpForBioseq (in);
10481   if (omdp == NULL) return NULL;
10482   bspextra = (BioseqExtraPtr) omdp->extradata;
10483   if (bspextra == NULL) return NULL;
10484 
10485   partsBySeqId = bspextra->partsBySeqId;
10486   if (partsBySeqId == NULL || bspextra->numsegs < 1) return NULL;
10487 
10488   if (bsp != NULL) {
10489     sip = bsp->id;
10490   }
10491   if (sip == NULL) return NULL;
10492 
10493   /* binary search into array on segmented bioseq sorted by part seqID (reversed) string */
10494 
10495   while (sip != NULL) {
10496     if (MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1)) {
10497       L = 0;
10498       R = bspextra->numsegs - 1;
10499       while (L < R) {
10500         mid = (L + R) / 2;
10501         segpartptr = partsBySeqId [mid];
10502         compare = StringCmp (segpartptr->seqIdOfPart, buf);
10503         if (compare < 0) {
10504           L = mid + 1;
10505         } else {
10506           R = mid;
10507         }
10508       }
10509 
10510       /* loop through all components with same seqID, get appropriate segment */
10511 
10512       segpartptr = partsBySeqId [R];
10513       seqIdOfPart = segpartptr->seqIdOfPart;
10514       while (R < bspextra->numsegs && StringCmp (seqIdOfPart, buf) == 0) {
10515         if (relaxed) {
10516 
10517           /* for genome mapping of portion not included in contig */
10518 
10519           if ((from >= segpartptr->from && from <= segpartptr->to) ||
10520               (to >= segpartptr->from && to <= segpartptr->to) ||
10521               (from < segpartptr->from && to > segpartptr->to) ||
10522               (to < segpartptr->from && from > segpartptr->to)) {
10523 
10524             return segpartptr;
10525           }
10526 
10527         } else if (pos >= segpartptr->from && pos <= segpartptr->to) {
10528 
10529           /* otherwise only map portion included in contig */
10530 
10531           return segpartptr;
10532         }
10533 
10534         R++;
10535         if (R < bspextra->numsegs) {
10536           segpartptr = partsBySeqId [R];
10537           seqIdOfPart = segpartptr->seqIdOfPart;
10538         } else {
10539           seqIdOfPart = NULL;
10540         }
10541       }
10542     }
10543     sip = sip->next;
10544   }
10545 
10546   return NULL;
10547 }
10548 
GenomePartToSegmentMap(BioseqPtr in,BioseqPtr bsp,Int4 from,Int4 to)10549 NLM_EXTERN SMSeqIdxPtr GenomePartToSegmentMap (BioseqPtr in, BioseqPtr bsp, Int4 from, Int4 to)
10550 
10551 {
10552   return BinarySearchPartToSegmentMap (in, 0, bsp, NULL, TRUE, from, to);
10553 }
10554 
SeqMgrMapPartToSegmentedBioseq(BioseqPtr in,Int4 pos,BioseqPtr bsp,SeqIdPtr sip,BoolPtr flip_strand,Boolean relaxed)10555 NLM_EXTERN Int4 LIBCALL SeqMgrMapPartToSegmentedBioseq (BioseqPtr in, Int4 pos, BioseqPtr bsp, SeqIdPtr sip, BoolPtr flip_strand, Boolean relaxed)
10556 
10557 {
10558   BioseqExtraPtr  bspextra;
10559   SMSeqIdxPtr     currp;
10560   SMSeqIdxPtr     nextp;
10561   ObjMgrDataPtr   omdp;
10562   SMSeqIdxPtr     segpartptr;
10563 
10564   if (in == NULL) return -1;
10565   if (flip_strand != NULL) {
10566     *flip_strand = FALSE;
10567   }
10568 
10569   /* first check to see if part has been loaded and single map up block installed */
10570 
10571   if (bsp != NULL) {
10572     omdp = SeqMgrGetOmdpForBioseq (bsp);
10573     if (omdp != NULL) {
10574       bspextra = (BioseqExtraPtr) omdp->extradata;
10575       if (bspextra != NULL) {
10576 
10577         /* no need for partsByLoc or partsBySeqId arrays, just use segparthead linked list */
10578 
10579         for (segpartptr = bspextra->segparthead; segpartptr != NULL; segpartptr = segpartptr->next) {
10580           if (segpartptr->parentBioseq == in) {
10581             if (pos >= segpartptr->from && pos <= segpartptr->to) {
10582 
10583               /* success, immediate return with mapped up value */
10584 
10585               if (segpartptr->strand == Seq_strand_minus) {
10586                 if (flip_strand != NULL) {
10587                   *flip_strand = FALSE;
10588                 }
10589                 return segpartptr->cumOffset + (segpartptr->to - pos);
10590               } else {
10591                 return segpartptr->cumOffset + (pos - segpartptr->from);
10592               }
10593             }
10594           }
10595         }
10596       }
10597     }
10598   }
10599 
10600   /* otherwise do binary search on segmented bioseq mapping data */
10601 
10602   segpartptr = BinarySearchPartToSegmentMap (in, pos, bsp, sip, relaxed, 0, 0);
10603   if (segpartptr == NULL) return -1;
10604 
10605   if (relaxed || (pos >= segpartptr->from && pos <= segpartptr->to)) {
10606 
10607     /* install map up block on part, if it has been loaded, to speed up next search */
10608 
10609     if (bsp != NULL) {
10610       omdp = SeqMgrGetOmdpForBioseq (bsp);
10611       if (omdp != NULL) {
10612         bspextra = (BioseqExtraPtr) omdp->extradata;
10613         if (bspextra == NULL) {
10614           CreateBioseqExtraBlock (omdp, bsp);
10615           bspextra = (BioseqExtraPtr) omdp->extradata;
10616         }
10617         if (bspextra != NULL) {
10618 
10619           /* clean up any old map up info on part */
10620 
10621           for (currp = bspextra->segparthead; currp != NULL; currp = nextp) {
10622             nextp = currp->next;
10623             SeqLocFree (currp->slp);
10624             MemFree (currp->seqIdOfPart);
10625             MemFree (currp);
10626           }
10627           bspextra->segparthead = NULL;
10628           bspextra->numsegs = 0;
10629           bspextra->partsByLoc = MemFree (bspextra->partsByLoc);
10630           bspextra->partsBySeqId = MemFree (bspextra->partsBySeqId);
10631 
10632           /* allocate single map up block */
10633 
10634           currp = MemNew (sizeof (SMSeqIdx));
10635           if (currp != NULL) {
10636             currp->slp = AsnIoMemCopy (segpartptr->slp,
10637                                        (AsnReadFunc) SeqLocAsnRead,
10638                                        (AsnWriteFunc) SeqLocAsnWrite);
10639             currp->seqIdOfPart = StringSave (segpartptr->seqIdOfPart);
10640             currp->parentBioseq = segpartptr->parentBioseq;
10641             currp->cumOffset = segpartptr->cumOffset;
10642             currp->from = segpartptr->from;
10643             currp->to = segpartptr->to;
10644             currp->strand = segpartptr->strand;
10645           }
10646 
10647           /* add new map up block to part */
10648 
10649           bspextra->segparthead = currp;
10650         }
10651       }
10652     }
10653 
10654     /* now return offset result */
10655 
10656     if (segpartptr->strand == Seq_strand_minus) {
10657       if (flip_strand != NULL) {
10658         *flip_strand = TRUE;
10659       }
10660       return segpartptr->cumOffset + (segpartptr->to - pos);
10661     } else {
10662       return segpartptr->cumOffset + (pos - segpartptr->from);
10663     }
10664   }
10665   return -1;
10666 }
10667 
10668 /*****************************************************************************
10669 *
*   TrimLocInSegment takes a location on an indexed far segmented part and
*     trims it to the region referred to by the parent segmented or delta bioseq.
10672 *
10673 *     Only implemented for seqloc_int components, not seqloc_point
10674 *
10675 *****************************************************************************/
10676 
TrimLocInSegment(BioseqPtr master,SeqLocPtr location,BoolPtr p5ptr,BoolPtr p3ptr)10677 NLM_EXTERN SeqLocPtr TrimLocInSegment (
10678   BioseqPtr master,
10679   SeqLocPtr location,
10680   BoolPtr p5ptr,
10681   BoolPtr p3ptr
10682 )
10683 
10684 {
10685   BioseqPtr         bsp;
10686   BioseqExtraPtr    bspextra;
10687   Char              buf [128];
10688   Int2              compare;
10689   ObjMgrDataPtr     omdp;
10690   Boolean           partial5;
10691   Boolean           partial3;
10692   SMSeqIdxPtr PNTR  partsBySeqId;
10693   SeqLocPtr         rsult = NULL;
10694   SMSeqIdxPtr       segpartptr;
10695   CharPtr           seqIdOfPart;
10696   SeqIdPtr          sip;
10697   SeqIntPtr         sint;
10698   SeqLocPtr         slp;
10699   Uint1             strand;
10700   Int4              L, R, mid;
10701   Int4              start, stop, swap;
10702 
10703   if (master == NULL || location == NULL) return NULL;
10704 
10705   omdp = SeqMgrGetOmdpForBioseq (master);
10706   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
10707   bspextra = (BioseqExtraPtr) omdp->extradata;
10708   if (bspextra == NULL) return NULL;
10709 
10710   partsBySeqId = bspextra->partsBySeqId;
10711   if (partsBySeqId == NULL || bspextra->numsegs < 1) return NULL;
10712 
10713   partial5 = FALSE;
10714   partial3 = FALSE;
10715 
10716   if (p5ptr != NULL) {
10717     partial5 = *p5ptr;
10718   }
10719   if (p3ptr != NULL) {
10720     partial3 = *p3ptr;
10721   }
10722 
10723   for (slp = SeqLocFindNext (location, NULL);
10724        slp != NULL;
10725        slp = SeqLocFindNext (location, slp)) {
10726     if (slp->choice != SEQLOC_INT) continue;
10727     sint = (SeqIntPtr) slp->data.ptrvalue;
10728     if (sint == NULL) continue;
10729     strand = sint->strand;
10730 
10731     bsp = BioseqFind (sint->id);
10732     if (bsp == NULL) continue;
10733 
10734     for (sip = bsp->id; sip != NULL; sip = sip->next) {
10735       if (! MakeReversedSeqIdString (sip, buf, sizeof (buf) - 1)) continue;
10736 
10737       L = 0;
10738       R = bspextra->numsegs - 1;
10739       while (L < R) {
10740         mid = (L + R) / 2;
10741         segpartptr = partsBySeqId [mid];
10742         compare = StringCmp (segpartptr->seqIdOfPart, buf);
10743         if (compare < 0) {
10744           L = mid + 1;
10745         } else {
10746           R = mid;
10747         }
10748       }
10749 
10750       segpartptr = partsBySeqId [R];
10751       seqIdOfPart = segpartptr->seqIdOfPart;
10752 
10753       while (R < bspextra->numsegs && StringCmp (seqIdOfPart, buf) == 0) {
10754 
10755         start = sint->from;
10756         stop = sint->to;
10757 
10758         if ((sint->from <= segpartptr->from && sint->to > segpartptr->from) ||
10759             (sint->from < segpartptr->to && sint->to >= segpartptr->to)) {
10760 
10761           if (sint->from < segpartptr->from) {
10762             start = segpartptr->from;
10763             if (strand == Seq_strand_minus || strand == Seq_strand_both_rev) {
10764               partial3 = TRUE;
10765             } else {
10766               partial5 = TRUE;
10767             }
10768           }
10769           if (sint->to > segpartptr->to) {
10770             stop = segpartptr->to;
10771             if (strand == Seq_strand_minus || strand == Seq_strand_both_rev) {
10772               partial5 = TRUE;
10773             } else {
10774               partial3 = TRUE;
10775             }
10776           }
10777 
10778           if (strand == Seq_strand_minus || strand == Seq_strand_both_rev) {
10779             swap = start;
10780             start = stop;
10781             stop = swap;
10782           }
10783 
10784           rsult = AddIntervalToLocation (rsult, sint->id, start, stop, FALSE, FALSE);
10785         }
10786 
10787         R++;
10788         if (R < bspextra->numsegs) {
10789           segpartptr = partsBySeqId [R];
10790           seqIdOfPart = segpartptr->seqIdOfPart;
10791         } else {
10792           seqIdOfPart = NULL;
10793         }
10794       }
10795     }
10796   }
10797 
10798   if (p5ptr != NULL) {
10799     *p5ptr = partial5;
10800   }
10801   if (p3ptr != NULL) {
10802     *p3ptr = partial3;
10803   }
10804 
10805   return rsult;
10806 }
10807 
10808 /***************************/
10809 
/* queue of uids waiting to be fetched; filled by LookupAndExtractBspListMT
   and drained one at a time by RemoveUidFromQueue, under its mutex */

static ValNodePtr  smp_requested_uid_list = NULL;
static TNlmMutex   smp_requested_uid_mutex = NULL;

/* list of bioseqs locked by the lookup threads; prepended to by
   AddBspToList and handed back by LookupAndExtractBspListMT, under its mutex */

static ValNodePtr  smp_locked_bsp_list = NULL;
static TNlmMutex   smp_locked_bsp_mutex = NULL;
10815 
/* Prepends bsp to smp_locked_bsp_list while holding smp_locked_bsp_mutex.
   Does nothing for a NULL bsp; posts a fatal error and gives up if the
   mutex cannot be locked. */

static void AddBspToList (
  BioseqPtr bsp
)

{
  Int4        err;
  ValNodePtr  node;

  if (bsp == NULL) return;

  err = NlmMutexLockEx (&smp_locked_bsp_mutex);
  if (err) {
    ErrPostEx (SEV_FATAL, 0, 0, "AddBspToList mutex failed [%ld]", (long) err);
    return;
  }

  /* new node goes on the front of the shared list */

  node = ValNodeAddPointer (NULL, 0, (Pointer) bsp);
  if (node != NULL) {
    node->next = smp_locked_bsp_list;
    smp_locked_bsp_list = node;
  }

  NlmMutexUnlock (smp_locked_bsp_mutex);
}
10840 
RemoveUidFromQueue(void)10841 static Int4 RemoveUidFromQueue (
10842   void
10843 )
10844 
10845 {
10846   Int4        ret, uid = 0;
10847   ValNodePtr  vnp;
10848 
10849   ret = NlmMutexLockEx (&smp_requested_uid_mutex);
10850   if (ret) {
10851     ErrPostEx (SEV_FATAL, 0, 0, "RemoveUidFromQueue mutex failed [%ld]", (long) ret);
10852     return 0;
10853   }
10854 
10855   /* extract next requested uid from queue */
10856 
10857   if (smp_requested_uid_list != NULL) {
10858     vnp = smp_requested_uid_list;
10859     smp_requested_uid_list = vnp->next;
10860     vnp->next = NULL;
10861     uid = (Int4) vnp->data.intvalue;
10862     ValNodeFree (vnp);
10863   }
10864 
10865   NlmMutexUnlock (smp_requested_uid_mutex);
10866 
10867   return uid;
10868 }
10869 
/* Thread body: keeps taking uids from the request queue until it yields a
   value that is not positive.  Each uid not already found by BioseqFindFunc
   is locked by id, and a successfully locked bioseq is recorded with
   AddBspToList.  The argument is unused; always returns NULL. */

static VoidPtr DoAsyncLookup (
  VoidPtr arg
)

{
  BioseqPtr  locked;
  ValNode    gi;
  Int4       uid;

  MemSet ((Pointer) &gi, 0, sizeof (ValNode));

  for (uid = RemoveUidFromQueue (); uid > 0; uid = RemoveUidFromQueue ()) {

    /* build a temporary gi seqid on the stack for this uid */

    gi.choice = SEQID_GI;
    gi.data.intvalue = uid;
    gi.next = NULL;

    if (BioseqFindFunc (&gi, FALSE, FALSE, TRUE) != NULL) continue;

    locked = BioseqLockByIdEx (&gi, FALSE);
    if (locked != NULL) {
      AddBspToList (locked);
    }
  }

  return NULL;
}
10900 
/* number of DoAsyncLookup threads created by LookupAndExtractBspListMT */

#define NUM_ASYNC_LOOKUP_THREADS 5
10902 
/* Multi-threaded fetch: moves the caller's uid list onto the shared request
   queue (leaving *uidlistP NULL), runs NUM_ASYNC_LOOKUP_THREADS copies of
   DoAsyncLookup to completion, then takes and returns the accumulated list
   of locked bioseqs.  Returns NULL on empty input or mutex failure. */

static ValNodePtr LookupAndExtractBspListMT (
  ValNodePtr PNTR uidlistP
)

{
  Int4        err;
  ValNodePtr  fetched = NULL;
  VoidPtr     status;
  Int2        t;
  TNlmThread  workers [NUM_ASYNC_LOOKUP_THREADS];

  if (uidlistP == NULL || *uidlistP == NULL) return NULL;

  /* hand ownership of the uid list to the shared queue */

  err = NlmMutexLockEx (&smp_requested_uid_mutex);
  if (err) {
    ErrPostEx (SEV_FATAL, 0, 0, "add uid mutex failed [%ld]", (long) err);
    return NULL;
  }

  smp_requested_uid_list = *uidlistP;
  *uidlistP = NULL;

  NlmMutexUnlock (smp_requested_uid_mutex);

  /* spawn several threads for individual lock requests */

  t = 0;
  while (t < NUM_ASYNC_LOOKUP_THREADS) {
    workers [t] = NlmThreadCreate (DoAsyncLookup, NULL);
    t++;
  }

  /* wait for all fetching threads to complete */

  t = 0;
  while (t < NUM_ASYNC_LOOKUP_THREADS) {
    NlmThreadJoin (workers [t], &status);
    t++;
  }

  /* collect everything the workers locked and reset the shared list */

  err = NlmMutexLockEx (&smp_locked_bsp_mutex);
  if (err) {
    ErrPostEx (SEV_FATAL, 0, 0, "get bsp mutex failed [%ld]", (long) err);
    return NULL;
  }

  fetched = smp_locked_bsp_list;
  smp_locked_bsp_list = NULL;

  NlmMutexUnlock (smp_locked_bsp_mutex);

  return fetched;
}
10952 
/* Single-threaded fetch: for every positive uid in the list that
   BioseqFindFunc does not already find, locks the bioseq by gi and
   prepends it to the result.  With reindexIfBig, a fetched record whose
   top seq entry holds more than two bioseqs triggers a release and
   re-set of the indexing and object manager holds.  The input list is
   freed and *uidlistP set to the result of ValNodeFree. */

static ValNodePtr LookupAndExtractBspListST (
  ValNodePtr PNTR uidlistP,
  Boolean reindexIfBig
)

{
  Uint2        entityID;
  ValNodePtr   fetched = NULL;
  SeqId        gi;
  BioseqPtr    locked;
  ValNodePtr   node;
  ValNodePtr   req;
  SeqEntryPtr  topsep;
  Int4         uid;

  if (uidlistP == NULL || *uidlistP == NULL) return NULL;

  MemSet ((Pointer) &gi, 0, sizeof (SeqId));

  /* record fetching loop */

  req = *uidlistP;
  while (req != NULL) {
    uid = (Int4) req->data.intvalue;
    locked = NULL;

    if (uid >= 1) {
      gi.choice = SEQID_GI;
      gi.data.intvalue = uid;

      /* only lock records that are not already found */

      if (BioseqFindFunc (&gi, FALSE, TRUE, TRUE) == NULL) {
        locked = BioseqLockByIdEx (&gi, FALSE);
      }
    }

    if (locked != NULL) {
      if (reindexIfBig) {
        entityID = ObjMgrGetEntityIDForPointer (locked);
        topsep = GetTopSeqEntryForEntityID (entityID);
        if (topsep != NULL && VisitBioseqsInSep (topsep, NULL, NULL) > 2) {

          /* drop and immediately re-establish both holds */

          SeqMgrHoldIndexing (FALSE);
          ObjMgrClearHold ();
          ObjMgrSetHold ();
          SeqMgrHoldIndexing (TRUE);
        }
      }

      node = ValNodeAddPointer (NULL, 0, (Pointer) locked);
      if (node != NULL) {
        node->next = fetched;
        fetched = node;
      }
    }

    req = req->next;
  }

  /* clean up input uidlist */

  *uidlistP = ValNodeFree (*uidlistP);

  return fetched;
}
11005 
/* Zeroes out every uid in the list that BioseqFindFunc already finds
   (searched with the seq entry scope temporarily cleared), then fetches
   the remainder with the multi-threaded or single-threaded routine and
   returns that routine's list of locked bioseqs. */

static ValNodePtr LookupAndExtractBspList (
  ValNodePtr PNTR uidlistP,
  Boolean usethreads,
  Boolean reindexIfBig
)

{
  SeqId        gi;
  ValNodePtr   req;
  SeqEntryPtr  savedScope;
  Int4         uid;

  if (uidlistP == NULL || *uidlistP == NULL) return NULL;

  MemSet ((Pointer) &gi, 0, sizeof (SeqId));

  /* exclude any records already loaded anywhere in memory */

  savedScope = SeqEntrySetScope (NULL);
  req = *uidlistP;
  while (req != NULL) {
    uid = (Int4) req->data.intvalue;
    if (uid >= 1) {
      gi.choice = SEQID_GI;
      gi.data.intvalue = uid;

      if (BioseqFindFunc (&gi, FALSE, FALSE, TRUE) != NULL) {
        req->data.intvalue = 0;
      }
    }
    req = req->next;
  }
  SeqEntrySetScope (savedScope);

  /* now do actual fetching */

  if (usethreads) {
    return LookupAndExtractBspListMT (uidlistP);
  }

  return LookupAndExtractBspListST (uidlistP, reindexIfBig);
}
11046 
/*
 * SortUniqueCleanseUidList
 *   Sorts *uidlistP by integer value and removes duplicates.  If *bsplistP
 *   is non-empty, every uid that matches the gi of a Bioseq already on that
 *   list is then set to 0 so that later stages skip it.
 */
static void SortUniqueCleanseUidList (
  ValNodePtr PNTR uidlistP,
  ValNodePtr PNTR bsplistP
)

{
  ValNodePtr       bnode, hit, unode;
  Int4             count, gi, hi, idx, lo, probe;
  SeqIdPtr         id;
  BioseqPtr        loaded;
  ValNodePtr PNTR  table;

  if (uidlistP == NULL || *uidlistP == NULL) return;

  /* order the uids, then drop repeats */

  *uidlistP = ValNodeSort (*uidlistP, SortByIntvalue);
  *uidlistP = UniqueIntValNode (*uidlistP);

  if (bsplistP == NULL || *bsplistP == NULL) return;

  /* build an array over the sorted nodes so it can be binary searched */

  count = ValNodeLen (*uidlistP);
  if (count == 0) return;
  table = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * (count + 1));
  if (table == NULL) return;

  idx = 0;
  for (unode = *uidlistP; unode != NULL; unode = unode->next) {
    table [idx] = unode;
    idx++;
  }

  for (bnode = *bsplistP; bnode != NULL; bnode = bnode->next) {
    loaded = (BioseqPtr) bnode->data.ptrvalue;
    if (loaded == NULL) continue;

    /* take the first non-zero gi among the Bioseq's ids */

    gi = 0;
    for (id = loaded->id; id != NULL; id = id->next) {
      if (id->choice != SEQID_GI) continue;
      gi = (Int4) id->data.intvalue;
      if (gi != 0) break;
    }
    if (gi < 1) continue;

    /* lower-bound search for gi in the sorted table */

    lo = 0;
    hi = count - 1;
    while (lo < hi) {
      probe = (lo + hi) / 2;
      hit = table [probe];
      if (hit != NULL && hit->data.intvalue < gi) {
        lo = probe + 1;
      } else {
        hi = probe;
      }
    }

    hit = table [hi];
    if (hit != NULL && hit->data.intvalue == gi) {
      /* only flag here - zeroing now would break the sort order
         that the remaining searches depend on */
      hit->choice = 1;
    }
  }

  /* second pass actually clears the flagged uids */

  for (unode = *uidlistP; unode != NULL; unode = unode->next) {
    if (unode->choice != 1) continue;
    unode->data.intvalue = 0;
  }

  MemFree (table);
}
11118 
/* pair of work lists filled in by CollectAllSegments via its userdata pointer */
typedef struct iddata {
  ValNodePtr  uidlist;  /* nodes holding integer uids (gi numbers), newest at head */
  ValNodePtr  siplist;  /* nodes whose ptrvalue is a SeqId that was not resolved to a gi */
} IdLists, PNTR IdListsPtr;
11123 
/*
 * CollectAllSegments
 *   Examines the SeqId of one location and records it in the IdLists
 *   structure passed as userdata:
 *     - a gi (or an accession that GetGIForSeqId resolves to one) is pushed
 *       onto the head of uidlist;
 *     - a non-gi id that yields no gi is pushed onto the head of siplist
 *       (the node points at the SeqId, it does not copy it);
 *     - an accession-type id that BioseqFindCore already finds is ignored.
 */
static void CollectAllSegments (SeqLocPtr slp, Pointer userdata)

{
  SeqLocPtr     first;
  BIG_ID        gi = 0;
  SeqIdPtr      id;
  IdListsPtr    lists;
  ValNodePtr    node;
  TextSeqIdPtr  text_id;

  if (slp == NULL || userdata == NULL) return;
  lists = (IdListsPtr) userdata;

  /* use the location's own id, else the id of its first sub-location */

  id = SeqLocId (slp);
  if (id == NULL) {
    first = SeqLocFindNext (slp, NULL);
    if (first == NULL) return;
    id = SeqLocId (first);
    if (id == NULL) return;
  }

  if (id->choice != SEQID_GI) {

    switch (id->choice) {
      case SEQID_GENBANK :
      case SEQID_EMBL :
      case SEQID_DDBJ :
      case SEQID_OTHER :
      case SEQID_TPG:
      case SEQID_TPE:
      case SEQID_TPD:

        /* accession already present locally needs no fetch at all */

        if (BioseqFindCore (id) != NULL) return;

        /* only versioned accessions are converted to a gi */

        text_id = (TextSeqIdPtr) id->data.ptrvalue;
        if (text_id != NULL && text_id->version > 0) {
          gi = GetGIForSeqId (id);
        }
        break;
      case SEQID_GENERAL:
      default :
        /* gi stays 0 for every other id type */
        break;
    }

    if (gi < 1) {

      /* no gi available, remember the SeqId itself at head of sip list */

      node = ValNodeAddPointer (NULL, 0, (Pointer) id);
      if (node != NULL) {
        node->next = lists->siplist;
        lists->siplist = node;
      }
      return;
    }

  } else {

    gi = (BIG_ID) id->data.intvalue;
    if (gi < 1) return;
  }

  /* push the gi onto head of uid list */

  node = ValNodeAddBigInt (NULL, 0, gi);
  if (node == NULL) return;

  node->next = lists->uidlist;
  lists->uidlist = node;
}
11198 
/*
 * CollectAllBioseqs
 *   Calls CollectAllSegments on each non-NULL-type component location of a
 *   segmented, delta, or reference Bioseq.  Other representations are
 *   ignored.  userdata is passed through unchanged.
 */
static void CollectAllBioseqs (BioseqPtr bsp, Pointer userdata)

{
  DeltaSeqPtr  delta;
  ValNode      mix;
  SeqLocPtr    piece;

  if (bsp == NULL || userdata == NULL) return;

  switch (bsp->repr) {

    case Seq_repr_seg :
      /* wrap the segment chain in a temporary mix location for iteration */
      mix.choice = SEQLOC_MIX;
      mix.extended = 0;
      mix.data.ptrvalue = bsp->seq_ext;
      mix.next = NULL;
      for (piece = SeqLocFindNext (&mix, NULL);
           piece != NULL;
           piece = SeqLocFindNext (&mix, piece)) {
        if (piece->choice == SEQLOC_NULL) continue;
        CollectAllSegments (piece, userdata);
      }
      break;

    case Seq_repr_delta :
      /* only delta elements with choice 1 are treated as locations */
      for (delta = (DeltaSeqPtr) (bsp->seq_ext); delta != NULL; delta = delta->next) {
        if (delta->choice != 1) continue;
        piece = (SeqLocPtr) delta->data.ptrvalue;
        if (piece == NULL || piece->choice == SEQLOC_NULL) continue;
        CollectAllSegments (piece, userdata);
      }
      break;

    case Seq_repr_ref :
      piece = (SeqLocPtr) bsp->seq_ext;
      if (piece != NULL && piece->choice != SEQLOC_NULL) {
        CollectAllSegments (piece, userdata);
      }
      break;

    default :
      break;
  }
}
11234 
/*
 * CollectAllLocations
 *   Feeds every sub-location of a feature's location (except those of type
 *   SEQLOC_NULL) to CollectAllSegments.
 */
static void CollectAllLocations (SeqFeatPtr sfp, Pointer userdata)

{
  SeqLocPtr  piece;

  if (sfp == NULL || userdata == NULL) return;
  if (sfp->location == NULL) return;

  for (piece = SeqLocFindNext (sfp->location, NULL);
       piece != NULL;
       piece = SeqLocFindNext (sfp->location, piece)) {
    if (piece->choice == SEQLOC_NULL) continue;
    CollectAllSegments (piece, userdata);
  }
}
11248 
/*
 * CollectAllProducts
 *   Feeds every sub-location of a feature's product (except those of type
 *   SEQLOC_NULL) to CollectAllSegments.
 */
static void CollectAllProducts (SeqFeatPtr sfp, Pointer userdata)

{
  SeqLocPtr  piece;

  if (sfp == NULL || userdata == NULL) return;
  if (sfp->product == NULL) return;

  for (piece = SeqLocFindNext (sfp->product, NULL);
       piece != NULL;
       piece = SeqLocFindNext (sfp->product, piece)) {
    if (piece->choice == SEQLOC_NULL) continue;
    CollectAllSegments (piece, userdata);
  }
}
11262 
/*
 * CollectAllSublocs
 *   Feeds every sub-location of loc (except those of type SEQLOC_NULL) to
 *   CollectAllSegments.
 */
static void CollectAllSublocs (SeqLocPtr loc, Pointer userdata)

{
  SeqLocPtr  piece;

  if (loc == NULL) return;
  if (userdata == NULL) return;

  for (piece = SeqLocFindNext (loc, NULL);
       piece != NULL;
       piece = SeqLocFindNext (loc, piece)) {
    if (piece->choice == SEQLOC_NULL) continue;
    CollectAllSegments (piece, userdata);
  }
}
11276 
/*
 * FetchFromUidList
 *   Cleans *uidlistP against *bsplistP, fetches the remaining uids, and
 *   appends the fetched Bioseqs to *bsplistP.  Each fetched segmented,
 *   delta, or reference Bioseq is then scanned for its own component uids,
 *   and those are fetched in turn until a round fetches nothing.
 *
 *   uidlistP      in/out list of integer uids to fetch
 *   bsplistP      in/out list of fetched Bioseqs
 *   usethreads    passed to LookupAndExtractBspList
 *   reindexIfBig  passed to LookupAndExtractBspList
 */
static void FetchFromUidList (
  ValNodePtr PNTR uidlistP,
  ValNodePtr PNTR bsplistP,
  Boolean usethreads,
  Boolean reindexIfBig
)

{
  BioseqPtr   bsp;
  IdLists     ils;
  ValNodePtr  sublist, vnp;

  if (uidlistP == NULL || bsplistP == NULL) return;

  SortUniqueCleanseUidList (uidlistP, bsplistP);
  sublist = LookupAndExtractBspList (uidlistP, usethreads, reindexIfBig);

  while (sublist != NULL) {

    /* CollectAllSegments treats its userdata as an IdLists and writes to
       both uidlist and siplist, so a complete IdLists must be supplied -
       the address of a lone ValNodePtr would let the siplist access run
       past the end of that variable */

    ils.uidlist = NULL;
    ils.siplist = NULL;

    /* recursively queue delta or segmented component uids */

    for (vnp = sublist; vnp != NULL; vnp = vnp->next) {

      bsp = (BioseqPtr) vnp->data.ptrvalue;
      if (bsp == NULL) continue;
      if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta && bsp->repr != Seq_repr_ref) continue;

      CollectAllBioseqs (bsp, (Pointer) &ils);
    }

    /* component ids that did not resolve to a gi are not fetched by this
       function; release only the list nodes, the SeqIds they point to
       still belong to the Bioseqs they were collected from */

    ils.siplist = ValNodeFree (ils.siplist);

    ValNodeLink (bsplistP, sublist);
    sublist = NULL;

    SortUniqueCleanseUidList (&ils.uidlist, bsplistP);
    sublist = LookupAndExtractBspList (&ils.uidlist, usethreads, reindexIfBig);
  }
}
11315 
/*
 * FetchFromSipList
 *   For each SeqId on *siplistP that BioseqFindCore does not already find,
 *   locks the Bioseq with BioseqLockById and pushes it onto the head of
 *   *bsplistP.  The sip list itself is left untouched.
 */
static void FetchFromSipList (
  ValNodePtr PNTR siplistP,
  ValNodePtr PNTR bsplistP
)

{
  ValNodePtr  entry;
  SeqIdPtr    id;
  BioseqPtr   locked;
  ValNodePtr  node;

  if (siplistP == NULL) return;
  if (bsplistP == NULL) return;

  node = *siplistP;
  while (node != NULL) {
    id = (SeqIdPtr) node->data.ptrvalue;
    node = node->next;

    if (id == NULL) continue;
    if (BioseqFindCore (id) != NULL) continue;

    locked = BioseqLockById (id);
    if (locked == NULL) continue;

    entry = ValNodeAddPointer (NULL, 0, (Pointer) locked);
    if (entry == NULL) continue;

    /* newest lock goes to the front of the result list */
    entry->next = *bsplistP;
    *bsplistP = entry;
  }
}
11341 
/*
 * LookForNonGiSegments
 *   Sets the Boolean that userdata points to to TRUE when the id under
 *   inspection is not a gi.  The id is sip when supplied; otherwise it is
 *   taken from slp, or from the first sub-location of slp.  The flag is
 *   never reset to FALSE here.
 */
static void LookForNonGiSegments (
  SeqLocPtr slp,
  SeqIdPtr sip,
  Pointer userdata
)

{
  SeqLocPtr  first;
  BoolPtr    flag;

  if (sip == NULL && slp == NULL) return;
  if (userdata == NULL) return;
  flag = (BoolPtr) userdata;

  if (sip == NULL) {
    /* no explicit id, derive one from the location */
    sip = SeqLocId (slp);
    if (sip == NULL) {
      first = SeqLocFindNext (slp, NULL);
      if (first == NULL) return;
      sip = SeqLocId (first);
      if (sip == NULL) return;
    }
  }

  if (sip->choice == SEQID_GI) return;

  *flag = TRUE;
}
11371 
/*
 * LookForNonGiBioseqs
 *   Runs LookForNonGiSegments over each non-NULL-type component location
 *   of a segmented, delta, or reference Bioseq.  Other representations are
 *   ignored.  userdata is passed through unchanged.
 */
static void LookForNonGiBioseqs (
  BioseqPtr bsp,
  Pointer userdata
)

{
  DeltaSeqPtr  delta;
  ValNode      mix;
  SeqLocPtr    piece;

  if (bsp == NULL) return;

  switch (bsp->repr) {

    case Seq_repr_seg :
      /* wrap the segment chain in a temporary mix location for iteration */
      mix.choice = SEQLOC_MIX;
      mix.extended = 0;
      mix.data.ptrvalue = bsp->seq_ext;
      mix.next = NULL;
      for (piece = SeqLocFindNext (&mix, NULL);
           piece != NULL;
           piece = SeqLocFindNext (&mix, piece)) {
        if (piece->choice == SEQLOC_NULL) continue;
        LookForNonGiSegments (piece, NULL, userdata);
      }
      break;

    case Seq_repr_delta :
      /* only delta elements with choice 1 are treated as locations */
      for (delta = (DeltaSeqPtr) (bsp->seq_ext); delta != NULL; delta = delta->next) {
        if (delta->choice != 1) continue;
        piece = (SeqLocPtr) delta->data.ptrvalue;
        if (piece == NULL || piece->choice == SEQLOC_NULL) continue;
        LookForNonGiSegments (piece, NULL, userdata);
      }
      break;

    case Seq_repr_ref :
      piece = (SeqLocPtr) bsp->seq_ext;
      if (piece != NULL && piece->choice != SEQLOC_NULL) {
        LookForNonGiSegments (piece, NULL, userdata);
      }
      break;

    default :
      break;
  }
}
11410 
/*
 * LookForNonGiLocations
 *   Runs LookForNonGiSegments over every sub-location of a feature's
 *   location, skipping those of type SEQLOC_NULL.
 */
static void LookForNonGiLocations (SeqFeatPtr sfp, Pointer userdata)

{
  SeqLocPtr  piece;

  if (sfp == NULL || userdata == NULL) return;
  if (sfp->location == NULL) return;

  for (piece = SeqLocFindNext (sfp->location, NULL);
       piece != NULL;
       piece = SeqLocFindNext (sfp->location, piece)) {
    if (piece->choice == SEQLOC_NULL) continue;
    LookForNonGiSegments (piece, NULL, userdata);
  }
}
11424 
/*
 * LookForNonGiProducts
 *   Runs LookForNonGiSegments over every sub-location of a feature's
 *   product, skipping those of type SEQLOC_NULL.
 */
static void LookForNonGiProducts (SeqFeatPtr sfp, Pointer userdata)

{
  SeqLocPtr  piece;

  if (sfp == NULL || userdata == NULL) return;
  if (sfp->product == NULL) return;

  for (piece = SeqLocFindNext (sfp->product, NULL);
       piece != NULL;
       piece = SeqLocFindNext (sfp->product, piece)) {
    if (piece->choice == SEQLOC_NULL) continue;
    LookForNonGiSegments (piece, NULL, userdata);
  }
}
11438 
/*
 * LookForNonGiSublocs
 *   Runs LookForNonGiSegments over every sub-location of loc, skipping
 *   those of type SEQLOC_NULL.
 */
static void LookForNonGiSublocs (SeqLocPtr loc, Pointer userdata)

{
  SeqLocPtr  piece;

  if (loc == NULL) return;
  if (userdata == NULL) return;

  for (piece = SeqLocFindNext (loc, NULL);
       piece != NULL;
       piece = SeqLocFindNext (loc, piece)) {
    if (piece->choice == SEQLOC_NULL) continue;
    LookForNonGiSegments (piece, NULL, userdata);
  }
}
11452 
AdvcLockFarComponents(SeqEntryPtr sep,Boolean components,Boolean locations,Boolean products,SeqLocPtr loc,Boolean usethreads)11453 NLM_EXTERN ValNodePtr AdvcLockFarComponents (
11454   SeqEntryPtr sep,
11455   Boolean components,
11456   Boolean locations,
11457   Boolean products,
11458   SeqLocPtr loc,
11459   Boolean usethreads
11460 )
11461 
11462 {
11463   ValNodePtr   bsplist = NULL;
11464   IdLists      ils;
11465   Boolean      nonGi;
11466   SeqEntryPtr  oldsep;
11467 
11468   if (sep == NULL) return NULL;
11469   oldsep = SeqEntrySetScope (sep);
11470 
11471   /* if non-GI components/locations/products, lookup in bulk first */
11472 
11473   if (components) {
11474     nonGi = FALSE;
11475     VisitBioseqsInSep (sep, (Pointer) &nonGi, LookForNonGiBioseqs);
11476     if (nonGi) {
11477       LookupFarSeqIDs (sep, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE);
11478     }
11479   }
11480 
11481   if (locations) {
11482     nonGi = FALSE;
11483     VisitFeaturesInSep (sep, (Pointer) &nonGi, LookForNonGiLocations);
11484     if (nonGi) {
11485       LookupFarSeqIDs (sep, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
11486     }
11487   }
11488 
11489   if (products) {
11490     nonGi = FALSE;
11491     VisitFeaturesInSep (sep, (Pointer) &nonGi, LookForNonGiProducts);
11492     if (nonGi) {
11493       LookupFarSeqIDs (sep, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE);
11494     }
11495   }
11496 
11497   if (loc != NULL) {
11498     nonGi = FALSE;
11499     LookForNonGiSublocs (loc, (Pointer) &nonGi);
11500     if (nonGi) {
11501       LookupFarSeqIDs (sep, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE);
11502     }
11503   }
11504 
11505   /* now collect list of GI numbers, lock into memory */
11506 
11507   ils.siplist = NULL;
11508 
11509   if (components) {
11510     ObjMgrSetHold ();
11511     SeqMgrHoldIndexing (TRUE);
11512     ils.uidlist = NULL;
11513     VisitBioseqsInSep (sep, (Pointer) &ils, CollectAllBioseqs);
11514     FetchFromUidList (&ils.uidlist, &bsplist, usethreads, FALSE);
11515     SeqMgrHoldIndexing (FALSE);
11516     ObjMgrClearHold ();
11517   }
11518 
11519   if (locations) {
11520     ObjMgrSetHold ();
11521     SeqMgrHoldIndexing (TRUE);
11522     ils.uidlist = NULL;
11523     VisitFeaturesInSep (sep, (Pointer) &ils, CollectAllLocations);
11524     FetchFromUidList (&ils.uidlist, &bsplist, usethreads, TRUE);
11525     SeqMgrHoldIndexing (FALSE);
11526     ObjMgrClearHold ();
11527   }
11528 
11529   if (products) {
11530     ObjMgrSetHold ();
11531     SeqMgrHoldIndexing (TRUE);
11532     ils.uidlist = NULL;
11533     VisitFeaturesInSep (sep, (Pointer) &ils, CollectAllProducts);
11534     FetchFromUidList (&ils.uidlist, &bsplist, usethreads, TRUE);
11535     SeqMgrHoldIndexing (FALSE);
11536     ObjMgrClearHold ();
11537   }
11538 
11539   if (loc != NULL) {
11540     ObjMgrSetHold ();
11541     SeqMgrHoldIndexing (TRUE);
11542     ils.uidlist = NULL;
11543     CollectAllSublocs (loc, (Pointer) &ils);
11544     FetchFromUidList (&ils.uidlist, &bsplist, usethreads, TRUE);
11545     SeqMgrHoldIndexing (FALSE);
11546     ObjMgrClearHold ();
11547   }
11548 
11549   /* process list of non-GI sips, lock into memory */
11550 
11551   if (ils.siplist != NULL) {
11552     FetchFromSipList (&ils.siplist, &bsplist);
11553 
11554     ValNodeFree (ils.siplist);
11555   }
11556 
11557   SeqEntrySetScope (oldsep);
11558   return bsplist;
11559 }
11560 
11561 /***************************/
11562 
LockFarComponentsEx(SeqEntryPtr sep,Boolean components,Boolean locations,Boolean products,SeqLocPtr loc)11563 NLM_EXTERN ValNodePtr LockFarComponentsEx (SeqEntryPtr sep, Boolean components, Boolean locations, Boolean products, SeqLocPtr loc)
11564 
11565 {
11566 #ifdef OS_UNIX
11567   CharPtr      str;
11568 #endif
11569 
11570   if (sep == NULL) return NULL;
11571 
11572 #ifdef OS_UNIX
11573   str = getenv ("ADV_LOCK_FAR_COMPONENTS");
11574   if (str != NULL) {
11575     if (StringICmp (str, "Multi") == 0) {
11576       return AdvcLockFarComponents (sep, components, locations, products, loc, TRUE);
11577     }
11578   }
11579 #endif
11580 
11581   return AdvcLockFarComponents (sep, components, locations, products, loc, FALSE);
11582 }
11583 
LockFarComponents(SeqEntryPtr sep)11584 NLM_EXTERN ValNodePtr LockFarComponents (SeqEntryPtr sep)
11585 
11586 {
11587   return LockFarComponentsEx (sep, TRUE, FALSE, FALSE, NULL);
11588 }
11589 
UnlockFarComponents(ValNodePtr bsplist)11590 NLM_EXTERN ValNodePtr UnlockFarComponents (ValNodePtr bsplist)
11591 
11592 {
11593   BioseqPtr   bsp;
11594   ValNodePtr  vnp;
11595 
11596   if (bsplist == NULL) return NULL;
11597 
11598   ObjMgrSetHold ();
11599 
11600   for (vnp = bsplist; vnp != NULL; vnp = vnp->next) {
11601     bsp = (BioseqPtr) vnp->data.ptrvalue;
11602     if (bsp != NULL) {
11603       BioseqUnlock (bsp);
11604     }
11605   }
11606 
11607   ObjMgrClearHold ();
11608 
11609   return ValNodeFree (bsplist);
11610 }
11611 
LockFarAlignmentBioseqs(SeqAlignPtr salp)11612 NLM_EXTERN ValNodePtr LockFarAlignmentBioseqs (SeqAlignPtr salp)
11613 {
11614   ValNodePtr    bsplist = NULL;
11615   SeqAlignPtr   tmp_salp;
11616   Int4          alnRows, seq_num, index_num;
11617   SeqIdPtr      tmp_sip;
11618   BioseqPtr     bsp;
11619   ObjMgrDataPtr omdp;
11620   ObjMgrPtr     omp;
11621 
11622   omp = ObjMgrWriteLock();
11623   if (omp == NULL) return NULL;
11624 
11625   for (tmp_salp = salp; tmp_salp != NULL; tmp_salp = tmp_salp->next) {
11626     alnRows = AlnMgr2GetNumRows(tmp_salp);  /* size of the alignment */
11627     for (seq_num = 1; seq_num < alnRows + 1; seq_num++) {
11628       tmp_sip = AlnMgr2GetNthSeqIdPtr(tmp_salp, seq_num);
11629       bsp = BioseqLockById(tmp_sip);
11630       if (bsp == NULL) continue;
11631       index_num = ObjMgrLookup(omp, (Pointer)bsp);
11632       if (index_num < 0) {
11633         ValNodeAddPointer (&bsplist, 0, bsp);
11634       } else {
11635         omdp = ObjMgrFindTop (omp, omp->datalist[index_num]);
11636         if (omdp != NULL && omdp->tempload == TL_NOT_TEMP) {
11637           BioseqUnlock (bsp);
11638         } else {
11639           ValNodeAddPointer (&bsplist, 0, bsp);
11640         }
11641       }
11642     }
11643   }
11644   ObjMgrUnlock();
11645   return bsplist;
11646 }
11647 
11648 /*****************************************************************************
11649 *
11650 *   SeqMgrSetPreCache
11651 *       registers the GiToSeqID precache function
11652 *   LookupFarSeqIDs
11653 *       calls any registered function to preload the cache
11654 *
11655 *****************************************************************************/
11656 
SeqMgrSetPreCache(SIDPreCacheFunc func)11657 NLM_EXTERN void LIBCALL SeqMgrSetPreCache (SIDPreCacheFunc func)
11658 
11659 {
11660   SeqMgrPtr  smp;
11661 
11662   smp = SeqMgrWriteLock ();
11663   if (smp == NULL) return;
11664   smp->seq_id_precache_func = func;
11665   SeqMgrUnlock ();
11666 }
11667 
LookupFarSeqIDs(SeqEntryPtr sep,Boolean components,Boolean locations,Boolean products,Boolean alignments,Boolean history,Boolean inference,Boolean others)11668 NLM_EXTERN Int4 LookupFarSeqIDs (
11669   SeqEntryPtr sep,
11670   Boolean components,
11671   Boolean locations,
11672   Boolean products,
11673   Boolean alignments,
11674   Boolean history,
11675   Boolean inference,
11676   Boolean others
11677 )
11678 
11679 {
11680   SIDPreCacheFunc  func;
11681   SeqMgrPtr        smp;
11682 
11683   smp = SeqMgrReadLock ();
11684   if (smp == NULL) return 0;
11685   func = smp->seq_id_precache_func;
11686   SeqMgrUnlock ();
11687   if (func == NULL) return 0;
11688   return (*func) (sep, components, locations, products, alignments, history, inference, others);
11689 }
11690 
11691 /*****************************************************************************
11692 *
11693 *   SeqMgrSetSeqIdSetFunc
11694 *       registers the GiToSeqIdSet lookup function
11695 *   GetSeqIdSetForGI
11696 *       calls any registered function to lookup the set of SeqIds
11697 *
11698 *****************************************************************************/
11699 
SeqMgrSetSeqIdSetFunc(SeqIdSetLookupFunc func)11700 NLM_EXTERN void LIBCALL SeqMgrSetSeqIdSetFunc (SeqIdSetLookupFunc func)
11701 
11702 {
11703   SeqMgrPtr  smp;
11704 
11705   smp = SeqMgrWriteLock ();
11706   if (smp == NULL) return;
11707   smp->seq_id_set_lookup_func = func;
11708   SeqMgrUnlock ();
11709 }
11710 
GetSeqIdSetForGI(BIG_ID gi)11711 NLM_EXTERN SeqIdPtr LIBCALL GetSeqIdSetForGI (BIG_ID gi)
11712 
11713 {
11714   SeqIdSetLookupFunc  func;
11715   SeqMgrPtr           smp;
11716 
11717   smp = SeqMgrReadLock ();
11718   if (smp == NULL) return 0;
11719   func = smp->seq_id_set_lookup_func;
11720   SeqMgrUnlock ();
11721   if (func == NULL) return 0;
11722   return (*func) (gi);
11723 }
11724 
11725 /*****************************************************************************
11726 *
11727 *   SeqMgrSetLenFunc
11728 *       registers the GiToSeqLen lookup function
11729 *   SeqMgrSetAccnVerFunc
11730 *       registers the GiToAccnVer lookup function
11731 *
11732 *****************************************************************************/
11733 
SeqMgrSetLenFunc(SeqLenLookupFunc func)11734 NLM_EXTERN void LIBCALL SeqMgrSetLenFunc (SeqLenLookupFunc func)
11735 
11736 {
11737   SeqMgrPtr  smp;
11738 
11739   smp = SeqMgrWriteLock ();
11740   if (smp == NULL) return;
11741   smp->seq_len_lookup_func = func;
11742   SeqMgrUnlock ();
11743 }
11744 
SeqMgrSetAccnVerFunc(AccnVerLookupFunc func)11745 NLM_EXTERN void LIBCALL SeqMgrSetAccnVerFunc (AccnVerLookupFunc func)
11746 
11747 {
11748   SeqMgrPtr  smp;
11749 
11750   smp = SeqMgrWriteLock ();
11751   if (smp == NULL) return;
11752   smp->accn_ver_lookup_func = func;
11753   SeqMgrUnlock ();
11754 }
11755 
11756 /*******************************************************************
11757 *
11758 *   SeqEntryAsnOut()
11759 *
11760 *       dumps parts of SeqEntry from a memory object
11761 *
11762 *******************************************************************/
11763 
/* working state for SeqEntryAsnOut */
typedef struct ext_pack_data {
   SeqEntryPtr  sep [5];        /* candidate SeqEntry per retcode: 0 top, 1 Bioseq, 2 segset, 3 nuc-prot, 4 pub-set */
   Uint4        minSapItemID;   /* lowest SeqAnnot itemID recorded by GetSapBounds */
   Uint4        maxSapItemID;   /* highest SeqAnnot itemID recorded by GetSapBounds */
   ValNodePtr   descChain;      /* head of list of descriptor pointers taken from parent sets */
   ValNodePtr   featChain;      /* head of list of feature pointers whose SeqAnnot is outside the itemID range */
   ValNodePtr   lastVnp;        /* most recently appended node of the chain being built */
} ExtPackData, PNTR ExtPackPtr;
11772 
/* widens the min/max SeqAnnot itemID range in the ExtPackData passed as
   userdata so that it includes this annotation's itemID */
static void GetSapBounds (SeqAnnotPtr sap, Pointer userdata)

{
   ExtPackPtr  bounds = (ExtPackPtr) userdata;

   if (sap->idx.itemID < bounds->minSapItemID) {
     bounds->minSapItemID = sap->idx.itemID;
   }
   if (sap->idx.itemID > bounds->maxSapItemID) {
     bounds->maxSapItemID = sap->idx.itemID;
   }
}
11782 
SeqEntryAsnOut(SeqEntryPtr sep,SeqIdPtr sip,Int2 retcode,AsnIoPtr aipout)11783 NLM_EXTERN Boolean SeqEntryAsnOut (SeqEntryPtr sep, SeqIdPtr sip,
11784                                     Int2 retcode, AsnIoPtr aipout)
11785 
11786 {
11787    BioseqPtr          bsp;
11788    BioseqSetPtr       bssp;
11789    SeqMgrFeatContext  context;
11790    Uint2              entityID;
11791    ExtPackData        epd;
11792    SeqEntryPtr        oldscope;
11793    BioseqSetPtr       parent;
11794    SeqAnnotPtr        sap;
11795    SeqDescrPtr        sdp;
11796    SeqFeatPtr         sfp;
11797    SeqEntryPtr        top;
11798    ValNodePtr         vnp;
11799    AsnOptionPtr       aopp_feat = NULL, aopp_desc = NULL;
11800    DataVal            dv;
11801 
11802    if (sep == NULL || sip == NULL || aipout == NULL) return FALSE;
11803 
11804    if (retcode > 4) {
11805      retcode = 0;
11806    }
11807    if (retcode < 0) {
11808      retcode = 0;
11809    }
11810 
11811    entityID = ObjMgrGetEntityIDForChoice (sep);
11812    if (entityID < 1) return FALSE;
11813    top = GetTopSeqEntryForEntityID (entityID);
11814    if (top == NULL) return FALSE;
11815 
11816    /* indexing sets idx fields, will find features outside of desired
11817 SeqEntry */
11818 
11819    if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
11820      SeqMgrIndexFeatures (entityID, NULL);
11821    }
11822 
11823    /* find Bioseq within entity given SeqId */
11824 
11825    oldscope = SeqEntrySetScope (top);
11826    bsp = BioseqFind (sip);
11827    SeqEntrySetScope (oldscope);
11828    if (bsp == NULL) return FALSE;
11829 
11830    MemSet ((Pointer) &epd, 0, sizeof (ExtPackData));
11831 
11832    /* get parent hierarchy */
11833 
11834    epd.sep [0] = top;
11835    epd.sep [1] = bsp->seqentry;
11836 
11837    if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
11838      parent = (BioseqSetPtr) bsp->idx.parentptr;
11839      while (parent != NULL) {
11840        switch (parent->_class) {
11841          case BioseqseqSet_class_nuc_prot :
11842            epd.sep [3] = parent->seqentry;
11843            break;
11844          case BioseqseqSet_class_segset :
11845            epd.sep [2] = parent->seqentry;
11846            break;
11847          case BioseqseqSet_class_pub_set :
11848            epd.sep [4] = parent->seqentry;
11849            break;
11850          default :
11851            break;
11852        }
11853        if (parent->idx.parenttype == OBJ_BIOSEQSET) {
11854          parent = (BioseqSetPtr) parent->idx.parentptr;
11855        } else {
11856          parent = NULL;
11857        }
11858      }
11859    }
11860 
11861    /* get desired SeqEntry given retcode parameter */
11862 
11863    sep = NULL;
11864    while (retcode >= 0 && sep == NULL) {
11865      sep = epd.sep [retcode];
11866      retcode --;
11867    }
11868    if (sep == NULL) return FALSE;
11869 
11870    /* get immediate parent of SeqEntry to be returned */
11871 
11872    parent = NULL;
11873    if (IS_Bioseq (sep)) {
11874      bsp = (BioseqPtr) sep->data.ptrvalue;
11875      if (bsp == NULL) return FALSE;
11876      if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
11877        parent = (BioseqSetPtr) bsp->idx.parentptr;
11878      }
11879    } else if (IS_Bioseq_set (sep)) {
11880      bssp = (BioseqSetPtr) sep->data.ptrvalue;
11881      if (bssp == NULL) return FALSE;
11882      if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
11883        parent = (BioseqSetPtr) bssp->idx.parentptr;
11884      }
11885    }
11886 
11887    /* find itemID range of SeqAnnots within current SeqEntry */
11888 
11889    epd.minSapItemID = UINT4_MAX;
11890    epd.maxSapItemID = 0;
11891    VisitAnnotsInSep (sep, (Pointer) &epd, GetSapBounds);
11892 
11893    /* go up parent hierarchy, pointing to applicable descriptors */
11894 
11895    epd.lastVnp = NULL;
11896    while (parent != NULL) {
11897      for (sdp = parent->descr; sdp != NULL; sdp = sdp->next) {
11898        vnp = ValNodeAddPointer (&(epd.lastVnp), 0, (Pointer) sdp);
11899        if (epd.descChain == NULL) {
11900          epd.descChain = epd.lastVnp;
11901        }
11902        epd.lastVnp = vnp;
11903      }
11904      if (parent->idx.parenttype == OBJ_BIOSEQSET) {
11905        parent = (BioseqSetPtr) parent->idx.parentptr;
11906      } else {
11907        parent = NULL;
11908      }
11909    }
11910 
11911    /* find features indexed on Bioseq that are packaged outside
11912 current SeqEntry */
11913 
11914    epd.lastVnp = NULL;
11915    sfp = SeqMgrGetNextFeature (bsp, NULL, 0, 0, &context);
11916    while (sfp != NULL) {
11917      sap = context.sap;
11918      if (sap != NULL) {
11919        if (sap->idx.itemID < epd.minSapItemID || sap->idx.itemID >
11920 epd.maxSapItemID) {
11921          vnp = ValNodeAddPointer (&(epd.lastVnp), 0, (Pointer) sfp);
11922          if (epd.featChain == NULL) {
11923            epd.featChain = epd.lastVnp;
11924          }
11925          epd.lastVnp = vnp;
11926        }
11927      }
11928      sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &context);
11929    }
11930 
11931    /* also need to get features whose products point to the Bioseq */
11932 
11933    sfp = NULL;
11934    if (ISA_na (bsp->mol)) {
11935      sfp = SeqMgrGetRNAgivenProduct (bsp, &context);
11936    } else if (ISA_aa (bsp->mol)) {
11937      sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
11938    }
11939    if (sfp != NULL) {
11940      sap = context.sap;
11941      if (sap != NULL) {
11942        if (sap->idx.itemID < epd.minSapItemID || sap->idx.itemID >
11943 epd.maxSapItemID) {
11944          vnp = ValNodeAddPointer (&(epd.lastVnp), 0, (Pointer) sfp);
11945          if (epd.featChain == NULL) {
11946            epd.featChain = epd.lastVnp;
11947          }
11948          epd.lastVnp = vnp;
11949        }
11950      }
11951    }
11952 
11953    /* now write sep, adding descriptors from descChain and features
11954 from featChain */
11955 
11956   MemSet(&dv, 0, sizeof(DataVal));  /* zero it out */
11957   if (epd.descChain)   /* have extra descriptors */
11958   {
11959     dv.ptrvalue = (Pointer)(epd.descChain);
11960     aopp_desc = AsnIoOptionNew(aipout, OP_NCBIOBJSEQ, CHECK_EXTRA_DESC, dv, NULL);
11961   }
11962 
11963   if (epd.featChain)   /* have extra features */
11964   {
11965     dv.ptrvalue = (Pointer)(epd.featChain);
11966     aopp_feat = AsnIoOptionNew(aipout, OP_NCBIOBJSEQ, CHECK_EXTRA_FEAT, dv, NULL);
11967   }
11968 
11969   SeqEntryAsnWrite(sep, aipout, NULL);
11970 
11971    /* clean up valnode chains */
11972 
11973    ValNodeFree (epd.descChain);
11974    ValNodeFree (epd.featChain);
11975 
11976    return TRUE;
11977 }
11978 
11979 /*
11980 static void SeqMgrReport (void)
11981 
11982 {
11983   BioseqPtr                  bsp;
11984   BioseqPtr PNTR             bspp;
11985   Int4                       i, num;
11986   ObjMgrDataPtr              omdp;
11987   ObjMgrPtr                  omp;
11988   SeqIdIndexElementPtr PNTR  sipp;
11989   SeqMgrPtr                  smp;
11990   Char                       str [128];
11991 
11992   omp = ObjMgrGet ();
11993   if (omp != NULL) {
11994     printf ("Currobj %d, totobj %d\n", (int) omp->currobj, (int) omp->totobj);
11995     fflush (stdout);
11996   }
11997   smp = SeqMgrGet ();
11998   if (smp != NULL) {
11999     num = smp->BioseqIndexCnt;
12000     sipp = smp->BioseqIndex;
12001     printf ("BioseqIndexCnt %ld\n", (long) num);
12002     fflush (stdout);
12003     if (sipp == NULL) {
12004       printf ("sipp is NULL\n");
12005       fflush (stdout);
12006     } else {
12007       for (i = 0; i < num; i++) {
12008         omdp = sipp [i]->omdp;
12009         if (omdp != NULL && omdp->bulkIndexFree) {
12010           printf ("omdp %ld bulkIndexFree flag set\n", (long) i);
12011           fflush (stdout);
12012         }
12013         StringNCpy_0 (str, sipp [i]->str, sizeof (str));
12014         RevStringUpper (str);
12015         printf (" %3ld - %s\n", (long) i, str);
12016         fflush (stdout);
12017       }
12018       printf ("-\n");
12019       fflush (stdout);
12020       for (i = smp->BioseqIndexCnt; i < smp->BioseqIndexNum; i++) {
12021         StringNCpy_0 (str, sipp [i]->str, sizeof (str));
12022         RevStringUpper (str);
12023         if (! StringHasNoText (str)) {
12024           printf (" %3ld - %s\n", (long) i, str);
12025           fflush (stdout);
12026         }
12027       }
12028       printf ("-\n");
12029       fflush (stdout);
12030     }
12031     num = smp->NonIndexedBioseqCnt;
12032     bspp = smp->NonIndexedBioseq;
12033     printf ("NonIndexedBioseqCnt %ld\n", (long) num);
12034     fflush (stdout);
12035     if (bspp == NULL) {
12036       printf ("bspp is NULL\n");
12037       fflush (stdout);
12038       return;
12039     }
12040     for (i = 0; i < num; i++) {
12041       bsp = bspp [i];
12042       if (bsp != NULL) {
12043         SeqIdWrite (bsp->id, str, PRINTID_FASTA_LONG, sizeof (str) - 1);
12044         printf (" %3ld - %s\n", (long) i, str);
12045         fflush (stdout);
12046       } else {
12047         printf (" %3ld - (null)\n", (long) i);
12048         fflush (stdout);
12049       }
12050     }
12051   }
12052   printf ("\n");
12053   fflush (stdout);
12054 }
12055 */
12056 
/* Comparison callback used when searching the sorted feature-item arrays.
   It receives one array item and the label being looked for and returns an
   int: the binary search moves right while the result is negative, and a
   result of zero is treated as a match. */
typedef int (*FeatureFindCompare) PROTO ((SMFeatItemPtr, CharPtr));
12058 
/* FeatureFindCompare callback that matches on the label stored in the item.
   A NULL item yields -1; otherwise the StringICmp result of the item's
   label against the requested label is returned. */
static int FeatureFindCompareLabel (SMFeatItemPtr feat, CharPtr label)
{
  int  cmp = -1;

  if (feat != NULL) {
    cmp = StringICmp (feat->label, label);
  }
  return cmp;
}
12064 
/* FeatureFindCompare callback that matches on a gene's locus_tag.
   Returns -1 when the item cannot be compared (NULL item, no feature,
   subtype other than FEATDEF_GENE, or a gene feature with no GeneRef
   payload); otherwise returns the StringICmp result of the GeneRef
   locus_tag against the requested label. */
static int FeatureFindCompareLocusTag (SMFeatItemPtr feat, CharPtr label)
{
  GeneRefPtr grp;

  if (feat == NULL || feat->sfp == NULL || feat->subtype != FEATDEF_GENE) {
    return -1;
  }
  grp = (GeneRefPtr) feat->sfp->data.value.ptrvalue;
  if (grp == NULL) {
    /* gene feature without a GeneRef: treat like the other non-comparable cases
       instead of dereferencing a NULL pointer */
    return -1;
  }
  return StringICmp (grp->locus_tag, label);
}
12075 
/* Binary search over a sorted array of feature items.
   Narrows the range [0, num - 1] to a single position: the range moves right
   while the probed item is non-NULL and compare_func reports it as less than
   label, and moves left otherwise (NULL items are treated as "not less").
   The returned position is only a candidate; the caller must still check that
   the item there actually compares equal to label.
   Returns -1 if array or compare_func is NULL; when num is less than 1 the
   loop never runs and the result is num - 1.
   seqFeatChoice and featDefChoice are accepted but not consulted here. */
static Int4 FindArrayPosForFirst
(SMFeatItemPtr PNTR array,
 FeatureFindCompare compare_func,
 Int4               num,
 CharPtr            label,
 Uint1              seqFeatChoice,
 Uint1              featDefChoice)
{
  Int4           lo, hi;
  Int4           probe;
  SMFeatItemPtr  item;

  if (array == NULL || compare_func == NULL) {
    return -1;
  }

  lo = 0;
  hi = num - 1;
  while (lo < hi) {
    probe = (lo + hi) / 2;
    item = array [probe];
    if (item == NULL || compare_func (item, label) >= 0) {
      /* probe is at or past the first candidate: keep it in range */
      hi = probe;
    } else {
      /* probe sorts before label: discard it and everything to its left */
      lo = probe + 1;
    }
  }

  return (hi > num) ? -1 : hi;
}
12106 
FindNthFeatureUseMultipleArrays(SMFeatItemPtr PNTR PNTR arrays,Int4Ptr array_sizes,FeatureFindCompare PNTR compare_funcs,Int4 num_arrays,CharPtr label,Uint2 entityID,BioseqPtr bsp,Uint1 seqFeatChoice,Uint1 featDefChoice,Int4 n,Int4 PNTR last_found,SeqMgrFeatContext PNTR context)12107 static SeqFeatPtr FindNthFeatureUseMultipleArrays
12108 (SMFeatItemPtr PNTR PNTR arrays,
12109  Int4Ptr                 array_sizes,
12110  FeatureFindCompare PNTR compare_funcs,
12111  Int4                    num_arrays,
12112  CharPtr                 label,
12113  Uint2                   entityID,
12114  BioseqPtr               bsp,
12115  Uint1                   seqFeatChoice,
12116  Uint1                   featDefChoice,
12117  Int4                    n,
12118  Int4 PNTR               last_found,
12119  SeqMgrFeatContext PNTR  context)
12120 {
12121   Int4Ptr firsts;
12122   Boolean found, already_found;
12123   SMFeatItemPtr       feat;
12124   Int4                index = 0, k, leftmost, i2;
12125   SMFeatItemPtr PNTR  found_list;
12126   SeqFeatPtr          sfp = NULL;
12127   ObjMgrDataPtr       omdp;
12128 
12129   if (arrays == NULL || array_sizes == NULL || compare_funcs == NULL || num_arrays < 1) return NULL;
12130 
12131   omdp = SeqMgrGetOmdpForBioseq (bsp);
12132   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
12133 
12134   found_list = (SMFeatItemPtr PNTR) MemNew (sizeof (SMFeatItemPtr) * n);
12135 
12136   /* set up pointers to first match in each array */
12137   firsts = (Int4Ptr) MemNew (num_arrays * sizeof (Int4));
12138   for (k = 0; k < num_arrays; k++) {
12139     firsts[k] = FindArrayPosForFirst (arrays[k],
12140                                       compare_funcs[k],
12141                                       array_sizes[k], label,
12142                                       seqFeatChoice, featDefChoice);
12143     found = FALSE;
12144     while (!found
12145            && firsts[k] >= 0 && firsts[k] < array_sizes[k]
12146            && compare_funcs[k] (arrays[k][firsts[k]], label) == 0) {
12147       feat = arrays[k][firsts[k]];
12148       if (feat->sfp != NULL
12149           && (seqFeatChoice == 0 || feat->sfp->data.choice == seqFeatChoice)
12150           && (featDefChoice == 0 || feat->subtype == featDefChoice)
12151           && (! feat->ignore)) {
12152         found = TRUE;
12153       } else {
12154         firsts[k]++;
12155       }
12156     }
12157     if (!found) {
12158       firsts[k] = -1;
12159     }
12160   }
12161   leftmost = 0;
12162   while (index < n && leftmost != -1) {
12163     /* find leftmost match first and increment */
12164     leftmost = -1;
12165     for (k = 0; k < num_arrays; k++) {
12166       if (firsts[k] > -1) {
12167         if (leftmost == -1 || SortFeatItemListByPos (arrays[k] + firsts[k], arrays[leftmost] + firsts[leftmost]) < 0) {
12168           leftmost = k;
12169         }
12170       }
12171     }
12172     if (leftmost > -1) {
12173       already_found = FALSE;
12174       for (i2 = 0; i2 < index && !already_found; i2++) {
12175         if (found_list[i2]->sfp == arrays[leftmost][firsts[leftmost]]->sfp) {
12176           already_found = TRUE;
12177         }
12178       }
12179       if (!already_found) {
12180         feat = arrays[leftmost][firsts[leftmost]];
12181         found_list[index] = feat;
12182         sfp = feat->sfp;
12183         if (context != NULL) {
12184           context->entityID = entityID;
12185           context->itemID = feat->itemID;
12186           context->sfp = feat->sfp;
12187           context->sap = feat->sap;
12188           context->bsp = feat->bsp;
12189           context->label = feat->label;
12190           context->left = feat->left;
12191           context->right = feat->right;
12192           context->dnaStop = feat->dnaStop;
12193           context->partialL = feat->partialL;
12194           context->partialR = feat->partialR;
12195           context->farloc = feat->farloc;
12196           context->strand = feat->strand;
12197           context->seqfeattype = sfp->data.choice;
12198           context->featdeftype = feat->subtype;
12199           context->numivals = feat->numivals;
12200           context->ivals = feat->ivals;
12201           context->userdata = NULL;
12202           context->omdp = (Pointer) omdp;
12203           context->index = firsts[leftmost] + 1;
12204         }
12205         index++;
12206         if (last_found != NULL) {
12207           *last_found = index;
12208         }
12209       }
12210       /* increment to next in leftmost array */
12211       firsts[leftmost]++;
12212       found = FALSE;
12213       while (!found
12214              && firsts[leftmost] >= 0 && firsts[leftmost] < array_sizes[leftmost]
12215              && compare_funcs[leftmost] (arrays[leftmost][firsts[leftmost]], label) == 0) {
12216         feat = arrays[leftmost][firsts[leftmost]];
12217         if (feat->sfp != NULL
12218             && (seqFeatChoice == 0 || feat->sfp->data.choice == seqFeatChoice)
12219             && (featDefChoice == 0 || feat->subtype == featDefChoice)
12220             && (! feat->ignore)) {
12221           found = TRUE;
12222         } else {
12223           firsts[leftmost]++;
12224         }
12225       }
12226       if (!found) {
12227         firsts[leftmost] = -1;
12228       }
12229     }
12230   }
12231   found_list = MemFree (found_list);
12232   if (index == n) {
12233     return sfp;
12234   } else {
12235     return NULL;
12236   }
12237 }
12238 
FindNthGeneOnBspByLabelOrLocusTag(BioseqPtr bsp,CharPtr label,Int4 n,Int4 PNTR last_found,SeqMgrFeatContext PNTR context)12239 NLM_EXTERN SeqFeatPtr FindNthGeneOnBspByLabelOrLocusTag
12240 (BioseqPtr              bsp,
12241  CharPtr                label,
12242  Int4                   n,
12243  Int4 PNTR              last_found,
12244  SeqMgrFeatContext PNTR context)
12245 {
12246   ObjMgrDataPtr       omdp;
12247   BioseqExtraPtr      bspextra;
12248   Uint2               entityID;
12249   SMFeatItemPtr PNTR  arrays[2];
12250   Int4                array_sizes[2];
12251   FeatureFindCompare  compare_funcs[2];
12252   SeqFeatPtr          sfp = NULL;
12253   Int4                num;
12254 
12255   if (bsp == NULL || StringHasNoText (label)) return NULL;
12256 
12257   omdp = SeqMgrGetOmdpForBioseq (bsp);
12258   if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
12259 
12260   bspextra = (BioseqExtraPtr) omdp->extradata;
12261   if (bspextra == NULL) return NULL;
12262   num = bspextra->numfeats;
12263 
12264   if (num < 1 || bspextra->featsByLabel == NULL || bspextra->genesByLocusTag == NULL) return NULL;
12265 
12266 
12267   if (n < 0 || n > bspextra->numfeats) return NULL;
12268 
12269   entityID = ObjMgrGetEntityIDForPointer (omdp->dataptr);
12270 
12271   arrays[0] = bspextra->featsByLabel;
12272   array_sizes[0] = bspextra->numfeats;
12273   compare_funcs[0] = FeatureFindCompareLabel;
12274   arrays[1] = bspextra->genesByLocusTag;
12275   array_sizes[1] = bspextra->numgenes;
12276   compare_funcs[1] = FeatureFindCompareLocusTag;
12277 
12278   sfp = FindNthFeatureUseMultipleArrays (arrays, array_sizes, compare_funcs, 2, label, entityID,
12279                                          bsp, SEQFEAT_GENE, FEATDEF_GENE, n + 1, last_found,
12280                                          context);
12281 
12282   return sfp;
12283 }
12284 
12285 
SeqMgrClearBioseqExtraDataDescriptors(ObjMgrDataPtr omdp)12286 static Boolean SeqMgrClearBioseqExtraDataDescriptors (ObjMgrDataPtr omdp)
12287 {
12288   BioseqExtraPtr  bspextra;
12289 
12290   if (omdp == NULL) return FALSE;
12291   bspextra = (BioseqExtraPtr) omdp->extradata;
12292   if (bspextra == NULL) return FALSE;
12293 
12294   /* free sorted arrays of pointers into data blocks */
12295 
12296   bspextra->descrsByID = MemFree (bspextra->descrsByID);
12297   bspextra->descrsBySdp = MemFree (bspextra->descrsBySdp);
12298   bspextra->descrsByIndex = MemFree (bspextra->descrsByIndex);
12299 
12300   /* free list of descriptor information */
12301 
12302   bspextra->desclisthead = ValNodeFreeData (bspextra->desclisthead);
12303 
12304   return TRUE;
12305 }
12306 
12307 
/* SeqEntryExplore callback: clears the descriptor index of one Bioseq entry.
   mydata must point to a Boolean, which is set to TRUE when an index was
   actually cleared.  index and indent are unused.
   NOTE(review): entries that are not Bioseqs (i.e. Bioseq-sets) are skipped
   by the first test; the original text also carried a Bioseq-set branch that
   could never be reached because of it - confirm whether sets were meant to
   be cleared too. */
static void SeqMgrClearDescriptorIndexesProc (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr      bsp;
  ObjMgrDataPtr  omdp;

  if (sep == NULL) return;
  if (! IS_Bioseq (sep)) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  omdp = SeqMgrGetOmdpForBioseq (bsp);
  if (omdp == NULL) return;

  if (SeqMgrClearBioseqExtraDataDescriptors (omdp)) {
    * ((BoolPtr) mydata) = TRUE;
  }
}
12331 
12332 
12333 /* NOTE - this function does NOT do basic seqentry cleanup;
12334  * it assumes that cleanup has been done already, probably
12335  * on just the descriptor that was changed.
12336  */
SeqMgrRedoDescriptorIndexes(Uint2 entityID,Pointer ptr)12337 NLM_EXTERN void SeqMgrRedoDescriptorIndexes (Uint2 entityID, Pointer ptr)
12338 
12339 {
12340   Boolean        rsult = FALSE;
12341   SeqEntryPtr    sep;
12342 
12343   if (entityID == 0) {
12344     entityID = ObjMgrGetEntityIDForPointer (ptr);
12345   }
12346   if (entityID == 0) return;
12347   sep = SeqMgrGetTopSeqEntryForEntity (entityID);
12348   if (sep == NULL) return;
12349   SeqEntryExplore (sep, (Pointer) (&rsult), SeqMgrClearDescriptorIndexesProc);
12350 
12351   /* finish indexing list of descriptors on each indexed bioseq */
12352 
12353   VisitBioseqsInSep (sep, NULL, RecordDescriptorsInBioseqs);
12354 
12355   if (IS_Bioseq_set (sep)) {
12356     RecordDescriptorsOnTopSet (sep);
12357   }
12358 
12359   SeqEntryExplore (sep, NULL, IndexRecordedDescriptors);
12360 }
12361 
12362 
/* Regenerates the stored label of every item in the featsByLabel array of
   omdp's extra data and then re-sorts that array with
   SortFeatItemListByLabel.
   The label is the FeatDefLabel content text; for RNA features everything up
   to and including the first "RNA-" is dropped when that text is present.
   Items that are NULL or carry no feature are left as they are.
   Does nothing if omdp is NULL, has no extra data, or has no featsByLabel
   array. */
static void SeqMgrRedoFeatByLabel (ObjMgrDataPtr omdp)
{
  BioseqExtraPtr      bspextra;
  SMFeatItemPtr       item;
  SeqFeatPtr          sfp;
  Int4                i;
  Char                buf [129];
  CharPtr             ptr;

  if (omdp == NULL) return;
  bspextra = (BioseqExtraPtr) omdp->extradata;
  if (bspextra == NULL || bspextra->featsByLabel == NULL) return;

  for (i = 0; i < bspextra->numfeats; i++) {
    item = bspextra->featsByLabel[i];
    if (item == NULL || item->sfp == NULL) continue;   /* nothing to label */
    sfp = item->sfp;

    /* start from an empty string so the buffer is well defined even if
       FeatDefLabel writes nothing */
    buf [0] = '\0';
    FeatDefLabel (sfp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT);
    ptr = buf;
    if (sfp->data.choice == SEQFEAT_RNA) {
      ptr = StringStr (buf, "RNA-");
      if (ptr != NULL) {
        ptr += 4;   /* skip past "RNA-" */
      } else {
        ptr = buf;
      }
    }
    item->label = MemFree (item->label);
    item->label = StringSaveNoNull (ptr);
  }

  StableMergeSort ((VoidPtr) bspextra->featsByLabel, (size_t) bspextra->numfeats, sizeof (SMFeatItemPtr), SortFeatItemListByLabel);
}
12394 
/* SeqEntryExplore callback: rebuilds the by-label feature index of one
   Bioseq entry by passing its object manager record to SeqMgrRedoFeatByLabel.
   mydata, index and indent are unused.
   NOTE(review): entries that are not Bioseqs (i.e. Bioseq-sets) are skipped
   by the first test; the original text also carried a Bioseq-set branch that
   could never be reached because of it. */
static void SeqMgrRedoFeatByLabelProc (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr  bsp;

  if (sep == NULL) return;
  if (! IS_Bioseq (sep)) return;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  SeqMgrRedoFeatByLabel (SeqMgrGetOmdpForBioseq (bsp));
}
12414 
12415 
SeqMgrRedoFeatByLabelIndexes(Uint2 entityID,Pointer ptr)12416 NLM_EXTERN void SeqMgrRedoFeatByLabelIndexes (Uint2 entityID, Pointer ptr)
12417 {
12418   Int4    ret;
12419   SeqEntryPtr sep;
12420 
12421   ret = NlmMutexLockEx (&smp_feat_index_mutex);
12422   if (ret) {
12423     ErrPostEx (SEV_FATAL, 0, 0, "SeqMgrIndexFeatures mutex failed [%ld]", (long) ret);
12424     return;
12425   }
12426   if (entityID == 0) {
12427     entityID = ObjMgrGetEntityIDForPointer (ptr);
12428   }
12429   if (entityID != 0) {
12430     sep = GetTopSeqEntryForEntityID (entityID);
12431     SeqEntryExplore (sep, NULL, SeqMgrRedoFeatByLabelProc);
12432   }
12433 
12434   NlmMutexUnlock (smp_feat_index_mutex);
12435 }
12436 
12437