1 /*  sequtil.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  sequtil.c
27 *
28 * Author:  James Ostell
29 *
30 * Version Creation Date: 4/1/91
31 *
32 * $Revision: 6.410 $
33 *
34 * File Description:  Sequence Utilities for objseq and objsset
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date       Name        Description of modification
39 * -------  ----------  -----------------------------------------------------
40 *
41 * ==========================================================================
42 */
43 
44 /** for ErrPostEx() ****/
45 
/* Module name made available to the error-posting macros through THIS_MODULE. */
static char *this_module = "ncbiapi";
#define THIS_MODULE this_module
/* Name of this source file, made available to the error-posting macros
   through THIS_FILE. */
static char *this_file = __FILE__;
#define THIS_FILE this_file
50 
51 /**********************/
52 
53 #include <sequtil.h>
54 #include <gather.h>
55 #include <seqport.h>
56 #include <sqnutils.h> /* prototype for SeqIdFindWorst */
57 #include <edutil.h>
58 #include <subutil.h>
59 
/****  Static variables used for randomized sequence conversions ****/

/* Lookup table indexed by a 4-bit (ncbi4na) residue value.
   Na42[x][4] is the number of possible choices for residue x, and
   the candidate ncbi2na residues themselves are placed in Na42[x][0..3]
   (CONVERT_42_RAND picks one of them with a random index).            */

static Int1  Na42[16][5] = {
  { 0, 1, 2, 3, 4} , { 0, 0, 0, 0, 1 }, { 1, 1, 1, 1, 1} , { 0, 1, 0, 1, 2},
  { 2, 2, 2, 2, 1} , { 0, 2, 0, 2, 2 }, { 1, 2, 1, 2, 2} , { 0, 1, 2, 2, 3},
  { 3, 3, 3, 3, 1} , { 0, 3, 0, 3, 2 }, { 1, 3, 1, 3, 2} , { 0, 1, 3, 3, 3},
  { 2, 3, 2, 3, 2} , { 0, 2, 3, 3, 3 }, { 1, 2, 3, 3, 3} , { 0, 1, 2, 3, 4}
};

/* Check values telling whether a direct conversion is possible:
   entries 1, 2, 4 and 8 hold the resulting 2-bit value (0..3);
   every other entry is -1.                                         */

static Int1    Na42Set[16] = { -1,  0,  1, -1,  2, -1, -1, -1,
                                3, -1, -1, -1, -1, -1, -1, -1 };

/* Analogous arrays for ASCII --> ncbi2na conversion.
   NOTE: the dimensions of NaI2 are reversed (relative to Na42) so that
   it can be allocated dynamically. */

static Int1    NaI2Set[256];
static Int1Ptr NaI2[5];

static Boolean NaI2InitOk = FALSE;  /* We will allocate it only once */
86 
87 /* Macros for random conversion */
88 
89 #define CONVERT_42_RAND(from) Na42[from][(Nlm_RandomNum()>>8)%Na42[from][4]]
90 #define CONVERT_I2_RAND(from) NaI2[(Nlm_RandomNum()>>8)%NaI2[4][from]][from]
91 
92 static Boolean InitNaI2Table(void);
93 
94 /**********************************************************************/
95 
/*   Defines for compression/rebuild DNA
 *
 *   An ambiguity entry is a 32-bit word that the macros below split as:
 *     RES_VALUE   - top 4 bits (bits 28..31)
 *     RES_LEN     - bits 24..27
 *     RES_LEN_NEW - bits 16..27
 *     RES_OFFSET  - low 24 bits
 *
 *   Every argument and every expansion is fully parenthesized.  Without
 *   that, an expression such as (Uint1)RES_VALUE(w) parsed as
 *   ((Uint1)w) >> 28 and always produced 0, and RES_LEN(w) + 1 parsed as
 *   (w >> 24) & (0xF + 1).
 */

#define BSC_BUFF_CHUNK 1024
#define RES_OFFSET(x)     ((x) & 0xFFFFFF)
#define RES_VALUE(x)      ((x) >> 28)
#define RES_LEN(x)        (((x) >> 24) & 0xF)
#define RES_LEN_NEW(x)    (((x) >> 16) & 0xFFF)
#define LEN_STEP_MASK 0x1000000
#define LEN_STEP_MASK_NEW 0x10000
105 
/* File-scope pointer to the standard Numbering object; starts out NULL. */
static NumberingPtr stdnum = NULL;  /* std Numbering object (start at 1) */
107 
108 /* find the last nucleotide bioseq in the bioseqset */
109 /* Used by SeqEntryExplore. */
FindNuc(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)110 NLM_EXTERN void FindNuc(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
111 {
112     BioseqPtr PNTR bp;
113     BioseqPtr local_bsp;
114 
115     bp = (BioseqPtr PNTR) data;
116     if (IS_Bioseq(sep))
117     {
118         local_bsp = (BioseqPtr) sep->data.ptrvalue;
119         if (ISA_na(local_bsp->mol))
120           *bp = local_bsp;
121     }
122 }
123 
124 /* find the last protein bioseq in the bioseqset */
125 /* Used by SeqEntryExplore. */
FindProt(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)126 NLM_EXTERN void FindProt(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
127 {
128     BioseqPtr PNTR bp;
129     BioseqPtr local_bsp;
130 
131     bp = (BioseqPtr PNTR) data;
132     if (IS_Bioseq(sep))
133     {
134         local_bsp = (BioseqPtr) sep->data.ptrvalue;
135         if (ISA_aa(local_bsp->mol))
136           *bp = local_bsp;
137     }
138 }
139 
140 /*****************************************************************************
141 *
142 *   Boolean BioseqMatch(bsp, seqid)
143 *       returns TRUE if bsp points to the Bioseq identified by seqid
144 *
145 *****************************************************************************/
BioseqMatch(BioseqPtr bsp,SeqIdPtr seqid)146 NLM_EXTERN Boolean BioseqMatch (BioseqPtr bsp, SeqIdPtr seqid)
147 {
148     if (bsp == NULL) return FALSE;
149     return SeqIdIn(seqid, bsp->id);
150 }
151 
152 
/* Search state shared by BioseqFindInSeqEntry() and its FindSE() callback. */
typedef struct findse {
    SeqIdPtr sip;      /* id being searched for */
    Boolean found;     /* set to TRUE by FindSE() once a Bioseq matches */
    BioseqPtr bsp;     /* the matching Bioseq, filled in together with found */
    Int4 indent;       /* indent value FindSE() received at the match */
} fse, PNTR fseptr;

/* NOTE(review): not used in the visible part of this file; judging by the
   names it pairs a location with a "search on protein" flag -- confirm
   against the code that uses it. */
typedef struct {
    SeqLocPtr slp;
    Boolean findOnProtein;
} SpliceInfo, *SpliceInfoPtr;

/* NOTE(review): not used in the visible part of this file; presumably an
   id plus an is-protein flag with a result slot -- confirm against users. */
typedef struct {
    SeqIdPtr sip;
    Boolean isProtein;
    Boolean retval;
} SeqIdChecker, *SeqIdCheckerPtr;

/* NOTE(review): not used in the visible part of this file; presumably
   associates an id with a molecule type code -- confirm against users. */
typedef struct {
    SeqIdPtr sip;
    Int2 mtype;
} SeqIdMolType,  PNTR SeqIdMolTypePtr;
175 
176 /*****************************************************************************
177 *
178 *   FindSE()
179 *      SeqEntryExplore function used by SeqEntryFind()
180 *
181 *****************************************************************************/
182 NLM_EXTERN void FindSE (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent);
FindSE(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)183 NLM_EXTERN void FindSE (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
184 {
185     fseptr fep;
186     BioseqPtr bsp;
187 
188     fep = (fseptr)data;
189     if (fep->found)   /* already found it */
190         return;
191 
192     if (! IS_Bioseq(sep))
193         return;
194 
195     bsp = (BioseqPtr)(sep->data.ptrvalue);
196     if (BioseqMatch(bsp, fep->sip))
197     {
198         fep->found = TRUE;
199         fep->bsp = bsp;
200         fep->indent = indent;
201     }
202 
203     return;
204 }
205 
206 /*****************************************************************************
207 *
208 *   BioseqFindInSeqEntry(sip, sep)
209 *       Finds a Bioseq within a SeqEntry by SeqId
210 *
211 *****************************************************************************/
BioseqFindInSeqEntry(SeqIdPtr sip,SeqEntryPtr sep)212 NLM_EXTERN BioseqPtr BioseqFindInSeqEntry(SeqIdPtr sip, SeqEntryPtr sep)
213 {
214     BioseqPtr bsp = NULL;
215     fse fe;
216 
217     if (sip == NULL) return bsp;
218     if (sep == NULL) return bsp;
219 
220     fe.found = FALSE;
221     fe.sip = sip;
222     fe.bsp = NULL;
223 
224     SeqEntryExplore(sep, (Pointer)(&fe), FindSE);
225     if (fe.found)
226         return fe.bsp;
227     else
228         return bsp;
229 }
230 
231 /*****************************************************************************
232 *
233 *   BioseqGetSeqDescr(bsp, type, curr)
234 *       returns pointer to the next SeqDescr of this type
235 *       type gives type of Seq-descr
236 *       if 0, gets them all
237 *       curr is NULL or previous node of this type found
238 *
239 *****************************************************************************/
BioseqGetSeqDescr(BioseqPtr bsp,Int2 type,ValNodePtr curr)240 NLM_EXTERN ValNodePtr BioseqGetSeqDescr (BioseqPtr bsp, Int2 type, ValNodePtr curr)    /* the last one you used */
241 
242 {
243     if (bsp == NULL) return NULL;
244 
245     if (curr == NULL)
246             curr = bsp->descr;
247     else
248         curr = curr->next;     /* move past last one */
249 
250     while (curr != NULL)
251     {
252         if ((! type) || ((Int2)curr->choice == type))
253             return curr;
254         else
255             curr = curr->next;
256     }
257     return NULL;
258 }
259 
260 /*****************************************************************************
261 *
262 *   BioseqGetTitle(bsp)
263 *       returns pointer to the first title of this Bioseq
264 *
265 *****************************************************************************/
BioseqGetTitle(BioseqPtr bsp)266 NLM_EXTERN CharPtr BioseqGetTitle (BioseqPtr bsp)
267 
268 {
269     ValNodePtr ptr;
270 
271     ptr = BioseqGetSeqDescr(bsp, Seq_descr_title, NULL);
272     if (ptr != NULL)
273         return (CharPtr)ptr->data.ptrvalue;
274     else
275         return NULL;
276 }
277 
278 /*****************************************************************************
279 *
280 *   BioseqGetNumbering(bsp)
281 *       Gets either user supplied, or default number for a Bioseq
282 *       looks first for num Seqdescr, then in Pubdesc, then returns
283 *         default numbering
284 *
285 *****************************************************************************/
BioseqGetNumbering(BioseqPtr bsp)286 NLM_EXTERN NumberingPtr BioseqGetNumbering (BioseqPtr bsp)
287 
288 {
289     NumberingPtr np = NULL;
290     ValNodePtr anp;
291     PubdescPtr pdp;
292 
293     if (bsp == NULL)
294         return NULL;
295 
296     anp = BioseqGetSeqDescr(bsp, Seq_descr_num, NULL);
297     if (anp != NULL)    /* Numbering on this Bioseq */
298         np = (NumberingPtr)anp->data.ptrvalue;
299     else do                /* look for Pubdesc  */
300     {
301         anp = BioseqGetSeqDescr(bsp, Seq_descr_pub, anp);
302         if (anp != NULL)
303         {
304             pdp = (PubdescPtr)anp->data.ptrvalue;
305             np = pdp->num;
306         }
307     } while ((anp != NULL) && (np == NULL));
308 
309     if (np == NULL)   /* no numbering found */
310         np = NumberingDefaultGet();   /* fallback position */
311 
312     return np;
313 }
314 
315 
316 /*****************************************************************************
317 *
318 *   Bioseq_repr (BioseqPtr bsp)
319 *
320 *****************************************************************************/
Bioseq_repr(BioseqPtr bsp)321 NLM_EXTERN Uint1 Bioseq_repr (BioseqPtr bsp)
322 
323 {
324     return bsp->repr;
325 }
326 
327 /*****************************************************************************
328 *
329 *   Int4 BioseqGetLen (bsp)
330 *       returns total length of sequence in residues
331 *       if segmented:
332 *          includes length of virtual sequences with fixed length
333 *          does not include lengths of NULL gaps
334 *       returns -1 for error
335 *
336 *****************************************************************************/
BioseqGetLen(BioseqPtr bsp)337 NLM_EXTERN Int4 BioseqGetLen (BioseqPtr bsp)
338 
339 {
340     if (bsp == NULL)
341         return -1;
342 
343     return bsp->length;
344 }
345 
346 /*****************************************************************************
347 *
348 *   Int4 BioseqGetGaps (bsp)
349 *       returns total number of NULL gaps in sequence
350 *       virtual sequence with length set does not count as a gap
351 *       returns -1 for error
352 *
353 *****************************************************************************/
BioseqGetGaps(BioseqPtr bsp)354 NLM_EXTERN Int4 BioseqGetGaps (BioseqPtr bsp)
355 
356 {
357     ValNodePtr anp;
358     Int4 gaps = 0;
359     Uint1 repr;
360 
361     if (bsp == NULL)
362         return -1;
363 
364     repr = Bioseq_repr(bsp);
365 
366     switch (repr)
367     {
368         case  Seq_repr_seg:
369         case Seq_repr_ref:
370             anp = (ValNodePtr)bsp->seq_ext;
371              while (anp != NULL)    /* go through Seq-loc chain */
372             {
373                 gaps = SeqLocGetSegLens((SeqLocPtr)anp, NULL, gaps, TRUE);
374                 anp = anp->next;
375             }
376             break;
377         case Seq_repr_delta:
378             anp = (ValNodePtr)bsp->seq_ext;
379              while (anp != NULL)    /* go through delta seq chain */
380             {
381                  if (anp->choice == 1)
382                     gaps = SeqLocGetSegLens((SeqLocPtr)(anp->data.ptrvalue), NULL, gaps, TRUE);
383                 anp = anp->next;
384             }
385             break;
386         default:
387             break;
388     }
389 
390     return gaps;
391 }
392 
393 /*****************************************************************************
394 *
395 *   Int4 BioseqGetSegLens (bsp, lens)
396 *       returns total number of segments in sequence including NULLS
397 *       returns -1 for error
398 *       if lens != NULL fills with lengths of segments, 0 = NULL
399 *
400 *****************************************************************************/
BioseqGetSegLens(BioseqPtr bsp,Int4Ptr lens)401 NLM_EXTERN Int4 BioseqGetSegLens (BioseqPtr bsp, Int4Ptr lens)
402 
403 {
404     ValNodePtr anp;
405     Int4 segs = 0;
406     Uint1 repr;
407     SeqLitPtr slitp;
408 
409     if (bsp == NULL)
410         return -1;
411 
412     repr = Bioseq_repr(bsp);
413 
414     switch (repr)
415     {
416         case  Seq_repr_seg:
417         case Seq_repr_ref:
418             anp = (ValNodePtr)bsp->seq_ext;
419              while (anp != NULL)    /* go through Seq-loc chain */
420             {
421                 segs = SeqLocGetSegLens((SeqLocPtr)anp, lens, segs, FALSE);
422                 anp = anp->next;
423             }
424             break;
425         case Seq_repr_delta:
426             anp = (ValNodePtr)bsp->seq_ext;
427              while (anp != NULL)    /* go through delta seq chain */
428             {
429                  if (anp->choice == 1)
430                     segs = SeqLocGetSegLens((SeqLocPtr)(anp->data.ptrvalue), lens, segs, FALSE);
431                  else
432                  {
433                      slitp = (SeqLitPtr)(anp->data.ptrvalue);
434                      if (lens != NULL)
435                          lens[segs] = slitp->length;
436                      segs++;
437                  }
438                  anp = anp->next;
439             }
440             break;
441         default:
442             if (lens != NULL)
443                 lens[0] = BioseqGetLen(bsp);
444             segs = 1;
445             break;
446     }
447     return segs;
448 }
449 
450 /*****************************************************************************
451 *
452 *   BioseqGetCode(bsp)
453 *       returns type of code for data in sequence
454 *       if not bioseq or not raw returns 0
455 *       otherwise returns #defines from objseq.h
456 *
457 *****************************************************************************/
BioseqGetCode(BioseqPtr bsp)458 NLM_EXTERN Uint1 BioseqGetCode (BioseqPtr bsp)
459 
460 {
461     if (bsp == NULL)
462         return 0;
463 
464     if ((Bioseq_repr(bsp) == Seq_repr_raw) ||
465         (Bioseq_repr(bsp) == Seq_repr_const))
466         return bsp->seq_data_type;
467     else
468         return 0;
469 }
470 
471 /*****************************************************************************
472 *
473 *   Boolean BioseqConvert(bsp, newcode)
474 *      converts a raw or const bioseq or delta to a new sequence code
475 *
476 *****************************************************************************/
BioseqConvert(BioseqPtr bsp,Uint1 newcode)477 NLM_EXTERN Boolean BioseqConvert (BioseqPtr bsp, Uint1 newcode)
478 
479 {
480     ByteStorePtr to;
481     ValNodePtr vnp;
482     SeqLitPtr slp;
483 
484     if (bsp == NULL) return FALSE;
485 
486     if ((Bioseq_repr(bsp) == Seq_repr_raw) ||
487         (Bioseq_repr(bsp) == Seq_repr_const))
488         return BioseqRawConvert(bsp, newcode);
489 
490     if (Bioseq_repr(bsp) != Seq_repr_delta)
491         return FALSE;
492 
493                                 /* go through the delta chain */
494     for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next)
495     {
496        if (vnp->choice == 2)   /* SeqLit */
497        {
498            slp = (SeqLitPtr)(vnp->data.ptrvalue);
499            if (slp->length > 0 && slp->seq_data != NULL
500                && slp->seq_data_type != Seq_code_gap)
501            {
502                 to = BSConvertSeq((ByteStorePtr) slp->seq_data, newcode, slp->seq_data_type, slp->length);
503                if (to != NULL)
504                {
505                    slp->seq_data = (SeqDataPtr) to;
506                    slp->seq_data_type = newcode;
507                }
508            }
509        }
510     }
511 
512     return TRUE;
513 }
514 
515 /*****************************************************************************
516 *
517 *   Boolean BioseqRawPack(bsp)
518 *      converts a raw or const bioseq to it's densist possible code
519 *
520 *****************************************************************************/
BioseqRawPack(BioseqPtr bsp)521 NLM_EXTERN Boolean BioseqRawPack (BioseqPtr bsp)
522 
523 {
524   ByteStorePtr to;
525   Uint1 newcode;
526 
527   if (bsp == NULL) return FALSE;
528 
529   if (! ((Bioseq_repr(bsp) == Seq_repr_raw) ||
530          (Bioseq_repr(bsp) == Seq_repr_const)))
531     return FALSE;
532 
533   if(! ISA_na(bsp->mol)) {    /* protein ? */
534     if(!BioseqRawConvert (bsp, Seq_code_ncbieaa)) {
535       return FALSE;
536     }
537   } else if (bsp->seq_data_type != Seq_code_gap) {
538     if((to = BSPack((ByteStorePtr) bsp->seq_data,
539                     BioseqGetCode(bsp),
540                     BioseqGetLen(bsp),
541                     &newcode)) == NULL) {
542       return FALSE;
543     }
544     bsp->seq_data = (SeqDataPtr) to;
545     bsp->seq_data_type = newcode;
546   }
547   return TRUE;
548 }
549 
550 /*****************************************************************************
551 *
552 *   Boolean BioseqRawConvert(bsp, newcode)
553 *      converts a raw or const bioseq to a new sequence code
554 *
555 *****************************************************************************/
BioseqRawConvert(BioseqPtr bsp,Uint1 newcode)556 NLM_EXTERN Boolean BioseqRawConvert (BioseqPtr bsp, Uint1 newcode)
557 
558 {
559     ByteStorePtr to;
560     Int4 seqlen;
561     Uint1 oldcode;
562 
563     if (bsp == NULL) return FALSE;
564 
565     if (! ((Bioseq_repr(bsp) == Seq_repr_raw) ||
566         (Bioseq_repr(bsp) == Seq_repr_const)))
567         return FALSE;
568 
569     oldcode = BioseqGetCode(bsp);
570     if (! oldcode)   /* not a coded sequence */
571         return FALSE;
572 
573     if (oldcode == Seq_code_gap || newcode == Seq_code_gap) return FALSE;
574 
575     seqlen = BioseqGetLen(bsp);
576 
577     to = BSConvertSeq((ByteStorePtr) bsp->seq_data, newcode, oldcode, seqlen);
578     if (to == NULL)
579         return FALSE;
580 
581     bsp->seq_data = (SeqDataPtr) to;
582     bsp->seq_data_type = newcode;
583 
584     return TRUE;
585 }
586 
587 /*****************************************************************************
588 *
589 *   Boolean BioseqPack(bsp)
590 *      converts a raw or const or delta bioseq to it's densist possible code
591 *
592 *****************************************************************************/
BioseqPack(BioseqPtr bsp)593 NLM_EXTERN Boolean BioseqPack (BioseqPtr bsp)
594 
595 {
596   ValNodePtr vnp;
597 
598   if (bsp == NULL) return FALSE;
599 
600   if ((Bioseq_repr(bsp) == Seq_repr_raw) ||
601       (Bioseq_repr(bsp) == Seq_repr_const))
602     return BioseqRawPack(bsp);
603 
604   if (Bioseq_repr(bsp) != Seq_repr_delta)
605     return FALSE;
606 
607   /* not set up to compress delta proteins */
608 
609   if (ISA_aa (bsp->mol)) return FALSE;
610 
611   /* go through the delta chain */
612 
613   for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
614     if (vnp->choice == 2)   /* SeqLit */
615       SeqLitPack((SeqLitPtr)(vnp->data.ptrvalue));
616   }
617   return TRUE;
618 }
619 
620 /****************************************************************************
621 *
622 *  Boolean SeqLitPack(slp)
623 *      Pack a SeqLit as dense as possible
624 *
625 *****************************************************************************/
SeqLitPack(SeqLitPtr slp)626 NLM_EXTERN Boolean SeqLitPack (SeqLitPtr slp)
627 {
628     ByteStorePtr to = NULL;
629     Uint1 newcode = 0;
630 
631     if (slp == NULL) return FALSE;
632 
633     if ((slp->length == 0) || (slp->seq_data == NULL))
634         return FALSE;
635 
636     if (slp->seq_data_type == Seq_code_gap) return FALSE;
637 
638     to = BSPack((ByteStorePtr) slp->seq_data, slp->seq_data_type, slp->length, &newcode);
639 
640     if (to != NULL)
641     {
642         slp->seq_data = (SeqDataPtr) to;
643         slp->seq_data_type = newcode;
644     }
645 
646     return TRUE;
647 }
648 
649 /**************************************************************************
650 *
651 *  ByteStorePtr BSPack(from, oldcode, length, newcodeptr)
652 *
653 *     packs a bytestore containing a nucleic acid code as dense as possible
654 *     returns a new bytestoreptr and fills in newcodeptr if it can pack it
655 *     more. Otherwise returns null. length is number of residues.
656 *
657 *     if BSPack returns non-NULL, then it has already BSFree'd from.
658 *
659 ***************************************************************************/
BSPack(ByteStorePtr from,Uint1 oldcode,Int4 length,Uint1Ptr newcodeptr)660 NLM_EXTERN ByteStorePtr BSPack (ByteStorePtr from, Uint1 oldcode,
661                      Int4 length, Uint1Ptr newcodeptr)
662 {
663   Int4 i, seqlen;
664   Uint1 newcode, byte;
665   Char Code4na[256], CodeIna[256];
666   Boolean remained;
667   Int2 actual, j;
668   Int4 cntr;
669   Uint1 tmp [401];
670 
671   Uint1 set4na[16] = {17, 18, 20, 24,  33,  34,  36,  40,
672                       65, 66, 68, 72, 129, 130, 132, 136};
673   Uint1 setIna[4] = {65, 67, 71, 84};
674 
675   if ((! oldcode) || (! length) || (from == NULL))/* not a coded sequence */
676     return NULL;
677 
678   if (oldcode == Seq_code_ncbi2na)   /* already packed */
679     return NULL;
680 
681   if (oldcode == Seq_code_gap) return NULL;
682 
683   MemSet ((Pointer) tmp, 0, sizeof (tmp));
684 
685   BSSeek(from, 0L, SEEK_SET);
686   newcode = Seq_code_ncbi2na;    /* go for broke */
687 
688   switch (oldcode) {
689 
690   case Seq_code_ncbi4na:
691     remained = length%2;
692     seqlen = length/2;
693 
694     MemSet(Code4na, 1, sizeof(Code4na));
695     for(i=0; i< 16; i++)
696       Code4na[set4na[i]] = 0;
697 
698     cntr = (Int4) MIN ((Int4) seqlen, (Int4) (sizeof (tmp) - 1));
699     actual = (Int2) BSRead (from, tmp, (Int4) cntr);
700     j = 0;
701 
702     while(seqlen && actual > 0) {
703       if (j == actual) {
704         cntr = (Int4) MIN ((Int4) seqlen, (Int4) (sizeof (tmp) - 1));
705         actual = (Int2) BSRead (from, tmp, (Int4) cntr);
706         j = 0;
707       }
708       /* byte = (Uint1) BSGetByte(from); */
709       byte = (Uint1) tmp [j];
710       j++;
711       if(Code4na[byte]) {
712         newcode = Seq_code_ncbi4na;
713         if (newcodeptr != NULL) {
714           *newcodeptr = newcode;
715         }
716         return BSConvertSeq(from, newcode, oldcode, length);
717       }
718       seqlen--;
719     }
720     if(remained) { /* one more uncompleted byte */
721       byte = (Uint1) BSGetByte(from);
722       if(Code4na[byte+1])
723         newcode = Seq_code_ncbi4na;
724     }
725     break;
726   case Seq_code_iupacna:
727     MemSet(CodeIna, 1, sizeof(CodeIna));
728     for(i=0; i < 4; i++)
729       CodeIna[setIna[i]] = 0;
730     seqlen = length;
731 
732     cntr = (Int4) MIN ((Int4) seqlen, (Int4) (sizeof (tmp) - 1));
733     actual = (Int2) BSRead (from, tmp, (Int4) cntr);
734     j = 0;
735 
736     while(seqlen && actual > 0) {
737       if (j == actual) {
738         cntr = (Int4) MIN ((Int4) seqlen, (Int4) (sizeof (tmp) - 1));
739         actual = (Int2) BSRead (from, tmp, (Int4) cntr);
740         j = 0;
741       }
742       /* byte = (Uint1) BSGetByte(from); */
743       byte = (Uint1) tmp [j];
744       j++;
745       if(CodeIna[byte]) {
746         newcode = Seq_code_ncbi4na;
747         break;
748       }
749       seqlen--;
750     }
751     break;
752   default:
753     break;
754   }
755     if (newcodeptr != NULL) {
756       *newcodeptr = newcode;
757     }
758     return BSConvertSeq(from, newcode, oldcode, length);
759 }
760 
IsNASeqCode(Uint1 seqcode)761 static Boolean IsNASeqCode (Uint1 seqcode)
762 {
763   if (seqcode == Seq_code_iupacna
764       || seqcode == Seq_code_ncbi2na
765       || seqcode == Seq_code_ncbi4na
766       || seqcode == Seq_code_ncbi8na
767       || seqcode == Seq_code_ncbipna)
768   {
769     return TRUE;
770   }
771   else
772   {
773     return FALSE;
774   }
775 }
776 
IsAASeqCode(Uint1 seqcode)777 static Boolean IsAASeqCode (Uint1 seqcode)
778 {
779   if (seqcode == Seq_code_iupacaa
780       || seqcode == Seq_code_ncbi8aa
781       || seqcode == Seq_code_ncbieaa
782       || seqcode == Seq_code_ncbipaa
783       || seqcode == Seq_code_iupacaa3
784       || seqcode == Seq_code_ncbistdaa)
785   {
786     return TRUE;
787   }
788   else
789   {
790     return FALSE;
791   }
792 }
793 
794 /*****************************************************************************
795 *
796 *   BSConvertSeq(bytestoreptr, newcode, oldcode, len)
797 *       converts a bytestore to a new sequence representation
798 *       frees old bytestore
799 *       returns pointer to new one, or NULL on fail.
800 *       len is residues
801 *
802 *****************************************************************************/
803 
BSConvertSeq(ByteStorePtr from,Uint1 newcode,Uint1 oldcode,Int4 len)804 NLM_EXTERN ByteStorePtr BSConvertSeq (ByteStorePtr from, Uint1 newcode,
805                            Uint1 oldcode, Int4 len)
806 
807 {
808   ByteStorePtr to;
809   Uint1 byte_from, residue_from, bitctr_from, mask_from;
810   Uint1 lshift_from, rshift_from, bc_from, byte_to, bitctr_to;
811   Uint1 lshift_to[5], bc_to, byte_tmp;
812   SeqMapTablePtr smtp;
813   Int4 storelen, in_index = 0, out_index = 0;
814   Uint1Ptr out_buff, in_buff;
815 
816   if ((from == NULL) || (! oldcode) || (! newcode) || (len <= 0))
817     return NULL;
818 
819   if (oldcode == Seq_code_gap || newcode == Seq_code_gap) return NULL;
820 
821   if (oldcode == newcode)
822     return from;
823 
824   /* if we are converting from a protein to a nucleotide or vice versa,
825    * need this intermediate step.
826    */
827   if (IsAASeqCode (oldcode) && IsNASeqCode (newcode))
828   {
829     from = BSConvertSeq (from, Seq_code_iupacaa, oldcode, len);
830     oldcode = Seq_code_iupacna;
831   }
832   else if (IsNASeqCode (oldcode) && IsAASeqCode (newcode))
833   {
834     from = BSConvertSeq (from, Seq_code_iupacna, oldcode, len);
835     oldcode = Seq_code_iupacaa;
836   }
837   if (oldcode == newcode)
838     return from;
839 
840   if ((smtp = SeqMapTableFind(newcode, oldcode)) == NULL)
841     return NULL;
842 
843   if (newcode == Seq_code_ncbi2na)
844     storelen = (len / 4) + 1;
845   else if (newcode == Seq_code_ncbi4na)
846     storelen = (len / 2) + 1;
847   else
848     storelen = len;
849 
850   if((to = BSNew((Uint4)storelen)) == NULL)
851     return NULL;
852 
853   BSSeek(from, 0, 0);
854   BSSeek(to, 0, 0);
855 
856   in_buff  = (Uint1Ptr)MemNew(BSC_BUFF_CHUNK);
857   out_buff = (Uint1Ptr)MemNew(BSC_BUFF_CHUNK);
858 
859   switch (oldcode) {
860 
861   case Seq_code_ncbi2na:
862     bc_from = 4;            /* bit shifts needed */
863     rshift_from = 6;
864     lshift_from = 2;
865     mask_from = 192;
866     break;
867 
868   case Seq_code_ncbi4na:
869     bc_from = 2;
870     rshift_from = 4;
871     lshift_from = 4;
872     mask_from = 240;
873     break;
874 
875   default:
876     bc_from = 1;
877     rshift_from = 0;
878     lshift_from = 0;
879     mask_from = 255;
880     break;
881   }
882 
883   lshift_to[1] = 0;
884 
885   switch (newcode) {
886 
887   case Seq_code_ncbi2na:
888     bc_to = 4;            /* bit shifts needed */
889     lshift_to[2] = 2;
890     lshift_to[3] = 4;
891     lshift_to[4] = 6;
892     break;
893 
894   case Seq_code_ncbi4na:
895     bc_to = 2;
896     lshift_to[2] = 4;
897     break;
898 
899   default:
900     bc_to = 1;
901     break;
902   }
903 
904   bitctr_to = bc_to;
905   byte_to = 0;
906   bitctr_from = 0;
907 
908   in_index = BSC_BUFF_CHUNK;
909 
910   while (len) {
911     if (in_index == BSC_BUFF_CHUNK) {
912       in_index = (Int2) BSRead(from, (VoidPtr)in_buff, (Int4)BSC_BUFF_CHUNK);
913       in_index = 0;
914     }
915 
916     if (! bitctr_from) {       /* need a new byte */
917       byte_from = in_buff[in_index];
918       in_index++;
919       bitctr_from = bc_from;
920     }
921 
922     residue_from = byte_from & mask_from;
923     residue_from >>= rshift_from;
924     byte_from <<= lshift_from;
925     bitctr_from--;
926 
927     byte_tmp = SeqMapTableConvert(smtp, residue_from);
928 
929     if (byte_tmp == INVALID_RESIDUE) {
930       ErrPostEx(SEV_ERROR, 0, 0, "BSConvertSeq: invalid residue [%d=%c]",
931                 (int)residue_from, (char)residue_from);
932       BSFree(to);
933       MemFree(in_buff);
934       MemFree(out_buff);
935       return NULL;
936     }
937 
938     byte_tmp <<= lshift_to[bitctr_to];
939     byte_to |= byte_tmp;
940     bitctr_to--;
941 
942     if (! bitctr_to) {
943       if (out_index == BSC_BUFF_CHUNK) {
944 
945         /* Flush buffer if it is full */
946 
947         out_index = (Int2) BSWrite(to, (VoidPtr)out_buff, out_index);
948         out_index = 0;
949       }
950       out_buff[out_index] = byte_to;
951       out_index++;
952 
953       bitctr_to = bc_to;
954       byte_to = 0;
955     }
956     len--;
957   }
958 
959   /* Now we will BSWrite() all recorded bytes in buffer */
960 
961   out_index = (Int2) BSWrite(to, (VoidPtr)out_buff, out_index);
962 
963  /* And finaly partial byte not written */
964 
965   if (bitctr_to != bc_to)
966     BSPutByte(to, byte_to);
967 
968   BSFree(from);
969   MemFree(in_buff);
970   MemFree(out_buff);
971 
972   return to;
973 }
974 
975 /*****************************************************************************
976 *
977 *   BSRebuildDNA(bytestoreptr, len, lbytes)
978 *       restore ASCII sequence with abmiguity characters
979 *       lbytes[0] == length of this storage
980 *       frees old bytestore
981 *       returns pointer to new one, or NULL on fail.
982 *       len is residues
983 *       lbytes is pointer to ambiguity storage
984 *
985 *****************************************************************************/
BSRebuildDNA(ByteStorePtr from,Int4 len,Uint4Ptr PNTR lbytes)986 NLM_EXTERN ByteStorePtr BSRebuildDNA (ByteStorePtr from, Int4 len,
987                            Uint4Ptr PNTR lbytes)
988 
989 {
990   Int4      i, am_num;
991   Uint4Ptr  am_buff;
992   Uint1     char_to;
993   Int4     row_len, j;
994   SeqMapTablePtr smtp;
995 
996   if(from == NULL || len <=0)
997     return NULL;
998 
999   if(*lbytes == NULL)
1000     return from;
1001 
1002   if ((smtp = SeqMapTableFind(Seq_code_iupacna,
1003                               Seq_code_ncbi4na)) == NULL)
1004     return NULL;
1005 
1006   am_num  = **lbytes;
1007   am_buff = *lbytes + 1;
1008 
1009   for(i = 0; i < am_num; i++) {
1010     char_to = (Uint1)RES_VALUE(am_buff[i]);
1011     row_len = (Int4)RES_LEN(am_buff[i]);
1012 
1013     BSSeek(from, RES_OFFSET(am_buff[i]), SEEK_SET);
1014     for(j = 0; j <= row_len; j++)
1015       BSPutByte(from, SeqMapTableConvert(smtp, char_to));
1016   }
1017   return from;
1018 }
1019 /*****************************************************************************
1020 *
1021 *   RebuildDNA_4na(buffer, length, lbytes)
1022     works with Uint1 buffer, not ByteStore.
1023 *       restore ncbi4na sequence with abmiguity characters
1024 *       returns TRUE on success, FALSE on failure.
1025 *       lbytes is pointer to ambiguity storage
1026 *
1027 *****************************************************************************/
RebuildDNA_4na(Uint1Ptr buffer,Int4 length,Uint4Ptr lbytes)1028 NLM_EXTERN Boolean RebuildDNA_4na (Uint1Ptr buffer, Int4 length, Uint4Ptr lbytes)
1029 
1030 {
1031     Boolean    new = FALSE;
1032     Uint4        i;
1033     Uint4     amb_num;
1034     Uint4Ptr  amb_buff;
1035     Uint1     char_l, char_r;
1036     Int4      row_len;
1037     Uint1     C_Mask[] = {0x0F, 0xF0};
1038     Int4      j, position = 0, pos =0 , rem =0 , index;
1039 
1040     if(buffer == NULL || length == 0)
1041         return FALSE;
1042 
1043     if(lbytes == NULL)
1044         return TRUE;
1045 
1046     amb_num  = *lbytes;
1047     amb_buff = lbytes + 1;
1048 
1049     /* Check if highest order bit set. */
1050     if (amb_num & 0x80000000)
1051     {
1052     new = TRUE;
1053     amb_num &= 0x7FFFFFFF;
1054     }
1055 
1056     for(i = 0; i < amb_num; i++) {
1057 
1058     if (new)
1059     {
1060                char_r    = (Uint1)(RES_VALUE(amb_buff[i]));
1061                row_len   = (Int4)(RES_LEN_NEW(amb_buff[i]));
1062             position  =         amb_buff[i+1];
1063     }
1064     else
1065     {
1066                char_r    = (Uint1)(RES_VALUE(amb_buff[i]));
1067                row_len   = (Int4)(RES_LEN(amb_buff[i]));
1068             position  =         RES_OFFSET(amb_buff[i]);
1069     }
1070 
1071         pos = position/2;
1072         rem = position%2;  /* 0 or 1 */
1073         char_l = char_r << 4;
1074 
1075         for(index = pos, j =0; j <=row_len; j++) {
1076 
1077                buffer[index] = (buffer[index] & C_Mask[rem]) + (rem ? char_r : char_l);
1078                 rem = !rem;
1079 
1080                 if(!rem) index++;
1081         }
1082 
1083     if (new) /* for new format we have 8 bytes for each element. */
1084         i++;
1085     }
1086 
1087     return TRUE;
1088 }
1089 /*****************************************************************************
1090 *
1091 *   BSRebuildDNA_4na(bytestoreptr, lbytes)
1092 *       restore ncbi4na sequence with abmiguity characters
1093 *       lbytes[0] == length of this storage
1094 *       frees old bytestore
1095 *       returns pointer to new one, or NULL on fail.
1096 *       lbytes is pointer to ambiguity storage
1097 *
1098 *****************************************************************************/
BSRebuildDNA_4na(ByteStorePtr from,Uint4Ptr lbytes)1099 NLM_EXTERN ByteStorePtr BSRebuildDNA_4na (ByteStorePtr from, Uint4Ptr lbytes)
1100 
1101 {
1102     Int4      bs_length;
1103     Uint1Ptr  buffer;
1104     Int4      num_bytes;
1105 
1106     if(from == NULL)
1107         return NULL;
1108 
1109     if(lbytes == NULL)
1110         return from;
1111 
1112     bs_length = BSLen(from);
1113     buffer = (Uint1Ptr) Nlm_Malloc(bs_length);
1114     if (buffer == NULL)
1115         return NULL;
1116 
1117     BSSeek(from, 0, SEEK_SET);
1118 
1119     if((num_bytes = BSRead(from, buffer, bs_length)) != bs_length)
1120         return NULL;
1121 
1122     if (RebuildDNA_4na(buffer, bs_length, lbytes) == FALSE)
1123     return NULL;
1124 
1125     BSSeek(from, 0, SEEK_SET);
1126     BSWrite(from, buffer, bs_length);
1127 
1128     MemFree(buffer);
1129     return from;
1130 }
1131 
/*****************************************************************************
*
*   Int4 BSCompressRead (Pointer data, Uint1Ptr buf, Int4 length)
*     Read hook for GenericCompressDNA(): reads up to "length" bytes from
*     the bytestore "data" into "buf".
*
*     NOTE!! The hook contract asks for the number of residues read; this
*            implementation simply reports twice the number of bytes read.
*            Use it ONLY when the real residue count is known and is
*            passed to GenericCompressDNA() as the sequence length.
*
*****************************************************************************/
static Int4 BSCompressRead (Pointer data, Uint1Ptr buf, Int4 length);
static Int4 BSCompressRead (Pointer data, Uint1Ptr buf, Int4 length)
{
  ByteStorePtr source = (ByteStorePtr) data;
  Int4 bytes_read = (Int4) BSRead(source, (VoidPtr) buf, length);

  /* reported as residues: two per byte read */
  return bytes_read * 2;
}
1151 
/*****************************************************************************
*
*   Int4 BSCompressWrite (Pointer data, Uint1Ptr buf, Int4 length)
*     Write hook for GenericCompressDNA(): writes "length" bytes from
*     "buf" to the bytestore "data".
*
*     Returns the value reported by BSWrite().
*****************************************************************************/
static Int4 BSCompressWrite (Pointer data, Uint1Ptr buf, Int4 length);
static Int4 BSCompressWrite (Pointer data, Uint1Ptr buf, Int4 length)
{
  ByteStorePtr target = (ByteStorePtr) data;
  Int4 written;

  written = (Int4) BSWrite(target, (VoidPtr) buf, length);
  return written;
}
1164 
1165 /*****************************************************************************
1166 *
1167 *   BSCompressDNA(bytestoreptr, len, lbytes)
1168 *       converts a ncbi4na bytestore into ncbi2na
1169 *       returns pointer to ambiguity storage
1170 *       lbytes[0] == length of this storage
1171 *       frees old bytestore
1172 *       returns pointer to new one, or NULL on fail.
1173 *       len is residues
1174 *
1175 *****************************************************************************/
BSCompressDNA(ByteStorePtr from,Int4 len,Uint4Ptr PNTR lbytes)1176 NLM_EXTERN ByteStorePtr BSCompressDNA(ByteStorePtr from, Int4 len,
1177                               Uint4Ptr PNTR lbytes)
1178 {
1179   ByteStorePtr to;
1180   to = BSNew((Uint4)len/4+1);
1181 
1182   BSSeek(from, 0, 0);
1183   BSSeek(to, 0, 0);
1184 
1185   if(!GenericCompressDNA((VoidPtr) from, (VoidPtr) to,
1186                          (Uint4)len,
1187                          BSCompressRead,
1188                          BSCompressWrite,
1189                          lbytes
1190                          )) {
1191     return NULL;
1192   }
1193 
1194   BSFree(from);
1195   return to;
1196 }
1197 
1198 /*****************************************************************************
1199 *
1200 *   BSCompressDNANew(bytestoreptr, len, lbytes)
1201 *       converts a ncbi4na bytestore into ncbi2na
1202 *       returns pointer to ambiguity storage
1203 *       lbytes[0] == length of this storage
1204 *       frees old bytestore
1205 *       returns pointer to new one, or NULL on fail.
1206 *       len is residues
1207 *
1208 *    This function stores the ambiguity code in 8 bytes so
1209 *    that there is no cutoff for sequences greater than 16 million bps.
1210 *    as there is for BSCompressDNA.
1211 *
1212 *****************************************************************************/
BSCompressDNANew(ByteStorePtr from,Int4 len,Uint4Ptr PNTR lbytes)1213 NLM_EXTERN ByteStorePtr BSCompressDNANew(ByteStorePtr from, Int4 len,
1214                               Uint4Ptr PNTR lbytes)
1215 {
1216   ByteStorePtr to;
1217   to = BSNew((Uint4)len/4+1);
1218 
1219   BSSeek(from, 0, 0);
1220   BSSeek(to, 0, 0);
1221 
1222   if(!GenericCompressDNAEx((VoidPtr) from, (VoidPtr) to,
1223                          (Uint4)len,
1224                          BSCompressRead,
1225                          BSCompressWrite,
1226                          lbytes, TRUE)) {
1227     return NULL;
1228   }
1229 
1230   BSFree(from);
1231   return to;
1232 }
1233 
1234 /*****************************************************************************
1235 *
1236 *   GenericCompressDNA()
1237 *       converts from VoidPtr "from" in 4na encoding to
1238 *       VoidPtr "to" in 2Na encoding
1239 *       returns pointer to ambiguity storage
1240 *       lbytes[0] == length of this storage
1241 *       returns TRUE if succeded, or FALSE on fail.
1242 *       seq_len is maximum number of residues in sequence
1243 *       or ((Uint4) -1) if final length is unknown.
1244 *       read_func and write_func - hook functions to read from "from"
1245 *       and to write to "to"
1246 *
1247 *       NOTE! read_func must return number of residues read, that usualy
1248 *             twice as much as returned number of bytes. Only last returned
1249 *             byte may have only one residue and this will be handled by
1250 *             seq_len value or returned value from read_func()
1251 *****************************************************************************/
GenericCompressDNA(VoidPtr from,VoidPtr to,Uint4 seq_len,CompressRWFunc read_func,CompressRWFunc write_func,Uint4Ptr PNTR lbytes)1252 NLM_EXTERN Boolean GenericCompressDNA(VoidPtr from,
1253                            VoidPtr to,
1254                            Uint4 seq_len,
1255                            CompressRWFunc read_func,
1256                            CompressRWFunc write_func,
1257                            Uint4Ptr PNTR lbytes)
1258 {
1259     return GenericCompressDNAEx(from, to, seq_len, read_func, write_func, lbytes, FALSE);
1260 }
1261 
GenericCompressDNAEx(VoidPtr from,VoidPtr to,Uint4 seq_len,CompressRWFunc read_func,CompressRWFunc write_func,Uint4Ptr PNTR lbytes,Boolean x_new)1262 NLM_EXTERN Boolean GenericCompressDNAEx(VoidPtr from,
1263                            VoidPtr to,
1264                            Uint4 seq_len,
1265                            CompressRWFunc read_func,
1266                            CompressRWFunc write_func,
1267                            Uint4Ptr PNTR lbytes,
1268                            Boolean x_new)
1269 {
1270   Int4 total_read, chunk_used, seq_offset;
1271   Int4 in_index = 0, out_index = 0;
1272   Uint1Ptr out_buff, in_buff;
1273   Uint1 bc_from, rshift_from, lshift_from, mask_from;
1274   Uint1 bc_to, byte_tmp;
1275   Uint1 bitctr_to, byte_to, byte_from, bitctr_from, residue_from;
1276   Uint1 lshift_to[5] = {0, 0, 2, 4, 6 };
1277 
1278   Int4     row_len =0;
1279   Uint1    last_ambchar = INVALID_RESIDUE;
1280   Uint4Ptr ambchar;
1281   Int4     ambsize = 2*(BSC_BUFF_CHUNK/2); /* we need this to be a multiple of two for the new format. */
1282 
1283   if(from == NULL) /* Invalid ByteStore format */
1284     return FALSE;
1285 
1286   /* Translation tables Initialization  fot ncbi4na->ncbi2na*/
1287 
1288   in_buff  = (Uint1Ptr)MemNew(BSC_BUFF_CHUNK);
1289   out_buff = (Uint1Ptr)MemNew(BSC_BUFF_CHUNK);
1290 
1291   bc_from = 2;
1292   rshift_from = 4;
1293   lshift_from = 4;
1294   mask_from = 240;
1295   bc_to = 4;            /* bit shifts needed */
1296 
1297   bitctr_to = bc_to;
1298   byte_to = 0;
1299   bitctr_from = 0;
1300 
1301   ambchar = (Uint4Ptr) Nlm_Malloc(sizeof(Uint4)*(ambsize + 1)); /* all plus one */
1302   *ambchar = 0;
1303 
1304   seq_offset = chunk_used = in_index = total_read = 0;
1305 
1306   while(seq_offset != seq_len) {
1307     if (chunk_used == total_read) {
1308       /* supposed, that in 4na total_read = in_index*2 or in_index*2-1 */
1309       if((total_read = read_func(from, in_buff, (Int4)BSC_BUFF_CHUNK)) == 0)
1310         break;
1311       if(total_read < 0) { /* ERROR!!! */
1312         MemFree(ambchar);
1313         MemFree(in_buff);
1314         MemFree(out_buff);
1315         return FALSE;
1316       }
1317       in_index = 0;
1318       chunk_used = 0;
1319     }
1320 
1321     if (!bitctr_from) {        /* need a new byte */
1322       byte_from = in_buff[in_index];
1323       bitctr_from = bc_from;
1324       in_index++;
1325     }
1326     residue_from = byte_from & mask_from;
1327     residue_from >>= rshift_from;
1328     byte_from <<= lshift_from;
1329     bitctr_from--;
1330     if(!Convert4NaRandom(residue_from, &byte_tmp)) {
1331 
1332       /* We have to handle invalid residues in a good way */
1333 
1334       if(*ambchar >= (Uint4)(ambsize-1)) { /* Reallocating buffer if necessary */
1335         ambsize += 2*(BSC_BUFF_CHUNK/2); /* we need this to be a multiple of two for the new format. */
1336         ambchar = (Uint4Ptr) Realloc(ambchar, (ambsize+1)*sizeof(Uint4));
1337       }
1338 
1339       /* Constructing integer as <1111.  1111.  11111111.11111111.11111111
1340        *                         <char><length><--------- offset -------->
1341        * First interer in array will be length of array
1342        */
1343 
1344       if (x_new && seq_len >= 0xFFFFFF)
1345       {
1346           if(last_ambchar != residue_from || row_len == 0xFFF) {
1347      if ((*ambchar) == 0)
1348                 (*ambchar)++;
1349      else
1350           (*ambchar) += 2;
1351             ambchar[*ambchar] = 0;
1352             ambchar[*ambchar] += residue_from;
1353             ambchar[*ambchar] <<= 28;
1354     /* Put the seq_offset in the 2nd integer. */
1355             ambchar[(*ambchar)+1] = seq_offset;
1356 
1357             last_ambchar = residue_from;
1358             row_len = 0;
1359             /*  printf("Ambchar = %u(%u)(%u) : %u %u %u\n",
1360             residue_from, row_len, total_len-len,
1361             RES_VALUE(ambchar[*ambchar]),
1362             RES_LEN(ambchar[*ambchar]),
1363             RES_OFFSET(ambchar[*ambchar])); */
1364           } else {
1365             (ambchar[*ambchar]) += LEN_STEP_MASK_NEW;
1366             row_len++;
1367         /* printf("Ambchar = %u(%u)(%u) : %u %u %u\n",
1368            residue_from, row_len, total_len-len,
1369            RES_VALUE(ambchar[*ambchar]),
1370            RES_LEN(ambchar[*ambchar]),
1371            RES_OFFSET(ambchar[*ambchar]));  */
1372           }
1373       }
1374       else
1375       {
1376           if(last_ambchar != residue_from || row_len == 15) {
1377             (*ambchar)++;
1378             ambchar[*ambchar] = 0;
1379             ambchar[*ambchar] += residue_from;
1380             ambchar[*ambchar] <<= 28;
1381             ambchar[*ambchar] += seq_offset;
1382 
1383             last_ambchar = residue_from;
1384             row_len = 0;
1385             /*  printf("Ambchar = %u(%u)(%u) : %u %u %u\n",
1386             residue_from, row_len, total_len-len,
1387             RES_VALUE(ambchar[*ambchar]),
1388             RES_LEN(ambchar[*ambchar]),
1389             RES_OFFSET(ambchar[*ambchar])); */
1390           } else {
1391             (ambchar[*ambchar]) += LEN_STEP_MASK;
1392             row_len++;
1393         /* printf("Ambchar = %u(%u)(%u) : %u %u %u\n",
1394            residue_from, row_len, total_len-len,
1395            RES_VALUE(ambchar[*ambchar]),
1396            RES_LEN(ambchar[*ambchar]),
1397            RES_OFFSET(ambchar[*ambchar]));  */
1398           }
1399        }
1400     } else {
1401           last_ambchar = INVALID_RESIDUE; /* reset of last residue */
1402     }
1403     byte_tmp <<= lshift_to[bitctr_to];
1404     byte_to |= byte_tmp;
1405     bitctr_to--;
1406     if (! bitctr_to) {
1407       if (out_index == BSC_BUFF_CHUNK) {
1408 
1409         /* Flush buffer if it is full */
1410 
1411         out_index = write_func(to, out_buff, out_index);
1412         out_index = 0;
1413       }
1414 
1415       out_buff[out_index] = byte_to;
1416       out_index++;
1417 
1418       bitctr_to = bc_to;
1419       byte_to = 0;
1420     }
1421     chunk_used++;
1422     seq_offset++;
1423   } /* while TRUE */
1424 
1425   /* Now we will BSWrite() all recorded bytes in buffer */
1426 
1427   out_index = write_func(to, out_buff, out_index);
1428 
1429   if (bitctr_to != bc_to) {   /* partial byte not written */
1430     byte_to += (seq_len)%4;   /* last 2 bits will be remainder */
1431     write_func(to, &byte_to, 1);
1432   } else {
1433     write_func(to, &byte_to, 1);  /* NULLB anyway */
1434   }
1435 
1436   if(!*ambchar) { /* no ambiguous characters found */
1437     MemFree(ambchar);
1438     *lbytes = NULL;
1439   } else {
1440     if (x_new && seq_len >= 0xFFFFFF)
1441     {
1442            (*ambchar)++;
1443     *ambchar += 0x80000000;
1444     }
1445     *lbytes = (Uint4Ptr)ambchar;
1446   }
1447   MemFree(in_buff);
1448   MemFree(out_buff);
1449   return TRUE;
1450 }
1451 
1452 /*****************************************************************************
1453 *                 --- To be deleted ---
1454 *   BSCompressDNA(bytestoreptr, len, lbytes)
1455 *       converts a ncbi4na bytestore into ncbi2na
1456 *       returns pointer to ambiguity storage
1457 *       lbytes[0] == length of this storage
1458 *       frees old bytestore
1459 *       returns pointer to new one, or NULL on fail.
1460 *       len is residues
1461 *
1462 *****************************************************************************/
BSCompressDNAOld(ByteStorePtr from,Int4 len,Uint4Ptr PNTR lbytes)1463 NLM_EXTERN ByteStorePtr BSCompressDNAOld(ByteStorePtr from, Int4 len,
1464                               Uint4Ptr PNTR lbytes)
1465 {
1466   ByteStorePtr to;
1467   Int4 total_len = len;
1468   Int4 storelen = len/4 + 1, in_index = 0, out_index = 0;
1469   Uint1Ptr out_buff, in_buff;
1470   Uint1 bc_from, rshift_from, lshift_from, mask_from;
1471   Uint1 bc_to, byte_tmp;
1472   Uint1 bitctr_to, byte_to, byte_from, bitctr_from, residue_from;
1473   Uint1 lshift_to[5] = {0, 0, 2, 4, 6 };
1474 
1475   Uint1    row_len =0, last_ambchar = INVALID_RESIDUE;
1476   Uint4Ptr ambchar;
1477   Int4     ambsize = BSC_BUFF_CHUNK;
1478 
1479   if(from == NULL) /* Invalid ByteStore format */
1480     return NULL;
1481 
1482   /* Translation tables Initialization  fot ncbi4na->ncbi2na*/
1483 
1484   if((to = BSNew((Uint4)storelen)) == NULL)
1485     return NULL;
1486 
1487   BSSeek(from, 0, 0);
1488   BSSeek(to, 0, 0);
1489 
1490   in_buff  = (Uint1Ptr)MemNew(BSC_BUFF_CHUNK);
1491   out_buff = (Uint1Ptr)MemNew(BSC_BUFF_CHUNK);
1492 
1493   bc_from = 2;
1494   rshift_from = 4;
1495   lshift_from = 4;
1496   mask_from = 240;
1497   bc_to = 4;            /* bit shifts needed */
1498 
1499   bitctr_to = bc_to;
1500   byte_to = 0;
1501   bitctr_from = 0;
1502 
1503   ambchar = (Uint4Ptr) MemNew(sizeof(Uint4)*(ambsize + 1)); /* all plus one */
1504   *ambchar = 0;
1505 
1506   in_index = BSC_BUFF_CHUNK;
1507 
1508   while(len) {
1509     if (in_index == BSC_BUFF_CHUNK) {
1510       in_index = (Int2) BSRead(from, (VoidPtr)in_buff, (Int4)BSC_BUFF_CHUNK);
1511       in_index = 0;
1512     }
1513 
1514     if (! bitctr_from) {        /* need a new byte */
1515       byte_from = in_buff[in_index];
1516       in_index++;
1517       bitctr_from = bc_from;
1518     }
1519     residue_from = byte_from & mask_from;
1520     residue_from >>= rshift_from;
1521     byte_from <<= lshift_from;
1522     bitctr_from--;
1523     if(!Convert4NaRandom(residue_from, &byte_tmp)) {
1524 
1525       /* We have to handle invalid residues in a good way */
1526 
1527       if(*ambchar >= (Uint4)ambsize) { /* Reallocating buffer if necessary */
1528         ambsize += BSC_BUFF_CHUNK;
1529         ambchar = (Uint4Ptr) Realloc(ambchar, (ambsize+1)*sizeof(Uint4));
1530       }
1531 
1532       /* Constructing integer as <1111.  1111.  11111111.11111111.11111111
1533        *                         <char><length><--------- offset -------->
1534        * First interer in array will be length of array
1535        */
1536 
1537       if(last_ambchar != residue_from || row_len == 15) {
1538         (*ambchar)++;
1539         ambchar[*ambchar] = 0;
1540         ambchar[*ambchar] += residue_from;
1541         ambchar[*ambchar] <<= 28;
1542         ambchar[*ambchar] += (total_len-len);
1543 
1544         last_ambchar = residue_from;
1545         row_len = 0;
1546         /*  printf("Ambchar = %u(%u)(%u) : %u %u %u\n",
1547             residue_from, row_len, total_len-len,
1548             RES_VALUE(ambchar[*ambchar]),
1549             RES_LEN(ambchar[*ambchar]),
1550             RES_OFFSET(ambchar[*ambchar])); */
1551       } else {
1552         (ambchar[*ambchar]) += LEN_STEP_MASK;
1553         row_len++;
1554         /* printf("Ambchar = %u(%u)(%u) : %u %u %u\n",
1555            residue_from, row_len, total_len-len,
1556            RES_VALUE(ambchar[*ambchar]),
1557            RES_LEN(ambchar[*ambchar]),
1558            RES_OFFSET(ambchar[*ambchar]));  */
1559       }
1560     } else {
1561       last_ambchar = INVALID_RESIDUE; /* reset of last residue */
1562     }
1563     byte_tmp <<= lshift_to[bitctr_to];
1564     byte_to |= byte_tmp;
1565     bitctr_to--;
1566     if (! bitctr_to) {
1567       if (out_index == BSC_BUFF_CHUNK) {
1568 
1569         /* Flush buffer if it is full */
1570 
1571         out_index = (Int2) BSWrite(to, (VoidPtr)out_buff, out_index);
1572         out_index = 0;
1573       }
1574 
1575       out_buff[out_index] = byte_to;
1576       out_index++;
1577 
1578       bitctr_to = bc_to;
1579       byte_to = 0;
1580     }
1581     len--;
1582   }
1583 
1584   /* Now we will BSWrite() all recorded bytes in buffer */
1585 
1586   out_index = (Int2) BSWrite(to, (VoidPtr)out_buff, out_index);
1587 
1588   if (bitctr_to != bc_to) {   /* partial byte not written */
1589     byte_to += total_len%4;   /* last 2 bits will be remainder */
1590     BSPutByte(to, byte_to);
1591   } else {
1592     BSPutByte(to, byte_to);   /* NULLB anyway */
1593   }
1594   BSFree(from);
1595 
1596   if(!*ambchar) { /* no ambiguous characters found */
1597     MemFree(ambchar);
1598     *lbytes = NULL;
1599   } else {
1600     *lbytes = (Uint4Ptr)ambchar;
1601   }
1602   MemFree(in_buff);
1603   MemFree(out_buff);
1604   return to;
1605 }
1606 
1607 /*****************************************************************************
1608  *
1609  *   void CorrectGeneFeatLocation(sep, data, n, m)
1610  *
1611  *    Correct gene location for mRNA sequences, i.e.
1612  *   puts start = 0, end = total_length_of_sequence - 1.
1613  *
1614  *****************************************************************************/
CorrectGeneFeatLocation(SeqEntryPtr sep,Pointer data,Int4 n,Int2 m)1615 NLM_EXTERN void CorrectGeneFeatLocation(SeqEntryPtr sep, Pointer data,
1616                              Int4 n, Int2 m)
1617 {
1618     BioseqPtr      bsp;
1619     ValNodePtr      vnp;
1620     MolInfoPtr      mip;
1621     SeqAnnotPtr      sap;
1622     SeqFeatPtr      sfp;
1623     SeqIntPtr      sip;
1624     SeqDescrPtr   sdp;
1625     BioSourcePtr  biop;
1626     OrgRefPtr     orp;
1627 
1628     if(sep == NULL)
1629         return;
1630 
1631     /* We need only Bioseqs
1632      */
1633     if(IS_Bioseq(sep) != TRUE)
1634         return;
1635 
1636     bsp = sep->data.ptrvalue;
1637     if(bsp == NULL)
1638         return;
1639 
1640     /* Looks at nucleic acids with the non-zero length only
1641      */
1642     if(ISA_na(bsp->mol) != TRUE || bsp->length == 0)
1643         return;
1644 
1645     /* Checks bioseq if it is mRNA
1646      */
1647     for(vnp = bsp->descr; vnp != NULL; vnp = vnp->next) {
1648         if(vnp->choice != Seq_descr_molinfo)
1649             continue;
1650         mip = vnp->data.ptrvalue;
1651         if(mip == NULL || mip->biomol != 3)    /* not mRNA */
1652             continue;
1653         break;
1654     }
1655 
1656     /* If bioseq is not mRNA, does nothing, just return
1657      */
1658     if(vnp == NULL)
1659         return;
1660 
1661     sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
1662     if (sdp != NULL) {
1663       biop = (BioSourcePtr) sdp->data.ptrvalue;
1664       if (biop != NULL) {
1665         if (biop->origin == ORG_ARTIFICIAL) {
1666           orp = biop->org;
1667           if (orp != NULL) {
1668             if (StringICmp (orp->taxname, "synthetic construct") == 0) return;
1669           }
1670         }
1671       }
1672     }
1673 
1674     /* Otherwise go ahead
1675      */
1676     for(sap = bsp->annot; sap != NULL; sap = sap->next) {
1677         if(sap->type != 1)
1678             continue;
1679 
1680         for(sfp = sap->data; sfp != NULL; sfp = sfp->next) {
1681             /* Is it gene feature ?
1682              */
1683             if(sfp->data.choice != SEQFEAT_GENE)
1684                 continue;
1685 
1686             /* If so, is it not empty ?
1687              */
1688             if(sfp->data.value.ptrvalue == NULL)
1689                 continue;
1690 
1691             /* Then correct location
1692              */
1693             for(vnp = sfp->location; vnp != NULL; vnp = vnp->next) {
1694                 if(vnp->choice != SEQLOC_INT)
1695                     continue;
1696                 sip = vnp->data.ptrvalue;
1697                 if(sip == NULL)
1698                     continue;
1699                 if(sip->from != 0 || sip->to != bsp->length - 1) {
1700                     ErrPostEx(SEV_WARNING, 0, 0,
1701                               "Incorrect gene location: [%d..%d] "
1702                               "instead of [0..%d]. Fixed.",
1703                               sip->from, sip->to, bsp->length - 1);
1704                     sip->from = 0;
1705                     sip->to = bsp->length - 1;
1706                 }
1707             }
1708         }
1709     }
1710 }
1711 
1712 /*****************************************************************************
1713 *
1714 *   Int4 NumberingOffset(np, value)
1715 *       returns an offset to the sequence based on value
1716 *       returns -1 if invalid
1717 *       does NOT deal with Num-ref types
1718 *       does NOT deal with specified ranges on the sequence
1719 *
1720 *****************************************************************************/
NumberingOffset(NumberingPtr np,DataValPtr vp)1721 NLM_EXTERN Int4 NumberingOffset (NumberingPtr np, DataValPtr vp)
1722 
1723 {
1724     Int4 offset = -1, i, num;
1725     NumContPtr ncp;
1726     NumEnumPtr nep;
1727     NumRealPtr nrp;
1728     CharPtr PNTR ptr;
1729     CharPtr name;
1730     FloatHi foffset;
1731 
1732     if ((np == NULL) || (vp == NULL)) return -1;
1733 
1734     switch (np->choice)
1735     {
1736         case Numbering_cont:
1737             ncp = (NumContPtr)np->data.ptrvalue;
1738             if (ncp->ascending)
1739             {
1740                 offset = vp->intvalue - ncp->refnum;
1741                 if ((ncp->refnum  < 0) && (! ncp->has_zero) &&
1742                     (vp->intvalue > 0))
1743                     offset--;
1744             }
1745             else
1746             {
1747                 offset = ncp->refnum - vp->intvalue;
1748                 if ((ncp->refnum > 0) && (! ncp->has_zero) &&
1749                     (vp->intvalue < 0))
1750                     offset--;
1751             }
1752             break;
1753         case Numbering_enum:
1754             nep = (NumEnumPtr)np->data.ptrvalue;
1755             name = (CharPtr)vp->ptrvalue;
1756             num = nep->num;
1757             ptr = nep->names;
1758             for (i = 0; i < num; i++, ptr++)
1759             {
1760                 if (! StringCmp(name, *ptr))
1761                 {
1762                     offset = i;
1763                     break;
1764                 }
1765             }
1766             break;
1767         case Numbering_ref_source:
1768         case Numbering_ref_align:
1769             ErrPostEx(SEV_ERROR, 0,0, "Num-ref not supported yet");
1770             break;
1771         case Numbering_real:
1772             nrp = (NumRealPtr)np->data.ptrvalue;
1773             foffset = (vp->realvalue - nrp->b) / nrp->a;
1774             offset = (Int4) foffset;
1775             if ((foffset - (FloatHi)offset) >= 0.5)
1776                 offset++;
1777             break;
1778     }
1779     return offset;
1780 }
1781 
1782 /*****************************************************************************
1783 *
1784 *   NumberingValue (np, offset, value)
1785 *       fills value with the display value of offset
1786 *       return type indicates type of value
1787 *       0 = failed
1788 *       1 = intvalue
1789 *       2 = realvalue
1790 *       3 = ptrvalue (string)
1791 *
1792 *****************************************************************************/
NumberingValue(NumberingPtr np,Int4 offset,DataValPtr vp)1793 NLM_EXTERN Int2 NumberingValue (NumberingPtr np, Int4 offset, DataValPtr vp)
1794 
1795 {
1796     NumContPtr ncp;
1797     NumEnumPtr nep;
1798     NumRealPtr nrp;
1799     Int2 type = 0;
1800     Int4 intval;
1801     FloatHi fval;
1802 
1803     if ((np == NULL) || (vp == NULL)) return -1;
1804 
1805     switch (np->choice)
1806     {
1807         case Numbering_cont:
1808             ncp = (NumContPtr)np->data.ptrvalue;
1809             if (ncp->ascending)
1810             {
1811                 intval = offset + ncp->refnum;
1812                 if ((ncp->refnum  < 0) && (! ncp->has_zero) &&
1813                     (intval >= 0))
1814                     intval++;
1815             }
1816             else
1817             {
1818                 intval = ncp->refnum - offset;
1819                 if ((ncp->refnum > 0) && (! ncp->has_zero) &&
1820                     (intval <= 0))
1821                     intval--;
1822             }
1823             vp->intvalue = intval;
1824             type = 1;
1825             break;
1826         case Numbering_enum:
1827             nep = (NumEnumPtr)np->data.ptrvalue;
1828             if (offset < nep->num)
1829             {
1830                 vp->ptrvalue = nep->names[offset];
1831                 type = 3;
1832             }
1833             break;
1834         case Numbering_ref_source:
1835         case Numbering_ref_align:
1836             ErrPostEx(SEV_ERROR, 0,0, "Num-ref not supported yet");
1837             break;
1838         case Numbering_real:
1839             nrp = (NumRealPtr)np->data.ptrvalue;
1840             fval = ((FloatHi)offset * nrp->a) + nrp->b;
1841             type = 2;
1842             vp->realvalue = fval;
1843             break;
1844     }
1845 
1846     return type;
1847 }
1848 
1849 /*****************************************************************************
1850 *
1851 *   NumberingValueBySeqId(sip, offset, vp)
1852 *
1853 *****************************************************************************/
NumberingValueBySeqId(SeqIdPtr sip,Int4 offset,DataValPtr vp)1854 NLM_EXTERN Int2 NumberingValueBySeqId (SeqIdPtr sip, Int4 offset, DataValPtr vp)
1855 
1856 {
1857     BioseqPtr bsp;
1858     NumberingPtr np = NULL;
1859 
1860     if ((sip == NULL) || (vp == NULL)) return -1;
1861 
1862     bsp = BioseqFind(sip);
1863     if (bsp == NULL)
1864         np = NumberingDefaultGet();
1865     else
1866         np = BioseqGetNumbering(bsp);
1867 
1868     return NumberingValue(np, offset, vp);
1869 }
1870 
1871 /*****************************************************************************
1872 *
1873 *   NumberingDefaultLoad()
1874 *
1875 *****************************************************************************/
NumberingDefaultLoad(void)1876 NLM_EXTERN void NumberingDefaultLoad (void)
1877 
1878 {
1879     NumContPtr ncp;
1880 
1881     if (stdnum != NULL)
1882         return;
1883 
1884     stdnum = ValNodeNew(NULL);   /* set up numbering from 1 */
1885     stdnum->choice = Numbering_cont;
1886     ncp = NumContNew();
1887     ncp->refnum = 1;   /* number from one */
1888     ncp->ascending = TRUE;
1889     stdnum->data.ptrvalue = (Pointer) ncp;
1890     return;
1891 }
1892 
1893 /*****************************************************************************
1894 *
1895 *   NumberingDefaultGet()
1896 *       returns a default numbering object (start at 1, ascending, no 0)
1897 *
1898 *****************************************************************************/
NumberingDefaultGet(void)1899 NLM_EXTERN NumberingPtr NumberingDefaultGet (void)
1900 
1901 {
1902     if (stdnum == NULL)
1903         NumberingDefaultLoad();
1904     return stdnum;
1905 }
1906 
1907 /*****************************************************************************
1908 *
1909 *   SeqCodeTablePtr SeqCodeTableFind(code)
1910 *       Sequence codes defined in objseq.h
1911 *
1912 *****************************************************************************/
SeqCodeTableFind(Uint1 code)1913 NLM_EXTERN SeqCodeTablePtr LIBCALL SeqCodeTableFind (Uint1 code)
1914 {
1915     return SeqCodeTableFindObj (code);
1916 }
1917 
1918 /*****************************************************************************
1919 *
1920 *   OneLetterCode(sctp)
1921 *       returns TRUE if sequence code table sctp uses one letter symbols
1922 *
1923 *****************************************************************************/
OneLetterCode(SeqCodeTablePtr sctp)1924 NLM_EXTERN Boolean OneLetterCode (SeqCodeTablePtr sctp)
1925 {
1926     if (sctp == NULL) return FALSE;
1927     return sctp->one_letter;
1928 }
1929 
1930 /*****************************************************************************
1931 *
1932 *   FirstResidueInCode(sctp)
1933 *       returns first valid residue code in sequence code table
1934 *
1935 *****************************************************************************/
FirstResidueInCode(SeqCodeTablePtr sctp)1936 NLM_EXTERN Uint1 FirstResidueInCode (SeqCodeTablePtr sctp)
1937 {
1938     if (sctp == NULL) return INVALID_RESIDUE;
1939     return sctp->start_at;
1940 }
1941 
1942 /*****************************************************************************
1943 *
1944 *   LastResidueInCode(sctp)
1945 *      returns last valid residue code in sequence code table
1946 *      nb: some codes have "holes", a range of invalid values between first
1947 *      and last.
1948 *
1949 *****************************************************************************/
LastResidueInCode(SeqCodeTablePtr sctp)1950 NLM_EXTERN Uint1 LastResidueInCode (SeqCodeTablePtr sctp)
1951 {
1952     if (sctp == NULL) return INVALID_RESIDUE;
1953     return (Uint1)((int)(sctp->start_at) + (int)(sctp->num) - 1);
1954 }
1955 
1956 /*****************************************************************************
1957 *
1958 *   GetIndexForResidue(sctp, residue)
1959 *       gets index into sctp structs for residue
1960 *       returns INVALID_RESIDUE if no good
1961 *
1962 *****************************************************************************/
1963 NLM_EXTERN Uint1 GetIndexForResidue(SeqCodeTablePtr sctp, Uint1 residue);
GetIndexForResidue(SeqCodeTablePtr sctp,Uint1 residue)1964 NLM_EXTERN Uint1 GetIndexForResidue(SeqCodeTablePtr sctp, Uint1 residue)
1965 {
1966     if (sctp == NULL) return INVALID_RESIDUE;
1967     if (residue < sctp->start_at) return INVALID_RESIDUE;
1968     residue -= sctp->start_at;
1969     if (residue >= sctp->num) return INVALID_RESIDUE;
1970     return residue;
1971 }
1972 
1973 
1974 /*****************************************************************************
1975 *
1976 *   GetSymbolForResidue(sctp, residue)
1977 *       returns the ONE LETTER symbol for residue if sequence code has one
1978 *       letter symbols. returns INVALID_RESIDUE if not a valid residue or if
1979 *       sequence code uses multi-letter symbols
1980 *
1981 *****************************************************************************/
GetSymbolForResidue(SeqCodeTablePtr sctp,Uint1 residue)1982 NLM_EXTERN Uint1 GetSymbolForResidue (SeqCodeTablePtr sctp, Uint1 residue)
1983 {
1984     Uint1 offset;
1985 
1986     offset = GetIndexForResidue (sctp, residue);
1987     if (offset == INVALID_RESIDUE) return offset;
1988     if (! sctp->one_letter) return INVALID_RESIDUE;
1989     if (sctp->letters[offset] == '\0') return INVALID_RESIDUE;
1990     return (Uint1)(sctp->letters[offset]);
1991 }
1992 
1993 /*****************************************************************************
1994 *
*   GetResidueForSymbol(sctp, symbol)
*       returns the residue for a ONE LETTER symbol if sequence code has one
*       letter symbols. returns INVALID_RESIDUE if not a valid symbol or if
*       sequence code uses multi-letter symbols
1999 *       CASE matters
2000 *
2001 *****************************************************************************/
GetResidueForSymbol(SeqCodeTablePtr sctp,Uint1 symbol)2002 NLM_EXTERN Uint1 GetResidueForSymbol (SeqCodeTablePtr sctp, Uint1 symbol)
2003 {
2004     Int2 ctr;
2005     CharPtr letters;
2006 
2007     if (sctp == NULL) return INVALID_RESIDUE;
2008     if (! sctp->one_letter) return INVALID_RESIDUE;
2009 
2010     letters = sctp->letters;
2011     for (ctr = 0; ctr < (Int2)sctp->num; ctr++, letters++)
2012     {
2013         if ((Char)symbol == *letters)
2014             return ((Uint1)ctr + sctp->start_at);
2015     }
2016     return INVALID_RESIDUE;
2017 }
2018 
2019 /*****************************************************************************
2020 *
*   GetLongSymbolForResidue(sctp, residue)
2022 *       returns string symbol for residue if sequence code has string
2023 *       symbols. returns NULL if not a valid residue or if
2024 *       sequence code uses One letter symbols
2025 *
2026 *****************************************************************************/
GetLongSymbolForResidue(SeqCodeTablePtr sctp,Uint1 residue)2027 NLM_EXTERN const char * GetLongSymbolForResidue (SeqCodeTablePtr sctp, Uint1 residue)
2028 {
2029     Uint1 offset;
2030 
2031     offset = GetIndexForResidue (sctp, residue);
2032     if (offset == INVALID_RESIDUE) return NULL;
2033     if (sctp->one_letter) return NULL;
2034 
2035     return (const char *)(sctp->symbols[offset]);
2036 
2037 }
2038 
2039 /*****************************************************************************
2040 *
2041 *   GetResidueForLongSymbol(sctp, symbol)
2042 *       returns the residue for a STRING symbol if sequence code has string
2043 *       symbols. returns INVALID_RESIDUE if not a valid symbol or if
2044 *       sequence code uses one-letter symbols
2045 *       CASE matters
2046 *
2047 *****************************************************************************/
GetResidueForLongSymbol(SeqCodeTablePtr sctp,CharPtr symbol)2048 NLM_EXTERN Uint1 GetResidueForLongSymbol (SeqCodeTablePtr sctp, CharPtr symbol)
2049 {
2050     Int2 ctr;
2051     CharPtr PNTR symbols;
2052 
2053     if ((sctp == NULL) || (symbol == NULL)) return INVALID_RESIDUE;
2054     if (sctp->one_letter) return INVALID_RESIDUE;
2055 
2056     symbols = sctp->symbols;
2057     for (ctr = 0; ctr < (Int2)sctp->num; ctr++, symbols++)
2058     {
2059         if (! StringCmp(*symbols, symbol))
2060             return ((Uint1)ctr + sctp->start_at);
2061     }
2062     return INVALID_RESIDUE;
2063 }
2064 
2065 /*****************************************************************************
2066 *
2067 *   const char * GetNameForResidue (sctp, residue)
2068 *      returns the descriptive name (eg. "Leucine") for a residue in the
2069 *      sequence code defined by sctp
2070 *      returns NULL if not a valid code in the alphabet
2071 *      nb: some codes have "holes" in them, regions of values that are
2072 *       invalid.
2073 *
2074 *****************************************************************************/
GetNameForResidue(SeqCodeTablePtr sctp,Uint1 residue)2075 NLM_EXTERN const char * GetNameForResidue (SeqCodeTablePtr sctp, Uint1 residue)
2076 {
2077     Uint1 offset;
2078 
2079     offset = GetIndexForResidue (sctp, residue);
2080     if (offset == INVALID_RESIDUE) return NULL;
2081 
2082     return (const char *)(sctp->names[offset]);
2083 
2084 }
2085 
2086 /*****************************************************************************
2087 *
2088 *   SeqMapTablePtr SeqMapTableFind(to, from)
2089 *      Map from sequence code "from" to sequence code "to"
2090 *      Sequence codes defined in objseq.h
2091 *      For to == ncbi2na initialize Random generator and for
2092 *      Seq_code_iupacna --> Seq_code_ncbi2na initialize conversion table
2093 *****************************************************************************/
SeqMapTableFind(Uint1 to,Uint1 from)2094 NLM_EXTERN SeqMapTablePtr LIBCALL SeqMapTableFind (Uint1 to, Uint1 from)
2095 {
2096 
2097   /* If we want to convert iupacna to ncbi4na initialize
2098      randomize conversion table */
2099 
2100   if(to == Seq_code_ncbi2na) {
2101    /* Nlm_RandomSeed(Nlm_GetSecs()); */
2102 
2103     if(from == Seq_code_iupacna && !NaI2InitOk) {
2104       if(!InitNaI2Table())
2105         return NULL;
2106     }
2107   }
2108   return SeqMapTableFindObj (to, from);
2109 }
2110 
2111 /*****************************************************************************
2112  *
2113 *   void NaI2TableFree(void)
2114 *      Free allocated memory for
2115 *      Seq_code_iupacna --> Seq_code_ncbi2na transfer
2116 *****************************************************************************/
NaI2TableFree(void)2117 NLM_EXTERN void NaI2TableFree(void)
2118 {
2119   Int4 i;
2120   for(i=0; i < 5; i++)
2121     MemFree(NaI2[i]);
2122 }
2123 
2124 /*****************************************************************************
2125 *
2126 *   Boolean InitNaI2Table(void)
2127 *      Initialize random conversion table for
2128 *      Seq_code_iupacna --> Seq_code_ncbi2na transfer
2129 *****************************************************************************/
InitNaI2Table(void)2130 static Boolean InitNaI2Table(void)
2131 {
2132   SeqMapTablePtr smtp;
2133   register Int4 i, j;
2134   Uint1 ch;
2135 
2136   /* Initialization of random function by some long value */
2137 
2138   if((smtp = SeqMapTableFindObj(Seq_code_iupacna,
2139                                 Seq_code_ncbi4na)) == NULL)
2140     return FALSE;
2141 
2142   for(i = 0; i < 5; i++) {
2143     NaI2[i] = (Int1Ptr) MemNew(256);
2144     MemSet((CharPtr) NaI2[i], -1, 256);
2145   }
2146 
2147   MemSet((CharPtr)NaI2Set, -1, sizeof(NaI2Set));
2148 
2149   for(i = 0 ; i < 16; i ++) {
2150     NaI2Set[ch = (Uint1)SeqMapTableConvert(smtp, (Uint1)i)] = Na42Set[i];
2151     for(j = 0; j < 5; j++)
2152       NaI2[j][ch] = Na42[i][j];
2153   }
2154   NaI2InitOk = TRUE;
2155   return TRUE;
2156 }
2157 
2158 /*****************************************************************************
2159 *
2160 *   Convert4NaRandom(from, to)
2161 *       Converts Seq_code_ncbi4na "from" to  Seq_code_ncbi2na "to"
2162 *       with random conversions
2163 *       Return TRUE if conversion done without randomization
2164 *       Nlm_RandomSeed(Nlm_GetSecs()); recommended in calling function
2165 *****************************************************************************/
Convert4NaRandom(Uint1 from,Uint1 PNTR to)2166 NLM_EXTERN Boolean Convert4NaRandom(Uint1 from, Uint1 PNTR to)
2167 {
2168   Boolean retvalue;
2169 
2170   *to = (Uint1) (retvalue = (Na42Set[from] >= 0)) ?
2171     Na42Set[from] : CONVERT_42_RAND(from);
2172   return retvalue;
2173 }
2174 
2175 /*****************************************************************************
2176 *
2177 *   SeqMapTableConvert(smtp, from)
2178 *       returns conversion of "from" using SeqMapTable smtp
*       When smtp->to == Seq_code_ncbi2na the random conversion tables are used
2180 *
2181 *****************************************************************************/
SeqMapTableConvert(SeqMapTablePtr smtp,Uint1 from)2182 NLM_EXTERN Uint1 SeqMapTableConvert (SeqMapTablePtr smtp, Uint1 from)
2183 
2184 {
2185   Int2 index;
2186 
2187   if (smtp == NULL) return (Uint1)(INVALID_RESIDUE);
2188 
2189   /* For conversions into ncbi2na encoding we will use randomized
2190      generation of residues */
2191 
2192   if(smtp->to == Seq_code_ncbi2na) {
2193     if(smtp->from ==  Seq_code_ncbi4na)
2194       return (Uint1) (Na42Set[from] < 0) ?
2195         CONVERT_42_RAND(from) : Na42Set[from];
2196     else if(smtp->from == Seq_code_iupacna)
2197       return (Uint1) (NaI2Set[from] < 0) ?
2198         CONVERT_I2_RAND(from) : NaI2Set[from];
2199   }
2200 
2201   /* This will handle all other cases */
2202 
2203   index = (Int2)from - (Int2)(smtp->start_at);
2204   if ((index >= 0) && (index < (Int2)(smtp->num)))
2205     return (Uint1)(smtp->table[index]);
2206   else
2207     return (Uint1)(INVALID_RESIDUE);
2208 }
2209 
2210 /*****************************************************************************
2211 *
2212 *   SeqCodeTableComp(sctp, residue)
2213 *       returns complement of residue if possible
*       or INVALID_RESIDUE, if not
2215 *       assumes residue is in the same code as sctp
2216 *
2217 *****************************************************************************/
SeqCodeTableComp(SeqCodeTablePtr sctp,Uint1 residue)2218 NLM_EXTERN Uint1 SeqCodeTableComp (SeqCodeTablePtr sctp, Uint1 residue)
2219 
2220 {
2221     Int2 index;
2222 
2223     if ((sctp == NULL) || (sctp->comps == NULL))   /* no complement table */
2224         return INVALID_RESIDUE;
2225 
2226     index = (Int2)residue - (Int2)(sctp->start_at);
2227     if ((index < 0 ) || (index >= (Int2)(sctp->num)))
2228         return INVALID_RESIDUE;
2229     else
2230         return sctp->comps[index];
2231 }
2232 
2233 /*****************************************************************************
2234 *
2235 *   SeqEntryList(sep, mydata, mycallback, index, indent)
2236 *       traverses all Seq-entry nodes beginning with sep
2237 *       calls mycallback() at each node
2238 *
2239 *****************************************************************************/
SeqEntryList(SeqEntryPtr sep,Pointer mydata,SeqEntryFunc mycallback,Int4 index,Int2 indent)2240 NLM_EXTERN Int4 SeqEntryList (SeqEntryPtr sep, Pointer mydata, SeqEntryFunc mycallback, Int4 index, Int2 indent)
2241 
2242 {
2243     if (sep == NULL)
2244         return index;
2245 
2246     if (mycallback != NULL)
2247         (*mycallback)(sep, mydata, index, indent);
2248     index++;
2249 
2250     if (IS_Bioseq(sep))    /* bioseq, no contained sequences */
2251         return index;
2252 
2253     sep = ((BioseqSetPtr)sep->data.ptrvalue)->seq_set;
2254     indent++;
2255     while (sep != NULL)
2256     {
2257         index = SeqEntryList(sep, mydata, mycallback, index, indent);
2258         sep = sep->next;
2259     }
2260     return index;
2261 }
2262 
2263 /*****************************************************************************
2264 *
2265 *   BioseqList(sep, mydata, mycallback, index, indent)
2266 *       traverses all Seq-entry nodes beginning with sep
2267 *       calls mycallback() at each node that is a Bioseq
2268 *       Does NOT enter BioseqSets of _class "parts"
2269 *       Does NOT increment indent
2270 *
2271 *****************************************************************************/
BioseqList(SeqEntryPtr sep,Pointer mydata,SeqEntryFunc mycallback,Int4 index,Int2 indent)2272 NLM_EXTERN Int4 BioseqList (SeqEntryPtr sep, Pointer mydata, SeqEntryFunc mycallback, Int4 index, Int2 indent)
2273 
2274 {
2275     if (sep == NULL)
2276         return index;
2277 
2278     if (IS_Bioseq(sep))    /* bioseq, no contained sequences */
2279     {
2280         if (mycallback != NULL)
2281             (*mycallback)(sep, mydata, index, indent);
2282         return index+1;
2283     }
2284 
2285     if (Bioseq_set_class(sep) == 4)    /* parts, do not enter */
2286         return index;
2287 
2288     sep = ((BioseqSetPtr)sep->data.ptrvalue)->seq_set;
2289     while (sep != NULL)
2290     {
2291         index = BioseqList(sep, mydata, mycallback, index, indent);
2292         sep = sep->next;
2293     }
2294     return index;
2295 }
2296 
2297 /*****************************************************************************
2298 *
2299 *   SeqEntryGetSeqDescr(sep, type, curr)
2300 *       returns pointer to the next SeqDescr of this type
2301 *       type gives type of Seq-descr
2302 *        if 0, gives all types
2303 *       curr is NULL or previous node of this type found
2304 *
2305 *****************************************************************************/
SeqEntryGetSeqDescr(SeqEntryPtr sep,Int2 type,ValNodePtr curr)2306 NLM_EXTERN ValNodePtr SeqEntryGetSeqDescr (SeqEntryPtr sep, Int2 type, ValNodePtr curr)    /* the last one you used */
2307 
2308 {
2309 
2310     if (sep == NULL) return NULL;
2311 
2312     if (curr == NULL)
2313     {
2314         if (IS_Bioseq(sep))
2315             curr = ((BioseqPtr)sep->data.ptrvalue)->descr;
2316         else
2317             curr = ((BioseqSetPtr)sep->data.ptrvalue)->descr;
2318     }
2319     else
2320         curr = curr->next;     /* move past last one */
2321 
2322     while (curr != NULL)
2323     {
2324         if ((! type) || ((Int2)curr->choice == type))
2325             return curr;
2326         else
2327             curr = curr->next;
2328     }
2329     return NULL;
2330 }
2331 /*****************************************************************************
2332 *
2333 *   SeqEntryGetTitle(sep)
2334 *       returns pointer to the first title of this SeqEntry
2335 *
2336 *****************************************************************************/
SeqEntryGetTitle(SeqEntryPtr sep)2337 NLM_EXTERN CharPtr SeqEntryGetTitle (SeqEntryPtr sep)
2338 
2339 {
2340     ValNodePtr ptr;
2341 
2342     ptr = SeqEntryGetSeqDescr(sep, Seq_descr_title, NULL);
2343     if (ptr != NULL)
2344         return (CharPtr)ptr->data.ptrvalue;
2345     else
2346         return NULL;
2347 }
2348 
2349 /*****************************************************************************
2350 *
2351 *   Bioseq_set_class (SeqEntryPtr sep)
2352 *       returns class of set as is enumerated in ASN.1 spec
2353 *       returns 0 if not a Bioseq-set
2354 *
2355 *****************************************************************************/
Bioseq_set_class(SeqEntryPtr sep)2356 NLM_EXTERN Uint1 Bioseq_set_class (SeqEntryPtr sep)
2357 
2358 {
2359     if (sep == NULL) return 0;
2360 
2361     if (IS_Bioseq_set(sep))
2362         return ((BioseqSetPtr)sep->data.ptrvalue)->_class;
2363     else
2364         return 0;
2365 }
2366 
2367 /*****************************************************************************
2368 *
2369 *   SeqEntryDoConvert(sep, newcode, index, indent)
2370 *       converts a seqentry which is a raw bioseq to newcode
2371 *       callback used by SeqEntryConvert()
2372 *
2373 *****************************************************************************/
2374 NLM_EXTERN void SeqEntryDoConvert (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent);
SeqEntryDoConvert(SeqEntryPtr sep,Pointer data,Int4 index,Int2 indent)2375 NLM_EXTERN void SeqEntryDoConvert (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
2376 
2377 {
2378     if (! IS_Bioseq(sep))
2379         return;
2380 
2381     if (((Uint1Ptr)data)[0] != 0)
2382     {
2383         if (BioseqConvert((BioseqPtr)sep->data.ptrvalue, * ((Uint1Ptr)data)))
2384             ((Uint1Ptr)data)[1]++;
2385     }
2386     else
2387     {
2388         if (BioseqPack((BioseqPtr)sep->data.ptrvalue))
2389             ((Uint1Ptr)data)[1]++;
2390     }
2391     return;
2392 }
2393 
2394 /*****************************************************************************
2395 *
2396 *   SeqEntryConvert(sep, newcode)
2397 *       converts any seqentry to newcode
2398 *       if (newcode == 0)
*           calls BioseqPack instead of BioseqConvert
2400 *
2401 *****************************************************************************/
SeqEntryConvert(SeqEntryPtr sep,Uint1 newcode)2402 NLM_EXTERN Boolean SeqEntryConvert (SeqEntryPtr sep, Uint1 newcode)
2403 
2404 {
2405     Uint1 tbuf[2];
2406     tbuf[0] = newcode;
2407     tbuf[1] = 0;
2408 
2409     if (sep == NULL) return FALSE;
2410 
2411     SeqEntryExplore(sep, (Pointer)tbuf, SeqEntryDoConvert);
2412     if (tbuf[1])
2413         return TRUE;    /* at least one success */
2414     else
2415         return FALSE;
2416 }
2417 
2418 /*****************************************************************************
2419 *
2420 *   SeqIdBestRank(buf, num)
2421 *       fill buf of length num with std ranks used by SeqIdFindBest
2422 *       returns full length of list (useful if num is too small)
2423 *       std ranks always between 50 and 100
2424 *       rank < 50 guarantees SeqIdSelect() chooses over std rank
2425 *       rank > 100 guarantees SeqIdSelect() never chooses over std rank
*       rank = 255 guarantees SeqIdSelect() never chooses
2427 *       if buf == NULL, just returns count of supported Seq-ids
2428 *
2429 *****************************************************************************/
SeqIdBestRank(Uint1Ptr buf,Int2 num)2430 NLM_EXTERN Int2 SeqIdBestRank (Uint1Ptr buf, Int2 num)
2431 {
2432     static Uint1 std_order[NUM_SEQID] = {
2433      83,  /* 0 = not set */
2434     80,  /* 1 = local Object-id */
2435     70,  /* 2 = gibbsq */
2436     70,  /* 3 = gibbmt */
2437     70,  /* 4 = giim Giimport-id */
2438     60,  /* 5 = genbank */
2439     60,  /* 6 = embl */
2440     60,  /* 7 = pir */
2441     60,  /* 8 = swissprot */
2442     67,  /* 9 = patent */
2443     65,  /* 10 = other TextSeqId */
2444     80,  /* 11 = general Dbtag */
2445     51,  /* 12 = gi */
2446     60,  /* 13 = ddbj */
2447     60,  /* 14 = prf */
2448     60,  /* 15 = pdb */
2449     60,  /* 16 = tpg */
2450     60,  /* 17 = tpe */
2451     60,  /* 18 = tpd */
2452     68,  /* 19 = gpp */
2453     69   /* 20 = nat */
2454     };
2455 
2456     if (buf == NULL) return NUM_SEQID;
2457 
2458     if (num > NUM_SEQID)
2459         num = NUM_SEQID;
2460     MemCopy(buf, std_order, (size_t)(num * sizeof(Uint1)));
2461     return NUM_SEQID;
2462 }
2463 
2464 /*****************************************************************************
2465 *
*   SeqIdFindBest(sip, target)
2467 *       Find the most reliable SeqId in a chain
2468 *
2469 *****************************************************************************/
SeqIdFindBest(SeqIdPtr sip,Uint1 target)2470 NLM_EXTERN SeqIdPtr SeqIdFindBest (SeqIdPtr sip, Uint1 target)
2471 {
2472     Uint1 order[NUM_SEQID];
2473 
2474     if (sip == NULL)
2475         return NULL;
2476 
2477     SeqIdBestRank(order, NUM_SEQID);
2478     if ((target > 0) && (target < NUM_SEQID))
2479         order[target] = 0;    /* select target */
2480     else if (target >= NUM_SEQID)
2481         ErrPostEx(SEV_ERROR, 0, 0, "SeqIdFindBest: target [%d] out of range [%d]",
2482             (int)target, (int)NUM_SEQID);
2483 
2484     return SeqIdSelect (sip, order, NUM_SEQID);
2485 }
2486 /*****************************************************************************
2487 *
*   SeqIdFindBestAccession(sip)
2489 *       Find the most reliable Accession SeqId in a chain
2490 *       else returns gi;
2491 *
2492 *****************************************************************************/
SeqIdFindBestAccession(SeqIdPtr sip)2493 NLM_EXTERN SeqIdPtr SeqIdFindBestAccession (SeqIdPtr sip)
2494 {
2495     Uint1 order[NUM_SEQID];
2496 
2497     if (sip == NULL)
2498         return NULL;
2499     SeqIdBestRank(order, NUM_SEQID);
2500         order[SEQID_GI]=order[SEQID_LOCAL]+1;
2501     return SeqIdSelect (sip, order, NUM_SEQID);
2502 }
2503 
2504 /*****************************************************************************
2505 *
2506 *   SeqIdPtr SeqIdLocate (sip, order, num)
2507 *       Given a SeqId (sip):
2508 *           Locates the Bioseq in memory or cached
2509 *           Then calls SeqIdSelect with the Bioseq.id chain to find the
2510 *             SeqId type you want.
2511 *
2512 *****************************************************************************/
SeqIdLocate(SeqIdPtr sip,Uint1Ptr order,Int2 num)2513 NLM_EXTERN SeqIdPtr SeqIdLocate (SeqIdPtr sip, Uint1Ptr order, Int2 num)
2514 {
2515     BioseqPtr bsp;
2516     SeqIdPtr res = NULL;
2517     Boolean locked = FALSE;
2518 
2519     bsp = BioseqFindCore(sip);
2520     if (bsp == NULL)
2521     {
2522         bsp = BioseqLockById(sip);
2523         if (bsp != NULL)
2524             locked = TRUE;
2525         else
2526             return res;
2527     }
2528     res = SeqIdSelect(bsp->id, order, num);
2529     if (locked)
2530         BioseqUnlock(bsp);
2531     return res;
2532 }
2533 
2534 /*****************************************************************************
2535 *
2536 *   SeqIdPtr SeqIdSelect (sip, order, num)
2537 *       takes an array (order) num long.
2538 *       goes down chain starting with sip.
2539 *       finds lowest value of order[sip->choice] and returns it.
2540 *       if order[] == 255, it is skipped.
2541 *       if nothing is found < 255, NULL is returned
2542 *       ErrorMessage if sip->choice >= num
2543 *
2544 *****************************************************************************/
SeqIdSelect(SeqIdPtr sip,Uint1Ptr order,Int2 num)2545 NLM_EXTERN SeqIdPtr SeqIdSelect (SeqIdPtr sip, Uint1Ptr order, Int2 num)
2546 {
2547     SeqIdPtr bestid;
2548 
2549     if ((sip == NULL) || (order == NULL))
2550         return NULL;
2551 
2552     for ( bestid = NULL; sip != NULL; sip = sip -> next)
2553     {
2554         if ((Int2)sip->choice < num)
2555         {
2556             if (order[sip->choice] < 255)
2557             {
2558                 if (bestid == NULL)
2559                     bestid = sip;
2560                 else if (order[sip->choice] < order[bestid->choice])
2561                     bestid = sip;
2562             }
2563         } else {
2564             ErrPostEx(SEV_ERROR, 0,0, "SeqIdSelect: choice [%d] out of range [%d]",
2565                 (int)(sip->choice), (int)num);
2566             if(sip->choice > NUM_SEQID) /*** something is really wrong ***/
2567                 return NULL;
2568         }
2569     }
2570 
2571     return bestid;
2572 }
2573 
2574     static char * delim = "|";
2575     static char * txtid [NUM_SEQID] = {          /* FASTA_LONG formats */
2576         "???" ,        /* not-set = ??? */
2577         "lcl",        /* local = lcl|integer or string */
2578         "bbs",      /* gibbsq = bbs|integer */
2579         "bbm",        /* gibbmt = bbm|integer */
2580         "gim",        /* giim = gim|integer */
2581         "gb",        /* genbank = gb|accession|locus */
2582         "emb",        /* embl = emb|accession|locus */
2583         "pir",        /* pir = pir|accession|name */
2584         "sp",        /* swissprot = sp|accession|name */
2585         "pat",        /* patent = pat|country|patent number (string)|seq number (integer) - use pgp for pre-grant pub */
2586         "ref",        /* other = ref|accession|name|release - changed from oth to ref */
2587         "gnl",        /* general = gnl|database(string)|id (string or number) */
2588         "gi",        /* gi = gi|integer */
2589         "dbj",        /* ddbj = dbj|accession|locus */
2590         "prf",        /* prf = prf|accession|name */
2591         "pdb",        /* pdb = pdb|entry name (string)|chain id (char) */
2592         "tpg",      /* tpg = tpg|accession|name */
2593         "tpe",      /* tpe = tpe|accession|name */
2594         "tpd",      /* tpd = tpd|accession|name */
2595         "gpp",      /* gpp = gpp|accession|name */
2596         "nat"};     /* nat = nat|accession|name */
2597 
2598 /*****************************************************************************
2599 *
2600 *   SeqIdPrint(sip, buf, format)
2601 *       PRINTID_FASTA_LONG treats sip as a chain, printing gi|other id
2602 *           other id is as given in the comments for txtid. Empty fields
2603 *           do not eliminate | delimiters
2604 *       PRINTID_FASTA_SHORT prints only the sip.
2605 *           same format as FASTA_LONG (for other id)
2606 *
2607 *       PRINTID_TEXTID_LOCUS or ACCESSION
2608 *  --------------------------------------------------------
2609 *  | OLDWAY:                                              |
2610 *  |      TextSeqId types- fills request or first char in |
2611 *  |      buffer \0 if cannot be filled                   |
2612 *  |        gibbmt, gibbsq = fills with _M or _S [number] |
2613 *  |      other types- fills in as FASTA_SHORT            |
2614 *  --------------------------------------------------------
2615 *      CURRENTLY:
2616 *      for SEQID_GENBANK,SEQID_EMBL,SEQID_DDBJ, takes accession
2617 *        or locus field; for SEQID_LOCAL, takes str
2618 *              as accession only
2619 *       ALL others as FASTA_SHORT
2620 *
2621 *       PRINTID_REPORT- similar to FASTA_SHORT but removes extra optional
2622 *         fields and | to make more human readable (but less parseable)
2623 *
*   if format is in the range ' ' to 127 (32-127) ASCII, then the character
2625 *     given is used as a separator instead of '|' and the format is
2626 *     PRINTID_FASTA_SHORT. 127 is translated as TAB (ASCII 9)
2627 *     This makes this function flexible for bulk
2628 *     data processing. Note that this invalidates SeqIdParse() and may create
2629 *     conflicts with names. Use with caution.
2630 *
2631 *   return value points to \0 at end of buf
2632 *
2633 *****************************************************************************/
SeqIdPrint(SeqIdPtr isip,CharPtr buf,Uint1 format)2634 NLM_EXTERN CharPtr SeqIdPrint (SeqIdPtr isip, CharPtr buf, Uint1 format)
2635 
2636 {
2637     return SeqIdWrite (isip, buf, format, 255); /* no knowledge of buffer size */
2638 }
2639 
2640 /*****************************************************************************
2641 *
2642 *   SeqIdWrite (isip, buf, format, buflen)
2643 *       Similar to SeqIdPrint, has additional argument buflen,
2644 *       checks the buflen, writes up to buflen chars,
2645 *        makes the last character '>'
2646 *       always puts one '\0' to terminate the string in buf
2647 *       buf MUST be one character longer than buflen to leave room for the
2648 *           last '\0'
2649 *
2650 *****************************************************************************/
SeqIdWrite(SeqIdPtr isip,CharPtr buf,Uint1 format,Uint4 buflen)2651 NLM_EXTERN CharPtr SeqIdWrite (SeqIdPtr isip, CharPtr buf, Uint1 format, Uint4 buflen)
2652 
2653 {
2654     SeqIdPtr sip;
2655     char localbuf[32];    /* for MS Windows */
2656     char *ldelim;
2657     char d [2];
2658     CharPtr tmp;
2659     static Uint1 fasta_order[NUM_SEQID] = {  /* order for other id FASTA_LONG */
2660     33, /* 0 = not set */
2661     20, /* 1 = local Object-id */
2662     15,  /* 2 = gibbsq */
2663     16,  /* 3 = gibbmt */
2664     30, /* 4 = giim Giimport-id */
2665     10, /* 5 = genbank */
2666     10, /* 6 = embl */
2667     10, /* 7 = pir */
2668     10, /* 8 = swissprot */
2669     15,  /* 9 = patent */
2670     10, /* 10 = other = refseq */
2671     13, /* 11 = general Dbtag */
2672     255,  /* 12 = gi */
2673     10, /* 13 = ddbj */
2674     10, /* 14 = prf */
2675     12,  /* 15 = pdb */
2676     10,  /* 16 = tpg */
2677     10,  /* 17 = tpe */
2678     10,  /* 18 = tpd */
2679     15,  /* 19 = gpp */
2680     15   /* 20 = nat */
2681     };
2682     static Uint1 tmsmart_order[NUM_SEQID] = {  /* order for other id FASTA_LONG */
2683     33, /* 0 = not set */
2684     20, /* 1 = local Object-id */
2685     15,  /* 2 = gibbsq */
2686     16,  /* 3 = gibbmt */
2687     30, /* 4 = giim Giimport-id */
2688     10, /* 5 = genbank */
2689     10, /* 6 = embl */
2690     10, /* 7 = pir */
2691     10, /* 8 = swissprot */
2692     15,  /* 9 = patent */
2693     10, /* 10 = other = refseq */
2694     29, /* 11 = general Dbtag */
2695     255,  /* 12 = gi */
2696     10, /* 13 = ddbj */
2697     10, /* 14 = prf */
2698     12,  /* 15 = pdb */
2699     10,  /* 16 = tpg */
2700     10,  /* 17 = tpe */
2701     10,  /* 18 = tpd */
2702     15,  /* 19 = gpp */
2703     15   /* 20 = nat */
2704     };
2705     static Uint1 general_order[NUM_SEQID] = {  /* order for other id FASTA_LONG */
2706     33, /* 0 = not set */
2707     20, /* 1 = local Object-id */
2708     15,  /* 2 = gibbsq */
2709     16,  /* 3 = gibbmt */
2710     30, /* 4 = giim Giimport-id */
2711     10, /* 5 = genbank */
2712     10, /* 6 = embl */
2713     10, /* 7 = pir */
2714     10, /* 8 = swissprot */
2715     15,  /* 9 = patent */
2716     10, /* 10 = other = refseq */
2717     12, /* 11 = general Dbtag */
2718     255,  /* 12 = gi */
2719     10, /* 13 = ddbj */
2720     10, /* 14 = prf */
2721     12,  /* 15 = pdb */
2722     10,  /* 16 = tpg */
2723     10,  /* 17 = tpe */
2724     10,  /* 18 = tpd */
2725     15,  /* 19 = gpp */
2726     15   /* 20 = nat */
2727     };
2728     Boolean useGeneral = FALSE;
2729     TextSeqIdPtr tsip;
2730     PDBSeqIdPtr psip;
2731     ObjectIdPtr oip;
2732     PatentSeqIdPtr patsip;
2733     IdPatPtr ipp;
2734     Boolean got_gi = FALSE;
2735     Boolean got_tmsmart = FALSE;
2736     Boolean is_us_pre_grant = FALSE;
2737     DbtagPtr dbt;
2738     Char chainbuf[3];
2739     Char versionbuf[10];
2740     Int2 version = 0;
2741     CharPtr release = NULL;
2742 
2743     buf[0] = '\0';
2744     buflen--;
2745     tmp = buf;
2746     if (isip == NULL)
2747         return tmp;
2748 
2749     d [0] = *delim;
2750     d [1] = '\0';
2751     ldelim = &(d [0]);
2752     if ((format >= ' ') && (format <= 127))  /* change delimiter */
2753     {
2754         if (format == 127)
2755             d [0] = '\t';
2756         else
2757             d [0] = (char) format;
2758         format = PRINTID_FASTA_SHORT;
2759     }
2760 
2761     if (format == PRINTID_FASTA_GENERAL) {
2762         useGeneral = TRUE;
2763         format = PRINTID_FASTA_LONG;
2764     }
2765 
2766     if (format == PRINTID_FASTA_ALL) {
2767         Char allbuf [41];
2768         ValNodePtr vnp, head = NULL;
2769         size_t len = 0;
2770         CharPtr str;
2771         Boolean notfirst;
2772 
2773         for (sip = isip; sip != NULL; sip = sip->next) {
2774             SeqIdWrite (sip, allbuf, PRINTID_FASTA_SHORT, sizeof (allbuf) - 1);
2775             ValNodeCopyStr (&head, 0, allbuf);
2776         }
2777         for (vnp = head; vnp != NULL; vnp = vnp->next) {
2778           str = (CharPtr) vnp->data.ptrvalue;
2779           if (! StringHasNoText (str)) {
2780             len += StringLen (str) + 1;
2781           }
2782         }
2783         if (len < 1) return buf;
2784         tmp = MemNew (len + 2);
2785         if (tmp == NULL) return buf;
2786         notfirst = FALSE;
2787         for (vnp = head; vnp != NULL; vnp = vnp->next) {
2788           str = (CharPtr) vnp->data.ptrvalue;
2789           if (! StringHasNoText (str)) {
2790             if (notfirst) {
2791               StringCat (tmp, "|");
2792             }
2793             StringCat (tmp, str);
2794             notfirst = TRUE;
2795           }
2796         }
2797         ValNodeFreeData (head);
2798         StringNCpy_0 (buf, tmp, buflen + 1);
2799         MemFree (tmp);
2800         return buf;
2801     }
2802 
2803     localbuf[0] = '\0';
2804                             /* error on input, return ??? */
2805     if ( (! (isip -> choice)) || (format < PRINTID_FASTA_SHORT)
2806         || (format > PRINTID_REPORT))
2807     {
2808         Nlm_LabelCopyNext(&tmp, txtid[0], &buflen);
2809         return tmp;
2810     }
2811 
2812     if (format == PRINTID_FASTA_LONG)   /* find the ids in the chain */
2813     {
2814         for (sip = isip; sip != NULL; sip = sip->next)  /* GI present? */
2815         {
2816             if (sip->choice == SEQID_GI)
2817             {
2818                 sprintf(localbuf, "%s%s%lld", txtid[SEQID_GI], ldelim,
2819                     (long long)(sip->data.intvalue));
2820                 Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
2821                 got_gi = TRUE;
2822             } else if (sip->choice == SEQID_GENERAL) {
2823                 dbt = (DbtagPtr) sip->data.ptrvalue;
2824                 if (dbt != NULL && StringICmp (dbt->db, "TMSMART") == 0) {
2825                     got_tmsmart = TRUE;
2826                 }
2827             } else if (sip->choice == SEQID_PATENT) {
2828                 patsip = (PatentSeqIdPtr) sip->data.ptrvalue;
2829                 if (patsip != NULL) {
2830                     ipp = patsip->cit;
2831                     if (ipp != NULL && StringDoesHaveText (ipp->app_number)) {
2832                         is_us_pre_grant = TRUE;
2833                     }
2834                 }
2835             }
2836         }
2837         if (useGeneral) {
2838             sip = SeqIdSelect(isip, general_order, NUM_SEQID);
2839         } else if (got_tmsmart) {
2840             sip = SeqIdSelect(isip, tmsmart_order, NUM_SEQID);
2841         } else {
2842             sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
2843         }
2844         if (sip == NULL)   /* only GI */
2845             return tmp;
2846         else if (got_gi)
2847         {
2848             if (sip->choice == SEQID_GIIM)   /* don't show GIIM with GI */
2849                 return tmp;
2850 
2851             Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
2852         }
2853         format = PRINTID_FASTA_SHORT; /* put on second (or only) SeqId in this format */
2854     }
2855     else {
2856         sip = isip;          /* only one id processed */
2857         if (sip != NULL && sip->choice == SEQID_PATENT) {
2858             patsip = (PatentSeqIdPtr) sip->data.ptrvalue;
2859             if (patsip != NULL) {
2860                 ipp = patsip->cit;
2861                 if (ipp != NULL && StringDoesHaveText (ipp->app_number)) {
2862                     is_us_pre_grant = TRUE;
2863                 }
2864             }
2865         }
2866     }
2867 
2868                              /* deal with LOCUS and ACCESSION */
2869     if ((format == PRINTID_TEXTID_ACCESSION) || (format == PRINTID_TEXTID_LOCUS) ||
2870         (format == PRINTID_TEXTID_ACC_VER) || (format == PRINTID_TEXTID_ACC_ONLY))
2871     {
2872         if (format == PRINTID_TEXTID_ACCESSION) {
2873             format = PRINTID_TEXTID_ACC_ONLY;     /* current default */
2874         }
2875         switch (sip->choice)   /* get the real TextSeqId types */
2876         {
2877             case SEQID_GENBANK:
2878             case SEQID_EMBL:
2879             case SEQID_DDBJ:
2880             case SEQID_PIR:
2881             case SEQID_SWISSPROT:
2882             case SEQID_PRF:
2883             case SEQID_OTHER:
2884             case SEQID_TPG:
2885             case SEQID_TPE:
2886             case SEQID_TPD:
2887             case SEQID_GPIPE:
2888             case SEQID_NAMED_ANNOT_TRACK:
2889                 tsip = (TextSeqIdPtr)sip->data.ptrvalue;
2890                 release = tsip->release;
2891                 if (sip->choice == SEQID_SWISSPROT) {
2892                   release = NULL;
2893                 }
2894                 if ((format == PRINTID_TEXTID_LOCUS) && (tsip->name != NULL)) {
2895                     Nlm_LabelCopyNext(&tmp, tsip->name, &buflen);
2896                     return tmp;
2897                 } else if ((format == PRINTID_TEXTID_ACC_ONLY || format == PRINTID_TEXTID_LOCUS)
2898                     && (tsip->accession != NULL)) {
2899                     Nlm_LabelCopyNext(&tmp, tsip->accession, &buflen);
2900                     return tmp;
2901                 } else if ((format == PRINTID_TEXTID_ACC_VER)
2902                     && (tsip->accession != NULL)) {
2903                     if (tsip->version > 0 && release == NULL) {
2904                         sprintf(localbuf, "%s.%d", tsip->accession,
2905                             (int)(tsip->version));
2906                     } else {
2907                         sprintf(localbuf, "%s", tsip->accession);
2908                     }
2909                     Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
2910                     return tmp;
2911                 }
2912                 break;
2913             default:
2914                 break;
2915         }
2916     }
2917 
2918     if (format == PRINTID_FASTA_SHORT)
2919     {
2920         if (sip->choice == SEQID_PATENT && is_us_pre_grant) {
2921             Nlm_LabelCopyNext(&tmp, "pgp", &buflen);
2922         } else if (sip->choice == SEQID_SWISSPROT) {
2923             tsip = (TextSeqIdPtr)sip->data.ptrvalue;
2924             if (tsip->release && StringCmp(tsip->release, "unreviewed") == 0)
2925                 Nlm_LabelCopyNext(&tmp, "tr", &buflen);
2926             else
2927                 Nlm_LabelCopyNext(&tmp, txtid[sip->choice], &buflen);
2928         } else {
2929             Nlm_LabelCopyNext(&tmp, txtid[sip->choice], &buflen);
2930         }
2931         Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
2932     }
2933 
2934     switch (sip->choice)
2935     {
2936         case SEQID_LOCAL:           /* object id */
2937             if ((((ObjectIdPtr)sip->data.ptrvalue)->str) == NULL)
2938             {
2939                 sprintf(localbuf, "%ld",
2940                             (long)((ObjectIdPtr)sip->data.ptrvalue)->id);
2941                 Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
2942             }
2943             else
2944                 Nlm_LabelCopyNext(&tmp,
2945                         ((ObjectIdPtr)sip->data.ptrvalue)->str, &buflen);
2946             break;
2947         case SEQID_GIBBSQ:
2948         case SEQID_GIBBMT:
2949             sprintf(localbuf, "%ld", (long)sip->data.intvalue);
2950             Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
2951             break;
2952         case SEQID_GI:
2953             sprintf(localbuf, "%lld", (long long)sip->data.intvalue);
2954             Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
2955             break;
2956         case SEQID_GIIM:
2957             sprintf(localbuf, "%ld", (long)((GiimPtr)sip->data.ptrvalue)->id);
2958             Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
2959             break;
2960         case SEQID_GENBANK:
2961         case SEQID_EMBL:
2962         case SEQID_DDBJ:
2963         case SEQID_OTHER:
2964         case SEQID_TPG:
2965         case SEQID_TPE:
2966         case SEQID_TPD:
2967         case SEQID_GPIPE:
2968         case SEQID_NAMED_ANNOT_TRACK:
2969         case SEQID_SWISSPROT:
2970            tsip = (TextSeqIdPtr)(sip->data.ptrvalue);
2971             release = tsip->release;
2972             if (sip->choice == SEQID_SWISSPROT) {
2973               release = NULL;
2974             }
2975            if ((tsip->version > 0) && (release == NULL) && SHOWVERSION)
2976              version = tsip->version;  /* show versions */
2977            sprintf(versionbuf, ".%d", (int)version);
2978         case SEQID_PIR:
2979         case SEQID_PRF:
2980             tsip = (TextSeqIdPtr)sip->data.ptrvalue;
2981             if (tsip->accession != NULL)
2982             {
2983                Nlm_LabelCopyNext(&tmp, tsip->accession, &buflen);
2984                            if (version)
2985                 Nlm_LabelCopyNext(&tmp, versionbuf,&buflen);
2986                if (format != PRINTID_FASTA_SHORT)
2987                  break;
2988             }
2989             if (format == PRINTID_FASTA_SHORT)
2990                 Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
2991             if (tsip->name != NULL)
2992                 Nlm_LabelCopyNext(&tmp, tsip->name, &buflen);
2993             /*
2994             if (sip->choice == SEQID_OTHER) {
2995                 Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
2996                 if (tsip->release != NULL)
2997                     Nlm_LabelCopyNext(&tmp, tsip->release, &buflen);
2998             }
2999             */
3000             break;
3001         case SEQID_PATENT:
3002             patsip = (PatentSeqIdPtr)(sip->data.ptrvalue);
3003             Nlm_LabelCopyNext(&tmp, patsip->cit->country, &buflen);
3004             if (format == PRINTID_FASTA_SHORT)
3005                 Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
3006             if (is_us_pre_grant) {
3007                 Nlm_LabelCopyNext(&tmp, patsip->cit->app_number, &buflen);
3008             } else {
3009                 Nlm_LabelCopyNext(&tmp, patsip->cit->number, &buflen);
3010             }
3011             if (format == PRINTID_FASTA_SHORT)
3012                 Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
3013             else
3014                 Nlm_LabelCopyNext(&tmp, "_", &buflen);
3015             sprintf(localbuf, "%d", (int)patsip->seqid);
3016             Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
3017             break;
3018         case SEQID_GENERAL:
3019             oip = ((DbtagPtr)sip->data.ptrvalue)->tag;
3020             if((format == PRINTID_FASTA_SHORT) || (format == PRINTID_REPORT))
3021                 Nlm_LabelCopyNext(&tmp,
3022                     ((DbtagPtr)sip->data.ptrvalue)->db, &buflen);
3023             if (format == PRINTID_FASTA_SHORT)
3024                 Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
3025             else if (format == PRINTID_REPORT)
3026                 Nlm_LabelCopyNext(&tmp, ":", &buflen);
3027 
3028             if (oip->str == NULL)
3029             {
3030                 sprintf(localbuf, "%ld", (long) oip->id);
3031                 Nlm_LabelCopyNext(&tmp, localbuf, &buflen);
3032             }
3033             else
3034                 Nlm_LabelCopyNext(&tmp, oip->str, &buflen);
3035             break;
3036         case SEQID_PDB:
3037             psip = (PDBSeqIdPtr) sip->data.ptrvalue;
3038             chainbuf[0] = TO_UPPER (psip->chain);
3039             chainbuf[1] = '\0';
3040             chainbuf[2] = '\0';
3041             if (IS_LOWER (psip->chain)) {
3042               chainbuf[1] = chainbuf [0];
3043             }
3044             Nlm_LabelCopyNext(&tmp, psip->mol, &buflen);
3045             if (format == PRINTID_FASTA_SHORT)
3046             {
3047                 Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
3048                 if (chainbuf[0] == '|') /* special */
3049                     Nlm_LabelCopyNext(&tmp, "VB",&buflen);
3050                 else if (chainbuf[0] != '\0')
3051                     Nlm_LabelCopyNext(&tmp,chainbuf, &buflen);
3052                 else
3053                     Nlm_LabelCopyNext(&tmp, " ", &buflen);
3054             }
3055             else if (psip->chain > ' ')
3056             {
3057                 Nlm_LabelCopyNext(&tmp, "_", &buflen);
3058                 Nlm_LabelCopyNext(&tmp,chainbuf, &buflen);
3059             }
3060             break;
3061         default:
3062             Nlm_LabelCopyNext(&tmp, txtid[0], &buflen);
3063             break;
3064 
3065     }
3066     return tmp;
3067 }
3068 
3069 
SeqIdLabelLen(SeqIdPtr isip,Uint1 format)3070 NLM_EXTERN Int4 SeqIdLabelLen (SeqIdPtr isip, Uint1 format)
3071 
3072 {
3073     Int4 label_len = 0;
3074     SeqIdPtr sip;
3075     char localbuf[32];    /* for MS Windows */
3076     char *ldelim;
3077     char d [2];
3078     static Uint1 fasta_order[NUM_SEQID] = {  /* order for other id FASTA_LONG */
3079      33, /* 0 = not set */
3080     20, /* 1 = local Object-id */
3081     15,  /* 2 = gibbsq */
3082     16,  /* 3 = gibbmt */
3083     30, /* 4 = giim Giimport-id */
3084     10, /* 5 = genbank */
3085     10, /* 6 = embl */
3086     10, /* 7 = pir */
3087     10, /* 8 = swissprot */
3088     15,  /* 9 = patent */
3089     12, /* 10 = other TextSeqId */
3090     13, /* 11 = general Dbtag */
3091     255,  /* 12 = gi */
3092     10, /* 13 = ddbj */
3093     10, /* 14 = prf */
3094     12,  /* 15 = pdb */
3095     10,  /* 16 = tpg */
3096     10,  /* 17 = tpe */
3097     10,  /* 18 = tpd */
3098     15,  /* 19 = gpp */
3099     15   /* 20 = nat */
3100     };
3101     static Uint1 tmsmart_order[NUM_SEQID] = {  /* order for other id FASTA_LONG */
3102      33, /* 0 = not set */
3103     20, /* 1 = local Object-id */
3104     15,  /* 2 = gibbsq */
3105     16,  /* 3 = gibbmt */
3106     30, /* 4 = giim Giimport-id */
3107     10, /* 5 = genbank */
3108     10, /* 6 = embl */
3109     10, /* 7 = pir */
3110     10, /* 8 = swissprot */
3111     15,  /* 9 = patent */
3112     12, /* 10 = other TextSeqId */
3113     29, /* 11 = general Dbtag */
3114     255,  /* 12 = gi */
3115     10, /* 13 = ddbj */
3116     10, /* 14 = prf */
3117     12,  /* 15 = pdb */
3118     10,  /* 16 = tpg */
3119     10,  /* 17 = tpe */
3120     10,  /* 18 = tpd */
3121     15,  /* 19 = gpp */
3122     15   /* 20 = nat */
3123     };
3124     static Uint1 general_order[NUM_SEQID] = {  /* order for other id FASTA_LONG */
3125      33, /* 0 = not set */
3126     20, /* 1 = local Object-id */
3127     15,  /* 2 = gibbsq */
3128     16,  /* 3 = gibbmt */
3129     30, /* 4 = giim Giimport-id */
3130     10, /* 5 = genbank */
3131     10, /* 6 = embl */
3132     10, /* 7 = pir */
3133     10, /* 8 = swissprot */
3134     15,  /* 9 = patent */
3135     13, /* 10 = other TextSeqId */
3136     12, /* 11 = general Dbtag */
3137     255,  /* 12 = gi */
3138     10, /* 13 = ddbj */
3139     10, /* 14 = prf */
3140     12,  /* 15 = pdb */
3141     10,  /* 16 = tpg */
3142     10,  /* 17 = tpe */
3143     10,  /* 18 = tpd */
3144     15,  /* 19 = gpp */
3145     15   /* 20 = nat */
3146     };
3147     Boolean useGeneral = FALSE;
3148     TextSeqIdPtr tsip;
3149     PDBSeqIdPtr psip;
3150     ObjectIdPtr oip;
3151     PatentSeqIdPtr patsip;
3152     IdPatPtr ipp;
3153     Boolean got_gi = FALSE;
3154     Boolean got_tmsmart = FALSE;
3155     Boolean is_us_pre_grant = FALSE;
3156     DbtagPtr dbt;
3157     Char chainbuf[3];
3158     Char versionbuf[10];
3159     Int2 version = 0;
3160     CharPtr release = NULL;
3161 
3162     if (isip == NULL)
3163         return 0;
3164 
3165     d [0] = *delim;
3166     d [1] = '\0';
3167     ldelim = &(d [0]);
3168     if ((format >= ' ') && (format <= 127))  /* change delimiter */
3169     {
3170         if (format == 127)
3171             d [0] = '\t';
3172         else
3173             d [0] = (char) format;
3174         format = PRINTID_FASTA_SHORT;
3175     }
3176 
3177     if (format == PRINTID_FASTA_GENERAL) {
3178         useGeneral = TRUE;
3179         format = PRINTID_FASTA_LONG;
3180     }
3181 
3182     if (format == PRINTID_FASTA_ALL) {
3183         for (sip = isip; sip != NULL; sip = sip->next) {
3184             label_len += SeqIdLabelLen (sip, PRINTID_FASTA_SHORT) + 1;
3185         }
3186         label_len += 2;
3187         return label_len;
3188     }
3189                                 /* error on input, return ??? */
3190     if ( (! (isip -> choice)) || (format < PRINTID_FASTA_SHORT)
3191         || (format > PRINTID_REPORT))
3192     {
3193         return StringLen (txtid[0]) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3194     }
3195 
3196     if (format == PRINTID_FASTA_LONG)   /* find the ids in the chain */
3197     {
3198         for (sip = isip; sip != NULL; sip = sip->next)  /* GI present? */
3199         {
3200             if (sip->choice == SEQID_GI)
3201             {
3202                 sprintf(localbuf, "%s%s%lld", txtid[SEQID_GI], ldelim,
3203                     (long long)(sip->data.intvalue));
3204                 label_len += StringLen (localbuf) + 1; /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3205                 got_gi = TRUE;
3206             } else if (sip->choice == SEQID_GENERAL) {
3207                 dbt = (DbtagPtr) sip->data.ptrvalue;
3208                 if (dbt != NULL && StringICmp (dbt->db, "TMSMART") == 0) {
3209                     got_tmsmart = TRUE;
3210                 }
3211             } else if (sip->choice == SEQID_PATENT) {
3212                 patsip = (PatentSeqIdPtr) sip->data.ptrvalue;
3213                 if (patsip != NULL) {
3214                     ipp = patsip->cit;
3215                     if (ipp != NULL && StringDoesHaveText (ipp->app_number)) {
3216                         is_us_pre_grant = TRUE;
3217                     }
3218                 }
3219             }
3220         }
3221         if (useGeneral) {
3222             sip = SeqIdSelect(isip, general_order, NUM_SEQID);
3223         } else if (got_tmsmart) {
3224             sip = SeqIdSelect(isip, tmsmart_order, NUM_SEQID);
3225         } else {
3226             sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3227         }
3228         if (sip == NULL)   /* only GI */
3229             return label_len;
3230         else if (got_gi)
3231         {
3232             if (sip->choice == SEQID_GIIM)   /* don't show GIIM with GI */
3233                 return label_len;
3234 
3235             label_len += StringLen (ldelim) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3236         }
3237         format = PRINTID_FASTA_SHORT; /* put on second (or only) SeqId in this format */
3238     }
3239     else {
3240         sip = isip;          /* only one id processed */
3241         if (sip != NULL && sip->choice == SEQID_PATENT) {
3242             patsip = (PatentSeqIdPtr) sip->data.ptrvalue;
3243             if (patsip != NULL) {
3244                 ipp = patsip->cit;
3245                 if (ipp != NULL && StringDoesHaveText (ipp->app_number)) {
3246                     is_us_pre_grant = TRUE;
3247                 }
3248             }
3249         }
3250     }
3251 
3252                              /* deal with LOCUS and ACCESSION */
3253     if ((format == PRINTID_TEXTID_ACCESSION) || (format == PRINTID_TEXTID_LOCUS) ||
3254         (format == PRINTID_TEXTID_ACC_VER) || (format == PRINTID_TEXTID_ACC_ONLY))
3255     {
3256         if (format == PRINTID_TEXTID_ACCESSION) {
3257             format = PRINTID_TEXTID_ACC_ONLY;     /* current default */
3258         }
3259         switch (sip->choice)   /* get the real TextSeqId types */
3260         {
3261             case SEQID_GENBANK:
3262             case SEQID_EMBL:
3263             case SEQID_DDBJ:
3264             case SEQID_PIR:
3265             case SEQID_SWISSPROT:
3266             case SEQID_PRF:
3267             case SEQID_OTHER:
3268             case SEQID_TPG:
3269             case SEQID_TPE:
3270             case SEQID_TPD:
3271             case SEQID_GPIPE:
3272             case SEQID_NAMED_ANNOT_TRACK:
3273                 tsip = (TextSeqIdPtr)sip->data.ptrvalue;
3274                 release = tsip->release;
3275                 if (sip->choice == SEQID_SWISSPROT) {
3276                   release = NULL;
3277                 }
3278                 if ((format == PRINTID_TEXTID_LOCUS) && (tsip->name != NULL)) {
3279                     label_len += StringLen (tsip->name) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3280                     return label_len;
3281                 } else if ((format == PRINTID_TEXTID_ACC_ONLY || format == PRINTID_TEXTID_LOCUS)
3282                     && (tsip->accession != NULL)) {
3283                     label_len += StringLen (tsip->accession) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3284                     return label_len;
3285                 } else if ((format == PRINTID_TEXTID_ACC_VER)
3286                     && (tsip->accession != NULL)) {
3287                     label_len += StringLen (tsip->accession) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3288                     if (tsip->version > 0 && release == NULL) {
3289                         sprintf(localbuf, ".%d", (int)(tsip->version));
3290                         label_len += StringLen (localbuf) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3291                     }
3292                     return label_len;
3293                 }
3294                 break;
3295             default:
3296                 break;
3297         }
3298     }
3299 
3300     if (format == PRINTID_FASTA_SHORT)
3301     {
3302         if (sip->choice == SEQID_PATENT && is_us_pre_grant) {
3303             label_len += 4;
3304         } else if (sip->choice == SEQID_SWISSPROT) {
3305             tsip = (TextSeqIdPtr)sip->data.ptrvalue;
3306             if (tsip->release && StringCmp(tsip->release, "unreviewed") == 0)
3307                 label_len += 3;
3308             else
3309                 label_len += StringLen (txtid[sip->choice]) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3310         } else {
3311             label_len += StringLen (txtid[sip->choice]) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3312         }
3313         label_len += StringLen (ldelim) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3314     }
3315 
3316     switch (sip->choice)
3317     {
3318         case SEQID_LOCAL:           /* object id */
3319             if ((((ObjectIdPtr)sip->data.ptrvalue)->str) == NULL)
3320             {
3321                 sprintf(localbuf, "%ld",
3322                             (long)((ObjectIdPtr)sip->data.ptrvalue)->id);
3323                 label_len += StringLen (localbuf) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3324             }
3325             else
3326             {
3327                 label_len += StringLen (((ObjectIdPtr)sip->data.ptrvalue)->str) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3328             }
3329             break;
3330         case SEQID_GIBBSQ:
3331         case SEQID_GIBBMT:
3332             sprintf(localbuf, "%ld", (long)sip->data.intvalue);
3333             label_len += StringLen (localbuf) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3334             break;
3335         case SEQID_GI:
3336             sprintf(localbuf, "%lld", (long long)sip->data.intvalue);
3337             label_len += StringLen (localbuf) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3338             break;
3339         case SEQID_GIIM:
3340             sprintf(localbuf, "%ld", (long)((GiimPtr)sip->data.ptrvalue)->id);
3341             label_len += StringLen (localbuf) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3342             break;
3343         case SEQID_GENBANK:
3344         case SEQID_EMBL:
3345         case SEQID_DDBJ:
3346         case SEQID_OTHER:
3347         case SEQID_TPG:
3348         case SEQID_TPE:
3349         case SEQID_TPD:
3350         case SEQID_GPIPE:
3351         case SEQID_NAMED_ANNOT_TRACK:
3352         case SEQID_SWISSPROT:
3353            tsip = (TextSeqIdPtr)(sip->data.ptrvalue);
3354             release = tsip->release;
3355             if (sip->choice == SEQID_SWISSPROT) {
3356               release = NULL;
3357             }
3358            if ((tsip->version > 0) && (release == NULL) && SHOWVERSION)
3359              version = tsip->version;  /* show versions */
3360            sprintf(versionbuf, ".%d", (int)version);
3361         case SEQID_PIR:
3362         case SEQID_PRF:
3363             tsip = (TextSeqIdPtr)sip->data.ptrvalue;
3364             if (tsip->accession != NULL)
3365             {
3366                label_len += StringLen (tsip->accession) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3367                if (version)
3368                {
3369                    label_len += StringLen (versionbuf) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3370                }
3371                if (format != PRINTID_FASTA_SHORT)
3372                  break;
3373             }
3374             if (format == PRINTID_FASTA_SHORT)
3375                 label_len += StringLen (ldelim) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3376             if (tsip->name != NULL)
3377                 label_len += StringLen (tsip->name) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3378             /*
3379             if (sip->choice == SEQID_OTHER) {
3380                 Nlm_LabelCopyNext(&tmp, ldelim, &buflen);
3381                 if (tsip->release != NULL)
3382                     Nlm_LabelCopyNext(&tmp, tsip->release, &buflen);
3383             }
3384             */
3385             break;
3386         case SEQID_PATENT:
3387             patsip = (PatentSeqIdPtr)(sip->data.ptrvalue);
3388             label_len += StringLen (patsip->cit->country);
3389             if (format == PRINTID_FASTA_SHORT)
3390                 label_len += StringLen (ldelim) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3391             if (is_us_pre_grant) {
3392                 label_len += StringLen (patsip->cit->app_number) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3393             } else {
3394                 label_len += StringLen (patsip->cit->number) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3395             }
3396             if (format == PRINTID_FASTA_SHORT)
3397                 label_len += StringLen (ldelim) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3398             else
3399                 label_len += 1;
3400             sprintf(localbuf, "%d", (int)patsip->seqid);
3401             label_len += StringLen (localbuf) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3402             break;
3403         case SEQID_GENERAL:
3404             oip = ((DbtagPtr)sip->data.ptrvalue)->tag;
3405             if((format == PRINTID_FASTA_SHORT) || (format == PRINTID_REPORT))
3406                 label_len += StringLen (((DbtagPtr)sip->data.ptrvalue)->db) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3407             if (format == PRINTID_FASTA_SHORT)
3408                 label_len += StringLen (ldelim) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3409             else if (format == PRINTID_REPORT)
3410                 label_len += 2;
3411 
3412             if (oip->str == NULL)
3413             {
3414                 sprintf(localbuf, "%ld", (long) oip->id);
3415                 label_len += StringLen (localbuf) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3416             }
3417             else
3418                 label_len += StringLen (oip->str) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3419             break;
3420         case SEQID_PDB:
3421             psip = (PDBSeqIdPtr) sip->data.ptrvalue;
3422             chainbuf[0] = TO_UPPER (psip->chain);
3423             chainbuf[1] = '\0';
3424             chainbuf[2] = '\0';
3425             if (IS_LOWER (psip->chain)) {
3426               chainbuf[1] = chainbuf [0];
3427             }
3428             label_len += StringLen (psip->mol) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3429             if (format == PRINTID_FASTA_SHORT)
3430             {
3431                 label_len += StringLen (ldelim);
3432                 if (chainbuf[0] == '|') /* special */
3433                     label_len += 3;
3434                 else if (chainbuf[0] != '\0')
3435                     label_len += StringLen (chainbuf) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3436                 else
3437                     label_len += 2;
3438             }
3439             else if (psip->chain > ' ')
3440             {
3441                 label_len += 2;
3442                 label_len += StringLen (chainbuf) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3443             }
3444             break;
3445         default:
3446             label_len += StringLen (txtid[0]) + 1;  /* have to include 1 for extra terminator from Nlm_LabelCopyNext */
3447             break;
3448 
3449     }
3450     return label_len;
3451 }
3452 
3453 
SeqIdWholeLabel(SeqIdPtr isip,Uint1 format)3454 NLM_EXTERN CharPtr SeqIdWholeLabel (SeqIdPtr isip, Uint1 format)
3455 {
3456     CharPtr label = NULL;
3457     Int4 id_len;
3458 
3459     if (isip == NULL)
3460     {
3461         return NULL;
3462     }
3463 
3464     id_len = SeqIdLabelLen (isip, format) + 1;
3465     label = (CharPtr) MemNew (sizeof (Char) * id_len);
3466     SeqIdWrite (isip, label, format, id_len);
3467     return label;
3468 }
3469 
3470 
3471 /* The following function finds either an integer or a string id from
3472    SeqIdPtr */
3473 
GetAccessionFromSeqId(SeqIdPtr sip,BIG_ID_PNTR gi,CharPtr PNTR id)3474 Boolean GetAccessionFromSeqId(SeqIdPtr sip, BIG_ID_PNTR gi, CharPtr PNTR id)
3475 {
3476    return GetAccessionVersionFromSeqId(sip, gi, id, FALSE);
3477 }
3478 
3479 /* Maximal length of a version number in Accession.version identifiers */
3480 #define MAX_VERSION_LENGTH 10
3481 
GetAccessionVersionFromSeqId(SeqIdPtr sip,BIG_ID_PNTR gi,CharPtr PNTR id,Boolean get_version)3482 Boolean GetAccessionVersionFromSeqId(SeqIdPtr sip, BIG_ID_PNTR gi,
3483                                      CharPtr PNTR id, Boolean get_version)
3484 {
3485    Boolean numeric_id_type = FALSE;
3486    Int2 id_len;
3487    GiimPtr gip;
3488    ObjectIdPtr oip;
3489    TextSeqIdPtr textsip;
3490    DbtagPtr dbtag;
3491    PatentSeqIdPtr psip;
3492    PDBSeqIdPtr pdbsip;
3493 
3494    *id = NULL;
3495    *gi = 0;
3496 
3497    switch (sip->choice) {
3498    case SEQID_GI: case SEQID_GIBBSQ: case SEQID_GIBBMT:
3499       *gi = sip->data.intvalue;
3500       numeric_id_type = TRUE;
3501       break;
3502    case SEQID_GIIM:
3503       gip = (GiimPtr) sip->data.ptrvalue;
3504       *gi = gip->id;
3505       numeric_id_type = TRUE;
3506       break;
3507    case SEQID_LOCAL:
3508       oip = (ObjectIdPtr) sip->data.ptrvalue;
3509 
3510       if (oip->str) {
3511          id_len = StringLen(oip->str);
3512          *id = (CharPtr) MemNew(id_len+1);
3513          sprintf(*id, "%s", oip->str);
3514       } else {
3515          *gi = oip->id;
3516          numeric_id_type = TRUE;
3517       }
3518       break;
3519    case SEQID_GENBANK:
3520    case SEQID_EMBL:
3521    case SEQID_PIR:
3522    case SEQID_SWISSPROT:
3523    case SEQID_DDBJ:
3524    case SEQID_PRF:
3525    case SEQID_OTHER:
3526    case SEQID_TPG:
3527    case SEQID_TPE:
3528    case SEQID_TPD:
3529    case SEQID_GPIPE:
3530    case SEQID_NAMED_ANNOT_TRACK:
3531       textsip = (TextSeqIdPtr)sip->data.ptrvalue;
3532       if (textsip->accession) {
3533          if (get_version && textsip->version > 0) {
3534             /* Assume versions are no longer than MAX_VERSION_LENGTH digits */
3535             id_len = StringLen(textsip->accession) + MAX_VERSION_LENGTH + 1;
3536             *id = (CharPtr) MemNew(id_len+1);
3537             sprintf(*id, "%s.%ld", textsip->accession, (long) textsip->version);
3538          } else {
3539             id_len = StringLen(textsip->accession);
3540             *id = (CharPtr) MemNew(id_len+1);
3541             sprintf(*id, "%s", textsip->accession);
3542          }
3543       } else if (textsip->name) {
3544          id_len = StringLen(textsip->name);
3545          *id = (CharPtr) MemNew(id_len+1);
3546          sprintf(*id, "%s", textsip->name);
3547       }
3548       break;
3549    case SEQID_GENERAL:
3550       dbtag = (DbtagPtr) sip->data.ptrvalue;
3551       if (dbtag->tag->str == NULL) {
3552      numeric_id_type = TRUE;
3553      *gi = dbtag->tag->id;
3554       } else {
3555      id_len = StringLen(dbtag->tag->str);
3556      *id = (CharPtr) MemNew(id_len+1);
3557      sprintf(*id, "%s", dbtag->tag->str);
3558       }
3559       break;
3560    case SEQID_PATENT:
3561       psip = (PatentSeqIdPtr) sip->data.ptrvalue;
3562       *gi = (Int4) psip->seqid;
3563       numeric_id_type = TRUE;
3564       break;
3565    case SEQID_PDB:
3566       pdbsip = (PDBSeqIdPtr) sip->data.ptrvalue;
3567       id_len = StringLen(pdbsip->mol);
3568       *id = (CharPtr) MemNew(id_len+4);
3569       sprintf(*id, "%s", pdbsip->mol);
3570       break;
3571    default: break;
3572    }
3573 
3574    return numeric_id_type;
3575 }
3576 
3577 /*****************************************************************************
3578 *
3579 *   SeqIdPtr SeqIdParse(buf)
3580 *       parses a string containing SeqIds formated by SeqIdPrint using
3581 *       FASTA_LONG or FASTA_SHORT, separated by |
3582 *       returns a SeqId linked list for them
3583 *       or NULL on failure for any SeqId
3584 *
3585 *****************************************************************************/
3586 #define SEQID_PARSE_BUF_SIZE 200
SeqIdParse(CharPtr buf)3587 NLM_EXTERN SeqIdPtr SeqIdParse(CharPtr buf)
3588 {
3589     char localbuf[SEQID_PARSE_BUF_SIZE + 2];
3590     char * tmp, *strt, * tokens[6], *chain;
3591     char d;
3592     long long num;
3593     CharPtr tp;
3594     Int2 numtoken, i, type = 0, j, ctr=0, numdigits; /* ctr is number of OK ids done */
3595     SeqIdPtr sip = NULL, head = NULL, last = NULL, tmpsip;
3596     ObjectIdPtr oip;
3597     DbtagPtr dp;
3598     TextSeqIdPtr tsip;
3599     PatentSeqIdPtr patsip;
3600     IdPatPtr ipp;
3601     PDBSeqIdPtr psip;
3602     GiimPtr gim;
3603     Boolean done = FALSE, is_us_pre_grant = FALSE;
3604     static Uint1 expect_tokens[NUM_SEQID] = {  /* number of tokens to expect */
3605      0, /* 0 = not set */
3606     1, /* 1 = local Object-id */
3607     1,  /* 2 = gibbsq */
3608     1,  /* 3 = gibbmt */
3609     1, /* 4 = giim Giimport-id */
3610     2, /* 5 = genbank */
3611     2, /* 6 = embl */
3612     2, /* 7 = pir */
3613     2, /* 8 = swissprot */
3614     3,  /* 9 = patent */
3615     3, /* 10 = other TextSeqId */
3616     2, /* 11 = general Dbtag */
3617     1,  /* 12 = gi */
3618     2, /* 13 = ddbj */
3619     2, /* 14 = prf */
3620     2,  /* 15 = pdb */
3621     2,  /* 16 = tpg */
3622     2,  /* 17 = tpe */
3623     2,  /* 18 = tpd */
3624     2,  /* 19 = gpp */
3625     2,  /* 20 = nat */
3626     };
3627 
3628     if ((buf == NULL) || (*buf == '\0'))
3629         return NULL;
3630 
3631     d = *delim;   /* delimiter */
3632     while (! done)
3633     {
3634         Boolean sp_prelim = FALSE;  /* Used to set release field in Swissprot TextSeqId */
3635                         /* set all tokens pointing to \0 */
3636         localbuf[SEQID_PARSE_BUF_SIZE + 1] = '\0';
3637         for (i = 0; i < 6; i++)
3638             tokens[i] = &localbuf[SEQID_PARSE_BUF_SIZE + 1];
3639         tp = buf;        /* save start of string */
3640                         /* copy and tokenize - token\0token\0\n */
3641         for (tmp=localbuf, i=0; ((*buf != d) && (*buf != '\0') && (i < SEQID_PARSE_BUF_SIZE));
3642                 i++,buf++,tmp++)
3643             *tmp = *buf;
3644         if (*buf != d) goto erret;  /* didn't get delimiter */
3645         *tmp = '\0';
3646         tmp++;
3647         buf++;
3648         for (j = 0, type = 0; j < NUM_SEQID; j++)
3649         {
3650             if (! StringCmp(localbuf, txtid[j]))
3651             {
3652                 type = j;
3653                 break;
3654             }
3655         }
3656 
3657         /* oth now ref, but still want to parse old style */
3658         if ((! type) && (! StringCmp(localbuf, "oth"))) {
3659             type = SEQID_OTHER;
3660         }
3661 
3662         /* pgp is for  pre-grant patent publications */
3663         if ((! type) && (! StringCmp(localbuf, "pgp"))) {
3664             type = SEQID_PATENT;
3665             is_us_pre_grant = TRUE;
3666         }
3667 
3668         /* Trembl ID is really Swissprot with release field of TextSeqId set to "unreviewed" */
3669         if ((! type) && (! StringCmp(localbuf, "tr"))) {
3670             type = SEQID_SWISSPROT;
3671             sp_prelim = TRUE;
3672         }
3673 
3674         if (! type) goto erret;
3675 
3676                         /* copy and tokenize - token\0token\0\n */
3677         for (numtoken=0, strt=tmp;
3678             ((i < SEQID_PARSE_BUF_SIZE) && (numtoken < (Int2)(expect_tokens[type])) && (! done));
3679             i++,buf++,tmp++)
3680         {
3681             if ((*buf == d) || (*buf == '\0'))
3682             {
3683                 *tmp = '\0';
3684                 tokens[numtoken] = strt;
3685                 numtoken++;
3686                 if (*buf == '\0')
3687                 {
3688                     if (type == SEQID_OTHER && (numtoken == 2 || numtoken == 1))
3689                         done = TRUE;
3690                     else if ((type == SEQID_GENBANK || type == SEQID_EMBL ||
3691                             type == SEQID_DDBJ || type == SEQID_TPG ||
3692                             type == SEQID_TPE || type == SEQID_TPD ||
3693                             type == SEQID_GPIPE || type == SEQID_NAMED_ANNOT_TRACK) &&
3694                             numtoken == 1)
3695                         done = TRUE;
3696                     else if (numtoken < (Int2)(expect_tokens[type]))
3697                         goto erret;
3698                     else
3699                         done = TRUE;
3700                 }
3701                 strt = tmp+1;
3702             }
3703             else
3704                 *tmp = *buf;
3705         }
3706         if (i == SEQID_PARSE_BUF_SIZE) goto erret;
3707 
3708         sip = ValNodeNew(head);
3709         if (head == NULL) head = sip;
3710         sip->choice = (Uint1) type;
3711         switch (type)
3712         {
3713             case SEQID_LOCAL:           /* object id */
3714                 if (*tokens[0] == '\0') goto erret;
3715                 oip = ObjectIdNew();
3716                 sip->data.ptrvalue = oip;
3717                 for (tmp = tokens[0], numdigits = 0; *tmp != '\0'; tmp++, numdigits++)
3718                 {
3719                     if (! IS_DIGIT(*tmp))   /* string type */
3720                     {
3721                         oip->str = StringSave(tokens[0]);
3722                         break;
3723                     }
3724                 }
3725                 if (oip->str == NULL)
3726                 {
3727                     sscanf(tokens[0], "%lld", &num);
3728                     oip->id = (Int4)num;
3729                     if (*tokens[0] != '0' && (numdigits < 10 ||
3730                         (numdigits == 10 && StringCmp (tokens [0], "2147483647") <= 0))) {
3731                         sscanf(tokens[0], "%lld", &num);
3732                         oip->id = (Int4)num;
3733                     } else {
3734                         oip->str = StringSave(tokens[0]);
3735                     }
3736                 }
3737                 break;
3738             case SEQID_GIBBSQ:
3739             case SEQID_GIBBMT:
3740                 if (! IS_DIGIT(*tokens[0]))
3741                     goto erret;
3742                 sscanf(tokens[0], "%lld", &num);
3743                 sip->data.intvalue = (BIG_ID)num;
3744                 break;
3745             case SEQID_GI:
3746                 if (! IS_DIGIT(*tokens[0]))
3747                     goto erret;
3748                 sscanf(tokens[0], "%lld", &num);
3749                 sip->data.intvalue = (BIG_ID)num;
3750                 break;
3751             case SEQID_GIIM:
3752                 if (! IS_DIGIT(*tokens[0])) goto erret;
3753                 gim = GiimNew();
3754                 sip->data.ptrvalue = gim;
3755                 sscanf(tokens[0], "%lld", &num);
3756                 gim->id = (BIG_ID)num;
3757                 break;
3758             case SEQID_GENBANK:
3759             case SEQID_EMBL:
3760             case SEQID_PIR:
3761             case SEQID_SWISSPROT:
3762             case SEQID_DDBJ:
3763             case SEQID_PRF:
3764             case SEQID_OTHER:
3765             case SEQID_TPG:
3766             case SEQID_TPE:
3767             case SEQID_TPD:
3768             case SEQID_GPIPE:
3769             case SEQID_NAMED_ANNOT_TRACK:
3770                 if ((*tokens[0] == '\0') && (*tokens[1] == '\0'))
3771                     goto erret;
3772                 tsip = TextSeqIdNew();
3773                 sip->data.ptrvalue = tsip;
3774                 if (*tokens[0] != '\0')
3775                 {
3776                                         tmp = tokens[0]; /* check for version */
3777                                         while (*tmp != '\0')
3778                     {
3779                         if (*tmp == '.')
3780                         {
3781                                                    if (IS_DIGIT(*(tmp+1)))
3782                                                    {
3783                             *tmp = '\0';
3784                                                         sscanf((tmp+1),"%lld",&num);
3785                                                         tsip->version =(Int2)num;
3786                            }
3787                            else
3788                             tmp++;
3789                         }
3790                         else
3791                           tmp++;
3792                     }
3793                     tsip->accession = StringSave(tokens[0]);
3794                     *(tsip->accession) = TO_UPPER(*(tsip->accession));
3795                 }
3796                 if (*tokens[1] != '\0')
3797                 {
3798                     tsip->name = StringSave(tokens[1]);
3799                     if (type != SEQID_OTHER) {
3800                         tmp = tsip->name;
3801                         while (*tmp != '\0')
3802                         {
3803                             *tmp = TO_UPPER(*tmp);
3804                             tmp++;
3805                         }
3806                     }
3807                 }
3808                 if (type == SEQID_SWISSPROT)
3809                 {
3810                      if (sp_prelim)
3811                         tsip->release = StringSave("unreviewed");
3812                      else
3813                         tsip->release = StringSave("reviewed");
3814                 }
3815                 break;
3816             case SEQID_PATENT:
3817                 if ((*tokens[0] == '\0') || (*tokens[1] == '\0')) goto erret;
3818                 if (! IS_DIGIT(*tokens[2])) goto erret;
3819                 patsip = PatentSeqIdNew();
3820                 sip->data.ptrvalue = patsip;
3821                 ipp = IdPatNew();
3822                 patsip->cit = ipp;
3823                 ipp->country = StringSave(tokens[0]);
3824                 if (is_us_pre_grant) {
3825                     ipp->app_number = StringSave(tokens[1]);
3826                 } else {
3827                     ipp->number = StringSave(tokens[1]);
3828                 }
3829                 sscanf(tokens[2], "%lld", &num);
3830                 patsip->seqid = (Int2)num;
3831                 break;
3832             case SEQID_GENERAL:
3833                 if ((*tokens[0] == '\0') || (*tokens[1] == '\0')) goto erret;
3834                 dp = DbtagNew();
3835                 sip->data.ptrvalue = dp;
3836                 oip = ObjectIdNew();
3837                 dp->tag = oip;
3838                 dp->db = StringSave(tokens[0]);
3839                 for (tmp = tokens[1], numdigits = 0; *tmp != '\0'; tmp++, numdigits++)
3840                 {
3841                     if (! IS_DIGIT(*tmp))   /* string type */
3842                     {
3843                         oip->str = StringSave(tokens[1]);
3844                         break;
3845                     }
3846                 }
3847                 if (oip->str == NULL)
3848                 {
3849                     if (*tokens[1] != '0' && (numdigits < 10 ||
3850                         (numdigits == 10 && StringCmp (tokens [1], "2147483647") <= 0))) {
3851                         sscanf(tokens[1], "%lld", &num);
3852                         oip->id = (Int4)num;
3853                     } else {
3854                         oip->str = StringSave(tokens[1]);
3855                     }
3856                 }
3857                 break;
3858             case SEQID_PDB:
3859                 if (*tokens[0] == '\0') goto erret;
3860                 psip = PDBSeqIdNew();
3861                 sip->data.ptrvalue = psip;
3862                 psip->mol = StringSave(tokens[0]);
3863                 tmp = psip->mol;
3864                 while (*tmp != '\0')
3865                 {
3866                     *tmp = TO_UPPER(*tmp);
3867                     tmp++;
3868                 }
3869                 chain = tokens [1];
3870                 if ((! StringICmp(tokens[1], "VB")) ||
3871                                     *(buf-1) == d)
3872                     psip->chain = '|';
3873                 else if (! StringHasNoText (tokens[1]))
3874                     psip->chain = *tokens[1];
3875                 /* double letter for chain indicates lower case */
3876                 if (StringLen (chain) == 2 && TO_UPPER (chain [0]) == TO_UPPER (chain [1])) {
3877                     psip->chain = TO_LOWER(psip->chain);
3878                 } else {
3879                     psip->chain = TO_UPPER(psip->chain);
3880                 }
3881                 break;
3882         }
3883         last = sip;
3884         sip = NULL;
3885         ctr++;
3886     }
3887 ret:
3888     return head;
3889 erret:
3890     StringNCpy(localbuf, tp, SEQID_PARSE_BUF_SIZE);
3891     localbuf[SEQID_PARSE_BUF_SIZE] = '\0';
3892     ErrPostEx(SEV_INFO, 0,0, "SeqIdParse Failure at %s", localbuf);
3893     if (sip == head)
3894         head = NULL;
3895     else
3896     {
3897         if (last != NULL)
3898             last->next = NULL;
3899         if (! ctr)     /* no good SeqIds */
3900             head = SeqIdSetFree(head);
3901         else           /* at least one good SeqId.. keep it */
3902         {
3903             tmpsip = head;
3904             last = NULL;
3905             for (i = 0; i < ctr; i++)
3906             {
3907                 last = tmpsip;
3908                 tmpsip = tmpsip->next;
3909             }
3910             if (last != NULL)
3911                 last->next = NULL;
3912             SeqIdSetFree(tmpsip);
3913         }
3914     }
3915     ValNodeFree(sip);
3916     goto ret;
3917 }
3918 
3919 
3920 /*****************************************************************************
3921 *
3922 *   Boolean SeqIdMatch(a, b)
3923 *       returns TRUE if SeqIds could be compared and are the same
3924 *       returns FALSE both if SeqIds could not be compared OR if they were
3925 *                        compared but are different
3926 *
3927 *   WARNING!!!! use SeqIdComp() instead of SeqIdMatch() in most cases
3928 *
3929 *  The code here must work the same is in two idloader
3930 *  context: function id_flatten_seq_obj (idsybase.c)
3931 *  and proc id_id_flatten_seq_obj
3932 *
3933 *****************************************************************************/
SeqIdMatch(SeqIdPtr a,SeqIdPtr b)3934 NLM_EXTERN Boolean SeqIdMatch (SeqIdPtr a, SeqIdPtr b)
3935 {
3936     Uint1 retval;
3937 
3938     retval = SeqIdComp(a, b);
3939     if (retval == SIC_YES)
3940         return TRUE;
3941     else
3942         return FALSE;
3943 }
3944 
GetGiFromSeqIdGeneral(SeqIdPtr seq_id)3945 static Int8 GetGiFromSeqIdGeneral( SeqIdPtr seq_id)
3946 {
3947     if( seq_id->choice != SEQID_GENERAL) return 0;
3948     DbtagPtr db_tag = (DbtagPtr) seq_id->data.ptrvalue;
3949     if( StringICmp( db_tag->db, "GI")) return 0;
3950     ObjectIdPtr tag = db_tag->tag;
3951     if( (tag == NULL) || (tag->str == NULL)) return 0;
3952     return atol( tag->str);
3953 }
3954 
3955 /*****************************************************************************
3956 *
3957 *   SeqIdComp(a, b)
3958 *       Compares a to b and returns
3959 *
3960 *   SIC_DIFF   = different types, could not be compared
3961 *   SIC_NO     = types could be compared, and ids are different
3962 *   SIC_YES    = types could be compared, and ids are the same
3963 *
3964 *****************************************************************************/
SeqIdComp(SeqIdPtr a,SeqIdPtr b)3965 NLM_EXTERN Uint1 SeqIdComp (SeqIdPtr a, SeqIdPtr b)
3966 {
3967     Uint1 choice;
3968     TextSeqIdPtr at, bt;
3969 
3970     if ((a == NULL) || (b == NULL))
3971         return SIC_DIFF;
3972 
3973     choice = a->choice;
3974     if (choice != b->choice)
3975     {
3976         switch (choice)
3977         {
3978             case SEQID_GENBANK:          /* these could be confused */
3979             case SEQID_EMBL:
3980             case SEQID_DDBJ:
3981             case SEQID_TPG:
3982             case SEQID_TPE:
3983             case SEQID_TPD:
3984             case SEQID_GPIPE:
3985             case SEQID_NAMED_ANNOT_TRACK:
3986                 switch (b->choice)
3987                 {
3988                     case SEQID_GENBANK:   /* its ok */
3989                     case SEQID_EMBL:
3990                     case SEQID_DDBJ:
3991                     case SEQID_TPG:
3992                     case SEQID_TPE:
3993                     case SEQID_TPD:
3994                     case SEQID_GPIPE:
3995                     case SEQID_NAMED_ANNOT_TRACK:
3996                         break;
3997                     default:
3998                         return SIC_DIFF;
3999                 }
4000                 break;
4001             case SEQID_GI:
4002             {
4003                 Int8 gi = GetGiFromSeqIdGeneral( b);
4004                 if( a->data.intvalue == gi) return SIC_YES;
4005                 return SIC_DIFF;
4006             }
4007             case SEQID_GENERAL:
4008             {
4009                 if( b->choice != SEQID_GI) return SIC_DIFF;
4010                 Int8 gi = GetGiFromSeqIdGeneral( a);
4011                 if( b->data.intvalue == gi) return SIC_YES;
4012                 return SIC_DIFF;
4013             }
4014             default:
4015                 return SIC_DIFF;
4016         }
4017     }
4018 
4019     switch (choice)
4020     {
4021         case SEQID_NOT_SET:
4022             return SIC_DIFF;
4023         case SEQID_LOCAL:
4024             if (ObjectIdMatch((ObjectIdPtr)a->data.ptrvalue, (ObjectIdPtr)b->data.ptrvalue))
4025                 return SIC_YES;
4026             else
4027                 return SIC_NO;
4028         case SEQID_GIBBSQ:   /* gibbsq */
4029         case SEQID_GIBBMT:   /* gibbmt */
4030         case SEQID_GI:  /* gi */
4031             if (a->data.intvalue == b->data.intvalue)
4032                 return SIC_YES;
4033             else
4034                 return SIC_NO;
4035         case SEQID_GIIM:   /* giim */
4036             if (((GiimPtr)a->data.ptrvalue)->id == ((GiimPtr)b->data.ptrvalue)->id)
4037                 return SIC_YES;
4038             else
4039                 return SIC_NO;
4040         case SEQID_PATENT:   /* patent seq */
4041             if (((PatentSeqIdPtr)a->data.ptrvalue)->seqid !=
4042                 ((PatentSeqIdPtr)b->data.ptrvalue)->seqid)
4043                 return SIC_NO;
4044             if (IdPatMatch(((PatentSeqIdPtr)a->data.ptrvalue)->cit,
4045                 ((PatentSeqIdPtr)b->data.ptrvalue)->cit))
4046                 return SIC_YES;
4047             else
4048                 return SIC_NO;
4049         case SEQID_PDB:     /* pdb */
4050             if ( StringICmp(((PDBSeqIdPtr)a->data.ptrvalue)->mol,
4051                 ((PDBSeqIdPtr)b->data.ptrvalue)->mol))
4052                 return SIC_NO;
4053             /*
4054             if (TO_UPPER(((PDBSeqIdPtr)a->data.ptrvalue)->chain) !=
4055                 TO_UPPER(((PDBSeqIdPtr)b->data.ptrvalue)->chain))
4056                 return SIC_NO;
4057             */
4058             if (((PDBSeqIdPtr)a->data.ptrvalue)->chain !=
4059                 ((PDBSeqIdPtr)b->data.ptrvalue)->chain)
4060                 return SIC_NO;
4061             return SIC_YES;
4062         case SEQID_GENERAL:  /* general */
4063             if (DbtagMatch((DbtagPtr)a->data.ptrvalue,
4064                 (DbtagPtr)b->data.ptrvalue))
4065                 return SIC_YES;
4066             else if (StringICmp(((DbtagPtr)a->data.ptrvalue)->db,
4067                 ((DbtagPtr)b->data.ptrvalue)->db))
4068                 return SIC_DIFF; /* db strings do not match, okay */
4069             else
4070                 return SIC_NO;
4071 
4072         case SEQID_GENBANK:
4073         case SEQID_EMBL:
4074         case SEQID_DDBJ:
4075         case SEQID_PIR:
4076         case SEQID_SWISSPROT:
4077         case SEQID_PRF:
4078         case SEQID_OTHER:
4079         case SEQID_TPG:
4080         case SEQID_TPE:
4081         case SEQID_TPD:
4082         case SEQID_GPIPE:
4083         case SEQID_NAMED_ANNOT_TRACK:
4084 
4085             at = (TextSeqIdPtr)a->data.ptrvalue;
4086             bt = (TextSeqIdPtr)b->data.ptrvalue;
4087             if ((at->accession != NULL) && (bt->accession != NULL))
4088             {
4089                 if (! StringICmp(at->accession, bt->accession)) {
4090                     if (at->version > 0 &&
4091                         bt->version > 0 &&
4092                         at->version != bt->version) {
4093                         return SIC_NO;
4094                     }
4095                     return SIC_YES;
4096                 } else {
4097                     return SIC_NO;
4098                 }
4099             }
4100             else if ((at->name != NULL) && (bt->name != NULL))
4101             {
4102                 if (! StringICmp(at->name, bt->name)) {
4103                     if (at->version > 0 &&
4104                         bt->version > 0 &&
4105                         at->version != bt->version) {
4106                         return SIC_NO;
4107                     }
4108                     return SIC_YES;
4109                 } else {
4110                     return SIC_NO;
4111                 }
4112             }
4113             else
4114                 return SIC_DIFF;
4115         default:
4116             ErrPostEx(SEV_ERROR, 0,0, "SeqIdComp: unsupported type [%d]",
4117                 (int)choice);
4118             return SIC_DIFF;
4119      }
4120 }
4121 
4122 /*****************************************************************************
4123 *
4124 *   Boolean SeqIdIn(a, b)
4125 *     Looks for single SeqId, "a" in chain of SeqIds, "b"
4126 *
4127 *****************************************************************************/
SeqIdIn(SeqIdPtr a,SeqIdPtr b)4128 NLM_EXTERN Boolean SeqIdIn (SeqIdPtr a, SeqIdPtr b)
4129 
4130 {
4131     SeqIdPtr now;
4132     Uint1 retval;
4133 
4134     if (a == NULL)
4135         return FALSE;
4136 
4137     for (now =b; now != NULL; now = now -> next)
4138     {
4139         retval = SeqIdComp(a, now);
4140         switch (retval)
4141         {
4142             case SIC_YES:
4143                 return TRUE;
4144             case SIC_NO:
4145                 return FALSE;
4146         }
4147     }
4148     return FALSE;
4149 }
4150 
4151 /*****************************************************************************
4152 *
4153 *   SeqIdForSameBioseq(a,b)
4154 *
4155 *****************************************************************************/
SeqIdForSameBioseq(SeqIdPtr a,SeqIdPtr b)4156 NLM_EXTERN Boolean SeqIdForSameBioseq (SeqIdPtr a, SeqIdPtr b)
4157 
4158 {
4159     BioseqPtr bsp;
4160     Uint1 retval;
4161     Boolean res = FALSE;
4162     /*
4163     Boolean locked = FALSE;
4164     */
4165 
4166     if ((a == NULL) || (b == NULL)) return FALSE;
4167 
4168     retval = SeqIdComp(a,b);   /* if match, all set */
4169     switch (retval)
4170     {
4171         case SIC_YES:
4172             return TRUE;
4173         case SIC_NO:
4174             return FALSE;
4175     }
4176 
4177     bsp = BioseqFindCore(a);
4178     if (bsp == NULL)
4179     {
4180         return FALSE;
4181         /*
4182         bsp = BioseqLockById(a);
4183         if (bsp != NULL)
4184             locked = TRUE;
4185         else
4186             return res;
4187         */
4188     }
4189 
4190     res = SeqIdIn(b, bsp->id);
4191     /*
4192     if (locked)
4193         BioseqUnlock(bsp);
4194     */
4195 
4196     return res;
4197 }
4198 
4199 /*****************************************************************************
4200 *
4201 *   MakeNewProteinSeqId(SeqLocPtr slp, SeqIdPtr sip)
4202 *       Makes a new protein SeqId of attempting to keep it unique
4203 *       Trys to match it to the input seqid type
4204 *       slp is the location on the DNA of the coding region making the protein
4205 *       sip is the SeqId of the DNA coding for the protein
4206 *       if (sip != NULL) uses it for a "base" first
4207 *       else if (slp != NULL) uses a SeqId from it for a base
4208 *       else base is the string tmpseq
4209 *
4210 *       id is then base_X where X is a number assigned as a serial number
4211 *       the returned id is guaranteed to be unique among all Bioseqs currently
4212 *       loaded in memory.
4213 *
4214 *
4215 *****************************************************************************/
MakeNewProteinSeqIdExMT(SeqLocPtr slp,SeqIdPtr sip,CharPtr prefix,Int2Ptr ctrptr,Boolean is_MT_safe)4216 NLM_EXTERN SeqIdPtr LIBCALL MakeNewProteinSeqIdExMT (SeqLocPtr slp, SeqIdPtr sip, CharPtr prefix, Int2Ptr ctrptr, Boolean is_MT_safe)
4217 {
4218     Char buf[60];
4219     CharPtr tmp;
4220     Int2 ctr = 0;
4221     Int2 start = 1;
4222     SeqLocPtr tslp;
4223     ValNodePtr newid;
4224     ObjectIdPtr oid;
4225     ValNode vn;
4226     TextSeqId tsi;
4227     ValNodePtr altid;
4228     size_t len;
4229     static Uint4 counter;
4230     static TNlmMutex lock = NULL;
4231 
4232 
4233     if (lock == NULL) {
4234         NlmMutexInit(&lock);
4235     }
4236 
4237     /* create a possible GenBankStyle id as well */
4238     altid = &vn;
4239     vn.choice = SEQID_GENBANK;
4240     vn.next = NULL;
4241     vn.data.ptrvalue = &tsi;
4242     tsi.name = NULL;
4243     tsi.accession = NULL;
4244     tsi.version = INT2_MIN;
4245     tsi.release = NULL;
4246 
4247     if ((sip == NULL) && (slp != NULL)) {
4248         tslp = NULL;
4249         while ((tslp = SeqLocFindNext(slp, tslp)) != NULL) {
4250             sip = SeqLocId(tslp);
4251             if (sip != NULL)
4252                 break;
4253         }
4254     }
4255 
4256     if (sip != NULL) {
4257         SeqIdWrite(sip, buf, PRINTID_TEXTID_ACCESSION, 50);
4258         tmp = buf;
4259         while (*tmp != '\0')
4260             tmp++;
4261         if (*(tmp-1) == '>')
4262             tmp--;
4263         *tmp = '_';
4264         tmp++;
4265         *tmp = '\0';
4266     } else {
4267         len = StringLen (prefix);
4268         if (len > 0 && len < 52) {
4269             tmp = StringMove(buf, prefix);
4270         } else {
4271             tmp = StringMove(buf, "tmpseq_");
4272         }
4273     }
4274 
4275     newid = ValNodeNew(NULL);
4276     oid = ObjectIdNew();
4277     oid->str = buf;   /* allocate this later */
4278     newid->choice = SEQID_LOCAL;
4279     newid->data.ptrvalue = oid;
4280 
4281     tsi.name = buf;   /* check for alternative form */
4282 
4283     if (ctrptr != NULL) {
4284         start = *ctrptr;
4285     }
4286     if (start < 1) {
4287         start = 1;
4288     }
4289 
4290     /* Very dangerous way to create new id - don't use if you can */
4291 
4292     if (is_MT_safe == FALSE) {
4293       for (ctr = start; ctr < 32000; ctr++) {
4294         sprintf(tmp, "%d", (int)ctr);
4295         if ((BioseqFindCore(newid) == NULL) && (BioseqFindCore(altid) == NULL)) {
4296           oid->str = StringSave(buf);
4297           if (ctrptr != NULL) {
4298             *ctrptr = ctr + 1;
4299           }
4300           return newid;
4301         }
4302       }
4303     }
4304 
4305     NlmMutexLock(lock);
4306 
4307     sprintf(tmp, "%d", (int)counter);
4308     oid->str = StringSave(buf);
4309     if (ctrptr != NULL) {
4310         *ctrptr = ctr + 1;
4311     }
4312 
4313     counter++;
4314     NlmMutexUnlock(lock);
4315 
4316     return newid;
4317 }
4318 
MakeNewProteinSeqIdEx(SeqLocPtr slp,SeqIdPtr sip,CharPtr prefix,Int2Ptr ctrptr)4319 NLM_EXTERN SeqIdPtr LIBCALL MakeNewProteinSeqIdEx (SeqLocPtr slp, SeqIdPtr sip, CharPtr prefix, Int2Ptr ctrptr)
4320 {
4321     return MakeNewProteinSeqIdExMT (slp, sip, prefix, ctrptr, FALSE);
4322 }
4323 
MakeNewProteinSeqId(SeqLocPtr slp,SeqIdPtr sip)4324 NLM_EXTERN SeqIdPtr LIBCALL MakeNewProteinSeqId (SeqLocPtr slp, SeqIdPtr sip)
4325 {
4326     return MakeNewProteinSeqIdEx (slp, sip, NULL, NULL);
4327 }
4328 
UniqueLocalId(void)4329 NLM_EXTERN ObjectIdPtr UniqueLocalId(void)
4330 {
4331     static TNlmMutex lock = NULL;
4332     static long count = 0;
4333     ObjectIdPtr oip;
4334     long l;
4335     Char buf[128];
4336 
4337     if (lock == NULL) {
4338         NlmMutexInit(&lock);
4339     }
4340     NlmMutexLock(lock);
4341     l = count;
4342     if (++count < 0) {
4343         count = 0;
4344     }
4345     NlmMutexUnlock(lock);
4346     sprintf(buf, "lcl|unique%08ld", l);
4347     oip = ObjectIdNew();
4348     oip->str = StringSave(buf);
4349     return oip;
4350 }
4351 
4352 /*****************************************************************************
4353 *
4354 *   Traversal routine for SeqLocFindNext
4355 *
4356 *****************************************************************************/
SeqLocNext(SeqLocPtr seqlochead,SeqLocPtr currseqloc,Uint1 equiv_status,BoolPtr founditptr)4357 static SeqLocPtr SeqLocNext (SeqLocPtr seqlochead, SeqLocPtr currseqloc, Uint1 equiv_status, BoolPtr founditptr)
4358 
4359 {
4360     SeqLocPtr currloc, retval;
4361     Boolean equiv_is_one, foundit=FALSE;
4362 
4363     switch (equiv_status)
4364     {
4365         case EQUIV_IS_ONE:
4366             equiv_is_one = TRUE;
4367             break;
4368         case FIRST_EQUIV_IS_MANY:
4369             equiv_status = EQUIV_IS_ONE;
4370         case EQUIV_IS_MANY:
4371         default:
4372             equiv_is_one = FALSE;
4373             break;
4374     }
4375 
4376     while (seqlochead != NULL)
4377     {
4378         if (IS_one_loc(seqlochead, equiv_is_one))
4379         {
4380             if (currseqloc == NULL)
4381                 return seqlochead;
4382             else if (currseqloc == seqlochead)   /* found it */
4383             {
4384                 *founditptr = TRUE;
4385                 if (seqlochead -> next != NULL)
4386                 {
4387                     if (IS_one_loc(seqlochead->next, equiv_is_one))
4388                         return seqlochead->next;
4389                     else
4390                         return SeqLocNext(seqlochead->next, NULL, equiv_status, &foundit);
4391                 }
4392                 else
4393                 {
4394                     return NULL;
4395                 }
4396             }
4397         }
4398         else
4399         {
4400             currloc = (SeqLocPtr)seqlochead->data.ptrvalue;
4401             if (currloc != NULL)
4402             {
4403                 if ((retval = SeqLocNext(currloc, currseqloc, equiv_status, &foundit)) != NULL)
4404                     return retval;
4405                 else
4406                     if (foundit)
4407                         currseqloc = NULL;   /* no need to keep looking */
4408             }
4409         }
4410 
4411         seqlochead = seqlochead->next;
4412     }
4413     return NULL;
4414 }
4415 
4416 /*****************************************************************************
4417 *
4418 *   SeqLocFindNext(seqlochead, currseqloc)
4419 *       finds the next Seq-loc after currseqloc
4420 *       seqlochead is the first of a chain of Seq-locs
4421 *       treats SEQLOC_EQUIV as multiple seq-locs
4422 *
4423 *****************************************************************************/
SeqLocFindNext(SeqLocPtr seqlochead,SeqLocPtr currseqloc)4424 NLM_EXTERN SeqLocPtr SeqLocFindNext (SeqLocPtr seqlochead, SeqLocPtr currseqloc)
4425 {
4426     return SeqLocFindPart(seqlochead, currseqloc, EQUIV_IS_MANY);
4427 }
4428 
4429 /*****************************************************************************
4430 *
4431 *   SeqLocFindPart(seqlochead, currseqloc, equiv_status)
4432 *       finds the next Seq-loc after currseqloc
4433 *       seqlochead is the first of a chain of Seq-locs
4434 *       equiv_status defines how to treat SEQLOC_EQUIV
4435 *
4436 *****************************************************************************/
SeqLocFindPart(SeqLocPtr seqlochead,SeqLocPtr currseqloc,Uint1 equiv_status)4437 NLM_EXTERN SeqLocPtr SeqLocFindPart (SeqLocPtr seqlochead, SeqLocPtr currseqloc, Uint1 equiv_status)
4438 {
4439     SeqLocPtr tmp, oldnext;
4440     Boolean equiv_is_one, foundit=FALSE;
4441 
4442     if (seqlochead == NULL) return NULL;
4443 
4444     if (equiv_status == EQUIV_IS_ONE)
4445         equiv_is_one = TRUE;
4446     else
4447         equiv_is_one = FALSE;
4448 
4449     if (IS_one_loc(seqlochead, equiv_is_one))    /* not a chain */
4450     {
4451         if (currseqloc == NULL)       /* first call */
4452             return seqlochead;
4453         else if (currseqloc == seqlochead)     /* second call */
4454             return NULL;
4455         else                           /* oops */
4456             goto erret;
4457     }
4458 
4459     if (currseqloc != NULL)
4460     {
4461         if (! IS_one_loc(currseqloc, equiv_is_one)) /* oops */
4462             goto erret;
4463         tmp = currseqloc->next;
4464         if (tmp != NULL)
4465         {
4466             if (IS_one_loc(tmp, equiv_is_one))
4467                 return tmp;
4468         }
4469     }
4470 
4471     oldnext = seqlochead->next;       /* protect from accidental chains */
4472     seqlochead->next = NULL;
4473 
4474     tmp = SeqLocNext(seqlochead, currseqloc, equiv_status, &foundit);
4475 
4476     seqlochead->next = oldnext;
4477     return tmp;
4478 
4479 erret:
4480     ErrPostEx(SEV_ERROR,0,0, "Invalid arguments to SeqLocFindNext");
4481     return NULL;
4482 }
4483 
4484 /*****************************************************************************
4485 *
4486 *   IS_one_loc(anp, equiv_is_one)
4487 *       returns TRUE if is a sequence location which refers to one piece
4488 *       of sequence
4489 *       used for moving through complicated Seq-locs
4490 *      if equiv_is_one == TRUE, then considers a SEQ_LOC_EQUIV a single
4491 *        location. If FALSE, does not.
4492 *
4493 *****************************************************************************/
IS_one_loc(SeqLocPtr anp,Boolean equiv_is_one)4494 NLM_EXTERN Boolean IS_one_loc (SeqLocPtr anp, Boolean equiv_is_one)      /* a SeqLoc */
4495 
4496 {
4497     Boolean retval = FALSE;
4498 
4499     if (anp == NULL) return FALSE;
4500 
4501     switch (anp->choice)
4502     {
4503         case SEQLOC_NULL:      /* null - not a valid single region */
4504         case SEQLOC_EMPTY:      /* empty */
4505         case SEQLOC_WHOLE:      /* whole */
4506         case SEQLOC_INT:      /* int */
4507         case SEQLOC_PNT:      /* pnt */
4508         case SEQLOC_PACKED_PNT:      /* packed-pnt   */
4509         case SEQLOC_BOND:      /* bond */
4510             retval = TRUE;
4511             break;
4512 
4513         case SEQLOC_EQUIV:     /* equiv */
4514             retval = equiv_is_one;
4515             break;
4516 
4517         case SEQLOC_PACKED_INT:      /* packed seqint */
4518         case SEQLOC_MIX:      /* mix */
4519         case SEQLOC_FEAT:
4520             retval = FALSE;
4521             break;
4522 
4523         default:
4524             ErrPostEx(SEV_ERROR,0,0, "IS_one_seq: unsupported seqloc [%d]",
4525                 (int)(anp->choice));
4526             retval = TRUE;
4527             break;
4528     }
4529     return retval;
4530 }
4531 /*****************************************************************************
4532 *
4533 *   SeqLocId(loc)
4534 *
4535 *****************************************************************************/
SeqLocId(SeqLocPtr anp)4536 NLM_EXTERN SeqIdPtr SeqLocId (SeqLocPtr anp)
4537 
4538 {
4539     SeqIdPtr seqid = NULL, currseqid = NULL;
4540     SeqLocPtr loc;
4541 
4542     if (anp == NULL) return NULL;
4543 
4544     switch (anp->choice)
4545     {
4546         case SEQLOC_NULL:    /* NULL */
4547         case SEQLOC_FEAT:   /* feat -- can't track yet */
4548             break;
4549         case SEQLOC_BOND:   /* bond -- 2 seqs */
4550             if (((SeqBondPtr)(anp->data.ptrvalue))->a != NULL)
4551                 seqid = ((SeqBondPtr)(anp->data.ptrvalue))->a->id;
4552             break;
4553         case SEQLOC_EMPTY:    /* empty */
4554         case SEQLOC_WHOLE:    /* whole */
4555             seqid = (SeqIdPtr)anp->data.ptrvalue;
4556             break;
4557         case SEQLOC_INT:    /* int */
4558             seqid = ((SeqIntPtr)anp->data.ptrvalue)->id;
4559             break;
4560         case SEQLOC_PACKED_INT:    /* packed int */
4561         case SEQLOC_MIX:    /* mix -- could be more than one seq */
4562         case SEQLOC_EQUIV:    /* equiv -- ditto */
4563             loc = (SeqLocPtr)anp->data.ptrvalue;
4564             while (loc != NULL)
4565             {
4566                 if (loc->choice == SEQLOC_NULL) {
4567                     loc = loc->next;
4568                     continue;
4569                 }
4570                 currseqid =    SeqLocId(loc);
4571                 if (seqid == NULL)
4572                     seqid = currseqid;
4573                 else
4574                 {
4575                     if (! SeqIdMatch(seqid, currseqid))
4576                     {
4577                         seqid = NULL;
4578                         loc = NULL;
4579                         break;
4580                     }
4581                 }
4582                 loc = loc->next;
4583             }
4584             break;
4585         case SEQLOC_PNT:    /* pnt */
4586             seqid = ((SeqPntPtr)anp->data.ptrvalue)->id;
4587             break;
4588         case SEQLOC_PACKED_PNT:    /* packed pnt */
4589             seqid = ((PackSeqPntPtr)anp->data.ptrvalue)->id;
4590             break;
4591         default:
4592             break;
4593     }
4594     return seqid;
4595 }
4596 
4597 /*****************************************************************************
4598 *
4599 *   SeqLocStart(loc)
4600 *       returns lowest number position for Seq-loc all on one bioseq
4601 *       returns -1 if impossible to meet that condition
4602 *
4603 *****************************************************************************/
SeqLocStart(SeqLocPtr anp)4604 NLM_EXTERN Int4 SeqLocStart (SeqLocPtr anp)   /* seqloc */
4605 
4606 {
4607     Int4 pos = -1L, tpos, numpnt;
4608     SeqIdPtr  sip;
4609     SeqLocPtr slp;
4610     SeqIntPtr sintp;
4611 
4612     if (anp == NULL)
4613         return pos;
4614 
4615     switch (anp->choice)
4616     {
4617         case SEQLOC_BOND:   /* bond -- 2 seqs */
4618             if (((SeqBondPtr)(anp->data.ptrvalue))->a != NULL)
4619                 pos =  ((SeqBondPtr)(anp->data.ptrvalue))->a->point;
4620             break;
4621         case SEQLOC_FEAT:   /* feat -- can't track yet */
4622         case SEQLOC_NULL:    /* NULL */
4623         case SEQLOC_EMPTY:    /* empty */
4624             break;
4625         case SEQLOC_WHOLE:    /* whole */
4626             pos = 0L;
4627             break;
4628         case SEQLOC_MIX:    /* mix -- more than one seq */
4629         case SEQLOC_EQUIV:    /* equiv -- ditto */
4630         case SEQLOC_PACKED_INT:    /* packed int */
4631             sip = SeqLocId(anp);
4632             if (sip != NULL)      /* all on one Bioseq */
4633             {
4634                 slp = (SeqLocPtr)anp->data.ptrvalue;
4635                 while (slp != NULL)
4636                 {
4637                     tpos = SeqLocStart(slp);
4638                     if (pos < 0)
4639                         pos = tpos;
4640                     else if (tpos < pos)
4641                         pos = tpos;
4642                     slp = slp->next;
4643                 }
4644             }
4645             break;
4646         case SEQLOC_INT:    /* int */
4647             sintp = (SeqIntPtr) anp->data.ptrvalue;
4648             pos = sintp->from;
4649             break;
4650         case SEQLOC_PNT:    /* pnt */
4651             pos = ((SeqPntPtr)anp->data.ptrvalue)->point;
4652             break;
4653         case SEQLOC_PACKED_PNT:    /* packed pnt */
4654             numpnt = PackSeqPntNum((PackSeqPntPtr)anp->data.ptrvalue);
4655             while (numpnt)
4656             {
4657                 numpnt--;
4658                 tpos = PackSeqPntGet((PackSeqPntPtr)anp->data.ptrvalue, numpnt);
4659                 if (pos < 0)
4660                     pos = tpos;
4661                 else if (tpos < pos)
4662                     pos = tpos;
4663             }
4664             break;
4665         default:
4666             break;
4667     }
4668     return pos;
4669 }
4670 
4671 /*****************************************************************************
4672 *
4673 *   SeqLocStop(loc)
4674 *       looks for highest position number on loc if on one Bioseq
4675 *       if fails, returns -1
4676 *
4677 *****************************************************************************/
SeqLocStop(SeqLocPtr anp)4678 NLM_EXTERN Int4 SeqLocStop (SeqLocPtr anp)   /* seqloc */
4679 
4680 {
4681     BioseqPtr bsp;
4682     Int4 pos = -1L, tpos, numpnt;
4683     SeqIdPtr sip;
4684     SeqLocPtr slp;
4685     Boolean locked = FALSE;
4686 
4687 
4688     if (anp == NULL)
4689         return pos;
4690 
4691     switch (anp->choice)
4692     {
4693         case SEQLOC_BOND:   /* bond -- 2 seqs */
4694             if (((SeqBondPtr)(anp->data.ptrvalue))->b != NULL)
4695                 pos =  ((SeqBondPtr)(anp->data.ptrvalue))->b->point;
4696             else if (((SeqBondPtr)(anp->data.ptrvalue))->a != NULL)
4697                 pos =  ((SeqBondPtr)(anp->data.ptrvalue))->a->point;
4698             break;
4699         case SEQLOC_FEAT:   /* feat -- can't track yet */
4700         case SEQLOC_NULL:    /* NULL */
4701         case SEQLOC_EMPTY:    /* empty */
4702             break;
4703         case SEQLOC_WHOLE:    /* whole */
4704             bsp = BioseqFindCore((SeqIdPtr)anp->data.ptrvalue);
4705                 if (bsp == NULL)
4706                 {
4707                     bsp = BioseqLockById((SeqIdPtr)anp->data.ptrvalue);
4708                     if (bsp != NULL)
4709                         locked = TRUE;
4710                 }
4711             pos = BioseqGetLen(bsp) - 1;
4712                 if (locked)
4713                     BioseqUnlock(bsp);
4714             break;
4715         case SEQLOC_MIX:    /* mix -- more than one seq */
4716         case SEQLOC_EQUIV:    /* equiv -- ditto */
4717         case SEQLOC_PACKED_INT:    /* packed int */
4718             sip = SeqLocId(anp);
4719             if (sip != NULL)      /* all on one Bioseq */
4720             {
4721                 slp = (SeqLocPtr)anp->data.ptrvalue;
4722                 while (slp != NULL)
4723                 {
4724                     tpos = SeqLocStop(slp);
4725                     if (pos < 0)
4726                         pos = tpos;
4727                     else if (tpos > pos)
4728                         pos = tpos;
4729                     slp = slp->next;
4730                 }
4731             }
4732             break;
4733         case SEQLOC_INT:    /* int */
4734             pos = ((SeqIntPtr)anp->data.ptrvalue)->to;
4735             break;
4736         case SEQLOC_PNT:    /* pnt */
4737             pos = ((SeqPntPtr)anp->data.ptrvalue)->point;
4738             break;
4739         case SEQLOC_PACKED_PNT:    /* packed pnt */
4740             numpnt = PackSeqPntNum((PackSeqPntPtr)anp->data.ptrvalue);
4741             while (numpnt)
4742             {
4743                 numpnt--;
4744                 tpos = PackSeqPntGet((PackSeqPntPtr)anp->data.ptrvalue, numpnt);
4745                 if (pos < 0)
4746                     pos = tpos;
4747                 else if (tpos > pos)
4748                     pos = tpos;
4749             }
4750             break;
4751         default:
4752             break;
4753     }
4754     return pos;
4755 }
4756 
4757 /*****************************************************************************
4758 *
4759 *   SeqLocStrand(loc)
4760 *       see objloc.h for strand value defines
4761 *       returns Seq_strand_other when series of locs on different strands
4762 *
4763 *****************************************************************************/
SeqLocStrand(SeqLocPtr anp)4764 NLM_EXTERN Uint1 SeqLocStrand (SeqLocPtr anp)   /* seqloc */
4765 
4766 {
4767     SeqIdPtr sip;
4768     SeqLocPtr slp;
4769     Uint1 strand = Seq_strand_unknown, tstrand;
4770 
4771     if (anp == NULL)
4772         return strand;
4773 
4774     switch (anp->choice)
4775     {
4776         case SEQLOC_BOND:   /* bond -- 2 seqs */
4777             if (((SeqBondPtr)(anp->data.ptrvalue))->a != NULL)
4778                 strand =  ((SeqBondPtr)(anp->data.ptrvalue))->a->strand;
4779             break;
4780         case SEQLOC_FEAT:   /* feat -- can't track yet */
4781         case SEQLOC_NULL:    /* NULL */
4782         case SEQLOC_EMPTY:    /* empty */
4783             break;
4784         case SEQLOC_WHOLE:    /* whole */
4785             strand = Seq_strand_both;
4786             break;
4787         case SEQLOC_MIX:    /* mix -- more than one seq */
4788         case SEQLOC_EQUIV:    /* equiv -- ditto */
4789         case SEQLOC_PACKED_INT:    /* packed int */
4790             sip = SeqLocId(anp);
4791             if (sip != NULL)      /* all on one Bioseq */
4792             {
4793                 for (slp = (SeqLocPtr)anp->data.ptrvalue,
4794                         strand = SeqLocStrand(slp), slp = slp -> next;
4795                         slp != NULL ; slp = slp->next)
4796                 {
4797                     if (slp->choice == SEQLOC_NULL || slp->choice == SEQLOC_EMPTY) continue;
4798                     tstrand = SeqLocStrand(slp);
4799                     if (strand == Seq_strand_unknown && tstrand == Seq_strand_plus) {
4800                         strand = Seq_strand_plus;
4801                     }
4802                     if (strand == Seq_strand_plus && tstrand == Seq_strand_unknown) {
4803                         tstrand = Seq_strand_plus;
4804                     }
4805                     if (strand != tstrand)
4806                     {
4807                         strand = Seq_strand_other;
4808                         break;
4809                     }
4810                 }
4811             }
4812             break;
4813         case SEQLOC_INT:    /* int */
4814             strand = ((SeqIntPtr)anp->data.ptrvalue)->strand;
4815             break;
4816         case SEQLOC_PNT:    /* pnt */
4817             strand = ((SeqPntPtr)anp->data.ptrvalue)->strand;
4818             break;
4819         case SEQLOC_PACKED_PNT:    /* packed pnt */
4820             strand = ((PackSeqPntPtr)anp->data.ptrvalue)->strand;
4821             break;
4822         default:
4823             break;
4824     }
4825     return strand;
4826 }
4827 
4828 /*****************************************************************************
4829 *
4830 *   Int4 SeqLocGetSegLens (slp, lens, ctr, gaps)
4831 *       returns total number of segments in SeqLoc including NULLS
4832 *       returns -1 for error
4833 *       if lens != NULL fills with lengths of segments, 0 = NULL
4834 *
4835 *****************************************************************************/
SeqLocGetSegLens(SeqLocPtr slp,Int4Ptr lens,Int4 ctr,Boolean gaps)4836 NLM_EXTERN Int4 SeqLocGetSegLens (SeqLocPtr slp, Int4Ptr lens, Int4 ctr, Boolean gaps)
4837 {
4838     SeqLocPtr slp2;
4839     BioseqPtr bsp;
4840     Boolean locked = FALSE;
4841 
4842     if (slp == NULL)
4843         return -1;
4844 
4845     switch (slp->choice)
4846     {
4847         case SEQLOC_BOND:   /* bond -- 2 seqs */
4848         case SEQLOC_FEAT:   /* feat -- can't track yet */
4849             break;
4850         case SEQLOC_NULL:    /* NULL */
4851         case SEQLOC_EMPTY:    /* empty */
4852             if (lens != NULL)
4853                 lens[ctr] = 0;
4854             ctr++;
4855             break;
4856         case SEQLOC_WHOLE:    /* whole */
4857             if (gaps)
4858                 break;
4859             if (lens != NULL)
4860             {
4861                 bsp = BioseqFindCore((SeqIdPtr)slp->data.ptrvalue);
4862                     if (bsp == NULL)
4863                     {
4864                         bsp = BioseqLockById((SeqIdPtr)slp->data.ptrvalue);
4865                         if (bsp != NULL)
4866                             locked = TRUE;
4867                     }
4868                 lens[ctr] = BioseqGetLen(bsp);
4869                   if (locked)
4870                         BioseqUnlock(bsp);
4871             }
4872             ctr++;
4873             break;
4874         case SEQLOC_MIX:    /* mix -- more than one seq */
4875         case SEQLOC_EQUIV:    /* equiv -- ditto */
4876         case SEQLOC_PACKED_INT:    /* packed int */
4877             slp2 = (SeqLocPtr)slp->data.ptrvalue;
4878             while (slp2 != NULL)
4879             {
4880                 ctr = SeqLocGetSegLens(slp2, lens, ctr, gaps);
4881                 slp2 = slp2->next;
4882             }
4883             break;
4884         case SEQLOC_INT:    /* int */
4885             if (gaps) break;
4886             if (lens != NULL)
4887                 lens[ctr] = ((SeqIntPtr)slp->data.ptrvalue)->to - ((SeqIntPtr)slp->data.ptrvalue)->from + 1;
4888             ctr++;
4889             break;
4890         case SEQLOC_PNT:    /* pnt */
4891             if (gaps) break;
4892             if (lens != NULL)
4893                 lens[ctr] = 1;
4894             ctr++;
4895             break;
4896         case SEQLOC_PACKED_PNT:    /* packed pnt */
4897             if (gaps) break;
4898             if (lens != NULL)
4899                 lens[ctr] = SeqLocStop(slp) - SeqLocStart(slp) + 1;
4900             ctr++;
4901             break;
4902         default:
4903             break;
4904     }
4905     return ctr;
4906 }
4907 
4908 /*****************************************************************************
4909 *
4910 *   SeqLocLen(loc)
4911 *       returns total length in residues of loc
4912 *       if fails, returns -1
4913 *
4914 *****************************************************************************/
SeqLocLen(SeqLocPtr anp)4915 NLM_EXTERN Int4 SeqLocLen (SeqLocPtr anp)   /* seqloc */
4916 
4917 {
4918     BioseqPtr bsp;
4919     Int4 len = -1L, tmp;
4920     SeqLocPtr slp;
4921     Boolean locked = FALSE;
4922     ErrSev logsev;
4923     Boolean average = FALSE;
4924     Int2 num;
4925     SeqIdPtr sip;
4926     BIG_ID gi;
4927     SeqMgrPtr smp;
4928     SeqLenLookupFunc func;
4929 
4930 
4931     if (anp == NULL)
4932         return len;
4933 
4934     switch (anp->choice)
4935     {
4936         case SEQLOC_BOND:   /* bond -- 2 seqs */
4937         case SEQLOC_FEAT:   /* feat -- can't track yet */
4938             break;
4939         case SEQLOC_NULL:    /* NULL */
4940         case SEQLOC_EMPTY:    /* empty */
4941             len = 0;
4942             break;
4943         case SEQLOC_WHOLE:    /* whole */
4944             sip = (SeqIdPtr) anp->data.ptrvalue;
4945             bsp = BioseqFindCore(sip);
4946             if (bsp == NULL) {
4947                 if (sip != NULL && sip->choice == SEQID_GI) {
4948                     gi = (BIG_ID) sip->data.intvalue;
4949                     /* try registered service for rapid length lookup */
4950                     smp = SeqMgrWriteLock ();
4951                     if (smp != NULL) {
4952                         func = smp->seq_len_lookup_func;
4953                         SeqMgrUnlock ();
4954                         if (func != NULL) {
4955                             len = (*func) (gi);
4956                             if (len > 0) break;
4957                         }
4958                     }
4959                 }
4960                 logsev = ErrSetLogLevel (SEV_MAX);
4961                 bsp = BioseqLockById(sip);
4962                 ErrSetLogLevel (logsev);
4963                 if (bsp != NULL)
4964                     locked = TRUE;
4965             }
4966             len = BioseqGetLen(bsp);
4967             if (locked)
4968                 BioseqUnlock(bsp);
4969             break;
4970         case SEQLOC_EQUIV:    /* equiv -- ditto */
4971             average = TRUE;
4972         case SEQLOC_MIX:    /* mix -- more than one seq */
4973         case SEQLOC_PACKED_INT:    /* packed int */
4974             slp = (SeqLocPtr)anp->data.ptrvalue;
4975             len = 0;
4976             num = 0;
4977             while (slp != NULL)
4978             {
4979                 tmp = SeqLocLen(slp);
4980                 if (tmp == -1)
4981                     return -1;
4982                 len += tmp;
4983                 num++;
4984                 slp = slp->next;
4985             }
4986             if (average && num != 0) {
4987                 len /= num;
4988             }
4989             break;
4990         case SEQLOC_INT:    /* int */
4991             len = ((SeqIntPtr)anp->data.ptrvalue)->to - ((SeqIntPtr)anp->data.ptrvalue)->from + 1;
4992             break;
4993         case SEQLOC_PNT:    /* pnt */
4994             len = 1;
4995             break;
4996         case SEQLOC_PACKED_PNT:    /* packed pnt */
4997             len = SeqLocStop(anp) - SeqLocStart(anp) + 1;
4998             break;
4999         default:
5000             break;
5001     }
5002     return len;
5003 }
5004 
5005 /*****************************************************************************
5006 *
5007 *   SeqLocRevCmp(loc)
5008 *       reverse complements a SeqLoc
5009 *       NO Check to be sure its on a nucleic acid
5010 *
5011 *****************************************************************************/
SeqLocRevCmp(SeqLocPtr anp)5012 NLM_EXTERN Boolean SeqLocRevCmp (SeqLocPtr anp)   /* seqloc */
5013 
5014 {
5015     SeqLocPtr slp, first, curr, prev;
5016     SeqPntPtr spp;
5017 
5018 
5019     if (anp == NULL)
5020         return FALSE;
5021 
5022     switch (anp->choice)
5023     {
5024         case SEQLOC_BOND:   /* bond -- 2 seqs */
5025             spp = ((SeqBondPtr)anp->data.ptrvalue)->a;
5026             spp->strand = StrandCmp(spp->strand);
5027             spp = ((SeqBondPtr)anp->data.ptrvalue)->b;
5028             if (spp != NULL)
5029                 spp->strand = StrandCmp(spp->strand);
5030             break;
5031         case SEQLOC_FEAT:   /* feat -- can't track yet */
5032         case SEQLOC_NULL:    /* NULL */
5033         case SEQLOC_EMPTY:    /* empty */
5034         case SEQLOC_WHOLE:    /* whole */
5035             break;
5036         case SEQLOC_MIX:    /* mix -- more than one seq */
5037         case SEQLOC_EQUIV:    /* equiv -- ditto */
5038         case SEQLOC_PACKED_INT:    /* packed int */
5039             slp = (SeqLocPtr)anp->data.ptrvalue;
5040             while (slp != NULL)
5041             {
5042                 SeqLocRevCmp(slp);     /* RevCmp subparts */
5043                 slp = slp->next;
5044             }
5045             first = NULL;
5046             curr = NULL;
5047             prev = (SeqLocPtr)anp->data.ptrvalue;
5048             while (prev != NULL)  /* reverse order of parts */
5049             {                      /* no effect on meaning of SEQLOC_EQUIV */
5050                 slp = (SeqLocPtr)anp->data.ptrvalue;
5051                 prev = NULL;
5052                 while (slp->next != NULL)
5053                 {
5054                     prev = slp;
5055                     slp = slp->next;
5056                 }
5057                 if (prev != NULL)
5058                        prev->next = NULL;
5059                 if (first == NULL)
5060                     first = slp;
5061                 else
5062                     curr->next = slp;
5063                 slp->next = NULL;
5064                 curr = slp;
5065             }
5066             anp->data.ptrvalue = first;
5067             break;
5068         case SEQLOC_INT:    /* int */
5069             ((SeqIntPtr)anp->data.ptrvalue)->strand = StrandCmp(((SeqIntPtr)anp->data.ptrvalue)->strand);
5070             break;
5071         case SEQLOC_PNT:    /* pnt */
5072             ((SeqPntPtr)anp->data.ptrvalue)->strand = StrandCmp(((SeqPntPtr)anp->data.ptrvalue)->strand);
5073             break;
5074         case SEQLOC_PACKED_PNT:    /* packed pnt */
5075             ((PackSeqPntPtr)anp->data.ptrvalue)->strand = StrandCmp(((PackSeqPntPtr)anp->data.ptrvalue)->strand);
5076             break;
5077         default:
5078             return FALSE;
5079     }
5080     return TRUE;
5081 }
5082 
5083 /*****************************************************************************
5084 *
5085 *   Uint1 StrandCmp(strand)
5086 *       returns the complement of a Strand
5087 *
5088 *****************************************************************************/
StrandCmp(Uint1 strand)5089 NLM_EXTERN Uint1 StrandCmp (Uint1 strand)
5090 
5091 {
5092     switch(strand)
5093     {
5094         case Seq_strand_unknown:     /* default to plus for this */
5095         case Seq_strand_plus:
5096             return (Uint1) Seq_strand_minus;
5097         case Seq_strand_minus:
5098             return (Uint1) Seq_strand_plus;
5099         case Seq_strand_both:
5100             return (Uint1) Seq_strand_both_rev;
5101         case Seq_strand_both_rev:
5102             return (Uint1) Seq_strand_both;
5103     }
5104     return strand;
5105 }
5106 
5107 
DoStrandsMatch(Uint1 strand1,Uint2 strand2)5108 static Boolean DoStrandsMatch(Uint1 strand1, Uint2 strand2)
5109 {
5110   if (strand1 == Seq_strand_minus && strand2 == Seq_strand_minus) {
5111     return TRUE;
5112   } else if (strand1 != Seq_strand_minus && strand2 != Seq_strand_minus) {
5113     return TRUE;
5114   } else {
5115     return FALSE;
5116   }
5117 }
5118 
5119 
SeqLocMixFromPackedSeqPnt(PackSeqPntPtr pspp)5120 static SeqLocPtr SeqLocMixFromPackedSeqPnt (PackSeqPntPtr pspp)
5121 {
5122     SeqPntPtr pnt;
5123     SeqLocPtr list = NULL, slp = NULL;
5124     Uint1     i;
5125 
5126     if (pspp == NULL)
5127     {
5128         return NULL;
5129     }
5130 
5131     while (pspp != NULL)
5132     {
5133         for (i = 0; i < pspp->used; i++)
5134         {
5135             pnt = SeqPntNew();
5136             pnt->id = SeqIdDup (pspp->id);
5137             pnt->strand = pspp->strand;
5138             pnt->point = pspp->pnts[i];
5139             ValNodeAddPointer (&list, SEQLOC_PNT, pnt);
5140         }
5141         pspp = pspp->next;
5142     }
5143     slp = ValNodeNew (NULL);
5144     slp->choice = SEQLOC_MIX;
5145     slp->data.ptrvalue = list;
5146     return slp;
5147 }
5148 
5149 
SeqLocMixFromSeqBond(SeqBondPtr sbp)5150 static SeqLocPtr SeqLocMixFromSeqBond (SeqBondPtr sbp)
5151 {
5152   SeqPntPtr pnt;
5153   SeqLocPtr list = NULL, slp = NULL;
5154 
5155   if (sbp == NULL || (sbp->a == NULL && sbp->b == NULL)) {
5156     return NULL;
5157   }
5158   if (sbp->a != NULL) {
5159     pnt = AsnIoMemCopy (sbp->a, (AsnReadFunc) SeqPntAsnRead, (AsnWriteFunc) SeqPntAsnWrite);
5160     ValNodeAddPointer (&list, SEQLOC_PNT, pnt);
5161   }
5162   if (sbp->b != NULL) {
5163     pnt = AsnIoMemCopy (sbp->b, (AsnReadFunc) SeqPntAsnRead, (AsnWriteFunc) SeqPntAsnWrite);
5164     ValNodeAddPointer (&list, SEQLOC_PNT, pnt);
5165   }
5166   slp = ValNodeNew (NULL);
5167   slp->choice = SEQLOC_MIX;
5168   slp->data.ptrvalue = list;
5169   return slp;
5170 }
5171 
5172 static
CreateSortedSeqLoc_comparator(VoidPtr ptr1,VoidPtr ptr2)5173 int LIBCALLBACK CreateSortedSeqLoc_comparator (VoidPtr ptr1, VoidPtr ptr2)
5174 {
5175     SeqLocPtr loc_piece1 = *(SeqLocPtr PNTR)ptr1;
5176     SeqLocPtr loc_piece2 = *(SeqLocPtr PNTR)ptr2;
5177     SeqIdPtr sip1;
5178     SeqIdPtr sip2;
5179     Char sip_name1[50];
5180     Char sip_name2[50];
5181     Int4 sip_name_comp;
5182     Int4 start1;
5183     Int4 start2;
5184     Int4 end1;
5185     Int4 end2;
5186 
5187     sip1 = SeqLocId( loc_piece1 );
5188     sip2 = SeqLocId( loc_piece2 );
5189     if( NULL == sip1 && NULL != sip2 ) {
5190         return -1;
5191     } else if( NULL != sip1 && NULL == sip2 ) {
5192         return 1;
5193     } else if( NULL != sip1 && NULL != sip2 ) {
5194         /* compare Seq-ids */
5195         if( ! seqid_name( sip1, sip_name1, FALSE, FALSE ) ) {
5196             sip_name1[0] = '\0';
5197         }
5198         if( ! seqid_name( sip2, sip_name2, FALSE, FALSE ) ) {
5199             sip_name2[0] = '\0';
5200         }
5201         sip_name_comp = StrCmp( sip_name1, sip_name2 );
5202         if( 0 != sip_name_comp ) {
5203             return sip_name_comp;
5204         }
5205     }
5206 
5207     start1 = SeqLocStart(loc_piece1);
5208     start2 = SeqLocStart(loc_piece2);
5209     if( start1 != start2 ) {
5210         return (start1 - start2);
5211     }
5212 
5213     end1 = SeqLocStop(loc_piece1);
5214     end2 = SeqLocStop(loc_piece2);
5215     return (end2 - end1);
5216 }
5217 
5218 /* Note that this doesn't return a SeqLocPtr because it's not creating
5219    a real usable SeqLoc.  Rather, it's returning an array of pointers
5220    into the given loc which points to them in order. */
5221 static SeqLocPtr PNTR
CreateSortedSeqLoc(SeqLocPtr loc,Uint4Ptr out_len)5222 CreateSortedSeqLoc( SeqLocPtr loc, Uint4Ptr out_len )
5223 {
5224     Int4 jj = 0;
5225     SeqLocPtr loc_piece = NULL;
5226     SeqLocPtr PNTR retval = NULL;
5227 
5228     *out_len = 0;
5229 
5230     /* First, see how big loc is */
5231     loc_piece = (SeqLocPtr)loc->data.ptrvalue;
5232     while( NULL != loc_piece ) {
5233         ++(*out_len);
5234         loc_piece = loc_piece->next;
5235     }
5236 
5237     /* allocate enough memory to fit everything, and copy
5238        the (not-yet-sorted) pointers over */
5239     loc_piece = (SeqLocPtr)loc->data.ptrvalue;
5240     retval = (SeqLocPtr PNTR) MemNew( sizeof(SeqLocPtr) * (*out_len) );
5241     for( jj = 0; jj < (*out_len); ++jj ) {
5242         retval[jj] = loc_piece;
5243         loc_piece = loc_piece->next;
5244     }
5245 
5246     /* now, sort what we have */
5247     StableMergeSort( retval, (*out_len), sizeof(SeqLocPtr),
5248                      CreateSortedSeqLoc_comparator );
5249     return retval;
5250 }
5251 
CompareMultiPartLocToMultiPartLoc(SeqLocPtr a,SeqLocPtr b,Boolean compare_strand)5252 static Int2 CompareMultiPartLocToMultiPartLoc(SeqLocPtr a, SeqLocPtr b, Boolean compare_strand)
5253 {
5254   Boolean got_one = FALSE;   /* for any overlap */
5255   Int2 retval = SLC_NO_MATCH,
5256         retval2 = SLC_NO_MATCH;
5257   /* Points to the pieces of a and b in sorted order */
5258   SeqLocPtr PNTR a_sorted = NULL;
5259   Uint4          a_sorted_len = 0;
5260   Uint4          a_idx = 0;     /* used to iterate through */
5261   SeqLocPtr PNTR b_sorted = NULL;
5262   Uint4           b_sorted_len = 0;
5263   Uint4           b_idx = 0;     /* used to iterate through */
5264 
5265   if (a == NULL || b == NULL) {
5266     return SLC_NO_MATCH;
5267   }
5268   if (a->choice != SEQLOC_MIX && a->choice != SEQLOC_EQUIV && a->choice != SEQLOC_PACKED_INT) {
5269     return SLC_NO_MATCH;
5270   }
5271   if (b->choice != SEQLOC_MIX && b->choice != SEQLOC_EQUIV && b->choice != SEQLOC_PACKED_INT) {
5272     return SLC_NO_MATCH;
5273   }
5274 
5275   /* create an array of pointers to the pieces of the seqloc, in order */
5276   a_sorted = CreateSortedSeqLoc( a, &a_sorted_len );
5277   b_sorted = CreateSortedSeqLoc( b, &b_sorted_len );
5278 
5279   /* check for identity */
5280   retval = SeqLocCompareEx(a_sorted[0], b_sorted[0], compare_strand);
5281   a_idx = 1;
5282   b_idx = 1;
5283   while ((a_idx < a_sorted_len) && (b_idx < b_sorted_len) && (retval == SLC_A_EQ_B))
5284   {
5285       retval = SeqLocCompareEx(a_sorted[a_idx], b_sorted[b_idx], compare_strand);
5286       ++a_idx;
5287       ++b_idx;
5288   }
5289   if ((a_idx == a_sorted_len) && (b_idx == b_sorted_len) && (retval == SLC_A_EQ_B))
5290       goto done;
5291 
5292   /* check for a in b */
5293   a_idx = 0;
5294   b_idx = 0;
5295   while ((a_idx < a_sorted_len) && (b_idx < b_sorted_len))
5296   {
5297       retval2 = SeqLocCompareEx(a_sorted[a_idx], b_sorted[b_idx], compare_strand);
5298       if (retval2 > SLC_NO_MATCH)
5299           got_one = TRUE;
5300       switch (retval2)
5301       {
5302           case SLC_NO_MATCH:
5303               ++b_idx;
5304               break;
5305           case SLC_A_EQ_B:
5306               ++a_idx;
5307               ++b_idx;
5308               break;
5309           case SLC_A_IN_B:
5310               ++a_idx;
5311               break;
5312           case SLC_B_IN_A:
5313           case SLC_A_OVERLAP_B:
5314               b_idx = b_sorted_len;
5315               break;
5316       }
5317   }
5318   if (a_idx == a_sorted_len) {   /* a all in b */
5319       retval = SLC_A_IN_B;
5320       goto done;
5321   }
5322 
5323   /* check for b in a */
5324   a_idx = 0;
5325   b_idx = 0;
5326   while ((a_idx < a_sorted_len) && (b_idx < b_sorted_len))
5327   {
5328       retval2 = SeqLocCompareEx(b_sorted[b_idx], a_sorted[a_idx], compare_strand);
5329       if (retval2 > SLC_NO_MATCH)
5330           got_one = TRUE;
5331       switch (retval2)
5332       {
5333           case SLC_NO_MATCH:
5334               ++a_idx;
5335               break;
5336           case SLC_A_EQ_B:
5337               ++a_idx;
5338               ++b_idx;
5339               break;
5340           case SLC_A_IN_B:
5341               ++b_idx;
5342               break;
5343           case SLC_B_IN_A:
5344           case SLC_A_OVERLAP_B:
5345               a_idx = a_sorted_len;
5346               break;
5347       }
5348   }
5349   if (b_idx == b_sorted_len) {   /* b all in a */
5350       retval = SLC_B_IN_A;
5351       goto done;
5352   }
5353 
5354   if (got_one) {
5355       retval = SLC_A_OVERLAP_B;
5356       goto done;
5357   }
5358 
5359   /* goto here instead of just calling "return" so we can clean up */
5360 done:
5361 
5362   if( NULL != a_sorted ) {
5363       a_sorted = MemFree(a_sorted);
5364   }
5365   if( NULL != b_sorted ) {
5366       b_sorted = MemFree(b_sorted);
5367   }
5368 
5369   return retval;
5370 }
5371 
5372 /*****************************************************************************
5373 *
5374 *   SeqLocCompare(a, b)
5375 *       returns
5376 *       0 = no overlap
5377 *       1 = a is completely contained in b
5378 *       2 = b is completely contained in a
5379 *       3 = a == b
5380 *       4 = a and b overlap, but neither completely contained in the other
5381 *
5382 *
5383 *****************************************************************************/
SeqLocCompareEx(SeqLocPtr a,SeqLocPtr b,Boolean compare_strand)5384 NLM_EXTERN Int2 SeqLocCompareEx (SeqLocPtr a, SeqLocPtr b, Boolean compare_strand)   /* seqloc */
5385 
5386 {
5387     BioseqPtr bsp;
5388     Int4 len = -1L, i, j, num, num2, point, hits;
5389     Uint1 strand;
5390     SeqLocPtr slp, tmp_a = NULL, tmp_b = NULL;
5391     ValNode tmp;
5392     SeqBondPtr sbp;
5393     SeqIntPtr sip, sip2;
5394     SeqIdPtr sidp;
5395     PackSeqPntPtr pspp, pspp2;
5396     Boolean got_one, missed_one, locked = FALSE;
5397     Int2 retval = SLC_NO_MATCH,
5398          retval2 = SLC_NO_MATCH;
5399     static Uint1 rettable [5][5] = {      /* for developing return values */
5400         { 0,4,2,2,4 } ,                      /* when a is longer than b */
5401         { 4,1,4,1,4 } ,
5402         { 2,4,2,2,4 } ,
5403         { 2,1,2,3,4 } ,
5404         { 4,4,4,4,4 }};
5405     static Uint1 rettable2 [5][5] = {      /* for developing return values */
5406         { 0,1,4,1,4 } ,                      /* when b is longer than a */
5407         { 1,1,1,1,1 } ,
5408         { 4,1,2,2,4 } ,
5409         { 1,1,4,3,4 } ,
5410         { 4,1,4,4,4 }};
5411 
5412     if ((a == NULL) || (b == NULL))
5413         return retval;
5414 
5415     switch (a->choice)
5416     {
5417         case SEQLOC_MIX:    /* mix -- more than one seq */
5418         case SEQLOC_EQUIV:    /* equiv -- ditto */
5419         case SEQLOC_PACKED_INT:    /* packed int */
5420         case SEQLOC_PACKED_PNT: /* packed points (need to convert to SEQLOC_MIX) */
5421         case SEQLOC_BOND: /* bond (need to convert to SEQLOC_MIX) */
5422             if (a->choice == SEQLOC_PACKED_PNT)
5423             {
5424                 tmp_a = SeqLocMixFromPackedSeqPnt ((PackSeqPntPtr)a->data.ptrvalue);
5425                 a = tmp_a;
5426             }
5427             else if (a->choice == SEQLOC_BOND)
5428             {
5429                 tmp_a = SeqLocMixFromSeqBond ((SeqBondPtr)a->data.ptrvalue);
5430                 a = tmp_a;
5431             }
5432             if ((b->choice == SEQLOC_MIX) ||  /* check for identity */
5433                 (b->choice == SEQLOC_EQUIV) ||
5434                 (b->choice == SEQLOC_PACKED_INT) ||
5435                 (b->choice == SEQLOC_PACKED_PNT) ||
5436                 (b->choice == SEQLOC_BOND))
5437             {
5438                 if (b->choice == SEQLOC_PACKED_PNT)
5439                 {
5440                     tmp_b = SeqLocMixFromPackedSeqPnt ((PackSeqPntPtr)b->data.ptrvalue);
5441                     b = tmp_b;
5442                 }
5443                 else if (b->choice == SEQLOC_BOND)
5444                 {
5445                     tmp_b = SeqLocMixFromSeqBond ((SeqBondPtr)b->data.ptrvalue);
5446                     b = tmp_b;
5447                 }
5448                 retval = CompareMultiPartLocToMultiPartLoc (a, b, compare_strand);
5449                 if (retval != SLC_NO_MATCH) {
5450                   tmp_a = SeqLocFree (tmp_a);
5451                   tmp_b = SeqLocFree (tmp_b);
5452                   return retval;
5453                 }
5454             }
5455 
5456             slp = (SeqLocPtr)a->data.ptrvalue; /* check for any overlap */
5457             retval = SeqLocCompareEx(slp, b, compare_strand);
5458             slp = slp->next;
5459             while (slp != NULL)
5460             {
5461                 retval2 = SeqLocCompareEx(slp, b, compare_strand);
5462                 retval = (Int2) rettable[retval][retval2];
5463                 slp = slp->next;
5464             }
5465             tmp_a = SeqLocFree (tmp_a);
5466             tmp_b = SeqLocFree (tmp_b);
5467             return retval;
5468             break;
5469         default:
5470             break;
5471     }
5472     switch (b->choice)
5473     {
5474         case SEQLOC_MIX:    /* mix -- more than one seq */
5475         case SEQLOC_EQUIV:    /* equiv -- ditto */
5476         case SEQLOC_PACKED_INT:    /* packed int */
5477             slp = (SeqLocPtr)b->data.ptrvalue;
5478             retval = SeqLocCompareEx(a, slp, compare_strand);
5479             slp = slp->next;
5480             while (slp != NULL)
5481             {
5482                 retval2 = SeqLocCompareEx(a, slp, compare_strand);
5483                 retval = (Int2)rettable2[retval][retval2];
5484                 slp = slp->next;
5485             }
5486             return retval;
5487             break;
5488         default:
5489             break;
5490     }
5491 
5492     tmp.next = NULL;
5493     switch (a->choice)
5494     {
5495         case SEQLOC_NULL:    /* NULL, can't match */
5496             if (b->choice == SEQLOC_NULL)
5497                 retval = SLC_A_EQ_B;
5498             break;
5499         case SEQLOC_FEAT:   /* feat -- can't track yet */
5500             break;
5501         case SEQLOC_EMPTY:    /* empty */
5502             if (b->choice == SEQLOC_EMPTY)
5503             {
5504                 if (SeqIdForSameBioseq((SeqIdPtr)a->data.ptrvalue, (SeqIdPtr)b->data.ptrvalue))
5505                     retval = SLC_A_EQ_B;
5506             }
5507             break;
5508         case SEQLOC_BOND:   /* bond -- 2 seqs */
5509             sbp = (SeqBondPtr)a->data.ptrvalue;
5510             tmp.choice = SEQLOC_PNT;    /* check the points */
5511             tmp.data.ptrvalue = (Pointer)sbp->a;
5512             retval = SeqLocCompareEx(&tmp, b, compare_strand);
5513             if (sbp->b != NULL)
5514             {
5515                 tmp.data.ptrvalue = (Pointer)sbp->b;
5516                 retval2 = SeqLocCompareEx(&tmp, b, compare_strand);
5517                 retval = (Int2) rettable[retval][retval2];
5518             }
5519             break;
5520         case SEQLOC_WHOLE:    /* whole */
5521             sidp = (SeqIdPtr)a->data.ptrvalue;
5522             switch (b->choice)
5523             {
5524                 case SEQLOC_BOND:   /* bond -- 2 seqs */
5525                     sbp = (SeqBondPtr)b->data.ptrvalue;
5526                     if (SeqIdForSameBioseq(sbp->a->id, sidp))
5527                         retval = SLC_B_IN_A;
5528                     if (sbp->b != NULL)
5529                     {
5530                         if (SeqIdForSameBioseq(sbp->b->id, sidp))
5531                             retval2 = SLC_B_IN_A;
5532                         retval = (Int2) rettable2[retval][retval2];
5533                     }
5534                     break;
5535                 case SEQLOC_WHOLE:    /* whole */
5536                     if (SeqIdForSameBioseq(sidp, (SeqIdPtr)b->data.ptrvalue))
5537                         retval = SLC_A_EQ_B;
5538                     break;
5539                 case SEQLOC_INT:    /* int */
5540                     sip = (SeqIntPtr)b->data.ptrvalue;
5541                     if (SeqIdForSameBioseq(sidp, sip->id))
5542                     {
5543                         retval = SLC_B_IN_A;
5544                     bsp = BioseqFindCore(sidp);
5545                         if (bsp == NULL)
5546                         {
5547                             bsp = BioseqLockById(sidp);
5548                             if (bsp != NULL)
5549                                 locked = TRUE;
5550                         }
5551                         if (bsp != NULL)
5552                         {
5553                             len = BioseqGetLen(bsp);
5554                             if ((sip->from == 0) && (sip->to == (len - 1)))
5555                                 retval = SLC_A_EQ_B;
5556                         }
5557                         if (locked)
5558                             BioseqUnlock(bsp);
5559                     }
5560                     break;
5561                 case SEQLOC_PNT:    /* pnt */
5562                     if (SeqIdForSameBioseq(sidp, ((SeqPntPtr)b->data.ptrvalue)->id))
5563                         retval = SLC_B_IN_A;
5564                     break;
5565                 case SEQLOC_PACKED_PNT:    /* packed pnt */
5566                     got_one = FALSE;
5567                     missed_one = FALSE;
5568                     for (pspp = (PackSeqPntPtr)b->data.ptrvalue;
5569                          pspp != NULL;
5570                          pspp = pspp->next)
5571                     {
5572                         if (SeqIdForSameBioseq(sidp, pspp->id))
5573                         {
5574                             got_one = TRUE;
5575                         }
5576                         else
5577                         {
5578                             missed_one = TRUE;
5579                         }
5580                     }
5581                     if (got_one)
5582                     {
5583                         if (missed_one)
5584                         {
5585                             retval = SLC_A_OVERLAP_B;
5586                         }
5587                         else
5588                         {
5589                             retval = SLC_B_IN_A;
5590                         }
5591                     }
5592                     break;
5593                 default:
5594                     break;
5595             }
5596             break;
5597         case SEQLOC_INT:    /* int */
5598             sip = (SeqIntPtr)a->data.ptrvalue;
5599             sidp = sip->id;
5600             switch (b->choice)
5601             {
5602                 case SEQLOC_BOND:   /* bond -- 2 seqs */
5603                     sbp = (SeqBondPtr)b->data.ptrvalue;
5604                     if (SeqIdForSameBioseq(sbp->a->id, sidp))
5605                     {
5606                         if ((sip->from <= sbp->a->point) &&
5607                             (sip->to >= sbp->a->point) &&
5608                             (!compare_strand || DoStrandsMatch(sip->strand, sbp->a->strand)))
5609                         {
5610                             retval = SLC_B_IN_A;
5611                         }
5612                     }
5613                     if (sbp->b != NULL)
5614                     {
5615                         if (SeqIdForSameBioseq(sbp->b->id, sidp))
5616                         {
5617                             if ((sip->from <= sbp->b->point) &&
5618                                 (sip->to >= sbp->b->point) &&
5619                                 (!compare_strand || DoStrandsMatch(sip->strand, sbp->b->strand)))
5620                             {
5621                                   retval2 = SLC_B_IN_A;
5622                             }
5623                         }
5624                         retval = (Int2) rettable2[retval][retval2];
5625                     }
5626                     break;
5627                 case SEQLOC_WHOLE:    /* whole */
5628                     if (SeqIdForSameBioseq(sidp, (SeqIdPtr)b->data.ptrvalue))
5629                     {
5630                         retval = SLC_A_IN_B;
5631                         bsp = BioseqFindCore((SeqIdPtr)b->data.ptrvalue);
5632                         if (bsp == NULL)
5633                         {
5634                             bsp = BioseqLockById((SeqIdPtr)b->data.ptrvalue);
5635                             if (bsp != NULL)
5636                                 locked = TRUE;
5637                         }
5638                         if (bsp != NULL)
5639                         {
5640                             len = BioseqGetLen(bsp);
5641                             if ((sip->from == 0) && (sip->to == (len - 1)))
5642                                 retval = SLC_A_EQ_B;
5643                         }
5644                         if (locked)
5645                             BioseqUnlock(bsp);
5646                     }
5647                     break;
5648                 case SEQLOC_INT:    /* int */
5649                     sip2 = (SeqIntPtr)b->data.ptrvalue;
5650                     if (SeqIdForSameBioseq(sidp, sip2->id)
5651                         && (!compare_strand || DoStrandsMatch (sip->strand, sip2->strand)))
5652                     {
5653                         if ((sip->from == sip2->from) && (sip->to == sip2->to))
5654                             retval = SLC_A_EQ_B;
5655                         else if ((sip->from <= sip2->from) && (sip->to >= sip2->to))
5656                             retval = SLC_B_IN_A;
5657                         else if ((sip->from >= sip2->from) && (sip->to <= sip2->to))
5658                             retval = SLC_A_IN_B;
5659                         else if ((sip->from >= sip2->from) && (sip->from <= sip2->to))
5660                             retval = SLC_A_OVERLAP_B;
5661                         else if ((sip->to >= sip2->from) && (sip->to <= sip2->to))
5662                             retval = SLC_A_OVERLAP_B;
5663                     }
5664                     break;
5665                 case SEQLOC_PNT:    /* pnt */
5666                     if (SeqIdForSameBioseq(sidp, ((SeqPntPtr)b->data.ptrvalue)->id)
5667                         && (!compare_strand || DoStrandsMatch (sip->strand, ((SeqPntPtr)b->data.ptrvalue)->strand)))
5668                     {
5669                         point = ((SeqPntPtr)b->data.ptrvalue)->point;
5670                         if ((point == sip->from) && (point == sip->to))
5671                             retval = SLC_A_EQ_B;
5672                         else if ((point >= sip->from) && (point <= sip->to))
5673                             retval = SLC_B_IN_A;
5674                     }
5675                     break;
5676                 case SEQLOC_PACKED_PNT:    /* packed pnt */
5677                     pspp = (PackSeqPntPtr)b->data.ptrvalue;
5678                     got_one = FALSE;
5679                     missed_one = FALSE;
5680                     while (pspp != NULL)
5681                     {
5682                         if (SeqIdForSameBioseq(sidp, pspp->id)
5683                             && (!compare_strand || DoStrandsMatch (sip->strand, pspp->strand)))
5684                         {
5685                             num = pspp->used;
5686                             for (i = 0; i < num; i++)
5687                             {
5688                                 point = pspp->pnts[i];
5689                                 if ((point < sip->from) || (point > sip->to))
5690                                 {
5691                                     missed_one = TRUE;
5692                                 }
5693                                 else
5694                                 {
5695                                     got_one = TRUE;
5696                                 }
5697                             }
5698                         }
5699                         pspp = pspp->next;
5700                     }
5701                     if (got_one)
5702                     {
5703                         if (missed_one)
5704                             retval = SLC_A_OVERLAP_B;
5705                         else
5706                             retval = SLC_B_IN_A;
5707                     }
5708                     break;
5709                 default:
5710                     break;
5711             }
5712             break;
5713         case SEQLOC_PNT:    /* pnt */
5714             sidp = ((SeqPntPtr)a->data.ptrvalue)->id;
5715             point = ((SeqPntPtr)a->data.ptrvalue)->point;
5716             strand = ((SeqPntPtr)a->data.ptrvalue)->strand;
5717             switch (b->choice)
5718             {
5719                 case SEQLOC_BOND:   /* bond -- 2 seqs */
5720                     sbp = (SeqBondPtr)b->data.ptrvalue;
5721                     if (SeqIdForSameBioseq(sbp->a->id, sidp)
5722                         && (!compare_strand || DoStrandsMatch (sbp->a->strand, strand)))
5723                     {
5724                         if (point == sbp->a->point)
5725                             retval = SLC_A_EQ_B;
5726                     }
5727                     if (sbp->b != NULL)
5728                     {
5729                         if (SeqIdForSameBioseq(sbp->b->id, sidp)
5730                             && (!compare_strand || DoStrandsMatch (sbp->b->strand, strand)))
5731                         {
5732                             if (point == sbp->b->point)
5733                                 retval2 = SLC_A_EQ_B;
5734                         }
5735                         retval = (Int2) rettable2[retval][retval2];
5736                     }
5737                     break;
5738                 case SEQLOC_WHOLE:    /* whole */
5739                     if (SeqIdForSameBioseq(sidp, (SeqIdPtr)b->data.ptrvalue))
5740                         retval = SLC_A_IN_B;
5741                     break;
5742                 case SEQLOC_INT:    /* int */
5743                     sip2 = (SeqIntPtr)b->data.ptrvalue;
5744                     if (SeqIdForSameBioseq(sidp, sip2->id)
5745                         && (!compare_strand || DoStrandsMatch (sip2->strand, strand)))
5746                     {
5747                         if ((point == sip2->from) && (point == sip2->to))
5748                             retval = SLC_A_EQ_B;
5749                         else if ((point >= sip2->from) && (point <= sip2->to))
5750                             retval = SLC_A_IN_B;
5751                     }
5752                     break;
5753                 case SEQLOC_PNT:    /* pnt */
5754                     if (SeqIdForSameBioseq(sidp, ((SeqPntPtr)b->data.ptrvalue)->id)
5755                         && (!compare_strand || DoStrandsMatch (strand, ((SeqPntPtr)b->data.ptrvalue)->strand)))
5756                     {
5757                         if (point == ((SeqPntPtr)b->data.ptrvalue)->point)
5758                             retval = SLC_A_EQ_B;
5759                     }
5760                     break;
5761                 case SEQLOC_PACKED_PNT:    /* packed pnt */
5762                     pspp = (PackSeqPntPtr)b->data.ptrvalue;
5763                     got_one = FALSE;
5764                     missed_one = FALSE;
5765                     while (pspp != NULL) {
5766                         if (SeqIdForSameBioseq(sidp, pspp->id)
5767                             && (!compare_strand || DoStrandsMatch (strand, pspp->strand)))
5768                         {
5769                             num = pspp->used;
5770                             for (i = 0; i < num; i++)
5771                             {
5772                                 if (point == pspp->pnts[i])
5773                                 {
5774                                     got_one = TRUE;
5775                                 }
5776                                 else
5777                                 {
5778                                     missed_one = TRUE;
5779                                 }
5780                             }
5781                         }
5782                         else
5783                         {
5784                             missed_one = TRUE;
5785                         }
5786                         pspp = pspp->next;
5787                     }
5788                     if (got_one)
5789                     {
5790                         if (missed_one)
5791                         {
5792                             retval = SLC_A_IN_B;
5793                         }
5794                         else
5795                         {
5796                             retval = SLC_A_EQ_B;
5797                         }
5798                     }
5799                     break;
5800                 default:
5801                     break;
5802             }
5803             break;
5804         case SEQLOC_PACKED_PNT:    /* packed pnt */
5805             pspp = (PackSeqPntPtr)a->data.ptrvalue;
5806             num = PackSeqPntNum(pspp);
5807             sidp = pspp->id;
5808             switch (b->choice)
5809             {
5810                 case SEQLOC_BOND:   /* bond -- 2 seqs */
5811                     sbp = (SeqBondPtr)b->data.ptrvalue;
5812                     if (SeqIdForSameBioseq(sbp->a->id, sidp)
5813                         && (!compare_strand || DoStrandsMatch (pspp->strand, sbp->a->strand)))
5814                     {
5815                         point = sbp->a->point;
5816                         for (i = 0; i < num; i++)
5817                         {
5818                             if (point == PackSeqPntGet(pspp, i))
5819                             {
5820                                 retval = SLC_B_IN_A;
5821                                 i = num;
5822                             }
5823                         }
5824                     }
5825                     if (sbp->b != NULL)
5826                     {
5827                         if (SeqIdForSameBioseq(sbp->b->id, sidp)
5828                             && (!compare_strand || DoStrandsMatch(pspp->strand, sbp->b->strand)))
5829                         {
5830                             point = sbp->b->point;
5831                             for (i = 0; i < num; i++)
5832                             {
5833                                 if (point == PackSeqPntGet(pspp, i))
5834                                 {
5835                                     if (retval != SLC_B_IN_A)
5836                                         retval = SLC_A_OVERLAP_B;
5837                                     i = num + 1;
5838                                 }
5839                             }
5840                             if ((i != num) && (retval == SLC_B_IN_A))
5841                                 retval = SLC_A_OVERLAP_B;
5842                         }
5843                     }
5844                     break;
5845                 case SEQLOC_WHOLE:    /* whole */
5846                     if (SeqIdForSameBioseq(sidp, (SeqIdPtr)b->data.ptrvalue))
5847                         retval = SLC_A_IN_B;
5848                     break;
5849                 case SEQLOC_INT:    /* int */
5850                     sip = (SeqIntPtr)b->data.ptrvalue;
5851                     if (SeqIdForSameBioseq(sidp, sip->id)
5852                         && (!compare_strand || DoStrandsMatch(sip->strand, pspp->strand)))
5853                     {
5854                         got_one = FALSE;
5855                         missed_one = FALSE;
5856                         for (i = 0; i < num; i++)
5857                         {
5858                             point = PackSeqPntGet(pspp, i);
5859                             if ((point < sip->from) || (point > sip->to))
5860                             {
5861                                 missed_one = TRUE;
5862                                 if (got_one)
5863                                     i = num + 1;
5864                             }
5865                             else
5866                             {
5867                                 got_one = TRUE;
5868                                 if (missed_one)
5869                                     i = num + 1;
5870                             }
5871                         }
5872                         if (got_one)
5873                         {
5874                             if (missed_one)
5875                                 retval = SLC_A_OVERLAP_B;
5876                             else
5877                                 retval = SLC_A_IN_B;
5878                         }
5879                     }
5880                     break;
5881                 case SEQLOC_PNT:    /* pnt */
5882                     if (SeqIdForSameBioseq(sidp, ((SeqPntPtr)b->data.ptrvalue)->id)
5883                         && (!compare_strand || DoStrandsMatch (pspp->strand, ((SeqPntPtr)b->data.ptrvalue)->strand)))
5884                     {
5885                         point = ((SeqPntPtr)b->data.ptrvalue)->point;
5886                         for (i = 0; i < num; i++)
5887                         {
5888                             if (point == PackSeqPntGet(pspp, i))
5889                             {
5890                                 retval = SLC_B_IN_A;
5891                                 i = num + 1;
5892                             }
5893                         }
5894                     }
5895                     break;
5896                 case SEQLOC_PACKED_PNT:    /* packed pnt */
5897                     pspp2 = (PackSeqPntPtr)b->data.ptrvalue;
5898                     if (SeqIdForSameBioseq(sidp, pspp->id)
5899                         && (!compare_strand || DoStrandsMatch(pspp->strand, pspp2->strand)))
5900                     {
5901                         num2 = PackSeqPntNum(pspp2);
5902                         if (num == num2)   /* check for identity */
5903                         {
5904                             for (i = 0; i < num; i++)
5905                             {
5906                                 if ( PackSeqPntGet(pspp, i) !=
5907                                      PackSeqPntGet(pspp2, i))
5908                                     i = num + 1;
5909                             }
5910                             if (i == num)
5911                                 retval = SLC_A_EQ_B;
5912                         }
5913                         if (retval != SLC_A_EQ_B)
5914                         {
5915                             hits = 0;
5916                             for (i = 0; i < num; i++)
5917                             {
5918                                 point = PackSeqPntGet(pspp, i);
5919                                 for (j = 0; j < num2; j++)
5920                                 {
5921                                     if (point == PackSeqPntGet(pspp2, j))
5922                                         hits++;
5923                                 }
5924                             }
5925                             if (hits == num)
5926                                 retval = SLC_A_IN_B;
5927                             else if (hits == num2)
5928                                 retval = SLC_B_IN_A;
5929                         }
5930                     }
5931                     break;
5932                 default:
5933                     break;
5934             }
5935             break;
5936         default:
5937             break;
5938     }
5939     return retval;
5940 }
5941 
5942 
SeqLocCompare(SeqLocPtr a,SeqLocPtr b)5943 NLM_EXTERN Int2 SeqLocCompare (SeqLocPtr a, SeqLocPtr b)   /* seqloc */
5944 {
5945   return SeqLocCompareEx (a, b, FALSE);
5946 }
5947 
5948 
ComplementLocCompare(Uint1 val)5949 static Uint1 ComplementLocCompare (Uint1 val)
5950 {
5951   if (val == SLC_A_IN_B) {
5952     val = SLC_B_IN_A;
5953   } else if (val == SLC_B_IN_A) {
5954     val = SLC_A_IN_B;
5955   }
5956   return val;
5957 }
5958 
5959 
CheckSeqLocCompResults(SeqLocPtr a,SeqLocPtr b,Uint1 allow_strand,Uint1 check_strand)5960 static Boolean CheckSeqLocCompResults (SeqLocPtr a, SeqLocPtr b, Uint1 allow_strand, Uint1 check_strand)
5961 {
5962   Boolean rval = TRUE;
5963 
5964   if (SeqLocCompare(a, b) != allow_strand) {
5965     rval = FALSE;
5966   } else if (SeqLocCompareEx(a, b, TRUE) != check_strand) {
5967     rval = FALSE;
5968   } else if (SeqLocCompare(b, a) != ComplementLocCompare(allow_strand)) {
5969     rval = FALSE;
5970   } else if (SeqLocCompareEx(b, a, TRUE) != ComplementLocCompare(check_strand)) {
5971     rval = FALSE;
5972   }
5973   return rval;
5974 }
5975 
5976 
UnitTestSeqLocCompare(void)5977 NLM_EXTERN Boolean UnitTestSeqLocCompare (void)
5978 {
5979   SeqLocPtr a, b;
5980   SeqIdPtr sip, sip2 = NULL;
5981   SeqIntPtr sint1, sint2, sint3, sint4;
5982   TextSeqIdPtr tsip, tsip2 = NULL;
5983   ValNodePtr list = NULL, list2 = NULL;
5984   SeqPntPtr pnt1, pnt2, pnt3, pnt4;
5985   PackSeqPntPtr pspp1, pspp2;
5986   SeqBondPtr sbp1, sbp2;
5987   Boolean    rval = FALSE;
5988 
5989   a = ValNodeNew (NULL);
5990 
5991   b = ValNodeNew (NULL);
5992 
5993   tsip = TextSeqIdNew ();
5994   tsip->accession = StringSave ("AY123456");
5995   sip = ValNodeNew (NULL);
5996   sip->choice = SEQID_GENBANK;
5997   sip->data.ptrvalue = tsip;
5998 
5999   tsip2 = TextSeqIdNew ();
6000   tsip2->accession = StringSave ("AY123457");
6001   sip2 = ValNodeNew (NULL);
6002   sip2->choice = SEQID_GENBANK;
6003   sip2->data.ptrvalue = tsip2;
6004 
6005   sint1 = SeqIntNew ();
6006   sint1->id = sip;
6007   sint1->from = 0;
6008   sint1->to = 10;
6009 
6010   sint2 = SeqIntNew ();
6011   sint2->id = sip;
6012   sint2->from = 15;
6013   sint2->to = 25;
6014 
6015   sint3 = SeqIntNew ();
6016   sint3->id = sip;
6017   sint3->from = 0;
6018   sint3->to = 10;
6019 
6020   sint4 = SeqIntNew ();
6021   sint4->id = sip;
6022   sint4->from = 15;
6023   sint4->to = 25;
6024 
6025   pnt1 = SeqPntNew ();
6026   pnt1->id = sip;
6027   pnt1->point = 5;
6028 
6029   pnt2 = SeqPntNew ();
6030   pnt2->id = sip;
6031   pnt2->point = 16;
6032 
6033   pnt3 = SeqPntNew ();
6034   pnt3->id = sip;
6035   pnt3->point = 5;
6036 
6037   pnt4 = SeqPntNew ();
6038   pnt4->id = sip;
6039   pnt4->point = 16;
6040 
6041   sbp1 = SeqBondNew ();
6042   sbp1->a = pnt1;
6043   sbp1->b = pnt2;
6044 
6045   sbp2 = SeqBondNew ();
6046   sbp2->a = pnt3;
6047   sbp2->b = pnt4;
6048 
6049   pspp1 = PackSeqPntNew ();
6050   pspp1->id = sip;
6051   pspp1->used = 2;
6052   pspp1->pnts[0] = 5;
6053   pspp1->pnts[1] = 16;
6054 
6055   pspp2 = PackSeqPntNew ();
6056   pspp2->id = sip;
6057   pspp2->used = 2;
6058   pspp2->pnts[0] = 5;
6059   pspp2->pnts[1] = 16;
6060 
6061   /* NULL */
6062   /* NULL vs NULL */
6063   a->choice = SEQLOC_NULL;
6064   b->choice = SEQLOC_NULL;
6065   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_EQ_B)) {
6066     goto UnitTestSeqLocCompare_end;
6067   }
6068   /* NULL vs EMPTY */
6069   b->choice = SEQLOC_EMPTY;
6070   b->data.ptrvalue = sip;
6071   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6072     goto UnitTestSeqLocCompare_end;
6073   }
6074 
6075   /* NULL vs WHOLE */
6076   b->choice = SEQLOC_WHOLE;
6077   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6078     goto UnitTestSeqLocCompare_end;
6079   }
6080 
6081   /* NULL vs INT */
6082   b->choice = SEQLOC_INT;
6083   b->data.ptrvalue = sint1;
6084   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6085     goto UnitTestSeqLocCompare_end;
6086   }
6087 
6088   /* NULL vs PACKED INT */
6089   ValNodeAddPointer (&list, SEQLOC_INT, sint1);
6090   ValNodeAddPointer (&list, SEQLOC_INT, sint2);
6091   b->choice = SEQLOC_PACKED_INT;
6092   b->data.ptrvalue = list;
6093   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6094     goto UnitTestSeqLocCompare_end;
6095   }
6096   list = ValNodeFree (list);
6097 
6098   /* NULL vs point */
6099   b->choice = SEQLOC_PNT;
6100   b->data.ptrvalue = pnt1;
6101   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6102     goto UnitTestSeqLocCompare_end;
6103   }
6104 
6105   /* NULL vs. packed pnt */
6106   b->choice = SEQLOC_PACKED_PNT;
6107   b->data.ptrvalue = pspp1;
6108   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6109     goto UnitTestSeqLocCompare_end;
6110   }
6111 
6112   /* NULL vs MIX */
6113   list = ValNodeNew (NULL);
6114   list->choice = SEQLOC_INT;
6115   list->data.ptrvalue = sint1;
6116   b->choice = SEQLOC_MIX;
6117   b->data.ptrvalue = list;
6118   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6119     goto UnitTestSeqLocCompare_end;
6120   }
6121   list = ValNodeFree (list);
6122 
6123   /* NULL vs BOND */
6124   b->choice = SEQLOC_BOND;
6125   b->data.ptrvalue = sbp1;
6126   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6127     goto UnitTestSeqLocCompare_end;
6128   }
6129 
6130   /* EMPTY vs EMPTY */
6131   a->choice = SEQLOC_EMPTY;
6132   a->data.ptrvalue = sip;
6133   b->choice = SEQLOC_EMPTY;
6134   b->data.ptrvalue = sip;
6135   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_EQ_B)) {
6136     goto UnitTestSeqLocCompare_end;
6137   }
6138 
6139   b->data.ptrvalue = sip2;
6140   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6141     goto UnitTestSeqLocCompare_end;
6142   }
6143 
6144   /* EMPTY vs WHOLE */
6145   b->choice = SEQLOC_WHOLE;
6146   b->data.ptrvalue = sip;
6147   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6148     goto UnitTestSeqLocCompare_end;
6149   }
6150 
6151   /* EMPTY vs INT */
6152   b->choice = SEQLOC_INT;
6153   sint1->id = sip;
6154   b->data.ptrvalue = sint1;
6155   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6156     goto UnitTestSeqLocCompare_end;
6157   }
6158 
6159   /* EMPTY vs packed-int */
6160   list = NULL;
6161   ValNodeAddPointer (&list, 0, sint1);
6162   ValNodeAddPointer (&list, 0, sint2);
6163   b->choice = SEQLOC_PACKED_INT;
6164   b->data.ptrvalue = list;
6165   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6166     goto UnitTestSeqLocCompare_end;
6167   }
6168   list = ValNodeFree (list);
6169 
6170   /* EMPTY vs point */
6171   b->choice = SEQLOC_PNT;
6172   b->data.ptrvalue = pnt1;
6173   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6174     goto UnitTestSeqLocCompare_end;
6175   }
6176 
6177   /* EMPTY vs. packed pnt */
6178   b->choice = SEQLOC_PACKED_PNT;
6179   b->data.ptrvalue = pspp1;
6180   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6181     goto UnitTestSeqLocCompare_end;
6182   }
6183 
6184   /* EMPTY vs MIX */
6185   list = ValNodeNew (NULL);
6186   list->choice = SEQLOC_INT;
6187   list->data.ptrvalue = sint1;
6188   b->choice = SEQLOC_MIX;
6189   b->data.ptrvalue = list;
6190   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6191     goto UnitTestSeqLocCompare_end;
6192   }
6193   list = ValNodeFree (list);
6194 
6195   /* EMPTY vs BOND */
6196   b->choice = SEQLOC_BOND;
6197   b->data.ptrvalue = sbp1;
6198   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6199     goto UnitTestSeqLocCompare_end;
6200   }
6201 
6202   a->choice = SEQLOC_WHOLE;
6203   /* WHOLE vs INT */
6204   b->choice = SEQLOC_INT;
6205   sint1->id = sip;
6206   sint1->from = 0;
6207   sint1->to = 10;
6208   sint1->strand = Seq_strand_plus;
6209   b->data.ptrvalue = sint1;
6210   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6211     goto UnitTestSeqLocCompare_end;
6212   }
6213   sint1->from = 0;
6214   sint1->to = 484;
6215   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_EQ_B)) {
6216     goto UnitTestSeqLocCompare_end;
6217   }
6218   sint1->strand = Seq_strand_minus;
6219   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_EQ_B)) {
6220     goto UnitTestSeqLocCompare_end;
6221   }
6222   sint1->id = sip2;
6223   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6224     goto UnitTestSeqLocCompare_end;
6225   }
6226   sint1->id = sip;
6227   sint1->from = 0;
6228   sint1->to = 10;
6229   sint1->strand = 0;
6230 
6231   /* WHOLE vs packed int */
6232   list = NULL;
6233   ValNodeAddPointer (&list, SEQLOC_INT, sint1);
6234   ValNodeAddPointer (&list, SEQLOC_INT, sint2);
6235   b->choice = SEQLOC_PACKED_INT;
6236   b->data.ptrvalue = list;
6237   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6238     goto UnitTestSeqLocCompare_end;
6239   }
6240   sint1->id = sip2;
6241   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6242     goto UnitTestSeqLocCompare_end;
6243   }
6244   sint2->id = sip2;
6245   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6246     goto UnitTestSeqLocCompare_end;
6247   }
6248   list = ValNodeFree (list);
6249   sint1->id = sip;
6250   sint2->id = sip;
6251 
6252   /* WHOLE vs pnt */
6253   b->choice = SEQLOC_PNT;
6254   b->data.ptrvalue = pnt1;
6255   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6256     goto UnitTestSeqLocCompare_end;
6257   }
6258   pnt1->strand = Seq_strand_minus;
6259   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6260     goto UnitTestSeqLocCompare_end;
6261   }
6262   pnt1->strand = 0;
6263   pnt1->id = sip2;
6264   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6265     goto UnitTestSeqLocCompare_end;
6266   }
6267   pnt1->id = sip;
6268 
6269   /* WHOLE vs SEQLOC_PACKED_PNT */
6270   b->choice = SEQLOC_PACKED_PNT;
6271   b->data.ptrvalue = pspp1;
6272   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6273     goto UnitTestSeqLocCompare_end;
6274   }
6275   pspp1->strand = Seq_strand_minus;
6276   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6277     goto UnitTestSeqLocCompare_end;
6278   }
6279   pspp1->strand = 0;
6280   pspp1->id = sip2;
6281   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6282     goto UnitTestSeqLocCompare_end;
6283   }
6284   pspp1->next = pspp2;
6285   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6286     pspp1->next = NULL;
6287     goto UnitTestSeqLocCompare_end;
6288   }
6289   pspp1->next = NULL;
6290   pspp1->id = sip;
6291 
6292   /* WHOLE vs SEQLOC_MIX */
6293   list = NULL;
6294   ValNodeAddPointer (&list, SEQLOC_INT, sint1);
6295   ValNodeAddPointer (&list, SEQLOC_INT, sint2);
6296   b->choice = SEQLOC_MIX;
6297   b->data.ptrvalue = list;
6298   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6299     goto UnitTestSeqLocCompare_end;
6300   }
6301   sint1->strand = Seq_strand_minus;
6302   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6303     goto UnitTestSeqLocCompare_end;
6304   }
6305   sint1->strand = 0;
6306   sint1->id = sip2;
6307   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6308     goto UnitTestSeqLocCompare_end;
6309   }
6310   sint2->id = sip2;
6311   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6312     goto UnitTestSeqLocCompare_end;
6313   }
6314   sint1->id = sip;
6315   sint2->id = sip;
6316   list = ValNodeFree (list);
6317 
6318   /* WHOLE vs SEQLOC_BOND */
6319   b->choice = SEQLOC_BOND;
6320   b->data.ptrvalue = sbp1;
6321   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6322     goto UnitTestSeqLocCompare_end;
6323   }
6324   sbp1->a->strand = Seq_strand_minus;
6325   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6326     goto UnitTestSeqLocCompare_end;
6327   }
6328   sbp1->a->strand = 0;
6329   sbp1->a->id = sip2;
6330   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6331     goto UnitTestSeqLocCompare_end;
6332   }
6333   sbp1->b->id = sip2;
6334   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6335     goto UnitTestSeqLocCompare_end;
6336   }
6337   sbp1->a->id = sip;
6338   sbp1->b->id = sip;
6339 
6340   /* INT */
6341   a->choice = SEQLOC_INT;
6342   a->data.ptrvalue = sint3;
6343   /* INT vs SEQLOC_INT */
6344   b->choice = SEQLOC_INT;
6345   b->data.ptrvalue = sint1;
6346   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_EQ_B)) {
6347     goto UnitTestSeqLocCompare_end;
6348   }
6349   sint1->strand = Seq_strand_minus;
6350   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_NO_MATCH)) {
6351     goto UnitTestSeqLocCompare_end;
6352   }
6353   sint1->strand = 0;
6354   sint1->to = 9;
6355   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6356     goto UnitTestSeqLocCompare_end;
6357   }
6358   sint1->strand = Seq_strand_minus;
6359   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_NO_MATCH)) {
6360     goto UnitTestSeqLocCompare_end;
6361   }
6362   sint1->strand = 0;
6363   sint1->to = 11;
6364   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_A_IN_B)) {
6365     goto UnitTestSeqLocCompare_end;
6366   }
6367   sint1->strand = Seq_strand_minus;
6368   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_NO_MATCH)) {
6369     goto UnitTestSeqLocCompare_end;
6370   }
6371   sint1->strand = 0;
6372   sint1->from = 1;
6373   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6374     goto UnitTestSeqLocCompare_end;
6375   }
6376   sint1->strand = Seq_strand_minus;
6377   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_NO_MATCH)) {
6378     goto UnitTestSeqLocCompare_end;
6379   }
6380   sint1->strand = 0;
6381   sint1->from = 0;
6382   sint1->to = 10;
6383 
6384   /* INT vs PACKED_INT */
6385   list = NULL;
6386   ValNodeAddPointer (&list, SEQLOC_INT, sint1);
6387   ValNodeAddPointer (&list, SEQLOC_INT, sint2);
6388   b->choice = SEQLOC_PACKED_INT;
6389   b->data.ptrvalue = list;
6390   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_A_IN_B)) {
6391     goto UnitTestSeqLocCompare_end;
6392   }
6393   sint1->strand = Seq_strand_minus;
6394   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_NO_MATCH)) {
6395     goto UnitTestSeqLocCompare_end;
6396   }
6397   sint1->strand = 0;
6398   sint1->to = 11;
6399   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_A_IN_B)) {
6400     goto UnitTestSeqLocCompare_end;
6401   }
6402   sint1->strand = Seq_strand_minus;
6403   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_NO_MATCH)) {
6404     goto UnitTestSeqLocCompare_end;
6405   }
6406   sint1->strand = 0;
6407   sint1->from = 1;
6408   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6409     goto UnitTestSeqLocCompare_end;
6410   }
6411   sint1->strand = Seq_strand_minus;
6412   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_NO_MATCH)) {
6413     goto UnitTestSeqLocCompare_end;
6414   }
6415   sint1->strand = 0;
6416   sint1->from = 11;
6417   sint1->to = 24;
6418   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6419     goto UnitTestSeqLocCompare_end;
6420   }
6421   sint1->strand = Seq_strand_minus;
6422   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6423     goto UnitTestSeqLocCompare_end;
6424   }
6425   sint1->strand = 0;
6426   sint1->from = 0;
6427   sint1->to = 10;
6428   list = ValNodeFree (list);
6429 
6430   /* INT vs PNT */
6431   b->choice = SEQLOC_PNT;
6432   b->data.ptrvalue = pnt1;
6433   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6434     goto UnitTestSeqLocCompare_end;
6435   }
6436   pnt1->strand = Seq_strand_minus;
6437   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_NO_MATCH)) {
6438     goto UnitTestSeqLocCompare_end;
6439   }
6440   pnt1->strand = 0;
6441   pnt1->point = 13;
6442   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6443     goto UnitTestSeqLocCompare_end;
6444   }
6445   pnt1->strand = Seq_strand_minus;
6446   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6447     goto UnitTestSeqLocCompare_end;
6448   }
6449   pnt1->strand = 0;
6450   pnt1->point = 5;
6451   pnt1->id = sip2;
6452   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6453     goto UnitTestSeqLocCompare_end;
6454   }
6455   pnt1->id = sip;
6456 
6457   /* INT vs PACKED_PNT */
6458   b->choice = SEQLOC_PACKED_PNT;
6459   b->data.ptrvalue = pspp1;
6460   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6461     goto UnitTestSeqLocCompare_end;
6462   }
6463   pspp1->strand = Seq_strand_minus;
6464   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_NO_MATCH)) {
6465     goto UnitTestSeqLocCompare_end;
6466   }
6467   pspp1->strand = 0;
6468   pspp1->id = sip2;
6469   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6470     goto UnitTestSeqLocCompare_end;
6471   }
6472   pspp1->next = pspp2;
6473   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6474     pspp1->next = NULL;
6475     goto UnitTestSeqLocCompare_end;
6476   }
6477   pspp1->next = NULL;
6478   pspp1->id = sip;
6479   pspp1->pnts[1] = 9;
6480   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6481     goto UnitTestSeqLocCompare_end;
6482   }
6483   pspp1->strand = Seq_strand_minus;
6484   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_NO_MATCH)) {
6485     goto UnitTestSeqLocCompare_end;
6486   }
6487   pspp1->strand = 0;
6488   pspp1->pnts[1] = 16;
6489 
6490   /* INT vs BOND */
6491   b->choice = SEQLOC_BOND;
6492   b->data.ptrvalue = sbp1;
6493   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6494     goto UnitTestSeqLocCompare_end;
6495   }
6496   sbp1->a->strand = Seq_strand_minus;
6497   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_NO_MATCH)) {
6498     goto UnitTestSeqLocCompare_end;
6499   }
6500   sbp1->a->strand = 0;
6501   sbp1->b->strand = Seq_strand_minus;
6502   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6503     goto UnitTestSeqLocCompare_end;
6504   }
6505 
6506   sbp1->b->strand = 0;
6507   sbp1->b->point = 9;
6508   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6509     goto UnitTestSeqLocCompare_end;
6510   }
6511   sbp1->a->strand = Seq_strand_minus;
6512   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_A_OVERLAP_B)) {
6513     goto UnitTestSeqLocCompare_end;
6514   }
6515   sbp1->b->strand = Seq_strand_minus;
6516   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_NO_MATCH)) {
6517     goto UnitTestSeqLocCompare_end;
6518   }
6519   sbp1->a->strand = 0;
6520   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_A_OVERLAP_B)) {
6521     goto UnitTestSeqLocCompare_end;
6522   }
6523   sbp1->b->strand = 0;
6524 
6525   sbp1->a->id = sip2;
6526   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6527     goto UnitTestSeqLocCompare_end;
6528   }
6529   sbp1->b->id = sip2;
6530   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6531     goto UnitTestSeqLocCompare_end;
6532   }
6533   sbp1->a->id = sip;
6534   sbp1->b->id = sip;
6535   sbp1->b->point = 16;
6536 
6537   /* PACKED_INT */
6538   a->choice = SEQLOC_PACKED_INT;
6539   ValNodeAddPointer (&list2, SEQLOC_INT, sint3);
6540   ValNodeAddPointer (&list2, SEQLOC_INT, sint4);
6541   a->data.ptrvalue = list2;
6542 
6543   /* PACKED_INT vs PACKED_INT */
6544   b->choice = SEQLOC_PACKED_INT;
6545   ValNodeAddPointer (&list, SEQLOC_INT, sint1);
6546   ValNodeAddPointer (&list, SEQLOC_INT, sint2);
6547   b->data.ptrvalue = list;
6548   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_EQ_B)) {
6549     goto UnitTestSeqLocCompare_end;
6550   }
6551   sint1->strand = Seq_strand_minus;
6552   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_OVERLAP_B)) {
6553     goto UnitTestSeqLocCompare_end;
6554   }
6555   sint2->strand = Seq_strand_minus;
6556   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_NO_MATCH)) {
6557     goto UnitTestSeqLocCompare_end;
6558   }
6559   sint1->strand = 0;
6560   sint2->strand = 0;
6561   sint1->from = 1;
6562   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6563     goto UnitTestSeqLocCompare_end;
6564   }
6565   sint1->strand = Seq_strand_minus;
6566   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_A_OVERLAP_B)) {
6567     goto UnitTestSeqLocCompare_end;
6568   }
6569   sint1->strand = 0;
6570   sint1->from = 11;
6571   sint1->to = 14;
6572   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6573     goto UnitTestSeqLocCompare_end;
6574   }
6575   sint1->from = 0;
6576   sint1->to = 10;
6577 
6578   /* PACKED_INT vs PNT */
6579   b->choice = SEQLOC_PNT;
6580   b->data.ptrvalue = pnt1;
6581   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6582     goto UnitTestSeqLocCompare_end;
6583   }
6584   pnt1->strand = Seq_strand_minus;
6585   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_NO_MATCH)) {
6586     goto UnitTestSeqLocCompare_end;
6587   }
6588   pnt1->strand = 0;
6589   pnt1->point = 11;
6590   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6591     goto UnitTestSeqLocCompare_end;
6592   }
6593   pnt1->point = 16;
6594   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6595     goto UnitTestSeqLocCompare_end;
6596   }
6597 
6598   /* PACKED_INT vs SEQLOC_PACKED_PNT */
6599   b->choice = SEQLOC_PACKED_PNT;
6600   b->data.ptrvalue = pspp1;
6601   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6602     goto UnitTestSeqLocCompare_end;
6603   }
6604   pspp1->strand = Seq_strand_minus;
6605   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_NO_MATCH)) {
6606     goto UnitTestSeqLocCompare_end;
6607   }
6608   pspp1->strand = 0;
6609   pspp1->pnts[0] = 11;
6610   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6611     goto UnitTestSeqLocCompare_end;
6612   }
6613   pspp1->strand = Seq_strand_minus;
6614   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_NO_MATCH)) {
6615     goto UnitTestSeqLocCompare_end;
6616   }
6617   pspp1->strand = 0;
6618   pspp1->pnts[0] = 5;
6619   pspp1->id = sip2;
6620   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6621     goto UnitTestSeqLocCompare_end;
6622   }
6623   pspp1->next = pspp2;
6624   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6625     pspp1->next = NULL;
6626     goto UnitTestSeqLocCompare_end;
6627   }
6628   pspp1->id = sip;
6629   pspp1->next = NULL;
6630 
6631 
6632   /* PACKED_INT vs SEQLOC_BOND */
6633   b->choice = SEQLOC_BOND;
6634   b->data.ptrvalue = sbp1;
6635   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6636     goto UnitTestSeqLocCompare_end;
6637   }
6638   sbp1->a->strand = Seq_strand_minus;
6639   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_A_OVERLAP_B)) {
6640     goto UnitTestSeqLocCompare_end;
6641   }
6642   sbp1->b->strand = Seq_strand_minus;
6643   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_NO_MATCH)) {
6644     goto UnitTestSeqLocCompare_end;
6645   }
6646   sbp1->a->strand = 0;
6647   sbp1->b->strand = 0;
6648   sbp1->a->point = 11;
6649   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6650     goto UnitTestSeqLocCompare_end;
6651   }
6652   sbp1->b->strand = Seq_strand_minus;
6653   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_NO_MATCH)) {
6654     goto UnitTestSeqLocCompare_end;
6655   }
6656   sbp1->b->strand = 0;
6657   sbp1->b->point = 13;
6658   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6659     goto UnitTestSeqLocCompare_end;
6660   }
6661   sbp1->a->point = 5;
6662   sbp1->b->point = 16;
6663 
6664   list2 = ValNodeFree (list2);
6665 
6666   /* PNT */
6667   a->choice = SEQLOC_PNT;
6668   a->data.ptrvalue = pnt3;
6669 
6670   /* PNT vs PNT */
6671   b->choice = SEQLOC_PNT;
6672   b->data.ptrvalue = pnt1;
6673   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_EQ_B)) {
6674     goto UnitTestSeqLocCompare_end;
6675   }
6676   pnt1->strand = Seq_strand_minus;
6677   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_NO_MATCH)) {
6678     goto UnitTestSeqLocCompare_end;
6679   }
6680   pnt1->strand = 0;
6681   pnt1->id = sip2;
6682   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6683     goto UnitTestSeqLocCompare_end;
6684   }
6685   pnt1->id = sip;
6686   pnt1->point = 6;
6687   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6688     goto UnitTestSeqLocCompare_end;
6689   }
6690   pnt1->point = 5;
6691 
6692   /* PNT vs PACKED_PNT */
6693   b->choice = SEQLOC_PACKED_PNT;
6694   b->data.ptrvalue = pspp1;
6695   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_A_IN_B)) {
6696     goto UnitTestSeqLocCompare_end;
6697   }
6698   pspp1->strand = Seq_strand_minus;
6699   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_NO_MATCH)) {
6700     goto UnitTestSeqLocCompare_end;
6701   }
6702   pspp1->strand = 0;
6703   pspp1->id = sip2;
6704   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6705     goto UnitTestSeqLocCompare_end;
6706   }
6707   pspp1->next = pspp2;
6708   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_A_IN_B)) {
6709     pspp1->next = NULL;
6710     goto UnitTestSeqLocCompare_end;
6711   }
6712   pspp1->next = NULL;
6713   pspp1->id = sip;
6714   pspp1->pnts[0] = 6;
6715   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6716     goto UnitTestSeqLocCompare_end;
6717   }
6718   pspp1->pnts[0] = 4;
6719   pspp1->pnts[1] = 5;
6720   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_A_IN_B)) {
6721     goto UnitTestSeqLocCompare_end;
6722   }
6723   pspp1->pnts[0] = 5;
6724   pspp1->pnts[1] = 16;
6725 
6726   /* PNT vs BOND */
6727   b->choice = SEQLOC_BOND;
6728   b->data.ptrvalue = sbp1;
6729   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_A_IN_B)) {
6730     goto UnitTestSeqLocCompare_end;
6731   }
6732   sbp1->a->strand = Seq_strand_minus;
6733   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_NO_MATCH)) {
6734     goto UnitTestSeqLocCompare_end;
6735   }
6736   sbp1->a->strand = 0;
6737   pnt3->point = 16;
6738   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_A_IN_B)) {
6739     goto UnitTestSeqLocCompare_end;
6740   }
6741   sbp1->b->strand = Seq_strand_minus;
6742   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_NO_MATCH)) {
6743     goto UnitTestSeqLocCompare_end;
6744   }
6745   sbp1->b->strand = 0;
6746   pnt3->point = 5;
6747   sbp1->a->id = sip2;
6748   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6749     goto UnitTestSeqLocCompare_end;
6750   }
6751   sbp1->a->id = sip;
6752 
6753   /* PACKED_PNT */
6754   a->choice = SEQLOC_PACKED_PNT;
6755   a->data.ptrvalue = pspp2;
6756   /* PACKED_PNT vs PACKED_PNT */
6757   b->choice = SEQLOC_PACKED_PNT;
6758   b->data.ptrvalue = pspp1;
6759   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_EQ_B)) {
6760     goto UnitTestSeqLocCompare_end;
6761   }
6762   pspp1->strand = Seq_strand_minus;
6763   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_NO_MATCH)) {
6764     goto UnitTestSeqLocCompare_end;
6765   }
6766   pspp1->strand = 0;
6767   pspp1->pnts[0] = 6;
6768   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6769     goto UnitTestSeqLocCompare_end;
6770   }
6771   pspp1->pnts[0] = 5;
6772   pspp1->pnts[1] = 17;
6773   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6774     goto UnitTestSeqLocCompare_end;
6775   }
6776   pspp1->pnts[1] = 16;
6777   pspp1->id = sip2;
6778   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6779     goto UnitTestSeqLocCompare_end;
6780   }
6781   pspp1->next = pspp2;
6782   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_A_IN_B)) {
6783     pspp1->next = NULL;
6784     goto UnitTestSeqLocCompare_end;
6785   }
6786   pspp1->next = NULL;
6787   pspp1->id = sip;
6788   pspp1->used = 3;
6789   pspp1->pnts[2] = 23;
6790   if (!CheckSeqLocCompResults(a, b, SLC_A_IN_B, SLC_A_IN_B)) {
6791     goto UnitTestSeqLocCompare_end;
6792   }
6793   pspp1->used = 2;
6794 
6795   /* PACKED_PNT vs BOND */
6796   b->choice = SEQLOC_BOND;
6797   b->data.ptrvalue = sbp1;
6798   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_EQ_B)) {
6799     goto UnitTestSeqLocCompare_end;
6800   }
6801   pspp2->used = 3;
6802   pspp2->pnts[2] = 23;
6803   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6804     goto UnitTestSeqLocCompare_end;
6805   }
6806   pspp2->used = 2;
6807   pspp2->id = sip2;
6808   if (!CheckSeqLocCompResults(a, b, SLC_NO_MATCH, SLC_NO_MATCH)) {
6809     goto UnitTestSeqLocCompare_end;
6810   }
6811   pspp2->next = pspp1;
6812   if (!CheckSeqLocCompResults(a, b, SLC_B_IN_A, SLC_B_IN_A)) {
6813     pspp2->next = NULL;
6814     goto UnitTestSeqLocCompare_end;
6815   }
6816   pspp2->next = NULL;
6817   pspp2->id = sip;
6818   sbp1->a->strand = Seq_strand_minus;
6819   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_OVERLAP_B)) {
6820     goto UnitTestSeqLocCompare_end;
6821   }
6822   sbp1->b->strand = Seq_strand_minus;
6823   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_NO_MATCH)) {
6824     goto UnitTestSeqLocCompare_end;
6825   }
6826   sbp1->a->strand = 0;
6827   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_OVERLAP_B)) {
6828     goto UnitTestSeqLocCompare_end;
6829   }
6830   sbp1->b->strand = 0;
6831   sbp1->a->point = 4;
6832   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6833     goto UnitTestSeqLocCompare_end;
6834   }
6835   sbp1->b->point = 5;
6836   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6837     goto UnitTestSeqLocCompare_end;
6838   }
6839   sbp1->a->point = 5;
6840   sbp1->b->point = 16;
6841 
6842   /* BOND */
6843   a->choice = SEQLOC_BOND;
6844   a->data.ptrvalue = sbp2;
6845   /* BOND vs BOND */
6846   b->choice = SEQLOC_BOND;
6847   b->data.ptrvalue = sbp1;
6848   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_EQ_B)) {
6849     goto UnitTestSeqLocCompare_end;
6850   }
6851   sbp1->a->strand = Seq_strand_minus;
6852   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_OVERLAP_B)) {
6853     goto UnitTestSeqLocCompare_end;
6854   }
6855   sbp1->b->strand = Seq_strand_minus;
6856   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_NO_MATCH)) {
6857     goto UnitTestSeqLocCompare_end;
6858   }
6859   sbp1->a->strand = 0;
6860   if (!CheckSeqLocCompResults(a, b, SLC_A_EQ_B, SLC_A_OVERLAP_B)) {
6861     goto UnitTestSeqLocCompare_end;
6862   }
6863   sbp1->b->strand = 0;
6864   sbp1->a->point = 4;
6865   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6866     goto UnitTestSeqLocCompare_end;
6867   }
6868   sbp1->b->point = 5;
6869   if (!CheckSeqLocCompResults(a, b, SLC_A_OVERLAP_B, SLC_A_OVERLAP_B)) {
6870     goto UnitTestSeqLocCompare_end;
6871   }
6872   sbp1->a->point = 5;
6873   sbp1->b->point = 16;
6874 
6875   rval = TRUE;
6876 
6877 UnitTestSeqLocCompare_end:
6878   sint1->id = NULL;
6879   sint1 = SeqIntFree (sint1);
6880   sint2->id = NULL;
6881   sint2 = SeqIntFree (sint2);
6882   sint3->id = NULL;
6883   sint3 = SeqIntFree (sint3);
6884   sint4->id = NULL;
6885   sint4 = SeqIntFree (sint4);
6886   pnt1->id = NULL;
6887   pnt1 = SeqPntFree (pnt1);
6888   pnt2->id = NULL;
6889   pnt2 = SeqPntFree (pnt2);
6890   pspp1->id = NULL;
6891   pspp1 = PackSeqPntFree (pspp1);
6892   pspp2->id = NULL;
6893   pspp2 = PackSeqPntFree (pspp2);
6894   sbp1->a = NULL;
6895   sbp1->b = NULL;
6896   sbp1 = SeqBondFree (sbp1);
6897   sbp2->a = NULL;
6898   sbp2->b = NULL;
6899   sbp2 = SeqBondFree (sbp2);
6900 
6901   sip = SeqIdFree (sip);
6902   sip2 = SeqIdFree(sip2);
6903   a = ValNodeFree (a);
6904   b = ValNodeFree (b);
6905   return rval;
6906 }
6907 
6908 /* returns the number of unique nucleotides covered by slp */
SeqLocCoverage(SeqLocPtr slp)6909 static Int4 SeqLocCoverage (SeqLocPtr slp)
6910 {
6911   Int4Ptr ivals;
6912   Int4    numivals = 0;
6913   SeqLocPtr tmp;
6914   SeqIdPtr sip = NULL;
6915   SeqIdPtr PNTR id_list;
6916   Int4     coverage = 0, i = 0, from, to, j;
6917   Int4     i_from, i_to, j_from, j_to;
6918   Boolean  added_to_prev;
6919 
6920   tmp = NULL;
6921   while ((tmp = SeqLocFindNext (slp, tmp)) != NULL) {
6922     numivals++;
6923   }
6924   if (numivals > 0) {
6925     ivals = MemNew (sizeof (Int4) * (numivals * 2));
6926     id_list = (SeqIdPtr PNTR) MemNew (sizeof (SeqIdPtr) * numivals);
6927     tmp = NULL;
6928     i = 0;
6929     while ((tmp = SeqLocFindNext (slp, tmp)) != NULL) {
6930       from = SeqLocStart (tmp);
6931       to = SeqLocStop (tmp);
6932       sip = SeqLocId (tmp);
6933       id_list [i / 2] = sip;
6934       ivals [i] = from;
6935       i++;
6936       ivals [i] = to;
6937       i++;
6938     }
6939     /* now combine overlapping intervals */
6940     for (j = 0; j < numivals; j++) {
6941       i = j + 1;
6942       while (i < numivals) {
6943         added_to_prev = FALSE;
6944         if (SeqIdComp (sip, id_list[j]) == SIC_YES) {
6945           i_from = ivals[2 * i];
6946           i_to = ivals[2 * i + 1];
6947           j_from = ivals[2 * j];
6948           j_to = ivals[2 * j + 1];
6949 
6950           if ((i_from <= j_from && i_to >= j_from)
6951               || (i_from <= j_to && i_to >= j_to)
6952               || (i_from >= j_from && i_to <= j_to)) {
6953 
6954             /* merge i into j */
6955             ivals[2 * j] = MIN (i_from, j_from);
6956             ivals[2 * j + 1] = MAX (i_to, j_to);
6957 
6958             /* copy last piece into where i was, and delete last piece */
6959             /* This is okay since order doesn't matter, and this is
6960                cheaper than moving everything down after deleting i */
6961             if( i != (numivals - 1) ) {
6962               ivals[2 * i] = ivals[2 * (numivals -1)];
6963               ivals[2 * i + 1] = ivals[2 * (numivals -1) + 1];
6964             }
6965             numivals --;
6966 
6967             /* restart checking against j since j changed */
6968             i = j + 1;
6969             added_to_prev = TRUE;
6970           }
6971         }
6972         if (added_to_prev) {
6973           /* do not increment i */
6974         } else {
6975           i++;
6976         }
6977       }
6978     }
6979     /* now add up lengths of intervals */
6980     for (j = 0; j < numivals; j++) {
6981       /* The "if" checks for NULLs, etc. which have a range like "-1 to -1" */
6982       if( ivals [2 * j + 1] >= 0 ||  ivals [2 * j] >= 0 ) {
6983         coverage += ivals [2 * j + 1] - ivals [2 * j] + 1;
6984       }
6985     }
6986     ivals = MemFree (ivals);
6987     id_list = MemFree (id_list);
6988   }
6989   return coverage;
6990 }
6991 
6992 
6993 /*****************************************************************************
6994 *
6995 *   SeqLocAinB(a, b)
6996 *      if a is completely contained in b, a positive number is returned
6997 *         if 0, a is identical with b
6998 *         if not 0, is the number of residues bigger b is than a
6999 *      if a negative number is returned, a is not contained in b
7000 *         could overlap or not
7001 *      used to find features contained in genes
7002 *
7003 *****************************************************************************/
SeqLocAinB(SeqLocPtr a,SeqLocPtr b)7004 NLM_EXTERN Int4 SeqLocAinB (SeqLocPtr a, SeqLocPtr b)
7005 {
7006     Int4 diff = -1;
7007     Int2 res;
7008 
7009     if ((a == NULL) || (b == NULL))
7010         return diff;
7011 
7012     res = SeqLocCompare(a, b);
7013     switch (res)
7014     {
7015         case SLC_A_EQ_B:
7016             diff = 0;
7017             break;
7018         case SLC_A_IN_B:
7019             diff = (SeqLocCoverage(b) - SeqLocCoverage(a));
7020             break;
7021         default:
7022             break;
7023     }
7024     return diff;
7025 }
7026 
7027 /*****************************************************************************
7028 *
7029 *   Boolean SeqIntCheck(sip)
7030 *       checks that a seq interval is valid
7031 *
7032 *****************************************************************************/
SeqIntCheck(SeqIntPtr sip)7033 NLM_EXTERN Boolean SeqIntCheck (SeqIntPtr sip)
7034 
7035 {
7036     Int4 len = INT4_MAX;
7037     BioseqPtr bsp;
7038      Boolean locked = FALSE;
7039 
7040     if (sip == NULL) return TRUE;  /* makes it ok to pass a NULL */
7041 
7042     bsp = BioseqFindCore(sip->id);
7043      if (bsp == NULL)
7044      {
7045          bsp = BioseqLockById(sip->id);
7046          if (bsp != NULL)
7047              locked = TRUE;
7048      }
7049     if (bsp != NULL)
7050         len = BioseqGetLen(bsp);
7051 
7052     if (locked)
7053         BioseqUnlock(bsp);
7054     if ((sip->from < 0) || (sip->from > sip->to) || (sip->to >= len))
7055     {
7056         return FALSE;
7057     }
7058     else
7059         return TRUE;
7060 }
7061 
7062 /*****************************************************************************
7063 *
7064 *   Boolean SeqPntCheck(SeqPntPtr spp)
7065 *       checks that a seq point is valid
7066 *
7067 *****************************************************************************/
SeqPntCheck(SeqPntPtr spp)7068 NLM_EXTERN Boolean SeqPntCheck (SeqPntPtr spp)
7069 
7070 {
7071     Int4 len = INT4_MAX;
7072     BioseqPtr bsp;
7073      Boolean locked = FALSE;
7074 
7075     if (spp == NULL) return TRUE;   /* cant compare */
7076 
7077     bsp = BioseqFindCore(spp->id);
7078      if (bsp == NULL)
7079      {
7080          bsp = BioseqLockById(spp->id);
7081          if (bsp != NULL)
7082              locked = TRUE;
7083      }
7084     if (bsp != NULL)
7085         len = BioseqGetLen(bsp);
7086 
7087      if (locked)
7088         BioseqUnlock(bsp);
7089     if ((spp->point < 0) || (spp->point >= len))
7090     {
7091         return FALSE;
7092     }
7093     else
7094         return TRUE;
7095 }
7096 
7097 /*****************************************************************************
7098 *
7099 *   PackSeqPntCheck (pspp)
7100 *
7101 *****************************************************************************/
PackSeqPntCheck(PackSeqPntPtr pspp)7102 NLM_EXTERN Boolean PackSeqPntCheck (PackSeqPntPtr pspp)
7103 {
7104     Int4 len = INT4_MAX;
7105     BioseqPtr bsp;
7106     Int4 num, index, point;
7107     Boolean locked = FALSE;
7108 
7109     if (pspp == NULL) return TRUE;   /* cant compare */
7110 
7111     bsp = BioseqFindCore(pspp->id);
7112      if (bsp == NULL)
7113      {
7114          bsp = BioseqLockById(pspp->id);
7115          if (bsp != NULL)
7116              locked = TRUE;
7117      }
7118     if (bsp != NULL)
7119         len = BioseqGetLen(bsp);
7120 
7121      if (locked)
7122         BioseqUnlock(bsp);
7123     num = PackSeqPntNum(pspp);   /* total number of points */
7124     for (index = 0; index < num; index++)
7125     {
7126         point = PackSeqPntGet(pspp, index);
7127 
7128         if ((point < 0) || (point >= len))
7129             return FALSE;
7130     }
7131 
7132     return TRUE;
7133 
7134 }
7135 
7136 
7137 /*****************************************************************************
7138 *
7139 *   SeqLocCheck (slp)
7140 *
7141 *****************************************************************************/
SeqLocCheck(SeqLocPtr slp)7142 NLM_EXTERN Uint1 SeqLocCheck (SeqLocPtr slp)
7143 {
7144     SeqLocPtr tmp;
7145     Uint1 thisstrand, laststrand=0;
7146     Boolean first = TRUE;
7147     Uint1 retval = SEQLOCCHECK_OK;
7148 
7149     if (slp == NULL) return TRUE;
7150 
7151     tmp = NULL;
7152     while ((tmp = SeqLocFindNext(slp, tmp)) != NULL)
7153     {
7154       if (tmp->choice == SEQLOC_NULL)
7155       {
7156         continue;
7157       }
7158         thisstrand = SeqLocStrand(tmp);
7159         if (! first)
7160         {
7161             if (thisstrand != laststrand)
7162             {
7163                 ErrPostEx(SEV_WARNING,0,0,"Mixed strand location");
7164                 retval = SEQLOCCHECK_WARNING;
7165             }
7166         }
7167         first = FALSE;
7168         laststrand = thisstrand;
7169 
7170         switch (tmp->choice)
7171         {
7172             case SEQLOC_INT:
7173                 if (! SeqIntCheck ((SeqIntPtr)(tmp->data.ptrvalue)))
7174                     return SEQLOCCHECK_ERROR;
7175                 break;
7176             case SEQLOC_PNT:
7177                 if (! SeqPntCheck ((SeqPntPtr)(tmp->data.ptrvalue)))
7178                     return SEQLOCCHECK_ERROR;
7179                 break;
7180             case SEQLOC_PACKED_PNT:
7181                 if (! PackSeqPntCheck ((PackSeqPntPtr)(tmp->data.ptrvalue)))
7182                     return SEQLOCCHECK_ERROR;
7183                 break;
7184             default:
7185                 break;
7186         }
7187     }
7188 
7189     return retval;
7190 }
7191 
7192 
/*****************************************************************************
*
*   SeqLocPartialCheckEx(head, farFetch)
*       sets bits for incomplete location and/or errors
*       incomplete defined as Int-fuzz on start or stop with
*         lim.unk, lim.gt, or lim.lt set
*
*       head     - location to examine; NULL returns 0
*       farFetch - when TRUE, a Bioseq not found by BioseqFindCore is
*                  additionally looked up with BioseqLockById (and unlocked
*                  again after use); when FALSE only BioseqFindCore is tried
*
*       The sub-locations of head are enumerated with SeqLocFindNext.  The
*       first and last ones are remembered so that each finding can be
*       attributed to the start, the stop, or an internal part of head.
*
*   returns defined in header file (SLP_* bits OR-ed together)
*
*****************************************************************************/
NLM_EXTERN Uint2 SeqLocPartialCheckEx (SeqLocPtr head, Boolean farFetch)
{
    SeqLocPtr slp = NULL, first = NULL, last = NULL;
    Uint2 retval = 0;
    BioseqPtr bsp;
    SeqIntPtr sip;
    SeqPntPtr spp;
    PackSeqPntPtr pspp;
    IntFuzzPtr ifp;
    Boolean miss_end;
    ValNodePtr vnp, vnp2;
    Boolean locked, found_molinfo;
    MolInfoPtr mip;

    if (head == NULL) return retval;

    /* first pass: find the first and last sub-location */
    while ((slp = SeqLocFindNext(head, slp)) != NULL)
    {
        if (first == NULL)
            first = slp;
        last = slp;
    }

    if (first == NULL) return retval;

    /* second pass: inspect each sub-location and accumulate SLP_* bits */
    slp = NULL;
    while ((slp = SeqLocFindNext(head, slp)) != NULL)
    {
        switch (slp->choice)
        {
            case SEQLOC_NULL:
                /* a NULL piece marks a gap at whichever position it occupies */
                if (slp == first)
                    retval |= SLP_START;
                else if (slp == last)
                    retval |= SLP_STOP;
                else
                    retval |= SLP_INTERNAL;
                break;
            case SEQLOC_INT:
                sip = (SeqIntPtr)(slp->data.ptrvalue);
                /* fuzz on the "from" coordinate */
                ifp = sip->if_from;
                if (ifp != NULL)
                {
                    if (ifp->choice == 4)  /* lim */
                    {
                        if (ifp->a == 1)       /* gt */
                            retval |= SLP_LIM_WRONG;
                        else if ((ifp->a == 2) || (ifp->a == 0)) /* lt,unk */
                        {
                            /* on the minus strand "from" is treated as the stop end */
                            if (sip->strand == Seq_strand_minus) /* stop */
                            {
                                if (slp == last)
                                    retval |= SLP_STOP;
                                else
                                    retval |= SLP_INTERNAL;
                                /* additionally flag when "from" is not position 0 */
                                if (sip->from != 0)
                                {
                                    if (slp == last)
                                        retval |= SLP_NOSTOP;
                                    else
                                        retval |= SLP_NOINTERNAL;
                                }
                            }
                            else                                /* start */
                            {
                                if (slp == first)
                                    retval |= SLP_START;
                                else
                                    retval |= SLP_INTERNAL;
                                /* additionally flag when "from" is not position 0 */
                                if (sip->from != 0)
                                {
                                    if (slp == first)
                                        retval |= SLP_NOSTART;
                                    else
                                        retval |= SLP_NOINTERNAL;
                                }
                            }
                        }
                    } else if (ifp->choice == 2) /* range */ {
                        /* range fuzz only sets a bit on the outermost piece */
                        if (sip->strand == Seq_strand_minus) {
                            if (slp == last) {
                                retval |= SLP_STOP;
                            }
                        } else {
                            if (slp == first) {
                                retval |= SLP_START;
                            }
                        }
                    }

                }
                /* fuzz on the "to" coordinate */
                ifp = sip->if_to;
                if (ifp != NULL)
                {
                    if (ifp->choice == 4)  /* lim */
                    {
                        if (ifp->a == 2)       /* lt */
                            retval |= SLP_LIM_WRONG;
                        else if ((ifp->a == 1) || (ifp->a == 0)) /* gt,unk */
                        {
                            locked = FALSE;
                            bsp = BioseqFindCore(sip->id);
                             if (bsp == NULL && farFetch)
                             {
                                 bsp = BioseqLockById(sip->id);
                                 if (bsp != NULL)
                                     locked = TRUE;
                             }
                            /* miss_end: "to" is not the last residue of the Bioseq;
                               stays FALSE when the Bioseq could not be found */
                            miss_end = FALSE;
                            if (bsp != NULL)
                            {
                                if (sip->to != (bsp->length - 1))
                                    miss_end = TRUE;
                            }
                            if (locked)
                                BioseqUnlock(bsp);
                            /* on the minus strand "to" is treated as the start end */
                            if (sip->strand == Seq_strand_minus) /* start */
                            {
                                if (slp == first)
                                    retval |= SLP_START;
                                else
                                    retval |= SLP_INTERNAL;
                                if (miss_end)
                                {
                                    if (slp == first /* was last */)
                                        retval |= SLP_NOSTART;
                                    else
                                        retval |= SLP_NOINTERNAL;
                                }
                            }
                            else                                /* stop */
                            {
                                if (slp == last)
                                    retval |= SLP_STOP;
                                else
                                    retval |= SLP_INTERNAL;
                                if (miss_end)
                                {
                                    if (slp == last)
                                        retval |= SLP_NOSTOP;
                                    else
                                        retval |= SLP_NOINTERNAL;
                                }
                            }
                        }
                    } else if (ifp->choice == 2) /* range */ {
                        /* range fuzz only sets a bit on the outermost piece */
                        if (sip->strand == Seq_strand_minus) {
                            if (slp == first) {
                                retval |= SLP_START;
                            }
                        } else {
                            if (slp == last) {
                                retval |= SLP_STOP;
                            }
                        }
                    }
                }
                break;
            case SEQLOC_PNT:
                spp = (SeqPntPtr)(slp->data.ptrvalue);
                ifp = spp->fuzz;
                if (ifp != NULL)
                {
                    if (ifp->choice == 4)  /* lim */
                    {
                        if ((ifp->a >= 0) && (ifp->a <= 2))  /* gt, lt,unk */
                        {
                            /* a point that is both first and last sets START and STOP */
                            if (slp == first)
                                retval |= SLP_START;
                            if (slp == last)
                                retval |= SLP_STOP;
                            if ((slp != first) && (slp != last))
                                retval |= SLP_INTERNAL;
                        }
                    }
                }
                break;
            case SEQLOC_PACKED_PNT:
                pspp = (PackSeqPntPtr)(slp->data.ptrvalue);
                ifp = pspp->fuzz;
                if (ifp != NULL)
                {
                    if (ifp->choice == 4)  /* lim */
                    {
                        if ((ifp->a >= 0) && (ifp->a <= 2)) /* gt, lt, unk */
                        {
                            /* same attribution as for a single point */
                            if (slp == first)
                                retval |= SLP_START;
                            if (slp == last)
                                retval |= SLP_STOP;
                            if ((slp != first) && (slp != last))
                                retval |= SLP_INTERNAL;
                        }
                    }
                }
                break;
            case SEQLOC_WHOLE:
                /* a whole-sequence location takes its partialness from the
                   descriptors of the referenced Bioseq */
                found_molinfo = FALSE;
                locked = FALSE;
                bsp = BioseqFindCore((SeqIdPtr)(slp->data.ptrvalue));
                if (bsp == NULL && farFetch)
                {
                    bsp = BioseqLockById((SeqIdPtr)(slp->data.ptrvalue));
                    if (bsp != NULL)
                        locked = TRUE;
                }
                if (bsp != NULL) {
                    vnp = NULL;
                    /* preferred source: MolInfo descriptors (completeness field) */
                    while ((vnp = GetNextDescriptorUnindexed(bsp, Seq_descr_molinfo, vnp)) != NULL)
                    {
                        found_molinfo = TRUE;
                        mip = (MolInfoPtr)(vnp->data.ptrvalue);
                        switch (mip->completeness)
                        {
                            case 3:    /* no left */
                                if (slp == first)
                                    retval |= SLP_START;
                                else
                                    retval |= SLP_INTERNAL;
                                break;
                            case 4:    /* no right */
                                if (slp == last)
                                    retval |= SLP_STOP;
                                else
                                    retval |= SLP_INTERNAL;
                                break;
                            case 2:    /* partial */
                                retval |= SLP_OTHER;
                                break;
                            case 5:    /* no ends */
                                retval |= SLP_START;
                                retval |= SLP_STOP;
                                break;
                            default:
                                break;
                        }
                    }
                    /* fallback: modif descriptors, consulted only when no MolInfo
                       descriptor was found (vnp is NULL again at this point) */
                    if (! found_molinfo)
                    {
                        while ((vnp = GetNextDescriptorUnindexed(bsp, Seq_descr_modif, vnp)) != NULL)
                        {
                            for (vnp2 = (ValNodePtr)(vnp->data.ptrvalue); vnp2 != NULL; vnp2 = vnp2->next)
                            {
                                switch (vnp2->data.intvalue)
                                {

                                    case 16:    /* no left */

                                        if (slp == first)

                                            retval |= SLP_START;

                                        else
                                            retval |= SLP_INTERNAL;
                                        break;
                                    case 17:    /* no right */
                                        if (slp == last)
                                            retval |= SLP_STOP;
                                        else
                                            retval |= SLP_INTERNAL;
                                        break;
                                    case 10:    /* partial */
                                        retval |= SLP_OTHER;
                                        break;
                                }
                            }
                        }
                    }
                }
                if (locked)
                    BioseqUnlock (bsp);
                break;
            default:
                break;

        }
    }

    return retval;
}
7483 
SeqLocPartialCheck(SeqLocPtr head)7484 NLM_EXTERN Uint2 SeqLocPartialCheck(SeqLocPtr head)
7485 
7486 {
7487   return SeqLocPartialCheckEx (head, TRUE);
7488 }
7489 
7490 /*****************************************************************************
7491 *
7492 *   StringForSeqMethod(Int2 method)
7493 *       returns a descriptive string for sequencing method.
7494 *
7495 *****************************************************************************/
StringForSeqMethod(Int2 method)7496 NLM_EXTERN CharPtr StringForSeqMethod (Int2 method)
7497 {
7498 #define MAX_METHOD 6
7499     static char * methods[MAX_METHOD] = {
7500         "conceptual translation",
7501         "direct peptide sequencing",
7502         "conceptual translation with partial peptide sequencing",
7503         "sequenced peptide, ordered by overlap",
7504         "sequenced peptide, ordered by homology",
7505         "conceptual translation supplied by author" };
7506 
7507     if ((method < 1) || (method > MAX_METHOD))
7508         return NULL;
7509 
7510     return methods[method - 1];
7511 }
7512 
7513 /*****************************************************************************
7514 *
7515 *   StringForSeqTech(Int2 tech)
7516 *       returns a descriptive string for sequencing method.
7517 *        uses MolInfo from asn spec 4.0
7518 *****************************************************************************/
StringForSeqTech(Int2 tech)7519 NLM_EXTERN CharPtr StringForSeqTech (Int2 tech)
7520 {
7521 #define MAX_TECH 13
7522     static char * techs[MAX_TECH] = {
7523         NULL,    /*"standard sequencing", */
7524         NULL,  /*"Expressed Sequence Tag", */
7525         NULL, /*"Sequence Tagged Site", */
7526         NULL, /*"one-pass genomic sequence", */
7527         NULL, /*"from genetic mapping techniques", */
7528         NULL, /*"from physical mapping techniques", */
7529         NULL, /*"derived from other data, not a primary entity", */
7530         "conceptual translation",
7531         "direct peptide sequencing",
7532         "conceptual translation with partial peptide sequencing",
7533         "sequenced peptide, ordered by overlap",
7534         "sequenced peptide, ordered by homology",
7535         "conceptual translation supplied by author" };
7536 
7537     if ((tech < 1) || (tech > MAX_TECH))
7538         return NULL;
7539 
7540     return techs[tech - 1];
7541 }
7542 
/* Forward declarations for helpers called by the GetOffsetIn... and
   GetLeftAndRightOffsetsInBioseq functions that follow.  The first two
   fill in a caller-supplied SeqPnt for one end of a location, the third
   fills in both ends at once; the last two map a point into a location
   or a Bioseq. */
static Boolean GetThePointForOffset(SeqLocPtr of, SeqPntPtr target, Uint1 which_end);
Boolean GetThePointForOffsetEx(SeqLocPtr of, SeqPntPtr target, Uint1 which_end, Boolean is_circular);
Boolean GetPointsForLeftAndRightOffsets(SeqLocPtr of, SeqPntPtr left, SeqPntPtr right, Boolean is_circular);
static Int4 CheckOffsetInLoc(SeqLocPtr in, Int4 pos, BioseqPtr bsp, SeqIdPtr the_id);
NLM_EXTERN Int4 CheckPointInBioseq(SeqPntPtr sp, BioseqPtr in, BoolPtr flip_strand, Boolean relaxed);
7548 
7549 /*****************************************************************************
7550 *
7551 * Int4 GetOffsetInLoc (SeqLocPtr of, SeqLocPtr in, Uint1 which_end)
7552 *   returns -1 if of not in, in
7553 *
7554 *****************************************************************************/
GetOffsetInLoc(SeqLocPtr of,SeqLocPtr in,Uint1 which_end)7555 NLM_EXTERN Int4 GetOffsetInLoc (SeqLocPtr of, SeqLocPtr in, Uint1 which_end)
7556 {
7557     SeqPnt sp;
7558     BioseqPtr bsp;
7559     Boolean locked = FALSE;
7560     Int4 result;
7561 
7562     if ((in == NULL) || (of == NULL))
7563         return -1L;
7564 
7565     if (! GetThePointForOffset(of, &sp, which_end))
7566         return -1L;
7567 
7568     if (! IS_one_loc(in, FALSE))    /* optimize for multiple hits */
7569     {
7570         bsp = BioseqFindCore(sp.id);  /* only need SeqIds */
7571          if (bsp == NULL)
7572          {
7573              bsp = BioseqLockById(sp.id);
7574              if (bsp != NULL)
7575                  locked = TRUE;
7576          }
7577     }
7578     else
7579         bsp = NULL;
7580 
7581     result = CheckOffsetInLoc(in, sp.point, bsp, sp.id);
7582     if (locked)
7583         BioseqUnlock(bsp);
7584     return result;
7585 }
7586 
7587 
7588 /*****************************************************************************
7589 *
7590 * Int4 GetOffsetInBioseq (SeqLocPtr of, BioseqPtr in, Uint1 which_end)
7591 *   return -1 if of not in "in"
7592 *
7593 *****************************************************************************/
GetOffsetInBioseq(SeqLocPtr of,BioseqPtr in,Uint1 which_end)7594 NLM_EXTERN Int4 GetOffsetInBioseq (SeqLocPtr of, BioseqPtr in, Uint1 which_end)
7595 {
7596     SeqPnt sp;
7597 
7598     if ((of == NULL) || (in == NULL))
7599         return -1;
7600 
7601     if (! GetThePointForOffset(of, &sp, which_end))
7602         return -1L;
7603 
7604     return CheckPointInBioseq(&sp, in, NULL, FALSE);
7605 }
7606 
7607 
GetOffsetInBioseqEx(SeqLocPtr of,BioseqPtr in,Uint1 which_end,Boolean is_circular,Boolean relaxed)7608 NLM_EXTERN Int4 GetOffsetInBioseqEx (SeqLocPtr of, BioseqPtr in, Uint1 which_end, Boolean is_circular, Boolean relaxed)
7609 {
7610     SeqPnt sp;
7611 
7612     if ((of == NULL) || (in == NULL))
7613         return -1;
7614 
7615     if (! GetThePointForOffsetEx(of, &sp, which_end, is_circular))
7616         return -1L;
7617 
7618     return CheckPointInBioseq(&sp, in, NULL, relaxed);
7619 }
7620 
7621 
GetLeftAndRightOffsetsInBioseq(SeqLocPtr of,BioseqPtr in,Int4Ptr left,Int4Ptr right,Boolean is_circular,Boolean relaxed,BoolPtr left_flip,BoolPtr right_flip)7622 NLM_EXTERN void GetLeftAndRightOffsetsInBioseq (SeqLocPtr of, BioseqPtr in, Int4Ptr left, Int4Ptr right, Boolean is_circular, Boolean relaxed, BoolPtr left_flip, BoolPtr right_flip)
7623 {
7624     SeqPnt l, r;
7625 
7626     if (left != NULL) {
7627       *left = -1;
7628     }
7629     if (right != NULL) {
7630       *right = -1;
7631     }
7632     if ((of == NULL) || (in == NULL))
7633         return;
7634 
7635     if (!GetPointsForLeftAndRightOffsets (of, &l, &r, is_circular)) {
7636         return;
7637     }
7638     if (left != NULL) {
7639         *left = CheckPointInBioseq (&l, in, left_flip, relaxed);
7640     }
7641     if (right != NULL) {
7642         *right = CheckPointInBioseq (&r, in, right_flip, relaxed);
7643     }
7644 }
7645 
7646 /*****************************************************************************
7647 *
7648 *   CheckPointInBioseq(pnt, in)
7649 *
7650 *****************************************************************************/
CheckPointInBioseq(SeqPntPtr sp,BioseqPtr in,BoolPtr flip_strand,Boolean relaxed)7651 NLM_EXTERN Int4 CheckPointInBioseq (SeqPntPtr sp, BioseqPtr in, BoolPtr flip_strand, Boolean relaxed)
7652 {
7653     ValNode sl;
7654     BioseqPtr bsp;
7655     Int4 retval = -1;
7656     SeqLocPtr slp = NULL, curr;
7657     Int4 offset, offset2, strt, stp;
7658     SeqIdPtr sip;
7659     Boolean locked = FALSE;
7660 
7661     if (SeqIdIn(sp->id, in->id))   /* in this one */
7662         return sp->point;
7663 
7664     switch (in->repr)
7665     {
7666         case Seq_repr_virtual:
7667         case Seq_repr_raw:
7668         case Seq_repr_const:
7669         case Seq_repr_map:
7670             return -1;    /* nothing more can be done */
7671 
7672         case Seq_repr_ref:
7673             slp = (ValNodePtr) in->seq_ext;
7674             break;
7675 
7676         case Seq_repr_seg:
7677             sl.choice = SEQLOC_MIX;
7678             sl.data.ptrvalue = in->seq_ext;
7679             slp = &sl;
7680             break;
7681 
7682         case Seq_repr_delta:
7683             break;
7684 
7685         default:
7686             return -1;
7687     }
7688 
7689     bsp = BioseqFindCore(sp->id);   /* only need SeqIds */
7690      if (bsp == NULL)
7691      {
7692          bsp = BioseqLockById(sp->id);
7693          if (bsp != NULL)
7694              locked = TRUE;
7695      }
7696     if (in->repr == Seq_repr_seg || in->repr == Seq_repr_delta) {
7697         retval = SeqMgrMapPartToSegmentedBioseq (in, sp->point, bsp, sp->id, flip_strand, relaxed);
7698     }
7699     if (retval == -1) {
7700         retval = CheckOffsetInLoc(slp, sp->point, bsp, sp->id);
7701     }
7702 
7703     if (locked)
7704         BioseqUnlock(bsp);
7705 
7706     if (retval >= 0) return retval;     /* got it on segments */
7707 
7708                                         /* look for segmented segments */
7709     offset = 0;
7710     curr = NULL;
7711     while ((curr = SeqLocFindNext(slp, curr)) != NULL)
7712     {
7713         sip = SeqLocId(curr);
7714         if (sip != NULL)
7715         {
7716             bsp = BioseqLockById(sip);
7717             if (bsp != NULL)
7718             {
7719                 switch (bsp->repr)
7720                 {
7721                     case Seq_repr_ref:   /* could have more levels */
7722                     case Seq_repr_seg:
7723                         offset2 = CheckPointInBioseq(sp, bsp, flip_strand, relaxed);
7724                         if (offset2 >= 0)   /* got it */
7725                         {
7726                             strt = SeqLocStart(curr);
7727                             stp = SeqLocStop(curr);
7728                             if ((offset2 >= strt) && (offset2 <= stp))
7729                             {
7730                                 if (SeqLocStrand(curr) == Seq_strand_minus)
7731                                     offset2 = stp - offset2;
7732                                 else
7733                                     offset2 -= strt;
7734                                 retval = offset2 + offset;
7735                                 return retval;
7736                             }
7737                         }
7738                         break;
7739                     default:           /* one level, already checked */
7740                         break;
7741                 }
7742                 BioseqUnlock(bsp);
7743             }
7744         }
7745         offset += SeqLocLen(curr);
7746     }
7747 
7748     return retval;    /* all failed */
7749 }
7750 
7751 
/*
 * GetEarlierSeqIdPtr
 *   Picks which of two Seq-ids should be treated as coming first.
 *
 *   - If exactly one id is NULL the other one is returned.
 *   - If SeqIdComp reports SIC_YES, sip1 is returned.
 *   - Otherwise both ids are looked up with BioseqFind:
 *       neither found -> sip1, only one found -> the id that was found.
 *   - When both are found, the ids are ordered by the position of their
 *     Bioseqs in the seq_set list of the parent BioseqSet.
 *
 *   Returns NULL when both Bioseqs have parents and the parents differ,
 *   when no BioseqSet parent is available, or when neither Bioseq is a
 *   direct member of that set.
 */
static SeqIdPtr GetEarlierSeqIdPtr (SeqIdPtr sip1, SeqIdPtr sip2)
{
  BioseqPtr    a_bsp, b_bsp;
  BioseqSetPtr container = NULL;
  SeqEntryPtr  entry;

  if (sip1 == NULL && sip2 != NULL) return sip2;
  if (sip1 != NULL && sip2 == NULL) return sip1;
  if (SeqIdComp(sip1, sip2) == SIC_YES) return sip1;

  a_bsp = BioseqFind (sip1);
  b_bsp = BioseqFind (sip2);

  if (a_bsp == NULL) {
    /* nothing known about sip1: prefer sip2 only if it resolves */
    return (b_bsp == NULL) ? sip1 : sip2;
  }
  if (b_bsp == NULL) return sip1;

  /* two different, non-NULL parents: no common list to order by */
  if (a_bsp->idx.parentptr != NULL
      && b_bsp->idx.parentptr != NULL
      && a_bsp->idx.parentptr != b_bsp->idx.parentptr) {
    return NULL;
  }

  if (a_bsp->idx.parentptr != NULL && a_bsp->idx.parenttype == OBJ_BIOSEQSET) {
    container = a_bsp->idx.parentptr;
  } else if (b_bsp->idx.parentptr != NULL && b_bsp->idx.parenttype == OBJ_BIOSEQSET) {
    container = b_bsp->idx.parentptr;
  }
  if (container == NULL) return NULL;

  /* whichever Bioseq is met first in the set wins */
  entry = container->seq_set;
  while (entry != NULL) {
    if (entry->data.ptrvalue == a_bsp) return sip1;
    if (entry->data.ptrvalue == b_bsp) return sip2;
    entry = entry->next;
  }
  return NULL;
}
7811 
7812 /*****************************************************************************
7813 *
7814 *   Boolean GetThePointForOffset(SeqLocPtr of, SeqPntPtr target, Uint1 which_end)
7815 *
7816 *****************************************************************************/
GetThePointForOffsetEx(SeqLocPtr of,SeqPntPtr target,Uint1 which_end,Boolean is_circular)7817 Boolean GetThePointForOffsetEx(SeqLocPtr of, SeqPntPtr target, Uint1 which_end, Boolean is_circular)
7818 {
7819     SeqLocPtr pnt, first=NULL, last=NULL;
7820     Uint1 first_strand, last_strand;
7821     Boolean all_minus = TRUE;
7822     Boolean all_non_minus = TRUE;
7823     Int4    lowest = -1, highest = 0, tmp;
7824     SeqIdPtr low_sip = NULL, high_sip = NULL, first_sip = NULL, last_sip = NULL;
7825     Boolean   id_same;
7826 
7827     pnt = NULL;    /* get first or last single span type in "of"*/
7828 
7829     while ((pnt = SeqLocFindNext(of, pnt)) != NULL)
7830     {
7831       if( pnt->choice == SEQLOC_NULL )
7832       {
7833         /* Skip NULL parts when determining offsets */
7834         continue;
7835       }
7836       last_strand = SeqLocStrand (pnt);
7837       last_sip = SeqLocId (pnt);
7838       if (last_strand == Seq_strand_minus) {
7839         all_non_minus = FALSE;
7840       } else {
7841         all_minus = FALSE;
7842       }
7843         last = pnt;
7844         if (first == NULL)
7845         {
7846             first = pnt;
7847             first_strand = last_strand;
7848             first_sip = last_sip;
7849             lowest = SeqLocStart(pnt);
7850             highest = SeqLocStop (pnt);
7851             low_sip = last_sip;
7852             high_sip = last_sip;
7853         }
7854         else
7855         {
7856           tmp = SeqLocStart (pnt);
7857           if (SeqIdComp (last_sip, low_sip))
7858           {
7859             id_same = TRUE;
7860           }
7861           else
7862           {
7863             id_same = FALSE;
7864           }
7865           if ((id_same && tmp < lowest)
7866               || (!id_same && last_sip == GetEarlierSeqIdPtr (last_sip, low_sip)))
7867           {
7868             lowest = tmp;
7869             low_sip = last_sip;
7870           }
7871           tmp = SeqLocStop (pnt);
7872 
7873           if (SeqIdComp (last_sip, high_sip))
7874           {
7875             id_same = TRUE;
7876           }
7877           else
7878           {
7879             id_same = FALSE;
7880           }
7881           if ((id_same && tmp > highest)
7882               || (!id_same && high_sip == GetEarlierSeqIdPtr (high_sip, last_sip)))
7883           {
7884             highest = tmp;
7885             high_sip = last_sip;
7886           }
7887         }
7888     }                   /* otherwise, get last */
7889     if (first == NULL)
7890         return FALSE;
7891 
7892     /* ignore circularity if strandedness is mixed */
7893     if( ! all_minus && ! all_non_minus ) {
7894       is_circular = FALSE;
7895     }
7896 
7897     switch (which_end)
7898     {
7899         case SEQLOC_LEFT_END:
7900           if (is_circular) {
7901             if (all_minus) {
7902               target->point = SeqLocStart (last);
7903               target->id = last_sip;
7904             } else {
7905               target->point = SeqLocStart (first);
7906               target->id = first_sip;
7907             }
7908           } else {
7909             target->point = lowest;
7910             target->id = low_sip;
7911           }
7912             break;
7913         case SEQLOC_RIGHT_END:
7914           if (is_circular) {
7915             if (all_minus) {
7916               target->point = SeqLocStop (first);
7917               target->id = first_sip;
7918             } else {
7919               target->point = SeqLocStop (last);
7920               target->id = last_sip;
7921             }
7922           } else {
7923             target->point = highest;
7924             target->id = high_sip;
7925           }
7926             break;
7927         case SEQLOC_START:
7928           if (all_minus)
7929           {
7930             target->point = SeqLocStop (first);
7931             target->id = first_sip;
7932           }
7933           else
7934           {
7935           if (first_strand == Seq_strand_minus)
7936           {
7937               target->point = SeqLocStop (first);
7938             }
7939             else
7940             {
7941               target->point = SeqLocStart (first);
7942             }
7943             target->id = first_sip;
7944           }
7945             break;
7946         case SEQLOC_STOP:
7947           if (all_minus)
7948           {
7949             target->point = SeqLocStart (last);
7950             target->id = last_sip;
7951           }
7952           else
7953           {
7954           if (last_strand == Seq_strand_minus)
7955           {
7956               target->point = SeqLocStart (last);
7957             }
7958             else
7959             {
7960               target->point = SeqLocStop (last);
7961             }
7962             target->id = last_sip;
7963           }
7964             break;
7965         default:
7966             return FALSE;   /* error */
7967     }
7968 
7969     /* SeqLocStart returns 'from', and SeqLocStop returns 'to', regardless of strand! */
7970 
7971     if ((target->point < 0) || (target->id == NULL))
7972         return FALSE;
7973 
7974     return TRUE;
7975 }
7976 
7977 
GetThePointForOffset(SeqLocPtr of,SeqPntPtr target,Uint1 which_end)7978 Boolean GetThePointForOffset(SeqLocPtr of, SeqPntPtr target, Uint1 which_end)
7979 {
7980     BioseqPtr bsp;
7981     Boolean is_circular = FALSE;
7982 
7983     bsp = BioseqFind (SeqLocId(of));
7984     if (bsp != NULL && bsp->topology == TOPOLOGY_CIRCULAR) {
7985         is_circular = TRUE;
7986     }
7987     return GetThePointForOffsetEx (of, target, which_end, is_circular);
7988 }
7989 
7990 
/*
 * GetPointsForLeftAndRightOffsets
 *   Computes both the left end and the right end of location "of" in a
 *   single traversal and stores them in "left" and "right".
 *
 *   SEQLOC_NULL parts are skipped.  For a non-circular location the left
 *   point is the lowest start and the right point the highest stop seen.
 *   For a circular location (is_circular TRUE and strands not mixed) the
 *   ends are taken from the first and last parts instead, swapped when
 *   every part is on the minus strand.
 *
 *   Returns FALSE when no usable part exists, or when either resulting
 *   point is negative or lacks an id; TRUE otherwise.
 */
Boolean GetPointsForLeftAndRightOffsets(SeqLocPtr of, SeqPntPtr left, SeqPntPtr right, Boolean is_circular)
{
    SeqLocPtr part = NULL;
    SeqLocPtr head = NULL, tail = NULL;
    Uint1     strand;
    Boolean   only_minus = TRUE;
    Boolean   no_minus = TRUE;
    Boolean   replace;
    Int4      lowest = -1, highest = 0, edge;
    SeqIdPtr  low_sip = NULL, high_sip = NULL, head_sip = NULL, tail_sip = NULL;

    while ((part = SeqLocFindNext(of, part)) != NULL)
    {
        if (part->choice == SEQLOC_NULL)
            continue;   /* Skip NULL parts when determining offsets */

        strand = SeqLocStrand (part);
        tail_sip = SeqLocId (part);
        tail = part;

        if (strand == Seq_strand_minus)
            no_minus = FALSE;
        else
            only_minus = FALSE;

        if (head == NULL)
        {
            /* first usable part seeds every running value */
            head = part;
            head_sip = tail_sip;
            lowest = SeqLocStart(part);
            highest = SeqLocStop (part);
            low_sip = tail_sip;
            high_sip = tail_sip;
            continue;
        }

        /* NOTE(review): SeqIdComp is used as a plain truth value here, while
           GetEarlierSeqIdPtr compares against SIC_YES - confirm intended. */
        edge = SeqLocStart (part);
        if (SeqIdComp (tail_sip, low_sip))
            replace = (Boolean) (edge < lowest);
        else
            replace = (Boolean) (tail_sip == GetEarlierSeqIdPtr (tail_sip, low_sip));
        if (replace)
        {
            lowest = edge;
            low_sip = tail_sip;
        }

        edge = SeqLocStop (part);
        if (SeqIdComp (tail_sip, high_sip))
            replace = (Boolean) (edge > highest);
        else
            replace = (Boolean) (high_sip == GetEarlierSeqIdPtr (high_sip, tail_sip));
        if (replace)
        {
            highest = edge;
            high_sip = tail_sip;
        }
    }

    if (head == NULL)
        return FALSE;

    /* ignore circularity if strandedness is mixed */
    if (! only_minus && ! no_minus)
        is_circular = FALSE;

    if (! is_circular)
    {
        left->point = lowest;
        left->id = low_sip;
        right->point = highest;
        right->id = high_sip;
    }
    else if (only_minus)
    {
        /* minus strand: the last part is leftmost, the first rightmost */
        left->point = SeqLocStart (tail);
        left->id = tail_sip;
        right->point = SeqLocStop (head);
        right->id = head_sip;
    }
    else
    {
        left->point = SeqLocStart (head);
        left->id = head_sip;
        right->point = SeqLocStop (tail);
        right->id = tail_sip;
    }

    if ((left->point < 0) || (left->id == NULL))
        return FALSE;
    if ((right->point < 0) || (right->id == NULL))
        return FALSE;

    return TRUE;
}
8105 
8106 
8107 /*****************************************************************************
8108 *
8109 *   CheckOffsetInLoc()
8110 *
8111 *****************************************************************************/
CheckOffsetInLoc(SeqLocPtr in,Int4 pos,BioseqPtr bsp,SeqIdPtr the_id)8112 static Int4 CheckOffsetInLoc(SeqLocPtr in, Int4 pos, BioseqPtr bsp, SeqIdPtr the_id)
8113 {
8114     SeqIdPtr tsip, sip;
8115     SeqLocPtr tmp;
8116     SeqIntPtr sipp;
8117     Boolean checkin, doit;
8118     Int4 ctr = 0, len;
8119 
8120     if (bsp != NULL)
8121     {
8122         tsip = bsp->id;
8123         checkin = 1;
8124     }
8125     else
8126     {
8127         tsip = the_id;
8128         checkin = 0;
8129     }
8130 
8131     tmp = NULL;
8132     while ((tmp = SeqLocFindNext(in, tmp)) != NULL)
8133     {
8134         sip = SeqLocId(tmp);
8135         if (checkin)    /* optimizer */
8136             doit = SeqIdIn(sip, tsip);
8137         else
8138             doit = SeqIdForSameBioseq(sip, tsip);
8139         switch (tmp->choice)
8140         {
8141             case SEQLOC_PNT:
8142                 if (doit)
8143                 {
8144                     if (pos == ((SeqPntPtr)(tmp->data.ptrvalue))->point)
8145                         return ctr;
8146                 }
8147                 ctr++;
8148                 break;
8149             case SEQLOC_INT:
8150                 sipp = (SeqIntPtr)(tmp->data.ptrvalue);
8151                 if (doit)
8152                 {
8153                     if ((pos >= sipp->from) && (pos <= sipp->to))
8154                     {
8155                         if (sipp->strand == Seq_strand_minus)
8156                             ctr += (sipp->to - pos);
8157                         else
8158                             ctr += (pos - sipp->from);
8159                         return ctr;
8160                     }
8161                 }
8162                 ctr += (sipp->to - sipp->from + 1);
8163                 break;
8164             case SEQLOC_WHOLE:
8165                 if (doit)
8166                 {
8167                     ctr += pos;
8168                     return ctr;
8169                 }
8170             default:
8171                 len = SeqLocLen(tmp);
8172                 if (len > 0) ctr += len;
8173                 break;
8174         }
8175     }
8176 
8177     return -1;    /* failed */
8178 }
8179 
8180 
8181 /*****************************************************************************
8182 *
8183 *   Int2 SeqLocOrder(SeqLocPtr a, SeqLocPtr b, BioseqPtr in);
8184 *       This function is used to sort SeqLocs into ascending order by
8185 *   location on a Bioseq (segmented or otherwise)
8186 *       The first position is the point sorted on.
8187 *   Returns
8188 *       0   a and b start at same offset
8189 *       1   a > b
8190 *       -1  a < b
8191 *       -2  a in bsp, b not
8192 *       2   b in bsp, a not
8193 *       3   neither a nor b in bsp
8194 *        This function will attempt to sort locs not in bsp to the end of
8195 *   the list.  Values -2,2,3 can also be used to detect error conditions.
8196 *
8197 *****************************************************************************/
SeqLocOrder(SeqLocPtr a,SeqLocPtr b,BioseqPtr in)8198 NLM_EXTERN Int2 SeqLocOrder (SeqLocPtr a, SeqLocPtr b, BioseqPtr in)
8199 {
8200     Int4 aoffset, boffset;
8201 
8202 
8203     if ((a == NULL) || (b == NULL) || (in == NULL))
8204         return 3;
8205 
8206     aoffset = GetOffsetInBioseq(a, in, SEQLOC_LEFT_END);
8207     boffset = GetOffsetInBioseq(b, in, SEQLOC_LEFT_END);
8208 
8209     if ((aoffset == -1) && (boffset >= 0))
8210         return 2;
8211     else if ((aoffset >= 0) && (boffset == -1))
8212         return -2;
8213     else if ((aoffset == -1) && (boffset == -1))
8214         return 3;
8215     else if (aoffset == boffset)
8216         return 0;
8217     else if (aoffset < boffset)
8218         return -1;
8219     else
8220         return 1;
8221 }
8222 
8223 /*****************************************************************************
8224 *
8225 *   Int2 SeqLocMol(seqloc)
8226 *       returns Seq-inst.mol for all Bioseqs this seqloc points to.
8227 *       if all Seq-inst.mol the same, returns that.
8228 *       if mixed dna,rna, returns na
8229 *       if mixed na,aa or can't find any Bioseq, or bsp->mol = 0, or 255
8230 *               returns 0
8231 *
8232 *****************************************************************************/
SeqLocMol(SeqLocPtr seqloc)8233 NLM_EXTERN Int2 SeqLocMol (SeqLocPtr seqloc)
8234 {
8235     SeqLocPtr slp = NULL;
8236     SeqIdPtr sip;
8237     static Uint1 cases[5][4] = {
8238         { 1,2,3,4 } ,    /* was 0, not-set */
8239         { 1,4,0,4 } ,    /* was 1, dna */
8240         { 4,2,0,4 } ,    /* was 2, rna */
8241         { 0,0,3,0 } ,    /* was 3, aa */
8242         { 4,4,0,4 }};    /* was 4, na */
8243     Int2 the_mol = 0, tmp;
8244     BioseqPtr bsp;
8245     Boolean locked = FALSE;
8246 
8247     while ((slp = SeqLocFindNext(seqloc, slp)) != NULL)
8248     {
8249         sip = SeqLocId(slp);
8250         if (sip != NULL)
8251         {
8252             bsp = BioseqFindCore(sip);
8253              if (bsp == NULL)
8254              {
8255                  bsp = BioseqLockById(sip);
8256                  if (bsp != NULL)
8257                      locked = TRUE;
8258              }
8259             if (bsp == NULL)
8260                 return 0;
8261 
8262             tmp = (Int2)bsp->mol;
8263             if (locked)
8264                 BioseqUnlock(bsp);
8265             if ((tmp == 0) || (tmp == Seq_mol_other))
8266                 return 0;
8267             the_mol = (Int2)cases[the_mol][tmp-1];
8268             if (! the_mol)
8269                 return 0;
8270         }
8271     }
8272     return the_mol;
8273 }
8274 
8275 static SeqIdPtr SeqLocPrintProc(SeqLocPtr slp, ByteStorePtr bsp, Boolean first, SeqIdPtr lastid, Boolean use_best_id);
8276 static void BSstring(ByteStorePtr bsp, CharPtr str);
8277 
8278 
SeqLocPrintEx(SeqLocPtr slp,Boolean use_best_id)8279 static CharPtr SeqLocPrintEx (SeqLocPtr slp, Boolean use_best_id)
8280 {
8281     ByteStorePtr bsp;
8282     CharPtr str;
8283     SeqLocPtr tmp;
8284 
8285     if (slp == NULL) return NULL;
8286 
8287     bsp = BSNew(80);
8288 
8289     tmp = slp->next;    /* save possible chain */
8290     slp->next = NULL;   /* take out of possible chain */
8291 
8292     SeqLocPrintProc(slp, bsp, TRUE, NULL, use_best_id);
8293 
8294     slp->next = tmp;    /* replace possible chain */
8295     str = (CharPtr)BSMerge(bsp, NULL);
8296     BSFree(bsp);
8297 
8298     return str;
8299 }
8300 
8301 /*****************************************************************************
8302 *
8303 *   SeqLocPrint(slp)
8304 *
8305 *****************************************************************************/
SeqLocPrint(SeqLocPtr slp)8306 NLM_EXTERN CharPtr SeqLocPrint(SeqLocPtr slp)
8307 {
8308   return SeqLocPrintEx (slp, FALSE);
8309 }
8310 
SeqLocPrintUseBestID(SeqLocPtr slp)8311 NLM_EXTERN CharPtr SeqLocPrintUseBestID(SeqLocPtr slp)
8312 {
8313   return SeqLocPrintEx (slp, TRUE);
8314 }
8315 
8316 NLM_EXTERN SeqIdPtr SeqPointWrite(SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid, Int2 buflen);
8317 NLM_EXTERN SeqIdPtr SeqPointPrint(SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid);
8318 NLM_EXTERN void IntFuzzPrint(IntFuzzPtr ifp, Int4 pos, CharPtr buf, Boolean right);
/* Strand prefix character, indexed by the numeric strand value; '\0' means
   "print nothing".  Index meaning follows the Na-strand enumeration
   (0 unknown, 1 plus, 2 minus, 3 both, 4 both-rev).  Only values 0..4 are
   valid indices. */
static char strandsymbol[5] = {
    '\0',
    '\0',
    'c',
    'b',
    'r'
};
8320 static SeqIdPtr SeqPointWriteEx (SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid, Int2 buflen, Boolean use_best_id);
8321 
8322 
8323 /*****************************************************************************
8324 *
8325 *   SeqLocPrintProc(slp, bsp, first, lastid)
8326 *       print traversal routine
8327 *       goes down slp chain
8328 *
8329 *****************************************************************************/
8330 static SeqIdPtr
SeqLocPrintProc(SeqLocPtr slp,ByteStorePtr bsp,Boolean first,SeqIdPtr lastid,Boolean use_best_id)8331 SeqLocPrintProc
8332 (SeqLocPtr    slp,
8333  ByteStorePtr bsp,
8334  Boolean      first,
8335  SeqIdPtr     lastid,
8336  Boolean      use_best_id)
8337 {
8338     Char buf[128];
8339     SeqBondPtr sbp;
8340     PackSeqPntPtr pspp;
8341     SeqIntPtr sip;
8342     IntFuzzPtr ifp1, ifp2;
8343     Int4 from, to;
8344     Int2 delim, delim2;
8345     BioseqPtr   seq;
8346     SeqIdPtr    thisid;
8347 
8348     while (slp != NULL)
8349     {
8350         if (! first)
8351         {
8352             BSPutByte(bsp, ',');
8353             BSPutByte(bsp, ' ');
8354         }
8355         first = FALSE;
8356 
8357         delim = 0;
8358         switch (slp->choice)
8359         {
8360             case SEQLOC_BOND:   /* bond -- 2 seqs */
8361                 sbp = (SeqBondPtr)(slp->data.ptrvalue);
8362                 if (sbp->a != NULL)
8363                 {
8364                     lastid = SeqPointWriteEx(sbp->a, buf, lastid, sizeof(buf) - 1, use_best_id);
8365                     BSstring(bsp, buf);
8366                 }
8367                 else
8368                     BSPutByte(bsp, '?');
8369 
8370                 BSPutByte(bsp, '=');
8371 
8372                 if (sbp->b != NULL)
8373                 {
8374                     lastid = SeqPointWriteEx(sbp->b, buf, lastid, sizeof(buf) - 1, use_best_id);
8375                     BSstring(bsp, buf);
8376                 }
8377                 else
8378                     BSPutByte(bsp, '?');
8379                 break;
8380             case SEQLOC_FEAT:   /* feat -- can't track yet */
8381                 BSstring(bsp, "(feat)");
8382                 break;
8383             case SEQLOC_NULL:    /* NULL */
8384                 BSPutByte(bsp, '~');
8385                 break;
8386             case SEQLOC_EMPTY:    /* empty */
8387                 BSPutByte(bsp, '{');
8388                 SeqIdWrite((SeqIdPtr)(slp->data.ptrvalue),
8389                             buf, PRINTID_FASTA_SHORT, sizeof(buf) - 1);
8390                 BSstring(bsp, buf);
8391                 BSPutByte(bsp, '}');
8392                 break;
8393             case SEQLOC_WHOLE:    /* whole */
8394                 SeqIdWrite((SeqIdPtr)(slp->data.ptrvalue),
8395                         buf, PRINTID_FASTA_SHORT, sizeof(buf) - 1);
8396                 BSstring(bsp, buf);
8397                 break;
8398             case SEQLOC_MIX:    /* mix -- more than one seq */
8399             case SEQLOC_PACKED_INT:    /* packed int */
8400                 delim = '(';
8401                 delim2 = ')';
8402             case SEQLOC_EQUIV:    /* equiv -- ditto */
8403                 if (! delim)
8404                 {
8405                     delim = '[';
8406                     delim2 = ']';
8407                 }
8408                 BSPutByte(bsp, delim);
8409                 lastid = SeqLocPrintProc((SeqLocPtr)(slp->data.ptrvalue), bsp, TRUE, lastid, use_best_id);
8410                 BSPutByte(bsp, delim2);
8411                 break;
8412             case SEQLOC_INT:    /* int */
8413             {
8414                 Uint1 seqid_format = PRINTID_FASTA_SHORT;
8415                 sip = (SeqIntPtr)(slp->data.ptrvalue);
8416                 thisid = sip->id;
8417                 if (use_best_id)
8418                 {
8419                   seq = BioseqFind (thisid);
8420                   if (seq != NULL)
8421                   {
8422                       /* JIRA ID-3530 : Find Seq-id containing accession */
8423                       thisid = SeqIdFindBestAccession (seq->id);
8424                       seqid_format = PRINTID_TEXTID_ACC_VER;
8425                   }
8426                 }
8427                 if (! SeqIdMatch(sip->id, lastid))
8428                 {
8429                     SeqIdWrite(thisid, buf, seqid_format, sizeof(buf) - 1);
8430                     BSstring(bsp, buf);
8431                     BSPutByte(bsp, ':');
8432                 }
8433                 lastid = thisid;
8434                 if (strandsymbol[sip->strand])
8435                     BSPutByte(bsp, (Int2)strandsymbol[sip->strand]);
8436                 if ((sip->strand == Seq_strand_minus) ||
8437                     (sip->strand == Seq_strand_both_rev))
8438                 {
8439                     ifp1 = sip->if_to;
8440                     ifp2 = sip->if_from;
8441                     to = sip->from;
8442                     from = sip->to;
8443                 }
8444                 else
8445                 {
8446                     ifp1 = sip->if_from;
8447                     ifp2 = sip->if_to;
8448                     to = sip->to;
8449                     from = sip->from;
8450 
8451                 }
8452                 IntFuzzPrint(ifp1, from, buf, FALSE);
8453                 BSstring(bsp, buf);
8454                 BSPutByte(bsp, '-');
8455                 IntFuzzPrint(ifp2, to, buf, TRUE);
8456                 BSstring(bsp, buf);
8457 
8458                 break;
8459             }
8460             case SEQLOC_PNT:    /* pnt */
8461                 lastid = SeqPointWriteEx((SeqPntPtr)(slp->data.ptrvalue),
8462                                            buf, lastid, sizeof(buf) - 1, use_best_id);
8463                 BSstring(bsp, buf);
8464                 break;
8465             case SEQLOC_PACKED_PNT:    /* packed pnt */
8466                 pspp = (PackSeqPntPtr)(slp->data.ptrvalue);
8467                 if (pspp != NULL)
8468                     BSstring(bsp, "PackSeqPnt");
8469                 break;
8470             default:
8471                 BSstring(bsp, "(\?\?)");
8472                 break;
8473         }
8474         slp = slp->next;
8475     }
8476     return lastid;
8477 }
8478 
8479 
8480 /*****************************************************************************
8481 *
8482 *   BSstring(bsp, str)
8483 *
8484 *****************************************************************************/
BSstring(ByteStorePtr bsp,CharPtr str)8485 static void BSstring(ByteStorePtr bsp, CharPtr str)
8486 {
8487     BSWrite(bsp, str, (Int4)(StringLen(str)));
8488     return;
8489 }
8490 
8491 /*****************************************************************************
8492 *
8493 *   SeqPointPrint(spp, buf, lastid)
8494 *
8495 *****************************************************************************/
SeqPointPrint(SeqPntPtr spp,CharPtr buf,SeqIdPtr lastid)8496 NLM_EXTERN SeqIdPtr SeqPointPrint(SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid)
8497 {
8498     CharPtr tmp;
8499 
8500     if ((spp == NULL) || (buf == NULL)) return NULL;
8501 
8502     tmp = buf;
8503     *tmp = '\0';
8504     if (! SeqIdMatch(spp->id, lastid))
8505     {
8506         SeqIdPrint(spp->id, tmp, PRINTID_FASTA_SHORT);
8507         while (*tmp != '\0') tmp++;
8508         *tmp = ':';
8509         tmp++; *tmp = '\0';
8510     }
8511     if (strandsymbol[spp->strand])
8512     {
8513         *tmp = strandsymbol[spp->strand];
8514         tmp++; *tmp = '\0';
8515     }
8516     IntFuzzPrint(spp->fuzz, spp->point, tmp, TRUE);
8517 
8518     return spp->id;
8519 }
8520 
8521 static SeqIdPtr
SeqPointWriteEx(SeqPntPtr spp,CharPtr buf,SeqIdPtr lastid,Int2 buflen,Boolean use_best_id)8522 SeqPointWriteEx
8523 (SeqPntPtr spp,
8524  CharPtr buf,
8525  SeqIdPtr lastid,
8526  Int2 buflen,
8527  Boolean use_best_id)
8528 {
8529     CharPtr  tmp;
8530     SeqIdPtr best_id, tmp_next;
8531     BioseqPtr bsp;
8532     Int4      fuzzlen, id_len;
8533     Char      fuzzbuf[100];
8534 
8535     if ((spp == NULL) || (buf == NULL)) return NULL;
8536 
8537     tmp = buf;
8538     *tmp = '\0';
8539     if (buflen < 2) return NULL;
8540 
8541     best_id = spp->id;
8542     if (use_best_id)
8543     {
8544       bsp = BioseqFind (spp->id);
8545       if (bsp != NULL)
8546       {
8547         best_id = SeqIdFindBest (bsp->id, SEQID_GENBANK);
8548       }
8549     }
8550     tmp_next = best_id->next;
8551     best_id->next = NULL;
8552 
8553     IntFuzzPrint(spp->fuzz, spp->point, fuzzbuf, TRUE);
8554     fuzzlen = StringLen (fuzzbuf);
8555 
8556     if (! SeqIdMatch(best_id, lastid))
8557     {
8558         SeqIdWrite(best_id, tmp, PRINTID_FASTA_SHORT, buflen - 2);
8559         while (*tmp != '\0') tmp++;
8560         *tmp = ':';
8561         tmp++; *tmp = '\0';
8562     }
8563     if (strandsymbol[spp->strand])
8564     {
8565         *tmp = strandsymbol[spp->strand];
8566         tmp++; *tmp = '\0';
8567     }
8568 
8569     id_len = StringLen (buf);
8570     if (id_len < buflen - 1) {
8571       StringNCat (buf, fuzzbuf, buflen - id_len - 1);
8572       buf[buflen - 1] = 0;
8573     }
8574 
8575     best_id->next = tmp_next;
8576 
8577     return best_id;
8578 }
8579 
8580 /*****************************************************************************
8581 *
8582 *   SeqPointWrite(spp, buf, lastid, buflen)
8583 *
8584 *****************************************************************************/
SeqPointWrite(SeqPntPtr spp,CharPtr buf,SeqIdPtr lastid,Int2 buflen)8585 NLM_EXTERN SeqIdPtr SeqPointWrite(SeqPntPtr spp, CharPtr buf, SeqIdPtr lastid, Int2 buflen)
8586 {
8587   return SeqPointWriteEx (spp, buf, lastid, buflen, FALSE);
8588 }
8589 
8590 /*****************************************************************************
8591 *
8592 *   IntFuzzPrint(ifp, pos, buf, right)
8593 *
8594 *****************************************************************************/
IntFuzzPrint(IntFuzzPtr ifp,Int4 pos,CharPtr buf,Boolean right)8595 NLM_EXTERN void IntFuzzPrint(IntFuzzPtr ifp, Int4 pos, CharPtr buf, Boolean right)
8596 {
8597     Char lim=0;
8598     CharPtr tmp;
8599     Char tbuf[40];
8600 
8601     if (buf == NULL) return;
8602     pos++;  /* number from 1 */
8603     tmp = buf;
8604     *tmp = '\0';
8605     *tbuf = '\0';
8606     if (ifp != NULL)
8607     {
8608         switch (ifp->choice)
8609         {
8610             case 1:     /* plus minus */
8611                 sprintf(tbuf, "<+-%ld>", (long)ifp->a);
8612                 break;
8613             case 2:     /* range */
8614                 sprintf(tbuf, "<%ld.%ld>", (long)ifp->b, (long)ifp->a);
8615                 break;
8616             case 3:     /* percent */
8617                 sprintf(tbuf, "<%ld%%>", (long)ifp->a);
8618                 break;
8619             case 4:     /* limit */
8620                 switch (ifp->a)
8621                 {
8622                     case 0:    /* unknown */
8623                     case 255:  /* other */
8624                         sprintf(tbuf, "<?>");
8625                         break;
8626                     case 1:    /* gt */
8627                         lim = '>';
8628                         break;
8629                     case 2:    /* lt */
8630                         lim = '<';
8631                         break;
8632                     case 3:
8633                         lim = 'r';
8634                         break;
8635                     case 4:
8636                         lim = '^';
8637                         break;
8638                 }
8639                 break;
8640         }
8641     }
8642 
8643     if ((lim) && (lim != 'r'))
8644     {
8645         *tmp = lim;
8646         tmp++; *tmp = '\0';
8647         lim = 0;
8648     }
8649 
8650     if (right)
8651     {
8652         sprintf(tmp, "%ld", (long)pos);
8653         while (*tmp != '\0') tmp++;
8654     }
8655     if (lim == 'r')
8656     {
8657         *tmp = '^';
8658         tmp++;
8659         *tmp = '\0';
8660     }
8661     if (tbuf[0] != '\0')
8662     {
8663         tmp = StringMove(tmp, tbuf);
8664     }
8665     if (! right)
8666         sprintf(tmp, "%ld", (long)pos);
8667 
8668     return;
8669 
8670 }
8671 /*****************************************************************************
8672 *
8673 *   TaxNameFromCommon(common)
8674 *
8675 *****************************************************************************/
8676 typedef struct sturct_Nlm_taxcommon {
8677     char * common;
8678     char * taxname;
8679 } Nlm_TaxCommon, PNTR Nlm_TaxCommonPtr;
8680 
TaxNameFromCommon(CharPtr common)8681 NLM_EXTERN CharPtr TaxNameFromCommon (CharPtr common)
8682 {
8683     CharPtr taxname = NULL;
8684     CharPtr query = (CharPtr)MemNew(StringLen(common) + 2);
8685     int tax_try, dex;
8686 
8687     static Nlm_TaxCommon taxcommon[40] = {
8688     "Chinese hamsters", "Cricetulus sp."
8689     ,"Syrian golden hamsters", "Mesocricetus auratus"
8690     ,"Syrian hamsters", "Mesocricetus sp."
8691     ,"barley", "Hordeum sp."
8692     ,"carrots", "Daucus sp."
8693     ,"cats", "Felis sp."
8694     ,"cattles", "Bos sp."
8695     ,"chickens", "Gallus sp."
8696     ,"chimpanzees", "Pan sp."
8697     ,"chimpanzes", "Pan sp."
8698     ,"corn", "Zea sp."
8699     ,"cucumber", "Cucumis sativus"
8700     ,"dogs", "Canis sp."
8701     ,"goats", "Capra sp."
8702     ,"gorillas", "Gorilla sp."
8703     ,"guinea pigs", "Cavia sp."
8704     ,"hamsters", "Cricetidae gen. sp."
8705     ,"horses", "Equus sp."
8706     ,"humans", "Homo sapiens"
8707     ,"maize", "Zea sp."
8708     ,"mice", "Mus sp."
8709     ,"mouse", "Mus sp."
8710     ,"peas", "Pisum sp."
8711     ,"potatoes", "Solanum sp."
8712     ,"potato", "Solanum sp."
8713     ,"quails", "Phasianidae gen. sp."
8714     ,"rabbits", "Oryctolagus sp."
8715     ,"rats", "Rattus sp."
8716     ,"rices", "Oryza sp."
8717     ,"sheeps", "Ovis sp."
8718     ,"sorghums", "Sorghum sp."
8719     ,"soybeans", "Glycine sp."
8720     ,"spinach", "Spinacia sp."
8721     ,"swine", "Sus sp."
8722     ,"tobacco", "Nicotiania sp."
8723     ,"tomatoes", "Lycopersicon sp."
8724     ,"tomato", "Lycopersicon sp."
8725     ,"turkeys", "Meleagris sp."
8726     ,"wheat", "Triticum sp."
8727     ,"zebrafish", "Brachydanio sp."
8728 };
8729 
8730     if (common == NULL) return NULL;
8731 
8732     StringCpy(query,common);  /* space for 's' is at end */
8733     for (tax_try = 0; tax_try < 2; tax_try ++){
8734         for (dex = 0; dex < 40; dex ++ ){
8735             if (StringICmp(query,taxcommon[dex].common) == 0)
8736                 break;
8737         }
8738         if ( dex < 40)
8739             break;
8740         if (tax_try == 0)
8741             StringCat(query,"s");
8742     }
8743     MemFree (query);
8744     if (dex < 40)
8745         taxname = StringSave (taxcommon[dex].taxname);
8746 
8747     return taxname;
8748 }
8749 
8750 /*****************************************************************************
8751 *
8752 *   QualLocCreate(from, to)
8753 *       creates a UserObject of _class NCBI, type 1
8754 *       adds a field of type "qual_loc"
8755 *       puts the from and to numbers in
8756 *       no range check, no strand, no seqid
8757 *       this just carries locations for the qualifiers anticodon and rpt_unit
8758 *       Intended to go on SeqFeat.ext
8759 *
8760 *****************************************************************************/
QualLocCreate(Int4 from,Int4 to)8761 NLM_EXTERN UserObjectPtr QualLocCreate (Int4 from, Int4 to)
8762 {
8763     UserObjectPtr usop;
8764     UserFieldPtr ufp;
8765     ObjectIdPtr oip;
8766     Int4Ptr ints;
8767 
8768     usop = UserObjectNew();
8769     usop->_class = StringSave("NCBI");
8770     oip = ObjectIdNew();
8771     oip->id = 1;
8772     usop->type = oip;
8773 
8774     ufp = UserFieldNew();
8775         usop->data = ufp;
8776     oip = ObjectIdNew();
8777     oip->str = StringSave("qual_loc");
8778     ufp->label = oip;
8779     ufp->num = 2;
8780     ufp->choice = 8;   /* ints */
8781 
8782     ints = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * 2));
8783     ints[0] = from;
8784     ints[1] = to;
8785     ufp->data.ptrvalue = (Pointer)ints;
8786 
8787     return usop;
8788 
8789 }
8790 
8791 /*****************************************************************************
8792 *
8793 *   QualLocWrite(uop, buf)
8794 *       Checks a SeqFeat.ext to see if it is
8795 *           1) not null
8796 *           2) has a UserObject of _class NCBI, type 1
8797 *           3) has a field of label "qual_loc"
8798 *           4) if so, prints the two integers as a qualifier location
8799 *               from..to and returns a pointer to the \0 after "to"
8800 *       If any of the above fail, returns NULL
8801 *
8802 *****************************************************************************/
QualLocWrite(UserObjectPtr uop,CharPtr buf)8803 NLM_EXTERN CharPtr QualLocWrite(UserObjectPtr uop, CharPtr buf)
8804 {
8805     CharPtr tmp=NULL;
8806     UserFieldPtr ufp;
8807     Int4Ptr ints;
8808 
8809     if ((uop == NULL) || (buf == NULL))
8810         return tmp;
8811 
8812     if (StringCmp(uop->_class, "NCBI"))
8813         return tmp;
8814 
8815     if (uop->type->id != 1)
8816         return tmp;
8817 
8818     for (ufp = uop->data; ufp != NULL; ufp = ufp->next)
8819     {
8820         if (! StringCmp(ufp->label->str, "qual_loc"))
8821         {
8822             if (ufp->choice != 8)  /* not ints */
8823                 return NULL;
8824             if (ufp->num < 2)       /* not enough */
8825                 return NULL;
8826             ints = (Int4Ptr)(ufp->data.ptrvalue);
8827             if (ints == NULL)
8828                 return tmp;
8829             tmp = buf;
8830             sprintf(tmp, "%ld..%ld", (long)(ints[0]+1),
8831                                                           (long)(ints[1]+1));
8832             while (*tmp != '\0')
8833                 tmp++;
8834             return tmp;
8835         }
8836     }
8837 
8838     return tmp;
8839 }
8840 
8841 /*****************************************************************************
8842 *
8843 *   EntrezASN1Detected detects records retrieved from Entrez, which should
8844 *       not be edited by Sequin and replaced into ID.
8845 *
8846 *****************************************************************************/
8847 
/* SeqEntryExplore callback.  mydata points at a Boolean; it is set to TRUE
   when the Bioseq or BioseqSet in sep has a user descriptor whose _class is
   "gbfix" or "pdbfix".  The flag is never cleared here.  index and indent
   are not used. */
static void EntrezAsnCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  BioseqPtr      bsp;
  BioseqSetPtr   bssp;
  SeqDescrPtr    descr;
  SeqDescrPtr    sdp;
  BoolPtr        found;
  UserObjectPtr  uop;

  if (sep == NULL) return;
  if (sep->data.ptrvalue == NULL || mydata == NULL) return;

  found = (BoolPtr) mydata;

  if (IS_Bioseq (sep)) {
    bsp = (BioseqPtr) sep->data.ptrvalue;
    descr = bsp->descr;
  } else {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    descr = bssp->descr;
  }
  if (descr == NULL) return;

  for (sdp = ValNodeFindNext (descr, NULL, Seq_descr_user);
       sdp != NULL;
       sdp = ValNodeFindNext (descr, sdp, Seq_descr_user)) {
    uop = (UserObjectPtr) sdp->data.ptrvalue;
    if (uop == NULL) continue;
    if (StringCmp (uop->_class, "gbfix") == 0 ||
        StringCmp (uop->_class, "pdbfix") == 0) {
      *found = TRUE;
    }
  }
}
8872 
EntrezASN1Detected(SeqEntryPtr sep)8873 NLM_EXTERN Boolean EntrezASN1Detected (SeqEntryPtr sep)
8874 
8875 {
8876   Boolean  rsult;
8877 
8878   rsult = FALSE;
8879   SeqEntryExplore (sep, (Pointer) &rsult, EntrezAsnCallback);
8880   return rsult;
8881 }
8882 
8883 /*****************************************************************************
8884 *
8885 *   SeqLocIntNew(Int4 from, Int4 to, Uint1 strand, SeqIdPtr sip)
8886 *      makes copy of incoming SeqId
8887 *
8888 *****************************************************************************/
SeqLocIntNew(Int4 from,Int4 to,Uint1 strand,SeqIdPtr sip)8889 NLM_EXTERN SeqLocPtr LIBCALL SeqLocIntNew (Int4 from, Int4 to, Uint1 strand, SeqIdPtr sip)
8890 {
8891     SeqIntPtr sintp;
8892     SeqLocPtr slp;
8893 
8894     if (sip == NULL) return NULL;
8895     sintp = SeqIntNew();
8896     sintp->id = SeqIdDup(sip);
8897     sintp->from = from;
8898     sintp->to = to;
8899     sintp->strand = strand;
8900 
8901     slp = ValNodeNew(NULL);
8902     slp->choice = SEQLOC_INT;
8903     slp->data.ptrvalue = (Pointer)sintp;
8904 
8905     return slp;
8906 }
8907 
8908 /*****************************************************************************
8909 *
8910 *   SeqLocPntNew(Int4 pos, Uint1 strand, SeqIdPtr sip, Boolean is_fuzz)
8911 *      makes copy of incoming SeqId
8912 *
8913 *****************************************************************************/
SeqLocPntNew(Int4 pos,Uint1 strand,SeqIdPtr sip,Boolean is_fuzz)8914 NLM_EXTERN SeqLocPtr LIBCALL SeqLocPntNew(Int4 pos, Uint1 strand, SeqIdPtr sip, Boolean is_fuzz)
8915 {
8916     SeqLocPtr slp;
8917     SeqPntPtr spp;
8918     IntFuzzPtr ifp;
8919 
8920     slp = ValNodeNew(NULL);
8921     slp->choice = SEQLOC_PNT;
8922     spp = SeqPntNew();
8923     spp->point = pos;
8924     spp->strand = strand;
8925     spp->id = SeqIdDup(sip);
8926     if(is_fuzz)
8927     {
8928         ifp = IntFuzzNew();
8929         ifp->choice = 4;
8930         ifp->a = 0;    /*unknown value*/
8931         spp->fuzz = ifp;
8932     }
8933     slp->data.ptrvalue = spp;
8934 
8935     return slp;
8936 
8937 }
8938 
FreeSeqLocSetComponents(SeqLocPtr list)8939 NLM_EXTERN void FreeSeqLocSetComponents (SeqLocPtr list)
8940 
8941 {
8942   BioseqPtr  bsp;
8943   SeqIdPtr   sip;
8944   SeqLocPtr  slp;
8945   Uint2      entityID;
8946 
8947   for (slp = list; slp != NULL; slp = slp->next) {
8948     sip = SeqLocId (slp);
8949     if (sip == NULL) continue;
8950     bsp = BioseqFind (sip);
8951     if (bsp == NULL) continue;
8952     entityID = ObjMgrGetEntityIDForPointer (bsp);
8953     if (entityID < 1) continue;
8954     ObjMgrFreeByEntityID (entityID);
8955   }
8956 }
8957 
8958 
8959 /* a "gather routine" which collects information about the coding region
8960    features */
gatherCodingRegions(GatherContextPtr gcp)8961 static Boolean gatherCodingRegions(GatherContextPtr gcp)
8962 {
8963     SpliceInfoPtr sip;
8964     SeqFeatPtr sfp;
8965     SeqLocPtr slp, current, next, protSlp, chain;
8966     Uint1 strand;
8967 
8968     if (gcp == NULL || gcp->thisitem == NULL) return TRUE;
8969 
8970     /* although we had to include higher-level types in GatherScope's
8971        "ignore" parameter so that Gather could "see" features, we're
8972        really only interested in features */
8973     if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
8974 
8975     sip = (SpliceInfoPtr) gcp->userdata;
8976 
8977     if (sip == NULL || sip->slp == NULL) return TRUE;
8978 
8979     sfp = (SeqFeatPtr) gcp->thisitem;
8980 
8981     /* we're only interested in coding regions */
8982     if (sfp->data.choice != SEQFEAT_CDREGION || sfp->product == NULL) return TRUE;
8983 
8984     /* traverse all (but the last) intervals of the coding region feature */
8985     for (current = SeqLocFindNext(sfp->location, NULL);
8986          (next = SeqLocFindNext(sfp->location, current)) != NULL;
8987          current = next) {
8988         /* consider the last DNA base on the interval */
8989         strand = SeqLocStrand(current);
8990         slp = SeqLocPntNew(strand == Seq_strand_minus ?
8991              SeqLocStart(current) : SeqLocStop(current), strand,
8992              SeqLocId(current), FALSE);
8993         if (sip->findOnProtein)
8994         {
8995             /* find the corresponding location on the protein */
8996             protSlp = dnaLoc_to_aaLoc(sfp, slp, TRUE, NULL, FALSE);
8997 
8998             protSlp->next = NULL;
8999             SeqLocFree(slp);
9000 
9001             if (sip->slp->data.ptrvalue == NULL)
9002             {
9003                 sip->slp->data.ptrvalue = protSlp;
9004             } else {
9005                 for (chain = (SeqLocPtr) sip->slp->data.ptrvalue;
9006                  chain->next != NULL; chain = chain->next) {
9007                 }
9008                 chain->next = protSlp;
9009             }
9010         } else {
9011             if (sip->slp->data.ptrvalue == NULL)
9012             {
9013                 sip->slp->data.ptrvalue = slp;
9014             } else {
9015                 for (chain = (SeqLocPtr) sip->slp->data.ptrvalue;
9016                  chain->next != NULL; chain = chain->next) {
9017                 }
9018                 chain->next = slp;
9019             }
9020         }
9021     }
9022 
9023     return TRUE;
9024 }
9025 
9026 /*****************************************************************************
9027 *
9028 *   SeqLocPtr FindSpliceSites(SeqEntryPtr sep, Boolean findOnProtein)
9029 *      Finds the splice sites on this SeqEntry and returns them as a
9030 *      SeqLoc.
9031 *
9032 *****************************************************************************/
FindSpliceSites(SeqEntryPtr sep,Boolean findOnProtein)9033 NLM_EXTERN SeqLocPtr LIBCALL FindSpliceSites(SeqEntryPtr sep, Boolean findOnProtein)
9034 {
9035     SpliceInfo si;
9036     GatherScope gs;
9037     Int2 i;
9038     SeqLocPtr slp;
9039 
9040             MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
9041             gs.get_feats_location = FALSE;
9042             for (i = 0; i < OBJ_MAX; i++)
9043                 gs.ignore[i] = TRUE;
9044             gs.ignore[OBJ_SEQFEAT] = FALSE;
9045             gs.ignore[OBJ_SEQANNOT] = FALSE;
9046         slp = ValNodeNew(NULL);
9047         slp->choice = SEQLOC_EQUIV;
9048         slp->data.ptrvalue = NULL; /* to be filled in within Gather */
9049             si.slp = slp;
9050             si.findOnProtein = findOnProtein;
9051             GatherSeqEntry(sep, &si, gatherCodingRegions, &gs);
9052         if (slp->data.ptrvalue == NULL)
9053         {
9054         SeqLocFree(slp);
9055         return NULL;
9056         }
9057 
9058         return slp;
9059 }
9060 
9061 
9062 /* a "gather routine" which collects information about the coding region
9063    feature (based upon the assumption that there is only one) */
gatherTheCodingRegion(GatherContextPtr gcp)9064 static Boolean gatherTheCodingRegion(GatherContextPtr gcp)
9065 {
9066     SeqFeatPtr PNTR sffp;
9067     SeqFeatPtr sfp;
9068 
9069     if (gcp == NULL || gcp->thisitem == NULL) return TRUE;
9070 
9071     /* although we had to include higher-level types in GatherScope's
9072        "ignore" parameter so that Gather could "see" features, we're
9073        really only interested in features */
9074     if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
9075 
9076     sffp = (SeqFeatPtr PNTR) gcp->userdata;
9077 
9078     if (sffp == NULL) return FALSE;
9079 
9080     sfp = (SeqFeatPtr) gcp->thisitem;
9081 
9082     /* we're only interested in coding regions */
9083     if (sfp->data.choice != SEQFEAT_CDREGION || sfp->product == NULL) return TRUE;
9084 
9085     *sffp = sfp;
9086 
9087     return FALSE;
9088 }
9089 
9090 /*****************************************************************************
9091 *
9092 *   SeqFeatPtr FindCodingRegion(SeqEntryPtr sep)
9093 *      Finds the coding region feature on this protein SeqEntry and
9094 *      returns a copy of it.
9095 *
9096 *****************************************************************************/
FindCodingRegion(SeqEntryPtr sep)9097 NLM_EXTERN SeqFeatPtr LIBCALL FindCodingRegion(SeqEntryPtr sep)
9098 {
9099     SeqFeatPtr sfp = NULL;
9100     GatherScope gs;
9101     Int2 i;
9102 
9103             MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
9104             gs.get_feats_location = FALSE;
9105             for (i = 0; i < OBJ_MAX; i++)
9106                 gs.ignore[i] = TRUE;
9107             gs.ignore[OBJ_SEQFEAT] = FALSE;
9108             gs.ignore[OBJ_SEQANNOT] = FALSE;
9109             GatherSeqEntry(sep, &sfp, gatherTheCodingRegion, &gs);
9110         /* make a copy of it */
9111         if (sfp != NULL)
9112             sfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
9113         return sfp;
9114 }
9115 
gatherMolTypeCheck(GatherContextPtr gcp)9116 static Boolean gatherMolTypeCheck(GatherContextPtr gcp)
9117 {
9118     SeqIdCheckerPtr sicp;
9119     BioseqPtr bsp;
9120 
9121     if (gcp == NULL || gcp->thisitem == NULL) return TRUE;
9122 
9123     sicp = (SeqIdCheckerPtr) gcp->userdata;
9124     bsp = (BioseqPtr) gcp->thisitem;
9125 
9126     if (bsp == NULL) return TRUE;
9127     if (sicp == NULL) return FALSE;
9128 
9129     /* check for mol-type mismatch */
9130     if (ISA_na(bsp->mol) == sicp->isProtein) return TRUE;
9131 
9132     if (sicp->sip != NULL)
9133     {
9134         if (SeqIdComp(bsp->id, sicp->sip) != SIC_YES)
9135         return TRUE;
9136     }
9137 
9138     sicp->retval = TRUE;
9139 
9140     /* no need to examine other Bioseqs */
9141     return FALSE;
9142 }
9143 
9144 /*****************************************************************************
9145 *
9146 *   Boolean SeqEntryContainsSeqIdOfMolType(SeqEntryPtr sep, SeqIdPtr sip, Boolean isProtein)
9147 *      Tests to see if this SeqEntry contains a bioseq of the specified moltype
9148 *        (protein or DNA)
9149 *      if sip != NULL then it also insists upon finding a bioseq of the
9150 *        specified moltype where the SeqIds match
9151 *
9152 *****************************************************************************/
SeqEntryContainsSeqIdOfMolType(SeqEntryPtr sep,SeqIdPtr sip,Boolean isProtein)9153 NLM_EXTERN Boolean LIBCALL SeqEntryContainsSeqIdOfMolType(SeqEntryPtr sep, SeqIdPtr sip, Boolean isProtein)
9154 {
9155     SeqIdChecker sic;
9156     GatherScope gs;
9157     Int2 i;
9158 
9159     MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
9160     gs.get_feats_location = FALSE;
9161     for (i = 0; i < OBJ_MAX; i++)
9162         gs.ignore[i] = TRUE;
9163     gs.ignore[OBJ_BIOSEQ] = FALSE;
9164     sic.sip = sip;
9165     sic.isProtein = isProtein;
9166     sic.retval = FALSE;
9167     GatherSeqEntry(sep, &sic, gatherMolTypeCheck, &gs);
9168 
9169     return sic.retval;
9170 }
9171 
GIMolTypeCheck(GatherContextPtr gcp)9172 static Boolean GIMolTypeCheck(GatherContextPtr gcp)
9173 {
9174     SeqIdMolTypePtr sicp;
9175     BioseqPtr bsp;
9176 
9177     if (gcp == NULL || gcp->thisitem == NULL) return TRUE;
9178 
9179     sicp = (SeqIdMolTypePtr) gcp->userdata;
9180     bsp = (BioseqPtr) gcp->thisitem;
9181 
9182     if (bsp == NULL) return TRUE;
9183     if (sicp == NULL) return FALSE;
9184 
9185     if (sicp->sip != NULL) {
9186         if (SeqIdIn(sicp->sip, bsp->id) == FALSE)
9187         return TRUE;
9188     }
9189     if (ISA_na(bsp->mol)) {
9190         sicp->mtype = 2;
9191     } else {
9192         sicp->mtype = 1;
9193     }
9194 
9195     /* no need to examine other Bioseqs */
9196     return FALSE;
9197 }
9198 
9199 /*****************************************************************************
9200 *
9201 *      Tests to see if this SeqEntry contains a bioseq of the specified uid
9202 *      returns moltype of the bioseq where the SeqIds match
9203 *              0     id not found in this SeqEntry
9204 *              1     Amino Acid sequence
9205 *              2     Nucleotide sequence
9206 *
9207 *****************************************************************************/
MolTypeForGI(SeqEntryPtr sep,Int4 uid)9208 NLM_EXTERN Int2 LIBCALL MolTypeForGI(SeqEntryPtr sep, Int4 uid)
9209 {
9210     SeqIdMolType sic;
9211     SeqIdPtr sip;
9212     GatherScope gs;
9213 
9214     MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
9215     MemSet ((Pointer) (gs.ignore), (int)(TRUE),
9216             (size_t) (OBJ_MAX * sizeof(Boolean)));
9217     gs.get_feats_location = FALSE;
9218     gs.ignore[OBJ_BIOSEQ] = FALSE;
9219     sip = ValNodeNew(NULL);
9220     sip->choice = SEQID_GI;
9221     sip->data.intvalue = uid;
9222     sic.sip = sip;
9223     sic.mtype = 0;
9224     GatherSeqEntry(sep, &sic, GIMolTypeCheck, &gs);
9225     ValNodeFree(sip);
9226 
9227     return sic.mtype;
9228 }
9229 
9230 /******************************************************************
9231 ***
9232 *    local_id_make(): make SeqIdPtr with SEQID_LOCAL choice
9233 *
9234 *******************************************************************
9235 ***/
local_id_make(CharPtr name)9236 NLM_EXTERN SeqIdPtr local_id_make(CharPtr name)
9237 {
9238    SeqIdPtr new_id;
9239    ObjectIdPtr obj_id;
9240 
9241 
9242     new_id=(SeqIdPtr)ValNodeNew(NULL);
9243     new_id->choice = SEQID_LOCAL;
9244 
9245     obj_id = ObjectIdNew();
9246     obj_id->str = StringSave(name);
9247     new_id->data.ptrvalue = obj_id;
9248 
9249     return new_id;
9250 }
9251 
9252 
9253 
9254 /*************************************************************
9255 *
9256 *    MuskSeqIdWrite (sip, buf, buflen, format, do_find, do_entrez_find)
9257 *    print Seq-id to the buffer with the chromoscope format
9258 *    sip, buf, buflen, format is similar to what is defined in
9259 *    SeqIdWrite.
9260 *    do_find, if TRUE, find the most printable id
9261 *    do_entrez_find. if TRUE, if the id is a gi, find the printable id
9262 *
9263 ***************************************************************/
9264 
9265 /* a kludge function to make the special formatting for
9266    TIGRs THC sequences
9267 */
format_tigr_thc(SeqIdPtr sip,CharPtr buf,Int2 buflen)9268 static Boolean format_tigr_thc (SeqIdPtr sip, CharPtr buf, Int2 buflen)
9269 {
9270     DbtagPtr db_tag;
9271     Char temp[101];
9272     ObjectIdPtr oip;
9273 
9274     while(sip)
9275     {
9276         if(sip->choice == SEQID_GENERAL)
9277         {
9278             db_tag = (DbtagPtr) sip->data.ptrvalue;
9279             if(db_tag->db && StringCmp(db_tag->db, "THC") == 0)
9280             {
9281                 oip = db_tag->tag;
9282                 if(oip->id != 0)
9283                 {
9284                     sprintf(temp, "THC%ld", (long) oip->id);
9285                     StringNCpy(buf, temp, buflen);
9286                     return TRUE;
9287                 }
9288             }
9289         }
9290 
9291         sip = sip->next;
9292     }
9293 
9294     return FALSE;
9295 }
9296 
9297 
9298 
format_human_map_id(SeqIdPtr sip,CharPtr buf,Int2 buflen)9299 static Boolean format_human_map_id (SeqIdPtr sip, CharPtr buf, Int2 buflen)
9300 {
9301     DbtagPtr db_tag;
9302     Char temp[101];
9303     ObjectIdPtr oip;
9304 
9305     for (; sip; sip = sip->next) {
9306         if(sip->choice == SEQID_GENERAL) {
9307             break;
9308         }
9309     }
9310     if (sip == NULL) {
9311         return FALSE;
9312     }
9313     db_tag = (DbtagPtr) sip->data.ptrvalue;
9314     if (db_tag->db == NULL) {
9315         return FALSE;
9316     }
9317     if (StringCmp(db_tag->db, "MIT") == 0 || StringCmp(db_tag->db, "GENETHON") == 0 || StringCmp(db_tag->db, "CHLC") == 0 || StringCmp(db_tag->db, "GDB") == 0 || StringCmp(db_tag->db, "Stanford") == 0 || StringCmp(db_tag->db, "NCBI") == 0) {
9318         oip = db_tag->tag;
9319         if (oip->str != 0) {
9320             sprintf(temp, "%s", oip->str);
9321             StringNCpy(buf, temp, buflen);
9322             return TRUE;
9323         }
9324     }
9325 
9326     return FALSE;
9327 }
9328 
MuskSeqIdWrite(SeqIdPtr sip,CharPtr buf,Int2 buflen,Uint1 format,Boolean do_find,Boolean do_entrez_find)9329 NLM_EXTERN Boolean MuskSeqIdWrite (SeqIdPtr sip, CharPtr buf, Int2 buflen, Uint1 format, Boolean do_find, Boolean do_entrez_find)
9330 {
9331     SeqIdPtr entrez_id;
9332     Boolean retval;
9333     BioseqPtr bsp;
9334     Boolean bsp_found = FALSE;
9335 
9336     if(sip == NULL || buf == NULL)
9337         return FALSE;
9338 
9339     if(do_find)
9340     {
9341         if(sip->next == NULL)
9342         {
9343             bsp = BioseqFindCore(sip);
9344             if(bsp !=NULL)
9345             {
9346                 bsp_found = TRUE;
9347                 sip = bsp->id;
9348             }
9349         }
9350         sip = SeqIdFindWorst(sip);
9351     }
9352     if(sip->choice == SEQID_GI && do_entrez_find && !bsp_found)
9353     {
9354         entrez_id = GetSeqIdForGI(sip->data.intvalue);
9355         if(entrez_id !=NULL)
9356         {
9357             retval = MuskSeqIdWrite(entrez_id, buf, buflen, format, TRUE, FALSE);
9358             SeqIdSetFree(entrez_id);
9359             return retval;
9360         }
9361     }
9362 
9363     if(format_tigr_thc (sip, buf, buflen))
9364     {    /* give special format to the THC sequence*/
9365         return TRUE;
9366     }
9367     if(format_human_map_id (sip, buf, buflen))
9368     {    /* special format for human map ids */
9369         return TRUE;
9370     }
9371     SeqIdWrite(sip, buf,format, buflen);    /*in case this function can not work*/
9372 
9373     if(buf[0] == '\0')
9374         LabelCopy(buf, "Unidentified", buflen);
9375 
9376     return TRUE;
9377 }
9378 
9379 
9380 /***********************************************************************
9381 ***
9382 *    seqid_name(): return the most informative name from a SeqIdPtr
9383 *
9384 ************************************************************************
9385 ***/
seqid_name(SeqIdPtr hsip,CharPtr name,Boolean use_locus,Boolean check_chain)9386 NLM_EXTERN Boolean seqid_name(SeqIdPtr hsip, CharPtr name, Boolean use_locus, Boolean check_chain)
9387 {
9388     Uint1 format;
9389 
9390     if(use_locus)
9391         format = PRINTID_TEXTID_LOCUS;
9392     else
9393         format = PRINTID_TEXTID_ACCESSION;
9394     return MuskSeqIdWrite (hsip, name, 20, format, check_chain, check_chain);
9395 }
9396 
9397 /***********************************************************************
9398 ***
9399 *    update_seq_loc(): update the start, stop, strand info in SeqLoc
9400 *
9401 ************************************************************************
9402 ***/
update_seq_loc(Int4 start,Int4 stop,Uint1 strand,SeqLocPtr loc)9403 NLM_EXTERN SeqLocPtr update_seq_loc(Int4 start, Int4 stop, Uint1 strand, SeqLocPtr loc)
9404 {
9405    SeqIntPtr sint;
9406    SeqPntPtr spp;
9407 
9408     if(loc->choice == SEQLOC_INT)
9409     {
9410         sint = (SeqIntPtr) loc->data.ptrvalue;
9411         if(start != -1)
9412             sint->from = start;
9413         if(stop != -1)
9414             sint->to = stop;
9415         if(strand != 0)
9416             sint->strand = strand;
9417         loc->data.ptrvalue = sint;
9418     }
9419     else if(loc->choice == SEQLOC_PNT)
9420     {
9421         spp = (SeqPntPtr)(loc->data.ptrvalue);
9422         spp->point = start;
9423         spp->strand = strand;
9424         loc->data.ptrvalue = spp;
9425     }
9426 
9427     return loc;
9428 
9429 }
9430 
9431 
9432 /*
9433     Gets the SeqIdPtr for the subject or query sequence from the first SeqAlign.
9434     The SeqIdPtr is not saved  and should not be deleted.
9435 */
9436 static SeqIdPtr LIBCALL
TxGetIdFromSeqAlign(SeqAlignPtr seqalign,Boolean subject)9437 TxGetIdFromSeqAlign(SeqAlignPtr seqalign, Boolean subject)
9438 
9439 {
9440     DenseDiagPtr ddp;
9441     DenseSegPtr dsp;
9442     StdSegPtr ssp;
9443     SeqIdPtr sip;
9444 
9445     if (seqalign == NULL)
9446     return NULL;
9447 
9448     sip = NULL;
9449     switch (seqalign->segtype) {
9450     case 1: /*Dense-diag*/
9451         ddp = seqalign->segs;
9452         if (subject == TRUE)
9453             sip = ddp->id->next;
9454         else
9455             sip = ddp->id;
9456         break;
9457     case 2: /*Dense-seq */
9458         dsp = seqalign->segs;
9459         if (subject == TRUE)
9460             sip = dsp->ids->next;
9461         else
9462             sip = dsp->ids;
9463         break;
9464     case 3: /* Std-seg */
9465         ssp = seqalign->segs;
9466         if(ssp && ssp->loc && ssp->loc->next) {
9467             if (subject == TRUE)
9468                 sip = SeqLocId(ssp->loc->next);
9469             else
9470                 sip = SeqLocId(ssp->loc);
9471         }
9472         break;
9473     case 5: /* Discontinuous alignment */
9474 
9475         sip = TxGetIdFromSeqAlign(seqalign->segs, subject);
9476         break;
9477     default:
9478         break;
9479     }
9480 
9481     return sip;
9482 }
9483 
9484 /*
9485     Obtains the query (i.e., the first) SeqIdPtr from
9486     the first SeqAlignPtr.
9487 */
9488 NLM_EXTERN SeqIdPtr LIBCALL
TxGetQueryIdFromSeqAlign(SeqAlignPtr seqalign)9489 TxGetQueryIdFromSeqAlign(SeqAlignPtr seqalign)
9490 
9491 {
9492     return TxGetIdFromSeqAlign(seqalign, FALSE);
9493 }
9494 
9495 /*
9496     Obtains the subject (i.e., the second) SeqIdPtr from
9497     the first SeqAlignPtr.
9498 */
9499 NLM_EXTERN SeqIdPtr LIBCALL
TxGetSubjectIdFromSeqAlign(SeqAlignPtr seqalign)9500 TxGetSubjectIdFromSeqAlign(SeqAlignPtr seqalign)
9501 
9502 {
9503     return TxGetIdFromSeqAlign(seqalign, TRUE);
9504 }
9505 
9506 static Boolean
GetBestScoreAndEvalueFromScorePtr(ScorePtr sp,Int4 * score,Nlm_FloatHi * bit_score,Nlm_FloatHi * evalue,Int4 * number)9507 GetBestScoreAndEvalueFromScorePtr(ScorePtr sp, Int4 *score, Nlm_FloatHi *bit_score, Nlm_FloatHi *evalue, Int4 *number)
9508 
9509 {
9510     Boolean score_set=FALSE, evalue_set=FALSE, sum_set=FALSE, bit_set=FALSE;
9511     ObjectIdPtr obid;
9512     ScorePtr scrp;
9513 
9514     for (scrp=sp; scrp; scrp = scrp->next)
9515     {
9516         obid = scrp->id;
9517         if(obid && obid->str)
9518         {
9519             if (StringICmp(obid->str, "score") == 0)
9520             {
9521                 if (*score < scrp->value.intvalue)
9522                 {
9523                     score_set = TRUE;
9524                     *score = scrp->value.intvalue;
9525                 }
9526                 continue;
9527             }
9528             else if (StringICmp(obid->str, "e_value") == 0 || StringICmp(obid->str, "sum_e") == 0)
9529             {
9530                 if (*evalue > scrp->value.realvalue)
9531                 {
9532                     evalue_set = TRUE;
9533                     *evalue = scrp->value.realvalue;
9534                 }
9535                 continue;
9536             }
9537             else if (StringICmp(obid->str, "sum_n") == 0)
9538             {
9539                 if (*number < scrp->value.intvalue)
9540                 {
9541                     sum_set = TRUE;
9542                     *number = scrp->value.intvalue;
9543                 }
9544                 continue;
9545             }
9546             else if (StringICmp(obid->str, "bit_score") == 0)
9547             {
9548                 if (*bit_score < scrp->value.realvalue)
9549                 {
9550                     bit_set = TRUE;
9551                     *bit_score = scrp->value.realvalue;
9552                 }
9553                 continue;
9554             }
9555         }
9556     }
9557 
9558     /* Don't check for 'sum_set', as it's not always there. */
9559     if (score_set && evalue_set && bit_set)
9560         return TRUE;
9561     else if(score_set && evalue_set)
9562     {
9563         *bit_score = (FloatHi)(*score);
9564         return TRUE;
9565     }
9566 
9567     return FALSE;
9568 }
9569 
9570 
9571 NLM_EXTERN Boolean LIBCALL
GetScoreAndEvalue(SeqAlignPtr seqalign,Int4 * score,Nlm_FloatHi * bit_score,Nlm_FloatHi * evalue,Int4 * number)9572 GetScoreAndEvalue(SeqAlignPtr seqalign, Int4 *score, Nlm_FloatHi *bit_score, Nlm_FloatHi *evalue, Int4 *number)
9573 
9574 {
9575     Boolean local_retval, retval=FALSE;
9576         ScorePtr        sp;
9577         DenseDiagPtr ddp;
9578         DenseSegPtr dsp;
9579         StdSegPtr ssp;
9580 
9581     *score = 0;
9582     *bit_score = 0.0;
9583     *number = 1;
9584     *evalue = DBL_MAX;
9585 
9586     sp = seqalign->score;
9587     if (sp == NULL)
9588     {
9589         switch (seqalign->segtype)
9590         {
9591             case 1: /*Dense-diag*/
9592                         {
9593                                 Nlm_FloatHi best_evalue = *evalue;
9594                 ddp = seqalign->segs;
9595                 while (ddp)
9596                 {
9597                                    Int4 number_tmp = 1;
9598                                    local_retval =
9599                                       GetBestScoreAndEvalueFromScorePtr(ddp->scores, score,
9600                                          bit_score, evalue, &number_tmp);
9601                                    /* Use number corresponding to best evalue. */
9602                                    if (*evalue < best_evalue)
9603                                    {
9604                                       best_evalue = *evalue;
9605                                       *number = number_tmp;
9606                                    }
9607                                    if (local_retval == TRUE)
9608                                       retval = TRUE;
9609                                    ddp = ddp->next;
9610                 }
9611                 break;
9612                         }
9613             case 2:
9614                 dsp = seqalign->segs;
9615                 if (dsp)
9616                 {
9617                     retval = GetBestScoreAndEvalueFromScorePtr(dsp->scores, score, bit_score, evalue, number);
9618                 }
9619                 break;
9620             case 3:
9621                 ssp = seqalign->segs;
9622                 while (ssp)
9623                 {
9624                     local_retval = GetBestScoreAndEvalueFromScorePtr(ssp->scores, score, bit_score, evalue, number);
9625                     if (local_retval == TRUE)
9626                         retval = TRUE;
9627                     ssp = ssp->next;
9628                 }
9629                 break;
9630             default:
9631                 break;
9632         }
9633     }
9634     else
9635     {
9636         retval = GetBestScoreAndEvalueFromScorePtr(sp, score, bit_score, evalue, number);
9637     }
9638 
9639     return retval;
9640 }
9641 
9642 /***********************************************************************
9643 *
9644 *    Adjust the Offset in the SeqAlign to correspond to the beginning
9645 *    of the sequence and not where BLAST started.
9646 *
9647 **********************************************************************/
9648 
9649 NLM_EXTERN void LIBCALL
AdjustOffSetsInSeqAlign(SeqAlignPtr salp,SeqLocPtr slp1,SeqLocPtr slp2)9650 AdjustOffSetsInSeqAlign(SeqAlignPtr salp, SeqLocPtr slp1, SeqLocPtr slp2)
9651 
9652 {
9653     CharPtr err_string1, err_string2;
9654     DenseDiagPtr ddp;
9655     DenseSegPtr dsp;
9656     Int4 offset1=0, offset2=0, index;
9657     SeqIdPtr sip1=NULL, sip2=NULL;
9658     SeqIntPtr seq_int;
9659     SeqLocPtr seqloc, whole_slp;
9660     StdSegPtr ssp;
9661 
9662         while (salp)
9663         {
9664             if (salp->segtype == 1)
9665             {
9666                       ddp = salp->segs;
9667                       while (ddp)
9668                       {    /* Get the offset on the first call. */
9669             if (sip1 == NULL)
9670             {
9671                  sip1 = ddp->id;
9672                  whole_slp =
9673                 ValNodeAddPointer(NULL, SEQLOC_WHOLE, sip1);
9674                  if(SeqLocStrand(slp1) == Seq_strand_minus)
9675                      offset1 = GetOffsetInLoc(slp1, whole_slp, SEQLOC_STOP);
9676                  else
9677                      offset1 = GetOffsetInLoc(slp1, whole_slp, SEQLOC_START);
9678                  if (offset1 == -1)
9679                  {
9680                 err_string1 = SeqLocPrint(slp1);
9681                 err_string2 = SeqLocPrint(whole_slp);
9682                     ErrPostEx(SEV_ERROR, 0, 0, "AdjustOffSetInSeqAnnot: %s not in %s", err_string1, err_string2);
9683                  }
9684                  whole_slp = ValNodeFree(whole_slp);
9685             }
9686             if (sip2 == NULL && slp2)
9687             {
9688                  sip2 = ddp->id->next;
9689                  whole_slp =
9690                 ValNodeAddPointer(NULL, SEQLOC_WHOLE, sip2);
9691                  if(SeqLocStrand(slp2) == Seq_strand_minus)
9692                      offset2 = GetOffsetInLoc(slp2, whole_slp, SEQLOC_STOP);
9693                  else
9694                      offset2 = GetOffsetInLoc(slp2, whole_slp, SEQLOC_START);
9695                  if (offset2 == -1)
9696                  {
9697                 err_string1 = SeqLocPrint(slp2);
9698                 err_string2 = SeqLocPrint(whole_slp);
9699                     ErrPostEx(SEV_ERROR, 0, 0, "AdjustOffSetInSeqAnnot: %s not in %s", err_string1, err_string2);
9700                  }
9701                  whole_slp = ValNodeFree(whole_slp);
9702             }
9703             ddp->starts[0] += offset1;
9704             ddp->starts[1] += offset2;
9705                         ddp = ddp->next;
9706                       }
9707            }
9708        else if (salp->segtype == 2)
9709        {
9710         dsp = salp->segs;
9711         if (sip1 == NULL)
9712         {
9713              sip1 = dsp->ids;
9714              whole_slp =
9715             ValNodeAddPointer(NULL, SEQLOC_WHOLE, sip1);
9716              if(SeqLocStrand(slp1) == Seq_strand_minus)
9717                  offset1 = GetOffsetInLoc(slp1, whole_slp, SEQLOC_STOP);
9718              else
9719                  offset1 = GetOffsetInLoc(slp1, whole_slp, SEQLOC_START);
9720              if (offset1 == -1)
9721              {
9722             err_string1 = SeqLocPrint(slp1);
9723             err_string2 = SeqLocPrint(whole_slp);
9724                 ErrPostEx(SEV_ERROR, 0, 0, "AdjustOffSetInSeqAnnot: %s not in %s", err_string1, err_string2);
9725              }
9726              whole_slp = ValNodeFree(whole_slp);
9727         }
9728         if (sip2 == NULL && slp2)
9729         {
9730              sip2 = dsp->ids->next;
9731              whole_slp =
9732             ValNodeAddPointer(NULL, SEQLOC_WHOLE, sip2);
9733              offset2 =
9734             GetOffsetInLoc(slp2, whole_slp, SEQLOC_START);
9735              if(SeqLocStrand(slp2) == Seq_strand_minus)
9736                  offset2 = GetOffsetInLoc(slp2, whole_slp, SEQLOC_STOP);
9737              else
9738                  offset2 = GetOffsetInLoc(slp2, whole_slp, SEQLOC_START);
9739              if (offset2 == -1)
9740              {
9741             err_string1 = SeqLocPrint(slp2);
9742             err_string2 = SeqLocPrint(whole_slp);
9743                 ErrPostEx(SEV_ERROR, 0, 0, "AdjustOffSetInSeqAnnot: %s not in %s", err_string1, err_string2);
9744              }
9745              whole_slp = ValNodeFree(whole_slp);
9746         }
9747 
9748         for (index=0; index<dsp->numseg; index++)
9749         {
9750             if (dsp->starts[2*index] != -1)
9751                 dsp->starts[2*index] += offset1;
9752             if (dsp->starts[2*index+1] != -1)
9753                 dsp->starts[2*index+1] += offset2;
9754         }
9755            }
9756        else if (salp->segtype == 3)
9757        {
9758         ssp = salp->segs;
9759         while (ssp)
9760         {
9761             if (sip1 == NULL)
9762             {
9763                  sip1 = ssp->ids;
9764                  whole_slp =
9765                 ValNodeAddPointer(NULL, SEQLOC_WHOLE, sip1);
9766                              if(SeqLocStrand(slp1) == Seq_strand_minus)
9767                     offset1 = GetOffsetInLoc(slp1, whole_slp,
9768                                                          SEQLOC_STOP);
9769                              else
9770                     offset1 = GetOffsetInLoc(slp1, whole_slp,
9771                                                          SEQLOC_START);
9772 
9773                  if (offset1 == -1)
9774                  {
9775                 err_string1 = SeqLocPrint(slp1);
9776                 err_string2 = SeqLocPrint(whole_slp);
9777                     ErrPostEx(SEV_ERROR, 0, 0, "AdjustOffSetInSeqAnnot: %s not in %s", err_string1, err_string2);
9778                  }
9779                  whole_slp = ValNodeFree(whole_slp);
9780             }
9781             if (sip2 == NULL && slp2)
9782             {
9783                  sip2 = ssp->ids->next;
9784                  whole_slp =
9785                 ValNodeAddPointer(NULL, SEQLOC_WHOLE, sip2);
9786                              if(SeqLocStrand(slp2) == Seq_strand_minus)
9787                     offset2 = GetOffsetInLoc(slp2, whole_slp,
9788                                                          SEQLOC_STOP);
9789                              else
9790                     offset2 = GetOffsetInLoc(slp2, whole_slp,
9791                                                          SEQLOC_START);
9792 
9793                  if (offset2 == -1)
9794                  {
9795                 err_string1 = SeqLocPrint(slp2);
9796                 err_string2 = SeqLocPrint(whole_slp);
9797                     ErrPostEx(SEV_ERROR, 0, 0, "AdjustOffSetInSeqAnnot: %s not in %s", err_string1, err_string2);
9798                  }
9799                  whole_slp = ValNodeFree(whole_slp);
9800             }
9801             seqloc = ssp->loc;
9802                         if (seqloc->choice == SEQLOC_INT) {
9803                 seq_int = seqloc->data.ptrvalue;
9804                 seq_int->from += offset1;
9805                 seq_int->to += offset1;
9806                         }
9807             seqloc = ssp->loc->next;
9808                         if (seqloc->choice == SEQLOC_INT) {
9809                 seq_int = seqloc->data.ptrvalue;
9810                 seq_int->from += offset2;
9811                 seq_int->to += offset2;
9812                         }
9813                         ssp = ssp->next;
9814          }
9815               }
9816               salp = salp->next;
9817          }
9818 }
9819 
9820 
9821 /*****************************************************************************
9822 *
*   Uint4 SeqIdOrderInList(a, list)
*     Looks for a single SeqId, "a", in the chain of SeqIds, "list".
*     Returns the 1-based position if found, else returns 0.
9826 *
9827 *****************************************************************************/
9828 
SeqIdOrderInList(SeqIdPtr a,SeqIdPtr list)9829 NLM_EXTERN Uint4 LIBCALL SeqIdOrderInList (SeqIdPtr a, SeqIdPtr list) {
9830         SeqIdPtr now;
9831         Uint4 order;
9832         Uint1 retval;
9833 
9834         if (a == NULL)
9835             return 0;
9836 
9837         for (now =list,order=1; now != NULL; now = now -> next,order++)
9838         {
9839             retval = SeqIdComp(a, now);
9840             if(retval==SIC_YES)
9841                 return order;
9842         }
9843         return 0;
9844 }
9845 
9846 /*****************************************************************************
9847 *
*   Uint4 SeqIdOrderInBioseqIdList(a, list)
*     Looks for a single SeqId, "a", in the chain of SeqIds, "list",
*              also accepting any synonymous SeqId of the same Bioseq.
*     Returns the 1-based position if found, else returns 0.
9852 *
9853 *****************************************************************************/
9854 
SeqIdOrderInBioseqIdList(SeqIdPtr a,SeqIdPtr list)9855 NLM_EXTERN Uint4 LIBCALL SeqIdOrderInBioseqIdList (SeqIdPtr a, SeqIdPtr list) {
9856         SeqIdPtr now;
9857         Uint4 order;
9858 
9859         if (a == NULL)
9860             return 0;
9861 
9862         for (now =list,order=1; now != NULL; now = now -> next,order++)
9863         {
9864             if(SeqIdForSameBioseq(a, now))
9865                 return order;
9866         }
9867         return 0;
9868 }
9869 
9870 /* Function to extract the Accession and version number from
9871    a \usedin GBQual string.
9872    (works for a plain Accession Number with version too)
9873    User must provide string buffers for answer.
9874    and make sure that last Character of Accession is not a ')'
9875    using a statement like
9876    if ((ptr = StringChr (accession, ')')) != NULL) *ptr = '\0';
9877    and user must StringTok the GBQual for ',' and repeatedly call this.
9878    */
9879 
ExtractAccession(CharPtr accn,CharPtr accession,CharPtr version)9880 NLM_EXTERN void LIBCALL ExtractAccession(CharPtr accn,CharPtr accession,CharPtr version) {
9881     CharPtr verptr;
9882     if(accn!=NULL) {
9883         if (*accn == '(') {
9884                 accn++;
9885         }
9886         verptr = StrChr(accn,'.');
9887         if(verptr==NULL) {
9888             if(version!=NULL)
9889                 version[0]='\0';
9890             if(accession!=NULL) {
9891                 StringCpy(accession,accn);
9892             }
9893         } else {
9894             Int4 len;
9895             if(version!=NULL)
9896                 StringCpy(version,verptr+1);
9897             len = verptr-accn;
9898             if(accession!=NULL) {
9899                 StringNCpy(accession,accn,len);
9900                 accession[len]=NULLB;
9901             }
9902         }
9903     } else {
9904         if(accession)
9905             accession[0]=NULLB;
9906         if(version)
9907             version[0]=NULLB;
9908     }
9909 }
9910 
9911 
9912 /*
9913   Hugues Sicotte:
9914   Function to make a proper type SeqId given a string that represents
9915   an accession Number.
9916   If version number is unknown, set version=0 for latest.
9917   name is ignored because it is not always consistently used in databases.
9918   User may need to Call ExtractAccession to parse out accession and version.
9919 
9920   *** WARNING *** In the non-network mode, this function depends on hardcoded
9921   accession prefix list to guess at the right prefix type.
9922 
9923   There is an inherent conflict in name space between pir proteins and
9924   nucleotide genbank accessions  (or swissprot. )
9925         There is a VERY low probability of conflict between pir and swissprot..
        .. so this code ignores it. (no known cases).
9927   so Refseq, swissprot proteins and non-swissprot proteins have an independent name space.
9928 
9929   - some PIR names(locus-name looking) have no conflicts
9930              ([A-Z][0-9,A-Z]{3,5}) with nucleotide accession.
9931   - some PIR accessions have conflicts with 1+5 nucleotide accession, but the
9932           2+5 nucleotide accession have no conflict with pir.
9933 
9934   The Boolean flag AllowPIR: If TRUE, allows that accessions may be PIR.
9935   The Boolean flag Permissive,
9936              if FALSE,
9937                  - completely ignores PIR accessions,
                 - doesn't guess at the prefix of unassigned accessions.
9939                     (even if they look like accession)
9940                  - the network will NOT be used.
9941              if TRUE
9942                  - allows unassigned accessions (as long as they fit the
9943                      accession patterns)
9944                  - allows for PIR accessions if AllowPIR==TRUE;
9945                  - allow for Network Access if UseNetwork==TRUE to resolve conflicts.
9946                  - if UseNetwork == FALSE, uses the boolean flag FavorNucleotide
9947                           to resolve conflicts.
9948 
9949   The Boolean flag FavorNucleotide chooses to believe that the conflicts are best
9950   resolved by believing that the sequence is a nucleotide (unless UseNetwork is set).
9951 
9952   The Boolean flag UseNetwork supersedes FavorNucleotide, and uses the network to
  resolve conflicts and for 'unknown' or 'unassigned' accessions.
9954 
9955   ***  .. Assumes that any new accession type is of nucleotide type
9956              (in permissive mode)
9957 
9958   ***  Using UseNetwork will not prevent unknown (even not in database)
9959             from resulting in a valid seqid.
9960 
9961 */
SeqIdFromAccession(CharPtr accession,Uint4 version,CharPtr name)9962 NLM_EXTERN  SeqIdPtr LIBCALL SeqIdFromAccession(CharPtr accession, Uint4 version,CharPtr name) {
9963     Boolean Permissive = TRUE;
9964     Boolean UseNetwork = FALSE;
9965     Boolean FavorNucleotide = TRUE;
9966     Boolean AllowPIR = FALSE;
9967     return SeqIdFromAccessionEx(accession,version,name,Permissive, AllowPIR,UseNetwork,FavorNucleotide);
9968 }
9969 
9970 
/*
 * SeqIdFromAccessionEx
 *
 *   Builds a SeqId for "accession", choosing the SeqId type from the
 *   classification returned by WHICH_db_accession().
 *
 *   accession       : accession (or locus-like) string; NULL, empty, or a
 *                     string starting with '\n' or '\r' yields NULL.
 *   version         : stored in the TextSeqId version field.
 *   name            : not referenced by this function.
 *   Permissive      : allow unassigned / unknown accessions to produce a SeqId.
 *   AllowPIR        : allow the result to be typed SEQID_PIR.
 *   UseNetwork      : allow BioseqLockById() lookups to resolve the type.
 *   FavorNucleotide : when no lookup settles it, do not pick PIR.
 *
 *   Returns a newly created SeqId, or NULL when
 *     - the accession is rejected as described above,
 *     - it is classified as PDB but is 8 or more characters long,
 *     - it is classified but unassigned and Permissive is FALSE, or
 *     - it is unknown and Permissive is FALSE.
 */
NLM_EXTERN  SeqIdPtr LIBCALL SeqIdFromAccessionEx(CharPtr accession, Uint4 version,CharPtr name,Boolean Permissive, Boolean AllowPIR,Boolean UseNetwork,Boolean FavorNucleotide) {
    SeqIdPtr sip;
    BioseqPtr bsp=NULL;
    TextSeqIdPtr tsp;
    Uint4 status;
    if(accession==NULL || accession[0]=='\0' || accession[0]=='\n' || accession[0]=='\r')
        return NULL;
    sip=NULL;
    status = WHICH_db_accession(accession);
    if(!(ACCN_IS_UNKNOWN(status))) {
        /* The accession matched a known pattern. */
        Boolean formally_assigned;
        formally_assigned = !(ACCN_IS_UNASSIGNED(status));
        if(formally_assigned || Permissive) {
            /* new support for PDB */
            if (status == ACCN_PDB) {
                Char pdbstr [41];
                /* Length is capped at 7, so "pdb|" + accession fits in pdbstr. */
                if (StringLen (accession) < 8) {
                    sprintf (pdbstr, "pdb|%s", accession);
                    sip = SeqIdParse (pdbstr);
                    return sip;
                }
                return NULL;
            }
            /* Every other type is a text SeqId holding accession + version. */
            sip = ValNodeNew(NULL);
            tsp = TextSeqIdNew();
            tsp->accession = StringSave(accession);
            sip->data.ptrvalue = tsp;
            tsp->name = NULL;
            tsp->version = version;
            if(ACCN_IS_REFSEQ(status)) {
                sip->choice = SEQID_OTHER;
            } else if(ACCN_IS_SWISSPROT(status)) {
                sip->choice = SEQID_SWISSPROT;
            } else {
                Boolean PIR=FALSE;
                if(Permissive && AllowPIR) {
                    /* In this loop.. can only be PIR of type 1+5 accession */
                    if( ACCN_PIR_FORMAT(accession) && ((!FavorNucleotide) || UseNetwork)) {
                        if(UseNetwork) {
                            /* Try the id as GenBank first, then as PIR; a protein
                               Bioseq found either way makes it PIR. */
                            sip->choice = SEQID_GENBANK;
                            bsp = BioseqLockById(sip);
                            if(bsp) {
                                if(bsp->mol==Seq_mol_aa)
                                    PIR=TRUE;
                                BioseqUnlock(bsp);
                            } else {
                                sip->choice = SEQID_PIR;
                                bsp = BioseqLockById(sip);
                                if(bsp) {
                                    if(bsp->mol==Seq_mol_aa)
                                        PIR=TRUE;
                                    BioseqUnlock(bsp);
                                } else if (!FavorNucleotide) {
                                    PIR = TRUE;
                                }
                            }
                        } else if(!FavorNucleotide) {
                            PIR = TRUE;
                        }
                    }
                }
                /* The probing above may have left a provisional choice in sip;
                   it is always overwritten here. */
                if(PIR) {
                    sip->choice = SEQID_PIR;
                } else {
                    if(ACCN_IS_GENBANK(status)) {
                        sip->choice = SEQID_GENBANK;
                    } else if (ACCN_IS_EMBL(status)) {
                        sip->choice = SEQID_EMBL;
                    } else if (ACCN_IS_DDBJ(status)) {
                        sip->choice = SEQID_DDBJ;
                    } else if (ACCN_IS_TPA(status)) {
                        if (status == ACCN_NCBI_TPA || status == ACCN_NCBI_TPA_PROT) {
                            sip->choice = SEQID_TPG;
                        } else if (status == ACCN_EMBL_TPA || status == ACCN_EMBL_TPA_PROT) {
                            sip->choice = SEQID_TPE;
                        } else if (status == ACCN_DDBJ_TPA || status == ACCN_DDBJ_TPA_PROT) {
                            sip->choice = SEQID_TPD;
                        } else { /* default TPA */
                            sip->choice = SEQID_TPG;
                        }
                    } else /* default */
                        sip->choice = SEQID_GENBANK;
                }
            }
        }
    } else if(Permissive) {
        /* can only be a locus name type accession.
           (i.e. an arbitrary string .. or a completely
           new type/format of accession)
           .. any 1+5, 2+6 3+5 refseq accession.. are
           handled above.
        */
        Boolean PIR = FALSE;
        sip = ValNodeNew(NULL);
        tsp = TextSeqIdNew();
        tsp->accession = StringSave(accession);
        sip->data.ptrvalue = tsp;
        tsp->name = NULL;
        tsp->version = version;
        sip->choice = SEQID_GENBANK; /* default */
        if(AllowPIR && ACCN_PIR_FORMAT(accession) && ( UseNetwork || !FavorNucleotide )) {
            if(UseNetwork) { /* Only if user application has
                                        allowed ID1 bioseq Fetching
                                        ID1Init();ID1BioseqFetchEnable("prog",TRUE);
                                     */
                bsp = BioseqLockById(sip);
                if(bsp) {
                    /* NOTE(review): sip2 from SeqIdFindBestAccession() is
                       dereferenced below without a NULL check -- confirm it
                       cannot be NULL for a fetched Bioseq. */
                    if(bsp->mol==Seq_mol_aa) {
                        SeqIdPtr sip2;
                        ErrPostEx(SEV_WARNING,0,0,"%s Should NOT be a protein but IS\n",accession);
                        sip2 = SeqIdFindBestAccession(bsp->id);
                        sip->choice = sip2->choice;
                        /* when fetching .. allow for the possibility
                           of new protein prefix of non PIR type */
                        if(sip->choice == SEQID_PIR) {
                            tsp->name = tsp->accession;
                            tsp->accession = NULL;
                            PIR=TRUE;
                        }
                    } else {
                        SeqIdPtr sip2;
                        sip2 = SeqIdFindBestAccession(bsp->id);
                        sip->choice = sip2->choice;
                        if(StringCmp(accession,((TextSeqIdPtr)(sip2->data.ptrvalue))->name)==0) {
                            /*
                               --> "accession" is the LOCUS
                            */
                            tsp->name = tsp->accession;
                            tsp->accession = NULL;
                        } /* else  unknown(not hardcoded) accession type (not locus)
                           */
                    }
                } else {
                    /* Not found as GenBank: retry as a PIR id keyed by name. */
                    sip->choice = SEQID_PIR;
                    tsp->name = tsp->accession;
                    tsp->accession = NULL;
                    bsp = BioseqLockById(sip);
                    if(bsp) {
                        if(bsp->mol==Seq_mol_aa) {
                            SeqIdPtr sip2;
                            sip2 = SeqIdFindBestAccession(bsp->id);
                            /* NOTE(review): sip2 is assigned but the test below
                               reads sip->choice, which was set to SEQID_PIR just
                               above, so the warning branch cannot be taken.
                               Possibly sip2->choice was intended -- confirm. */
                            if(sip->choice != SEQID_PIR) {
                                ErrPostEx(SEV_WARNING,0,0,"PIR SeqId retrieve non-PIR sequence!\n");
                            } else
                                PIR=TRUE;
                        } else {
                            ErrPostEx(SEV_WARNING,0,0,"PIR SeqId retrieve non-amino-acid sequence!\n");
                        }
                    }
                    if(!PIR) {
                        /* revert to original accession <-> name  order
                         */
                        tsp->accession = tsp->name;
                        tsp->name = NULL;
                    }
                }
                if(!bsp) { /* No network was available .
                              or SeqIdFetch failed */
                    if(!FavorNucleotide) {
                        PIR = TRUE;
                    }
                    /* Either way the string ends up in tsp->name; only the
                       SeqId type differs. */
                    if(PIR) {
                        sip->choice = SEQID_PIR;
                        tsp->name = tsp->accession;
                        tsp->accession = NULL;
                    } else { /* LOCUS NAME  SeqId */
                        sip->choice = SEQID_GENBANK;
                        tsp->name = tsp->accession;
                        tsp->accession = NULL;
                    }
                } else
                    BioseqUnlock(bsp);
            } else { /* !UseNetwork */
                /* The enclosing condition guarantees !FavorNucleotide here. */
                if(!FavorNucleotide && !UseNetwork) {
                    PIR = TRUE;
                } else if(FavorNucleotide && !UseNetwork) {
                    PIR = FALSE; /* XXX Should never be called */
                }
                if(PIR) {
                    sip->choice = SEQID_PIR;
                    tsp->name = tsp->accession;
                    tsp->accession = NULL;
                } else { /* LOCUS NAME  SeqId */
                    sip->choice = SEQID_GENBANK;
                    tsp->name = tsp->accession;
                    tsp->accession = NULL;
                }
            }
        } else {
            /* Permissive .. but
               FavorNucleotide && !UseNetwork  OR
               it doesn't look like a PIR (so it will be assumed to be a
               nucleotide, independent of what FavorNucleotide is.)
            */
            if(UseNetwork) {
                /*
                  Use network to decide if it is genbank, embl or ddbj
                */
                sip->choice = SEQID_GENBANK;
                bsp = BioseqLockById(sip);
                if(bsp) {
                        SeqIdPtr sip2;
                        /* NOTE(review): sip2 is used without a NULL check here
                           as well -- confirm. */
                        sip2 = SeqIdFindBestAccession(bsp->id);
                        sip->choice = sip2->choice;
                        if(StringCmp(accession,((TextSeqIdPtr)(sip2->data.ptrvalue))->name)==0) {
                            /*
                               --> "accession" is the LOCUS
                            */
                            tsp->name = tsp->accession;
                            tsp->accession = NULL;
                        }
                        BioseqUnlock(bsp);
                } /* .. if not found .. Make it anyways */
            }
        }
    }
    return sip;
}
10188 
10189 
10190 /* Variant of SeqIdFromAccession that works on accession.version string (JK) */
10191 
SeqIdFromAccessionDotVersion(CharPtr accession)10192 NLM_EXTERN SeqIdPtr SeqIdFromAccessionDotVersion (CharPtr accession)
10193 
10194 {
10195   Char      accn [41];
10196   CharPtr   ptr;
10197   long int  ver = INT2_MIN;
10198 
10199   StringNCpy_0 (accn, accession, sizeof (accn));
10200   ptr = StringChr (accn, '.');
10201   if (ptr != NULL) {
10202     *ptr = '\0';
10203     ptr++;
10204     if (sscanf (ptr, "%ld", &ver) != 1) {
10205       ver = INT2_MIN;
10206     }
10207   }
10208   return SeqIdFromAccession (accn, (Uint4) ver, NULL);
10209 }
10210 
10211 
/* N* GSDB accession numbers were made secondary to
   genbank, embl or ddbj records
   .. but some of these N numbers had already been assigned by
   embl OR ddbj OR genbank.

   The net result is that N numbers can belong to any of the 3 databases,
   and the same N-number can point to two completely different sequences.
   .. One which was an N* from GSDB, the other one from one of the
   major databases.

   status as of 12/2000 :  using the [ACCN] field in Entrez.
   Maintenance by H. Sicotte and M. Cavanaug

   Each table below is a '/'-terminated list of the five-digit numeric
   parts of N accessions; the lookup functions search these lists with
   StringStr() on the text following the leading 'N'.
*/
static CharPtr gb_N_numbers = "00008/00013/00018/00019/00027/00041/00046/00048/00052/00054/18624/";
static CharPtr embl_N_numbers = "00060/00064/";
static CharPtr ddbj_N_numbers = "00028/00035/00037/00053/00061/00062/00063/00065/00066/00067/00068/00069/00078/00079/00083/00088/00090/00091/00092/00093/00094/";
static CharPtr embl_ddbj_N_numbers = "00070/";
static CharPtr embl_gb_N_numbers = "00001/00002/00011/00057/";
static CharPtr embl_gb_ddbj_N_numbers = "00005/00009/00012/00020/00022/00025/00058/";
/* No N_* accession assigned for these and N00095 .. N0****
   .. and only N18624 assigned in the N1**** range.
   N2*..N9* are genbank EST's.
   .. all other numbers (the original comment is truncated here; presumably
   those below N00095) have been assigned to BOTH ddbj and genbank.

 */
static CharPtr nonexistant_N_numbers = "00071/00072/00073/00074/00075/00076/00077/00080/00081/00082/00084/00085/00086/00087/00089/00095/";

/*    N00004 was replaced by another ID () which was withdrawn
 */
static CharPtr gb_ddbj_N_numbers = "00003/00004/00006/00007/00010/00014/00015/00016/00017/00021/00023/00024/00026/00029/00030/00031/00032/00033/00034/00036/00038/00039/00040/00042/00043/00044/00045/00047/00049/00050/00051/00055/00056/00059/";
10243 
10244 
N_accession(CharPtr s)10245 static Uint4 LIBCALL N_accession (CharPtr s) {
10246     Uint4 retcode=ACCN_UNKNOWN;
10247     if(s && (*s=='N' || *s == 'n')) {
10248         Int4 id;
10249         id = atoi(s+1);
10250         if(id>20000) {
10251             retcode = ACCN_NCBI_EST;
10252         } else {
10253             if(id==0 || (id>=95 && id !=18624))
10254                 retcode = ACCN_UNKNOWN;
10255             else if(StringStr(embl_N_numbers,s+1)!=NULL)
10256                 retcode = ACCN_EMBL_OTHER;
10257             else if (StringStr(ddbj_N_numbers,s+1)!=NULL)
10258                 retcode = ACCN_DDBJ_OTHER;
10259             else if (StringStr(gb_N_numbers,s+1)!=NULL)
10260                 retcode = ACCN_NCBI_OTHER;
10261             else if (StringStr(nonexistant_N_numbers,s+1)!=NULL)
10262                 retcode = ACCN_UNKNOWN;
10263             else if (StringStr(embl_gb_N_numbers,s+1)!=NULL)
10264                 retcode = ACCN_EMBL_GB;
10265             else if (StringStr(embl_ddbj_N_numbers,s+1)!=NULL)
10266                 retcode = ACCN_EMBL_DDBJ;
10267             else if (StringStr(gb_ddbj_N_numbers,s+1)!=NULL)
10268                 retcode = ACCN_GB_DDBJ;
10269             else if (StringStr(embl_gb_ddbj_N_numbers,s+1)!=NULL)
10270                 retcode = ACCN_EMBL_GB_DDBJ;
10271             else {
10272                 ErrPostEx(SEV_WARNING,0,0,"sequtil::N_accession: Missing N-accession, not accounted for: %s\n",s);
10273                 retcode = ACCN_UNKNOWN;
10274             }
10275         }
10276     } else {
10277         ErrPostEx(SEV_WARNING,0,0,"sequtil::N_accession: Function called with non-N accession: %s\n",s == NULL ? "NULL Accession" : s);
10278         retcode = ACCN_UNKNOWN;
10279 
10280     }
10281     return retcode;
10282 }
10283 
10284 
/*
  Functions N_ACCN_IS_GENBANK() etc.
  take an N-accession number and return TRUE if
  it is from the proper database.
  They take into account that an N-accession can belong to many databases.
*/
10291 
NAccnIsGENBANK(CharPtr s)10292 NLM_EXTERN Boolean LIBCALL NAccnIsGENBANK (CharPtr s) {
10293     Boolean retstatus;
10294     Int4 id;
10295     id = atoi(s+1);
10296     if(*s != 'n' || *s != 'N')
10297         return FALSE;
10298     if(id == 0) {
10299         retstatus = FALSE;
10300     } else if(id>=20000) {
10301         retstatus = TRUE;
10302     } else {
10303         if(StringStr(gb_N_numbers,s+1)!=NULL
10304            || StringStr(embl_gb_N_numbers,s+1)!=NULL
10305            || StringStr(embl_gb_ddbj_N_numbers,s+1)!=NULL
10306            || StringStr(gb_ddbj_N_numbers,s+1)!=NULL)
10307             retstatus = TRUE;
10308         else
10309             retstatus = FALSE;
10310     }
10311     return retstatus;
10312 }
10313 
NAccnIsEMBL(CharPtr s)10314 NLM_EXTERN Boolean LIBCALL NAccnIsEMBL (CharPtr s) {
10315     Boolean retstatus;
10316     Int4 id;
10317     id = atoi(s+1);
10318     if(*s != 'n' || *s != 'N')
10319         return FALSE;
10320     if(id == 0 || id>20000) {
10321         retstatus = FALSE;
10322     } else {
10323         if(StringStr(embl_N_numbers,s+1)!=NULL
10324            || StringStr(embl_gb_N_numbers,s+1)!=NULL
10325            || StringStr(embl_ddbj_N_numbers,s+1)!=NULL
10326            || StringStr(embl_gb_ddbj_N_numbers,s+1)!=NULL)
10327             retstatus = TRUE;
10328         else
10329             retstatus = FALSE;
10330     }
10331     return retstatus;
10332 }
10333 
NAccnIsDDBJ(CharPtr s)10334 NLM_EXTERN Boolean LIBCALL NAccnIsDDBJ (CharPtr s) {
10335     Boolean retstatus;
10336     Int4 id;
10337     id = atoi(s+1);
10338     if(*s != 'n' || *s != 'N')
10339         return FALSE;
10340     if(id == 0 || id>20000) {
10341         retstatus = FALSE;
10342     } else {
10343         if(StringStr(ddbj_N_numbers,s+1)!=NULL
10344            || StringStr(embl_ddbj_N_numbers,s+1)!=NULL
10345            || StringStr(embl_gb_ddbj_N_numbers,s+1)!=NULL
10346            || StringStr(gb_ddbj_N_numbers,s+1)!=NULL)
10347             retstatus = TRUE;
10348         else
10349             retstatus = FALSE;
10350 
10351     }
10352     return retstatus;
10353 }
10354 
AccnIsSWISSPROT(CharPtr s)10355 NLM_EXTERN Boolean LIBCALL AccnIsSWISSPROT( CharPtr s) {
10356      Boolean retstatus = FALSE;
10357      if(s && *s && *(s+1) && *(s+2) && *(s+3) && *(s+4) && *(s+5) && *(s+6) ==NULLB) {
10358          if(*s == 'o' || *s == 'O' ||
10359             *s == 'p' || *s == 'P' ||
10360             *s == 'q' || *s == 'Q') {
10361              if(IS_DIGIT(*(s+1))) {
10362                  if(IS_ALPHA(*(s+2)) || IS_DIGIT(*(s+2))) {
10363                      if(IS_ALPHA(*(s+3)) || IS_DIGIT(*(s+3))) {
10364                          if(IS_ALPHA(*(s+4)) || IS_DIGIT(*(s+4))) {
10365                              if(IS_DIGIT(*(s+5))) {
10366                                  retstatus = TRUE;
10367                              }
10368                          }
10369                      }
10370                  }
10371              }
10372          }
10373      }
10374 
10375      return retstatus;
10376 }
10377 
AccnIsUniProt(CharPtr s)10378 NLM_EXTERN Boolean LIBCALL AccnIsUniProt (CharPtr s)
10379 
10380 {
10381   Char  ch;
10382 
10383   if (StringLen (s) != 6) return FALSE;
10384 
10385   ch = *s;
10386   if (! IS_ALPHA (ch)) return FALSE;
10387 
10388   s++;
10389   ch = *s;
10390   if (! IS_DIGIT (ch)) return FALSE;
10391 
10392   s++;
10393   ch = *s;
10394   if (! IS_ALPHA (ch)) return FALSE;
10395 
10396   s++;
10397   ch = *s;
10398   if (! (IS_ALPHA (ch) || IS_DIGIT (ch))) return FALSE;
10399 
10400   s++;
10401   ch = *s;
10402   if (! (IS_ALPHA (ch) || IS_DIGIT (ch))) return FALSE;
10403 
10404   s++;
10405   ch = *s;
10406   if (! IS_DIGIT (ch)) return FALSE;
10407 
10408    return TRUE;
10409 }
10410 
10411  /*
10412    function to tell if an accession is in the format
10413    of a PIR accession number.
10414    (either a 1+5 accession, or a locus name of length 4-6 alphanumerics)
10415   */
ACCN_PIR_FORMAT(CharPtr s)10416  NLM_EXTERN Boolean LIBCALL ACCN_PIR_FORMAT( CharPtr s) {
10417      Boolean retstatus = FALSE;
10418      if(s) {
10419          Int4 i,l;
10420          l = StringLen(s);
10421          if(*s && *(s+1) && *(s+2) && *(s+3) && l>=4 && l<=6) {
10422              if(IS_ALPHA(*s)) {
10423                  retstatus = TRUE;
10424                  for(i=1;i<l;i++) {
10425                      if(!(IS_ALPHA(*(s+i)) || IS_DIGIT(*(s+i))))
10426                          retstatus = FALSE;
10427                  }
10428              }
10429          }
10430      }
10431 
10432      return retstatus;
10433  }
10434 
10435 
ACCN_1_5_FORMAT(CharPtr s)10436  NLM_EXTERN Boolean LIBCALL ACCN_1_5_FORMAT( CharPtr s) {
10437      Boolean retstatus = FALSE;
10438      if(s) {
10439          Int4 i;
10440          if(*s && StringLen(s) ==6) {
10441              if(IS_ALPHA(*s)) {
10442                  retstatus = TRUE;
10443                  for(i=1;i<6;i++) {
10444                      if(!(IS_DIGIT(*(s+i))))
10445                          retstatus = FALSE;
10446                  }
10447              }
10448          }
10449      }
10450      return retstatus;
10451  }
10452 
10453 
10454 /*****************************************************************************
10455 *
10456 *  Function:    WHICH_db_accession
10457 *
10458 *  Description: Returns a non-zero code if the input string is a validly
10459 *               formatted database accession number
*               The return code can be used to infer known
*               information about which database this accession belongs
*               to, using a set of macros in accutils.h
10463 *               (GenBank, EMBL, DDBJ, Swissprot)
10464 *  *****WARNING****
10465 *
10466 *   this function must be maintained.
10467 *  *****WARNING****
10468 *
10469 *  Arguments:   s : CharPtr; pointer to accession number string.
10470 *                   Must be null terminated.
10471 *
10472 *  Author:      Mark Cavanaug, Hugues Sicotte (3/99)
10473 *  Date:        7/96(IS_ntdb_accession),3/99(WHICH_db_accession)
10474 *
10475 *  WARNING:     WHICH_db_accession() does not communicate with any central
10476 *               resource about accession numbers. So there's no way to
10477 *               inform it automatically about new accession number prefixes.
10478 *
10479 *               Version Number ".integer" MUST have been stripped out
10480 *               before calling this function.
10481 *****************************************************************************/
WHICH_db_accession(CharPtr s)10482 NLM_EXTERN Uint4 LIBCALL WHICH_db_accession (CharPtr s)
10483 {
10484   Uint4 retcode = 0;
10485   Boolean retval = TRUE;
10486   Boolean first = TRUE;
10487   size_t len;
10488   Int2 i;
10489   Char temp [16];
10490 
10491   if (s == NULL || ! *s)
10492     return FALSE;
10493 
10494   len = StringLen (s);
10495 
10496   if (IS_DIGIT (*s)) {
10497     if (len == 4 || (len > 4 && s [4] == '|')) {
10498       return ACCN_PDB;
10499     }
10500     return ACCN_UNKNOWN;
10501   }
10502 
10503   switch (len) {
10504 
10505   case 6:                       /* Old-style 6-character accession */
10506     if (AccnIsUniProt (s)) {
10507       return ACCN_SWISSPROT;
10508     }
10509     while (*s) {
10510       if (retval == FALSE)
10511         break;
10512 
10513       if (first) {
10514         if (! IS_ALPHA(*s)) {
10515           retval = FALSE;
10516           break;
10517         }
10518 
10519         switch (TO_UPPER(*s)) {
10520 
10521 /* Protein SWISS-PROT accessions */
10522         case 'O': case 'P': case 'Q':
10523             if (AccnIsSWISSPROT(s)) {
10524                 retcode = ACCN_SWISSPROT;
10525             }
10526             break;
10527 
10528 /* GenBank : EST */
10529         case 'H':  case 'R': case 'T': case 'W':
10530             retcode = ACCN_NCBI_EST;
10531             break;
10532         case 'N':
10533             retcode = N_accession(s);
10534             break;
10535             /* GenBank : non-EST */
10536         case 'B':
10537             retcode = ACCN_NCBI_GSS;
10538             break;
10539         case 'G':
10540             retcode = ACCN_NCBI_STS;
10541             break;
10542         case 'S':
10543             retcode = ACCN_NCBI_BACKBONE; /* Scanned journal articles */
10544             break;
10545         case 'U':
10546             retcode = ACCN_NCBI_EST;
10547             break;
10548 
10549             /* GenBank : before NCBI */
10550         case 'J': case 'K': case 'L': case 'M':
10551             retcode = ACCN_GSDB_DIRSUB;
10552             break;
10553 
10554             /* EMBL */
10555         case 'A':
10556             retcode = ACCN_EMBL_PATENT;
10557             break;
10558         case 'F':
10559             retcode = ACCN_EMBL_EST;
10560             break;
10561         case 'V': case 'X': case 'Y': case 'Z':
10562             retcode =  ACCN_EMBL_DIRSUB;
10563             break;
10564 
10565             /* DDBJ */
10566         case 'C':
10567             retcode =  ACCN_DDBJ_EST;
10568             break;
10569         case 'D':
10570             retcode = ACCN_DDBJ_DIRSUB;
10571             break;
10572         case 'E':
10573             retcode = ACCN_DDBJ_PATENT;
10574             break;
10575 
10576             /* Case I can be confused with pir accessions which
10577              use the I* protein namespace
10578             */
10579 
10580         case 'I' : /* NCBI patent */
10581             retcode = ACCN_NCBI_PATENT;
10582             break;
10583         default: /* should not happen.. all A-Z assigned */
10584             retcode = ACCN_IS_NT;
10585             ErrPostEx(SEV_WARNING,0,0,"sequtil:WHICH_db_accession : Bug in IS_ALPHA macro or memory trashing!!!; accession %s \n",s ==NULL ? "NULL Accession" : s);
10586             break;
10587         }
10588       first = FALSE;
10589       } else {
10590           switch (retcode) {
10591              case ACCN_SWISSPROT:
10592                  break;
10593              default:
10594                  if (! IS_DIGIT(*s)) {
10595                      retval = FALSE;
10596                  }
10597           }
10598       }
10599       s++;
10600     }
10601     break;
10602     case 8: /* New 8-character accession, two letters + 6 digits */
10603             /* OR three letters + 5 digits for proteins */
10604         /* Check that have 3 letters */
10605       if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1)))
10606           break;
10607       if(IS_ALPHA(*(s+2))) {
10608           /* New(1999) 8-character protein accession, three letters + 5 digits */
10609           temp[0] = *s; s++;
10610           temp[1] = *s; s++;
10611           temp[2] = *s; s++;
10612           temp[3] = '\0';
10613 
10614           if ((StringICmp(temp,"AAA") >= 0) && (StringICmp(temp,"AZZ") <= 0)) {
10615               retcode = ACCN_NCBI_PROT;
10616           } else  if ((StringICmp(temp,"BAA") >= 0) && (StringICmp(temp,"BZZ") <= 0)) {
10617               retcode = ACCN_DDBJ_PROT;
10618           } else if ((StringICmp(temp,"CAA") >= 0) && (StringICmp(temp,"CZZ") <= 0)) {
10619               retcode = ACCN_EMBL_PROT;
10620           } else if ((StringICmp(temp,"DAA") >= 0) && (StringICmp(temp,"DZZ") <= 0)) {
10621               retcode = ACCN_NCBI_TPA_PROT;
10622           } else if ((StringICmp(temp,"EAA") >= 0) && (StringICmp(temp,"EZZ") <= 0)) {
10623               retcode = ACCN_NCBI_WGS_PROT;
10624           } else  if ((StringICmp(temp,"FAA") >= 0) && (StringICmp(temp,"FZZ") <= 0)) {
10625               retcode = ACCN_DDBJ_TPA_PROT;
10626           } else  if ((StringICmp(temp,"GAA") >= 0) && (StringICmp(temp,"GZZ") <= 0)) {
10627               retcode = ACCN_DDBJ_WGS_PROT;
10628           } else if ((StringICmp(temp,"HAA") >= 0) && (StringICmp(temp,"HZZ") <= 0)) {
10629               retcode = ACCN_NCBI_TPA_PROT;
10630           } else  if ((StringICmp(temp,"IAA") >= 0) && (StringICmp(temp,"IZZ") <= 0)) {
10631               retcode = ACCN_DDBJ_TPA_PROT;
10632           } else  if ((StringICmp(temp,"JAA") >= 0) && (StringICmp(temp,"JZZ") <= 0)) {
10633               retcode = ACCN_NCBI_TPA_PROT;
10634           } else if ((StringICmp(temp,"KAA") >= 0) && (StringICmp(temp,"KZZ") <= 0)) {
10635               retcode = ACCN_NCBI_WGS_PROT;
10636           } else if ((StringICmp(temp,"LAA") >= 0) && (StringICmp(temp,"LZZ") <= 0)) {
10637               retcode = ACCN_DDBJ_TPA_PROT;
10638           } else if ((StringICmp(temp,"OAA") >= 0) && (StringICmp(temp,"OZZ") <= 0)) {
10639               retcode = ACCN_NCBI_WGS_PROT;
10640           } else if ((StringICmp(temp,"PAA") >= 0) && (StringICmp(temp,"PZZ") <= 0)) {
10641               retcode = ACCN_NCBI_WGS_PROT;
10642           } else if ((StringICmp(temp,"SAA") >= 0) && (StringICmp(temp,"SZZ") <= 0)) {
10643               retcode = ACCN_EMBL_PROT;
10644           } else {
10645               retcode = ACCN_IS_PROTEIN;
10646               retval = TRUE;
10647               break;
10648           }
10649       } else if (IS_DIGIT(*(s+2))) {
10650           /* New 8-character accession, two letters + 6 digits */
10651           temp[0] = *s; s++;
10652           temp[1] = *s; s++;
10653           temp[2] = '\0';
10654 
10655           if ((StringICmp(temp,"AA") == 0) ||
10656           (StringICmp(temp,"AI") == 0) ||
10657           (StringICmp(temp,"AW") == 0) ||
10658           (StringICmp(temp,"BE") == 0) ||
10659           (StringICmp(temp,"BF") == 0) ||
10660           (StringICmp(temp,"BG") == 0) ||
10661           (StringICmp(temp,"BI") == 0) ||
10662           (StringICmp(temp,"BM") == 0) ||
10663           (StringICmp(temp,"BQ") == 0) ||
10664           (StringICmp(temp,"BU") == 0) ||
10665           (StringICmp(temp,"CA") == 0) ||
10666           (StringICmp(temp,"CB") == 0) ||
10667           (StringICmp(temp,"CD") == 0) ||
10668           (StringICmp(temp,"CF") == 0) ||
10669           (StringICmp(temp,"CK") == 0) ||
10670           (StringICmp(temp,"CN") == 0) ||
10671           (StringICmp(temp,"CO") == 0) ||
10672           (StringICmp(temp,"CV") == 0) ||
10673           (StringICmp(temp,"CX") == 0) ||
10674           (StringICmp(temp,"DN") == 0) ||
10675           (StringICmp(temp,"DR") == 0) ||
10676           (StringICmp(temp,"DT") == 0) ||
10677           (StringICmp(temp,"DV") == 0) ||
10678           (StringICmp(temp,"DW") == 0) ||
10679           (StringICmp(temp,"DY") == 0) ||
10680           (StringICmp(temp,"EB") == 0) ||
10681           (StringICmp(temp,"EC") == 0) ||
10682           (StringICmp(temp,"EE") == 0) ||
10683           (StringICmp(temp,"EG") == 0) ||
10684           (StringICmp(temp,"EH") == 0) ||
10685           (StringICmp(temp,"EL") == 0) ||
10686           (StringICmp(temp,"ES") == 0) ||
10687           (StringICmp(temp,"EV") == 0) ||
10688           (StringICmp(temp,"EW") == 0) ||
10689           (StringICmp(temp,"EX") == 0) ||
10690           (StringICmp(temp,"EY") == 0) ||
10691           (StringICmp(temp,"FC") == 0) ||
10692           (StringICmp(temp,"FD") == 0) ||
10693           (StringICmp(temp,"FE") == 0) ||
10694           (StringICmp(temp,"FF") == 0) ||
10695           (StringICmp(temp,"FG") == 0) ||
10696           (StringICmp(temp,"FK") == 0) ||
10697           (StringICmp(temp,"FL") == 0) ||
10698           (StringICmp(temp,"GD") == 0) ||
10699           (StringICmp(temp,"GE") == 0) ||
10700           (StringICmp(temp,"GH") == 0) ||
10701           (StringICmp(temp,"GO") == 0) ||
10702           (StringICmp(temp,"GR") == 0) ||
10703           (StringICmp(temp,"GT") == 0) ||
10704           (StringICmp(temp,"GW") == 0) ||
10705           (StringICmp(temp,"HO") == 0) ||
10706           (StringICmp(temp,"HS") == 0) ||
10707           (StringICmp(temp,"JG") == 0) ||
10708           (StringICmp(temp,"JK") == 0) ||
10709           (StringICmp(temp,"JZ") == 0) ) {                /* NCBI EST */
10710               retcode = ACCN_NCBI_EST;
10711           } else if ((StringICmp(temp,"BV") == 0) ||
10712                      (StringICmp(temp,"GF") == 0)) {      /* NCBI STS */
10713               retcode = ACCN_NCBI_STS;
10714           } else if ((StringICmp(temp,"AC") == 0) ||
10715                      (StringICmp(temp,"DP") == 0)) {      /* NCBI HTGS */
10716               retcode = ACCN_NCBI_HTGS;
10717           } else if ((StringICmp(temp,"AF") == 0) ||
10718                      (StringICmp(temp,"AY") == 0) ||
10719                      (StringICmp(temp,"DQ") == 0) ||
10720                      (StringICmp(temp,"EF") == 0) ||
10721                      (StringICmp(temp,"EU") == 0) ||
10722                      (StringICmp(temp,"FJ") == 0) ||
10723                      (StringICmp(temp,"GQ") == 0) ||
10724                      (StringICmp(temp,"GU") == 0) ||
10725                      (StringICmp(temp,"HM") == 0) ||
10726                      (StringICmp(temp,"JF") == 0)) {      /* NCBI direct submission */
10727               retcode = ACCN_NCBI_DIRSUB;
10728           } else if ((StringICmp(temp,"AE") == 0) ||
10729                      (StringICmp(temp,"CP") == 0) ||
10730                      (StringICmp(temp,"CY") == 0)) {      /* NCBI genome project data */
10731               retcode = ACCN_NCBI_GENOME;
10732           } else if ((StringICmp(temp,"AH") == 0)) {      /* NCBI segmented set header Bioseq */
10733               retcode = ACCN_NCBI_SEGSET | ACCN_AMBIGOUS_MOL; /* A few segmented proteins are AH */
10734           } else if ((StringICmp(temp,"CH") == 0) ||
10735                      (StringICmp(temp,"CM") == 0) ||
10736                      (StringICmp(temp,"DS") == 0) ||
10737                      (StringICmp(temp,"EM") == 0) ||
10738                      (StringICmp(temp,"EN") == 0) ||
10739                      (StringICmp(temp,"EP") == 0) ||
10740                      (StringICmp(temp,"EQ") == 0) ||
10741                      (StringICmp(temp,"FA") == 0) ||
10742                      (StringICmp(temp,"GG") == 0) ||
10743                      (StringICmp(temp,"GL") == 0) ||
10744                      (StringICmp(temp,"JH") == 0) ||
10745                      (StringICmp(temp,"KB") == 0) ||
10746                      (StringICmp(temp,"KD") == 0) ||
10747                      (StringICmp(temp,"KE") == 0) ||
10748                      (StringICmp(temp,"KI") == 0) ||
10749                      (StringICmp(temp,"KK") == 0) ||
10750                      (StringICmp(temp,"KL") == 0) ||
10751                      (StringICmp(temp,"KN") == 0) ||
10752                      (StringICmp(temp,"KQ") == 0) ||
10753                      (StringICmp(temp,"KV") == 0)) {      /* NCBI segmented set header Bioseq */
10754               retcode = ACCN_NCBI_SEGSET;
10755           } else if ((StringICmp(temp,"AS") == 0) ||
10756                      (StringICmp(temp,"HR") == 0) ||
10757                      (StringICmp(temp,"HS") == 0)) {      /* NCBI "other" */
10758               retcode = ACCN_NCBI_OTHER;
10759           } else if ((StringICmp(temp,"AD") == 0)) {      /* NCBI accessions assigned to GSDB entries */
10760               retcode = ACCN_NCBI_GSDB;
10761           } else if ((StringICmp(temp,"AQ") == 0) ||
10762                      (StringICmp(temp,"AZ") == 0) ||
10763                      (StringICmp(temp,"BH") == 0) ||
10764                      (StringICmp(temp,"BZ") == 0) ||
10765                      (StringICmp(temp,"CC") == 0) ||
10766                      (StringICmp(temp,"CE") == 0) ||
10767                      (StringICmp(temp,"CG") == 0) ||
10768                      (StringICmp(temp,"CL") == 0) ||
10769                      (StringICmp(temp,"CW") == 0) ||
10770                      (StringICmp(temp,"CZ") == 0) ||
10771                      (StringICmp(temp,"DU") == 0) ||
10772                      (StringICmp(temp,"DX") == 0) ||
10773                      (StringICmp(temp,"ED") == 0) ||
10774                      (StringICmp(temp,"EI") == 0) ||
10775                      (StringICmp(temp,"EJ") == 0) ||
10776                      (StringICmp(temp,"EK") == 0) ||
10777                      (StringICmp(temp,"ER") == 0) ||
10778                      (StringICmp(temp,"ET") == 0) ||
10779                      (StringICmp(temp,"FH") == 0) ||
10780                      (StringICmp(temp,"FI") == 0) ||
10781                      (StringICmp(temp,"GS") == 0) ||
10782                      (StringICmp(temp,"HN") == 0) ||
10783                      (StringICmp(temp,"HR") == 0) ||
10784                      (StringICmp(temp,"JJ") == 0) ||
10785                      (StringICmp(temp,"JM") == 0) ||
10786                      (StringICmp(temp,"JS") == 0) ||
10787                      (StringICmp(temp,"JY") == 0) ||
10788                      (StringICmp(temp,"KG") == 0) ||
10789                      (StringICmp(temp,"KO") == 0) ||
10790                      (StringICmp(temp,"KS") == 0) )  {     /* NCBI GSS */
10791               retcode = ACCN_NCBI_GSS;
10792           } else if ((StringICmp(temp,"AR") == 0) ||
10793                      (StringICmp(temp,"DZ") == 0) ||
10794                      (StringICmp(temp,"EA") == 0) ||
10795                      (StringICmp(temp,"GC") == 0) ||
10796                      (StringICmp(temp,"GP") == 0) ||
10797                      (StringICmp(temp,"GV") == 0) ||
10798                      (StringICmp(temp,"GX") == 0) ||
10799                      (StringICmp(temp,"GY") == 0) ||
10800                      (StringICmp(temp,"GZ") == 0) ||
10801                      (StringICmp(temp,"HJ") == 0) ||
10802                      (StringICmp(temp,"HK") == 0) ||
10803                      (StringICmp(temp,"HL") == 0) ||
10804                      (StringICmp(temp,"KH") == 0)) {      /* NCBI patent */
10805               retcode = ACCN_NCBI_PATENT;
10806           } else if((StringICmp(temp,"BC")==0)) {         /* NCBI long cDNA project : MGC */
10807               retcode = ACCN_NCBI_cDNA;
10808           } else if((StringICmp(temp,"BT")==0)) {         /* NCBI FLI_cDNA */
10809               retcode = ACCN_NCBI_cDNA;
10810           } else if ((StringICmp(temp,"BK") == 0) ||
10811                      (StringICmp(temp,"BL") == 0) ||
10812                      (StringICmp(temp,"GJ") == 0) ||
10813                      (StringICmp(temp,"GK") == 0) ||
10814                      (StringICmp(temp,"JP") == 0)) {      /* NCBI third-party annotation */
10815               retcode = ACCN_NCBI_TPA;
10816           } else if ((StringICmp(temp,"BN") == 0)) {      /* EMBL third-party annotation */
10817               retcode = ACCN_EMBL_TPA;
10818           } else if ((StringICmp(temp,"BR") == 0) ||
10819                     (StringICmp(temp,"HT") == 0) ||
10820                     (StringICmp(temp,"HU") == 0)) {      /* DDBJ third-party annotation */
10821               retcode = ACCN_DDBJ_TPA;
10822           } else if((StringICmp(temp,"EZ") == 0) ||
10823                     (StringICmp(temp,"HP") == 0) ||
10824                     (StringICmp(temp,"HQ") == 0) ||
10825                     (StringICmp(temp,"JI") == 0) ||
10826                     (StringICmp(temp,"JL") == 0) ||
10827                     (StringICmp(temp,"JN") == 0) ||
10828                     (StringICmp(temp,"JO") == 0) ||
10829                     (StringICmp(temp,"JQ") == 0) ||
10830                     (StringICmp(temp,"JR") == 0) ||
10831                     (StringICmp(temp,"JT") == 0) ||
10832                     (StringICmp(temp,"JU") == 0) ||
10833                     (StringICmp(temp,"JV") == 0) ||
10834                     (StringICmp(temp,"JW") == 0) ||
10835                     (StringICmp(temp,"JX") == 0) ||
10836                     (StringICmp(temp,"KA") == 0) ||
10837                     (StringICmp(temp,"KC") == 0) ||
10838                     (StringICmp(temp,"KF") == 0) ||
10839                     (StringICmp(temp,"KJ") == 0) ||
10840                     (StringICmp(temp,"KM") == 0) ||
10841                     (StringICmp(temp,"KP") == 0) ||
10842                     (StringICmp(temp,"KR") == 0) ||
10843                     (StringICmp(temp,"KT") == 0) ||
10844                     (StringICmp(temp,"KU") == 0) ||
10845                     (StringICmp(temp,"KX") == 0) ||
10846                     (StringICmp(temp,"KY") == 0)) {
10847               retcode = ACCN_NCBI_TSA;
10848           } else if((StringICmp(temp,"FX") == 0) ||
10849                     (StringICmp(temp,"LA") == 0) ||
10850                     (StringICmp(temp,"LE") == 0) ||
10851                     (StringICmp(temp,"LH") == 0) ||
10852                     (StringICmp(temp,"LI") == 0) ||
10853                     (StringICmp(temp,"LJ") == 0)) {
10854               retcode = ACCN_DDBJ_TSA;
10855           } else if ((StringICmp(temp,"AJ") == 0) ||
10856                      (StringICmp(temp,"AM") == 0) ||
10857                      (StringICmp(temp,"FM") == 0) ||
10858                      (StringICmp(temp,"FN") == 0) ||
10859                      (StringICmp(temp,"FO") == 0) ||
10860                      (StringICmp(temp,"FP") == 0) ||
10861                      (StringICmp(temp,"FQ") == 0) ||
10862                      (StringICmp(temp,"FR") == 0) ||
10863                      (StringICmp(temp,"HE") == 0) ||
10864                      (StringICmp(temp,"HF") == 0) ||
10865                      (StringICmp(temp,"HG") == 0) ||
10866                      (StringICmp(temp,"HI") == 0) ||
10867                      (StringICmp(temp,"LK") == 0) ||
10868                      (StringICmp(temp,"LL") == 0) ||
10869                      (StringICmp(temp,"LM") == 0) ||
10870                      (StringICmp(temp,"LN") == 0) ||
10871                      (StringICmp(temp,"LO") == 0) ||
10872                      (StringICmp(temp,"LP") == 0) ||
10873                      (StringICmp(temp,"LQ") == 0) ||
10874                      (StringICmp(temp,"LR") == 0) ||
10875                      (StringICmp(temp,"LS") == 0) ||
10876                      (StringICmp(temp,"LT") == 0)) {     /* EMBL direct submission */
10877               retcode = ACCN_EMBL_DIRSUB;
10878           } else if ((StringICmp(temp,"AL") == 0) ||
10879                      (StringICmp(temp,"BX") == 0)||
10880                      (StringICmp(temp,"CR") == 0)||
10881                      (StringICmp(temp,"CT") == 0)||
10882                      (StringICmp(temp,"CU") == 0)) {      /* EMBL genome project data */
10883               retcode = ACCN_EMBL_GENOME;
10884           } else if ((StringICmp(temp,"AN") == 0)) {      /* EMBL CON division */
10885               retcode = ACCN_EMBL_CON;
10886           } else if ((StringICmp(temp,"AX") == 0) ||
10887                      (StringICmp(temp,"CQ") == 0) ||
10888                      (StringICmp(temp,"CS") == 0) ||
10889                      (StringICmp(temp,"FB") == 0) ||
10890                      (StringICmp(temp,"GM") == 0) ||
10891                      (StringICmp(temp,"GN") == 0) ||
10892                      (StringICmp(temp,"HA") == 0) ||
10893                      (StringICmp(temp,"HB") == 0) ||
10894                      (StringICmp(temp,"HC") == 0) ||
10895                      (StringICmp(temp,"HD") == 0) ||
10896                      (StringICmp(temp,"HH") == 0) ||
10897                      (StringICmp(temp,"JA") == 0) ||
10898                      (StringICmp(temp,"JB") == 0) ||
10899                      (StringICmp(temp,"JC") == 0) ||
10900                      (StringICmp(temp,"JD") == 0) ||
10901                      (StringICmp(temp,"JE") == 0)) {      /* EMBL patent division */
10902               retcode = ACCN_EMBL_PATENT;
10903           } else if ((StringICmp(temp,"AT") == 0) ||
10904                      (StringICmp(temp,"AU") == 0) ||
10905                      (StringICmp(temp,"AV") == 0) ||
10906                      (StringICmp(temp,"BB") == 0) ||
10907                      (StringICmp(temp,"BJ") == 0) ||
10908                      (StringICmp(temp,"BP") == 0) ||
10909                      (StringICmp(temp,"BW") == 0) ||
10910                      (StringICmp(temp,"BY") == 0) ||
10911                      (StringICmp(temp,"CI") == 0) ||
10912                      (StringICmp(temp,"CJ") == 0) ||
10913                      (StringICmp(temp,"DA") == 0) ||
10914                      (StringICmp(temp,"DB") == 0) ||
10915                      (StringICmp(temp,"DC") == 0) ||
10916                      (StringICmp(temp,"DK") == 0) ||
10917                      (StringICmp(temp,"FS") == 0) ||
10918                      (StringICmp(temp,"FY") == 0) ||
10919                      (StringICmp(temp,"HX") == 0) ||
10920                      (StringICmp(temp,"HY") == 0) ||
10921                      (StringICmp(temp,"LU") == 0)) {      /* DDBJ EST's */
10922               retcode = ACCN_DDBJ_EST;
10923           } else if ((StringICmp(temp,"AB") == 0) ||
10924                      (StringICmp(temp,"LC") == 0)) {      /* DDBJ direct submission */
10925               retcode = ACCN_DDBJ_DIRSUB;
10926           } else if ((StringICmp(temp,"AG") == 0) ||
10927                      (StringICmp(temp,"AP") == 0) ||
10928                      (StringICmp(temp,"BS") == 0)) {      /* DDBJ genome project data */
10929               retcode = ACCN_DDBJ_GENOME;
10930           } else if ((StringICmp(temp,"AK") == 0))  {     /* DDBJ HTGS */
10931               retcode = ACCN_DDBJ_HTGS;
10932           } else if ((StringICmp(temp,"BA") == 0) ||
10933                      (StringICmp(temp,"DF") == 0) ||
10934                      (StringICmp(temp,"DG") == 0) ||
10935                      (StringICmp(temp,"LD") == 0)) {      /* DDBJ CON division */
10936               retcode = ACCN_DDBJ_CON;
10937           } else if ((StringICmp(temp,"BD") == 0) ||
10938                      (StringICmp(temp,"DD") == 0) ||
10939                      (StringICmp(temp,"DI") == 0) ||
10940                      (StringICmp(temp,"DJ") == 0) ||
10941                      (StringICmp(temp,"DL") == 0) ||
10942                      (StringICmp(temp,"DM") == 0) ||
10943                      (StringICmp(temp,"FU") == 0) ||
10944                      (StringICmp(temp,"FV") == 0) ||
10945                      (StringICmp(temp,"FW") == 0) ||
10946                      (StringICmp(temp,"FZ") == 0) ||
10947                      (StringICmp(temp,"GB") == 0) ||
10948                      (StringICmp(temp,"HV") == 0) ||
10949                      (StringICmp(temp,"HW") == 0) ||
10950                      (StringICmp(temp,"HZ") == 0) ||
10951                      (StringICmp(temp,"LF") == 0) ||
10952                      (StringICmp(temp,"LG") == 0) ||
10953                      (StringICmp(temp,"LV") == 0) ||
10954                      (StringICmp(temp,"LX") == 0)) {      /* DDBJ patent division */
10955               retcode = ACCN_DDBJ_PATENT;
10956           } else if ((StringICmp(temp,"DE") == 0) ||
10957                      (StringICmp(temp,"DH") == 0) ||
10958                      (StringICmp(temp,"FT") == 0) ||
10959                      (StringICmp(temp,"GA") == 0) ||
10960                      (StringICmp(temp,"LB") == 0)) {      /* DDBJ GSS */
10961               retcode = ACCN_DDBJ_GSS;
10962           } else {
10963               retcode = ACCN_IS_NT;
10964               break;
10965           }
10966 
10967           while (*s) {
10968               if (! IS_DIGIT(*s)) {
10969                   retval = FALSE;
10970                   break;
10971               }
10972               s++;
10973           }
10974           break;
10975       } else {
10976           retval = FALSE;
10977           break;
10978       }
10979       break;
10980     case 9: /* New 9-character accession, two letters +"_"+ 6 digits */
10981       if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1)))
10982           break;
10983       if(*(s+2)!='_')
10984           break;
10985       /* New(1999) 8-character protein accession, three letters + 5 digits */
10986       temp[0] = *s; s++;
10987       temp[1] = *s; s++;
10988       temp[2] = NULLB; s++;
10989 
10990       if ((StringICmp(temp,"NP") == 0) || (StringICmp(temp,"AP") == 0)) {
10991           retcode = ACCN_REFSEQ_PROT;
10992       } else if ((StringICmp(temp,"NM") == 0)) {
10993           retcode = ACCN_REFSEQ_mRNA;
10994       } else if ((StringICmp(temp,"NT") == 0)) {
10995           retcode = ACCN_REFSEQ_CONTIG;
10996       } else if ((StringICmp(temp,"NW") == 0)) {
10997           retcode = ACCN_REFSEQ_CONTIG;
10998       } else if ((StringICmp(temp,"NC") == 0)) {
10999           retcode = ACCN_REFSEQ_CHROMOSOME;
11000       } else if ((StringICmp(temp,"XM") == 0)) {
11001           retcode = ACCN_REFSEQ_mRNA_PREDICTED;
11002       } else if ((StringICmp(temp,"XP") == 0)) {
11003           retcode = ACCN_REFSEQ_PROT_PREDICTED;
11004       } else if ((StringICmp(temp,"NG") == 0) || (StringICmp(temp,"AC") == 0)) {
11005           retcode = ACCN_REFSEQ_GENOMIC;
11006       } else if ((StringICmp(temp,"NS") == 0)) {
11007           retcode = ACCN_REFSEQ_ARTIFICIAL_ASSEMBLY;
11008       } else if (IS_ALPHA(*temp) && IS_ALPHA(*(temp+1))) {
11009           retcode =ACCN_REFSEQ | ACCN_AMBIGOUS_MOL;
11010       } else
11011           retval = FALSE;
11012       while (*s) {
11013           if (! IS_DIGIT(*s)) {
11014               retval = FALSE;
11015               break;
11016           }
11017           s++;
11018       }
11019       break;
11020     case 10: /* New 10-character accession, three letters +"_"+ 6 digits */
11021       if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1)))
11022           break;
11023       if(*(s+3)!='_')
11024           break;
11025       temp[0] = *s; s++;
11026       temp[1] = *s; s++;
11027       temp[2] = *s; s++;
11028       temp[3] = NULLB; s++;
11029 
11030       if ((StringICmp(temp,"MAP") == 0)) {
11031           while (*s) {
11032               if (! IS_DIGIT(*s)) {
11033                   retval = FALSE;
11034                   break;
11035               }
11036               s++;
11037           }
11038           retcode = ACCN_NCBI_OTHER;
11039       } else
11040           retval = FALSE;
11041       break;
11042     case 11: /* New 11-character accession, two letters +"_"+ 8 digits */
11043       if(!IS_ALPHA(*s) || !IS_ALPHA(*(s+1)))
11044           break;
11045       if(*(s+2)!='_')
11046           break;
11047       temp[0] = *s; s++;
11048       temp[1] = *s; s++;
11049       temp[2] = NULLB; s++;
11050 
11051       if ((StringICmp(temp,"ZP") == 0)) {
11052           retcode = ACCN_REFSEQ_PROT_PREDICTED;
11053       } else
11054           retval = FALSE;
11055       while (*s) {
11056           if (! IS_DIGIT(*s)) {
11057               retval = FALSE;
11058               break;
11059           }
11060           s++;
11061       }
11062       break;
11063     case 12:
11064     case 13:
11065     case 14:
11066       if(IS_ALPHA(*s) && IS_ALPHA(*(s+1)) && IS_ALPHA(*(s+2)) && IS_ALPHA(*(s+3))) {
11067         /* whole genome shotgun 12-14-character accession, four letters + 8-10 digits */
11068         temp[0] = *s; s++;
11069         temp[1] = *s; s++;
11070         temp[2] = *s; s++;
11071         temp[3] = *s; s++;
11072         temp[4] = '\0';
11073         if ((StringNICmp(temp,"A", 1) == 0)) {
11074           retcode = ACCN_NCBI_WGS;
11075         } else if ((StringNICmp(temp,"B", 1) == 0)) {
11076           retcode = ACCN_DDBJ_WGS;
11077         } else if ((StringNICmp(temp,"C", 1) == 0)) {
11078           retcode = ACCN_EMBL_WGS;
11079         } else if ((StringNICmp(temp,"D", 1) == 0)) {
11080           retcode = ACCN_NCBI_WGS_TPA;
11081         } else if ((StringNICmp(temp,"E", 1) == 0)) {
11082           retcode = ACCN_DDBJ_WGS_TPA;
11083         } else if ((StringNICmp(temp,"F", 1) == 0)) {
11084           retcode = ACCN_EMBL_WGS;
11085         } else if ((StringNICmp(temp,"G", 1) == 0)) {
11086           retcode = ACCN_NCBI_TSA;
11087         } else if ((StringNICmp(temp,"H", 1) == 0)) {
11088           retcode = ACCN_EMBL_TSA;
11089         } else if ((StringNICmp(temp,"I", 1) == 0)) {
11090           retcode = ACCN_DDBJ_TSA;
11091         } else if ((StringNICmp(temp,"J", 1) == 0)) {
11092           retcode = ACCN_NCBI_WGS;
11093         } else if ((StringNICmp(temp,"K", 1) == 0)) {
11094           retcode = ACCN_NCBI_TARGETED;
11095         } else if ((StringNICmp(temp,"L", 1) == 0)) {
11096           retcode = ACCN_NCBI_WGS;
11097         } else if ((StringNICmp(temp,"M", 1) == 0)) {
11098           retcode = ACCN_NCBI_WGS;
11099         } else if ((StringNICmp(temp,"N", 1) == 0)) {
11100           retcode = ACCN_NCBI_WGS;
11101         } else if ((StringNICmp(temp,"O", 1) == 0)) {
11102           retcode = ACCN_EMBL_WGS;
11103         } else if ((StringNICmp(temp,"P", 1) == 0)) {
11104           retcode = ACCN_NCBI_WGS;
11105         } else if ((StringNICmp(temp,"Q", 1) == 0)) {
11106           retcode = ACCN_NCBI_WGS;
11107         } else if ((StringNICmp(temp,"R", 1) == 0)) {
11108           retcode = ACCN_NCBI_WGS;
11109         } else if ((StringNICmp(temp,"S", 1) == 0)) {
11110           retcode = ACCN_NCBI_PATENT;
11111         } else
11112           retval = FALSE;
11113         while (*s) {
11114           if (! IS_DIGIT(*s)) {
11115               retval = FALSE;
11116               break;
11117           }
11118           s++;
11119         }
11120       } else if(len == 12 && IS_ALPHA(*s) && IS_ALPHA(*(s+1)) && (*(s+2)=='_')) {
11121         /* New 12-character accession, two letters +"_"+ 9 digits */
11122         temp[0] = *s; s++;
11123         temp[1] = *s; s++;
11124         temp[2] = NULLB; s++;
11125 
11126         if ((StringICmp(temp,"NP") == 0)) {
11127           retcode = ACCN_REFSEQ_PROT;
11128         } else if ((StringICmp(temp,"NM") == 0)) {
11129           retcode = ACCN_REFSEQ_mRNA;
11130         } else if ((StringICmp(temp,"NW") == 0)) {
11131           retcode = ACCN_REFSEQ_CONTIG;
11132         } else if ((StringICmp(temp,"XM") == 0)) {
11133           retcode = ACCN_REFSEQ_mRNA_PREDICTED;
11134         } else if ((StringICmp(temp,"XP") == 0)) {
11135           retcode = ACCN_REFSEQ_PROT_PREDICTED;
11136         } else if (IS_ALPHA(*temp) && IS_ALPHA(*(temp+1))) {
11137           retcode =ACCN_REFSEQ | ACCN_AMBIGOUS_MOL;
11138         } else
11139           retval = FALSE;
11140         while (*s) {
11141           if (! IS_DIGIT(*s)) {
11142               retval = FALSE;
11143               break;
11144           }
11145           s++;
11146         }
11147       }
11148       break;
11149     case 15:
11150     case 16:
11151       if (IS_ALPHA(*s) && IS_ALPHA(*(s+1)) && (*(s+2)=='_')) {
11152         /* New 15-16-character accession, two letters +"_"+ four letters + 8-9 digits */
11153         temp[0] = *s; s++;
11154         temp[1] = *s; s++;
11155         temp[2] = NULLB; s++;
11156 
11157         if ((StringICmp(temp,"NZ") == 0)) {
11158           retcode = ACCN_REFSEQ_WGS;
11159         } else
11160           retval = FALSE;
11161         for (i = 0; i < 4; i++) {
11162           if (! IS_ALPHA (*s)) {
11163               retval = FALSE;
11164               break;
11165           }
11166           s++;
11167         }
11168         while (*s) {
11169           if (! IS_DIGIT(*s)) {
11170               retval = FALSE;
11171               break;
11172           }
11173           s++;
11174         }
11175       }
11176       break;
11177   default:
11178     retval = FALSE;
11179     break;
11180   }                     /* Endswitch, StringLen(s) */
11181 
11182   return (retval ? retcode : ACCN_UNKNOWN);
11183 }
11184 
11185 /****************************************************************************
11186 *
11187 *  Function:    IS_ntdb_accession
11188 *
11189 *  Description: Return TRUE if the input string is a validly formatted
11190 *               nucleotide database accession number (GenBank, EMBL, DDBJ, REFSEQ)
11191 *  ***WARNING*** DOES NO network access, relies on hardcoding in WHICH_db_accession.
11192 *
11193 *  Arguments:   s : CharPtr; pointer to accession number string.
11194 *                   Must be null terminated.
11195 *
11196 *  Author:      Mark Cavanaugh, Hugues Sicotte
11197 *  Date:        7/96,HS 12/2000
11198 *
11199 *  WARNING:     IS_ntdb_accession() does not communicate with any central
11200 *               resource about accession numbers. So there's no way to
11201 *               inform it automatically about new accession number prefixes.
11202 *
11203 *               Version Number ".integer" MUST have been stripped out
11204 *               before calling this function.
11205 *****************************************************************************/
11206 
IS_ntdb_accession(CharPtr s)11207 NLM_EXTERN Boolean LIBCALL IS_ntdb_accession (CharPtr s) {
11208     Uint4 status;
11209     status = WHICH_db_accession(s);
11210     return (Boolean)(ACCN_IS_NUC(status));
11211 }
11212 
11213 /*****************************************************************************
11214 *
11215 *  Function:    IS_protdb_accession
11216 *
11217 *  Description: Return TRUE if the input string is a validly formatted
11218 *               protein database accession number (SWISS-PROT)
11219 *               or the new 3 letter protein ID.
11220 *
11221 *  ***WARNING*** DOES NO network access, relies on hardcoding in WHICH_db_accession.
11222 *
11223 *  Arguments:   s : CharPtr; pointer to accession number string.
11224 *                   Must be null terminated.
11225 *
11226 *  Author:      Mark Cavanaugh, Hugues Sicotte (3/99)
11227 *  Date:        8/96, 3/99HS,12/2000
11228 *
11229 *  WARNING:     IS_protdb_accession() does not communicate with any central
11230 *               resource about accession numbers. So there's no way to
11231 *               inform it automatically about new accession number prefixes.
11232 *
11233 *               Version Number ".integer" MUST have been stripped out
11234 *               before calling this function.
11235 *****************************************************************************/
11236 
IS_protdb_accession(CharPtr s)11237 NLM_EXTERN Boolean LIBCALL IS_protdb_accession (CharPtr s) {
11238     Uint4 status;
11239     status = WHICH_db_accession(s);
11240     return (Boolean)(ACCN_IS_PROT(status));
11241 }
11242 
11243 /*
  Determine whether the Bioseq identified by a SeqId is referenced by any
  SeqLoc in the list. May fetch the Bioseq to get all the synonymous SeqIds.
11246  */
11247 
SeqIdInSeqLocList(SeqIdPtr sip,ValNodePtr list)11248 NLM_EXTERN Boolean LIBCALL SeqIdInSeqLocList(SeqIdPtr sip, ValNodePtr list) {
11249   ValNodePtr vnptmp;
11250   SeqIdPtr   siptmp;
11251   SeqLocPtr slp;
11252 
11253   for (vnptmp=list; vnptmp!=NULL; vnptmp=vnptmp->next)
11254   {
11255      siptmp = SeqLocId((SeqLocPtr)vnptmp->data.ptrvalue);
11256      if (siptmp!=NULL) {
11257         if (SeqIdForSameBioseq(sip, siptmp))
11258            return TRUE;
11259      } else if((slp=(SeqLocPtr)vnptmp->data.ptrvalue)!=NULL && (
11260                slp->choice == SEQLOC_PACKED_INT ||
11261                slp->choice == SEQLOC_MIX ||
11262                slp->choice == SEQLOC_EQUIV)) {
11263          slp = (SeqLocPtr)slp->data.ptrvalue;
11264          while(slp!=NULL) {
11265              siptmp = SeqLocId(slp);
11266              if (siptmp!=NULL) {
11267                  if (SeqIdForSameBioseq(sip, siptmp))
11268                      return TRUE;
11269              }
11270              slp=slp->next;
11271          }
11272      }
11273   }
11274   return FALSE;
11275 }
11276 
11277 /*********************************************************
11278 ***
11279 ***    AddSeqId  : create a new seqid and add at the end
11280 ***                of the list starting with sip_head
11281 ***
11282 **********************************************************/
AddSeqId(SeqIdPtr * sip_head,SeqIdPtr sip)11283 NLM_EXTERN SeqIdPtr AddSeqId (SeqIdPtr *sip_head, SeqIdPtr sip)
11284 {
11285   SeqIdPtr sip_tmp,
11286            sip_copy;
11287 
11288   sip_copy = SeqIdDup (sip);
11289   sip_tmp = sip_copy->next;
11290   sip_copy->next = NULL;
11291   if (sip_tmp!=NULL)
11292      SeqIdFree (sip_tmp);
11293   if ( (sip_tmp = *sip_head) != NULL ) {
11294      while (sip_tmp->next != NULL)
11295         sip_tmp = sip_tmp->next;
11296      sip_tmp->next = sip_copy;
11297   }
11298   else {
11299      *sip_head = sip_copy;
11300   }
11301   return (*sip_head);
11302 
11303 }
11304 
11305 /*******************************************************
11306 ***
11307 ***   SeqIdDupList : duplicate a list of SeqIdPtr
11308 ***
11309 *******************************************************/
SeqIdDupList(SeqIdPtr id_list)11310 NLM_EXTERN SeqIdPtr SeqIdDupList (SeqIdPtr id_list)
11311 {
11312   SeqIdPtr     sip=NULL;
11313   SeqIdPtr     sid;
11314 
11315   for (sid = id_list; sid != NULL; sid = sid->next) {
11316          sip = AddSeqId (&sip, sid);
11317   }
11318   return sip;
11319 }
11320 
SeqIdDupBestList(SeqIdPtr id_list)11321 NLM_EXTERN SeqIdPtr SeqIdDupBestList (SeqIdPtr id_list)
11322 {
11323   SeqIdPtr     sip=NULL;
11324   SeqIdPtr     sid, sid2;
11325   BioseqPtr    bsp;
11326 
11327   for (sid = id_list; sid != NULL; sid = sid->next) {
11328      sid2 = NULL;
11329      bsp = BioseqLockById (sid);
11330      if (bsp!=NULL) {
11331         sid2 = SeqIdFindBest(bsp->id, 0);
11332         BioseqUnlock (bsp);
11333      }
11334      if (sid2!=NULL)
11335         sip = AddSeqId (&sip, sid2);
11336      else
11337         sip = AddSeqId (&sip, sid);
11338   }
11339   return sip;
11340 }
11341 
SeqIdListfromSeqLoc(ValNodePtr vnpslp)11342 NLM_EXTERN SeqIdPtr SeqIdListfromSeqLoc (ValNodePtr vnpslp)
11343 {
11344   SeqIdPtr     sip=NULL, siptmp;
11345   ValNodePtr   vnp=NULL;
11346   Int2         j = 0, k;
11347   for (vnp = vnpslp; vnp != NULL; vnp = vnp->next)
11348   {
11349          sip = AddSeqId (&sip, SeqLocId ((SeqLocPtr) vnp->data.ptrvalue));
11350          j++;
11351   }
11352   if (sip!=NULL) {
11353      for (siptmp=sip, k=0; k<j-1; siptmp=siptmp->next, k++) continue;
11354      siptmp->next = NULL;
11355   }
11356   return sip;
11357 }
11358 
11359 
11360 /* We frequently do not want to use TMSMART, BankIt, and NCBIFILE IDs
11361  * in displays, formatting, etc.
11362  */
IsSkippableDbtag(DbtagPtr dbt)11363 NLM_EXTERN Boolean IsSkippableDbtag (DbtagPtr dbt)
11364 {
11365   if (dbt == NULL
11366       || StringICmp (dbt->db, "TMSMART") == 0
11367       || StringICmp (dbt->db, "BankIt") == 0
11368       || StringICmp (dbt->db, "NCBIFILE") == 0) {
11369     return TRUE;
11370   } else {
11371     return FALSE;
11372   }
11373 }
11374 
11375 
DoesCDSEndWithStopCodon(SeqFeatPtr cds)11376 NLM_EXTERN Boolean DoesCDSEndWithStopCodon (SeqFeatPtr cds)
11377 {
11378   ByteStorePtr bs;
11379   CharPtr      prot_str;
11380   Boolean      retval = FALSE;
11381 
11382   if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) {
11383     return FALSE;
11384   }
11385   bs = ProteinFromCdRegionEx (cds, TRUE, FALSE);
11386   if (bs == NULL) return FALSE;
11387   prot_str = BSMerge (bs, NULL);
11388   bs = BSFree (bs);
11389   if (prot_str == NULL) return FALSE;
11390 
11391   if (prot_str[StringLen (prot_str) - 1] == '*') {
11392     retval = TRUE;
11393   } else {
11394     retval = FALSE;
11395   }
11396   prot_str = MemFree (prot_str);
11397   return retval;
11398 }
11399 
11400 
11401