1 /*   salsap.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  salsap.c
27 *
28 * Author:  Colombe Chappey
29 *
30 * Version Creation Date:   1/27/96
31 *
32 * $Revision: 6.16 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date     Name        Description of modification
39 * -------  ----------  -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44 #include <salsap.h>
45 #include <salpedit.h>
46 #include <salutil.h>
47 #include <sqnutils.h>
48 #include <edutil.h>
49 #include <objmgr.h>
50 #include <seqmgr.h>
51 
SeqAlignIDList(SeqAlignPtr salp)52 NLM_EXTERN SeqIdPtr LIBCALL SeqAlignIDList (SeqAlignPtr salp)
53 {
54     return SeqIdPtrFromSeqAlign(salp);
55 }
56 
57 /* Used in salsa.c once */
FindSeqIdinSeqAlign(SeqAlignPtr salphead,SeqIdPtr sip)58 NLM_EXTERN Boolean LIBCALL FindSeqIdinSeqAlign (SeqAlignPtr salphead, SeqIdPtr sip)
59 {
60     return SeqAlignFindSeqId (salphead, sip);
61 }
62 
63 /*
64 static Boolean LIBCALL SeqIdInSeqLocList (SeqIdPtr sip, ValNodePtr vnp)
65 {
66     return SeqIdInSeqLocList(sip, vnp);
67 }
68 */
is_dim1seqalign(SeqAlignPtr salp)69 NLM_EXTERN Boolean LIBCALL is_dim1seqalign (SeqAlignPtr salp)
70 {
71   if (salp->dim == 1) {
72      return TRUE;
73   }
74   return FALSE;
75 }
76 
is_dim2seqalign(SeqAlignPtr salp)77 NLM_EXTERN Boolean LIBCALL is_dim2seqalign (SeqAlignPtr salp)
78 {
79   SeqAlignPtr nextsalp;
80   DenseSegPtr dsp;
81   SeqIdPtr    sip;
82 
83   if (salp->dim == 2 && salp->next != NULL) {
84      if (salp->segtype == 2) {
85         dsp = (DenseSegPtr) salp->segs;
86         sip = dsp->ids;
87         nextsalp = salp->next;
88         while (nextsalp) {
89            if (nextsalp->dim != 2)
90               break;
91            if (nextsalp->segtype != 2)
92               break;
93            dsp = (DenseSegPtr) nextsalp->segs;
94            if (! SeqIdForSameBioseq (sip, dsp->ids))
95               break;
96            nextsalp = nextsalp->next;
97         }
98         if (nextsalp == NULL)
99            return TRUE;
100      }
101   }
102   return FALSE;
103 }
104 
is_fasta_seqalign(SeqAlignPtr salp)105 NLM_EXTERN Boolean LIBCALL is_fasta_seqalign (SeqAlignPtr salp)
106 {
107   DenseSegPtr dsp;
108   Int4Ptr     startp;
109   Boolean     gap;
110   Int4        k;
111   Int2        j;
112   Boolean     one_gap = FALSE;
113 
114   if (salp->dim > 2) {
115      if (salp->segtype == 2) {
116         dsp = (DenseSegPtr) salp->segs;
117         for (j=0; j<dsp->dim; j++) {
118            gap=FALSE;
119            for (k=0; k<dsp->numseg; k++) {
120               startp=dsp->starts;
121               if (startp[dsp->dim*k + j] < 0) {
122                  gap = TRUE;
123                  one_gap = TRUE;
124               }
125               else if (gap)
126                  return FALSE;
127            }
128         }
129      }
130   }
131   else
132      return FALSE;
133   if (!one_gap)
134      return FALSE;
135   return TRUE;
136 }
137 
138 
is_salp_in_sap(SeqAnnotPtr sap,Uint1 choice)139 NLM_EXTERN SeqAlignPtr LIBCALL is_salp_in_sap (SeqAnnotPtr sap, Uint1 choice)
140 {
141   SeqAlignPtr      salp = NULL;
142 
143   if (sap != NULL) {
144      for (; sap!= NULL; sap=sap->next) {
145         if (sap->type == choice) {
146            salp = (SeqAlignPtr) sap->data;
147            return salp;
148         }
149      }
150   }
151   return NULL;
152 }
153 
154 
155 
156 
157 /*
158   Function to Check if at least one of the SeqIds of a SeqLocList
159   is the same as one of the SeqIds in the SeqAlign
160   */
161 
SeqAlignSeqLocComp(SeqAlignPtr salphead,ValNodePtr vnp)162 NLM_EXTERN Boolean LIBCALL SeqAlignSeqLocComp (SeqAlignPtr salphead, ValNodePtr vnp)
163 {
164   SeqAlignPtr salp;
165   ValNodePtr tmp;
166   SeqLocPtr  slp;
167   SeqIdPtr   sip,
168              tmpsip;
169 
170   for (tmp=vnp; tmp!=NULL; tmp=tmp->next)
171   {
172      slp= (SeqLocPtr)tmp->data.ptrvalue;
173      if (slp)
174      {
175         sip=SeqLocId(slp);
176         for (salp = salphead; salp!= NULL; salp=salp->next)
177         {
178            tmpsip =SeqIdPtrFromSeqAlign (salp);
179            if ((SeqIdOrderInBioseqIdList(sip, tmpsip)) > 0)
180            {
181               return TRUE;
182            }
183         }
184      }
185   }
186   return FALSE;
187 }
188 
189 /*
190   SeqAlignBestScore : Find The Last Score of type "score"
191   In the SeqAlign: Does not traverse Chain.
192   */
SeqAlignBestScore(SeqAlignPtr salp)193 NLM_EXTERN Int4 LIBCALL SeqAlignBestScore (SeqAlignPtr salp)
194 {
195   ScorePtr    score;
196   ObjectIdPtr oip;
197   Int4        val = 0;
198 
199   if (salp!=NULL) {
200      if (salp->segtype == 2) {
201         for (score = (ScorePtr) salp->score; score!=NULL; score=score->next)
202         {
203            oip = (ObjectIdPtr) score->id;
204            if (oip != NULL) {
205               if (StringStr(oip->str, "score")!=NULL) {
206                  if (score->choice == 1) {
207                     val = (Int4)score->value.intvalue;
208                  }
209               }
210            }
211         }
212      }
213   }
214   return val;
215 }
216 
SeqLocMixFromSeqAlign(SeqAlignPtr salp,SeqIdPtr sip)217 NLM_EXTERN SeqLocPtr LIBCALL SeqLocMixFromSeqAlign (SeqAlignPtr salp, SeqIdPtr sip)
218 {
219   DenseSegPtr dsp;
220   SeqLocPtr   headslp = NULL,
221               slp = NULL,
222               tmp;
223   SeqIdPtr    siptmp;
224   Int4Ptr     lens, start,
225               start2;
226   Int4        salp_start, salp_stop;
227   Int2        offset, j;
228 
229   if (salp == NULL)
230      return NULL;
231   if (salp->segtype != 2)
232      return NULL;
233   dsp =  (DenseSegPtr) salp->segs;
234   if (dsp == NULL)
235      return NULL;
236   offset = 0;
237   if (sip  == NULL)
238      sip = dsp->ids;
239   else {
240      sip = SeqIdFindBest (sip, 0);
241      for (siptmp=dsp->ids; siptmp!=NULL; siptmp=siptmp->next, offset++) {
242         if (SeqIdForSameBioseq (sip,siptmp))
243            break;
244      }
245      if (siptmp == NULL)
246         return NULL;
247   }
248   start=dsp->starts + offset;
249   if (offset==0)
250      start2 = start+1;
251   else
252      start2 = dsp->starts;
253   lens=dsp->lens;
254   for (j=0; j<dsp->numseg; j++) {
255      if (*start > -1)
256         break;
257      start +=dsp->dim;
258      start2+=dsp->dim;
259      lens++;
260   }
261   for (; j<dsp->numseg; j++, lens++) {
262      if (*start > -1 && *start2 > -1) {
263         salp_start = *start;
264         salp_stop = salp_start + *lens -1;
265         slp = SeqLocIntNew (salp_start, salp_stop, Seq_strand_plus, sip);
266         if (headslp == NULL) {
267            headslp = slp;
268         }
269         else if (headslp->choice == SEQLOC_MIX) {
270            tmp = (ValNodePtr)(headslp->data.ptrvalue);
271            while (tmp->next != NULL)
272               tmp = tmp->next;
273            tmp->next = slp;
274         }
275         else {
276            tmp = ValNodeNew(NULL);
277            tmp->choice = SEQLOC_MIX;
278            tmp->data.ptrvalue = (Pointer)headslp;
279            headslp = tmp;
280            tmp = (ValNodePtr)(headslp->data.ptrvalue);
281            tmp->next = slp;
282         }
283      }
284      start +=dsp->dim;
285      start2+=dsp->dim;
286   }
287   return headslp;
288 }
289 
290 
SeqLocListOfBioseqsFromSeqAlign(SeqAlignPtr salp)291 NLM_EXTERN ValNodePtr LIBCALL SeqLocListOfBioseqsFromSeqAlign (SeqAlignPtr salp)
292 {
293   SeqAlignPtr   salptmp;
294   BioseqPtr     bsp;
295   ValNodePtr    vnp = NULL;
296   DenseSegPtr   dsp;
297   DenseDiagPtr  ddp;
298   SeqIdPtr      sip,
299                 siptmp;
300   SeqLocPtr     slp;
301 
302   if (salp==NULL)
303      return NULL;
304   for (salptmp = salp; salptmp!=NULL; salptmp=salptmp->next)
305   {
306      sip = NULL;
307      if (salptmp->segtype == 1)
308      {
309         ddp = (DenseDiagPtr) salptmp->segs;
310         sip = ddp->id;
311      }
312      else if (salptmp->segtype == 2)
313      {
314         dsp = (DenseSegPtr) salptmp->segs;
315         sip = dsp->ids;
316      }
317      if (sip != NULL)
318      {
319         for (; sip != NULL; sip = sip->next)
320         {
321            siptmp = SeqIdDup (sip);
322            if (vnp==NULL || (vnp!=NULL && !SeqIdInSeqLocList (siptmp, vnp)))
323            {
324               bsp = BioseqLockById (siptmp);
325               if (bsp!= NULL)
326               {
327                  slp = SeqLocIntNew (0, bsp->length-1, Seq_strand_plus, siptmp);
328                  if (slp != NULL)
329                  {
330                     ValNodeAddPointer (&vnp, 0, (Pointer) slp);
331                  }
332                  BioseqUnlock (bsp);
333               }
334            }
335         }
336      }
337   }
338   return vnp;
339 }
340 
341 
MySeqIdSetFree(SeqIdPtr sip)342 static SeqIdPtr LIBCALL MySeqIdSetFree (SeqIdPtr sip)
343 {
344   SeqIdPtr next;
345 
346     while (sip != NULL)
347     {
348         next = sip->next;
349         sip->next = NULL;
350         if (sip!=NULL) {
351             if (sip->choice > 0 && sip->choice < 30) SeqIdFree(sip);
352         }
353         else break;
354         sip = next;
355     }
356     return NULL;
357 }
358 
CompSeqAlignFree(SeqAlignPtr salp)359 NLM_EXTERN SeqAlignPtr LIBCALL CompSeqAlignFree (SeqAlignPtr salp)
360 {
361   CompSegPtr csp;
362 
363   csp = (CompSegPtr) salp->segs;
364   if (csp->ids != NULL) MySeqIdSetFree (csp->ids);
365   csp->ids=NULL;
366   if (csp->from != NULL) MemFree (csp->from);
367   csp->from=NULL;
368   if (csp->lens != NULL) MemFree (csp->lens);
369   csp->lens=NULL;
370   if (csp->starts!= NULL) MemFree (csp->starts);
371   csp->starts=NULL;
372   MemFree ( csp);
373   salp->segs = NULL;
374   SeqAlignSetFree (salp);
375   return NULL;
376 }
377 
CompSeqAnnotFree(SeqAnnotPtr sap)378 NLM_EXTERN SeqAnnotPtr LIBCALL CompSeqAnnotFree (SeqAnnotPtr sap)
379 {
380   SeqAlignPtr salp;
381 
382   if ( sap == NULL ) return NULL;
383   if ( sap->type != 2 ) {
384       ErrPostEx(SEV_WARNING, 0, 0, "fail in CompSeqAnnotFree [1] %d", (int)sap->type);
385       return NULL;
386   }
387   if ( ( salp = (SeqAlignPtr) sap->data ) == NULL ) {
388       ErrPostEx(SEV_WARNING, 0, 0, "fail in CompSeqAnnotFree [2]");
389       return NULL;
390   }
391   if ( salp->segtype == COMPSEG )
392       CompSeqAlignFree (salp);
393   else if ( salp->segtype == 2 )
394       SeqAnnotFree (sap);
395   else {
396       ErrPostEx(SEV_WARNING, 0, 0, "fail in CompSeqAnnotFree [3] %d", salp->segtype);
397       return NULL;
398   }
399   sap->data = NULL;
400   return NULL;
401 }
402 
SeqAlignBoolSegCpy(SeqAnnotPtr sap,Int4 from,Int4 to)403 NLM_EXTERN SeqAnnotPtr LIBCALL SeqAlignBoolSegCpy (SeqAnnotPtr sap, Int4 from, Int4 to)
404 {
405   SeqAnnotPtr sapnew =NULL;
406   SeqAlignPtr salp, salpnew =NULL, salpre =NULL;
407   CompSegPtr  csp =NULL;
408   CompSegPtr  newcsp =NULL;
409   BoolPtr     cspstart =NULL;
410   Int4Ptr     csplens =NULL, newcsplens =NULL;
411   Int4Ptr     cspfrom =NULL, newcspfrom =NULL;
412   Int4        j, k, nbseq, numseg, newnumseg;
413   Int4        sumlens, offset;
414 
415   if ( sap == NULL ) return NULL;
416   if ( sap->type != 2 ) {
417       ErrPostEx(SEV_WARNING, 0, 0, "fail in SeqAlignBoolSegCpy [1]");
418       return NULL;
419   }
420   if ( ( salp = (SeqAlignPtr) sap->data ) == NULL ) {
421       ErrPostEx(SEV_WARNING, 0, 0, "fail in SeqAlignBoolSegCpy [1-2]");
422       return NULL;
423   }
424   if ( salp->segtype != COMPSEG ) {
425       ErrPostEx(SEV_WARNING, 0, 0, "fail in SeqAlignBoolSegCpy [2]");
426       return NULL;
427   }
428   sapnew = SeqAnnotNew ();
429   if (sapnew == NULL) {
430       return NULL;
431   }
432   sapnew->type = 2;
433 
434   while ( salp != NULL )
435   {
436       salpnew = SeqAlignNew ();
437       if ( salpnew == NULL ) {
438           SeqAnnotFree (sapnew);
439           return NULL;
440       }
441       salpnew->type = 3;
442       salpnew->segtype = COMPSEG;
443       salpnew->dim = salp->dim;
444       csp = (CompSegPtr) salp->segs;
445       newcsp = (CompSegPtr) MemNew ( sizeof (CompSeg) );
446       if ( newcsp == NULL ) {
447           ErrPostEx(SEV_WARNING, 0, 0, "fail in SeqAlignBoolSegCpy [4]");
448           SeqAnnotFree (sapnew);
449           return NULL;
450       }
451       salpnew->segs = (Pointer) newcsp;
452       nbseq = newcsp->dim = csp->dim;
453       newcsp->ids = SeqIdDupList (csp->ids);
454       numseg = newcsp->numseg = csp->numseg;
455 
456           /* copy the first position (from) + sum of length */
457       newcsp->from = (Int4Ptr) MemNew ((size_t) ((nbseq +2) * sizeof (Int4)));
458       if ( newcsp->from == NULL ) {
459           SeqAnnotFree (sapnew);
460           return NULL;
461       }
462       for (j = 0; j < nbseq; j++) {
463           offset = -1;
464           sumlens = 0;
465           csplens = csp->lens;
466           cspstart = csp->starts;
467           cspstart += j;
468           for (k = 0; k < numseg ; k++, csplens++, cspstart +=nbseq) {
469                if ( from >= sumlens && from < sumlens + *csplens ) {
470                     if ( (Boolean)(*cspstart) ) offset =(sumlens +*csplens -from);
471                     break;
472                }
473                if ( (Boolean)(*cspstart) ) sumlens += (*csplens);
474           }
475           if ( offset < 0 )
476                for (; k < numseg && offset < 0; k++) {
477                     if ( (Boolean)(*cspstart) ) offset = 0;
478                }
479           newcsp->from [j] = csp->from [j] +offset;
480       }
481           /* count number of new segments between from and to */
482       sumlens =0;
483       newnumseg =0;
484       cspstart = csp->starts;
485       for (k=0; k<numseg; k++, sumlens += (Int4)(*csplens), csplens++, cspstart += (Int4)(nbseq))
486       {
487           if ( from >= sumlens && from < sumlens + *csplens ) break;
488       }
489       for (; k<numseg; k++, sumlens += *csplens, csplens++, cspstart +=nbseq)
490       {
491           newnumseg++;
492           if ( to >= sumlens && to < sumlens + *csplens ) break;
493       }
494           /* copy segment lengths within from and to */
495       newcsp->lens =(Int4Ptr) MemNew ((size_t) ((newnumseg+2) * sizeof (Int4)));
496       if ( newcsp->lens == NULL ) {
497           SeqAnnotFree (sapnew);
498           return NULL;
499       }
500       sumlens =0;
501       csplens = csp->lens;
502       cspstart = csp->starts;
503       newcsplens = newcsp->lens;
504       for (k=0; k<numseg; k++, sumlens += *csplens, csplens++, cspstart +=nbseq)
505       {
506           if ( from >= sumlens && from < sumlens + *csplens ) break;
507       }
508       *newcsplens = sumlens +*csplens -from;
509       newcsplens++;
510       csplens++;
511       k++;
512       for (; k<numseg; k++, sumlens += *csplens, csplens++, cspstart +=nbseq)
513       {
514           *newcsplens = *csplens;
515           if ( to >= sumlens && to < sumlens + *csplens ) break;
516           newcsplens++;
517       }
518       *newcsplens = sumlens -to;
519       newcsp->starts =(BoolPtr) MemNew((size_t)((nbseq *newnumseg +2) * sizeof(Boolean)));
520       if ( newcsp->starts == NULL ) {
521           SeqAnnotFree (sapnew);
522           return NULL;
523       }
524       for (j = 0; j < nbseq; j++)
525       {
526           sumlens = 0;
527           csplens = csp->lens;
528           cspfrom = csp->from;
529           cspfrom += j;
530           newcspfrom = newcsp->from;
531           newcspfrom += j;
532           for(k=0;k<numseg; k++,sumlens +=*csplens, csplens++, cspstart +=nbseq)
533           {
534                if ( from >= sumlens && from < sumlens + *csplens ) break;
535           }
536           for (; k<numseg; k++, sumlens +=*csplens, csplens++, cspstart +=nbseq)
537           {
538                *newcsplens = *csplens;
539                if ( to >= sumlens && to < sumlens + *csplens ) break;
540           }
541       }
542 /*
543       csp->strands
544       csp->scores
545 */
546       if ( salpre == NULL ) sapnew->data = (Pointer) salpnew;
547       else salpre->next = salpnew;
548       salpre = salpnew;
549       salp = salp->next;
550   }
551   return sapnew;
552 }
553 
SeqAlignDenseSegToBoolSeg(SeqAlignPtr salp)554 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDenseSegToBoolSeg (SeqAlignPtr salp)
555 {
556   SeqAlignPtr salpnew =NULL;
557   DenseSegPtr dsp =NULL;
558   Int4Ptr     dspstarts =NULL, dspstarts2 = NULL;
559   Int4Ptr     dsplens =NULL;
560   Uint1Ptr    strandp;
561   Uint1       strand;
562   CompSegPtr  csp =NULL;
563   Int4        j;
564   Int4        nbseq, numseg;
565 
566   salpnew = SeqAlignNew ();
567   if ( salpnew == NULL ) {
568           return NULL;
569   }
570   salpnew->type = 3;
571   salpnew->segtype = COMPSEG;
572   salpnew->dim = salp->dim;
573 
574   salpnew->score = salp->score;
575   salp->score = NULL;
576 
577   dsp = (DenseSegPtr) salp->segs;
578   csp = (CompSegPtr) MemNew ( sizeof (CompSeg) );
579   if ( csp == NULL ) {
580       return NULL;
581   }
582   salpnew->segs = (Pointer) csp;
583   csp->dim = dsp->dim;
584   nbseq = dsp->dim;
585 
586   csp->ids = SeqIdDupBestList (dsp->ids);
587   csp->numseg = dsp->numseg;
588   numseg = dsp->numseg;
589 
590   if (dsp->strands != NULL) {
591      strandp = dsp->strands;
592      csp->strands=(Uint1Ptr)MemNew((size_t)((nbseq*numseg+4)*sizeof (Uint1)));
593      for (j=0; j<nbseq*numseg; j++, strandp++)
594         csp->strands[j] = *strandp;
595   }
596   csp->lens = (Int4Ptr) MemNew ((size_t) ((numseg+2) * sizeof (Int4)));
597   if ( csp->lens == NULL ) {
598      return NULL;
599   }
600   for (j = 0, dsplens = dsp->lens; j < numseg ; j++, dsplens++ )
601      csp->lens [j] = *dsplens;
602   csp->from = (Int4Ptr) MemNew ((size_t) ((nbseq +2) * sizeof (Int4)));
603   if ( csp->from == NULL ) {
604      return NULL;
605   }
606   for (j = 0; j < nbseq +2; j++)
607      csp->from [j] = 0;
608   dspstarts=dsp->starts;
609   strandp = dsp->strands;
610   if (strandp!=NULL)
611      strand = *strandp;
612   else
613      strand = Seq_strand_unknown;
614   for (j = 0; j < nbseq ; j++, dspstarts++)
615   {
616      dsplens = dsp->lens;
617      if (*dspstarts < 0) {
618         for(dspstarts2=dspstarts;dspstarts2!=NULL;dspstarts2+=nbseq) {
619            if (*dspstarts2 > -1)
620               break;
621            dsplens++;
622         }
623         if (dspstarts2!=NULL) {
624            if (strand==Seq_strand_minus)
625               csp->from [j] = *dspstarts2 + *dsplens;
626            else
627               csp->from [j] = *dspstarts2;
628         }
629      }
630      else {
631         if (strand==Seq_strand_minus)
632            csp->from [j] = *dspstarts + *dsplens;
633         else
634            csp->from [j] = *dspstarts;
635      }
636      if (strandp!=NULL) {
637         strandp++;
638         strand = *strandp;
639      } else strand = Seq_strand_unknown;
640   }
641   csp->starts=(BoolPtr) MemNew((size_t)((nbseq *numseg +2) * sizeof(Boolean)));
642   if ( csp->starts == NULL ) {
643      return NULL;
644   }
645   for (j =0, dspstarts =dsp->starts; j <nbseq*numseg ; j++, dspstarts++ ) {
646           csp->starts [j] = (Boolean) (*dspstarts >= 0);
647   }
648   return salpnew;
649 }
650 
SeqAnnotDenseSegToBoolSeg(SeqAnnotPtr sap)651 NLM_EXTERN SeqAnnotPtr LIBCALL SeqAnnotDenseSegToBoolSeg (SeqAnnotPtr sap)
652 {
653   SeqAnnotPtr sapnew =NULL;
654   SeqAlignPtr salp=NULL, salpnew=NULL, salpre=NULL;
655 
656   if ( sap == NULL ) return NULL;
657   if ( sap->type != 2 ) {
658       return NULL;
659   }
660   if ( ( salp = (SeqAlignPtr) sap->data ) == NULL ) {
661       return NULL;
662   }
663   if ( salp->segtype == COMPSEG ) return sap;
664   if ( salp->segtype != 2 ) {
665       return NULL;
666   }
667   sapnew = SeqAnnotNew ();
668   if (sapnew == NULL) {
669       return NULL;
670   }
671   sapnew->type = 2;
672   while ( salp != NULL )
673   {
674       salpnew = SeqAlignDenseSegToBoolSeg (salp);
675       if ( salpre == NULL ) sapnew->data = (Pointer) salpnew;
676       else salpre->next = salpnew;
677       salpre = salpnew;
678       salp = salp->next;
679   }
680   return sapnew;
681 }
682 
SeqAlignBoolSegToDenseSeg(SeqAlignPtr salp)683 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignBoolSegToDenseSeg (SeqAlignPtr salp)
684 {
685   SeqAlignPtr salpnew =NULL;
686   DenseSegPtr dsp =NULL;
687   CompSegPtr  csp =NULL;
688   BoolPtr     cspstarts =NULL;
689   Int4Ptr     cspfrom =NULL;
690   Int4Ptr     csplens =NULL;
691   Uint1Ptr    strandp;
692   Uint1       strand;
693   Int4Ptr     dspstarts =NULL;
694   Int4        sumlens, k;
695   Int4        j, nbseq, numseg;
696 
697   salpnew = SeqAlignNew ();
698   if (salpnew != NULL) {
699      salpnew->type = 3;
700      salpnew->segtype = 2;
701      salpnew->dim = salp->dim;
702 
703      salpnew->score = salp->score;
704      salp->score = NULL;
705 
706      csp = (CompSegPtr) salp->segs;
707      dsp = DenseSegNew ();
708      if ( dsp == NULL ) {
709         return NULL;
710      }
711      salpnew->segs = (Pointer) dsp;
712      nbseq = csp->dim;
713      dsp->dim = csp->dim;
714      dsp->ids = SeqIdDupList (csp->ids);
715      dsp->numseg = csp->numseg;
716      numseg = csp->numseg;
717 
718      dsp->lens  =(Int4Ptr)MemNew((size_t)((numseg + 2) * sizeof (Int4)));
719      for (j = 0, csplens = csp->lens; j < numseg ; j++, csplens++ )
720         dsp->lens [j] = *csplens;
721      if (csp->strands != NULL) {
722         strandp=csp->strands;
723         dsp->strands =(Uint1Ptr)MemNew((size_t)((nbseq*numseg+4)*sizeof (Uint1)));
724         for (j=0; j<nbseq*numseg; j++, strandp++)
725            dsp->strands[j] = *strandp;
726      }
727      dsp->starts=(Int4Ptr)MemNew((size_t)((nbseq *numseg +4) * sizeof (Int4)));
728      for (k = 0; k < nbseq *numseg +4; k++)
729         dsp->starts[k] = -1;
730      cspstarts = csp->starts;
731      dspstarts = dsp->starts;
732      strandp = csp->strands;
733      if (strandp!=NULL)
734         strand = *strandp;
735      else
736         strand = Seq_strand_unknown;
737      for (j = 0, cspfrom = csp->from; j < nbseq ; j++, cspfrom++)
738      {
739         csplens = csp->lens;
740         sumlens = 0;
741         for (k = 0; k < numseg; k++, csplens++) {
742            if ( (Boolean)(cspstarts [nbseq *k +j]) ) {
743               if (strand == Seq_strand_minus) {
744                  sumlens += *csplens;
745                  dspstarts [nbseq *k +j] = *cspfrom - sumlens;
746               }
747               else {
748                  dspstarts [nbseq *k +j] = *cspfrom + sumlens;
749                  sumlens += *csplens;
750               }
751            }
752         }
753         if (strandp!=NULL) {
754            strandp++;
755            strand = *strandp;
756         }
757         else
758            strand = Seq_strand_unknown;
759      }
760   }
761   return salpnew;
762 }
763 
SeqAnnotBoolSegToDenseSeg(SeqAnnotPtr sap)764 NLM_EXTERN SeqAnnotPtr LIBCALL SeqAnnotBoolSegToDenseSeg (SeqAnnotPtr sap)
765 {
766   SeqAnnotPtr sapnew =NULL;
767   SeqAlignPtr salp, salpnew =NULL, salpre =NULL;
768 
769   if ( sap == NULL ) return NULL;
770   if ( sap->type != 2 ) {
771       return NULL;
772   }
773   if ( ( salp = (SeqAlignPtr) sap->data ) == NULL ) {
774       return NULL;
775   }
776   if ( salp->segtype == 2 ) return sap;
777   if ( salp->segtype != COMPSEG ) {
778       return NULL;
779   }
780   sapnew = SeqAnnotNew ();
781   if (sapnew == NULL) return NULL;
782   sapnew->type = 2;
783 
784   while ( salp != NULL )
785   {
786       salpnew = SeqAlignBoolSegToDenseSeg (salp);
787       if ( salpre == NULL ) sapnew->data = (Pointer) salpnew;
788       else salpre->next = salpnew;
789       salpre = salpnew;
790       salp = salp->next;
791   }
792   return sapnew;
793 }
794 
795 
CompSeqAlignPrint(SeqAlignPtr salp)796 NLM_EXTERN void LIBCALL CompSeqAlignPrint (SeqAlignPtr salp)
797 {
798   CompSegPtr  csp =NULL;
799   SeqIdPtr    sip;
800   BoolPtr     startp;
801   Int4        j, k;
802   FILE        *fout;
803   Char    strLog[128];
804 
805   csp = (CompSegPtr) salp->segs;
806   if (csp!=NULL) {
807      fout = FileOpen ("LogFile", "w");
808      if (fout!=NULL) {
809         fprintf (fout, "\n");
810         for (j=0, sip=csp->ids; sip!=NULL; sip=sip->next, j++)
811         {
812             SeqIdWrite (sip, strLog, PRINTID_FASTA_LONG, 120);
813             fprintf (fout, "%d %s \n", (int)(j+1), strLog);
814         }
815         fprintf (fout, "\n");
816         for (j=0; j<csp->dim; j++)
817            fprintf (fout, " %d ", (int)csp->from[j]);
818         fprintf (fout, "\n");
819         startp = csp->starts;
820         for (j = 0; j < csp->numseg; j++) {
821             fprintf (fout, "%3d lg %6ld ", (int)j, (long)csp->lens[j]);
822             for (k = 0; k < csp->dim; k++) {
823                fprintf (fout, " %d", (int)*startp);
824                startp++;
825             }
826 /*
827             for ( k = 0; k < csp->dim; k++) {
828                if (csp->strands!=NULL)
829                  fprintf (fout, " %d", (int)csp->strands[(Int4)csp->dim*k+j]);
830             }
831 */
832             fprintf (fout, "\n");
833         }
834         fprintf (fout, "\n");
835         FileClose (fout);
836      }
837   }
838 }
839 
build_seqalign_fromstart(Int2 dim,Int2 numseg,SeqIdPtr sip,Int4Ptr starts,Int4Ptr lens)840 NLM_EXTERN SeqAlignPtr LIBCALL build_seqalign_fromstart (Int2 dim, Int2 numseg, SeqIdPtr sip, Int4Ptr starts, Int4Ptr lens)
841 {
842   SeqAlignPtr salp;
843   DenseSegPtr dsp;
844   SeqIdPtr    next;
845 
846   salp = SeqAlignNew ();
847   if (salp != NULL) {
848      salp->type = 3;
849      salp->segtype = 2;
850      salp->dim = dim;
851      dsp = DenseSegNew ();
852      if (dsp != NULL) {
853         salp->segs = (Pointer) dsp;
854         dsp->dim = dim;
855         dsp->ids = sip;
856         dsp->numseg = numseg;
857         dsp->starts = starts;
858         dsp->lens = lens;
859         return salp;
860      }
861   }
862   MemFree (starts);
863   while (sip != NULL) {
864      next = sip->next;
865      sip->next = NULL;
866      SeqIdFree (sip);
867      sip = next;
868   }
869   return NULL;
870 }
871 
DenseDiagDup(DenseDiagPtr ddp)872 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagDup (DenseDiagPtr ddp)
873 {
874   DenseDiagPtr ddp2;
875   ddp2 = (DenseDiagPtr) AsnIoMemCopy ((Pointer) ddp, (AsnReadFunc) DenseDiagAsnRead, (AsnWriteFunc) DenseDiagAsnWrite);
876   return ddp2;
877 }
878 
SeqAlignDupRegion(SeqAlignPtr salp,Int2 to_numseg,Int4 subseg,Boolean first_part)879 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDupRegion (SeqAlignPtr salp, Int2 to_numseg, Int4 subseg, Boolean first_part)
880 {
881   SeqAlignPtr newsalp = NULL;
882   DenseSegPtr dsp = NULL, newdsp = NULL;
883   Int4Ptr     dspstart = NULL;
884   Int4Ptr     dsplen = NULL;
885   Int4Ptr     newstart = NULL;
886   Int4Ptr     newlens = NULL;
887   Int2        newseg;
888   Int2        n;
889   Int2        k;
890   Int4        j;
891 
892   dsp = (DenseSegPtr) salp->segs;
893   newsalp = SeqAlignNew ();
894   if ( newsalp == NULL ) return NULL;
895   newsalp->type = salp->type;
896   newsalp->segtype = salp->segtype;
897   newsalp->dim = salp->dim;
898   newdsp = DenseSegNew ();
899   if ( newdsp == NULL ) return NULL;
900   newsalp->segs = (Pointer) newdsp;
901   n = newdsp->dim = dsp->dim;
902   newdsp->ids = SeqIdDupList (dsp->ids);
903   if ( to_numseg > dsp->numseg ) to_numseg = 0;
904   if ( to_numseg == 0 )
905          newseg = dsp->numseg;
906   else if ( first_part )
907          newseg = to_numseg;
908   else if ( !first_part )
909          newseg = dsp->numseg - to_numseg;
910   newdsp->numseg = newseg;
911   newdsp->starts = (Int4Ptr) MemNew ((size_t) ((n*newseg+4) * sizeof (Int4)));
912   if ( newdsp->starts == NULL ) return NULL;
913   for (j = 0; j < n * newseg + 4; j++) newdsp->starts [j] = -1;
914   newdsp->lens  = (Int4Ptr) MemNew ((size_t) ((newseg + 2) * sizeof (Int4)));
915   if ( newdsp->lens == NULL ) return NULL;
916   for (j = 0; j < newseg + 2; j++)     newdsp->lens [j] = 0;
917   newstart = newdsp->starts;
918   newlens  = newdsp->lens;
919   dspstart = dsp->starts;
920   dsplen   = dsp->lens;
921   if ( first_part )
922   {
923          for ( k = 0; k < newseg - 1; k++ )
924          {
925                 for (j = 0; j < n; j++) newstart [j] = dspstart[j];
926                 *newlens = *dsplen;
927                 newstart += n;
928                 newlens++;
929                 dspstart += n;
930                 dsplen++;
931          }
932          for (j = 0; j < n; j++) newstart [j] = dspstart[j];
933          *newlens = subseg;
934   }
935   else {
936          for ( k = 0; k < to_numseg-1; k++ )
937          {
938                 dspstart += n;
939                 dsplen++;
940          }
941          for (j = 0; j < n; j++)
942                        if ( dspstart[j] > -1 )
943                               newstart [j] = dspstart[j] + subseg;
944                        else
945                               newstart [j] = dspstart[j];
946          *newlens = *dsplen - subseg;
947          newstart += n;
948          newlens++;
949          dspstart += n;
950          dsplen++;
951          k++;
952          for ( ; k < to_numseg + newseg; k++ )
953          {
954                 for (j = 0; j < n; j++) newstart [j] = dspstart[j];
955                 *newlens = *dsplen;
956                 newstart += n;
957                 newlens++;
958                 dspstart += n;
959                 dsplen++;
960          }
961   }
962   return newsalp;
963 }
964 
SeqAlignDupAdd(SeqAlignPtr * salp_head,SeqAlignPtr salp,Int2 to_numseg,Int4 subseg,Boolean first_part)965 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDupAdd (SeqAlignPtr *salp_head, SeqAlignPtr salp, Int2 to_numseg, Int4 subseg, Boolean first_part)
966 {
967   SeqAlignPtr salp_tmp, salp_copy;
968 
969   salp_copy = SeqAlignDupRegion (salp, to_numseg, subseg, first_part);
970   if ( salp_copy == NULL )
971          return *salp_head;
972   if ( (salp_tmp = *salp_head) != NULL )
973   {
974          while ( salp_tmp->next != NULL )
975                 salp_tmp = salp_tmp->next;
976          salp_tmp->next = salp_copy;
977   }
978   else *salp_head = salp_copy;
979   return *salp_head;
980 }
981 
SeqAlignEndExtend(SeqAlignPtr sap,Int4 start1,Int4 start2,Int4 stop1,Int4 stop2,Int4 x1,Int4 y1,Int4 x2,Int4 y2,Uint1 strand1,Uint1 strand2)982 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignEndExtend (SeqAlignPtr sap, Int4 start1, Int4 start2, Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2, Uint1 strand1, Uint1 strand2)
983 {
984    DenseSegPtr   dsp;
985    Int4Ptr       n_starts, n_lens;
986    Uint1Ptr      n_strands;
987    Int4          index1,
988                  minlen = 0;
989    Int2          n;
990    Int2          offset=0;
991    Boolean       is_strands = FALSE;
992 
993    if (sap==NULL)
994       return NULL;
995    if (start1==x1 && start2==y1 && stop1==x2 && stop2==y2)
996       return sap;
997    if (sap->segtype == 2)
998     {
999       dsp = (DenseSegPtr) sap->segs;
1000       is_strands = (Boolean) (dsp->strands!=NULL);
1001       n = dsp->numseg;
1002       n_starts = (Int4Ptr) MemNew((2*(n+2)+4)*sizeof(Int4));
1003       n_lens = (Int4Ptr) MemNew((n+4)*sizeof(Int4));
1004       if (is_strands)
1005          n_strands = (Uint1Ptr)MemNew((2*(n+2)+4)*sizeof(Uint1));
1006       if (x1 > start1 && y1 > start2) {
1007          minlen = MIN ((x1-start1), (y1-start2));
1008          n = 0;
1009       }
1010       if ((x1 > start1 || y1 > start2)  && ((x1-start1) != (y1-start2))) {
1011          if (x1 > start1 && (x1-start1) > minlen) {
1012             n_starts[0] = 0;
1013             n_starts[1] = -1;
1014             n_lens[0] = (x1 - start1) - minlen;
1015             if (is_strands) {
1016                n_strands[0] = strand1;
1017                n_strands[1] = strand2;
1018             }
1019          }
1020          else if (y1 > start2 && (y1-start2) > minlen) {
1021             n_starts[0] = -1;
1022             n_starts[1] = 0;
1023             n_lens[0] = (y1 - start2) - minlen;
1024             if (is_strands) {
1025                n_strands[0] = strand1;
1026                n_strands[1] = strand2;
1027             }
1028          }
1029          offset += 1;
1030       }
1031       if (minlen > 0) {
1032          n += offset;
1033          n_starts[2*n] = x1 - minlen;
1034          n_starts[2*n+1] = y1 - minlen;
1035          n_lens[n] = minlen;
1036          if (is_strands) {
1037             n_strands[2*n] = strand1;
1038             n_strands[2*n+1] = strand2;
1039          }
1040          offset += 1;
1041       }
1042       n = dsp->numseg;
1043       for (index1=0; index1<n; index1++) {
1044          n_starts [2*(index1+offset)] = dsp->starts [2*index1];
1045          if (is_strands)
1046             n_strands[2*(index1+offset)] = dsp->strands[2*index1];
1047          n_starts [2*(index1+offset)+1]=dsp->starts [2*index1+1];
1048          if (is_strands)
1049             n_strands[2*(index1+offset)+1]=dsp->strands[2*index1+1];
1050          n_lens[index1+offset] = dsp->lens[index1];
1051       }
1052       if (x2 < stop1 &&  y2 < stop2) {
1053          n += offset;
1054          minlen = MIN ((stop1-x2), (stop2-y2));
1055          n_starts[2*n] = x2 + 1;
1056          n_starts[2*n+1] = y2 +1;
1057          n_lens[n] = minlen;
1058          if (is_strands) {
1059             n_strands[2*n] = strand1;
1060             n_strands[2*n+1] = strand2;
1061          }
1062          x2 += minlen;
1063          y2 += minlen;
1064          offset += 1;
1065       }
1066       if (x2 < stop1 || y2 < stop2) {
1067          n += offset;
1068          if (x2 < stop1) {
1069             n_starts[2*n] = x2 +1;
1070             n_starts[2*n+1] = -1;
1071             n_lens[n] = stop1 - x2;
1072             if (is_strands) {
1073                n_strands[2*n] = strand1;
1074                n_strands[2*n+1] = strand2;
1075             }
1076          }
1077          else if (y2 < stop2) {
1078             n_starts[2*n] = -1;
1079             n_starts[2*n+1] = y2 +1;
1080             n_lens[n] = stop2 - y2;
1081             if (is_strands) {
1082                n_strands[2*n] = strand1;
1083                n_strands[2*n+1] = strand2;
1084             }
1085          }
1086          offset += 1;
1087       }
1088       dsp->numseg = n+1;
1089       MemFree(dsp->starts);
1090       if (is_strands)
1091          MemFree(dsp->strands);
1092       MemFree(dsp->lens);
1093       dsp->starts = n_starts;
1094       dsp->lens = n_lens;
1095       if (is_strands)
1096          dsp->strands = n_strands;
1097    }
1098    return sap;
1099 }
1100 
get_pos_from_salp(SeqAlignPtr salp,Int4 pos,Int4 PNTR offset,Int4Ptr PNTR startp,Int4Ptr PNTR lenp,Int4 PNTR numseg)1101 NLM_EXTERN Boolean LIBCALL get_pos_from_salp (SeqAlignPtr salp, Int4 pos, Int4 PNTR offset,
1102 Int4Ptr PNTR startp, Int4Ptr PNTR lenp, Int4 PNTR numseg)
1103 {
1104   DenseSegPtr dsp;
1105   Int4Ptr     dspstart,
1106               dsplens;
1107   Int4        numsg = 0,
1108               sumlens = 0;
1109 
1110   Boolean     seen=FALSE;
1111 
1112   if (salp == NULL)
1113      return FALSE;
1114   if (salp->segtype == 2)
1115   {
1116      dsp = (DenseSegPtr) salp->segs;
1117      dspstart = dsp->starts;
1118      dsplens = dsp->lens;
1119      while ( !seen && numsg < dsp->numseg )
1120      {
1121         numsg++;
1122         if (pos >= sumlens && pos < sumlens +*dsplens )
1123         {
1124            *offset = pos - sumlens;
1125            seen = TRUE;
1126         }
1127         else if (numsg == dsp->numseg)
1128         {
1129            break;
1130         }
1131         else
1132         {
1133             dspstart += dsp->dim;
1134             sumlens += *dsplens;
1135             dsplens++;
1136         }
1137      }
1138      if (seen) {
1139         *startp = dspstart;
1140         *lenp = dsplens;
1141         *numseg = numsg;
1142      }
1143   }
1144   return seen;
1145 }
1146 
SeqAlignTrunc(SeqAlignPtr salp,Int4 from,Int4 to)1147 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignTrunc (SeqAlignPtr salp, Int4 from, Int4 to)
1148 {
1149   DenseSegPtr dsp;
1150   Int4Ptr     startp,
1151               lenp,
1152               startmp;
1153   Int4        tot_length,
1154               numseg,
1155               offset,
1156               j;
1157   Boolean     ble;
1158 
1159   if (salp == NULL)
1160      return salp;
1161   tot_length = SeqAlignLength (salp);
1162   if (salp->segtype == 2)
1163   {
1164      dsp = (DenseSegPtr) salp->segs;
1165      if (to > 0 && to < tot_length)
1166      {
1167         ble = get_pos_from_salp (salp, to, &offset, &startp, &lenp, &numseg);
1168         if(ble)
1169         {
1170            if (numseg > 0)
1171            {
1172               dsp->numseg = numseg;
1173            }
1174            if (offset>-1 && offset+1 < *lenp)
1175            {
1176               *lenp = offset + 1;
1177            }
1178         }
1179      }
1180      if (from > 0 && from < tot_length)
1181      {
1182         ble = get_pos_from_salp (salp, from, &offset, &startp, &lenp, &numseg);
1183         if(ble)
1184         {
1185            if (numseg > 1)
1186            {
1187               dsp->numseg -= (numseg-1);
1188               dsp->starts = startp;
1189               dsp->lens = lenp;
1190            }
1191            if (offset > 0) {
1192               for (startmp=dsp->starts, j=0; j<dsp->dim; startmp++, j++)
1193                  if (*startmp > -1)
1194                     *startmp += offset+1;
1195            }
1196            *(dsp->lens) -= offset;
1197         }
1198      }
1199   }
1200   return salp;
1201 }
1202 
SeqAlignTrunc2(SeqAlignPtr salp,Int4 from,Int4 to)1203 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignTrunc2 (SeqAlignPtr salp, Int4 from, Int4 to)
1204 {
1205   DenseSegPtr dsp;
1206   Int4Ptr     int4p,
1207               int4lp;
1208   Int4        k;
1209   Int2        j;
1210 
1211   if (salp == NULL)
1212      return NULL;
1213   if (salp->segtype == 2) {
1214      dsp = (DenseSegPtr) salp->segs;
1215      int4p = dsp->starts;
1216      int4lp = dsp->lens;
1217      for (j=0; j<dsp->dim;j++, int4p++)
1218      {
1219        if (SeqAlignStrand(salp, j)!=Seq_strand_minus)
1220        {
1221           if (from != 0) {
1222              if (*int4p > -1)
1223                 *int4p += from;
1224           }
1225        }
1226        else if (SeqAlignStrand(salp, j)==Seq_strand_minus)
1227        {
1228           if (to != 0) {
1229              for (k=0; k<(dsp->numseg-1); k++)
1230                 int4p++;
1231              if (*int4p > -1)
1232                 *int4p -= to;
1233           }
1234        }
1235      }
1236      if (from != 0)
1237         *int4lp -= from;
1238      if (to != 0) {
1239         for (k=0; k<(dsp->numseg-1); k++)
1240            int4lp++;
1241         *int4lp += to;
1242      }
1243   }
1244   return salp;
1245 }
1246 
SeqAlignMapOnFirstSeq(SeqAlignPtr salp)1247 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignMapOnFirstSeq (SeqAlignPtr salp)
1248 {
1249   SeqAlignPtr tmp;
1250   DenseSegPtr dsp;
1251   Int4Ptr     startp;
1252   Int4Ptr     lenp;
1253   Int4        j;
1254 
1255   if (salp == NULL)
1256      return NULL;
1257   for (tmp=salp; tmp!=NULL; tmp=tmp->next) {
1258      if (tmp->segtype == 2) {
1259         dsp = (DenseSegPtr) tmp->segs;
1260         for (j=0, startp = dsp->starts; j<(dsp->numseg-1); j++)
1261            startp += dsp->dim;
1262         while (*startp < 0) {
1263            startp -= dsp->dim;
1264            dsp->numseg--;
1265         }
1266         startp = dsp->starts;
1267         lenp = dsp->lens;
1268         while (*startp < 0) {
1269            startp += dsp->dim;
1270            lenp++;
1271            dsp->numseg--;
1272         }
1273         dsp->starts = startp;
1274         dsp->lens = lenp;
1275      }
1276   }
1277   return salp;
1278 }
1279 
SeqAlignMerge(SeqAlignPtr salp1,SeqAlignPtr salp2,Boolean return_salp)1280 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignMerge (SeqAlignPtr salp1, SeqAlignPtr salp2, Boolean return_salp)
1281 {
1282   SeqAlignPtr salp_toreturn;
1283   DenseSegPtr dsp1= NULL,
1284               dsp2 = NULL;
1285   Int4Ptr     dspstarts= NULL;
1286   Int4Ptr     dsptmp= NULL;
1287   Int4Ptr     dsplens= NULL;
1288   Uint1Ptr    dspstrands = NULL;
1289   Uint1Ptr    dsptmp1;
1290   Int4        j, k, n, newseg;
1291   Uint1       st1, st2;
1292   Boolean     merge_segment = FALSE;
1293 
1294   if (salp1==NULL) {
1295      if (salp2 == NULL)
1296         return NULL;
1297      else
1298         return salp2;
1299   }
1300   else if (salp2 == NULL)
1301      return salp1;
1302 
1303   if (return_salp)
1304      salp_toreturn = salp2;
1305   else
1306      salp_toreturn = salp1;
1307 
1308   if ( salp1->segtype != 2 || salp2->segtype != 2 ) {
1309      return salp_toreturn;
1310   }
1311   if (return_salp) {
1312      dsp1 = (DenseSegPtr) salp1->segs;
1313      dsp2 = (DenseSegPtr) salp2->segs;
1314   }
1315   else {
1316      dsp1 = (DenseSegPtr) salp2->segs;
1317      dsp2 = (DenseSegPtr) salp1->segs;
1318   }
1319   if ( dsp1==NULL || dsp2==NULL || dsp1->dim != dsp2->dim) {
1320      return salp_toreturn;
1321   }
1322   n = dsp1->dim;
1323   newseg = dsp1->numseg + dsp2->numseg;
1324   dspstarts = (Int4Ptr) MemNew ((size_t) ((n*newseg+4) * sizeof (Int4)));
1325   if ( dspstarts == NULL ) {
1326      return salp_toreturn;
1327   }
1328   st1 = SeqAlignStrand (salp1, 0);
1329   st2 = SeqAlignStrand (salp1, 1);
1330   dsptmp = dsp1->starts;
1331   for (j = 0; j < n * dsp1->numseg; j++, dsptmp++) {
1332      dspstarts [j] = *dsptmp;
1333   }
1334   dsptmp = dsp2->starts;
1335   if (n==2) {
1336      if (dspstarts [j-2]> -1 && dsptmp[0] > -1) {
1337         if (dspstarts [j-1] > -1 && dsptmp[1] > -1)
1338            merge_segment = TRUE;
1339      }
1340   }
1341   if (merge_segment) {
1342      if (st1==Seq_strand_minus)
1343         dspstarts [j-2] = dsptmp[0];
1344      if (st2==Seq_strand_minus)
1345         dspstarts [j-1] = dsptmp[1];
1346      newseg--;
1347      k=n;
1348      dsptmp += n;
1349   }
1350   else
1351      k = 0;
1352   for (; k < n * dsp2->numseg; k++, j++, dsptmp++) {
1353      dspstarts [j] = *dsptmp;
1354   }
1355   dsplens  = (Int4Ptr) MemNew ((size_t) ((newseg + 2) * sizeof (Int4)));
1356   if ( dsplens == NULL ) {
1357      return salp_toreturn;
1358   }
1359   dsptmp = dsp1->lens;
1360   for (j = 0; j < dsp1->numseg; j++, dsptmp++)
1361      dsplens [j] = *dsptmp;
1362   dsptmp = dsp2->lens;
1363   if (merge_segment) {
1364      dsplens [j-1] += *dsptmp;
1365      k=1;
1366      dsptmp ++;
1367   }
1368   else
1369      k = 0;
1370   for (; k < dsp2->numseg; k++, j++, dsptmp++)
1371      dsplens [j] = *dsptmp;
1372   if (dsp1->strands && dsp2->strands)
1373   {
1374      dspstrands = (Uint1Ptr) MemNew ((size_t) ((n*newseg+4) * sizeof (Uint1)));
1375      if ( dspstrands != NULL ) {
1376         dsptmp1=dsp1->strands;
1377         for (j = 0; j < n * dsp1->numseg; j++, dsptmp1++) {
1378            dspstrands [j] = *dsptmp1;
1379         }
1380         dsptmp1 = dsp2->strands;
1381         if (merge_segment) {
1382            k=n;
1383            dsptmp1 += n;
1384         }
1385         else
1386            k = 0;
1387         for (; k < n * dsp2->numseg; k++, j++, dsptmp1++) {
1388            dspstrands [j] = *dsptmp1;
1389         }
1390      }
1391   }
1392   dsp1 = (DenseSegPtr) salp1->segs;
1393   dsp1->numseg = newseg;
1394   MemFree(dsp1->starts);
1395   dsp1->starts = dspstarts;
1396   MemFree(dsp1->lens);
1397   dsp1->lens = dsplens;
1398   MemFree(dsp1->strands);
1399   dsp1->strands = dspstrands;
1400   return salp1;
1401 }
1402 
SeqAnnotMerge(SeqAnnotPtr sap1,SeqAnnotPtr sap2,Boolean return_salp)1403 NLM_EXTERN SeqAnnotPtr LIBCALL SeqAnnotMerge (SeqAnnotPtr sap1, SeqAnnotPtr sap2, Boolean return_salp)
1404 {
1405   SeqAlignPtr salp1=NULL, salp2=NULL;
1406 
1407   if ( sap1 == NULL ) return sap2;
1408   if ( sap2 == NULL ) return sap1;
1409   if ( sap1->type != 2 ||  sap2->type != 2 ) {
1410          return NULL;
1411   }
1412   salp1 = (SeqAlignPtr) sap1->data;
1413   salp2 = (SeqAlignPtr) sap2->data;
1414   if (return_salp)
1415      salp1 = SeqAlignMerge (salp1, salp2, TRUE);
1416   else
1417      salp1 = SeqAlignMerge (salp1, salp2, FALSE);
1418   return sap1;
1419 }
1420 
SeqAlignAddSeg(SeqAlignPtr salp,Int4 pos1,Int4 pos2,Int4 len)1421 static SeqAlignPtr LIBCALL SeqAlignAddSeg (SeqAlignPtr salp, Int4 pos1, Int4 pos2, Int4 len)
1422 {
1423   DenseSegPtr dsp;
1424   Int4Ptr     startp;
1425   Int4Ptr     lenp;
1426   Uint1Ptr    dspstrands,
1427               dsptmp1;
1428   Int4Ptr     dsptmp;
1429   Int4        j;
1430 
1431   dsp = (DenseSegPtr) salp->segs;
1432 
1433   startp = (Int4Ptr) MemNew ((size_t) ((dsp->dim*dsp->numseg+4) * sizeof (Int4)));
1434   if ( startp == NULL ) {
1435      return NULL;
1436   }
1437   dsptmp = dsp->starts;
1438   for (j = 0; j < dsp->dim*dsp->numseg; j++, dsptmp++) {
1439      startp [j] = *dsptmp;
1440   }
1441   startp[j] = pos1;
1442   startp[j+1] = pos2;
1443   MemFree(dsp->starts);
1444   dsp->starts = startp;
1445 
1446   lenp  = (Int4Ptr) MemNew ((size_t) ((dsp->numseg + 2) * sizeof (Int4)));
1447   if ( lenp == NULL ) {
1448      return NULL;
1449   }
1450   dsptmp = dsp->lens;
1451   for (j = 0; j < dsp->numseg; j++, dsptmp++)
1452      lenp [j] = *dsptmp;
1453   lenp [j] = len;
1454   MemFree(dsp->lens);
1455   dsp->lens = lenp;
1456 
1457   dspstrands = (Uint1Ptr) MemNew ((size_t) ((dsp->dim*dsp->numseg+4) * sizeof (Uint1)));
1458   if ( dspstrands == NULL ) {
1459      return NULL;
1460   }
1461   dsptmp1=dsp->strands;
1462   for (j = 0; j < dsp->dim * dsp->numseg; j++, dsptmp1++) {
1463      dspstrands [j] = *dsptmp1;
1464   }
1465   dspstrands [j] = Seq_strand_unknown;
1466   dspstrands [j+1] = Seq_strand_unknown;
1467   MemFree(dsp->strands);
1468   dsp->strands = dspstrands;
1469 
1470   dsp->numseg += 1;
1471   return salp;
1472 }
1473 
shrinksap5(SeqAlignPtr salp,Int4 offset)1474 static SeqAlignPtr LIBCALL shrinksap5 (SeqAlignPtr salp, Int4 offset)
1475 {
1476   DenseSegPtr dsp;
1477   Int4Ptr     dsptmp;
1478   Int4        j;
1479 
1480   dsp = (DenseSegPtr) salp->segs;
1481   dsptmp = dsp->starts;
1482   for (j = 0; j < dsp->dim; j++)
1483      dsptmp[j] += offset;
1484   dsptmp = dsp->lens;
1485   *dsptmp -= offset;
1486   return salp;
1487 }
1488 
shrinksap3(SeqAlignPtr salp,Int4 offset)1489 static SeqAlignPtr LIBCALL shrinksap3 (SeqAlignPtr salp, Int4 offset)
1490 {
1491   DenseSegPtr dsp;
1492   Int4Ptr     dsptmp;
1493 
1494   dsp = (DenseSegPtr) salp->segs;
1495   dsptmp = dsp->lens;
1496   *dsptmp -= offset;
1497   return salp;
1498 }
1499 
SeqAlignExtend(SeqAlignPtr salp1,SeqAlignPtr salp2)1500 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignExtend (SeqAlignPtr salp1, SeqAlignPtr salp2)
1501 {
1502   SeqAlignPtr salptmp;
1503   SeqLocPtr   slp1, slp2,
1504               slp1b, slp2b;
1505   ValNodePtr  vnp1, vnp2,
1506               slpt1, slpt2;
1507   Int4        gaplen;
1508   Int4        gaplen1, stop, stopb;
1509   Int4        j;
1510   Uint1       choice;
1511   Boolean     goOn=TRUE;
1512 
1513   slpt1 = SeqLocListFromSeqAlign (salp1);
1514   while (goOn) {
1515      goOn = FALSE;
1516      for (salptmp = salp2; salptmp!=NULL; salptmp=salptmp->next) {
1517         slpt2 = SeqLocListFromSeqAlign (salptmp);
1518         choice = 0;
1519         for (vnp1=slpt1, vnp2=slpt2, j=0; vnp1!=NULL && vnp2!=NULL; j++) {
1520            slp1=(SeqLocPtr)vnp1->data.ptrvalue;
1521            slp2=(SeqLocPtr)vnp2->data.ptrvalue;
1522            gaplen1 = SeqLocStart(slp2) - SeqLocStop(slp1);
1523            if (gaplen1 ==0) {
1524               if (j==0) choice =1;
1525               else choice =2;
1526               break;
1527            }
1528            gaplen1 = SeqLocStart(slp1) - SeqLocStop(slp2);
1529            if (gaplen1 ==0 || gaplen1==1 || gaplen1==-1 || gaplen1==-2) {
1530               if (j==0) choice = 3;
1531               else choice =4;
1532               break;
1533            }
1534            vnp1=vnp1->next;
1535            vnp2=vnp2->next;
1536         }
1537         if (choice) {
1538            if (choice==1 || choice==3) {
1539               vnp1=vnp1->next;
1540               vnp2=vnp2->next;
1541            }
1542            else {
1543               vnp1=slpt1;
1544               vnp2=slpt2;
1545            }
1546            slp1b=(SeqLocPtr)vnp1->data.ptrvalue;
1547            slp2b=(SeqLocPtr)vnp2->data.ptrvalue;
1548            if (choice ==1 || choice == 2) {
1549               gaplen = SeqLocStart(slp2b) - SeqLocStop(slp1b) -1;
1550               if (gaplen >= 1) {
1551                  stop = SeqLocStop(slp1);
1552                  stopb = SeqLocStop(slp1b);
1553                  if (gaplen1==0) {
1554                     shrinksap5 (salptmp, 1);
1555                     gaplen-=1;
1556                  }
1557                  if (gaplen > 1) {
1558                     if (choice ==1) {
1559                        SeqAlignAddSeg(salp1, -1, stopb+1, gaplen);
1560                     }
1561                     else {
1562                        SeqAlignAddSeg(salp1,stop+1, -1, gaplen);
1563                     }
1564                  }
1565                  salp1 = SeqAlignMerge (salp1, salptmp, TRUE);
1566                  goOn=TRUE;
1567               }
1568            } else {
1569               gaplen = SeqLocStart(slp1b) - SeqLocStop(slp2b) -1;
1570               if (gaplen >= 1) {
1571                  stop = SeqLocStop(slp2);
1572                  stopb = SeqLocStop(slp2b);
1573                  if (gaplen1==0 || gaplen1==-1 || gaplen1==-2) {
1574                     gaplen1=ABS(gaplen1)+1;
1575                     shrinksap3 (salptmp, gaplen1);
1576                     stop-=gaplen1;
1577                     stopb-=gaplen1;
1578                     gaplen+=gaplen1;
1579                  }
1580                  if (gaplen > 1) {
1581                     if (choice ==3) {
1582                        SeqAlignAddSeg(salptmp,-1,stop+1, gaplen);
1583                     }
1584                     else {
1585                        SeqAlignAddSeg(salptmp, stopb+1,-1, gaplen);
1586                     }
1587                  }
1588                  salp1 = SeqAlignMerge (salp1, salptmp, FALSE);
1589                  goOn=TRUE;
1590               }
1591            }
1592            if (goOn) {
1593               ValNodeFreeType (&slpt1, TypeSeqLoc);
1594               slpt1 = SeqLocListFromSeqAlign (salp1);
1595               break;
1596            }
1597         }
1598         ValNodeFreeType (&slpt2, TypeSeqLoc);
1599      }
1600   }
1601   ValNodeFreeType (&slp1, TypeSeqLoc);
1602   return salp1;
1603 }
1604 
CleanStrandsSeqAlign(SeqAlignPtr salp)1605 NLM_EXTERN SeqAlignPtr LIBCALL CleanStrandsSeqAlign (SeqAlignPtr salp)
1606 {
1607   SeqAlignPtr salptmp;
1608   DenseSegPtr dsp;
1609   CompSegPtr  csp;
1610   Int4Ptr     lenp;
1611   Int4Ptr     startp;
1612   Uint1Ptr    strandp = NULL;
1613   Int4        numseg;
1614   Int2        dim;
1615   Int4        j, k, n, tmp;
1616   Boolean     retourne = FALSE;
1617   Boolean     delete_me = FALSE;
1618 
1619   for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
1620   {
1621      if (salptmp->segtype == COMPSEG)
1622      {
1623         csp=(CompSegPtr) salptmp->segs;
1624         strandp = csp->strands;
1625         numseg = csp->numseg;
1626         dim = csp->dim;
1627         lenp = csp->lens;
1628      }
1629      else if (salptmp->segtype == 2)
1630      {
1631         dsp = (DenseSegPtr) salptmp->segs;
1632         strandp = dsp->strands;
1633         numseg = dsp->numseg;
1634         dim = dsp->dim;
1635         lenp = dsp->lens;
1636         startp = dsp->starts;
1637      }
1638      if (strandp!=NULL) {
1639         for (j=0; j<numseg*dim; j++)
1640            if (*strandp==Seq_strand_minus)
1641               break;
1642         delete_me = (Boolean)(j==numseg*dim);
1643      }
1644      if (delete_me) {
1645         if (salptmp->segtype == COMPSEG)
1646            csp->strands = NULL;
1647         else if (salptmp->segtype == 2)
1648            dsp->strands = NULL;
1649         MemFree (strandp);
1650         return salp;
1651      }
1652      if (strandp!=NULL) {
1653         if (*strandp != Seq_strand_plus && *strandp!=Seq_strand_minus)
1654            for (j=0; j<numseg; j++, strandp+=dim) {
1655               if (*strandp == Seq_strand_plus || *strandp==Seq_strand_minus)
1656                  break;
1657            }
1658         if (strandp!=NULL)
1659            retourne = (Boolean) (*strandp == Seq_strand_minus);
1660      }
1661      if (retourne) {
1662         for (j=0; j < numseg*dim && strandp!=NULL; j++, strandp++)
1663         {
1664            if (*strandp == Seq_strand_minus)
1665               *strandp = Seq_strand_plus;
1666            else if (*strandp == Seq_strand_plus)
1667               *strandp = Seq_strand_minus;
1668         }
1669         for (j=0, k=numseg-1; j<numseg/2; j++, k--) {
1670            tmp=lenp[j];
1671            lenp[j]=lenp[k];
1672            lenp[k]=tmp;
1673         }
1674         for (j=0, k=(dim*numseg-dim); j<(dim*numseg-1)/2; j+=dim, k-=dim) {
1675            for (n=0; n<dim; n++) {
1676               tmp=startp[j+n];
1677               startp[j+n]=startp[k+n];
1678               startp[k+n]=tmp;
1679            }
1680         }
1681      }
1682   }
1683   return salp;
1684 }
1685 
1686 /*************************************************
1687 ***
1688 ***      LocalAlignToSeqAnnotDimn
1689 ***
1690 *************************************************/
LocalAlignToSeqAnnotDimn(ValNodePtr seqvnp,SeqIdPtr seqsip,ValNodePtr fromp,Int2 nbseq,Int4 lens,ValNodePtr strands,Boolean trunc_emptyends)1691 NLM_EXTERN SeqAnnotPtr LIBCALL LocalAlignToSeqAnnotDimn (ValNodePtr seqvnp, SeqIdPtr seqsip, ValNodePtr fromp, Int2 nbseq, Int4 lens, ValNodePtr strands, Boolean trunc_emptyends)
1692 {
1693   SeqAnnotPtr  sap;
1694   SeqAlignPtr  salp;
1695   DenseSegPtr  dsp;
1696   ValNodePtr   vnp;
1697   CharPtr      seqstr;
1698   Boolean PNTR filter;
1699   Boolean PNTR begun;
1700   Int4Ptr      from;
1701   Int4Ptr      lgseq;
1702   Int4Ptr      start;
1703   Uint1Ptr     strandp=NULL;
1704   Uint1        seq_strand;
1705   Int4         lenstmp, the_lens;
1706   Int4         j;
1707   Int2         seg, numseg;
1708   Int2         k;
1709   Boolean      nogap;
1710 
1711   for (k =0, vnp =seqvnp; k < nbseq && vnp !=NULL; k++, vnp =vnp->next)
1712   {
1713          seqstr = (CharPtr) vnp->data.ptrvalue;
1714          lenstmp = StringLen (seqstr);
1715          if (k==0)
1716             the_lens = lenstmp;
1717          else if (lenstmp != the_lens) {
1718             ErrPostEx (SEV_ERROR, 0, 0, "Sequence alignment of different lengths");
1719             return NULL;
1720          }
1721   }
1722   lens = the_lens;
1723 
1724          /*****************************
1725          ** count number of segments **
1726          *****************************/
1727   filter= (BoolPtr)MemNew ((size_t) ((nbseq + 1) * sizeof (Boolean)));
1728   j = 0;
1729   for (k =0, vnp =seqvnp; k < nbseq && vnp !=NULL; k++, vnp =vnp->next)
1730   {
1731          seqstr = (CharPtr) vnp->data.ptrvalue;
1732          filter [k] = (Boolean)( seqstr [j] != '-' );
1733   }
1734   numseg = 1;
1735   while ( j < lens )
1736   {
1737          seg = 0;
1738          for (k =0, vnp =seqvnp; k <nbseq && vnp !=NULL; k++, vnp =vnp->next)
1739          {
1740                 seqstr = (CharPtr) vnp->data.ptrvalue;
1741                 nogap = (Boolean) ( seqstr [j] != '-' );
1742                 if ( filter [k] != nogap ) {
1743                    seg++;
1744                    filter [k] = nogap;
1745                 }
1746          }
1747          if ( seg > 0 ) ++numseg;
1748          j++;
1749   }
1750          /********************************************
1751          ** allocate SeqAnnot + SeqAlign + DenseSeg  **
1752          *********************************************/
1753   sap = SeqAnnotNew ();
1754   if (sap == NULL) return NULL;
1755   sap->type = 2;
1756   salp = SeqAlignNew ();
1757   if (salp == NULL) return NULL;
1758   salp->type = 3;
1759   salp->segtype = 2;
1760   salp->dim = nbseq;
1761   sap->data = (Pointer) salp;
1762   dsp = DenseSegNew ();
1763   salp->segs = (Pointer) dsp;
1764   dsp->dim = nbseq;
1765   dsp->ids = SeqIdDupList (seqsip);
1766   dsp->numseg = numseg;
1767   dsp->starts = (Int4Ptr)MemNew((size_t)((nbseq *numseg + 4) * sizeof (Int4)));
1768   for (j = 0; j < nbseq *numseg + 4; j++)
1769      dsp->starts[j] = -1;
1770   dsp->lens   = (Int4Ptr) MemNew ((size_t) ((numseg + 2) * sizeof (Int4)));
1771   for (k = 0; k < numseg + 2; k++)
1772      dsp->lens[k] = 0;
1773   if (strands)
1774   {
1775      dsp->strands = (Uint1Ptr)MemNew((size_t) ((numseg*nbseq+4)*sizeof (Uint1)));
1776      strandp = dsp->strands;
1777      for (j = 0; j < numseg*nbseq ; j++, strandp++)
1778         *(strandp) = Seq_strand_unknown;
1779      strandp = dsp->strands;
1780      for (k=0; k<numseg; k++) {
1781         for (j = 0, vnp=strands; j < nbseq && vnp!=NULL; j++, vnp=vnp->next) {
1782            *strandp = (Uint1)vnp->data.intvalue;
1783            strandp++;
1784         }
1785      }
1786   }
1787   j = 0;
1788   for (k =0, vnp =seqvnp; k < nbseq && vnp !=NULL; k++, vnp =vnp->next)
1789   {
1790          seqstr = (CharPtr) vnp->data.ptrvalue;
1791          filter [k] = (Boolean)( seqstr [j] != '-' );
1792   }
1793   lenstmp= 0;
1794   numseg = 0;
1795   while ( j < lens )
1796   {
1797          seg = 0;
1798          for (k =0, vnp =seqvnp; k <nbseq && vnp !=NULL; k++, vnp =vnp->next)
1799          {
1800             seqstr = (CharPtr) vnp->data.ptrvalue;
1801             nogap = (Boolean) ( seqstr [j] != '-' );
1802             if ( filter [k] != nogap ) {
1803                seg++;
1804                filter [k] = nogap;
1805             }
1806          }
1807          if ( seg > 0 ) {
1808             dsp->lens[numseg] = lenstmp;
1809             ++numseg;
1810             lenstmp = 0;
1811          }
1812          lenstmp++;
1813          j++;
1814   }
1815   if (lenstmp > 0)
1816      dsp->lens[numseg] = lenstmp;
1817          /******************************
1818          ***  store the segments      **
1819          ******************************/
1820   lgseq = (Int4Ptr)MemNew ((size_t) ((nbseq + 1) * sizeof (Int4)));
1821   from = (Int4Ptr)MemNew ((size_t) ((nbseq + 1) * sizeof (Int4)));
1822   begun = (BoolPtr)MemNew ((size_t) ((nbseq + 1) * sizeof (Boolean)));
1823   if ( lgseq == NULL || from == NULL || begun == NULL )
1824      return NULL;
1825   for (k = 0; k < nbseq; k++)
1826      lgseq[k] = 0;
1827   if (fromp == NULL)
1828      for (k = 0; k < nbseq; k++) from [k] = 0;
1829   else {
1830      for (k=0, vnp=fromp; k<nbseq && vnp!=NULL; vnp=vnp->next, k++)
1831      {
1832         from [k] = (Int4)vnp->data.intvalue;
1833      }
1834   }
1835   for (k = 0; k < nbseq; k++)
1836      begun[k] = FALSE;
1837   start = dsp->starts;
1838   strandp = dsp->strands;
1839   j = 0;
1840   for (k =0, vnp =seqvnp; k <nbseq && vnp !=NULL; k++, vnp =vnp->next)
1841   {
1842      seqstr = (CharPtr) vnp->data.ptrvalue;
1843      filter [k] = (Boolean)( seqstr [j] != '-' );
1844      if ( filter [k] ) {
1845         start [k] = lgseq [k] + from [k];
1846         begun [k] = TRUE;
1847      }
1848   }
1849   j++;
1850   numseg = 0;
1851   while ( j < lens )
1852   {
1853          seg = 0;
1854          for (k =0, vnp =seqvnp; k <nbseq && vnp !=NULL; k++, vnp =vnp->next)
1855          {
1856                 seqstr = (CharPtr) vnp->data.ptrvalue;
1857                 nogap = ( seqstr [j] != '-' );
1858                 if  ( nogap && begun [k] )
1859                    ++lgseq [k];
1860                 if ( filter [k] != nogap ) {
1861                    seg++;
1862                    filter[k] = nogap;
1863                    begun [k] = TRUE;
1864                 }
1865          }
1866          if ( seg > 0 ) {
1867                 start += nbseq;
1868                 for (k = 0; k < nbseq; k++) {
1869                    if (filter[k]) {
1870                       seq_strand = Seq_strand_unknown;
1871                       if (strandp)
1872                          seq_strand = strandp[k];
1873                       if (seq_strand==Seq_strand_minus) {
1874                          start[k] = from[k] - lgseq[k];
1875                       } else {
1876                          start[k] = from[k] + lgseq[k];
1877                       }
1878                    }
1879                 }
1880                 ++numseg;
1881          }
1882          j++;
1883   }
1884   MemFree (filter);
1885   MemFree (lgseq);
1886   MemFree (from);
1887   MemFree (begun);
1888   if (trunc_emptyends && salp!=NULL) {
1889      nogap = TRUE;
1890      dsp = (DenseSegPtr) salp->segs;
1891      while (nogap && dsp->numseg>0) {
1892         start = dsp->starts;
1893         start += (dsp->dim * (dsp->numseg-1) );
1894         for (j=0; j < dsp->dim; j++, start++) {
1895            if (*start > -1)
1896               break;
1897         }
1898         if (j == dsp->dim)
1899            dsp->numseg --;
1900         else nogap = FALSE;
1901      }
1902   }
1903   if (strandp)
1904   {
1905      for (k=0, vnp=seqvnp; k <nbseq && vnp!=NULL; k++, vnp=vnp->next) {
1906         if (dsp->strands[k] == Seq_strand_minus) {
1907            start = dsp->starts;
1908            start += k;
1909            lgseq = dsp->lens;
1910            for (j=0; j<dsp->numseg; j++, lgseq++) {
1911               if (*start > -1) {
1912                  *start -= *lgseq;
1913               }
1914               start += nbseq;
1915            }
1916         }
1917      }
1918   }
1919   if (salp == NULL || dsp->numseg == 0) {
1920      sap = SeqAnnotFree (sap);
1921      return NULL;
1922   }
1923   if (salp == NULL) {
1924      sap = SeqAnnotFree (sap);
1925      return NULL;
1926   }
1927   return sap;
1928 }
1929 
1930 
1931 
SeqAlignIDCache(SeqAlignPtr salphead,SeqIdPtr sip)1932 NLM_EXTERN Boolean LIBCALL SeqAlignIDCache (SeqAlignPtr salphead, SeqIdPtr sip)
1933 {
1934   SeqAlignPtr salp;
1935   SeqIdPtr    tmpsip;
1936   Boolean     ok = FALSE;
1937 
1938   for (salp = salphead; salp!= NULL; salp=salp->next)
1939   {
1940      tmpsip =SeqIdPtrFromSeqAlign (salp);
1941      if ((SeqIdOrderInBioseqIdList(sip, tmpsip)) > 1)
1942      {
1943         if (salp->segtype == 1 || salp->dim == 2)
1944         {
1945            salp->type = 0;
1946            ok = TRUE;
1947         }
1948         else if (salp->segtype == 2)
1949         {
1950            SeqAlignBioseqDeleteById (salp, sip);
1951            ok = TRUE;
1952         }
1953      }
1954   }
1955   return ok;
1956 }
1957 
SeqAlignIDUncache(SeqAlignPtr salphead,SeqIdPtr sip)1958 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignIDUncache (SeqAlignPtr salphead, SeqIdPtr sip)
1959 {
1960   SeqAlignPtr salp;
1961   SeqIdPtr    tmpsip;
1962 
1963   for (salp = salphead; salp!= NULL; salp=salp->next)
1964   {
1965      tmpsip =SeqIdPtrFromSeqAlign (salp);
1966      if ((SeqIdOrderInBioseqIdList(sip, tmpsip)) > 1)
1967      {
1968         if (salp->segtype == 1 || salp->dim == 2)
1969         {
1970            salp->type = 3;
1971         }
1972      }
1973   }
1974   return salphead;
1975 }
1976 
SeqAlignIDUncacheAll(SeqAlignPtr salphead)1977 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignIDUncacheAll (SeqAlignPtr salphead)
1978 {
1979   SeqAlignPtr salp;
1980 
1981 
1982   for (salp = salphead; salp!= NULL; salp=salp->next)
1983   {
1984      if (salp->type < 1)
1985         if (salp->segtype == 1 || salp->dim == 2)
1986            salp->type = 3;
1987   }
1988   return salphead;
1989 }
1990 
1991 /* Find the segment where pos occurs for the nth segment.
1992  * If pos is not the start of the segment, cut the alignment
1993  * segment in two, with one of the segments with pos as the new start.
1994  */
CutAlignmentSegment(SeqAlignPtr salp,Int4 nth,Int4 pos)1995 static void CutAlignmentSegment(SeqAlignPtr salp, Int4 nth, Int4 pos)
1996 {
1997   DenseSegPtr dsp;
1998   Int4        seg_num, seg_start, k, j, first_len, second_len;
1999   Int4Ptr     new_starts, new_lens;
2000   Uint1Ptr    new_strands = NULL;
2001 
2002   if (salp == NULL || salp->segtype != SAS_DENSEG || salp->segs == NULL || nth < 1 || pos < 0) {
2003     return;
2004   }
2005 
2006   dsp = (DenseSegPtr) salp->segs;
2007   if (nth > dsp->dim) return;
2008 
2009   for (seg_num = 0; seg_num < dsp->numseg; seg_num++) {
2010     seg_start = dsp->starts[(seg_num * dsp->dim) + nth - 1];
2011     if (seg_start < 0) {
2012       continue;
2013     } else if (seg_start <= pos && seg_start + dsp->lens[seg_num] > pos) {
2014       /* found our segment */
2015       if (seg_start != pos) {
2016         /* only need to cut if pos is not already the start */
2017         new_starts = (Int4Ptr) MemNew (sizeof (Int4) * (dsp->numseg + 1) * dsp->dim);
2018         new_lens = (Int4Ptr) MemNew (sizeof(Int4) * (dsp->numseg + 1));
2019         if (dsp->strands != NULL) {
2020           new_strands = (Uint1Ptr) MemNew (sizeof (Uint1) * (dsp->numseg + 1) * dsp->dim);
2021         }
2022         /* copy before */
2023         for (k = 0; k < seg_num; k++) {
2024           for (j = 0; j < dsp->dim; j++) {
2025             new_starts[k * dsp->dim + j] = dsp->starts[k * dsp->dim + j];
2026             if (dsp->strands != NULL) {
2027               new_strands[k * dsp->dim + j] = dsp->strands[k * dsp->dim + j];
2028             }
2029           }
2030           new_lens[k] = dsp->lens[k];
2031         }
2032 
2033         if (dsp->strands == NULL || dsp->strands[seg_num * dsp->dim + nth - 1] != Seq_strand_minus) {
2034           first_len = pos - seg_start;
2035           second_len = dsp->lens[seg_num] - first_len;
2036         } else {
2037           second_len = pos - seg_start;
2038           first_len = dsp->lens[seg_num] - second_len;
2039         }
2040 
2041         /* split */
2042         for (j = 0; j < dsp->dim; j++) {
2043           /* set starts for split segments */
2044           if (dsp->starts[seg_num * dsp->dim + j] == -1) {
2045             new_starts[seg_num * dsp->dim + j] = -1;
2046             new_starts[(seg_num + 1) * dsp->dim + j] = -1;
2047           } else if (dsp->strands == NULL || dsp->strands[seg_num * dsp->dim + j] != Seq_strand_minus) {
2048             new_starts[seg_num * dsp->dim + j] = dsp->starts[seg_num * dsp->dim + j];
2049             new_starts[(seg_num + 1) * dsp->dim + j] = dsp->starts[seg_num * dsp->dim + j] + first_len;
2050           } else {
2051             new_starts[seg_num * dsp->dim + j] = dsp->starts[seg_num * dsp->dim + j] + second_len;
2052             new_starts[(seg_num + 1) * dsp->dim + j] = dsp->starts[seg_num * dsp->dim + j];
2053           }
2054           /* set strands for split segments */
2055           if (dsp->strands != NULL) {
2056             new_strands[seg_num * dsp->dim + j] = dsp->strands[seg_num * dsp->dim + j];
2057             new_strands[(seg_num + 1) * dsp->dim + j] = dsp->strands[seg_num * dsp->dim + j];
2058           }
2059         }
2060         /* set lens for split segments */
2061         new_lens[seg_num] = first_len;
2062         new_lens[seg_num + 1] = second_len;
2063 
2064         /* copy after */
2065         for (k = seg_num + 1; k < dsp->numseg; k++) {
2066           for (j = 0; j < dsp->dim; j++) {
2067             new_starts[(k + 1) * dsp->dim + j] = dsp->starts[k * dsp->dim + j];
2068             if (dsp->strands != NULL) {
2069               new_strands[(k + 1) * dsp->dim + j] = dsp->strands[k * dsp->dim + j];
2070             }
2071           }
2072           new_lens[(k + 1)] = dsp->lens[k];
2073         }
2074 
2075         /* replace in dsp */
2076         dsp->starts = MemFree (dsp->starts);
2077         dsp->starts = new_starts;
2078         dsp->lens = MemFree (dsp->lens);
2079         dsp->lens = new_lens;
2080         dsp->strands = MemFree (dsp->strands);
2081         dsp->strands = new_strands;
2082         /* can't fix scores */
2083         ScoreSetFree(dsp->scores);
2084         dsp->scores = NULL;
2085         /* now have one more segment */
2086         dsp->numseg++;
2087       }
2088       return;
2089     }
2090   }
2091 }
2092 
2093 /* A section of a sequence in an alignment has been deleted.  We need to adjust
2094  * the alignment to reflect the new positions of nucleotides that were previously
2095  * aligned to other sequences in the alignment.
2096  * We need to find the segments in which the start and stop of the deleted section
2097  * are found.  If the start and stop do not fall on segment boundaries, we need to
2098  * cut the segment at those boundaries.
2099  * Then we change the alignment segments that cover the portion of the sequence that
2100  * was deleted to be gaps for that sequence.
2101  * Finally, we decrease the start values for this sequence for any segment that started
2102  * after the cut by the length of the cut.
2103  */
SeqAlignDeleteByLoc(SeqLocPtr slp,SeqAlignPtr salp)2104 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDeleteByLoc (SeqLocPtr slp, SeqAlignPtr salp)
2105 {
2106   SeqIdPtr    sip;
2107   DenseSegPtr dsp;
2108   Int4        nth, pos, len, seg_index, seg_start;
2109   Int4        stop;
2110 
2111   if (salp == NULL || salp->segtype != SAS_DENSEG || salp->segs == NULL) {
2112     return NULL;
2113   }
2114 
2115   sip = SeqLocId(slp);
2116   dsp = (DenseSegPtr) salp->segs;
2117   if (dsp == NULL) {
2118          return salp;
2119   }
2120   nth = SeqIdOrderInBioseqIdList (sip, dsp->ids);
2121   if (nth == 0 || nth > dsp->dim) {
2122     return salp;
2123   }
2124 
2125   pos = SeqLocStart (slp);
2126   stop = SeqLocStop (slp);
2127   if (stop < pos) {
2128     len = pos - stop + 1;
2129     pos = stop;
2130   } else {
2131     len = stop - pos + 1;
2132   }
2133 
2134   CutAlignmentSegment(salp, nth, pos);
2135   CutAlignmentSegment(salp, nth, pos + len);
2136 
2137   /* change deleted sequence segment start values */
2138   for (seg_index = 0; seg_index < dsp->numseg; seg_index++) {
2139     seg_start = dsp->starts[dsp->dim * seg_index + nth - 1];
2140     if (seg_start < 0) {
2141       /* gap, no change */
2142       continue;
2143     } else if (seg_start < pos) {
2144       /* before cut, no change */
2145     } else if (seg_start >= pos && seg_start + dsp->lens[seg_index] <= pos + len) {
2146       /* in gap */
2147       dsp->starts[dsp->dim * seg_index + nth - 1] = -1;
2148     } else {
2149       /* after cut */
2150       dsp->starts[dsp->dim * seg_index + nth - 1] -= len;
2151     }
2152   }
2153 
2154   return salp;
2155 }
2156 
AreDenseSegSegmentsValid(DenseSegPtr dsp,Int4 start,Int4 num)2157 static Boolean AreDenseSegSegmentsValid (DenseSegPtr dsp, Int4 start, Int4 num)
2158 {
2159   Int4 k, seg_num, next_pos;
2160 
2161   if (dsp == NULL || start < 0 || num < 1)
2162   {
2163     return FALSE;
2164   }
2165 
2166   for (k = 0; k < dsp->dim; k++)
2167   {
2168     if (dsp->strands == NULL || dsp->strands[k] == Seq_strand_plus)
2169     {
2170       if(dsp->starts [dsp->dim * start + k] > -1)
2171       {
2172         next_pos = dsp->starts [dsp->dim * start + k] + dsp->lens[start];
2173       }
2174       else
2175       {
2176         next_pos = -1;
2177       }
2178       for (seg_num = start + 1; seg_num - start < num; seg_num++)
2179       {
2180         if (dsp->starts[dsp->dim * seg_num + k] == -1)
2181         {
2182           continue;
2183         }
2184         if (next_pos != -1)
2185         {
2186           if (dsp->starts[dsp->dim * seg_num + k] != next_pos)
2187           {
2188             return FALSE;
2189           }
2190         }
2191         next_pos = dsp->starts[dsp->dim * seg_num + k] + dsp->lens[seg_num];
2192       }
2193     }
2194     else
2195     {
2196       if (dsp->starts [dsp->dim * (start + num - 1) + k] > -1)
2197       {
2198         next_pos = dsp->starts [dsp->dim * (start + num - 1) + k] + dsp->lens [start + num - 1];
2199       }
2200       else
2201       {
2202         next_pos = -1;
2203       }
2204       for (seg_num = start + num - 2; seg_num >= start; seg_num--)
2205       {
2206         if (dsp->starts [dsp->dim * seg_num + k] == -1)
2207         {
2208           continue;
2209         }
2210         if (next_pos != -1)
2211         {
2212           if (dsp->starts[dsp->dim * seg_num + k] != next_pos)
2213           {
2214             return FALSE;
2215           }
2216         }
2217         next_pos = dsp->starts[dsp->dim * seg_num + k] + dsp->lens[seg_num];
2218       }
2219     }
2220 
2221   }
2222 
2223   return TRUE;
2224 }
2225 
2226 
2227 static void
FillInPlusStrandInsertionSegmentA(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 first_len,Int4 second_len,Int4 orig_segment,Int4Ptr this_seg)2228 FillInPlusStrandInsertionSegmentA
2229 (DenseSegPtr dsp_orig,
2230  DenseSegPtr dsp_new,
2231  Int4        insert_start,
2232  Int4        insert_len,
2233  Int4        insert_row,
2234  Int4        first_len,
2235  Int4        second_len,
2236  Int4        orig_segment,
2237  Int4Ptr     this_seg)
2238 {
2239   Int4 k;
2240 
2241   if (dsp_orig == NULL || dsp_new == NULL
2242       || insert_start < 0 || insert_len < 0
2243       || first_len < 0
2244       || second_len < 0
2245       || orig_segment < 0 || orig_segment >= dsp_orig->numseg
2246       || this_seg == NULL
2247       || *this_seg < 0 || *this_seg >= dsp_new->numseg)
2248   {
2249     return;
2250   }
2251 
2252   if (first_len == 0)
2253   {
2254     return;
2255   }
2256 
2257   for (k = 0; k < dsp_orig->dim; k++)
2258   {
2259     dsp_new->starts[(*this_seg) * dsp_new->dim + k]
2260         = dsp_orig->starts[orig_segment * dsp_orig->dim + k];
2261     if (dsp_orig->strands != NULL)
2262     {
2263       dsp_new->strands[(*this_seg) * dsp_new->dim + k]
2264         = dsp_orig->strands[orig_segment * dsp_orig->dim + k];
2265       if (dsp_orig->strands[orig_segment * dsp_orig->dim + k] == Seq_strand_minus
2266           && dsp_new->starts[(*this_seg) * dsp_new->dim + k] > -1)
2267       {
2268         dsp_new->starts[(*this_seg) * dsp_new->dim + k] += second_len;
2269       }
2270     }
2271   }
2272 
2273   dsp_new->lens[*this_seg] = first_len;
2274   (*this_seg)++;
2275 }
2276 
2277 
2278 static void
FillInInsertionSegmentB(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 first_len,Int4 second_len,Int4 orig_segment,Int4Ptr this_seg)2279 FillInInsertionSegmentB
2280 (DenseSegPtr dsp_orig,
2281  DenseSegPtr dsp_new,
2282  Int4        insert_start,
2283  Int4        insert_len,
2284  Int4        insert_row,
2285  Int4        first_len,
2286  Int4        second_len,
2287  Int4        orig_segment,
2288  Int4Ptr     this_seg)
2289 {
2290   Int4 k;
2291 
2292   if (dsp_orig == NULL || dsp_new == NULL
2293       || insert_start < 0 || insert_len < 0
2294       || first_len < 0
2295       || second_len < 0
2296       || orig_segment < 0 || orig_segment >= dsp_orig->numseg
2297       || this_seg == NULL
2298       || *this_seg < 0 || *this_seg >= dsp_new->numseg)
2299   {
2300     return;
2301   }
2302 
2303   if (insert_len == 0)
2304   {
2305     return;
2306   }
2307 
2308   for (k = 0; k < dsp_orig->dim; k++)
2309   {
2310     dsp_new->starts[(*this_seg) * dsp_new->dim + k] = -1;
2311     if (dsp_orig->strands != NULL)
2312     {
2313       dsp_new->strands[(*this_seg) * dsp_new->dim + k]
2314         = dsp_orig->strands[orig_segment * dsp_orig->dim + k];
2315     }
2316   }
2317   dsp_new->starts[(*this_seg) * dsp_new->dim + insert_row] = insert_start;
2318 
2319   dsp_new->lens[*this_seg] = insert_len;
2320   (*this_seg)++;
2321 }
2322 
FillInPlusStrandInsertionSegmentC(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 first_len,Int4 second_len,Int4 orig_segment,Int4Ptr this_seg)2323 static void FillInPlusStrandInsertionSegmentC
2324 (DenseSegPtr dsp_orig,
2325  DenseSegPtr dsp_new,
2326  Int4        insert_start,
2327  Int4        insert_len,
2328  Int4        insert_row,
2329  Int4        first_len,
2330  Int4        second_len,
2331  Int4        orig_segment,
2332  Int4Ptr     this_seg)
2333 {
2334   Int4 k;
2335 
2336   if (dsp_orig == NULL || dsp_new == NULL
2337       || insert_start < 0 || insert_len < 0
2338       || first_len < 0
2339       || second_len < 0
2340       || orig_segment < 0 || orig_segment >= dsp_orig->numseg
2341       || this_seg == NULL
2342       || *this_seg < 0 || *this_seg >= dsp_new->numseg)
2343   {
2344     return;
2345   }
2346 
2347   if (second_len == 0)
2348   {
2349     return;
2350   }
2351 
2352   for (k = 0; k < dsp_orig->dim; k++)
2353   {
2354     if ((dsp_orig->strands == NULL
2355         || dsp_orig->strands[orig_segment * dsp_new->dim + k] != Seq_strand_minus)
2356         && dsp_new->starts[(*this_seg) * dsp_new->dim + k] > -1)
2357     {
2358       dsp_new->starts[(*this_seg) * dsp_new->dim + k] =
2359         dsp_orig->starts[orig_segment * dsp_orig->dim + k] + first_len;
2360     }
2361     else
2362     {
2363       dsp_new->starts[(*this_seg) * dsp_new->dim + k] =
2364         dsp_orig->starts[orig_segment * dsp_orig->dim + k];
2365     }
2366 
2367     if (dsp_orig->strands != NULL)
2368     {
2369       dsp_new->strands[(*this_seg) * dsp_new->dim + k]
2370         = dsp_orig->strands[orig_segment * dsp_orig->dim + k];
2371     }
2372   }
2373   dsp_new->starts[(*this_seg) * dsp_new->dim + insert_row] += insert_len;
2374 
2375   dsp_new->lens[*this_seg] = second_len;
2376   (*this_seg)++;
2377 }
2378 
2379 
2380 static void
FillInMinusStrandInsertionSegmentA(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 first_len,Int4 second_len,Int4 orig_segment,Int4Ptr this_seg)2381 FillInMinusStrandInsertionSegmentA
2382 (DenseSegPtr dsp_orig,
2383  DenseSegPtr dsp_new,
2384  Int4        insert_start,
2385  Int4        insert_len,
2386  Int4        insert_row,
2387  Int4        first_len,
2388  Int4        second_len,
2389  Int4        orig_segment,
2390  Int4Ptr     this_seg)
2391 {
2392   Int4 k;
2393 
2394   if (dsp_orig == NULL || dsp_new == NULL
2395       || insert_start < 0 || insert_len < 0
2396       || first_len < 0
2397       || second_len < 0
2398       || orig_segment < 0 || orig_segment >= dsp_orig->numseg
2399       || this_seg == NULL
2400       || *this_seg < 0 || *this_seg >= dsp_new->numseg)
2401   {
2402     return;
2403   }
2404 
2405   if (first_len == 0)
2406   {
2407     return;
2408   }
2409 
2410   for (k = 0; k < dsp_orig->dim; k++)
2411   {
2412     dsp_new->starts[(*this_seg) * dsp_new->dim + k]
2413            = dsp_orig->starts[orig_segment * dsp_orig->dim + k];
2414     if (dsp_orig->strands != NULL)
2415     {
2416       dsp_new->strands[(*this_seg) * dsp_new->dim + k]
2417         = dsp_orig->strands[orig_segment * dsp_orig->dim + k];
2418       if (dsp_orig->strands[orig_segment * dsp_orig->dim + k] == Seq_strand_minus
2419           && dsp_new->starts[(*this_seg) * dsp_new->dim + k] != -1)
2420       {
2421         dsp_new->starts[(*this_seg) * dsp_new->dim + k] += second_len;
2422       }
2423     }
2424   }
2425 
2426   dsp_new->starts[(*this_seg) * dsp_new->dim + insert_row] += insert_len;
2427 
2428   dsp_new->lens[*this_seg] = first_len;
2429   (*this_seg)++;
2430 }
2431 
FillInMinusStrandInsertionSegmentC(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 first_len,Int4 second_len,Int4 orig_segment,Int4Ptr this_seg)2432 static void FillInMinusStrandInsertionSegmentC
2433 (DenseSegPtr dsp_orig,
2434  DenseSegPtr dsp_new,
2435  Int4        insert_start,
2436  Int4        insert_len,
2437  Int4        insert_row,
2438  Int4        first_len,
2439  Int4        second_len,
2440  Int4        orig_segment,
2441  Int4Ptr     this_seg)
2442 {
2443   Int4 k;
2444 
2445   if (dsp_orig == NULL || dsp_new == NULL
2446       || insert_start < 0 || insert_len < 0
2447       || first_len < 0
2448       || second_len < 0
2449       || orig_segment < 0 || orig_segment >= dsp_orig->numseg
2450       || this_seg == NULL
2451       || *this_seg < 0 || *this_seg >= dsp_new->numseg)
2452   {
2453     return;
2454   }
2455 
2456   if (second_len == 0)
2457   {
2458     return;
2459   }
2460 
2461   for (k = 0; k < dsp_orig->dim; k++)
2462   {
2463     dsp_new->starts[(*this_seg) * dsp_new->dim + k] =
2464         dsp_orig->starts[orig_segment * dsp_orig->dim + k];
2465 
2466     if ((dsp_orig->strands == NULL
2467         || dsp_orig->strands[orig_segment * dsp_orig->dim + k] != Seq_strand_minus)
2468         && dsp_new->starts[(*this_seg) * dsp_new->dim + k] != -1)
2469     {
2470       dsp_new->starts[(*this_seg) * dsp_new->dim + k] += first_len;
2471     }
2472 
2473     if (dsp_orig->strands != NULL)
2474     {
2475       dsp_new->strands[(*this_seg) * dsp_new->dim + k]
2476         = dsp_orig->strands[orig_segment * dsp_orig->dim + k];
2477     }
2478   }
2479 
2480   dsp_new->lens[*this_seg] = second_len;
2481   (*this_seg)++;
2482 }
2483 
2484 static void
InsertInSegment(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 orig_segment,Int4Ptr this_segment)2485 InsertInSegment
2486 (DenseSegPtr dsp_orig,
2487  DenseSegPtr dsp_new,
2488  Int4        insert_start,
2489  Int4        insert_len,
2490  Int4        insert_row,
2491  Int4        orig_segment,
2492  Int4Ptr     this_segment)
2493 {
2494   /* The original segment needs to be replaced by either one or two segments
2495    * in the new alignment.
2496    * Call segment O the segment that contains insert_start.
2497    * If insert_start == the start of segment O, only one additional segment
2498    * will be needed, otherwise allocate space for two extra segments.
2499    * If insert_row is a plus row:
2500    *    If insert_start == the start of segment O,
2501    *              the gap segment will be inserted immediately before
2502    *              segment O.  For insert_row, all starts
2503    *              for segment O and beyond will be increased by insert_len.
2504    *    Otherwise, segment O will be replaced by segment (A) (a truncated
2505                   version of segment O), a gap segment (B) will be
2506    *              inserted after A, and a third segment (C) will be inserted
2507    *              after segment B.
2508    *    Call first_len = insert_start - start of segment O on insert_row
2509    *    Call second_len = length of segment O on insert_row - first_len
2510    *    The length of the segment A will be first_len.
2511    *    The start of segment A for all plus strand rows will be
2512    *              the start of segment O.
2513    *    The start of segment A for all minus strand rows will be
2514    *              the start of segment O + second_len.
2515    *    The start of segment B for insert_row will be insert_start,
2516    *    The start of segment B for all other rows in the gap segment will be -1.
2517    *    The length of segment B will be insert_len.
2518    *    For insert_row, the start of segment C will be
2519    *              the start of segment O + first_len + insert_len.
2520    *    For all remaining plus strand rows, the start of segment C
2521    *              will be the start of segment O + first_len.
2522    *    For all minus strand rows, the start of segment C will be
2523    *              be the start of segment O.
2524    *    The length of segment C will be second_len.
2525    * If insert_row is a minus row:
2526    *    If insert_start == the start of segment O,
2527    *                       the gap segment will be inserted immediately after
2528    *                       segment O and all of the starts for insert_row
2529    *                       before segment O will be increased by insert_len.
2530    *    Otherwise, segment O will be replaced by segment A, a gap segment (B),
2531    *               and segment C.
2532    *    Call first_len = start of segment O on insert_row + length of segment O on insert_row - insert_start
2533    *    Call second_len = insert_start - start of segment O on insert_row
2534    *    The length of segment A will be first_len.
2535    *    For every plus strand row, the start of segment A will be the start of
2536    *               segment O.
2537    *    For insert_row, the start of segment A will be the start of segment O + second_len + insert_len.
2538    *    For every other minus strand row, the start of segment A will be the
2539    *               the start of segment O + second_len.
2540    *    The length of segment B will be insert_len.
2541    *    For insert_row, the start of segment B will be insert_start.
2542    *    For every other row, the start of segment B will be -1.
2543    *    The length of segment C will be second_len.
2544    *    For every minus row, the start of segment C will be the start of segment O.
2545    *    For every plus row, the start of segment C will be the start of segment O + first_len.
2546    *    For insert_row, the start of every segment prior to segment O will be increased
2547    *               by insert_len.
2548    */
2549 
2550    Int4 first_len, second_len;
2551 
2552    if (dsp_orig->strands != NULL && dsp_orig->strands[insert_row] == Seq_strand_minus)
2553    {
2554      first_len = dsp_orig->starts [dsp_orig->dim * orig_segment + insert_row]
2555                     + dsp_orig->lens [orig_segment] - insert_start;
2556      second_len = insert_start - dsp_orig->starts [dsp_orig->dim * orig_segment + insert_row];
2557      FillInMinusStrandInsertionSegmentA(dsp_orig, dsp_new, insert_start, insert_len,
2558                                        insert_row, first_len, second_len,
2559                                        orig_segment, this_segment);
2560      FillInInsertionSegmentB(dsp_orig, dsp_new, insert_start, insert_len,
2561                                        insert_row, first_len, second_len,
2562                                        orig_segment, this_segment);
2563      FillInMinusStrandInsertionSegmentC(dsp_orig, dsp_new, insert_start, insert_len,
2564                                        insert_row, first_len, second_len,
2565                                        orig_segment, this_segment);
2566    }
2567    else
2568    {
2569      first_len = insert_start - dsp_orig->starts [dsp_orig->dim * orig_segment + insert_row];
2570      second_len = dsp_orig->lens[orig_segment] - first_len;
2571      FillInPlusStrandInsertionSegmentA(dsp_orig, dsp_new, insert_start, insert_len,
2572                                        insert_row, first_len, second_len,
2573                                        orig_segment, this_segment);
2574      FillInInsertionSegmentB(dsp_orig, dsp_new, insert_start, insert_len,
2575                                        insert_row, first_len, second_len,
2576                                        orig_segment, this_segment);
2577      FillInPlusStrandInsertionSegmentC(dsp_orig, dsp_new, insert_start, insert_len,
2578                                        insert_row, first_len, second_len,
2579                                        orig_segment, this_segment);
2580    }
2581 }
2582 
2583 static Int4
FindSegmentForInsertPoint(DenseSegPtr dsp,Int4 insert_start,Int4 insert_row,Uint1 insert_strand)2584 FindSegmentForInsertPoint
2585 (DenseSegPtr dsp,
2586  Int4        insert_start,
2587  Int4        insert_row,
2588  Uint1       insert_strand)
2589 {
2590   Int4    insert_segment = -1, k = 0;
2591 
2592   if (dsp == NULL || insert_start < 0
2593       || insert_row < 0 || insert_row >= dsp->dim)
2594   {
2595     return -1;
2596   }
2597 
2598   if (insert_strand == Seq_strand_minus)
2599   {
2600     while (k < dsp->numseg && insert_segment == -1)
2601     {
2602       if (dsp->starts [k * dsp->dim + insert_row] != -1
2603           && dsp->starts [k * dsp->dim + insert_row] <= insert_start
2604           && dsp->starts [k * dsp->dim + insert_row] + dsp->lens[k] > insert_start)
2605       {
2606         insert_segment = k;
2607       }
2608       k++;
2609     }
2610   }
2611   else
2612   {
2613     while (k < dsp->numseg && insert_segment == -1)
2614     {
2615       if (dsp->starts [k * dsp->dim + insert_row] != -1
2616           && dsp->starts [dsp->dim * k + insert_row] <= insert_start
2617           && dsp->starts [dsp->dim * k + insert_row] + dsp->lens [k] > insert_start)
2618       {
2619         insert_segment = k;
2620       }
2621       k++;
2622     }
2623   }
2624   return insert_segment;
2625 }
2626 
2627 static void
CopyDensegSegments(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 start_seg,Int4 copy_seg,Int4 num_to_copy)2628 CopyDensegSegments
2629 (DenseSegPtr dsp_orig,
2630  DenseSegPtr dsp_new,
2631  Int4 start_seg,
2632  Int4 copy_seg,
2633  Int4 num_to_copy)
2634 {
2635   Int4 num_copied = 0, k;
2636 
2637   if (dsp_orig == NULL || dsp_new == NULL)
2638   {
2639     return;
2640   }
2641 
2642   while (start_seg < dsp_orig->numseg && copy_seg < dsp_new->numseg
2643          && num_copied < num_to_copy)
2644   {
2645     if (start_seg >= 0 && copy_seg >= 0)
2646     {
2647       for (k = 0; k < dsp_orig->dim && k < dsp_new->dim; k++)
2648       {
2649         dsp_new->starts [copy_seg * dsp_new->dim + k]
2650              = dsp_orig->starts[start_seg * dsp_orig->dim + k];
2651         if (dsp_orig->strands != NULL && dsp_new->strands != NULL)
2652         {
2653           dsp_new->strands[copy_seg * dsp_new->dim + k]
2654              = dsp_orig->strands[start_seg * dsp_orig->dim + k];
2655         }
2656       }
2657       dsp_new->lens [copy_seg] = dsp_orig->lens[start_seg];
2658       num_copied++;
2659     }
2660     start_seg ++;
2661     copy_seg ++;
2662   }
2663 }
2664 
2665 
2666 /**************************************************
2667 ***
2668 ***************************************************/
SeqAlignInsertByLoc(SeqLocPtr slp,SeqAlignPtr salp)2669 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignInsertByLoc (SeqLocPtr slp, SeqAlignPtr salp)
2670 {
2671   SeqIdPtr    sip;
2672   DenseSegPtr dsp, dsp_new;
2673   Int4        from, start;
2674   Int2        j;
2675   Int2        index;
2676   Int4        insert_len;
2677   Int4        extra_segs;
2678   Uint1       insert_strand;
2679   Int4        insert_seg;
2680   Int4        orig_segment;
2681 
2682   if (salp == NULL || salp->segtype != SAS_DENSEG)
2683      return salp;
2684   sip = SeqLocId(slp);
2685   insert_len = SeqLocLen (slp);
2686   dsp = (DenseSegPtr) salp->segs;
2687   if (dsp == NULL) {
2688      return salp;
2689   }
2690 
2691   index = SeqIdOrderInBioseqIdList (sip, dsp->ids);
2692   if (index == 0) {
2693      /* bioseq not in alignment */
2694      return salp;
2695   }
2696   index -= 1;
2697   insert_strand = SeqAlignStrand (salp, index);
2698 
2699   if (insert_strand == Seq_strand_minus)
2700   {
2701     from = SeqAlignStop (salp, index);
2702   }
2703   else
2704   {
2705     from = SeqAlignStart(salp, index);
2706   }
2707   start = SeqLocStart (slp);
2708   if (start <= from)
2709   {
2710     /* just adjust the starts */
2711     for (j = 0; j < dsp->numseg; j++)
2712     {
2713       if (dsp->starts [dsp->dim * j + index] > -1)
2714       {
2715         dsp->starts [dsp->dim * j + index] += insert_len;
2716       }
2717     }
2718   }
2719   else
2720   {
2721     /* need to insert gap of length insert_len at start */
2722     /* first, find affected segment */
2723     insert_seg = FindSegmentForInsertPoint (dsp, start, index, insert_strand);
2724     if (insert_seg < 0 || insert_seg > dsp->numseg)
2725     {
2726       return salp;
2727     }
2728 
2729     if (dsp->starts[dsp->dim * insert_seg + index] == start)
2730     {
2731       extra_segs = 1;
2732     }
2733     else
2734     {
2735       extra_segs = 2;
2736     }
2737 
2738     dsp_new = (DenseSegPtr) MemNew (sizeof (DenseSeg));
2739     dsp_new->dim = dsp->dim;
2740     dsp_new->numseg = dsp->numseg + extra_segs;
2741     dsp_new->starts = (Int4Ptr) MemNew (dsp->dim * (dsp->numseg + extra_segs) * sizeof (Int4));
2742     if (dsp->strands != NULL)
2743     {
2744       dsp_new->strands = (Uint1Ptr) MemNew (dsp->dim * (dsp->numseg + extra_segs) * sizeof (Uint1));
2745     }
2746     dsp_new->lens = (Int4Ptr) MemNew ((dsp->numseg + extra_segs) * sizeof (Int4));
2747 
2748     /* copy alignment up to point of insertion */
2749     CopyDensegSegments (dsp, dsp_new, 0, 0, insert_seg);
2750 
2751     /* adjust starts in insert_row before insert_seg if insert_row on minus strand */
2752     if (insert_strand == Seq_strand_minus)
2753     {
2754       for (j = 0; j < insert_seg; j++)
2755       {
2756         if (dsp_new->starts[dsp_new->dim * j + index] != -1)
2757         {
2758           dsp_new->starts[dsp_new->dim * j + index] += insert_len;
2759         }
2760       }
2761     }
2762 
2763     /* create gap */
2764     orig_segment = insert_seg;
2765     InsertInSegment (dsp, dsp_new, start, insert_len, index, orig_segment, &insert_seg);
2766 
2767     /* Copy after insertion point */
2768     CopyDensegSegments (dsp, dsp_new, orig_segment + 1, insert_seg, dsp->numseg - orig_segment);
2769 
2770     /* Adjust starts in insert row after insert_seg if insert_row on plus strand */
2771     if (insert_strand == Seq_strand_plus)
2772     {
2773       while (insert_seg < dsp_new->numseg)
2774       {
2775         if (dsp_new->starts[dsp_new->dim * insert_seg + index] != -1)
2776         {
2777           dsp_new->starts[dsp_new->dim * insert_seg + index] += insert_len;
2778         }
2779         insert_seg++;
2780       }
2781     }
2782 
2783     /* replace in old DenseSeg */
2784     dsp->starts = MemFree (dsp->starts);
2785     dsp->starts = dsp_new->starts;
2786     dsp_new->starts = NULL;
2787     dsp->strands = MemFree (dsp->strands);
2788     dsp->strands = dsp_new->strands;
2789     dsp_new->strands = NULL;
2790     dsp->lens = MemFree (dsp->lens);
2791     dsp->lens = dsp_new->lens;
2792     dsp_new->lens = NULL;
2793     dsp->numseg = dsp_new->numseg;
2794 
2795   }
2796 
2797   return salp;
2798 }
2799 
2800 
2801 /*******************************************
2802 ***
2803 ***   DeleteRegion
2804 ***  !@!!!!!!!!!!!!!!!!!!!!!!! > CompSegPtr
2805 ********************************************/
DeleteRegion(SeqIntPtr sip,SeqAlignPtr salp)2806 NLM_EXTERN SeqAlignPtr LIBCALL DeleteRegion (SeqIntPtr sip, SeqAlignPtr salp)
2807 {
2808   CompSegPtr  dsp;
2809   SeqAlignPtr salp1 =  NULL;
2810   BoolPtr     dspstart = NULL;
2811   Int4Ptr     dsplens = NULL;
2812   Int4        delete_from;
2813   Int4        delete_to;
2814   Int2        numseg_before = 0;
2815   Int2        subseglens = 0;
2816   Int4        sumseglens = 0;
2817   Boolean     seen = FALSE;
2818 
2819   if ( sip == NULL ) return salp;
2820   delete_from = sip->from;
2821   delete_to = sip->to;
2822 
2823         /*****************************************
2824          *** copy salp(s) until delete_from
2825          *****************************************/
2826   if ( (dsp = (CompSegPtr) salp->segs ) == NULL) {
2827          return NULL;
2828   }
2829   dspstart = dsp->starts;
2830   dsplens = dsp->lens;
2831   while ( !seen )
2832   {
2833          if ( !(seen = locate_in_seqalign (delete_from, dsp->dim, dsp->numseg, &dspstart, &dsplens, &numseg_before, &subseglens, &sumseglens)) )
2834          {
2835                 salp1 = SeqAlignDupAdd (&salp1, salp, 0, 0, 0);
2836                 if ( salp->next == NULL ) break;
2837                 else {
2838                       salp = salp->next;
2839                       dsp = (CompSegPtr) salp->segs;
2840                       dspstart = dsp->starts;
2841                       dsplens = dsp->lens;
2842                 }
2843          }
2844   }
2845   if ( !seen ) {
2846     return NULL;
2847   }
2848   salp1 = SeqAlignDupAdd (&salp1, salp, numseg_before, subseglens, TRUE);
2849         /*****************************************
2850          *** delete salp until delete_to
2851          *****************************************/
2852   seen = FALSE;
2853   while ( !seen )
2854   {
2855          if ( !(seen = locate_in_seqalign (delete_to, dsp->dim, dsp->numseg, &dspstart, &dsplens, &numseg_before, &subseglens, &sumseglens)) )
2856          {
2857                 if ( salp->next == NULL ) break;
2858                 else {
2859                       salp = salp->next;
2860                       dsp = (CompSegPtr) salp->segs;
2861                       dspstart = dsp->starts;
2862                       dsplens = dsp->lens;
2863                 }
2864          }
2865   }
2866   if ( !seen ) {
2867          return NULL;
2868   }
2869         /*****************************************
2870          *** copy salp from delete_to to the end
2871          *****************************************/
2872   salp1 = SeqAlignDupAdd (&salp1, salp, numseg_before, subseglens, FALSE);
2873   if ( ( salp = salp->next) != NULL )
2874      while ( salp != NULL )
2875      {
2876         salp1 = SeqAlignDupAdd (&salp1, salp, 0, 0, 0);
2877         salp = salp->next;
2878      }
2879   return salp1;
2880 }
2881 
2882 /*********************************************************
2883 ***
2884 ***  DenseDiagPtr procedures
2885 ***
2886 **********************************************************/
2887 
DenseDiagToDenseSegFunc(SeqAlignPtr salp,Boolean add_ends)2888 NLM_EXTERN SeqAlignPtr LIBCALL DenseDiagToDenseSegFunc (SeqAlignPtr salp, Boolean add_ends)
2889 {
2890   BioseqPtr    bsp;
2891   SeqAlignPtr  newsalp;
2892   DenseSegPtr  dsp;
2893   DenseDiagPtr ddp;
2894   DenseDiagPtr nextddp;
2895   ValNodePtr   head;
2896   ValNodePtr   vnp;
2897   SeqIdPtr     sip1, sip2;
2898   Int4Ptr      ddp_starts,
2899                nextddp_starts,
2900                dspstart;
2901   Int4Ptr      dsplen;
2902   Int4         minstart, laststart;
2903   Int4         ddp_st1, ddp_st2;
2904   Int4         nextddp_st1, nextddp_st2;
2905   Int4         ddlen;
2906   Int4         interlen1, interlen2;
2907   Int4         intermax, intermin;
2908   Int4         j;
2909   Int4         start1 = 0, start2 = 0,
2910                stop1 = 0, stop2 = 0;
2911   Int4         slpstart1 = 0, slpstart2 = 0,
2912                slpstop1 = 0, slpstop2 = 0;
2913   Uint1        strand1, strand2;
2914   Uint1Ptr     strandp=NULL;
2915   Int2         numseg;
2916   Boolean      delete_me;
2917 
2918   newsalp = SeqAlignNew ();
2919   newsalp->type = 3;
2920   newsalp->segtype = 2;
2921   newsalp->dim = 2;
2922   dsp = DenseSegNew ();
2923   newsalp->segs = (Pointer) dsp;
2924   dsp->dim = 2;
2925 
2926   ddp =(DenseDiagPtr)salp->segs;
2927   dsp->ids = SeqIdDupList (ddp->id);
2928 
2929   numseg = 0;
2930   for (ddp =(DenseDiagPtr)salp->segs; ddp != NULL; ddp = ddp->next)
2931      numseg++;
2932   numseg = numseg *3 -2;
2933 
2934   head = NULL;
2935   laststart = -10;
2936   for (j=0; j<numseg; j++) {
2937      minstart = INT4_MAX;
2938      nextddp = NULL;
2939      for (ddp = (DenseDiagPtr)salp->segs; ddp!=NULL; ddp=ddp->next) {
2940         if (laststart < *(ddp->starts) && minstart > *(ddp->starts)) {
2941            minstart = *(ddp->starts);
2942            nextddp = ddp;
2943         }
2944      }
2945      if (nextddp!=NULL) {
2946         ValNodeAddPointer(&head, 0, nextddp);
2947         laststart = *(nextddp->starts);
2948      }
2949   }
2950   if (head==NULL)
2951      return NULL;
2952   for (vnp=head; vnp!=NULL; vnp=vnp->next) {
2953      ddp = (DenseDiagPtr) vnp->data.ptrvalue;
2954      ddp->next = NULL;
2955   }
2956   salp->segs = (Pointer) head->data.ptrvalue;
2957   head->data.ptrvalue = NULL;
2958   vnp = head->next;
2959   nextddp = (DenseDiagPtr) salp->segs;
2960   for (; vnp!=NULL; vnp=vnp->next) {
2961      ddp = (DenseDiagPtr) vnp->data.ptrvalue;
2962      nextddp->next = ddp;
2963      nextddp = nextddp->next;
2964      vnp->data.ptrvalue = NULL;
2965   }
2966   head = ValNodeFree (head);
2967   ddp = (DenseDiagPtr) salp->segs;
2968   ddlen = ddp->len;
2969   ddp_starts = ddp->starts;
2970   ddp_st1 = *ddp_starts;
2971   ddp_starts++;
2972   ddp_st2 = *ddp_starts;
2973   ddp_starts++;
2974   while (ddp!=NULL) {
2975      delete_me=FALSE;
2976      if (ddp->next != NULL)
2977      {
2978         nextddp = ddp->next;
2979         nextddp_starts = nextddp->starts;
2980         nextddp_st1 = *nextddp_starts;
2981         nextddp_starts++;
2982         nextddp_st2 = *nextddp_starts;
2983         interlen1 = nextddp_st1 - ddp_st1 - ddlen;
2984         interlen2 = nextddp_st2 - ddp_st2 - ddlen;
2985         if (interlen1 < 0 || interlen2 < 0) {
2986            if (interlen1 < 0 && interlen2 < 0) {
2987               ddp->next = nextddp->next;
2988               nextddp->next = NULL;
2989               DenseDiagFree(nextddp);
2990               delete_me=TRUE;
2991            }
2992            else if (interlen1 < 0) {
2993               if (ABS(interlen1) >= ddlen) {
2994                 ddp->next = nextddp->next;
2995                 nextddp->next = NULL;
2996                 DenseDiagFree(nextddp);
2997                 delete_me=TRUE;
2998               }
2999            }
3000            else if (interlen2 < 0) {
3001               if (ABS(interlen2) >= ddlen) {
3002                 ddp->next = nextddp->next;
3003                 nextddp->next = NULL;
3004                 DenseDiagFree(nextddp);
3005                 delete_me=TRUE;
3006               }
3007            }
3008         }
3009      }
3010      if (!delete_me) {
3011         ddp = ddp->next;
3012         if (ddp != NULL)
3013         {
3014          ddlen = ddp->len;
3015          ddp_starts = ddp->starts;
3016          ddp_st1 = *ddp_starts;
3017          ddp_starts++;
3018          ddp_st2 = *ddp_starts;
3019          ddp_starts++;
3020         }
3021      }
3022   }
3023   dsp->starts = (Int4Ptr) MemNew ((size_t) ((2*numseg + 4) * sizeof (Int4)));
3024   dsp->lens  = (Int4Ptr) MemNew ((size_t) ((numseg + 2) * sizeof (Int4)));
3025   for (j = 0; j < 2*numseg + 4; j++) dsp->starts [j] = -1;
3026   for (j = 0; j < numseg + 2; j++)   dsp->lens [j] = 0;
3027   dspstart = dsp->starts;
3028   dsplen = dsp->lens;
3029 
3030   ddp =(DenseDiagPtr)salp->segs;
3031   ddlen = ddp->len;
3032   ddp_starts = ddp->starts;
3033   ddp_st1 = *ddp_starts;
3034   ddp_starts++;
3035   ddp_st2 = *ddp_starts;
3036   ddp_starts++;
3037   numseg = 0;
3038   while (ddp != NULL)
3039   {
3040      numseg++;
3041      *dspstart = ddp_st1;
3042      dspstart++;
3043      *dspstart = ddp_st2;
3044      dspstart++;
3045      if (ddp->next != NULL)
3046      {
3047         nextddp = ddp->next;
3048         nextddp_starts = nextddp->starts;
3049         nextddp_st1 = *nextddp_starts;
3050         nextddp_starts++;
3051         nextddp_st2 = *nextddp_starts;
3052         interlen1 = nextddp_st1 - ddp_st1 - ddlen;
3053         interlen2 = nextddp_st2 - ddp_st2 - ddlen;
3054         if (interlen1 < 0 || interlen2 < 0) {
3055            if (interlen1 < 0 && interlen2 < 0) {
3056               return NULL;
3057            }
3058            if (interlen1 < 0) {
3059               if (ABS(interlen1) < ddlen) ddlen += interlen1;
3060               else {
3061                  return NULL;
3062               }
3063            }
3064            if (interlen2 < 0) {
3065               if (ABS(interlen2) < ddlen) ddlen += interlen2;
3066               else {
3067                  return NULL;
3068               }
3069            }
3070            interlen1 = nextddp_st1 - ddp_st1 - ddlen;
3071            interlen2 = nextddp_st2 - ddp_st2 - ddlen;
3072         }
3073         *dsplen = ddlen;
3074         dsplen++;
3075         if (interlen1 <= 0)
3076         {
3077            if (interlen2>0) {
3078               numseg++;
3079               *dspstart = -1;
3080               dspstart++;
3081               *dspstart = ddp_st2 + ddlen;
3082               dspstart++;
3083               *dsplen = interlen2;
3084               dsplen++;
3085            }
3086         }
3087         else if (interlen2 <= 0)
3088         {
3089            numseg++;
3090            *dspstart = ddp_st1 + ddlen;
3091            dspstart++;
3092            *dspstart = -1;
3093            dspstart++;
3094            *dsplen = interlen1;
3095            dsplen++;
3096         }
3097         else if (interlen1 == interlen2)
3098         {
3099            numseg++;
3100            *dspstart = ddp_st1 + ddlen;
3101            dspstart++;
3102            *dspstart = ddp_st2 + ddlen;
3103            dspstart++;
3104            *dsplen = interlen1;
3105            dsplen++;
3106         }
3107         else
3108         {
3109            if (interlen1 > interlen2) {
3110               intermax = interlen1;
3111               intermin = interlen2;
3112            }
3113            else  {
3114               intermax = interlen2;
3115               intermin = interlen1;
3116            }
3117            numseg++;
3118            *dspstart = ddp_st1 + ddlen;
3119            dspstart++;
3120            *dspstart = ddp_st2 + ddlen;
3121            dspstart++;
3122            *dsplen = intermin;
3123            dsplen++;
3124            numseg++;
3125            if (interlen1 > interlen2) {
3126               *dspstart = ddp_st1 + ddlen + intermin;
3127               dspstart++;
3128               *dspstart = -1;
3129            }
3130            else {
3131               *dspstart = -1;
3132               dspstart++;
3133               *dspstart = ddp_st2 + ddlen + intermin;
3134            }
3135            dspstart++;
3136            *dsplen = intermax - intermin;
3137            dsplen++;
3138         }
3139      }
3140      else {
3141         *dsplen = ddlen;
3142         dsplen++;
3143      }
3144      ddp = ddp->next;
3145      if (ddp != NULL)
3146      {
3147         ddlen = ddp->len;
3148         ddp_starts = ddp->starts;
3149         ddp_st1 = *ddp_starts;
3150         ddp_starts++;
3151         ddp_st2 = *ddp_starts;
3152         ddp_starts++;
3153      }
3154   }
3155   dsp->numseg = numseg;
3156   if (add_ends && newsalp!=NULL)
3157   {
3158      strand1 = SeqAlignStrand (salp, 0);
3159      strand2 = SeqAlignStrand (salp, 1);
3160      start1 = SeqAlignStart (newsalp, 0);
3161      start2 = SeqAlignStart (newsalp, 1);
3162      sip1 = SeqAlignId (newsalp, 0);
3163      sip2 = SeqAlignId (newsalp, 1);
3164      slpstart1= 0;
3165      bsp = BioseqLockById(sip1);
3166      if (bsp!=NULL) {
3167         slpstop1 = bsp->length-1;
3168         BioseqUnlock (bsp);
3169      }
3170      else
3171         slpstop1 = stop1;
3172      slpstart2 = 0;
3173      bsp = BioseqLockById(sip2);
3174      if (bsp!=NULL) {
3175         slpstop2 = bsp->length-1;
3176         BioseqUnlock (bsp);
3177      }
3178      else
3179         slpstop2 = stop2;
3180 /**
3181      newsalp = SeqAlignEndExtend (newsalp, slpstart1, slpstart2, -1, -1, start1, start2, -1, -1, strand1, strand2);
3182      stop1 = SeqAlignStop (newsalp, 0);
3183      stop2 = SeqAlignStop (newsalp, 1);
3184      newsalp = SeqAlignEndExtend (newsalp, -1, -1, slpstop1, slpstop2, -1, -1, stop1, stop2, strand1, strand2);
3185 **/
3186      if (strand1!=strand2)
3187      {
3188        strandp =(Uint1Ptr)MemNew((size_t)((dsp->dim*dsp->numseg+4)*sizeof(Uint1)));
3189        dsp->strands = strandp;
3190        for (j = 0; j < 2*numseg; j+=2)
3191           dsp->strands [j] = strand1;
3192        for (j = 1; j < 2*numseg; j+=2)
3193           dsp->strands [j] = strand2;
3194      }
3195   }
3196   return newsalp;
3197 }
3198 
Compact(SeqAlignPtr salp)3199 static SeqAlignPtr Compact (SeqAlignPtr salp)
3200 {
3201   DenseSegPtr     dsp;
3202   Int4Ptr         startp,
3203                   stmp,
3204                   lenp,
3205                   ltmp;
3206   Int4            st1, st2, st3, st4;
3207   Int2            numseg,
3208                   j, k, n;
3209 
3210   dsp = (DenseSegPtr) salp->segs;
3211   if (dsp==NULL)
3212      return salp;
3213   lenp = (Int4Ptr) dsp->lens;
3214   startp = (Int4Ptr) dsp->starts;
3215   j=0;
3216   numseg=dsp->numseg;
3217   while(j<numseg-1) {
3218      st1=*startp;
3219      st2=*(startp+1);
3220      st3=*(startp+2);
3221      st4=*(startp+3);
3222      if (st1>-1 && st2>-1 && st3>-1 && st4>-1) {
3223         stmp=startp;
3224         stmp+=dsp->dim;
3225         for (k=j; k<dsp->numseg-2; k++) {
3226            for (n=0; n<dsp->dim; n++) {
3227               *stmp=*(stmp+dsp->dim);
3228               stmp++;
3229            }
3230         }
3231         ltmp=lenp;
3232         ltmp++;
3233         *lenp += *ltmp;
3234         for (k=j; k<dsp->numseg-2; k++, ltmp++) {
3235            *ltmp = *(ltmp+1);
3236         }
3237         numseg--;
3238      }
3239      j++;
3240      startp+=dsp->dim;
3241      lenp++;
3242   }
3243   dsp->numseg = numseg;
3244   return salp;
3245 }
3246 
DenseDiagToDenseSeg(SeqAlignPtr salp,Boolean add_ends)3247 NLM_EXTERN SeqAlignPtr LIBCALL DenseDiagToDenseSeg (SeqAlignPtr salp, Boolean add_ends)
3248 {
3249   SeqAlignPtr cur = NULL,
3250               newsalp = NULL,
3251               new1 = NULL,
3252               new2 = NULL;
3253 
3254   for (cur=salp; cur!= NULL; cur = cur->next) {
3255      newsalp = DenseDiagToDenseSegFunc(cur, add_ends);
3256      newsalp = Compact (newsalp);
3257      if (newsalp != NULL) {
3258         if (new1 == NULL) {
3259            new1 = newsalp;
3260            new2 = new1;
3261         } else {
3262            new2->next = newsalp;
3263            new2 = new2->next;
3264         }
3265      }
3266   }
3267   return new1;
3268 }
3269 
DenseSegToDenseDiag(SeqAlignPtr salp)3270 NLM_EXTERN SeqAlignPtr LIBCALL  DenseSegToDenseDiag (SeqAlignPtr salp)
3271 {
3272   SeqAlignPtr     salptmp,
3273                   salp2 = NULL,
3274                   salp2tmp;
3275   DenseSegPtr     dsp;
3276   DenseDiagPtr    ddp,
3277                   ddphead = NULL;
3278   SeqIdPtr        sip;
3279   Int4Ptr         startp,
3280                   lenp;
3281   Int2            j;
3282 
3283   if (salp!=NULL)
3284   {
3285      for (salptmp = salp; salptmp!=NULL; salptmp = salptmp->next)
3286      {
3287         if (salptmp->segtype == 2 && salptmp->dim == 2)
3288         {
3289            ddphead = NULL;
3290            dsp = (DenseSegPtr) salptmp->segs;
3291            if (dsp!=NULL)
3292            {
3293               sip = dsp->ids;
3294               lenp = (Int4Ptr) dsp->lens;
3295               startp = dsp->starts;
3296               for (j=0; j<dsp->numseg; j++, lenp++)
3297               {
3298                  if (*startp > -1 && *(startp+1) > -1)
3299                  {
3300                     ddp = DenseDiagCreate (2, sip, startp, *lenp, NULL, NULL);
3301                     DenseDiagLink (&ddphead, ddp);
3302                  }
3303                  startp += dsp->dim;
3304               }
3305            }
3306            if (ddphead != NULL)
3307            {
3308               salp2tmp = SeqAlignNew ();
3309               if (salp2tmp != NULL) {
3310                  salp2tmp->type = 3;
3311                  salp2tmp->segtype = 1;
3312                  salp2tmp->dim = dsp->dim;
3313                  salp2tmp->segs = (Pointer) ddphead;
3314                  salp2 = SeqAlignLink (salp2, salp2tmp);
3315               }
3316            }
3317         }
3318         else if (salptmp->segtype == 1)
3319         {
3320            salp2tmp = SeqAlignDup (salptmp);
3321            salp2 = SeqAlignLink (salp2, salp2tmp);
3322         }
3323      }
3324   }
3325   return salp2;
3326 }
3327 
DenseDiagCreate(Int4 dim,SeqIdPtr id,Int4Ptr starts,Int4 len,Uint1Ptr strands,ScorePtr scores)3328 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagCreate (Int4 dim, SeqIdPtr id, Int4Ptr starts, Int4 len, Uint1Ptr strands, ScorePtr scores)
3329 
3330 {
3331   DenseDiagPtr ddp_copy;
3332   Int4         j;
3333 
3334   ddp_copy = DenseDiagNew();
3335   ddp_copy->dim = dim;
3336   if (id != NULL) {
3337      ddp_copy->id = SeqIdDupList (id);
3338   }
3339   ddp_copy->starts = (Int4Ptr)MemNew((size_t)((dim+1)*sizeof(Int4)));
3340   for (j = 0; j < dim; j++, starts++) {
3341          ddp_copy->starts [j] = *starts;
3342   }
3343   ddp_copy->len = len;
3344   if ( strands != NULL )
3345      ddp_copy->strands = strands;
3346   if ( scores != NULL )
3347      ddp_copy->scores = scores;
3348   return ddp_copy;
3349 }
3350 
DenseDiagLink(DenseDiagPtr * ddp_head,DenseDiagPtr ddp)3351 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagLink (DenseDiagPtr *ddp_head, DenseDiagPtr ddp)
3352 {
3353   DenseDiagPtr ddp_tmp;
3354 
3355   if (ddp!=NULL)
3356   {
3357      if (*ddp_head != NULL)
3358      {
3359         ddp_tmp = *ddp_head;
3360         while (ddp_tmp->next != NULL)
3361            ddp_tmp = ddp_tmp->next;
3362         ddp_tmp->next = ddp;
3363      }
3364      else
3365         *ddp_head = ddp;
3366   }
3367   return *ddp_head;
3368 }
3369 
DenseDiagInsert(DenseDiagPtr ddp_before,DenseDiagPtr ddp)3370 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagInsert (DenseDiagPtr ddp_before, DenseDiagPtr ddp)
3371 
3372 {
3373   DenseDiagPtr ddp_tmp;
3374 
3375   if ( (ddp_tmp = ddp_before->next) == NULL) {
3376          ddp_before->next = ddp;
3377          return ddp_before;
3378   }
3379   ddp_before->next = ddp;
3380   ddp->next = ddp_tmp;
3381   return ddp_before;
3382 }
3383 
DenseDiagPrecede(DenseDiagPtr ddp_after,DenseDiagPtr * ddp)3384 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagPrecede (DenseDiagPtr ddp_after, DenseDiagPtr *ddp)
3385 
3386 {
3387   DenseDiagPtr ddp_tmp;
3388 
3389   ddp_tmp = *ddp;
3390   ddp_tmp->next = ddp_after;
3391   return *ddp;
3392 }
3393 
DenseDiagLinkSort(DenseDiagPtr * ddp_head,DenseDiagPtr ddp)3394 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagLinkSort (DenseDiagPtr *ddp_head, DenseDiagPtr ddp)
3395 
3396 {
3397   DenseDiagPtr ddp_tmp;
3398 
3399   if ( (ddp_tmp = *ddp_head) != NULL ) {
3400          if ( *(ddp->starts) < *(ddp_tmp->starts) )
3401                 *ddp_head = DenseDiagPrecede (ddp_tmp, &ddp);
3402 
3403          else if ( *(ddp->starts) > *(ddp_tmp->starts) && ddp_tmp->next == NULL)
3404                 ddp_tmp->next = ddp;
3405 
3406          else {
3407                 while ( ddp_tmp->next != NULL ) {
3408                        if ( *(ddp->starts) < *(ddp_tmp->next->starts) ) break;
3409                        ddp_tmp = ddp_tmp->next;
3410                 }
3411                 ddp_tmp = DenseDiagInsert (ddp_tmp, ddp);
3412          }
3413   }
3414   else *ddp_head = ddp;
3415   return *ddp_head;
3416 }
3417 
SeqLocToFastaSeqAlign(ValNodePtr vnp)3418 NLM_EXTERN SeqAlignPtr LIBCALL SeqLocToFastaSeqAlign (ValNodePtr vnp)
3419 {
3420   SeqAlignPtr  salp;
3421   ValNodePtr   vnptmp;
3422   DenseSegPtr  dsp;
3423   Int4Ptr      lengthsort;
3424   Int4         maxlen,
3425                len, min, pre_min;
3426   Int4         j, j1;
3427   Int2         nseq,
3428                k, numseg;
3429 
3430   nseq = 0;
3431   for (vnptmp=vnp; vnptmp!=NULL;vnptmp=vnptmp->next)
3432      if (vnptmp->data.ptrvalue != NULL) nseq++;
3433   if (nseq == 0)
3434      return NULL;
3435   salp = SeqAlignNew ();
3436   salp->type = 3;
3437   salp->segtype = 2;
3438   salp->dim = nseq;
3439   dsp = DenseSegNew ();
3440   salp->segs = (Pointer) dsp;
3441   dsp->dim = nseq;
3442   dsp->ids = SeqIdListfromSeqLoc (vnp);
3443 
3444          /****************************
3445          ** count nb of segments
3446          ****************************/
3447   maxlen = MaxLengthSeqLoc (vnp);
3448   lengthsort = (Int4Ptr)MemNew((size_t) ((nseq+1)*sizeof(Int4)));
3449   pre_min = 0;
3450   numseg = 1;
3451   lengthsort [numseg] = 0;
3452   for ( j = 0; j < nseq; j++ )
3453   {
3454          vnptmp = vnp;
3455          min = maxlen;
3456          for ( k = 0; k < nseq; k++ )
3457          {
3458                  len = SeqLocLen ((SeqLocPtr) vnptmp->data.ptrvalue);
3459                  if ( len < min && len > pre_min ) min = len;
3460                  vnptmp = vnptmp->next;
3461                  if ( vnptmp == NULL ) break;
3462          }
3463          if ( min > pre_min )
3464          {
3465                   lengthsort [numseg] = min;
3466                   pre_min = min;
3467                   numseg++;
3468          }
3469   }
3470          /****************************
3471          ** copy starts, lens
3472          ****************************/
3473   dsp->starts = (Int4Ptr) MemNew ((size_t) ((nseq*numseg + 4) * sizeof (Int4)));
3474   dsp->lens   = (Int4Ptr) MemNew ((size_t) ((numseg + 2) * sizeof (Int4)));
3475   for (j = 0; j < nseq*numseg + 4; j++) dsp->starts[j] = -1;
3476   for (j = 0; j < numseg + 2; j++) dsp->lens[j] = 0;
3477   for ( k = 1; k < numseg; k++ )
3478   {
3479          dsp->lens [k-1] = lengthsort [k] - lengthsort [k-1];
3480   }
3481   numseg--;
3482   dsp->numseg = numseg;
3483   vnptmp = vnp;
3484   for ( j = 0; j < nseq; j++ )
3485   {
3486      if (SeqLocStrand((SeqLocPtr) vnptmp->data.ptrvalue)==Seq_strand_minus)
3487      {
3488         j1=nseq*numseg-nseq+j;
3489         dsp->starts[j1] = SeqLocStart ((SeqLocPtr) vnptmp->data.ptrvalue);
3490         j1-=nseq;
3491         for(k=numseg-1;k>-1;k--)
3492         {
3493            if (dsp->starts[j1+nseq]+ dsp->lens[k] < SeqLocLen((SeqLocPtr) vnptmp->data.ptrvalue))
3494               dsp->starts [j1]= dsp->starts[j1+nseq]+ dsp->lens[k];
3495            j1-=nseq;
3496         }
3497      }
3498      else{
3499          j1=j;
3500          dsp->starts[j1] = SeqLocStart ((SeqLocPtr) vnptmp->data.ptrvalue);
3501          j1+=nseq;
3502          for ( k = 1; k < numseg; k++ )
3503          {
3504             if (dsp->starts[j1-nseq]+ dsp->lens[(k-1)] < SeqLocLen((SeqLocPtr) vnptmp->data.ptrvalue))
3505                dsp->starts [j1]= dsp->starts[j1-nseq]+ dsp->lens[(k-1)];
3506             j1+=nseq;
3507          }
3508      }
3509      vnptmp = vnptmp->next;
3510      if ( vnptmp == NULL ) break;
3511   }
3512   dsp->strands= (Uint1Ptr) MemNew ((size_t) ((numseg*nseq+4)*sizeof (Uint1)));
3513   j1=0;
3514   for(k=0; k<numseg; k++)
3515   {
3516          vnptmp = vnp;
3517          for ( j = 0; j < nseq; j++ )
3518          {
3519             dsp->strands[j1] = SeqLocStrand((SeqLocPtr) vnptmp->data.ptrvalue);
3520             vnptmp = vnptmp->next;
3521             j1++;
3522             if ( vnptmp == NULL ) break;
3523          }
3524   }
3525   MemFree (lengthsort);
3526   return salp;
3527 }
3528 
sap_replace(SeqAnnotPtr sap,SeqAlignPtr salp,Uint1 choice)3529 static Boolean LIBCALL sap_replace (SeqAnnotPtr sap, SeqAlignPtr salp, Uint1 choice)
3530 {
3531   if (sap != NULL) {
3532      for (; sap!= NULL; sap=sap->next) {
3533         if (sap->type == choice) {
3534            SeqAlignSetFree ((SeqAlignPtr)sap->data);
3535            sap->data = (Pointer)salp;
3536            return TRUE;
3537         }
3538      }
3539   }
3540   return FALSE;
3541 }
3542 
ReplaceSeqAlignInSeqEntry(Uint2 entityID,Uint4 itemID,SeqAlignPtr salp)3543 NLM_EXTERN void LIBCALL ReplaceSeqAlignInSeqEntry (Uint2 entityID, Uint4 itemID, SeqAlignPtr salp)
3544 {
3545   SeqEntryPtr      sep,
3546                    sep1 = NULL;
3547   SeqEntryPtr      sept = NULL;
3548   BioseqSetPtr     bssp = NULL;
3549   BioseqPtr        bsp = NULL;
3550   SeqAnnotPtr      sap = NULL;
3551 
3552   sep = GetBestTopParentForItemID (entityID, itemID, OBJ_BIOSEQ);
3553   if (sep != NULL) {
3554      if (IS_Bioseq(sep)) {
3555         entityID = ObjMgrGetEntityIDForChoice (sep);
3556         sep1 = GetTopSeqEntryForEntityID (entityID);
3557         bsp = (BioseqPtr) sep->data.ptrvalue;
3558      }
3559      else if(IS_Bioseq_set(sep)) {
3560         sep1 = sep;
3561      }
3562      if (sep1 != NULL) {
3563         bssp = NULL; bsp = NULL;
3564         if (IS_Bioseq(sep1)) {
3565            bsp = (BioseqPtr) sep1->data.ptrvalue;
3566            sap_replace(bsp->annot, salp, 2);
3567         }
3568         else if(IS_Bioseq_set(sep1)) {
3569            bssp = (BioseqSetPtr)sep1->data.ptrvalue;
3570            while (bssp!=NULL && bssp->_class == 7) {
3571               sept = bssp->seq_set;
3572               bssp = NULL; bsp = NULL;
3573               if (IS_Bioseq(sept))  {
3574                  bsp = (BioseqPtr) sept->data.ptrvalue;
3575                  break;
3576               }
3577               else if (IS_Bioseq_set(sept))
3578                  bssp = (BioseqSetPtr) sept->data.ptrvalue;
3579            }
3580            if (bssp!=NULL) {
3581               sap = bssp->annot;
3582               if((sap==NULL || salp==NULL) && IS_Bioseq(sep)) {
3583                  bsp = (BioseqPtr) sep->data.ptrvalue;
3584                  sap_replace(bsp->annot, salp, 2);
3585               }
3586               else
3587                  sap_replace(sap, salp, 2);
3588               if (sap==NULL && IS_Bioseq_set(sep)) {
3589                  bssp = (BioseqSetPtr) sep->data.ptrvalue;
3590                  for (sept = bssp->seq_set; sept!=NULL; sept=sept->next) {
3591                     if (IS_Bioseq(sept)) {
3592                        bsp = (BioseqPtr) sept->data.ptrvalue;
3593                        sap_replace(bsp->annot, salp, 2);
3594                     }
3595                  }
3596               }
3597            }
3598            else if (bsp!=NULL) {
3599               sap_replace(bsp->annot, salp, 2);
3600            }
3601         }
3602      }
3603   }
3604   return;
3605 }
3606 
3607 /*********************************************************/
sap_empty(SeqAnnotPtr sap,Uint1 type,Pointer PNTR ptr)3608 static Pointer LIBCALL sap_empty (SeqAnnotPtr sap, Uint1 type, Pointer PNTR ptr)
3609 {
3610   SeqAlignPtr      salp = NULL;
3611 
3612   if (sap != NULL) {
3613      for (; sap!= NULL; sap=sap->next) {
3614         if (sap->type == type) {
3615            salp = (SeqAlignPtr) sap->data;
3616            *ptr = (Pointer) sap;
3617            break;
3618         }
3619      }
3620   }
3621   return salp;
3622 }
3623 
3624 typedef struct ccid2 {
3625   Uint1      choice;
3626   SeqIdPtr   sip;
3627   Pointer    sap;
3628 } CcId2, PNTR CcId2Ptr;
3629 
3630 
FindSeqAlignCallback(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)3631 static void FindSeqAlignCallback (SeqEntryPtr sep, Pointer mydata,
3632                                           Int4 index, Int2 indent)
3633 {
3634   BioseqPtr          bsp;
3635   BioseqSetPtr       bssp;
3636   SeqAlignPtr        salp,
3637                      salptmp,
3638                      curr_sap;
3639   DenseSegPtr        dsp;
3640   CcId2Ptr           cip;
3641   Boolean            found;
3642   Pointer            this_sap;
3643 
3644   cip = (CcId2Ptr)mydata;
3645   if (sep != NULL && sep->data.ptrvalue && mydata != NULL) {
3646      if (IS_Bioseq(sep)) {
3647         bsp = (BioseqPtr) sep->data.ptrvalue;
3648         if (bsp!=NULL) {
3649            salp=(SeqAlignPtr)sap_empty(bsp->annot, 2, &this_sap);
3650            if (salp!=NULL) {
3651               found=FALSE;
3652               salptmp=salp;
3653               if (cip->sip!=NULL) {
3654                  while (!found && salptmp!=NULL)
3655                  {
3656                    switch (salptmp->segtype) {
3657                    case 5: /* disc */
3658                      curr_sap = (SeqAlignPtr)salptmp->segs;
3659                      while (!found && curr_sap!=NULL) {
3660                        dsp = (DenseSegPtr)curr_sap->segs;
3661                        found = (Boolean)(SeqIdOrderInBioseqIdList(cip->sip, dsp->ids)>0);
3662                        curr_sap = curr_sap->next;
3663                      }
3664                      break;
3665                    default:
3666                      dsp = (DenseSegPtr)salptmp->segs;
3667                      found = (Boolean)(SeqIdOrderInBioseqIdList(cip->sip, dsp->ids)>0);
3668                    }
3669                    salptmp=salptmp->next;
3670                  }
3671               }
3672               if (found || cip->sip==NULL) {
3673                  if (cip->sap==NULL) {
3674                     if (cip->choice==OBJ_SEQALIGN)
3675                        cip->sap = (Pointer)salp;
3676                     else if (cip->choice==OBJ_SEQANNOT)
3677                        cip->sap = (Pointer) this_sap;
3678                  }
3679               }
3680            }
3681         }
3682      }
3683      else if(IS_Bioseq_set(sep)) {
3684         bssp = (BioseqSetPtr)sep->data.ptrvalue;
3685         if (bssp!=NULL) {
3686            salp=(SeqAlignPtr)sap_empty(bssp->annot, 2, &this_sap);
3687            if (salp!=NULL) {
3688               found=FALSE;
3689               salptmp=salp;
3690               if (cip->sip!=NULL) {
3691                  while (!found && salptmp!=NULL)
3692                  {
3693                    switch (salptmp->segtype) {
3694                    case 5: /* disc */
3695                      curr_sap = (SeqAlignPtr)salptmp->segs;
3696                      while (!found && curr_sap!=NULL) {
3697                        dsp = (DenseSegPtr)curr_sap->segs;
3698                        found = (Boolean)(SeqIdOrderInBioseqIdList(cip->sip, dsp->ids)>0);
3699                        curr_sap = curr_sap->next;
3700                      }
3701                      break;
3702                    default:
3703                      dsp = (DenseSegPtr)salptmp->segs;
3704                      found = (Boolean)(SeqIdOrderInBioseqIdList(cip->sip, dsp->ids)>0);
3705                    }
3706                    salptmp=salptmp->next;
3707                  }
3708               }
3709               if (found || cip->sip==NULL) {
3710                  if (cip->sap==NULL) {
3711                     if (cip->choice==OBJ_SEQALIGN)
3712                        cip->sap = (Pointer)salp;
3713                     else if (cip->choice==OBJ_SEQANNOT)
3714                        cip->sap = (Pointer) this_sap;
3715                  }
3716               }
3717            }
3718         }
3719      }
3720   }
3721 }
3722 
IsIdInAlignment(SeqIdPtr sip,SeqAlignPtr sap)3723 static Boolean IsIdInAlignment (SeqIdPtr sip, SeqAlignPtr sap)
3724 {
3725   SeqAlignPtr sap_tmp;
3726   Boolean     found = FALSE;
3727   DenseSegPtr dsp;
3728 
3729   if (sip == NULL || sap == NULL) return FALSE;
3730 
3731   if (sap->segtype == SAS_DISC)
3732   {
3733   	sap_tmp = (SeqAlignPtr) sap->segs;
3734   	while (!found && sap_tmp != NULL)
3735   	{
3736   	  found = IsIdInAlignment (sip, sap_tmp);
3737   	  sap_tmp = sap_tmp->next;
3738   	}
3739   }
3740   else if (sap->segtype == SAS_DENSEG)
3741   {
3742   	dsp = (DenseSegPtr) sap->segs;
3743   	if (SeqIdOrderInBioseqIdList (sip, dsp->ids) > 0)
3744   	{
3745   	  found = TRUE;
3746   	}
3747   }
3748   return found;
3749 }
3750 
FindSeqAlignVisitCallback(SeqAnnotPtr sap,Pointer userdata)3751 static void FindSeqAlignVisitCallback (SeqAnnotPtr sap, Pointer userdata)
3752 {
3753   CcId2Ptr           cip;
3754   SeqAlignPtr        salp;
3755 
3756   if (sap == NULL || sap->type != 2 || (cip = (CcId2Ptr) userdata) == NULL || cip->sap != NULL) return;
3757 
3758   for (salp = sap->data; salp != NULL && cip->sap == NULL; salp = salp->next)
3759   {
3760     if (cip->sip == NULL || IsIdInAlignment (cip->sip, salp))
3761     {
3762       if (cip->choice == OBJ_SEQALIGN)
3763       {
3764       	cip->sap = (Pointer) salp;
3765       }
3766       else
3767       {
3768       	cip->sap = sap;
3769       }
3770     }
3771   }
3772 }
3773 
3774 
FindSeqAlignInSeqEntry(SeqEntryPtr sep,Uint1 choice)3775 NLM_EXTERN Pointer LIBCALL FindSeqAlignInSeqEntry (SeqEntryPtr sep, Uint1 choice)
3776 {
3777   SeqEntryPtr      sep_head;
3778   BioseqPtr        bsp;
3779   Uint2            entityID;
3780   CcId2            ci;
3781 
3782   if (sep==NULL)
3783      return NULL;
3784   ci.sap = NULL;
3785   ci.sip = NULL;
3786   if (choice != OBJ_SEQALIGN && choice != OBJ_SEQANNOT)
3787      return NULL;
3788   ci.choice = choice;
3789   if (IS_Bioseq(sep)) {
3790      bsp = (BioseqPtr) sep->data.ptrvalue;
3791      if (bsp!=NULL)
3792         ci.sip = SeqIdDup (bsp->id);
3793   }
3794   entityID = ObjMgrGetEntityIDForChoice (sep);
3795   sep_head = GetTopSeqEntryForEntityID (entityID);
3796   VisitAnnotsInSep (sep_head, (Pointer)&ci, FindSeqAlignVisitCallback);
3797   if (ci.sip != NULL)
3798      SeqIdFree (ci.sip);
3799   return ci.sap;
3800 }
3801 
3802 /***********************************************
3803 ***  ReadBuffer from spp+sap
3804 ***    in : spp, sap, from + to in seq coordinates
3805 ***    out: length of buffer + buffer
3806 ************************************************/
readbuff_fromseqalign(SeqPortPtr spp,SeqAlignPtr salp,Int2 index,CharPtr buffer,Int4 from,Int4 to,Int4 offset,Boolean strand)3807 NLM_EXTERN Int4 LIBCALL readbuff_fromseqalign (SeqPortPtr spp, SeqAlignPtr salp,  Int2 index, CharPtr buffer, Int4 from, Int4 to, Int4 offset, Boolean strand)
3808 {
3809   BioseqPtr   bsp;
3810   DenseSegPtr dsp;
3811   SeqIdPtr    sip;
3812   Int4Ptr     dspstart;
3813   Int4Ptr     dsplens;
3814   Int4        sumlens, sumstart;
3815   Int4        bufflen, buffstart;
3816   Int4        seglenstobuffer = 0;
3817   Int4        j;
3818   Int4        maxlen = 0;
3819   Int2        numseg;
3820   Boolean     seen = FALSE;
3821   Boolean     nogap;
3822   Boolean     ok = TRUE;
3823 
3824   if (spp == NULL) {
3825     return 0;
3826   }
3827   if (buffer == NULL) {
3828     return 0;
3829   }
3830                    /**********************************
3831                     ***  locate segment including 'from'
3832                     ***********************************/
3833   if ( (dsp = (DenseSegPtr) salp->segs ) == NULL) {
3834          return 0;
3835   }
3836   if (strand == Seq_strand_minus) {
3837      sip = dsp->ids;
3838      for (j = 0; sip!=NULL && j < index; j++)
3839         sip=sip->next;
3840      if (sip!=NULL) {
3841         bsp = BioseqLockById (sip);
3842         if (bsp!=NULL) {
3843            maxlen = bsp->length;
3844            BioseqUnlock(bsp);
3845         }
3846      }
3847      if (maxlen==0)
3848         return 0;
3849   }
3850   dsplens = dsp->lens;
3851   dspstart = dsp->starts + index;
3852   seen = LocateInSeqAlignDenSeg (from, dsp->dim, dsp->numseg, &dspstart, &dsplens, &numseg, &seglenstobuffer);
3853   if (!seen) {
3854     ErrPostEx (SEV_ERROR, 0, 0, "fail in readbuff_fromseqalign_sap [4] %ld %ld %ld %ld %ld %ld",
3855                (long) from, (long) dsp->dim, (long) dsp->numseg, (long) *dspstart, (long) *dsplens, (long) seglenstobuffer);
3856     return 0;
3857   }
3858                     /***********************************
3859                     ***  read segments until 'to'
3860                     ***********************************/
3861   bufflen = MIN((Int4)(*dsplens-seglenstobuffer),(Int4)(to - from));
3862   if (strand == Seq_strand_minus) {
3863      buffstart = *dspstart + seglenstobuffer;
3864      if (buffstart>-1)
3865         buffstart = maxlen - buffstart - bufflen;
3866   } else {
3867      buffstart = *dspstart + seglenstobuffer;
3868   }
3869   nogap = (*dspstart >= 0);
3870   if (offset < 0 && !nogap)
3871      offset = 0;
3872   sumlens = bufflen;
3873   sumstart = 0;
3874 /**
3875         WriteLog ("/ %d %d %d %d %d %d %d\n", (int)seglenstobuffer,
3876         (int)buffstart, (int)bufflen, (int) *dsplens, (int)*dspstart, to, from);
3877 **/
3878   while ( ok )
3879   {
3880     if ( nogap )
3881     {
3882        if (strand == Seq_strand_minus) {
3883         offset = ReadBufferFromSep (spp, buffer, (Int4)buffstart,
3884                                    (Int4)(buffstart+bufflen), offset);
3885        } else {
3886         offset = ReadBufferFromSep (spp, buffer, (Int4)buffstart,
3887                                    (Int4)(buffstart+bufflen), offset);
3888        }
3889     }
3890     else
3891     {
3892         for (j = 0; j < bufflen; j++, offset++)
3893                 buffer[offset] = '-';
3894         buffer[offset] = '\0';
3895     }
3896     ok = (numseg < dsp->numseg);
3897     if (!ok)
3898     {
3899         if ( salp->next == NULL ) break;
3900     }
3901     else {
3902         numseg++;
3903         dspstart += dsp->dim;
3904         dsplens++;
3905     }
3906     nogap = (*dspstart >= 0);
3907     bufflen = MIN ((Int4) (*dsplens), (Int4) (to - from));
3908     if ( nogap ) {
3909        if (strand == Seq_strand_minus) {
3910           buffstart = *dspstart + sumstart;
3911           buffstart = maxlen - buffstart - bufflen;
3912        } else {
3913           buffstart = *dspstart + sumstart;
3914        }
3915     }
3916     sumlens += bufflen;
3917 /**
3918     if (index != 0)
3919         WriteLog ("- %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld \n",
3920         (long)offset, (long)seglenstobuffer,
3921         (long)sumstart, (long)*dspstart, (long)buffstart,
3922         (long)*dsplens, (long)from, (long)to, (long)bufflen, (long)sumlens);
3923 **/
3924   }
3925   return ((Int4)offset);
3926 }
3927 
aaSeqAlign_to_dnaSeqAlign(SeqAlignPtr salp,ValNodePtr vnp,ValNodePtr framep)3928 NLM_EXTERN SeqAlignPtr LIBCALL aaSeqAlign_to_dnaSeqAlign (SeqAlignPtr salp, ValNodePtr vnp, ValNodePtr framep)
3929 {
3930   SeqAlignPtr      tmp;
3931   DenseSegPtr      dsp;
3932   ValNodePtr       vnptmp,
3933                    fvnp=NULL;
3934   SeqIdPtr         sip,
3935                    siptmp;
3936   Int4Ptr          startp;
3937   Int4Ptr          lenp;
3938   Int4             from, sumlens;
3939   Int2             j, k;
3940   Uint1            frame;
3941 
3942   if (salp == NULL)
3943      return NULL;
3944   for (tmp=salp; tmp!=NULL; tmp=tmp->next) {
3945      if (tmp->segtype == 2) {
3946         dsp = (DenseSegPtr) tmp->segs;
3947         if (dsp != NULL)  {
3948 
3949            if (vnp==NULL) {
3950               for (j=0, sip = dsp->ids; j<dsp->dim && sip!=NULL; j++, sip=sip->next) {
3951                  siptmp = MakeNewProteinSeqId (NULL, sip);
3952                  ValNodeAddPointer (&vnp, 0, siptmp);
3953               }
3954            }
3955            for (j=0, vnptmp = vnp; vnptmp != NULL; vnptmp = vnptmp->next) {
3956               if (vnptmp->data.ptrvalue != NULL)
3957                  j++;
3958               else
3959                  break;
3960            }
3961            if (j == dsp->dim) {
3962 
3963               dsp->ids = SeqIdFree (dsp->ids);
3964               dsp->ids = ValNodeSeqIdListDup (vnp);
3965               vnp = ValNodeFreeType (&vnp, TypeSeqId);
3966 
3967               lenp = dsp->lens;
3968               for (j = 0; j < dsp->numseg; j++, lenp++) {
3969                  *lenp = *lenp * 3;
3970               }
3971               frame=0;
3972               if (framep!=NULL) {
3973                  fvnp = framep;
3974               }
3975               for (k = 0; k < dsp->dim; k++)
3976               {
3977                  lenp = dsp->lens;
3978                  startp = dsp->starts;
3979                  startp += k;
3980                  while (*startp < 0) startp += dsp->dim;
3981                  from = *startp;
3982                  if (fvnp!=NULL)
3983                     frame=(Uint1) fvnp->data.intvalue;
3984                  if (frame == 2)
3985                     from +=1;
3986                  else if (frame == 3)
3987                     from+=2;
3988                  startp = dsp->starts;
3989                  startp += k;
3990                  sumlens = 0;
3991                  for (j = 0; j < dsp->numseg; j++) {
3992                     if (*startp > -1) {
3993                        *startp = from + sumlens;
3994                        sumlens += *lenp;
3995                     }
3996                     startp += dsp->dim;
3997                     lenp++;
3998                  }
3999                  if (framep!=NULL)
4000                     fvnp=fvnp->next;
4001               }
4002            }
4003         }
4004      }
4005   }
4006   return salp;
4007 }
4008 
4009 
aaSeqAnnot_to_dnaSeqAnnotFunc(SeqAnnotPtr PNTR sapnahead,SeqAlignPtr salpnew,ValNodePtr vnp,ValNodePtr framep)4010 NLM_EXTERN SeqAnnotPtr aaSeqAnnot_to_dnaSeqAnnotFunc (SeqAnnotPtr PNTR sapnahead, SeqAlignPtr salpnew, ValNodePtr vnp, ValNodePtr framep)
4011 {
4012   SeqAnnotPtr      sap1, sap2, sapna, saptmp;
4013   SeqAlignPtr      salpna;
4014 
4015   sap1 = SeqAnnotForSeqAlign (salpnew);
4016   sap2 = (SeqAnnotPtr) AsnIoMemCopy ((Pointer) sap1, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
4017   salpna = aaSeqAlign_to_dnaSeqAlign ((SeqAlignPtr)(sap2->data), vnp, framep);
4018   sap1->data = NULL;
4019   sap2->data = NULL;
4020   SeqAnnotFree (sap1);
4021   SeqAnnotFree (sap2);
4022   if (salpna != NULL) {
4023      sapna = SeqAnnotForSeqAlign (salpna);
4024      if (*sapnahead == NULL)
4025 	*sapnahead = sapna;
4026      else {
4027         for (saptmp=*sapnahead; saptmp->next!=NULL; saptmp=saptmp->next)
4028            continue;
4029         saptmp->next = sapna;
4030      }
4031   }
4032   return *sapnahead;
4033 }
4034 
SortSeqAlign(SeqAlignPtr PNTR salp)4035 NLM_EXTERN SeqAlignPtr LIBCALL SortSeqAlign (SeqAlignPtr PNTR salp)
4036 {
4037   SeqAlignPtr      salp1, salptmp,
4038                    newsalp = NULL,
4039                    presalp = NULL,
4040                    minsalp = NULL,
4041                    preminsalp = NULL;
4042   DenseSegPtr      dsp,
4043                    dsptmp;
4044   SeqIdPtr         sip, siptmp;
4045   Int4Ptr          start;
4046   Int4             minstart;
4047   BioseqPtr        bsp;
4048 
4049   salp1=*salp;
4050   while (salp1!=NULL)
4051   {
4052      minstart = INT4_MAX;
4053      presalp=NULL;
4054      minsalp = preminsalp = NULL;
4055      salptmp=salp1;
4056      while (salptmp!=NULL) {
4057         dsp=(DenseSegPtr) salptmp->segs;
4058         if (dsp!=NULL) {
4059            start = dsp->starts;
4060            if (*start < minstart) {
4061               minstart = *start;
4062               minsalp = salptmp;
4063               preminsalp = presalp;
4064            }
4065         }
4066         presalp=salptmp;
4067         salptmp=salptmp->next;
4068      }
4069      if (minsalp != NULL) {
4070         dsp=(DenseSegPtr) minsalp->segs;
4071         sip=dsp->ids->next;
4072         bsp=BioseqLockById(sip);
4073         if (bsp==NULL) {
4074            minsalp=NULL;
4075         }
4076         else BioseqUnlock (bsp);
4077      }
4078      if (minsalp != NULL && newsalp != NULL) {
4079         dsp=(DenseSegPtr) minsalp->segs;
4080         sip=dsp->ids->next;
4081         salptmp=newsalp;
4082         while (salptmp!=NULL) {
4083            dsptmp=(DenseSegPtr) salptmp->segs;
4084            siptmp=dsptmp->ids->next;
4085            if (SeqIdForSameBioseq(sip, siptmp)) {
4086               break;
4087            }
4088            salptmp=salptmp->next;
4089         }
4090         if (salptmp!=NULL)
4091            minsalp=NULL;
4092      }
4093      if (minsalp != NULL) {
4094         if (preminsalp==NULL) {
4095            salp1 = salp1->next;
4096            minsalp->next = NULL;
4097         }
4098         else {
4099            preminsalp->next = minsalp->next;
4100            minsalp->next = NULL;
4101         }
4102         newsalp = SeqAlignLink (newsalp, minsalp);
4103      }
4104      else break;
4105   }
4106   *salp = newsalp;
4107   return newsalp;
4108 }
4109 
SortSeqAlignFromList(SeqAlignPtr salp,Int2Ptr sortlst)4110 NLM_EXTERN SeqAlignPtr LIBCALL SortSeqAlignFromList (SeqAlignPtr salp, Int2Ptr sortlst)
4111 {
4112   SeqAlignPtr newsalp;
4113   CompSegPtr  csp,
4114               newcsp;
4115   SeqIdPtr    sip;
4116   Int4Ptr     lenp;
4117   Uint1Ptr    stp, st2p;
4118   Int2        j, k, m;
4119 
4120   csp = (CompSegPtr) salp->segs;
4121   newsalp = SeqAlignNew ();
4122   if ( newsalp == NULL ) return NULL;
4123   newsalp->type = salp->type;
4124   newsalp->segtype = salp->segtype;
4125   newsalp->dim = salp->dim;
4126   newcsp = (CompSegPtr) MemNew ( sizeof (CompSeg) );
4127   newsalp->segs = newcsp;
4128   newcsp->dim = csp->dim;
4129   newcsp->numseg = csp->numseg;
4130   newcsp->ids = NULL;
4131   for (j=0; j < csp->dim; j++)
4132   {
4133      for (k=0, sip = csp->ids; k < csp->dim && sip!=NULL; k++) {
4134         if (sortlst[k] == (j+1)) {
4135            newcsp->ids = AddSeqId (&(newcsp->ids), sip);
4136            break;
4137         }
4138         sip = sip->next;
4139      }
4140   }
4141   newcsp->from = (Int4Ptr) MemNew ((size_t) ((csp->dim + 2) * sizeof (Int4)));
4142   for (j=0; j < csp->dim; j++)
4143   {
4144      for (k=0, lenp = csp->from; k < csp->dim; k++, lenp++)
4145         if (sortlst[k] == (j+1)) {
4146            newcsp->from [j] = *lenp;
4147            break;
4148         }
4149   }
4150   newcsp->starts =(BoolPtr)MemNew((size_t)((csp->dim*csp->numseg+ 4) * sizeof (Boolean)));
4151   for (j=0; j < csp->dim; j++)
4152   {
4153      for (k=0, stp = csp->starts; k < csp->dim; k++, stp ++)
4154         if (sortlst[k] == (j+1)) {
4155            for (m=0, st2p = stp; m < csp->numseg; m++, st2p+=csp->dim) {
4156               newcsp->starts [m*csp->dim + j] = *st2p;
4157            }
4158            break;
4159         }
4160   }
4161   newcsp->lens=(Int4Ptr) MemNew((size_t)((csp->numseg + 2) * sizeof (Int4)));
4162   for (j = 0; j < csp->numseg + 2; j++)
4163      newcsp->lens[j] = csp->lens[j];
4164 
4165   return newsalp;
4166 }
4167 
4168 
multseqalign_from_pairseqalign(SeqAlignPtr salp)4169 NLM_EXTERN SeqAnnotPtr multseqalign_from_pairseqalign (SeqAlignPtr salp)
4170 {
4171   SeqAlignPtr      salptmp,
4172                    tmp;
4173   DenseSegPtr      dsp = NULL;
4174   SeqPortPtr       spp;
4175   SeqLocPtr        slp;
4176   SeqIdPtr         sip,
4177                    siphead, siptmp,
4178                    sipcur;
4179   BioseqPtr        bsp;
4180   CharPtr          bufferin[500];
4181   CharPtr          bufferout[500];
4182   CharPtr          buffertmp[2];
4183   Int4Ptr          starttmp;
4184   Int4Ptr          lenp;
4185   Uint1Ptr         strandp;
4186 
4187   Int4             algfrom, algto, alglens;
4188   Int4             seqfrom, seqto;
4189   Int4             cur;
4190   Int4             dsp_len, bsp_len, sum_lens;
4191   Int4             max_lens = 0;
4192   Int4             seqoffset;
4193   Int4             gapoffset;
4194   Int4             gaptotal;
4195   Int4             step = SALSALENLIMIT;
4196   Int2             curnseq;
4197   Int2             j;
4198   Int4             k, k1, k2, k3;
4199   Char             c1, c2;
4200 
4201   ValNodePtr       vnp;
4202   ValNodePtr       vnpfrom;
4203   ValNodePtr       vnpstrand;
4204   SeqAnnotPtr      sap;
4205 
4206   Int4             letter;
4207   Uint1            strandtmp = Seq_strand_unknown;
4208   Boolean          strand_nonull;
4209   Boolean          ok;
4210   CharPtr       str;
4211 
4212   if (salp==NULL)
4213      return NULL;
4214 
4215   for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next) {
4216      if (salptmp->type > 0 && salptmp->segtype==2) {
4217         dsp = (DenseSegPtr) salptmp->segs;
4218         break;
4219      }
4220   }
4221   if (dsp==NULL)
4222      return NULL;
4223   siptmp = SeqIdDup (dsp->ids);
4224   bsp = BioseqLockById (siptmp);
4225   SeqIdFree (siptmp);
4226   if (bsp == NULL) {
4227      return NULL;
4228   }
4229   bsp_len = bsp->length;
4230   sip = SeqIdFindBest(bsp->id, 0);
4231   BioseqUnlock (bsp);
4232 
4233   /*----- check if not all sequences start with gaps ----*/
4234   for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next) {
4235      if (salptmp->type > 0 && salptmp->segtype==2) {
4236 	dsp = (DenseSegPtr) salptmp->segs;
4237         for (j=0, starttmp=dsp->starts; j<dsp->dim; j++, starttmp++)
4238            if ( *starttmp > -1)
4239               break;
4240         ok = (Boolean) (j < dsp->dim);
4241         if (!ok)
4242            break;
4243      }
4244   }
4245   if (!ok)
4246      return NULL;
4247   /*---- find longest seqalign --------*/
4248   sum_lens = 0;
4249   for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next) {
4250      if (salptmp->type > 0 && salptmp->segtype==2) {
4251         dsp_len = SeqAlignStart(salptmp, 0) + SeqAlignLength (salptmp);
4252         if (dsp_len > sum_lens)
4253            sum_lens = dsp_len;
4254      }
4255   }
4256   if (sum_lens > SALSALENLIMIT) {
4257      ErrPostEx (SEV_ERROR, 0, 0, "Too long alignment.\n Wait for next Sequin version");
4258      return NULL;
4259   }
4260   step = 2*sum_lens;
4261   step = MIN ((Int4)step, (Int4)SALSALENLIMIT);
4262   for (j=0; j<2; j++)
4263   {
4264      str = (CharPtr)MemNew ((size_t) ((step+10) * sizeof(Char)));
4265      buffertmp[j] = str;
4266      for (k=0; k<step; k++) buffertmp[j][k] = '-';
4267      str = (CharPtr)MemNew ((size_t) ((step+10) * sizeof(Char)));
4268      bufferout[j] = str;
4269      for (k=0; k<step; k++) bufferout[j][k] = '-';
4270      str = (CharPtr)MemNew ((size_t) ((step+10) * sizeof(Char)));
4271      bufferin[j] = str;
4272      for (k=0; k<step; k++) bufferin[j][k] = '-';
4273   }
4274   siphead = SeqIdDup (sip);
4275   sipcur = siphead;
4276   vnpstrand = NULL;
4277   vnpfrom = NULL;
4278   ValNodeAddInt (&vnpfrom, 1, (Int4) 0);
4279 
4280   for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
4281      if (salptmp->type > 0 && salptmp->segtype==2)
4282         break;
4283 
4284   /**************** if 1rst sequence start with gaps in one alignment ***/
4285   gapoffset = gaptotal = 0;
4286   for (tmp=salptmp, j=0; tmp!=NULL; tmp=tmp->next, j++) {
4287      if (tmp->type > 0 && tmp->segtype==2) {
4288         dsp = (DenseSegPtr) tmp->segs;
4289         if (*(dsp->starts) < 0) {
4290            if (j==0)
4291               gapoffset = *(dsp->lens);
4292            if (*(dsp->lens) > gaptotal) {
4293               gaptotal = *(dsp->lens);
4294            }
4295         }
4296      }
4297   }
4298   if (gaptotal > 0) {
4299      if (gaptotal <= gapoffset)
4300         gapoffset = gaptotal - gapoffset;
4301   }
4302   algfrom = 0;
4303   algto = 1;
4304   for (cur = algfrom; cur < algto; cur += 2*step)
4305   {
4306      curnseq = 2;
4307      dsp = (DenseSegPtr) salptmp->segs;
4308      seqoffset = *(dsp->starts);
4309      strand_nonull = (Boolean) (dsp->strands != NULL);
4310      if (strand_nonull)
4311         strandp = dsp->strands;
4312      else
4313         strandtmp = Seq_strand_unknown;
4314      for (sip = dsp->ids, j=0; sip!=NULL; sip=sip->next, j++)
4315      {
4316         starttmp = dsp->starts;
4317         if (j==0) {
4318            seqfrom = 0;
4319         } else {
4320            starttmp += j;
4321            seqfrom = *starttmp;
4322         }
4323         lenp = dsp->lens;
4324         dsp_len = 0;
4325         sum_lens = 0;
4326         for(k=0; k < dsp->numseg; k++, lenp++, starttmp += dsp->dim) {
4327            if ( *starttmp >= 0 ) dsp_len += *lenp;
4328            sum_lens += *lenp;
4329         }
4330         siptmp = SeqIdDup (sip);
4331         bsp = BioseqLockById (siptmp);
4332         SeqIdFree (siptmp);
4333         if (bsp == NULL) {
4334            return NULL;
4335         }
4336         seqto = MIN ((Int4) (seqfrom + step), (Int4) (bsp->length -1));
4337         if ( j != 0 ) {
4338            siptmp = SeqIdDup(SeqIdFindBest(bsp->id, 0));
4339            sipcur->next = siptmp;
4340            sipcur = siptmp;
4341            starttmp = dsp->starts;
4342            starttmp += j;
4343            lenp = dsp->lens;
4344            for(k=0; k < dsp->numseg; k++, lenp++, starttmp += dsp->dim)
4345               if ( *starttmp >= 0 ) break;
4346            if (strand_nonull && *strandp==Seq_strand_minus)
4347               ValNodeAddInt (&vnpfrom, 1, (Int4) (*starttmp+*lenp));
4348            else
4349               ValNodeAddInt (&vnpfrom, 1, (Int4) *starttmp);
4350         }
4351         BioseqUnlock (bsp);
4352         if (strand_nonull)
4353            strandtmp = *strandp;
4354         slp = SeqLocIntNew (0, seqto, strandtmp, sip);
4355         if ( slp == NULL) {
4356            return NULL;
4357         }
4358         spp = SeqPortNewByLoc (slp, Seq_code_iupacna);
4359         SeqLocFree (slp);
4360 /**
4361         SeqIdWrite (sip, strLog, PRINTID_FASTA_LONG, 120);
4362         if (spp!=NULL) WriteLog("mergesalp2 %s\n", strLog);
4363         else WriteLog("PRINTFspp NULL\n");
4364         WriteLog("1>  %ld  %ld    %ld %ld  > %ld   \n", dsp_len, sum_lens, seqto, seqoffset, seqfrom);
4365 **/
4366         if (seqoffset < 0) seqoffset = 0;  /*!!!!!!!!!!!!!!!!!!!!*/
4367 
4368         if ( j == 0 && seqoffset > seqfrom) {
4369            seqfrom = ReadBufferFromSep (spp, bufferin[j], (Int4)seqfrom,
4370                                          (Int4)(seqfrom + seqoffset), 0);
4371         }
4372 /**
4373 WriteLog ("3> %d  %d   %ld  %ld \n", j, seqfrom, sum_lens, seqfrom + sum_lens);
4374 **/
4375         alglens = readbuff_fromseqalign (spp, salptmp, j, bufferin[j], seqfrom, seqfrom + sum_lens, seqoffset+gapoffset, strandtmp);
4376         if (alglens == 0) {
4377            return NULL;
4378         }
4379 /**
4380         WriteLog("4>  %d  %ld   %ld %ld   %ld < %ld   %d\n", j, alglens, sum_lens, seqfrom+sum_lens, seqfrom + dsp_len, bsp_len, strandtmp);
4381 **/
4382         if ( j == 0 && (seqfrom + dsp_len) < bsp_len) {
4383            alglens = ReadBufferFromSep (spp, bufferin[j],
4384                            (Int4)(seqfrom + dsp_len), (Int4)bsp_len, alglens);
4385            bufferin[j][alglens] = '-';
4386         }
4387         else {
4388            for (k = alglens; k < step; k++)
4389               if (bufferin[j][k] != '-') bufferin[j][k] = '-';
4390         }
4391         SeqPortFree(spp);
4392         ValNodeAddInt (&vnpstrand, 1, (Int4)(strandtmp));
4393         if (strand_nonull)
4394            strandp++;
4395      }
4396      for (salptmp = salptmp->next; salptmp != NULL; salptmp = salptmp->next)
4397      {
4398       if (salptmp->type > 0 && salptmp->segtype==2)
4399       {
4400 /**
4401 WriteLog ("\n\nCURRENT ALIGN %d\n", curnseq);
4402 **/
4403        dsp = (DenseSegPtr) salptmp->segs;
4404        seqoffset = *(dsp->starts);
4405        if (seqoffset < 0) {
4406           gapoffset = gaptotal - *(dsp->lens);
4407        }
4408        else
4409           gapoffset = 0;
4410        strand_nonull = (Boolean) (dsp->strands != NULL);
4411        if (strand_nonull)
4412            strandp = dsp->strands;
4413        else
4414            strandtmp = Seq_strand_unknown;
4415        for (sip = dsp->ids, j=0; sip!=NULL; sip=sip->next, j++)
4416        {
4417            dsp = (DenseSegPtr) salptmp->segs;
4418            for (k=0; k<step; k++)
4419               buffertmp[j][k] = '-';
4420            starttmp = dsp->starts;
4421            starttmp += j;
4422            if (j == 0) seqfrom = 0;
4423            else seqfrom = *starttmp;
4424            lenp = dsp->lens;
4425            dsp_len = 0;
4426            sum_lens = 0;
4427            for(k=0; k < dsp->numseg; k++, lenp++, starttmp += dsp->dim) {
4428               if ( *starttmp >= 0 )
4429                  dsp_len += *lenp;
4430               sum_lens += *lenp;
4431            }
4432            siptmp = SeqIdDup (sip);
4433            bsp = BioseqLockById (siptmp);
4434            SeqIdFree (siptmp);
4435            if (bsp == NULL){
4436               return NULL;
4437            }
4438            seqto = MIN ((Int4) (seqfrom + step), (Int4) (bsp->length -1));
4439            if ( j != 0 ) {
4440               siptmp = SeqIdDup(SeqIdFindBest(bsp->id, 0));
4441               sipcur->next = siptmp;
4442               sipcur = siptmp;
4443               starttmp = dsp->starts;
4444               starttmp += j;
4445               lenp = dsp->lens;
4446               for(k=0; k < dsp->numseg; k++, lenp++, starttmp += dsp->dim)
4447                  if ( *starttmp >= 0 ) break;
4448               if (strand_nonull && *strandp==Seq_strand_minus)
4449                  ValNodeAddInt (&vnpfrom, 1, (Int4) (*starttmp+*lenp));
4450               else
4451                  ValNodeAddInt (&vnpfrom, 1, (Int4) *starttmp);
4452            }
4453            BioseqUnlock (bsp);
4454            if (strand_nonull)
4455               strandtmp = *strandp;
4456            slp = SeqLocIntNew (0, seqto, strandtmp, sip);
4457            if ( slp == NULL) {
4458               return NULL;
4459            }
4460            spp = SeqPortNewByLoc (slp, Seq_code_iupacna);
4461            SeqLocFree (slp);
4462 /**
4463            SeqIdWrite (sip, strLog, PRINTID_FASTA_LONG, 120);
4464            if (spp!=NULL) WriteLog ("mergesalp2 %s\n", strLog);
4465            else WriteLog ("spp NULL\n");
4466            WriteLog ("1>  %ld  %ld  from %ld  to %ld  offset %ld  \n", dsp_len, sum_lens, seqfrom, seqto, seqoffset);
4467 **/
4468            if (seqoffset < 0) seqoffset = 0;  /*!!!!!!!!!!!!!!!!!!!!*/
4469            if ( j == 0 && seqoffset > seqfrom) {
4470               seqfrom = ReadBufferFromSep (spp, buffertmp[j], (Int4)seqfrom,
4471                                          (Int4)(seqfrom + seqoffset), 0);
4472            }
4473 /**
4474            if ( j == 0)
4475              WriteLog ("2> %d   %ld  %ld \n", seqfrom, sum_lens, seqfrom + sum_lens);
4476            else
4477              WriteLog ("2> %d   %ld  %ld \n", seqfrom, sum_lens, seqfrom + sum_lens);
4478 **/
4479            alglens = readbuff_fromseqalign (spp, salptmp, j, buffertmp[j], seqfrom, seqfrom + sum_lens, (seqoffset + gapoffset), strandtmp);
4480            if (alglens == 0) {
4481               return NULL;
4482            }
4483 /**
4484            WriteLog ("3> %d  %c%c%c \n", j, buffertmp[j][0], buffertmp[j][1], buffertmp[j][2]);
4485            WriteLog ("4>  %ld     %ld %ld  %ld < %ld\n", alglens, sum_lens, sum_lens + seqfrom, seqfrom + dsp_len, bsp_len);
4486 **/
4487            if ( j == 0 && (seqfrom + dsp_len) < bsp_len) {
4488               alglens = ReadBufferFromSep (spp, buffertmp[j], (Int4)(seqfrom + dsp_len), (Int4)bsp_len, alglens);
4489            }
4490            else {
4491               for (k = alglens; k < step; k++)
4492                  if (buffertmp[j][k] != '-') buffertmp[j][k] = '-';
4493            }
4494            SeqPortFree(spp);
4495            if (j>0)
4496               ValNodeAddInt (&vnpstrand, 1, (Int4)strandtmp);
4497            if (strand_nonull)
4498               strandp++;
4499        }
4500        str = (CharPtr)MemNew ((size_t) ((step+10) * sizeof(Char)));
4501        bufferout [curnseq] = str;
4502        for (j=0; j < curnseq + 1; j++) {
4503            for (k=0; k<step; k++)
4504               bufferout [j][k] = '-';
4505        }
4506        k1=k2=k3=letter=0;
4507        while ( k1 < step && k2 < step && k3 < step)
4508        {
4509            c1 = bufferin[0][k1];
4510            c2 = buffertmp[0][k2];
4511            if ((c1 != '-' && c2 != '-') || (c1 == '-' && c2 == '-')) {
4512               if (c1 != '-' && c2 != '-' && c1 != c2) {
4513 /**
4514                  WriteLog ("ERROR %d [%c]  %d [%c] %c%c%c%c%c  %c%c%c%c%c", k1, c1, k2, c2, c1,bufferin[0][k1+1], bufferin[0][k1+2], bufferin[0][k1+3], bufferin[0][k1+4], c2,buffertmp[0][k2+1], buffertmp[0][k2+2], buffertmp[0][k2+3], buffertmp[0][k2+4]);
4515 **/
4516                  break;
4517               }
4518               for (j = 0; j < curnseq; ++j) {
4519                  bufferout[j][k3] = bufferin[j][k1];
4520               }
4521               for (j = 1; j < 2; ++j) {
4522                  bufferout[curnseq + j -1][k3] = buffertmp[j][k2];
4523               }
4524               if (bufferout[0][k3] !='-') letter++;
4525 /***
4526               WriteLog ("%d %d>%c ", (int)k3, (int)letter, c2);
4527               for (j=0; j<curnseq + 1; j++) WriteLog ("%c", bufferout[j][k3]);
4528               WriteLog ("\n");
4529 ***/
4530               k1++;
4531               k2++;
4532               k3++;
4533            }
4534            else if (c1 == '-' && c2 != '-') {
4535               for (j = 0; j < curnseq; ++j) {
4536                  bufferout[j][k3] = bufferin[j][k1];
4537               }
4538               if (bufferout[0][k3] !='-') letter++;
4539 /***
4540               WriteLog ("%d %d*%c ", (int)k3, (int)letter, c2);
4541               for (j=0; j<curnseq + 1; j++) WriteLog ("%c", bufferout[j][k3]);
4542               WriteLog ("\n");
4543 ***/
4544               k1++;
4545               k3++;
4546            }
4547            else if (c1 != '-' && c2 == '-') {
4548               for (j = 1; j < 2; ++j) {
4549                  bufferout[curnseq + j -1][k3] = buffertmp[j][k2];
4550               }
4551               if (bufferout[0][k3] !='-') letter++;
4552 /***
4553               WriteLog ("%d %d<%c ", (int)k3, (int)letter,c2);
4554               for (j=0; j<curnseq + 1; j++) WriteLog ("%c", bufferout[j][k3]);
4555               WriteLog ("\n");
4556 ***/
4557               k2++;
4558               k3++;
4559            }
4560        }
4561        if (k3 > 0) {
4562            if (k3 > max_lens)
4563               max_lens = k3;
4564            for (j=0; j < curnseq ; j++) {
4565               for (k=0; k<step; k++) {
4566                  bufferin[j][k] = bufferout[j][k];
4567               }
4568            }
4569            str = (CharPtr)MemNew ((size_t) ((step+10) * sizeof(Char)));
4570            bufferin[j] = str;
4571            for (k=0; k<step; k++) {
4572               bufferin[j][k] = bufferout[j][k] ;
4573            }
4574            curnseq++;
4575        }
4576       }
4577      }
4578   }
4579   MemFree (buffertmp[0]);
4580   MemFree (buffertmp[1]);
4581   for (j=0; j < curnseq ; j++)
4582      MemFree (bufferout[j]);
4583 
4584   vnp = NULL;
4585   for (j=0; j < curnseq; j++) {
4586      ValNodeAddPointer (&vnp, 0, (Pointer)(bufferin[j]));
4587   }
4588   sap = LocalAlignToSeqAnnotDimn (vnp, siphead, vnpfrom, curnseq, max_lens, vnpstrand, TRUE);
4589   sipcur = siphead;
4590   while (sipcur) {
4591      siptmp = sipcur->next;
4592      SeqIdFree (sipcur);
4593      sipcur = siptmp;
4594   }
4595   ValNodeFreeData (vnp);
4596   ValNodeFree (vnpfrom);
4597   ValNodeFree (vnpstrand);
4598   return sap;
4599 }
4600 
PairSeqAlign2MultiSeqAlign(SeqAlignPtr salp)4601 NLM_EXTERN SeqAlignPtr PairSeqAlign2MultiSeqAlign (SeqAlignPtr salp)
4602 {
4603   SeqAnnotPtr sap;
4604   SeqAlignPtr salptmp = NULL;
4605 
4606   if (salp!=NULL)
4607   {
4608    if (is_dim2seqalign (salp))
4609    {
4610      sap = multseqalign_from_pairseqalign (salp);
4611      if (sap!=NULL && sap->type==2)
4612      {
4613         salptmp = (SeqAlignPtr) sap->data;
4614         sap->data = NULL;
4615         SeqAnnotFree (sap);
4616 /******************************** DO NOT FREE Original SeqAlign anymore
4617         salp = SeqAlignSetFree (salp);
4618 ****************************************/
4619      }
4620    }
4621   }
4622   return salptmp;
4623 }
4624 
4625 
MultSeqAlignFromPairSeqAlign(Pointer data)4626 NLM_EXTERN Int2 LIBCALLBACK MultSeqAlignFromPairSeqAlign (Pointer data)
4627 
4628 {
4629   OMProcControlPtr  ompcp;
4630   SeqAnnotPtr       sap;
4631   SeqAlignPtr       salp;
4632   Uint2             entityID;
4633 
4634   ompcp = (OMProcControlPtr) data;
4635   if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
4636   switch (ompcp->input_itemtype) {
4637     case OBJ_SEQALIGN :
4638       break;
4639     case 0 :
4640       return OM_MSG_RET_ERROR;
4641     default :
4642       return OM_MSG_RET_ERROR;
4643   }
4644   if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
4645   salp = (SeqAlignPtr)ompcp->input_data;
4646   if (salp == NULL) return OM_MSG_RET_ERROR;
4647   if (is_dim2seqalign (salp))  {
4648      SortSeqAlign (&salp);
4649      sap = multseqalign_from_pairseqalign (salp);
4650   } else {
4651      sap = SeqAnnotNew ();
4652      sap->type = 2;
4653      sap->data = (Pointer) salp;
4654   }
4655   if (sap != NULL) {
4656     entityID = ObjMgrRegister (OBJ_SEQANNOT, (Pointer) sap);
4657     if (entityID > 0) {
4658       ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
4659     }
4660   }
4661   return OM_MSG_RET_DONE;
4662 }
4663 
multseqalign_to_pairseqalign(SeqAlignPtr salp)4664 NLM_EXTERN SeqAlignPtr LIBCALL multseqalign_to_pairseqalign (SeqAlignPtr salp)
4665 {
4666   SeqAlignPtr salpnew =NULL,
4667               salphead=NULL,
4668               nextsalp;
4669   DenseSegPtr dsp =NULL,
4670               newdsp;
4671   SeqIdPtr    sip;
4672   Int4Ptr     dsplen;
4673   Int4        k, j;
4674   Int2        salp_dim,
4675               dim;
4676 
4677   if (salp==NULL)
4678      return NULL;
4679   if (salp->segtype != 2)
4680      return salp;
4681   if (is_dim2seqalign (salp))
4682      return salp;
4683   salp_dim = salp->dim;
4684   dsp = (DenseSegPtr) salp->segs;
4685   salp->segs = NULL;
4686   for (dim=0; dim<salp_dim-1; dim++)
4687   {
4688      newdsp = DenseSegNew ();
4689      if ( newdsp != NULL )
4690      {
4691            newdsp->dim = 2;
4692            newdsp->ids = SeqIdDup (dsp->ids);
4693            for (sip=dsp->ids, j=0; sip!=NULL && j<dim+1; sip=sip->next, j++)
4694               continue;
4695            newdsp->ids->next = SeqIdDup (sip);
4696            newdsp->numseg = dsp->numseg;
4697 
4698            newdsp->lens  =(Int4Ptr)MemNew((size_t)((newdsp->numseg + 2) * sizeof (Int4)));
4699            for (j = 0, dsplen = dsp->lens; j < newdsp->numseg; j++, dsplen++ )
4700               newdsp->lens [j] = *dsplen;
4701 
4702            newdsp->starts=(Int4Ptr)MemNew((size_t)((2 *newdsp->numseg +4) * sizeof (Int4)));
4703            for (k = 0; k < newdsp->numseg; k++) {
4704               for (j=0; j<dsp->dim; j++) {
4705                  if (j==0)
4706                     newdsp->starts[k*newdsp->dim]=dsp->starts[k*dsp->dim];
4707                  if (j==dim+1)
4708                     newdsp->starts[k*newdsp->dim+1]=dsp->starts[k*dsp->dim+j];
4709               }
4710            }
4711 
4712            if (dsp->strands != NULL) {
4713               newdsp->strands =(Uint1Ptr)MemNew((size_t)((2 *newdsp->numseg+4)*sizeof (Uint1)));
4714               for (k = 0; k < newdsp->numseg; k++) {
4715                  for (j=0; j<dsp->dim; j++) {
4716                     if (j==0)
4717                        newdsp->starts[k*newdsp->dim]=dsp->starts[k*dsp->dim];
4718                     if (j==dim+1)
4719                        newdsp->starts[k*newdsp->dim+1]=dsp->starts[k*dsp->dim+j];
4720                  }
4721               }
4722            }
4723         if (salphead==NULL) {
4724            salp->type = 3;
4725            salp->segtype = 2;
4726            salp->dim = 2;
4727            salp->score = NULL;
4728            salp->segs = (Pointer) newdsp;
4729            salphead= salp;
4730            nextsalp = salp;
4731         }
4732         else {
4733            salpnew = SeqAlignNew();
4734            if (salpnew) {
4735               salpnew->type = 3;
4736               salpnew->segtype = 2;
4737               salpnew->dim = 2;
4738               salpnew->score = NULL;
4739               salpnew->segs = (Pointer) newdsp;
4740               salpnew = Compact (salpnew);
4741               nextsalp->next = salpnew;
4742               nextsalp = salpnew;
4743            }
4744         }
4745      }
4746   }
4747   return salphead;
4748 }
4749 
4750