1 /* salsap.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: salsap.c
27 *
28 * Author: Colombe Chappey
29 *
30 * Version Creation Date: 1/27/96
31 *
32 * $Revision: 6.16 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44 #include <salsap.h>
45 #include <salpedit.h>
46 #include <salutil.h>
47 #include <sqnutils.h>
48 #include <edutil.h>
49 #include <objmgr.h>
50 #include <seqmgr.h>
51
SeqAlignIDList(SeqAlignPtr salp)52 NLM_EXTERN SeqIdPtr LIBCALL SeqAlignIDList (SeqAlignPtr salp)
53 {
54 return SeqIdPtrFromSeqAlign(salp);
55 }
56
57 /* Used in salsa.c once */
FindSeqIdinSeqAlign(SeqAlignPtr salphead,SeqIdPtr sip)58 NLM_EXTERN Boolean LIBCALL FindSeqIdinSeqAlign (SeqAlignPtr salphead, SeqIdPtr sip)
59 {
60 return SeqAlignFindSeqId (salphead, sip);
61 }
62
63 /*
64 static Boolean LIBCALL SeqIdInSeqLocList (SeqIdPtr sip, ValNodePtr vnp)
65 {
66 return SeqIdInSeqLocList(sip, vnp);
67 }
68 */
is_dim1seqalign(SeqAlignPtr salp)69 NLM_EXTERN Boolean LIBCALL is_dim1seqalign (SeqAlignPtr salp)
70 {
71 if (salp->dim == 1) {
72 return TRUE;
73 }
74 return FALSE;
75 }
76
is_dim2seqalign(SeqAlignPtr salp)77 NLM_EXTERN Boolean LIBCALL is_dim2seqalign (SeqAlignPtr salp)
78 {
79 SeqAlignPtr nextsalp;
80 DenseSegPtr dsp;
81 SeqIdPtr sip;
82
83 if (salp->dim == 2 && salp->next != NULL) {
84 if (salp->segtype == 2) {
85 dsp = (DenseSegPtr) salp->segs;
86 sip = dsp->ids;
87 nextsalp = salp->next;
88 while (nextsalp) {
89 if (nextsalp->dim != 2)
90 break;
91 if (nextsalp->segtype != 2)
92 break;
93 dsp = (DenseSegPtr) nextsalp->segs;
94 if (! SeqIdForSameBioseq (sip, dsp->ids))
95 break;
96 nextsalp = nextsalp->next;
97 }
98 if (nextsalp == NULL)
99 return TRUE;
100 }
101 }
102 return FALSE;
103 }
104
is_fasta_seqalign(SeqAlignPtr salp)105 NLM_EXTERN Boolean LIBCALL is_fasta_seqalign (SeqAlignPtr salp)
106 {
107 DenseSegPtr dsp;
108 Int4Ptr startp;
109 Boolean gap;
110 Int4 k;
111 Int2 j;
112 Boolean one_gap = FALSE;
113
114 if (salp->dim > 2) {
115 if (salp->segtype == 2) {
116 dsp = (DenseSegPtr) salp->segs;
117 for (j=0; j<dsp->dim; j++) {
118 gap=FALSE;
119 for (k=0; k<dsp->numseg; k++) {
120 startp=dsp->starts;
121 if (startp[dsp->dim*k + j] < 0) {
122 gap = TRUE;
123 one_gap = TRUE;
124 }
125 else if (gap)
126 return FALSE;
127 }
128 }
129 }
130 }
131 else
132 return FALSE;
133 if (!one_gap)
134 return FALSE;
135 return TRUE;
136 }
137
138
is_salp_in_sap(SeqAnnotPtr sap,Uint1 choice)139 NLM_EXTERN SeqAlignPtr LIBCALL is_salp_in_sap (SeqAnnotPtr sap, Uint1 choice)
140 {
141 SeqAlignPtr salp = NULL;
142
143 if (sap != NULL) {
144 for (; sap!= NULL; sap=sap->next) {
145 if (sap->type == choice) {
146 salp = (SeqAlignPtr) sap->data;
147 return salp;
148 }
149 }
150 }
151 return NULL;
152 }
153
154
155
156
157 /*
158 Function to Check if at least one of the SeqIds of a SeqLocList
159 is the same as one of the SeqIds in the SeqAlign
160 */
161
SeqAlignSeqLocComp(SeqAlignPtr salphead,ValNodePtr vnp)162 NLM_EXTERN Boolean LIBCALL SeqAlignSeqLocComp (SeqAlignPtr salphead, ValNodePtr vnp)
163 {
164 SeqAlignPtr salp;
165 ValNodePtr tmp;
166 SeqLocPtr slp;
167 SeqIdPtr sip,
168 tmpsip;
169
170 for (tmp=vnp; tmp!=NULL; tmp=tmp->next)
171 {
172 slp= (SeqLocPtr)tmp->data.ptrvalue;
173 if (slp)
174 {
175 sip=SeqLocId(slp);
176 for (salp = salphead; salp!= NULL; salp=salp->next)
177 {
178 tmpsip =SeqIdPtrFromSeqAlign (salp);
179 if ((SeqIdOrderInBioseqIdList(sip, tmpsip)) > 0)
180 {
181 return TRUE;
182 }
183 }
184 }
185 }
186 return FALSE;
187 }
188
189 /*
190 SeqAlignBestScore : Find The Last Score of type "score"
191 In the SeqAlign: Does not traverse Chain.
192 */
SeqAlignBestScore(SeqAlignPtr salp)193 NLM_EXTERN Int4 LIBCALL SeqAlignBestScore (SeqAlignPtr salp)
194 {
195 ScorePtr score;
196 ObjectIdPtr oip;
197 Int4 val = 0;
198
199 if (salp!=NULL) {
200 if (salp->segtype == 2) {
201 for (score = (ScorePtr) salp->score; score!=NULL; score=score->next)
202 {
203 oip = (ObjectIdPtr) score->id;
204 if (oip != NULL) {
205 if (StringStr(oip->str, "score")!=NULL) {
206 if (score->choice == 1) {
207 val = (Int4)score->value.intvalue;
208 }
209 }
210 }
211 }
212 }
213 }
214 return val;
215 }
216
SeqLocMixFromSeqAlign(SeqAlignPtr salp,SeqIdPtr sip)217 NLM_EXTERN SeqLocPtr LIBCALL SeqLocMixFromSeqAlign (SeqAlignPtr salp, SeqIdPtr sip)
218 {
219 DenseSegPtr dsp;
220 SeqLocPtr headslp = NULL,
221 slp = NULL,
222 tmp;
223 SeqIdPtr siptmp;
224 Int4Ptr lens, start,
225 start2;
226 Int4 salp_start, salp_stop;
227 Int2 offset, j;
228
229 if (salp == NULL)
230 return NULL;
231 if (salp->segtype != 2)
232 return NULL;
233 dsp = (DenseSegPtr) salp->segs;
234 if (dsp == NULL)
235 return NULL;
236 offset = 0;
237 if (sip == NULL)
238 sip = dsp->ids;
239 else {
240 sip = SeqIdFindBest (sip, 0);
241 for (siptmp=dsp->ids; siptmp!=NULL; siptmp=siptmp->next, offset++) {
242 if (SeqIdForSameBioseq (sip,siptmp))
243 break;
244 }
245 if (siptmp == NULL)
246 return NULL;
247 }
248 start=dsp->starts + offset;
249 if (offset==0)
250 start2 = start+1;
251 else
252 start2 = dsp->starts;
253 lens=dsp->lens;
254 for (j=0; j<dsp->numseg; j++) {
255 if (*start > -1)
256 break;
257 start +=dsp->dim;
258 start2+=dsp->dim;
259 lens++;
260 }
261 for (; j<dsp->numseg; j++, lens++) {
262 if (*start > -1 && *start2 > -1) {
263 salp_start = *start;
264 salp_stop = salp_start + *lens -1;
265 slp = SeqLocIntNew (salp_start, salp_stop, Seq_strand_plus, sip);
266 if (headslp == NULL) {
267 headslp = slp;
268 }
269 else if (headslp->choice == SEQLOC_MIX) {
270 tmp = (ValNodePtr)(headslp->data.ptrvalue);
271 while (tmp->next != NULL)
272 tmp = tmp->next;
273 tmp->next = slp;
274 }
275 else {
276 tmp = ValNodeNew(NULL);
277 tmp->choice = SEQLOC_MIX;
278 tmp->data.ptrvalue = (Pointer)headslp;
279 headslp = tmp;
280 tmp = (ValNodePtr)(headslp->data.ptrvalue);
281 tmp->next = slp;
282 }
283 }
284 start +=dsp->dim;
285 start2+=dsp->dim;
286 }
287 return headslp;
288 }
289
290
SeqLocListOfBioseqsFromSeqAlign(SeqAlignPtr salp)291 NLM_EXTERN ValNodePtr LIBCALL SeqLocListOfBioseqsFromSeqAlign (SeqAlignPtr salp)
292 {
293 SeqAlignPtr salptmp;
294 BioseqPtr bsp;
295 ValNodePtr vnp = NULL;
296 DenseSegPtr dsp;
297 DenseDiagPtr ddp;
298 SeqIdPtr sip,
299 siptmp;
300 SeqLocPtr slp;
301
302 if (salp==NULL)
303 return NULL;
304 for (salptmp = salp; salptmp!=NULL; salptmp=salptmp->next)
305 {
306 sip = NULL;
307 if (salptmp->segtype == 1)
308 {
309 ddp = (DenseDiagPtr) salptmp->segs;
310 sip = ddp->id;
311 }
312 else if (salptmp->segtype == 2)
313 {
314 dsp = (DenseSegPtr) salptmp->segs;
315 sip = dsp->ids;
316 }
317 if (sip != NULL)
318 {
319 for (; sip != NULL; sip = sip->next)
320 {
321 siptmp = SeqIdDup (sip);
322 if (vnp==NULL || (vnp!=NULL && !SeqIdInSeqLocList (siptmp, vnp)))
323 {
324 bsp = BioseqLockById (siptmp);
325 if (bsp!= NULL)
326 {
327 slp = SeqLocIntNew (0, bsp->length-1, Seq_strand_plus, siptmp);
328 if (slp != NULL)
329 {
330 ValNodeAddPointer (&vnp, 0, (Pointer) slp);
331 }
332 BioseqUnlock (bsp);
333 }
334 }
335 }
336 }
337 }
338 return vnp;
339 }
340
341
MySeqIdSetFree(SeqIdPtr sip)342 static SeqIdPtr LIBCALL MySeqIdSetFree (SeqIdPtr sip)
343 {
344 SeqIdPtr next;
345
346 while (sip != NULL)
347 {
348 next = sip->next;
349 sip->next = NULL;
350 if (sip!=NULL) {
351 if (sip->choice > 0 && sip->choice < 30) SeqIdFree(sip);
352 }
353 else break;
354 sip = next;
355 }
356 return NULL;
357 }
358
CompSeqAlignFree(SeqAlignPtr salp)359 NLM_EXTERN SeqAlignPtr LIBCALL CompSeqAlignFree (SeqAlignPtr salp)
360 {
361 CompSegPtr csp;
362
363 csp = (CompSegPtr) salp->segs;
364 if (csp->ids != NULL) MySeqIdSetFree (csp->ids);
365 csp->ids=NULL;
366 if (csp->from != NULL) MemFree (csp->from);
367 csp->from=NULL;
368 if (csp->lens != NULL) MemFree (csp->lens);
369 csp->lens=NULL;
370 if (csp->starts!= NULL) MemFree (csp->starts);
371 csp->starts=NULL;
372 MemFree ( csp);
373 salp->segs = NULL;
374 SeqAlignSetFree (salp);
375 return NULL;
376 }
377
CompSeqAnnotFree(SeqAnnotPtr sap)378 NLM_EXTERN SeqAnnotPtr LIBCALL CompSeqAnnotFree (SeqAnnotPtr sap)
379 {
380 SeqAlignPtr salp;
381
382 if ( sap == NULL ) return NULL;
383 if ( sap->type != 2 ) {
384 ErrPostEx(SEV_WARNING, 0, 0, "fail in CompSeqAnnotFree [1] %d", (int)sap->type);
385 return NULL;
386 }
387 if ( ( salp = (SeqAlignPtr) sap->data ) == NULL ) {
388 ErrPostEx(SEV_WARNING, 0, 0, "fail in CompSeqAnnotFree [2]");
389 return NULL;
390 }
391 if ( salp->segtype == COMPSEG )
392 CompSeqAlignFree (salp);
393 else if ( salp->segtype == 2 )
394 SeqAnnotFree (sap);
395 else {
396 ErrPostEx(SEV_WARNING, 0, 0, "fail in CompSeqAnnotFree [3] %d", salp->segtype);
397 return NULL;
398 }
399 sap->data = NULL;
400 return NULL;
401 }
402
SeqAlignBoolSegCpy(SeqAnnotPtr sap,Int4 from,Int4 to)403 NLM_EXTERN SeqAnnotPtr LIBCALL SeqAlignBoolSegCpy (SeqAnnotPtr sap, Int4 from, Int4 to)
404 {
405 SeqAnnotPtr sapnew =NULL;
406 SeqAlignPtr salp, salpnew =NULL, salpre =NULL;
407 CompSegPtr csp =NULL;
408 CompSegPtr newcsp =NULL;
409 BoolPtr cspstart =NULL;
410 Int4Ptr csplens =NULL, newcsplens =NULL;
411 Int4Ptr cspfrom =NULL, newcspfrom =NULL;
412 Int4 j, k, nbseq, numseg, newnumseg;
413 Int4 sumlens, offset;
414
415 if ( sap == NULL ) return NULL;
416 if ( sap->type != 2 ) {
417 ErrPostEx(SEV_WARNING, 0, 0, "fail in SeqAlignBoolSegCpy [1]");
418 return NULL;
419 }
420 if ( ( salp = (SeqAlignPtr) sap->data ) == NULL ) {
421 ErrPostEx(SEV_WARNING, 0, 0, "fail in SeqAlignBoolSegCpy [1-2]");
422 return NULL;
423 }
424 if ( salp->segtype != COMPSEG ) {
425 ErrPostEx(SEV_WARNING, 0, 0, "fail in SeqAlignBoolSegCpy [2]");
426 return NULL;
427 }
428 sapnew = SeqAnnotNew ();
429 if (sapnew == NULL) {
430 return NULL;
431 }
432 sapnew->type = 2;
433
434 while ( salp != NULL )
435 {
436 salpnew = SeqAlignNew ();
437 if ( salpnew == NULL ) {
438 SeqAnnotFree (sapnew);
439 return NULL;
440 }
441 salpnew->type = 3;
442 salpnew->segtype = COMPSEG;
443 salpnew->dim = salp->dim;
444 csp = (CompSegPtr) salp->segs;
445 newcsp = (CompSegPtr) MemNew ( sizeof (CompSeg) );
446 if ( newcsp == NULL ) {
447 ErrPostEx(SEV_WARNING, 0, 0, "fail in SeqAlignBoolSegCpy [4]");
448 SeqAnnotFree (sapnew);
449 return NULL;
450 }
451 salpnew->segs = (Pointer) newcsp;
452 nbseq = newcsp->dim = csp->dim;
453 newcsp->ids = SeqIdDupList (csp->ids);
454 numseg = newcsp->numseg = csp->numseg;
455
456 /* copy the first position (from) + sum of length */
457 newcsp->from = (Int4Ptr) MemNew ((size_t) ((nbseq +2) * sizeof (Int4)));
458 if ( newcsp->from == NULL ) {
459 SeqAnnotFree (sapnew);
460 return NULL;
461 }
462 for (j = 0; j < nbseq; j++) {
463 offset = -1;
464 sumlens = 0;
465 csplens = csp->lens;
466 cspstart = csp->starts;
467 cspstart += j;
468 for (k = 0; k < numseg ; k++, csplens++, cspstart +=nbseq) {
469 if ( from >= sumlens && from < sumlens + *csplens ) {
470 if ( (Boolean)(*cspstart) ) offset =(sumlens +*csplens -from);
471 break;
472 }
473 if ( (Boolean)(*cspstart) ) sumlens += (*csplens);
474 }
475 if ( offset < 0 )
476 for (; k < numseg && offset < 0; k++) {
477 if ( (Boolean)(*cspstart) ) offset = 0;
478 }
479 newcsp->from [j] = csp->from [j] +offset;
480 }
481 /* count number of new segments between from and to */
482 sumlens =0;
483 newnumseg =0;
484 cspstart = csp->starts;
485 for (k=0; k<numseg; k++, sumlens += (Int4)(*csplens), csplens++, cspstart += (Int4)(nbseq))
486 {
487 if ( from >= sumlens && from < sumlens + *csplens ) break;
488 }
489 for (; k<numseg; k++, sumlens += *csplens, csplens++, cspstart +=nbseq)
490 {
491 newnumseg++;
492 if ( to >= sumlens && to < sumlens + *csplens ) break;
493 }
494 /* copy segment lengths within from and to */
495 newcsp->lens =(Int4Ptr) MemNew ((size_t) ((newnumseg+2) * sizeof (Int4)));
496 if ( newcsp->lens == NULL ) {
497 SeqAnnotFree (sapnew);
498 return NULL;
499 }
500 sumlens =0;
501 csplens = csp->lens;
502 cspstart = csp->starts;
503 newcsplens = newcsp->lens;
504 for (k=0; k<numseg; k++, sumlens += *csplens, csplens++, cspstart +=nbseq)
505 {
506 if ( from >= sumlens && from < sumlens + *csplens ) break;
507 }
508 *newcsplens = sumlens +*csplens -from;
509 newcsplens++;
510 csplens++;
511 k++;
512 for (; k<numseg; k++, sumlens += *csplens, csplens++, cspstart +=nbseq)
513 {
514 *newcsplens = *csplens;
515 if ( to >= sumlens && to < sumlens + *csplens ) break;
516 newcsplens++;
517 }
518 *newcsplens = sumlens -to;
519 newcsp->starts =(BoolPtr) MemNew((size_t)((nbseq *newnumseg +2) * sizeof(Boolean)));
520 if ( newcsp->starts == NULL ) {
521 SeqAnnotFree (sapnew);
522 return NULL;
523 }
524 for (j = 0; j < nbseq; j++)
525 {
526 sumlens = 0;
527 csplens = csp->lens;
528 cspfrom = csp->from;
529 cspfrom += j;
530 newcspfrom = newcsp->from;
531 newcspfrom += j;
532 for(k=0;k<numseg; k++,sumlens +=*csplens, csplens++, cspstart +=nbseq)
533 {
534 if ( from >= sumlens && from < sumlens + *csplens ) break;
535 }
536 for (; k<numseg; k++, sumlens +=*csplens, csplens++, cspstart +=nbseq)
537 {
538 *newcsplens = *csplens;
539 if ( to >= sumlens && to < sumlens + *csplens ) break;
540 }
541 }
542 /*
543 csp->strands
544 csp->scores
545 */
546 if ( salpre == NULL ) sapnew->data = (Pointer) salpnew;
547 else salpre->next = salpnew;
548 salpre = salpnew;
549 salp = salp->next;
550 }
551 return sapnew;
552 }
553
SeqAlignDenseSegToBoolSeg(SeqAlignPtr salp)554 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDenseSegToBoolSeg (SeqAlignPtr salp)
555 {
556 SeqAlignPtr salpnew =NULL;
557 DenseSegPtr dsp =NULL;
558 Int4Ptr dspstarts =NULL, dspstarts2 = NULL;
559 Int4Ptr dsplens =NULL;
560 Uint1Ptr strandp;
561 Uint1 strand;
562 CompSegPtr csp =NULL;
563 Int4 j;
564 Int4 nbseq, numseg;
565
566 salpnew = SeqAlignNew ();
567 if ( salpnew == NULL ) {
568 return NULL;
569 }
570 salpnew->type = 3;
571 salpnew->segtype = COMPSEG;
572 salpnew->dim = salp->dim;
573
574 salpnew->score = salp->score;
575 salp->score = NULL;
576
577 dsp = (DenseSegPtr) salp->segs;
578 csp = (CompSegPtr) MemNew ( sizeof (CompSeg) );
579 if ( csp == NULL ) {
580 return NULL;
581 }
582 salpnew->segs = (Pointer) csp;
583 csp->dim = dsp->dim;
584 nbseq = dsp->dim;
585
586 csp->ids = SeqIdDupBestList (dsp->ids);
587 csp->numseg = dsp->numseg;
588 numseg = dsp->numseg;
589
590 if (dsp->strands != NULL) {
591 strandp = dsp->strands;
592 csp->strands=(Uint1Ptr)MemNew((size_t)((nbseq*numseg+4)*sizeof (Uint1)));
593 for (j=0; j<nbseq*numseg; j++, strandp++)
594 csp->strands[j] = *strandp;
595 }
596 csp->lens = (Int4Ptr) MemNew ((size_t) ((numseg+2) * sizeof (Int4)));
597 if ( csp->lens == NULL ) {
598 return NULL;
599 }
600 for (j = 0, dsplens = dsp->lens; j < numseg ; j++, dsplens++ )
601 csp->lens [j] = *dsplens;
602 csp->from = (Int4Ptr) MemNew ((size_t) ((nbseq +2) * sizeof (Int4)));
603 if ( csp->from == NULL ) {
604 return NULL;
605 }
606 for (j = 0; j < nbseq +2; j++)
607 csp->from [j] = 0;
608 dspstarts=dsp->starts;
609 strandp = dsp->strands;
610 if (strandp!=NULL)
611 strand = *strandp;
612 else
613 strand = Seq_strand_unknown;
614 for (j = 0; j < nbseq ; j++, dspstarts++)
615 {
616 dsplens = dsp->lens;
617 if (*dspstarts < 0) {
618 for(dspstarts2=dspstarts;dspstarts2!=NULL;dspstarts2+=nbseq) {
619 if (*dspstarts2 > -1)
620 break;
621 dsplens++;
622 }
623 if (dspstarts2!=NULL) {
624 if (strand==Seq_strand_minus)
625 csp->from [j] = *dspstarts2 + *dsplens;
626 else
627 csp->from [j] = *dspstarts2;
628 }
629 }
630 else {
631 if (strand==Seq_strand_minus)
632 csp->from [j] = *dspstarts + *dsplens;
633 else
634 csp->from [j] = *dspstarts;
635 }
636 if (strandp!=NULL) {
637 strandp++;
638 strand = *strandp;
639 } else strand = Seq_strand_unknown;
640 }
641 csp->starts=(BoolPtr) MemNew((size_t)((nbseq *numseg +2) * sizeof(Boolean)));
642 if ( csp->starts == NULL ) {
643 return NULL;
644 }
645 for (j =0, dspstarts =dsp->starts; j <nbseq*numseg ; j++, dspstarts++ ) {
646 csp->starts [j] = (Boolean) (*dspstarts >= 0);
647 }
648 return salpnew;
649 }
650
SeqAnnotDenseSegToBoolSeg(SeqAnnotPtr sap)651 NLM_EXTERN SeqAnnotPtr LIBCALL SeqAnnotDenseSegToBoolSeg (SeqAnnotPtr sap)
652 {
653 SeqAnnotPtr sapnew =NULL;
654 SeqAlignPtr salp=NULL, salpnew=NULL, salpre=NULL;
655
656 if ( sap == NULL ) return NULL;
657 if ( sap->type != 2 ) {
658 return NULL;
659 }
660 if ( ( salp = (SeqAlignPtr) sap->data ) == NULL ) {
661 return NULL;
662 }
663 if ( salp->segtype == COMPSEG ) return sap;
664 if ( salp->segtype != 2 ) {
665 return NULL;
666 }
667 sapnew = SeqAnnotNew ();
668 if (sapnew == NULL) {
669 return NULL;
670 }
671 sapnew->type = 2;
672 while ( salp != NULL )
673 {
674 salpnew = SeqAlignDenseSegToBoolSeg (salp);
675 if ( salpre == NULL ) sapnew->data = (Pointer) salpnew;
676 else salpre->next = salpnew;
677 salpre = salpnew;
678 salp = salp->next;
679 }
680 return sapnew;
681 }
682
SeqAlignBoolSegToDenseSeg(SeqAlignPtr salp)683 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignBoolSegToDenseSeg (SeqAlignPtr salp)
684 {
685 SeqAlignPtr salpnew =NULL;
686 DenseSegPtr dsp =NULL;
687 CompSegPtr csp =NULL;
688 BoolPtr cspstarts =NULL;
689 Int4Ptr cspfrom =NULL;
690 Int4Ptr csplens =NULL;
691 Uint1Ptr strandp;
692 Uint1 strand;
693 Int4Ptr dspstarts =NULL;
694 Int4 sumlens, k;
695 Int4 j, nbseq, numseg;
696
697 salpnew = SeqAlignNew ();
698 if (salpnew != NULL) {
699 salpnew->type = 3;
700 salpnew->segtype = 2;
701 salpnew->dim = salp->dim;
702
703 salpnew->score = salp->score;
704 salp->score = NULL;
705
706 csp = (CompSegPtr) salp->segs;
707 dsp = DenseSegNew ();
708 if ( dsp == NULL ) {
709 return NULL;
710 }
711 salpnew->segs = (Pointer) dsp;
712 nbseq = csp->dim;
713 dsp->dim = csp->dim;
714 dsp->ids = SeqIdDupList (csp->ids);
715 dsp->numseg = csp->numseg;
716 numseg = csp->numseg;
717
718 dsp->lens =(Int4Ptr)MemNew((size_t)((numseg + 2) * sizeof (Int4)));
719 for (j = 0, csplens = csp->lens; j < numseg ; j++, csplens++ )
720 dsp->lens [j] = *csplens;
721 if (csp->strands != NULL) {
722 strandp=csp->strands;
723 dsp->strands =(Uint1Ptr)MemNew((size_t)((nbseq*numseg+4)*sizeof (Uint1)));
724 for (j=0; j<nbseq*numseg; j++, strandp++)
725 dsp->strands[j] = *strandp;
726 }
727 dsp->starts=(Int4Ptr)MemNew((size_t)((nbseq *numseg +4) * sizeof (Int4)));
728 for (k = 0; k < nbseq *numseg +4; k++)
729 dsp->starts[k] = -1;
730 cspstarts = csp->starts;
731 dspstarts = dsp->starts;
732 strandp = csp->strands;
733 if (strandp!=NULL)
734 strand = *strandp;
735 else
736 strand = Seq_strand_unknown;
737 for (j = 0, cspfrom = csp->from; j < nbseq ; j++, cspfrom++)
738 {
739 csplens = csp->lens;
740 sumlens = 0;
741 for (k = 0; k < numseg; k++, csplens++) {
742 if ( (Boolean)(cspstarts [nbseq *k +j]) ) {
743 if (strand == Seq_strand_minus) {
744 sumlens += *csplens;
745 dspstarts [nbseq *k +j] = *cspfrom - sumlens;
746 }
747 else {
748 dspstarts [nbseq *k +j] = *cspfrom + sumlens;
749 sumlens += *csplens;
750 }
751 }
752 }
753 if (strandp!=NULL) {
754 strandp++;
755 strand = *strandp;
756 }
757 else
758 strand = Seq_strand_unknown;
759 }
760 }
761 return salpnew;
762 }
763
SeqAnnotBoolSegToDenseSeg(SeqAnnotPtr sap)764 NLM_EXTERN SeqAnnotPtr LIBCALL SeqAnnotBoolSegToDenseSeg (SeqAnnotPtr sap)
765 {
766 SeqAnnotPtr sapnew =NULL;
767 SeqAlignPtr salp, salpnew =NULL, salpre =NULL;
768
769 if ( sap == NULL ) return NULL;
770 if ( sap->type != 2 ) {
771 return NULL;
772 }
773 if ( ( salp = (SeqAlignPtr) sap->data ) == NULL ) {
774 return NULL;
775 }
776 if ( salp->segtype == 2 ) return sap;
777 if ( salp->segtype != COMPSEG ) {
778 return NULL;
779 }
780 sapnew = SeqAnnotNew ();
781 if (sapnew == NULL) return NULL;
782 sapnew->type = 2;
783
784 while ( salp != NULL )
785 {
786 salpnew = SeqAlignBoolSegToDenseSeg (salp);
787 if ( salpre == NULL ) sapnew->data = (Pointer) salpnew;
788 else salpre->next = salpnew;
789 salpre = salpnew;
790 salp = salp->next;
791 }
792 return sapnew;
793 }
794
795
CompSeqAlignPrint(SeqAlignPtr salp)796 NLM_EXTERN void LIBCALL CompSeqAlignPrint (SeqAlignPtr salp)
797 {
798 CompSegPtr csp =NULL;
799 SeqIdPtr sip;
800 BoolPtr startp;
801 Int4 j, k;
802 FILE *fout;
803 Char strLog[128];
804
805 csp = (CompSegPtr) salp->segs;
806 if (csp!=NULL) {
807 fout = FileOpen ("LogFile", "w");
808 if (fout!=NULL) {
809 fprintf (fout, "\n");
810 for (j=0, sip=csp->ids; sip!=NULL; sip=sip->next, j++)
811 {
812 SeqIdWrite (sip, strLog, PRINTID_FASTA_LONG, 120);
813 fprintf (fout, "%d %s \n", (int)(j+1), strLog);
814 }
815 fprintf (fout, "\n");
816 for (j=0; j<csp->dim; j++)
817 fprintf (fout, " %d ", (int)csp->from[j]);
818 fprintf (fout, "\n");
819 startp = csp->starts;
820 for (j = 0; j < csp->numseg; j++) {
821 fprintf (fout, "%3d lg %6ld ", (int)j, (long)csp->lens[j]);
822 for (k = 0; k < csp->dim; k++) {
823 fprintf (fout, " %d", (int)*startp);
824 startp++;
825 }
826 /*
827 for ( k = 0; k < csp->dim; k++) {
828 if (csp->strands!=NULL)
829 fprintf (fout, " %d", (int)csp->strands[(Int4)csp->dim*k+j]);
830 }
831 */
832 fprintf (fout, "\n");
833 }
834 fprintf (fout, "\n");
835 FileClose (fout);
836 }
837 }
838 }
839
build_seqalign_fromstart(Int2 dim,Int2 numseg,SeqIdPtr sip,Int4Ptr starts,Int4Ptr lens)840 NLM_EXTERN SeqAlignPtr LIBCALL build_seqalign_fromstart (Int2 dim, Int2 numseg, SeqIdPtr sip, Int4Ptr starts, Int4Ptr lens)
841 {
842 SeqAlignPtr salp;
843 DenseSegPtr dsp;
844 SeqIdPtr next;
845
846 salp = SeqAlignNew ();
847 if (salp != NULL) {
848 salp->type = 3;
849 salp->segtype = 2;
850 salp->dim = dim;
851 dsp = DenseSegNew ();
852 if (dsp != NULL) {
853 salp->segs = (Pointer) dsp;
854 dsp->dim = dim;
855 dsp->ids = sip;
856 dsp->numseg = numseg;
857 dsp->starts = starts;
858 dsp->lens = lens;
859 return salp;
860 }
861 }
862 MemFree (starts);
863 while (sip != NULL) {
864 next = sip->next;
865 sip->next = NULL;
866 SeqIdFree (sip);
867 sip = next;
868 }
869 return NULL;
870 }
871
DenseDiagDup(DenseDiagPtr ddp)872 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagDup (DenseDiagPtr ddp)
873 {
874 DenseDiagPtr ddp2;
875 ddp2 = (DenseDiagPtr) AsnIoMemCopy ((Pointer) ddp, (AsnReadFunc) DenseDiagAsnRead, (AsnWriteFunc) DenseDiagAsnWrite);
876 return ddp2;
877 }
878
SeqAlignDupRegion(SeqAlignPtr salp,Int2 to_numseg,Int4 subseg,Boolean first_part)879 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDupRegion (SeqAlignPtr salp, Int2 to_numseg, Int4 subseg, Boolean first_part)
880 {
881 SeqAlignPtr newsalp = NULL;
882 DenseSegPtr dsp = NULL, newdsp = NULL;
883 Int4Ptr dspstart = NULL;
884 Int4Ptr dsplen = NULL;
885 Int4Ptr newstart = NULL;
886 Int4Ptr newlens = NULL;
887 Int2 newseg;
888 Int2 n;
889 Int2 k;
890 Int4 j;
891
892 dsp = (DenseSegPtr) salp->segs;
893 newsalp = SeqAlignNew ();
894 if ( newsalp == NULL ) return NULL;
895 newsalp->type = salp->type;
896 newsalp->segtype = salp->segtype;
897 newsalp->dim = salp->dim;
898 newdsp = DenseSegNew ();
899 if ( newdsp == NULL ) return NULL;
900 newsalp->segs = (Pointer) newdsp;
901 n = newdsp->dim = dsp->dim;
902 newdsp->ids = SeqIdDupList (dsp->ids);
903 if ( to_numseg > dsp->numseg ) to_numseg = 0;
904 if ( to_numseg == 0 )
905 newseg = dsp->numseg;
906 else if ( first_part )
907 newseg = to_numseg;
908 else if ( !first_part )
909 newseg = dsp->numseg - to_numseg;
910 newdsp->numseg = newseg;
911 newdsp->starts = (Int4Ptr) MemNew ((size_t) ((n*newseg+4) * sizeof (Int4)));
912 if ( newdsp->starts == NULL ) return NULL;
913 for (j = 0; j < n * newseg + 4; j++) newdsp->starts [j] = -1;
914 newdsp->lens = (Int4Ptr) MemNew ((size_t) ((newseg + 2) * sizeof (Int4)));
915 if ( newdsp->lens == NULL ) return NULL;
916 for (j = 0; j < newseg + 2; j++) newdsp->lens [j] = 0;
917 newstart = newdsp->starts;
918 newlens = newdsp->lens;
919 dspstart = dsp->starts;
920 dsplen = dsp->lens;
921 if ( first_part )
922 {
923 for ( k = 0; k < newseg - 1; k++ )
924 {
925 for (j = 0; j < n; j++) newstart [j] = dspstart[j];
926 *newlens = *dsplen;
927 newstart += n;
928 newlens++;
929 dspstart += n;
930 dsplen++;
931 }
932 for (j = 0; j < n; j++) newstart [j] = dspstart[j];
933 *newlens = subseg;
934 }
935 else {
936 for ( k = 0; k < to_numseg-1; k++ )
937 {
938 dspstart += n;
939 dsplen++;
940 }
941 for (j = 0; j < n; j++)
942 if ( dspstart[j] > -1 )
943 newstart [j] = dspstart[j] + subseg;
944 else
945 newstart [j] = dspstart[j];
946 *newlens = *dsplen - subseg;
947 newstart += n;
948 newlens++;
949 dspstart += n;
950 dsplen++;
951 k++;
952 for ( ; k < to_numseg + newseg; k++ )
953 {
954 for (j = 0; j < n; j++) newstart [j] = dspstart[j];
955 *newlens = *dsplen;
956 newstart += n;
957 newlens++;
958 dspstart += n;
959 dsplen++;
960 }
961 }
962 return newsalp;
963 }
964
SeqAlignDupAdd(SeqAlignPtr * salp_head,SeqAlignPtr salp,Int2 to_numseg,Int4 subseg,Boolean first_part)965 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDupAdd (SeqAlignPtr *salp_head, SeqAlignPtr salp, Int2 to_numseg, Int4 subseg, Boolean first_part)
966 {
967 SeqAlignPtr salp_tmp, salp_copy;
968
969 salp_copy = SeqAlignDupRegion (salp, to_numseg, subseg, first_part);
970 if ( salp_copy == NULL )
971 return *salp_head;
972 if ( (salp_tmp = *salp_head) != NULL )
973 {
974 while ( salp_tmp->next != NULL )
975 salp_tmp = salp_tmp->next;
976 salp_tmp->next = salp_copy;
977 }
978 else *salp_head = salp_copy;
979 return *salp_head;
980 }
981
SeqAlignEndExtend(SeqAlignPtr sap,Int4 start1,Int4 start2,Int4 stop1,Int4 stop2,Int4 x1,Int4 y1,Int4 x2,Int4 y2,Uint1 strand1,Uint1 strand2)982 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignEndExtend (SeqAlignPtr sap, Int4 start1, Int4 start2, Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2, Uint1 strand1, Uint1 strand2)
983 {
984 DenseSegPtr dsp;
985 Int4Ptr n_starts, n_lens;
986 Uint1Ptr n_strands;
987 Int4 index1,
988 minlen = 0;
989 Int2 n;
990 Int2 offset=0;
991 Boolean is_strands = FALSE;
992
993 if (sap==NULL)
994 return NULL;
995 if (start1==x1 && start2==y1 && stop1==x2 && stop2==y2)
996 return sap;
997 if (sap->segtype == 2)
998 {
999 dsp = (DenseSegPtr) sap->segs;
1000 is_strands = (Boolean) (dsp->strands!=NULL);
1001 n = dsp->numseg;
1002 n_starts = (Int4Ptr) MemNew((2*(n+2)+4)*sizeof(Int4));
1003 n_lens = (Int4Ptr) MemNew((n+4)*sizeof(Int4));
1004 if (is_strands)
1005 n_strands = (Uint1Ptr)MemNew((2*(n+2)+4)*sizeof(Uint1));
1006 if (x1 > start1 && y1 > start2) {
1007 minlen = MIN ((x1-start1), (y1-start2));
1008 n = 0;
1009 }
1010 if ((x1 > start1 || y1 > start2) && ((x1-start1) != (y1-start2))) {
1011 if (x1 > start1 && (x1-start1) > minlen) {
1012 n_starts[0] = 0;
1013 n_starts[1] = -1;
1014 n_lens[0] = (x1 - start1) - minlen;
1015 if (is_strands) {
1016 n_strands[0] = strand1;
1017 n_strands[1] = strand2;
1018 }
1019 }
1020 else if (y1 > start2 && (y1-start2) > minlen) {
1021 n_starts[0] = -1;
1022 n_starts[1] = 0;
1023 n_lens[0] = (y1 - start2) - minlen;
1024 if (is_strands) {
1025 n_strands[0] = strand1;
1026 n_strands[1] = strand2;
1027 }
1028 }
1029 offset += 1;
1030 }
1031 if (minlen > 0) {
1032 n += offset;
1033 n_starts[2*n] = x1 - minlen;
1034 n_starts[2*n+1] = y1 - minlen;
1035 n_lens[n] = minlen;
1036 if (is_strands) {
1037 n_strands[2*n] = strand1;
1038 n_strands[2*n+1] = strand2;
1039 }
1040 offset += 1;
1041 }
1042 n = dsp->numseg;
1043 for (index1=0; index1<n; index1++) {
1044 n_starts [2*(index1+offset)] = dsp->starts [2*index1];
1045 if (is_strands)
1046 n_strands[2*(index1+offset)] = dsp->strands[2*index1];
1047 n_starts [2*(index1+offset)+1]=dsp->starts [2*index1+1];
1048 if (is_strands)
1049 n_strands[2*(index1+offset)+1]=dsp->strands[2*index1+1];
1050 n_lens[index1+offset] = dsp->lens[index1];
1051 }
1052 if (x2 < stop1 && y2 < stop2) {
1053 n += offset;
1054 minlen = MIN ((stop1-x2), (stop2-y2));
1055 n_starts[2*n] = x2 + 1;
1056 n_starts[2*n+1] = y2 +1;
1057 n_lens[n] = minlen;
1058 if (is_strands) {
1059 n_strands[2*n] = strand1;
1060 n_strands[2*n+1] = strand2;
1061 }
1062 x2 += minlen;
1063 y2 += minlen;
1064 offset += 1;
1065 }
1066 if (x2 < stop1 || y2 < stop2) {
1067 n += offset;
1068 if (x2 < stop1) {
1069 n_starts[2*n] = x2 +1;
1070 n_starts[2*n+1] = -1;
1071 n_lens[n] = stop1 - x2;
1072 if (is_strands) {
1073 n_strands[2*n] = strand1;
1074 n_strands[2*n+1] = strand2;
1075 }
1076 }
1077 else if (y2 < stop2) {
1078 n_starts[2*n] = -1;
1079 n_starts[2*n+1] = y2 +1;
1080 n_lens[n] = stop2 - y2;
1081 if (is_strands) {
1082 n_strands[2*n] = strand1;
1083 n_strands[2*n+1] = strand2;
1084 }
1085 }
1086 offset += 1;
1087 }
1088 dsp->numseg = n+1;
1089 MemFree(dsp->starts);
1090 if (is_strands)
1091 MemFree(dsp->strands);
1092 MemFree(dsp->lens);
1093 dsp->starts = n_starts;
1094 dsp->lens = n_lens;
1095 if (is_strands)
1096 dsp->strands = n_strands;
1097 }
1098 return sap;
1099 }
1100
get_pos_from_salp(SeqAlignPtr salp,Int4 pos,Int4 PNTR offset,Int4Ptr PNTR startp,Int4Ptr PNTR lenp,Int4 PNTR numseg)1101 NLM_EXTERN Boolean LIBCALL get_pos_from_salp (SeqAlignPtr salp, Int4 pos, Int4 PNTR offset,
1102 Int4Ptr PNTR startp, Int4Ptr PNTR lenp, Int4 PNTR numseg)
1103 {
1104 DenseSegPtr dsp;
1105 Int4Ptr dspstart,
1106 dsplens;
1107 Int4 numsg = 0,
1108 sumlens = 0;
1109
1110 Boolean seen=FALSE;
1111
1112 if (salp == NULL)
1113 return FALSE;
1114 if (salp->segtype == 2)
1115 {
1116 dsp = (DenseSegPtr) salp->segs;
1117 dspstart = dsp->starts;
1118 dsplens = dsp->lens;
1119 while ( !seen && numsg < dsp->numseg )
1120 {
1121 numsg++;
1122 if (pos >= sumlens && pos < sumlens +*dsplens )
1123 {
1124 *offset = pos - sumlens;
1125 seen = TRUE;
1126 }
1127 else if (numsg == dsp->numseg)
1128 {
1129 break;
1130 }
1131 else
1132 {
1133 dspstart += dsp->dim;
1134 sumlens += *dsplens;
1135 dsplens++;
1136 }
1137 }
1138 if (seen) {
1139 *startp = dspstart;
1140 *lenp = dsplens;
1141 *numseg = numsg;
1142 }
1143 }
1144 return seen;
1145 }
1146
SeqAlignTrunc(SeqAlignPtr salp,Int4 from,Int4 to)1147 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignTrunc (SeqAlignPtr salp, Int4 from, Int4 to)
1148 {
1149 DenseSegPtr dsp;
1150 Int4Ptr startp,
1151 lenp,
1152 startmp;
1153 Int4 tot_length,
1154 numseg,
1155 offset,
1156 j;
1157 Boolean ble;
1158
1159 if (salp == NULL)
1160 return salp;
1161 tot_length = SeqAlignLength (salp);
1162 if (salp->segtype == 2)
1163 {
1164 dsp = (DenseSegPtr) salp->segs;
1165 if (to > 0 && to < tot_length)
1166 {
1167 ble = get_pos_from_salp (salp, to, &offset, &startp, &lenp, &numseg);
1168 if(ble)
1169 {
1170 if (numseg > 0)
1171 {
1172 dsp->numseg = numseg;
1173 }
1174 if (offset>-1 && offset+1 < *lenp)
1175 {
1176 *lenp = offset + 1;
1177 }
1178 }
1179 }
1180 if (from > 0 && from < tot_length)
1181 {
1182 ble = get_pos_from_salp (salp, from, &offset, &startp, &lenp, &numseg);
1183 if(ble)
1184 {
1185 if (numseg > 1)
1186 {
1187 dsp->numseg -= (numseg-1);
1188 dsp->starts = startp;
1189 dsp->lens = lenp;
1190 }
1191 if (offset > 0) {
1192 for (startmp=dsp->starts, j=0; j<dsp->dim; startmp++, j++)
1193 if (*startmp > -1)
1194 *startmp += offset+1;
1195 }
1196 *(dsp->lens) -= offset;
1197 }
1198 }
1199 }
1200 return salp;
1201 }
1202
SeqAlignTrunc2(SeqAlignPtr salp,Int4 from,Int4 to)1203 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignTrunc2 (SeqAlignPtr salp, Int4 from, Int4 to)
1204 {
1205 DenseSegPtr dsp;
1206 Int4Ptr int4p,
1207 int4lp;
1208 Int4 k;
1209 Int2 j;
1210
1211 if (salp == NULL)
1212 return NULL;
1213 if (salp->segtype == 2) {
1214 dsp = (DenseSegPtr) salp->segs;
1215 int4p = dsp->starts;
1216 int4lp = dsp->lens;
1217 for (j=0; j<dsp->dim;j++, int4p++)
1218 {
1219 if (SeqAlignStrand(salp, j)!=Seq_strand_minus)
1220 {
1221 if (from != 0) {
1222 if (*int4p > -1)
1223 *int4p += from;
1224 }
1225 }
1226 else if (SeqAlignStrand(salp, j)==Seq_strand_minus)
1227 {
1228 if (to != 0) {
1229 for (k=0; k<(dsp->numseg-1); k++)
1230 int4p++;
1231 if (*int4p > -1)
1232 *int4p -= to;
1233 }
1234 }
1235 }
1236 if (from != 0)
1237 *int4lp -= from;
1238 if (to != 0) {
1239 for (k=0; k<(dsp->numseg-1); k++)
1240 int4lp++;
1241 *int4lp += to;
1242 }
1243 }
1244 return salp;
1245 }
1246
SeqAlignMapOnFirstSeq(SeqAlignPtr salp)1247 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignMapOnFirstSeq (SeqAlignPtr salp)
1248 {
1249 SeqAlignPtr tmp;
1250 DenseSegPtr dsp;
1251 Int4Ptr startp;
1252 Int4Ptr lenp;
1253 Int4 j;
1254
1255 if (salp == NULL)
1256 return NULL;
1257 for (tmp=salp; tmp!=NULL; tmp=tmp->next) {
1258 if (tmp->segtype == 2) {
1259 dsp = (DenseSegPtr) tmp->segs;
1260 for (j=0, startp = dsp->starts; j<(dsp->numseg-1); j++)
1261 startp += dsp->dim;
1262 while (*startp < 0) {
1263 startp -= dsp->dim;
1264 dsp->numseg--;
1265 }
1266 startp = dsp->starts;
1267 lenp = dsp->lens;
1268 while (*startp < 0) {
1269 startp += dsp->dim;
1270 lenp++;
1271 dsp->numseg--;
1272 }
1273 dsp->starts = startp;
1274 dsp->lens = lenp;
1275 }
1276 }
1277 return salp;
1278 }
1279
SeqAlignMerge(SeqAlignPtr salp1,SeqAlignPtr salp2,Boolean return_salp)1280 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignMerge (SeqAlignPtr salp1, SeqAlignPtr salp2, Boolean return_salp)
1281 {
1282 SeqAlignPtr salp_toreturn;
1283 DenseSegPtr dsp1= NULL,
1284 dsp2 = NULL;
1285 Int4Ptr dspstarts= NULL;
1286 Int4Ptr dsptmp= NULL;
1287 Int4Ptr dsplens= NULL;
1288 Uint1Ptr dspstrands = NULL;
1289 Uint1Ptr dsptmp1;
1290 Int4 j, k, n, newseg;
1291 Uint1 st1, st2;
1292 Boolean merge_segment = FALSE;
1293
1294 if (salp1==NULL) {
1295 if (salp2 == NULL)
1296 return NULL;
1297 else
1298 return salp2;
1299 }
1300 else if (salp2 == NULL)
1301 return salp1;
1302
1303 if (return_salp)
1304 salp_toreturn = salp2;
1305 else
1306 salp_toreturn = salp1;
1307
1308 if ( salp1->segtype != 2 || salp2->segtype != 2 ) {
1309 return salp_toreturn;
1310 }
1311 if (return_salp) {
1312 dsp1 = (DenseSegPtr) salp1->segs;
1313 dsp2 = (DenseSegPtr) salp2->segs;
1314 }
1315 else {
1316 dsp1 = (DenseSegPtr) salp2->segs;
1317 dsp2 = (DenseSegPtr) salp1->segs;
1318 }
1319 if ( dsp1==NULL || dsp2==NULL || dsp1->dim != dsp2->dim) {
1320 return salp_toreturn;
1321 }
1322 n = dsp1->dim;
1323 newseg = dsp1->numseg + dsp2->numseg;
1324 dspstarts = (Int4Ptr) MemNew ((size_t) ((n*newseg+4) * sizeof (Int4)));
1325 if ( dspstarts == NULL ) {
1326 return salp_toreturn;
1327 }
1328 st1 = SeqAlignStrand (salp1, 0);
1329 st2 = SeqAlignStrand (salp1, 1);
1330 dsptmp = dsp1->starts;
1331 for (j = 0; j < n * dsp1->numseg; j++, dsptmp++) {
1332 dspstarts [j] = *dsptmp;
1333 }
1334 dsptmp = dsp2->starts;
1335 if (n==2) {
1336 if (dspstarts [j-2]> -1 && dsptmp[0] > -1) {
1337 if (dspstarts [j-1] > -1 && dsptmp[1] > -1)
1338 merge_segment = TRUE;
1339 }
1340 }
1341 if (merge_segment) {
1342 if (st1==Seq_strand_minus)
1343 dspstarts [j-2] = dsptmp[0];
1344 if (st2==Seq_strand_minus)
1345 dspstarts [j-1] = dsptmp[1];
1346 newseg--;
1347 k=n;
1348 dsptmp += n;
1349 }
1350 else
1351 k = 0;
1352 for (; k < n * dsp2->numseg; k++, j++, dsptmp++) {
1353 dspstarts [j] = *dsptmp;
1354 }
1355 dsplens = (Int4Ptr) MemNew ((size_t) ((newseg + 2) * sizeof (Int4)));
1356 if ( dsplens == NULL ) {
1357 return salp_toreturn;
1358 }
1359 dsptmp = dsp1->lens;
1360 for (j = 0; j < dsp1->numseg; j++, dsptmp++)
1361 dsplens [j] = *dsptmp;
1362 dsptmp = dsp2->lens;
1363 if (merge_segment) {
1364 dsplens [j-1] += *dsptmp;
1365 k=1;
1366 dsptmp ++;
1367 }
1368 else
1369 k = 0;
1370 for (; k < dsp2->numseg; k++, j++, dsptmp++)
1371 dsplens [j] = *dsptmp;
1372 if (dsp1->strands && dsp2->strands)
1373 {
1374 dspstrands = (Uint1Ptr) MemNew ((size_t) ((n*newseg+4) * sizeof (Uint1)));
1375 if ( dspstrands != NULL ) {
1376 dsptmp1=dsp1->strands;
1377 for (j = 0; j < n * dsp1->numseg; j++, dsptmp1++) {
1378 dspstrands [j] = *dsptmp1;
1379 }
1380 dsptmp1 = dsp2->strands;
1381 if (merge_segment) {
1382 k=n;
1383 dsptmp1 += n;
1384 }
1385 else
1386 k = 0;
1387 for (; k < n * dsp2->numseg; k++, j++, dsptmp1++) {
1388 dspstrands [j] = *dsptmp1;
1389 }
1390 }
1391 }
1392 dsp1 = (DenseSegPtr) salp1->segs;
1393 dsp1->numseg = newseg;
1394 MemFree(dsp1->starts);
1395 dsp1->starts = dspstarts;
1396 MemFree(dsp1->lens);
1397 dsp1->lens = dsplens;
1398 MemFree(dsp1->strands);
1399 dsp1->strands = dspstrands;
1400 return salp1;
1401 }
1402
SeqAnnotMerge(SeqAnnotPtr sap1,SeqAnnotPtr sap2,Boolean return_salp)1403 NLM_EXTERN SeqAnnotPtr LIBCALL SeqAnnotMerge (SeqAnnotPtr sap1, SeqAnnotPtr sap2, Boolean return_salp)
1404 {
1405 SeqAlignPtr salp1=NULL, salp2=NULL;
1406
1407 if ( sap1 == NULL ) return sap2;
1408 if ( sap2 == NULL ) return sap1;
1409 if ( sap1->type != 2 || sap2->type != 2 ) {
1410 return NULL;
1411 }
1412 salp1 = (SeqAlignPtr) sap1->data;
1413 salp2 = (SeqAlignPtr) sap2->data;
1414 if (return_salp)
1415 salp1 = SeqAlignMerge (salp1, salp2, TRUE);
1416 else
1417 salp1 = SeqAlignMerge (salp1, salp2, FALSE);
1418 return sap1;
1419 }
1420
SeqAlignAddSeg(SeqAlignPtr salp,Int4 pos1,Int4 pos2,Int4 len)1421 static SeqAlignPtr LIBCALL SeqAlignAddSeg (SeqAlignPtr salp, Int4 pos1, Int4 pos2, Int4 len)
1422 {
1423 DenseSegPtr dsp;
1424 Int4Ptr startp;
1425 Int4Ptr lenp;
1426 Uint1Ptr dspstrands,
1427 dsptmp1;
1428 Int4Ptr dsptmp;
1429 Int4 j;
1430
1431 dsp = (DenseSegPtr) salp->segs;
1432
1433 startp = (Int4Ptr) MemNew ((size_t) ((dsp->dim*dsp->numseg+4) * sizeof (Int4)));
1434 if ( startp == NULL ) {
1435 return NULL;
1436 }
1437 dsptmp = dsp->starts;
1438 for (j = 0; j < dsp->dim*dsp->numseg; j++, dsptmp++) {
1439 startp [j] = *dsptmp;
1440 }
1441 startp[j] = pos1;
1442 startp[j+1] = pos2;
1443 MemFree(dsp->starts);
1444 dsp->starts = startp;
1445
1446 lenp = (Int4Ptr) MemNew ((size_t) ((dsp->numseg + 2) * sizeof (Int4)));
1447 if ( lenp == NULL ) {
1448 return NULL;
1449 }
1450 dsptmp = dsp->lens;
1451 for (j = 0; j < dsp->numseg; j++, dsptmp++)
1452 lenp [j] = *dsptmp;
1453 lenp [j] = len;
1454 MemFree(dsp->lens);
1455 dsp->lens = lenp;
1456
1457 dspstrands = (Uint1Ptr) MemNew ((size_t) ((dsp->dim*dsp->numseg+4) * sizeof (Uint1)));
1458 if ( dspstrands == NULL ) {
1459 return NULL;
1460 }
1461 dsptmp1=dsp->strands;
1462 for (j = 0; j < dsp->dim * dsp->numseg; j++, dsptmp1++) {
1463 dspstrands [j] = *dsptmp1;
1464 }
1465 dspstrands [j] = Seq_strand_unknown;
1466 dspstrands [j+1] = Seq_strand_unknown;
1467 MemFree(dsp->strands);
1468 dsp->strands = dspstrands;
1469
1470 dsp->numseg += 1;
1471 return salp;
1472 }
1473
shrinksap5(SeqAlignPtr salp,Int4 offset)1474 static SeqAlignPtr LIBCALL shrinksap5 (SeqAlignPtr salp, Int4 offset)
1475 {
1476 DenseSegPtr dsp;
1477 Int4Ptr dsptmp;
1478 Int4 j;
1479
1480 dsp = (DenseSegPtr) salp->segs;
1481 dsptmp = dsp->starts;
1482 for (j = 0; j < dsp->dim; j++)
1483 dsptmp[j] += offset;
1484 dsptmp = dsp->lens;
1485 *dsptmp -= offset;
1486 return salp;
1487 }
1488
shrinksap3(SeqAlignPtr salp,Int4 offset)1489 static SeqAlignPtr LIBCALL shrinksap3 (SeqAlignPtr salp, Int4 offset)
1490 {
1491 DenseSegPtr dsp;
1492 Int4Ptr dsptmp;
1493
1494 dsp = (DenseSegPtr) salp->segs;
1495 dsptmp = dsp->lens;
1496 *dsptmp -= offset;
1497 return salp;
1498 }
1499
SeqAlignExtend(SeqAlignPtr salp1,SeqAlignPtr salp2)1500 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignExtend (SeqAlignPtr salp1, SeqAlignPtr salp2)
1501 {
1502 SeqAlignPtr salptmp;
1503 SeqLocPtr slp1, slp2,
1504 slp1b, slp2b;
1505 ValNodePtr vnp1, vnp2,
1506 slpt1, slpt2;
1507 Int4 gaplen;
1508 Int4 gaplen1, stop, stopb;
1509 Int4 j;
1510 Uint1 choice;
1511 Boolean goOn=TRUE;
1512
1513 slpt1 = SeqLocListFromSeqAlign (salp1);
1514 while (goOn) {
1515 goOn = FALSE;
1516 for (salptmp = salp2; salptmp!=NULL; salptmp=salptmp->next) {
1517 slpt2 = SeqLocListFromSeqAlign (salptmp);
1518 choice = 0;
1519 for (vnp1=slpt1, vnp2=slpt2, j=0; vnp1!=NULL && vnp2!=NULL; j++) {
1520 slp1=(SeqLocPtr)vnp1->data.ptrvalue;
1521 slp2=(SeqLocPtr)vnp2->data.ptrvalue;
1522 gaplen1 = SeqLocStart(slp2) - SeqLocStop(slp1);
1523 if (gaplen1 ==0) {
1524 if (j==0) choice =1;
1525 else choice =2;
1526 break;
1527 }
1528 gaplen1 = SeqLocStart(slp1) - SeqLocStop(slp2);
1529 if (gaplen1 ==0 || gaplen1==1 || gaplen1==-1 || gaplen1==-2) {
1530 if (j==0) choice = 3;
1531 else choice =4;
1532 break;
1533 }
1534 vnp1=vnp1->next;
1535 vnp2=vnp2->next;
1536 }
1537 if (choice) {
1538 if (choice==1 || choice==3) {
1539 vnp1=vnp1->next;
1540 vnp2=vnp2->next;
1541 }
1542 else {
1543 vnp1=slpt1;
1544 vnp2=slpt2;
1545 }
1546 slp1b=(SeqLocPtr)vnp1->data.ptrvalue;
1547 slp2b=(SeqLocPtr)vnp2->data.ptrvalue;
1548 if (choice ==1 || choice == 2) {
1549 gaplen = SeqLocStart(slp2b) - SeqLocStop(slp1b) -1;
1550 if (gaplen >= 1) {
1551 stop = SeqLocStop(slp1);
1552 stopb = SeqLocStop(slp1b);
1553 if (gaplen1==0) {
1554 shrinksap5 (salptmp, 1);
1555 gaplen-=1;
1556 }
1557 if (gaplen > 1) {
1558 if (choice ==1) {
1559 SeqAlignAddSeg(salp1, -1, stopb+1, gaplen);
1560 }
1561 else {
1562 SeqAlignAddSeg(salp1,stop+1, -1, gaplen);
1563 }
1564 }
1565 salp1 = SeqAlignMerge (salp1, salptmp, TRUE);
1566 goOn=TRUE;
1567 }
1568 } else {
1569 gaplen = SeqLocStart(slp1b) - SeqLocStop(slp2b) -1;
1570 if (gaplen >= 1) {
1571 stop = SeqLocStop(slp2);
1572 stopb = SeqLocStop(slp2b);
1573 if (gaplen1==0 || gaplen1==-1 || gaplen1==-2) {
1574 gaplen1=ABS(gaplen1)+1;
1575 shrinksap3 (salptmp, gaplen1);
1576 stop-=gaplen1;
1577 stopb-=gaplen1;
1578 gaplen+=gaplen1;
1579 }
1580 if (gaplen > 1) {
1581 if (choice ==3) {
1582 SeqAlignAddSeg(salptmp,-1,stop+1, gaplen);
1583 }
1584 else {
1585 SeqAlignAddSeg(salptmp, stopb+1,-1, gaplen);
1586 }
1587 }
1588 salp1 = SeqAlignMerge (salp1, salptmp, FALSE);
1589 goOn=TRUE;
1590 }
1591 }
1592 if (goOn) {
1593 ValNodeFreeType (&slpt1, TypeSeqLoc);
1594 slpt1 = SeqLocListFromSeqAlign (salp1);
1595 break;
1596 }
1597 }
1598 ValNodeFreeType (&slpt2, TypeSeqLoc);
1599 }
1600 }
1601 ValNodeFreeType (&slp1, TypeSeqLoc);
1602 return salp1;
1603 }
1604
CleanStrandsSeqAlign(SeqAlignPtr salp)1605 NLM_EXTERN SeqAlignPtr LIBCALL CleanStrandsSeqAlign (SeqAlignPtr salp)
1606 {
1607 SeqAlignPtr salptmp;
1608 DenseSegPtr dsp;
1609 CompSegPtr csp;
1610 Int4Ptr lenp;
1611 Int4Ptr startp;
1612 Uint1Ptr strandp = NULL;
1613 Int4 numseg;
1614 Int2 dim;
1615 Int4 j, k, n, tmp;
1616 Boolean retourne = FALSE;
1617 Boolean delete_me = FALSE;
1618
1619 for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
1620 {
1621 if (salptmp->segtype == COMPSEG)
1622 {
1623 csp=(CompSegPtr) salptmp->segs;
1624 strandp = csp->strands;
1625 numseg = csp->numseg;
1626 dim = csp->dim;
1627 lenp = csp->lens;
1628 }
1629 else if (salptmp->segtype == 2)
1630 {
1631 dsp = (DenseSegPtr) salptmp->segs;
1632 strandp = dsp->strands;
1633 numseg = dsp->numseg;
1634 dim = dsp->dim;
1635 lenp = dsp->lens;
1636 startp = dsp->starts;
1637 }
1638 if (strandp!=NULL) {
1639 for (j=0; j<numseg*dim; j++)
1640 if (*strandp==Seq_strand_minus)
1641 break;
1642 delete_me = (Boolean)(j==numseg*dim);
1643 }
1644 if (delete_me) {
1645 if (salptmp->segtype == COMPSEG)
1646 csp->strands = NULL;
1647 else if (salptmp->segtype == 2)
1648 dsp->strands = NULL;
1649 MemFree (strandp);
1650 return salp;
1651 }
1652 if (strandp!=NULL) {
1653 if (*strandp != Seq_strand_plus && *strandp!=Seq_strand_minus)
1654 for (j=0; j<numseg; j++, strandp+=dim) {
1655 if (*strandp == Seq_strand_plus || *strandp==Seq_strand_minus)
1656 break;
1657 }
1658 if (strandp!=NULL)
1659 retourne = (Boolean) (*strandp == Seq_strand_minus);
1660 }
1661 if (retourne) {
1662 for (j=0; j < numseg*dim && strandp!=NULL; j++, strandp++)
1663 {
1664 if (*strandp == Seq_strand_minus)
1665 *strandp = Seq_strand_plus;
1666 else if (*strandp == Seq_strand_plus)
1667 *strandp = Seq_strand_minus;
1668 }
1669 for (j=0, k=numseg-1; j<numseg/2; j++, k--) {
1670 tmp=lenp[j];
1671 lenp[j]=lenp[k];
1672 lenp[k]=tmp;
1673 }
1674 for (j=0, k=(dim*numseg-dim); j<(dim*numseg-1)/2; j+=dim, k-=dim) {
1675 for (n=0; n<dim; n++) {
1676 tmp=startp[j+n];
1677 startp[j+n]=startp[k+n];
1678 startp[k+n]=tmp;
1679 }
1680 }
1681 }
1682 }
1683 return salp;
1684 }
1685
1686 /*************************************************
1687 ***
1688 *** LocalAlignToSeqAnnotDimn
1689 ***
1690 *************************************************/
LocalAlignToSeqAnnotDimn(ValNodePtr seqvnp,SeqIdPtr seqsip,ValNodePtr fromp,Int2 nbseq,Int4 lens,ValNodePtr strands,Boolean trunc_emptyends)1691 NLM_EXTERN SeqAnnotPtr LIBCALL LocalAlignToSeqAnnotDimn (ValNodePtr seqvnp, SeqIdPtr seqsip, ValNodePtr fromp, Int2 nbseq, Int4 lens, ValNodePtr strands, Boolean trunc_emptyends)
1692 {
1693 SeqAnnotPtr sap;
1694 SeqAlignPtr salp;
1695 DenseSegPtr dsp;
1696 ValNodePtr vnp;
1697 CharPtr seqstr;
1698 Boolean PNTR filter;
1699 Boolean PNTR begun;
1700 Int4Ptr from;
1701 Int4Ptr lgseq;
1702 Int4Ptr start;
1703 Uint1Ptr strandp=NULL;
1704 Uint1 seq_strand;
1705 Int4 lenstmp, the_lens;
1706 Int4 j;
1707 Int2 seg, numseg;
1708 Int2 k;
1709 Boolean nogap;
1710
1711 for (k =0, vnp =seqvnp; k < nbseq && vnp !=NULL; k++, vnp =vnp->next)
1712 {
1713 seqstr = (CharPtr) vnp->data.ptrvalue;
1714 lenstmp = StringLen (seqstr);
1715 if (k==0)
1716 the_lens = lenstmp;
1717 else if (lenstmp != the_lens) {
1718 ErrPostEx (SEV_ERROR, 0, 0, "Sequence alignment of different lengths");
1719 return NULL;
1720 }
1721 }
1722 lens = the_lens;
1723
1724 /*****************************
1725 ** count number of segments **
1726 *****************************/
1727 filter= (BoolPtr)MemNew ((size_t) ((nbseq + 1) * sizeof (Boolean)));
1728 j = 0;
1729 for (k =0, vnp =seqvnp; k < nbseq && vnp !=NULL; k++, vnp =vnp->next)
1730 {
1731 seqstr = (CharPtr) vnp->data.ptrvalue;
1732 filter [k] = (Boolean)( seqstr [j] != '-' );
1733 }
1734 numseg = 1;
1735 while ( j < lens )
1736 {
1737 seg = 0;
1738 for (k =0, vnp =seqvnp; k <nbseq && vnp !=NULL; k++, vnp =vnp->next)
1739 {
1740 seqstr = (CharPtr) vnp->data.ptrvalue;
1741 nogap = (Boolean) ( seqstr [j] != '-' );
1742 if ( filter [k] != nogap ) {
1743 seg++;
1744 filter [k] = nogap;
1745 }
1746 }
1747 if ( seg > 0 ) ++numseg;
1748 j++;
1749 }
1750 /********************************************
1751 ** allocate SeqAnnot + SeqAlign + DenseSeg **
1752 *********************************************/
1753 sap = SeqAnnotNew ();
1754 if (sap == NULL) return NULL;
1755 sap->type = 2;
1756 salp = SeqAlignNew ();
1757 if (salp == NULL) return NULL;
1758 salp->type = 3;
1759 salp->segtype = 2;
1760 salp->dim = nbseq;
1761 sap->data = (Pointer) salp;
1762 dsp = DenseSegNew ();
1763 salp->segs = (Pointer) dsp;
1764 dsp->dim = nbseq;
1765 dsp->ids = SeqIdDupList (seqsip);
1766 dsp->numseg = numseg;
1767 dsp->starts = (Int4Ptr)MemNew((size_t)((nbseq *numseg + 4) * sizeof (Int4)));
1768 for (j = 0; j < nbseq *numseg + 4; j++)
1769 dsp->starts[j] = -1;
1770 dsp->lens = (Int4Ptr) MemNew ((size_t) ((numseg + 2) * sizeof (Int4)));
1771 for (k = 0; k < numseg + 2; k++)
1772 dsp->lens[k] = 0;
1773 if (strands)
1774 {
1775 dsp->strands = (Uint1Ptr)MemNew((size_t) ((numseg*nbseq+4)*sizeof (Uint1)));
1776 strandp = dsp->strands;
1777 for (j = 0; j < numseg*nbseq ; j++, strandp++)
1778 *(strandp) = Seq_strand_unknown;
1779 strandp = dsp->strands;
1780 for (k=0; k<numseg; k++) {
1781 for (j = 0, vnp=strands; j < nbseq && vnp!=NULL; j++, vnp=vnp->next) {
1782 *strandp = (Uint1)vnp->data.intvalue;
1783 strandp++;
1784 }
1785 }
1786 }
1787 j = 0;
1788 for (k =0, vnp =seqvnp; k < nbseq && vnp !=NULL; k++, vnp =vnp->next)
1789 {
1790 seqstr = (CharPtr) vnp->data.ptrvalue;
1791 filter [k] = (Boolean)( seqstr [j] != '-' );
1792 }
1793 lenstmp= 0;
1794 numseg = 0;
1795 while ( j < lens )
1796 {
1797 seg = 0;
1798 for (k =0, vnp =seqvnp; k <nbseq && vnp !=NULL; k++, vnp =vnp->next)
1799 {
1800 seqstr = (CharPtr) vnp->data.ptrvalue;
1801 nogap = (Boolean) ( seqstr [j] != '-' );
1802 if ( filter [k] != nogap ) {
1803 seg++;
1804 filter [k] = nogap;
1805 }
1806 }
1807 if ( seg > 0 ) {
1808 dsp->lens[numseg] = lenstmp;
1809 ++numseg;
1810 lenstmp = 0;
1811 }
1812 lenstmp++;
1813 j++;
1814 }
1815 if (lenstmp > 0)
1816 dsp->lens[numseg] = lenstmp;
1817 /******************************
1818 *** store the segments **
1819 ******************************/
1820 lgseq = (Int4Ptr)MemNew ((size_t) ((nbseq + 1) * sizeof (Int4)));
1821 from = (Int4Ptr)MemNew ((size_t) ((nbseq + 1) * sizeof (Int4)));
1822 begun = (BoolPtr)MemNew ((size_t) ((nbseq + 1) * sizeof (Boolean)));
1823 if ( lgseq == NULL || from == NULL || begun == NULL )
1824 return NULL;
1825 for (k = 0; k < nbseq; k++)
1826 lgseq[k] = 0;
1827 if (fromp == NULL)
1828 for (k = 0; k < nbseq; k++) from [k] = 0;
1829 else {
1830 for (k=0, vnp=fromp; k<nbseq && vnp!=NULL; vnp=vnp->next, k++)
1831 {
1832 from [k] = (Int4)vnp->data.intvalue;
1833 }
1834 }
1835 for (k = 0; k < nbseq; k++)
1836 begun[k] = FALSE;
1837 start = dsp->starts;
1838 strandp = dsp->strands;
1839 j = 0;
1840 for (k =0, vnp =seqvnp; k <nbseq && vnp !=NULL; k++, vnp =vnp->next)
1841 {
1842 seqstr = (CharPtr) vnp->data.ptrvalue;
1843 filter [k] = (Boolean)( seqstr [j] != '-' );
1844 if ( filter [k] ) {
1845 start [k] = lgseq [k] + from [k];
1846 begun [k] = TRUE;
1847 }
1848 }
1849 j++;
1850 numseg = 0;
1851 while ( j < lens )
1852 {
1853 seg = 0;
1854 for (k =0, vnp =seqvnp; k <nbseq && vnp !=NULL; k++, vnp =vnp->next)
1855 {
1856 seqstr = (CharPtr) vnp->data.ptrvalue;
1857 nogap = ( seqstr [j] != '-' );
1858 if ( nogap && begun [k] )
1859 ++lgseq [k];
1860 if ( filter [k] != nogap ) {
1861 seg++;
1862 filter[k] = nogap;
1863 begun [k] = TRUE;
1864 }
1865 }
1866 if ( seg > 0 ) {
1867 start += nbseq;
1868 for (k = 0; k < nbseq; k++) {
1869 if (filter[k]) {
1870 seq_strand = Seq_strand_unknown;
1871 if (strandp)
1872 seq_strand = strandp[k];
1873 if (seq_strand==Seq_strand_minus) {
1874 start[k] = from[k] - lgseq[k];
1875 } else {
1876 start[k] = from[k] + lgseq[k];
1877 }
1878 }
1879 }
1880 ++numseg;
1881 }
1882 j++;
1883 }
1884 MemFree (filter);
1885 MemFree (lgseq);
1886 MemFree (from);
1887 MemFree (begun);
1888 if (trunc_emptyends && salp!=NULL) {
1889 nogap = TRUE;
1890 dsp = (DenseSegPtr) salp->segs;
1891 while (nogap && dsp->numseg>0) {
1892 start = dsp->starts;
1893 start += (dsp->dim * (dsp->numseg-1) );
1894 for (j=0; j < dsp->dim; j++, start++) {
1895 if (*start > -1)
1896 break;
1897 }
1898 if (j == dsp->dim)
1899 dsp->numseg --;
1900 else nogap = FALSE;
1901 }
1902 }
1903 if (strandp)
1904 {
1905 for (k=0, vnp=seqvnp; k <nbseq && vnp!=NULL; k++, vnp=vnp->next) {
1906 if (dsp->strands[k] == Seq_strand_minus) {
1907 start = dsp->starts;
1908 start += k;
1909 lgseq = dsp->lens;
1910 for (j=0; j<dsp->numseg; j++, lgseq++) {
1911 if (*start > -1) {
1912 *start -= *lgseq;
1913 }
1914 start += nbseq;
1915 }
1916 }
1917 }
1918 }
1919 if (salp == NULL || dsp->numseg == 0) {
1920 sap = SeqAnnotFree (sap);
1921 return NULL;
1922 }
1923 if (salp == NULL) {
1924 sap = SeqAnnotFree (sap);
1925 return NULL;
1926 }
1927 return sap;
1928 }
1929
1930
1931
SeqAlignIDCache(SeqAlignPtr salphead,SeqIdPtr sip)1932 NLM_EXTERN Boolean LIBCALL SeqAlignIDCache (SeqAlignPtr salphead, SeqIdPtr sip)
1933 {
1934 SeqAlignPtr salp;
1935 SeqIdPtr tmpsip;
1936 Boolean ok = FALSE;
1937
1938 for (salp = salphead; salp!= NULL; salp=salp->next)
1939 {
1940 tmpsip =SeqIdPtrFromSeqAlign (salp);
1941 if ((SeqIdOrderInBioseqIdList(sip, tmpsip)) > 1)
1942 {
1943 if (salp->segtype == 1 || salp->dim == 2)
1944 {
1945 salp->type = 0;
1946 ok = TRUE;
1947 }
1948 else if (salp->segtype == 2)
1949 {
1950 SeqAlignBioseqDeleteById (salp, sip);
1951 ok = TRUE;
1952 }
1953 }
1954 }
1955 return ok;
1956 }
1957
SeqAlignIDUncache(SeqAlignPtr salphead,SeqIdPtr sip)1958 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignIDUncache (SeqAlignPtr salphead, SeqIdPtr sip)
1959 {
1960 SeqAlignPtr salp;
1961 SeqIdPtr tmpsip;
1962
1963 for (salp = salphead; salp!= NULL; salp=salp->next)
1964 {
1965 tmpsip =SeqIdPtrFromSeqAlign (salp);
1966 if ((SeqIdOrderInBioseqIdList(sip, tmpsip)) > 1)
1967 {
1968 if (salp->segtype == 1 || salp->dim == 2)
1969 {
1970 salp->type = 3;
1971 }
1972 }
1973 }
1974 return salphead;
1975 }
1976
SeqAlignIDUncacheAll(SeqAlignPtr salphead)1977 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignIDUncacheAll (SeqAlignPtr salphead)
1978 {
1979 SeqAlignPtr salp;
1980
1981
1982 for (salp = salphead; salp!= NULL; salp=salp->next)
1983 {
1984 if (salp->type < 1)
1985 if (salp->segtype == 1 || salp->dim == 2)
1986 salp->type = 3;
1987 }
1988 return salphead;
1989 }
1990
1991 /* Find the segment where pos occurs for the nth segment.
1992 * If pos is not the start of the segment, cut the alignment
1993 * segment in two, with one of the segments with pos as the new start.
1994 */
CutAlignmentSegment(SeqAlignPtr salp,Int4 nth,Int4 pos)1995 static void CutAlignmentSegment(SeqAlignPtr salp, Int4 nth, Int4 pos)
1996 {
1997 DenseSegPtr dsp;
1998 Int4 seg_num, seg_start, k, j, first_len, second_len;
1999 Int4Ptr new_starts, new_lens;
2000 Uint1Ptr new_strands = NULL;
2001
2002 if (salp == NULL || salp->segtype != SAS_DENSEG || salp->segs == NULL || nth < 1 || pos < 0) {
2003 return;
2004 }
2005
2006 dsp = (DenseSegPtr) salp->segs;
2007 if (nth > dsp->dim) return;
2008
2009 for (seg_num = 0; seg_num < dsp->numseg; seg_num++) {
2010 seg_start = dsp->starts[(seg_num * dsp->dim) + nth - 1];
2011 if (seg_start < 0) {
2012 continue;
2013 } else if (seg_start <= pos && seg_start + dsp->lens[seg_num] > pos) {
2014 /* found our segment */
2015 if (seg_start != pos) {
2016 /* only need to cut if pos is not already the start */
2017 new_starts = (Int4Ptr) MemNew (sizeof (Int4) * (dsp->numseg + 1) * dsp->dim);
2018 new_lens = (Int4Ptr) MemNew (sizeof(Int4) * (dsp->numseg + 1));
2019 if (dsp->strands != NULL) {
2020 new_strands = (Uint1Ptr) MemNew (sizeof (Uint1) * (dsp->numseg + 1) * dsp->dim);
2021 }
2022 /* copy before */
2023 for (k = 0; k < seg_num; k++) {
2024 for (j = 0; j < dsp->dim; j++) {
2025 new_starts[k * dsp->dim + j] = dsp->starts[k * dsp->dim + j];
2026 if (dsp->strands != NULL) {
2027 new_strands[k * dsp->dim + j] = dsp->strands[k * dsp->dim + j];
2028 }
2029 }
2030 new_lens[k] = dsp->lens[k];
2031 }
2032
2033 if (dsp->strands == NULL || dsp->strands[seg_num * dsp->dim + nth - 1] != Seq_strand_minus) {
2034 first_len = pos - seg_start;
2035 second_len = dsp->lens[seg_num] - first_len;
2036 } else {
2037 second_len = pos - seg_start;
2038 first_len = dsp->lens[seg_num] - second_len;
2039 }
2040
2041 /* split */
2042 for (j = 0; j < dsp->dim; j++) {
2043 /* set starts for split segments */
2044 if (dsp->starts[seg_num * dsp->dim + j] == -1) {
2045 new_starts[seg_num * dsp->dim + j] = -1;
2046 new_starts[(seg_num + 1) * dsp->dim + j] = -1;
2047 } else if (dsp->strands == NULL || dsp->strands[seg_num * dsp->dim + j] != Seq_strand_minus) {
2048 new_starts[seg_num * dsp->dim + j] = dsp->starts[seg_num * dsp->dim + j];
2049 new_starts[(seg_num + 1) * dsp->dim + j] = dsp->starts[seg_num * dsp->dim + j] + first_len;
2050 } else {
2051 new_starts[seg_num * dsp->dim + j] = dsp->starts[seg_num * dsp->dim + j] + second_len;
2052 new_starts[(seg_num + 1) * dsp->dim + j] = dsp->starts[seg_num * dsp->dim + j];
2053 }
2054 /* set strands for split segments */
2055 if (dsp->strands != NULL) {
2056 new_strands[seg_num * dsp->dim + j] = dsp->strands[seg_num * dsp->dim + j];
2057 new_strands[(seg_num + 1) * dsp->dim + j] = dsp->strands[seg_num * dsp->dim + j];
2058 }
2059 }
2060 /* set lens for split segments */
2061 new_lens[seg_num] = first_len;
2062 new_lens[seg_num + 1] = second_len;
2063
2064 /* copy after */
2065 for (k = seg_num + 1; k < dsp->numseg; k++) {
2066 for (j = 0; j < dsp->dim; j++) {
2067 new_starts[(k + 1) * dsp->dim + j] = dsp->starts[k * dsp->dim + j];
2068 if (dsp->strands != NULL) {
2069 new_strands[(k + 1) * dsp->dim + j] = dsp->strands[k * dsp->dim + j];
2070 }
2071 }
2072 new_lens[(k + 1)] = dsp->lens[k];
2073 }
2074
2075 /* replace in dsp */
2076 dsp->starts = MemFree (dsp->starts);
2077 dsp->starts = new_starts;
2078 dsp->lens = MemFree (dsp->lens);
2079 dsp->lens = new_lens;
2080 dsp->strands = MemFree (dsp->strands);
2081 dsp->strands = new_strands;
2082 /* can't fix scores */
2083 ScoreSetFree(dsp->scores);
2084 dsp->scores = NULL;
2085 /* now have one more segment */
2086 dsp->numseg++;
2087 }
2088 return;
2089 }
2090 }
2091 }
2092
2093 /* A section of a sequence in an alignment has been deleted. We need to adjust
2094 * the alignment to reflect the new positions of nucleotides that were previously
2095 * aligned to other sequences in the alignment.
2096 * We need to find the segments in which the start and stop of the deleted section
2097 * are found. If the start and stop do not fall on segment boundaries, we need to
2098 * cut the segment at those boundaries.
2099 * Then we change the alignment segments that cover the portion of the sequence that
2100 * was deleted to be gaps for that sequence.
2101 * Finally, we decrease the start values for this sequence for any segment that started
2102 * after the cut by the length of the cut.
2103 */
SeqAlignDeleteByLoc(SeqLocPtr slp,SeqAlignPtr salp)2104 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDeleteByLoc (SeqLocPtr slp, SeqAlignPtr salp)
2105 {
2106 SeqIdPtr sip;
2107 DenseSegPtr dsp;
2108 Int4 nth, pos, len, seg_index, seg_start;
2109 Int4 stop;
2110
2111 if (salp == NULL || salp->segtype != SAS_DENSEG || salp->segs == NULL) {
2112 return NULL;
2113 }
2114
2115 sip = SeqLocId(slp);
2116 dsp = (DenseSegPtr) salp->segs;
2117 if (dsp == NULL) {
2118 return salp;
2119 }
2120 nth = SeqIdOrderInBioseqIdList (sip, dsp->ids);
2121 if (nth == 0 || nth > dsp->dim) {
2122 return salp;
2123 }
2124
2125 pos = SeqLocStart (slp);
2126 stop = SeqLocStop (slp);
2127 if (stop < pos) {
2128 len = pos - stop + 1;
2129 pos = stop;
2130 } else {
2131 len = stop - pos + 1;
2132 }
2133
2134 CutAlignmentSegment(salp, nth, pos);
2135 CutAlignmentSegment(salp, nth, pos + len);
2136
2137 /* change deleted sequence segment start values */
2138 for (seg_index = 0; seg_index < dsp->numseg; seg_index++) {
2139 seg_start = dsp->starts[dsp->dim * seg_index + nth - 1];
2140 if (seg_start < 0) {
2141 /* gap, no change */
2142 continue;
2143 } else if (seg_start < pos) {
2144 /* before cut, no change */
2145 } else if (seg_start >= pos && seg_start + dsp->lens[seg_index] <= pos + len) {
2146 /* in gap */
2147 dsp->starts[dsp->dim * seg_index + nth - 1] = -1;
2148 } else {
2149 /* after cut */
2150 dsp->starts[dsp->dim * seg_index + nth - 1] -= len;
2151 }
2152 }
2153
2154 return salp;
2155 }
2156
AreDenseSegSegmentsValid(DenseSegPtr dsp,Int4 start,Int4 num)2157 static Boolean AreDenseSegSegmentsValid (DenseSegPtr dsp, Int4 start, Int4 num)
2158 {
2159 Int4 k, seg_num, next_pos;
2160
2161 if (dsp == NULL || start < 0 || num < 1)
2162 {
2163 return FALSE;
2164 }
2165
2166 for (k = 0; k < dsp->dim; k++)
2167 {
2168 if (dsp->strands == NULL || dsp->strands[k] == Seq_strand_plus)
2169 {
2170 if(dsp->starts [dsp->dim * start + k] > -1)
2171 {
2172 next_pos = dsp->starts [dsp->dim * start + k] + dsp->lens[start];
2173 }
2174 else
2175 {
2176 next_pos = -1;
2177 }
2178 for (seg_num = start + 1; seg_num - start < num; seg_num++)
2179 {
2180 if (dsp->starts[dsp->dim * seg_num + k] == -1)
2181 {
2182 continue;
2183 }
2184 if (next_pos != -1)
2185 {
2186 if (dsp->starts[dsp->dim * seg_num + k] != next_pos)
2187 {
2188 return FALSE;
2189 }
2190 }
2191 next_pos = dsp->starts[dsp->dim * seg_num + k] + dsp->lens[seg_num];
2192 }
2193 }
2194 else
2195 {
2196 if (dsp->starts [dsp->dim * (start + num - 1) + k] > -1)
2197 {
2198 next_pos = dsp->starts [dsp->dim * (start + num - 1) + k] + dsp->lens [start + num - 1];
2199 }
2200 else
2201 {
2202 next_pos = -1;
2203 }
2204 for (seg_num = start + num - 2; seg_num >= start; seg_num--)
2205 {
2206 if (dsp->starts [dsp->dim * seg_num + k] == -1)
2207 {
2208 continue;
2209 }
2210 if (next_pos != -1)
2211 {
2212 if (dsp->starts[dsp->dim * seg_num + k] != next_pos)
2213 {
2214 return FALSE;
2215 }
2216 }
2217 next_pos = dsp->starts[dsp->dim * seg_num + k] + dsp->lens[seg_num];
2218 }
2219 }
2220
2221 }
2222
2223 return TRUE;
2224 }
2225
2226
2227 static void
FillInPlusStrandInsertionSegmentA(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 first_len,Int4 second_len,Int4 orig_segment,Int4Ptr this_seg)2228 FillInPlusStrandInsertionSegmentA
2229 (DenseSegPtr dsp_orig,
2230 DenseSegPtr dsp_new,
2231 Int4 insert_start,
2232 Int4 insert_len,
2233 Int4 insert_row,
2234 Int4 first_len,
2235 Int4 second_len,
2236 Int4 orig_segment,
2237 Int4Ptr this_seg)
2238 {
2239 Int4 k;
2240
2241 if (dsp_orig == NULL || dsp_new == NULL
2242 || insert_start < 0 || insert_len < 0
2243 || first_len < 0
2244 || second_len < 0
2245 || orig_segment < 0 || orig_segment >= dsp_orig->numseg
2246 || this_seg == NULL
2247 || *this_seg < 0 || *this_seg >= dsp_new->numseg)
2248 {
2249 return;
2250 }
2251
2252 if (first_len == 0)
2253 {
2254 return;
2255 }
2256
2257 for (k = 0; k < dsp_orig->dim; k++)
2258 {
2259 dsp_new->starts[(*this_seg) * dsp_new->dim + k]
2260 = dsp_orig->starts[orig_segment * dsp_orig->dim + k];
2261 if (dsp_orig->strands != NULL)
2262 {
2263 dsp_new->strands[(*this_seg) * dsp_new->dim + k]
2264 = dsp_orig->strands[orig_segment * dsp_orig->dim + k];
2265 if (dsp_orig->strands[orig_segment * dsp_orig->dim + k] == Seq_strand_minus
2266 && dsp_new->starts[(*this_seg) * dsp_new->dim + k] > -1)
2267 {
2268 dsp_new->starts[(*this_seg) * dsp_new->dim + k] += second_len;
2269 }
2270 }
2271 }
2272
2273 dsp_new->lens[*this_seg] = first_len;
2274 (*this_seg)++;
2275 }
2276
2277
2278 static void
FillInInsertionSegmentB(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 first_len,Int4 second_len,Int4 orig_segment,Int4Ptr this_seg)2279 FillInInsertionSegmentB
2280 (DenseSegPtr dsp_orig,
2281 DenseSegPtr dsp_new,
2282 Int4 insert_start,
2283 Int4 insert_len,
2284 Int4 insert_row,
2285 Int4 first_len,
2286 Int4 second_len,
2287 Int4 orig_segment,
2288 Int4Ptr this_seg)
2289 {
2290 Int4 k;
2291
2292 if (dsp_orig == NULL || dsp_new == NULL
2293 || insert_start < 0 || insert_len < 0
2294 || first_len < 0
2295 || second_len < 0
2296 || orig_segment < 0 || orig_segment >= dsp_orig->numseg
2297 || this_seg == NULL
2298 || *this_seg < 0 || *this_seg >= dsp_new->numseg)
2299 {
2300 return;
2301 }
2302
2303 if (insert_len == 0)
2304 {
2305 return;
2306 }
2307
2308 for (k = 0; k < dsp_orig->dim; k++)
2309 {
2310 dsp_new->starts[(*this_seg) * dsp_new->dim + k] = -1;
2311 if (dsp_orig->strands != NULL)
2312 {
2313 dsp_new->strands[(*this_seg) * dsp_new->dim + k]
2314 = dsp_orig->strands[orig_segment * dsp_orig->dim + k];
2315 }
2316 }
2317 dsp_new->starts[(*this_seg) * dsp_new->dim + insert_row] = insert_start;
2318
2319 dsp_new->lens[*this_seg] = insert_len;
2320 (*this_seg)++;
2321 }
2322
FillInPlusStrandInsertionSegmentC(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 first_len,Int4 second_len,Int4 orig_segment,Int4Ptr this_seg)2323 static void FillInPlusStrandInsertionSegmentC
2324 (DenseSegPtr dsp_orig,
2325 DenseSegPtr dsp_new,
2326 Int4 insert_start,
2327 Int4 insert_len,
2328 Int4 insert_row,
2329 Int4 first_len,
2330 Int4 second_len,
2331 Int4 orig_segment,
2332 Int4Ptr this_seg)
2333 {
2334 Int4 k;
2335
2336 if (dsp_orig == NULL || dsp_new == NULL
2337 || insert_start < 0 || insert_len < 0
2338 || first_len < 0
2339 || second_len < 0
2340 || orig_segment < 0 || orig_segment >= dsp_orig->numseg
2341 || this_seg == NULL
2342 || *this_seg < 0 || *this_seg >= dsp_new->numseg)
2343 {
2344 return;
2345 }
2346
2347 if (second_len == 0)
2348 {
2349 return;
2350 }
2351
2352 for (k = 0; k < dsp_orig->dim; k++)
2353 {
2354 if ((dsp_orig->strands == NULL
2355 || dsp_orig->strands[orig_segment * dsp_new->dim + k] != Seq_strand_minus)
2356 && dsp_new->starts[(*this_seg) * dsp_new->dim + k] > -1)
2357 {
2358 dsp_new->starts[(*this_seg) * dsp_new->dim + k] =
2359 dsp_orig->starts[orig_segment * dsp_orig->dim + k] + first_len;
2360 }
2361 else
2362 {
2363 dsp_new->starts[(*this_seg) * dsp_new->dim + k] =
2364 dsp_orig->starts[orig_segment * dsp_orig->dim + k];
2365 }
2366
2367 if (dsp_orig->strands != NULL)
2368 {
2369 dsp_new->strands[(*this_seg) * dsp_new->dim + k]
2370 = dsp_orig->strands[orig_segment * dsp_orig->dim + k];
2371 }
2372 }
2373 dsp_new->starts[(*this_seg) * dsp_new->dim + insert_row] += insert_len;
2374
2375 dsp_new->lens[*this_seg] = second_len;
2376 (*this_seg)++;
2377 }
2378
2379
2380 static void
FillInMinusStrandInsertionSegmentA(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 first_len,Int4 second_len,Int4 orig_segment,Int4Ptr this_seg)2381 FillInMinusStrandInsertionSegmentA
2382 (DenseSegPtr dsp_orig,
2383 DenseSegPtr dsp_new,
2384 Int4 insert_start,
2385 Int4 insert_len,
2386 Int4 insert_row,
2387 Int4 first_len,
2388 Int4 second_len,
2389 Int4 orig_segment,
2390 Int4Ptr this_seg)
2391 {
2392 Int4 k;
2393
2394 if (dsp_orig == NULL || dsp_new == NULL
2395 || insert_start < 0 || insert_len < 0
2396 || first_len < 0
2397 || second_len < 0
2398 || orig_segment < 0 || orig_segment >= dsp_orig->numseg
2399 || this_seg == NULL
2400 || *this_seg < 0 || *this_seg >= dsp_new->numseg)
2401 {
2402 return;
2403 }
2404
2405 if (first_len == 0)
2406 {
2407 return;
2408 }
2409
2410 for (k = 0; k < dsp_orig->dim; k++)
2411 {
2412 dsp_new->starts[(*this_seg) * dsp_new->dim + k]
2413 = dsp_orig->starts[orig_segment * dsp_orig->dim + k];
2414 if (dsp_orig->strands != NULL)
2415 {
2416 dsp_new->strands[(*this_seg) * dsp_new->dim + k]
2417 = dsp_orig->strands[orig_segment * dsp_orig->dim + k];
2418 if (dsp_orig->strands[orig_segment * dsp_orig->dim + k] == Seq_strand_minus
2419 && dsp_new->starts[(*this_seg) * dsp_new->dim + k] != -1)
2420 {
2421 dsp_new->starts[(*this_seg) * dsp_new->dim + k] += second_len;
2422 }
2423 }
2424 }
2425
2426 dsp_new->starts[(*this_seg) * dsp_new->dim + insert_row] += insert_len;
2427
2428 dsp_new->lens[*this_seg] = first_len;
2429 (*this_seg)++;
2430 }
2431
FillInMinusStrandInsertionSegmentC(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 first_len,Int4 second_len,Int4 orig_segment,Int4Ptr this_seg)2432 static void FillInMinusStrandInsertionSegmentC
2433 (DenseSegPtr dsp_orig,
2434 DenseSegPtr dsp_new,
2435 Int4 insert_start,
2436 Int4 insert_len,
2437 Int4 insert_row,
2438 Int4 first_len,
2439 Int4 second_len,
2440 Int4 orig_segment,
2441 Int4Ptr this_seg)
2442 {
2443 Int4 k;
2444
2445 if (dsp_orig == NULL || dsp_new == NULL
2446 || insert_start < 0 || insert_len < 0
2447 || first_len < 0
2448 || second_len < 0
2449 || orig_segment < 0 || orig_segment >= dsp_orig->numseg
2450 || this_seg == NULL
2451 || *this_seg < 0 || *this_seg >= dsp_new->numseg)
2452 {
2453 return;
2454 }
2455
2456 if (second_len == 0)
2457 {
2458 return;
2459 }
2460
2461 for (k = 0; k < dsp_orig->dim; k++)
2462 {
2463 dsp_new->starts[(*this_seg) * dsp_new->dim + k] =
2464 dsp_orig->starts[orig_segment * dsp_orig->dim + k];
2465
2466 if ((dsp_orig->strands == NULL
2467 || dsp_orig->strands[orig_segment * dsp_orig->dim + k] != Seq_strand_minus)
2468 && dsp_new->starts[(*this_seg) * dsp_new->dim + k] != -1)
2469 {
2470 dsp_new->starts[(*this_seg) * dsp_new->dim + k] += first_len;
2471 }
2472
2473 if (dsp_orig->strands != NULL)
2474 {
2475 dsp_new->strands[(*this_seg) * dsp_new->dim + k]
2476 = dsp_orig->strands[orig_segment * dsp_orig->dim + k];
2477 }
2478 }
2479
2480 dsp_new->lens[*this_seg] = second_len;
2481 (*this_seg)++;
2482 }
2483
2484 static void
InsertInSegment(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 insert_start,Int4 insert_len,Int4 insert_row,Int4 orig_segment,Int4Ptr this_segment)2485 InsertInSegment
2486 (DenseSegPtr dsp_orig,
2487 DenseSegPtr dsp_new,
2488 Int4 insert_start,
2489 Int4 insert_len,
2490 Int4 insert_row,
2491 Int4 orig_segment,
2492 Int4Ptr this_segment)
2493 {
2494 /* The original segment needs to be replaced by either one or two segments
2495 * in the new alignment.
2496 * Call segment O the segment that contains insert_start.
2497 * If insert_start == the start of segment O, only one additional segment
2498 * will be needed, otherwise allocate space for two extra segments.
2499 * If insert_row is a plus row:
2500 * If insert_start == the start of segment O,
2501 * the gap segment will be inserted immediately before
2502 * segment O. For insert_row, all starts
2503 * for segment O and beyond will be increased by insert_len.
2504 * Otherwise, segment O will be replaced by segment (A) (a truncated
2505 version of segment O), a gap segment (B) will be
2506 * inserted after A, and a third segment (C) will be inserted
2507 * after segment B.
2508 * Call first_len = insert_start - start of segment O on insert_row
2509 * Call second_len = length of segment O on insert_row - first_len
2510 * The length of the segment A will be first_len.
2511 * The start of segment A for all plus strand rows will be
2512 * the start of segment O.
2513 * The start of segment A for all minus strand rows will be
2514 * the start of segment O + second_len.
2515 * The start of segment B for insert_row will be insert_start,
2516 * The start of segment B for all other rows in the gap segment will be -1.
2517 * The length of segment B will be insert_len.
2518 * For insert_row, the start of segment C will be
2519 * the start of segment O + first_len + insert_len.
2520 * For all remaining plus strand rows, the start of segment C
2521 * will be the start of segment O + first_len.
2522 * For all minus strand rows, the start of segment C will be
2523 * be the start of segment O.
2524 * The length of segment C will be second_len.
2525 * If insert_row is a minus row:
2526 * If insert_start == the start of segment O,
2527 * the gap segment will be inserted immediately after
2528 * segment O and all of the starts for insert_row
2529 * before segment O will be increased by insert_len.
2530 * Otherwise, segment O will be replaced by segment A, a gap segment (B),
2531 * and segment C.
2532 * Call first_len = start of segment O on insert_row + length of segment O on insert_row - insert_start
2533 * Call second_len = insert_start - start of segment O on insert_row
2534 * The length of segment A will be first_len.
2535 * For every plus strand row, the start of segment A will be the start of
2536 * segment O.
2537 * For insert_row, the start of segment A will be the start of segment O + second_len + insert_len.
2538 * For every other minus strand row, the start of segment A will be the
2539 * the start of segment O + second_len.
2540 * The length of segment B will be insert_len.
2541 * For insert_row, the start of segment B will be insert_start.
2542 * For every other row, the start of segment B will be -1.
2543 * The length of segment C will be second_len.
2544 * For every minus row, the start of segment C will be the start of segment O.
2545 * For every plus row, the start of segment C will be the start of segment O + first_len.
2546 * For insert_row, the start of every segment prior to segment O will be increased
2547 * by insert_len.
2548 */
2549
2550 Int4 first_len, second_len;
2551
2552 if (dsp_orig->strands != NULL && dsp_orig->strands[insert_row] == Seq_strand_minus)
2553 {
2554 first_len = dsp_orig->starts [dsp_orig->dim * orig_segment + insert_row]
2555 + dsp_orig->lens [orig_segment] - insert_start;
2556 second_len = insert_start - dsp_orig->starts [dsp_orig->dim * orig_segment + insert_row];
2557 FillInMinusStrandInsertionSegmentA(dsp_orig, dsp_new, insert_start, insert_len,
2558 insert_row, first_len, second_len,
2559 orig_segment, this_segment);
2560 FillInInsertionSegmentB(dsp_orig, dsp_new, insert_start, insert_len,
2561 insert_row, first_len, second_len,
2562 orig_segment, this_segment);
2563 FillInMinusStrandInsertionSegmentC(dsp_orig, dsp_new, insert_start, insert_len,
2564 insert_row, first_len, second_len,
2565 orig_segment, this_segment);
2566 }
2567 else
2568 {
2569 first_len = insert_start - dsp_orig->starts [dsp_orig->dim * orig_segment + insert_row];
2570 second_len = dsp_orig->lens[orig_segment] - first_len;
2571 FillInPlusStrandInsertionSegmentA(dsp_orig, dsp_new, insert_start, insert_len,
2572 insert_row, first_len, second_len,
2573 orig_segment, this_segment);
2574 FillInInsertionSegmentB(dsp_orig, dsp_new, insert_start, insert_len,
2575 insert_row, first_len, second_len,
2576 orig_segment, this_segment);
2577 FillInPlusStrandInsertionSegmentC(dsp_orig, dsp_new, insert_start, insert_len,
2578 insert_row, first_len, second_len,
2579 orig_segment, this_segment);
2580 }
2581 }
2582
2583 static Int4
FindSegmentForInsertPoint(DenseSegPtr dsp,Int4 insert_start,Int4 insert_row,Uint1 insert_strand)2584 FindSegmentForInsertPoint
2585 (DenseSegPtr dsp,
2586 Int4 insert_start,
2587 Int4 insert_row,
2588 Uint1 insert_strand)
2589 {
2590 Int4 insert_segment = -1, k = 0;
2591
2592 if (dsp == NULL || insert_start < 0
2593 || insert_row < 0 || insert_row >= dsp->dim)
2594 {
2595 return -1;
2596 }
2597
2598 if (insert_strand == Seq_strand_minus)
2599 {
2600 while (k < dsp->numseg && insert_segment == -1)
2601 {
2602 if (dsp->starts [k * dsp->dim + insert_row] != -1
2603 && dsp->starts [k * dsp->dim + insert_row] <= insert_start
2604 && dsp->starts [k * dsp->dim + insert_row] + dsp->lens[k] > insert_start)
2605 {
2606 insert_segment = k;
2607 }
2608 k++;
2609 }
2610 }
2611 else
2612 {
2613 while (k < dsp->numseg && insert_segment == -1)
2614 {
2615 if (dsp->starts [k * dsp->dim + insert_row] != -1
2616 && dsp->starts [dsp->dim * k + insert_row] <= insert_start
2617 && dsp->starts [dsp->dim * k + insert_row] + dsp->lens [k] > insert_start)
2618 {
2619 insert_segment = k;
2620 }
2621 k++;
2622 }
2623 }
2624 return insert_segment;
2625 }
2626
2627 static void
CopyDensegSegments(DenseSegPtr dsp_orig,DenseSegPtr dsp_new,Int4 start_seg,Int4 copy_seg,Int4 num_to_copy)2628 CopyDensegSegments
2629 (DenseSegPtr dsp_orig,
2630 DenseSegPtr dsp_new,
2631 Int4 start_seg,
2632 Int4 copy_seg,
2633 Int4 num_to_copy)
2634 {
2635 Int4 num_copied = 0, k;
2636
2637 if (dsp_orig == NULL || dsp_new == NULL)
2638 {
2639 return;
2640 }
2641
2642 while (start_seg < dsp_orig->numseg && copy_seg < dsp_new->numseg
2643 && num_copied < num_to_copy)
2644 {
2645 if (start_seg >= 0 && copy_seg >= 0)
2646 {
2647 for (k = 0; k < dsp_orig->dim && k < dsp_new->dim; k++)
2648 {
2649 dsp_new->starts [copy_seg * dsp_new->dim + k]
2650 = dsp_orig->starts[start_seg * dsp_orig->dim + k];
2651 if (dsp_orig->strands != NULL && dsp_new->strands != NULL)
2652 {
2653 dsp_new->strands[copy_seg * dsp_new->dim + k]
2654 = dsp_orig->strands[start_seg * dsp_orig->dim + k];
2655 }
2656 }
2657 dsp_new->lens [copy_seg] = dsp_orig->lens[start_seg];
2658 num_copied++;
2659 }
2660 start_seg ++;
2661 copy_seg ++;
2662 }
2663 }
2664
2665
2666 /**************************************************
2667 ***
2668 ***************************************************/
SeqAlignInsertByLoc(SeqLocPtr slp,SeqAlignPtr salp)2669 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignInsertByLoc (SeqLocPtr slp, SeqAlignPtr salp)
2670 {
2671 SeqIdPtr sip;
2672 DenseSegPtr dsp, dsp_new;
2673 Int4 from, start;
2674 Int2 j;
2675 Int2 index;
2676 Int4 insert_len;
2677 Int4 extra_segs;
2678 Uint1 insert_strand;
2679 Int4 insert_seg;
2680 Int4 orig_segment;
2681
2682 if (salp == NULL || salp->segtype != SAS_DENSEG)
2683 return salp;
2684 sip = SeqLocId(slp);
2685 insert_len = SeqLocLen (slp);
2686 dsp = (DenseSegPtr) salp->segs;
2687 if (dsp == NULL) {
2688 return salp;
2689 }
2690
2691 index = SeqIdOrderInBioseqIdList (sip, dsp->ids);
2692 if (index == 0) {
2693 /* bioseq not in alignment */
2694 return salp;
2695 }
2696 index -= 1;
2697 insert_strand = SeqAlignStrand (salp, index);
2698
2699 if (insert_strand == Seq_strand_minus)
2700 {
2701 from = SeqAlignStop (salp, index);
2702 }
2703 else
2704 {
2705 from = SeqAlignStart(salp, index);
2706 }
2707 start = SeqLocStart (slp);
2708 if (start <= from)
2709 {
2710 /* just adjust the starts */
2711 for (j = 0; j < dsp->numseg; j++)
2712 {
2713 if (dsp->starts [dsp->dim * j + index] > -1)
2714 {
2715 dsp->starts [dsp->dim * j + index] += insert_len;
2716 }
2717 }
2718 }
2719 else
2720 {
2721 /* need to insert gap of length insert_len at start */
2722 /* first, find affected segment */
2723 insert_seg = FindSegmentForInsertPoint (dsp, start, index, insert_strand);
2724 if (insert_seg < 0 || insert_seg > dsp->numseg)
2725 {
2726 return salp;
2727 }
2728
2729 if (dsp->starts[dsp->dim * insert_seg + index] == start)
2730 {
2731 extra_segs = 1;
2732 }
2733 else
2734 {
2735 extra_segs = 2;
2736 }
2737
2738 dsp_new = (DenseSegPtr) MemNew (sizeof (DenseSeg));
2739 dsp_new->dim = dsp->dim;
2740 dsp_new->numseg = dsp->numseg + extra_segs;
2741 dsp_new->starts = (Int4Ptr) MemNew (dsp->dim * (dsp->numseg + extra_segs) * sizeof (Int4));
2742 if (dsp->strands != NULL)
2743 {
2744 dsp_new->strands = (Uint1Ptr) MemNew (dsp->dim * (dsp->numseg + extra_segs) * sizeof (Uint1));
2745 }
2746 dsp_new->lens = (Int4Ptr) MemNew ((dsp->numseg + extra_segs) * sizeof (Int4));
2747
2748 /* copy alignment up to point of insertion */
2749 CopyDensegSegments (dsp, dsp_new, 0, 0, insert_seg);
2750
2751 /* adjust starts in insert_row before insert_seg if insert_row on minus strand */
2752 if (insert_strand == Seq_strand_minus)
2753 {
2754 for (j = 0; j < insert_seg; j++)
2755 {
2756 if (dsp_new->starts[dsp_new->dim * j + index] != -1)
2757 {
2758 dsp_new->starts[dsp_new->dim * j + index] += insert_len;
2759 }
2760 }
2761 }
2762
2763 /* create gap */
2764 orig_segment = insert_seg;
2765 InsertInSegment (dsp, dsp_new, start, insert_len, index, orig_segment, &insert_seg);
2766
2767 /* Copy after insertion point */
2768 CopyDensegSegments (dsp, dsp_new, orig_segment + 1, insert_seg, dsp->numseg - orig_segment);
2769
2770 /* Adjust starts in insert row after insert_seg if insert_row on plus strand */
2771 if (insert_strand == Seq_strand_plus)
2772 {
2773 while (insert_seg < dsp_new->numseg)
2774 {
2775 if (dsp_new->starts[dsp_new->dim * insert_seg + index] != -1)
2776 {
2777 dsp_new->starts[dsp_new->dim * insert_seg + index] += insert_len;
2778 }
2779 insert_seg++;
2780 }
2781 }
2782
2783 /* replace in old DenseSeg */
2784 dsp->starts = MemFree (dsp->starts);
2785 dsp->starts = dsp_new->starts;
2786 dsp_new->starts = NULL;
2787 dsp->strands = MemFree (dsp->strands);
2788 dsp->strands = dsp_new->strands;
2789 dsp_new->strands = NULL;
2790 dsp->lens = MemFree (dsp->lens);
2791 dsp->lens = dsp_new->lens;
2792 dsp_new->lens = NULL;
2793 dsp->numseg = dsp_new->numseg;
2794
2795 }
2796
2797 return salp;
2798 }
2799
2800
2801 /*******************************************
2802 ***
2803 *** DeleteRegion
2804 *** !@!!!!!!!!!!!!!!!!!!!!!!! > CompSegPtr
2805 ********************************************/
DeleteRegion(SeqIntPtr sip,SeqAlignPtr salp)2806 NLM_EXTERN SeqAlignPtr LIBCALL DeleteRegion (SeqIntPtr sip, SeqAlignPtr salp)
2807 {
2808 CompSegPtr dsp;
2809 SeqAlignPtr salp1 = NULL;
2810 BoolPtr dspstart = NULL;
2811 Int4Ptr dsplens = NULL;
2812 Int4 delete_from;
2813 Int4 delete_to;
2814 Int2 numseg_before = 0;
2815 Int2 subseglens = 0;
2816 Int4 sumseglens = 0;
2817 Boolean seen = FALSE;
2818
2819 if ( sip == NULL ) return salp;
2820 delete_from = sip->from;
2821 delete_to = sip->to;
2822
2823 /*****************************************
2824 *** copy salp(s) until delete_from
2825 *****************************************/
2826 if ( (dsp = (CompSegPtr) salp->segs ) == NULL) {
2827 return NULL;
2828 }
2829 dspstart = dsp->starts;
2830 dsplens = dsp->lens;
2831 while ( !seen )
2832 {
2833 if ( !(seen = locate_in_seqalign (delete_from, dsp->dim, dsp->numseg, &dspstart, &dsplens, &numseg_before, &subseglens, &sumseglens)) )
2834 {
2835 salp1 = SeqAlignDupAdd (&salp1, salp, 0, 0, 0);
2836 if ( salp->next == NULL ) break;
2837 else {
2838 salp = salp->next;
2839 dsp = (CompSegPtr) salp->segs;
2840 dspstart = dsp->starts;
2841 dsplens = dsp->lens;
2842 }
2843 }
2844 }
2845 if ( !seen ) {
2846 return NULL;
2847 }
2848 salp1 = SeqAlignDupAdd (&salp1, salp, numseg_before, subseglens, TRUE);
2849 /*****************************************
2850 *** delete salp until delete_to
2851 *****************************************/
2852 seen = FALSE;
2853 while ( !seen )
2854 {
2855 if ( !(seen = locate_in_seqalign (delete_to, dsp->dim, dsp->numseg, &dspstart, &dsplens, &numseg_before, &subseglens, &sumseglens)) )
2856 {
2857 if ( salp->next == NULL ) break;
2858 else {
2859 salp = salp->next;
2860 dsp = (CompSegPtr) salp->segs;
2861 dspstart = dsp->starts;
2862 dsplens = dsp->lens;
2863 }
2864 }
2865 }
2866 if ( !seen ) {
2867 return NULL;
2868 }
2869 /*****************************************
2870 *** copy salp from delete_to to the end
2871 *****************************************/
2872 salp1 = SeqAlignDupAdd (&salp1, salp, numseg_before, subseglens, FALSE);
2873 if ( ( salp = salp->next) != NULL )
2874 while ( salp != NULL )
2875 {
2876 salp1 = SeqAlignDupAdd (&salp1, salp, 0, 0, 0);
2877 salp = salp->next;
2878 }
2879 return salp1;
2880 }
2881
2882 /*********************************************************
2883 ***
2884 *** DenseDiagPtr procedures
2885 ***
2886 **********************************************************/
2887
DenseDiagToDenseSegFunc(SeqAlignPtr salp,Boolean add_ends)2888 NLM_EXTERN SeqAlignPtr LIBCALL DenseDiagToDenseSegFunc (SeqAlignPtr salp, Boolean add_ends)
2889 {
2890 BioseqPtr bsp;
2891 SeqAlignPtr newsalp;
2892 DenseSegPtr dsp;
2893 DenseDiagPtr ddp;
2894 DenseDiagPtr nextddp;
2895 ValNodePtr head;
2896 ValNodePtr vnp;
2897 SeqIdPtr sip1, sip2;
2898 Int4Ptr ddp_starts,
2899 nextddp_starts,
2900 dspstart;
2901 Int4Ptr dsplen;
2902 Int4 minstart, laststart;
2903 Int4 ddp_st1, ddp_st2;
2904 Int4 nextddp_st1, nextddp_st2;
2905 Int4 ddlen;
2906 Int4 interlen1, interlen2;
2907 Int4 intermax, intermin;
2908 Int4 j;
2909 Int4 start1 = 0, start2 = 0,
2910 stop1 = 0, stop2 = 0;
2911 Int4 slpstart1 = 0, slpstart2 = 0,
2912 slpstop1 = 0, slpstop2 = 0;
2913 Uint1 strand1, strand2;
2914 Uint1Ptr strandp=NULL;
2915 Int2 numseg;
2916 Boolean delete_me;
2917
2918 newsalp = SeqAlignNew ();
2919 newsalp->type = 3;
2920 newsalp->segtype = 2;
2921 newsalp->dim = 2;
2922 dsp = DenseSegNew ();
2923 newsalp->segs = (Pointer) dsp;
2924 dsp->dim = 2;
2925
2926 ddp =(DenseDiagPtr)salp->segs;
2927 dsp->ids = SeqIdDupList (ddp->id);
2928
2929 numseg = 0;
2930 for (ddp =(DenseDiagPtr)salp->segs; ddp != NULL; ddp = ddp->next)
2931 numseg++;
2932 numseg = numseg *3 -2;
2933
2934 head = NULL;
2935 laststart = -10;
2936 for (j=0; j<numseg; j++) {
2937 minstart = INT4_MAX;
2938 nextddp = NULL;
2939 for (ddp = (DenseDiagPtr)salp->segs; ddp!=NULL; ddp=ddp->next) {
2940 if (laststart < *(ddp->starts) && minstart > *(ddp->starts)) {
2941 minstart = *(ddp->starts);
2942 nextddp = ddp;
2943 }
2944 }
2945 if (nextddp!=NULL) {
2946 ValNodeAddPointer(&head, 0, nextddp);
2947 laststart = *(nextddp->starts);
2948 }
2949 }
2950 if (head==NULL)
2951 return NULL;
2952 for (vnp=head; vnp!=NULL; vnp=vnp->next) {
2953 ddp = (DenseDiagPtr) vnp->data.ptrvalue;
2954 ddp->next = NULL;
2955 }
2956 salp->segs = (Pointer) head->data.ptrvalue;
2957 head->data.ptrvalue = NULL;
2958 vnp = head->next;
2959 nextddp = (DenseDiagPtr) salp->segs;
2960 for (; vnp!=NULL; vnp=vnp->next) {
2961 ddp = (DenseDiagPtr) vnp->data.ptrvalue;
2962 nextddp->next = ddp;
2963 nextddp = nextddp->next;
2964 vnp->data.ptrvalue = NULL;
2965 }
2966 head = ValNodeFree (head);
2967 ddp = (DenseDiagPtr) salp->segs;
2968 ddlen = ddp->len;
2969 ddp_starts = ddp->starts;
2970 ddp_st1 = *ddp_starts;
2971 ddp_starts++;
2972 ddp_st2 = *ddp_starts;
2973 ddp_starts++;
2974 while (ddp!=NULL) {
2975 delete_me=FALSE;
2976 if (ddp->next != NULL)
2977 {
2978 nextddp = ddp->next;
2979 nextddp_starts = nextddp->starts;
2980 nextddp_st1 = *nextddp_starts;
2981 nextddp_starts++;
2982 nextddp_st2 = *nextddp_starts;
2983 interlen1 = nextddp_st1 - ddp_st1 - ddlen;
2984 interlen2 = nextddp_st2 - ddp_st2 - ddlen;
2985 if (interlen1 < 0 || interlen2 < 0) {
2986 if (interlen1 < 0 && interlen2 < 0) {
2987 ddp->next = nextddp->next;
2988 nextddp->next = NULL;
2989 DenseDiagFree(nextddp);
2990 delete_me=TRUE;
2991 }
2992 else if (interlen1 < 0) {
2993 if (ABS(interlen1) >= ddlen) {
2994 ddp->next = nextddp->next;
2995 nextddp->next = NULL;
2996 DenseDiagFree(nextddp);
2997 delete_me=TRUE;
2998 }
2999 }
3000 else if (interlen2 < 0) {
3001 if (ABS(interlen2) >= ddlen) {
3002 ddp->next = nextddp->next;
3003 nextddp->next = NULL;
3004 DenseDiagFree(nextddp);
3005 delete_me=TRUE;
3006 }
3007 }
3008 }
3009 }
3010 if (!delete_me) {
3011 ddp = ddp->next;
3012 if (ddp != NULL)
3013 {
3014 ddlen = ddp->len;
3015 ddp_starts = ddp->starts;
3016 ddp_st1 = *ddp_starts;
3017 ddp_starts++;
3018 ddp_st2 = *ddp_starts;
3019 ddp_starts++;
3020 }
3021 }
3022 }
3023 dsp->starts = (Int4Ptr) MemNew ((size_t) ((2*numseg + 4) * sizeof (Int4)));
3024 dsp->lens = (Int4Ptr) MemNew ((size_t) ((numseg + 2) * sizeof (Int4)));
3025 for (j = 0; j < 2*numseg + 4; j++) dsp->starts [j] = -1;
3026 for (j = 0; j < numseg + 2; j++) dsp->lens [j] = 0;
3027 dspstart = dsp->starts;
3028 dsplen = dsp->lens;
3029
3030 ddp =(DenseDiagPtr)salp->segs;
3031 ddlen = ddp->len;
3032 ddp_starts = ddp->starts;
3033 ddp_st1 = *ddp_starts;
3034 ddp_starts++;
3035 ddp_st2 = *ddp_starts;
3036 ddp_starts++;
3037 numseg = 0;
3038 while (ddp != NULL)
3039 {
3040 numseg++;
3041 *dspstart = ddp_st1;
3042 dspstart++;
3043 *dspstart = ddp_st2;
3044 dspstart++;
3045 if (ddp->next != NULL)
3046 {
3047 nextddp = ddp->next;
3048 nextddp_starts = nextddp->starts;
3049 nextddp_st1 = *nextddp_starts;
3050 nextddp_starts++;
3051 nextddp_st2 = *nextddp_starts;
3052 interlen1 = nextddp_st1 - ddp_st1 - ddlen;
3053 interlen2 = nextddp_st2 - ddp_st2 - ddlen;
3054 if (interlen1 < 0 || interlen2 < 0) {
3055 if (interlen1 < 0 && interlen2 < 0) {
3056 return NULL;
3057 }
3058 if (interlen1 < 0) {
3059 if (ABS(interlen1) < ddlen) ddlen += interlen1;
3060 else {
3061 return NULL;
3062 }
3063 }
3064 if (interlen2 < 0) {
3065 if (ABS(interlen2) < ddlen) ddlen += interlen2;
3066 else {
3067 return NULL;
3068 }
3069 }
3070 interlen1 = nextddp_st1 - ddp_st1 - ddlen;
3071 interlen2 = nextddp_st2 - ddp_st2 - ddlen;
3072 }
3073 *dsplen = ddlen;
3074 dsplen++;
3075 if (interlen1 <= 0)
3076 {
3077 if (interlen2>0) {
3078 numseg++;
3079 *dspstart = -1;
3080 dspstart++;
3081 *dspstart = ddp_st2 + ddlen;
3082 dspstart++;
3083 *dsplen = interlen2;
3084 dsplen++;
3085 }
3086 }
3087 else if (interlen2 <= 0)
3088 {
3089 numseg++;
3090 *dspstart = ddp_st1 + ddlen;
3091 dspstart++;
3092 *dspstart = -1;
3093 dspstart++;
3094 *dsplen = interlen1;
3095 dsplen++;
3096 }
3097 else if (interlen1 == interlen2)
3098 {
3099 numseg++;
3100 *dspstart = ddp_st1 + ddlen;
3101 dspstart++;
3102 *dspstart = ddp_st2 + ddlen;
3103 dspstart++;
3104 *dsplen = interlen1;
3105 dsplen++;
3106 }
3107 else
3108 {
3109 if (interlen1 > interlen2) {
3110 intermax = interlen1;
3111 intermin = interlen2;
3112 }
3113 else {
3114 intermax = interlen2;
3115 intermin = interlen1;
3116 }
3117 numseg++;
3118 *dspstart = ddp_st1 + ddlen;
3119 dspstart++;
3120 *dspstart = ddp_st2 + ddlen;
3121 dspstart++;
3122 *dsplen = intermin;
3123 dsplen++;
3124 numseg++;
3125 if (interlen1 > interlen2) {
3126 *dspstart = ddp_st1 + ddlen + intermin;
3127 dspstart++;
3128 *dspstart = -1;
3129 }
3130 else {
3131 *dspstart = -1;
3132 dspstart++;
3133 *dspstart = ddp_st2 + ddlen + intermin;
3134 }
3135 dspstart++;
3136 *dsplen = intermax - intermin;
3137 dsplen++;
3138 }
3139 }
3140 else {
3141 *dsplen = ddlen;
3142 dsplen++;
3143 }
3144 ddp = ddp->next;
3145 if (ddp != NULL)
3146 {
3147 ddlen = ddp->len;
3148 ddp_starts = ddp->starts;
3149 ddp_st1 = *ddp_starts;
3150 ddp_starts++;
3151 ddp_st2 = *ddp_starts;
3152 ddp_starts++;
3153 }
3154 }
3155 dsp->numseg = numseg;
3156 if (add_ends && newsalp!=NULL)
3157 {
3158 strand1 = SeqAlignStrand (salp, 0);
3159 strand2 = SeqAlignStrand (salp, 1);
3160 start1 = SeqAlignStart (newsalp, 0);
3161 start2 = SeqAlignStart (newsalp, 1);
3162 sip1 = SeqAlignId (newsalp, 0);
3163 sip2 = SeqAlignId (newsalp, 1);
3164 slpstart1= 0;
3165 bsp = BioseqLockById(sip1);
3166 if (bsp!=NULL) {
3167 slpstop1 = bsp->length-1;
3168 BioseqUnlock (bsp);
3169 }
3170 else
3171 slpstop1 = stop1;
3172 slpstart2 = 0;
3173 bsp = BioseqLockById(sip2);
3174 if (bsp!=NULL) {
3175 slpstop2 = bsp->length-1;
3176 BioseqUnlock (bsp);
3177 }
3178 else
3179 slpstop2 = stop2;
3180 /**
3181 newsalp = SeqAlignEndExtend (newsalp, slpstart1, slpstart2, -1, -1, start1, start2, -1, -1, strand1, strand2);
3182 stop1 = SeqAlignStop (newsalp, 0);
3183 stop2 = SeqAlignStop (newsalp, 1);
3184 newsalp = SeqAlignEndExtend (newsalp, -1, -1, slpstop1, slpstop2, -1, -1, stop1, stop2, strand1, strand2);
3185 **/
3186 if (strand1!=strand2)
3187 {
3188 strandp =(Uint1Ptr)MemNew((size_t)((dsp->dim*dsp->numseg+4)*sizeof(Uint1)));
3189 dsp->strands = strandp;
3190 for (j = 0; j < 2*numseg; j+=2)
3191 dsp->strands [j] = strand1;
3192 for (j = 1; j < 2*numseg; j+=2)
3193 dsp->strands [j] = strand2;
3194 }
3195 }
3196 return newsalp;
3197 }
3198
Compact(SeqAlignPtr salp)3199 static SeqAlignPtr Compact (SeqAlignPtr salp)
3200 {
3201 DenseSegPtr dsp;
3202 Int4Ptr startp,
3203 stmp,
3204 lenp,
3205 ltmp;
3206 Int4 st1, st2, st3, st4;
3207 Int2 numseg,
3208 j, k, n;
3209
3210 dsp = (DenseSegPtr) salp->segs;
3211 if (dsp==NULL)
3212 return salp;
3213 lenp = (Int4Ptr) dsp->lens;
3214 startp = (Int4Ptr) dsp->starts;
3215 j=0;
3216 numseg=dsp->numseg;
3217 while(j<numseg-1) {
3218 st1=*startp;
3219 st2=*(startp+1);
3220 st3=*(startp+2);
3221 st4=*(startp+3);
3222 if (st1>-1 && st2>-1 && st3>-1 && st4>-1) {
3223 stmp=startp;
3224 stmp+=dsp->dim;
3225 for (k=j; k<dsp->numseg-2; k++) {
3226 for (n=0; n<dsp->dim; n++) {
3227 *stmp=*(stmp+dsp->dim);
3228 stmp++;
3229 }
3230 }
3231 ltmp=lenp;
3232 ltmp++;
3233 *lenp += *ltmp;
3234 for (k=j; k<dsp->numseg-2; k++, ltmp++) {
3235 *ltmp = *(ltmp+1);
3236 }
3237 numseg--;
3238 }
3239 j++;
3240 startp+=dsp->dim;
3241 lenp++;
3242 }
3243 dsp->numseg = numseg;
3244 return salp;
3245 }
3246
DenseDiagToDenseSeg(SeqAlignPtr salp,Boolean add_ends)3247 NLM_EXTERN SeqAlignPtr LIBCALL DenseDiagToDenseSeg (SeqAlignPtr salp, Boolean add_ends)
3248 {
3249 SeqAlignPtr cur = NULL,
3250 newsalp = NULL,
3251 new1 = NULL,
3252 new2 = NULL;
3253
3254 for (cur=salp; cur!= NULL; cur = cur->next) {
3255 newsalp = DenseDiagToDenseSegFunc(cur, add_ends);
3256 newsalp = Compact (newsalp);
3257 if (newsalp != NULL) {
3258 if (new1 == NULL) {
3259 new1 = newsalp;
3260 new2 = new1;
3261 } else {
3262 new2->next = newsalp;
3263 new2 = new2->next;
3264 }
3265 }
3266 }
3267 return new1;
3268 }
3269
DenseSegToDenseDiag(SeqAlignPtr salp)3270 NLM_EXTERN SeqAlignPtr LIBCALL DenseSegToDenseDiag (SeqAlignPtr salp)
3271 {
3272 SeqAlignPtr salptmp,
3273 salp2 = NULL,
3274 salp2tmp;
3275 DenseSegPtr dsp;
3276 DenseDiagPtr ddp,
3277 ddphead = NULL;
3278 SeqIdPtr sip;
3279 Int4Ptr startp,
3280 lenp;
3281 Int2 j;
3282
3283 if (salp!=NULL)
3284 {
3285 for (salptmp = salp; salptmp!=NULL; salptmp = salptmp->next)
3286 {
3287 if (salptmp->segtype == 2 && salptmp->dim == 2)
3288 {
3289 ddphead = NULL;
3290 dsp = (DenseSegPtr) salptmp->segs;
3291 if (dsp!=NULL)
3292 {
3293 sip = dsp->ids;
3294 lenp = (Int4Ptr) dsp->lens;
3295 startp = dsp->starts;
3296 for (j=0; j<dsp->numseg; j++, lenp++)
3297 {
3298 if (*startp > -1 && *(startp+1) > -1)
3299 {
3300 ddp = DenseDiagCreate (2, sip, startp, *lenp, NULL, NULL);
3301 DenseDiagLink (&ddphead, ddp);
3302 }
3303 startp += dsp->dim;
3304 }
3305 }
3306 if (ddphead != NULL)
3307 {
3308 salp2tmp = SeqAlignNew ();
3309 if (salp2tmp != NULL) {
3310 salp2tmp->type = 3;
3311 salp2tmp->segtype = 1;
3312 salp2tmp->dim = dsp->dim;
3313 salp2tmp->segs = (Pointer) ddphead;
3314 salp2 = SeqAlignLink (salp2, salp2tmp);
3315 }
3316 }
3317 }
3318 else if (salptmp->segtype == 1)
3319 {
3320 salp2tmp = SeqAlignDup (salptmp);
3321 salp2 = SeqAlignLink (salp2, salp2tmp);
3322 }
3323 }
3324 }
3325 return salp2;
3326 }
3327
DenseDiagCreate(Int4 dim,SeqIdPtr id,Int4Ptr starts,Int4 len,Uint1Ptr strands,ScorePtr scores)3328 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagCreate (Int4 dim, SeqIdPtr id, Int4Ptr starts, Int4 len, Uint1Ptr strands, ScorePtr scores)
3329
3330 {
3331 DenseDiagPtr ddp_copy;
3332 Int4 j;
3333
3334 ddp_copy = DenseDiagNew();
3335 ddp_copy->dim = dim;
3336 if (id != NULL) {
3337 ddp_copy->id = SeqIdDupList (id);
3338 }
3339 ddp_copy->starts = (Int4Ptr)MemNew((size_t)((dim+1)*sizeof(Int4)));
3340 for (j = 0; j < dim; j++, starts++) {
3341 ddp_copy->starts [j] = *starts;
3342 }
3343 ddp_copy->len = len;
3344 if ( strands != NULL )
3345 ddp_copy->strands = strands;
3346 if ( scores != NULL )
3347 ddp_copy->scores = scores;
3348 return ddp_copy;
3349 }
3350
DenseDiagLink(DenseDiagPtr * ddp_head,DenseDiagPtr ddp)3351 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagLink (DenseDiagPtr *ddp_head, DenseDiagPtr ddp)
3352 {
3353 DenseDiagPtr ddp_tmp;
3354
3355 if (ddp!=NULL)
3356 {
3357 if (*ddp_head != NULL)
3358 {
3359 ddp_tmp = *ddp_head;
3360 while (ddp_tmp->next != NULL)
3361 ddp_tmp = ddp_tmp->next;
3362 ddp_tmp->next = ddp;
3363 }
3364 else
3365 *ddp_head = ddp;
3366 }
3367 return *ddp_head;
3368 }
3369
DenseDiagInsert(DenseDiagPtr ddp_before,DenseDiagPtr ddp)3370 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagInsert (DenseDiagPtr ddp_before, DenseDiagPtr ddp)
3371
3372 {
3373 DenseDiagPtr ddp_tmp;
3374
3375 if ( (ddp_tmp = ddp_before->next) == NULL) {
3376 ddp_before->next = ddp;
3377 return ddp_before;
3378 }
3379 ddp_before->next = ddp;
3380 ddp->next = ddp_tmp;
3381 return ddp_before;
3382 }
3383
DenseDiagPrecede(DenseDiagPtr ddp_after,DenseDiagPtr * ddp)3384 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagPrecede (DenseDiagPtr ddp_after, DenseDiagPtr *ddp)
3385
3386 {
3387 DenseDiagPtr ddp_tmp;
3388
3389 ddp_tmp = *ddp;
3390 ddp_tmp->next = ddp_after;
3391 return *ddp;
3392 }
3393
DenseDiagLinkSort(DenseDiagPtr * ddp_head,DenseDiagPtr ddp)3394 NLM_EXTERN DenseDiagPtr LIBCALL DenseDiagLinkSort (DenseDiagPtr *ddp_head, DenseDiagPtr ddp)
3395
3396 {
3397 DenseDiagPtr ddp_tmp;
3398
3399 if ( (ddp_tmp = *ddp_head) != NULL ) {
3400 if ( *(ddp->starts) < *(ddp_tmp->starts) )
3401 *ddp_head = DenseDiagPrecede (ddp_tmp, &ddp);
3402
3403 else if ( *(ddp->starts) > *(ddp_tmp->starts) && ddp_tmp->next == NULL)
3404 ddp_tmp->next = ddp;
3405
3406 else {
3407 while ( ddp_tmp->next != NULL ) {
3408 if ( *(ddp->starts) < *(ddp_tmp->next->starts) ) break;
3409 ddp_tmp = ddp_tmp->next;
3410 }
3411 ddp_tmp = DenseDiagInsert (ddp_tmp, ddp);
3412 }
3413 }
3414 else *ddp_head = ddp;
3415 return *ddp_head;
3416 }
3417
SeqLocToFastaSeqAlign(ValNodePtr vnp)3418 NLM_EXTERN SeqAlignPtr LIBCALL SeqLocToFastaSeqAlign (ValNodePtr vnp)
3419 {
3420 SeqAlignPtr salp;
3421 ValNodePtr vnptmp;
3422 DenseSegPtr dsp;
3423 Int4Ptr lengthsort;
3424 Int4 maxlen,
3425 len, min, pre_min;
3426 Int4 j, j1;
3427 Int2 nseq,
3428 k, numseg;
3429
3430 nseq = 0;
3431 for (vnptmp=vnp; vnptmp!=NULL;vnptmp=vnptmp->next)
3432 if (vnptmp->data.ptrvalue != NULL) nseq++;
3433 if (nseq == 0)
3434 return NULL;
3435 salp = SeqAlignNew ();
3436 salp->type = 3;
3437 salp->segtype = 2;
3438 salp->dim = nseq;
3439 dsp = DenseSegNew ();
3440 salp->segs = (Pointer) dsp;
3441 dsp->dim = nseq;
3442 dsp->ids = SeqIdListfromSeqLoc (vnp);
3443
3444 /****************************
3445 ** count nb of segments
3446 ****************************/
3447 maxlen = MaxLengthSeqLoc (vnp);
3448 lengthsort = (Int4Ptr)MemNew((size_t) ((nseq+1)*sizeof(Int4)));
3449 pre_min = 0;
3450 numseg = 1;
3451 lengthsort [numseg] = 0;
3452 for ( j = 0; j < nseq; j++ )
3453 {
3454 vnptmp = vnp;
3455 min = maxlen;
3456 for ( k = 0; k < nseq; k++ )
3457 {
3458 len = SeqLocLen ((SeqLocPtr) vnptmp->data.ptrvalue);
3459 if ( len < min && len > pre_min ) min = len;
3460 vnptmp = vnptmp->next;
3461 if ( vnptmp == NULL ) break;
3462 }
3463 if ( min > pre_min )
3464 {
3465 lengthsort [numseg] = min;
3466 pre_min = min;
3467 numseg++;
3468 }
3469 }
3470 /****************************
3471 ** copy starts, lens
3472 ****************************/
3473 dsp->starts = (Int4Ptr) MemNew ((size_t) ((nseq*numseg + 4) * sizeof (Int4)));
3474 dsp->lens = (Int4Ptr) MemNew ((size_t) ((numseg + 2) * sizeof (Int4)));
3475 for (j = 0; j < nseq*numseg + 4; j++) dsp->starts[j] = -1;
3476 for (j = 0; j < numseg + 2; j++) dsp->lens[j] = 0;
3477 for ( k = 1; k < numseg; k++ )
3478 {
3479 dsp->lens [k-1] = lengthsort [k] - lengthsort [k-1];
3480 }
3481 numseg--;
3482 dsp->numseg = numseg;
3483 vnptmp = vnp;
3484 for ( j = 0; j < nseq; j++ )
3485 {
3486 if (SeqLocStrand((SeqLocPtr) vnptmp->data.ptrvalue)==Seq_strand_minus)
3487 {
3488 j1=nseq*numseg-nseq+j;
3489 dsp->starts[j1] = SeqLocStart ((SeqLocPtr) vnptmp->data.ptrvalue);
3490 j1-=nseq;
3491 for(k=numseg-1;k>-1;k--)
3492 {
3493 if (dsp->starts[j1+nseq]+ dsp->lens[k] < SeqLocLen((SeqLocPtr) vnptmp->data.ptrvalue))
3494 dsp->starts [j1]= dsp->starts[j1+nseq]+ dsp->lens[k];
3495 j1-=nseq;
3496 }
3497 }
3498 else{
3499 j1=j;
3500 dsp->starts[j1] = SeqLocStart ((SeqLocPtr) vnptmp->data.ptrvalue);
3501 j1+=nseq;
3502 for ( k = 1; k < numseg; k++ )
3503 {
3504 if (dsp->starts[j1-nseq]+ dsp->lens[(k-1)] < SeqLocLen((SeqLocPtr) vnptmp->data.ptrvalue))
3505 dsp->starts [j1]= dsp->starts[j1-nseq]+ dsp->lens[(k-1)];
3506 j1+=nseq;
3507 }
3508 }
3509 vnptmp = vnptmp->next;
3510 if ( vnptmp == NULL ) break;
3511 }
3512 dsp->strands= (Uint1Ptr) MemNew ((size_t) ((numseg*nseq+4)*sizeof (Uint1)));
3513 j1=0;
3514 for(k=0; k<numseg; k++)
3515 {
3516 vnptmp = vnp;
3517 for ( j = 0; j < nseq; j++ )
3518 {
3519 dsp->strands[j1] = SeqLocStrand((SeqLocPtr) vnptmp->data.ptrvalue);
3520 vnptmp = vnptmp->next;
3521 j1++;
3522 if ( vnptmp == NULL ) break;
3523 }
3524 }
3525 MemFree (lengthsort);
3526 return salp;
3527 }
3528
sap_replace(SeqAnnotPtr sap,SeqAlignPtr salp,Uint1 choice)3529 static Boolean LIBCALL sap_replace (SeqAnnotPtr sap, SeqAlignPtr salp, Uint1 choice)
3530 {
3531 if (sap != NULL) {
3532 for (; sap!= NULL; sap=sap->next) {
3533 if (sap->type == choice) {
3534 SeqAlignSetFree ((SeqAlignPtr)sap->data);
3535 sap->data = (Pointer)salp;
3536 return TRUE;
3537 }
3538 }
3539 }
3540 return FALSE;
3541 }
3542
ReplaceSeqAlignInSeqEntry(Uint2 entityID,Uint4 itemID,SeqAlignPtr salp)3543 NLM_EXTERN void LIBCALL ReplaceSeqAlignInSeqEntry (Uint2 entityID, Uint4 itemID, SeqAlignPtr salp)
3544 {
3545 SeqEntryPtr sep,
3546 sep1 = NULL;
3547 SeqEntryPtr sept = NULL;
3548 BioseqSetPtr bssp = NULL;
3549 BioseqPtr bsp = NULL;
3550 SeqAnnotPtr sap = NULL;
3551
3552 sep = GetBestTopParentForItemID (entityID, itemID, OBJ_BIOSEQ);
3553 if (sep != NULL) {
3554 if (IS_Bioseq(sep)) {
3555 entityID = ObjMgrGetEntityIDForChoice (sep);
3556 sep1 = GetTopSeqEntryForEntityID (entityID);
3557 bsp = (BioseqPtr) sep->data.ptrvalue;
3558 }
3559 else if(IS_Bioseq_set(sep)) {
3560 sep1 = sep;
3561 }
3562 if (sep1 != NULL) {
3563 bssp = NULL; bsp = NULL;
3564 if (IS_Bioseq(sep1)) {
3565 bsp = (BioseqPtr) sep1->data.ptrvalue;
3566 sap_replace(bsp->annot, salp, 2);
3567 }
3568 else if(IS_Bioseq_set(sep1)) {
3569 bssp = (BioseqSetPtr)sep1->data.ptrvalue;
3570 while (bssp!=NULL && bssp->_class == 7) {
3571 sept = bssp->seq_set;
3572 bssp = NULL; bsp = NULL;
3573 if (IS_Bioseq(sept)) {
3574 bsp = (BioseqPtr) sept->data.ptrvalue;
3575 break;
3576 }
3577 else if (IS_Bioseq_set(sept))
3578 bssp = (BioseqSetPtr) sept->data.ptrvalue;
3579 }
3580 if (bssp!=NULL) {
3581 sap = bssp->annot;
3582 if((sap==NULL || salp==NULL) && IS_Bioseq(sep)) {
3583 bsp = (BioseqPtr) sep->data.ptrvalue;
3584 sap_replace(bsp->annot, salp, 2);
3585 }
3586 else
3587 sap_replace(sap, salp, 2);
3588 if (sap==NULL && IS_Bioseq_set(sep)) {
3589 bssp = (BioseqSetPtr) sep->data.ptrvalue;
3590 for (sept = bssp->seq_set; sept!=NULL; sept=sept->next) {
3591 if (IS_Bioseq(sept)) {
3592 bsp = (BioseqPtr) sept->data.ptrvalue;
3593 sap_replace(bsp->annot, salp, 2);
3594 }
3595 }
3596 }
3597 }
3598 else if (bsp!=NULL) {
3599 sap_replace(bsp->annot, salp, 2);
3600 }
3601 }
3602 }
3603 }
3604 return;
3605 }
3606
3607 /*********************************************************/
sap_empty(SeqAnnotPtr sap,Uint1 type,Pointer PNTR ptr)3608 static Pointer LIBCALL sap_empty (SeqAnnotPtr sap, Uint1 type, Pointer PNTR ptr)
3609 {
3610 SeqAlignPtr salp = NULL;
3611
3612 if (sap != NULL) {
3613 for (; sap!= NULL; sap=sap->next) {
3614 if (sap->type == type) {
3615 salp = (SeqAlignPtr) sap->data;
3616 *ptr = (Pointer) sap;
3617 break;
3618 }
3619 }
3620 }
3621 return salp;
3622 }
3623
3624 typedef struct ccid2 {
3625 Uint1 choice;
3626 SeqIdPtr sip;
3627 Pointer sap;
3628 } CcId2, PNTR CcId2Ptr;
3629
3630
FindSeqAlignCallback(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)3631 static void FindSeqAlignCallback (SeqEntryPtr sep, Pointer mydata,
3632 Int4 index, Int2 indent)
3633 {
3634 BioseqPtr bsp;
3635 BioseqSetPtr bssp;
3636 SeqAlignPtr salp,
3637 salptmp,
3638 curr_sap;
3639 DenseSegPtr dsp;
3640 CcId2Ptr cip;
3641 Boolean found;
3642 Pointer this_sap;
3643
3644 cip = (CcId2Ptr)mydata;
3645 if (sep != NULL && sep->data.ptrvalue && mydata != NULL) {
3646 if (IS_Bioseq(sep)) {
3647 bsp = (BioseqPtr) sep->data.ptrvalue;
3648 if (bsp!=NULL) {
3649 salp=(SeqAlignPtr)sap_empty(bsp->annot, 2, &this_sap);
3650 if (salp!=NULL) {
3651 found=FALSE;
3652 salptmp=salp;
3653 if (cip->sip!=NULL) {
3654 while (!found && salptmp!=NULL)
3655 {
3656 switch (salptmp->segtype) {
3657 case 5: /* disc */
3658 curr_sap = (SeqAlignPtr)salptmp->segs;
3659 while (!found && curr_sap!=NULL) {
3660 dsp = (DenseSegPtr)curr_sap->segs;
3661 found = (Boolean)(SeqIdOrderInBioseqIdList(cip->sip, dsp->ids)>0);
3662 curr_sap = curr_sap->next;
3663 }
3664 break;
3665 default:
3666 dsp = (DenseSegPtr)salptmp->segs;
3667 found = (Boolean)(SeqIdOrderInBioseqIdList(cip->sip, dsp->ids)>0);
3668 }
3669 salptmp=salptmp->next;
3670 }
3671 }
3672 if (found || cip->sip==NULL) {
3673 if (cip->sap==NULL) {
3674 if (cip->choice==OBJ_SEQALIGN)
3675 cip->sap = (Pointer)salp;
3676 else if (cip->choice==OBJ_SEQANNOT)
3677 cip->sap = (Pointer) this_sap;
3678 }
3679 }
3680 }
3681 }
3682 }
3683 else if(IS_Bioseq_set(sep)) {
3684 bssp = (BioseqSetPtr)sep->data.ptrvalue;
3685 if (bssp!=NULL) {
3686 salp=(SeqAlignPtr)sap_empty(bssp->annot, 2, &this_sap);
3687 if (salp!=NULL) {
3688 found=FALSE;
3689 salptmp=salp;
3690 if (cip->sip!=NULL) {
3691 while (!found && salptmp!=NULL)
3692 {
3693 switch (salptmp->segtype) {
3694 case 5: /* disc */
3695 curr_sap = (SeqAlignPtr)salptmp->segs;
3696 while (!found && curr_sap!=NULL) {
3697 dsp = (DenseSegPtr)curr_sap->segs;
3698 found = (Boolean)(SeqIdOrderInBioseqIdList(cip->sip, dsp->ids)>0);
3699 curr_sap = curr_sap->next;
3700 }
3701 break;
3702 default:
3703 dsp = (DenseSegPtr)salptmp->segs;
3704 found = (Boolean)(SeqIdOrderInBioseqIdList(cip->sip, dsp->ids)>0);
3705 }
3706 salptmp=salptmp->next;
3707 }
3708 }
3709 if (found || cip->sip==NULL) {
3710 if (cip->sap==NULL) {
3711 if (cip->choice==OBJ_SEQALIGN)
3712 cip->sap = (Pointer)salp;
3713 else if (cip->choice==OBJ_SEQANNOT)
3714 cip->sap = (Pointer) this_sap;
3715 }
3716 }
3717 }
3718 }
3719 }
3720 }
3721 }
3722
IsIdInAlignment(SeqIdPtr sip,SeqAlignPtr sap)3723 static Boolean IsIdInAlignment (SeqIdPtr sip, SeqAlignPtr sap)
3724 {
3725 SeqAlignPtr sap_tmp;
3726 Boolean found = FALSE;
3727 DenseSegPtr dsp;
3728
3729 if (sip == NULL || sap == NULL) return FALSE;
3730
3731 if (sap->segtype == SAS_DISC)
3732 {
3733 sap_tmp = (SeqAlignPtr) sap->segs;
3734 while (!found && sap_tmp != NULL)
3735 {
3736 found = IsIdInAlignment (sip, sap_tmp);
3737 sap_tmp = sap_tmp->next;
3738 }
3739 }
3740 else if (sap->segtype == SAS_DENSEG)
3741 {
3742 dsp = (DenseSegPtr) sap->segs;
3743 if (SeqIdOrderInBioseqIdList (sip, dsp->ids) > 0)
3744 {
3745 found = TRUE;
3746 }
3747 }
3748 return found;
3749 }
3750
FindSeqAlignVisitCallback(SeqAnnotPtr sap,Pointer userdata)3751 static void FindSeqAlignVisitCallback (SeqAnnotPtr sap, Pointer userdata)
3752 {
3753 CcId2Ptr cip;
3754 SeqAlignPtr salp;
3755
3756 if (sap == NULL || sap->type != 2 || (cip = (CcId2Ptr) userdata) == NULL || cip->sap != NULL) return;
3757
3758 for (salp = sap->data; salp != NULL && cip->sap == NULL; salp = salp->next)
3759 {
3760 if (cip->sip == NULL || IsIdInAlignment (cip->sip, salp))
3761 {
3762 if (cip->choice == OBJ_SEQALIGN)
3763 {
3764 cip->sap = (Pointer) salp;
3765 }
3766 else
3767 {
3768 cip->sap = sap;
3769 }
3770 }
3771 }
3772 }
3773
3774
FindSeqAlignInSeqEntry(SeqEntryPtr sep,Uint1 choice)3775 NLM_EXTERN Pointer LIBCALL FindSeqAlignInSeqEntry (SeqEntryPtr sep, Uint1 choice)
3776 {
3777 SeqEntryPtr sep_head;
3778 BioseqPtr bsp;
3779 Uint2 entityID;
3780 CcId2 ci;
3781
3782 if (sep==NULL)
3783 return NULL;
3784 ci.sap = NULL;
3785 ci.sip = NULL;
3786 if (choice != OBJ_SEQALIGN && choice != OBJ_SEQANNOT)
3787 return NULL;
3788 ci.choice = choice;
3789 if (IS_Bioseq(sep)) {
3790 bsp = (BioseqPtr) sep->data.ptrvalue;
3791 if (bsp!=NULL)
3792 ci.sip = SeqIdDup (bsp->id);
3793 }
3794 entityID = ObjMgrGetEntityIDForChoice (sep);
3795 sep_head = GetTopSeqEntryForEntityID (entityID);
3796 VisitAnnotsInSep (sep_head, (Pointer)&ci, FindSeqAlignVisitCallback);
3797 if (ci.sip != NULL)
3798 SeqIdFree (ci.sip);
3799 return ci.sap;
3800 }
3801
3802 /***********************************************
3803 *** ReadBuffer from spp+sap
3804 *** in : spp, sap, from + to in seq coordinates
3805 *** out: length of buffer + buffer
3806 ************************************************/
readbuff_fromseqalign(SeqPortPtr spp,SeqAlignPtr salp,Int2 index,CharPtr buffer,Int4 from,Int4 to,Int4 offset,Boolean strand)3807 NLM_EXTERN Int4 LIBCALL readbuff_fromseqalign (SeqPortPtr spp, SeqAlignPtr salp, Int2 index, CharPtr buffer, Int4 from, Int4 to, Int4 offset, Boolean strand)
3808 {
3809 BioseqPtr bsp;
3810 DenseSegPtr dsp;
3811 SeqIdPtr sip;
3812 Int4Ptr dspstart;
3813 Int4Ptr dsplens;
3814 Int4 sumlens, sumstart;
3815 Int4 bufflen, buffstart;
3816 Int4 seglenstobuffer = 0;
3817 Int4 j;
3818 Int4 maxlen = 0;
3819 Int2 numseg;
3820 Boolean seen = FALSE;
3821 Boolean nogap;
3822 Boolean ok = TRUE;
3823
3824 if (spp == NULL) {
3825 return 0;
3826 }
3827 if (buffer == NULL) {
3828 return 0;
3829 }
3830 /**********************************
3831 *** locate segment including 'from'
3832 ***********************************/
3833 if ( (dsp = (DenseSegPtr) salp->segs ) == NULL) {
3834 return 0;
3835 }
3836 if (strand == Seq_strand_minus) {
3837 sip = dsp->ids;
3838 for (j = 0; sip!=NULL && j < index; j++)
3839 sip=sip->next;
3840 if (sip!=NULL) {
3841 bsp = BioseqLockById (sip);
3842 if (bsp!=NULL) {
3843 maxlen = bsp->length;
3844 BioseqUnlock(bsp);
3845 }
3846 }
3847 if (maxlen==0)
3848 return 0;
3849 }
3850 dsplens = dsp->lens;
3851 dspstart = dsp->starts + index;
3852 seen = LocateInSeqAlignDenSeg (from, dsp->dim, dsp->numseg, &dspstart, &dsplens, &numseg, &seglenstobuffer);
3853 if (!seen) {
3854 ErrPostEx (SEV_ERROR, 0, 0, "fail in readbuff_fromseqalign_sap [4] %ld %ld %ld %ld %ld %ld",
3855 (long) from, (long) dsp->dim, (long) dsp->numseg, (long) *dspstart, (long) *dsplens, (long) seglenstobuffer);
3856 return 0;
3857 }
3858 /***********************************
3859 *** read segments until 'to'
3860 ***********************************/
3861 bufflen = MIN((Int4)(*dsplens-seglenstobuffer),(Int4)(to - from));
3862 if (strand == Seq_strand_minus) {
3863 buffstart = *dspstart + seglenstobuffer;
3864 if (buffstart>-1)
3865 buffstart = maxlen - buffstart - bufflen;
3866 } else {
3867 buffstart = *dspstart + seglenstobuffer;
3868 }
3869 nogap = (*dspstart >= 0);
3870 if (offset < 0 && !nogap)
3871 offset = 0;
3872 sumlens = bufflen;
3873 sumstart = 0;
3874 /**
3875 WriteLog ("/ %d %d %d %d %d %d %d\n", (int)seglenstobuffer,
3876 (int)buffstart, (int)bufflen, (int) *dsplens, (int)*dspstart, to, from);
3877 **/
3878 while ( ok )
3879 {
3880 if ( nogap )
3881 {
3882 if (strand == Seq_strand_minus) {
3883 offset = ReadBufferFromSep (spp, buffer, (Int4)buffstart,
3884 (Int4)(buffstart+bufflen), offset);
3885 } else {
3886 offset = ReadBufferFromSep (spp, buffer, (Int4)buffstart,
3887 (Int4)(buffstart+bufflen), offset);
3888 }
3889 }
3890 else
3891 {
3892 for (j = 0; j < bufflen; j++, offset++)
3893 buffer[offset] = '-';
3894 buffer[offset] = '\0';
3895 }
3896 ok = (numseg < dsp->numseg);
3897 if (!ok)
3898 {
3899 if ( salp->next == NULL ) break;
3900 }
3901 else {
3902 numseg++;
3903 dspstart += dsp->dim;
3904 dsplens++;
3905 }
3906 nogap = (*dspstart >= 0);
3907 bufflen = MIN ((Int4) (*dsplens), (Int4) (to - from));
3908 if ( nogap ) {
3909 if (strand == Seq_strand_minus) {
3910 buffstart = *dspstart + sumstart;
3911 buffstart = maxlen - buffstart - bufflen;
3912 } else {
3913 buffstart = *dspstart + sumstart;
3914 }
3915 }
3916 sumlens += bufflen;
3917 /**
3918 if (index != 0)
3919 WriteLog ("- %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld \n",
3920 (long)offset, (long)seglenstobuffer,
3921 (long)sumstart, (long)*dspstart, (long)buffstart,
3922 (long)*dsplens, (long)from, (long)to, (long)bufflen, (long)sumlens);
3923 **/
3924 }
3925 return ((Int4)offset);
3926 }
3927
aaSeqAlign_to_dnaSeqAlign(SeqAlignPtr salp,ValNodePtr vnp,ValNodePtr framep)3928 NLM_EXTERN SeqAlignPtr LIBCALL aaSeqAlign_to_dnaSeqAlign (SeqAlignPtr salp, ValNodePtr vnp, ValNodePtr framep)
3929 {
3930 SeqAlignPtr tmp;
3931 DenseSegPtr dsp;
3932 ValNodePtr vnptmp,
3933 fvnp=NULL;
3934 SeqIdPtr sip,
3935 siptmp;
3936 Int4Ptr startp;
3937 Int4Ptr lenp;
3938 Int4 from, sumlens;
3939 Int2 j, k;
3940 Uint1 frame;
3941
3942 if (salp == NULL)
3943 return NULL;
3944 for (tmp=salp; tmp!=NULL; tmp=tmp->next) {
3945 if (tmp->segtype == 2) {
3946 dsp = (DenseSegPtr) tmp->segs;
3947 if (dsp != NULL) {
3948
3949 if (vnp==NULL) {
3950 for (j=0, sip = dsp->ids; j<dsp->dim && sip!=NULL; j++, sip=sip->next) {
3951 siptmp = MakeNewProteinSeqId (NULL, sip);
3952 ValNodeAddPointer (&vnp, 0, siptmp);
3953 }
3954 }
3955 for (j=0, vnptmp = vnp; vnptmp != NULL; vnptmp = vnptmp->next) {
3956 if (vnptmp->data.ptrvalue != NULL)
3957 j++;
3958 else
3959 break;
3960 }
3961 if (j == dsp->dim) {
3962
3963 dsp->ids = SeqIdFree (dsp->ids);
3964 dsp->ids = ValNodeSeqIdListDup (vnp);
3965 vnp = ValNodeFreeType (&vnp, TypeSeqId);
3966
3967 lenp = dsp->lens;
3968 for (j = 0; j < dsp->numseg; j++, lenp++) {
3969 *lenp = *lenp * 3;
3970 }
3971 frame=0;
3972 if (framep!=NULL) {
3973 fvnp = framep;
3974 }
3975 for (k = 0; k < dsp->dim; k++)
3976 {
3977 lenp = dsp->lens;
3978 startp = dsp->starts;
3979 startp += k;
3980 while (*startp < 0) startp += dsp->dim;
3981 from = *startp;
3982 if (fvnp!=NULL)
3983 frame=(Uint1) fvnp->data.intvalue;
3984 if (frame == 2)
3985 from +=1;
3986 else if (frame == 3)
3987 from+=2;
3988 startp = dsp->starts;
3989 startp += k;
3990 sumlens = 0;
3991 for (j = 0; j < dsp->numseg; j++) {
3992 if (*startp > -1) {
3993 *startp = from + sumlens;
3994 sumlens += *lenp;
3995 }
3996 startp += dsp->dim;
3997 lenp++;
3998 }
3999 if (framep!=NULL)
4000 fvnp=fvnp->next;
4001 }
4002 }
4003 }
4004 }
4005 }
4006 return salp;
4007 }
4008
4009
aaSeqAnnot_to_dnaSeqAnnotFunc(SeqAnnotPtr PNTR sapnahead,SeqAlignPtr salpnew,ValNodePtr vnp,ValNodePtr framep)4010 NLM_EXTERN SeqAnnotPtr aaSeqAnnot_to_dnaSeqAnnotFunc (SeqAnnotPtr PNTR sapnahead, SeqAlignPtr salpnew, ValNodePtr vnp, ValNodePtr framep)
4011 {
4012 SeqAnnotPtr sap1, sap2, sapna, saptmp;
4013 SeqAlignPtr salpna;
4014
4015 sap1 = SeqAnnotForSeqAlign (salpnew);
4016 sap2 = (SeqAnnotPtr) AsnIoMemCopy ((Pointer) sap1, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
4017 salpna = aaSeqAlign_to_dnaSeqAlign ((SeqAlignPtr)(sap2->data), vnp, framep);
4018 sap1->data = NULL;
4019 sap2->data = NULL;
4020 SeqAnnotFree (sap1);
4021 SeqAnnotFree (sap2);
4022 if (salpna != NULL) {
4023 sapna = SeqAnnotForSeqAlign (salpna);
4024 if (*sapnahead == NULL)
4025 *sapnahead = sapna;
4026 else {
4027 for (saptmp=*sapnahead; saptmp->next!=NULL; saptmp=saptmp->next)
4028 continue;
4029 saptmp->next = sapna;
4030 }
4031 }
4032 return *sapnahead;
4033 }
4034
SortSeqAlign(SeqAlignPtr PNTR salp)4035 NLM_EXTERN SeqAlignPtr LIBCALL SortSeqAlign (SeqAlignPtr PNTR salp)
4036 {
4037 SeqAlignPtr salp1, salptmp,
4038 newsalp = NULL,
4039 presalp = NULL,
4040 minsalp = NULL,
4041 preminsalp = NULL;
4042 DenseSegPtr dsp,
4043 dsptmp;
4044 SeqIdPtr sip, siptmp;
4045 Int4Ptr start;
4046 Int4 minstart;
4047 BioseqPtr bsp;
4048
4049 salp1=*salp;
4050 while (salp1!=NULL)
4051 {
4052 minstart = INT4_MAX;
4053 presalp=NULL;
4054 minsalp = preminsalp = NULL;
4055 salptmp=salp1;
4056 while (salptmp!=NULL) {
4057 dsp=(DenseSegPtr) salptmp->segs;
4058 if (dsp!=NULL) {
4059 start = dsp->starts;
4060 if (*start < minstart) {
4061 minstart = *start;
4062 minsalp = salptmp;
4063 preminsalp = presalp;
4064 }
4065 }
4066 presalp=salptmp;
4067 salptmp=salptmp->next;
4068 }
4069 if (minsalp != NULL) {
4070 dsp=(DenseSegPtr) minsalp->segs;
4071 sip=dsp->ids->next;
4072 bsp=BioseqLockById(sip);
4073 if (bsp==NULL) {
4074 minsalp=NULL;
4075 }
4076 else BioseqUnlock (bsp);
4077 }
4078 if (minsalp != NULL && newsalp != NULL) {
4079 dsp=(DenseSegPtr) minsalp->segs;
4080 sip=dsp->ids->next;
4081 salptmp=newsalp;
4082 while (salptmp!=NULL) {
4083 dsptmp=(DenseSegPtr) salptmp->segs;
4084 siptmp=dsptmp->ids->next;
4085 if (SeqIdForSameBioseq(sip, siptmp)) {
4086 break;
4087 }
4088 salptmp=salptmp->next;
4089 }
4090 if (salptmp!=NULL)
4091 minsalp=NULL;
4092 }
4093 if (minsalp != NULL) {
4094 if (preminsalp==NULL) {
4095 salp1 = salp1->next;
4096 minsalp->next = NULL;
4097 }
4098 else {
4099 preminsalp->next = minsalp->next;
4100 minsalp->next = NULL;
4101 }
4102 newsalp = SeqAlignLink (newsalp, minsalp);
4103 }
4104 else break;
4105 }
4106 *salp = newsalp;
4107 return newsalp;
4108 }
4109
SortSeqAlignFromList(SeqAlignPtr salp,Int2Ptr sortlst)4110 NLM_EXTERN SeqAlignPtr LIBCALL SortSeqAlignFromList (SeqAlignPtr salp, Int2Ptr sortlst)
4111 {
4112 SeqAlignPtr newsalp;
4113 CompSegPtr csp,
4114 newcsp;
4115 SeqIdPtr sip;
4116 Int4Ptr lenp;
4117 Uint1Ptr stp, st2p;
4118 Int2 j, k, m;
4119
4120 csp = (CompSegPtr) salp->segs;
4121 newsalp = SeqAlignNew ();
4122 if ( newsalp == NULL ) return NULL;
4123 newsalp->type = salp->type;
4124 newsalp->segtype = salp->segtype;
4125 newsalp->dim = salp->dim;
4126 newcsp = (CompSegPtr) MemNew ( sizeof (CompSeg) );
4127 newsalp->segs = newcsp;
4128 newcsp->dim = csp->dim;
4129 newcsp->numseg = csp->numseg;
4130 newcsp->ids = NULL;
4131 for (j=0; j < csp->dim; j++)
4132 {
4133 for (k=0, sip = csp->ids; k < csp->dim && sip!=NULL; k++) {
4134 if (sortlst[k] == (j+1)) {
4135 newcsp->ids = AddSeqId (&(newcsp->ids), sip);
4136 break;
4137 }
4138 sip = sip->next;
4139 }
4140 }
4141 newcsp->from = (Int4Ptr) MemNew ((size_t) ((csp->dim + 2) * sizeof (Int4)));
4142 for (j=0; j < csp->dim; j++)
4143 {
4144 for (k=0, lenp = csp->from; k < csp->dim; k++, lenp++)
4145 if (sortlst[k] == (j+1)) {
4146 newcsp->from [j] = *lenp;
4147 break;
4148 }
4149 }
4150 newcsp->starts =(BoolPtr)MemNew((size_t)((csp->dim*csp->numseg+ 4) * sizeof (Boolean)));
4151 for (j=0; j < csp->dim; j++)
4152 {
4153 for (k=0, stp = csp->starts; k < csp->dim; k++, stp ++)
4154 if (sortlst[k] == (j+1)) {
4155 for (m=0, st2p = stp; m < csp->numseg; m++, st2p+=csp->dim) {
4156 newcsp->starts [m*csp->dim + j] = *st2p;
4157 }
4158 break;
4159 }
4160 }
4161 newcsp->lens=(Int4Ptr) MemNew((size_t)((csp->numseg + 2) * sizeof (Int4)));
4162 for (j = 0; j < csp->numseg + 2; j++)
4163 newcsp->lens[j] = csp->lens[j];
4164
4165 return newsalp;
4166 }
4167
4168
multseqalign_from_pairseqalign(SeqAlignPtr salp)4169 NLM_EXTERN SeqAnnotPtr multseqalign_from_pairseqalign (SeqAlignPtr salp)
4170 {
4171 SeqAlignPtr salptmp,
4172 tmp;
4173 DenseSegPtr dsp = NULL;
4174 SeqPortPtr spp;
4175 SeqLocPtr slp;
4176 SeqIdPtr sip,
4177 siphead, siptmp,
4178 sipcur;
4179 BioseqPtr bsp;
4180 CharPtr bufferin[500];
4181 CharPtr bufferout[500];
4182 CharPtr buffertmp[2];
4183 Int4Ptr starttmp;
4184 Int4Ptr lenp;
4185 Uint1Ptr strandp;
4186
4187 Int4 algfrom, algto, alglens;
4188 Int4 seqfrom, seqto;
4189 Int4 cur;
4190 Int4 dsp_len, bsp_len, sum_lens;
4191 Int4 max_lens = 0;
4192 Int4 seqoffset;
4193 Int4 gapoffset;
4194 Int4 gaptotal;
4195 Int4 step = SALSALENLIMIT;
4196 Int2 curnseq;
4197 Int2 j;
4198 Int4 k, k1, k2, k3;
4199 Char c1, c2;
4200
4201 ValNodePtr vnp;
4202 ValNodePtr vnpfrom;
4203 ValNodePtr vnpstrand;
4204 SeqAnnotPtr sap;
4205
4206 Int4 letter;
4207 Uint1 strandtmp = Seq_strand_unknown;
4208 Boolean strand_nonull;
4209 Boolean ok;
4210 CharPtr str;
4211
4212 if (salp==NULL)
4213 return NULL;
4214
4215 for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next) {
4216 if (salptmp->type > 0 && salptmp->segtype==2) {
4217 dsp = (DenseSegPtr) salptmp->segs;
4218 break;
4219 }
4220 }
4221 if (dsp==NULL)
4222 return NULL;
4223 siptmp = SeqIdDup (dsp->ids);
4224 bsp = BioseqLockById (siptmp);
4225 SeqIdFree (siptmp);
4226 if (bsp == NULL) {
4227 return NULL;
4228 }
4229 bsp_len = bsp->length;
4230 sip = SeqIdFindBest(bsp->id, 0);
4231 BioseqUnlock (bsp);
4232
4233 /*----- check if not all sequences start with gaps ----*/
4234 for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next) {
4235 if (salptmp->type > 0 && salptmp->segtype==2) {
4236 dsp = (DenseSegPtr) salptmp->segs;
4237 for (j=0, starttmp=dsp->starts; j<dsp->dim; j++, starttmp++)
4238 if ( *starttmp > -1)
4239 break;
4240 ok = (Boolean) (j < dsp->dim);
4241 if (!ok)
4242 break;
4243 }
4244 }
4245 if (!ok)
4246 return NULL;
4247 /*---- find longest seqalign --------*/
4248 sum_lens = 0;
4249 for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next) {
4250 if (salptmp->type > 0 && salptmp->segtype==2) {
4251 dsp_len = SeqAlignStart(salptmp, 0) + SeqAlignLength (salptmp);
4252 if (dsp_len > sum_lens)
4253 sum_lens = dsp_len;
4254 }
4255 }
4256 if (sum_lens > SALSALENLIMIT) {
4257 ErrPostEx (SEV_ERROR, 0, 0, "Too long alignment.\n Wait for next Sequin version");
4258 return NULL;
4259 }
4260 step = 2*sum_lens;
4261 step = MIN ((Int4)step, (Int4)SALSALENLIMIT);
4262 for (j=0; j<2; j++)
4263 {
4264 str = (CharPtr)MemNew ((size_t) ((step+10) * sizeof(Char)));
4265 buffertmp[j] = str;
4266 for (k=0; k<step; k++) buffertmp[j][k] = '-';
4267 str = (CharPtr)MemNew ((size_t) ((step+10) * sizeof(Char)));
4268 bufferout[j] = str;
4269 for (k=0; k<step; k++) bufferout[j][k] = '-';
4270 str = (CharPtr)MemNew ((size_t) ((step+10) * sizeof(Char)));
4271 bufferin[j] = str;
4272 for (k=0; k<step; k++) bufferin[j][k] = '-';
4273 }
4274 siphead = SeqIdDup (sip);
4275 sipcur = siphead;
4276 vnpstrand = NULL;
4277 vnpfrom = NULL;
4278 ValNodeAddInt (&vnpfrom, 1, (Int4) 0);
4279
4280 for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
4281 if (salptmp->type > 0 && salptmp->segtype==2)
4282 break;
4283
4284 /**************** if 1rst sequence start with gaps in one alignment ***/
4285 gapoffset = gaptotal = 0;
4286 for (tmp=salptmp, j=0; tmp!=NULL; tmp=tmp->next, j++) {
4287 if (tmp->type > 0 && tmp->segtype==2) {
4288 dsp = (DenseSegPtr) tmp->segs;
4289 if (*(dsp->starts) < 0) {
4290 if (j==0)
4291 gapoffset = *(dsp->lens);
4292 if (*(dsp->lens) > gaptotal) {
4293 gaptotal = *(dsp->lens);
4294 }
4295 }
4296 }
4297 }
4298 if (gaptotal > 0) {
4299 if (gaptotal <= gapoffset)
4300 gapoffset = gaptotal - gapoffset;
4301 }
4302 algfrom = 0;
4303 algto = 1;
4304 for (cur = algfrom; cur < algto; cur += 2*step)
4305 {
4306 curnseq = 2;
4307 dsp = (DenseSegPtr) salptmp->segs;
4308 seqoffset = *(dsp->starts);
4309 strand_nonull = (Boolean) (dsp->strands != NULL);
4310 if (strand_nonull)
4311 strandp = dsp->strands;
4312 else
4313 strandtmp = Seq_strand_unknown;
4314 for (sip = dsp->ids, j=0; sip!=NULL; sip=sip->next, j++)
4315 {
4316 starttmp = dsp->starts;
4317 if (j==0) {
4318 seqfrom = 0;
4319 } else {
4320 starttmp += j;
4321 seqfrom = *starttmp;
4322 }
4323 lenp = dsp->lens;
4324 dsp_len = 0;
4325 sum_lens = 0;
4326 for(k=0; k < dsp->numseg; k++, lenp++, starttmp += dsp->dim) {
4327 if ( *starttmp >= 0 ) dsp_len += *lenp;
4328 sum_lens += *lenp;
4329 }
4330 siptmp = SeqIdDup (sip);
4331 bsp = BioseqLockById (siptmp);
4332 SeqIdFree (siptmp);
4333 if (bsp == NULL) {
4334 return NULL;
4335 }
4336 seqto = MIN ((Int4) (seqfrom + step), (Int4) (bsp->length -1));
4337 if ( j != 0 ) {
4338 siptmp = SeqIdDup(SeqIdFindBest(bsp->id, 0));
4339 sipcur->next = siptmp;
4340 sipcur = siptmp;
4341 starttmp = dsp->starts;
4342 starttmp += j;
4343 lenp = dsp->lens;
4344 for(k=0; k < dsp->numseg; k++, lenp++, starttmp += dsp->dim)
4345 if ( *starttmp >= 0 ) break;
4346 if (strand_nonull && *strandp==Seq_strand_minus)
4347 ValNodeAddInt (&vnpfrom, 1, (Int4) (*starttmp+*lenp));
4348 else
4349 ValNodeAddInt (&vnpfrom, 1, (Int4) *starttmp);
4350 }
4351 BioseqUnlock (bsp);
4352 if (strand_nonull)
4353 strandtmp = *strandp;
4354 slp = SeqLocIntNew (0, seqto, strandtmp, sip);
4355 if ( slp == NULL) {
4356 return NULL;
4357 }
4358 spp = SeqPortNewByLoc (slp, Seq_code_iupacna);
4359 SeqLocFree (slp);
4360 /**
4361 SeqIdWrite (sip, strLog, PRINTID_FASTA_LONG, 120);
4362 if (spp!=NULL) WriteLog("mergesalp2 %s\n", strLog);
4363 else WriteLog("PRINTFspp NULL\n");
4364 WriteLog("1> %ld %ld %ld %ld > %ld \n", dsp_len, sum_lens, seqto, seqoffset, seqfrom);
4365 **/
4366 if (seqoffset < 0) seqoffset = 0; /*!!!!!!!!!!!!!!!!!!!!*/
4367
4368 if ( j == 0 && seqoffset > seqfrom) {
4369 seqfrom = ReadBufferFromSep (spp, bufferin[j], (Int4)seqfrom,
4370 (Int4)(seqfrom + seqoffset), 0);
4371 }
4372 /**
4373 WriteLog ("3> %d %d %ld %ld \n", j, seqfrom, sum_lens, seqfrom + sum_lens);
4374 **/
4375 alglens = readbuff_fromseqalign (spp, salptmp, j, bufferin[j], seqfrom, seqfrom + sum_lens, seqoffset+gapoffset, strandtmp);
4376 if (alglens == 0) {
4377 return NULL;
4378 }
4379 /**
4380 WriteLog("4> %d %ld %ld %ld %ld < %ld %d\n", j, alglens, sum_lens, seqfrom+sum_lens, seqfrom + dsp_len, bsp_len, strandtmp);
4381 **/
4382 if ( j == 0 && (seqfrom + dsp_len) < bsp_len) {
4383 alglens = ReadBufferFromSep (spp, bufferin[j],
4384 (Int4)(seqfrom + dsp_len), (Int4)bsp_len, alglens);
4385 bufferin[j][alglens] = '-';
4386 }
4387 else {
4388 for (k = alglens; k < step; k++)
4389 if (bufferin[j][k] != '-') bufferin[j][k] = '-';
4390 }
4391 SeqPortFree(spp);
4392 ValNodeAddInt (&vnpstrand, 1, (Int4)(strandtmp));
4393 if (strand_nonull)
4394 strandp++;
4395 }
4396 for (salptmp = salptmp->next; salptmp != NULL; salptmp = salptmp->next)
4397 {
4398 if (salptmp->type > 0 && salptmp->segtype==2)
4399 {
4400 /**
4401 WriteLog ("\n\nCURRENT ALIGN %d\n", curnseq);
4402 **/
4403 dsp = (DenseSegPtr) salptmp->segs;
4404 seqoffset = *(dsp->starts);
4405 if (seqoffset < 0) {
4406 gapoffset = gaptotal - *(dsp->lens);
4407 }
4408 else
4409 gapoffset = 0;
4410 strand_nonull = (Boolean) (dsp->strands != NULL);
4411 if (strand_nonull)
4412 strandp = dsp->strands;
4413 else
4414 strandtmp = Seq_strand_unknown;
4415 for (sip = dsp->ids, j=0; sip!=NULL; sip=sip->next, j++)
4416 {
4417 dsp = (DenseSegPtr) salptmp->segs;
4418 for (k=0; k<step; k++)
4419 buffertmp[j][k] = '-';
4420 starttmp = dsp->starts;
4421 starttmp += j;
4422 if (j == 0) seqfrom = 0;
4423 else seqfrom = *starttmp;
4424 lenp = dsp->lens;
4425 dsp_len = 0;
4426 sum_lens = 0;
4427 for(k=0; k < dsp->numseg; k++, lenp++, starttmp += dsp->dim) {
4428 if ( *starttmp >= 0 )
4429 dsp_len += *lenp;
4430 sum_lens += *lenp;
4431 }
4432 siptmp = SeqIdDup (sip);
4433 bsp = BioseqLockById (siptmp);
4434 SeqIdFree (siptmp);
4435 if (bsp == NULL){
4436 return NULL;
4437 }
4438 seqto = MIN ((Int4) (seqfrom + step), (Int4) (bsp->length -1));
4439 if ( j != 0 ) {
4440 siptmp = SeqIdDup(SeqIdFindBest(bsp->id, 0));
4441 sipcur->next = siptmp;
4442 sipcur = siptmp;
4443 starttmp = dsp->starts;
4444 starttmp += j;
4445 lenp = dsp->lens;
4446 for(k=0; k < dsp->numseg; k++, lenp++, starttmp += dsp->dim)
4447 if ( *starttmp >= 0 ) break;
4448 if (strand_nonull && *strandp==Seq_strand_minus)
4449 ValNodeAddInt (&vnpfrom, 1, (Int4) (*starttmp+*lenp));
4450 else
4451 ValNodeAddInt (&vnpfrom, 1, (Int4) *starttmp);
4452 }
4453 BioseqUnlock (bsp);
4454 if (strand_nonull)
4455 strandtmp = *strandp;
4456 slp = SeqLocIntNew (0, seqto, strandtmp, sip);
4457 if ( slp == NULL) {
4458 return NULL;
4459 }
4460 spp = SeqPortNewByLoc (slp, Seq_code_iupacna);
4461 SeqLocFree (slp);
4462 /**
4463 SeqIdWrite (sip, strLog, PRINTID_FASTA_LONG, 120);
4464 if (spp!=NULL) WriteLog ("mergesalp2 %s\n", strLog);
4465 else WriteLog ("spp NULL\n");
4466 WriteLog ("1> %ld %ld from %ld to %ld offset %ld \n", dsp_len, sum_lens, seqfrom, seqto, seqoffset);
4467 **/
4468 if (seqoffset < 0) seqoffset = 0; /*!!!!!!!!!!!!!!!!!!!!*/
4469 if ( j == 0 && seqoffset > seqfrom) {
4470 seqfrom = ReadBufferFromSep (spp, buffertmp[j], (Int4)seqfrom,
4471 (Int4)(seqfrom + seqoffset), 0);
4472 }
4473 /**
4474 if ( j == 0)
4475 WriteLog ("2> %d %ld %ld \n", seqfrom, sum_lens, seqfrom + sum_lens);
4476 else
4477 WriteLog ("2> %d %ld %ld \n", seqfrom, sum_lens, seqfrom + sum_lens);
4478 **/
4479 alglens = readbuff_fromseqalign (spp, salptmp, j, buffertmp[j], seqfrom, seqfrom + sum_lens, (seqoffset + gapoffset), strandtmp);
4480 if (alglens == 0) {
4481 return NULL;
4482 }
4483 /**
4484 WriteLog ("3> %d %c%c%c \n", j, buffertmp[j][0], buffertmp[j][1], buffertmp[j][2]);
4485 WriteLog ("4> %ld %ld %ld %ld < %ld\n", alglens, sum_lens, sum_lens + seqfrom, seqfrom + dsp_len, bsp_len);
4486 **/
4487 if ( j == 0 && (seqfrom + dsp_len) < bsp_len) {
4488 alglens = ReadBufferFromSep (spp, buffertmp[j], (Int4)(seqfrom + dsp_len), (Int4)bsp_len, alglens);
4489 }
4490 else {
4491 for (k = alglens; k < step; k++)
4492 if (buffertmp[j][k] != '-') buffertmp[j][k] = '-';
4493 }
4494 SeqPortFree(spp);
4495 if (j>0)
4496 ValNodeAddInt (&vnpstrand, 1, (Int4)strandtmp);
4497 if (strand_nonull)
4498 strandp++;
4499 }
4500 str = (CharPtr)MemNew ((size_t) ((step+10) * sizeof(Char)));
4501 bufferout [curnseq] = str;
4502 for (j=0; j < curnseq + 1; j++) {
4503 for (k=0; k<step; k++)
4504 bufferout [j][k] = '-';
4505 }
4506 k1=k2=k3=letter=0;
4507 while ( k1 < step && k2 < step && k3 < step)
4508 {
4509 c1 = bufferin[0][k1];
4510 c2 = buffertmp[0][k2];
4511 if ((c1 != '-' && c2 != '-') || (c1 == '-' && c2 == '-')) {
4512 if (c1 != '-' && c2 != '-' && c1 != c2) {
4513 /**
4514 WriteLog ("ERROR %d [%c] %d [%c] %c%c%c%c%c %c%c%c%c%c", k1, c1, k2, c2, c1,bufferin[0][k1+1], bufferin[0][k1+2], bufferin[0][k1+3], bufferin[0][k1+4], c2,buffertmp[0][k2+1], buffertmp[0][k2+2], buffertmp[0][k2+3], buffertmp[0][k2+4]);
4515 **/
4516 break;
4517 }
4518 for (j = 0; j < curnseq; ++j) {
4519 bufferout[j][k3] = bufferin[j][k1];
4520 }
4521 for (j = 1; j < 2; ++j) {
4522 bufferout[curnseq + j -1][k3] = buffertmp[j][k2];
4523 }
4524 if (bufferout[0][k3] !='-') letter++;
4525 /***
4526 WriteLog ("%d %d>%c ", (int)k3, (int)letter, c2);
4527 for (j=0; j<curnseq + 1; j++) WriteLog ("%c", bufferout[j][k3]);
4528 WriteLog ("\n");
4529 ***/
4530 k1++;
4531 k2++;
4532 k3++;
4533 }
4534 else if (c1 == '-' && c2 != '-') {
4535 for (j = 0; j < curnseq; ++j) {
4536 bufferout[j][k3] = bufferin[j][k1];
4537 }
4538 if (bufferout[0][k3] !='-') letter++;
4539 /***
4540 WriteLog ("%d %d*%c ", (int)k3, (int)letter, c2);
4541 for (j=0; j<curnseq + 1; j++) WriteLog ("%c", bufferout[j][k3]);
4542 WriteLog ("\n");
4543 ***/
4544 k1++;
4545 k3++;
4546 }
4547 else if (c1 != '-' && c2 == '-') {
4548 for (j = 1; j < 2; ++j) {
4549 bufferout[curnseq + j -1][k3] = buffertmp[j][k2];
4550 }
4551 if (bufferout[0][k3] !='-') letter++;
4552 /***
4553 WriteLog ("%d %d<%c ", (int)k3, (int)letter,c2);
4554 for (j=0; j<curnseq + 1; j++) WriteLog ("%c", bufferout[j][k3]);
4555 WriteLog ("\n");
4556 ***/
4557 k2++;
4558 k3++;
4559 }
4560 }
4561 if (k3 > 0) {
4562 if (k3 > max_lens)
4563 max_lens = k3;
4564 for (j=0; j < curnseq ; j++) {
4565 for (k=0; k<step; k++) {
4566 bufferin[j][k] = bufferout[j][k];
4567 }
4568 }
4569 str = (CharPtr)MemNew ((size_t) ((step+10) * sizeof(Char)));
4570 bufferin[j] = str;
4571 for (k=0; k<step; k++) {
4572 bufferin[j][k] = bufferout[j][k] ;
4573 }
4574 curnseq++;
4575 }
4576 }
4577 }
4578 }
4579 MemFree (buffertmp[0]);
4580 MemFree (buffertmp[1]);
4581 for (j=0; j < curnseq ; j++)
4582 MemFree (bufferout[j]);
4583
4584 vnp = NULL;
4585 for (j=0; j < curnseq; j++) {
4586 ValNodeAddPointer (&vnp, 0, (Pointer)(bufferin[j]));
4587 }
4588 sap = LocalAlignToSeqAnnotDimn (vnp, siphead, vnpfrom, curnseq, max_lens, vnpstrand, TRUE);
4589 sipcur = siphead;
4590 while (sipcur) {
4591 siptmp = sipcur->next;
4592 SeqIdFree (sipcur);
4593 sipcur = siptmp;
4594 }
4595 ValNodeFreeData (vnp);
4596 ValNodeFree (vnpfrom);
4597 ValNodeFree (vnpstrand);
4598 return sap;
4599 }
4600
PairSeqAlign2MultiSeqAlign(SeqAlignPtr salp)4601 NLM_EXTERN SeqAlignPtr PairSeqAlign2MultiSeqAlign (SeqAlignPtr salp)
4602 {
4603 SeqAnnotPtr sap;
4604 SeqAlignPtr salptmp = NULL;
4605
4606 if (salp!=NULL)
4607 {
4608 if (is_dim2seqalign (salp))
4609 {
4610 sap = multseqalign_from_pairseqalign (salp);
4611 if (sap!=NULL && sap->type==2)
4612 {
4613 salptmp = (SeqAlignPtr) sap->data;
4614 sap->data = NULL;
4615 SeqAnnotFree (sap);
4616 /******************************** DO NOT FREE Original SeqAlign anymore
4617 salp = SeqAlignSetFree (salp);
4618 ****************************************/
4619 }
4620 }
4621 }
4622 return salptmp;
4623 }
4624
4625
MultSeqAlignFromPairSeqAlign(Pointer data)4626 NLM_EXTERN Int2 LIBCALLBACK MultSeqAlignFromPairSeqAlign (Pointer data)
4627
4628 {
4629 OMProcControlPtr ompcp;
4630 SeqAnnotPtr sap;
4631 SeqAlignPtr salp;
4632 Uint2 entityID;
4633
4634 ompcp = (OMProcControlPtr) data;
4635 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
4636 switch (ompcp->input_itemtype) {
4637 case OBJ_SEQALIGN :
4638 break;
4639 case 0 :
4640 return OM_MSG_RET_ERROR;
4641 default :
4642 return OM_MSG_RET_ERROR;
4643 }
4644 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
4645 salp = (SeqAlignPtr)ompcp->input_data;
4646 if (salp == NULL) return OM_MSG_RET_ERROR;
4647 if (is_dim2seqalign (salp)) {
4648 SortSeqAlign (&salp);
4649 sap = multseqalign_from_pairseqalign (salp);
4650 } else {
4651 sap = SeqAnnotNew ();
4652 sap->type = 2;
4653 sap->data = (Pointer) salp;
4654 }
4655 if (sap != NULL) {
4656 entityID = ObjMgrRegister (OBJ_SEQANNOT, (Pointer) sap);
4657 if (entityID > 0) {
4658 ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
4659 }
4660 }
4661 return OM_MSG_RET_DONE;
4662 }
4663
multseqalign_to_pairseqalign(SeqAlignPtr salp)4664 NLM_EXTERN SeqAlignPtr LIBCALL multseqalign_to_pairseqalign (SeqAlignPtr salp)
4665 {
4666 SeqAlignPtr salpnew =NULL,
4667 salphead=NULL,
4668 nextsalp;
4669 DenseSegPtr dsp =NULL,
4670 newdsp;
4671 SeqIdPtr sip;
4672 Int4Ptr dsplen;
4673 Int4 k, j;
4674 Int2 salp_dim,
4675 dim;
4676
4677 if (salp==NULL)
4678 return NULL;
4679 if (salp->segtype != 2)
4680 return salp;
4681 if (is_dim2seqalign (salp))
4682 return salp;
4683 salp_dim = salp->dim;
4684 dsp = (DenseSegPtr) salp->segs;
4685 salp->segs = NULL;
4686 for (dim=0; dim<salp_dim-1; dim++)
4687 {
4688 newdsp = DenseSegNew ();
4689 if ( newdsp != NULL )
4690 {
4691 newdsp->dim = 2;
4692 newdsp->ids = SeqIdDup (dsp->ids);
4693 for (sip=dsp->ids, j=0; sip!=NULL && j<dim+1; sip=sip->next, j++)
4694 continue;
4695 newdsp->ids->next = SeqIdDup (sip);
4696 newdsp->numseg = dsp->numseg;
4697
4698 newdsp->lens =(Int4Ptr)MemNew((size_t)((newdsp->numseg + 2) * sizeof (Int4)));
4699 for (j = 0, dsplen = dsp->lens; j < newdsp->numseg; j++, dsplen++ )
4700 newdsp->lens [j] = *dsplen;
4701
4702 newdsp->starts=(Int4Ptr)MemNew((size_t)((2 *newdsp->numseg +4) * sizeof (Int4)));
4703 for (k = 0; k < newdsp->numseg; k++) {
4704 for (j=0; j<dsp->dim; j++) {
4705 if (j==0)
4706 newdsp->starts[k*newdsp->dim]=dsp->starts[k*dsp->dim];
4707 if (j==dim+1)
4708 newdsp->starts[k*newdsp->dim+1]=dsp->starts[k*dsp->dim+j];
4709 }
4710 }
4711
4712 if (dsp->strands != NULL) {
4713 newdsp->strands =(Uint1Ptr)MemNew((size_t)((2 *newdsp->numseg+4)*sizeof (Uint1)));
4714 for (k = 0; k < newdsp->numseg; k++) {
4715 for (j=0; j<dsp->dim; j++) {
4716 if (j==0)
4717 newdsp->starts[k*newdsp->dim]=dsp->starts[k*dsp->dim];
4718 if (j==dim+1)
4719 newdsp->starts[k*newdsp->dim+1]=dsp->starts[k*dsp->dim+j];
4720 }
4721 }
4722 }
4723 if (salphead==NULL) {
4724 salp->type = 3;
4725 salp->segtype = 2;
4726 salp->dim = 2;
4727 salp->score = NULL;
4728 salp->segs = (Pointer) newdsp;
4729 salphead= salp;
4730 nextsalp = salp;
4731 }
4732 else {
4733 salpnew = SeqAlignNew();
4734 if (salpnew) {
4735 salpnew->type = 3;
4736 salpnew->segtype = 2;
4737 salpnew->dim = 2;
4738 salpnew->score = NULL;
4739 salpnew->segs = (Pointer) newdsp;
4740 salpnew = Compact (salpnew);
4741 nextsalp->next = salpnew;
4742 nextsalp = salpnew;
4743 }
4744 }
4745 }
4746 }
4747 return salphead;
4748 }
4749
4750