1 /* $Id: salpedit.c,v 6.33 2007/05/07 13:32:02 kans Exp $ */
2 #include <ncbi.h>
3 #include <ncbimisc.h> /* ValNodeLen */
4 #include <sequtil.h> /* for GetScoreAndEvalue, SeqIdDupList */
5 #include <salpedit.h>
6 #include <salpacc.h>
7 #include <seqport.h>
8 #include <salsap.h>
9
10 #define MIN_DIAG_LEN 20 /*minimal amount of overlap required for build_overlap_list<-MergeTwoAlignList */
11
12 static Boolean DenseSegInsert(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, DenseSegPtr dsp, Boolean merge, BoolPtr seq_insert);
13 static SeqAlignPtr AttachSeqInAlign(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head);
14
OrderInt4(Int4Ptr x,Int4Ptr y)15 static void OrderInt4(Int4Ptr x, Int4Ptr y) /* replace jzmisc: swap */
16 {
17 Int4 temp;
18
19 if((*x) > (*y)){
20 temp = *x;
21 *x = *y;
22 *y = temp;
23 }
24 }
25
26
27 /*******************************************
28 ***
29 *** SeqAlignBioseqDeleteById
30 *** if DenseDiag: delete the SeqAlign
31 *** if DenseSeg: delete the SeqId, the starts positions,
32 *** and the corresponding strands
33 ***
34 ********************************************/
35
SeqAlignBioseqDeleteById(SeqAlignPtr salphead,SeqIdPtr sip)36 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignBioseqDeleteById (SeqAlignPtr salphead, SeqIdPtr sip)
37 {
38 SeqAlignPtr salp,
39 presalp = NULL,
40 nextsalp;
41 SeqIdPtr presip, nextsip, tmpsip;
42 DenseSegPtr dsp;
43 Int4 j, k;
44 Int2 index = 0;
45
46 salp = salphead;
47 while (salp!= NULL)
48 {
49 nextsalp = salp->next;
50 tmpsip =SeqIdPtrFromSeqAlign (salp);
51 if ((SeqIdOrderInBioseqIdList(sip, tmpsip)) > 0)
52 {
53 if (salp->segtype == 1 || salp->dim == 2)
54 {
55 if (salphead==NULL)
56 salphead = nextsalp;
57 if (presalp!=NULL)
58 presalp->next = nextsalp;
59 salp->next = NULL;
60 salp = SeqAlignFree (salp);
61 if (presalp!=NULL)
62 salp = presalp;
63 }
64 else if (salp->segtype == 2)
65 {
66 dsp = (DenseSegPtr) salp->segs;
67 presip = NULL;
68 while (tmpsip!=NULL)
69 {
70 nextsip = tmpsip->next;
71 if (SeqIdForSameBioseq (tmpsip, sip))
72 {
73 if (presip==NULL) {
74 dsp->ids = nextsip;
75 } else {
76 presip->next = nextsip;
77 }
78 tmpsip->next = NULL;
79 SeqIdFree (tmpsip);
80 k=0;
81 for (j=0; j<dsp->dim*dsp->numseg; j++) {
82 if (((j-index) % (dsp->dim))!=0) {
83 dsp->starts[k] = dsp->starts[j];
84 k++;
85 }
86 }
87 k=0;
88 if (dsp->strands) {
89 for (j=0; j<dsp->dim*dsp->numseg; j++) {
90 if (((j-index) % (dsp->dim))!=0) {
91 dsp->strands[k] = dsp->strands[j];
92 k++;
93 }
94 }
95 }
96 dsp->dim--;
97 salp->dim--;
98 }
99 else
100 presip = tmpsip;
101 tmpsip = nextsip;
102 index++;
103 }
104 }
105 }
106 presalp = salp;
107 salp = nextsalp;
108 }
109 return salphead;
110 }
111
SeqAlignBioseqDeleteByIdCallback(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)112 static void SeqAlignBioseqDeleteByIdCallback (SeqEntryPtr sep, Pointer mydata,
113 Int4 index, Int2 indent)
114 {
115 BioseqPtr bsp;
116 BioseqSetPtr bssp;
117 SeqAlignPtr salp,
118 salptmp;
119 SeqIdPtr sip;
120
121 sip = (SeqIdPtr)(mydata);
122 if (sip!=NULL && sep != NULL && sep->data.ptrvalue) {
123 if (IS_Bioseq(sep)) {
124 bsp = (BioseqPtr) sep->data.ptrvalue;
125 if (bsp!=NULL) {
126 salp = is_salp_in_sap (bsp->annot, 2);
127 if (salp!=NULL) {
128 for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
129 salptmp = SeqAlignBioseqDeleteById (salp, sip);
130 }
131 }
132 }
133 else if(IS_Bioseq_set(sep)) {
134 bssp = (BioseqSetPtr)sep->data.ptrvalue;
135 if (bssp!=NULL) {
136 salp = is_salp_in_sap (bssp->annot, 2);
137 if (salp!=NULL) {
138 for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
139 salptmp = SeqAlignBioseqDeleteById (salp, sip);
140 }
141 }
142 }
143 }
144 }
145
SeqAlignBioseqDeleteByIdFromSeqEntry(SeqEntryPtr sep,SeqIdPtr sip)146 NLM_EXTERN Pointer LIBCALL SeqAlignBioseqDeleteByIdFromSeqEntry (SeqEntryPtr sep, SeqIdPtr sip)
147 {
148 SeqEntryExplore (sep, (Pointer)sip, SeqAlignBioseqDeleteByIdCallback);
149 return sep;
150 }
151
152 /**************************************************************************
153 ***
154 * Functions for editing a Seq-align object
155 *
156 ***************************************************************************
157 ***/
SeqAlignInsert(SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,SeqAlignPtr head,Boolean merge,Boolean add_insertion)158 static SeqAlignPtr SeqAlignInsert(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head, Boolean merge, Boolean add_insertion)
159 {
160 DenseSegPtr dsp;
161 DenseDiagPtr ddp;
162 StdSegPtr ssp;
163 Boolean seq_insert;
164 BioseqPtr bsp;
165 SeqAlignPtr align;
166
167 align = head;
168 seq_insert = FALSE;
169 while(align)
170 {
171 switch(align->segtype)
172 {
173 case 0:
174 break;
175 case 1:
176 ddp = (DenseDiagPtr)(align->segs);
177 break;
178 case 2:
179 dsp = (DenseSegPtr)(align->segs);
180 DenseSegInsert(from_id, from, to, strand, to_id, pos, dsp, merge, &seq_insert);
181 break;
182 case 3:
183 ssp = (StdSegPtr)(align->segs);
184 break;
185 default:
186 break;
187 }
188 align = align->next;
189 }
190
191 /*sequence is not inserted*/
192 if(!seq_insert && add_insertion)
193 {
194 bsp = BioseqFind(from_id);
195 if(bsp->repr != Seq_repr_seg)
196 head= AttachSeqInAlign(from_id, from, to, strand, to_id, pos, head);
197 }
198 return head;
199
200 }
201
get_t_order(SeqIdPtr ids,SeqIdPtr t_id)202 static Int2 get_t_order(SeqIdPtr ids, SeqIdPtr t_id)
203 {
204 Int2 t_order = 0;
205
206 if(t_id == NULL)
207 return -1;
208 while(ids)
209 {
210 if(SeqIdForSameBioseq(t_id, ids))
211 return t_order;
212 ++t_order;
213 ids = ids->next;
214 }
215 return -1;
216
217 }
218
219
220
get_seg(ValNodePtr v_starts,Int2 dim,Int4Ptr starts)221 static Boolean get_seg(ValNodePtr v_starts, Int2 dim, Int4Ptr starts)
222 {
223 Int2 i;
224
225 for(i=0; i<dim && v_starts != NULL; ++i, v_starts = v_starts->next)
226 starts[i] = v_starts->data.intvalue;
227 return (i == dim);
228 }
229
230
Free_node(ValNodePtr v_starts,ValNodePtr pv_starts,Int2 dim)231 static ValNodePtr Free_node(ValNodePtr v_starts, ValNodePtr pv_starts, Int2 dim)
232 {
233 Int2 i;
234 ValNodePtr next = NULL;
235
236 for(i =0; i<dim; ++i)
237 {
238 next= v_starts->next;
239 v_starts->next = NULL;
240 ValNodeFree(v_starts);
241 v_starts = next;
242 }
243
244 while(dim > 1)
245 {
246 pv_starts = pv_starts->next;
247 --dim;
248 }
249 pv_starts->next = next;
250 return next;
251
252 }
253
is_gap_segment(Int4Ptr c_starts,Int2 dim)254 static Boolean is_gap_segment(Int4Ptr c_starts, Int2 dim)
255 {
256 Int2 i;
257 Int2 numgap = 0;
258
259 for(i =0; i<dim; ++i)
260 {
261 if(c_starts[i] == -1)
262 ++numgap;
263 }
264 return (numgap >= (dim -1));
265 }
266
mod_prev_start(ValNodePtr p_starts,Int4Ptr c_starts,Int4Ptr c_strands,Int2 dim)267 static void mod_prev_start(ValNodePtr p_starts, Int4Ptr c_starts, Int4Ptr c_strands, Int2 dim)
268 {
269 Int2 i;
270
271 for(i =0; i<dim; ++i)
272 {
273 if(c_strands[i] == (Int4)Seq_strand_minus)
274 p_starts->data.intvalue = c_starts[i];
275 p_starts = p_starts->next;
276 }
277 }
278
279 /****************************************************************************
280 ***
281 * return TRUE if needs another round of modification
282 *
283 *****************************************************************************
284 ***/
mod_alignment(ValNodePtr v_starts,ValNodePtr v_strands,ValNodePtr v_lens,Int2 dim,Int2 m_order,Int2Ptr n_segs)285 static Boolean mod_alignment(ValNodePtr v_starts, ValNodePtr v_strands, ValNodePtr v_lens, Int2 dim, Int2 m_order, Int2Ptr n_segs)
286 {
287 ValNodePtr pv_starts, pv_lens, pv_strands;
288 Boolean has_prev;
289 Int4 c_len, p_len=0;
290 Int4 p_start, c_start;
291 Int2 i;
292 Boolean retval;
293 Int4Ptr cur_starts, prev_starts;
294 Int4Ptr cur_strands, prev_strands;
295 Boolean merge;
296
297 retval = FALSE;
298 cur_starts = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
299 prev_starts = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
300 cur_strands = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
301 prev_strands = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
302
303 has_prev = FALSE;
304 pv_starts = NULL;
305 pv_strands = NULL;
306 pv_lens = NULL;
307 while(v_starts && v_strands && v_lens)
308 {
309 get_seg(v_starts, dim, cur_starts);
310 get_seg(v_strands, dim, cur_strands);
311 c_len = v_lens->data.intvalue;
312 merge = FALSE;
313
314 if(has_prev) /*check if the two can be merged*/
315 {
316 merge = TRUE;
317 for(i =0; i<dim; ++i)
318 {
319 p_start = prev_starts[i];
320 c_start = cur_starts[i];
321 if(cur_strands[i] != prev_strands[i])
322 {
323 merge = FALSE;
324 break;
325 }
326 if(p_start == -1 && c_start != -1)
327 {
328 merge = FALSE;
329 break;
330 }
331 if(p_start != -1 && c_start == -1)
332 {
333 merge = FALSE;
334 break;
335 }
336 if(p_start != -1 && c_start != -1)
337 {
338 if(cur_strands[i] == (Int4)Seq_strand_minus)
339 {
340 if(c_start + c_len != p_start)
341 {
342 merge = FALSE;
343 break;
344 }
345 }
346 else
347 {
348 if(p_start + p_len != c_start)
349 {
350 merge = FALSE;
351 break;
352 }
353 }
354 }
355 }
356 }
357 if(merge)
358 {
359 p_len += c_len;
360 pv_lens->data.intvalue = p_len;
361 mod_prev_start(pv_starts, cur_starts, cur_strands, dim);
362 v_starts = Free_node(v_starts, pv_starts, dim);
363 v_strands = Free_node(v_strands, pv_strands, dim);
364 v_lens = Free_node(v_lens, pv_lens, 1);
365 -- (*n_segs);
366 retval = TRUE;
367 }
368 else
369 {
370 pv_starts = v_starts;
371 pv_strands = v_strands;
372 pv_lens = v_lens;
373 MemCopy(prev_starts, cur_starts, (size_t)dim * sizeof(Int4));
374 MemCopy(prev_strands, cur_strands, (size_t)dim * sizeof(Int4));
375 p_len = c_len;
376 for(i =0; i<dim; ++i)
377 {
378 v_starts = v_starts->next;
379 v_strands = v_strands->next;
380 }
381 v_lens = v_lens->next;
382 has_prev = TRUE;
383
384 }
385
386 }
387
388 MemFree(prev_starts);
389 MemFree(cur_starts);
390 MemFree(prev_strands);
391 MemFree(cur_strands);
392 return retval;
393
394 }
395
load_new_data(Int4Ptr starts,Uint1Ptr strands,Int4Ptr lens,ValNodePtr v_starts,ValNodePtr v_strands,ValNodePtr v_lens,Int2 dim,Int2 cur_seg)396 static void load_new_data(Int4Ptr starts, Uint1Ptr strands, Int4Ptr lens, ValNodePtr v_starts, ValNodePtr v_strands, ValNodePtr v_lens, Int2 dim, Int2 cur_seg)
397 {
398 Int2 i, j;
399
400 for (i =0; i<cur_seg; ++i)
401 {
402 for(j =0; j<dim; ++j)
403 {
404 starts[i*dim+j] = v_starts->data.intvalue;
405 strands[i*dim+j] = (Uint1)(v_strands->data.intvalue);
406 v_starts = v_starts->next;
407 v_strands = v_strands->next;
408 }
409 lens[i] = v_lens->data.intvalue;
410 v_lens = v_lens->next;
411 }
412
413 }
414
415
get_nth_id(SeqIdPtr ids,Int2 order)416 static SeqIdPtr get_nth_id(SeqIdPtr ids, Int2 order)
417 {
418 Int2 i;
419
420 i =0;
421 while(ids){
422 if(order == i)
423 return ids;
424 ++i;
425 ids = ids->next;
426 }
427
428 return NULL;
429 }
430
make_dsp(ValNodePtr v_starts,ValNodePtr v_lens,ValNodePtr v_strands,Int2 m_order,Int2 dim,DenseSegPtr prev,Int2 cur_seg,Boolean merge,SeqIdPtr m_sip,SeqIdPtr s_sip)431 static DenseSegPtr make_dsp(ValNodePtr v_starts, ValNodePtr v_lens, ValNodePtr v_strands, Int2 m_order, Int2 dim, DenseSegPtr prev, Int2 cur_seg, Boolean merge, SeqIdPtr m_sip, SeqIdPtr s_sip)
432 {
433 Int4Ptr starts, lens;
434 Uint1Ptr strands;
435 DenseSegPtr dsp;
436
437 /*while(merge)*/
438 if(merge)
439 merge = mod_alignment(v_starts, v_strands, v_lens, dim, m_order, &cur_seg);
440 starts = (Int4Ptr) MemNew((size_t)(dim* cur_seg) * sizeof(Int4));
441 lens= (Int4Ptr) MemNew((size_t)(cur_seg) * sizeof(Int4));
442 strands= (Uint1Ptr) MemNew((size_t)(dim * cur_seg) * sizeof(Uint1));
443
444 load_new_data(starts, strands, lens, v_starts, v_strands, v_lens, dim, cur_seg);
445 ValNodeFree(v_starts);
446 ValNodeFree(v_strands);
447 ValNodeFree(v_lens);
448 if(prev)
449 {
450 MemFree(prev->starts);
451 MemFree(prev->lens);
452 MemFree(prev->strands);
453 prev->numseg = cur_seg;
454 prev->starts = starts;
455 prev->strands = strands;
456 prev->lens = lens;
457 return prev;
458 }
459 else
460 {
461 if(m_sip == NULL || s_sip == NULL)
462 {
463 Message(MSG_ERROR, "Fail to get the new DenseSeg");
464 MemFree(starts);
465 MemFree(lens);
466 MemFree(strands);
467 return NULL;
468 }
469 dsp = DenseSegNew();
470 dsp->dim = dim;
471 dsp->ids = SeqIdDup(m_sip);
472 dsp->ids->next = SeqIdDup(s_sip);
473 dsp->numseg = cur_seg;
474 dsp->starts = starts;
475 dsp->lens = lens;
476 dsp->strands = strands;
477 return dsp;
478 }
479
480 }
481
482
DenseSegInsert(SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,DenseSegPtr dsp,Boolean merge,BoolPtr seq_insert)483 static Boolean DenseSegInsert(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, DenseSegPtr dsp, Boolean merge, BoolPtr seq_insert)
484 {
485 Int2 cur_seg, i, j;
486 Int2 t_order; /*the order of target sequence*/
487 Int4 t_start, t_stop, t_max;
488 Int4 cur_start;
489 Int2 dim;
490 Int4Ptr starts, lens;
491 Uint1 cur_strand;
492 Uint1Ptr strands;
493 ValNodePtr v_starts, v_lens, v_strands;
494 SeqIdPtr c_id;
495 Int4 ins_len;
496 Boolean add_insert_seg;
497 Boolean has_source_id;
498 Int4 gap_size, s_gap_size;
499 Int4 s_last, t_last, s_first, t_first;
500 Int2 s_order = -1;
501
502
503
504 if(pos <0)
505 return FALSE;
506 t_max = 0;
507 t_order = get_t_order(dsp->ids, to_id);
508 if(t_order == -1)
509 return FALSE;
510 dim = dsp->dim;
511 starts = dsp->starts;
512 lens = dsp->lens;
513 strands = dsp->strands;
514
515 cur_seg = 0;
516 v_starts = NULL;
517 v_lens = NULL;
518 v_strands = NULL;
519 ins_len = to - from +1;
520 has_source_id = FALSE;
521 for(c_id = dsp->ids, i =0; c_id != NULL; c_id = c_id->next, ++i)
522 {
523 if(SeqIdMatch(c_id, from_id))
524 {
525 s_order = i;
526 has_source_id = TRUE;
527 break;
528 }
529 }
530
531
532 /*check if it can be inserted before the start of the alignment*/
533 gap_size = -1;
534 s_gap_size = -1;
535 if(*seq_insert == FALSE && has_source_id)
536 {
537 t_first = dsp->starts[t_order];
538 cur_strand = dsp->strands[t_order];
539 if(t_first != -1)
540 {
541 if(cur_strand == Seq_strand_minus)
542 {
543 t_first += dsp->lens[0];
544 if(t_first <= pos)
545 gap_size = pos - t_first;
546 }
547 else /*plus strand*/
548 {
549 if(t_first >=pos )
550 gap_size = t_first - pos;
551 }
552 }
553
554 s_first = dsp->starts[s_order];
555 cur_strand = dsp->strands[s_order];
556 if(s_first != -1)
557 {
558
559 if(cur_strand == Seq_strand_minus)
560 {
561 s_first += dsp->lens[0];
562 if(s_first <= from)
563 s_gap_size = from - s_first;
564 }
565 else
566 {
567 if(s_first > 0)
568 {
569 --s_first;
570 if(s_first >=to)
571 s_gap_size = s_first - to;
572 }
573 }
574 }
575 if(gap_size >= 0 && s_gap_size >= 0 && gap_size == s_gap_size)
576 {
577 for(j = 0; j<dim; ++j)
578 {
579 cur_start = -1;
580 cur_strand = dsp->strands[dim*(dsp->numseg-1) + j];
581 if(j == t_order)
582 {
583 cur_start = pos;
584 }
585 if(j == s_order)
586 {
587 if(cur_strand == Seq_strand_minus)
588 cur_start = from - gap_size;
589 else
590 cur_start = from;
591 }
592 ValNodeAddInt(&v_starts, 0, cur_start);
593 ValNodeAddInt(&v_strands, 0, cur_strand);
594 }
595 ValNodeAddInt(&v_lens, 0, (ins_len + gap_size));
596 *seq_insert = TRUE;
597 ++cur_seg;
598 }
599 }
600
601
602
603 for(i =0; i<dsp->numseg; ++i)
604 {
605 t_start = starts[dim*i +t_order];
606 if(t_start != -1)
607 {
608 t_stop = t_start + lens[i] -1;
609 t_max = MAX(t_stop, t_max);
610
611 /*insertion is downstrems of the current segment*/
612 /*copy everything*/
613 if(pos > t_stop)
614 { /*downstream insertions*/
615 for(j=0; j<dim; ++j)
616 {
617 cur_start = starts[dim*i + j];
618 cur_strand = (strands[dim*i + j]);
619 ValNodeAddInt(&v_starts, 0, cur_start);
620 ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
621 }
622 ValNodeAddInt(&v_lens, 0, lens[i]);
623 ++cur_seg;
624 }
625
626 /*insertion is upstream, add offset*/
627 if(pos < t_start)
628 {
629 for(j=0; j<dim; ++j)
630 {
631 cur_start = starts[dim*i + j];
632 cur_strand = (strands[dim*i + j]);
633 if(j == t_order)
634 cur_start += ins_len;
635 ValNodeAddInt(&v_starts, 0, cur_start);
636 ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
637 }
638 ValNodeAddInt(&v_lens, 0, lens[i]);
639 ++cur_seg;
640 }
641
642 /*insert within the current segment*/
643 if(pos >= t_start && pos <= t_stop) /*insert within*/
644 {
645 /*if the segment is broken, add the first half*/
646 if(pos > t_start)
647 {
648 for(j = 0; j<dim; ++j)
649 {
650 cur_start = starts[dim*i+j];
651 cur_strand = strands[dim*i + j];
652 ValNodeAddInt(&v_starts, 0, cur_start);
653 ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
654 }
655 ValNodeAddInt(&v_lens, 0, (pos - t_start));
656 ++cur_seg;
657 }
658
659 /*add the inserted segment*/
660 if(i == 0)
661 add_insert_seg = (has_source_id && *seq_insert == FALSE);
662 else
663 add_insert_seg = TRUE;
664
665 if(add_insert_seg)
666 {
667 for(j = 0; j<dim ; ++j)
668 {
669 cur_start = -1;
670 cur_strand = strands[dim*i + j];
671 if(has_source_id && j == s_order)
672 {
673 *seq_insert = TRUE;
674 cur_start = from;
675 cur_strand = strand;
676 }
677 if(j == t_order)
678 cur_start = pos;
679 ValNodeAddInt(&v_starts, 0, cur_start);
680 ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
681 }
682 ValNodeAddInt(&v_lens, 0, ins_len);
683 ++cur_seg;
684 }
685
686 /*add the leftover segment*/
687 for(j=0; j<dim; ++j)
688 {
689 cur_start = starts[dim*i+j];
690 cur_strand = strands[dim*i + j];
691 if(j == t_order)
692 cur_start = pos + ins_len;
693 ValNodeAddInt(&v_starts, 0, cur_start);
694 ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
695 }
696 ValNodeAddInt(&v_lens, 0, (lens[i] - (pos - t_start)));
697 ++cur_seg;
698 }
699 }
700
701 else /*gaps in the target sequence*/
702 {
703 for(j=0; j<dim; ++j)
704 {
705 cur_start = starts[dim*i + j];
706 cur_strand = (strands[dim*i + j]);
707 ValNodeAddInt(&v_starts, 0, cur_start);
708 ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
709 }
710 ValNodeAddInt(&v_lens, 0, lens[i]);
711 ++cur_seg;
712 }
713 }
714
715
716 /*attach to the end of the sequence*/
717 gap_size = -1;
718 s_gap_size = -1;
719 if(*seq_insert == FALSE && has_source_id)
720 {
721 t_last = dsp->starts[dim*(dsp->numseg-1) +t_order];
722 cur_strand = dsp->strands[dim*(dsp->numseg-1) + t_order];
723 if(t_last != -1)
724 {
725 if(cur_strand == Seq_strand_minus)
726 {
727 t_last -= 1;
728 if(pos <= t_last)
729 gap_size = t_last - pos;
730 }
731 else /*plus strand*/
732 {
733 t_last += dsp->lens[dsp->numseg-1];
734 if(pos >= t_last)
735 gap_size = pos - t_last;
736 }
737 }
738
739 s_last = dsp->starts[dim*(dsp->numseg-1) +s_order];
740 cur_strand = dsp->strands[dim*(dsp->numseg-1) + s_order];
741 if(s_last != -1)
742 {
743
744 if(cur_strand == Seq_strand_minus)
745 {
746 s_last -= 1;
747 s_gap_size = s_last - to;
748 }
749 else
750 {
751 s_last += dsp->lens[dsp->numseg-1];
752 s_gap_size = from - s_last;
753 }
754 }
755 if(gap_size >=0 && s_gap_size >=0 && gap_size == s_gap_size)
756 {
757 for(j = 0; j<dim; ++j)
758 {
759 cur_start = -1;
760 cur_strand = dsp->strands[dim*(dsp->numseg-1) + j];
761 if(j == t_order)
762 {
763 if(cur_strand == Seq_strand_minus)
764 cur_start = t_last - (ins_len + gap_size -1);
765 else
766 cur_start = t_last;
767 }
768 if(j == s_order)
769 {
770 if(cur_strand == Seq_strand_minus)
771 cur_start = from;
772 else
773 cur_start = from - gap_size;
774 }
775 ValNodeAddInt(&v_starts, 0, cur_start);
776 ValNodeAddInt(&v_strands, 0, cur_strand);
777 }
778 ValNodeAddInt(&v_lens, 0, (ins_len + gap_size));
779 *seq_insert = TRUE;
780 ++cur_seg;
781 }
782 }
783
784
785 dsp = make_dsp(v_starts, v_lens, v_strands, t_order, dim, dsp, cur_seg, merge, NULL, NULL);
786 return TRUE;
787 }
788
789
store_data(Int4 val,ValNodePtr head)790 static ValNodePtr store_data(Int4 val, ValNodePtr head)
791 {
792 ValNodeAddInt(&head, 0, val);
793 return head;
794 }
795
store_reverse_data(Int4 val,ValNodePtr head)796 static ValNodePtr store_reverse_data(Int4 val, ValNodePtr head)
797 {
798 ValNodePtr new_vnp;
799
800 new_vnp = ValNodeNew(NULL);
801 new_vnp->data.intvalue = val;
802
803 new_vnp->next = head;
804 return new_vnp;
805 }
806
find_last_node(ValNodePtr head)807 static ValNodePtr find_last_node(ValNodePtr head)
808 {
809 if(head != NULL)
810 {
811 while(head->next != NULL)
812 head = head->next;
813 return head;
814 }
815 return NULL;
816
817 }
818
link_to_end(ValNodePtr new_vnp,ValNodePtr head)819 static ValNodePtr link_to_end(ValNodePtr new_vnp, ValNodePtr head)
820 {
821 ValNodePtr l_node;
822
823 if(head == NULL)
824 return new_vnp;
825 l_node = find_last_node(head);
826 l_node->next = new_vnp;
827 return head;
828 }
829
find_nth_node(ValNodePtr vnp,Int2 order)830 static ValNodePtr find_nth_node(ValNodePtr vnp, Int2 order)
831 {
832 Int2 i;
833
834 i =0;
835 while(vnp ){
836 if(i == order)
837 return vnp;
838 vnp = vnp->next;
839 ++i;
840 }
841 return NULL;
842 }
843
get_last_pos(ValNodePtr v_starts,ValNodePtr v_lens)844 static Int4 get_last_pos(ValNodePtr v_starts, ValNodePtr v_lens)
845 {
846 ValNodePtr prev;
847 Int4 start, pos;
848
849 prev = NULL;
850 while(v_starts->next != NULL)
851 {
852 prev = v_starts;
853 v_starts = v_starts->next;
854 }
855 if(prev == NULL)
856 return -1;
857 start = prev->data.intvalue;
858 while(v_lens->next != NULL)
859 v_lens = v_lens->next;
860 pos = start + v_lens->data.intvalue -1;
861 return pos;
862 }
863
864
865
866
867 /*************************************************************************
868 ***
869 * only consider the master sequence is on plus strand
870 * and m_order is always == 0
871 *
872 **************************************************************************
873 ***/
merge_two_seg(ValNodePtr PNTR v_starts,ValNodePtr vs_starts,ValNodePtr PNTR v_lens,ValNodePtr vs_lens,ValNodePtr PNTR v_strands,ValNodePtr vs_strands,Int4 pos,Uint1 s_strand,Int2 numseg)874 static Boolean merge_two_seg(ValNodePtr PNTR v_starts, ValNodePtr vs_starts, ValNodePtr PNTR v_lens, ValNodePtr vs_lens, ValNodePtr PNTR v_strands, ValNodePtr vs_strands, Int4 pos, Uint1 s_strand, Int2 numseg)
875 {
876 Int4 gap, last_pos;
877 ValNodePtr l_start, l_len;
878
879 gap = (*v_starts)->data.intvalue - (pos);
880 if(gap >=0)
881 {
882 /*if(gap > 0)
883 {
884 *v_starts = store_reverse_data(-1, *v_starts);
885 *v_starts = store_reverse_data(pos, *v_starts);
886 *v_strands = store_reverse_data(s_strand, *v_strands);
887 *v_strands = store_reverse_data(1, *v_strands);
888 *v_lens = store_reverse_data(gap+1, *v_lens);
889 }*/
890 last_pos = get_last_pos(vs_starts, vs_lens);
891 if(last_pos == -1)
892 return FALSE;
893
894 gap = (*v_starts)->data.intvalue - last_pos -1;
895 if(gap >0)
896 return FALSE;
897 *v_starts = link_to_end(*v_starts, vs_starts);
898 *v_strands = link_to_end(*v_strands, vs_strands);
899 *v_lens = link_to_end(*v_lens, vs_lens);
900 return TRUE;
901 }
902
903 l_start = find_nth_node(*v_starts, 2*(numseg-1));
904 l_len= find_nth_node(*v_lens, (numseg-1));
905 last_pos = l_start->data.intvalue + l_len->data.intvalue -1;
906 gap = pos - last_pos;
907 if(gap >=0)
908 {
909 /*if(gap >0)
910 {
911 *v_starts = store_data(pos, *v_starts);
912 *v_starts = store_data(-1, *v_starts);
913 *v_strands = store_data(1, *v_strands);
914 *v_strands = store_data(s_strand, *v_strands);
915 *v_lens = store_data(gap+1, *v_lens);
916 }*/
917 gap = vs_starts->data.intvalue - last_pos -1;
918 if(gap >0)
919 return FALSE;
920 *v_starts = link_to_end(vs_starts, *v_starts);
921 *v_strands = link_to_end(vs_strands, *v_strands);
922 *v_lens = link_to_end(vs_lens, *v_lens);
923 return TRUE;
924 }
925
926 return FALSE;
927 }
928
929
930 /************************************************************************
931 ***
932 * load the portion of alignment (from, to, strand) in f_dsp to
933 * the interval where to_id points to
934 * from_id has already been inserted into to_id, so the region
935 * can be mapped to the interval on to_id
936 *
937 *************************************************************************
938 ***/
dsp_process(DenseSegPtr f_dsp,Int4 from,Int4 to,Uint1 strand,SeqIdPtr from_id,SeqIdPtr to_id,Int4 pos,ValNodePtr PNTR vs_starts,ValNodePtr PNTR vs_strands,ValNodePtr PNTR vs_lens)939 static Boolean dsp_process(DenseSegPtr f_dsp, Int4 from, Int4 to, Uint1 strand, SeqIdPtr from_id, SeqIdPtr to_id, Int4 pos, ValNodePtr PNTR vs_starts, ValNodePtr PNTR vs_strands, ValNodePtr PNTR vs_lens)
940 {
941 Int4Ptr starts, lens;
942 Uint1Ptr strands;
943 Uint1 i_strand;
944 SeqLocPtr m_loc;
945 Int4 m_start, m_stop, m_pos;
946 Int4 off_start, off_stop;
947 Int4 t_start, t_stop;
948 Int4 s_start, s_len;
949 Int2 m_order, i;
950 Boolean is_found;
951 BioseqPtr tbsp;
952
953
954 if(f_dsp == NULL)
955 return FALSE;
956 tbsp = BioseqFind(to_id);
957 if(tbsp == NULL)
958 return FALSE;
959 starts = f_dsp->starts;
960 lens = f_dsp->lens;
961 strands = f_dsp->strands;
962 m_loc = SeqLocIntNew(0, 0, 1, from_id);
963 m_pos = -1;
964 m_order = get_t_order(f_dsp->ids, from_id);
965 is_found = FALSE;
966 *vs_starts = NULL;
967 *vs_strands = NULL;
968 *vs_lens = NULL;
969
970 for(i = 0; i<f_dsp->numseg; ++i)
971 {
972 m_start = starts[2*i+m_order];
973 if(m_start != -1){
974 m_stop = m_start + lens[i] -1;
975 m_pos = m_stop +1;
976 }
977 else
978 {
979 m_start = m_pos;
980 m_stop = m_pos;
981 }
982 if(m_start != -1 && m_stop != -1)
983 {
984 if(!(m_start > to) || (m_stop < from))
985 {
986 off_start = MAX(0, from-m_start);
987 off_stop = MAX(0, m_stop - to);
988 /*if(tbsp->repr == Seq_repr_seg)
989 {
990 m_start = MAX(m_start, from);
991 m_stop = MIN(m_stop, to);
992 update_seq_loc(m_start, m_stop, strand, m_loc);
993 t_start = GetOffsetInBioseq(m_loc, tbsp, SEQLOC_LEFT_END);
994 t_stop = GetOffsetInBioseq(m_loc, tbsp, SEQLOC_RIGHT_END);
995 }
996 else
997 {*/
998 if(strand == Seq_strand_minus)
999 {
1000 t_start = (pos-1) -(to - from) + MAX((to-m_stop), 0);
1001 t_stop = pos -1 - MAX((m_start-from), 0);
1002 }
1003 else
1004 {
1005 t_start = (pos-1) - (to - from) + MAX((m_start-from), 0);
1006 t_stop = pos-1 - MAX((to - m_stop), 0);
1007 }
1008 /*}*/
1009 if(t_start != -1 && t_stop != -1)
1010 {
1011 OrderInt4(&t_start, &t_stop);
1012 s_start = starts[2*i +1-m_order];
1013 if(s_start != -1)
1014 {
1015 if(strands[2*i] == strands[2*i+1])
1016 s_start += off_start;
1017 else
1018 s_start += off_stop;
1019 }
1020 s_len = lens[i] - (off_start + off_stop);
1021 if(s_len >0)
1022 {
1023 is_found = TRUE;
1024 i_strand = strands[2*i+1-m_order];
1025 if(starts[2*i+m_order] == -1)
1026 t_start = -1;
1027 if(strand != strands[2*i+m_order]){ /*reverse the seg order*/
1028 i_strand = 3 - i_strand;
1029 *vs_starts = store_reverse_data(s_start, *vs_starts);
1030 *vs_starts = store_reverse_data(t_start, *vs_starts);
1031 *vs_strands = store_reverse_data((Int4)i_strand, *vs_strands);
1032 *vs_strands = store_reverse_data(1, *vs_strands);
1033 *vs_lens = store_reverse_data(s_len, *vs_lens);
1034 }
1035 else
1036 {
1037 *vs_starts = store_data(t_start, *vs_starts);
1038 *vs_starts = store_data(s_start, *vs_starts);
1039 *vs_strands = store_data(1, *vs_strands);
1040 *vs_strands = store_data((Int4)i_strand, *vs_strands);
1041 *vs_lens = store_data(s_len, *vs_lens);
1042 }
1043 } /*end of if(s_len>0)*/
1044 }
1045 }
1046 }
1047 }
1048 SeqLocFree(m_loc);
1049 return is_found;
1050
1051 }
1052
1053
get_cur_seg(ValNodePtr lens)1054 static Int2 get_cur_seg(ValNodePtr lens)
1055 {
1056 Int2 i=0;
1057
1058 while(lens)
1059 {
1060 ++i;
1061 lens = lens->next;
1062 }
1063 return i;
1064 }
1065
1066 /************************************************************************
1067 ***
1068 * DenseSegMerge(only for the two-dimentional pairwise alignment
1069 * pos is the position on the merged sequence
1070 *
1071 *************************************************************************
1072 ***/
DenseSegMerge(DenseSegPtr f_dsp,SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,DenseSegPtr t_dsp)1073 static Boolean DenseSegMerge(DenseSegPtr f_dsp, SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, DenseSegPtr t_dsp)
1074 {
1075 Int2 f_order, t_order;
1076 SeqIdPtr fs_id, ts_id;
1077 Int2 i, j;
1078 ValNodePtr v_starts, v_lens, v_strands, vs_starts, vs_lens, vs_strands;
1079 Int4Ptr starts, lens;
1080 Uint1Ptr strands;
1081 Int2 cur_seg;
1082 Uint1 i_strand;
1083 BioseqPtr t_bsp;
1084
1085 t_order = get_t_order(t_dsp->ids, to_id);
1086 if(t_order == -1)
1087 return FALSE;
1088 f_order = get_t_order(f_dsp->ids, from_id);
1089 if(f_order == -1)
1090 return FALSE;
1091
1092 fs_id = get_nth_id(f_dsp->ids, 1-f_order);
1093 ts_id = get_nth_id(t_dsp->ids, 1-t_order);
1094 if(!SeqIdForSameBioseq(fs_id, ts_id))
1095 return FALSE;
1096
1097 vs_starts =NULL;
1098 vs_strands = NULL;
1099 vs_lens = NULL;
1100 if(!dsp_process(f_dsp, from, to, strand, from_id, to_id, pos, &vs_starts, &vs_strands, &vs_lens)) /*the alignment is not within [from, to]*/
1101 return TRUE;
1102 i_strand = (Uint1)(vs_strands->next->data.intvalue);
1103 t_bsp = BioseqFind(to_id);
1104 if(pos == BioseqGetLen(t_bsp)) /*insertion at the end*/
1105 pos = pos - (to - from +1) -1;
1106
1107 v_starts =NULL;
1108 v_strands = NULL;
1109 v_lens = NULL;
1110 starts = t_dsp->starts;
1111 lens = t_dsp->lens;
1112 strands = t_dsp->strands;
1113 for(i =0; i<t_dsp->numseg; ++i)
1114 {
1115 for(j =0; j<2; ++j)
1116 {
1117 v_starts = store_data(starts[2*i +j], v_starts);
1118 v_strands = store_data(strands[2*i+j], v_strands);
1119 }
1120 v_lens = store_data(lens[i], v_lens);
1121 }
1122
1123 if(!merge_two_seg(&v_starts, vs_starts, &v_lens, vs_lens, &v_strands, vs_strands, pos, i_strand, t_dsp->numseg)) /*fail to brought togather*/
1124 {
1125 ValNodeFree(v_starts);
1126 ValNodeFree(vs_starts);
1127 ValNodeFree(v_lens);
1128 ValNodeFree(vs_lens);
1129 ValNodeFree(v_strands);
1130 ValNodeFree(vs_strands);
1131 return FALSE;
1132 }
1133
1134 cur_seg = get_cur_seg(v_lens);
1135 t_dsp = make_dsp(v_starts, v_lens, v_strands, t_order, 2, t_dsp, cur_seg, TRUE, to_id, fs_id);
1136 return TRUE;
1137 }
1138
make_new_align(DenseSegPtr f_dsp,Int4 from,Int4 to,Uint1 strand,SeqIdPtr from_id,SeqIdPtr to_id,Int4 pos,SeqAlignPtr head)1139 static SeqAlignPtr make_new_align(DenseSegPtr f_dsp, Int4 from, Int4 to, Uint1 strand, SeqIdPtr from_id, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head)
1140 {
1141 ValNodePtr vs_starts, vs_strands, vs_lens;
1142 DenseSegPtr dsp;
1143 Int2 cur_seg;
1144 SeqAlignPtr align;
1145 SeqIdPtr fs_id;
1146
1147 if(!dsp_process(f_dsp, from, to, strand, from_id, to_id, pos, &vs_starts, &vs_strands, &vs_lens)) /*the alignment is not within [from, to]*/
1148 return head;
1149
1150 fs_id = f_dsp->ids->next;
1151 cur_seg = get_cur_seg(vs_lens);
1152 dsp = make_dsp(vs_starts, vs_lens, vs_strands, 0, 2, NULL, cur_seg, TRUE, to_id, fs_id);
1153 if(dsp)
1154 {
1155 align = SeqAlignNew();
1156 align->segtype = 2;
1157 align->segs = dsp;
1158 align->type =3;
1159 if(head == NULL)
1160 head = align;
1161 else
1162 head = SeqAlignLink(align, head);
1163 }
1164 return head;
1165 }
1166
1167
1168
JZSeqAlignMerge(SeqAlignPtr f_align,SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,SeqAlignPtr t_align)1169 static SeqAlignPtr JZSeqAlignMerge(SeqAlignPtr f_align, SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr t_align)
1170 {
1171 SeqAlignPtr new_salp, curr;
1172 DenseSegPtr f_dsp, t_dsp;
1173 Boolean is_found;
1174
1175 if(f_align == NULL)
1176 return t_align;
1177
1178 new_salp = NULL;
1179 while(f_align)
1180 {
1181 if(f_align->segtype == 2)
1182 {
1183 f_dsp = (DenseSegPtr) f_align->segs;
1184 is_found = FALSE;
1185 curr = t_align;
1186 while(curr && !is_found)
1187 {
1188 if(curr->segtype ==2)
1189 {
1190 t_dsp = (DenseSegPtr) curr->segs;
1191 is_found = DenseSegMerge(f_dsp, from_id, from, to, strand, to_id, pos, t_dsp);
1192 }
1193 curr = curr->next;
1194 }
1195
1196 if(!is_found)
1197 new_salp = make_new_align(f_dsp, from, to, strand, from_id, to_id, pos, new_salp);
1198
1199
1200 f_align = f_align->next;
1201 }
1202 }
1203
1204 if(new_salp != NULL){
1205 if(t_align == NULL)
1206 t_align = new_salp;
1207 else
1208 t_align = SeqAlignLink(new_salp, t_align);
1209 }
1210 return t_align;
1211
1212
1213 }
1214
1215
1216
1217 /***************************************************************************
1218 ***
1219 * AttachSeqInAlign(): for a sequence not found in the previous alignment
1220 * data and it is used in producing the alignment, it would be attached
1221 * as a part of the alignment node for the seq-hist of the new sequence
1222 *
1223 ******************************************************************************
1224 ***/
AttachSeqInAlign(SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,SeqAlignPtr head)1225 static SeqAlignPtr AttachSeqInAlign(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head)
1226 {
1227 DenseSegPtr dsp;
1228 ValNodePtr v_starts, v_lens, v_strands;
1229 SeqAlignPtr align;
1230
1231 v_starts = NULL;
1232 v_lens = NULL;
1233 v_strands = NULL;
1234
1235
1236 ValNodeAddInt(&v_starts, 0, pos);
1237 ValNodeAddInt(&v_starts, 0, from);
1238 ValNodeAddInt(&v_strands, 0, (Int4)Seq_strand_plus);
1239 ValNodeAddInt(&v_strands, 0, (Int4)strand);
1240 ValNodeAddInt(&v_lens, 0, (to - from +1));
1241 dsp = make_dsp(v_starts, v_lens, v_strands, 0, 2, NULL, 1, TRUE, to_id, from_id);
1242 align = SeqAlignNew();
1243 align->segtype = 2;
1244 align->segs = dsp;
1245 align->type =3;
1246 if(head == NULL)
1247 head = align;
1248 else
1249 head = SeqAlignLink(align, head);
1250 return head;
1251
1252 }
1253
1254
mod_old_align(SeqAlignPtr PNTR old,SeqIdPtr sip)1255 static void mod_old_align(SeqAlignPtr PNTR old, SeqIdPtr sip)
1256 {
1257 SeqAlignPtr curr, prev;
1258 SeqIdPtr c_sip;
1259
1260 prev = NULL;
1261 curr = *old;
1262
1263 while(curr)
1264 {
1265 c_sip = SeqAlignId(curr, 1);
1266 if(SeqIdForSameBioseq(c_sip, sip))
1267 {
1268 if(prev == NULL)
1269 *old= curr->next;
1270 else
1271 prev->next = curr->next;
1272 curr->next = NULL;
1273 SeqAlignFree(curr);
1274 return;
1275 }
1276 prev = curr;
1277 curr = curr->next;
1278 }
1279 }
1280
attach_new_align(SeqAlignPtr new_salp,SeqAlignPtr PNTR old)1281 static void attach_new_align(SeqAlignPtr new_salp, SeqAlignPtr PNTR old) {
1282 SeqIdPtr sip;
1283 SeqAlignPtr align;
1284
1285 if(new_salp == NULL)
1286 return;
1287 align = new_salp;
1288 while(align)
1289 {
1290 sip = SeqAlignId(align, 1);
1291 if(sip)
1292 mod_old_align(old, sip);
1293 align= align->next;
1294 }
1295
1296 if(*old== NULL)
1297 *old = new_salp;
1298 else
1299 (*old) = SeqAlignLink(new_salp, (*old));
1300
1301 }
1302
delete_bad_node(ValNodePtr PNTR head,Uint1 choice)1303 static void delete_bad_node(ValNodePtr PNTR head, Uint1 choice)
1304 {
1305 ValNodePtr prev, next, curr;
1306
1307 curr = *head;
1308 prev = NULL;
1309
1310 while(curr)
1311 {
1312 next = curr->next;
1313 if(curr->choice != choice)
1314 {
1315 if(prev == NULL)
1316 *head = next;
1317 else
1318 prev->next = next;
1319 curr->next = NULL;
1320 ValNodeFree(curr);
1321 }
1322 else
1323 prev = curr;
1324
1325 curr = next;
1326 }
1327 }
count_coverage(BoolPtr used_list,Int4 size)1328 static Int4 count_coverage (BoolPtr used_list, Int4 size)
1329 {
1330 Int4 i;
1331 Int4 count;
1332
1333 count = 0;
1334 for(i = 0; i<size; ++i)
1335 {
1336 if(used_list[i] != 0)
1337 ++count;
1338 }
1339 return count;
1340 }
1341
1342
1343 static Int2 filter_seqid_order;
CompareChainProc(VoidPtr ptr1,VoidPtr ptr2)1344 static int LIBCALLBACK CompareChainProc(VoidPtr ptr1, VoidPtr ptr2)
1345 {
1346 ValNodePtr vnp1;
1347 ValNodePtr vnp2;
1348 DenseDiagPtr ddp1, ddp2;
1349 Int4 start1, stop1, start2, stop2;
1350
1351 start1 = -1;
1352 start2 = -1;
1353 if (ptr1 != NULL && ptr2 != NULL) {
1354 vnp1 = *((ValNodePtr PNTR) ptr1);
1355 vnp2 = *((ValNodePtr PNTR) ptr2);
1356 if (vnp1 != NULL && vnp2 != NULL) {
1357 ddp1 = (DenseDiagPtr) vnp1->data.ptrvalue;
1358 ddp2 = (DenseDiagPtr) vnp2->data.ptrvalue;
1359 start1 = ddp1->starts[filter_seqid_order];
1360 stop1 = start1 + ddp1->len -1;
1361 start2 = ddp2->starts[filter_seqid_order];
1362 stop2 = start2 + ddp2->len -1;
1363 if(stop1> stop2)
1364 return 1;
1365 else if(stop1 < stop2)
1366 return -1;
1367 else if (stop1 == stop2)
1368 {
1369 if(ddp1->len > ddp2->len)
1370 return -1;
1371 else if(ddp1->len < ddp2->len)
1372 return 1;
1373 else
1374 return 0;
1375 }
1376 }
1377 }
1378 return 0;
1379 }
1380
1381
1382 /*
1383 * return 0 for no overlap
1384 * return 1 for _2 overlap too much with _1
1385 * return 2 for _1 overlap too much with _1
1386 */
has_too_much_overlap(Int4 start_1,Int4 stop_1,Int4 start_2,Int4 stop_2)1387 static Uint1 has_too_much_overlap (Int4 start_1, Int4 stop_1, Int4 start_2, Int4 stop_2)
1388 {
1389 Int4 m_start, m_stop;
1390 FloatHi value;
1391 Int4 len_1, len_2;
1392
1393 if(start_2 > stop_1 || stop_2 < start_1) /*no overlap */
1394 return 0;
1395
1396 if(start_2 >= start_1 && stop_2 <= stop_1) /*completely contained within*/
1397 return 1;
1398
1399 if(start_1 >= start_2 && stop_1 <= start_2)
1400 return 2;
1401
1402 m_start = MAX(start_1, start_2);
1403 m_stop = MIN(stop_1, stop_2);
1404
1405 len_1 = stop_1 - start_1 +1 ;
1406 len_2 = stop_2 - start_2 + 1;
1407
1408 value = (FloatHi)(m_stop - m_start + 1)/(FloatHi)(MIN(len_1, len_2));
1409 if(value >= 0.7)
1410 {
1411 if(len_1 >= len_2)
1412 return 1;
1413 else
1414 return 2;
1415 }
1416 else
1417 return 0;
1418 }
1419
1420
1421
convert_ddp_to_array(ValNodePtr ddp_list,Uint1 strand,Int2 s_order,Int4Ptr p_num)1422 static Int4Ptr convert_ddp_to_array (ValNodePtr ddp_list, Uint1 strand, Int2 s_order, Int4Ptr p_num)
1423 {
1424 Int4Ptr val;
1425 Int4 num;
1426 ValNodePtr curr;
1427 DenseDiagPtr ddp;
1428 Int4 i;
1429
1430 num = 0;
1431 for(curr = ddp_list; curr != NULL; curr = curr->next)
1432 {
1433 ++num;
1434 }
1435
1436 val = (Int4Ptr) MemNew((size_t)num * sizeof(Int4));
1437
1438 /*if strand == minus, check the descending order of the stop.
1439 otherwise, check the ascending order of the start */
1440
1441 i = 0;
1442 for(curr = ddp_list; curr != NULL; curr = curr->next)
1443 {
1444 ddp = (DenseDiagPtr) curr->data.ptrvalue;
1445 if(strand == Seq_strand_minus)
1446 val[i++] = ddp->starts[1-s_order] + ddp->len -1;
1447 else
1448 val[i++] = ddp->starts[1-s_order];
1449 }
1450
1451 *p_num = num;
1452 return val;
1453 }
1454
1455
get_score_from_list(ScorePtr scores)1456 static Int4 get_score_from_list (ScorePtr scores)
1457 {
1458 ObjectIdPtr oip;
1459
1460 if(scores == NULL)
1461 return 0;
1462 while(scores)
1463 {
1464 oip = scores->id;
1465 if(oip && StringCmp(oip->str, "score") == 0)
1466 return scores->value.intvalue;
1467 scores = scores->next;
1468 }
1469
1470 return 0;
1471 }
1472
1473
get_score_from_DenseDiag(DenseDiagPtr ddp)1474 static Int4 get_score_from_DenseDiag (DenseDiagPtr ddp)
1475 {
1476 return get_score_from_list (ddp->scores);
1477 }
1478
filter_this_seg(DenseDiagPtr curr,DenseDiagPtr PNTR h_list,Int2 order)1479 static Boolean filter_this_seg (DenseDiagPtr curr, DenseDiagPtr PNTR h_list, Int2 order)
1480 {
1481 Int4 start, stop, t_start, t_stop;
1482 Int4 a_start, a_stop;
1483 Int4 score, t_score;
1484 DenseDiagPtr prev, next, list;
1485 Boolean found;
1486 Int4 len;
1487
1488 start = curr->starts[order];
1489 stop = start + curr->len -1;
1490 score = get_score_from_DenseDiag (curr);
1491 prev = NULL;
1492 list = *h_list;
1493 while(list)
1494 {
1495 next = list->next;
1496 t_start = list->starts[order];
1497 t_stop = t_start + list->len -1;
1498 t_score = get_score_from_DenseDiag (list);
1499
1500 found = FALSE;
1501 if(!(t_start > stop || t_stop < start))
1502 {
1503 a_start = MAX(t_start, start);
1504 a_stop = MIN(t_stop, stop);
1505 len = MAX(t_stop, stop) - MIN(t_start, start);
1506 if((FloatHi)(a_stop - a_start)/(FloatHi)len > 0.80)
1507 {
1508 if(score > t_score)
1509 {
1510 if(prev == NULL)
1511 *h_list = next;
1512 else
1513 prev->next = next;
1514 list->next = NULL;
1515 DenseDiagFree(list);
1516 found = TRUE;
1517 }
1518 }
1519 }
1520
1521 if(!found)
1522 prev = list;
1523 list = next;
1524 }
1525
1526 list = *h_list;
1527 while(list)
1528 {
1529 t_start = list->starts[order];
1530 t_stop = t_start+ list->len -1;
1531 t_score = get_score_from_DenseDiag (list);
1532
1533 if(!(t_start > stop || t_stop < start))
1534 {
1535 a_start = MAX(t_start, start);
1536 a_stop = MIN(t_stop, stop);
1537 if((FloatHi)(a_stop - a_start)/(FloatHi)(stop - start) > 0.50)
1538 {
1539 if(score < t_score)
1540 return TRUE;
1541 }
1542 }
1543 list = list->next;
1544 }
1545
1546 return FALSE;
1547 }
1548
1549
is_same_orientation(Uint1 strand_1,Uint1 strand_2)1550 static Boolean is_same_orientation(Uint1 strand_1, Uint1 strand_2)
1551 {
1552 if(strand_1 == strand_2)
1553 return TRUE;
1554 if(strand_1 == Seq_strand_minus)
1555 return (strand_2 == Seq_strand_minus);
1556 else
1557 return (strand_2 != Seq_strand_minus);
1558 }
1559
filter_wrong_orientation(SeqAlignPtr align,Boolean same_orient)1560 static void filter_wrong_orientation (SeqAlignPtr align, Boolean same_orient)
1561 {
1562 DenseDiagPtr ddp, prev, next;
1563 Boolean del;
1564
1565 ddp = (DenseDiagPtr) align->segs;
1566 prev = NULL;
1567
1568 while(ddp)
1569 {
1570 next = ddp->next;
1571 del = (is_same_orientation(ddp->strands[0], ddp->strands[1]) != same_orient);
1572 if(del)
1573 {
1574 if(prev == NULL)
1575 align->segs = next;
1576 else
1577 prev->next = next;
1578 ddp->next = NULL;
1579 DenseDiagFree(ddp);
1580 }
1581 else
1582 prev = ddp;
1583 ddp = next;
1584 }
1585 }
1586
1587
get_overlap(BoolPtr status,Int4 from,Int4 len,Int4Ptr poverlap)1588 static FloatHi get_overlap(BoolPtr status, Int4 from, Int4 len, Int4Ptr poverlap)
1589 {
1590 Int4 i;
1591 Int4 overlap_len;
1592
1593 overlap_len = 0;
1594 for(i = 0; i<len; ++i)
1595 {
1596 if(status[i+from])
1597 ++overlap_len;
1598 }
1599
1600 *poverlap = overlap_len;
1601 return (FloatHi)overlap_len/(FloatHi)len;
1602 }
1603
1604
is_overlap(BoolPtr status,Int4 from,Int4 len)1605 static Boolean is_overlap(BoolPtr status, Int4 from, Int4 len)
1606 {
1607 Int4 i;
1608 Int4 overlap_len;
1609
1610 overlap_len = 0;
1611 for(i = 0; i<len; ++i)
1612 {
1613 if(status[i+from])
1614 ++overlap_len;
1615 }
1616
1617 if((FloatHi)overlap_len/(FloatHi)len> 0.7)
1618 return TRUE;
1619 else
1620 {
1621 MemSet(status+from, 1, (size_t)(len) * sizeof(Boolean));
1622 return FALSE;
1623 }
1624 }
1625
1626
set_ddp_status(ValNodePtr ddp_list,Int4 first_len,Int4 second_len)1627 static void set_ddp_status(ValNodePtr ddp_list, Int4 first_len, Int4 second_len)
1628 {
1629 BoolPtr first_status, second_status;
1630 DenseDiagPtr ddp;
1631 FloatHi val_1, val_2;
1632 Int4 overlap_1, overlap_2;
1633
1634 first_status = (BoolPtr) MemNew((size_t)first_len * sizeof(Boolean));
1635 second_status = (BoolPtr) MemNew((size_t)second_len * sizeof(Boolean));
1636
1637 while(ddp_list)
1638 {
1639 ddp = (DenseDiagPtr) ddp_list->data.ptrvalue;
1640 val_1 = get_overlap(first_status, ddp->starts[0], ddp->len, &overlap_1);
1641 val_2 = get_overlap(second_status, ddp->starts[1], ddp->len, &overlap_2);
1642
1643 if(val_1 >= 0.95 || val_2 >=0.95
1644 || (ddp->len - MAX(overlap_1, overlap_2) < MIN(50, ddp->len-10))
1645 || (val_1 > 0.7 && val_2 >0.7))
1646 {
1647 /* if(ddp->starts[0] == 35)
1648 printf("val_1 = %lf, val_2 = %lf, overlap_1 = %ld, overlap_2 = %ld, len = %ld\n", val_1, val_2, overlap_1, overlap_2, ddp->len);
1649 */
1650 ddp_list->choice = 0;
1651 }
1652 else
1653 {
1654 MemSet(first_status+ddp->starts[0], 1, (size_t)(ddp->len) * sizeof(Boolean));
1655 MemSet(second_status+ddp->starts[1], 1, (size_t)(ddp->len) * sizeof(Boolean));
1656 ddp_list->choice = 1;
1657 }
1658 ddp_list = ddp_list->next;
1659 }
1660
1661 MemFree(first_status);
1662 MemFree(second_status);
1663 }
1664
free_diag_in_list(ValNodePtr ddp_list,DenseDiagPtr ddp)1665 static Boolean free_diag_in_list(ValNodePtr ddp_list, DenseDiagPtr ddp)
1666 {
1667 DenseDiagPtr c_ddp;
1668
1669 while(ddp_list)
1670 {
1671 c_ddp = (DenseDiagPtr) ddp_list->data.ptrvalue;
1672 if(c_ddp == ddp)
1673 return (ddp_list->choice == 1);
1674 ddp_list = ddp_list->next;
1675 }
1676
1677 return FALSE;
1678 }
1679
filter_repeats(ValNodePtr PNTR ddp_list,Int4 first_len,Int4 second_len)1680 static void filter_repeats (ValNodePtr PNTR ddp_list, Int4 first_len, Int4 second_len)
1681 {
1682
1683 set_ddp_status(*ddp_list, first_len, second_len);
1684 delete_bad_node(ddp_list, 1);
1685 }
1686
get_process_list(ValNodePtr ddp_list)1687 static Int4Ptr get_process_list (ValNodePtr ddp_list)
1688 {
1689 Int4 num, i, j;
1690 ValNodePtr curr;
1691 Int4Ptr order, score_list;
1692 DenseDiagPtr ddp;
1693 Boolean change = TRUE;
1694 Int4 temp;
1695
1696 num = 0;
1697 for(curr = ddp_list; curr != NULL; curr = curr->next)
1698 ++num;
1699
1700 score_list = (Int4Ptr) MemNew((size_t)num * sizeof(ValNodePtr));
1701 order = (Int4Ptr) MemNew((size_t)num * sizeof(Int4));
1702
1703 i = 0;
1704 for(curr = ddp_list; curr != NULL; curr = curr->next)
1705 {
1706 ddp = (DenseDiagPtr) curr->data.ptrvalue;
1707 score_list[i] = get_score_from_DenseDiag (ddp);
1708 order[i] = i;
1709 ++i;
1710 }
1711
1712 for(i = 0; i<num-1 && change; ++i)
1713 {
1714 change = FALSE;
1715 for(j = 0; j<num-i-1; ++j)
1716 {
1717 if(score_list[j+1] > score_list[j])
1718 {
1719
1720 change =TRUE;
1721 temp = score_list[j];
1722 score_list[j] = score_list[j+1];
1723 score_list[j+1] = temp;
1724
1725 temp = order[j];
1726 order[j] = order[j+1];
1727 order[j+1] = temp;
1728 }
1729 }
1730 }
1731
1732 MemFree(score_list);
1733 return order;
1734 }
1735
1736
get_nth_node(ValNodePtr vnp,Int4 order)1737 static ValNodePtr get_nth_node(ValNodePtr vnp, Int4 order)
1738 {
1739 Int4 i;
1740
1741 i = 0;
1742 while(vnp)
1743 {
1744 if(i == order)
1745 return vnp;
1746 ++i;
1747 vnp = vnp->next;
1748 }
1749
1750 return NULL;
1751 }
1752
get_alignment_orientation(ValNodePtr PNTR pddp_list,BioseqPtr bsp,Int2 s_order)1753 static Uint1 get_alignment_orientation(ValNodePtr PNTR pddp_list, BioseqPtr bsp, Int2 s_order)
1754 {
1755 Int4 num;
1756 Int4Ptr score_order;
1757 ValNodePtr curr;
1758 DenseDiagPtr ddp;
1759 BoolPtr plus_used_list, minus_used_list;
1760 Int4 plus_count, minus_count;
1761 Int4 i;
1762 ValNodePtr list, ddp_list;
1763 Uint1 strand, t_strand;
1764
1765 ddp_list = *pddp_list;
1766 num = 0;
1767 for(curr = ddp_list; curr != NULL; curr = curr->next)
1768 ++num;
1769
1770 score_order = get_process_list (ddp_list);
1771 plus_used_list = (BoolPtr) MemNew((size_t)bsp->length * sizeof(Boolean));
1772 minus_used_list = (BoolPtr) MemNew((size_t)bsp->length * sizeof(Boolean));
1773
1774
1775
1776 for(i = 0; i<MIN(num, 10); ++i)
1777 {
1778 curr = get_nth_node(ddp_list, score_order[i]);
1779 if(curr != NULL)
1780 {
1781 ddp = (DenseDiagPtr) curr->data.ptrvalue;
1782 if(ddp->strands[0] != ddp->strands[1] &&
1783 (ddp->strands[0] == Seq_strand_minus ||
1784 ddp->strands[1] == Seq_strand_minus))
1785 MemSet(minus_used_list+ddp->starts[s_order], 1,
1786 (size_t)(ddp->len) * sizeof(Boolean));
1787 else
1788 MemSet(plus_used_list+ddp->starts[s_order], 1,
1789 (size_t)(ddp->len) * sizeof(Boolean));
1790 }
1791 }
1792
1793 plus_count = count_coverage (plus_used_list, bsp->length);
1794 minus_count = count_coverage (minus_used_list, bsp->length);
1795 MemFree(plus_used_list);
1796 MemFree(minus_used_list);
1797
1798 if(plus_count == 0 && minus_count == 0)
1799 {
1800 MemFree(score_order);
1801 return 0;
1802 }
1803 else if(plus_count > minus_count)
1804 strand = Seq_strand_plus;
1805 else
1806 strand = Seq_strand_minus;
1807
1808 /*only load the ddp_list with the right orientation*/
1809 list = NULL;
1810 for(i = 0; i<num; ++i)
1811 {
1812 curr = get_nth_node(ddp_list, score_order[i]);
1813 ddp = (DenseDiagPtr) curr->data.ptrvalue;
1814 if(ddp->strands[0] != ddp->strands[1] &&
1815 (ddp->strands[0] == Seq_strand_minus ||
1816 ddp->strands[1] == Seq_strand_minus))
1817 t_strand = Seq_strand_minus;
1818 else
1819 t_strand = Seq_strand_plus;
1820 if(t_strand == strand)
1821 ValNodeAddPointer(&list, 0, ddp);
1822 }
1823 ValNodeFree(*pddp_list);
1824 *pddp_list = list;
1825 MemFree(score_order);
1826 return strand;
1827
1828
1829 }
1830
1831
1832
1833 /*
1834 * check to see if ddp falls in the order of the existing list
1835 *
1836 */
load_ddp_in_order(ValNodePtr PNTR h_list,DenseDiagPtr ddp,Boolean reverse,Boolean add)1837 static Boolean load_ddp_in_order(ValNodePtr PNTR h_list, DenseDiagPtr ddp, Boolean reverse, Boolean add)
1838 {
1839 Int4 m_stop, s_stop;
1840 Int4 mt_stop, st_stop;
1841 ValNodePtr curr, prev;
1842 DenseDiagPtr t_ddp;
1843 Boolean load;
1844 ValNodePtr vnp;
1845 Int4 i;
1846
1847 m_stop = ddp->starts[0] + ddp->len -1;
1848 s_stop = ddp->starts[1] + ddp->len -1;
1849
1850 for(curr = *h_list; curr != NULL; curr = curr->next)
1851 {
1852 t_ddp = (DenseDiagPtr) curr->data.ptrvalue;
1853 if(ddp->len <= t_ddp->len)
1854 {
1855 for(i = 0; i<2; ++i)
1856 {
1857 if(ddp->starts[i] >= t_ddp->starts[i])
1858 {
1859 if(t_ddp->starts[i] + t_ddp->len >= ddp->starts[i] + ddp->len)
1860 return FALSE;
1861 }
1862 }
1863 }
1864 }
1865 prev = NULL;
1866 curr = *h_list;
1867 while(curr)
1868 {
1869 t_ddp = (DenseDiagPtr) curr->data.ptrvalue;
1870 mt_stop = t_ddp->starts[0] + t_ddp->len -1;
1871 st_stop = t_ddp->starts[1] + t_ddp->len -1;
1872 if(m_stop <= mt_stop)
1873 {
1874 if(reverse)
1875 load = (s_stop >= st_stop);
1876 else
1877 load = (s_stop <= st_stop);
1878 /*check with the previous order*/
1879 if(load)
1880 {
1881 if(prev != NULL)
1882 {
1883 t_ddp = (DenseDiagPtr) prev->data.ptrvalue;
1884 mt_stop = t_ddp->starts[0] + t_ddp->len -1;
1885 st_stop = t_ddp->starts[1] + t_ddp->len -1;
1886
1887 if(m_stop < mt_stop)
1888 load = FALSE;
1889 else
1890 {
1891 if(reverse)
1892 load = (s_stop <= st_stop);
1893 else
1894 load = (s_stop >= st_stop);
1895 }
1896 }
1897 }
1898 if(load)
1899 {
1900 if(add)
1901 {
1902 vnp = ValNodeNew(NULL);
1903 vnp->choice = 1;
1904 vnp->data.ptrvalue = ddp;
1905 if(prev == NULL)
1906 *h_list = vnp;
1907 else
1908 prev->next = vnp;
1909 vnp->next = curr;
1910 }
1911 return TRUE;
1912 }
1913 else
1914 return FALSE;
1915 }
1916 prev = curr;
1917 curr = curr->next;
1918 }
1919
1920 if(prev != NULL)
1921 {
1922 if(reverse)
1923 load = (s_stop <= st_stop);
1924 else
1925 load = (s_stop >= st_stop);
1926
1927 if(load)
1928 {
1929 vnp = ValNodeNew(NULL);
1930 vnp->choice = 1;
1931 vnp->data.ptrvalue = ddp;
1932 prev->next = vnp;
1933 return TRUE;
1934 }
1935 }
1936
1937 return FALSE;
1938 }
1939
1940
1941
load_ddp_scores(ValNodePtr ddp_list,Int4Ptr num)1942 static Int4Ptr load_ddp_scores(ValNodePtr ddp_list, Int4Ptr num)
1943 {
1944 DenseDiagPtr ddp;
1945 Int4 i;
1946 Int4Ptr score_list;
1947 ValNodePtr curr;
1948
1949 i = 0;
1950 for(curr = ddp_list; curr != NULL; curr = curr->next)
1951 ++i;
1952 if(i == 0)
1953 return NULL;
1954 *num = i;
1955 score_list = (Int4Ptr) MemNew((size_t)i * sizeof(Int4));
1956 i = 0;
1957 for(curr = ddp_list; curr != NULL; curr = curr->next)
1958 {
1959 ddp = (DenseDiagPtr) curr->data.ptrvalue;
1960 score_list[i] = get_score_from_list (ddp->scores);
1961 ++i;
1962 }
1963 return score_list;
1964 }
1965
1966
is_ddp_overlap(DenseDiagPtr ddp,BoolPtr first_status,BoolPtr second_status)1967 static Boolean is_ddp_overlap(DenseDiagPtr ddp, BoolPtr first_status, BoolPtr second_status)
1968 {
1969 FloatHi val_1, val_2;
1970 Int4 overlap_1, overlap_2;
1971
1972 val_1 = get_overlap(first_status, ddp->starts[0], ddp->len, &overlap_1);
1973 val_2 = get_overlap(second_status, ddp->starts[1], ddp->len, &overlap_2);
1974
1975 if(val_1 >=0.95 || val_2 >=0.95 || (val_1 > 0.7 && val_2 >0.7))
1976 return TRUE;
1977
1978 /*maximum non-overlap region is less than 50-bp*/
1979 if(ddp->len - MAX(overlap_1, overlap_2) < MIN(50, ddp->len-10))
1980 return TRUE;
1981 return FALSE;
1982 }
1983
1984
1985 /*
1986 * lns.c - Given a sequence of numbers, this program computes
1987 * the Longest Nondecreasing Subsequence.
1988 *
1989 * Feb. 20, 1997
1990 *
1991 * Program by Jinghui Zhang & Kun-Mao Chao
1992 * Toolkit version : H. Sicotte, 1/04/1999
1993 */
1994
1995 /* Input :
1996 call lns()
1997 S : a sequence of numbers;
1998 len_s : the length of the sequences;
1999 Output:
2000 LNS : the positions of the Longest Nondecreasing Subsequence;
2001 return value : the length of the LNS;
2002 Note: "-1" entries are allowed.
2003 */
2004
2005 /* Input :
2006 SS : a sequence of numbers; (without "-1" entries)
2007 len_ss : the length of the sequences;
2008 Output:
2009 LNS : the positions of the Longest Nondecreasing Subsequence;
2010 return value : the length of the LNS;
2011 */
pure_lns(Int4Ptr SS,Int4 len_ss,Int4Ptr LNS)2012 static Int4 pure_lns(Int4Ptr SS, Int4 len_ss, Int4Ptr LNS)
2013 {
2014 Int4 len_lns;
2015 Int4Ptr PREV=NULL,BEST=NULL,BEST_POS=NULL;
2016 Int4 i, low, high, mid;
2017 Int4 t;
2018
2019 len_lns = 1;
2020 if(len_ss) {
2021 PREV = (Int4Ptr) Malloc(sizeof(Int4)*len_ss);
2022 BEST = (Int4Ptr) Malloc(sizeof(Int4)*len_ss);
2023 BEST_POS = (Int4Ptr) Malloc(sizeof(Int4)*len_ss);
2024 } else
2025 return 0;
2026 BEST[0] = SS[0];
2027 BEST_POS[0] = 0;
2028 PREV[0] = -1;
2029 for (i=1; i<len_ss; ++i) {
2030 t = SS[i];
2031 low = 0;
2032 high = len_lns-1;
2033 while (low < high) {
2034 mid = (low + high) / 2;
2035 if (t >= BEST[mid]) low = mid + 1;
2036 else high = mid;
2037 }
2038 if (t < BEST[low]) {
2039 if (low == 0) PREV[i] = -1;
2040 else PREV[i] = BEST_POS[low-1];
2041 BEST[low] = t;
2042 BEST_POS[low] = i;
2043 } else {
2044 BEST[len_lns] = t;
2045 BEST_POS[len_lns] = i;
2046 PREV[i] = BEST_POS[len_lns-1];
2047 ++len_lns;
2048 }
2049 }
2050
2051 /* trace back */
2052
2053 t = BEST_POS[len_lns-1];
2054 for (i=len_lns-1; i>=0; --i) {
2055 LNS[i] = t;
2056 t = PREV[t];
2057 }
2058
2059 if(PREV)
2060 Free(PREV);
2061 if(BEST)
2062 Free(BEST);
2063 if(BEST_POS)
2064 Free(BEST_POS);
2065 return len_lns;
2066 }
lns(Int4Ptr S,Int4 len_s,Int4Ptr LNS)2067 static Int4 lns(Int4Ptr S, Int4 len_s, Int4Ptr LNS)
2068 {
2069 Int4Ptr SS=NULL;
2070 Int4 len_ss, len_lns;
2071 Int4 i, j;
2072 SS=(Int4Ptr)Malloc(len_s*sizeof(Int4));
2073 /* screen out those "-1" entries */
2074 len_ss = 0;
2075 for (i=0; i<len_s; ++i)
2076 if (S[i] >= 0)
2077 SS[len_ss++] = S[i];
2078
2079 len_lns = pure_lns(SS, len_ss, LNS);
2080
2081 /* add the offset */
2082 for (i=0, j=0; j<len_lns; ++i)
2083 if (S[i] == SS[LNS[j]]) LNS[j++] = i;
2084
2085 if(SS)
2086 Free(SS);
2087 return len_lns;
2088 }
2089
2090
2091
2092
sort_list_order(ValNodePtr PNTR ph_list,Uint1 strand)2093 static void sort_list_order(ValNodePtr PNTR ph_list, Uint1 strand)
2094 {
2095 ValNodePtr h_list;
2096 ValNodePtr curr;
2097 DenseDiagPtr ddp;
2098 Int4 val;
2099 ValNodePtr best_list = NULL;
2100
2101
2102 h_list = *ph_list;
2103 if(h_list == NULL || h_list->next == NULL)
2104 return;
2105 ddp = (DenseDiagPtr) h_list->data.ptrvalue;
2106 ValNodeAddPointer(&best_list, 0, ddp);
2107 for(curr = h_list->next; curr != NULL; curr = curr->next)
2108 {
2109 ddp = (DenseDiagPtr) curr->data.ptrvalue;
2110 load_ddp_in_order(&best_list, ddp, (Boolean) (strand==Seq_strand_minus), TRUE);
2111 }
2112
2113
2114 filter_seqid_order = 0;
2115 h_list = ValNodeSort(h_list, CompareChainProc);
2116 {
2117 Int4 len_S=0,len_lns=0,rlen_lns=0;
2118 Int4Ptr S=NULL,rS=NULL,LNS=NULL,rLNS=NULL,this_LNS;
2119 Int4 i,j;
2120 len_S = ValNodeLen(h_list);
2121 if(len_S>0) {
2122 S=(Int4Ptr)Malloc(len_S*sizeof(Int4));
2123 rS=(Int4Ptr)Malloc(len_S*sizeof(Int4));
2124 LNS=(Int4Ptr)Malloc(len_S*sizeof(Int4));
2125 rLNS=(Int4Ptr)Malloc(len_S*sizeof(Int4));
2126 for(curr = h_list,i=0; curr != NULL; curr = curr->next)
2127 {
2128 ddp = (DenseDiagPtr) curr->data.ptrvalue;
2129 if(strand == Seq_strand_minus)
2130 S[i++] = ddp->starts[1];
2131 else
2132 S[i++] = ddp->starts[1] + ddp->len-1;
2133 }
2134 for(i=len_S-1,j=0;i>=0;i--,j++)
2135 rS[j]=S[i];
2136 len_lns = lns(S, len_S, LNS);
2137 rlen_lns = lns(rS, len_S, rLNS);
2138 }
2139 if (len_lns <= 0 || rlen_lns <=0) {
2140 val=-1;
2141 Message(MSG_ERROR, "Fail in get_lns");
2142 if(len_S) {
2143 Free(S);
2144 Free(rS);
2145 Free(LNS);
2146 Free(rLNS);
2147 }
2148 exit(2);
2149 } else if(rlen_lns > len_lns) {
2150 val = 1; /* usr rLNS */
2151 len_lns=rlen_lns;
2152 this_LNS = rLNS;
2153 } else {
2154 val =0; /* use LNS */
2155 this_LNS = LNS;
2156 }
2157 /* inconsistent orientation from LNS */
2158 if((strand == Seq_strand_minus && val == 0) ||
2159 (strand != Seq_strand_minus && val== 1))
2160 {
2161 ValNodeFree(h_list);
2162 *ph_list = best_list;
2163 if(len_S) {
2164 Free(S);
2165 Free(rS);
2166 Free(LNS);
2167 Free(rLNS);
2168 }
2169 return;
2170 }
2171
2172 ValNodeFree(best_list);
2173 for(i=0;i<len_lns;i++) {
2174 val = this_LNS[i];
2175 if(strand == Seq_strand_minus)
2176 val =len_S -1 - val;
2177 curr = get_nth_node(h_list, val);
2178 if(curr != NULL)
2179 curr->choice = 1;
2180 }
2181 if(len_S) {
2182 Free(S);
2183 Free(rS);
2184 Free(LNS);
2185 Free(rLNS);
2186 }
2187
2188 }
2189 #if 0
2190 {
2191 FILE *tmp_fp;
2192 tmp_fp = FileOpen("Ins.test", "w");
2193 for(curr = h_list; curr != NULL; curr = curr->next)
2194 {
2195 ddp = curr->data.ptrvalue;
2196 if(strand == Seq_strand_minus)
2197 fprintf(tmp_fp, "%ld\t", ddp->starts[1]);
2198 else
2199 fprintf(tmp_fp, "%ld\t", ddp->starts[1]+ddp->len-1);
2200 }
2201 fprintf(tmp_fp, "\n");
2202 FileClose(tmp_fp);
2203 system("get_lns < Ins.test >out");
2204
2205 tmp_fp = FileOpen("out", "r");
2206 fscanf(tmp_fp, "%ld\n", &val);
2207 if(val == -1)
2208 {
2209 Message(MSG_ERROR, "Fail in get_lns");
2210 exit(1);
2211 }
2212 /* inconsistent orientation from LNS */
2213 if((strand == Seq_strand_minus && val == 0) ||
2214 (strand != Seq_strand_minus && val== 1))
2215 {
2216 FileClose(tmp_fp);
2217 ValNodeFree(h_list);
2218 *ph_list = best_list;
2219 return;
2220 }
2221
2222
2223 ValNodeFree(best_list);
2224 num = ValNodeLen(h_list);
2225 while(fscanf(tmp_fp, "%ld\n", &val) != EOF)
2226 {
2227 if(strand == Seq_strand_minus)
2228 val = num -1 - val;
2229 curr = get_nth_node(h_list, val);
2230 if(curr != NULL)
2231 curr->choice = 1;
2232 }
2233 FileClose(tmp_fp);
2234 }
2235 #endif
2236 delete_bad_node(&h_list, 1);
2237
2238 if(h_list == NULL)
2239 {
2240 Message(MSG_ERROR, "No good node in sort_list_order");
2241 exit(1);
2242 }
2243
2244 *ph_list = h_list;
2245 }
2246
2247
2248
check_ddp_order(ValNodePtr ddp_list,Uint1 strand,Int4 first_len,Int4 second_len)2249 static ValNodePtr check_ddp_order(ValNodePtr ddp_list, Uint1 strand, Int4 first_len, Int4 second_len)
2250 {
2251 DenseDiagPtr ddp;
2252 ValNodePtr h_list = NULL, t_list;
2253 ValNodePtr curr;
2254 Int4 num, i;
2255 Boolean reverse;
2256 BoolPtr first_status, second_status;
2257 Int4 p_score;
2258 Int4Ptr score_list;
2259 Boolean is_end;
2260
2261
2262 if(ddp_list == NULL || ddp_list->next == NULL)
2263 return ddp_list;
2264 score_list = load_ddp_scores(ddp_list, &num);
2265 if(score_list == NULL)
2266 return ddp_list;
2267
2268 for(curr = ddp_list; curr != NULL; curr = curr->next)
2269 curr->choice = 0;
2270 p_score = score_list[0];
2271 first_status = (BoolPtr) MemNew((size_t)first_len * sizeof(Boolean));
2272 second_status = (BoolPtr) MemNew((size_t)second_len * sizeof(Boolean));
2273
2274 reverse = (strand == Seq_strand_minus);
2275 h_list = NULL;
2276 is_end = FALSE;
2277 while(!is_end)
2278 {
2279 t_list = NULL;
2280 for(i= 0, curr = ddp_list; curr != NULL; curr = curr->next)
2281 {
2282 if(p_score/(score_list[i]) <=2)
2283 {
2284 if(curr->choice == 0)
2285 {
2286 curr->choice = 1;
2287 ddp = (DenseDiagPtr) curr->data.ptrvalue;
2288 if(h_list == NULL ||
2289 load_ddp_in_order(&h_list, ddp, reverse, FALSE))
2290 {
2291 if(!is_ddp_overlap(ddp, first_status, second_status))
2292 ValNodeAddPointer(&t_list, 0, ddp);
2293 }
2294 }
2295 if(curr->next == NULL)
2296 {
2297 p_score = score_list[i];
2298 is_end = TRUE;
2299 break;
2300 }
2301 }
2302 else
2303 {
2304 p_score = score_list[i];
2305 break;
2306 }
2307 ++i;
2308 }
2309 if(t_list != NULL)
2310 {
2311 sort_list_order(&t_list, strand);
2312 if(h_list == NULL)
2313 {
2314 h_list = t_list;
2315 for(curr = t_list; curr != NULL; curr = curr->next)
2316 {
2317 ddp = (DenseDiagPtr) curr->data.ptrvalue;
2318 MemSet(first_status+ddp->starts[0], 1, (size_t)(ddp->len) * sizeof(Boolean));
2319 MemSet(second_status+ddp->starts[1], 1, (size_t)(ddp->len) * sizeof(Boolean));
2320 }
2321 }
2322 else
2323 {
2324 for(curr = t_list; curr != NULL; curr = curr->next)
2325 {
2326 ddp = (DenseDiagPtr) curr->data.ptrvalue;
2327 if(load_ddp_in_order(&h_list, ddp, reverse, TRUE))
2328 {
2329 MemSet(first_status+ddp->starts[0], 1, (size_t)(ddp->len) * sizeof(Boolean));
2330 MemSet(second_status+ddp->starts[1], 1, (size_t)(ddp->len) * sizeof(Boolean));
2331 }
2332 }
2333 ValNodeFree(t_list);
2334 }
2335 }
2336 if(is_end)
2337 break;
2338 }
2339
2340 if(h_list == NULL)
2341 {
2342 Message(MSG_ERROR, "No good node in check_ddp_order");
2343 exit(1);
2344 }
2345
2346 MemFree(score_list);
2347 MemFree(first_status);
2348 MemFree(second_status);
2349 ValNodeFree(ddp_list);
2350 return h_list;
2351
2352
2353 }
2354
2355
2356
2357 /*
2358 *
2359 * assume the align is the alignment computed from BLAST. Get rid of
2360 * all the smaller segments that are out of the main diagnol. the ValNodePtr
2361 * returns a list of dense-diags that are on the main diagnol. s_sip specifies
2362 * the sequence that will have non-redundant coverage
2363 */
FilterSeqAlign(SeqAlignPtr align,SeqIdPtr s_sip,Uint1Ptr p_strand)2364 NLM_EXTERN ValNodePtr FilterSeqAlign(SeqAlignPtr align, SeqIdPtr s_sip, Uint1Ptr p_strand)
2365 {
2366 Int2 order = 0, s_order;
2367 DenseDiagPtr ddp;
2368 SeqIdPtr sip;
2369 Uint1 strand;
2370 BioseqPtr bsp;
2371 ValNodePtr ddp_list;
2372 BioseqPtr first_bsp, second_bsp;
2373
2374 if(s_sip == NULL || align == NULL)
2375 return NULL;
2376 if(align->segtype != 1)
2377 return NULL;
2378 bsp = BioseqFind(s_sip);
2379 if(bsp == NULL)
2380 {
2381 ErrPostEx(SEV_WARNING, 0, 0, "Fail to find Bioseq for s_sip");
2382 return NULL;
2383 }
2384
2385 ddp_list = NULL;
2386 s_order = -1;
2387 for(ddp = (DenseDiagPtr) align->segs; ddp != NULL; ddp = ddp->next)
2388 {
2389 ValNodeAddPointer(&ddp_list, 0, ddp);
2390 if(s_order == -1)
2391 {
2392 for(sip = ddp->id, order = 0; sip != NULL; sip = sip->next, ++order)
2393 {
2394 if(SeqIdMatch(s_sip, sip))
2395 {
2396 s_order = order;
2397 }
2398 }
2399 }
2400 }
2401 if(s_order == -1)
2402 {
2403 printf("error in finding the right order\n");
2404 exit(1);
2405 }
2406
2407 /*ddp_list only contains the alignment with the right orientation*/
2408 /* it also has been sorted according to the order of the scores*/
2409 strand = get_alignment_orientation(&ddp_list, bsp, s_order);
2410
2411 if(strand == 0)
2412 return NULL;
2413
2414 ddp = (DenseDiagPtr) align->segs;
2415 first_bsp = BioseqFind(ddp->id);
2416 second_bsp = BioseqFind(ddp->id->next);
2417 /* filter_wrong_orientation (align, strand == Seq_strand_plus); */
2418 filter_repeats(&ddp_list, first_bsp->length, second_bsp->length);
2419
2420 if(ddp_list == NULL)
2421 {
2422 Message(MSG_ERROR, "Nothing leftover after filter_repeats");
2423 exit(1);
2424 }
2425 *p_strand = strand;
2426 return check_ddp_order(ddp_list, strand, first_bsp->length, second_bsp->length);
2427
2428 }
2429
get_dsp_align_len(DenseSegPtr dsp)2430 static Int4 get_dsp_align_len(DenseSegPtr dsp)
2431 {
2432 Int2 i;
2433 Int4 len = 0;
2434
2435 for(i = 0; i<dsp->numseg; ++i)
2436 {
2437 if(dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1)
2438 len += dsp->lens[i];
2439 }
2440
2441 return len;
2442 }
2443
2444 /*
2445 *
2446 * convert the Dense-seg to Dense-diag
2447 * take the end point of aligned segment and make an alignment of
2448 * Dense-diag
2449 *
2450 */
2451
make_seg_score(Int4 align_len,Int4 seg_len,Int4 total_score)2452 static ScorePtr make_seg_score(Int4 align_len, Int4 seg_len, Int4 total_score)
2453 {
2454 Int4 score;
2455 ScorePtr sp;
2456 ObjectIdPtr oip;
2457
2458 /* score = (seg_len * total_score)/align_len; */
2459 score = total_score;
2460 sp = ScoreNew();
2461 oip = ObjectIdNew();
2462 oip->str = StringSave("score");
2463 sp->id = oip;
2464 sp->choice = 1;
2465 sp->value.intvalue = score;
2466 return sp;
2467 }
2468
2469
SeqAlignConvertDspToDdp(SeqAlignPtr align,DenseDiagPtr PNTR h_ddp)2470 static DenseDiagPtr SeqAlignConvertDspToDdp(SeqAlignPtr align, DenseDiagPtr PNTR h_ddp)
2471 {
2472 DenseSegPtr dsp;
2473 DenseDiagPtr ddp, prev;
2474 Int2 i;
2475 Int4 score;
2476 Int4 align_len;
2477
2478 if(align == NULL || align->segtype != 2)
2479 return NULL;
2480 dsp = (DenseSegPtr) align->segs;
2481 if(dsp == NULL || dsp->dim != 2)
2482 return NULL;
2483 align_len = get_dsp_align_len(dsp);
2484
2485 if(*h_ddp != NULL)
2486 {
2487 prev = *h_ddp;
2488 while(prev->next != NULL)
2489 prev = prev->next;
2490 }
2491 else
2492 prev = NULL;
2493 score = get_score_from_list (align->score);
2494 for(i = 0; i<dsp->numseg; ++i)
2495 {
2496 if(dsp->starts[2*i] != -1 &&
2497 dsp->starts[2*i+1] != -1 && dsp->lens[i]>0)
2498 {
2499 ddp = DenseDiagNew();
2500 ddp->dim = 2;
2501 ddp->id = SeqIdDup(dsp->ids);
2502 ddp->id->next = SeqIdDup(dsp->ids->next);
2503 ddp->starts = (Int4Ptr) MemNew((size_t)2 * sizeof(Int4));
2504 ddp->starts[0] = dsp->starts[2*i];
2505 ddp->starts[1] = dsp->starts[2*i + 1];
2506 ddp->len = dsp->lens[i];
2507 ddp->strands = (Uint1Ptr) MemNew((size_t)2 * sizeof(Uint1));
2508 if(dsp->strands != NULL)
2509 {
2510 ddp->strands[0] = dsp->strands[2*i];
2511 ddp->strands[1] = dsp->strands[2*i+1];
2512 }
2513 ddp->scores = make_seg_score(align_len, dsp->lens[i], score);
2514 if(*h_ddp == NULL)
2515 *h_ddp= ddp;
2516 else
2517 prev->next = ddp;
2518 prev = ddp;
2519 }
2520 }
2521
2522 return (*h_ddp);
2523
2524 }
2525
2526 /*
2527 * convert a list of DenseSeg to DenseDiag
2528 *
2529 */
SeqAlignConvertDspToDdpList(SeqAlignPtr align)2530 NLM_EXTERN SeqAlignPtr SeqAlignConvertDspToDdpList(SeqAlignPtr align)
2531 {
2532 SeqAlignPtr n_align;
2533 DenseDiagPtr h_ddp = NULL;
2534
2535
2536 while(align)
2537 {
2538 SeqAlignConvertDspToDdp(align, &h_ddp);
2539 align = align->next;
2540 }
2541 if(h_ddp != NULL)
2542 {
2543 n_align = SeqAlignNew();
2544 n_align->segtype = 1;
2545 n_align->segs = h_ddp;
2546 return n_align;
2547 }
2548 else
2549 return NULL;
2550
2551 }
2552
clean_up_align_list(SeqAlignPtr PNTR h_align,ValNodePtr head)2553 static SeqAlignPtr clean_up_align_list(SeqAlignPtr PNTR h_align, ValNodePtr head)
2554 {
2555 DenseDiagPtr ddp, n_ddp, p_ddp, c_ddp;
2556 SeqAlignPtr prev, next;
2557 Boolean found;
2558 ValNodePtr curr;
2559 SeqAlignPtr align;
2560
2561
2562 prev = NULL;
2563 align = *h_align;
2564 while(align)
2565 {
2566 next = align->next;
2567
2568 ddp = (DenseDiagPtr) align->segs;
2569 p_ddp = NULL;
2570 while(ddp)
2571 {
2572 found = FALSE;
2573 n_ddp = ddp->next;
2574 for(curr = head; curr != NULL; curr = curr->next)
2575 {
2576 c_ddp = (DenseDiagPtr) curr->data.ptrvalue;
2577 if(ddp == c_ddp)
2578 {
2579 found = TRUE;
2580 break;
2581 }
2582 }
2583 if(!found)
2584 {
2585 if(p_ddp == NULL)
2586 align->segs = n_ddp;
2587 else
2588 p_ddp->next = n_ddp;
2589 ddp->next = NULL;
2590 DenseDiagFree(ddp);
2591 }
2592 else
2593 p_ddp = ddp;
2594 ddp = n_ddp;
2595 }
2596 if(align->segs == NULL)
2597 {
2598 if(prev == NULL)
2599 *h_align = next;
2600 else
2601 prev->next = next;
2602 align->next = NULL;
2603 SeqAlignFree(align);
2604 }
2605 else
2606 prev = align;
2607 align = next;
2608 }
2609
2610 return (*h_align);
2611 }
2612
2613
2614
2615
2616 /*
2617 * return a Seq-align that was made up from the DenseDiag in the
2618 * right order. The Seq-align is freshly created
2619 * the chain of the Seq-align should be for the same Seq-id
2620 *
2621 */
FilterDenseSegAlign(SeqAlignPtr align,SeqIdPtr sip)2622 NLM_EXTERN SeqAlignPtr FilterDenseSegAlign(SeqAlignPtr align, SeqIdPtr sip)
2623 {
2624 SeqAlignPtr h_align;
2625 Uint1 strand;
2626 ValNodePtr head = NULL;
2627
2628 h_align = SeqAlignConvertDspToDdpList(align);
2629 if(h_align == NULL)
2630 return NULL;
2631 head = FilterSeqAlign(h_align, sip, &strand);
2632
2633 if(head == NULL)
2634 {
2635 SeqAlignFree(h_align);
2636 return NULL;
2637 }
2638 clean_up_align_list(&h_align, head);
2639 ValNodeFree(head);
2640 return h_align;
2641 }
2642
2643
2644
2645 /*
2646 *
2647 * from the align, figure out the maximum and minimum range on the two sequences.
2648 * the two Seq-locs can then be sent for the seond path algorithm to compute
2649 * sequence alignment
2650 * align is normally computed from a heuristic method, such as BLAST
2651 * return TRUE for success and FLASE for failure
2652 */
FigureAlignRange(SeqAlignPtr align,SeqLocPtr m_loc,SeqLocPtr s_loc)2653 static Boolean FigureAlignRange(SeqAlignPtr align, SeqLocPtr m_loc, SeqLocPtr s_loc)
2654 {
2655 DenseDiagPtr ddp;
2656 ValNodePtr head, vnp;
2657 SeqIntPtr sint;
2658 Int4 s_max =-1 , s_min = -1;
2659 Int4 m_max = -1, m_min = -1;
2660 Uint1 strand;
2661 Int2 s_order, order;
2662 SeqIdPtr sip;
2663 Uint1 m_strand, s_strand;
2664
2665
2666
2667 m_strand = 0;
2668 s_strand = 0;
2669 head = FilterSeqAlign(align, SeqLocId(s_loc), &strand);
2670 if(head == NULL)
2671 return FALSE;
2672
2673 ddp = (DenseDiagPtr) head->data.ptrvalue;
2674 s_order = -1;
2675 order = 0;
2676 for(sip = ddp->id; sip != NULL; sip = sip->next, ++order)
2677 {
2678 if(SeqIdForSameBioseq(sip, SeqLocId(s_loc)))
2679 {
2680 s_order = order;
2681 break;
2682 }
2683 }
2684 if(s_order == -1)
2685 {
2686 ValNodeFree(head);
2687 return FALSE;
2688 }
2689 s_min = ddp->starts[s_order];
2690 if(strand == Seq_strand_minus)
2691 {
2692 m_max = ddp->starts[1-s_order] + ddp->len -1;
2693 }
2694 else
2695 m_min = ddp->starts[1-s_order];
2696 vnp = head;
2697 while(vnp->next != NULL)
2698 {
2699 ddp = (DenseDiagPtr) vnp->data.ptrvalue;
2700 /* printf("%ld, %ld, len = %ld\n", ddp->starts[0], ddp->starts[1], ddp->len); */
2701 vnp = vnp->next;
2702 }
2703 ddp = (DenseDiagPtr) vnp->data.ptrvalue;
2704
2705 s_max = ddp->starts[s_order] + ddp->len -1;
2706 if(strand == Seq_strand_minus)
2707 {
2708 m_min = ddp->starts[1-s_order];
2709 }
2710 else
2711 m_max = ddp->starts[1-s_order] + ddp->len -1;
2712
2713 if(m_min > m_max)
2714 {
2715
2716 Message(MSG_ERROR, "m_min = %ld, m_max = %ld", m_min, m_max);
2717 ValNodeFree(head);
2718 return FALSE;
2719 }
2720 ValNodeFree(head);
2721
2722 if(s_min == -1 || s_max == -1 || m_min == -1 || m_max == -1)
2723 return FALSE;
2724
2725 m_strand = Seq_strand_plus;
2726 s_strand = strand;
2727
2728
2729 sint = (SeqIntPtr) m_loc->data.ptrvalue;
2730 sint->from = m_min;
2731 sint->to = m_max;
2732 sint->strand = m_strand;
2733
2734 sint = (SeqIntPtr) s_loc->data.ptrvalue;
2735 sint->from = s_min;
2736 sint->to = s_max;
2737 sint->strand = s_strand;
2738
2739 return TRUE;
2740 }
2741
2742
2743
is_loc_overlap(SeqLocPtr loc_1,SeqLocPtr loc_2,Int4Ptr min_val,Int4Ptr max_val,Int4 gap_len,Int4 max_align_size,Int4Ptr p_gap_val)2744 NLM_EXTERN Boolean is_loc_overlap(SeqLocPtr loc_1, SeqLocPtr loc_2, Int4Ptr min_val, Int4Ptr max_val, Int4 gap_len, Int4 max_align_size, Int4Ptr p_gap_val)
2745 {
2746 Int4 start_1, stop_1, start_2, stop_2;
2747
2748 start_1 = SeqLocStart(loc_1);
2749 stop_1 = SeqLocStop(loc_1);
2750 start_2 = SeqLocStart(loc_2);
2751 stop_2 = SeqLocStop(loc_2);
2752
2753 /*strict overlap*/
2754 if(!(stop_1 < start_2 || start_1 > stop_2))
2755 {
2756 *min_val = MAX(start_1, start_2);
2757 *max_val = MIN(stop_1, stop_2);
2758 return TRUE;
2759 }
2760
2761 if(gap_len == 0)
2762 return FALSE;
2763 if(stop_2 > stop_1)
2764 {
2765 /* if(start_2 - stop_1 <= gap_len) */
2766 if(start_2 - stop_1 <= max_align_size)
2767 {
2768 *min_val = stop_1;
2769 *max_val = start_2;
2770 if(p_gap_val != NULL)
2771 *p_gap_val = start_2 - stop_1;
2772 return TRUE;
2773 }
2774 else
2775 return FALSE;
2776 }
2777 else
2778 {
2779 /* if(start_1 - stop_2 <= gap_len) */
2780 if(start_1 - stop_2 <= max_align_size)
2781 {
2782 *min_val = stop_2;
2783 *max_val = start_1;
2784 if(p_gap_val != NULL)
2785 *p_gap_val = start_1 - stop_2;
2786 return TRUE;
2787 }
2788 else
2789 return FALSE;
2790 }
2791
2792 }
2793
2794
2795 /*
2796 *
2797 * ppos is the position of the known sequence, trying to find the matching sequence
2798 * position
2799 */
find_matching_position(DenseSegPtr dsp,Int4 pos,Int2 pos_order)2800 NLM_EXTERN Int4 find_matching_position(DenseSegPtr dsp, Int4 pos, Int2 pos_order)
2801 {
2802 Int2 i;
2803 Int4 start_1, start_2;
2804 Int4 offset;
2805
2806
2807 for(i = 0; i<dsp->numseg; ++i)
2808 {
2809 start_1 = dsp->starts[2*i + pos_order];
2810 start_2 = dsp->starts[2*i + 1 - pos_order];
2811
2812 if(start_1 != -1 && start_2 != -1)
2813 {
2814 if(pos >= start_1 && pos <= start_1 + dsp->lens[i] -1)
2815 {
2816 if(dsp->strands[pos_order] == Seq_strand_minus)
2817 {
2818 offset = start_1 + dsp->lens[i] - 1 - pos;
2819 }
2820 else
2821 offset = pos - start_1;
2822
2823 if(dsp->strands[1 - pos_order] == Seq_strand_minus)
2824 return (start_2 + dsp->lens[i] -1 - offset);
2825 else
2826 return (start_2 + offset);
2827 }
2828 }
2829 }
2830
2831
2832 return -1;
2833 }
2834
SeqAnnotWrite(SeqAlignPtr align)2835 static void SeqAnnotWrite(SeqAlignPtr align)
2836 {
2837 SeqAnnotPtr annot;
2838 AsnIoPtr aip;
2839
2840 annot = SeqAnnotNew();
2841 annot->type = 2;
2842 annot->data = align;
2843
2844 aip = AsnIoOpen("temp2.sat", "w");
2845 SeqAnnotAsnWrite(annot, aip, NULL);
2846 AsnIoClose(aip);
2847
2848 annot->data = NULL;
2849 SeqAnnotFree(annot);
2850 }
2851
select_overlap_loc(SeqLocPtr loc_1_1,SeqLocPtr loc_2_1,SeqLocPtr loc_1_2,SeqLocPtr loc_2_2,Int2Ptr p_order)2852 NLM_EXTERN Boolean select_overlap_loc(SeqLocPtr loc_1_1, SeqLocPtr loc_2_1, SeqLocPtr loc_1_2, SeqLocPtr loc_2_2, Int2Ptr p_order)
2853 {
2854 Int4 start_1, stop_1, start_2, stop_2;
2855 Int4 overlap_1, overlap_2;
2856
2857 overlap_1 = 0;
2858 overlap_2 = 0;
2859
2860 start_1 = SeqLocStart(loc_1_1);
2861 stop_1 = SeqLocStop(loc_1_1);
2862
2863 start_2 = SeqLocStart(loc_2_1);
2864 stop_2 = SeqLocStop(loc_2_1);
2865
2866 if(!(start_2 > stop_1 || stop_2 < start_1))
2867 {
2868 if(stop_2 >= stop_1)
2869 overlap_1 = MAX(0, stop_1 - start_2 + 1);
2870 else
2871 overlap_1 = MAX(0, stop_2 - start_1 + 1);
2872 }
2873
2874 start_1 = SeqLocStart(loc_1_2);
2875 stop_1 = SeqLocStop(loc_1_2);
2876
2877 start_2 = SeqLocStart(loc_2_2);
2878 stop_2 = SeqLocStop(loc_2_2);
2879
2880 if(!(start_2 > stop_1 || stop_2 < start_1))
2881 {
2882 if(stop_2 >= stop_1)
2883 overlap_2 = MAX(0, stop_1 - start_2 + 1);
2884 else
2885 overlap_2 = MAX(0, stop_2 - start_1 + 1);
2886 }
2887
2888 if(overlap_1 == 0 && overlap_2 == 0)
2889 return FALSE;
2890 else
2891 {
2892 if(overlap_1 >= overlap_2)
2893 *p_order = 0;
2894 else
2895 *p_order = 1;
2896 return TRUE;
2897 }
2898 }
2899
2900
2901
2902 /*###############################################################
2903 *
2904 * functions for merge the two alignments
2905 *
2906 ################################################################*/
2907
2908
get_align_len_in_overlap(SeqAlignPtr align,Int4 from,Int4 to,Int2 order)2909 NLM_EXTERN Int4 get_align_len_in_overlap (SeqAlignPtr align, Int4 from, Int4 to, Int2 order)
2910 {
2911 DenseSegPtr dsp;
2912 Int2 i;
2913 Int4 start, stop;
2914 Int4 align_len = 0;
2915
2916 dsp = (DenseSegPtr) align->segs;
2917 for(i = 0; i<dsp->numseg; ++i)
2918 {
2919 if(dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1)
2920 {
2921 start = dsp->starts[2*i + order];
2922 stop = dsp->starts[2*i + order] + dsp->lens[i] -1;
2923 if(!(start > to || stop < from))
2924 {
2925 start = MAX(start, from);
2926 stop = MIN(stop, to);
2927
2928 if(stop >= start)
2929 align_len += (stop - start + 1);
2930 }
2931 }
2932 }
2933
2934 return align_len;
2935 }
2936
2937
2938
open_annot(CharPtr f_name)2939 static SeqAnnotPtr open_annot(CharPtr f_name)
2940 {
2941 SeqAnnotPtr annot;
2942 AsnIoPtr aip;
2943
2944 aip = AsnIoOpen(f_name, "r");
2945 annot = SeqAnnotAsnRead(aip, NULL);
2946 AsnIoClose(aip);
2947 return annot;
2948 }
2949
2950 typedef struct segdata {
2951 Int2 seg_order;
2952 Int4 overlap_len;
2953 }SegOrder, PNTR SegOrderPtr;
2954
2955 /*
2956 * find the longest diagnol in the overlapping region
2957 *
2958 */
SegOrderCompProc(VoidPtr ptr1,VoidPtr ptr2)2959 static int LIBCALLBACK SegOrderCompProc (VoidPtr ptr1, VoidPtr ptr2)
2960 {
2961 ValNodePtr vnp1, vnp2;
2962 SegOrderPtr sop1, sop2;
2963
2964 if (ptr1 != NULL && ptr2 != NULL) {
2965 vnp1 = *((ValNodePtr PNTR) ptr1);
2966 vnp2 = *((ValNodePtr PNTR) ptr2);
2967 if (vnp1 != NULL && vnp2 != NULL) {
2968 sop1 = (SegOrderPtr) vnp1->data.ptrvalue;
2969 sop2 = (SegOrderPtr) vnp2->data.ptrvalue;
2970 if(sop1 != NULL && sop2 != NULL)
2971 {
2972 if(sop1->overlap_len > sop2->overlap_len)
2973 return -1;
2974 else if(sop1->overlap_len < sop2->overlap_len)
2975 return 1;
2976 else
2977 return 0;
2978 }
2979 }
2980 }
2981
2982 return 0;
2983 }
2984
find_overlap_diagnol(SeqAlignPtr align,Int4 from,Int4 to,Int2 order)2985 static ValNodePtr find_overlap_diagnol(SeqAlignPtr align, Int4 from, Int4 to, Int2 order)
2986 {
2987 Int2 i;
2988 DenseSegPtr dsp;
2989 Int4 start, stop;
2990 SegOrderPtr sop;
2991 ValNodePtr seg_list;
2992
2993 seg_list = NULL;
2994 dsp = (DenseSegPtr) align->segs;
2995 for(i = 0; i<dsp->numseg; ++i)
2996 {
2997 if(dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1)
2998 {
2999 start = dsp->starts[2*i+order];
3000 stop = start + dsp->lens[i] -1;
3001 if(!(start > to || stop < from))
3002 {
3003
3004 start = MAX(start, from);
3005 stop = MIN(stop, to);
3006 sop = (SegOrderPtr) MemNew(sizeof(SegOrder));
3007 sop->seg_order = i;
3008 sop->overlap_len = stop - start + 1;
3009 ValNodeAddPointer(&seg_list, 0, sop);
3010 }
3011 }
3012 }
3013
3014 if(seg_list != NULL)
3015 return ValNodeSort(seg_list, SegOrderCompProc);
3016 else
3017 return NULL;
3018 }
3019
is_dsp_same(DenseSegPtr dsp_1,DenseSegPtr dsp_2)3020 static Boolean is_dsp_same(DenseSegPtr dsp_1, DenseSegPtr dsp_2)
3021 {
3022 if(dsp_1 == NULL || dsp_2 == NULL)
3023 return FALSE;
3024 if(dsp_1->ids == NULL || dsp_2->ids == NULL)
3025 return FALSE;
3026 if(!SeqIdMatch(dsp_1->ids, dsp_2->ids))
3027 return FALSE;
3028
3029 if(dsp_1->ids->next == NULL || dsp_2->ids->next == NULL)
3030 return FALSE;
3031 if(!SeqIdMatch(dsp_1->ids->next, dsp_2->ids->next))
3032 return FALSE;
3033
3034 if(dsp_1->strands == NULL || dsp_2->strands == NULL)
3035 return FALSE;
3036
3037 if(!is_same_orientation(dsp_1->strands[0], dsp_2->strands[0]))
3038 return FALSE;
3039
3040 if(!is_same_orientation(dsp_1->strands[1], dsp_2->strands[1]))
3041 return FALSE;
3042
3043 return TRUE;
3044 }
3045
get_score_value(ScorePtr sp)3046 static Int4 get_score_value(ScorePtr sp)
3047 {
3048 ObjectIdPtr oip;
3049
3050 while(sp)
3051 {
3052 if(sp->id)
3053 {
3054 oip = sp->id;
3055 if(oip && oip->str && StringCmp(oip->str, "score") == 0)
3056 {
3057 if(sp->choice == 1)
3058 return sp->value.intvalue;
3059 }
3060 }
3061 sp = sp->next;
3062 }
3063
3064 return -1;
3065 }
3066
load_big_score(SeqAlignPtr align_1,SeqAlignPtr align_2)3067 static void load_big_score (SeqAlignPtr align_1, SeqAlignPtr align_2)
3068 {
3069 Int4 val_1, val_2;
3070
3071 val_1 = get_score_value(align_1->score);
3072 val_2 = get_score_value(align_2->score);
3073
3074 if(val_1 < val_2)
3075 {
3076 ScoreSetFree(align_1->score);
3077 align_1->score = align_2->score;
3078 align_2->score = NULL;
3079 }
3080 }
3081
3082 /*
3083 merge two seqaligns: align_1 contains the merged seqalign, align_2 is clobbered.
3084 reorders the seqalign on input so that align_1 is "left" of align_2
3085 in display-style coordinates.
3086
3087 */
3088
merge_two_align(SeqAlignPtr align_1,SeqAlignPtr align_2,Int4 from,Int4 to,Int2 order)3089 NLM_EXTERN Boolean merge_two_align(SeqAlignPtr align_1, SeqAlignPtr align_2, Int4 from, Int4 to, Int2 order)
3090 {
3091 ValNodePtr seg_list_1, seg_list_2;
3092 SegOrderPtr sop_1, sop_2;
3093 ValNodePtr curr_1, curr_2;
3094 Boolean found;
3095 Int2 seg_order_1, seg_order_2;
3096 DenseSegPtr dsp_1, dsp_2;
3097 Int4 start_1, stop_1, start_2, stop_2;
3098 Int4 overlap_1, overlap_2;
3099 Int4 extend_len;
3100 Int2 n_seg_num, i, j;
3101 Int4Ptr starts, lens;
3102 Uint1Ptr strands;
3103 Int4 m_start_1, s_start_1, len_1;
3104 Int4 m_start_2, s_start_2, len_2;
3105 Int4 offset_1;
3106 if(!align_1 || !align_2 || align_1->segtype != 2 || align_2->segtype!=2)
3107 return FALSE;
3108 dsp_1 = (DenseSegPtr) align_1->segs;
3109 dsp_2 = (DenseSegPtr) align_2->segs;
3110
3111 if(!is_dsp_same(dsp_1, dsp_2))
3112 return FALSE;
3113 seg_list_1 = find_overlap_diagnol(align_1, from, to, order);
3114 if(seg_list_1 == NULL)
3115 return FALSE;
3116 seg_list_2 = find_overlap_diagnol(align_2, from, to, order);
3117 if(seg_list_2 == NULL)
3118 {
3119 ValNodeFreeData(seg_list_1);
3120 return FALSE;
3121 }
3122
3123 if(SeqAlignStart(align_1,0)>SeqAlignStart(align_2,0)) {
3124 VoidPtr segs;
3125 ValNodePtr vnp;
3126 segs = align_1->segs;
3127 align_1->segs = align_2->segs;
3128 align_2->segs = segs;
3129 dsp_1 = (DenseSegPtr) align_1->segs;
3130 dsp_2 = (DenseSegPtr) align_2->segs;
3131 vnp = seg_list_1;
3132 seg_list_1 = seg_list_2;
3133 seg_list_2 = vnp;
3134 }
3135
3136 found = FALSE;
3137 for(curr_1 = seg_list_1; curr_1 != NULL && !found; curr_1 = curr_1->next)
3138 {
3139 sop_1 = (SegOrderPtr) curr_1->data.ptrvalue;
3140 seg_order_1 = sop_1->seg_order;
3141 m_start_1 = dsp_1->starts[2*seg_order_1 + order];
3142 s_start_1 = dsp_1->starts[2*seg_order_1 + 1 - order];
3143 len_1 = dsp_1->lens[seg_order_1];
3144 for(curr_2 = seg_list_2; curr_2 != NULL && !found; curr_2 = curr_2->next)
3145 {
3146 sop_2 = (SegOrderPtr) curr_2->data.ptrvalue;
3147 seg_order_2 = sop_2->seg_order;
3148
3149 m_start_2 = dsp_2->starts[2*seg_order_2 + order];
3150 s_start_2 = dsp_2->starts[2*seg_order_2 + 1 - order];
3151 len_2 = dsp_2->lens[seg_order_2];
3152
3153
3154 found = TRUE;
3155 start_1 = dsp_1->starts[2*seg_order_1 + order];
3156 stop_1 = start_1 + dsp_1->lens[seg_order_1] -1;
3157
3158 start_2 = dsp_2->starts[2*seg_order_2 + order];
3159 stop_2 = start_2 + dsp_2->lens[seg_order_2] -1;
3160
3161 /*check if the overlapping region on the sequence overlaps??*/
3162 if(start_1 > stop_2 || stop_1 < start_2)
3163 found = FALSE;
3164
3165 offset_1 = 0;
3166 /*trim the tail of the first alignment and the head of the second*/
3167 if(found)
3168 {
3169 if(dsp_1->strands[order] == Seq_strand_minus)
3170 {
3171 /*trim the head of the second alignment*/
3172 /* <====-------===================== first
3173 <================================ second
3174 trim ^^^^^^^^^^^^XXXXXXXXXXXXXXXXXXXXX overlap
3175 */
3176 if(stop_2 > stop_1)
3177 {
3178 offset_1 = stop_2 - stop_1;
3179 dsp_2->lens[seg_order_2] -= offset_1;
3180 if(dsp_2->strands[1-order] != Seq_strand_minus)
3181 dsp_2->starts[2*seg_order_2 + 1 - order] += offset_1;
3182
3183 stop_2 = stop_1;
3184 }
3185 /*trim the tail of the first alignment*/
3186 /*
3187 ^^^^^^^^^^ trim
3188 <=================================== first
3189 <=======================------========= second
3190 XXXXXXXXXXXXXXXXXXXXXXX overlap
3191 */
3192 if(start_1 < start_2)
3193 {
3194 offset_1 = start_2 - start_1;
3195 dsp_1->starts[2*seg_order_1 + order] += offset_1;
3196 dsp_1->lens[seg_order_1] -= offset_1;
3197 if(dsp_1->strands[1-order] == Seq_strand_minus)
3198 dsp_1->starts[2*seg_order_1 + 1 - order] += offset_1;
3199
3200 start_1 = start_2;
3201 }
3202 overlap_1 = start_1 - stop_2;
3203 }
3204 else
3205 {
3206 /*trim the head of the second alignment*/
3207 /*
3208 ====----====================> first
3209 ================================ second
3210 ^^^^^^^^XXXXXXXXXXXXXXXXXXXX
3211 trim overlap
3212 */
3213 if(start_2 < start_1)
3214 {
3215 offset_1 = start_1 - start_2;
3216 dsp_2->starts[2*seg_order_2 + order] += offset_1;
3217 if(dsp_2->strands[1-order] != Seq_strand_minus)
3218
3219 dsp_2->starts[2*seg_order_2 + 1 - order] += offset_1;
3220 dsp_2->lens[seg_order_2] -= offset_1;
3221 start_2 = start_1;
3222 }
3223 /*trim the tail of the first alignment*/
3224 /*
3225 overlap trim
3226 XXXXXXXXXXXXXXXXXXXXXXX^^^^^^^^^^
3227 =================================> first
3228 ========================------============= second
3229 */
3230
3231 if(stop_1 > stop_2)
3232 {
3233 offset_1 = stop_1 - stop_2;
3234 dsp_1->lens[seg_order_1] -= offset_1;
3235 if(dsp_1->strands[1-order] == Seq_strand_minus)
3236
3237 dsp_1->starts[2*seg_order_1 + 1 - order] += offset_1;
3238 stop_1 = stop_2;
3239 }
3240 overlap_1 = stop_1 - start_2;
3241 }
3242 if(found && overlap_1 <=0)
3243 found = FALSE;
3244 }
3245
3246
3247 /*check the second sequence*/
3248 if(found)
3249 {
3250 start_1 = dsp_1->starts[2*seg_order_1 + 1 - order];
3251 stop_1 = start_1 + dsp_1->lens[seg_order_1] -1;
3252
3253 start_2 = dsp_2->starts[2*seg_order_2 + 1 - order];
3254 stop_2 = start_2 + dsp_2->lens[seg_order_2] -1;
3255
3256 /*check if the overlapping region on the sequence overlaps??*/
3257 if(start_1 > stop_2 || stop_1 < start_2)
3258 found = FALSE;
3259 }
3260 if(found)
3261 {
3262 overlap_2 = MIN(stop_2,stop_1) - MAX(start_1,start_2);
3263 if(found && overlap_2 <=0)
3264 found = FALSE;
3265 }
3266
3267 if(found && overlap_2 != overlap_1)
3268 found = FALSE;
3269
3270 if(found)
3271 {
3272 if(dsp_1->strands[1-order] == Seq_strand_minus)
3273 {
3274 stop_1 = dsp_1->starts[2*seg_order_1 + 1 - order] +
3275 dsp_1->lens[seg_order_1] -1;
3276 stop_2 = dsp_2->starts[2*seg_order_2 + 1 - order] +
3277 dsp_2->lens[seg_order_2] -1;
3278 extend_len = stop_1 - stop_2;
3279 }
3280 else
3281 {
3282 start_1 = dsp_1->starts[2*seg_order_1 + 1 - order];
3283 start_2 = dsp_2->starts[2*seg_order_2 + 1 - order];
3284 extend_len = start_2 - start_1;
3285 }
3286 if(dsp_2->strands[order] != Seq_strand_minus)
3287 {
3288 dsp_2->starts[2*seg_order_2 + order] -= extend_len;
3289 }
3290 if(dsp_2->strands[1-order] != Seq_strand_minus)
3291 {
3292 dsp_2->starts[2*seg_order_2 + 1 - order] -= extend_len;
3293 }
3294
3295 dsp_2->lens[seg_order_2] += extend_len;
3296
3297 n_seg_num = seg_order_1 + (dsp_2->numseg - seg_order_2);
3298 if(n_seg_num < 1)
3299 found = FALSE;
3300 }
3301 if(found)
3302 {
3303
3304 starts = (Int4Ptr) MemNew((size_t)(2*n_seg_num) * sizeof(Int4));
3305 strands = (Uint1Ptr) MemNew((size_t)(2*n_seg_num) * sizeof(Uint1));
3306 lens = (Int4Ptr) MemNew((size_t) n_seg_num * sizeof(Int4));
3307
3308 j = 0;
3309 if(seg_order_1 > 0)
3310 {
3311 for(i = 0; i<seg_order_1; ++i)
3312 {
3313 starts[2*j] = dsp_1->starts[2*i];
3314 starts[2*j+1] = dsp_1->starts[2*i+1];
3315 strands[2*j] = dsp_1->strands[2*i];
3316 strands[2*j+1] = dsp_1->strands[2*i+1];
3317
3318 lens[j] = dsp_1->lens[i];
3319 ++j;
3320 }
3321 }
3322
3323 for(i = seg_order_2; i<dsp_2->numseg; ++i)
3324 {
3325 starts[2*j] = dsp_2->starts[2*i];
3326 starts[2*j+1] = dsp_2->starts[2*i+1];
3327 strands[2*j] = dsp_2->strands[2*i];
3328 strands[2*j+1] = dsp_2->strands[2*i+1];
3329 lens[j] = dsp_2->lens[i];
3330 ++j;
3331 }
3332
3333 dsp_1->numseg = n_seg_num;
3334 MemFree(dsp_1->starts);
3335 MemFree(dsp_1->strands);
3336 MemFree(dsp_1->lens);
3337
3338 dsp_1->starts = starts;
3339 dsp_1->strands = strands;
3340 dsp_1->lens = lens;
3341 load_big_score (align_1, align_2);
3342 break;
3343 }
3344 else
3345 {
3346 dsp_1->starts[2*seg_order_1 + order] = m_start_1;
3347 dsp_1->starts[2*seg_order_1 + 1 - order] = s_start_1;
3348 dsp_1->lens[seg_order_1] = len_1;
3349
3350 dsp_2->starts[2*seg_order_2 + order] = m_start_2;
3351 dsp_2->starts[2*seg_order_2 + 1 - order] = s_start_2;
3352 dsp_2->lens[seg_order_2] = len_2;
3353 }
3354 }
3355 }
3356
3357 ValNodeFreeData(seg_list_1);
3358 ValNodeFreeData(seg_list_2);
3359 return found;
3360
3361 }
3362
extract_align_from_list(SeqAlignPtr PNTR list,SeqAlignPtr align)3363 static SeqAlignPtr extract_align_from_list(SeqAlignPtr PNTR list, SeqAlignPtr align)
3364 {
3365 SeqAlignPtr curr, prev;
3366
3367 prev = NULL;
3368 curr = *list;
3369 while(curr)
3370 {
3371 if(curr == align)
3372 {
3373 if(prev == NULL)
3374 *list = curr->next;
3375 else
3376 prev->next = curr->next;
3377 curr->next = NULL;
3378 return curr;
3379 }
3380 prev = curr;
3381 curr = curr->next;
3382 }
3383 return NULL;
3384 }
3385
SeqAlignReverse(SeqAlignPtr align,Int2 order)3386 NLM_EXTERN void SeqAlignReverse(SeqAlignPtr align, Int2 order)
3387 {
3388 DenseSegPtr dsp;
3389 Int4Ptr starts, lens;
3390 Int2 numseg, i, j;
3391
3392 if(align->segtype != 2)
3393 return;
3394 dsp = (DenseSegPtr) align->segs;
3395 if(dsp == NULL || dsp->strands == NULL || dsp->strands[order] != Seq_strand_minus)
3396 return;
3397
3398 numseg = dsp->numseg;
3399 starts = (Int4Ptr) MemNew((size_t)2*numseg * sizeof(Int4));
3400 lens = (Int4Ptr) MemNew((size_t)numseg * sizeof(Int4));
3401
3402 for(i = numseg-1, j = 0; i>=0; --i, ++j)
3403 {
3404 starts[2*j] = dsp->starts[2*i];
3405 starts[2*j+1] = dsp->starts[2*i+1];
3406 lens[j] = dsp->lens[i];
3407 dsp->strands[2*i] = 3 - dsp->strands[2*i];
3408 dsp->strands[2*i + 1] = 3 - dsp->strands[2*i + 1];
3409 }
3410
3411 MemFree(dsp->starts);
3412 dsp->starts = starts;
3413 MemFree(dsp->lens);
3414 dsp->lens = lens;
3415 }
3416
3417 typedef struct align_overlap {
3418 SeqAlignPtr align;
3419 Int4 overlap_len;
3420 Int4 align_len;
3421 }AlignOverlap, PNTR AlignOverlapPtr;
3422
AlignOverlapCompProc(VoidPtr ptr1,VoidPtr ptr2)3423 static int LIBCALLBACK AlignOverlapCompProc (VoidPtr ptr1, VoidPtr ptr2)
3424 {
3425 ValNodePtr vnp1, vnp2;
3426 AlignOverlapPtr sop1, sop2;
3427
3428 if (ptr1 != NULL && ptr2 != NULL) {
3429 vnp1 = *((ValNodePtr PNTR) ptr1);
3430 vnp2 = *((ValNodePtr PNTR) ptr2);
3431 if (vnp1 != NULL && vnp2 != NULL) {
3432 sop1 = (AlignOverlapPtr) vnp1->data.ptrvalue;
3433 sop2 = (AlignOverlapPtr) vnp2->data.ptrvalue;
3434 if(sop1 != NULL && sop2 != NULL)
3435 {
3436 if(sop1->overlap_len > sop2->overlap_len)
3437 return -1;
3438 else if(sop1->overlap_len < sop2->overlap_len)
3439 return 1;
3440 else if(sop1->align_len > sop2->align_len)
3441 return -1;
3442 else if(sop1->align_len < sop2->align_len)
3443 return 1;
3444 else
3445 return 0;
3446 }
3447 }
3448 }
3449
3450 return 0;
3451 }
3452
3453
3454
build_overlap_list(SeqAlignPtr align,Int4 from,Int4 to,Int2 order)3455 static ValNodePtr build_overlap_list(SeqAlignPtr align, Int4 from, Int4 to, Int2 order)
3456 {
3457 Int4 t_len;
3458 ValNodePtr list = NULL;
3459 AlignOverlapPtr aop;
3460
3461 /*sort the alignment by score*/
3462 while(align)
3463 {
3464 /*check if the alignment is significant*/
3465 t_len = get_align_len_in_overlap (align, from, to, order);
3466 if(t_len > MIN_DIAG_LEN)
3467 {
3468 aop = (AlignOverlapPtr) MemNew(sizeof(AlignOverlap));
3469 aop->overlap_len = t_len;
3470 aop->align = align;
3471 aop->align_len = SeqAlignLength(align);
3472 ValNodeAddPointer(&list, 0, aop);
3473 }
3474 align = align->next;
3475 }
3476
3477 if(list != NULL)
3478 return ValNodeSort(list, AlignOverlapCompProc);
3479 else
3480 return NULL;
3481
3482 }
3483
3484
3485
MergeTwoAlignList(SeqAlignPtr h_align_1,SeqAlignPtr PNTR p_align_2,Int4 from,Int4 to,Int2 order)3486 NLM_EXTERN Boolean MergeTwoAlignList (SeqAlignPtr h_align_1, SeqAlignPtr PNTR p_align_2, Int4 from, Int4 to, Int2 order)
3487 {
3488
3489 ValNodePtr list_1, list_2;
3490 AlignOverlapPtr aop_1, aop_2;
3491 ValNodePtr curr_1, curr_2;
3492 SeqAlignPtr align_1, align_2;
3493 Boolean found;
3494 SeqAlignPtr h_align_2;
3495 Boolean retval = FALSE;
3496
3497 h_align_2 = *p_align_2;
3498 if(p_align_2 == NULL || h_align_1 == NULL || (h_align_2 = *p_align_2)== NULL)
3499 return FALSE;
3500
3501 list_1 = build_overlap_list(h_align_1, from, to, order);
3502 if(list_1 == NULL)
3503 return FALSE;
3504 list_2 = build_overlap_list(h_align_2, from, to, order);
3505 if(list_2 == NULL)
3506 {
3507 ValNodeFreeData(list_1);
3508 return FALSE;
3509 }
3510
3511 for(curr_1 = list_1; curr_1 != NULL; curr_1 = curr_1->next)
3512 {
3513 aop_1 = (AlignOverlapPtr) curr_1->data.ptrvalue;
3514 align_1 = aop_1->align;
3515 SeqAlignReverse(align_1, order);
3516
3517 found = FALSE;
3518 for(curr_2 = list_2; !found && curr_2 != NULL; curr_2 = curr_2->next)
3519 {
3520 aop_2 = (AlignOverlapPtr) curr_2->data.ptrvalue;
3521 if(aop_2->align != NULL)
3522 {
3523 align_2 = aop_2->align;
3524 SeqAlignReverse(align_2, order);
3525 if(merge_two_align(align_1, align_2, from, to, 0))
3526 {
3527 retval = TRUE;
3528 aop_2->align = NULL;
3529 if(extract_align_from_list(p_align_2, align_2) != NULL)
3530 SeqAlignFree(align_2);
3531 found = TRUE;
3532 }
3533 }
3534 }
3535 }
3536
3537 ValNodeFreeData(list_1);
3538 ValNodeFreeData(list_2);
3539
3540 return retval;
3541 }
3542
3543
3544
3545 /*
3546 *
3547 * functions related to reduce the redundant level of the view
3548 */
3549
3550 typedef struct align_info {
3551 SeqAlignPtr align;
3552 Int4 align_len;
3553 }AlignInfo, PNTR AlignInfoPtr;
3554
CompareAlignInfoProc(VoidPtr ptr1,VoidPtr ptr2)3555 static int LIBCALLBACK CompareAlignInfoProc(VoidPtr ptr1, VoidPtr ptr2)
3556 {
3557 AlignInfoPtr aip_1, aip_2;
3558
3559 if (ptr1 != NULL && ptr2 != NULL) {
3560 aip_1 = (AlignInfoPtr) ptr1;
3561 aip_2 = (AlignInfoPtr) ptr2;
3562 if(aip_1->align_len > aip_2->align_len)
3563 return -1;
3564 else if(aip_1->align_len < aip_2->align_len)
3565 return 1;
3566 else return 0;
3567 }
3568 return 0;
3569 }
3570
3571 typedef struct align_inforeal {
3572 SeqAlignPtr align;
3573 Nlm_FloatHi rvalue;
3574 }AlignInfoReal, PNTR AlignInfoRealPtr;
3575
3576
CompareAlignInfoRealProc(VoidPtr ptr1,VoidPtr ptr2)3577 static int LIBCALLBACK CompareAlignInfoRealProc(VoidPtr ptr1, VoidPtr ptr2)
3578 {
3579 AlignInfoRealPtr aip_1, aip_2;
3580
3581 if (ptr1 != NULL && ptr2 != NULL) {
3582 aip_1 = (AlignInfoRealPtr) ptr1;
3583 aip_2 = (AlignInfoRealPtr) ptr2;
3584 if(aip_1->rvalue > aip_2->rvalue)
3585 return -1;
3586 else if(aip_1->rvalue < aip_2->rvalue)
3587 return 1;
3588 else return 0;
3589 }
3590 return 0;
3591 }
3592
3593
3594
3595 /*
3596 Generic Function to Sort SeqAligns.. must
3597 provide Comparison Callback function
3598 */
3599
3600
SeqAlignSort(SeqAlignPtr salp,SeqAlignSortCB sort_fn)3601 NLM_EXTERN SeqAlignPtr SeqAlignSort(SeqAlignPtr salp,SeqAlignSortCB sort_fn) {
3602 SeqAlignPtr sap, PNTR salp_list;
3603 Int4 n,i;
3604
3605 if(salp==NULL || salp->next==NULL)
3606 return salp;
3607
3608 n=1;
3609 sap = salp->next;
3610 while (sap != NULL)
3611 {
3612 n++;
3613 sap = sap->next;
3614 }
3615 salp_list = Calloc(n, sizeof (SeqAlignPtr));
3616 for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3617 salp_list[i]=sap;
3618 }
3619 HeapSort (salp_list, n, sizeof (SeqAlignPtr), sort_fn);
3620 sap = salp = salp_list[0];
3621 for (i = 1; i < n; i++) {
3622 sap->next = salp_list[i];
3623 sap = sap->next;
3624 }
3625 sap->next = NULL;
3626 MemFree (salp_list);
3627 return salp;
3628 }
3629
3630
3631
SeqAlignSortByLength(SeqAlignPtr salp)3632 NLM_EXTERN SeqAlignPtr SeqAlignSortByLength(SeqAlignPtr salp)
3633 {
3634 SeqAlignPtr sap;
3635 AlignInfoPtr salp_list;
3636 Int4 n,i;
3637
3638 if(salp==NULL || salp->next==NULL)
3639 return salp;
3640
3641 n=1;
3642 sap = salp->next;
3643 while (sap != NULL)
3644 {
3645 n++;
3646 sap = sap->next;
3647 }
3648 salp_list = Calloc(n, sizeof (AlignInfo));
3649 for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3650 salp_list[i].align=sap;
3651 salp_list[i].align_len = SeqAlignLength(sap);
3652 }
3653 HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
3654 sap = salp = salp_list[0].align;
3655 for (i = 1; i < n; i++) {
3656 sap->next = salp_list[i].align;
3657 sap = sap->next;
3658 }
3659 sap->next = NULL;
3660 MemFree (salp_list);
3661 return salp;
3662 }
3663
SeqAlignSortByScore(SeqAlignPtr salp)3664 NLM_EXTERN SeqAlignPtr SeqAlignSortByScore(SeqAlignPtr salp)
3665 {
3666 SeqAlignPtr sap;
3667 AlignInfoPtr salp_list;
3668 Int4 n,i;
3669
3670 if(salp==NULL || salp->next==NULL)
3671 return salp;
3672
3673 n=1;
3674 sap = salp->next;
3675 while (sap != NULL)
3676 {
3677 n++;
3678 sap = sap->next;
3679 }
3680 salp_list = Calloc(n, sizeof (AlignInfo));
3681 {
3682 Int4 score,number;
3683 Nlm_FloatHi bit_score,evalue;
3684 for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3685 salp_list[i].align=sap;
3686 GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
3687 salp_list[i].align_len = score;
3688 }
3689 }
3690 HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
3691 sap = salp = salp_list[0].align;
3692 for (i = 1; i < n; i++) {
3693 sap->next = salp_list[i].align;
3694 sap = sap->next;
3695 }
3696 sap->next = NULL;
3697 MemFree (salp_list);
3698 return salp;
3699 }
3700
3701
SeqAlignSortByEvalue(SeqAlignPtr salp)3702 NLM_EXTERN SeqAlignPtr SeqAlignSortByEvalue(SeqAlignPtr salp)
3703 {
3704 SeqAlignPtr sap;
3705 AlignInfoRealPtr salp_list;
3706 Int4 n,i;
3707
3708 if(salp==NULL || salp->next==NULL)
3709 return salp;
3710
3711 n=1;
3712 sap = salp->next;
3713 while (sap != NULL)
3714 {
3715 n++;
3716 sap = sap->next;
3717 }
3718 salp_list = Calloc(n, sizeof (AlignInfoReal));
3719 {
3720 Int4 score,number;
3721 Nlm_FloatHi bit_score,evalue;
3722 for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3723 salp_list[i].align=sap;
3724 GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
3725 salp_list[i].rvalue = evalue;
3726 }
3727 }
3728 HeapSort (salp_list, n, sizeof (AlignInfoReal), CompareAlignInfoRealProc);
3729 /* reverse order */
3730 sap = salp = salp_list[n-1].align;
3731 for (i = n-2; i >=0; i--) {
3732 sap->next = salp_list[i].align;
3733 sap = sap->next;
3734 }
3735 sap->next = NULL;
3736 MemFree (salp_list);
3737 return salp;
3738 }
3739
SeqAlignSortByStart(SeqAlignPtr salp,Int2 order)3740 NLM_EXTERN SeqAlignPtr SeqAlignSortByStart(SeqAlignPtr salp,Int2 order)
3741 {
3742 SeqAlignPtr sap;
3743 AlignInfoPtr salp_list;
3744 Int4 n,i;
3745
3746 if(salp==NULL || salp->next==NULL)
3747 return salp;
3748
3749 n=1;
3750 sap = salp->next;
3751 while (sap != NULL)
3752 {
3753 n++;
3754 sap = sap->next;
3755 }
3756 salp_list = Calloc(n, sizeof (AlignInfo));
3757 {
3758 for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3759 salp_list[i].align=sap;
3760 salp_list[i].align_len = SeqAlignStart(sap,order);
3761 }
3762 }
3763 HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
3764 /* reverse order */
3765 sap = salp = salp_list[n-1].align;
3766 for (i = n-2; i >=0; i--) {
3767 sap->next = salp_list[i].align;
3768 sap = sap->next;
3769 }
3770 sap->next = NULL;
3771 MemFree (salp_list);
3772 return salp;
3773 }
3774
3775
get_align_mid_point(SeqAlignPtr align,Int2 order)3776 static Int4 get_align_mid_point(SeqAlignPtr align, Int2 order)
3777 {
3778 DenseSegPtr dsp;
3779 Int4 start, stop;
3780 Uint1 strand, val;
3781 SeqIdPtr sip;
3782
3783 if(align->segtype == 2)
3784 {
3785 dsp = (DenseSegPtr) align->segs;
3786 val = 0;
3787 for(sip = dsp->ids; sip != NULL; sip = sip->next)
3788 {
3789 if(val == order)
3790 {
3791 SeqAlignStartStopById(align, sip, &start, &stop, &strand);
3792 return (start + stop)/2;
3793 }
3794 ++val;
3795 }
3796 }
3797
3798 return -1;
3799 }
SeqAlignSortByRegion(SeqAlignPtr salp,Int2 order)3800 NLM_EXTERN SeqAlignPtr SeqAlignSortByRegion(SeqAlignPtr salp,Int2 order)
3801 {
3802 SeqAlignPtr sap;
3803 AlignInfoPtr salp_list;
3804 Int4 n,i;
3805
3806 if(salp==NULL || salp->next==NULL)
3807 return salp;
3808
3809 n=1;
3810 sap = salp->next;
3811 while (sap != NULL)
3812 {
3813 n++;
3814 sap = sap->next;
3815 }
3816 salp_list = Calloc(n, sizeof (AlignInfo));
3817 for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3818 salp_list[i].align=sap;
3819 salp_list[i].align_len = get_align_mid_point(sap, order);
3820 }
3821 HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
3822 /* reverse order */
3823 sap = salp = salp_list[n-1].align;
3824 for (i = n-2; i >=0; i--) {
3825 sap->next = salp_list[i].align;
3826 sap = sap->next;
3827 }
3828 sap->next = NULL;
3829 MemFree (salp_list);
3830 return salp;
3831 }
3832
3833
3834
3835
get_align_list_len(SeqAlignPtr align)3836 static Int4 get_align_list_len(SeqAlignPtr align)
3837 {
3838 Int4 len = 0;
3839
3840 while(align)
3841 {
3842 len += SeqAlignLength(align);
3843 align = align->next;
3844 }
3845
3846 return len;
3847 }
3848
3849
save_output(SeqIdPtr sip,CharPtr f_name,SeqAlignPtr align)3850 static void save_output (SeqIdPtr sip, CharPtr f_name, SeqAlignPtr align)
3851 {
3852 AsnIoPtr aip;
3853 BioseqPtr bsp;
3854 SeqEntryPtr sep;
3855
3856 bsp = BioseqFind(sip);
3857 sep = SeqEntryFind(sip);
3858
3859 if(bsp == NULL || sep == NULL)
3860 return;
3861
3862 bsp->hist = SeqHistNew();
3863 bsp->hist->assembly = align;
3864
3865 aip = AsnIoOpen(f_name, "w");
3866 SeqEntryAsnWrite(sep, aip, NULL);
3867 AsnIoClose(aip);
3868
3869 bsp->hist->assembly = NULL;
3870 SeqHistFree(bsp->hist);
3871 bsp->hist = NULL;
3872 }
3873
3874
find_best_align(SeqAlignPtr align,Int4Ptr pmax_score)3875 NLM_EXTERN SeqAlignPtr find_best_align(SeqAlignPtr align, Int4Ptr pmax_score)
3876 {
3877 SeqAlignPtr curr, best_align;
3878 Int4 score, max_score = 0, number;
3879 Nlm_FloatHi bit_score, evalue;
3880
3881 best_align = NULL;
3882 for(curr = align; curr != NULL; curr = curr->next)
3883 {
3884 GetScoreAndEvalue(curr, &score, &bit_score, &evalue , &number);
3885 if(score > max_score)
3886 {
3887 max_score = score;
3888 best_align = curr;
3889 }
3890 }
3891
3892
3893 *pmax_score = max_score;
3894 return best_align;
3895 }
3896
3897
3898
3899 /*flip the order of the first and the second sequences*/
SeqAlignSwapOrder(SeqAlignPtr align)3900 NLM_EXTERN void SeqAlignSwapOrder(SeqAlignPtr align)
3901 {
3902 SeqAlignPtr curr;
3903 DenseSegPtr dsp;
3904 SeqIdPtr sip;
3905 Int2 i;
3906 Int4 temp;
3907 Uint1 strand;
3908
3909 for(curr = align; curr != NULL; curr = curr->next)
3910 {
3911 SeqAlignReverse(curr, 1);
3912 dsp = (DenseSegPtr) curr->segs;
3913
3914 /*switching the Seq-ids*/
3915 sip = dsp->ids->next;
3916 dsp->ids->next = NULL;
3917 sip->next = dsp->ids;
3918 dsp->ids = sip;
3919
3920 for(i = 0; i<dsp->numseg; ++i)
3921 {
3922 temp = dsp->starts[2*i];
3923 dsp->starts[2*i] = dsp->starts[2*i+1];
3924 dsp->starts[2*i+1] = temp;
3925
3926 strand = dsp->strands[2*i];
3927 dsp->strands[2*i] = dsp->strands[2*i+1];
3928 dsp->strands[2*i+1] = strand;
3929 }
3930
3931 }
3932 return;
3933 }
3934
3935
3936
reverse_seqloc(SeqLocPtr s_loc)3937 static void reverse_seqloc (SeqLocPtr s_loc)
3938 {
3939 SeqLocPtr slp, t_slp, n_slp;
3940 SeqIntPtr sint;
3941 Uint1 strand;
3942
3943 if(s_loc == NULL || s_loc->choice != SEQLOC_MIX)
3944 return;
3945 slp = NULL;
3946 t_slp = NULL;
3947 while((slp = SeqLocFindNext(s_loc, slp)) != NULL)
3948 {
3949 if(slp->choice == SEQLOC_INT)
3950 {
3951 sint = (SeqIntPtr) slp->data.ptrvalue;
3952 strand = sint->strand;
3953 n_slp = SeqLocIntNew(sint->from, sint->to, 3-strand, sint->id);
3954 if(t_slp == NULL)
3955 t_slp = n_slp;
3956 else
3957 {
3958 n_slp->next = t_slp;
3959 t_slp = n_slp;
3960 }
3961 }
3962 }
3963 SeqLocSetFree((SeqLocPtr) s_loc->data.ptrvalue);
3964 s_loc->data.ptrvalue = t_slp;
3965 }
3966
3967
create_fake_bioseq(SeqLocPtr seq_loc,CharPtr fake_name)3968 static BioseqPtr create_fake_bioseq(SeqLocPtr seq_loc, CharPtr fake_name)
3969 {
3970
3971 BioseqPtr bsp;
3972 SeqPortPtr spp;
3973 ByteStorePtr b_store;
3974 Uint1 residue;
3975 Uint1 code;
3976 SeqLocPtr slp;
3977 Int4 length;
3978
3979 code = Seq_code_iupacna;
3980 length = SeqLocLen(seq_loc);
3981 b_store = BSNew(length + 2);
3982 BSSeek(b_store, 0, SEEK_SET);
3983
3984 slp = NULL;
3985 length = 0;
3986 while((slp = SeqLocFindNext(seq_loc, slp)) != NULL)
3987 {
3988 if(slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE)
3989 {
3990 spp = SeqPortNewByLoc(slp, code);
3991 residue = SeqPortGetResidue(spp);
3992 while(residue != SEQPORT_EOF)
3993 {
3994 if(IS_ALPHA(residue))
3995 BSPutByte(b_store, (Int2)residue);
3996 residue = SeqPortGetResidue(spp);
3997 }
3998 SeqPortFree(spp);
3999 length += SeqLocLen(slp);
4000 }
4001 }
4002
4003 bsp = BioseqNew();
4004 bsp->id = local_id_make(fake_name);
4005 bsp->seq_data = (SeqDataPtr) b_store;
4006 bsp->repr = Seq_repr_raw;
4007 bsp->mol = Seq_mol_dna;
4008 bsp->length = length;
4009 bsp->topology = 1;
4010 bsp->seq_data_type = code;
4011 return bsp;
4012
4013
4014 }
4015
dup_mul_locs(SeqLocPtr slp)4016 static SeqLocPtr dup_mul_locs(SeqLocPtr slp)
4017 {
4018 Int4 start, stop;
4019 Uint1 strand;
4020 SeqLocPtr t_slp, h_slp;
4021 SeqLocPtr curr;
4022
4023 curr = NULL;
4024 h_slp = NULL;
4025 while((curr = SeqLocFindNext(slp, curr)) != NULL)
4026 {
4027 start = SeqLocStart(curr);
4028 stop = SeqLocStop(curr);
4029 strand = SeqLocStrand(curr);
4030 t_slp = SeqLocIntNew(start, stop, strand, SeqLocId(curr));
4031 ValNodeLink(&h_slp, t_slp);
4032 }
4033
4034 if(h_slp->next == NULL)
4035 return h_slp;
4036 else
4037 {
4038 t_slp = ValNodeNew(NULL);
4039 t_slp->choice = SEQLOC_MIX;
4040 t_slp->data.ptrvalue = h_slp;
4041 return t_slp;
4042 }
4043 }
4044
4045
second_seq_has_gap(BioseqPtr s_bsp)4046 static Boolean second_seq_has_gap(BioseqPtr s_bsp)
4047 {
4048 SeqLocPtr slp;
4049 SeqIdPtr sip;
4050 BioseqPtr t_bsp;
4051
4052 for(slp = (SeqLocPtr) s_bsp->seq_ext; slp != NULL; slp = slp->next)
4053 {
4054 if(slp->choice != SEQLOC_NULL)
4055 {
4056 sip = SeqLocId(slp);
4057 if(sip != NULL)
4058 {
4059 t_bsp = BioseqFind(sip);
4060 if(t_bsp != NULL)
4061 {
4062 if(t_bsp->repr == Seq_repr_virtual)
4063 return TRUE;
4064 }
4065 }
4066 }
4067 }
4068
4069 return FALSE;
4070 }
4071
4072
get_best_id(BioseqPtr bsp)4073 static SeqIdPtr get_best_id(BioseqPtr bsp)
4074 {
4075 SeqIdPtr sip;
4076
4077 sip = SeqIdFindBest(bsp->id, 0);
4078 if(sip == NULL || sip->choice != SEQID_GI)
4079 {
4080 for(sip = bsp->id; sip != NULL; sip = sip->next)
4081 {
4082 if(sip->choice == SEQID_LOCAL)
4083 return sip;
4084 }
4085 return bsp->id;
4086 }
4087 else
4088 return sip;
4089 }
4090
4091
4092 /*gap = unknown length is separated as separate segments. This
4093 is to make sure that create_fake_bioseq won't include those
4094 segments
4095 */
4096
is_virtual_segment(SeqIdPtr sip)4097 static Boolean is_virtual_segment(SeqIdPtr sip)
4098 {
4099 BioseqPtr bsp;
4100 ObjectIdPtr oip;
4101
4102 bsp = BioseqFind(sip);
4103 if(bsp != NULL)
4104 {
4105 if(bsp->repr == Seq_repr_virtual || bsp->repr == Seq_repr_map)
4106 return TRUE;
4107 else
4108 return FALSE;
4109 }
4110 if(sip->choice == SEQID_LOCAL)
4111 {
4112 oip = (ObjectIdPtr) sip->data.ptrvalue;
4113 return (oip->str && StringCmp(oip->str, "virtual") == 0);
4114 }
4115
4116 return FALSE;
4117 }
4118
4119
build_slp_for_gcontig(BioseqPtr gcontig_bsp,SeqIdPtr gcontig_sip)4120 static SeqLocPtr build_slp_for_gcontig (BioseqPtr gcontig_bsp, SeqIdPtr gcontig_sip)
4121 {
4122 SeqLocPtr slp, h_slp = NULL, t_slp;
4123 Int4 start = 0, stop = -1;
4124 SeqIdPtr sip;
4125 Boolean update;
4126
4127
4128 for(slp = (SeqLocPtr) gcontig_bsp->seq_ext; slp != NULL; slp = slp->next)
4129 {
4130 if(slp->choice != SEQLOC_NULL)
4131 {
4132 sip = SeqLocId(slp);
4133 /* if(sip->choice != SEQID_GI) */
4134 if(sip != NULL)
4135 {
4136 update = FALSE;
4137 if(is_virtual_segment(sip))
4138 {
4139 t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, gcontig_sip);
4140 ValNodeLink(&h_slp, t_slp);
4141 update = TRUE;
4142 }
4143 stop += SeqLocLen(slp);
4144 if(update)
4145 start = stop + 1;
4146
4147 }
4148 }
4149 else
4150 {
4151 if(start < stop)
4152 {
4153 t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, gcontig_sip);
4154 ValNodeLink(&h_slp, t_slp);
4155 start = stop + 1;
4156 }
4157 }
4158 }
4159
4160 if(start < stop)
4161 {
4162 t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, gcontig_sip);
4163 ValNodeLink(&h_slp, t_slp);
4164 }
4165
4166 if(h_slp == NULL)
4167 return NULL;
4168
4169 if(h_slp->next != NULL)
4170 {
4171 t_slp = ValNodeNew(NULL);
4172 t_slp->choice = SEQLOC_MIX;
4173 t_slp->data.ptrvalue = h_slp;
4174 return t_slp;
4175 }
4176 else
4177 return h_slp;
4178 }
4179
4180
4181 /* Reverses the strand of a Linked list of SeqAlignPtr. */
SeqAlignListReverseStrand(SeqAlignPtr salp)4182 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignListReverseStrand (SeqAlignPtr salp)
4183 {
4184 SeqAlignPtr salptmp;
4185 Int4Ptr lenp;
4186 Int4Ptr startp;
4187 Uint1Ptr strandp;
4188 Int4 numseg;
4189 Int2 dim;
4190 Int4 j, k, n, tmp;
4191
4192 for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
4193 {
4194 if (salptmp->segtype == 2)
4195 {
4196 DenseSegPtr dsp;
4197 dsp = (DenseSegPtr) salptmp->segs;
4198 strandp = dsp->strands;
4199 numseg = dsp->numseg;
4200 dim = dsp->dim;
4201 lenp = dsp->lens;
4202 startp = dsp->starts;
4203 for (j=0; j < numseg*dim && strandp!=NULL; j++, strandp++)
4204 {
4205 if (*strandp == Seq_strand_minus)
4206 *strandp = Seq_strand_plus;
4207 else if (*strandp != Seq_strand_minus)
4208 *strandp = Seq_strand_minus;
4209 }
4210 for (j=0, k=numseg-1; j<numseg/2; j++, k--) {
4211 tmp=lenp[j];
4212 lenp[j]=lenp[k];
4213 lenp[k]=tmp;
4214 }
4215 for (j=0, k=(dim*numseg-dim); j<(dim*numseg-1)/2; j+=dim, k-=dim) {
4216 for (n=0; n<dim; n++) {
4217 tmp=startp[j+n];
4218 startp[j+n]=startp[k+n];
4219 startp[k+n]=tmp;
4220 }
4221 }
4222 } else if (salptmp->segtype ==1) {
4223 DenseDiagPtr ddp,ddp_next,ddp_prev;
4224 ddp_prev=NULL;
4225 ddp = (DenseDiagPtr) salptmp->segs;
4226 while(ddp!=NULL) {
4227 strandp = ddp->strands;
4228 dim = ddp->dim;
4229 for (j=0; j < dim && strandp!=NULL; j++, strandp++)
4230 {
4231 if (*strandp == Seq_strand_minus)
4232 *strandp = Seq_strand_plus;
4233 else if (*strandp != Seq_strand_minus)
4234 *strandp = Seq_strand_minus;
4235 }
4236 ddp_next = ddp->next;
4237 if(salptmp->type==3 || salptmp->type==1) {
4238 ddp->next = ddp_prev;
4239 ddp_prev =ddp;
4240 }
4241 ddp = ddp_next;
4242 }
4243 if(salptmp->type==3 || salptmp->type==1)
4244 salptmp->segs = ddp_prev;
4245 }
4246 }
4247 return salp;
4248 }
4249
SeqAlignDup(SeqAlignPtr salp)4250 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDup (SeqAlignPtr salp)
4251 {
4252 SeqAnnotPtr sap,
4253 sap2;
4254 SeqAlignPtr salp2 = NULL,
4255 next;
4256
4257 next = salp->next;
4258 salp->next = NULL;
4259 sap = SeqAnnotForSeqAlign (salp);
4260 sap2 = (SeqAnnotPtr) AsnIoMemCopy ((Pointer) sap, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
4261 if (sap2!=NULL) {
4262 salp2 = (SeqAlignPtr)sap2->data;
4263 sap2->data = NULL;
4264 SeqAnnotFree (sap2);
4265 }
4266 if (next != NULL)
4267 salp->next = next;
4268 sap->data = NULL;
4269 SeqAnnotFree (sap);
4270 return salp2;
4271 }
4272
SeqAlignListDup(SeqAlignPtr salp)4273 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignListDup (SeqAlignPtr salp)
4274 {
4275 SeqAnnotPtr sap,
4276 sap2;
4277 SeqAlignPtr salp2 = NULL;
4278 if (salp == NULL) return(NULL);
4279 sap = SeqAnnotForSeqAlign (salp);
4280 if (sap == NULL) return(NULL);
4281 sap2 = (SeqAnnotPtr) AsnIoMemCopy ((Pointer) sap, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
4282 if (sap2!=NULL) {
4283 salp2 = sap2->data;
4284 sap2->data = NULL;
4285 SeqAnnotFree (sap2);
4286 }
4287 sap->data = NULL;
4288 SeqAnnotFree (sap);
4289 return salp2;
4290 }
4291
4292