1 /* $Id: salpedit.c,v 6.33 2007/05/07 13:32:02 kans Exp $ */
2 #include <ncbi.h>
3 #include <ncbimisc.h> /* ValNodeLen */
4 #include <sequtil.h> /* for GetScoreAndEvalue, SeqIdDupList */
5 #include <salpedit.h>
6 #include <salpacc.h>
7 #include <seqport.h>
8 #include <salsap.h>
9 
10 #define MIN_DIAG_LEN	20	/*minimal amount of overlap required for build_overlap_list<-MergeTwoAlignList */
11 
12 static Boolean DenseSegInsert(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, DenseSegPtr dsp, Boolean merge, BoolPtr seq_insert);
13 static SeqAlignPtr AttachSeqInAlign(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head);
14 
OrderInt4(Int4Ptr x,Int4Ptr y)15 static void OrderInt4(Int4Ptr x, Int4Ptr y) /* replace jzmisc: swap */
16 {
17   Int4 temp;
18 
19 	if((*x) > (*y)){
20 	  temp = *x;
21 	  *x = *y;
22 	  *y = temp;
23 	}
24 }
25 
26 
27 /*******************************************
28 ***
29 ***  SeqAlignBioseqDeleteById
30 ***     if DenseDiag: delete the SeqAlign
31 ***     if DenseSeg: delete the SeqId, the starts positions,
32 ***        and the corresponding strands
33 ***
34 ********************************************/
35 
SeqAlignBioseqDeleteById(SeqAlignPtr salphead,SeqIdPtr sip)36 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignBioseqDeleteById (SeqAlignPtr salphead, SeqIdPtr sip)
37 {
38   SeqAlignPtr salp,
39               presalp = NULL,
40               nextsalp;
41   SeqIdPtr    presip, nextsip, tmpsip;
42   DenseSegPtr dsp;
43   Int4        j, k;
44   Int2        index = 0;
45 
46   salp = salphead;
47   while (salp!= NULL)
48   {
49      nextsalp = salp->next;
50      tmpsip =SeqIdPtrFromSeqAlign (salp);
51      if ((SeqIdOrderInBioseqIdList(sip, tmpsip)) > 0)
52      {
53         if (salp->segtype == 1 || salp->dim == 2)
54         {
55            if (salphead==NULL)
56               salphead = nextsalp;
57            if (presalp!=NULL)
58               presalp->next = nextsalp;
59            salp->next = NULL;
60            salp = SeqAlignFree (salp);
61            if (presalp!=NULL)
62               salp = presalp;
63         }
64         else if (salp->segtype == 2)
65         {
66            dsp = (DenseSegPtr) salp->segs;
67            presip = NULL;
68            while (tmpsip!=NULL)
69            {
70             nextsip = tmpsip->next;
71             if (SeqIdForSameBioseq (tmpsip, sip))
72             {
73               if (presip==NULL) {
74                  dsp->ids = nextsip;
75               } else {
76                  presip->next = nextsip;
77               }
78               tmpsip->next = NULL;
79               SeqIdFree (tmpsip);
80               k=0;
81               for (j=0; j<dsp->dim*dsp->numseg; j++) {
82                  if (((j-index) % (dsp->dim))!=0) {
83                     dsp->starts[k] = dsp->starts[j];
84                     k++;
85                  }
86               }
87               k=0;
88               if (dsp->strands) {
89                for (j=0; j<dsp->dim*dsp->numseg; j++) {
90                  if (((j-index) % (dsp->dim))!=0) {
91                     dsp->strands[k] = dsp->strands[j];
92                     k++;
93                  }
94                }
95               }
96               dsp->dim--;
97               salp->dim--;
98             }
99             else
100               presip = tmpsip;
101             tmpsip = nextsip;
102             index++;
103            }
104         }
105      }
106      presalp = salp;
107      salp = nextsalp;
108   }
109   return salphead;
110 }
111 
SeqAlignBioseqDeleteByIdCallback(SeqEntryPtr sep,Pointer mydata,Int4 index,Int2 indent)112 static void SeqAlignBioseqDeleteByIdCallback (SeqEntryPtr sep, Pointer mydata,
113                                           Int4 index, Int2 indent)
114 {
115   BioseqPtr          bsp;
116   BioseqSetPtr       bssp;
117   SeqAlignPtr        salp,
118                      salptmp;
119   SeqIdPtr           sip;
120 
121   sip = (SeqIdPtr)(mydata);
122   if (sip!=NULL && sep != NULL && sep->data.ptrvalue) {
123      if (IS_Bioseq(sep)) {
124         bsp = (BioseqPtr) sep->data.ptrvalue;
125         if (bsp!=NULL) {
126            salp = is_salp_in_sap (bsp->annot, 2);
127            if (salp!=NULL) {
128               for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
129                  salptmp = SeqAlignBioseqDeleteById  (salp, sip);
130            }
131         }
132      }
133      else if(IS_Bioseq_set(sep)) {
134         bssp = (BioseqSetPtr)sep->data.ptrvalue;
135         if (bssp!=NULL) {
136            salp = is_salp_in_sap (bssp->annot, 2);
137            if (salp!=NULL) {
138               for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
139                  salptmp = SeqAlignBioseqDeleteById  (salp, sip);
140            }
141         }
142      }
143   }
144 }
145 
SeqAlignBioseqDeleteByIdFromSeqEntry(SeqEntryPtr sep,SeqIdPtr sip)146 NLM_EXTERN Pointer LIBCALL SeqAlignBioseqDeleteByIdFromSeqEntry (SeqEntryPtr sep, SeqIdPtr sip)
147 {
148   SeqEntryExplore (sep, (Pointer)sip, SeqAlignBioseqDeleteByIdCallback);
149   return sep;
150 }
151 
152 /**************************************************************************
153 ***
154 *	Functions for editing a Seq-align object
155 *
156 ***************************************************************************
157 ***/
SeqAlignInsert(SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,SeqAlignPtr head,Boolean merge,Boolean add_insertion)158 static SeqAlignPtr SeqAlignInsert(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head, Boolean merge, Boolean add_insertion)
159 {
160 	DenseSegPtr dsp;
161         DenseDiagPtr ddp;
162 	StdSegPtr ssp;
163 	Boolean seq_insert;
164 	BioseqPtr bsp;
165 	SeqAlignPtr align;
166 
167 	align = head;
168 	seq_insert = FALSE;
169 	while(align)
170 	{
171 		switch(align->segtype)
172 		{
173 			case 0:
174 				break;
175 			case 1:
176 				ddp = (DenseDiagPtr)(align->segs);
177 				break;
178 			case 2:
179 				dsp = (DenseSegPtr)(align->segs);
180 				DenseSegInsert(from_id, from, to, strand, to_id, pos, dsp, merge, &seq_insert);
181 				break;
182 		        case 3:
183 				ssp = (StdSegPtr)(align->segs);
184 				break;
185 			default:
186 				break;
187 		}
188 		align = align->next;
189 	}
190 
191 	/*sequence is not inserted*/
192 	if(!seq_insert && add_insertion)
193 	{
194 		bsp = BioseqFind(from_id);
195 		if(bsp->repr != Seq_repr_seg)
196 			head= AttachSeqInAlign(from_id, from, to, strand, to_id, pos, head);
197 	}
198 	return head;
199 
200 }
201 
get_t_order(SeqIdPtr ids,SeqIdPtr t_id)202 static Int2 get_t_order(SeqIdPtr ids, SeqIdPtr t_id)
203 {
204 	Int2 t_order = 0;
205 
206 	if(t_id == NULL)
207 		return -1;
208 	while(ids)
209 	{
210 		if(SeqIdForSameBioseq(t_id, ids))
211 			return t_order;
212 		++t_order;
213 		ids = ids->next;
214 	}
215 	return -1;
216 
217 }
218 
219 
220 
get_seg(ValNodePtr v_starts,Int2 dim,Int4Ptr starts)221 static Boolean get_seg(ValNodePtr v_starts, Int2 dim, Int4Ptr starts)
222 {
223 	Int2 i;
224 
225 	for(i=0; i<dim && v_starts != NULL; ++i, v_starts = v_starts->next)
226 		starts[i] = v_starts->data.intvalue;
227 	return (i == dim);
228 }
229 
230 
Free_node(ValNodePtr v_starts,ValNodePtr pv_starts,Int2 dim)231 static ValNodePtr Free_node(ValNodePtr v_starts, ValNodePtr pv_starts, Int2 dim)
232 {
233 	Int2 i;
234 	ValNodePtr next = NULL;
235 
236 	for(i =0; i<dim; ++i)
237 	{
238 		next= v_starts->next;
239 		v_starts->next = NULL;
240 		ValNodeFree(v_starts);
241 		v_starts = next;
242 	 }
243 
244 	while(dim > 1)
245 	{
246 	    pv_starts = pv_starts->next;
247 	    --dim;
248 	}
249 	 pv_starts->next = next;
250 	 return next;
251 
252 }
253 
is_gap_segment(Int4Ptr c_starts,Int2 dim)254 static Boolean is_gap_segment(Int4Ptr c_starts, Int2 dim)
255 {
256 	Int2 i;
257 	Int2 numgap = 0;
258 
259 	for(i =0; i<dim; ++i)
260 	{
261 		if(c_starts[i] == -1)
262 			++numgap;
263 	}
264 	return (numgap >= (dim -1));
265 }
266 
mod_prev_start(ValNodePtr p_starts,Int4Ptr c_starts,Int4Ptr c_strands,Int2 dim)267 static void mod_prev_start(ValNodePtr p_starts, Int4Ptr c_starts, Int4Ptr c_strands, Int2 dim)
268 {
269 	Int2 i;
270 
271 	for(i =0; i<dim; ++i)
272 	{
273 		if(c_strands[i] == (Int4)Seq_strand_minus)
274 			p_starts->data.intvalue = c_starts[i];
275 		p_starts = p_starts->next;
276 	}
277 }
278 
279 /****************************************************************************
280 ***
281 *	return TRUE if needs another round of modification
282 *
283 *****************************************************************************
284 ***/
mod_alignment(ValNodePtr v_starts,ValNodePtr v_strands,ValNodePtr v_lens,Int2 dim,Int2 m_order,Int2Ptr n_segs)285 static Boolean mod_alignment(ValNodePtr v_starts, ValNodePtr v_strands, ValNodePtr v_lens, Int2 dim, Int2 m_order, Int2Ptr n_segs)
286 {
287     ValNodePtr pv_starts, pv_lens, pv_strands;
288     Boolean has_prev;
289     Int4 c_len, p_len=0;
290     Int4 p_start, c_start;
291     Int2 i;
292     Boolean retval;
293     Int4Ptr cur_starts, prev_starts;
294     Int4Ptr cur_strands, prev_strands;
295     Boolean merge;
296 
297 	retval = FALSE;
298 	cur_starts = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
299 	prev_starts = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
300 	cur_strands = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
301 	prev_strands = (Int4Ptr) MemNew((size_t)dim * sizeof(Int4));
302 
303 	has_prev = FALSE;
304 	pv_starts = NULL;
305 	pv_strands = NULL;
306 	pv_lens = NULL;
307 	while(v_starts && v_strands && v_lens)
308 	{
309 		get_seg(v_starts, dim, cur_starts);
310 		get_seg(v_strands, dim, cur_strands);
311 		c_len = v_lens->data.intvalue;
312 		merge = FALSE;
313 
314 		if(has_prev) /*check if the two can be merged*/
315 		{
316 			merge = TRUE;
317 			for(i =0; i<dim; ++i)
318 			{
319 				p_start = prev_starts[i];
320 				c_start = cur_starts[i];
321 				if(cur_strands[i] != prev_strands[i])
322 				{
323 					merge = FALSE;
324 					break;
325 				}
326 				if(p_start == -1 && c_start != -1)
327 				{
328 					merge = FALSE;
329 					break;
330 				}
331 				if(p_start != -1 && c_start == -1)
332 				{
333 					merge = FALSE;
334 					break;
335 				}
336 				if(p_start != -1 && c_start != -1)
337 				{
338 					if(cur_strands[i] == (Int4)Seq_strand_minus)
339 					{
340 						if(c_start + c_len != p_start)
341 						{
342 							merge = FALSE;
343 							break;
344 						}
345 					}
346 					else
347 					{
348                                                 if(p_start + p_len != c_start)
349 						{
350 							merge = FALSE;
351 							break;
352 						}
353 					}
354 				}
355 			}
356 		}
357 		if(merge)
358 		{
359 			p_len += c_len;
360 			pv_lens->data.intvalue = p_len;
361 			mod_prev_start(pv_starts, cur_starts, cur_strands, dim);
362 			v_starts = Free_node(v_starts, pv_starts, dim);
363 			v_strands = Free_node(v_strands, pv_strands, dim);
364 			v_lens = Free_node(v_lens, pv_lens, 1);
365 			-- (*n_segs);
366 			retval = TRUE;
367 		}
368 		else
369 		{
370 			pv_starts = v_starts;
371 			pv_strands = v_strands;
372 			pv_lens = v_lens;
373 			MemCopy(prev_starts, cur_starts, (size_t)dim * sizeof(Int4));
374 			MemCopy(prev_strands, cur_strands, (size_t)dim * sizeof(Int4));
375 			p_len = c_len;
376 			for(i =0; i<dim; ++i)
377 			{
378 				v_starts = v_starts->next;
379 	  			v_strands = v_strands->next;
380 			}
381 			v_lens = v_lens->next;
382 			has_prev = TRUE;
383 
384 		}
385 
386 	}
387 
388 	MemFree(prev_starts);
389 	MemFree(cur_starts);
390 	MemFree(prev_strands);
391 	MemFree(cur_strands);
392 	return retval;
393 
394 }
395 
load_new_data(Int4Ptr starts,Uint1Ptr strands,Int4Ptr lens,ValNodePtr v_starts,ValNodePtr v_strands,ValNodePtr v_lens,Int2 dim,Int2 cur_seg)396 static void load_new_data(Int4Ptr starts, Uint1Ptr strands, Int4Ptr lens, ValNodePtr v_starts, ValNodePtr v_strands, ValNodePtr v_lens, Int2 dim, Int2 cur_seg)
397 {
398   Int2 i, j;
399 
400 	for (i =0; i<cur_seg; ++i)
401 	{
402 	   for(j =0; j<dim; ++j)
403 	   {
404 	      starts[i*dim+j] = v_starts->data.intvalue;
405 	      strands[i*dim+j] = (Uint1)(v_strands->data.intvalue);
406 	      v_starts = v_starts->next;
407 	      v_strands = v_strands->next;
408 	    }
409 	    lens[i] = v_lens->data.intvalue;
410 	    v_lens = v_lens->next;
411 	}
412 
413 }
414 
415 
get_nth_id(SeqIdPtr ids,Int2 order)416 static SeqIdPtr get_nth_id(SeqIdPtr ids, Int2 order)
417 {
418    Int2 i;
419 
420 	i =0;
421 	while(ids){
422 		if(order == i)
423 			return ids;
424 		++i;
425 		ids = ids->next;
426 	}
427 
428 	return NULL;
429 }
430 
make_dsp(ValNodePtr v_starts,ValNodePtr v_lens,ValNodePtr v_strands,Int2 m_order,Int2 dim,DenseSegPtr prev,Int2 cur_seg,Boolean merge,SeqIdPtr m_sip,SeqIdPtr s_sip)431 static DenseSegPtr make_dsp(ValNodePtr v_starts, ValNodePtr v_lens, ValNodePtr v_strands, Int2 m_order, Int2 dim, DenseSegPtr prev, Int2 cur_seg, Boolean merge, SeqIdPtr m_sip, SeqIdPtr s_sip)
432 {
433    Int4Ptr starts, lens;
434    Uint1Ptr strands;
435    DenseSegPtr dsp;
436 
437     /*while(merge)*/
438    if(merge)
439          merge = mod_alignment(v_starts, v_strands, v_lens, dim, m_order, &cur_seg);
440     starts = (Int4Ptr) MemNew((size_t)(dim* cur_seg) * sizeof(Int4));
441     lens= (Int4Ptr) MemNew((size_t)(cur_seg) * sizeof(Int4));
442     strands= (Uint1Ptr) MemNew((size_t)(dim * cur_seg) * sizeof(Uint1));
443 
444 	load_new_data(starts, strands, lens, v_starts, v_strands, v_lens, dim, cur_seg);
445 	ValNodeFree(v_starts);
446  	ValNodeFree(v_strands);
447 	ValNodeFree(v_lens);
448 	if(prev)
449     {
450 		MemFree(prev->starts);
451 		MemFree(prev->lens);
452 		MemFree(prev->strands);
453 		prev->numseg = cur_seg;
454 		prev->starts = starts;
455 		prev->strands = strands;
456 		prev->lens = lens;
457 		return prev;
458 	}
459 	else
460 	{
461 		if(m_sip == NULL || s_sip == NULL)
462 		{
463 			Message(MSG_ERROR, "Fail to get the new DenseSeg");
464 			MemFree(starts);
465 			MemFree(lens);
466 			MemFree(strands);
467 			return NULL;
468 		}
469 		dsp = DenseSegNew();
470 		dsp->dim = dim;
471 		dsp->ids = SeqIdDup(m_sip);
472 		dsp->ids->next = SeqIdDup(s_sip);
473 		dsp->numseg = cur_seg;
474 		dsp->starts = starts;
475 		dsp->lens = lens;
476 		dsp->strands = strands;
477 		return dsp;
478 	}
479 
480 }
481 
482 
DenseSegInsert(SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,DenseSegPtr dsp,Boolean merge,BoolPtr seq_insert)483 static Boolean DenseSegInsert(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, DenseSegPtr dsp, Boolean merge, BoolPtr seq_insert)
484 {
485    Int2 cur_seg, i, j;
486    Int2 t_order;      /*the order of target sequence*/
487    Int4 t_start, t_stop, t_max;
488    Int4 cur_start;
489    Int2 dim;
490    Int4Ptr starts, lens;
491    Uint1 cur_strand;
492    Uint1Ptr strands;
493    ValNodePtr v_starts, v_lens, v_strands;
494    SeqIdPtr c_id;
495    Int4 ins_len;
496    Boolean add_insert_seg;
497    Boolean has_source_id;
498    Int4 gap_size, s_gap_size;
499    Int4 s_last, t_last, s_first, t_first;
500    Int2 s_order = -1;
501 
502 
503 
504 	if(pos <0)
505 		return FALSE;
506 	t_max = 0;
507 	t_order = get_t_order(dsp->ids, to_id);
508 	if(t_order == -1)
509 		return FALSE;
510 	dim = dsp->dim;
511 	starts = dsp->starts;
512 	lens = dsp->lens;
513 	strands = dsp->strands;
514 
515 	cur_seg = 0;
516 	v_starts = NULL;
517 	v_lens = NULL;
518 	v_strands = NULL;
519 	ins_len = to - from +1;
520 	has_source_id = FALSE;
521 	for(c_id = dsp->ids, i =0; c_id != NULL; c_id = c_id->next, ++i)
522 	{
523 		if(SeqIdMatch(c_id, from_id))
524 		{
525 			s_order = i;
526 			has_source_id = TRUE;
527 			break;
528 		}
529 	}
530 
531 
532 	/*check if it can be inserted before the start of the alignment*/
533 	gap_size = -1;
534 	s_gap_size = -1;
535 	if(*seq_insert == FALSE && has_source_id)
536 	{
537 		t_first = dsp->starts[t_order];
538 		cur_strand = dsp->strands[t_order];
539 		if(t_first != -1)
540 		{
541 			if(cur_strand == Seq_strand_minus)
542 			{
543 				t_first += dsp->lens[0];
544 				if(t_first <= pos)
545 					gap_size = pos - t_first;
546 			}
547 			else	/*plus strand*/
548 			{
549 				if(t_first >=pos )
550 					gap_size = t_first - pos;
551 			}
552 		}
553 
554 		s_first = dsp->starts[s_order];
555 		cur_strand = dsp->strands[s_order];
556 		if(s_first != -1)
557 		{
558 
559 			if(cur_strand == Seq_strand_minus)
560 			{
561 				s_first += dsp->lens[0];
562 				if(s_first <= from)
563 					s_gap_size = from - s_first;
564 			}
565 			else
566 			{
567 				if(s_first > 0)
568 				{
569 					--s_first;
570 					if(s_first >=to)
571 						s_gap_size = s_first - to;
572 				}
573 			}
574 		}
575 		if(gap_size >= 0 && s_gap_size >= 0  && gap_size == s_gap_size)
576 		{
577 			for(j = 0; j<dim; ++j)
578 			{
579 				cur_start = -1;
580 				cur_strand = dsp->strands[dim*(dsp->numseg-1) + j];
581 				if(j == t_order)
582 				{
583 					cur_start = pos;
584 				}
585 				if(j == s_order)
586 				{
587 					if(cur_strand == Seq_strand_minus)
588 						cur_start = from - gap_size;
589 					else
590 						cur_start = from;
591 				}
592 				ValNodeAddInt(&v_starts, 0, cur_start);
593 				ValNodeAddInt(&v_strands, 0, cur_strand);
594 			}
595 			ValNodeAddInt(&v_lens, 0, (ins_len + gap_size));
596 			*seq_insert = TRUE;
597 			++cur_seg;
598 		}
599 	}
600 
601 
602 
603 	for(i =0; i<dsp->numseg; ++i)
604 	{
605 		t_start = starts[dim*i +t_order];
606 		if(t_start != -1)
607 		{
608 			t_stop = t_start + lens[i] -1;
609 			t_max = MAX(t_stop, t_max);
610 
611 			/*insertion is downstrems of the current segment*/
612 			/*copy everything*/
613 			if(pos  > t_stop)
614 			{	/*downstream insertions*/
615 				for(j=0; j<dim; ++j)
616 				{
617 					cur_start = starts[dim*i + j];
618 					cur_strand = (strands[dim*i + j]);
619 					ValNodeAddInt(&v_starts, 0, cur_start);
620 					ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
621 				}
622 				ValNodeAddInt(&v_lens, 0, lens[i]);
623 				++cur_seg;
624 			}
625 
626 			/*insertion is upstream, add offset*/
627 			if(pos < t_start)
628 			{
629 				for(j=0; j<dim; ++j)
630 				{
631 					cur_start = starts[dim*i + j];
632 					cur_strand = (strands[dim*i + j]);
633 					if(j == t_order)
634 						cur_start += ins_len;
635 					ValNodeAddInt(&v_starts, 0, cur_start);
636 					ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
637 				}
638 				ValNodeAddInt(&v_lens, 0, lens[i]);
639 				++cur_seg;
640 			}
641 
642 			/*insert within the current segment*/
643 			if(pos >= t_start && pos <= t_stop) /*insert within*/
644 			{
645 				/*if the segment is broken, add the first half*/
646 				if(pos > t_start)
647 				{
648 					for(j = 0; j<dim; ++j)
649 					{
650 						cur_start = starts[dim*i+j];
651 						cur_strand = strands[dim*i + j];
652 						ValNodeAddInt(&v_starts, 0, cur_start);
653 						ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
654 					}
655 					ValNodeAddInt(&v_lens, 0, (pos - t_start));
656 					++cur_seg;
657 				}
658 
659 				/*add the inserted segment*/
660 				if(i == 0)
661 					add_insert_seg = (has_source_id && *seq_insert == FALSE);
662 				else
663 					add_insert_seg = TRUE;
664 
665 				if(add_insert_seg)
666 				{
667 					for(j = 0; j<dim ; ++j)
668 					{
669 						cur_start = -1;
670 						cur_strand = strands[dim*i + j];
671 						if(has_source_id && j == s_order)
672 						{
673 							*seq_insert = TRUE;
674 							cur_start = from;
675 							cur_strand = strand;
676 						}
677 						if(j == t_order)
678 							cur_start = pos;
679 						ValNodeAddInt(&v_starts, 0, cur_start);
680 						ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
681 					}
682 					ValNodeAddInt(&v_lens, 0, ins_len);
683 					++cur_seg;
684 				}
685 
686 				/*add the leftover segment*/
687 				for(j=0; j<dim; ++j)
688 				{
689 					cur_start = starts[dim*i+j];
690 					cur_strand = strands[dim*i + j];
691 					if(j == t_order)
692 						cur_start = pos + ins_len;
693 					ValNodeAddInt(&v_starts, 0, cur_start);
694 					ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
695 				}
696 				ValNodeAddInt(&v_lens, 0, (lens[i] - (pos - t_start)));
697 				++cur_seg;
698 			}
699 		}
700 
701 		else	/*gaps in the target sequence*/
702 		{
703 			for(j=0; j<dim; ++j)
704 			{
705 				cur_start = starts[dim*i + j];
706 				cur_strand = (strands[dim*i + j]);
707 				ValNodeAddInt(&v_starts, 0, cur_start);
708 				ValNodeAddInt(&v_strands, 0, (Int4)cur_strand);
709 			}
710 			ValNodeAddInt(&v_lens, 0, lens[i]);
711 			++cur_seg;
712 		}
713 	}
714 
715 
716 	/*attach to the end of the sequence*/
717 	gap_size = -1;
718 	s_gap_size = -1;
719 	if(*seq_insert == FALSE && has_source_id)
720 	{
721 		t_last = dsp->starts[dim*(dsp->numseg-1) +t_order];
722 		cur_strand = dsp->strands[dim*(dsp->numseg-1) + t_order];
723 		if(t_last != -1)
724 		{
725 			if(cur_strand == Seq_strand_minus)
726 			{
727 				t_last -= 1;
728 				if(pos <= t_last)
729 					gap_size = t_last - pos;
730 			}
731 			else	/*plus strand*/
732 			{
733 				t_last += dsp->lens[dsp->numseg-1];
734 				if(pos >= t_last)
735 					gap_size = pos - t_last;
736 			}
737 		}
738 
739 		s_last = dsp->starts[dim*(dsp->numseg-1) +s_order];
740 		cur_strand = dsp->strands[dim*(dsp->numseg-1) + s_order];
741 		if(s_last != -1)
742 		{
743 
744 			if(cur_strand == Seq_strand_minus)
745 			{
746 				s_last -= 1;
747 				s_gap_size = s_last - to;
748 			}
749 			else
750 			{
751 				s_last += dsp->lens[dsp->numseg-1];
752 				s_gap_size = from - s_last;
753 			}
754 		}
755 		if(gap_size >=0 && s_gap_size >=0 && gap_size == s_gap_size)
756 		{
757 			for(j = 0; j<dim; ++j)
758 			{
759 				cur_start = -1;
760 				cur_strand = dsp->strands[dim*(dsp->numseg-1) + j];
761 				if(j == t_order)
762 				{
763 					if(cur_strand == Seq_strand_minus)
764 						cur_start = t_last - (ins_len + gap_size -1);
765 					else
766 						cur_start = t_last;
767 				}
768 				if(j == s_order)
769 				{
770 					if(cur_strand == Seq_strand_minus)
771 						cur_start = from;
772 					else
773 						cur_start = from - gap_size;
774 				}
775 				ValNodeAddInt(&v_starts, 0, cur_start);
776 				ValNodeAddInt(&v_strands, 0, cur_strand);
777 			}
778 			ValNodeAddInt(&v_lens, 0, (ins_len + gap_size));
779 			*seq_insert = TRUE;
780 			++cur_seg;
781 		}
782 	}
783 
784 
785 	dsp = make_dsp(v_starts, v_lens, v_strands, t_order, dim, dsp, cur_seg, merge, NULL, NULL);
786 	return TRUE;
787 }
788 
789 
store_data(Int4 val,ValNodePtr head)790 static ValNodePtr store_data(Int4 val, ValNodePtr head)
791 {
792 	ValNodeAddInt(&head, 0, val);
793 	return head;
794 }
795 
store_reverse_data(Int4 val,ValNodePtr head)796 static ValNodePtr store_reverse_data(Int4 val, ValNodePtr head)
797 {
798    ValNodePtr new_vnp;
799 
800 	new_vnp = ValNodeNew(NULL);
801 	new_vnp->data.intvalue = val;
802 
803 	new_vnp->next = head;
804 	return new_vnp;
805 }
806 
find_last_node(ValNodePtr head)807 static ValNodePtr find_last_node(ValNodePtr head)
808 {
809 	if(head != NULL)
810 	{
811 	   while(head->next != NULL)
812 	      head = head->next;
813 	   return head;
814 	}
815 	return NULL;
816 
817 }
818 
link_to_end(ValNodePtr new_vnp,ValNodePtr head)819 static ValNodePtr link_to_end(ValNodePtr new_vnp, ValNodePtr head)
820 {
821      ValNodePtr l_node;
822 
823      if(head == NULL)
824 	return new_vnp;
825       l_node = find_last_node(head);
826       l_node->next = new_vnp;
827 	return head;
828 }
829 
find_nth_node(ValNodePtr vnp,Int2 order)830 static ValNodePtr find_nth_node(ValNodePtr vnp, Int2 order)
831 {
832    Int2 i;
833 
834 	i =0;
835 	while(vnp ){
836 	   if(i == order)
837 	      return vnp;
838 	   vnp = vnp->next;
839 	   ++i;
840 	}
841 	return NULL;
842 }
843 
get_last_pos(ValNodePtr v_starts,ValNodePtr v_lens)844 static Int4 get_last_pos(ValNodePtr v_starts, ValNodePtr v_lens)
845 {
846 	ValNodePtr prev;
847 	Int4 start, pos;
848 
849 	prev = NULL;
850 	while(v_starts->next != NULL)
851 	{
852 		prev = v_starts;
853 		v_starts = v_starts->next;
854 	}
855 	if(prev == NULL)
856 		return -1;
857 	start = prev->data.intvalue;
858 	while(v_lens->next != NULL)
859 		v_lens = v_lens->next;
860 	pos = start + v_lens->data.intvalue -1;
861 	return pos;
862 }
863 
864 
865 
866 
867 /*************************************************************************
868 ***
869 *	only consider the master sequence is on plus strand
870 *	and m_order is always == 0
871 *
872 **************************************************************************
873 ***/
merge_two_seg(ValNodePtr PNTR v_starts,ValNodePtr vs_starts,ValNodePtr PNTR v_lens,ValNodePtr vs_lens,ValNodePtr PNTR v_strands,ValNodePtr vs_strands,Int4 pos,Uint1 s_strand,Int2 numseg)874 static Boolean merge_two_seg(ValNodePtr PNTR v_starts, ValNodePtr vs_starts, ValNodePtr PNTR v_lens, ValNodePtr vs_lens, ValNodePtr PNTR v_strands, ValNodePtr vs_strands, Int4 pos, Uint1 s_strand, Int2 numseg)
875 {
876       Int4 gap, last_pos;
877       ValNodePtr l_start, l_len;
878 
879       gap = (*v_starts)->data.intvalue - (pos);
880       if(gap >=0)
881       {
882 	  /*if(gap > 0)
883 	  {
884 	     *v_starts = store_reverse_data(-1, *v_starts);
885 	     *v_starts = store_reverse_data(pos, *v_starts);
886 	     *v_strands = store_reverse_data(s_strand, *v_strands);
887 	     *v_strands = store_reverse_data(1, *v_strands);
888 	     *v_lens = store_reverse_data(gap+1, *v_lens);
889 	  }*/
890 	  last_pos = get_last_pos(vs_starts, vs_lens);
891 	  if(last_pos == -1)
892 		return FALSE;
893 
894 	  gap = (*v_starts)->data.intvalue - last_pos -1;
895 	  if(gap >0)
896 		return FALSE;
897 	  *v_starts = link_to_end(*v_starts, vs_starts);
898 	  *v_strands = link_to_end(*v_strands, vs_strands);
899 	  *v_lens = link_to_end(*v_lens, vs_lens);
900 	  return TRUE;
901       }
902 
903       l_start = find_nth_node(*v_starts, 2*(numseg-1));
904       l_len= find_nth_node(*v_lens, (numseg-1));
905       last_pos = l_start->data.intvalue + l_len->data.intvalue -1;
906       gap = pos - last_pos;
907       if(gap >=0)
908       {
909 	  /*if(gap >0)
910 	  {
911 	     *v_starts = store_data(pos, *v_starts);
912 	     *v_starts = store_data(-1, *v_starts);
913 	     *v_strands = store_data(1, *v_strands);
914 	     *v_strands = store_data(s_strand, *v_strands);
915 	     *v_lens = store_data(gap+1, *v_lens);
916 	   }*/
917 	  gap = vs_starts->data.intvalue - last_pos -1;
918 	  if(gap >0)
919 		return FALSE;
920 	  *v_starts = link_to_end(vs_starts, *v_starts);
921 	  *v_strands = link_to_end(vs_strands, *v_strands);
922 	  *v_lens = link_to_end(vs_lens, *v_lens);
923 	  return TRUE;
924 	}
925 
926 	return FALSE;
927 }
928 
929 
930 /************************************************************************
931 ***
932 *	load the portion of alignment (from, to, strand) in f_dsp to
933 *	the interval where to_id points to
934 *	from_id has already been inserted into to_id, so the region
935 *	can be mapped to the interval on to_id
936 *
937 *************************************************************************
938 ***/
dsp_process(DenseSegPtr f_dsp,Int4 from,Int4 to,Uint1 strand,SeqIdPtr from_id,SeqIdPtr to_id,Int4 pos,ValNodePtr PNTR vs_starts,ValNodePtr PNTR vs_strands,ValNodePtr PNTR vs_lens)939 static Boolean dsp_process(DenseSegPtr f_dsp, Int4 from, Int4 to, Uint1 strand, SeqIdPtr from_id, SeqIdPtr to_id, Int4 pos, ValNodePtr PNTR vs_starts, ValNodePtr PNTR vs_strands, ValNodePtr PNTR vs_lens)
940 {
941    Int4Ptr starts, lens;
942    Uint1Ptr strands;
943    Uint1 i_strand;
944    SeqLocPtr m_loc;
945    Int4 m_start, m_stop, m_pos;
946    Int4 off_start, off_stop;
947    Int4 t_start, t_stop;
948    Int4 s_start, s_len;
949    Int2 m_order, i;
950    Boolean is_found;
951    BioseqPtr tbsp;
952 
953 
954    if(f_dsp == NULL)
955 	return FALSE;
956    tbsp = BioseqFind(to_id);
957    if(tbsp == NULL)
958 	return FALSE;
959    starts = f_dsp->starts;
960    lens = f_dsp->lens;
961    strands = f_dsp->strands;
962    m_loc = SeqLocIntNew(0, 0, 1, from_id);
963    m_pos = -1;
964    m_order = get_t_order(f_dsp->ids, from_id);
965    is_found = FALSE;
966    *vs_starts = NULL;
967    *vs_strands = NULL;
968    *vs_lens = NULL;
969 
970    for(i = 0; i<f_dsp->numseg; ++i)
971    {
972 	m_start = starts[2*i+m_order];
973 	if(m_start != -1){
974 	    m_stop = m_start + lens[i] -1;
975 	    m_pos = m_stop +1;
976 	}
977 	else
978 	{
979 	    m_start = m_pos;
980 	    m_stop = m_pos;
981 	}
982 	if(m_start != -1 && m_stop != -1)
983 	{
984 	    if(!(m_start > to) || (m_stop < from))
985 	    {
986 	       off_start = MAX(0, from-m_start);
987 	       off_stop = MAX(0, m_stop - to);
988 	       /*if(tbsp->repr == Seq_repr_seg)
989 	       {
990 	         m_start = MAX(m_start, from);
991 	         m_stop = MIN(m_stop, to);
992 	         update_seq_loc(m_start, m_stop, strand, m_loc);
993 	         t_start = GetOffsetInBioseq(m_loc, tbsp, SEQLOC_LEFT_END);
994 	         t_stop = GetOffsetInBioseq(m_loc, tbsp, SEQLOC_RIGHT_END);
995 	       }
996 	       else
997 	       {*/
998 		 if(strand == Seq_strand_minus)
999 		 {
1000 		    t_start = (pos-1) -(to - from) +  MAX((to-m_stop), 0);
1001 		    t_stop = pos -1 - MAX((m_start-from), 0);
1002 		 }
1003 		 else
1004 		 {
1005 		    t_start = (pos-1) - (to - from)  + MAX((m_start-from), 0);
1006 		    t_stop = pos-1 - MAX((to - m_stop), 0);
1007 		 }
1008 		/*}*/
1009 	       if(t_start != -1 && t_stop != -1)
1010 	       {
1011 	       OrderInt4(&t_start, &t_stop);
1012 	       s_start = starts[2*i +1-m_order];
1013 	       if(s_start != -1)
1014 	       {
1015 	         if(strands[2*i] == strands[2*i+1])
1016 		   s_start += off_start;
1017 		 else
1018 		   s_start += off_stop;
1019 		}
1020 		s_len = lens[i] - (off_start + off_stop);
1021 		if(s_len >0)
1022 		{
1023 		 is_found = TRUE;
1024 		 i_strand = strands[2*i+1-m_order];
1025 		 if(starts[2*i+m_order] == -1)
1026 		    t_start = -1;
1027 		 if(strand != strands[2*i+m_order]){ /*reverse the seg order*/
1028 		    i_strand = 3 - i_strand;
1029 		    *vs_starts = store_reverse_data(s_start, *vs_starts);
1030 		    *vs_starts = store_reverse_data(t_start, *vs_starts);
1031 		    *vs_strands = store_reverse_data((Int4)i_strand, *vs_strands);
1032 		    *vs_strands = store_reverse_data(1, *vs_strands);
1033 		    *vs_lens = store_reverse_data(s_len, *vs_lens);
1034 		  }
1035 		  else
1036 		  {
1037 		    *vs_starts = store_data(t_start, *vs_starts);
1038 		    *vs_starts = store_data(s_start, *vs_starts);
1039 		    *vs_strands = store_data(1, *vs_strands);
1040 		    *vs_strands = store_data((Int4)i_strand, *vs_strands);
1041 		    *vs_lens = store_data(s_len, *vs_lens);
1042 		  }
1043 		 }  /*end of if(s_len>0)*/
1044 	       }
1045 	     }
1046 	}
1047     }
1048     SeqLocFree(m_loc);
1049     return is_found;
1050 
1051 }
1052 
1053 
get_cur_seg(ValNodePtr lens)1054 static Int2 get_cur_seg(ValNodePtr lens)
1055 {
1056   Int2 i=0;
1057 
1058 	while(lens)
1059 	{
1060 	  ++i;
1061 	  lens = lens->next;
1062 	}
1063 	return i;
1064 }
1065 
1066 /************************************************************************
1067 ***
1068 *	DenseSegMerge(only for the two-dimentional pairwise alignment
1069 *	pos is the position on the merged sequence
1070 *
1071 *************************************************************************
1072 ***/
DenseSegMerge(DenseSegPtr f_dsp,SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,DenseSegPtr t_dsp)1073 static Boolean DenseSegMerge(DenseSegPtr f_dsp, SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, DenseSegPtr t_dsp)
1074 {
1075    Int2 f_order, t_order;
1076    SeqIdPtr fs_id, ts_id;
1077    Int2 i, j;
1078    ValNodePtr v_starts, v_lens, v_strands, vs_starts, vs_lens, vs_strands;
1079    Int4Ptr starts, lens;
1080    Uint1Ptr strands;
1081    Int2 cur_seg;
1082    Uint1 i_strand;
1083    BioseqPtr t_bsp;
1084 
1085    t_order = get_t_order(t_dsp->ids, to_id);
1086    if(t_order == -1)
1087 	return FALSE;
1088    f_order = get_t_order(f_dsp->ids, from_id);
1089    if(f_order == -1)
1090 	return FALSE;
1091 
1092    fs_id = get_nth_id(f_dsp->ids, 1-f_order);
1093    ts_id = get_nth_id(t_dsp->ids, 1-t_order);
1094    if(!SeqIdForSameBioseq(fs_id, ts_id))
1095 	return FALSE;
1096 
1097    vs_starts =NULL;
1098    vs_strands = NULL;
1099    vs_lens = NULL;
1100    if(!dsp_process(f_dsp, from, to, strand, from_id, to_id, pos, &vs_starts, &vs_strands, &vs_lens))		/*the  alignment is not within [from, to]*/
1101    	return TRUE;
1102    i_strand = (Uint1)(vs_strands->next->data.intvalue);
1103    t_bsp = BioseqFind(to_id);
1104    if(pos == BioseqGetLen(t_bsp))		/*insertion at the end*/
1105 	pos = pos - (to - from +1) -1;
1106 
1107    v_starts =NULL;
1108    v_strands = NULL;
1109    v_lens = NULL;
1110    starts = t_dsp->starts;
1111    lens = t_dsp->lens;
1112    strands = t_dsp->strands;
1113    for(i =0; i<t_dsp->numseg; ++i)
1114    {
1115        for(j =0; j<2; ++j)
1116        {
1117           v_starts = store_data(starts[2*i +j], v_starts);
1118 	  v_strands = store_data(strands[2*i+j], v_strands);
1119 	}
1120 	v_lens = store_data(lens[i], v_lens);
1121     }
1122 
1123     if(!merge_two_seg(&v_starts, vs_starts, &v_lens, vs_lens, &v_strands, vs_strands, pos, i_strand, t_dsp->numseg))	/*fail to brought togather*/
1124     {
1125 	ValNodeFree(v_starts);
1126 	ValNodeFree(vs_starts);
1127 	ValNodeFree(v_lens);
1128 	ValNodeFree(vs_lens);
1129 	ValNodeFree(v_strands);
1130 	ValNodeFree(vs_strands);
1131 	return FALSE;
1132     }
1133 
1134     cur_seg = get_cur_seg(v_lens);
1135     t_dsp = make_dsp(v_starts, v_lens, v_strands, t_order, 2, t_dsp, cur_seg, TRUE, to_id, fs_id);
1136     return TRUE;
1137 }
1138 
make_new_align(DenseSegPtr f_dsp,Int4 from,Int4 to,Uint1 strand,SeqIdPtr from_id,SeqIdPtr to_id,Int4 pos,SeqAlignPtr head)1139 static SeqAlignPtr make_new_align(DenseSegPtr f_dsp, Int4 from, Int4 to, Uint1 strand, SeqIdPtr from_id, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head)
1140 {
1141    ValNodePtr vs_starts, vs_strands, vs_lens;
1142    DenseSegPtr dsp;
1143    Int2 cur_seg;
1144    SeqAlignPtr align;
1145    SeqIdPtr fs_id;
1146 
1147    if(!dsp_process(f_dsp, from, to, strand, from_id, to_id, pos, &vs_starts, &vs_strands, &vs_lens))		/*the  alignment is not within [from, to]*/
1148 	return head;
1149 
1150     fs_id = f_dsp->ids->next;
1151     cur_seg = get_cur_seg(vs_lens);
1152     dsp = make_dsp(vs_starts, vs_lens, vs_strands, 0, 2, NULL, cur_seg, TRUE, to_id, fs_id);
1153     if(dsp)
1154     {
1155        align = SeqAlignNew();
1156        align->segtype = 2;
1157        align->segs = dsp;
1158        align->type =3;
1159        if(head == NULL)
1160 	  head = align;
1161 	else
1162 	  head = SeqAlignLink(align, head);
1163     }
1164     return head;
1165 }
1166 
1167 
1168 
JZSeqAlignMerge(SeqAlignPtr f_align,SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,SeqAlignPtr t_align)1169 static SeqAlignPtr JZSeqAlignMerge(SeqAlignPtr f_align, SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr t_align)
1170 {
1171    SeqAlignPtr new_salp, curr;
1172    DenseSegPtr f_dsp, t_dsp;
1173    Boolean is_found;
1174 
1175 	if(f_align == NULL)
1176 		return t_align;
1177 
1178 	new_salp = NULL;
1179 	while(f_align)
1180 	{
1181 	   if(f_align->segtype == 2)
1182 	   {
1183 	      f_dsp = (DenseSegPtr) f_align->segs;
1184 	      is_found = FALSE;
1185 	      curr = t_align;
1186 	      while(curr && !is_found)
1187 	      {
1188 		 if(curr->segtype ==2)
1189 		 {
1190 		    t_dsp = (DenseSegPtr) curr->segs;
1191 		    is_found = DenseSegMerge(f_dsp, from_id, from, to, strand, to_id, pos, t_dsp);
1192 		 }
1193 		 curr = curr->next;
1194 	       }
1195 
1196 	       if(!is_found)
1197 	          new_salp = make_new_align(f_dsp, from, to, strand, from_id, to_id, pos, new_salp);
1198 
1199 
1200 		f_align = f_align->next;
1201 	     }
1202 	}
1203 
1204 	if(new_salp != NULL){
1205 	       if(t_align == NULL)
1206 		   t_align = new_salp;
1207 	       else
1208 	           t_align = SeqAlignLink(new_salp, t_align);
1209 	}
1210 	return t_align;
1211 
1212 
1213 }
1214 
1215 
1216 
1217 /***************************************************************************
1218 ***
1219 *	AttachSeqInAlign(): for a sequence not found in the previous alignment
1220 *	data and it is used in producing the alignment, it would be attached
1221 *	as a part of the alignment node for the seq-hist of the new sequence
1222 *
1223 ******************************************************************************
1224 ***/
AttachSeqInAlign(SeqIdPtr from_id,Int4 from,Int4 to,Uint1 strand,SeqIdPtr to_id,Int4 pos,SeqAlignPtr head)1225 static SeqAlignPtr AttachSeqInAlign(SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos, SeqAlignPtr head)
1226 {
1227    DenseSegPtr dsp;
1228    ValNodePtr v_starts, v_lens, v_strands;
1229    SeqAlignPtr align;
1230 
1231    v_starts = NULL;
1232    v_lens = NULL;
1233    v_strands = NULL;
1234 
1235 
1236    ValNodeAddInt(&v_starts, 0, pos);
1237    ValNodeAddInt(&v_starts, 0, from);
1238    ValNodeAddInt(&v_strands, 0, (Int4)Seq_strand_plus);
1239    ValNodeAddInt(&v_strands, 0, (Int4)strand);
1240    ValNodeAddInt(&v_lens, 0, (to - from +1));
1241    dsp = make_dsp(v_starts, v_lens, v_strands, 0, 2, NULL, 1, TRUE, to_id, from_id);
1242    align = SeqAlignNew();
1243    align->segtype = 2;
1244    align->segs = dsp;
1245    align->type =3;
1246    if(head == NULL)
1247         head = align;
1248    else
1249 	head = SeqAlignLink(align, head);
1250    return head;
1251 
1252 }
1253 
1254 
mod_old_align(SeqAlignPtr PNTR old,SeqIdPtr sip)1255 static void mod_old_align(SeqAlignPtr PNTR old, SeqIdPtr sip)
1256 {
1257 	SeqAlignPtr curr, prev;
1258 	SeqIdPtr c_sip;
1259 
1260 	prev = NULL;
1261 	curr = *old;
1262 
1263 	while(curr)
1264 	{
1265 		c_sip = SeqAlignId(curr, 1);
1266 		if(SeqIdForSameBioseq(c_sip, sip))
1267 		{
1268 			if(prev == NULL)
1269 				*old= curr->next;
1270 			else
1271 				prev->next = curr->next;
1272 			curr->next = NULL;
1273 			SeqAlignFree(curr);
1274 			return;
1275 		}
1276 		prev = curr;
1277 		curr = curr->next;
1278 	}
1279 }
1280 
attach_new_align(SeqAlignPtr new_salp,SeqAlignPtr PNTR old)1281 static void attach_new_align(SeqAlignPtr new_salp, SeqAlignPtr PNTR old) {
1282 	SeqIdPtr sip;
1283 	SeqAlignPtr align;
1284 
1285 	if(new_salp == NULL)
1286 		return;
1287 	align = new_salp;
1288 	while(align)
1289 	{
1290 		sip = SeqAlignId(align, 1);
1291 		if(sip)
1292 			mod_old_align(old, sip);
1293 		align= align->next;
1294 	}
1295 
1296 	if(*old== NULL)
1297 		*old = new_salp;
1298 	else
1299 		(*old) = SeqAlignLink(new_salp, (*old));
1300 
1301 }
1302 
delete_bad_node(ValNodePtr PNTR head,Uint1 choice)1303 static void delete_bad_node(ValNodePtr PNTR head, Uint1 choice)
1304 {
1305 	ValNodePtr prev, next, curr;
1306 
1307 	curr = *head;
1308 	prev = NULL;
1309 
1310 	while(curr)
1311 	{
1312 		next = curr->next;
1313 		if(curr->choice != choice)
1314 		{
1315 			if(prev == NULL)
1316 				*head = next;
1317 			else
1318 				prev->next = next;
1319 			curr->next = NULL;
1320 			ValNodeFree(curr);
1321 		}
1322 		else
1323 			prev = curr;
1324 
1325 		curr = next;
1326 	}
1327 }
count_coverage(BoolPtr used_list,Int4 size)1328 static Int4 count_coverage (BoolPtr used_list, Int4 size)
1329 {
1330 	Int4 i;
1331 	Int4 count;
1332 
1333 	count = 0;
1334 	for(i = 0; i<size; ++i)
1335 	{
1336 		if(used_list[i] != 0)
1337 			++count;
1338 	}
1339 	return count;
1340 }
1341 
1342 
1343 static Int2 filter_seqid_order;
CompareChainProc(VoidPtr ptr1,VoidPtr ptr2)1344 static int LIBCALLBACK CompareChainProc(VoidPtr ptr1, VoidPtr ptr2)
1345 {
1346   ValNodePtr   vnp1;
1347   ValNodePtr   vnp2;
1348   DenseDiagPtr ddp1, ddp2;
1349   Int4 start1, stop1, start2, stop2;
1350 
1351   start1 = -1;
1352   start2 = -1;
1353   if (ptr1 != NULL && ptr2 != NULL) {
1354     vnp1 = *((ValNodePtr PNTR) ptr1);
1355     vnp2 = *((ValNodePtr PNTR) ptr2);
1356     if (vnp1 != NULL && vnp2 != NULL) {
1357       ddp1 = (DenseDiagPtr) vnp1->data.ptrvalue;
1358       ddp2 = (DenseDiagPtr) vnp2->data.ptrvalue;
1359 	  start1 = ddp1->starts[filter_seqid_order];
1360 	  stop1 = start1 + ddp1->len -1;
1361 	  start2 = ddp2->starts[filter_seqid_order];
1362 	  stop2 = start2 + ddp2->len -1;
1363       if(stop1> stop2)
1364 		return 1;
1365       else if(stop1 < stop2)
1366 		return -1;
1367       else if (stop1 == stop2)
1368       {
1369 		if(ddp1->len > ddp2->len)
1370 			return -1;
1371 		else if(ddp1->len < ddp2->len)
1372 			return 1;
1373 		else
1374 			return 0;
1375       }
1376     }
1377   }
1378   return 0;
1379 }
1380 
1381 
1382 /*
1383 *	return 0 for no overlap
1384 *	return 1 for _2 overlap too much with _1
1385 *	return 2 for _1 overlap too much with _1
1386 */
has_too_much_overlap(Int4 start_1,Int4 stop_1,Int4 start_2,Int4 stop_2)1387 static Uint1 has_too_much_overlap (Int4 start_1, Int4 stop_1, Int4 start_2, Int4 stop_2)
1388 {
1389 	Int4 m_start, m_stop;
1390 	FloatHi value;
1391 	Int4 len_1, len_2;
1392 
1393 	if(start_2 > stop_1 || stop_2 < start_1) /*no overlap */
1394 		return 0;
1395 
1396 	if(start_2 >= start_1 && stop_2 <= stop_1)	/*completely contained within*/
1397 		return 1;
1398 
1399 	if(start_1 >= start_2 && stop_1 <= start_2)
1400 		return 2;
1401 
1402 	m_start = MAX(start_1, start_2);
1403 	m_stop = MIN(stop_1, stop_2);
1404 
1405 	len_1 = stop_1 - start_1 +1 ;
1406 	len_2 = stop_2 - start_2 + 1;
1407 
1408 	value = (FloatHi)(m_stop - m_start + 1)/(FloatHi)(MIN(len_1, len_2));
1409 	if(value >= 0.7)
1410 	{
1411 		if(len_1 >= len_2)
1412 			return 1;
1413 		else
1414 			return 2;
1415 	}
1416 	else
1417 		return 0;
1418 }
1419 
1420 
1421 
convert_ddp_to_array(ValNodePtr ddp_list,Uint1 strand,Int2 s_order,Int4Ptr p_num)1422 static Int4Ptr convert_ddp_to_array (ValNodePtr ddp_list, Uint1 strand, Int2 s_order, Int4Ptr p_num)
1423 {
1424 	Int4Ptr val;
1425 	Int4 num;
1426 	ValNodePtr curr;
1427 	DenseDiagPtr ddp;
1428 	Int4 i;
1429 
1430 	num = 0;
1431 	for(curr = ddp_list; curr != NULL; curr = curr->next)
1432 	{
1433 		++num;
1434 	}
1435 
1436 	val = (Int4Ptr) MemNew((size_t)num * sizeof(Int4));
1437 
1438 	/*if strand == minus, check the descending order of the stop.
1439 	  otherwise, check the ascending order of the start */
1440 
1441 	i = 0;
1442 	for(curr = ddp_list; curr != NULL; curr = curr->next)
1443 	{
1444 		ddp = (DenseDiagPtr) curr->data.ptrvalue;
1445 		if(strand == Seq_strand_minus)
1446 			val[i++] = ddp->starts[1-s_order] + ddp->len -1;
1447 		else
1448 			val[i++] = ddp->starts[1-s_order];
1449 	}
1450 
1451 	*p_num = num;
1452 	return val;
1453 }
1454 
1455 
get_score_from_list(ScorePtr scores)1456 static Int4 get_score_from_list (ScorePtr scores)
1457 {
1458 	ObjectIdPtr oip;
1459 
1460 	if(scores == NULL)
1461 		return 0;
1462 	while(scores)
1463 	{
1464 		oip = scores->id;
1465 		if(oip && StringCmp(oip->str, "score") == 0)
1466 			return scores->value.intvalue;
1467 		scores = scores->next;
1468 	}
1469 
1470 	return 0;
1471 }
1472 
1473 
get_score_from_DenseDiag(DenseDiagPtr ddp)1474 static Int4 get_score_from_DenseDiag (DenseDiagPtr ddp)
1475 {
1476 	return get_score_from_list (ddp->scores);
1477 }
1478 
filter_this_seg(DenseDiagPtr curr,DenseDiagPtr PNTR h_list,Int2 order)1479 static Boolean filter_this_seg (DenseDiagPtr curr, DenseDiagPtr PNTR h_list, Int2 order)
1480 {
1481 	Int4 start, stop, t_start, t_stop;
1482 	Int4 a_start, a_stop;
1483 	Int4 score, t_score;
1484 	DenseDiagPtr prev, next, list;
1485 	Boolean found;
1486 	Int4 len;
1487 
1488 	start = curr->starts[order];
1489 	stop = start + curr->len -1;
1490 	score = get_score_from_DenseDiag (curr);
1491 	prev = NULL;
1492 	list = *h_list;
1493 	while(list)
1494 	{
1495 		next = list->next;
1496 		t_start = list->starts[order];
1497 		t_stop = t_start + list->len -1;
1498 		t_score = get_score_from_DenseDiag (list);
1499 
1500 		found = FALSE;
1501 		if(!(t_start > stop || t_stop < start))
1502 		{
1503 			a_start = MAX(t_start, start);
1504 			a_stop = MIN(t_stop, stop);
1505 			len = MAX(t_stop, stop) - MIN(t_start, start);
1506 			if((FloatHi)(a_stop - a_start)/(FloatHi)len > 0.80)
1507 			{
1508 				if(score > t_score)
1509 				{
1510 					if(prev == NULL)
1511 						*h_list = next;
1512 					else
1513 						prev->next = next;
1514 					list->next = NULL;
1515 					DenseDiagFree(list);
1516 					found = TRUE;
1517 				}
1518 			}
1519 		}
1520 
1521 		if(!found)
1522 			prev = list;
1523 		list = next;
1524 	}
1525 
1526 	list = *h_list;
1527 	while(list)
1528 	{
1529 		t_start = list->starts[order];
1530 		t_stop = t_start+ list->len -1;
1531 		t_score = get_score_from_DenseDiag (list);
1532 
1533 		if(!(t_start > stop || t_stop < start))
1534 		{
1535 			a_start = MAX(t_start, start);
1536 			a_stop = MIN(t_stop, stop);
1537 			if((FloatHi)(a_stop - a_start)/(FloatHi)(stop - start) > 0.50)
1538 			{
1539 				if(score < t_score)
1540 					return TRUE;
1541 			}
1542 		}
1543 		list = list->next;
1544 	}
1545 
1546 	return FALSE;
1547 }
1548 
1549 
is_same_orientation(Uint1 strand_1,Uint1 strand_2)1550 static Boolean is_same_orientation(Uint1 strand_1, Uint1 strand_2)
1551 {
1552 	if(strand_1 == strand_2)
1553 		return TRUE;
1554 	if(strand_1 == Seq_strand_minus)
1555 		return (strand_2 == Seq_strand_minus);
1556 	else
1557 		return (strand_2 != Seq_strand_minus);
1558 }
1559 
filter_wrong_orientation(SeqAlignPtr align,Boolean same_orient)1560 static void filter_wrong_orientation (SeqAlignPtr align, Boolean same_orient)
1561 {
1562 	DenseDiagPtr ddp, prev, next;
1563 	Boolean del;
1564 
1565 	ddp = (DenseDiagPtr) align->segs;
1566 	prev = NULL;
1567 
1568 	while(ddp)
1569 	{
1570 		next = ddp->next;
1571 		del = (is_same_orientation(ddp->strands[0], ddp->strands[1]) != same_orient);
1572 		if(del)
1573 		{
1574 			if(prev == NULL)
1575 				align->segs = next;
1576 			else
1577 				prev->next = next;
1578 			ddp->next = NULL;
1579 			DenseDiagFree(ddp);
1580 		}
1581 		else
1582 			prev = ddp;
1583 		ddp = next;
1584 	}
1585 }
1586 
1587 
get_overlap(BoolPtr status,Int4 from,Int4 len,Int4Ptr poverlap)1588 static FloatHi get_overlap(BoolPtr status, Int4 from, Int4 len, Int4Ptr poverlap)
1589 {
1590 	Int4 i;
1591 	Int4 overlap_len;
1592 
1593 	overlap_len = 0;
1594 	for(i = 0; i<len; ++i)
1595 	{
1596 		if(status[i+from])
1597 			++overlap_len;
1598 	}
1599 
1600 	*poverlap = overlap_len;
1601 	return (FloatHi)overlap_len/(FloatHi)len;
1602 }
1603 
1604 
is_overlap(BoolPtr status,Int4 from,Int4 len)1605 static Boolean is_overlap(BoolPtr status, Int4 from, Int4 len)
1606 {
1607 	Int4 i;
1608 	Int4 overlap_len;
1609 
1610 	overlap_len = 0;
1611 	for(i = 0; i<len; ++i)
1612 	{
1613 		if(status[i+from])
1614 			++overlap_len;
1615 	}
1616 
1617 	if((FloatHi)overlap_len/(FloatHi)len> 0.7)
1618 		return TRUE;
1619 	else
1620 	{
1621 		MemSet(status+from, 1, (size_t)(len) * sizeof(Boolean));
1622 		return FALSE;
1623 	}
1624 }
1625 
1626 
set_ddp_status(ValNodePtr ddp_list,Int4 first_len,Int4 second_len)1627 static void set_ddp_status(ValNodePtr ddp_list, Int4 first_len, Int4 second_len)
1628 {
1629 	BoolPtr first_status, second_status;
1630 	DenseDiagPtr ddp;
1631 	FloatHi val_1, val_2;
1632 	Int4 overlap_1, overlap_2;
1633 
1634 	first_status = (BoolPtr) MemNew((size_t)first_len * sizeof(Boolean));
1635 	second_status = (BoolPtr) MemNew((size_t)second_len * sizeof(Boolean));
1636 
1637 	while(ddp_list)
1638 	{
1639 		ddp = (DenseDiagPtr) ddp_list->data.ptrvalue;
1640 		val_1 = get_overlap(first_status, ddp->starts[0], ddp->len, &overlap_1);
1641 		val_2 = get_overlap(second_status, ddp->starts[1], ddp->len, &overlap_2);
1642 
1643 		if(val_1 >= 0.95 || val_2 >=0.95
1644 			|| (ddp->len - MAX(overlap_1, overlap_2) < MIN(50, ddp->len-10))
1645 			|| (val_1 > 0.7 && val_2 >0.7))
1646 		{
1647 			/* if(ddp->starts[0] == 35)
1648 				printf("val_1 = %lf, val_2 = %lf, overlap_1 = %ld, overlap_2 = %ld, len = %ld\n", val_1, val_2, overlap_1, overlap_2, ddp->len);
1649 			*/
1650 			ddp_list->choice = 0;
1651 		}
1652 		else
1653 		{
1654 			MemSet(first_status+ddp->starts[0], 1, (size_t)(ddp->len) * sizeof(Boolean));
1655 			MemSet(second_status+ddp->starts[1], 1, (size_t)(ddp->len) * sizeof(Boolean));
1656 			ddp_list->choice = 1;
1657 		}
1658 		ddp_list = ddp_list->next;
1659 	}
1660 
1661 	MemFree(first_status);
1662 	MemFree(second_status);
1663 }
1664 
free_diag_in_list(ValNodePtr ddp_list,DenseDiagPtr ddp)1665 static Boolean free_diag_in_list(ValNodePtr ddp_list, DenseDiagPtr ddp)
1666 {
1667 	DenseDiagPtr c_ddp;
1668 
1669 	while(ddp_list)
1670 	{
1671 		c_ddp = (DenseDiagPtr) ddp_list->data.ptrvalue;
1672 		if(c_ddp == ddp)
1673 			return (ddp_list->choice == 1);
1674 		ddp_list = ddp_list->next;
1675 	}
1676 
1677 	return FALSE;
1678 }
1679 
filter_repeats(ValNodePtr PNTR ddp_list,Int4 first_len,Int4 second_len)1680 static void filter_repeats (ValNodePtr PNTR ddp_list, Int4 first_len, Int4 second_len)
1681 {
1682 
1683 	set_ddp_status(*ddp_list, first_len, second_len);
1684 	delete_bad_node(ddp_list, 1);
1685 }
1686 
get_process_list(ValNodePtr ddp_list)1687 static Int4Ptr get_process_list (ValNodePtr ddp_list)
1688 {
1689 	Int4 num, i, j;
1690 	ValNodePtr curr;
1691 	Int4Ptr order, score_list;
1692 	DenseDiagPtr ddp;
1693 	Boolean change = TRUE;
1694 	Int4 temp;
1695 
1696 	num = 0;
1697 	for(curr = ddp_list; curr != NULL; curr = curr->next)
1698 		++num;
1699 
1700 	score_list = (Int4Ptr) MemNew((size_t)num * sizeof(ValNodePtr));
1701 	order = (Int4Ptr) MemNew((size_t)num * sizeof(Int4));
1702 
1703 	i = 0;
1704 	for(curr = ddp_list; curr != NULL; curr = curr->next)
1705 	{
1706 		ddp = (DenseDiagPtr) curr->data.ptrvalue;
1707 		score_list[i] = get_score_from_DenseDiag (ddp);
1708 		order[i] = i;
1709 		++i;
1710 	}
1711 
1712 	for(i = 0; i<num-1 && change; ++i)
1713 	{
1714 		change = FALSE;
1715 		for(j = 0; j<num-i-1; ++j)
1716 		{
1717 			if(score_list[j+1] > score_list[j])
1718 			{
1719 
1720 				change =TRUE;
1721 				temp = score_list[j];
1722 				score_list[j] = score_list[j+1];
1723 				score_list[j+1] = temp;
1724 
1725 				temp = order[j];
1726 				order[j] = order[j+1];
1727 				order[j+1] = temp;
1728 			}
1729 		}
1730 	}
1731 
1732 	MemFree(score_list);
1733 	return order;
1734 }
1735 
1736 
get_nth_node(ValNodePtr vnp,Int4 order)1737 static ValNodePtr get_nth_node(ValNodePtr vnp, Int4 order)
1738 {
1739 	Int4 i;
1740 
1741 	i = 0;
1742 	while(vnp)
1743 	{
1744 		if(i == order)
1745 			return vnp;
1746 		++i;
1747 		vnp = vnp->next;
1748 	}
1749 
1750 	return NULL;
1751 }
1752 
get_alignment_orientation(ValNodePtr PNTR pddp_list,BioseqPtr bsp,Int2 s_order)1753 static Uint1 get_alignment_orientation(ValNodePtr PNTR pddp_list, BioseqPtr bsp, Int2 s_order)
1754 {
1755 	Int4 num;
1756 	Int4Ptr score_order;
1757 	ValNodePtr curr;
1758 	DenseDiagPtr ddp;
1759 	BoolPtr plus_used_list, minus_used_list;
1760 	Int4 plus_count, minus_count;
1761 	Int4 i;
1762 	ValNodePtr list, ddp_list;
1763 	Uint1 strand, t_strand;
1764 
1765 	ddp_list = *pddp_list;
1766 	num = 0;
1767 	for(curr = ddp_list; curr != NULL; curr = curr->next)
1768 		++num;
1769 
1770 	score_order = get_process_list (ddp_list);
1771 	plus_used_list = (BoolPtr) MemNew((size_t)bsp->length * sizeof(Boolean));
1772 	minus_used_list = (BoolPtr) MemNew((size_t)bsp->length * sizeof(Boolean));
1773 
1774 
1775 
1776 	for(i = 0; i<MIN(num, 10); ++i)
1777 	{
1778 		curr = get_nth_node(ddp_list, score_order[i]);
1779 		if(curr != NULL)
1780 		{
1781 			ddp = (DenseDiagPtr) curr->data.ptrvalue;
1782 			if(ddp->strands[0] != ddp->strands[1] &&
1783 				(ddp->strands[0] == Seq_strand_minus ||
1784 					ddp->strands[1] == Seq_strand_minus))
1785 				MemSet(minus_used_list+ddp->starts[s_order], 1,
1786                         	(size_t)(ddp->len) * sizeof(Boolean));
1787 			else
1788 				MemSet(plus_used_list+ddp->starts[s_order], 1,
1789                         	(size_t)(ddp->len) * sizeof(Boolean));
1790 		}
1791 	}
1792 
1793 	plus_count = count_coverage (plus_used_list, bsp->length);
1794 	minus_count = count_coverage (minus_used_list, bsp->length);
1795 	MemFree(plus_used_list);
1796 	MemFree(minus_used_list);
1797 
1798 	if(plus_count == 0 && minus_count == 0)
1799 	{
1800 		MemFree(score_order);
1801 		return 0;
1802 	}
1803 	else if(plus_count > minus_count)
1804 		strand = Seq_strand_plus;
1805 	else
1806 		strand = Seq_strand_minus;
1807 
1808 	/*only load the ddp_list with the right orientation*/
1809 	list = NULL;
1810 	for(i = 0; i<num; ++i)
1811 	{
1812 		curr = get_nth_node(ddp_list, score_order[i]);
1813 		ddp = (DenseDiagPtr) curr->data.ptrvalue;
1814 		if(ddp->strands[0] != ddp->strands[1] &&
1815 			(ddp->strands[0] == Seq_strand_minus ||
1816 				ddp->strands[1] == Seq_strand_minus))
1817 			t_strand = Seq_strand_minus;
1818 		else
1819 			t_strand = Seq_strand_plus;
1820 		if(t_strand == strand)
1821 			ValNodeAddPointer(&list, 0, ddp);
1822 	}
1823 	ValNodeFree(*pddp_list);
1824 	*pddp_list = list;
1825 	MemFree(score_order);
1826 	return strand;
1827 
1828 
1829 }
1830 
1831 
1832 
1833 /*
1834 *	check to see if ddp falls in the order of the existing list
1835 *
1836 */
load_ddp_in_order(ValNodePtr PNTR h_list,DenseDiagPtr ddp,Boolean reverse,Boolean add)1837 static Boolean load_ddp_in_order(ValNodePtr PNTR h_list, DenseDiagPtr ddp, Boolean reverse, Boolean add)
1838 {
1839 	Int4 m_stop, s_stop;
1840 	Int4 mt_stop, st_stop;
1841 	ValNodePtr curr, prev;
1842 	DenseDiagPtr t_ddp;
1843 	Boolean load;
1844 	ValNodePtr vnp;
1845 	Int4 i;
1846 
1847 	m_stop = ddp->starts[0] + ddp->len -1;
1848 	s_stop = ddp->starts[1] + ddp->len -1;
1849 
1850 	for(curr = *h_list; curr != NULL; curr = curr->next)
1851 	{
1852 		t_ddp = (DenseDiagPtr) curr->data.ptrvalue;
1853 		if(ddp->len <= t_ddp->len)
1854 		{
1855 			for(i = 0; i<2; ++i)
1856 			{
1857 				if(ddp->starts[i] >= t_ddp->starts[i])
1858 				{
1859 					if(t_ddp->starts[i] + t_ddp->len >= ddp->starts[i] + ddp->len)
1860 						return FALSE;
1861 				}
1862 			}
1863 		}
1864 	}
1865 	prev = NULL;
1866 	curr = *h_list;
1867 	while(curr)
1868 	{
1869 		t_ddp = (DenseDiagPtr) curr->data.ptrvalue;
1870 		mt_stop = t_ddp->starts[0] + t_ddp->len -1;
1871 		st_stop = t_ddp->starts[1] + t_ddp->len -1;
1872 		if(m_stop <= mt_stop)
1873 		{
1874 			if(reverse)
1875 				load = (s_stop >= st_stop);
1876 			else
1877 				load = (s_stop <= st_stop);
1878 			/*check with the previous order*/
1879 			if(load)
1880 			{
1881 				if(prev != NULL)
1882 				{
1883 					t_ddp = (DenseDiagPtr) prev->data.ptrvalue;
1884 					mt_stop = t_ddp->starts[0] + t_ddp->len -1;
1885 					st_stop = t_ddp->starts[1] + t_ddp->len -1;
1886 
1887 					if(m_stop < mt_stop)
1888 						load = FALSE;
1889 					else
1890 					{
1891 						if(reverse)
1892 							load = (s_stop <= st_stop);
1893 						else
1894 							load = (s_stop >= st_stop);
1895 					}
1896 				}
1897 			}
1898 			if(load)
1899 			{
1900 				if(add)
1901 				{
1902 					vnp = ValNodeNew(NULL);
1903 					vnp->choice = 1;
1904 					vnp->data.ptrvalue = ddp;
1905 					if(prev == NULL)
1906 						*h_list = vnp;
1907 					else
1908 						prev->next = vnp;
1909 					vnp->next = curr;
1910 				}
1911 				return TRUE;
1912 			}
1913 			else
1914 				return FALSE;
1915 		}
1916 		prev = curr;
1917 		curr = curr->next;
1918 	}
1919 
1920 	if(prev != NULL)
1921 	{
1922 		if(reverse)
1923 			load = (s_stop <= st_stop);
1924 		else
1925 			load = (s_stop >= st_stop);
1926 
1927 		if(load)
1928 		{
1929 			vnp = ValNodeNew(NULL);
1930 			vnp->choice = 1;
1931 			vnp->data.ptrvalue = ddp;
1932 			prev->next = vnp;
1933 			return TRUE;
1934 		}
1935 	}
1936 
1937 	return FALSE;
1938 }
1939 
1940 
1941 
load_ddp_scores(ValNodePtr ddp_list,Int4Ptr num)1942 static Int4Ptr load_ddp_scores(ValNodePtr ddp_list, Int4Ptr num)
1943 {
1944 	DenseDiagPtr ddp;
1945 	Int4 i;
1946 	Int4Ptr score_list;
1947 	ValNodePtr curr;
1948 
1949 	i = 0;
1950 	for(curr = ddp_list; curr != NULL; curr = curr->next)
1951 		++i;
1952 	if(i == 0)
1953 		return NULL;
1954 	*num = i;
1955 	score_list = (Int4Ptr) MemNew((size_t)i * sizeof(Int4));
1956 	i = 0;
1957 	for(curr = ddp_list; curr != NULL; curr = curr->next)
1958 	{
1959 		ddp = (DenseDiagPtr) curr->data.ptrvalue;
1960 		score_list[i] = get_score_from_list (ddp->scores);
1961 		++i;
1962 	}
1963 	return score_list;
1964 }
1965 
1966 
is_ddp_overlap(DenseDiagPtr ddp,BoolPtr first_status,BoolPtr second_status)1967 static Boolean is_ddp_overlap(DenseDiagPtr ddp, BoolPtr first_status, BoolPtr second_status)
1968 {
1969 	FloatHi val_1, val_2;
1970 	Int4 overlap_1, overlap_2;
1971 
1972 	val_1 = get_overlap(first_status, ddp->starts[0], ddp->len, &overlap_1);
1973 	val_2 = get_overlap(second_status, ddp->starts[1], ddp->len, &overlap_2);
1974 
1975 	if(val_1 >=0.95 || val_2 >=0.95 || (val_1 > 0.7 && val_2 >0.7))
1976 		return TRUE;
1977 
1978 	/*maximum non-overlap region is less than 50-bp*/
1979 	if(ddp->len - MAX(overlap_1, overlap_2) < MIN(50, ddp->len-10))
1980 		return TRUE;
1981 	return FALSE;
1982 }
1983 
1984 
1985 /*
1986 * lns.c - Given a sequence of numbers, this program computes
1987 *	  the Longest Nondecreasing Subsequence.
1988 *
1989 * Feb. 20, 1997
1990 *
1991 * Program by Jinghui Zhang & Kun-Mao Chao
1992 * Toolkit version : H. Sicotte, 1/04/1999
1993 */
1994 
1995 /* Input :
1996     call lns()
1997 	S : a sequence of numbers;
1998 	len_s : the length of the sequences;
1999    Output:
2000 	LNS : the positions of the Longest Nondecreasing Subsequence;
2001 	return value : the length of the LNS;
2002    Note: "-1" entries are allowed.
2003 */
2004 
2005 /* Input :
2006         SS : a sequence of numbers; (without "-1" entries)
2007         len_ss : the length of the sequences;
2008    Output:
2009         LNS : the positions of the Longest Nondecreasing Subsequence;
2010         return value : the length of the LNS;
2011 */
pure_lns(Int4Ptr SS,Int4 len_ss,Int4Ptr LNS)2012 static Int4 pure_lns(Int4Ptr SS, Int4 len_ss, Int4Ptr LNS)
2013 {
2014 	Int4 len_lns;
2015         Int4Ptr PREV=NULL,BEST=NULL,BEST_POS=NULL;
2016 	Int4 i, low, high, mid;
2017 	Int4 t;
2018 
2019 	len_lns = 1;
2020         if(len_ss) {
2021             PREV = (Int4Ptr) Malloc(sizeof(Int4)*len_ss);
2022             BEST = (Int4Ptr) Malloc(sizeof(Int4)*len_ss);
2023             BEST_POS = (Int4Ptr) Malloc(sizeof(Int4)*len_ss);
2024         } else
2025             return 0;
2026 	BEST[0] = SS[0];
2027 	BEST_POS[0] = 0;
2028 	PREV[0] = -1;
2029 	for (i=1; i<len_ss; ++i) {
2030 	   t = SS[i];
2031 	   low = 0;
2032 	   high = len_lns-1;
2033 	   while (low < high) {
2034 		mid = (low + high) / 2;
2035 		if (t >= BEST[mid]) low = mid + 1;
2036 		else high = mid;
2037 	   }
2038 	   if (t < BEST[low]) {
2039 		if (low == 0) PREV[i] = -1;
2040 		else PREV[i] = BEST_POS[low-1];
2041 		BEST[low] = t;
2042 		BEST_POS[low] = i;
2043 	   } else {
2044 		BEST[len_lns] = t;
2045 		BEST_POS[len_lns] = i;
2046 		PREV[i] = BEST_POS[len_lns-1];
2047 		++len_lns;
2048 	   }
2049 	}
2050 
2051 	/* trace back */
2052 
2053 	t = BEST_POS[len_lns-1];
2054 	for (i=len_lns-1; i>=0; --i) {
2055 		LNS[i] = t;
2056 		t = PREV[t];
2057 	}
2058 
2059         if(PREV)
2060             Free(PREV);
2061         if(BEST)
2062             Free(BEST);
2063         if(BEST_POS)
2064             Free(BEST_POS);
2065 	return len_lns;
2066 }
lns(Int4Ptr S,Int4 len_s,Int4Ptr LNS)2067 static Int4 lns(Int4Ptr S, Int4 len_s, Int4Ptr LNS)
2068 {
2069 	Int4Ptr SS=NULL;
2070 	Int4 len_ss, len_lns;
2071 	Int4 i, j;
2072         SS=(Int4Ptr)Malloc(len_s*sizeof(Int4));
2073 	/* screen out those "-1" entries */
2074 	len_ss = 0;
2075 	for (i=0; i<len_s; ++i)
2076 	   if (S[i] >= 0)
2077 		SS[len_ss++] = S[i];
2078 
2079 	len_lns = pure_lns(SS, len_ss, LNS);
2080 
2081 	/* add the offset */
2082 	for (i=0, j=0; j<len_lns; ++i)
2083 	   if (S[i] == SS[LNS[j]]) LNS[j++] = i;
2084 
2085         if(SS)
2086             Free(SS);
2087 	return len_lns;
2088 }
2089 
2090 
2091 
2092 
sort_list_order(ValNodePtr PNTR ph_list,Uint1 strand)2093 static void sort_list_order(ValNodePtr PNTR ph_list, Uint1 strand)
2094 {
2095 	ValNodePtr h_list;
2096 	ValNodePtr curr;
2097 	DenseDiagPtr ddp;
2098 	Int4 val;
2099 	ValNodePtr best_list = NULL;
2100 
2101 
2102 	h_list = *ph_list;
2103 	if(h_list == NULL || h_list->next == NULL)
2104 		return;
2105 	ddp = (DenseDiagPtr) h_list->data.ptrvalue;
2106 	ValNodeAddPointer(&best_list, 0, ddp);
2107 	for(curr = h_list->next; curr != NULL; curr = curr->next)
2108 	{
2109 		ddp = (DenseDiagPtr) curr->data.ptrvalue;
2110 		load_ddp_in_order(&best_list, ddp, (Boolean) (strand==Seq_strand_minus), TRUE);
2111 	}
2112 
2113 
2114 	filter_seqid_order = 0;
2115 	h_list = ValNodeSort(h_list, CompareChainProc);
2116         {
2117             Int4 len_S=0,len_lns=0,rlen_lns=0;
2118             Int4Ptr S=NULL,rS=NULL,LNS=NULL,rLNS=NULL,this_LNS;
2119             Int4 i,j;
2120             len_S = ValNodeLen(h_list);
2121             if(len_S>0) {
2122                 S=(Int4Ptr)Malloc(len_S*sizeof(Int4));
2123                 rS=(Int4Ptr)Malloc(len_S*sizeof(Int4));
2124                 LNS=(Int4Ptr)Malloc(len_S*sizeof(Int4));
2125                 rLNS=(Int4Ptr)Malloc(len_S*sizeof(Int4));
2126                 for(curr = h_list,i=0; curr != NULL; curr = curr->next)
2127                     {
2128                         ddp = (DenseDiagPtr) curr->data.ptrvalue;
2129                         if(strand == Seq_strand_minus)
2130                             S[i++] = ddp->starts[1];
2131                         else
2132                             S[i++] = ddp->starts[1] + ddp->len-1;
2133                     }
2134                 for(i=len_S-1,j=0;i>=0;i--,j++)
2135                     rS[j]=S[i];
2136                 len_lns = lns(S, len_S, LNS);
2137                 rlen_lns = lns(rS, len_S, rLNS);
2138             }
2139             if (len_lns <= 0 || rlen_lns <=0) {
2140                 val=-1;
2141                 Message(MSG_ERROR, "Fail in get_lns");
2142                 if(len_S) {
2143                     Free(S);
2144                     Free(rS);
2145                     Free(LNS);
2146                     Free(rLNS);
2147                 }
2148                 exit(2);
2149             } else if(rlen_lns > len_lns) {
2150                 val = 1; /* usr rLNS */
2151                 len_lns=rlen_lns;
2152                 this_LNS = rLNS;
2153             } else {
2154                 val =0; /* use LNS */
2155                 this_LNS = LNS;
2156             }
2157                 /* inconsistent orientation from LNS */
2158             if((strand == Seq_strand_minus && val == 0) ||
2159                (strand != Seq_strand_minus && val== 1))
2160                 {
2161                     ValNodeFree(h_list);
2162                     *ph_list = best_list;
2163                     if(len_S) {
2164                         Free(S);
2165                         Free(rS);
2166                         Free(LNS);
2167                         Free(rLNS);
2168                     }
2169                     return;
2170                 }
2171 
2172             ValNodeFree(best_list);
2173             for(i=0;i<len_lns;i++) {
2174                 val = this_LNS[i];
2175                 if(strand == Seq_strand_minus)
2176                     val =len_S -1 - val;
2177                 curr = get_nth_node(h_list, val);
2178                 if(curr != NULL)
2179                     curr->choice = 1;
2180             }
2181             if(len_S) {
2182                 Free(S);
2183                 Free(rS);
2184                 Free(LNS);
2185                 Free(rLNS);
2186             }
2187 
2188         }
2189 #if 0
2190         {
2191             FILE *tmp_fp;
2192             tmp_fp = FileOpen("Ins.test", "w");
2193             for(curr = h_list; curr != NULL; curr = curr->next)
2194                 {
2195                     ddp = curr->data.ptrvalue;
2196                     if(strand == Seq_strand_minus)
2197                         fprintf(tmp_fp, "%ld\t", ddp->starts[1]);
2198                     else
2199                         fprintf(tmp_fp, "%ld\t", ddp->starts[1]+ddp->len-1);
2200                 }
2201             fprintf(tmp_fp, "\n");
2202             FileClose(tmp_fp);
2203             system("get_lns < Ins.test >out");
2204 
2205             tmp_fp = FileOpen("out", "r");
2206             fscanf(tmp_fp, "%ld\n", &val);
2207             if(val == -1)
2208                 {
2209                     Message(MSG_ERROR, "Fail in get_lns");
2210                     exit(1);
2211                 }
2212                 /* inconsistent orientation from LNS */
2213             if((strand == Seq_strand_minus && val == 0) ||
2214                (strand != Seq_strand_minus && val== 1))
2215                 {
2216                     FileClose(tmp_fp);
2217                     ValNodeFree(h_list);
2218                     *ph_list = best_list;
2219                     return;
2220                 }
2221 
2222 
2223             ValNodeFree(best_list);
2224             num = ValNodeLen(h_list);
2225             while(fscanf(tmp_fp, "%ld\n", &val) != EOF)
2226                 {
2227                     if(strand == Seq_strand_minus)
2228 			val = num -1 - val;
2229                     curr = get_nth_node(h_list, val);
2230                     if(curr != NULL)
2231 			curr->choice = 1;
2232                 }
2233             FileClose(tmp_fp);
2234         }
2235 #endif
2236 	delete_bad_node(&h_list, 1);
2237 
2238 	if(h_list == NULL)
2239 	{
2240 		Message(MSG_ERROR, "No good node in sort_list_order");
2241 		exit(1);
2242 	}
2243 
2244 	*ph_list = h_list;
2245 }
2246 
2247 
2248 
check_ddp_order(ValNodePtr ddp_list,Uint1 strand,Int4 first_len,Int4 second_len)2249 static ValNodePtr check_ddp_order(ValNodePtr ddp_list, Uint1 strand, Int4 first_len, Int4 second_len)
2250 {
2251 	DenseDiagPtr ddp;
2252 	ValNodePtr h_list = NULL, t_list;
2253 	ValNodePtr curr;
2254 	Int4 num, i;
2255 	Boolean reverse;
2256 	BoolPtr first_status, second_status;
2257 	Int4 p_score;
2258 	Int4Ptr score_list;
2259 	Boolean is_end;
2260 
2261 
2262 	if(ddp_list == NULL || ddp_list->next == NULL)
2263 		return ddp_list;
2264 	score_list = load_ddp_scores(ddp_list, &num);
2265 	if(score_list == NULL)
2266 		return ddp_list;
2267 
2268 	for(curr = ddp_list; curr != NULL; curr = curr->next)
2269 		curr->choice = 0;
2270 	p_score = score_list[0];
2271 	first_status = (BoolPtr) MemNew((size_t)first_len * sizeof(Boolean));
2272 	second_status = (BoolPtr) MemNew((size_t)second_len * sizeof(Boolean));
2273 
2274 	reverse = (strand == Seq_strand_minus);
2275 	h_list = NULL;
2276 	is_end = FALSE;
2277 	while(!is_end)
2278 	{
2279 		t_list = NULL;
2280 		for(i= 0, curr = ddp_list; curr != NULL; curr = curr->next)
2281 		{
2282  			if(p_score/(score_list[i]) <=2)
2283 			{
2284 				if(curr->choice == 0)
2285 				{
2286 					curr->choice = 1;
2287 					ddp = (DenseDiagPtr) curr->data.ptrvalue;
2288 					if(h_list == NULL ||
2289 						load_ddp_in_order(&h_list, ddp, reverse, FALSE))
2290 					{
2291 						if(!is_ddp_overlap(ddp, first_status, second_status))
2292 							ValNodeAddPointer(&t_list, 0, ddp);
2293 					}
2294 				}
2295 				if(curr->next == NULL)
2296 				{
2297 					p_score = score_list[i];
2298 					is_end = TRUE;
2299 					break;
2300 				}
2301 			}
2302 			else
2303 			{
2304 				p_score = score_list[i];
2305 				break;
2306 			}
2307 			++i;
2308 		}
2309 		if(t_list != NULL)
2310 		{
2311 			sort_list_order(&t_list, strand);
2312 			if(h_list == NULL)
2313 			{
2314 				h_list = t_list;
2315 				for(curr = t_list; curr != NULL; curr = curr->next)
2316 				{
2317 					ddp = (DenseDiagPtr) curr->data.ptrvalue;
2318 					MemSet(first_status+ddp->starts[0], 1, (size_t)(ddp->len) * sizeof(Boolean));
2319 					MemSet(second_status+ddp->starts[1], 1, (size_t)(ddp->len) * sizeof(Boolean));
2320 				}
2321 			}
2322 			else
2323 			{
2324 				for(curr = t_list; curr != NULL; curr = curr->next)
2325 				{
2326 					ddp = (DenseDiagPtr) curr->data.ptrvalue;
2327 					if(load_ddp_in_order(&h_list, ddp, reverse, TRUE))
2328 					{
2329 						MemSet(first_status+ddp->starts[0], 1, (size_t)(ddp->len) * sizeof(Boolean));
2330 						MemSet(second_status+ddp->starts[1], 1, (size_t)(ddp->len) * sizeof(Boolean));
2331 					}
2332 				}
2333 				ValNodeFree(t_list);
2334 			}
2335 		}
2336 		if(is_end)
2337 			break;
2338 	}
2339 
2340 	if(h_list == NULL)
2341 	{
2342 		Message(MSG_ERROR, "No good node in check_ddp_order");
2343 		exit(1);
2344 	}
2345 
2346 	MemFree(score_list);
2347 	MemFree(first_status);
2348 	MemFree(second_status);
2349 	ValNodeFree(ddp_list);
2350 	return h_list;
2351 
2352 
2353 }
2354 
2355 
2356 
2357 /*
2358 *
2359 *	assume the align is the alignment computed from BLAST. Get rid of
2360 *	all the smaller segments that are out of the main diagnol. the ValNodePtr
2361 *	returns a list of dense-diags that are on the main diagnol. s_sip specifies
2362 *	the sequence that will have non-redundant coverage
2363 */
FilterSeqAlign(SeqAlignPtr align,SeqIdPtr s_sip,Uint1Ptr p_strand)2364 NLM_EXTERN ValNodePtr FilterSeqAlign(SeqAlignPtr align, SeqIdPtr s_sip, Uint1Ptr p_strand)
2365 {
2366 	Int2 order = 0, s_order;
2367 	DenseDiagPtr ddp;
2368 	SeqIdPtr sip;
2369 	Uint1 strand;
2370 	BioseqPtr bsp;
2371 	ValNodePtr ddp_list;
2372 	BioseqPtr first_bsp, second_bsp;
2373 
2374 	if(s_sip == NULL || align == NULL)
2375 		return NULL;
2376 	if(align->segtype != 1)
2377 		return NULL;
2378 	bsp = BioseqFind(s_sip);
2379 	if(bsp == NULL)
2380 	{
2381 		ErrPostEx(SEV_WARNING, 0, 0, "Fail to find Bioseq for s_sip");
2382 		return NULL;
2383 	}
2384 
2385 	ddp_list = NULL;
2386 	s_order = -1;
2387 	for(ddp = (DenseDiagPtr) align->segs; ddp != NULL; ddp = ddp->next)
2388 	{
2389 		ValNodeAddPointer(&ddp_list, 0, ddp);
2390 		if(s_order == -1)
2391 		{
2392 			for(sip =  ddp->id, order = 0; sip != NULL; sip = sip->next, ++order)
2393 			{
2394 				if(SeqIdMatch(s_sip, sip))
2395 				{
2396 					s_order = order;
2397 				}
2398 			}
2399 		}
2400 	}
2401 	if(s_order == -1)
2402 	{
2403 		printf("error in finding the right order\n");
2404 		exit(1);
2405 	}
2406 
2407 	/*ddp_list only contains the alignment with the right orientation*/
2408 	/* it also has been sorted according to the order of the scores*/
2409 	strand = get_alignment_orientation(&ddp_list, bsp, s_order);
2410 
2411 	if(strand == 0)
2412 		return NULL;
2413 
2414 	ddp = (DenseDiagPtr) align->segs;
2415 	first_bsp = BioseqFind(ddp->id);
2416 	second_bsp = BioseqFind(ddp->id->next);
2417 	/* filter_wrong_orientation (align, strand == Seq_strand_plus); */
2418 	filter_repeats(&ddp_list, first_bsp->length, second_bsp->length);
2419 
2420 	if(ddp_list == NULL)
2421 	{
2422 		Message(MSG_ERROR, "Nothing leftover after filter_repeats");
2423 		exit(1);
2424 	}
2425 	*p_strand = strand;
2426 	return check_ddp_order(ddp_list, strand, first_bsp->length, second_bsp->length);
2427 
2428 }
2429 
get_dsp_align_len(DenseSegPtr dsp)2430 static Int4 get_dsp_align_len(DenseSegPtr dsp)
2431 {
2432 	Int2 i;
2433 	Int4 len = 0;
2434 
2435 	for(i = 0; i<dsp->numseg; ++i)
2436 	{
2437 		if(dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1)
2438 			len += dsp->lens[i];
2439 	}
2440 
2441 	return len;
2442 }
2443 
2444 /*
2445 *
2446 *	convert the Dense-seg to Dense-diag
2447 *	take the end point of aligned segment and make an alignment of
2448 *	Dense-diag
2449 *
2450 */
2451 
make_seg_score(Int4 align_len,Int4 seg_len,Int4 total_score)2452 static ScorePtr make_seg_score(Int4 align_len, Int4 seg_len, Int4 total_score)
2453 {
2454 	Int4 score;
2455 	ScorePtr sp;
2456 	ObjectIdPtr oip;
2457 
2458 	/* score = (seg_len * total_score)/align_len; */
2459 	score = total_score;
2460 	sp = ScoreNew();
2461 	oip = ObjectIdNew();
2462 	oip->str = StringSave("score");
2463 	sp->id = oip;
2464 	sp->choice = 1;
2465 	sp->value.intvalue = score;
2466 	return sp;
2467 }
2468 
2469 
SeqAlignConvertDspToDdp(SeqAlignPtr align,DenseDiagPtr PNTR h_ddp)2470 static DenseDiagPtr SeqAlignConvertDspToDdp(SeqAlignPtr align, DenseDiagPtr PNTR h_ddp)
2471 {
2472 	DenseSegPtr dsp;
2473 	DenseDiagPtr ddp, prev;
2474 	Int2 i;
2475 	Int4 score;
2476 	Int4 align_len;
2477 
2478 	if(align == NULL || align->segtype != 2)
2479 		return NULL;
2480 	dsp = (DenseSegPtr) align->segs;
2481 	if(dsp == NULL || dsp->dim != 2)
2482 		return NULL;
2483 	align_len = get_dsp_align_len(dsp);
2484 
2485 	if(*h_ddp != NULL)
2486 	{
2487 		prev = *h_ddp;
2488 		while(prev->next != NULL)
2489 			prev = prev->next;
2490 	}
2491 	else
2492 		prev = NULL;
2493 	score = get_score_from_list (align->score);
2494 	for(i = 0; i<dsp->numseg; ++i)
2495 	{
2496 		if(dsp->starts[2*i] != -1 &&
2497 			dsp->starts[2*i+1] != -1 && dsp->lens[i]>0)
2498 		{
2499 			ddp = DenseDiagNew();
2500 			ddp->dim = 2;
2501 			ddp->id = SeqIdDup(dsp->ids);
2502 			ddp->id->next = SeqIdDup(dsp->ids->next);
2503 			ddp->starts = (Int4Ptr) MemNew((size_t)2 * sizeof(Int4));
2504 			ddp->starts[0] = dsp->starts[2*i];
2505 			ddp->starts[1] = dsp->starts[2*i + 1];
2506 			ddp->len = dsp->lens[i];
2507 			ddp->strands = (Uint1Ptr) MemNew((size_t)2 * sizeof(Uint1));
2508 			if(dsp->strands != NULL)
2509 			{
2510 				ddp->strands[0] = dsp->strands[2*i];
2511 				ddp->strands[1] = dsp->strands[2*i+1];
2512 			}
2513 			ddp->scores = make_seg_score(align_len, dsp->lens[i], score);
2514 			if(*h_ddp == NULL)
2515 				*h_ddp= ddp;
2516 			else
2517 				prev->next = ddp;
2518 			prev = ddp;
2519 		}
2520 	}
2521 
2522 	return (*h_ddp);
2523 
2524 }
2525 
2526 /*
2527 * convert a list of DenseSeg to DenseDiag
2528 *
2529 */
SeqAlignConvertDspToDdpList(SeqAlignPtr align)2530 NLM_EXTERN SeqAlignPtr SeqAlignConvertDspToDdpList(SeqAlignPtr align)
2531 {
2532 	SeqAlignPtr n_align;
2533 	DenseDiagPtr h_ddp = NULL;
2534 
2535 
2536 	while(align)
2537 	{
2538 		SeqAlignConvertDspToDdp(align, &h_ddp);
2539 		align = align->next;
2540 	}
2541 	if(h_ddp != NULL)
2542 	{
2543 		n_align = SeqAlignNew();
2544 		n_align->segtype = 1;
2545 		n_align->segs = h_ddp;
2546 		return n_align;
2547 	}
2548 	else
2549 		return NULL;
2550 
2551 }
2552 
clean_up_align_list(SeqAlignPtr PNTR h_align,ValNodePtr head)2553 static SeqAlignPtr clean_up_align_list(SeqAlignPtr PNTR h_align, ValNodePtr head)
2554 {
2555 	DenseDiagPtr ddp, n_ddp, p_ddp, c_ddp;
2556 	SeqAlignPtr prev, next;
2557 	Boolean found;
2558 	ValNodePtr curr;
2559 	SeqAlignPtr align;
2560 
2561 
2562 	prev = NULL;
2563 	align = *h_align;
2564 	while(align)
2565 	{
2566 		next = align->next;
2567 
2568 		ddp = (DenseDiagPtr) align->segs;
2569 		p_ddp = NULL;
2570 		while(ddp)
2571 		{
2572 			found = FALSE;
2573 			n_ddp = ddp->next;
2574 			for(curr = head; curr != NULL; curr = curr->next)
2575 			{
2576 				c_ddp = (DenseDiagPtr) curr->data.ptrvalue;
2577 				if(ddp == c_ddp)
2578 				{
2579 					found = TRUE;
2580 					break;
2581 				}
2582 			}
2583 			if(!found)
2584 			{
2585 				if(p_ddp == NULL)
2586 					align->segs = n_ddp;
2587 				else
2588 					p_ddp->next = n_ddp;
2589 				ddp->next = NULL;
2590 				DenseDiagFree(ddp);
2591 			}
2592 			else
2593 				p_ddp = ddp;
2594 			ddp = n_ddp;
2595 		}
2596 		if(align->segs == NULL)
2597 		{
2598 			if(prev == NULL)
2599 				*h_align = next;
2600 			else
2601 				prev->next = next;
2602 			align->next = NULL;
2603 			SeqAlignFree(align);
2604 		}
2605 		else
2606 			prev = align;
2607 		align = next;
2608 	}
2609 
2610 	return (*h_align);
2611 }
2612 
2613 
2614 
2615 
2616 /*
2617 *	return a Seq-align that was made up from the DenseDiag in the
2618 *	right order. The Seq-align is freshly created
2619 *	the chain of the Seq-align should be for the same Seq-id
2620 *
2621 */
FilterDenseSegAlign(SeqAlignPtr align,SeqIdPtr sip)2622 NLM_EXTERN SeqAlignPtr FilterDenseSegAlign(SeqAlignPtr align, SeqIdPtr sip)
2623 {
2624 	SeqAlignPtr h_align;
2625 	Uint1 strand;
2626 	ValNodePtr head = NULL;
2627 
2628 	h_align = SeqAlignConvertDspToDdpList(align);
2629 	if(h_align == NULL)
2630 		return NULL;
2631 	head = FilterSeqAlign(h_align, sip, &strand);
2632 
2633 	if(head == NULL)
2634 	{
2635 		SeqAlignFree(h_align);
2636 		return NULL;
2637 	}
2638 	clean_up_align_list(&h_align, head);
2639 	ValNodeFree(head);
2640 	return h_align;
2641 }
2642 
2643 
2644 
2645 /*
2646 *
2647 *	from the align, figure out the maximum and minimum range on the two sequences.
2648 *   the two Seq-locs can then be sent for the seond path algorithm to compute
2649 *	sequence alignment
2650 *	align is normally computed from a heuristic method, such as BLAST
2651 *	return TRUE for success and FLASE for failure
2652 */
FigureAlignRange(SeqAlignPtr align,SeqLocPtr m_loc,SeqLocPtr s_loc)2653 static Boolean FigureAlignRange(SeqAlignPtr align, SeqLocPtr m_loc, SeqLocPtr s_loc)
2654 {
2655 	DenseDiagPtr ddp;
2656 	ValNodePtr head, vnp;
2657 	SeqIntPtr sint;
2658 	Int4 s_max =-1 , s_min = -1;
2659 	Int4 m_max = -1, m_min = -1;
2660 	Uint1 strand;
2661 	Int2 s_order, order;
2662 	SeqIdPtr sip;
2663 	Uint1 m_strand, s_strand;
2664 
2665 
2666 
2667 	m_strand = 0;
2668 	s_strand = 0;
2669 	head = FilterSeqAlign(align, SeqLocId(s_loc), &strand);
2670 	if(head == NULL)
2671 		return FALSE;
2672 
2673 	ddp = (DenseDiagPtr) head->data.ptrvalue;
2674 	s_order = -1;
2675 	order = 0;
2676 	for(sip = ddp->id; sip != NULL; sip = sip->next, ++order)
2677 	{
2678 		if(SeqIdForSameBioseq(sip, SeqLocId(s_loc)))
2679 		{
2680 			s_order = order;
2681 			break;
2682 		}
2683 	}
2684 	if(s_order == -1)
2685 	{
2686 		ValNodeFree(head);
2687 		return FALSE;
2688 	}
2689 	s_min = ddp->starts[s_order];
2690 	if(strand == Seq_strand_minus)
2691 	{
2692 		m_max = ddp->starts[1-s_order] + ddp->len -1;
2693 	}
2694 	else
2695 		m_min = ddp->starts[1-s_order];
2696 	vnp = head;
2697 	while(vnp->next != NULL)
2698 	{
2699 		ddp = (DenseDiagPtr) vnp->data.ptrvalue;
2700 		/* printf("%ld, %ld, len = %ld\n", ddp->starts[0], ddp->starts[1], ddp->len); */
2701 		vnp = vnp->next;
2702 	}
2703 	ddp = (DenseDiagPtr) vnp->data.ptrvalue;
2704 
2705 	s_max = ddp->starts[s_order] + ddp->len -1;
2706 	if(strand == Seq_strand_minus)
2707 	{
2708 		m_min = ddp->starts[1-s_order];
2709 	}
2710 	else
2711 		m_max = ddp->starts[1-s_order] + ddp->len -1;
2712 
2713 	if(m_min > m_max)
2714 	{
2715 
2716 		Message(MSG_ERROR, "m_min = %ld, m_max = %ld", m_min, m_max);
2717 		ValNodeFree(head);
2718 		return FALSE;
2719 	}
2720 	ValNodeFree(head);
2721 
2722 	if(s_min == -1 || s_max == -1 || m_min == -1 || m_max == -1)
2723 		return FALSE;
2724 
2725 	m_strand = Seq_strand_plus;
2726 	s_strand = strand;
2727 
2728 
2729 	sint = (SeqIntPtr) m_loc->data.ptrvalue;
2730 	sint->from = m_min;
2731 	sint->to = m_max;
2732 	sint->strand = m_strand;
2733 
2734 	sint = (SeqIntPtr) s_loc->data.ptrvalue;
2735 	sint->from = s_min;
2736 	sint->to = s_max;
2737 	sint->strand = s_strand;
2738 
2739 	return TRUE;
2740 }
2741 
2742 
2743 
is_loc_overlap(SeqLocPtr loc_1,SeqLocPtr loc_2,Int4Ptr min_val,Int4Ptr max_val,Int4 gap_len,Int4 max_align_size,Int4Ptr p_gap_val)2744 NLM_EXTERN Boolean is_loc_overlap(SeqLocPtr loc_1, SeqLocPtr loc_2, Int4Ptr min_val, Int4Ptr max_val, Int4 gap_len, Int4 max_align_size, Int4Ptr p_gap_val)
2745 {
2746 	Int4 start_1, stop_1, start_2, stop_2;
2747 
2748 	start_1 = SeqLocStart(loc_1);
2749 	stop_1 = SeqLocStop(loc_1);
2750 	start_2 = SeqLocStart(loc_2);
2751 	stop_2 = SeqLocStop(loc_2);
2752 
2753 	/*strict overlap*/
2754 	if(!(stop_1 < start_2 || start_1 > stop_2))
2755 	{
2756 		*min_val = MAX(start_1, start_2);
2757 		*max_val = MIN(stop_1, stop_2);
2758 		return TRUE;
2759 	}
2760 
2761 	if(gap_len == 0)
2762 		return FALSE;
2763 	if(stop_2 > stop_1)
2764 	{
2765 		/* if(start_2 - stop_1 <= gap_len)  */
2766 		if(start_2 - stop_1 <= max_align_size)
2767 		{
2768 			*min_val = stop_1;
2769 			*max_val = start_2;
2770 			if(p_gap_val != NULL)
2771 				*p_gap_val = start_2 - stop_1;
2772 			return TRUE;
2773 		}
2774 		else
2775 			return FALSE;
2776 	}
2777 	else
2778 	{
2779 		/* if(start_1 - stop_2 <= gap_len) */
2780 		if(start_1 - stop_2 <= max_align_size)
2781 		{
2782 			*min_val = stop_2;
2783 			*max_val = start_1;
2784 			if(p_gap_val != NULL)
2785 				*p_gap_val = start_1 - stop_2;
2786 			return TRUE;
2787 		}
2788 		else
2789 			return FALSE;
2790 	}
2791 
2792 }
2793 
2794 
2795 /*
2796 *
2797 *	ppos is the position of the known sequence, trying to find the matching sequence
2798 *	position
2799 */
find_matching_position(DenseSegPtr dsp,Int4 pos,Int2 pos_order)2800 NLM_EXTERN Int4 find_matching_position(DenseSegPtr dsp, Int4 pos, Int2 pos_order)
2801 {
2802 	Int2 i;
2803 	Int4 start_1, start_2;
2804 	Int4 offset;
2805 
2806 
2807 	for(i = 0; i<dsp->numseg; ++i)
2808 	{
2809 		start_1 = dsp->starts[2*i + pos_order];
2810 		start_2 = dsp->starts[2*i + 1 - pos_order];
2811 
2812 		if(start_1 != -1 && start_2 != -1)
2813 		{
2814 			if(pos >= start_1 && pos <= start_1 + dsp->lens[i] -1)
2815 			{
2816 				if(dsp->strands[pos_order] == Seq_strand_minus)
2817 				{
2818 					offset = start_1 + dsp->lens[i] - 1 - pos;
2819 				}
2820 				else
2821 					offset = pos - start_1;
2822 
2823 				if(dsp->strands[1 - pos_order] == Seq_strand_minus)
2824 					return (start_2 + dsp->lens[i] -1 - offset);
2825 				else
2826 					return (start_2 + offset);
2827 			}
2828 		}
2829 	}
2830 
2831 
2832 	return -1;
2833 }
2834 
SeqAnnotWrite(SeqAlignPtr align)2835 static void SeqAnnotWrite(SeqAlignPtr align)
2836 {
2837         SeqAnnotPtr annot;
2838         AsnIoPtr aip;
2839 
2840         annot = SeqAnnotNew();
2841         annot->type = 2;
2842         annot->data = align;
2843 
2844         aip = AsnIoOpen("temp2.sat", "w");
2845         SeqAnnotAsnWrite(annot, aip, NULL);
2846         AsnIoClose(aip);
2847 
2848         annot->data = NULL;
2849         SeqAnnotFree(annot);
2850 }
2851 
select_overlap_loc(SeqLocPtr loc_1_1,SeqLocPtr loc_2_1,SeqLocPtr loc_1_2,SeqLocPtr loc_2_2,Int2Ptr p_order)2852 NLM_EXTERN Boolean select_overlap_loc(SeqLocPtr loc_1_1, SeqLocPtr loc_2_1, SeqLocPtr loc_1_2, SeqLocPtr loc_2_2, Int2Ptr p_order)
2853 {
2854 	Int4 start_1, stop_1, start_2, stop_2;
2855 	Int4 overlap_1, overlap_2;
2856 
2857 	overlap_1 = 0;
2858 	overlap_2 = 0;
2859 
2860 	start_1 = SeqLocStart(loc_1_1);
2861 	stop_1 = SeqLocStop(loc_1_1);
2862 
2863 	start_2 = SeqLocStart(loc_2_1);
2864 	stop_2 = SeqLocStop(loc_2_1);
2865 
2866 	if(!(start_2 > stop_1 || stop_2 < start_1))
2867 	{
2868 		if(stop_2 >= stop_1)
2869 			overlap_1 = MAX(0, stop_1 - start_2 + 1);
2870 		else
2871 			overlap_1 = MAX(0, stop_2 - start_1 + 1);
2872 	}
2873 
2874 	start_1 = SeqLocStart(loc_1_2);
2875 	stop_1 = SeqLocStop(loc_1_2);
2876 
2877 	start_2 = SeqLocStart(loc_2_2);
2878 	stop_2 = SeqLocStop(loc_2_2);
2879 
2880 	if(!(start_2 > stop_1 || stop_2 < start_1))
2881 	{
2882 		if(stop_2 >= stop_1)
2883 			overlap_2 = MAX(0, stop_1 - start_2 + 1);
2884 		else
2885 			overlap_2 = MAX(0, stop_2 - start_1 + 1);
2886 	}
2887 
2888 	if(overlap_1 == 0 && overlap_2 == 0)
2889 		return FALSE;
2890 	else
2891 	{
2892 		if(overlap_1 >= overlap_2)
2893 			*p_order = 0;
2894 		else
2895 			*p_order = 1;
2896 		return TRUE;
2897 	}
2898 }
2899 
2900 
2901 
2902 /*###############################################################
2903 *
2904 *	functions for merge the two alignments
2905 *
2906 ################################################################*/
2907 
2908 
get_align_len_in_overlap(SeqAlignPtr align,Int4 from,Int4 to,Int2 order)2909 NLM_EXTERN Int4 get_align_len_in_overlap (SeqAlignPtr align, Int4 from, Int4 to, Int2 order)
2910 {
2911 	DenseSegPtr dsp;
2912 	Int2 i;
2913 	Int4 start, stop;
2914 	Int4 align_len = 0;
2915 
2916 	dsp = (DenseSegPtr) align->segs;
2917 	for(i = 0; i<dsp->numseg; ++i)
2918 	{
2919 		if(dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1)
2920 		{
2921 			start = dsp->starts[2*i + order];
2922 			stop = dsp->starts[2*i + order] + dsp->lens[i] -1;
2923 			if(!(start > to || stop < from))
2924 			{
2925 				start = MAX(start, from);
2926 				stop = MIN(stop, to);
2927 
2928 				if(stop >= start)
2929 					align_len += (stop - start + 1);
2930 			}
2931 		}
2932 	}
2933 
2934 	return align_len;
2935 }
2936 
2937 
2938 
open_annot(CharPtr f_name)2939 static SeqAnnotPtr open_annot(CharPtr f_name)
2940 {
2941 	SeqAnnotPtr annot;
2942 	AsnIoPtr aip;
2943 
2944 	aip = AsnIoOpen(f_name, "r");
2945 	annot = SeqAnnotAsnRead(aip, NULL);
2946 	AsnIoClose(aip);
2947 	return annot;
2948 }
2949 
2950 typedef struct segdata {
2951 	Int2 seg_order;
2952 	Int4 overlap_len;
2953 }SegOrder, PNTR SegOrderPtr;
2954 
2955 /*
2956 *	find the longest diagnol in the overlapping region
2957 *
2958 */
SegOrderCompProc(VoidPtr ptr1,VoidPtr ptr2)2959 static int LIBCALLBACK SegOrderCompProc (VoidPtr ptr1, VoidPtr ptr2)
2960 {
2961   ValNodePtr vnp1, vnp2;
2962   SegOrderPtr sop1, sop2;
2963 
2964   if (ptr1 != NULL && ptr2 != NULL) {
2965     vnp1 = *((ValNodePtr PNTR) ptr1);
2966     vnp2 = *((ValNodePtr PNTR) ptr2);
2967     if (vnp1 != NULL && vnp2 != NULL) {
2968 	sop1 = (SegOrderPtr) vnp1->data.ptrvalue;
2969 	sop2 = (SegOrderPtr) vnp2->data.ptrvalue;
2970 	if(sop1 != NULL && sop2 != NULL)
2971 	{
2972 		if(sop1->overlap_len > sop2->overlap_len)
2973 			return -1;
2974 		else if(sop1->overlap_len < sop2->overlap_len)
2975 			return 1;
2976 		else
2977 			return 0;
2978 	}
2979     }
2980   }
2981 
2982   return 0;
2983 }
2984 
find_overlap_diagnol(SeqAlignPtr align,Int4 from,Int4 to,Int2 order)2985 static ValNodePtr find_overlap_diagnol(SeqAlignPtr align, Int4 from, Int4 to, Int2 order)
2986 {
2987 	Int2 i;
2988 	DenseSegPtr dsp;
2989 	Int4 start, stop;
2990 	SegOrderPtr sop;
2991 	ValNodePtr seg_list;
2992 
2993 	seg_list = NULL;
2994 	dsp = (DenseSegPtr) align->segs;
2995 	for(i = 0; i<dsp->numseg; ++i)
2996 	{
2997 		if(dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1)
2998 		{
2999 			start = dsp->starts[2*i+order];
3000 			stop = start + dsp->lens[i] -1;
3001 			if(!(start > to || stop < from))
3002 			{
3003 
3004 				start = MAX(start, from);
3005 				stop = MIN(stop, to);
3006 				sop = (SegOrderPtr) MemNew(sizeof(SegOrder));
3007 				sop->seg_order = i;
3008 				sop->overlap_len = stop - start + 1;
3009 				ValNodeAddPointer(&seg_list, 0, sop);
3010 			}
3011 		}
3012 	}
3013 
3014 	if(seg_list != NULL)
3015 		return ValNodeSort(seg_list, SegOrderCompProc);
3016 	else
3017 		return NULL;
3018 }
3019 
is_dsp_same(DenseSegPtr dsp_1,DenseSegPtr dsp_2)3020 static Boolean is_dsp_same(DenseSegPtr dsp_1, DenseSegPtr dsp_2)
3021 {
3022 	if(dsp_1 == NULL || dsp_2 == NULL)
3023 		return FALSE;
3024 	if(dsp_1->ids == NULL || dsp_2->ids == NULL)
3025 		return FALSE;
3026 	if(!SeqIdMatch(dsp_1->ids, dsp_2->ids))
3027 		return FALSE;
3028 
3029 	if(dsp_1->ids->next == NULL || dsp_2->ids->next == NULL)
3030 		return FALSE;
3031 	if(!SeqIdMatch(dsp_1->ids->next, dsp_2->ids->next))
3032 		return FALSE;
3033 
3034 	if(dsp_1->strands == NULL || dsp_2->strands == NULL)
3035 		return FALSE;
3036 
3037 	if(!is_same_orientation(dsp_1->strands[0], dsp_2->strands[0]))
3038 		return FALSE;
3039 
3040 	if(!is_same_orientation(dsp_1->strands[1], dsp_2->strands[1]))
3041 		return FALSE;
3042 
3043 	return TRUE;
3044 }
3045 
get_score_value(ScorePtr sp)3046 static Int4 get_score_value(ScorePtr sp)
3047 {
3048         ObjectIdPtr oip;
3049 
3050         while(sp)
3051         {
3052                 if(sp->id)
3053                 {
3054                         oip = sp->id;
3055                         if(oip && oip->str && StringCmp(oip->str, "score") == 0)
3056                         {
3057                                 if(sp->choice == 1)
3058                                         return sp->value.intvalue;
3059                         }
3060                 }
3061                 sp = sp->next;
3062         }
3063 
3064         return -1;
3065 }
3066 
load_big_score(SeqAlignPtr align_1,SeqAlignPtr align_2)3067 static void load_big_score (SeqAlignPtr align_1, SeqAlignPtr align_2)
3068 {
3069 	Int4 val_1, val_2;
3070 
3071 	val_1 = get_score_value(align_1->score);
3072 	val_2 = get_score_value(align_2->score);
3073 
3074 	if(val_1 < val_2)
3075 	{
3076 		ScoreSetFree(align_1->score);
3077 		align_1->score = align_2->score;
3078 		align_2->score  = NULL;
3079 	}
3080 }
3081 
3082 /*
3083   merge two seqaligns: align_1 contains the merged seqalign, align_2 is clobbered.
3084                        reorders the seqalign on input so that align_1 is "left" of align_2
3085                        in display-style coordinates.
3086 
3087  */
3088 
merge_two_align(SeqAlignPtr align_1,SeqAlignPtr align_2,Int4 from,Int4 to,Int2 order)3089 NLM_EXTERN Boolean merge_two_align(SeqAlignPtr align_1, SeqAlignPtr align_2, Int4 from, Int4 to, Int2 order)
3090 {
3091 	ValNodePtr seg_list_1, seg_list_2;
3092 	SegOrderPtr sop_1, sop_2;
3093 	ValNodePtr curr_1, curr_2;
3094 	Boolean found;
3095 	Int2 seg_order_1, seg_order_2;
3096 	DenseSegPtr dsp_1, dsp_2;
3097 	Int4 start_1, stop_1, start_2, stop_2;
3098 	Int4 overlap_1, overlap_2;
3099 	Int4 extend_len;
3100 	Int2 n_seg_num, i, j;
3101 	Int4Ptr starts, lens;
3102 	Uint1Ptr strands;
3103 	Int4 m_start_1, s_start_1, len_1;
3104 	Int4 m_start_2, s_start_2, len_2;
3105 	Int4 offset_1;
3106         if(!align_1 || !align_2 || align_1->segtype != 2 || align_2->segtype!=2)
3107             return FALSE;
3108         dsp_1 = (DenseSegPtr) align_1->segs;
3109         dsp_2 = (DenseSegPtr) align_2->segs;
3110 
3111 	if(!is_dsp_same(dsp_1, dsp_2))
3112 		return FALSE;
3113 	seg_list_1 = find_overlap_diagnol(align_1, from, to, order);
3114 	if(seg_list_1 == NULL)
3115 		return FALSE;
3116 	seg_list_2 = find_overlap_diagnol(align_2, from, to, order);
3117 	if(seg_list_2 == NULL)
3118 	{
3119 		ValNodeFreeData(seg_list_1);
3120 		return FALSE;
3121 	}
3122 
3123         if(SeqAlignStart(align_1,0)>SeqAlignStart(align_2,0)) {
3124             VoidPtr segs;
3125             ValNodePtr vnp;
3126             segs = align_1->segs;
3127             align_1->segs = align_2->segs;
3128             align_2->segs = segs;
3129             dsp_1 = (DenseSegPtr) align_1->segs;
3130             dsp_2 = (DenseSegPtr) align_2->segs;
3131             vnp = seg_list_1;
3132             seg_list_1 = seg_list_2;
3133             seg_list_2 = vnp;
3134         }
3135 
3136 	found = FALSE;
3137 	for(curr_1 = seg_list_1; curr_1 != NULL && !found; curr_1 = curr_1->next)
3138 	{
3139 		sop_1 = (SegOrderPtr) curr_1->data.ptrvalue;
3140 		seg_order_1 = sop_1->seg_order;
3141 		m_start_1 = dsp_1->starts[2*seg_order_1 + order];
3142 		s_start_1 = dsp_1->starts[2*seg_order_1 + 1 - order];
3143 		len_1 = dsp_1->lens[seg_order_1];
3144 		for(curr_2 = seg_list_2; curr_2 != NULL && !found; curr_2 = curr_2->next)
3145 		{
3146 			sop_2 = (SegOrderPtr) curr_2->data.ptrvalue;
3147 			seg_order_2 = sop_2->seg_order;
3148 
3149 			m_start_2 = dsp_2->starts[2*seg_order_2 + order];
3150 			s_start_2 = dsp_2->starts[2*seg_order_2 + 1 - order];
3151 			len_2 = dsp_2->lens[seg_order_2];
3152 
3153 
3154 			found = TRUE;
3155 			start_1 = dsp_1->starts[2*seg_order_1 + order];
3156 			stop_1 = start_1 + dsp_1->lens[seg_order_1] -1;
3157 
3158 			start_2 = dsp_2->starts[2*seg_order_2 + order];
3159 			stop_2 = start_2 + dsp_2->lens[seg_order_2] -1;
3160 
3161 			/*check if the overlapping region on the sequence overlaps??*/
3162 			if(start_1 > stop_2 || stop_1 < start_2)
3163 				found = FALSE;
3164 
3165 			offset_1 = 0;
3166 			/*trim the tail of the first alignment and the head of the second*/
3167 			if(found)
3168 			{
3169 				if(dsp_1->strands[order] == Seq_strand_minus)
3170 				{
3171 					/*trim the head of the second alignment*/
3172 					/*    <====-------===================== first
3173 					      <================================  second
3174 					trim  ^^^^^^^^^^^^XXXXXXXXXXXXXXXXXXXXX  overlap
3175 					*/
3176 					if(stop_2 > stop_1)
3177 					{
3178 						offset_1 = stop_2 - stop_1;
3179 						dsp_2->lens[seg_order_2] -= offset_1;
3180 						if(dsp_2->strands[1-order] != Seq_strand_minus)
3181 							dsp_2->starts[2*seg_order_2 + 1 - order] += offset_1;
3182 
3183 						stop_2 = stop_1;
3184 					}
3185 					/*trim the tail of the first alignment*/
3186 					/*
3187 						                           ^^^^^^^^^^ trim
3188 					         <=================================== first
3189 					            <=======================------=========  second
3190 						     XXXXXXXXXXXXXXXXXXXXXXX	overlap
3191 					*/
3192 					if(start_1 < start_2)
3193 					{
3194 						offset_1 = start_2 - start_1;
3195 						dsp_1->starts[2*seg_order_1 + order] += offset_1;
3196 						dsp_1->lens[seg_order_1] -= offset_1;
3197 						if(dsp_1->strands[1-order] == Seq_strand_minus)
3198 							dsp_1->starts[2*seg_order_1 + 1 - order] += offset_1;
3199 
3200 						start_1 = start_2;
3201 					}
3202 					overlap_1 = start_1 - stop_2;
3203 				}
3204 				else
3205 				{
3206 					/*trim the head of the second alignment*/
3207 					/*
3208 						====----====================> first
3209 						================================ second
3210 						^^^^^^^^XXXXXXXXXXXXXXXXXXXX
3211 						trim     overlap
3212 					*/
3213 					if(start_2 < start_1)
3214 					{
3215 						offset_1 = start_1 - start_2;
3216 						dsp_2->starts[2*seg_order_2 + order] += offset_1;
3217 						if(dsp_2->strands[1-order] != Seq_strand_minus)
3218 
3219 							dsp_2->starts[2*seg_order_2 + 1 - order] += offset_1;
3220 						dsp_2->lens[seg_order_2] -= offset_1;
3221 						start_2 = start_1;
3222 					}
3223 					/*trim the tail of the first alignment*/
3224 					/*
3225 						  overlap               trim
3226 						XXXXXXXXXXXXXXXXXXXXXXX^^^^^^^^^^
3227 						=================================> first
3228 						========================------============= second
3229 					*/
3230 
3231 					if(stop_1 > stop_2)
3232 					{
3233 						offset_1 = stop_1 - stop_2;
3234 						dsp_1->lens[seg_order_1] -= offset_1;
3235 						if(dsp_1->strands[1-order] == Seq_strand_minus)
3236 
3237 							dsp_1->starts[2*seg_order_1 + 1 - order] += offset_1;
3238 						stop_1 = stop_2;
3239 					}
3240 					overlap_1 = stop_1 - start_2;
3241 				}
3242 				if(found && overlap_1 <=0)
3243 					found = FALSE;
3244 			}
3245 
3246 
3247 			/*check the second sequence*/
3248 			if(found)
3249 			{
3250 				start_1 = dsp_1->starts[2*seg_order_1 + 1 - order];
3251 				stop_1 = start_1 + dsp_1->lens[seg_order_1] -1;
3252 
3253 				start_2 = dsp_2->starts[2*seg_order_2 + 1 - order];
3254 				stop_2 = start_2 + dsp_2->lens[seg_order_2] -1;
3255 
3256 				/*check if the overlapping region on the sequence overlaps??*/
3257 				if(start_1 > stop_2 || stop_1 < start_2)
3258 					found = FALSE;
3259 			}
3260 			if(found)
3261 			{
3262                             overlap_2 = MIN(stop_2,stop_1) - MAX(start_1,start_2);
3263                             if(found && overlap_2 <=0)
3264                                 found = FALSE;
3265 			}
3266 
3267 			if(found && overlap_2 != overlap_1)
3268 				found = FALSE;
3269 
3270 			if(found)
3271 			{
3272 				if(dsp_1->strands[1-order] == Seq_strand_minus)
3273 				{
3274 					stop_1 = dsp_1->starts[2*seg_order_1 + 1 - order] +
3275 						dsp_1->lens[seg_order_1] -1;
3276 					stop_2 = dsp_2->starts[2*seg_order_2 + 1 - order] +
3277 						dsp_2->lens[seg_order_2] -1;
3278 					extend_len = stop_1 - stop_2;
3279 				}
3280 				else
3281 				{
3282 					start_1 = dsp_1->starts[2*seg_order_1 + 1 - order];
3283 					start_2 = dsp_2->starts[2*seg_order_2 + 1 - order];
3284 					extend_len = start_2 - start_1;
3285 				}
3286 				if(dsp_2->strands[order] != Seq_strand_minus)
3287 				{
3288 					dsp_2->starts[2*seg_order_2 + order] -= extend_len;
3289 				}
3290 				if(dsp_2->strands[1-order] != Seq_strand_minus)
3291 				{
3292 					dsp_2->starts[2*seg_order_2 + 1 - order] -= extend_len;
3293 				}
3294 
3295 				dsp_2->lens[seg_order_2] += extend_len;
3296 
3297 				n_seg_num = seg_order_1 + (dsp_2->numseg - seg_order_2);
3298 				if(n_seg_num < 1)
3299 					found = FALSE;
3300 			}
3301 			if(found)
3302 			{
3303 
3304 				starts = (Int4Ptr) MemNew((size_t)(2*n_seg_num) * sizeof(Int4));
3305 				strands = (Uint1Ptr) MemNew((size_t)(2*n_seg_num) * sizeof(Uint1));
3306 				lens = (Int4Ptr) MemNew((size_t) n_seg_num * sizeof(Int4));
3307 
3308 				j = 0;
3309 				if(seg_order_1 > 0)
3310 				{
3311 					for(i = 0; i<seg_order_1; ++i)
3312 					{
3313 						starts[2*j] = dsp_1->starts[2*i];
3314 						starts[2*j+1] = dsp_1->starts[2*i+1];
3315 						strands[2*j] = dsp_1->strands[2*i];
3316 						strands[2*j+1] = dsp_1->strands[2*i+1];
3317 
3318 						lens[j] = dsp_1->lens[i];
3319 						++j;
3320 					}
3321 				}
3322 
3323 				for(i = seg_order_2; i<dsp_2->numseg; ++i)
3324 				{
3325 					starts[2*j] = dsp_2->starts[2*i];
3326 					starts[2*j+1] = dsp_2->starts[2*i+1];
3327 					strands[2*j] = dsp_2->strands[2*i];
3328 					strands[2*j+1] = dsp_2->strands[2*i+1];
3329 					lens[j] = dsp_2->lens[i];
3330 					++j;
3331 				}
3332 
3333 				dsp_1->numseg = n_seg_num;
3334 				MemFree(dsp_1->starts);
3335 				MemFree(dsp_1->strands);
3336 				MemFree(dsp_1->lens);
3337 
3338 				dsp_1->starts = starts;
3339 				dsp_1->strands = strands;
3340 				dsp_1->lens = lens;
3341 				load_big_score (align_1, align_2);
3342 				break;
3343 			}
3344 			else
3345 			{
3346 				dsp_1->starts[2*seg_order_1 + order] = m_start_1;
3347 				dsp_1->starts[2*seg_order_1 + 1 - order] = s_start_1;
3348 				dsp_1->lens[seg_order_1] = len_1;
3349 
3350 				dsp_2->starts[2*seg_order_2 + order] = m_start_2;
3351 				dsp_2->starts[2*seg_order_2 + 1 - order] = s_start_2;
3352 				dsp_2->lens[seg_order_2] = len_2;
3353 			}
3354 		}
3355 	}
3356 
3357 	ValNodeFreeData(seg_list_1);
3358 	ValNodeFreeData(seg_list_2);
3359 	return found;
3360 
3361 }
3362 
extract_align_from_list(SeqAlignPtr PNTR list,SeqAlignPtr align)3363 static SeqAlignPtr extract_align_from_list(SeqAlignPtr PNTR list, SeqAlignPtr align)
3364 {
3365 	SeqAlignPtr curr, prev;
3366 
3367 	prev = NULL;
3368 	curr = *list;
3369 	while(curr)
3370 	{
3371 		if(curr == align)
3372 		{
3373 			if(prev == NULL)
3374 				*list = curr->next;
3375 			else
3376 				prev->next = curr->next;
3377 			curr->next = NULL;
3378 			return curr;
3379 		}
3380 		prev = curr;
3381 		curr = curr->next;
3382 	}
3383 	return NULL;
3384 }
3385 
SeqAlignReverse(SeqAlignPtr align,Int2 order)3386 NLM_EXTERN void SeqAlignReverse(SeqAlignPtr align, Int2 order)
3387 {
3388 	DenseSegPtr dsp;
3389 	Int4Ptr starts, lens;
3390 	Int2 numseg, i, j;
3391 
3392 	if(align->segtype != 2)
3393 		return;
3394 	dsp = (DenseSegPtr) align->segs;
3395 	if(dsp == NULL || dsp->strands == NULL || dsp->strands[order] != Seq_strand_minus)
3396 		return;
3397 
3398 	numseg = dsp->numseg;
3399 	starts = (Int4Ptr) MemNew((size_t)2*numseg * sizeof(Int4));
3400 	lens = (Int4Ptr) MemNew((size_t)numseg * sizeof(Int4));
3401 
3402 	for(i = numseg-1, j = 0; i>=0; --i, ++j)
3403 	{
3404 		starts[2*j] = dsp->starts[2*i];
3405 		starts[2*j+1] = dsp->starts[2*i+1];
3406 		lens[j] = dsp->lens[i];
3407 		dsp->strands[2*i] = 3 - dsp->strands[2*i];
3408 		dsp->strands[2*i + 1] = 3 - dsp->strands[2*i + 1];
3409 	}
3410 
3411 	MemFree(dsp->starts);
3412 	dsp->starts = starts;
3413 	MemFree(dsp->lens);
3414 	dsp->lens = lens;
3415 }
3416 
3417 typedef struct align_overlap {
3418 	SeqAlignPtr align;
3419 	Int4 overlap_len;
3420 	Int4 align_len;
3421 }AlignOverlap, PNTR AlignOverlapPtr;
3422 
AlignOverlapCompProc(VoidPtr ptr1,VoidPtr ptr2)3423 static int LIBCALLBACK AlignOverlapCompProc (VoidPtr ptr1, VoidPtr ptr2)
3424 {
3425   ValNodePtr vnp1, vnp2;
3426   AlignOverlapPtr sop1, sop2;
3427 
3428   if (ptr1 != NULL && ptr2 != NULL) {
3429     vnp1 = *((ValNodePtr PNTR) ptr1);
3430     vnp2 = *((ValNodePtr PNTR) ptr2);
3431     if (vnp1 != NULL && vnp2 != NULL) {
3432 	sop1 = (AlignOverlapPtr) vnp1->data.ptrvalue;
3433 	sop2 = (AlignOverlapPtr) vnp2->data.ptrvalue;
3434 	if(sop1 != NULL && sop2 != NULL)
3435 	{
3436 		if(sop1->overlap_len > sop2->overlap_len)
3437 			return -1;
3438 		else if(sop1->overlap_len < sop2->overlap_len)
3439 			return 1;
3440 		else if(sop1->align_len > sop2->align_len)
3441 			return -1;
3442 		else if(sop1->align_len < sop2->align_len)
3443 			return 1;
3444 		else
3445 			return 0;
3446 	}
3447     }
3448   }
3449 
3450   return 0;
3451 }
3452 
3453 
3454 
build_overlap_list(SeqAlignPtr align,Int4 from,Int4 to,Int2 order)3455 static ValNodePtr build_overlap_list(SeqAlignPtr align, Int4 from, Int4 to, Int2 order)
3456 {
3457 	Int4 t_len;
3458 	ValNodePtr list = NULL;
3459 	AlignOverlapPtr aop;
3460 
3461 	/*sort the alignment by score*/
3462 	while(align)
3463 	{
3464 		/*check if the alignment is significant*/
3465 		t_len = get_align_len_in_overlap (align, from, to, order);
3466 		if(t_len > MIN_DIAG_LEN)
3467 		{
3468 			aop = (AlignOverlapPtr) MemNew(sizeof(AlignOverlap));
3469 			aop->overlap_len = t_len;
3470 			aop->align = align;
3471 			aop->align_len = SeqAlignLength(align);
3472 			ValNodeAddPointer(&list, 0, aop);
3473 		}
3474 		align = align->next;
3475 	}
3476 
3477 	if(list != NULL)
3478 		return ValNodeSort(list, AlignOverlapCompProc);
3479 	else
3480 		return NULL;
3481 
3482 }
3483 
3484 
3485 
MergeTwoAlignList(SeqAlignPtr h_align_1,SeqAlignPtr PNTR p_align_2,Int4 from,Int4 to,Int2 order)3486 NLM_EXTERN Boolean MergeTwoAlignList (SeqAlignPtr h_align_1, SeqAlignPtr PNTR p_align_2, Int4 from, Int4 to, Int2 order)
3487 {
3488 
3489 	ValNodePtr list_1, list_2;
3490 	AlignOverlapPtr aop_1, aop_2;
3491 	ValNodePtr curr_1, curr_2;
3492 	SeqAlignPtr align_1, align_2;
3493 	Boolean found;
3494 	SeqAlignPtr h_align_2;
3495 	Boolean retval = FALSE;
3496 
3497 	h_align_2 = *p_align_2;
3498 	if(p_align_2 == NULL || h_align_1 == NULL || (h_align_2 = *p_align_2)== NULL)
3499 		return FALSE;
3500 
3501 	list_1 = build_overlap_list(h_align_1, from, to, order);
3502 	if(list_1 == NULL)
3503 		return FALSE;
3504 	list_2 = build_overlap_list(h_align_2, from, to, order);
3505 	if(list_2 == NULL)
3506 	{
3507 		ValNodeFreeData(list_1);
3508 		return FALSE;
3509 	}
3510 
3511 	for(curr_1 = list_1; curr_1 != NULL; curr_1 = curr_1->next)
3512 	{
3513 		aop_1 = (AlignOverlapPtr) curr_1->data.ptrvalue;
3514 		align_1 = aop_1->align;
3515 		SeqAlignReverse(align_1, order);
3516 
3517 		found = FALSE;
3518 		for(curr_2 = list_2; !found && curr_2 != NULL; curr_2 = curr_2->next)
3519 		{
3520 			aop_2 = (AlignOverlapPtr) curr_2->data.ptrvalue;
3521 			if(aop_2->align != NULL)
3522 			{
3523 				align_2 = aop_2->align;
3524 				SeqAlignReverse(align_2, order);
3525 				if(merge_two_align(align_1, align_2, from, to, 0))
3526 				{
3527 					retval = TRUE;
3528 					aop_2->align = NULL;
3529 					if(extract_align_from_list(p_align_2, align_2) != NULL)
3530 						SeqAlignFree(align_2);
3531 					found = TRUE;
3532 				}
3533 			}
3534 		}
3535 	}
3536 
3537 	ValNodeFreeData(list_1);
3538 	ValNodeFreeData(list_2);
3539 
3540 	return retval;
3541 }
3542 
3543 
3544 
3545 /*
3546 *
3547 *	functions related to reduce the redundant level of the view
3548 */
3549 
3550 typedef struct align_info {
3551 	SeqAlignPtr align;
3552 	Int4 align_len;
3553 }AlignInfo, PNTR AlignInfoPtr;
3554 
CompareAlignInfoProc(VoidPtr ptr1,VoidPtr ptr2)3555 static int LIBCALLBACK CompareAlignInfoProc(VoidPtr ptr1, VoidPtr ptr2)
3556 {
3557   AlignInfoPtr aip_1, aip_2;
3558 
3559   if (ptr1 != NULL && ptr2 != NULL) {
3560       aip_1 = (AlignInfoPtr) ptr1;
3561       aip_2 = (AlignInfoPtr) ptr2;
3562 	  if(aip_1->align_len > aip_2->align_len)
3563 		  return -1;
3564 	  else if(aip_1->align_len < aip_2->align_len)
3565 		  return 1;
3566 	  else return 0;
3567   }
3568   return 0;
3569 }
3570 
3571 typedef struct align_inforeal {
3572 	SeqAlignPtr align;
3573 	Nlm_FloatHi rvalue;
3574 }AlignInfoReal, PNTR AlignInfoRealPtr;
3575 
3576 
CompareAlignInfoRealProc(VoidPtr ptr1,VoidPtr ptr2)3577 static int LIBCALLBACK CompareAlignInfoRealProc(VoidPtr ptr1, VoidPtr ptr2)
3578 {
3579   AlignInfoRealPtr aip_1, aip_2;
3580 
3581   if (ptr1 != NULL && ptr2 != NULL) {
3582       aip_1 = (AlignInfoRealPtr) ptr1;
3583       aip_2 = (AlignInfoRealPtr) ptr2;
3584 	  if(aip_1->rvalue > aip_2->rvalue)
3585 		  return -1;
3586 	  else if(aip_1->rvalue < aip_2->rvalue)
3587 		  return 1;
3588 	  else return 0;
3589   }
3590   return 0;
3591 }
3592 
3593 
3594 
3595 /*
3596    Generic Function to Sort SeqAligns.. must
3597    provide Comparison Callback function
3598 */
3599 
3600 
SeqAlignSort(SeqAlignPtr salp,SeqAlignSortCB sort_fn)3601 NLM_EXTERN SeqAlignPtr SeqAlignSort(SeqAlignPtr salp,SeqAlignSortCB sort_fn) {
3602   SeqAlignPtr sap, PNTR salp_list;
3603   Int4 n,i;
3604 
3605   if(salp==NULL || salp->next==NULL)
3606       return salp;
3607 
3608   n=1;
3609   sap = salp->next;
3610   while (sap != NULL)
3611   {
3612       n++;
3613       sap = sap->next;
3614   }
3615   salp_list = Calloc(n, sizeof (SeqAlignPtr));
3616   for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3617       salp_list[i]=sap;
3618   }
3619   HeapSort (salp_list, n, sizeof (SeqAlignPtr), sort_fn);
3620   sap = salp = salp_list[0];
3621   for (i = 1; i < n; i++) {
3622       sap->next = salp_list[i];
3623       sap = sap->next;
3624   }
3625   sap->next = NULL;
3626   MemFree (salp_list);
3627   return salp;
3628 }
3629 
3630 
3631 
SeqAlignSortByLength(SeqAlignPtr salp)3632 NLM_EXTERN SeqAlignPtr SeqAlignSortByLength(SeqAlignPtr salp)
3633 {
3634   SeqAlignPtr sap;
3635   AlignInfoPtr salp_list;
3636   Int4 n,i;
3637 
3638   if(salp==NULL || salp->next==NULL)
3639       return salp;
3640 
3641   n=1;
3642   sap = salp->next;
3643   while (sap != NULL)
3644   {
3645       n++;
3646       sap = sap->next;
3647   }
3648   salp_list = Calloc(n, sizeof (AlignInfo));
3649   for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3650       salp_list[i].align=sap;
3651       salp_list[i].align_len = SeqAlignLength(sap);
3652   }
3653   HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
3654   sap = salp = salp_list[0].align;
3655   for (i = 1; i < n; i++) {
3656       sap->next = salp_list[i].align;
3657       sap = sap->next;
3658   }
3659   sap->next = NULL;
3660   MemFree (salp_list);
3661   return salp;
3662 }
3663 
SeqAlignSortByScore(SeqAlignPtr salp)3664 NLM_EXTERN SeqAlignPtr SeqAlignSortByScore(SeqAlignPtr salp)
3665 {
3666   SeqAlignPtr sap;
3667   AlignInfoPtr salp_list;
3668   Int4 n,i;
3669 
3670   if(salp==NULL || salp->next==NULL)
3671       return salp;
3672 
3673   n=1;
3674   sap = salp->next;
3675   while (sap != NULL)
3676   {
3677       n++;
3678       sap = sap->next;
3679   }
3680   salp_list = Calloc(n, sizeof (AlignInfo));
3681   {
3682       Int4 score,number;
3683       Nlm_FloatHi bit_score,evalue;
3684       for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3685           salp_list[i].align=sap;
3686           GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
3687           salp_list[i].align_len = score;
3688       }
3689   }
3690   HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
3691   sap = salp = salp_list[0].align;
3692   for (i = 1; i < n; i++) {
3693       sap->next = salp_list[i].align;
3694       sap = sap->next;
3695   }
3696   sap->next = NULL;
3697   MemFree (salp_list);
3698   return salp;
3699 }
3700 
3701 
SeqAlignSortByEvalue(SeqAlignPtr salp)3702 NLM_EXTERN SeqAlignPtr SeqAlignSortByEvalue(SeqAlignPtr salp)
3703 {
3704   SeqAlignPtr sap;
3705   AlignInfoRealPtr salp_list;
3706   Int4 n,i;
3707 
3708   if(salp==NULL || salp->next==NULL)
3709       return salp;
3710 
3711   n=1;
3712   sap = salp->next;
3713   while (sap != NULL)
3714   {
3715       n++;
3716       sap = sap->next;
3717   }
3718   salp_list = Calloc(n, sizeof (AlignInfoReal));
3719   {
3720       Int4 score,number;
3721       Nlm_FloatHi bit_score,evalue;
3722       for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3723           salp_list[i].align=sap;
3724           GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
3725           salp_list[i].rvalue = evalue;
3726       }
3727   }
3728   HeapSort (salp_list, n, sizeof (AlignInfoReal), CompareAlignInfoRealProc);
3729   /* reverse order */
3730   sap = salp = salp_list[n-1].align;
3731   for (i = n-2; i >=0; i--) {
3732       sap->next = salp_list[i].align;
3733       sap = sap->next;
3734   }
3735   sap->next = NULL;
3736   MemFree (salp_list);
3737   return salp;
3738 }
3739 
SeqAlignSortByStart(SeqAlignPtr salp,Int2 order)3740 NLM_EXTERN SeqAlignPtr SeqAlignSortByStart(SeqAlignPtr salp,Int2 order)
3741 {
3742   SeqAlignPtr sap;
3743   AlignInfoPtr salp_list;
3744   Int4 n,i;
3745 
3746   if(salp==NULL || salp->next==NULL)
3747       return salp;
3748 
3749   n=1;
3750   sap = salp->next;
3751   while (sap != NULL)
3752   {
3753       n++;
3754       sap = sap->next;
3755   }
3756   salp_list = Calloc(n, sizeof (AlignInfo));
3757   {
3758       for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3759           salp_list[i].align=sap;
3760           salp_list[i].align_len = SeqAlignStart(sap,order);
3761       }
3762   }
3763   HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
3764   /* reverse order */
3765   sap = salp = salp_list[n-1].align;
3766   for (i = n-2; i >=0; i--) {
3767       sap->next = salp_list[i].align;
3768       sap = sap->next;
3769   }
3770   sap->next = NULL;
3771   MemFree (salp_list);
3772   return salp;
3773 }
3774 
3775 
get_align_mid_point(SeqAlignPtr align,Int2 order)3776 static Int4 get_align_mid_point(SeqAlignPtr align, Int2 order)
3777 {
3778 	DenseSegPtr dsp;
3779 	Int4 start, stop;
3780 	Uint1 strand, val;
3781 	SeqIdPtr sip;
3782 
3783 	if(align->segtype == 2)
3784 	{
3785 		dsp = (DenseSegPtr) align->segs;
3786 		val = 0;
3787 		for(sip = dsp->ids; sip != NULL; sip = sip->next)
3788 		{
3789 			if(val == order)
3790 			{
3791 				SeqAlignStartStopById(align, sip, &start, &stop, &strand);
3792 				return (start + stop)/2;
3793 			}
3794 			++val;
3795 		}
3796 	}
3797 
3798 	return -1;
3799 }
SeqAlignSortByRegion(SeqAlignPtr salp,Int2 order)3800 NLM_EXTERN SeqAlignPtr SeqAlignSortByRegion(SeqAlignPtr salp,Int2 order)
3801 {
3802   SeqAlignPtr sap;
3803   AlignInfoPtr salp_list;
3804   Int4 n,i;
3805 
3806   if(salp==NULL || salp->next==NULL)
3807       return salp;
3808 
3809   n=1;
3810   sap = salp->next;
3811   while (sap != NULL)
3812   {
3813       n++;
3814       sap = sap->next;
3815   }
3816   salp_list = Calloc(n, sizeof (AlignInfo));
3817   for(sap=salp,i=0;i<n;i++,sap=sap->next) {
3818       salp_list[i].align=sap;
3819       salp_list[i].align_len = get_align_mid_point(sap, order);
3820   }
3821   HeapSort (salp_list, n, sizeof (AlignInfo), CompareAlignInfoProc);
3822   /* reverse order */
3823   sap = salp = salp_list[n-1].align;
3824   for (i = n-2; i >=0; i--) {
3825       sap->next = salp_list[i].align;
3826       sap = sap->next;
3827   }
3828   sap->next = NULL;
3829   MemFree (salp_list);
3830   return salp;
3831 }
3832 
3833 
3834 
3835 
get_align_list_len(SeqAlignPtr align)3836 static Int4 get_align_list_len(SeqAlignPtr align)
3837 {
3838 	Int4 len = 0;
3839 
3840 	while(align)
3841 	{
3842 		len += SeqAlignLength(align);
3843 		align = align->next;
3844 	}
3845 
3846 	return len;
3847 }
3848 
3849 
save_output(SeqIdPtr sip,CharPtr f_name,SeqAlignPtr align)3850 static void save_output (SeqIdPtr sip, CharPtr f_name, SeqAlignPtr align)
3851 {
3852 	AsnIoPtr aip;
3853 	BioseqPtr bsp;
3854 	SeqEntryPtr sep;
3855 
3856 	bsp = BioseqFind(sip);
3857 	sep = SeqEntryFind(sip);
3858 
3859 	if(bsp == NULL || sep == NULL)
3860 		return;
3861 
3862 	bsp->hist = SeqHistNew();
3863 	bsp->hist->assembly = align;
3864 
3865 	aip = AsnIoOpen(f_name, "w");
3866 	SeqEntryAsnWrite(sep, aip, NULL);
3867 	AsnIoClose(aip);
3868 
3869 	bsp->hist->assembly = NULL;
3870 	SeqHistFree(bsp->hist);
3871 	bsp->hist = NULL;
3872 }
3873 
3874 
find_best_align(SeqAlignPtr align,Int4Ptr pmax_score)3875 NLM_EXTERN SeqAlignPtr find_best_align(SeqAlignPtr align, Int4Ptr pmax_score)
3876 {
3877         SeqAlignPtr curr, best_align;
3878         Int4 score, max_score = 0, number;
3879         Nlm_FloatHi bit_score, evalue;
3880 
3881         best_align = NULL;
3882         for(curr = align; curr != NULL; curr = curr->next)
3883         {
3884                 GetScoreAndEvalue(curr, &score, &bit_score, &evalue , &number);
3885                 if(score > max_score)
3886                 {
3887                         max_score = score;
3888                         best_align = curr;
3889                 }
3890         }
3891 
3892 
3893 	*pmax_score = max_score;
3894         return best_align;
3895 }
3896 
3897 
3898 
3899 /*flip the order of the first and the second sequences*/
SeqAlignSwapOrder(SeqAlignPtr align)3900 NLM_EXTERN void SeqAlignSwapOrder(SeqAlignPtr align)
3901 {
3902 	SeqAlignPtr curr;
3903 	DenseSegPtr dsp;
3904 	SeqIdPtr sip;
3905 	Int2 i;
3906 	Int4 temp;
3907 	Uint1 strand;
3908 
3909 	for(curr = align; curr != NULL; curr = curr->next)
3910 	{
3911 		SeqAlignReverse(curr, 1);
3912 		dsp = (DenseSegPtr) curr->segs;
3913 
3914 		/*switching the Seq-ids*/
3915 		sip = dsp->ids->next;
3916 		dsp->ids->next = NULL;
3917 		sip->next = dsp->ids;
3918 		dsp->ids = sip;
3919 
3920 		for(i = 0; i<dsp->numseg; ++i)
3921 		{
3922 			temp = dsp->starts[2*i];
3923 			dsp->starts[2*i] = dsp->starts[2*i+1];
3924 			dsp->starts[2*i+1] = temp;
3925 
3926 			strand = dsp->strands[2*i];
3927 			dsp->strands[2*i] = dsp->strands[2*i+1];
3928 			dsp->strands[2*i+1] = strand;
3929 		}
3930 
3931 	}
3932         return;
3933 }
3934 
3935 
3936 
reverse_seqloc(SeqLocPtr s_loc)3937 static void reverse_seqloc (SeqLocPtr s_loc)
3938 {
3939 	SeqLocPtr slp, t_slp, n_slp;
3940 	SeqIntPtr sint;
3941 	Uint1 strand;
3942 
3943 	if(s_loc == NULL || s_loc->choice != SEQLOC_MIX)
3944 		return;
3945 	slp = NULL;
3946 	t_slp = NULL;
3947 	while((slp = SeqLocFindNext(s_loc, slp)) != NULL)
3948 	{
3949 		if(slp->choice == SEQLOC_INT)
3950 		{
3951 			sint = (SeqIntPtr) slp->data.ptrvalue;
3952 			strand = sint->strand;
3953 			n_slp = SeqLocIntNew(sint->from, sint->to, 3-strand, sint->id);
3954 			if(t_slp == NULL)
3955 				t_slp = n_slp;
3956 			else
3957 			{
3958 				n_slp->next = t_slp;
3959 				t_slp = n_slp;
3960 			}
3961 		}
3962 	}
3963 	SeqLocSetFree((SeqLocPtr) s_loc->data.ptrvalue);
3964 	s_loc->data.ptrvalue = t_slp;
3965 }
3966 
3967 
create_fake_bioseq(SeqLocPtr seq_loc,CharPtr fake_name)3968 static BioseqPtr create_fake_bioseq(SeqLocPtr seq_loc, CharPtr fake_name)
3969 {
3970 
3971 	BioseqPtr bsp;
3972 	SeqPortPtr spp;
3973 	ByteStorePtr b_store;
3974 	Uint1 residue;
3975 	Uint1 code;
3976 	SeqLocPtr slp;
3977 	Int4 length;
3978 
3979 	code = Seq_code_iupacna;
3980 	length = SeqLocLen(seq_loc);
3981 	b_store = BSNew(length + 2);
3982 	BSSeek(b_store, 0, SEEK_SET);
3983 
3984 	slp = NULL;
3985 	length = 0;
3986 	while((slp = SeqLocFindNext(seq_loc, slp)) != NULL)
3987 	{
3988 		if(slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE)
3989 		{
3990 			spp = SeqPortNewByLoc(slp, code);
3991 			residue = SeqPortGetResidue(spp);
3992 			while(residue != SEQPORT_EOF)
3993 			{
3994 				if(IS_ALPHA(residue))
3995 					BSPutByte(b_store, (Int2)residue);
3996 				residue = SeqPortGetResidue(spp);
3997 			}
3998 			SeqPortFree(spp);
3999 			length += SeqLocLen(slp);
4000 		}
4001 	}
4002 
4003 	bsp = BioseqNew();
4004 	bsp->id = local_id_make(fake_name);
4005 	bsp->seq_data = (SeqDataPtr) b_store;
4006 	bsp->repr = Seq_repr_raw;
4007 	bsp->mol = Seq_mol_dna;
4008 	bsp->length = length;
4009 	bsp->topology = 1;
4010 	bsp->seq_data_type = code;
4011 	return bsp;
4012 
4013 
4014 }
4015 
dup_mul_locs(SeqLocPtr slp)4016 static SeqLocPtr dup_mul_locs(SeqLocPtr slp)
4017 {
4018 	Int4 start, stop;
4019 	Uint1 strand;
4020 	SeqLocPtr t_slp, h_slp;
4021 	SeqLocPtr curr;
4022 
4023 	curr = NULL;
4024 	h_slp = NULL;
4025 	while((curr = SeqLocFindNext(slp, curr)) != NULL)
4026 	{
4027 		start = SeqLocStart(curr);
4028 		stop = SeqLocStop(curr);
4029 		strand = SeqLocStrand(curr);
4030 		t_slp = SeqLocIntNew(start, stop, strand, SeqLocId(curr));
4031 		ValNodeLink(&h_slp, t_slp);
4032 	}
4033 
4034 	if(h_slp->next == NULL)
4035 		return h_slp;
4036 	else
4037 	{
4038 		t_slp = ValNodeNew(NULL);
4039 		t_slp->choice = SEQLOC_MIX;
4040 		t_slp->data.ptrvalue = h_slp;
4041 		return t_slp;
4042 	}
4043 }
4044 
4045 
second_seq_has_gap(BioseqPtr s_bsp)4046 static Boolean second_seq_has_gap(BioseqPtr s_bsp)
4047 {
4048 	SeqLocPtr slp;
4049 	SeqIdPtr sip;
4050 	BioseqPtr t_bsp;
4051 
4052 	for(slp = (SeqLocPtr) s_bsp->seq_ext; slp != NULL; slp = slp->next)
4053 	{
4054 		if(slp->choice != SEQLOC_NULL)
4055 		{
4056 			sip = SeqLocId(slp);
4057 			if(sip != NULL)
4058 			{
4059 				t_bsp = BioseqFind(sip);
4060 				if(t_bsp != NULL)
4061 				{
4062 					if(t_bsp->repr == Seq_repr_virtual)
4063 						return TRUE;
4064 				}
4065 			}
4066 		}
4067 	}
4068 
4069 	return FALSE;
4070 }
4071 
4072 
get_best_id(BioseqPtr bsp)4073 static SeqIdPtr get_best_id(BioseqPtr bsp)
4074 {
4075 	SeqIdPtr sip;
4076 
4077 	sip = SeqIdFindBest(bsp->id, 0);
4078 	if(sip == NULL || sip->choice != SEQID_GI)
4079 	{
4080 		for(sip = bsp->id; sip != NULL; sip = sip->next)
4081 		{
4082 			if(sip->choice == SEQID_LOCAL)
4083 				return sip;
4084 		}
4085 		return bsp->id;
4086 	}
4087 	else
4088 		return sip;
4089 }
4090 
4091 
4092 /*gap = unknown length is separated as separate segments. This
4093   is to make sure that create_fake_bioseq won't include those
4094   segments
4095  */
4096 
is_virtual_segment(SeqIdPtr sip)4097 static Boolean is_virtual_segment(SeqIdPtr sip)
4098 {
4099 	BioseqPtr bsp;
4100 	ObjectIdPtr oip;
4101 
4102 	bsp = BioseqFind(sip);
4103 	if(bsp != NULL)
4104 	{
4105 		if(bsp->repr == Seq_repr_virtual || bsp->repr == Seq_repr_map)
4106 			return TRUE;
4107 		else
4108 			return FALSE;
4109 	}
4110 	if(sip->choice == SEQID_LOCAL)
4111 	{
4112 		oip = (ObjectIdPtr) sip->data.ptrvalue;
4113 		return (oip->str && StringCmp(oip->str, "virtual") == 0);
4114 	}
4115 
4116 	return FALSE;
4117 }
4118 
4119 
build_slp_for_gcontig(BioseqPtr gcontig_bsp,SeqIdPtr gcontig_sip)4120 static SeqLocPtr build_slp_for_gcontig (BioseqPtr gcontig_bsp, SeqIdPtr gcontig_sip)
4121 {
4122 	SeqLocPtr slp, h_slp = NULL, t_slp;
4123 	Int4 start = 0, stop = -1;
4124 	SeqIdPtr sip;
4125 	Boolean update;
4126 
4127 
4128 	for(slp = (SeqLocPtr) gcontig_bsp->seq_ext; slp != NULL; slp = slp->next)
4129 	{
4130 		if(slp->choice != SEQLOC_NULL)
4131 		{
4132 			sip = SeqLocId(slp);
4133 			/* if(sip->choice != SEQID_GI) */
4134 			if(sip != NULL)
4135 			{
4136 				update = FALSE;
4137 				if(is_virtual_segment(sip))
4138 				{
4139 					t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, gcontig_sip);
4140 					ValNodeLink(&h_slp, t_slp);
4141 					update = TRUE;
4142 				}
4143 				stop += SeqLocLen(slp);
4144 				if(update)
4145 					start = stop + 1;
4146 
4147 			}
4148 		}
4149 		else
4150 		{
4151 			if(start < stop)
4152 			{
4153 				t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, gcontig_sip);
4154 				ValNodeLink(&h_slp, t_slp);
4155 				start = stop + 1;
4156 			}
4157 		}
4158 	}
4159 
4160 	if(start < stop)
4161 	{
4162 		t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, gcontig_sip);
4163 		ValNodeLink(&h_slp, t_slp);
4164 	}
4165 
4166 	if(h_slp == NULL)
4167 		return NULL;
4168 
4169 	if(h_slp->next != NULL)
4170 	{
4171 		t_slp = ValNodeNew(NULL);
4172 		t_slp->choice = SEQLOC_MIX;
4173 		t_slp->data.ptrvalue = h_slp;
4174 		return t_slp;
4175 	}
4176 	else
4177 		return h_slp;
4178 }
4179 
4180 
4181 /* Reverses the strand of a Linked list of SeqAlignPtr. */
SeqAlignListReverseStrand(SeqAlignPtr salp)4182 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignListReverseStrand (SeqAlignPtr salp)
4183 {
4184   SeqAlignPtr salptmp;
4185   Int4Ptr     lenp;
4186   Int4Ptr     startp;
4187   Uint1Ptr    strandp;
4188   Int4        numseg;
4189   Int2        dim;
4190   Int4        j, k, n, tmp;
4191 
4192   for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
4193   {
4194      if (salptmp->segtype == 2)
4195      {
4196          DenseSegPtr dsp;
4197         dsp = (DenseSegPtr) salptmp->segs;
4198         strandp = dsp->strands;
4199         numseg = dsp->numseg;
4200         dim = dsp->dim;
4201         lenp = dsp->lens;
4202         startp = dsp->starts;
4203         for (j=0; j < numseg*dim && strandp!=NULL; j++, strandp++)
4204             {
4205                 if (*strandp == Seq_strand_minus)
4206                     *strandp = Seq_strand_plus;
4207                 else if (*strandp != Seq_strand_minus)
4208                     *strandp = Seq_strand_minus;
4209             }
4210         for (j=0, k=numseg-1; j<numseg/2; j++, k--) {
4211             tmp=lenp[j];
4212             lenp[j]=lenp[k];
4213             lenp[k]=tmp;
4214         }
4215         for (j=0, k=(dim*numseg-dim); j<(dim*numseg-1)/2; j+=dim, k-=dim) {
4216             for (n=0; n<dim; n++) {
4217                 tmp=startp[j+n];
4218                 startp[j+n]=startp[k+n];
4219                 startp[k+n]=tmp;
4220             }
4221         }
4222      } else if (salptmp->segtype ==1) {
4223          DenseDiagPtr ddp,ddp_next,ddp_prev;
4224 	 ddp_prev=NULL;
4225          ddp = (DenseDiagPtr) salptmp->segs;
4226          while(ddp!=NULL) {
4227              strandp = ddp->strands;
4228              dim = ddp->dim;
4229              for (j=0; j < dim && strandp!=NULL; j++, strandp++)
4230                  {
4231                      if (*strandp == Seq_strand_minus)
4232                          *strandp = Seq_strand_plus;
4233                      else if (*strandp != Seq_strand_minus)
4234                          *strandp = Seq_strand_minus;
4235                  }
4236              ddp_next = ddp->next;
4237 	     if(salptmp->type==3 || salptmp->type==1) {
4238                  ddp->next = ddp_prev;
4239                  ddp_prev =ddp;
4240 	     }
4241              ddp = ddp_next;
4242         }
4243         if(salptmp->type==3 || salptmp->type==1)
4244 	      salptmp->segs = ddp_prev;
4245      }
4246   }
4247   return salp;
4248 }
4249 
SeqAlignDup(SeqAlignPtr salp)4250 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignDup (SeqAlignPtr salp)
4251 {
4252   SeqAnnotPtr sap,
4253               sap2;
4254   SeqAlignPtr salp2 = NULL,
4255               next;
4256 
4257   next = salp->next;
4258   salp->next = NULL;
4259   sap = SeqAnnotForSeqAlign (salp);
4260   sap2 = (SeqAnnotPtr) AsnIoMemCopy ((Pointer) sap, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
4261   if (sap2!=NULL) {
4262      salp2 = (SeqAlignPtr)sap2->data;
4263      sap2->data = NULL;
4264      SeqAnnotFree (sap2);
4265   }
4266   if (next != NULL)
4267      salp->next = next;
4268   sap->data = NULL;
4269   SeqAnnotFree (sap);
4270   return salp2;
4271 }
4272 
SeqAlignListDup(SeqAlignPtr salp)4273 NLM_EXTERN SeqAlignPtr LIBCALL SeqAlignListDup (SeqAlignPtr salp)
4274 {
4275   SeqAnnotPtr sap,
4276               sap2;
4277   SeqAlignPtr salp2 = NULL;
4278   if (salp == NULL) return(NULL);
4279   sap = SeqAnnotForSeqAlign (salp);
4280   if (sap == NULL) return(NULL);
4281   sap2 = (SeqAnnotPtr) AsnIoMemCopy ((Pointer) sap, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
4282   if (sap2!=NULL) {
4283      salp2 = sap2->data;
4284      sap2->data = NULL;
4285      SeqAnnotFree (sap2);
4286   }
4287   sap->data = NULL;
4288   SeqAnnotFree (sap);
4289   return salp2;
4290 }
4291 
4292