1 static char const rcsid[] = "$Id: sim2.c,v 6.1 2003/05/30 17:25:38 coulouri Exp $";
2 
3 #include <stdio.h>
4 #include <ncbi.h>
5 #include <objseq.h>
6 #include <objloc.h>
7 #include <seqport.h>
8 #include <sequtil.h>
9 #include "simutil.h"
10 
11 
12 
13 /* SIM2 - calling routine to falign() */
14 
SIM2(SeqLocPtr cslp1,SeqLocPtr cslp2,Int4 k,SimPamPtr a_spp,ValNodePtr PNTR new_list,Uint1 output_type,FilterProc filter)15 SeqAlignPtr SIM2(SeqLocPtr cslp1, SeqLocPtr cslp2, Int4 k, SimPamPtr a_spp, ValNodePtr PNTR new_list, Uint1 output_type, FilterProc filter)
16 {
17 	SeqAlignPtr sap = NULL, sapC=NULL, out_sap, out_sapC;
18 	Boolean both_strand, is_dna;
19 	CharPtr A, B, compB = NULL;
20 	Int4 w, r, o, e, m, g, h;
21 	FloatHi cut_off;
22 	Int4 Width = -1;
23 	SimPam sp, PNTR spp;
24 	Uint1 m_strand, s_strand;
25 
26 
27 	if(cslp1 == NULL || cslp2 == NULL)
28 	{
29 		Message(MSG_ERROR, "In sufficient input");
30 		return NULL;
31 	}
32 
33 	if(cslp1->choice != SEQLOC_INT || cslp2->choice !=SEQLOC_INT)
34 	{
35 		Message(MSG_ERROR, "Incorrect type of Seq-loc. Only take Seq-int");
36 		return NULL;
37 	}
38 	if(output_type < OUTPUT_ALIGN || output_type > OUTPUT_BOTH)
39 	{
40 		Message(MSG_OK, "Illegal output type");
41 		output_type = OUTPUT_ALIGN;
42 	}
43 
44 	if(new_list !=NULL)
45 		*new_list = NULL;
46 	else
47 	{
48 		if(output_type == OUTPUT_BOTH || output_type == OUTPUT_ENDS)
49 		{
50 			Message(MSG_ERROR, "No space to store the ends");
51 			return NULL;
52 		}
53 	}
54 
55 
56 	both_strand = check_strand_mol(cslp2, &is_dna);
57 	if(!is_dna)
58 	{
59 		Message(MSG_ERROR, "Sorry, Sim2 can only compare two DNA sequences");
60 		return NULL;
61 	}
62 	if(!both_strand)
63 		both_strand = check_strand_mol(cslp1, &is_dna);
64 	if(a_spp == NULL)
65 	{
66 		DefaultSimPam(&sp);
67 		spp = &sp;
68 	}
69 	else
70 		spp = a_spp;
71 	w = spp->word;
72 	r = spp->mismatch;
73 	o = spp->gap_open;
74 	e = spp->gap_ext;
75 	m = spp->mismatch_2;
76 	g = spp->gap_open_2;
77 	h = spp->gap_ext_2;
78 	cut_off = spp->cutoff;
79 
80 
81 	if(both_strand)
82 	{
83 		m_strand = SeqLocStrand(cslp1);
84 		Change_Loc_Strand(cslp1, Seq_strand_plus);
85 		s_strand = SeqLocStrand(cslp2);
86 		Change_Loc_Strand(cslp2, Seq_strand_plus);
87 	}
88 
89 	A= make_sim_seq(cslp1, TRUE, NULL);
90 	if(A == NULL)
91 	{
92 		if(both_strand)
93 		{
94 			Change_Loc_Strand(cslp2, s_strand);
95 			Change_Loc_Strand(cslp1, m_strand);
96 		}
97 		return NULL;
98 	}
99 	B = make_sim_seq(cslp2, TRUE, NULL);
100 	if(B == NULL)
101 	{
102 		if(both_strand)
103 		{
104 			Change_Loc_Strand(cslp2, s_strand);
105 			Change_Loc_Strand(cslp1, m_strand);
106 		}
107 
108 		if(A != NULL)
109 			MemFree(A);
110 		return NULL;
111 	}
112 	sap = NULL;
113 	sap = falign(cslp1, cslp2, A+1, B+1, w, k, r, o, e);
114 	out_sap = Region(sap, cslp1, cslp2, A+1, B+1, m, g, h, Width, new_list, output_type, filter);
115 	sap = free_align_list(sap);/* free sap */
116 
117 	if(both_strand)
118 	{
119 		Change_Loc_Strand(cslp2, (Uint1)2);
120 		B = make_sim_seq(cslp2, TRUE, B);
121 		sap = falign(cslp1, cslp2, A+1, B+1, w, k, r, o, e);
122 		out_sapC = Region(sap, cslp1, cslp2, A+1, B+1, m, g, h, Width, new_list, output_type, filter);
123 		sap = free_align_list(sap);	/*free sap */
124 
125 		/* concatenate out_sap and out_sapC */
126 		out_sap = link_align(out_sapC, out_sap);
127 	}
128 
129 	MemFree(A);
130 	MemFree(B);
131 	if(output_type  != OUTPUT_ALIGN)
132 		*new_list = get_top_K(*new_list, k);
133 	if(both_strand)
134 	{
135 		Change_Loc_Strand(cslp2, s_strand);
136 		Change_Loc_Strand(cslp1, m_strand);
137 	}
138 	return get_top_K_alignment(out_sap, k, cut_off);
139 }
140 
141 
142 /**********************************************************************
143 *
144 *       SIM2ALN(cslp1, cslp2, K)
145 *       Computes top K alignment beteeen cslp1 and cslp2, return the Seq-align
146 *       object only
147 *
148 ***********************************************************************/
SIM2ALN(SeqLocPtr cslp1,SeqLocPtr cslp2,Int4 K)149 SeqAlignPtr SIM2ALN (SeqLocPtr cslp1, SeqLocPtr cslp2, Int4 K)
150 {
151 	return SIM2(cslp1, cslp2, K, NULL, NULL, OUTPUT_ALIGN, NULL);
152 }
153 
154 
155 
156 
157 
158 /**********************************************************************
159 *
160 *       SIM2ENDS(cslp1, csp2, K, filter)
161 *       Computes top K alignment between cslp1, cslp2 and return the
162 *       a linked list (ValNodePtr) of RecordEnds
163 *       filter: filter based on percent identity. set to NULL when
164 *       it is not required
165 *
166 **********************************************************************/
SIM2ENDS(SeqLocPtr cslp1,SeqLocPtr cslp2,Int4 K,FilterProc filter)167 ValNodePtr SIM2ENDS (SeqLocPtr cslp1, SeqLocPtr cslp2, Int4 K, FilterProc
168 filter)
169 {
170 
171 	ValNodePtr new_list;	/*the list for storin the ends*/
172 
173 	SIM2(cslp1, cslp2, K, NULL, &new_list, OUTPUT_ENDS, filter);
174 
175 	return new_list;
176 }
177 
178