1 #include <mconsist.h>
2 #include <maputil.h>
3 
4 /****************************************************************************
5 *
6 *	get_anchor_coordinates(h_align, anchor_id, sip, x_anchor_pos, x_pos, num)
7 *
8 *	load the positions of framework marker of the current sequence and its
9 *	corresponding positions in the anchor map (normally the NCBI sequence) into
10 *	two arays to extrapolate the positions for map integration
11 *	h_align:	the alignment of consistent markers (a.k.a good alignment)
12 *	anchor_id:	the Seq-id for the anchor map (normally, the NCBI sequence map)
13 *	sip:		the Seq-id for the current sequence
14 *	x_anchor_pos: the array to store the positions on the anchor map
15 *	x_pos:		  the array to store the positions on the current map. each
16 *				  member of x_anchor_pos and x_pos is a matching pair
17 *				  both arrays are ordered
18 *	num:		  the total number of framework markers in the current map.
19 *				  This is also the size of x_anchor_pos and x_pos
20 *
21 *	return TRUE for success and FALSE for fail
22 *
23 ****************************************************************************/
get_anchor_coordinates(SeqAlignPtr h_align,SeqIdPtr anchor_id,SeqIdPtr sip,Int4Ptr PNTR x_anchor_pos,Int4Ptr PNTR x_pos,Uint2Ptr num)24 Boolean get_anchor_coordinates(SeqAlignPtr h_align, SeqIdPtr anchor_id, SeqIdPtr sip, Int4Ptr PNTR x_anchor_pos, Int4Ptr PNTR x_pos, Uint2Ptr num)
25 {
26 	StdSegPtr ssp;
27 	SeqLocPtr slp;
28 	SeqIdPtr id;
29 	Int4 n, i, j;
30 	Int4 a_pos, t_pos;
31 	SeqAlignPtr align;
32 	Int4Ptr anchor_pos, pos;
33 	Boolean change;
34 	Int4 temp;
35 
36 	*x_anchor_pos = NULL;
37 	*x_pos = NULL;
38 	*num = 0;
39 
40 	align = h_align;
41 	if(align == NULL)
42 		return FALSE;
43 
44 	/* count the number of the consistent markers for the
45 	current sip */
46 	n = 0;
47 	while(align)
48 	{
49 		if(align->segtype == 3)
50 		{	/*Std-segs*/
51 			ssp = align->segs;
52 			for(slp = ssp->loc; slp != NULL; slp = slp->next)
53 			{
54 				id = SeqLocId(slp);
55 				if(SeqIdForSameBioseq(id, sip))
56 				{
57 					++n;
58 					break;
59 				}
60 			}
61 		}
62 		align = align->next;
63 	}
64 
65 	if(n == 0)
66 		return FALSE;
67 	anchor_pos = MemNew((size_t)n * sizeof(Int4));
68 	pos = MemNew((size_t)n * sizeof(Int4));
69 
70 	/*load the data*/
71 	n = 0;
72 	align = h_align;
73 	while(align)
74 	{
75 		if(align->segtype == 3)
76 		{
77 			ssp = align->segs;
78 			a_pos = -1;
79 			t_pos = -1;
80 			for(slp = ssp->loc; slp != NULL; slp = slp->next)
81 			{
82 				id = SeqLocId(slp);
83 				if(SeqIdForSameBioseq(id, sip))
84 					t_pos = (SeqLocStart(slp) + SeqLocStop(slp))/2;
85 				else if(SeqIdForSameBioseq(id, anchor_id))
86 					a_pos = SeqLocStart(slp);
87 			}
88 
89 			/*each time, load a matching pair*/
90 			if(t_pos >= 0 && a_pos >=0)
91 			{
92 				anchor_pos[n] = a_pos;
93 				pos[n] = t_pos;
94 				++n;
95 			}
96 		}
97 		align = align->next;
98 	}
99 
100 	/*sort the array*/
101 	change = TRUE;
102 	for(i = 0; i<n-1 && change == TRUE; ++i)
103 	{
104 		change = FALSE;
105 		for(j = 0; j<n-i-1; ++j)
106 		{
107 			if(anchor_pos[j] > anchor_pos[j+1])
108 			{
109 				change = TRUE;
110 				temp = anchor_pos[j+1];
111 				anchor_pos[j+1] = anchor_pos[j];
112 				anchor_pos[j] = temp;
113 
114 				temp = pos[j+1];
115 				pos[j+1] = pos[j];
116 				pos[j] = temp;
117 			}
118 		}
119 	}
120 
121 	/*check the linear relationship between anchor_pos and pos*/
122 	temp = pos[0];
123 	for(i = 1, j= 1; i<n; ++i)
124 	{
125 		if(pos[i] >= temp)
126 		{
127 			temp = pos[i];
128 			pos[j] = temp;
129 			anchor_pos[j] = anchor_pos[i];
130 			++j;
131 		}
132 	}
133 	*x_anchor_pos = anchor_pos;
134 	*x_pos = pos;
135 	*num = (Uint2)j;
136 	return TRUE;
137 }
138 
139 
140 /****************************************************************************
141 *
142 *	find_this_position_by_anchor(anchor_pos, pos, num, this_pos, length)
143 *	map the position on the current map to the anchor map (sequence map)
144 *	anchor_pos, pos: the two arrays of matching positions on the anchor mark
145 *					 and the current map for the consistent markers
146 *	num:	number of the consistent markers on the current map. (same as the
147 *			size of the anchor marker)
148 *	this_pos:	the position on the current map
149 *	length:		the length of the anchor map
150 *
151 *	return the position mapped to the anchor marker
152 *
153 *****************************************************************************/
find_this_position_by_anchor(Int4Ptr anchor_pos,Int4Ptr pos,Uint2 num,Int4 this_pos,Int4 length)154 Int4 find_this_position_by_anchor (Int4Ptr anchor_pos, Int4Ptr pos, Uint2 num, Int4 this_pos, Int4 length)
155 {
156 	Uint2 i;
157 	Int4 prev, curr;
158 	Int4 a_prev, a_curr;
159 	FloatHi val;
160 
161 	/*assumes that there are 0-0 mapping at the start*/
162 	prev = 0;
163 	a_prev = 0;
164 	for(i = 0; i<num; ++i)
165 	{
166 		curr = pos[i];
167 		a_curr = anchor_pos[i];
168 		if(this_pos == prev)
169 			return a_prev;
170 		if(this_pos == curr)
171 			return a_curr;
172 		if(this_pos >prev && this_pos <curr)
173 		{
174 			if(a_curr == a_prev)
175 				return a_curr;
176 			val = (FloatHi)(a_curr - a_prev) * (FloatHi)(this_pos - prev) / (FloatHi)(curr - prev);
177 			return (a_prev + (Int4)val);
178 		}
179 		prev = curr;
180 		a_prev = a_curr;
181 	}
182 
183 	curr = length-1;
184 	a_curr = length-1;
185 	val = (FloatHi)(a_curr - a_prev) * (FloatHi)(this_pos - prev) / (FloatHi)(curr - prev);
186 	return (a_prev + (Int4)val);
187 }
188 
189 
find_flanking_anchor_pos(Int4Ptr anchor_pos,Uint2 num,Int4 c_pos,Int4 length,Int4Ptr f_left,Int4Ptr f_right)190 Boolean find_flanking_anchor_pos(Int4Ptr anchor_pos, Uint2 num, Int4 c_pos, Int4 length, Int4Ptr f_left, Int4Ptr f_right)
191 {
192 	Uint2 i;
193 	Int4 prev;
194 
195 	prev = 0;
196 	for(i = 0; i<num; ++i)
197 	{
198 		if(anchor_pos[i] == c_pos)
199 		{
200 			*f_left = anchor_pos[i];
201 			*f_right = anchor_pos[i];
202 			return TRUE;
203 		}
204 		if(anchor_pos[i] > c_pos)
205 		{
206 			*f_left = prev;
207 			*f_right = anchor_pos[i];
208 			return TRUE;
209 		}
210 
211 		prev = anchor_pos[i];
212 	}
213 
214 	*f_left = prev;
215 	*f_right = length-1;
216 	return TRUE;
217 }
218 
219 
220 
221 /**********************************************************************
222 *
223 *	MapLocToAnchor(annot, slp, anchor)
224 *	map the current slp to a position on the anchor Bioseq
225 *	annot: Seq-annot that may contain the alignment of the consistent markers
226 *	slp: the current Bioseq
227 *	anchor_id: the Seq-id for the anchor Bioseq, that is the sequece map
228 *
229 ************************************************************************/
MapLocToAnchor(SeqAnnotPtr annot,SeqLocPtr slp,BioseqPtr anchor_bsp)230 SeqLocPtr MapLocToAnchor(SeqAnnotPtr annot, SeqLocPtr slp, BioseqPtr anchor_bsp)
231 {
232 	Int2 type;
233 	Int4Ptr x_a, x;
234 	Uint2 num;
235 	SeqIdPtr anchor_id;
236 	SeqLocPtr t_slp;
237 	Int4 start, stop;
238 	SeqAlignPtr align;
239 
240 
241 	if(annot == NULL || slp == NULL || anchor_bsp == NULL)
242 		return NULL;
243 	if(slp->choice != SEQLOC_PNT && slp->choice != SEQLOC_INT)
244 		return NULL;
245 	anchor_id = SeqIdFindBest(anchor_bsp->id, SEQID_GI);
246 	if(anchor_id == NULL)
247 		anchor_id = anchor_bsp->id;
248 
249 	while(annot)
250 	{
251 		if(annot->type == 2)
252 		{
253 			type = GetEquivAlignType(annot);
254 			if(type == 1)	/*this is consistent*/
255 			{
256 				align = annot->data;
257 				if(!get_anchor_coordinates(align, anchor_id, SeqLocId(slp), &x_a, &x, &num))
258 					return NULL;
259 				if(slp->choice == SEQLOC_INT)
260 				{
261 					start = find_this_position_by_anchor (x_a, x, num, SeqLocStart(slp), anchor_bsp->length);
262 					if(SeqLocStart(slp) != SeqLocStop(slp))
263 						stop = find_this_position_by_anchor (x_a, x, num, SeqLocStop(slp), anchor_bsp->length);
264 					else
265 						stop = start;
266 					t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, anchor_id);
267 				}
268 				else
269 				{
270 					start = SeqLocStart(slp);
271 					start = find_this_position_by_anchor (x_a, x, num, start, anchor_bsp->length);
272 					t_slp = SeqLocPntNew(start, Seq_strand_plus, anchor_id, FALSE);
273 				}
274 				MemFree(x_a);
275 				MemFree(x);
276 				return t_slp;
277 			}
278 		}
279 
280 		annot = annot->next;
281 	}
282 
283 	return NULL;
284 }
285 
286 
287 /*****************************************************************
288 *
289 *	map a position on the anchor_bsp (anchor_pos) to a
290 *	position on the other_bsp. It is the reverse operation of
291 *	MapLocToAnchor
292 *	return -1 for failure
293 *
294 ******************************************************************/
MapAnchorToLoc(SeqAnnotPtr annot,Int4 anchor_pos,BioseqPtr anchor_bsp,BioseqPtr other_bsp)295 Int4 MapAnchorToLoc(SeqAnnotPtr annot, Int4 anchor_pos, BioseqPtr anchor_bsp, BioseqPtr other_bsp)
296 {
297 	Int2 type;
298 	Int4Ptr x_a, x;
299 	Uint2 num;
300 	SeqIdPtr anchor_id, other_id;
301 	SeqAlignPtr align;
302 	Int4 other_pos;
303 
304 
305 	if(annot == NULL || anchor_bsp == NULL || other_bsp == NULL)
306 		return -1;
307 	if(anchor_pos < 0 || anchor_pos > anchor_bsp->length-1)
308 		return -1;
309 
310 	if(anchor_bsp == other_bsp)
311 		return anchor_pos;
312 
313 	anchor_id = SeqIdFindBest(anchor_bsp->id, SEQID_GI);
314 	if(anchor_id == NULL)
315 		anchor_id = anchor_bsp->id;
316 
317 	other_id = SeqIdFindBest(other_bsp->id, SEQID_GI);
318 	if(other_id == NULL)
319 		other_id = other_bsp->id;
320 
321 	other_pos = -1;
322 	while(annot)
323 	{
324 		if(annot->type == 2)
325 		{
326 			type = GetEquivAlignType(annot);
327 			if(type == 1)	/*this is consistent*/
328 			{
329 				align = annot->data;
330 				if(get_anchor_coordinates(align, anchor_id, other_id, &x_a, &x, &num))
331 				{
332 					if(num >= 10)
333 						other_pos = find_this_position_by_anchor (x, x_a, num, anchor_pos, other_bsp->length);
334 					MemFree(x_a);
335 					MemFree(x);
336 					if(other_pos != -1)
337 						return other_pos;
338 				}
339 			}
340 		}
341 
342 		annot = annot->next;
343 	}
344 
345 	return other_pos;
346 }
347 
348 
349 
350