1 #include <mconsist.h>
2 #include <maputil.h>
3
4 /****************************************************************************
5 *
6 * get_anchor_coordinates(h_align, anchor_id, sip, x_anchor_pos, x_pos, num)
7 *
8 * load the positions of framework marker of the current sequence and its
9 * corresponding positions in the anchor map (normally the NCBI sequence) into
10 * two arays to extrapolate the positions for map integration
11 * h_align: the alignment of consistent markers (a.k.a good alignment)
12 * anchor_id: the Seq-id for the anchor map (normally, the NCBI sequence map)
13 * sip: the Seq-id for the current sequence
14 * x_anchor_pos: the array to store the positions on the anchor map
15 * x_pos: the array to store the positions on the current map. each
16 * member of x_anchor_pos and x_pos is a matching pair
17 * both arrays are ordered
18 * num: the total number of framework markers in the current map.
19 * This is also the size of x_anchor_pos and x_pos
20 *
21 * return TRUE for success and FALSE for fail
22 *
23 ****************************************************************************/
get_anchor_coordinates(SeqAlignPtr h_align,SeqIdPtr anchor_id,SeqIdPtr sip,Int4Ptr PNTR x_anchor_pos,Int4Ptr PNTR x_pos,Uint2Ptr num)24 Boolean get_anchor_coordinates(SeqAlignPtr h_align, SeqIdPtr anchor_id, SeqIdPtr sip, Int4Ptr PNTR x_anchor_pos, Int4Ptr PNTR x_pos, Uint2Ptr num)
25 {
26 StdSegPtr ssp;
27 SeqLocPtr slp;
28 SeqIdPtr id;
29 Int4 n, i, j;
30 Int4 a_pos, t_pos;
31 SeqAlignPtr align;
32 Int4Ptr anchor_pos, pos;
33 Boolean change;
34 Int4 temp;
35
36 *x_anchor_pos = NULL;
37 *x_pos = NULL;
38 *num = 0;
39
40 align = h_align;
41 if(align == NULL)
42 return FALSE;
43
44 /* count the number of the consistent markers for the
45 current sip */
46 n = 0;
47 while(align)
48 {
49 if(align->segtype == 3)
50 { /*Std-segs*/
51 ssp = align->segs;
52 for(slp = ssp->loc; slp != NULL; slp = slp->next)
53 {
54 id = SeqLocId(slp);
55 if(SeqIdForSameBioseq(id, sip))
56 {
57 ++n;
58 break;
59 }
60 }
61 }
62 align = align->next;
63 }
64
65 if(n == 0)
66 return FALSE;
67 anchor_pos = MemNew((size_t)n * sizeof(Int4));
68 pos = MemNew((size_t)n * sizeof(Int4));
69
70 /*load the data*/
71 n = 0;
72 align = h_align;
73 while(align)
74 {
75 if(align->segtype == 3)
76 {
77 ssp = align->segs;
78 a_pos = -1;
79 t_pos = -1;
80 for(slp = ssp->loc; slp != NULL; slp = slp->next)
81 {
82 id = SeqLocId(slp);
83 if(SeqIdForSameBioseq(id, sip))
84 t_pos = (SeqLocStart(slp) + SeqLocStop(slp))/2;
85 else if(SeqIdForSameBioseq(id, anchor_id))
86 a_pos = SeqLocStart(slp);
87 }
88
89 /*each time, load a matching pair*/
90 if(t_pos >= 0 && a_pos >=0)
91 {
92 anchor_pos[n] = a_pos;
93 pos[n] = t_pos;
94 ++n;
95 }
96 }
97 align = align->next;
98 }
99
100 /*sort the array*/
101 change = TRUE;
102 for(i = 0; i<n-1 && change == TRUE; ++i)
103 {
104 change = FALSE;
105 for(j = 0; j<n-i-1; ++j)
106 {
107 if(anchor_pos[j] > anchor_pos[j+1])
108 {
109 change = TRUE;
110 temp = anchor_pos[j+1];
111 anchor_pos[j+1] = anchor_pos[j];
112 anchor_pos[j] = temp;
113
114 temp = pos[j+1];
115 pos[j+1] = pos[j];
116 pos[j] = temp;
117 }
118 }
119 }
120
121 /*check the linear relationship between anchor_pos and pos*/
122 temp = pos[0];
123 for(i = 1, j= 1; i<n; ++i)
124 {
125 if(pos[i] >= temp)
126 {
127 temp = pos[i];
128 pos[j] = temp;
129 anchor_pos[j] = anchor_pos[i];
130 ++j;
131 }
132 }
133 *x_anchor_pos = anchor_pos;
134 *x_pos = pos;
135 *num = (Uint2)j;
136 return TRUE;
137 }
138
139
140 /****************************************************************************
141 *
142 * find_this_position_by_anchor(anchor_pos, pos, num, this_pos, length)
143 * map the position on the current map to the anchor map (sequence map)
144 * anchor_pos, pos: the two arrays of matching positions on the anchor mark
145 * and the current map for the consistent markers
146 * num: number of the consistent markers on the current map. (same as the
147 * size of the anchor marker)
148 * this_pos: the position on the current map
149 * length: the length of the anchor map
150 *
151 * return the position mapped to the anchor marker
152 *
153 *****************************************************************************/
find_this_position_by_anchor(Int4Ptr anchor_pos,Int4Ptr pos,Uint2 num,Int4 this_pos,Int4 length)154 Int4 find_this_position_by_anchor (Int4Ptr anchor_pos, Int4Ptr pos, Uint2 num, Int4 this_pos, Int4 length)
155 {
156 Uint2 i;
157 Int4 prev, curr;
158 Int4 a_prev, a_curr;
159 FloatHi val;
160
161 /*assumes that there are 0-0 mapping at the start*/
162 prev = 0;
163 a_prev = 0;
164 for(i = 0; i<num; ++i)
165 {
166 curr = pos[i];
167 a_curr = anchor_pos[i];
168 if(this_pos == prev)
169 return a_prev;
170 if(this_pos == curr)
171 return a_curr;
172 if(this_pos >prev && this_pos <curr)
173 {
174 if(a_curr == a_prev)
175 return a_curr;
176 val = (FloatHi)(a_curr - a_prev) * (FloatHi)(this_pos - prev) / (FloatHi)(curr - prev);
177 return (a_prev + (Int4)val);
178 }
179 prev = curr;
180 a_prev = a_curr;
181 }
182
183 curr = length-1;
184 a_curr = length-1;
185 val = (FloatHi)(a_curr - a_prev) * (FloatHi)(this_pos - prev) / (FloatHi)(curr - prev);
186 return (a_prev + (Int4)val);
187 }
188
189
find_flanking_anchor_pos(Int4Ptr anchor_pos,Uint2 num,Int4 c_pos,Int4 length,Int4Ptr f_left,Int4Ptr f_right)190 Boolean find_flanking_anchor_pos(Int4Ptr anchor_pos, Uint2 num, Int4 c_pos, Int4 length, Int4Ptr f_left, Int4Ptr f_right)
191 {
192 Uint2 i;
193 Int4 prev;
194
195 prev = 0;
196 for(i = 0; i<num; ++i)
197 {
198 if(anchor_pos[i] == c_pos)
199 {
200 *f_left = anchor_pos[i];
201 *f_right = anchor_pos[i];
202 return TRUE;
203 }
204 if(anchor_pos[i] > c_pos)
205 {
206 *f_left = prev;
207 *f_right = anchor_pos[i];
208 return TRUE;
209 }
210
211 prev = anchor_pos[i];
212 }
213
214 *f_left = prev;
215 *f_right = length-1;
216 return TRUE;
217 }
218
219
220
221 /**********************************************************************
222 *
223 * MapLocToAnchor(annot, slp, anchor)
224 * map the current slp to a position on the anchor Bioseq
225 * annot: Seq-annot that may contain the alignment of the consistent markers
226 * slp: the current Bioseq
227 * anchor_id: the Seq-id for the anchor Bioseq, that is the sequece map
228 *
229 ************************************************************************/
MapLocToAnchor(SeqAnnotPtr annot,SeqLocPtr slp,BioseqPtr anchor_bsp)230 SeqLocPtr MapLocToAnchor(SeqAnnotPtr annot, SeqLocPtr slp, BioseqPtr anchor_bsp)
231 {
232 Int2 type;
233 Int4Ptr x_a, x;
234 Uint2 num;
235 SeqIdPtr anchor_id;
236 SeqLocPtr t_slp;
237 Int4 start, stop;
238 SeqAlignPtr align;
239
240
241 if(annot == NULL || slp == NULL || anchor_bsp == NULL)
242 return NULL;
243 if(slp->choice != SEQLOC_PNT && slp->choice != SEQLOC_INT)
244 return NULL;
245 anchor_id = SeqIdFindBest(anchor_bsp->id, SEQID_GI);
246 if(anchor_id == NULL)
247 anchor_id = anchor_bsp->id;
248
249 while(annot)
250 {
251 if(annot->type == 2)
252 {
253 type = GetEquivAlignType(annot);
254 if(type == 1) /*this is consistent*/
255 {
256 align = annot->data;
257 if(!get_anchor_coordinates(align, anchor_id, SeqLocId(slp), &x_a, &x, &num))
258 return NULL;
259 if(slp->choice == SEQLOC_INT)
260 {
261 start = find_this_position_by_anchor (x_a, x, num, SeqLocStart(slp), anchor_bsp->length);
262 if(SeqLocStart(slp) != SeqLocStop(slp))
263 stop = find_this_position_by_anchor (x_a, x, num, SeqLocStop(slp), anchor_bsp->length);
264 else
265 stop = start;
266 t_slp = SeqLocIntNew(start, stop, Seq_strand_plus, anchor_id);
267 }
268 else
269 {
270 start = SeqLocStart(slp);
271 start = find_this_position_by_anchor (x_a, x, num, start, anchor_bsp->length);
272 t_slp = SeqLocPntNew(start, Seq_strand_plus, anchor_id, FALSE);
273 }
274 MemFree(x_a);
275 MemFree(x);
276 return t_slp;
277 }
278 }
279
280 annot = annot->next;
281 }
282
283 return NULL;
284 }
285
286
287 /*****************************************************************
288 *
289 * map a position on the anchor_bsp (anchor_pos) to a
290 * position on the other_bsp. It is the reverse operation of
291 * MapLocToAnchor
292 * return -1 for failure
293 *
294 ******************************************************************/
MapAnchorToLoc(SeqAnnotPtr annot,Int4 anchor_pos,BioseqPtr anchor_bsp,BioseqPtr other_bsp)295 Int4 MapAnchorToLoc(SeqAnnotPtr annot, Int4 anchor_pos, BioseqPtr anchor_bsp, BioseqPtr other_bsp)
296 {
297 Int2 type;
298 Int4Ptr x_a, x;
299 Uint2 num;
300 SeqIdPtr anchor_id, other_id;
301 SeqAlignPtr align;
302 Int4 other_pos;
303
304
305 if(annot == NULL || anchor_bsp == NULL || other_bsp == NULL)
306 return -1;
307 if(anchor_pos < 0 || anchor_pos > anchor_bsp->length-1)
308 return -1;
309
310 if(anchor_bsp == other_bsp)
311 return anchor_pos;
312
313 anchor_id = SeqIdFindBest(anchor_bsp->id, SEQID_GI);
314 if(anchor_id == NULL)
315 anchor_id = anchor_bsp->id;
316
317 other_id = SeqIdFindBest(other_bsp->id, SEQID_GI);
318 if(other_id == NULL)
319 other_id = other_bsp->id;
320
321 other_pos = -1;
322 while(annot)
323 {
324 if(annot->type == 2)
325 {
326 type = GetEquivAlignType(annot);
327 if(type == 1) /*this is consistent*/
328 {
329 align = annot->data;
330 if(get_anchor_coordinates(align, anchor_id, other_id, &x_a, &x, &num))
331 {
332 if(num >= 10)
333 other_pos = find_this_position_by_anchor (x, x_a, num, anchor_pos, other_bsp->length);
334 MemFree(x_a);
335 MemFree(x);
336 if(other_pos != -1)
337 return other_pos;
338 }
339 }
340 }
341
342 annot = annot->next;
343 }
344
345 return other_pos;
346 }
347
348
349
350