1 /****************************************************************\
2 * *
3 * C4 dynamic programming library - suboptimal alignments *
4 * *
5 * Guy St.C. Slater.. mailto:guy@ebi.ac.uk *
6 * Copyright (C) 2000-2009. All Rights Reserved. *
7 * *
8 * This source code is distributed under the terms of the *
9 * GNU General Public License, version 3. See the file COPYING *
10 * or http://www.gnu.org/licenses/gpl.txt for details *
11 * *
12 * If you use this code, please keep this notice intact. *
13 * *
14 \****************************************************************/
15
16 #include <stdlib.h> /* For qsort() */
17
18 #include "subopt.h"
19
SubOpt_create(gint query_length,gint target_length)20 SubOpt *SubOpt_create(gint query_length, gint target_length){
21 register SubOpt *subopt = g_new(SubOpt, 1);
22 subopt->ref_count = 1;
23 subopt->query_length = query_length;
24 subopt->target_length = target_length;
25 subopt->range_tree = RangeTree_create();
26 subopt->path_count = 0;
27 return subopt;
28 }
29
SubOpt_share(SubOpt * subopt)30 SubOpt *SubOpt_share(SubOpt *subopt){
31 subopt->ref_count++;
32 return subopt;
33 }
34
SubOpt_destroy(SubOpt * subopt)35 void SubOpt_destroy(SubOpt *subopt){
36 if(--subopt->ref_count)
37 return;
38 RangeTree_destroy(subopt->range_tree, NULL, NULL);
39 g_free(subopt);
40 return;
41 }
42
43 /* Greatest Common Divisor : Euclid's algorithm */
SubOpt_get_gcd(gint a,gint b)44 static gint SubOpt_get_gcd(gint a, gint b){
45 register gint t;
46 g_assert(a > 0);
47 g_assert(b > 0);
48 while(a > 0){
49 if(a < b){
50 t = a;
51 a = b;
52 b = t;
53 }
54 a -= b;
55 }
56 return b;
57 }
58
SubOpt_check_pos(SubOpt * subopt,gint query_pos,gint target_pos)59 static gboolean SubOpt_check_pos(SubOpt *subopt,
60 gint query_pos, gint target_pos){
61 return RangeTree_check_pos(subopt->range_tree,
62 query_pos, target_pos);
63 }
64
SubOpt_add_AlignmentOperation(SubOpt * subopt,AlignmentOperation * ao,gint query_pos,gint target_pos)65 static void SubOpt_add_AlignmentOperation(SubOpt *subopt,
66 AlignmentOperation *ao, gint query_pos, gint target_pos){
67 register gint i;
68 register gint gcd = SubOpt_get_gcd(ao->transition->advance_query,
69 ao->transition->advance_target);
70 register gint q_move = ao->transition->advance_query / gcd,
71 t_move = ao->transition->advance_target / gcd;
72 register gint q_limit = query_pos, t_limit = target_pos;
73 register gint qp, tp;
74 /* Add operation */
75 for(i = 0; i < ao->length; i++){
76 /* Block positions in transition */
77 qp = q_limit;
78 tp = t_limit;
79 q_limit += ao->transition->advance_query;
80 t_limit += ao->transition->advance_target;
81 while(qp < q_limit){
82 /* We need to ensure that the position is unused,
83 * otherwise lead-out positions could clash with lead-in
84 * positions from a previous alignment.
85 */
86 if(!SubOpt_check_pos(subopt,
87 /* qp + ao->transition->advance_query,
88 tp + ao->transition->advance_target)){ */
89 qp, tp)){
90 RangeTree_add(subopt->range_tree,
91 /* qp + ao->transition->advance_query,
92 tp + ao->transition->advance_target, */
93 qp, tp,
94 GINT_TO_POINTER(subopt->path_count));
95 }
96 qp += q_move;
97 tp += t_move;
98 }
99 }
100 /* Block leading in positions */
101 qp = query_pos - ao->transition->advance_query + q_move;
102 tp = target_pos - ao->transition->advance_target + t_move;
103 while(qp < query_pos){
104 /* Only insert if not already present */
105 if(!SubOpt_check_pos(subopt,
106 /*
107 qp + ao->transition->advance_query,
108 tp + ao->transition->advance_target)){
109 */
110 qp, tp)){
111 /* FIXME: */
112 if((qp >= 0) && (tp >= 0))
113 RangeTree_add(subopt->range_tree,
114 /* qp + ao->transition->advance_query,
115 tp + ao->transition->advance_target, */
116 qp, tp,
117 GINT_TO_POINTER(subopt->path_count));
118 }
119 qp += q_move;
120 tp += t_move;
121 }
122 return;
123 }
124 /* FIXME: tidy */
125
SubOpt_add_alignment(SubOpt * subopt,Alignment * alignment)126 void SubOpt_add_alignment(SubOpt *subopt, Alignment *alignment){
127 register gint i;
128 register gint query_pos, target_pos;
129 register AlignmentOperation *ao;
130 query_pos = alignment->region->query_start;
131 target_pos = alignment->region->target_start;
132 for(i = 0; i < alignment->operation_list->len; i++){
133 ao = alignment->operation_list->pdata[i];
134 if(C4_Transition_is_match(ao->transition)){
135 SubOpt_add_AlignmentOperation(subopt, ao,
136 query_pos, target_pos);
137 }
138 query_pos += (ao->transition->advance_query * ao->length);
139 target_pos += (ao->transition->advance_target * ao->length);
140 }
141 subopt->path_count++;
142 return;
143 }
144
145 /**/
146
147 typedef struct {
148 gpointer user_data;
149 SubOpt_FindFunc find_func;
150 } SubOpt_FindData;
151
SubOpt_RangeTree_find(gint x,gint y,gpointer info,gpointer user_data)152 static gboolean SubOpt_RangeTree_find(gint x, gint y,
153 gpointer info, gpointer user_data){
154 register SubOpt_FindData *sufd = user_data;
155 register gint path_id = GPOINTER_TO_INT(info);
156 if(sufd->find_func(x, y, path_id, sufd->user_data))
157 return TRUE;
158 return FALSE;
159 }
160
SubOpt_overlap_find_func(gint query_pos,gint target_pos,gint path_id,gpointer user_data)161 static gboolean SubOpt_overlap_find_func(gint query_pos, gint target_pos,
162 gint path_id, gpointer user_data){
163 return TRUE;
164 }
165
SubOpt_find(SubOpt * subopt,Region * region,SubOpt_FindFunc find_func,gpointer user_data)166 gboolean SubOpt_find(SubOpt *subopt, Region *region,
167 SubOpt_FindFunc find_func, gpointer user_data){
168 SubOpt_FindData sufd;
169 sufd.user_data = user_data;
170 sufd.find_func = find_func;
171 return RangeTree_find(subopt->range_tree,
172 region->query_start, region->query_length,
173 region->target_start, region->target_length,
174 SubOpt_RangeTree_find, &sufd);
175 }
176
SubOpt_overlaps_alignment(SubOpt * subopt,Alignment * alignment)177 gboolean SubOpt_overlaps_alignment(SubOpt *subopt, Alignment *alignment){
178 register gint i, j;
179 register AlignmentOperation *ao;
180 Region region;
181 register gint qp = alignment->region->query_start,
182 tp = alignment->region->target_start;
183 for(i = 0; i < alignment->operation_list->len; i++){
184 ao = alignment->operation_list->pdata[i];
185 if(C4_Transition_is_match(ao->transition)){
186 for(j = 0; j < ao->length; j++){
187 region.query_start = qp;
188 region.target_start = tp;
189 region.query_length = ao->transition->advance_query;
190 region.target_length = ao->transition->advance_target;
191 if(SubOpt_find(subopt, ®ion,
192 SubOpt_overlap_find_func, NULL))
193 return TRUE;
194 qp += ao->transition->advance_query;
195 tp += ao->transition->advance_target;
196 }
197 } else {
198 qp += (ao->transition->advance_query * ao->length);
199 tp += (ao->transition->advance_target * ao->length);
200 }
201 }
202 return FALSE;
203 }
204
205 /**/
206
SubOpt_Index_Row_create(gint target_pos)207 static SubOpt_Index_Row *SubOpt_Index_Row_create(gint target_pos){
208 register SubOpt_Index_Row *soir = g_new(SubOpt_Index_Row, 1);
209 soir->target_pos = target_pos;
210 soir->total = 0;
211 soir->query_pos = NULL;
212 return soir;
213 }
214
SubOpt_Index_Row_destroy(SubOpt_Index_Row * soir)215 static void SubOpt_Index_Row_destroy(SubOpt_Index_Row *soir){
216 if(soir->query_pos)
217 g_free(soir->query_pos);
218 g_free(soir);
219 return;
220 }
221
222 /**/
223
224 typedef struct {
225 gint query_pos;
226 gint target_pos;
227 } SubOpt_Point;
228
SubOpt_RangeTree_traverse(gint x,gint y,gpointer info,gpointer user_data)229 static gboolean SubOpt_RangeTree_traverse(gint x, gint y, gpointer info,
230 gpointer user_data){
231 register GPtrArray *point_list = user_data;
232 register SubOpt_Point *sop = g_new(SubOpt_Point, 1);
233 sop->query_pos = x;
234 sop->target_pos = y;
235 g_ptr_array_add(point_list, sop);
236 return FALSE;
237 }
238
SubOpt_sort_by_target_then_query_pos(const void * a,const void * b)239 static int SubOpt_sort_by_target_then_query_pos(const void *a,
240 const void *b){
241 register SubOpt_Point **point_a = (SubOpt_Point**)a,
242 **point_b = (SubOpt_Point**)b;
243 register gint target_diff = (*point_a)->target_pos
244 - (*point_b)->target_pos;
245 if(!target_diff)
246 return (*point_a)->query_pos - (*point_b)->query_pos;
247 return target_diff;
248 }
249
SubOpt_Index_create(SubOpt * subopt,Region * region)250 SubOpt_Index *SubOpt_Index_create(SubOpt *subopt, Region *region){
251 register SubOpt_Index *soi;
252 register GPtrArray *point_list = g_ptr_array_new();
253 register gint i, j, start;
254 register SubOpt_Point *point, *prev_point = NULL;
255 register SubOpt_Index_Row *soir = NULL;
256 /* Copy all points in region into point_list */
257 RangeTree_find(subopt->range_tree,
258 region->query_start, region->query_length+1,
259 region->target_start, region->target_length+1,
260 SubOpt_RangeTree_traverse, point_list);
261 if(!point_list->len){
262 g_ptr_array_free(point_list, TRUE);
263 return NULL; /* Found no points in region */
264 }
265 /* Convert all points to region coordinates */
266 for(i = 0; i < point_list->len; i++){
267 point = point_list->pdata[i];
268 point->query_pos -= region->query_start;
269 point->target_pos -= region->target_start;
270 }
271 /* Sort points to correct order for DP */
272 qsort(point_list->pdata, point_list->len,
273 sizeof(gpointer), SubOpt_sort_by_target_then_query_pos);
274 soi = g_new(SubOpt_Index, 1);
275 soi->region = Region_share(region);
276 soi->row_list = g_ptr_array_new();
277 soi->curr_row_index = 0;
278 soi->curr_query_index = 0;
279 /* Set the row sizes */
280 for(i = 0; i < point_list->len; i++){
281 point = point_list->pdata[i];
282 if((!soir) || (soir->target_pos != point->target_pos)){
283 soir = SubOpt_Index_Row_create(point->target_pos);
284 soir->total = 1;
285 g_ptr_array_add(soi->row_list, soir);
286 soir->query_pos = GINT_TO_POINTER(i); /* << Hack */
287 } else {
288 g_assert(soir->target_pos == point->target_pos);
289 soir->total++;
290 }
291 }
292 /* Fill the rows */
293 for(i = 0; i < soi->row_list->len; i++){
294 soir = soi->row_list->pdata[i];
295 g_assert(soir->total);
296 start = GPOINTER_TO_INT(soir->query_pos); /* >> Hack */
297 soir->query_pos = g_new(gint, soir->total+1);
298 prev_point = NULL;
299 for(j = 0; j < soir->total; j++){
300 point = point_list->pdata[start+j];
301 soir->query_pos[j] = point->query_pos;
302 g_assert(soir->target_pos == point->target_pos);
303 g_assert((!prev_point) /* Ensure points are unique */
304 || (prev_point->query_pos != point->query_pos));
305 prev_point = point;
306 }
307 /* Tag on dummy end position (query_length+1) */
308 soir->query_pos[soir->total] = subopt->query_length + 1;
309 }
310 /**/
311 for(i = 0; i < point_list->len; i++){
312 point = point_list->pdata[i];
313 g_free(point);
314 }
315 g_ptr_array_free(point_list, TRUE);
316 soi->curr_row = soi->blank_row;
317 soi->blank_row = SubOpt_Index_Row_create(subopt->target_length+1);
318 soi->blank_row->total = 1;
319 soi->blank_row->query_pos = g_new(gint, 1);
320 soi->blank_row->query_pos[0] = subopt->query_length+1;
321 g_ptr_array_add(soi->row_list, soi->blank_row);
322 return soi;
323 }
324
SubOpt_Index_destroy(SubOpt_Index * soi)325 void SubOpt_Index_destroy(SubOpt_Index *soi){
326 register gint i;
327 register SubOpt_Index_Row *soir;
328 /* Free all rows (including final blank row) */
329 for(i = 0; i < soi->row_list->len; i++){
330 soir = soi->row_list->pdata[i];
331 SubOpt_Index_Row_destroy(soir);
332 }
333 g_ptr_array_free(soi->row_list, TRUE);
334 Region_destroy(soi->region);
335 g_free(soi);
336 return;
337 }
338
SubOpt_Index_set_row(SubOpt_Index * soi,gint target_pos)339 void SubOpt_Index_set_row(SubOpt_Index *soi, gint target_pos){
340 register SubOpt_Index_Row *soir = NULL;
341 g_assert(target_pos >= 0);
342 if(!soi)
343 return;
344 /**/
345 soir = soi->row_list->pdata[soi->curr_row_index];
346 while(soir->target_pos < target_pos){
347 if(soi->curr_row_index < soi->row_list->len){
348 soi->curr_row_index++;
349 soir = soi->row_list->pdata[soi->curr_row_index];
350 } else {
351 soir = NULL;
352 break;
353 }
354 }
355 if(soir){
356 while(soir->target_pos > target_pos){
357 if(soi->curr_row_index > 0){
358 soi->curr_row_index--;
359 soir = soi->row_list->pdata[soi->curr_row_index];
360 } else {
361 soir = NULL;
362 break;
363 }
364 }
365 if(soir && (soir->target_pos == target_pos)){
366 soi->curr_row = soir;
367 } else {
368 soi->curr_row = soi->blank_row;
369 }
370 }
371 /**/
372 soi->curr_query_index = 0;
373 return;
374 }
375
SubOpt_Index_is_blocked(SubOpt_Index * soi,gint query_pos)376 gboolean SubOpt_Index_is_blocked(SubOpt_Index *soi, gint query_pos){
377 g_assert(query_pos >= 0);
378 while(soi->curr_row->query_pos[soi->curr_query_index] <
379 query_pos){
380 soi->curr_query_index++;
381 }
382 while(soi->curr_row->query_pos[soi->curr_query_index] > query_pos){
383 if(!soi->curr_query_index)
384 break;
385 soi->curr_query_index--;
386 }
387 return (soi->curr_row->query_pos[soi->curr_query_index]
388 == query_pos);
389 }
390 /* FIXME: optimisation: use fwd and rev only versions of this for sdp */
391
392
393 /**/
394
395