1 /****************************************************************\
2 *                                                                *
3 *  C4 dynamic programming library - suboptimal alignments        *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 #include <stdlib.h> /* For qsort() */
17 
18 #include "subopt.h"
19 
SubOpt_create(gint query_length,gint target_length)20 SubOpt *SubOpt_create(gint query_length, gint target_length){
21     register SubOpt *subopt = g_new(SubOpt, 1);
22     subopt->ref_count = 1;
23     subopt->query_length = query_length;
24     subopt->target_length = target_length;
25     subopt->range_tree = RangeTree_create();
26     subopt->path_count = 0;
27     return subopt;
28     }
29 
SubOpt_share(SubOpt * subopt)30 SubOpt *SubOpt_share(SubOpt *subopt){
31     subopt->ref_count++;
32     return subopt;
33     }
34 
SubOpt_destroy(SubOpt * subopt)35 void SubOpt_destroy(SubOpt *subopt){
36     if(--subopt->ref_count)
37         return;
38     RangeTree_destroy(subopt->range_tree, NULL, NULL);
39     g_free(subopt);
40     return;
41     }
42 
43 /* Greatest Common Divisor : Euclid's algorithm */
SubOpt_get_gcd(gint a,gint b)44 static gint SubOpt_get_gcd(gint a, gint b){
45     register gint t;
46     g_assert(a > 0);
47     g_assert(b > 0);
48     while(a > 0){
49         if(a < b){
50             t = a;
51             a = b;
52             b = t;
53             }
54         a -= b;
55         }
56     return b;
57     }
58 
SubOpt_check_pos(SubOpt * subopt,gint query_pos,gint target_pos)59 static gboolean SubOpt_check_pos(SubOpt *subopt,
60                                  gint query_pos, gint target_pos){
61     return RangeTree_check_pos(subopt->range_tree,
62                                query_pos, target_pos);
63     }
64 
SubOpt_add_AlignmentOperation(SubOpt * subopt,AlignmentOperation * ao,gint query_pos,gint target_pos)65 static void SubOpt_add_AlignmentOperation(SubOpt *subopt,
66               AlignmentOperation *ao, gint query_pos, gint target_pos){
67     register gint i;
68     register gint gcd = SubOpt_get_gcd(ao->transition->advance_query,
69                                        ao->transition->advance_target);
70     register gint q_move = ao->transition->advance_query / gcd,
71                   t_move = ao->transition->advance_target / gcd;
72     register gint q_limit = query_pos, t_limit = target_pos;
73     register gint qp, tp;
74     /* Add operation */
75     for(i = 0; i < ao->length; i++){
76         /* Block positions in transition */
77         qp = q_limit;
78         tp = t_limit;
79         q_limit += ao->transition->advance_query;
80         t_limit += ao->transition->advance_target;
81         while(qp < q_limit){
82             /* We need to ensure that the position is unused,
83              * otherwise lead-out positions could clash with lead-in
84              * positions from a previous alignment.
85              */
86             if(!SubOpt_check_pos(subopt,
87                      /* qp + ao->transition->advance_query,
88                         tp + ao->transition->advance_target)){ */
89                         qp, tp)){
90                 RangeTree_add(subopt->range_tree,
91                     /* qp + ao->transition->advance_query,
92                     tp + ao->transition->advance_target, */
93                     qp, tp,
94                     GINT_TO_POINTER(subopt->path_count));
95                 }
96             qp += q_move;
97             tp += t_move;
98             }
99         }
100     /* Block leading in positions */
101     qp = query_pos - ao->transition->advance_query + q_move;
102     tp = target_pos - ao->transition->advance_target + t_move;
103     while(qp < query_pos){
104         /* Only insert if not already present */
105         if(!SubOpt_check_pos(subopt,
106                     /*
107                          qp + ao->transition->advance_query,
108                          tp + ao->transition->advance_target)){
109                      */
110                              qp, tp)){
111             /* FIXME: */
112             if((qp >= 0) && (tp >= 0))
113             RangeTree_add(subopt->range_tree,
114                     /* qp + ao->transition->advance_query,
115                     tp + ao->transition->advance_target, */
116                     qp, tp,
117                     GINT_TO_POINTER(subopt->path_count));
118             }
119         qp += q_move;
120         tp += t_move;
121         }
122     return;
123     }
124 /* FIXME: tidy */
125 
SubOpt_add_alignment(SubOpt * subopt,Alignment * alignment)126 void SubOpt_add_alignment(SubOpt *subopt, Alignment *alignment){
127     register gint i;
128     register gint query_pos, target_pos;
129     register AlignmentOperation *ao;
130     query_pos = alignment->region->query_start;
131     target_pos = alignment->region->target_start;
132     for(i = 0; i < alignment->operation_list->len; i++){
133         ao = alignment->operation_list->pdata[i];
134         if(C4_Transition_is_match(ao->transition)){
135             SubOpt_add_AlignmentOperation(subopt, ao,
136                                           query_pos, target_pos);
137             }
138         query_pos += (ao->transition->advance_query * ao->length);
139         target_pos += (ao->transition->advance_target * ao->length);
140         }
141     subopt->path_count++;
142     return;
143     }
144 
145 /**/
146 
147 typedef struct {
148     gpointer user_data;
149     SubOpt_FindFunc find_func;
150 } SubOpt_FindData;
151 
SubOpt_RangeTree_find(gint x,gint y,gpointer info,gpointer user_data)152 static gboolean SubOpt_RangeTree_find(gint x, gint y,
153                                  gpointer info, gpointer user_data){
154     register SubOpt_FindData *sufd = user_data;
155     register gint path_id = GPOINTER_TO_INT(info);
156     if(sufd->find_func(x, y, path_id, sufd->user_data))
157         return TRUE;
158     return FALSE;
159     }
160 
SubOpt_overlap_find_func(gint query_pos,gint target_pos,gint path_id,gpointer user_data)161 static gboolean SubOpt_overlap_find_func(gint query_pos, gint target_pos,
162                                          gint path_id, gpointer user_data){
163     return TRUE;
164     }
165 
SubOpt_find(SubOpt * subopt,Region * region,SubOpt_FindFunc find_func,gpointer user_data)166 gboolean SubOpt_find(SubOpt *subopt, Region *region,
167                      SubOpt_FindFunc find_func, gpointer user_data){
168     SubOpt_FindData sufd;
169     sufd.user_data = user_data;
170     sufd.find_func = find_func;
171     return RangeTree_find(subopt->range_tree,
172             region->query_start, region->query_length,
173             region->target_start, region->target_length,
174             SubOpt_RangeTree_find, &sufd);
175     }
176 
SubOpt_overlaps_alignment(SubOpt * subopt,Alignment * alignment)177 gboolean SubOpt_overlaps_alignment(SubOpt *subopt, Alignment *alignment){
178     register gint i, j;
179     register AlignmentOperation *ao;
180     Region region;
181     register gint qp = alignment->region->query_start,
182                   tp = alignment->region->target_start;
183     for(i = 0; i < alignment->operation_list->len; i++){
184         ao = alignment->operation_list->pdata[i];
185         if(C4_Transition_is_match(ao->transition)){
186             for(j = 0; j < ao->length; j++){
187                 region.query_start = qp;
188                 region.target_start = tp;
189                 region.query_length = ao->transition->advance_query;
190                 region.target_length = ao->transition->advance_target;
191                 if(SubOpt_find(subopt, &region,
192                                SubOpt_overlap_find_func, NULL))
193                     return TRUE;
194                 qp += ao->transition->advance_query;
195                 tp += ao->transition->advance_target;
196                 }
197         } else {
198             qp += (ao->transition->advance_query * ao->length);
199             tp += (ao->transition->advance_target * ao->length);
200             }
201         }
202     return FALSE;
203     }
204 
205 /**/
206 
SubOpt_Index_Row_create(gint target_pos)207 static SubOpt_Index_Row *SubOpt_Index_Row_create(gint target_pos){
208     register SubOpt_Index_Row *soir = g_new(SubOpt_Index_Row, 1);
209     soir->target_pos = target_pos;
210     soir->total = 0;
211     soir->query_pos = NULL;
212     return soir;
213     }
214 
SubOpt_Index_Row_destroy(SubOpt_Index_Row * soir)215 static void SubOpt_Index_Row_destroy(SubOpt_Index_Row *soir){
216     if(soir->query_pos)
217         g_free(soir->query_pos);
218     g_free(soir);
219     return;
220     }
221 
222 /**/
223 
224 typedef struct {
225     gint query_pos;
226     gint target_pos;
227 } SubOpt_Point;
228 
SubOpt_RangeTree_traverse(gint x,gint y,gpointer info,gpointer user_data)229 static gboolean SubOpt_RangeTree_traverse(gint x, gint y, gpointer info,
230                                           gpointer user_data){
231     register GPtrArray *point_list = user_data;
232     register SubOpt_Point *sop = g_new(SubOpt_Point, 1);
233     sop->query_pos = x;
234     sop->target_pos = y;
235     g_ptr_array_add(point_list, sop);
236     return FALSE;
237     }
238 
SubOpt_sort_by_target_then_query_pos(const void * a,const void * b)239 static int SubOpt_sort_by_target_then_query_pos(const void *a,
240                                                 const void *b){
241     register SubOpt_Point **point_a = (SubOpt_Point**)a,
242                           **point_b = (SubOpt_Point**)b;
243     register gint target_diff = (*point_a)->target_pos
244                               - (*point_b)->target_pos;
245     if(!target_diff)
246         return (*point_a)->query_pos - (*point_b)->query_pos;
247     return target_diff;
248     }
249 
SubOpt_Index_create(SubOpt * subopt,Region * region)250 SubOpt_Index *SubOpt_Index_create(SubOpt *subopt, Region *region){
251     register SubOpt_Index *soi;
252     register GPtrArray *point_list = g_ptr_array_new();
253     register gint i, j, start;
254     register SubOpt_Point *point, *prev_point = NULL;
255     register SubOpt_Index_Row *soir = NULL;
256     /* Copy all points in region into point_list */
257     RangeTree_find(subopt->range_tree,
258                    region->query_start, region->query_length+1,
259                    region->target_start, region->target_length+1,
260                    SubOpt_RangeTree_traverse, point_list);
261     if(!point_list->len){
262         g_ptr_array_free(point_list, TRUE);
263         return NULL; /* Found no points in region */
264         }
265     /* Convert all points to region coordinates */
266     for(i = 0; i < point_list->len; i++){
267         point = point_list->pdata[i];
268         point->query_pos -= region->query_start;
269         point->target_pos -= region->target_start;
270         }
271     /* Sort points to correct order for DP */
272     qsort(point_list->pdata, point_list->len,
273           sizeof(gpointer), SubOpt_sort_by_target_then_query_pos);
274     soi = g_new(SubOpt_Index, 1);
275     soi->region = Region_share(region);
276     soi->row_list = g_ptr_array_new();
277     soi->curr_row_index = 0;
278     soi->curr_query_index = 0;
279     /* Set the row sizes */
280     for(i = 0; i < point_list->len; i++){
281         point = point_list->pdata[i];
282         if((!soir) || (soir->target_pos != point->target_pos)){
283             soir = SubOpt_Index_Row_create(point->target_pos);
284             soir->total = 1;
285             g_ptr_array_add(soi->row_list, soir);
286             soir->query_pos = GINT_TO_POINTER(i); /* << Hack */
287         } else {
288             g_assert(soir->target_pos == point->target_pos);
289             soir->total++;
290             }
291         }
292     /* Fill the rows */
293     for(i = 0; i < soi->row_list->len; i++){
294         soir = soi->row_list->pdata[i];
295         g_assert(soir->total);
296         start = GPOINTER_TO_INT(soir->query_pos); /* >> Hack */
297         soir->query_pos = g_new(gint, soir->total+1);
298         prev_point = NULL;
299         for(j = 0; j < soir->total; j++){
300             point = point_list->pdata[start+j];
301             soir->query_pos[j] = point->query_pos;
302             g_assert(soir->target_pos == point->target_pos);
303             g_assert((!prev_point) /* Ensure points are unique */
304                   || (prev_point->query_pos != point->query_pos));
305             prev_point = point;
306             }
307         /* Tag on dummy end position (query_length+1) */
308         soir->query_pos[soir->total] = subopt->query_length + 1;
309         }
310     /**/
311     for(i = 0; i < point_list->len; i++){
312         point = point_list->pdata[i];
313         g_free(point);
314         }
315     g_ptr_array_free(point_list, TRUE);
316     soi->curr_row = soi->blank_row;
317     soi->blank_row = SubOpt_Index_Row_create(subopt->target_length+1);
318     soi->blank_row->total = 1;
319     soi->blank_row->query_pos = g_new(gint, 1);
320     soi->blank_row->query_pos[0] = subopt->query_length+1;
321     g_ptr_array_add(soi->row_list, soi->blank_row);
322     return soi;
323     }
324 
SubOpt_Index_destroy(SubOpt_Index * soi)325 void SubOpt_Index_destroy(SubOpt_Index *soi){
326     register gint i;
327     register SubOpt_Index_Row *soir;
328     /* Free all rows (including final blank row) */
329     for(i = 0; i < soi->row_list->len; i++){
330         soir = soi->row_list->pdata[i];
331         SubOpt_Index_Row_destroy(soir);
332         }
333     g_ptr_array_free(soi->row_list, TRUE);
334     Region_destroy(soi->region);
335     g_free(soi);
336     return;
337     }
338 
SubOpt_Index_set_row(SubOpt_Index * soi,gint target_pos)339 void SubOpt_Index_set_row(SubOpt_Index *soi, gint target_pos){
340     register SubOpt_Index_Row *soir = NULL;
341     g_assert(target_pos >= 0);
342     if(!soi)
343         return;
344     /**/
345     soir = soi->row_list->pdata[soi->curr_row_index];
346     while(soir->target_pos < target_pos){
347         if(soi->curr_row_index < soi->row_list->len){
348             soi->curr_row_index++;
349             soir = soi->row_list->pdata[soi->curr_row_index];
350         } else {
351             soir = NULL;
352             break;
353             }
354         }
355     if(soir){
356         while(soir->target_pos > target_pos){
357             if(soi->curr_row_index > 0){
358                 soi->curr_row_index--;
359                 soir = soi->row_list->pdata[soi->curr_row_index];
360             } else {
361                 soir = NULL;
362                 break;
363                 }
364             }
365         if(soir && (soir->target_pos == target_pos)){
366             soi->curr_row = soir;
367         } else {
368             soi->curr_row = soi->blank_row;
369             }
370         }
371     /**/
372     soi->curr_query_index = 0;
373     return;
374     }
375 
SubOpt_Index_is_blocked(SubOpt_Index * soi,gint query_pos)376 gboolean SubOpt_Index_is_blocked(SubOpt_Index *soi, gint query_pos){
377     g_assert(query_pos >= 0);
378     while(soi->curr_row->query_pos[soi->curr_query_index] <
379             query_pos){
380         soi->curr_query_index++;
381         }
382     while(soi->curr_row->query_pos[soi->curr_query_index] > query_pos){
383         if(!soi->curr_query_index)
384             break;
385         soi->curr_query_index--;
386         }
387     return (soi->curr_row->query_pos[soi->curr_query_index]
388             == query_pos);
389     }
390 /* FIXME: optimisation: use fwd and rev only versions of this for sdp */
391 
392 
393 /**/
394 
395