1 /****************************************************************\
2 *                                                                *
3 *  C4 dynamic programming library - Seeded Dynamic Programming   *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 #include <stdlib.h> /* For qsort() */
17 
18 #include "sdp.h"
19 #include "matrix.h"
20 
21 /**/
22 
SDP_ArgumentSet_create(Argument * arg)23 SDP_ArgumentSet *SDP_ArgumentSet_create(Argument *arg){
24     register ArgumentSet *as;
25     static SDP_ArgumentSet sas = {30};
26     if(arg){
27         as = ArgumentSet_create("Seeded Dynamic Programming options");
28         ArgumentSet_add_option(as, 'x', "extensionthreshold", NULL,
29                 "Gapped extension threshold", "50",
30                 Argument_parse_int, &sas.dropoff);
31         ArgumentSet_add_option(as, 0, "singlepass", NULL,
32                 "Generate suboptimal alignment in a single pass", "TRUE",
33                 Argument_parse_boolean, &sas.single_pass_subopt);
34         Argument_absorb_ArgumentSet(arg, as);
35         }
36     return &sas;
37     }
38 
39 /**/
40 
41 typedef struct {
42      GPtrArray *seed_list;
43           gint  seed_pos;
44           gint  seed_state_id;
45       SDP_Pair *sdp_pair;
46 } Scheduler_Seed_List;
47 
Scheduler_Seed_List_create(SDP_Pair * sdp_pair,gboolean is_forward)48 static Scheduler_Seed_List *Scheduler_Seed_List_create(SDP_Pair *sdp_pair,
49                                                        gboolean is_forward){
50     register Scheduler_Seed_List *seed_info_list
51         = g_new(Scheduler_Seed_List, 1);
52     register C4_Model *model = sdp_pair->sdp->model;
53     seed_info_list->seed_list = sdp_pair->seed_list;
54     seed_info_list->seed_pos = 0;
55     if(is_forward){
56         seed_info_list->seed_state_id = model->start_state->state->id;
57     } else {
58         seed_info_list->seed_state_id = model->end_state->state->id;
59         }
60     seed_info_list->sdp_pair = sdp_pair;
61     return seed_info_list;
62     }
63 
Scheduler_Seed_List_destroy(Scheduler_Seed_List * seed_info_list)64 static void Scheduler_Seed_List_destroy(
65             Scheduler_Seed_List *seed_info_list){
66     g_free(seed_info_list);
67     return;
68     }
69 
Scheduler_Seed_List_init_forward(gpointer seed_data)70 static void Scheduler_Seed_List_init_forward(gpointer seed_data){
71     register Scheduler_Seed_List *seed_info_list = seed_data;
72     g_assert(seed_info_list);
73     seed_info_list->seed_pos = 0;
74     return;
75     }
76 
Scheduler_Seed_List_init_reverse(gpointer seed_data)77 static void Scheduler_Seed_List_init_reverse(gpointer seed_data){
78     register Scheduler_Seed_List *seed_info_list = seed_data;
79     g_assert(seed_info_list);
80     seed_info_list->seed_pos = seed_info_list->seed_list->len - 1;
81     return;
82     }
83 
Scheduler_Seed_List_next_forward(gpointer seed_data)84 static void Scheduler_Seed_List_next_forward(gpointer seed_data){
85     register Scheduler_Seed_List *seed_info_list = seed_data;
86     g_assert(seed_info_list);
87     g_assert(seed_info_list->seed_pos < seed_info_list->seed_list->len);
88     seed_info_list->seed_pos++;
89     return;
90     }
91 
Scheduler_Seed_List_next_reverse(gpointer seed_data)92 static void Scheduler_Seed_List_next_reverse(gpointer seed_data){
93     register Scheduler_Seed_List *seed_info_list = seed_data;
94     g_assert(seed_info_list);
95     g_assert(seed_info_list->seed_pos >= 0);
96     seed_info_list->seed_pos--;
97     return;
98     }
99 
Scheduler_Seed_List_get_forward(gpointer seed_data,Scheduler_Seed * seed_info)100 static gboolean Scheduler_Seed_List_get_forward(gpointer seed_data,
101                                       Scheduler_Seed *seed_info){
102     register Scheduler_Seed_List *seed_info_list = seed_data;
103     register SDP_Seed *seed;
104     g_assert(seed_info_list);
105     if(seed_info_list->seed_pos >= seed_info_list->seed_list->len)
106         return FALSE;
107     seed = seed_info_list->seed_list->pdata[seed_info_list->seed_pos];
108     seed_info->query_pos = HSP_query_cobs(seed->hsp);
109     seed_info->target_pos = HSP_target_cobs(seed->hsp);
110     seed_info->seed_id = seed->seed_id;
111     seed_info->start_score = seed->max_start->score
112                           - (seed->hsp->score >> 1);
113     return TRUE;
114     }
115 
Scheduler_Seed_List_get_reverse(gpointer seed_data,Scheduler_Seed * seed_info)116 static gboolean Scheduler_Seed_List_get_reverse(gpointer seed_data,
117                                       Scheduler_Seed *seed_info){
118     register Scheduler_Seed_List *seed_info_list = seed_data;
119     register SDP_Seed *seed;
120     g_assert(seed_info_list);
121     if(seed_info_list->seed_pos < 0)
122         return FALSE;
123     seed = seed_info_list->seed_list->pdata[seed_info_list->seed_pos];
124     seed_info->query_pos = -HSP_query_cobs(seed->hsp);
125     seed_info->target_pos = -HSP_target_cobs(seed->hsp);
126     seed_info->seed_id = seed->seed_id;
127     seed_info->start_score = (seed->hsp->score >> 1);
128     return TRUE;
129     }
130 
Scheduler_Seed_List_start_func(gpointer seed_data,gint seed_id,C4_Score score,gint start_query_pos,gint start_target_pos,STraceback_Cell * stcell)131 static void Scheduler_Seed_List_start_func(gpointer seed_data,
132                                            gint seed_id, C4_Score score,
133                                            gint start_query_pos,
134                                            gint start_target_pos,
135                                            STraceback_Cell *stcell){
136     register Scheduler_Seed_List *seed_info_list = seed_data;
137     register SDP_Seed *seed;
138     g_assert(seed_info_list);
139     g_assert(seed_info_list->seed_list);
140     g_assert(seed_id < seed_info_list->sdp_pair->seed_list->len);
141     seed = seed_info_list->sdp_pair->seed_list->pdata[seed_id];
142     /* Is best start for this seed */
143     if(seed->max_start->score < score){
144         seed->max_start->score = score;
145         seed->max_start->query_pos = start_query_pos;
146         seed->max_start->target_pos = start_target_pos;
147         if(stcell)
148             seed->max_start->cell = STraceback_Cell_share(stcell);
149         else
150             seed->max_start->cell = NULL;
151         }
152     return;
153     }
154 
Scheduler_Seed_List_end_func(gpointer seed_data,gint seed_id,C4_Score score,gint end_query_pos,gint end_target_pos,STraceback_Cell * stcell)155 static void Scheduler_Seed_List_end_func(gpointer seed_data,
156                                          gint seed_id, C4_Score score,
157                                          gint end_query_pos,
158                                          gint end_target_pos,
159                                          STraceback_Cell *stcell){
160     register Scheduler_Seed_List *seed_info_list = seed_data;
161     register SDP_Seed *seed;
162     g_assert(seed_info_list);
163     g_assert(seed_info_list->seed_list);
164     g_assert(seed_id < seed_info_list->sdp_pair->seed_list->len);
165     seed = seed_info_list->sdp_pair->seed_list->pdata[seed_id];
166     /* Is best end for this seed */
167     if(seed->max_end->score < score){
168         seed->max_end->score = score;
169         seed->max_end->query_pos = end_query_pos;
170         seed->max_end->target_pos = end_target_pos;
171         g_assert(stcell);
172         seed->max_end->cell = STraceback_Cell_share(stcell);
173         }
174     return;
175     }
176 
177 /**/
178 
179 typedef struct {
180       Boundary *boundary;
181           gint  curr_row;
182           gint  curr_interval;
183           gint  curr_pos;
184       gboolean  is_finished;
185           gint  seed_state_id;
186       SDP_Pair *sdp_pair;
187 } Scheduler_Seed_Boundary;
188 
Scheduler_Seed_Boundary_create(Boundary * boundary,SDP_Pair * sdp_pair)189 static Scheduler_Seed_Boundary *Scheduler_Seed_Boundary_create(
190                                  Boundary *boundary,
191                                  SDP_Pair *sdp_pair){
192     register Scheduler_Seed_Boundary *seed_info_boundary
193         = g_new(Scheduler_Seed_Boundary, 1);
194     seed_info_boundary->boundary = Boundary_share(boundary);
195     seed_info_boundary->curr_row = 0;
196     seed_info_boundary->curr_interval = 0;
197     seed_info_boundary->curr_pos = 0;
198     seed_info_boundary->is_finished = FALSE;
199     seed_info_boundary->seed_state_id
200         = sdp_pair->sdp->model->start_state->state->id;
201     seed_info_boundary->sdp_pair = sdp_pair;
202     return seed_info_boundary;
203     }
204 
Scheduler_Seed_Boundary_destroy(Scheduler_Seed_Boundary * seed_info_boundary)205 static void Scheduler_Seed_Boundary_destroy(
206             Scheduler_Seed_Boundary *seed_info_boundary){
207     Boundary_destroy(seed_info_boundary->boundary);
208     g_free(seed_info_boundary);
209     return;
210     }
211 
Scheduler_Seed_Boundary_init_forward(gpointer seed_data)212 static void Scheduler_Seed_Boundary_init_forward(gpointer seed_data){
213     register Scheduler_Seed_Boundary *seed_info_boundary = seed_data;
214     g_assert(seed_info_boundary);
215     seed_info_boundary->curr_row = 0;
216     seed_info_boundary->curr_interval = 0;
217     seed_info_boundary->curr_pos = 0;
218     seed_info_boundary->is_finished = FALSE;
219     return;
220     }
221 
Scheduler_Seed_Boundary_next_forward(gpointer seed_data)222 static void Scheduler_Seed_Boundary_next_forward(gpointer seed_data){
223     register Scheduler_Seed_Boundary *seed_info_boundary = seed_data;
224     register Boundary_Row *boundary_row
225         = seed_info_boundary->boundary->row_list->pdata
226          [seed_info_boundary->curr_row];
227     register Boundary_Interval *interval
228         = boundary_row->interval_list->pdata
229          [seed_info_boundary->curr_interval];
230     /* If more seeds in interval, move curr_pos */
231     if(seed_info_boundary->curr_pos < (interval->length-1)){
232         seed_info_boundary->curr_pos++;
233         return;
234         }
235     /* If more intervals in row, move curr_interval */
236     if(seed_info_boundary->curr_interval
237     <  (boundary_row->interval_list->len-1)){
238         seed_info_boundary->curr_interval++;
239         seed_info_boundary->curr_pos = 0;
240         return;
241         }
242     /* If more rows in boundary, move curr_row */
243     if(seed_info_boundary->curr_row
244     <  (seed_info_boundary->boundary->row_list->len-1)){
245         seed_info_boundary->curr_row++;
246         seed_info_boundary->curr_interval = 0;
247         seed_info_boundary->curr_pos = 0;
248         return;
249         }
250     seed_info_boundary->is_finished = TRUE;
251     return;
252     }
253 
Scheduler_Seed_Boundary_get_forward(gpointer seed_data,Scheduler_Seed * seed_info)254 static gboolean Scheduler_Seed_Boundary_get_forward(gpointer seed_data,
255                                           Scheduler_Seed *seed_info){
256     register Scheduler_Seed_Boundary *seed_info_boundary = seed_data;
257     register Boundary_Row *boundary_row
258         = seed_info_boundary->boundary->row_list->pdata
259          [seed_info_boundary->curr_row];
260     /* FIXME: fails when boundary is empty */
261     register Boundary_Interval *interval
262         = boundary_row->interval_list->pdata
263          [seed_info_boundary->curr_interval];
264     register SDP_Seed *seed;
265     if(seed_info_boundary->is_finished)
266         return FALSE;
267     g_assert(interval->seed_id < seed_info_boundary->sdp_pair->seed_list->len);
268     seed = seed_info_boundary->sdp_pair->seed_list->pdata[interval->seed_id];
269     seed_info->query_pos = interval->query_pos
270                          + seed_info_boundary->curr_pos;
271     seed_info->target_pos = boundary_row->target_pos;
272     seed_info->seed_id = interval->seed_id;
273     seed_info->start_score = 0;
274     return TRUE;
275     }
276 
Scheduler_Seed_Boundary_end_func(gpointer seed_data,gint seed_id,C4_Score score,gint end_query_pos,gint end_target_pos,STraceback_Cell * stcell)277 static void Scheduler_Seed_Boundary_end_func(gpointer seed_data,
278                                            gint seed_id, C4_Score score,
279                                            gint end_query_pos,
280                                            gint end_target_pos,
281                                            STraceback_Cell *stcell){
282     register Scheduler_Seed_Boundary *seed_info_boundary = seed_data;
283     register SDP_Seed *seed
284         = seed_info_boundary->sdp_pair->seed_list->pdata[seed_id];
285     /* Is best end for this seed */
286     if(seed->max_end->score < score){
287         seed->max_end->score = score;
288         seed->max_end->query_pos = end_query_pos;
289         seed->max_end->target_pos = end_target_pos;
290         g_assert(stcell);
291         seed->max_end->cell = STraceback_Cell_share(stcell);
292         /* FIXME: free old seed max_end->cell ?? */
293         }
294     return;
295     }
296 
297 /**/
298 
SDP_create(C4_Model * model)299 SDP *SDP_create(C4_Model *model){
300     register SDP *sdp = g_new0(SDP, 1);
301     register C4_Portal *portal;
302     /**/
303     sdp->thread_ref = ThreadRef_create();
304     sdp->sas = SDP_ArgumentSet_create(NULL);
305     g_assert(model);
306     g_assert(!model->is_open);
307     g_assert(Lookahead_Mask_WIDTH > model->max_query_advance);
308     g_assert(Lookahead_Mask_WIDTH > model->max_target_advance);
309     sdp->model = C4_Model_share(model);
310     /* Perform bidirectional SDP only
311      * when there is a single match state and no shadows or spans
312      */
313     sdp->use_boundary = TRUE;
314     if((model->shadow_list->len == 0) && (model->span_list->len == 0)){
315         if(model->portal_list->len == 1){
316             portal = model->portal_list->pdata[0];
317             g_assert(portal);
318             if(portal->transition_list->len == 1)
319                 sdp->use_boundary = FALSE;
320             }
321         }
322     /**/
323     if(sdp->use_boundary){
324         sdp->find_starts_scheduler = Scheduler_create(model,
325                               FALSE, FALSE, TRUE,
326                               Scheduler_Seed_List_init_reverse,
327                               Scheduler_Seed_List_next_reverse,
328                               Scheduler_Seed_List_get_reverse,
329                               NULL, NULL,
330                               sdp->sas->dropoff);
331         sdp->find_ends_scheduler = Scheduler_create(model,
332                                  TRUE, TRUE, TRUE,
333                                  Scheduler_Seed_Boundary_init_forward,
334                                  Scheduler_Seed_Boundary_next_forward,
335                                  Scheduler_Seed_Boundary_get_forward,
336                                  NULL, Scheduler_Seed_Boundary_end_func,
337                                  sdp->sas->dropoff);
338     } else {
339         sdp->find_starts_scheduler = Scheduler_create(model,
340                               FALSE, TRUE, FALSE,
341                               Scheduler_Seed_List_init_reverse,
342                               Scheduler_Seed_List_next_reverse,
343                               Scheduler_Seed_List_get_reverse,
344                               Scheduler_Seed_List_start_func, NULL,
345                               sdp->sas->dropoff);
346         sdp->find_ends_scheduler = Scheduler_create(model,
347                                  TRUE, TRUE, FALSE,
348                                  Scheduler_Seed_List_init_forward,
349                                  Scheduler_Seed_List_next_forward,
350                                  Scheduler_Seed_List_get_forward,
351                                  NULL, Scheduler_Seed_List_end_func,
352                                  sdp->sas->dropoff);
353         }
354     return sdp;
355     }
356 
SDP_share(SDP * sdp)357 SDP *SDP_share(SDP *sdp){
358     g_assert(sdp);
359     ThreadRef_share(sdp->thread_ref);
360     return sdp;
361     }
362 
SDP_destroy(SDP * sdp)363 void SDP_destroy(SDP *sdp){
364     g_assert(sdp);
365     if(ThreadRef_destroy(sdp->thread_ref))
366         return;
367     if(sdp->find_starts_scheduler)
368         Scheduler_destroy(sdp->find_starts_scheduler);
369     if(sdp->find_ends_scheduler)
370         Scheduler_destroy(sdp->find_ends_scheduler);
371     C4_Model_destroy(sdp->model);
372     g_free(sdp);
373     return;
374     }
375 
376 /**/
377 
SDP_add_codegen(Scheduler * scheduler,GPtrArray * codegen_list)378 static void SDP_add_codegen(Scheduler *scheduler,
379                             GPtrArray *codegen_list){
380     register Codegen *codegen;
381     if(scheduler){
382         codegen = Scheduler_make_Codegen(scheduler);
383         g_ptr_array_add(codegen_list, codegen);
384         }
385     return;
386     }
387 
SDP_get_codegen_list(SDP * sdp)388 GPtrArray *SDP_get_codegen_list(SDP *sdp){
389     register GPtrArray *codegen_list = g_ptr_array_new();
390     SDP_add_codegen(sdp->find_starts_scheduler, codegen_list);
391     SDP_add_codegen(sdp->find_ends_scheduler, codegen_list);
392     g_assert(codegen_list->len);
393     return codegen_list;
394     }
395 
396 /**/
397 
SDP_Terminal_create(void)398 static SDP_Terminal *SDP_Terminal_create(void){
399     register SDP_Terminal *terminal = g_new(SDP_Terminal, 1);
400     terminal->query_pos = 0;
401     terminal->target_pos = 0;
402     terminal->score = C4_IMPOSSIBLY_LOW_SCORE;
403     terminal->cell = NULL;
404     return terminal;
405     }
406 
SDP_Terminal_destroy(SDP_Terminal * terminal,STraceback * straceback)407 static void SDP_Terminal_destroy(SDP_Terminal *terminal,
408                                  STraceback *straceback){
409     if(terminal->cell)
410         STraceback_Cell_destroy(terminal->cell, straceback);
411     g_free(terminal);
412     return;
413     }
414 
415 /**/
416 
SDP_Seed_create(HSP * hsp,gint id)417 static SDP_Seed *SDP_Seed_create(HSP *hsp, gint id){
418     register SDP_Seed *seed = g_new(SDP_Seed, 1);
419     seed->seed_id = id;
420     seed->hsp = hsp;
421     seed->max_start = SDP_Terminal_create();
422     seed->max_end = SDP_Terminal_create();
423     seed->pq_node = NULL;
424     return seed;
425     }
426 /* FIXME: optimisation: use RecycleBins for SDP_Terminal allocations
427  */
428 
SDP_Seed_destroy(SDP_Seed * seed,STraceback * fwd_straceback,STraceback * rev_straceback)429 static void SDP_Seed_destroy(SDP_Seed *seed, STraceback *fwd_straceback,
430                                              STraceback *rev_straceback){
431     SDP_Terminal_destroy(seed->max_start, rev_straceback);
432     SDP_Terminal_destroy(seed->max_end, fwd_straceback);
433     g_free(seed);
434     return;
435     }
436 
437 /**/
438 
SDP_compare_HSP_cobs_in_forward_dp_order(const void * a,const void * b)439 static int SDP_compare_HSP_cobs_in_forward_dp_order(const void *a,
440                                                     const void *b){
441     register HSP **hsp_a = (HSP**)a,
442                  **hsp_b = (HSP**)b;
443     register gint target_diff = HSP_target_cobs(*hsp_a)
444                               - HSP_target_cobs(*hsp_b);
445     if(!target_diff)
446         return HSP_query_cobs(*hsp_a)
447              - HSP_query_cobs(*hsp_b);
448     return target_diff;
449     }
450 
SDP_Pair_create_seed_list(Comparison * comparison)451 static GPtrArray *SDP_Pair_create_seed_list(Comparison *comparison){
452     register gint i;
453     register SDP_Seed *seed;
454     register HSP *hsp, *prev_hsp = NULL;
455     register GPtrArray *hsp_list = g_ptr_array_new(),
456                        *seed_list = g_ptr_array_new();
457     g_assert(Comparison_has_hsps(comparison));
458     /* Build a combined HSP list */
459     if(comparison->dna_hspset)
460         for(i = 0; i < comparison->dna_hspset->hsp_list->len; i++){
461             hsp = comparison->dna_hspset->hsp_list->pdata[i];
462             g_ptr_array_add(hsp_list, hsp);
463             }
464     if(comparison->protein_hspset){
465         for(i = 0; i < comparison->protein_hspset->hsp_list->len; i++){
466             hsp = comparison->protein_hspset->hsp_list->pdata[i];
467             g_ptr_array_add(hsp_list, hsp);
468             }
469         }
470     if(comparison->codon_hspset)
471         for(i = 0; i < comparison->codon_hspset->hsp_list->len; i++){
472             hsp = comparison->codon_hspset->hsp_list->pdata[i];
473             g_ptr_array_add(hsp_list, hsp);
474             }
475     /* Sort hsps on cobs point in DP order */
476     qsort(hsp_list->pdata,
477           hsp_list->len, sizeof(gpointer),
478           SDP_compare_HSP_cobs_in_forward_dp_order);
479     /* Make a seed for each unique HSP */
480     g_assert(hsp_list->len);
481     for(i = 0; i < hsp_list->len; i++){
482         hsp = hsp_list->pdata[i];
483         if((!prev_hsp)
484         || (HSP_query_cobs(hsp) != HSP_query_cobs(prev_hsp))
485         || (HSP_target_cobs(hsp) != HSP_target_cobs(prev_hsp))){
486             seed = SDP_Seed_create(hsp, seed_list->len);
487             g_ptr_array_add(seed_list, seed);
488             }
489         prev_hsp = hsp;
490         }
491     g_ptr_array_free(hsp_list, TRUE);
492     g_assert(seed_list->len);
493     return seed_list;
494     }
495 
SDP_Pair_create(SDP * sdp,SubOpt * subopt,Comparison * comparison,gpointer user_data)496 SDP_Pair *SDP_Pair_create(SDP *sdp, SubOpt *subopt,
497                           Comparison *comparison,
498                           gpointer user_data){
499     register SDP_Pair *sdp_pair = g_new(SDP_Pair, 1);
500     g_assert(sdp);
501     sdp_pair->sdp = SDP_share(sdp);
502     g_assert(comparison);
503     g_assert(Comparison_has_hsps(comparison));
504     sdp_pair->comparison = Comparison_share(comparison);
505     sdp_pair->user_data = user_data;
506     sdp_pair->alignment_count = 0;
507     sdp_pair->subopt = SubOpt_share(subopt);
508     sdp_pair->seed_list = SDP_Pair_create_seed_list(comparison);
509     sdp_pair->seed_list_by_score = NULL;
510     sdp_pair->boundary = NULL;
511     sdp_pair->last_score = C4_IMPOSSIBLY_LOW_SCORE;
512     sdp_pair->fwd_straceback = STraceback_create(sdp->model, TRUE);
513     sdp_pair->rev_straceback = STraceback_create(sdp->model, FALSE);
514     return sdp_pair;
515     }
516 
SDP_Pair_destroy(SDP_Pair * sdp_pair)517 void SDP_Pair_destroy(SDP_Pair *sdp_pair){
518     register gint i;
519     register SDP_Seed *seed;
520     g_assert(sdp_pair);
521     Comparison_destroy(sdp_pair->comparison);
522     SDP_destroy(sdp_pair->sdp);
523     for(i = 0; i < sdp_pair->seed_list->len; i++){
524         seed = sdp_pair->seed_list->pdata[i];
525         SDP_Seed_destroy(seed, sdp_pair->fwd_straceback,
526                                sdp_pair->rev_straceback);
527         }
528     g_ptr_array_free(sdp_pair->seed_list, TRUE);
529     if(sdp_pair->seed_list_by_score)
530         g_free(sdp_pair->seed_list_by_score);
531     SubOpt_destroy(sdp_pair->subopt);
532     if(sdp_pair->boundary)
533         Boundary_destroy(sdp_pair->boundary);
534     STraceback_destroy(sdp_pair->fwd_straceback);
535     STraceback_destroy(sdp_pair->rev_straceback);
536     g_free(sdp_pair);
537     return;
538     }
539 
540 /**/
541 
SDP_Pair_find_start_points(SDP_Pair * sdp_pair)542 static Boundary *SDP_Pair_find_start_points(SDP_Pair *sdp_pair){
543     register Scheduler_Seed_List *seed_info_list
544            = Scheduler_Seed_List_create(sdp_pair, FALSE);
545     register Boundary *boundary = NULL;
546     register Scheduler_Pair *spair;
547     if(sdp_pair->sdp->use_boundary)
548         boundary = Boundary_create();
549     spair = Scheduler_Pair_create(sdp_pair->sdp->find_starts_scheduler,
550                               sdp_pair->rev_straceback,
551                               sdp_pair->comparison->query->len,
552                               sdp_pair->comparison->target->len,
553                               sdp_pair->subopt, boundary, -1,
554                               seed_info_list, sdp_pair->user_data);
555     Scheduler_Pair_calculate(spair);
556     Scheduler_Pair_destroy(spair);
557     Scheduler_Seed_List_destroy(seed_info_list);
558     if(boundary)
559         Boundary_reverse(boundary);
560     return boundary;
561     }
562 
SDP_Pair_find_end_points(SDP_Pair * sdp_pair)563 static void SDP_Pair_find_end_points(SDP_Pair *sdp_pair){
564     register Scheduler_Pair *spair;
565     register Scheduler_Seed_List *seed_info_list = NULL;
566     register Scheduler_Seed_Boundary *seed_info_boundary = NULL;
567     if(sdp_pair->boundary){
568         g_assert(sdp_pair->boundary->row_list->len);
569         seed_info_boundary = Scheduler_Seed_Boundary_create(sdp_pair->boundary,
570                                                             sdp_pair);
571         spair = Scheduler_Pair_create(sdp_pair->sdp->find_ends_scheduler,
572                           sdp_pair->fwd_straceback,
573                           sdp_pair->comparison->query->len,
574                           sdp_pair->comparison->target->len,
575                           sdp_pair->subopt,
576                           sdp_pair->boundary, -1, seed_info_boundary,
577                           sdp_pair->user_data);
578     } else {
579         seed_info_list = Scheduler_Seed_List_create(sdp_pair, TRUE);
580         spair = Scheduler_Pair_create(sdp_pair->sdp->find_ends_scheduler,
581                           sdp_pair->fwd_straceback,
582                           sdp_pair->comparison->query->len,
583                           sdp_pair->comparison->target->len,
584                           sdp_pair->subopt,
585                           sdp_pair->boundary, -1, seed_info_list,
586                           sdp_pair->user_data);
587         }
588     Scheduler_Pair_calculate(spair);
589     Scheduler_Pair_destroy(spair);
590     if(sdp_pair->boundary){
591         g_assert(seed_info_boundary);
592         Scheduler_Seed_Boundary_destroy(seed_info_boundary);
593     } else {
594         g_assert(seed_info_list);
595         Scheduler_Seed_List_destroy(seed_info_list);
596         }
597     return;
598     }
599 
600 /**/
601 
SDP_Pair_update_starts(SDP_Pair * sdp_pair)602 static Boundary *SDP_Pair_update_starts(SDP_Pair *sdp_pair){
603     register gint i;
604     register SDP_Seed *seed;
605     g_assert(sdp_pair->seed_list->len);
606     /* Set max start scores low */
607     for(i = 0; i < sdp_pair->seed_list->len; i++){
608         seed = sdp_pair->seed_list->pdata[i];
609         seed->max_start->score = C4_IMPOSSIBLY_LOW_SCORE;
610         if(!sdp_pair->sdp->use_boundary){
611             g_assert(seed->max_start->cell);
612             STraceback_Cell_destroy(seed->max_start->cell,
613                                     sdp_pair->rev_straceback);
614             seed->max_start->cell = NULL;
615             }
616         }
617     return SDP_Pair_find_start_points(sdp_pair);
618     }
619 
SDP_Pair_update_ends(SDP_Pair * sdp_pair)620 static void SDP_Pair_update_ends(SDP_Pair *sdp_pair){
621     register gint i;
622     register SDP_Seed *seed;
623     g_assert(sdp_pair->seed_list->len);
624     /* Set max end scores low */
625     for(i = 0; i < sdp_pair->seed_list->len; i++){
626         seed = sdp_pair->seed_list->pdata[i];
627         seed->max_end->score = C4_IMPOSSIBLY_LOW_SCORE;
628         if(seed->max_end->cell){
629             STraceback_Cell_destroy(seed->max_end->cell,
630                                     sdp_pair->fwd_straceback);
631             seed->max_end->cell = NULL;
632             }
633         }
634     SDP_Pair_find_end_points(sdp_pair);
635     return;
636     }
637 
638 /**/
639 
SDP_Pair_add_traceback(SDP_Pair * sdp_pair,SDP_Seed * best_seed,gboolean is_forward,Alignment * alignment)640 static void SDP_Pair_add_traceback(SDP_Pair *sdp_pair, SDP_Seed *best_seed,
641                                    gboolean is_forward,
642                                    Alignment *alignment){
643     register STraceback_List *stlist = NULL;
644     register gint i;
645     register STraceback_Operation *operation;
646     register AlignmentOperation *last;
647     register STraceback_Operation *first;
648     g_assert(best_seed);
649     if(is_forward){
650         g_assert(best_seed->max_end);
651         g_assert(best_seed->max_end->cell);
652         g_assert(best_seed->max_end->cell->transition->output
653               == sdp_pair->sdp->model->end_state->state);
654         stlist = STraceback_List_create(sdp_pair->fwd_straceback,
655                                         best_seed->max_end->cell);
656         if(!sdp_pair->sdp->use_boundary){ /* Check join is valid */
657             g_assert(alignment->operation_list->len);
658             last = alignment->operation_list->pdata
659                   [alignment->operation_list->len-1];
660             g_assert(stlist->operation_list->len);
661             first = stlist->operation_list->pdata[0];
662             g_assert(last->transition->output == first->transition->output);
663             }
664         /* Include 1st operation only when using boundary */
665         for(i = sdp_pair->sdp->use_boundary?0:1;
666             i < stlist->operation_list->len; i++){
667             operation = stlist->operation_list->pdata[i];
668             Alignment_add(alignment, operation->transition,
669                           operation->length);
670             }
671     } else {
672         g_assert(best_seed->max_start->cell);
673         g_assert(best_seed->max_start->cell->transition->input
674               == sdp_pair->sdp->model->start_state->state);
675         stlist = STraceback_List_create(sdp_pair->rev_straceback,
676                                         best_seed->max_start->cell);
677         /* Don't include last operation (to end state) */
678         for(i = stlist->operation_list->len-1; i >= 1; i--){
679             operation = stlist->operation_list->pdata[i];
680             Alignment_add(alignment, operation->transition,
681                           operation->length);
682             }
683         }
684     STraceback_List_destroy(stlist);
685     return;
686     }
687 
SDP_Seed_find_start(SDP_Seed * best_seed,C4_Model * model)688 static void SDP_Seed_find_start(SDP_Seed *best_seed, C4_Model *model){
689     register STraceback_Cell *stcell;
690     best_seed->max_start->query_pos
691         = best_seed->max_end->query_pos;
692     best_seed->max_start->target_pos
693         = best_seed->max_end->target_pos;
694     stcell = best_seed->max_end->cell;
695     g_assert(stcell);
696     do {
697         best_seed->max_start->query_pos
698             -= (stcell->transition->advance_query * stcell->length);
699         best_seed->max_start->target_pos
700             -= (stcell->transition->advance_target * stcell->length);
701         stcell = stcell->prev;
702         g_assert(stcell);
703     } while(stcell->transition->input != model->start_state->state);
704     return;
705     }
706 
SDP_Pair_find_path(SDP_Pair * sdp_pair,SDP_Seed * best_seed,Boundary * boundary)707 static Alignment *SDP_Pair_find_path(SDP_Pair *sdp_pair,
708                                      SDP_Seed *best_seed,
709                                      Boundary *boundary){
710     register Region *alignment_region;
711     register Alignment *alignment;
712     /**/
713     if(sdp_pair->sdp->use_boundary)
714         SDP_Seed_find_start(best_seed, sdp_pair->sdp->model);
715     alignment_region = Region_create(best_seed->max_start->query_pos,
716                            best_seed->max_start->target_pos,
717                            best_seed->max_end->query_pos
718                           -best_seed->max_start->query_pos,
719                            best_seed->max_end->target_pos
720                           -best_seed->max_start->target_pos);
721     alignment = Alignment_create(sdp_pair->sdp->model,
722                          alignment_region, best_seed->max_end->score);
723     if(sdp_pair->sdp->use_boundary){
724         SDP_Pair_add_traceback(sdp_pair, best_seed, TRUE, alignment);
725     } else {
726         SDP_Pair_add_traceback(sdp_pair, best_seed, FALSE, alignment);
727         SDP_Pair_add_traceback(sdp_pair, best_seed, TRUE, alignment);
728         }
729     g_assert(Alignment_is_valid(alignment, alignment_region,
730                                 sdp_pair->user_data));
731     Region_destroy(alignment_region);
732     return alignment;
733     }
734 
SDP_compare_SDP_Seed_by_score(const void * a,const void * b)735 static int SDP_compare_SDP_Seed_by_score(const void *a,
736                                          const void *b){
737     register SDP_Seed **seed_a = (SDP_Seed**)a,
738                       **seed_b = (SDP_Seed**)b;
739     return (*seed_b)->max_end->score
740          - (*seed_a)->max_end->score;
741     }
742 
SDP_Pair_next_path(SDP_Pair * sdp_pair,C4_Score threshold)743 Alignment *SDP_Pair_next_path(SDP_Pair *sdp_pair, C4_Score threshold){
744     register SDP_Seed *seed, *best_seed = NULL;
745     register Alignment *alignment = NULL;
746     register gint i;
747     /* Make the start and end points up to date */
748     if(sdp_pair->alignment_count){
749         if(!sdp_pair->sdp->sas->single_pass_subopt){ /* multipass */
750             if(sdp_pair->boundary)
751                 Boundary_destroy(sdp_pair->boundary);
752             sdp_pair->boundary = SDP_Pair_update_starts(sdp_pair);
753             SDP_Pair_update_ends(sdp_pair);
754             }
755     } else {
756         g_assert(!sdp_pair->boundary);
757         sdp_pair->boundary = SDP_Pair_find_start_points(sdp_pair);
758         /* Boundary_print_gnuplot(sdp_pair->boundary, 1); */
759         SDP_Pair_find_end_points(sdp_pair);
760         if(sdp_pair->sdp->sas->single_pass_subopt){
761             sdp_pair->seed_list_by_score = g_new(gpointer,
762                                                  sdp_pair->seed_list->len);
763             for(i = 0; i < sdp_pair->seed_list->len; i++)
764                 sdp_pair->seed_list_by_score[i]
765                     = sdp_pair->seed_list->pdata[i];
766             qsort(sdp_pair->seed_list_by_score,
767                   sdp_pair->seed_list->len, sizeof(gpointer),
768                   SDP_compare_SDP_Seed_by_score);
769             sdp_pair->single_pass_pos = 0;
770             }
771         }
772 /**/
773     if(sdp_pair->sdp->sas->single_pass_subopt){ /* singlepass */
774         best_seed = NULL;
775         while(sdp_pair->single_pass_pos < sdp_pair->seed_list->len){
776             best_seed = sdp_pair->seed_list_by_score
777                        [sdp_pair->single_pass_pos++];
778             if(best_seed->max_end->score < threshold)
779                 return NULL;
780             alignment = SDP_Pair_find_path(sdp_pair, best_seed,
781                                            sdp_pair->boundary);
782             if(SubOpt_overlaps_alignment(sdp_pair->subopt, alignment)){
783                 Alignment_destroy(alignment);
784                 best_seed = NULL;
785             } else {
786                 break;
787                 }
788             }
789         if(!best_seed)
790             return NULL;
791         /**/
792     } else { /* multipass */
793         g_assert(sdp_pair->seed_list->len);
794         best_seed = sdp_pair->seed_list->pdata[0];
795         for(i = 1; i < sdp_pair->seed_list->len; i++){
796             seed = sdp_pair->seed_list->pdata[i];
797             if(best_seed->max_end->score < seed->max_end->score)
798                 best_seed = seed;
799             }
800         if(best_seed->max_end->score < threshold)
801             return NULL; /* Score below threshold */
802         alignment = SDP_Pair_find_path(sdp_pair, best_seed,
803                                        sdp_pair->boundary);
804         }
805     g_assert(best_seed);
806     g_assert(alignment);
807     /* Check score is less than previous */
808     g_assert((sdp_pair->last_score < 0)
809           || (best_seed->max_end->score <= sdp_pair->last_score));
810     sdp_pair->alignment_count++;
811     sdp_pair->last_score = best_seed->max_end->score;
812     best_seed->max_end->score = C4_IMPOSSIBLY_LOW_SCORE;
813     return alignment;
814     }
815 
816 /**/
817 
818