1 /****************************************************************\
2 * *
3 * C4 dynamic programming library - Seeded Dynamic Programming *
4 * *
5 * Guy St.C. Slater.. mailto:guy@ebi.ac.uk *
6 * Copyright (C) 2000-2009. All Rights Reserved. *
7 * *
8 * This source code is distributed under the terms of the *
9 * GNU General Public License, version 3. See the file COPYING *
10 * or http://www.gnu.org/licenses/gpl.txt for details *
11 * *
12 * If you use this code, please keep this notice intact. *
13 * *
14 \****************************************************************/
15
16 #include <stdlib.h> /* For qsort() */
17
18 #include "sdp.h"
19 #include "matrix.h"
20
21 /**/
22
SDP_ArgumentSet_create(Argument * arg)23 SDP_ArgumentSet *SDP_ArgumentSet_create(Argument *arg){
24 register ArgumentSet *as;
25 static SDP_ArgumentSet sas = {30};
26 if(arg){
27 as = ArgumentSet_create("Seeded Dynamic Programming options");
28 ArgumentSet_add_option(as, 'x', "extensionthreshold", NULL,
29 "Gapped extension threshold", "50",
30 Argument_parse_int, &sas.dropoff);
31 ArgumentSet_add_option(as, 0, "singlepass", NULL,
32 "Generate suboptimal alignment in a single pass", "TRUE",
33 Argument_parse_boolean, &sas.single_pass_subopt);
34 Argument_absorb_ArgumentSet(arg, as);
35 }
36 return &sas;
37 }
38
39 /**/
40
41 typedef struct {
42 GPtrArray *seed_list;
43 gint seed_pos;
44 gint seed_state_id;
45 SDP_Pair *sdp_pair;
46 } Scheduler_Seed_List;
47
Scheduler_Seed_List_create(SDP_Pair * sdp_pair,gboolean is_forward)48 static Scheduler_Seed_List *Scheduler_Seed_List_create(SDP_Pair *sdp_pair,
49 gboolean is_forward){
50 register Scheduler_Seed_List *seed_info_list
51 = g_new(Scheduler_Seed_List, 1);
52 register C4_Model *model = sdp_pair->sdp->model;
53 seed_info_list->seed_list = sdp_pair->seed_list;
54 seed_info_list->seed_pos = 0;
55 if(is_forward){
56 seed_info_list->seed_state_id = model->start_state->state->id;
57 } else {
58 seed_info_list->seed_state_id = model->end_state->state->id;
59 }
60 seed_info_list->sdp_pair = sdp_pair;
61 return seed_info_list;
62 }
63
Scheduler_Seed_List_destroy(Scheduler_Seed_List * seed_info_list)64 static void Scheduler_Seed_List_destroy(
65 Scheduler_Seed_List *seed_info_list){
66 g_free(seed_info_list);
67 return;
68 }
69
Scheduler_Seed_List_init_forward(gpointer seed_data)70 static void Scheduler_Seed_List_init_forward(gpointer seed_data){
71 register Scheduler_Seed_List *seed_info_list = seed_data;
72 g_assert(seed_info_list);
73 seed_info_list->seed_pos = 0;
74 return;
75 }
76
Scheduler_Seed_List_init_reverse(gpointer seed_data)77 static void Scheduler_Seed_List_init_reverse(gpointer seed_data){
78 register Scheduler_Seed_List *seed_info_list = seed_data;
79 g_assert(seed_info_list);
80 seed_info_list->seed_pos = seed_info_list->seed_list->len - 1;
81 return;
82 }
83
Scheduler_Seed_List_next_forward(gpointer seed_data)84 static void Scheduler_Seed_List_next_forward(gpointer seed_data){
85 register Scheduler_Seed_List *seed_info_list = seed_data;
86 g_assert(seed_info_list);
87 g_assert(seed_info_list->seed_pos < seed_info_list->seed_list->len);
88 seed_info_list->seed_pos++;
89 return;
90 }
91
Scheduler_Seed_List_next_reverse(gpointer seed_data)92 static void Scheduler_Seed_List_next_reverse(gpointer seed_data){
93 register Scheduler_Seed_List *seed_info_list = seed_data;
94 g_assert(seed_info_list);
95 g_assert(seed_info_list->seed_pos >= 0);
96 seed_info_list->seed_pos--;
97 return;
98 }
99
Scheduler_Seed_List_get_forward(gpointer seed_data,Scheduler_Seed * seed_info)100 static gboolean Scheduler_Seed_List_get_forward(gpointer seed_data,
101 Scheduler_Seed *seed_info){
102 register Scheduler_Seed_List *seed_info_list = seed_data;
103 register SDP_Seed *seed;
104 g_assert(seed_info_list);
105 if(seed_info_list->seed_pos >= seed_info_list->seed_list->len)
106 return FALSE;
107 seed = seed_info_list->seed_list->pdata[seed_info_list->seed_pos];
108 seed_info->query_pos = HSP_query_cobs(seed->hsp);
109 seed_info->target_pos = HSP_target_cobs(seed->hsp);
110 seed_info->seed_id = seed->seed_id;
111 seed_info->start_score = seed->max_start->score
112 - (seed->hsp->score >> 1);
113 return TRUE;
114 }
115
Scheduler_Seed_List_get_reverse(gpointer seed_data,Scheduler_Seed * seed_info)116 static gboolean Scheduler_Seed_List_get_reverse(gpointer seed_data,
117 Scheduler_Seed *seed_info){
118 register Scheduler_Seed_List *seed_info_list = seed_data;
119 register SDP_Seed *seed;
120 g_assert(seed_info_list);
121 if(seed_info_list->seed_pos < 0)
122 return FALSE;
123 seed = seed_info_list->seed_list->pdata[seed_info_list->seed_pos];
124 seed_info->query_pos = -HSP_query_cobs(seed->hsp);
125 seed_info->target_pos = -HSP_target_cobs(seed->hsp);
126 seed_info->seed_id = seed->seed_id;
127 seed_info->start_score = (seed->hsp->score >> 1);
128 return TRUE;
129 }
130
Scheduler_Seed_List_start_func(gpointer seed_data,gint seed_id,C4_Score score,gint start_query_pos,gint start_target_pos,STraceback_Cell * stcell)131 static void Scheduler_Seed_List_start_func(gpointer seed_data,
132 gint seed_id, C4_Score score,
133 gint start_query_pos,
134 gint start_target_pos,
135 STraceback_Cell *stcell){
136 register Scheduler_Seed_List *seed_info_list = seed_data;
137 register SDP_Seed *seed;
138 g_assert(seed_info_list);
139 g_assert(seed_info_list->seed_list);
140 g_assert(seed_id < seed_info_list->sdp_pair->seed_list->len);
141 seed = seed_info_list->sdp_pair->seed_list->pdata[seed_id];
142 /* Is best start for this seed */
143 if(seed->max_start->score < score){
144 seed->max_start->score = score;
145 seed->max_start->query_pos = start_query_pos;
146 seed->max_start->target_pos = start_target_pos;
147 if(stcell)
148 seed->max_start->cell = STraceback_Cell_share(stcell);
149 else
150 seed->max_start->cell = NULL;
151 }
152 return;
153 }
154
Scheduler_Seed_List_end_func(gpointer seed_data,gint seed_id,C4_Score score,gint end_query_pos,gint end_target_pos,STraceback_Cell * stcell)155 static void Scheduler_Seed_List_end_func(gpointer seed_data,
156 gint seed_id, C4_Score score,
157 gint end_query_pos,
158 gint end_target_pos,
159 STraceback_Cell *stcell){
160 register Scheduler_Seed_List *seed_info_list = seed_data;
161 register SDP_Seed *seed;
162 g_assert(seed_info_list);
163 g_assert(seed_info_list->seed_list);
164 g_assert(seed_id < seed_info_list->sdp_pair->seed_list->len);
165 seed = seed_info_list->sdp_pair->seed_list->pdata[seed_id];
166 /* Is best end for this seed */
167 if(seed->max_end->score < score){
168 seed->max_end->score = score;
169 seed->max_end->query_pos = end_query_pos;
170 seed->max_end->target_pos = end_target_pos;
171 g_assert(stcell);
172 seed->max_end->cell = STraceback_Cell_share(stcell);
173 }
174 return;
175 }
176
177 /**/
178
179 typedef struct {
180 Boundary *boundary;
181 gint curr_row;
182 gint curr_interval;
183 gint curr_pos;
184 gboolean is_finished;
185 gint seed_state_id;
186 SDP_Pair *sdp_pair;
187 } Scheduler_Seed_Boundary;
188
Scheduler_Seed_Boundary_create(Boundary * boundary,SDP_Pair * sdp_pair)189 static Scheduler_Seed_Boundary *Scheduler_Seed_Boundary_create(
190 Boundary *boundary,
191 SDP_Pair *sdp_pair){
192 register Scheduler_Seed_Boundary *seed_info_boundary
193 = g_new(Scheduler_Seed_Boundary, 1);
194 seed_info_boundary->boundary = Boundary_share(boundary);
195 seed_info_boundary->curr_row = 0;
196 seed_info_boundary->curr_interval = 0;
197 seed_info_boundary->curr_pos = 0;
198 seed_info_boundary->is_finished = FALSE;
199 seed_info_boundary->seed_state_id
200 = sdp_pair->sdp->model->start_state->state->id;
201 seed_info_boundary->sdp_pair = sdp_pair;
202 return seed_info_boundary;
203 }
204
Scheduler_Seed_Boundary_destroy(Scheduler_Seed_Boundary * seed_info_boundary)205 static void Scheduler_Seed_Boundary_destroy(
206 Scheduler_Seed_Boundary *seed_info_boundary){
207 Boundary_destroy(seed_info_boundary->boundary);
208 g_free(seed_info_boundary);
209 return;
210 }
211
Scheduler_Seed_Boundary_init_forward(gpointer seed_data)212 static void Scheduler_Seed_Boundary_init_forward(gpointer seed_data){
213 register Scheduler_Seed_Boundary *seed_info_boundary = seed_data;
214 g_assert(seed_info_boundary);
215 seed_info_boundary->curr_row = 0;
216 seed_info_boundary->curr_interval = 0;
217 seed_info_boundary->curr_pos = 0;
218 seed_info_boundary->is_finished = FALSE;
219 return;
220 }
221
Scheduler_Seed_Boundary_next_forward(gpointer seed_data)222 static void Scheduler_Seed_Boundary_next_forward(gpointer seed_data){
223 register Scheduler_Seed_Boundary *seed_info_boundary = seed_data;
224 register Boundary_Row *boundary_row
225 = seed_info_boundary->boundary->row_list->pdata
226 [seed_info_boundary->curr_row];
227 register Boundary_Interval *interval
228 = boundary_row->interval_list->pdata
229 [seed_info_boundary->curr_interval];
230 /* If more seeds in interval, move curr_pos */
231 if(seed_info_boundary->curr_pos < (interval->length-1)){
232 seed_info_boundary->curr_pos++;
233 return;
234 }
235 /* If more intervals in row, move curr_interval */
236 if(seed_info_boundary->curr_interval
237 < (boundary_row->interval_list->len-1)){
238 seed_info_boundary->curr_interval++;
239 seed_info_boundary->curr_pos = 0;
240 return;
241 }
242 /* If more rows in boundary, move curr_row */
243 if(seed_info_boundary->curr_row
244 < (seed_info_boundary->boundary->row_list->len-1)){
245 seed_info_boundary->curr_row++;
246 seed_info_boundary->curr_interval = 0;
247 seed_info_boundary->curr_pos = 0;
248 return;
249 }
250 seed_info_boundary->is_finished = TRUE;
251 return;
252 }
253
Scheduler_Seed_Boundary_get_forward(gpointer seed_data,Scheduler_Seed * seed_info)254 static gboolean Scheduler_Seed_Boundary_get_forward(gpointer seed_data,
255 Scheduler_Seed *seed_info){
256 register Scheduler_Seed_Boundary *seed_info_boundary = seed_data;
257 register Boundary_Row *boundary_row
258 = seed_info_boundary->boundary->row_list->pdata
259 [seed_info_boundary->curr_row];
260 /* FIXME: fails when boundary is empty */
261 register Boundary_Interval *interval
262 = boundary_row->interval_list->pdata
263 [seed_info_boundary->curr_interval];
264 register SDP_Seed *seed;
265 if(seed_info_boundary->is_finished)
266 return FALSE;
267 g_assert(interval->seed_id < seed_info_boundary->sdp_pair->seed_list->len);
268 seed = seed_info_boundary->sdp_pair->seed_list->pdata[interval->seed_id];
269 seed_info->query_pos = interval->query_pos
270 + seed_info_boundary->curr_pos;
271 seed_info->target_pos = boundary_row->target_pos;
272 seed_info->seed_id = interval->seed_id;
273 seed_info->start_score = 0;
274 return TRUE;
275 }
276
Scheduler_Seed_Boundary_end_func(gpointer seed_data,gint seed_id,C4_Score score,gint end_query_pos,gint end_target_pos,STraceback_Cell * stcell)277 static void Scheduler_Seed_Boundary_end_func(gpointer seed_data,
278 gint seed_id, C4_Score score,
279 gint end_query_pos,
280 gint end_target_pos,
281 STraceback_Cell *stcell){
282 register Scheduler_Seed_Boundary *seed_info_boundary = seed_data;
283 register SDP_Seed *seed
284 = seed_info_boundary->sdp_pair->seed_list->pdata[seed_id];
285 /* Is best end for this seed */
286 if(seed->max_end->score < score){
287 seed->max_end->score = score;
288 seed->max_end->query_pos = end_query_pos;
289 seed->max_end->target_pos = end_target_pos;
290 g_assert(stcell);
291 seed->max_end->cell = STraceback_Cell_share(stcell);
292 /* FIXME: free old seed max_end->cell ?? */
293 }
294 return;
295 }
296
297 /**/
298
SDP_create(C4_Model * model)299 SDP *SDP_create(C4_Model *model){
300 register SDP *sdp = g_new0(SDP, 1);
301 register C4_Portal *portal;
302 /**/
303 sdp->thread_ref = ThreadRef_create();
304 sdp->sas = SDP_ArgumentSet_create(NULL);
305 g_assert(model);
306 g_assert(!model->is_open);
307 g_assert(Lookahead_Mask_WIDTH > model->max_query_advance);
308 g_assert(Lookahead_Mask_WIDTH > model->max_target_advance);
309 sdp->model = C4_Model_share(model);
310 /* Perform bidirectional SDP only
311 * when there is a single match state and no shadows or spans
312 */
313 sdp->use_boundary = TRUE;
314 if((model->shadow_list->len == 0) && (model->span_list->len == 0)){
315 if(model->portal_list->len == 1){
316 portal = model->portal_list->pdata[0];
317 g_assert(portal);
318 if(portal->transition_list->len == 1)
319 sdp->use_boundary = FALSE;
320 }
321 }
322 /**/
323 if(sdp->use_boundary){
324 sdp->find_starts_scheduler = Scheduler_create(model,
325 FALSE, FALSE, TRUE,
326 Scheduler_Seed_List_init_reverse,
327 Scheduler_Seed_List_next_reverse,
328 Scheduler_Seed_List_get_reverse,
329 NULL, NULL,
330 sdp->sas->dropoff);
331 sdp->find_ends_scheduler = Scheduler_create(model,
332 TRUE, TRUE, TRUE,
333 Scheduler_Seed_Boundary_init_forward,
334 Scheduler_Seed_Boundary_next_forward,
335 Scheduler_Seed_Boundary_get_forward,
336 NULL, Scheduler_Seed_Boundary_end_func,
337 sdp->sas->dropoff);
338 } else {
339 sdp->find_starts_scheduler = Scheduler_create(model,
340 FALSE, TRUE, FALSE,
341 Scheduler_Seed_List_init_reverse,
342 Scheduler_Seed_List_next_reverse,
343 Scheduler_Seed_List_get_reverse,
344 Scheduler_Seed_List_start_func, NULL,
345 sdp->sas->dropoff);
346 sdp->find_ends_scheduler = Scheduler_create(model,
347 TRUE, TRUE, FALSE,
348 Scheduler_Seed_List_init_forward,
349 Scheduler_Seed_List_next_forward,
350 Scheduler_Seed_List_get_forward,
351 NULL, Scheduler_Seed_List_end_func,
352 sdp->sas->dropoff);
353 }
354 return sdp;
355 }
356
SDP_share(SDP * sdp)357 SDP *SDP_share(SDP *sdp){
358 g_assert(sdp);
359 ThreadRef_share(sdp->thread_ref);
360 return sdp;
361 }
362
SDP_destroy(SDP * sdp)363 void SDP_destroy(SDP *sdp){
364 g_assert(sdp);
365 if(ThreadRef_destroy(sdp->thread_ref))
366 return;
367 if(sdp->find_starts_scheduler)
368 Scheduler_destroy(sdp->find_starts_scheduler);
369 if(sdp->find_ends_scheduler)
370 Scheduler_destroy(sdp->find_ends_scheduler);
371 C4_Model_destroy(sdp->model);
372 g_free(sdp);
373 return;
374 }
375
376 /**/
377
SDP_add_codegen(Scheduler * scheduler,GPtrArray * codegen_list)378 static void SDP_add_codegen(Scheduler *scheduler,
379 GPtrArray *codegen_list){
380 register Codegen *codegen;
381 if(scheduler){
382 codegen = Scheduler_make_Codegen(scheduler);
383 g_ptr_array_add(codegen_list, codegen);
384 }
385 return;
386 }
387
SDP_get_codegen_list(SDP * sdp)388 GPtrArray *SDP_get_codegen_list(SDP *sdp){
389 register GPtrArray *codegen_list = g_ptr_array_new();
390 SDP_add_codegen(sdp->find_starts_scheduler, codegen_list);
391 SDP_add_codegen(sdp->find_ends_scheduler, codegen_list);
392 g_assert(codegen_list->len);
393 return codegen_list;
394 }
395
396 /**/
397
SDP_Terminal_create(void)398 static SDP_Terminal *SDP_Terminal_create(void){
399 register SDP_Terminal *terminal = g_new(SDP_Terminal, 1);
400 terminal->query_pos = 0;
401 terminal->target_pos = 0;
402 terminal->score = C4_IMPOSSIBLY_LOW_SCORE;
403 terminal->cell = NULL;
404 return terminal;
405 }
406
SDP_Terminal_destroy(SDP_Terminal * terminal,STraceback * straceback)407 static void SDP_Terminal_destroy(SDP_Terminal *terminal,
408 STraceback *straceback){
409 if(terminal->cell)
410 STraceback_Cell_destroy(terminal->cell, straceback);
411 g_free(terminal);
412 return;
413 }
414
415 /**/
416
SDP_Seed_create(HSP * hsp,gint id)417 static SDP_Seed *SDP_Seed_create(HSP *hsp, gint id){
418 register SDP_Seed *seed = g_new(SDP_Seed, 1);
419 seed->seed_id = id;
420 seed->hsp = hsp;
421 seed->max_start = SDP_Terminal_create();
422 seed->max_end = SDP_Terminal_create();
423 seed->pq_node = NULL;
424 return seed;
425 }
426 /* FIXME: optimisation: use RecycleBins for SDP_Terminal allocations
427 */
428
SDP_Seed_destroy(SDP_Seed * seed,STraceback * fwd_straceback,STraceback * rev_straceback)429 static void SDP_Seed_destroy(SDP_Seed *seed, STraceback *fwd_straceback,
430 STraceback *rev_straceback){
431 SDP_Terminal_destroy(seed->max_start, rev_straceback);
432 SDP_Terminal_destroy(seed->max_end, fwd_straceback);
433 g_free(seed);
434 return;
435 }
436
437 /**/
438
SDP_compare_HSP_cobs_in_forward_dp_order(const void * a,const void * b)439 static int SDP_compare_HSP_cobs_in_forward_dp_order(const void *a,
440 const void *b){
441 register HSP **hsp_a = (HSP**)a,
442 **hsp_b = (HSP**)b;
443 register gint target_diff = HSP_target_cobs(*hsp_a)
444 - HSP_target_cobs(*hsp_b);
445 if(!target_diff)
446 return HSP_query_cobs(*hsp_a)
447 - HSP_query_cobs(*hsp_b);
448 return target_diff;
449 }
450
SDP_Pair_create_seed_list(Comparison * comparison)451 static GPtrArray *SDP_Pair_create_seed_list(Comparison *comparison){
452 register gint i;
453 register SDP_Seed *seed;
454 register HSP *hsp, *prev_hsp = NULL;
455 register GPtrArray *hsp_list = g_ptr_array_new(),
456 *seed_list = g_ptr_array_new();
457 g_assert(Comparison_has_hsps(comparison));
458 /* Build a combined HSP list */
459 if(comparison->dna_hspset)
460 for(i = 0; i < comparison->dna_hspset->hsp_list->len; i++){
461 hsp = comparison->dna_hspset->hsp_list->pdata[i];
462 g_ptr_array_add(hsp_list, hsp);
463 }
464 if(comparison->protein_hspset){
465 for(i = 0; i < comparison->protein_hspset->hsp_list->len; i++){
466 hsp = comparison->protein_hspset->hsp_list->pdata[i];
467 g_ptr_array_add(hsp_list, hsp);
468 }
469 }
470 if(comparison->codon_hspset)
471 for(i = 0; i < comparison->codon_hspset->hsp_list->len; i++){
472 hsp = comparison->codon_hspset->hsp_list->pdata[i];
473 g_ptr_array_add(hsp_list, hsp);
474 }
475 /* Sort hsps on cobs point in DP order */
476 qsort(hsp_list->pdata,
477 hsp_list->len, sizeof(gpointer),
478 SDP_compare_HSP_cobs_in_forward_dp_order);
479 /* Make a seed for each unique HSP */
480 g_assert(hsp_list->len);
481 for(i = 0; i < hsp_list->len; i++){
482 hsp = hsp_list->pdata[i];
483 if((!prev_hsp)
484 || (HSP_query_cobs(hsp) != HSP_query_cobs(prev_hsp))
485 || (HSP_target_cobs(hsp) != HSP_target_cobs(prev_hsp))){
486 seed = SDP_Seed_create(hsp, seed_list->len);
487 g_ptr_array_add(seed_list, seed);
488 }
489 prev_hsp = hsp;
490 }
491 g_ptr_array_free(hsp_list, TRUE);
492 g_assert(seed_list->len);
493 return seed_list;
494 }
495
SDP_Pair_create(SDP * sdp,SubOpt * subopt,Comparison * comparison,gpointer user_data)496 SDP_Pair *SDP_Pair_create(SDP *sdp, SubOpt *subopt,
497 Comparison *comparison,
498 gpointer user_data){
499 register SDP_Pair *sdp_pair = g_new(SDP_Pair, 1);
500 g_assert(sdp);
501 sdp_pair->sdp = SDP_share(sdp);
502 g_assert(comparison);
503 g_assert(Comparison_has_hsps(comparison));
504 sdp_pair->comparison = Comparison_share(comparison);
505 sdp_pair->user_data = user_data;
506 sdp_pair->alignment_count = 0;
507 sdp_pair->subopt = SubOpt_share(subopt);
508 sdp_pair->seed_list = SDP_Pair_create_seed_list(comparison);
509 sdp_pair->seed_list_by_score = NULL;
510 sdp_pair->boundary = NULL;
511 sdp_pair->last_score = C4_IMPOSSIBLY_LOW_SCORE;
512 sdp_pair->fwd_straceback = STraceback_create(sdp->model, TRUE);
513 sdp_pair->rev_straceback = STraceback_create(sdp->model, FALSE);
514 return sdp_pair;
515 }
516
SDP_Pair_destroy(SDP_Pair * sdp_pair)517 void SDP_Pair_destroy(SDP_Pair *sdp_pair){
518 register gint i;
519 register SDP_Seed *seed;
520 g_assert(sdp_pair);
521 Comparison_destroy(sdp_pair->comparison);
522 SDP_destroy(sdp_pair->sdp);
523 for(i = 0; i < sdp_pair->seed_list->len; i++){
524 seed = sdp_pair->seed_list->pdata[i];
525 SDP_Seed_destroy(seed, sdp_pair->fwd_straceback,
526 sdp_pair->rev_straceback);
527 }
528 g_ptr_array_free(sdp_pair->seed_list, TRUE);
529 if(sdp_pair->seed_list_by_score)
530 g_free(sdp_pair->seed_list_by_score);
531 SubOpt_destroy(sdp_pair->subopt);
532 if(sdp_pair->boundary)
533 Boundary_destroy(sdp_pair->boundary);
534 STraceback_destroy(sdp_pair->fwd_straceback);
535 STraceback_destroy(sdp_pair->rev_straceback);
536 g_free(sdp_pair);
537 return;
538 }
539
540 /**/
541
SDP_Pair_find_start_points(SDP_Pair * sdp_pair)542 static Boundary *SDP_Pair_find_start_points(SDP_Pair *sdp_pair){
543 register Scheduler_Seed_List *seed_info_list
544 = Scheduler_Seed_List_create(sdp_pair, FALSE);
545 register Boundary *boundary = NULL;
546 register Scheduler_Pair *spair;
547 if(sdp_pair->sdp->use_boundary)
548 boundary = Boundary_create();
549 spair = Scheduler_Pair_create(sdp_pair->sdp->find_starts_scheduler,
550 sdp_pair->rev_straceback,
551 sdp_pair->comparison->query->len,
552 sdp_pair->comparison->target->len,
553 sdp_pair->subopt, boundary, -1,
554 seed_info_list, sdp_pair->user_data);
555 Scheduler_Pair_calculate(spair);
556 Scheduler_Pair_destroy(spair);
557 Scheduler_Seed_List_destroy(seed_info_list);
558 if(boundary)
559 Boundary_reverse(boundary);
560 return boundary;
561 }
562
SDP_Pair_find_end_points(SDP_Pair * sdp_pair)563 static void SDP_Pair_find_end_points(SDP_Pair *sdp_pair){
564 register Scheduler_Pair *spair;
565 register Scheduler_Seed_List *seed_info_list = NULL;
566 register Scheduler_Seed_Boundary *seed_info_boundary = NULL;
567 if(sdp_pair->boundary){
568 g_assert(sdp_pair->boundary->row_list->len);
569 seed_info_boundary = Scheduler_Seed_Boundary_create(sdp_pair->boundary,
570 sdp_pair);
571 spair = Scheduler_Pair_create(sdp_pair->sdp->find_ends_scheduler,
572 sdp_pair->fwd_straceback,
573 sdp_pair->comparison->query->len,
574 sdp_pair->comparison->target->len,
575 sdp_pair->subopt,
576 sdp_pair->boundary, -1, seed_info_boundary,
577 sdp_pair->user_data);
578 } else {
579 seed_info_list = Scheduler_Seed_List_create(sdp_pair, TRUE);
580 spair = Scheduler_Pair_create(sdp_pair->sdp->find_ends_scheduler,
581 sdp_pair->fwd_straceback,
582 sdp_pair->comparison->query->len,
583 sdp_pair->comparison->target->len,
584 sdp_pair->subopt,
585 sdp_pair->boundary, -1, seed_info_list,
586 sdp_pair->user_data);
587 }
588 Scheduler_Pair_calculate(spair);
589 Scheduler_Pair_destroy(spair);
590 if(sdp_pair->boundary){
591 g_assert(seed_info_boundary);
592 Scheduler_Seed_Boundary_destroy(seed_info_boundary);
593 } else {
594 g_assert(seed_info_list);
595 Scheduler_Seed_List_destroy(seed_info_list);
596 }
597 return;
598 }
599
600 /**/
601
SDP_Pair_update_starts(SDP_Pair * sdp_pair)602 static Boundary *SDP_Pair_update_starts(SDP_Pair *sdp_pair){
603 register gint i;
604 register SDP_Seed *seed;
605 g_assert(sdp_pair->seed_list->len);
606 /* Set max start scores low */
607 for(i = 0; i < sdp_pair->seed_list->len; i++){
608 seed = sdp_pair->seed_list->pdata[i];
609 seed->max_start->score = C4_IMPOSSIBLY_LOW_SCORE;
610 if(!sdp_pair->sdp->use_boundary){
611 g_assert(seed->max_start->cell);
612 STraceback_Cell_destroy(seed->max_start->cell,
613 sdp_pair->rev_straceback);
614 seed->max_start->cell = NULL;
615 }
616 }
617 return SDP_Pair_find_start_points(sdp_pair);
618 }
619
SDP_Pair_update_ends(SDP_Pair * sdp_pair)620 static void SDP_Pair_update_ends(SDP_Pair *sdp_pair){
621 register gint i;
622 register SDP_Seed *seed;
623 g_assert(sdp_pair->seed_list->len);
624 /* Set max end scores low */
625 for(i = 0; i < sdp_pair->seed_list->len; i++){
626 seed = sdp_pair->seed_list->pdata[i];
627 seed->max_end->score = C4_IMPOSSIBLY_LOW_SCORE;
628 if(seed->max_end->cell){
629 STraceback_Cell_destroy(seed->max_end->cell,
630 sdp_pair->fwd_straceback);
631 seed->max_end->cell = NULL;
632 }
633 }
634 SDP_Pair_find_end_points(sdp_pair);
635 return;
636 }
637
638 /**/
639
SDP_Pair_add_traceback(SDP_Pair * sdp_pair,SDP_Seed * best_seed,gboolean is_forward,Alignment * alignment)640 static void SDP_Pair_add_traceback(SDP_Pair *sdp_pair, SDP_Seed *best_seed,
641 gboolean is_forward,
642 Alignment *alignment){
643 register STraceback_List *stlist = NULL;
644 register gint i;
645 register STraceback_Operation *operation;
646 register AlignmentOperation *last;
647 register STraceback_Operation *first;
648 g_assert(best_seed);
649 if(is_forward){
650 g_assert(best_seed->max_end);
651 g_assert(best_seed->max_end->cell);
652 g_assert(best_seed->max_end->cell->transition->output
653 == sdp_pair->sdp->model->end_state->state);
654 stlist = STraceback_List_create(sdp_pair->fwd_straceback,
655 best_seed->max_end->cell);
656 if(!sdp_pair->sdp->use_boundary){ /* Check join is valid */
657 g_assert(alignment->operation_list->len);
658 last = alignment->operation_list->pdata
659 [alignment->operation_list->len-1];
660 g_assert(stlist->operation_list->len);
661 first = stlist->operation_list->pdata[0];
662 g_assert(last->transition->output == first->transition->output);
663 }
664 /* Include 1st operation only when using boundary */
665 for(i = sdp_pair->sdp->use_boundary?0:1;
666 i < stlist->operation_list->len; i++){
667 operation = stlist->operation_list->pdata[i];
668 Alignment_add(alignment, operation->transition,
669 operation->length);
670 }
671 } else {
672 g_assert(best_seed->max_start->cell);
673 g_assert(best_seed->max_start->cell->transition->input
674 == sdp_pair->sdp->model->start_state->state);
675 stlist = STraceback_List_create(sdp_pair->rev_straceback,
676 best_seed->max_start->cell);
677 /* Don't include last operation (to end state) */
678 for(i = stlist->operation_list->len-1; i >= 1; i--){
679 operation = stlist->operation_list->pdata[i];
680 Alignment_add(alignment, operation->transition,
681 operation->length);
682 }
683 }
684 STraceback_List_destroy(stlist);
685 return;
686 }
687
SDP_Seed_find_start(SDP_Seed * best_seed,C4_Model * model)688 static void SDP_Seed_find_start(SDP_Seed *best_seed, C4_Model *model){
689 register STraceback_Cell *stcell;
690 best_seed->max_start->query_pos
691 = best_seed->max_end->query_pos;
692 best_seed->max_start->target_pos
693 = best_seed->max_end->target_pos;
694 stcell = best_seed->max_end->cell;
695 g_assert(stcell);
696 do {
697 best_seed->max_start->query_pos
698 -= (stcell->transition->advance_query * stcell->length);
699 best_seed->max_start->target_pos
700 -= (stcell->transition->advance_target * stcell->length);
701 stcell = stcell->prev;
702 g_assert(stcell);
703 } while(stcell->transition->input != model->start_state->state);
704 return;
705 }
706
SDP_Pair_find_path(SDP_Pair * sdp_pair,SDP_Seed * best_seed,Boundary * boundary)707 static Alignment *SDP_Pair_find_path(SDP_Pair *sdp_pair,
708 SDP_Seed *best_seed,
709 Boundary *boundary){
710 register Region *alignment_region;
711 register Alignment *alignment;
712 /**/
713 if(sdp_pair->sdp->use_boundary)
714 SDP_Seed_find_start(best_seed, sdp_pair->sdp->model);
715 alignment_region = Region_create(best_seed->max_start->query_pos,
716 best_seed->max_start->target_pos,
717 best_seed->max_end->query_pos
718 -best_seed->max_start->query_pos,
719 best_seed->max_end->target_pos
720 -best_seed->max_start->target_pos);
721 alignment = Alignment_create(sdp_pair->sdp->model,
722 alignment_region, best_seed->max_end->score);
723 if(sdp_pair->sdp->use_boundary){
724 SDP_Pair_add_traceback(sdp_pair, best_seed, TRUE, alignment);
725 } else {
726 SDP_Pair_add_traceback(sdp_pair, best_seed, FALSE, alignment);
727 SDP_Pair_add_traceback(sdp_pair, best_seed, TRUE, alignment);
728 }
729 g_assert(Alignment_is_valid(alignment, alignment_region,
730 sdp_pair->user_data));
731 Region_destroy(alignment_region);
732 return alignment;
733 }
734
SDP_compare_SDP_Seed_by_score(const void * a,const void * b)735 static int SDP_compare_SDP_Seed_by_score(const void *a,
736 const void *b){
737 register SDP_Seed **seed_a = (SDP_Seed**)a,
738 **seed_b = (SDP_Seed**)b;
739 return (*seed_b)->max_end->score
740 - (*seed_a)->max_end->score;
741 }
742
SDP_Pair_next_path(SDP_Pair * sdp_pair,C4_Score threshold)743 Alignment *SDP_Pair_next_path(SDP_Pair *sdp_pair, C4_Score threshold){
744 register SDP_Seed *seed, *best_seed = NULL;
745 register Alignment *alignment = NULL;
746 register gint i;
747 /* Make the start and end points up to date */
748 if(sdp_pair->alignment_count){
749 if(!sdp_pair->sdp->sas->single_pass_subopt){ /* multipass */
750 if(sdp_pair->boundary)
751 Boundary_destroy(sdp_pair->boundary);
752 sdp_pair->boundary = SDP_Pair_update_starts(sdp_pair);
753 SDP_Pair_update_ends(sdp_pair);
754 }
755 } else {
756 g_assert(!sdp_pair->boundary);
757 sdp_pair->boundary = SDP_Pair_find_start_points(sdp_pair);
758 /* Boundary_print_gnuplot(sdp_pair->boundary, 1); */
759 SDP_Pair_find_end_points(sdp_pair);
760 if(sdp_pair->sdp->sas->single_pass_subopt){
761 sdp_pair->seed_list_by_score = g_new(gpointer,
762 sdp_pair->seed_list->len);
763 for(i = 0; i < sdp_pair->seed_list->len; i++)
764 sdp_pair->seed_list_by_score[i]
765 = sdp_pair->seed_list->pdata[i];
766 qsort(sdp_pair->seed_list_by_score,
767 sdp_pair->seed_list->len, sizeof(gpointer),
768 SDP_compare_SDP_Seed_by_score);
769 sdp_pair->single_pass_pos = 0;
770 }
771 }
772 /**/
773 if(sdp_pair->sdp->sas->single_pass_subopt){ /* singlepass */
774 best_seed = NULL;
775 while(sdp_pair->single_pass_pos < sdp_pair->seed_list->len){
776 best_seed = sdp_pair->seed_list_by_score
777 [sdp_pair->single_pass_pos++];
778 if(best_seed->max_end->score < threshold)
779 return NULL;
780 alignment = SDP_Pair_find_path(sdp_pair, best_seed,
781 sdp_pair->boundary);
782 if(SubOpt_overlaps_alignment(sdp_pair->subopt, alignment)){
783 Alignment_destroy(alignment);
784 best_seed = NULL;
785 } else {
786 break;
787 }
788 }
789 if(!best_seed)
790 return NULL;
791 /**/
792 } else { /* multipass */
793 g_assert(sdp_pair->seed_list->len);
794 best_seed = sdp_pair->seed_list->pdata[0];
795 for(i = 1; i < sdp_pair->seed_list->len; i++){
796 seed = sdp_pair->seed_list->pdata[i];
797 if(best_seed->max_end->score < seed->max_end->score)
798 best_seed = seed;
799 }
800 if(best_seed->max_end->score < threshold)
801 return NULL; /* Score below threshold */
802 alignment = SDP_Pair_find_path(sdp_pair, best_seed,
803 sdp_pair->boundary);
804 }
805 g_assert(best_seed);
806 g_assert(alignment);
807 /* Check score is less than previous */
808 g_assert((sdp_pair->last_score < 0)
809 || (best_seed->max_end->score <= sdp_pair->last_score));
810 sdp_pair->alignment_count++;
811 sdp_pair->last_score = best_seed->max_end->score;
812 best_seed->max_end->score = C4_IMPOSSIBLY_LOW_SCORE;
813 return alignment;
814 }
815
816 /**/
817
818