1 /****************************************************************\
2 * *
3 * GAM: Gapped Alignment Manager *
4 * *
5 * Guy St.C. Slater.. mailto:guy@ebi.ac.uk *
6 * Copyright (C) 2000-2009. All Rights Reserved. *
7 * *
8 * This source code is distributed under the terms of the *
9 * GNU General Public License, version 3. See the file COPYING *
10 * or http://www.gnu.org/licenses/gpl.txt for details *
11 * *
12 * If you use this code, please keep this notice intact. *
13 * *
14 \****************************************************************/
15
16 #include <stdio.h> /* For fopen() */
17 #include <stdlib.h> /* For qsort() */
18 #include <string.h> /* For strcmp() */
19 #include <unistd.h> /* For unlink(), getpid(), getppid() etc */
20
21 #include "gam.h"
22 #include "ungapped.h"
23 #include "opair.h"
24 #include "rangetree.h"
25
GAM_Argument_parse_Model_Type(gchar * arg_string,gpointer data)26 static gchar *GAM_Argument_parse_Model_Type(gchar *arg_string,
27 gpointer data){
28 gchar *type_str;
29 register gchar *ret_val = Argument_parse_string(arg_string,
30 &type_str);
31 register Model_Type *model_type = (Model_Type*)data;
32 if(ret_val)
33 return ret_val;
34 (*model_type) = Model_Type_from_string(type_str);
35 return NULL;
36 }
37
38 /**/
39
GAM_Refinement_to_string(GAM_Refinement refinement)40 static gchar *GAM_Refinement_to_string(GAM_Refinement refinement){
41 register gchar *name = NULL;
42 switch(refinement){
43 case GAM_Refinement_NONE:
44 name = "none";
45 break;
46 case GAM_Refinement_FULL:
47 name = "full";
48 break;
49 case GAM_Refinement_REGION:
50 name = "region";
51 break;
52 default:
53 g_error("Unknown GAM Refinement [%d]", refinement);
54 break;
55 }
56 return name;
57 }
58
GAM_Refinement_from_string(gchar * str)59 static GAM_Refinement GAM_Refinement_from_string(gchar *str){
60 gchar *name[GAM_Refinement_TOTAL] =
61 {"none", "full", "region"};
62 GAM_Refinement refinement[GAM_Refinement_TOTAL] =
63 {GAM_Refinement_NONE,
64 GAM_Refinement_FULL,
65 GAM_Refinement_REGION};
66 register gint i;
67 for(i = 0; i < GAM_Refinement_TOTAL; i++)
68 if(!g_strcasecmp(name[i], str))
69 return refinement[i];
70 g_error("Unknown refinement type [%s]", str);
71 return GAM_Refinement_NONE; /* Not reached */
72 }
73
GAM_Argument_parse_refinement(gchar * arg_string,gpointer data)74 static gchar *GAM_Argument_parse_refinement(gchar *arg_string,
75 gpointer data){
76 register GAM_Refinement *refinement
77 = (GAM_Refinement*)data;
78 gchar *refine_str;
79 register gchar *ret_val = Argument_parse_string(arg_string,
80 &refine_str);
81 if(ret_val)
82 return ret_val;
83 (*refinement) = GAM_Refinement_from_string(refine_str);
84 return NULL;
85 }
86
87 /**/
88
GAM_ArgumentSet_create(Argument * arg)89 GAM_ArgumentSet *GAM_ArgumentSet_create(Argument *arg){
90 register ArgumentSet *as;
91 static GAM_ArgumentSet gas;
92 if(arg){
93 as = ArgumentSet_create("Gapped Alignment Options");
94 ArgumentSet_add_option(as, 'm', "model", "alignment model",
95 "Specify alignment model type\n"
96 "Supported types:\n"
97 " ungapped ungapped:trans\n"
98 " affine:global affine:bestfit affine:local affine:overlap\n"
99 " est2genome ner protein2dna protein2genome\n"
100 " protein2dna:bestfit protein2genome:bestfit\n"
101 " coding2coding coding2genome cdna2genome genome2genome\n",
102 "ungapped",
103 GAM_Argument_parse_Model_Type, &gas.type);
104 ArgumentSet_add_option(as, 's', "score", "threshold",
105 "Score threshold for gapped alignment", "100",
106 Argument_parse_int, &gas.threshold);
107 ArgumentSet_add_option(as, '\0', "percent", "threshold",
108 "Percent self-score threshold", "0.0",
109 Argument_parse_float, &gas.percent_threshold);
110 ArgumentSet_add_option(as, 0, "showalignment", NULL,
111 "Include (human readable) alignment in results", "TRUE",
112 Argument_parse_boolean, &gas.show_alignment);
113 ArgumentSet_add_option(as, 0, "showsugar", NULL,
114 "Include 'sugar' format output in results", "FALSE",
115 Argument_parse_boolean, &gas.show_sugar);
116 ArgumentSet_add_option(as, 0, "showcigar", NULL,
117 "Include 'cigar' format output in results", "FALSE",
118 Argument_parse_boolean, &gas.show_cigar);
119 ArgumentSet_add_option(as, 0, "showvulgar", NULL,
120 "Include 'vulgar' format output in results", "TRUE",
121 Argument_parse_boolean, &gas.show_vulgar);
122 ArgumentSet_add_option(as, 0, "showquerygff", NULL,
123 "Include GFF output on query in results", "FALSE",
124 Argument_parse_boolean, &gas.show_query_gff);
125 ArgumentSet_add_option(as, 0, "showtargetgff", NULL,
126 "Include GFF output on target in results", "FALSE",
127 Argument_parse_boolean, &gas.show_target_gff);
128 ArgumentSet_add_option(as, 0, "ryo", "format",
129 "Roll-your-own printf-esque output format", "NULL",
130 Argument_parse_string, &gas.ryo);
131 ArgumentSet_add_option(as, 'n', "bestn", "number",
132 "Report best N results per query", "0",
133 Argument_parse_int, &gas.best_n);
134 ArgumentSet_add_option(as, 'S', "subopt", NULL,
135 "Search for suboptimal alignments", "TRUE",
136 Argument_parse_boolean, &gas.use_subopt);
137 ArgumentSet_add_option(as, 'g', "gappedextension", NULL,
138 "Use gapped extension (default is SDP)", "TRUE",
139 Argument_parse_boolean, &gas.use_gapped_extension);
140 /**/
141 ArgumentSet_add_option(as, '\0', "refine", NULL,
142 "Alignment refinement strategy [none|full|region]", "none",
143 GAM_Argument_parse_refinement, &gas.refinement);
144 ArgumentSet_add_option(as, '\0', "refineboundary", NULL,
145 "Refinement region boundary", "32",
146 Argument_parse_int, &gas.refinement_boundary);
147 /**/
148 Argument_absorb_ArgumentSet(arg, as);
149 }
150 return &gas;
151 }
152
153 /**/
154
GAM_StoredResult_compare(gpointer low,gpointer high,gpointer user_data)155 static gboolean GAM_StoredResult_compare(gpointer low,
156 gpointer high,
157 gpointer user_data){
158 register GAM_StoredResult *gsr_low = (GAM_StoredResult*)low,
159 *gsr_high = (GAM_StoredResult*)high;
160 return gsr_low->score < gsr_high->score;
161 }
162
163 static void GAM_display_alignment(GAM *gam,
164 Alignment *alignment, Sequence *query, Sequence *target,
165 gint result_id, gint rank,
166 gpointer user_data, gpointer self_data, FILE *fp);
167
GAM_StoredResult_create(GAM * gam,Sequence * query,Sequence * target,Alignment * alignment,gpointer user_data,gpointer self_data)168 static GAM_StoredResult *GAM_StoredResult_create(GAM *gam,
169 Sequence *query, Sequence *target,
170 Alignment *alignment,
171 gpointer user_data, gpointer self_data){
172 register GAM_StoredResult *gsr = g_new(GAM_StoredResult, 1);
173 gsr->score = alignment->score;
174 gsr->pos = ftell(gam->bestn_tmp_file);
175 GAM_display_alignment(gam, alignment, query, target,
176 0, -1, user_data, self_data, gam->bestn_tmp_file);
177 gsr->len = ftell(gam->bestn_tmp_file)
178 - gsr->pos;
179 return gsr;
180 }
181
GAM_StoredResult_destroy(GAM_StoredResult * gsr)182 static void GAM_StoredResult_destroy(GAM_StoredResult *gsr){
183 g_free(gsr);
184 return;
185 }
186 /* FIXME: optimisation:
187 * could store unused positions in the tmp file for reuse
188 */
189
GAM_StoredResult_display(GAM_StoredResult * gsr,GAM * gam,gint rank)190 static void GAM_StoredResult_display(GAM_StoredResult *gsr,
191 GAM *gam, gint rank){
192 register gint i, ch, tag_pos = 0;
193 register gchar *rank_tag = "%_EXONERATE_BESTN_RANK_%";
194 if(fseek(gam->bestn_tmp_file, gsr->pos, SEEK_SET))
195 g_error("Could not seek in tmp file");
196 for(i = 0; i < gsr->len; i++){
197 ch = getc(gam->bestn_tmp_file);
198 g_assert(ch != EOF);
199 if(ch == rank_tag[tag_pos]){
200 tag_pos++;
201 if(!rank_tag[tag_pos]){
202 printf("%d", rank);
203 tag_pos = 0;
204 }
205 } else {
206 if(tag_pos){
207 printf("%.*s", tag_pos, rank_tag);
208 tag_pos = 0;
209 }
210 putchar(ch);
211 }
212 }
213 fflush(stdout);
214 return;
215 }
216
217 /**/
218
GAM_compare_id(gconstpointer a,gconstpointer b)219 static gint GAM_compare_id(gconstpointer a, gconstpointer b){
220 register const gchar *id_a = a, *id_b = b;
221 return strcmp(id_a, id_b);
222 }
223
GAM_QueryResult_create(GAM * gam,gchar * query_id)224 static GAM_QueryResult *GAM_QueryResult_create(GAM *gam,
225 gchar *query_id){
226 register GAM_QueryResult *gqr = g_new(GAM_QueryResult, 1);
227 gqr->pq = PQueue_create(gam->pqueue_set,
228 GAM_StoredResult_compare, NULL);
229 gqr->query_id = g_strdup(query_id);
230 gqr->tie_count = 0;
231 gqr->tie_score = C4_IMPOSSIBLY_LOW_SCORE;
232 return gqr;
233 }
234
GAM_QueryResult_pqueue_destroy_GAM_Result(gpointer data,gpointer user_data)235 static void GAM_QueryResult_pqueue_destroy_GAM_Result(gpointer data,
236 gpointer user_data){
237 register GAM_StoredResult *gsr = data;
238 GAM_StoredResult_destroy(gsr);
239 return;
240 }
241
GAM_QueryResult_destroy(GAM_QueryResult * gqr)242 static void GAM_QueryResult_destroy(GAM_QueryResult *gqr){
243 PQueue_destroy(gqr->pq,
244 GAM_QueryResult_pqueue_destroy_GAM_Result, NULL);
245 g_free(gqr->query_id);
246 g_free(gqr);
247 return;
248 }
249
GAM_QueryResult_push(GAM_QueryResult * gqr,GAM_Result * gam_result,Alignment * alignment)250 static void GAM_QueryResult_push(GAM_QueryResult *gqr,
251 GAM_Result *gam_result,
252 Alignment *alignment){
253 register GAM_StoredResult *gsr = GAM_StoredResult_create(gam_result->gam,
254 gam_result->query,
255 gam_result->target,
256 alignment,
257 gam_result->user_data,
258 gam_result->self_data);
259 PQueue_push(gqr->pq, gsr);
260 return;
261 }
262
GAM_QueryResult_submit(GAM_QueryResult * gqr,GAM_Result * gam_result)263 static void GAM_QueryResult_submit(GAM_QueryResult *gqr,
264 GAM_Result *gam_result){
265 register GAM_StoredResult *gsr;
266 register Alignment *alignment;
267 register gint i;
268 register GPtrArray *tie_list = g_ptr_array_new();
269 g_assert(!strcmp(gqr->query_id, gam_result->query->id));
270 for(i = 0; i < gam_result->alignment_list->len; i++){
271 alignment = gam_result->alignment_list->pdata[i];
272 if(alignment->score == gqr->tie_score){
273 GAM_QueryResult_push(gqr, gam_result, alignment);
274 gqr->tie_count++;
275 } else if(alignment->score < gqr->tie_score){
276 if(PQueue_total(gqr->pq) < gam_result->gam->gas->best_n){
277 GAM_QueryResult_push(gqr, gam_result, alignment);
278 gqr->tie_count = 1;
279 gqr->tie_score = alignment->score;
280 } else {
281 break; /* Other alignments are worse */
282 }
283 } else { /* (alignment->score > gqr->tie_score) */
284 GAM_QueryResult_push(gqr, gam_result, alignment);
285 if((PQueue_total(gqr->pq)-gqr->tie_count)
286 >= gam_result->gam->gas->best_n){
287 /* Remove old ties */
288 while(gqr->tie_count){
289 gsr = PQueue_pop(gqr->pq);
290 GAM_StoredResult_destroy(gsr);
291 gqr->tie_count--;
292 }
293 /* Count new ties */
294 gsr = PQueue_top(gqr->pq);
295 gqr->tie_score = gsr->score;
296 do {
297 gsr = PQueue_top(gqr->pq);
298 if(gsr && (gsr->score == gqr->tie_score)){
299 gsr = PQueue_pop(gqr->pq);
300 g_ptr_array_add(tie_list, gsr);
301 } else {
302 break;
303 }
304 } while(TRUE);
305 gqr->tie_count = tie_list->len;
306 /* Replace new ties */
307 while(tie_list->len){
308 gsr = tie_list->pdata[tie_list->len-1];
309 PQueue_push(gqr->pq, gsr);
310 g_ptr_array_set_size(tie_list, tie_list->len-1);
311 }
312 } else {
313 if(PQueue_total(gqr->pq) == 1){ /* First alignment */
314 gqr->tie_count = 1;
315 gqr->tie_score = alignment->score;
316 }
317 }
318 }
319 }
320 g_ptr_array_free(tie_list, TRUE);
321 return;
322 }
323
324 /**/
325
GAM_QueryResult_report_traverse_func(gpointer data,gpointer user_data)326 static gboolean GAM_QueryResult_report_traverse_func(gpointer data,
327 gpointer user_data){
328 register GAM_StoredResult *gsr = data;
329 register GPtrArray *result_list = user_data;
330 g_ptr_array_add(result_list, gsr);
331 return FALSE;
332 }
333
GAM_QueryResult_report_sort_func(const void * a,const void * b)334 static int GAM_QueryResult_report_sort_func(const void *a,
335 const void *b){
336 register GAM_StoredResult **gsr_a = (GAM_StoredResult**)a,
337 **gsr_b = (GAM_StoredResult**)b;
338 return (*gsr_a)->score - (*gsr_b)->score;
339 }
340
GAM_QueryResult_report(GAM_QueryResult * gqr,GAM * gam)341 static void GAM_QueryResult_report(GAM_QueryResult *gqr, GAM *gam){
342 register GPtrArray *result_list = g_ptr_array_new();
343 register gint i;
344 register GAM_StoredResult *gsr;
345 if(gam->verbosity > 2)
346 g_message("Reporting [%d/%d] results for query [%s]",
347 PQueue_total(gqr->pq), gam->gas->best_n,
348 gqr->query_id);
349 PQueue_traverse(gqr->pq, GAM_QueryResult_report_traverse_func,
350 result_list);
351 qsort(result_list->pdata, result_list->len,
352 sizeof(gpointer), GAM_QueryResult_report_sort_func);
353 for(i = result_list->len-1; i >= 0; i--){
354 gsr = result_list->pdata[i];
355 GAM_StoredResult_display(gsr, gam, result_list->len-i);
356 }
357 g_ptr_array_free(result_list, TRUE);
358 return;
359 }
360
361 /**/
362
363 #define Combined_Alphabet_Type(qt,tt) ((qt) << 8 | (tt))
364
GAM_get_tmp_file_handler(void)365 static FILE *GAM_get_tmp_file_handler(void){
366 register gchar *path;
367 register FILE *fp;
368 gchar hostname[1024];
369 gethostname(hostname, 1024);
370 path = g_strdup_printf("%s%cxnr8_%s_%d_%d.txt",
371 g_get_tmp_dir(),
372 G_DIR_SEPARATOR,
373 hostname,
374 (gint)getppid(),
375 (gint)getpid());
376 fp = fopen(path, "w+");
377 if(!fp)
378 g_error("Could not write to tmp bestn file [%s]", path);
379 unlink(path); /* tmp file deleted now, as will not be re-opened */
380 g_free(path);
381 return fp;
382 }
383
GAM_build_match_list(C4_Model * model)384 static GPtrArray *GAM_build_match_list(C4_Model *model){
385 register GPtrArray *match_list = g_ptr_array_new();
386 Match *match_array[Match_Type_TOTAL] = {0};
387 register GPtrArray *match_transition_list
388 = C4_Model_select_transitions(model, C4_Label_MATCH);
389 register gint i;
390 register C4_Transition *transition;
391 register Match *match;
392 for(i = 0; i < match_transition_list->len; i++){
393 transition = match_transition_list->pdata[i];
394 g_assert(transition->label == C4_Label_MATCH);
395 g_assert(transition->label_data);
396 match = transition->label_data;
397 if(!match_array[match->type]){
398 match_array[match->type] = match;
399 g_ptr_array_add(match_list, match);
400 }
401 }
402 g_assert(match_list->len);
403 g_ptr_array_free(match_transition_list, TRUE);
404 return match_list;
405 }
406
GAM_create(Alphabet_Type query_type,Alphabet_Type target_type,Submat * dna_submat,Submat * protein_submat,Translate * translate,gboolean use_exhaustive,gint verbosity)407 GAM *GAM_create(Alphabet_Type query_type, Alphabet_Type target_type,
408 Submat *dna_submat, Submat *protein_submat,
409 Translate *translate, gboolean use_exhaustive,
410 gint verbosity){
411 register GAM *gam = g_new0(GAM, 1);
412 register gint i;
413 register C4_Span *span;
414 gam->thread_ref = ThreadRef_create();
415 gam->dna_submat = Submat_share(dna_submat);
416 gam->protein_submat = Submat_share(protein_submat);
417 gam->translate = Translate_share(translate);
418 gam->gas = GAM_ArgumentSet_create(NULL);
419 if(use_exhaustive && gam->gas->use_subopt)
420 g_warning("Exhaustively generating suboptimal alignments"
421 " will be VERY SLOW: use -S no");
422 gam->query_type = query_type;
423 gam->target_type = target_type;
424 if(gam->gas->best_n){
425 gam->bestn_tree = g_tree_new(GAM_compare_id);
426 gam->bestn_tmp_file = GAM_get_tmp_file_handler();
427 gam->pqueue_set = PQueueSet_create();
428 }
429 if(gam->gas->percent_threshold)
430 gam->percent_threshold_tree = g_tree_new(GAM_compare_id);
431 gam->translate_both = Model_Type_translate_both(gam->gas->type);
432 gam->dual_match = Model_Type_has_dual_match(gam->gas->type);
433 gam->model = Model_Type_get_model(gam->gas->type,
434 query_type, target_type);
435 if((!use_exhaustive) && (!C4_Model_is_local(gam->model)))
436 g_error("Cannot perform heuristic alignments using non-local models: use -E");
437 gam->match_list = GAM_build_match_list(gam->model);
438 if(use_exhaustive){
439 gam->optimal = Optimal_create(gam->model, NULL,
440 Optimal_Type_SCORE
441 |Optimal_Type_PATH
442 |Optimal_Type_REDUCED_SPACE, TRUE);
443 if(gam->gas->refinement != GAM_Refinement_NONE)
444 g_error("Exhaustive alignments cannot be refined");
445 } else {
446 if(gam->gas->refinement != GAM_Refinement_NONE){
447 gam->optimal = Optimal_create(gam->model, NULL,
448 Optimal_Type_SCORE
449 |Optimal_Type_PATH
450 |Optimal_Type_REDUCED_SPACE, TRUE);
451 }
452 if(Model_Type_is_gapped(gam->gas->type)){
453 if(gam->gas->use_gapped_extension){
454 gam->sdp = SDP_create(gam->model);
455 } else {
456 gam->heuristic = Heuristic_create(gam->model);
457 }
458 }
459 }
460 gam->verbosity = verbosity;
461 /* Find max_{query,target}_span */
462 gam->max_query_span = gam->max_target_span = 0;
463 for(i = 0; i < gam->model->span_list->len; i++){
464 span = gam->model->span_list->pdata[i];
465 if(gam->max_query_span < span->max_query)
466 gam->max_query_span = span->max_query;
467 if(gam->max_target_span < span->max_target)
468 gam->max_target_span = span->max_target;
469 }
470 #ifdef USE_PTHREADS
471 pthread_mutex_init(&gam->gam_lock, NULL);
472 #endif /* USE_PTHREADS */
473 return gam;
474 }
475
GAM_share(GAM * gam)476 GAM *GAM_share(GAM *gam){
477 g_assert(gam);
478 ThreadRef_share(gam->thread_ref);
479 return gam;
480 }
481
482 /**/
483
GAM_QueryInfo_create(Sequence * query,GAM * gam)484 static GAM_QueryInfo *GAM_QueryInfo_create(Sequence *query, GAM *gam){
485 register GAM_QueryInfo *gqi = g_new(GAM_QueryInfo, 1);
486 register gint i, j;
487 register Match *match;
488 register Match_Score th;
489 gqi->query_id = g_strdup(query->id);
490 gqi->threshold = 0;
491 /* Calculate best threshold for each query match */
492 for(i = 0; i < gam->match_list->len; i++){
493 match = gam->match_list->pdata[i];
494 th = 0;
495 for(j = 0; j < query->len; j += match->query->advance)
496 th += match->query->self_func(match->query, query, j);
497 if(gqi->threshold < th)
498 gqi->threshold = th;
499 }
500 gqi->threshold *= gam->gas->percent_threshold;
501 gqi->threshold /= 100;
502 if(gqi->threshold < gam->gas->threshold)
503 gqi->threshold = gam->gas->threshold;
504 return gqi;
505 }
506
GAM_QueryInfo_destroy(GAM_QueryInfo * gqi)507 static void GAM_QueryInfo_destroy(GAM_QueryInfo *gqi){
508 g_free(gqi->query_id);
509 g_free(gqi);
510 return;
511 }
512
513 /**/
514
GAM_tree_collect_values(gpointer key,gpointer value,gpointer data)515 static gint GAM_tree_collect_values(gpointer key,
516 gpointer value,
517 gpointer data){
518 register GPtrArray *list = data;
519 g_ptr_array_add(list, value);
520 return FALSE;
521 }
522
GAM_bestn_tree_destroy(GTree * bestn_tree)523 static void GAM_bestn_tree_destroy(GTree *bestn_tree){
524 register GPtrArray *query_data_list = g_ptr_array_new();
525 register gint i;
526 g_tree_traverse(bestn_tree, GAM_tree_collect_values,
527 G_IN_ORDER, query_data_list);
528 g_tree_destroy(bestn_tree);
529 for(i = 0; i < query_data_list->len; i++)
530 GAM_QueryResult_destroy(query_data_list->pdata[i]);
531 g_ptr_array_free(query_data_list, TRUE);
532 return;
533 }
534
GAM_percent_threshold_tree_destroy(GTree * percent_threshold_tree)535 static void GAM_percent_threshold_tree_destroy(
536 GTree *percent_threshold_tree){
537 register GPtrArray *query_info_list = g_ptr_array_new();
538 register gint i;
539 g_tree_traverse(percent_threshold_tree, GAM_tree_collect_values,
540 G_IN_ORDER, query_info_list);
541 g_tree_destroy(percent_threshold_tree);
542 for(i = 0; i < query_info_list->len; i++)
543 GAM_QueryInfo_destroy(query_info_list->pdata[i]);
544 g_ptr_array_free(query_info_list, TRUE);
545 return;
546 }
547
GAM_destroy(GAM * gam)548 void GAM_destroy(GAM *gam){
549 g_assert(gam);
550 if(ThreadRef_destroy(gam->thread_ref))
551 return;
552 #ifdef USE_PTHREADS
553 pthread_mutex_destroy(&gam->gam_lock);
554 #endif /* USE_PTHREADS */
555 g_assert(gam->model);
556 g_ptr_array_free(gam->match_list, TRUE);
557 C4_Model_destroy(gam->model);
558 if(gam->optimal)
559 Optimal_destroy(gam->optimal);
560 if(gam->heuristic)
561 Heuristic_destroy(gam->heuristic);
562 if(gam->sdp)
563 SDP_destroy(gam->sdp);
564 if(gam->translate)
565 Translate_destroy(gam->translate);
566 if(gam->bestn_tree)
567 GAM_bestn_tree_destroy(gam->bestn_tree);
568 if(gam->percent_threshold_tree)
569 GAM_percent_threshold_tree_destroy(gam->percent_threshold_tree);
570 if(gam->bestn_tmp_file)
571 fclose(gam->bestn_tmp_file);
572 if(gam->pqueue_set)
573 PQueueSet_destroy(gam->pqueue_set);
574 g_free(gam);
575 return;
576 }
577
GAM_bestn_tree_report_traverse(gpointer key,gpointer value,gpointer data)578 static gint GAM_bestn_tree_report_traverse(gpointer key,
579 gpointer value,
580 gpointer data){
581 register GAM_QueryResult *gqr = value;
582 register GAM *gam = data;
583 GAM_QueryResult_report(gqr, gam);
584 return FALSE;
585 }
586
GAM_report(GAM * gam)587 void GAM_report(GAM *gam){
588 if(gam->bestn_tree)
589 g_tree_traverse(gam->bestn_tree, GAM_bestn_tree_report_traverse,
590 G_IN_ORDER, gam);
591 return;
592 }
593
594 /**/
595
GAM_Pair_find_portal(C4_Model * model,HSPset * hspset)596 static C4_Portal *GAM_Pair_find_portal(C4_Model *model,
597 HSPset *hspset){
598 register gint i;
599 register C4_Portal *portal = NULL;
600 register C4_Transition *transition;
601 for(i = 0; i < model->portal_list->len; i++){
602 g_assert(model->portal_list);
603 portal = model->portal_list->pdata[i];
604 g_assert(portal->transition_list->len);
605 transition = portal->transition_list->pdata[0];
606 g_assert(transition);
607 if((transition->advance_query
608 == hspset->param->match->query->advance)
609 && (transition->advance_target
610 == hspset->param->match->target->advance)){
611 return portal;
612 }
613 }
614 if(!portal)
615 g_error("No compatible portal found for hspset");
616 return portal;
617 }
618
GAM_Result_create(GAM * gam,Sequence * query,Sequence * target)619 static GAM_Result *GAM_Result_create(GAM *gam,
620 Sequence *query,
621 Sequence *target){
622 register GAM_Result *gam_result = g_new(GAM_Result, 1);
623 g_assert(gam);
624 g_assert(query);
625 g_assert(target);
626 g_assert(query->alphabet->type == gam->query_type);
627 g_assert(target->alphabet->type == gam->target_type);
628 gam_result->ref_count = 1;
629 gam_result->gam = GAM_share(gam);
630 gam_result->alignment_list = NULL;
631 gam_result->query = Sequence_share(query);
632 gam_result->target = Sequence_share(target);
633 gam_result->user_data = Model_Type_create_data(gam->gas->type,
634 query, target);
635 gam_result->self_data = Model_Type_create_data(gam->gas->type,
636 query, query);
637 gam_result->subopt = SubOpt_create(query->len, target->len);
638 return gam_result;
639 }
640
GAM_Result_refine_alignment(GAM_Result * gam_result,Alignment * alignment)641 static Alignment *GAM_Result_refine_alignment(GAM_Result *gam_result,
642 Alignment *alignment){
643 register Alignment *refined_alignment = NULL;
644 register Region *region;
645 register gint query_region_start, target_region_start;
646 g_assert(gam_result->gam->optimal);
647 if(gam_result->gam->verbosity > 1)
648 g_message("Refining alignment ... (%d)", alignment->score);
649 switch(gam_result->gam->gas->refinement){
650 case GAM_Refinement_FULL:
651 region = Region_create(0, 0, gam_result->query->len,
652 gam_result->target->len);
653 refined_alignment = Optimal_find_path(
654 gam_result->gam->optimal, region,
655 gam_result->user_data, alignment->score, gam_result->subopt);
656 g_assert(refined_alignment);
657 Region_destroy(region);
658 break;
659 case GAM_Refinement_REGION:
660 query_region_start
661 = MAX(0, alignment->region->query_start
662 - gam_result->gam->gas->refinement_boundary);
663 target_region_start
664 = MAX(0, alignment->region->target_start
665 - gam_result->gam->gas->refinement_boundary);
666 region = Region_create(query_region_start,
667 target_region_start,
668 MIN(gam_result->query->len,
669 Region_query_end(alignment->region)
670 + gam_result->gam->gas->refinement_boundary)
671 - query_region_start,
672 MIN(gam_result->target->len,
673 Region_target_end(alignment->region)
674 + gam_result->gam->gas->refinement_boundary)
675 - target_region_start);
676 refined_alignment = Optimal_find_path(
677 gam_result->gam->optimal, region,
678 gam_result->user_data, alignment->score, gam_result->subopt);
679 g_assert(refined_alignment);
680 Region_destroy(region);
681 break;
682 default:
683 g_error("Bad type for refinement [%s]",
684 GAM_Refinement_to_string(gam_result->gam->gas->refinement));
685 break;
686 }
687 g_assert(refined_alignment);
688 g_assert(refined_alignment->score >= alignment->score);
689 if(gam_result->gam->verbosity > 1)
690 g_message("Refined alignment score [%d]", refined_alignment->score);
691 return refined_alignment;
692 }
693
GAM_Result_add_alignment(GAM_Result * gam_result,Alignment * alignment,C4_Score threshold)694 static void GAM_Result_add_alignment(GAM_Result *gam_result,
695 Alignment *alignment,
696 C4_Score threshold){
697 register Alignment *refined_alignment;
698 if(!gam_result->alignment_list)
699 gam_result->alignment_list = g_ptr_array_new();
700 if(gam_result->gam->gas->refinement != GAM_Refinement_NONE){
701 refined_alignment = GAM_Result_refine_alignment(gam_result, alignment);
702 Alignment_destroy(alignment);
703 alignment = refined_alignment;
704 /* Refinement may put alignment below threshold */
705 if(alignment->score < threshold){
706 Alignment_destroy(alignment);
707 return;
708 }
709 }
710 g_ptr_array_add(gam_result->alignment_list, alignment);
711 SubOpt_add_alignment(gam_result->subopt, alignment);
712 return;
713 }
714
GAM_get_query_threshold(GAM * gam,Sequence * query)715 static C4_Score GAM_get_query_threshold(GAM *gam, Sequence *query){
716 register GAM_QueryResult *gqr;
717 register GAM_StoredResult *gsr;
718 register GAM_QueryInfo *gqi;
719 if(gam->gas->best_n){
720 gqr = g_tree_lookup(gam->bestn_tree, query->id);
721 if(gqr && (PQueue_total(gqr->pq) >= gam->gas->best_n)){
722 gsr = PQueue_top(gqr->pq);
723 if(gam->verbosity > 2)
724 g_message("Using threshold [%d] for query [%s]",
725 gsr->score, query->id);
726 return gsr->score;
727 }
728 }
729 if(gam->gas->percent_threshold){
730 gqi = g_tree_lookup(gam->percent_threshold_tree, query->id);
731 if(!gqi){
732 gqi = GAM_QueryInfo_create(query, gam);
733 g_tree_insert(gam->percent_threshold_tree, gqi->query_id,
734 gqi);
735 }
736 return gqi->threshold;
737 }
738 return gam->gas->threshold;
739 }
740
GAM_Result_ungapped_create_sort_func(const void * a,const void * b)741 static int GAM_Result_ungapped_create_sort_func(const void *a,
742 const void *b){
743 register Alignment **alignment_a = (Alignment**)a,
744 **alignment_b = (Alignment**)b;
745 return (*alignment_b)->score - (*alignment_a)->score;
746 }
747
GAM_Result_ungapped_add_HSPset(GAM_Result * gam_result,C4_Model * model,HSPset * hspset)748 static void GAM_Result_ungapped_add_HSPset(GAM_Result *gam_result,
749 C4_Model *model,
750 HSPset *hspset){
751 register gint i;
752 register C4_Score threshold;
753 register Alignment *alignment;
754 register HSP *hsp;
755 HSPset_filter_ungapped(hspset);
756 for(i = 0; i < hspset->hsp_list->len; i++){
757 hsp = hspset->hsp_list->pdata[i];
758 threshold = GAM_get_query_threshold(gam_result->gam, hspset->query);
759 if(hsp->score >= threshold){
760 alignment = Ungapped_Alignment_create(model,
761 gam_result->user_data,
762 hsp);
763 g_assert(alignment);
764 GAM_Result_add_alignment(gam_result, alignment, threshold);
765 }
766 }
767 return;
768 }
769
GAM_Result_ungapped_create(GAM * gam,Comparison * comparison)770 GAM_Result *GAM_Result_ungapped_create(GAM *gam,
771 Comparison *comparison){
772 register GAM_Result *gam_result;
773 g_assert(comparison);
774 if(!Comparison_has_hsps(comparison))
775 return NULL;
776 gam_result = GAM_Result_create(gam, comparison->query,
777 comparison->target);
778 /**/
779 if(comparison->dna_hspset)
780 GAM_Result_ungapped_add_HSPset(gam_result, gam->model,
781 comparison->dna_hspset);
782 if(comparison->protein_hspset)
783 GAM_Result_ungapped_add_HSPset(gam_result, gam->model,
784 comparison->protein_hspset);
785 if(comparison->codon_hspset)
786 GAM_Result_ungapped_add_HSPset(gam_result, gam->model,
787 comparison->codon_hspset);
788 /**/
789 if(!gam_result->alignment_list){
790 GAM_Result_destroy(gam_result);
791 return NULL;
792 }
793 /* Sort results, best scores first */
794 qsort(gam_result->alignment_list->pdata,
795 gam_result->alignment_list->len,
796 sizeof(gpointer), GAM_Result_ungapped_create_sort_func);
797 return gam_result;
798 }
799
800 /**/
801
GAM_Result_BSDP_add_HSPset(HSPset * hspset,GAM * gam,HPair * hpair)802 static void GAM_Result_BSDP_add_HSPset(HSPset *hspset, GAM *gam,
803 HPair *hpair){
804 register C4_Portal *portal;
805 portal = GAM_Pair_find_portal(gam->model, hspset);
806 HPair_add_hspset(hpair, portal, hspset);
807 if(gam->verbosity > 2)
808 g_message("Added [%d] hsps to HPair",
809 hspset->hsp_list->len);
810 return;
811 }
812
GAM_Result_is_full(GAM_Result * gam_result)813 static gboolean GAM_Result_is_full(GAM_Result *gam_result){
814 register Alignment *penult, *last;
815 if((gam_result->gam->gas->best_n)
816 && (gam_result->alignment_list->len >= gam_result->gam->gas->best_n)
817 && (gam_result->alignment_list->len > 1)){
818 penult = gam_result->alignment_list->pdata
819 [gam_result->alignment_list->len-2];
820 last = gam_result->alignment_list->pdata
821 [gam_result->alignment_list->len-1];
822 if(penult->score != last->score)
823 return TRUE;
824 }
825 return FALSE;
826 }
827 /* GAM_Result is only full when best_n threshold has been met
828 * and all tie-breakers have been admitted into the GAM_Result
829 */
830
GAM_Result_BSDP_create(GAM * gam,Comparison * comparison)831 static GAM_Result *GAM_Result_BSDP_create(GAM *gam,
832 Comparison *comparison){
833 register HPair *hpair;
834 register Alignment *alignment;
835 register C4_Score threshold
836 = GAM_get_query_threshold(gam, comparison->query);
837 register GAM_Result *gam_result = GAM_Result_create(gam,
838 comparison->query,
839 comparison->target);
840 g_assert(gam->heuristic);
841 if(gam->verbosity > 2)
842 g_message("Preparing HPair for [%s][%s]",
843 comparison->query->id, comparison->target->id);
844 hpair = HPair_create(gam->heuristic, gam_result->subopt,
845 comparison->query->len,
846 comparison->target->len,
847 gam->verbosity, gam_result->user_data);
848 /**/
849 g_assert(comparison);
850 g_assert(Comparison_has_hsps(comparison));
851 if(comparison->dna_hspset)
852 GAM_Result_BSDP_add_HSPset(comparison->dna_hspset, gam, hpair);
853 if(comparison->protein_hspset)
854 GAM_Result_BSDP_add_HSPset(comparison->protein_hspset, gam,
855 hpair);
856 if(comparison->codon_hspset)
857 GAM_Result_BSDP_add_HSPset(comparison->codon_hspset, gam,
858 hpair);
859 HPair_finalise(hpair, threshold);
860 if(gam->verbosity > 2)
861 g_message("Finalised HPair");
862 do {
863 threshold = GAM_get_query_threshold(gam, comparison->query);
864 alignment = HPair_next_path(hpair, threshold);
865 if(!alignment)
866 break;
867 g_assert(alignment->score >= threshold);
868 if(gam->verbosity > 2)
869 g_message("Found BSDP alignment score [%d]",
870 alignment->score);
871 GAM_Result_add_alignment(gam_result, alignment, threshold);
872 if(GAM_Result_is_full(gam_result))
873 break;
874 } while(gam->gas->use_subopt);
875 HPair_destroy(hpair);
876 if(!gam_result->alignment_list){ /* No alignments */
877 GAM_Result_destroy(gam_result);
878 return NULL;
879 }
880 return gam_result;
881 }
882 /* FIXME: optimisation: change to update threshold between
883 * suboptimal alignments.
884 */
885
GAM_Result_SDP_create(GAM * gam,Comparison * comparison)886 static GAM_Result *GAM_Result_SDP_create(GAM *gam,
887 Comparison *comparison){
888 register SDP_Pair *sdp_pair;
889 register Alignment *alignment;
890 register C4_Score threshold;
891 register GAM_Result *gam_result = GAM_Result_create(gam,
892 comparison->query,
893 comparison->target);
894 if(gam->verbosity > 2)
895 g_message("Preparing SDP_Pair for [%s][%s]",
896 comparison->query->id, comparison->target->id);
897 g_assert(gam);
898 g_assert(gam->sdp);
899 g_assert(comparison);
900 g_assert(Comparison_has_hsps(comparison));
901 sdp_pair = SDP_Pair_create(gam->sdp, gam_result->subopt,
902 comparison, gam_result->user_data);
903 do {
904 threshold = GAM_get_query_threshold(gam, comparison->query);
905 alignment = SDP_Pair_next_path(sdp_pair, threshold);
906 if(!alignment)
907 break;
908 g_assert(alignment->score >= threshold);
909 if(gam->verbosity > 2)
910 g_message("Found SDP alignment score [%d]",
911 alignment->score);
912 GAM_Result_add_alignment(gam_result, alignment, threshold);
913 if(GAM_Result_is_full(gam_result))
914 break;
915 } while(gam->gas->use_subopt);
916 SDP_Pair_destroy(sdp_pair);
917 if(!gam_result->alignment_list){ /* No alignments */
918 GAM_Result_destroy(gam_result);
919 return NULL;
920 }
921 return gam_result;
922 }
923
924 /**/
925
926 typedef struct {
927 gboolean *fwd_keep;
928 gboolean *rev_keep;
929 GPtrArray *hsp_list;
930 RangeTree *rangetree;
931 HSP *max_cobs_hsp;
932 GAM *gam;
933 } GAM_Geneseed_Data;
934
GAM_Result_geneseed_add(HSPset * hspset,GAM_Geneseed_Data * gsd)935 static void GAM_Result_geneseed_add(HSPset *hspset, GAM_Geneseed_Data *gsd){
936 register gint i;
937 register HSP *hsp;
938 if(!hspset)
939 return;
940 for(i = 0; i < hspset->hsp_list->len; i++){
941 hsp = hspset->hsp_list->pdata[i];
942 if(!RangeTree_check_pos(gsd->rangetree,
943 HSP_query_cobs(hsp), HSP_target_cobs(hsp)))
944 RangeTree_add(gsd->rangetree,
945 HSP_query_cobs(hsp), HSP_target_cobs(hsp),
946 GINT_TO_POINTER(gsd->hsp_list->len));
947 g_ptr_array_add(gsd->hsp_list, hsp);
948 if((!gsd->max_cobs_hsp) || (gsd->max_cobs_hsp->cobs < hsp->cobs))
949 gsd->max_cobs_hsp = hsp;
950 }
951 return;
952 }
953
954 static void GAM_Result_geneseed_visit_hsp_fwd(GAM_Geneseed_Data *gsd,
955 gint hsp_id);
956 static void GAM_Result_geneseed_visit_hsp_rev(GAM_Geneseed_Data *gsd,
957 gint hsp_id);
958
GAM_Result_geneseed_report_fwd_func(gint x,gint y,gpointer info,gpointer user_data)959 static gboolean GAM_Result_geneseed_report_fwd_func(gint x, gint y,
960 gpointer info, gpointer user_data){
961 register GAM_Geneseed_Data *gsd = user_data;
962 register gint hsp_id = GPOINTER_TO_INT(info);
963 GAM_Result_geneseed_visit_hsp_fwd(gsd, hsp_id);
964 return FALSE;
965 }
966
GAM_Result_geneseed_report_rev_func(gint x,gint y,gpointer info,gpointer user_data)967 static gboolean GAM_Result_geneseed_report_rev_func(gint x, gint y,
968 gpointer info, gpointer user_data){
969 register GAM_Geneseed_Data *gsd = user_data;
970 register gint hsp_id = GPOINTER_TO_INT(info);
971 GAM_Result_geneseed_visit_hsp_rev(gsd, hsp_id);
972 return FALSE;
973 }
974
GAM_Result_geneseed_visit_hsp_fwd(GAM_Geneseed_Data * gsd,gint hsp_id)975 static void GAM_Result_geneseed_visit_hsp_fwd(GAM_Geneseed_Data *gsd,
976 gint hsp_id){
977 register HSP *hsp;
978 register gint query_range, target_range;
979 g_assert(hsp_id < gsd->hsp_list->len);
980 if(!gsd->fwd_keep[hsp_id]){
981 gsd->fwd_keep[hsp_id] = TRUE;
982 hsp = gsd->hsp_list->pdata[hsp_id];
983 query_range = gsd->gam->max_query_span
984 + (((HSP_query_end(hsp) - HSP_query_cobs(hsp))
985 + (HSP_query_cobs(gsd->max_cobs_hsp)
986 - gsd->max_cobs_hsp->query_start)) * 2);
987 target_range = gsd->gam->max_target_span
988 + (((HSP_target_end(hsp) - HSP_target_cobs(hsp))
989 + (HSP_target_cobs(gsd->max_cobs_hsp)
990 - gsd->max_cobs_hsp->target_start)) * 2);
991 /* Search forwards */
992 /*
993 g_message("find fwd (%d,%d,%d) (%d,%d,%d) [%d,%d]",
994 (HSP_query_end(hsp) - HSP_query_cobs(hsp)),
995 gsd->gam->max_query_span,
996 (HSP_query_cobs(gsd->max_cobs_hsp)
997 - gsd->max_cobs_hsp->query_start),
998 (HSP_target_end(hsp) - HSP_target_cobs(hsp)),
999 gsd->gam->max_target_span,
1000 (HSP_target_cobs(gsd->max_cobs_hsp)
1001 - gsd->max_cobs_hsp->target_start),
1002 query_range, target_range);
1003 g_message("RangeTree_find fwd");
1004 */
1005 RangeTree_find(gsd->rangetree,
1006 HSP_query_cobs(hsp), query_range,
1007 HSP_target_cobs(hsp), target_range,
1008 GAM_Result_geneseed_report_fwd_func, gsd);
1009 }
1010 return;
1011 }
1012
GAM_Result_geneseed_visit_hsp_rev(GAM_Geneseed_Data * gsd,gint hsp_id)1013 static void GAM_Result_geneseed_visit_hsp_rev(GAM_Geneseed_Data *gsd,
1014 gint hsp_id){
1015 register HSP *hsp;
1016 register gint query_range, target_range;
1017 g_assert(hsp_id < gsd->hsp_list->len);
1018 if(!gsd->rev_keep[hsp_id]){
1019 gsd->rev_keep[hsp_id] = TRUE;
1020 hsp = gsd->hsp_list->pdata[hsp_id];
1021 query_range = gsd->gam->max_query_span
1022 + (((HSP_query_end(hsp) - HSP_query_cobs(hsp))
1023 + (HSP_query_cobs(gsd->max_cobs_hsp)
1024 - gsd->max_cobs_hsp->query_start)) * 2);
1025 target_range = gsd->gam->max_target_span
1026 + (((HSP_target_end(hsp) - HSP_target_cobs(hsp))
1027 + (HSP_target_cobs(gsd->max_cobs_hsp)
1028 - gsd->max_cobs_hsp->target_start)) * 2);
1029 /*
1030 g_message("find rev (%d,%d,%d) (%d,%d,%d) [%d,%d]",
1031 (HSP_query_end(hsp) - HSP_query_cobs(hsp)),
1032 gsd->gam->max_query_span,
1033 (HSP_query_cobs(gsd->max_cobs_hsp)
1034 - gsd->max_cobs_hsp->query_start),
1035 (HSP_target_end(hsp) - HSP_target_cobs(hsp)),
1036 gsd->gam->max_target_span,
1037 (HSP_target_cobs(gsd->max_cobs_hsp)
1038 - gsd->max_cobs_hsp->target_start),
1039 query_range, target_range);
1040 g_message("RangeTree_find rev");
1041 */
1042 /* Search backwards */
1043 RangeTree_find(gsd->rangetree,
1044 HSP_query_cobs(hsp)-query_range, query_range,
1045 HSP_target_cobs(hsp)-target_range, target_range,
1046 GAM_Result_geneseed_report_rev_func, gsd);
1047 }
1048 return;
1049 }
1050
GAM_Result_geneseed_select(HSPset * hspset,gboolean * fwd_keep,gboolean * rev_keep,gint hsp_id)1051 static gint GAM_Result_geneseed_select(HSPset *hspset,
1052 gboolean *fwd_keep,
1053 gboolean *rev_keep,
1054 gint hsp_id){
1055 register gint i;
1056 register GPtrArray *keep_list = g_ptr_array_new();
1057 register HSP *hsp;
1058 if(!hspset)
1059 return hsp_id;
1060 keep_list = g_ptr_array_new();
1061 for(i = 0; i < hspset->hsp_list->len; i++){
1062 hsp = hspset->hsp_list->pdata[i];
1063 if(fwd_keep[hsp_id] || rev_keep[hsp_id]){
1064 g_ptr_array_add(keep_list, hsp);
1065 } else {
1066 HSP_destroy(hsp);
1067 }
1068 hsp_id++;
1069 }
1070 /*
1071 g_message("geneseed selected [%d] from [%d]", keep_list->len,
1072 hspset->hsp_list->len);
1073 */
1074 g_ptr_array_free(hspset->hsp_list, TRUE);
1075 hspset->hsp_list = keep_list;
1076 return hsp_id;
1077 }
1078
GAM_Result_geneseed_filter(GAM * gam,Comparison * comparison)1079 static void GAM_Result_geneseed_filter(GAM *gam, Comparison *comparison){
1080 register gint i, hsp_id = 0;
1081 register HSP *hsp;
1082 GAM_Geneseed_Data gsd;
1083 gsd.hsp_list = g_ptr_array_new();
1084 gsd.rangetree = RangeTree_create();
1085 gsd.max_cobs_hsp = NULL;
1086 gsd.gam = gam;
1087 /* Build the rangetree */
1088 GAM_Result_geneseed_add(comparison->dna_hspset, &gsd);
1089 GAM_Result_geneseed_add(comparison->protein_hspset, &gsd);
1090 GAM_Result_geneseed_add(comparison->codon_hspset, &gsd);
1091 g_assert(gsd.hsp_list->len);
1092 g_assert(gsd.max_cobs_hsp);
1093 gsd.fwd_keep = g_new0(gboolean, gsd.hsp_list->len);
1094 gsd.rev_keep = g_new0(gboolean, gsd.hsp_list->len);
1095 /* Find HSPs reachable from geneseed HSPs */
1096 for(i = 0; i < gsd.hsp_list->len; i++){
1097 hsp = gsd.hsp_list->pdata[i];
1098 if(hsp->score >= hsp->hsp_set->param->has->geneseed_threshold){
1099 GAM_Result_geneseed_visit_hsp_fwd(&gsd, i);
1100 GAM_Result_geneseed_visit_hsp_rev(&gsd, i);
1101 }
1102 }
1103 /* Keep HSPs marked as keep */
1104 hsp_id = GAM_Result_geneseed_select(comparison->dna_hspset,
1105 gsd.fwd_keep, gsd.rev_keep, hsp_id);
1106 hsp_id = GAM_Result_geneseed_select(comparison->protein_hspset,
1107 gsd.fwd_keep, gsd.rev_keep, hsp_id);
1108 hsp_id = GAM_Result_geneseed_select(comparison->codon_hspset,
1109 gsd.fwd_keep, gsd.rev_keep, hsp_id);
1110 /**/
1111 if(comparison->dna_hspset
1112 && (!comparison->dna_hspset->hsp_list->len)){
1113 HSPset_destroy(comparison->dna_hspset);
1114 comparison->dna_hspset = NULL;
1115 }
1116 if(comparison->protein_hspset
1117 && (!comparison->protein_hspset->hsp_list->len)){
1118 HSPset_destroy(comparison->protein_hspset);
1119 comparison->protein_hspset = NULL;
1120 }
1121 if(comparison->codon_hspset
1122 && (!comparison->codon_hspset->hsp_list->len)){
1123 HSPset_destroy(comparison->codon_hspset);
1124 comparison->codon_hspset = NULL;
1125 }
1126 /**/
1127 /**/
1128 /*
1129 g_message("start with [%d] FILTER down to [%d]",
1130 gsd.hsp_list->len,
1131 (comparison->dna_hspset?comparison->dna_hspset->hsp_list->len:0)
1132 +(comparison->protein_hspset?comparison->protein_hspset->hsp_list->len:0)
1133 +(comparison->codon_hspset?comparison->codon_hspset->hsp_list->len:0));
1134 */
1135 RangeTree_destroy(gsd.rangetree, NULL, NULL);
1136 g_free(gsd.fwd_keep);
1137 g_free(gsd.rev_keep);
1138 g_ptr_array_free(gsd.hsp_list, TRUE);
1139 return;
1140 }
1141
GAM_Result_heuristic_create(GAM * gam,Comparison * comparison)1142 GAM_Result *GAM_Result_heuristic_create(GAM *gam,
1143 Comparison *comparison){
1144 register GAM_Result *gam_result;
1145 register HSPset_ArgumentSet *has
1146 = Comparison_Param_get_HSPSet_Argument_Set(comparison->param);
1147 if(has->geneseed_threshold){
1148 /* Raise score threshold to geneseed
1149 * to prevent low-scoring subopt alignments.
1150 */
1151 GAM_lock(gam);
1152 if(gam->gas->threshold < has->geneseed_threshold)
1153 gam->gas->threshold = has->geneseed_threshold;
1154 GAM_unlock(gam);
1155 GAM_Result_geneseed_filter(gam, comparison);
1156 }
1157 if(!Comparison_has_hsps(comparison))
1158 return NULL;
1159 /*
1160 g_message("heuristic create with [%d,%d,%d]",
1161 comparison->dna_hspset?comparison->dna_hspset->hsp_list->len:0,
1162 comparison->protein_hspset?comparison->protein_hspset->hsp_list->len:0,
1163 comparison->codon_hspset?comparison->codon_hspset->hsp_list->len:0);
1164 Comparison_print(comparison);
1165 */
1166 if(gam->gas->use_gapped_extension)
1167 gam_result = GAM_Result_SDP_create(gam, comparison);
1168 else
1169 gam_result = GAM_Result_BSDP_create(gam, comparison);
1170 return gam_result;
1171 }
1172
1173 /**/
1174
GAM_Result_exhaustive_create(GAM * gam,Sequence * query,Sequence * target)1175 GAM_Result *GAM_Result_exhaustive_create(GAM *gam,
1176 Sequence *query,
1177 Sequence *target){
1178 register Alignment *alignment;
1179 register C4_Score threshold;
1180 register GAM_Result *gam_result;
1181 register OPair *opair;
1182 g_assert(gam->optimal);
1183 GAM_lock(gam);
1184 Sequence_lock(query);
1185 Sequence_lock(target);
1186 gam_result = GAM_Result_create(gam, query, target);
1187 Sequence_unlock(query);
1188 Sequence_unlock(target);
1189 opair = OPair_create(gam->optimal, gam_result->subopt,
1190 query->len, target->len, gam_result->user_data);
1191 GAM_unlock(gam);
1192 if(gam->verbosity > 1)
1193 g_message("Exhaustive alignment of [%s] [%s]",
1194 query->id, target->id);
1195 /**/
1196 do {
1197 GAM_lock(gam);
1198 threshold = GAM_get_query_threshold(gam, query);
1199 GAM_unlock(gam);
1200 alignment = OPair_next_path(opair, threshold);
1201 if(!alignment)
1202 break;
1203 g_assert(alignment->score >= threshold);
1204 GAM_Result_add_alignment(gam_result, alignment, threshold);
1205 if(gam->verbosity > 2)
1206 g_message("Found alignment number [%d] score [%d]",
1207 gam_result->alignment_list->len, alignment->score);
1208 } while(gam->gas->use_subopt);
1209 OPair_destroy(opair);
1210 if(!gam_result->alignment_list){ /* No alignments */
1211 GAM_Result_destroy(gam_result);
1212 return NULL;
1213 }
1214 return gam_result;
1215 }
1216
GAM_Result_share(GAM_Result * gam_result)1217 GAM_Result *GAM_Result_share(GAM_Result *gam_result){
1218 gam_result->ref_count++;
1219 return gam_result;
1220 }
1221
GAM_Result_destroy(GAM_Result * gam_result)1222 void GAM_Result_destroy(GAM_Result *gam_result){
1223 register gint i;
1224 if(--gam_result->ref_count)
1225 return;
1226 if(gam_result->alignment_list){
1227 for(i = 0; i < gam_result->alignment_list->len; i++)
1228 Alignment_destroy(gam_result->alignment_list->pdata[i]);
1229 g_ptr_array_free(gam_result->alignment_list, TRUE);
1230 }
1231 Model_Type_destroy_data(gam_result->gam->gas->type,
1232 gam_result->user_data);
1233 Model_Type_destroy_data(gam_result->gam->gas->type,
1234 gam_result->self_data);
1235 Sequence_destroy(gam_result->query);
1236 Sequence_destroy(gam_result->target);
1237 GAM_lock(gam_result->gam);
1238 GAM_destroy(gam_result->gam);
1239 SubOpt_destroy(gam_result->subopt);
1240 GAM_unlock(gam_result->gam);
1241 g_free(gam_result);
1242 return;
1243 }
1244
GAM_display_alignment(GAM * gam,Alignment * alignment,Sequence * query,Sequence * target,gint result_id,gint rank,gpointer user_data,gpointer self_data,FILE * fp)1245 static void GAM_display_alignment(GAM *gam, Alignment *alignment,
1246 Sequence *query, Sequence *target,
1247 gint result_id, gint rank,
1248 gpointer user_data, gpointer self_data, FILE *fp){
1249 if(gam->gas->show_alignment)
1250 Alignment_display(alignment, query, target,
1251 gam->dna_submat, gam->protein_submat,
1252 gam->translate, fp);
1253 if(gam->gas->show_sugar)
1254 Alignment_display_sugar(alignment, query, target, fp);
1255 if(gam->gas->show_cigar)
1256 Alignment_display_cigar(alignment, query, target, fp);
1257 if(gam->gas->show_vulgar)
1258 Alignment_display_vulgar(alignment, query, target, fp);
1259 if(gam->gas->show_query_gff)
1260 Alignment_display_gff(alignment, query, target, gam->translate,
1261 TRUE, FALSE, result_id, user_data, fp);
1262 if(gam->gas->show_target_gff)
1263 Alignment_display_gff(alignment, query, target, gam->translate,
1264 FALSE, Model_Type_has_genomic_target(gam->gas->type),
1265 result_id, user_data, fp);
1266 if(gam->gas->ryo)
1267 Alignment_display_ryo(alignment, query, target,
1268 gam->gas->ryo, gam->translate, rank,
1269 user_data, self_data, fp);
1270 fflush(fp);
1271 return;
1272 }
1273
GAM_Result_display(GAM_Result * gam_result)1274 static void GAM_Result_display(GAM_Result *gam_result){
1275 register gint i;
1276 register Alignment *alignment;
1277 g_assert(gam_result);
1278 for(i = 0; i < gam_result->alignment_list->len; i++){
1279 alignment = gam_result->alignment_list->pdata[i];
1280 GAM_display_alignment(gam_result->gam, alignment,
1281 gam_result->query, gam_result->target,
1282 i+1, 0, gam_result->user_data, gam_result->self_data, stdout);
1283 }
1284 return;
1285 }
1286
GAM_Result_submit(GAM_Result * gam_result)1287 void GAM_Result_submit(GAM_Result *gam_result){
1288 register GAM_QueryResult *gqr;
1289 g_assert(gam_result);
1290 GAM_lock(gam_result->gam);
1291 if(gam_result->gam->gas->best_n){
1292 gqr = g_tree_lookup(gam_result->gam->bestn_tree,
1293 gam_result->query->id);
1294 if(!gqr){
1295 gqr = GAM_QueryResult_create(gam_result->gam,
1296 gam_result->query->id);
1297 g_tree_insert(gam_result->gam->bestn_tree,
1298 gqr->query_id, gqr);
1299 }
1300 g_assert(gqr);
1301 g_assert(!strcmp(gqr->query_id, gam_result->query->id));
1302 GAM_QueryResult_submit(gqr, gam_result);
1303 } else {
1304 GAM_Result_display(gam_result);
1305 }
1306 GAM_unlock(gam_result->gam);
1307 return;
1308 }
1309
GAM_lock(GAM * gam)1310 void GAM_lock(GAM *gam){
1311 #ifdef USE_PTHREADS
1312 pthread_mutex_lock(&gam->gam_lock);
1313 #endif /* USE_PTHREADS */
1314 return;
1315 }
1316
GAM_unlock(GAM * gam)1317 void GAM_unlock(GAM *gam){
1318 #ifdef USE_PTHREADS
1319 pthread_mutex_unlock(&gam->gam_lock);
1320 #endif /* USE_PTHREADS */
1321 return;
1322 }
1323
1324 /**/
1325
1326