1 /****************************************************************\
2 *                                                                *
3 *  GAM: Gapped Alignment Manager                                 *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 #include <stdio.h>  /* For fopen() */
17 #include <stdlib.h> /* For qsort() */
18 #include <string.h> /* For strcmp() */
19 #include <unistd.h> /* For unlink(), getpid(), getppid() etc */
20 
21 #include "gam.h"
22 #include "ungapped.h"
23 #include "opair.h"
24 #include "rangetree.h"
25 
GAM_Argument_parse_Model_Type(gchar * arg_string,gpointer data)26 static gchar *GAM_Argument_parse_Model_Type(gchar *arg_string,
27                                             gpointer data){
28     gchar *type_str;
29     register gchar *ret_val = Argument_parse_string(arg_string,
30                                                     &type_str);
31     register Model_Type *model_type = (Model_Type*)data;
32     if(ret_val)
33         return ret_val;
34     (*model_type) = Model_Type_from_string(type_str);
35     return NULL;
36     }
37 
38 /**/
39 
GAM_Refinement_to_string(GAM_Refinement refinement)40 static gchar *GAM_Refinement_to_string(GAM_Refinement refinement){
41     register gchar *name = NULL;
42     switch(refinement){
43         case GAM_Refinement_NONE:
44             name = "none";
45             break;
46         case GAM_Refinement_FULL:
47             name = "full";
48             break;
49         case GAM_Refinement_REGION:
50             name = "region";
51             break;
52         default:
53             g_error("Unknown GAM Refinement [%d]", refinement);
54             break;
55         }
56     return name;
57     }
58 
GAM_Refinement_from_string(gchar * str)59 static GAM_Refinement GAM_Refinement_from_string(gchar *str){
60     gchar *name[GAM_Refinement_TOTAL] =
61         {"none", "full", "region"};
62     GAM_Refinement refinement[GAM_Refinement_TOTAL] =
63        {GAM_Refinement_NONE,
64         GAM_Refinement_FULL,
65         GAM_Refinement_REGION};
66     register gint i;
67     for(i = 0; i < GAM_Refinement_TOTAL; i++)
68         if(!g_strcasecmp(name[i], str))
69             return refinement[i];
70     g_error("Unknown refinement type [%s]", str);
71     return GAM_Refinement_NONE; /* Not reached */
72     }
73 
GAM_Argument_parse_refinement(gchar * arg_string,gpointer data)74 static gchar *GAM_Argument_parse_refinement(gchar *arg_string,
75                                                   gpointer data){
76     register GAM_Refinement *refinement
77           = (GAM_Refinement*)data;
78     gchar *refine_str;
79     register gchar *ret_val = Argument_parse_string(arg_string,
80                                                     &refine_str);
81     if(ret_val)
82         return ret_val;
83     (*refinement) = GAM_Refinement_from_string(refine_str);
84     return NULL;
85     }
86 
87 /**/
88 
GAM_ArgumentSet_create(Argument * arg)89 GAM_ArgumentSet *GAM_ArgumentSet_create(Argument *arg){
90     register ArgumentSet *as;
91     static GAM_ArgumentSet gas;
92     if(arg){
93         as = ArgumentSet_create("Gapped Alignment Options");
94         ArgumentSet_add_option(as, 'm', "model", "alignment model",
95         "Specify alignment model type\n"
96         "Supported types:\n"
97         "    ungapped ungapped:trans\n"
98         "    affine:global affine:bestfit affine:local affine:overlap\n"
99         "    est2genome ner protein2dna protein2genome\n"
100         "    protein2dna:bestfit protein2genome:bestfit\n"
101         "    coding2coding coding2genome cdna2genome genome2genome\n",
102          "ungapped",
103          GAM_Argument_parse_Model_Type, &gas.type);
104         ArgumentSet_add_option(as, 's', "score", "threshold",
105          "Score threshold for gapped alignment", "100",
106          Argument_parse_int, &gas.threshold);
107         ArgumentSet_add_option(as, '\0', "percent", "threshold",
108          "Percent self-score threshold", "0.0",
109          Argument_parse_float, &gas.percent_threshold);
110         ArgumentSet_add_option(as, 0, "showalignment", NULL,
111          "Include (human readable) alignment in results", "TRUE",
112          Argument_parse_boolean, &gas.show_alignment);
113         ArgumentSet_add_option(as, 0, "showsugar", NULL,
114          "Include 'sugar' format output in results", "FALSE",
115          Argument_parse_boolean, &gas.show_sugar);
116         ArgumentSet_add_option(as, 0, "showcigar", NULL,
117          "Include 'cigar' format output in results", "FALSE",
118          Argument_parse_boolean, &gas.show_cigar);
119         ArgumentSet_add_option(as, 0, "showvulgar", NULL,
120          "Include 'vulgar' format output in results", "TRUE",
121          Argument_parse_boolean, &gas.show_vulgar);
122         ArgumentSet_add_option(as, 0, "showquerygff", NULL,
123          "Include GFF output on query in results", "FALSE",
124          Argument_parse_boolean, &gas.show_query_gff);
125         ArgumentSet_add_option(as, 0, "showtargetgff", NULL,
126          "Include GFF output on target in results", "FALSE",
127          Argument_parse_boolean, &gas.show_target_gff);
128         ArgumentSet_add_option(as, 0, "ryo", "format",
129          "Roll-your-own printf-esque output format", "NULL",
130          Argument_parse_string, &gas.ryo);
131         ArgumentSet_add_option(as, 'n', "bestn", "number",
132          "Report best N results per query", "0",
133          Argument_parse_int, &gas.best_n);
134         ArgumentSet_add_option(as, 'S', "subopt", NULL,
135          "Search for suboptimal alignments", "TRUE",
136          Argument_parse_boolean, &gas.use_subopt);
137         ArgumentSet_add_option(as, 'g', "gappedextension", NULL,
138          "Use gapped extension (default is SDP)", "TRUE",
139          Argument_parse_boolean, &gas.use_gapped_extension);
140         /**/
141         ArgumentSet_add_option(as, '\0', "refine", NULL,
142         "Alignment refinement strategy [none|full|region]", "none",
143         GAM_Argument_parse_refinement, &gas.refinement);
144         ArgumentSet_add_option(as, '\0', "refineboundary", NULL,
145         "Refinement region boundary", "32",
146         Argument_parse_int, &gas.refinement_boundary);
147         /**/
148         Argument_absorb_ArgumentSet(arg, as);
149         }
150     return &gas;
151     }
152 
153 /**/
154 
GAM_StoredResult_compare(gpointer low,gpointer high,gpointer user_data)155 static gboolean GAM_StoredResult_compare(gpointer low,
156                                          gpointer high,
157                                          gpointer user_data){
158     register GAM_StoredResult *gsr_low = (GAM_StoredResult*)low,
159                               *gsr_high = (GAM_StoredResult*)high;
160     return gsr_low->score < gsr_high->score;
161     }
162 
163 static void GAM_display_alignment(GAM *gam,
164             Alignment *alignment, Sequence *query, Sequence *target,
165             gint result_id, gint rank,
166             gpointer user_data, gpointer self_data, FILE *fp);
167 
GAM_StoredResult_create(GAM * gam,Sequence * query,Sequence * target,Alignment * alignment,gpointer user_data,gpointer self_data)168 static GAM_StoredResult *GAM_StoredResult_create(GAM *gam,
169                          Sequence *query, Sequence *target,
170                          Alignment *alignment,
171                          gpointer user_data, gpointer self_data){
172     register GAM_StoredResult *gsr = g_new(GAM_StoredResult, 1);
173     gsr->score = alignment->score;
174     gsr->pos = ftell(gam->bestn_tmp_file);
175     GAM_display_alignment(gam, alignment, query, target,
176                           0, -1, user_data, self_data, gam->bestn_tmp_file);
177     gsr->len = ftell(gam->bestn_tmp_file)
178              - gsr->pos;
179     return gsr;
180     }
181 
GAM_StoredResult_destroy(GAM_StoredResult * gsr)182 static void GAM_StoredResult_destroy(GAM_StoredResult *gsr){
183     g_free(gsr);
184     return;
185     }
186 /* FIXME: optimisation:
187  *        could store unused positions in the tmp file for reuse
188  */
189 
GAM_StoredResult_display(GAM_StoredResult * gsr,GAM * gam,gint rank)190 static void GAM_StoredResult_display(GAM_StoredResult *gsr,
191                                      GAM *gam, gint rank){
192     register gint i, ch, tag_pos = 0;
193     register gchar *rank_tag = "%_EXONERATE_BESTN_RANK_%";
194     if(fseek(gam->bestn_tmp_file, gsr->pos, SEEK_SET))
195         g_error("Could not seek in tmp file");
196     for(i = 0; i < gsr->len; i++){
197         ch = getc(gam->bestn_tmp_file);
198         g_assert(ch != EOF);
199         if(ch == rank_tag[tag_pos]){
200             tag_pos++;
201             if(!rank_tag[tag_pos]){
202                 printf("%d", rank);
203                 tag_pos = 0;
204                 }
205         } else {
206             if(tag_pos){
207                 printf("%.*s", tag_pos, rank_tag);
208                 tag_pos = 0;
209                 }
210             putchar(ch);
211             }
212         }
213     fflush(stdout);
214     return;
215     }
216 
217 /**/
218 
GAM_compare_id(gconstpointer a,gconstpointer b)219 static gint GAM_compare_id(gconstpointer a, gconstpointer b){
220     register const gchar *id_a = a, *id_b = b;
221     return strcmp(id_a, id_b);
222     }
223 
GAM_QueryResult_create(GAM * gam,gchar * query_id)224 static GAM_QueryResult *GAM_QueryResult_create(GAM *gam,
225                                                gchar *query_id){
226     register GAM_QueryResult *gqr = g_new(GAM_QueryResult, 1);
227     gqr->pq = PQueue_create(gam->pqueue_set,
228                             GAM_StoredResult_compare, NULL);
229     gqr->query_id = g_strdup(query_id);
230     gqr->tie_count = 0;
231     gqr->tie_score = C4_IMPOSSIBLY_LOW_SCORE;
232     return gqr;
233     }
234 
GAM_QueryResult_pqueue_destroy_GAM_Result(gpointer data,gpointer user_data)235 static void GAM_QueryResult_pqueue_destroy_GAM_Result(gpointer data,
236                                                  gpointer user_data){
237     register GAM_StoredResult *gsr = data;
238     GAM_StoredResult_destroy(gsr);
239     return;
240     }
241 
GAM_QueryResult_destroy(GAM_QueryResult * gqr)242 static void GAM_QueryResult_destroy(GAM_QueryResult *gqr){
243     PQueue_destroy(gqr->pq,
244                    GAM_QueryResult_pqueue_destroy_GAM_Result, NULL);
245     g_free(gqr->query_id);
246     g_free(gqr);
247     return;
248     }
249 
GAM_QueryResult_push(GAM_QueryResult * gqr,GAM_Result * gam_result,Alignment * alignment)250 static void GAM_QueryResult_push(GAM_QueryResult *gqr,
251                                  GAM_Result *gam_result,
252                                  Alignment *alignment){
253     register GAM_StoredResult *gsr = GAM_StoredResult_create(gam_result->gam,
254                                           gam_result->query,
255                                           gam_result->target,
256                                           alignment,
257                                           gam_result->user_data,
258                                           gam_result->self_data);
259     PQueue_push(gqr->pq, gsr);
260     return;
261     }
262 
GAM_QueryResult_submit(GAM_QueryResult * gqr,GAM_Result * gam_result)263 static void GAM_QueryResult_submit(GAM_QueryResult *gqr,
264                                    GAM_Result *gam_result){
265     register GAM_StoredResult *gsr;
266     register Alignment *alignment;
267     register gint i;
268     register GPtrArray *tie_list = g_ptr_array_new();
269     g_assert(!strcmp(gqr->query_id, gam_result->query->id));
270     for(i = 0; i < gam_result->alignment_list->len; i++){
271         alignment = gam_result->alignment_list->pdata[i];
272         if(alignment->score == gqr->tie_score){
273             GAM_QueryResult_push(gqr, gam_result, alignment);
274             gqr->tie_count++;
275         } else if(alignment->score < gqr->tie_score){
276             if(PQueue_total(gqr->pq) < gam_result->gam->gas->best_n){
277                 GAM_QueryResult_push(gqr, gam_result, alignment);
278                 gqr->tie_count = 1;
279                 gqr->tie_score = alignment->score;
280             } else {
281                 break; /* Other alignments are worse */
282                 }
283         } else { /* (alignment->score > gqr->tie_score) */
284             GAM_QueryResult_push(gqr, gam_result, alignment);
285             if((PQueue_total(gqr->pq)-gqr->tie_count)
286                >= gam_result->gam->gas->best_n){
287                 /* Remove old ties */
288                 while(gqr->tie_count){
289                     gsr = PQueue_pop(gqr->pq);
290                     GAM_StoredResult_destroy(gsr);
291                     gqr->tie_count--;
292                     }
293                 /* Count new ties */
294                 gsr = PQueue_top(gqr->pq);
295                 gqr->tie_score = gsr->score;
296                 do {
297                     gsr = PQueue_top(gqr->pq);
298                     if(gsr && (gsr->score == gqr->tie_score)){
299                         gsr = PQueue_pop(gqr->pq);
300                         g_ptr_array_add(tie_list, gsr);
301                     } else {
302                         break;
303                         }
304                 } while(TRUE);
305                 gqr->tie_count = tie_list->len;
306                 /* Replace new ties */
307                 while(tie_list->len){
308                     gsr = tie_list->pdata[tie_list->len-1];
309                     PQueue_push(gqr->pq, gsr);
310                     g_ptr_array_set_size(tie_list, tie_list->len-1);
311                     }
312             } else {
313                 if(PQueue_total(gqr->pq) == 1){ /* First alignment */
314                     gqr->tie_count = 1;
315                     gqr->tie_score = alignment->score;
316                     }
317                 }
318             }
319         }
320     g_ptr_array_free(tie_list, TRUE);
321     return;
322     }
323 
324 /**/
325 
GAM_QueryResult_report_traverse_func(gpointer data,gpointer user_data)326 static gboolean GAM_QueryResult_report_traverse_func(gpointer data,
327                                                      gpointer user_data){
328     register GAM_StoredResult *gsr = data;
329     register GPtrArray *result_list = user_data;
330     g_ptr_array_add(result_list, gsr);
331     return FALSE;
332     }
333 
GAM_QueryResult_report_sort_func(const void * a,const void * b)334 static int GAM_QueryResult_report_sort_func(const void *a,
335                                             const void *b){
336     register GAM_StoredResult **gsr_a = (GAM_StoredResult**)a,
337                               **gsr_b = (GAM_StoredResult**)b;
338     return (*gsr_a)->score - (*gsr_b)->score;
339     }
340 
GAM_QueryResult_report(GAM_QueryResult * gqr,GAM * gam)341 static void GAM_QueryResult_report(GAM_QueryResult *gqr, GAM *gam){
342     register GPtrArray *result_list = g_ptr_array_new();
343     register gint i;
344     register GAM_StoredResult *gsr;
345     if(gam->verbosity > 2)
346         g_message("Reporting [%d/%d] results for query [%s]",
347               PQueue_total(gqr->pq), gam->gas->best_n,
348               gqr->query_id);
349     PQueue_traverse(gqr->pq, GAM_QueryResult_report_traverse_func,
350                     result_list);
351     qsort(result_list->pdata, result_list->len,
352           sizeof(gpointer), GAM_QueryResult_report_sort_func);
353     for(i = result_list->len-1; i >= 0; i--){
354         gsr = result_list->pdata[i];
355         GAM_StoredResult_display(gsr, gam, result_list->len-i);
356         }
357     g_ptr_array_free(result_list, TRUE);
358     return;
359     }
360 
361 /**/
362 
363 #define Combined_Alphabet_Type(qt,tt) ((qt) << 8 | (tt))
364 
GAM_get_tmp_file_handler(void)365 static FILE *GAM_get_tmp_file_handler(void){
366     register gchar *path;
367     register FILE *fp;
368     gchar hostname[1024];
369     gethostname(hostname, 1024);
370     path = g_strdup_printf("%s%cxnr8_%s_%d_%d.txt",
371                           g_get_tmp_dir(),
372                           G_DIR_SEPARATOR,
373                           hostname,
374                           (gint)getppid(),
375                           (gint)getpid());
376     fp = fopen(path, "w+");
377     if(!fp)
378         g_error("Could not write to tmp bestn file [%s]", path);
379     unlink(path); /* tmp file deleted now, as will not be re-opened */
380     g_free(path);
381     return fp;
382     }
383 
GAM_build_match_list(C4_Model * model)384 static GPtrArray *GAM_build_match_list(C4_Model *model){
385     register GPtrArray *match_list = g_ptr_array_new();
386     Match *match_array[Match_Type_TOTAL] = {0};
387     register GPtrArray *match_transition_list
388         = C4_Model_select_transitions(model, C4_Label_MATCH);
389     register gint i;
390     register C4_Transition *transition;
391     register Match *match;
392     for(i = 0; i < match_transition_list->len; i++){
393         transition = match_transition_list->pdata[i];
394         g_assert(transition->label == C4_Label_MATCH);
395         g_assert(transition->label_data);
396         match = transition->label_data;
397         if(!match_array[match->type]){
398             match_array[match->type] = match;
399             g_ptr_array_add(match_list, match);
400             }
401         }
402     g_assert(match_list->len);
403     g_ptr_array_free(match_transition_list, TRUE);
404     return match_list;
405     }
406 
GAM_create(Alphabet_Type query_type,Alphabet_Type target_type,Submat * dna_submat,Submat * protein_submat,Translate * translate,gboolean use_exhaustive,gint verbosity)407 GAM *GAM_create(Alphabet_Type query_type, Alphabet_Type target_type,
408                 Submat *dna_submat, Submat *protein_submat,
409                 Translate *translate, gboolean use_exhaustive,
410                 gint verbosity){
411     register GAM *gam = g_new0(GAM, 1);
412     register gint i;
413     register C4_Span *span;
414     gam->thread_ref = ThreadRef_create();
415     gam->dna_submat = Submat_share(dna_submat);
416     gam->protein_submat = Submat_share(protein_submat);
417     gam->translate = Translate_share(translate);
418     gam->gas = GAM_ArgumentSet_create(NULL);
419     if(use_exhaustive && gam->gas->use_subopt)
420         g_warning("Exhaustively generating suboptimal alignments"
421                   " will be VERY SLOW: use -S no");
422     gam->query_type = query_type;
423     gam->target_type = target_type;
424     if(gam->gas->best_n){
425         gam->bestn_tree = g_tree_new(GAM_compare_id);
426         gam->bestn_tmp_file = GAM_get_tmp_file_handler();
427         gam->pqueue_set = PQueueSet_create();
428         }
429     if(gam->gas->percent_threshold)
430         gam->percent_threshold_tree = g_tree_new(GAM_compare_id);
431     gam->translate_both = Model_Type_translate_both(gam->gas->type);
432     gam->dual_match = Model_Type_has_dual_match(gam->gas->type);
433     gam->model = Model_Type_get_model(gam->gas->type,
434                                       query_type, target_type);
435     if((!use_exhaustive) && (!C4_Model_is_local(gam->model)))
436         g_error("Cannot perform heuristic alignments using non-local models: use -E");
437     gam->match_list = GAM_build_match_list(gam->model);
438     if(use_exhaustive){
439         gam->optimal = Optimal_create(gam->model, NULL,
440                        Optimal_Type_SCORE
441                       |Optimal_Type_PATH
442                       |Optimal_Type_REDUCED_SPACE, TRUE);
443         if(gam->gas->refinement != GAM_Refinement_NONE)
444             g_error("Exhaustive alignments cannot be refined");
445     } else {
446         if(gam->gas->refinement != GAM_Refinement_NONE){
447             gam->optimal = Optimal_create(gam->model, NULL,
448                            Optimal_Type_SCORE
449                           |Optimal_Type_PATH
450                           |Optimal_Type_REDUCED_SPACE, TRUE);
451             }
452         if(Model_Type_is_gapped(gam->gas->type)){
453             if(gam->gas->use_gapped_extension){
454                 gam->sdp = SDP_create(gam->model);
455             } else {
456                 gam->heuristic = Heuristic_create(gam->model);
457                 }
458             }
459         }
460     gam->verbosity = verbosity;
461     /* Find max_{query,target}_span */
462     gam->max_query_span = gam->max_target_span = 0;
463     for(i = 0; i < gam->model->span_list->len; i++){
464         span = gam->model->span_list->pdata[i];
465         if(gam->max_query_span < span->max_query)
466             gam->max_query_span = span->max_query;
467         if(gam->max_target_span < span->max_target)
468             gam->max_target_span = span->max_target;
469         }
470 #ifdef USE_PTHREADS
471     pthread_mutex_init(&gam->gam_lock, NULL);
472 #endif /* USE_PTHREADS */
473     return gam;
474     }
475 
GAM_share(GAM * gam)476 GAM *GAM_share(GAM *gam){
477     g_assert(gam);
478     ThreadRef_share(gam->thread_ref);
479     return gam;
480     }
481 
482 /**/
483 
GAM_QueryInfo_create(Sequence * query,GAM * gam)484 static GAM_QueryInfo *GAM_QueryInfo_create(Sequence *query, GAM *gam){
485     register GAM_QueryInfo *gqi = g_new(GAM_QueryInfo, 1);
486     register gint i, j;
487     register Match *match;
488     register Match_Score th;
489     gqi->query_id = g_strdup(query->id);
490     gqi->threshold = 0;
491     /* Calculate best threshold for each query match */
492     for(i = 0; i < gam->match_list->len; i++){
493         match = gam->match_list->pdata[i];
494         th = 0;
495         for(j = 0; j < query->len; j += match->query->advance)
496             th += match->query->self_func(match->query, query, j);
497         if(gqi->threshold < th)
498             gqi->threshold = th;
499         }
500     gqi->threshold *= gam->gas->percent_threshold;
501     gqi->threshold /= 100;
502     if(gqi->threshold < gam->gas->threshold)
503         gqi->threshold = gam->gas->threshold;
504     return gqi;
505     }
506 
GAM_QueryInfo_destroy(GAM_QueryInfo * gqi)507 static void GAM_QueryInfo_destroy(GAM_QueryInfo *gqi){
508     g_free(gqi->query_id);
509     g_free(gqi);
510     return;
511     }
512 
513 /**/
514 
GAM_tree_collect_values(gpointer key,gpointer value,gpointer data)515 static gint GAM_tree_collect_values(gpointer key,
516                                     gpointer value,
517                                     gpointer data){
518     register GPtrArray *list = data;
519     g_ptr_array_add(list, value);
520     return FALSE;
521     }
522 
GAM_bestn_tree_destroy(GTree * bestn_tree)523 static void GAM_bestn_tree_destroy(GTree *bestn_tree){
524     register GPtrArray *query_data_list = g_ptr_array_new();
525     register gint i;
526     g_tree_traverse(bestn_tree, GAM_tree_collect_values,
527                     G_IN_ORDER, query_data_list);
528     g_tree_destroy(bestn_tree);
529     for(i = 0; i < query_data_list->len; i++)
530         GAM_QueryResult_destroy(query_data_list->pdata[i]);
531     g_ptr_array_free(query_data_list, TRUE);
532     return;
533     }
534 
GAM_percent_threshold_tree_destroy(GTree * percent_threshold_tree)535 static void GAM_percent_threshold_tree_destroy(
536          GTree *percent_threshold_tree){
537     register GPtrArray *query_info_list = g_ptr_array_new();
538     register gint i;
539     g_tree_traverse(percent_threshold_tree, GAM_tree_collect_values,
540                     G_IN_ORDER, query_info_list);
541     g_tree_destroy(percent_threshold_tree);
542     for(i = 0; i < query_info_list->len; i++)
543         GAM_QueryInfo_destroy(query_info_list->pdata[i]);
544     g_ptr_array_free(query_info_list, TRUE);
545     return;
546     }
547 
GAM_destroy(GAM * gam)548 void GAM_destroy(GAM *gam){
549     g_assert(gam);
550     if(ThreadRef_destroy(gam->thread_ref))
551         return;
552 #ifdef USE_PTHREADS
553     pthread_mutex_destroy(&gam->gam_lock);
554 #endif /* USE_PTHREADS */
555     g_assert(gam->model);
556     g_ptr_array_free(gam->match_list, TRUE);
557     C4_Model_destroy(gam->model);
558     if(gam->optimal)
559         Optimal_destroy(gam->optimal);
560     if(gam->heuristic)
561         Heuristic_destroy(gam->heuristic);
562     if(gam->sdp)
563         SDP_destroy(gam->sdp);
564     if(gam->translate)
565         Translate_destroy(gam->translate);
566     if(gam->bestn_tree)
567         GAM_bestn_tree_destroy(gam->bestn_tree);
568     if(gam->percent_threshold_tree)
569         GAM_percent_threshold_tree_destroy(gam->percent_threshold_tree);
570     if(gam->bestn_tmp_file)
571         fclose(gam->bestn_tmp_file);
572     if(gam->pqueue_set)
573         PQueueSet_destroy(gam->pqueue_set);
574     g_free(gam);
575     return;
576     }
577 
GAM_bestn_tree_report_traverse(gpointer key,gpointer value,gpointer data)578 static gint GAM_bestn_tree_report_traverse(gpointer key,
579                                            gpointer value,
580                                            gpointer data){
581     register GAM_QueryResult *gqr = value;
582     register GAM *gam = data;
583     GAM_QueryResult_report(gqr, gam);
584     return FALSE;
585     }
586 
GAM_report(GAM * gam)587 void GAM_report(GAM *gam){
588     if(gam->bestn_tree)
589         g_tree_traverse(gam->bestn_tree, GAM_bestn_tree_report_traverse,
590                         G_IN_ORDER, gam);
591     return;
592     }
593 
594 /**/
595 
GAM_Pair_find_portal(C4_Model * model,HSPset * hspset)596 static C4_Portal *GAM_Pair_find_portal(C4_Model *model,
597                                        HSPset *hspset){
598     register gint i;
599     register C4_Portal *portal = NULL;
600     register C4_Transition *transition;
601     for(i = 0; i < model->portal_list->len; i++){
602         g_assert(model->portal_list);
603         portal = model->portal_list->pdata[i];
604         g_assert(portal->transition_list->len);
605         transition = portal->transition_list->pdata[0];
606         g_assert(transition);
607         if((transition->advance_query
608             == hspset->param->match->query->advance)
609         && (transition->advance_target
610             == hspset->param->match->target->advance)){
611             return portal;
612             }
613         }
614     if(!portal)
615         g_error("No compatible portal found for hspset");
616     return portal;
617     }
618 
GAM_Result_create(GAM * gam,Sequence * query,Sequence * target)619 static GAM_Result *GAM_Result_create(GAM *gam,
620                                      Sequence *query,
621                                      Sequence *target){
622     register GAM_Result *gam_result = g_new(GAM_Result, 1);
623     g_assert(gam);
624     g_assert(query);
625     g_assert(target);
626     g_assert(query->alphabet->type == gam->query_type);
627     g_assert(target->alphabet->type == gam->target_type);
628     gam_result->ref_count = 1;
629     gam_result->gam = GAM_share(gam);
630     gam_result->alignment_list = NULL;
631     gam_result->query = Sequence_share(query);
632     gam_result->target = Sequence_share(target);
633     gam_result->user_data = Model_Type_create_data(gam->gas->type,
634                                                    query, target);
635     gam_result->self_data = Model_Type_create_data(gam->gas->type,
636                                                    query, query);
637     gam_result->subopt = SubOpt_create(query->len, target->len);
638     return gam_result;
639     }
640 
GAM_Result_refine_alignment(GAM_Result * gam_result,Alignment * alignment)641 static Alignment *GAM_Result_refine_alignment(GAM_Result *gam_result,
642                                               Alignment *alignment){
643     register Alignment *refined_alignment = NULL;
644     register Region *region;
645     register gint query_region_start, target_region_start;
646     g_assert(gam_result->gam->optimal);
647     if(gam_result->gam->verbosity > 1)
648         g_message("Refining alignment ... (%d)", alignment->score);
649     switch(gam_result->gam->gas->refinement){
650         case GAM_Refinement_FULL:
651             region = Region_create(0, 0, gam_result->query->len,
652                                          gam_result->target->len);
653             refined_alignment = Optimal_find_path(
654                     gam_result->gam->optimal, region,
655                     gam_result->user_data, alignment->score, gam_result->subopt);
656             g_assert(refined_alignment);
657             Region_destroy(region);
658             break;
659         case GAM_Refinement_REGION:
660             query_region_start
661                 = MAX(0, alignment->region->query_start
662                        - gam_result->gam->gas->refinement_boundary);
663             target_region_start
664                 = MAX(0, alignment->region->target_start
665                        - gam_result->gam->gas->refinement_boundary);
666             region = Region_create(query_region_start,
667                                    target_region_start,
668                 MIN(gam_result->query->len,
669                     Region_query_end(alignment->region)
670                   + gam_result->gam->gas->refinement_boundary)
671                 - query_region_start,
672                 MIN(gam_result->target->len,
673                     Region_target_end(alignment->region)
674                   + gam_result->gam->gas->refinement_boundary)
675                 - target_region_start);
676             refined_alignment = Optimal_find_path(
677                     gam_result->gam->optimal, region,
678                     gam_result->user_data, alignment->score, gam_result->subopt);
679             g_assert(refined_alignment);
680             Region_destroy(region);
681             break;
682         default:
683             g_error("Bad type for refinement [%s]",
684                 GAM_Refinement_to_string(gam_result->gam->gas->refinement));
685             break;
686         }
687     g_assert(refined_alignment);
688     g_assert(refined_alignment->score >= alignment->score);
689     if(gam_result->gam->verbosity > 1)
690         g_message("Refined alignment score [%d]", refined_alignment->score);
691     return refined_alignment;
692     }
693 
GAM_Result_add_alignment(GAM_Result * gam_result,Alignment * alignment,C4_Score threshold)694 static void GAM_Result_add_alignment(GAM_Result *gam_result,
695                                      Alignment *alignment,
696                                      C4_Score threshold){
697     register Alignment *refined_alignment;
698     if(!gam_result->alignment_list)
699         gam_result->alignment_list = g_ptr_array_new();
700     if(gam_result->gam->gas->refinement != GAM_Refinement_NONE){
701         refined_alignment = GAM_Result_refine_alignment(gam_result, alignment);
702         Alignment_destroy(alignment);
703         alignment = refined_alignment;
704         /* Refinement may put alignment below threshold */
705         if(alignment->score < threshold){
706             Alignment_destroy(alignment);
707             return;
708             }
709         }
710     g_ptr_array_add(gam_result->alignment_list, alignment);
711     SubOpt_add_alignment(gam_result->subopt, alignment);
712     return;
713     }
714 
GAM_get_query_threshold(GAM * gam,Sequence * query)715 static C4_Score GAM_get_query_threshold(GAM *gam, Sequence *query){
716     register GAM_QueryResult *gqr;
717     register GAM_StoredResult *gsr;
718     register GAM_QueryInfo *gqi;
719     if(gam->gas->best_n){
720         gqr = g_tree_lookup(gam->bestn_tree, query->id);
721         if(gqr && (PQueue_total(gqr->pq) >= gam->gas->best_n)){
722             gsr = PQueue_top(gqr->pq);
723             if(gam->verbosity > 2)
724                 g_message("Using threshold [%d] for query [%s]",
725                       gsr->score, query->id);
726             return gsr->score;
727             }
728         }
729     if(gam->gas->percent_threshold){
730         gqi = g_tree_lookup(gam->percent_threshold_tree, query->id);
731         if(!gqi){
732             gqi = GAM_QueryInfo_create(query, gam);
733             g_tree_insert(gam->percent_threshold_tree, gqi->query_id,
734                                                        gqi);
735             }
736         return gqi->threshold;
737         }
738     return gam->gas->threshold;
739     }
740 
GAM_Result_ungapped_create_sort_func(const void * a,const void * b)741 static int GAM_Result_ungapped_create_sort_func(const void *a,
742                                                 const void *b){
743     register Alignment **alignment_a = (Alignment**)a,
744                        **alignment_b = (Alignment**)b;
745     return (*alignment_b)->score - (*alignment_a)->score;
746     }
747 
GAM_Result_ungapped_add_HSPset(GAM_Result * gam_result,C4_Model * model,HSPset * hspset)748 static void GAM_Result_ungapped_add_HSPset(GAM_Result *gam_result,
749                                            C4_Model *model,
750                                            HSPset *hspset){
751     register gint i;
752     register C4_Score threshold;
753     register Alignment *alignment;
754     register HSP *hsp;
755     HSPset_filter_ungapped(hspset);
756     for(i = 0; i < hspset->hsp_list->len; i++){
757         hsp = hspset->hsp_list->pdata[i];
758         threshold = GAM_get_query_threshold(gam_result->gam, hspset->query);
759         if(hsp->score >= threshold){
760             alignment = Ungapped_Alignment_create(model,
761                                                   gam_result->user_data,
762                                                   hsp);
763             g_assert(alignment);
764             GAM_Result_add_alignment(gam_result, alignment, threshold);
765             }
766         }
767     return;
768     }
769 
GAM_Result_ungapped_create(GAM * gam,Comparison * comparison)770 GAM_Result *GAM_Result_ungapped_create(GAM *gam,
771                                        Comparison *comparison){
772     register GAM_Result *gam_result;
773     g_assert(comparison);
774     if(!Comparison_has_hsps(comparison))
775         return NULL;
776     gam_result = GAM_Result_create(gam, comparison->query,
777                                         comparison->target);
778     /**/
779     if(comparison->dna_hspset)
780         GAM_Result_ungapped_add_HSPset(gam_result, gam->model,
781                                        comparison->dna_hspset);
782     if(comparison->protein_hspset)
783         GAM_Result_ungapped_add_HSPset(gam_result, gam->model,
784                                        comparison->protein_hspset);
785     if(comparison->codon_hspset)
786         GAM_Result_ungapped_add_HSPset(gam_result, gam->model,
787                                        comparison->codon_hspset);
788     /**/
789     if(!gam_result->alignment_list){
790         GAM_Result_destroy(gam_result);
791         return NULL;
792         }
793     /* Sort results, best scores first */
794     qsort(gam_result->alignment_list->pdata,
795           gam_result->alignment_list->len,
796           sizeof(gpointer), GAM_Result_ungapped_create_sort_func);
797     return gam_result;
798     }
799 
800 /**/
801 
GAM_Result_BSDP_add_HSPset(HSPset * hspset,GAM * gam,HPair * hpair)802 static void GAM_Result_BSDP_add_HSPset(HSPset *hspset, GAM *gam,
803                                        HPair *hpair){
804     register C4_Portal *portal;
805     portal = GAM_Pair_find_portal(gam->model, hspset);
806     HPair_add_hspset(hpair, portal, hspset);
807     if(gam->verbosity > 2)
808         g_message("Added [%d] hsps to HPair",
809                   hspset->hsp_list->len);
810     return;
811     }
812 
GAM_Result_is_full(GAM_Result * gam_result)813 static gboolean GAM_Result_is_full(GAM_Result *gam_result){
814     register Alignment *penult, *last;
815     if((gam_result->gam->gas->best_n)
816     && (gam_result->alignment_list->len >= gam_result->gam->gas->best_n)
817     && (gam_result->alignment_list->len > 1)){
818         penult = gam_result->alignment_list->pdata
819                 [gam_result->alignment_list->len-2];
820         last   = gam_result->alignment_list->pdata
821                 [gam_result->alignment_list->len-1];
822         if(penult->score != last->score)
823             return TRUE;
824         }
825     return FALSE;
826     }
827 /* GAM_Result is only full when best_n threshold has been met
828  * and all tie-breakers have been admitted into the GAM_Result
829  */
830 
GAM_Result_BSDP_create(GAM * gam,Comparison * comparison)831 static GAM_Result *GAM_Result_BSDP_create(GAM *gam,
832                                           Comparison *comparison){
833     register HPair *hpair;
834     register Alignment *alignment;
835     register C4_Score threshold
836              = GAM_get_query_threshold(gam, comparison->query);
837     register GAM_Result *gam_result = GAM_Result_create(gam,
838                                         comparison->query,
839                                         comparison->target);
840     g_assert(gam->heuristic);
841     if(gam->verbosity > 2)
842         g_message("Preparing HPair for [%s][%s]",
843                   comparison->query->id, comparison->target->id);
844     hpair = HPair_create(gam->heuristic, gam_result->subopt,
845                          comparison->query->len,
846                          comparison->target->len,
847                          gam->verbosity, gam_result->user_data);
848     /**/
849     g_assert(comparison);
850     g_assert(Comparison_has_hsps(comparison));
851     if(comparison->dna_hspset)
852         GAM_Result_BSDP_add_HSPset(comparison->dna_hspset, gam, hpair);
853     if(comparison->protein_hspset)
854         GAM_Result_BSDP_add_HSPset(comparison->protein_hspset, gam,
855                                    hpair);
856     if(comparison->codon_hspset)
857         GAM_Result_BSDP_add_HSPset(comparison->codon_hspset, gam,
858                                    hpair);
859     HPair_finalise(hpair, threshold);
860     if(gam->verbosity > 2)
861         g_message("Finalised HPair");
862     do {
863         threshold = GAM_get_query_threshold(gam, comparison->query);
864         alignment = HPair_next_path(hpair, threshold);
865         if(!alignment)
866             break;
867         g_assert(alignment->score >= threshold);
868         if(gam->verbosity > 2)
869             g_message("Found BSDP alignment score [%d]",
870                       alignment->score);
871         GAM_Result_add_alignment(gam_result, alignment, threshold);
872         if(GAM_Result_is_full(gam_result))
873             break;
874     } while(gam->gas->use_subopt);
875     HPair_destroy(hpair);
876     if(!gam_result->alignment_list){ /* No alignments */
877         GAM_Result_destroy(gam_result);
878         return NULL;
879         }
880     return gam_result;
881     }
882 /* FIXME: optimisation: change to update threshold between
883  *        suboptimal alignments.
884  */
885 
GAM_Result_SDP_create(GAM * gam,Comparison * comparison)886 static GAM_Result *GAM_Result_SDP_create(GAM *gam,
887                                          Comparison *comparison){
888     register SDP_Pair *sdp_pair;
889     register Alignment *alignment;
890     register C4_Score threshold;
891     register GAM_Result *gam_result = GAM_Result_create(gam,
892                                         comparison->query,
893                                         comparison->target);
894     if(gam->verbosity > 2)
895         g_message("Preparing SDP_Pair for [%s][%s]",
896                 comparison->query->id, comparison->target->id);
897     g_assert(gam);
898     g_assert(gam->sdp);
899     g_assert(comparison);
900     g_assert(Comparison_has_hsps(comparison));
901     sdp_pair = SDP_Pair_create(gam->sdp, gam_result->subopt,
902                                comparison, gam_result->user_data);
903     do {
904         threshold = GAM_get_query_threshold(gam, comparison->query);
905         alignment = SDP_Pair_next_path(sdp_pair, threshold);
906         if(!alignment)
907             break;
908         g_assert(alignment->score >= threshold);
909         if(gam->verbosity > 2)
910             g_message("Found SDP alignment score [%d]",
911                       alignment->score);
912         GAM_Result_add_alignment(gam_result, alignment, threshold);
913         if(GAM_Result_is_full(gam_result))
914             break;
915     } while(gam->gas->use_subopt);
916     SDP_Pair_destroy(sdp_pair);
917     if(!gam_result->alignment_list){ /* No alignments */
918         GAM_Result_destroy(gam_result);
919         return NULL;
920         }
921     return gam_result;
922     }
923 
924 /**/
925 
926 typedef struct {
927      gboolean *fwd_keep;
928      gboolean *rev_keep;
929     GPtrArray *hsp_list;
930     RangeTree *rangetree;
931           HSP *max_cobs_hsp;
932           GAM *gam;
933 } GAM_Geneseed_Data;
934 
GAM_Result_geneseed_add(HSPset * hspset,GAM_Geneseed_Data * gsd)935 static void GAM_Result_geneseed_add(HSPset *hspset, GAM_Geneseed_Data *gsd){
936     register gint i;
937     register HSP *hsp;
938     if(!hspset)
939         return;
940     for(i = 0; i < hspset->hsp_list->len; i++){
941         hsp = hspset->hsp_list->pdata[i];
942         if(!RangeTree_check_pos(gsd->rangetree,
943                                 HSP_query_cobs(hsp), HSP_target_cobs(hsp)))
944             RangeTree_add(gsd->rangetree,
945                           HSP_query_cobs(hsp), HSP_target_cobs(hsp),
946                           GINT_TO_POINTER(gsd->hsp_list->len));
947         g_ptr_array_add(gsd->hsp_list, hsp);
948         if((!gsd->max_cobs_hsp) || (gsd->max_cobs_hsp->cobs < hsp->cobs))
949                 gsd->max_cobs_hsp = hsp;
950         }
951     return;
952     }
953 
954 static void GAM_Result_geneseed_visit_hsp_fwd(GAM_Geneseed_Data *gsd,
955                                               gint hsp_id);
956 static void GAM_Result_geneseed_visit_hsp_rev(GAM_Geneseed_Data *gsd,
957                                               gint hsp_id);
958 
GAM_Result_geneseed_report_fwd_func(gint x,gint y,gpointer info,gpointer user_data)959 static gboolean GAM_Result_geneseed_report_fwd_func(gint x, gint y,
960                           gpointer info, gpointer user_data){
961     register GAM_Geneseed_Data *gsd = user_data;
962     register gint hsp_id = GPOINTER_TO_INT(info);
963     GAM_Result_geneseed_visit_hsp_fwd(gsd, hsp_id);
964     return FALSE;
965     }
966 
GAM_Result_geneseed_report_rev_func(gint x,gint y,gpointer info,gpointer user_data)967 static gboolean GAM_Result_geneseed_report_rev_func(gint x, gint y,
968                           gpointer info, gpointer user_data){
969     register GAM_Geneseed_Data *gsd = user_data;
970     register gint hsp_id = GPOINTER_TO_INT(info);
971     GAM_Result_geneseed_visit_hsp_rev(gsd, hsp_id);
972     return FALSE;
973     }
974 
GAM_Result_geneseed_visit_hsp_fwd(GAM_Geneseed_Data * gsd,gint hsp_id)975 static void GAM_Result_geneseed_visit_hsp_fwd(GAM_Geneseed_Data *gsd,
976                                               gint hsp_id){
977     register HSP *hsp;
978     register gint query_range, target_range;
979     g_assert(hsp_id < gsd->hsp_list->len);
980     if(!gsd->fwd_keep[hsp_id]){
981         gsd->fwd_keep[hsp_id] = TRUE;
982         hsp = gsd->hsp_list->pdata[hsp_id];
983         query_range = gsd->gam->max_query_span
984                     + (((HSP_query_end(hsp) - HSP_query_cobs(hsp))
985                        + (HSP_query_cobs(gsd->max_cobs_hsp)
986                          - gsd->max_cobs_hsp->query_start)) * 2);
987         target_range = gsd->gam->max_target_span
988                      + (((HSP_target_end(hsp) - HSP_target_cobs(hsp))
989                        + (HSP_target_cobs(gsd->max_cobs_hsp)
990                          - gsd->max_cobs_hsp->target_start)) * 2);
991         /* Search forwards */
992         /*
993         g_message("find fwd (%d,%d,%d) (%d,%d,%d) [%d,%d]",
994                     (HSP_query_end(hsp) - HSP_query_cobs(hsp)),
995                     gsd->gam->max_query_span,
996                     (HSP_query_cobs(gsd->max_cobs_hsp)
997                       - gsd->max_cobs_hsp->query_start),
998                     (HSP_target_end(hsp) - HSP_target_cobs(hsp)),
999                     gsd->gam->max_target_span,
1000                     (HSP_target_cobs(gsd->max_cobs_hsp)
1001                       - gsd->max_cobs_hsp->target_start),
1002                     query_range, target_range);
1003         g_message("RangeTree_find fwd");
1004         */
1005         RangeTree_find(gsd->rangetree,
1006                        HSP_query_cobs(hsp), query_range,
1007                        HSP_target_cobs(hsp), target_range,
1008                        GAM_Result_geneseed_report_fwd_func, gsd);
1009         }
1010     return;
1011     }
1012 
GAM_Result_geneseed_visit_hsp_rev(GAM_Geneseed_Data * gsd,gint hsp_id)1013 static void GAM_Result_geneseed_visit_hsp_rev(GAM_Geneseed_Data *gsd,
1014                                               gint hsp_id){
1015     register HSP *hsp;
1016     register gint query_range, target_range;
1017     g_assert(hsp_id < gsd->hsp_list->len);
1018     if(!gsd->rev_keep[hsp_id]){
1019         gsd->rev_keep[hsp_id] = TRUE;
1020         hsp = gsd->hsp_list->pdata[hsp_id];
1021         query_range = gsd->gam->max_query_span
1022                     + (((HSP_query_end(hsp) - HSP_query_cobs(hsp))
1023                        + (HSP_query_cobs(gsd->max_cobs_hsp)
1024                          - gsd->max_cobs_hsp->query_start)) * 2);
1025         target_range = gsd->gam->max_target_span
1026                      + (((HSP_target_end(hsp) - HSP_target_cobs(hsp))
1027                        + (HSP_target_cobs(gsd->max_cobs_hsp)
1028                          - gsd->max_cobs_hsp->target_start)) * 2);
1029         /*
1030         g_message("find rev (%d,%d,%d) (%d,%d,%d) [%d,%d]",
1031                     (HSP_query_end(hsp) - HSP_query_cobs(hsp)),
1032                     gsd->gam->max_query_span,
1033                     (HSP_query_cobs(gsd->max_cobs_hsp)
1034                       - gsd->max_cobs_hsp->query_start),
1035                     (HSP_target_end(hsp) - HSP_target_cobs(hsp)),
1036                     gsd->gam->max_target_span,
1037                     (HSP_target_cobs(gsd->max_cobs_hsp)
1038                       - gsd->max_cobs_hsp->target_start),
1039                     query_range, target_range);
1040         g_message("RangeTree_find rev");
1041         */
1042         /* Search backwards */
1043         RangeTree_find(gsd->rangetree,
1044                        HSP_query_cobs(hsp)-query_range, query_range,
1045                        HSP_target_cobs(hsp)-target_range, target_range,
1046                        GAM_Result_geneseed_report_rev_func, gsd);
1047         }
1048     return;
1049     }
1050 
GAM_Result_geneseed_select(HSPset * hspset,gboolean * fwd_keep,gboolean * rev_keep,gint hsp_id)1051 static gint GAM_Result_geneseed_select(HSPset *hspset,
1052                                        gboolean *fwd_keep,
1053                                        gboolean *rev_keep,
1054                                        gint hsp_id){
1055     register gint i;
1056     register GPtrArray *keep_list = g_ptr_array_new();
1057     register HSP *hsp;
1058     if(!hspset)
1059         return hsp_id;
1060     keep_list = g_ptr_array_new();
1061     for(i = 0; i < hspset->hsp_list->len; i++){
1062         hsp = hspset->hsp_list->pdata[i];
1063         if(fwd_keep[hsp_id] || rev_keep[hsp_id]){
1064             g_ptr_array_add(keep_list, hsp);
1065         } else {
1066             HSP_destroy(hsp);
1067             }
1068         hsp_id++;
1069         }
1070     /*
1071     g_message("geneseed selected [%d] from [%d]", keep_list->len,
1072                                                   hspset->hsp_list->len);
1073     */
1074     g_ptr_array_free(hspset->hsp_list, TRUE);
1075     hspset->hsp_list = keep_list;
1076     return hsp_id;
1077     }
1078 
GAM_Result_geneseed_filter(GAM * gam,Comparison * comparison)1079 static void GAM_Result_geneseed_filter(GAM *gam, Comparison *comparison){
1080     register gint i, hsp_id = 0;
1081     register HSP *hsp;
1082     GAM_Geneseed_Data gsd;
1083     gsd.hsp_list = g_ptr_array_new();
1084     gsd.rangetree = RangeTree_create();
1085     gsd.max_cobs_hsp = NULL;
1086     gsd.gam = gam;
1087     /* Build the rangetree */
1088     GAM_Result_geneseed_add(comparison->dna_hspset, &gsd);
1089     GAM_Result_geneseed_add(comparison->protein_hspset, &gsd);
1090     GAM_Result_geneseed_add(comparison->codon_hspset, &gsd);
1091     g_assert(gsd.hsp_list->len);
1092     g_assert(gsd.max_cobs_hsp);
1093     gsd.fwd_keep = g_new0(gboolean, gsd.hsp_list->len);
1094     gsd.rev_keep = g_new0(gboolean, gsd.hsp_list->len);
1095     /* Find HSPs reachable from geneseed HSPs */
1096     for(i = 0; i < gsd.hsp_list->len; i++){
1097         hsp = gsd.hsp_list->pdata[i];
1098         if(hsp->score >= hsp->hsp_set->param->has->geneseed_threshold){
1099             GAM_Result_geneseed_visit_hsp_fwd(&gsd, i);
1100             GAM_Result_geneseed_visit_hsp_rev(&gsd, i);
1101             }
1102         }
1103     /* Keep HSPs marked as keep */
1104     hsp_id = GAM_Result_geneseed_select(comparison->dna_hspset,
1105                  gsd.fwd_keep, gsd.rev_keep, hsp_id);
1106     hsp_id = GAM_Result_geneseed_select(comparison->protein_hspset,
1107                  gsd.fwd_keep, gsd.rev_keep, hsp_id);
1108     hsp_id = GAM_Result_geneseed_select(comparison->codon_hspset,
1109                  gsd.fwd_keep, gsd.rev_keep, hsp_id);
1110     /**/
1111     if(comparison->dna_hspset
1112     && (!comparison->dna_hspset->hsp_list->len)){
1113         HSPset_destroy(comparison->dna_hspset);
1114         comparison->dna_hspset = NULL;
1115         }
1116     if(comparison->protein_hspset
1117     && (!comparison->protein_hspset->hsp_list->len)){
1118         HSPset_destroy(comparison->protein_hspset);
1119         comparison->protein_hspset = NULL;
1120         }
1121     if(comparison->codon_hspset
1122     && (!comparison->codon_hspset->hsp_list->len)){
1123         HSPset_destroy(comparison->codon_hspset);
1124         comparison->codon_hspset = NULL;
1125         }
1126     /**/
1127     /**/
1128     /*
1129     g_message("start with [%d] FILTER down to [%d]",
1130             gsd.hsp_list->len,
1131         (comparison->dna_hspset?comparison->dna_hspset->hsp_list->len:0)
1132        +(comparison->protein_hspset?comparison->protein_hspset->hsp_list->len:0)
1133        +(comparison->codon_hspset?comparison->codon_hspset->hsp_list->len:0));
1134        */
1135     RangeTree_destroy(gsd.rangetree, NULL, NULL);
1136     g_free(gsd.fwd_keep);
1137     g_free(gsd.rev_keep);
1138     g_ptr_array_free(gsd.hsp_list, TRUE);
1139     return;
1140     }
1141 
GAM_Result_heuristic_create(GAM * gam,Comparison * comparison)1142 GAM_Result *GAM_Result_heuristic_create(GAM *gam,
1143                                         Comparison *comparison){
1144     register GAM_Result *gam_result;
1145     register HSPset_ArgumentSet *has
1146         = Comparison_Param_get_HSPSet_Argument_Set(comparison->param);
1147     if(has->geneseed_threshold){
1148         /* Raise score threshold to geneseed
1149          * to prevent low-scoring subopt alignments.
1150          */
1151         GAM_lock(gam);
1152         if(gam->gas->threshold < has->geneseed_threshold)
1153             gam->gas->threshold = has->geneseed_threshold;
1154         GAM_unlock(gam);
1155         GAM_Result_geneseed_filter(gam, comparison);
1156         }
1157     if(!Comparison_has_hsps(comparison))
1158         return NULL;
1159     /*
1160     g_message("heuristic create with [%d,%d,%d]",
1161         comparison->dna_hspset?comparison->dna_hspset->hsp_list->len:0,
1162         comparison->protein_hspset?comparison->protein_hspset->hsp_list->len:0,
1163         comparison->codon_hspset?comparison->codon_hspset->hsp_list->len:0);
1164     Comparison_print(comparison);
1165     */
1166     if(gam->gas->use_gapped_extension)
1167         gam_result = GAM_Result_SDP_create(gam, comparison);
1168     else
1169         gam_result = GAM_Result_BSDP_create(gam, comparison);
1170     return gam_result;
1171     }
1172 
1173 /**/
1174 
GAM_Result_exhaustive_create(GAM * gam,Sequence * query,Sequence * target)1175 GAM_Result *GAM_Result_exhaustive_create(GAM *gam,
1176                                          Sequence *query,
1177                                          Sequence *target){
1178     register Alignment *alignment;
1179     register C4_Score threshold;
1180     register GAM_Result *gam_result;
1181     register OPair *opair;
1182     g_assert(gam->optimal);
1183     GAM_lock(gam);
1184     Sequence_lock(query);
1185     Sequence_lock(target);
1186     gam_result = GAM_Result_create(gam, query, target);
1187     Sequence_unlock(query);
1188     Sequence_unlock(target);
1189     opair = OPair_create(gam->optimal, gam_result->subopt,
1190                          query->len, target->len, gam_result->user_data);
1191     GAM_unlock(gam);
1192     if(gam->verbosity > 1)
1193         g_message("Exhaustive alignment of [%s] [%s]",
1194                   query->id, target->id);
1195     /**/
1196     do {
1197         GAM_lock(gam);
1198         threshold = GAM_get_query_threshold(gam, query);
1199         GAM_unlock(gam);
1200         alignment = OPair_next_path(opair, threshold);
1201         if(!alignment)
1202             break;
1203         g_assert(alignment->score >= threshold);
1204         GAM_Result_add_alignment(gam_result, alignment, threshold);
1205         if(gam->verbosity > 2)
1206             g_message("Found alignment number [%d] score [%d]",
1207                 gam_result->alignment_list->len, alignment->score);
1208     } while(gam->gas->use_subopt);
1209     OPair_destroy(opair);
1210     if(!gam_result->alignment_list){ /* No alignments */
1211         GAM_Result_destroy(gam_result);
1212         return NULL;
1213         }
1214     return gam_result;
1215     }
1216 
GAM_Result_share(GAM_Result * gam_result)1217 GAM_Result *GAM_Result_share(GAM_Result *gam_result){
1218     gam_result->ref_count++;
1219     return gam_result;
1220     }
1221 
GAM_Result_destroy(GAM_Result * gam_result)1222 void GAM_Result_destroy(GAM_Result *gam_result){
1223     register gint i;
1224     if(--gam_result->ref_count)
1225         return;
1226     if(gam_result->alignment_list){
1227         for(i = 0; i < gam_result->alignment_list->len; i++)
1228             Alignment_destroy(gam_result->alignment_list->pdata[i]);
1229         g_ptr_array_free(gam_result->alignment_list, TRUE);
1230         }
1231     Model_Type_destroy_data(gam_result->gam->gas->type,
1232                             gam_result->user_data);
1233     Model_Type_destroy_data(gam_result->gam->gas->type,
1234                             gam_result->self_data);
1235     Sequence_destroy(gam_result->query);
1236     Sequence_destroy(gam_result->target);
1237     GAM_lock(gam_result->gam);
1238     GAM_destroy(gam_result->gam);
1239     SubOpt_destroy(gam_result->subopt);
1240     GAM_unlock(gam_result->gam);
1241     g_free(gam_result);
1242     return;
1243     }
1244 
GAM_display_alignment(GAM * gam,Alignment * alignment,Sequence * query,Sequence * target,gint result_id,gint rank,gpointer user_data,gpointer self_data,FILE * fp)1245 static void GAM_display_alignment(GAM *gam, Alignment *alignment,
1246             Sequence *query, Sequence *target,
1247             gint result_id, gint rank,
1248             gpointer user_data, gpointer self_data, FILE *fp){
1249     if(gam->gas->show_alignment)
1250         Alignment_display(alignment, query, target,
1251                           gam->dna_submat, gam->protein_submat,
1252                           gam->translate, fp);
1253     if(gam->gas->show_sugar)
1254         Alignment_display_sugar(alignment, query, target, fp);
1255     if(gam->gas->show_cigar)
1256         Alignment_display_cigar(alignment, query, target, fp);
1257     if(gam->gas->show_vulgar)
1258         Alignment_display_vulgar(alignment, query, target, fp);
1259     if(gam->gas->show_query_gff)
1260         Alignment_display_gff(alignment, query, target, gam->translate,
1261                               TRUE, FALSE, result_id, user_data, fp);
1262     if(gam->gas->show_target_gff)
1263         Alignment_display_gff(alignment, query, target, gam->translate,
1264              FALSE, Model_Type_has_genomic_target(gam->gas->type),
1265              result_id, user_data, fp);
1266     if(gam->gas->ryo)
1267         Alignment_display_ryo(alignment, query, target,
1268                               gam->gas->ryo, gam->translate, rank,
1269                               user_data, self_data, fp);
1270     fflush(fp);
1271     return;
1272     }
1273 
GAM_Result_display(GAM_Result * gam_result)1274 static void GAM_Result_display(GAM_Result *gam_result){
1275     register gint i;
1276     register Alignment *alignment;
1277     g_assert(gam_result);
1278     for(i = 0; i < gam_result->alignment_list->len; i++){
1279         alignment = gam_result->alignment_list->pdata[i];
1280         GAM_display_alignment(gam_result->gam, alignment,
1281                 gam_result->query, gam_result->target,
1282                 i+1, 0, gam_result->user_data, gam_result->self_data, stdout);
1283         }
1284     return;
1285     }
1286 
GAM_Result_submit(GAM_Result * gam_result)1287 void GAM_Result_submit(GAM_Result *gam_result){
1288     register GAM_QueryResult *gqr;
1289     g_assert(gam_result);
1290     GAM_lock(gam_result->gam);
1291     if(gam_result->gam->gas->best_n){
1292         gqr = g_tree_lookup(gam_result->gam->bestn_tree,
1293                             gam_result->query->id);
1294         if(!gqr){
1295             gqr = GAM_QueryResult_create(gam_result->gam,
1296                                          gam_result->query->id);
1297             g_tree_insert(gam_result->gam->bestn_tree,
1298                           gqr->query_id, gqr);
1299             }
1300         g_assert(gqr);
1301         g_assert(!strcmp(gqr->query_id, gam_result->query->id));
1302         GAM_QueryResult_submit(gqr, gam_result);
1303     } else {
1304         GAM_Result_display(gam_result);
1305         }
1306     GAM_unlock(gam_result->gam);
1307     return;
1308     }
1309 
GAM_lock(GAM * gam)1310 void GAM_lock(GAM *gam){
1311 #ifdef USE_PTHREADS
1312     pthread_mutex_lock(&gam->gam_lock);
1313 #endif /* USE_PTHREADS */
1314     return;
1315     }
1316 
GAM_unlock(GAM * gam)1317 void GAM_unlock(GAM *gam){
1318 #ifdef USE_PTHREADS
1319     pthread_mutex_unlock(&gam->gam_lock);
1320 #endif /* USE_PTHREADS */
1321     return;
1322     }
1323 
1324 /**/
1325 
1326