1 /****************************************************************\
2 *                                                                *
3 *  Analysis module for exonerate                                 *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 #include "analysis.h"
17 #include "ungapped.h"
18 #include "compoundfile.h"
19 #include "hspset.h"
20 #include "lineparse.h"
21 
22 #include <stdlib.h> /* For atoi() */
23 #include <unistd.h> /* For sleep() */
24 #include <string.h> /* For strcmp() */
25 #include <ctype.h>  /* For isdigit() */
26 
27 #include <sys/types.h>  /* For stat() */
28 #include <sys/stat.h>   /* For stat() */
29 #include <unistd.h>     /* For stat() */
30 
Analysis_ArgumentSet_create(Argument * arg)31 Analysis_ArgumentSet *Analysis_ArgumentSet_create(Argument *arg){
32     register ArgumentSet *as;
33     static Analysis_ArgumentSet aas;
34     if(arg){
35         as = ArgumentSet_create("Analysis Options");
36         ArgumentSet_add_option(as, 'E', "exhaustive", NULL,
37             "Perform exhaustive alignment (slow)", "FALSE",
38             Argument_parse_boolean, &aas.use_exhaustive);
39         ArgumentSet_add_option(as, 'B', "bigseq", NULL,
40             "Allow rapid comparison between big sequences", "FALSE",
41             Argument_parse_boolean, &aas.use_bigseq);
42         ArgumentSet_add_option(as, 'r', "revcomp", NULL,
43             "Also search reverse complement of query and target", "TRUE",
44             Argument_parse_boolean, &aas.use_revcomp);
45         ArgumentSet_add_option(as, '\0', "forcescan", "[q|t]",
46             "Force FSM scan on query or target sequences", "none",
47             Argument_parse_string, &aas.force_scan);
48         /**/
49         ArgumentSet_add_option(as, 0, "saturatethreshold", "int",
50             "Word saturation threshold", "0",
51             Argument_parse_int, &aas.saturate_threshold);
52         /**/
53         ArgumentSet_add_option(as, 0, "customserver", "command",
54             "Custom command to send non-standard server", "NULL",
55             Argument_parse_string, &aas.custom_server_command);
56         ArgumentSet_add_option(as, 'c', "cores", "number",
57             "Number of cores/CPUs/threads for alignment computation", "1",
58             Argument_parse_int, &aas.thread_count);
59         Argument_absorb_ArgumentSet(arg, as);
60         }
61     return &aas;
62     }
63 
64 /**/
65 
66 typedef struct {
67            GAM *gam;
68     Comparison *comparison;
69 } Analysis_HeuristicJob;
70 
Analysis_HeuristicJob_create(GAM * gam,Comparison * comparison)71 static Analysis_HeuristicJob *Analysis_HeuristicJob_create(GAM *gam,
72                                        Comparison *comparison){
73     register Analysis_HeuristicJob *ahj = g_new(Analysis_HeuristicJob, 1);
74     ahj->gam = GAM_share(gam);
75     ahj->comparison = Comparison_share(comparison);
76     return ahj;
77     }
78 
Analysis_HeuristicJob_destroy(Analysis_HeuristicJob * ahj)79 static void Analysis_HeuristicJob_destroy(Analysis_HeuristicJob *ahj){
80     Comparison_destroy(ahj->comparison);
81     GAM_destroy(ahj->gam);
82     g_free(ahj);
83     return;
84     }
85 
Analysis_HeuristicJob_run(gpointer data)86 static void Analysis_HeuristicJob_run(gpointer data){
87     register Analysis_HeuristicJob *ahj = data;
88     register GAM_Result *gam_result
89            = GAM_Result_heuristic_create(ahj->gam, ahj->comparison);
90     if(gam_result){
91         GAM_Result_submit(gam_result);
92         GAM_Result_destroy(gam_result);
93         }
94     Analysis_HeuristicJob_destroy(ahj);
95     return;
96     }
97 
Analysis_report_func(Comparison * comparison,gpointer user_data)98 static void Analysis_report_func(Comparison *comparison,
99                                  gpointer user_data){
100     register Analysis *analysis = user_data;
101     register GAM_Result *gam_result;
102     register Analysis_HeuristicJob *ahj;
103     g_assert(Comparison_has_hsps(comparison));
104     if(analysis->scan_query){
105         /* Swap back query and target after a query scan */
106         Comparison_swap(comparison);
107     } else {
108         /* Revert target scan alignments to +ve query strand when possible */
109         if((comparison->query->alphabet->type == Alphabet_Type_DNA)
110         && (comparison->target->alphabet->type == Alphabet_Type_DNA)
111         && (comparison->query->strand == Sequence_Strand_REVCOMP)
112         && (comparison->target->strand == Sequence_Strand_FORWARD)
113         && (!analysis->gam->translate_both))
114             Comparison_revcomp(comparison);
115         }
116     if(Model_Type_is_gapped(analysis->gam->gas->type)){
117         ahj = Analysis_HeuristicJob_create(analysis->gam, comparison);
118 #ifdef USE_PTHREADS
119         g_assert(analysis->job_queue);
120         /* FIXME: need to use priority here */
121         JobQueue_submit(analysis->job_queue, Analysis_HeuristicJob_run, ahj, 2);
122 #else /* USE_PTHREADS */
123         Analysis_HeuristicJob_run(ahj);
124 #endif /* USE_PTHREADS */
125     } else {
126         gam_result = GAM_Result_ungapped_create(analysis->gam,
127                                                 comparison);
128         if(gam_result){
129             GAM_Result_submit(gam_result);
130             GAM_Result_destroy(gam_result);
131             }
132         }
133     return;
134     }
135 
136 /**/
137 
Analysis_FastaPipe_Pair_init_func(gpointer user_data)138 static void Analysis_FastaPipe_Pair_init_func(gpointer user_data){
139     register Analysis *analysis = user_data;
140     g_assert(!analysis->curr_query);
141     return;
142     }
143 /* Called before query pipeline loading */
144 
Analysis_FastaPipe_Pair_prep_func(gpointer user_data)145 static void Analysis_FastaPipe_Pair_prep_func(gpointer user_data){
146     register Analysis *analysis = user_data;
147     g_assert(analysis->curr_query);
148     return;
149     }
150 /* Called after query pipeline loading */
151 
Analysis_FastaPipe_Pair_term_func(gpointer user_data)152 static void Analysis_FastaPipe_Pair_term_func(gpointer user_data){
153     register Analysis *analysis = user_data;
154     FastaDB_Seq_destroy(analysis->curr_query);
155     analysis->curr_query = NULL;
156     return;
157     }
158 /* Called after query pipeline analysis */
159 
Analysis_FastaPipe_Pair_query_func(FastaDB_Seq * fdbs,gpointer user_data)160 static gboolean Analysis_FastaPipe_Pair_query_func(FastaDB_Seq *fdbs,
161                                                   gpointer user_data){
162     register Analysis *analysis = user_data;
163     g_assert(!analysis->curr_query);
164     /*
165     if(analysis->aas->use_exhaustive)
166         g_strup(fdbs->seq->seq);
167     */
168     if(analysis->verbosity > 1)
169         g_message("Load query for pairwise comparision [%s] (%d)",
170                   fdbs->seq->id, fdbs->seq->len);
171     analysis->curr_query = FastaDB_Seq_share(fdbs);
172     return TRUE; /* take queries one at a time */
173     }
174 /* Called on query loading */
175 
Analysis_BSAM_compare(Analysis * analysis,FastaDB_Seq * query,FastaDB_Seq * target)176 static void Analysis_BSAM_compare(Analysis *analysis,
177                                   FastaDB_Seq *query,
178                                   FastaDB_Seq *target){
179     register Comparison *comparison = BSAM_compare(analysis->bsam,
180                                                    query->seq,
181                                                    target->seq);
182     if(comparison){
183         if(Comparison_has_hsps(comparison))
184             Analysis_report_func(comparison, analysis);
185         Comparison_destroy(comparison);
186         }
187     return;
188     }
189 
190 /**/
191 
192 typedef struct {
193          GAM *gam;
194     Sequence *query;
195     Sequence *target;
196 } Analysis_ExhaustiveJob;
197 
Analysis_ExhaustiveJob_create(GAM * gam,Sequence * query,Sequence * target)198 static Analysis_ExhaustiveJob *Analysis_ExhaustiveJob_create(GAM *gam,
199                                         Sequence *query, Sequence *target){
200     register Analysis_ExhaustiveJob *aej = g_new(Analysis_ExhaustiveJob, 1);
201     aej->gam = GAM_share(gam);
202     aej->query = Sequence_share(query);
203     aej->target = Sequence_share(target);
204     return aej;
205     }
206 
Analysis_ExhaustiveJob_destroy(Analysis_ExhaustiveJob * aej)207 static void Analysis_ExhaustiveJob_destroy(Analysis_ExhaustiveJob *aej){
208     GAM_destroy(aej->gam);
209     Sequence_destroy(aej->query);
210     Sequence_destroy(aej->target);
211     g_free(aej);
212     return;
213     }
214 
Analysis_ExhaustiveJob_run(gpointer data)215 static void Analysis_ExhaustiveJob_run(gpointer data){
216     register Analysis_ExhaustiveJob *aej = data;
217     register GAM_Result *gam_result;
218     gam_result = GAM_Result_exhaustive_create(aej->gam,
219                                               aej->query, aej->target);
220     if(gam_result){
221         GAM_Result_submit(gam_result);
222         GAM_Result_destroy(gam_result);
223         }
224     Analysis_ExhaustiveJob_destroy(aej);
225     return;
226     }
227 
Analysis_Pair_compare(Analysis * analysis,FastaDB_Seq * fdbs)228 static void Analysis_Pair_compare(Analysis *analysis,
229                                   FastaDB_Seq *fdbs){
230     register Analysis_ExhaustiveJob *aej;
231     if(analysis->aas->use_exhaustive){
232         aej = Analysis_ExhaustiveJob_create(analysis->gam,
233                                             analysis->curr_query->seq,
234                                             fdbs->seq);
235 #ifdef USE_PTHREADS
236         g_assert(analysis->job_queue);
237         JobQueue_submit(analysis->job_queue, Analysis_ExhaustiveJob_run, aej, 1);
238 #else /* USE_PTHREADS */
239         Analysis_ExhaustiveJob_run(aej);
240 #endif /* USE_PTHREADS */
241     } else {
242         Analysis_BSAM_compare(analysis, analysis->curr_query, fdbs);
243         }
244     return;
245     }
246 
247 /**/
248 
Analysis_FastaPipe_Pair_target_func(FastaDB_Seq * fdbs,gpointer user_data)249 static gboolean Analysis_FastaPipe_Pair_target_func(FastaDB_Seq *fdbs,
250                                                     gpointer user_data){
251     register Analysis *analysis = user_data;
252     g_assert(analysis->gam);
253     g_assert(analysis->curr_query);
254     /*
255     if(analysis->aas->use_exhaustive)
256         g_strup(fdbs->seq->seq);
257     */
258     if(analysis->verbosity > 1)
259         g_message("Load target for pairwise comparison [%s] (%d)",
260                 fdbs->seq->id, fdbs->seq->len);
261     /**/
262     Analysis_Pair_compare(analysis, fdbs);
263     return FALSE;
264     }
265 /* Called on target loading */
266 
267 /**/
268 
Analysis_FastaPipe_Seeder_init_func(gpointer user_data)269 static void Analysis_FastaPipe_Seeder_init_func(gpointer user_data){
270     register Analysis *analysis = user_data;
271     g_assert(!analysis->curr_seeder);
272     register Comparison_Param *comparison_param
273            = Comparison_Param_share(analysis->comparison_param);
274     if(analysis->scan_query)
275         comparison_param = Comparison_Param_swap(comparison_param);
276     analysis->curr_seeder = Seeder_create(analysis->verbosity,
277             comparison_param, analysis->aas->saturate_threshold,
278             Analysis_report_func, analysis);
279     Comparison_Param_destroy(comparison_param);
280     return;
281     }
282 /* Called before query pipeline loading */
283 
Analysis_FastaPipe_Seeder_prep_func(gpointer user_data)284 static void Analysis_FastaPipe_Seeder_prep_func(gpointer user_data){
285     /* does nothing */
286     return;
287     }
288 /* Called after query pipeline loading */
289 
Analysis_FastaPipe_Seeder_term_func(gpointer user_data)290 static void Analysis_FastaPipe_Seeder_term_func(gpointer user_data){
291     register Analysis *analysis = user_data;
292     Seeder_destroy(analysis->curr_seeder);
293     if(analysis->verbosity > 2){
294         g_message("### Seeder destroyed ###");
295         RecycleBin_profile();
296         }
297     analysis->curr_seeder = NULL;
298     return;
299     }
300 /* Called after query pipeline analysis */
301 
Analysis_FastaPipe_Seeder_query_func(FastaDB_Seq * fdbs,gpointer user_data)302 static gboolean Analysis_FastaPipe_Seeder_query_func(FastaDB_Seq *fdbs,
303                                                     gpointer user_data){
304     register Analysis *analysis = user_data;
305     if(analysis->verbosity > 1)
306         g_message("Load query for Seeder [%s] (%d)",
307                 fdbs->seq->id, fdbs->seq->len);
308     return Seeder_add_query(analysis->curr_seeder, fdbs->seq);
309     }
310 /* Called on query loading */
311 
Analysis_FastaPipe_Seeder_target_func(FastaDB_Seq * fdbs,gpointer user_data)312 static gboolean Analysis_FastaPipe_Seeder_target_func(FastaDB_Seq *fdbs,
313                                                     gpointer user_data){
314     register Analysis *analysis = user_data;
315     if(analysis->verbosity > 1)
316         g_message("Load target for Seeder [%s] (%d)",
317                 fdbs->seq->id, fdbs->seq->len);
318     Seeder_add_target(analysis->curr_seeder, fdbs->seq);
319     return FALSE;
320     }
321 /* Called on target loading */
322 
323 /**/
324 
Analysis_decide_scan_query(FastaDB * query_fdb,FastaDB * target_fdb,gchar * force_scan)325 static gboolean Analysis_decide_scan_query(FastaDB *query_fdb,
326                                            FastaDB *target_fdb,
327                                            gchar *force_scan){
328     register CompoundFile_Pos query_size, target_size;
329     if(!g_strcasecmp(force_scan, "none")){
330         query_size = CompoundFile_get_length(query_fdb->cf);
331         target_size = CompoundFile_get_length(target_fdb->cf);
332         if((query_size >> 4) < target_size)
333             return FALSE;
334         else
335             return TRUE;
336     } else if((!g_strcasecmp(force_scan, "query"))
337            || (!g_strcasecmp(force_scan, "q"))){
338         return TRUE;
339     } else if((!g_strcasecmp(force_scan, "target"))
340            || (!g_strcasecmp(force_scan, "t"))){
341         return FALSE;
342     } else {
343         g_error("Unknown force_scan command [%s]", force_scan);
344         }
345     return FALSE; /* not reached */
346     }
347 
Analysis_find_matches(Analysis * analysis,Match ** dna_match,Match ** protein_match,Match ** codon_match)348 static void Analysis_find_matches(Analysis *analysis,
349                        Match **dna_match, Match **protein_match,
350                        Match **codon_match){
351     register GPtrArray *transition_list
352         = C4_Model_select_transitions(analysis->gam->model,
353                                       C4_Label_MATCH);
354     register gint i;
355     register C4_Transition *transition;
356     register Match *match;
357     for(i = 0; i < transition_list->len; i++){
358         transition = transition_list->pdata[i];
359         g_assert(transition);
360         g_assert(transition->label == C4_Label_MATCH);
361         match = transition->label_data;
362         if(match){
363             switch(match->type){
364                 case Match_Type_DNA2DNA:
365                     if((*dna_match) && ((*dna_match) != match))
366                         g_error("Multiple DNA matches not implemented");
367                     (*dna_match) = match;
368                     break;
369                 case Match_Type_PROTEIN2PROTEIN:
370                 case Match_Type_DNA2PROTEIN:
371                 case Match_Type_PROTEIN2DNA:
372                     if((*protein_match) && ((*protein_match) != match))
373                         g_error("Multiple protein matches"
374                                 " not supported");
375                     (*protein_match) = match;
376                     break;
377                 case Match_Type_CODON2CODON:
378                     if((*codon_match) && ((*codon_match) != match))
379                         g_error("Multiple codon matches"
380                                 " not implemented");
381                     (*codon_match) = match;
382                     break;
383                 default:
384                     break;
385                 }
386             }
387         }
388     g_ptr_array_free(transition_list, TRUE);
389     return;
390     }
391 
392 /**/
393 
Analysis_Client_connect(gchar * path)394 static SocketClient *Analysis_Client_connect(gchar *path){
395     register SocketClient *sc;
396     register gint i, divider = 0, port;
397     register gchar *server;
398     register gint connection_attempts = 10;
399     for(i = 0; path[i]; i++)
400         if(path[i] == ':'){
401             divider = i;
402             break;
403             }
404     if(divider){
405         port = atoi(path+divider+1);
406         server = g_strndup(path, divider);
407         for(i = connection_attempts; i >= 0; i--){
408             sc = SocketClient_create(server, port);
409             if(sc)
410                 break;
411             g_warning("Failed connection to server (retrys left: %d", i);
412             sleep(1);
413             }
414         g_free(server);
415         return sc;
416         }
417     return NULL;
418     }
419 
Analysis_Client_send(Analysis_Client * aclient,gchar * msg,gchar * expect,gboolean multi_line_reply)420 static gchar *Analysis_Client_send(Analysis_Client *aclient, gchar *msg,
421                                   gchar *expect, gboolean multi_line_reply){
422     register gchar *reply = SocketClient_send(aclient->sc, msg);
423     register gchar *line, *p = reply, *processed_reply;
424     register gint line_count = 0;
425     register GString *str = g_string_sized_new(64);
426     if(aclient->verbosity >= 3){
427         g_print("Message: client sent message [%s]\n", msg);
428         if((aclient->verbosity == 3) && (strlen(reply) >= 80))
429             g_print("Message: client received reply [%.*s<truncated>] \n",
430                                80, reply);
431         else
432             g_print("Message: client received reply [%s]\n", reply);
433         }
434     do {
435         while(*p){
436             if(!isspace(*p)) /* Strip blank lines */
437                 break;
438             p++;
439             }
440         line = p;
441         if(!strncmp(p, "error:", 6))
442             g_error("Error from server: [%s]", p);
443         if(!strncmp(p, "warning:", 8)){
444             g_warning("Warning from server: [%s]", p);
445         } else if(!strncmp(p, "linecount:", 8)){
446             while(*p){ /* Skip to end of line */
447                 if(*p == '\n')
448                     break;
449                 p++;
450                 }
451         } else if(!strncmp(p, expect, strlen(expect))){
452             while(*p){ /* Skip to end of line */
453                 if(*p == '\n')
454                     break;
455                 p++;
456                 }
457             line_count++;
458             *p = '\0';
459             g_string_append(str, line);
460             g_string_append_c(str, '\n');
461             *p = '\n';
462         } else {
463             if(!*p)
464                 break;
465             g_error("Unexpected line from server [%s]", p);
466             }
467     } while(TRUE);
468     g_free(reply);
469     if(!line_count)
470         g_error("No reply received from server msg=[%s]", msg);
471     if((!multi_line_reply) && (line_count > 1))
472         g_error("Unexpected multi-line reply from server");
473     processed_reply = str->str;
474     g_string_free(str, FALSE);
475     return processed_reply;
476     }
477 
Analysis_Client_set_param(Analysis_Client * aclient,GAM * gam)478 static void Analysis_Client_set_param(Analysis_Client *aclient, GAM *gam){
479     register HSPset_ArgumentSet *has = HSPset_ArgumentSet_create(NULL);
480     register Analysis_ArgumentSet *aas = Analysis_ArgumentSet_create(NULL);
481     register gchar *msg, *reply;
482     /**/
483     if(aas->custom_server_command){
484         msg = g_strdup_printf("%s\n", aas->custom_server_command);
485         reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
486         g_free(msg);
487         g_free(reply);
488         }
489     /**/
490     msg = g_strdup_printf("set param seedrepeat %d",
491                           has->seed_repeat);
492     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
493     g_free(msg);
494     g_free(reply);
495     /**/
496     msg = g_strdup_printf("set param dnahspthreshold %d",
497                           has->dna_hsp_threshold);
498     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
499     g_free(msg);
500     g_free(reply);
501     /**/
502     msg = g_strdup_printf("set param proteinhspthreshold %d",
503                           has->protein_hsp_threshold);
504     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
505     g_free(msg);
506     g_free(reply);
507     /**/
508     msg = g_strdup_printf("set param codonhspthreshold %d",
509                           has->codon_hsp_threshold);
510     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
511     g_free(msg);
512     g_free(reply);
513     /**/
514     msg = g_strdup_printf("set param dnawordlimit %d",
515                           has->dna_word_limit);
516     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
517     g_free(msg);
518     g_free(reply);
519     /**/
520     msg = g_strdup_printf("set param proteinwordlimit %d",
521                           has->protein_word_limit);
522     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
523     g_free(msg);
524     g_free(reply);
525     /**/
526     msg = g_strdup_printf("set param codonwordlimit %d",
527                           has->codon_word_limit);
528     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
529     g_free(msg);
530     g_free(reply);
531     /**/
532     msg = g_strdup_printf("set param dnahspdropoff %d",
533                           has->dna_hsp_dropoff);
534     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
535     g_free(msg);
536     g_free(reply);
537     /**/
538     msg = g_strdup_printf("set param proteinhspdropoff %d",
539                           has->protein_hsp_dropoff);
540     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
541     g_free(msg);
542     g_free(reply);
543     /**/
544     msg = g_strdup_printf("set param codonhspdropoff %d",
545                           has->codon_hsp_dropoff);
546     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
547     g_free(msg);
548     g_free(reply);
549     /**/
550     msg = g_strdup_printf("set param geneseedthreshold %d",
551                           has->geneseed_threshold);
552     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
553     g_free(msg);
554     g_free(reply);
555     /**/
556     msg = g_strdup_printf("set param geneseedrepeat %d",
557                           has->geneseed_repeat);
558     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
559     g_free(msg);
560     g_free(reply);
561     /**/
562     msg = g_strdup_printf("set param maxqueryspan %d",
563                           gam->max_query_span);
564     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
565     g_free(msg);
566     g_free(reply);
567     /**/
568     msg = g_strdup_printf("set param maxtargetspan %d",
569                           gam->max_target_span);
570     reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
571     g_free(msg);
572     g_free(reply);
573     /**/
574     return;
575     }
576 
Analysis_Client_info(Analysis_Client * aclient)577 static void Analysis_Client_info(Analysis_Client *aclient){
578     return;
579     }
580 
Analysis_Client_create(gchar * path,gint verbosity)581 static Analysis_Client *Analysis_Client_create(gchar *path, gint verbosity){
582     register Analysis_Client *aclient;
583     register SocketClient *sc = Analysis_Client_connect(path);
584     register Alphabet_Type alphabet_type;
585     register gchar *dbinfo, **dbinfo_word;
586     if(!sc)
587         return NULL;
588     aclient = g_new(Analysis_Client, 1);
589     aclient->ref_count = 1;
590     aclient->sc = sc;
591     aclient->verbosity = verbosity;
592     aclient->probe_fdb = NULL;
593     dbinfo = Analysis_Client_send(aclient, "dbinfo", "dbinfo:", FALSE);
594     dbinfo_word = g_strsplit(dbinfo, " ", 8);
595     /**/
596     g_assert(dbinfo_word[0]);
597     g_assert(dbinfo_word[1]);
598     g_assert(dbinfo_word[2]);
599     g_assert(dbinfo_word[3]);
600     g_assert(dbinfo_word[4]);
601     /**/
602     alphabet_type = Alphabet_Type_UNKNOWN;
603     if(!strcmp(dbinfo_word[1], "dna"))
604         alphabet_type = Alphabet_Type_DNA;
605     if(!strcmp(dbinfo_word[1], "protein"))
606         alphabet_type = Alphabet_Type_PROTEIN;
607     /**/
608     aclient->is_masked = FALSE;
609     if(!strcmp(dbinfo_word[2], "softmasked"))
610         aclient->is_masked = TRUE;
611     else if(!strcmp(dbinfo_word[2], "unmasked"))
612         aclient->is_masked = TRUE;
613     else
614         g_error("Unrecognised dbinfo masking state [%s]", dbinfo_word[2]);
615     /**/
616     aclient->server_alphabet = Alphabet_create(alphabet_type, aclient->is_masked);
617     aclient->num_seqs      = atoll(dbinfo_word[3]);
618     aclient->max_seq_len   = atoll(dbinfo_word[4]);
619     aclient->total_seq_len = atoll(dbinfo_word[5]);
620     /**/
621     g_strfreev(dbinfo_word);
622     g_free(dbinfo);
623     aclient->curr_query = NULL;
624     aclient->seq_cache = g_new0(Sequence*, aclient->num_seqs);
625     Analysis_Client_info(aclient);
626     return aclient;
627     }
628 
Analysis_Client_share(Analysis_Client * aclient)629 static Analysis_Client *Analysis_Client_share(Analysis_Client *aclient){
630     aclient->ref_count++;
631     return aclient;
632     }
633 
Analysis_Client_destroy(Analysis_Client * aclient)634 static void Analysis_Client_destroy(Analysis_Client *aclient){
635     register gint i;
636     register Sequence *seq;
637     if(--aclient->ref_count)
638         return;
639     if(aclient->curr_query)
640         Sequence_destroy(aclient->curr_query);
641     for(i = 0; i < aclient->num_seqs; i++){
642         seq = aclient->seq_cache[i];
643         if(seq)
644             Sequence_destroy(seq);
645         }
646     g_free(aclient->seq_cache);
647     if(aclient->probe_fdb)
648         FastaDB_close(aclient->probe_fdb);
649     SocketClient_destroy(aclient->sc);
650     Alphabet_destroy(aclient->server_alphabet);
651     g_free(aclient);
652     return;
653     }
654 
Analysis_Client_set_probe_fdb(Analysis_Client * aclient,FastaDB * probe_fdb)655 static void Analysis_Client_set_probe_fdb(Analysis_Client *aclient,
656                                           FastaDB *probe_fdb){
657     g_assert(!aclient->probe_fdb);
658     aclient->probe_fdb = FastaDB_dup(probe_fdb);
659     return;
660     }
661 
Analysis_Client_set_query(Analysis_Client * aclient,Sequence * seq)662 static void Analysis_Client_set_query(Analysis_Client *aclient, Sequence *seq){
663     register gchar *seq_str = Sequence_get_str(seq);
664     register gchar *msg = g_strdup_printf("set query %s", seq_str);
665     register gchar *reply = Analysis_Client_send(aclient, msg, "ok:", FALSE);
666     register gchar **word;
667     register gint len, checksum;
668     if(strncmp(reply, "ok:", 3))
669         g_error("Could not set query [%s] on server", seq->id);
670     word = g_strsplit(reply+4, " ", 4);
671     len = atoi(word[0]);
672     checksum = atoi(word[1]);
673     if(seq->len != len)
674         g_error("Query length mismatch on server %d %d", seq->len, len);
675     if(Sequence_checksum(seq) != checksum)
676         g_error("Query checksum mismatch on server %d %d",
677                 Sequence_checksum(seq), checksum);
678     g_strfreev(word);
679     g_free(reply);
680     g_free(msg);
681     g_free(seq_str);
682     if(aclient->curr_query)
683         Sequence_destroy(aclient->curr_query);
684     aclient->curr_query = Sequence_share(seq);
685     return;
686     }
687 
Analysis_Client_revcomp_query(Analysis_Client * aclient)688 static void Analysis_Client_revcomp_query(Analysis_Client *aclient){
689     register gchar *reply = Analysis_Client_send(aclient,
690             "revcomp query", "ok: query strand revcomp", FALSE);
691     register Sequence *curr_query;
692     curr_query = aclient->curr_query;
693     aclient->curr_query = Sequence_revcomp(aclient->curr_query);
694     Sequence_destroy(curr_query);
695     g_free(reply);
696     return;
697     }
698 
Analysis_Client_revcomp_target(Analysis_Client * aclient)699 static void Analysis_Client_revcomp_target(Analysis_Client *aclient){
700     register gchar *reply = Analysis_Client_send(aclient,
701             "revcomp target", "ok: target strand", FALSE);
702     g_free(reply);
703     return;
704     }
705 
706 /**/
707 
708 typedef struct {
709     Analysis_Client *aclient;
710                gint  target_id;
711                gint  seq_len;
712 } Analysis_Client_Key;
713 
Analysis_Client_Key_create(Analysis_Client * aclient,gint target_id,gint seq_len)714 static Analysis_Client_Key *Analysis_Client_Key_create(Analysis_Client *aclient,
715                                                    gint target_id, gint seq_len){
716     register Analysis_Client_Key *key = g_new(Analysis_Client_Key, 1);
717     key->aclient = Analysis_Client_share(aclient);
718     key->target_id = target_id;
719     key->seq_len = seq_len;
720     return key;
721     }
722 
Analysis_Client_Key_destroy(Analysis_Client_Key * key)723 static void Analysis_Client_Key_destroy(Analysis_Client_Key *key){
724     Analysis_Client_destroy(key->aclient);
725     g_free(key);
726     return;
727     }
728 
Analysis_Client_SparseCache_get_func(gint pos,gpointer page_data,gpointer user_data)729 static gpointer Analysis_Client_SparseCache_get_func(gint pos,
730                                                      gpointer page_data,
731                                                      gpointer user_data){
732     return GINT_TO_POINTER((gint)((gchar*)page_data)[pos]);
733     }
734 
Analysis_Client_SparseCache_fill_func(gint start,gpointer user_data)735 static SparseCache_Page *Analysis_Client_SparseCache_fill_func(gint start,
736                                                      gpointer user_data){
737     register Analysis_Client_Key *key = user_data;
738     register SparseCache_Page *page = g_new(SparseCache_Page, 1);
739     register gint len = MIN(SparseCache_PAGE_SIZE, key->seq_len-start),
740                   page_len;
741     register gchar *msg = g_strdup_printf("get subseq %d %d %d",
742             key->target_id, start, len);
743     register gchar *reply = Analysis_Client_send(key->aclient, msg,
744                                                 "subseq:", FALSE);
745     if(strncmp(reply, "subseq:", 7))
746         g_error("Failed to get subseq for target (%d,%d,%d) [%s]",
747                 key->target_id, start, len, reply);
748     page->get_func = Analysis_Client_SparseCache_get_func;
749     page->copy_func = NULL;
750     page_len = strlen(reply+8)-1;
751     page->data = g_strndup(reply+8, page_len);
752     page->data_size = sizeof(gchar)*page_len;
753     g_free(msg);
754     g_free(reply);
755     FastaDB_SparseCache_compress(page, page_len);
756     return page;
757     }
758 /* FIXME: move compression stuff to SeqPage in Sequence */
759 
Analysis_Client_SparseCache_free_func(gpointer user_data)760 static void Analysis_Client_SparseCache_free_func(gpointer user_data){
761     register Analysis_Client_Key *key = user_data;
762     Analysis_Client_Key_destroy(key);
763     return;
764     }
765 
Analysis_Client_get_SparseCache(Analysis_Client * aclient,gint sequence_id,gint len)766 static SparseCache *Analysis_Client_get_SparseCache(
767                     Analysis_Client *aclient,
768                     gint sequence_id, gint len){
769     register Analysis_Client_Key *key
770         = Analysis_Client_Key_create(aclient, sequence_id, len);
771     return SparseCache_create(len, Analysis_Client_SparseCache_fill_func,
772                          NULL, Analysis_Client_SparseCache_free_func, key);
773     }
774 
Analysis_Client_get_Sequence(Analysis_Client * aclient,gint sequence_id,gboolean revcomp_target)775 static Sequence *Analysis_Client_get_Sequence(Analysis_Client *aclient,
776                                               gint sequence_id,
777                                               gboolean revcomp_target){
778     register gchar *msg, *reply, *id, *def;
779     register SparseCache *cache;
780     register Sequence *seq = aclient->seq_cache[sequence_id];
781     register gint len, checksum;
782     register gchar **seqinfo_word;
783     if(seq){
784         if(revcomp_target)
785             return Sequence_revcomp(seq);
786         return Sequence_share(seq);
787         }
788     msg = g_strdup_printf("get info %d", sequence_id);
789     reply = Analysis_Client_send(aclient, msg, "seqinfo:", FALSE);
790     /**/
791     if(strncmp(reply, "seqinfo:", 8))
792         g_error("Failed to set info for target [%d]", sequence_id);
793     /* parse seqinfo for <len> <checksum> <id> and [<def>] */
794     seqinfo_word = g_strsplit(reply+9, " ", 4);
795     len = atoi(seqinfo_word[0]);
796     checksum = atoi(seqinfo_word[1]);
797     id = seqinfo_word[2];
798     def = seqinfo_word[3];
799     /* Strip any trailing newlines */
800     if(id[strlen(id)-1] == '\n')
801         id[strlen(id)-1] = '\0';
802     if(def && (def[strlen(def)-1] == '\n'))
803         def[strlen(def)-1] = '\0';
804     /**/
805     cache = Analysis_Client_get_SparseCache(aclient, sequence_id, len);
806     seq = Sequence_create_extmem(id, def, len,
807                        (aclient->server_alphabet->type == Alphabet_Type_DNA)
808                        ?Sequence_Strand_FORWARD:Sequence_Strand_UNKNOWN,
809                        aclient->server_alphabet, cache);
810     g_assert(!aclient->seq_cache[sequence_id]);
811     aclient->seq_cache[sequence_id] = seq;
812     g_strfreev(seqinfo_word);
813     SparseCache_destroy(cache);
814     g_free(reply);
815     g_free(msg);
816     if(revcomp_target)
817         return Sequence_revcomp(seq);
818     return Sequence_share(seq);
819     }
820 
821 typedef enum {
822    Analysis_Client_HSP_TOKEN_BEGIN_SET,
823    Analysis_Client_HSP_TOKEN_END_SET,
824    Analysis_Client_HSP_TOKEN_INT,
825    Analysis_Client_HSP_TOKEN_FINISH
826 } Analysis_Client_HSP_TOKEN;
827 
Analysis_Client_get_hsp_token(gchar * str,gint * pos,gint * intval)828 static Analysis_Client_HSP_TOKEN Analysis_Client_get_hsp_token(gchar *str,
829                                                    gint *pos, gint *intval){
830     register gint ch;
831     gchar *endptr;
832     g_assert(str);
833     while((ch = str[(*pos)])){
834         switch(ch){
835             case 'h':
836                 if(strncmp(str+(*pos), "hspset:", 7))
837                     g_error("Unexpected string in HSPset list");
838                 (*pos) += 7;
839                 if(strncmp(str+(*pos), " empty\n", 7)){
840                     return Analysis_Client_HSP_TOKEN_BEGIN_SET;
841                 } else {
842                     (*pos) += 7;
843                     break;
844                     }
845             case ' ':
846                 (*pos)++;
847                 break;
848             case '\n':
849                 (*pos)++;
850                 return Analysis_Client_HSP_TOKEN_END_SET;
851             default:
852                 if(isdigit(ch)){
853                     (*intval) = strtol(str+(*pos), &endptr, 10);
854                     (*pos) = endptr-str;
855                     return Analysis_Client_HSP_TOKEN_INT;
856                 } else {
857                     g_error("Unexpected character [%d] in HSPset list", ch);
858                     }
859                 break;
860             }
861         }
862     return Analysis_Client_HSP_TOKEN_FINISH;
863     }
864 
Analysis_Client_get_hsp_sets(Analysis_Client * aclient,Analysis * analysis,gboolean swap_chains,gboolean revcomp_target)865 static void Analysis_Client_get_hsp_sets(Analysis_Client *aclient,
866                                          Analysis *analysis,
867                                          gboolean swap_chains,
868                                          gboolean revcomp_target){
869     register gchar *reply = Analysis_Client_send(aclient,
870                                                 "get hsps", "hspset:", TRUE);
871     gint pos = 0, intval = 0;
872     register gboolean ok = TRUE;
873     register Analysis_Client_HSP_TOKEN token;
874     register gint target_id = -1, query_pos = -1, target_pos = -1, length;
875     register Comparison *comparison = NULL;
876     register Sequence *target = NULL;
877     register Match_Type match_type
878            = Match_Type_find(aclient->curr_query->alphabet->type,
879                              aclient->server_alphabet->type, FALSE);
880     /* FIXME: use Match_Type_find with translate_both for codon alignments */
881     register HSPset *hsp_set = NULL;
882     do {
883         token = Analysis_Client_get_hsp_token(reply, &pos, &intval);
884         switch(token){
885             case Analysis_Client_HSP_TOKEN_BEGIN_SET:
886                 break;
887             case Analysis_Client_HSP_TOKEN_INT:
888                 if(target_id == -1){
889                     target_id = intval;
890                     target = Analysis_Client_get_Sequence(aclient,
891                                                target_id, revcomp_target);
892                     g_assert(!comparison);
893                     g_assert(aclient->curr_query);
894                     /* FIXME: temp : make work with other HSP types */
895                     /* FIXME: should take necessary HSP params from server */
896                     if(swap_chains)
897                         comparison = Comparison_create(analysis->comparison_param,
898                                                    target, aclient->curr_query);
899                     else
900                         comparison = Comparison_create(analysis->comparison_param,
901                                                    aclient->curr_query, target);
902                     /* FIXME: should ensure that the HSPset is created
903                      * without a horizon
904                      */
905                     Sequence_destroy(target);
906                     target = NULL;
907                 } else if(query_pos == -1){
908                     query_pos = intval;
909                 } else if(target_pos == -1){
910                     target_pos = intval;
911                 } else {
912                     length = intval;
913                     g_assert(comparison);
914                     /*
915                     g_message("adding one [%d,%d,%d]", query_pos, target_pos,
916                             length);
917                     */
918                     /* FIXME: need fix to work with for other match types */
919                     switch(match_type){
920                         case Match_Type_DNA2DNA:
921                             hsp_set = comparison->dna_hspset;
922                             break;
923                         case Match_Type_PROTEIN2PROTEIN:
924                         case Match_Type_PROTEIN2DNA:
925                         case Match_Type_DNA2PROTEIN:
926                             hsp_set = comparison->protein_hspset;
927                             break;
928                         default:
929                             g_error("Match_Type not supported [%s]",
930                                     Match_Type_get_name(match_type));
931                         }
932                     g_assert(hsp_set);
933                     if(swap_chains)
934                         HSPset_add_known_hsp(hsp_set, target_pos, query_pos,
935                                              length);
936                     else
937                         HSPset_add_known_hsp(hsp_set, query_pos, target_pos,
938                                              length);
939                     query_pos = target_pos = -1;
940                     }
941                 break;
942             case Analysis_Client_HSP_TOKEN_END_SET:
943                 /* FIXME: needs to work for other hsp_set types */
944                 Comparison_finalise(comparison);
945                 if(Comparison_has_hsps(comparison)){
946 #if 0
947                     /* FIXME: move to use scan_query swap in report */
948                     if(swap_chains)
949                         Comparison_swap(comparison);
950 #endif /* 0 */
951                     Analysis_report_func(comparison, analysis);
952                     }
953                 Comparison_destroy(comparison);
954                 comparison = NULL;
955                 target_id = -1;
956                 break;
957             case Analysis_Client_HSP_TOKEN_FINISH:
958                 ok = FALSE;
959                 break;
960             }
961     } while(ok);
962     g_assert(target_id == -1);
963     /* format: <HSPSET> <TARGETID> { <QSTART TSTART LEN> } */
964     /* tokens <BEGIN_HSPSET> <INT> <ENDHSPSET> */
965     g_free(reply);
966     return;
967     }
968 /* FIXME: only working for single hspset comparisons */
969 
Analysis_Client_process_query(Analysis_Client * aclient,Analysis * analysis,Sequence * query,gboolean swap_chains,gboolean revcomp_target,gint priority)970 static void Analysis_Client_process_query(Analysis_Client *aclient,
971                                           Analysis *analysis,
972                                           Sequence *query,
973                                           gboolean swap_chains,
974                                           gboolean revcomp_target,
975                                           gint priority){
976     Analysis_Client_set_query(aclient, query);
977     Analysis_Client_get_hsp_sets(aclient, analysis, swap_chains, revcomp_target);
978     /* Revcomp query if DNA */
979     if(aclient->curr_query->alphabet->type == Alphabet_Type_DNA){
980         Analysis_Client_revcomp_query(aclient);
981         Analysis_Client_get_hsp_sets(aclient, analysis,
982                                      swap_chains, revcomp_target);
983         }
984     return;
985     }
986 
Analysis_Client_process(Analysis_Client * aclient,Analysis * analysis,gboolean swap_chains,gint priority)987 static void Analysis_Client_process(Analysis_Client *aclient, Analysis *analysis,
988                                     gboolean swap_chains, gint priority){
989     register FastaDB_Seq *fdbs;
990     /* FIXME: need to check for appropriate database type */
991     while((fdbs = FastaDB_next(aclient->probe_fdb, FastaDB_Mask_ALL))){
992         Analysis_Client_process_query(aclient, analysis, fdbs->seq,
993                                       swap_chains, FALSE, priority);
994         /* Revcomp target if protein vs DNA or translate_both */
995         if(((aclient->curr_query->alphabet->type == Alphabet_Type_PROTEIN)
996           && (aclient->server_alphabet->type == Alphabet_Type_DNA))
997            || analysis->gam->translate_both){
998             Analysis_Client_revcomp_target(aclient);
999             Analysis_Client_process_query(aclient, analysis,
1000                                           fdbs->seq, swap_chains, TRUE,
1001                                           priority);
1002             Analysis_Client_revcomp_target(aclient);
1003             }
1004         FastaDB_Seq_destroy(fdbs);
1005         }
1006     return;
1007     }
1008 
1009 /**/
1010 
Analysis_Server_create(Analysis_Builder * ab,gchar * name,gint priority)1011 static Analysis_Server *Analysis_Server_create(Analysis_Builder *ab,
1012                                                gchar *name, gint priority){
1013     register Analysis_Server *as = g_new(Analysis_Server, 1);
1014     as->name = g_strdup(name);
1015     as->ab = ab;
1016     as->priority = priority;
1017     return as;
1018     }
1019 
Analysis_Server_destroy(Analysis_Server * as)1020 static void Analysis_Server_destroy(Analysis_Server *as){
1021     g_free(as->name);
1022     g_free(as);
1023     return;
1024     }
1025 
1026 /**/
1027 
Analysis_Builder_create(GPtrArray * path_list,Analysis * analysis,gint verbosity)1028 static Analysis_Builder *Analysis_Builder_create(GPtrArray *path_list,
1029                                                  Analysis *analysis,
1030                                                  gint verbosity){
1031     register Analysis_Builder *ab;
1032     register gint i;
1033     register Analysis_Client *ac;
1034     register Analysis_Server *as;
1035     g_assert(path_list->len);
1036     ac = Analysis_Client_create(path_list->pdata[0], analysis->verbosity);
1037     if(!ac)
1038         return NULL;
1039     /**/
1040     ab = g_new(Analysis_Builder, 1);
1041     ab->verbosity = verbosity;
1042     ab->server_list = g_ptr_array_new();
1043     ab->server_type = ac->server_alphabet->type;
1044     ab->probe_fdb = NULL;
1045     ab->analysis = analysis;
1046     Analysis_Client_destroy(ac);
1047     for(i = 0; i < path_list->len; i++){
1048         as = Analysis_Server_create(ab, path_list->pdata[i], i);
1049         g_ptr_array_add(ab->server_list, as);
1050         }
1051     return ab;
1052     }
1053 
Analysis_Builder_destroy(Analysis_Builder * ab)1054 static void Analysis_Builder_destroy(Analysis_Builder *ab){
1055     register gint i;
1056     register Analysis_Server *as;
1057     for(i = 0; i < ab->server_list->len; i++){
1058         as = ab->server_list->pdata[i];
1059         Analysis_Server_destroy(as);
1060         }
1061     g_ptr_array_free(ab->server_list, TRUE);
1062     if(ab->probe_fdb)
1063         FastaDB_close(ab->probe_fdb);
1064     g_free(ab);
1065     return;
1066     }
1067 
Analysis_Server_run(gpointer data)1068 static void Analysis_Server_run(gpointer data){
1069     register Analysis_Server *server = data;
1070     register Analysis_Client *client
1071                 = Analysis_Client_create(server->name,
1072                                          server->ab->analysis->verbosity);
1073     if(!client)
1074         g_error("Could not connect to server [%s]", server->name);
1075     Analysis_Client_set_param(client, server->ab->analysis->gam);
1076     Analysis_Client_set_probe_fdb(client, server->ab->probe_fdb);
1077     Analysis_Client_process(client, server->ab->analysis,
1078                             server->ab->swap_chains, server->priority);
1079     /**/
1080     Analysis_Client_destroy(client);
1081     return;
1082     }
1083 
Analysis_Builder_process(Analysis_Builder * ab,Analysis * analysis,gboolean swap_chains)1084 static void Analysis_Builder_process(Analysis_Builder *ab,
1085                                      Analysis *analysis, gboolean swap_chains){
1086     register gint i;
1087     register Analysis_Server *as;
1088     ab->swap_chains = swap_chains;
1089     for(i = 0; i < ab->server_list->len; i++){
1090         as = ab->server_list->pdata[i];
1091 #ifdef USE_PTHREADS
1092         g_assert(analysis->job_queue);
1093         JobQueue_submit(analysis->job_queue, Analysis_Server_run, as, i);
1094 #else /* USE_PTHREADS */
1095         Analysis_Server_run(as);
1096 #endif /* USE_PTHREADS */
1097         }
1098     return;
1099     }
1100 
Analysis_Builder_set_probe_fdb(Analysis_Builder * ab,FastaDB * probe_fdb)1101 static void Analysis_Builder_set_probe_fdb(Analysis_Builder *ab,
1102                                            FastaDB *probe_fdb){
1103     g_assert(!ab->probe_fdb);
1104     ab->probe_fdb = FastaDB_share(probe_fdb);
1105     return;
1106     }
1107 
1108 /**/
1109 
Analysis_path_list_destroy(GPtrArray * path_list)1110 static void Analysis_path_list_destroy(GPtrArray *path_list){
1111     register gint i;
1112     register gchar *path;
1113     for(i = 0; i < path_list->len; i++){
1114         path = path_list->pdata[i];
1115         g_free(path);
1116         }
1117     g_ptr_array_free(path_list, TRUE);
1118     return;
1119     }
1120 
Analysis_FOSN_expand_path_list(GPtrArray * path_list)1121 static GPtrArray *Analysis_FOSN_expand_path_list(GPtrArray *path_list){
1122     register gint i;
1123     register gchar *path, *npath;
1124     struct stat buf;
1125     register gboolean expanded_list = FALSE;
1126     register GPtrArray *npath_list = g_ptr_array_new();
1127     register FILE *fp;
1128     register LineParse *lp;
1129     for(i = 0; i < path_list->len; i++){
1130         path = path_list->pdata[i];
1131         if((stat(path, &buf))              /* Cannot read file (ie. servername) */
1132         || (S_ISDIR(buf.st_mode))          /* Is directory */
1133         || (FastaDB_file_is_fasta(path))){ /* 1st non-whitespace char is '>' */
1134             npath = g_strdup(path);
1135             g_ptr_array_add(npath_list, npath);
1136         } else { /* Assume to be fosn */
1137             fp = fopen(path, "r");
1138             if(!fp)
1139                 g_error("Could not open FOSN file [%s]", path);
1140             lp = LineParse_create(fp);
1141             while(LineParse_line(lp) != EOF){
1142                 g_strstrip(lp->line->str); /* strip whitespace */
1143                 if(lp->line->str[0]){
1144                     npath = g_strdup(lp->line->str);
1145                     g_ptr_array_add(npath_list, npath);
1146                     expanded_list = TRUE;
1147                     }
1148                 }
1149             LineParse_destroy(lp);
1150             fclose(fp);
1151             }
1152         }
1153     if(!expanded_list){
1154         for(i = 0; i < npath_list->len; i++){
1155             npath = npath_list->pdata[i];
1156             g_free(npath);
1157             }
1158         g_ptr_array_free(npath_list, TRUE);
1159         return NULL;
1160         }
1161     return npath_list;
1162     }
1163 /* If members of the path list are files
1164  * where the first non-whitespace character is not '>',
1165  * it is assumed to be a FOSN, and parsed to expand the path_list.
1166  */
1167 
Analysis_create(GPtrArray * query_path_list,Alphabet_Type query_type,gint query_chunk_id,gint query_chunk_total,GPtrArray * target_path_list,Alphabet_Type target_type,gint target_chunk_id,gint target_chunk_total,gint verbosity)1168 Analysis *Analysis_create(
1169               GPtrArray *query_path_list, Alphabet_Type query_type,
1170               gint query_chunk_id, gint query_chunk_total,
1171               GPtrArray *target_path_list, Alphabet_Type target_type,
1172               gint target_chunk_id, gint target_chunk_total,
1173               gint verbosity){
1174     register Analysis *analysis = g_new0(Analysis, 1);
1175     register FastaDB *query_fdb = NULL, *target_fdb = NULL,
1176                      *seeder_query_fdb, *seeder_target_fdb;
1177     register Match *match;
1178     Match *dna_match = NULL, *protein_match = NULL,
1179           *codon_match = NULL;
1180     register HSP_Param *dna_hsp_param, *protein_hsp_param,
1181                        *codon_hsp_param;
1182     register Match_ArgumentSet *mas = Match_ArgumentSet_create(NULL);
1183     register gboolean use_horizon;
1184     register GPtrArray *expanded_query_path_list = NULL,
1185                        *expanded_target_path_list = NULL;
1186     g_assert(query_path_list);
1187     g_assert(target_path_list);
1188     g_assert(query_path_list->len);
1189     g_assert(target_path_list->len);
1190     analysis->aas = Analysis_ArgumentSet_create(NULL);
1191     analysis->verbosity = verbosity;
1192     analysis->job_queue = JobQueue_create(analysis->aas->thread_count);
1193     /* Expand FOSN paths */
1194     expanded_query_path_list = Analysis_FOSN_expand_path_list(
1195                                                     query_path_list);
1196     expanded_target_path_list = Analysis_FOSN_expand_path_list(
1197                                                     target_path_list);
1198     if(expanded_query_path_list)
1199         query_path_list = expanded_query_path_list;
1200     if(expanded_target_path_list)
1201         target_path_list = expanded_target_path_list;
1202     /**/
1203     analysis->query_builder = Analysis_Builder_create(query_path_list,
1204                                                      analysis, verbosity);
1205     analysis->target_builder = Analysis_Builder_create(target_path_list,
1206                                                      analysis, verbosity);
1207     /**/
1208     if(query_type == Alphabet_Type_UNKNOWN){
1209         if(analysis->query_builder){
1210             query_type = analysis->query_builder->server_type;
1211         } else {
1212             query_type = FastaDB_guess_type(
1213                               (gchar*)query_path_list->pdata[0]);
1214             if(verbosity > 1)
1215                 g_message("Guessed query type [%s]",
1216                         Alphabet_Type_get_name(query_type));
1217             }
1218         }
1219     if(target_type == Alphabet_Type_UNKNOWN){
1220         if(analysis->target_builder){
1221             target_type = analysis->target_builder->server_type;
1222         } else {
1223             target_type = FastaDB_guess_type(
1224                               (gchar*)target_path_list->pdata[0]);
1225             if(verbosity > 1)
1226                 g_message("Guessed target type [%s]",
1227                         Alphabet_Type_get_name(target_type));
1228             }
1229         }
1230     g_assert((query_type == Alphabet_Type_DNA)
1231            ||(query_type == Alphabet_Type_PROTEIN));
1232     g_assert((target_type == Alphabet_Type_DNA)
1233            ||(target_type == Alphabet_Type_PROTEIN));
1234     if(verbosity > 1)
1235         g_message("Creating analysis with query[%s] target[%s]",
1236                 Alphabet_Type_get_name(query_type),
1237                 Alphabet_Type_get_name(target_type));
1238     analysis->gam = GAM_create(query_type, target_type,
1239                                mas->dna_submat,
1240                                mas->protein_submat,
1241                                mas->translate,
1242                                analysis->aas->use_exhaustive,
1243                                verbosity);
1244     /**/
1245     Analysis_find_matches(analysis, &dna_match, &protein_match,
1246                                     &codon_match);
1247     match = dna_match;
1248     if(!match)
1249         match = protein_match;
1250     if(!match)
1251         match = codon_match;
1252     g_assert(match);
1253     if(!analysis->query_builder)
1254         query_fdb = FastaDB_open_list_with_limit(query_path_list,
1255                 match->query->alphabet, query_chunk_id, query_chunk_total);
1256     if(!analysis->target_builder)
1257        target_fdb = FastaDB_open_list_with_limit(target_path_list,
1258                match->target->alphabet, target_chunk_id, target_chunk_total);
1259     if(analysis->aas->use_exhaustive){
1260         if(analysis->query_builder || analysis->target_builder) /* FIXME: ni */
1261             g_error("Exhaustive alignment against server not implemented");
1262         analysis->fasta_pipe = FastaPipe_create(
1263                                   query_fdb, target_fdb,
1264                                   Analysis_FastaPipe_Pair_init_func,
1265                                   Analysis_FastaPipe_Pair_prep_func,
1266                                   Analysis_FastaPipe_Pair_term_func,
1267                                   Analysis_FastaPipe_Pair_query_func,
1268                                   Analysis_FastaPipe_Pair_target_func,
1269                                   FastaDB_Mask_ALL,
1270                                   analysis->gam->translate_both,
1271                                   analysis->aas->use_revcomp);
1272         analysis->curr_query = NULL;
1273     } else { /* Not exhaustive */
1274         use_horizon = (analysis->aas->use_bigseq
1275                     || analysis->query_builder
1276                     || analysis->target_builder)?FALSE:TRUE;
1277         dna_hsp_param = dna_match
1278                       ? HSP_Param_create(dna_match, use_horizon)
1279                       : NULL;
1280         protein_hsp_param = protein_match
1281                           ? HSP_Param_create(protein_match, use_horizon)
1282                           : NULL;
1283         codon_hsp_param = codon_match
1284                         ? HSP_Param_create(codon_match, use_horizon)
1285                         : NULL;
1286         analysis->comparison_param = Comparison_Param_create(
1287                 query_type, target_type,
1288                 dna_hsp_param, protein_hsp_param, codon_hsp_param);
1289         if(dna_hsp_param)
1290             HSP_Param_destroy(dna_hsp_param);
1291         if(protein_hsp_param)
1292             HSP_Param_destroy(protein_hsp_param);
1293         if(codon_hsp_param)
1294             HSP_Param_destroy(codon_hsp_param);
1295         /* Raise HSP thresholds to score if ungapped */
1296         if(!Model_Type_is_gapped(analysis->gam->gas->type)){
1297             if(analysis->comparison_param->dna_hsp_param
1298             && (analysis->comparison_param->dna_hsp_param->threshold
1299               < analysis->gam->gas->threshold))
1300                 analysis->comparison_param->dna_hsp_param->threshold
1301                     = analysis->gam->gas->threshold;
1302             if(analysis->comparison_param->protein_hsp_param
1303             && (analysis->comparison_param->protein_hsp_param->threshold
1304               < analysis->gam->gas->threshold))
1305                 analysis->comparison_param->protein_hsp_param->threshold
1306                     = analysis->gam->gas->threshold;
1307             if(analysis->comparison_param->codon_hsp_param
1308             && (analysis->comparison_param->codon_hsp_param->threshold
1309               < analysis->gam->gas->threshold))
1310                 analysis->comparison_param->codon_hsp_param->threshold
1311                     = analysis->gam->gas->threshold;
1312             }
1313         /* Don't need HSP horizon for bigseq comparison */
1314         if(analysis->query_builder || analysis->target_builder){
1315             if(analysis->query_builder && analysis->target_builder)
1316                 g_error("Server vs server comparison not impelemented");
1317             analysis->fasta_pipe = NULL;
1318             if(analysis->query_builder){
1319                 Analysis_Builder_set_probe_fdb(analysis->query_builder,
1320                                               target_fdb);
1321             } else {
1322                 g_assert(analysis->target_builder);
1323                 Analysis_Builder_set_probe_fdb(analysis->target_builder,
1324                                                query_fdb);
1325                 }
1326         } else {
1327             if(analysis->aas->use_bigseq){
1328                 analysis->bsam = BSAM_create(analysis->comparison_param,
1329                         analysis->aas->saturate_threshold,
1330                         verbosity);
1331                 analysis->fasta_pipe = FastaPipe_create(
1332                                       query_fdb, target_fdb,
1333                                       Analysis_FastaPipe_Pair_init_func,
1334                                       Analysis_FastaPipe_Pair_prep_func,
1335                                       Analysis_FastaPipe_Pair_term_func,
1336                                       Analysis_FastaPipe_Pair_query_func,
1337                                       Analysis_FastaPipe_Pair_target_func,
1338                                       FastaDB_Mask_ALL,
1339                                       analysis->gam->translate_both,
1340                                       analysis->aas->use_revcomp);
1341                 analysis->curr_query = NULL;
1342             } else { /* Use Seeder */
1343                 analysis->scan_query = Analysis_decide_scan_query(query_fdb,
1344                                                         target_fdb,
1345                                                  analysis->aas->force_scan);
1346                 if(verbosity > 1)
1347                     g_message("Applying FSM scan to [%s]",
1348                               analysis->scan_query?"query":"target");
1349                 /* Swap paths and types
1350                  * for query and target when scan on query
1351                  */
1352                 if(analysis->scan_query){
1353                     seeder_query_fdb = target_fdb;
1354                     seeder_target_fdb = query_fdb;
1355                 } else {
1356                     seeder_query_fdb = query_fdb;
1357                     seeder_target_fdb = target_fdb;
1358                     }
1359                 analysis->curr_seeder = NULL;
1360                 analysis->fasta_pipe = FastaPipe_create(
1361                     seeder_query_fdb, seeder_target_fdb,
1362                     Analysis_FastaPipe_Seeder_init_func,
1363                     Analysis_FastaPipe_Seeder_prep_func,
1364                     Analysis_FastaPipe_Seeder_term_func,
1365                     Analysis_FastaPipe_Seeder_query_func,
1366                     Analysis_FastaPipe_Seeder_target_func,
1367                     FastaDB_Mask_ALL, analysis->gam->translate_both,
1368                     analysis->aas->use_revcomp);
1369                 }
1370             }
1371         }
1372     if(query_fdb)
1373         FastaDB_close(query_fdb);
1374     if(target_fdb)
1375         FastaDB_close(target_fdb);
1376     /**/
1377     if(expanded_query_path_list)
1378         Analysis_path_list_destroy(expanded_query_path_list);
1379     if(expanded_target_path_list)
1380         Analysis_path_list_destroy(expanded_target_path_list);
1381     return analysis;
1382     }
1383 
Analysis_destroy(Analysis * analysis)1384 void Analysis_destroy(Analysis *analysis){
1385     JobQueue_destroy(analysis->job_queue);
1386     if(analysis->fasta_pipe)
1387         FastaPipe_destroy(analysis->fasta_pipe);
1388     if(analysis->curr_query)
1389         FastaDB_Seq_destroy(analysis->curr_query);
1390     if(analysis->curr_seeder)
1391         Seeder_destroy(analysis->curr_seeder);
1392     if(analysis->bsam)
1393         BSAM_destroy(analysis->bsam);
1394     if(analysis->comparison_param)
1395         Comparison_Param_destroy(analysis->comparison_param);
1396     /*
1397     if(analysis->query_ac)
1398         Analysis_Client_destroy(analysis->query_ac);
1399     if(analysis->target_ac)
1400         Analysis_Client_destroy(analysis->target_ac);
1401     */
1402     if(analysis->query_builder)
1403         Analysis_Builder_destroy(analysis->query_builder);
1404     if(analysis->target_builder)
1405         Analysis_Builder_destroy(analysis->target_builder);
1406     GAM_destroy(analysis->gam);
1407     g_free(analysis);
1408     return;
1409     }
1410 
Analysis_process(Analysis * analysis)1411 void Analysis_process(Analysis *analysis){
1412     if(analysis->query_builder){
1413         Analysis_Builder_process(analysis->query_builder, analysis, TRUE);
1414     } else if(analysis->target_builder){
1415         Analysis_Builder_process(analysis->target_builder, analysis, FALSE);
1416     } else {
1417         while(FastaPipe_process(analysis->fasta_pipe, analysis));
1418         }
1419     if(analysis->job_queue)
1420         JobQueue_complete(analysis->job_queue);
1421     GAM_report(analysis->gam);
1422     return;
1423     }
1424 
1425 /**/
1426 
1427