1 #ifdef WIN32
2 #include <windows.h>
3 #else
4 #include <sys/times.h>
5 #endif
6 #include "seaview.h"
7 #include "treedraw.h"
8 #include "tbe.h"
9 #include <FL/Fl_Round_Button.H>
10 #include <FL/Fl_Check_Button.H>
11 #include <FL/Fl_Select_Browser.H>
12 #include <FL/Fl_Return_Button.H>
13 #include <FL/Fl_Int_Input.H>
14 #include <FL/Fl_Float_Input.H>
15 #include <FL/Fl_Value_Slider.H>
16 #include <FL/Fl.H>
17 #include <FL/fl_ask.H>
18 #ifdef __APPLE__
19 #include <FL/x.H> // for fl_mac_os_version
20 #endif
21
22 extern "C" {
23 #include "phyml_util.h"
24 }
25
26 typedef enum {observed_pdist, Poisson_pdist, Kimura_pdist} protein_distances;
27 typedef enum {observed_ndist, Jukes_Cantor, Kimura2P, HKY, LogDet, Ka, Ks} nucleotide_distances;
28 typedef enum {more_thorough, less_thorough, rearrange_best_tree} dnapars_S_option;
29
30 allseq *view_to_allseq(SEA_VIEW *view, int remove_all_gaps);
31 void gaps_as_unknown_states(allseq *phyml_seqs, int protein);
32 void free_after_view_to_allseq(allseq *phyml_seqs);
33 //void initialize_random_series(void);
34 extern "C" {
35 double next_random(void);
36 }
37 void bootstrap_weights(allseq *seqs, int use_codons);
38 void savedistinfile(matrix *mat, const char *dirname, const char *outfname);
39 matrix *calc_dist_matrix(allseq *seqs, int distkind, int protein, char **lwlseqs, int in_bootstrap, char **p_err_mess);
40 void phyml_dialog(SEA_VIEW *view);
41 static void change_searching_callback(Fl_Widget *ob, void *data);
42 static void cancel_callback(Fl_Widget *ob, void *data);
43 static void model_callback(Fl_Widget *ob, void *data);
44 void run_phyml_callback(Fl_Widget *ob, void *data);
45 void distance_method_dialog(SEA_VIEW *view);
46 matrix *run_calc_dist_matrix(allseq *seqs, int distkind, int protein, char **lwlseqs, int in_bootstrap, char **p_err_mess, int no_gui);
47 char *run_distance_method(SEA_VIEW *view, int distkind, int remove_all_gaps, int use_bionj, int use_bootstrap,
48 int replicates, Fl_Box *box, const char *distance_name, int using_kaks,
49 const char* distancefname, const char *user_tree, int no_gui, void (*alert)(const char*, ...), int keep_b_trees, int compute_tbe );
50 char *bootstrap_reformat(char *tree, int replicates);
51 matrix *Obs_Dist(allseq *data, model *mod);
52 matrix *Kimura_p_Dist(allseq *data);
53 char *alrt_reformat(char *tree);
54 matrix *K80_dist_nogamma(allseq *data);
55 matrix *HKY_dist(allseq *data);
56 double hky(char* seq1, char* seq2, int lg, int *wght);
57 int freq_obs(char* seq1, char* seq2, double* freq, int lgseq, int *wght);
58 int informative(char** sequ, int nb);
59 void jumble_fct(char** seq, char** name, int notu);
60 static char *run_parsimony_method(SEA_VIEW *view, int nogaps, const char *gapsunknown, int nreplicates, int njumbles,
61 int maxbest, int user_tree, Fl_Box *w_count, dnapars_S_option s_option,
62 const char **pprogname, float consensus_threshold, int no_gui, char **p_bootstrap_trees, void (*alert)(const char *,...) );
63 void parsimony_dialog(SEA_VIEW *view);
64 char *parsimony_with_jumble(char **pars_seqs, char **pars_names, int n_otu, int njumbles, int protein, int *psteps,
65 int *pcount, int maxbest, Fl_Widget *w_count, dnapars_S_option s_option, int no_gui, float consensus_threshold);
66 static char *parsimony_with_bootstrap(allseq *tmpallseq, char **pars_seqs, char **pars_names, int n_otu, int njumbles,
67 int protein, int *psteps, int maxbest, int replicates, Fl_Box *w_count, int *preplicates_done, dnapars_S_option s_option, int no_gui,
68 float consensus_threshold, int *pcount_best, char **p_bootstrap_trees);
69 double *vector(long nl, long nh);
70 void free_vector(double *v, long nl, long nh);
71 int ludcmp(double **a, int n, int *indx, double *d);
72 double det4(double mat[4][4]);
73 double logdet(char* seq1, char* seq2, int lg, int *wght);
74 matrix *LOGDET_dist(allseq *data);
75 char *put_names_back_in_tree(char *oldtree, char **names);
76 char *replace_tree_names_by_rank(const char *tree, char **names, int notu);
77 void command_line_phylogeny(int argc, char **argv);
78 void save_bootstrap_replicates(const char *fname, int replicates, SEA_VIEW *view);
79
80
81 extern void interrupt_callback(Fl_Widget *ob, void *data);
82 extern int save_phylip_file(const char *fname, char **seq,
83 char **seqname, int totseqs, int *eachlength, region *region_used,
84 int *sel_seqs, int tot_sel_seqs, int phylipwidnames);
85 extern int run_external_prog_in_pseudoterm(char *cmd, const char *dialogfname, const char *label);
86 extern int init_add_bootstrap(char *full_tree);
87 extern char *get_res_value(const char *name, const char *def_value);
88 extern int process_one_replicate(char *one_tree, float w);
89 extern char *finish_add_bootstrap(int replicates);
90 extern int compare_newick_with_names(const char *tree, char **names, int notu, char **pname);
91 extern const char *make_binary_or_unrooted(char *arbre);
92 extern int save_resources(void);
93 extern int set_res_value(const char *name, const char *value);
94 extern char *create_tmp_filename(void);
95 extern void delete_tmp_filename(const char *base_fname);
96 extern int is_view_valid(SEA_VIEW *view);
97 char *parsimony(char** seq, char** seqname, int notu, int njumbles, int* steps, char* toevaluate, int arg_maxtrees,
98 int *bt_weights, bool protein, dnapars_S_option s_option, int no_gui, Fl_Widget *w_count);
99 extern int fl_create_thread(void (*f) (void *), void* p, unsigned);
100 extern char *argname(int argc, char *argv[], const char *arg);
101 extern float argval(int argc, char *argv[], const char *arg, float defval);
102 extern bool isarg(int argc, char *argv[], const char *arg);
103 extern SEA_VIEW *cmdline_read_input_alignment(int argc, char **argv);
104 extern void init_consensus_calculation(int n);
105 extern int process_one_consensus_item(char *one_tree, char **seqnames);
106 extern int after_consensus_items(float threshold, int item_count, int ***prows);
107 extern char *compute_consensus_tree(char **seqnames, int **rows, int branch_count);
108 extern "C" {
109 void prefastlwl(void);
110 void loadrl(void);
111 int fastlwl(char **seq, int nbseq, int lgseq, double **ka, double **ks,
112 double **vka, double **vks, int* sat1, int* sat2, int *wght);
113 char* dnapars(char** seq, char** seqname, int notu, int njumbles, int *jumble_no, int *steps, char* toevaluate, int arg_maxtrees, int *bt_weights, dnapars_S_option s_option);
114 char* protpars(char** seq, char** seqname, int notu, int njumbles, int *jumble_no, int *steps, char* toevaluate, int arg_maxtrees, int *bt_weights, dnapars_S_option s_option);
115 char *create_tmp_filename_from_C(void);
116 FILE *fl_fopen_from_C(const char *fname, const char *mode);
117 int fl_unlink_from_C(const char*fname);
118 void awake_from_C(void);
119 }
120
121 int tree_build_interrupted;
122
create_tmp_filename_from_C(void)123 char *create_tmp_filename_from_C(void)
124 {
125 return create_tmp_filename();
126 }
127
fl_fopen_from_C(const char * fname,const char * mode)128 FILE *fl_fopen_from_C(const char *fname, const char *mode)
129 {
130 return fl_fopen(fname, mode);
131 }
132
fl_unlink_from_C(const char * fname)133 int fl_unlink_from_C(const char*fname)
134 {
135 return fl_unlink(fname);
136 }
137
trees_callback(Fl_Widget * ob,void * data)138 void trees_callback(Fl_Widget *ob, void *data)
139 /* 0:Distance 1:PhyML 2:import
140 */
141 {
142 SEA_VIEW *view = (SEA_VIEW *)ob->user_data();
143 int reponse = ((Fl_Menu_*)ob)->mvalue() - view->menu_trees->get_menu();
144 if(reponse <= 2) {
145 if(view->tot_seqs < 3 || (view->tot_sel_seqs > 0 && view->tot_sel_seqs < 3) ) {
146 fl_alert("Can't draw tree for less than 3 sequences");
147 return;
148 }
149 for(int i = 0; i < view->tot_seqs; i++) {//check that no ,():; in sequence names
150 if(view->tot_sel_seqs > 0 && !view->sel_seqs[i]) continue;
151 if(strchr(view->seqname[i], '(') != NULL || strchr(view->seqname[i], ')') != NULL ||
152 strchr(view->seqname[i], ',') != NULL || strchr(view->seqname[i], ':') != NULL
153 || strchr(view->seqname[i], ';') != NULL) {
154 fl_alert("Sequence: %s\nTrees can't be built with any of (),:; in sequence names", view->seqname[i]);
155 return;
156 }
157 }
158 }
159 if(reponse == 0) {//Parsimony methods
160 for (int i = 0; i < view->tot_seqs; i++) {//check that no repeated sequence names
161 if (view->tot_sel_seqs > 0 && !view->sel_seqs[i]) continue;
162 for (int j = i+1; j < view->tot_seqs; j++) {
163 if (view->tot_sel_seqs > 0 && !view->sel_seqs[j]) continue;
164 if (strcmp(view->seqname[i], view->seqname[j]) == 0) {
165 fl_alert("The parsimony tree can't be computed if different sequences (#%d, #%d) have the same name: %s", i+1, j+1, view->seqname[i]);
166 return;
167 }
168 }
169 }
170 parsimony_dialog(view);
171 }
172 else if(reponse == 1) {//Distance methods
173 distance_method_dialog(view);
174 }
175 else if(reponse == 2) {//PhyML
176 phyml_dialog(view);
177 }
178 else if(reponse == 3) {//Import tree
179 Fl_Native_File_Chooser *chooser = new Fl_Native_File_Chooser();
180 chooser->type(Fl_Native_File_Chooser::BROWSE_FILE); // let user browse a single file
181 chooser->title("Import treefile");
182 char *filename = run_and_close_native_file_chooser(chooser);
183 if(filename == NULL) return;
184 FILE *in = fopen(filename, "r");
185 fseek(in, 0, SEEK_END);
186 long l = ftell(in);
187 fseek(in, 0, SEEK_SET);
188 char *tree = (char *)malloc(l + 1);
189 char c, *p = tree;
190 while((c = fgetc(in)) != EOF) {
191 if(c != '\n' && c != '\r') *(p++) = c;
192 }
193 *p = 0;
194 fclose(in);
195 treedraw(tree, view, extract_filename(filename), FALSE);
196 }
197 else if(reponse == 4) {//New empty tree window
198 treedraw(NULL, view, NULL, FALSE);
199 }
200 else {
201 reponse = ((Fl_Menu_*)ob)->mvalue() - view->menu_trees->vitem(0);
202 char *p = view->trees[reponse];
203 Fl_Window *w = Fl::first_window();
204 while(w != NULL) {
205 const char *c = w->xclass();
206 if(c != NULL && strcmp(c, TREE_WINDOW) == 0 &&
207 strcmp( ((FD_nj_plot*)w->user_data())->trees, p) == 0) {
208 w->show();
209 return;
210 }
211 w = Fl::next_window(w);
212 }
213 char *tree = strdup(p);
214 treedraw(tree, view, ((Fl_Menu_*)ob)->mvalue()->label(), TRUE);
215 }
216 }
217
218
distance_callback(Fl_Widget * wgt,void * data)219 static void distance_callback(Fl_Widget *wgt, void *data)
220 {
221 if( ((Fl_Choice *)wgt)->value() >= Ka) ((Fl_Widget *)data)->deactivate();
222 else ((Fl_Widget *)data)->activate();
223 }
224
dist_b_cb(Fl_Widget * wgt,void * data)225 static void dist_b_cb(Fl_Widget *wgt, void *data)
226 {
227 Fl_Button **buttons = (Fl_Button **)data;
228 Fl_Button *me = (Fl_Button*)wgt;
229 for (int i=0; i<2; i++){
230 Fl_Button *target = buttons[i];
231 if (target){
232 if (me->value()) target->activate();
233 else {
234 target->deactivate();
235 target->value(0);
236 }
237 }
238 }
239 }
240
distance_method_dialog(SEA_VIEW * view)241 void distance_method_dialog(SEA_VIEW *view)
242 {
243 static int first = TRUE;
244 static int def_replicates = 100;
245 static int distance_choice = Jukes_Cantor;
246 static Fl_Window *w;
247 static Fl_Int_Input *replicates;
248 static Fl_Box *box;
249 static Fl_Choice *distance;
250 static Fl_Check_Button *ignore_all_gaps;
251 static Fl_Round_Button *nj;
252 static Fl_Round_Button *bionj;
253 static Fl_Round_Button *savetofile;
254 static Fl_Round_Button *usertree;
255 static Fl_Choice *treechoice;
256 static Fl_Check_Button *bootstrap;
257 static Fl_Button *interrupt;
258 static Fl_Return_Button *go;
259 static int started, maxchoice;
260 static Fl_Check_Button *keep_b_trees;
261 static Fl_Check_Button *compute_tbe;
262 if(first) {
263 first = FALSE;
264 w = new Fl_Window(270, 265);
265 w->label("Distance analysis");
266 w->set_modal();
267 nj = new Fl_Round_Button(3, 3, 60, 20, "NJ");
268 nj->type(FL_RADIO_BUTTON);
269 bionj = new Fl_Round_Button(nj->x() + nj->w() + 10, nj->y(), nj->w(), nj->h(), "BioNJ");
270 bionj->type(FL_RADIO_BUTTON);
271 savetofile = new Fl_Round_Button(bionj->x() + bionj->w() + 10, nj->y(), 110, nj->h(), "Save to File");
272 savetofile->type(FL_RADIO_BUTTON);
273 distance = new Fl_Choice(90, nj->y() + nj->h() + 15, 100, 20, "Distance");
274 distance->align(FL_ALIGN_LEFT);
275 ignore_all_gaps = new Fl_Check_Button(160, distance->y() + distance->h() + 5, 50, 20,
276 "Ignore all gap sites");
277 ignore_all_gaps->value(1);
278 ignore_all_gaps->align(FL_ALIGN_LEFT);
279 int y = ignore_all_gaps->y() + ignore_all_gaps->h() + 25;
280 bootstrap = new Fl_Check_Button(3, y, 80, 20, "Bootstrap");
281 replicates = new Fl_Int_Input(bootstrap->x() + bootstrap->w() + 5, y, 100, 20, "# of replicates");
282 replicates->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
283 char tmp[10];
284 sprintf(tmp, "%d", def_replicates);
285 replicates->value(tmp);
286
287 compute_tbe = new Fl_Check_Button(3, bootstrap->y() + bootstrap->h() + 5, 280, 20, "Transfer Bootstrap Expectation method");
288 compute_tbe->deactivate();
289
290 keep_b_trees = new Fl_Check_Button(3, compute_tbe->y() + compute_tbe->h() + 5, 180, 20, "Show bootstrap trees");
291 keep_b_trees->deactivate();
292
293 static Fl_Check_Button *buttons[2];
294 buttons[0]=compute_tbe;
295 buttons[1]=keep_b_trees;
296 bootstrap->callback(dist_b_cb, buttons);
297 bootstrap->when(FL_WHEN_CHANGED);
298
299 box = new Fl_Box(3, keep_b_trees->y() + keep_b_trees->h() + 5, 150, 20, "");
300 usertree = new Fl_Round_Button(nj->x(), box->y() + box->h() + 15, 85, nj->h(), "User tree:");
301 usertree->type(FL_RADIO_BUTTON);
302 treechoice = new Fl_Choice(usertree->x() + usertree->w(), usertree->y(), 155, 20, "Optimize branch lengths");
303 treechoice->align(FL_ALIGN_TOP|FL_ALIGN_RIGHT);
304 bionj->setonly();
305
306 interrupt = new Fl_Button(3, w->h() - 25, 70, 20, "");
307 interrupt->callback(interrupt_callback, &tree_build_interrupted);
308 go = new Fl_Return_Button(w->w() - 70 - 3, interrupt->y() , 70, 20, "Go");
309 go->callback(interrupt_callback, &started);
310 w->end();
311 w->callback(interrupt_callback, &tree_build_interrupted);
312 }
313 interrupt->label("Cancel");
314 distance->clear();
315 if(view->protein) {
316 distance->add("Observed");
317 distance->add("Poisson");
318 distance->add("Kimura");
319 maxchoice = Kimura_pdist;
320 ignore_all_gaps->activate();
321 }
322 else {
323 distance->add("Observed");
324 distance->add("J-C");
325 distance->add("K2P");
326 distance->add("HKY");
327 distance->add("LogDet");
328 distance->add("Ka");
329 distance->add("Ks");
330 maxchoice = Ks;
331 }
332 distance->value( FL_min(distance_choice, maxchoice) );
333 distance->callback(distance_callback, ignore_all_gaps);
334 keep_b_trees->value(0);
335 treechoice->clear();
336 vlength_menu *vmenu = view->menu_trees;
337 int count = view->menu_trees->vlength();
338 for (int i = 0; i < count; i++)
339 treechoice->add( vmenu->vitem(i)->label() );
340 treechoice->value(0);
341 treechoice->deactivate();
342 bootstrap->activate();
343 if (usertree->value()) bionj->setonly();
344 if (count) usertree->activate();
345 else usertree->deactivate();
346 started = tree_build_interrupted = 0;
347 box->label("");
348 go->show();
349 w->show();
350 while(!started && !tree_build_interrupted) {
351 Fl_Widget *o = Fl::readqueue();
352 if (!o) Fl::wait();
353 else if(o == usertree || o == nj || o == bionj || o == savetofile) {
354 if( usertree->value() || savetofile->value() ) {
355 bootstrap->value(0);
356 bootstrap->deactivate();
357 bootstrap->do_callback();
358 }
359 else {
360 bootstrap->activate();
361 }
362 if( usertree->value() ) {
363 treechoice->activate();
364 }
365 else {
366 treechoice->deactivate();
367 }
368 }
369 }
370 distance_choice = distance->value();
371 int using_kaks = ((!view->protein) && (distance_choice == Ka || distance_choice == Ks) );
372 if(!tree_build_interrupted) {
373 sscanf(replicates->value(), "%d", &def_replicates);
374 interrupt->label("Interrupt");
375 if(bootstrap->value()) {
376 box->label("Count: 0");
377 }
378 go->hide();
379 Fl::flush();
380 char *tree = run_distance_method(view, distance_choice, ignore_all_gaps->value(), bionj->value(),
381 bootstrap->value(), def_replicates, box, distance->text(), using_kaks,
382 savetofile->value()?"":NULL, usertree->value() ? view->trees[treechoice->value()] : NULL,
383 false, fl_alert, keep_b_trees->value(), compute_tbe->value());
384 const char *choice = bionj->value()? "BioNJ_tree" : "NJ_tree";
385 const char *n = extract_filename(view->masename);
386 char *title = NULL;
387 if (usertree->value()) { title = new char[20]; strcpy(title, "Least_squares_brl");}
388 else if (n) {
389 char *q = (char*)strchr(n, '.');
390 if (q) *q = 0;
391 title = new char[strlen(n) + 12];
392 sprintf(title, "%s-%s", n, choice);
393 if (q) *q = '.';
394 }
395 Fl_Window *treew = NULL;
396 if (tree) treew = treedraw(tree, view, n || usertree->value() ? title:choice, FALSE);
397 #if !defined(__APPLE__)
398 if (keep_b_trees->value() && treew) {
399 treew->position(treew->x() + 40, treew->y());
400 }
401 #endif
402 delete[] title;
403 }
404 w->hide();
405 }
406
407 struct dist_struct {
408 allseq *seqs;
409 int distkind;
410 int protein;
411 char **lwlseqs;
412 int in_bootstrap;
413 char *err_mess;
414 matrix *result;
415 bool done;
416 };
417
prep_calc_dist_matrix(dist_struct * ds)418 void prep_calc_dist_matrix(dist_struct *ds)
419 {
420 ds->result = calc_dist_matrix(ds->seqs, ds->distkind, ds->protein, ds->lwlseqs, ds->in_bootstrap, &(ds->err_mess));
421 if (tree_build_interrupted && ds->result) {
422 Free_Mat(ds->result);
423 ds->result = NULL;
424 }
425 Fl::lock();
426 ds->done = true;
427 Fl::awake();
428 Fl::unlock();
429 }
430
run_calc_dist_matrix(allseq * seqs,int distkind,int protein,char ** lwlseqs,int in_bootstrap,char ** p_err_mess,int no_gui)431 matrix *run_calc_dist_matrix(allseq *seqs, int distkind, int protein, char **lwlseqs, int in_bootstrap, char **p_err_mess, int no_gui)
432 {
433 dist_struct *ds = new dist_struct;
434 ds->seqs = seqs;
435 ds->distkind = distkind;
436 ds->protein = protein;
437 ds->lwlseqs =lwlseqs;
438 ds->in_bootstrap = in_bootstrap;
439 ds->err_mess = NULL;
440 ds->result = NULL;
441 ds->done = false;
442 tree_build_interrupted = false;
443 if (no_gui) {
444 ds->result = calc_dist_matrix(ds->seqs, ds->distkind, ds->protein, ds->lwlseqs, ds->in_bootstrap, &(ds->err_mess));
445 }
446 else {
447 fl_create_thread( (void (*)(void*))prep_calc_dist_matrix, ds, 0);
448 while (!ds->done) Fl::wait();
449 }
450 matrix* m = ds->result;
451 if (p_err_mess) *p_err_mess = ds->err_mess;
452 delete ds;
453 return m;
454 }
455
456
run_distance_method(SEA_VIEW * view,int distkind,int remove_all_gaps,int use_bionj,int use_bootstrap,int replicates,Fl_Box * box,const char * distance_name,int using_kaks,const char * distancefname,const char * user_tree,int no_gui,void (* alert)(const char *,...),int keep_b_trees,int compute_tbe)457 char *run_distance_method(SEA_VIEW *view, int distkind, int remove_all_gaps, int use_bionj, int use_bootstrap,
458 int replicates, Fl_Box *box, const char *distance_name, int using_kaks, const char* distancefname,
459 const char *user_tree, int no_gui, void (*alert)(const char*, ...), int keep_b_trees, int compute_tbe )
460 {
461 char *display_tree, *bootstrap_trees = NULL, *err_mess, tree_label[100]="";
462 int total = 0, i, j, lbtrees;
463 char **lwlseqs = NULL;
464 if(using_kaks) remove_all_gaps = FALSE;//keep gaps not to alter reading frame
465 allseq *phyml_seqs = view_to_allseq(view, remove_all_gaps);
466 if(use_bootstrap) {//for bootstrap, seq names must be all distinct
467 for(i = 0; i < phyml_seqs->n_otu - 1; i++) {
468 for(j = i+1; j < phyml_seqs->n_otu; j++) {
469 if(strcmp(phyml_seqs->c_seq[i]->name, phyml_seqs->c_seq[j]->name) == 0) {
470 alert("Can't run bootstrap because sequence name %s is used twice",
471 phyml_seqs->c_seq[i]->name);
472 free_after_view_to_allseq(phyml_seqs);
473 return NULL;
474 }
475 }
476 }
477 }
478 if(using_kaks) {
479 lwlseqs = (char **)malloc(phyml_seqs->n_otu * sizeof(char *));
480 for(i = 0; i < phyml_seqs->n_otu; i++) lwlseqs[i] = phyml_seqs->c_seq[i]->state;
481 }
482 matrix *phyml_mat = run_calc_dist_matrix(phyml_seqs, distkind, view->protein, lwlseqs, FALSE, &err_mess, no_gui);
483 if(phyml_mat == NULL) {
484 free_after_view_to_allseq(phyml_seqs);
485 if( using_kaks ) {
486 free(lwlseqs);
487 if (err_mess) {
488 alert("%s", err_mess);
489 delete[] err_mess;
490 }
491 }
492 else if (!tree_build_interrupted) alert("Can't compute distances because sequences are too divergent.\nAre they aligned?");
493 return NULL;
494 }
495 if(user_tree != NULL) {//process user tree
496 extern char *least_squares_brl(const char *tree, int nbseq, double **dist, char **names);
497 char *p, **usednames;
498 int i, j;
499 p = (char *)user_tree;
500 if(*p== '[')p=strchr(p,']')+1;
501 if(view->tot_sel_seqs > 0) {
502 usednames = (char **)malloc(view->tot_sel_seqs * sizeof(char *));
503 j = 0;
504 for(i = 0; i < view->tot_seqs; i++) {
505 if(view->sel_seqs[i]) usednames[j++] = view->seqname[i];
506 }
507 }
508 else usednames = view->seqname;
509 p = least_squares_brl(p, phyml_seqs->n_otu, phyml_mat->dist, usednames);
510 if(view->tot_sel_seqs > 0) free(usednames);
511 if(p != NULL) {
512 display_tree = (char *)malloc(strlen(p) + strlen(distance_name) + 60);
513 sprintf(display_tree, "[User-tree w/ least-squares br lengths, %d sites %s]%s",
514 phyml_seqs->clean_len, distance_name, p);
515 free(p);
516 if (box) box->window()->hide();//necessary under X11 so tree is in foreground
517 }
518 Free_Mat(phyml_mat);
519 free_after_view_to_allseq(phyml_seqs);
520 return display_tree;
521 }
522 if(distancefname) {
523 savedistinfile(phyml_mat, extract_dirname(view->masename), no_gui ? distancefname : NULL);
524 Free_Mat(phyml_mat);
525 free_after_view_to_allseq(phyml_seqs);
526 return NULL;
527 }
528 phyml_mat->method = use_bionj; //0:NJ 1:BioNJ
529 phyml_mat->tree = Make_Tree_From_Scratch(phyml_seqs->n_otu, phyml_seqs);
530 Bionj(phyml_mat);
531 sprintf(tree_label, "%s %d sites %s", use_bionj ? "BioNJ" : "NJ",
532 phyml_seqs->clean_len, distance_name);
533 if (use_bootstrap) {
534 char *full_tree = Write_Tree(phyml_mat->tree);
535 Free_Tree(phyml_mat->tree);
536 Free_Mat(phyml_mat);
537 int error = init_add_bootstrap(full_tree);
538 free(full_tree);
539 if(!error) {
540 char *p;
541 for (int r = 0; r < replicates; r++) {
542 if (tree_build_interrupted) break;
543 bootstrap_weights(phyml_seqs, using_kaks );
544 phyml_mat = run_calc_dist_matrix(phyml_seqs, distkind, view->protein, lwlseqs, TRUE, &err_mess, no_gui);
545 if(phyml_mat == NULL) {
546 if (!tree_build_interrupted) total = 0;
547 break;
548 }
549 phyml_mat->method = use_bionj; // 0:NJ 1:BioNJ
550 phyml_mat->tree = Make_Tree_From_Scratch(phyml_seqs->n_otu, phyml_seqs);
551 Bionj(phyml_mat);
552 char *one_replicate_tree = Write_Tree(phyml_mat->tree);
553 if (keep_b_trees || compute_tbe) {
554 if (r == 0) {
555 lbtrees = replicates * (strlen(one_replicate_tree) + 1);
556 bootstrap_trees = (char *)malloc(lbtrees + 1);
557 p = bootstrap_trees;
558 *p = 0;
559 }
560 int l = strlen(one_replicate_tree);
561 if ((p-bootstrap_trees) + l + 1 > lbtrees) {
562 lbtrees = (p-bootstrap_trees) + l + 1000;
563 bootstrap_trees = (char*)realloc(bootstrap_trees, lbtrees + 1);
564 p = bootstrap_trees + strlen(bootstrap_trees);
565 }
566 memcpy(p, one_replicate_tree, l); p += l;
567 *p++ = '\n';
568 *p = 0;
569 }
570 Free_Tree(phyml_mat->tree);
571 Free_Mat(phyml_mat);
572 total += process_one_replicate(one_replicate_tree, 1.);
573 free(one_replicate_tree);
574 char newbox[20];
575 sprintf(newbox,"Count: %d", total);
576 if (box) {
577 box->label(newbox);
578 box->redraw();
579 Fl::wait(0);
580 }
581 }
582 }
583 if(total > 0) {
584 display_tree = finish_add_bootstrap(total);
585 sprintf(tree_label + strlen(tree_label), " %d repl.%s", total, compute_tbe ? " (TBE)" : "" );
586 }
587 else display_tree = NULL;
588 }
589 else {
590 display_tree = Write_Tree(phyml_mat->tree);
591 Free_Tree(phyml_mat->tree);
592 Free_Mat(phyml_mat);
593 }
594 free_after_view_to_allseq(phyml_seqs);
595 if(using_kaks) {
596 free(lwlseqs);
597 }
598 if(display_tree == NULL) {
599 if( !using_kaks) alert("Can't compute distances because sequences are too divergent.\nAre they aligned?");
600 return NULL;
601 }
602 if (bootstrap_trees && compute_tbe) {
603 display_tree=TBE_Compute(display_tree,bootstrap_trees);
604 }
605 //add label to tree
606 char *tree = (char *)malloc(strlen(tree_label) + strlen(display_tree) + 4);
607 sprintf(tree, "[%s] %s", tree_label, display_tree);
608 free(display_tree);
609 if (box) box->window()->hide();//necessary under X11 so tree is in foreground
610 if (bootstrap_trees && keep_b_trees) {
611 treedraw(bootstrap_trees, view, "bootstrap trees", FALSE);
612 }
613 return tree;
614 }
615
savedistinfile(matrix * mat,const char * dirname,const char * outfname)616 void savedistinfile(matrix *mat, const char *dirname, const char *outfname)
617 {
618 FILE *out;
619 int i, j;
620 if (!outfname) {
621 Fl_Native_File_Chooser *chooser = new Fl_Native_File_Chooser();
622 chooser->type(Fl_Native_File_Chooser::BROWSE_SAVE_FILE);
623 chooser->options(Fl_Native_File_Chooser::SAVEAS_CONFIRM | chooser->options());
624 chooser->title("Set distance output filename");
625 chooser->directory(dirname);
626 chooser->preset_file("seqs.dist");
627 outfname = run_and_close_native_file_chooser(chooser);
628 if(outfname == NULL) return;
629 }
630 out = fopen(outfname, "w");
631 if(out == NULL) return;
632 fputs("#distances order: d(1,2),...,d(1,n) <new line> d(2,3),...,d(2,n) <new line>...\n", out);
633 fprintf(out, "%d\n", mat->n_otu);
634 for( i = 0; i < mat->n_otu - 1; i++) {
635 for( j = i + 1; j < mat->n_otu; j++) {
636 fprintf(out, "%g ", mat->dist[i][j]);
637 }
638 fputs("\n", out);
639 }
640 for( i = 0; i < mat->n_otu ; i++) {
641 fprintf(out, "%s ", mat->name[i]);
642 }
643 fputs("\n\n", out);
644 fputs("#pairwise distances\n", out);
645 for (i = 0; i < mat->n_otu; i++) {
646 for (j = i + 1; j < mat->n_otu; j++) {
647 char *n1 = mat->name[i];
648 char *n2 = mat->name[j];
649 if (strcmp(n1, n2) > 0) { char *p = n1; n1 = n2; n2 = p; }
650 fprintf(out, "%s,%s: %g\n", n1, n2, mat->dist[i][j]);
651 }
652 }
653 fclose(out);
654 }
655
calc_dist_matrix(allseq * seqs,int distkind,int protein,char ** lwlseqs,int in_bootstrap,char ** p_err_mess)656 matrix *calc_dist_matrix(allseq *seqs, int distkind, int protein, char **lwlseqs, int in_bootstrap, char **p_err_mess)
657 {
658 matrix *phyml_mat = NULL;
659 if(distkind == observed_pdist || distkind == observed_ndist) {//observed
660 model jcmodel;
661 jcmodel.stepsize = 1;
662 jcmodel.datatype = (protein ? 1 : 0);
663 jcmodel.ns = (protein ? 20 : 4);
664 phyml_mat = Obs_Dist(seqs, &jcmodel);
665 }
666 else if(distkind == Kimura_pdist ) {//Kimura protein distance
667 phyml_mat = Kimura_p_Dist(seqs);
668 if(phyml_mat == NULL) return NULL;
669 }
670 else if(distkind == Poisson_pdist || distkind == Jukes_Cantor) {//JC or Poisson
671 model jcmodel;
672 jcmodel.stepsize = 1;
673 jcmodel.datatype = (protein ? 1 : 0);
674 jcmodel.ns = (protein ? 20 : 4);
675 phyml_mat = JC69_Dist(seqs, &jcmodel);
676 if(phyml_mat == NULL) return NULL;
677 }
678 else if((distkind == Kimura2P) && ! protein) {//K2P
679 phyml_mat = K80_dist_nogamma(seqs);
680 if(phyml_mat == NULL) return NULL;
681 }
682 else if(distkind == HKY && ! protein) {//HKY
683 phyml_mat = HKY_dist(seqs);
684 if(phyml_mat == NULL) return NULL;
685 }
686 else if(distkind == LogDet && ! protein) {//LogDet
687 phyml_mat = LOGDET_dist(seqs);
688 if(phyml_mat == NULL) return NULL;
689 }
690 else if( (distkind == Ka || distkind == Ks) && ! protein) {//Ka or Ks
691 int sat1, sat2, count = 0, err = 0, i, j;
692 int l3 = 3*(seqs->clean_len / 3);//dismiss partial last codon
693 *p_err_mess = NULL;
694 static int first = TRUE;
695 if(first) {
696 first = FALSE;
697 loadrl(); // needed once only
698 prefastlwl();
699 }
700 phyml_mat = Make_Mat(seqs->n_otu);
701 Init_Mat(phyml_mat, seqs);
702 // Ks distances can become saturated, re-run a bootstrap replicate in this case at most 50 times
703 do {
704 if(in_bootstrap && err) bootstrap_weights(seqs, TRUE );
705 count++;
706 if(distkind == Ka) //Ka
707 err = fastlwl(lwlseqs, seqs->n_otu, l3, phyml_mat->dist, NULL, NULL, NULL, &sat1, &sat2, seqs->wght);
708 else //Ks
709 err = fastlwl(lwlseqs, seqs->n_otu, l3, NULL, phyml_mat->dist, NULL, NULL, &sat1, &sat2, seqs->wght);
710 }
711 while (in_bootstrap && err && count <=50);
712 if (err) {
713 if (!in_bootstrap && !tree_build_interrupted) {
714 char *err_mess = new char[200];
715 snprintf(err_mess, 200, "Saturation between %s and %s", phyml_mat->name[sat1], phyml_mat->name[sat2]);
716 *p_err_mess = err_mess;
717 }
718 Free_Mat(phyml_mat);
719 return NULL;
720 }
721 for(i = 0; i < seqs->n_otu - 1; i++) {
722 for(j = i + 1; j < seqs->n_otu; j++) {
723 phyml_mat->dist[j][i] = phyml_mat->dist[i][j];
724 }
725 }
726 for(i = 0; i < seqs->n_otu; i++) phyml_mat->dist[i][i] = 0;
727 }
728 return phyml_mat;
729 }
730
731 /* a window where the return key always triggers a button
732 */
733 class return_window : public Fl_Window {
734 Fl_Return_Button *run_button;
735 public:
return_window(int w,int h,const char * t)736 return_window(int w, int h, const char *t) : Fl_Window(w, h, t) {}
run(Fl_Return_Button * b)737 void run(Fl_Return_Button *b) { run_button = b; }
handle(int event)738 int handle(int event) {
739 if (event == FL_KEYBOARD && (Fl::event_key() == FL_Enter || Fl::event_key() == FL_KP_Enter)) {
740 run_button->do_callback();
741 return 1;
742 }
743 return Fl_Window::handle(event);
744 }
745 };
746
747 Fl_Input *name;
748 Fl_Choice *mymodel;
749 Fl_Round_Button *b_alrt; Fl_Round_Button *b_no; Fl_Int_Input *b_count;
750 Fl_Round_Button *f_empir;
751 Fl_Round_Button *r_estim; Fl_Float_Input *r_val;
752 Fl_Round_Button *i_no; Fl_Round_Button *i_estim; Fl_Float_Input *i_val;
753 Fl_Round_Button *g_no; Fl_Choice *g_cats; Fl_Round_Button *a_est; Fl_Float_Input *a_val;
754 Fl_Round_Button *nni_b, *spr_b, *nni_spr_b;
755 Fl_Round_Button *u_bionj, *u_menutree, *u_random;
756 Fl_Choice *u_choice;
757 Fl_Check_Button *u_optimize, *u_quiet, *b_keep_trees, *b_tbe;
758 Fl_Int_Input *u_random_count;
759 char *phyml_path;
760
support_cb(Fl_Widget * wid,void * data)761 static void support_cb(Fl_Widget *wid, void *data)
762 {
763 Fl_Button *b = (Fl_Button*)data;
764 if (b->value()) {
765 b_keep_trees->activate();
766 b_tbe->activate();
767 }
768 else {
769 b_keep_trees->deactivate();
770 b_keep_trees->value(0);
771 b_tbe->deactivate();
772 }
773 }
774
phyml_dialog(SEA_VIEW * view)775 void phyml_dialog(SEA_VIEW *view)
776 {
777 int y = 3;
778 static int first = TRUE;
779 if(first) {
780 char *p, *q;
781 #if defined(__APPLE__)
782 # ifdef __ppc__
783 p = (char*)"phyml_3.0_universal";
784 # else
785 p = (char*)(fl_mac_os_version < 100800 ? "phyml_3.0_universal" : "PhyML-3.1_macOS-MountainLion");
786 # endif // __ppc__
787 if ((q = get_full_path(p)) == NULL) {
788 fl_message("Warning: PhyML executable not found. Please, download the seaview macOS application again.");
789 return;
790 }
791 #else
792 p = get_res_value("phyml",
793 # ifdef WIN32
794 "PhyML-3.1_win32.exe"
795 # elif defined(sparc)
796 "phyml_3.0_sparc"
797 # else
798 # ifndef PHYMLNAME
799 # ifdef __i386__
800 # define PHYMLNAME "PhyML-3.1_linux32"
801 # else
802 # define PHYMLNAME "PhyML-3.1_linux64"
803 # endif // __i386__
804 # endif // PHYMLNAME
805 PHYMLNAME
806 # endif // WIN32
807 );
808 #if defined(__linux__)
809 if ((q = get_full_path(p)) == NULL) p = (char*)"phyml";
810 #endif // __linux__
811 if ((q = get_full_path(p)) == NULL) {
812 int rep = fl_choice("Seaview does not find PhyML under the name %s.\n"
813 "Is PhyML available on your computer ?", "No", "Available", NULL, p);
814 if(rep == 0) return;
815 Fl_Native_File_Chooser *chooser = new Fl_Native_File_Chooser();
816 chooser->type(Fl_Native_File_Chooser::BROWSE_FILE); // let user browse a single file
817 chooser->title("Find PhyML on your computer");
818 q = run_and_close_native_file_chooser(chooser);
819 if(q == NULL) return;
820 set_res_value("phyml", q);
821 save_resources();
822 }
823 #endif // __APPLE__
824 first = FALSE;
825 phyml_path = (char *)malloc(strlen(q) + 3);
826 sprintf(phyml_path, "\"%s\"", q);
827 }
828 return_window *w = new return_window(270,130, "PhyML options");
829 w->set_modal();
830 //model
831 mymodel = new Fl_Choice(50, y, 90, 20, "Model:");
832 mymodel->align(FL_ALIGN_LEFT);
833 if(view->protein) {
834 mymodel->add("LG");
835 mymodel->add("WAG");
836 mymodel->add("Dayhoff");
837 mymodel->add("JTT");
838 mymodel->add("Blosum62");
839 mymodel->add("MtREV");
840 mymodel->add("RtREV");
841 mymodel->add("CpREV");
842 mymodel->add("DCMut");
843 mymodel->add("VT");
844 mymodel->add("MtMam");
845 mymodel->add("MtArt");
846 mymodel->add("HIVw");
847 mymodel->add("HIVb");
848 mymodel->add("FLU");
849 mymodel->value(0);
850 }
851 else {
852 mymodel->add("JC69");
853 mymodel->add("K80");
854 mymodel->add("F81");
855 mymodel->add("HKY85");
856 mymodel->add("F84");
857 mymodel->add("TN93");
858 mymodel->add("GTR");
859 mymodel->value(6);
860 }
861 y += mymodel->h() + 25;
862 //bootstrap
863 Fl_Group *b_group = new Fl_Group(0, y, w->w(), 95, "Branch Support");
864 b_group->box(FL_ROUNDED_BOX);
865 b_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
866 y += 5;
867 b_alrt = new Fl_Round_Button(3, y, 110, 20, "aLRT (SH-like)");
868 b_alrt->type(FL_RADIO_BUTTON);
869 b_no = new Fl_Round_Button(b_alrt->x() + b_alrt->w() + 10, y, 60, 20, "None");
870 b_no->type(FL_RADIO_BUTTON);
871 y += b_alrt->h() + 3;
872 Fl_Round_Button *b_yes = new Fl_Round_Button(3, y, 115, 20, "Bootstrap with");
873 b_yes->type(FL_RADIO_BUTTON);
874 b_alrt->setonly();
875 b_count = new Fl_Int_Input(b_yes->x() + b_yes->w() + 5, y, 50, 20, "replicates");
876 b_count->align(FL_ALIGN_RIGHT);
877 b_count->static_value("100");
878
879 y += b_count->h();
880 b_tbe = new Fl_Check_Button(b_yes->x(), y, 265, 20, "Transfer Bootstrap Expectation method");
881 b_tbe->deactivate();
882 y += b_tbe->h();
883 b_keep_trees = new Fl_Check_Button(b_yes->x(), y, 180, 20, "Show bootstrap trees");
884 b_keep_trees->deactivate();
885
886 b_alrt->callback(support_cb, b_yes);
887 b_no->callback(support_cb, b_yes);
888 b_yes->callback(support_cb, b_yes);
889 b_group->end();
890 y += b_count->h() + 30;
891 // freq + Ts/Tv group
892 Fl_Group *fr_group = new Fl_Group(0, y, w->w(), 75, "");
893 // frequencies group
894 Fl_Group *f_group = new Fl_Group(0, y, w->w(), 30, view->protein?"Amino acid equilibrium frequencies":"Nucleotide equilibrium frequencies");
895 f_group->box(FL_ROUNDED_BOX);
896 f_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
897 y += 5;
898 f_empir = new Fl_Round_Button(3, y, 85, 20, "Empirical");
899 f_empir->type(FL_RADIO_BUTTON);
900 Fl_Round_Button *f_m = new Fl_Round_Button(f_empir->x() + f_empir->w() + 5, y, 60, 20, view->protein?"Model-given":"Optimized");
901 f_m->type(FL_RADIO_BUTTON);
902 (view->protein?f_m:f_empir)->setonly();
903 f_group->end();
904 y += f_group->h() + 15;
905 //Ts/Tv ratio
906 Fl_Group *r_group = new Fl_Group(0, y, w->w(), 30, "Ts/Tv ratio");
907 r_group->box(FL_ROUNDED_BOX);
908 r_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
909 y += 5;
910 r_estim = new Fl_Round_Button(3, y, 85, 20, "Optimized");
911 r_estim->type(FL_RADIO_BUTTON);
912 Fl_Round_Button *r_fix = new Fl_Round_Button(r_estim->x() + r_estim->w() + 5, y, 60, 20, "Fixed");
913 r_fix->type(FL_RADIO_BUTTON);
914 r_fix->setonly();
915 r_val = new Fl_Float_Input(r_fix->x() + r_fix->w() + 10, y, 40, 20, "");
916 r_val->value("4.0");
917 r_group->end();
918 r_group->deactivate();
919 if (view->protein) r_group->hide();
920 else y += r_group->h() + 15;
921 fr_group->end();
922 if (!view->protein) mymodel->callback(model_callback, fr_group);
923 //invariable
924 Fl_Group *i_group = new Fl_Group(0, y, w->w(), 30, "Invariable sites");
925 i_group->box(FL_ROUNDED_BOX);
926 i_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
927 y += 5;
928 i_no = new Fl_Round_Button(3, y, 52, 20, "None");
929 i_no->type(FL_RADIO_BUTTON);
930 i_estim = new Fl_Round_Button(i_no->x() + i_no->w() + 5, y, 85, 20, "Optimized");
931 i_estim->type(FL_RADIO_BUTTON);
932 Fl_Round_Button *i_fix = new Fl_Round_Button(i_estim->x() + i_estim->w() + 2, y, 60, 20, "Fixed");
933 i_fix->type(FL_RADIO_BUTTON);
934 i_no->setonly();
935 i_val = new Fl_Float_Input(i_fix->x() + i_fix->w() + 10, y, 40, 20, "");
936 i_val->value("0.10");
937 i_group->end();
938 y += i_group->h() + 15;
939 //gamma categories
940 Fl_Group *g_group = new Fl_Group(0, y, w->w(), 55, "Across site rate variation");
941 g_group->box(FL_ROUNDED_BOX);
942 g_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
943 y += 5;
944 g_no = new Fl_Round_Button(3, y, 75, 20, "None");
945 g_no->type(FL_RADIO_BUTTON);
946 g_cats = new Fl_Choice(g_no->x() + g_no->w() + 130, y, 50, 20, "# of rate categories");
947 mymodel->align(FL_ALIGN_LEFT);
948 g_cats->add("4");
949 g_cats->add("6");
950 g_cats->add("8");
951 g_cats->add("10");
952 g_cats->add("12");
953 g_cats->value(0);
954 y = g_cats->y() + g_cats->h() + 5;
955 a_est = new Fl_Round_Button(3, y, 85, 20, "Optimized");
956 a_est->type(FL_RADIO_BUTTON);
957 a_est->setonly();
958 Fl_Round_Button *a_fixed = new Fl_Round_Button(a_est->x() + a_est->w() + 5, y, 60, 20, "Fixed");
959 a_fixed->type(FL_RADIO_BUTTON);
960 a_val = new Fl_Float_Input(a_fixed->x() + a_fixed->w() + 10, y, 40, 20, "");
961 a_val->value("2.0");
962 g_group->end();
963 y += a_val->h() + 25;
964 //tree searching
965 Fl_Group *s_group = new Fl_Group(0, y, w->w(), 35, "Tree searching operations");
966 s_group->box(FL_ROUNDED_BOX);
967 s_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
968 y += 5;
969 nni_b = new Fl_Round_Button(3, y, 45, 20, "NNI");
970 nni_b->type(FL_RADIO_BUTTON);
971 spr_b = new Fl_Round_Button(nni_b->x() + nni_b->w() + 5, y, 50, 20, "SPR");
972 spr_b->type(FL_RADIO_BUTTON);
973 nni_spr_b = new Fl_Round_Button(spr_b->x() + spr_b->w() + 5, y, 150, 20, "Best of NNI && SPR");
974 nni_spr_b->type(FL_RADIO_BUTTON);
975 nni_b->setonly();
976 nni_b->callback(change_searching_callback, NULL);
977 spr_b->callback(change_searching_callback, NULL);
978 nni_spr_b->callback(change_searching_callback, NULL);
979 s_group->end();
980 y = s_group->y() + s_group->h() + 5;
981 //starting tree
982 y += 15;
983 Fl_Group *u_group = new Fl_Group(0, y, w->w(), 80, "Starting tree");
984 u_group->box(FL_ROUNDED_BOX);
985 u_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
986 y += 5;
987 u_bionj = new Fl_Round_Button(3, y, 45, 20, "BioNJ");
988 u_bionj->type(FL_RADIO_BUTTON);
989 u_bionj->setonly();
990 u_optimize = new Fl_Check_Button(u_bionj->x() + u_bionj->w() + 30, y, 170, 20, "Optimize tree topology");
991 u_optimize->value(1);
992 y += u_bionj->h() + 5;
993 u_menutree = new Fl_Round_Button(3, y, 95, 20, "User given:");
994 u_menutree->type(FL_RADIO_BUTTON);
995 u_choice = new Fl_Choice(u_menutree->x() + u_menutree->w() + 5, y, 100, 20, "");
996 vlength_menu *vmenu = view->menu_trees;
997 int count = view->menu_trees->vlength();
998 for(int i = 0; i < count; i++) u_choice->add( vmenu->vitem(i)->label() );
999 u_choice->value(0);
1000 if(count <= 0) {u_menutree->deactivate(); u_choice->deactivate(); }
1001 u_random = new Fl_Round_Button(3, u_menutree->y() + u_menutree->h() + 5, 80, 20, "Add: ");
1002 u_random->deactivate();
1003 u_random->type(FL_RADIO_BUTTON);
1004 u_random_count = new Fl_Int_Input(u_random->x() + u_random->w(), u_random->y(), 35, 20, "random starts");
1005 u_random_count->align(FL_ALIGN_RIGHT);
1006 u_random_count->value("5");
1007 u_group->end();
1008 y = u_group->y() + u_group->h() + 5;
1009 // Run
1010 Fl_Button *cancel = new Fl_Button(3, y, 50, 20, "Cancel");
1011 cancel->callback(cancel_callback, NULL);
1012 Fl_Return_Button *ok = new Fl_Return_Button(w->w() - 60, y, 55, 20, "Run");
1013 w->run(ok);
1014 ok->callback(run_phyml_callback, view);
1015 u_quiet = new Fl_Check_Button(cancel->x() + cancel->w() + 40, y, 65, 20, "Quiet");
1016 u_quiet->value(0);
1017 #ifndef WIN32
1018 //u_quiet->hide();
1019 #endif
1020 w->end();
1021 w->size(w->w(), y + cancel->h() + 5);
1022 w->callback(cancel_callback, NULL);
1023 w->show();
1024 }
1025
change_searching_callback(Fl_Widget * ob,void * unused)1026 static void change_searching_callback(Fl_Widget *ob, void *unused)
1027 {
1028 if(ob == nni_b) u_random->deactivate();
1029 else u_random->activate();
1030 }
1031
cancel_callback(Fl_Widget * ob,void * data)1032 static void cancel_callback(Fl_Widget *ob, void *data)
1033 {
1034 Fl_Window *w = ob->window();
1035 if(w == NULL) w = (Fl_Window *)ob;
1036 w->hide();
1037 Fl::delete_widget(w);
1038 }
1039
model_callback(Fl_Widget * ob,void * data)1040 static void model_callback(Fl_Widget *ob, void *data)
1041 {
1042 Fl_Choice *c = (Fl_Choice *)ob;
1043 Fl_Group *fr_group = (Fl_Group*)data;
1044 Fl_Group *f_group = (Fl_Group *)fr_group->child(0);
1045 Fl_Group *r_group = (Fl_Group *)fr_group->child(1);
1046 int v = c->value();
1047 if (v >=0 && v <= 2) f_group->deactivate(); else f_group->activate();
1048 if (v == 1 || v == 3 || v == 4 || v == 5) r_group->activate();
1049 else r_group->deactivate();
1050 }
1051
1052
run_phyml_callback(Fl_Widget * ob,void * data)1053 void run_phyml_callback(Fl_Widget *ob, void *data)
1054 {
1055 FILE *in;
1056 SEA_VIEW *view = (SEA_VIEW *)data;
1057 char input[PATH_MAX], base_fname[PATH_MAX], invar[10], alpha[10], *p;
1058 int replicates, cats, status, seqlen, user_tree = FALSE, i;
1059 float logL;
1060 char **tmpseqs, **tmpnames, **truenames;
1061 int tot_seqs = view->tot_seqs;
1062
1063 p = create_tmp_filename();
1064 if(p == NULL) return;
1065 strcpy(base_fname, p);
1066 strcpy(input, base_fname);
1067 strcat(input, ".phy");
1068 region r, *pr = &r;
1069 list_segments ls;
1070 if(view->tot_sel_seqs > 0 && view->active_region == NULL) {
1071 ls.debut = 1; ls.fin = view->seq_length; ls.next = NULL;
1072 r.list = &ls;
1073 }
1074 else {
1075 pr = view->active_region;
1076 }
1077 if(pr != NULL) {
1078 list_segments *pls;
1079 pls = pr->list;
1080 seqlen = 0;
1081 while(pls != NULL) {
1082 seqlen += pls->fin - pls->debut + 1;
1083 pls = pls->next;
1084 }
1085 }
1086 else seqlen = view->seq_length;
1087 //remove * from saved protein data
1088 if(view->protein) {
1089 tmpseqs = (char **)calloc(view->tot_seqs, sizeof(char *));
1090 for( i = 0; i < view->tot_seqs; i++) {
1091 if(view->tot_sel_seqs > 0 && !view->sel_seqs[i]) continue;
1092 tmpseqs[i] = strdup(view->sequence[i]);
1093 char *p = tmpseqs[i];
1094 while((p = strchr(p, '*')) != NULL) *p = '-';
1095 }
1096 }
1097 else tmpseqs = view->sequence;
1098 //Use Seq##_ as sequence names
1099 tmpnames = (char **)calloc(view->tot_seqs, sizeof(char *));
1100 //memorize seq names because can be changed while phyml runs
1101 truenames = (char **)calloc(view->tot_seqs, sizeof(char *));
1102 int length = 0;
1103 for(i = 0; i < view->tot_seqs; i++) {
1104 if(view->tot_sel_seqs > 0 && !view->sel_seqs[i]) continue;
1105 tmpnames[i] = (char *)malloc(15);
1106 truenames[i] = strdup(view->seqname[i]);
1107 sprintf(tmpnames[i], "Seq%d_", i);
1108 length = FL_max(length, (int)strlen(tmpnames[i]));
1109 }
1110 status = save_phylip_file(input, tmpseqs,
1111 tmpnames, view->tot_seqs, view->each_length, pr,
1112 view->sel_seqs, view->tot_sel_seqs, length);
1113 for(i = 0; i < view->tot_seqs; i++) if(tmpnames[i] != NULL) free(tmpnames[i]);
1114 free(tmpnames);
1115 if(view->protein) {
1116 for( i = 0; i < view->tot_seqs; i++) {
1117 if(tmpseqs[i] != NULL) free(tmpseqs[i]);
1118 }
1119 free(tmpseqs);
1120 }
1121 if(status) goto way_out;
1122 if(b_alrt->value()) replicates = -4;
1123 else if (b_no->value()) replicates = 0;
1124 else sscanf(b_count->value(), "%d", &replicates);
1125 if (!u_optimize->value()) replicates = 0;
1126 if(i_no->value()) strcpy(invar, "0.0");
1127 else if(i_estim->value()) strcpy(invar, "e");
1128 else strcpy(invar, i_val->value());
1129 if(g_no->value()) { cats = 1; strcpy(alpha, "2.0"); }
1130 else {
1131 sscanf(g_cats->mvalue()->label(), "%d", &cats);
1132 if(a_est->value()) strcpy(alpha, "e");
1133 else strcpy(alpha, a_val->value());
1134 }
1135 char modelname[20], *args;
1136 strcpy(modelname, mymodel->mvalue()->label());
1137 //build command line
1138 args = (char *)malloc(1000);
1139 sprintf(args, "%s -d %s -m %s -b %d -v %s -c %d -a %s",
1140 phyml_path, view->protein?"aa":"nt", modelname, replicates, invar, cats, alpha);
1141 if( (!view->protein) &&
1142 (strcmp(modelname,"K80")==0 || strcmp(modelname,"HKY85")==0 || strcmp(modelname,"F84")==0 || strcmp(modelname,"TN93")==0) ) {
1143 if(r_estim->value()) {
1144 strcat(args, " -t e");
1145 }
1146 else {
1147 sprintf(args + strlen(args), " -t %s", r_val->value());
1148 }
1149 }
1150 if( (view->protein) ||
1151 (strcmp(modelname,"JC69")!=0 && strcmp(modelname,"K80")!=0 && strcmp(modelname,"F81")!=0) ) {
1152 if(f_empir->value()) {
1153 strcat(args, " -f e");
1154 }
1155 else {
1156 strcat(args, " -f m");
1157 }
1158 }
1159 if(spr_b->value()) strcat(args, " -s SPR");
1160 else if(nni_spr_b->value()) strcat(args, " -s BEST");
1161 if(!u_optimize->value()) { strcat(args, " -o lr"); user_tree = TRUE; }
1162 if (u_quiet->value()) strcat(args, " --quiet");
1163 else strcat(args, " --no_memory_check");
1164 if(u_menutree->value()) {
1165 char *tree = view->trees[u_choice->value()];
1166 if(*tree == '[' && (p = strchr(tree, ']')) != NULL) { //remove tree header
1167 tree = p + 1;
1168 while(*tree == ' ') tree++;
1169 }
1170 tree = replace_tree_names_by_rank(tree, view->seqname, view->tot_seqs);
1171 if(tree == NULL) {
1172 fl_alert("Names in starting tree and alignment don't match.\nStarting tree will be ignored.");
1173 }
1174 else {
1175 char tmp[PATH_MAX];
1176 sprintf(tmp, "%s.startingtree", base_fname);
1177 FILE *out = fopen(tmp, "w");
1178 fputs(tree, out);
1179 free(tree);
1180 fclose(out);
1181 sprintf(args + strlen(args), " -u \"%s\"", tmp);
1182 }
1183 }
1184 if(u_random->value()) {
1185 sprintf(args + strlen(args), " --rand_start --n_rand_starts %d", atoi(u_random_count->value()));
1186 }
1187 //finish with (long) input filename
1188 sprintf(args + strlen(args), " -i \"%s\"", input);
1189 ob->window()->hide(); //deletion of this window should be done totally at the end of this callback
1190 status = run_external_prog_in_pseudoterm(args, NULL, "tree-building");
1191 free(args);
1192 sprintf(input, "%s.phy_phyml_stats.txt", base_fname);
1193 in = fopen(input, "r");
1194 if (in == NULL) {
1195 sprintf(input, "%s.phy_phyml_stat.txt", base_fname);
1196 in = fopen(input, "r");
1197 }
1198 if (in == NULL) {
1199 sprintf(input, "%s.phy_phyml_stats", base_fname);
1200 in = fopen(input, "r");
1201 }
1202 logL = 0;
1203 if(in != NULL) {//search tree likelihood
1204 while(TRUE) {
1205 char line[100];
1206 p = fgets(line, sizeof(line), in);
1207 if(p == NULL) break;
1208 if( (p=strstr(line, "Log-likelihood:")) != NULL) sscanf(p+15, "%f", &logL);
1209 else if( (p=strstr(line, "Log-likelihood :")) != NULL) sscanf(p+16, "%f", &logL);
1210 }
1211 fclose(in);
1212 }
1213 sprintf(input, "%s.phy_phyml_tree.txt", base_fname);
1214 in = fopen(input, "r");
1215 if (in == NULL) {
1216 sprintf(input, "%s.phy_phyml_tree", base_fname);
1217 in = fopen(input, "r");
1218 }
1219 if( status != 0 || in == NULL ) {
1220 if(in != NULL) fclose(in);
1221 if (is_view_valid(view)) fl_message("%s: tree building cancelled", extract_filename(view->masename));
1222 }
1223 else {
1224 char *display_tree;
1225 fseek(in, 0, SEEK_END);
1226 long l = ftell(in);
1227 display_tree = (char *)malloc(l + 1);
1228 int c; p = display_tree;
1229 fseek(in, 0, SEEK_SET);
1230 while( (c = getc(in)) != EOF) {
1231 if(c != '\n' && c != '\r') *(p++) = c;
1232 }
1233 *p = 0;
1234 fclose(in);
1235 display_tree = put_names_back_in_tree(display_tree, truenames);
1236 //process bootstrap trees
1237 char *bootstrap_trees = NULL;
1238 if (replicates > 0) {
1239 sprintf(input, "%s.phy_phyml_boot_stats.txt", base_fname);
1240 in = fopen(input, "r");
1241 if (!in) {
1242 sprintf(input, "%s.phy_phyml_boot_stats", base_fname);
1243 in = fopen(input, "r");
1244 }
1245 if (in) { // compute the true # of bootstrap replicates because parallelized phyML can compute a few more
1246 char line[150];
1247 while ((p = fgets(line, sizeof(line), in))) {
1248 while (*p == ' ') p++;
1249 if (*p == '#') {
1250 sscanf(p+1, "%d", &replicates);
1251 }
1252 }
1253 fclose(in);
1254 }
1255 }
1256 if (replicates > 0 && (b_keep_trees->value() || b_tbe->value())) {
1257 sprintf(input, "%s.phy_phyml_boot_trees.txt", base_fname);
1258 in = fopen(input, "r");
1259 if (!in) {
1260 sprintf(input, "%s.phy_phyml_boot_trees", base_fname);
1261 in = fopen(input, "r");
1262 }
1263 if (in) {
1264 FILE *out = NULL;
1265 sprintf(input, "%s.phy_phyml_boot_trees_truename.txt", base_fname);
1266 out = fopen(input, "w");
1267 long l = 1000;
1268 while (out) { // for all bootstrap trees
1269 char *b_tree, *q, *tree2;
1270 int c;
1271 b_tree = (char*)malloc(l + 1);
1272 q = b_tree;
1273 do { // read one line = one tree
1274 c = fgetc(in);
1275 if (c == EOF) {
1276 free(b_tree);
1277 break;
1278 }
1279 if (q - b_tree >= l) {
1280 l += 1000;
1281 tree2 = (char *)realloc(b_tree, l + 1);
1282 q = tree2 + (q - b_tree);
1283 b_tree = tree2;
1284 }
1285 *q++ = c;
1286 } while (c != '\n');
1287 if (c == EOF) break;
1288 *q = 0;
1289 b_tree = put_names_back_in_tree(b_tree, truenames);
1290 fputs(b_tree, out);
1291 free(b_tree);
1292 }
1293 fclose(in);
1294 if (out) {
1295 fclose(out);
1296 in = fopen(input, "r");
1297 fseek(in, 0, SEEK_END);
1298 l = ftell(in);
1299 fseek(in, 0, SEEK_SET);
1300 bootstrap_trees = (char *)malloc(l+1);
1301 fread(bootstrap_trees, l, 1, in);
1302 bootstrap_trees[l] = 0;
1303 fclose(in);
1304 }
1305 }
1306 }
1307 //clean tree internal labels
1308 char *display_tree_tbe = NULL;
1309 if(replicates > 0) {
1310 if (b_tbe->value()){
1311 char *tmp = strdup(display_tree);
1312 display_tree_tbe = TBE_Compute(tmp, bootstrap_trees);
1313 }
1314 display_tree = bootstrap_reformat(display_tree, replicates);
1315 if(bootstrap_trees && !b_keep_trees->value()) {
1316 free(bootstrap_trees);
1317 bootstrap_trees = NULL;
1318 }
1319 }
1320 else if(replicates < 0) display_tree = alrt_reformat(display_tree);
1321
1322 //assemble tree description
1323 char tree_label[100];
1324 sprintf(tree_label, "PhyML ln(L)=%.1f %d sites %s", logL, seqlen, modelname);
1325 if(replicates > 0) {
1326 sprintf(tree_label + strlen(tree_label), " %d replic.", replicates);
1327 }
1328 if(cats > 1) sprintf(tree_label + strlen(tree_label), " %d rate classes", cats);
1329 if(user_tree) strcat(tree_label, " User-tree");
1330 //add label to tree
1331 char *tree = (char *)malloc(strlen(tree_label) + strlen(display_tree) + 4);
1332 sprintf(tree, "[%s] %s", tree_label, display_tree);
1333 free(display_tree);
1334
1335 if (replicates > 0 && b_tbe->value()) {
1336 //assemble TBE tree description
1337 char tree_label_tbe[100];
1338 sprintf(tree_label_tbe, "PhyML ln(L)=%.1f %d sites %s", logL, seqlen, modelname);
1339 sprintf(tree_label_tbe + strlen(tree_label_tbe), " %d replic. (TBE)", replicates);
1340 if(cats > 1) sprintf(tree_label_tbe + strlen(tree_label_tbe), " %d rate classes", cats);
1341 if(user_tree) strcat(tree_label_tbe, " User-tree");
1342 //add label to tree
1343 char *tree = (char *)malloc(strlen(tree_label_tbe) + strlen(display_tree_tbe) + 4);
1344 sprintf(tree, "[%s] %s", tree_label_tbe, display_tree_tbe);
1345 free(display_tree_tbe);
1346 display_tree_tbe = tree;
1347 }
1348
1349 if (is_view_valid(view)) {
1350 const char *n = extract_filename(view->masename);
1351 char *title = NULL;
1352 if (n) {
1353 char *q = (char*)strchr(n, '.');
1354 if (q) *q = 0;
1355 title = new char[strlen(n) + 12];
1356 sprintf(title, "%s-%s", n, "PhyML_tree");
1357 if (q) *q = '.';
1358 }
1359 Fl_Window *btreew=0, *treew=0;
1360 if (replicates > 0 && b_keep_trees->value()) {
1361 btreew = treedraw(bootstrap_trees, view, "bootstrap trees", FALSE);
1362 }
1363 if (replicates > 0 && b_tbe->value()) {
1364 const char *p = n ? title:"PhyML_tree";
1365 char *q = (char*)malloc(strlen(p) + 5);
1366 sprintf(q, "%s_TBE", p);
1367 treedraw(display_tree_tbe, view, q, FALSE);
1368 }
1369 treew = treedraw(tree, view, n ? title:"PhyML_tree", FALSE);
1370 #if !defined(__APPLE__)
1371 if (treew && btreew) treew->position(btreew->x() + 30, btreew->y() + 40);
1372 #endif
1373 delete[] title;
1374 }
1375 }
1376 Fl::delete_widget(ob->window());
1377 way_out:
1378 for(i = 0; i < tot_seqs; i++) {
1379 if(truenames[i]) free(truenames[i]);
1380 }
1381 free(truenames);
1382 delete_tmp_filename(base_fname);
1383 }
1384
1385
view_to_allseq(SEA_VIEW * view,int remove_all_gaps)1386 allseq *view_to_allseq(SEA_VIEW *view, int remove_all_gaps)
1387 {
1388 int i, j, l;
1389 list_segments *ls;
1390 char *p;
1391 allseq *phyml_seqs = (allseq *)calloc(1, sizeof(allseq));
1392 phyml_seqs->n_otu = view->tot_sel_seqs == 0 ? view->tot_seqs : view->tot_sel_seqs;
1393 phyml_seqs->c_seq = (struct __Seq **)calloc(phyml_seqs->n_otu, sizeof(struct __Seq*));
1394 l = 0;
1395 if(view->active_region == NULL) {
1396 for( i = 0; i < view->tot_seqs; i++) {
1397 if(view->tot_sel_seqs != 0 && !view->sel_seqs[i]) continue;
1398 if(view->each_length[i] > l) l = view->each_length[i];
1399 }
1400 }
1401 else {
1402 ls = view->active_region->list;
1403 while(ls != NULL) {
1404 l += ls->fin - ls->debut + 1;
1405 ls = ls->next;
1406 }
1407 }
1408 phyml_seqs->clean_len = l;
1409 phyml_seqs->wght = (int *)calloc(1, l * sizeof(int));
1410 for( i = 0; i < l; i++) {
1411 phyml_seqs->wght[i] = 1;
1412 }
1413 j = 0;
1414 for( i = 0; i < view->tot_seqs; i++) {
1415 if(view->tot_sel_seqs != 0 && !view->sel_seqs[i]) continue;
1416 phyml_seqs->c_seq[j] = (struct __Seq *)calloc(1, sizeof(struct __Seq));
1417 phyml_seqs->c_seq[j]->name = view->seqname[i];
1418 phyml_seqs->c_seq[j]->len = l;
1419 phyml_seqs->c_seq[j]->state = (char *)malloc(l + 1);
1420 if(view->active_region == NULL) {
1421 memcpy(phyml_seqs->c_seq[j]->state, view->sequence[i], view->each_length[i] );
1422 if( l > view->each_length[i] ) {
1423 memset(phyml_seqs->c_seq[j]->state + view->each_length[i], '-', l - view->each_length[i]);
1424 }
1425 }
1426 else {
1427 ls = view->active_region->list;
1428 p = phyml_seqs->c_seq[j]->state;
1429 while(ls != NULL) {
1430 if(ls->fin <= view->each_length[i]) {
1431 memcpy(p, view->sequence[i] + ls->debut - 1, ls->fin - ls->debut + 1 );
1432 }
1433 else {
1434 int lrem = view->each_length[i] - ls->debut + 1;
1435 if(lrem > 0) memcpy(p, view->sequence[i] + ls->debut - 1, lrem );
1436 if(lrem < 0) lrem = 0;
1437 memset(p + lrem, '-', ls->fin - ls->debut + 1 - lrem);
1438 }
1439 p += ls->fin - ls->debut + 1;
1440 ls = ls->next;
1441 }
1442 }
1443 phyml_seqs->c_seq[j]->state[l] = 0;
1444 majuscules(phyml_seqs->c_seq[j]->state);
1445 p = phyml_seqs->c_seq[j]->state;
1446 if(!view->protein) { while((p = strchr(p, 'U')) != NULL) *p = 'T'; }
1447 else { while((p = strchr(p, '*')) != NULL) *p = '-'; }//replace stops by gaps
1448 if (!view->protein) { // remove non-nucleotide characters
1449 p = phyml_seqs->c_seq[j]->state;
1450 while (*p) {
1451 if (strchr("ABCDGHKMNRSTUVWXY-", *p) == NULL) *p = 'N';
1452 p++;
1453 }
1454 }
1455 j++;
1456 }
1457 //remove gap-only or gap-with sites
1458 for( j = 0; j < phyml_seqs->clean_len; j++) {
1459 if(remove_all_gaps) {//remove any gap-containing site
1460 for( i = 0; i < phyml_seqs->n_otu; i++) {
1461 if(phyml_seqs->c_seq[i]->state[j] == '-') break;
1462 }
1463 if(i == phyml_seqs->n_otu) continue;
1464 }
1465 else {//remove gap-only sites
1466 for( i = 0; i < phyml_seqs->n_otu; i++) {
1467 if(phyml_seqs->c_seq[i]->state[j] != '-') break;
1468 }
1469 if(i != phyml_seqs->n_otu) continue;
1470 }
1471 for( i = 0; i < phyml_seqs->n_otu; i++) {
1472 memmove(phyml_seqs->c_seq[i]->state + j, phyml_seqs->c_seq[i]->state + j + 1,
1473 phyml_seqs->clean_len - j);
1474 }
1475 j--; phyml_seqs->clean_len--;
1476 }
1477 return phyml_seqs;
1478 }
1479
1480
gaps_as_unknown_states(allseq * phyml_seqs,int protein)1481 void gaps_as_unknown_states(allseq *phyml_seqs, int protein)
1482 {
1483 char unknown = (protein ? 'X' : 'N');
1484 for( int i = 0; i < phyml_seqs->n_otu; i++) {
1485 for( int j = 0; j < phyml_seqs->clean_len; j++) {
1486 if(phyml_seqs->c_seq[i]->state[j] == '-') phyml_seqs->c_seq[i]->state[j] = unknown;
1487 }
1488 }
1489 }
1490
free_after_view_to_allseq(allseq * phyml_seqs)1491 void free_after_view_to_allseq(allseq *phyml_seqs)
1492 {
1493 int i;
1494 for( i = 0; i < phyml_seqs->n_otu; i++) {
1495 free(phyml_seqs->c_seq[i]->state);
1496 free(phyml_seqs->c_seq[i]);
1497 }
1498 free(phyml_seqs->c_seq);
1499 free(phyml_seqs->wght);
1500 free(phyml_seqs);
1501 }
1502
1503 /*void initialize_random_series(void)
1504 {
1505 static int first = TRUE;
1506 unsigned seed;
1507 if(first) {
1508 first = FALSE;
1509 #ifdef WIN32
1510 SYSTEMTIME timing;
1511 GetSystemTime(&timing);
1512 seed = (unsigned)(timing.wMilliseconds + 1000*(timing.wSecond + 60*(timing.wMinute + 60*(timing.wHour + 24*timing.wDay))));
1513 #else
1514 struct tms timing;
1515 seed = (unsigned)times(&timing);
1516 #endif
1517 srand(seed);
1518 }
1519 }*/
1520
1521
next_random(void)1522 double next_random(void)
1523 {
1524 static int first = TRUE;
1525 unsigned seed;
1526 if (first) {
1527 first = FALSE;
1528 #ifdef WIN32
1529 SYSTEMTIME timing;
1530 GetSystemTime(&timing);
1531 seed = (unsigned)(timing.wMilliseconds + 1000*(timing.wSecond + 60*(timing.wMinute + 60*(timing.wHour + 24*timing.wDay))));
1532 srand(seed);
1533 #else
1534 struct tms timing;
1535 seed = (unsigned)times(&timing);
1536 srandom(seed);
1537 #endif
1538 }
1539
1540 #ifdef WIN32
1541 return rand() / (RAND_MAX +(double)1.);
1542 #else
1543 return random() / (RAND_MAX +(double)1.);
1544 #endif
1545 }
1546
bootstrap_weights(allseq * seqs,int use_codons)1547 void bootstrap_weights(allseq *seqs, int use_codons)
1548 {
1549 phydbl buff;
1550 int j, position;
1551 //initialize_random_series();
1552 memset(seqs->wght, 0, seqs->clean_len * sizeof(int));
1553 if(!use_codons) {//bootstrap sites
1554 for(j = 0; j < seqs->clean_len; j++) {
1555 /*buff = rand();
1556 buff /= (RAND_MAX+1.);*/
1557 buff = next_random();
1558 buff *= seqs->clean_len;
1559 position = (int)floor(buff);
1560 seqs->wght[position] += 1;
1561 }
1562 }
1563 else {//bootstrap codons
1564 int ncodons = (seqs->clean_len/3);
1565 int l3 = 3*ncodons;
1566 for(j = 0; j < l3; j += 3) {
1567 /*buff = rand();
1568 buff /= (RAND_MAX+1.);*/
1569 buff = next_random();
1570 buff *= ncodons;
1571 position = (int)floor(buff);
1572 seqs->wght[3 * position] += 1;//only 0, 3, 6,... have weights
1573 }
1574 }
1575 }
1576
1577
Obs_Dist(allseq * data,model * mod)1578 matrix *Obs_Dist(allseq *data, model *mod)
1579 {
1580 int site,i,j,k;
1581 phydbl unc_len;
1582 matrix *mat;
1583 phydbl **len;
1584
1585
1586 len = (phydbl **)mCalloc(data->n_otu,sizeof(phydbl *));
1587 For(i,data->n_otu)
1588 len[i] = (phydbl *)mCalloc(data->n_otu,sizeof(phydbl));
1589
1590 unc_len = .0;
1591
1592 mat = Make_Mat(data->n_otu);
1593 Init_Mat(mat,data);
1594
1595 Fors(site,data->c_seq[0]->len,mod->stepsize)
1596 {
1597 if (tree_build_interrupted) break;
1598 For(j,data->n_otu-1)
1599 {
1600 for(k=j+1;k<data->n_otu;k++)
1601 {
1602 if((!Is_Ambigu(data->c_seq[j]->state+site,mod->datatype,mod->stepsize)) &&
1603 (!Is_Ambigu(data->c_seq[k]->state+site,mod->datatype,mod->stepsize)))
1604 {
1605 len[j][k]+=data->wght[site];
1606 len[k][j]=len[j][k];
1607 if(strncmp(data->c_seq[j]->state+site,
1608 data->c_seq[k]->state+site,
1609 mod->stepsize))
1610 mat->P[j][k]+=data->wght[site];
1611 }
1612 }
1613 }
1614 }
1615
1616
1617 For(i,data->n_otu-1)
1618 for(j=i+1;j<data->n_otu;j++)
1619 {
1620 if(len[i][j])
1621 {
1622 mat->P[i][j] /= len[i][j];
1623 }
1624 else
1625 {
1626 mat->P[i][j] = 1.;
1627 }
1628
1629 mat->P[j][i] = mat->P[i][j];
1630
1631 mat->dist[i][j] = mat->P[i][j];
1632
1633 mat->dist[j][i] = mat->dist[i][j];
1634 }
1635
1636 For(i,data->n_otu) free(len[i]);
1637 free(len);
1638
1639 return mat;
1640 }
1641
1642
Kimura_p_Dist(allseq * data)1643 matrix *Kimura_p_Dist(allseq *data)
1644 {
1645 int i, j;
1646 model jcmodel;
1647 jcmodel.stepsize = 1;
1648 jcmodel.datatype = 1;
1649 jcmodel.ns = 20;
1650 matrix *mat = Obs_Dist(data, &jcmodel);
1651 for(i = 0; i < data->n_otu; i++) {
1652 if (tree_build_interrupted) break;
1653 for(j = i + 1; j < data->n_otu; j++) {
1654 phydbl x = 1 - mat->dist[i][j] - 0.2 * mat->dist[i][j] * mat->dist[i][j];
1655 if(x <= 0) {
1656 Free_Mat(mat);
1657 return NULL;
1658 }
1659 mat->dist[i][j] = -log(x);
1660 mat->dist[j][i] = mat->dist[i][j];
1661 }
1662 }
1663 return mat;
1664 }
1665
1666
alrt_reformat(char * tree)1667 char *alrt_reformat(char *tree)
1668 //set aLRT support values to 2 decimal digits
1669 {
1670 char *p, *q, tmp[20];
1671 float alrt;
1672 int l;
1673 p = tree;
1674 while((p = strchr(p, ')')) != NULL) {
1675 q = p++;
1676 while(TRUE) {
1677 q++;
1678 if(*q == 0 ) break;
1679 if(strchr("0123456789.-+ ", *q) == NULL) break;
1680 }
1681 if(*q != ':' ) continue;
1682 sscanf(p, "%f", &alrt);
1683 sprintf(tmp, "%.2f", alrt);
1684 l = strlen(tmp);
1685 if(l < q - p) { memset(p, ' ', q - p); memcpy(p, tmp, l); }
1686 }
1687 return tree;
1688 }
1689
bootstrap_reformat(char * tree,int replicates)1690 char *bootstrap_reformat(char *tree, int replicates)
1691 //replace absolute bootstrap values by percentages
1692 {
1693 char *p, *q, *newtree, *pnew, *next;
1694 int count;
1695 newtree = (char *)malloc(500000);
1696 p = tree; pnew = newtree; next = tree;
1697 while((p = strchr(p, ')')) != NULL) {
1698 q = p++;
1699 while(TRUE) {
1700 q++;
1701 if(*q == 0 ) break;
1702 if(strchr("0123456789 ", *q) == NULL) break;
1703 }
1704 if(*q != ':' ) continue;
1705 memcpy(pnew, next, p - next);
1706 pnew += p - next;
1707 next = p;
1708 sscanf(p, "%d", &count);
1709 sprintf(pnew, "%d", (int)(100*count/float(replicates) + .5));
1710 pnew += strlen(pnew);
1711 next = q;
1712 }
1713 memcpy(pnew, next, strlen(next));
1714 pnew += strlen(next);
1715 *pnew = 0;
1716 free(tree);
1717 return newtree;
1718 }
1719
K80_dist_nogamma(allseq * data)1720 matrix *K80_dist_nogamma(allseq *data)
1721 {
1722 int i,j,k;
1723 phydbl unc_len;
1724 matrix *mat;
1725 phydbl **len;
1726
1727 len = (phydbl **)mCalloc(data->n_otu,sizeof(phydbl *));
1728 For(i,data->n_otu)
1729 len[i] = (phydbl *)mCalloc(data->n_otu,sizeof(phydbl));
1730
1731 unc_len = .0;
1732
1733 mat = Make_Mat(data->n_otu);
1734 Init_Mat(mat,data);
1735
1736
1737 For(i,data->c_seq[0]->len)
1738 {
1739 if (tree_build_interrupted) break;
1740 For(j,data->n_otu-1)
1741 {
1742 for(k=j+1;k<data->n_otu;k++)
1743 {
1744 if(((data->c_seq[j]->state[i] == 'A' || data->c_seq[j]->state[i] == 'G') &&
1745 (data->c_seq[k]->state[i] == 'C' || data->c_seq[k]->state[i] == 'T'))||
1746 ((data->c_seq[j]->state[i] == 'C' || data->c_seq[j]->state[i] == 'T') &&
1747 (data->c_seq[k]->state[i] == 'A' || data->c_seq[k]->state[i] == 'G')))
1748 {
1749 mat->Q[j][k]+=data->wght[i];
1750 len[j][k]+=data->wght[i];
1751 len[k][j]=len[j][k];
1752 }
1753
1754 else
1755 if(((data->c_seq[j]->state[i] == 'A' && data->c_seq[k]->state[i] == 'G') ||
1756 (data->c_seq[j]->state[i] == 'G' && data->c_seq[k]->state[i] == 'A'))||
1757 ((data->c_seq[j]->state[i] == 'C' && data->c_seq[k]->state[i] == 'T') ||
1758 (data->c_seq[j]->state[i] == 'T' && data->c_seq[k]->state[i] == 'C')))
1759 {
1760 mat->P[j][k]+=data->wght[i];
1761 len[j][k]+=data->wght[i];
1762 len[k][j]=len[j][k];
1763 }
1764 else
1765 if((data->c_seq[j]->state[i] == 'A' ||
1766 data->c_seq[j]->state[i] == 'C' ||
1767 data->c_seq[j]->state[i] == 'G' ||
1768 data->c_seq[j]->state[i] == 'T')&&
1769 (data->c_seq[k]->state[i] == 'A' ||
1770 data->c_seq[k]->state[i] == 'C' ||
1771 data->c_seq[k]->state[i] == 'G' ||
1772 data->c_seq[k]->state[i] == 'T'))
1773 {
1774 len[j][k]+=data->wght[i];
1775 len[k][j]=len[j][k];
1776 }
1777 }
1778 }
1779 }
1780
1781
1782 For(i,data->n_otu-1)
1783 for(j=i+1;j<data->n_otu;j++)
1784 {
1785 if(len[i][j])
1786 {
1787 mat->P[i][j] /= len[i][j];
1788 mat->Q[i][j] /= len[i][j];
1789 }
1790 else
1791 {
1792 mat->P[i][j] = .5;
1793 mat->Q[i][j] = .5;
1794 }
1795
1796 mat->P[j][i] = mat->P[i][j];
1797 mat->Q[j][i] = mat->Q[i][j];
1798
1799
1800 if((1-2*mat->P[i][j]-mat->Q[i][j] <= .0) || (1-2*mat->Q[i][j] <= .0))
1801 {
1802 Free_Mat(mat);
1803 mat = NULL;
1804 goto out;
1805 }
1806
1807 /* mat->dist[i][j] = (g_shape/2)*
1808 (pow(1-2*mat->P[i][j]-mat->Q[i][j],-1./g_shape) +
1809 0.5*pow(1-2*mat->Q[i][j],-1./g_shape) - 1.5); */
1810
1811 mat->dist[i][j] = -0.5 * log(1-2*mat->P[i][j]-mat->Q[i][j]) - 0.25 * log(1-2*mat->Q[i][j]);
1812 mat->dist[j][i] = mat->dist[i][j];
1813 }
1814
1815 out:For(i,data->n_otu) free(len[i]);
1816 free(len);
1817 return mat;
1818 }
1819
HKY_dist(allseq * data)1820 matrix *HKY_dist(allseq *data)
1821 {
1822 int i,j;
1823 matrix *mat;
1824 double x;
1825
1826 mat = Make_Mat(data->n_otu);
1827 Init_Mat(mat,data);
1828 For(i,data->n_otu - 1)
1829 {
1830 for(j = i + 1; j < data->n_otu; j++)
1831 {
1832 if (tree_build_interrupted) return mat;
1833 x = hky(data->c_seq[i]->state, data->c_seq[j]->state, data->clean_len, data->wght);
1834 if(x == -1) {
1835 Free_Mat(mat);
1836 return NULL;
1837 }
1838 mat->dist[i][j] = x;
1839 mat->dist[j][i] = mat->dist[i][j];
1840 }
1841 }
1842 return mat;
1843 }
1844
1845
1846 /* Distance for Hasegawa, Kishino and Yano model */
hky(char * seq1,char * seq2,int lg,int * wght)1847 double hky(char* seq1, char* seq2, int lg, int *wght)
1848 {
1849 double d, freq[16], a, c, g, t, r, y, P, P1, P2, Q, A1, A2, A3, gamma, cc, ee,
1850 va1, va2, cova1a2, cova1a3, cova2a3, delta, epsilon, ksi, eta, nu, ff;
1851 double larg1, larg2, larg3;
1852
1853 freq_obs(seq1, seq2, freq, lg, wght);
1854
1855 P1=freq[2]+freq[8];
1856 P2=freq[7]+freq[13];
1857 P=P1+P2;
1858 Q=freq[1]+freq[3]+freq[4]+freq[6]+freq[9]+freq[11]+freq[12]+freq[14];
1859
1860 if(P+Q == 0) return 0;
1861
1862 a=freq[0]+(freq[1]+freq[2]+freq[3]+freq[4]+freq[8]+freq[12])/2;
1863 c=freq[5]+(freq[1]+freq[4]+freq[6]+freq[7]+freq[9]+freq[13])/2;
1864 g=freq[10]+(freq[2]+freq[6]+freq[8]+freq[9]+freq[11]+freq[14])/2;
1865 t=1.-a-c-g;
1866 r=a+g;
1867 y=c+t;
1868
1869 larg1=1-Q/(2*r*y);
1870 larg2=1-Q/(2*r)-(r*P1)/(2*a*g);
1871 larg3=1-Q/(2*y)-(y*P2)/(2*c*t);
1872
1873 if(larg1<=0. || larg2<=0. || larg3<=0.)
1874 return -1.;
1875
1876 A1=(y/r)*log(larg1)-log(larg2)/r;
1877 A2=(r/y)*log(larg1)-log(larg3)/y;
1878 A3=-log(larg1);
1879
1880 cc=1-Q/(2*r*y);
1881 ee=1-(r*P1)/(2*a*g)-Q/(2*r);
1882 ff=1-(y*P2)/(2*c*t)-Q/(2*y);
1883
1884 delta=1/(2*ee*r*r)-1/(2*cc*r*r);
1885 epsilon=1/(2*ee*a*g);
1886 ksi=1/(2*y*y*ff)-1/(2*y*y*cc);
1887 eta=1/(2*c*t*ff);
1888 nu=1/(2*r*y*cc);
1889
1890 va1=((delta*delta*Q+epsilon*epsilon*P1)-(delta*Q+epsilon*P1)*(delta*Q+epsilon*P1))/lg;
1891 va2=((ksi *ksi *Q+eta *eta *P2)-(ksi *Q+eta *P2)*(ksi *Q+eta *P2))/lg;
1892 cova1a2=(delta*ksi*Q*(1-Q)-delta*eta*Q*P2-epsilon*eta*P1*P2)/lg;
1893 cova1a3=nu*Q*(delta*(1-Q)-epsilon*P1)/lg;
1894 cova2a3=nu*Q*(ksi *(1-Q)-eta *P2)/lg;
1895
1896 gamma=(va2-cova1a2)/(va1+va2-2*cova1a2) + ((r*y)/(a*g+c*t)) * ((cova1a3-cova2a3)/(va1+va2-2*cova1a2));
1897
1898 d=2*(a*g+c*t)*(gamma*A1+(1-gamma)*A2)+2*r*y*A3;
1899
1900 return(d);
1901 }
1902
1903
1904 /* freq_obs */
1905 /* Write at address freq observed frequencies of 16 di-nucleotides XY */
1906 /* (X= A,C,G,T , Y=A,C,G,T) X and Y being homologous nucleotides of sequences */
1907 /* seq1 and seq2. Alphabetic order is used : freq[0]=AA frequency, freq[1]=AC, */
1908 /* ..., freq[15]=TT. */
freq_obs(char * seq1,char * seq2,double * freq,int lgseq,int * wght)1909 int freq_obs(char* seq1, char* seq2, double* freq, int lgseq, int *wght)
1910 {
1911 int i, lgseqvrai, w;
1912
1913 for(i=0;i<16;i++) freq[i]=0;
1914 lgseqvrai = lgseq;
1915 for(i=0;i<lgseq;i++){
1916 if( (w = wght[i]) == 0) continue;
1917 switch(seq1[i]){
1918 case 'A':
1919 switch(seq2[i]){
1920 case 'A' : freq[0] += w; break;
1921 case 'C' : freq[1] += w; break;
1922 case 'G' : freq[2] += w; break;
1923 case 'T' : freq[3] += w; break;
1924 default : lgseqvrai -= w; break;
1925 }
1926 break;
1927 case 'C':
1928 switch(seq2[i]){
1929 case 'A' : freq[4] += w; break;
1930 case 'C' : freq[5] += w; break;
1931 case 'G' : freq[6] += w; break;
1932 case 'T' : freq[7] += w; break;
1933 default : lgseqvrai -= w; break;
1934 }
1935 break;
1936 case 'G':
1937 switch(seq2[i]){
1938 case 'A' : freq[8] += w; break;
1939 case 'C' : freq[9] += w; break;
1940 case 'G' : freq[10] += w; break;
1941 case 'T' : freq[11] += w; break;
1942 default : lgseqvrai -= w; break;
1943 }
1944 break;
1945 case 'T':
1946 switch(seq2[i]){
1947 case 'A' : freq[12] += w; break;
1948 case 'C' : freq[13] += w; break;
1949 case 'G' : freq[14] += w; break;
1950 case 'T' : freq[15] += w; break;
1951 default : lgseqvrai -= w; break;
1952 }
1953 break;
1954 default :
1955 lgseqvrai -= w;
1956 }
1957 }
1958 if(lgseqvrai != 0){
1959 for(i=0;i<16;i++) freq[i] /= lgseqvrai;
1960 return 1;
1961 }
1962 else return 0;
1963 }
1964
informative(char ** sequ,int nb)1965 int informative(char** sequ, int nb)
1966 {
1967
1968 int i, j, k, info=0, siteok=0;
1969 char firstpair;
1970
1971
1972 for(i=0;sequ[0][i];i++){
1973 firstpair=0;
1974 siteok=0;
1975 for(j=0;j<nb-1;j++){
1976 for(k=j+1;k<nb;k++){
1977 if(sequ[j][i]==sequ[k][i]){
1978 if(firstpair){
1979 if(sequ[j][i]!=firstpair) { info++; siteok=1; break; }
1980 }
1981 else
1982 firstpair=sequ[j][i];
1983 }
1984 }
1985 if (siteok) break;
1986 }
1987 }
1988
1989 return info;
1990 }
1991
1992
1993 /*void jumble_fct(char** seq, char** name, int notu)
1994 {
1995 int i, j, rando;
1996 double buff;
1997 char *tmp;
1998 //initialize_random_series();
1999 for(i = 0; i<notu - 1; i++){
2000 j = notu - i - 1;
2001 //buff = rand();
2002 //buff /= (RAND_MAX+1.);
2003 buff = next_random();
2004 buff *= j + 1;
2005 rando = (int)floor(buff);
2006 if(j != rando) {
2007 tmp = seq[j],
2008 seq[j] = seq[rando];
2009 seq[rando] = tmp;
2010 tmp = name[j],
2011 name[j] = name[rando];
2012 name[rando] = tmp;
2013 }
2014 }
2015 }*/
2016
add_equally_best(char * tree,char * all_best,int count)2017 char *add_equally_best(char *tree, char *all_best, int count)
2018 {
2019 char *p, *q, *r, *final_tree;
2020 final_tree = (char*)malloc(strlen(tree) + strlen(all_best)+ count * 30);
2021 strcpy(final_tree, tree);
2022 q = final_tree + strlen(final_tree);
2023 *q++ = '\n';
2024 p = all_best;
2025 if (count > 1) {
2026 int rank = 0;
2027 while (count-- > 0) {
2028 r = strchr(p, '\n');
2029 sprintf(q, "[equally best tree #%d]", ++rank);
2030 q += strlen(q);
2031 memcpy(q, p, r - p + 1);
2032 q += r-p+1;
2033 p = r+1;
2034 }
2035 }
2036 *q = 0;
2037 free(tree);
2038 free(all_best);
2039 return final_tree;
2040 }
2041
run_parsimony_method(SEA_VIEW * view,int nogaps,const char * gapsunknown,int nreplicates,int njumbles,int maxbest,int user_tree,Fl_Box * w_count,dnapars_S_option s_option,const char ** pprogname,float consensus_threshold,int no_gui,char ** p_bootstrap_trees,void (* alert)(const char *,...))2042 static char *run_parsimony_method(SEA_VIEW *view, int nogaps, const char *gapsunknown, int nreplicates, int njumbles,
2043 int maxbest, int user_tree, Fl_Box *w_count, dnapars_S_option s_option,
2044 const char **pprogname, float consensus_threshold, int no_gui, char **p_bootstrap_trees, void (*alert)(const char *,...) )
2045 {
2046 char *tree = 0, *final_tree = 0, *all_best;
2047 int i, steps, info, count, replicates_done, count_best;
2048 const char *p;
2049 allseq *tmpallseqs = view_to_allseq(view, nogaps);
2050 if(*gapsunknown) gaps_as_unknown_states(tmpallseqs, view->protein);
2051 char **pars_seqs = (char **)malloc(tmpallseqs->n_otu * sizeof(char *));
2052 char **pars_names = (char **)malloc(tmpallseqs->n_otu * sizeof(char *));
2053 for( i = 0; i < tmpallseqs->n_otu; i++) {
2054 pars_seqs[i] = tmpallseqs->c_seq[i]->state;
2055 pars_names[i] = tmpallseqs->c_seq[i]->name;
2056 }
2057 info = informative(pars_seqs, tmpallseqs->n_otu);
2058 if(view->protein) *pprogname = "Protpars";
2059 else {
2060 *pprogname = "Dnapars";
2061 if (tmpallseqs->clean_len >= SHRT_MAX) {
2062 alert("Can't compute parsimony with DNA seqs longer than %d.", SHRT_MAX);
2063 goto way_out;
2064 }
2065 }
2066 if (nreplicates) {
2067 tree = parsimony_with_bootstrap(tmpallseqs, pars_seqs, pars_names, tmpallseqs->n_otu, njumbles,
2068 view->protein, &steps, maxbest, nreplicates, w_count, &replicates_done, s_option, no_gui, consensus_threshold, &count_best,
2069 p_bootstrap_trees);
2070 if (tree) {
2071 final_tree = (char *)malloc(strlen(tree) + 200);
2072 char best[35] = "";
2073 if (count_best > 1) sprintf(best, "consensus of %d best trees, ", count_best);
2074 sprintf(final_tree, "[%s, bootstrap with %d replic., %d steps, %s%d sites (%d informative)%s]%s",
2075 *pprogname, replicates_done, steps, best, tmpallseqs->clean_len, info, gapsunknown, tree);
2076 }
2077 }
2078 else if(user_tree) {
2079 char *differs, *input_tree;
2080 p = view->trees[user_tree - 1];
2081 while(*p == ' ') p++;
2082 if(*p == '[') p = strchr(p, ']')+ 1;
2083 while(*p == ' ') p++;
2084 input_tree = strdup(p);
2085 i = compare_newick_with_names(input_tree, pars_names, tmpallseqs->n_otu, &differs);
2086 if (i != 0) {
2087 alert("Names in user-tree and target sequences differ:\n%s in %s only",
2088 differs, i == 1 ? "user-tree" : "target sequences");
2089 tree = NULL;
2090 }
2091 else {
2092 input_tree = (char *)realloc(input_tree, strlen(input_tree) + 4 * tmpallseqs->n_otu + 6 );
2093 if( make_binary_or_unrooted(input_tree) != NULL) return NULL;
2094 strcat(input_tree, ";");
2095 tree = parsimony(pars_seqs, pars_names, tmpallseqs->n_otu, 0, &steps, input_tree, maxbest, NULL, view->protein, s_option, no_gui, NULL);
2096 free(input_tree);
2097 if(tree == NULL) {
2098 alert("Error in user-tree");
2099 }
2100 else {
2101 final_tree = (char *)malloc(strlen(tree) + 150);
2102 sprintf(final_tree, "[%s, user-tree, %d steps, %d sites (%d informative)%s]%s",
2103 *pprogname, steps, tmpallseqs->clean_len, info, gapsunknown, tree);
2104 }
2105 }
2106 }
2107 else {
2108 tree = parsimony_with_jumble(pars_seqs, pars_names, tmpallseqs->n_otu, njumbles, view->protein,
2109 &steps, &count, maxbest, w_count, s_option, no_gui, consensus_threshold);
2110 if (count > 1) { // memorize all equally best trees in all_best string
2111 char *q = strchr(tree, '\n');
2112 all_best = strdup(q + 1);
2113 *q = 0;
2114 }
2115 if (tree && count > 1 && !view->protein) { /* with DNA, fit branch lengths to consensus of most parsimonious trees */
2116 char *input_tree = tree;
2117 int steps2; /* don't use the steps of this consensus tree because it can be larger than parsimonious steps */
2118 tree = parsimony(pars_seqs, pars_names, tmpallseqs->n_otu, 0, &steps2, input_tree, maxbest, NULL, false, s_option, no_gui, NULL);
2119 free(input_tree);
2120 }
2121 if (tree) {
2122 char aux[30];
2123 final_tree = (char *)malloc(strlen(tree) + 150);
2124 if (count > 1) sprintf(aux, " (%d%% consensus)", int(consensus_threshold*100 + 0.5));
2125 else *aux = 0;
2126 sprintf(final_tree, "[%s, %d best trees%s, %d steps, %d sites (%d informative)%s]%s",
2127 *pprogname, count, aux, steps, tmpallseqs->clean_len, info, gapsunknown, tree);
2128 if (count > 1) final_tree = add_equally_best(final_tree, all_best, count);
2129 }
2130 }
2131 way_out:
2132 if (tree) free(tree);
2133 free_after_view_to_allseq(tmpallseqs);
2134 free(pars_seqs);
2135 free(pars_names);
2136 return final_tree;
2137 }
2138
user_tree_cb(Fl_Widget * wid,void * data)2139 static void user_tree_cb(Fl_Widget *wid, void *data)
2140 {
2141 Fl_Button *b = (Fl_Button*)data;
2142 b->value(0);
2143 b->do_callback();
2144 }
2145
parsimony_dialog(SEA_VIEW * view)2146 void parsimony_dialog(SEA_VIEW *view)
2147 {
2148 int njumbles, i, count, maxbest = 100, started = 0, use_bootstrap,
2149 nreplicates, user_tree, nogaps;
2150 char *final_tree, *bootstrap_trees = NULL;
2151 const char *p, *progname;
2152 dnapars_S_option s_option = more_thorough;
2153
2154 tree_build_interrupted = 0;
2155 return_window *w = new return_window(255, view->protein ? 225: 300, "Parsimony analysis");
2156 w->set_modal();
2157 Fl_Check_Button *w_dojumble = new Fl_Check_Button(2, 5, 170, 20, "Randomize seq. order");
2158 w_dojumble->value(0);
2159 Fl_Int_Input *w_jumbles = new Fl_Int_Input(w_dojumble->x() + w_dojumble->w() + 5, w_dojumble->y(), 30, 20, "times");
2160 w_jumbles->align(FL_ALIGN_RIGHT);
2161 w_jumbles->value("5");
2162 Fl_Check_Button *w_nogaps = new Fl_Check_Button(2, w_dojumble->y() + w_dojumble->h() + 5, 160, 20, "Ignore all gap sites");
2163 w_nogaps->value(1);
2164 Fl_Check_Button *w_gapsunknown = new Fl_Check_Button(2, w_nogaps->y() + w_nogaps->h() + 5, 180, 20,
2165 "Gaps as unknown states");
2166 w_gapsunknown->value(0);
2167 w_gapsunknown->deactivate();
2168 Fl_Widget *witness = w_gapsunknown;
2169 Fl_Round_Button *thorough_b, *less_thorough_b;
2170 Fl_Int_Input *w_best;
2171 if (!view->protein) {
2172 Fl_Group *g = new Fl_Group(2, witness->y() + witness->h() + 5, w->w()-4, 70);
2173 g->box(FL_DOWN_FRAME);
2174 thorough_b = new Fl_Round_Button(7, witness->y() + witness->h() + 5, 180, 20, "More thorough tree search");
2175 thorough_b->type(FL_RADIO_BUTTON);
2176 less_thorough_b = new Fl_Round_Button(7, thorough_b->y() + thorough_b->h() + 5, 180, 20, "Less thorough tree search");
2177 less_thorough_b->type(FL_RADIO_BUTTON);
2178 Fl_Round_Button *rearrange_best_b = new Fl_Round_Button(7, less_thorough_b->y() + less_thorough_b->h() + 5, 180, 20, "Rearrange on best tree");
2179 rearrange_best_b->type(FL_RADIO_BUTTON);
2180 rearrange_best_b->activate();
2181 thorough_b->setonly();
2182 g->end();
2183 w_best = new Fl_Int_Input(195, rearrange_best_b->y() + rearrange_best_b->h() + 5, 50, 20, "Equally best trees retained");
2184 w_best->align(FL_ALIGN_LEFT);
2185 w_best->value("10000");
2186 witness = w_best;
2187 }
2188 Fl_Value_Slider* slider = new Fl_Value_Slider(2, witness->y() + witness->h() + 25, w->w()-4, 20, "% level for consensus tree building");
2189 slider->align(FL_ALIGN_TOP);
2190 slider->type(FL_HOR_NICE_SLIDER);
2191 slider->box(FL_FLAT_BOX);
2192 slider->color(FL_YELLOW);
2193 slider->selection_color(FL_RED);
2194 slider->bounds(50, 100);
2195 slider->value(100);
2196 slider->precision(0);
2197 Fl_Round_Button *w_dobootstrap = new Fl_Round_Button(2, slider->y() + slider->h() + 10, 115, 20, "Bootstrap with");
2198 w_dobootstrap->type(FL_RADIO_BUTTON);
2199 Fl_Int_Input *w_btrepl = new Fl_Int_Input(w_dobootstrap->x() + w_dobootstrap->w() + 5, w_dobootstrap->y() ,
2200 40, 20, "replicates");
2201 w_btrepl->value("100");
2202 w_btrepl->align(FL_ALIGN_RIGHT);
2203 Fl_Check_Button *keep_b_trees = new Fl_Check_Button(2, w_dobootstrap->y() + w_dobootstrap->h() + 5, 180, 20, "Show bootstrap trees");
2204 keep_b_trees->deactivate();
2205 static Fl_Check_Button *buttons[2] = {keep_b_trees, NULL};
2206 w_dobootstrap->callback(dist_b_cb, buttons);
2207 w_dobootstrap->when(FL_WHEN_CHANGED);
2208
2209 Fl_Box *w_count = new Fl_Box(0, keep_b_trees->y() + keep_b_trees->h() + 5, w->w(), 20, NULL);
2210
2211 Fl_Round_Button *w_user = new Fl_Round_Button(2, w_count->y() + w_count->h() + 5, 85, 20, "User tree:");
2212 w_user->type(FL_RADIO_BUTTON);
2213 w_user->callback(user_tree_cb, w_dobootstrap);
2214 Fl_Choice *w_choice = new Fl_Choice(w_user->x() + w_user->w() + 5, w_user->y(), 130, 20, "");
2215 count = view->menu_trees->vlength();
2216 for( i = 0; i < count; i++) w_choice->add( view->menu_trees->vitem(i)->label() );
2217 w_choice->value(0);
2218 if (!count) {w_user->deactivate(); w_choice->deactivate(); }
2219
2220 Fl_Button *w_interrupt = new Fl_Button(w_dobootstrap->x(), w_user->y() + w_user->h() + 5, 60, 20, "Cancel");
2221 Fl_Return_Button *w_ok = new Fl_Return_Button(w->w() - 65, w_interrupt->y(), 60, 20, "OK");
2222 w->run(w_ok);
2223 w_interrupt->callback(interrupt_callback, &tree_build_interrupted);
2224 w_ok->callback(interrupt_callback, &started);
2225 w->end();
2226 w->callback(interrupt_callback, &tree_build_interrupted);
2227 w->size(w->w(), w_ok->y() + w_ok->h() + 5);
2228 w->show();
2229 while(!started && !tree_build_interrupted) {
2230 Fl::wait();
2231 if(w_gapsunknown->value()) { w_nogaps->value(0); w_nogaps->deactivate(); }
2232 else w_nogaps->activate();
2233 if(w_nogaps->value()) { w_gapsunknown->value(0); w_gapsunknown->deactivate(); }
2234 else w_gapsunknown->activate();
2235 }
2236 if (tree_build_interrupted) {
2237 delete w;
2238 return;
2239 }
2240 w_ok->hide();
2241 w_interrupt->label("Interrupt");
2242 if(w_dojumble->value()) {
2243 p = w_jumbles->value();
2244 sscanf(p, "%d", &njumbles);
2245 if(njumbles < 1) njumbles = 1;
2246 }
2247 else njumbles = 0;
2248 use_bootstrap = w_dobootstrap->value();
2249 user_tree = w_user->value();
2250 nogaps = w_nogaps->value();
2251 char gapsunknown[25];
2252 if(w_gapsunknown->value())
2253 sprintf(gapsunknown, ", gaps treated as %c", (view->protein ? 'X' : 'N') );
2254 else gapsunknown[0] = 0;
2255 if(use_bootstrap) {
2256 p = w_btrepl->value();
2257 sscanf(p, "%d", &nreplicates);
2258 }
2259 else {
2260 if(user_tree) user_tree = w_choice->value() + 1;
2261 }
2262 if (!view->protein) {
2263 if (thorough_b->value()) s_option = more_thorough;
2264 else if (less_thorough_b->value()) s_option = less_thorough;
2265 else s_option = rearrange_best_tree;
2266 p = w_best->value();
2267 sscanf(p, "%d", &maxbest);
2268 }
2269 my_watch_cursor(w);
2270
2271 final_tree = run_parsimony_method(view, nogaps, gapsunknown,
2272 use_bootstrap && nreplicates > 1 ? nreplicates : 0, njumbles,
2273 maxbest, user_tree, w_count, s_option,
2274 &progname, slider->value()/100, false, keep_b_trees->value() ? &bootstrap_trees : NULL, fl_alert);
2275
2276 fl_reset_cursor(w);
2277 delete w;
2278 if (final_tree != NULL) {
2279 const char *n = extract_filename(view->masename);
2280 char *title = NULL;
2281 if (n) {
2282 char *q = (char*)strchr(n, '.');
2283 if (q) *q = 0;
2284 title = new char[strlen(n) + strlen(progname) + 2];
2285 sprintf(title, "%s-%s", n, progname);
2286 if (q) *q = '.';
2287 }
2288 int tree_count = 0;
2289 p = final_tree;
2290 while (TRUE) {
2291 p = strchr(p+1, '\n');
2292 if (!p) break;
2293 tree_count++;
2294 }
2295 if (tree_count == 0) tree_count = 1;
2296 Fl_Window *btreew = NULL;
2297 if (bootstrap_trees) btreew = treedraw(bootstrap_trees, view, "bootstrap trees", FALSE);
2298 Fl_Window *treew = treedraw(final_tree, view, n ? title:progname, FALSE, tree_count);
2299 #if !defined(__APPLE__)
2300 if (btreew) treew->position(btreew->x() + 30, btreew->y() + 40);
2301 #endif
2302 delete[] title;
2303 }
2304 }
2305
2306
parsimony_with_jumble(char ** pars_seqs,char ** pars_names,int n_otu,int njumbles,int protein,int * psteps,int * pcount,int maxbest,Fl_Widget * w_count,dnapars_S_option s_option,int no_gui,float consensus_threshold)2307 char *parsimony_with_jumble(char **pars_seqs, char **pars_names, int n_otu, int njumbles, int protein, int *psteps,
2308 int *pcount, int maxbest, Fl_Widget *w_count, dnapars_S_option s_option, int no_gui, float consensus_threshold)
2309 {
2310 int i, steps, count = 0;
2311 char *tree, *newtree, *p, *q;
2312 if (njumbles > 0) {
2313 if (w_count) w_count->label("First jumble attempt");
2314 }
2315 tree = parsimony(pars_seqs, pars_names, n_otu, njumbles, &steps, NULL, maxbest, NULL, protein, s_option, no_gui, w_count);
2316 if (!tree) return NULL;
2317 p = tree - 1;
2318 while((p = strchr(p+1, '\n')) != NULL) count++;//count equally parsimonious trees
2319 if(count > 1) {//compute threshold consensus of all equally parsimonious trees
2320 p = strchr(tree, '\n');
2321 newtree = (char *)malloc(p - tree + 1);
2322 memcpy(newtree, tree, p - tree); newtree[p - tree] = 0;
2323 p++; i = count; count = 0;
2324 init_consensus_calculation(n_otu);
2325 while(TRUE) {
2326 count += process_one_consensus_item(newtree, pars_names);
2327 free(newtree);
2328 i--;
2329 if(i == 0) break;
2330 q = strchr(p, '\n');
2331 newtree = (char *)malloc(q - p + 1);
2332 memcpy(newtree, p, q - p); newtree[q - p] = 0;
2333 p = q + 1;
2334 }
2335 int **rows;
2336 int br_count = after_consensus_items(consensus_threshold, count, &rows);
2337 char *consensus_tree = compute_consensus_tree(pars_names, rows, br_count);
2338 for (i = 0; i < br_count; i++) free(rows[i]);
2339 free(rows);
2340 char *newtree = (char*)malloc(strlen(consensus_tree) + strlen(tree) + 2);
2341 sprintf(newtree, "%s\n%s", consensus_tree, tree);
2342 free(consensus_tree);
2343 free(tree);
2344 tree = newtree;
2345 }
2346 *psteps = steps;
2347 *pcount = count;
2348 return tree;
2349 }
2350
2351
parsimony_with_bootstrap(allseq * tmpallseqs,char ** pars_seqs,char ** pars_names,int n_otu,int njumbles,int protein,int * psteps,int maxbest,int replicates,Fl_Box * w_count,int * preplicates_done,dnapars_S_option s_option,int no_gui,float consensus_threshold,int * pcount_best,char ** p_bootstrap_trees)2352 static char *parsimony_with_bootstrap(allseq *tmpallseqs, char **pars_seqs, char **pars_names, int n_otu, int njumbles, int protein, int *psteps,
2353 int maxbest, int replicates, Fl_Box *w_count, int *preplicates_done, dnapars_S_option s_option, int no_gui,
2354 float consensus_threshold, int *pcount_best, char **p_bootstrap_trees)
2355 {
2356 int i, j, steps, newsteps, count = 0, replicates_done = 0, repl_count, lbtrees = 0;
2357 char *tree, *newtree, *p, *q, *all_best, *bootstrap_trees = NULL;
2358
2359 if (w_count) w_count->label("Full sequence tree");
2360 tree = parsimony_with_jumble(pars_seqs, pars_names, n_otu, njumbles, protein,
2361 &steps, &count, maxbest, NULL, s_option, no_gui, consensus_threshold);
2362 if (!tree) return NULL;
2363 if (count > 1) { // memorize all equally best trees in all_best string
2364 p = strchr(tree, '\n');
2365 all_best = strdup(p + 1);
2366 *p = 0;
2367 }
2368 if (count > 1 && !protein) { /* with DNA, fit branch lengths to strict consensus of most parsimonious trees */
2369 char *input_tree = tree;
2370 /* don't use the steps of this consensus tree because it can be larger than parsimonious steps */
2371 tree = parsimony(pars_seqs, pars_names, n_otu, njumbles, &newsteps, input_tree, maxbest, NULL, false, s_option, no_gui, NULL);
2372 free(input_tree);
2373 }
2374 *psteps = steps;
2375 init_add_bootstrap(tree); // memorize topology and br lengths of strict consensus of most parsimonious trees
2376 for(j = 0; j < replicates; j++) {
2377 if (w_count) {
2378 char tmp[30];
2379 sprintf(tmp, "Bootstrap replicate: %d", j + 1);
2380 w_count->label(tmp);
2381 }
2382 if (tree_build_interrupted) break;
2383 bootstrap_weights(tmpallseqs, FALSE);
2384 tree = parsimony(pars_seqs, pars_names, n_otu, njumbles, &steps, NULL, maxbest, tmpallseqs->wght, protein, s_option, no_gui, NULL);
2385 if (!tree) break;
2386 p = tree - 1; repl_count = 0;
2387 while((p = strchr(p+1, '\n')) != NULL) repl_count++;//count equally parsimonious trees
2388 i = repl_count;
2389 while (TRUE) {
2390 p = strchr(tree, '\n');
2391 newtree = (char *)malloc(p - tree + 1);
2392 memcpy(newtree, tree, p - tree); newtree[p - tree] = 0;
2393 process_one_replicate(newtree, 1./repl_count);
2394 if (p_bootstrap_trees) { // memorize all bootstrap trees
2395 if (lbtrees == 0) {
2396 lbtrees = 10000;
2397 bootstrap_trees = (char *)malloc(lbtrees + 1);
2398 q = bootstrap_trees;
2399 *q = 0;
2400 }
2401 int l = strrchr(newtree, ')') - newtree + 1; // length of (....)
2402 if ((q-bootstrap_trees) + l + 8 > lbtrees) {
2403 lbtrees = (q-bootstrap_trees) + l + 8 + 10000;
2404 char *tmpc = (char*)realloc(bootstrap_trees, lbtrees + 1);
2405 q = tmpc + (q - bootstrap_trees);
2406 bootstrap_trees = tmpc;
2407 }
2408 sprintf(q, "[%.2f]", 1./repl_count); q += strlen(q); // indicate tree weight
2409 memcpy(q, newtree, l); q += l; // (...) part of tree
2410 memcpy(q, ";\n", 3); q += 2; // end of tree
2411 }
2412 free(newtree);
2413 i--;
2414 if(i == 0) break;
2415 newtree = strdup(p+1);
2416 free(tree);
2417 tree = newtree;
2418 }
2419 ++replicates_done;
2420 free(tree);
2421 }
2422 *preplicates_done = replicates_done;
2423 tree = finish_add_bootstrap(replicates_done);
2424 if (count > 1) tree = add_equally_best(tree, all_best, count);
2425 *pcount_best = count;
2426 if (p_bootstrap_trees) *p_bootstrap_trees = bootstrap_trees;
2427 return tree;
2428 }
2429
2430 struct pars_struct {
2431 char** seq;
2432 char** seqname;
2433 int notu;
2434 int njumbles;
2435 int *jumble_no;
2436 int *steps;
2437 char* toevaluate;
2438 int arg_maxtrees;
2439 int *bt_weights;
2440 dnapars_S_option s_option;
2441 char *result;
2442 bool protein;
2443 bool done;
2444 };
2445
2446
awake_from_C(void)2447 void awake_from_C(void)
2448 {
2449 Fl::lock();
2450 Fl::awake();
2451 Fl::unlock();
2452 }
2453
2454
parsimony_thread(pars_struct * args)2455 static void parsimony_thread(pars_struct* args)
2456 {
2457 typedef char* (*pars_f)(char** seq, char** seqname, int notu, int njumbles, int *jumble_no, int *steps, char* toevaluate, int arg_maxtrees, int *bt_weights, dnapars_S_option s_option);
2458 pars_f pf = args->protein ? protpars : dnapars;
2459 args->result = (*pf)(args->seq, args->seqname, args->notu, args->njumbles, args->jumble_no, args->steps, args->toevaluate, args->arg_maxtrees, args->bt_weights, args->s_option);
2460 Fl::lock();
2461 args->done = true;
2462 Fl::awake();
2463 Fl::unlock();
2464 }
2465
parsimony(char ** seq,char ** seqname,int notu,int njumbles,int * steps,char * toevaluate,int arg_maxtrees,int * bt_weights,bool protein,dnapars_S_option s_option,int no_gui,Fl_Widget * w_count)2466 char *parsimony(char** seq, char** seqname, int notu, int njumbles, int *steps, char* toevaluate, int arg_maxtrees, int *bt_weights,
2467 bool protein, dnapars_S_option s_option, int no_gui, Fl_Widget *w_count)
2468 {
2469 char *p;
2470 int jumble_no = 0;
2471 int pre_jumble_no = 0;
2472 tree_build_interrupted = 0;
2473 if (no_gui) {
2474 if (protein) p = protpars(seq, seqname, notu, njumbles, NULL, steps, toevaluate, arg_maxtrees, bt_weights, s_option);
2475 else p = dnapars(seq, seqname, notu, njumbles, NULL, steps, toevaluate, arg_maxtrees, bt_weights, s_option);
2476 }
2477 else {
2478 pars_struct *args = (pars_struct*)malloc(sizeof(pars_struct));
2479 args->seq = seq;
2480 args->seqname = seqname;
2481 args->notu = notu;
2482 args->njumbles = njumbles;
2483 args->jumble_no = &jumble_no;
2484 args->steps = steps;
2485 args->toevaluate = toevaluate;
2486 args->arg_maxtrees = arg_maxtrees;
2487 args->bt_weights = bt_weights;
2488 args->s_option = s_option;
2489 args->result = NULL;
2490 args->protein = protein;
2491 args->done = false;
2492 if ( ! fl_create_thread((void (*) (void *))parsimony_thread, args, 1024) ) {
2493 while (!args->done) {
2494 Fl::wait();
2495 if (jumble_no > pre_jumble_no && jumble_no < njumbles && w_count) {
2496 pre_jumble_no = jumble_no;
2497 char tmp[40];
2498 sprintf(tmp, "Jumble attempt: %d steps: %d", jumble_no + 1, *args->steps);
2499 w_count->label(tmp);
2500 }
2501 }
2502 }
2503 p = args->result;
2504 free(args);
2505 }
2506 if (tree_build_interrupted && p) {
2507 free(p);
2508 p = NULL;
2509 }
2510 return p;
2511 }
2512
2513
2514 /* from Numerical Recipes in C */
2515 /* Allocation function for ludcmp function */
vector(long nl,long nh)2516 double *vector(long nl, long nh)
2517 {
2518 double *v;
2519
2520 v=(double *)malloc((size_t) ((nh-nl+1+1)*sizeof(double)));
2521 if (!v) printf("allocation failure in vector()");
2522 return v-nl+1;
2523 }
2524
2525
2526 /* free_vector */
2527 /* from Numerical Recipes in C */
2528 /* Memory freeing function for ludcmp function */
free_vector(double * v,long nl,long nh)2529 void free_vector(double *v, long nl, long nh)
2530 {
2531 free((char*) (v+nl-1));
2532 }
2533
2534
2535 /* ludcmp */
2536 /* from Numerical Recipes in C */
2537 /* Replace matrix a by a rowwise permutation of its LU decomposition. */
2538
ludcmp(double ** a,int n,int * indx,double * d)2539 int ludcmp(double **a, int n, int *indx, double *d)
2540 {
2541 int i,imax=0,j,k;
2542 double big,dum,sum,temp;
2543 double *vv;
2544
2545 vv=vector(1,n);
2546 *d=1.0;
2547 for (i=1;i<=n;i++) {
2548 big=0.0;
2549 for (j=1;j<=n;j++)
2550 if ((temp=fabs(a[i][j])) > big) big=temp;
2551 if (big == 0.0) return 0;
2552 vv[i]=1.0/big;
2553 }
2554 for (j=1;j<=n;j++) {
2555 for (i=1;i<j;i++) {
2556 sum=a[i][j];
2557 for (k=1;k<i;k++) sum -= a[i][k]*a[k][j];
2558 a[i][j]=sum;
2559 }
2560 big=0.0;
2561 for (i=j;i<=n;i++) {
2562 sum=a[i][j];
2563 for (k=1;k<j;k++)
2564 sum -= a[i][k]*a[k][j];
2565 a[i][j]=sum;
2566 if ( (dum=vv[i]*fabs(sum)) >= big) {
2567 big=dum;
2568 imax=i;
2569 }
2570 }
2571 if (j != imax) {
2572 for (k=1;k<=n;k++) {
2573 dum=a[imax][k];
2574 a[imax][k]=a[j][k];
2575 a[j][k]=dum;
2576 }
2577 *d = -(*d);
2578 vv[imax]=vv[j];
2579 }
2580 indx[j]=imax;
2581 if (a[j][j] == 0.0) a[j][j]=1.0e-20;
2582 if (j != n) {
2583 dum=1.0/(a[j][j]);
2584 for (i=j+1;i<=n;i++) a[i][j] *= dum;
2585 }
2586 }
2587 free_vector(vv,1,n);
2588 return 1;
2589 }
2590
2591
2592 /* Returns the determinant of matrix mat(4, 4) */
det4(double mat[4][4])2593 double det4(double mat[4][4])
2594 {
2595
2596 int i, j, *indx;
2597 double *lu[5], d;
2598
2599 for(i=1;i<5;i++){
2600 lu[i]=(double*)calloc(5, sizeof(double));
2601 for(j=1;j<5;j++) lu[i][j]=mat[i-1][j-1];
2602 }
2603 indx=(int*)calloc(5, sizeof(int));
2604
2605 if(!ludcmp(lu, 4, indx, &d)) return 0.;
2606
2607 for(i=1;i<5;i++) d *= lu[i][i];
2608 for(i=1;i<5;i++)
2609 free(lu[i]);
2610 return d;
2611 }
2612
2613
2614 /* logdet distance */
logdet(char * seq1,char * seq2,int lg,int * wght)2615 double logdet(char* seq1, char* seq2, int lg, int *wght)
2616 {
2617 double matxy[4][4];
2618 double freq[16], d, a1, c1, g1, t1, a2, c2, g2, t2;
2619
2620 if(!freq_obs(seq1, seq2, freq, lg, wght)) return -1.;
2621
2622 matxy[0][0]=freq[0]; matxy[0][1]=freq[1]; matxy[0][2]=freq[2]; matxy[0][3]=freq[3];
2623 matxy[1][0]=freq[4]; matxy[1][1]=freq[5]; matxy[1][2]=freq[6]; matxy[1][3]=freq[7];
2624 matxy[2][0]=freq[8]; matxy[2][1]=freq[9]; matxy[2][2]=freq[10]; matxy[2][3]=freq[11];
2625 matxy[3][0]=freq[12]; matxy[3][1]=freq[13]; matxy[3][2]=freq[14]; matxy[3][3]=freq[15];
2626
2627 a1=matxy[0][0]+matxy[0][1]+matxy[0][2]+matxy[0][3];
2628 c1=matxy[1][0]+matxy[1][1]+matxy[1][2]+matxy[1][3];
2629 g1=matxy[2][0]+matxy[2][1]+matxy[2][2]+matxy[2][3];
2630 t1=matxy[3][0]+matxy[3][1]+matxy[3][2]+matxy[3][3];
2631 a2=matxy[0][0]+matxy[1][0]+matxy[2][0]+matxy[3][0];
2632 c2=matxy[0][1]+matxy[1][1]+matxy[2][1]+matxy[3][1];
2633 g2=matxy[0][2]+matxy[1][2]+matxy[2][2]+matxy[3][2];
2634 t2=matxy[0][3]+matxy[1][3]+matxy[2][3]+matxy[3][3];
2635
2636
2637 d=det4(matxy);
2638 if(d<=0.) return -1.;
2639 return (-log(d)+log(a1*c1*g1*t1*a2*c2*g2*t2)/2)/4;
2640 }
2641
2642
LOGDET_dist(allseq * data)2643 matrix *LOGDET_dist(allseq *data)
2644 {
2645 int i,j;
2646 matrix *mat;
2647
2648 mat = Make_Mat(data->n_otu);
2649 Init_Mat(mat,data);
2650 For(i,data->n_otu - 1)
2651 {
2652 for(j = i + 1; j < data->n_otu; j++)
2653 {
2654 if (tree_build_interrupted) return mat;
2655 double x = logdet(data->c_seq[i]->state, data->c_seq[j]->state, data->clean_len, data->wght);
2656 if(x == -1.) {
2657 Free_Mat(mat);
2658 return NULL;
2659 }
2660 mat->dist[i][j] = x;
2661 mat->dist[j][i] = x;
2662 }
2663 }
2664 return mat;
2665 }
2666
2667
put_names_back_in_tree(char * oldtree,char ** names)2668 char *put_names_back_in_tree(char *oldtree, char **names)
2669 //replace Seq###_ in oldtree by names in newtree
2670 {
2671 char *p, *q, *newtree;
2672 int rank, l = 0;
2673
2674 p = oldtree;
2675 while((p = strstr(p, "Seq")) != NULL) {
2676 p += 3;
2677 sscanf(p, "%d", &rank);
2678 l += strlen(names[rank]);
2679 }
2680 newtree = (char *)malloc(strlen(oldtree) + l + 1);
2681 p = newtree;
2682 q = oldtree;
2683 while(*q != 0) {
2684 if(strncmp(q, "Seq", 3) != 0) *p++ = *q++;
2685 else {
2686 sscanf(q + 3, "%d", &rank);
2687 l = strlen(names[rank]);
2688 strcpy(p, names[rank]);
2689 p += l;
2690 do q++; while(*q != '_');
2691 q++;
2692 }
2693 }
2694 free(oldtree);
2695 *p = 0;
2696 newtree = (char *)realloc(newtree, strlen(newtree) + 1);
2697 return newtree;
2698 }
2699
replace_tree_names_by_rank(const char * tree,char ** names,int notu)2700 char *replace_tree_names_by_rank(const char *tree, char **names, int notu)
2701 /* replace names from tree listed in table names by Seq##_
2702 returns altered tree or NULL if some tree name not found in table names
2703 */
2704 {
2705 char *p = (char *)tree - 1;
2706 char *newtree, *q, *qstart, c;
2707 int found = 0, i, l;
2708 char *start = NULL;
2709 static char name[200];
2710 newtree = (char *)malloc(strlen(tree) + 10 * notu + 1);
2711 q = newtree;
2712 while(*(++p) != 0) {
2713 *(q++) = *p;
2714 if(*p == '(' ) {
2715 start = p + 1;
2716 qstart = q;
2717 }
2718 else if(start == NULL && *p == ',') { start = p + 1; qstart = q; }
2719 else if(start != NULL && (*p == ')' || *p == ':' || *p == ',') ) {
2720 found++;
2721 while(*start == ' ') start++;
2722 memcpy(name, start, p - start); name[p - start] = 0;
2723 l = strlen(name) - 1; while( l >= 0 && name[l] == ' ') name[l--] = 0;
2724 for(i = 0; i < notu; i++) if(strcmp(name, names[i]) == 0) break;
2725 if(i == notu) {
2726 free(newtree);
2727 return NULL;
2728 }
2729 else {
2730 c = *(q - 1);
2731 sprintf(qstart, "Seq%d_", i);
2732 q = qstart + strlen(qstart);
2733 *q++ = c;
2734 }
2735 if(*p != ',') start = NULL; else { start = p + 1; qstart = q; }
2736 }
2737 }
2738 *q = 0;
2739 return newtree;
2740 }
2741
2742
command_line_phylogeny(int argc,char ** argv)2743 void command_line_phylogeny(int argc, char **argv)
2744 {
2745 char *tree, *p;
2746 int replicates = 0, jumbles = 0, i;
2747 bool err = false;
2748 if (!isarg(argc, argv, "-distance") && !isarg(argc, argv, "-parsimony")) {
2749 fputs("Option -build_tree requires either -distance or -parsimony\n", stderr);
2750 exit(1);
2751 }
2752 if (isarg(argc, argv, "-distance") && !(isarg(argc, argv, "-o") || isarg(argc, argv, "-distance_matrix"))) err = true;
2753 if (isarg(argc, argv, "-parsimony") && !isarg(argc, argv, "-o")) err = true;
2754 if (err) {
2755 fputs("Missing '-o fname' option\n", stderr);
2756 exit(1);
2757 }
2758 SEA_VIEW *view = cmdline_read_input_alignment(argc, argv);
2759 if ( (p = argname(argc, argv, "-sites")) ) {
2760 list_regions *elt = view->regions;
2761 while (elt) {
2762 if (strcmp(elt->element->name, p) == 0) break;
2763 elt = elt->next;
2764 }
2765 if (elt) view->active_region = elt->element;
2766 }
2767
2768 if ( (p = argname(argc, argv, "-species")) ) {
2769 for (i = 0; i < view->numb_species_sets; i++) {
2770 if (strcmp(view->name_species_sets[i], p) == 0) break;
2771 }
2772 if (i < view->numb_species_sets) {
2773 view->sel_seqs = view->list_species_sets[i];
2774 view->tot_sel_seqs = 0;
2775 for (i = 0; i < view->tot_seqs; i++) if (view->sel_seqs[i]) view->tot_sel_seqs++;
2776 }
2777 }
2778
2779 replicates = argval(argc, argv, "-replicates", 0);
2780
2781 char *utree = NULL;
2782 if (isarg(argc, argv, "-usertree")) {
2783 char *utname;
2784 if ( (utname = argname(argc, argv, "-usertree"))) {
2785 FILE *in = fopen(utname, "r");
2786 if (!in) {
2787 fprintf(stderr, "Usertree file not found: %s\n",utname);
2788 exit(1);
2789 }
2790 fseek(in, 0, SEEK_END);
2791 long s = ftell(in);
2792 fseek(in, 0, SEEK_SET);
2793 utree = (char*)malloc(s+1);
2794 fread(utree, s, 1, in);
2795 utree[s] = 0;
2796 fclose(in);
2797 }
2798 }
2799
2800 if (isarg(argc, argv, "-distance")) { // distance method
2801 int distkind = 0; // == observed_p/ndist
2802 char *distname;
2803 if ( (distname = argname(argc, argv, "-distance"))) {
2804 if (strcmp(distname, "JC") == 0) distkind = Jukes_Cantor;
2805 else if (strcmp(distname, "K2P") == 0) distkind = Kimura2P;
2806 else if (strcmp(distname, "HKY") == 0) distkind = HKY;
2807 else if (strcmp(distname, "logdet") == 0) distkind = LogDet;
2808 else if (strcmp(distname, "Ka") == 0) distkind = Ka;
2809 else if (strcmp(distname, "Ks") == 0) distkind = Ks;
2810 else if (strcmp(distname, "Poisson") == 0) distkind = Poisson_pdist;
2811 else if (strcmp(distname, "Kimura") == 0) distkind = Kimura_pdist;
2812 }
2813 if (distkind == 0) distname = strdup("Observed");
2814
2815 view->tot_trees = 0;
2816 tree = run_distance_method(view, distkind, isarg(argc, argv, "-nogaps"), !isarg(argc, argv, "-NJ"), replicates > 1, replicates, NULL,
2817 distname, distkind == Ka || distkind == Ks, argname(argc, argv, "-distance_matrix"), utree, true, Fl::fatal, false, false);
2818 if (isarg(argc, argv, "-distance_matrix")) exit(0);
2819 }
2820
2821 if (isarg(argc, argv, "-parsimony")) { // parsimony method
2822 const char *progname;
2823 jumbles = argval(argc, argv, "-jumbles", 0);
2824 char *search = argname(argc, argv, "-search");
2825 dnapars_S_option s_option;
2826 if (search && strcmp(search, "less") == 0) s_option = less_thorough;
2827 else if (search && strcmp(search, "best") == 0) s_option = rearrange_best_tree;
2828 else s_option = more_thorough;
2829 int usertreerank = 0;
2830 if (utree) {
2831 view->tot_trees = 1;
2832 view->trees = (char**)malloc(sizeof(char*));
2833 view->trees[0] = utree;
2834 usertreerank = 1;
2835 }
2836 tree = run_parsimony_method(view, isarg(argc, argv, "-nogaps"),
2837 isarg(argc, argv, "-gaps_as_unknown") ? ", gaps treated as ?" : "",
2838 replicates, jumbles, 10000, usertreerank, NULL, s_option, &progname, 1, true, 0, Fl::fatal);
2839 if (tree && (p = strchr(tree, ';')) != NULL) *(p+1) = 0; // tree string can contain several trees, keep first only
2840 }
2841
2842 if (tree) {
2843 char *outfname = argname(argc, argv, "-o");
2844 FILE *out = (strcmp(outfname, "-") != 0 ? fopen(outfname, "w") : stdout);
2845 fwrite(tree, strlen(tree), 1, out);
2846 fputs("\n", out); fflush(out);
2847 if (out != stdout) fclose(out);
2848 }
2849 }
2850
save_bootstrap_replicates(const char * fname,int replicates,SEA_VIEW * view)2851 void save_bootstrap_replicates(const char *fname, int replicates, SEA_VIEW *view)
2852 {
2853 allseq *seqs = view_to_allseq(view, FALSE);
2854 int *each_length = new int[seqs->n_otu];
2855 for (int i = 0; i < seqs->n_otu; i++) each_length[i] = seqs->clean_len;
2856 char **sequences = new char*[seqs->n_otu];
2857 for (int i = 0; i < seqs->n_otu; i++) {
2858 sequences[i] = new char[seqs->clean_len];
2859 }
2860 FILE *out = fl_fopen(fname, "w");
2861 char *tmp_fname = create_tmp_filename();
2862 for (int repl = 1; repl <= replicates; repl++) {
2863 bootstrap_weights(seqs, FALSE);
2864 for (int i = 0; i < seqs->n_otu; i++) {
2865 int l = 0;
2866 for (int j = 0; j < seqs->clean_len; j++) {
2867 for (int k = 0; k < seqs->wght[j]; k++) {
2868 sequences[i][l++] = seqs->c_seq[i]->state[j];
2869 }
2870 }
2871 }
2872 save_alignment_or_region(tmp_fname, sequences, NULL,
2873 NULL, view->seqname, seqs->n_otu, each_length,
2874 NULL, NULL, PHYLIP_FORMAT,
2875 0, NULL,
2876 NULL, NULL, 0, view->protein,
2877 0, NULL,
2878 NULL, view->phylipwidnames,
2879 0, NULL,
2880 NULL, view->spaces_in_fasta_names);
2881 // add to growing output file
2882 FILE *in = fl_fopen(tmp_fname, "r");
2883 char line[100];
2884 while (fgets(line, sizeof(line), in)) {
2885 fputs(line, out);
2886 }
2887 fclose(in);
2888 fl_unlink(tmp_fname);
2889 }
2890 fclose(out);
2891 for (int i = 0; i < seqs->n_otu; i++) delete[] sequences[i];
2892 delete[] sequences;
2893 delete[] each_length;
2894 free_after_view_to_allseq(seqs);
2895 }
2896