1 #ifdef WIN32
2 #include <windows.h>
3 #else
4 #include <sys/times.h>
5 #endif
6 #include "seaview.h"
7 #include "treedraw.h"
8 #include "tbe.h"
9 #include <FL/Fl_Round_Button.H>
10 #include <FL/Fl_Check_Button.H>
11 #include <FL/Fl_Select_Browser.H>
12 #include <FL/Fl_Return_Button.H>
13 #include <FL/Fl_Int_Input.H>
14 #include <FL/Fl_Float_Input.H>
15 #include <FL/Fl_Value_Slider.H>
16 #include <FL/Fl.H>
17 #include <FL/fl_ask.H>
18 #ifdef __APPLE__
19 #include <FL/x.H> // for fl_mac_os_version
20 #endif
21 
22 extern "C" {
23 #include "phyml_util.h"
24 	}
25 
26 typedef enum {observed_pdist, Poisson_pdist, Kimura_pdist} protein_distances;
27 typedef enum {observed_ndist, Jukes_Cantor, Kimura2P, HKY, LogDet, Ka, Ks} nucleotide_distances;
28 typedef enum {more_thorough, less_thorough, rearrange_best_tree} dnapars_S_option;
29 
30 allseq *view_to_allseq(SEA_VIEW *view, int remove_all_gaps);
31 void gaps_as_unknown_states(allseq *phyml_seqs, int protein);
32 void free_after_view_to_allseq(allseq *phyml_seqs);
33 //void initialize_random_series(void);
34 extern "C" {
35   double next_random(void);
36   }
37 void bootstrap_weights(allseq *seqs, int use_codons);
38 void savedistinfile(matrix *mat, const char *dirname, const char *outfname);
39 matrix *calc_dist_matrix(allseq *seqs, int distkind, int protein, char **lwlseqs, int in_bootstrap, char **p_err_mess);
40 void phyml_dialog(SEA_VIEW *view);
41 static void change_searching_callback(Fl_Widget *ob, void *data);
42 static void cancel_callback(Fl_Widget *ob, void *data);
43 static void model_callback(Fl_Widget *ob, void *data);
44 void run_phyml_callback(Fl_Widget *ob, void *data);
45 void distance_method_dialog(SEA_VIEW *view);
46 matrix *run_calc_dist_matrix(allseq *seqs, int distkind, int protein, char **lwlseqs, int in_bootstrap, char **p_err_mess, int no_gui);
47 char *run_distance_method(SEA_VIEW *view, int distkind, int remove_all_gaps, int use_bionj, int use_bootstrap,
48 			 int replicates, Fl_Box *box, const char *distance_name, int using_kaks,
49 			  const char* distancefname, const char *user_tree, int no_gui, void (*alert)(const char*, ...), int keep_b_trees, int compute_tbe );
50 char *bootstrap_reformat(char *tree, int replicates);
51 matrix *Obs_Dist(allseq *data, model *mod);
52 matrix *Kimura_p_Dist(allseq *data);
53 char *alrt_reformat(char *tree);
54 matrix *K80_dist_nogamma(allseq *data);
55 matrix *HKY_dist(allseq *data);
56 double hky(char* seq1, char* seq2, int lg, int *wght);
57 int freq_obs(char* seq1, char* seq2, double* freq, int lgseq, int *wght);
58 int informative(char** sequ, int nb);
59 void jumble_fct(char** seq, char** name, int notu);
60 static char *run_parsimony_method(SEA_VIEW *view, int nogaps, const char *gapsunknown, int nreplicates, int njumbles,
61 			   int maxbest, int user_tree, Fl_Box *w_count, dnapars_S_option s_option,
62 			   const char **pprogname, float consensus_threshold, int no_gui, char **p_bootstrap_trees, void (*alert)(const char *,...) );
63 void parsimony_dialog(SEA_VIEW *view);
64 char *parsimony_with_jumble(char **pars_seqs, char **pars_names, int n_otu, int njumbles, int protein, int *psteps,
65 	int *pcount, int maxbest, Fl_Widget *w_count, dnapars_S_option s_option, int no_gui, float consensus_threshold);
66 static char *parsimony_with_bootstrap(allseq *tmpallseq, char **pars_seqs, char **pars_names, int n_otu, int njumbles,
67 	int protein, int *psteps, int maxbest, int replicates, Fl_Box *w_count, int *preplicates_done, dnapars_S_option s_option, int no_gui,
68 			       float consensus_threshold, int *pcount_best, char **p_bootstrap_trees);
69 double *vector(long nl, long nh);
70 void free_vector(double *v, long nl, long nh);
71 int ludcmp(double **a, int n, int *indx, double *d);
72 double det4(double mat[4][4]);
73 double logdet(char* seq1, char* seq2, int lg, int *wght);
74 matrix *LOGDET_dist(allseq *data);
75 char *put_names_back_in_tree(char *oldtree, char **names);
76 char *replace_tree_names_by_rank(const char *tree, char **names, int notu);
77 void command_line_phylogeny(int argc, char **argv);
78 void save_bootstrap_replicates(const char *fname, int replicates, SEA_VIEW *view);
79 
80 
81 extern void interrupt_callback(Fl_Widget *ob, void *data);
82 extern int save_phylip_file(const char *fname, char **seq,
83 					 char **seqname, int totseqs, int *eachlength, region *region_used,
84 					 int *sel_seqs, int tot_sel_seqs, int phylipwidnames);
85 extern int run_external_prog_in_pseudoterm(char *cmd, const char *dialogfname, const char *label);
86 extern int init_add_bootstrap(char *full_tree);
87 extern char *get_res_value(const char *name, const char *def_value);
88 extern int process_one_replicate(char *one_tree, float w);
89 extern char *finish_add_bootstrap(int replicates);
90 extern int compare_newick_with_names(const char *tree, char **names, int notu, char **pname);
91 extern const char *make_binary_or_unrooted(char *arbre);
92 extern int save_resources(void);
93 extern int set_res_value(const char *name, const char *value);
94 extern char *create_tmp_filename(void);
95 extern void delete_tmp_filename(const char *base_fname);
96 extern int is_view_valid(SEA_VIEW *view);
97 char *parsimony(char** seq, char** seqname, int notu, int njumbles, int* steps, char* toevaluate, int arg_maxtrees,
98 		int *bt_weights, bool protein, dnapars_S_option s_option, int no_gui, Fl_Widget *w_count);
99 extern int fl_create_thread(void (*f) (void *), void* p, unsigned);
100 extern char *argname(int argc, char *argv[], const char *arg);
101 extern float argval(int argc, char *argv[], const char *arg, float defval);
102 extern bool isarg(int argc, char *argv[], const char *arg);
103 extern SEA_VIEW *cmdline_read_input_alignment(int argc, char **argv);
104 extern void init_consensus_calculation(int n);
105 extern int process_one_consensus_item(char *one_tree, char **seqnames);
106 extern int after_consensus_items(float threshold, int item_count, int ***prows);
107 extern char *compute_consensus_tree(char **seqnames, int **rows, int branch_count);
108 extern "C" {
109   void prefastlwl(void);
110   void loadrl(void);
111   int fastlwl(char **seq, int nbseq, int lgseq, double **ka, double **ks,
112 					double **vka, double **vks, int* sat1, int* sat2, int *wght);
113   char* dnapars(char** seq, char** seqname, int notu, int njumbles, int *jumble_no, int *steps, char* toevaluate, int arg_maxtrees, int *bt_weights, dnapars_S_option s_option);
114   char* protpars(char** seq, char** seqname, int notu, int njumbles, int *jumble_no, int *steps, char* toevaluate, int arg_maxtrees, int *bt_weights, dnapars_S_option s_option);
115   char *create_tmp_filename_from_C(void);
116   FILE *fl_fopen_from_C(const char *fname, const char *mode);
117   int fl_unlink_from_C(const char*fname);
118   void awake_from_C(void);
119 }
120 
121 int tree_build_interrupted;
122 
create_tmp_filename_from_C(void)123 char *create_tmp_filename_from_C(void)
124 {
125   return   create_tmp_filename();
126 }
127 
fl_fopen_from_C(const char * fname,const char * mode)128 FILE *fl_fopen_from_C(const char *fname, const char *mode)
129 {
130   return fl_fopen(fname, mode);
131 }
132 
fl_unlink_from_C(const char * fname)133 int fl_unlink_from_C(const char*fname)
134 {
135   return fl_unlink(fname);
136 }
137 
trees_callback(Fl_Widget * ob,void * data)138 void trees_callback(Fl_Widget *ob, void *data)
139 /* 0:Distance  1:PhyML 2:import
140  */
141 {
142 	SEA_VIEW *view = (SEA_VIEW *)ob->user_data();
143 	int reponse = ((Fl_Menu_*)ob)->mvalue() - view->menu_trees->get_menu();
144 	if(reponse <= 2) {
145 		if(view->tot_seqs < 3 || (view->tot_sel_seqs > 0 && view->tot_sel_seqs < 3) ) {
146 			fl_alert("Can't draw tree for less than 3 sequences");
147 			return;
148 		}
149 		for(int i = 0; i < view->tot_seqs; i++) {//check that no ,():; in sequence names
150 			if(view->tot_sel_seqs > 0 && !view->sel_seqs[i]) continue;
151 			if(strchr(view->seqname[i], '(') != NULL || strchr(view->seqname[i], ')') != NULL ||
152 			   strchr(view->seqname[i], ',') != NULL || strchr(view->seqname[i], ':') != NULL
153 			   || strchr(view->seqname[i], ';') != NULL) {
154 				fl_alert("Sequence: %s\nTrees can't be built with any of (),:; in sequence names", view->seqname[i]);
155 				return;
156 				}
157 			}
158 		}
159 	if(reponse == 0) {//Parsimony methods
160 	  for (int i = 0; i < view->tot_seqs; i++) {//check that no repeated sequence names
161 	    if (view->tot_sel_seqs > 0 && !view->sel_seqs[i]) continue;
162 	    for (int j = i+1; j < view->tot_seqs; j++) {
163 	      if (view->tot_sel_seqs > 0 && !view->sel_seqs[j]) continue;
164 	      if (strcmp(view->seqname[i], view->seqname[j]) == 0) {
165 		fl_alert("The parsimony tree can't be computed if different sequences (#%d, #%d) have the same name: %s", i+1, j+1, view->seqname[i]);
166 		return;
167 		}
168 	      }
169 	    }
170 	    parsimony_dialog(view);
171 	}
172 	else if(reponse == 1) {//Distance methods
173 		distance_method_dialog(view);
174 	}
175 	else if(reponse == 2) {//PhyML
176 		phyml_dialog(view);
177 	}
178 	else if(reponse == 3) {//Import tree
179 		Fl_Native_File_Chooser *chooser = new Fl_Native_File_Chooser();
180 		chooser->type(Fl_Native_File_Chooser::BROWSE_FILE);   // let user browse a single file
181 		chooser->title("Import treefile");
182 		char *filename = run_and_close_native_file_chooser(chooser);
183 		if(filename == NULL) return;
184 		FILE *in = fopen(filename, "r");
185 		fseek(in, 0, SEEK_END);
186 		long l = ftell(in);
187 		fseek(in, 0, SEEK_SET);
188 		char *tree = (char *)malloc(l + 1);
189 		char c, *p = tree;
190 		while((c = fgetc(in)) != EOF) {
191 			if(c != '\n' && c != '\r') *(p++) = c;
192 			}
193 		*p = 0;
194 		fclose(in);
195 		treedraw(tree, view, extract_filename(filename), FALSE);
196 	}
197 	else if(reponse == 4) {//New empty tree window
198 		treedraw(NULL, view, NULL, FALSE);
199 		}
200 	else {
201 	        reponse = ((Fl_Menu_*)ob)->mvalue() - view->menu_trees->vitem(0);
202 		char *p = view->trees[reponse];
203 		Fl_Window *w = Fl::first_window();
204 		while(w != NULL) {
205 			const char *c = w->xclass();
206 			if(c != NULL && strcmp(c, TREE_WINDOW) == 0 &&
207 				strcmp( ((FD_nj_plot*)w->user_data())->trees, p) == 0) {
208 					w->show();
209 					return;
210 				}
211 			w = Fl::next_window(w);
212 			}
213 		char *tree = strdup(p);
214 		treedraw(tree, view, ((Fl_Menu_*)ob)->mvalue()->label(), TRUE);
215 		}
216 }
217 
218 
distance_callback(Fl_Widget * wgt,void * data)219 static void distance_callback(Fl_Widget *wgt, void *data)
220 {
221 	if( ((Fl_Choice *)wgt)->value() >= Ka) ((Fl_Widget *)data)->deactivate();
222 	else ((Fl_Widget *)data)->activate();
223 }
224 
dist_b_cb(Fl_Widget * wgt,void * data)225 static void dist_b_cb(Fl_Widget *wgt, void *data)
226 {
227   Fl_Button **buttons = (Fl_Button **)data;
228   Fl_Button *me = (Fl_Button*)wgt;
229   for (int i=0; i<2; i++){
230     Fl_Button *target = buttons[i];
231     if (target){
232       if (me->value()) target->activate();
233       else {
234         target->deactivate();
235         target->value(0);
236       }
237     }
238   }
239 }
240 
distance_method_dialog(SEA_VIEW * view)241 void distance_method_dialog(SEA_VIEW *view)
242 {
243 	static int first = TRUE;
244 	static int def_replicates = 100;
245 	static int distance_choice = Jukes_Cantor;
246 	static Fl_Window *w;
247 	static Fl_Int_Input *replicates;
248 	static Fl_Box *box;
249 	static Fl_Choice *distance;
250 	static Fl_Check_Button *ignore_all_gaps;
251 	static Fl_Round_Button *nj;
252 	static Fl_Round_Button *bionj;
253 	static Fl_Round_Button *savetofile;
254 	static Fl_Round_Button *usertree;
255 	static Fl_Choice *treechoice;
256 	static Fl_Check_Button *bootstrap;
257 	static Fl_Button *interrupt;
258 	static Fl_Return_Button *go;
259 	static int started, maxchoice;
260 	static Fl_Check_Button *keep_b_trees;
261 	static Fl_Check_Button *compute_tbe;
262 	if(first) {
263 		first = FALSE;
264 		w = new Fl_Window(270, 265);
265 		w->label("Distance analysis");
266 		w->set_modal();
267 		nj = new Fl_Round_Button(3, 3, 60, 20, "NJ");
268 		nj->type(FL_RADIO_BUTTON);
269 		bionj = new Fl_Round_Button(nj->x() + nj->w() + 10, nj->y(), nj->w(), nj->h(), "BioNJ");
270 		bionj->type(FL_RADIO_BUTTON);
271 		savetofile = new Fl_Round_Button(bionj->x() + bionj->w() + 10, nj->y(), 110, nj->h(), "Save to File");
272 		savetofile->type(FL_RADIO_BUTTON);
273 		distance = new Fl_Choice(90, nj->y() + nj->h() + 15, 100, 20, "Distance");
274 		distance->align(FL_ALIGN_LEFT);
275 		ignore_all_gaps = new Fl_Check_Button(160, distance->y() + distance->h() + 5, 50, 20,
276 						      "Ignore all gap sites");
277 		ignore_all_gaps->value(1);
278 		ignore_all_gaps->align(FL_ALIGN_LEFT);
279 		int y = ignore_all_gaps->y() + ignore_all_gaps->h() + 25;
280 		bootstrap = new Fl_Check_Button(3, y, 80, 20, "Bootstrap");
281 		replicates = new Fl_Int_Input(bootstrap->x() + bootstrap->w() + 5, y, 100, 20, "# of replicates");
282 		replicates->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
283 		char tmp[10];
284 		sprintf(tmp, "%d", def_replicates);
285 		replicates->value(tmp);
286 
287 		compute_tbe = new Fl_Check_Button(3, bootstrap->y() + bootstrap->h() + 5, 280, 20, "Transfer Bootstrap Expectation method");
288 		compute_tbe->deactivate();
289 
290 		keep_b_trees = new Fl_Check_Button(3, compute_tbe->y() + compute_tbe->h() + 5, 180, 20, "Show bootstrap trees");
291 		keep_b_trees->deactivate();
292 
293     static Fl_Check_Button *buttons[2];
294 		buttons[0]=compute_tbe;
295 		buttons[1]=keep_b_trees;
296 		bootstrap->callback(dist_b_cb, buttons);
297 		bootstrap->when(FL_WHEN_CHANGED);
298 
299 		box = new Fl_Box(3, keep_b_trees->y() + keep_b_trees->h() + 5, 150, 20, "");
300 		usertree = new Fl_Round_Button(nj->x(), box->y() + box->h() + 15, 85, nj->h(), "User tree:");
301 		usertree->type(FL_RADIO_BUTTON);
302 		treechoice = new Fl_Choice(usertree->x() + usertree->w(), usertree->y(), 155, 20, "Optimize branch lengths");
303 		treechoice->align(FL_ALIGN_TOP|FL_ALIGN_RIGHT);
304 		bionj->setonly();
305 
306 		interrupt = new Fl_Button(3, w->h() - 25, 70, 20, "");
307 		interrupt->callback(interrupt_callback, &tree_build_interrupted);
308 		go = new Fl_Return_Button(w->w() - 70 - 3, interrupt->y() , 70, 20, "Go");
309 		go->callback(interrupt_callback, &started);
310 		w->end();
311 		w->callback(interrupt_callback, &tree_build_interrupted);
312 		}
313 	interrupt->label("Cancel");
314 	distance->clear();
315 	if(view->protein) {
316 		distance->add("Observed");
317 		distance->add("Poisson");
318 		distance->add("Kimura");
319 		maxchoice = Kimura_pdist;
320 		ignore_all_gaps->activate();
321 	}
322 	else {
323 		distance->add("Observed");
324 		distance->add("J-C");
325 		distance->add("K2P");
326 		distance->add("HKY");
327 		distance->add("LogDet");
328 		distance->add("Ka");
329 		distance->add("Ks");
330 		maxchoice = Ks;
331 	}
332 	distance->value( FL_min(distance_choice, maxchoice) );
333 	distance->callback(distance_callback, ignore_all_gaps);
334         keep_b_trees->value(0);
335 	treechoice->clear();
336 	vlength_menu *vmenu = view->menu_trees;
337 	int count = view->menu_trees->vlength();
338 	for (int i = 0; i < count; i++)
339 	  treechoice->add( vmenu->vitem(i)->label() );
340 	treechoice->value(0);
341 	treechoice->deactivate();
342         bootstrap->activate();
343 	if (usertree->value()) bionj->setonly();
344 	if (count) usertree->activate();
345 	else usertree->deactivate();
346 	started = tree_build_interrupted = 0;
347 	box->label("");
348 	go->show();
349 	w->show();
350   while(!started && !tree_build_interrupted) {
351     Fl_Widget *o = Fl::readqueue();
352     if (!o) Fl::wait();
353     else if(o == usertree || o == nj || o == bionj || o == savetofile) {
354       if( usertree->value() || savetofile->value() ) {
355         bootstrap->value(0);
356         bootstrap->deactivate();
357         bootstrap->do_callback();
358       }
359       else {
360         bootstrap->activate();
361       }
362       if( usertree->value()  ) {
363         treechoice->activate();
364       }
365       else {
366         treechoice->deactivate();
367       }
368     }
369   }
370 	distance_choice = distance->value();
371 	int using_kaks = ((!view->protein) && (distance_choice == Ka || distance_choice == Ks) );
372 	if(!tree_build_interrupted) {
373 	  sscanf(replicates->value(), "%d", &def_replicates);
374 	  interrupt->label("Interrupt");
375 	  if(bootstrap->value()) {
376 		  box->label("Count: 0");
377 		  }
378 	  go->hide();
379 	  Fl::flush();
380 	  char *tree = run_distance_method(view, distance_choice, ignore_all_gaps->value(), bionj->value(),
381 						  bootstrap->value(), def_replicates, box, distance->text(), using_kaks,
382 			      savetofile->value()?"":NULL, usertree->value() ? view->trees[treechoice->value()] : NULL,
383 					   false, fl_alert, keep_b_trees->value(), compute_tbe->value());
384 	  const char *choice = bionj->value()? "BioNJ_tree" : "NJ_tree";
385 	  const char *n = extract_filename(view->masename);
386 	  char *title = NULL;
387 	  if (usertree->value()) { title = new char[20]; strcpy(title, "Least_squares_brl");}
388 	  else if (n) {
389 	    char *q = (char*)strchr(n, '.');
390 	    if (q) *q = 0;
391 	    title = new char[strlen(n) + 12];
392 	    sprintf(title, "%s-%s", n, choice);
393 	    if (q) *q = '.';
394 	  }
395     Fl_Window *treew = NULL;
396 	  if (tree) treew = treedraw(tree, view, n || usertree->value() ? title:choice, FALSE);
397 #if !defined(__APPLE__)
398     if (keep_b_trees->value() && treew) {
399       treew->position(treew->x() + 40, treew->y());
400     }
401 #endif
402 	  delete[] title;
403 	}
404 	w->hide();
405 }
406 
407 struct dist_struct {
408   allseq *seqs;
409   int distkind;
410   int protein;
411   char **lwlseqs;
412   int in_bootstrap;
413   char *err_mess;
414   matrix *result;
415   bool done;
416 };
417 
prep_calc_dist_matrix(dist_struct * ds)418 void prep_calc_dist_matrix(dist_struct *ds)
419 {
420   ds->result = calc_dist_matrix(ds->seqs, ds->distkind, ds->protein, ds->lwlseqs, ds->in_bootstrap, &(ds->err_mess));
421   if (tree_build_interrupted && ds->result) {
422     Free_Mat(ds->result);
423     ds->result = NULL;
424     }
425   Fl::lock();
426   ds->done = true;
427   Fl::awake();
428   Fl::unlock();
429 }
430 
run_calc_dist_matrix(allseq * seqs,int distkind,int protein,char ** lwlseqs,int in_bootstrap,char ** p_err_mess,int no_gui)431 matrix *run_calc_dist_matrix(allseq *seqs, int distkind, int protein, char **lwlseqs, int in_bootstrap, char **p_err_mess, int no_gui)
432 {
433   dist_struct *ds = new dist_struct;
434   ds->seqs = seqs;
435   ds->distkind = distkind;
436   ds->protein = protein;
437   ds->lwlseqs =lwlseqs;
438   ds->in_bootstrap = in_bootstrap;
439   ds->err_mess = NULL;
440   ds->result = NULL;
441   ds->done = false;
442   tree_build_interrupted = false;
443   if (no_gui) {
444     ds->result = calc_dist_matrix(ds->seqs, ds->distkind, ds->protein, ds->lwlseqs, ds->in_bootstrap, &(ds->err_mess));
445   }
446   else {
447     fl_create_thread( (void (*)(void*))prep_calc_dist_matrix, ds, 0);
448     while (!ds->done) Fl::wait();
449   }
450   matrix* m = ds->result;
451   if (p_err_mess) *p_err_mess = ds->err_mess;
452   delete ds;
453   return m;
454 }
455 
456 
run_distance_method(SEA_VIEW * view,int distkind,int remove_all_gaps,int use_bionj,int use_bootstrap,int replicates,Fl_Box * box,const char * distance_name,int using_kaks,const char * distancefname,const char * user_tree,int no_gui,void (* alert)(const char *,...),int keep_b_trees,int compute_tbe)457 char *run_distance_method(SEA_VIEW *view, int distkind, int remove_all_gaps, int use_bionj, int use_bootstrap,
458 	int replicates, Fl_Box *box, const char *distance_name, int using_kaks, const char* distancefname,
459 			  const char *user_tree, int no_gui, void (*alert)(const char*, ...), int keep_b_trees, int compute_tbe )
460 {
461 	char *display_tree, *bootstrap_trees = NULL, *err_mess, tree_label[100]="";
462 	int total = 0, i, j, lbtrees;
463 	char **lwlseqs = NULL;
464   	if(using_kaks) remove_all_gaps = FALSE;//keep gaps not to alter reading frame
465 	allseq *phyml_seqs = view_to_allseq(view, remove_all_gaps);
466 	if(use_bootstrap) {//for bootstrap, seq names must be all distinct
467 		for(i = 0; i < phyml_seqs->n_otu - 1; i++) {
468 			for(j = i+1; j < phyml_seqs->n_otu; j++) {
469 				if(strcmp(phyml_seqs->c_seq[i]->name, phyml_seqs->c_seq[j]->name) == 0) {
470 					alert("Can't run bootstrap because sequence name %s is used twice",
471 							 phyml_seqs->c_seq[i]->name);
472 					free_after_view_to_allseq(phyml_seqs);
473 					return NULL;
474 					}
475 				}
476 			}
477 		}
478 	if(using_kaks) {
479 		lwlseqs = (char **)malloc(phyml_seqs->n_otu * sizeof(char *));
480 		for(i = 0; i < phyml_seqs->n_otu; i++) lwlseqs[i] = phyml_seqs->c_seq[i]->state;
481 		}
482 	matrix *phyml_mat = run_calc_dist_matrix(phyml_seqs, distkind, view->protein, lwlseqs, FALSE, &err_mess, no_gui);
483 	if(phyml_mat == NULL) {
484 	  free_after_view_to_allseq(phyml_seqs);
485 	  if( using_kaks ) {
486 	    free(lwlseqs);
487 	    if (err_mess) {
488 	      alert("%s", err_mess);
489 	      delete[] err_mess;
490 	      }
491 	    }
492 	  else if (!tree_build_interrupted) alert("Can't compute distances because sequences are too divergent.\nAre they aligned?");
493 	  return NULL;
494 	  }
495 	if(user_tree != NULL) {//process user tree
496 		extern char *least_squares_brl(const char *tree, int nbseq, double **dist, char **names);
497 		char *p, **usednames;
498 		int i, j;
499 		p = (char *)user_tree;
500 		if(*p== '[')p=strchr(p,']')+1;
501 		if(view->tot_sel_seqs > 0) {
502 			usednames = (char **)malloc(view->tot_sel_seqs * sizeof(char *));
503 			j = 0;
504 			for(i = 0; i < view->tot_seqs; i++) {
505 				if(view->sel_seqs[i]) usednames[j++] = view->seqname[i];
506 				}
507 			}
508 		else usednames = view->seqname;
509 		p = least_squares_brl(p, phyml_seqs->n_otu, phyml_mat->dist, usednames);
510 		if(view->tot_sel_seqs > 0) free(usednames);
511 		if(p != NULL) {
512 			display_tree = (char *)malloc(strlen(p) + strlen(distance_name) + 60);
513 			sprintf(display_tree, "[User-tree w/ least-squares br lengths, %d sites %s]%s",
514 					phyml_seqs->clean_len, distance_name, p);
515 			free(p);
516 			if (box) box->window()->hide();//necessary under X11 so tree is in foreground
517 			}
518 		Free_Mat(phyml_mat);
519 		free_after_view_to_allseq(phyml_seqs);
520 		return display_tree;
521 		}
522 	if(distancefname) {
523 		savedistinfile(phyml_mat, extract_dirname(view->masename), no_gui ? distancefname : NULL);
524 		Free_Mat(phyml_mat);
525 		free_after_view_to_allseq(phyml_seqs);
526 		return NULL;
527 		}
528 	phyml_mat->method = use_bionj; //0:NJ 1:BioNJ
529 	phyml_mat->tree = Make_Tree_From_Scratch(phyml_seqs->n_otu, phyml_seqs);
530 	Bionj(phyml_mat);
531 	sprintf(tree_label, "%s %d sites %s", use_bionj ? "BioNJ" : "NJ",
532 		phyml_seqs->clean_len, distance_name);
533 	if (use_bootstrap) {
534 		char *full_tree = Write_Tree(phyml_mat->tree);
535 		Free_Tree(phyml_mat->tree);
536 		Free_Mat(phyml_mat);
537 		int error = init_add_bootstrap(full_tree);
538 		free(full_tree);
539 		if(!error) {
540       char *p;
541 			for (int r = 0; r < replicates; r++) {
542 				if (tree_build_interrupted) break;
543 				bootstrap_weights(phyml_seqs, using_kaks );
544 				phyml_mat = run_calc_dist_matrix(phyml_seqs, distkind, view->protein, lwlseqs, TRUE, &err_mess, no_gui);
545 				if(phyml_mat == NULL) {
546 					if (!tree_build_interrupted) total = 0;
547 					break;
548 					}
549 				phyml_mat->method = use_bionj; // 0:NJ 1:BioNJ
550 				phyml_mat->tree = Make_Tree_From_Scratch(phyml_seqs->n_otu, phyml_seqs);
551 				Bionj(phyml_mat);
552 				char *one_replicate_tree = Write_Tree(phyml_mat->tree);
553         if (keep_b_trees || compute_tbe) {
554           if (r == 0) {
555             lbtrees = replicates * (strlen(one_replicate_tree) + 1);
556             bootstrap_trees = (char *)malloc(lbtrees + 1);
557             p = bootstrap_trees;
558             *p = 0;
559           }
560           int l = strlen(one_replicate_tree);
561           if ((p-bootstrap_trees) + l + 1 > lbtrees) {
562             lbtrees = (p-bootstrap_trees) + l + 1000;
563             bootstrap_trees = (char*)realloc(bootstrap_trees, lbtrees + 1);
564             p = bootstrap_trees + strlen(bootstrap_trees);
565           }
566           memcpy(p, one_replicate_tree, l); p += l;
567           *p++ = '\n';
568           *p = 0;
569         }
570 				Free_Tree(phyml_mat->tree);
571 				Free_Mat(phyml_mat);
572 				total += process_one_replicate(one_replicate_tree, 1.);
573 				free(one_replicate_tree);
574 				char newbox[20];
575 				sprintf(newbox,"Count: %d", total);
576 				if (box) {
577 				  box->label(newbox);
578 				  box->redraw();
579 				  Fl::wait(0);
580 				}
581 			}
582 		}
583 		if(total > 0) {
584 			display_tree = finish_add_bootstrap(total);
585 			sprintf(tree_label + strlen(tree_label), " %d repl.%s", total, compute_tbe ? " (TBE)" : "" );
586 			}
587 		else display_tree = NULL;
588 	}
589 	else {
590 		display_tree = Write_Tree(phyml_mat->tree);
591 		Free_Tree(phyml_mat->tree);
592 		Free_Mat(phyml_mat);
593 	}
594 	free_after_view_to_allseq(phyml_seqs);
595 	if(using_kaks) {
596 		free(lwlseqs);
597 		}
598 	if(display_tree == NULL) {
599 	  if( !using_kaks) alert("Can't compute distances because sequences are too divergent.\nAre they aligned?");
600 	  return NULL;
601 	}
602 	if (bootstrap_trees && compute_tbe) {
603 	  display_tree=TBE_Compute(display_tree,bootstrap_trees);
604 	}
605 	//add label to tree
606 	char *tree = (char *)malloc(strlen(tree_label) + strlen(display_tree) + 4);
607 	sprintf(tree, "[%s] %s", tree_label, display_tree);
608 	free(display_tree);
609 	if (box) box->window()->hide();//necessary under X11 so tree is in foreground
610   if (bootstrap_trees && keep_b_trees) {
611     treedraw(bootstrap_trees, view, "bootstrap trees", FALSE);
612   }
613   return tree;
614 }
615 
savedistinfile(matrix * mat,const char * dirname,const char * outfname)616 void savedistinfile(matrix *mat, const char *dirname, const char *outfname)
617 {
618 	FILE *out;
619 	int i, j;
620 	if (!outfname) {
621 	  Fl_Native_File_Chooser *chooser = new Fl_Native_File_Chooser();
622 	  chooser->type(Fl_Native_File_Chooser::BROWSE_SAVE_FILE);
623 	  chooser->options(Fl_Native_File_Chooser::SAVEAS_CONFIRM | chooser->options());
624 	  chooser->title("Set distance output filename");
625 	  chooser->directory(dirname);
626 	  chooser->preset_file("seqs.dist");
627 	  outfname = run_and_close_native_file_chooser(chooser);
628 	  if(outfname == NULL) return;
629 	}
630 	out = fopen(outfname, "w");
631 	if(out == NULL) return;
632 	fputs("#distances order: d(1,2),...,d(1,n) <new line> d(2,3),...,d(2,n) <new line>...\n", out);
633 	fprintf(out, "%d\n", mat->n_otu);
634 	for( i = 0; i < mat->n_otu - 1; i++) {
635 		for( j = i + 1; j < mat->n_otu; j++) {
636 			fprintf(out, "%g ", mat->dist[i][j]);
637 			}
638 		fputs("\n", out);
639 		}
640 	for( i = 0; i < mat->n_otu ; i++) {
641 		fprintf(out, "%s ", mat->name[i]);
642 		}
643 	fputs("\n\n", out);
644   fputs("#pairwise distances\n", out);
645   for (i = 0; i < mat->n_otu; i++) {
646     for (j = i + 1; j < mat->n_otu; j++) {
647       char *n1 = mat->name[i];
648       char *n2 = mat->name[j];
649       if (strcmp(n1, n2) > 0) { char *p = n1; n1 = n2; n2 = p; }
650       fprintf(out, "%s,%s: %g\n", n1, n2, mat->dist[i][j]);
651     }
652   }
653   fclose(out);
654 }
655 
calc_dist_matrix(allseq * seqs,int distkind,int protein,char ** lwlseqs,int in_bootstrap,char ** p_err_mess)656 matrix *calc_dist_matrix(allseq *seqs, int distkind, int protein, char **lwlseqs, int in_bootstrap, char **p_err_mess)
657 {
658 	matrix *phyml_mat = NULL;
659 	if(distkind == observed_pdist || distkind == observed_ndist) {//observed
660 		model jcmodel;
661 		jcmodel.stepsize = 1;
662 		jcmodel.datatype = (protein ? 1 : 0);
663 		jcmodel.ns = (protein ? 20 : 4);
664 		phyml_mat = Obs_Dist(seqs, &jcmodel);
665 	}
666 	else if(distkind == Kimura_pdist ) {//Kimura protein distance
667 	  phyml_mat = Kimura_p_Dist(seqs);
668 	  if(phyml_mat == NULL) return NULL;
669 	}
670 	else if(distkind == Poisson_pdist || distkind == Jukes_Cantor) {//JC or Poisson
671 		model jcmodel;
672 		jcmodel.stepsize = 1;
673 		jcmodel.datatype = (protein ? 1 : 0);
674 		jcmodel.ns = (protein ? 20 : 4);
675 		phyml_mat = JC69_Dist(seqs, &jcmodel);
676 		if(phyml_mat == NULL) return NULL;
677 	}
678 	else if((distkind == Kimura2P) && ! protein) {//K2P
679 		phyml_mat = K80_dist_nogamma(seqs);
680 		if(phyml_mat == NULL) return NULL;
681 	}
682 	else if(distkind == HKY && ! protein) {//HKY
683 		phyml_mat = HKY_dist(seqs);
684 		if(phyml_mat == NULL) return NULL;
685 	}
686 	else if(distkind == LogDet && ! protein) {//LogDet
687 		phyml_mat = LOGDET_dist(seqs);
688 		if(phyml_mat == NULL) return NULL;
689 	}
690 	else if( (distkind == Ka || distkind == Ks) && ! protein) {//Ka or Ks
691 		int sat1, sat2, count = 0, err = 0, i, j;
692 		int l3 = 3*(seqs->clean_len / 3);//dismiss partial last codon
693 		*p_err_mess = NULL;
694 		static int first = TRUE;
695 		if(first) {
696 			first = FALSE;
697 			loadrl(); // needed once only
698 			prefastlwl();
699 			}
700 		phyml_mat = Make_Mat(seqs->n_otu);
701 		Init_Mat(phyml_mat, seqs);
702 // Ks distances can become saturated, re-run a bootstrap replicate in this case at most 50 times
703 		do {
704 			if(in_bootstrap && err) bootstrap_weights(seqs, TRUE );
705 			count++;
706 			if(distkind == Ka) //Ka
707 				err = fastlwl(lwlseqs, seqs->n_otu, l3, phyml_mat->dist, NULL, NULL, NULL, &sat1, &sat2, seqs->wght);
708 			else //Ks
709 				err = fastlwl(lwlseqs, seqs->n_otu, l3, NULL, phyml_mat->dist, NULL, NULL, &sat1, &sat2, seqs->wght);
710 			}
711 		while (in_bootstrap && err && count <=50);
712 		if (err) {
713 		    if (!in_bootstrap && !tree_build_interrupted) {
714 			char *err_mess = new char[200];
715 			snprintf(err_mess, 200, "Saturation between %s and %s", phyml_mat->name[sat1], phyml_mat->name[sat2]);
716 			*p_err_mess = err_mess;
717 		    }
718 		    Free_Mat(phyml_mat);
719 		    return NULL;
720 		    }
721 		for(i = 0; i < seqs->n_otu - 1; i++) {
722 			for(j = i + 1; j < seqs->n_otu; j++) {
723 				phyml_mat->dist[j][i] = phyml_mat->dist[i][j];
724 			}
725 		}
726 		for(i = 0; i < seqs->n_otu; i++) phyml_mat->dist[i][i] = 0;
727 	}
728 	return phyml_mat;
729 }
730 
731 /* a window where the return key always triggers a button
732  */
733 class return_window : public Fl_Window {
734   Fl_Return_Button *run_button;
735 public:
return_window(int w,int h,const char * t)736   return_window(int w, int h, const char *t) : Fl_Window(w, h, t) {}
run(Fl_Return_Button * b)737   void run(Fl_Return_Button *b) { run_button = b; }
handle(int event)738   int handle(int event) {
739     if (event == FL_KEYBOARD && (Fl::event_key() == FL_Enter || Fl::event_key() == FL_KP_Enter)) {
740       run_button->do_callback();
741       return 1;
742       }
743     return Fl_Window::handle(event);
744     }
745 };
746 
747 Fl_Input *name;
748 Fl_Choice *mymodel;
749 Fl_Round_Button *b_alrt; Fl_Round_Button *b_no; Fl_Int_Input *b_count;
750 Fl_Round_Button *f_empir;
751 Fl_Round_Button *r_estim; Fl_Float_Input *r_val;
752 Fl_Round_Button *i_no; Fl_Round_Button *i_estim; Fl_Float_Input *i_val;
753 Fl_Round_Button *g_no; Fl_Choice *g_cats; Fl_Round_Button *a_est; Fl_Float_Input *a_val;
754 Fl_Round_Button *nni_b, *spr_b, *nni_spr_b;
755 Fl_Round_Button *u_bionj, *u_menutree, *u_random;
756 Fl_Choice *u_choice;
757 Fl_Check_Button *u_optimize, *u_quiet, *b_keep_trees, *b_tbe;
758 Fl_Int_Input *u_random_count;
759 char *phyml_path;
760 
support_cb(Fl_Widget * wid,void * data)761 static void support_cb(Fl_Widget *wid, void *data)
762 {
763   Fl_Button *b = (Fl_Button*)data;
764   if (b->value()) {
765     b_keep_trees->activate();
766     b_tbe->activate();
767   }
768   else {
769     b_keep_trees->deactivate();
770     b_keep_trees->value(0);
771     b_tbe->deactivate();
772   }
773 }
774 
phyml_dialog(SEA_VIEW * view)775 void phyml_dialog(SEA_VIEW *view)
776 {
777 	int y = 3;
778 	static int first = TRUE;
779 	if(first) {
780 		char *p, *q;
781 #if defined(__APPLE__)
782 #  ifdef __ppc__
783           p = (char*)"phyml_3.0_universal";
784 #  else
785           p = (char*)(fl_mac_os_version < 100800 ? "phyml_3.0_universal" : "PhyML-3.1_macOS-MountainLion");
786 #  endif // __ppc__
787           if ((q = get_full_path(p)) == NULL) {
788             fl_message("Warning: PhyML executable not found. Please, download the seaview macOS application again.");
789             return;
790           }
791 #else
792 		p = get_res_value("phyml",
793 #  ifdef WIN32
794 					"PhyML-3.1_win32.exe"
795 #  elif defined(sparc)
796 						  "phyml_3.0_sparc"
797 #  else
798 #    ifndef PHYMLNAME
799 #      ifdef __i386__
800 #        define PHYMLNAME "PhyML-3.1_linux32"
801 #      else
802 #        define PHYMLNAME "PhyML-3.1_linux64"
803 #      endif // __i386__
804 #    endif // PHYMLNAME
805 					PHYMLNAME
806 #  endif // WIN32
807 		);
808 #if defined(__linux__)
809 	  if ((q = get_full_path(p)) == NULL) p = (char*)"phyml";
810 #endif // __linux__
811 		if ((q = get_full_path(p)) == NULL) {
812 			int rep = fl_choice("Seaview does not find PhyML under the name %s.\n"
813 					  "Is PhyML available on your computer ?", "No", "Available", NULL, p);
814 			if(rep == 0) return;
815 			Fl_Native_File_Chooser *chooser = new Fl_Native_File_Chooser();
816 			chooser->type(Fl_Native_File_Chooser::BROWSE_FILE);   // let user browse a single file
817 			chooser->title("Find PhyML on your computer");
818 			q = run_and_close_native_file_chooser(chooser);
819 			if(q == NULL) return;
820 			set_res_value("phyml", q);
821 			save_resources();
822 			}
823 #endif // __APPLE__
824 		first = FALSE;
825 		phyml_path = (char *)malloc(strlen(q) + 3);
826 		sprintf(phyml_path, "\"%s\"", q);
827 		}
828 	return_window *w = new return_window(270,130, "PhyML options");
829 	w->set_modal();
830 	//model
831 	mymodel = new Fl_Choice(50, y, 90, 20, "Model:");
832 	mymodel->align(FL_ALIGN_LEFT);
833 	if(view->protein) {
834 		mymodel->add("LG");
835 		mymodel->add("WAG");
836 		mymodel->add("Dayhoff");
837 		mymodel->add("JTT");
838 		mymodel->add("Blosum62");
839 		mymodel->add("MtREV");
840 		mymodel->add("RtREV");
841 		mymodel->add("CpREV");
842 		mymodel->add("DCMut");
843 		mymodel->add("VT");
844 		mymodel->add("MtMam");
845 		mymodel->add("MtArt");
846 		mymodel->add("HIVw");
847 		mymodel->add("HIVb");
848 	  mymodel->add("FLU");
849 		mymodel->value(0);
850 		}
851 	else {
852 		mymodel->add("JC69");
853 		mymodel->add("K80");
854 		mymodel->add("F81");
855 		mymodel->add("HKY85");
856 		mymodel->add("F84");
857 		mymodel->add("TN93");
858 		mymodel->add("GTR");
859 		mymodel->value(6);
860 		}
861 	y += mymodel->h() + 25;
862 	//bootstrap
863 	Fl_Group *b_group = new Fl_Group(0, y, w->w(), 95, "Branch Support");
864 	b_group->box(FL_ROUNDED_BOX);
865 	b_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
866 	y += 5;
867 	b_alrt = new Fl_Round_Button(3, y, 110, 20, "aLRT (SH-like)");
868 	b_alrt->type(FL_RADIO_BUTTON);
869 	b_no = new Fl_Round_Button(b_alrt->x() + b_alrt->w() + 10, y, 60, 20, "None");
870 	b_no->type(FL_RADIO_BUTTON);
871 	y += b_alrt->h() + 3;
872 	Fl_Round_Button *b_yes = new Fl_Round_Button(3, y, 115, 20, "Bootstrap with");
873 	b_yes->type(FL_RADIO_BUTTON);
874 	b_alrt->setonly();
875 	b_count = new Fl_Int_Input(b_yes->x() + b_yes->w() + 5, y, 50, 20, "replicates");
876 	b_count->align(FL_ALIGN_RIGHT);
877 	b_count->static_value("100");
878 
879   y += b_count->h();
880   b_tbe = new Fl_Check_Button(b_yes->x(), y, 265, 20, "Transfer Bootstrap Expectation method");
881   b_tbe->deactivate();
882   y += b_tbe->h();
883   b_keep_trees = new Fl_Check_Button(b_yes->x(), y, 180, 20, "Show bootstrap trees");
884   b_keep_trees->deactivate();
885 
886   b_alrt->callback(support_cb, b_yes);
887   b_no->callback(support_cb, b_yes);
888   b_yes->callback(support_cb, b_yes);
889 	b_group->end();
890 	y += b_count->h() + 30;
891   // freq + Ts/Tv group
892   Fl_Group *fr_group = new Fl_Group(0, y, w->w(), 75, "");
893   // frequencies group
894   Fl_Group *f_group = new Fl_Group(0, y, w->w(), 30, view->protein?"Amino acid equilibrium frequencies":"Nucleotide equilibrium frequencies");
895   f_group->box(FL_ROUNDED_BOX);
896   f_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
897   y += 5;
898   f_empir = new Fl_Round_Button(3, y, 85, 20, "Empirical");
899   f_empir->type(FL_RADIO_BUTTON);
900   Fl_Round_Button *f_m = new Fl_Round_Button(f_empir->x() + f_empir->w() + 5, y, 60, 20, view->protein?"Model-given":"Optimized");
901   f_m->type(FL_RADIO_BUTTON);
902   (view->protein?f_m:f_empir)->setonly();
903   f_group->end();
904   y += f_group->h() + 15;
905 	//Ts/Tv ratio
906 	Fl_Group *r_group = new Fl_Group(0, y, w->w(), 30, "Ts/Tv ratio");
907 	r_group->box(FL_ROUNDED_BOX);
908 	r_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
909 	y += 5;
910 	r_estim = new Fl_Round_Button(3, y, 85, 20, "Optimized");
911 	r_estim->type(FL_RADIO_BUTTON);
912 	Fl_Round_Button *r_fix = new Fl_Round_Button(r_estim->x() + r_estim->w() + 5, y, 60, 20, "Fixed");
913 	r_fix->type(FL_RADIO_BUTTON);
914 	r_fix->setonly();
915 	r_val = new Fl_Float_Input(r_fix->x() + r_fix->w() + 10, y, 40, 20, "");
916 	r_val->value("4.0");
917 	r_group->end();
918 	r_group->deactivate();
919 	if (view->protein) r_group->hide();
920 	else y += r_group->h() + 15;
921   fr_group->end();
922   if (!view->protein) mymodel->callback(model_callback, fr_group);
923 	//invariable
924 	Fl_Group *i_group = new Fl_Group(0, y, w->w(), 30, "Invariable sites");
925 	i_group->box(FL_ROUNDED_BOX);
926 	i_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
927 	y += 5;
928 	i_no = new Fl_Round_Button(3, y, 52, 20, "None");
929 	i_no->type(FL_RADIO_BUTTON);
930 	i_estim = new Fl_Round_Button(i_no->x() + i_no->w() + 5, y, 85, 20, "Optimized");
931 	i_estim->type(FL_RADIO_BUTTON);
932 	Fl_Round_Button *i_fix = new Fl_Round_Button(i_estim->x() + i_estim->w() + 2, y, 60, 20, "Fixed");
933 	i_fix->type(FL_RADIO_BUTTON);
934 	i_no->setonly();
935 	i_val = new Fl_Float_Input(i_fix->x() + i_fix->w() + 10, y, 40, 20, "");
936 	i_val->value("0.10");
937 	i_group->end();
938 	y += i_group->h() + 15;
939 	//gamma categories
940 	Fl_Group *g_group = new Fl_Group(0, y, w->w(), 55, "Across site rate variation");
941 	g_group->box(FL_ROUNDED_BOX);
942 	g_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
943 	y += 5;
944 	g_no = new Fl_Round_Button(3, y, 75, 20, "None");
945 	g_no->type(FL_RADIO_BUTTON);
946 	g_cats = new Fl_Choice(g_no->x() + g_no->w() + 130, y, 50, 20, "# of rate categories");
947 	mymodel->align(FL_ALIGN_LEFT);
948 	g_cats->add("4");
949 	g_cats->add("6");
950 	g_cats->add("8");
951 	g_cats->add("10");
952 	g_cats->add("12");
953 	g_cats->value(0);
954 	y = g_cats->y() + g_cats->h() + 5;
955 	a_est = new Fl_Round_Button(3, y, 85, 20, "Optimized");
956 	a_est->type(FL_RADIO_BUTTON);
957 	a_est->setonly();
958 	Fl_Round_Button *a_fixed = new Fl_Round_Button(a_est->x() + a_est->w() + 5, y, 60, 20, "Fixed");
959 	a_fixed->type(FL_RADIO_BUTTON);
960 	a_val = new Fl_Float_Input(a_fixed->x() + a_fixed->w() + 10, y, 40, 20, "");
961 	a_val->value("2.0");
962 	g_group->end();
963 	y += a_val->h() + 25;
964 	//tree searching
965 	Fl_Group *s_group = new Fl_Group(0, y, w->w(), 35, "Tree searching operations");
966 	s_group->box(FL_ROUNDED_BOX);
967 	s_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
968 	y += 5;
969 	nni_b = new Fl_Round_Button(3, y, 45, 20, "NNI");
970 	nni_b->type(FL_RADIO_BUTTON);
971 	spr_b = new Fl_Round_Button(nni_b->x() + nni_b->w() + 5, y, 50, 20, "SPR");
972 	spr_b->type(FL_RADIO_BUTTON);
973 	nni_spr_b = new Fl_Round_Button(spr_b->x() + spr_b->w() + 5, y, 150, 20, "Best of NNI && SPR");
974 	nni_spr_b->type(FL_RADIO_BUTTON);
975 	nni_b->setonly();
976 	nni_b->callback(change_searching_callback, NULL);
977 	spr_b->callback(change_searching_callback, NULL);
978 	nni_spr_b->callback(change_searching_callback, NULL);
979 	s_group->end();
980 	y = s_group->y() + s_group->h() + 5;
981 	//starting tree
982 	y += 15;
983 	Fl_Group *u_group = new Fl_Group(0, y, w->w(), 80, "Starting tree");
984 	u_group->box(FL_ROUNDED_BOX);
985 	u_group->align(FL_ALIGN_TOP|FL_ALIGN_CENTER);
986 	y += 5;
987 	u_bionj = new Fl_Round_Button(3, y, 45, 20, "BioNJ");
988 	u_bionj->type(FL_RADIO_BUTTON);
989 	u_bionj->setonly();
990 	u_optimize = new Fl_Check_Button(u_bionj->x() + u_bionj->w() + 30, y, 170, 20, "Optimize tree topology");
991 	u_optimize->value(1);
992 	y += u_bionj->h() + 5;
993 	u_menutree = new Fl_Round_Button(3, y, 95, 20, "User given:");
994 	u_menutree->type(FL_RADIO_BUTTON);
995 	u_choice = new Fl_Choice(u_menutree->x() + u_menutree->w() + 5, y, 100, 20, "");
996 	vlength_menu *vmenu = view->menu_trees;
997 	int count = view->menu_trees->vlength();
998 	for(int i = 0; i < count; i++) u_choice->add( vmenu->vitem(i)->label() );
999 	u_choice->value(0);
1000 	if(count <= 0) {u_menutree->deactivate(); u_choice->deactivate(); }
1001 	u_random = new Fl_Round_Button(3, u_menutree->y() + u_menutree->h() + 5, 80, 20, "Add: ");
1002 	u_random->deactivate();
1003 	u_random->type(FL_RADIO_BUTTON);
1004 	u_random_count = new Fl_Int_Input(u_random->x() + u_random->w(), u_random->y(), 35, 20, "random starts");
1005 	u_random_count->align(FL_ALIGN_RIGHT);
1006 	u_random_count->value("5");
1007 	u_group->end();
1008 	y = u_group->y() + u_group->h() + 5;
1009 	// Run
1010 	Fl_Button *cancel = new Fl_Button(3, y, 50, 20, "Cancel");
1011 	cancel->callback(cancel_callback, NULL);
1012 	Fl_Return_Button *ok = new Fl_Return_Button(w->w() - 60, y, 55, 20, "Run");
1013 	w->run(ok);
1014 	ok->callback(run_phyml_callback, view);
1015   u_quiet = new Fl_Check_Button(cancel->x() + cancel->w() + 40, y, 65, 20, "Quiet");
1016   u_quiet->value(0);
1017 #ifndef WIN32
1018   //u_quiet->hide();
1019 #endif
1020 	w->end();
1021 	w->size(w->w(), y + cancel->h() + 5);
1022 	w->callback(cancel_callback, NULL);
1023 	w->show();
1024 }
1025 
change_searching_callback(Fl_Widget * ob,void * unused)1026 static void change_searching_callback(Fl_Widget *ob, void *unused)
1027 {
1028 	if(ob == nni_b) u_random->deactivate();
1029 	else u_random->activate();
1030 }
1031 
cancel_callback(Fl_Widget * ob,void * data)1032 static void cancel_callback(Fl_Widget *ob, void *data)
1033 {
1034 	Fl_Window *w = ob->window();
1035 	if(w == NULL) w = (Fl_Window *)ob;
1036 	w->hide();
1037 	Fl::delete_widget(w);
1038 }
1039 
model_callback(Fl_Widget * ob,void * data)1040 static void model_callback(Fl_Widget *ob, void *data)
1041 {
1042   Fl_Choice *c = (Fl_Choice *)ob;
1043   Fl_Group *fr_group = (Fl_Group*)data;
1044   Fl_Group *f_group = (Fl_Group *)fr_group->child(0);
1045   Fl_Group *r_group = (Fl_Group *)fr_group->child(1);
1046   int v = c->value();
1047   if (v >=0 && v <= 2) f_group->deactivate(); else f_group->activate();
1048   if (v == 1 || v == 3 || v == 4 || v == 5) r_group->activate();
1049   else r_group->deactivate();
1050 }
1051 
1052 
run_phyml_callback(Fl_Widget * ob,void * data)1053 void run_phyml_callback(Fl_Widget *ob, void *data)
1054 {
1055   FILE *in;
1056   SEA_VIEW *view = (SEA_VIEW *)data;
1057   char input[PATH_MAX], base_fname[PATH_MAX], invar[10], alpha[10], *p;
1058   int replicates, cats, status, seqlen, user_tree = FALSE, i;
1059   float logL;
1060   char **tmpseqs, **tmpnames, **truenames;
1061   int tot_seqs = view->tot_seqs;
1062 
1063   p = create_tmp_filename();
1064   if(p == NULL) return;
1065   strcpy(base_fname, p);
1066   strcpy(input, base_fname);
1067   strcat(input, ".phy");
1068   region r, *pr = &r;
1069   list_segments ls;
1070   if(view->tot_sel_seqs > 0 && view->active_region == NULL) {
1071 	  ls.debut = 1; ls.fin = view->seq_length; ls.next = NULL;
1072 	  r.list = &ls;
1073 	  }
1074   else {
1075 	  pr = view->active_region;
1076 	  }
1077   if(pr != NULL) {
1078 	  list_segments *pls;
1079 	  pls = pr->list;
1080 	  seqlen = 0;
1081 	  while(pls != NULL) {
1082 		  seqlen += pls->fin - pls->debut + 1;
1083 		  pls = pls->next;
1084 	  }
1085   }
1086   else seqlen = view->seq_length;
1087   //remove * from saved protein data
1088   if(view->protein) {
1089 	  tmpseqs = (char **)calloc(view->tot_seqs, sizeof(char *));
1090 	  for( i = 0; i < view->tot_seqs; i++) {
1091 		  if(view->tot_sel_seqs > 0 && !view->sel_seqs[i]) continue;
1092 		  tmpseqs[i] = strdup(view->sequence[i]);
1093 		  char *p = tmpseqs[i];
1094 		  while((p = strchr(p, '*')) != NULL) *p = '-';
1095 		  }
1096 	  }
1097   else tmpseqs = view->sequence;
1098   //Use Seq##_ as sequence names
1099   tmpnames = (char **)calloc(view->tot_seqs, sizeof(char *));
1100   //memorize seq names because can be changed while phyml runs
1101   truenames = (char **)calloc(view->tot_seqs, sizeof(char *));
1102   int length = 0;
1103   for(i = 0; i < view->tot_seqs; i++) {
1104     if(view->tot_sel_seqs > 0 && !view->sel_seqs[i]) continue;
1105     tmpnames[i] = (char *)malloc(15);
1106     truenames[i] = strdup(view->seqname[i]);
1107     sprintf(tmpnames[i], "Seq%d_", i);
1108     length = FL_max(length, (int)strlen(tmpnames[i]));
1109     }
1110   status = save_phylip_file(input, tmpseqs,
1111 				   tmpnames, view->tot_seqs, view->each_length, pr,
1112 				   view->sel_seqs, view->tot_sel_seqs, length);
1113   for(i = 0; i < view->tot_seqs; i++) if(tmpnames[i] != NULL) free(tmpnames[i]);
1114   free(tmpnames);
1115   if(view->protein) {
1116 	  for( i = 0; i < view->tot_seqs; i++) {
1117 		  if(tmpseqs[i] != NULL) free(tmpseqs[i]);
1118 		  }
1119 	  free(tmpseqs);
1120 	  }
1121   if(status) goto way_out;
1122   if(b_alrt->value()) replicates = -4;
1123   else if (b_no->value()) replicates = 0;
1124   else sscanf(b_count->value(), "%d", &replicates);
1125   if (!u_optimize->value()) replicates = 0;
1126   if(i_no->value()) strcpy(invar, "0.0");
1127   else if(i_estim->value()) strcpy(invar, "e");
1128   else strcpy(invar, i_val->value());
1129   if(g_no->value()) { cats = 1; strcpy(alpha, "2.0"); }
1130   else {
1131 	  sscanf(g_cats->mvalue()->label(), "%d", &cats);
1132 	  if(a_est->value())  strcpy(alpha, "e");
1133 	  else strcpy(alpha, a_val->value());
1134 	  }
1135   char modelname[20], *args;
1136   strcpy(modelname, mymodel->mvalue()->label());
1137   //build command line
1138   args = (char *)malloc(1000);
1139   sprintf(args, "%s -d %s -m %s -b %d -v %s -c %d -a %s",
1140 		  phyml_path, view->protein?"aa":"nt", modelname, replicates, invar, cats, alpha);
1141   if( (!view->protein) &&
1142   (strcmp(modelname,"K80")==0 || strcmp(modelname,"HKY85")==0 || strcmp(modelname,"F84")==0 || strcmp(modelname,"TN93")==0) ) {
1143   if(r_estim->value()) {
1144 	  strcat(args, " -t e");
1145 	  }
1146   else {
1147 	  sprintf(args + strlen(args), " -t %s", r_val->value());
1148 	  }
1149   }
1150   if( (view->protein) ||
1151      (strcmp(modelname,"JC69")!=0 && strcmp(modelname,"K80")!=0 && strcmp(modelname,"F81")!=0) ) {
1152     if(f_empir->value()) {
1153       strcat(args, " -f e");
1154     }
1155     else {
1156       strcat(args, " -f m");
1157     }
1158   }
1159   if(spr_b->value()) strcat(args, " -s SPR");
1160   else if(nni_spr_b->value()) strcat(args, " -s BEST");
1161   if(!u_optimize->value()) { strcat(args, " -o lr"); user_tree = TRUE; }
1162   if (u_quiet->value()) strcat(args, " --quiet");
1163   else                  strcat(args, " --no_memory_check");
1164   if(u_menutree->value()) {
1165 	   char *tree = view->trees[u_choice->value()];
1166 	   if(*tree == '[' && (p = strchr(tree, ']')) != NULL) { //remove tree header
1167 		   tree = p + 1;
1168 		   while(*tree == ' ') tree++;
1169 		   }
1170 	   tree = replace_tree_names_by_rank(tree, view->seqname, view->tot_seqs);
1171 	  if(tree == NULL) {
1172 		  fl_alert("Names in starting tree and alignment don't match.\nStarting tree will be ignored.");
1173 		  }
1174 	  else {
1175 		  char tmp[PATH_MAX];
1176 		  sprintf(tmp, "%s.startingtree", base_fname);
1177 		   FILE *out = fopen(tmp, "w");
1178 		   fputs(tree, out);
1179 		   free(tree);
1180 		   fclose(out);
1181 		   sprintf(args + strlen(args), " -u \"%s\"", tmp);
1182 		  }
1183    }
1184   if(u_random->value()) {
1185 		  sprintf(args + strlen(args), " --rand_start --n_rand_starts %d", atoi(u_random_count->value()));
1186 		  }
1187   //finish with (long) input filename
1188   sprintf(args + strlen(args), " -i \"%s\"", input);
1189   ob->window()->hide(); //deletion of this window should be done totally at the end of this callback
1190   status = run_external_prog_in_pseudoterm(args, NULL, "tree-building");
1191   free(args);
1192   sprintf(input, "%s.phy_phyml_stats.txt", base_fname);
1193   in = fopen(input, "r");
1194   if (in == NULL) {
1195     sprintf(input, "%s.phy_phyml_stat.txt", base_fname);
1196     in = fopen(input, "r");
1197     }
1198   if (in == NULL) {
1199     sprintf(input, "%s.phy_phyml_stats", base_fname);
1200     in = fopen(input, "r");
1201     }
1202   logL = 0;
1203   if(in != NULL) {//search tree likelihood
1204 	  while(TRUE) {
1205 		  char line[100];
1206 		  p = fgets(line, sizeof(line), in);
1207 		  if(p == NULL) break;
1208 		  if( (p=strstr(line, "Log-likelihood:")) != NULL) sscanf(p+15, "%f", &logL);
1209 		  else if( (p=strstr(line, "Log-likelihood :")) != NULL) sscanf(p+16, "%f", &logL);
1210 		  }
1211 	  fclose(in);
1212 	  }
1213   sprintf(input, "%s.phy_phyml_tree.txt", base_fname);
1214   in = fopen(input, "r");
1215   if (in == NULL) {
1216     sprintf(input, "%s.phy_phyml_tree", base_fname);
1217     in = fopen(input, "r");
1218     }
1219   if( status != 0 || in == NULL ) {
1220 	  if(in != NULL) fclose(in);
1221 	  if (is_view_valid(view)) fl_message("%s: tree building cancelled", extract_filename(view->masename));
1222 	  }
1223   else {
1224 	  char *display_tree;
1225 	  fseek(in, 0, SEEK_END);
1226 	  long l = ftell(in);
1227 	  display_tree = (char *)malloc(l + 1);
1228 	  int c; p = display_tree;
1229 	  fseek(in, 0, SEEK_SET);
1230 	  while( (c = getc(in)) != EOF) {
1231 		  if(c != '\n' && c != '\r') *(p++) = c;
1232 		  }
1233 	  *p = 0;
1234 	  fclose(in);
1235 	  display_tree = put_names_back_in_tree(display_tree, truenames);
1236     //process bootstrap trees
1237     char *bootstrap_trees = NULL;
1238     if (replicates > 0) {
1239       sprintf(input, "%s.phy_phyml_boot_stats.txt", base_fname);
1240       in = fopen(input, "r");
1241       if (!in) {
1242         sprintf(input, "%s.phy_phyml_boot_stats", base_fname);
1243         in = fopen(input, "r");
1244       }
1245       if (in) { // compute the true # of bootstrap replicates because parallelized phyML can compute a few more
1246         char line[150];
1247         while ((p = fgets(line, sizeof(line), in))) {
1248           while (*p == ' ') p++;
1249           if (*p == '#') {
1250             sscanf(p+1, "%d", &replicates);
1251           }
1252         }
1253         fclose(in);
1254       }
1255     }
1256     if (replicates > 0 && (b_keep_trees->value() || b_tbe->value())) {
1257       sprintf(input, "%s.phy_phyml_boot_trees.txt", base_fname);
1258       in = fopen(input, "r");
1259       if (!in) {
1260       	sprintf(input, "%s.phy_phyml_boot_trees", base_fname);
1261       	in = fopen(input, "r");
1262       }
1263       if (in) {
1264         FILE *out = NULL;
1265         sprintf(input, "%s.phy_phyml_boot_trees_truename.txt", base_fname);
1266         out = fopen(input, "w");
1267         long l = 1000;
1268         while (out) { // for all bootstrap trees
1269           char *b_tree, *q, *tree2;
1270           int c;
1271           b_tree = (char*)malloc(l + 1);
1272           q = b_tree;
1273           do { // read one line = one tree
1274             c = fgetc(in);
1275             if (c == EOF) {
1276               free(b_tree);
1277               break;
1278             }
1279             if (q - b_tree >= l) {
1280               l += 1000;
1281               tree2 = (char *)realloc(b_tree, l + 1);
1282               q = tree2 + (q - b_tree);
1283               b_tree = tree2;
1284             }
1285             *q++ = c;
1286           } while (c != '\n');
1287           if (c == EOF) break;
1288           *q = 0;
1289           b_tree = put_names_back_in_tree(b_tree, truenames);
1290           fputs(b_tree, out);
1291           free(b_tree);
1292         }
1293         fclose(in);
1294         if (out) {
1295           fclose(out);
1296           in = fopen(input, "r");
1297           fseek(in, 0, SEEK_END);
1298           l = ftell(in);
1299           fseek(in, 0, SEEK_SET);
1300           bootstrap_trees = (char *)malloc(l+1);
1301           fread(bootstrap_trees, l, 1, in);
1302           bootstrap_trees[l] = 0;
1303           fclose(in);
1304         }
1305       }
1306     }
1307 	  //clean tree internal labels
1308     char *display_tree_tbe = NULL;
1309     if(replicates > 0) {
1310       if (b_tbe->value()){
1311         char *tmp = strdup(display_tree);
1312         display_tree_tbe = TBE_Compute(tmp, bootstrap_trees);
1313       }
1314       display_tree = bootstrap_reformat(display_tree, replicates);
1315       if(bootstrap_trees && !b_keep_trees->value()) {
1316         free(bootstrap_trees);
1317         bootstrap_trees = NULL;
1318       }
1319     }
1320 	  else if(replicates < 0) display_tree = alrt_reformat(display_tree);
1321 
1322 	  //assemble tree description
1323 	  char tree_label[100];
1324 	  sprintf(tree_label, "PhyML ln(L)=%.1f %d sites %s", logL, seqlen, modelname);
1325 	  if(replicates > 0) {
1326       sprintf(tree_label + strlen(tree_label), " %d replic.", replicates);
1327 	  }
1328 	  if(cats > 1) sprintf(tree_label + strlen(tree_label), " %d rate classes", cats);
1329 	  if(user_tree) strcat(tree_label, " User-tree");
1330 	  //add label to tree
1331 	  char *tree = (char *)malloc(strlen(tree_label) + strlen(display_tree) + 4);
1332 	  sprintf(tree, "[%s] %s", tree_label, display_tree);
1333 	  free(display_tree);
1334 
1335     if (replicates > 0 && b_tbe->value()) {
1336       //assemble TBE tree description
1337       char tree_label_tbe[100];
1338       sprintf(tree_label_tbe, "PhyML ln(L)=%.1f %d sites %s", logL, seqlen, modelname);
1339       sprintf(tree_label_tbe + strlen(tree_label_tbe), " %d replic. (TBE)", replicates);
1340       if(cats > 1) sprintf(tree_label_tbe + strlen(tree_label_tbe), " %d rate classes", cats);
1341       if(user_tree) strcat(tree_label_tbe, " User-tree");
1342       //add label to tree
1343       char *tree = (char *)malloc(strlen(tree_label_tbe) + strlen(display_tree_tbe) + 4);
1344       sprintf(tree, "[%s] %s", tree_label_tbe, display_tree_tbe);
1345       free(display_tree_tbe);
1346       display_tree_tbe = tree;
1347     }
1348 
1349 	  if (is_view_valid(view)) {
1350 	    const char *n = extract_filename(view->masename);
1351 	    char *title = NULL;
1352 	    if (n) {
1353 	      char *q = (char*)strchr(n, '.');
1354 	      if (q) *q = 0;
1355 	      title = new char[strlen(n) + 12];
1356 	      sprintf(title, "%s-%s", n, "PhyML_tree");
1357 	      if (q) *q = '.';
1358 	    }
1359       Fl_Window *btreew=0, *treew=0;
1360       if (replicates > 0 && b_keep_trees->value()) {
1361         btreew = treedraw(bootstrap_trees, view, "bootstrap trees", FALSE);
1362       }
1363       if (replicates > 0 && b_tbe->value()) {
1364         const char *p = n ? title:"PhyML_tree";
1365         char *q = (char*)malloc(strlen(p) + 5);
1366         sprintf(q, "%s_TBE", p);
1367         treedraw(display_tree_tbe, view, q, FALSE);
1368       }
1369 	    treew = treedraw(tree, view, n ? title:"PhyML_tree", FALSE);
1370 #if !defined(__APPLE__)
1371       if (treew && btreew) treew->position(btreew->x() + 30, btreew->y() + 40);
1372 #endif
1373 	    delete[] title;
1374 	  }
1375 	}
1376   Fl::delete_widget(ob->window());
1377 way_out:
1378   for(i = 0; i < tot_seqs; i++) {
1379     if(truenames[i]) free(truenames[i]);
1380     }
1381   free(truenames);
1382   delete_tmp_filename(base_fname);
1383 }
1384 
1385 
view_to_allseq(SEA_VIEW * view,int remove_all_gaps)1386 allseq *view_to_allseq(SEA_VIEW *view, int remove_all_gaps)
1387 {
1388 	int i, j, l;
1389 	list_segments *ls;
1390 	char *p;
1391 	allseq *phyml_seqs = (allseq *)calloc(1, sizeof(allseq));
1392 	phyml_seqs->n_otu = view->tot_sel_seqs == 0 ? view->tot_seqs : view->tot_sel_seqs;
1393 	phyml_seqs->c_seq = (struct __Seq **)calloc(phyml_seqs->n_otu, sizeof(struct __Seq*));
1394 	l = 0;
1395 	if(view->active_region == NULL) {
1396 		for( i = 0; i < view->tot_seqs; i++) {
1397 			if(view->tot_sel_seqs != 0 && !view->sel_seqs[i]) continue;
1398 			if(view->each_length[i] > l) l = view->each_length[i];
1399 		}
1400 	}
1401 	else {
1402 		ls = view->active_region->list;
1403 		while(ls != NULL) {
1404 			l += ls->fin - ls->debut + 1;
1405 			ls = ls->next;
1406 			}
1407 		}
1408 	phyml_seqs->clean_len = l;
1409 	phyml_seqs->wght = (int *)calloc(1, l * sizeof(int));
1410 	for( i = 0; i < l; i++) {
1411 		phyml_seqs->wght[i] = 1;
1412 		}
1413 	j = 0;
1414 	for( i = 0; i < view->tot_seqs; i++) {
1415 		if(view->tot_sel_seqs != 0 && !view->sel_seqs[i]) continue;
1416 		phyml_seqs->c_seq[j] = (struct __Seq *)calloc(1, sizeof(struct __Seq));
1417 		phyml_seqs->c_seq[j]->name = view->seqname[i];
1418 		phyml_seqs->c_seq[j]->len = l;
1419 		phyml_seqs->c_seq[j]->state = (char *)malloc(l + 1);
1420 		if(view->active_region == NULL) {
1421 			memcpy(phyml_seqs->c_seq[j]->state, view->sequence[i], view->each_length[i] );
1422 			if( l > view->each_length[i] ) {
1423 				memset(phyml_seqs->c_seq[j]->state + view->each_length[i], '-', l - view->each_length[i]);
1424 			}
1425 		}
1426 		else {
1427 			ls = view->active_region->list;
1428 			p = phyml_seqs->c_seq[j]->state;
1429 			while(ls != NULL) {
1430 				if(ls->fin <= view->each_length[i]) {
1431 					memcpy(p, view->sequence[i] + ls->debut - 1, ls->fin - ls->debut + 1 );
1432 					}
1433 				else {
1434 					int lrem = view->each_length[i] - ls->debut + 1;
1435 					if(lrem > 0) memcpy(p, view->sequence[i] + ls->debut - 1, lrem );
1436 					if(lrem < 0) lrem = 0;
1437 					memset(p + lrem, '-', ls->fin - ls->debut + 1 - lrem);
1438 					}
1439 				p += ls->fin - ls->debut + 1;
1440 				ls = ls->next;
1441 			}
1442 		}
1443 		phyml_seqs->c_seq[j]->state[l] = 0;
1444 		majuscules(phyml_seqs->c_seq[j]->state);
1445 		p = phyml_seqs->c_seq[j]->state;
1446 		if(!view->protein) { while((p = strchr(p, 'U')) != NULL) *p = 'T'; }
1447 		else { while((p = strchr(p, '*')) != NULL) *p = '-'; }//replace stops by gaps
1448 		if (!view->protein) { // remove non-nucleotide characters
1449 		  p = phyml_seqs->c_seq[j]->state;
1450 		  while (*p) {
1451 		    if (strchr("ABCDGHKMNRSTUVWXY-", *p) == NULL) *p = 'N';
1452 		    p++;
1453 		  }
1454 		}
1455 		j++;
1456 	}
1457 	//remove gap-only or gap-with sites
1458 	for( j = 0; j < phyml_seqs->clean_len; j++) {
1459 		if(remove_all_gaps) {//remove any gap-containing site
1460 			for( i = 0; i < phyml_seqs->n_otu; i++) {
1461 				if(phyml_seqs->c_seq[i]->state[j] == '-')  break;
1462 			}
1463 			if(i == phyml_seqs->n_otu) continue;
1464 		}
1465 		else {//remove gap-only sites
1466 			for( i = 0; i < phyml_seqs->n_otu; i++) {
1467 				if(phyml_seqs->c_seq[i]->state[j] != '-')  break;
1468 			}
1469 			if(i != phyml_seqs->n_otu) continue;
1470 		}
1471 		for( i = 0; i < phyml_seqs->n_otu; i++) {
1472 			memmove(phyml_seqs->c_seq[i]->state + j, phyml_seqs->c_seq[i]->state + j + 1,
1473 					phyml_seqs->clean_len - j);
1474 			}
1475 		j--; phyml_seqs->clean_len--;
1476 	}
1477 	return phyml_seqs;
1478 }
1479 
1480 
gaps_as_unknown_states(allseq * phyml_seqs,int protein)1481 void gaps_as_unknown_states(allseq *phyml_seqs, int protein)
1482 {
1483   char unknown = (protein ? 'X' : 'N');
1484   for( int i = 0; i < phyml_seqs->n_otu; i++) {
1485 	for( int j = 0; j < phyml_seqs->clean_len; j++) {
1486 	  if(phyml_seqs->c_seq[i]->state[j] == '-') phyml_seqs->c_seq[i]->state[j] = unknown;
1487 	  }
1488 	}
1489 }
1490 
free_after_view_to_allseq(allseq * phyml_seqs)1491 void free_after_view_to_allseq(allseq *phyml_seqs)
1492 {
1493 	int i;
1494 	for( i = 0; i < phyml_seqs->n_otu; i++) {
1495 		free(phyml_seqs->c_seq[i]->state);
1496 		free(phyml_seqs->c_seq[i]);
1497 	}
1498 	free(phyml_seqs->c_seq);
1499 	free(phyml_seqs->wght);
1500 	free(phyml_seqs);
1501 }
1502 
1503 /*void initialize_random_series(void)
1504 {
1505   static int first = TRUE;
1506   unsigned seed;
1507   if(first) {
1508     first = FALSE;
1509 #ifdef WIN32
1510     SYSTEMTIME timing;
1511     GetSystemTime(&timing);
1512     seed = (unsigned)(timing.wMilliseconds + 1000*(timing.wSecond + 60*(timing.wMinute + 60*(timing.wHour + 24*timing.wDay))));
1513 #else
1514     struct tms timing;
1515     seed = (unsigned)times(&timing);
1516 #endif
1517     srand(seed);
1518   }
1519 }*/
1520 
1521 
next_random(void)1522 double next_random(void)
1523 {
1524   static int first = TRUE;
1525   unsigned seed;
1526   if (first) {
1527     first = FALSE;
1528 #ifdef WIN32
1529     SYSTEMTIME timing;
1530     GetSystemTime(&timing);
1531     seed = (unsigned)(timing.wMilliseconds + 1000*(timing.wSecond + 60*(timing.wMinute + 60*(timing.wHour + 24*timing.wDay))));
1532     srand(seed);
1533 #else
1534     struct tms timing;
1535     seed = (unsigned)times(&timing);
1536     srandom(seed);
1537 #endif
1538   }
1539 
1540 #ifdef WIN32
1541   return rand() / (RAND_MAX +(double)1.);
1542 #else
1543   return random() / (RAND_MAX +(double)1.);
1544 #endif
1545 }
1546 
bootstrap_weights(allseq * seqs,int use_codons)1547 void bootstrap_weights(allseq *seqs, int use_codons)
1548 {
1549 	phydbl buff;
1550 	int j, position;
1551 	//initialize_random_series();
1552 	memset(seqs->wght, 0, seqs->clean_len * sizeof(int));
1553 	if(!use_codons) {//bootstrap sites
1554 		for(j = 0; j < seqs->clean_len; j++) {
1555 			/*buff  = rand();
1556 			buff /= (RAND_MAX+1.);*/
1557 		  buff = next_random();
1558 			buff *= seqs->clean_len;
1559 			position = (int)floor(buff);
1560 			seqs->wght[position] += 1;
1561 			}
1562 		}
1563 	else {//bootstrap codons
1564 		int ncodons = (seqs->clean_len/3);
1565 		int l3 = 3*ncodons;
1566 		for(j = 0; j < l3; j += 3) {
1567 			/*buff  = rand();
1568 			buff /= (RAND_MAX+1.);*/
1569 		  buff = next_random();
1570 			buff *= ncodons;
1571 			position = (int)floor(buff);
1572 			seqs->wght[3 * position] += 1;//only 0, 3, 6,... have weights
1573 			}
1574 		}
1575 }
1576 
1577 
Obs_Dist(allseq * data,model * mod)1578 matrix *Obs_Dist(allseq *data, model *mod)
1579 {
1580 	int site,i,j,k;
1581 	phydbl unc_len;
1582 	matrix *mat;
1583 	phydbl **len;
1584 
1585 
1586 	len = (phydbl **)mCalloc(data->n_otu,sizeof(phydbl *));
1587 	For(i,data->n_otu)
1588     len[i] = (phydbl *)mCalloc(data->n_otu,sizeof(phydbl));
1589 
1590 	unc_len = .0;
1591 
1592 	mat = Make_Mat(data->n_otu);
1593 	Init_Mat(mat,data);
1594 
1595 	Fors(site,data->c_seq[0]->len,mod->stepsize)
1596     {
1597       if (tree_build_interrupted) break;
1598 		For(j,data->n_otu-1)
1599 		{
1600 			for(k=j+1;k<data->n_otu;k++)
1601 			{
1602 				if((!Is_Ambigu(data->c_seq[j]->state+site,mod->datatype,mod->stepsize)) &&
1603 				   (!Is_Ambigu(data->c_seq[k]->state+site,mod->datatype,mod->stepsize)))
1604 				{
1605 					len[j][k]+=data->wght[site];
1606 					len[k][j]=len[j][k];
1607 					if(strncmp(data->c_seq[j]->state+site,
1608 							   data->c_seq[k]->state+site,
1609 							   mod->stepsize))
1610 						mat->P[j][k]+=data->wght[site];
1611 				}
1612 			}
1613 		}
1614     }
1615 
1616 
1617 	For(i,data->n_otu-1)
1618     for(j=i+1;j<data->n_otu;j++)
1619 	{
1620 		if(len[i][j])
1621 		{
1622 			mat->P[i][j] /= len[i][j];
1623 		}
1624 		else
1625 		{
1626 			mat->P[i][j] = 1.;
1627 		}
1628 
1629 		mat->P[j][i] = mat->P[i][j];
1630 
1631 		mat->dist[i][j] = mat->P[i][j];
1632 
1633 		mat->dist[j][i] = mat->dist[i][j];
1634 	}
1635 
1636 	For(i,data->n_otu) free(len[i]);
1637 	free(len);
1638 
1639 	return mat;
1640 }
1641 
1642 
Kimura_p_Dist(allseq * data)1643 matrix *Kimura_p_Dist(allseq *data)
1644 {
1645 	int i, j;
1646 	model jcmodel;
1647 	jcmodel.stepsize = 1;
1648 	jcmodel.datatype = 1;
1649 	jcmodel.ns = 20;
1650 	matrix *mat = Obs_Dist(data, &jcmodel);
1651 	for(i = 0; i < data->n_otu; i++) {
1652 	  if (tree_build_interrupted) break;
1653 		for(j = i + 1; j < data->n_otu; j++) {
1654 		  phydbl x = 1 - mat->dist[i][j] - 0.2 * mat->dist[i][j] * mat->dist[i][j];
1655 		  if(x <= 0) {
1656 			Free_Mat(mat);
1657 			return NULL;
1658 			}
1659 		  mat->dist[i][j] = -log(x);
1660 		  mat->dist[j][i] = mat->dist[i][j];
1661 		  }
1662 		}
1663 	return mat;
1664 }
1665 
1666 
alrt_reformat(char * tree)1667 char *alrt_reformat(char *tree)
1668 //set aLRT support values to 2 decimal digits
1669 {
1670 	char *p, *q, tmp[20];
1671 	float alrt;
1672 	int l;
1673 	p = tree;
1674 	while((p = strchr(p, ')')) != NULL) {
1675 		q = p++;
1676 		while(TRUE) {
1677 			q++;
1678 			if(*q == 0 ) break;
1679 			if(strchr("0123456789.-+ ", *q) == NULL) break;
1680 			}
1681 		if(*q != ':' ) continue;
1682 		sscanf(p, "%f", &alrt);
1683 		sprintf(tmp, "%.2f", alrt);
1684 		l = strlen(tmp);
1685 		if(l < q - p) { memset(p, ' ', q - p); memcpy(p, tmp, l); }
1686 		}
1687 	return tree;
1688 }
1689 
bootstrap_reformat(char * tree,int replicates)1690 char *bootstrap_reformat(char *tree, int replicates)
1691 //replace absolute bootstrap values by percentages
1692 {
1693 	char *p, *q, *newtree, *pnew, *next;
1694 	int count;
1695 	newtree = (char *)malloc(500000);
1696 	p = tree; pnew = newtree; next = tree;
1697 	while((p = strchr(p, ')')) != NULL) {
1698 		q = p++;
1699 		while(TRUE) {
1700 			q++;
1701 			if(*q == 0 ) break;
1702 			if(strchr("0123456789 ", *q) == NULL) break;
1703 		}
1704 		if(*q != ':' ) continue;
1705 		memcpy(pnew, next, p - next);
1706 		pnew += p - next;
1707 		next = p;
1708 		sscanf(p, "%d", &count);
1709 		sprintf(pnew, "%d", (int)(100*count/float(replicates) + .5));
1710 		pnew += strlen(pnew);
1711 		next = q;
1712 	}
1713 	memcpy(pnew, next, strlen(next));
1714 	pnew += strlen(next);
1715 	*pnew = 0;
1716 	free(tree);
1717 	return newtree;
1718 }
1719 
K80_dist_nogamma(allseq * data)1720 matrix *K80_dist_nogamma(allseq *data)
1721 {
1722 	int i,j,k;
1723 	phydbl unc_len;
1724 	matrix *mat;
1725 	phydbl **len;
1726 
1727 	len = (phydbl **)mCalloc(data->n_otu,sizeof(phydbl *));
1728 	For(i,data->n_otu)
1729     len[i] = (phydbl *)mCalloc(data->n_otu,sizeof(phydbl));
1730 
1731 	unc_len = .0;
1732 
1733 	mat = Make_Mat(data->n_otu);
1734 	Init_Mat(mat,data);
1735 
1736 
1737 	For(i,data->c_seq[0]->len)
1738     {
1739       if (tree_build_interrupted) break;
1740 		For(j,data->n_otu-1)
1741 		{
1742 			for(k=j+1;k<data->n_otu;k++)
1743 			{
1744 				if(((data->c_seq[j]->state[i] == 'A' || data->c_seq[j]->state[i] == 'G') &&
1745 					(data->c_seq[k]->state[i] == 'C' || data->c_seq[k]->state[i] == 'T'))||
1746 				   ((data->c_seq[j]->state[i] == 'C' || data->c_seq[j]->state[i] == 'T') &&
1747 					(data->c_seq[k]->state[i] == 'A' || data->c_seq[k]->state[i] == 'G')))
1748 				{
1749 					mat->Q[j][k]+=data->wght[i];
1750 					len[j][k]+=data->wght[i];
1751 					len[k][j]=len[j][k];
1752 				}
1753 
1754 				else
1755 					if(((data->c_seq[j]->state[i] == 'A' && data->c_seq[k]->state[i] == 'G') ||
1756 						(data->c_seq[j]->state[i] == 'G' && data->c_seq[k]->state[i] == 'A'))||
1757 					   ((data->c_seq[j]->state[i] == 'C' && data->c_seq[k]->state[i] == 'T') ||
1758 						(data->c_seq[j]->state[i] == 'T' && data->c_seq[k]->state[i] == 'C')))
1759 					{
1760 						mat->P[j][k]+=data->wght[i];
1761 						len[j][k]+=data->wght[i];
1762 						len[k][j]=len[j][k];
1763 					}
1764 					else
1765 						if((data->c_seq[j]->state[i] == 'A' ||
1766 							data->c_seq[j]->state[i] == 'C' ||
1767 							data->c_seq[j]->state[i] == 'G' ||
1768 							data->c_seq[j]->state[i] == 'T')&&
1769 						   (data->c_seq[k]->state[i] == 'A' ||
1770 							data->c_seq[k]->state[i] == 'C' ||
1771 							data->c_seq[k]->state[i] == 'G' ||
1772 							data->c_seq[k]->state[i] == 'T'))
1773 						{
1774 							len[j][k]+=data->wght[i];
1775 							len[k][j]=len[j][k];
1776 						}
1777 			}
1778 		}
1779     }
1780 
1781 
1782 	For(i,data->n_otu-1)
1783     for(j=i+1;j<data->n_otu;j++)
1784 	{
1785 		if(len[i][j])
1786 		{
1787 			mat->P[i][j] /= len[i][j];
1788 			mat->Q[i][j] /= len[i][j];
1789 		}
1790 		else
1791 		{
1792 			mat->P[i][j] = .5;
1793 			mat->Q[i][j] = .5;
1794 		}
1795 
1796 		mat->P[j][i] = mat->P[i][j];
1797 		mat->Q[j][i] = mat->Q[i][j];
1798 
1799 
1800 		if((1-2*mat->P[i][j]-mat->Q[i][j] <= .0) || (1-2*mat->Q[i][j] <= .0))
1801 		{
1802 		  Free_Mat(mat);
1803 		  mat = NULL;
1804 		  goto out;
1805 		}
1806 
1807 /*		mat->dist[i][j] = (g_shape/2)*
1808 		(pow(1-2*mat->P[i][j]-mat->Q[i][j],-1./g_shape) +
1809 		 0.5*pow(1-2*mat->Q[i][j],-1./g_shape) - 1.5); */
1810 
1811 		mat->dist[i][j] = -0.5 * log(1-2*mat->P[i][j]-mat->Q[i][j]) - 0.25 * log(1-2*mat->Q[i][j]);
1812 		mat->dist[j][i] = mat->dist[i][j];
1813 	}
1814 
1815 out:For(i,data->n_otu) free(len[i]);
1816 	free(len);
1817 	return mat;
1818 }
1819 
HKY_dist(allseq * data)1820 matrix *HKY_dist(allseq *data)
1821 {
1822   int i,j;
1823   matrix *mat;
1824   double x;
1825 
1826   mat = Make_Mat(data->n_otu);
1827   Init_Mat(mat,data);
1828   For(i,data->n_otu - 1)
1829   {
1830 	  for(j = i + 1; j < data->n_otu; j++)
1831 	  {
1832 	    if (tree_build_interrupted) return mat;
1833 		x = hky(data->c_seq[i]->state, data->c_seq[j]->state, data->clean_len, data->wght);
1834 		if(x == -1) {
1835 		  Free_Mat(mat);
1836 		  return NULL;
1837 		  }
1838 		mat->dist[i][j] = x;
1839 		mat->dist[j][i] = mat->dist[i][j];
1840 	  }
1841   }
1842   return mat;
1843 }
1844 
1845 
1846 /* Distance for Hasegawa, Kishino and Yano model */
hky(char * seq1,char * seq2,int lg,int * wght)1847 double hky(char* seq1, char* seq2, int lg, int *wght)
1848 {
1849 	double d, freq[16], a, c, g, t, r, y, P, P1, P2, Q, A1, A2, A3, gamma, cc, ee,
1850 		va1, va2, cova1a2, cova1a3, cova2a3, delta, epsilon, ksi, eta, nu, ff;
1851 	double larg1, larg2, larg3;
1852 
1853 	freq_obs(seq1, seq2, freq, lg, wght);
1854 
1855 	P1=freq[2]+freq[8];
1856 	P2=freq[7]+freq[13];
1857 	P=P1+P2;
1858 	Q=freq[1]+freq[3]+freq[4]+freq[6]+freq[9]+freq[11]+freq[12]+freq[14];
1859 
1860 	if(P+Q == 0) return 0;
1861 
1862 	a=freq[0]+(freq[1]+freq[2]+freq[3]+freq[4]+freq[8]+freq[12])/2;
1863 	c=freq[5]+(freq[1]+freq[4]+freq[6]+freq[7]+freq[9]+freq[13])/2;
1864 	g=freq[10]+(freq[2]+freq[6]+freq[8]+freq[9]+freq[11]+freq[14])/2;
1865 	t=1.-a-c-g;
1866 	r=a+g;
1867 	y=c+t;
1868 
1869 	larg1=1-Q/(2*r*y);
1870 	larg2=1-Q/(2*r)-(r*P1)/(2*a*g);
1871 	larg3=1-Q/(2*y)-(y*P2)/(2*c*t);
1872 
1873 	if(larg1<=0. || larg2<=0. || larg3<=0.)
1874 		return -1.;
1875 
1876 	A1=(y/r)*log(larg1)-log(larg2)/r;
1877 	A2=(r/y)*log(larg1)-log(larg3)/y;
1878 	A3=-log(larg1);
1879 
1880 	cc=1-Q/(2*r*y);
1881 	ee=1-(r*P1)/(2*a*g)-Q/(2*r);
1882 	ff=1-(y*P2)/(2*c*t)-Q/(2*y);
1883 
1884 	delta=1/(2*ee*r*r)-1/(2*cc*r*r);
1885 	epsilon=1/(2*ee*a*g);
1886 	ksi=1/(2*y*y*ff)-1/(2*y*y*cc);
1887 	eta=1/(2*c*t*ff);
1888 	nu=1/(2*r*y*cc);
1889 
1890 	va1=((delta*delta*Q+epsilon*epsilon*P1)-(delta*Q+epsilon*P1)*(delta*Q+epsilon*P1))/lg;
1891 	va2=((ksi  *ksi  *Q+eta    *eta    *P2)-(ksi  *Q+eta    *P2)*(ksi  *Q+eta    *P2))/lg;
1892 	cova1a2=(delta*ksi*Q*(1-Q)-delta*eta*Q*P2-epsilon*eta*P1*P2)/lg;
1893 	cova1a3=nu*Q*(delta*(1-Q)-epsilon*P1)/lg;
1894 	cova2a3=nu*Q*(ksi  *(1-Q)-eta    *P2)/lg;
1895 
1896 	gamma=(va2-cova1a2)/(va1+va2-2*cova1a2) + ((r*y)/(a*g+c*t)) * ((cova1a3-cova2a3)/(va1+va2-2*cova1a2));
1897 
1898 	d=2*(a*g+c*t)*(gamma*A1+(1-gamma)*A2)+2*r*y*A3;
1899 
1900 	return(d);
1901 }
1902 
1903 
1904 /* freq_obs */
1905 /* Write at address freq observed frequencies of 16 di-nucleotides XY */
1906 /* (X= A,C,G,T  ,  Y=A,C,G,T) X and Y being homologous nucleotides of sequences */
1907 /* seq1 and seq2. Alphabetic order is used : freq[0]=AA frequency, freq[1]=AC, */
1908 /* ..., freq[15]=TT. */
freq_obs(char * seq1,char * seq2,double * freq,int lgseq,int * wght)1909 int freq_obs(char* seq1, char* seq2, double* freq, int lgseq, int *wght)
1910 {
1911 	int i, lgseqvrai, w;
1912 
1913 	for(i=0;i<16;i++) freq[i]=0;
1914 	lgseqvrai = lgseq;
1915 	for(i=0;i<lgseq;i++){
1916 		if( (w = wght[i]) == 0) continue;
1917 		switch(seq1[i]){
1918 			case 'A':
1919 				switch(seq2[i]){
1920 					case 'A' : freq[0] += w; break;
1921 					case 'C' : freq[1] += w; break;
1922 					case 'G' : freq[2] += w; break;
1923 					case 'T' : freq[3] += w; break;
1924 					default : lgseqvrai -= w; break;
1925 				}
1926 				break;
1927 			case 'C':
1928 				switch(seq2[i]){
1929 					case 'A' : freq[4] += w; break;
1930 					case 'C' : freq[5] += w; break;
1931 					case 'G' : freq[6] += w; break;
1932 					case 'T' : freq[7] += w; break;
1933 					default : lgseqvrai -= w; break;
1934 				}
1935 				break;
1936 			case 'G':
1937 				switch(seq2[i]){
1938 					case 'A' : freq[8] += w; break;
1939 					case 'C' : freq[9] += w; break;
1940 					case 'G' : freq[10] += w; break;
1941 					case 'T' : freq[11] += w; break;
1942 					default : lgseqvrai -= w; break;
1943 				}
1944 				break;
1945 			case 'T':
1946 				switch(seq2[i]){
1947 					case 'A' : freq[12] += w; break;
1948 					case 'C' : freq[13] += w; break;
1949 					case 'G' : freq[14] += w; break;
1950 					case 'T' : freq[15] += w; break;
1951 					default : lgseqvrai -= w; break;
1952 				}
1953 				break;
1954 			default :
1955 				lgseqvrai -= w;
1956 		}
1957 	}
1958 	if(lgseqvrai != 0){
1959 		for(i=0;i<16;i++) freq[i] /= lgseqvrai;
1960 		return 1;
1961 	}
1962 	else return 0;
1963 }
1964 
informative(char ** sequ,int nb)1965 int informative(char** sequ, int nb)
1966 {
1967 
1968 	int i, j, k, info=0, siteok=0;
1969 	char firstpair;
1970 
1971 
1972 	for(i=0;sequ[0][i];i++){
1973 		firstpair=0;
1974 		siteok=0;
1975 		for(j=0;j<nb-1;j++){
1976 			for(k=j+1;k<nb;k++){
1977 				if(sequ[j][i]==sequ[k][i]){
1978 					if(firstpair){
1979 						if(sequ[j][i]!=firstpair) { info++; siteok=1; break; }
1980 					}
1981 					else
1982 						firstpair=sequ[j][i];
1983 				}
1984 			}
1985 			if (siteok) break;
1986 		}
1987 	}
1988 
1989 	return info;
1990 }
1991 
1992 
1993 /*void jumble_fct(char** seq, char** name, int notu)
1994 {
1995 	int i, j, rando;
1996 	double buff;
1997 	char *tmp;
1998 	//initialize_random_series();
1999 	for(i = 0; i<notu - 1; i++){
2000 		j = notu - i - 1;
2001 		//buff  = rand();
2002 		//buff /= (RAND_MAX+1.);
2003 		buff = next_random();
2004 		buff *= j + 1;
2005 		rando = (int)floor(buff);
2006 		if(j != rando) {
2007 			tmp = seq[j],
2008 			seq[j] = seq[rando];
2009 			seq[rando] = tmp;
2010 			tmp = name[j],
2011 			name[j] = name[rando];
2012 			name[rando] = tmp;
2013 			}
2014 		}
2015 }*/
2016 
add_equally_best(char * tree,char * all_best,int count)2017 char *add_equally_best(char *tree, char *all_best, int count)
2018 {
2019   char *p, *q, *r, *final_tree;
2020   final_tree = (char*)malloc(strlen(tree) + strlen(all_best)+ count * 30);
2021   strcpy(final_tree, tree);
2022   q = final_tree + strlen(final_tree);
2023   *q++ = '\n';
2024   p = all_best;
2025   if (count > 1) {
2026     int rank = 0;
2027     while (count-- > 0) {
2028       r = strchr(p, '\n');
2029       sprintf(q, "[equally best tree #%d]", ++rank);
2030       q += strlen(q);
2031       memcpy(q, p, r - p + 1);
2032       q += r-p+1;
2033       p = r+1;
2034     }
2035   }
2036   *q = 0;
2037   free(tree);
2038   free(all_best);
2039   return final_tree;
2040 }
2041 
run_parsimony_method(SEA_VIEW * view,int nogaps,const char * gapsunknown,int nreplicates,int njumbles,int maxbest,int user_tree,Fl_Box * w_count,dnapars_S_option s_option,const char ** pprogname,float consensus_threshold,int no_gui,char ** p_bootstrap_trees,void (* alert)(const char *,...))2042 static char *run_parsimony_method(SEA_VIEW *view, int nogaps, const char *gapsunknown, int nreplicates, int njumbles,
2043 			   int maxbest, int user_tree, Fl_Box *w_count, dnapars_S_option s_option,
2044 			   const char **pprogname, float consensus_threshold, int no_gui, char **p_bootstrap_trees, void (*alert)(const char *,...) )
2045 {
2046   char *tree = 0, *final_tree = 0, *all_best;
2047   int i, steps, info, count, replicates_done, count_best;
2048   const char *p;
2049   allseq *tmpallseqs = view_to_allseq(view, nogaps);
2050   if(*gapsunknown) gaps_as_unknown_states(tmpallseqs, view->protein);
2051   char **pars_seqs = (char **)malloc(tmpallseqs->n_otu * sizeof(char *));
2052   char **pars_names = (char **)malloc(tmpallseqs->n_otu * sizeof(char *));
2053   for( i = 0; i < tmpallseqs->n_otu; i++) {
2054     pars_seqs[i] = tmpallseqs->c_seq[i]->state;
2055     pars_names[i] = tmpallseqs->c_seq[i]->name;
2056   }
2057   info = informative(pars_seqs, tmpallseqs->n_otu);
2058   if(view->protein) *pprogname = "Protpars";
2059   else {
2060     *pprogname = "Dnapars";
2061     if (tmpallseqs->clean_len >= SHRT_MAX) {
2062       alert("Can't compute parsimony with DNA seqs longer than %d.", SHRT_MAX);
2063       goto way_out;
2064     }
2065   }
2066   if (nreplicates) {
2067     tree = parsimony_with_bootstrap(tmpallseqs, pars_seqs, pars_names, tmpallseqs->n_otu, njumbles,
2068 				    view->protein, &steps, maxbest, nreplicates, w_count, &replicates_done, s_option, no_gui, consensus_threshold, &count_best,
2069                                     p_bootstrap_trees);
2070     if (tree) {
2071       final_tree = (char *)malloc(strlen(tree) + 200);
2072       char best[35] = "";
2073       if (count_best > 1) sprintf(best, "consensus of %d best trees, ", count_best);
2074       sprintf(final_tree, "[%s, bootstrap with %d replic., %d steps, %s%d sites (%d informative)%s]%s",
2075 	      *pprogname, replicates_done, steps, best, tmpallseqs->clean_len, info, gapsunknown, tree);
2076     }
2077   }
2078   else if(user_tree) {
2079     char *differs, *input_tree;
2080     p = view->trees[user_tree - 1];
2081     while(*p == ' ') p++;
2082     if(*p == '[') p = strchr(p, ']')+ 1;
2083     while(*p == ' ') p++;
2084     input_tree = strdup(p);
2085     i = compare_newick_with_names(input_tree, pars_names, tmpallseqs->n_otu, &differs);
2086     if (i != 0) {
2087       alert("Names in user-tree and target sequences differ:\n%s in %s only",
2088 	       differs, i == 1 ? "user-tree" : "target sequences");
2089       tree = NULL;
2090     }
2091     else {
2092       input_tree = (char *)realloc(input_tree, strlen(input_tree) + 4 * tmpallseqs->n_otu + 6 );
2093       if( make_binary_or_unrooted(input_tree) != NULL) return NULL;
2094       strcat(input_tree, ";");
2095       tree = parsimony(pars_seqs, pars_names, tmpallseqs->n_otu, 0, &steps, input_tree, maxbest, NULL, view->protein, s_option, no_gui, NULL);
2096       free(input_tree);
2097       if(tree == NULL) {
2098         alert("Error in user-tree");
2099       }
2100       else {
2101         final_tree = (char *)malloc(strlen(tree) + 150);
2102         sprintf(final_tree, "[%s, user-tree, %d steps, %d sites (%d informative)%s]%s",
2103                 *pprogname, steps, tmpallseqs->clean_len, info, gapsunknown, tree);
2104       }
2105     }
2106   }
2107   else {
2108     tree = parsimony_with_jumble(pars_seqs, pars_names, tmpallseqs->n_otu, njumbles, view->protein,
2109 				 &steps, &count, maxbest, w_count, s_option, no_gui, consensus_threshold);
2110     if (count > 1) { // memorize all equally best trees in all_best string
2111       char *q = strchr(tree, '\n');
2112       all_best = strdup(q + 1);
2113       *q = 0;
2114     }
2115     if (tree && count > 1 && !view->protein) { /* with DNA, fit branch lengths to consensus of most parsimonious trees */
2116       char *input_tree = tree;
2117       int steps2; /* don't use the steps of this consensus tree because it can be larger than parsimonious steps */
2118       tree = parsimony(pars_seqs, pars_names, tmpallseqs->n_otu, 0, &steps2, input_tree, maxbest, NULL, false, s_option, no_gui, NULL);
2119       free(input_tree);
2120       }
2121     if (tree) {
2122       char aux[30];
2123       final_tree = (char *)malloc(strlen(tree) + 150);
2124       if (count > 1) sprintf(aux, " (%d%% consensus)", int(consensus_threshold*100 + 0.5));
2125       else *aux = 0;
2126       sprintf(final_tree, "[%s, %d best trees%s, %d steps, %d sites (%d informative)%s]%s",
2127 	      *pprogname, count, aux, steps, tmpallseqs->clean_len, info, gapsunknown, tree);
2128       if (count > 1) final_tree = add_equally_best(final_tree, all_best, count);
2129     }
2130   }
2131 way_out:
2132   if (tree) free(tree);
2133   free_after_view_to_allseq(tmpallseqs);
2134   free(pars_seqs);
2135   free(pars_names);
2136   return final_tree;
2137 }
2138 
user_tree_cb(Fl_Widget * wid,void * data)2139 static void user_tree_cb(Fl_Widget *wid, void *data)
2140 {
2141   Fl_Button *b = (Fl_Button*)data;
2142   b->value(0);
2143   b->do_callback();
2144 }
2145 
parsimony_dialog(SEA_VIEW * view)2146 void parsimony_dialog(SEA_VIEW *view)
2147 {
2148 	int njumbles, i, count, maxbest = 100, started = 0, use_bootstrap,
2149 		nreplicates, user_tree, nogaps;
2150 	char *final_tree, *bootstrap_trees = NULL;
2151 	const char *p, *progname;
2152   dnapars_S_option s_option = more_thorough;
2153 
2154 	tree_build_interrupted = 0;
2155 	return_window *w = new return_window(255, view->protein ? 225: 300, "Parsimony analysis");
2156 	w->set_modal();
2157 	Fl_Check_Button *w_dojumble = new Fl_Check_Button(2, 5, 170, 20, "Randomize seq. order");
2158 	w_dojumble->value(0);
2159 	Fl_Int_Input *w_jumbles = new Fl_Int_Input(w_dojumble->x() + w_dojumble->w() + 5, w_dojumble->y(), 30, 20, "times");
2160 	w_jumbles->align(FL_ALIGN_RIGHT);
2161 	w_jumbles->value("5");
2162 	Fl_Check_Button *w_nogaps = new Fl_Check_Button(2, w_dojumble->y() + w_dojumble->h() + 5, 160, 20, "Ignore all gap sites");
2163 	w_nogaps->value(1);
2164 	Fl_Check_Button *w_gapsunknown = new Fl_Check_Button(2, w_nogaps->y() + w_nogaps->h() + 5, 180, 20,
2165 														 "Gaps as unknown states");
2166 	w_gapsunknown->value(0);
2167 	w_gapsunknown->deactivate();
2168   Fl_Widget *witness = w_gapsunknown;
2169   Fl_Round_Button *thorough_b, *less_thorough_b;
2170   Fl_Int_Input *w_best;
2171   if (!view->protein) {
2172     Fl_Group *g = new Fl_Group(2, witness->y() + witness->h() + 5, w->w()-4, 70);
2173     g->box(FL_DOWN_FRAME);
2174     thorough_b = new Fl_Round_Button(7, witness->y() + witness->h() + 5, 180, 20, "More thorough tree search");
2175     thorough_b->type(FL_RADIO_BUTTON);
2176     less_thorough_b = new Fl_Round_Button(7, thorough_b->y() + thorough_b->h() + 5, 180, 20, "Less thorough tree search");
2177     less_thorough_b->type(FL_RADIO_BUTTON);
2178     Fl_Round_Button *rearrange_best_b = new Fl_Round_Button(7, less_thorough_b->y() + less_thorough_b->h() + 5, 180, 20, "Rearrange on best tree");
2179     rearrange_best_b->type(FL_RADIO_BUTTON);
2180     rearrange_best_b->activate();
2181     thorough_b->setonly();
2182     g->end();
2183     w_best = new Fl_Int_Input(195, rearrange_best_b->y() + rearrange_best_b->h() + 5, 50, 20, "Equally best trees retained");
2184     w_best->align(FL_ALIGN_LEFT);
2185     w_best->value("10000");
2186     witness = w_best;
2187   }
2188   Fl_Value_Slider* slider = new Fl_Value_Slider(2, witness->y() + witness->h() + 25, w->w()-4, 20, "% level for consensus tree building");
2189   slider->align(FL_ALIGN_TOP);
2190   slider->type(FL_HOR_NICE_SLIDER);
2191   slider->box(FL_FLAT_BOX);
2192   slider->color(FL_YELLOW);
2193   slider->selection_color(FL_RED);
2194   slider->bounds(50, 100);
2195   slider->value(100);
2196   slider->precision(0);
2197 	Fl_Round_Button *w_dobootstrap = new Fl_Round_Button(2, slider->y() + slider->h() + 10, 115, 20, "Bootstrap with");
2198 	w_dobootstrap->type(FL_RADIO_BUTTON);
2199 	Fl_Int_Input *w_btrepl = new Fl_Int_Input(w_dobootstrap->x() + w_dobootstrap->w() + 5, w_dobootstrap->y() ,
2200 											  40, 20, "replicates");
2201 	w_btrepl->value("100");
2202 	w_btrepl->align(FL_ALIGN_RIGHT);
2203   Fl_Check_Button *keep_b_trees = new Fl_Check_Button(2, w_dobootstrap->y() + w_dobootstrap->h() + 5, 180, 20, "Show bootstrap trees");
2204   keep_b_trees->deactivate();
2205   static Fl_Check_Button *buttons[2] = {keep_b_trees, NULL};
2206   w_dobootstrap->callback(dist_b_cb, buttons);
2207   w_dobootstrap->when(FL_WHEN_CHANGED);
2208 
2209 	Fl_Box *w_count = new Fl_Box(0, keep_b_trees->y() + keep_b_trees->h() + 5, w->w(), 20, NULL);
2210 
2211 	Fl_Round_Button *w_user = new Fl_Round_Button(2, w_count->y() + w_count->h() + 5, 85, 20, "User tree:");
2212 	w_user->type(FL_RADIO_BUTTON);
2213   w_user->callback(user_tree_cb, w_dobootstrap);
2214 	Fl_Choice *w_choice = new Fl_Choice(w_user->x() + w_user->w() + 5, w_user->y(), 130, 20, "");
2215 	count = view->menu_trees->vlength();
2216 	for( i = 0; i < count; i++) w_choice->add( view->menu_trees->vitem(i)->label() );
2217 	w_choice->value(0);
2218 	if (!count) {w_user->deactivate(); w_choice->deactivate(); }
2219 
2220 	Fl_Button *w_interrupt = new Fl_Button(w_dobootstrap->x(), w_user->y() + w_user->h() + 5, 60, 20, "Cancel");
2221 	Fl_Return_Button *w_ok = new Fl_Return_Button(w->w() - 65, w_interrupt->y(), 60, 20, "OK");
2222 	w->run(w_ok);
2223 	w_interrupt->callback(interrupt_callback, &tree_build_interrupted);
2224 	w_ok->callback(interrupt_callback, &started);
2225 	w->end();
2226 	w->callback(interrupt_callback, &tree_build_interrupted);
2227 	w->size(w->w(), w_ok->y() + w_ok->h() + 5);
2228 	w->show();
2229 	while(!started && !tree_build_interrupted) {
2230 	  Fl::wait();
2231 	  if(w_gapsunknown->value()) { w_nogaps->value(0); w_nogaps->deactivate(); }
2232 	  else w_nogaps->activate();
2233 	  if(w_nogaps->value()) { w_gapsunknown->value(0); w_gapsunknown->deactivate(); }
2234 	  else w_gapsunknown->activate();
2235 	  }
2236 	if (tree_build_interrupted) {
2237 		delete w;
2238 		return;
2239 	}
2240 	w_ok->hide();
2241 	w_interrupt->label("Interrupt");
2242 	if(w_dojumble->value()) {
2243 		p = w_jumbles->value();
2244 		sscanf(p, "%d", &njumbles);
2245 		if(njumbles < 1) njumbles = 1;
2246 		}
2247 	else njumbles = 0;
2248 	use_bootstrap = w_dobootstrap->value();
2249 	user_tree = w_user->value();
2250 	nogaps = w_nogaps->value();
2251 	char gapsunknown[25];
2252 	if(w_gapsunknown->value())
2253 	sprintf(gapsunknown, ", gaps treated as %c", (view->protein ? 'X' : 'N') );
2254 	else gapsunknown[0] = 0;
2255 	if(use_bootstrap) {
2256 		p = w_btrepl->value();
2257 		sscanf(p, "%d", &nreplicates);
2258 		}
2259 	else {
2260 		if(user_tree) user_tree = w_choice->value() + 1;
2261 	}
2262   if (!view->protein) {
2263     if (thorough_b->value()) s_option = more_thorough;
2264     else if (less_thorough_b->value()) s_option = less_thorough;
2265     else s_option = rearrange_best_tree;
2266     p = w_best->value();
2267     sscanf(p, "%d", &maxbest);
2268     }
2269 	my_watch_cursor(w);
2270 
2271 	final_tree = run_parsimony_method(view, nogaps, gapsunknown,
2272 				    use_bootstrap && nreplicates > 1 ? nreplicates : 0, njumbles,
2273 					  maxbest, user_tree, w_count, s_option,
2274                                     &progname, slider->value()/100, false, keep_b_trees->value() ? &bootstrap_trees : NULL, fl_alert);
2275 
2276 	fl_reset_cursor(w);
2277 	delete w;
2278 	if (final_tree != NULL) {
2279 	  const char *n = extract_filename(view->masename);
2280 	  char *title = NULL;
2281 	  if (n) {
2282 	    char *q = (char*)strchr(n, '.');
2283 	    if (q) *q = 0;
2284 	    title = new char[strlen(n) + strlen(progname) + 2];
2285 	    sprintf(title, "%s-%s", n, progname);
2286 	    if (q) *q = '.';
2287 	  }
2288     int tree_count = 0;
2289     p = final_tree;
2290     while (TRUE) {
2291       p = strchr(p+1, '\n');
2292       if (!p) break;
2293       tree_count++;
2294     }
2295     if (tree_count == 0) tree_count = 1;
2296     Fl_Window *btreew = NULL;
2297     if (bootstrap_trees) btreew = treedraw(bootstrap_trees, view, "bootstrap trees", FALSE);
2298 	  Fl_Window *treew = treedraw(final_tree, view, n ? title:progname, FALSE, tree_count);
2299 #if !defined(__APPLE__)
2300       if (btreew) treew->position(btreew->x() + 30, btreew->y() + 40);
2301 #endif
2302 	  delete[] title;
2303 	}
2304 }
2305 
2306 
parsimony_with_jumble(char ** pars_seqs,char ** pars_names,int n_otu,int njumbles,int protein,int * psteps,int * pcount,int maxbest,Fl_Widget * w_count,dnapars_S_option s_option,int no_gui,float consensus_threshold)2307 char *parsimony_with_jumble(char **pars_seqs, char **pars_names, int n_otu, int njumbles, int protein, int *psteps,
2308 	int *pcount, int maxbest, Fl_Widget *w_count, dnapars_S_option s_option, int no_gui, float consensus_threshold)
2309 {
2310 	int i, steps, count = 0;
2311 	char *tree, *newtree, *p, *q;
2312 	if (njumbles > 0) {
2313 	  if (w_count) w_count->label("First jumble attempt");
2314 	  }
2315 	tree = parsimony(pars_seqs, pars_names, n_otu, njumbles, &steps, NULL, maxbest, NULL, protein, s_option, no_gui, w_count);
2316 	if (!tree) return NULL;
2317 	p = tree - 1;
2318 	while((p = strchr(p+1, '\n')) != NULL) count++;//count equally parsimonious trees
2319 	if(count > 1) {//compute threshold consensus of all equally parsimonious trees
2320 		p = strchr(tree, '\n');
2321 		newtree = (char *)malloc(p - tree + 1);
2322 		memcpy(newtree, tree, p - tree); newtree[p - tree] = 0;
2323 		p++; i = count; count = 0;
2324 		init_consensus_calculation(n_otu);
2325 		while(TRUE) {
2326 			count += process_one_consensus_item(newtree, pars_names);
2327 			free(newtree);
2328 			i--;
2329 			if(i == 0) break;
2330 			q = strchr(p, '\n');
2331 			newtree = (char *)malloc(q - p + 1);
2332 			memcpy(newtree, p, q - p); newtree[q - p] = 0;
2333 			p = q + 1;
2334 		}
2335 		int **rows;
2336 		int br_count = after_consensus_items(consensus_threshold, count, &rows);
2337 		char *consensus_tree = compute_consensus_tree(pars_names, rows, br_count);
2338 		for (i = 0; i < br_count; i++) free(rows[i]);
2339 		free(rows);
2340     char *newtree = (char*)malloc(strlen(consensus_tree) + strlen(tree) + 2);
2341     sprintf(newtree, "%s\n%s", consensus_tree, tree);
2342     free(consensus_tree);
2343     free(tree);
2344     tree = newtree;
2345 	}
2346 	*psteps = steps;
2347 	*pcount = count;
2348 	return tree;
2349 }
2350 
2351 
parsimony_with_bootstrap(allseq * tmpallseqs,char ** pars_seqs,char ** pars_names,int n_otu,int njumbles,int protein,int * psteps,int maxbest,int replicates,Fl_Box * w_count,int * preplicates_done,dnapars_S_option s_option,int no_gui,float consensus_threshold,int * pcount_best,char ** p_bootstrap_trees)2352 static char *parsimony_with_bootstrap(allseq *tmpallseqs, char **pars_seqs, char **pars_names, int n_otu, int njumbles, int protein, int *psteps,
2353 			int maxbest, int replicates, Fl_Box *w_count, int *preplicates_done, dnapars_S_option s_option, int no_gui,
2354 			       float consensus_threshold, int *pcount_best, char **p_bootstrap_trees)
2355 {
2356 	int i, j, steps, newsteps, count = 0, replicates_done = 0, repl_count, lbtrees = 0;
2357 	char *tree, *newtree, *p, *q, *all_best, *bootstrap_trees = NULL;
2358 
2359 	if (w_count) w_count->label("Full sequence tree");
2360 	tree = parsimony_with_jumble(pars_seqs, pars_names, n_otu, njumbles, protein,
2361 								 &steps, &count, maxbest, NULL, s_option, no_gui, consensus_threshold);
2362 	if (!tree) return NULL;
2363   if (count > 1) { // memorize all equally best trees in all_best string
2364     p = strchr(tree, '\n');
2365     all_best = strdup(p + 1);
2366     *p = 0;
2367   }
2368 	if (count > 1 && !protein) { /* with DNA, fit branch lengths to strict consensus of most parsimonious trees */
2369 	  char *input_tree = tree;
2370 	  /* don't use the steps of this consensus tree because it can be larger than parsimonious steps */
2371 	  tree = parsimony(pars_seqs, pars_names, n_otu, njumbles, &newsteps, input_tree, maxbest, NULL, false, s_option, no_gui, NULL);
2372 	  free(input_tree);
2373 	}
2374 	*psteps = steps;
2375 	init_add_bootstrap(tree); // memorize topology and br lengths of strict consensus of most parsimonious trees
2376 	for(j = 0; j < replicates; j++) {
2377 		if (w_count) {
2378 		  char tmp[30];
2379 		  sprintf(tmp, "Bootstrap replicate: %d", j + 1);
2380 		  w_count->label(tmp);
2381 		}
2382 		if (tree_build_interrupted) break;
2383 		bootstrap_weights(tmpallseqs, FALSE);
2384 		tree = parsimony(pars_seqs, pars_names, n_otu, njumbles, &steps, NULL, maxbest, tmpallseqs->wght, protein, s_option, no_gui, NULL);
2385 		if (!tree) break;
2386 		p = tree - 1; repl_count = 0;
2387 		while((p = strchr(p+1, '\n')) != NULL) repl_count++;//count equally parsimonious trees
2388 		i = repl_count;
2389 		while (TRUE) {
2390 			p = strchr(tree, '\n');
2391 			newtree = (char *)malloc(p - tree + 1);
2392 			memcpy(newtree, tree, p - tree); newtree[p - tree] = 0;
2393 			process_one_replicate(newtree, 1./repl_count);
2394       if (p_bootstrap_trees) { // memorize all bootstrap trees
2395         if (lbtrees == 0) {
2396           lbtrees = 10000;
2397           bootstrap_trees = (char *)malloc(lbtrees + 1);
2398           q = bootstrap_trees;
2399           *q = 0;
2400         }
2401         int l = strrchr(newtree, ')') - newtree + 1; // length of (....)
2402         if ((q-bootstrap_trees) + l + 8 > lbtrees) {
2403           lbtrees = (q-bootstrap_trees) + l + 8 + 10000;
2404           char *tmpc = (char*)realloc(bootstrap_trees, lbtrees + 1);
2405           q = tmpc + (q - bootstrap_trees);
2406           bootstrap_trees = tmpc;
2407         }
2408         sprintf(q, "[%.2f]", 1./repl_count); q += strlen(q); // indicate tree weight
2409         memcpy(q, newtree, l); q += l; // (...) part of tree
2410         memcpy(q, ";\n", 3); q += 2; // end of tree
2411       }
2412 			free(newtree);
2413 			i--;
2414 			if(i == 0) break;
2415 			newtree = strdup(p+1);
2416 			free(tree);
2417 			tree = newtree;
2418 		}
2419 		++replicates_done;
2420 		free(tree);
2421 		}
2422 	*preplicates_done = replicates_done;
2423 	tree = finish_add_bootstrap(replicates_done);
2424   if (count > 1) tree = add_equally_best(tree, all_best, count);
2425   *pcount_best = count;
2426   if (p_bootstrap_trees) *p_bootstrap_trees = bootstrap_trees;
2427 	return tree;
2428 }
2429 
2430 struct pars_struct {
2431   char** seq;
2432   char** seqname;
2433   int notu;
2434   int njumbles;
2435   int *jumble_no;
2436   int *steps;
2437   char* toevaluate;
2438   int arg_maxtrees;
2439   int *bt_weights;
2440   dnapars_S_option s_option;
2441   char *result;
2442   bool protein;
2443   bool done;
2444 };
2445 
2446 
awake_from_C(void)2447 void awake_from_C(void)
2448 {
2449   Fl::lock();
2450   Fl::awake();
2451   Fl::unlock();
2452 }
2453 
2454 
parsimony_thread(pars_struct * args)2455 static void parsimony_thread(pars_struct* args)
2456 {
2457   typedef char* (*pars_f)(char** seq, char** seqname, int notu, int njumbles, int *jumble_no, int *steps, char* toevaluate, int arg_maxtrees, int *bt_weights, dnapars_S_option s_option);
2458   pars_f pf = args->protein ? protpars : dnapars;
2459   args->result = (*pf)(args->seq, args->seqname, args->notu, args->njumbles, args->jumble_no, args->steps, args->toevaluate, args->arg_maxtrees, args->bt_weights, args->s_option);
2460   Fl::lock();
2461   args->done = true;
2462   Fl::awake();
2463   Fl::unlock();
2464 }
2465 
parsimony(char ** seq,char ** seqname,int notu,int njumbles,int * steps,char * toevaluate,int arg_maxtrees,int * bt_weights,bool protein,dnapars_S_option s_option,int no_gui,Fl_Widget * w_count)2466 char *parsimony(char** seq, char** seqname, int notu, int njumbles, int *steps, char* toevaluate, int arg_maxtrees, int *bt_weights,
2467 		bool protein, dnapars_S_option s_option, int no_gui, Fl_Widget *w_count)
2468 {
2469   char *p;
2470   int jumble_no = 0;
2471   int pre_jumble_no = 0;
2472   tree_build_interrupted = 0;
2473   if (no_gui) {
2474     if (protein) p = protpars(seq, seqname, notu, njumbles, NULL, steps, toevaluate, arg_maxtrees, bt_weights, s_option);
2475     else  p = dnapars(seq, seqname, notu, njumbles, NULL, steps, toevaluate, arg_maxtrees, bt_weights, s_option);
2476   }
2477   else {
2478     pars_struct *args = (pars_struct*)malloc(sizeof(pars_struct));
2479     args->seq = seq;
2480     args->seqname = seqname;
2481     args->notu = notu;
2482     args->njumbles = njumbles;
2483     args->jumble_no = &jumble_no;
2484     args->steps = steps;
2485     args->toevaluate = toevaluate;
2486     args->arg_maxtrees = arg_maxtrees;
2487     args->bt_weights = bt_weights;
2488     args->s_option = s_option;
2489     args->result = NULL;
2490     args->protein = protein;
2491     args->done = false;
2492     if ( ! fl_create_thread((void (*) (void *))parsimony_thread, args, 1024) ) {
2493       while (!args->done) {
2494         Fl::wait();
2495         if (jumble_no > pre_jumble_no && jumble_no < njumbles && w_count) {
2496           pre_jumble_no = jumble_no;
2497           char tmp[40];
2498           sprintf(tmp, "Jumble attempt: %d steps: %d", jumble_no + 1, *args->steps);
2499           w_count->label(tmp);
2500         }
2501       }
2502     }
2503     p = args->result;
2504     free(args);
2505   }
2506   if (tree_build_interrupted && p) {
2507     free(p);
2508     p = NULL;
2509   }
2510   return p;
2511 }
2512 
2513 
2514 /* from Numerical Recipes in C */
2515 /* Allocation function for ludcmp function */
vector(long nl,long nh)2516 double *vector(long nl, long nh)
2517 {
2518 	double *v;
2519 
2520 	v=(double *)malloc((size_t) ((nh-nl+1+1)*sizeof(double)));
2521 	if (!v) printf("allocation failure in vector()");
2522 	return v-nl+1;
2523 }
2524 
2525 
2526 /* free_vector */
2527 /* from Numerical Recipes in C */
2528 /* Memory freeing function for ludcmp function */
free_vector(double * v,long nl,long nh)2529 void free_vector(double *v, long nl, long nh)
2530 {
2531 	free((char*) (v+nl-1));
2532 }
2533 
2534 
2535 /* ludcmp */
2536 /* from Numerical Recipes in C */
2537 /* Replace matrix a by a rowwise permutation of its LU decomposition. */
2538 
ludcmp(double ** a,int n,int * indx,double * d)2539 int ludcmp(double **a, int n, int *indx, double *d)
2540 {
2541 	int i,imax=0,j,k;
2542 	double big,dum,sum,temp;
2543 	double *vv;
2544 
2545 	vv=vector(1,n);
2546 	*d=1.0;
2547 	for (i=1;i<=n;i++) {
2548 		big=0.0;
2549 		for (j=1;j<=n;j++)
2550 			if ((temp=fabs(a[i][j])) > big) big=temp;
2551 		if (big == 0.0) return 0;
2552 		vv[i]=1.0/big;
2553 	}
2554 	for (j=1;j<=n;j++) {
2555 		for (i=1;i<j;i++) {
2556 			sum=a[i][j];
2557 			for (k=1;k<i;k++) sum -= a[i][k]*a[k][j];
2558 			a[i][j]=sum;
2559 		}
2560 		big=0.0;
2561 		for (i=j;i<=n;i++) {
2562 			sum=a[i][j];
2563 			for (k=1;k<j;k++)
2564 				sum -= a[i][k]*a[k][j];
2565 			a[i][j]=sum;
2566 			if ( (dum=vv[i]*fabs(sum)) >= big) {
2567 				big=dum;
2568 				imax=i;
2569 			}
2570 		}
2571 		if (j != imax) {
2572 			for (k=1;k<=n;k++) {
2573 				dum=a[imax][k];
2574 				a[imax][k]=a[j][k];
2575 				a[j][k]=dum;
2576 			}
2577 			*d = -(*d);
2578 			vv[imax]=vv[j];
2579 		}
2580 		indx[j]=imax;
2581 		if (a[j][j] == 0.0) a[j][j]=1.0e-20;
2582 		if (j != n) {
2583 			dum=1.0/(a[j][j]);
2584 			for (i=j+1;i<=n;i++) a[i][j] *= dum;
2585 		}
2586 	}
2587 	free_vector(vv,1,n);
2588 	return 1;
2589 }
2590 
2591 
2592 /* Returns the determinant of matrix mat(4, 4) */
det4(double mat[4][4])2593 double det4(double mat[4][4])
2594 {
2595 
2596 	int i, j, *indx;
2597 	double *lu[5], d;
2598 
2599 	for(i=1;i<5;i++){
2600 		lu[i]=(double*)calloc(5, sizeof(double));
2601 		for(j=1;j<5;j++) lu[i][j]=mat[i-1][j-1];
2602 	}
2603 	indx=(int*)calloc(5, sizeof(int));
2604 
2605 	if(!ludcmp(lu, 4, indx, &d)) return 0.;
2606 
2607 	for(i=1;i<5;i++) d *= lu[i][i];
2608 	for(i=1;i<5;i++)
2609 		free(lu[i]);
2610 	return d;
2611 }
2612 
2613 
2614 /* logdet distance */
logdet(char * seq1,char * seq2,int lg,int * wght)2615 double logdet(char* seq1, char* seq2, int lg, int *wght)
2616 {
2617 	double matxy[4][4];
2618 	double freq[16], d, a1, c1, g1, t1, a2, c2, g2, t2;
2619 
2620 	if(!freq_obs(seq1, seq2, freq, lg, wght)) return -1.;
2621 
2622 	matxy[0][0]=freq[0]; matxy[0][1]=freq[1]; matxy[0][2]=freq[2]; matxy[0][3]=freq[3];
2623 	matxy[1][0]=freq[4]; matxy[1][1]=freq[5]; matxy[1][2]=freq[6]; matxy[1][3]=freq[7];
2624 	matxy[2][0]=freq[8]; matxy[2][1]=freq[9]; matxy[2][2]=freq[10]; matxy[2][3]=freq[11];
2625 	matxy[3][0]=freq[12]; matxy[3][1]=freq[13]; matxy[3][2]=freq[14]; matxy[3][3]=freq[15];
2626 
2627 	a1=matxy[0][0]+matxy[0][1]+matxy[0][2]+matxy[0][3];
2628 	c1=matxy[1][0]+matxy[1][1]+matxy[1][2]+matxy[1][3];
2629 	g1=matxy[2][0]+matxy[2][1]+matxy[2][2]+matxy[2][3];
2630 	t1=matxy[3][0]+matxy[3][1]+matxy[3][2]+matxy[3][3];
2631 	a2=matxy[0][0]+matxy[1][0]+matxy[2][0]+matxy[3][0];
2632 	c2=matxy[0][1]+matxy[1][1]+matxy[2][1]+matxy[3][1];
2633 	g2=matxy[0][2]+matxy[1][2]+matxy[2][2]+matxy[3][2];
2634 	t2=matxy[0][3]+matxy[1][3]+matxy[2][3]+matxy[3][3];
2635 
2636 
2637 	d=det4(matxy);
2638 	if(d<=0.) return -1.;
2639 	return (-log(d)+log(a1*c1*g1*t1*a2*c2*g2*t2)/2)/4;
2640 }
2641 
2642 
LOGDET_dist(allseq * data)2643 matrix *LOGDET_dist(allseq *data)
2644 {
2645 	int i,j;
2646 	matrix *mat;
2647 
2648 	mat = Make_Mat(data->n_otu);
2649 	Init_Mat(mat,data);
2650 	For(i,data->n_otu - 1)
2651     {
2652 		for(j = i + 1; j < data->n_otu; j++)
2653 		{
2654 		  if (tree_build_interrupted) return mat;
2655 		  double x = logdet(data->c_seq[i]->state, data->c_seq[j]->state, data->clean_len, data->wght);
2656 		  if(x == -1.) {
2657 			Free_Mat(mat);
2658 			return NULL;
2659 			}
2660 		  mat->dist[i][j] = x;
2661 		  mat->dist[j][i] = x;
2662 		}
2663 	}
2664 	return mat;
2665 }
2666 
2667 
put_names_back_in_tree(char * oldtree,char ** names)2668 char *put_names_back_in_tree(char *oldtree, char **names)
2669 //replace Seq###_ in oldtree by names in newtree
2670 {
2671 	char *p, *q, *newtree;
2672 	int rank, l = 0;
2673 
2674 	p = oldtree;
2675 	while((p = strstr(p, "Seq")) != NULL) {
2676 		p += 3;
2677 		sscanf(p, "%d", &rank);
2678 		l += strlen(names[rank]);
2679 		}
2680 	newtree = (char *)malloc(strlen(oldtree) + l + 1);
2681 	p = newtree;
2682 	q = oldtree;
2683 	while(*q != 0) {
2684 		if(strncmp(q, "Seq", 3) != 0) *p++ = *q++;
2685 		else {
2686 			sscanf(q + 3, "%d", &rank);
2687 			l = strlen(names[rank]);
2688 			strcpy(p, names[rank]);
2689 			p += l;
2690 			do q++; while(*q != '_');
2691 			q++;
2692 			}
2693 		}
2694 	free(oldtree);
2695 	*p = 0;
2696 	newtree = (char *)realloc(newtree, strlen(newtree) + 1);
2697 	return newtree;
2698 }
2699 
replace_tree_names_by_rank(const char * tree,char ** names,int notu)2700 char *replace_tree_names_by_rank(const char *tree, char **names, int notu)
2701 /* replace names from tree listed in table names by Seq##_
2702  returns altered tree or NULL if some tree name not found in table names
2703  */
2704 {
2705 	char *p = (char *)tree - 1;
2706 	char *newtree, *q, *qstart, c;
2707 	int found = 0, i, l;
2708 	char *start = NULL;
2709 	static char name[200];
2710 	newtree = (char *)malloc(strlen(tree) + 10 * notu + 1);
2711 	q = newtree;
2712 	while(*(++p) != 0) {
2713 		*(q++) = *p;
2714 		if(*p == '(' ) {
2715 			start = p + 1;
2716 			qstart = q;
2717 			}
2718 		else if(start == NULL && *p == ',') { start = p + 1; qstart = q; }
2719 		else if(start != NULL && (*p == ')' || *p == ':' || *p == ',') ) {
2720 			found++;
2721 			while(*start == ' ') start++;
2722 			memcpy(name, start, p - start); name[p - start] = 0;
2723 			l = strlen(name) - 1; while( l >= 0 && name[l] == ' ')  name[l--] = 0;
2724 			for(i = 0; i < notu; i++) if(strcmp(name, names[i]) == 0) break;
2725 			if(i == notu) {
2726 				free(newtree);
2727 				return NULL;
2728 			}
2729 			else {
2730 				c = *(q - 1);
2731 				sprintf(qstart, "Seq%d_", i);
2732 				q = qstart + strlen(qstart);
2733 				*q++ = c;
2734 				}
2735 			if(*p != ',') start = NULL; else { start = p + 1; qstart = q; }
2736 		}
2737 	}
2738 	*q = 0;
2739 	return newtree;
2740 }
2741 
2742 
command_line_phylogeny(int argc,char ** argv)2743 void command_line_phylogeny(int argc, char **argv)
2744 {
2745   char *tree, *p;
2746   int replicates = 0, jumbles = 0, i;
2747   bool err = false;
2748   if (!isarg(argc, argv, "-distance") && !isarg(argc, argv, "-parsimony")) {
2749     fputs("Option -build_tree requires either -distance or -parsimony\n", stderr);
2750     exit(1);
2751     }
2752   if (isarg(argc, argv, "-distance") && !(isarg(argc, argv, "-o") || isarg(argc, argv, "-distance_matrix"))) err = true;
2753   if (isarg(argc, argv, "-parsimony") && !isarg(argc, argv, "-o")) err = true;
2754   if (err) {
2755     fputs("Missing '-o fname' option\n", stderr);
2756     exit(1);
2757     }
2758   SEA_VIEW *view = cmdline_read_input_alignment(argc, argv);
2759   if ( (p = argname(argc, argv, "-sites")) ) {
2760     list_regions *elt = view->regions;
2761     while (elt) {
2762       if (strcmp(elt->element->name, p) == 0) break;
2763       elt = elt->next;
2764     }
2765     if (elt) view->active_region = elt->element;
2766   }
2767 
2768   if ( (p = argname(argc, argv, "-species")) ) {
2769     for (i = 0; i < view->numb_species_sets; i++) {
2770       if (strcmp(view->name_species_sets[i], p) == 0) break;
2771     }
2772     if (i < view->numb_species_sets) {
2773       view->sel_seqs = view->list_species_sets[i];
2774       view->tot_sel_seqs = 0;
2775       for (i = 0; i < view->tot_seqs; i++) if (view->sel_seqs[i]) view->tot_sel_seqs++;
2776     }
2777   }
2778 
2779   replicates = argval(argc, argv, "-replicates", 0);
2780 
2781   char *utree = NULL;
2782   if (isarg(argc, argv, "-usertree")) {
2783     char *utname;
2784     if ( (utname = argname(argc, argv, "-usertree"))) {
2785       FILE *in = fopen(utname, "r");
2786       if (!in) {
2787         fprintf(stderr, "Usertree file not found: %s\n",utname);
2788         exit(1);
2789       }
2790       fseek(in, 0, SEEK_END);
2791       long s = ftell(in);
2792       fseek(in, 0, SEEK_SET);
2793       utree = (char*)malloc(s+1);
2794       fread(utree, s, 1, in);
2795       utree[s] = 0;
2796       fclose(in);
2797     }
2798   }
2799 
2800   if (isarg(argc, argv, "-distance")) { // distance method
2801     int distkind = 0; // == observed_p/ndist
2802     char *distname;
2803     if ( (distname = argname(argc, argv, "-distance"))) {
2804       if (strcmp(distname, "JC") == 0) distkind = Jukes_Cantor;
2805       else if (strcmp(distname, "K2P") == 0) distkind = Kimura2P;
2806       else if (strcmp(distname, "HKY") == 0) distkind = HKY;
2807       else if (strcmp(distname, "logdet") == 0) distkind = LogDet;
2808       else if (strcmp(distname, "Ka") == 0) distkind = Ka;
2809       else if (strcmp(distname, "Ks") == 0) distkind = Ks;
2810       else if (strcmp(distname, "Poisson") == 0) distkind = Poisson_pdist;
2811       else if (strcmp(distname, "Kimura") == 0) distkind = Kimura_pdist;
2812       }
2813     if (distkind == 0) distname = strdup("Observed");
2814 
2815     view->tot_trees = 0;
2816     tree = run_distance_method(view, distkind, isarg(argc, argv, "-nogaps"), !isarg(argc, argv, "-NJ"), replicates > 1, replicates, NULL,
2817 			       distname, distkind == Ka || distkind == Ks, argname(argc, argv, "-distance_matrix"), utree, true, Fl::fatal, false, false);
2818     if (isarg(argc, argv, "-distance_matrix")) exit(0);
2819   }
2820 
2821   if (isarg(argc, argv, "-parsimony")) { // parsimony method
2822     const char *progname;
2823     jumbles = argval(argc, argv, "-jumbles", 0);
2824     char *search = argname(argc, argv, "-search");
2825     dnapars_S_option s_option;
2826     if (search && strcmp(search, "less") == 0) s_option = less_thorough;
2827     else if (search && strcmp(search, "best") == 0) s_option = rearrange_best_tree;
2828     else s_option = more_thorough;
2829     int usertreerank = 0;
2830     if (utree) {
2831       view->tot_trees = 1;
2832       view->trees = (char**)malloc(sizeof(char*));
2833       view->trees[0] = utree;
2834       usertreerank = 1;
2835     }
2836     tree = run_parsimony_method(view, isarg(argc, argv, "-nogaps"),
2837 				isarg(argc, argv, "-gaps_as_unknown") ? ", gaps treated as ?" : "",
2838 				replicates, jumbles, 10000, usertreerank, NULL, s_option, &progname, 1, true, 0, Fl::fatal);
2839     if (tree && (p = strchr(tree, ';')) != NULL) *(p+1) = 0; // tree string can contain several trees, keep first only
2840   }
2841 
2842   if (tree) {
2843     char *outfname = argname(argc, argv, "-o");
2844     FILE *out = (strcmp(outfname, "-") != 0 ? fopen(outfname, "w") : stdout);
2845     fwrite(tree, strlen(tree), 1, out);
2846     fputs("\n", out); fflush(out);
2847     if (out != stdout) fclose(out);
2848   }
2849 }
2850 
save_bootstrap_replicates(const char * fname,int replicates,SEA_VIEW * view)2851 void save_bootstrap_replicates(const char *fname, int replicates, SEA_VIEW *view)
2852 {
2853   allseq *seqs = view_to_allseq(view, FALSE);
2854   int *each_length = new int[seqs->n_otu];
2855   for (int i = 0; i < seqs->n_otu; i++) each_length[i] = seqs->clean_len;
2856   char **sequences = new char*[seqs->n_otu];
2857   for (int i = 0; i < seqs->n_otu; i++) {
2858     sequences[i] = new char[seqs->clean_len];
2859   }
2860   FILE *out = fl_fopen(fname, "w");
2861   char *tmp_fname = create_tmp_filename();
2862   for (int repl = 1; repl <= replicates; repl++) {
2863     bootstrap_weights(seqs, FALSE);
2864     for (int i = 0; i < seqs->n_otu; i++) {
2865       int l = 0;
2866       for (int j = 0; j < seqs->clean_len; j++) {
2867         for (int k = 0; k < seqs->wght[j]; k++) {
2868           sequences[i][l++] = seqs->c_seq[i]->state[j];
2869         }
2870       }
2871     }
2872     save_alignment_or_region(tmp_fname, sequences, NULL,
2873                              NULL, view->seqname, seqs->n_otu, each_length,
2874                              NULL, NULL, PHYLIP_FORMAT,
2875                              0, NULL,
2876                              NULL, NULL, 0, view->protein,
2877                              0, NULL,
2878                              NULL, view->phylipwidnames,
2879                              0, NULL,
2880                              NULL, view->spaces_in_fasta_names);
2881     // add to growing output file
2882     FILE *in = fl_fopen(tmp_fname, "r");
2883     char line[100];
2884     while (fgets(line, sizeof(line), in)) {
2885       fputs(line, out);
2886     }
2887     fclose(in);
2888     fl_unlink(tmp_fname);
2889   }
2890   fclose(out);
2891   for (int i = 0; i < seqs->n_otu; i++) delete[] sequences[i];
2892   delete[] sequences;
2893   delete[] each_length;
2894   free_after_view_to_allseq(seqs);
2895 }
2896