1LoadFunctionLibrary("../IOFunctions.bf");
2LoadFunctionLibrary("../all-terms.bf");
3LoadFunctionLibrary("../convenience/regexp.bf");
4LoadFunctionLibrary("../UtilityFunctions.bf");
5LoadFunctionLibrary("TreeTools");
6LoadFunctionLibrary("distances.bf");
7
8/** @module trees */
9
10/*
11 * Sample tree object
12 * trees = {
13 *       "string":"((((PIG,COW)Node3,HORSE,CAT)Node2,((RHMONKEY,BABOON)Node9,(HUMAN,CHIMP)Node12)Node8)Node1,RAT,MOUSE)",
14 *       "string_with_lengths":"((((PIG:0.147969,COW:0.21343)Node3:0.085099,HORSE:0.165787,CAT:0.264806)Node2:0.058611,((RHMONKEY:0.002015,BABOON:0.003108)Node9:0.022733,(HUMAN:0.004349,CHIMP:0.000799)Node12:0.011873)Node8:0.101856)Node1:0.340802,RAT:0.050958,MOUSE:0.09795)",
15 *       "branch length":{
16 *         "PIG":0.147969,
17 *         "COW":0.21343,
18 *         "Node3":0.08509899999999999,
19 *         "HORSE":0.165787,
20 *         "CAT":0.264806,
21 *         "Node2":0.058611,
22 *         "RHMONKEY":0.002015,
23 *         "BABOON":0.003108,
24 *         "Node9":0.022733,
25 *         "HUMAN":0.004349,
26 *         "CHIMP":0.000799,
27 *         "Node12":0.011873,
28 *         "Node8":0.101856,
29 *         "Node1":0.340802,
30 *         "RAT":0.050958,
31 *         "MOUSE":0.09795
32 *        },
33 *       "annotated_string":"((((PIG,COW)Node3,HORSE,CAT)Node2,((RHMONKEY{PR},BABOON{PR})Node9{PR},(HUMAN{PR},CHIMP{PR})Node12{PR})Node8{PR})Node1{PR},RAT,MOUSE);",
34 *       "model_map":{
35 *         "PIG":"",
36 *         "COW":"",
37 *         "Node3":"",
38 *         "HORSE":"",
39 *         "CAT":"",
40 *         "Node2":"",
41 *         "RHMONKEY":"PR",
42 *         "BABOON":"PR",
43 *         "Node9":"PR",
44 *         "HUMAN":"PR",
45 *         "CHIMP":"PR",
46 *         "Node12":"PR",
47 *         "Node8":"PR",
48 *         "Node1":"PR",
49 *         "RAT":"",
50 *         "MOUSE":""
51 *        },
52 *       "partitioned":{
53 *         "PIG":"leaf",
54 *         "COW":"leaf",
55 *         "Node3":"internal",
56 *         "HORSE":"leaf",
57 *         "CAT":"leaf",
58 *         "Node2":"internal",
59 *         "RHMONKEY":"leaf",
60 *         "BABOON":"leaf",
61 *         "Node9":"internal",
62 *         "HUMAN":"leaf",
63 *         "CHIMP":"leaf",
64 *         "Node12":"internal",
65 *         "Node8":"internal",
66 *         "Node1":"internal",
67 *         "RAT":"leaf",
68 *         "MOUSE":"leaf"
69 *        },
70 *       "model_list":{
71 *        {"", "PR"}
72 *        }
73 *      }
74 *
75 */
76
77
78/**
79 * Returns sanitized Newick tree string
80 * @name trees.GetTreeString._sanitize
81 * @private
82 * @param {String} string
83 * @returns {String} sanitized string
84 */
85lfunction trees.GetTreeString._sanitize(string) {
86
87    if (utility.GetEnvVariable("_DO_TREE_REBALANCE_")) {
88        //console.log ("BEFORE REBALANCE\n");
89        //console.log (string);
90        string = RerootTree(string, 0);
91        //console.log ("AFTER REBALANCE\n");
92        //console.log (string);
93    }
94
95    if (utility.GetEnvVariable("_KEEP_I_LABELS_")) {
96        utility.ToggleEnvVariable("INTERNAL_NODE_PREFIX", "intNode");
97    }
98
99    if (utility.GetEnvVariable("_KEEP_I_LABELS_")) {
100        utility.ToggleEnvVariable("INTERNAL_NODE_PREFIX", None);
101    }
102
103
104
105    return string;
106}
107
108/**
109 * Looks for a newick tree in an alignment file
110 * @name trees.GetTreeString
111 * @param {String|Bool} look_for_newick_tree - If a string, sanitizes and returns the string.
112 * If TRUE, search the alignment file for a newick tree. If FALSE, the user will be prompted for a nwk tree file.
113 * @returns {String} a newick tree string
114 */
115lfunction trees.GetTreeString(look_for_newick_tree) {
116
117
118
119    UseModel(USE_NO_MODEL);
120
121    if (Type(look_for_newick_tree) == "String") {
122        treeString = trees.GetTreeString._sanitize(look_for_newick_tree);
123    } else {
124        if (look_for_newick_tree == FALSE) {
125            utility.SetEnvVariable("IS_TREE_PRESENT_IN_DATA", FALSE);
126        }
127
128        if (utility.GetEnvVariable("IS_TREE_PRESENT_IN_DATA")) {
129            treeString = trees.GetTreeString._sanitize(utility.GetEnvVariable("DATAFILE_TREE"));
130            utility.SetEnvVariable("IS_TREE_PRESENT_IN_DATA", TRUE);
131        }
132
133        if (!utility.GetEnvVariable("IS_TREE_PRESENT_IN_DATA")) {
134            SetDialogPrompt("Please select a tree file for the data:");
135            utility.SetEnvVariable ("LAST_FILE_IO_EXCEPTION", None);
136            utility.ToggleEnvVariable ("SOFT_FILE_IO_EXCEPTIONS",TRUE);
137            fscanf(PROMPT_FOR_FILE, REWIND, "Raw", treeString);
138
139            look_for_newick_tree = utility.getGlobalValue ("LAST_FILE_PATH");
140
141            if (None != utility.GetEnvVariable ("LAST_FILE_IO_EXCEPTION")) {
142                if (utility.getGlobalValue ("LAST_RAW_FILE_PROMPT") ==  utility.getGlobalValue ("terms.trees.neighbor_joining")) {
143                    datafilter_name = utility.GetEnvVariable(utility.getGlobalValue ("terms.trees.data_for_neighbor_joining"));
144                    assert (Type (datafilter_name) == "String", "Expected a string for the datafilter to  build a NJ tree from");
145                    treeString = tree.infer.NJ (datafilter_name, None);
146                } else {
147                    assert (0, utility.GetEnvVariable ("LAST_FILE_IO_EXCEPTION"));
148                }
149                utility.SetEnvVariable ("LAST_FILE_IO_EXCEPTION", None);
150            }
151
152            utility.ToggleEnvVariable ("SOFT_FILE_IO_EXCEPTIONS",None);
153            fprintf(stdout, "\n");
154
155
156            if (regexp.Find(treeString, "^#NEXUS")) {
157
158                ExecuteCommands(treeString);
159
160                if (!utility.GetEnvVariable("IS_TREE_PRESENT_IN_DATA")) {
161                    fprintf(stdout, "\n> **This NEXUS file doesn't contain a valid tree block**");
162                    return 1;
163                }
164
165                nftm = utility.GetEnvVariable("NEXUS_FILE_TREE_MATRIX");
166
167
168                if (Rows(nftm) > 1) {
169                    ChoiceList(treeChoice, "Select a tree", 1, SKIP_NONE, nftm);
170                    if (treeChoice < 0) {
171                        return 1;
172                    }
173                    treeString = nftm[treeChoice][1];
174                } else {
175                    treeString = nftm[0][1];
176                }
177            } else {
178                start = (treeString $ "\\(")[0];
179                if (start < 0) {
180                    fprintf(stdout, "\n> **This doesn't seem to be a valid Newick string file**. Can't find the opening parenthesis. ``", treeString, "``\n");
181                    return 1;
182                } else {
183                    parenCounter = 1;
184                    current = start + 1;
185                    while (current < Abs(treeString) && parenCounter) {
186                        char = treeString[current];
187                        if (char == "(") {
188                            parenCounter += 1;
189                        } else {
190                            if (char == ")") {
191                                parenCounter += (-1);
192                            }
193                        }
194                        current += 1;
195                    }
196
197                    if (parenCounter) {
198                        fprintf(stdout, "\n> ** This doesn't seem to be a valid Newick string file**. Can't match the parentheses. \n``", treeString, "``\n");
199                        return 1;
200                    }
201
202                    treeString = treeString[start][current - 1];
203                }
204
205                treeString = trees.GetTreeString._sanitize(treeString);
206
207            }
208        }
209    }
210
211    return
212    {
213        utility.getGlobalValue("terms.data.file"): look_for_newick_tree,
214        utility.getGlobalValue("terms.data.tree"): treeString
215    };
216}
217
218
219///////// TODO: HOW TO UN-HARDCODE THIS FUNCTION? Regular strategies are not working. //////////////
220/**
221 * Partitions a tree by assigning nodes to either being internal or leaf
222 * @name trees.PartitionTree
223 * @param {Dictionary} avl - an AVL representation of the tree to be partitioned
224 * @returns nothing
225 */
226lfunction trees.PartitionTree(avl, l) {
227    for (k = 0; k < Abs(avl); k += 1) {
228        if ((avl[k])["Parent"]) {
229            if (Abs((avl[k])["Children"])) {
230                l[(avl[k])["Name"]] = "internal";
231            } else {
232                l[(avl[k])["Name"]] = "leaf";
233            }
234        }
235    }
236}
237
238/**
239 * Loads an annatotaed tree topology from a newick tree
240 * @name trees.LoadAnnotatedTopology
241 * @param {String|Bool} look_for_newick_tree - If a string, sanitizes and
242 *  returns the string. If TRUE, search the alignment file for a newick tree. If
243 *  FALSE, the user will be prompted for a nwk tree file.
244 * @returns {Dictionary} an annotated tree
245 */
246lfunction trees.LoadAnnotatedTopology(look_for_newick_tree) {
247    return trees.ExtractTreeInfo(trees.GetTreeString(look_for_newick_tree));
248}
249
250/**
251 * @name trees.LoadAnnotatedTopologyAndMap
252 * @param dataset_name
253 * @returns {Dictionary} an annotated tree
254 */
255lfunction trees.LoadAnnotatedTopologyAndMap(look_for_newick_tree, mapping) {
256
257
258    reverse = {};
259
260    for (k,v; in; mapping) {
261      reverse[v] = k;
262    }
263
264    //utility.ForEach(utility.Keys(mapping), "_key_", "`&reverse`[`&mapping`[_key_]] = _key_");
265
266    io.CheckAssertion("Abs (`&mapping`) == Abs (`&reverse`)", "The mapping between original and normalized tree sequence names must be one to one");
267    utility.ToggleEnvVariable("TREE_NODE_NAME_MAPPING", reverse);
268    result = trees.ExtractTreeInfo(trees.GetTreeString(look_for_newick_tree));
269    utility.ToggleEnvVariable("TREE_NODE_NAME_MAPPING", None);
270
271
272    return result;
273}
274
275/**
276 * Loads a tree topology with node name label mappings and annotations from a list of partitions
277 * @name trees.LoadAnnotatedTreeTopology.match_partitions
278 * @param {Matrix} partitions - a 1xN vector of partition names, typically
279 * retrieved from the `partitions` key in the dictionary returned from an
280 * alignments parser function.
281 * @param {Dictionary} mapping - a mapping of node names to labels
282 * @returns {Dictionary} of matched partitions
283 * @example
284 *  hky85_nucdata_info = alignments.ReadNucleotideAlignment(file_name, "hky85.nuc_data", "hky85.nuc_filter");
285 *  name_mapping = {
286 *   "HUMAN":"HUMAN",
287 *   "CHIMP":"CHIMP",
288 *   "BABOON":"BABOON",
289 *   "RHMONKEY":"RHMONKEY",
290 *   "COW":"COW",
291 *   "PIG":"PIG",
292 *   "HORSE":"HORSE",
293 *   "CAT":"CAT",
294 *   "MOUSE":"MOUSE",
295 *   "RAT":"RAT"
296 *  };
297 *  trees.LoadAnnotatedTreeTopology.match_partitions(hky85_nucdata_info[terms.json.partitions], name_mapping);
298 *  =>
299 *  {
300 *   "0":{
301 *     "name":"default",
302 *     "filter-string":"",
303 *     "tree": *    }
304 *  }
305 */
306lfunction trees.LoadAnnotatedTreeTopology.match_partitions(partitions, mapping) {
307
308
309    partition_count = Rows(partitions);
310    partrees = {};
311
312    tree_matrix = utility.GetEnvVariable("NEXUS_FILE_TREE_MATRIX");
313
314    if (Type(tree_matrix) == "Matrix") {
315        io.CheckAssertion("Rows(`&tree_matrix`) >= partition_count", "The number of trees in the NEXUS block cannot be smaller than the number of partitions in the file");
316        for (i = 0; i < partition_count; i += 1) {
317            partrees + {
318                utility.getGlobalValue("terms.data.name"): partitions[i][0],
319                utility.getGlobalValue("terms.data.filter_string"): partitions[i][1],
320                utility.getGlobalValue("terms.data.tree"): trees.LoadAnnotatedTopologyAndMap(tree_matrix[i][1], mapping)
321            };
322        }
323    } else { // no tree matrix; apply the same tree to all partitions
324
325        tree_info = trees.LoadAnnotatedTopologyAndMap(TRUE, mapping);
326
327        for (i = 0; i < partition_count; i += 1) {
328            partrees + {
329                utility.getGlobalValue("terms.data.name"): partitions[i][0],
330                utility.getGlobalValue("terms.data.filter_string"): partitions[i][1],
331                utility.getGlobalValue("terms.data.tree"): tree_info
332            };
333        }
334    }
335
336
337
338    return partrees;
339
340}
341
342/**
343 * @name trees.branch_names
344 * @param tree
345 * @param respect_case
346 * @returns result
347 */
348lfunction trees.branch_names(tree, respect_case) {
349    result = {};
350    branch_names = BranchName(tree, -1);
351    branch_count = Columns(branch_names) - 1;
352    if (respect_case) {
353        for (k = 0; k < branch_count; k += 1) {
354            result + branch_names[k];
355        }
356    } else {
357        for (k = 0; k < branch_count; k += 1) {
358            result + (branch_names[k] && 1);
359        }
360
361    }
362    return result;
363}
364
365
366/**
367 * @name trees.KillZeroBranches
368 * Given a tree dict and a list of matching parameter estimates
369 * this returns a modified tree dict with all zero-branch length
370 * internal branches removed and modifies in in place
371 * @param tree - dict of the tree object
372 * @param estimates - dict with branch length estimates
373 * @param branch_set - if not null, treat as test set and delete branches form it as well
374 * @param zero_internal -- store branches that were deleted here
375 * @return modified tree
376 */
377
378lfunction trees.KillZeroBranches (tree, estimates, branch_set, zero_internal) {
379
380    for (branch, value; in; tree [^"terms.trees.partitioned"]) {
381        if (value == ^"terms.tree_attributes.internal") {
382            if (estimates / branch) {
383                if ((estimates[branch])[^"terms.fit.MLE"] < 1e-10) {
384                    zero_internal + branch;
385                }
386            } else {
387                zero_internal + branch;
388            }
389        }
390    }
391
392    if (Abs (zero_internal) > 0) { // has zero internal branches
393        Topology T = tree[^"terms.trees.newick_annotated"];
394        T -  Columns(zero_internal);
395        if (None != branch_set) {
396            for (branch; in; zero_internal) {
397                branch_set - branch;
398            }
399        }
400        return trees.ExtractTreeInfoFromTopology (&T);
401
402    }
403
404    return tree;
405
406}
407
408/**
409 * @name trees.RootTree
410 * @param {Dict} tree_info
411 * @param {String} root on this node (or prompt if empty)
412 * @returns a {Dictionary} (same as ExtractTreeInfo) for the rerooted tree
413 */
414
415lfunction trees.RootTree(tree_info, root_on) {
416    if (Type (root_on) != "String") {
417        root_on = io.SelectAnOption (tree_info[^"terms.trees.partitioned"],
418                                     "Select a root");
419    }
420
421
422    io.CheckAssertion("`&tree_info`[^'terms.trees.partitioned']/`&root_on`", "Not a valid root choice '" + root_on + "'");
423
424    Topology T = tree_info[^"terms.trees.newick_with_lengths"];
425
426    utility.ToggleEnvVariable("ACCEPT_ROOTED_TREES", TRUE);
427    tree_info = trees.ExtractTreeInfo(RerootTree (T, root_on));
428    utility.ToggleEnvVariable("ACCEPT_ROOTED_TREES", None);
429
430    return {
431        ^"terms.trees.root"   : root_on,
432        ^"terms.data.tree"    : tree_info
433    };
434}
435
436/**
437 * @name trees.ExtractTreeInfoFromTopology
438 * @param {String} Topology
439 * @returns a {Dictionary} of the following tree information :
440 * - newick string
441 * - newick string with branch lengths
442 * - annotated string
443 * - model map
444 * - internal leaves
445 * - list of models
446 */
447lfunction trees.ExtractTreeInfoFromTopology(topology_object) {
448
449
450    branch_lengths = BranchLength(^topology_object, -1);
451    branch_names   = BranchName(^topology_object, -1);
452
453    branch_count   = Max (2,utility.Array1D (branch_names) - 1);
454
455
456    bls = {};
457
458    for (k = 0; k < branch_count; k+=1) {
459        if (branch_lengths[k] >= 0.) {
460            bls [branch_names[k]] = branch_lengths[k];
461        }
462    }
463
464
465    GetInformation(modelMap, ^topology_object);
466
467
468    leaves_internals = {};
469    flat_tree = (^topology_object) ^ 0;
470    trees.PartitionTree(flat_tree, leaves_internals);
471
472    utility.ToggleEnvVariable("INCLUDE_MODEL_SPECS", TRUE);
473    T.str = "" + ^topology_object;
474    utility.ToggleEnvVariable("INCLUDE_MODEL_SPECS", None);
475
476    rooted = utility.Array1D ((flat_tree[(flat_tree[0])["Root"]])["Children"]) == 2;
477
478    flat_tree       = None;
479    branch_lengths  = None;
480    branch_names    = None;
481    branch_count    = None;
482
483    return {
484        ^"terms.trees.newick": Format(^topology_object, 1, 0),
485        ^"terms.trees.newick_with_lengths": Format(^topology_object, 1, 1),
486        ^"terms.branch_length": bls,
487        ^"terms.trees.newick_annotated": T.str,
488        ^"terms.trees.model_map": modelMap,
489        ^"terms.trees.partitioned": leaves_internals,
490        ^"terms.trees.model_list": Columns(modelMap),
491        ^"terms.trees.rooted" : rooted,
492        ^"terms.trees.meta" : Eval(^(topology_object+".__meta")),
493        ^"terms.data.file" : file_name
494
495    };
496}
497
498/**
499 * @name trees.ExtractTreeInfo
500 * @param {String} tree_string
501 * @returns a {Dictionary} of the following tree information :
502 * - newick string
503 * - newick string with branch lengths
504 * - annotated string
505 * - model map
506 * - internal leaves
507 * - list of models
508 */
509lfunction trees.ExtractTreeInfo(tree_string) {
510
511    if (Type (tree_string) == "AssociativeList") {
512        file_name   = tree_string[^"terms.data.file"];
513        tree_string = tree_string[^"terms.data.tree"];
514    } else {
515        file_name = None;
516    }
517
518    Topology T = tree_string;
519    return trees.ExtractTreeInfoFromTopology (&T);
520
521}
522
523/**
524 * @name trees.HasBranchLengths
525 * @param {Dictionary} tree information object (e.g. as returned by LoadAnnotatedTopology)
526 * @returns a {Boolean} to indicate whether the tree has a valid branch length array
527 */
528
529lfunction trees.HasBranchLengths (tree_info) {
530    return utility.Array1D (tree_info [^"terms.trees.partitioned"]) == utility.Array1D (tree_info [^"terms.branch_length"]);
531}
532
533/**
534 * @name trees.BootstrapSupport
535 * @param {Dictionary} tree information object (e.g. as returned by LoadAnnotatedTopology)
536 * @returns a {Dictionary} (could be empty or partially filled) with "node name" -> bootstrap support
537 */
538
539lfunction trees.BootstrapSupport (tree_info) {
540    return utility.Map (utility.Filter (tree_info[^"terms.trees.meta"], "_value_", "(_value_/'bootstrap')"), "_value_",
541                        "_value_['bootstrap']");
542}
543
544/**
545 * Gets total branch count of supplied tree
546 * @name trees.GetBranchCount
547 * @param {String} tree_string
548 * @returns {Number} total branch count
549 */
550function trees.GetBranchCount(tree_string) {
551    Topology T = tree_string;
552    return BranchCount(T) + TipCount(T);
553}
554
555/**
556 * Sorts branch lengths in a descending order
557 * @name trees.SortedBranchLengths
558 * @param {String} tree_string
559 * @returns {Number} total branch count
560 */
561
562lfunction trees.SortedBranchLengths(tree_string) {
563
564    tree_count = trees.GetBranchCount(tree_string);
565    Tree T = tree_string;
566
567    branch_lengths = BranchLength(T,-1);
568
569    sorted_bls = {};
570    sorted_bls = {tree_count, 2}["branch_lengths[_MATRIX_ELEMENT_ROW_]*(_MATRIX_ELEMENT_COLUMN_==0)+_MATRIX_ELEMENT_ROW_*(_MATRIX_ELEMENT_COLUMN_==1)"];
571    sorted_bls = sorted_bls%0;
572    return sorted_bls;
573
574}
575
576/**
577 * Return branch names
578 * @name trees.BranchNames
579 * @param {String} tree_string
580 * @returns {Matrix} 1xN sorted branch names
581 */
582
583lfunction trees.BranchNames(tree) {
584    tree_string = tree[^"terms.trees.newick"];
585    Topology T = tree_string;
586    branch_names = BranchName(T, -1);
587    return branch_names;
588}
589
590/**
591 * Compute branch labeling using parsimony
592 * @name trees.ParsimonyLabel
593 * @param 	{String} tree ID
594 * @param 	{Dict} 	 leaf -> label
595 					 labels may be missing for some of the leaves to induce partial labeling of the tree
596 * @returns {Dict} 	 {"score" : value, "labels" : Internal Branch -> label}
597 */
598
599lfunction trees.ParsimonyLabel(tree_id, given_labels) {
600   tree_avl = (^tree_id) ^ 0;
601   label_values = utility.UniqueValues (given_labels);
602   label_count  = utility.Array1D (label_values);
603   labels = {};
604   scores = {}; // node name -> score of optimal labeling staring at this now given parent state
605   optimal_labeling = {}; // node name -> current node labeling which achieves this score
606   resulting_labels = {}; // internal nodes -> label
607
608   // pass 1 to fill in the score matrix
609   for (k = 0; k < Abs (tree_avl) ; k += 1) {
610   	 	node_name = (tree_avl[k])["Name"];
611   	 	node_children = (tree_avl[k])["Children"];
612   	 	c_count = Abs (node_children);
613   	 	if (c_count) { // internal node
614   	 		// first check to see if all the children are labeled
615
616   	 		c_names = {c_count , 1};
617
618			for (c = 0; c < c_count; c+=1) {
619				c_names[c] = (tree_avl[node_children[c]])["Name"];
620   	 			if (scores / c_names[c] == FALSE)  {
621   	 				break;
622   	 			}
623
624   	 		}
625   	 		if (c == c_count) {
626
627   	 			scores [node_name] = {1, label_count};
628   	 			labels [node_name] = {1, label_count};
629
630
631    	 		for (parent_state = 0; parent_state < label_count; parent_state+=1) {
632    	 			best_score = 1e100;
633    	 			best_state = None;
634    	 			for (my_state = 0; my_state < label_count; my_state+=1) {
635    	 				my_score = 0;
636						for (c = 0; c < c_count; c+=1) {
637							my_score += (scores [c_names[c]])[my_state];
638						}
639						if (my_state != parent_state) {
640							my_score += 1;
641						}
642						if (my_score < best_score) {
643							best_score = my_score;
644							best_state = my_state;
645						}
646					}
647   	 	 			(scores [node_name])[parent_state] = best_score;
648   	 	 			(labels [node_name])[parent_state] = best_state;
649   	 	 		}
650
651   	 	 	}
652   	 	} else {
653   	 		if (utility.Has (given_labels, node_name, "String")) {
654   	 			node_label = given_labels[node_name];
655   	 			scores [node_name] = {1, label_count};
656   	 			labels [node_name] = {1, label_count};
657
658   	 			for (z = 0; z < label_count; z+=1) {
659   	 				if (node_label == label_values[z]) {
660   	 					break;
661   	 				}
662   	 			}
663
664    	 			for (i = 0; i < label_count; i+=1) {
665   	 				(scores [node_name]) [i] = 1 - (z == i);
666   	 				(labels [node_name]) [i] = z;
667   	 			}
668   	 		}
669   	 	}
670   }
671
672
673   // pass 2 to choose the best state for subtree parents
674
675
676   total_score = 0;
677
678   for (k = 0; k < Abs (tree_avl); k += 1) {
679   	 	node_name = (tree_avl[k])["Name"];
680   	 	node_children = (tree_avl[k])["Children"];
681   	 	c_count = Abs (node_children);
682   	 	if (c_count) { // internal node
683   	 		parent = (tree_avl[k])["Parent"];
684   	 		if (parent > 0) {
685   	 			if (utility.Has (scores, (tree_avl[parent])["Name"], "Matrix")) {
686   	 				continue;
687   	 			}
688   	 		}
689
690   	 		if (utility.Has (scores, node_name, "Matrix") == FALSE) {
691   	 			continue;
692   	 		}
693
694   	 		best_score = 1e100;
695   	 		best_label = None;
696
697   	 		for (i = 0; i < label_count; i+=1) {
698   	 			my_score = (scores [node_name])[i];
699
700   	 			if (my_score < best_score) {
701   	 				best_score = my_score;
702   	 				best_label = i;
703   	 			}
704   	 		}
705
706   	 		total_score += best_score;
707   	 		resulting_labels [node_name] = best_label;
708   	 	}
709 	}
710
711   tree_avl = (^tree_id) ^ 1;
712   for (k = 2; k < Abs (tree_avl); k += 1) {
713   	 node_name = (tree_avl[k])["Name"];
714   	 if (utility.Array1D((tree_avl[k])["Children"])) {
715   	 	parent = (tree_avl[k])["Parent"];
716   	 	if (utility.Has (resulting_labels, (tree_avl[parent])["Name"], "Number")) {
717   	 		parent = resulting_labels[(tree_avl[parent])["Name"]];
718   	 		resulting_labels[node_name] = (labels [node_name])[parent];
719   	 	}
720   	 }
721   }
722
723   return {"score" : total_score, "labels" : utility.Map (resulting_labels, "_value_", "`&label_values`[_value_]")};
724   // pass 1 to choose the best state for subtree parents
725}
726
727/**
728 * Compute branch labeling using conjunction, i.e. node N is labeled 'X' iff
729 * all of the nodes that are in the subtree rooted at 'N' are also labeled 'N'
730 * @name trees.ConjunctionLabel
731 * @param 	{String} tree ID
732 * @param 	{Dict} 	 leaf -> label
733 					 labels may be missing for some of the leaves to induce partial labeling of the tree
734 * @returns {Dict} 	 {"labeled" : # of labeled internal  nodes, "labels" : Internal Branch -> label}
735 */
736
737lfunction trees.ConjunctionLabel (tree_id, given_labels) {
738   tree_avl = (^tree_id) ^ 0;
739   label_values = utility.UniqueValues (given_labels);
740   label_count  = utility.Array1D (label_values);
741   labels = {};
742   resulting_labels = {}; // internal nodes -> label
743   inodes_labeled = 0;
744
745   // pass 1 to fill in the score matrix
746   for (k = 0; k < Abs (tree_avl) ; k += 1) {
747   	 	node_name = (tree_avl[k])["Name"];
748   	 	node_children = (tree_avl[k])["Children"];
749   	 	c_count = utility.Array1D (node_children);
750
751   	 	if (c_count) { // internal node
752   	 		// first check to see if all the children are labeled
753
754   	 	   	child_labels = {};
755
756			for (c = 0; c < c_count; c+=1) {
757				c_name = (tree_avl[node_children[c]])["Name"];
758   	 			if (utility.Has (labels, c_name, "String") == FALSE)  {
759   	 				break;
760   	 			}
761   	 			child_labels [labels[c_name]] = TRUE;
762
763   	 		}
764   	 		if (c == c_count) { // all children labeled
765    	 	   inodes_labeled += 1;
766    	 	   if (utility.Array1D (child_labels) == 1) {
767    	 	       labels [node_name] = (utility.Keys (child_labels))[0];
768    	 	       resulting_labels[node_name] = labels [node_name];
769    	 	   }
770   	 	 	}
771   	 	} else { // leaf
772   	 		if (utility.Has (given_labels, node_name, "String")) {
773                labels[node_name] = given_labels[node_name];
774   	 		}
775   	 	}
776   }
777
778
779
780   return {"labeled" : inodes_labeled, "labels" : resulting_labels};
781   // pass 1 to choose the best state for subtree parents
782}
783
784/**
785 * Compute branch labeling using conjunction, i.e. node N is labeled 'X' iff
786 * SOME of the nodes that are in the subtree rooted at 'N' are also labeled 'N'
787 * @name trees.ConjunctionLabel
788 * @param 	{String} tree ID
789 * @param 	{Dict} 	 leaf -> label
790 					 labels may be missing for some of the leaves to induce partial labeling of the tree
791 * @returns {Dict} 	 {"labeled" : # of labeled internal  nodes, "labels" : Internal Branch -> label}
792 */
793
794lfunction trees.DisjunctionLabel (tree_id, given_labels) {
795   tree_avl = (^tree_id) ^ 0;
796   label_values = utility.UniqueValues (given_labels);
797   label_count  = utility.Array1D (label_values);
798   labels = {};
799   resulting_labels = {}; // internal nodes -> label
800   inodes_labeled = 0;
801
802   // pass 1 to fill in the score matrix
803   for (k = 0; k < Abs (tree_avl) ; k += 1) {
804   	 	node_name = (tree_avl[k])["Name"];
805   	 	node_children = (tree_avl[k])["Children"];
806   	 	c_count = utility.Array1D (node_children);
807
808   	 	if (c_count) { // internal node
809   	 		// first check to see if all the children are labeled
810
811   	 	   	child_labels = {};
812
813			for (c = 0; c < c_count; c+=1) {
814				c_name = (tree_avl[node_children[c]])["Name"];
815   	 			if (utility.Has (labels, c_name, "String") == FALSE)  {
816   	 				break;
817   	 			}
818   	 			child_labels [labels[c_name]] = TRUE;
819
820   	 		}
821   	 		if (c > 0) { // all children labeled
822    	 	   inodes_labeled += 1;
823    	 	   if (utility.Array1D (child_labels) == 1) {
824    	 	       labels [node_name] = (utility.Keys (child_labels))[0];
825    	 	       resulting_labels[node_name] = labels [node_name];
826    	 	   }
827   	 	 	}
828   	 	} else { // leaf
829   	 		if (utility.Has (given_labels, node_name, "String")) {
830                labels[node_name] = given_labels[node_name];
831   	 		}
832   	 	}
833   }
834
835
836
837   return {"labeled" : inodes_labeled, "labels" : resulting_labels};
838   // pass 1 to choose the best state for subtree parents
839}
840
841/**
842 * Annotate a tree string with using user-specified labels
843 * @name trees.ParsimonyLabel
844 * @param 	{String} tree ID
845 * @param 	{Dict/String} 	 node_name -> label OR (if string) (node_name) => name + annotation
846 * @param {String} a pair of characters to enclose the label in
847 * @param {Dict/String} node_name -> length OR (if string) (node_name) => length annotation
848 * @return {String} annotated string
849 */
850
851lfunction tree.Annotate (tree_id, labels, chars, doLengths) {
852	theAVL = (^tree_id)^0;
853	_ost = "";
854	_ost * 256;
855
856	lastLevel = 0;
857	treeSize  = Abs(theAVL);
858	treeInfo  = theAVL[0];
859	rootIndex = treeInfo["Root"];
860	lastDepth = 0;
861
862	for (nodeIndex = 1; nodeIndex < treeSize; nodeIndex += 1) {
863        nodeInfo = theAVL[nodeIndex];
864        myDepth = nodeInfo["Depth"];
865        if (lastDepth < myDepth) {
866            if (lastDepth) {
867                _ost * ",";
868            }
869            for (pidx = lastDepth; pidx < myDepth; pidx += 1) {
870                _ost * "(";
871            }
872        } else {
873            if (lastDepth > myDepth) {
874                for (pidx = myDepth; pidx < lastDepth; pidx += 1) {
875                    _ost * ")";
876                }
877            } else {
878                _ost * ",";
879            }
880        }
881
882
883        if (Type (labels) == "String") {
884            _ost * Call (labels, nodeInfo["Name"]);
885        } else {
886            _ost * nodeInfo["Name"];
887            if (labels / nodeInfo["Name"]) {
888                if (Abs(labels[nodeInfo["Name"]])) {
889                    _ost * (chars[0] + labels[nodeInfo["Name"]] + chars[1]);
890                }
891            }
892        }
893
894        if (doLengths ) {
895
896             if (nodeIndex < treeSize - 1) {
897                _ost * ":";
898                if (Type (doLengths) == "String") {
899                    _ost * Call (doLengths, nodeInfo);
900                } else {
901                    _ost * (""+nodeInfo ["Length"]);
902
903                }
904            }
905        }
906        lastDepth = myDepth;
907	}
908
909	_ost * 0;
910	return _ost;
911}
912
913
914/**
915 * Generate a symmetric binary tree on N leaves (only perfectly symmetric if N is a power of 2)
916 * @name trees.GenerateSymmetricTree
917 * @param 	{Number} N : number of leavers
918 * @param   {Bool} rooted : whether the tree is rooted
919 * @param 	{None/String} branch_name   : if a string, then it is assumed to be a function with an integer argument (node index) that generates branch names
920                                        default is to use numeric names
921 * @param 	{None/String} branch_length : if a string, then it is assumed to be a function with no arguments that generates branch lengths
922 * @return  {String} Newick tree string
923 */
924
925lfunction tree.GenerateSymmetricTree (N, rooted, branch_name, branch_length) {
926    assert (N>=2, "Can't generate trees with fewer than 2 leaves");
927    internal_nodes = N-2;
928    if (rooted) {
929        internal_nodes += 1;
930    }
931
932    total_nodes = N + internal_nodes;
933    flat_tree  = {total_nodes, 4}["-1"];
934
935
936    // each row represents the corresponding node (in its' index), [parent, child 1, child 2, child 3] record
937    // each node is represented with a [0, total_nodes-1] integer
938    // -1 means NULL
939    // for example ((1,2),(3,4)) will be represented as
940        /*
941           {4,-1,-1,-1}
942           {4,-1,-1,-1}
943           {5,-1,-1,-1}
944           {5,-1,-1,-1}
945           {6,0,1,-1}
946           {6,2,3,-1}
947           {-1,4,5,-1}
948        */
949
950    current_parent_node = N;
951    current_child_node  = 0;
952
953
954    while (current_parent_node < total_nodes) {
955        flat_tree[current_child_node][0] = current_parent_node;
956        flat_tree[current_child_node+1][0] = current_parent_node;
957        flat_tree[current_parent_node][1] = current_child_node;
958        if (current_child_node < total_nodes-2) {
959            flat_tree[current_parent_node][2] = current_child_node + 1;
960        }
961        current_parent_node += 1;
962        current_child_node += 2;
963        if (current_child_node == N-1) {
964            // if the number of leaves in not divisible for 2, we skip the unpaired leaf and attach it to the root
965            flat_tree[total_nodes-1][3-(rooted!=FALSE)] = N-1;
966            flat_tree[current_child_node][0] = total_nodes-1;
967            current_child_node += 1;
968        }
969    }
970
971
972    if (current_child_node < total_nodes - 1) { // rooted tree
973        /*
974        {
975            {4, -1, -1, -1}
976            {4, -1, -1, -1}
977            {5, -1, -1, -1}
978            {5, -1, -1, -1}
979            {-1, 0, 1, -1}
980            {-1, 2, 3, -1}
981            }
982        */
983        if (flat_tree[total_nodes-1][3] < 0) { // doesn't already have the 3rd leaf attached
984            flat_tree[total_nodes-1][3] = total_nodes-2;
985            flat_tree[total_nodes-2][0] = total_nodes-1;
986            flat_tree[flat_tree[total_nodes-1][1]] = total_nodes-1;
987            flat_tree[flat_tree[total_nodes-1][2]] = total_nodes-1;
988        }
989    }
990
991    return tree._NewickFromMatrix (&flat_tree, total_nodes-1, branch_name, branch_length);
992
993}
994
995
996/**
997 * Generate a ladder tree on N leaves
998 * @name trees.GenerateSymmetricTree
999 * @param 	{Number} N : number of leavers
1000 * @param   {Bool} rooted : whether the tree is rooted
1001 * @param 	{None/String} branch_name   : if a string, then it is assumed to be a function with an integer argument (node index) that generates branch names
1002                                        default is to use numeric names
1003 * @param 	{None/String} branch_length : if a string, then it is assumed to be a function with no arguments that generates branch lengths
1004 * @return  {String} Newick tree string
1005 */
1006
1007lfunction tree.GenerateLadderTree (N, rooted, branch_name, branch_length) {
1008    assert (N>=2, "Can't generate trees with fewer than 2 leaves");
1009    internal_nodes = N-2;
1010    if (rooted) {
1011        internal_nodes += 1;
1012    }
1013
1014    total_nodes = N + internal_nodes;
1015    flat_tree  = {total_nodes, 4}["-1"];
1016
1017
1018    current_parent_node = N;
1019    current_child_node  = 0;
1020
1021    // create the first cherry
1022
1023    flat_tree[current_child_node][0] = current_parent_node;
1024    flat_tree[current_child_node+1][0] = current_parent_node;
1025    flat_tree[current_parent_node][1] = current_child_node;
1026    flat_tree[current_parent_node][2] = current_child_node + 1;
1027    current_child_node = 2;
1028    current_parent_node += 1;
1029
1030
1031    while (current_parent_node < total_nodes) {
1032        flat_tree[current_child_node][0] = current_parent_node;
1033        flat_tree[current_parent_node-1][0] = current_parent_node;
1034        flat_tree[current_parent_node][1] = current_parent_node-1;
1035        flat_tree[current_parent_node][2] = current_child_node;
1036        current_parent_node += 1;
1037        current_child_node += 1;
1038
1039    }
1040
1041
1042    if (current_child_node < N) { // unrooted tree
1043        flat_tree[total_nodes-1][3] = N-1;
1044        flat_tree[N-1][0] = total_nodes-1;
1045    }
1046
1047    return tree._NewickFromMatrix (&flat_tree, total_nodes-1, branch_name, branch_length);
1048
1049}
1050
1051/**
1052 * Generate a RANDOM tree on N leaves
1053 * @name trees.GenerateRandomTree
1054 * @param 	{Number} N : number of leavers
1055 * @param   {Bool} rooted : whether the tree is rooted
1056 * @param 	{None/String} branch_name   : if a string, then it is assumed to be a function with an integer argument (node index) that generates branch names
1057                                        default is to use numeric names
1058 * @param 	{None/String} branch_length : if a string, then it is assumed to be a function with no arguments that generates branch lengths
1059 * @return  {String} Newick tree string
1060 */
1061
1062
1063lfunction tree._GenerateRandomTreeDraw2 (nodes) {
1064    r = Rows (nodes);
1065    n = Abs (nodes);
1066    n1 = Random (0,n) $ 1;
1067    do {
1068        n2 = Random (0,n) $ 1;
1069    } while (n1 == n2);
1070
1071
1072    nodes - r[n1];
1073    nodes - r[n2];
1074
1075    return {"0" : +r[n1], "1" : +r[n2]};
1076}
1077
1078
1079lfunction tree.GenerateRandomTree (N, rooted, branch_name, branch_length) {
1080    assert (N>=2, "Can't generate trees with fewer than 2 leaves");
1081    internal_nodes = N-2;
1082    if (rooted) {
1083        internal_nodes += 1;
1084    }
1085
1086    total_nodes = N + internal_nodes;
1087    flat_tree  = {total_nodes, 4}["-1"];
1088
1089
1090    available_to_join = {};
1091    for (k = 0; k < N; k+=1) {
1092        available_to_join[k] = TRUE;
1093    }
1094
1095
1096    current_parent_node = N;
1097    downto = 1 + (rooted == 0);
1098
1099
1100    while (Abs (available_to_join) > downto) {
1101        pair = tree._GenerateRandomTreeDraw2 (available_to_join);
1102        flat_tree[pair[0]][0] = current_parent_node;
1103        flat_tree[pair[1]][0] = current_parent_node;
1104        flat_tree[current_parent_node][1] = pair[0];
1105        flat_tree[current_parent_node][2] = pair[1];
1106        available_to_join[current_parent_node] = TRUE;
1107        current_parent_node += 1;
1108    }
1109
1110    if (!rooted) { // attach the last node to the root
1111        available_to_join - (total_nodes-1);
1112        leaf_index = +((Rows(available_to_join))[0]);
1113        flat_tree[leaf_index][0] = total_nodes-1;
1114        flat_tree[total_nodes-1][3] = leaf_index;
1115
1116    }
1117
1118    return tree._NewickFromMatrix (&flat_tree, total_nodes-1, branch_name, branch_length);
1119
1120}
1121
1122lfunction tree._NewickFromMatrix (flat_tree, index, branch_name, branch_length) {
1123    if ((^flat_tree)[index][0] >= 0) { // not a root
1124        if ((^flat_tree)[index][1] < 0) { // a leaf
1125            if (branch_name) {
1126                name = Call (branch_name, index);
1127            } else {
1128                name = "" + index;
1129            }
1130            if (branch_length) {
1131                return name + ":" + Call (branch_length);
1132            }
1133            return name;
1134        } else {
1135            if (branch_length) {
1136                bl := ":" + Call (branch_length);
1137            } else {
1138                bl := "";
1139            }
1140            if (branch_name) {
1141                bn := Call (branch_name, index);
1142            } else {
1143                bn := "";
1144            }
1145
1146            return "("   +  tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][1], branch_name, branch_length) +
1147                     "," +  tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][2], branch_name, branch_length) +
1148                     ")" + bn + bl;
1149        }
1150    }  else {
1151        if ((^flat_tree)[index][3] < 0) { // 2 root children
1152            return "(" + tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][1], branch_name, branch_length) +
1153                    "," +  tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][2], branch_name, branch_length) +
1154                    ")";
1155        } else {
1156            return "(" + tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][1], branch_name, branch_length) +
1157            "," +  tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][2], branch_name, branch_length) +
1158            "," +  tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][3], branch_name, branch_length) +
1159            ")";
1160        }
1161    }
1162    return None;
1163}
1164
1165/**
1166 * Infer a neighbor joining tree from a data filter using a specific distance
1167 * @name trees.infer.NJ
1168 * @param 	{String} datafilter the ID of an existing datafilter
1169 * @param 	{null/String/Matrix},
1170        null   : use the default distance calculation appropriate for the datatype
1171        String : a callback which takes (filter_id, seq1, seq2) arguments and returns d(seq1,seq2)
1172        Matrix : a precomputed distance matrix (same order of rows/column as datafilter names)
1173
1174 * @return  {String} inferred tree string
1175 */
1176
1177lfunction tree.infer.NJ (datafilter, distances) {
1178
1179    flush_distances = FALSE;
1180    if (None == distances) { // use default distances
1181        type = alignments.FilterType (datafilter);
1182        if (type == ^"terms.nucleotide" || type == ^"terms.codon") {
1183            distances = distances.nucleotide.tn93 (datafilter, null, null);
1184            flush_distances = TRUE;
1185        } else {
1186            if (type == ^"terms.amino_acid" || type == ^"terms.binary") {
1187                distances = distances.p_distance (datafilter, null);
1188                flush_distances = TRUE;
1189            } else {
1190                io.ReportAnExecutionError ("No default distance available for filter type of `type`");
1191            }
1192        }
1193    }
1194
1195    N = ^(datafilter + ".species");
1196    assert (N == Rows (distances), "Incompatible dimensions for the distance matrix and datafilter");
1197
1198
1199    if (N == 2) {
1200		d1 = distances[0][1]/2;
1201		treeNodes = {{0,1,d1__},
1202					 {1,1,d1__},
1203					 {2,0,0}};
1204		cladesInfo = {{2,0}};
1205	}
1206	else {
1207		if (N == 3) {
1208			d1 = (distances[0][1]+distances[0][2]-distances[1][2])/2;
1209			d2 = (distances[0][1]-distances[0][2]+distances[1][2])/2;
1210			d3 = (distances[1][2]+distances[0][2]-distances[0][1])/2;
1211			treeNodes = {{0,1,d1__},
1212						 {1,1,d2__},
1213						 {2,1,d3__}
1214						 {3,0,0}};
1215
1216			cladesInfo = {{3,0}};
1217		}
1218		else {
1219
1220			njm = (distances > 0) >= N;
1221
1222			treeNodes 		= {2*N,3};
1223
1224            for (i = 0; i < 2*N; i += 1) {
1225                treeNodes[i][0] = njm[i][0]; // node index; leaves in [0,N), internal nodes N and higher
1226                treeNodes[i][1] = njm[i][1]; // node depth relative to the root
1227                treeNodes[i][2] = njm[i][2]; // branch length
1228            }
1229
1230            njm = None;
1231		}
1232	}
1233
1234    if (flush_distances) {
1235        distances = None;
1236    }
1237
1238    return tree._matrix2string (treeNodes, N, alignments.GetSequenceNames (datafilter), TRUE);
1239
1240}
1241
1242/* ____________________________________________*/
1243
1244/**
1245 * Convert a matrix representation of a tree into Newick
1246 * @name trees._matrix2string
1247 * @param 	{Matrix} matrix_form : Kx3 matrix;
1248            each row represents ?
1249 * @param 	{Number} N : number of leaves
1250 * @param 	{Matrix/Dict} names : leaf index -> name
1251 * @param 	{Bool} do_lengths : include branch lengths in the output
1252
1253 * @return  {String} newick tree string
1254 */
1255
1256lfunction tree._matrix2string (matrix_form, N, names, do_lengths) {
1257
1258	newick = ""; newick * 1024;
1259
1260	p = 0;                 // tree depth of previous node
1261	k = 0;                 // current row in matrix_form
1262
1263	m = matrix_form[0][1]; // current tree depth (0 == root)
1264	n = matrix_form[0][0]; // current node index
1265
1266	while (m) {
1267		if (m>p) { // going down in the tree
1268			if (p) {
1269				newick * ",";
1270			}
1271			for (j=p; j<m; j += 1 ) {
1272				newick * "(";
1273			}
1274		} else { // going up in the tree
1275			if (m<p) {
1276				for (j=m;j<p; j += 1) {
1277					newick * ")";
1278				}
1279			}
1280			else {
1281				newick * ",";
1282			}
1283		}
1284
1285		if (n < N) {
1286		    newick * names [n];
1287		}
1288
1289		if (do_lengths) {
1290			newick * (":"+matrix_form[k][2]);
1291		}
1292
1293		k += 1;
1294		p = m;
1295		n = matrix_form[k][0];
1296		m = matrix_form[k][1];
1297	}
1298
1299	for (j=m;j<p; j += 1) {
1300		newick *")";
1301	}
1302
1303	newick *0;
1304	return newick;
1305}
1306