1LoadFunctionLibrary("../IOFunctions.bf"); 2LoadFunctionLibrary("../all-terms.bf"); 3LoadFunctionLibrary("../convenience/regexp.bf"); 4LoadFunctionLibrary("../UtilityFunctions.bf"); 5LoadFunctionLibrary("TreeTools"); 6LoadFunctionLibrary("distances.bf"); 7 8/** @module trees */ 9 10/* 11 * Sample tree object 12 * trees = { 13 * "string":"((((PIG,COW)Node3,HORSE,CAT)Node2,((RHMONKEY,BABOON)Node9,(HUMAN,CHIMP)Node12)Node8)Node1,RAT,MOUSE)", 14 * "string_with_lengths":"((((PIG:0.147969,COW:0.21343)Node3:0.085099,HORSE:0.165787,CAT:0.264806)Node2:0.058611,((RHMONKEY:0.002015,BABOON:0.003108)Node9:0.022733,(HUMAN:0.004349,CHIMP:0.000799)Node12:0.011873)Node8:0.101856)Node1:0.340802,RAT:0.050958,MOUSE:0.09795)", 15 * "branch length":{ 16 * "PIG":0.147969, 17 * "COW":0.21343, 18 * "Node3":0.08509899999999999, 19 * "HORSE":0.165787, 20 * "CAT":0.264806, 21 * "Node2":0.058611, 22 * "RHMONKEY":0.002015, 23 * "BABOON":0.003108, 24 * "Node9":0.022733, 25 * "HUMAN":0.004349, 26 * "CHIMP":0.000799, 27 * "Node12":0.011873, 28 * "Node8":0.101856, 29 * "Node1":0.340802, 30 * "RAT":0.050958, 31 * "MOUSE":0.09795 32 * }, 33 * "annotated_string":"((((PIG,COW)Node3,HORSE,CAT)Node2,((RHMONKEY{PR},BABOON{PR})Node9{PR},(HUMAN{PR},CHIMP{PR})Node12{PR})Node8{PR})Node1{PR},RAT,MOUSE);", 34 * "model_map":{ 35 * "PIG":"", 36 * "COW":"", 37 * "Node3":"", 38 * "HORSE":"", 39 * "CAT":"", 40 * "Node2":"", 41 * "RHMONKEY":"PR", 42 * "BABOON":"PR", 43 * "Node9":"PR", 44 * "HUMAN":"PR", 45 * "CHIMP":"PR", 46 * "Node12":"PR", 47 * "Node8":"PR", 48 * "Node1":"PR", 49 * "RAT":"", 50 * "MOUSE":"" 51 * }, 52 * "partitioned":{ 53 * "PIG":"leaf", 54 * "COW":"leaf", 55 * "Node3":"internal", 56 * "HORSE":"leaf", 57 * "CAT":"leaf", 58 * "Node2":"internal", 59 * "RHMONKEY":"leaf", 60 * "BABOON":"leaf", 61 * "Node9":"internal", 62 * "HUMAN":"leaf", 63 * "CHIMP":"leaf", 64 * "Node12":"internal", 65 * "Node8":"internal", 66 * "Node1":"internal", 67 * "RAT":"leaf", 68 * "MOUSE":"leaf" 69 * }, 70 * "model_list":{ 71 * {"", "PR"} 72 * } 73 * } 74 * 75 */ 76 77 78/** 79 * Returns sanitized Newick tree string 80 * @name trees.GetTreeString._sanitize 81 * @private 82 * @param {String} string 83 * @returns {String} sanitized string 84 */ 85lfunction trees.GetTreeString._sanitize(string) { 86 87 if (utility.GetEnvVariable("_DO_TREE_REBALANCE_")) { 88 //console.log ("BEFORE REBALANCE\n"); 89 //console.log (string); 90 string = RerootTree(string, 0); 91 //console.log ("AFTER REBALANCE\n"); 92 //console.log (string); 93 } 94 95 if (utility.GetEnvVariable("_KEEP_I_LABELS_")) { 96 utility.ToggleEnvVariable("INTERNAL_NODE_PREFIX", "intNode"); 97 } 98 99 if (utility.GetEnvVariable("_KEEP_I_LABELS_")) { 100 utility.ToggleEnvVariable("INTERNAL_NODE_PREFIX", None); 101 } 102 103 104 105 return string; 106} 107 108/** 109 * Looks for a newick tree in an alignment file 110 * @name trees.GetTreeString 111 * @param {String|Bool} look_for_newick_tree - If a string, sanitizes and returns the string. 112 * If TRUE, search the alignment file for a newick tree. If FALSE, the user will be prompted for a nwk tree file. 113 * @returns {String} a newick tree string 114 */ 115lfunction trees.GetTreeString(look_for_newick_tree) { 116 117 118 119 UseModel(USE_NO_MODEL); 120 121 if (Type(look_for_newick_tree) == "String") { 122 treeString = trees.GetTreeString._sanitize(look_for_newick_tree); 123 } else { 124 if (look_for_newick_tree == FALSE) { 125 utility.SetEnvVariable("IS_TREE_PRESENT_IN_DATA", FALSE); 126 } 127 128 if (utility.GetEnvVariable("IS_TREE_PRESENT_IN_DATA")) { 129 treeString = trees.GetTreeString._sanitize(utility.GetEnvVariable("DATAFILE_TREE")); 130 utility.SetEnvVariable("IS_TREE_PRESENT_IN_DATA", TRUE); 131 } 132 133 if (!utility.GetEnvVariable("IS_TREE_PRESENT_IN_DATA")) { 134 SetDialogPrompt("Please select a tree file for the data:"); 135 utility.SetEnvVariable ("LAST_FILE_IO_EXCEPTION", None); 136 utility.ToggleEnvVariable ("SOFT_FILE_IO_EXCEPTIONS",TRUE); 137 fscanf(PROMPT_FOR_FILE, REWIND, "Raw", treeString); 138 139 look_for_newick_tree = utility.getGlobalValue ("LAST_FILE_PATH"); 140 141 if (None != utility.GetEnvVariable ("LAST_FILE_IO_EXCEPTION")) { 142 if (utility.getGlobalValue ("LAST_RAW_FILE_PROMPT") == utility.getGlobalValue ("terms.trees.neighbor_joining")) { 143 datafilter_name = utility.GetEnvVariable(utility.getGlobalValue ("terms.trees.data_for_neighbor_joining")); 144 assert (Type (datafilter_name) == "String", "Expected a string for the datafilter to build a NJ tree from"); 145 treeString = tree.infer.NJ (datafilter_name, None); 146 } else { 147 assert (0, utility.GetEnvVariable ("LAST_FILE_IO_EXCEPTION")); 148 } 149 utility.SetEnvVariable ("LAST_FILE_IO_EXCEPTION", None); 150 } 151 152 utility.ToggleEnvVariable ("SOFT_FILE_IO_EXCEPTIONS",None); 153 fprintf(stdout, "\n"); 154 155 156 if (regexp.Find(treeString, "^#NEXUS")) { 157 158 ExecuteCommands(treeString); 159 160 if (!utility.GetEnvVariable("IS_TREE_PRESENT_IN_DATA")) { 161 fprintf(stdout, "\n> **This NEXUS file doesn't contain a valid tree block**"); 162 return 1; 163 } 164 165 nftm = utility.GetEnvVariable("NEXUS_FILE_TREE_MATRIX"); 166 167 168 if (Rows(nftm) > 1) { 169 ChoiceList(treeChoice, "Select a tree", 1, SKIP_NONE, nftm); 170 if (treeChoice < 0) { 171 return 1; 172 } 173 treeString = nftm[treeChoice][1]; 174 } else { 175 treeString = nftm[0][1]; 176 } 177 } else { 178 start = (treeString $ "\\(")[0]; 179 if (start < 0) { 180 fprintf(stdout, "\n> **This doesn't seem to be a valid Newick string file**. Can't find the opening parenthesis. ``", treeString, "``\n"); 181 return 1; 182 } else { 183 parenCounter = 1; 184 current = start + 1; 185 while (current < Abs(treeString) && parenCounter) { 186 char = treeString[current]; 187 if (char == "(") { 188 parenCounter += 1; 189 } else { 190 if (char == ")") { 191 parenCounter += (-1); 192 } 193 } 194 current += 1; 195 } 196 197 if (parenCounter) { 198 fprintf(stdout, "\n> ** This doesn't seem to be a valid Newick string file**. Can't match the parentheses. \n``", treeString, "``\n"); 199 return 1; 200 } 201 202 treeString = treeString[start][current - 1]; 203 } 204 205 treeString = trees.GetTreeString._sanitize(treeString); 206 207 } 208 } 209 } 210 211 return 212 { 213 utility.getGlobalValue("terms.data.file"): look_for_newick_tree, 214 utility.getGlobalValue("terms.data.tree"): treeString 215 }; 216} 217 218 219///////// TODO: HOW TO UN-HARDCODE THIS FUNCTION? Regular strategies are not working. ////////////// 220/** 221 * Partitions a tree by assigning nodes to either being internal or leaf 222 * @name trees.PartitionTree 223 * @param {Dictionary} avl - an AVL representation of the tree to be partitioned 224 * @returns nothing 225 */ 226lfunction trees.PartitionTree(avl, l) { 227 for (k = 0; k < Abs(avl); k += 1) { 228 if ((avl[k])["Parent"]) { 229 if (Abs((avl[k])["Children"])) { 230 l[(avl[k])["Name"]] = "internal"; 231 } else { 232 l[(avl[k])["Name"]] = "leaf"; 233 } 234 } 235 } 236} 237 238/** 239 * Loads an annatotaed tree topology from a newick tree 240 * @name trees.LoadAnnotatedTopology 241 * @param {String|Bool} look_for_newick_tree - If a string, sanitizes and 242 * returns the string. If TRUE, search the alignment file for a newick tree. If 243 * FALSE, the user will be prompted for a nwk tree file. 244 * @returns {Dictionary} an annotated tree 245 */ 246lfunction trees.LoadAnnotatedTopology(look_for_newick_tree) { 247 return trees.ExtractTreeInfo(trees.GetTreeString(look_for_newick_tree)); 248} 249 250/** 251 * @name trees.LoadAnnotatedTopologyAndMap 252 * @param dataset_name 253 * @returns {Dictionary} an annotated tree 254 */ 255lfunction trees.LoadAnnotatedTopologyAndMap(look_for_newick_tree, mapping) { 256 257 258 reverse = {}; 259 260 for (k,v; in; mapping) { 261 reverse[v] = k; 262 } 263 264 //utility.ForEach(utility.Keys(mapping), "_key_", "`&reverse`[`&mapping`[_key_]] = _key_"); 265 266 io.CheckAssertion("Abs (`&mapping`) == Abs (`&reverse`)", "The mapping between original and normalized tree sequence names must be one to one"); 267 utility.ToggleEnvVariable("TREE_NODE_NAME_MAPPING", reverse); 268 result = trees.ExtractTreeInfo(trees.GetTreeString(look_for_newick_tree)); 269 utility.ToggleEnvVariable("TREE_NODE_NAME_MAPPING", None); 270 271 272 return result; 273} 274 275/** 276 * Loads a tree topology with node name label mappings and annotations from a list of partitions 277 * @name trees.LoadAnnotatedTreeTopology.match_partitions 278 * @param {Matrix} partitions - a 1xN vector of partition names, typically 279 * retrieved from the `partitions` key in the dictionary returned from an 280 * alignments parser function. 281 * @param {Dictionary} mapping - a mapping of node names to labels 282 * @returns {Dictionary} of matched partitions 283 * @example 284 * hky85_nucdata_info = alignments.ReadNucleotideAlignment(file_name, "hky85.nuc_data", "hky85.nuc_filter"); 285 * name_mapping = { 286 * "HUMAN":"HUMAN", 287 * "CHIMP":"CHIMP", 288 * "BABOON":"BABOON", 289 * "RHMONKEY":"RHMONKEY", 290 * "COW":"COW", 291 * "PIG":"PIG", 292 * "HORSE":"HORSE", 293 * "CAT":"CAT", 294 * "MOUSE":"MOUSE", 295 * "RAT":"RAT" 296 * }; 297 * trees.LoadAnnotatedTreeTopology.match_partitions(hky85_nucdata_info[terms.json.partitions], name_mapping); 298 * => 299 * { 300 * "0":{ 301 * "name":"default", 302 * "filter-string":"", 303 * "tree": * } 304 * } 305 */ 306lfunction trees.LoadAnnotatedTreeTopology.match_partitions(partitions, mapping) { 307 308 309 partition_count = Rows(partitions); 310 partrees = {}; 311 312 tree_matrix = utility.GetEnvVariable("NEXUS_FILE_TREE_MATRIX"); 313 314 if (Type(tree_matrix) == "Matrix") { 315 io.CheckAssertion("Rows(`&tree_matrix`) >= partition_count", "The number of trees in the NEXUS block cannot be smaller than the number of partitions in the file"); 316 for (i = 0; i < partition_count; i += 1) { 317 partrees + { 318 utility.getGlobalValue("terms.data.name"): partitions[i][0], 319 utility.getGlobalValue("terms.data.filter_string"): partitions[i][1], 320 utility.getGlobalValue("terms.data.tree"): trees.LoadAnnotatedTopologyAndMap(tree_matrix[i][1], mapping) 321 }; 322 } 323 } else { // no tree matrix; apply the same tree to all partitions 324 325 tree_info = trees.LoadAnnotatedTopologyAndMap(TRUE, mapping); 326 327 for (i = 0; i < partition_count; i += 1) { 328 partrees + { 329 utility.getGlobalValue("terms.data.name"): partitions[i][0], 330 utility.getGlobalValue("terms.data.filter_string"): partitions[i][1], 331 utility.getGlobalValue("terms.data.tree"): tree_info 332 }; 333 } 334 } 335 336 337 338 return partrees; 339 340} 341 342/** 343 * @name trees.branch_names 344 * @param tree 345 * @param respect_case 346 * @returns result 347 */ 348lfunction trees.branch_names(tree, respect_case) { 349 result = {}; 350 branch_names = BranchName(tree, -1); 351 branch_count = Columns(branch_names) - 1; 352 if (respect_case) { 353 for (k = 0; k < branch_count; k += 1) { 354 result + branch_names[k]; 355 } 356 } else { 357 for (k = 0; k < branch_count; k += 1) { 358 result + (branch_names[k] && 1); 359 } 360 361 } 362 return result; 363} 364 365 366/** 367 * @name trees.KillZeroBranches 368 * Given a tree dict and a list of matching parameter estimates 369 * this returns a modified tree dict with all zero-branch length 370 * internal branches removed and modifies in in place 371 * @param tree - dict of the tree object 372 * @param estimates - dict with branch length estimates 373 * @param branch_set - if not null, treat as test set and delete branches form it as well 374 * @param zero_internal -- store branches that were deleted here 375 * @return modified tree 376 */ 377 378lfunction trees.KillZeroBranches (tree, estimates, branch_set, zero_internal) { 379 380 for (branch, value; in; tree [^"terms.trees.partitioned"]) { 381 if (value == ^"terms.tree_attributes.internal") { 382 if (estimates / branch) { 383 if ((estimates[branch])[^"terms.fit.MLE"] < 1e-10) { 384 zero_internal + branch; 385 } 386 } else { 387 zero_internal + branch; 388 } 389 } 390 } 391 392 if (Abs (zero_internal) > 0) { // has zero internal branches 393 Topology T = tree[^"terms.trees.newick_annotated"]; 394 T - Columns(zero_internal); 395 if (None != branch_set) { 396 for (branch; in; zero_internal) { 397 branch_set - branch; 398 } 399 } 400 return trees.ExtractTreeInfoFromTopology (&T); 401 402 } 403 404 return tree; 405 406} 407 408/** 409 * @name trees.RootTree 410 * @param {Dict} tree_info 411 * @param {String} root on this node (or prompt if empty) 412 * @returns a {Dictionary} (same as ExtractTreeInfo) for the rerooted tree 413 */ 414 415lfunction trees.RootTree(tree_info, root_on) { 416 if (Type (root_on) != "String") { 417 root_on = io.SelectAnOption (tree_info[^"terms.trees.partitioned"], 418 "Select a root"); 419 } 420 421 422 io.CheckAssertion("`&tree_info`[^'terms.trees.partitioned']/`&root_on`", "Not a valid root choice '" + root_on + "'"); 423 424 Topology T = tree_info[^"terms.trees.newick_with_lengths"]; 425 426 utility.ToggleEnvVariable("ACCEPT_ROOTED_TREES", TRUE); 427 tree_info = trees.ExtractTreeInfo(RerootTree (T, root_on)); 428 utility.ToggleEnvVariable("ACCEPT_ROOTED_TREES", None); 429 430 return { 431 ^"terms.trees.root" : root_on, 432 ^"terms.data.tree" : tree_info 433 }; 434} 435 436/** 437 * @name trees.ExtractTreeInfoFromTopology 438 * @param {String} Topology 439 * @returns a {Dictionary} of the following tree information : 440 * - newick string 441 * - newick string with branch lengths 442 * - annotated string 443 * - model map 444 * - internal leaves 445 * - list of models 446 */ 447lfunction trees.ExtractTreeInfoFromTopology(topology_object) { 448 449 450 branch_lengths = BranchLength(^topology_object, -1); 451 branch_names = BranchName(^topology_object, -1); 452 453 branch_count = Max (2,utility.Array1D (branch_names) - 1); 454 455 456 bls = {}; 457 458 for (k = 0; k < branch_count; k+=1) { 459 if (branch_lengths[k] >= 0.) { 460 bls [branch_names[k]] = branch_lengths[k]; 461 } 462 } 463 464 465 GetInformation(modelMap, ^topology_object); 466 467 468 leaves_internals = {}; 469 flat_tree = (^topology_object) ^ 0; 470 trees.PartitionTree(flat_tree, leaves_internals); 471 472 utility.ToggleEnvVariable("INCLUDE_MODEL_SPECS", TRUE); 473 T.str = "" + ^topology_object; 474 utility.ToggleEnvVariable("INCLUDE_MODEL_SPECS", None); 475 476 rooted = utility.Array1D ((flat_tree[(flat_tree[0])["Root"]])["Children"]) == 2; 477 478 flat_tree = None; 479 branch_lengths = None; 480 branch_names = None; 481 branch_count = None; 482 483 return { 484 ^"terms.trees.newick": Format(^topology_object, 1, 0), 485 ^"terms.trees.newick_with_lengths": Format(^topology_object, 1, 1), 486 ^"terms.branch_length": bls, 487 ^"terms.trees.newick_annotated": T.str, 488 ^"terms.trees.model_map": modelMap, 489 ^"terms.trees.partitioned": leaves_internals, 490 ^"terms.trees.model_list": Columns(modelMap), 491 ^"terms.trees.rooted" : rooted, 492 ^"terms.trees.meta" : Eval(^(topology_object+".__meta")), 493 ^"terms.data.file" : file_name 494 495 }; 496} 497 498/** 499 * @name trees.ExtractTreeInfo 500 * @param {String} tree_string 501 * @returns a {Dictionary} of the following tree information : 502 * - newick string 503 * - newick string with branch lengths 504 * - annotated string 505 * - model map 506 * - internal leaves 507 * - list of models 508 */ 509lfunction trees.ExtractTreeInfo(tree_string) { 510 511 if (Type (tree_string) == "AssociativeList") { 512 file_name = tree_string[^"terms.data.file"]; 513 tree_string = tree_string[^"terms.data.tree"]; 514 } else { 515 file_name = None; 516 } 517 518 Topology T = tree_string; 519 return trees.ExtractTreeInfoFromTopology (&T); 520 521} 522 523/** 524 * @name trees.HasBranchLengths 525 * @param {Dictionary} tree information object (e.g. as returned by LoadAnnotatedTopology) 526 * @returns a {Boolean} to indicate whether the tree has a valid branch length array 527 */ 528 529lfunction trees.HasBranchLengths (tree_info) { 530 return utility.Array1D (tree_info [^"terms.trees.partitioned"]) == utility.Array1D (tree_info [^"terms.branch_length"]); 531} 532 533/** 534 * @name trees.BootstrapSupport 535 * @param {Dictionary} tree information object (e.g. as returned by LoadAnnotatedTopology) 536 * @returns a {Dictionary} (could be empty or partially filled) with "node name" -> bootstrap support 537 */ 538 539lfunction trees.BootstrapSupport (tree_info) { 540 return utility.Map (utility.Filter (tree_info[^"terms.trees.meta"], "_value_", "(_value_/'bootstrap')"), "_value_", 541 "_value_['bootstrap']"); 542} 543 544/** 545 * Gets total branch count of supplied tree 546 * @name trees.GetBranchCount 547 * @param {String} tree_string 548 * @returns {Number} total branch count 549 */ 550function trees.GetBranchCount(tree_string) { 551 Topology T = tree_string; 552 return BranchCount(T) + TipCount(T); 553} 554 555/** 556 * Sorts branch lengths in a descending order 557 * @name trees.SortedBranchLengths 558 * @param {String} tree_string 559 * @returns {Number} total branch count 560 */ 561 562lfunction trees.SortedBranchLengths(tree_string) { 563 564 tree_count = trees.GetBranchCount(tree_string); 565 Tree T = tree_string; 566 567 branch_lengths = BranchLength(T,-1); 568 569 sorted_bls = {}; 570 sorted_bls = {tree_count, 2}["branch_lengths[_MATRIX_ELEMENT_ROW_]*(_MATRIX_ELEMENT_COLUMN_==0)+_MATRIX_ELEMENT_ROW_*(_MATRIX_ELEMENT_COLUMN_==1)"]; 571 sorted_bls = sorted_bls%0; 572 return sorted_bls; 573 574} 575 576/** 577 * Return branch names 578 * @name trees.BranchNames 579 * @param {String} tree_string 580 * @returns {Matrix} 1xN sorted branch names 581 */ 582 583lfunction trees.BranchNames(tree) { 584 tree_string = tree[^"terms.trees.newick"]; 585 Topology T = tree_string; 586 branch_names = BranchName(T, -1); 587 return branch_names; 588} 589 590/** 591 * Compute branch labeling using parsimony 592 * @name trees.ParsimonyLabel 593 * @param {String} tree ID 594 * @param {Dict} leaf -> label 595 labels may be missing for some of the leaves to induce partial labeling of the tree 596 * @returns {Dict} {"score" : value, "labels" : Internal Branch -> label} 597 */ 598 599lfunction trees.ParsimonyLabel(tree_id, given_labels) { 600 tree_avl = (^tree_id) ^ 0; 601 label_values = utility.UniqueValues (given_labels); 602 label_count = utility.Array1D (label_values); 603 labels = {}; 604 scores = {}; // node name -> score of optimal labeling staring at this now given parent state 605 optimal_labeling = {}; // node name -> current node labeling which achieves this score 606 resulting_labels = {}; // internal nodes -> label 607 608 // pass 1 to fill in the score matrix 609 for (k = 0; k < Abs (tree_avl) ; k += 1) { 610 node_name = (tree_avl[k])["Name"]; 611 node_children = (tree_avl[k])["Children"]; 612 c_count = Abs (node_children); 613 if (c_count) { // internal node 614 // first check to see if all the children are labeled 615 616 c_names = {c_count , 1}; 617 618 for (c = 0; c < c_count; c+=1) { 619 c_names[c] = (tree_avl[node_children[c]])["Name"]; 620 if (scores / c_names[c] == FALSE) { 621 break; 622 } 623 624 } 625 if (c == c_count) { 626 627 scores [node_name] = {1, label_count}; 628 labels [node_name] = {1, label_count}; 629 630 631 for (parent_state = 0; parent_state < label_count; parent_state+=1) { 632 best_score = 1e100; 633 best_state = None; 634 for (my_state = 0; my_state < label_count; my_state+=1) { 635 my_score = 0; 636 for (c = 0; c < c_count; c+=1) { 637 my_score += (scores [c_names[c]])[my_state]; 638 } 639 if (my_state != parent_state) { 640 my_score += 1; 641 } 642 if (my_score < best_score) { 643 best_score = my_score; 644 best_state = my_state; 645 } 646 } 647 (scores [node_name])[parent_state] = best_score; 648 (labels [node_name])[parent_state] = best_state; 649 } 650 651 } 652 } else { 653 if (utility.Has (given_labels, node_name, "String")) { 654 node_label = given_labels[node_name]; 655 scores [node_name] = {1, label_count}; 656 labels [node_name] = {1, label_count}; 657 658 for (z = 0; z < label_count; z+=1) { 659 if (node_label == label_values[z]) { 660 break; 661 } 662 } 663 664 for (i = 0; i < label_count; i+=1) { 665 (scores [node_name]) [i] = 1 - (z == i); 666 (labels [node_name]) [i] = z; 667 } 668 } 669 } 670 } 671 672 673 // pass 2 to choose the best state for subtree parents 674 675 676 total_score = 0; 677 678 for (k = 0; k < Abs (tree_avl); k += 1) { 679 node_name = (tree_avl[k])["Name"]; 680 node_children = (tree_avl[k])["Children"]; 681 c_count = Abs (node_children); 682 if (c_count) { // internal node 683 parent = (tree_avl[k])["Parent"]; 684 if (parent > 0) { 685 if (utility.Has (scores, (tree_avl[parent])["Name"], "Matrix")) { 686 continue; 687 } 688 } 689 690 if (utility.Has (scores, node_name, "Matrix") == FALSE) { 691 continue; 692 } 693 694 best_score = 1e100; 695 best_label = None; 696 697 for (i = 0; i < label_count; i+=1) { 698 my_score = (scores [node_name])[i]; 699 700 if (my_score < best_score) { 701 best_score = my_score; 702 best_label = i; 703 } 704 } 705 706 total_score += best_score; 707 resulting_labels [node_name] = best_label; 708 } 709 } 710 711 tree_avl = (^tree_id) ^ 1; 712 for (k = 2; k < Abs (tree_avl); k += 1) { 713 node_name = (tree_avl[k])["Name"]; 714 if (utility.Array1D((tree_avl[k])["Children"])) { 715 parent = (tree_avl[k])["Parent"]; 716 if (utility.Has (resulting_labels, (tree_avl[parent])["Name"], "Number")) { 717 parent = resulting_labels[(tree_avl[parent])["Name"]]; 718 resulting_labels[node_name] = (labels [node_name])[parent]; 719 } 720 } 721 } 722 723 return {"score" : total_score, "labels" : utility.Map (resulting_labels, "_value_", "`&label_values`[_value_]")}; 724 // pass 1 to choose the best state for subtree parents 725} 726 727/** 728 * Compute branch labeling using conjunction, i.e. node N is labeled 'X' iff 729 * all of the nodes that are in the subtree rooted at 'N' are also labeled 'N' 730 * @name trees.ConjunctionLabel 731 * @param {String} tree ID 732 * @param {Dict} leaf -> label 733 labels may be missing for some of the leaves to induce partial labeling of the tree 734 * @returns {Dict} {"labeled" : # of labeled internal nodes, "labels" : Internal Branch -> label} 735 */ 736 737lfunction trees.ConjunctionLabel (tree_id, given_labels) { 738 tree_avl = (^tree_id) ^ 0; 739 label_values = utility.UniqueValues (given_labels); 740 label_count = utility.Array1D (label_values); 741 labels = {}; 742 resulting_labels = {}; // internal nodes -> label 743 inodes_labeled = 0; 744 745 // pass 1 to fill in the score matrix 746 for (k = 0; k < Abs (tree_avl) ; k += 1) { 747 node_name = (tree_avl[k])["Name"]; 748 node_children = (tree_avl[k])["Children"]; 749 c_count = utility.Array1D (node_children); 750 751 if (c_count) { // internal node 752 // first check to see if all the children are labeled 753 754 child_labels = {}; 755 756 for (c = 0; c < c_count; c+=1) { 757 c_name = (tree_avl[node_children[c]])["Name"]; 758 if (utility.Has (labels, c_name, "String") == FALSE) { 759 break; 760 } 761 child_labels [labels[c_name]] = TRUE; 762 763 } 764 if (c == c_count) { // all children labeled 765 inodes_labeled += 1; 766 if (utility.Array1D (child_labels) == 1) { 767 labels [node_name] = (utility.Keys (child_labels))[0]; 768 resulting_labels[node_name] = labels [node_name]; 769 } 770 } 771 } else { // leaf 772 if (utility.Has (given_labels, node_name, "String")) { 773 labels[node_name] = given_labels[node_name]; 774 } 775 } 776 } 777 778 779 780 return {"labeled" : inodes_labeled, "labels" : resulting_labels}; 781 // pass 1 to choose the best state for subtree parents 782} 783 784/** 785 * Compute branch labeling using conjunction, i.e. node N is labeled 'X' iff 786 * SOME of the nodes that are in the subtree rooted at 'N' are also labeled 'N' 787 * @name trees.ConjunctionLabel 788 * @param {String} tree ID 789 * @param {Dict} leaf -> label 790 labels may be missing for some of the leaves to induce partial labeling of the tree 791 * @returns {Dict} {"labeled" : # of labeled internal nodes, "labels" : Internal Branch -> label} 792 */ 793 794lfunction trees.DisjunctionLabel (tree_id, given_labels) { 795 tree_avl = (^tree_id) ^ 0; 796 label_values = utility.UniqueValues (given_labels); 797 label_count = utility.Array1D (label_values); 798 labels = {}; 799 resulting_labels = {}; // internal nodes -> label 800 inodes_labeled = 0; 801 802 // pass 1 to fill in the score matrix 803 for (k = 0; k < Abs (tree_avl) ; k += 1) { 804 node_name = (tree_avl[k])["Name"]; 805 node_children = (tree_avl[k])["Children"]; 806 c_count = utility.Array1D (node_children); 807 808 if (c_count) { // internal node 809 // first check to see if all the children are labeled 810 811 child_labels = {}; 812 813 for (c = 0; c < c_count; c+=1) { 814 c_name = (tree_avl[node_children[c]])["Name"]; 815 if (utility.Has (labels, c_name, "String") == FALSE) { 816 break; 817 } 818 child_labels [labels[c_name]] = TRUE; 819 820 } 821 if (c > 0) { // all children labeled 822 inodes_labeled += 1; 823 if (utility.Array1D (child_labels) == 1) { 824 labels [node_name] = (utility.Keys (child_labels))[0]; 825 resulting_labels[node_name] = labels [node_name]; 826 } 827 } 828 } else { // leaf 829 if (utility.Has (given_labels, node_name, "String")) { 830 labels[node_name] = given_labels[node_name]; 831 } 832 } 833 } 834 835 836 837 return {"labeled" : inodes_labeled, "labels" : resulting_labels}; 838 // pass 1 to choose the best state for subtree parents 839} 840 841/** 842 * Annotate a tree string with using user-specified labels 843 * @name trees.ParsimonyLabel 844 * @param {String} tree ID 845 * @param {Dict/String} node_name -> label OR (if string) (node_name) => name + annotation 846 * @param {String} a pair of characters to enclose the label in 847 * @param {Dict/String} node_name -> length OR (if string) (node_name) => length annotation 848 * @return {String} annotated string 849 */ 850 851lfunction tree.Annotate (tree_id, labels, chars, doLengths) { 852 theAVL = (^tree_id)^0; 853 _ost = ""; 854 _ost * 256; 855 856 lastLevel = 0; 857 treeSize = Abs(theAVL); 858 treeInfo = theAVL[0]; 859 rootIndex = treeInfo["Root"]; 860 lastDepth = 0; 861 862 for (nodeIndex = 1; nodeIndex < treeSize; nodeIndex += 1) { 863 nodeInfo = theAVL[nodeIndex]; 864 myDepth = nodeInfo["Depth"]; 865 if (lastDepth < myDepth) { 866 if (lastDepth) { 867 _ost * ","; 868 } 869 for (pidx = lastDepth; pidx < myDepth; pidx += 1) { 870 _ost * "("; 871 } 872 } else { 873 if (lastDepth > myDepth) { 874 for (pidx = myDepth; pidx < lastDepth; pidx += 1) { 875 _ost * ")"; 876 } 877 } else { 878 _ost * ","; 879 } 880 } 881 882 883 if (Type (labels) == "String") { 884 _ost * Call (labels, nodeInfo["Name"]); 885 } else { 886 _ost * nodeInfo["Name"]; 887 if (labels / nodeInfo["Name"]) { 888 if (Abs(labels[nodeInfo["Name"]])) { 889 _ost * (chars[0] + labels[nodeInfo["Name"]] + chars[1]); 890 } 891 } 892 } 893 894 if (doLengths ) { 895 896 if (nodeIndex < treeSize - 1) { 897 _ost * ":"; 898 if (Type (doLengths) == "String") { 899 _ost * Call (doLengths, nodeInfo); 900 } else { 901 _ost * (""+nodeInfo ["Length"]); 902 903 } 904 } 905 } 906 lastDepth = myDepth; 907 } 908 909 _ost * 0; 910 return _ost; 911} 912 913 914/** 915 * Generate a symmetric binary tree on N leaves (only perfectly symmetric if N is a power of 2) 916 * @name trees.GenerateSymmetricTree 917 * @param {Number} N : number of leavers 918 * @param {Bool} rooted : whether the tree is rooted 919 * @param {None/String} branch_name : if a string, then it is assumed to be a function with an integer argument (node index) that generates branch names 920 default is to use numeric names 921 * @param {None/String} branch_length : if a string, then it is assumed to be a function with no arguments that generates branch lengths 922 * @return {String} Newick tree string 923 */ 924 925lfunction tree.GenerateSymmetricTree (N, rooted, branch_name, branch_length) { 926 assert (N>=2, "Can't generate trees with fewer than 2 leaves"); 927 internal_nodes = N-2; 928 if (rooted) { 929 internal_nodes += 1; 930 } 931 932 total_nodes = N + internal_nodes; 933 flat_tree = {total_nodes, 4}["-1"]; 934 935 936 // each row represents the corresponding node (in its' index), [parent, child 1, child 2, child 3] record 937 // each node is represented with a [0, total_nodes-1] integer 938 // -1 means NULL 939 // for example ((1,2),(3,4)) will be represented as 940 /* 941 {4,-1,-1,-1} 942 {4,-1,-1,-1} 943 {5,-1,-1,-1} 944 {5,-1,-1,-1} 945 {6,0,1,-1} 946 {6,2,3,-1} 947 {-1,4,5,-1} 948 */ 949 950 current_parent_node = N; 951 current_child_node = 0; 952 953 954 while (current_parent_node < total_nodes) { 955 flat_tree[current_child_node][0] = current_parent_node; 956 flat_tree[current_child_node+1][0] = current_parent_node; 957 flat_tree[current_parent_node][1] = current_child_node; 958 if (current_child_node < total_nodes-2) { 959 flat_tree[current_parent_node][2] = current_child_node + 1; 960 } 961 current_parent_node += 1; 962 current_child_node += 2; 963 if (current_child_node == N-1) { 964 // if the number of leaves in not divisible for 2, we skip the unpaired leaf and attach it to the root 965 flat_tree[total_nodes-1][3-(rooted!=FALSE)] = N-1; 966 flat_tree[current_child_node][0] = total_nodes-1; 967 current_child_node += 1; 968 } 969 } 970 971 972 if (current_child_node < total_nodes - 1) { // rooted tree 973 /* 974 { 975 {4, -1, -1, -1} 976 {4, -1, -1, -1} 977 {5, -1, -1, -1} 978 {5, -1, -1, -1} 979 {-1, 0, 1, -1} 980 {-1, 2, 3, -1} 981 } 982 */ 983 if (flat_tree[total_nodes-1][3] < 0) { // doesn't already have the 3rd leaf attached 984 flat_tree[total_nodes-1][3] = total_nodes-2; 985 flat_tree[total_nodes-2][0] = total_nodes-1; 986 flat_tree[flat_tree[total_nodes-1][1]] = total_nodes-1; 987 flat_tree[flat_tree[total_nodes-1][2]] = total_nodes-1; 988 } 989 } 990 991 return tree._NewickFromMatrix (&flat_tree, total_nodes-1, branch_name, branch_length); 992 993} 994 995 996/** 997 * Generate a ladder tree on N leaves 998 * @name trees.GenerateSymmetricTree 999 * @param {Number} N : number of leavers 1000 * @param {Bool} rooted : whether the tree is rooted 1001 * @param {None/String} branch_name : if a string, then it is assumed to be a function with an integer argument (node index) that generates branch names 1002 default is to use numeric names 1003 * @param {None/String} branch_length : if a string, then it is assumed to be a function with no arguments that generates branch lengths 1004 * @return {String} Newick tree string 1005 */ 1006 1007lfunction tree.GenerateLadderTree (N, rooted, branch_name, branch_length) { 1008 assert (N>=2, "Can't generate trees with fewer than 2 leaves"); 1009 internal_nodes = N-2; 1010 if (rooted) { 1011 internal_nodes += 1; 1012 } 1013 1014 total_nodes = N + internal_nodes; 1015 flat_tree = {total_nodes, 4}["-1"]; 1016 1017 1018 current_parent_node = N; 1019 current_child_node = 0; 1020 1021 // create the first cherry 1022 1023 flat_tree[current_child_node][0] = current_parent_node; 1024 flat_tree[current_child_node+1][0] = current_parent_node; 1025 flat_tree[current_parent_node][1] = current_child_node; 1026 flat_tree[current_parent_node][2] = current_child_node + 1; 1027 current_child_node = 2; 1028 current_parent_node += 1; 1029 1030 1031 while (current_parent_node < total_nodes) { 1032 flat_tree[current_child_node][0] = current_parent_node; 1033 flat_tree[current_parent_node-1][0] = current_parent_node; 1034 flat_tree[current_parent_node][1] = current_parent_node-1; 1035 flat_tree[current_parent_node][2] = current_child_node; 1036 current_parent_node += 1; 1037 current_child_node += 1; 1038 1039 } 1040 1041 1042 if (current_child_node < N) { // unrooted tree 1043 flat_tree[total_nodes-1][3] = N-1; 1044 flat_tree[N-1][0] = total_nodes-1; 1045 } 1046 1047 return tree._NewickFromMatrix (&flat_tree, total_nodes-1, branch_name, branch_length); 1048 1049} 1050 1051/** 1052 * Generate a RANDOM tree on N leaves 1053 * @name trees.GenerateRandomTree 1054 * @param {Number} N : number of leavers 1055 * @param {Bool} rooted : whether the tree is rooted 1056 * @param {None/String} branch_name : if a string, then it is assumed to be a function with an integer argument (node index) that generates branch names 1057 default is to use numeric names 1058 * @param {None/String} branch_length : if a string, then it is assumed to be a function with no arguments that generates branch lengths 1059 * @return {String} Newick tree string 1060 */ 1061 1062 1063lfunction tree._GenerateRandomTreeDraw2 (nodes) { 1064 r = Rows (nodes); 1065 n = Abs (nodes); 1066 n1 = Random (0,n) $ 1; 1067 do { 1068 n2 = Random (0,n) $ 1; 1069 } while (n1 == n2); 1070 1071 1072 nodes - r[n1]; 1073 nodes - r[n2]; 1074 1075 return {"0" : +r[n1], "1" : +r[n2]}; 1076} 1077 1078 1079lfunction tree.GenerateRandomTree (N, rooted, branch_name, branch_length) { 1080 assert (N>=2, "Can't generate trees with fewer than 2 leaves"); 1081 internal_nodes = N-2; 1082 if (rooted) { 1083 internal_nodes += 1; 1084 } 1085 1086 total_nodes = N + internal_nodes; 1087 flat_tree = {total_nodes, 4}["-1"]; 1088 1089 1090 available_to_join = {}; 1091 for (k = 0; k < N; k+=1) { 1092 available_to_join[k] = TRUE; 1093 } 1094 1095 1096 current_parent_node = N; 1097 downto = 1 + (rooted == 0); 1098 1099 1100 while (Abs (available_to_join) > downto) { 1101 pair = tree._GenerateRandomTreeDraw2 (available_to_join); 1102 flat_tree[pair[0]][0] = current_parent_node; 1103 flat_tree[pair[1]][0] = current_parent_node; 1104 flat_tree[current_parent_node][1] = pair[0]; 1105 flat_tree[current_parent_node][2] = pair[1]; 1106 available_to_join[current_parent_node] = TRUE; 1107 current_parent_node += 1; 1108 } 1109 1110 if (!rooted) { // attach the last node to the root 1111 available_to_join - (total_nodes-1); 1112 leaf_index = +((Rows(available_to_join))[0]); 1113 flat_tree[leaf_index][0] = total_nodes-1; 1114 flat_tree[total_nodes-1][3] = leaf_index; 1115 1116 } 1117 1118 return tree._NewickFromMatrix (&flat_tree, total_nodes-1, branch_name, branch_length); 1119 1120} 1121 1122lfunction tree._NewickFromMatrix (flat_tree, index, branch_name, branch_length) { 1123 if ((^flat_tree)[index][0] >= 0) { // not a root 1124 if ((^flat_tree)[index][1] < 0) { // a leaf 1125 if (branch_name) { 1126 name = Call (branch_name, index); 1127 } else { 1128 name = "" + index; 1129 } 1130 if (branch_length) { 1131 return name + ":" + Call (branch_length); 1132 } 1133 return name; 1134 } else { 1135 if (branch_length) { 1136 bl := ":" + Call (branch_length); 1137 } else { 1138 bl := ""; 1139 } 1140 if (branch_name) { 1141 bn := Call (branch_name, index); 1142 } else { 1143 bn := ""; 1144 } 1145 1146 return "(" + tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][1], branch_name, branch_length) + 1147 "," + tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][2], branch_name, branch_length) + 1148 ")" + bn + bl; 1149 } 1150 } else { 1151 if ((^flat_tree)[index][3] < 0) { // 2 root children 1152 return "(" + tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][1], branch_name, branch_length) + 1153 "," + tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][2], branch_name, branch_length) + 1154 ")"; 1155 } else { 1156 return "(" + tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][1], branch_name, branch_length) + 1157 "," + tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][2], branch_name, branch_length) + 1158 "," + tree._NewickFromMatrix (flat_tree, (^flat_tree)[index][3], branch_name, branch_length) + 1159 ")"; 1160 } 1161 } 1162 return None; 1163} 1164 1165/** 1166 * Infer a neighbor joining tree from a data filter using a specific distance 1167 * @name trees.infer.NJ 1168 * @param {String} datafilter the ID of an existing datafilter 1169 * @param {null/String/Matrix}, 1170 null : use the default distance calculation appropriate for the datatype 1171 String : a callback which takes (filter_id, seq1, seq2) arguments and returns d(seq1,seq2) 1172 Matrix : a precomputed distance matrix (same order of rows/column as datafilter names) 1173 1174 * @return {String} inferred tree string 1175 */ 1176 1177lfunction tree.infer.NJ (datafilter, distances) { 1178 1179 flush_distances = FALSE; 1180 if (None == distances) { // use default distances 1181 type = alignments.FilterType (datafilter); 1182 if (type == ^"terms.nucleotide" || type == ^"terms.codon") { 1183 distances = distances.nucleotide.tn93 (datafilter, null, null); 1184 flush_distances = TRUE; 1185 } else { 1186 if (type == ^"terms.amino_acid" || type == ^"terms.binary") { 1187 distances = distances.p_distance (datafilter, null); 1188 flush_distances = TRUE; 1189 } else { 1190 io.ReportAnExecutionError ("No default distance available for filter type of `type`"); 1191 } 1192 } 1193 } 1194 1195 N = ^(datafilter + ".species"); 1196 assert (N == Rows (distances), "Incompatible dimensions for the distance matrix and datafilter"); 1197 1198 1199 if (N == 2) { 1200 d1 = distances[0][1]/2; 1201 treeNodes = {{0,1,d1__}, 1202 {1,1,d1__}, 1203 {2,0,0}}; 1204 cladesInfo = {{2,0}}; 1205 } 1206 else { 1207 if (N == 3) { 1208 d1 = (distances[0][1]+distances[0][2]-distances[1][2])/2; 1209 d2 = (distances[0][1]-distances[0][2]+distances[1][2])/2; 1210 d3 = (distances[1][2]+distances[0][2]-distances[0][1])/2; 1211 treeNodes = {{0,1,d1__}, 1212 {1,1,d2__}, 1213 {2,1,d3__} 1214 {3,0,0}}; 1215 1216 cladesInfo = {{3,0}}; 1217 } 1218 else { 1219 1220 njm = (distances > 0) >= N; 1221 1222 treeNodes = {2*N,3}; 1223 1224 for (i = 0; i < 2*N; i += 1) { 1225 treeNodes[i][0] = njm[i][0]; // node index; leaves in [0,N), internal nodes N and higher 1226 treeNodes[i][1] = njm[i][1]; // node depth relative to the root 1227 treeNodes[i][2] = njm[i][2]; // branch length 1228 } 1229 1230 njm = None; 1231 } 1232 } 1233 1234 if (flush_distances) { 1235 distances = None; 1236 } 1237 1238 return tree._matrix2string (treeNodes, N, alignments.GetSequenceNames (datafilter), TRUE); 1239 1240} 1241 1242/* ____________________________________________*/ 1243 1244/** 1245 * Convert a matrix representation of a tree into Newick 1246 * @name trees._matrix2string 1247 * @param {Matrix} matrix_form : Kx3 matrix; 1248 each row represents ? 1249 * @param {Number} N : number of leaves 1250 * @param {Matrix/Dict} names : leaf index -> name 1251 * @param {Bool} do_lengths : include branch lengths in the output 1252 1253 * @return {String} newick tree string 1254 */ 1255 1256lfunction tree._matrix2string (matrix_form, N, names, do_lengths) { 1257 1258 newick = ""; newick * 1024; 1259 1260 p = 0; // tree depth of previous node 1261 k = 0; // current row in matrix_form 1262 1263 m = matrix_form[0][1]; // current tree depth (0 == root) 1264 n = matrix_form[0][0]; // current node index 1265 1266 while (m) { 1267 if (m>p) { // going down in the tree 1268 if (p) { 1269 newick * ","; 1270 } 1271 for (j=p; j<m; j += 1 ) { 1272 newick * "("; 1273 } 1274 } else { // going up in the tree 1275 if (m<p) { 1276 for (j=m;j<p; j += 1) { 1277 newick * ")"; 1278 } 1279 } 1280 else { 1281 newick * ","; 1282 } 1283 } 1284 1285 if (n < N) { 1286 newick * names [n]; 1287 } 1288 1289 if (do_lengths) { 1290 newick * (":"+matrix_form[k][2]); 1291 } 1292 1293 k += 1; 1294 p = m; 1295 n = matrix_form[k][0]; 1296 m = matrix_form[k][1]; 1297 } 1298 1299 for (j=m;j<p; j += 1) { 1300 newick *")"; 1301 } 1302 1303 newick *0; 1304 return newick; 1305} 1306