1 /* 2 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4) 3 * Copyright (C) 2021 The Jalview Authors 4 * 5 * This file is part of Jalview. 6 * 7 * Jalview is free software: you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation, either version 3 10 * of the License, or (at your option) any later version. 11 * 12 * Jalview is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty 14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 15 * PURPOSE. See the GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>. 19 * The Jalview Authors are detailed in the 'AUTHORS' file. 20 */ 21 package jalview.analysis; 22 23 import jalview.datamodel.AlignmentView; 24 import jalview.datamodel.BinaryNode; 25 import jalview.datamodel.NodeTransformI; 26 import jalview.datamodel.Sequence; 27 import jalview.datamodel.SequenceI; 28 import jalview.datamodel.SequenceNode; 29 import jalview.io.NewickFile; 30 31 import java.util.ArrayList; 32 import java.util.Enumeration; 33 import java.util.List; 34 import java.util.Vector; 35 36 /** 37 * A model of a tree, either computed by Jalview or loaded from a file or other 38 * resource or service 39 */ 40 public class TreeModel 41 { 42 43 SequenceI[] sequences; 44 45 /* 46 * SequenceData is a string representation of what the user 47 * sees. The display may contain hidden columns. 48 */ 49 private AlignmentView seqData; 50 51 int noseqs; 52 53 SequenceNode top; 54 55 double maxDistValue; 56 57 double maxheight; 58 59 int ycount; 60 61 Vector<SequenceNode> node; 62 63 boolean hasDistances = true; // normal case for jalview trees 64 65 boolean hasBootstrap = false; // normal case for jalview trees 66 67 private boolean hasRootDistance = true; 68 69 /** 70 * Create a new TreeModel object with leaves associated with sequences in 71 * seqs, and (optionally) original alignment data represented by Cigar strings 72 * 73 * @param seqs 74 * SequenceI[] 75 * @param odata 76 * Cigar[] 77 * @param treefile 78 * NewickFile 79 */ TreeModel(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)80 public TreeModel(SequenceI[] seqs, AlignmentView odata, 81 NewickFile treefile) 82 { 83 this(seqs, treefile.getTree(), treefile.HasDistances(), 84 treefile.HasBootstrap(), treefile.HasRootDistance()); 85 seqData = odata; 86 87 associateLeavesToSequences(seqs); 88 } 89 90 /** 91 * Constructor given a calculated tree 92 * 93 * @param tree 94 */ TreeModel(TreeBuilder tree)95 public TreeModel(TreeBuilder tree) 96 { 97 this(tree.getSequences(), tree.getTopNode(), tree.hasDistances(), 98 tree.hasBootstrap(), tree.hasRootDistance()); 99 seqData = tree.getOriginalData(); 100 } 101 102 /** 103 * Constructor given sequences, root node and tree property flags 104 * 105 * @param seqs 106 * @param root 107 * @param hasDist 108 * @param hasBoot 109 * @param hasRootDist 110 */ TreeModel(SequenceI[] seqs, SequenceNode root, boolean hasDist, boolean hasBoot, boolean hasRootDist)111 public TreeModel(SequenceI[] seqs, SequenceNode root, boolean hasDist, 112 boolean hasBoot, boolean hasRootDist) 113 { 114 this.sequences = seqs; 115 top = root; 116 117 hasDistances = hasDist; 118 hasBootstrap = hasBoot; 119 hasRootDistance = hasRootDist; 120 121 maxheight = findHeight(top); 122 } 123 124 /** 125 * @param seqs 126 */ associateLeavesToSequences(SequenceI[] seqs)127 public void associateLeavesToSequences(SequenceI[] seqs) 128 { 129 SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); 130 131 Vector<SequenceNode> leaves = findLeaves(top); 132 133 int i = 0; 134 int namesleft = seqs.length; 135 136 SequenceNode j; 137 SequenceI nam; 138 String realnam; 139 Vector<SequenceI> one2many = new Vector<SequenceI>(); 140 // int countOne2Many = 0; 141 while (i < leaves.size()) 142 { 143 j = leaves.elementAt(i++); 144 realnam = j.getName(); 145 nam = null; 146 147 if (namesleft > -1) 148 { 149 nam = algnIds.findIdMatch(realnam); 150 } 151 152 if (nam != null) 153 { 154 j.setElement(nam); 155 if (one2many.contains(nam)) 156 { 157 // countOne2Many++; 158 // if (jalview.bin.Cache.log.isDebugEnabled()) 159 // jalview.bin.Cache.log.debug("One 2 many relationship for 160 // "+nam.getName()); 161 } 162 else 163 { 164 one2many.addElement(nam); 165 namesleft--; 166 } 167 } 168 else 169 { 170 j.setElement(new Sequence(realnam, "THISISAPLACEHLDER")); 171 j.setPlaceholder(true); 172 } 173 } 174 // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) { 175 // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment 176 // sequence ids (out of "+one2many.size()+" unique ids) linked to two or 177 // more leaves."); 178 // } 179 // one2many.clear(); 180 } 181 182 /** 183 * Generate a string representation of the Tree 184 * 185 * @return Newick File with all tree data available 186 */ print()187 public String print() 188 { 189 NewickFile fout = new NewickFile(getTopNode()); 190 191 return fout.print(hasBootstrap(), hasDistances(), hasRootDistance()); // output 192 // all 193 // data 194 // available 195 // for 196 // tree 197 } 198 199 /** 200 * 201 * used when the alignment associated to a tree has changed. 202 * 203 * @param list 204 * Sequence set to be associated with tree nodes 205 */ updatePlaceHolders(List<SequenceI> list)206 public void updatePlaceHolders(List<SequenceI> list) 207 { 208 Vector<SequenceNode> leaves = findLeaves(top); 209 210 int sz = leaves.size(); 211 SequenceIdMatcher seqmatcher = null; 212 int i = 0; 213 214 while (i < sz) 215 { 216 SequenceNode leaf = leaves.elementAt(i++); 217 218 if (list.contains(leaf.element())) 219 { 220 leaf.setPlaceholder(false); 221 } 222 else 223 { 224 if (seqmatcher == null) 225 { 226 // Only create this the first time we need it 227 SequenceI[] seqs = new SequenceI[list.size()]; 228 229 for (int j = 0; j < seqs.length; j++) 230 { 231 seqs[j] = list.get(j); 232 } 233 234 seqmatcher = new SequenceIdMatcher(seqs); 235 } 236 237 SequenceI nam = seqmatcher.findIdMatch(leaf.getName()); 238 239 if (nam != null) 240 { 241 if (!leaf.isPlaceholder()) 242 { 243 // remapping the node to a new sequenceI - should remove any refs to 244 // old one. 245 // TODO - make many sequenceI to one leaf mappings possible! 246 // (JBPNote) 247 } 248 leaf.setPlaceholder(false); 249 leaf.setElement(nam); 250 } 251 else 252 { 253 if (!leaf.isPlaceholder()) 254 { 255 // Construct a new placeholder sequence object for this leaf 256 leaf.setElement( 257 new Sequence(leaf.getName(), "THISISAPLACEHLDER")); 258 } 259 leaf.setPlaceholder(true); 260 261 } 262 } 263 } 264 } 265 266 /** 267 * rename any nodes according to their associated sequence. This will modify 268 * the tree's metadata! (ie the original NewickFile or newly generated 269 * BinaryTree's label data) 270 */ renameAssociatedNodes()271 public void renameAssociatedNodes() 272 { 273 applyToNodes(new NodeTransformI() 274 { 275 276 @Override 277 public void transform(BinaryNode nd) 278 { 279 Object el = nd.element(); 280 if (el != null && el instanceof SequenceI) 281 { 282 nd.setName(((SequenceI) el).getName()); 283 } 284 } 285 }); 286 } 287 288 /** 289 * Search for leaf nodes below (or at) the given node 290 * 291 * @param nd 292 * root node to search from 293 * 294 * @return 295 */ findLeaves(SequenceNode nd)296 public Vector<SequenceNode> findLeaves(SequenceNode nd) 297 { 298 Vector<SequenceNode> leaves = new Vector<SequenceNode>(); 299 findLeaves(nd, leaves); 300 return leaves; 301 } 302 303 /** 304 * Search for leaf nodes. 305 * 306 * @param nd 307 * root node to search from 308 * @param leaves 309 * Vector of leaves to add leaf node objects too. 310 * 311 * @return Vector of leaf nodes on binary tree 312 */ findLeaves(SequenceNode nd, Vector<SequenceNode> leaves)313 Vector<SequenceNode> findLeaves(SequenceNode nd, 314 Vector<SequenceNode> leaves) 315 { 316 if (nd == null) 317 { 318 return leaves; 319 } 320 321 if ((nd.left() == null) && (nd.right() == null)) // Interior node 322 // detection 323 { 324 leaves.addElement(nd); 325 326 return leaves; 327 } 328 else 329 { 330 /* 331 * TODO: Identify internal nodes... if (node.isSequenceLabel()) { 332 * leaves.addElement(node); } 333 */ 334 findLeaves((SequenceNode) nd.left(), leaves); 335 findLeaves((SequenceNode) nd.right(), leaves); 336 } 337 338 return leaves; 339 } 340 341 /** 342 * printNode is mainly for debugging purposes. 343 * 344 * @param nd 345 * SequenceNode 346 */ printNode(SequenceNode nd)347 void printNode(SequenceNode nd) 348 { 349 if (nd == null) 350 { 351 return; 352 } 353 354 if ((nd.left() == null) && (nd.right() == null)) 355 { 356 System.out.println("Leaf = " + ((SequenceI) nd.element()).getName()); 357 System.out.println("Dist " + nd.dist); 358 System.out.println("Boot " + nd.getBootstrap()); 359 } 360 else 361 { 362 System.out.println("Dist " + nd.dist); 363 printNode((SequenceNode) nd.left()); 364 printNode((SequenceNode) nd.right()); 365 } 366 } 367 368 /** 369 * DOCUMENT ME! 370 * 371 * @return DOCUMENT ME! 372 */ getMaxHeight()373 public double getMaxHeight() 374 { 375 return maxheight; 376 } 377 378 /** 379 * Makes a list of groups, where each group is represented by a node whose 380 * height (distance from the root node), as a fraction of the height of the 381 * whole tree, is greater than the given threshold. This corresponds to 382 * selecting the nodes immediately to the right of a vertical line 383 * partitioning the tree (if the tree is drawn with root to the left). Each 384 * such node represents a group that contains all of the sequences linked to 385 * the child leaf nodes. 386 * 387 * @param threshold 388 * @see #getGroups() 389 */ groupNodes(float threshold)390 public List<SequenceNode> groupNodes(float threshold) 391 { 392 List<SequenceNode> groups = new ArrayList<SequenceNode>(); 393 _groupNodes(groups, getTopNode(), threshold); 394 return groups; 395 } 396 _groupNodes(List<SequenceNode> groups, SequenceNode nd, float threshold)397 protected void _groupNodes(List<SequenceNode> groups, SequenceNode nd, 398 float threshold) 399 { 400 if (nd == null) 401 { 402 return; 403 } 404 405 if ((nd.height / maxheight) > threshold) 406 { 407 groups.add(nd); 408 } 409 else 410 { 411 _groupNodes(groups, (SequenceNode) nd.left(), threshold); 412 _groupNodes(groups, (SequenceNode) nd.right(), threshold); 413 } 414 } 415 416 /** 417 * DOCUMENT ME! 418 * 419 * @param nd 420 * DOCUMENT ME! 421 * 422 * @return DOCUMENT ME! 423 */ findHeight(SequenceNode nd)424 public double findHeight(SequenceNode nd) 425 { 426 if (nd == null) 427 { 428 return maxheight; 429 } 430 431 if ((nd.left() == null) && (nd.right() == null)) 432 { 433 nd.height = ((SequenceNode) nd.parent()).height + nd.dist; 434 435 if (nd.height > maxheight) 436 { 437 return nd.height; 438 } 439 else 440 { 441 return maxheight; 442 } 443 } 444 else 445 { 446 if (nd.parent() != null) 447 { 448 nd.height = ((SequenceNode) nd.parent()).height + nd.dist; 449 } 450 else 451 { 452 maxheight = 0; 453 nd.height = (float) 0.0; 454 } 455 456 maxheight = findHeight((SequenceNode) (nd.left())); 457 maxheight = findHeight((SequenceNode) (nd.right())); 458 } 459 460 return maxheight; 461 } 462 463 /** 464 * DOCUMENT ME! 465 * 466 * @param nd 467 * DOCUMENT ME! 468 */ printN(SequenceNode nd)469 void printN(SequenceNode nd) 470 { 471 if (nd == null) 472 { 473 return; 474 } 475 476 if ((nd.left() != null) && (nd.right() != null)) 477 { 478 printN((SequenceNode) nd.left()); 479 printN((SequenceNode) nd.right()); 480 } 481 else 482 { 483 System.out.println(" name = " + ((SequenceI) nd.element()).getName()); 484 } 485 486 System.out.println( 487 " dist = " + nd.dist + " " + nd.count + " " + nd.height); 488 } 489 490 /** 491 * DOCUMENT ME! 492 * 493 * @param nd 494 * DOCUMENT ME! 495 */ reCount(SequenceNode nd)496 public void reCount(SequenceNode nd) 497 { 498 ycount = 0; 499 // _lycount = 0; 500 // _lylimit = this.node.size(); 501 _reCount(nd); 502 } 503 504 // private long _lycount = 0, _lylimit = 0; 505 506 /** 507 * DOCUMENT ME! 508 * 509 * @param nd 510 * DOCUMENT ME! 511 */ _reCount(SequenceNode nd)512 void _reCount(SequenceNode nd) 513 { 514 // if (_lycount<_lylimit) 515 // { 516 // System.err.println("Warning: depth of _recount greater than number of 517 // nodes."); 518 // } 519 if (nd == null) 520 { 521 return; 522 } 523 // _lycount++; 524 525 if ((nd.left() != null) && (nd.right() != null)) 526 { 527 528 _reCount((SequenceNode) nd.left()); 529 _reCount((SequenceNode) nd.right()); 530 531 SequenceNode l = (SequenceNode) nd.left(); 532 SequenceNode r = (SequenceNode) nd.right(); 533 534 nd.count = l.count + r.count; 535 nd.ycount = (l.ycount + r.ycount) / 2; 536 } 537 else 538 { 539 nd.count = 1; 540 nd.ycount = ycount++; 541 } 542 // _lycount--; 543 } 544 545 /** 546 * DOCUMENT ME! 547 * 548 * @param nd 549 * DOCUMENT ME! 550 */ swapNodes(SequenceNode nd)551 public void swapNodes(SequenceNode nd) 552 { 553 if (nd == null) 554 { 555 return; 556 } 557 558 SequenceNode tmp = (SequenceNode) nd.left(); 559 560 nd.setLeft(nd.right()); 561 nd.setRight(tmp); 562 } 563 564 /** 565 * DOCUMENT ME! 566 * 567 * @param nd 568 * DOCUMENT ME! 569 * @param dir 570 * DOCUMENT ME! 571 */ changeDirection(SequenceNode nd, SequenceNode dir)572 void changeDirection(SequenceNode nd, SequenceNode dir) 573 { 574 if (nd == null) 575 { 576 return; 577 } 578 579 if (nd.parent() != top) 580 { 581 changeDirection((SequenceNode) nd.parent(), nd); 582 583 SequenceNode tmp = (SequenceNode) nd.parent(); 584 585 if (dir == nd.left()) 586 { 587 nd.setParent(dir); 588 nd.setLeft(tmp); 589 } 590 else if (dir == nd.right()) 591 { 592 nd.setParent(dir); 593 nd.setRight(tmp); 594 } 595 } 596 else 597 { 598 if (dir == nd.left()) 599 { 600 nd.setParent(nd.left()); 601 602 if (top.left() == nd) 603 { 604 nd.setRight(top.right()); 605 } 606 else 607 { 608 nd.setRight(top.left()); 609 } 610 } 611 else 612 { 613 nd.setParent(nd.right()); 614 615 if (top.left() == nd) 616 { 617 nd.setLeft(top.right()); 618 } 619 else 620 { 621 nd.setLeft(top.left()); 622 } 623 } 624 } 625 } 626 627 /** 628 * DOCUMENT ME! 629 * 630 * @return DOCUMENT ME! 631 */ getTopNode()632 public SequenceNode getTopNode() 633 { 634 return top; 635 } 636 637 /** 638 * 639 * @return true if tree has real distances 640 */ hasDistances()641 public boolean hasDistances() 642 { 643 return hasDistances; 644 } 645 646 /** 647 * 648 * @return true if tree has real bootstrap values 649 */ hasBootstrap()650 public boolean hasBootstrap() 651 { 652 return hasBootstrap; 653 } 654 hasRootDistance()655 public boolean hasRootDistance() 656 { 657 return hasRootDistance; 658 } 659 660 /** 661 * apply the given transform to all the nodes in the tree. 662 * 663 * @param nodeTransformI 664 */ applyToNodes(NodeTransformI nodeTransformI)665 public void applyToNodes(NodeTransformI nodeTransformI) 666 { 667 for (Enumeration<SequenceNode> nodes = node.elements(); nodes 668 .hasMoreElements(); nodeTransformI 669 .transform(nodes.nextElement())) 670 { 671 ; 672 } 673 } 674 getOriginalData()675 public AlignmentView getOriginalData() 676 { 677 return seqData; 678 } 679 } 680