/*
 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4)
 * Copyright (C) 2021 The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
// NewickFile.java
// Tree I/O
// http://evolution.genetics.washington.edu/phylip/newick_doc.html
// TODO: Implement Basic NHX tag parsing and preservation
// TODO: http://evolution.genetics.wustl.edu/eddy/forester/NHX.html
// TODO: Extended SequenceNodeI to hold parsed NHX strings
package jalview.io;

import jalview.datamodel.SequenceNode;
import jalview.util.MessageManager;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.StringTokenizer;

/**
 * Parses and writes New Hampshire (Newick) style trees.
 * <p>
 * Caveats: NHX files are NOT supported, and the tree distances and topology
 * are unreliable when they are parsed. The only NHX code this class currently
 * reads is <code>B=</code> (bootstrap); see
 * {@link #parse()} and the TODO notes at the top of this file.
 * <p>
 * TODO: NHX codes are appended in comments beginning with &amp;&amp;NHX. The
 * codes are given below (from http://www.phylosoft.org/forester/NHX.html):
 * 
 * <pre>{@literal
 * Element | Type | Description | Corresponding phyloXML element (parent element in parentheses)
 *
 * no tag  string   name of this node/clade (MUST BE FIRST, IF ASSIGNED)
 *                  <name>(<clade>)
 * :       decimal  branch length to parent node (MUST BE SECOND, IF ASSIGNED)
 *                  <branch_length>(<clade>)
 * :GN=    string   gene name
 *                  <name>(<sequence>)
 * :AC=    string   sequence accession
 *                  <accession>(<sequence>)
 * :ND=    string   node identifier - if this is being used, it has to be
 *                  unique within each phylogeny
 *                  <node_id>(<clade>)
 * :B=     decimal  confidence value for parent branch
 *                  <confidence>(<clade>)
 * :D=     'T', 'F', or '?'
 *                  'T' if this node represents a duplication event - 'F' if
 *                  this node represents a speciation event, '?' if this node
 *                  represents an unknown event (D= tag should be replaced by
 *                  Ev= tag)
 *                  n/a
 * :Ev=duplications>speciations>gene losses>event type>duplication type
 *         int int int string string
 *                  event (replaces the =D tag), number of duplication,
 *                  speciation, and gene loss events, type of event (transfer,
 *                  fusion, root, unknown, other, speciation_duplication_loss,
 *                  unassigned)
 *                  <events>(<clade>)
 * :E=     string   EC number at this node
 *                  <annotation>(<sequence>)
 * :Fu=    string   function at this node
 *                  <annotation>(<sequence>)
 * :DS=protein-length>from>to>support>name>from>...
 *         int int int double string int ...
 *                  domain structure at this node
 *                  <domain_architecture>(<sequence>)
 * :S=     string   species name of the species/phylum at this node
 *                  <taxonomy>(<clade>)
 * :T=     integer  taxonomy ID of the species/phylum at this node
 *                  <id>(<taxonomy>)
 * :W=     integer  width of parent branch
 *                  <width>(<clade>)
 * :C=rrr.ggg.bbb
 *         integer.integer.integer
 *                  color of parent branch
 *                  <color>(<clade>)
 * :Co=    'Y' or 'N'
 *                  collapse this node when drawing the tree (default is not
 *                  to collapse)
 *                  n/a
 * :XB=    string   custom data associated with a branch
 *                  <property>(<clade>)
 * :XN=    string   custom data associated with a node
 *                  <property>(<clade>)
 * :O=     integer  orthologous to this external node
 *                  n/a
 * :SN=    integer  subtree neighbors
 *                  n/a
 * :SO=    integer  super orthologous (no duplications on paths) to this
 *                  external node
 *                  n/a
 * }</pre>
 * 
 * @author Jim Procter
 * @version $Revision$
 */
public class NewickFile extends FileParse
{
  /** Root of the tree that was parsed or that is to be written. */
  SequenceNode root;

  /** True when bootstrap values were read, or should be written. */
  private boolean HasBootstrap = false;

  /** True when branch lengths were read, or should be written. */
  private boolean HasDistances = false;

  /** True when the root node carries its own distance. */
  private boolean RootHasDistance = false;

  // File IO Flags
  /** When set, '_' in unquoted node names is replaced by ' ' on parsing. */
  boolean ReplaceUnderscores = false;

  /** When false, the root node's name/bootstrap/distance are not written. */
  boolean printRootInfo = true;

  /**
   * Patterns used by {@link #nodeName(String)}: [0] tests whether a name needs
   * quoting, [1] doubles embedded quote characters, [2] is applied to names
   * that are written unquoted.
   */
  // NOTE(review): pattern [2] is "s/\/w/_/" once Java unescapes it, i.e. it
  // replaces the literal text "/w". The accompanying comment suggests that
  // whitespace was intended - confirm before changing, since it alters output.
  private com.stevesoft.pat.Regex[] NodeSafeName = new com.stevesoft.pat.Regex[] {
      new com.stevesoft.pat.Regex().perlCode("m/[\\[,:'()]/"), // test for
      // requiring
      // quotes
      new com.stevesoft.pat.Regex().perlCode("s/'/''/"), // escaping quote
      // characters
      new com.stevesoft.pat.Regex().perlCode("s/\\/w/_/") // unqoted whitespace
      // transformation
  };

  /** Character placed around node names that need quoting on output. */
  char QuoteChar = '\'';

  /**
   * Creates a new NewickFile object that reads from a pasted string.
   * 
   * @param inStr
   *          the tree text, handed to the superclass as
   *          {@code DataSourceType.PASTE}
   * 
   * @throws IOException
   *           if the superclass constructor fails
   */
  public NewickFile(String inStr) throws IOException
  {
    super(inStr, DataSourceType.PASTE);
  }

  /**
   * Creates a new NewickFile object that reads from the given source.
   * 
   * @param inFile
   *          the data source (interpreted according to {@code protocol})
   * @param protocol
   *          how {@code inFile} is to be interpreted
   * 
   * @throws IOException
   *           if the superclass constructor fails
   */
  public NewickFile(String inFile, DataSourceType protocol)
          throws IOException
  {
    super(inFile, protocol);
  }

  /**
   * Creates a new NewickFile object that reads from an existing parser source.
   * 
   * @param source
   *          the source passed on to the superclass constructor
   * 
   * @throws IOException
   *           if the superclass constructor fails
   */
  public NewickFile(FileParse source) throws IOException
  {
    super(source);
  }

  /**
   * Creates a new NewickFile object for writing an existing tree.
   * 
   * @param newtree
   *          root of the tree to write
   */
  public NewickFile(SequenceNode newtree)
  {
    root = newtree;
  }

  /**
   * Creates a new NewickFile object for writing an existing tree.
   * 
   * @param newtree
   *          root of the tree to write
   * @param bootstrap
   *          whether bootstrap values are written
   */
  public NewickFile(SequenceNode newtree, boolean bootstrap)
  {
    HasBootstrap = bootstrap;
    root = newtree;
  }

  /**
   * Creates a new NewickFile object for writing an existing tree.
   * 
   * @param newtree
   *          root of the tree to write
   * @param bootstrap
   *          whether bootstrap values are written
   * @param distances
   *          whether branch distances are written
   */
  public NewickFile(SequenceNode newtree, boolean bootstrap,
          boolean distances)
  {
    root = newtree;
    HasBootstrap = bootstrap;
    HasDistances = distances;
  }

  /**
   * Creates a new NewickFile object for writing an existing tree.
   * 
   * @param newtree
   *          root of the tree to write
   * @param bootstrap
   *          whether bootstrap values are written
   * @param distances
   *          whether branch distances are written
   * @param rootdistance
   *          whether the root node's distance is written
   */
  public NewickFile(SequenceNode newtree, boolean bootstrap,
          boolean distances, boolean rootdistance)
  {
    root = newtree;
    HasBootstrap = bootstrap;
    HasDistances = distances;
    RootHasDistance = rootdistance;
  }

  /**
   * Appends a positioned error message to an accumulated error string.
   * 
   * @param Error
   *          errors collected so far, or null if there are none
   * @param Er
   *          the new error message
   * @param r
   *          number of characters of context to quote either side of
   *          {@code p}
   * @param p
   *          position in {@code s} at which the error was detected
   * @param s
   *          the text being parsed
   * 
   * @return the previous errors followed by the new message, its position and
   *         the surrounding text, terminated by a newline
   */
  private String ErrorStringrange(String Error, String Er, int r, int p,
          String s)
  {
    // clamp the quoted context window to the bounds of the input
    int from = ((p - r) < 0) ? 0 : (p - r);
    int to = ((p + r) > s.length()) ? s.length() : (p + r);

    return ((Error == null) ? "" : Error) + Er + " at position " + p + " ( "
            + s.substring(from, to) + " )\n";
  }

  // @tree annotations
  // These are set automatically by the reader
  /**
   * @return true if bootstrap values were read, or are to be written
   */
  public boolean HasBootstrap()
  {
    return HasBootstrap;
  }

  /**
   * @return true if branch distances were read, or are to be written
   */
  public boolean HasDistances()
  {
    return HasDistances;
  }

  /**
   * @return true if the root node has its own distance
   */
  public boolean HasRootDistance()
  {
    return RootHasDistance;
  }

  /**
   * parse the filesource as a newick file (new hampshire and/or extended)
   * 
   * @throws IOException
   *           with a line number and character position for badly formatted NH
   *           strings, or if the source contained no tree at all
   */
  public void parse() throws IOException
  {
    String nf;

    { // fill nf with complete tree file

      StringBuffer file = new StringBuffer();

      while ((nf = nextLine()) != null)
      {
        file.append(nf);
      }

      nf = file.toString();
    }

    // imaginary root: the real tree hangs off its right child and is detached
    // once parsing has finished
    root = new SequenceNode();

    SequenceNode realroot = null;
    SequenceNode c = root;

    int d = -1; // current bracket depth; -1 means outside all brackets
    int cp = 0; // position the next symbol search starts from
    // int flen = nf.length();

    String Error = null;
    String nodename = null;
    String commentString2 = null; // comments after simple node props

    float DefDistance = (float) 0.001; // @param Default distance for a node -
    // very very small
    int DefBootstrap = -1; // @param Default bootstrap for a node

    float distance = DefDistance;
    int bootstrap = DefBootstrap;

    boolean ascending = false; // flag indicating that we are leaving the
    // current node

    com.stevesoft.pat.Regex majorsyms = new com.stevesoft.pat.Regex(
            "[(\\['),;]");

    int nextcp = 0; // non-zero: resume the search here, leaving ncp alone
    int ncp = cp; // start of the text belonging to the current node
    boolean parsednodename = false;
    while (majorsyms.searchFrom(nf, cp) && (Error == null))
    {
      int fcp = majorsyms.matchedFrom();
      char schar;
      switch (schar = nf.charAt(fcp))
      {
      case '(':

        // ascending should not be set
        // New Internal node
        if (ascending)
        {
          Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf);

          continue;
        }

        d++;

        if (c.right() == null)
        {
          c.setRight(new SequenceNode(null, c, null, DefDistance,
                  DefBootstrap, false));
          c = (SequenceNode) c.right();
        }
        else
        {
          if (c.left() != null)
          {
            // Dummy node for polytomy - keeps c.left free for new node
            SequenceNode tmpn = new SequenceNode(null, c, null, 0, 0, true);
            tmpn.SetChildren(c.left(), c.right());
            c.setRight(tmpn);
          }

          c.setLeft(new SequenceNode(null, c, null, DefDistance,
                  DefBootstrap, false));
          c = (SequenceNode) c.left();
        }

        if (realroot == null)
        {
          realroot = c;
        }

        nodename = null;
        distance = DefDistance;
        bootstrap = DefBootstrap;
        cp = fcp + 1;

        break;

      // Deal with quoted fields
      case '\'':

        com.stevesoft.pat.Regex qnodename = new com.stevesoft.pat.Regex(
                "'([^']|'')+'");

        if (qnodename.searchFrom(nf, fcp))
        {
          int nl = qnodename.stringMatched().length();
          nodename = new String(
                  qnodename.stringMatched().substring(1, nl - 1));
          // unpack any escaped quotes ('' -> ')
          com.stevesoft.pat.Regex xpandquotes = com.stevesoft.pat.Regex
                  .perlCode("s/''/'/");
          String widernodename = xpandquotes.replaceAll(nodename);
          nodename = widernodename;
          // jump to after end of quoted nodename
          // NOTE(review): fcp + nl is already the index just past the closing
          // quote, so the extra + 1 also skips the character that follows it
          // - confirm this is intended when that character is ',' or ')'.
          nextcp = fcp + nl + 1;
          parsednodename = true;
        }
        else
        {
          Error = ErrorStringrange(Error,
                  "Unterminated quotes for nodename", 7, fcp, nf);
        }

        break;

      default:
        if (schar == ';')
        {
          if (d != -1)
          {
            Error = ErrorStringrange(Error,
                    "Wayward semicolon (depth=" + d + ")", 7, fcp, nf);
          }
          // cp advanced at the end of default
        }
        if (schar == '[')
        {
          // node string contains Comment or structured/extended NH format info
          /*
           * if ((fcp-cp>1 && nf.substring(cp,fcp).trim().length()>1)) { // will
           * process in remains System.err.println("skipped text:
           * '"+nf.substring(cp,fcp)+"'"); }
           */
          // verify termination.
          com.stevesoft.pat.Regex comment = new com.stevesoft.pat.Regex(
                  "]");
          if (comment.searchFrom(nf, fcp))
          {
            // Skip the comment field
            nextcp = comment.matchedFrom() + 1;
            warningMessage = "Tree file contained comments which may confuse input algorithm.";
            break;

            // cp advanced at the end of default to nextcp, ncp is unchanged so
            // any node info can be read.
          }
          else
          {
            Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp,
                    nf);
          }
        }
        // Parse simpler field strings
        String fstring = nf.substring(ncp, fcp);
        // remove any comments before we parse the node info
        // TODO: test newick file with quoted square brackets in node name (is
        // this allowed?)
        while (fstring.indexOf(']') > -1)
        {
          int cstart = fstring.indexOf('[');
          int cend = fstring.indexOf(']');
          if (cstart < 0 || cstart > cend)
          {
            // a ']' with no '[' before it cannot be cut out as a comment;
            // report it as a parse error instead of letting substring() throw
            Error = ErrorStringrange(Error, "Unmatched ']' in comment", 3,
                    fcp, nf);
            break;
          }
          commentString2 = fstring.substring(cstart + 1, cend);
          fstring = fstring.substring(0, cstart)
                  + fstring.substring(cend + 1);

        }
        // These are deliberately created afresh for every node: their match
        // state is read further down, even when search() was not called.
        com.stevesoft.pat.Regex uqnodename = new com.stevesoft.pat.Regex(
                "\\b([^' :;\\](),]+)");
        com.stevesoft.pat.Regex nbootstrap = new com.stevesoft.pat.Regex(
                "\\s*([0-9+]+)\\s*:");
        com.stevesoft.pat.Regex ndist = new com.stevesoft.pat.Regex(
                ":([-0-9Ee.+]+)");

        if (!parsednodename && uqnodename.search(fstring)
                && ((uqnodename.matchedFrom(1) == 0) || (fstring
                        .charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote
        // HACK!
        {
          if (nodename == null)
          {
            if (ReplaceUnderscores)
            {
              nodename = uqnodename.stringMatched(1).replace('_', ' ');
            }
            else
            {
              nodename = uqnodename.stringMatched(1);
            }
          }
          else
          {
            Error = ErrorStringrange(Error,
                    "File has broken algorithm - overwritten nodename", 10,
                    fcp, nf);
          }
        }
        // get comment bootstraps

        if (nbootstrap.search(fstring))
        {
          if (nbootstrap.stringMatched(1)
                  .equals(uqnodename.stringMatched(1)))
          {
            nodename = null; // no nodename here.
          }
          if (nodename == null || nodename.length() == 0
                  || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1)
                          + uqnodename.stringMatched().length()))
          {
            try
            {
              bootstrap = (Integer.valueOf(nbootstrap.stringMatched(1)))
                      .intValue();
              HasBootstrap = true;
            } catch (Exception e)
            {
              Error = ErrorStringrange(Error, "Can't parse bootstrap value",
                      4, ncp + nbootstrap.matchedFrom(), nf);
            }
          }
        }

        boolean nodehasdistance = false;

        if (ndist.search(fstring))
        {
          try
          {
            distance = (Float.valueOf(ndist.stringMatched(1))).floatValue();
            HasDistances = true;
            nodehasdistance = true;
          } catch (Exception e)
          {
            Error = ErrorStringrange(Error,
                    "Can't parse node distance value", 7,
                    ncp + ndist.matchedFrom(), nf);
          }
        }

        if (ascending)
        {
          // Write node info here
          c.setName(nodename);
          // Trees without distances still need a render distance
          c.dist = (HasDistances) ? distance : DefDistance;
          // be consistent for internal bootstrap defaults too
          c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap);
          if (c == realroot)
          {
            RootHasDistance = nodehasdistance; // JBPNote This is really
            // UGLY!!! Ensure root node gets
            // its given distance
          }
          parseNHXNodeProps(c, commentString2);
          commentString2 = null;
        }
        else
        {
          // Find a place to put the leaf
          SequenceNode newnode = new SequenceNode(null, c, nodename,
                  (HasDistances) ? distance : DefDistance,
                  (HasBootstrap) ? bootstrap : DefBootstrap, false);
          // NOTE(review): the NHX properties are applied to the parent c, not
          // to the leaf just created - confirm whether newnode was intended.
          parseNHXNodeProps(c, commentString2);
          commentString2 = null;

          if (c.right() == null)
          {
            c.setRight(newnode);
          }
          else
          {
            if (c.left() == null)
            {
              c.setLeft(newnode);
            }
            else
            {
              // Insert a dummy node for polytomy
              // dummy nodes have distances
              SequenceNode newdummy = new SequenceNode(null, c, null,
                      (HasDistances ? 0 : DefDistance), 0, true);
              newdummy.SetChildren(c.left(), newnode);
              c.setLeft(newdummy);
            }
          }
        }

        if (ascending)
        {
          // move back up the tree from preceding closure
          c = c.AscendTree();

          if ((d > -1) && (c == null))
          {
            Error = ErrorStringrange(Error,
                    "File broke algorithm: Lost place in tree (is there an extra ')' ?)",
                    7, fcp, nf);
          }
        }

        if (nf.charAt(fcp) == ')')
        {
          d--;
          ascending = true;
        }
        else
        {
          if (nf.charAt(fcp) == ',')
          {
            if (ascending)
            {
              ascending = false;
            }
            else
            {
              // Just advance focus, if we need to
              if ((c.left() != null) && (!c.left().isLeaf()))
              {
                c = (SequenceNode) c.left();
              }
            }
          }
        }

        // Reset new node properties to obvious fakes
        nodename = null;
        distance = DefDistance;
        bootstrap = DefBootstrap;
        commentString2 = null;
        parsednodename = false;
      }
      if (nextcp == 0)
      {
        ncp = cp = fcp + 1;
      }
      else
      {
        cp = nextcp;
        nextcp = 0;
      }
    }

    if (Error != null)
    {
      throw (new IOException(
              MessageManager.formatMessage("exception.newfile", new String[]
              { Error.toString() })));
    }
    // root itself is always set above; what is missing when the input held no
    // tree is the node hanging off it, so test that as well
    if (root == null || root.right() == null)
    {
      throw (new IOException(
              MessageManager.formatMessage("exception.newfile", new String[]
              { MessageManager.getString("label.no_tree_read_in") })));
    }
    // THe next line is failing for topali trees - not sure why yet. if
    // (root.right()!=null && root.isDummy())
    root = (SequenceNode) root.right().detach(); // remove the imaginary root.

    if (!RootHasDistance)
    {
      root.dist = (HasDistances) ? 0 : DefDistance;
    }
  }

  /**
   * parse NHX codes in comment strings and update NewickFile state flags for
   * distances and bootstraps, and add any additional properties onto the node.
   * Only the {@code B} (bootstrap) code is currently recognised; its value is
   * truncated to an integer. Comments that are null or do not start with
   * {@code &&NHX} are ignored, and unparseable values are reported on
   * {@code System.err} without aborting the parse.
   * 
   * @param c
   *          node to receive the parsed properties
   * @param commentString
   *          comment text without its enclosing square brackets, may be null
   */
  private void parseNHXNodeProps(SequenceNode c, String commentString)
  {
    // TODO: store raw comment on the sequenceNode so it can be recovered when
    // tree is output
    if (commentString != null && commentString.startsWith("&&NHX"))
    {
      StringTokenizer st = new StringTokenizer(commentString.substring(5),
              ":");
      while (st.hasMoreTokens())
      {
        String tok = st.nextToken();
        int colpos = tok.indexOf("=");

        if (colpos > -1)
        {
          String code = tok.substring(0, colpos);
          String value = tok.substring(colpos + 1);
          try
          {
            // parse out code/value pairs
            if ("b".equalsIgnoreCase(code))
            {
              // jalview only does integer bootstraps currently
              int v = Float.valueOf(value).intValue();
              c.setBootstrap(v);
              HasBootstrap = true;
            }
            // more codes here.
          } catch (Exception e)
          {
            System.err.println(
                    "Couldn't parse code '" + code + "' = '" + value + "'");
            e.printStackTrace(System.err);
          }
        }
      }
    }

  }

  /**
   * @return the root of the tree held by this object (null if none has been
   *         parsed or supplied)
   */
  public SequenceNode getTree()
  {
    return root;
  }

  /**
   * Generate a newick format tree according to internal flags for bootstraps,
   * distances and root distances.
   * 
   * @return new hampshire tree in a single line
   */
  public String print()
  {
    synchronized (this)
    {
      StringBuffer tf = new StringBuffer();
      print(tf, root);

      return (tf.append(";").toString());
    }
  }

  /**
   * Generate a newick format tree according to internal flags for distances and
   * root distances and user specificied writing of bootstraps.
   * 
   * @param withbootstraps
   *          controls if bootstrap values are explicitly written.
   * 
   * @return new hampshire tree in a single line
   */
  public String print(boolean withbootstraps)
  {
    synchronized (this)
    {
      boolean boots = this.HasBootstrap;
      this.HasBootstrap = withbootstraps;
      try
      {
        return print();
      } finally
      {
        // always put the flag back, even if printing failed
        this.HasBootstrap = boots;
      }
    }
  }

  /**
   * Generate newick format tree according to internal flags for writing root
   * node distances.
   * 
   * @param withbootstraps
   *          explicitly write bootstrap values
   * @param withdists
   *          explicitly write distances
   * 
   * @return new hampshire tree in a single line
   */
  public String print(boolean withbootstraps, boolean withdists)
  {
    synchronized (this)
    {
      boolean dists = this.HasDistances;
      this.HasDistances = withdists;
      try
      {
        return print(withbootstraps);
      } finally
      {
        // always put the flag back, even if printing failed
        this.HasDistances = dists;
      }
    }
  }

  /**
   * Generate newick format tree according to user specified flags
   * 
   * @param withbootstraps
   *          explicitly write bootstrap values
   * @param withdists
   *          explicitly write distances
   * @param printRootInfo
   *          explicitly write root distance
   * 
   * @return new hampshire tree in a single line
   */
  public String print(boolean withbootstraps, boolean withdists,
          boolean printRootInfo)
  {
    synchronized (this)
    {
      // save the FIELD, not the parameter that shadows it, so that the
      // instance setting really is restored afterwards
      boolean rootinfo = this.printRootInfo;
      this.printRootInfo = printRootInfo;
      try
      {
        return print(withbootstraps, withdists);
      } finally
      {
        this.printRootInfo = rootinfo;
      }
    }
  }

  /**
   * @return the character used to quote node names on output
   */
  char getQuoteChar()
  {
    return QuoteChar;
  }

  /**
   * Sets the character used to quote node names on output.
   * 
   * @param c
   *          the new quote character
   * 
   * @return the quote character that was previously in use
   */
  char setQuoteChar(char c)
  {
    char old = QuoteChar;
    QuoteChar = c;

    return old;
  }

  /**
   * Prepares a node name for output. Names matching the "needs quotes" pattern
   * are wrapped in the quote character after the quote-escaping substitution
   * has been applied; all other names are passed through the unquoted-name
   * substitution.
   * 
   * @param name
   *          the raw node name (not null)
   * 
   * @return the name as it should appear in the newick string
   */
  private String nodeName(String name)
  {
    if (NodeSafeName[0].search(name))
    {
      return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar;
    }
    else
    {
      return NodeSafeName[2].replaceAll(name);
    }
  }

  /**
   * Builds the part of a node field shared by root and non-root nodes: the
   * (possibly quoted) name, followed - when bootstraps are being written and
   * the node's bootstrap is greater than -1 - by the bootstrap value, which is
   * separated from the name by a single space if there is a name.
   * 
   * @param n
   *          the node to describe
   * 
   * @return name and bootstrap text, possibly empty
   */
  private String nameAndBootstrap(SequenceNode n)
  {
    StringBuffer field = new StringBuffer();
    boolean named = n.getName() != null;

    if (named)
    {
      field.append(nodeName(n.getName()));
    }
    if (HasBootstrap && n.getBootstrap() > -1)
    {
      if (named)
      {
        field.append(" ");
      }
      field.append(n.getBootstrap());
    }

    return field.toString();
  }

  /**
   * Builds the text written for a non-root node: name, optional bootstrap and,
   * when distances are being written, ':' followed by the node's distance.
   * 
   * @param c
   *          the node to write
   * 
   * @return the node's newick field
   */
  private String printNodeField(SequenceNode c)
  {
    return nameAndBootstrap(c) + ((HasDistances) ? (":" + c.dist) : "");
  }

  /**
   * Builds the text written for the root node. Nothing is written unless
   * {@code printRootInfo} is set; the distance is written only when the root
   * has its own distance.
   * 
   * @param root
   *          the root node
   * 
   * @return the root's newick field, or the empty string
   */
  private String printRootField(SequenceNode root)
  {
    if (!printRootInfo)
    {
      return "";
    }

    return nameAndBootstrap(root)
            + ((RootHasDistance) ? (":" + root.dist) : "");
  }

  // Non recursive call deals with root node properties
  /**
   * Appends the newick representation of the tree below {@code root} to
   * {@code tf}, without the terminating ';'. Does nothing if {@code root} is
   * null.
   * 
   * @param tf
   *          buffer that receives the output
   * @param root
   *          root node of the tree to write
   */
  public void print(StringBuffer tf, SequenceNode root)
  {
    if (root != null)
    {
      if (root.isLeaf() && printRootInfo)
      {
        tf.append(printRootField(root));
      }
      else
      {
        if (root.isDummy())
        {
          // NOTE(review): no ',' is written between the two subtrees here,
          // unlike the dummy case in _print - confirm this is intended.
          _print(tf, (SequenceNode) root.right());
          _print(tf, (SequenceNode) root.left());
        }
        else
        {
          tf.append("(");
          _print(tf, (SequenceNode) root.right());

          if (root.left() != null)
          {
            tf.append(",");
          }

          _print(tf, (SequenceNode) root.left());
          tf.append(")" + printRootField(root));
        }
      }
    }
  }

  // Recursive call for non-root nodes
  /**
   * Recursively appends the newick representation of a non-root node and its
   * descendants to {@code tf}. Dummy nodes contribute their children without
   * adding a bracket level. Does nothing if {@code c} is null.
   * 
   * @param tf
   *          buffer that receives the output
   * @param c
   *          node to write
   */
  public void _print(StringBuffer tf, SequenceNode c)
  {
    if (c != null)
    {
      if (c.isLeaf())
      {
        tf.append(printNodeField(c));
      }
      else
      {
        if (c.isDummy())
        {
          _print(tf, (SequenceNode) c.left());
          if (c.left() != null)
          {
            tf.append(",");
          }
          _print(tf, (SequenceNode) c.right());
        }
        else
        {
          tf.append("(");
          _print(tf, (SequenceNode) c.right());

          if (c.left() != null)
          {
            tf.append(",");
          }

          _print(tf, (SequenceNode) c.left());
          tf.append(")" + printNodeField(c));
        }
      }
    }
  }

  // Test
  /**
   * Command line test harness: reads the newick file named by the single
   * argument, parses it and prints it back with each combination of
   * bootstrap/distance output.
   * 
   * @param args
   *          exactly one element: the name of a newick tree file
   */
  public static void main(String[] args)
  {
    try
    {
      if (args == null || args.length != 1)
      {
        System.err.println(
                "Takes one argument - file name of a newick tree file.");
        System.exit(0);
      }

      File fn = new File(args[0]);

      StringBuffer newickfile = new StringBuffer();
      BufferedReader treefile = new BufferedReader(new FileReader(fn));
      try
      {
        String l;

        while ((l = treefile.readLine()) != null)
        {
          newickfile.append(l);
        }
      } finally
      {
        // release the file handle even if reading failed part way through
        treefile.close();
      }

      System.out.println("Read file :\n");

      NewickFile trf = new NewickFile(args[0], DataSourceType.FILE);
      trf.parse();
      System.out.println("Original file :\n");

      com.stevesoft.pat.Regex nonl = new com.stevesoft.pat.Regex("\n+", "");
      System.out.println(nonl.replaceAll(newickfile.toString()) + "\n");

      System.out.println("Parsed file.\n");
      System.out.println("Default output type for original input.\n");
      System.out.println(trf.print());
      System.out.println("Without bootstraps.\n");
      System.out.println(trf.print(false));
      System.out.println("Without distances.\n");
      System.out.println(trf.print(true, false));
      System.out.println("Without bootstraps but with distanecs.\n");
      System.out.println(trf.print(false, true));
      System.out.println("Without bootstraps or distanecs.\n");
      System.out.println(trf.print(false, false));
      System.out.println("With bootstraps and with distances.\n");
      System.out.println(trf.print(true, true));
    } catch (java.io.IOException e)
    {
      System.err.println("Exception\n" + e);
      e.printStackTrace();
    }
  }
}