1 /* 2 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4) 3 * Copyright (C) 2021 The Jalview Authors 4 * 5 * This file is part of Jalview. 6 * 7 * Jalview is free software: you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation, either version 3 10 * of the License, or (at your option) any later version. 11 * 12 * Jalview is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty 14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 15 * PURPOSE. See the GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>. 19 * The Jalview Authors are detailed in the 'AUTHORS' file. 20 */ 21 package jalview.datamodel; 22 23 import static org.testng.AssertJUnit.assertEquals; 24 import static org.testng.AssertJUnit.assertFalse; 25 import static org.testng.AssertJUnit.assertNotNull; 26 import static org.testng.AssertJUnit.assertNull; 27 import static org.testng.AssertJUnit.assertSame; 28 import static org.testng.AssertJUnit.assertTrue; 29 30 import jalview.analysis.AlignmentGenerator; 31 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; 32 import jalview.gui.JvOptionPane; 33 import jalview.io.DataSourceType; 34 import jalview.io.FileFormat; 35 import jalview.io.FileFormatI; 36 import jalview.io.FormatAdapter; 37 import jalview.util.Comparison; 38 import jalview.util.MapList; 39 40 import java.io.IOException; 41 import java.util.Arrays; 42 import java.util.Iterator; 43 import java.util.List; 44 45 import org.testng.Assert; 46 import org.testng.annotations.BeforeClass; 47 import org.testng.annotations.BeforeMethod; 48 import org.testng.annotations.Test; 49 50 /** 51 * Unit tests for Alignment datamodel. 52 * 53 * @author gmcarstairs 54 * 55 */ 56 public class AlignmentTest 57 { 58 59 @BeforeClass(alwaysRun = true) setUpJvOptionPane()60 public void setUpJvOptionPane() 61 { 62 JvOptionPane.setInteractiveMode(false); 63 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); 64 } 65 66 // @formatter:off 67 private static final String TEST_DATA = 68 "# STOCKHOLM 1.0\n" + 69 "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" + 70 "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" + 71 "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" + 72 "D.melanogaster.1 G.AGCC.CU...AUGAUCGA\n" + 73 "#=GR D.melanogaster.1 SS ................((((\n" + 74 "D.melanogaster.2 C.AUUCAACU.UAUGAGGAU\n" + 75 "#=GR D.melanogaster.2 SS ................((((\n" + 76 "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" + 77 "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" + 78 "//"; 79 80 private static final String AA_SEQS_1 = 81 ">Seq1Name/5-8\n" + 82 "K-QY--L\n" + 83 ">Seq2Name/12-15\n" + 84 "-R-FP-W-\n"; 85 86 private static final String CDNA_SEQS_1 = 87 ">Seq1Name/100-111\n" + 88 "AC-GG--CUC-CAA-CT\n" + 89 ">Seq2Name/200-211\n" + 90 "-CG-TTA--ACG---AAGT\n"; 91 92 private static final String CDNA_SEQS_2 = 93 ">Seq1Name/50-61\n" + 94 "GCTCGUCGTACT\n" + 95 ">Seq2Name/60-71\n" + 96 "GGGTCAGGCAGT\n"; 97 // @formatter:on 98 99 private AlignmentI al; 100 101 /** 102 * Helper method to load an alignment and ensure dataset sequences are set up. 103 * 104 * @param data 105 * @param format 106 * TODO 107 * @return 108 * @throws IOException 109 */ loadAlignment(final String data, FileFormatI format)110 protected AlignmentI loadAlignment(final String data, FileFormatI format) 111 throws IOException 112 { 113 AlignmentI a = new FormatAdapter().readFile(data, DataSourceType.PASTE, 114 format); 115 a.setDataset(null); 116 return a; 117 } 118 119 /** 120 * assert wrapper: tests all references in the given alignment are consistent 121 * 122 * @param alignment 123 */ assertAlignmentDatasetRefs(AlignmentI alignment)124 public static void assertAlignmentDatasetRefs(AlignmentI alignment) 125 { 126 verifyAlignmentDatasetRefs(alignment, true, null); 127 } 128 129 /** 130 * assert wrapper: tests all references in the given alignment are consistent 131 * 132 * @param alignment 133 * @param message 134 * - prefixed to any assert failed messages 135 */ assertAlignmentDatasetRefs(AlignmentI alignment, String message)136 public static void assertAlignmentDatasetRefs(AlignmentI alignment, 137 String message) 138 { 139 verifyAlignmentDatasetRefs(alignment, true, message); 140 } 141 142 /** 143 * verify sequence and dataset references are properly contained within 144 * dataset 145 * 146 * @param alignment 147 * - the alignmentI object to verify (either alignment or dataset) 148 * @param raiseAssert 149 * - when set, testng assertions are raised. 150 * @param message 151 * - null or a string message to prepend to the assert failed 152 * messages. 153 * @return true if alignment references were in order, otherwise false. 154 */ verifyAlignmentDatasetRefs(AlignmentI alignment, boolean raiseAssert, String message)155 public static boolean verifyAlignmentDatasetRefs(AlignmentI alignment, 156 boolean raiseAssert, String message) 157 { 158 if (message == null) 159 { 160 message = ""; 161 } 162 if (alignment == null) 163 { 164 if (raiseAssert) 165 { 166 Assert.fail(message + "Alignment for verification was null."); 167 } 168 return false; 169 } 170 if (alignment.getDataset() != null) 171 { 172 AlignmentI dataset = alignment.getDataset(); 173 // check all alignment sequences have their dataset within the dataset 174 for (SequenceI seq : alignment.getSequences()) 175 { 176 SequenceI seqds = seq.getDatasetSequence(); 177 if (seqds.getDatasetSequence() != null) 178 { 179 if (raiseAssert) 180 { 181 Assert.fail(message 182 + " Alignment contained a sequence who's dataset sequence has a second dataset reference."); 183 } 184 return false; 185 } 186 if (dataset.findIndex(seqds) == -1) 187 { 188 if (raiseAssert) 189 { 190 Assert.fail(message 191 + " Alignment contained a sequence who's dataset sequence was not in the dataset."); 192 } 193 return false; 194 } 195 } 196 return verifyAlignmentDatasetRefs(alignment.getDataset(), 197 raiseAssert, message); 198 } 199 else 200 { 201 int dsp = -1; 202 // verify all dataset sequences 203 for (SequenceI seqds : alignment.getSequences()) 204 { 205 dsp++; 206 if (seqds.getDatasetSequence() != null) 207 { 208 if (raiseAssert) 209 { 210 Assert.fail(message 211 + " Dataset contained a sequence with non-null dataset reference (ie not a dataset sequence!)"); 212 } 213 return false; 214 } 215 int foundp = alignment.findIndex(seqds); 216 if (foundp != dsp) 217 { 218 if (raiseAssert) 219 { 220 Assert.fail(message 221 + " Dataset sequence array contains a reference at " 222 + dsp + " to a sequence first seen at " + foundp + " (" 223 + seqds.toString() + ")"); 224 } 225 return false; 226 } 227 if (seqds.getDBRefs() != null) 228 { 229 for (DBRefEntry dbr : seqds.getDBRefs()) 230 { 231 if (dbr.getMap() != null) 232 { 233 SequenceI seqdbrmapto = dbr.getMap().getTo(); 234 if (seqdbrmapto != null) 235 { 236 if (seqdbrmapto.getDatasetSequence() != null) 237 { 238 if (raiseAssert) 239 { 240 Assert.fail(message 241 + " DBRefEntry for sequence in alignment had map to sequence which was not a dataset sequence"); 242 } 243 return false; 244 245 } 246 if (alignment.findIndex(dbr.getMap().getTo()) == -1) 247 { 248 if (raiseAssert) 249 { 250 Assert.fail(message 251 + " DBRefEntry " + dbr + " for sequence " 252 + seqds 253 + " in alignment has map to sequence not in dataset"); 254 } 255 return false; 256 } 257 } 258 } 259 } 260 } 261 } 262 // finally, verify codonmappings involve only dataset sequences. 263 if (alignment.getCodonFrames() != null) 264 { 265 for (AlignedCodonFrame alc : alignment.getCodonFrames()) 266 { 267 for (SequenceToSequenceMapping ssm : alc.getMappings()) 268 { 269 if (ssm.getFromSeq().getDatasetSequence() != null) 270 { 271 if (raiseAssert) 272 { 273 Assert.fail(message 274 + " CodonFrame-SSM-FromSeq is not a dataset sequence"); 275 } 276 return false; 277 } 278 if (alignment.findIndex(ssm.getFromSeq()) == -1) 279 { 280 281 if (raiseAssert) 282 { 283 Assert.fail(message 284 + " CodonFrame-SSM-FromSeq is not contained in dataset"); 285 } 286 return false; 287 } 288 if (ssm.getMapping().getTo().getDatasetSequence() != null) 289 { 290 if (raiseAssert) 291 { 292 Assert.fail(message 293 + " CodonFrame-SSM-Mapping-ToSeq is not a dataset sequence"); 294 } 295 return false; 296 } 297 if (alignment.findIndex(ssm.getMapping().getTo()) == -1) 298 { 299 300 if (raiseAssert) 301 { 302 Assert.fail(message 303 + " CodonFrame-SSM-Mapping-ToSeq is not contained in dataset"); 304 } 305 return false; 306 } 307 } 308 } 309 } 310 } 311 return true; // all relationships verified! 312 } 313 314 /** 315 * call verifyAlignmentDatasetRefs with and without assertion raising enabled, 316 * to check expected pass/fail actually occurs in both conditions 317 * 318 * @param al 319 * @param expected 320 * @param msg 321 */ assertVerifyAlignment(AlignmentI al, boolean expected, String msg)322 private void assertVerifyAlignment(AlignmentI al, boolean expected, 323 String msg) 324 { 325 if (expected) 326 { 327 try 328 { 329 330 Assert.assertTrue(verifyAlignmentDatasetRefs(al, true, null), 331 "Valid test alignment failed when raiseAsserts enabled:" 332 + msg); 333 } catch (AssertionError ae) 334 { 335 ae.printStackTrace(); 336 Assert.fail( 337 "Valid test alignment raised assertion errors when raiseAsserts enabled: " 338 + msg, ae); 339 } 340 // also check validation passes with asserts disabled 341 Assert.assertTrue(verifyAlignmentDatasetRefs(al, false, null), 342 "Valid test alignment tested false when raiseAsserts disabled:" 343 + msg); 344 } 345 else 346 { 347 boolean assertRaised = false; 348 try 349 { 350 verifyAlignmentDatasetRefs(al, true, null); 351 } catch (AssertionError ae) 352 { 353 // expected behaviour 354 assertRaised = true; 355 } 356 if (!assertRaised) 357 { 358 Assert.fail("Invalid test alignment passed when raiseAsserts enabled:" 359 + msg); 360 } 361 // also check validation passes with asserts disabled 362 Assert.assertFalse(verifyAlignmentDatasetRefs(al, false, null), 363 "Invalid test alignment tested true when raiseAsserts disabled:" 364 + msg); 365 } 366 } 367 368 @Test(groups = { "Functional" }) testVerifyAlignmentDatasetRefs()369 public void testVerifyAlignmentDatasetRefs() 370 { 371 SequenceI sq1 = new Sequence("sq1", "ASFDD"), sq2 = new Sequence("sq2", 372 "TTTTTT"); 373 374 // construct simple valid alignment dataset 375 Alignment al = new Alignment(new SequenceI[] { sq1, sq2 }); 376 // expect this to pass 377 assertVerifyAlignment(al, true, "Simple valid alignment didn't verify"); 378 379 // check test for sequence->datasetSequence validity 380 sq1.setDatasetSequence(sq2); 381 assertVerifyAlignment(al, false, 382 "didn't detect dataset sequence with a dataset sequence reference."); 383 384 sq1.setDatasetSequence(null); 385 assertVerifyAlignment( 386 al, 387 true, 388 "didn't reinstate validity after nulling dataset sequence dataset reference"); 389 390 // now create dataset and check again 391 al.createDatasetAlignment(); 392 assertNotNull(al.getDataset()); 393 394 assertVerifyAlignment(al, true, 395 "verify failed after createDatasetAlignment"); 396 397 // create a dbref on sq1 with a sequence ref to sq2 398 DBRefEntry dbrs1tos2 = new DBRefEntry("UNIPROT", "1", "Q111111"); 399 dbrs1tos2.setMap(new Mapping(sq2.getDatasetSequence(), 400 new int[] { 1, 5 }, new int[] { 2, 6 }, 1, 1)); 401 sq1.getDatasetSequence().addDBRef(dbrs1tos2); 402 assertVerifyAlignment(al, true, 403 "verify failed after addition of valid DBRefEntry/map"); 404 // now create a dbref on a new sequence which maps to another sequence 405 // outside of the dataset 406 SequenceI sqout = new Sequence("sqout", "ututututucagcagcag"), sqnew = new Sequence( 407 "sqnew", "EEERRR"); 408 DBRefEntry sqnewsqout = new DBRefEntry("ENAFOO", "1", "R000001"); 409 sqnewsqout.setMap(new Mapping(sqout, new int[] { 1, 6 }, new int[] { 1, 410 18 }, 1, 3)); 411 al.getDataset().addSequence(sqnew); 412 413 assertVerifyAlignment(al, true, 414 "verify failed after addition of new sequence to dataset"); 415 // now start checking exception conditions 416 sqnew.addDBRef(sqnewsqout); 417 assertVerifyAlignment( 418 al, 419 false, 420 "verify passed when a dbref with map to sequence outside of dataset was added"); 421 // make the verify pass by adding the outsider back in 422 al.getDataset().addSequence(sqout); 423 assertVerifyAlignment(al, true, 424 "verify should have passed after adding dbref->to sequence in to dataset"); 425 // and now the same for a codon mapping... 426 SequenceI sqanotherout = new Sequence("sqanotherout", 427 "aggtutaggcagcagcag"); 428 429 AlignedCodonFrame alc = new AlignedCodonFrame(); 430 alc.addMap(sqanotherout, sqnew, new MapList(new int[] { 1, 6 }, 431 new int[] { 1, 18 }, 3, 1)); 432 433 al.addCodonFrame(alc); 434 Assert.assertEquals(al.getDataset().getCodonFrames().size(), 1); 435 436 assertVerifyAlignment( 437 al, 438 false, 439 "verify passed when alCodonFrame mapping to sequence outside of dataset was added"); 440 // make the verify pass by adding the outsider back in 441 al.getDataset().addSequence(sqanotherout); 442 assertVerifyAlignment( 443 al, 444 true, 445 "verify should have passed once all sequences involved in alCodonFrame were added to dataset"); 446 al.getDataset().addSequence(sqanotherout); 447 assertVerifyAlignment(al, false, 448 "verify should have failed when a sequence was added twice to the dataset"); 449 al.getDataset().deleteSequence(sqanotherout); 450 assertVerifyAlignment(al, true, 451 "verify should have passed after duplicate entry for sequence was removed"); 452 } 453 454 /** 455 * checks that the sequence data for an alignment's dataset is non-redundant. 456 * Fails if there are sequences with same id, sequence, start, and. 457 */ 458 assertDatasetIsNormalised(AlignmentI al)459 public static void assertDatasetIsNormalised(AlignmentI al) 460 { 461 assertDatasetIsNormalised(al, null); 462 } 463 464 /** 465 * checks that the sequence data for an alignment's dataset is non-redundant. 466 * Fails if there are sequences with same id, sequence, start, and. 467 * 468 * @param al 469 * - alignment to verify 470 * @param message 471 * - null or message prepended to exception message. 472 */ assertDatasetIsNormalised(AlignmentI al, String message)473 public static void assertDatasetIsNormalised(AlignmentI al, String message) 474 { 475 if (al.getDataset() != null) 476 { 477 assertDatasetIsNormalised(al.getDataset(), message); 478 return; 479 } 480 /* 481 * look for pairs of sequences with same ID, start, end, and sequence 482 */ 483 List<SequenceI> seqSet = al.getSequences(); 484 for (int p = 0; p < seqSet.size(); p++) 485 { 486 SequenceI pSeq = seqSet.get(p); 487 for (int q = p + 1; q < seqSet.size(); q++) 488 { 489 SequenceI qSeq = seqSet.get(q); 490 if (pSeq.getStart() != qSeq.getStart()) 491 { 492 continue; 493 } 494 if (pSeq.getEnd() != qSeq.getEnd()) 495 { 496 continue; 497 } 498 if (!pSeq.getName().equals(qSeq.getName())) 499 { 500 continue; 501 } 502 if (!Arrays.equals(pSeq.getSequence(), qSeq.getSequence())) 503 { 504 continue; 505 } 506 Assert.fail((message == null ? "" : message + " :") 507 + "Found similar sequences at position " + p + " and " + q 508 + "\n" + pSeq.toString()); 509 } 510 } 511 } 512 513 @Test(groups = { "Functional", "Asserts" }) testAssertDatasetIsNormalised()514 public void testAssertDatasetIsNormalised() 515 { 516 Sequence sq1 = new Sequence("s1/1-4", "asdf"); 517 Sequence sq1shift = new Sequence("s1/2-5", "asdf"); 518 Sequence sq1seqd = new Sequence("s1/1-4", "asdt"); 519 Sequence sq2 = new Sequence("s2/1-4", "asdf"); 520 Sequence sq1dup = new Sequence("s1/1-4", "asdf"); 521 522 Alignment al = new Alignment(new SequenceI[] { sq1 }); 523 al.setDataset(null); 524 525 try 526 { 527 assertDatasetIsNormalised(al); 528 } catch (AssertionError ae) 529 { 530 Assert.fail("Single sequence should be valid normalised dataset."); 531 } 532 al.addSequence(sq2); 533 try 534 { 535 assertDatasetIsNormalised(al); 536 } catch (AssertionError ae) 537 { 538 Assert.fail("Two different sequences should be valid normalised dataset."); 539 } 540 /* 541 * now change sq2's name in the alignment. should still be valid 542 */ 543 al.findName(sq2.getName()).setName("sq1"); 544 try 545 { 546 assertDatasetIsNormalised(al); 547 } catch (AssertionError ae) 548 { 549 Assert.fail("Two different sequences in dataset, but same name in alignment, should be valid normalised dataset."); 550 } 551 552 al.addSequence(sq1seqd); 553 try 554 { 555 assertDatasetIsNormalised(al); 556 } catch (AssertionError ae) 557 { 558 Assert.fail("sq1 and sq1 with different sequence should be distinct."); 559 } 560 561 al.addSequence(sq1shift); 562 try 563 { 564 assertDatasetIsNormalised(al); 565 } catch (AssertionError ae) 566 { 567 Assert.fail("sq1 and sq1 with different start/end should be distinct."); 568 } 569 /* 570 * finally, the failure case 571 */ 572 al.addSequence(sq1dup); 573 boolean ssertRaised = false; 574 try 575 { 576 assertDatasetIsNormalised(al); 577 578 } catch (AssertionError ae) 579 { 580 ssertRaised = true; 581 } 582 if (!ssertRaised) 583 { 584 Assert.fail("Expected identical sequence to raise exception."); 585 } 586 } 587 588 /* 589 * Read in Stockholm format test data including secondary structure 590 * annotations. 591 */ 592 @BeforeMethod(alwaysRun = true) setUp()593 public void setUp() throws IOException 594 { 595 al = loadAlignment(TEST_DATA, FileFormat.Stockholm); 596 int i = 0; 597 for (AlignmentAnnotation ann : al.getAlignmentAnnotation()) 598 { 599 ann.setCalcId("CalcIdFor" + al.getSequenceAt(i).getName()); 600 i++; 601 } 602 } 603 604 /** 605 * Test method that returns annotations that match on calcId. 606 */ 607 @Test(groups = { "Functional" }) testFindAnnotation_byCalcId()608 public void testFindAnnotation_byCalcId() 609 { 610 Iterable<AlignmentAnnotation> anns = al 611 .findAnnotation("CalcIdForD.melanogaster.2"); 612 Iterator<AlignmentAnnotation> iter = anns.iterator(); 613 assertTrue(iter.hasNext()); 614 AlignmentAnnotation ann = iter.next(); 615 assertEquals("D.melanogaster.2", ann.sequenceRef.getName()); 616 assertFalse(iter.hasNext()); 617 618 // invalid id 619 anns = al.findAnnotation("CalcIdForD.melanogaster.?"); 620 assertFalse(iter.hasNext()); 621 anns = al.findAnnotation(null); 622 assertFalse(iter.hasNext()); 623 } 624 625 /** 626 * Test method that returns annotations that match on reference sequence, 627 * label, or calcId. 628 */ 629 @Test(groups = { "Functional" }) testFindAnnotations_bySeqLabelandorCalcId()630 public void testFindAnnotations_bySeqLabelandorCalcId() 631 { 632 // TODO: finish testFindAnnotations_bySeqLabelandorCalcId test 633 /* Note - this is an incomplete test - need to check null or 634 * non-null [ matches, not matches ] behaviour for each of the three 635 * parameters..*/ 636 637 // search for a single, unique calcId with wildcards on other params 638 Iterable<AlignmentAnnotation> anns = al.findAnnotations(null, 639 "CalcIdForD.melanogaster.2", null); 640 Iterator<AlignmentAnnotation> iter = anns.iterator(); 641 assertTrue(iter.hasNext()); 642 AlignmentAnnotation ann = iter.next(); 643 assertEquals("D.melanogaster.2", ann.sequenceRef.getName()); 644 assertFalse(iter.hasNext()); 645 646 // save reference to test sequence reference parameter 647 SequenceI rseq = ann.sequenceRef; 648 649 // search for annotation associated with a single sequence 650 anns = al.findAnnotations(rseq, null, null); 651 iter = anns.iterator(); 652 assertTrue(iter.hasNext()); 653 ann = iter.next(); 654 assertEquals("D.melanogaster.2", ann.sequenceRef.getName()); 655 assertFalse(iter.hasNext()); 656 657 // search for annotation with a non-existant calcId 658 anns = al.findAnnotations(null, "CalcIdForD.melanogaster.?", null); 659 iter = anns.iterator(); 660 assertFalse(iter.hasNext()); 661 662 // search for annotation with a particular label - expect three 663 anns = al.findAnnotations(null, null, "Secondary Structure"); 664 iter = anns.iterator(); 665 assertTrue(iter.hasNext()); 666 iter.next(); 667 assertTrue(iter.hasNext()); 668 iter.next(); 669 assertTrue(iter.hasNext()); 670 iter.next(); 671 // third found.. so 672 assertFalse(iter.hasNext()); 673 674 // search for annotation on one sequence with a particular label - expect 675 // one 676 SequenceI sqfound; 677 anns = al.findAnnotations(sqfound = al.getSequenceAt(1), null, 678 "Secondary Structure"); 679 iter = anns.iterator(); 680 assertTrue(iter.hasNext()); 681 // expect reference to sequence 1 in the alignment 682 assertTrue(sqfound == iter.next().sequenceRef); 683 assertFalse(iter.hasNext()); 684 685 // null on all parameters == find all annotations 686 anns = al.findAnnotations(null, null, null); 687 iter = anns.iterator(); 688 int n = al.getAlignmentAnnotation().length; 689 while (iter.hasNext()) 690 { 691 n--; 692 iter.next(); 693 } 694 assertTrue("Found " + n + " fewer annotations from search.", n == 0); 695 } 696 697 @Test(groups = { "Functional" }) testDeleteAllAnnotations_includingAutocalculated()698 public void testDeleteAllAnnotations_includingAutocalculated() 699 { 700 AlignmentAnnotation aa = new AlignmentAnnotation("Consensus", 701 "Consensus", 0.5); 702 aa.autoCalculated = true; 703 al.addAnnotation(aa); 704 AlignmentAnnotation[] anns = al.getAlignmentAnnotation(); 705 assertEquals("Wrong number of annotations before deleting", 4, 706 anns.length); 707 al.deleteAllAnnotations(true); 708 assertEquals("Not all deleted", 0, al.getAlignmentAnnotation().length); 709 } 710 711 @Test(groups = { "Functional" }) testDeleteAllAnnotations_excludingAutocalculated()712 public void testDeleteAllAnnotations_excludingAutocalculated() 713 { 714 AlignmentAnnotation aa = new AlignmentAnnotation("Consensus", 715 "Consensus", 0.5); 716 aa.autoCalculated = true; 717 al.addAnnotation(aa); 718 AlignmentAnnotation[] anns = al.getAlignmentAnnotation(); 719 assertEquals("Wrong number of annotations before deleting", 4, 720 anns.length); 721 al.deleteAllAnnotations(false); 722 assertEquals("Not just one annotation left", 1, 723 al.getAlignmentAnnotation().length); 724 } 725 726 /** 727 * Tests for realigning as per a supplied alignment: Dna as Dna. 728 * 729 * Note: AlignedCodonFrame's state variables are named for protein-to-cDNA 730 * mapping, but can be exploited for a general 'sequence-to-sequence' mapping 731 * as here. 732 * 733 * @throws IOException 734 */ 735 @Test(groups = { "Functional" }) testAlignAs_dnaAsDna()736 public void testAlignAs_dnaAsDna() throws IOException 737 { 738 // aligned cDNA: 739 AlignmentI al1 = loadAlignment(CDNA_SEQS_1, FileFormat.Fasta); 740 // unaligned cDNA: 741 AlignmentI al2 = loadAlignment(CDNA_SEQS_2, FileFormat.Fasta); 742 743 /* 744 * Make mappings between sequences. The 'aligned cDNA' is playing the role 745 * of what would normally be protein here. 746 */ 747 makeMappings(al1, al2); 748 749 ((Alignment) al2).alignAs(al1, false, true); 750 assertEquals("GC-TC--GUC-GTACT", al2.getSequenceAt(0) 751 .getSequenceAsString()); 752 assertEquals("-GG-GTC--AGG--CAGT", al2.getSequenceAt(1) 753 .getSequenceAsString()); 754 } 755 756 /** 757 * Aligning protein from cDNA. 758 * 759 * @throws IOException 760 */ 761 @Test(groups = { "Functional" }) testAlignAs_proteinAsCdna()762 public void testAlignAs_proteinAsCdna() throws IOException 763 { 764 // see also AlignmentUtilsTests 765 AlignmentI al1 = loadAlignment(CDNA_SEQS_1, FileFormat.Fasta); 766 AlignmentI al2 = loadAlignment(AA_SEQS_1, FileFormat.Fasta); 767 makeMappings(al1, al2); 768 769 // Fudge - alignProteinAsCdna expects mappings to be on protein 770 al2.getCodonFrames().addAll(al1.getCodonFrames()); 771 772 ((Alignment) al2).alignAs(al1, false, true); 773 assertEquals("K-Q-Y-L-", al2.getSequenceAt(0).getSequenceAsString()); 774 assertEquals("-R-F-P-W", al2.getSequenceAt(1).getSequenceAsString()); 775 } 776 777 /** 778 * Test aligning cdna as per protein alignment. 779 * 780 * @throws IOException 781 */ 782 @Test(groups = { "Functional" }, enabled = true) 783 // TODO review / update this test after redesign of alignAs method testAlignAs_cdnaAsProtein()784 public void testAlignAs_cdnaAsProtein() throws IOException 785 { 786 /* 787 * Load alignments and add mappings for cDNA to protein 788 */ 789 AlignmentI al1 = loadAlignment(CDNA_SEQS_1, FileFormat.Fasta); 790 AlignmentI al2 = loadAlignment(AA_SEQS_1, FileFormat.Fasta); 791 makeMappings(al1, al2); 792 793 /* 794 * Realign DNA; currently keeping existing gaps in introns only 795 */ 796 ((Alignment) al1).alignAs(al2, false, true); 797 assertEquals("ACG---GCUCCA------ACT---", al1.getSequenceAt(0) 798 .getSequenceAsString()); 799 assertEquals("---CGT---TAACGA---AGT---", al1.getSequenceAt(1) 800 .getSequenceAsString()); 801 } 802 803 /** 804 * Test aligning cdna as per protein - single sequences 805 * 806 * @throws IOException 807 */ 808 @Test(groups = { "Functional" }, enabled = true) 809 // TODO review / update this test after redesign of alignAs method testAlignAs_cdnaAsProtein_singleSequence()810 public void testAlignAs_cdnaAsProtein_singleSequence() throws IOException 811 { 812 /* 813 * simple case insert one gap 814 */ 815 verifyAlignAs(">dna\nCAAaaa\n", ">protein\nQ-K\n", "CAA---aaa"); 816 817 /* 818 * simple case but with sequence offsets 819 */ 820 verifyAlignAs(">dna/5-10\nCAAaaa\n", ">protein/20-21\nQ-K\n", 821 "CAA---aaa"); 822 823 /* 824 * insert gaps as per protein, drop gaps within codons 825 */ 826 verifyAlignAs(">dna/10-18\nCA-Aa-aa--AGA\n", ">aa/6-8\n-Q-K--R\n", 827 "---CAA---aaa------AGA"); 828 } 829 830 /** 831 * Helper method that makes mappings and then aligns the first alignment as 832 * the second 833 * 834 * @param fromSeqs 835 * @param toSeqs 836 * @param expected 837 * @throws IOException 838 */ verifyAlignAs(String fromSeqs, String toSeqs, String expected)839 public void verifyAlignAs(String fromSeqs, String toSeqs, String expected) 840 throws IOException 841 { 842 /* 843 * Load alignments and add mappings from nucleotide to protein (or from 844 * first to second if both the same type) 845 */ 846 AlignmentI al1 = loadAlignment(fromSeqs, FileFormat.Fasta); 847 AlignmentI al2 = loadAlignment(toSeqs, FileFormat.Fasta); 848 makeMappings(al1, al2); 849 850 /* 851 * Realign DNA; currently keeping existing gaps in introns only 852 */ 853 ((Alignment) al1).alignAs(al2, false, true); 854 assertEquals(expected, al1.getSequenceAt(0).getSequenceAsString()); 855 } 856 857 /** 858 * Helper method to make mappings between sequences, and add the mappings to 859 * the 'mapped from' alignment 860 * 861 * @param alFrom 862 * @param alTo 863 */ makeMappings(AlignmentI alFrom, AlignmentI alTo)864 public void makeMappings(AlignmentI alFrom, AlignmentI alTo) 865 { 866 int ratio = (alFrom.isNucleotide() == alTo.isNucleotide() ? 1 : 3); 867 868 AlignedCodonFrame acf = new AlignedCodonFrame(); 869 870 for (int i = 0; i < alFrom.getHeight(); i++) 871 { 872 SequenceI seqFrom = alFrom.getSequenceAt(i); 873 SequenceI seqTo = alTo.getSequenceAt(i); 874 MapList ml = new MapList(new int[] { seqFrom.getStart(), 875 seqFrom.getEnd() }, 876 new int[] { seqTo.getStart(), seqTo.getEnd() }, ratio, 1); 877 acf.addMap(seqFrom, seqTo, ml); 878 } 879 880 /* 881 * not sure whether mappings 'belong' or protein or nucleotide 882 * alignment, so adding to both ;~) 883 */ 884 alFrom.addCodonFrame(acf); 885 alTo.addCodonFrame(acf); 886 } 887 888 /** 889 * Test aligning dna as per protein alignment, for the case where there are 890 * introns (i.e. some dna sites have no mapping from a peptide). 891 * 892 * @throws IOException 893 */ 894 @Test(groups = { "Functional" }, enabled = false) 895 // TODO review / update this test after redesign of alignAs method testAlignAs_dnaAsProtein_withIntrons()896 public void testAlignAs_dnaAsProtein_withIntrons() throws IOException 897 { 898 /* 899 * Load alignments and add mappings for cDNA to protein 900 */ 901 String dna1 = "A-Aa-gG-GCC-cT-TT"; 902 String dna2 = "c--CCGgg-TT--T-AA-A"; 903 AlignmentI al1 = loadAlignment(">Dna1/6-17\n" + dna1 904 + "\n>Dna2/20-31\n" + dna2 + "\n", FileFormat.Fasta); 905 AlignmentI al2 = loadAlignment( 906 ">Pep1/7-9\n-P--YK\n>Pep2/11-13\nG-T--F\n", FileFormat.Fasta); 907 AlignedCodonFrame acf = new AlignedCodonFrame(); 908 // Seq1 has intron at dna positions 3,4,9 so splice is AAG GCC TTT 909 // Seq2 has intron at dna positions 1,5,6 so splice is CCG TTT AAA 910 MapList ml1 = new MapList(new int[] { 6, 7, 10, 13, 15, 17 }, new int[] 911 { 7, 9 }, 3, 1); 912 acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml1); 913 MapList ml2 = new MapList(new int[] { 21, 23, 26, 31 }, new int[] { 11, 914 13 }, 3, 1); 915 acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml2); 916 al2.addCodonFrame(acf); 917 918 /* 919 * Align ignoring gaps in dna introns and exons 920 */ 921 ((Alignment) al1).alignAs(al2, false, false); 922 assertEquals("---AAagG------GCCcTTT", al1.getSequenceAt(0) 923 .getSequenceAsString()); 924 // note 1 gap in protein corresponds to 'gg-' in DNA (3 positions) 925 assertEquals("cCCGgg-TTT------AAA", al1.getSequenceAt(1) 926 .getSequenceAsString()); 927 928 /* 929 * Reset and realign, preserving gaps in dna introns and exons 930 */ 931 al1.getSequenceAt(0).setSequence(dna1); 932 al1.getSequenceAt(1).setSequence(dna2); 933 ((Alignment) al1).alignAs(al2, true, true); 934 // String dna1 = "A-Aa-gG-GCC-cT-TT"; 935 // String dna2 = "c--CCGgg-TT--T-AA-A"; 936 // assumption: we include 'the greater of' protein/dna gap lengths, not both 937 assertEquals("---A-Aa-gG------GCC-cT-TT", al1.getSequenceAt(0) 938 .getSequenceAsString()); 939 assertEquals("c--CCGgg-TT--T------AA-A", al1.getSequenceAt(1) 940 .getSequenceAsString()); 941 } 942 943 @Test(groups = "Functional") testCopyConstructor()944 public void testCopyConstructor() throws IOException 945 { 946 AlignmentI protein = loadAlignment(AA_SEQS_1, FileFormat.Fasta); 947 // create sequence and alignment datasets 948 protein.setDataset(null); 949 AlignedCodonFrame acf = new AlignedCodonFrame(); 950 List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[] 951 { acf }); 952 protein.getDataset().setCodonFrames(acfList); 953 AlignmentI copy = new Alignment(protein); 954 955 /* 956 * copy has different aligned sequences but the same dataset sequences 957 */ 958 assertFalse(copy.getSequenceAt(0) == protein.getSequenceAt(0)); 959 assertFalse(copy.getSequenceAt(1) == protein.getSequenceAt(1)); 960 assertSame(copy.getSequenceAt(0).getDatasetSequence(), protein 961 .getSequenceAt(0).getDatasetSequence()); 962 assertSame(copy.getSequenceAt(1).getDatasetSequence(), protein 963 .getSequenceAt(1).getDatasetSequence()); 964 965 // TODO should the copy constructor copy the dataset? 966 // or make a new one referring to the same dataset sequences?? 967 assertNull(copy.getDataset()); 968 // TODO test metadata is copied when AlignmentI is a dataset 969 970 // assertArrayEquals(copy.getDataset().getSequencesArray(), protein 971 // .getDataset().getSequencesArray()); 972 } 973 974 /** 975 * Test behaviour of createDataset 976 * 977 * @throws IOException 978 */ 979 @Test(groups = "Functional") testCreateDatasetAlignment()980 public void testCreateDatasetAlignment() throws IOException 981 { 982 AlignmentI protein = new FormatAdapter().readFile(AA_SEQS_1, 983 DataSourceType.PASTE, FileFormat.Fasta); 984 /* 985 * create a dataset sequence on first sequence 986 * leave the second without one 987 */ 988 protein.getSequenceAt(0).createDatasetSequence(); 989 assertNotNull(protein.getSequenceAt(0).getDatasetSequence()); 990 assertNull(protein.getSequenceAt(1).getDatasetSequence()); 991 992 /* 993 * add a mapping to the alignment 994 */ 995 AlignedCodonFrame acf = new AlignedCodonFrame(); 996 protein.addCodonFrame(acf); 997 assertNull(protein.getDataset()); 998 assertTrue(protein.getCodonFrames().contains(acf)); 999 1000 /* 1001 * create the alignment dataset 1002 * note this creates sequence datasets where missing 1003 * as a side-effect (in this case, on seq2 1004 */ 1005 // TODO promote this method to AlignmentI 1006 ((Alignment) protein).createDatasetAlignment(); 1007 1008 AlignmentI ds = protein.getDataset(); 1009 1010 // side-effect: dataset created on second sequence 1011 assertNotNull(protein.getSequenceAt(1).getDatasetSequence()); 1012 // dataset alignment has references to dataset sequences 1013 assertEquals(ds.getSequenceAt(0), protein.getSequenceAt(0) 1014 .getDatasetSequence()); 1015 assertEquals(ds.getSequenceAt(1), protein.getSequenceAt(1) 1016 .getDatasetSequence()); 1017 1018 // codon frames should have been moved to the dataset 1019 // getCodonFrames() should delegate to the dataset: 1020 assertTrue(protein.getCodonFrames().contains(acf)); 1021 // prove the codon frames are indeed on the dataset: 1022 assertTrue(ds.getCodonFrames().contains(acf)); 1023 } 1024 1025 /** 1026 * tests the addition of *all* sequences referred to by a sequence being added 1027 * to the dataset 1028 */ 1029 @Test(groups = "Functional") testCreateDatasetAlignmentWithMappedToSeqs()1030 public void testCreateDatasetAlignmentWithMappedToSeqs() 1031 { 1032 // Alignment with two sequences, gapped. 1033 SequenceI sq1 = new Sequence("sq1", "A--SDF"); 1034 SequenceI sq2 = new Sequence("sq2", "G--TRQ"); 1035 1036 // cross-references to two more sequences. 1037 DBRefEntry dbr = new DBRefEntry("SQ1", "", "sq3"); 1038 SequenceI sq3 = new Sequence("sq3", "VWANG"); 1039 dbr.setMap(new Mapping(sq3, new MapList(new int[] { 1, 4 }, new int[] { 1040 2, 5 }, 1, 1))); 1041 sq1.addDBRef(dbr); 1042 1043 SequenceI sq4 = new Sequence("sq4", "ERKWI"); 1044 DBRefEntry dbr2 = new DBRefEntry("SQ2", "", "sq4"); 1045 dbr2.setMap(new Mapping(sq4, new MapList(new int[] { 1, 4 }, new int[] { 1046 2, 5 }, 1, 1))); 1047 sq2.addDBRef(dbr2); 1048 // and a 1:1 codonframe mapping between them. 1049 AlignedCodonFrame alc = new AlignedCodonFrame(); 1050 alc.addMap(sq1, sq2, new MapList(new int[] { 1, 4 }, 1051 new int[] { 1, 4 }, 1, 1)); 1052 1053 AlignmentI protein = new Alignment(new SequenceI[] { sq1, sq2 }); 1054 1055 /* 1056 * create the alignment dataset 1057 * note this creates sequence datasets where missing 1058 * as a side-effect (in this case, on seq2 1059 */ 1060 1061 // TODO promote this method to AlignmentI 1062 ((Alignment) protein).createDatasetAlignment(); 1063 1064 AlignmentI ds = protein.getDataset(); 1065 1066 // should be 4 sequences in dataset - two materialised, and two propagated 1067 // from dbref 1068 assertEquals(4, ds.getHeight()); 1069 assertTrue(ds.getSequences().contains(sq1.getDatasetSequence())); 1070 assertTrue(ds.getSequences().contains(sq2.getDatasetSequence())); 1071 assertTrue(ds.getSequences().contains(sq3)); 1072 assertTrue(ds.getSequences().contains(sq4)); 1073 // Should have one codon frame mapping between sq1 and sq2 via dataset 1074 // sequences 1075 assertEquals(ds.getCodonFrame(sq1.getDatasetSequence()), 1076 ds.getCodonFrame(sq2.getDatasetSequence())); 1077 } 1078 1079 @Test(groups = "Functional") testAddCodonFrame()1080 public void testAddCodonFrame() 1081 { 1082 AlignmentI align = new Alignment(new SequenceI[] {}); 1083 AlignedCodonFrame acf = new AlignedCodonFrame(); 1084 align.addCodonFrame(acf); 1085 assertEquals(1, align.getCodonFrames().size()); 1086 assertTrue(align.getCodonFrames().contains(acf)); 1087 // can't add the same object twice: 1088 align.addCodonFrame(acf); 1089 assertEquals(1, align.getCodonFrames().size()); 1090 1091 // create dataset alignment - mappings move to dataset 1092 ((Alignment) align).createDatasetAlignment(); 1093 assertSame(align.getCodonFrames(), align.getDataset().getCodonFrames()); 1094 assertEquals(1, align.getCodonFrames().size()); 1095 1096 AlignedCodonFrame acf2 = new AlignedCodonFrame(); 1097 align.addCodonFrame(acf2); 1098 assertTrue(align.getDataset().getCodonFrames().contains(acf)); 1099 } 1100 1101 @Test(groups = "Functional") testAddSequencePreserveDatasetIntegrity()1102 public void testAddSequencePreserveDatasetIntegrity() 1103 { 1104 Sequence seq = new Sequence("testSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); 1105 Alignment align = new Alignment(new SequenceI[] { seq }); 1106 align.createDatasetAlignment(); 1107 AlignmentI ds = align.getDataset(); 1108 SequenceI copy = new Sequence(seq); 1109 copy.insertCharAt(3, 5, '-'); 1110 align.addSequence(copy); 1111 Assert.assertEquals(align.getDataset().getHeight(), 1, 1112 "Dataset shouldn't have more than one sequence."); 1113 1114 Sequence seq2 = new Sequence("newtestSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); 1115 align.addSequence(seq2); 1116 Assert.assertEquals(align.getDataset().getHeight(), 2, 1117 "Dataset should now have two sequences."); 1118 1119 assertAlignmentDatasetRefs(align, 1120 "addSequence broke dataset reference integrity"); 1121 } 1122 1123 /** 1124 * Tests that dbrefs with mappings to sequence get updated if the sequence 1125 * acquires a dataset sequence 1126 */ 1127 @Test(groups = "Functional") testCreateDataset_updateDbrefMappings()1128 public void testCreateDataset_updateDbrefMappings() 1129 { 1130 SequenceI pep = new Sequence("pep", "ASD"); 1131 SequenceI dna = new Sequence("dna", "aaaGCCTCGGATggg"); 1132 SequenceI cds = new Sequence("cds", "GCCTCGGAT"); 1133 1134 // add dbref from dna to peptide 1135 DBRefEntry dbr = new DBRefEntry("UNIPROT", "", "pep"); 1136 dbr.setMap(new Mapping(pep, new MapList(new int[] { 4, 15 }, new int[] { 1137 1, 4 }, 3, 1))); 1138 dna.addDBRef(dbr); 1139 1140 // add dbref from dna to peptide 1141 DBRefEntry dbr2 = new DBRefEntry("UNIPROT", "", "pep"); 1142 dbr2.setMap(new Mapping(pep, new MapList(new int[] { 1, 12 }, new int[] 1143 { 1, 4 }, 3, 1))); 1144 cds.addDBRef(dbr2); 1145 1146 // add dbref from peptide to dna 1147 DBRefEntry dbr3 = new DBRefEntry("EMBL", "", "dna"); 1148 dbr3.setMap(new Mapping(dna, new MapList(new int[] { 1, 4 }, new int[] { 1149 4, 15 }, 1, 3))); 1150 pep.addDBRef(dbr3); 1151 1152 // add dbref from peptide to cds 1153 DBRefEntry dbr4 = new DBRefEntry("EMBLCDS", "", "cds"); 1154 dbr4.setMap(new Mapping(cds, new MapList(new int[] { 1, 4 }, new int[] { 1155 1, 12 }, 1, 3))); 1156 pep.addDBRef(dbr4); 1157 1158 AlignmentI protein = new Alignment(new SequenceI[] { pep }); 1159 1160 /* 1161 * create the alignment dataset 1162 */ 1163 ((Alignment) protein).createDatasetAlignment(); 1164 1165 AlignmentI ds = protein.getDataset(); 1166 1167 // should be 3 sequences in dataset 1168 assertEquals(3, ds.getHeight()); 1169 assertTrue(ds.getSequences().contains(pep.getDatasetSequence())); 1170 assertTrue(ds.getSequences().contains(dna)); 1171 assertTrue(ds.getSequences().contains(cds)); 1172 1173 /* 1174 * verify peptide.cdsdbref.peptidedbref is now mapped to peptide dataset 1175 */ 1176 DBRefEntry[] dbRefs = pep.getDBRefs(); 1177 assertEquals(2, dbRefs.length); 1178 assertSame(dna, dbRefs[0].map.to); 1179 assertSame(cds, dbRefs[1].map.to); 1180 assertEquals(1, dna.getDBRefs().length); 1181 assertSame(pep.getDatasetSequence(), dna.getDBRefs()[0].map.to); 1182 assertEquals(1, cds.getDBRefs().length); 1183 assertSame(pep.getDatasetSequence(), cds.getDBRefs()[0].map.to); 1184 } 1185 1186 @Test(groups = { "Functional" }) testFindGroup()1187 public void testFindGroup() 1188 { 1189 SequenceI seq1 = new Sequence("seq1", "ABCDEF---GHI"); 1190 SequenceI seq2 = new Sequence("seq2", "---JKLMNO---"); 1191 AlignmentI a = new Alignment(new SequenceI[] { seq1, seq2 }); 1192 1193 assertNull(a.findGroup(null, 0)); 1194 assertNull(a.findGroup(seq1, 1)); 1195 assertNull(a.findGroup(seq1, -1)); 1196 1197 /* 1198 * add a group consisting of just "DEF" 1199 */ 1200 SequenceGroup sg1 = new SequenceGroup(); 1201 sg1.addSequence(seq1, false); 1202 sg1.setStartRes(3); 1203 sg1.setEndRes(5); 1204 a.addGroup(sg1); 1205 1206 assertNull(a.findGroup(seq1, 2)); // position not in group 1207 assertNull(a.findGroup(seq1, 6)); // position not in group 1208 assertNull(a.findGroup(seq2, 5)); // sequence not in group 1209 assertSame(a.findGroup(seq1, 3), sg1); // yes 1210 assertSame(a.findGroup(seq1, 4), sg1); 1211 assertSame(a.findGroup(seq1, 5), sg1); 1212 1213 /* 1214 * add a group consisting of 1215 * EF-- 1216 * KLMN 1217 */ 1218 SequenceGroup sg2 = new SequenceGroup(); 1219 sg2.addSequence(seq1, false); 1220 sg2.addSequence(seq2, false); 1221 sg2.setStartRes(4); 1222 sg2.setEndRes(7); 1223 a.addGroup(sg2); 1224 1225 assertNull(a.findGroup(seq1, 2)); // unchanged 1226 assertSame(a.findGroup(seq1, 3), sg1); // unchanged 1227 /* 1228 * if a residue is in more than one group, method returns 1229 * the first found (in order groups were added) 1230 */ 1231 assertSame(a.findGroup(seq1, 4), sg1); 1232 assertSame(a.findGroup(seq1, 5), sg1); 1233 1234 /* 1235 * seq2 only belongs to the second group 1236 */ 1237 assertSame(a.findGroup(seq2, 4), sg2); 1238 assertSame(a.findGroup(seq2, 5), sg2); 1239 assertSame(a.findGroup(seq2, 6), sg2); 1240 assertSame(a.findGroup(seq2, 7), sg2); 1241 assertNull(a.findGroup(seq2, 3)); 1242 assertNull(a.findGroup(seq2, 8)); 1243 } 1244 1245 @Test(groups = { "Functional" }) testDeleteSequenceByIndex()1246 public void testDeleteSequenceByIndex() 1247 { 1248 // create random alignment 1249 AlignmentGenerator gen = new AlignmentGenerator(false); 1250 AlignmentI a = gen.generate(20, 15, 123, 5, 5); 1251 1252 // delete sequence 10, alignment reduced by 1 1253 int height = a.getAbsoluteHeight(); 1254 a.deleteSequence(10); 1255 assertEquals(a.getAbsoluteHeight(), height - 1); 1256 1257 // try to delete -ve index, nothing happens 1258 a.deleteSequence(-1); 1259 assertEquals(a.getAbsoluteHeight(), height - 1); 1260 1261 // try to delete beyond end of alignment, nothing happens 1262 a.deleteSequence(14); 1263 assertEquals(a.getAbsoluteHeight(), height - 1); 1264 } 1265 1266 @Test(groups = { "Functional" }) testDeleteSequenceBySeq()1267 public void testDeleteSequenceBySeq() 1268 { 1269 // create random alignment 1270 AlignmentGenerator gen = new AlignmentGenerator(false); 1271 AlignmentI a = gen.generate(20, 15, 123, 5, 5); 1272 1273 // delete sequence 10, alignment reduced by 1 1274 int height = a.getAbsoluteHeight(); 1275 SequenceI seq = a.getSequenceAt(10); 1276 a.deleteSequence(seq); 1277 assertEquals(a.getAbsoluteHeight(), height - 1); 1278 1279 // try to delete non-existent sequence, nothing happens 1280 seq = new Sequence("cds", "GCCTCGGAT"); 1281 assertEquals(a.getAbsoluteHeight(), height - 1); 1282 } 1283 1284 @Test(groups = { "Functional" }) testDeleteHiddenSequence()1285 public void testDeleteHiddenSequence() 1286 { 1287 // create random alignment 1288 AlignmentGenerator gen = new AlignmentGenerator(false); 1289 AlignmentI a = gen.generate(20, 15, 123, 5, 5); 1290 1291 // delete a sequence which is hidden, check it is NOT removed from hidden 1292 // sequences 1293 int height = a.getAbsoluteHeight(); 1294 SequenceI seq = a.getSequenceAt(2); 1295 a.getHiddenSequences().hideSequence(seq); 1296 assertEquals(a.getHiddenSequences().getSize(), 1); 1297 a.deleteSequence(2); 1298 assertEquals(a.getAbsoluteHeight(), height - 1); 1299 assertEquals(a.getHiddenSequences().getSize(), 1); 1300 1301 // delete a sequence which is not hidden, check hiddenSequences are not 1302 // affected 1303 a.deleteSequence(10); 1304 assertEquals(a.getAbsoluteHeight(), height - 2); 1305 assertEquals(a.getHiddenSequences().getSize(), 1); 1306 } 1307 1308 @Test( 1309 groups = "Functional", 1310 expectedExceptions = { IllegalArgumentException.class }) testSetDataset_selfReference()1311 public void testSetDataset_selfReference() 1312 { 1313 SequenceI seq = new Sequence("a", "a"); 1314 AlignmentI alignment = new Alignment(new SequenceI[] { seq }); 1315 alignment.setDataset(alignment); 1316 } 1317 1318 @Test(groups = "Functional") testAppend()1319 public void testAppend() 1320 { 1321 SequenceI seq = new Sequence("seq1", "FRMLPSRT-A--L-"); 1322 AlignmentI alignment = new Alignment(new SequenceI[] { seq }); 1323 alignment.setGapCharacter('-'); 1324 SequenceI seq2 = new Sequence("seq1", "KP..L.FQII."); 1325 AlignmentI alignment2 = new Alignment(new SequenceI[] { seq2 }); 1326 alignment2.setGapCharacter('.'); 1327 1328 alignment.append(alignment2); 1329 1330 assertEquals('-', alignment.getGapCharacter()); 1331 assertSame(seq, alignment.getSequenceAt(0)); 1332 assertEquals("KP--L-FQII-", alignment.getSequenceAt(1) 1333 .getSequenceAsString()); 1334 1335 // todo test coverage for annotations, mappings, groups, 1336 // hidden sequences, properties 1337 } 1338 1339 /** 1340 * test that calcId == null on findOrCreate doesn't raise an NPE, and yields 1341 * an annotation with a null calcId 1342 * 1343 */ 1344 @Test(groups = "Functional") testFindOrCreateForNullCalcId()1345 public void testFindOrCreateForNullCalcId() 1346 { 1347 SequenceI seq = new Sequence("seq1", "FRMLPSRT-A--L-"); 1348 AlignmentI alignment = new Alignment(new SequenceI[] { seq }); 1349 1350 AlignmentAnnotation ala = alignment.findOrCreateAnnotation( 1351 "Temperature Factor", null, false, seq, null); 1352 assertNotNull(ala); 1353 assertEquals(seq, ala.sequenceRef); 1354 assertEquals("", ala.calcId); 1355 } 1356 1357 @Test(groups = "Functional") testPropagateInsertions()1358 public void testPropagateInsertions() 1359 { 1360 // create an alignment with no gaps - this will be the profile seq and other 1361 // JPRED seqs 1362 AlignmentGenerator gen = new AlignmentGenerator(false); 1363 AlignmentI al = gen.generate(25, 10, 1234, 0, 0); 1364 1365 // get the profileseq 1366 SequenceI profileseq = al.getSequenceAt(0); 1367 SequenceI gappedseq = new Sequence(profileseq); 1368 gappedseq.insertCharAt(5, al.getGapCharacter()); 1369 gappedseq.insertCharAt(6, al.getGapCharacter()); 1370 gappedseq.insertCharAt(7, al.getGapCharacter()); 1371 gappedseq.insertCharAt(8, al.getGapCharacter()); 1372 1373 // force different kinds of padding 1374 al.getSequenceAt(3).deleteChars(2, 23); 1375 al.getSequenceAt(4).deleteChars(2, 27); 1376 al.getSequenceAt(5).deleteChars(10, 27); 1377 1378 // create an alignment view with the gapped sequence 1379 SequenceI[] seqs = new SequenceI[1]; 1380 seqs[0] = gappedseq; 1381 AlignmentI newal = new Alignment(seqs); 1382 HiddenColumns hidden = new HiddenColumns(); 1383 hidden.hideColumns(15, 17); 1384 1385 AlignmentView view = new AlignmentView(newal, hidden, null, true, false, 1386 false); 1387 1388 // confirm that original contigs are as expected 1389 Iterator<int[]> visible = hidden.getVisContigsIterator(0, 25, false); 1390 int[] region = visible.next(); 1391 assertEquals("[0, 14]", Arrays.toString(region)); 1392 region = visible.next(); 1393 assertEquals("[18, 24]", Arrays.toString(region)); 1394 1395 // propagate insertions 1396 HiddenColumns result = al.propagateInsertions(profileseq, view); 1397 1398 // confirm that the contigs have changed to account for the gaps 1399 visible = result.getVisContigsIterator(0, 25, false); 1400 region = visible.next(); 1401 assertEquals("[0, 10]", Arrays.toString(region)); 1402 region = visible.next(); 1403 assertEquals("[14, 24]", Arrays.toString(region)); 1404 1405 // confirm the alignment has been changed so that the other sequences have 1406 // gaps inserted where the columns are hidden 1407 assertFalse(Comparison.isGap(al.getSequenceAt(1).getSequence()[10])); 1408 assertTrue(Comparison.isGap(al.getSequenceAt(1).getSequence()[11])); 1409 assertTrue(Comparison.isGap(al.getSequenceAt(1).getSequence()[12])); 1410 assertTrue(Comparison.isGap(al.getSequenceAt(1).getSequence()[13])); 1411 assertFalse(Comparison.isGap(al.getSequenceAt(1).getSequence()[14])); 1412 1413 } 1414 1415 @Test(groups = "Functional") testPropagateInsertionsOverlap()1416 public void testPropagateInsertionsOverlap() 1417 { 1418 // test propagateInsertions where gaps and hiddenColumns overlap 1419 1420 // create an alignment with no gaps - this will be the profile seq and other 1421 // JPRED seqs 1422 AlignmentGenerator gen = new AlignmentGenerator(false); 1423 AlignmentI al = gen.generate(20, 10, 1234, 0, 0); 1424 1425 // get the profileseq 1426 SequenceI profileseq = al.getSequenceAt(0); 1427 SequenceI gappedseq = new Sequence(profileseq); 1428 gappedseq.insertCharAt(5, al.getGapCharacter()); 1429 gappedseq.insertCharAt(6, al.getGapCharacter()); 1430 gappedseq.insertCharAt(7, al.getGapCharacter()); 1431 gappedseq.insertCharAt(8, al.getGapCharacter()); 1432 1433 // create an alignment view with the gapped sequence 1434 SequenceI[] seqs = new SequenceI[1]; 1435 seqs[0] = gappedseq; 1436 AlignmentI newal = new Alignment(seqs); 1437 1438 // hide columns so that some overlap with the gaps 1439 HiddenColumns hidden = new HiddenColumns(); 1440 hidden.hideColumns(7, 10); 1441 1442 AlignmentView view = new AlignmentView(newal, hidden, null, true, false, 1443 false); 1444 1445 // confirm that original contigs are as expected 1446 Iterator<int[]> visible = hidden.getVisContigsIterator(0, 20, false); 1447 int[] region = visible.next(); 1448 assertEquals("[0, 6]", Arrays.toString(region)); 1449 region = visible.next(); 1450 assertEquals("[11, 19]", Arrays.toString(region)); 1451 assertFalse(visible.hasNext()); 1452 1453 // propagate insertions 1454 HiddenColumns result = al.propagateInsertions(profileseq, view); 1455 1456 // confirm that the contigs have changed to account for the gaps 1457 visible = result.getVisContigsIterator(0, 20, false); 1458 region = visible.next(); 1459 assertEquals("[0, 4]", Arrays.toString(region)); 1460 region = visible.next(); 1461 assertEquals("[7, 19]", Arrays.toString(region)); 1462 assertFalse(visible.hasNext()); 1463 1464 // confirm the alignment has been changed so that the other sequences have 1465 // gaps inserted where the columns are hidden 1466 assertFalse(Comparison.isGap(al.getSequenceAt(1).getSequence()[4])); 1467 assertTrue(Comparison.isGap(al.getSequenceAt(1).getSequence()[5])); 1468 assertTrue(Comparison.isGap(al.getSequenceAt(1).getSequence()[6])); 1469 assertFalse(Comparison.isGap(al.getSequenceAt(1).getSequence()[7])); 1470 } 1471 1472 @Test(groups = { "Functional" }) testPadGaps()1473 public void testPadGaps() 1474 { 1475 SequenceI seq1 = new Sequence("seq1", "ABCDEF--"); 1476 SequenceI seq2 = new Sequence("seq2", "-JKLMNO--"); 1477 SequenceI seq3 = new Sequence("seq2", "-PQR"); 1478 AlignmentI a = new Alignment(new SequenceI[] { seq1, seq2, seq3 }); 1479 a.setGapCharacter('.'); // this replaces existing gaps 1480 assertEquals("ABCDEF..", seq1.getSequenceAsString()); 1481 a.padGaps(); 1482 // trailing gaps are pruned, short sequences padded with gap character 1483 assertEquals("ABCDEF.", seq1.getSequenceAsString()); 1484 assertEquals(".JKLMNO", seq2.getSequenceAsString()); 1485 assertEquals(".PQR...", seq3.getSequenceAsString()); 1486 } 1487 1488 /** 1489 * Test for setHiddenColumns, to check it returns true if the hidden columns 1490 * have changed, else false 1491 */ 1492 @Test(groups = { "Functional" }) testSetHiddenColumns()1493 public void testSetHiddenColumns() 1494 { 1495 AlignmentI al = new Alignment(new SequenceI[] {}); 1496 assertFalse(al.getHiddenColumns().hasHiddenColumns()); 1497 1498 HiddenColumns hc = new HiddenColumns(); 1499 assertFalse(al.setHiddenColumns(hc)); // no change 1500 assertSame(hc, al.getHiddenColumns()); 1501 1502 hc.hideColumns(2, 4); 1503 assertTrue(al.getHiddenColumns().hasHiddenColumns()); 1504 1505 /* 1506 * set a different object but with the same columns hidden 1507 */ 1508 HiddenColumns hc2 = new HiddenColumns(); 1509 hc2.hideColumns(2, 4); 1510 assertFalse(al.setHiddenColumns(hc2)); // no change 1511 assertSame(hc2, al.getHiddenColumns()); 1512 1513 assertTrue(al.setHiddenColumns(null)); 1514 assertNull(al.getHiddenColumns()); 1515 assertTrue(al.setHiddenColumns(hc)); 1516 assertSame(hc, al.getHiddenColumns()); 1517 1518 al.getHiddenColumns().hideColumns(10, 12); 1519 hc2.hideColumns(10, 12); 1520 assertFalse(al.setHiddenColumns(hc2)); // no change 1521 1522 /* 1523 * hide columns 15-16 then 17-18 in hc 1524 * hide columns 15-18 in hc2 1525 * these are not now 'equal' objects even though they 1526 * represent the same set of columns 1527 */ 1528 assertSame(hc2, al.getHiddenColumns()); 1529 hc.hideColumns(15, 16); 1530 hc.hideColumns(17, 18); 1531 hc2.hideColumns(15, 18); 1532 assertFalse(hc.equals(hc2)); 1533 assertTrue(al.setHiddenColumns(hc)); // 'changed' 1534 } 1535 1536 @Test(groups = { "Functional" }) testGetWidth()1537 public void testGetWidth() 1538 { 1539 SequenceI seq1 = new Sequence("seq1", "ABCDEF--"); 1540 SequenceI seq2 = new Sequence("seq2", "-JKLMNO--"); 1541 SequenceI seq3 = new Sequence("seq2", "-PQR"); 1542 AlignmentI a = new Alignment(new SequenceI[] { seq1, seq2, seq3 }); 1543 1544 assertEquals(9, a.getWidth()); 1545 1546 // width includes hidden columns 1547 a.getHiddenColumns().hideColumns(2, 5); 1548 assertEquals(9, a.getWidth()); 1549 } 1550 1551 @Test(groups = { "Functional" }) testGetVisibleWidth()1552 public void testGetVisibleWidth() 1553 { 1554 SequenceI seq1 = new Sequence("seq1", "ABCDEF--"); 1555 SequenceI seq2 = new Sequence("seq2", "-JKLMNO--"); 1556 SequenceI seq3 = new Sequence("seq2", "-PQR"); 1557 AlignmentI a = new Alignment(new SequenceI[] { seq1, seq2, seq3 }); 1558 1559 assertEquals(9, a.getVisibleWidth()); 1560 1561 // width excludes hidden columns 1562 a.getHiddenColumns().hideColumns(2, 5); 1563 assertEquals(5, a.getVisibleWidth()); 1564 } 1565 } 1566