# Copyright 2009-2017 by Peter Cock. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Unit tests for the SeqRecord class.

Covers creation and argument validation, the standard methods (string
formatting, slicing, addition, reverse complement, translation) and the
comparison and hashing operations which are expected to raise.
"""
import unittest

from Bio import SeqIO
from Bio.Seq import MutableSeq
from Bio.Seq import Seq
from Bio.SeqFeature import AfterPosition
from Bio.SeqFeature import BeforePosition
from Bio.SeqFeature import ExactPosition
from Bio.SeqFeature import FeatureLocation
from Bio.SeqFeature import OneOfPosition
from Bio.SeqFeature import SeqFeature
from Bio.SeqFeature import WithinPosition
from Bio.SeqRecord import SeqRecord


class SeqRecordCreation(unittest.TestCase):
    """Test basic creation of SeqRecords."""

    def test_annotations(self):
        """Pass in annotations to SeqRecords."""
        rec = SeqRecord(Seq("ACGT"), id="Test", name="Test", description="Test")
        self.assertEqual(rec.annotations, {})
        rec = SeqRecord(
            Seq("ACGT"),
            id="Test",
            name="Test",
            description="Test",
            annotations={"test": ["a test"]},
        )
        self.assertEqual(rec.annotations["test"], ["a test"])

    def test_letter_annotations(self):
        """Pass in letter annotations to SeqRecords."""
        # Either exception type is accepted for a wrong-length annotation.
        bad_value_errors = (TypeError, ValueError)

        rec = SeqRecord(Seq("ACGT"), id="Test", name="Test", description="Test")
        self.assertEqual(rec.annotations, {})
        self.assertEqual(rec.letter_annotations, {})
        rec = SeqRecord(
            Seq("ACGT"),
            id="Test",
            name="Test",
            description="Test",
            letter_annotations={"test": [1, 2, 3, 4]},
        )
        self.assertEqual(rec.letter_annotations["test"], [1, 2, 3, 4])
        # Now try modifying it to a bad value...
        with self.assertRaises(
            bad_value_errors, msg="Adding a bad letter_annotation should fail!"
        ):
            rec.letter_annotations["bad"] = "abc"
        # Now try setting it afterwards to a bad value...
        rec = SeqRecord(Seq("ACGT"), id="Test", name="Test", description="Test")
        with self.assertRaises(
            bad_value_errors, msg="Changing to bad letter_annotations should fail!"
        ):
            rec.letter_annotations = {"test": [1, 2, 3]}
        # Now try setting it at creation time to a bad value...
        with self.assertRaises(
            bad_value_errors, msg="Wrong length letter_annotations should fail!"
        ):
            SeqRecord(
                Seq("ACGT"),
                id="Test",
                name="Test",
                description="Test",
                letter_annotations={"test": [1, 2, 3]},
            )

    def test_replacing_seq(self):
        """Replacing .seq if .letter_annotation present."""
        rec = SeqRecord(
            Seq("ACGT"),
            id="Test",
            name="Test",
            description="Test",
            letter_annotations={"example": [1, 2, 3, 4]},
        )
        with self.assertRaises(
            ValueError,
            msg="Changing .seq length with letter_annotations present should fail!",
        ) as cm:
            rec.seq = Seq("ACGTACGT")
        self.assertEqual(
            str(cm.exception), "You must empty the letter annotations first!"
        )
        # Check we can replace IF the length is the same
        self.assertEqual(rec.seq, "ACGT")
        self.assertEqual(rec.letter_annotations, {"example": [1, 2, 3, 4]})
        rec.seq = Seq("NNNN")
        self.assertEqual(rec.seq, "NNNN")
        self.assertEqual(rec.letter_annotations, {"example": [1, 2, 3, 4]})

    def test_valid_id(self):
        """Passing a dict as the id raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), id={})

    def test_valid_name(self):
        """Passing a dict as the name raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), name={})

    def test_valid_description(self):
        """Passing a dict as the description raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), description={})

    def test_valid_dbxrefs(self):
        """Passing a dict as the dbxrefs raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), dbxrefs={})

    def test_valid_annotations(self):
        """Passing a list as the annotations raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), annotations=[])

    def test_valid_features(self):
        """Passing a dict as the features raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), features={})


class SeqRecordMethods(unittest.TestCase):
    """Test SeqRecord methods."""

    def setUp(self):
        """Create a 26 letter record with four features and annotation."""
        f0 = SeqFeature(
            FeatureLocation(0, 26),
            type="source",
            qualifiers={"mol_type": ["fake protein"]},
        )
        f1 = SeqFeature(FeatureLocation(0, ExactPosition(10)))
        f2 = SeqFeature(
            FeatureLocation(WithinPosition(12, left=12, right=15), BeforePosition(22))
        )
        f3 = SeqFeature(
            FeatureLocation(
                AfterPosition(16),
                OneOfPosition(26, [ExactPosition(25), AfterPosition(26)]),
            )
        )
        self.record = SeqRecord(
            Seq("ABCDEFGHIJKLMNOPQRSTUVWZYX"),
            id="TestID",
            name="TestName",
            description="TestDescr",
            dbxrefs=["TestXRef"],
            annotations={"k": "v"},
            letter_annotations={"fake": "X" * 26},
            features=[f0, f1, f2, f3],
        )

    def test_iter(self):
        """The first item from iterating over the record is its first letter."""
        # next() fails loudly if iteration yields nothing, whereas a
        # for/break loop would silently make no assertion at all.
        self.assertEqual("A", next(iter(self.record)))

    def test_contains(self):
        """A sub-sequence is reported as contained in the record."""
        self.assertIn(Seq("ABC"), self.record)

    def test_str(self):
        """Check the multi-line str() summary of the record."""
        expected = "\n".join(
            [
                "ID: TestID",
                "Name: TestName",
                "Description: TestDescr",
                "Database cross-references: TestXRef",
                "Number of features: 4",
                "/k=v",
                "Per letter annotation for: fake",
                "Seq('ABCDEFGHIJKLMNOPQRSTUVWZYX')",
            ]
        )
        self.assertEqual(expected, str(self.record))

    def test_repr(self):
        """Check the repr() of the record."""
        expected = (
            "SeqRecord(seq=Seq('ABCDEFGHIJKLMNOPQRSTUVWZYX'), "
            "id='TestID', name='TestName', description='TestDescr', dbxrefs=['TestXRef'])"
        )
        self.assertEqual(expected, repr(self.record))

    def test_format(self):
        """Check the format method with the FASTA format."""
        expected = ">TestID TestDescr\nABCDEFGHIJKLMNOPQRSTUVWZYX\n"
        self.assertEqual(expected, self.record.format("fasta"))

    def test_format_str(self):
        """Check string formatting with the FASTA format specifier."""
        expected = ">TestID TestDescr\nABCDEFGHIJKLMNOPQRSTUVWZYX\n"
        self.assertEqual(expected, f"{self.record:fasta}")

    def test_format_str_binary(self):
        """String formatting with the binary SFF format raises ValueError."""
        with self.assertRaisesRegex(
            ValueError, "Binary format sff cannot be used with SeqRecord format method"
        ):
            f"{self.record:sff}"

    def test_format_spaces(self):
        """Check a run of spaces in the description is kept in FASTA output."""
        rec = SeqRecord(
            Seq("ABCDEFGHIJKLMNOPQRSTUVWZYX"),
            id="TestID",
            name="TestName",
            description="TestDescr",
        )
        # Build the run of five spaces explicitly so it cannot be lost if
        # the whitespace in this file is ever normalised.
        description = "TestDescr" + " " * 5 + "with5spaces"
        rec.description = description
        expected = f">TestID {description}\nABCDEFGHIJKLMNOPQRSTUVWZYX\n"
        self.assertEqual(expected, rec.format("fasta"))

    def test_upper(self):
        """Check the upper method (applied to a lower case copy)."""
        self.assertEqual("ABCDEFGHIJKLMNOPQRSTUVWZYX", self.record.lower().upper().seq)

    def test_lower(self):
        """Check the lower method."""
        self.assertEqual("abcdefghijklmnopqrstuvwzyx", self.record.lower().seq)

    def test_slicing(self):
        """Check integer and slice indexing, and that a string index fails."""
        self.assertEqual("B", self.record[1])
        self.assertEqual("BC", self.record[1:3].seq)
        with self.assertRaises(ValueError):
            self.record["a"]

    def test_slice_variants(self):
        """Simple slices using different start/end values."""
        # Both loop-invariant, so computed once rather than per iteration.
        bounds = list(range(-30, 30)) + [None]
        full_seq_str = str(self.record.seq)
        for start in bounds:
            for end in bounds:
                if start is None and end is None:
                    continue
                rec = self.record[start:end]
                seq = self.record.seq[start:end]
                seq_str = full_seq_str[start:end]
                self.assertEqual(seq_str, str(seq))
                self.assertEqual(seq_str, str(rec.seq))
                self.assertEqual("X" * len(seq_str), rec.letter_annotations["fake"])

    def test_slice_simple(self):
        """Simple slice."""
        rec = self.record
        self.assertEqual(len(rec), 26)
        left = rec[:10]
        self.assertEqual(left.seq, rec.seq[:10])
        right = rec[-10:]
        self.assertEqual(right.seq, rec.seq[-10:])
        mid = rec[12:22]
        self.assertEqual(mid.seq, rec.seq[12:22])
        for region, sub in (("left", left), ("right", right), ("mid", mid)):
            with self.subTest(region=region):
                self.assertEqual(len(sub), 10)
                self.assertEqual(sub.id, "TestID")
                self.assertEqual(sub.name, "TestName")
                self.assertEqual(sub.description, "TestDescr")
                self.assertEqual(sub.letter_annotations, {"fake": "X" * 10})
                self.assertEqual(sub.dbxrefs, [])  # May change this...
                self.assertEqual(sub.annotations, {})  # May change this...
                self.assertEqual(len(sub.features), 1)
                # By construction, each feature matches the full sliced region:
                self.assertEqual(sub.features[0].extract(sub.seq), sub.seq)

    def test_slice_zero(self):
        """Zero slice."""
        rec = self.record
        self.assertEqual(len(rec), 26)
        self.assertEqual(len(rec[2:-2]), 22)
        self.assertEqual(len(rec[5:2]), 0)
        self.assertEqual(len(rec[5:2][2:-2]), 0)

    def test_add_simple(self):
        """Simple addition."""
        rec = self.record + self.record
        self.assertEqual(len(rec), 52)
        self.assertEqual(rec.id, "TestID")
        self.assertEqual(rec.name, "TestName")
        self.assertEqual(rec.description, "TestDescr")
        self.assertEqual(rec.dbxrefs, ["TestXRef"])
        self.assertEqual(rec.annotations, {"k": "v"})
        self.assertEqual(rec.letter_annotations, {"fake": "X" * 52})
        self.assertEqual(len(rec.features), 2 * len(self.record.features))

    def test_add_seq(self):
        """Simple addition of Seq or string."""
        for other in [Seq("BIO"), "BIO"]:
            with self.subTest(other=other):
                rec = self.record + other  # will use SeqRecord's __add__ method
                self.assertEqual(len(rec), 26 + 3)
                self.assertEqual(rec.seq, str(self.record.seq) + "BIO")
                self.assertEqual(rec.id, "TestID")
                self.assertEqual(rec.name, "TestName")
                self.assertEqual(rec.description, "TestDescr")
                self.assertEqual(rec.dbxrefs, ["TestXRef"])
                self.assertEqual(rec.annotations, {"k": "v"})
                self.assertEqual(rec.letter_annotations, {})
                self.assertEqual(len(rec.features), len(self.record.features))
                self.assertEqual(rec.features[0].type, "source")
                self.assertEqual(rec.features[0].location.nofuzzy_start, 0)
                self.assertEqual(rec.features[0].location.nofuzzy_end, 26)  # not +3

    def test_add_seqrecord(self):
        """Simple addition of SeqRecord from genbank file."""
        other = SeqIO.read("GenBank/dbsource_wrap.gb", "gb")
        other.dbxrefs = ["dummy"]
        rec = self.record + other
        self.assertEqual(len(rec), len(self.record) + len(other))
        self.assertEqual(rec.seq, self.record.seq + other.seq)
        self.assertEqual(rec.id, "<unknown id>")
        self.assertEqual(rec.name, "<unknown name>")
        self.assertEqual(rec.description, "<unknown description>")
        self.assertEqual(rec.dbxrefs, ["TestXRef", "dummy"])
        self.assertEqual(len(rec.annotations), 0)
        self.assertEqual(len(rec.letter_annotations), 0)
        self.assertEqual(
            len(rec.features), len(self.record.features) + len(other.features)
        )
        self.assertEqual(rec.features[0].type, "source")
        self.assertEqual(rec.features[0].location.nofuzzy_start, 0)
        self.assertEqual(
            rec.features[0].location.nofuzzy_end, len(self.record)
        )  # not extended by the length of the added record
        # The features from the second record follow those of the first.
        i = len(self.record.features)
        self.assertEqual(rec.features[i].type, "source")
        self.assertEqual(rec.features[i].location.nofuzzy_start, len(self.record))
        self.assertEqual(rec.features[i].location.nofuzzy_end, len(rec))

    def test_add_seq_left(self):
        """Simple left addition of Seq or string."""
        for other in [Seq("BIO"), "BIO"]:
            with self.subTest(other=other):
                rec = other + self.record  # will use SeqRecord's __radd__ method
                self.assertEqual(len(rec), 26 + 3)
                self.assertEqual(rec.seq, "BIO" + self.record.seq)
                self.assertEqual(rec.id, "TestID")
                self.assertEqual(rec.name, "TestName")
                self.assertEqual(rec.description, "TestDescr")
                self.assertEqual(rec.dbxrefs, ["TestXRef"])
                self.assertEqual(rec.annotations, {"k": "v"})
                self.assertEqual(rec.letter_annotations, {})
                self.assertEqual(len(rec.features), len(self.record.features))
                self.assertEqual(rec.features[0].type, "source")
                self.assertEqual(rec.features[0].location.nofuzzy_start, 3)
                self.assertEqual(rec.features[0].location.nofuzzy_end, 26 + 3)

    def test_slice_add_simple(self):
        """Simple slice and add."""
        for cut in range(27):
            with self.subTest(cut=cut):
                rec = self.record[:cut] + self.record[cut:]
                self.assertEqual(rec.seq, self.record.seq)
                self.assertEqual(len(rec), 26)
                self.assertEqual(rec.id, "TestID")
                self.assertEqual(rec.name, "TestName")
                self.assertEqual(rec.description, "TestDescr")
                self.assertEqual(rec.dbxrefs, [])  # May change this...
                self.assertEqual(rec.annotations, {})  # May change this...
                self.assertEqual(rec.letter_annotations, {"fake": "X" * 26})
                self.assertLessEqual(len(rec.features), len(self.record.features))

    def test_slice_add_shift(self):
        """Simple slice and add to shift."""
        for cut in range(27):
            with self.subTest(cut=cut):
                rec = self.record[cut:] + self.record[:cut]
                self.assertEqual(
                    rec.seq, self.record.seq[cut:] + self.record.seq[:cut]
                )
                self.assertEqual(len(rec), 26)
                self.assertEqual(rec.id, "TestID")
                self.assertEqual(rec.name, "TestName")
                self.assertEqual(rec.description, "TestDescr")
                self.assertEqual(rec.dbxrefs, [])  # May change this...
                self.assertEqual(rec.annotations, {})  # May change this...
                self.assertEqual(rec.letter_annotations, {"fake": "X" * 26})
                self.assertLessEqual(len(rec.features), len(self.record.features))


class SeqRecordMethodsMore(unittest.TestCase):
    """Test SeqRecord methods cont."""

    # This class does not have a setUp defining self.record

    def test_reverse_complement_seq(self):
        """Check reverse_complement with preserved and with explicit values."""
        s = SeqRecord(
            Seq("ACTG"),
            id="TestID",
            name="TestName",
            description="TestDescription",
            dbxrefs=["TestDbxrefs"],
            features=[SeqFeature(FeatureLocation(0, 3), type="Site")],
            annotations={"organism": "bombyx"},
            letter_annotations={"test": "abcd"},
        )
        # Passing True asks for each value to be carried over from s.
        rc = s.reverse_complement(
            id=True,
            name=True,
            description=True,
            dbxrefs=True,
            features=True,
            annotations=True,
            letter_annotations=True,
        )

        self.assertEqual("CAGT", rc.seq)
        self.assertEqual("TestID", rc.id)
        self.assertEqual("TestID", s.reverse_complement(id="TestID").id)

        self.assertEqual("TestName", rc.name)
        self.assertEqual("TestName", s.reverse_complement(name="TestName").name)

        self.assertEqual("TestDescription", rc.description)
        self.assertEqual(
            "TestDescription",
            s.reverse_complement(description="TestDescription").description,
        )

        self.assertEqual(["TestDbxrefs"], rc.dbxrefs)
        self.assertEqual(
            ["TestDbxrefs"], s.reverse_complement(dbxrefs=["TestDbxrefs"]).dbxrefs
        )

        # The same repr is expected for the carried over feature and for
        # the explicitly supplied one.
        expected_features = (
            "[SeqFeature(FeatureLocation(ExactPosition(1), ExactPosition(4)), type='Site')]"
        )
        self.assertEqual(expected_features, repr(rc.features))
        rc2 = s.reverse_complement(
            features=[SeqFeature(FeatureLocation(1, 4), type="Site")]
        )
        self.assertEqual(expected_features, repr(rc2.features))

        self.assertEqual({"organism": "bombyx"}, rc.annotations)
        self.assertEqual(
            {"organism": "bombyx"},
            s.reverse_complement(annotations={"organism": "bombyx"}).annotations,
        )

        # Carried over letter annotations are reversed; explicit ones are
        # used exactly as given.
        self.assertEqual({"test": "dcba"}, rc.letter_annotations)
        self.assertEqual(
            {"test": "abcd"},
            s.reverse_complement(
                letter_annotations={"test": "abcd"}
            ).letter_annotations,
        )

    def test_reverse_complement_mutable_seq(self):
        """Check reverse_complement of a record holding a MutableSeq."""
        s = SeqRecord(MutableSeq("ACTG"))
        self.assertEqual("CAGT", s.reverse_complement().seq)

    def test_translate(self):
        """Check translate with default and with preserved annotation."""
        s = SeqRecord(
            Seq("ATGGTGTAA"),
            id="TestID",
            name="TestName",
            description="TestDescription",
            dbxrefs=["TestDbxrefs"],
            features=[SeqFeature(FeatureLocation(0, 3), type="Site")],
            annotations={"organism": "bombyx"},
            letter_annotations={"test": "abcdefghi"},
        )

        t = s.translate()
        self.assertEqual(t.seq, "MV*")
        self.assertEqual(t.id, "<unknown id>")
        self.assertEqual(t.name, "<unknown name>")
        self.assertEqual(t.description, "<unknown description>")
        self.assertFalse(t.dbxrefs)
        self.assertFalse(t.features)
        self.assertEqual(t.annotations, {"molecule_type": "protein"})
        self.assertFalse(t.letter_annotations)

        t = s.translate(
            cds=True,
            id=True,
            name=True,
            description=True,
            dbxrefs=True,
            annotations=True,
        )
        self.assertEqual(t.seq, "MV")
        self.assertEqual(t.id, "TestID")
        self.assertEqual(t.name, "TestName")
        self.assertEqual(t.description, "TestDescription")
        self.assertEqual(t.dbxrefs, ["TestDbxrefs"])
        self.assertFalse(t.features)
        self.assertEqual(
            t.annotations, {"organism": "bombyx", "molecule_type": "protein"}
        )
        self.assertFalse(t.letter_annotations)

    def test_lt_exception(self):
        """Check < between two SeqRecords raises NotImplementedError."""

        def lt():
            return SeqRecord(Seq("A")) < SeqRecord(Seq("A"))

        self.assertRaises(NotImplementedError, lt)

    def test_le_exception(self):
        """Check <= between two SeqRecords raises NotImplementedError."""

        def le():
            return SeqRecord(Seq("A")) <= SeqRecord(Seq("A"))

        self.assertRaises(NotImplementedError, le)

    def test_eq_exception(self):
        """Check == between two SeqRecords raises NotImplementedError."""

        def equality():
            return SeqRecord(Seq("A")) == SeqRecord(Seq("A"))

        self.assertRaises(NotImplementedError, equality)

    def test_ne_exception(self):
        """Check != between two SeqRecords raises NotImplementedError."""

        def notequality():
            return SeqRecord(Seq("A")) != SeqRecord(Seq("A"))

        self.assertRaises(NotImplementedError, notequality)

    def test_gt_exception(self):
        """Check > between two SeqRecords raises NotImplementedError."""

        def gt():
            return SeqRecord(Seq("A")) > SeqRecord(Seq("A"))

        self.assertRaises(NotImplementedError, gt)

    def test_ge_exception(self):
        """Check >= between two SeqRecords raises NotImplementedError."""

        def ge():
            return SeqRecord(Seq("A")) >= SeqRecord(Seq("A"))

        self.assertRaises(NotImplementedError, ge)

    def test_hash_exception(self):
        """Check hashing a SeqRecord raises TypeError."""
        with self.assertRaises(TypeError):
            hash(SeqRecord(Seq("A")))

        with self.assertRaises(TypeError):
            SeqRecord(Seq("A")).__hash__()


class TestTranslation(unittest.TestCase):
    """Test the annotation handling of the SeqRecord translate method."""

    def setUp(self):
        """Create an annotated nine letter nucleotide record."""
        self.s = SeqRecord(
            Seq("ATGGTGTAA"),
            id="TestID",
            name="TestName",
            description="TestDescription",
            dbxrefs=["TestDbxrefs"],
            features=[SeqFeature(FeatureLocation(0, 3), type="Site")],
            annotations={"organism": "bombyx"},
            letter_annotations={"test": "abcdefghi"},
        )

    def test_defaults(self):
        """Translate with no arguments, expecting no annotation carried over."""
        t = self.s.translate()
        self.assertEqual(t.seq, "MV*")
        self.assertEqual(t.id, "<unknown id>")
        self.assertEqual(t.name, "<unknown name>")
        self.assertEqual(t.description, "<unknown description>")
        self.assertFalse(t.dbxrefs)
        self.assertFalse(t.features)
        self.assertEqual(t.annotations, {"molecule_type": "protein"})
        self.assertFalse(t.letter_annotations)

    def test_preserve(self):
        """Translate passing True to carry over annotation where allowed."""
        t = self.s.translate(
            cds=True,
            id=True,
            name=True,
            description=True,
            dbxrefs=True,
            annotations=True,
        )
        self.assertEqual(t.seq, "MV")
        self.assertEqual(t.id, "TestID")
        self.assertEqual(t.name, "TestName")
        self.assertEqual(t.description, "TestDescription")
        self.assertEqual(t.dbxrefs, ["TestDbxrefs"])
        self.assertFalse(t.features)
        self.assertEqual(
            t.annotations, {"organism": "bombyx", "molecule_type": "protein"}
        )
        self.assertFalse(t.letter_annotations)

        # Should not preserve these
        self.assertRaises(TypeError, self.s.translate, features=True)
        self.assertRaises(TypeError, self.s.translate, letter_annotations=True)

    def test_new_annot(self):
        """Translate supplying explicit new annotation values."""
        t = self.s.translate(
            1,
            to_stop=True,
            gap="-",
            id="Foo",
            name="Bar",
            description="Baz",
            dbxrefs=["Nope"],
            features=[SeqFeature(FeatureLocation(0, 3), type="Site")],
            annotations={"a": "team"},
            letter_annotations={"aa": ["Met", "Val"]},
        )
        self.assertEqual(t.seq, "MV")
        self.assertEqual(t.id, "Foo")
        self.assertEqual(t.name, "Bar")
        self.assertEqual(t.description, "Baz")
        self.assertEqual(t.dbxrefs, ["Nope"])
        self.assertEqual(len(t.features), 1)
        self.assertEqual(t.annotations, {"a": "team", "molecule_type": "protein"})
        self.assertEqual(t.letter_annotations, {"aa": ["Met", "Val"]})


if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)