1# Copyright 2009-2017 by Peter Cock.  All rights reserved.
2# This code is part of the Biopython distribution and governed by its
3# license.  Please see the LICENSE file that should have been included
4# as part of this package.
5"""SeqFeature related tests for SeqRecord objects from Bio.SeqIO.
6
The tests cover creating SeqRecord objects (including validation of the
constructor arguments) and the SeqRecord methods: iteration, formatting,
slicing, addition, reverse complement and translation.
9"""
10import unittest
11
12from Bio import SeqIO
13from Bio.Seq import MutableSeq
14from Bio.Seq import Seq
15from Bio.SeqFeature import AfterPosition
16from Bio.SeqFeature import BeforePosition
17from Bio.SeqFeature import ExactPosition
18from Bio.SeqFeature import FeatureLocation
19from Bio.SeqFeature import OneOfPosition
20from Bio.SeqFeature import SeqFeature
21from Bio.SeqFeature import WithinPosition
22from Bio.SeqRecord import SeqRecord
23
24
class SeqRecordCreation(unittest.TestCase):
    """Test basic creation of SeqRecords."""

    def test_annotations(self):
        """Pass in annotations to SeqRecords."""
        rec = SeqRecord(Seq("ACGT"), id="Test", name="Test", description="Test")
        self.assertEqual(rec.annotations, {})
        rec = SeqRecord(
            Seq("ACGT"),
            id="Test",
            name="Test",
            description="Test",
            annotations={"test": ["a test"]},
        )
        self.assertEqual(rec.annotations["test"], ["a test"])

    def test_letter_annotations(self):
        """Pass in letter annotations to SeqRecords."""
        rec = SeqRecord(Seq("ACGT"), id="Test", name="Test", description="Test")
        self.assertEqual(rec.annotations, {})
        # The attribute under test must start out empty as well.
        self.assertEqual(rec.letter_annotations, {})
        rec = SeqRecord(
            Seq("ACGT"),
            id="Test",
            name="Test",
            description="Test",
            letter_annotations={"test": [1, 2, 3, 4]},
        )
        self.assertEqual(rec.letter_annotations["test"], [1, 2, 3, 4])
        # Now try modifying it to a bad value (length 3 for a length 4 sequence)...
        with self.assertRaises(
            (TypeError, ValueError), msg="Adding a bad letter_annotation should fail!"
        ):
            rec.letter_annotations["bad"] = "abc"
        # Now try setting it afterwards to a bad value...
        rec = SeqRecord(Seq("ACGT"), id="Test", name="Test", description="Test")
        with self.assertRaises(
            (TypeError, ValueError),
            msg="Changing to bad letter_annotations should fail!",
        ):
            rec.letter_annotations = {"test": [1, 2, 3]}
        # Now try setting it at creation time to a bad value...
        with self.assertRaises(
            (TypeError, ValueError),
            msg="Wrong length letter_annotations should fail!",
        ):
            SeqRecord(
                Seq("ACGT"),
                id="Test",
                name="Test",
                description="Test",
                letter_annotations={"test": [1, 2, 3]},
            )

    def test_replacing_seq(self):
        """Replacing .seq if .letter_annotation present."""
        rec = SeqRecord(
            Seq("ACGT"),
            id="Test",
            name="Test",
            description="Test",
            letter_annotations={"example": [1, 2, 3, 4]},
        )
        with self.assertRaises(
            ValueError,
            msg="Changing .seq length with letter_annotations present should fail!",
        ) as caught:
            rec.seq = Seq("ACGTACGT")
        self.assertEqual(
            str(caught.exception), "You must empty the letter annotations first!"
        )
        # Check we can replace IF the length is the same
        self.assertEqual(rec.seq, "ACGT")
        self.assertEqual(rec.letter_annotations, {"example": [1, 2, 3, 4]})
        rec.seq = Seq("NNNN")
        self.assertEqual(rec.seq, "NNNN")
        self.assertEqual(rec.letter_annotations, {"example": [1, 2, 3, 4]})

    def test_valid_id(self):
        """Passing a dict as the id raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), id={})

    def test_valid_name(self):
        """Passing a dict as the name raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), name={})

    def test_valid_description(self):
        """Passing a dict as the description raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), description={})

    def test_valid_dbxrefs(self):
        """Passing a dict as the dbxrefs raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), dbxrefs={})

    def test_valid_annotations(self):
        """Passing a list as the annotations raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), annotations=[])

    def test_valid_features(self):
        """Passing a dict as the features raises TypeError."""
        with self.assertRaises(TypeError):
            SeqRecord(Seq("ACGT"), features={})
126
class SeqRecordMethods(unittest.TestCase):
    """Test SeqRecord methods."""

    def setUp(self):
        """Build a 26 letter record with four features and one letter annotation."""
        f0 = SeqFeature(
            FeatureLocation(0, 26),
            type="source",
            qualifiers={"mol_type": ["fake protein"]},
        )
        f1 = SeqFeature(FeatureLocation(0, ExactPosition(10)))
        f2 = SeqFeature(
            FeatureLocation(WithinPosition(12, left=12, right=15), BeforePosition(22))
        )
        f3 = SeqFeature(
            FeatureLocation(
                AfterPosition(16),
                OneOfPosition(26, [ExactPosition(25), AfterPosition(26)]),
            )
        )
        self.record = SeqRecord(
            Seq("ABCDEFGHIJKLMNOPQRSTUVWZYX"),
            id="TestID",
            name="TestName",
            description="TestDescr",
            dbxrefs=["TestXRef"],
            annotations={"k": "v"},
            letter_annotations={"fake": "X" * 26},
            features=[f0, f1, f2, f3],
        )

    def test_iter(self):
        """Iterating over the record starts with the first letter."""
        # next() raises if iteration yields nothing, so this cannot pass
        # vacuously the way a for/break loop over an empty iterator would.
        self.assertEqual("A", next(iter(self.record)))

    def test_contains(self):
        """A Seq matching part of the sequence is found in the record."""
        self.assertIn(Seq("ABC"), self.record)

    def test_str(self):
        """Check the str() output of the record."""
        expected = """
ID: TestID
Name: TestName
Description: TestDescr
Database cross-references: TestXRef
Number of features: 4
/k=v
Per letter annotation for: fake
Seq('ABCDEFGHIJKLMNOPQRSTUVWZYX')"""
        self.assertEqual(expected.lstrip(), str(self.record))

    def test_repr(self):
        """Check the repr() output of the record."""
        expected = (
            "SeqRecord(seq=Seq('ABCDEFGHIJKLMNOPQRSTUVWZYX'), "
            "id='TestID', name='TestName', description='TestDescr', dbxrefs=['TestXRef'])"
        )
        self.assertEqual(expected, repr(self.record))

    def test_format(self):
        """Check the FASTA output of the format method."""
        expected = ">TestID TestDescr\nABCDEFGHIJKLMNOPQRSTUVWZYX\n"
        self.assertEqual(expected, self.record.format("fasta"))

    def test_format_str(self):
        """Check the FASTA output when used as an f-string format spec."""
        expected = ">TestID TestDescr\nABCDEFGHIJKLMNOPQRSTUVWZYX\n"
        self.assertEqual(expected, f"{self.record:fasta}")

    def test_format_str_binary(self):
        """Using the binary sff format as a format spec raises ValueError."""
        with self.assertRaisesRegex(
            ValueError, "Binary format sff cannot be used with SeqRecord format method"
        ):
            f"{self.record:sff}"

    def test_format_spaces(self):
        """Runs of spaces in the description are kept in FASTA output."""
        rec = SeqRecord(
            Seq("ABCDEFGHIJKLMNOPQRSTUVWZYX"),
            id="TestID",
            name="TestName",
            description="TestDescr",
        )
        rec.description = "TestDescr     with5spaces"
        expected = ">TestID TestDescr     with5spaces\nABCDEFGHIJKLMNOPQRSTUVWZYX\n"
        self.assertEqual(expected, rec.format("fasta"))

    def test_upper(self):
        """Upper-casing a lower-cased record gives back the original letters."""
        self.assertEqual("ABCDEFGHIJKLMNOPQRSTUVWZYX", self.record.lower().upper().seq)

    def test_lower(self):
        """Lower-casing the record lower-cases its sequence."""
        self.assertEqual("abcdefghijklmnopqrstuvwzyx", self.record.lower().seq)

    def test_slicing(self):
        """Index with an integer, a slice, and an invalid (string) key."""
        self.assertEqual("B", self.record[1])
        self.assertEqual("BC", self.record[1:3].seq)
        with self.assertRaises(ValueError):
            _ = self.record["a"]

    def test_slice_variants(self):
        """Simple slices using different start/end values."""
        # Bounds run well past both ends of the 26 letter record, plus None.
        bounds = list(range(-30, 30)) + [None]
        for start in bounds:
            for end in bounds:
                if start is None and end is None:
                    continue
                rec = self.record[start:end]
                seq = self.record.seq[start:end]
                seq_str = str(self.record.seq)[start:end]
                self.assertEqual(seq_str, str(seq))
                self.assertEqual(seq_str, str(rec.seq))
                self.assertEqual("X" * len(seq_str), rec.letter_annotations["fake"])

    def test_slice_simple(self):
        """Simple slice."""
        rec = self.record
        self.assertEqual(len(rec), 26)
        left = rec[:10]
        self.assertEqual(left.seq, rec.seq[:10])
        right = rec[-10:]
        self.assertEqual(right.seq, rec.seq[-10:])
        mid = rec[12:22]
        self.assertEqual(mid.seq, rec.seq[12:22])
        for sub in [left, right, mid]:
            self.assertEqual(len(sub), 10)
            self.assertEqual(sub.id, "TestID")
            self.assertEqual(sub.name, "TestName")
            self.assertEqual(sub.description, "TestDescr")
            self.assertEqual(sub.letter_annotations, {"fake": "X" * 10})
            self.assertEqual(sub.dbxrefs, [])  # May change this...
            self.assertEqual(sub.annotations, {})  # May change this...
            self.assertEqual(len(sub.features), 1)
            # By construction, each feature matches the full sliced region:
            self.assertEqual(sub.features[0].extract(sub.seq), sub.seq)

    def test_slice_zero(self):
        """Zero slice."""
        rec = self.record
        self.assertEqual(len(rec), 26)
        self.assertEqual(len(rec[2:-2]), 22)
        self.assertEqual(len(rec[5:2]), 0)
        self.assertEqual(len(rec[5:2][2:-2]), 0)

    def test_add_simple(self):
        """Simple addition."""
        rec = self.record + self.record
        self.assertEqual(len(rec), 52)
        self.assertEqual(rec.id, "TestID")
        self.assertEqual(rec.name, "TestName")
        self.assertEqual(rec.description, "TestDescr")
        self.assertEqual(rec.dbxrefs, ["TestXRef"])
        self.assertEqual(rec.annotations, {"k": "v"})
        self.assertEqual(rec.letter_annotations, {"fake": "X" * 52})
        self.assertEqual(len(rec.features), 2 * len(self.record.features))

    def test_add_seq(self):
        """Simple addition of Seq or string."""
        for other in [Seq("BIO"), "BIO"]:
            rec = self.record + other  # will use SeqRecord's __add__ method
            self.assertEqual(len(rec), 26 + 3)
            self.assertEqual(rec.seq, str(self.record.seq) + "BIO")
            self.assertEqual(rec.id, "TestID")
            self.assertEqual(rec.name, "TestName")
            self.assertEqual(rec.description, "TestDescr")
            self.assertEqual(rec.dbxrefs, ["TestXRef"])
            self.assertEqual(rec.annotations, {"k": "v"})
            self.assertEqual(rec.letter_annotations, {})
            self.assertEqual(len(rec.features), len(self.record.features))
            self.assertEqual(rec.features[0].type, "source")
            self.assertEqual(rec.features[0].location.nofuzzy_start, 0)
            self.assertEqual(rec.features[0].location.nofuzzy_end, 26)  # not +3

    def test_add_seqrecord(self):
        """Simple left addition of SeqRecord from genbank file."""
        other = SeqIO.read("GenBank/dbsource_wrap.gb", "gb")
        other.dbxrefs = ["dummy"]
        rec = self.record + other
        self.assertEqual(len(rec), len(self.record) + len(other))
        self.assertEqual(rec.seq, self.record.seq + other.seq)
        self.assertEqual(rec.id, "<unknown id>")
        self.assertEqual(rec.name, "<unknown name>")
        self.assertEqual(rec.description, "<unknown description>")
        self.assertEqual(rec.dbxrefs, ["TestXRef", "dummy"])
        self.assertEqual(len(rec.annotations), 0)
        self.assertEqual(len(rec.letter_annotations), 0)
        self.assertEqual(
            len(rec.features), len(self.record.features) + len(other.features)
        )
        # The left-hand record's features keep their original coordinates.
        self.assertEqual(rec.features[0].type, "source")
        self.assertEqual(rec.features[0].location.nofuzzy_start, 0)
        self.assertEqual(rec.features[0].location.nofuzzy_end, len(self.record))
        # The right-hand record's features follow, shifted by len(self.record).
        i = len(self.record.features)
        self.assertEqual(rec.features[i].type, "source")
        self.assertEqual(rec.features[i].location.nofuzzy_start, len(self.record))
        self.assertEqual(rec.features[i].location.nofuzzy_end, len(rec))

    def test_add_seq_left(self):
        """Simple left addition of Seq or string."""
        for other in [Seq("BIO"), "BIO"]:
            rec = other + self.record  # will use SeqRecord's __radd__ method
            self.assertEqual(len(rec), 26 + 3)
            self.assertEqual(rec.seq, "BIO" + self.record.seq)
            self.assertEqual(rec.id, "TestID")
            self.assertEqual(rec.name, "TestName")
            self.assertEqual(rec.description, "TestDescr")
            self.assertEqual(rec.dbxrefs, ["TestXRef"])
            self.assertEqual(rec.annotations, {"k": "v"})
            self.assertEqual(rec.letter_annotations, {})
            self.assertEqual(len(rec.features), len(self.record.features))
            self.assertEqual(rec.features[0].type, "source")
            self.assertEqual(rec.features[0].location.nofuzzy_start, 3)
            self.assertEqual(rec.features[0].location.nofuzzy_end, 26 + 3)

    def test_slice_add_simple(self):
        """Simple slice and add."""
        for cut in range(27):
            rec = self.record[:cut] + self.record[cut:]
            self.assertEqual(rec.seq, self.record.seq)
            self.assertEqual(len(rec), 26)
            self.assertEqual(rec.id, "TestID")
            self.assertEqual(rec.name, "TestName")
            self.assertEqual(rec.description, "TestDescr")
            self.assertEqual(rec.dbxrefs, [])  # May change this...
            self.assertEqual(rec.annotations, {})  # May change this...
            self.assertEqual(rec.letter_annotations, {"fake": "X" * 26})
            self.assertLessEqual(len(rec.features), len(self.record.features))

    def test_slice_add_shift(self):
        """Simple slice and add to shift."""
        for cut in range(27):
            rec = self.record[cut:] + self.record[:cut]
            self.assertEqual(rec.seq, self.record.seq[cut:] + self.record.seq[:cut])
            self.assertEqual(len(rec), 26)
            self.assertEqual(rec.id, "TestID")
            self.assertEqual(rec.name, "TestName")
            self.assertEqual(rec.description, "TestDescr")
            self.assertEqual(rec.dbxrefs, [])  # May change this...
            self.assertEqual(rec.annotations, {})  # May change this...
            self.assertEqual(rec.letter_annotations, {"fake": "X" * 26})
            self.assertLessEqual(len(rec.features), len(self.record.features))
364
365
class SeqRecordMethodsMore(unittest.TestCase):
    """Test SeqRecord methods cont."""

    # No setUp here, so there is no shared self.record; each test builds its own.

    def test_reverse_complement_seq(self):
        """Reverse complement, either keeping or replacing each annotation."""
        record = SeqRecord(
            Seq("ACTG"),
            id="TestID",
            name="TestName",
            description="TestDescription",
            dbxrefs=["TestDbxrefs"],
            features=[SeqFeature(FeatureLocation(0, 3), type="Site")],
            annotations={"organism": "bombyx"},
            letter_annotations={"test": "abcd"},
        )
        # Passing True for a keyword asks for the existing value to be carried over.
        flipped = record.reverse_complement(
            id=True,
            name=True,
            description=True,
            dbxrefs=True,
            features=True,
            annotations=True,
            letter_annotations=True,
        )

        self.assertEqual("CAGT", flipped.seq)

        self.assertEqual("TestID", flipped.id)
        with_id = record.reverse_complement(id="TestID")
        self.assertEqual("TestID", with_id.id)

        self.assertEqual("TestName", flipped.name)
        with_name = record.reverse_complement(name="TestName")
        self.assertEqual("TestName", with_name.name)

        self.assertEqual("TestDescription", flipped.description)
        with_description = record.reverse_complement(description="TestDescription")
        self.assertEqual("TestDescription", with_description.description)

        self.assertEqual(["TestDbxrefs"], flipped.dbxrefs)
        with_dbxrefs = record.reverse_complement(dbxrefs=["TestDbxrefs"])
        self.assertEqual(["TestDbxrefs"], with_dbxrefs.dbxrefs)

        site_repr = "[SeqFeature(FeatureLocation(ExactPosition(1), ExactPosition(4)), type='Site')]"
        self.assertEqual(site_repr, repr(flipped.features))
        with_features = record.reverse_complement(
            features=[SeqFeature(FeatureLocation(1, 4), type="Site")]
        )
        self.assertEqual(site_repr, repr(with_features.features))

        self.assertEqual({"organism": "bombyx"}, flipped.annotations)
        with_annotations = record.reverse_complement(
            annotations={"organism": "bombyx"}
        )
        self.assertEqual({"organism": "bombyx"}, with_annotations.annotations)

        # Carried-over letter annotations are reversed; explicit ones are used as given.
        self.assertEqual({"test": "dcba"}, flipped.letter_annotations)
        with_letters = record.reverse_complement(letter_annotations={"test": "abcd"})
        self.assertEqual({"test": "abcd"}, with_letters.letter_annotations)

    def test_reverse_complement_mutable_seq(self):
        """Reverse complement of a record holding a MutableSeq."""
        record = SeqRecord(MutableSeq("ACTG"))
        flipped = record.reverse_complement()
        self.assertEqual("CAGT", flipped.seq)

    def test_translate(self):
        """Translate with the defaults and then with annotation carried over."""
        record = SeqRecord(
            Seq("ATGGTGTAA"),
            id="TestID",
            name="TestName",
            description="TestDescription",
            dbxrefs=["TestDbxrefs"],
            features=[SeqFeature(FeatureLocation(0, 3), type="Site")],
            annotations={"organism": "bombyx"},
            letter_annotations={"test": "abcdefghi"},
        )

        plain = record.translate()
        self.assertEqual(plain.seq, "MV*")
        self.assertEqual(plain.id, "<unknown id>")
        self.assertEqual(plain.name, "<unknown name>")
        self.assertEqual(plain.description, "<unknown description>")
        self.assertFalse(plain.dbxrefs)
        self.assertFalse(plain.features)
        self.assertEqual(plain.annotations, {"molecule_type": "protein"})
        self.assertFalse(plain.letter_annotations)

        options = dict.fromkeys(
            ("cds", "id", "name", "description", "dbxrefs", "annotations"), True
        )
        kept = record.translate(**options)
        self.assertEqual(kept.seq, "MV")
        self.assertEqual(kept.id, "TestID")
        self.assertEqual(kept.name, "TestName")
        self.assertEqual(kept.description, "TestDescription")
        self.assertEqual(kept.dbxrefs, ["TestDbxrefs"])
        self.assertFalse(kept.features)
        self.assertEqual(
            kept.annotations, {"organism": "bombyx", "molecule_type": "protein"}
        )
        self.assertFalse(kept.letter_annotations)

    def test_lt_exception(self):
        """Comparing two records with < raises NotImplementedError."""
        with self.assertRaises(NotImplementedError):
            _ = SeqRecord(Seq("A")) < SeqRecord(Seq("A"))

    def test_le_exception(self):
        """Comparing two records with <= raises NotImplementedError."""
        with self.assertRaises(NotImplementedError):
            _ = SeqRecord(Seq("A")) <= SeqRecord(Seq("A"))

    def test_eq_exception(self):
        """Comparing two records with == raises NotImplementedError."""
        with self.assertRaises(NotImplementedError):
            _ = SeqRecord(Seq("A")) == SeqRecord(Seq("A"))

    def test_ne_exception(self):
        """Comparing two records with != raises NotImplementedError."""
        with self.assertRaises(NotImplementedError):
            _ = SeqRecord(Seq("A")) != SeqRecord(Seq("A"))

    def test_gt_exception(self):
        """Comparing two records with > raises NotImplementedError."""
        with self.assertRaises(NotImplementedError):
            _ = SeqRecord(Seq("A")) > SeqRecord(Seq("A"))

    def test_ge_exception(self):
        """Comparing two records with >= raises NotImplementedError."""
        with self.assertRaises(NotImplementedError):
            _ = SeqRecord(Seq("A")) >= SeqRecord(Seq("A"))

    def test_hash_exception(self):
        """Hashing a record, via hash() or __hash__(), raises TypeError."""
        with self.assertRaises(TypeError):
            hash(SeqRecord(Seq("A")))

        with self.assertRaises(TypeError):
            SeqRecord(Seq("A")).__hash__()
527
528
class TestTranslation(unittest.TestCase):
    """Test SeqRecord translation with default, preserved and new annotation."""

    def setUp(self):
        """Create the annotated nine letter nucleotide record used by each test."""
        site = SeqFeature(FeatureLocation(0, 3), type="Site")
        self.s = SeqRecord(
            Seq("ATGGTGTAA"),
            id="TestID",
            name="TestName",
            description="TestDescription",
            dbxrefs=["TestDbxrefs"],
            features=[site],
            annotations={"organism": "bombyx"},
            letter_annotations={"test": "abcdefghi"},
        )

    def test_defaults(self):
        """Translate with no arguments; no source annotation is carried over."""
        protein = self.s.translate()
        self.assertEqual(protein.seq, "MV*")
        self.assertEqual(protein.id, "<unknown id>")
        self.assertEqual(protein.name, "<unknown name>")
        self.assertEqual(protein.description, "<unknown description>")
        self.assertFalse(protein.dbxrefs)
        self.assertFalse(protein.features)
        self.assertEqual(protein.annotations, {"molecule_type": "protein"})
        self.assertFalse(protein.letter_annotations)

    def test_preserve(self):
        """Translate as a CDS, asking for the existing annotation to be kept."""
        keep = dict.fromkeys(
            ("cds", "id", "name", "description", "dbxrefs", "annotations"), True
        )
        protein = self.s.translate(**keep)
        self.assertEqual(protein.seq, "MV")
        self.assertEqual(protein.id, "TestID")
        self.assertEqual(protein.name, "TestName")
        self.assertEqual(protein.description, "TestDescription")
        self.assertEqual(protein.dbxrefs, ["TestDbxrefs"])
        self.assertFalse(protein.features)
        self.assertEqual(
            protein.annotations, {"organism": "bombyx", "molecule_type": "protein"}
        )
        self.assertFalse(protein.letter_annotations)

        # Should not preserve these
        for rejected in ("features", "letter_annotations"):
            with self.assertRaises(TypeError):
                self.s.translate(**{rejected: True})

    def test_new_annot(self):
        """Translate while supplying replacement values for every annotation."""
        new_feature = SeqFeature(FeatureLocation(0, 3), type="Site")
        protein = self.s.translate(
            1,
            to_stop=True,
            gap="-",
            id="Foo",
            name="Bar",
            description="Baz",
            dbxrefs=["Nope"],
            features=[new_feature],
            annotations={"a": "team"},
            letter_annotations={"aa": ["Met", "Val"]},
        )
        self.assertEqual(protein.seq, "MV")
        self.assertEqual(protein.id, "Foo")
        self.assertEqual(protein.name, "Bar")
        self.assertEqual(protein.description, "Baz")
        self.assertEqual(protein.dbxrefs, ["Nope"])
        self.assertEqual(len(protein.features), 1)
        self.assertEqual(
            protein.annotations, {"a": "team", "molecule_type": "protein"}
        )
        self.assertEqual(protein.letter_annotations, {"aa": ["Met", "Val"]})
598
599
if __name__ == "__main__":
    # Run every test in this module with verbose (level 2) reporting.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
603