1# This code is part of the Biopython distribution and governed by its
2# license.  Please see the LICENSE file that should have been included
3# as part of this package.
4"""Tests for GenomeDiagram general functionality."""
5
6
7import os
8import unittest
9import math
10
11
12# Do we have ReportLab?  Raise error if not present.
13from Bio import MissingPythonDependencyError
14
15try:
16    from reportlab.lib import colors
17    from reportlab.pdfbase import pdfmetrics
18    from reportlab.pdfbase.ttfonts import TTFont
19    from reportlab.lib.units import cm
20except ImportError:
21    raise MissingPythonDependencyError(
22        "Install reportlab if you want to use Bio.Graphics."
23    ) from None
24
25try:
26    # The preferred PIL import has changed over time...
27    try:
28        from PIL import Image
29    except ImportError:
30        import Image
31    from reportlab.graphics import renderPM
32except ImportError:
33    # This is an optional part of ReportLab, so may not be installed.
34    # We'll raise a missing dependency error if rendering to a
35    # bitmap format is attempted.
36    renderPM = None
37
38from Bio import SeqIO
39from Bio.SeqFeature import SeqFeature, FeatureLocation
40
41from Bio.Graphics.GenomeDiagram import FeatureSet, GraphSet, Track, Diagram
42from Bio.Graphics.GenomeDiagram import CrossLink
43from Bio.Graphics.GenomeDiagram._Graph import GraphData
44from Bio.Graphics.GenomeDiagram._Colors import ColorTranslator
45
46from reportlab import rl_config
47
48rl_config.invariant = True
49
50
51def fill_and_border(base_color, alpha=0.5):
52    """Return fill and border colors given a base color."""
53    try:
54        c = base_color.clone()
55        c.alpha = alpha
56        return c, base_color
57    except AttributeError:
58        # Old ReportLab, no transparency and/or no clone
59        return base_color, base_color
60
61
62###############################################################################
63# Utility functions for graph plotting, originally in GenomeDiagram.Utilities #
64# See Bug 2705 for discussion on where to put these functions in Biopython... #
65###############################################################################
66
67
68def apply_to_window(sequence, window_size, function, step=None):
69    """Apply function to windows of the given sequence.
70
71    Returns a list of (position, value) tuples for fragments of the passed
72    sequence of length window_size (stepped by step), calculated by the passed
73    function.  Returned positions are the midpoint of each window.
74
75    - sequence - Bio.Seq.Seq object.
76    - window_size - an integer describing the length of sequence to consider.
77    - step - an integer describing the step to take between windows
78      (default = window_size//2).
79    - function - Method or function that accepts a Bio.Seq.Seq object
80      as its sole argument and returns a single value.
81    """
82    seqlen = len(sequence)  # Total length of sequence to be used
83    if step is None:  # No step specified, so use half window-width or 1 if larger
84        step = max(window_size // 2, 1)
85    else:  # Use specified step, or 1 if greater
86        step = max(step, 1)
87
88    results = []  # Holds (position, value) results
89
90    # Perform the passed function on as many windows as possible, short of
91    # overrunning the sequence
92    pos = 0
93    while pos < seqlen - window_size + 1:
94        # Obtain sequence fragment
95        start, middle, end = pos, (pos + window_size + pos) // 2, pos + window_size
96        fragment = sequence[start:end]
97        # Apply function to the sequence fragment
98        value = function(fragment)
99        results.append((middle, value))  # Add results to list
100        # Advance to next fragment
101        pos += step
102
103    # Use the last available window on the sequence, even if it means
104    # re-covering old ground
105    if pos != seqlen - window_size:
106        # Obtain sequence fragment
107        pos = seqlen - window_size
108        start, middle, end = pos, (pos + window_size + pos) // 2, pos + window_size
109        fragment = sequence[start:end]
110        # Apply function to sequence fragment
111        value = function(fragment)
112        results.append((middle, value))  # Add results to list
113
114    return results  # Return the list of (position, value) results
115
116
117def calc_gc_content(sequence):
118    """Return the % G+C content in a passed sequence.
119
120    Arguments:
121        - sequence  - a Bio.Seq.Seq object.
122
123    calc_gc_content(sequence)
124
125    """
126    d = {}
127    for nt in ["A", "T", "G", "C"]:
128        d[nt] = sequence.count(nt) + sequence.count(nt.lower())
129    gc = d.get("G", 0) + d.get("C", 0)
130
131    if gc == 0:
132        return 0
133    # print(gc*100.0/(d['A'] +d['T'] + gc))
134    return gc * 1.0 / (d["A"] + d["T"] + gc)
135
136
137def calc_at_content(sequence):
138    """Return the % A+T content in a passed sequence.
139
140    Arguments:
141        - sequence  - a Bio.Seq.Seq object.
142
143    calc_at_content(sequence)
144
145    """
146    d = {}
147    for nt in ["A", "T", "G", "C"]:
148        d[nt] = sequence.count(nt) + sequence.count(nt.lower())
149    at = d.get("A", 0) + d.get("T", 0)
150
151    if at == 0:
152        return 0
153    return at * 1.0 / (d["G"] + d["G"] + at)
154
155
156def calc_gc_skew(sequence):
157    """Return the (G-C)/(G+C) GC skew in a passed sequence.
158
159    Arguments:
160        - sequence   - a Bio.Seq.Seq object.
161
162    calc_gc_skew(sequence)
163
164    """
165    g = sequence.count("G") + sequence.count("g")
166    c = sequence.count("C") + sequence.count("c")
167    if g + c == 0:
168        return 0.0  # TODO - return NaN or None here?
169    else:
170        return (g - c) / float(g + c)
171
172
173def calc_at_skew(sequence):
174    """Return the (A-T)/(A+T) AT skew in a passed sequence.
175
176    Arguments:
177        - sequence   - a Bio.Seq.Seq object.
178
179    calc_at_skew(sequence)
180
181    """
182    a = sequence.count("A") + sequence.count("a")
183    t = sequence.count("T") + sequence.count("t")
184    if a + t == 0:
185        return 0.0  # TODO - return NaN or None here?
186    else:
187        return (a - t) / float(a + t)
188
189
190def calc_dinucleotide_counts(sequence):
191    """Return the total count of di-nucleotides repeats (e.g. "AA", "CC").
192
193    This is purely for the sake of generating some non-random sequence
194    based score for plotting, with no expected biological meaning.
195
196    NOTE - Only considers same case pairs.
197    NOTE - "AA" scores 1, "AAA" scores 2, "AAAA" scores 3 etc.
198    """
199    total = 0
200    for letter in "ACTGUactgu":
201        total += sequence.count(letter + letter)
202    return total
203
204
205###############################################################################
206# End of utility functions for graph plotting                                 #
207###############################################################################
208
209
210# Tests
211# class TrackTest(unittest.TestCase):
212#    # TODO Bring code from Track.py, unsure about what test does
213#    pass
214
215
216class ColorsTest(unittest.TestCase):
217    """Color tests."""
218
219    def test_color_conversions(self):
220        """Test color translations."""
221        translator = ColorTranslator()
222
223        # Does the translate method correctly convert the passed argument?
224        self.assertEqual(
225            translator.float1_color((0.5, 0.5, 0.5)),
226            translator.translate((0.5, 0.5, 0.5)),
227            "Did not correctly translate colour from floating point RGB tuple",
228        )
229        self.assertEqual(
230            translator.int255_color((1, 75, 240)),
231            translator.translate((1, 75, 240)),
232            "Did not correctly translate colour from integer RGB tuple",
233        )
234        self.assertEqual(
235            translator.artemis_color(7),
236            translator.translate(7),
237            "Did not correctly translate colour from Artemis colour scheme",
238        )
239        self.assertEqual(
240            translator.scheme_color(2),
241            translator.translate(2),
242            "Did not correctly translate colour from user-defined colour scheme",
243        )
244
245
246class GraphTest(unittest.TestCase):
247    """Graph tests."""
248
249    def test_limits(self):
250        """Check line graphs."""
251        # TODO - Fix GD so that the same min/max is used for all three lines?
252        points = 1000
253        scale = math.pi * 2.0 / points
254        data1 = [math.sin(x * scale) for x in range(points)]
255        data2 = [math.cos(x * scale) for x in range(points)]
256        data3 = [2 * math.sin(2 * x * scale) for x in range(points)]
257
258        gdd = Diagram(
259            "Test Diagram",
260            circular=False,
261            y=0.01,
262            yt=0.01,
263            yb=0.01,
264            x=0.01,
265            xl=0.01,
266            xr=0.01,
267        )
268        gdt_data = gdd.new_track(1, greytrack=False)
269        gds_data = gdt_data.new_set("graph")
270        for data_values, _name, color in zip(
271            [data1, data2, data3], ["sin", "cos", "2sin2"], ["red", "green", "blue"]
272        ):
273            data = list(zip(range(points), data_values))
274            gds_data.new_graph(
275                data, "", style="line", color=color, altcolor=color, center=0
276            )
277
278        gdd.draw(
279            format="linear",
280            tracklines=False,
281            pagesize=(15 * cm, 15 * cm),
282            fragments=1,
283            start=0,
284            end=points,
285        )
286        gdd.write(os.path.join("Graphics", "line_graph.pdf"), "pdf")
287        # Circular diagram
288        gdd.draw(
289            tracklines=False,
290            pagesize=(15 * cm, 15 * cm),
291            circular=True,  # Data designed to be periodic
292            start=0,
293            end=points,
294            circle_core=0.5,
295        )
296        gdd.write(os.path.join("Graphics", "line_graph_c.pdf"), "pdf")
297
298    def test_slicing(self):
299        """Check GraphData slicing."""
300        gd = GraphData()
301        gd.set_data([(1, 10), (5, 15), (20, 40)])
302        gd.add_point((10, 20))
303
304        self.assertEqual(
305            gd[4:16],
306            [(5, 15), (10, 20),],  # noqa 231
307            "Unable to insert and retrieve points correctly",
308        )
309
310
311class LabelTest(unittest.TestCase):
312    """Check label positioning."""
313
314    def setUp(self):
315        """Start a diagram."""
316        self.gdd = Diagram(
317            "Test Diagram",
318            circular=False,
319            y=0.01,
320            yt=0.01,
321            yb=0.01,
322            x=0.01,
323            xl=0.01,
324            xr=0.01,
325        )
326
327    def finish(self, name, circular=True):
328        """Draw it..."""
329        tracks = len(self.gdd.tracks)
330        # Work around the page orientation code being too clever
331        # and flipping the h & w round:
332        if tracks <= 3:
333            orient = "landscape"
334        else:
335            orient = "portrait"
336        self.gdd.draw(
337            format="linear",
338            orientation=orient,
339            tracklines=False,
340            pagesize=(15 * cm, 5 * cm * tracks),
341            fragments=1,
342            start=0,
343            end=400,
344        )
345        self.gdd.write(os.path.join("Graphics", name + ".pdf"), "pdf")
346        global renderPM
347        if renderPM:
348            try:
349                # For the tutorial this is useful:
350                self.gdd.write(os.path.join("Graphics", name + ".png"), "png")
351            except renderPM.RenderPMError:
352                # Probably a font problem, e.g.
353                # RenderPMError: Can't setFont(Times-Roman) missing the T1 files?
354                # Originally <type 'exceptions.TypeError'>: makeT1Font() argument 2
355                # must be string, not None
356                renderPM = None
357            except OSError:
358                # Probably a library problem, e.g.
359                # OSError: encoder zip not available
360                renderPM = None
361        if circular:
362            # Circular diagram
363            self.gdd.draw(
364                tracklines=False,
365                pagesize=(15 * cm, 15 * cm),
366                fragments=1,
367                circle_core=0.5,
368                start=0,
369                end=400,
370            )
371            self.gdd.write(os.path.join("Graphics", name + "_c.pdf"), "pdf")
372
373    def add_track_with_sigils(self, **kwargs):
374        """Add track with sigils."""
375        self.gdt_features = self.gdd.new_track(1, greytrack=False)
376        self.gds_features = self.gdt_features.new_set()
377        for i in range(18):
378            start = int((400 * i) / 18.0)
379            end = start + 17
380            if i % 3 == 0:
381                strand = None
382                name = "Strandless"
383                color = colors.orange
384            elif i % 3 == 1:
385                strand = +1
386                name = "Forward"
387                color = colors.red
388            else:
389                strand = -1
390                name = "Reverse"
391                color = colors.blue
392            feature = SeqFeature(FeatureLocation(start, end), strand=strand)
393            self.gds_features.add_feature(
394                feature, name=name, color=color, label=True, **kwargs
395            )
396
397    def test_label_default(self):
398        """Feature labels - default."""
399        self.add_track_with_sigils()
400        self.finish("labels_default")
401
402
403class SigilsTest(unittest.TestCase):
404    """Check the different feature sigils.
405
406    These figures are intended to be used in the Tutorial...
407    """
408
409    def setUp(self):
410        """Initialise diagram."""
411        self.gdd = Diagram(
412            "Test Diagram",
413            circular=False,
414            y=0.01,
415            yt=0.01,
416            yb=0.01,
417            x=0.01,
418            xl=0.01,
419            xr=0.01,
420        )
421
422    def add_track_with_sigils(self, track_caption="", **kwargs):
423        """Add a track of features."""
424        self.gdt_features = self.gdd.new_track(
425            1, greytrack=(track_caption != ""), name=track_caption, greytrack_labels=1
426        )
427        # We'll just use one feature set for these features,
428        self.gds_features = self.gdt_features.new_set()
429        # Add three features to show the strand options,
430        feature = SeqFeature(FeatureLocation(25, 125), strand=+1)
431        self.gds_features.add_feature(feature, name="Forward", **kwargs)
432        feature = SeqFeature(FeatureLocation(150, 250), strand=None)
433        self.gds_features.add_feature(feature, name="Strandless", **kwargs)
434        feature = SeqFeature(FeatureLocation(275, 375), strand=-1)
435        self.gds_features.add_feature(feature, name="Reverse", **kwargs)
436
437    def finish(self, name, circular=True):
438        """Draw it..."""
439        tracks = len(self.gdd.tracks)
440        # Work around the page orientation code being too clever
441        # and flipping the h & w round:
442        if tracks <= 3:
443            orient = "landscape"
444        else:
445            orient = "portrait"
446        self.gdd.draw(
447            format="linear",
448            orientation=orient,
449            tracklines=False,
450            pagesize=(15 * cm, 5 * cm * tracks),
451            fragments=1,
452            start=0,
453            end=400,
454        )
455        self.gdd.write(os.path.join("Graphics", name + ".pdf"), "pdf")
456        global renderPM
457        if renderPM:
458            # For the tutorial this might be useful:
459            try:
460                self.gdd.write(os.path.join("Graphics", name + ".png"), "png")
461            except renderPM.RenderPMError:
462                # Probably a font problem
463                renderPM = None
464        if circular:
465            # Circular diagram
466            self.gdd.draw(
467                tracklines=False,
468                pagesize=(15 * cm, 15 * cm),
469                fragments=1,
470                circle_core=0.5,
471                start=0,
472                end=400,
473            )
474            self.gdd.write(os.path.join("Graphics", name + "_c.pdf"), "pdf")
475
476    def test_all_sigils(self):
477        """All sigils."""
478        for glyph in ["BOX", "OCTO", "JAGGY", "ARROW", "BIGARROW"]:
479            self.add_track_with_sigils(
480                track_caption='  sigil="%s"' % glyph, sigil=glyph
481            )
482        self.finish("GD_sigils")
483
484    def test_labels(self):
485        """Feature labels."""
486        self.add_track_with_sigils(label=True)
487        self.add_track_with_sigils(
488            label=True,
489            color="green",
490            # label_position left as default!
491            label_size=25,
492            label_angle=0,
493        )
494        self.add_track_with_sigils(
495            label=True,
496            color="purple",
497            label_position="end",
498            label_size=4,
499            label_angle=90,
500        )
501        self.add_track_with_sigils(
502            label=True,
503            color="blue",
504            label_position="middle",
505            label_size=6,
506            label_angle=-90,
507        )
508        self.add_track_with_sigils(
509            label=True,
510            color="cyan",
511            label_position="start",
512            label_size=6,
513            label_angle=-90,
514        )
515        self.assertEqual(len(self.gdd.tracks), 5)
516        self.finish("GD_sigil_labels", circular=True)
517
518    def test_arrow_shafts(self):
519        """Feature arrow sigils, varying shafts."""
520        self.add_track_with_sigils(sigil="ARROW")
521        self.add_track_with_sigils(sigil="ARROW", color="brown", arrowshaft_height=1.0)
522        self.add_track_with_sigils(sigil="ARROW", color="teal", arrowshaft_height=0.2)
523        self.add_track_with_sigils(
524            sigil="ARROW", color="darkgreen", arrowshaft_height=0.1
525        )
526        self.assertEqual(len(self.gdd.tracks), 4)
527        self.finish("GD_sigil_arrow_shafts")
528
529    def test_big_arrow_shafts(self):
530        """Feature big-arrow sigils, varying shafts."""
531        self.add_track_with_sigils(sigil="BIGARROW")
532        self.add_track_with_sigils(
533            sigil="BIGARROW", color="orange", arrowshaft_height=1.0
534        )
535        self.add_track_with_sigils(
536            sigil="BIGARROW", color="teal", arrowshaft_height=0.2
537        )
538        self.add_track_with_sigils(
539            sigil="BIGARROW", color="green", arrowshaft_height=0.1
540        )
541        self.assertEqual(len(self.gdd.tracks), 4)
542        self.finish("GD_sigil_bigarrow_shafts")
543
544    def test_arrow_heads(self):
545        """Feature arrow sigils, varying heads."""
546        self.add_track_with_sigils(sigil="ARROW")
547        self.add_track_with_sigils(sigil="ARROW", color="blue", arrowhead_length=0.25)
548        self.add_track_with_sigils(sigil="ARROW", color="orange", arrowhead_length=1)
549        self.add_track_with_sigils(
550            sigil="ARROW", color="red", arrowhead_length=10000
551        )  # Triangles
552        self.assertEqual(len(self.gdd.tracks), 4)
553        self.finish("GD_sigil_arrows")
554
555    def short_sigils(self, glyph):
556        """Draw sigils on top of grey box backgrounds."""
557        # The blue boxes are only relevant for the BIGARROW
558        # Add a track of features, bigger height to emphasise any sigil errors
559        self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3)
560        # We'll just use one feature set for these features,
561        self.gds_features = self.gdt_features.new_set()
562        # For the ARROW and BIGARROW sigils:
563        # - Green arrows just have small heads (meaning if there is a mitre
564        #   it will escape the bounding box).
565        # - Red arrows should be small triangles (so short no shaft shown)
566
567        # Forward strand:
568        feature = SeqFeature(FeatureLocation(15, 30), strand=-1)
569        self.gds_features.add_feature(feature, color="blue")
570        feature = SeqFeature(FeatureLocation(15, 30), strand=+1)
571        self.gds_features.add_feature(feature, color="grey")
572        self.gds_features.add_feature(
573            feature, name="Forward", sigil=glyph, arrowhead_length=0.05
574        )
575
576        feature = SeqFeature(FeatureLocation(55, 60), strand=-1)
577        self.gds_features.add_feature(feature, color="blue")
578        feature = SeqFeature(FeatureLocation(55, 60), strand=+1)
579        self.gds_features.add_feature(feature, color="grey")
580        self.gds_features.add_feature(
581            feature, name="Forward", sigil=glyph, arrowhead_length=1000, color="red"
582        )
583
584        feature = SeqFeature(FeatureLocation(75, 125), strand=-1)
585        self.gds_features.add_feature(feature, color="blue")
586        feature = SeqFeature(FeatureLocation(75, 125), strand=+1)
587        self.gds_features.add_feature(feature, color="grey")
588        self.gds_features.add_feature(
589            feature, name="Forward", sigil=glyph, arrowhead_length=0.05
590        )
591
592        # Strandless:
593        feature = SeqFeature(FeatureLocation(140, 155), strand=None)
594        self.gds_features.add_feature(feature, color="grey")
595        self.gds_features.add_feature(
596            feature, name="Strandless", sigil=glyph, arrowhead_length=0.05
597        )
598
599        feature = SeqFeature(FeatureLocation(180, 185), strand=None)
600        self.gds_features.add_feature(feature, color="grey")
601        self.gds_features.add_feature(
602            feature, name="Strandless", sigil=glyph, arrowhead_length=1000, color="red"
603        )
604
605        feature = SeqFeature(FeatureLocation(200, 250), strand=None)
606        self.gds_features.add_feature(feature, color="grey")
607        self.gds_features.add_feature(
608            feature, name="Strandless", sigil=glyph, arrowhead_length=0.05
609        )
610
611        # Reverse strand:
612        feature = SeqFeature(FeatureLocation(265, 280), strand=+1)
613        self.gds_features.add_feature(feature, color="blue")
614        feature = SeqFeature(FeatureLocation(265, 280), strand=-1)
615        self.gds_features.add_feature(feature, color="grey")
616        self.gds_features.add_feature(
617            feature, name="Reverse", sigil=glyph, arrowhead_length=0.05
618        )
619
620        feature = SeqFeature(FeatureLocation(305, 310), strand=+1)
621        self.gds_features.add_feature(feature, color="blue")
622        feature = SeqFeature(FeatureLocation(305, 310), strand=-1)
623        self.gds_features.add_feature(feature, color="grey")
624        self.gds_features.add_feature(
625            feature, name="Reverse", sigil=glyph, arrowhead_length=1000, color="red"
626        )
627
628        feature = SeqFeature(FeatureLocation(325, 375), strand=+1)
629        self.gds_features.add_feature(feature, color="blue")
630        feature = SeqFeature(FeatureLocation(325, 375), strand=-1)
631        self.gds_features.add_feature(feature, color="grey")
632        self.gds_features.add_feature(
633            feature, name="Reverse", sigil=glyph, arrowhead_length=0.05
634        )
635
636        self.finish("GD_sigil_short_%s" % glyph)
637
638    def test_short_arrow(self):
639        """Feature arrow sigil heads within bounding box."""
640        self.short_sigils("ARROW")
641
642    def test_short_bigarrow(self):
643        """Feature big-arrow sigil heads within bounding box."""
644        self.short_sigils("BIGARROW")
645
646    def test_short_jaggy(self):
647        """Feature arrow sigil heads within bounding box."""
648        self.short_sigils("JAGGY")
649
650    def test_short_octo(self):
651        """Feature big-arrow sigil heads within bounding box."""
652        self.short_sigils("OCTO")
653
654    def long_sigils(self, glyph):
655        """Check feature sigils within bounding box."""
656        # Add a track of features, bigger height to emphasise any sigil errors
657        self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3)
658        # We'll just use one feature set for these features if strand specific
659        self.gds_features = self.gdt_features.new_set()
660        if glyph in ["BIGARROW"]:
661            # These straddle the axis, so don't want to draw them on top of each other
662            feature = SeqFeature(FeatureLocation(25, 375), strand=None)
663            self.gds_features.add_feature(feature, color="lightblue")
664            feature = SeqFeature(FeatureLocation(25, 375), strand=+1)
665        else:
666            feature = SeqFeature(FeatureLocation(25, 375), strand=+1)
667            self.gds_features.add_feature(feature, color="lightblue")
668        self.gds_features.add_feature(
669            feature, name="Forward", sigil=glyph, color="blue", arrowhead_length=2.0
670        )
671
672        if glyph in ["BIGARROW"]:
673            # These straddle the axis, so don't want to draw them on top of each other
674            self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3)
675            self.gds_features = self.gdt_features.new_set()
676            feature = SeqFeature(FeatureLocation(25, 375), strand=None)
677            self.gds_features.add_feature(feature, color="pink")
678            feature = SeqFeature(FeatureLocation(25, 375), strand=-1)
679        else:
680            feature = SeqFeature(FeatureLocation(25, 375), strand=-1)
681            self.gds_features.add_feature(feature, color="pink")
682        self.gds_features.add_feature(
683            feature, name="Reverse", sigil=glyph, color="red", arrowhead_length=2.0
684        )
685        # Add another track of features, bigger height to emphasise any sigil errors
686        self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3)
687        # We'll just use one feature set for these features,
688        self.gds_features = self.gdt_features.new_set()
689        feature = SeqFeature(FeatureLocation(25, 375), strand=None)
690        self.gds_features.add_feature(feature, color="lightgreen")
691        self.gds_features.add_feature(
692            feature, name="Standless", sigil=glyph, color="green", arrowhead_length=2.0
693        )
694        self.finish("GD_sigil_long_%s" % glyph)
695
696    def test_long_arrow_heads(self):
697        """Feature ARROW sigil heads within bounding box."""
698        self.long_sigils("ARROW")
699
700    def test_long_bigarrow_heads(self):
701        """Feature BIGARROW sigil heads within bounding box."""
702        self.long_sigils("BIGARROW")
703
704    def test_long_octo_heads(self):
705        """Feature OCTO sigil heads within bounding box."""
706        self.long_sigils("OCTO")
707
708    def test_long_jaggy(self):
709        """Feature JAGGY sigil heads within bounding box."""
710        self.long_sigils("JAGGY")
711
712
713class DiagramTest(unittest.TestCase):
714    """Creating feature sets, graph sets, tracks etc individually for the diagram."""
715
716    def setUp(self):
717        """Test setup, just loads a GenBank file as a SeqRecord."""
718        with open(os.path.join("GenBank", "NC_005816.gb")) as handle:
719            self.record = SeqIO.read(handle, "genbank")
720
721        self.gdd = Diagram("Test Diagram")
722        # Add a track of features,
723        self.gdd.new_track(
724            1, greytrack=True, name="CDS Features", greytrack_labels=0, height=0.5
725        )
726
727    def tearDown(self):
728        """Release the drawing objects."""
729        del self.gdd
730
731    def test_str(self):
732        """Test diagram's info as string."""
733        expected = (
734            "\n<<class 'Bio.Graphics.GenomeDiagram._Diagram.Diagram'>: Test Diagram>"
735            "\n1 tracks"
736            "\nTrack 1: "
737            "\n<<class 'Bio.Graphics.GenomeDiagram._Track.Track'>: CDS Features>"
738            "\n0 sets"
739            "\n"
740        )
741        self.assertEqual(expected, str(self.gdd))
742
743    def test_add_track(self):
744        """Add track."""
745        track = Track(name="Annotated Features")
746        self.gdd.add_track(track, 2)
747        self.assertEqual(2, len(self.gdd.get_tracks()))
748
749    def test_add_track_to_occupied_level(self):
750        """Add track to occupied level."""
751        new_track = self.gdd.get_tracks()[0]
752        self.gdd.add_track(new_track, 1)
753        self.assertEqual(2, len(self.gdd.get_tracks()))
754
755    def test_add_track_error(self):
756        """Test adding unspecified track."""
757        self.assertRaises(ValueError, self.gdd.add_track, None, 1)
758
759    def test_del_tracks(self):
760        """Delete track."""
761        self.gdd.del_track(1)
762        self.assertEqual(0, len(self.gdd.get_tracks()))
763
764    def test_get_tracks(self):
765        """Get track."""
766        self.assertEqual(1, len(self.gdd.get_tracks()))
767
768    def test_move_track(self):
769        """Move a track."""
770        self.gdd.move_track(1, 2)
771        expected = (
772            "\n<<class 'Bio.Graphics.GenomeDiagram._Diagram.Diagram'>: Test Diagram>"
773            "\n1 tracks"
774            "\nTrack 2: "
775            "\n<<class 'Bio.Graphics.GenomeDiagram._Track.Track'>: CDS Features>"
776            "\n0 sets"
777            "\n"
778        )
779        self.assertEqual(expected, str(self.gdd))
780
781    def test_renumber(self):
782        """Test renumbering tracks."""
783        self.gdd.renumber_tracks(0)
784        expected = (
785            "\n<<class 'Bio.Graphics.GenomeDiagram._Diagram.Diagram'>: Test Diagram>"
786            "\n1 tracks"
787            "\nTrack 0: "
788            "\n<<class 'Bio.Graphics.GenomeDiagram._Track.Track'>: CDS Features>"
789            "\n0 sets"
790            "\n"
791        )
792        self.assertEqual(expected, str(self.gdd))
793
794    def test_write_arguments(self):
795        """Check how the write methods respond to output format arguments."""
796        gdd = Diagram("Test Diagram")
797        gdd.drawing = None  # Hack - need the ReportLab drawing object to be created.
798        filename = os.path.join("Graphics", "error.txt")
799        # We (now) allow valid formats in any case.
800        for output in ["XXX", "xxx", None, 123, 5.9]:
801            with self.assertRaises(ValueError):
802                gdd.write(filename, output)
803            with self.assertRaises(ValueError):
804                gdd.write_to_string(output)
805
806    def test_partial_diagram(self):
807        """Construct and draw SVG and PDF for just part of a SeqRecord."""
808        genbank_entry = self.record
809        start = 6500
810        end = 8750
811
812        gdd = Diagram(
813            "Test Diagram",
814            # For the circular diagram we don't want a closed cirle:
815            circular=False,
816        )
817        # Add a track of features,
818        gdt_features = gdd.new_track(
819            1,
820            greytrack=True,
821            name="CDS Features",
822            scale_largetick_interval=1000,
823            scale_smalltick_interval=100,
824            scale_format="SInt",
825            greytrack_labels=False,
826            height=0.5,
827        )
828        # We'll just use one feature set for these features,
829        gds_features = gdt_features.new_set()
830        for feature in genbank_entry.features:
831            if feature.type != "CDS":
832                # We're going to ignore these.
833                continue
834            if feature.location.end.position < start:
835                # Out of frame (too far left)
836                continue
837            if feature.location.start.position > end:
838                # Out of frame (too far right)
839                continue
840
841            # This URL should work in SVG output from recent versions
842            # of ReportLab.  You need ReportLab 2.4 or later
843            try:
844                url = (
845                    "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi"
846                    + "?db=protein&id=%s" % feature.qualifiers["protein_id"][0]
847                )
848            except KeyError:
849                url = None
850
851            # Note that I am using strings for color names, instead
852            # of passing in color objects.  This should also work!
853            if len(gds_features) % 2 == 0:
854                color = "white"  # for testing the automatic black border!
855            else:
856                color = "red"
857            # Checking it can cope with the old UK spelling colour.
858            # Also show the labels perpendicular to the track.
859            gds_features.add_feature(
860                feature,
861                colour=color,
862                url=url,
863                sigil="ARROW",
864                label_position=None,
865                label_size=8,
866                label_angle=90,
867                label=True,
868            )
869
870        # And draw it...
871        gdd.draw(
872            format="linear",
873            orientation="landscape",
874            tracklines=False,
875            pagesize=(10 * cm, 6 * cm),
876            fragments=1,
877            start=start,
878            end=end,
879        )
880        output_filename = os.path.join("Graphics", "GD_region_linear.pdf")
881        gdd.write(output_filename, "PDF")
882
883        # Also check the write_to_string (bytes string) method matches,
884        with open(output_filename, "rb") as handle:
885            self.assertEqual(handle.read(), gdd.write_to_string("PDF"))
886
887        output_filename = os.path.join("Graphics", "GD_region_linear.svg")
888        gdd.write(output_filename, "SVG")
889
890        # Circular with a particular start/end is a bit odd, but by setting
891        # circular=False (above) a sweep of 90% is used (a wedge is left out)
892        gdd.draw(
893            format="circular",
894            tracklines=False,
895            pagesize=(10 * cm, 10 * cm),
896            start=start,
897            end=end,
898        )
899        output_filename = os.path.join("Graphics", "GD_region_circular.pdf")
900        gdd.write(output_filename, "PDF")
901        output_filename = os.path.join("Graphics", "GD_region_circular.svg")
902        gdd.write(output_filename, "SVG")
903
904    def test_diagram_via_methods_pdf(self):
905        """Construct and draw PDF using method approach."""
906        genbank_entry = self.record
907        gdd = Diagram("Test Diagram")
908
909        # Add a track of features,
910        gdt_features = gdd.new_track(
911            1, greytrack=True, name="CDS Features", greytrack_labels=0, height=0.5
912        )
913        # We'll just use one feature set for the genes and misc_features,
914        gds_features = gdt_features.new_set()
915        for feature in genbank_entry.features:
916            if feature.type == "gene":
917                if len(gds_features) % 2 == 0:
918                    color = "blue"
919                else:
920                    color = "lightblue"
921                gds_features.add_feature(
922                    feature,
923                    color=color,
924                    # label_position="middle",
925                    # label_position="end",
926                    label_position="start",
927                    label_size=11,
928                    # label_angle=90,
929                    sigil="ARROW",
930                    label=True,
931                )
932
933        # I want to include some strandless features, so for an example
934        # will use EcoRI recognition sites etc.
935        for site, name, color in [
936            ("GAATTC", "EcoRI", "green"),
937            ("CCCGGG", "SmaI", "orange"),
938            ("AAGCTT", "HindIII", "red"),
939            ("GGATCC", "BamHI", "purple"),
940        ]:
941            index = 0
942            while True:
943                index = genbank_entry.seq.find(site, start=index)
944                if index == -1:
945                    break
946                feature = SeqFeature(FeatureLocation(index, index + 6), strand=None)
947
948                # This URL should work in SVG output from recent versions
949                # of ReportLab.  You need ReportLab 2.4 or later
950                try:
951                    url = (
952                        "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi"
953                        + "?db=protein&id=%s" % feature.qualifiers["protein_id"][0]
954                    )
955                except KeyError:
956                    url = None
957
958                gds_features.add_feature(
959                    feature,
960                    color=color,
961                    url=url,
962                    # label_position="middle",
963                    label_size=10,
964                    label_color=color,
965                    # label_angle=90,
966                    name=name,
967                    label=True,
968                )
969                index += len(site)
970            del index
971
972        # Now add a graph track...
973        gdt_at_gc = gdd.new_track(
974            2, greytrack=True, name="AT and GC content", greytrack_labels=True
975        )
976        gds_at_gc = gdt_at_gc.new_set(type="graph")
977
978        step = len(genbank_entry) // 200
979        gds_at_gc.new_graph(
980            apply_to_window(genbank_entry.seq, step, calc_gc_content, step),
981            "GC content",
982            style="line",
983            color=colors.lightgreen,
984            altcolor=colors.darkseagreen,
985        )
986        gds_at_gc.new_graph(
987            apply_to_window(genbank_entry.seq, step, calc_at_content, step),
988            "AT content",
989            style="line",
990            color=colors.orange,
991            altcolor=colors.red,
992        )
993
994        # Finally draw it in both formats,
995        gdd.draw(
996            format="linear",
997            orientation="landscape",
998            tracklines=0,
999            pagesize="A4",
1000            fragments=3,
1001        )
1002        output_filename = os.path.join("Graphics", "GD_by_meth_linear.pdf")
1003        gdd.write(output_filename, "PDF")
1004
1005        gdd.draw(
1006            format="circular",
1007            tracklines=False,
1008            circle_core=0.8,
1009            pagesize=(20 * cm, 20 * cm),
1010            circular=True,
1011        )
1012        output_filename = os.path.join("Graphics", "GD_by_meth_circular.pdf")
1013        gdd.write(output_filename, "PDF")
1014
1015    def test_diagram_via_object_pdf(self):
1016        """Construct and draw PDF using object approach."""
1017        genbank_entry = self.record
1018        gdd = Diagram("Test Diagram")
1019
1020        gdt1 = Track(
1021            "CDS features",
1022            greytrack=True,
1023            scale_largetick_interval=1e4,
1024            scale_smalltick_interval=1e3,
1025            greytrack_labels=10,
1026            greytrack_font_color="red",
1027            scale_format="SInt",
1028        )
1029        gdt2 = Track("gene features", greytrack=1, scale_largetick_interval=1e4)
1030
1031        # First add some feature sets:
1032        gdfsA = FeatureSet(name="CDS backgrounds")
1033        gdfsB = FeatureSet(name="gene background")
1034
1035        gdfs1 = FeatureSet(name="CDS features")
1036        gdfs2 = FeatureSet(name="gene features")
1037        gdfs3 = FeatureSet(name="misc_features")
1038        gdfs4 = FeatureSet(name="repeat regions")
1039
1040        prev_gene = None
1041        cds_count = 0
1042        for feature in genbank_entry.features:
1043            if feature.type == "CDS":
1044                cds_count += 1
1045                if prev_gene:
1046                    # Assuming it goes with this CDS!
1047                    if cds_count % 2 == 0:
1048                        dark, light = colors.peru, colors.tan
1049                    else:
1050                        dark, light = colors.burlywood, colors.bisque
1051                    # Background for CDS,
1052                    a = gdfsA.add_feature(
1053                        SeqFeature(
1054                            FeatureLocation(
1055                                feature.location.start, feature.location.end, strand=0
1056                            )
1057                        ),
1058                        color=dark,
1059                    )
1060                    # Background for gene,
1061                    b = gdfsB.add_feature(
1062                        SeqFeature(
1063                            FeatureLocation(
1064                                prev_gene.location.start,
1065                                prev_gene.location.end,
1066                                strand=0,
1067                            )
1068                        ),
1069                        color=dark,
1070                    )
1071                    # Cross link,
1072                    gdd.cross_track_links.append(CrossLink(a, b, light, dark))
1073                    prev_gene = None
1074            if feature.type == "gene":
1075                prev_gene = feature
1076
1077        # Some cross links on the same linear diagram fragment,
1078        f, c = fill_and_border(colors.red)
1079        a = gdfsA.add_feature(
1080            SeqFeature(FeatureLocation(2220, 2230)), color=f, border=c
1081        )
1082        b = gdfsB.add_feature(
1083            SeqFeature(FeatureLocation(2200, 2210)), color=f, border=c
1084        )
1085        gdd.cross_track_links.append(CrossLink(a, b, f, c))
1086
1087        f, c = fill_and_border(colors.blue)
1088        a = gdfsA.add_feature(
1089            SeqFeature(FeatureLocation(2150, 2200)), color=f, border=c
1090        )
1091        b = gdfsB.add_feature(
1092            SeqFeature(FeatureLocation(2220, 2290)), color=f, border=c
1093        )
1094        gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True))
1095
1096        f, c = fill_and_border(colors.green)
1097        a = gdfsA.add_feature(
1098            SeqFeature(FeatureLocation(2250, 2560)), color=f, border=c
1099        )
1100        b = gdfsB.add_feature(
1101            SeqFeature(FeatureLocation(2300, 2860)), color=f, border=c
1102        )
1103        gdd.cross_track_links.append(CrossLink(a, b, f, c))
1104
1105        # Some cross links where both parts are saddling the linear diagram fragment boundary,
1106        f, c = fill_and_border(colors.red)
1107        a = gdfsA.add_feature(
1108            SeqFeature(FeatureLocation(3155, 3250)), color=f, border=c
1109        )
1110        b = gdfsB.add_feature(
1111            SeqFeature(FeatureLocation(3130, 3300)), color=f, border=c
1112        )
1113        gdd.cross_track_links.append(CrossLink(a, b, f, c))
1114        # Nestled within that (drawn on top),
1115        f, c = fill_and_border(colors.blue)
1116        a = gdfsA.add_feature(
1117            SeqFeature(FeatureLocation(3160, 3275)), color=f, border=c
1118        )
1119        b = gdfsB.add_feature(
1120            SeqFeature(FeatureLocation(3180, 3225)), color=f, border=c
1121        )
1122        gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True))
1123
1124        # Some cross links where two features are on either side of the linear diagram fragment boundary,
1125        f, c = fill_and_border(colors.green)
1126        a = gdfsA.add_feature(
1127            SeqFeature(FeatureLocation(6450, 6550)), color=f, border=c
1128        )
1129        b = gdfsB.add_feature(
1130            SeqFeature(FeatureLocation(6265, 6365)), color=f, border=c
1131        )
1132        gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c))
1133        f, c = fill_and_border(colors.gold)
1134        a = gdfsA.add_feature(
1135            SeqFeature(FeatureLocation(6265, 6365)), color=f, border=c
1136        )
1137        b = gdfsB.add_feature(
1138            SeqFeature(FeatureLocation(6450, 6550)), color=f, border=c
1139        )
1140        gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c))
1141        f, c = fill_and_border(colors.red)
1142        a = gdfsA.add_feature(
1143            SeqFeature(FeatureLocation(6275, 6375)), color=f, border=c
1144        )
1145        b = gdfsB.add_feature(
1146            SeqFeature(FeatureLocation(6430, 6530)), color=f, border=c
1147        )
1148        gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c, flip=True))
1149        f, c = fill_and_border(colors.blue)
1150        a = gdfsA.add_feature(
1151            SeqFeature(FeatureLocation(6430, 6530)), color=f, border=c
1152        )
1153        b = gdfsB.add_feature(
1154            SeqFeature(FeatureLocation(6275, 6375)), color=f, border=c
1155        )
1156        gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c, flip=True))
1157
1158        cds_count = 0
1159        for feature in genbank_entry.features:
1160            if feature.type == "CDS":
1161                cds_count += 1
1162                if cds_count % 2 == 0:
1163                    gdfs1.add_feature(feature, color=colors.pink, sigil="ARROW")
1164                else:
1165                    gdfs1.add_feature(feature, color=colors.red, sigil="ARROW")
1166
1167            if feature.type == "gene":
1168                # Note we set the colour of ALL the genes later on as a test,
1169                gdfs2.add_feature(feature, sigil="ARROW")
1170
1171            if feature.type == "misc_feature":
1172                gdfs3.add_feature(feature, color=colors.orange)
1173
1174            if feature.type == "repeat_region":
1175                gdfs4.add_feature(feature, color=colors.purple)
1176
1177        # gdd.cross_track_links = gdd.cross_track_links[:1]
1178
1179        gdfs1.set_all_features("label", 1)
1180        gdfs2.set_all_features("label", 1)
1181        gdfs3.set_all_features("label", 1)
1182        gdfs4.set_all_features("label", 1)
1183
1184        gdfs3.set_all_features("hide", 0)
1185        gdfs4.set_all_features("hide", 0)
1186
1187        # gdfs1.set_all_features('color', colors.red)
1188        gdfs2.set_all_features("color", colors.blue)
1189
1190        gdt1.add_set(gdfsA)  # Before CDS so under them!
1191        gdt1.add_set(gdfs1)
1192
1193        gdt2.add_set(gdfsB)  # Before genes so under them!
1194        gdt2.add_set(gdfs2)
1195
1196        gdt3 = Track(
1197            "misc features and repeats", greytrack=1, scale_largetick_interval=1e4
1198        )
1199        gdt3.add_set(gdfs3)
1200        gdt3.add_set(gdfs4)
1201
1202        # Now add some graph sets:
1203
1204        # Use a fairly large step so we can easily tell the difference
1205        # between the bar and line graphs.
1206        step = len(genbank_entry) // 200
1207        gdgs1 = GraphSet("GC skew")
1208
1209        graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step)
1210        gdgs1.new_graph(
1211            graphdata1,
1212            "GC Skew",
1213            style="bar",
1214            color=colors.violet,
1215            altcolor=colors.purple,
1216        )
1217
1218        gdt4 = Track(
1219            "GC Skew (bar)", height=1.94, greytrack=1, scale_largetick_interval=1e4
1220        )
1221        gdt4.add_set(gdgs1)
1222
1223        gdgs2 = GraphSet("GC and AT Content")
1224        gdgs2.new_graph(
1225            apply_to_window(genbank_entry.seq, step, calc_gc_content, step),
1226            "GC content",
1227            style="line",
1228            color=colors.lightgreen,
1229            altcolor=colors.darkseagreen,
1230        )
1231
1232        gdgs2.new_graph(
1233            apply_to_window(genbank_entry.seq, step, calc_at_content, step),
1234            "AT content",
1235            style="line",
1236            color=colors.orange,
1237            altcolor=colors.red,
1238        )
1239
1240        gdt5 = Track(
1241            "GC Content(green line), AT Content(red line)",
1242            height=1.94,
1243            greytrack=1,
1244            scale_largetick_interval=1e4,
1245        )
1246        gdt5.add_set(gdgs2)
1247
1248        gdgs3 = GraphSet("Di-nucleotide count")
1249        step = len(genbank_entry) // 400  # smaller step
1250        gdgs3.new_graph(
1251            apply_to_window(genbank_entry.seq, step, calc_dinucleotide_counts, step),
1252            "Di-nucleotide count",
1253            style="heat",
1254            color=colors.red,
1255            altcolor=colors.orange,
1256        )
1257        gdt6 = Track("Di-nucleotide count", height=0.5, greytrack=False, scale=False)
1258        gdt6.add_set(gdgs3)
1259
1260        # Add the tracks (from both features and graphs)
1261        # Leave some white space in the middle/bottom
1262        gdd.add_track(gdt4, 3)  # GC skew
1263        gdd.add_track(gdt5, 4)  # GC and AT content
1264        gdd.add_track(gdt1, 5)  # CDS features
1265        gdd.add_track(gdt2, 6)  # Gene features
1266        gdd.add_track(gdt3, 7)  # Misc features and repeat feature
1267        gdd.add_track(gdt6, 8)  # Feature depth
1268
1269        # Finally draw it in both formats, and full view and partial
1270        gdd.draw(
1271            format="circular", orientation="landscape", tracklines=0, pagesize="A0"
1272        )
1273        output_filename = os.path.join("Graphics", "GD_by_obj_circular.pdf")
1274        gdd.write(output_filename, "PDF")
1275
1276        gdd.circular = False
1277        gdd.draw(
1278            format="circular",
1279            orientation="landscape",
1280            tracklines=0,
1281            pagesize="A0",
1282            start=3000,
1283            end=6300,
1284        )
1285        output_filename = os.path.join("Graphics", "GD_by_obj_frag_circular.pdf")
1286        gdd.write(output_filename, "PDF")
1287
1288        gdd.draw(
1289            format="linear",
1290            orientation="landscape",
1291            tracklines=0,
1292            pagesize="A0",
1293            fragments=3,
1294        )
1295        output_filename = os.path.join("Graphics", "GD_by_obj_linear.pdf")
1296        gdd.write(output_filename, "PDF")
1297
1298        gdd.set_all_tracks("greytrack_labels", 2)
1299        gdd.draw(
1300            format="linear",
1301            orientation="landscape",
1302            tracklines=0,
1303            pagesize=(30 * cm, 10 * cm),
1304            fragments=1,
1305            start=3000,
1306            end=6300,
1307        )
1308        output_filename = os.path.join("Graphics", "GD_by_obj_frag_linear.pdf")
1309        gdd.write(output_filename, "PDF")
1310
1311
1312if __name__ == "__main__":
1313    runner = unittest.TextTestRunner(verbosity=2)
1314    unittest.main(testRunner=runner)
1315