1# This code is part of the Biopython distribution and governed by its 2# license. Please see the LICENSE file that should have been included 3# as part of this package. 4"""Tests for GenomeDiagram general functionality.""" 5 6 7import os 8import unittest 9import math 10 11 12# Do we have ReportLab? Raise error if not present. 13from Bio import MissingPythonDependencyError 14 15try: 16 from reportlab.lib import colors 17 from reportlab.pdfbase import pdfmetrics 18 from reportlab.pdfbase.ttfonts import TTFont 19 from reportlab.lib.units import cm 20except ImportError: 21 raise MissingPythonDependencyError( 22 "Install reportlab if you want to use Bio.Graphics." 23 ) from None 24 25try: 26 # The preferred PIL import has changed over time... 27 try: 28 from PIL import Image 29 except ImportError: 30 import Image 31 from reportlab.graphics import renderPM 32except ImportError: 33 # This is an optional part of ReportLab, so may not be installed. 34 # We'll raise a missing dependency error if rendering to a 35 # bitmap format is attempted. 36 renderPM = None 37 38from Bio import SeqIO 39from Bio.SeqFeature import SeqFeature, FeatureLocation 40 41from Bio.Graphics.GenomeDiagram import FeatureSet, GraphSet, Track, Diagram 42from Bio.Graphics.GenomeDiagram import CrossLink 43from Bio.Graphics.GenomeDiagram._Graph import GraphData 44from Bio.Graphics.GenomeDiagram._Colors import ColorTranslator 45 46from reportlab import rl_config 47 48rl_config.invariant = True 49 50 51def fill_and_border(base_color, alpha=0.5): 52 """Return fill and border colors given a base color.""" 53 try: 54 c = base_color.clone() 55 c.alpha = alpha 56 return c, base_color 57 except AttributeError: 58 # Old ReportLab, no transparency and/or no clone 59 return base_color, base_color 60 61 62############################################################################### 63# Utility functions for graph plotting, originally in GenomeDiagram.Utilities # 64# See Bug 2705 for discussion on where to put these functions in Biopython... # 65############################################################################### 66 67 68def apply_to_window(sequence, window_size, function, step=None): 69 """Apply function to windows of the given sequence. 70 71 Returns a list of (position, value) tuples for fragments of the passed 72 sequence of length window_size (stepped by step), calculated by the passed 73 function. Returned positions are the midpoint of each window. 74 75 - sequence - Bio.Seq.Seq object. 76 - window_size - an integer describing the length of sequence to consider. 77 - step - an integer describing the step to take between windows 78 (default = window_size//2). 79 - function - Method or function that accepts a Bio.Seq.Seq object 80 as its sole argument and returns a single value. 81 """ 82 seqlen = len(sequence) # Total length of sequence to be used 83 if step is None: # No step specified, so use half window-width or 1 if larger 84 step = max(window_size // 2, 1) 85 else: # Use specified step, or 1 if greater 86 step = max(step, 1) 87 88 results = [] # Holds (position, value) results 89 90 # Perform the passed function on as many windows as possible, short of 91 # overrunning the sequence 92 pos = 0 93 while pos < seqlen - window_size + 1: 94 # Obtain sequence fragment 95 start, middle, end = pos, (pos + window_size + pos) // 2, pos + window_size 96 fragment = sequence[start:end] 97 # Apply function to the sequence fragment 98 value = function(fragment) 99 results.append((middle, value)) # Add results to list 100 # Advance to next fragment 101 pos += step 102 103 # Use the last available window on the sequence, even if it means 104 # re-covering old ground 105 if pos != seqlen - window_size: 106 # Obtain sequence fragment 107 pos = seqlen - window_size 108 start, middle, end = pos, (pos + window_size + pos) // 2, pos + window_size 109 fragment = sequence[start:end] 110 # Apply function to sequence fragment 111 value = function(fragment) 112 results.append((middle, value)) # Add results to list 113 114 return results # Return the list of (position, value) results 115 116 117def calc_gc_content(sequence): 118 """Return the % G+C content in a passed sequence. 119 120 Arguments: 121 - sequence - a Bio.Seq.Seq object. 122 123 calc_gc_content(sequence) 124 125 """ 126 d = {} 127 for nt in ["A", "T", "G", "C"]: 128 d[nt] = sequence.count(nt) + sequence.count(nt.lower()) 129 gc = d.get("G", 0) + d.get("C", 0) 130 131 if gc == 0: 132 return 0 133 # print(gc*100.0/(d['A'] +d['T'] + gc)) 134 return gc * 1.0 / (d["A"] + d["T"] + gc) 135 136 137def calc_at_content(sequence): 138 """Return the % A+T content in a passed sequence. 139 140 Arguments: 141 - sequence - a Bio.Seq.Seq object. 142 143 calc_at_content(sequence) 144 145 """ 146 d = {} 147 for nt in ["A", "T", "G", "C"]: 148 d[nt] = sequence.count(nt) + sequence.count(nt.lower()) 149 at = d.get("A", 0) + d.get("T", 0) 150 151 if at == 0: 152 return 0 153 return at * 1.0 / (d["G"] + d["G"] + at) 154 155 156def calc_gc_skew(sequence): 157 """Return the (G-C)/(G+C) GC skew in a passed sequence. 158 159 Arguments: 160 - sequence - a Bio.Seq.Seq object. 161 162 calc_gc_skew(sequence) 163 164 """ 165 g = sequence.count("G") + sequence.count("g") 166 c = sequence.count("C") + sequence.count("c") 167 if g + c == 0: 168 return 0.0 # TODO - return NaN or None here? 169 else: 170 return (g - c) / float(g + c) 171 172 173def calc_at_skew(sequence): 174 """Return the (A-T)/(A+T) AT skew in a passed sequence. 175 176 Arguments: 177 - sequence - a Bio.Seq.Seq object. 178 179 calc_at_skew(sequence) 180 181 """ 182 a = sequence.count("A") + sequence.count("a") 183 t = sequence.count("T") + sequence.count("t") 184 if a + t == 0: 185 return 0.0 # TODO - return NaN or None here? 186 else: 187 return (a - t) / float(a + t) 188 189 190def calc_dinucleotide_counts(sequence): 191 """Return the total count of di-nucleotides repeats (e.g. "AA", "CC"). 192 193 This is purely for the sake of generating some non-random sequence 194 based score for plotting, with no expected biological meaning. 195 196 NOTE - Only considers same case pairs. 197 NOTE - "AA" scores 1, "AAA" scores 2, "AAAA" scores 3 etc. 198 """ 199 total = 0 200 for letter in "ACTGUactgu": 201 total += sequence.count(letter + letter) 202 return total 203 204 205############################################################################### 206# End of utility functions for graph plotting # 207############################################################################### 208 209 210# Tests 211# class TrackTest(unittest.TestCase): 212# # TODO Bring code from Track.py, unsure about what test does 213# pass 214 215 216class ColorsTest(unittest.TestCase): 217 """Color tests.""" 218 219 def test_color_conversions(self): 220 """Test color translations.""" 221 translator = ColorTranslator() 222 223 # Does the translate method correctly convert the passed argument? 224 self.assertEqual( 225 translator.float1_color((0.5, 0.5, 0.5)), 226 translator.translate((0.5, 0.5, 0.5)), 227 "Did not correctly translate colour from floating point RGB tuple", 228 ) 229 self.assertEqual( 230 translator.int255_color((1, 75, 240)), 231 translator.translate((1, 75, 240)), 232 "Did not correctly translate colour from integer RGB tuple", 233 ) 234 self.assertEqual( 235 translator.artemis_color(7), 236 translator.translate(7), 237 "Did not correctly translate colour from Artemis colour scheme", 238 ) 239 self.assertEqual( 240 translator.scheme_color(2), 241 translator.translate(2), 242 "Did not correctly translate colour from user-defined colour scheme", 243 ) 244 245 246class GraphTest(unittest.TestCase): 247 """Graph tests.""" 248 249 def test_limits(self): 250 """Check line graphs.""" 251 # TODO - Fix GD so that the same min/max is used for all three lines? 252 points = 1000 253 scale = math.pi * 2.0 / points 254 data1 = [math.sin(x * scale) for x in range(points)] 255 data2 = [math.cos(x * scale) for x in range(points)] 256 data3 = [2 * math.sin(2 * x * scale) for x in range(points)] 257 258 gdd = Diagram( 259 "Test Diagram", 260 circular=False, 261 y=0.01, 262 yt=0.01, 263 yb=0.01, 264 x=0.01, 265 xl=0.01, 266 xr=0.01, 267 ) 268 gdt_data = gdd.new_track(1, greytrack=False) 269 gds_data = gdt_data.new_set("graph") 270 for data_values, _name, color in zip( 271 [data1, data2, data3], ["sin", "cos", "2sin2"], ["red", "green", "blue"] 272 ): 273 data = list(zip(range(points), data_values)) 274 gds_data.new_graph( 275 data, "", style="line", color=color, altcolor=color, center=0 276 ) 277 278 gdd.draw( 279 format="linear", 280 tracklines=False, 281 pagesize=(15 * cm, 15 * cm), 282 fragments=1, 283 start=0, 284 end=points, 285 ) 286 gdd.write(os.path.join("Graphics", "line_graph.pdf"), "pdf") 287 # Circular diagram 288 gdd.draw( 289 tracklines=False, 290 pagesize=(15 * cm, 15 * cm), 291 circular=True, # Data designed to be periodic 292 start=0, 293 end=points, 294 circle_core=0.5, 295 ) 296 gdd.write(os.path.join("Graphics", "line_graph_c.pdf"), "pdf") 297 298 def test_slicing(self): 299 """Check GraphData slicing.""" 300 gd = GraphData() 301 gd.set_data([(1, 10), (5, 15), (20, 40)]) 302 gd.add_point((10, 20)) 303 304 self.assertEqual( 305 gd[4:16], 306 [(5, 15), (10, 20),], # noqa 231 307 "Unable to insert and retrieve points correctly", 308 ) 309 310 311class LabelTest(unittest.TestCase): 312 """Check label positioning.""" 313 314 def setUp(self): 315 """Start a diagram.""" 316 self.gdd = Diagram( 317 "Test Diagram", 318 circular=False, 319 y=0.01, 320 yt=0.01, 321 yb=0.01, 322 x=0.01, 323 xl=0.01, 324 xr=0.01, 325 ) 326 327 def finish(self, name, circular=True): 328 """Draw it...""" 329 tracks = len(self.gdd.tracks) 330 # Work around the page orientation code being too clever 331 # and flipping the h & w round: 332 if tracks <= 3: 333 orient = "landscape" 334 else: 335 orient = "portrait" 336 self.gdd.draw( 337 format="linear", 338 orientation=orient, 339 tracklines=False, 340 pagesize=(15 * cm, 5 * cm * tracks), 341 fragments=1, 342 start=0, 343 end=400, 344 ) 345 self.gdd.write(os.path.join("Graphics", name + ".pdf"), "pdf") 346 global renderPM 347 if renderPM: 348 try: 349 # For the tutorial this is useful: 350 self.gdd.write(os.path.join("Graphics", name + ".png"), "png") 351 except renderPM.RenderPMError: 352 # Probably a font problem, e.g. 353 # RenderPMError: Can't setFont(Times-Roman) missing the T1 files? 354 # Originally <type 'exceptions.TypeError'>: makeT1Font() argument 2 355 # must be string, not None 356 renderPM = None 357 except OSError: 358 # Probably a library problem, e.g. 359 # OSError: encoder zip not available 360 renderPM = None 361 if circular: 362 # Circular diagram 363 self.gdd.draw( 364 tracklines=False, 365 pagesize=(15 * cm, 15 * cm), 366 fragments=1, 367 circle_core=0.5, 368 start=0, 369 end=400, 370 ) 371 self.gdd.write(os.path.join("Graphics", name + "_c.pdf"), "pdf") 372 373 def add_track_with_sigils(self, **kwargs): 374 """Add track with sigils.""" 375 self.gdt_features = self.gdd.new_track(1, greytrack=False) 376 self.gds_features = self.gdt_features.new_set() 377 for i in range(18): 378 start = int((400 * i) / 18.0) 379 end = start + 17 380 if i % 3 == 0: 381 strand = None 382 name = "Strandless" 383 color = colors.orange 384 elif i % 3 == 1: 385 strand = +1 386 name = "Forward" 387 color = colors.red 388 else: 389 strand = -1 390 name = "Reverse" 391 color = colors.blue 392 feature = SeqFeature(FeatureLocation(start, end), strand=strand) 393 self.gds_features.add_feature( 394 feature, name=name, color=color, label=True, **kwargs 395 ) 396 397 def test_label_default(self): 398 """Feature labels - default.""" 399 self.add_track_with_sigils() 400 self.finish("labels_default") 401 402 403class SigilsTest(unittest.TestCase): 404 """Check the different feature sigils. 405 406 These figures are intended to be used in the Tutorial... 407 """ 408 409 def setUp(self): 410 """Initialise diagram.""" 411 self.gdd = Diagram( 412 "Test Diagram", 413 circular=False, 414 y=0.01, 415 yt=0.01, 416 yb=0.01, 417 x=0.01, 418 xl=0.01, 419 xr=0.01, 420 ) 421 422 def add_track_with_sigils(self, track_caption="", **kwargs): 423 """Add a track of features.""" 424 self.gdt_features = self.gdd.new_track( 425 1, greytrack=(track_caption != ""), name=track_caption, greytrack_labels=1 426 ) 427 # We'll just use one feature set for these features, 428 self.gds_features = self.gdt_features.new_set() 429 # Add three features to show the strand options, 430 feature = SeqFeature(FeatureLocation(25, 125), strand=+1) 431 self.gds_features.add_feature(feature, name="Forward", **kwargs) 432 feature = SeqFeature(FeatureLocation(150, 250), strand=None) 433 self.gds_features.add_feature(feature, name="Strandless", **kwargs) 434 feature = SeqFeature(FeatureLocation(275, 375), strand=-1) 435 self.gds_features.add_feature(feature, name="Reverse", **kwargs) 436 437 def finish(self, name, circular=True): 438 """Draw it...""" 439 tracks = len(self.gdd.tracks) 440 # Work around the page orientation code being too clever 441 # and flipping the h & w round: 442 if tracks <= 3: 443 orient = "landscape" 444 else: 445 orient = "portrait" 446 self.gdd.draw( 447 format="linear", 448 orientation=orient, 449 tracklines=False, 450 pagesize=(15 * cm, 5 * cm * tracks), 451 fragments=1, 452 start=0, 453 end=400, 454 ) 455 self.gdd.write(os.path.join("Graphics", name + ".pdf"), "pdf") 456 global renderPM 457 if renderPM: 458 # For the tutorial this might be useful: 459 try: 460 self.gdd.write(os.path.join("Graphics", name + ".png"), "png") 461 except renderPM.RenderPMError: 462 # Probably a font problem 463 renderPM = None 464 if circular: 465 # Circular diagram 466 self.gdd.draw( 467 tracklines=False, 468 pagesize=(15 * cm, 15 * cm), 469 fragments=1, 470 circle_core=0.5, 471 start=0, 472 end=400, 473 ) 474 self.gdd.write(os.path.join("Graphics", name + "_c.pdf"), "pdf") 475 476 def test_all_sigils(self): 477 """All sigils.""" 478 for glyph in ["BOX", "OCTO", "JAGGY", "ARROW", "BIGARROW"]: 479 self.add_track_with_sigils( 480 track_caption=' sigil="%s"' % glyph, sigil=glyph 481 ) 482 self.finish("GD_sigils") 483 484 def test_labels(self): 485 """Feature labels.""" 486 self.add_track_with_sigils(label=True) 487 self.add_track_with_sigils( 488 label=True, 489 color="green", 490 # label_position left as default! 491 label_size=25, 492 label_angle=0, 493 ) 494 self.add_track_with_sigils( 495 label=True, 496 color="purple", 497 label_position="end", 498 label_size=4, 499 label_angle=90, 500 ) 501 self.add_track_with_sigils( 502 label=True, 503 color="blue", 504 label_position="middle", 505 label_size=6, 506 label_angle=-90, 507 ) 508 self.add_track_with_sigils( 509 label=True, 510 color="cyan", 511 label_position="start", 512 label_size=6, 513 label_angle=-90, 514 ) 515 self.assertEqual(len(self.gdd.tracks), 5) 516 self.finish("GD_sigil_labels", circular=True) 517 518 def test_arrow_shafts(self): 519 """Feature arrow sigils, varying shafts.""" 520 self.add_track_with_sigils(sigil="ARROW") 521 self.add_track_with_sigils(sigil="ARROW", color="brown", arrowshaft_height=1.0) 522 self.add_track_with_sigils(sigil="ARROW", color="teal", arrowshaft_height=0.2) 523 self.add_track_with_sigils( 524 sigil="ARROW", color="darkgreen", arrowshaft_height=0.1 525 ) 526 self.assertEqual(len(self.gdd.tracks), 4) 527 self.finish("GD_sigil_arrow_shafts") 528 529 def test_big_arrow_shafts(self): 530 """Feature big-arrow sigils, varying shafts.""" 531 self.add_track_with_sigils(sigil="BIGARROW") 532 self.add_track_with_sigils( 533 sigil="BIGARROW", color="orange", arrowshaft_height=1.0 534 ) 535 self.add_track_with_sigils( 536 sigil="BIGARROW", color="teal", arrowshaft_height=0.2 537 ) 538 self.add_track_with_sigils( 539 sigil="BIGARROW", color="green", arrowshaft_height=0.1 540 ) 541 self.assertEqual(len(self.gdd.tracks), 4) 542 self.finish("GD_sigil_bigarrow_shafts") 543 544 def test_arrow_heads(self): 545 """Feature arrow sigils, varying heads.""" 546 self.add_track_with_sigils(sigil="ARROW") 547 self.add_track_with_sigils(sigil="ARROW", color="blue", arrowhead_length=0.25) 548 self.add_track_with_sigils(sigil="ARROW", color="orange", arrowhead_length=1) 549 self.add_track_with_sigils( 550 sigil="ARROW", color="red", arrowhead_length=10000 551 ) # Triangles 552 self.assertEqual(len(self.gdd.tracks), 4) 553 self.finish("GD_sigil_arrows") 554 555 def short_sigils(self, glyph): 556 """Draw sigils on top of grey box backgrounds.""" 557 # The blue boxes are only relevant for the BIGARROW 558 # Add a track of features, bigger height to emphasise any sigil errors 559 self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3) 560 # We'll just use one feature set for these features, 561 self.gds_features = self.gdt_features.new_set() 562 # For the ARROW and BIGARROW sigils: 563 # - Green arrows just have small heads (meaning if there is a mitre 564 # it will escape the bounding box). 565 # - Red arrows should be small triangles (so short no shaft shown) 566 567 # Forward strand: 568 feature = SeqFeature(FeatureLocation(15, 30), strand=-1) 569 self.gds_features.add_feature(feature, color="blue") 570 feature = SeqFeature(FeatureLocation(15, 30), strand=+1) 571 self.gds_features.add_feature(feature, color="grey") 572 self.gds_features.add_feature( 573 feature, name="Forward", sigil=glyph, arrowhead_length=0.05 574 ) 575 576 feature = SeqFeature(FeatureLocation(55, 60), strand=-1) 577 self.gds_features.add_feature(feature, color="blue") 578 feature = SeqFeature(FeatureLocation(55, 60), strand=+1) 579 self.gds_features.add_feature(feature, color="grey") 580 self.gds_features.add_feature( 581 feature, name="Forward", sigil=glyph, arrowhead_length=1000, color="red" 582 ) 583 584 feature = SeqFeature(FeatureLocation(75, 125), strand=-1) 585 self.gds_features.add_feature(feature, color="blue") 586 feature = SeqFeature(FeatureLocation(75, 125), strand=+1) 587 self.gds_features.add_feature(feature, color="grey") 588 self.gds_features.add_feature( 589 feature, name="Forward", sigil=glyph, arrowhead_length=0.05 590 ) 591 592 # Strandless: 593 feature = SeqFeature(FeatureLocation(140, 155), strand=None) 594 self.gds_features.add_feature(feature, color="grey") 595 self.gds_features.add_feature( 596 feature, name="Strandless", sigil=glyph, arrowhead_length=0.05 597 ) 598 599 feature = SeqFeature(FeatureLocation(180, 185), strand=None) 600 self.gds_features.add_feature(feature, color="grey") 601 self.gds_features.add_feature( 602 feature, name="Strandless", sigil=glyph, arrowhead_length=1000, color="red" 603 ) 604 605 feature = SeqFeature(FeatureLocation(200, 250), strand=None) 606 self.gds_features.add_feature(feature, color="grey") 607 self.gds_features.add_feature( 608 feature, name="Strandless", sigil=glyph, arrowhead_length=0.05 609 ) 610 611 # Reverse strand: 612 feature = SeqFeature(FeatureLocation(265, 280), strand=+1) 613 self.gds_features.add_feature(feature, color="blue") 614 feature = SeqFeature(FeatureLocation(265, 280), strand=-1) 615 self.gds_features.add_feature(feature, color="grey") 616 self.gds_features.add_feature( 617 feature, name="Reverse", sigil=glyph, arrowhead_length=0.05 618 ) 619 620 feature = SeqFeature(FeatureLocation(305, 310), strand=+1) 621 self.gds_features.add_feature(feature, color="blue") 622 feature = SeqFeature(FeatureLocation(305, 310), strand=-1) 623 self.gds_features.add_feature(feature, color="grey") 624 self.gds_features.add_feature( 625 feature, name="Reverse", sigil=glyph, arrowhead_length=1000, color="red" 626 ) 627 628 feature = SeqFeature(FeatureLocation(325, 375), strand=+1) 629 self.gds_features.add_feature(feature, color="blue") 630 feature = SeqFeature(FeatureLocation(325, 375), strand=-1) 631 self.gds_features.add_feature(feature, color="grey") 632 self.gds_features.add_feature( 633 feature, name="Reverse", sigil=glyph, arrowhead_length=0.05 634 ) 635 636 self.finish("GD_sigil_short_%s" % glyph) 637 638 def test_short_arrow(self): 639 """Feature arrow sigil heads within bounding box.""" 640 self.short_sigils("ARROW") 641 642 def test_short_bigarrow(self): 643 """Feature big-arrow sigil heads within bounding box.""" 644 self.short_sigils("BIGARROW") 645 646 def test_short_jaggy(self): 647 """Feature arrow sigil heads within bounding box.""" 648 self.short_sigils("JAGGY") 649 650 def test_short_octo(self): 651 """Feature big-arrow sigil heads within bounding box.""" 652 self.short_sigils("OCTO") 653 654 def long_sigils(self, glyph): 655 """Check feature sigils within bounding box.""" 656 # Add a track of features, bigger height to emphasise any sigil errors 657 self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3) 658 # We'll just use one feature set for these features if strand specific 659 self.gds_features = self.gdt_features.new_set() 660 if glyph in ["BIGARROW"]: 661 # These straddle the axis, so don't want to draw them on top of each other 662 feature = SeqFeature(FeatureLocation(25, 375), strand=None) 663 self.gds_features.add_feature(feature, color="lightblue") 664 feature = SeqFeature(FeatureLocation(25, 375), strand=+1) 665 else: 666 feature = SeqFeature(FeatureLocation(25, 375), strand=+1) 667 self.gds_features.add_feature(feature, color="lightblue") 668 self.gds_features.add_feature( 669 feature, name="Forward", sigil=glyph, color="blue", arrowhead_length=2.0 670 ) 671 672 if glyph in ["BIGARROW"]: 673 # These straddle the axis, so don't want to draw them on top of each other 674 self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3) 675 self.gds_features = self.gdt_features.new_set() 676 feature = SeqFeature(FeatureLocation(25, 375), strand=None) 677 self.gds_features.add_feature(feature, color="pink") 678 feature = SeqFeature(FeatureLocation(25, 375), strand=-1) 679 else: 680 feature = SeqFeature(FeatureLocation(25, 375), strand=-1) 681 self.gds_features.add_feature(feature, color="pink") 682 self.gds_features.add_feature( 683 feature, name="Reverse", sigil=glyph, color="red", arrowhead_length=2.0 684 ) 685 # Add another track of features, bigger height to emphasise any sigil errors 686 self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3) 687 # We'll just use one feature set for these features, 688 self.gds_features = self.gdt_features.new_set() 689 feature = SeqFeature(FeatureLocation(25, 375), strand=None) 690 self.gds_features.add_feature(feature, color="lightgreen") 691 self.gds_features.add_feature( 692 feature, name="Standless", sigil=glyph, color="green", arrowhead_length=2.0 693 ) 694 self.finish("GD_sigil_long_%s" % glyph) 695 696 def test_long_arrow_heads(self): 697 """Feature ARROW sigil heads within bounding box.""" 698 self.long_sigils("ARROW") 699 700 def test_long_bigarrow_heads(self): 701 """Feature BIGARROW sigil heads within bounding box.""" 702 self.long_sigils("BIGARROW") 703 704 def test_long_octo_heads(self): 705 """Feature OCTO sigil heads within bounding box.""" 706 self.long_sigils("OCTO") 707 708 def test_long_jaggy(self): 709 """Feature JAGGY sigil heads within bounding box.""" 710 self.long_sigils("JAGGY") 711 712 713class DiagramTest(unittest.TestCase): 714 """Creating feature sets, graph sets, tracks etc individually for the diagram.""" 715 716 def setUp(self): 717 """Test setup, just loads a GenBank file as a SeqRecord.""" 718 with open(os.path.join("GenBank", "NC_005816.gb")) as handle: 719 self.record = SeqIO.read(handle, "genbank") 720 721 self.gdd = Diagram("Test Diagram") 722 # Add a track of features, 723 self.gdd.new_track( 724 1, greytrack=True, name="CDS Features", greytrack_labels=0, height=0.5 725 ) 726 727 def tearDown(self): 728 """Release the drawing objects.""" 729 del self.gdd 730 731 def test_str(self): 732 """Test diagram's info as string.""" 733 expected = ( 734 "\n<<class 'Bio.Graphics.GenomeDiagram._Diagram.Diagram'>: Test Diagram>" 735 "\n1 tracks" 736 "\nTrack 1: " 737 "\n<<class 'Bio.Graphics.GenomeDiagram._Track.Track'>: CDS Features>" 738 "\n0 sets" 739 "\n" 740 ) 741 self.assertEqual(expected, str(self.gdd)) 742 743 def test_add_track(self): 744 """Add track.""" 745 track = Track(name="Annotated Features") 746 self.gdd.add_track(track, 2) 747 self.assertEqual(2, len(self.gdd.get_tracks())) 748 749 def test_add_track_to_occupied_level(self): 750 """Add track to occupied level.""" 751 new_track = self.gdd.get_tracks()[0] 752 self.gdd.add_track(new_track, 1) 753 self.assertEqual(2, len(self.gdd.get_tracks())) 754 755 def test_add_track_error(self): 756 """Test adding unspecified track.""" 757 self.assertRaises(ValueError, self.gdd.add_track, None, 1) 758 759 def test_del_tracks(self): 760 """Delete track.""" 761 self.gdd.del_track(1) 762 self.assertEqual(0, len(self.gdd.get_tracks())) 763 764 def test_get_tracks(self): 765 """Get track.""" 766 self.assertEqual(1, len(self.gdd.get_tracks())) 767 768 def test_move_track(self): 769 """Move a track.""" 770 self.gdd.move_track(1, 2) 771 expected = ( 772 "\n<<class 'Bio.Graphics.GenomeDiagram._Diagram.Diagram'>: Test Diagram>" 773 "\n1 tracks" 774 "\nTrack 2: " 775 "\n<<class 'Bio.Graphics.GenomeDiagram._Track.Track'>: CDS Features>" 776 "\n0 sets" 777 "\n" 778 ) 779 self.assertEqual(expected, str(self.gdd)) 780 781 def test_renumber(self): 782 """Test renumbering tracks.""" 783 self.gdd.renumber_tracks(0) 784 expected = ( 785 "\n<<class 'Bio.Graphics.GenomeDiagram._Diagram.Diagram'>: Test Diagram>" 786 "\n1 tracks" 787 "\nTrack 0: " 788 "\n<<class 'Bio.Graphics.GenomeDiagram._Track.Track'>: CDS Features>" 789 "\n0 sets" 790 "\n" 791 ) 792 self.assertEqual(expected, str(self.gdd)) 793 794 def test_write_arguments(self): 795 """Check how the write methods respond to output format arguments.""" 796 gdd = Diagram("Test Diagram") 797 gdd.drawing = None # Hack - need the ReportLab drawing object to be created. 798 filename = os.path.join("Graphics", "error.txt") 799 # We (now) allow valid formats in any case. 800 for output in ["XXX", "xxx", None, 123, 5.9]: 801 with self.assertRaises(ValueError): 802 gdd.write(filename, output) 803 with self.assertRaises(ValueError): 804 gdd.write_to_string(output) 805 806 def test_partial_diagram(self): 807 """Construct and draw SVG and PDF for just part of a SeqRecord.""" 808 genbank_entry = self.record 809 start = 6500 810 end = 8750 811 812 gdd = Diagram( 813 "Test Diagram", 814 # For the circular diagram we don't want a closed cirle: 815 circular=False, 816 ) 817 # Add a track of features, 818 gdt_features = gdd.new_track( 819 1, 820 greytrack=True, 821 name="CDS Features", 822 scale_largetick_interval=1000, 823 scale_smalltick_interval=100, 824 scale_format="SInt", 825 greytrack_labels=False, 826 height=0.5, 827 ) 828 # We'll just use one feature set for these features, 829 gds_features = gdt_features.new_set() 830 for feature in genbank_entry.features: 831 if feature.type != "CDS": 832 # We're going to ignore these. 833 continue 834 if feature.location.end.position < start: 835 # Out of frame (too far left) 836 continue 837 if feature.location.start.position > end: 838 # Out of frame (too far right) 839 continue 840 841 # This URL should work in SVG output from recent versions 842 # of ReportLab. You need ReportLab 2.4 or later 843 try: 844 url = ( 845 "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi" 846 + "?db=protein&id=%s" % feature.qualifiers["protein_id"][0] 847 ) 848 except KeyError: 849 url = None 850 851 # Note that I am using strings for color names, instead 852 # of passing in color objects. This should also work! 853 if len(gds_features) % 2 == 0: 854 color = "white" # for testing the automatic black border! 855 else: 856 color = "red" 857 # Checking it can cope with the old UK spelling colour. 858 # Also show the labels perpendicular to the track. 859 gds_features.add_feature( 860 feature, 861 colour=color, 862 url=url, 863 sigil="ARROW", 864 label_position=None, 865 label_size=8, 866 label_angle=90, 867 label=True, 868 ) 869 870 # And draw it... 871 gdd.draw( 872 format="linear", 873 orientation="landscape", 874 tracklines=False, 875 pagesize=(10 * cm, 6 * cm), 876 fragments=1, 877 start=start, 878 end=end, 879 ) 880 output_filename = os.path.join("Graphics", "GD_region_linear.pdf") 881 gdd.write(output_filename, "PDF") 882 883 # Also check the write_to_string (bytes string) method matches, 884 with open(output_filename, "rb") as handle: 885 self.assertEqual(handle.read(), gdd.write_to_string("PDF")) 886 887 output_filename = os.path.join("Graphics", "GD_region_linear.svg") 888 gdd.write(output_filename, "SVG") 889 890 # Circular with a particular start/end is a bit odd, but by setting 891 # circular=False (above) a sweep of 90% is used (a wedge is left out) 892 gdd.draw( 893 format="circular", 894 tracklines=False, 895 pagesize=(10 * cm, 10 * cm), 896 start=start, 897 end=end, 898 ) 899 output_filename = os.path.join("Graphics", "GD_region_circular.pdf") 900 gdd.write(output_filename, "PDF") 901 output_filename = os.path.join("Graphics", "GD_region_circular.svg") 902 gdd.write(output_filename, "SVG") 903 904 def test_diagram_via_methods_pdf(self): 905 """Construct and draw PDF using method approach.""" 906 genbank_entry = self.record 907 gdd = Diagram("Test Diagram") 908 909 # Add a track of features, 910 gdt_features = gdd.new_track( 911 1, greytrack=True, name="CDS Features", greytrack_labels=0, height=0.5 912 ) 913 # We'll just use one feature set for the genes and misc_features, 914 gds_features = gdt_features.new_set() 915 for feature in genbank_entry.features: 916 if feature.type == "gene": 917 if len(gds_features) % 2 == 0: 918 color = "blue" 919 else: 920 color = "lightblue" 921 gds_features.add_feature( 922 feature, 923 color=color, 924 # label_position="middle", 925 # label_position="end", 926 label_position="start", 927 label_size=11, 928 # label_angle=90, 929 sigil="ARROW", 930 label=True, 931 ) 932 933 # I want to include some strandless features, so for an example 934 # will use EcoRI recognition sites etc. 935 for site, name, color in [ 936 ("GAATTC", "EcoRI", "green"), 937 ("CCCGGG", "SmaI", "orange"), 938 ("AAGCTT", "HindIII", "red"), 939 ("GGATCC", "BamHI", "purple"), 940 ]: 941 index = 0 942 while True: 943 index = genbank_entry.seq.find(site, start=index) 944 if index == -1: 945 break 946 feature = SeqFeature(FeatureLocation(index, index + 6), strand=None) 947 948 # This URL should work in SVG output from recent versions 949 # of ReportLab. You need ReportLab 2.4 or later 950 try: 951 url = ( 952 "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi" 953 + "?db=protein&id=%s" % feature.qualifiers["protein_id"][0] 954 ) 955 except KeyError: 956 url = None 957 958 gds_features.add_feature( 959 feature, 960 color=color, 961 url=url, 962 # label_position="middle", 963 label_size=10, 964 label_color=color, 965 # label_angle=90, 966 name=name, 967 label=True, 968 ) 969 index += len(site) 970 del index 971 972 # Now add a graph track... 973 gdt_at_gc = gdd.new_track( 974 2, greytrack=True, name="AT and GC content", greytrack_labels=True 975 ) 976 gds_at_gc = gdt_at_gc.new_set(type="graph") 977 978 step = len(genbank_entry) // 200 979 gds_at_gc.new_graph( 980 apply_to_window(genbank_entry.seq, step, calc_gc_content, step), 981 "GC content", 982 style="line", 983 color=colors.lightgreen, 984 altcolor=colors.darkseagreen, 985 ) 986 gds_at_gc.new_graph( 987 apply_to_window(genbank_entry.seq, step, calc_at_content, step), 988 "AT content", 989 style="line", 990 color=colors.orange, 991 altcolor=colors.red, 992 ) 993 994 # Finally draw it in both formats, 995 gdd.draw( 996 format="linear", 997 orientation="landscape", 998 tracklines=0, 999 pagesize="A4", 1000 fragments=3, 1001 ) 1002 output_filename = os.path.join("Graphics", "GD_by_meth_linear.pdf") 1003 gdd.write(output_filename, "PDF") 1004 1005 gdd.draw( 1006 format="circular", 1007 tracklines=False, 1008 circle_core=0.8, 1009 pagesize=(20 * cm, 20 * cm), 1010 circular=True, 1011 ) 1012 output_filename = os.path.join("Graphics", "GD_by_meth_circular.pdf") 1013 gdd.write(output_filename, "PDF") 1014 1015 def test_diagram_via_object_pdf(self): 1016 """Construct and draw PDF using object approach.""" 1017 genbank_entry = self.record 1018 gdd = Diagram("Test Diagram") 1019 1020 gdt1 = Track( 1021 "CDS features", 1022 greytrack=True, 1023 scale_largetick_interval=1e4, 1024 scale_smalltick_interval=1e3, 1025 greytrack_labels=10, 1026 greytrack_font_color="red", 1027 scale_format="SInt", 1028 ) 1029 gdt2 = Track("gene features", greytrack=1, scale_largetick_interval=1e4) 1030 1031 # First add some feature sets: 1032 gdfsA = FeatureSet(name="CDS backgrounds") 1033 gdfsB = FeatureSet(name="gene background") 1034 1035 gdfs1 = FeatureSet(name="CDS features") 1036 gdfs2 = FeatureSet(name="gene features") 1037 gdfs3 = FeatureSet(name="misc_features") 1038 gdfs4 = FeatureSet(name="repeat regions") 1039 1040 prev_gene = None 1041 cds_count = 0 1042 for feature in genbank_entry.features: 1043 if feature.type == "CDS": 1044 cds_count += 1 1045 if prev_gene: 1046 # Assuming it goes with this CDS! 1047 if cds_count % 2 == 0: 1048 dark, light = colors.peru, colors.tan 1049 else: 1050 dark, light = colors.burlywood, colors.bisque 1051 # Background for CDS, 1052 a = gdfsA.add_feature( 1053 SeqFeature( 1054 FeatureLocation( 1055 feature.location.start, feature.location.end, strand=0 1056 ) 1057 ), 1058 color=dark, 1059 ) 1060 # Background for gene, 1061 b = gdfsB.add_feature( 1062 SeqFeature( 1063 FeatureLocation( 1064 prev_gene.location.start, 1065 prev_gene.location.end, 1066 strand=0, 1067 ) 1068 ), 1069 color=dark, 1070 ) 1071 # Cross link, 1072 gdd.cross_track_links.append(CrossLink(a, b, light, dark)) 1073 prev_gene = None 1074 if feature.type == "gene": 1075 prev_gene = feature 1076 1077 # Some cross links on the same linear diagram fragment, 1078 f, c = fill_and_border(colors.red) 1079 a = gdfsA.add_feature( 1080 SeqFeature(FeatureLocation(2220, 2230)), color=f, border=c 1081 ) 1082 b = gdfsB.add_feature( 1083 SeqFeature(FeatureLocation(2200, 2210)), color=f, border=c 1084 ) 1085 gdd.cross_track_links.append(CrossLink(a, b, f, c)) 1086 1087 f, c = fill_and_border(colors.blue) 1088 a = gdfsA.add_feature( 1089 SeqFeature(FeatureLocation(2150, 2200)), color=f, border=c 1090 ) 1091 b = gdfsB.add_feature( 1092 SeqFeature(FeatureLocation(2220, 2290)), color=f, border=c 1093 ) 1094 gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True)) 1095 1096 f, c = fill_and_border(colors.green) 1097 a = gdfsA.add_feature( 1098 SeqFeature(FeatureLocation(2250, 2560)), color=f, border=c 1099 ) 1100 b = gdfsB.add_feature( 1101 SeqFeature(FeatureLocation(2300, 2860)), color=f, border=c 1102 ) 1103 gdd.cross_track_links.append(CrossLink(a, b, f, c)) 1104 1105 # Some cross links where both parts are saddling the linear diagram fragment boundary, 1106 f, c = fill_and_border(colors.red) 1107 a = gdfsA.add_feature( 1108 SeqFeature(FeatureLocation(3155, 3250)), color=f, border=c 1109 ) 1110 b = gdfsB.add_feature( 1111 SeqFeature(FeatureLocation(3130, 3300)), color=f, border=c 1112 ) 1113 gdd.cross_track_links.append(CrossLink(a, b, f, c)) 1114 # Nestled within that (drawn on top), 1115 f, c = fill_and_border(colors.blue) 1116 a = gdfsA.add_feature( 1117 SeqFeature(FeatureLocation(3160, 3275)), color=f, border=c 1118 ) 1119 b = gdfsB.add_feature( 1120 SeqFeature(FeatureLocation(3180, 3225)), color=f, border=c 1121 ) 1122 gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True)) 1123 1124 # Some cross links where two features are on either side of the linear diagram fragment boundary, 1125 f, c = fill_and_border(colors.green) 1126 a = gdfsA.add_feature( 1127 SeqFeature(FeatureLocation(6450, 6550)), color=f, border=c 1128 ) 1129 b = gdfsB.add_feature( 1130 SeqFeature(FeatureLocation(6265, 6365)), color=f, border=c 1131 ) 1132 gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c)) 1133 f, c = fill_and_border(colors.gold) 1134 a = gdfsA.add_feature( 1135 SeqFeature(FeatureLocation(6265, 6365)), color=f, border=c 1136 ) 1137 b = gdfsB.add_feature( 1138 SeqFeature(FeatureLocation(6450, 6550)), color=f, border=c 1139 ) 1140 gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c)) 1141 f, c = fill_and_border(colors.red) 1142 a = gdfsA.add_feature( 1143 SeqFeature(FeatureLocation(6275, 6375)), color=f, border=c 1144 ) 1145 b = gdfsB.add_feature( 1146 SeqFeature(FeatureLocation(6430, 6530)), color=f, border=c 1147 ) 1148 gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c, flip=True)) 1149 f, c = fill_and_border(colors.blue) 1150 a = gdfsA.add_feature( 1151 SeqFeature(FeatureLocation(6430, 6530)), color=f, border=c 1152 ) 1153 b = gdfsB.add_feature( 1154 SeqFeature(FeatureLocation(6275, 6375)), color=f, border=c 1155 ) 1156 gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c, flip=True)) 1157 1158 cds_count = 0 1159 for feature in genbank_entry.features: 1160 if feature.type == "CDS": 1161 cds_count += 1 1162 if cds_count % 2 == 0: 1163 gdfs1.add_feature(feature, color=colors.pink, sigil="ARROW") 1164 else: 1165 gdfs1.add_feature(feature, color=colors.red, sigil="ARROW") 1166 1167 if feature.type == "gene": 1168 # Note we set the colour of ALL the genes later on as a test, 1169 gdfs2.add_feature(feature, sigil="ARROW") 1170 1171 if feature.type == "misc_feature": 1172 gdfs3.add_feature(feature, color=colors.orange) 1173 1174 if feature.type == "repeat_region": 1175 gdfs4.add_feature(feature, color=colors.purple) 1176 1177 # gdd.cross_track_links = gdd.cross_track_links[:1] 1178 1179 gdfs1.set_all_features("label", 1) 1180 gdfs2.set_all_features("label", 1) 1181 gdfs3.set_all_features("label", 1) 1182 gdfs4.set_all_features("label", 1) 1183 1184 gdfs3.set_all_features("hide", 0) 1185 gdfs4.set_all_features("hide", 0) 1186 1187 # gdfs1.set_all_features('color', colors.red) 1188 gdfs2.set_all_features("color", colors.blue) 1189 1190 gdt1.add_set(gdfsA) # Before CDS so under them! 1191 gdt1.add_set(gdfs1) 1192 1193 gdt2.add_set(gdfsB) # Before genes so under them! 1194 gdt2.add_set(gdfs2) 1195 1196 gdt3 = Track( 1197 "misc features and repeats", greytrack=1, scale_largetick_interval=1e4 1198 ) 1199 gdt3.add_set(gdfs3) 1200 gdt3.add_set(gdfs4) 1201 1202 # Now add some graph sets: 1203 1204 # Use a fairly large step so we can easily tell the difference 1205 # between the bar and line graphs. 1206 step = len(genbank_entry) // 200 1207 gdgs1 = GraphSet("GC skew") 1208 1209 graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step) 1210 gdgs1.new_graph( 1211 graphdata1, 1212 "GC Skew", 1213 style="bar", 1214 color=colors.violet, 1215 altcolor=colors.purple, 1216 ) 1217 1218 gdt4 = Track( 1219 "GC Skew (bar)", height=1.94, greytrack=1, scale_largetick_interval=1e4 1220 ) 1221 gdt4.add_set(gdgs1) 1222 1223 gdgs2 = GraphSet("GC and AT Content") 1224 gdgs2.new_graph( 1225 apply_to_window(genbank_entry.seq, step, calc_gc_content, step), 1226 "GC content", 1227 style="line", 1228 color=colors.lightgreen, 1229 altcolor=colors.darkseagreen, 1230 ) 1231 1232 gdgs2.new_graph( 1233 apply_to_window(genbank_entry.seq, step, calc_at_content, step), 1234 "AT content", 1235 style="line", 1236 color=colors.orange, 1237 altcolor=colors.red, 1238 ) 1239 1240 gdt5 = Track( 1241 "GC Content(green line), AT Content(red line)", 1242 height=1.94, 1243 greytrack=1, 1244 scale_largetick_interval=1e4, 1245 ) 1246 gdt5.add_set(gdgs2) 1247 1248 gdgs3 = GraphSet("Di-nucleotide count") 1249 step = len(genbank_entry) // 400 # smaller step 1250 gdgs3.new_graph( 1251 apply_to_window(genbank_entry.seq, step, calc_dinucleotide_counts, step), 1252 "Di-nucleotide count", 1253 style="heat", 1254 color=colors.red, 1255 altcolor=colors.orange, 1256 ) 1257 gdt6 = Track("Di-nucleotide count", height=0.5, greytrack=False, scale=False) 1258 gdt6.add_set(gdgs3) 1259 1260 # Add the tracks (from both features and graphs) 1261 # Leave some white space in the middle/bottom 1262 gdd.add_track(gdt4, 3) # GC skew 1263 gdd.add_track(gdt5, 4) # GC and AT content 1264 gdd.add_track(gdt1, 5) # CDS features 1265 gdd.add_track(gdt2, 6) # Gene features 1266 gdd.add_track(gdt3, 7) # Misc features and repeat feature 1267 gdd.add_track(gdt6, 8) # Feature depth 1268 1269 # Finally draw it in both formats, and full view and partial 1270 gdd.draw( 1271 format="circular", orientation="landscape", tracklines=0, pagesize="A0" 1272 ) 1273 output_filename = os.path.join("Graphics", "GD_by_obj_circular.pdf") 1274 gdd.write(output_filename, "PDF") 1275 1276 gdd.circular = False 1277 gdd.draw( 1278 format="circular", 1279 orientation="landscape", 1280 tracklines=0, 1281 pagesize="A0", 1282 start=3000, 1283 end=6300, 1284 ) 1285 output_filename = os.path.join("Graphics", "GD_by_obj_frag_circular.pdf") 1286 gdd.write(output_filename, "PDF") 1287 1288 gdd.draw( 1289 format="linear", 1290 orientation="landscape", 1291 tracklines=0, 1292 pagesize="A0", 1293 fragments=3, 1294 ) 1295 output_filename = os.path.join("Graphics", "GD_by_obj_linear.pdf") 1296 gdd.write(output_filename, "PDF") 1297 1298 gdd.set_all_tracks("greytrack_labels", 2) 1299 gdd.draw( 1300 format="linear", 1301 orientation="landscape", 1302 tracklines=0, 1303 pagesize=(30 * cm, 10 * cm), 1304 fragments=1, 1305 start=3000, 1306 end=6300, 1307 ) 1308 output_filename = os.path.join("Graphics", "GD_by_obj_frag_linear.pdf") 1309 gdd.write(output_filename, "PDF") 1310 1311 1312if __name__ == "__main__": 1313 runner = unittest.TextTestRunner(verbosity=2) 1314 unittest.main(testRunner=runner) 1315