1# This code is part of the Biopython distribution and governed by its 2# license. Please see the LICENSE file that should have been included 3# as part of this package. 4 5 6"""Tests for the PairwiseAligner in Bio.Align.""" 7 8import array 9import os 10import unittest 11 12from Bio import Align, SeqIO 13from Bio.Seq import Seq, reverse_complement 14from Bio.SeqUtils import GC 15 16 17class TestAlignerProperties(unittest.TestCase): 18 def test_aligner_property_epsilon(self): 19 aligner = Align.PairwiseAligner() 20 self.assertAlmostEqual(aligner.epsilon, 1.0e-6) 21 aligner.epsilon = 1.0e-4 22 self.assertAlmostEqual(aligner.epsilon, 1.0e-4) 23 aligner.epsilon = 1.0e-8 24 self.assertAlmostEqual(aligner.epsilon, 1.0e-8) 25 with self.assertRaises(TypeError): 26 aligner.epsilon = "not a number" 27 with self.assertRaises(TypeError): 28 aligner.epsilon = None 29 30 def test_aligner_property_mode(self): 31 aligner = Align.PairwiseAligner() 32 aligner.mode = "global" 33 self.assertEqual(aligner.mode, "global") 34 aligner.mode = "local" 35 self.assertEqual(aligner.mode, "local") 36 with self.assertRaises(ValueError): 37 aligner.mode = "wrong" 38 39 def test_aligner_property_match_mismatch(self): 40 aligner = Align.PairwiseAligner() 41 aligner.match_score = 3.0 42 self.assertAlmostEqual(aligner.match_score, 3.0) 43 aligner.mismatch_score = -2.0 44 self.assertAlmostEqual(aligner.mismatch_score, -2.0) 45 with self.assertRaises(ValueError): 46 aligner.match_score = "not a number" 47 with self.assertRaises(ValueError): 48 aligner.mismatch_score = "not a number" 49 self.assertEqual( 50 str(aligner), 51 """\ 52Pairwise sequence aligner with parameters 53 wildcard: None 54 match_score: 3.000000 55 mismatch_score: -2.000000 56 target_internal_open_gap_score: 0.000000 57 target_internal_extend_gap_score: 0.000000 58 target_left_open_gap_score: 0.000000 59 target_left_extend_gap_score: 0.000000 60 target_right_open_gap_score: 0.000000 61 target_right_extend_gap_score: 0.000000 62 query_internal_open_gap_score: 0.000000 63 query_internal_extend_gap_score: 0.000000 64 query_left_open_gap_score: 0.000000 65 query_left_extend_gap_score: 0.000000 66 query_right_open_gap_score: 0.000000 67 query_right_extend_gap_score: 0.000000 68 mode: global 69""", 70 ) 71 72 def test_aligner_property_gapscores(self): 73 aligner = Align.PairwiseAligner() 74 open_score, extend_score = (-5, -1) 75 aligner.target_open_gap_score = open_score 76 aligner.target_extend_gap_score = extend_score 77 self.assertAlmostEqual(aligner.target_open_gap_score, open_score) 78 self.assertAlmostEqual(aligner.target_extend_gap_score, extend_score) 79 open_score, extend_score = (-6, -7) 80 aligner.query_open_gap_score = open_score 81 aligner.query_extend_gap_score = extend_score 82 self.assertAlmostEqual(aligner.query_open_gap_score, open_score) 83 self.assertAlmostEqual(aligner.query_extend_gap_score, extend_score) 84 open_score, extend_score = (-3, -9) 85 aligner.target_end_open_gap_score = open_score 86 aligner.target_end_extend_gap_score = extend_score 87 self.assertAlmostEqual(aligner.target_end_open_gap_score, open_score) 88 self.assertAlmostEqual(aligner.target_end_extend_gap_score, extend_score) 89 open_score, extend_score = (-1, -2) 90 aligner.query_end_open_gap_score = open_score 91 aligner.query_end_extend_gap_score = extend_score 92 self.assertEqual( 93 str(aligner), 94 """\ 95Pairwise sequence aligner with parameters 96 wildcard: None 97 match_score: 1.000000 98 mismatch_score: 0.000000 99 target_internal_open_gap_score: -5.000000 100 target_internal_extend_gap_score: -1.000000 101 target_left_open_gap_score: -3.000000 102 target_left_extend_gap_score: -9.000000 103 target_right_open_gap_score: -3.000000 104 target_right_extend_gap_score: -9.000000 105 query_internal_open_gap_score: -6.000000 106 query_internal_extend_gap_score: -7.000000 107 query_left_open_gap_score: -1.000000 108 query_left_extend_gap_score: -2.000000 109 query_right_open_gap_score: -1.000000 110 query_right_extend_gap_score: -2.000000 111 mode: global 112""", 113 ) 114 self.assertAlmostEqual(aligner.query_end_open_gap_score, open_score) 115 self.assertAlmostEqual(aligner.query_end_extend_gap_score, extend_score) 116 score = -3 117 aligner.target_gap_score = score 118 self.assertAlmostEqual(aligner.target_gap_score, score) 119 self.assertAlmostEqual(aligner.target_open_gap_score, score) 120 self.assertAlmostEqual(aligner.target_extend_gap_score, score) 121 score = -2 122 aligner.query_gap_score = score 123 self.assertAlmostEqual(aligner.query_gap_score, score) 124 self.assertAlmostEqual(aligner.query_open_gap_score, score) 125 self.assertAlmostEqual(aligner.query_extend_gap_score, score) 126 score = -4 127 aligner.target_end_gap_score = score 128 self.assertAlmostEqual(aligner.target_end_gap_score, score) 129 self.assertAlmostEqual(aligner.target_end_open_gap_score, score) 130 self.assertAlmostEqual(aligner.target_end_extend_gap_score, score) 131 self.assertAlmostEqual(aligner.target_left_gap_score, score) 132 self.assertAlmostEqual(aligner.target_left_open_gap_score, score) 133 self.assertAlmostEqual(aligner.target_left_extend_gap_score, score) 134 self.assertAlmostEqual(aligner.target_right_gap_score, score) 135 self.assertAlmostEqual(aligner.target_right_open_gap_score, score) 136 self.assertAlmostEqual(aligner.target_right_extend_gap_score, score) 137 score = -5 138 aligner.query_end_gap_score = score 139 self.assertAlmostEqual(aligner.query_end_gap_score, score) 140 self.assertAlmostEqual(aligner.query_end_open_gap_score, score) 141 self.assertAlmostEqual(aligner.query_end_extend_gap_score, score) 142 self.assertAlmostEqual(aligner.query_left_gap_score, score) 143 self.assertAlmostEqual(aligner.query_left_open_gap_score, score) 144 self.assertAlmostEqual(aligner.query_left_extend_gap_score, score) 145 self.assertAlmostEqual(aligner.query_right_gap_score, score) 146 self.assertAlmostEqual(aligner.query_right_open_gap_score, score) 147 self.assertAlmostEqual(aligner.query_right_extend_gap_score, score) 148 self.assertEqual( 149 str(aligner), 150 """\ 151Pairwise sequence aligner with parameters 152 wildcard: None 153 match_score: 1.000000 154 mismatch_score: 0.000000 155 target_internal_open_gap_score: -3.000000 156 target_internal_extend_gap_score: -3.000000 157 target_left_open_gap_score: -4.000000 158 target_left_extend_gap_score: -4.000000 159 target_right_open_gap_score: -4.000000 160 target_right_extend_gap_score: -4.000000 161 query_internal_open_gap_score: -2.000000 162 query_internal_extend_gap_score: -2.000000 163 query_left_open_gap_score: -5.000000 164 query_left_extend_gap_score: -5.000000 165 query_right_open_gap_score: -5.000000 166 query_right_extend_gap_score: -5.000000 167 mode: global 168""", 169 ) 170 with self.assertRaises(ValueError): 171 aligner.target_gap_score = "wrong" 172 with self.assertRaises(ValueError): 173 aligner.query_gap_score = "wrong" 174 with self.assertRaises(TypeError): 175 aligner.target_end_gap_score = "wrong" 176 with self.assertRaises(TypeError): 177 aligner.query_end_gap_score = "wrong" 178 179 def test_aligner_nonexisting_property(self): 180 aligner = Align.PairwiseAligner() 181 with self.assertRaises(AttributeError) as cm: 182 aligner.no_such_property 183 self.assertEqual( 184 str(cm.exception), 185 "'PairwiseAligner' object has no attribute 'no_such_property'", 186 ) 187 with self.assertRaises(AttributeError) as cm: 188 aligner.no_such_property = 1 189 self.assertEqual( 190 str(cm.exception), 191 "'PairwiseAligner' object has no attribute 'no_such_property'", 192 ) 193 194 195class TestPairwiseGlobal(unittest.TestCase): 196 def test_needlemanwunsch_simple1(self): 197 seq1 = "GAACT" 198 seq2 = "GAT" 199 aligner = Align.PairwiseAligner() 200 aligner.mode = "global" 201 self.assertEqual( 202 str(aligner), 203 """\ 204Pairwise sequence aligner with parameters 205 wildcard: None 206 match_score: 1.000000 207 mismatch_score: 0.000000 208 target_internal_open_gap_score: 0.000000 209 target_internal_extend_gap_score: 0.000000 210 target_left_open_gap_score: 0.000000 211 target_left_extend_gap_score: 0.000000 212 target_right_open_gap_score: 0.000000 213 target_right_extend_gap_score: 0.000000 214 query_internal_open_gap_score: 0.000000 215 query_internal_extend_gap_score: 0.000000 216 query_left_open_gap_score: 0.000000 217 query_left_extend_gap_score: 0.000000 218 query_right_open_gap_score: 0.000000 219 query_right_extend_gap_score: 0.000000 220 mode: global 221""", 222 ) 223 self.assertEqual(aligner.algorithm, "Needleman-Wunsch") 224 score = aligner.score(seq1, seq2) 225 self.assertAlmostEqual(score, 3.0) 226 score = aligner.score(seq1, reverse_complement(seq2), "-") 227 self.assertAlmostEqual(score, 3.0) 228 alignments = aligner.align(seq1, seq2) 229 self.assertEqual(len(alignments), 2) 230 alignment = alignments[0] 231 self.assertAlmostEqual(alignment.score, 3.0) 232 self.assertEqual( 233 str(alignment), 234 """\ 235GAACT 236||--| 237GA--T 238""", 239 ) 240 self.assertEqual(alignment.shape, (2, 5)) 241 self.assertEqual(alignment.aligned, (((0, 2), (4, 5)), ((0, 2), (2, 3)))) 242 alignment = alignments[1] 243 self.assertAlmostEqual(alignment.score, 3.0) 244 self.assertEqual( 245 str(alignment), 246 """\ 247GAACT 248|-|-| 249G-A-T 250""", 251 ) 252 self.assertEqual(alignment.shape, (2, 5)) 253 self.assertEqual( 254 alignment.aligned, (((0, 1), (2, 3), (4, 5)), ((0, 1), (1, 2), (2, 3))) 255 ) 256 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 257 self.assertEqual(len(alignments), 2) 258 alignment = alignments[0] 259 self.assertAlmostEqual(alignment.score, 3.0) 260 self.assertEqual( 261 str(alignment), 262 """\ 263GAACT 264||--| 265GA--T 266""", 267 ) 268 self.assertEqual(alignment.shape, (2, 5)) 269 self.assertEqual(alignment.aligned, (((0, 2), (4, 5)), ((3, 1), (1, 0)))) 270 alignment = alignments[1] 271 self.assertAlmostEqual(alignment.score, 3.0) 272 self.assertEqual( 273 str(alignment), 274 """\ 275GAACT 276|-|-| 277G-A-T 278""", 279 ) 280 self.assertEqual(alignment.shape, (2, 5)) 281 self.assertEqual( 282 alignment.aligned, (((0, 1), (2, 3), (4, 5)), ((3, 2), (2, 1), (1, 0))) 283 ) 284 285 def test_align_affine1_score(self): 286 seq1 = "CC" 287 seq2 = "ACCT" 288 aligner = Align.PairwiseAligner() 289 aligner.mode = "global" 290 aligner.match_score = 0 291 aligner.mismatch_score = -1 292 aligner.open_gap_score = -5 293 aligner.extend_gap_score = -1 294 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 295 self.assertEqual( 296 str(aligner), 297 """\ 298Pairwise sequence aligner with parameters 299 wildcard: None 300 match_score: 0.000000 301 mismatch_score: -1.000000 302 target_internal_open_gap_score: -5.000000 303 target_internal_extend_gap_score: -1.000000 304 target_left_open_gap_score: -5.000000 305 target_left_extend_gap_score: -1.000000 306 target_right_open_gap_score: -5.000000 307 target_right_extend_gap_score: -1.000000 308 query_internal_open_gap_score: -5.000000 309 query_internal_extend_gap_score: -1.000000 310 query_left_open_gap_score: -5.000000 311 query_left_extend_gap_score: -1.000000 312 query_right_open_gap_score: -5.000000 313 query_right_extend_gap_score: -1.000000 314 mode: global 315""", 316 ) 317 score = aligner.score(seq1, seq2) 318 self.assertAlmostEqual(score, -7.0) 319 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 320 self.assertAlmostEqual(score, -7.0) 321 322 323class TestPairwiseLocal(unittest.TestCase): 324 def test_smithwaterman(self): 325 aligner = Align.PairwiseAligner() 326 aligner.mode = "local" 327 aligner.gap_score = -0.1 328 self.assertEqual(aligner.algorithm, "Smith-Waterman") 329 self.assertEqual( 330 str(aligner), 331 """\ 332Pairwise sequence aligner with parameters 333 wildcard: None 334 match_score: 1.000000 335 mismatch_score: 0.000000 336 target_internal_open_gap_score: -0.100000 337 target_internal_extend_gap_score: -0.100000 338 target_left_open_gap_score: -0.100000 339 target_left_extend_gap_score: -0.100000 340 target_right_open_gap_score: -0.100000 341 target_right_extend_gap_score: -0.100000 342 query_internal_open_gap_score: -0.100000 343 query_internal_extend_gap_score: -0.100000 344 query_left_open_gap_score: -0.100000 345 query_left_extend_gap_score: -0.100000 346 query_right_open_gap_score: -0.100000 347 query_right_extend_gap_score: -0.100000 348 mode: local 349""", 350 ) 351 score = aligner.score("AwBw", "zABz") 352 self.assertAlmostEqual(score, 1.9) 353 alignments = aligner.align("AwBw", "zABz") 354 self.assertEqual(len(alignments), 1) 355 alignment = alignments[0] 356 self.assertAlmostEqual(alignment.score, 1.9) 357 self.assertEqual( 358 str(alignment), 359 """\ 360 AwBw 361 |-| 362zA-Bz 363""", 364 ) 365 self.assertEqual(alignment.shape, (2, 3)) 366 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((1, 2), (2, 3)))) 367 368 def test_gotoh_local(self): 369 aligner = Align.PairwiseAligner() 370 aligner.mode = "local" 371 aligner.open_gap_score = -0.1 372 aligner.extend_gap_score = 0.0 373 self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm") 374 self.assertEqual( 375 str(aligner), 376 """\ 377Pairwise sequence aligner with parameters 378 wildcard: None 379 match_score: 1.000000 380 mismatch_score: 0.000000 381 target_internal_open_gap_score: -0.100000 382 target_internal_extend_gap_score: 0.000000 383 target_left_open_gap_score: -0.100000 384 target_left_extend_gap_score: 0.000000 385 target_right_open_gap_score: -0.100000 386 target_right_extend_gap_score: 0.000000 387 query_internal_open_gap_score: -0.100000 388 query_internal_extend_gap_score: 0.000000 389 query_left_open_gap_score: -0.100000 390 query_left_extend_gap_score: 0.000000 391 query_right_open_gap_score: -0.100000 392 query_right_extend_gap_score: 0.000000 393 mode: local 394""", 395 ) 396 score = aligner.score("AwBw", "zABz") 397 self.assertAlmostEqual(score, 1.9) 398 alignments = aligner.align("AwBw", "zABz") 399 self.assertEqual(len(alignments), 1) 400 alignment = alignments[0] 401 self.assertAlmostEqual(alignment.score, 1.9) 402 self.assertEqual( 403 str(alignment), 404 """\ 405 AwBw 406 |-| 407zA-Bz 408""", 409 ) 410 self.assertEqual(alignment.shape, (2, 3)) 411 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((1, 2), (2, 3)))) 412 413 414class TestUnknownCharacter(unittest.TestCase): 415 def test_needlemanwunsch_simple1(self): 416 seq1 = "GACT" 417 seq2 = "GA?T" 418 aligner = Align.PairwiseAligner() 419 aligner.mode = "global" 420 aligner.gap_score = -1.0 421 aligner.mismatch_score = -1.0 422 aligner.wildcard = "?" 423 score = aligner.score(seq1, seq2) 424 self.assertAlmostEqual(score, 3.0) 425 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 426 self.assertAlmostEqual(score, 3.0) 427 alignments = aligner.align(seq1, seq2) 428 self.assertEqual(len(alignments), 1) 429 alignment = alignments[0] 430 self.assertAlmostEqual(alignment.score, 3.0) 431 self.assertEqual( 432 str(alignment), 433 """\ 434GACT 435||.| 436GA?T 437""", 438 ) 439 self.assertEqual(alignment.shape, (2, 4)) 440 self.assertEqual(alignment.aligned, (((0, 4),), ((0, 4),))) 441 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 442 self.assertEqual(len(alignments), 1) 443 alignment = alignments[0] 444 self.assertAlmostEqual(alignment.score, 3.0) 445 self.assertEqual( 446 str(alignment), 447 """\ 448GACT 449||.| 450GA?T 451""", 452 ) 453 self.assertEqual(alignment.shape, (2, 4)) 454 self.assertEqual(alignment.aligned, (((0, 4),), ((4, 0),))) 455 seq2 = "GAXT" 456 aligner.wildcard = "X" 457 score = aligner.score(seq1, seq2) 458 self.assertAlmostEqual(score, 3.0) 459 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 460 self.assertAlmostEqual(score, 3.0) 461 alignments = aligner.align(seq1, seq2) 462 self.assertEqual(len(alignments), 1) 463 alignment = alignments[0] 464 self.assertAlmostEqual(alignment.score, 3.0) 465 self.assertEqual( 466 str(alignment), 467 """\ 468GACT 469||.| 470GAXT 471""", 472 ) 473 self.assertEqual(alignment.shape, (2, 4)) 474 self.assertEqual(alignment.aligned, (((0, 4),), ((0, 4),))) 475 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 476 self.assertEqual(len(alignments), 1) 477 alignment = alignments[0] 478 self.assertAlmostEqual(alignment.score, 3.0) 479 self.assertEqual( 480 str(alignment), 481 """\ 482GACT 483||.| 484GAXT 485""", 486 ) 487 self.assertEqual(alignment.shape, (2, 4)) 488 self.assertEqual(alignment.aligned, (((0, 4),), ((4, 0),))) 489 aligner.wildcard = None 490 score = aligner.score(seq1, seq2) 491 self.assertAlmostEqual(score, 2.0) 492 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 493 self.assertAlmostEqual(score, 2.0) 494 alignments = aligner.align(seq1, seq2) 495 self.assertEqual(len(alignments), 1) 496 alignment = alignments[0] 497 self.assertAlmostEqual(alignment.score, 2.0) 498 self.assertEqual( 499 str(alignment), 500 """\ 501GACT 502||.| 503GAXT 504""", 505 ) 506 self.assertEqual(alignment.shape, (2, 4)) 507 self.assertEqual(alignment.aligned, (((0, 4),), ((0, 4),))) 508 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 509 self.assertEqual(len(alignments), 1) 510 alignment = alignments[0] 511 self.assertAlmostEqual(alignment.score, 2.0) 512 self.assertEqual( 513 str(alignment), 514 """\ 515GACT 516||.| 517GAXT 518""", 519 ) 520 self.assertEqual(alignment.shape, (2, 4)) 521 self.assertEqual(alignment.aligned, (((0, 4),), ((4, 0),))) 522 523 def test_needlemanwunsch_simple2(self): 524 seq1 = "GA?AT" 525 seq2 = "GAA?T" 526 aligner = Align.PairwiseAligner() 527 aligner.mode = "global" 528 aligner.wildcard = "?" 529 score = aligner.score(seq1, seq2) 530 self.assertAlmostEqual(score, 4.0) 531 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 532 self.assertAlmostEqual(score, 4.0) 533 alignments = aligner.align(seq1, seq2) 534 self.assertEqual(len(alignments), 1) 535 alignment = alignments[0] 536 self.assertAlmostEqual(alignment.score, 4.0) 537 self.assertEqual( 538 str(alignment), 539 """\ 540GA?A-T 541||-|-| 542GA-A?T 543""", 544 ) 545 self.assertEqual(alignment.shape, (2, 6)) 546 self.assertEqual( 547 alignment.aligned, (((0, 2), (3, 4), (4, 5)), ((0, 2), (2, 3), (4, 5))) 548 ) 549 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 550 self.assertEqual(len(alignments), 1) 551 alignment = alignments[0] 552 self.assertAlmostEqual(alignment.score, 4.0) 553 self.assertEqual( 554 str(alignment), 555 """\ 556GA?A-T 557||-|-| 558GA-A?T 559""", 560 ) 561 self.assertEqual(alignment.shape, (2, 6)) 562 self.assertEqual( 563 alignment.aligned, (((0, 2), (3, 4), (4, 5)), ((5, 3), (3, 2), (1, 0))) 564 ) 565 seq1 = "GAXAT" 566 seq2 = "GAAXT" 567 aligner.wildcard = "X" 568 score = aligner.score(seq1, seq2) 569 self.assertAlmostEqual(score, 4.0) 570 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 571 self.assertAlmostEqual(score, 4.0) 572 alignments = aligner.align(seq1, seq2) 573 self.assertEqual(len(alignments), 1) 574 alignment = alignments[0] 575 self.assertAlmostEqual(alignment.score, 4.0) 576 self.assertEqual( 577 str(alignment), 578 """\ 579GAXA-T 580||-|-| 581GA-AXT 582""", 583 ) 584 self.assertEqual(alignment.shape, (2, 6)) 585 self.assertEqual( 586 alignment.aligned, (((0, 2), (3, 4), (4, 5)), ((0, 2), (2, 3), (4, 5))) 587 ) 588 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 589 self.assertEqual(len(alignments), 1) 590 alignment = alignments[0] 591 self.assertAlmostEqual(alignment.score, 4.0) 592 self.assertEqual( 593 str(alignment), 594 """\ 595GAXA-T 596||-|-| 597GA-AXT 598""", 599 ) 600 self.assertEqual(alignment.shape, (2, 6)) 601 self.assertEqual( 602 alignment.aligned, (((0, 2), (3, 4), (4, 5)), ((5, 3), (3, 2), (1, 0))) 603 ) 604 605 606class TestPairwiseOpenPenalty(unittest.TestCase): 607 def test_match_score_open_penalty1(self): 608 aligner = Align.PairwiseAligner() 609 aligner.mode = "global" 610 aligner.match_score = 2 611 aligner.mismatch_score = -1 612 aligner.open_gap_score = -0.1 613 aligner.extend_gap_score = 0.0 614 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 615 self.assertEqual( 616 str(aligner), 617 """\ 618Pairwise sequence aligner with parameters 619 wildcard: None 620 match_score: 2.000000 621 mismatch_score: -1.000000 622 target_internal_open_gap_score: -0.100000 623 target_internal_extend_gap_score: 0.000000 624 target_left_open_gap_score: -0.100000 625 target_left_extend_gap_score: 0.000000 626 target_right_open_gap_score: -0.100000 627 target_right_extend_gap_score: 0.000000 628 query_internal_open_gap_score: -0.100000 629 query_internal_extend_gap_score: 0.000000 630 query_left_open_gap_score: -0.100000 631 query_left_extend_gap_score: 0.000000 632 query_right_open_gap_score: -0.100000 633 query_right_extend_gap_score: 0.000000 634 mode: global 635""", 636 ) 637 seq1 = "AA" 638 seq2 = "A" 639 score = aligner.score(seq1, seq2) 640 self.assertAlmostEqual(score, 1.9) 641 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 642 self.assertAlmostEqual(score, 1.9) 643 alignments = aligner.align(seq1, seq2) 644 self.assertEqual(len(alignments), 2) 645 alignment = alignments[0] 646 self.assertAlmostEqual(alignment.score, 1.9) 647 self.assertEqual( 648 str(alignment), 649 """\ 650AA 651-| 652-A 653""", 654 ) 655 self.assertEqual(alignment.shape, (2, 2)) 656 self.assertEqual(alignment.aligned, (((1, 2),), ((0, 1),))) 657 alignment = alignments[1] 658 self.assertAlmostEqual(alignment.score, 1.9) 659 self.assertEqual( 660 str(alignment), 661 """\ 662AA 663|- 664A- 665""", 666 ) 667 self.assertEqual(alignment.shape, (2, 2)) 668 self.assertEqual(alignment.aligned, (((0, 1),), ((0, 1),))) 669 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 670 self.assertEqual(len(alignments), 2) 671 alignment = alignments[0] 672 self.assertAlmostEqual(alignment.score, 1.9) 673 self.assertEqual( 674 str(alignment), 675 """\ 676AA 677-| 678-A 679""", 680 ) 681 self.assertEqual(alignment.shape, (2, 2)) 682 self.assertEqual(alignment.aligned, (((1, 2),), ((1, 0),))) 683 alignment = alignments[1] 684 self.assertAlmostEqual(alignment.score, 1.9) 685 self.assertEqual( 686 str(alignment), 687 """\ 688AA 689|- 690A- 691""", 692 ) 693 self.assertEqual(alignment.shape, (2, 2)) 694 self.assertEqual(alignment.aligned, (((0, 1),), ((1, 0),))) 695 696 def test_match_score_open_penalty2(self): 697 aligner = Align.PairwiseAligner() 698 aligner.mode = "global" 699 aligner.match_score = 1.5 700 aligner.mismatch_score = 0.0 701 aligner.open_gap_score = -0.1 702 aligner.extend_gap_score = 0.0 703 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 704 self.assertEqual( 705 str(aligner), 706 """\ 707Pairwise sequence aligner with parameters 708 wildcard: None 709 match_score: 1.500000 710 mismatch_score: 0.000000 711 target_internal_open_gap_score: -0.100000 712 target_internal_extend_gap_score: 0.000000 713 target_left_open_gap_score: -0.100000 714 target_left_extend_gap_score: 0.000000 715 target_right_open_gap_score: -0.100000 716 target_right_extend_gap_score: 0.000000 717 query_internal_open_gap_score: -0.100000 718 query_internal_extend_gap_score: 0.000000 719 query_left_open_gap_score: -0.100000 720 query_left_extend_gap_score: 0.000000 721 query_right_open_gap_score: -0.100000 722 query_right_extend_gap_score: 0.000000 723 mode: global 724""", 725 ) 726 seq1 = "GAA" 727 seq2 = "GA" 728 score = aligner.score(seq1, seq2) 729 self.assertAlmostEqual(score, 2.9) 730 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 731 self.assertAlmostEqual(score, 2.9) 732 alignments = aligner.align(seq1, seq2) 733 self.assertEqual(len(alignments), 2) 734 alignment = alignments[0] 735 self.assertAlmostEqual(alignment.score, 2.9) 736 self.assertEqual( 737 str(alignment), 738 """\ 739GAA 740|-| 741G-A 742""", 743 ) 744 self.assertEqual(alignment.shape, (2, 3)) 745 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((0, 1), (1, 2)))) 746 alignment = alignments[1] 747 self.assertAlmostEqual(alignment.score, 2.9) 748 self.assertEqual( 749 str(alignment), 750 """\ 751GAA 752||- 753GA- 754""", 755 ) 756 self.assertEqual(alignment.shape, (2, 3)) 757 self.assertEqual(alignment.aligned, (((0, 2),), ((0, 2),))) 758 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 759 self.assertEqual(len(alignments), 2) 760 alignment = alignments[0] 761 self.assertAlmostEqual(alignment.score, 2.9) 762 self.assertEqual( 763 str(alignment), 764 """\ 765GAA 766|-| 767G-A 768""", 769 ) 770 self.assertEqual(alignment.shape, (2, 3)) 771 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((2, 1), (1, 0)))) 772 alignment = alignments[1] 773 self.assertAlmostEqual(alignment.score, 2.9) 774 self.assertEqual( 775 str(alignment), 776 """\ 777GAA 778||- 779GA- 780""", 781 ) 782 self.assertEqual(alignment.shape, (2, 3)) 783 self.assertEqual(alignment.aligned, (((0, 2),), ((2, 0),))) 784 785 def test_match_score_open_penalty3(self): 786 aligner = Align.PairwiseAligner() 787 aligner.mode = "global" 788 aligner.query_open_gap_score = -0.1 789 aligner.query_extend_gap_score = 0.0 790 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 791 self.assertEqual( 792 str(aligner), 793 """\ 794Pairwise sequence aligner with parameters 795 wildcard: None 796 match_score: 1.000000 797 mismatch_score: 0.000000 798 target_internal_open_gap_score: 0.000000 799 target_internal_extend_gap_score: 0.000000 800 target_left_open_gap_score: 0.000000 801 target_left_extend_gap_score: 0.000000 802 target_right_open_gap_score: 0.000000 803 target_right_extend_gap_score: 0.000000 804 query_internal_open_gap_score: -0.100000 805 query_internal_extend_gap_score: 0.000000 806 query_left_open_gap_score: -0.100000 807 query_left_extend_gap_score: 0.000000 808 query_right_open_gap_score: -0.100000 809 query_right_extend_gap_score: 0.000000 810 mode: global 811""", 812 ) 813 seq1 = "GAACT" 814 seq2 = "GAT" 815 score = aligner.score(seq1, seq2) 816 self.assertAlmostEqual(score, 2.9) 817 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 818 self.assertAlmostEqual(score, 2.9) 819 alignments = aligner.align(seq1, seq2) 820 self.assertEqual(len(alignments), 1) 821 alignment = alignments[0] 822 self.assertAlmostEqual(alignment.score, 2.9) 823 self.assertEqual( 824 str(alignment), 825 """\ 826GAACT 827||--| 828GA--T 829""", 830 ) 831 self.assertEqual(alignment.shape, (2, 5)) 832 self.assertEqual(alignment.aligned, (((0, 2), (4, 5)), ((0, 2), (2, 3)))) 833 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 834 self.assertEqual(len(alignments), 1) 835 alignment = alignments[0] 836 self.assertAlmostEqual(alignment.score, 2.9) 837 self.assertEqual( 838 str(alignment), 839 """\ 840GAACT 841||--| 842GA--T 843""", 844 ) 845 self.assertEqual(alignment.shape, (2, 5)) 846 self.assertEqual(alignment.aligned, (((0, 2), (4, 5)), ((3, 1), (1, 0)))) 847 848 def test_match_score_open_penalty4(self): 849 aligner = Align.PairwiseAligner() 850 aligner.mode = "global" 851 aligner.mismatch_score = -2.0 852 aligner.open_gap_score = -0.1 853 aligner.extend_gap_score = 0.0 854 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 855 self.assertEqual( 856 str(aligner), 857 """\ 858Pairwise sequence aligner with parameters 859 wildcard: None 860 match_score: 1.000000 861 mismatch_score: -2.000000 862 target_internal_open_gap_score: -0.100000 863 target_internal_extend_gap_score: 0.000000 864 target_left_open_gap_score: -0.100000 865 target_left_extend_gap_score: 0.000000 866 target_right_open_gap_score: -0.100000 867 target_right_extend_gap_score: 0.000000 868 query_internal_open_gap_score: -0.100000 869 query_internal_extend_gap_score: 0.000000 870 query_left_open_gap_score: -0.100000 871 query_left_extend_gap_score: 0.000000 872 query_right_open_gap_score: -0.100000 873 query_right_extend_gap_score: 0.000000 874 mode: global 875""", 876 ) 877 seq1 = "GCT" 878 seq2 = "GATA" 879 score = aligner.score(seq1, seq2) 880 self.assertAlmostEqual(score, 1.7) 881 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 882 self.assertAlmostEqual(score, 1.7) 883 alignments = aligner.align(seq1, seq2) 884 self.assertEqual(len(alignments), 2) 885 alignment = alignments[0] 886 self.assertAlmostEqual(alignment.score, 1.7) 887 self.assertEqual( 888 str(alignment), 889 """\ 890G-CT- 891|--|- 892GA-TA 893""", 894 ) 895 self.assertEqual(alignment.shape, (2, 5)) 896 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((0, 1), (2, 3)))) 897 alignment = alignments[1] 898 self.assertAlmostEqual(alignment.score, 1.7) 899 self.assertEqual( 900 str(alignment), 901 """\ 902GC-T- 903|--|- 904G-ATA 905""", 906 ) 907 self.assertEqual(alignment.shape, (2, 5)) 908 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((0, 1), (2, 3)))) 909 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 910 self.assertEqual(len(alignments), 2) 911 alignment = alignments[0] 912 self.assertAlmostEqual(alignment.score, 1.7) 913 self.assertEqual( 914 str(alignment), 915 """\ 916G-CT- 917|--|- 918GA-TA 919""", 920 ) 921 self.assertEqual(alignment.shape, (2, 5)) 922 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((4, 3), (2, 1)))) 923 alignment = alignments[1] 924 self.assertAlmostEqual(alignment.score, 1.7) 925 self.assertEqual( 926 str(alignment), 927 """\ 928GC-T- 929|--|- 930G-ATA 931""", 932 ) 933 self.assertEqual(alignment.shape, (2, 5)) 934 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((4, 3), (2, 1)))) 935 936 937class TestPairwiseExtendPenalty(unittest.TestCase): 938 def test_extend_penalty1(self): 939 aligner = Align.PairwiseAligner() 940 aligner.mode = "global" 941 aligner.open_gap_score = -0.2 942 aligner.extend_gap_score = -0.5 943 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 944 self.assertEqual( 945 str(aligner), 946 """\ 947Pairwise sequence aligner with parameters 948 wildcard: None 949 match_score: 1.000000 950 mismatch_score: 0.000000 951 target_internal_open_gap_score: -0.200000 952 target_internal_extend_gap_score: -0.500000 953 target_left_open_gap_score: -0.200000 954 target_left_extend_gap_score: -0.500000 955 target_right_open_gap_score: -0.200000 956 target_right_extend_gap_score: -0.500000 957 query_internal_open_gap_score: -0.200000 958 query_internal_extend_gap_score: -0.500000 959 query_left_open_gap_score: -0.200000 960 query_left_extend_gap_score: -0.500000 961 query_right_open_gap_score: -0.200000 962 query_right_extend_gap_score: -0.500000 963 mode: global 964""", 965 ) 966 seq1 = "GACT" 967 seq2 = "GT" 968 score = aligner.score(seq1, seq2) 969 self.assertAlmostEqual(score, 1.3) 970 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 971 self.assertAlmostEqual(score, 1.3) 972 alignments = aligner.align(seq1, seq2) 973 self.assertEqual(len(alignments), 1) 974 alignment = alignments[0] 975 self.assertAlmostEqual(alignment.score, 1.3) 976 self.assertEqual( 977 str(alignment), 978 """\ 979GACT 980|--| 981G--T 982""", 983 ) 984 self.assertEqual(alignment.shape, (2, 4)) 985 self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((0, 1), (1, 2)))) 986 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 987 self.assertEqual(len(alignments), 1) 988 alignment = alignments[0] 989 self.assertAlmostEqual(alignment.score, 1.3) 990 self.assertEqual( 991 str(alignment), 992 """\ 993GACT 994|--| 995G--T 996""", 997 ) 998 self.assertEqual(alignment.shape, (2, 4)) 999 self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((2, 1), (1, 0)))) 1000 1001 def test_extend_penalty2(self): 1002 aligner = Align.PairwiseAligner() 1003 aligner.mode = "global" 1004 aligner.open_gap_score = -0.2 1005 aligner.extend_gap_score = -1.5 1006 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 1007 self.assertEqual( 1008 str(aligner), 1009 """\ 1010Pairwise sequence aligner with parameters 1011 wildcard: None 1012 match_score: 1.000000 1013 mismatch_score: 0.000000 1014 target_internal_open_gap_score: -0.200000 1015 target_internal_extend_gap_score: -1.500000 1016 target_left_open_gap_score: -0.200000 1017 target_left_extend_gap_score: -1.500000 1018 target_right_open_gap_score: -0.200000 1019 target_right_extend_gap_score: -1.500000 1020 query_internal_open_gap_score: -0.200000 1021 query_internal_extend_gap_score: -1.500000 1022 query_left_open_gap_score: -0.200000 1023 query_left_extend_gap_score: -1.500000 1024 query_right_open_gap_score: -0.200000 1025 query_right_extend_gap_score: -1.500000 1026 mode: global 1027""", 1028 ) 1029 seq1 = "GACT" 1030 seq2 = "GT" 1031 score = aligner.score(seq1, seq2) 1032 self.assertAlmostEqual(score, 0.6) 1033 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1034 self.assertAlmostEqual(score, 0.6) 1035 alignments = aligner.align(seq1, seq2) 1036 self.assertEqual(len(alignments), 2) 1037 alignment = alignments[0] 1038 self.assertAlmostEqual(alignment.score, 0.6) 1039 self.assertEqual( 1040 str(alignment), 1041 """\ 1042GACT 1043-.-| 1044-G-T 1045""", 1046 ) 1047 self.assertEqual(alignment.shape, (2, 4)) 1048 self.assertEqual(alignment.aligned, (((1, 2), (3, 4)), ((0, 1), (1, 2)))) 1049 alignment = alignments[1] 1050 self.assertAlmostEqual(alignment.score, 0.6) 1051 self.assertEqual( 1052 str(alignment), 1053 """\ 1054GACT 1055|-.- 1056G-T- 1057""", 1058 ) 1059 self.assertEqual(alignment.shape, (2, 4)) 1060 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((0, 1), (1, 2)))) 1061 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1062 self.assertEqual(len(alignments), 2) 1063 alignment = alignments[0] 1064 self.assertAlmostEqual(alignment.score, 0.6) 1065 self.assertEqual( 1066 str(alignment), 1067 """\ 1068GACT 1069-.-| 1070-G-T 1071""", 1072 ) 1073 self.assertEqual(alignment.shape, (2, 4)) 1074 self.assertEqual(alignment.aligned, (((1, 2), (3, 4)), ((2, 1), (1, 0)))) 1075 alignment = alignments[1] 1076 self.assertAlmostEqual(alignment.score, 0.6) 1077 self.assertEqual( 1078 str(alignment), 1079 """\ 1080GACT 1081|-.- 1082G-T- 1083""", 1084 ) 1085 self.assertEqual(alignment.shape, (2, 4)) 1086 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((2, 1), (1, 0)))) 1087 1088 1089class TestPairwisePenalizeExtendWhenOpening(unittest.TestCase): 1090 def test_penalize_extend_when_opening(self): 1091 aligner = Align.PairwiseAligner() 1092 aligner.mode = "global" 1093 aligner.open_gap_score = -1.7 1094 aligner.extend_gap_score = -1.5 1095 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 1096 self.assertEqual( 1097 str(aligner), 1098 """\ 1099Pairwise sequence aligner with parameters 1100 wildcard: None 1101 match_score: 1.000000 1102 mismatch_score: 0.000000 1103 target_internal_open_gap_score: -1.700000 1104 target_internal_extend_gap_score: -1.500000 1105 target_left_open_gap_score: -1.700000 1106 target_left_extend_gap_score: -1.500000 1107 target_right_open_gap_score: -1.700000 1108 target_right_extend_gap_score: -1.500000 1109 query_internal_open_gap_score: -1.700000 1110 query_internal_extend_gap_score: -1.500000 1111 query_left_open_gap_score: -1.700000 1112 query_left_extend_gap_score: -1.500000 1113 query_right_open_gap_score: -1.700000 1114 query_right_extend_gap_score: -1.500000 1115 mode: global 1116""", 1117 ) 1118 seq1 = "GACT" 1119 seq2 = "GT" 1120 score = aligner.score(seq1, seq2) 1121 self.assertAlmostEqual(score, -1.2) 1122 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1123 self.assertAlmostEqual(score, -1.2) 1124 alignments = aligner.align(seq1, seq2) 1125 self.assertEqual(len(alignments), 1) 1126 alignment = alignments[0] 1127 self.assertAlmostEqual(alignment.score, -1.2) 1128 self.assertEqual( 1129 str(alignment), 1130 """\ 1131GACT 1132|--| 1133G--T 1134""", 1135 ) 1136 self.assertEqual(alignment.shape, (2, 4)) 1137 self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((0, 1), (1, 2)))) 1138 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1139 self.assertEqual(len(alignments), 1) 1140 alignment = alignments[0] 1141 self.assertAlmostEqual(alignment.score, -1.2) 1142 self.assertEqual( 1143 str(alignment), 1144 """\ 1145GACT 1146|--| 1147G--T 1148""", 1149 ) 1150 self.assertEqual(alignment.shape, (2, 4)) 1151 self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((2, 1), (1, 0)))) 1152 1153 1154class TestPairwisePenalizeEndgaps(unittest.TestCase): 1155 def test_penalize_end_gaps(self): 1156 aligner = Align.PairwiseAligner() 1157 aligner.mode = "global" 1158 aligner.open_gap_score = -0.2 1159 aligner.extend_gap_score = -0.8 1160 end_score = 0.0 1161 aligner.target_end_gap_score = end_score 1162 aligner.query_end_gap_score = end_score 1163 self.assertEqual( 1164 str(aligner), 1165 """\ 1166Pairwise sequence aligner with parameters 1167 wildcard: None 1168 match_score: 1.000000 1169 mismatch_score: 0.000000 1170 target_internal_open_gap_score: -0.200000 1171 target_internal_extend_gap_score: -0.800000 1172 target_left_open_gap_score: 0.000000 1173 target_left_extend_gap_score: 0.000000 1174 target_right_open_gap_score: 0.000000 1175 target_right_extend_gap_score: 0.000000 1176 query_internal_open_gap_score: -0.200000 1177 query_internal_extend_gap_score: -0.800000 1178 query_left_open_gap_score: 0.000000 1179 query_left_extend_gap_score: 0.000000 1180 query_right_open_gap_score: 0.000000 1181 query_right_extend_gap_score: 0.000000 1182 mode: global 1183""", 1184 ) 1185 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 1186 seq1 = "GACT" 1187 seq2 = "GT" 1188 score = aligner.score(seq1, seq2) 1189 self.assertAlmostEqual(score, 1.0) 1190 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1191 self.assertAlmostEqual(score, 1.0) 1192 alignments = aligner.align(seq1, seq2) 1193 self.assertEqual(len(alignments), 3) 1194 alignment = alignments[0] 1195 self.assertAlmostEqual(alignment.score, 1.0) 1196 self.assertEqual( 1197 str(alignment), 1198 """\ 1199GACT 1200--.| 1201--GT 1202""", 1203 ) 1204 self.assertEqual(alignment.shape, (2, 4)) 1205 self.assertEqual(alignment.aligned, (((2, 4),), ((0, 2),))) 1206 alignment = alignments[1] 1207 self.assertAlmostEqual(alignment.score, 1.0) 1208 self.assertEqual( 1209 str(alignment), 1210 """\ 1211GACT 1212|--| 1213G--T 1214""", 1215 ) 1216 self.assertEqual(alignment.shape, (2, 4)) 1217 self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((0, 1), (1, 2)))) 1218 alignment = alignments[2] 1219 self.assertAlmostEqual(alignment.score, 1.0) 1220 self.assertEqual( 1221 str(alignment), 1222 """\ 1223GACT 1224|.-- 1225GT-- 1226""", 1227 ) 1228 self.assertEqual(alignment.shape, (2, 4)) 1229 self.assertEqual(alignment.aligned, (((0, 2),), ((0, 2),))) 1230 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1231 self.assertEqual(len(alignments), 3) 1232 alignment = alignments[0] 1233 self.assertAlmostEqual(alignment.score, 1.0) 1234 self.assertEqual( 1235 str(alignment), 1236 """\ 1237GACT 1238--.| 1239--GT 1240""", 1241 ) 1242 self.assertEqual(alignment.shape, (2, 4)) 1243 self.assertEqual(alignment.aligned, (((2, 4),), ((2, 0),))) 1244 alignment = alignments[1] 1245 self.assertAlmostEqual(alignment.score, 1.0) 1246 self.assertEqual( 1247 str(alignment), 1248 """\ 1249GACT 1250|--| 1251G--T 1252""", 1253 ) 1254 self.assertEqual(alignment.shape, (2, 4)) 1255 self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((2, 1), (1, 0)))) 1256 alignment = alignments[2] 1257 self.assertAlmostEqual(alignment.score, 1.0) 1258 self.assertEqual( 1259 str(alignment), 1260 """\ 1261GACT 1262|.-- 1263GT-- 1264""", 1265 ) 1266 self.assertEqual(alignment.shape, (2, 4)) 1267 self.assertEqual(alignment.aligned, (((0, 2),), ((2, 0),))) 1268 1269 1270class TestPairwiseSeparateGapPenalties(unittest.TestCase): 1271 def test_separate_gap_penalties1(self): 1272 seq1 = "GAT" 1273 seq2 = "GTCT" 1274 aligner = Align.PairwiseAligner() 1275 aligner.mode = "local" 1276 open_score, extend_score = (-0.3, 0) 1277 aligner.target_open_gap_score = open_score 1278 aligner.target_extend_gap_score = extend_score 1279 aligner.target_end_open_gap_score = open_score 1280 aligner.target_end_extend_gap_score = extend_score 1281 open_score, extend_score = (-0.8, 0) 1282 aligner.query_open_gap_score = open_score 1283 aligner.query_extend_gap_score = extend_score 1284 aligner.query_end_open_gap_score = open_score 1285 aligner.query_end_extend_gap_score = extend_score 1286 self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm") 1287 self.assertEqual( 1288 str(aligner), 1289 """\ 1290Pairwise sequence aligner with parameters 1291 wildcard: None 1292 match_score: 1.000000 1293 mismatch_score: 0.000000 1294 target_internal_open_gap_score: -0.300000 1295 target_internal_extend_gap_score: 0.000000 1296 target_left_open_gap_score: -0.300000 1297 target_left_extend_gap_score: 0.000000 1298 target_right_open_gap_score: -0.300000 1299 target_right_extend_gap_score: 0.000000 1300 query_internal_open_gap_score: -0.800000 1301 query_internal_extend_gap_score: 0.000000 1302 query_left_open_gap_score: -0.800000 1303 query_left_extend_gap_score: 0.000000 1304 query_right_open_gap_score: -0.800000 1305 query_right_extend_gap_score: 0.000000 1306 mode: local 1307""", 1308 ) 1309 score = aligner.score(seq1, seq2) 1310 self.assertAlmostEqual(score, 1.7) 1311 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1312 self.assertAlmostEqual(score, 1.7) 1313 alignments = aligner.align(seq1, seq2) 1314 self.assertEqual(len(alignments), 2) 1315 alignment = alignments[0] 1316 self.assertAlmostEqual(alignment.score, 1.7) 1317 self.assertEqual( 1318 str(alignment), 1319 """\ 1320G-AT 1321|-.| 1322GTCT 1323""", 1324 ) 1325 self.assertEqual(alignment.shape, (2, 4)) 1326 self.assertEqual(alignment.aligned, (((0, 1), (1, 3)), ((0, 1), (2, 4)))) 1327 alignment = alignments[1] 1328 self.assertAlmostEqual(alignment.score, 1.7) 1329 self.assertEqual( 1330 str(alignment), 1331 """\ 1332GA-T 1333|.-| 1334GTCT 1335""", 1336 ) 1337 self.assertEqual(alignment.shape, (2, 4)) 1338 self.assertEqual(alignment.aligned, (((0, 2), (2, 3)), ((0, 2), (3, 4)))) 1339 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1340 self.assertEqual(len(alignments), 2) 1341 alignment = alignments[0] 1342 self.assertAlmostEqual(alignment.score, 1.7) 1343 self.assertEqual( 1344 str(alignment), 1345 """\ 1346G-AT 1347|-.| 1348GTCT 1349""", 1350 ) 1351 self.assertEqual(alignment.shape, (2, 4)) 1352 self.assertEqual(alignment.aligned, (((0, 1), (1, 3)), ((4, 3), (2, 0)))) 1353 alignment = alignments[1] 1354 self.assertAlmostEqual(alignment.score, 1.7) 1355 self.assertEqual( 1356 str(alignment), 1357 """\ 1358GA-T 1359|.-| 1360GTCT 1361""", 1362 ) 1363 self.assertEqual(alignment.shape, (2, 4)) 1364 self.assertEqual(alignment.aligned, (((0, 2), (2, 3)), ((4, 2), (1, 0)))) 1365 1366 def test_separate_gap_penalties2(self): 1367 aligner = Align.PairwiseAligner() 1368 aligner.mode = "local" 1369 aligner.target_open_gap_score = -0.3 1370 aligner.target_extend_gap_score = 0.0 1371 aligner.query_open_gap_score = -0.2 1372 aligner.query_extend_gap_score = 0.0 1373 self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm") 1374 self.assertEqual( 1375 str(aligner), 1376 """\ 1377Pairwise sequence aligner with parameters 1378 wildcard: None 1379 match_score: 1.000000 1380 mismatch_score: 0.000000 1381 target_internal_open_gap_score: -0.300000 1382 target_internal_extend_gap_score: 0.000000 1383 target_left_open_gap_score: -0.300000 1384 target_left_extend_gap_score: 0.000000 1385 target_right_open_gap_score: -0.300000 1386 target_right_extend_gap_score: 0.000000 1387 query_internal_open_gap_score: -0.200000 1388 query_internal_extend_gap_score: 0.000000 1389 query_left_open_gap_score: -0.200000 1390 query_left_extend_gap_score: 0.000000 1391 query_right_open_gap_score: -0.200000 1392 query_right_extend_gap_score: 0.000000 1393 mode: local 1394""", 1395 ) 1396 seq1 = "GAT" 1397 seq2 = "GTCT" 1398 score = aligner.score(seq1, seq2) 1399 self.assertAlmostEqual(score, 1.8) 1400 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1401 self.assertAlmostEqual(score, 1.8) 1402 alignments = aligner.align(seq1, seq2) 1403 self.assertEqual(len(alignments), 1) 1404 alignment = alignments[0] 1405 self.assertAlmostEqual(alignment.score, 1.8) 1406 self.assertEqual( 1407 str(alignment), 1408 """\ 1409GAT 1410|-| 1411G-TCT 1412""", 1413 ) 1414 self.assertEqual(alignment.shape, (2, 3)) 1415 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((0, 1), (1, 2)))) 1416 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1417 self.assertEqual(len(alignments), 1) 1418 alignment = alignments[0] 1419 self.assertAlmostEqual(alignment.score, 1.8) 1420 self.assertEqual( 1421 str(alignment), 1422 """\ 1423GAT 1424|-| 1425G-TCT 1426""", 1427 ) 1428 self.assertEqual(alignment.shape, (2, 3)) 1429 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((4, 3), (3, 2)))) 1430 1431 1432class TestPairwiseSeparateGapPenaltiesWithExtension(unittest.TestCase): 1433 def test_separate_gap_penalties_with_extension(self): 1434 seq1 = "GAAT" 1435 seq2 = "GTCCT" 1436 aligner = Align.PairwiseAligner() 1437 aligner.mode = "local" 1438 open_score, extend_score = (-0.1, 0) 1439 aligner.target_open_gap_score = open_score 1440 aligner.target_extend_gap_score = extend_score 1441 aligner.target_end_open_gap_score = open_score 1442 aligner.target_end_extend_gap_score = extend_score 1443 score = -0.1 1444 aligner.query_gap_score = score 1445 aligner.query_end_gap_score = score 1446 self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm") 1447 self.assertEqual( 1448 str(aligner), 1449 """\ 1450Pairwise sequence aligner with parameters 1451 wildcard: None 1452 match_score: 1.000000 1453 mismatch_score: 0.000000 1454 target_internal_open_gap_score: -0.100000 1455 target_internal_extend_gap_score: 0.000000 1456 target_left_open_gap_score: -0.100000 1457 target_left_extend_gap_score: 0.000000 1458 target_right_open_gap_score: -0.100000 1459 target_right_extend_gap_score: 0.000000 1460 query_internal_open_gap_score: -0.100000 1461 query_internal_extend_gap_score: -0.100000 1462 query_left_open_gap_score: -0.100000 1463 query_left_extend_gap_score: -0.100000 1464 query_right_open_gap_score: -0.100000 1465 query_right_extend_gap_score: -0.100000 1466 mode: local 1467""", 1468 ) 1469 score = aligner.score(seq1, seq2) 1470 self.assertAlmostEqual(score, 1.9) 1471 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1472 self.assertAlmostEqual(score, 1.9) 1473 alignments = aligner.align(seq1, seq2) 1474 self.assertEqual(len(alignments), 3) 1475 alignment = alignments[0] 1476 self.assertAlmostEqual(alignment.score, 1.9) 1477 self.assertEqual( 1478 str(alignment), 1479 """\ 1480G-AAT 1481|-..| 1482GTCCT 1483""", 1484 ) 1485 self.assertEqual(alignment.shape, (2, 5)) 1486 self.assertEqual(alignment.aligned, (((0, 1), (1, 4)), ((0, 1), (2, 5)))) 1487 alignment = alignments[1] 1488 self.assertAlmostEqual(alignment.score, 1.9) 1489 self.assertEqual( 1490 str(alignment), 1491 """\ 1492GA-AT 1493|.-.| 1494GTCCT 1495""", 1496 ) 1497 self.assertEqual(alignment.shape, (2, 5)) 1498 self.assertEqual(alignment.aligned, (((0, 2), (2, 4)), ((0, 2), (3, 5)))) 1499 alignment = alignments[2] 1500 self.assertAlmostEqual(alignment.score, 1.9) 1501 self.assertEqual( 1502 str(alignment), 1503 """\ 1504GAA-T 1505|..-| 1506GTCCT 1507""", 1508 ) 1509 self.assertEqual(alignment.shape, (2, 5)) 1510 self.assertEqual(alignment.aligned, (((0, 3), (3, 4)), ((0, 3), (4, 5)))) 1511 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1512 self.assertEqual(len(alignments), 3) 1513 alignment = alignments[0] 1514 self.assertAlmostEqual(alignment.score, 1.9) 1515 self.assertEqual( 1516 str(alignment), 1517 """\ 1518G-AAT 1519|-..| 1520GTCCT 1521""", 1522 ) 1523 self.assertEqual(alignment.shape, (2, 5)) 1524 self.assertEqual(alignment.aligned, (((0, 1), (1, 4)), ((5, 4), (3, 0)))) 1525 alignment = alignments[1] 1526 self.assertAlmostEqual(alignment.score, 1.9) 1527 self.assertEqual( 1528 str(alignment), 1529 """\ 1530GA-AT 1531|.-.| 1532GTCCT 1533""", 1534 ) 1535 self.assertEqual(alignment.shape, (2, 5)) 1536 self.assertEqual(alignment.aligned, (((0, 2), (2, 4)), ((5, 3), (2, 0)))) 1537 alignment = alignments[2] 1538 self.assertAlmostEqual(alignment.score, 1.9) 1539 self.assertEqual( 1540 str(alignment), 1541 """\ 1542GAA-T 1543|..-| 1544GTCCT 1545""", 1546 ) 1547 self.assertEqual(alignment.shape, (2, 5)) 1548 self.assertEqual(alignment.aligned, (((0, 3), (3, 4)), ((5, 2), (1, 0)))) 1549 1550 1551class TestPairwiseMatchDictionary(unittest.TestCase): 1552 1553 match_dict = {("A", "A"): 1.5, ("A", "T"): 0.5, ("T", "A"): 0.5, ("T", "T"): 1.0} 1554 1555 def test_match_dictionary1(self): 1556 try: 1557 from Bio.Align import substitution_matrices 1558 except ImportError: 1559 return 1560 substitution_matrix = substitution_matrices.Array(data=self.match_dict) 1561 seq1 = "ATAT" 1562 seq2 = "ATT" 1563 aligner = Align.PairwiseAligner() 1564 aligner.mode = "local" 1565 aligner.substitution_matrix = substitution_matrix 1566 aligner.open_gap_score = -0.5 1567 aligner.extend_gap_score = 0.0 1568 self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm") 1569 lines = str(aligner).splitlines() 1570 self.assertEqual(len(lines), 15) 1571 self.assertEqual(lines[0], "Pairwise sequence aligner with parameters") 1572 line = lines[1] 1573 prefix = " substitution_matrix: <Array object at " 1574 suffix = ">" 1575 self.assertTrue(line.startswith(prefix)) 1576 self.assertTrue(line.endswith(suffix)) 1577 address = int(line[len(prefix) : -len(suffix)], 16) 1578 self.assertEqual(lines[2], " target_internal_open_gap_score: -0.500000") 1579 self.assertEqual(lines[3], " target_internal_extend_gap_score: 0.000000") 1580 self.assertEqual(lines[4], " target_left_open_gap_score: -0.500000") 1581 self.assertEqual(lines[5], " target_left_extend_gap_score: 0.000000") 1582 self.assertEqual(lines[6], " target_right_open_gap_score: -0.500000") 1583 self.assertEqual(lines[7], " target_right_extend_gap_score: 0.000000") 1584 self.assertEqual(lines[8], " query_internal_open_gap_score: -0.500000") 1585 self.assertEqual(lines[9], " query_internal_extend_gap_score: 0.000000") 1586 self.assertEqual(lines[10], " query_left_open_gap_score: -0.500000") 1587 self.assertEqual(lines[11], " query_left_extend_gap_score: 0.000000") 1588 self.assertEqual(lines[12], " query_right_open_gap_score: -0.500000") 1589 self.assertEqual(lines[13], " query_right_extend_gap_score: 0.000000") 1590 self.assertEqual(lines[14], " mode: local") 1591 score = aligner.score(seq1, seq2) 1592 self.assertAlmostEqual(score, 3.0) 1593 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1594 self.assertAlmostEqual(score, 3.0) 1595 alignments = aligner.align(seq1, seq2) 1596 self.assertEqual(len(alignments), 2) 1597 alignment = alignments[0] 1598 self.assertAlmostEqual(alignment.score, 3.0) 1599 self.assertEqual( 1600 str(alignment), 1601 """\ 1602ATAT 1603||. 1604ATT 1605""", 1606 ) 1607 self.assertEqual(alignment.shape, (2, 3)) 1608 self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),))) 1609 alignment = alignments[1] 1610 self.assertAlmostEqual(alignment.score, 3.0) 1611 self.assertEqual( 1612 str(alignment), 1613 """\ 1614ATAT 1615||-| 1616AT-T 1617""", 1618 ) 1619 self.assertEqual(alignment.shape, (2, 4)) 1620 self.assertEqual(alignment.aligned, (((0, 2), (3, 4)), ((0, 2), (2, 3)))) 1621 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1622 self.assertEqual(len(alignments), 2) 1623 alignment = alignments[0] 1624 self.assertAlmostEqual(alignment.score, 3.0) 1625 self.assertEqual( 1626 str(alignment), 1627 """\ 1628ATAT 1629||. 1630ATT 1631""", 1632 ) 1633 self.assertEqual(alignment.shape, (2, 3)) 1634 self.assertEqual(alignment.aligned, (((0, 3),), ((3, 0),))) 1635 alignment = alignments[1] 1636 self.assertAlmostEqual(alignment.score, 3.0) 1637 self.assertEqual( 1638 str(alignment), 1639 """\ 1640ATAT 1641||-| 1642AT-T 1643""", 1644 ) 1645 self.assertEqual(alignment.shape, (2, 4)) 1646 self.assertEqual(alignment.aligned, (((0, 2), (3, 4)), ((3, 1), (1, 0)))) 1647 1648 def test_match_dictionary2(self): 1649 try: 1650 from Bio.Align import substitution_matrices 1651 except ImportError: 1652 return 1653 substitution_matrix = substitution_matrices.Array(data=self.match_dict) 1654 seq1 = "ATAT" 1655 seq2 = "ATT" 1656 aligner = Align.PairwiseAligner() 1657 aligner.mode = "local" 1658 aligner.substitution_matrix = substitution_matrix 1659 aligner.open_gap_score = -1.0 1660 aligner.extend_gap_score = 0.0 1661 lines = str(aligner).splitlines() 1662 self.assertEqual(len(lines), 15) 1663 self.assertEqual(lines[0], "Pairwise sequence aligner with parameters") 1664 line = lines[1] 1665 prefix = " substitution_matrix: <Array object at " 1666 suffix = ">" 1667 self.assertTrue(line.startswith(prefix)) 1668 self.assertTrue(line.endswith(suffix)) 1669 address = int(line[len(prefix) : -len(suffix)], 16) 1670 self.assertEqual(lines[2], " target_internal_open_gap_score: -1.000000") 1671 self.assertEqual(lines[3], " target_internal_extend_gap_score: 0.000000") 1672 self.assertEqual(lines[4], " target_left_open_gap_score: -1.000000") 1673 self.assertEqual(lines[5], " target_left_extend_gap_score: 0.000000") 1674 self.assertEqual(lines[6], " target_right_open_gap_score: -1.000000") 1675 self.assertEqual(lines[7], " target_right_extend_gap_score: 0.000000") 1676 self.assertEqual(lines[8], " query_internal_open_gap_score: -1.000000") 1677 self.assertEqual(lines[9], " query_internal_extend_gap_score: 0.000000") 1678 self.assertEqual(lines[10], " query_left_open_gap_score: -1.000000") 1679 self.assertEqual(lines[11], " query_left_extend_gap_score: 0.000000") 1680 self.assertEqual(lines[12], " query_right_open_gap_score: -1.000000") 1681 self.assertEqual(lines[13], " query_right_extend_gap_score: 0.000000") 1682 self.assertEqual(lines[14], " mode: local") 1683 score = aligner.score(seq1, seq2) 1684 self.assertAlmostEqual(score, 3.0) 1685 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1686 self.assertAlmostEqual(score, 3.0) 1687 alignments = aligner.align(seq1, seq2) 1688 self.assertEqual(len(alignments), 1) 1689 alignment = alignments[0] 1690 self.assertAlmostEqual(alignment.score, 3.0) 1691 self.assertEqual( 1692 str(alignment), 1693 """\ 1694ATAT 1695||. 1696ATT 1697""", 1698 ) 1699 self.assertEqual(alignment.shape, (2, 3)) 1700 self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),))) 1701 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1702 self.assertEqual(len(alignments), 1) 1703 alignment = alignments[0] 1704 self.assertAlmostEqual(alignment.score, 3.0) 1705 self.assertEqual( 1706 str(alignment), 1707 """\ 1708ATAT 1709||. 1710ATT 1711""", 1712 ) 1713 self.assertEqual(alignment.shape, (2, 3)) 1714 self.assertEqual(alignment.aligned, (((0, 3),), ((3, 0),))) 1715 1716 def test_match_dictionary3(self): 1717 try: 1718 from Bio.Align import substitution_matrices 1719 except ImportError: 1720 return 1721 substitution_matrix = substitution_matrices.Array(data=self.match_dict) 1722 seq1 = "ATT" 1723 seq2 = "ATAT" 1724 aligner = Align.PairwiseAligner() 1725 aligner.mode = "local" 1726 aligner.substitution_matrix = substitution_matrix 1727 aligner.open_gap_score = -1.0 1728 aligner.extend_gap_score = 0.0 1729 lines = str(aligner).splitlines() 1730 self.assertEqual(len(lines), 15) 1731 self.assertEqual(lines[0], "Pairwise sequence aligner with parameters") 1732 line = lines[1] 1733 prefix = " substitution_matrix: <Array object at " 1734 suffix = ">" 1735 self.assertTrue(line.startswith(prefix)) 1736 self.assertTrue(line.endswith(suffix)) 1737 address = int(line[len(prefix) : -len(suffix)], 16) 1738 self.assertEqual(lines[2], " target_internal_open_gap_score: -1.000000") 1739 self.assertEqual(lines[3], " target_internal_extend_gap_score: 0.000000") 1740 self.assertEqual(lines[4], " target_left_open_gap_score: -1.000000") 1741 self.assertEqual(lines[5], " target_left_extend_gap_score: 0.000000") 1742 self.assertEqual(lines[6], " target_right_open_gap_score: -1.000000") 1743 self.assertEqual(lines[7], " target_right_extend_gap_score: 0.000000") 1744 self.assertEqual(lines[8], " query_internal_open_gap_score: -1.000000") 1745 self.assertEqual(lines[9], " query_internal_extend_gap_score: 0.000000") 1746 self.assertEqual(lines[10], " query_left_open_gap_score: -1.000000") 1747 self.assertEqual(lines[11], " query_left_extend_gap_score: 0.000000") 1748 self.assertEqual(lines[12], " query_right_open_gap_score: -1.000000") 1749 self.assertEqual(lines[13], " query_right_extend_gap_score: 0.000000") 1750 self.assertEqual(lines[14], " mode: local") 1751 score = aligner.score(seq1, seq2) 1752 self.assertAlmostEqual(score, 3.0) 1753 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1754 self.assertAlmostEqual(score, 3.0) 1755 alignments = aligner.align(seq1, seq2) 1756 self.assertEqual(len(alignments), 1) 1757 alignment = alignments[0] 1758 self.assertAlmostEqual(alignment.score, 3.0) 1759 self.assertEqual( 1760 str(alignment), 1761 """\ 1762ATT 1763||. 1764ATAT 1765""", 1766 ) 1767 self.assertEqual(alignment.shape, (2, 3)) 1768 self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),))) 1769 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1770 self.assertEqual(len(alignments), 1) 1771 alignment = alignments[0] 1772 self.assertAlmostEqual(alignment.score, 3.0) 1773 self.assertEqual( 1774 str(alignment), 1775 """\ 1776ATT 1777||. 1778ATAT 1779""", 1780 ) 1781 self.assertEqual(alignment.shape, (2, 3)) 1782 self.assertEqual(alignment.aligned, (((0, 3),), ((4, 1),))) 1783 1784 def test_match_dictionary4(self): 1785 try: 1786 from Bio.Align import substitution_matrices 1787 except ImportError: 1788 return 1789 substitution_matrix = substitution_matrices.Array(alphabet="AT", dims=2) 1790 self.assertEqual(substitution_matrix.shape, (2, 2)) 1791 substitution_matrix.update(self.match_dict) 1792 seq1 = "ATAT" 1793 seq2 = "ATT" 1794 aligner = Align.PairwiseAligner() 1795 aligner.mode = "local" 1796 aligner.substitution_matrix = substitution_matrix 1797 aligner.open_gap_score = -0.5 1798 aligner.extend_gap_score = 0.0 1799 self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm") 1800 lines = str(aligner).splitlines() 1801 self.assertEqual(len(lines), 15) 1802 self.assertEqual(lines[0], "Pairwise sequence aligner with parameters") 1803 line = lines[1] 1804 prefix = " substitution_matrix: <Array object at " 1805 suffix = ">" 1806 self.assertTrue(line.startswith(prefix)) 1807 self.assertTrue(line.endswith(suffix)) 1808 address = int(line[len(prefix) : -len(suffix)], 16) 1809 self.assertEqual(lines[2], " target_internal_open_gap_score: -0.500000") 1810 self.assertEqual(lines[3], " target_internal_extend_gap_score: 0.000000") 1811 self.assertEqual(lines[4], " target_left_open_gap_score: -0.500000") 1812 self.assertEqual(lines[5], " target_left_extend_gap_score: 0.000000") 1813 self.assertEqual(lines[6], " target_right_open_gap_score: -0.500000") 1814 self.assertEqual(lines[7], " target_right_extend_gap_score: 0.000000") 1815 self.assertEqual(lines[8], " query_internal_open_gap_score: -0.500000") 1816 self.assertEqual(lines[9], " query_internal_extend_gap_score: 0.000000") 1817 self.assertEqual(lines[10], " query_left_open_gap_score: -0.500000") 1818 self.assertEqual(lines[11], " query_left_extend_gap_score: 0.000000") 1819 self.assertEqual(lines[12], " query_right_open_gap_score: -0.500000") 1820 self.assertEqual(lines[13], " query_right_extend_gap_score: 0.000000") 1821 self.assertEqual(lines[14], " mode: local") 1822 score = aligner.score(seq1, seq2) 1823 self.assertAlmostEqual(score, 3.0) 1824 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1825 self.assertAlmostEqual(score, 3.0) 1826 alignments = aligner.align(seq1, seq2) 1827 self.assertEqual(len(alignments), 2) 1828 alignment = alignments[0] 1829 self.assertAlmostEqual(alignment.score, 3.0) 1830 self.assertEqual( 1831 str(alignment), 1832 """\ 1833ATAT 1834||. 1835ATT 1836""", 1837 ) 1838 self.assertEqual(alignment.shape, (2, 3)) 1839 self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),))) 1840 alignment = alignments[1] 1841 self.assertAlmostEqual(alignment.score, 3.0) 1842 self.assertEqual( 1843 str(alignment), 1844 """\ 1845ATAT 1846||-| 1847AT-T 1848""", 1849 ) 1850 self.assertEqual(alignment.shape, (2, 4)) 1851 self.assertEqual(alignment.aligned, (((0, 2), (3, 4)), ((0, 2), (2, 3)))) 1852 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1853 self.assertEqual(len(alignments), 2) 1854 alignment = alignments[0] 1855 self.assertAlmostEqual(alignment.score, 3.0) 1856 self.assertEqual( 1857 str(alignment), 1858 """\ 1859ATAT 1860||. 1861ATT 1862""", 1863 ) 1864 self.assertEqual(alignment.shape, (2, 3)) 1865 self.assertEqual(alignment.aligned, (((0, 3),), ((3, 0),))) 1866 alignment = alignments[1] 1867 self.assertAlmostEqual(alignment.score, 3.0) 1868 self.assertEqual( 1869 str(alignment), 1870 """\ 1871ATAT 1872||-| 1873AT-T 1874""", 1875 ) 1876 self.assertEqual(alignment.shape, (2, 4)) 1877 self.assertEqual(alignment.aligned, (((0, 2), (3, 4)), ((3, 1), (1, 0)))) 1878 1879 def test_match_dictionary5(self): 1880 try: 1881 from Bio.Align import substitution_matrices 1882 except ImportError: 1883 return 1884 substitution_matrix = substitution_matrices.Array(alphabet="AT", dims=2) 1885 self.assertEqual(substitution_matrix.shape, (2, 2)) 1886 substitution_matrix.update(self.match_dict) 1887 seq1 = "ATAT" 1888 seq2 = "ATT" 1889 aligner = Align.PairwiseAligner() 1890 aligner.mode = "local" 1891 aligner.substitution_matrix = substitution_matrix 1892 aligner.open_gap_score = -1.0 1893 aligner.extend_gap_score = 0.0 1894 lines = str(aligner).splitlines() 1895 self.assertEqual(len(lines), 15) 1896 self.assertEqual(lines[0], "Pairwise sequence aligner with parameters") 1897 line = lines[1] 1898 prefix = " substitution_matrix: <Array object at " 1899 suffix = ">" 1900 self.assertTrue(line.startswith(prefix)) 1901 self.assertTrue(line.endswith(suffix)) 1902 address = int(line[len(prefix) : -len(suffix)], 16) 1903 self.assertEqual(lines[2], " target_internal_open_gap_score: -1.000000") 1904 self.assertEqual(lines[3], " target_internal_extend_gap_score: 0.000000") 1905 self.assertEqual(lines[4], " target_left_open_gap_score: -1.000000") 1906 self.assertEqual(lines[5], " target_left_extend_gap_score: 0.000000") 1907 self.assertEqual(lines[6], " target_right_open_gap_score: -1.000000") 1908 self.assertEqual(lines[7], " target_right_extend_gap_score: 0.000000") 1909 self.assertEqual(lines[8], " query_internal_open_gap_score: -1.000000") 1910 self.assertEqual(lines[9], " query_internal_extend_gap_score: 0.000000") 1911 self.assertEqual(lines[10], " query_left_open_gap_score: -1.000000") 1912 self.assertEqual(lines[11], " query_left_extend_gap_score: 0.000000") 1913 self.assertEqual(lines[12], " query_right_open_gap_score: -1.000000") 1914 self.assertEqual(lines[13], " query_right_extend_gap_score: 0.000000") 1915 self.assertEqual(lines[14], " mode: local") 1916 score = aligner.score(seq1, seq2) 1917 self.assertAlmostEqual(score, 3.0) 1918 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1919 self.assertAlmostEqual(score, 3.0) 1920 alignments = aligner.align(seq1, seq2) 1921 self.assertEqual(len(alignments), 1) 1922 alignment = alignments[0] 1923 self.assertAlmostEqual(alignment.score, 3.0) 1924 self.assertEqual( 1925 str(alignment), 1926 """\ 1927ATAT 1928||. 1929ATT 1930""", 1931 ) 1932 self.assertEqual(alignment.shape, (2, 3)) 1933 self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),))) 1934 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 1935 self.assertEqual(len(alignments), 1) 1936 alignment = alignments[0] 1937 self.assertAlmostEqual(alignment.score, 3.0) 1938 self.assertEqual( 1939 str(alignment), 1940 """\ 1941ATAT 1942||. 1943ATT 1944""", 1945 ) 1946 self.assertEqual(alignment.shape, (2, 3)) 1947 self.assertEqual(alignment.aligned, (((0, 3),), ((3, 0),))) 1948 1949 def test_match_dictionary6(self): 1950 try: 1951 from Bio.Align import substitution_matrices 1952 except ImportError: 1953 return 1954 substitution_matrix = substitution_matrices.Array(alphabet="AT", dims=2) 1955 self.assertEqual(substitution_matrix.shape, (2, 2)) 1956 substitution_matrix.update(self.match_dict) 1957 seq1 = "ATT" 1958 seq2 = "ATAT" 1959 aligner = Align.PairwiseAligner() 1960 aligner.mode = "local" 1961 aligner.substitution_matrix = substitution_matrix 1962 aligner.open_gap_score = -1.0 1963 aligner.extend_gap_score = 0.0 1964 lines = str(aligner).splitlines() 1965 self.assertEqual(len(lines), 15) 1966 self.assertEqual(lines[0], "Pairwise sequence aligner with parameters") 1967 line = lines[1] 1968 prefix = " substitution_matrix: <Array object at " 1969 suffix = ">" 1970 self.assertTrue(line.startswith(prefix)) 1971 self.assertTrue(line.endswith(suffix)) 1972 address = int(line[len(prefix) : -len(suffix)], 16) 1973 self.assertEqual(lines[2], " target_internal_open_gap_score: -1.000000") 1974 self.assertEqual(lines[3], " target_internal_extend_gap_score: 0.000000") 1975 self.assertEqual(lines[4], " target_left_open_gap_score: -1.000000") 1976 self.assertEqual(lines[5], " target_left_extend_gap_score: 0.000000") 1977 self.assertEqual(lines[6], " target_right_open_gap_score: -1.000000") 1978 self.assertEqual(lines[7], " target_right_extend_gap_score: 0.000000") 1979 self.assertEqual(lines[8], " query_internal_open_gap_score: -1.000000") 1980 self.assertEqual(lines[9], " query_internal_extend_gap_score: 0.000000") 1981 self.assertEqual(lines[10], " query_left_open_gap_score: -1.000000") 1982 self.assertEqual(lines[11], " query_left_extend_gap_score: 0.000000") 1983 self.assertEqual(lines[12], " query_right_open_gap_score: -1.000000") 1984 self.assertEqual(lines[13], " query_right_extend_gap_score: 0.000000") 1985 self.assertEqual(lines[14], " mode: local") 1986 score = aligner.score(seq1, seq2) 1987 self.assertAlmostEqual(score, 3.0) 1988 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 1989 self.assertAlmostEqual(score, 3.0) 1990 alignments = aligner.align(seq1, seq2) 1991 self.assertEqual(len(alignments), 1) 1992 alignment = alignments[0] 1993 self.assertAlmostEqual(alignment.score, 3.0) 1994 self.assertEqual( 1995 str(alignment), 1996 """\ 1997ATT 1998||. 1999ATAT 2000""", 2001 ) 2002 self.assertEqual(alignment.shape, (2, 3)) 2003 self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),))) 2004 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2005 self.assertEqual(len(alignments), 1) 2006 alignment = alignments[0] 2007 self.assertAlmostEqual(alignment.score, 3.0) 2008 self.assertEqual( 2009 str(alignment), 2010 """\ 2011ATT 2012||. 2013ATAT 2014""", 2015 ) 2016 self.assertEqual(alignment.shape, (2, 3)) 2017 self.assertEqual(alignment.aligned, (((0, 3),), ((4, 1),))) 2018 2019 2020class TestPairwiseOneCharacter(unittest.TestCase): 2021 def test_align_one_char1(self): 2022 aligner = Align.PairwiseAligner() 2023 aligner.mode = "local" 2024 aligner.open_gap_score = -0.3 2025 aligner.extend_gap_score = -0.1 2026 self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm") 2027 self.assertEqual( 2028 str(aligner), 2029 """\ 2030Pairwise sequence aligner with parameters 2031 wildcard: None 2032 match_score: 1.000000 2033 mismatch_score: 0.000000 2034 target_internal_open_gap_score: -0.300000 2035 target_internal_extend_gap_score: -0.100000 2036 target_left_open_gap_score: -0.300000 2037 target_left_extend_gap_score: -0.100000 2038 target_right_open_gap_score: -0.300000 2039 target_right_extend_gap_score: -0.100000 2040 query_internal_open_gap_score: -0.300000 2041 query_internal_extend_gap_score: -0.100000 2042 query_left_open_gap_score: -0.300000 2043 query_left_extend_gap_score: -0.100000 2044 query_right_open_gap_score: -0.300000 2045 query_right_extend_gap_score: -0.100000 2046 mode: local 2047""", 2048 ) 2049 score = aligner.score("abcde", "c") 2050 self.assertAlmostEqual(score, 1) 2051 alignments = aligner.align("abcde", "c") 2052 self.assertEqual(len(alignments), 1) 2053 alignment = alignments[0] 2054 self.assertAlmostEqual(alignment.score, 1) 2055 self.assertEqual( 2056 str(alignment), 2057 """\ 2058abcde 2059 | 2060 c 2061""", 2062 ) 2063 self.assertEqual(alignment.shape, (2, 1)) 2064 self.assertEqual(alignment.aligned, (((2, 3),), ((0, 1),))) 2065 2066 def test_align_one_char2(self): 2067 aligner = Align.PairwiseAligner() 2068 aligner.mode = "local" 2069 aligner.open_gap_score = -0.3 2070 aligner.extend_gap_score = -0.1 2071 self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm") 2072 self.assertEqual( 2073 str(aligner), 2074 """\ 2075Pairwise sequence aligner with parameters 2076 wildcard: None 2077 match_score: 1.000000 2078 mismatch_score: 0.000000 2079 target_internal_open_gap_score: -0.300000 2080 target_internal_extend_gap_score: -0.100000 2081 target_left_open_gap_score: -0.300000 2082 target_left_extend_gap_score: -0.100000 2083 target_right_open_gap_score: -0.300000 2084 target_right_extend_gap_score: -0.100000 2085 query_internal_open_gap_score: -0.300000 2086 query_internal_extend_gap_score: -0.100000 2087 query_left_open_gap_score: -0.300000 2088 query_left_extend_gap_score: -0.100000 2089 query_right_open_gap_score: -0.300000 2090 query_right_extend_gap_score: -0.100000 2091 mode: local 2092""", 2093 ) 2094 score = aligner.score("abcce", "c") 2095 self.assertAlmostEqual(score, 1) 2096 alignments = aligner.align("abcce", "c") 2097 self.assertEqual(len(alignments), 2) 2098 alignment = alignments[0] 2099 self.assertAlmostEqual(alignment.score, 1) 2100 self.assertEqual( 2101 str(alignment), 2102 """\ 2103abcce 2104 | 2105 c 2106""", 2107 ) 2108 self.assertEqual(alignment.shape, (2, 1)) 2109 self.assertEqual(alignment.aligned, (((2, 3),), ((0, 1),))) 2110 alignment = alignments[1] 2111 self.assertAlmostEqual(alignment.score, 1) 2112 self.assertEqual( 2113 str(alignment), 2114 """\ 2115abcce 2116 | 2117 c 2118""", 2119 ) 2120 self.assertEqual(alignment.shape, (2, 1)) 2121 self.assertEqual(alignment.aligned, (((3, 4),), ((0, 1),))) 2122 2123 def test_align_one_char3(self): 2124 aligner = Align.PairwiseAligner() 2125 aligner.mode = "global" 2126 aligner.open_gap_score = -0.3 2127 aligner.extend_gap_score = -0.1 2128 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 2129 self.assertEqual( 2130 str(aligner), 2131 """\ 2132Pairwise sequence aligner with parameters 2133 wildcard: None 2134 match_score: 1.000000 2135 mismatch_score: 0.000000 2136 target_internal_open_gap_score: -0.300000 2137 target_internal_extend_gap_score: -0.100000 2138 target_left_open_gap_score: -0.300000 2139 target_left_extend_gap_score: -0.100000 2140 target_right_open_gap_score: -0.300000 2141 target_right_extend_gap_score: -0.100000 2142 query_internal_open_gap_score: -0.300000 2143 query_internal_extend_gap_score: -0.100000 2144 query_left_open_gap_score: -0.300000 2145 query_left_extend_gap_score: -0.100000 2146 query_right_open_gap_score: -0.300000 2147 query_right_extend_gap_score: -0.100000 2148 mode: global 2149""", 2150 ) 2151 seq1 = "abcde" 2152 seq2 = "c" 2153 score = aligner.score(seq1, seq2) 2154 self.assertAlmostEqual(score, 0.2) 2155 alignments = aligner.align(seq1, seq2) 2156 self.assertEqual(len(alignments), 1) 2157 alignment = alignments[0] 2158 self.assertAlmostEqual(alignment.score, 0.2) 2159 self.assertEqual( 2160 str(alignment), 2161 """\ 2162abcde 2163--|-- 2164--c-- 2165""", 2166 ) 2167 self.assertEqual(alignment.shape, (2, 5)) 2168 self.assertEqual(alignment.aligned, (((2, 3),), ((0, 1),))) 2169 2170 def test_align_one_char_score3(self): 2171 aligner = Align.PairwiseAligner() 2172 aligner.mode = "global" 2173 aligner.open_gap_score = -0.3 2174 aligner.extend_gap_score = -0.1 2175 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 2176 self.assertEqual( 2177 str(aligner), 2178 """\ 2179Pairwise sequence aligner with parameters 2180 wildcard: None 2181 match_score: 1.000000 2182 mismatch_score: 0.000000 2183 target_internal_open_gap_score: -0.300000 2184 target_internal_extend_gap_score: -0.100000 2185 target_left_open_gap_score: -0.300000 2186 target_left_extend_gap_score: -0.100000 2187 target_right_open_gap_score: -0.300000 2188 target_right_extend_gap_score: -0.100000 2189 query_internal_open_gap_score: -0.300000 2190 query_internal_extend_gap_score: -0.100000 2191 query_left_open_gap_score: -0.300000 2192 query_left_extend_gap_score: -0.100000 2193 query_right_open_gap_score: -0.300000 2194 query_right_extend_gap_score: -0.100000 2195 mode: global 2196""", 2197 ) 2198 score = aligner.score("abcde", "c") 2199 self.assertAlmostEqual(score, 0.2) 2200 2201 2202class TestPerSiteGapPenalties(unittest.TestCase): 2203 """Check gap penalty callbacks use correct gap opening position. 2204 2205 This tests that the gap penalty callbacks are really being used 2206 with the correct gap opening position. 2207 """ 2208 2209 def test_gap_here_only_1(self): 2210 seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA" 2211 seq2 = "AABBBAAAACCCCAAAABBBAA" 2212 breaks = [0, 11, len(seq2)] 2213 2214 # Very expensive to open a gap in seq1: 2215 def nogaps(x, y): 2216 return -2000 - y 2217 2218 # Very expensive to open a gap in seq2 unless it is in one of the allowed positions: 2219 def specificgaps(x, y): 2220 if x in breaks: 2221 return -2 - y 2222 else: 2223 return -2000 - y 2224 2225 aligner = Align.PairwiseAligner() 2226 aligner.mode = "global" 2227 aligner.match_score = 1 2228 aligner.mismatch_score = -1 2229 aligner.target_gap_score = nogaps 2230 aligner.query_gap_score = specificgaps 2231 self.assertEqual( 2232 str(aligner), 2233 """\ 2234Pairwise sequence aligner with parameters 2235 wildcard: None 2236 match_score: 1.000000 2237 mismatch_score: -1.000000 2238 target_gap_function: %s 2239 query_gap_function: %s 2240 mode: global 2241""" 2242 % (nogaps, specificgaps), 2243 ) 2244 self.assertEqual( 2245 aligner.algorithm, "Waterman-Smith-Beyer global alignment algorithm" 2246 ) 2247 score = aligner.score(seq1, seq2) 2248 self.assertAlmostEqual(score, 2) 2249 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 2250 self.assertAlmostEqual(score, 2) 2251 alignments = aligner.align(seq1, seq2) 2252 self.assertEqual(len(alignments), 1) 2253 alignment = alignments[0] 2254 self.assertAlmostEqual(alignment.score, 2) 2255 self.assertEqual( 2256 str(alignment), 2257 """\ 2258AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2259--|||||||||||----------|||||||||||-- 2260--AABBBAAAACC----------CCAAAABBBAA-- 2261""", 2262 ) 2263 self.assertEqual(alignment.shape, (2, 36)) 2264 self.assertEqual(alignment.aligned, (((2, 13), (23, 34)), ((0, 11), (11, 22)))) 2265 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2266 self.assertEqual(len(alignments), 1) 2267 alignment = alignments[0] 2268 self.assertAlmostEqual(alignment.score, 2) 2269 self.assertEqual( 2270 str(alignment), 2271 """\ 2272AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2273--|||||||||||----------|||||||||||-- 2274--AABBBAAAACC----------CCAAAABBBAA-- 2275""", 2276 ) 2277 self.assertEqual(alignment.shape, (2, 36)) 2278 self.assertEqual(alignment.aligned, (((2, 13), (23, 34)), ((22, 11), (11, 0)))) 2279 2280 def test_gap_here_only_2(self): 2281 # Force a bad alignment. 2282 # 2283 # Forces a bad alignment by having a very expensive gap penalty 2284 # where one would normally expect a gap, and a cheap gap penalty 2285 # in another place. 2286 seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA" 2287 seq2 = "AABBBAAAACCCCAAAABBBAA" 2288 breaks = [0, 3, len(seq2)] 2289 2290 # Very expensive to open a gap in seq1: 2291 def nogaps(x, y): 2292 return -2000 - y 2293 2294 # Very expensive to open a gap in seq2 unless it is in one of the allowed positions: 2295 def specificgaps(x, y): 2296 if x in breaks: 2297 return -2 - y 2298 else: 2299 return -2000 - y 2300 2301 aligner = Align.PairwiseAligner() 2302 aligner.mode = "global" 2303 aligner.match_score = 1 2304 aligner.mismatch_score = -1 2305 aligner.target_gap_score = nogaps 2306 aligner.query_gap_score = specificgaps 2307 self.assertEqual( 2308 str(aligner), 2309 """\ 2310Pairwise sequence aligner with parameters 2311 wildcard: None 2312 match_score: 1.000000 2313 mismatch_score: -1.000000 2314 target_gap_function: %s 2315 query_gap_function: %s 2316 mode: global 2317""" 2318 % (nogaps, specificgaps), 2319 ) 2320 self.assertEqual( 2321 aligner.algorithm, "Waterman-Smith-Beyer global alignment algorithm" 2322 ) 2323 score = aligner.score(seq1, seq2) 2324 self.assertAlmostEqual(score, -10) 2325 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 2326 self.assertAlmostEqual(score, -10) 2327 alignments = aligner.align(seq1, seq2) 2328 self.assertEqual(len(alignments), 2) 2329 alignment = alignments[0] 2330 self.assertAlmostEqual(alignment.score, -10) 2331 self.assertEqual( 2332 str(alignment), 2333 """\ 2334AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2335--|||----------......|||||||||||||-- 2336--AAB----------BBAAAACCCCAAAABBBAA-- 2337""", 2338 ) 2339 self.assertEqual(alignment.shape, (2, 36)) 2340 self.assertEqual(alignment.aligned, (((2, 5), (15, 34)), ((0, 3), (3, 22)))) 2341 alignment = alignments[1] 2342 self.assertAlmostEqual(alignment.score, -10) 2343 self.assertEqual( 2344 str(alignment), 2345 """\ 2346AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2347||.------------......|||||||||||||-- 2348AAB------------BBAAAACCCCAAAABBBAA-- 2349""", 2350 ) 2351 self.assertEqual(alignment.shape, (2, 36)) 2352 self.assertEqual(alignment.aligned, (((0, 3), (15, 34)), ((0, 3), (3, 22)))) 2353 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2354 self.assertEqual(len(alignments), 2) 2355 alignment = alignments[0] 2356 self.assertAlmostEqual(alignment.score, -10) 2357 self.assertEqual( 2358 str(alignment), 2359 """\ 2360AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2361--|||||||||||||......------------.|| 2362--AABBBAAAACCCCAAAABB------------BAA 2363""", 2364 ) 2365 self.assertEqual(alignment.shape, (2, 36)) 2366 self.assertEqual(alignment.aligned, (((2, 21), (33, 36)), ((22, 3), (3, 0)))) 2367 alignment = alignments[1] 2368 self.assertAlmostEqual(alignment.score, -10) 2369 self.assertEqual( 2370 str(alignment), 2371 """\ 2372AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2373--|||||||||||||......----------|||-- 2374--AABBBAAAACCCCAAAABB----------BAA-- 2375""", 2376 ) 2377 self.assertEqual(alignment.shape, (2, 36)) 2378 self.assertEqual(alignment.aligned, (((2, 21), (31, 34)), ((22, 3), (3, 0)))) 2379 2380 def test_gap_here_only_3(self): 2381 # Check if gap open and gap extend penalties are handled correctly. 2382 seq1 = "TTCCAA" 2383 seq2 = "TTGGAA" 2384 2385 def gap_score(i, n): 2386 if i == 3: 2387 return -10 2388 if n == 1: 2389 return -1 2390 return -10 2391 2392 aligner = Align.PairwiseAligner() 2393 aligner.mode = "global" 2394 aligner.match_score = 1 2395 aligner.mismatch_score = -10 2396 aligner.target_gap_score = gap_score 2397 self.assertEqual( 2398 aligner.algorithm, "Waterman-Smith-Beyer global alignment algorithm" 2399 ) 2400 self.assertEqual( 2401 str(aligner), 2402 """\ 2403Pairwise sequence aligner with parameters 2404 wildcard: None 2405 match_score: 1.000000 2406 mismatch_score: -10.000000 2407 target_gap_function: %s 2408 query_internal_open_gap_score: 0.000000 2409 query_internal_extend_gap_score: 0.000000 2410 query_left_open_gap_score: 0.000000 2411 query_left_extend_gap_score: 0.000000 2412 query_right_open_gap_score: 0.000000 2413 query_right_extend_gap_score: 0.000000 2414 mode: global 2415""" 2416 % gap_score, 2417 ) 2418 score = aligner.score(seq1, seq2) 2419 self.assertAlmostEqual(score, 2.0) 2420 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 2421 self.assertAlmostEqual(score, 2.0) 2422 alignments = aligner.align(seq1, seq2) 2423 self.assertEqual(len(alignments), 1) 2424 alignment = alignments[0] 2425 self.assertAlmostEqual(alignment.score, 2.0) 2426 self.assertEqual( 2427 str(alignment), 2428 """\ 2429TT-CC-AA 2430||----|| 2431TTG--GAA 2432""", 2433 ) 2434 self.assertEqual(alignment.shape, (2, 8)) 2435 self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((0, 2), (4, 6)))) 2436 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2437 self.assertEqual(len(alignments), 1) 2438 alignment = alignments[0] 2439 self.assertAlmostEqual(alignment.score, 2.0) 2440 self.assertEqual( 2441 str(alignment), 2442 """\ 2443TT-CC-AA 2444||----|| 2445TTG--GAA 2446""", 2447 ) 2448 self.assertEqual(alignment.shape, (2, 8)) 2449 self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((6, 4), (2, 0)))) 2450 aligner.query_gap_score = gap_score 2451 self.assertEqual( 2452 str(aligner), 2453 """\ 2454Pairwise sequence aligner with parameters 2455 wildcard: None 2456 match_score: 1.000000 2457 mismatch_score: -10.000000 2458 target_gap_function: %s 2459 query_gap_function: %s 2460 mode: global 2461""" 2462 % (gap_score, gap_score), 2463 ) 2464 score = aligner.score(seq1, seq2) 2465 self.assertAlmostEqual(score, -8.0) 2466 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 2467 self.assertAlmostEqual(score, -8.0) 2468 alignments = aligner.align(seq1, seq2) 2469 self.assertEqual(len(alignments), 4) 2470 alignment = alignments[0] 2471 self.assertAlmostEqual(alignment.score, -8.0) 2472 self.assertEqual( 2473 str(alignment), 2474 """\ 2475TT-CCAA 2476||-.-|| 2477TTGG-AA 2478""", 2479 ) 2480 self.assertEqual(alignment.shape, (2, 7)) 2481 self.assertEqual( 2482 alignment.aligned, (((0, 2), (2, 3), (4, 6)), ((0, 2), (3, 4), (4, 6))) 2483 ) 2484 alignment = alignments[1] 2485 self.assertAlmostEqual(alignment.score, -8.0) 2486 self.assertEqual( 2487 str(alignment), 2488 """\ 2489TTC--CAA 2490||----|| 2491TT-GG-AA 2492""", 2493 ) 2494 self.assertEqual(alignment.shape, (2, 8)) 2495 self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((0, 2), (4, 6)))) 2496 alignment = alignments[2] 2497 self.assertAlmostEqual(alignment.score, -8.0) 2498 self.assertEqual( 2499 str(alignment), 2500 """\ 2501TTCC-AA 2502||-.-|| 2503TT-GGAA 2504""", 2505 ) 2506 self.assertEqual(alignment.shape, (2, 7)) 2507 self.assertEqual( 2508 alignment.aligned, (((0, 2), (3, 4), (4, 6)), ((0, 2), (2, 3), (4, 6))) 2509 ) 2510 alignment = alignments[3] 2511 self.assertAlmostEqual(alignment.score, -8.0) 2512 self.assertEqual( 2513 str(alignment), 2514 """\ 2515TT-CC-AA 2516||----|| 2517TTG--GAA 2518""", 2519 ) 2520 self.assertEqual(alignment.shape, (2, 8)) 2521 self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((0, 2), (4, 6)))) 2522 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2523 self.assertEqual(len(alignments), 4) 2524 alignment = alignments[0] 2525 self.assertAlmostEqual(alignment.score, -8.0) 2526 self.assertEqual( 2527 str(alignment), 2528 """\ 2529TT-CCAA 2530||-.-|| 2531TTGG-AA 2532""", 2533 ) 2534 self.assertEqual(alignment.shape, (2, 7)) 2535 self.assertEqual( 2536 alignment.aligned, (((0, 2), (2, 3), (4, 6)), ((6, 4), (3, 2), (2, 0))) 2537 ) 2538 alignment = alignments[1] 2539 self.assertAlmostEqual(alignment.score, -8.0) 2540 self.assertEqual( 2541 str(alignment), 2542 """\ 2543TTC--CAA 2544||----|| 2545TT-GG-AA 2546""", 2547 ) 2548 self.assertEqual(alignment.shape, (2, 8)) 2549 self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((6, 4), (2, 0)))) 2550 alignment = alignments[2] 2551 self.assertAlmostEqual(alignment.score, -8.0) 2552 self.assertEqual( 2553 str(alignment), 2554 """\ 2555TTCC-AA 2556||-.-|| 2557TT-GGAA 2558""", 2559 ) 2560 self.assertEqual(alignment.shape, (2, 7)) 2561 self.assertEqual( 2562 alignment.aligned, (((0, 2), (3, 4), (4, 6)), ((6, 4), (4, 3), (2, 0))) 2563 ) 2564 alignment = alignments[3] 2565 self.assertAlmostEqual(alignment.score, -8.0) 2566 self.assertEqual( 2567 str(alignment), 2568 """\ 2569TT-CC-AA 2570||----|| 2571TTG--GAA 2572""", 2573 ) 2574 self.assertEqual(alignment.shape, (2, 8)) 2575 self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((6, 4), (2, 0)))) 2576 2577 def test_gap_here_only_local_1(self): 2578 seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA" 2579 seq2 = "AABBBAAAACCCCAAAABBBAA" 2580 breaks = [0, 11, len(seq2)] 2581 2582 # Very expensive to open a gap in seq1: 2583 def nogaps(x, y): 2584 return -2000 - y 2585 2586 # Very expensive to open a gap in seq2 unless it is in one of the allowed positions: 2587 def specificgaps(x, y): 2588 if x in breaks: 2589 return -2 - y 2590 else: 2591 return -2000 - y 2592 2593 aligner = Align.PairwiseAligner() 2594 aligner.mode = "local" 2595 aligner.match_score = 1 2596 aligner.mismatch_score = -1 2597 aligner.target_gap_score = nogaps 2598 aligner.query_gap_score = specificgaps 2599 self.assertEqual( 2600 aligner.algorithm, "Waterman-Smith-Beyer local alignment algorithm" 2601 ) 2602 self.assertEqual( 2603 str(aligner), 2604 """\ 2605Pairwise sequence aligner with parameters 2606 wildcard: None 2607 match_score: 1.000000 2608 mismatch_score: -1.000000 2609 target_gap_function: %s 2610 query_gap_function: %s 2611 mode: local 2612""" 2613 % (nogaps, specificgaps), 2614 ) 2615 score = aligner.score(seq1, seq2) 2616 self.assertAlmostEqual(score, 13) 2617 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 2618 self.assertAlmostEqual(score, 13) 2619 alignments = aligner.align(seq1, seq2) 2620 self.assertEqual(len(alignments), 2) 2621 alignment = alignments[0] 2622 self.assertAlmostEqual(alignment.score, 13) 2623 self.assertEqual( 2624 str(alignment), 2625 """\ 2626AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2627 ||||||||||||| 2628 AABBBAAAACCCCAAAABBBAA 2629""", 2630 ) 2631 self.assertEqual(alignment.shape, (2, 13)) 2632 self.assertEqual(alignment.aligned, (((2, 15),), ((0, 13),))) 2633 alignment = alignments[1] 2634 self.assertAlmostEqual(alignment.score, 13) 2635 self.assertEqual( 2636 str(alignment), 2637 """\ 2638AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2639 ||||||||||||| 2640 AABBBAAAACCCCAAAABBBAA 2641""", 2642 ) 2643 self.assertEqual(alignment.shape, (2, 13)) 2644 self.assertEqual(alignment.aligned, (((21, 34),), ((9, 22),))) 2645 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2646 self.assertEqual(len(alignments), 2) 2647 alignment = alignments[0] 2648 self.assertAlmostEqual(alignment.score, 13) 2649 self.assertEqual( 2650 str(alignment), 2651 """\ 2652AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2653 ||||||||||||| 2654 AABBBAAAACCCCAAAABBBAA 2655""", 2656 ) 2657 self.assertEqual(alignment.shape, (2, 13)) 2658 self.assertEqual(alignment.aligned, (((2, 15),), ((22, 9),))) 2659 alignment = alignments[1] 2660 self.assertAlmostEqual(alignment.score, 13) 2661 self.assertEqual( 2662 str(alignment), 2663 """\ 2664AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2665 ||||||||||||| 2666 AABBBAAAACCCCAAAABBBAA 2667""", 2668 ) 2669 self.assertEqual(alignment.shape, (2, 13)) 2670 self.assertEqual(alignment.aligned, (((21, 34),), ((13, 0),))) 2671 2672 def test_gap_here_only_local_2(self): 2673 # Force a bad alignment. 2674 # 2675 # Forces a bad alignment by having a very expensive gap penalty 2676 # where one would normally expect a gap, and a cheap gap penalty 2677 # in another place. 2678 seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA" 2679 seq2 = "AABBBAAAACCCCAAAABBBAA" 2680 breaks = [0, 3, len(seq2)] 2681 2682 # Very expensive to open a gap in seq1: 2683 def nogaps(x, y): 2684 return -2000 - y 2685 2686 # Very expensive to open a gap in seq2 unless it is in one of the allowed positions: 2687 def specificgaps(x, y): 2688 if x in breaks: 2689 return -2 - y 2690 else: 2691 return -2000 - y 2692 2693 aligner = Align.PairwiseAligner() 2694 aligner.mode = "local" 2695 aligner.match_score = 1 2696 aligner.mismatch_score = -1 2697 aligner.target_gap_score = nogaps 2698 aligner.query_gap_score = specificgaps 2699 self.assertEqual( 2700 str(aligner), 2701 """\ 2702Pairwise sequence aligner with parameters 2703 wildcard: None 2704 match_score: 1.000000 2705 mismatch_score: -1.000000 2706 target_gap_function: %s 2707 query_gap_function: %s 2708 mode: local 2709""" 2710 % (nogaps, specificgaps), 2711 ) 2712 self.assertEqual( 2713 aligner.algorithm, "Waterman-Smith-Beyer local alignment algorithm" 2714 ) 2715 score = aligner.score(seq1, seq2) 2716 self.assertAlmostEqual(score, 13) 2717 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 2718 self.assertAlmostEqual(score, 13) 2719 alignments = aligner.align(seq1, seq2) 2720 self.assertEqual(len(alignments), 2) 2721 alignment = alignments[0] 2722 self.assertAlmostEqual(alignment.score, 13) 2723 self.assertEqual( 2724 str(alignment), 2725 """\ 2726AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2727 ||||||||||||| 2728 AABBBAAAACCCCAAAABBBAA 2729""", 2730 ) 2731 self.assertEqual(alignment.shape, (2, 13)) 2732 self.assertEqual(alignment.aligned, (((2, 15),), ((0, 13),))) 2733 alignment = alignments[1] 2734 self.assertAlmostEqual(alignment.score, 13) 2735 self.assertEqual( 2736 str(alignment), 2737 """\ 2738AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2739 ||||||||||||| 2740 AABBBAAAACCCCAAAABBBAA 2741""", 2742 ) 2743 self.assertEqual(alignment.shape, (2, 13)) 2744 self.assertEqual(alignment.aligned, (((21, 34),), ((9, 22),))) 2745 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2746 self.assertEqual(len(alignments), 2) 2747 alignment = alignments[0] 2748 self.assertAlmostEqual(alignment.score, 13) 2749 self.assertEqual( 2750 str(alignment), 2751 """\ 2752AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2753 ||||||||||||| 2754 AABBBAAAACCCCAAAABBBAA 2755""", 2756 ) 2757 self.assertEqual(alignment.shape, (2, 13)) 2758 self.assertEqual(alignment.aligned, (((2, 15),), ((22, 9),))) 2759 alignment = alignments[1] 2760 self.assertAlmostEqual(alignment.score, 13) 2761 self.assertEqual( 2762 str(alignment), 2763 """\ 2764AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA 2765 ||||||||||||| 2766 AABBBAAAACCCCAAAABBBAA 2767""", 2768 ) 2769 self.assertEqual(alignment.shape, (2, 13)) 2770 self.assertEqual(alignment.aligned, (((21, 34),), ((13, 0),))) 2771 2772 def test_gap_here_only_local_3(self): 2773 # Check if gap open and gap extend penalties are handled correctly. 2774 seq1 = "TTCCAA" 2775 seq2 = "TTGGAA" 2776 2777 def gap_score(i, n): 2778 if i == 3: 2779 return -10 2780 if n == 1: 2781 return -1 2782 return -10 2783 2784 aligner = Align.PairwiseAligner() 2785 aligner.mode = "local" 2786 aligner.match_score = 1 2787 aligner.mismatch_score = -10 2788 aligner.target_gap_score = gap_score 2789 self.assertEqual( 2790 aligner.algorithm, "Waterman-Smith-Beyer local alignment algorithm" 2791 ) 2792 self.assertEqual( 2793 str(aligner), 2794 """\ 2795Pairwise sequence aligner with parameters 2796 wildcard: None 2797 match_score: 1.000000 2798 mismatch_score: -10.000000 2799 target_gap_function: %s 2800 query_internal_open_gap_score: 0.000000 2801 query_internal_extend_gap_score: 0.000000 2802 query_left_open_gap_score: 0.000000 2803 query_left_extend_gap_score: 0.000000 2804 query_right_open_gap_score: 0.000000 2805 query_right_extend_gap_score: 0.000000 2806 mode: local 2807""" 2808 % gap_score, 2809 ) 2810 score = aligner.score(seq1, seq2) 2811 self.assertAlmostEqual(score, 2.0) 2812 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 2813 self.assertAlmostEqual(score, 2.0) 2814 alignments = aligner.align(seq1, seq2) 2815 self.assertEqual(len(alignments), 2) 2816 alignment = alignments[0] 2817 self.assertAlmostEqual(alignment.score, 2.0) 2818 self.assertEqual( 2819 str(alignment), 2820 """\ 2821TTCCAA 2822|| 2823TTGGAA 2824""", 2825 ) 2826 self.assertEqual(alignment.shape, (2, 2)) 2827 self.assertEqual(alignment.aligned, (((0, 2),), ((0, 2),))) 2828 alignment = alignments[1] 2829 self.assertAlmostEqual(alignment.score, 2.0) 2830 self.assertEqual( 2831 str(alignment), 2832 """\ 2833TTCCAA 2834 || 2835TTGGAA 2836""", 2837 ) 2838 self.assertEqual(alignment.shape, (2, 2)) 2839 self.assertEqual(alignment.aligned, (((4, 6),), ((4, 6),))) 2840 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2841 self.assertEqual(len(alignments), 2) 2842 alignment = alignments[0] 2843 self.assertAlmostEqual(alignment.score, 2.0) 2844 self.assertEqual( 2845 str(alignment), 2846 """\ 2847TTCCAA 2848|| 2849TTGGAA 2850""", 2851 ) 2852 self.assertEqual(alignment.shape, (2, 2)) 2853 self.assertEqual(alignment.aligned, (((0, 2),), ((6, 4),))) 2854 alignment = alignments[1] 2855 self.assertAlmostEqual(alignment.score, 2.0) 2856 self.assertEqual( 2857 str(alignment), 2858 """\ 2859TTCCAA 2860 || 2861TTGGAA 2862""", 2863 ) 2864 self.assertEqual(alignment.shape, (2, 2)) 2865 self.assertEqual(alignment.aligned, (((4, 6),), ((2, 0),))) 2866 aligner.query_gap_score = gap_score 2867 self.assertEqual( 2868 str(aligner), 2869 """\ 2870Pairwise sequence aligner with parameters 2871 wildcard: None 2872 match_score: 1.000000 2873 mismatch_score: -10.000000 2874 target_gap_function: %s 2875 query_gap_function: %s 2876 mode: local 2877""" 2878 % (gap_score, gap_score), 2879 ) 2880 score = aligner.score(seq1, seq2) 2881 self.assertAlmostEqual(score, 2.0) 2882 score = aligner.score(seq1, reverse_complement(seq2), strand="-") 2883 self.assertAlmostEqual(score, 2.0) 2884 alignments = aligner.align(seq1, seq2) 2885 self.assertEqual(len(alignments), 2) 2886 alignment = alignments[0] 2887 self.assertAlmostEqual(alignment.score, 2.0) 2888 self.assertEqual( 2889 str(alignment), 2890 """\ 2891TTCCAA 2892|| 2893TTGGAA 2894""", 2895 ) 2896 self.assertEqual(alignment.shape, (2, 2)) 2897 self.assertEqual(alignment.aligned, (((0, 2),), ((0, 2),))) 2898 alignment = alignments[1] 2899 self.assertAlmostEqual(alignment.score, 2.0) 2900 self.assertEqual( 2901 str(alignment), 2902 """\ 2903TTCCAA 2904 || 2905TTGGAA 2906""", 2907 ) 2908 self.assertEqual(alignment.shape, (2, 2)) 2909 self.assertEqual(alignment.aligned, (((4, 6),), ((4, 6),))) 2910 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2911 self.assertEqual(len(alignments), 2) 2912 alignment = alignments[0] 2913 self.assertAlmostEqual(alignment.score, 2.0) 2914 self.assertEqual( 2915 str(alignment), 2916 """\ 2917TTCCAA 2918|| 2919TTGGAA 2920""", 2921 ) 2922 self.assertEqual(alignment.shape, (2, 2)) 2923 self.assertEqual(alignment.aligned, (((0, 2),), ((6, 4),))) 2924 alignment = alignments[1] 2925 self.assertAlmostEqual(alignment.score, 2.0) 2926 self.assertEqual( 2927 str(alignment), 2928 """\ 2929TTCCAA 2930 || 2931TTGGAA 2932""", 2933 ) 2934 self.assertEqual(alignment.shape, (2, 2)) 2935 self.assertEqual(alignment.aligned, (((4, 6),), ((2, 0),))) 2936 2937 def test_broken_gap_function(self): 2938 # Check if an Exception is propagated if the gap function raises one 2939 seq1 = "TTCCAA" 2940 seq2 = "TTGGAA" 2941 2942 def gap_score(i, n): 2943 raise RuntimeError("broken gap function") 2944 2945 aligner = Align.PairwiseAligner() 2946 aligner.target_gap_score = gap_score 2947 aligner.query_gap_score = -1 2948 aligner.mode = "global" 2949 with self.assertRaises(RuntimeError): 2950 aligner.score(seq1, seq2) 2951 with self.assertRaises(RuntimeError): 2952 aligner.score(seq1, reverse_complement(seq2), strand="-") 2953 with self.assertRaises(RuntimeError): 2954 alignments = aligner.align(seq1, seq2) 2955 alignments = list(alignments) 2956 with self.assertRaises(RuntimeError): 2957 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2958 alignments = list(alignments) 2959 aligner.mode = "local" 2960 with self.assertRaises(RuntimeError): 2961 aligner.score(seq1, seq2) 2962 with self.assertRaises(RuntimeError): 2963 aligner.score(seq1, reverse_complement(seq2), strand="-") 2964 with self.assertRaises(RuntimeError): 2965 alignments = aligner.align(seq1, seq2) 2966 alignments = list(alignments) 2967 with self.assertRaises(RuntimeError): 2968 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2969 alignments = list(alignments) 2970 aligner.target_gap_score = -1 2971 aligner.query_gap_score = gap_score 2972 aligner.mode = "global" 2973 with self.assertRaises(RuntimeError): 2974 aligner.score(seq1, seq2) 2975 with self.assertRaises(RuntimeError): 2976 aligner.score(seq1, reverse_complement(seq2), strand="-") 2977 with self.assertRaises(RuntimeError): 2978 alignments = aligner.align(seq1, seq2) 2979 alignments = list(alignments) 2980 with self.assertRaises(RuntimeError): 2981 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2982 alignments = list(alignments) 2983 aligner.mode = "local" 2984 with self.assertRaises(RuntimeError): 2985 aligner.score(seq1, seq2) 2986 with self.assertRaises(RuntimeError): 2987 aligner.score(seq1, reverse_complement(seq2), strand="-") 2988 with self.assertRaises(RuntimeError): 2989 alignments = aligner.align(seq1, seq2) 2990 alignments = list(alignments) 2991 with self.assertRaises(RuntimeError): 2992 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 2993 alignments = list(alignments) 2994 2995 2996class TestSequencesAsLists(unittest.TestCase): 2997 """Check aligning sequences provided as lists. 2998 2999 This tests whether we can align sequences that are provided as lists 3000 consisting of three-letter codons or three-letter amino acids. 3001 """ 3002 3003 def test_three_letter_amino_acids_global(self): 3004 seq1 = ["Gly", "Ala", "Thr"] 3005 seq2 = ["Gly", "Ala", "Ala", "Cys", "Thr"] 3006 aligner = Align.PairwiseAligner() 3007 aligner.mode = "global" 3008 # fmt: off 3009 aligner.alphabet = [ 3010 "Ala", "Arg", "Asn", "Asp", "Cys", "Gln", "Glu", "Gly", "His", "Ile", 3011 "Leu", "Lys", "Met", "Phe", "Pro", "Ser", "Thr", "Trp", "Tyr", "Val", 3012 ] 3013 # fmt: on 3014 score = aligner.score(seq1, seq2) 3015 self.assertAlmostEqual(score, 3.0) 3016 alignments = aligner.align(seq1, seq2) 3017 self.assertEqual(len(alignments), 2) 3018 self.assertEqual( 3019 str(alignments[0]), 3020 """\ 3021Gly Ala --- --- Thr 3022||| ||| --- --- ||| 3023Gly Ala Ala Cys Thr 3024""", 3025 ) 3026 self.assertEqual( 3027 str(alignments[1]), 3028 """\ 3029Gly --- Ala --- Thr 3030||| --- ||| --- ||| 3031Gly Ala Ala Cys Thr 3032""", 3033 ) 3034 self.assertAlmostEqual(alignments[0].score, 3.0) 3035 self.assertAlmostEqual(alignments[1].score, 3.0) 3036 3037 seq1 = ["Pro", "Pro", "Gly", "Ala", "Thr"] 3038 seq2 = ["Gly", "Ala", "Ala", "Cys", "Thr", "Asn", "Asn"] 3039 score = aligner.score(seq1, seq2) 3040 self.assertAlmostEqual(score, 3.0) 3041 alignments = aligner.align(seq1, seq2) 3042 self.assertEqual(len(alignments), 2) 3043 self.assertEqual( 3044 str(alignments[0]), 3045 """\ 3046Pro Pro Gly Ala --- --- Thr --- --- 3047--- --- ||| ||| --- --- ||| --- --- 3048--- --- Gly Ala Ala Cys Thr Asn Asn 3049""", 3050 ) 3051 self.assertEqual( 3052 str(alignments[1]), 3053 """\ 3054Pro Pro Gly --- Ala --- Thr --- --- 3055--- --- ||| --- ||| --- ||| --- --- 3056--- --- Gly Ala Ala Cys Thr Asn Asn 3057""", 3058 ) 3059 self.assertAlmostEqual(alignments[0].score, 3.0) 3060 self.assertAlmostEqual(alignments[1].score, 3.0) 3061 3062 def test_three_letter_amino_acids_local(self): 3063 seq1 = ["Asn", "Asn", "Gly", "Ala", "Thr", "Glu", "Glu"] 3064 seq2 = ["Pro", "Pro", "Gly", "Ala", "Ala", "Cys", "Thr", "Leu"] 3065 aligner = Align.PairwiseAligner() 3066 aligner.mode = "local" 3067 # fmt: off 3068 aligner.alphabet = [ 3069 "Ala", "Arg", "Asn", "Asp", "Cys", "Gln", "Glu", "Gly", "His", "Ile", 3070 "Leu", "Lys", "Met", "Phe", "Pro", "Ser", "Thr", "Trp", "Tyr", "Val", 3071 ] 3072 # fmt: on 3073 score = aligner.score(seq1, seq2) 3074 self.assertAlmostEqual(score, 3.0) 3075 alignments = aligner.align(seq1, seq2) 3076 self.assertEqual(len(alignments), 2) 3077 self.assertEqual( 3078 str(alignments[0]), 3079 """\ 3080Gly Ala --- --- Thr 3081||| ||| --- --- ||| 3082Gly Ala Ala Cys Thr 3083""", 3084 ) 3085 self.assertEqual( 3086 str(alignments[1]), 3087 """\ 3088Gly --- Ala --- Thr 3089||| --- ||| --- ||| 3090Gly Ala Ala Cys Thr 3091""", 3092 ) 3093 self.assertAlmostEqual(alignments[0].score, 3.0) 3094 self.assertAlmostEqual(alignments[1].score, 3.0) 3095 3096 3097class TestArgumentErrors(unittest.TestCase): 3098 def test_aligner_string_errors(self): 3099 aligner = Align.PairwiseAligner() 3100 message = "^argument should support the sequence protocol$" 3101 with self.assertRaisesRegex(TypeError, message): 3102 aligner.score("AAA", 3) 3103 message = "^sequence has zero length$" 3104 with self.assertRaisesRegex(ValueError, message): 3105 aligner.score("AAA", "") 3106 with self.assertRaisesRegex(ValueError, message): 3107 aligner.score("AAA", "", strand="-") 3108 message = "^sequence contains letters not in the alphabet$" 3109 aligner.alphabet = "ABCD" 3110 with self.assertRaisesRegex(ValueError, message): 3111 aligner.score("AAA", "AAE") 3112 3113 def test_aligner_array_errors(self): 3114 aligner = Align.PairwiseAligner() 3115 s1 = "GGG" 3116 s2 = array.array("i", [ord("G"), ord("A"), ord("G")]) 3117 score = aligner.score(s1, s2) 3118 self.assertAlmostEqual(score, 2.0) 3119 s2 = array.array("f", [1.0, 0.0, 1.0]) 3120 message = "^sequence has incorrect data type 'f'$" 3121 with self.assertRaisesRegex(ValueError, message): 3122 aligner.score(s1, s2) 3123 aligner.wildcard = chr(99) 3124 s1 = array.array("i", [1, 5, 6]) 3125 s2 = array.array("i", [1, 8, 6]) 3126 s2a = array.array("i", [1, 8, 99]) 3127 s2b = array.array("i", [1, 28, 6]) 3128 aligner.match = 3.0 3129 aligner.mismatch = -2.0 3130 aligner.gap_score = -10.0 3131 score = aligner.score(s1, s2) 3132 self.assertAlmostEqual(score, 4.0) 3133 # the following two are valid as we are using match/mismatch scores 3134 # instead of a substitution matrix: 3135 score = aligner.score(s1, s2a) 3136 # since we set the wildcard character to chr(99), the number 99 3137 # is interpreted as an unknown character, and gets a zero score: 3138 self.assertAlmostEqual(score, 1.0) 3139 score = aligner.score(s1, s2b) 3140 self.assertAlmostEqual(score, 4.0) 3141 try: 3142 import numpy 3143 except ImportError: 3144 return 3145 aligner = Align.PairwiseAligner() 3146 aligner.wildcard = chr(99) 3147 s1 = "GGG" 3148 s2 = numpy.array([ord("G"), ord("A"), ord("G")], numpy.int32) 3149 score = aligner.score(s1, s2) 3150 self.assertAlmostEqual(score, 2.0) 3151 s2 = numpy.array([1.0, 0.0, 1.0]) 3152 message = "^sequence has incorrect data type 'd'$" 3153 with self.assertRaisesRegex(ValueError, message): 3154 aligner.score(s1, s2) 3155 s2 = numpy.zeros((3, 2), numpy.int32) 3156 message = "^sequence has incorrect rank \\(2 expected 1\\)$" 3157 with self.assertRaisesRegex(ValueError, message): 3158 aligner.score(s1, s2) 3159 s1 = numpy.array([1, 5, 6], numpy.int32) 3160 s2 = numpy.array([1, 8, 6], numpy.int32) 3161 s2a = numpy.array([1, 8, 99], numpy.int32) 3162 s2b = numpy.array([1, 28, 6], numpy.int32) 3163 s2c = numpy.array([1, 8, -6], numpy.int32) 3164 aligner.match = 3.0 3165 aligner.mismatch = -2.0 3166 aligner.gap_score = -10.0 3167 score = aligner.score(s1, s2) 3168 self.assertAlmostEqual(score, 4.0) 3169 # the following two are valid as we are using match/mismatch scores 3170 # instead of a substitution matrix: 3171 score = aligner.score(s1, s2a) 3172 self.assertAlmostEqual(score, 1.0) 3173 score = aligner.score(s1, s2b) 3174 self.assertAlmostEqual(score, 4.0) 3175 # when using a substitution matrix, all indices should be between 0 3176 # and the size of the substitution matrix: 3177 m = 5 * numpy.eye(10) 3178 aligner.substitution_matrix = m 3179 score = aligner.score(s1, s2) # no ValueError 3180 self.assertAlmostEqual(score, 10.0) 3181 message = "^sequence item 2 is negative \\(-6\\)$" 3182 with self.assertRaisesRegex(ValueError, message): 3183 aligner.score(s1, s2c) 3184 message = "^sequence item 1 is out of bound \\(28, should be < 10\\)$" 3185 with self.assertRaisesRegex(ValueError, message): 3186 aligner.score(s1, s2b) 3187 # note that the wildcard character is ignored when using a substitution 3188 # matrix, so 99 is interpreted as an index here: 3189 message = "^sequence item 2 is out of bound \\(99, should be < 10\\)$" 3190 with self.assertRaisesRegex(ValueError, message): 3191 aligner.score(s1, s2a) 3192 3193 3194class TestOverflowError(unittest.TestCase): 3195 def test_align_overflow_error(self): 3196 aligner = Align.PairwiseAligner() 3197 path = os.path.join("Align", "bsubtilis.fa") 3198 record = SeqIO.read(path, "fasta") 3199 seq1 = record.seq 3200 path = os.path.join("Align", "ecoli.fa") 3201 record = SeqIO.read(path, "fasta") 3202 seq2 = record.seq 3203 alignments = aligner.align(seq1, seq2) 3204 self.assertAlmostEqual(alignments.score, 1286.0) 3205 message = "^number of optimal alignments is larger than (%d|%d)$" % ( 3206 2147483647, # on 32-bit systems 3207 9223372036854775807, 3208 ) # on 64-bit systems 3209 with self.assertRaisesRegex(OverflowError, message): 3210 n = len(alignments) 3211 # confirm that we can still pull out individual alignments 3212 alignment = alignments[0] 3213 self.assertEqual( 3214 str(alignment), 3215 """\ 3216ATTTA-TC-GGA-GAGTTTGATCC-TGGCTCAGGAC--GAACGCTGGCGGC-GTGCCTAAT-ACATGCAAGTCGAG-CGG-A-CAG-AT-GGGA-GCTTGCT-C----CCTGAT-GTTAGC-GGCGGACGGGTGAGTAACAC-GT--GGGTAA-CCTGCCTGTAA-G-ACTGGG--ATAACT-CC-GGGAAACCGG--GGCTAATACCGG-ATGGTTGTTTGAACCGCAT-GGTTCAA-AC-ATAA-AAGGTGG--C-TTCGG-C-TACCACTTA-C-A--G-ATG-GACCC-GC--GGCGCATTAGCTAGTT-GGTGAGG-TAACGGCTCACC-AAGGCGACGATGCG--TAGCC-GA--CCTGAGAGGG-TGATC--GGCCACACTGGGA-CTGAGACACGG-CCCAGACTCCTACGGGAGGCAGCAGTAGGG-AATC-TTCCGCA-A-TGGA-CG-AAAGTC-TGAC-GG-AGCAAC--GCCGCGTG-AGTGAT-GAAGG--TTTTCGGA-TC-GTAAAGCT-CTGTTGTT-AG-GG--A--A-G--A--ACAAGTGCCGTTCGAATAGGGC----GG-TACC-TTGACGGT-ACCTAAC-CAGAA-A-GCCAC-GGCTAACTAC-GTGCCAGCAGCCGCGGTAATACGT-AGG-TGGCAAGCGTTG--TCCGGAATTA-TTGGGCGTAAAG-GGCT-CGCAGGCGGTTTC-TTAAGTCT-GATGTGAAAG-CCCCCGG-CTCAACC-GGGGAGGG--T-CAT-TGGA-AACTGGGG-AA-CTTGAGTGCA--G-AAGAGGAGAGTGG-A-A-TTCCACG-TGTAGCGGTGAAATGCGTAGAGATG-TGGAGGAAC-ACCAG-TGGCGAAGGCGA-CTCTC--TGGT-CTGTAA--CTGACGCTG-AGGA-GCGAAAGCGTGGGGAGCGAA-CAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGT-G-CTAAGTGTT-AGGGGGTT-TCCGCCCCTT-AGTGC-TG-C------AGCTAACGCA-TTAAG-C-ACTCCGCCTGGGGAGTACGGTC-GCAAGACTG--AAA-CTCAAA-GGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAA-GCAACGCGAAGAACCTTACCA-GGTCTTGACATCCTCTGACA-A--T--CCTAGAGATAGGAC--G-T-CCCCTTCGGGGGCAGA--GTGA--CAGGTGG-TGCATGG-TTGTCGTCAGCTCGTGTC-GTGAGA-TGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTA--GTTGCCAGCA--TTCA-GTTG--GGC-A-CTCTAA-GGT-GACTGCC-GGTGAC-AAACC-GGAGGAAGGTGGGGATGACGTCAAA-TCATCATG-CCCCTTAT-GACCT-GGGCTACACACGTGCTACAATGGACAG-A-ACAAAG-GGCA-GCGAAACC--GCGAG-GTT-AAGCC--AATCC-CAC-AAA-T-CTGTTC-TCAGTTC-GGATC-GC-AGTCTGCAACTCGACTGCG--TGAAGCT-GGAATCGCTAGTAATCGC-GGATCAGCA-TGCCG-CGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCAC-GAG-AGT---TTGT-AACACCC-GAAGTC-GGTGAGG-T-AACCTTTTA-GG-AG--C-C--AGCCG-CC---GAAGGTGGGA--CAGATGA-TTGGGGTGAAGTCGTAACAAGGTAG-CCGTATCGGAAGG----TGCGGCT-GGATCACCTCCTTTCTA 3217|---|-|--|-|-||||||||||--||||||||-|---|||||||||||||-|-||||||--|||||||||||||--|||-|-|||-|--|--|-|||||||-|----|-|||--|--||--|||||||||||||||||----||--|||-||-|-||||||-|--|-|--|||--||||||-|--||-||||-||--|-|||||||||--||---------|||-|--|-|---|||-||-|-||-|-||-||--|-|||||-|-|-|---||--|-|--|-|||-|-|||-|---||-|-||||||||||--||||-||-||||||||||||-|-|||||||||-|---||||--|---|-|||||||--|||-|--|-|||||||||-|-|||||||||||-||-|||||||||||||||||||||||-|||-|||--||--|||-|-|||--||-||-|-|-|||--|--|||--|--||||||||-|-|||--|||||--||--|||--|--||||||-|-||-||----||-||--|--|-|--|--|-||||----|---||||---|----|--|-|--||||||-|-|||---|-|||||-|-||-||-||||||||-|-|||||||||||||||||||||||--|||-||-||||||||---||-|||||||-|-||||||||||-|-|--||||||||||||--|||||||--|||||||||--||||-||-|||||||-|||-|-----|-|||-||-|-|-||||---||-|||||||-|---|-|-||||-|-|-||-|-|-|||||-|-||||||||||||||||||||||||--||||||||--|||-|-|||||||||||--|-|-|--|||--|-|-||--||||||||--|||--|||||||||||||||||-||-|||||||||||||||||||||||||||||||||||||||--|-|-||---||---|||---||-|--||||-||-||-||-||-|------|||||||||--|||||-|-||-|-|||||||||||||||-|-|||||---|--|||-||||||-|-|||||||||||||||||||||||||||||||||||||||||||||||--||||||||||||||||||||--|||||||||||||----|||-|--|--||-||||||-|||---|-|-||--||||||---|-|--||||--||||||--|||||||-|-|||||||||||||||--||||-|-||||||||||||||||||||||||||||||||||-|||||---|||||||||---|-|--|--|--||--|-|||-||-||--|||||||-|-|||--||||--||||||||||||||||||||||||--||||||||-|||-|||--||||--|||||||||||||||||||||||-|-|-|-||||||-|--|-||||--||--|||||-|---||||---|--||-||--|||-|-|-||-|-|-|||-|-||||--|--||||||||||||||||-|---|||||-|-|||||||||||||||||--|||||||-|-||||--|||||||||||||||||||||||||||||||||||||||||||||--|-|-|||---|||--||-|----|||||--|||-||--|-||||||----||-||--|-|--|-||--|----|----||--|--||--|||-|-||||||||||||||||||||||--|||||--||--||----|||||-|-|||||||||||||---| 3218A---AAT-TG-AAGAGTTTGATC-ATGGCTCAG-A-TTGAACGCTGGCGGCAG-GCCTAA-CACATGCAAGTCGA-ACGGTAACAGGA-AG--AAGCTTGCTTCTTTGC-TGA-CG--AG-TGGCGGACGGGTGAGTAA---TGTCTGGG-AAAC-TGCCTG-A-TGGA--GGGGGATAACTAC-TGG-AAAC-GGTAG-CTAATACCG-CAT---------AAC-G--TCG---CAAGACCA-AAGA-GG-GGGACCTTCGGGCCT-C---TT-GCCATCGGATGTG-CCCAG-ATGG-G-ATTAGCTAGT-AGGTG-GGGTAACGGCTCACCTA-GGCGACGAT-C-CCTAGC-TG-GTC-TGAGAGG-ATGA-CCAG-CCACACTGG-AACTGAGACACGGTCC-AGACTCCTACGGGAGGCAGCAGT-GGGGAAT-ATT--GCACAATGG-GCGCAA-G-CCTGA-TG-CAGC--CATGCCGCGTGTA-TGA-AGAAGGCCTT--CGG-GT-TGTAAAG-TACT-TT---CAGCGGGGAGGAAGGGAGTA-AAGT----T---AATA---CCTTTG-CT-C-ATTGACG-TTACC---CGCAGAAGAAGC-ACCGGCTAACT-CCGTGCCAGCAGCCGCGGTAATACG-GAGGGTG-CAAGCGTT-AATC-GGAATTACT-GGGCGTAAAGCG-C-ACGCAGGCGGTTT-GTTAAGTC-AGATGTGAAA-TCCCC-GGGCTCAACCTGGG-A---ACTGCATCTG-ATA-CTGG--CAAGCTTGAGT-C-TCGTA-GAGG-G-G-GGTAGAATTCCA-GGTGTAGCGGTGAAATGCGTAGAGAT-CTGGAGGAA-TACC-GGTGGCGAAGGCG-GC-C-CCCTGG-AC-G-AAGACTGACGCT-CAGG-TGCGAAAGCGTGGGGAGC-AAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATG--TCGACT---TG--GAGG---TTGT--GCCC-TTGAG-GCGTGGCTTCCGGAGCTAACGC-GTTAAGTCGAC-C-GCCTGGGGAGTACGG-CCGCAAG---GTTAAAACTCAAATG-AATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGA-TGCAACGCGAAGAACCTTACC-TGGTCTTGACATCC----ACAGAACTTTCC-AGAGAT-GGA-TTGGTGCC--TTCGGG---A-ACTGTGAGACAGGTG-CTGCATGGCT-GTCGTCAGCTCGTGT-TGTGA-AATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTT-ATCTT-TTGTTGCCAGC-GGT-C-CG--GCCGG-GAACTC-AAAGG-AGACTGCCAG-TGA-TAAAC-TGGAGGAAGGTGGGGATGACGTCAA-GTCATCATGGCCC-TTA-CGACC-AGGGCTACACACGTGCTACAATGG-C-GCATACAAAGAG--AAGCGA--CCTCGCGAGAG--CAAGC-GGA--CCTCA-TAAAGTGC-GT-CGT-AGT-CCGGAT-TG-GAGTCTGCAACTCGACT-C-CATGAAG-TCGGAATCGCTAGTAATCG-TGGATCAG-AATGCC-ACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCA-TG-GGAGTGGGTTG-CAA-A---AGAAGT-AGGT-AG-CTTAACCTT---CGGGAGGGCGCTTA-CC-AC-TTTG----TG--ATTCA--TGACT-GGGGTGAAGTCGTAACAAGGTA-ACCGTA--GG--GGAACCTGCGG-TTGGATCACCTCCTT---A 3219""", 3220 ) 3221 self.assertEqual(alignment.shape, (2, 1811)) 3222 self.assertAlmostEqual(alignment.score, 1286.0) 3223 alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") 3224 self.assertAlmostEqual(alignments.score, 1286.0) 3225 message = "^number of optimal alignments is larger than (%d|%d)$" % ( 3226 2147483647, # on 32-bit systems 3227 9223372036854775807, 3228 ) # on 64-bit systems 3229 with self.assertRaisesRegex(OverflowError, message): 3230 n = len(alignments) 3231 # confirm that we can still pull out individual alignments 3232 alignment = alignments[0] 3233 self.assertEqual( 3234 str(alignment), 3235 """\ 3236ATTTA-TC-GGA-GAGTTTGATCC-TGGCTCAGGAC--GAACGCTGGCGGC-GTGCCTAAT-ACATGCAAGTCGAG-CGG-A-CAG-AT-GGGA-GCTTGCT-C----CCTGAT-GTTAGC-GGCGGACGGGTGAGTAACAC-GT--GGGTAA-CCTGCCTGTAA-G-ACTGGG--ATAACT-CC-GGGAAACCGG--GGCTAATACCGG-ATGGTTGTTTGAACCGCAT-GGTTCAA-AC-ATAA-AAGGTGG--C-TTCGG-C-TACCACTTA-C-A--G-ATG-GACCC-GC--GGCGCATTAGCTAGTT-GGTGAGG-TAACGGCTCACC-AAGGCGACGATGCG--TAGCC-GA--CCTGAGAGGG-TGATC--GGCCACACTGGGA-CTGAGACACGG-CCCAGACTCCTACGGGAGGCAGCAGTAGGG-AATC-TTCCGCA-A-TGGA-CG-AAAGTC-TGAC-GG-AGCAAC--GCCGCGTG-AGTGAT-GAAGG--TTTTCGGA-TC-GTAAAGCT-CTGTTGTT-AG-GG--A--A-G--A--ACAAGTGCCGTTCGAATAGGGC----GG-TACC-TTGACGGT-ACCTAAC-CAGAA-A-GCCAC-GGCTAACTAC-GTGCCAGCAGCCGCGGTAATACGT-AGG-TGGCAAGCGTTG--TCCGGAATTA-TTGGGCGTAAAG-GGCT-CGCAGGCGGTTTC-TTAAGTCT-GATGTGAAAG-CCCCCGG-CTCAACC-GGGGAGGG--T-CAT-TGGA-AACTGGGG-AA-CTTGAGTGCA--G-AAGAGGAGAGTGG-A-A-TTCCACG-TGTAGCGGTGAAATGCGTAGAGATG-TGGAGGAAC-ACCAG-TGGCGAAGGCGA-CTCTC--TGGT-CTGTAA--CTGACGCTG-AGGA-GCGAAAGCGTGGGGAGCGAA-CAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGT-G-CTAAGTGTT-AGGGGGTT-TCCGCCCCTT-AGTGC-TG-C------AGCTAACGCA-TTAAG-C-ACTCCGCCTGGGGAGTACGGTC-GCAAGACTG--AAA-CTCAAA-GGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAA-GCAACGCGAAGAACCTTACCA-GGTCTTGACATCCTCTGACA-A--T--CCTAGAGATAGGAC--G-T-CCCCTTCGGGGGCAGA--GTGA--CAGGTGG-TGCATGG-TTGTCGTCAGCTCGTGTC-GTGAGA-TGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTA--GTTGCCAGCA--TTCA-GTTG--GGC-A-CTCTAA-GGT-GACTGCC-GGTGAC-AAACC-GGAGGAAGGTGGGGATGACGTCAAA-TCATCATG-CCCCTTAT-GACCT-GGGCTACACACGTGCTACAATGGACAG-A-ACAAAG-GGCA-GCGAAACC--GCGAG-GTT-AAGCC--AATCC-CAC-AAA-T-CTGTTC-TCAGTTC-GGATC-GC-AGTCTGCAACTCGACTGCG--TGAAGCT-GGAATCGCTAGTAATCGC-GGATCAGCA-TGCCG-CGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCAC-GAG-AGT---TTGT-AACACCC-GAAGTC-GGTGAGG-T-AACCTTTTA-GG-AG--C-C--AGCCG-CC---GAAGGTGGGA--CAGATGA-TTGGGGTGAAGTCGTAACAAGGTAG-CCGTATCGGAAGG----TGCGGCT-GGATCACCTCCTTTCTA 3237|---|-|--|-|-||||||||||--||||||||-|---|||||||||||||-|-||||||--|||||||||||||--|||-|-|||-|--|--|-|||||||-|----|-|||--|--||--|||||||||||||||||----||--|||-||-|-||||||-|--|-|--|||--||||||-|--||-||||-||--|-|||||||||--||---------|||-|--|-|---|||-||-|-||-|-||-||--|-|||||-|-|-|---||--|-|--|-|||-|-|||-|---||-|-||||||||||--||||-||-||||||||||||-|-|||||||||-|---||||--|---|-|||||||--|||-|--|-|||||||||-|-|||||||||||-||-|||||||||||||||||||||||-|||-|||--||--|||-|-|||--||-||-|-|-|||--|--|||--|--||||||||-|-|||--|||||--||--|||--|--||||||-|-||-||----||-||--|--|-|--|--|-||||----|---||||---|----|--|-|--||||||-|-|||---|-|||||-|-||-||-||||||||-|-|||||||||||||||||||||||--|||-||-||||||||---||-|||||||-|-||||||||||-|-|--||||||||||||--|||||||--|||||||||--||||-||-|||||||-|||-|-----|-|||-||-|-|-||||---||-|||||||-|---|-|-||||-|-|-||-|-|-|||||-|-||||||||||||||||||||||||--||||||||--|||-|-|||||||||||--|-|-|--|||--|-|-||--||||||||--|||--|||||||||||||||||-||-|||||||||||||||||||||||||||||||||||||||--|-|-||---||---|||---||-|--||||-||-||-||-||-|------|||||||||--|||||-|-||-|-|||||||||||||||-|-|||||---|--|||-||||||-|-|||||||||||||||||||||||||||||||||||||||||||||||--||||||||||||||||||||--|||||||||||||----|||-|--|--||-||||||-|||---|-|-||--||||||---|-|--||||--||||||--|||||||-|-|||||||||||||||--||||-|-||||||||||||||||||||||||||||||||||-|||||---|||||||||---|-|--|--|--||--|-|||-||-||--|||||||-|-|||--||||--||||||||||||||||||||||||--||||||||-|||-|||--||||--|||||||||||||||||||||||-|-|-|-||||||-|--|-||||--||--|||||-|---||||---|--||-||--|||-|-|-||-|-|-|||-|-||||--|--||||||||||||||||-|---|||||-|-|||||||||||||||||--|||||||-|-||||--|||||||||||||||||||||||||||||||||||||||||||||--|-|-|||---|||--||-|----|||||--|||-||--|-||||||----||-||--|-|--|-||--|----|----||--|--||--|||-|-||||||||||||||||||||||--|||||--||--||----|||||-|-|||||||||||||---| 3238A---AAT-TG-AAGAGTTTGATC-ATGGCTCAG-A-TTGAACGCTGGCGGCAG-GCCTAA-CACATGCAAGTCGA-ACGGTAACAGGA-AG--AAGCTTGCTTCTTTGC-TGA-CG--AG-TGGCGGACGGGTGAGTAA---TGTCTGGG-AAAC-TGCCTG-A-TGGA--GGGGGATAACTAC-TGG-AAAC-GGTAG-CTAATACCG-CAT---------AAC-G--TCG---CAAGACCA-AAGA-GG-GGGACCTTCGGGCCT-C---TT-GCCATCGGATGTG-CCCAG-ATGG-G-ATTAGCTAGT-AGGTG-GGGTAACGGCTCACCTA-GGCGACGAT-C-CCTAGC-TG-GTC-TGAGAGG-ATGA-CCAG-CCACACTGG-AACTGAGACACGGTCC-AGACTCCTACGGGAGGCAGCAGT-GGGGAAT-ATT--GCACAATGG-GCGCAA-G-CCTGA-TG-CAGC--CATGCCGCGTGTA-TGA-AGAAGGCCTT--CGG-GT-TGTAAAG-TACT-TT---CAGCGGGGAGGAAGGGAGTA-AAGT----T---AATA---CCTTTG-CT-C-ATTGACG-TTACC---CGCAGAAGAAGC-ACCGGCTAACT-CCGTGCCAGCAGCCGCGGTAATACG-GAGGGTG-CAAGCGTT-AATC-GGAATTACT-GGGCGTAAAGCG-C-ACGCAGGCGGTTT-GTTAAGTC-AGATGTGAAA-TCCCC-GGGCTCAACCTGGG-A---ACTGCATCTG-ATA-CTGG--CAAGCTTGAGT-C-TCGTA-GAGG-G-G-GGTAGAATTCCA-GGTGTAGCGGTGAAATGCGTAGAGAT-CTGGAGGAA-TACC-GGTGGCGAAGGCG-GC-C-CCCTGG-AC-G-AAGACTGACGCT-CAGG-TGCGAAAGCGTGGGGAGC-AAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATG--TCGACT---TG--GAGG---TTGT--GCCC-TTGAG-GCGTGGCTTCCGGAGCTAACGC-GTTAAGTCGAC-C-GCCTGGGGAGTACGG-CCGCAAG---GTTAAAACTCAAATG-AATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGA-TGCAACGCGAAGAACCTTACC-TGGTCTTGACATCC----ACAGAACTTTCC-AGAGAT-GGA-TTGGTGCC--TTCGGG---A-ACTGTGAGACAGGTG-CTGCATGGCT-GTCGTCAGCTCGTGT-TGTGA-AATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTT-ATCTT-TTGTTGCCAGC-GGT-C-CG--GCCGG-GAACTC-AAAGG-AGACTGCCAG-TGA-TAAAC-TGGAGGAAGGTGGGGATGACGTCAA-GTCATCATGGCCC-TTA-CGACC-AGGGCTACACACGTGCTACAATGG-C-GCATACAAAGAG--AAGCGA--CCTCGCGAGAG--CAAGC-GGA--CCTCA-TAAAGTGC-GT-CGT-AGT-CCGGAT-TG-GAGTCTGCAACTCGACT-C-CATGAAG-TCGGAATCGCTAGTAATCG-TGGATCAG-AATGCC-ACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCA-TG-GGAGTGGGTTG-CAA-A---AGAAGT-AGGT-AG-CTTAACCTT---CGGGAGGGCGCTTA-CC-AC-TTTG----TG--ATTCA--TGACT-GGGGTGAAGTCGTAACAAGGTA-ACCGTA--GG--GGAACCTGCGG-TTGGATCACCTCCTT---A 3239""", 3240 ) 3241 self.assertAlmostEqual(alignment.score, 1286.0) 3242 self.assertEqual(alignment.shape, (2, 1811)) 3243 3244 3245class TestKeywordArgumentsConstructor(unittest.TestCase): 3246 def test_confusing_arguments(self): 3247 aligner = Align.PairwiseAligner( 3248 mode="local", 3249 open_gap_score=-0.3, 3250 extend_gap_score=-0.1, 3251 target_open_gap_score=-0.2, 3252 ) 3253 self.assertEqual( 3254 str(aligner), 3255 """\ 3256Pairwise sequence aligner with parameters 3257 wildcard: None 3258 match_score: 1.000000 3259 mismatch_score: 0.000000 3260 target_internal_open_gap_score: -0.200000 3261 target_internal_extend_gap_score: -0.100000 3262 target_left_open_gap_score: -0.200000 3263 target_left_extend_gap_score: -0.100000 3264 target_right_open_gap_score: -0.200000 3265 target_right_extend_gap_score: -0.100000 3266 query_internal_open_gap_score: -0.300000 3267 query_internal_extend_gap_score: -0.100000 3268 query_left_open_gap_score: -0.300000 3269 query_left_extend_gap_score: -0.100000 3270 query_right_open_gap_score: -0.300000 3271 query_right_extend_gap_score: -0.100000 3272 mode: local 3273""", 3274 ) 3275 3276 3277class TestUnicodeStrings(unittest.TestCase): 3278 def test_needlemanwunsch_simple1(self): 3279 seq1 = "ĞĀĀČŦ" 3280 seq2 = "ĞĀŦ" 3281 aligner = Align.PairwiseAligner() 3282 aligner.mode = "global" 3283 aligner.alphabet = None 3284 self.assertEqual(aligner.algorithm, "Needleman-Wunsch") 3285 score = aligner.score(seq1, seq2) 3286 self.assertAlmostEqual(score, 3.0) 3287 alignments = aligner.align(seq1, seq2) 3288 self.assertEqual(len(alignments), 2) 3289 alignment = alignments[0] 3290 self.assertAlmostEqual(alignment.score, 3.0) 3291 self.assertEqual( 3292 str(alignment), 3293 """\ 3294ĞĀĀČŦ 3295||--| 3296ĞĀ--Ŧ 3297""", 3298 ) 3299 self.assertEqual(alignment.shape, (2, 5)) 3300 self.assertEqual(alignment.aligned, (((0, 2), (4, 5)), ((0, 2), (2, 3)))) 3301 alignment = alignments[1] 3302 self.assertAlmostEqual(alignment.score, 3.0) 3303 self.assertEqual( 3304 str(alignment), 3305 """\ 3306ĞĀĀČŦ 3307|-|-| 3308Ğ-Ā-Ŧ 3309""", 3310 ) 3311 self.assertEqual(alignment.shape, (2, 5)) 3312 self.assertEqual( 3313 alignment.aligned, (((0, 1), (2, 3), (4, 5)), ((0, 1), (1, 2), (2, 3))) 3314 ) 3315 3316 def test_align_affine1_score(self): 3317 aligner = Align.PairwiseAligner() 3318 aligner.mode = "global" 3319 aligner.alphabet = None 3320 aligner.match_score = 0 3321 aligner.mismatch_score = -1 3322 aligner.open_gap_score = -5 3323 aligner.extend_gap_score = -1 3324 self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm") 3325 score = aligner.score("いい", "あいいう") 3326 self.assertAlmostEqual(score, -7.0) 3327 3328 def test_smithwaterman(self): 3329 aligner = Align.PairwiseAligner() 3330 aligner.mode = "local" 3331 aligner.alphabet = None 3332 aligner.gap_score = -0.1 3333 self.assertEqual(aligner.algorithm, "Smith-Waterman") 3334 score = aligner.score("ℵℷℶℷ", "ℸℵℶℸ") 3335 self.assertAlmostEqual(score, 1.9) 3336 alignments = aligner.align("ℵℷℶℷ", "ℸℵℶℸ") 3337 self.assertEqual(len(alignments), 1) 3338 alignment = alignments[0] 3339 self.assertAlmostEqual(alignment.score, 1.9) 3340 self.assertEqual( 3341 str(alignment), 3342 """\ 3343 ℵℷℶℷ 3344 |-| 3345ℸℵ-ℶℸ 3346""", 3347 ) 3348 self.assertEqual(alignment.shape, (2, 3)) 3349 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((1, 2), (2, 3)))) 3350 3351 def test_gotoh_local(self): 3352 aligner = Align.PairwiseAligner() 3353 aligner.alphabet = None 3354 aligner.mode = "local" 3355 aligner.open_gap_score = -0.1 3356 aligner.extend_gap_score = 0.0 3357 self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm") 3358 score = aligner.score("生物科物", "学生科学") 3359 self.assertAlmostEqual(score, 1.9) 3360 alignments = aligner.align("生物科物", "学生科学") 3361 self.assertEqual(len(alignments), 1) 3362 alignment = alignments[0] 3363 self.assertAlmostEqual(alignment.score, 1.9) 3364 self.assertEqual( 3365 str(alignment), 3366 """\ 3367 生物科物 3368 |-| 3369学生-科学 3370""", 3371 ) 3372 self.assertEqual(alignment.shape, (2, 3)) 3373 self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((1, 2), (2, 3)))) 3374 3375 3376class TestAlignerPickling(unittest.TestCase): 3377 def test_pickle_aligner_match_mismatch(self): 3378 import pickle 3379 3380 aligner = Align.PairwiseAligner() 3381 aligner.wildcard = "X" 3382 aligner.match_score = 3 3383 aligner.mismatch_score = -2 3384 aligner.target_internal_open_gap_score = -2.5 3385 aligner.target_internal_extend_gap_score = -3.5 3386 aligner.target_left_open_gap_score = -2.5 3387 aligner.target_left_extend_gap_score = -3.5 3388 aligner.target_right_open_gap_score = -4 3389 aligner.target_right_extend_gap_score = -4 3390 aligner.query_internal_open_gap_score = -0.1 3391 aligner.query_internal_extend_gap_score = -2 3392 aligner.query_left_open_gap_score = -9 3393 aligner.query_left_extend_gap_score = +1 3394 aligner.query_right_open_gap_score = -1 3395 aligner.query_right_extend_gap_score = -2 3396 aligner.mode = "local" 3397 state = pickle.dumps(aligner) 3398 pickled_aligner = pickle.loads(state) 3399 self.assertEqual(aligner.wildcard, pickled_aligner.wildcard) 3400 self.assertAlmostEqual(aligner.match_score, pickled_aligner.match_score) 3401 self.assertAlmostEqual(aligner.mismatch_score, pickled_aligner.mismatch_score) 3402 self.assertIsNone(pickled_aligner.substitution_matrix) 3403 self.assertAlmostEqual( 3404 aligner.target_internal_open_gap_score, 3405 pickled_aligner.target_internal_open_gap_score, 3406 ) 3407 self.assertAlmostEqual( 3408 aligner.target_internal_extend_gap_score, 3409 pickled_aligner.target_internal_extend_gap_score, 3410 ) 3411 self.assertAlmostEqual( 3412 aligner.target_left_open_gap_score, 3413 pickled_aligner.target_left_open_gap_score, 3414 ) 3415 self.assertAlmostEqual( 3416 aligner.target_left_extend_gap_score, 3417 pickled_aligner.target_left_extend_gap_score, 3418 ) 3419 self.assertAlmostEqual( 3420 aligner.target_right_open_gap_score, 3421 pickled_aligner.target_right_open_gap_score, 3422 ) 3423 self.assertAlmostEqual( 3424 aligner.target_right_extend_gap_score, 3425 pickled_aligner.target_right_extend_gap_score, 3426 ) 3427 self.assertAlmostEqual( 3428 aligner.query_internal_open_gap_score, 3429 pickled_aligner.query_internal_open_gap_score, 3430 ) 3431 self.assertAlmostEqual( 3432 aligner.query_internal_extend_gap_score, 3433 pickled_aligner.query_internal_extend_gap_score, 3434 ) 3435 self.assertAlmostEqual( 3436 aligner.query_left_open_gap_score, 3437 pickled_aligner.query_left_open_gap_score, 3438 ) 3439 self.assertAlmostEqual( 3440 aligner.query_left_extend_gap_score, 3441 pickled_aligner.query_left_extend_gap_score, 3442 ) 3443 self.assertAlmostEqual( 3444 aligner.query_right_open_gap_score, 3445 pickled_aligner.query_right_open_gap_score, 3446 ) 3447 self.assertAlmostEqual( 3448 aligner.query_right_extend_gap_score, 3449 pickled_aligner.query_right_extend_gap_score, 3450 ) 3451 self.assertEqual(aligner.mode, pickled_aligner.mode) 3452 3453 def test_pickle_aligner_substitution_matrix(self): 3454 try: 3455 from Bio.Align import substitution_matrices 3456 except ImportError: 3457 return 3458 import pickle 3459 3460 aligner = Align.PairwiseAligner() 3461 aligner.wildcard = "N" 3462 aligner.substitution_matrix = substitution_matrices.load("BLOSUM80") 3463 aligner.target_internal_open_gap_score = -5 3464 aligner.target_internal_extend_gap_score = -3 3465 aligner.target_left_open_gap_score = -2 3466 aligner.target_left_extend_gap_score = -3 3467 aligner.target_right_open_gap_score = -4.5 3468 aligner.target_right_extend_gap_score = -4.3 3469 aligner.query_internal_open_gap_score = -2 3470 aligner.query_internal_extend_gap_score = -2.5 3471 aligner.query_left_open_gap_score = -9.1 3472 aligner.query_left_extend_gap_score = +1.7 3473 aligner.query_right_open_gap_score = -1.9 3474 aligner.query_right_extend_gap_score = -2.0 3475 aligner.mode = "global" 3476 state = pickle.dumps(aligner) 3477 pickled_aligner = pickle.loads(state) 3478 self.assertEqual(aligner.wildcard, pickled_aligner.wildcard) 3479 self.assertIsNone(pickled_aligner.match_score) 3480 self.assertIsNone(pickled_aligner.mismatch_score) 3481 self.assertTrue( 3482 (aligner.substitution_matrix == pickled_aligner.substitution_matrix).all() 3483 ) 3484 self.assertEqual( 3485 aligner.substitution_matrix.alphabet, 3486 pickled_aligner.substitution_matrix.alphabet, 3487 ) 3488 self.assertAlmostEqual( 3489 aligner.target_internal_open_gap_score, 3490 pickled_aligner.target_internal_open_gap_score, 3491 ) 3492 self.assertAlmostEqual( 3493 aligner.target_internal_extend_gap_score, 3494 pickled_aligner.target_internal_extend_gap_score, 3495 ) 3496 self.assertAlmostEqual( 3497 aligner.target_left_open_gap_score, 3498 pickled_aligner.target_left_open_gap_score, 3499 ) 3500 self.assertAlmostEqual( 3501 aligner.target_left_extend_gap_score, 3502 pickled_aligner.target_left_extend_gap_score, 3503 ) 3504 self.assertAlmostEqual( 3505 aligner.target_right_open_gap_score, 3506 pickled_aligner.target_right_open_gap_score, 3507 ) 3508 self.assertAlmostEqual( 3509 aligner.target_right_extend_gap_score, 3510 pickled_aligner.target_right_extend_gap_score, 3511 ) 3512 self.assertAlmostEqual( 3513 aligner.query_internal_open_gap_score, 3514 pickled_aligner.query_internal_open_gap_score, 3515 ) 3516 self.assertAlmostEqual( 3517 aligner.query_internal_extend_gap_score, 3518 pickled_aligner.query_internal_extend_gap_score, 3519 ) 3520 self.assertAlmostEqual( 3521 aligner.query_left_open_gap_score, 3522 pickled_aligner.query_left_open_gap_score, 3523 ) 3524 self.assertAlmostEqual( 3525 aligner.query_left_extend_gap_score, 3526 pickled_aligner.query_left_extend_gap_score, 3527 ) 3528 self.assertAlmostEqual( 3529 aligner.query_right_open_gap_score, 3530 pickled_aligner.query_right_open_gap_score, 3531 ) 3532 self.assertAlmostEqual( 3533 aligner.query_right_extend_gap_score, 3534 pickled_aligner.query_right_extend_gap_score, 3535 ) 3536 self.assertEqual(aligner.mode, pickled_aligner.mode) 3537 3538 3539class TestAlignmentFormat(unittest.TestCase): 3540 def test_alignment_simple(self): 3541 chromosome = "ACGATCAGCGAGCATNGAGCACTACGACAGCGAGTGACCACTATTCGCGATCAGGAGCAGATACTTTACGAGCATCGGC" 3542 transcript = "AGCATCGAGCGACTTGAGTACTATTCATACTTTCGAGC" 3543 aligner = Align.PairwiseAligner() 3544 aligner.query_extend_gap_score = 0 3545 aligner.query_open_gap_score = -3 3546 aligner.target_gap_score = -3 3547 aligner.end_gap_score = 0 3548 aligner.mismatch = -1 3549 alignments = aligner.align(chromosome, transcript) 3550 self.assertEqual(len(alignments), 1) 3551 alignment = alignments[0] 3552 self.assertAlmostEqual(alignment.score, 19.0) 3553 self.assertEqual( 3554 str(alignment), 3555 """\ 3556ACGATCAGCGAGCATNGAGC-ACTACGACAGCGAGTGACCACTATTCGCGATCAGGAGCAGATACTTTACGAGCATCGGC 3557----------|||||.||||-|||-----------|||..|||||||--------------|||||||-|||||------ 3558----------AGCATCGAGCGACT-----------TGAGTACTATTC--------------ATACTTT-CGAGC------ 3559""", 3560 ) 3561 self.assertEqual(alignment.shape, (2, 80)) 3562 self.assertEqual( 3563 alignment.format("psl"), 3564 """\ 356534 2 0 1 1 1 3 26 + query 38 0 38 target 79 10 73 5 10,3,12,7,5, 0,11,14,26,33, 10,20,34,60,68, 3566""", 3567 ) 3568 self.assertEqual( 3569 alignment.format("bed"), 3570 """\ 3571target 10 73 query 19.0 + 10 73 0 5 10,3,12,7,5, 0,10,24,50,58, 3572""", 3573 ) 3574 self.assertEqual( 3575 alignment.format("sam"), 3576 """\ 3577query 0 target 11 255 10M1I3M11D12M14D7M1D5M * 0 0 AGCATCGAGCGACTTGAGTACTATTCATACTTTCGAGC * AS:i:19 3578""", 3579 ) 3580 alignments = aligner.align( 3581 chromosome, reverse_complement(transcript), strand="-" 3582 ) 3583 self.assertEqual(len(alignments), 1) 3584 alignment = alignments[0] 3585 self.assertAlmostEqual(alignment.score, 19.0) 3586 self.assertEqual( 3587 str(alignment), 3588 """\ 3589ACGATCAGCGAGCATNGAGC-ACTACGACAGCGAGTGACCACTATTCGCGATCAGGAGCAGATACTTTACGAGCATCGGC 3590----------|||||.||||-|||-----------|||..|||||||--------------|||||||-|||||------ 3591----------AGCATCGAGCGACT-----------TGAGTACTATTC--------------ATACTTT-CGAGC------ 3592""", 3593 ) 3594 self.assertEqual(alignment.shape, (2, 80)) 3595 self.assertEqual( 3596 alignment.format("psl"), 3597 """\ 359834 2 0 1 1 1 3 26 - query 38 0 38 target 79 10 73 5 10,3,12,7,5, 0,11,14,26,33, 10,20,34,60,68, 3599""", 3600 ) 3601 self.assertEqual( 3602 alignment.format("bed"), 3603 """\ 3604target 10 73 query 19.0 - 10 73 0 5 10,3,12,7,5, 0,10,24,50,58, 3605""", 3606 ) 3607 self.assertEqual( 3608 alignment.format("sam"), 3609 """\ 3610query 16 target 11 255 10M1I3M11D12M14D7M1D5M * 0 0 AGCATCGAGCGACTTGAGTACTATTCATACTTTCGAGC * AS:i:19 3611""", 3612 ) 3613 3614 def test_alignment_end_gap(self): 3615 aligner = Align.PairwiseAligner() 3616 aligner.gap_score = -1 3617 aligner.end_gap_score = 0 3618 aligner.mismatch = -10 3619 alignments = aligner.align("ACGTAGCATCAGC", "CCCCACGTAGCATCAGC") 3620 self.assertEqual(len(alignments), 1) 3621 self.assertAlmostEqual(alignments.score, 13.0) 3622 alignment = alignments[0] 3623 self.assertEqual( 3624 str(alignment), 3625 """\ 3626----ACGTAGCATCAGC 3627----||||||||||||| 3628CCCCACGTAGCATCAGC 3629""", 3630 ) 3631 self.assertEqual(alignment.shape, (2, 17)) 3632 self.assertEqual( 3633 alignment.format("psl"), 3634 """\ 363513 0 0 0 0 0 0 0 + query 17 4 17 target 13 0 13 1 13, 4, 0, 3636""", 3637 ) 3638 self.assertEqual( 3639 alignment.format("bed"), 3640 """\ 3641target 0 13 query 13.0 + 0 13 0 1 13, 0, 3642""", 3643 ) 3644 self.assertEqual( 3645 alignment.format("sam"), 3646 """\ 3647query 0 target 1 255 4S13M * 0 0 CCCCACGTAGCATCAGC * AS:i:13 3648""", 3649 ) 3650 alignments = aligner.align( 3651 "ACGTAGCATCAGC", reverse_complement("CCCCACGTAGCATCAGC"), strand="-" 3652 ) 3653 self.assertEqual(len(alignments), 1) 3654 self.assertAlmostEqual(alignments.score, 13.0) 3655 alignment = alignments[0] 3656 self.assertEqual( 3657 str(alignment), 3658 """\ 3659----ACGTAGCATCAGC 3660----||||||||||||| 3661CCCCACGTAGCATCAGC 3662""", 3663 ) 3664 self.assertEqual(alignment.shape, (2, 17)) 3665 self.assertEqual( 3666 alignment.format("psl"), 3667 """\ 366813 0 0 0 0 0 0 0 - query 17 0 13 target 13 0 13 1 13, 4, 0, 3669""", 3670 ) 3671 self.assertEqual( 3672 alignment.format("bed"), 3673 """\ 3674target 0 13 query 13.0 - 0 13 0 1 13, 0, 3675""", 3676 ) 3677 self.assertEqual( 3678 alignment.format("sam"), 3679 """\ 3680query 16 target 1 255 4S13M * 0 0 CCCCACGTAGCATCAGC * AS:i:13 3681""", 3682 ) 3683 alignments = aligner.align("CCCCACGTAGCATCAGC", "ACGTAGCATCAGC") 3684 self.assertEqual(len(alignments), 1) 3685 alignment = alignments[0] 3686 self.assertAlmostEqual(alignment.score, 13.0) 3687 self.assertEqual( 3688 str(alignment), 3689 """\ 3690CCCCACGTAGCATCAGC 3691----||||||||||||| 3692----ACGTAGCATCAGC 3693""", 3694 ) 3695 self.assertEqual(alignment.shape, (2, 17)) 3696 self.assertEqual( 3697 alignment.format("psl"), 3698 """\ 369913 0 0 0 0 0 0 0 + query 13 0 13 target 17 4 17 1 13, 0, 4, 3700""", 3701 ) 3702 self.assertEqual( 3703 alignment.format("bed"), 3704 """\ 3705target 4 17 query 13.0 + 4 17 0 1 13, 0, 3706""", 3707 ) 3708 self.assertEqual( 3709 alignment.format("sam"), 3710 """\ 3711query 0 target 5 255 13M * 0 0 ACGTAGCATCAGC * AS:i:13 3712""", 3713 ) 3714 alignments = aligner.align( 3715 "CCCCACGTAGCATCAGC", reverse_complement("ACGTAGCATCAGC"), strand="-" 3716 ) 3717 self.assertEqual(len(alignments), 1) 3718 alignment = alignments[0] 3719 self.assertAlmostEqual(alignment.score, 13.0) 3720 self.assertEqual( 3721 str(alignment), 3722 """\ 3723CCCCACGTAGCATCAGC 3724----||||||||||||| 3725----ACGTAGCATCAGC 3726""", 3727 ) 3728 self.assertEqual(alignment.shape, (2, 17)) 3729 self.assertEqual( 3730 alignment.format("psl"), 3731 """\ 373213 0 0 0 0 0 0 0 - query 13 0 13 target 17 4 17 1 13, 0, 4, 3733""", 3734 ) 3735 self.assertEqual( 3736 alignment.format("bed"), 3737 """\ 3738target 4 17 query 13.0 - 4 17 0 1 13, 0, 3739""", 3740 ) 3741 self.assertEqual( 3742 alignment.format("sam"), 3743 """\ 3744query 16 target 5 255 13M * 0 0 ACGTAGCATCAGC * AS:i:13 3745""", 3746 ) 3747 alignments = aligner.align("ACGTAGCATCAGC", "ACGTAGCATCAGCGGGG") 3748 self.assertEqual(len(alignments), 1) 3749 alignment = alignments[0] 3750 self.assertEqual( 3751 str(alignment), 3752 """\ 3753ACGTAGCATCAGC---- 3754|||||||||||||---- 3755ACGTAGCATCAGCGGGG 3756""", 3757 ) 3758 self.assertEqual(alignment.shape, (2, 17)) 3759 self.assertEqual( 3760 alignment.format("psl"), 3761 """\ 376213 0 0 0 0 0 0 0 + query 17 0 13 target 13 0 13 1 13, 0, 0, 3763""", 3764 ) 3765 self.assertEqual( 3766 alignment.format("bed"), 3767 """\ 3768target 0 13 query 13.0 + 0 13 0 1 13, 0, 3769""", 3770 ) 3771 self.assertEqual( 3772 alignment.format("sam"), 3773 """\ 3774query 0 target 1 255 13M4S * 0 0 ACGTAGCATCAGCGGGG * AS:i:13 3775""", 3776 ) 3777 alignments = aligner.align( 3778 "ACGTAGCATCAGC", reverse_complement("ACGTAGCATCAGCGGGG"), strand="-" 3779 ) 3780 self.assertEqual(len(alignments), 1) 3781 alignment = alignments[0] 3782 self.assertEqual( 3783 str(alignment), 3784 """\ 3785ACGTAGCATCAGC---- 3786|||||||||||||---- 3787ACGTAGCATCAGCGGGG 3788""", 3789 ) 3790 self.assertEqual(alignment.shape, (2, 17)) 3791 self.assertEqual( 3792 alignment.format("psl"), 3793 """\ 379413 0 0 0 0 0 0 0 - query 17 4 17 target 13 0 13 1 13, 0, 0, 3795""", 3796 ) 3797 self.assertEqual( 3798 alignment.format("bed"), 3799 """\ 3800target 0 13 query 13.0 - 0 13 0 1 13, 0, 3801""", 3802 ) 3803 self.assertEqual( 3804 alignment.format("sam"), 3805 """\ 3806query 16 target 1 255 13M4S * 0 0 ACGTAGCATCAGCGGGG * AS:i:13 3807""", 3808 ) 3809 alignments = aligner.align("ACGTAGCATCAGCGGGG", "ACGTAGCATCAGC") 3810 self.assertEqual(len(alignments), 1) 3811 alignment = alignments[0] 3812 self.assertAlmostEqual(alignment.score, 13.0) 3813 self.assertEqual( 3814 str(alignment), 3815 """\ 3816ACGTAGCATCAGCGGGG 3817|||||||||||||---- 3818ACGTAGCATCAGC---- 3819""", 3820 ) 3821 self.assertEqual(alignment.shape, (2, 17)) 3822 self.assertEqual( 3823 alignment.format("psl"), 3824 """\ 382513 0 0 0 0 0 0 0 + query 13 0 13 target 17 0 13 1 13, 0, 0, 3826""", 3827 ) 3828 self.assertEqual( 3829 alignment.format("bed"), 3830 """\ 3831target 0 13 query 13.0 + 0 13 0 1 13, 0, 3832""", 3833 ) 3834 self.assertEqual( 3835 alignment.format("sam"), 3836 """\ 3837query 0 target 1 255 13M * 0 0 ACGTAGCATCAGC * AS:i:13 3838""", 3839 ) 3840 alignments = aligner.align( 3841 "ACGTAGCATCAGCGGGG", reverse_complement("ACGTAGCATCAGC"), strand="-" 3842 ) 3843 self.assertEqual(len(alignments), 1) 3844 alignment = alignments[0] 3845 self.assertAlmostEqual(alignment.score, 13.0) 3846 self.assertEqual( 3847 str(alignment), 3848 """\ 3849ACGTAGCATCAGCGGGG 3850|||||||||||||---- 3851ACGTAGCATCAGC---- 3852""", 3853 ) 3854 self.assertEqual(alignment.shape, (2, 17)) 3855 self.assertEqual( 3856 alignment.format("psl"), 3857 """\ 385813 0 0 0 0 0 0 0 - query 13 0 13 target 17 0 13 1 13, 0, 0, 3859""", 3860 ) 3861 self.assertEqual( 3862 alignment.format("bed"), 3863 """\ 3864target 0 13 query 13.0 - 0 13 0 1 13, 0, 3865""", 3866 ) 3867 self.assertEqual( 3868 alignment.format("sam"), 3869 """\ 3870query 16 target 1 255 13M * 0 0 ACGTAGCATCAGC * AS:i:13 3871""", 3872 ) 3873 3874 def test_alignment_wildcard(self): 3875 aligner = Align.PairwiseAligner() 3876 aligner.gap_score = -10 3877 aligner.end_gap_score = 0 3878 aligner.mismatch = -2 3879 aligner.wildcard = "N" 3880 target = "TTTTTNACGCTCGAGCAGCTACG" 3881 query = "ACGATCGAGCNGCTACGCCCNC" 3882 # use strings for target and query 3883 alignments = aligner.align(target, query) 3884 self.assertEqual(len(alignments), 1) 3885 alignment = alignments[0] 3886 self.assertEqual( 3887 str(alignment), 3888 """\ 3889TTTTTNACGCTCGAGCAGCTACG----- 3890------|||.||||||.||||||----- 3891------ACGATCGAGCNGCTACGCCCNC 3892""", 3893 ) 3894 self.assertEqual(alignment.shape, (2, 28)) 3895 self.assertEqual( 3896 alignment.format("psl"), 3897 """\ 389815 1 0 1 0 0 0 0 + query 22 0 17 target 23 6 23 1 17, 0, 6, 3899""", 3900 ) 3901 self.assertEqual( 3902 alignment.format("bed"), 3903 """\ 3904target 6 23 query 13.0 + 6 23 0 1 17, 0, 3905""", 3906 ) 3907 self.assertEqual( 3908 alignment.format("sam"), 3909 """\ 3910query 0 target 7 255 17M5S * 0 0 ACGATCGAGCNGCTACGCCCNC * AS:i:13 3911""", 3912 ) 3913 alignments = aligner.align(target, reverse_complement(query), strand="-") 3914 self.assertEqual(len(alignments), 1) 3915 alignment = alignments[0] 3916 self.assertEqual( 3917 str(alignment), 3918 """\ 3919TTTTTNACGCTCGAGCAGCTACG----- 3920------|||.||||||.||||||----- 3921------ACGATCGAGCNGCTACGCCCNC 3922""", 3923 ) 3924 self.assertEqual(alignment.shape, (2, 28)) 3925 self.assertEqual( 3926 alignment.format("psl"), 3927 """\ 392815 1 0 1 0 0 0 0 - query 22 5 22 target 23 6 23 1 17, 0, 6, 3929""", 3930 ) 3931 self.assertEqual( 3932 alignment.format("bed"), 3933 """\ 3934target 6 23 query 13.0 - 6 23 0 1 17, 0, 3935""", 3936 ) 3937 self.assertEqual( 3938 alignment.format("sam"), 3939 """\ 3940query 16 target 7 255 17M5S * 0 0 ACGATCGAGCNGCTACGCCCNC * AS:i:13 3941""", 3942 ) 3943 # use Seq objects for target and query 3944 alignments = aligner.align(Seq(target), Seq(query)) 3945 self.assertEqual(len(alignments), 1) 3946 alignment = alignments[0] 3947 self.assertEqual( 3948 str(alignment), 3949 """\ 3950TTTTTNACGCTCGAGCAGCTACG----- 3951------|||.||||||.||||||----- 3952------ACGATCGAGCNGCTACGCCCNC 3953""", 3954 ) 3955 self.assertEqual(alignment.shape, (2, 28)) 3956 self.assertEqual( 3957 alignment.format("psl"), 3958 """\ 395915 1 0 1 0 0 0 0 + query 22 0 17 target 23 6 23 1 17, 0, 6, 3960""", 3961 ) 3962 self.assertEqual( 3963 alignment.format("bed"), 3964 """\ 3965target 6 23 query 13.0 + 6 23 0 1 17, 0, 3966""", 3967 ) 3968 self.assertEqual( 3969 alignment.format("sam"), 3970 """\ 3971query 0 target 7 255 17M5S * 0 0 ACGATCGAGCNGCTACGCCCNC * AS:i:13 3972""", 3973 ) 3974 alignments = aligner.align( 3975 Seq(target), Seq(query).reverse_complement(), strand="-" 3976 ) 3977 self.assertEqual(len(alignments), 1) 3978 alignment = alignments[0] 3979 self.assertEqual( 3980 str(alignment), 3981 """\ 3982TTTTTNACGCTCGAGCAGCTACG----- 3983------|||.||||||.||||||----- 3984------ACGATCGAGCNGCTACGCCCNC 3985""", 3986 ) 3987 self.assertEqual(alignment.shape, (2, 28)) 3988 self.assertEqual( 3989 alignment.format("psl"), 3990 """\ 399115 1 0 1 0 0 0 0 - query 22 5 22 target 23 6 23 1 17, 0, 6, 3992""", 3993 ) 3994 self.assertEqual( 3995 alignment.format("bed"), 3996 """\ 3997target 6 23 query 13.0 - 6 23 0 1 17, 0, 3998""", 3999 ) 4000 self.assertEqual( 4001 alignment.format("sam"), 4002 """\ 4003query 16 target 7 255 17M5S * 0 0 ACGATCGAGCNGCTACGCCCNC * AS:i:13 4004""", 4005 ) 4006 4007 4008class TestAlignmentMethods(unittest.TestCase): 4009 def test_indexing_slicing(self): 4010 aligner = Align.PairwiseAligner() 4011 alignments = aligner.align("AACCGGGACCG", "ACGGAAC") 4012 self.assertEqual(len(alignments), 88) 4013 alignment = alignments[0] 4014 self.assertEqual( 4015 str(alignment), 4016 """\ 4017AACCGGGA-CCG 4018|-|-||-|-|-- 4019A-C-GG-AAC-- 4020""", 4021 ) 4022 self.assertAlmostEqual(alignment.score, 6.0) 4023 self.assertAlmostEqual(alignment[:, :].score, 6.0) 4024 self.assertEqual( 4025 str(alignment[:, :]), 4026 """\ 4027AACCGGGA-CCG 4028|-|-||-|-|-- 4029A-C-GG-AAC-- 4030""", 4031 ) 4032 self.assertAlmostEqual(alignment[:, 0:].score, 6.0) 4033 self.assertEqual( 4034 str(alignment[:, 0:]), 4035 """\ 4036AACCGGGA-CCG 4037|-|-||-|-|-- 4038A-C-GG-AAC-- 4039""", 4040 ) 4041 self.assertAlmostEqual(alignment[:, :12].score, 6.0) 4042 self.assertEqual( 4043 str(alignment[:, :12]), 4044 """\ 4045AACCGGGA-CCG 4046|-|-||-|-|-- 4047A-C-GG-AAC-- 4048""", 4049 ) 4050 self.assertAlmostEqual(alignment[:, 0:12].score, 6.0) 4051 self.assertEqual( 4052 str(alignment[:, 0:12]), 4053 """\ 4054AACCGGGA-CCG 4055|-|-||-|-|-- 4056A-C-GG-AAC-- 4057""", 4058 ) 4059 self.assertIsNone(alignment[:, 1:].score) 4060 self.assertEqual( 4061 str(alignment[:, 1:]), 4062 """\ 4063AACCGGGA-CCG 4064 -|-||-|-|-- 4065A-C-GG-AAC-- 4066""", 4067 ) 4068 self.assertIsNone(alignment[:, 2:].score) 4069 self.assertEqual( 4070 str(alignment[:, 2:]), 4071 """\ 4072AACCGGGA-CCG 4073 |-||-|-|-- 4074 AC-GG-AAC-- 4075""", 4076 ) 4077 self.assertIsNone(alignment[:, 3:].score) 4078 self.assertEqual( 4079 str(alignment[:, 3:]), 4080 """\ 4081AACCGGGA-CCG 4082 -||-|-|-- 4083 AC-GG-AAC-- 4084""", 4085 ) 4086 self.assertIsNone(alignment[:, 4:].score) 4087 self.assertEqual( 4088 str(alignment[:, 4:]), 4089 """\ 4090AACCGGGA-CCG 4091 ||-|-|-- 4092 ACGG-AAC-- 4093""", 4094 ) 4095 self.assertIsNone(alignment[:, :-1].score) 4096 self.assertEqual( 4097 str(alignment[:, :-1]), 4098 """\ 4099AACCGGGA-CCG 4100|-|-||-|-|- 4101A-C-GG-AAC- 4102""", 4103 ) 4104 self.assertIsNone(alignment[:, :-2].score) 4105 self.assertEqual( 4106 str(alignment[:, :-2]), 4107 """\ 4108AACCGGGA-CCG 4109|-|-||-|-| 4110A-C-GG-AAC 4111""", 4112 ) 4113 self.assertIsNone(alignment[:, :-3].score) 4114 self.assertEqual( 4115 str(alignment[:, :-3]), 4116 """\ 4117AACCGGGA-CCG 4118|-|-||-|- 4119A-C-GG-AAC 4120""", 4121 ) 4122 self.assertIsNone(alignment[:, 1:-1].score) 4123 self.assertEqual( 4124 str(alignment[:, 1:-1]), 4125 """\ 4126AACCGGGA-CCG 4127 -|-||-|-|- 4128A-C-GG-AAC- 4129""", 4130 ) 4131 self.assertIsNone(alignment[:, 1:-2].score) 4132 self.assertEqual( 4133 str(alignment[:, 1:-2]), 4134 """\ 4135AACCGGGA-CCG 4136 -|-||-|-| 4137A-C-GG-AAC 4138""", 4139 ) 4140 self.assertIsNone(alignment[:, 2:-1].score) 4141 self.assertEqual( 4142 str(alignment[:, 2:-1]), 4143 """\ 4144AACCGGGA-CCG 4145 |-||-|-|- 4146 AC-GG-AAC- 4147""", 4148 ) 4149 self.assertIsNone(alignment[:, 2:-2].score) 4150 self.assertEqual( 4151 str(alignment[:, 2:-2]), 4152 """\ 4153AACCGGGA-CCG 4154 |-||-|-| 4155 AC-GG-AAC 4156""", 4157 ) 4158 with self.assertRaises(NotImplementedError): 4159 alignment[:1] 4160 with self.assertRaises(NotImplementedError): 4161 alignment[0] 4162 with self.assertRaises(NotImplementedError): 4163 alignment[0, :] 4164 with self.assertRaises(NotImplementedError): 4165 alignment[:1, :] 4166 with self.assertRaises(NotImplementedError): 4167 alignment[:, 0] 4168 with self.assertRaises(NotImplementedError): 4169 alignment[:, ::3] 4170 4171 def test_sort(self): 4172 aligner = Align.PairwiseAligner() 4173 aligner.gap_score = -1 4174 target = Seq("ACTT") 4175 query = Seq("ACCT") 4176 alignments = aligner.align(target, query) 4177 self.assertEqual(len(alignments), 1) 4178 alignment = alignments[0] 4179 self.assertEqual( 4180 str(alignment), 4181 """\ 4182ACTT 4183||.| 4184ACCT 4185""", 4186 ) 4187 alignment.sort() 4188 self.assertEqual( 4189 str(alignment), 4190 """\ 4191ACCT 4192||.| 4193ACTT 4194""", 4195 ) 4196 alignment.sort(reverse=True) 4197 self.assertEqual( 4198 str(alignment), 4199 """\ 4200ACTT 4201||.| 4202ACCT 4203""", 4204 ) 4205 target.id = "seq1" 4206 query.id = "seq2" 4207 alignment.sort() 4208 self.assertEqual( 4209 str(alignment), 4210 """\ 4211ACTT 4212||.| 4213ACCT 4214""", 4215 ) 4216 alignment.sort(reverse=True) 4217 self.assertEqual( 4218 str(alignment), 4219 """\ 4220ACCT 4221||.| 4222ACTT 4223""", 4224 ) 4225 alignment.sort(key=GC) 4226 self.assertEqual( 4227 str(alignment), 4228 """\ 4229ACTT 4230||.| 4231ACCT 4232""", 4233 ) 4234 alignment.sort(key=GC, reverse=True) 4235 self.assertEqual( 4236 str(alignment), 4237 """\ 4238ACCT 4239||.| 4240ACTT 4241""", 4242 ) 4243 4244 def test_substitutions(self): 4245 aligner = Align.PairwiseAligner() 4246 path = os.path.join("Align", "ecoli.fa") 4247 record = SeqIO.read(path, "fasta") 4248 target = record.seq 4249 path = os.path.join("Align", "bsubtilis.fa") 4250 record = SeqIO.read(path, "fasta") 4251 query = record.seq 4252 # blastn default parameters: 4253 aligner.open_gap_score = -5 4254 aligner.extend_gap_score = -2 4255 aligner.match = +1 4256 aligner.mismatch = -3 4257 aligner.mode = "local" 4258 alignments = aligner.align(target, query) 4259 self.assertEqual(len(alignments), 9031680) 4260 alignment = alignments[0] 4261 m = alignment.substitutions 4262 self.assertEqual( 4263 str(m), 4264 """\ 4265 A C G T 4266A 191.0 3.0 15.0 13.0 4267C 5.0 186.0 9.0 14.0 4268G 12.0 11.0 248.0 8.0 4269T 11.0 19.0 6.0 145.0 4270""", 4271 ) 4272 self.assertAlmostEqual(m["T", "C"], 19.0) 4273 self.assertAlmostEqual(m["C", "T"], 14.0) 4274 m += m.transpose() 4275 m /= 2.0 4276 self.assertEqual( 4277 str(m), 4278 """\ 4279 A C G T 4280A 191.0 4.0 13.5 12.0 4281C 4.0 186.0 10.0 16.5 4282G 13.5 10.0 248.0 7.0 4283T 12.0 16.5 7.0 145.0 4284""", 4285 ) 4286 self.assertAlmostEqual(m["C", "T"], 16.5) 4287 self.assertAlmostEqual(m["T", "C"], 16.5) 4288 4289 4290if __name__ == "__main__": 4291 runner = unittest.TextTestRunner(verbosity=2) 4292 unittest.main(testRunner=runner) 4293