1# This code is part of the Biopython distribution and governed by its
2# license.  Please see the LICENSE file that should have been included
3# as part of this package.
4
5
6"""Tests for the PairwiseAligner in Bio.Align."""
7
8import array
9import os
10import unittest
11
12from Bio import Align, SeqIO
13from Bio.Seq import Seq, reverse_complement
14from Bio.SeqUtils import GC
15
16
17class TestAlignerProperties(unittest.TestCase):
18    def test_aligner_property_epsilon(self):
19        aligner = Align.PairwiseAligner()
20        self.assertAlmostEqual(aligner.epsilon, 1.0e-6)
21        aligner.epsilon = 1.0e-4
22        self.assertAlmostEqual(aligner.epsilon, 1.0e-4)
23        aligner.epsilon = 1.0e-8
24        self.assertAlmostEqual(aligner.epsilon, 1.0e-8)
25        with self.assertRaises(TypeError):
26            aligner.epsilon = "not a number"
27        with self.assertRaises(TypeError):
28            aligner.epsilon = None
29
30    def test_aligner_property_mode(self):
31        aligner = Align.PairwiseAligner()
32        aligner.mode = "global"
33        self.assertEqual(aligner.mode, "global")
34        aligner.mode = "local"
35        self.assertEqual(aligner.mode, "local")
36        with self.assertRaises(ValueError):
37            aligner.mode = "wrong"
38
39    def test_aligner_property_match_mismatch(self):
40        aligner = Align.PairwiseAligner()
41        aligner.match_score = 3.0
42        self.assertAlmostEqual(aligner.match_score, 3.0)
43        aligner.mismatch_score = -2.0
44        self.assertAlmostEqual(aligner.mismatch_score, -2.0)
45        with self.assertRaises(ValueError):
46            aligner.match_score = "not a number"
47        with self.assertRaises(ValueError):
48            aligner.mismatch_score = "not a number"
49        self.assertEqual(
50            str(aligner),
51            """\
52Pairwise sequence aligner with parameters
53  wildcard: None
54  match_score: 3.000000
55  mismatch_score: -2.000000
56  target_internal_open_gap_score: 0.000000
57  target_internal_extend_gap_score: 0.000000
58  target_left_open_gap_score: 0.000000
59  target_left_extend_gap_score: 0.000000
60  target_right_open_gap_score: 0.000000
61  target_right_extend_gap_score: 0.000000
62  query_internal_open_gap_score: 0.000000
63  query_internal_extend_gap_score: 0.000000
64  query_left_open_gap_score: 0.000000
65  query_left_extend_gap_score: 0.000000
66  query_right_open_gap_score: 0.000000
67  query_right_extend_gap_score: 0.000000
68  mode: global
69""",
70        )
71
72    def test_aligner_property_gapscores(self):
73        aligner = Align.PairwiseAligner()
74        open_score, extend_score = (-5, -1)
75        aligner.target_open_gap_score = open_score
76        aligner.target_extend_gap_score = extend_score
77        self.assertAlmostEqual(aligner.target_open_gap_score, open_score)
78        self.assertAlmostEqual(aligner.target_extend_gap_score, extend_score)
79        open_score, extend_score = (-6, -7)
80        aligner.query_open_gap_score = open_score
81        aligner.query_extend_gap_score = extend_score
82        self.assertAlmostEqual(aligner.query_open_gap_score, open_score)
83        self.assertAlmostEqual(aligner.query_extend_gap_score, extend_score)
84        open_score, extend_score = (-3, -9)
85        aligner.target_end_open_gap_score = open_score
86        aligner.target_end_extend_gap_score = extend_score
87        self.assertAlmostEqual(aligner.target_end_open_gap_score, open_score)
88        self.assertAlmostEqual(aligner.target_end_extend_gap_score, extend_score)
89        open_score, extend_score = (-1, -2)
90        aligner.query_end_open_gap_score = open_score
91        aligner.query_end_extend_gap_score = extend_score
92        self.assertEqual(
93            str(aligner),
94            """\
95Pairwise sequence aligner with parameters
96  wildcard: None
97  match_score: 1.000000
98  mismatch_score: 0.000000
99  target_internal_open_gap_score: -5.000000
100  target_internal_extend_gap_score: -1.000000
101  target_left_open_gap_score: -3.000000
102  target_left_extend_gap_score: -9.000000
103  target_right_open_gap_score: -3.000000
104  target_right_extend_gap_score: -9.000000
105  query_internal_open_gap_score: -6.000000
106  query_internal_extend_gap_score: -7.000000
107  query_left_open_gap_score: -1.000000
108  query_left_extend_gap_score: -2.000000
109  query_right_open_gap_score: -1.000000
110  query_right_extend_gap_score: -2.000000
111  mode: global
112""",
113        )
114        self.assertAlmostEqual(aligner.query_end_open_gap_score, open_score)
115        self.assertAlmostEqual(aligner.query_end_extend_gap_score, extend_score)
116        score = -3
117        aligner.target_gap_score = score
118        self.assertAlmostEqual(aligner.target_gap_score, score)
119        self.assertAlmostEqual(aligner.target_open_gap_score, score)
120        self.assertAlmostEqual(aligner.target_extend_gap_score, score)
121        score = -2
122        aligner.query_gap_score = score
123        self.assertAlmostEqual(aligner.query_gap_score, score)
124        self.assertAlmostEqual(aligner.query_open_gap_score, score)
125        self.assertAlmostEqual(aligner.query_extend_gap_score, score)
126        score = -4
127        aligner.target_end_gap_score = score
128        self.assertAlmostEqual(aligner.target_end_gap_score, score)
129        self.assertAlmostEqual(aligner.target_end_open_gap_score, score)
130        self.assertAlmostEqual(aligner.target_end_extend_gap_score, score)
131        self.assertAlmostEqual(aligner.target_left_gap_score, score)
132        self.assertAlmostEqual(aligner.target_left_open_gap_score, score)
133        self.assertAlmostEqual(aligner.target_left_extend_gap_score, score)
134        self.assertAlmostEqual(aligner.target_right_gap_score, score)
135        self.assertAlmostEqual(aligner.target_right_open_gap_score, score)
136        self.assertAlmostEqual(aligner.target_right_extend_gap_score, score)
137        score = -5
138        aligner.query_end_gap_score = score
139        self.assertAlmostEqual(aligner.query_end_gap_score, score)
140        self.assertAlmostEqual(aligner.query_end_open_gap_score, score)
141        self.assertAlmostEqual(aligner.query_end_extend_gap_score, score)
142        self.assertAlmostEqual(aligner.query_left_gap_score, score)
143        self.assertAlmostEqual(aligner.query_left_open_gap_score, score)
144        self.assertAlmostEqual(aligner.query_left_extend_gap_score, score)
145        self.assertAlmostEqual(aligner.query_right_gap_score, score)
146        self.assertAlmostEqual(aligner.query_right_open_gap_score, score)
147        self.assertAlmostEqual(aligner.query_right_extend_gap_score, score)
148        self.assertEqual(
149            str(aligner),
150            """\
151Pairwise sequence aligner with parameters
152  wildcard: None
153  match_score: 1.000000
154  mismatch_score: 0.000000
155  target_internal_open_gap_score: -3.000000
156  target_internal_extend_gap_score: -3.000000
157  target_left_open_gap_score: -4.000000
158  target_left_extend_gap_score: -4.000000
159  target_right_open_gap_score: -4.000000
160  target_right_extend_gap_score: -4.000000
161  query_internal_open_gap_score: -2.000000
162  query_internal_extend_gap_score: -2.000000
163  query_left_open_gap_score: -5.000000
164  query_left_extend_gap_score: -5.000000
165  query_right_open_gap_score: -5.000000
166  query_right_extend_gap_score: -5.000000
167  mode: global
168""",
169        )
170        with self.assertRaises(ValueError):
171            aligner.target_gap_score = "wrong"
172        with self.assertRaises(ValueError):
173            aligner.query_gap_score = "wrong"
174        with self.assertRaises(TypeError):
175            aligner.target_end_gap_score = "wrong"
176        with self.assertRaises(TypeError):
177            aligner.query_end_gap_score = "wrong"
178
179    def test_aligner_nonexisting_property(self):
180        aligner = Align.PairwiseAligner()
181        with self.assertRaises(AttributeError) as cm:
182            aligner.no_such_property
183        self.assertEqual(
184            str(cm.exception),
185            "'PairwiseAligner' object has no attribute 'no_such_property'",
186        )
187        with self.assertRaises(AttributeError) as cm:
188            aligner.no_such_property = 1
189        self.assertEqual(
190            str(cm.exception),
191            "'PairwiseAligner' object has no attribute 'no_such_property'",
192        )
193
194
195class TestPairwiseGlobal(unittest.TestCase):
196    def test_needlemanwunsch_simple1(self):
197        seq1 = "GAACT"
198        seq2 = "GAT"
199        aligner = Align.PairwiseAligner()
200        aligner.mode = "global"
201        self.assertEqual(
202            str(aligner),
203            """\
204Pairwise sequence aligner with parameters
205  wildcard: None
206  match_score: 1.000000
207  mismatch_score: 0.000000
208  target_internal_open_gap_score: 0.000000
209  target_internal_extend_gap_score: 0.000000
210  target_left_open_gap_score: 0.000000
211  target_left_extend_gap_score: 0.000000
212  target_right_open_gap_score: 0.000000
213  target_right_extend_gap_score: 0.000000
214  query_internal_open_gap_score: 0.000000
215  query_internal_extend_gap_score: 0.000000
216  query_left_open_gap_score: 0.000000
217  query_left_extend_gap_score: 0.000000
218  query_right_open_gap_score: 0.000000
219  query_right_extend_gap_score: 0.000000
220  mode: global
221""",
222        )
223        self.assertEqual(aligner.algorithm, "Needleman-Wunsch")
224        score = aligner.score(seq1, seq2)
225        self.assertAlmostEqual(score, 3.0)
226        score = aligner.score(seq1, reverse_complement(seq2), "-")
227        self.assertAlmostEqual(score, 3.0)
228        alignments = aligner.align(seq1, seq2)
229        self.assertEqual(len(alignments), 2)
230        alignment = alignments[0]
231        self.assertAlmostEqual(alignment.score, 3.0)
232        self.assertEqual(
233            str(alignment),
234            """\
235GAACT
236||--|
237GA--T
238""",
239        )
240        self.assertEqual(alignment.shape, (2, 5))
241        self.assertEqual(alignment.aligned, (((0, 2), (4, 5)), ((0, 2), (2, 3))))
242        alignment = alignments[1]
243        self.assertAlmostEqual(alignment.score, 3.0)
244        self.assertEqual(
245            str(alignment),
246            """\
247GAACT
248|-|-|
249G-A-T
250""",
251        )
252        self.assertEqual(alignment.shape, (2, 5))
253        self.assertEqual(
254            alignment.aligned, (((0, 1), (2, 3), (4, 5)), ((0, 1), (1, 2), (2, 3)))
255        )
256        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
257        self.assertEqual(len(alignments), 2)
258        alignment = alignments[0]
259        self.assertAlmostEqual(alignment.score, 3.0)
260        self.assertEqual(
261            str(alignment),
262            """\
263GAACT
264||--|
265GA--T
266""",
267        )
268        self.assertEqual(alignment.shape, (2, 5))
269        self.assertEqual(alignment.aligned, (((0, 2), (4, 5)), ((3, 1), (1, 0))))
270        alignment = alignments[1]
271        self.assertAlmostEqual(alignment.score, 3.0)
272        self.assertEqual(
273            str(alignment),
274            """\
275GAACT
276|-|-|
277G-A-T
278""",
279        )
280        self.assertEqual(alignment.shape, (2, 5))
281        self.assertEqual(
282            alignment.aligned, (((0, 1), (2, 3), (4, 5)), ((3, 2), (2, 1), (1, 0)))
283        )
284
285    def test_align_affine1_score(self):
286        seq1 = "CC"
287        seq2 = "ACCT"
288        aligner = Align.PairwiseAligner()
289        aligner.mode = "global"
290        aligner.match_score = 0
291        aligner.mismatch_score = -1
292        aligner.open_gap_score = -5
293        aligner.extend_gap_score = -1
294        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
295        self.assertEqual(
296            str(aligner),
297            """\
298Pairwise sequence aligner with parameters
299  wildcard: None
300  match_score: 0.000000
301  mismatch_score: -1.000000
302  target_internal_open_gap_score: -5.000000
303  target_internal_extend_gap_score: -1.000000
304  target_left_open_gap_score: -5.000000
305  target_left_extend_gap_score: -1.000000
306  target_right_open_gap_score: -5.000000
307  target_right_extend_gap_score: -1.000000
308  query_internal_open_gap_score: -5.000000
309  query_internal_extend_gap_score: -1.000000
310  query_left_open_gap_score: -5.000000
311  query_left_extend_gap_score: -1.000000
312  query_right_open_gap_score: -5.000000
313  query_right_extend_gap_score: -1.000000
314  mode: global
315""",
316        )
317        score = aligner.score(seq1, seq2)
318        self.assertAlmostEqual(score, -7.0)
319        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
320        self.assertAlmostEqual(score, -7.0)
321
322
323class TestPairwiseLocal(unittest.TestCase):
324    def test_smithwaterman(self):
325        aligner = Align.PairwiseAligner()
326        aligner.mode = "local"
327        aligner.gap_score = -0.1
328        self.assertEqual(aligner.algorithm, "Smith-Waterman")
329        self.assertEqual(
330            str(aligner),
331            """\
332Pairwise sequence aligner with parameters
333  wildcard: None
334  match_score: 1.000000
335  mismatch_score: 0.000000
336  target_internal_open_gap_score: -0.100000
337  target_internal_extend_gap_score: -0.100000
338  target_left_open_gap_score: -0.100000
339  target_left_extend_gap_score: -0.100000
340  target_right_open_gap_score: -0.100000
341  target_right_extend_gap_score: -0.100000
342  query_internal_open_gap_score: -0.100000
343  query_internal_extend_gap_score: -0.100000
344  query_left_open_gap_score: -0.100000
345  query_left_extend_gap_score: -0.100000
346  query_right_open_gap_score: -0.100000
347  query_right_extend_gap_score: -0.100000
348  mode: local
349""",
350        )
351        score = aligner.score("AwBw", "zABz")
352        self.assertAlmostEqual(score, 1.9)
353        alignments = aligner.align("AwBw", "zABz")
354        self.assertEqual(len(alignments), 1)
355        alignment = alignments[0]
356        self.assertAlmostEqual(alignment.score, 1.9)
357        self.assertEqual(
358            str(alignment),
359            """\
360 AwBw
361 |-|
362zA-Bz
363""",
364        )
365        self.assertEqual(alignment.shape, (2, 3))
366        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((1, 2), (2, 3))))
367
368    def test_gotoh_local(self):
369        aligner = Align.PairwiseAligner()
370        aligner.mode = "local"
371        aligner.open_gap_score = -0.1
372        aligner.extend_gap_score = 0.0
373        self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm")
374        self.assertEqual(
375            str(aligner),
376            """\
377Pairwise sequence aligner with parameters
378  wildcard: None
379  match_score: 1.000000
380  mismatch_score: 0.000000
381  target_internal_open_gap_score: -0.100000
382  target_internal_extend_gap_score: 0.000000
383  target_left_open_gap_score: -0.100000
384  target_left_extend_gap_score: 0.000000
385  target_right_open_gap_score: -0.100000
386  target_right_extend_gap_score: 0.000000
387  query_internal_open_gap_score: -0.100000
388  query_internal_extend_gap_score: 0.000000
389  query_left_open_gap_score: -0.100000
390  query_left_extend_gap_score: 0.000000
391  query_right_open_gap_score: -0.100000
392  query_right_extend_gap_score: 0.000000
393  mode: local
394""",
395        )
396        score = aligner.score("AwBw", "zABz")
397        self.assertAlmostEqual(score, 1.9)
398        alignments = aligner.align("AwBw", "zABz")
399        self.assertEqual(len(alignments), 1)
400        alignment = alignments[0]
401        self.assertAlmostEqual(alignment.score, 1.9)
402        self.assertEqual(
403            str(alignment),
404            """\
405 AwBw
406 |-|
407zA-Bz
408""",
409        )
410        self.assertEqual(alignment.shape, (2, 3))
411        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((1, 2), (2, 3))))
412
413
414class TestUnknownCharacter(unittest.TestCase):
415    def test_needlemanwunsch_simple1(self):
416        seq1 = "GACT"
417        seq2 = "GA?T"
418        aligner = Align.PairwiseAligner()
419        aligner.mode = "global"
420        aligner.gap_score = -1.0
421        aligner.mismatch_score = -1.0
422        aligner.wildcard = "?"
423        score = aligner.score(seq1, seq2)
424        self.assertAlmostEqual(score, 3.0)
425        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
426        self.assertAlmostEqual(score, 3.0)
427        alignments = aligner.align(seq1, seq2)
428        self.assertEqual(len(alignments), 1)
429        alignment = alignments[0]
430        self.assertAlmostEqual(alignment.score, 3.0)
431        self.assertEqual(
432            str(alignment),
433            """\
434GACT
435||.|
436GA?T
437""",
438        )
439        self.assertEqual(alignment.shape, (2, 4))
440        self.assertEqual(alignment.aligned, (((0, 4),), ((0, 4),)))
441        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
442        self.assertEqual(len(alignments), 1)
443        alignment = alignments[0]
444        self.assertAlmostEqual(alignment.score, 3.0)
445        self.assertEqual(
446            str(alignment),
447            """\
448GACT
449||.|
450GA?T
451""",
452        )
453        self.assertEqual(alignment.shape, (2, 4))
454        self.assertEqual(alignment.aligned, (((0, 4),), ((4, 0),)))
455        seq2 = "GAXT"
456        aligner.wildcard = "X"
457        score = aligner.score(seq1, seq2)
458        self.assertAlmostEqual(score, 3.0)
459        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
460        self.assertAlmostEqual(score, 3.0)
461        alignments = aligner.align(seq1, seq2)
462        self.assertEqual(len(alignments), 1)
463        alignment = alignments[0]
464        self.assertAlmostEqual(alignment.score, 3.0)
465        self.assertEqual(
466            str(alignment),
467            """\
468GACT
469||.|
470GAXT
471""",
472        )
473        self.assertEqual(alignment.shape, (2, 4))
474        self.assertEqual(alignment.aligned, (((0, 4),), ((0, 4),)))
475        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
476        self.assertEqual(len(alignments), 1)
477        alignment = alignments[0]
478        self.assertAlmostEqual(alignment.score, 3.0)
479        self.assertEqual(
480            str(alignment),
481            """\
482GACT
483||.|
484GAXT
485""",
486        )
487        self.assertEqual(alignment.shape, (2, 4))
488        self.assertEqual(alignment.aligned, (((0, 4),), ((4, 0),)))
489        aligner.wildcard = None
490        score = aligner.score(seq1, seq2)
491        self.assertAlmostEqual(score, 2.0)
492        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
493        self.assertAlmostEqual(score, 2.0)
494        alignments = aligner.align(seq1, seq2)
495        self.assertEqual(len(alignments), 1)
496        alignment = alignments[0]
497        self.assertAlmostEqual(alignment.score, 2.0)
498        self.assertEqual(
499            str(alignment),
500            """\
501GACT
502||.|
503GAXT
504""",
505        )
506        self.assertEqual(alignment.shape, (2, 4))
507        self.assertEqual(alignment.aligned, (((0, 4),), ((0, 4),)))
508        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
509        self.assertEqual(len(alignments), 1)
510        alignment = alignments[0]
511        self.assertAlmostEqual(alignment.score, 2.0)
512        self.assertEqual(
513            str(alignment),
514            """\
515GACT
516||.|
517GAXT
518""",
519        )
520        self.assertEqual(alignment.shape, (2, 4))
521        self.assertEqual(alignment.aligned, (((0, 4),), ((4, 0),)))
522
523    def test_needlemanwunsch_simple2(self):
524        seq1 = "GA?AT"
525        seq2 = "GAA?T"
526        aligner = Align.PairwiseAligner()
527        aligner.mode = "global"
528        aligner.wildcard = "?"
529        score = aligner.score(seq1, seq2)
530        self.assertAlmostEqual(score, 4.0)
531        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
532        self.assertAlmostEqual(score, 4.0)
533        alignments = aligner.align(seq1, seq2)
534        self.assertEqual(len(alignments), 1)
535        alignment = alignments[0]
536        self.assertAlmostEqual(alignment.score, 4.0)
537        self.assertEqual(
538            str(alignment),
539            """\
540GA?A-T
541||-|-|
542GA-A?T
543""",
544        )
545        self.assertEqual(alignment.shape, (2, 6))
546        self.assertEqual(
547            alignment.aligned, (((0, 2), (3, 4), (4, 5)), ((0, 2), (2, 3), (4, 5)))
548        )
549        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
550        self.assertEqual(len(alignments), 1)
551        alignment = alignments[0]
552        self.assertAlmostEqual(alignment.score, 4.0)
553        self.assertEqual(
554            str(alignment),
555            """\
556GA?A-T
557||-|-|
558GA-A?T
559""",
560        )
561        self.assertEqual(alignment.shape, (2, 6))
562        self.assertEqual(
563            alignment.aligned, (((0, 2), (3, 4), (4, 5)), ((5, 3), (3, 2), (1, 0)))
564        )
565        seq1 = "GAXAT"
566        seq2 = "GAAXT"
567        aligner.wildcard = "X"
568        score = aligner.score(seq1, seq2)
569        self.assertAlmostEqual(score, 4.0)
570        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
571        self.assertAlmostEqual(score, 4.0)
572        alignments = aligner.align(seq1, seq2)
573        self.assertEqual(len(alignments), 1)
574        alignment = alignments[0]
575        self.assertAlmostEqual(alignment.score, 4.0)
576        self.assertEqual(
577            str(alignment),
578            """\
579GAXA-T
580||-|-|
581GA-AXT
582""",
583        )
584        self.assertEqual(alignment.shape, (2, 6))
585        self.assertEqual(
586            alignment.aligned, (((0, 2), (3, 4), (4, 5)), ((0, 2), (2, 3), (4, 5)))
587        )
588        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
589        self.assertEqual(len(alignments), 1)
590        alignment = alignments[0]
591        self.assertAlmostEqual(alignment.score, 4.0)
592        self.assertEqual(
593            str(alignment),
594            """\
595GAXA-T
596||-|-|
597GA-AXT
598""",
599        )
600        self.assertEqual(alignment.shape, (2, 6))
601        self.assertEqual(
602            alignment.aligned, (((0, 2), (3, 4), (4, 5)), ((5, 3), (3, 2), (1, 0)))
603        )
604
605
606class TestPairwiseOpenPenalty(unittest.TestCase):
607    def test_match_score_open_penalty1(self):
608        aligner = Align.PairwiseAligner()
609        aligner.mode = "global"
610        aligner.match_score = 2
611        aligner.mismatch_score = -1
612        aligner.open_gap_score = -0.1
613        aligner.extend_gap_score = 0.0
614        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
615        self.assertEqual(
616            str(aligner),
617            """\
618Pairwise sequence aligner with parameters
619  wildcard: None
620  match_score: 2.000000
621  mismatch_score: -1.000000
622  target_internal_open_gap_score: -0.100000
623  target_internal_extend_gap_score: 0.000000
624  target_left_open_gap_score: -0.100000
625  target_left_extend_gap_score: 0.000000
626  target_right_open_gap_score: -0.100000
627  target_right_extend_gap_score: 0.000000
628  query_internal_open_gap_score: -0.100000
629  query_internal_extend_gap_score: 0.000000
630  query_left_open_gap_score: -0.100000
631  query_left_extend_gap_score: 0.000000
632  query_right_open_gap_score: -0.100000
633  query_right_extend_gap_score: 0.000000
634  mode: global
635""",
636        )
637        seq1 = "AA"
638        seq2 = "A"
639        score = aligner.score(seq1, seq2)
640        self.assertAlmostEqual(score, 1.9)
641        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
642        self.assertAlmostEqual(score, 1.9)
643        alignments = aligner.align(seq1, seq2)
644        self.assertEqual(len(alignments), 2)
645        alignment = alignments[0]
646        self.assertAlmostEqual(alignment.score, 1.9)
647        self.assertEqual(
648            str(alignment),
649            """\
650AA
651-|
652-A
653""",
654        )
655        self.assertEqual(alignment.shape, (2, 2))
656        self.assertEqual(alignment.aligned, (((1, 2),), ((0, 1),)))
657        alignment = alignments[1]
658        self.assertAlmostEqual(alignment.score, 1.9)
659        self.assertEqual(
660            str(alignment),
661            """\
662AA
663|-
664A-
665""",
666        )
667        self.assertEqual(alignment.shape, (2, 2))
668        self.assertEqual(alignment.aligned, (((0, 1),), ((0, 1),)))
669        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
670        self.assertEqual(len(alignments), 2)
671        alignment = alignments[0]
672        self.assertAlmostEqual(alignment.score, 1.9)
673        self.assertEqual(
674            str(alignment),
675            """\
676AA
677-|
678-A
679""",
680        )
681        self.assertEqual(alignment.shape, (2, 2))
682        self.assertEqual(alignment.aligned, (((1, 2),), ((1, 0),)))
683        alignment = alignments[1]
684        self.assertAlmostEqual(alignment.score, 1.9)
685        self.assertEqual(
686            str(alignment),
687            """\
688AA
689|-
690A-
691""",
692        )
693        self.assertEqual(alignment.shape, (2, 2))
694        self.assertEqual(alignment.aligned, (((0, 1),), ((1, 0),)))
695
696    def test_match_score_open_penalty2(self):
697        aligner = Align.PairwiseAligner()
698        aligner.mode = "global"
699        aligner.match_score = 1.5
700        aligner.mismatch_score = 0.0
701        aligner.open_gap_score = -0.1
702        aligner.extend_gap_score = 0.0
703        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
704        self.assertEqual(
705            str(aligner),
706            """\
707Pairwise sequence aligner with parameters
708  wildcard: None
709  match_score: 1.500000
710  mismatch_score: 0.000000
711  target_internal_open_gap_score: -0.100000
712  target_internal_extend_gap_score: 0.000000
713  target_left_open_gap_score: -0.100000
714  target_left_extend_gap_score: 0.000000
715  target_right_open_gap_score: -0.100000
716  target_right_extend_gap_score: 0.000000
717  query_internal_open_gap_score: -0.100000
718  query_internal_extend_gap_score: 0.000000
719  query_left_open_gap_score: -0.100000
720  query_left_extend_gap_score: 0.000000
721  query_right_open_gap_score: -0.100000
722  query_right_extend_gap_score: 0.000000
723  mode: global
724""",
725        )
726        seq1 = "GAA"
727        seq2 = "GA"
728        score = aligner.score(seq1, seq2)
729        self.assertAlmostEqual(score, 2.9)
730        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
731        self.assertAlmostEqual(score, 2.9)
732        alignments = aligner.align(seq1, seq2)
733        self.assertEqual(len(alignments), 2)
734        alignment = alignments[0]
735        self.assertAlmostEqual(alignment.score, 2.9)
736        self.assertEqual(
737            str(alignment),
738            """\
739GAA
740|-|
741G-A
742""",
743        )
744        self.assertEqual(alignment.shape, (2, 3))
745        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((0, 1), (1, 2))))
746        alignment = alignments[1]
747        self.assertAlmostEqual(alignment.score, 2.9)
748        self.assertEqual(
749            str(alignment),
750            """\
751GAA
752||-
753GA-
754""",
755        )
756        self.assertEqual(alignment.shape, (2, 3))
757        self.assertEqual(alignment.aligned, (((0, 2),), ((0, 2),)))
758        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
759        self.assertEqual(len(alignments), 2)
760        alignment = alignments[0]
761        self.assertAlmostEqual(alignment.score, 2.9)
762        self.assertEqual(
763            str(alignment),
764            """\
765GAA
766|-|
767G-A
768""",
769        )
770        self.assertEqual(alignment.shape, (2, 3))
771        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((2, 1), (1, 0))))
772        alignment = alignments[1]
773        self.assertAlmostEqual(alignment.score, 2.9)
774        self.assertEqual(
775            str(alignment),
776            """\
777GAA
778||-
779GA-
780""",
781        )
782        self.assertEqual(alignment.shape, (2, 3))
783        self.assertEqual(alignment.aligned, (((0, 2),), ((2, 0),)))
784
785    def test_match_score_open_penalty3(self):
786        aligner = Align.PairwiseAligner()
787        aligner.mode = "global"
788        aligner.query_open_gap_score = -0.1
789        aligner.query_extend_gap_score = 0.0
790        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
791        self.assertEqual(
792            str(aligner),
793            """\
794Pairwise sequence aligner with parameters
795  wildcard: None
796  match_score: 1.000000
797  mismatch_score: 0.000000
798  target_internal_open_gap_score: 0.000000
799  target_internal_extend_gap_score: 0.000000
800  target_left_open_gap_score: 0.000000
801  target_left_extend_gap_score: 0.000000
802  target_right_open_gap_score: 0.000000
803  target_right_extend_gap_score: 0.000000
804  query_internal_open_gap_score: -0.100000
805  query_internal_extend_gap_score: 0.000000
806  query_left_open_gap_score: -0.100000
807  query_left_extend_gap_score: 0.000000
808  query_right_open_gap_score: -0.100000
809  query_right_extend_gap_score: 0.000000
810  mode: global
811""",
812        )
813        seq1 = "GAACT"
814        seq2 = "GAT"
815        score = aligner.score(seq1, seq2)
816        self.assertAlmostEqual(score, 2.9)
817        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
818        self.assertAlmostEqual(score, 2.9)
819        alignments = aligner.align(seq1, seq2)
820        self.assertEqual(len(alignments), 1)
821        alignment = alignments[0]
822        self.assertAlmostEqual(alignment.score, 2.9)
823        self.assertEqual(
824            str(alignment),
825            """\
826GAACT
827||--|
828GA--T
829""",
830        )
831        self.assertEqual(alignment.shape, (2, 5))
832        self.assertEqual(alignment.aligned, (((0, 2), (4, 5)), ((0, 2), (2, 3))))
833        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
834        self.assertEqual(len(alignments), 1)
835        alignment = alignments[0]
836        self.assertAlmostEqual(alignment.score, 2.9)
837        self.assertEqual(
838            str(alignment),
839            """\
840GAACT
841||--|
842GA--T
843""",
844        )
845        self.assertEqual(alignment.shape, (2, 5))
846        self.assertEqual(alignment.aligned, (((0, 2), (4, 5)), ((3, 1), (1, 0))))
847
848    def test_match_score_open_penalty4(self):
849        aligner = Align.PairwiseAligner()
850        aligner.mode = "global"
851        aligner.mismatch_score = -2.0
852        aligner.open_gap_score = -0.1
853        aligner.extend_gap_score = 0.0
854        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
855        self.assertEqual(
856            str(aligner),
857            """\
858Pairwise sequence aligner with parameters
859  wildcard: None
860  match_score: 1.000000
861  mismatch_score: -2.000000
862  target_internal_open_gap_score: -0.100000
863  target_internal_extend_gap_score: 0.000000
864  target_left_open_gap_score: -0.100000
865  target_left_extend_gap_score: 0.000000
866  target_right_open_gap_score: -0.100000
867  target_right_extend_gap_score: 0.000000
868  query_internal_open_gap_score: -0.100000
869  query_internal_extend_gap_score: 0.000000
870  query_left_open_gap_score: -0.100000
871  query_left_extend_gap_score: 0.000000
872  query_right_open_gap_score: -0.100000
873  query_right_extend_gap_score: 0.000000
874  mode: global
875""",
876        )
877        seq1 = "GCT"
878        seq2 = "GATA"
879        score = aligner.score(seq1, seq2)
880        self.assertAlmostEqual(score, 1.7)
881        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
882        self.assertAlmostEqual(score, 1.7)
883        alignments = aligner.align(seq1, seq2)
884        self.assertEqual(len(alignments), 2)
885        alignment = alignments[0]
886        self.assertAlmostEqual(alignment.score, 1.7)
887        self.assertEqual(
888            str(alignment),
889            """\
890G-CT-
891|--|-
892GA-TA
893""",
894        )
895        self.assertEqual(alignment.shape, (2, 5))
896        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((0, 1), (2, 3))))
897        alignment = alignments[1]
898        self.assertAlmostEqual(alignment.score, 1.7)
899        self.assertEqual(
900            str(alignment),
901            """\
902GC-T-
903|--|-
904G-ATA
905""",
906        )
907        self.assertEqual(alignment.shape, (2, 5))
908        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((0, 1), (2, 3))))
909        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
910        self.assertEqual(len(alignments), 2)
911        alignment = alignments[0]
912        self.assertAlmostEqual(alignment.score, 1.7)
913        self.assertEqual(
914            str(alignment),
915            """\
916G-CT-
917|--|-
918GA-TA
919""",
920        )
921        self.assertEqual(alignment.shape, (2, 5))
922        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((4, 3), (2, 1))))
923        alignment = alignments[1]
924        self.assertAlmostEqual(alignment.score, 1.7)
925        self.assertEqual(
926            str(alignment),
927            """\
928GC-T-
929|--|-
930G-ATA
931""",
932        )
933        self.assertEqual(alignment.shape, (2, 5))
934        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((4, 3), (2, 1))))
935
936
937class TestPairwiseExtendPenalty(unittest.TestCase):
938    def test_extend_penalty1(self):
939        aligner = Align.PairwiseAligner()
940        aligner.mode = "global"
941        aligner.open_gap_score = -0.2
942        aligner.extend_gap_score = -0.5
943        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
944        self.assertEqual(
945            str(aligner),
946            """\
947Pairwise sequence aligner with parameters
948  wildcard: None
949  match_score: 1.000000
950  mismatch_score: 0.000000
951  target_internal_open_gap_score: -0.200000
952  target_internal_extend_gap_score: -0.500000
953  target_left_open_gap_score: -0.200000
954  target_left_extend_gap_score: -0.500000
955  target_right_open_gap_score: -0.200000
956  target_right_extend_gap_score: -0.500000
957  query_internal_open_gap_score: -0.200000
958  query_internal_extend_gap_score: -0.500000
959  query_left_open_gap_score: -0.200000
960  query_left_extend_gap_score: -0.500000
961  query_right_open_gap_score: -0.200000
962  query_right_extend_gap_score: -0.500000
963  mode: global
964""",
965        )
966        seq1 = "GACT"
967        seq2 = "GT"
968        score = aligner.score(seq1, seq2)
969        self.assertAlmostEqual(score, 1.3)
970        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
971        self.assertAlmostEqual(score, 1.3)
972        alignments = aligner.align(seq1, seq2)
973        self.assertEqual(len(alignments), 1)
974        alignment = alignments[0]
975        self.assertAlmostEqual(alignment.score, 1.3)
976        self.assertEqual(
977            str(alignment),
978            """\
979GACT
980|--|
981G--T
982""",
983        )
984        self.assertEqual(alignment.shape, (2, 4))
985        self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((0, 1), (1, 2))))
986        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
987        self.assertEqual(len(alignments), 1)
988        alignment = alignments[0]
989        self.assertAlmostEqual(alignment.score, 1.3)
990        self.assertEqual(
991            str(alignment),
992            """\
993GACT
994|--|
995G--T
996""",
997        )
998        self.assertEqual(alignment.shape, (2, 4))
999        self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((2, 1), (1, 0))))
1000
1001    def test_extend_penalty2(self):
1002        aligner = Align.PairwiseAligner()
1003        aligner.mode = "global"
1004        aligner.open_gap_score = -0.2
1005        aligner.extend_gap_score = -1.5
1006        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
1007        self.assertEqual(
1008            str(aligner),
1009            """\
1010Pairwise sequence aligner with parameters
1011  wildcard: None
1012  match_score: 1.000000
1013  mismatch_score: 0.000000
1014  target_internal_open_gap_score: -0.200000
1015  target_internal_extend_gap_score: -1.500000
1016  target_left_open_gap_score: -0.200000
1017  target_left_extend_gap_score: -1.500000
1018  target_right_open_gap_score: -0.200000
1019  target_right_extend_gap_score: -1.500000
1020  query_internal_open_gap_score: -0.200000
1021  query_internal_extend_gap_score: -1.500000
1022  query_left_open_gap_score: -0.200000
1023  query_left_extend_gap_score: -1.500000
1024  query_right_open_gap_score: -0.200000
1025  query_right_extend_gap_score: -1.500000
1026  mode: global
1027""",
1028        )
1029        seq1 = "GACT"
1030        seq2 = "GT"
1031        score = aligner.score(seq1, seq2)
1032        self.assertAlmostEqual(score, 0.6)
1033        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1034        self.assertAlmostEqual(score, 0.6)
1035        alignments = aligner.align(seq1, seq2)
1036        self.assertEqual(len(alignments), 2)
1037        alignment = alignments[0]
1038        self.assertAlmostEqual(alignment.score, 0.6)
1039        self.assertEqual(
1040            str(alignment),
1041            """\
1042GACT
1043-.-|
1044-G-T
1045""",
1046        )
1047        self.assertEqual(alignment.shape, (2, 4))
1048        self.assertEqual(alignment.aligned, (((1, 2), (3, 4)), ((0, 1), (1, 2))))
1049        alignment = alignments[1]
1050        self.assertAlmostEqual(alignment.score, 0.6)
1051        self.assertEqual(
1052            str(alignment),
1053            """\
1054GACT
1055|-.-
1056G-T-
1057""",
1058        )
1059        self.assertEqual(alignment.shape, (2, 4))
1060        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((0, 1), (1, 2))))
1061        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1062        self.assertEqual(len(alignments), 2)
1063        alignment = alignments[0]
1064        self.assertAlmostEqual(alignment.score, 0.6)
1065        self.assertEqual(
1066            str(alignment),
1067            """\
1068GACT
1069-.-|
1070-G-T
1071""",
1072        )
1073        self.assertEqual(alignment.shape, (2, 4))
1074        self.assertEqual(alignment.aligned, (((1, 2), (3, 4)), ((2, 1), (1, 0))))
1075        alignment = alignments[1]
1076        self.assertAlmostEqual(alignment.score, 0.6)
1077        self.assertEqual(
1078            str(alignment),
1079            """\
1080GACT
1081|-.-
1082G-T-
1083""",
1084        )
1085        self.assertEqual(alignment.shape, (2, 4))
1086        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((2, 1), (1, 0))))
1087
1088
1089class TestPairwisePenalizeExtendWhenOpening(unittest.TestCase):
1090    def test_penalize_extend_when_opening(self):
1091        aligner = Align.PairwiseAligner()
1092        aligner.mode = "global"
1093        aligner.open_gap_score = -1.7
1094        aligner.extend_gap_score = -1.5
1095        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
1096        self.assertEqual(
1097            str(aligner),
1098            """\
1099Pairwise sequence aligner with parameters
1100  wildcard: None
1101  match_score: 1.000000
1102  mismatch_score: 0.000000
1103  target_internal_open_gap_score: -1.700000
1104  target_internal_extend_gap_score: -1.500000
1105  target_left_open_gap_score: -1.700000
1106  target_left_extend_gap_score: -1.500000
1107  target_right_open_gap_score: -1.700000
1108  target_right_extend_gap_score: -1.500000
1109  query_internal_open_gap_score: -1.700000
1110  query_internal_extend_gap_score: -1.500000
1111  query_left_open_gap_score: -1.700000
1112  query_left_extend_gap_score: -1.500000
1113  query_right_open_gap_score: -1.700000
1114  query_right_extend_gap_score: -1.500000
1115  mode: global
1116""",
1117        )
1118        seq1 = "GACT"
1119        seq2 = "GT"
1120        score = aligner.score(seq1, seq2)
1121        self.assertAlmostEqual(score, -1.2)
1122        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1123        self.assertAlmostEqual(score, -1.2)
1124        alignments = aligner.align(seq1, seq2)
1125        self.assertEqual(len(alignments), 1)
1126        alignment = alignments[0]
1127        self.assertAlmostEqual(alignment.score, -1.2)
1128        self.assertEqual(
1129            str(alignment),
1130            """\
1131GACT
1132|--|
1133G--T
1134""",
1135        )
1136        self.assertEqual(alignment.shape, (2, 4))
1137        self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((0, 1), (1, 2))))
1138        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1139        self.assertEqual(len(alignments), 1)
1140        alignment = alignments[0]
1141        self.assertAlmostEqual(alignment.score, -1.2)
1142        self.assertEqual(
1143            str(alignment),
1144            """\
1145GACT
1146|--|
1147G--T
1148""",
1149        )
1150        self.assertEqual(alignment.shape, (2, 4))
1151        self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((2, 1), (1, 0))))
1152
1153
1154class TestPairwisePenalizeEndgaps(unittest.TestCase):
1155    def test_penalize_end_gaps(self):
1156        aligner = Align.PairwiseAligner()
1157        aligner.mode = "global"
1158        aligner.open_gap_score = -0.2
1159        aligner.extend_gap_score = -0.8
1160        end_score = 0.0
1161        aligner.target_end_gap_score = end_score
1162        aligner.query_end_gap_score = end_score
1163        self.assertEqual(
1164            str(aligner),
1165            """\
1166Pairwise sequence aligner with parameters
1167  wildcard: None
1168  match_score: 1.000000
1169  mismatch_score: 0.000000
1170  target_internal_open_gap_score: -0.200000
1171  target_internal_extend_gap_score: -0.800000
1172  target_left_open_gap_score: 0.000000
1173  target_left_extend_gap_score: 0.000000
1174  target_right_open_gap_score: 0.000000
1175  target_right_extend_gap_score: 0.000000
1176  query_internal_open_gap_score: -0.200000
1177  query_internal_extend_gap_score: -0.800000
1178  query_left_open_gap_score: 0.000000
1179  query_left_extend_gap_score: 0.000000
1180  query_right_open_gap_score: 0.000000
1181  query_right_extend_gap_score: 0.000000
1182  mode: global
1183""",
1184        )
1185        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
1186        seq1 = "GACT"
1187        seq2 = "GT"
1188        score = aligner.score(seq1, seq2)
1189        self.assertAlmostEqual(score, 1.0)
1190        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1191        self.assertAlmostEqual(score, 1.0)
1192        alignments = aligner.align(seq1, seq2)
1193        self.assertEqual(len(alignments), 3)
1194        alignment = alignments[0]
1195        self.assertAlmostEqual(alignment.score, 1.0)
1196        self.assertEqual(
1197            str(alignment),
1198            """\
1199GACT
1200--.|
1201--GT
1202""",
1203        )
1204        self.assertEqual(alignment.shape, (2, 4))
1205        self.assertEqual(alignment.aligned, (((2, 4),), ((0, 2),)))
1206        alignment = alignments[1]
1207        self.assertAlmostEqual(alignment.score, 1.0)
1208        self.assertEqual(
1209            str(alignment),
1210            """\
1211GACT
1212|--|
1213G--T
1214""",
1215        )
1216        self.assertEqual(alignment.shape, (2, 4))
1217        self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((0, 1), (1, 2))))
1218        alignment = alignments[2]
1219        self.assertAlmostEqual(alignment.score, 1.0)
1220        self.assertEqual(
1221            str(alignment),
1222            """\
1223GACT
1224|.--
1225GT--
1226""",
1227        )
1228        self.assertEqual(alignment.shape, (2, 4))
1229        self.assertEqual(alignment.aligned, (((0, 2),), ((0, 2),)))
1230        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1231        self.assertEqual(len(alignments), 3)
1232        alignment = alignments[0]
1233        self.assertAlmostEqual(alignment.score, 1.0)
1234        self.assertEqual(
1235            str(alignment),
1236            """\
1237GACT
1238--.|
1239--GT
1240""",
1241        )
1242        self.assertEqual(alignment.shape, (2, 4))
1243        self.assertEqual(alignment.aligned, (((2, 4),), ((2, 0),)))
1244        alignment = alignments[1]
1245        self.assertAlmostEqual(alignment.score, 1.0)
1246        self.assertEqual(
1247            str(alignment),
1248            """\
1249GACT
1250|--|
1251G--T
1252""",
1253        )
1254        self.assertEqual(alignment.shape, (2, 4))
1255        self.assertEqual(alignment.aligned, (((0, 1), (3, 4)), ((2, 1), (1, 0))))
1256        alignment = alignments[2]
1257        self.assertAlmostEqual(alignment.score, 1.0)
1258        self.assertEqual(
1259            str(alignment),
1260            """\
1261GACT
1262|.--
1263GT--
1264""",
1265        )
1266        self.assertEqual(alignment.shape, (2, 4))
1267        self.assertEqual(alignment.aligned, (((0, 2),), ((2, 0),)))
1268
1269
1270class TestPairwiseSeparateGapPenalties(unittest.TestCase):
1271    def test_separate_gap_penalties1(self):
1272        seq1 = "GAT"
1273        seq2 = "GTCT"
1274        aligner = Align.PairwiseAligner()
1275        aligner.mode = "local"
1276        open_score, extend_score = (-0.3, 0)
1277        aligner.target_open_gap_score = open_score
1278        aligner.target_extend_gap_score = extend_score
1279        aligner.target_end_open_gap_score = open_score
1280        aligner.target_end_extend_gap_score = extend_score
1281        open_score, extend_score = (-0.8, 0)
1282        aligner.query_open_gap_score = open_score
1283        aligner.query_extend_gap_score = extend_score
1284        aligner.query_end_open_gap_score = open_score
1285        aligner.query_end_extend_gap_score = extend_score
1286        self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm")
1287        self.assertEqual(
1288            str(aligner),
1289            """\
1290Pairwise sequence aligner with parameters
1291  wildcard: None
1292  match_score: 1.000000
1293  mismatch_score: 0.000000
1294  target_internal_open_gap_score: -0.300000
1295  target_internal_extend_gap_score: 0.000000
1296  target_left_open_gap_score: -0.300000
1297  target_left_extend_gap_score: 0.000000
1298  target_right_open_gap_score: -0.300000
1299  target_right_extend_gap_score: 0.000000
1300  query_internal_open_gap_score: -0.800000
1301  query_internal_extend_gap_score: 0.000000
1302  query_left_open_gap_score: -0.800000
1303  query_left_extend_gap_score: 0.000000
1304  query_right_open_gap_score: -0.800000
1305  query_right_extend_gap_score: 0.000000
1306  mode: local
1307""",
1308        )
1309        score = aligner.score(seq1, seq2)
1310        self.assertAlmostEqual(score, 1.7)
1311        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1312        self.assertAlmostEqual(score, 1.7)
1313        alignments = aligner.align(seq1, seq2)
1314        self.assertEqual(len(alignments), 2)
1315        alignment = alignments[0]
1316        self.assertAlmostEqual(alignment.score, 1.7)
1317        self.assertEqual(
1318            str(alignment),
1319            """\
1320G-AT
1321|-.|
1322GTCT
1323""",
1324        )
1325        self.assertEqual(alignment.shape, (2, 4))
1326        self.assertEqual(alignment.aligned, (((0, 1), (1, 3)), ((0, 1), (2, 4))))
1327        alignment = alignments[1]
1328        self.assertAlmostEqual(alignment.score, 1.7)
1329        self.assertEqual(
1330            str(alignment),
1331            """\
1332GA-T
1333|.-|
1334GTCT
1335""",
1336        )
1337        self.assertEqual(alignment.shape, (2, 4))
1338        self.assertEqual(alignment.aligned, (((0, 2), (2, 3)), ((0, 2), (3, 4))))
1339        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1340        self.assertEqual(len(alignments), 2)
1341        alignment = alignments[0]
1342        self.assertAlmostEqual(alignment.score, 1.7)
1343        self.assertEqual(
1344            str(alignment),
1345            """\
1346G-AT
1347|-.|
1348GTCT
1349""",
1350        )
1351        self.assertEqual(alignment.shape, (2, 4))
1352        self.assertEqual(alignment.aligned, (((0, 1), (1, 3)), ((4, 3), (2, 0))))
1353        alignment = alignments[1]
1354        self.assertAlmostEqual(alignment.score, 1.7)
1355        self.assertEqual(
1356            str(alignment),
1357            """\
1358GA-T
1359|.-|
1360GTCT
1361""",
1362        )
1363        self.assertEqual(alignment.shape, (2, 4))
1364        self.assertEqual(alignment.aligned, (((0, 2), (2, 3)), ((4, 2), (1, 0))))
1365
1366    def test_separate_gap_penalties2(self):
1367        aligner = Align.PairwiseAligner()
1368        aligner.mode = "local"
1369        aligner.target_open_gap_score = -0.3
1370        aligner.target_extend_gap_score = 0.0
1371        aligner.query_open_gap_score = -0.2
1372        aligner.query_extend_gap_score = 0.0
1373        self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm")
1374        self.assertEqual(
1375            str(aligner),
1376            """\
1377Pairwise sequence aligner with parameters
1378  wildcard: None
1379  match_score: 1.000000
1380  mismatch_score: 0.000000
1381  target_internal_open_gap_score: -0.300000
1382  target_internal_extend_gap_score: 0.000000
1383  target_left_open_gap_score: -0.300000
1384  target_left_extend_gap_score: 0.000000
1385  target_right_open_gap_score: -0.300000
1386  target_right_extend_gap_score: 0.000000
1387  query_internal_open_gap_score: -0.200000
1388  query_internal_extend_gap_score: 0.000000
1389  query_left_open_gap_score: -0.200000
1390  query_left_extend_gap_score: 0.000000
1391  query_right_open_gap_score: -0.200000
1392  query_right_extend_gap_score: 0.000000
1393  mode: local
1394""",
1395        )
1396        seq1 = "GAT"
1397        seq2 = "GTCT"
1398        score = aligner.score(seq1, seq2)
1399        self.assertAlmostEqual(score, 1.8)
1400        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1401        self.assertAlmostEqual(score, 1.8)
1402        alignments = aligner.align(seq1, seq2)
1403        self.assertEqual(len(alignments), 1)
1404        alignment = alignments[0]
1405        self.assertAlmostEqual(alignment.score, 1.8)
1406        self.assertEqual(
1407            str(alignment),
1408            """\
1409GAT
1410|-|
1411G-TCT
1412""",
1413        )
1414        self.assertEqual(alignment.shape, (2, 3))
1415        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((0, 1), (1, 2))))
1416        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1417        self.assertEqual(len(alignments), 1)
1418        alignment = alignments[0]
1419        self.assertAlmostEqual(alignment.score, 1.8)
1420        self.assertEqual(
1421            str(alignment),
1422            """\
1423GAT
1424|-|
1425G-TCT
1426""",
1427        )
1428        self.assertEqual(alignment.shape, (2, 3))
1429        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((4, 3), (3, 2))))
1430
1431
1432class TestPairwiseSeparateGapPenaltiesWithExtension(unittest.TestCase):
1433    def test_separate_gap_penalties_with_extension(self):
1434        seq1 = "GAAT"
1435        seq2 = "GTCCT"
1436        aligner = Align.PairwiseAligner()
1437        aligner.mode = "local"
1438        open_score, extend_score = (-0.1, 0)
1439        aligner.target_open_gap_score = open_score
1440        aligner.target_extend_gap_score = extend_score
1441        aligner.target_end_open_gap_score = open_score
1442        aligner.target_end_extend_gap_score = extend_score
1443        score = -0.1
1444        aligner.query_gap_score = score
1445        aligner.query_end_gap_score = score
1446        self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm")
1447        self.assertEqual(
1448            str(aligner),
1449            """\
1450Pairwise sequence aligner with parameters
1451  wildcard: None
1452  match_score: 1.000000
1453  mismatch_score: 0.000000
1454  target_internal_open_gap_score: -0.100000
1455  target_internal_extend_gap_score: 0.000000
1456  target_left_open_gap_score: -0.100000
1457  target_left_extend_gap_score: 0.000000
1458  target_right_open_gap_score: -0.100000
1459  target_right_extend_gap_score: 0.000000
1460  query_internal_open_gap_score: -0.100000
1461  query_internal_extend_gap_score: -0.100000
1462  query_left_open_gap_score: -0.100000
1463  query_left_extend_gap_score: -0.100000
1464  query_right_open_gap_score: -0.100000
1465  query_right_extend_gap_score: -0.100000
1466  mode: local
1467""",
1468        )
1469        score = aligner.score(seq1, seq2)
1470        self.assertAlmostEqual(score, 1.9)
1471        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1472        self.assertAlmostEqual(score, 1.9)
1473        alignments = aligner.align(seq1, seq2)
1474        self.assertEqual(len(alignments), 3)
1475        alignment = alignments[0]
1476        self.assertAlmostEqual(alignment.score, 1.9)
1477        self.assertEqual(
1478            str(alignment),
1479            """\
1480G-AAT
1481|-..|
1482GTCCT
1483""",
1484        )
1485        self.assertEqual(alignment.shape, (2, 5))
1486        self.assertEqual(alignment.aligned, (((0, 1), (1, 4)), ((0, 1), (2, 5))))
1487        alignment = alignments[1]
1488        self.assertAlmostEqual(alignment.score, 1.9)
1489        self.assertEqual(
1490            str(alignment),
1491            """\
1492GA-AT
1493|.-.|
1494GTCCT
1495""",
1496        )
1497        self.assertEqual(alignment.shape, (2, 5))
1498        self.assertEqual(alignment.aligned, (((0, 2), (2, 4)), ((0, 2), (3, 5))))
1499        alignment = alignments[2]
1500        self.assertAlmostEqual(alignment.score, 1.9)
1501        self.assertEqual(
1502            str(alignment),
1503            """\
1504GAA-T
1505|..-|
1506GTCCT
1507""",
1508        )
1509        self.assertEqual(alignment.shape, (2, 5))
1510        self.assertEqual(alignment.aligned, (((0, 3), (3, 4)), ((0, 3), (4, 5))))
1511        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1512        self.assertEqual(len(alignments), 3)
1513        alignment = alignments[0]
1514        self.assertAlmostEqual(alignment.score, 1.9)
1515        self.assertEqual(
1516            str(alignment),
1517            """\
1518G-AAT
1519|-..|
1520GTCCT
1521""",
1522        )
1523        self.assertEqual(alignment.shape, (2, 5))
1524        self.assertEqual(alignment.aligned, (((0, 1), (1, 4)), ((5, 4), (3, 0))))
1525        alignment = alignments[1]
1526        self.assertAlmostEqual(alignment.score, 1.9)
1527        self.assertEqual(
1528            str(alignment),
1529            """\
1530GA-AT
1531|.-.|
1532GTCCT
1533""",
1534        )
1535        self.assertEqual(alignment.shape, (2, 5))
1536        self.assertEqual(alignment.aligned, (((0, 2), (2, 4)), ((5, 3), (2, 0))))
1537        alignment = alignments[2]
1538        self.assertAlmostEqual(alignment.score, 1.9)
1539        self.assertEqual(
1540            str(alignment),
1541            """\
1542GAA-T
1543|..-|
1544GTCCT
1545""",
1546        )
1547        self.assertEqual(alignment.shape, (2, 5))
1548        self.assertEqual(alignment.aligned, (((0, 3), (3, 4)), ((5, 2), (1, 0))))
1549
1550
1551class TestPairwiseMatchDictionary(unittest.TestCase):
1552
1553    match_dict = {("A", "A"): 1.5, ("A", "T"): 0.5, ("T", "A"): 0.5, ("T", "T"): 1.0}
1554
1555    def test_match_dictionary1(self):
1556        try:
1557            from Bio.Align import substitution_matrices
1558        except ImportError:
1559            return
1560        substitution_matrix = substitution_matrices.Array(data=self.match_dict)
1561        seq1 = "ATAT"
1562        seq2 = "ATT"
1563        aligner = Align.PairwiseAligner()
1564        aligner.mode = "local"
1565        aligner.substitution_matrix = substitution_matrix
1566        aligner.open_gap_score = -0.5
1567        aligner.extend_gap_score = 0.0
1568        self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm")
1569        lines = str(aligner).splitlines()
1570        self.assertEqual(len(lines), 15)
1571        self.assertEqual(lines[0], "Pairwise sequence aligner with parameters")
1572        line = lines[1]
1573        prefix = "  substitution_matrix: <Array object at "
1574        suffix = ">"
1575        self.assertTrue(line.startswith(prefix))
1576        self.assertTrue(line.endswith(suffix))
1577        address = int(line[len(prefix) : -len(suffix)], 16)
1578        self.assertEqual(lines[2], "  target_internal_open_gap_score: -0.500000")
1579        self.assertEqual(lines[3], "  target_internal_extend_gap_score: 0.000000")
1580        self.assertEqual(lines[4], "  target_left_open_gap_score: -0.500000")
1581        self.assertEqual(lines[5], "  target_left_extend_gap_score: 0.000000")
1582        self.assertEqual(lines[6], "  target_right_open_gap_score: -0.500000")
1583        self.assertEqual(lines[7], "  target_right_extend_gap_score: 0.000000")
1584        self.assertEqual(lines[8], "  query_internal_open_gap_score: -0.500000")
1585        self.assertEqual(lines[9], "  query_internal_extend_gap_score: 0.000000")
1586        self.assertEqual(lines[10], "  query_left_open_gap_score: -0.500000")
1587        self.assertEqual(lines[11], "  query_left_extend_gap_score: 0.000000")
1588        self.assertEqual(lines[12], "  query_right_open_gap_score: -0.500000")
1589        self.assertEqual(lines[13], "  query_right_extend_gap_score: 0.000000")
1590        self.assertEqual(lines[14], "  mode: local")
1591        score = aligner.score(seq1, seq2)
1592        self.assertAlmostEqual(score, 3.0)
1593        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1594        self.assertAlmostEqual(score, 3.0)
1595        alignments = aligner.align(seq1, seq2)
1596        self.assertEqual(len(alignments), 2)
1597        alignment = alignments[0]
1598        self.assertAlmostEqual(alignment.score, 3.0)
1599        self.assertEqual(
1600            str(alignment),
1601            """\
1602ATAT
1603||.
1604ATT
1605""",
1606        )
1607        self.assertEqual(alignment.shape, (2, 3))
1608        self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),)))
1609        alignment = alignments[1]
1610        self.assertAlmostEqual(alignment.score, 3.0)
1611        self.assertEqual(
1612            str(alignment),
1613            """\
1614ATAT
1615||-|
1616AT-T
1617""",
1618        )
1619        self.assertEqual(alignment.shape, (2, 4))
1620        self.assertEqual(alignment.aligned, (((0, 2), (3, 4)), ((0, 2), (2, 3))))
1621        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1622        self.assertEqual(len(alignments), 2)
1623        alignment = alignments[0]
1624        self.assertAlmostEqual(alignment.score, 3.0)
1625        self.assertEqual(
1626            str(alignment),
1627            """\
1628ATAT
1629||.
1630ATT
1631""",
1632        )
1633        self.assertEqual(alignment.shape, (2, 3))
1634        self.assertEqual(alignment.aligned, (((0, 3),), ((3, 0),)))
1635        alignment = alignments[1]
1636        self.assertAlmostEqual(alignment.score, 3.0)
1637        self.assertEqual(
1638            str(alignment),
1639            """\
1640ATAT
1641||-|
1642AT-T
1643""",
1644        )
1645        self.assertEqual(alignment.shape, (2, 4))
1646        self.assertEqual(alignment.aligned, (((0, 2), (3, 4)), ((3, 1), (1, 0))))
1647
1648    def test_match_dictionary2(self):
1649        try:
1650            from Bio.Align import substitution_matrices
1651        except ImportError:
1652            return
1653        substitution_matrix = substitution_matrices.Array(data=self.match_dict)
1654        seq1 = "ATAT"
1655        seq2 = "ATT"
1656        aligner = Align.PairwiseAligner()
1657        aligner.mode = "local"
1658        aligner.substitution_matrix = substitution_matrix
1659        aligner.open_gap_score = -1.0
1660        aligner.extend_gap_score = 0.0
1661        lines = str(aligner).splitlines()
1662        self.assertEqual(len(lines), 15)
1663        self.assertEqual(lines[0], "Pairwise sequence aligner with parameters")
1664        line = lines[1]
1665        prefix = "  substitution_matrix: <Array object at "
1666        suffix = ">"
1667        self.assertTrue(line.startswith(prefix))
1668        self.assertTrue(line.endswith(suffix))
1669        address = int(line[len(prefix) : -len(suffix)], 16)
1670        self.assertEqual(lines[2], "  target_internal_open_gap_score: -1.000000")
1671        self.assertEqual(lines[3], "  target_internal_extend_gap_score: 0.000000")
1672        self.assertEqual(lines[4], "  target_left_open_gap_score: -1.000000")
1673        self.assertEqual(lines[5], "  target_left_extend_gap_score: 0.000000")
1674        self.assertEqual(lines[6], "  target_right_open_gap_score: -1.000000")
1675        self.assertEqual(lines[7], "  target_right_extend_gap_score: 0.000000")
1676        self.assertEqual(lines[8], "  query_internal_open_gap_score: -1.000000")
1677        self.assertEqual(lines[9], "  query_internal_extend_gap_score: 0.000000")
1678        self.assertEqual(lines[10], "  query_left_open_gap_score: -1.000000")
1679        self.assertEqual(lines[11], "  query_left_extend_gap_score: 0.000000")
1680        self.assertEqual(lines[12], "  query_right_open_gap_score: -1.000000")
1681        self.assertEqual(lines[13], "  query_right_extend_gap_score: 0.000000")
1682        self.assertEqual(lines[14], "  mode: local")
1683        score = aligner.score(seq1, seq2)
1684        self.assertAlmostEqual(score, 3.0)
1685        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1686        self.assertAlmostEqual(score, 3.0)
1687        alignments = aligner.align(seq1, seq2)
1688        self.assertEqual(len(alignments), 1)
1689        alignment = alignments[0]
1690        self.assertAlmostEqual(alignment.score, 3.0)
1691        self.assertEqual(
1692            str(alignment),
1693            """\
1694ATAT
1695||.
1696ATT
1697""",
1698        )
1699        self.assertEqual(alignment.shape, (2, 3))
1700        self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),)))
1701        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1702        self.assertEqual(len(alignments), 1)
1703        alignment = alignments[0]
1704        self.assertAlmostEqual(alignment.score, 3.0)
1705        self.assertEqual(
1706            str(alignment),
1707            """\
1708ATAT
1709||.
1710ATT
1711""",
1712        )
1713        self.assertEqual(alignment.shape, (2, 3))
1714        self.assertEqual(alignment.aligned, (((0, 3),), ((3, 0),)))
1715
1716    def test_match_dictionary3(self):
1717        try:
1718            from Bio.Align import substitution_matrices
1719        except ImportError:
1720            return
1721        substitution_matrix = substitution_matrices.Array(data=self.match_dict)
1722        seq1 = "ATT"
1723        seq2 = "ATAT"
1724        aligner = Align.PairwiseAligner()
1725        aligner.mode = "local"
1726        aligner.substitution_matrix = substitution_matrix
1727        aligner.open_gap_score = -1.0
1728        aligner.extend_gap_score = 0.0
1729        lines = str(aligner).splitlines()
1730        self.assertEqual(len(lines), 15)
1731        self.assertEqual(lines[0], "Pairwise sequence aligner with parameters")
1732        line = lines[1]
1733        prefix = "  substitution_matrix: <Array object at "
1734        suffix = ">"
1735        self.assertTrue(line.startswith(prefix))
1736        self.assertTrue(line.endswith(suffix))
1737        address = int(line[len(prefix) : -len(suffix)], 16)
1738        self.assertEqual(lines[2], "  target_internal_open_gap_score: -1.000000")
1739        self.assertEqual(lines[3], "  target_internal_extend_gap_score: 0.000000")
1740        self.assertEqual(lines[4], "  target_left_open_gap_score: -1.000000")
1741        self.assertEqual(lines[5], "  target_left_extend_gap_score: 0.000000")
1742        self.assertEqual(lines[6], "  target_right_open_gap_score: -1.000000")
1743        self.assertEqual(lines[7], "  target_right_extend_gap_score: 0.000000")
1744        self.assertEqual(lines[8], "  query_internal_open_gap_score: -1.000000")
1745        self.assertEqual(lines[9], "  query_internal_extend_gap_score: 0.000000")
1746        self.assertEqual(lines[10], "  query_left_open_gap_score: -1.000000")
1747        self.assertEqual(lines[11], "  query_left_extend_gap_score: 0.000000")
1748        self.assertEqual(lines[12], "  query_right_open_gap_score: -1.000000")
1749        self.assertEqual(lines[13], "  query_right_extend_gap_score: 0.000000")
1750        self.assertEqual(lines[14], "  mode: local")
1751        score = aligner.score(seq1, seq2)
1752        self.assertAlmostEqual(score, 3.0)
1753        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1754        self.assertAlmostEqual(score, 3.0)
1755        alignments = aligner.align(seq1, seq2)
1756        self.assertEqual(len(alignments), 1)
1757        alignment = alignments[0]
1758        self.assertAlmostEqual(alignment.score, 3.0)
1759        self.assertEqual(
1760            str(alignment),
1761            """\
1762ATT
1763||.
1764ATAT
1765""",
1766        )
1767        self.assertEqual(alignment.shape, (2, 3))
1768        self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),)))
1769        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1770        self.assertEqual(len(alignments), 1)
1771        alignment = alignments[0]
1772        self.assertAlmostEqual(alignment.score, 3.0)
1773        self.assertEqual(
1774            str(alignment),
1775            """\
1776ATT
1777||.
1778ATAT
1779""",
1780        )
1781        self.assertEqual(alignment.shape, (2, 3))
1782        self.assertEqual(alignment.aligned, (((0, 3),), ((4, 1),)))
1783
1784    def test_match_dictionary4(self):
1785        try:
1786            from Bio.Align import substitution_matrices
1787        except ImportError:
1788            return
1789        substitution_matrix = substitution_matrices.Array(alphabet="AT", dims=2)
1790        self.assertEqual(substitution_matrix.shape, (2, 2))
1791        substitution_matrix.update(self.match_dict)
1792        seq1 = "ATAT"
1793        seq2 = "ATT"
1794        aligner = Align.PairwiseAligner()
1795        aligner.mode = "local"
1796        aligner.substitution_matrix = substitution_matrix
1797        aligner.open_gap_score = -0.5
1798        aligner.extend_gap_score = 0.0
1799        self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm")
1800        lines = str(aligner).splitlines()
1801        self.assertEqual(len(lines), 15)
1802        self.assertEqual(lines[0], "Pairwise sequence aligner with parameters")
1803        line = lines[1]
1804        prefix = "  substitution_matrix: <Array object at "
1805        suffix = ">"
1806        self.assertTrue(line.startswith(prefix))
1807        self.assertTrue(line.endswith(suffix))
1808        address = int(line[len(prefix) : -len(suffix)], 16)
1809        self.assertEqual(lines[2], "  target_internal_open_gap_score: -0.500000")
1810        self.assertEqual(lines[3], "  target_internal_extend_gap_score: 0.000000")
1811        self.assertEqual(lines[4], "  target_left_open_gap_score: -0.500000")
1812        self.assertEqual(lines[5], "  target_left_extend_gap_score: 0.000000")
1813        self.assertEqual(lines[6], "  target_right_open_gap_score: -0.500000")
1814        self.assertEqual(lines[7], "  target_right_extend_gap_score: 0.000000")
1815        self.assertEqual(lines[8], "  query_internal_open_gap_score: -0.500000")
1816        self.assertEqual(lines[9], "  query_internal_extend_gap_score: 0.000000")
1817        self.assertEqual(lines[10], "  query_left_open_gap_score: -0.500000")
1818        self.assertEqual(lines[11], "  query_left_extend_gap_score: 0.000000")
1819        self.assertEqual(lines[12], "  query_right_open_gap_score: -0.500000")
1820        self.assertEqual(lines[13], "  query_right_extend_gap_score: 0.000000")
1821        self.assertEqual(lines[14], "  mode: local")
1822        score = aligner.score(seq1, seq2)
1823        self.assertAlmostEqual(score, 3.0)
1824        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1825        self.assertAlmostEqual(score, 3.0)
1826        alignments = aligner.align(seq1, seq2)
1827        self.assertEqual(len(alignments), 2)
1828        alignment = alignments[0]
1829        self.assertAlmostEqual(alignment.score, 3.0)
1830        self.assertEqual(
1831            str(alignment),
1832            """\
1833ATAT
1834||.
1835ATT
1836""",
1837        )
1838        self.assertEqual(alignment.shape, (2, 3))
1839        self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),)))
1840        alignment = alignments[1]
1841        self.assertAlmostEqual(alignment.score, 3.0)
1842        self.assertEqual(
1843            str(alignment),
1844            """\
1845ATAT
1846||-|
1847AT-T
1848""",
1849        )
1850        self.assertEqual(alignment.shape, (2, 4))
1851        self.assertEqual(alignment.aligned, (((0, 2), (3, 4)), ((0, 2), (2, 3))))
1852        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1853        self.assertEqual(len(alignments), 2)
1854        alignment = alignments[0]
1855        self.assertAlmostEqual(alignment.score, 3.0)
1856        self.assertEqual(
1857            str(alignment),
1858            """\
1859ATAT
1860||.
1861ATT
1862""",
1863        )
1864        self.assertEqual(alignment.shape, (2, 3))
1865        self.assertEqual(alignment.aligned, (((0, 3),), ((3, 0),)))
1866        alignment = alignments[1]
1867        self.assertAlmostEqual(alignment.score, 3.0)
1868        self.assertEqual(
1869            str(alignment),
1870            """\
1871ATAT
1872||-|
1873AT-T
1874""",
1875        )
1876        self.assertEqual(alignment.shape, (2, 4))
1877        self.assertEqual(alignment.aligned, (((0, 2), (3, 4)), ((3, 1), (1, 0))))
1878
1879    def test_match_dictionary5(self):
1880        try:
1881            from Bio.Align import substitution_matrices
1882        except ImportError:
1883            return
1884        substitution_matrix = substitution_matrices.Array(alphabet="AT", dims=2)
1885        self.assertEqual(substitution_matrix.shape, (2, 2))
1886        substitution_matrix.update(self.match_dict)
1887        seq1 = "ATAT"
1888        seq2 = "ATT"
1889        aligner = Align.PairwiseAligner()
1890        aligner.mode = "local"
1891        aligner.substitution_matrix = substitution_matrix
1892        aligner.open_gap_score = -1.0
1893        aligner.extend_gap_score = 0.0
1894        lines = str(aligner).splitlines()
1895        self.assertEqual(len(lines), 15)
1896        self.assertEqual(lines[0], "Pairwise sequence aligner with parameters")
1897        line = lines[1]
1898        prefix = "  substitution_matrix: <Array object at "
1899        suffix = ">"
1900        self.assertTrue(line.startswith(prefix))
1901        self.assertTrue(line.endswith(suffix))
1902        address = int(line[len(prefix) : -len(suffix)], 16)
1903        self.assertEqual(lines[2], "  target_internal_open_gap_score: -1.000000")
1904        self.assertEqual(lines[3], "  target_internal_extend_gap_score: 0.000000")
1905        self.assertEqual(lines[4], "  target_left_open_gap_score: -1.000000")
1906        self.assertEqual(lines[5], "  target_left_extend_gap_score: 0.000000")
1907        self.assertEqual(lines[6], "  target_right_open_gap_score: -1.000000")
1908        self.assertEqual(lines[7], "  target_right_extend_gap_score: 0.000000")
1909        self.assertEqual(lines[8], "  query_internal_open_gap_score: -1.000000")
1910        self.assertEqual(lines[9], "  query_internal_extend_gap_score: 0.000000")
1911        self.assertEqual(lines[10], "  query_left_open_gap_score: -1.000000")
1912        self.assertEqual(lines[11], "  query_left_extend_gap_score: 0.000000")
1913        self.assertEqual(lines[12], "  query_right_open_gap_score: -1.000000")
1914        self.assertEqual(lines[13], "  query_right_extend_gap_score: 0.000000")
1915        self.assertEqual(lines[14], "  mode: local")
1916        score = aligner.score(seq1, seq2)
1917        self.assertAlmostEqual(score, 3.0)
1918        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1919        self.assertAlmostEqual(score, 3.0)
1920        alignments = aligner.align(seq1, seq2)
1921        self.assertEqual(len(alignments), 1)
1922        alignment = alignments[0]
1923        self.assertAlmostEqual(alignment.score, 3.0)
1924        self.assertEqual(
1925            str(alignment),
1926            """\
1927ATAT
1928||.
1929ATT
1930""",
1931        )
1932        self.assertEqual(alignment.shape, (2, 3))
1933        self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),)))
1934        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
1935        self.assertEqual(len(alignments), 1)
1936        alignment = alignments[0]
1937        self.assertAlmostEqual(alignment.score, 3.0)
1938        self.assertEqual(
1939            str(alignment),
1940            """\
1941ATAT
1942||.
1943ATT
1944""",
1945        )
1946        self.assertEqual(alignment.shape, (2, 3))
1947        self.assertEqual(alignment.aligned, (((0, 3),), ((3, 0),)))
1948
1949    def test_match_dictionary6(self):
1950        try:
1951            from Bio.Align import substitution_matrices
1952        except ImportError:
1953            return
1954        substitution_matrix = substitution_matrices.Array(alphabet="AT", dims=2)
1955        self.assertEqual(substitution_matrix.shape, (2, 2))
1956        substitution_matrix.update(self.match_dict)
1957        seq1 = "ATT"
1958        seq2 = "ATAT"
1959        aligner = Align.PairwiseAligner()
1960        aligner.mode = "local"
1961        aligner.substitution_matrix = substitution_matrix
1962        aligner.open_gap_score = -1.0
1963        aligner.extend_gap_score = 0.0
1964        lines = str(aligner).splitlines()
1965        self.assertEqual(len(lines), 15)
1966        self.assertEqual(lines[0], "Pairwise sequence aligner with parameters")
1967        line = lines[1]
1968        prefix = "  substitution_matrix: <Array object at "
1969        suffix = ">"
1970        self.assertTrue(line.startswith(prefix))
1971        self.assertTrue(line.endswith(suffix))
1972        address = int(line[len(prefix) : -len(suffix)], 16)
1973        self.assertEqual(lines[2], "  target_internal_open_gap_score: -1.000000")
1974        self.assertEqual(lines[3], "  target_internal_extend_gap_score: 0.000000")
1975        self.assertEqual(lines[4], "  target_left_open_gap_score: -1.000000")
1976        self.assertEqual(lines[5], "  target_left_extend_gap_score: 0.000000")
1977        self.assertEqual(lines[6], "  target_right_open_gap_score: -1.000000")
1978        self.assertEqual(lines[7], "  target_right_extend_gap_score: 0.000000")
1979        self.assertEqual(lines[8], "  query_internal_open_gap_score: -1.000000")
1980        self.assertEqual(lines[9], "  query_internal_extend_gap_score: 0.000000")
1981        self.assertEqual(lines[10], "  query_left_open_gap_score: -1.000000")
1982        self.assertEqual(lines[11], "  query_left_extend_gap_score: 0.000000")
1983        self.assertEqual(lines[12], "  query_right_open_gap_score: -1.000000")
1984        self.assertEqual(lines[13], "  query_right_extend_gap_score: 0.000000")
1985        self.assertEqual(lines[14], "  mode: local")
1986        score = aligner.score(seq1, seq2)
1987        self.assertAlmostEqual(score, 3.0)
1988        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
1989        self.assertAlmostEqual(score, 3.0)
1990        alignments = aligner.align(seq1, seq2)
1991        self.assertEqual(len(alignments), 1)
1992        alignment = alignments[0]
1993        self.assertAlmostEqual(alignment.score, 3.0)
1994        self.assertEqual(
1995            str(alignment),
1996            """\
1997ATT
1998||.
1999ATAT
2000""",
2001        )
2002        self.assertEqual(alignment.shape, (2, 3))
2003        self.assertEqual(alignment.aligned, (((0, 3),), ((0, 3),)))
2004        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2005        self.assertEqual(len(alignments), 1)
2006        alignment = alignments[0]
2007        self.assertAlmostEqual(alignment.score, 3.0)
2008        self.assertEqual(
2009            str(alignment),
2010            """\
2011ATT
2012||.
2013ATAT
2014""",
2015        )
2016        self.assertEqual(alignment.shape, (2, 3))
2017        self.assertEqual(alignment.aligned, (((0, 3),), ((4, 1),)))
2018
2019
2020class TestPairwiseOneCharacter(unittest.TestCase):
2021    def test_align_one_char1(self):
2022        aligner = Align.PairwiseAligner()
2023        aligner.mode = "local"
2024        aligner.open_gap_score = -0.3
2025        aligner.extend_gap_score = -0.1
2026        self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm")
2027        self.assertEqual(
2028            str(aligner),
2029            """\
2030Pairwise sequence aligner with parameters
2031  wildcard: None
2032  match_score: 1.000000
2033  mismatch_score: 0.000000
2034  target_internal_open_gap_score: -0.300000
2035  target_internal_extend_gap_score: -0.100000
2036  target_left_open_gap_score: -0.300000
2037  target_left_extend_gap_score: -0.100000
2038  target_right_open_gap_score: -0.300000
2039  target_right_extend_gap_score: -0.100000
2040  query_internal_open_gap_score: -0.300000
2041  query_internal_extend_gap_score: -0.100000
2042  query_left_open_gap_score: -0.300000
2043  query_left_extend_gap_score: -0.100000
2044  query_right_open_gap_score: -0.300000
2045  query_right_extend_gap_score: -0.100000
2046  mode: local
2047""",
2048        )
2049        score = aligner.score("abcde", "c")
2050        self.assertAlmostEqual(score, 1)
2051        alignments = aligner.align("abcde", "c")
2052        self.assertEqual(len(alignments), 1)
2053        alignment = alignments[0]
2054        self.assertAlmostEqual(alignment.score, 1)
2055        self.assertEqual(
2056            str(alignment),
2057            """\
2058abcde
2059  |
2060  c
2061""",
2062        )
2063        self.assertEqual(alignment.shape, (2, 1))
2064        self.assertEqual(alignment.aligned, (((2, 3),), ((0, 1),)))
2065
2066    def test_align_one_char2(self):
2067        aligner = Align.PairwiseAligner()
2068        aligner.mode = "local"
2069        aligner.open_gap_score = -0.3
2070        aligner.extend_gap_score = -0.1
2071        self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm")
2072        self.assertEqual(
2073            str(aligner),
2074            """\
2075Pairwise sequence aligner with parameters
2076  wildcard: None
2077  match_score: 1.000000
2078  mismatch_score: 0.000000
2079  target_internal_open_gap_score: -0.300000
2080  target_internal_extend_gap_score: -0.100000
2081  target_left_open_gap_score: -0.300000
2082  target_left_extend_gap_score: -0.100000
2083  target_right_open_gap_score: -0.300000
2084  target_right_extend_gap_score: -0.100000
2085  query_internal_open_gap_score: -0.300000
2086  query_internal_extend_gap_score: -0.100000
2087  query_left_open_gap_score: -0.300000
2088  query_left_extend_gap_score: -0.100000
2089  query_right_open_gap_score: -0.300000
2090  query_right_extend_gap_score: -0.100000
2091  mode: local
2092""",
2093        )
2094        score = aligner.score("abcce", "c")
2095        self.assertAlmostEqual(score, 1)
2096        alignments = aligner.align("abcce", "c")
2097        self.assertEqual(len(alignments), 2)
2098        alignment = alignments[0]
2099        self.assertAlmostEqual(alignment.score, 1)
2100        self.assertEqual(
2101            str(alignment),
2102            """\
2103abcce
2104  |
2105  c
2106""",
2107        )
2108        self.assertEqual(alignment.shape, (2, 1))
2109        self.assertEqual(alignment.aligned, (((2, 3),), ((0, 1),)))
2110        alignment = alignments[1]
2111        self.assertAlmostEqual(alignment.score, 1)
2112        self.assertEqual(
2113            str(alignment),
2114            """\
2115abcce
2116   |
2117   c
2118""",
2119        )
2120        self.assertEqual(alignment.shape, (2, 1))
2121        self.assertEqual(alignment.aligned, (((3, 4),), ((0, 1),)))
2122
2123    def test_align_one_char3(self):
2124        aligner = Align.PairwiseAligner()
2125        aligner.mode = "global"
2126        aligner.open_gap_score = -0.3
2127        aligner.extend_gap_score = -0.1
2128        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
2129        self.assertEqual(
2130            str(aligner),
2131            """\
2132Pairwise sequence aligner with parameters
2133  wildcard: None
2134  match_score: 1.000000
2135  mismatch_score: 0.000000
2136  target_internal_open_gap_score: -0.300000
2137  target_internal_extend_gap_score: -0.100000
2138  target_left_open_gap_score: -0.300000
2139  target_left_extend_gap_score: -0.100000
2140  target_right_open_gap_score: -0.300000
2141  target_right_extend_gap_score: -0.100000
2142  query_internal_open_gap_score: -0.300000
2143  query_internal_extend_gap_score: -0.100000
2144  query_left_open_gap_score: -0.300000
2145  query_left_extend_gap_score: -0.100000
2146  query_right_open_gap_score: -0.300000
2147  query_right_extend_gap_score: -0.100000
2148  mode: global
2149""",
2150        )
2151        seq1 = "abcde"
2152        seq2 = "c"
2153        score = aligner.score(seq1, seq2)
2154        self.assertAlmostEqual(score, 0.2)
2155        alignments = aligner.align(seq1, seq2)
2156        self.assertEqual(len(alignments), 1)
2157        alignment = alignments[0]
2158        self.assertAlmostEqual(alignment.score, 0.2)
2159        self.assertEqual(
2160            str(alignment),
2161            """\
2162abcde
2163--|--
2164--c--
2165""",
2166        )
2167        self.assertEqual(alignment.shape, (2, 5))
2168        self.assertEqual(alignment.aligned, (((2, 3),), ((0, 1),)))
2169
2170    def test_align_one_char_score3(self):
2171        aligner = Align.PairwiseAligner()
2172        aligner.mode = "global"
2173        aligner.open_gap_score = -0.3
2174        aligner.extend_gap_score = -0.1
2175        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
2176        self.assertEqual(
2177            str(aligner),
2178            """\
2179Pairwise sequence aligner with parameters
2180  wildcard: None
2181  match_score: 1.000000
2182  mismatch_score: 0.000000
2183  target_internal_open_gap_score: -0.300000
2184  target_internal_extend_gap_score: -0.100000
2185  target_left_open_gap_score: -0.300000
2186  target_left_extend_gap_score: -0.100000
2187  target_right_open_gap_score: -0.300000
2188  target_right_extend_gap_score: -0.100000
2189  query_internal_open_gap_score: -0.300000
2190  query_internal_extend_gap_score: -0.100000
2191  query_left_open_gap_score: -0.300000
2192  query_left_extend_gap_score: -0.100000
2193  query_right_open_gap_score: -0.300000
2194  query_right_extend_gap_score: -0.100000
2195  mode: global
2196""",
2197        )
2198        score = aligner.score("abcde", "c")
2199        self.assertAlmostEqual(score, 0.2)
2200
2201
2202class TestPerSiteGapPenalties(unittest.TestCase):
2203    """Check gap penalty callbacks use correct gap opening position.
2204
2205    This tests that the gap penalty callbacks are really being used
2206    with the correct gap opening position.
2207    """
2208
2209    def test_gap_here_only_1(self):
2210        seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA"
2211        seq2 = "AABBBAAAACCCCAAAABBBAA"
2212        breaks = [0, 11, len(seq2)]
2213
2214        # Very expensive to open a gap in seq1:
2215        def nogaps(x, y):
2216            return -2000 - y
2217
2218        # Very expensive to open a gap in seq2 unless it is in one of the allowed positions:
2219        def specificgaps(x, y):
2220            if x in breaks:
2221                return -2 - y
2222            else:
2223                return -2000 - y
2224
2225        aligner = Align.PairwiseAligner()
2226        aligner.mode = "global"
2227        aligner.match_score = 1
2228        aligner.mismatch_score = -1
2229        aligner.target_gap_score = nogaps
2230        aligner.query_gap_score = specificgaps
2231        self.assertEqual(
2232            str(aligner),
2233            """\
2234Pairwise sequence aligner with parameters
2235  wildcard: None
2236  match_score: 1.000000
2237  mismatch_score: -1.000000
2238  target_gap_function: %s
2239  query_gap_function: %s
2240  mode: global
2241"""
2242            % (nogaps, specificgaps),
2243        )
2244        self.assertEqual(
2245            aligner.algorithm, "Waterman-Smith-Beyer global alignment algorithm"
2246        )
2247        score = aligner.score(seq1, seq2)
2248        self.assertAlmostEqual(score, 2)
2249        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
2250        self.assertAlmostEqual(score, 2)
2251        alignments = aligner.align(seq1, seq2)
2252        self.assertEqual(len(alignments), 1)
2253        alignment = alignments[0]
2254        self.assertAlmostEqual(alignment.score, 2)
2255        self.assertEqual(
2256            str(alignment),
2257            """\
2258AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2259--|||||||||||----------|||||||||||--
2260--AABBBAAAACC----------CCAAAABBBAA--
2261""",
2262        )
2263        self.assertEqual(alignment.shape, (2, 36))
2264        self.assertEqual(alignment.aligned, (((2, 13), (23, 34)), ((0, 11), (11, 22))))
2265        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2266        self.assertEqual(len(alignments), 1)
2267        alignment = alignments[0]
2268        self.assertAlmostEqual(alignment.score, 2)
2269        self.assertEqual(
2270            str(alignment),
2271            """\
2272AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2273--|||||||||||----------|||||||||||--
2274--AABBBAAAACC----------CCAAAABBBAA--
2275""",
2276        )
2277        self.assertEqual(alignment.shape, (2, 36))
2278        self.assertEqual(alignment.aligned, (((2, 13), (23, 34)), ((22, 11), (11, 0))))
2279
2280    def test_gap_here_only_2(self):
2281        # Force a bad alignment.
2282        #
2283        # Forces a bad alignment by having a very expensive gap penalty
2284        # where one would normally expect a gap, and a cheap gap penalty
2285        # in another place.
2286        seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA"
2287        seq2 = "AABBBAAAACCCCAAAABBBAA"
2288        breaks = [0, 3, len(seq2)]
2289
2290        # Very expensive to open a gap in seq1:
2291        def nogaps(x, y):
2292            return -2000 - y
2293
2294        # Very expensive to open a gap in seq2 unless it is in one of the allowed positions:
2295        def specificgaps(x, y):
2296            if x in breaks:
2297                return -2 - y
2298            else:
2299                return -2000 - y
2300
2301        aligner = Align.PairwiseAligner()
2302        aligner.mode = "global"
2303        aligner.match_score = 1
2304        aligner.mismatch_score = -1
2305        aligner.target_gap_score = nogaps
2306        aligner.query_gap_score = specificgaps
2307        self.assertEqual(
2308            str(aligner),
2309            """\
2310Pairwise sequence aligner with parameters
2311  wildcard: None
2312  match_score: 1.000000
2313  mismatch_score: -1.000000
2314  target_gap_function: %s
2315  query_gap_function: %s
2316  mode: global
2317"""
2318            % (nogaps, specificgaps),
2319        )
2320        self.assertEqual(
2321            aligner.algorithm, "Waterman-Smith-Beyer global alignment algorithm"
2322        )
2323        score = aligner.score(seq1, seq2)
2324        self.assertAlmostEqual(score, -10)
2325        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
2326        self.assertAlmostEqual(score, -10)
2327        alignments = aligner.align(seq1, seq2)
2328        self.assertEqual(len(alignments), 2)
2329        alignment = alignments[0]
2330        self.assertAlmostEqual(alignment.score, -10)
2331        self.assertEqual(
2332            str(alignment),
2333            """\
2334AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2335--|||----------......|||||||||||||--
2336--AAB----------BBAAAACCCCAAAABBBAA--
2337""",
2338        )
2339        self.assertEqual(alignment.shape, (2, 36))
2340        self.assertEqual(alignment.aligned, (((2, 5), (15, 34)), ((0, 3), (3, 22))))
2341        alignment = alignments[1]
2342        self.assertAlmostEqual(alignment.score, -10)
2343        self.assertEqual(
2344            str(alignment),
2345            """\
2346AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2347||.------------......|||||||||||||--
2348AAB------------BBAAAACCCCAAAABBBAA--
2349""",
2350        )
2351        self.assertEqual(alignment.shape, (2, 36))
2352        self.assertEqual(alignment.aligned, (((0, 3), (15, 34)), ((0, 3), (3, 22))))
2353        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2354        self.assertEqual(len(alignments), 2)
2355        alignment = alignments[0]
2356        self.assertAlmostEqual(alignment.score, -10)
2357        self.assertEqual(
2358            str(alignment),
2359            """\
2360AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2361--|||||||||||||......------------.||
2362--AABBBAAAACCCCAAAABB------------BAA
2363""",
2364        )
2365        self.assertEqual(alignment.shape, (2, 36))
2366        self.assertEqual(alignment.aligned, (((2, 21), (33, 36)), ((22, 3), (3, 0))))
2367        alignment = alignments[1]
2368        self.assertAlmostEqual(alignment.score, -10)
2369        self.assertEqual(
2370            str(alignment),
2371            """\
2372AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2373--|||||||||||||......----------|||--
2374--AABBBAAAACCCCAAAABB----------BAA--
2375""",
2376        )
2377        self.assertEqual(alignment.shape, (2, 36))
2378        self.assertEqual(alignment.aligned, (((2, 21), (31, 34)), ((22, 3), (3, 0))))
2379
2380    def test_gap_here_only_3(self):
2381        # Check if gap open and gap extend penalties are handled correctly.
2382        seq1 = "TTCCAA"
2383        seq2 = "TTGGAA"
2384
2385        def gap_score(i, n):
2386            if i == 3:
2387                return -10
2388            if n == 1:
2389                return -1
2390            return -10
2391
2392        aligner = Align.PairwiseAligner()
2393        aligner.mode = "global"
2394        aligner.match_score = 1
2395        aligner.mismatch_score = -10
2396        aligner.target_gap_score = gap_score
2397        self.assertEqual(
2398            aligner.algorithm, "Waterman-Smith-Beyer global alignment algorithm"
2399        )
2400        self.assertEqual(
2401            str(aligner),
2402            """\
2403Pairwise sequence aligner with parameters
2404  wildcard: None
2405  match_score: 1.000000
2406  mismatch_score: -10.000000
2407  target_gap_function: %s
2408  query_internal_open_gap_score: 0.000000
2409  query_internal_extend_gap_score: 0.000000
2410  query_left_open_gap_score: 0.000000
2411  query_left_extend_gap_score: 0.000000
2412  query_right_open_gap_score: 0.000000
2413  query_right_extend_gap_score: 0.000000
2414  mode: global
2415"""
2416            % gap_score,
2417        )
2418        score = aligner.score(seq1, seq2)
2419        self.assertAlmostEqual(score, 2.0)
2420        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
2421        self.assertAlmostEqual(score, 2.0)
2422        alignments = aligner.align(seq1, seq2)
2423        self.assertEqual(len(alignments), 1)
2424        alignment = alignments[0]
2425        self.assertAlmostEqual(alignment.score, 2.0)
2426        self.assertEqual(
2427            str(alignment),
2428            """\
2429TT-CC-AA
2430||----||
2431TTG--GAA
2432""",
2433        )
2434        self.assertEqual(alignment.shape, (2, 8))
2435        self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((0, 2), (4, 6))))
2436        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2437        self.assertEqual(len(alignments), 1)
2438        alignment = alignments[0]
2439        self.assertAlmostEqual(alignment.score, 2.0)
2440        self.assertEqual(
2441            str(alignment),
2442            """\
2443TT-CC-AA
2444||----||
2445TTG--GAA
2446""",
2447        )
2448        self.assertEqual(alignment.shape, (2, 8))
2449        self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((6, 4), (2, 0))))
2450        aligner.query_gap_score = gap_score
2451        self.assertEqual(
2452            str(aligner),
2453            """\
2454Pairwise sequence aligner with parameters
2455  wildcard: None
2456  match_score: 1.000000
2457  mismatch_score: -10.000000
2458  target_gap_function: %s
2459  query_gap_function: %s
2460  mode: global
2461"""
2462            % (gap_score, gap_score),
2463        )
2464        score = aligner.score(seq1, seq2)
2465        self.assertAlmostEqual(score, -8.0)
2466        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
2467        self.assertAlmostEqual(score, -8.0)
2468        alignments = aligner.align(seq1, seq2)
2469        self.assertEqual(len(alignments), 4)
2470        alignment = alignments[0]
2471        self.assertAlmostEqual(alignment.score, -8.0)
2472        self.assertEqual(
2473            str(alignment),
2474            """\
2475TT-CCAA
2476||-.-||
2477TTGG-AA
2478""",
2479        )
2480        self.assertEqual(alignment.shape, (2, 7))
2481        self.assertEqual(
2482            alignment.aligned, (((0, 2), (2, 3), (4, 6)), ((0, 2), (3, 4), (4, 6)))
2483        )
2484        alignment = alignments[1]
2485        self.assertAlmostEqual(alignment.score, -8.0)
2486        self.assertEqual(
2487            str(alignment),
2488            """\
2489TTC--CAA
2490||----||
2491TT-GG-AA
2492""",
2493        )
2494        self.assertEqual(alignment.shape, (2, 8))
2495        self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((0, 2), (4, 6))))
2496        alignment = alignments[2]
2497        self.assertAlmostEqual(alignment.score, -8.0)
2498        self.assertEqual(
2499            str(alignment),
2500            """\
2501TTCC-AA
2502||-.-||
2503TT-GGAA
2504""",
2505        )
2506        self.assertEqual(alignment.shape, (2, 7))
2507        self.assertEqual(
2508            alignment.aligned, (((0, 2), (3, 4), (4, 6)), ((0, 2), (2, 3), (4, 6)))
2509        )
2510        alignment = alignments[3]
2511        self.assertAlmostEqual(alignment.score, -8.0)
2512        self.assertEqual(
2513            str(alignment),
2514            """\
2515TT-CC-AA
2516||----||
2517TTG--GAA
2518""",
2519        )
2520        self.assertEqual(alignment.shape, (2, 8))
2521        self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((0, 2), (4, 6))))
2522        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2523        self.assertEqual(len(alignments), 4)
2524        alignment = alignments[0]
2525        self.assertAlmostEqual(alignment.score, -8.0)
2526        self.assertEqual(
2527            str(alignment),
2528            """\
2529TT-CCAA
2530||-.-||
2531TTGG-AA
2532""",
2533        )
2534        self.assertEqual(alignment.shape, (2, 7))
2535        self.assertEqual(
2536            alignment.aligned, (((0, 2), (2, 3), (4, 6)), ((6, 4), (3, 2), (2, 0)))
2537        )
2538        alignment = alignments[1]
2539        self.assertAlmostEqual(alignment.score, -8.0)
2540        self.assertEqual(
2541            str(alignment),
2542            """\
2543TTC--CAA
2544||----||
2545TT-GG-AA
2546""",
2547        )
2548        self.assertEqual(alignment.shape, (2, 8))
2549        self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((6, 4), (2, 0))))
2550        alignment = alignments[2]
2551        self.assertAlmostEqual(alignment.score, -8.0)
2552        self.assertEqual(
2553            str(alignment),
2554            """\
2555TTCC-AA
2556||-.-||
2557TT-GGAA
2558""",
2559        )
2560        self.assertEqual(alignment.shape, (2, 7))
2561        self.assertEqual(
2562            alignment.aligned, (((0, 2), (3, 4), (4, 6)), ((6, 4), (4, 3), (2, 0)))
2563        )
2564        alignment = alignments[3]
2565        self.assertAlmostEqual(alignment.score, -8.0)
2566        self.assertEqual(
2567            str(alignment),
2568            """\
2569TT-CC-AA
2570||----||
2571TTG--GAA
2572""",
2573        )
2574        self.assertEqual(alignment.shape, (2, 8))
2575        self.assertEqual(alignment.aligned, (((0, 2), (4, 6)), ((6, 4), (2, 0))))
2576
2577    def test_gap_here_only_local_1(self):
2578        seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA"
2579        seq2 = "AABBBAAAACCCCAAAABBBAA"
2580        breaks = [0, 11, len(seq2)]
2581
2582        # Very expensive to open a gap in seq1:
2583        def nogaps(x, y):
2584            return -2000 - y
2585
2586        # Very expensive to open a gap in seq2 unless it is in one of the allowed positions:
2587        def specificgaps(x, y):
2588            if x in breaks:
2589                return -2 - y
2590            else:
2591                return -2000 - y
2592
2593        aligner = Align.PairwiseAligner()
2594        aligner.mode = "local"
2595        aligner.match_score = 1
2596        aligner.mismatch_score = -1
2597        aligner.target_gap_score = nogaps
2598        aligner.query_gap_score = specificgaps
2599        self.assertEqual(
2600            aligner.algorithm, "Waterman-Smith-Beyer local alignment algorithm"
2601        )
2602        self.assertEqual(
2603            str(aligner),
2604            """\
2605Pairwise sequence aligner with parameters
2606  wildcard: None
2607  match_score: 1.000000
2608  mismatch_score: -1.000000
2609  target_gap_function: %s
2610  query_gap_function: %s
2611  mode: local
2612"""
2613            % (nogaps, specificgaps),
2614        )
2615        score = aligner.score(seq1, seq2)
2616        self.assertAlmostEqual(score, 13)
2617        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
2618        self.assertAlmostEqual(score, 13)
2619        alignments = aligner.align(seq1, seq2)
2620        self.assertEqual(len(alignments), 2)
2621        alignment = alignments[0]
2622        self.assertAlmostEqual(alignment.score, 13)
2623        self.assertEqual(
2624            str(alignment),
2625            """\
2626AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2627  |||||||||||||
2628  AABBBAAAACCCCAAAABBBAA
2629""",
2630        )
2631        self.assertEqual(alignment.shape, (2, 13))
2632        self.assertEqual(alignment.aligned, (((2, 15),), ((0, 13),)))
2633        alignment = alignments[1]
2634        self.assertAlmostEqual(alignment.score, 13)
2635        self.assertEqual(
2636            str(alignment),
2637            """\
2638AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2639                     |||||||||||||
2640            AABBBAAAACCCCAAAABBBAA
2641""",
2642        )
2643        self.assertEqual(alignment.shape, (2, 13))
2644        self.assertEqual(alignment.aligned, (((21, 34),), ((9, 22),)))
2645        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2646        self.assertEqual(len(alignments), 2)
2647        alignment = alignments[0]
2648        self.assertAlmostEqual(alignment.score, 13)
2649        self.assertEqual(
2650            str(alignment),
2651            """\
2652AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2653  |||||||||||||
2654  AABBBAAAACCCCAAAABBBAA
2655""",
2656        )
2657        self.assertEqual(alignment.shape, (2, 13))
2658        self.assertEqual(alignment.aligned, (((2, 15),), ((22, 9),)))
2659        alignment = alignments[1]
2660        self.assertAlmostEqual(alignment.score, 13)
2661        self.assertEqual(
2662            str(alignment),
2663            """\
2664AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2665                     |||||||||||||
2666            AABBBAAAACCCCAAAABBBAA
2667""",
2668        )
2669        self.assertEqual(alignment.shape, (2, 13))
2670        self.assertEqual(alignment.aligned, (((21, 34),), ((13, 0),)))
2671
2672    def test_gap_here_only_local_2(self):
2673        # Force a bad alignment.
2674        #
2675        # Forces a bad alignment by having a very expensive gap penalty
2676        # where one would normally expect a gap, and a cheap gap penalty
2677        # in another place.
2678        seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA"
2679        seq2 = "AABBBAAAACCCCAAAABBBAA"
2680        breaks = [0, 3, len(seq2)]
2681
2682        # Very expensive to open a gap in seq1:
2683        def nogaps(x, y):
2684            return -2000 - y
2685
2686        # Very expensive to open a gap in seq2 unless it is in one of the allowed positions:
2687        def specificgaps(x, y):
2688            if x in breaks:
2689                return -2 - y
2690            else:
2691                return -2000 - y
2692
2693        aligner = Align.PairwiseAligner()
2694        aligner.mode = "local"
2695        aligner.match_score = 1
2696        aligner.mismatch_score = -1
2697        aligner.target_gap_score = nogaps
2698        aligner.query_gap_score = specificgaps
2699        self.assertEqual(
2700            str(aligner),
2701            """\
2702Pairwise sequence aligner with parameters
2703  wildcard: None
2704  match_score: 1.000000
2705  mismatch_score: -1.000000
2706  target_gap_function: %s
2707  query_gap_function: %s
2708  mode: local
2709"""
2710            % (nogaps, specificgaps),
2711        )
2712        self.assertEqual(
2713            aligner.algorithm, "Waterman-Smith-Beyer local alignment algorithm"
2714        )
2715        score = aligner.score(seq1, seq2)
2716        self.assertAlmostEqual(score, 13)
2717        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
2718        self.assertAlmostEqual(score, 13)
2719        alignments = aligner.align(seq1, seq2)
2720        self.assertEqual(len(alignments), 2)
2721        alignment = alignments[0]
2722        self.assertAlmostEqual(alignment.score, 13)
2723        self.assertEqual(
2724            str(alignment),
2725            """\
2726AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2727  |||||||||||||
2728  AABBBAAAACCCCAAAABBBAA
2729""",
2730        )
2731        self.assertEqual(alignment.shape, (2, 13))
2732        self.assertEqual(alignment.aligned, (((2, 15),), ((0, 13),)))
2733        alignment = alignments[1]
2734        self.assertAlmostEqual(alignment.score, 13)
2735        self.assertEqual(
2736            str(alignment),
2737            """\
2738AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2739                     |||||||||||||
2740            AABBBAAAACCCCAAAABBBAA
2741""",
2742        )
2743        self.assertEqual(alignment.shape, (2, 13))
2744        self.assertEqual(alignment.aligned, (((21, 34),), ((9, 22),)))
2745        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2746        self.assertEqual(len(alignments), 2)
2747        alignment = alignments[0]
2748        self.assertAlmostEqual(alignment.score, 13)
2749        self.assertEqual(
2750            str(alignment),
2751            """\
2752AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2753  |||||||||||||
2754  AABBBAAAACCCCAAAABBBAA
2755""",
2756        )
2757        self.assertEqual(alignment.shape, (2, 13))
2758        self.assertEqual(alignment.aligned, (((2, 15),), ((22, 9),)))
2759        alignment = alignments[1]
2760        self.assertAlmostEqual(alignment.score, 13)
2761        self.assertEqual(
2762            str(alignment),
2763            """\
2764AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
2765                     |||||||||||||
2766            AABBBAAAACCCCAAAABBBAA
2767""",
2768        )
2769        self.assertEqual(alignment.shape, (2, 13))
2770        self.assertEqual(alignment.aligned, (((21, 34),), ((13, 0),)))
2771
2772    def test_gap_here_only_local_3(self):
2773        # Check if gap open and gap extend penalties are handled correctly.
2774        seq1 = "TTCCAA"
2775        seq2 = "TTGGAA"
2776
2777        def gap_score(i, n):
2778            if i == 3:
2779                return -10
2780            if n == 1:
2781                return -1
2782            return -10
2783
2784        aligner = Align.PairwiseAligner()
2785        aligner.mode = "local"
2786        aligner.match_score = 1
2787        aligner.mismatch_score = -10
2788        aligner.target_gap_score = gap_score
2789        self.assertEqual(
2790            aligner.algorithm, "Waterman-Smith-Beyer local alignment algorithm"
2791        )
2792        self.assertEqual(
2793            str(aligner),
2794            """\
2795Pairwise sequence aligner with parameters
2796  wildcard: None
2797  match_score: 1.000000
2798  mismatch_score: -10.000000
2799  target_gap_function: %s
2800  query_internal_open_gap_score: 0.000000
2801  query_internal_extend_gap_score: 0.000000
2802  query_left_open_gap_score: 0.000000
2803  query_left_extend_gap_score: 0.000000
2804  query_right_open_gap_score: 0.000000
2805  query_right_extend_gap_score: 0.000000
2806  mode: local
2807"""
2808            % gap_score,
2809        )
2810        score = aligner.score(seq1, seq2)
2811        self.assertAlmostEqual(score, 2.0)
2812        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
2813        self.assertAlmostEqual(score, 2.0)
2814        alignments = aligner.align(seq1, seq2)
2815        self.assertEqual(len(alignments), 2)
2816        alignment = alignments[0]
2817        self.assertAlmostEqual(alignment.score, 2.0)
2818        self.assertEqual(
2819            str(alignment),
2820            """\
2821TTCCAA
2822||
2823TTGGAA
2824""",
2825        )
2826        self.assertEqual(alignment.shape, (2, 2))
2827        self.assertEqual(alignment.aligned, (((0, 2),), ((0, 2),)))
2828        alignment = alignments[1]
2829        self.assertAlmostEqual(alignment.score, 2.0)
2830        self.assertEqual(
2831            str(alignment),
2832            """\
2833TTCCAA
2834    ||
2835TTGGAA
2836""",
2837        )
2838        self.assertEqual(alignment.shape, (2, 2))
2839        self.assertEqual(alignment.aligned, (((4, 6),), ((4, 6),)))
2840        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2841        self.assertEqual(len(alignments), 2)
2842        alignment = alignments[0]
2843        self.assertAlmostEqual(alignment.score, 2.0)
2844        self.assertEqual(
2845            str(alignment),
2846            """\
2847TTCCAA
2848||
2849TTGGAA
2850""",
2851        )
2852        self.assertEqual(alignment.shape, (2, 2))
2853        self.assertEqual(alignment.aligned, (((0, 2),), ((6, 4),)))
2854        alignment = alignments[1]
2855        self.assertAlmostEqual(alignment.score, 2.0)
2856        self.assertEqual(
2857            str(alignment),
2858            """\
2859TTCCAA
2860    ||
2861TTGGAA
2862""",
2863        )
2864        self.assertEqual(alignment.shape, (2, 2))
2865        self.assertEqual(alignment.aligned, (((4, 6),), ((2, 0),)))
2866        aligner.query_gap_score = gap_score
2867        self.assertEqual(
2868            str(aligner),
2869            """\
2870Pairwise sequence aligner with parameters
2871  wildcard: None
2872  match_score: 1.000000
2873  mismatch_score: -10.000000
2874  target_gap_function: %s
2875  query_gap_function: %s
2876  mode: local
2877"""
2878            % (gap_score, gap_score),
2879        )
2880        score = aligner.score(seq1, seq2)
2881        self.assertAlmostEqual(score, 2.0)
2882        score = aligner.score(seq1, reverse_complement(seq2), strand="-")
2883        self.assertAlmostEqual(score, 2.0)
2884        alignments = aligner.align(seq1, seq2)
2885        self.assertEqual(len(alignments), 2)
2886        alignment = alignments[0]
2887        self.assertAlmostEqual(alignment.score, 2.0)
2888        self.assertEqual(
2889            str(alignment),
2890            """\
2891TTCCAA
2892||
2893TTGGAA
2894""",
2895        )
2896        self.assertEqual(alignment.shape, (2, 2))
2897        self.assertEqual(alignment.aligned, (((0, 2),), ((0, 2),)))
2898        alignment = alignments[1]
2899        self.assertAlmostEqual(alignment.score, 2.0)
2900        self.assertEqual(
2901            str(alignment),
2902            """\
2903TTCCAA
2904    ||
2905TTGGAA
2906""",
2907        )
2908        self.assertEqual(alignment.shape, (2, 2))
2909        self.assertEqual(alignment.aligned, (((4, 6),), ((4, 6),)))
2910        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2911        self.assertEqual(len(alignments), 2)
2912        alignment = alignments[0]
2913        self.assertAlmostEqual(alignment.score, 2.0)
2914        self.assertEqual(
2915            str(alignment),
2916            """\
2917TTCCAA
2918||
2919TTGGAA
2920""",
2921        )
2922        self.assertEqual(alignment.shape, (2, 2))
2923        self.assertEqual(alignment.aligned, (((0, 2),), ((6, 4),)))
2924        alignment = alignments[1]
2925        self.assertAlmostEqual(alignment.score, 2.0)
2926        self.assertEqual(
2927            str(alignment),
2928            """\
2929TTCCAA
2930    ||
2931TTGGAA
2932""",
2933        )
2934        self.assertEqual(alignment.shape, (2, 2))
2935        self.assertEqual(alignment.aligned, (((4, 6),), ((2, 0),)))
2936
2937    def test_broken_gap_function(self):
2938        # Check if an Exception is propagated if the gap function raises one
2939        seq1 = "TTCCAA"
2940        seq2 = "TTGGAA"
2941
2942        def gap_score(i, n):
2943            raise RuntimeError("broken gap function")
2944
2945        aligner = Align.PairwiseAligner()
2946        aligner.target_gap_score = gap_score
2947        aligner.query_gap_score = -1
2948        aligner.mode = "global"
2949        with self.assertRaises(RuntimeError):
2950            aligner.score(seq1, seq2)
2951        with self.assertRaises(RuntimeError):
2952            aligner.score(seq1, reverse_complement(seq2), strand="-")
2953        with self.assertRaises(RuntimeError):
2954            alignments = aligner.align(seq1, seq2)
2955            alignments = list(alignments)
2956        with self.assertRaises(RuntimeError):
2957            alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2958            alignments = list(alignments)
2959        aligner.mode = "local"
2960        with self.assertRaises(RuntimeError):
2961            aligner.score(seq1, seq2)
2962        with self.assertRaises(RuntimeError):
2963            aligner.score(seq1, reverse_complement(seq2), strand="-")
2964        with self.assertRaises(RuntimeError):
2965            alignments = aligner.align(seq1, seq2)
2966            alignments = list(alignments)
2967        with self.assertRaises(RuntimeError):
2968            alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2969            alignments = list(alignments)
2970        aligner.target_gap_score = -1
2971        aligner.query_gap_score = gap_score
2972        aligner.mode = "global"
2973        with self.assertRaises(RuntimeError):
2974            aligner.score(seq1, seq2)
2975        with self.assertRaises(RuntimeError):
2976            aligner.score(seq1, reverse_complement(seq2), strand="-")
2977        with self.assertRaises(RuntimeError):
2978            alignments = aligner.align(seq1, seq2)
2979            alignments = list(alignments)
2980        with self.assertRaises(RuntimeError):
2981            alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2982            alignments = list(alignments)
2983        aligner.mode = "local"
2984        with self.assertRaises(RuntimeError):
2985            aligner.score(seq1, seq2)
2986        with self.assertRaises(RuntimeError):
2987            aligner.score(seq1, reverse_complement(seq2), strand="-")
2988        with self.assertRaises(RuntimeError):
2989            alignments = aligner.align(seq1, seq2)
2990            alignments = list(alignments)
2991        with self.assertRaises(RuntimeError):
2992            alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
2993            alignments = list(alignments)
2994
2995
2996class TestSequencesAsLists(unittest.TestCase):
2997    """Check aligning sequences provided as lists.
2998
2999    This tests whether we can align sequences that are provided as lists
3000    consisting of three-letter codons or three-letter amino acids.
3001    """
3002
3003    def test_three_letter_amino_acids_global(self):
3004        seq1 = ["Gly", "Ala", "Thr"]
3005        seq2 = ["Gly", "Ala", "Ala", "Cys", "Thr"]
3006        aligner = Align.PairwiseAligner()
3007        aligner.mode = "global"
3008        # fmt: off
3009        aligner.alphabet = [
3010            "Ala", "Arg", "Asn", "Asp", "Cys", "Gln", "Glu", "Gly", "His", "Ile",
3011            "Leu", "Lys", "Met", "Phe", "Pro", "Ser", "Thr", "Trp", "Tyr", "Val",
3012        ]
3013        # fmt: on
3014        score = aligner.score(seq1, seq2)
3015        self.assertAlmostEqual(score, 3.0)
3016        alignments = aligner.align(seq1, seq2)
3017        self.assertEqual(len(alignments), 2)
3018        self.assertEqual(
3019            str(alignments[0]),
3020            """\
3021Gly Ala --- --- Thr
3022||| ||| --- --- |||
3023Gly Ala Ala Cys Thr
3024""",
3025        )
3026        self.assertEqual(
3027            str(alignments[1]),
3028            """\
3029Gly --- Ala --- Thr
3030||| --- ||| --- |||
3031Gly Ala Ala Cys Thr
3032""",
3033        )
3034        self.assertAlmostEqual(alignments[0].score, 3.0)
3035        self.assertAlmostEqual(alignments[1].score, 3.0)
3036
3037        seq1 = ["Pro", "Pro", "Gly", "Ala", "Thr"]
3038        seq2 = ["Gly", "Ala", "Ala", "Cys", "Thr", "Asn", "Asn"]
3039        score = aligner.score(seq1, seq2)
3040        self.assertAlmostEqual(score, 3.0)
3041        alignments = aligner.align(seq1, seq2)
3042        self.assertEqual(len(alignments), 2)
3043        self.assertEqual(
3044            str(alignments[0]),
3045            """\
3046Pro Pro Gly Ala --- --- Thr --- ---
3047--- --- ||| ||| --- --- ||| --- ---
3048--- --- Gly Ala Ala Cys Thr Asn Asn
3049""",
3050        )
3051        self.assertEqual(
3052            str(alignments[1]),
3053            """\
3054Pro Pro Gly --- Ala --- Thr --- ---
3055--- --- ||| --- ||| --- ||| --- ---
3056--- --- Gly Ala Ala Cys Thr Asn Asn
3057""",
3058        )
3059        self.assertAlmostEqual(alignments[0].score, 3.0)
3060        self.assertAlmostEqual(alignments[1].score, 3.0)
3061
3062    def test_three_letter_amino_acids_local(self):
3063        seq1 = ["Asn", "Asn", "Gly", "Ala", "Thr", "Glu", "Glu"]
3064        seq2 = ["Pro", "Pro", "Gly", "Ala", "Ala", "Cys", "Thr", "Leu"]
3065        aligner = Align.PairwiseAligner()
3066        aligner.mode = "local"
3067        # fmt: off
3068        aligner.alphabet = [
3069            "Ala", "Arg", "Asn", "Asp", "Cys", "Gln", "Glu", "Gly", "His", "Ile",
3070            "Leu", "Lys", "Met", "Phe", "Pro", "Ser", "Thr", "Trp", "Tyr", "Val",
3071        ]
3072        # fmt: on
3073        score = aligner.score(seq1, seq2)
3074        self.assertAlmostEqual(score, 3.0)
3075        alignments = aligner.align(seq1, seq2)
3076        self.assertEqual(len(alignments), 2)
3077        self.assertEqual(
3078            str(alignments[0]),
3079            """\
3080Gly Ala --- --- Thr
3081||| ||| --- --- |||
3082Gly Ala Ala Cys Thr
3083""",
3084        )
3085        self.assertEqual(
3086            str(alignments[1]),
3087            """\
3088Gly --- Ala --- Thr
3089||| --- ||| --- |||
3090Gly Ala Ala Cys Thr
3091""",
3092        )
3093        self.assertAlmostEqual(alignments[0].score, 3.0)
3094        self.assertAlmostEqual(alignments[1].score, 3.0)
3095
3096
3097class TestArgumentErrors(unittest.TestCase):
3098    def test_aligner_string_errors(self):
3099        aligner = Align.PairwiseAligner()
3100        message = "^argument should support the sequence protocol$"
3101        with self.assertRaisesRegex(TypeError, message):
3102            aligner.score("AAA", 3)
3103        message = "^sequence has zero length$"
3104        with self.assertRaisesRegex(ValueError, message):
3105            aligner.score("AAA", "")
3106        with self.assertRaisesRegex(ValueError, message):
3107            aligner.score("AAA", "", strand="-")
3108        message = "^sequence contains letters not in the alphabet$"
3109        aligner.alphabet = "ABCD"
3110        with self.assertRaisesRegex(ValueError, message):
3111            aligner.score("AAA", "AAE")
3112
3113    def test_aligner_array_errors(self):
3114        aligner = Align.PairwiseAligner()
3115        s1 = "GGG"
3116        s2 = array.array("i", [ord("G"), ord("A"), ord("G")])
3117        score = aligner.score(s1, s2)
3118        self.assertAlmostEqual(score, 2.0)
3119        s2 = array.array("f", [1.0, 0.0, 1.0])
3120        message = "^sequence has incorrect data type 'f'$"
3121        with self.assertRaisesRegex(ValueError, message):
3122            aligner.score(s1, s2)
3123        aligner.wildcard = chr(99)
3124        s1 = array.array("i", [1, 5, 6])
3125        s2 = array.array("i", [1, 8, 6])
3126        s2a = array.array("i", [1, 8, 99])
3127        s2b = array.array("i", [1, 28, 6])
3128        aligner.match = 3.0
3129        aligner.mismatch = -2.0
3130        aligner.gap_score = -10.0
3131        score = aligner.score(s1, s2)
3132        self.assertAlmostEqual(score, 4.0)
3133        # the following two are valid as we are using match/mismatch scores
3134        # instead of a substitution matrix:
3135        score = aligner.score(s1, s2a)
3136        # since we set the wildcard character to chr(99), the number 99
3137        # is interpreted as an unknown character, and gets a zero score:
3138        self.assertAlmostEqual(score, 1.0)
3139        score = aligner.score(s1, s2b)
3140        self.assertAlmostEqual(score, 4.0)
3141        try:
3142            import numpy
3143        except ImportError:
3144            return
3145        aligner = Align.PairwiseAligner()
3146        aligner.wildcard = chr(99)
3147        s1 = "GGG"
3148        s2 = numpy.array([ord("G"), ord("A"), ord("G")], numpy.int32)
3149        score = aligner.score(s1, s2)
3150        self.assertAlmostEqual(score, 2.0)
3151        s2 = numpy.array([1.0, 0.0, 1.0])
3152        message = "^sequence has incorrect data type 'd'$"
3153        with self.assertRaisesRegex(ValueError, message):
3154            aligner.score(s1, s2)
3155        s2 = numpy.zeros((3, 2), numpy.int32)
3156        message = "^sequence has incorrect rank \\(2 expected 1\\)$"
3157        with self.assertRaisesRegex(ValueError, message):
3158            aligner.score(s1, s2)
3159        s1 = numpy.array([1, 5, 6], numpy.int32)
3160        s2 = numpy.array([1, 8, 6], numpy.int32)
3161        s2a = numpy.array([1, 8, 99], numpy.int32)
3162        s2b = numpy.array([1, 28, 6], numpy.int32)
3163        s2c = numpy.array([1, 8, -6], numpy.int32)
3164        aligner.match = 3.0
3165        aligner.mismatch = -2.0
3166        aligner.gap_score = -10.0
3167        score = aligner.score(s1, s2)
3168        self.assertAlmostEqual(score, 4.0)
3169        # the following two are valid as we are using match/mismatch scores
3170        # instead of a substitution matrix:
3171        score = aligner.score(s1, s2a)
3172        self.assertAlmostEqual(score, 1.0)
3173        score = aligner.score(s1, s2b)
3174        self.assertAlmostEqual(score, 4.0)
3175        # when using a substitution matrix, all indices should be between 0
3176        # and the size of the substitution matrix:
3177        m = 5 * numpy.eye(10)
3178        aligner.substitution_matrix = m
3179        score = aligner.score(s1, s2)  # no ValueError
3180        self.assertAlmostEqual(score, 10.0)
3181        message = "^sequence item 2 is negative \\(-6\\)$"
3182        with self.assertRaisesRegex(ValueError, message):
3183            aligner.score(s1, s2c)
3184        message = "^sequence item 1 is out of bound \\(28, should be < 10\\)$"
3185        with self.assertRaisesRegex(ValueError, message):
3186            aligner.score(s1, s2b)
3187        # note that the wildcard character is ignored when using a substitution
3188        # matrix, so 99 is interpreted as an index here:
3189        message = "^sequence item 2 is out of bound \\(99, should be < 10\\)$"
3190        with self.assertRaisesRegex(ValueError, message):
3191            aligner.score(s1, s2a)
3192
3193
3194class TestOverflowError(unittest.TestCase):
3195    def test_align_overflow_error(self):
3196        aligner = Align.PairwiseAligner()
3197        path = os.path.join("Align", "bsubtilis.fa")
3198        record = SeqIO.read(path, "fasta")
3199        seq1 = record.seq
3200        path = os.path.join("Align", "ecoli.fa")
3201        record = SeqIO.read(path, "fasta")
3202        seq2 = record.seq
3203        alignments = aligner.align(seq1, seq2)
3204        self.assertAlmostEqual(alignments.score, 1286.0)
3205        message = "^number of optimal alignments is larger than (%d|%d)$" % (
3206            2147483647,  # on 32-bit systems
3207            9223372036854775807,
3208        )  # on 64-bit systems
3209        with self.assertRaisesRegex(OverflowError, message):
3210            n = len(alignments)
3211        # confirm that we can still pull out individual alignments
3212        alignment = alignments[0]
3213        self.assertEqual(
3214            str(alignment),
3215            """\
3216ATTTA-TC-GGA-GAGTTTGATCC-TGGCTCAGGAC--GAACGCTGGCGGC-GTGCCTAAT-ACATGCAAGTCGAG-CGG-A-CAG-AT-GGGA-GCTTGCT-C----CCTGAT-GTTAGC-GGCGGACGGGTGAGTAACAC-GT--GGGTAA-CCTGCCTGTAA-G-ACTGGG--ATAACT-CC-GGGAAACCGG--GGCTAATACCGG-ATGGTTGTTTGAACCGCAT-GGTTCAA-AC-ATAA-AAGGTGG--C-TTCGG-C-TACCACTTA-C-A--G-ATG-GACCC-GC--GGCGCATTAGCTAGTT-GGTGAGG-TAACGGCTCACC-AAGGCGACGATGCG--TAGCC-GA--CCTGAGAGGG-TGATC--GGCCACACTGGGA-CTGAGACACGG-CCCAGACTCCTACGGGAGGCAGCAGTAGGG-AATC-TTCCGCA-A-TGGA-CG-AAAGTC-TGAC-GG-AGCAAC--GCCGCGTG-AGTGAT-GAAGG--TTTTCGGA-TC-GTAAAGCT-CTGTTGTT-AG-GG--A--A-G--A--ACAAGTGCCGTTCGAATAGGGC----GG-TACC-TTGACGGT-ACCTAAC-CAGAA-A-GCCAC-GGCTAACTAC-GTGCCAGCAGCCGCGGTAATACGT-AGG-TGGCAAGCGTTG--TCCGGAATTA-TTGGGCGTAAAG-GGCT-CGCAGGCGGTTTC-TTAAGTCT-GATGTGAAAG-CCCCCGG-CTCAACC-GGGGAGGG--T-CAT-TGGA-AACTGGGG-AA-CTTGAGTGCA--G-AAGAGGAGAGTGG-A-A-TTCCACG-TGTAGCGGTGAAATGCGTAGAGATG-TGGAGGAAC-ACCAG-TGGCGAAGGCGA-CTCTC--TGGT-CTGTAA--CTGACGCTG-AGGA-GCGAAAGCGTGGGGAGCGAA-CAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGT-G-CTAAGTGTT-AGGGGGTT-TCCGCCCCTT-AGTGC-TG-C------AGCTAACGCA-TTAAG-C-ACTCCGCCTGGGGAGTACGGTC-GCAAGACTG--AAA-CTCAAA-GGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAA-GCAACGCGAAGAACCTTACCA-GGTCTTGACATCCTCTGACA-A--T--CCTAGAGATAGGAC--G-T-CCCCTTCGGGGGCAGA--GTGA--CAGGTGG-TGCATGG-TTGTCGTCAGCTCGTGTC-GTGAGA-TGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTA--GTTGCCAGCA--TTCA-GTTG--GGC-A-CTCTAA-GGT-GACTGCC-GGTGAC-AAACC-GGAGGAAGGTGGGGATGACGTCAAA-TCATCATG-CCCCTTAT-GACCT-GGGCTACACACGTGCTACAATGGACAG-A-ACAAAG-GGCA-GCGAAACC--GCGAG-GTT-AAGCC--AATCC-CAC-AAA-T-CTGTTC-TCAGTTC-GGATC-GC-AGTCTGCAACTCGACTGCG--TGAAGCT-GGAATCGCTAGTAATCGC-GGATCAGCA-TGCCG-CGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCAC-GAG-AGT---TTGT-AACACCC-GAAGTC-GGTGAGG-T-AACCTTTTA-GG-AG--C-C--AGCCG-CC---GAAGGTGGGA--CAGATGA-TTGGGGTGAAGTCGTAACAAGGTAG-CCGTATCGGAAGG----TGCGGCT-GGATCACCTCCTTTCTA
3217|---|-|--|-|-||||||||||--||||||||-|---|||||||||||||-|-||||||--|||||||||||||--|||-|-|||-|--|--|-|||||||-|----|-|||--|--||--|||||||||||||||||----||--|||-||-|-||||||-|--|-|--|||--||||||-|--||-||||-||--|-|||||||||--||---------|||-|--|-|---|||-||-|-||-|-||-||--|-|||||-|-|-|---||--|-|--|-|||-|-|||-|---||-|-||||||||||--||||-||-||||||||||||-|-|||||||||-|---||||--|---|-|||||||--|||-|--|-|||||||||-|-|||||||||||-||-|||||||||||||||||||||||-|||-|||--||--|||-|-|||--||-||-|-|-|||--|--|||--|--||||||||-|-|||--|||||--||--|||--|--||||||-|-||-||----||-||--|--|-|--|--|-||||----|---||||---|----|--|-|--||||||-|-|||---|-|||||-|-||-||-||||||||-|-|||||||||||||||||||||||--|||-||-||||||||---||-|||||||-|-||||||||||-|-|--||||||||||||--|||||||--|||||||||--||||-||-|||||||-|||-|-----|-|||-||-|-|-||||---||-|||||||-|---|-|-||||-|-|-||-|-|-|||||-|-||||||||||||||||||||||||--||||||||--|||-|-|||||||||||--|-|-|--|||--|-|-||--||||||||--|||--|||||||||||||||||-||-|||||||||||||||||||||||||||||||||||||||--|-|-||---||---|||---||-|--||||-||-||-||-||-|------|||||||||--|||||-|-||-|-|||||||||||||||-|-|||||---|--|||-||||||-|-|||||||||||||||||||||||||||||||||||||||||||||||--||||||||||||||||||||--|||||||||||||----|||-|--|--||-||||||-|||---|-|-||--||||||---|-|--||||--||||||--|||||||-|-|||||||||||||||--||||-|-||||||||||||||||||||||||||||||||||-|||||---|||||||||---|-|--|--|--||--|-|||-||-||--|||||||-|-|||--||||--||||||||||||||||||||||||--||||||||-|||-|||--||||--|||||||||||||||||||||||-|-|-|-||||||-|--|-||||--||--|||||-|---||||---|--||-||--|||-|-|-||-|-|-|||-|-||||--|--||||||||||||||||-|---|||||-|-|||||||||||||||||--|||||||-|-||||--|||||||||||||||||||||||||||||||||||||||||||||--|-|-|||---|||--||-|----|||||--|||-||--|-||||||----||-||--|-|--|-||--|----|----||--|--||--|||-|-||||||||||||||||||||||--|||||--||--||----|||||-|-|||||||||||||---|
3218A---AAT-TG-AAGAGTTTGATC-ATGGCTCAG-A-TTGAACGCTGGCGGCAG-GCCTAA-CACATGCAAGTCGA-ACGGTAACAGGA-AG--AAGCTTGCTTCTTTGC-TGA-CG--AG-TGGCGGACGGGTGAGTAA---TGTCTGGG-AAAC-TGCCTG-A-TGGA--GGGGGATAACTAC-TGG-AAAC-GGTAG-CTAATACCG-CAT---------AAC-G--TCG---CAAGACCA-AAGA-GG-GGGACCTTCGGGCCT-C---TT-GCCATCGGATGTG-CCCAG-ATGG-G-ATTAGCTAGT-AGGTG-GGGTAACGGCTCACCTA-GGCGACGAT-C-CCTAGC-TG-GTC-TGAGAGG-ATGA-CCAG-CCACACTGG-AACTGAGACACGGTCC-AGACTCCTACGGGAGGCAGCAGT-GGGGAAT-ATT--GCACAATGG-GCGCAA-G-CCTGA-TG-CAGC--CATGCCGCGTGTA-TGA-AGAAGGCCTT--CGG-GT-TGTAAAG-TACT-TT---CAGCGGGGAGGAAGGGAGTA-AAGT----T---AATA---CCTTTG-CT-C-ATTGACG-TTACC---CGCAGAAGAAGC-ACCGGCTAACT-CCGTGCCAGCAGCCGCGGTAATACG-GAGGGTG-CAAGCGTT-AATC-GGAATTACT-GGGCGTAAAGCG-C-ACGCAGGCGGTTT-GTTAAGTC-AGATGTGAAA-TCCCC-GGGCTCAACCTGGG-A---ACTGCATCTG-ATA-CTGG--CAAGCTTGAGT-C-TCGTA-GAGG-G-G-GGTAGAATTCCA-GGTGTAGCGGTGAAATGCGTAGAGAT-CTGGAGGAA-TACC-GGTGGCGAAGGCG-GC-C-CCCTGG-AC-G-AAGACTGACGCT-CAGG-TGCGAAAGCGTGGGGAGC-AAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATG--TCGACT---TG--GAGG---TTGT--GCCC-TTGAG-GCGTGGCTTCCGGAGCTAACGC-GTTAAGTCGAC-C-GCCTGGGGAGTACGG-CCGCAAG---GTTAAAACTCAAATG-AATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGA-TGCAACGCGAAGAACCTTACC-TGGTCTTGACATCC----ACAGAACTTTCC-AGAGAT-GGA-TTGGTGCC--TTCGGG---A-ACTGTGAGACAGGTG-CTGCATGGCT-GTCGTCAGCTCGTGT-TGTGA-AATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTT-ATCTT-TTGTTGCCAGC-GGT-C-CG--GCCGG-GAACTC-AAAGG-AGACTGCCAG-TGA-TAAAC-TGGAGGAAGGTGGGGATGACGTCAA-GTCATCATGGCCC-TTA-CGACC-AGGGCTACACACGTGCTACAATGG-C-GCATACAAAGAG--AAGCGA--CCTCGCGAGAG--CAAGC-GGA--CCTCA-TAAAGTGC-GT-CGT-AGT-CCGGAT-TG-GAGTCTGCAACTCGACT-C-CATGAAG-TCGGAATCGCTAGTAATCG-TGGATCAG-AATGCC-ACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCA-TG-GGAGTGGGTTG-CAA-A---AGAAGT-AGGT-AG-CTTAACCTT---CGGGAGGGCGCTTA-CC-AC-TTTG----TG--ATTCA--TGACT-GGGGTGAAGTCGTAACAAGGTA-ACCGTA--GG--GGAACCTGCGG-TTGGATCACCTCCTT---A
3219""",
3220        )
3221        self.assertEqual(alignment.shape, (2, 1811))
3222        self.assertAlmostEqual(alignment.score, 1286.0)
3223        alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
3224        self.assertAlmostEqual(alignments.score, 1286.0)
3225        message = "^number of optimal alignments is larger than (%d|%d)$" % (
3226            2147483647,  # on 32-bit systems
3227            9223372036854775807,
3228        )  # on 64-bit systems
3229        with self.assertRaisesRegex(OverflowError, message):
3230            n = len(alignments)
3231        # confirm that we can still pull out individual alignments
3232        alignment = alignments[0]
3233        self.assertEqual(
3234            str(alignment),
3235            """\
3236ATTTA-TC-GGA-GAGTTTGATCC-TGGCTCAGGAC--GAACGCTGGCGGC-GTGCCTAAT-ACATGCAAGTCGAG-CGG-A-CAG-AT-GGGA-GCTTGCT-C----CCTGAT-GTTAGC-GGCGGACGGGTGAGTAACAC-GT--GGGTAA-CCTGCCTGTAA-G-ACTGGG--ATAACT-CC-GGGAAACCGG--GGCTAATACCGG-ATGGTTGTTTGAACCGCAT-GGTTCAA-AC-ATAA-AAGGTGG--C-TTCGG-C-TACCACTTA-C-A--G-ATG-GACCC-GC--GGCGCATTAGCTAGTT-GGTGAGG-TAACGGCTCACC-AAGGCGACGATGCG--TAGCC-GA--CCTGAGAGGG-TGATC--GGCCACACTGGGA-CTGAGACACGG-CCCAGACTCCTACGGGAGGCAGCAGTAGGG-AATC-TTCCGCA-A-TGGA-CG-AAAGTC-TGAC-GG-AGCAAC--GCCGCGTG-AGTGAT-GAAGG--TTTTCGGA-TC-GTAAAGCT-CTGTTGTT-AG-GG--A--A-G--A--ACAAGTGCCGTTCGAATAGGGC----GG-TACC-TTGACGGT-ACCTAAC-CAGAA-A-GCCAC-GGCTAACTAC-GTGCCAGCAGCCGCGGTAATACGT-AGG-TGGCAAGCGTTG--TCCGGAATTA-TTGGGCGTAAAG-GGCT-CGCAGGCGGTTTC-TTAAGTCT-GATGTGAAAG-CCCCCGG-CTCAACC-GGGGAGGG--T-CAT-TGGA-AACTGGGG-AA-CTTGAGTGCA--G-AAGAGGAGAGTGG-A-A-TTCCACG-TGTAGCGGTGAAATGCGTAGAGATG-TGGAGGAAC-ACCAG-TGGCGAAGGCGA-CTCTC--TGGT-CTGTAA--CTGACGCTG-AGGA-GCGAAAGCGTGGGGAGCGAA-CAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGT-G-CTAAGTGTT-AGGGGGTT-TCCGCCCCTT-AGTGC-TG-C------AGCTAACGCA-TTAAG-C-ACTCCGCCTGGGGAGTACGGTC-GCAAGACTG--AAA-CTCAAA-GGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAA-GCAACGCGAAGAACCTTACCA-GGTCTTGACATCCTCTGACA-A--T--CCTAGAGATAGGAC--G-T-CCCCTTCGGGGGCAGA--GTGA--CAGGTGG-TGCATGG-TTGTCGTCAGCTCGTGTC-GTGAGA-TGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTA--GTTGCCAGCA--TTCA-GTTG--GGC-A-CTCTAA-GGT-GACTGCC-GGTGAC-AAACC-GGAGGAAGGTGGGGATGACGTCAAA-TCATCATG-CCCCTTAT-GACCT-GGGCTACACACGTGCTACAATGGACAG-A-ACAAAG-GGCA-GCGAAACC--GCGAG-GTT-AAGCC--AATCC-CAC-AAA-T-CTGTTC-TCAGTTC-GGATC-GC-AGTCTGCAACTCGACTGCG--TGAAGCT-GGAATCGCTAGTAATCGC-GGATCAGCA-TGCCG-CGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCAC-GAG-AGT---TTGT-AACACCC-GAAGTC-GGTGAGG-T-AACCTTTTA-GG-AG--C-C--AGCCG-CC---GAAGGTGGGA--CAGATGA-TTGGGGTGAAGTCGTAACAAGGTAG-CCGTATCGGAAGG----TGCGGCT-GGATCACCTCCTTTCTA
3237|---|-|--|-|-||||||||||--||||||||-|---|||||||||||||-|-||||||--|||||||||||||--|||-|-|||-|--|--|-|||||||-|----|-|||--|--||--|||||||||||||||||----||--|||-||-|-||||||-|--|-|--|||--||||||-|--||-||||-||--|-|||||||||--||---------|||-|--|-|---|||-||-|-||-|-||-||--|-|||||-|-|-|---||--|-|--|-|||-|-|||-|---||-|-||||||||||--||||-||-||||||||||||-|-|||||||||-|---||||--|---|-|||||||--|||-|--|-|||||||||-|-|||||||||||-||-|||||||||||||||||||||||-|||-|||--||--|||-|-|||--||-||-|-|-|||--|--|||--|--||||||||-|-|||--|||||--||--|||--|--||||||-|-||-||----||-||--|--|-|--|--|-||||----|---||||---|----|--|-|--||||||-|-|||---|-|||||-|-||-||-||||||||-|-|||||||||||||||||||||||--|||-||-||||||||---||-|||||||-|-||||||||||-|-|--||||||||||||--|||||||--|||||||||--||||-||-|||||||-|||-|-----|-|||-||-|-|-||||---||-|||||||-|---|-|-||||-|-|-||-|-|-|||||-|-||||||||||||||||||||||||--||||||||--|||-|-|||||||||||--|-|-|--|||--|-|-||--||||||||--|||--|||||||||||||||||-||-|||||||||||||||||||||||||||||||||||||||--|-|-||---||---|||---||-|--||||-||-||-||-||-|------|||||||||--|||||-|-||-|-|||||||||||||||-|-|||||---|--|||-||||||-|-|||||||||||||||||||||||||||||||||||||||||||||||--||||||||||||||||||||--|||||||||||||----|||-|--|--||-||||||-|||---|-|-||--||||||---|-|--||||--||||||--|||||||-|-|||||||||||||||--||||-|-||||||||||||||||||||||||||||||||||-|||||---|||||||||---|-|--|--|--||--|-|||-||-||--|||||||-|-|||--||||--||||||||||||||||||||||||--||||||||-|||-|||--||||--|||||||||||||||||||||||-|-|-|-||||||-|--|-||||--||--|||||-|---||||---|--||-||--|||-|-|-||-|-|-|||-|-||||--|--||||||||||||||||-|---|||||-|-|||||||||||||||||--|||||||-|-||||--|||||||||||||||||||||||||||||||||||||||||||||--|-|-|||---|||--||-|----|||||--|||-||--|-||||||----||-||--|-|--|-||--|----|----||--|--||--|||-|-||||||||||||||||||||||--|||||--||--||----|||||-|-|||||||||||||---|
3238A---AAT-TG-AAGAGTTTGATC-ATGGCTCAG-A-TTGAACGCTGGCGGCAG-GCCTAA-CACATGCAAGTCGA-ACGGTAACAGGA-AG--AAGCTTGCTTCTTTGC-TGA-CG--AG-TGGCGGACGGGTGAGTAA---TGTCTGGG-AAAC-TGCCTG-A-TGGA--GGGGGATAACTAC-TGG-AAAC-GGTAG-CTAATACCG-CAT---------AAC-G--TCG---CAAGACCA-AAGA-GG-GGGACCTTCGGGCCT-C---TT-GCCATCGGATGTG-CCCAG-ATGG-G-ATTAGCTAGT-AGGTG-GGGTAACGGCTCACCTA-GGCGACGAT-C-CCTAGC-TG-GTC-TGAGAGG-ATGA-CCAG-CCACACTGG-AACTGAGACACGGTCC-AGACTCCTACGGGAGGCAGCAGT-GGGGAAT-ATT--GCACAATGG-GCGCAA-G-CCTGA-TG-CAGC--CATGCCGCGTGTA-TGA-AGAAGGCCTT--CGG-GT-TGTAAAG-TACT-TT---CAGCGGGGAGGAAGGGAGTA-AAGT----T---AATA---CCTTTG-CT-C-ATTGACG-TTACC---CGCAGAAGAAGC-ACCGGCTAACT-CCGTGCCAGCAGCCGCGGTAATACG-GAGGGTG-CAAGCGTT-AATC-GGAATTACT-GGGCGTAAAGCG-C-ACGCAGGCGGTTT-GTTAAGTC-AGATGTGAAA-TCCCC-GGGCTCAACCTGGG-A---ACTGCATCTG-ATA-CTGG--CAAGCTTGAGT-C-TCGTA-GAGG-G-G-GGTAGAATTCCA-GGTGTAGCGGTGAAATGCGTAGAGAT-CTGGAGGAA-TACC-GGTGGCGAAGGCG-GC-C-CCCTGG-AC-G-AAGACTGACGCT-CAGG-TGCGAAAGCGTGGGGAGC-AAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATG--TCGACT---TG--GAGG---TTGT--GCCC-TTGAG-GCGTGGCTTCCGGAGCTAACGC-GTTAAGTCGAC-C-GCCTGGGGAGTACGG-CCGCAAG---GTTAAAACTCAAATG-AATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGA-TGCAACGCGAAGAACCTTACC-TGGTCTTGACATCC----ACAGAACTTTCC-AGAGAT-GGA-TTGGTGCC--TTCGGG---A-ACTGTGAGACAGGTG-CTGCATGGCT-GTCGTCAGCTCGTGT-TGTGA-AATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTT-ATCTT-TTGTTGCCAGC-GGT-C-CG--GCCGG-GAACTC-AAAGG-AGACTGCCAG-TGA-TAAAC-TGGAGGAAGGTGGGGATGACGTCAA-GTCATCATGGCCC-TTA-CGACC-AGGGCTACACACGTGCTACAATGG-C-GCATACAAAGAG--AAGCGA--CCTCGCGAGAG--CAAGC-GGA--CCTCA-TAAAGTGC-GT-CGT-AGT-CCGGAT-TG-GAGTCTGCAACTCGACT-C-CATGAAG-TCGGAATCGCTAGTAATCG-TGGATCAG-AATGCC-ACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCA-TG-GGAGTGGGTTG-CAA-A---AGAAGT-AGGT-AG-CTTAACCTT---CGGGAGGGCGCTTA-CC-AC-TTTG----TG--ATTCA--TGACT-GGGGTGAAGTCGTAACAAGGTA-ACCGTA--GG--GGAACCTGCGG-TTGGATCACCTCCTT---A
3239""",
3240        )
3241        self.assertAlmostEqual(alignment.score, 1286.0)
3242        self.assertEqual(alignment.shape, (2, 1811))
3243
3244
3245class TestKeywordArgumentsConstructor(unittest.TestCase):
3246    def test_confusing_arguments(self):
3247        aligner = Align.PairwiseAligner(
3248            mode="local",
3249            open_gap_score=-0.3,
3250            extend_gap_score=-0.1,
3251            target_open_gap_score=-0.2,
3252        )
3253        self.assertEqual(
3254            str(aligner),
3255            """\
3256Pairwise sequence aligner with parameters
3257  wildcard: None
3258  match_score: 1.000000
3259  mismatch_score: 0.000000
3260  target_internal_open_gap_score: -0.200000
3261  target_internal_extend_gap_score: -0.100000
3262  target_left_open_gap_score: -0.200000
3263  target_left_extend_gap_score: -0.100000
3264  target_right_open_gap_score: -0.200000
3265  target_right_extend_gap_score: -0.100000
3266  query_internal_open_gap_score: -0.300000
3267  query_internal_extend_gap_score: -0.100000
3268  query_left_open_gap_score: -0.300000
3269  query_left_extend_gap_score: -0.100000
3270  query_right_open_gap_score: -0.300000
3271  query_right_extend_gap_score: -0.100000
3272  mode: local
3273""",
3274        )
3275
3276
3277class TestUnicodeStrings(unittest.TestCase):
3278    def test_needlemanwunsch_simple1(self):
3279        seq1 = "ĞĀĀČŦ"
3280        seq2 = "ĞĀŦ"
3281        aligner = Align.PairwiseAligner()
3282        aligner.mode = "global"
3283        aligner.alphabet = None
3284        self.assertEqual(aligner.algorithm, "Needleman-Wunsch")
3285        score = aligner.score(seq1, seq2)
3286        self.assertAlmostEqual(score, 3.0)
3287        alignments = aligner.align(seq1, seq2)
3288        self.assertEqual(len(alignments), 2)
3289        alignment = alignments[0]
3290        self.assertAlmostEqual(alignment.score, 3.0)
3291        self.assertEqual(
3292            str(alignment),
3293            """\
3294ĞĀĀČŦ
3295||--|
3296ĞĀ--Ŧ
3297""",
3298        )
3299        self.assertEqual(alignment.shape, (2, 5))
3300        self.assertEqual(alignment.aligned, (((0, 2), (4, 5)), ((0, 2), (2, 3))))
3301        alignment = alignments[1]
3302        self.assertAlmostEqual(alignment.score, 3.0)
3303        self.assertEqual(
3304            str(alignment),
3305            """\
3306ĞĀĀČŦ
3307|-|-|
3308Ğ-Ā-Ŧ
3309""",
3310        )
3311        self.assertEqual(alignment.shape, (2, 5))
3312        self.assertEqual(
3313            alignment.aligned, (((0, 1), (2, 3), (4, 5)), ((0, 1), (1, 2), (2, 3)))
3314        )
3315
3316    def test_align_affine1_score(self):
3317        aligner = Align.PairwiseAligner()
3318        aligner.mode = "global"
3319        aligner.alphabet = None
3320        aligner.match_score = 0
3321        aligner.mismatch_score = -1
3322        aligner.open_gap_score = -5
3323        aligner.extend_gap_score = -1
3324        self.assertEqual(aligner.algorithm, "Gotoh global alignment algorithm")
3325        score = aligner.score("いい", "あいいう")
3326        self.assertAlmostEqual(score, -7.0)
3327
3328    def test_smithwaterman(self):
3329        aligner = Align.PairwiseAligner()
3330        aligner.mode = "local"
3331        aligner.alphabet = None
3332        aligner.gap_score = -0.1
3333        self.assertEqual(aligner.algorithm, "Smith-Waterman")
3334        score = aligner.score("ℵℷℶℷ", "ℸℵℶℸ")
3335        self.assertAlmostEqual(score, 1.9)
3336        alignments = aligner.align("ℵℷℶℷ", "ℸℵℶℸ")
3337        self.assertEqual(len(alignments), 1)
3338        alignment = alignments[0]
3339        self.assertAlmostEqual(alignment.score, 1.9)
3340        self.assertEqual(
3341            str(alignment),
3342            """\
3343 ℵℷℶℷ
3344 |-|
3345ℸℵ-ℶℸ
3346""",
3347        )
3348        self.assertEqual(alignment.shape, (2, 3))
3349        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((1, 2), (2, 3))))
3350
3351    def test_gotoh_local(self):
3352        aligner = Align.PairwiseAligner()
3353        aligner.alphabet = None
3354        aligner.mode = "local"
3355        aligner.open_gap_score = -0.1
3356        aligner.extend_gap_score = 0.0
3357        self.assertEqual(aligner.algorithm, "Gotoh local alignment algorithm")
3358        score = aligner.score("生物科物", "学生科学")
3359        self.assertAlmostEqual(score, 1.9)
3360        alignments = aligner.align("生物科物", "学生科学")
3361        self.assertEqual(len(alignments), 1)
3362        alignment = alignments[0]
3363        self.assertAlmostEqual(alignment.score, 1.9)
3364        self.assertEqual(
3365            str(alignment),
3366            """\
3367 生物科物
3368 |-|
3369学生-科学
3370""",
3371        )
3372        self.assertEqual(alignment.shape, (2, 3))
3373        self.assertEqual(alignment.aligned, (((0, 1), (2, 3)), ((1, 2), (2, 3))))
3374
3375
3376class TestAlignerPickling(unittest.TestCase):
3377    def test_pickle_aligner_match_mismatch(self):
3378        import pickle
3379
3380        aligner = Align.PairwiseAligner()
3381        aligner.wildcard = "X"
3382        aligner.match_score = 3
3383        aligner.mismatch_score = -2
3384        aligner.target_internal_open_gap_score = -2.5
3385        aligner.target_internal_extend_gap_score = -3.5
3386        aligner.target_left_open_gap_score = -2.5
3387        aligner.target_left_extend_gap_score = -3.5
3388        aligner.target_right_open_gap_score = -4
3389        aligner.target_right_extend_gap_score = -4
3390        aligner.query_internal_open_gap_score = -0.1
3391        aligner.query_internal_extend_gap_score = -2
3392        aligner.query_left_open_gap_score = -9
3393        aligner.query_left_extend_gap_score = +1
3394        aligner.query_right_open_gap_score = -1
3395        aligner.query_right_extend_gap_score = -2
3396        aligner.mode = "local"
3397        state = pickle.dumps(aligner)
3398        pickled_aligner = pickle.loads(state)
3399        self.assertEqual(aligner.wildcard, pickled_aligner.wildcard)
3400        self.assertAlmostEqual(aligner.match_score, pickled_aligner.match_score)
3401        self.assertAlmostEqual(aligner.mismatch_score, pickled_aligner.mismatch_score)
3402        self.assertIsNone(pickled_aligner.substitution_matrix)
3403        self.assertAlmostEqual(
3404            aligner.target_internal_open_gap_score,
3405            pickled_aligner.target_internal_open_gap_score,
3406        )
3407        self.assertAlmostEqual(
3408            aligner.target_internal_extend_gap_score,
3409            pickled_aligner.target_internal_extend_gap_score,
3410        )
3411        self.assertAlmostEqual(
3412            aligner.target_left_open_gap_score,
3413            pickled_aligner.target_left_open_gap_score,
3414        )
3415        self.assertAlmostEqual(
3416            aligner.target_left_extend_gap_score,
3417            pickled_aligner.target_left_extend_gap_score,
3418        )
3419        self.assertAlmostEqual(
3420            aligner.target_right_open_gap_score,
3421            pickled_aligner.target_right_open_gap_score,
3422        )
3423        self.assertAlmostEqual(
3424            aligner.target_right_extend_gap_score,
3425            pickled_aligner.target_right_extend_gap_score,
3426        )
3427        self.assertAlmostEqual(
3428            aligner.query_internal_open_gap_score,
3429            pickled_aligner.query_internal_open_gap_score,
3430        )
3431        self.assertAlmostEqual(
3432            aligner.query_internal_extend_gap_score,
3433            pickled_aligner.query_internal_extend_gap_score,
3434        )
3435        self.assertAlmostEqual(
3436            aligner.query_left_open_gap_score,
3437            pickled_aligner.query_left_open_gap_score,
3438        )
3439        self.assertAlmostEqual(
3440            aligner.query_left_extend_gap_score,
3441            pickled_aligner.query_left_extend_gap_score,
3442        )
3443        self.assertAlmostEqual(
3444            aligner.query_right_open_gap_score,
3445            pickled_aligner.query_right_open_gap_score,
3446        )
3447        self.assertAlmostEqual(
3448            aligner.query_right_extend_gap_score,
3449            pickled_aligner.query_right_extend_gap_score,
3450        )
3451        self.assertEqual(aligner.mode, pickled_aligner.mode)
3452
3453    def test_pickle_aligner_substitution_matrix(self):
3454        try:
3455            from Bio.Align import substitution_matrices
3456        except ImportError:
3457            return
3458        import pickle
3459
3460        aligner = Align.PairwiseAligner()
3461        aligner.wildcard = "N"
3462        aligner.substitution_matrix = substitution_matrices.load("BLOSUM80")
3463        aligner.target_internal_open_gap_score = -5
3464        aligner.target_internal_extend_gap_score = -3
3465        aligner.target_left_open_gap_score = -2
3466        aligner.target_left_extend_gap_score = -3
3467        aligner.target_right_open_gap_score = -4.5
3468        aligner.target_right_extend_gap_score = -4.3
3469        aligner.query_internal_open_gap_score = -2
3470        aligner.query_internal_extend_gap_score = -2.5
3471        aligner.query_left_open_gap_score = -9.1
3472        aligner.query_left_extend_gap_score = +1.7
3473        aligner.query_right_open_gap_score = -1.9
3474        aligner.query_right_extend_gap_score = -2.0
3475        aligner.mode = "global"
3476        state = pickle.dumps(aligner)
3477        pickled_aligner = pickle.loads(state)
3478        self.assertEqual(aligner.wildcard, pickled_aligner.wildcard)
3479        self.assertIsNone(pickled_aligner.match_score)
3480        self.assertIsNone(pickled_aligner.mismatch_score)
3481        self.assertTrue(
3482            (aligner.substitution_matrix == pickled_aligner.substitution_matrix).all()
3483        )
3484        self.assertEqual(
3485            aligner.substitution_matrix.alphabet,
3486            pickled_aligner.substitution_matrix.alphabet,
3487        )
3488        self.assertAlmostEqual(
3489            aligner.target_internal_open_gap_score,
3490            pickled_aligner.target_internal_open_gap_score,
3491        )
3492        self.assertAlmostEqual(
3493            aligner.target_internal_extend_gap_score,
3494            pickled_aligner.target_internal_extend_gap_score,
3495        )
3496        self.assertAlmostEqual(
3497            aligner.target_left_open_gap_score,
3498            pickled_aligner.target_left_open_gap_score,
3499        )
3500        self.assertAlmostEqual(
3501            aligner.target_left_extend_gap_score,
3502            pickled_aligner.target_left_extend_gap_score,
3503        )
3504        self.assertAlmostEqual(
3505            aligner.target_right_open_gap_score,
3506            pickled_aligner.target_right_open_gap_score,
3507        )
3508        self.assertAlmostEqual(
3509            aligner.target_right_extend_gap_score,
3510            pickled_aligner.target_right_extend_gap_score,
3511        )
3512        self.assertAlmostEqual(
3513            aligner.query_internal_open_gap_score,
3514            pickled_aligner.query_internal_open_gap_score,
3515        )
3516        self.assertAlmostEqual(
3517            aligner.query_internal_extend_gap_score,
3518            pickled_aligner.query_internal_extend_gap_score,
3519        )
3520        self.assertAlmostEqual(
3521            aligner.query_left_open_gap_score,
3522            pickled_aligner.query_left_open_gap_score,
3523        )
3524        self.assertAlmostEqual(
3525            aligner.query_left_extend_gap_score,
3526            pickled_aligner.query_left_extend_gap_score,
3527        )
3528        self.assertAlmostEqual(
3529            aligner.query_right_open_gap_score,
3530            pickled_aligner.query_right_open_gap_score,
3531        )
3532        self.assertAlmostEqual(
3533            aligner.query_right_extend_gap_score,
3534            pickled_aligner.query_right_extend_gap_score,
3535        )
3536        self.assertEqual(aligner.mode, pickled_aligner.mode)
3537
3538
3539class TestAlignmentFormat(unittest.TestCase):
3540    def test_alignment_simple(self):
3541        chromosome = "ACGATCAGCGAGCATNGAGCACTACGACAGCGAGTGACCACTATTCGCGATCAGGAGCAGATACTTTACGAGCATCGGC"
3542        transcript = "AGCATCGAGCGACTTGAGTACTATTCATACTTTCGAGC"
3543        aligner = Align.PairwiseAligner()
3544        aligner.query_extend_gap_score = 0
3545        aligner.query_open_gap_score = -3
3546        aligner.target_gap_score = -3
3547        aligner.end_gap_score = 0
3548        aligner.mismatch = -1
3549        alignments = aligner.align(chromosome, transcript)
3550        self.assertEqual(len(alignments), 1)
3551        alignment = alignments[0]
3552        self.assertAlmostEqual(alignment.score, 19.0)
3553        self.assertEqual(
3554            str(alignment),
3555            """\
3556ACGATCAGCGAGCATNGAGC-ACTACGACAGCGAGTGACCACTATTCGCGATCAGGAGCAGATACTTTACGAGCATCGGC
3557----------|||||.||||-|||-----------|||..|||||||--------------|||||||-|||||------
3558----------AGCATCGAGCGACT-----------TGAGTACTATTC--------------ATACTTT-CGAGC------
3559""",
3560        )
3561        self.assertEqual(alignment.shape, (2, 80))
3562        self.assertEqual(
3563            alignment.format("psl"),
3564            """\
356534	2	0	1	1	1	3	26	+	query	38	0	38	target	79	10	73	5	10,3,12,7,5,	0,11,14,26,33,	10,20,34,60,68,
3566""",
3567        )
3568        self.assertEqual(
3569            alignment.format("bed"),
3570            """\
3571target	10	73	query	19.0	+	10	73	0	5	10,3,12,7,5,	0,10,24,50,58,
3572""",
3573        )
3574        self.assertEqual(
3575            alignment.format("sam"),
3576            """\
3577query	0	target	11	255	10M1I3M11D12M14D7M1D5M	*	0	0	AGCATCGAGCGACTTGAGTACTATTCATACTTTCGAGC	*	AS:i:19
3578""",
3579        )
3580        alignments = aligner.align(
3581            chromosome, reverse_complement(transcript), strand="-"
3582        )
3583        self.assertEqual(len(alignments), 1)
3584        alignment = alignments[0]
3585        self.assertAlmostEqual(alignment.score, 19.0)
3586        self.assertEqual(
3587            str(alignment),
3588            """\
3589ACGATCAGCGAGCATNGAGC-ACTACGACAGCGAGTGACCACTATTCGCGATCAGGAGCAGATACTTTACGAGCATCGGC
3590----------|||||.||||-|||-----------|||..|||||||--------------|||||||-|||||------
3591----------AGCATCGAGCGACT-----------TGAGTACTATTC--------------ATACTTT-CGAGC------
3592""",
3593        )
3594        self.assertEqual(alignment.shape, (2, 80))
3595        self.assertEqual(
3596            alignment.format("psl"),
3597            """\
359834	2	0	1	1	1	3	26	-	query	38	0	38	target	79	10	73	5	10,3,12,7,5,	0,11,14,26,33,	10,20,34,60,68,
3599""",
3600        )
3601        self.assertEqual(
3602            alignment.format("bed"),
3603            """\
3604target	10	73	query	19.0	-	10	73	0	5	10,3,12,7,5,	0,10,24,50,58,
3605""",
3606        )
3607        self.assertEqual(
3608            alignment.format("sam"),
3609            """\
3610query	16	target	11	255	10M1I3M11D12M14D7M1D5M	*	0	0	AGCATCGAGCGACTTGAGTACTATTCATACTTTCGAGC	*	AS:i:19
3611""",
3612        )
3613
3614    def test_alignment_end_gap(self):
3615        aligner = Align.PairwiseAligner()
3616        aligner.gap_score = -1
3617        aligner.end_gap_score = 0
3618        aligner.mismatch = -10
3619        alignments = aligner.align("ACGTAGCATCAGC", "CCCCACGTAGCATCAGC")
3620        self.assertEqual(len(alignments), 1)
3621        self.assertAlmostEqual(alignments.score, 13.0)
3622        alignment = alignments[0]
3623        self.assertEqual(
3624            str(alignment),
3625            """\
3626----ACGTAGCATCAGC
3627----|||||||||||||
3628CCCCACGTAGCATCAGC
3629""",
3630        )
3631        self.assertEqual(alignment.shape, (2, 17))
3632        self.assertEqual(
3633            alignment.format("psl"),
3634            """\
363513	0	0	0	0	0	0	0	+	query	17	4	17	target	13	0	13	1	13,	4,	0,
3636""",
3637        )
3638        self.assertEqual(
3639            alignment.format("bed"),
3640            """\
3641target	0	13	query	13.0	+	0	13	0	1	13,	0,
3642""",
3643        )
3644        self.assertEqual(
3645            alignment.format("sam"),
3646            """\
3647query	0	target	1	255	4S13M	*	0	0	CCCCACGTAGCATCAGC	*	AS:i:13
3648""",
3649        )
3650        alignments = aligner.align(
3651            "ACGTAGCATCAGC", reverse_complement("CCCCACGTAGCATCAGC"), strand="-"
3652        )
3653        self.assertEqual(len(alignments), 1)
3654        self.assertAlmostEqual(alignments.score, 13.0)
3655        alignment = alignments[0]
3656        self.assertEqual(
3657            str(alignment),
3658            """\
3659----ACGTAGCATCAGC
3660----|||||||||||||
3661CCCCACGTAGCATCAGC
3662""",
3663        )
3664        self.assertEqual(alignment.shape, (2, 17))
3665        self.assertEqual(
3666            alignment.format("psl"),
3667            """\
366813	0	0	0	0	0	0	0	-	query	17	0	13	target	13	0	13	1	13,	4,	0,
3669""",
3670        )
3671        self.assertEqual(
3672            alignment.format("bed"),
3673            """\
3674target	0	13	query	13.0	-	0	13	0	1	13,	0,
3675""",
3676        )
3677        self.assertEqual(
3678            alignment.format("sam"),
3679            """\
3680query	16	target	1	255	4S13M	*	0	0	CCCCACGTAGCATCAGC	*	AS:i:13
3681""",
3682        )
3683        alignments = aligner.align("CCCCACGTAGCATCAGC", "ACGTAGCATCAGC")
3684        self.assertEqual(len(alignments), 1)
3685        alignment = alignments[0]
3686        self.assertAlmostEqual(alignment.score, 13.0)
3687        self.assertEqual(
3688            str(alignment),
3689            """\
3690CCCCACGTAGCATCAGC
3691----|||||||||||||
3692----ACGTAGCATCAGC
3693""",
3694        )
3695        self.assertEqual(alignment.shape, (2, 17))
3696        self.assertEqual(
3697            alignment.format("psl"),
3698            """\
369913	0	0	0	0	0	0	0	+	query	13	0	13	target	17	4	17	1	13,	0,	4,
3700""",
3701        )
3702        self.assertEqual(
3703            alignment.format("bed"),
3704            """\
3705target	4	17	query	13.0	+	4	17	0	1	13,	0,
3706""",
3707        )
3708        self.assertEqual(
3709            alignment.format("sam"),
3710            """\
3711query	0	target	5	255	13M	*	0	0	ACGTAGCATCAGC	*	AS:i:13
3712""",
3713        )
3714        alignments = aligner.align(
3715            "CCCCACGTAGCATCAGC", reverse_complement("ACGTAGCATCAGC"), strand="-"
3716        )
3717        self.assertEqual(len(alignments), 1)
3718        alignment = alignments[0]
3719        self.assertAlmostEqual(alignment.score, 13.0)
3720        self.assertEqual(
3721            str(alignment),
3722            """\
3723CCCCACGTAGCATCAGC
3724----|||||||||||||
3725----ACGTAGCATCAGC
3726""",
3727        )
3728        self.assertEqual(alignment.shape, (2, 17))
3729        self.assertEqual(
3730            alignment.format("psl"),
3731            """\
373213	0	0	0	0	0	0	0	-	query	13	0	13	target	17	4	17	1	13,	0,	4,
3733""",
3734        )
3735        self.assertEqual(
3736            alignment.format("bed"),
3737            """\
3738target	4	17	query	13.0	-	4	17	0	1	13,	0,
3739""",
3740        )
3741        self.assertEqual(
3742            alignment.format("sam"),
3743            """\
3744query	16	target	5	255	13M	*	0	0	ACGTAGCATCAGC	*	AS:i:13
3745""",
3746        )
3747        alignments = aligner.align("ACGTAGCATCAGC", "ACGTAGCATCAGCGGGG")
3748        self.assertEqual(len(alignments), 1)
3749        alignment = alignments[0]
3750        self.assertEqual(
3751            str(alignment),
3752            """\
3753ACGTAGCATCAGC----
3754|||||||||||||----
3755ACGTAGCATCAGCGGGG
3756""",
3757        )
3758        self.assertEqual(alignment.shape, (2, 17))
3759        self.assertEqual(
3760            alignment.format("psl"),
3761            """\
376213	0	0	0	0	0	0	0	+	query	17	0	13	target	13	0	13	1	13,	0,	0,
3763""",
3764        )
3765        self.assertEqual(
3766            alignment.format("bed"),
3767            """\
3768target	0	13	query	13.0	+	0	13	0	1	13,	0,
3769""",
3770        )
3771        self.assertEqual(
3772            alignment.format("sam"),
3773            """\
3774query	0	target	1	255	13M4S	*	0	0	ACGTAGCATCAGCGGGG	*	AS:i:13
3775""",
3776        )
3777        alignments = aligner.align(
3778            "ACGTAGCATCAGC", reverse_complement("ACGTAGCATCAGCGGGG"), strand="-"
3779        )
3780        self.assertEqual(len(alignments), 1)
3781        alignment = alignments[0]
3782        self.assertEqual(
3783            str(alignment),
3784            """\
3785ACGTAGCATCAGC----
3786|||||||||||||----
3787ACGTAGCATCAGCGGGG
3788""",
3789        )
3790        self.assertEqual(alignment.shape, (2, 17))
3791        self.assertEqual(
3792            alignment.format("psl"),
3793            """\
379413	0	0	0	0	0	0	0	-	query	17	4	17	target	13	0	13	1	13,	0,	0,
3795""",
3796        )
3797        self.assertEqual(
3798            alignment.format("bed"),
3799            """\
3800target	0	13	query	13.0	-	0	13	0	1	13,	0,
3801""",
3802        )
3803        self.assertEqual(
3804            alignment.format("sam"),
3805            """\
3806query	16	target	1	255	13M4S	*	0	0	ACGTAGCATCAGCGGGG	*	AS:i:13
3807""",
3808        )
3809        alignments = aligner.align("ACGTAGCATCAGCGGGG", "ACGTAGCATCAGC")
3810        self.assertEqual(len(alignments), 1)
3811        alignment = alignments[0]
3812        self.assertAlmostEqual(alignment.score, 13.0)
3813        self.assertEqual(
3814            str(alignment),
3815            """\
3816ACGTAGCATCAGCGGGG
3817|||||||||||||----
3818ACGTAGCATCAGC----
3819""",
3820        )
3821        self.assertEqual(alignment.shape, (2, 17))
3822        self.assertEqual(
3823            alignment.format("psl"),
3824            """\
382513	0	0	0	0	0	0	0	+	query	13	0	13	target	17	0	13	1	13,	0,	0,
3826""",
3827        )
3828        self.assertEqual(
3829            alignment.format("bed"),
3830            """\
3831target	0	13	query	13.0	+	0	13	0	1	13,	0,
3832""",
3833        )
3834        self.assertEqual(
3835            alignment.format("sam"),
3836            """\
3837query	0	target	1	255	13M	*	0	0	ACGTAGCATCAGC	*	AS:i:13
3838""",
3839        )
3840        alignments = aligner.align(
3841            "ACGTAGCATCAGCGGGG", reverse_complement("ACGTAGCATCAGC"), strand="-"
3842        )
3843        self.assertEqual(len(alignments), 1)
3844        alignment = alignments[0]
3845        self.assertAlmostEqual(alignment.score, 13.0)
3846        self.assertEqual(
3847            str(alignment),
3848            """\
3849ACGTAGCATCAGCGGGG
3850|||||||||||||----
3851ACGTAGCATCAGC----
3852""",
3853        )
3854        self.assertEqual(alignment.shape, (2, 17))
3855        self.assertEqual(
3856            alignment.format("psl"),
3857            """\
385813	0	0	0	0	0	0	0	-	query	13	0	13	target	17	0	13	1	13,	0,	0,
3859""",
3860        )
3861        self.assertEqual(
3862            alignment.format("bed"),
3863            """\
3864target	0	13	query	13.0	-	0	13	0	1	13,	0,
3865""",
3866        )
3867        self.assertEqual(
3868            alignment.format("sam"),
3869            """\
3870query	16	target	1	255	13M	*	0	0	ACGTAGCATCAGC	*	AS:i:13
3871""",
3872        )
3873
3874    def test_alignment_wildcard(self):
3875        aligner = Align.PairwiseAligner()
3876        aligner.gap_score = -10
3877        aligner.end_gap_score = 0
3878        aligner.mismatch = -2
3879        aligner.wildcard = "N"
3880        target = "TTTTTNACGCTCGAGCAGCTACG"
3881        query = "ACGATCGAGCNGCTACGCCCNC"
3882        # use strings for target and query
3883        alignments = aligner.align(target, query)
3884        self.assertEqual(len(alignments), 1)
3885        alignment = alignments[0]
3886        self.assertEqual(
3887            str(alignment),
3888            """\
3889TTTTTNACGCTCGAGCAGCTACG-----
3890------|||.||||||.||||||-----
3891------ACGATCGAGCNGCTACGCCCNC
3892""",
3893        )
3894        self.assertEqual(alignment.shape, (2, 28))
3895        self.assertEqual(
3896            alignment.format("psl"),
3897            """\
389815	1	0	1	0	0	0	0	+	query	22	0	17	target	23	6	23	1	17,	0,	6,
3899""",
3900        )
3901        self.assertEqual(
3902            alignment.format("bed"),
3903            """\
3904target	6	23	query	13.0	+	6	23	0	1	17,	0,
3905""",
3906        )
3907        self.assertEqual(
3908            alignment.format("sam"),
3909            """\
3910query	0	target	7	255	17M5S	*	0	0	ACGATCGAGCNGCTACGCCCNC	*	AS:i:13
3911""",
3912        )
3913        alignments = aligner.align(target, reverse_complement(query), strand="-")
3914        self.assertEqual(len(alignments), 1)
3915        alignment = alignments[0]
3916        self.assertEqual(
3917            str(alignment),
3918            """\
3919TTTTTNACGCTCGAGCAGCTACG-----
3920------|||.||||||.||||||-----
3921------ACGATCGAGCNGCTACGCCCNC
3922""",
3923        )
3924        self.assertEqual(alignment.shape, (2, 28))
3925        self.assertEqual(
3926            alignment.format("psl"),
3927            """\
392815	1	0	1	0	0	0	0	-	query	22	5	22	target	23	6	23	1	17,	0,	6,
3929""",
3930        )
3931        self.assertEqual(
3932            alignment.format("bed"),
3933            """\
3934target	6	23	query	13.0	-	6	23	0	1	17,	0,
3935""",
3936        )
3937        self.assertEqual(
3938            alignment.format("sam"),
3939            """\
3940query	16	target	7	255	17M5S	*	0	0	ACGATCGAGCNGCTACGCCCNC	*	AS:i:13
3941""",
3942        )
3943        # use Seq objects for target and query
3944        alignments = aligner.align(Seq(target), Seq(query))
3945        self.assertEqual(len(alignments), 1)
3946        alignment = alignments[0]
3947        self.assertEqual(
3948            str(alignment),
3949            """\
3950TTTTTNACGCTCGAGCAGCTACG-----
3951------|||.||||||.||||||-----
3952------ACGATCGAGCNGCTACGCCCNC
3953""",
3954        )
3955        self.assertEqual(alignment.shape, (2, 28))
3956        self.assertEqual(
3957            alignment.format("psl"),
3958            """\
395915	1	0	1	0	0	0	0	+	query	22	0	17	target	23	6	23	1	17,	0,	6,
3960""",
3961        )
3962        self.assertEqual(
3963            alignment.format("bed"),
3964            """\
3965target	6	23	query	13.0	+	6	23	0	1	17,	0,
3966""",
3967        )
3968        self.assertEqual(
3969            alignment.format("sam"),
3970            """\
3971query	0	target	7	255	17M5S	*	0	0	ACGATCGAGCNGCTACGCCCNC	*	AS:i:13
3972""",
3973        )
3974        alignments = aligner.align(
3975            Seq(target), Seq(query).reverse_complement(), strand="-"
3976        )
3977        self.assertEqual(len(alignments), 1)
3978        alignment = alignments[0]
3979        self.assertEqual(
3980            str(alignment),
3981            """\
3982TTTTTNACGCTCGAGCAGCTACG-----
3983------|||.||||||.||||||-----
3984------ACGATCGAGCNGCTACGCCCNC
3985""",
3986        )
3987        self.assertEqual(alignment.shape, (2, 28))
3988        self.assertEqual(
3989            alignment.format("psl"),
3990            """\
399115	1	0	1	0	0	0	0	-	query	22	5	22	target	23	6	23	1	17,	0,	6,
3992""",
3993        )
3994        self.assertEqual(
3995            alignment.format("bed"),
3996            """\
3997target	6	23	query	13.0	-	6	23	0	1	17,	0,
3998""",
3999        )
4000        self.assertEqual(
4001            alignment.format("sam"),
4002            """\
4003query	16	target	7	255	17M5S	*	0	0	ACGATCGAGCNGCTACGCCCNC	*	AS:i:13
4004""",
4005        )
4006
4007
4008class TestAlignmentMethods(unittest.TestCase):
4009    def test_indexing_slicing(self):
4010        aligner = Align.PairwiseAligner()
4011        alignments = aligner.align("AACCGGGACCG", "ACGGAAC")
4012        self.assertEqual(len(alignments), 88)
4013        alignment = alignments[0]
4014        self.assertEqual(
4015            str(alignment),
4016            """\
4017AACCGGGA-CCG
4018|-|-||-|-|--
4019A-C-GG-AAC--
4020""",
4021        )
4022        self.assertAlmostEqual(alignment.score, 6.0)
4023        self.assertAlmostEqual(alignment[:, :].score, 6.0)
4024        self.assertEqual(
4025            str(alignment[:, :]),
4026            """\
4027AACCGGGA-CCG
4028|-|-||-|-|--
4029A-C-GG-AAC--
4030""",
4031        )
4032        self.assertAlmostEqual(alignment[:, 0:].score, 6.0)
4033        self.assertEqual(
4034            str(alignment[:, 0:]),
4035            """\
4036AACCGGGA-CCG
4037|-|-||-|-|--
4038A-C-GG-AAC--
4039""",
4040        )
4041        self.assertAlmostEqual(alignment[:, :12].score, 6.0)
4042        self.assertEqual(
4043            str(alignment[:, :12]),
4044            """\
4045AACCGGGA-CCG
4046|-|-||-|-|--
4047A-C-GG-AAC--
4048""",
4049        )
4050        self.assertAlmostEqual(alignment[:, 0:12].score, 6.0)
4051        self.assertEqual(
4052            str(alignment[:, 0:12]),
4053            """\
4054AACCGGGA-CCG
4055|-|-||-|-|--
4056A-C-GG-AAC--
4057""",
4058        )
4059        self.assertIsNone(alignment[:, 1:].score)
4060        self.assertEqual(
4061            str(alignment[:, 1:]),
4062            """\
4063AACCGGGA-CCG
4064 -|-||-|-|--
4065A-C-GG-AAC--
4066""",
4067        )
4068        self.assertIsNone(alignment[:, 2:].score)
4069        self.assertEqual(
4070            str(alignment[:, 2:]),
4071            """\
4072AACCGGGA-CCG
4073  |-||-|-|--
4074 AC-GG-AAC--
4075""",
4076        )
4077        self.assertIsNone(alignment[:, 3:].score)
4078        self.assertEqual(
4079            str(alignment[:, 3:]),
4080            """\
4081AACCGGGA-CCG
4082   -||-|-|--
4083 AC-GG-AAC--
4084""",
4085        )
4086        self.assertIsNone(alignment[:, 4:].score)
4087        self.assertEqual(
4088            str(alignment[:, 4:]),
4089            """\
4090AACCGGGA-CCG
4091    ||-|-|--
4092  ACGG-AAC--
4093""",
4094        )
4095        self.assertIsNone(alignment[:, :-1].score)
4096        self.assertEqual(
4097            str(alignment[:, :-1]),
4098            """\
4099AACCGGGA-CCG
4100|-|-||-|-|-
4101A-C-GG-AAC-
4102""",
4103        )
4104        self.assertIsNone(alignment[:, :-2].score)
4105        self.assertEqual(
4106            str(alignment[:, :-2]),
4107            """\
4108AACCGGGA-CCG
4109|-|-||-|-|
4110A-C-GG-AAC
4111""",
4112        )
4113        self.assertIsNone(alignment[:, :-3].score)
4114        self.assertEqual(
4115            str(alignment[:, :-3]),
4116            """\
4117AACCGGGA-CCG
4118|-|-||-|-
4119A-C-GG-AAC
4120""",
4121        )
4122        self.assertIsNone(alignment[:, 1:-1].score)
4123        self.assertEqual(
4124            str(alignment[:, 1:-1]),
4125            """\
4126AACCGGGA-CCG
4127 -|-||-|-|-
4128A-C-GG-AAC-
4129""",
4130        )
4131        self.assertIsNone(alignment[:, 1:-2].score)
4132        self.assertEqual(
4133            str(alignment[:, 1:-2]),
4134            """\
4135AACCGGGA-CCG
4136 -|-||-|-|
4137A-C-GG-AAC
4138""",
4139        )
4140        self.assertIsNone(alignment[:, 2:-1].score)
4141        self.assertEqual(
4142            str(alignment[:, 2:-1]),
4143            """\
4144AACCGGGA-CCG
4145  |-||-|-|-
4146 AC-GG-AAC-
4147""",
4148        )
4149        self.assertIsNone(alignment[:, 2:-2].score)
4150        self.assertEqual(
4151            str(alignment[:, 2:-2]),
4152            """\
4153AACCGGGA-CCG
4154  |-||-|-|
4155 AC-GG-AAC
4156""",
4157        )
4158        with self.assertRaises(NotImplementedError):
4159            alignment[:1]
4160        with self.assertRaises(NotImplementedError):
4161            alignment[0]
4162        with self.assertRaises(NotImplementedError):
4163            alignment[0, :]
4164        with self.assertRaises(NotImplementedError):
4165            alignment[:1, :]
4166        with self.assertRaises(NotImplementedError):
4167            alignment[:, 0]
4168        with self.assertRaises(NotImplementedError):
4169            alignment[:, ::3]
4170
4171    def test_sort(self):
4172        aligner = Align.PairwiseAligner()
4173        aligner.gap_score = -1
4174        target = Seq("ACTT")
4175        query = Seq("ACCT")
4176        alignments = aligner.align(target, query)
4177        self.assertEqual(len(alignments), 1)
4178        alignment = alignments[0]
4179        self.assertEqual(
4180            str(alignment),
4181            """\
4182ACTT
4183||.|
4184ACCT
4185""",
4186        )
4187        alignment.sort()
4188        self.assertEqual(
4189            str(alignment),
4190            """\
4191ACCT
4192||.|
4193ACTT
4194""",
4195        )
4196        alignment.sort(reverse=True)
4197        self.assertEqual(
4198            str(alignment),
4199            """\
4200ACTT
4201||.|
4202ACCT
4203""",
4204        )
4205        target.id = "seq1"
4206        query.id = "seq2"
4207        alignment.sort()
4208        self.assertEqual(
4209            str(alignment),
4210            """\
4211ACTT
4212||.|
4213ACCT
4214""",
4215        )
4216        alignment.sort(reverse=True)
4217        self.assertEqual(
4218            str(alignment),
4219            """\
4220ACCT
4221||.|
4222ACTT
4223""",
4224        )
4225        alignment.sort(key=GC)
4226        self.assertEqual(
4227            str(alignment),
4228            """\
4229ACTT
4230||.|
4231ACCT
4232""",
4233        )
4234        alignment.sort(key=GC, reverse=True)
4235        self.assertEqual(
4236            str(alignment),
4237            """\
4238ACCT
4239||.|
4240ACTT
4241""",
4242        )
4243
4244    def test_substitutions(self):
4245        aligner = Align.PairwiseAligner()
4246        path = os.path.join("Align", "ecoli.fa")
4247        record = SeqIO.read(path, "fasta")
4248        target = record.seq
4249        path = os.path.join("Align", "bsubtilis.fa")
4250        record = SeqIO.read(path, "fasta")
4251        query = record.seq
4252        # blastn default parameters:
4253        aligner.open_gap_score = -5
4254        aligner.extend_gap_score = -2
4255        aligner.match = +1
4256        aligner.mismatch = -3
4257        aligner.mode = "local"
4258        alignments = aligner.align(target, query)
4259        self.assertEqual(len(alignments), 9031680)
4260        alignment = alignments[0]
4261        m = alignment.substitutions
4262        self.assertEqual(
4263            str(m),
4264            """\
4265      A     C     G     T
4266A 191.0   3.0  15.0  13.0
4267C   5.0 186.0   9.0  14.0
4268G  12.0  11.0 248.0   8.0
4269T  11.0  19.0   6.0 145.0
4270""",
4271        )
4272        self.assertAlmostEqual(m["T", "C"], 19.0)
4273        self.assertAlmostEqual(m["C", "T"], 14.0)
4274        m += m.transpose()
4275        m /= 2.0
4276        self.assertEqual(
4277            str(m),
4278            """\
4279      A     C     G     T
4280A 191.0   4.0  13.5  12.0
4281C   4.0 186.0  10.0  16.5
4282G  13.5  10.0 248.0   7.0
4283T  12.0  16.5   7.0 145.0
4284""",
4285        )
4286        self.assertAlmostEqual(m["C", "T"], 16.5)
4287        self.assertAlmostEqual(m["T", "C"], 16.5)
4288
4289
4290if __name__ == "__main__":
4291    runner = unittest.TextTestRunner(verbosity=2)
4292    unittest.main(testRunner=runner)
4293