1# Copyright 2001 Brad Chapman.  All rights reserved.
2#
3# This file is part of the Biopython distribution and governed by your
4# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
5# Please see the LICENSE file that should have been included as part of this
6# package.
7
8"""Test the HMM.MarkovModel and HMM.DynamicProgramming modules.
9
10Also tests Training methods.
11"""
12# standard modules
13
14import unittest
15import math
16
17# biopython
18from Bio.Seq import Seq
19
20
21# stuff we are testing
22from Bio.HMM import MarkovModel
23from Bio.HMM import DynamicProgramming
24from Bio.HMM import Trainer
25
26
27# create some simple alphabets
28number_alphabet = ("1", "2")  # Numbers as the states of the model.
29letter_alphabet = ("A", "B")  # Letters as the emissions of the model.
30
31
32class TrainingSequenceTest(unittest.TestCase):
33    """Training sequence tests."""
34
35    def test_empty_state_training_sequence(self):
36        emission_seq = Seq("AB")
37        state_seq = ()
38        training_seq = Trainer.TrainingSequence(emission_seq, state_seq)
39        self.assertEqual(training_seq.emissions, emission_seq)
40        self.assertEqual(training_seq.states, state_seq)
41
42    def test_valid_training_sequence(self):
43        emission_seq = Seq("AB")
44        state_seq = ("1", "2")
45        training_seq = Trainer.TrainingSequence(emission_seq, state_seq)
46        self.assertEqual(training_seq.emissions, emission_seq)
47        self.assertEqual(training_seq.states, state_seq)
48
49    def test_invalid_training_sequence(self):
50        emission_seq = Seq("AB")
51        state_seq = ("1",)
52        with self.assertRaises(ValueError):
53            Trainer.TrainingSequence(emission_seq, state_seq)
54
55
56class MarkovModelBuilderTest(unittest.TestCase):
57    """Markov Model builder tests."""
58
59    def setUp(self):
60        self.mm_builder = MarkovModel.MarkovModelBuilder(
61            number_alphabet, letter_alphabet
62        )
63
64    def test_test_initialize(self):
65        """Making sure MarkovModelBuilder is initialized correctly."""
66        expected_transition_prob = {}
67        expected_transition_pseudo = {}
68
69        expected_emission_prob = {
70            ("2", "A"): 0,
71            ("1", "A"): 0,
72            ("1", "B"): 0,
73            ("2", "B"): 0,
74        }
75        expected_emission_pseudo = {
76            ("2", "A"): 1,
77            ("1", "A"): 1,
78            ("1", "B"): 1,
79            ("2", "B"): 1,
80        }
81
82        assertions = []
83        self.assertEqual(self.mm_builder.transition_prob, expected_transition_prob)
84        self.assertEqual(self.mm_builder.transition_pseudo, expected_transition_pseudo)
85        self.assertEqual(self.mm_builder.emission_prob, expected_emission_prob)
86        self.assertEqual(self.mm_builder.emission_pseudo, expected_emission_pseudo)
87
88    def test_allow_all_transitions(self):
89        """Testing allow_all_transitions."""
90        self.mm_builder.allow_all_transitions()
91
92        expected_prob = {("2", "1"): 0, ("1", "1"): 0, ("1", "2"): 0, ("2", "2"): 0}
93
94        expected_pseudo = {("2", "1"): 1, ("1", "1"): 1, ("1", "2"): 1, ("2", "2"): 1}
95
96        self.assertEqual(self.mm_builder.transition_prob, expected_prob)
97
98        self.assertEqual(self.mm_builder.transition_pseudo, expected_pseudo)
99
100    def test_set_initial_probabilities(self):
101        self.mm_builder.set_initial_probabilities({})
102        self.assertEqual(self.mm_builder.initial_prob, {"1": 0.5, "2": 0.5})
103
104        # initial probability sum > 1, should raise an exception
105        self.assertRaises(
106            Exception, self.mm_builder.set_initial_probabilities, {"1": 0.6, "2": 0.5}
107        )
108
109        # referencing invalid states should raise an exception
110        self.assertRaises(
111            Exception, self.mm_builder.set_initial_probabilities, {"666": 0.1}
112        )
113
114        self.mm_builder.set_initial_probabilities({"1": 0.2})
115        self.assertEqual(self.mm_builder.initial_prob, {"1": 0.2, "2": 0.8})
116
117        self.mm_builder.set_initial_probabilities({"1": 0.9, "2": 0.1})
118        self.assertEqual(self.mm_builder.initial_prob, {"1": 0.9, "2": 0.1})
119
120    def test_set_equal_probabilities(self):
121        self.mm_builder.allow_transition("1", "2", 0.05)
122        self.mm_builder.allow_transition("2", "1", 0.95)
123        self.mm_builder.set_equal_probabilities()
124
125        self.assertEqual(
126            self.mm_builder.initial_prob, {"1": 0.5, "2": 0.5},
127        )
128        self.assertEqual(
129            self.mm_builder.transition_prob, {("1", "2"): 0.5, ("2", "1"): 0.5}
130        )
131        self.assertEqual(
132            self.mm_builder.emission_prob,
133            {("2", "A"): 0.25, ("1", "B"): 0.25, ("1", "A"): 0.25, ("2", "B"): 0.25},
134        )
135
136    def test_set_random_probabilities(self):
137        self.mm_builder.allow_transition("1", "2", 0.05)
138        self.mm_builder.allow_transition("2", "1", 0.95)
139        self.mm_builder.set_random_probabilities()
140
141        self.assertEqual(
142            len(self.mm_builder.initial_prob), len(self.mm_builder._state_alphabet)
143        )
144        # To test this more thoroughly, perhaps mock random.random() and
145        # verify that it's being called as expected?
146
147
148class HiddenMarkovModelTest(unittest.TestCase):
149    def setUp(self):
150        self.mm_builder = MarkovModel.MarkovModelBuilder(
151            number_alphabet, letter_alphabet
152        )
153
154    def test_transitions_from(self):
155        """Testing the calculation of transitions_from."""
156        self.mm_builder.allow_transition("1", "2", 1.0)
157        self.mm_builder.allow_transition("2", "1", 0.5)
158        self.mm_builder.allow_transition("2", "2", 0.5)
159        self.mm_builder.set_initial_probabilities({})
160        self.mm = self.mm_builder.get_markov_model()
161
162        state_1 = self.mm.transitions_from("1")
163        expected_state_1 = ["2"]
164        state_1.sort()
165        expected_state_1.sort()
166        self.assertEqual(state_1, expected_state_1)
167
168        state_2 = self.mm.transitions_from("2")
169        expected_state_2 = ["1", "2"]
170        state_2.sort()
171        expected_state_2.sort()
172        self.assertEqual(state_2, expected_state_2)
173
174        fake_state = self.mm.transitions_from("Fake")
175        expected_fake_state = []
176        self.assertEqual(fake_state, expected_fake_state)
177
178    def test_transitions_to(self):
179        """Testing the calculation of transitions_to."""
180        self.mm_builder.allow_transition("1", "1", 0.5)
181        self.mm_builder.allow_transition("1", "2", 0.5)
182        self.mm_builder.allow_transition("2", "1", 1.0)
183        self.mm_builder.set_initial_probabilities({})
184        self.mm = self.mm_builder.get_markov_model()
185
186        state_1 = self.mm.transitions_to("1")
187        expected_state_1 = ["1", "2"]
188        state_1.sort()
189        expected_state_1.sort()
190        self.assertEqual(state_1, expected_state_1)
191
192        state_2 = self.mm.transitions_to("2")
193        expected_state_2 = ["1"]
194        state_2.sort()
195        expected_state_2.sort()
196        self.assertEqual(state_2, expected_state_2)
197
198        fake_state = self.mm.transitions_to("Fake")
199        expected_fake_state = []
200        self.assertEqual(fake_state, expected_fake_state)
201
202    def test_allow_transition(self):
203        """Testing allow_transition."""
204        self.mm_builder.allow_transition("1", "2", 1.0)
205        self.mm_builder.set_initial_probabilities({})
206        self.mm = self.mm_builder.get_markov_model()
207
208        state_1 = self.mm.transitions_from("1")
209        expected_state_1 = ["2"]
210        state_1.sort()
211        expected_state_1.sort()
212        self.assertEqual(state_1, expected_state_1)
213
214        state_2 = self.mm.transitions_from("2")
215        expected_state_2 = []
216        state_2.sort()
217        expected_state_2.sort()
218        self.assertEqual(state_2, expected_state_2)
219
220        state_1 = self.mm.transitions_to("1")
221        expected_state_1 = []
222        state_1.sort()
223        expected_state_1.sort()
224        self.assertEqual(state_1, expected_state_1)
225
226        state_2 = self.mm.transitions_to("2")
227        expected_state_2 = ["1"]
228        state_2.sort()
229        expected_state_2.sort()
230        self.assertEqual(state_2, expected_state_2)
231
232    def test_simple_hmm(self):
233        """Test a simple model with 2 states and 2 symbols."""
234        # set initial probabilities
235        prob_initial = [0.4, 0.6]
236        self.mm_builder.set_initial_probabilities(
237            {"1": prob_initial[0], "2": prob_initial[1]}
238        )
239
240        # set transition probabilities
241        prob_transition = [[0.35, 0.65], [0.45, 0.55]]
242        self.mm_builder.allow_transition("1", "1", prob_transition[0][0])
243        self.mm_builder.allow_transition("1", "2", prob_transition[0][1])
244        self.mm_builder.allow_transition("2", "1", prob_transition[1][0])
245        self.mm_builder.allow_transition("2", "2", prob_transition[1][1])
246
247        # set emission probabilities
248        prob_emission = [[0.45, 0.55], [0.75, 0.25]]
249        self.mm_builder.set_emission_score("1", "A", prob_emission[0][0])
250        self.mm_builder.set_emission_score("1", "B", prob_emission[0][1])
251        self.mm_builder.set_emission_score("2", "A", prob_emission[1][0])
252        self.mm_builder.set_emission_score("2", "B", prob_emission[1][1])
253
254        # Check all two letter sequences using a brute force calculation
255        model = self.mm_builder.get_markov_model()
256        for first_letter in letter_alphabet:
257            for second_letter in letter_alphabet:
258                observed_emissions = [first_letter, second_letter]
259                viterbi = model.viterbi(observed_emissions, number_alphabet)
260                self._checkSimpleHmm(
261                    prob_initial,
262                    prob_transition,
263                    prob_emission,
264                    viterbi,
265                    observed_emissions,
266                )
267
268    def _checkSimpleHmm(
269        self, prob_initial, prob_transition, prob_emission, viterbi, observed_emissions
270    ):
271        max_prob = 0
272
273        # expected first and second states in the sequence, calculated below
274        seq_first_state = None
275        seq_second_state = None
276
277        # convert the observed letters 'A' or 'B' into 0 or 1
278        letter1 = ord(observed_emissions[0]) - ord("A")
279        letter2 = ord(observed_emissions[1]) - ord("A")
280
281        for first_state in number_alphabet:
282            for second_state in number_alphabet:
283                # compute the probability of the state sequence first_state,
284                # second_state emitting the observed_emissions
285                state1 = ord(first_state) - ord("1")
286                state2 = ord(second_state) - ord("1")
287                prob = (
288                    prob_initial[state1]
289                    * prob_emission[state1][letter1]
290                    * prob_transition[state1][state2]
291                    * prob_emission[state2][letter2]
292                )
293                if prob > max_prob:
294                    seq_first_state = first_state
295                    seq_second_state = second_state
296                    max_prob = prob
297
298        max_prob = math.log(max_prob)
299        seq = viterbi[0]
300        prob = viterbi[1]
301        self.assertEqual(seq, seq_first_state + seq_second_state)
302        self.assertAlmostEqual(prob, max_prob, 11)
303
304    def test_non_ergodic(self):
305        """Non-ergodic model (meaning that some transitions are not allowed)."""
306        # make state '1' the initial state
307        prob_1_initial = 1.0
308        self.mm_builder.set_initial_probabilities({"1": prob_1_initial})
309
310        # probabilities of transitioning from state 1 to 1, and 1 to 2
311        prob_1_to_1 = 0.5
312        prob_1_to_2 = 0.5
313
314        # set up allowed transitions
315        self.mm_builder.allow_transition("1", "1", prob_1_to_1)
316        self.mm_builder.allow_transition("1", "2", prob_1_to_2)
317
318        # Emission probabilities
319        # In state 1 the most likely emission is A, in state 2 the most
320        # likely emission is B. (Would be simpler just to use 1.0 and 0.0
321        # emission probabilities here, but the algorithm blows up on zero
322        # probabilities because of the conversion to log space.)
323        prob_1_A = 0.95
324        prob_1_B = 0.05
325        prob_2_A = 0.05
326        prob_2_B = 0.95
327
328        # set emission probabilities
329        self.mm_builder.set_emission_score("1", "A", prob_1_A)
330        self.mm_builder.set_emission_score("1", "B", prob_1_B)
331        self.mm_builder.set_emission_score("2", "A", prob_2_A)
332        self.mm_builder.set_emission_score("2", "B", prob_2_B)
333
334        # run the Viterbi algorithm to find the most probable state path
335        model = self.mm_builder.get_markov_model()
336        observed_emissions = ["A", "B"]
337        viterbi = model.viterbi(observed_emissions, number_alphabet)
338        seq = viterbi[0]
339        prob = viterbi[1]
340
341        # the most probable path must be from state 1 to state 2
342        self.assertEqual(seq, "12")
343
344        # The probability of that path is the probability of starting in
345        # state 1, then emitting an A, then transitioning 1 -> 2, then
346        # emitting a B.
347        # Note that probabilities are converted into log space.
348        expected_prob = (
349            math.log(prob_1_initial)
350            + math.log(prob_1_A)
351            + math.log(prob_1_to_2)
352            + math.log(prob_2_B)
353        )
354        self.assertEqual(prob, expected_prob)
355
356
357class ScaledDPAlgorithmsTest(unittest.TestCase):
358    def setUp(self):
359        # set up our Markov Model
360        mm_builder = MarkovModel.MarkovModelBuilder(number_alphabet, letter_alphabet)
361        mm_builder.allow_all_transitions()
362        mm_builder.set_equal_probabilities()
363
364        mm = mm_builder.get_markov_model()
365
366        # now set up a test sequence
367        emission_seq = Seq("ABB")
368        state_seq = ()
369        training_seq = Trainer.TrainingSequence(emission_seq, state_seq)
370
371        # finally set up the DP
372        self.dp = DynamicProgramming.ScaledDPAlgorithms(mm, training_seq)
373
374    def test_calculate_s_value(self):
375        """Testing the calculation of s values."""
376        previous_vars = {("1", 0): 0.5, ("2", 0): 0.7}
377        s_value = self.dp._calculate_s_value(1, previous_vars)
378
379
380class AbstractTrainerTest(unittest.TestCase):
381    def setUp(self):
382        # set up a bogus HMM and our trainer
383        hmm = MarkovModel.HiddenMarkovModel((), (), {}, {}, {}, {}, {})
384        self.test_trainer = Trainer.AbstractTrainer(hmm)
385
386    def test_ml_estimator(self):
387        """Test the maximum likelihood estimator for simple cases."""
388        # set up a simple dictionary
389        counts = {
390            ("A", "A"): 10,
391            ("A", "B"): 20,
392            ("A", "C"): 15,
393            ("B", "B"): 5,
394            ("C", "A"): 15,
395            ("C", "C"): 10,
396        }
397
398        results = self.test_trainer.ml_estimator(counts)
399
400        # now make sure we are getting back the right thing
401        result_tests = []
402        result_tests.append([("A", "A"), float(10) / float(45)])
403        result_tests.append([("A", "B"), float(20) / float(45)])
404        result_tests.append([("A", "C"), float(15) / float(45)])
405        result_tests.append([("B", "B"), float(5) / float(5)])
406        result_tests.append([("C", "A"), float(15) / float(25)])
407        result_tests.append([("C", "C"), float(10) / float(25)])
408
409        for test_result in result_tests:
410            self.assertEqual(results[test_result[0]], test_result[1])
411
412    def test_log_likelihood(self):
413        """Calculate log likelihood."""
414        probs = [0.25, 0.13, 0.12, 0.17]
415
416        log_prob = self.test_trainer.log_likelihood(probs)
417        expected_log_prob = -7.31873556778
418        self.assertAlmostEqual(expected_log_prob, log_prob)
419
420
421# run the tests
422if __name__ == "__main__":
423    runner = unittest.TextTestRunner(verbosity=2)
424    unittest.main(testRunner=runner)
425