1# Copyright 2001 Brad Chapman. All rights reserved. 2# 3# This file is part of the Biopython distribution and governed by your 4# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". 5# Please see the LICENSE file that should have been included as part of this 6# package. 7 8"""Test the HMM.MarkovModel and HMM.DynamicProgramming modules. 9 10Also tests Training methods. 11""" 12# standard modules 13 14import unittest 15import math 16 17# biopython 18from Bio.Seq import Seq 19 20 21# stuff we are testing 22from Bio.HMM import MarkovModel 23from Bio.HMM import DynamicProgramming 24from Bio.HMM import Trainer 25 26 27# create some simple alphabets 28number_alphabet = ("1", "2") # Numbers as the states of the model. 29letter_alphabet = ("A", "B") # Letters as the emissions of the model. 30 31 32class TrainingSequenceTest(unittest.TestCase): 33 """Training sequence tests.""" 34 35 def test_empty_state_training_sequence(self): 36 emission_seq = Seq("AB") 37 state_seq = () 38 training_seq = Trainer.TrainingSequence(emission_seq, state_seq) 39 self.assertEqual(training_seq.emissions, emission_seq) 40 self.assertEqual(training_seq.states, state_seq) 41 42 def test_valid_training_sequence(self): 43 emission_seq = Seq("AB") 44 state_seq = ("1", "2") 45 training_seq = Trainer.TrainingSequence(emission_seq, state_seq) 46 self.assertEqual(training_seq.emissions, emission_seq) 47 self.assertEqual(training_seq.states, state_seq) 48 49 def test_invalid_training_sequence(self): 50 emission_seq = Seq("AB") 51 state_seq = ("1",) 52 with self.assertRaises(ValueError): 53 Trainer.TrainingSequence(emission_seq, state_seq) 54 55 56class MarkovModelBuilderTest(unittest.TestCase): 57 """Markov Model builder tests.""" 58 59 def setUp(self): 60 self.mm_builder = MarkovModel.MarkovModelBuilder( 61 number_alphabet, letter_alphabet 62 ) 63 64 def test_test_initialize(self): 65 """Making sure MarkovModelBuilder is initialized correctly.""" 66 expected_transition_prob = {} 67 expected_transition_pseudo = {} 68 69 expected_emission_prob = { 70 ("2", "A"): 0, 71 ("1", "A"): 0, 72 ("1", "B"): 0, 73 ("2", "B"): 0, 74 } 75 expected_emission_pseudo = { 76 ("2", "A"): 1, 77 ("1", "A"): 1, 78 ("1", "B"): 1, 79 ("2", "B"): 1, 80 } 81 82 assertions = [] 83 self.assertEqual(self.mm_builder.transition_prob, expected_transition_prob) 84 self.assertEqual(self.mm_builder.transition_pseudo, expected_transition_pseudo) 85 self.assertEqual(self.mm_builder.emission_prob, expected_emission_prob) 86 self.assertEqual(self.mm_builder.emission_pseudo, expected_emission_pseudo) 87 88 def test_allow_all_transitions(self): 89 """Testing allow_all_transitions.""" 90 self.mm_builder.allow_all_transitions() 91 92 expected_prob = {("2", "1"): 0, ("1", "1"): 0, ("1", "2"): 0, ("2", "2"): 0} 93 94 expected_pseudo = {("2", "1"): 1, ("1", "1"): 1, ("1", "2"): 1, ("2", "2"): 1} 95 96 self.assertEqual(self.mm_builder.transition_prob, expected_prob) 97 98 self.assertEqual(self.mm_builder.transition_pseudo, expected_pseudo) 99 100 def test_set_initial_probabilities(self): 101 self.mm_builder.set_initial_probabilities({}) 102 self.assertEqual(self.mm_builder.initial_prob, {"1": 0.5, "2": 0.5}) 103 104 # initial probability sum > 1, should raise an exception 105 self.assertRaises( 106 Exception, self.mm_builder.set_initial_probabilities, {"1": 0.6, "2": 0.5} 107 ) 108 109 # referencing invalid states should raise an exception 110 self.assertRaises( 111 Exception, self.mm_builder.set_initial_probabilities, {"666": 0.1} 112 ) 113 114 self.mm_builder.set_initial_probabilities({"1": 0.2}) 115 self.assertEqual(self.mm_builder.initial_prob, {"1": 0.2, "2": 0.8}) 116 117 self.mm_builder.set_initial_probabilities({"1": 0.9, "2": 0.1}) 118 self.assertEqual(self.mm_builder.initial_prob, {"1": 0.9, "2": 0.1}) 119 120 def test_set_equal_probabilities(self): 121 self.mm_builder.allow_transition("1", "2", 0.05) 122 self.mm_builder.allow_transition("2", "1", 0.95) 123 self.mm_builder.set_equal_probabilities() 124 125 self.assertEqual( 126 self.mm_builder.initial_prob, {"1": 0.5, "2": 0.5}, 127 ) 128 self.assertEqual( 129 self.mm_builder.transition_prob, {("1", "2"): 0.5, ("2", "1"): 0.5} 130 ) 131 self.assertEqual( 132 self.mm_builder.emission_prob, 133 {("2", "A"): 0.25, ("1", "B"): 0.25, ("1", "A"): 0.25, ("2", "B"): 0.25}, 134 ) 135 136 def test_set_random_probabilities(self): 137 self.mm_builder.allow_transition("1", "2", 0.05) 138 self.mm_builder.allow_transition("2", "1", 0.95) 139 self.mm_builder.set_random_probabilities() 140 141 self.assertEqual( 142 len(self.mm_builder.initial_prob), len(self.mm_builder._state_alphabet) 143 ) 144 # To test this more thoroughly, perhaps mock random.random() and 145 # verify that it's being called as expected? 146 147 148class HiddenMarkovModelTest(unittest.TestCase): 149 def setUp(self): 150 self.mm_builder = MarkovModel.MarkovModelBuilder( 151 number_alphabet, letter_alphabet 152 ) 153 154 def test_transitions_from(self): 155 """Testing the calculation of transitions_from.""" 156 self.mm_builder.allow_transition("1", "2", 1.0) 157 self.mm_builder.allow_transition("2", "1", 0.5) 158 self.mm_builder.allow_transition("2", "2", 0.5) 159 self.mm_builder.set_initial_probabilities({}) 160 self.mm = self.mm_builder.get_markov_model() 161 162 state_1 = self.mm.transitions_from("1") 163 expected_state_1 = ["2"] 164 state_1.sort() 165 expected_state_1.sort() 166 self.assertEqual(state_1, expected_state_1) 167 168 state_2 = self.mm.transitions_from("2") 169 expected_state_2 = ["1", "2"] 170 state_2.sort() 171 expected_state_2.sort() 172 self.assertEqual(state_2, expected_state_2) 173 174 fake_state = self.mm.transitions_from("Fake") 175 expected_fake_state = [] 176 self.assertEqual(fake_state, expected_fake_state) 177 178 def test_transitions_to(self): 179 """Testing the calculation of transitions_to.""" 180 self.mm_builder.allow_transition("1", "1", 0.5) 181 self.mm_builder.allow_transition("1", "2", 0.5) 182 self.mm_builder.allow_transition("2", "1", 1.0) 183 self.mm_builder.set_initial_probabilities({}) 184 self.mm = self.mm_builder.get_markov_model() 185 186 state_1 = self.mm.transitions_to("1") 187 expected_state_1 = ["1", "2"] 188 state_1.sort() 189 expected_state_1.sort() 190 self.assertEqual(state_1, expected_state_1) 191 192 state_2 = self.mm.transitions_to("2") 193 expected_state_2 = ["1"] 194 state_2.sort() 195 expected_state_2.sort() 196 self.assertEqual(state_2, expected_state_2) 197 198 fake_state = self.mm.transitions_to("Fake") 199 expected_fake_state = [] 200 self.assertEqual(fake_state, expected_fake_state) 201 202 def test_allow_transition(self): 203 """Testing allow_transition.""" 204 self.mm_builder.allow_transition("1", "2", 1.0) 205 self.mm_builder.set_initial_probabilities({}) 206 self.mm = self.mm_builder.get_markov_model() 207 208 state_1 = self.mm.transitions_from("1") 209 expected_state_1 = ["2"] 210 state_1.sort() 211 expected_state_1.sort() 212 self.assertEqual(state_1, expected_state_1) 213 214 state_2 = self.mm.transitions_from("2") 215 expected_state_2 = [] 216 state_2.sort() 217 expected_state_2.sort() 218 self.assertEqual(state_2, expected_state_2) 219 220 state_1 = self.mm.transitions_to("1") 221 expected_state_1 = [] 222 state_1.sort() 223 expected_state_1.sort() 224 self.assertEqual(state_1, expected_state_1) 225 226 state_2 = self.mm.transitions_to("2") 227 expected_state_2 = ["1"] 228 state_2.sort() 229 expected_state_2.sort() 230 self.assertEqual(state_2, expected_state_2) 231 232 def test_simple_hmm(self): 233 """Test a simple model with 2 states and 2 symbols.""" 234 # set initial probabilities 235 prob_initial = [0.4, 0.6] 236 self.mm_builder.set_initial_probabilities( 237 {"1": prob_initial[0], "2": prob_initial[1]} 238 ) 239 240 # set transition probabilities 241 prob_transition = [[0.35, 0.65], [0.45, 0.55]] 242 self.mm_builder.allow_transition("1", "1", prob_transition[0][0]) 243 self.mm_builder.allow_transition("1", "2", prob_transition[0][1]) 244 self.mm_builder.allow_transition("2", "1", prob_transition[1][0]) 245 self.mm_builder.allow_transition("2", "2", prob_transition[1][1]) 246 247 # set emission probabilities 248 prob_emission = [[0.45, 0.55], [0.75, 0.25]] 249 self.mm_builder.set_emission_score("1", "A", prob_emission[0][0]) 250 self.mm_builder.set_emission_score("1", "B", prob_emission[0][1]) 251 self.mm_builder.set_emission_score("2", "A", prob_emission[1][0]) 252 self.mm_builder.set_emission_score("2", "B", prob_emission[1][1]) 253 254 # Check all two letter sequences using a brute force calculation 255 model = self.mm_builder.get_markov_model() 256 for first_letter in letter_alphabet: 257 for second_letter in letter_alphabet: 258 observed_emissions = [first_letter, second_letter] 259 viterbi = model.viterbi(observed_emissions, number_alphabet) 260 self._checkSimpleHmm( 261 prob_initial, 262 prob_transition, 263 prob_emission, 264 viterbi, 265 observed_emissions, 266 ) 267 268 def _checkSimpleHmm( 269 self, prob_initial, prob_transition, prob_emission, viterbi, observed_emissions 270 ): 271 max_prob = 0 272 273 # expected first and second states in the sequence, calculated below 274 seq_first_state = None 275 seq_second_state = None 276 277 # convert the observed letters 'A' or 'B' into 0 or 1 278 letter1 = ord(observed_emissions[0]) - ord("A") 279 letter2 = ord(observed_emissions[1]) - ord("A") 280 281 for first_state in number_alphabet: 282 for second_state in number_alphabet: 283 # compute the probability of the state sequence first_state, 284 # second_state emitting the observed_emissions 285 state1 = ord(first_state) - ord("1") 286 state2 = ord(second_state) - ord("1") 287 prob = ( 288 prob_initial[state1] 289 * prob_emission[state1][letter1] 290 * prob_transition[state1][state2] 291 * prob_emission[state2][letter2] 292 ) 293 if prob > max_prob: 294 seq_first_state = first_state 295 seq_second_state = second_state 296 max_prob = prob 297 298 max_prob = math.log(max_prob) 299 seq = viterbi[0] 300 prob = viterbi[1] 301 self.assertEqual(seq, seq_first_state + seq_second_state) 302 self.assertAlmostEqual(prob, max_prob, 11) 303 304 def test_non_ergodic(self): 305 """Non-ergodic model (meaning that some transitions are not allowed).""" 306 # make state '1' the initial state 307 prob_1_initial = 1.0 308 self.mm_builder.set_initial_probabilities({"1": prob_1_initial}) 309 310 # probabilities of transitioning from state 1 to 1, and 1 to 2 311 prob_1_to_1 = 0.5 312 prob_1_to_2 = 0.5 313 314 # set up allowed transitions 315 self.mm_builder.allow_transition("1", "1", prob_1_to_1) 316 self.mm_builder.allow_transition("1", "2", prob_1_to_2) 317 318 # Emission probabilities 319 # In state 1 the most likely emission is A, in state 2 the most 320 # likely emission is B. (Would be simpler just to use 1.0 and 0.0 321 # emission probabilities here, but the algorithm blows up on zero 322 # probabilities because of the conversion to log space.) 323 prob_1_A = 0.95 324 prob_1_B = 0.05 325 prob_2_A = 0.05 326 prob_2_B = 0.95 327 328 # set emission probabilities 329 self.mm_builder.set_emission_score("1", "A", prob_1_A) 330 self.mm_builder.set_emission_score("1", "B", prob_1_B) 331 self.mm_builder.set_emission_score("2", "A", prob_2_A) 332 self.mm_builder.set_emission_score("2", "B", prob_2_B) 333 334 # run the Viterbi algorithm to find the most probable state path 335 model = self.mm_builder.get_markov_model() 336 observed_emissions = ["A", "B"] 337 viterbi = model.viterbi(observed_emissions, number_alphabet) 338 seq = viterbi[0] 339 prob = viterbi[1] 340 341 # the most probable path must be from state 1 to state 2 342 self.assertEqual(seq, "12") 343 344 # The probability of that path is the probability of starting in 345 # state 1, then emitting an A, then transitioning 1 -> 2, then 346 # emitting a B. 347 # Note that probabilities are converted into log space. 348 expected_prob = ( 349 math.log(prob_1_initial) 350 + math.log(prob_1_A) 351 + math.log(prob_1_to_2) 352 + math.log(prob_2_B) 353 ) 354 self.assertEqual(prob, expected_prob) 355 356 357class ScaledDPAlgorithmsTest(unittest.TestCase): 358 def setUp(self): 359 # set up our Markov Model 360 mm_builder = MarkovModel.MarkovModelBuilder(number_alphabet, letter_alphabet) 361 mm_builder.allow_all_transitions() 362 mm_builder.set_equal_probabilities() 363 364 mm = mm_builder.get_markov_model() 365 366 # now set up a test sequence 367 emission_seq = Seq("ABB") 368 state_seq = () 369 training_seq = Trainer.TrainingSequence(emission_seq, state_seq) 370 371 # finally set up the DP 372 self.dp = DynamicProgramming.ScaledDPAlgorithms(mm, training_seq) 373 374 def test_calculate_s_value(self): 375 """Testing the calculation of s values.""" 376 previous_vars = {("1", 0): 0.5, ("2", 0): 0.7} 377 s_value = self.dp._calculate_s_value(1, previous_vars) 378 379 380class AbstractTrainerTest(unittest.TestCase): 381 def setUp(self): 382 # set up a bogus HMM and our trainer 383 hmm = MarkovModel.HiddenMarkovModel((), (), {}, {}, {}, {}, {}) 384 self.test_trainer = Trainer.AbstractTrainer(hmm) 385 386 def test_ml_estimator(self): 387 """Test the maximum likelihood estimator for simple cases.""" 388 # set up a simple dictionary 389 counts = { 390 ("A", "A"): 10, 391 ("A", "B"): 20, 392 ("A", "C"): 15, 393 ("B", "B"): 5, 394 ("C", "A"): 15, 395 ("C", "C"): 10, 396 } 397 398 results = self.test_trainer.ml_estimator(counts) 399 400 # now make sure we are getting back the right thing 401 result_tests = [] 402 result_tests.append([("A", "A"), float(10) / float(45)]) 403 result_tests.append([("A", "B"), float(20) / float(45)]) 404 result_tests.append([("A", "C"), float(15) / float(45)]) 405 result_tests.append([("B", "B"), float(5) / float(5)]) 406 result_tests.append([("C", "A"), float(15) / float(25)]) 407 result_tests.append([("C", "C"), float(10) / float(25)]) 408 409 for test_result in result_tests: 410 self.assertEqual(results[test_result[0]], test_result[1]) 411 412 def test_log_likelihood(self): 413 """Calculate log likelihood.""" 414 probs = [0.25, 0.13, 0.12, 0.17] 415 416 log_prob = self.test_trainer.log_likelihood(probs) 417 expected_log_prob = -7.31873556778 418 self.assertAlmostEqual(expected_log_prob, log_prob) 419 420 421# run the tests 422if __name__ == "__main__": 423 runner = unittest.TextTestRunner(verbosity=2) 424 unittest.main(testRunner=runner) 425