/**
 * @file methods/hmm/hmm.hpp
 * @author Ryan Curtin
 * @author Tran Quoc Long
 * @author Michael Fox
 *
 * Definition of HMM class.
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license. You should have received a copy of the
 * 3-clause BSD license along with mlpack. If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_HMM_HMM_HPP
#define MLPACK_METHODS_HMM_HMM_HPP

#include <mlpack/prereqs.hpp>
#include <mlpack/core/dists/discrete_distribution.hpp>

namespace mlpack {
namespace hmm /** Hidden Markov Models. */ {

/**
 * A class that represents a Hidden Markov Model with an arbitrary type of
 * emission distribution.  This HMM class supports training (supervised and
 * unsupervised), prediction of state sequences via the Viterbi algorithm,
 * estimation of state probabilities, generation of random sequences, and
 * calculation of the log-likelihood of a given sequence.
 *
 * The template parameter, Distribution, specifies the distribution which the
 * emissions follow.  The class should implement the following functions:
 *
 * @code
 * class Distribution
 * {
 *  public:
 *   // The type of observation used by this distribution.
 *   typedef something DataType;
 *
 *   // Return the probability of the given observation.
 *   double Probability(const DataType& observation) const;
 *
 *   // Estimate the distribution based on the given observations.
 *   double Train(const std::vector<DataType>& observations);
 *
 *   // Estimate the distribution based on the given observations, given also
 *   // the probability of each observation coming from this distribution.
 *   double Train(const std::vector<DataType>& observations,
 *                const std::vector<double>& probabilities);
 * };
 * @endcode
 *
 * See the mlpack::distribution::DiscreteDistribution class for an example.
 * One would use the DiscreteDistribution class when the observations are
 * non-negative integers.  Other distributions could be Gaussians, a mixture
 * of Gaussians (GMM), or any other probability distribution implementing the
 * Distribution functions shown above.
 *
 * Usage of the HMM class generally involves either training an HMM or loading
 * an already-known HMM and taking probability measurements of sequences.
 * Example code for supervised training of a Gaussian HMM (that is, where the
 * emission output distribution is a single Gaussian for each hidden state) is
 * given below.
 *
 * @code
 * extern arma::mat observations; // Each column is an observation.
 * extern arma::Row<size_t> states; // Hidden states for each observation.
 * // Create an untrained HMM with 5 hidden states and default (N(0, 1))
 * // Gaussian distributions with the dimensionality of the dataset.
 * HMM<GaussianDistribution> hmm(5, GaussianDistribution(observations.n_rows));
 *
 * // Train the HMM (the labels could be omitted to perform unsupervised
 * // training).
 * hmm.Train(observations, states);
 * @endcode
 *
 * Once initialized, the HMM can evaluate the probability of a certain
 * sequence (with LogLikelihood()), predict the most likely sequence of hidden
 * states (with Predict()), generate a sequence (with Generate()), or estimate
 * the probabilities of each state for a sequence of observations (with
 * Estimate()).
 *
 * @tparam Distribution Type of emission distribution for this HMM.
83 */ 84 template<typename Distribution = distribution::DiscreteDistribution> 85 class HMM 86 { 87 public: 88 /** 89 * Create the Hidden Markov Model with the given number of hidden states and 90 * the given default distribution for emissions. The dimensionality of the 91 * observations is taken from the emissions variable, so it is important that 92 * the given default emission distribution is set with the correct 93 * dimensionality. Alternately, set the dimensionality with Dimensionality(). 94 * Optionally, the tolerance for convergence of the Baum-Welch algorithm can 95 * be set. 96 * 97 * By default, the transition matrix and initial probability vector are set to 98 * contain equal probability for each state. 99 * 100 * @param states Number of states. 101 * @param emissions Default distribution for emissions. 102 * @param tolerance Tolerance for convergence of training algorithm 103 * (Baum-Welch). 104 */ 105 HMM(const size_t states = 0, 106 const Distribution emissions = Distribution(), 107 const double tolerance = 1e-5); 108 109 /** 110 * Create the Hidden Markov Model with the given initial probability vector, 111 * the given transition matrix, and the given emission distributions. The 112 * dimensionality of the observations of the HMM are taken from the given 113 * emission distributions. Alternately, the dimensionality can be set with 114 * Dimensionality(). 115 * 116 * The initial state probability vector should have length equal to the number 117 * of states, and each entry represents the probability of being in the given 118 * state at time T = 0 (the beginning of a sequence). 119 * 120 * The transition matrix should be such that T(i, j) is the probability of 121 * transition to state i from state j. The columns of the matrix should sum 122 * to 1. 123 * 124 * The emission matrix should be such that E(i, j) is the probability of 125 * emission i while in state j. The columns of the matrix should sum to 1. 
126 * 127 * Optionally, the tolerance for convergence of the Baum-Welch algorithm can 128 * be set. 129 * 130 * @param initial Initial state probabilities. 131 * @param transition Transition matrix. 132 * @param emission Emission distributions. 133 * @param tolerance Tolerance for convergence of training algorithm 134 * (Baum-Welch). 135 */ 136 HMM(const arma::vec& initial, 137 const arma::mat& transition, 138 const std::vector<Distribution>& emission, 139 const double tolerance = 1e-5); 140 141 /** 142 * Train the model using the Baum-Welch algorithm, with only the given 143 * unlabeled observations. Instead of giving a guess transition and emission 144 * matrix here, do that in the constructor. Each matrix in the vector of data 145 * sequences holds an individual data sequence; each point in each individual 146 * data sequence should be a column in the matrix. The number of rows in each 147 * matrix should be equal to the dimensionality of the HMM (which is set in 148 * the constructor). 149 * 150 * It is preferable to use the other overload of Train(), with labeled data. 151 * That will produce much better results. However, if labeled data is 152 * unavailable, this will work. In addition, it is possible to use Train() 153 * with labeled data first, and then continue to train the model using this 154 * overload of Train() with unlabeled data. 155 * 156 * The tolerance of the Baum-Welch algorithm can be set either in the 157 * constructor or with the Tolerance() method. When the change in 158 * log-likelihood of the model between iterations is less than the tolerance, 159 * the Baum-Welch algorithm terminates. 160 * 161 * @note 162 * Train() can be called multiple times with different sequences; each time it 163 * is called, it uses the current parameters of the HMM as a starting point 164 * for training. 165 * 166 * @param dataSeq Vector of observation sequences. 167 * @return Log-likelihood of state sequence. 
168 */ 169 double Train(const std::vector<arma::mat>& dataSeq); 170 171 /** 172 * Train the model using the given labeled observations; the transition and 173 * emission matrices are directly estimated. Each matrix in the vector of 174 * data sequences corresponds to a vector in the vector of state sequences. 175 * Each point in each individual data sequence should be a column in the 176 * matrix, and its state should be the corresponding element in the state 177 * sequence vector. For instance, dataSeq[0].col(3) corresponds to the fourth 178 * observation in the first data sequence, and its state is stateSeq[0][3]. 179 * The number of rows in each matrix should be equal to the dimensionality of 180 * the HMM (which is set in the constructor). 181 * 182 * @note 183 * Train() can be called multiple times with different sequences; each time it 184 * is called, it uses the current parameters of the HMM as a starting point 185 * for training. 186 * 187 * @param dataSeq Vector of observation sequences. 188 * @param stateSeq Vector of state sequences, corresponding to each 189 * observation. 190 */ 191 void Train(const std::vector<arma::mat>& dataSeq, 192 const std::vector<arma::Row<size_t> >& stateSeq); 193 194 /** 195 * Estimate the probabilities of each hidden state at each time step for each 196 * given data observation, using the Forward-Backward algorithm. Each matrix 197 * which is returned has columns equal to the number of data observations, and 198 * rows equal to the number of hidden states in the model. The log-likelihood 199 * of the most probable sequence is returned. 200 * 201 * @param dataSeq Sequence of observations. 202 * @param stateLogProb Matrix in which the log probabilities of each state at 203 * each time interval will be stored. 204 * @param forwardLogProb Matrix in which the forward log probabilities of each 205 * state at each time interval will be stored. 
206 * @param backwardLogProb Matrix in which the backward log probabilities of each 207 * state at each time interval will be stored. 208 * @param logScales Vector in which the log of scaling factors at each time 209 * interval will be stored. 210 * @return Log-likelihood of most likely state sequence. 211 */ 212 double LogEstimate(const arma::mat& dataSeq, 213 arma::mat& stateLogProb, 214 arma::mat& forwardLogProb, 215 arma::mat& backwardLogProb, 216 arma::vec& logScales) const; 217 218 /** 219 * Estimate the probabilities of each hidden state at each time step for each 220 * given data observation, using the Forward-Backward algorithm. Each matrix 221 * which is returned has columns equal to the number of data observations, and 222 * rows equal to the number of hidden states in the model. The log-likelihood 223 * of the most probable sequence is returned. 224 * 225 * @param dataSeq Sequence of observations. 226 * @param stateProb Matrix in which the probabilities of each state at each 227 * time interval will be stored. 228 * @param forwardProb Matrix in which the forward probabilities of each state 229 * at each time interval will be stored. 230 * @param backwardProb Matrix in which the backward probabilities of each 231 * state at each time interval will be stored. 232 * @param scales Vector in which the scaling factors at each time interval 233 * will be stored. 234 * @return Log-likelihood of most likely state sequence. 235 */ 236 double Estimate(const arma::mat& dataSeq, 237 arma::mat& stateProb, 238 arma::mat& forwardProb, 239 arma::mat& backwardProb, 240 arma::vec& scales) const; 241 242 /** 243 * Estimate the probabilities of each hidden state at each time step of each 244 * given data observation, using the Forward-Backward algorithm. The returned 245 * matrix of state probabilities has columns equal to the number of data 246 * observations, and rows equal to the number of hidden states in the model. 
247 * The log-likelihood of the most probable sequence is returned. 248 * 249 * @param dataSeq Sequence of observations. 250 * @param stateProb Probabilities of each state at each time interval. 251 * @return Log-likelihood of most likely state sequence. 252 */ 253 double Estimate(const arma::mat& dataSeq, 254 arma::mat& stateProb) const; 255 256 /** 257 * Generate a random data sequence of the given length. The data sequence is 258 * stored in the dataSequence parameter, and the state sequence is stored in 259 * the stateSequence parameter. Each column of dataSequence represents a 260 * random observation. 261 * 262 * @param length Length of random sequence to generate. 263 * @param dataSequence Vector to store data in. 264 * @param stateSequence Vector to store states in. 265 * @param startState Hidden state to start sequence in (default 0). 266 */ 267 void Generate(const size_t length, 268 arma::mat& dataSequence, 269 arma::Row<size_t>& stateSequence, 270 const size_t startState = 0) const; 271 272 /** 273 * Compute the most probable hidden state sequence for the given data 274 * sequence, using the Viterbi algorithm, returning the log-likelihood of the 275 * most likely state sequence. 276 * 277 * @param dataSeq Sequence of observations. 278 * @param stateSeq Vector in which the most probable state sequence will be 279 * stored. 280 * @return Log-likelihood of most probable state sequence. 281 */ 282 double Predict(const arma::mat& dataSeq, 283 arma::Row<size_t>& stateSeq) const; 284 285 /** 286 * Compute the log-likelihood of the given data sequence. 287 * 288 * @param dataSeq Data sequence to evaluate the likelihood of. 289 * @return Log-likelihood of the given sequence. 290 */ 291 double LogLikelihood(const arma::mat& dataSeq) const; 292 293 /** 294 * HMM filtering. Computes the k-step-ahead expected emission at each time 295 * conditioned only on prior observations. That is 296 * E{ Y[t+k] | Y[0], ..., Y[t] }. 
297 * The returned matrix has columns equal to the number of observations. Note 298 * that the expectation may not be meaningful for discrete emissions. 299 * 300 * @param dataSeq Sequence of observations. 301 * @param filterSeq Vector in which the expected emission sequence will be 302 * stored. 303 * @param ahead Number of steps ahead (k) for expectations. 304 */ 305 void Filter(const arma::mat& dataSeq, 306 arma::mat& filterSeq, 307 size_t ahead = 0) const; 308 309 /** 310 * HMM smoothing. Computes expected emission at each time conditioned on all 311 * observations. That is 312 * E{ Y[t] | Y[0], ..., Y[T] }. 313 * The returned matrix has columns equal to the number of observations. Note 314 * that the expectation may not be meaningful for discrete emissions. 315 * 316 * @param dataSeq Sequence of observations. 317 * @param smoothSeq Vector in which the expected emission sequence will be 318 * stored. 319 */ 320 void Smooth(const arma::mat& dataSeq, 321 arma::mat& smoothSeq) const; 322 323 //! Return the vector of initial state probabilities. Initial() const324 const arma::vec& Initial() const { return initialProxy; } 325 //! Modify the vector of initial state probabilities. Initial()326 arma::vec& Initial() 327 { 328 recalculateInitial = true; 329 return initialProxy; 330 } 331 332 //! Return the transition matrix. Transition() const333 const arma::mat& Transition() const { return transitionProxy; } 334 //! Return a modifiable transition matrix reference. Transition()335 arma::mat& Transition() 336 { 337 recalculateTransition = true; 338 return transitionProxy; 339 } 340 341 //! Return the emission distributions. Emission() const342 const std::vector<Distribution>& Emission() const { return emission; } 343 //! Return a modifiable emission probability matrix reference. Emission()344 std::vector<Distribution>& Emission() { return emission; } 345 346 //! Get the dimensionality of observations. 
Dimensionality() const347 size_t Dimensionality() const { return dimensionality; } 348 //! Set the dimensionality of observations. Dimensionality()349 size_t& Dimensionality() { return dimensionality; } 350 351 //! Get the tolerance of the Baum-Welch algorithm. Tolerance() const352 double Tolerance() const { return tolerance; } 353 //! Modify the tolerance of the Baum-Welch algorithm. Tolerance()354 double& Tolerance() { return tolerance; } 355 356 /** 357 * Load the object. 358 */ 359 template<typename Archive> 360 void load(Archive& ar, const unsigned int version); 361 362 /** 363 * Save the object. 364 */ 365 template<typename Archive> 366 void save(Archive& ar, const unsigned int version) const; 367 368 BOOST_SERIALIZATION_SPLIT_MEMBER(); 369 370 371 protected: 372 // Helper functions. 373 /** 374 * The Forward algorithm (part of the Forward-Backward algorithm). Computes 375 * forward probabilities for each state for each observation in the given data 376 * sequence. The returned matrix has rows equal to the number of hidden 377 * states and columns equal to the number of observations. 378 * 379 * @param dataSeq Data sequence to compute probabilities for. 380 * @param logScales Vector in which scaling factors will be saved. 381 * @param forwardLogProb Matrix in which forward probabilities will be saved. 382 */ 383 void Forward(const arma::mat& dataSeq, 384 arma::vec& logScales, 385 arma::mat& forwardLogProb) const; 386 387 /** 388 * The Backward algorithm (part of the Forward-Backward algorithm). Computes 389 * backward probabilities for each state for each observation in the given 390 * data sequence, using the scaling factors found (presumably) by Forward(). 391 * The returned matrix has rows equal to the number of hidden states and 392 * columns equal to the number of observations. 393 * 394 * @param dataSeq Data sequence to compute probabilities for. 395 * @param logScales Vector of scaling factors. 
396 * @param backwardLogProb Matrix in which backward probabilities will be saved. 397 */ 398 void Backward(const arma::mat& dataSeq, 399 const arma::vec& logScales, 400 arma::mat& backwardLogProb) const; 401 402 //! Set of emission probability distributions; one for each state. 403 std::vector<Distribution> emission; 404 405 /** 406 * A proxy variable in linear space for logTransition. 407 * Should be removed in mlpack 4.0. 408 */ 409 arma::mat transitionProxy; 410 411 //! Transition probability matrix. No need to be mutable in mlpack 4.0. 412 mutable arma::mat logTransition; 413 414 private: 415 /** 416 * Make sure the variables in log space are in sync 417 * with the linear counter parts. 418 * Should be removed in mlpack 4.0. 419 */ 420 void ConvertToLogSpace() const; 421 422 /** 423 * A proxy vriable in linear space for logInitial. 424 * Should be removed in mlpack 4.0. 425 */ 426 arma::vec initialProxy; 427 428 //! Initial state probability vector. No need to be mutable in mlpack 4.0. 429 mutable arma::vec logInitial; 430 431 //! Dimensionality of observations. 432 size_t dimensionality; 433 434 //! Tolerance of Baum-Welch algorithm. 435 double tolerance; 436 437 /** 438 * Whether or not we need to update the logInitial from initialProxy. 439 * Should be removed in mlpack 4.0. 440 */ 441 mutable bool recalculateInitial; 442 443 /** 444 * Whether or not we need to update the logTransition from transitionProxy. 445 * Should be removed in mlpack 4.0. 446 */ 447 mutable bool recalculateTransition; 448 }; 449 450 } // namespace hmm 451 } // namespace mlpack 452 453 // Include implementation. 454 #include "hmm_impl.hpp" 455 456 #endif 457