/**
 * @file methods/hmm/hmm.hpp
 * @author Ryan Curtin
 * @author Tran Quoc Long
 * @author Michael Fox
 *
 * Definition of HMM class.
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license.  You should have received a copy of the
 * 3-clause BSD license along with mlpack.  If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_HMM_HMM_HPP
#define MLPACK_METHODS_HMM_HMM_HPP

#include <mlpack/prereqs.hpp>
#include <mlpack/core/dists/discrete_distribution.hpp>

namespace mlpack {
namespace hmm /** Hidden Markov Models. */ {

/**
 * A class that represents a Hidden Markov Model with an arbitrary type of
 * emission distribution.  This HMM class supports training (supervised and
 * unsupervised), prediction of state sequences via the Viterbi algorithm,
 * estimation of state probabilities, generation of random sequences, and
 * calculation of the log-likelihood of a given sequence.
 *
 * The template parameter, Distribution, specifies the distribution which the
 * emissions follow.  The class should implement the following functions:
 *
 * @code
 * class Distribution
 * {
 *  public:
 *   // The type of observation used by this distribution.
 *   typedef something DataType;
 *
 *   // Return a random observation from the distribution (used by Generate()).
 *   DataType Random() const;
 *
 *   // Return the probability of the given observation.
 *   double Probability(const DataType& observation) const;
 *
 *   // Estimate the distribution based on the given observations.
 *   double Train(const std::vector<DataType>& observations);
 *
 *   // Estimate the distribution based on the given observations, given also
 *   // the probability of each observation coming from this distribution.
 *   double Train(const std::vector<DataType>& observations,
 *                const std::vector<double>& probabilities);
 * };
 * @endcode
 *
 * See the mlpack::distribution::DiscreteDistribution class for an example.  One
 * would use the DiscreteDistribution class when the observations are
 * non-negative integers.  Other distributions could be Gaussians, a mixture of
 * Gaussians (GMM), or any other probability distribution implementing the
 * four Distribution functions.
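 *
 * For example, a minimal sketch of a discrete HMM with two hidden states whose
 * observations are integers in {0, 1, 2, 3} (the state and observation counts
 * here are only illustrative):
 *
 * @code
 * HMM<DiscreteDistribution> hmm(2, DiscreteDistribution(4));
 * @endcode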
 *
 * Usage of the HMM class generally involves either training an HMM or loading
 * an already-known HMM and taking probability measurements of sequences.
 * Example code for supervised training of a Gaussian HMM (that is, where the
 * emission output distribution is a single Gaussian for each hidden state) is
 * given below.
 *
 * @code
 * extern arma::mat observations; // Each column is an observation.
 * extern arma::Row<size_t> states; // Hidden states for each observation.
 * // Create an untrained HMM with 5 hidden states and default (N(0, 1))
 * // Gaussian distributions with the dimensionality of the dataset.
 * HMM<GaussianDistribution> hmm(5, GaussianDistribution(observations.n_rows));
 *
 * // Train the HMM (the labels could be omitted to perform unsupervised
 * // training).  Train() takes vectors of sequences, so wrap the single
 * // sequence and its labels.
 * std::vector<arma::mat> dataSeq(1, observations);
 * std::vector<arma::Row<size_t> > stateSeq(1, states);
 * hmm.Train(dataSeq, stateSeq);
 * @endcode
 *
 * Once initialized, the HMM can evaluate the probability of a certain sequence
 * (with LogLikelihood()), predict the most likely sequence of hidden states
 * (with Predict()), generate a sequence (with Generate()), or estimate the
 * probabilities of each state for a sequence of observations (with Estimate()).
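 *
 * For example, continuing the training sketch above (a minimal sketch of these
 * calls):
 *
 * @code
 * const double logLik = hmm.LogLikelihood(observations);
 * arma::Row<size_t> predictedStates;
 * hmm.Predict(observations, predictedStates);  // Viterbi decoding.
 * arma::mat stateProbabilities;
 * hmm.Estimate(observations, stateProbabilities);  // Posterior state probabilities.
 * @endcode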
 *
 * @tparam Distribution Type of emission distribution for this HMM.
 */
template<typename Distribution = distribution::DiscreteDistribution>
class HMM
{
 public:
  /**
   * Create the Hidden Markov Model with the given number of hidden states and
   * the given default distribution for emissions.  The dimensionality of the
   * observations is taken from the emissions variable, so it is important that
   * the given default emission distribution is set with the correct
   * dimensionality.  Alternately, set the dimensionality with Dimensionality().
   * Optionally, the tolerance for convergence of the Baum-Welch algorithm can
   * be set.
   *
   * By default, the transition matrix and initial probability vector are set to
   * contain equal probability for each state.
   *
   * @param states Number of states.
   * @param emissions Default distribution for emissions.
   * @param tolerance Tolerance for convergence of training algorithm
   *      (Baum-Welch).
   */
  HMM(const size_t states = 0,
      const Distribution emissions = Distribution(),
      const double tolerance = 1e-5);

  /**
   * Create the Hidden Markov Model with the given initial probability vector,
   * the given transition matrix, and the given emission distributions.  The
   * dimensionality of the observations of the HMM is taken from the given
   * emission distributions.  Alternately, the dimensionality can be set with
   * Dimensionality().
   *
   * The initial state probability vector should have length equal to the number
   * of states, and each entry represents the probability of being in the given
   * state at time T = 0 (the beginning of a sequence).
   *
   * The transition matrix should be such that T(i, j) is the probability of
   * transition to state i from state j.  The columns of the matrix should sum
   * to 1.
   *
   * The emission distributions should be given as a vector of length equal to
   * the number of states, where emission[i] is the emission distribution of
   * hidden state i.
   *
   * Optionally, the tolerance for convergence of the Baum-Welch algorithm can
   * be set.
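   *
   * For example, a minimal sketch with assumed example values (two states,
   * one-dimensional Gaussian emissions):
   *
   * @code
   * arma::vec initial("0.5 0.5");  // Equal probability of each state at t = 0.
   * arma::mat transition("0.9 0.2; 0.1 0.8");  // Columns sum to 1.
   * std::vector<GaussianDistribution> emission;
   * emission.push_back(GaussianDistribution(arma::vec("0.0"), arma::mat("1.0")));
   * emission.push_back(GaussianDistribution(arma::vec("3.0"), arma::mat("1.0")));
   * HMM<GaussianDistribution> hmm(initial, transition, emission);
   * @endcode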
   *
   * @param initial Initial state probabilities.
   * @param transition Transition matrix.
   * @param emission Emission distributions.
   * @param tolerance Tolerance for convergence of training algorithm
   *      (Baum-Welch).
   */
  HMM(const arma::vec& initial,
      const arma::mat& transition,
      const std::vector<Distribution>& emission,
      const double tolerance = 1e-5);

  /**
   * Train the model using the Baum-Welch algorithm, with only the given
   * unlabeled observations.  Instead of giving initial guesses for the
   * transition matrix and emission distributions here, set them in the
   * constructor.  Each matrix in the vector of data sequences holds an
   * individual data sequence; each point in each individual data sequence
   * should be a column in the matrix.  The number of rows in each matrix
   * should be equal to the dimensionality of the HMM (which is set in the
   * constructor).
   *
   * It is preferable to use the other overload of Train(), with labeled data.
   * That will produce much better results.  However, if labeled data is
   * unavailable, this will work.  In addition, it is possible to use Train()
   * with labeled data first, and then continue to train the model using this
   * overload of Train() with unlabeled data.
   *
   * The tolerance of the Baum-Welch algorithm can be set either in the
   * constructor or with the Tolerance() method.  When the change in
   * log-likelihood of the model between iterations is less than the tolerance,
   * the Baum-Welch algorithm terminates.
   *
   * @note
   * Train() can be called multiple times with different sequences; each time it
   * is called, it uses the current parameters of the HMM as a starting point
   * for training.
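   *
   * For example, a minimal sketch (hmm is an already-constructed HMM, and seq1
   * and seq2 are assumed observation sequences with one observation per
   * column):
   *
   * @code
   * extern arma::mat seq1, seq2;
   * std::vector<arma::mat> dataSeq;
   * dataSeq.push_back(seq1);
   * dataSeq.push_back(seq2);
   * const double logLikelihood = hmm.Train(dataSeq);
   * @endcode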
   *
   * @param dataSeq Vector of observation sequences.
   * @return Log-likelihood of state sequence.
   */
  double Train(const std::vector<arma::mat>& dataSeq);

  /**
   * Train the model using the given labeled observations; the transition
   * matrix and emission distributions are directly estimated.  Each matrix in
   * the vector of data sequences corresponds to a vector in the vector of
   * state sequences.  Each point in each individual data sequence should be a
   * column in the matrix, and its state should be the corresponding element in
   * the state sequence vector.  For instance, dataSeq[0].col(3) corresponds to
   * the fourth observation in the first data sequence, and its state is
   * stateSeq[0][3].  The number of rows in each matrix should be equal to the
   * dimensionality of the HMM (which is set in the constructor).
   *
   * @note
   * Train() can be called multiple times with different sequences; each time it
   * is called, it uses the current parameters of the HMM as a starting point
   * for training.
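   *
   * For example, a minimal sketch with a single labeled sequence (seq and
   * states are assumed to be aligned column-by-column):
   *
   * @code
   * extern arma::mat seq;             // One observation per column.
   * extern arma::Row<size_t> states;  // One hidden state label per column.
   * std::vector<arma::mat> dataSeq(1, seq);
   * std::vector<arma::Row<size_t> > stateSeq(1, states);
   * hmm.Train(dataSeq, stateSeq);
   * @endcode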
   *
   * @param dataSeq Vector of observation sequences.
   * @param stateSeq Vector of state sequences, corresponding to each
   *     observation.
   */
  void Train(const std::vector<arma::mat>& dataSeq,
             const std::vector<arma::Row<size_t> >& stateSeq);

  /**
   * Estimate the probabilities of each hidden state at each time step for each
   * given data observation, using the Forward-Backward algorithm.  Each matrix
   * which is returned has columns equal to the number of data observations, and
   * rows equal to the number of hidden states in the model.  The log-likelihood
   * of the most probable sequence is returned.
   *
   * @param dataSeq Sequence of observations.
   * @param stateLogProb Matrix in which the log probabilities of each state at
   *    each time interval will be stored.
   * @param forwardLogProb Matrix in which the forward log probabilities of each
   *    state at each time interval will be stored.
   * @param backwardLogProb Matrix in which the backward log probabilities of each
   *    state at each time interval will be stored.
   * @param logScales Vector in which the log of scaling factors at each time
   *    interval will be stored.
   * @return Log-likelihood of most likely state sequence.
   */
  double LogEstimate(const arma::mat& dataSeq,
                     arma::mat& stateLogProb,
                     arma::mat& forwardLogProb,
                     arma::mat& backwardLogProb,
                     arma::vec& logScales) const;

  /**
   * Estimate the probabilities of each hidden state at each time step for each
   * given data observation, using the Forward-Backward algorithm.  Each matrix
   * which is returned has columns equal to the number of data observations, and
   * rows equal to the number of hidden states in the model.  The log-likelihood
   * of the most probable sequence is returned.
   *
   * @param dataSeq Sequence of observations.
   * @param stateProb Matrix in which the probabilities of each state at each
   *    time interval will be stored.
   * @param forwardProb Matrix in which the forward probabilities of each state
   *    at each time interval will be stored.
   * @param backwardProb Matrix in which the backward probabilities of each
   *    state at each time interval will be stored.
   * @param scales Vector in which the scaling factors at each time interval
   *    will be stored.
   * @return Log-likelihood of most likely state sequence.
   */
  double Estimate(const arma::mat& dataSeq,
                  arma::mat& stateProb,
                  arma::mat& forwardProb,
                  arma::mat& backwardProb,
                  arma::vec& scales) const;

  /**
   * Estimate the probabilities of each hidden state at each time step of each
   * given data observation, using the Forward-Backward algorithm.  The returned
   * matrix of state probabilities has columns equal to the number of data
   * observations, and rows equal to the number of hidden states in the model.
   * The log-likelihood of the most probable sequence is returned.
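   *
   * For example, a minimal sketch (dataSeq is an assumed observation sequence):
   *
   * @code
   * arma::mat stateProb;
   * const double logLik = hmm.Estimate(dataSeq, stateProb);
   * // stateProb(i, t) holds the probability of being in state i at time t.
   * @endcode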
   *
   * @param dataSeq Sequence of observations.
   * @param stateProb Probabilities of each state at each time interval.
   * @return Log-likelihood of most likely state sequence.
   */
  double Estimate(const arma::mat& dataSeq,
                  arma::mat& stateProb) const;

  /**
   * Generate a random data sequence of the given length.  The data sequence is
   * stored in the dataSequence parameter, and the state sequence is stored in
   * the stateSequence parameter.  Each column of dataSequence represents a
   * random observation.
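   *
   * For example, a minimal sketch generating a sequence of length 100:
   *
   * @code
   * arma::mat observations;
   * arma::Row<size_t> states;
   * hmm.Generate(100, observations, states);
   * @endcode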
   *
   * @param length Length of random sequence to generate.
   * @param dataSequence Matrix in which the generated observations will be
   *    stored (one column per observation).
   * @param stateSequence Vector in which the generated hidden states will be
   *    stored.
   * @param startState Hidden state to start sequence in (default 0).
   */
  void Generate(const size_t length,
                arma::mat& dataSequence,
                arma::Row<size_t>& stateSequence,
                const size_t startState = 0) const;

  /**
   * Compute the most probable hidden state sequence for the given data
   * sequence, using the Viterbi algorithm, returning the log-likelihood of the
   * most likely state sequence.
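   *
   * For example, a minimal sketch (dataSeq is an assumed observation sequence):
   *
   * @code
   * arma::Row<size_t> stateSeq;
   * const double logLik = hmm.Predict(dataSeq, stateSeq);
   * @endcode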
   *
   * @param dataSeq Sequence of observations.
   * @param stateSeq Vector in which the most probable state sequence will be
   *    stored.
   * @return Log-likelihood of most probable state sequence.
   */
  double Predict(const arma::mat& dataSeq,
                 arma::Row<size_t>& stateSeq) const;

  /**
   * Compute the log-likelihood of the given data sequence.
   *
   * @param dataSeq Data sequence to evaluate the likelihood of.
   * @return Log-likelihood of the given sequence.
   */
  double LogLikelihood(const arma::mat& dataSeq) const;

  /**
   * HMM filtering.  Computes the k-step-ahead expected emission at each time
   * conditioned only on prior observations.  That is
   * E{ Y[t+k] | Y[0], ..., Y[t] }.
   * The returned matrix has columns equal to the number of observations.  Note
   * that the expectation may not be meaningful for discrete emissions.
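   *
   * For example, a minimal sketch computing one-step-ahead expected emissions:
   *
   * @code
   * arma::mat filtered;
   * hmm.Filter(dataSeq, filtered, 1);
   * @endcode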
   *
   * @param dataSeq Sequence of observations.
   * @param filterSeq Matrix in which the expected emission sequence will be
   *    stored (one column per time step).
   * @param ahead Number of steps ahead (k) for expectations.
   */
  void Filter(const arma::mat& dataSeq,
              arma::mat& filterSeq,
              size_t ahead = 0) const;

  /**
   * HMM smoothing.  Computes the expected emission at each time conditioned on
   * all observations.  That is
   * E{ Y[t] | Y[0], ..., Y[T] }.
   * The returned matrix has columns equal to the number of observations.  Note
   * that the expectation may not be meaningful for discrete emissions.
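   *
   * For example, a minimal sketch:
   *
   * @code
   * arma::mat smoothed;
   * hmm.Smooth(dataSeq, smoothed);
   * @endcode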
   *
   * @param dataSeq Sequence of observations.
   * @param smoothSeq Matrix in which the expected emission sequence will be
   *    stored (one column per time step).
   */
  void Smooth(const arma::mat& dataSeq,
              arma::mat& smoothSeq) const;

  //! Return the vector of initial state probabilities.
  const arma::vec& Initial() const { return initialProxy; }
  //! Modify the vector of initial state probabilities.
  arma::vec& Initial()
  {
    recalculateInitial = true;
    return initialProxy;
  }

  //! Return the transition matrix.
  const arma::mat& Transition() const { return transitionProxy; }
  //! Return a modifiable transition matrix reference.
  arma::mat& Transition()
  {
    recalculateTransition = true;
    return transitionProxy;
  }

  //! Return the emission distributions.
  const std::vector<Distribution>& Emission() const { return emission; }
  //! Return a modifiable reference to the emission distributions.
  std::vector<Distribution>& Emission() { return emission; }

  //! Get the dimensionality of observations.
  size_t Dimensionality() const { return dimensionality; }
  //! Set the dimensionality of observations.
  size_t& Dimensionality() { return dimensionality; }

  //! Get the tolerance of the Baum-Welch algorithm.
  double Tolerance() const { return tolerance; }
  //! Modify the tolerance of the Baum-Welch algorithm.
  double& Tolerance() { return tolerance; }

  /**
   * Load the object.
   */
  template<typename Archive>
  void load(Archive& ar, const unsigned int version);

  /**
   * Save the object.
   */
  template<typename Archive>
  void save(Archive& ar, const unsigned int version) const;

  BOOST_SERIALIZATION_SPLIT_MEMBER();

 protected:
  // Helper functions.
  /**
   * The Forward algorithm (part of the Forward-Backward algorithm).  Computes
   * forward probabilities for each state for each observation in the given data
   * sequence.  The returned matrix has rows equal to the number of hidden
   * states and columns equal to the number of observations.
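   *
   * As a sketch of the standard forward recurrence (the exact scaling applied
   * here is an implementation detail), with T(i, j) the probability of moving
   * to state i from state j:
   * forward(i, t) is proportional to
   * P(o_t | state i) * sum_j T(i, j) * forward(j, t - 1).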
   *
   * @param dataSeq Data sequence to compute probabilities for.
   * @param logScales Vector in which scaling factors will be saved.
   * @param forwardLogProb Matrix in which forward probabilities will be saved.
   */
  void Forward(const arma::mat& dataSeq,
               arma::vec& logScales,
               arma::mat& forwardLogProb) const;

  /**
   * The Backward algorithm (part of the Forward-Backward algorithm).  Computes
   * backward probabilities for each state for each observation in the given
   * data sequence, using the scaling factors found (presumably) by Forward().
   * The returned matrix has rows equal to the number of hidden states and
   * columns equal to the number of observations.
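   *
   * As a sketch of the standard backward recurrence (scaling details aside):
   * backward(i, t) is proportional to
   * sum_j T(j, i) * P(o_{t+1} | state j) * backward(j, t + 1).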
   *
   * @param dataSeq Data sequence to compute probabilities for.
   * @param logScales Vector of scaling factors.
   * @param backwardLogProb Matrix in which backward probabilities will be saved.
   */
  void Backward(const arma::mat& dataSeq,
                const arma::vec& logScales,
                arma::mat& backwardLogProb) const;

  //! Set of emission probability distributions; one for each state.
  std::vector<Distribution> emission;

  /**
   * A proxy variable in linear space for logTransition.
   * Should be removed in mlpack 4.0.
   */
  arma::mat transitionProxy;

  //! Transition probability matrix, stored in log space.  No need to be
  //! mutable in mlpack 4.0.
  mutable arma::mat logTransition;

 private:
  /**
   * Make sure the variables in log space are in sync
   * with their linear counterparts.
   * Should be removed in mlpack 4.0.
   */
  void ConvertToLogSpace() const;

  /**
   * A proxy variable in linear space for logInitial.
   * Should be removed in mlpack 4.0.
   */
  arma::vec initialProxy;

  //! Initial state probability vector, stored in log space.  No need to be
  //! mutable in mlpack 4.0.
  mutable arma::vec logInitial;

  //! Dimensionality of observations.
  size_t dimensionality;

  //! Tolerance of Baum-Welch algorithm.
  double tolerance;
  /**
   * Whether or not we need to update logInitial from initialProxy.
   * Should be removed in mlpack 4.0.
   */
  mutable bool recalculateInitial;

  /**
   * Whether or not we need to update logTransition from transitionProxy.
   * Should be removed in mlpack 4.0.
   */
  mutable bool recalculateTransition;
};

} // namespace hmm
} // namespace mlpack

// Include implementation.
#include "hmm_impl.hpp"

#endif