1 
2 //
3 // C++ Interface: StateSpace
4 //
5 // Description:
6 //
7 //
8 // Author: BUI Quang Minh (c) 2018
9 //
10 // Copyright: See COPYING file that comes with this distribution
11 //
12 //
13 
14 #ifndef STATESPACE_H
15 #define STATESPACE_H
16 
17 #include <iostream>
18 #include <string>
19 #include <vector>
20 #include <stdint.h>
21 #include "utils/tools.h"
22 #include "yaml-cpp/yaml.h"
23 
24 namespace PML {
25 
/**
 StateType: numeric ID of a single state, stored as a 32-bit unsigned int
 */
using StateType = uint32_t;

/**
 StateVector: sequence of state IDs.
 std::vector is spelled out so this alias does not depend on a
 using-directive pulled in by an earlier include.
 */
using StateVector = std::vector<StateType>;
32 
/**
 Sequence type identifiers.
 The enumerators take the implicit consecutive values 0 (SEQ_DNA)
 through 7 (SEQ_UNKNOWN).
 */
enum SeqType {
    SEQ_DNA,
    SEQ_PROTEIN,
    SEQ_BINARY,
    SEQ_MORPH,
    SEQ_MULTISTATE,
    SEQ_CODON,
    SEQ_POMO,
    SEQ_UNKNOWN
};
36 
// IMPORTANT: STATE_UNKNOWN still needs to be refactored; its definition
// below is disabled for now.
//const char STATE_UNKNOWN = 126;
39 
// TODO DS: This seems like a significant restriction.
/* PoMo: STATE_INVALID is not handled in PoMo.  The value is set to 127
 so that the comparison to char in alignment.cpp no longer produces a
 warning.  This becomes important if the maximum N is ever increased
 above 21: the state space is then larger than 127 and a different
 solution has to be found.
 Disabled alternative value, kept for reference: 255. */
constexpr unsigned char STATE_INVALID = 127;
48 
49 #ifdef USE_HASH_MAP
50 typedef unordered_map<string, int> StringIntMap;
51 typedef unordered_map<string, StateType> StringStateMap;
52 typedef unordered_map<StateType, string> StateStringMap;
53 typedef unordered_map<string, double> StringDoubleHashMap;
54 typedef unordered_map<uint32_t, uint32_t> IntIntMap;
55 #else
56 typedef map<string, int> StringIntMap;
57 typedef map<string, StateType> StringStateMap;
58 typedef map<StateType, string> StateStringMap;
59 typedef map<string, double> StringDoubleHashMap;
60 typedef map<uint32_t, uint32_t> IntIntMap;
61 #endif
62 
63 
64 /**
65  general class defining state space
66  */
67 class StateSpace {
68 public:
69     /** constructor */
70     StateSpace();
71 
72     /** destructor */
73     ~StateSpace();
74 
75     /** convert a raw string to single state ID */
76     StateType toState(string str);
77 
78     /**
79     convert the entire string into vector of states
80     @param[in] str input string
81     @param[out] str_states output vector of StateType
82     */
83     void toState(string &str, StateVector &str_states);
84 
85     /** convert a state back to raw string */
86     string toString(StateType state);
87 
88     /**
89     check if a state is unknown (missing or gap)
90     */
91     bool isUnknown(StateType state);
92 
93     /** get number of states */
getNStates()94     inline int getNStates() { return num_states; }
95 
96     /** get all number of states incl. missing/gap/ambiguous states */
getNAllStates()97     inline int getNAllStates() { return states.size(); }
98 
99     /**
100      initialise from a state definition string
101      @param datatype a YAML::Node structure
102      */
103     void parseStateSpace(YAML::Node datatype);
104 
105     /**
106      initialise state space from a SeqType
107      @param seqtype sequence type
108     */
109     void initStateSpace(SeqType seqtype);
110 
111     /**
112     reset state space
113     */
114     void resetStateSpace();
115 
116     /** number of state */
117     int num_states;
118 
119 protected:
120 
121     /** state space name */
122     string space_name;
123 
124     /** number of state */
125     int num_all_states;
126 
127     /** map from raw state string to state ID */
128     StringStateMap states;
129 
130     /** map from state ID to raw state string */
131     StateStringMap raw_states;
132 
133     /** map from ambiguous states to vector of state ID */
134     unordered_map<StateType, StateVector>equate;
135 
136     /** vector of the same size as states to translate to another state space */
137     StrVector translate;
138 
139 private:
140 
141     /** minimum length of state string */
142     int min_state_len;
143 
144     /** maximum length of state string */
145     int max_state_len;
146 
147 };
148 
149 } // namespace PML
150 
151 #endif
152