1 2 // 3 // C++ Interface: StateSpace 4 // 5 // Description: 6 // 7 // 8 // Author: BUI Quang Minh (c) 2018 9 // 10 // Copyright: See COPYING file that comes with this distribution 11 // 12 // 13 14 #ifndef STATESPACE_H 15 #define STATESPACE_H 16 17 #include <iostream> 18 #include <string> 19 #include <vector> 20 #include <stdint.h> 21 #include "utils/tools.h" 22 #include "yaml-cpp/yaml.h" 23 24 namespace PML { 25 26 /** 27 StateType as 32-bit unsigned int 28 */ 29 typedef uint32_t StateType; 30 31 typedef vector<StateType> StateVector; 32 33 enum SeqType { 34 SEQ_DNA, SEQ_PROTEIN, SEQ_BINARY, SEQ_MORPH, SEQ_MULTISTATE, SEQ_CODON, SEQ_POMO, SEQ_UNKNOWN 35 }; 36 37 // IMPORTANT: refactor STATE_UNKNOWN 38 //const char STATE_UNKNOWN = 126; 39 40 // TODO DS: This seems like a significant restriction. 41 /* PoMo: STATE_INVALID is not handled in PoMo. Set STATE_INVALID to 42 127 to remove warning about comparison to char in alignment.cpp. 43 This is important if the maximum N will be increased above 21 44 because then the state space is larger than 127 and we have to 45 think about something else. */ 46 /* const unsigned char STATE_INVALID = 255; */ 47 const unsigned char STATE_INVALID = 127; 48 49 #ifdef USE_HASH_MAP 50 typedef unordered_map<string, int> StringIntMap; 51 typedef unordered_map<string, StateType> StringStateMap; 52 typedef unordered_map<StateType, string> StateStringMap; 53 typedef unordered_map<string, double> StringDoubleHashMap; 54 typedef unordered_map<uint32_t, uint32_t> IntIntMap; 55 #else 56 typedef map<string, int> StringIntMap; 57 typedef map<string, StateType> StringStateMap; 58 typedef map<StateType, string> StateStringMap; 59 typedef map<string, double> StringDoubleHashMap; 60 typedef map<uint32_t, uint32_t> IntIntMap; 61 #endif 62 63 64 /** 65 general class defining state space 66 */ 67 class StateSpace { 68 public: 69 /** constructor */ 70 StateSpace(); 71 72 /** destructor */ 73 ~StateSpace(); 74 75 /** convert a raw string to single state ID */ 76 StateType toState(string str); 77 78 /** 79 convert the entire string into vector of states 80 @param[in] str input string 81 @param[out] str_states output vector of StateType 82 */ 83 void toState(string &str, StateVector &str_states); 84 85 /** convert a state back to raw string */ 86 string toString(StateType state); 87 88 /** 89 check if a state is unknown (missing or gap) 90 */ 91 bool isUnknown(StateType state); 92 93 /** get number of states */ getNStates()94 inline int getNStates() { return num_states; } 95 96 /** get all number of states incl. missing/gap/ambiguous states */ getNAllStates()97 inline int getNAllStates() { return states.size(); } 98 99 /** 100 initialise from a state definition string 101 @param datatype a YAML::Node structure 102 */ 103 void parseStateSpace(YAML::Node datatype); 104 105 /** 106 initialise state space from a SeqType 107 @param seqtype sequence type 108 */ 109 void initStateSpace(SeqType seqtype); 110 111 /** 112 reset state space 113 */ 114 void resetStateSpace(); 115 116 /** number of state */ 117 int num_states; 118 119 protected: 120 121 /** state space name */ 122 string space_name; 123 124 /** number of state */ 125 int num_all_states; 126 127 /** map from raw state string to state ID */ 128 StringStateMap states; 129 130 /** map from state ID to raw state string */ 131 StateStringMap raw_states; 132 133 /** map from ambiguous states to vector of state ID */ 134 unordered_map<StateType, StateVector>equate; 135 136 /** vector of the same size as states to translate to another state space */ 137 StrVector translate; 138 139 private: 140 141 /** minimum length of state string */ 142 int min_state_len; 143 144 /** maximum length of state string */ 145 int max_state_len; 146 147 }; 148 149 } // namespace PML 150 151 #endif 152