1 /* The MIT License 2 3 Copyright (c) 2017 Adrian Tan <atks@umich.edu> 4 5 Permission is hereby granted, free of charge, to any person obtaining a copy 6 of this software and associated documentation files (the "Software"), to deal 7 in the Software without restriction, including without limitation the rights 8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 copies of the Software, and to permit persons to whom the Software is 10 furnished to do so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in 13 all copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 THE SOFTWARE. 22 */ 23 #ifndef WDP_AHMM_H 24 #define WDP_AHMM_H 25 26 #include "hts_utils.h" 27 #include "utils.h" 28 #include "log_tool.h" 29 30 #define NSTATES 6 31 32 /** 33 * Parameters for WDP_AHMM 34 */ 35 class WDP_AHMMParameters 36 { 37 public: 38 39 float delta; 40 float epsilon; 41 float tau; 42 float eta; 43 float mismatch_penalty; 44 WDP_AHMMParameters()45 WDP_AHMMParameters() 46 { 47 delta = 0.0001; 48 epsilon = 0.001; 49 tau = 0.01; 50 eta = 0.01; 51 mismatch_penalty = 1; 52 }; 53 }; 54 55 /** 56 * Wrap around Dynamic Programming implementation of AHMM 57 */ 58 class WDP_AHMM 59 { 60 public: 61 62 int32_t max_len; 63 64 const char* read; 65 const char* qual; 66 67 //model variables 68 char *motif; 69 int32_t mlen; 70 int32_t rlen; 71 72 /*result variables*/ 73 int32_t motif_start[2], motif_end[2]; 74 int32_t motif_count, exact_motif_count, motif_m, motif_xid; 75 float frac_no_repeats; 76 int32_t* motif_discordance; 77 float motif_concordance, maxLogOdds; 78 int32_t trf_score; 79 80 WDP_AHMMParameters par; 81 82 //for track intermediate scores during Viterbi algorithm 83 float max_score; 84 int32_t max_track; 85 86 //for optimal alignment 87 bool optimal_path_traced; 88 float optimal_score; 89 int32_t optimal_state; 90 int32_t optimal_track; 91 int32_t optimal_probe_len; 92 int32_t *optimal_path; // for storage 93 int32_t *optimal_path_ptr; //just a pointer 94 int32_t optimal_path_len; 95 96 float T[NSTATES][NSTATES]; 97 98 float **V; 99 int32_t **U; 100 101 LogTool *lt; 102 103 bool debug; 104 105 /** 106 * Constructor. 107 */ 108 WDP_AHMM(bool debug=false); 109 110 /** 111 * Constructor. 112 */ 113 WDP_AHMM(LogTool *lt, bool debug=false); 114 115 /** 116 * Destructor. 117 */ 118 ~WDP_AHMM(); 119 120 /** 121 * Initializes object, helper function for constructor. 122 */ 123 void initialize(); 124 125 /** 126 * Initializes objects for constructor. 127 */ 128 void initialize_structures(); 129 130 /** 131 * Initialize transition matrix based on parameters. 132 */ 133 void initialize_T(); 134 135 /** 136 * Initializes U and V. 137 */ 138 void initialize_UV(); 139 140 /** 141 * Sets a model. 142 */ 143 void set_model(const char* motif); 144 145 /** 146 * Sets delta. 147 */ 148 void set_delta(float delta); 149 150 /** 151 * Sets epsilon. 152 */ 153 void set_epsilon(float epsilon); 154 155 /** 156 * Sets tau. 157 */ 158 void set_tau(float tau); 159 160 /** 161 * Sets eta. 162 */ 163 void set_eta(float eta); 164 165 /** 166 * Sets mismatch penalty. 167 */ 168 void set_mismatch_penalty(float mismatch_penalty); 169 170 /** 171 * Sets debug. 172 */ 173 void set_debug(bool debug); 174 175 /** 176 * Get motif start position for model. 177 */ 178 int32_t get_motif_model_spos1(); 179 180 /** 181 * Get motif end position for model. 182 */ 183 int32_t get_motif_model_epos1(); 184 185 /** 186 * Get motif start position for read. 187 */ 188 int32_t get_motif_read_spos1(); 189 190 /** 191 * Get motif end position for read. 192 */ 193 int32_t get_motif_read_epos1(); 194 195 /** 196 * Get motif concordance. 197 */ 198 float get_motif_concordance(); 199 200 /** 201 * Get exact motif count. 202 */ 203 uint32_t get_exact_motif_count(); 204 205 /** 206 * Get motif count. 207 */ 208 uint32_t get_motif_count(); 209 210 /** 211 * Align and compute genotype likelihood. 212 */ 213 void align(const char* y, const char* qual=NULL); 214 215 /** 216 * Trace path after alignment. 217 */ 218 void trace_path(); 219 220 /** 221 * Compute log10 emission odds based on equal error probability distribution contrasted against log10(1/16). 222 */ 223 float log10_emission_odds(char read_base, char probe_base, uint32_t pl, float mismatch_penalty); 224 225 /** 226 * Compute log10 emission odds based on equal error probability distribution contrasted against log10(1/16). 227 */ 228 float log10_emission_odds(char read_base, char probe_base, uint32_t pl); 229 230 /** 231 * Converts state to string representation. 232 */ 233 std::string state2string(int32_t state); 234 235 /** 236 * Converts state to cigar string representation. 237 */ 238 std::string state2cigarstring(int32_t state); 239 240 /** 241 * Converts track to cigar string representation. 242 */ 243 std::string track2cigarstring1(int32_t t, int32_t j); 244 245 /** 246 * Converts state to cigar string representation. 247 */ 248 std::string track2cigarstring2(int32_t t); 249 250 /** 251 * Converts model component to string representation. 252 */ 253 std::string component2string(int32_t component); 254 255 /** 256 * Prints an alignment. 257 */ 258 void print_alignment(); 259 260 /** 261 * Prints an alignment with padding. 262 */ 263 void print_alignment(std::string& pad); 264 265 /** 266 * Prints a float matrix. 267 */ 268 void print(float *v, size_t plen, size_t rlen); 269 270 /** 271 * Prints a int32_t matrix. 272 */ 273 void print(int32_t *v, size_t plen, size_t rlen); 274 275 /** 276 * Prints the transition matrix. 277 */ 278 void print_T(); 279 280 /** 281 * Prints U. 282 */ 283 void print_U(int32_t *U, size_t plen, size_t rlen); 284 285 /** 286 * Prints U and V. 287 */ 288 void print_trace(int32_t state, size_t plen, size_t rlen); 289 290 /** 291 * Collect alignment summary statistics. 292 */ 293 void collect_statistics(int32_t t1, int32_t t2, int32_t j); 294 295 /** 296 * Clear alignment statistics. 297 */ 298 void clear_statistics(); 299 300 /** 301 * Update alignment statistics after collection. 302 */ 303 void update_statistics(); 304 305 /** 306 * Prints track. 307 */ 308 void print_track(int32_t t); 309 }; 310 311 #endif