1 /* The MIT License
2 
3    Copyright (c) 2017 Adrian Tan <atks@umich.edu>
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 #ifndef WDP_AHMM_H
24 #define WDP_AHMM_H
25 
26 #include "hts_utils.h"
27 #include "utils.h"
28 #include "log_tool.h"
29 
30 #define NSTATES 6
31 
32 /**
33  * Parameters for WDP_AHMM
34  */
35 class WDP_AHMMParameters
36 {
37     public:
38 
39     float delta;
40     float epsilon;
41     float tau;
42     float eta;
43     float mismatch_penalty;
44 
WDP_AHMMParameters()45     WDP_AHMMParameters()
46     {
47         delta = 0.0001;
48         epsilon = 0.001;
49         tau = 0.01;
50         eta = 0.01;
51         mismatch_penalty = 1;
52     };
53 };
54 
55 /**
56  * Wrap around Dynamic Programming implementation of AHMM
57  */
58 class WDP_AHMM
59 {
60     public:
61 
62     int32_t max_len;
63 
64     const char* read;
65     const char* qual;
66 
67     //model variables
68     char *motif;
69     int32_t mlen;
70     int32_t rlen;
71 
72     /*result variables*/
73     int32_t motif_start[2], motif_end[2];
74     int32_t motif_count, exact_motif_count, motif_m, motif_xid;
75     float frac_no_repeats;
76     int32_t* motif_discordance;
77     float motif_concordance, maxLogOdds;
78     int32_t trf_score;
79 
80     WDP_AHMMParameters par;
81 
82     //for track intermediate scores during Viterbi algorithm
83     float max_score;
84     int32_t max_track;
85 
86     //for optimal alignment
87     bool optimal_path_traced;
88     float optimal_score;
89     int32_t optimal_state;
90     int32_t optimal_track;
91     int32_t optimal_probe_len;
92     int32_t *optimal_path;     // for storage
93     int32_t *optimal_path_ptr; //just a pointer
94     int32_t optimal_path_len;
95 
96     float T[NSTATES][NSTATES];
97 
98     float **V;
99     int32_t **U;
100 
101     LogTool *lt;
102 
103     bool debug;
104 
105     /**
106      * Constructor.
107      */
108     WDP_AHMM(bool debug=false);
109 
110     /**
111      * Constructor.
112      */
113     WDP_AHMM(LogTool *lt, bool debug=false);
114 
115     /**
116      * Destructor.
117      */
118     ~WDP_AHMM();
119 
120     /**
121      * Initializes object, helper function for constructor.
122      */
123     void initialize();
124 
125     /**
126      * Initializes objects for constructor.
127      */
128     void initialize_structures();
129 
130     /**
131      * Initialize transition matrix based on parameters.
132      */
133     void initialize_T();
134 
135     /**
136      * Initializes U and V.
137      */
138     void initialize_UV();
139 
140     /**
141      * Sets a model.
142      */
143     void set_model(const char* motif);
144 
145     /**
146      * Sets delta.
147      */
148     void set_delta(float delta);
149 
150     /**
151      * Sets epsilon.
152      */
153     void set_epsilon(float epsilon);
154 
155     /**
156      * Sets tau.
157      */
158     void set_tau(float tau);
159 
160     /**
161      * Sets eta.
162      */
163     void set_eta(float eta);
164 
165     /**
166      * Sets mismatch penalty.
167      */
168     void set_mismatch_penalty(float mismatch_penalty);
169 
170     /**
171      * Sets debug.
172      */
173     void set_debug(bool debug);
174 
175     /**
176      * Get motif start position for model.
177      */
178     int32_t get_motif_model_spos1();
179 
180     /**
181      * Get motif end position for model.
182      */
183     int32_t get_motif_model_epos1();
184 
185     /**
186      * Get motif start position for read.
187      */
188     int32_t get_motif_read_spos1();
189 
190     /**
191      * Get motif end position for read.
192      */
193     int32_t get_motif_read_epos1();
194 
195     /**
196      * Get motif concordance.
197      */
198     float get_motif_concordance();
199 
200     /**
201      * Get exact motif count.
202      */
203     uint32_t get_exact_motif_count();
204 
205     /**
206      * Get motif count.
207      */
208     uint32_t get_motif_count();
209 
210     /**
211      * Align and compute genotype likelihood.
212      */
213     void align(const char* y, const char* qual=NULL);
214 
215     /**
216      * Trace path after alignment.
217      */
218     void trace_path();
219 
220     /**
221      * Compute log10 emission odds based on equal error probability distribution contrasted against log10(1/16).
222      */
223     float log10_emission_odds(char read_base, char probe_base, uint32_t pl, float mismatch_penalty);
224 
225     /**
226      * Compute log10 emission odds based on equal error probability distribution contrasted against log10(1/16).
227      */
228     float log10_emission_odds(char read_base, char probe_base, uint32_t pl);
229 
230     /**
231      * Converts state to string representation.
232      */
233     std::string state2string(int32_t state);
234 
235     /**
236      * Converts state to cigar string representation.
237      */
238     std::string state2cigarstring(int32_t state);
239 
240     /**
241      * Converts track to cigar string representation.
242      */
243     std::string track2cigarstring1(int32_t t, int32_t j);
244 
245     /**
246      * Converts state to cigar string representation.
247      */
248     std::string track2cigarstring2(int32_t t);
249 
250     /**
251      * Converts model component to string representation.
252      */
253     std::string component2string(int32_t component);
254 
255     /**
256      * Prints an alignment.
257      */
258     void print_alignment();
259 
260     /**
261      * Prints an alignment with padding.
262      */
263     void print_alignment(std::string& pad);
264 
265     /**
266      * Prints a float matrix.
267      */
268     void print(float *v, size_t plen, size_t rlen);
269 
270     /**
271      * Prints a int32_t matrix.
272      */
273     void print(int32_t *v, size_t plen, size_t rlen);
274 
275     /**
276      * Prints the transition matrix.
277      */
278     void print_T();
279 
280     /**
281      * Prints U.
282      */
283     void print_U(int32_t *U, size_t plen, size_t rlen);
284 
285     /**
286      * Prints U and V.
287      */
288     void print_trace(int32_t state, size_t plen, size_t rlen);
289 
290     /**
291      * Collect alignment summary statistics.
292      */
293     void collect_statistics(int32_t t1, int32_t t2, int32_t j);
294 
295     /**
296      * Clear alignment statistics.
297      */
298     void clear_statistics();
299 
300     /**
301      * Update alignment statistics after collection.
302      */
303     void update_statistics();
304 
305     /**
306      * Prints track.
307      */
308     void print_track(int32_t t);
309 };
310 
311 #endif