1 /*
2 * author: Imran Fanaswala
3 */
4
5 #ifndef BEAGLE_UTILS_CPP
6 #define BEAGLE_UTILS_CPP
7
8 #include <stdio.h>
9 #include "beagle_utils.h"
10
11 #define CLEAN_BEAGLE_API /* Attempting to remove unnecessary communication with BEAGLE device */
12
int_to_double(const int * src,int num_elems)13 double* int_to_double(const int* src, int num_elems)
14 {
15 double* dest = (double*)malloc(num_elems*sizeof(double)); if (NULL==dest) Warn_And_Exit("\n. Couldn't allocate memory.\n");
16 int i;
17 for(i=0; i<num_elems;++i)
18 dest[i] = (double)src[i];
19 return dest;
20 }
21
short_to_double(const short * src,int num_elems)22 double* short_to_double(const short* src, int num_elems)
23 {
24 double* dest = (double*)malloc(num_elems*sizeof(double)); if (NULL==dest) Warn_And_Exit("\n. Couldn't allocate memory. \n");
25 int i;
26 for(i=0; i<num_elems;++i)
27 dest[i] = (double)src[i];
28 return dest;
29 }
30
float_to_double(const phydbl * src,int num_elems)31 double* float_to_double(const phydbl* src, int num_elems)
32 {
33 double* dest = (double*)malloc(num_elems*sizeof(double)); if (NULL==dest) Warn_And_Exit("\n. Couldn't allocate memory. \n");
34 int i;
35 for(i=0; i<num_elems;++i)
36 dest[i] = (double)src[i];
37 return dest;
38 }
39
print_beagle_flags(long inFlags)40 void print_beagle_flags(long inFlags) {
41 if (inFlags & BEAGLE_FLAG_PROCESSOR_CPU) fprintf(stdout, " PROCESSOR_CPU");
42 if (inFlags & BEAGLE_FLAG_PROCESSOR_GPU) fprintf(stdout, " PROCESSOR_GPU");
43 if (inFlags & BEAGLE_FLAG_PROCESSOR_FPGA) fprintf(stdout, " PROCESSOR_FPGA");
44 if (inFlags & BEAGLE_FLAG_PROCESSOR_CELL) fprintf(stdout, " PROCESSOR_CELL");
45 if (inFlags & BEAGLE_FLAG_PRECISION_DOUBLE) fprintf(stdout, " PRECISION_DOUBLE");
46 if (inFlags & BEAGLE_FLAG_PRECISION_SINGLE) fprintf(stdout, " PRECISION_SINGLE");
47 if (inFlags & BEAGLE_FLAG_COMPUTATION_ASYNCH) fprintf(stdout, " COMPUTATION_ASYNCH");
48 if (inFlags & BEAGLE_FLAG_COMPUTATION_SYNCH) fprintf(stdout, " COMPUTATION_SYNCH");
49 if (inFlags & BEAGLE_FLAG_EIGEN_REAL) fprintf(stdout, " EIGEN_REAL");
50 if (inFlags & BEAGLE_FLAG_EIGEN_COMPLEX) fprintf(stdout, " EIGEN_COMPLEX");
51 if (inFlags & BEAGLE_FLAG_SCALING_MANUAL) fprintf(stdout, " SCALING_MANUAL");
52 if (inFlags & BEAGLE_FLAG_SCALING_AUTO) fprintf(stdout, " SCALING_AUTO");
53 if (inFlags & BEAGLE_FLAG_SCALING_ALWAYS) fprintf(stdout, " SCALING_ALWAYS");
54 if (inFlags & BEAGLE_FLAG_SCALING_DYNAMIC) fprintf(stdout, " SCALING_DYNAMIC");
55 if (inFlags & BEAGLE_FLAG_SCALERS_RAW) fprintf(stdout, " SCALERS_RAW");
56 if (inFlags & BEAGLE_FLAG_SCALERS_log) fprintf(stdout, " SCALERS_log");
57 if (inFlags & BEAGLE_FLAG_VECTOR_NONE) fprintf(stdout, " VECTOR_NONE");
58 if (inFlags & BEAGLE_FLAG_VECTOR_SSE) fprintf(stdout, " VECTOR_SSE");
59 if (inFlags & BEAGLE_FLAG_VECTOR_AVX) fprintf(stdout, " VECTOR_AVX");
60 if (inFlags & BEAGLE_FLAG_THREADING_NONE) fprintf(stdout, " THREADING_NONE");
61 if (inFlags & BEAGLE_FLAG_THREADING_OPENMP) fprintf(stdout, " THREADING_OPENMP");
62 if (inFlags & BEAGLE_FLAG_FRAMEWORK_CPU) fprintf(stdout, " FRAMEWORK_CPU");
63 if (inFlags & BEAGLE_FLAG_FRAMEWORK_CUDA) fprintf(stdout, " FRAMEWORK_CUDA");
64 if (inFlags & BEAGLE_FLAG_FRAMEWORK_OPENCL) fprintf(stdout, " FRAMEWORK_OPENCL");
65 fflush(stdout);
66 }
67
print_beagle_resource_list()68 void print_beagle_resource_list()
69 {
70 int i;
71 BeagleResourceList* rList;
72 rList = beagleGetResourceList();
73 fprintf(stdout, "\n\tAvailable resources:\n");
74 for (i = 0; i < rList->length; i++) {
75 fprintf(stdout, "\t\tResource %i:\n\t\tName : %s\n", i, rList->list[i].name);
76 fprintf(stdout, "\t\t\tDesc : %s\n", rList->list[i].description);
77 fprintf(stdout, "\t\t\tFlags:");
78 print_beagle_flags(rList->list[i].supportFlags);
79 fprintf(stdout, "\n");
80 }
81 fflush(stdout);
82 }
83
print_beagle_instance_details(BeagleInstanceDetails * inst)84 void print_beagle_instance_details(BeagleInstanceDetails *inst)
85 {
86 int rNumber = inst->resourceNumber;
87 fprintf(stdout, "\tUsing resource %i:\n", rNumber);
88 fprintf(stdout, "\t\tRsrc Name : %s\n", inst->resourceName);
89 fprintf(stdout, "\t\tImpl Name : %s\n", inst->implName);
90 fprintf(stdout, "\t\tImpl Desc : %s\n", inst->implDescription);
91 fprintf(stdout, "\t\tFlags:");
92 fflush(stdout);
93 print_beagle_flags(inst->flags);
94 fflush(stdout);
95 }
96
create_beagle_instance(t_tree * tree,int quiet,option * io)97 int create_beagle_instance(t_tree *tree, int quiet, option* io)
98 {
99 if(UNINITIALIZED != tree->b_inst){
100 // fprintf(stdout,"\n\tWARNING: Creating a BEAGLE instance on a tree with an existing BEAGLE instance:%d\n",tree->b_inst);
101 }
102 if(!quiet){
103 // print_beagle_resource_list();
104 }
105 int i;
106 BeagleInstanceDetails inst_d;
107 int num_rate_catg = tree->mod->ras->n_catg;
108 int num_branches = 2*tree->n_otu-1; //rooted tree
109 //Recall that in PhyML, each edge has a "left" and "right" partial vectors. Therefore,
110 //in BEAGLE we have 2*num_branches number of partials.
111 //BEAGLE's partials buffer = [ tax1, tax2, ..., taxN, b1Left, b2Left, b3Left,...,bMLeft, b1Rght, b2Rght, b3Rght,...,bMRght] (N taxa, M branches)
112 int num_partials = 2*(tree->n_otu + num_branches); /* TODO: This does not seem correct; suspect poor indexing elsewhere */
113 /* In update_operations, indexes range from 0 to almost 2 (n_otu + num_branches), but not all integers are used */
114
115 int resourceList[1];
116 resourceList[0] = io->beagle_resource;
117
118 // DUMP_I(tree->n_otu, num_rate_catg, num_partials, num_branches, tree->mod->ns, tree->n_pattern, tree->mod->whichmodel);
119 int beagle_inst = beagleCreateInstance(
120 tree->n_otu, /**< Number of tip data elements (input) */
121 num_partials, /**< Number of partial buffer (input) */
122 /* PhyML uses partials */ 0, /**< Number of compact state representation buffers to create (input) */
123 tree->mod->ns, /**< Number of states in the continuous-time Markov chain (input) */
124 tree->n_pattern, /**< Number of site patterns to be handled by the instance (input) */
125 1, /**< Number of rate matrix eigen-decomposition,state freqs, and category weight buffers*/
126 num_branches, /**< Number of rate matrix buffers (input) */
127 num_rate_catg, /**< Number of rate categories (input) */
128 -1, /**< Number of scaling buffers. Unused because we use SCALING_ALWAYS */
129 resourceList, /**< List of potential resource on which this instance is allowed (input, NULL implies no restriction */
130 1, /**< Length of resourceList list (input) */
131 BEAGLE_FLAG_FRAMEWORK_CPU | BEAGLE_FLAG_PROCESSOR_CPU | BEAGLE_FLAG_SCALING_ALWAYS | BEAGLE_FLAG_EIGEN_REAL | ((sizeof(float)==sizeof(phydbl)) ? BEAGLE_FLAG_PRECISION_SINGLE:BEAGLE_FLAG_PRECISION_DOUBLE),
132 0, /**< Bit-flags indicating required implementation characteristics, see BeagleFlags (input) */
133 &inst_d);
134 if (beagle_inst < 0){
135 fprintf(stderr, "beagleCreateInstance() failed:%i\n\n",beagle_inst);
136 return beagle_inst;
137 }
138
139 if(!quiet){
140 fprintf(stdout, "\n\tUnique BEAGLE instance id:%i\n", beagle_inst);
141 print_beagle_instance_details(&inst_d);
142 }
143
144 //Set the tips
145 for(i=0; i<2*tree->n_otu-1; ++i) //taxa+internal nodes
146 {
147 // Print_Tip_Partials(tree, tree->a_nodes[i]);
148 if(tree->a_nodes[i]->tax)
149 {
150 assert(tree->a_nodes[i]->c_seq->len == tree->n_pattern); // number of compacts sites == number of distinct site patterns
151 double* tip = short_to_double(tree->a_nodes[i]->b[0]->p_lk_tip_r, tree->n_pattern*tree->mod->ns); //The tip states are stored on the branch leading to the tip
152 //Recall we store tip partials on the branch leading to the tip, rather than the tip itself.
153 int ret = beagleSetTipPartials(beagle_inst, tree->a_nodes[i]->b[0]->p_lk_tip_idx, tip);
154 if(ret<0){
155 fprintf(stderr, "beagleSetTipPartials() on instance %i failed:%i\n\n",beagle_inst,ret);
156 Free(tip);
157 return ret;
158 }
159 Free(tip);
160 }
161 }
162
163 //Set the pattern weights
164 double* pwts = int_to_double(tree->data->wght,tree->n_pattern); //BTW, These weights are absolute counts, and not freqs
165 int ret = beagleSetPatternWeights(beagle_inst, pwts);
166 if(ret<0){
167 fprintf(stderr, "beagleSetPatternWeights() on instance %i failed:%i\n\n",beagle_inst,ret);
168 Free(pwts);
169 return ret;
170 }
171 Free(pwts);
172
173 tree->mod->b_inst=beagle_inst;
174
175 update_beagle_ras(tree->mod);
176 update_beagle_efrqs(tree->mod);
177
178 return beagle_inst;
179 }
180
181 /* Update partial likelihood on edge b on the side of b where
182 node d lies.
183 */
update_beagle_partials(t_tree * tree,t_edge * b,t_node * d)184 void update_beagle_partials(t_tree* tree, t_edge* b, t_node* d)
185 {
186 /*
187 |
188 |<- b
189 |
190 d
191 / \
192 b1 / \ b2
193 / \
194 n_v1 n_v2
195 */
196
197 if(d->tax) //Partial likelihoods are only calculated on internal nodes
198 {
199 PhyML_Printf("\n== t_node %d is a leaf...",d->num);
200 PhyML_Printf("\n== Err. in file %s at line %d (function '%s')\n",__FILE__,__LINE__,__FUNCTION__);
201 Warn_And_Exit("\n");
202 }
203
204 //Determine d's "left" and "right" neighbors.
205 t_node *n_v1, *n_v2;//d's "left" and "right" neighbor nodes
206 phydbl *p_lk,*p_lk_v1,*p_lk_v2;
207 phydbl *Pij1,*Pij2;
208 int *sum_scale, *sum_scale_v1, *sum_scale_v2;
209 int *p_lk_loc;
210 int dest_p_idx, child1_p_idx, child2_p_idx, Pij1_idx, Pij2_idx;
211 n_v1 = n_v2 = NULL;
212 p_lk = p_lk_v1 = p_lk_v2 = NULL;
213 Pij1 = Pij2 = NULL;
214 sum_scale_v1 = sum_scale_v2 = NULL;
215 p_lk_loc = NULL;
216 dest_p_idx = child1_p_idx = child2_p_idx = Pij1_idx = Pij2_idx = UNINITIALIZED;
217 Set_All_P_Lk(&n_v1,&n_v2,
218 &p_lk,&sum_scale,&p_lk_loc,
219 &Pij1,&p_lk_v1,&sum_scale_v1,
220 &Pij2,&p_lk_v2,&sum_scale_v2,
221 d,b,tree,
222 &dest_p_idx, &child1_p_idx, &child2_p_idx, &Pij1_idx, &Pij2_idx);
223
224
225 // fprintf(stdout, "\nUpdating partials on Branch %d (on the side where Node %d lies)\n",b->num,d->num);fflush(stdout);
226 // double* p_lk_v1_b = (double*)malloc(tree->mod->ras->n_catg*tree->mod->ns*tree->n_pattern*sizeof(double));if(NULL==p_lk_v1_b) Warn_And_Exit("Couldnt allocate memory");
227 // beagleGetPartials(tree->b_inst, child1_p_idx, BEAGLE_OP_NONE, (double*)p_lk_v1_b);
228 // double* p_lk_v2_b = (double*)malloc(tree->mod->ras->n_catg*tree->mod->ns*tree->n_pattern*sizeof(double));if(NULL==p_lk_v2_b) Warn_And_Exit("Couldnt allocate memory");
229 // beagleGetPartials(tree->b_inst, child2_p_idx, BEAGLE_OP_NONE, (double*)p_lk_v2_b);
230
231 // fprintf(stdout, "Left partials :");fflush(stdout);
232 // Dump_Arr_D(p_lk_v1_b, tree->mod->ras->n_catg*tree->mod->ns*tree->n_pattern);
233 // fprintf(stdout, "Right partials:");fflush(stdout);
234 // Dump_Arr_D(p_lk_v2_b, tree->mod->ras->n_catg*tree->mod->ns*tree->n_pattern);
235 // Free(p_lk_v1_b);
236 // Free(p_lk_v2_b);
237
238
239 //Create the corresponding BEAGLE operation
240
241 // fprintf(stderr,"%d, %d, %d, ", dest_p_idx, child1_p_idx, child2_p_idx);
242
243 BeagleOperation operations[1] = {{dest_p_idx, BEAGLE_OP_NONE, BEAGLE_OP_NONE, child1_p_idx, Pij1_idx, child2_p_idx, Pij2_idx}};
244 //Compute the partials
245 int ret = beagleUpdatePartials(tree->b_inst, operations, 1, BEAGLE_OP_NONE);
246 if(ret<0){
247 fprintf(stderr, "beagleUpdatePartials() on instance %i failed:%i\n\n",tree->b_inst,ret);
248 Exit("");
249 }
250 //Load the computed/updated partial partials
251 #ifndef CLEAN_BEAGLE_API
252 ret = beagleGetPartials(tree->b_inst, dest_p_idx, BEAGLE_OP_NONE, (double*)p_lk);
253 if(ret<0){
254 fprintf(stderr, "beagleGetPartials() on instance %i failed:%i\n\n",tree->b_inst,ret);
255 Exit("");
256 }
257 #endif
258
259 // fprintf(stdout, "Updated partials:");fflush(stdout);
260 // Dump_Arr_D(p_lk, tree->mod->ras->n_catg*tree->mod->ns*tree->n_pattern);
261 }
262
finalize_beagle_instance(t_tree * tree)263 int finalize_beagle_instance(t_tree *tree)
264 {
265 if(tree->b_inst >= 0) {
266 int ret = beagleFinalizeInstance(tree->b_inst);
267 if(ret<0) fprintf(stderr, "\nFailed to finalize BEAGLE instance %i: %i\n\n", tree->b_inst, ret);
268 return ret;
269 }
270 return 0;
271 }
272
update_beagle_ras(t_mod * mod)273 void update_beagle_ras(t_mod* mod)
274 {
275 assert(UNINITIALIZED != mod->b_inst);
276
277 int ret=-1;
278 if((sizeof(float)==sizeof(phydbl))) //Do we need to convert?
279 {
280 double* catg_rates = float_to_double(mod->ras->gamma_rr->v, mod->ras->n_catg);
281 ret = beagleSetCategoryRates(mod->b_inst, catg_rates);
282 Free(catg_rates);
283 if(ret<0){
284 fprintf(stderr, "beagleSetCategoryRates() on instance %i failed:%i\n\n",mod->b_inst,ret);
285 Exit("");
286 }
287 double* catg_wts = float_to_double(mod->ras->gamma_r_proba->v, mod->ras->n_catg);
288 if(!mod->optimizing_topology) {
289 ret = beagleSetCategoryWeights(mod->b_inst, 0, catg_wts);
290 Free(catg_wts);
291 if(ret<0){
292 fprintf(stderr, "beagleSetCategoryWeights() on instance %i failed:%i\n\n",mod->b_inst,ret);
293 Exit("");
294 }
295 }
296 }
297 else
298 {
299 ret = beagleSetCategoryRates(mod->b_inst, mod->ras->gamma_rr->v);
300 if(ret<0){
301 fprintf(stderr, "beagleSetCategoryRates() on instance %i failed:%i\n\n",mod->b_inst,ret);
302 Exit("");
303 }
304 if(!mod->optimizing_topology) {
305 ret = beagleSetCategoryWeights(mod->b_inst, 0, mod->ras->gamma_r_proba->v);
306 if(ret<0){
307 fprintf(stderr, "beagleSetCategoryWeights() on instance %i failed:%i\n\n",mod->b_inst,ret);
308 Exit("");
309 }
310
311 }
312 }
313 }
314
update_beagle_efrqs(t_mod * mod)315 void update_beagle_efrqs(t_mod* mod)
316 {
317 assert(UNINITIALIZED != mod->b_inst);
318
319 int ret=-1;
320 if((sizeof(float)==sizeof(phydbl))) {
321 double* efrqs = float_to_double(mod->e_frq->pi->v, mod->ns);
322 ret = beagleSetStateFrequencies(mod->b_inst, 0, efrqs);
323 Free(efrqs);
324 } else {
325 ret = beagleSetStateFrequencies(mod->b_inst, 0, mod->e_frq->pi->v);
326 }
327 if(ret<0){
328 fprintf(stderr, "beagleSetStateFrequencies() on instance %i failed:%i\n\n",mod->b_inst,ret);
329 Exit("");
330 }
331 }
332
calc_edgelks_beagle(t_edge * b,t_tree * tree)333 void calc_edgelks_beagle(t_edge *b, t_tree *tree)
334 {
335 assert(UNINITIALIZED != tree->b_inst);
336
337 //Compute the edge likelihood
338 int parents[1] = {b->p_lk_left_idx};
339 int children[1] = {b->rght->tax?b->p_lk_tip_idx:b->p_lk_rght_idx};
340 int pmats[1] = {b->Pij_rr_idx};
341 int other[1] = {0};//Category Weights and State Frequencies both have a single buffer, hence they are both indexed at 0
342 double lnL[1] = {UNINITIALIZED};
343 // DUMP_I(parents[0], children[0], pmats[0], b->num, b->rght->tax);
344 int ret=beagleCalculateEdgeLogLikelihoods(tree->b_inst, parents, children, pmats, NULL, NULL, other, other, NULL, 1, lnL, NULL, NULL);
345 int i;
346
347 if(ret<0){
348 fprintf(stderr, "beagleCalculateEdgeLogLikelihoods() on instance %i failed:%i\n\n",tree->b_inst,ret);
349 Exit("");
350 }
351
352 tree->c_lnL = sizeof(phydbl)==sizeof(float)?(float)lnL[0]:lnL[0];
353
354 //Retrieve the site likelihoods that were computed during the previous edge likelihood computation
355 ret = beagleGetSiteLogLikelihoods(tree->b_inst,tree->cur_site_lk);//TODO: Handle when cur_site_lk is float
356 if(ret<0){
357 fprintf(stderr, "beagleGetSiteLogLikelihoods() on instance %i failed:%i\n\n",tree->b_inst,ret);
358 Exit("");
359 }
360 //Transform
361 for(i=0;i<tree->n_pattern;++i)
362 tree->cur_site_lk[i]=exp(tree->cur_site_lk[i]);
363 }
364
update_beagle_eigen(t_mod * mod)365 void update_beagle_eigen(t_mod* mod)
366 {
367 assert(UNINITIALIZED != mod->b_inst);
368
369 int whichmodel = mod->whichmodel;
370 //We use Eigen Decomposition only for GTR models and AA datasets
371 if((mod->io->datatype == AA || whichmodel==GTR || whichmodel==CUSTOM) && mod->use_m4mod == NO)
372 {
373 //Beagle expects untransformed eigen-values (i.e. recall e_val is exp() scaled, so we undo that)
374 phydbl* evals = (phydbl*)malloc(mod->ns * sizeof(phydbl));
375 int i;
376 for(i=0;i<mod->ns;++i) evals[i]=log(mod->eigen->e_val[i]);
377 int ret=-1;
378 if((sizeof(float)==sizeof(phydbl)))//Need to convert to doubles?
379 {
380 double* eigen_vects = float_to_double(mod->eigen->r_e_vect, mod->eigen->size*mod->eigen->size);
381 double* eigen_vects_inv = float_to_double(mod->eigen->l_e_vect, mod->eigen->size*mod->eigen->size);
382 double* eigen_vals = float_to_double(evals,mod->eigen->size);
383 ret = beagleSetEigenDecomposition(mod->b_inst,0,eigen_vects,eigen_vects_inv,eigen_vals);
384 Free(eigen_vects);Free(eigen_vects_inv);Free(eigen_vals);
385 } else {
386 ret = beagleSetEigenDecomposition(mod->b_inst,0,mod->eigen->r_e_vect,mod->eigen->l_e_vect,evals);
387 }
388 if(ret<0){
389 fprintf(stderr, "beagleSetEigenDecomposition() on instance %i failed:%i\n\n",mod->b_inst,ret);
390 Free(evals);
391 Exit("");
392 }
393 Free(evals);
394 }
395 }
396
397 #endif // BEAGLE_UTILS_CPP
398
399