1 /*
2  * author: Imran Fanaswala
3  */
4 
5 #ifndef BEAGLE_UTILS_CPP
6 #define BEAGLE_UTILS_CPP
7 
8 #include  <stdio.h>
9 #include "beagle_utils.h"
10 
11 #define CLEAN_BEAGLE_API  /* Attempting to remove unnecessary communication with  BEAGLE device */
12 
int_to_double(const int * src,int num_elems)13 double* int_to_double(const int* src, int num_elems)
14 {
15     double* dest = (double*)malloc(num_elems*sizeof(double)); if (NULL==dest) Warn_And_Exit("\n. Couldn't allocate memory.\n");
16     int i;
17     for(i=0; i<num_elems;++i)
18         dest[i] = (double)src[i];
19     return dest;
20 }
21 
short_to_double(const short * src,int num_elems)22 double* short_to_double(const short* src, int num_elems)
23 {
24     double* dest = (double*)malloc(num_elems*sizeof(double)); if (NULL==dest) Warn_And_Exit("\n. Couldn't allocate memory. \n");
25     int i;
26     for(i=0; i<num_elems;++i)
27         dest[i] = (double)src[i];
28     return dest;
29 }
30 
float_to_double(const phydbl * src,int num_elems)31 double* float_to_double(const phydbl* src, int num_elems)
32 {
33     double* dest = (double*)malloc(num_elems*sizeof(double)); if (NULL==dest) Warn_And_Exit("\n. Couldn't allocate memory. \n");
34     int i;
35     for(i=0; i<num_elems;++i)
36         dest[i] = (double)src[i];
37     return dest;
38 }
39 
print_beagle_flags(long inFlags)40 void print_beagle_flags(long inFlags) {
41     if (inFlags & BEAGLE_FLAG_PROCESSOR_CPU)      fprintf(stdout, " PROCESSOR_CPU");
42     if (inFlags & BEAGLE_FLAG_PROCESSOR_GPU)      fprintf(stdout, " PROCESSOR_GPU");
43     if (inFlags & BEAGLE_FLAG_PROCESSOR_FPGA)     fprintf(stdout, " PROCESSOR_FPGA");
44     if (inFlags & BEAGLE_FLAG_PROCESSOR_CELL)     fprintf(stdout, " PROCESSOR_CELL");
45     if (inFlags & BEAGLE_FLAG_PRECISION_DOUBLE)   fprintf(stdout, " PRECISION_DOUBLE");
46     if (inFlags & BEAGLE_FLAG_PRECISION_SINGLE)   fprintf(stdout, " PRECISION_SINGLE");
47     if (inFlags & BEAGLE_FLAG_COMPUTATION_ASYNCH) fprintf(stdout, " COMPUTATION_ASYNCH");
48     if (inFlags & BEAGLE_FLAG_COMPUTATION_SYNCH)  fprintf(stdout, " COMPUTATION_SYNCH");
49     if (inFlags & BEAGLE_FLAG_EIGEN_REAL)         fprintf(stdout, " EIGEN_REAL");
50     if (inFlags & BEAGLE_FLAG_EIGEN_COMPLEX)      fprintf(stdout, " EIGEN_COMPLEX");
51     if (inFlags & BEAGLE_FLAG_SCALING_MANUAL)     fprintf(stdout, " SCALING_MANUAL");
52     if (inFlags & BEAGLE_FLAG_SCALING_AUTO)       fprintf(stdout, " SCALING_AUTO");
53     if (inFlags & BEAGLE_FLAG_SCALING_ALWAYS)     fprintf(stdout, " SCALING_ALWAYS");
54     if (inFlags & BEAGLE_FLAG_SCALING_DYNAMIC)    fprintf(stdout, " SCALING_DYNAMIC");
55     if (inFlags & BEAGLE_FLAG_SCALERS_RAW)        fprintf(stdout, " SCALERS_RAW");
56     if (inFlags & BEAGLE_FLAG_SCALERS_log)        fprintf(stdout, " SCALERS_log");
57     if (inFlags & BEAGLE_FLAG_VECTOR_NONE)        fprintf(stdout, " VECTOR_NONE");
58     if (inFlags & BEAGLE_FLAG_VECTOR_SSE)         fprintf(stdout, " VECTOR_SSE");
59     if (inFlags & BEAGLE_FLAG_VECTOR_AVX)         fprintf(stdout, " VECTOR_AVX");
60     if (inFlags & BEAGLE_FLAG_THREADING_NONE)     fprintf(stdout, " THREADING_NONE");
61     if (inFlags & BEAGLE_FLAG_THREADING_OPENMP)   fprintf(stdout, " THREADING_OPENMP");
62     if (inFlags & BEAGLE_FLAG_FRAMEWORK_CPU)      fprintf(stdout, " FRAMEWORK_CPU");
63     if (inFlags & BEAGLE_FLAG_FRAMEWORK_CUDA)     fprintf(stdout, " FRAMEWORK_CUDA");
64     if (inFlags & BEAGLE_FLAG_FRAMEWORK_OPENCL)   fprintf(stdout, " FRAMEWORK_OPENCL");
65     fflush(stdout);
66 }
67 
print_beagle_resource_list()68 void print_beagle_resource_list()
69 {
70   int i;
71   BeagleResourceList* rList;
72   rList = beagleGetResourceList();
73   fprintf(stdout, "\n\tAvailable resources:\n");
74   for (i = 0; i < rList->length; i++) {
75     fprintf(stdout, "\t\tResource %i:\n\t\tName : %s\n", i, rList->list[i].name);
76     fprintf(stdout, "\t\t\tDesc : %s\n", rList->list[i].description);
77     fprintf(stdout, "\t\t\tFlags:");
78     print_beagle_flags(rList->list[i].supportFlags);
79     fprintf(stdout, "\n");
80   }
81   fflush(stdout);
82 }
83 
print_beagle_instance_details(BeagleInstanceDetails * inst)84 void print_beagle_instance_details(BeagleInstanceDetails *inst)
85 {
86     int rNumber = inst->resourceNumber;
87     fprintf(stdout, "\tUsing resource %i:\n", rNumber);
88     fprintf(stdout, "\t\tRsrc Name : %s\n", inst->resourceName);
89     fprintf(stdout, "\t\tImpl Name : %s\n", inst->implName);
90     fprintf(stdout, "\t\tImpl Desc : %s\n", inst->implDescription);
91     fprintf(stdout, "\t\tFlags:");
92     fflush(stdout);
93     print_beagle_flags(inst->flags);
94     fflush(stdout);
95 }
96 
create_beagle_instance(t_tree * tree,int quiet,option * io)97 int create_beagle_instance(t_tree *tree, int quiet, option* io)
98 {
99     if(UNINITIALIZED != tree->b_inst){
100 //        fprintf(stdout,"\n\tWARNING: Creating a BEAGLE instance on a tree with an existing BEAGLE instance:%d\n",tree->b_inst);
101     }
102     if(!quiet){
103 //        print_beagle_resource_list();
104     }
105     int i;
106     BeagleInstanceDetails inst_d;
107     int num_rate_catg = tree->mod->ras->n_catg;
108     int num_branches  = 2*tree->n_otu-1; //rooted tree
109     //Recall that in PhyML, each edge has a "left" and "right" partial vectors. Therefore,
110     //in BEAGLE we have 2*num_branches number of partials.
111     //BEAGLE's partials buffer = [ tax1, tax2, ..., taxN, b1Left, b2Left, b3Left,...,bMLeft, b1Rght, b2Rght, b3Rght,...,bMRght] (N taxa, M branches)
112     int num_partials  = 2*(tree->n_otu + num_branches); /* TODO: This does not seem correct; suspect poor indexing elsewhere */
113     													/* In update_operations, indexes range from 0 to almost 2 (n_otu + num_branches), but not all integers are used */
114 
115     int resourceList[1];
116     resourceList[0] = io->beagle_resource;
117 
118 //    DUMP_I(tree->n_otu, num_rate_catg, num_partials, num_branches, tree->mod->ns, tree->n_pattern, tree->mod->whichmodel);
119     int beagle_inst = beagleCreateInstance(
120                                   tree->n_otu,                /**< Number of tip data elements (input) */
121                                   num_partials,               /**< Number of partial buffer (input) */
122     /* PhyML uses partials */     0,              			  /**< Number of compact state representation buffers to create (input) */
123                                   tree->mod->ns,              /**< Number of states in the continuous-time Markov chain (input) */
124                                   tree->n_pattern,            /**< Number of site patterns to be handled by the instance (input) */
125                                   1,                          /**< Number of rate matrix eigen-decomposition,state freqs, and category weight buffers*/
126                                   num_branches,               /**< Number of rate matrix buffers (input) */
127                                   num_rate_catg,              /**< Number of rate categories (input) */
128                                   -1,                         /**< Number of scaling buffers. Unused because we use SCALING_ALWAYS */
129                                   resourceList,               /**< List of potential resource on which this instance is allowed (input, NULL implies no restriction */
130                                   1,			              /**< Length of resourceList list (input) */
131                                   BEAGLE_FLAG_FRAMEWORK_CPU | BEAGLE_FLAG_PROCESSOR_CPU | BEAGLE_FLAG_SCALING_ALWAYS | BEAGLE_FLAG_EIGEN_REAL | ((sizeof(float)==sizeof(phydbl)) ? BEAGLE_FLAG_PRECISION_SINGLE:BEAGLE_FLAG_PRECISION_DOUBLE),
132                                   0,                		  /**< Bit-flags indicating required implementation characteristics, see BeagleFlags (input) */
133                                   &inst_d);
134     if (beagle_inst < 0){
135         fprintf(stderr, "beagleCreateInstance() failed:%i\n\n",beagle_inst);
136         return beagle_inst;
137     }
138 
139     if(!quiet){
140         fprintf(stdout, "\n\tUnique BEAGLE instance id:%i\n", beagle_inst);
141         print_beagle_instance_details(&inst_d);
142     }
143 
144     //Set the tips
145     for(i=0; i<2*tree->n_otu-1; ++i) //taxa+internal nodes
146       {
147         //        Print_Tip_Partials(tree, tree->a_nodes[i]);
148         if(tree->a_nodes[i]->tax)
149           {
150             assert(tree->a_nodes[i]->c_seq->len == tree->n_pattern); // number of compacts sites == number of distinct site patterns
151             double* tip = short_to_double(tree->a_nodes[i]->b[0]->p_lk_tip_r, tree->n_pattern*tree->mod->ns); //The tip states are stored on the branch leading to the tip
152             //Recall we store tip partials on the branch leading to the tip, rather than the tip itself.
153             int ret = beagleSetTipPartials(beagle_inst, tree->a_nodes[i]->b[0]->p_lk_tip_idx, tip);
154             if(ret<0){
155               fprintf(stderr, "beagleSetTipPartials() on instance %i failed:%i\n\n",beagle_inst,ret);
156               Free(tip);
157               return ret;
158             }
159             Free(tip);
160           }
161       }
162 
163     //Set the pattern weights
164     double* pwts = int_to_double(tree->data->wght,tree->n_pattern); //BTW, These weights are absolute counts, and not freqs
165     int ret = beagleSetPatternWeights(beagle_inst, pwts);
166     if(ret<0){
167       fprintf(stderr, "beagleSetPatternWeights() on instance %i failed:%i\n\n",beagle_inst,ret);
168       Free(pwts);
169       return ret;
170     }
171     Free(pwts);
172 
173     tree->mod->b_inst=beagle_inst;
174 
175     update_beagle_ras(tree->mod);
176     update_beagle_efrqs(tree->mod);
177 
178     return beagle_inst;
179 }
180 
181 /* Update partial likelihood on edge b on the side of b where
182    node d lies.
183 */
update_beagle_partials(t_tree * tree,t_edge * b,t_node * d)184 void update_beagle_partials(t_tree* tree, t_edge* b, t_node* d)
185 {
186     /*
187                |
188                |<- b
189                |
190                d
191               / \
192           b1 /   \ b2
193             /     \
194         n_v1     n_v2
195     */
196 
197   if(d->tax) //Partial likelihoods are only calculated on internal nodes
198     {
199       PhyML_Printf("\n== t_node %d is a leaf...",d->num);
200       PhyML_Printf("\n== Err. in file %s at line %d (function '%s')\n",__FILE__,__LINE__,__FUNCTION__);
201       Warn_And_Exit("\n");
202     }
203 
204   //Determine d's "left" and "right" neighbors.
205   t_node *n_v1, *n_v2;//d's "left" and "right" neighbor nodes
206   phydbl *p_lk,*p_lk_v1,*p_lk_v2;
207   phydbl *Pij1,*Pij2;
208   int *sum_scale, *sum_scale_v1, *sum_scale_v2;
209   int *p_lk_loc;
210   int dest_p_idx, child1_p_idx, child2_p_idx, Pij1_idx, Pij2_idx;
211   n_v1 = n_v2                 = NULL;
212   p_lk = p_lk_v1 = p_lk_v2    = NULL;
213   Pij1 = Pij2                 = NULL;
214   sum_scale_v1 = sum_scale_v2 = NULL;
215   p_lk_loc                    = NULL;
216   dest_p_idx = child1_p_idx = child2_p_idx = Pij1_idx = Pij2_idx = UNINITIALIZED;
217   Set_All_P_Lk(&n_v1,&n_v2,
218                &p_lk,&sum_scale,&p_lk_loc,
219                &Pij1,&p_lk_v1,&sum_scale_v1,
220                &Pij2,&p_lk_v2,&sum_scale_v2,
221                d,b,tree,
222                &dest_p_idx, &child1_p_idx, &child2_p_idx, &Pij1_idx, &Pij2_idx);
223 
224 
225   //    fprintf(stdout, "\nUpdating partials on Branch %d (on the side where Node %d lies)\n",b->num,d->num);fflush(stdout);
226   //    double* p_lk_v1_b = (double*)malloc(tree->mod->ras->n_catg*tree->mod->ns*tree->n_pattern*sizeof(double));if(NULL==p_lk_v1_b) Warn_And_Exit("Couldnt allocate memory");
227   //    beagleGetPartials(tree->b_inst, child1_p_idx, BEAGLE_OP_NONE, (double*)p_lk_v1_b);
228   //    double* p_lk_v2_b = (double*)malloc(tree->mod->ras->n_catg*tree->mod->ns*tree->n_pattern*sizeof(double));if(NULL==p_lk_v2_b) Warn_And_Exit("Couldnt allocate memory");
229   //    beagleGetPartials(tree->b_inst, child2_p_idx, BEAGLE_OP_NONE, (double*)p_lk_v2_b);
230 
231   //    fprintf(stdout, "Left partials :");fflush(stdout);
232   //    Dump_Arr_D(p_lk_v1_b,   tree->mod->ras->n_catg*tree->mod->ns*tree->n_pattern);
233   //    fprintf(stdout, "Right partials:");fflush(stdout);
234   //    Dump_Arr_D(p_lk_v2_b,   tree->mod->ras->n_catg*tree->mod->ns*tree->n_pattern);
235   //    Free(p_lk_v1_b);
236   //    Free(p_lk_v2_b);
237 
238 
239   //Create the corresponding BEAGLE operation
240 
241   // fprintf(stderr,"%d, %d, %d, ", dest_p_idx, child1_p_idx, child2_p_idx);
242 
243   BeagleOperation operations[1] = {{dest_p_idx, BEAGLE_OP_NONE, BEAGLE_OP_NONE, child1_p_idx, Pij1_idx, child2_p_idx, Pij2_idx}};
244   //Compute the partials
245   int ret = beagleUpdatePartials(tree->b_inst, operations, 1, BEAGLE_OP_NONE);
246   if(ret<0){
247     fprintf(stderr, "beagleUpdatePartials() on instance %i failed:%i\n\n",tree->b_inst,ret);
248     Exit("");
249   }
250   //Load the computed/updated partial partials
251 #ifndef CLEAN_BEAGLE_API
252    ret = beagleGetPartials(tree->b_inst, dest_p_idx, BEAGLE_OP_NONE, (double*)p_lk);
253    if(ret<0){
254      fprintf(stderr, "beagleGetPartials() on instance %i failed:%i\n\n",tree->b_inst,ret);
255      Exit("");
256    }
257 #endif
258 
259   //    fprintf(stdout, "Updated partials:");fflush(stdout);
260   //    Dump_Arr_D(p_lk, tree->mod->ras->n_catg*tree->mod->ns*tree->n_pattern);
261 }
262 
finalize_beagle_instance(t_tree * tree)263 int finalize_beagle_instance(t_tree *tree)
264 {
265   if(tree->b_inst >= 0) {
266     int ret = beagleFinalizeInstance(tree->b_inst);
267     if(ret<0) fprintf(stderr, "\nFailed to finalize BEAGLE instance %i: %i\n\n", tree->b_inst, ret);
268     return ret;
269   }
270   return 0;
271 }
272 
update_beagle_ras(t_mod * mod)273 void update_beagle_ras(t_mod* mod)
274 {
275   assert(UNINITIALIZED != mod->b_inst);
276 
277   int ret=-1;
278   if((sizeof(float)==sizeof(phydbl))) //Do we need to convert?
279     {
280       double* catg_rates = float_to_double(mod->ras->gamma_rr->v, mod->ras->n_catg);
281       ret = beagleSetCategoryRates(mod->b_inst, catg_rates);
282       Free(catg_rates);
283       if(ret<0){
284         fprintf(stderr, "beagleSetCategoryRates() on instance %i failed:%i\n\n",mod->b_inst,ret);
285         Exit("");
286       }
287       double* catg_wts = float_to_double(mod->ras->gamma_r_proba->v, mod->ras->n_catg);
288       if(!mod->optimizing_topology) {
289         ret = beagleSetCategoryWeights(mod->b_inst, 0, catg_wts);
290         Free(catg_wts);
291         if(ret<0){
292           fprintf(stderr, "beagleSetCategoryWeights() on instance %i failed:%i\n\n",mod->b_inst,ret);
293           Exit("");
294         }
295       }
296     }
297   else
298     {
299       ret = beagleSetCategoryRates(mod->b_inst, mod->ras->gamma_rr->v);
300       if(ret<0){
301         fprintf(stderr, "beagleSetCategoryRates() on instance %i failed:%i\n\n",mod->b_inst,ret);
302         Exit("");
303       }
304       if(!mod->optimizing_topology) {
305         ret = beagleSetCategoryWeights(mod->b_inst, 0, mod->ras->gamma_r_proba->v);
306         if(ret<0){
307           fprintf(stderr, "beagleSetCategoryWeights() on instance %i failed:%i\n\n",mod->b_inst,ret);
308           Exit("");
309         }
310 
311       }
312     }
313 }
314 
update_beagle_efrqs(t_mod * mod)315 void update_beagle_efrqs(t_mod* mod)
316 {
317   assert(UNINITIALIZED != mod->b_inst);
318 
319   int ret=-1;
320   if((sizeof(float)==sizeof(phydbl))) {
321     double* efrqs = float_to_double(mod->e_frq->pi->v, mod->ns);
322         ret = beagleSetStateFrequencies(mod->b_inst, 0, efrqs);
323         Free(efrqs);
324   } else {
325     ret = beagleSetStateFrequencies(mod->b_inst, 0, mod->e_frq->pi->v);
326   }
327   if(ret<0){
328     fprintf(stderr, "beagleSetStateFrequencies() on instance %i failed:%i\n\n",mod->b_inst,ret);
329     Exit("");
330   }
331 }
332 
calc_edgelks_beagle(t_edge * b,t_tree * tree)333 void calc_edgelks_beagle(t_edge *b, t_tree *tree)
334 {
335   assert(UNINITIALIZED != tree->b_inst);
336 
337   //Compute the edge likelihood
338   int parents[1]  = {b->p_lk_left_idx};
339   int children[1] = {b->rght->tax?b->p_lk_tip_idx:b->p_lk_rght_idx};
340   int pmats[1]    = {b->Pij_rr_idx};
341   int other[1]    = {0};//Category Weights and State Frequencies both have a single buffer, hence they are both indexed at 0
342   double lnL[1]   = {UNINITIALIZED};
343   //    DUMP_I(parents[0], children[0], pmats[0], b->num, b->rght->tax);
344   int ret=beagleCalculateEdgeLogLikelihoods(tree->b_inst, parents, children, pmats, NULL, NULL, other, other, NULL, 1, lnL, NULL, NULL);
345   int i;
346 
347 if(ret<0){
348     fprintf(stderr, "beagleCalculateEdgeLogLikelihoods() on instance %i failed:%i\n\n",tree->b_inst,ret);
349     Exit("");
350   }
351 
352   tree->c_lnL = sizeof(phydbl)==sizeof(float)?(float)lnL[0]:lnL[0];
353 
354   //Retrieve the site likelihoods that were computed during the previous edge likelihood computation
355   ret = beagleGetSiteLogLikelihoods(tree->b_inst,tree->cur_site_lk);//TODO: Handle when cur_site_lk is float
356   if(ret<0){
357     fprintf(stderr, "beagleGetSiteLogLikelihoods() on instance %i failed:%i\n\n",tree->b_inst,ret);
358     Exit("");
359   }
360   //Transform
361   for(i=0;i<tree->n_pattern;++i)
362     tree->cur_site_lk[i]=exp(tree->cur_site_lk[i]);
363 }
364 
update_beagle_eigen(t_mod * mod)365 void update_beagle_eigen(t_mod* mod)
366 {
367     assert(UNINITIALIZED != mod->b_inst);
368 
369     int whichmodel = mod->whichmodel;
370     //We use Eigen Decomposition only for GTR models and AA datasets
371     if((mod->io->datatype == AA || whichmodel==GTR || whichmodel==CUSTOM) && mod->use_m4mod == NO)
372     {
373         //Beagle expects untransformed eigen-values (i.e. recall e_val is exp() scaled, so we undo that)
374         phydbl* evals = (phydbl*)malloc(mod->ns * sizeof(phydbl));
375         int i;
376         for(i=0;i<mod->ns;++i)  evals[i]=log(mod->eigen->e_val[i]);
377         int ret=-1;
378         if((sizeof(float)==sizeof(phydbl)))//Need to convert to doubles?
379         {
380             double* eigen_vects     = float_to_double(mod->eigen->r_e_vect, mod->eigen->size*mod->eigen->size);
381             double* eigen_vects_inv = float_to_double(mod->eigen->l_e_vect, mod->eigen->size*mod->eigen->size);
382             double* eigen_vals      = float_to_double(evals,mod->eigen->size);
383             ret = beagleSetEigenDecomposition(mod->b_inst,0,eigen_vects,eigen_vects_inv,eigen_vals);
384             Free(eigen_vects);Free(eigen_vects_inv);Free(eigen_vals);
385         } else {
386             ret = beagleSetEigenDecomposition(mod->b_inst,0,mod->eigen->r_e_vect,mod->eigen->l_e_vect,evals);
387         }
388         if(ret<0){
389           fprintf(stderr, "beagleSetEigenDecomposition() on instance %i failed:%i\n\n",mod->b_inst,ret);
390           Free(evals);
391           Exit("");
392         }
393         Free(evals);
394     }
395 }
396 
397 #endif // BEAGLE_UTILS_CPP
398 
399