1 // This is brl/bseg/boxm2/ocl/algo/boxm2_ocl_update_based_dispersion.cxx
2 #include <fstream>
3 #include <iostream>
4 #include <algorithm>
5 #include "boxm2_ocl_update_based_dispersion.h"
6 //:
7 // \file
8 // \brief  A process for updating a color model
9 //
10 // \author Vishal Jain
11 // \date Mar 25, 2011
12 
13 #ifdef _MSC_VER
14 #  include "vcl_msvc_warnings.h"
15 #endif
16 #include <boxm2/ocl/boxm2_opencl_cache.h>
17 #include <boxm2/boxm2_scene.h>
18 #include <boxm2/boxm2_block.h>
19 #include <boxm2/boxm2_data_base.h>
20 #include <boxm2/ocl/boxm2_ocl_util.h>
21 #include <boxm2/boxm2_util.h>
22 #include <boxm2/ocl/algo/boxm2_ocl_camera_converter.h>
23 #include "vil/vil_image_view.h"
24 #include "vil/vil_save.h"
25 
26 //directory utility
27 #include "vul/vul_timer.h"
28 #include <vcl_where_root_dir.h>
29 #include <bocl/bocl_device.h>
30 #include <bocl/bocl_kernel.h>
31 #include <boct/boct_bit_tree.h>
32 #include "vnl/vnl_numeric_traits.h"
33 
//: Map of compiled kernel vectors; should persist between process executions.
// Keyed by the identifier string built in get_kernels() (device identifier
// followed by the compile options); each value is the ordered list of kernels
// compiled for that key.
std::map<std::string,std::vector<bocl_kernel*> > boxm2_ocl_update_based_dispersion::kernels_;
36 
37 //Main public method, updates color model
update(const boxm2_scene_sptr & scene,bocl_device_sptr device,const boxm2_opencl_cache_sptr & opencl_cache,vpgl_camera_double_sptr cam,const vil_image_view_base_sptr & img,const std::string & ident,const vil_image_view_base_sptr & mask_sptr,bool update_alpha,float mog_var,bool update_app,float resnearfactor,float resfarfactor,std::size_t startI,std::size_t startJ)38 bool boxm2_ocl_update_based_dispersion::update(const boxm2_scene_sptr&              scene,
39                                                bocl_device_sptr              device,
40                                                const boxm2_opencl_cache_sptr&       opencl_cache,
41                                                vpgl_camera_double_sptr       cam,
42                                                const vil_image_view_base_sptr&      img,
43                                                const std::string&                    ident,
44                                                const vil_image_view_base_sptr&      mask_sptr,
45                                                bool                          update_alpha,
46                                                float                         mog_var,
47                                                bool                          update_app,
48                                                float resnearfactor,
49                                                float resfarfactor,
50                                                std::size_t                    startI,
51                                                std::size_t                    startJ)
52 {
53   enum {
54     UPDATE_SEGLEN = 0,
55     UPDATE_PREINF = 1,
56     UPDATE_PROC   = 2,
57     UPDATE_BAYES  = 3,
58     UPDATE_CELL_BASED_DISPERSION   = 4
59   };
60   float transfer_time=0.0f;
61   float gpu_time=0.0f;
62   std::size_t local_threads[2]={8,8};
63   std::size_t global_threads[2]={8,8};
64 
65   //catch a "null" mask (not really null because that throws an error)
66   bool use_mask = false;
67   if ( mask_sptr->ni() == img->ni() && mask_sptr->nj() == img->nj() ) {
68     std::cout<<"Update using mask."<<std::endl;
69     use_mask = true;
70   }
71   vil_image_view<unsigned char >* mask_map = nullptr;
72   if (use_mask) {
73     mask_map = dynamic_cast<vil_image_view<unsigned char> *>(mask_sptr.ptr());
74     if (!mask_map) {
75       std::cout<<"boxm2_update_process:: mask map is not an unsigned char map"<<std::endl;
76       return false;
77     }
78   }
79 
80   //cache size sanity check
81   std::size_t binCache = opencl_cache.ptr()->bytes_in_cache();
82   std::cout<<"Update MBs in cache: "<<binCache/(1024.0*1024.0)<<std::endl;
83 
84   //make correct data types are here
85   std::string data_type, num_obs_type,options;
86   int appTypeSize;
87   bool isRGB = false;
88   if (!validate_appearances(scene, data_type, appTypeSize, num_obs_type, options, isRGB))
89     return false;
90   if (ident.size() > 0) {
91     data_type += "_" + ident;
92     num_obs_type += "_" + ident;
93   }
94 
95 
96   // create a command queue.
97   int status=0;
98   cl_command_queue queue = clCreateCommandQueue( device->context(),
99                                                  *(device->device_id()),
100                                                  CL_QUEUE_PROFILING_ENABLE,
101                                                  &status);
102   if (status!=0)
103     return false;
104 
105   // compile the kernel if not already compiled
106   std::vector<bocl_kernel*>& kernels = get_kernels(device, options);
107 
108   //grab input image, establish cl_ni, cl_nj (so global size is divisible by local size)
109   vil_image_view_base_sptr float_img = boxm2_util::prepare_input_image(img, true);
110   auto* img_view = static_cast<vil_image_view<float>* >(float_img.ptr());
111   auto cl_ni=(unsigned)RoundUp(img_view->ni(),(int)local_threads[0]);
112   auto cl_nj=(unsigned)RoundUp(img_view->nj(),(int)local_threads[1]);
113   global_threads[0]=cl_ni;
114   global_threads[1]=cl_nj;
115 
116   //set generic cam
117   auto* ray_origins    = new cl_float[4*cl_ni*cl_nj];
118   auto* ray_directions = new cl_float[4*cl_ni*cl_nj];
119   bocl_mem_sptr ray_o_buff = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_origins, "ray_origins buffer");
120   bocl_mem_sptr ray_d_buff = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_directions, "ray_directions buffer");
121   boxm2_ocl_camera_converter::compute_ray_image( device, queue, cam, cl_ni, cl_nj, ray_o_buff, ray_d_buff, startI, startJ);
122 
123   float ray_origin[4];
124   ray_origin[0] = ray_origins[0];
125   ray_origin[1] = ray_origins[1];
126   ray_origin[2] = ray_origins[2];
127   ray_origin[3] = 0.0f;
128   bocl_mem_sptr ray_origin_ptr = opencl_cache->alloc_mem(4*sizeof(float), ray_origin, "global ray origin  buffer");
129   ray_origin_ptr->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
130   float tnearfar[2] = { 0.0f, 1000000} ;
131   if(cam->type_name() == "vpgl_perspective_camera")
132   {
133 
134       float f  = ((vpgl_perspective_camera<double> *)cam.ptr())->get_calibration().focal_length()*((vpgl_perspective_camera<double> *)cam.ptr())->get_calibration().x_scale();
135       tnearfar[0] = f* scene->finest_resolution()/resnearfactor ;
136       tnearfar[1] = f* scene->finest_resolution()/resfarfactor ;
137       std::cout<<"Near and Far Clipping planes "<<tnearfar[0]<<" "<<tnearfar[1]<<std::endl;
138   }
139   bocl_mem_sptr tnearfar_mem_ptr = opencl_cache->alloc_mem(2*sizeof(float), tnearfar, "tnearfar  buffer");
140   tnearfar_mem_ptr->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
141   //Visibility, Preinf, Norm, and input image buffers
142   auto* vis_buff = new float[cl_ni*cl_nj];
143   auto* pre_buff = new float[cl_ni*cl_nj];
144   auto* norm_buff = new float[cl_ni*cl_nj];
145   auto* input_buff=new float[cl_ni*cl_nj];
146   for (unsigned i=0;i<cl_ni*cl_nj;i++)
147   {
148     vis_buff[i]=1.0f;
149     pre_buff[i]=0.0f;
150     norm_buff[i]=0.0f;
151   }
152   //copy input vals into image
153   int count=0;
154   for (unsigned int j=0;j<cl_nj;++j) {
155     for (unsigned int i=0;i<cl_ni;++i) {
156       input_buff[count] = 0.0f;
157       if ( i<img_view->ni() && j< img_view->nj() )
158         input_buff[count] = (*img_view)(i,j);
159       ++count;
160     }
161   }
162 
163 
164   //bocl_mem_sptr in_image=new bocl_mem(device->context(),input_buff,cl_ni*cl_nj*sizeof(float),"input image buffer");
165   bocl_mem_sptr in_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), input_buff, "input image buffer");
166   in_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
167 
168   //bocl_mem_sptr vis_image=new bocl_mem(device->context(),vis_buff,cl_ni*cl_nj*sizeof(float),"vis image buffer");
169   bocl_mem_sptr vis_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), vis_buff, "vis image buffer");
170   vis_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
171 
172   //bocl_mem_sptr pre_image=new bocl_mem(device->context(),pre_buff,cl_ni*cl_nj*sizeof(float),"pre image buffer");
173   bocl_mem_sptr pre_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), pre_buff, "pre image buffer");
174   pre_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
175 
176   //bocl_mem_sptr norm_image=new bocl_mem(device->context(),norm_buff,cl_ni*cl_nj*sizeof(float),"pre image buffer");
177   bocl_mem_sptr norm_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), norm_buff, "norm image buffer");
178   norm_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
179 
180   // Image Dimensions
181   int img_dim_buff[4];
182   img_dim_buff[0] = 0;
183   img_dim_buff[1] = 0;
184   img_dim_buff[2] = img_view->ni();
185   img_dim_buff[3] = img_view->nj();
186 
187   bocl_mem_sptr img_dim=new bocl_mem(device->context(), img_dim_buff, sizeof(int)*4, "image dims");
188   img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
189   bocl_mem_sptr centerX = new bocl_mem(device->context(), boct_bit_tree::centerX, sizeof(cl_float)*585, "centersX lookup buffer");
190   bocl_mem_sptr centerY = new bocl_mem(device->context(), boct_bit_tree::centerY, sizeof(cl_float)*585, "centersY lookup buffer");
191   bocl_mem_sptr centerZ = new bocl_mem(device->context(), boct_bit_tree::centerZ, sizeof(cl_float)*585, "centersZ lookup buffer");
192   centerX->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
193   centerY->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
194   centerZ->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
195   // Output Array
196   float output_arr[100];
197   for (float & i : output_arr) i = 0.0f;
198   bocl_mem_sptr  cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
199   cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
200 
201   // bit lookup buffer
202   cl_uchar lookup_arr[256];
203   boxm2_ocl_util::set_bit_lookup(lookup_arr);
204   bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
205   lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
206 
207   // app density used for proc_norm_image
208   float app_buffer[4]={1.0,0.0,0.0,0.0};
209   bocl_mem_sptr app_density = new bocl_mem(device->context(), app_buffer, sizeof(cl_float4), "app density buffer");
210   app_density->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
211 
212   // set arguments
213   std::vector<boxm2_block_id> vis_order;
214   if(cam->type_name() == "vpgl_perspective_camera")
215       vis_order= scene->get_vis_blocks_opt((vpgl_perspective_camera<double>*)cam.ptr(),img_view->ni(),img_view->nj());
216   else
217       vis_order= scene->get_vis_blocks(cam);
218   std::vector<boxm2_block_id>::iterator id;
219   for (unsigned int i=0; i<kernels.size(); ++i)
220   {
221     if ( i == UPDATE_PROC ) {
222       bocl_kernel * proc_kern=kernels[i];
223       proc_kern->set_arg( norm_image.ptr() );
224       proc_kern->set_arg( in_image.ptr() );
225       proc_kern->set_arg( vis_image.ptr() );
226       proc_kern->set_arg( pre_image.ptr());
227       proc_kern->set_arg( img_dim.ptr() );
228       proc_kern->set_arg( app_density.ptr() );
229       //execute kernel
230       proc_kern->execute( queue, 2, local_threads, global_threads);
231       int status = clFinish(queue);
232       if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
233         return false;
234       proc_kern->clear_args();
235       norm_image->read_to_buffer(queue);
236 
237     }
238 
239     //set masked values
240     vis_image->read_to_buffer(queue);
241     if (use_mask)
242     {
243       int count = 0;
244       for (unsigned int j=0;j<cl_nj;++j) {
245         for (unsigned int i=0;i<cl_ni;++i) {
246           if ( i<mask_map->ni() && j<mask_map->nj() ) {
247             if ( (*mask_map)(i,j)==0 ) {
248               input_buff[count] = -1.0f;
249               vis_buff  [count] = 0.0f;
250             }
251           }
252           ++count;
253         }
254       }
255       in_image->write_to_buffer(queue);
256       vis_image->write_to_buffer(queue);
257       clFinish(queue);
258     }
259 
260     for (id = vis_order.begin(); id != vis_order.end(); ++id)
261     {
262       //choose correct render kernel
263       boxm2_block_metadata mdata = scene->get_block_metadata(*id);
264       bocl_kernel* kern = kernels[i];
265 
266       //write the image values to the buffer
267       vul_timer transfer;
268       bocl_mem* blk       = opencl_cache->get_block(scene,*id);
269       bocl_mem* blk_info  = opencl_cache->loaded_block_info();
270       bocl_mem* alpha     = opencl_cache->get_data<BOXM2_ALPHA>(scene,*id,0,false);
271       auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
272       int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
273       info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
274       blk_info->write_to_buffer((queue));
275 
276       int nobsTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_NUM_OBS>::prefix());
277       // data type string may contain an identifier so determine the buffer size
278       bocl_mem* mog       = opencl_cache->get_data(scene,*id,data_type,alpha->num_bytes()/alphaTypeSize*appTypeSize,false);    //info_buffer->data_buffer_length*boxm2_data_info::datasize(data_type));
279       bocl_mem* num_obs   = opencl_cache->get_data(scene,*id,num_obs_type,alpha->num_bytes()/alphaTypeSize*nobsTypeSize,false);//,info_buffer->data_buffer_length*boxm2_data_info::datasize(num_obs_type));
280 
281       //grab an appropriately sized AUX data buffer
282       int auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix());
283       bocl_mem *aux0   = opencl_cache->get_data<BOXM2_AUX0>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
284       auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX1>::prefix());
285       bocl_mem *aux1   = opencl_cache->get_data<BOXM2_AUX1>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
286 
287       transfer_time += (float) transfer.all();
288       if (i==UPDATE_SEGLEN)
289       {
290         aux0->zero_gpu_buffer(queue);
291         aux1->zero_gpu_buffer(queue);
292         kern->set_arg( blk_info );
293         kern->set_arg( blk );
294         kern->set_arg( alpha );
295         kern->set_arg( aux0 );
296         kern->set_arg( aux1 );
297         kern->set_arg( lookup.ptr() );
298 
299         // kern->set_arg( persp_cam.ptr() );
300         kern->set_arg( ray_o_buff.ptr() );
301         kern->set_arg( ray_d_buff.ptr() );
302         kern->set_arg( tnearfar_mem_ptr.ptr() );
303         kern->set_arg( img_dim.ptr() );
304         kern->set_arg( in_image.ptr() );
305         kern->set_arg( cl_output.ptr() );
306         kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
307         kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar4) ); //ray bundle,
308         kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) );    //cell pointers,
309         kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_float4) ); //cached aux,
310         kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
311 
312         //execute kernel
313         kern->execute(queue, 2, local_threads, global_threads);
314         int status = clFinish(queue);
315         if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
316           return false;
317         gpu_time += kern->exec_time();
318 
319         //clear render kernel args so it can reset em on next execution
320         kern->clear_args();
321 
322         aux0->read_to_buffer(queue);
323         aux1->read_to_buffer(queue);
324       }
325       else if (i==UPDATE_PREINF)
326       {
327         kern->set_arg( blk_info );
328         kern->set_arg( blk );
329         kern->set_arg( alpha );
330         kern->set_arg( mog );
331         kern->set_arg( num_obs );
332         kern->set_arg( aux0 );
333         kern->set_arg( aux1 );
334         kern->set_arg( lookup.ptr() );
335         kern->set_arg( ray_o_buff.ptr() );
336         kern->set_arg( ray_d_buff.ptr() );
337         kern->set_arg( tnearfar_mem_ptr.ptr() );
338         kern->set_arg( img_dim.ptr() );
339         kern->set_arg( vis_image.ptr() );
340         kern->set_arg( pre_image.ptr() );
341         kern->set_arg( cl_output.ptr() );
342         kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
343         kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
344         //execute kernel
345         kern->execute(queue, 2, local_threads, global_threads);
346         int status = clFinish(queue);
347         if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
348           return false;
349         gpu_time += kern->exec_time();
350 
351         //clear render kernel args so it can reset em on next execution
352         kern->clear_args();
353 
354         //write info to disk
355       }
356       else if (i==UPDATE_BAYES)
357       {
358         auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX2>::prefix());
359         bocl_mem *aux2   = opencl_cache->get_data<BOXM2_AUX2>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
360         aux2->zero_gpu_buffer(queue);
361         auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX3>::prefix());
362         bocl_mem *aux3   = opencl_cache->get_data<BOXM2_AUX3>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
363         aux3->zero_gpu_buffer(queue);
364 
365         kern->set_arg( blk_info );
366         kern->set_arg( blk );
367         kern->set_arg( alpha );
368         kern->set_arg( mog );
369         kern->set_arg( num_obs );
370         kern->set_arg( aux0 );
371         kern->set_arg( aux1 );
372         kern->set_arg( aux2 );
373         kern->set_arg( aux3 );
374         kern->set_arg( lookup.ptr() );
375         kern->set_arg( ray_o_buff.ptr() );
376         kern->set_arg( ray_d_buff.ptr() );
377         kern->set_arg( tnearfar_mem_ptr.ptr() );
378         kern->set_arg( img_dim.ptr() );
379         kern->set_arg( vis_image.ptr() );
380         kern->set_arg( pre_image.ptr() );
381         kern->set_arg( norm_image.ptr() );
382         kern->set_arg( cl_output.ptr() );
383         kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
384         kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_short2) ); //ray bundle,
385         kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) );    //cell pointers,
386         kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_float) ); //cached aux,
387         kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
388                 //execute kernel
389         kern->execute(queue, 2, local_threads, global_threads);
390         int status = clFinish(queue);
391         if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
392           return false;
393         gpu_time += kern->exec_time();
394 
395         //clear render kernel args so it can reset em on next execution
396         kern->clear_args();
397         aux2->read_to_buffer(queue);
398         aux3->read_to_buffer(queue);
399       }
400       else if (i==UPDATE_CELL_BASED_DISPERSION)
401       {
402         auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX2>::prefix());
403         bocl_mem *aux2   = opencl_cache->get_data<BOXM2_AUX2>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
404 
405         auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX3>::prefix());
406         bocl_mem *aux3   = opencl_cache->get_data<BOXM2_AUX3>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
407 
408         auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix("direction_expectation_x"));
409         bocl_mem *aux0_dir_x   = opencl_cache->get_data<BOXM2_AUX0>(scene,*id, info_buffer->data_buffer_length*auxTypeSize,false, "direction_expectation_x");
410 
411         auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix("direction_expectation_y"));
412         bocl_mem *aux0_dir_y   = opencl_cache->get_data<BOXM2_AUX0>(scene,*id, info_buffer->data_buffer_length*auxTypeSize,false, "direction_expectation_y");
413 
414         auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix("direction_expectation_z"));
415         bocl_mem *aux0_dir_z   = opencl_cache->get_data<BOXM2_AUX0>(scene,*id, info_buffer->data_buffer_length*auxTypeSize,false, "direction_expectation_z");
416 
417         auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix("visibility_expectation"));
418         bocl_mem *aux0_vis_exp   = opencl_cache->get_data<BOXM2_AUX0>(scene,*id, info_buffer->data_buffer_length*auxTypeSize,false, "visibility_expectation");
419 
420        // update_alpha boolean buffer
421         cl_int up_alpha[1];
422         up_alpha[0] = update_alpha ? 1 : 0;
423         bocl_mem_sptr up_alpha_mem = new bocl_mem(device->context(), up_alpha, sizeof(up_alpha), "update alpha bool buffer");
424         up_alpha_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
425 
426         // update_app boolean buffer
427         cl_int up_app[1];
428         up_app[0] = update_app ? 1 : 0;
429         bocl_mem_sptr up_app_mem = new bocl_mem(device->context(), up_app, sizeof(up_app), "update app bool buffer");
430         up_app_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
431         //mog variance, if 0.0f or less, then var will be learned
432         bocl_mem_sptr mog_var_mem = new bocl_mem(device->context(), &mog_var, sizeof(mog_var), "update gauss variance");
433         mog_var_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
434 
435         local_threads[0] = 64;
436         local_threads[1] = 1 ;
437         global_threads[0] = (unsigned) RoundUp(mdata.sub_block_num_.x()*mdata.sub_block_num_.y()*mdata.sub_block_num_.z(),(int)local_threads[0]);
438         global_threads[1]=1;
439 
440         kern->set_arg( centerX.ptr() );
441         kern->set_arg( centerY.ptr() );
442         kern->set_arg( centerZ.ptr() );
443         kern->set_arg( lookup.ptr() );
444         kern->set_arg( blk_info );
445         kern->set_arg( blk );
446         kern->set_arg( alpha );
447         kern->set_arg( mog );
448         kern->set_arg( num_obs );
449         kern->set_arg( aux0 );
450         kern->set_arg( aux1 );
451         kern->set_arg( aux2 );
452         kern->set_arg( aux3 );
453         kern->set_arg( aux0_dir_x );
454         kern->set_arg( aux0_dir_y );
455         kern->set_arg( aux0_dir_z );
456         kern->set_arg( aux0_vis_exp );
457         kern->set_arg( ray_origin_ptr.ptr() );
458 
459         kern->set_arg( up_alpha_mem.ptr() );
460         kern->set_arg( mog_var_mem.ptr() );
461         kern->set_arg( up_app_mem.ptr() );
462         kern->set_arg( cl_output.ptr() );
463         kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
464         kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
465         //execute kernel
466         kern->execute(queue, 2, local_threads, global_threads);
467         int status = clFinish(queue);
468         if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
469           return false;
470         gpu_time += kern->exec_time();
471 
472         //clear render kernel args so it can reset em on next execution
473         kern->clear_args();
474         //write info to disk
475         alpha->read_to_buffer(queue);
476         mog->read_to_buffer(queue);
477         num_obs->read_to_buffer(queue);
478         aux0_dir_x->read_to_buffer(queue);
479         aux0_dir_y->read_to_buffer(queue);
480         aux0_dir_z->read_to_buffer(queue);
481         aux0_vis_exp->read_to_buffer(queue);
482       }
483 
484       //read image out to buffer (from gpu)
485       in_image->read_to_buffer(queue);
486       vis_image->read_to_buffer(queue);
487       pre_image->read_to_buffer(queue);
488       cl_output->read_to_buffer(queue);
489       clFinish(queue);
490     }
491   }
492 
493   delete [] vis_buff;
494   delete [] pre_buff;
495   delete [] norm_buff;
496   delete [] input_buff;
497   delete [] ray_origins;
498   delete [] ray_directions;
499   opencl_cache->unref_mem(in_image.ptr());
500   opencl_cache->unref_mem(vis_image.ptr());
501   opencl_cache->unref_mem(pre_image.ptr());
502   opencl_cache->unref_mem(norm_image.ptr());
503   opencl_cache->unref_mem(ray_o_buff.ptr());
504   opencl_cache->unref_mem(ray_d_buff.ptr());
505   opencl_cache->unref_mem(tnearfar_mem_ptr.ptr());
506   std::cout<<"Gpu time "<<gpu_time<<" transfer time "<<transfer_time<<std::endl;
507   clReleaseCommandQueue(queue);
508   return true;
509 }
510 
511 
512 //Returns vector of color update kernels (and caches them per device
get_kernels(const bocl_device_sptr & device,const std::string & opts,bool isRGB)513 std::vector<bocl_kernel*>& boxm2_ocl_update_based_dispersion::get_kernels(const bocl_device_sptr& device, const std::string& opts, bool isRGB)
514 {
515   // compile kernels if not already compiled
516   std::string identifier = device->device_identifier() + opts;
517   if (kernels_.find(identifier) != kernels_.end())
518     return kernels_[identifier];
519 
520   //otherwise compile the kernels
521   std::cout<<"=== boxm2_ocl_update_based_dispersion_process::compiling kernels on device "<<identifier<<"==="<<std::endl;
522 
523   std::vector<std::string> src_paths;
524   std::string source_dir = boxm2_ocl_util::ocl_src_root();
525   src_paths.push_back(source_dir + "scene_info.cl");
526   src_paths.push_back(source_dir + "pixel_conversion.cl");
527   src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
528   src_paths.push_back(source_dir + "backproject.cl");
529   src_paths.push_back(source_dir + "atomics_util.cl");
530   src_paths.push_back(source_dir + "statistics_library_functions.cl");
531   src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
532   if (isRGB)
533     src_paths.push_back(source_dir + "bit/update_rgb_kernels.cl");
534   else
535     src_paths.push_back(source_dir + "bit/update_kernels.cl");
536   std::vector<std::string> non_ray_src = std::vector<std::string>(src_paths);
537 
538   //push ray trace files
539   if (isRGB)
540     src_paths.push_back(source_dir + "update_rgb_functors.cl");
541   else
542     src_paths.push_back(source_dir + "update_functors.cl");
543   src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
544 
545   //compilation options
546   const std::string& options = /*"-D ATOMIC_FLOAT " +*/ opts;
547 
548   //populate vector of kernels
549   std::vector<bocl_kernel*> vec_kernels;
550 
551   //seg len pass
552   auto* seg_len = new bocl_kernel();
553   std::string seg_opts = options + " -D SEGLEN  -D STEP_CELL=step_cell_seglen(aux_args,data_ptr,llid,d)";
554   seg_len->create_kernel(&device->context(), device->device_id(), src_paths, "seg_len_main", seg_opts, "update::seg_len");
555   vec_kernels.push_back(seg_len);
556 
557   //create  compress rgb pass
558   if (isRGB) {
559     auto* comp = new bocl_kernel();
560     std::string comp_opts = options + " -D COMPRESS_RGB";
561     comp->create_kernel(&device->context(), device->device_id(), non_ray_src, "compress_rgb", comp_opts, "update::compress_rgb");
562     vec_kernels.push_back(comp);
563   }
564   else {
565     //vec_kernels.push_back(NULL);
566   }
567   auto* pre_inf = new bocl_kernel();
568   std::string pre_opts = options + " -D PREINF  -D STEP_CELL=step_cell_preinf(aux_args,data_ptr,llid,d)";
569   pre_inf->create_kernel(&device->context(), device->device_id(), src_paths, "pre_inf_main", pre_opts, "update::pre_inf");
570   vec_kernels.push_back(pre_inf);
571 
572   //may need DIFF LIST OF SOURCES FOR THIS GUY
573   auto* proc_img = new bocl_kernel();
574   std::string proc_opts = options + " -D PROC_NORM ";
575   proc_img->create_kernel(&device->context(), device->device_id(), non_ray_src, "proc_norm_image", proc_opts, "update::proc_norm_image");
576   vec_kernels.push_back(proc_img);
577 
578   //push back cast_ray_bit
579   auto* bayes_main = new bocl_kernel();
580   std::string bayes_opt = options + " -D BAYES  -D STEP_CELL=step_cell_bayes(aux_args,data_ptr,llid,d)";
581   bayes_main->create_kernel(&device->context(), device->device_id(), src_paths, "bayes_main", bayes_opt, "update::bayes_main");
582   vec_kernels.push_back(bayes_main);
583 
584 
585   //may need DIFF LIST OF SOURCES FOR THSI GUY TOO
586   auto* update = new bocl_kernel();
587   std::string update_opts = options + " -D UPDATE_BIT_BASED_DISPERSION";
588   update->create_kernel(&device->context(), device->device_id(), non_ray_src, "update_bit_scene_based_dispersion", update_opts, "update::update_main");
589   vec_kernels.push_back(update);
590 
591   //store and return
592   kernels_[identifier] = vec_kernels;
593   return kernels_[identifier];
594 }
595 
596 
597 //makes sure appearance types correspond correctly
validate_appearances(const boxm2_scene_sptr & scene,std::string & data_type,int & appTypeSize,std::string & num_obs_type,std::string & options,bool &)598 bool boxm2_ocl_update_based_dispersion::validate_appearances(const boxm2_scene_sptr& scene,
599                                             std::string& data_type,
600                                             int& appTypeSize,
601                                             std::string& num_obs_type,
602                                             std::string& options,
603                                             bool&  /*isRGB*/)
604 {
605   std::vector<std::string> apps = scene->appearances();
606   bool foundDataType = false, foundNumObsType = false;
607   for (const auto & app : apps) {
608     if ( app == boxm2_data_traits<BOXM2_MOG3_GREY>::prefix() )
609     {
610       data_type = app;
611       foundDataType = true;
612       options=" -D MOG_TYPE_8";
613       appTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_MOG3_GREY>::prefix());
614     }
615     else if ( app == boxm2_data_traits<BOXM2_MOG3_GREY_16>::prefix() )
616     {
617       data_type = app;
618       foundDataType = true;
619       options=" -D MOG_TYPE_16";
620       appTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_MOG3_GREY_16>::prefix());
621     }
622     else if ( app == boxm2_data_traits<BOXM2_NUM_OBS>::prefix() )
623     {
624       num_obs_type = app;
625       foundNumObsType = true;
626     }
627   }
628   if (!foundDataType) {
629     std::cout<<"BOXM2_OPENCL_UPDATE_PROCESS ERROR: scene doesn't have BOXM2_MOG3_GREY or BOXM2_MOG3_GREY_16 data type"<<std::endl;
630     return false;
631   }
632   if (!foundNumObsType) {
633     std::cout<<"BOXM2_OPENCL_UPDATE_PROCESS ERROR: scene doesn't have BOXM2_NUM_OBS type"<<std::endl;
634     return false;
635   }
636   return true;
637 }
638