1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_ocl_probability_of_image_process.cxx
2 //:
3 // \file
4 // \brief  A process for change detection
5 //
6 // \author Vishal Jain
7 // \date Mar 10, 2011
8 
9 #include <fstream>
10 #include <iostream>
11 #include <algorithm>
12 #include <bprb/bprb_func_process.h>
13 
14 #ifdef _MSC_VER
15 #  include "vcl_msvc_warnings.h"
16 #endif
17 #include <boxm2/ocl/boxm2_opencl_cache.h>
18 #include <boxm2/boxm2_scene.h>
19 #include <boxm2/boxm2_block.h>
20 #include <boxm2/boxm2_data_base.h>
21 #include <boxm2/ocl/boxm2_ocl_util.h>
22 #include <boxm2/boxm2_util.h>
23 #include "vil/vil_image_view.h"
24 //brdb stuff
25 #include <brdb/brdb_value.h>
26 
27 //directory utility
28 #include "vul/vul_timer.h"
29 #include <vcl_where_root_dir.h>
30 #include <bocl/bocl_device.h>
31 #include <bocl/bocl_kernel.h>
32 
33 
34 namespace boxm2_ocl_probability_of_image_process_globals
35 {
36   constexpr unsigned n_inputs_ = 8;
37   constexpr unsigned n_outputs_ = 1;
38   std::size_t lthreads[2]={8,8};
39 
40   static std::map<std::string,std::vector<bocl_kernel*> > kernels;
41 
compile_kernel(const bocl_device_sptr & device,std::vector<bocl_kernel * > & vec_kernels,std::string opts)42   void compile_kernel(const bocl_device_sptr& device,std::vector<bocl_kernel*> & vec_kernels, std::string opts)
43   {
44     //gather all render sources... seems like a lot for rendering...
45   //gather all render sources... seems like a lot for rendering...
46   std::vector<std::string> src_paths;
47   std::string source_dir = boxm2_ocl_util::ocl_src_root();
48   src_paths.push_back(source_dir + "scene_info.cl");
49   src_paths.push_back(source_dir + "cell_utils.cl");
50   src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
51   src_paths.push_back(source_dir + "backproject.cl");
52   src_paths.push_back(source_dir + "statistics_library_functions.cl");
53   src_paths.push_back(source_dir + "expected_functor.cl");
54   src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
55   src_paths.push_back(source_dir + "bit/compute_probability_of_image.cl");
56   src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
57 
58   //set kernel options
59   opts += " -D PROB_IMAGE -D DETERMINISTIC ";
60   std::string options=opts;
61 
62   opts += " -D STEP_CELL=step_cell_compute_probability_of_intensity(aux_args.mog,aux_args.alpha,data_ptr,d*linfo->block_len,vis,aux_args.prob_image,aux_args.intensity) ";
63 
64     //have kernel construct itself using the context and device
65     auto * ray_trace_kernel=new bocl_kernel();
66 
67     ray_trace_kernel->create_kernel( &device->context(),
68                                      device->device_id(),
69                                      src_paths,
70                                      "compute_probability_of_image",   //kernel name
71                                      opts,              //options
72                                      "boxm2 ocl probability computation"); //kernel identifier (for error checking)
73     vec_kernels.push_back(ray_trace_kernel);
74     //create normalize image kernel
75     std::vector<std::string> norm_src_paths;
76     norm_src_paths.push_back(source_dir + "pixel_conversion.cl");
77     norm_src_paths.push_back(source_dir + "bit/normalize_kernels.cl");
78     auto * normalize_render_kernel=new bocl_kernel();
79 
80     normalize_render_kernel->create_kernel( &device->context(),
81                                             device->device_id(),
82                                             norm_src_paths,
83                                             "normalize_probability_image_kernel",   //kernel name
84                                             options,              //options
85                                             "normalize probability image kernel"); //kernel identifier (for error checking)
86 
87     vec_kernels.push_back(normalize_render_kernel);
88   }
89 }
90 
boxm2_ocl_probability_of_image_process_cons(bprb_func_process & pro)91 bool boxm2_ocl_probability_of_image_process_cons(bprb_func_process& pro)
92 {
93   using namespace boxm2_ocl_probability_of_image_process_globals;
94 
95   //process takes 1 input
96   std::vector<std::string> input_types_(n_inputs_);
97   input_types_[0] = "bocl_device_sptr";
98   input_types_[1] = "boxm2_scene_sptr";
99   input_types_[2] = "boxm2_opencl_cache_sptr";
100   input_types_[3] = "vpgl_camera_double_sptr";
101   input_types_[4] = "vil_image_view_base_sptr";
102   input_types_[5] = "vcl_string";  //identifier
103   input_types_[6] = "float";                        // near factor ( # of pixels should map to the finest voxel )
104   input_types_[7] = "float";                        // far factor (  # of pixels should map to the finest voxel )
105 
106   // process has 1 output:
107   // output[0]: scene sptr
108   std::vector<std::string>  output_types_(n_outputs_);
109   output_types_[0] = "vil_image_view_base_sptr";
110 
111   brdb_value_sptr idx = new brdb_value_t<std::string>("");
112   pro.set_input(5, idx);
113   brdb_value_sptr tnearfactor = new brdb_value_t<float>(100000.0f);  //by default update alpha
114   brdb_value_sptr tfarfactor = new brdb_value_t<float>(0.0001f);  //by default update alpha
115 
116   pro.set_input(6, tnearfactor);
117   pro.set_input(7, tfarfactor);
118   return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
119 
120 }
121 
boxm2_ocl_probability_of_image_process(bprb_func_process & pro)122 bool boxm2_ocl_probability_of_image_process(bprb_func_process& pro)
123 {
124   using namespace boxm2_ocl_probability_of_image_process_globals;
125   std::size_t local_threads[2]={8,8};
126   std::size_t global_threads[2]={8,8};
127 
128   if ( pro.n_inputs() < n_inputs_ ) {
129     std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
130     return false;
131   }
132   float transfer_time=0.0f;
133   float gpu_time=0.0f;
134   //get the inputs
135   unsigned i = 0;
136   bocl_device_sptr device= pro.get_input<bocl_device_sptr>(i++);
137   boxm2_scene_sptr scene =pro.get_input<boxm2_scene_sptr>(i++);
138   boxm2_opencl_cache_sptr opencl_cache= pro.get_input<boxm2_opencl_cache_sptr>(i++);
139   vpgl_camera_double_sptr cam= pro.get_input<vpgl_camera_double_sptr>(i++);
140   vil_image_view_base_sptr img =pro.get_input<vil_image_view_base_sptr>(i++);
141   std::string data_identifier =pro.get_input<std::string>(i++);
142   auto                    nearfactor = pro.get_input<float>(i++);
143   auto                    farfactor = pro.get_input<float>(i++);
144   unsigned ni=img->ni();
145   unsigned nj=img->nj();
146   bool foundDataType = false;
147   std::string data_type,options;
148   std::vector<std::string> apps = scene->appearances();
149   for (const auto & app : apps) {
150     if ( app == boxm2_data_traits<BOXM2_MOG3_GREY>::prefix() )
151     {
152       data_type = app;
153       foundDataType = true;
154       options=" -D MOG_TYPE_8 ";
155     }
156     else if ( app == boxm2_data_traits<BOXM2_MOG3_GREY_16>::prefix() )
157     {
158       data_type = app;
159       foundDataType = true;
160       options=" -D MOG_TYPE_16 ";
161     }
162     else if ( app == boxm2_data_traits<BOXM2_FLOAT8>::prefix() )
163     {
164       data_type = app;
165       foundDataType = true;
166       options=" -D FLOAT8 ";
167     }
168   }
169   if (!foundDataType) {
170     std::cout<<"boxm2_ocl_probability_of_image_process ERROR: scene doesn't have BOXM2_MOG3_GREY or BOXM2_MOG3_GREY_16 data type"<<std::endl;
171     return false;
172   }
173 
174   if (data_identifier!="")
175     data_type += data_identifier;
176 
177 //: create a command queue.
178   int status=0;
179   cl_command_queue queue = clCreateCommandQueue(device->context(),*(device->device_id()),
180                                                 CL_QUEUE_PROFILING_ENABLE,&status);
181   if (status!=0) return false;
182 
183   std::string identifier=device->device_identifier()+options;
184   // compile the kernel
185   if (kernels.find(identifier)==kernels.end())
186   {
187     std::cout<<"===========Compiling kernels==========="<<std::endl;
188     std::vector<bocl_kernel*> ks;
189     compile_kernel(device,ks,options);
190     kernels[identifier]=ks;
191   }
192 
193   // create all buffers
194   cl_float cam_buffer[48];
195   boxm2_ocl_util::set_persp_camera( (vpgl_perspective_camera<double>*) cam.ptr(), cam_buffer);
196   bocl_mem_sptr persp_cam=new bocl_mem(device->context(), cam_buffer, 3*sizeof(cl_float16), "persp cam buffer");
197   persp_cam->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
198 
199   vil_image_view_base_sptr float_img=boxm2_util::prepare_input_image(img);
200   auto* img_view = static_cast<vil_image_view<float>* >(float_img.ptr());
201 
202   unsigned cl_ni=RoundUp(img_view->ni(),local_threads[0]);
203   unsigned cl_nj=RoundUp(img_view->nj(),local_threads[1]);
204 
205   global_threads[0]=cl_ni;
206   global_threads[1]=cl_nj;
207   auto* vis_buff = new float[cl_ni*cl_nj];
208   auto* prob_image_buff = new float[cl_ni*cl_nj];
209   auto* input_buff=new float[cl_ni*cl_nj];
210   for (unsigned i=0;i<cl_ni*cl_nj;i++)
211   {
212       vis_buff[i]=1.0f;
213       prob_image_buff[i]=0.0f;
214   }
215   int count=0;
216   for (unsigned int j=0;j<cl_nj;++j)
217     for (unsigned int i=0;i<cl_ni;++i)
218     {
219       input_buff[count] = 0.0f;
220       if (i<img_view->ni() && j< img_view->nj())
221       {
222           input_buff[count]=(*img_view)(i,j);
223       }
224       ++count;
225     }
226 
227   bocl_mem_sptr in_image=new bocl_mem(device->context(),input_buff,cl_ni*cl_nj*sizeof(float),"input image buffer");
228   in_image->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
229 
230   bocl_mem_sptr prob_image=new bocl_mem(device->context(),prob_image_buff,cl_ni*cl_nj*sizeof(float),"expected image buffer");
231   prob_image->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
232 
233   bocl_mem_sptr vis_image=new bocl_mem(device->context(),vis_buff,cl_ni*cl_nj*sizeof(float),"vis image buffer");
234   vis_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
235 
236   // Image Dimensions
237   int img_dim_buff[4];
238   img_dim_buff[0] = 0;
239   img_dim_buff[1] = 0;
240   img_dim_buff[2] = img_view->ni();
241   img_dim_buff[3] = img_view->nj();
242   bocl_mem_sptr img_dim=new bocl_mem(device->context(), img_dim_buff, sizeof(int)*4, "image dims");
243   img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
244 
245   // Output Array
246   float output_arr[100];
247   for (float & i : output_arr) i = 0.0f;
248   bocl_mem_sptr  cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
249   cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
250 
251   // bit lookup buffer
252   cl_uchar lookup_arr[256];
253   boxm2_ocl_util::set_bit_lookup(lookup_arr);
254   bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
255   lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
256   //For each ID in the visibility order, grab that block
257   std::vector<boxm2_block_id> vis_order = scene->get_vis_blocks(  (vpgl_perspective_camera<double>*) cam.ptr());
258   std::vector<boxm2_block_id>::iterator id;
259   for (id = vis_order.begin(); id != vis_order.end(); ++id)
260   {
261     //choose correct render kernel
262     boxm2_block_metadata mdata = scene->get_block_metadata(*id);
263     bocl_kernel* kern =  kernels[identifier][0];
264 
265     //write the image values to the buffer
266     vul_timer transfer;
267     bocl_mem* blk = opencl_cache->get_block(scene,*id);
268     bocl_mem* blk_info = opencl_cache->loaded_block_info();
269     bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,*id);
270     int mogTypeSize = (int) boxm2_data_info::datasize(data_type);
271     auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
272     int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
273     info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
274     blk_info->write_to_buffer((queue));
275 
276     bocl_mem* mog = opencl_cache->get_data(scene,*id,data_type,alpha->num_bytes()/alphaTypeSize*mogTypeSize,false);    //info_buffer->data_buffer_length*boxm2_data_info::datasize(data_type));
277     transfer_time += (float) transfer.all();
278 
279     ////3. SET args
280     kern->set_arg( blk_info );
281     kern->set_arg( blk );
282     kern->set_arg( alpha );
283     kern->set_arg( mog );
284     kern->set_arg( persp_cam.ptr() );
285     kern->set_arg( in_image.ptr() );
286     kern->set_arg( prob_image.ptr() );
287     kern->set_arg( img_dim.ptr());
288     kern->set_arg( cl_output.ptr() );
289     kern->set_arg( lookup.ptr() );
290     kern->set_arg( vis_image.ptr() );
291 
292     //local tree , cumsum buffer, imindex buffer
293     kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );
294     kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) );
295     kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) );
296 
297     //execute kernel
298     kern->execute(queue, 2, local_threads, global_threads);
299     clFinish(queue);
300     gpu_time += kern->exec_time();
301 
302     //clear render kernel args so it can reset em on next execution
303     kern->clear_args();
304   }
305   // normalize
306   bocl_kernel* normalize_prob_image_kernel =  kernels[identifier][1];
307   {
308     normalize_prob_image_kernel->set_arg( prob_image.ptr() );
309     normalize_prob_image_kernel->set_arg( vis_image.ptr() );
310     normalize_prob_image_kernel->set_arg( img_dim.ptr());
311     normalize_prob_image_kernel->execute( queue, 2, local_threads, global_threads);
312     clFinish(queue);
313     gpu_time += normalize_prob_image_kernel->exec_time();
314 
315     //clear render kernel args so it can reset em on next execution
316     normalize_prob_image_kernel->clear_args();
317   }
318 
319   // read out expected image
320   prob_image->read_to_buffer(queue);
321   auto* prob_img_out=new vil_image_view<float>(ni,nj);
322 
323   for (unsigned c=0;c<nj;c++)
324     for (unsigned r=0;r<ni;r++)
325       (*prob_img_out)(r,c)=prob_image_buff[c*cl_ni+r];
326 
327   delete [] prob_image_buff;
328   delete [] vis_buff;
329   delete [] input_buff;
330 
331   clReleaseCommandQueue(queue);
332   i=0;
333   // store scene smaprt pointer
334   pro.set_output_val<vil_image_view_base_sptr>(i++, prob_img_out);
335   return true;
336 }
337