1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_ocl_probability_of_image_wcubic_process.cxx
2 //:
3 // \file
4 // \brief  A process for change detection
5 //
6 // \author Vishal Jain
7 // \date Mar 10, 2011
8 
9 #include <fstream>
10 #include <iostream>
11 #include <algorithm>
12 #include <bprb/bprb_func_process.h>
13 
14 #ifdef _MSC_VER
15 #  include "vcl_msvc_warnings.h"
16 #endif
17 #include <boxm2/ocl/boxm2_opencl_cache.h>
18 #include <boxm2/boxm2_scene.h>
19 #include <boxm2/boxm2_block.h>
20 #include <boxm2/boxm2_data_base.h>
21 #include <boxm2/ocl/boxm2_ocl_util.h>
22 #include <boxm2/boxm2_util.h>
23 #include "vil/vil_image_view.h"
24 //brdb stuff
25 #include <brdb/brdb_value.h>
26 
27 //directory utility
28 #include "vul/vul_timer.h"
29 #include <vcl_where_root_dir.h>
30 #include <bocl/bocl_device.h>
31 #include <bocl/bocl_kernel.h>
32 
33 #include <boxm2/ocl/algo/boxm2_ocl_camera_converter.h>
34 
35 namespace boxm2_ocl_probability_of_image_wcubic_process_globals
36 {
37   constexpr unsigned n_inputs_ = 7;
38   constexpr unsigned n_outputs_ = 1;
39   std::size_t lthreads[2]={8,8};
40 
41   static std::map<std::string,std::vector<bocl_kernel*> > kernels;
42 
compile_kernel(const bocl_device_sptr & device,std::vector<bocl_kernel * > & vec_kernels,std::string opts)43   void compile_kernel(const bocl_device_sptr& device,std::vector<bocl_kernel*> & vec_kernels, std::string opts)
44   {
45     //gather all render sources... seems like a lot for rendering...
46   //gather all render sources... seems like a lot for rendering...
47   std::vector<std::string> src_paths;
48   std::string source_dir = boxm2_ocl_util::ocl_src_root();
49   src_paths.push_back(source_dir + "scene_info.cl");
50   src_paths.push_back(source_dir + "cell_utils.cl");
51   src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
52   src_paths.push_back(source_dir + "backproject.cl");
53   src_paths.push_back(source_dir + "statistics_library_functions.cl");
54   src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
55   src_paths.push_back(source_dir + "bit/compute_probability_of_image.cl");
56   src_paths.push_back(source_dir + "update_cubic_functors.cl");
57   src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
58 
59   //set kernel options
60   opts += " -D PROB_CUBIC_IMAGE  ";
61   std::string options=opts;
62 
63   opts += " -D STEP_CELL=step_cell_cubic_compute_probability_of_intensity(aux_args,data_ptr,d*linfo->block_len,vis,aux_args.prob_image) ";
64 
65     //have kernel construct itself using the context and device
66     auto * ray_trace_kernel=new bocl_kernel();
67 
68     ray_trace_kernel->create_kernel( &device->context(),
69                                      device->device_id(),
70                                      src_paths,
71                                      "cubic_compute_probability_of_image",   //kernel name
72                                      opts,              //options
73                                      "boxm2 ocl probability computation"); //kernel identifier (for error checking)
74     vec_kernels.push_back(ray_trace_kernel);
75     //create normalize image kernel
76     std::vector<std::string> norm_src_paths;
77     norm_src_paths.push_back(source_dir + "pixel_conversion.cl");
78     norm_src_paths.push_back(source_dir + "bit/normalize_kernels.cl");
79     auto * normalize_render_kernel=new bocl_kernel();
80 
81     normalize_render_kernel->create_kernel( &device->context(),
82                                             device->device_id(),
83                                             norm_src_paths,
84                                             "normalize_probability_image_kernel",   //kernel name
85                                             "-D PROB_IMAGE",              //options
86                                             "normalize probability image kernel"); //kernel identifier (for error checking)
87 
88     vec_kernels.push_back(normalize_render_kernel);
89   }
90 }
91 
boxm2_ocl_probability_of_image_wcubic_process_cons(bprb_func_process & pro)92 bool boxm2_ocl_probability_of_image_wcubic_process_cons(bprb_func_process& pro)
93 {
94   using namespace boxm2_ocl_probability_of_image_wcubic_process_globals;
95 
96   //process takes 1 input
97   std::vector<std::string> input_types_(n_inputs_);
98   input_types_[0] = "bocl_device_sptr";
99   input_types_[1] = "boxm2_scene_sptr";
100   input_types_[2] = "boxm2_opencl_cache_sptr";
101   input_types_[3] = "vpgl_camera_double_sptr";
102   input_types_[4] = "vil_image_view_base_sptr";
103   input_types_[5] = "vcl_string";  // model identifier
104   input_types_[6] = "vcl_string";  // aux identifier
105 
106   // process has 1 output:
107   // output[0]: scene sptr
108   std::vector<std::string>  output_types_(n_outputs_);
109   output_types_[0] = "vil_image_view_base_sptr";
110 
111   brdb_value_sptr idx = new brdb_value_t<std::string>("");
112   pro.set_input(5, idx);
113   return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
114 }
115 
boxm2_ocl_probability_of_image_wcubic_process(bprb_func_process & pro)116 bool boxm2_ocl_probability_of_image_wcubic_process(bprb_func_process& pro)
117 {
118   using namespace boxm2_ocl_probability_of_image_wcubic_process_globals;
119   std::size_t local_threads[2]={8,8};
120   std::size_t global_threads[2]={8,8};
121 
122   if ( pro.n_inputs() < n_inputs_ ) {
123     std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
124     return false;
125   }
126   float transfer_time=0.0f;
127   float gpu_time=0.0f;
128   //get the inputs
129   unsigned i = 0;
130   bocl_device_sptr device= pro.get_input<bocl_device_sptr>(i++);
131   boxm2_scene_sptr scene =pro.get_input<boxm2_scene_sptr>(i++);
132   boxm2_opencl_cache_sptr opencl_cache= pro.get_input<boxm2_opencl_cache_sptr>(i++);
133   vpgl_camera_double_sptr cam= pro.get_input<vpgl_camera_double_sptr>(i++);
134   vil_image_view_base_sptr img =pro.get_input<vil_image_view_base_sptr>(i++);
135   std::string data_identifier =pro.get_input<std::string>(i++);
136   std::string image_identifier =pro.get_input<std::string>(i++);
137   unsigned ni=img->ni();
138   unsigned nj=img->nj();
139   bool foundDataType = false;
140   std::string data_type,options;
141   std::vector<std::string> apps = scene->appearances();
142   for (const auto & app : apps) {
143     if ( app == boxm2_data_traits<BOXM2_FLOAT8>::prefix() )
144     {
145       data_type = app;
146       foundDataType = true;
147       options=" -D FLOAT8 ";
148     }
149   }
150   if (!foundDataType) {
151     std::cout<<"boxm2_ocl_probability_of_image_wcubic_process ERROR: scene doesn't have BOXM2_MOG3_GREY or BOXM2_MOG3_GREY_16 data type"<<std::endl;
152     return false;
153   }
154 
155   if (data_identifier!="")
156       data_type = data_type+"_"+data_identifier;
157 
158 //: create a command queue.
159   int status=0;
160   cl_command_queue queue = clCreateCommandQueue(device->context(),*(device->device_id()),
161                                                 CL_QUEUE_PROFILING_ENABLE,&status);
162   if (status!=0) return false;
163 
164   std::string identifier=device->device_identifier()+options;
165   // compile the kernel
166   if (kernels.find(identifier)==kernels.end())
167   {
168     std::cout<<"===========Compiling kernels==========="<<std::endl;
169     std::vector<bocl_kernel*> ks;
170     compile_kernel(device,ks,options);
171     kernels[identifier]=ks;
172   }
173 
174   //set generic cam
175 
176   vil_image_view_base_sptr float_img=boxm2_util::prepare_input_image(img);
177   auto* img_view = static_cast<vil_image_view<float>* >(float_img.ptr());
178 
179   unsigned cl_ni=RoundUp(img_view->ni(),local_threads[0]);
180   unsigned cl_nj=RoundUp(img_view->nj(),local_threads[1]);
181 
182   auto* ray_origins = new cl_float[4*cl_ni*cl_nj];
183   auto* ray_directions = new cl_float[4*cl_ni*cl_nj];
184   bocl_mem_sptr ray_o_buff = new bocl_mem(device->context(), ray_origins, cl_ni*cl_nj * sizeof(cl_float4) , "ray_origins buffer");
185   bocl_mem_sptr ray_d_buff = new bocl_mem(device->context(), ray_directions,  cl_ni*cl_nj * sizeof(cl_float4), "ray_directions buffer");
186   boxm2_ocl_camera_converter::compute_ray_image( device, queue, cam, cl_ni, cl_nj, ray_o_buff, ray_d_buff);
187 
188   global_threads[0]=cl_ni;
189   global_threads[1]=cl_nj;
190   auto* vis_buff = new float[cl_ni*cl_nj];
191   auto* prob_image_buff = new float[cl_ni*cl_nj];
192   for (unsigned i=0;i<cl_ni*cl_nj;i++)
193   {
194     vis_buff[i]=1.0f;
195     prob_image_buff[i]=0.0f;
196   }
197 
198   bocl_mem_sptr prob_image=new bocl_mem(device->context(),prob_image_buff,cl_ni*cl_nj*sizeof(float),"expected image buffer");
199   prob_image->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
200 
201   bocl_mem_sptr vis_image=new bocl_mem(device->context(),vis_buff,cl_ni*cl_nj*sizeof(float),"vis image buffer");
202   vis_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
203 
204   // Image Dimensions
205   int img_dim_buff[4];
206   img_dim_buff[0] = 0;
207   img_dim_buff[1] = 0;
208   img_dim_buff[2] = img_view->ni();
209   img_dim_buff[3] = img_view->nj();
210   bocl_mem_sptr img_dim=new bocl_mem(device->context(), img_dim_buff, sizeof(int)*4, "image dims");
211   img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
212 
213   // Output Array
214   float output_arr[100];
215   for (float & i : output_arr) i = 0.0f;
216   bocl_mem_sptr  cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
217   cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
218 
219   // bit lookup buffer
220   cl_uchar lookup_arr[256];
221   boxm2_ocl_util::set_bit_lookup(lookup_arr);
222   bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
223   lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
224 
225   //For each ID in the visibility order, grab that block
226   std::vector<boxm2_block_id> vis_order = scene->get_vis_blocks(  (vpgl_perspective_camera<double>*) cam.ptr());
227   std::vector<boxm2_block_id>::iterator id;
228   for (id = vis_order.begin(); id != vis_order.end(); ++id)
229   {
230     //choose correct render kernel
231     boxm2_block_metadata mdata = scene->get_block_metadata(*id);
232     bocl_kernel* kern =  kernels[identifier][0];
233     //write the image values to the buffer
234     vul_timer transfer;
235     bocl_mem* blk = opencl_cache->get_block(scene,*id);
236     bocl_mem* blk_info = opencl_cache->loaded_block_info();
237     bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,*id);
238     int mogTypeSize = (int) boxm2_data_info::datasize(data_type);
239     auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
240     int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
241     info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
242     blk_info->write_to_buffer((queue));
243 
244     bocl_mem* mog = opencl_cache->get_data(scene,*id,data_type,alpha->num_bytes()/alphaTypeSize*mogTypeSize,false);    //info_buffer->data_buffer_length*boxm2_data_info::datasize(data_type));
245     std::string aux0_datatype = boxm2_data_traits<BOXM2_AUX0>::prefix(image_identifier);
246     bocl_mem* aux0 = opencl_cache->get_data(scene,*id,aux0_datatype,alpha->num_bytes(),false);
247     std::string aux1_datatype = boxm2_data_traits<BOXM2_AUX1>::prefix(image_identifier);
248     bocl_mem* aux1 = opencl_cache->get_data(scene,*id,aux1_datatype,alpha->num_bytes(),false);
249 
250     transfer_time += (float) transfer.all();
251 
252     ////3. SET args
253     kern->set_arg( blk_info );
254     kern->set_arg( blk );
255     kern->set_arg( alpha );
256     kern->set_arg( mog );
257     kern->set_arg( aux0 );
258     kern->set_arg( aux1 );
259     kern->set_arg( ray_o_buff.ptr() );
260     kern->set_arg( ray_d_buff.ptr() );
261     kern->set_arg( prob_image.ptr() );
262     kern->set_arg( vis_image.ptr() );
263     kern->set_arg( img_dim.ptr());
264     kern->set_arg( cl_output.ptr() );
265     kern->set_arg( lookup.ptr() );
266 
267     //local tree , cumsum buffer, imindex buffer
268     kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );
269     kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) );
270     kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) );
271 
272     //execute kernel
273     kern->execute(queue, 2, local_threads, global_threads);
274     clFinish(queue);
275     gpu_time += kern->exec_time();
276 
277     //clear render kernel args so it can reset em on next execution
278     kern->clear_args();
279   }
280 
281 #if 1
282   // normalize
283   bocl_kernel* normalize_prob_image_kernel =  kernels[identifier][1];
284   {
285     normalize_prob_image_kernel->set_arg( prob_image.ptr() );
286     normalize_prob_image_kernel->set_arg( vis_image.ptr() );
287     normalize_prob_image_kernel->set_arg( img_dim.ptr());
288     normalize_prob_image_kernel->execute( queue, 2, local_threads, global_threads);
289     clFinish(queue);
290     gpu_time += normalize_prob_image_kernel->exec_time();
291 
292     //clear render kernel args so it can reset em on next execution
293     normalize_prob_image_kernel->clear_args();
294   }
295 #endif
296   // read out expected image
297   prob_image->read_to_buffer(queue);
298   vis_image->read_to_buffer(queue);
299    clFinish(queue);
300   auto* prob_img_out=new vil_image_view<float>(ni,nj);
301 
302   for (unsigned c=0;c<nj;c++)
303     for (unsigned r=0;r<ni;r++)
304       (*prob_img_out)(r,c)=prob_image_buff[c*cl_ni+r];
305 
306   delete [] prob_image_buff;
307   delete [] vis_buff;
308 
309   clReleaseCommandQueue(queue);
310   i=0;
311   // store scene smaprt pointer
312   pro.set_output_val<vil_image_view_base_sptr>(i++, prob_img_out);
313   return true;
314 }
315