1 // This is brl/bseg/bstm/ocl/pro/processes/bstm_ocl_get_surface_pt_process.cxx
2 //:
3 // \file
4 // \brief  A process to help localize surface points in the 4d world. Given a camera and pixel location, the process shoots a ray
5 //         and traverses the volume until it hits a voxel with a prob > prob_t. It returns the location of this voxel so it can be
6 //         queried later on.
7 //
8 // \author Ali Osman Ulusoy
9 // \date Jan 30, 2013
10 
11 #include <fstream>
12 #include <iostream>
13 #include <algorithm>
14 #include <bprb/bprb_func_process.h>
15 
16 #ifdef _MSC_VER
17 #  include "vcl_msvc_warnings.h"
18 #endif
19 #include <bstm/ocl/bstm_opencl_cache.h>
20 #include <bstm/bstm_scene.h>
21 #include <bstm/bstm_block.h>
22 #include <bstm/bstm_data_base.h>
23 #include <bstm/bstm_util.h>
24 #include <bstm/ocl/bstm_ocl_util.h>
25 //brdb stuff
26 #include <brdb/brdb_value.h>
27 
28 //directory utility
29 #include <vcl_where_root_dir.h>
30 #include <bocl/bocl_device.h>
31 
32 #include <bocl/bocl_kernel.h>
33 #include "vul/vul_timer.h"
34 #include <boxm2/ocl/algo/boxm2_ocl_camera_converter.h>
35 #include <vsph/vsph_camera_bounds.h>
36 #include "vgl/vgl_ray_3d.h"
37 #include <boct/boct_bit_tree.h>
38 
39 
40 namespace bstm_ocl_get_surface_pt_process_globals
41 {
42   constexpr unsigned n_inputs_ = 10;
43   constexpr unsigned n_outputs_ = 3;
44   std::size_t lthreads[2]={8,8};
45 
46   static std::map<std::string,std::vector<bocl_kernel*> > kernels;
47 
compile_kernel(const bocl_device_sptr & device,std::vector<bocl_kernel * > & vec_kernels,const std::string & opts)48   void compile_kernel(const bocl_device_sptr& device,std::vector<bocl_kernel*> & vec_kernels, const std::string& opts)
49   {
50     //gather all render sources... seems like a lot for rendering...
51     std::vector<std::string> src_paths;
52     std::string source_dir = std::string(VCL_SOURCE_ROOT_DIR) + "/contrib/brl/bseg/bstm/ocl/cl/";
53     src_paths.push_back(source_dir + "scene_info.cl");
54     src_paths.push_back(source_dir + "pixel_conversion.cl");
55     src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
56     src_paths.push_back(source_dir + "bit/time_tree_library_functions.cl");
57     src_paths.push_back(source_dir + "backproject.cl");
58     src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
59     src_paths.push_back(source_dir + "bit/compute_surface_pt.cl");
60     src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
61 
62     //set kernel options
63     std::string options = opts + "-D SURFACE_PT ";
64     options += " -D STEP_CELL=step_cell_surface_pt(aux_args,data_ptr_tt,d*linfo->block_len,posx*linfo->block_len+linfo->origin.x,posy*linfo->block_len+linfo->origin.y,posz*linfo->block_len+linfo->origin.z)";
65 
66     //have kernel construct itself using the context and device
67     auto * ray_trace_kernel=new bocl_kernel();
68 
69     std::cout << "Compiling with options: " << options << std::endl;
70     ray_trace_kernel->create_kernel( &device->context(),
71                                      device->device_id(),
72                                      src_paths,
73                                      "compute_surface_pt",   //kernel name
74                                      options,              //options
75                                      "bstm compute_surface_pt"); //kernel identifier (for error checking)
76     vec_kernels.push_back(ray_trace_kernel);
77 
78 
79   }
80 }
81 
bstm_ocl_get_surface_pt_process_cons(bprb_func_process & pro)82 bool bstm_ocl_get_surface_pt_process_cons(bprb_func_process& pro)
83 {
84   using namespace bstm_ocl_get_surface_pt_process_globals;
85 
86   //process takes 1 input
87   std::vector<std::string> input_types_(n_inputs_);
88   input_types_[0] = "bocl_device_sptr";
89   input_types_[1] = "bstm_scene_sptr";
90   input_types_[2] = "bstm_opencl_cache_sptr";
91   input_types_[3] = "vpgl_camera_double_sptr";
92   input_types_[4] = "unsigned"; //ni
93   input_types_[5] = "unsigned"; //nj
94   input_types_[6] = "unsigned"; //pixel_x
95   input_types_[7] = "unsigned"; //pixel_y
96   input_types_[8] = "float"; // time
97   input_types_[9] = "float"; // prob threshold
98 
99   std::vector<std::string> output_types_(n_outputs_);
100   output_types_[0] = "float";
101   output_types_[1] = "float";
102   output_types_[2] = "float";
103   return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
104 
105 }
106 
bstm_ocl_get_surface_pt_process(bprb_func_process & pro)107 bool bstm_ocl_get_surface_pt_process(bprb_func_process& pro)
108 {
109   using namespace bstm_ocl_get_surface_pt_process_globals;
110 
111   if ( pro.n_inputs() < n_inputs_ ) {
112     std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
113     return false;
114   }
115   //get the inputs
116   unsigned i = 0;
117   bocl_device_sptr device= pro.get_input<bocl_device_sptr>(i++);
118   bstm_scene_sptr scene =pro.get_input<bstm_scene_sptr>(i++);
119   bstm_opencl_cache_sptr opencl_cache= pro.get_input<bstm_opencl_cache_sptr>(i++);
120   vpgl_camera_double_sptr cam= pro.get_input<vpgl_camera_double_sptr>(i++);
121   auto ni=pro.get_input<unsigned>(i++);
122   auto nj=pro.get_input<unsigned>(i++);
123   auto pixel_x=pro.get_input<unsigned>(i++);
124   auto pixel_y=pro.get_input<unsigned>(i++);
125   auto time = pro.get_input<float>(i++);
126   auto prob_t = pro.get_input<float>(i++);
127 
128 
129   //: create a command queue.
130   int status=0;
131   cl_command_queue queue = clCreateCommandQueue(device->context(),*(device->device_id()), CL_QUEUE_PROFILING_ENABLE,&status);
132   if (status!=0) return false;
133   std::string options = "";
134   std::string identifier=device->device_identifier()+options;
135   if (kernels.find(identifier)==kernels.end())  // compile the kernel
136   {
137     std::cout<<"===========Compiling kernels==========="<<std::endl;
138     std::vector<bocl_kernel*> ks;
139     compile_kernel(device,ks,options);
140     kernels[identifier]=ks;
141   }
142 
143 
144 
145   //start ray tracing
146   float transfer_time=0.0f;
147   float gpu_time=0.0f;
148 
149   //camera check
150   if (cam->type_name()!= "vpgl_perspective_camera" && cam->type_name()!= "vpgl_generic_camera" ) {
151     std::cout<<"Cannot render with camera of type "<<cam->type_name()<<std::endl;
152     return 0.0f;
153   }
154 
155   // create all buffers
156   unsigned cl_ni=RoundUp(ni,8);
157   unsigned cl_nj=RoundUp(nj,8);
158   auto* ray_origins = new cl_float[4*cl_ni*cl_nj];
159   auto* ray_directions = new cl_float[4*cl_ni*cl_nj];
160   bocl_mem_sptr ray_o_buff = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_origins, "ray_origins buffer");
161   bocl_mem_sptr ray_d_buff = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_directions, "ray_directions buffer");
162   boxm2_ocl_camera_converter::compute_ray_image( device, queue, cam, cl_ni, cl_nj, ray_o_buff, ray_d_buff);
163 
164   // Output Array
165   float output_arr[100];
166   for (float & i : output_arr) i = -1.0f;
167   bocl_mem_sptr  cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
168   cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
169 
170   // bit lookup buffer
171   cl_uchar lookup_arr[256];
172   bstm_ocl_util::set_bit_lookup(lookup_arr);
173   bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
174   lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
175 
176   int img_dim_buff[4];
177   img_dim_buff[0] = pixel_x;   img_dim_buff[2] = ni;
178   img_dim_buff[1] = pixel_y;   img_dim_buff[3] = nj;
179   bocl_mem_sptr exp_img_dim=new bocl_mem(device->context(), img_dim_buff, sizeof(int)*4, "image dims");
180   exp_img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
181 
182   auto cl_prob_t = (cl_float)prob_t;
183   bocl_mem_sptr prob_t_mem =new bocl_mem(device->context(), &cl_prob_t, sizeof(cl_float), "prob t buffer");
184   prob_t_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
185 
186   //2. set global thread size
187   std::size_t gThreads[] = {cl_ni,cl_nj};
188 
189   //3. set arguments
190   std::vector<bstm_block_id> vis_order = scene->get_vis_blocks(cam);
191   std::vector<bstm_block_id>::iterator id;
192   for (id = vis_order.begin(); id != vis_order.end(); ++id)
193   {
194       //choose correct render kernel
195       bstm_block_metadata mdata = scene->get_block_metadata(*id);
196 
197       //if the current blk does not contain the queried time, no need to ray cast
198       double local_time;
199       if(!mdata.contains_t(time,local_time))
200         continue;
201 
202       auto cl_time = (cl_float)local_time;
203       bocl_mem_sptr time_mem =new bocl_mem(device->context(), &cl_time, sizeof(cl_float), "time instance buffer");
204       time_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
205 
206       bocl_kernel* kern =  kernels[identifier][0];
207 
208       //write the image values to the buffer
209       vul_timer transfer;
210       bocl_mem* blk = opencl_cache->get_block(*id);
211       bocl_mem* blk_info = opencl_cache->loaded_block_info();
212       bocl_mem* blk_t = opencl_cache->get_time_block(*id);
213       bocl_mem* alpha = opencl_cache->get_data<BSTM_ALPHA>(*id);
214       transfer_time += (float) transfer.all();
215 
216       ////3. SET args
217       kern->set_arg( blk_info );
218       kern->set_arg( blk );
219       kern->set_arg( blk_t );
220       kern->set_arg( alpha );
221       kern->set_arg( ray_o_buff.ptr() );
222       kern->set_arg( ray_d_buff.ptr() );
223       kern->set_arg(exp_img_dim.ptr());
224       kern->set_arg( cl_output.ptr() );
225       kern->set_arg( lookup.ptr() );
226       kern->set_arg( time_mem.ptr() );
227       kern->set_arg( prob_t_mem.ptr() );
228 
229       //local tree , cumsum buffer, imindex buffer
230       kern->set_local_arg( lthreads[0]*lthreads[1]*sizeof(cl_uchar16) );
231       kern->set_local_arg( lthreads[0]*lthreads[1]*sizeof(cl_uchar8) );
232       kern->set_local_arg( lthreads[0]*lthreads[1]*10*sizeof(cl_uchar) );
233       kern->set_local_arg( lthreads[0]*lthreads[1]*sizeof(cl_int) );
234 
235       //execute kernel
236       kern->execute(queue, 2, lthreads, gThreads);
237       clFinish(queue);
238       gpu_time += kern->exec_time();
239 
240       //clear render kernel args so it can reset em on next execution
241       kern->clear_args();
242       kern->release_current_event();
243   }
244 
245   //read outout
246   cl_output->read_to_buffer(queue);
247   std::cout << "prob: " << output_arr[0] << " pt: (" <<  output_arr[1] << "," << output_arr[2] << "," << output_arr[3] << ")" << std::endl;
248 
249 
250   //clean up cam
251   delete[] ray_origins;
252   delete[] ray_directions;
253   opencl_cache->unref_mem(ray_o_buff.ptr());
254   opencl_cache->unref_mem(ray_d_buff.ptr());
255 
256   clReleaseCommandQueue(queue);
257 
258   i=0;
259   pro.set_output_val<float>(i++, output_arr[1]);
260   pro.set_output_val<float>(i++, output_arr[2]);
261   pro.set_output_val<float>(i++, output_arr[3]);
262   return true;
263 }
264