1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_ocl_flip_normals_using_vis_process.cxx
2 #include <iostream>
3 #include <fstream>
4 #include <bprb/bprb_func_process.h>
5 //:
6 // \file
7 // \brief  A process for solving the sign ambiguity in the normals, typically computed using filtering.
8 // This processes uses visibility to disambiguate the normal direction.
9 // Given boxm2_point and boxm2_normal data, it computes maximum visibility on the hemisphere centered
10 // at the normal and also on the hemisphere centered at the flipped normal. The direction with the higher
11 // visibility is saved in boxm2_normal. Also, the highest visibility is saved in boxm2_vis_score.
12 //
13 // TODO: implement a vis_sphere initializer kernel.
14 // \author Ali Osman Ulusoy
15 // \date Oct 10, 2011
16 
17 #ifdef _MSC_VER
18 #  include "vcl_msvc_warnings.h"
19 #endif
20 #include <boxm2/ocl/boxm2_opencl_cache.h>
21 #include <boxm2/boxm2_scene.h>
22 #include <boxm2/boxm2_block.h>
23 #include <boxm2/boxm2_data_base.h>
24 #include <boxm2/ocl/boxm2_ocl_util.h>
25 #include <boxm2/ocl/algo/boxm2_ocl_camera_converter.h>
26 
27 //brdb stuff
28 #include <brdb/brdb_value.h>
29 
30 //directory utility
31 #include "vul/vul_timer.h"
32 #include <bocl/bocl_device.h>
33 #include <bocl/bocl_kernel.h>
34 
35 namespace boxm2_ocl_flip_normals_using_vis_process_globals
36 {
37   constexpr unsigned n_inputs_ = 4;
38   constexpr unsigned n_outputs_ = 0;
39   enum {
40       COMPUTE_VIS = 0,
41       DECIDE_NORMAL = 1
42   };
43 
compile_kernel(const bocl_device_sptr & device,std::vector<bocl_kernel * > & vec_kernels,const std::string & opts)44   void compile_kernel(const bocl_device_sptr& device,std::vector<bocl_kernel*> & vec_kernels,const std::string& opts)
45   {
46     //gather all render sources... seems like a lot for rendering...
47     std::vector<std::string> src_paths;
48     std::string source_dir = boxm2_ocl_util::ocl_src_root();
49     src_paths.push_back(source_dir + "scene_info.cl");
50     src_paths.push_back(source_dir + "cell_utils.cl");
51     src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
52     src_paths.push_back(source_dir + "basic/sort_vector.cl");
53     src_paths.push_back(source_dir + "backproject.cl");
54     src_paths.push_back(source_dir + "statistics_library_functions.cl");
55     src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
56     src_paths.push_back(source_dir + "bit/compute_vis.cl");
57     src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
58 
59     //compilation options
60     std::string options = opts+ "-D INTENSITY ";
61 
62     auto* compute_vis = new bocl_kernel();
63     std::string seg_opts = options + "-D COMPVIS -D STEP_CELL=step_cell_computevis(aux_args,data_ptr,llid,d)";
64     compute_vis->create_kernel(&device->context(),device->device_id(), src_paths, "compute_vis", seg_opts, "compute_vis");
65     vec_kernels.push_back(compute_vis);
66 
67     auto* decide_normal_dir = new bocl_kernel();
68     decide_normal_dir->create_kernel(&device->context(),device->device_id(), src_paths, "decide_normal_dir", seg_opts, "decide_normal_dir");
69     vec_kernels.push_back(decide_normal_dir);
70     return ;
71   }
72 
73   static std::map<std::string,std::vector<bocl_kernel*> > kernels;
74 }
75 
boxm2_ocl_flip_normals_using_vis_process_cons(bprb_func_process & pro)76 bool boxm2_ocl_flip_normals_using_vis_process_cons(bprb_func_process& pro)
77 {
78   using namespace boxm2_ocl_flip_normals_using_vis_process_globals;
79 
80   //process takes 4 inputs
81   std::vector<std::string> input_types_(n_inputs_);
82   input_types_[0] = "bocl_device_sptr";
83   input_types_[1] = "boxm2_scene_sptr";
84   input_types_[2] = "boxm2_opencl_cache_sptr";
85   input_types_[3] = "bool";
86 
87   // process has no outputs
88   std::vector<std::string>  output_types_(n_outputs_);
89   bool good = pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
90 
91   return good;
92 }
93 
94 
boxm2_ocl_flip_normals_using_vis_process(bprb_func_process & pro)95 bool boxm2_ocl_flip_normals_using_vis_process(bprb_func_process& pro)
96 {
97   using namespace boxm2_ocl_flip_normals_using_vis_process_globals;
98   std::size_t local_threads[2]={8,8};
99   std::size_t global_threads[2]={8,8};
100 
101   //sanity check inputs
102   if ( pro.n_inputs() < n_inputs_ ) {
103     std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
104     return false;
105   }
106   float transfer_time=0.0f;
107   float gpu_time=0.0f;
108 
109   //get the inputs
110   unsigned i = 0;
111   bocl_device_sptr         device = pro.get_input<bocl_device_sptr>(i++);
112   boxm2_scene_sptr         scene = pro.get_input<boxm2_scene_sptr>(i++);
113   boxm2_opencl_cache_sptr  opencl_cache = pro.get_input<boxm2_opencl_cache_sptr>(i++);
114   bool use_sum = false;    use_sum = pro.get_input<bool>(i++);
115 
116   //cache size sanity check
117   long binCache = opencl_cache.ptr()->bytes_in_cache();
118   std::cout<<"Update MBs in cache: "<<binCache/(1024.0*1024.0)<<std::endl;
119 
120   //make correct data types are here
121   std::string data_type,num_obs_type,options;
122 
123   if (use_sum) {
124     options="-D USESUM ";
125     std::cout << "Using sum to compute visibility" << std::endl;
126   }
127 
128 
129   // create a command queue.
130   int status=0;
131   cl_command_queue queue = clCreateCommandQueue( device->context(),
132                                                  *(device->device_id()),
133                                                  CL_QUEUE_PROFILING_ENABLE,
134                                                  &status);
135   if (status!=0)
136     return false;
137 
138   // compile the kernel if not already compiled
139   std::string identifier=device->device_identifier()+options;
140   if (kernels.find(identifier)==kernels.end()) {
141     std::cout<<"===========Compiling kernels==========="<<std::endl;
142     std::vector<bocl_kernel*> ks;
143     compile_kernel(device,ks,options);
144     kernels[identifier]=ks;
145   }
146 
147   // bit lookup buffer
148   cl_uchar lookup_arr[256];
149   boxm2_ocl_util::set_bit_lookup(lookup_arr);
150   bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
151   lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
152 
153   // dodecahedron directions lookup buffer
154   cl_float4 dodecahedron_dir[12];
155   boxm2_ocl_util::set_dodecahedron_dir_lookup(dodecahedron_dir);
156   bocl_mem_sptr dodecahedron_dir_lookup=new bocl_mem(device->context(), dodecahedron_dir, sizeof(cl_float4)*12, "dodecahedron directions lookup buffer");
157   dodecahedron_dir_lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
158 
159   cl_bool contain_point[1];
160   bocl_mem_sptr contain_point_mem =new bocl_mem(device->context(), contain_point, sizeof(cl_bool), "contains point buffer");
161   contain_point_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
162 
163   cl_uint datasize[1];
164   bocl_mem_sptr datasize_mem =new bocl_mem(device->context(), datasize, sizeof(cl_uint), "data buffer size");
165   datasize_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
166 
167   //zip through each block
168   std::map<boxm2_block_id, boxm2_block_metadata> blocks = scene->blocks();
169   std::map<boxm2_block_id, boxm2_block_metadata>::iterator blk_iter;
170   for (unsigned int i=0; i<kernels[identifier].size(); ++i)
171   {
172       //remove all the alphas and points from opencl cache
173       if (i == DECIDE_NORMAL) {
174           for (blk_iter = blocks.begin(); blk_iter != blocks.end(); ++blk_iter)
175           {
176             boxm2_block_id id = blk_iter->first;
177             opencl_cache->shallow_remove_data(scene,id,boxm2_data_traits<BOXM2_ALPHA>::prefix());
178             opencl_cache->shallow_remove_data(scene,id,boxm2_data_traits<BOXM2_POINT>::prefix());
179           }
180       }
181 
182       for (blk_iter = blocks.begin(); blk_iter != blocks.end(); ++blk_iter)
183       {
184         boxm2_block_id id = blk_iter->first;
185         std::cout << "Processing block: " << id << std::endl;
186 
187         //get kernel
188         bocl_kernel* kern =  kernels[identifier][i];
189 
190         vul_timer transfer;
191 
192         //load normals
193         bocl_mem* normals = opencl_cache->get_data<BOXM2_NORMAL>(scene,blk_iter->first,0,false);
194         std::size_t normalsTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_NORMAL>::prefix());
195 
196         //load block info
197         datasize[0] = (unsigned)(normals->num_bytes()/normalsTypeSize);
198         datasize_mem->write_to_buffer((queue));
199 
200         transfer_time += (float) transfer.all();
201         if (i==COMPUTE_VIS) {
202 
203             //array to store visibilities computed around a sphere
204             //ask for a new BOXM2_VIS_SPHERE data so that it gets initialized properly.
205             std::size_t visTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_VIS_SPHERE>::prefix());
206             bocl_mem *vis_sphere = opencl_cache->get_data_new<BOXM2_VIS_SPHERE>(scene,blk_iter->first, (normals->num_bytes()/normalsTypeSize)*visTypeSize, false);
207 
208             //zip through each block
209             std::map<boxm2_block_id, boxm2_block_metadata>::iterator blk_iter_inner;
210             for (blk_iter_inner = blocks.begin(); blk_iter_inner != blocks.end(); ++blk_iter_inner) {
211 
212               transfer.mark();
213               boxm2_block_id id_inner = blk_iter_inner->first;
214               //std::cout << "--Loading block " << id_inner << std::endl;
215 
216               //load tree and alpha
217               boxm2_block_metadata mdata = blk_iter_inner->second;
218               vul_timer transfer;
219               bocl_mem* blk = opencl_cache->get_block(scene,blk_iter_inner->first);
220               bocl_mem* blk_info = opencl_cache->loaded_block_info();
221               bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,blk_iter_inner->first,0,false);
222               auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
223               int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
224               info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
225               blk_info->write_to_buffer((queue));
226 
227               bocl_mem* points = opencl_cache->get_data<BOXM2_POINT>(scene,blk_iter->first,0,false);
228 
229               if (id == id_inner)
230                 contain_point[0] = true;
231               else
232                 contain_point[0] = false;
233               contain_point_mem->write_to_buffer(queue);
234 
235               transfer_time += (float) transfer.all();
236 
237               local_threads[0] = 64;
238               local_threads[1] = 1;
239               global_threads[0] = RoundUp((normals->num_bytes()/normalsTypeSize), local_threads[0]);
240               global_threads[1]=1;
241 
242               kern->set_arg( datasize_mem.ptr() );
243               kern->set_arg( blk_info );
244               kern->set_arg( dodecahedron_dir_lookup.ptr());
245               kern->set_arg( blk );
246               kern->set_arg( lookup.ptr()  );
247               kern->set_arg( alpha  );
248               kern->set_arg( points );
249               kern->set_arg( normals );
250               kern->set_arg( vis_sphere);
251               kern->set_arg( contain_point_mem.ptr());
252               kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
253               kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
254 
255               //execute kernel
256               kern->execute(queue, 2, local_threads, global_threads);
257               int status = clFinish(queue);
258               check_val(status, MEM_FAILURE, "VISIBIITY EXECUTE FAILED: " + error_to_string(status));
259               gpu_time += kern->exec_time();
260 
261               //clear render kernel args so it can reset em on next execution
262               kern->clear_args();
263             }
264 
265           //read from gpu
266           vis_sphere->read_to_buffer(queue);
267           int status = clFinish(queue);
268           check_val(status, MEM_FAILURE, "READ VIS_SPHERE FAILED: " + error_to_string(status));
269         }
270         else if (i == DECIDE_NORMAL) {
271           transfer.mark();
272 
273           //load tree
274           boxm2_block_metadata mdata = blk_iter->second;
275           vul_timer transfer;
276           /* bocl_mem* blk = */ opencl_cache->get_block(scene,blk_iter->first);
277           bocl_mem* blk_info = opencl_cache->loaded_block_info();
278           auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
279           info_buffer->data_buffer_length = (int) (normals->num_bytes()/normalsTypeSize);
280           blk_info->write_to_buffer((queue));
281 
282           //load visibilities
283           bocl_mem* vis_sphere = opencl_cache->get_data<BOXM2_VIS_SPHERE>(scene,blk_iter->first,0,false);
284 
285           //array to store final visibility score of a point
286           bocl_mem* vis = opencl_cache->get_data<BOXM2_VIS_SCORE>(scene,blk_iter->first, (normals->num_bytes()/normalsTypeSize)
287                                                   *boxm2_data_info::datasize(boxm2_data_traits<BOXM2_VIS_SCORE>::prefix()),false);
288 
289           transfer_time += (float) transfer.all();
290 
291           local_threads[0] = 128;
292           local_threads[1] = 1;
293           global_threads[0] = RoundUp((normals->num_bytes()/normalsTypeSize), local_threads[0]);
294           global_threads[1]=1;
295 
296           kern->set_arg( blk_info );
297           kern->set_arg( dodecahedron_dir_lookup.ptr() );
298           kern->set_arg( normals );
299           kern->set_arg( vis );
300           kern->set_arg( vis_sphere);
301 
302           //execute kernel
303           kern->execute(queue, 2, local_threads, global_threads);
304           int status = clFinish(queue);
305           check_val(status, MEM_FAILURE, "DECIDE NORMAL DIR EXECUTE FAILED: " + error_to_string(status));
306           gpu_time += kern->exec_time();
307 
308           //read normals and vis from gpu
309           normals->read_to_buffer(queue);
310           vis->read_to_buffer(queue);
311           status = clFinish(queue);
312           check_val(status, MEM_FAILURE, "READ NORMALS FAILED: " + error_to_string(status));
313 
314           //clear render kernel args so it can reset em on next execution
315           kern->clear_args();
316       }
317 
318       //shallow remove from ocl cache unnecessary items from ocl cache.
319       opencl_cache->shallow_remove_data(scene,id,boxm2_data_traits<BOXM2_VIS_SPHERE>::prefix());
320     }
321   }
322 
323   std::cout<<"Gpu time "<<gpu_time<<" transfer time "<<transfer_time<<std::endl;
324   clReleaseCommandQueue(queue);
325   return true;
326 }
327