1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_ocl_flip_normals_using_vis_process.cxx
2 #include <iostream>
3 #include <fstream>
4 #include <bprb/bprb_func_process.h>
5 //:
6 // \file
7 // \brief A process for solving the sign ambiguity in the normals, typically computed using filtering.
8 // This processes uses visibility to disambiguate the normal direction.
9 // Given boxm2_point and boxm2_normal data, it computes maximum visibility on the hemisphere centered
10 // at the normal and also on the hemisphere centered at the flipped normal. The direction with the higher
11 // visibility is saved in boxm2_normal. Also, the highest visibility is saved in boxm2_vis_score.
12 //
13 // TODO: implement a vis_sphere initializer kernel.
14 // \author Ali Osman Ulusoy
15 // \date Oct 10, 2011
16
17 #ifdef _MSC_VER
18 # include "vcl_msvc_warnings.h"
19 #endif
20 #include <boxm2/ocl/boxm2_opencl_cache.h>
21 #include <boxm2/boxm2_scene.h>
22 #include <boxm2/boxm2_block.h>
23 #include <boxm2/boxm2_data_base.h>
24 #include <boxm2/ocl/boxm2_ocl_util.h>
25 #include <boxm2/ocl/algo/boxm2_ocl_camera_converter.h>
26
27 //brdb stuff
28 #include <brdb/brdb_value.h>
29
30 //directory utility
31 #include "vul/vul_timer.h"
32 #include <bocl/bocl_device.h>
33 #include <bocl/bocl_kernel.h>
34
35 namespace boxm2_ocl_flip_normals_using_vis_process_globals
36 {
37 constexpr unsigned n_inputs_ = 4;
38 constexpr unsigned n_outputs_ = 0;
39 enum {
40 COMPUTE_VIS = 0,
41 DECIDE_NORMAL = 1
42 };
43
compile_kernel(const bocl_device_sptr & device,std::vector<bocl_kernel * > & vec_kernels,const std::string & opts)44 void compile_kernel(const bocl_device_sptr& device,std::vector<bocl_kernel*> & vec_kernels,const std::string& opts)
45 {
46 //gather all render sources... seems like a lot for rendering...
47 std::vector<std::string> src_paths;
48 std::string source_dir = boxm2_ocl_util::ocl_src_root();
49 src_paths.push_back(source_dir + "scene_info.cl");
50 src_paths.push_back(source_dir + "cell_utils.cl");
51 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
52 src_paths.push_back(source_dir + "basic/sort_vector.cl");
53 src_paths.push_back(source_dir + "backproject.cl");
54 src_paths.push_back(source_dir + "statistics_library_functions.cl");
55 src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
56 src_paths.push_back(source_dir + "bit/compute_vis.cl");
57 src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
58
59 //compilation options
60 std::string options = opts+ "-D INTENSITY ";
61
62 auto* compute_vis = new bocl_kernel();
63 std::string seg_opts = options + "-D COMPVIS -D STEP_CELL=step_cell_computevis(aux_args,data_ptr,llid,d)";
64 compute_vis->create_kernel(&device->context(),device->device_id(), src_paths, "compute_vis", seg_opts, "compute_vis");
65 vec_kernels.push_back(compute_vis);
66
67 auto* decide_normal_dir = new bocl_kernel();
68 decide_normal_dir->create_kernel(&device->context(),device->device_id(), src_paths, "decide_normal_dir", seg_opts, "decide_normal_dir");
69 vec_kernels.push_back(decide_normal_dir);
70 return ;
71 }
72
73 static std::map<std::string,std::vector<bocl_kernel*> > kernels;
74 }
75
boxm2_ocl_flip_normals_using_vis_process_cons(bprb_func_process & pro)76 bool boxm2_ocl_flip_normals_using_vis_process_cons(bprb_func_process& pro)
77 {
78 using namespace boxm2_ocl_flip_normals_using_vis_process_globals;
79
80 //process takes 4 inputs
81 std::vector<std::string> input_types_(n_inputs_);
82 input_types_[0] = "bocl_device_sptr";
83 input_types_[1] = "boxm2_scene_sptr";
84 input_types_[2] = "boxm2_opencl_cache_sptr";
85 input_types_[3] = "bool";
86
87 // process has no outputs
88 std::vector<std::string> output_types_(n_outputs_);
89 bool good = pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
90
91 return good;
92 }
93
94
boxm2_ocl_flip_normals_using_vis_process(bprb_func_process & pro)95 bool boxm2_ocl_flip_normals_using_vis_process(bprb_func_process& pro)
96 {
97 using namespace boxm2_ocl_flip_normals_using_vis_process_globals;
98 std::size_t local_threads[2]={8,8};
99 std::size_t global_threads[2]={8,8};
100
101 //sanity check inputs
102 if ( pro.n_inputs() < n_inputs_ ) {
103 std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
104 return false;
105 }
106 float transfer_time=0.0f;
107 float gpu_time=0.0f;
108
109 //get the inputs
110 unsigned i = 0;
111 bocl_device_sptr device = pro.get_input<bocl_device_sptr>(i++);
112 boxm2_scene_sptr scene = pro.get_input<boxm2_scene_sptr>(i++);
113 boxm2_opencl_cache_sptr opencl_cache = pro.get_input<boxm2_opencl_cache_sptr>(i++);
114 bool use_sum = false; use_sum = pro.get_input<bool>(i++);
115
116 //cache size sanity check
117 long binCache = opencl_cache.ptr()->bytes_in_cache();
118 std::cout<<"Update MBs in cache: "<<binCache/(1024.0*1024.0)<<std::endl;
119
120 //make correct data types are here
121 std::string data_type,num_obs_type,options;
122
123 if (use_sum) {
124 options="-D USESUM ";
125 std::cout << "Using sum to compute visibility" << std::endl;
126 }
127
128
129 // create a command queue.
130 int status=0;
131 cl_command_queue queue = clCreateCommandQueue( device->context(),
132 *(device->device_id()),
133 CL_QUEUE_PROFILING_ENABLE,
134 &status);
135 if (status!=0)
136 return false;
137
138 // compile the kernel if not already compiled
139 std::string identifier=device->device_identifier()+options;
140 if (kernels.find(identifier)==kernels.end()) {
141 std::cout<<"===========Compiling kernels==========="<<std::endl;
142 std::vector<bocl_kernel*> ks;
143 compile_kernel(device,ks,options);
144 kernels[identifier]=ks;
145 }
146
147 // bit lookup buffer
148 cl_uchar lookup_arr[256];
149 boxm2_ocl_util::set_bit_lookup(lookup_arr);
150 bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
151 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
152
153 // dodecahedron directions lookup buffer
154 cl_float4 dodecahedron_dir[12];
155 boxm2_ocl_util::set_dodecahedron_dir_lookup(dodecahedron_dir);
156 bocl_mem_sptr dodecahedron_dir_lookup=new bocl_mem(device->context(), dodecahedron_dir, sizeof(cl_float4)*12, "dodecahedron directions lookup buffer");
157 dodecahedron_dir_lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
158
159 cl_bool contain_point[1];
160 bocl_mem_sptr contain_point_mem =new bocl_mem(device->context(), contain_point, sizeof(cl_bool), "contains point buffer");
161 contain_point_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
162
163 cl_uint datasize[1];
164 bocl_mem_sptr datasize_mem =new bocl_mem(device->context(), datasize, sizeof(cl_uint), "data buffer size");
165 datasize_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
166
167 //zip through each block
168 std::map<boxm2_block_id, boxm2_block_metadata> blocks = scene->blocks();
169 std::map<boxm2_block_id, boxm2_block_metadata>::iterator blk_iter;
170 for (unsigned int i=0; i<kernels[identifier].size(); ++i)
171 {
172 //remove all the alphas and points from opencl cache
173 if (i == DECIDE_NORMAL) {
174 for (blk_iter = blocks.begin(); blk_iter != blocks.end(); ++blk_iter)
175 {
176 boxm2_block_id id = blk_iter->first;
177 opencl_cache->shallow_remove_data(scene,id,boxm2_data_traits<BOXM2_ALPHA>::prefix());
178 opencl_cache->shallow_remove_data(scene,id,boxm2_data_traits<BOXM2_POINT>::prefix());
179 }
180 }
181
182 for (blk_iter = blocks.begin(); blk_iter != blocks.end(); ++blk_iter)
183 {
184 boxm2_block_id id = blk_iter->first;
185 std::cout << "Processing block: " << id << std::endl;
186
187 //get kernel
188 bocl_kernel* kern = kernels[identifier][i];
189
190 vul_timer transfer;
191
192 //load normals
193 bocl_mem* normals = opencl_cache->get_data<BOXM2_NORMAL>(scene,blk_iter->first,0,false);
194 std::size_t normalsTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_NORMAL>::prefix());
195
196 //load block info
197 datasize[0] = (unsigned)(normals->num_bytes()/normalsTypeSize);
198 datasize_mem->write_to_buffer((queue));
199
200 transfer_time += (float) transfer.all();
201 if (i==COMPUTE_VIS) {
202
203 //array to store visibilities computed around a sphere
204 //ask for a new BOXM2_VIS_SPHERE data so that it gets initialized properly.
205 std::size_t visTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_VIS_SPHERE>::prefix());
206 bocl_mem *vis_sphere = opencl_cache->get_data_new<BOXM2_VIS_SPHERE>(scene,blk_iter->first, (normals->num_bytes()/normalsTypeSize)*visTypeSize, false);
207
208 //zip through each block
209 std::map<boxm2_block_id, boxm2_block_metadata>::iterator blk_iter_inner;
210 for (blk_iter_inner = blocks.begin(); blk_iter_inner != blocks.end(); ++blk_iter_inner) {
211
212 transfer.mark();
213 boxm2_block_id id_inner = blk_iter_inner->first;
214 //std::cout << "--Loading block " << id_inner << std::endl;
215
216 //load tree and alpha
217 boxm2_block_metadata mdata = blk_iter_inner->second;
218 vul_timer transfer;
219 bocl_mem* blk = opencl_cache->get_block(scene,blk_iter_inner->first);
220 bocl_mem* blk_info = opencl_cache->loaded_block_info();
221 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,blk_iter_inner->first,0,false);
222 auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
223 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
224 info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
225 blk_info->write_to_buffer((queue));
226
227 bocl_mem* points = opencl_cache->get_data<BOXM2_POINT>(scene,blk_iter->first,0,false);
228
229 if (id == id_inner)
230 contain_point[0] = true;
231 else
232 contain_point[0] = false;
233 contain_point_mem->write_to_buffer(queue);
234
235 transfer_time += (float) transfer.all();
236
237 local_threads[0] = 64;
238 local_threads[1] = 1;
239 global_threads[0] = RoundUp((normals->num_bytes()/normalsTypeSize), local_threads[0]);
240 global_threads[1]=1;
241
242 kern->set_arg( datasize_mem.ptr() );
243 kern->set_arg( blk_info );
244 kern->set_arg( dodecahedron_dir_lookup.ptr());
245 kern->set_arg( blk );
246 kern->set_arg( lookup.ptr() );
247 kern->set_arg( alpha );
248 kern->set_arg( points );
249 kern->set_arg( normals );
250 kern->set_arg( vis_sphere);
251 kern->set_arg( contain_point_mem.ptr());
252 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
253 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
254
255 //execute kernel
256 kern->execute(queue, 2, local_threads, global_threads);
257 int status = clFinish(queue);
258 check_val(status, MEM_FAILURE, "VISIBIITY EXECUTE FAILED: " + error_to_string(status));
259 gpu_time += kern->exec_time();
260
261 //clear render kernel args so it can reset em on next execution
262 kern->clear_args();
263 }
264
265 //read from gpu
266 vis_sphere->read_to_buffer(queue);
267 int status = clFinish(queue);
268 check_val(status, MEM_FAILURE, "READ VIS_SPHERE FAILED: " + error_to_string(status));
269 }
270 else if (i == DECIDE_NORMAL) {
271 transfer.mark();
272
273 //load tree
274 boxm2_block_metadata mdata = blk_iter->second;
275 vul_timer transfer;
276 /* bocl_mem* blk = */ opencl_cache->get_block(scene,blk_iter->first);
277 bocl_mem* blk_info = opencl_cache->loaded_block_info();
278 auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
279 info_buffer->data_buffer_length = (int) (normals->num_bytes()/normalsTypeSize);
280 blk_info->write_to_buffer((queue));
281
282 //load visibilities
283 bocl_mem* vis_sphere = opencl_cache->get_data<BOXM2_VIS_SPHERE>(scene,blk_iter->first,0,false);
284
285 //array to store final visibility score of a point
286 bocl_mem* vis = opencl_cache->get_data<BOXM2_VIS_SCORE>(scene,blk_iter->first, (normals->num_bytes()/normalsTypeSize)
287 *boxm2_data_info::datasize(boxm2_data_traits<BOXM2_VIS_SCORE>::prefix()),false);
288
289 transfer_time += (float) transfer.all();
290
291 local_threads[0] = 128;
292 local_threads[1] = 1;
293 global_threads[0] = RoundUp((normals->num_bytes()/normalsTypeSize), local_threads[0]);
294 global_threads[1]=1;
295
296 kern->set_arg( blk_info );
297 kern->set_arg( dodecahedron_dir_lookup.ptr() );
298 kern->set_arg( normals );
299 kern->set_arg( vis );
300 kern->set_arg( vis_sphere);
301
302 //execute kernel
303 kern->execute(queue, 2, local_threads, global_threads);
304 int status = clFinish(queue);
305 check_val(status, MEM_FAILURE, "DECIDE NORMAL DIR EXECUTE FAILED: " + error_to_string(status));
306 gpu_time += kern->exec_time();
307
308 //read normals and vis from gpu
309 normals->read_to_buffer(queue);
310 vis->read_to_buffer(queue);
311 status = clFinish(queue);
312 check_val(status, MEM_FAILURE, "READ NORMALS FAILED: " + error_to_string(status));
313
314 //clear render kernel args so it can reset em on next execution
315 kern->clear_args();
316 }
317
318 //shallow remove from ocl cache unnecessary items from ocl cache.
319 opencl_cache->shallow_remove_data(scene,id,boxm2_data_traits<BOXM2_VIS_SPHERE>::prefix());
320 }
321 }
322
323 std::cout<<"Gpu time "<<gpu_time<<" transfer time "<<transfer_time<<std::endl;
324 clReleaseCommandQueue(queue);
325 return true;
326 }
327