1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_ocl_make_inside_voxels_empty_process.cxx
2 #include <iostream>
3 #include <fstream>
4 #include <bprb/bprb_func_process.h>
5 //:
6 // \file
7 // \brief A process for making the inside cells empty
8 //
9 // TODO: implement a vis_sphere initializer kernel.
10 // \author Ali Osman Ulusoy
11 // \date Oct 10, 2011
12
13 #ifdef _MSC_VER
14 # include "vcl_msvc_warnings.h"
15 #endif
16 #include <boxm2/ocl/boxm2_opencl_cache.h>
17 #include <boxm2/boxm2_scene.h>
18 #include <boxm2/boxm2_block.h>
19 #include <boxm2/boxm2_data_base.h>
20 #include <boxm2/ocl/boxm2_ocl_util.h>
21 #include <boxm2/ocl/algo/boxm2_ocl_camera_converter.h>
22
23 //brdb stuff
24 #include <brdb/brdb_value.h>
25
//timing utility
27 #include "vul/vul_timer.h"
28 #include <bocl/bocl_device.h>
29 #include <bocl/bocl_kernel.h>
30
31 namespace boxm2_ocl_make_inside_voxels_empty_process_globals
32 {
33 constexpr unsigned n_inputs_ = 4;
34 constexpr unsigned n_outputs_ = 0;
35 enum {
36 COMPUTE_VIS = 0,
37 DECIDE_INSIDE = 1
38 };
39
compile_kernel(const bocl_device_sptr & device,std::vector<bocl_kernel * > & vec_kernels,const std::string & opts)40 void compile_kernel(const bocl_device_sptr& device,std::vector<bocl_kernel*> & vec_kernels,const std::string& opts)
41 {
42 //gather all render sources... seems like a lot for rendering...
43 std::vector<std::string> src_paths;
44 std::string source_dir = boxm2_ocl_util::ocl_src_root();
45 src_paths.push_back(source_dir + "scene_info.cl");
46 src_paths.push_back(source_dir + "cell_utils.cl");
47 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
48 src_paths.push_back(source_dir + "basic/sort_vector.cl");
49 src_paths.push_back(source_dir + "backproject.cl");
50 src_paths.push_back(source_dir + "statistics_library_functions.cl");
51 src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
52 src_paths.push_back(source_dir + "bit/compute_vis.cl");
53 src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
54
55 //compilation options
56 std::string options = opts+ "-D INTENSITY ";
57
58 auto* compute_vis = new bocl_kernel();
59 std::string seg_opts = options + "-D COMPVIS -D STEP_CELL=step_cell_computevis(aux_args,data_ptr,llid,d)";
60 compute_vis->create_kernel(&device->context(),device->device_id(), src_paths, "compute_vis", seg_opts, "compute_vis");
61 vec_kernels.push_back(compute_vis);
62
63 auto* decide_inside_cell = new bocl_kernel();
64 decide_inside_cell->create_kernel(&device->context(),device->device_id(), src_paths, "decide_inside_cell", seg_opts, "decide_inside_cell");
65 vec_kernels.push_back(decide_inside_cell);
66 return ;
67 }
68
69 static std::map<std::string,std::vector<bocl_kernel*> > kernels;
70 }
71
boxm2_ocl_make_inside_voxels_empty_process_cons(bprb_func_process & pro)72 bool boxm2_ocl_make_inside_voxels_empty_process_cons(bprb_func_process& pro)
73 {
74 using namespace boxm2_ocl_make_inside_voxels_empty_process_globals;
75
76 //process takes 4 inputs
77 std::vector<std::string> input_types_(n_inputs_);
78 input_types_[0] = "bocl_device_sptr";
79 input_types_[1] = "boxm2_scene_sptr";
80 input_types_[2] = "boxm2_opencl_cache_sptr";
81 input_types_[3] = "bool";
82
83 // process has no outputs
84 std::vector<std::string> output_types_(n_outputs_);
85 bool good = pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
86
87 return good;
88 }
89
90
boxm2_ocl_make_inside_voxels_empty_process(bprb_func_process & pro)91 bool boxm2_ocl_make_inside_voxels_empty_process(bprb_func_process& pro)
92 {
93 using namespace boxm2_ocl_make_inside_voxels_empty_process_globals;
94 std::size_t local_threads[2]={8,8};
95 std::size_t global_threads[2]={8,8};
96
97 //sanity check inputs
98 if ( pro.n_inputs() < n_inputs_ ) {
99 std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
100 return false;
101 }
102 float transfer_time=0.0f;
103 float gpu_time=0.0f;
104
105 //get the inputs
106 unsigned i = 0;
107 bocl_device_sptr device = pro.get_input<bocl_device_sptr>(i++);
108 boxm2_scene_sptr scene = pro.get_input<boxm2_scene_sptr>(i++);
109 boxm2_opencl_cache_sptr opencl_cache = pro.get_input<boxm2_opencl_cache_sptr>(i++);
110 bool use_sum = false; use_sum = pro.get_input<bool>(i++);
111
112 //cache size sanity check
113 long binCache = opencl_cache.ptr()->bytes_in_cache();
114 std::cout<<"Update MBs in cache: "<<binCache/(1024.0*1024.0)<<std::endl;
115
116 //make correct data types are here
117 std::string data_type,num_obs_type,options;
118
119 if (use_sum) {
120 options="-D USESUM ";
121 std::cout << "Using sum to compute visibility" << std::endl;
122 }
123
124
125 // create a command queue.
126 int status=0;
127 cl_command_queue queue = clCreateCommandQueue( device->context(),
128 *(device->device_id()),
129 CL_QUEUE_PROFILING_ENABLE,
130 &status);
131 if (status!=0)
132 return false;
133
134 // compile the kernel if not already compiled
135 std::string identifier=device->device_identifier()+options;
136 if (kernels.find(identifier)==kernels.end()) {
137 std::cout<<"===========Compiling kernels==========="<<std::endl;
138 std::vector<bocl_kernel*> ks;
139 compile_kernel(device,ks,options);
140 kernels[identifier]=ks;
141 }
142
143 // bit lookup buffer
144 cl_uchar lookup_arr[256];
145 boxm2_ocl_util::set_bit_lookup(lookup_arr);
146 bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
147 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
148
149 // dodecahedron directions lookup buffer
150 cl_float4 dodecahedron_dir[12];
151 boxm2_ocl_util::set_dodecahedron_dir_lookup(dodecahedron_dir);
152 bocl_mem_sptr dodecahedron_dir_lookup=new bocl_mem(device->context(), dodecahedron_dir, sizeof(cl_float4)*12, "dodecahedron directions lookup buffer");
153 dodecahedron_dir_lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
154
155 cl_bool contain_point[1];
156 bocl_mem_sptr contain_point_mem =new bocl_mem(device->context(), contain_point, sizeof(cl_bool), "contains point buffer");
157 contain_point_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
158
159 cl_uint datasize[1];
160 bocl_mem_sptr datasize_mem =new bocl_mem(device->context(), datasize, sizeof(cl_uint), "data buffer size");
161 datasize_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
162
163 //zip through each block
164 std::map<boxm2_block_id, boxm2_block_metadata> blocks = scene->blocks();
165 std::map<boxm2_block_id, boxm2_block_metadata>::iterator blk_iter;
166 for (unsigned int i=0; i<kernels[identifier].size(); ++i)
167 {
168 //remove all the alphas and points from opencl cache
169 if (i == DECIDE_INSIDE) {
170 for (blk_iter = blocks.begin(); blk_iter != blocks.end(); ++blk_iter)
171 {
172 boxm2_block_id id = blk_iter->first;
173 //opencl_cache->shallow_remove_data(id,boxm2_data_traits<BOXM2_ALPHA>::prefix());
174 opencl_cache->shallow_remove_data(scene,id,boxm2_data_traits<BOXM2_POINT>::prefix());
175 }
176 }
177
178 for (blk_iter = blocks.begin(); blk_iter != blocks.end(); ++blk_iter)
179 {
180 boxm2_block_id id = blk_iter->first;
181 std::cout << "Processing block: " << id << std::endl;
182
183 //get kernel
184 bocl_kernel* kern = kernels[identifier][i];
185
186 vul_timer transfer;
187
188 //load normals
189 bocl_mem* normals = opencl_cache->get_data<BOXM2_NORMAL>(scene,blk_iter->first,0,false);
190 std::size_t normalsTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_NORMAL>::prefix());
191
192 //load block info
193 datasize[0] = (unsigned)(normals->num_bytes()/normalsTypeSize);
194 datasize_mem->write_to_buffer((queue));
195
196 transfer_time += (float) transfer.all();
197 if (i==COMPUTE_VIS) {
198
199 //array to store visibilities computed around a sphere
200 //ask for a new BOXM2_VIS_SPHERE data so that it gets initialized properly.
201 std::size_t visTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_VIS_SPHERE>::prefix());
202 bocl_mem *vis_sphere = opencl_cache->get_data_new<BOXM2_VIS_SPHERE>(scene,blk_iter->first, (normals->num_bytes()/normalsTypeSize)*visTypeSize, false);
203
204 //zip through each block
205 std::map<boxm2_block_id, boxm2_block_metadata>::iterator blk_iter_inner;
206 for (blk_iter_inner = blocks.begin(); blk_iter_inner != blocks.end(); ++blk_iter_inner) {
207
208 transfer.mark();
209 boxm2_block_id id_inner = blk_iter_inner->first;
210 //std::cout << "--Loading block " << id_inner << std::endl;
211
212 //load tree and alpha
213 boxm2_block_metadata mdata = blk_iter_inner->second;
214 vul_timer transfer;
215 bocl_mem* blk = opencl_cache->get_block(scene,blk_iter_inner->first);
216 bocl_mem* blk_info = opencl_cache->loaded_block_info();
217 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,blk_iter_inner->first,0,false);
218 auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
219 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
220 info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
221 blk_info->write_to_buffer((queue));
222
223 bocl_mem* points = opencl_cache->get_data<BOXM2_POINT>(scene,blk_iter->first,0,false);
224
225 if (id == id_inner)
226 contain_point[0] = true;
227 else
228 contain_point[0] = false;
229 contain_point_mem->write_to_buffer(queue);
230
231 transfer_time += (float) transfer.all();
232
233 local_threads[0] = 64;
234 local_threads[1] = 1;
235 global_threads[0] = RoundUp((normals->num_bytes()/normalsTypeSize), local_threads[0]);
236 global_threads[1]=1;
237
238 kern->set_arg( datasize_mem.ptr() );
239 kern->set_arg( blk_info );
240 kern->set_arg( dodecahedron_dir_lookup.ptr());
241 kern->set_arg( blk );
242 kern->set_arg( lookup.ptr() );
243 kern->set_arg( alpha );
244 kern->set_arg( points );
245 kern->set_arg( normals );
246 kern->set_arg( vis_sphere);
247 kern->set_arg( contain_point_mem.ptr());
248 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
249 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
250
251 //execute kernel
252 kern->execute(queue, 2, local_threads, global_threads);
253 int status = clFinish(queue);
254 check_val(status, MEM_FAILURE, "VISIBIITY EXECUTE FAILED: " + error_to_string(status));
255 gpu_time += kern->exec_time();
256
257 //clear render kernel args so it can reset em on next execution
258 kern->clear_args();
259 }
260
261 //read from gpu
262 vis_sphere->read_to_buffer(queue);
263 int status = clFinish(queue);
264 check_val(status, MEM_FAILURE, "READ VIS_SPHERE FAILED: " + error_to_string(status));
265 }
266 else if (i == DECIDE_INSIDE) {
267 transfer.mark();
268
269 //load tree
270 boxm2_block_metadata mdata = blk_iter->second;
271 vul_timer transfer;
272 /* bocl_mem* blk = */ opencl_cache->get_block(scene,blk_iter->first);
273 bocl_mem* blk_info = opencl_cache->loaded_block_info();
274 auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
275 info_buffer->data_buffer_length = (int) (normals->num_bytes()/normalsTypeSize);
276 blk_info->write_to_buffer((queue));
277 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,blk_iter->first,0,false);
278
279 //load visibilities
280 bocl_mem* vis_sphere = opencl_cache->get_data<BOXM2_VIS_SPHERE>(scene,blk_iter->first,0,false);
281
282 //array to store final visibility score of a point
283 bocl_mem* vis = opencl_cache->get_data<BOXM2_VIS_SCORE>(scene,blk_iter->first, (normals->num_bytes()/normalsTypeSize)
284 *boxm2_data_info::datasize(boxm2_data_traits<BOXM2_VIS_SCORE>::prefix()),false);
285
286 transfer_time += (float) transfer.all();
287
288 local_threads[0] = 128;
289 local_threads[1] = 1;
290 global_threads[0] = RoundUp((normals->num_bytes()/normalsTypeSize), local_threads[0]);
291 global_threads[1]=1;
292
293 kern->set_arg( blk_info );
294 kern->set_arg( alpha );
295 kern->set_arg( vis );
296 kern->set_arg( vis_sphere);
297 //execute kernel
298 kern->execute(queue, 2, local_threads, global_threads);
299 int status = clFinish(queue);
300 check_val(status, MEM_FAILURE, "DECIDE NORMAL DIR EXECUTE FAILED: " + error_to_string(status));
301 gpu_time += kern->exec_time();
302
303 //read normals and vis from gpu
304 alpha->read_to_buffer(queue);
305 status = clFinish(queue);
306 check_val(status, MEM_FAILURE, "READ NORMALS FAILED: " + error_to_string(status));
307
308 //clear render kernel args so it can reset em on next execution
309 kern->clear_args();
310 }
311
312 //shallow remove from ocl cache unnecessary items from ocl cache.
313 opencl_cache->shallow_remove_data(scene,id,boxm2_data_traits<BOXM2_VIS_SPHERE>::prefix());
314 }
315 }
316
317 std::cout<<"Gpu time "<<gpu_time<<" transfer time "<<transfer_time<<std::endl;
318 clReleaseCommandQueue(queue);
319 return true;
320 }
321