1 // This is brl/bseg/boxm2/ocl/algo/boxm2_ocl_update_auxQ.cxx
2 #include <fstream>
3 #include <iostream>
4 #include <algorithm>
5 #include "boxm2_ocl_update_auxQ.h"
6 //:
7 // \file
8 // \brief A process for updating a color model
9 //
10 // \author Vishal Jain
11 // \date Mar 25, 2011
12
13 #ifdef _MSC_VER
14 # include "vcl_msvc_warnings.h"
15 #endif
16 #include <boxm2/ocl/boxm2_opencl_cache.h>
17 #include <boxm2/boxm2_scene.h>
18 #include <boxm2/boxm2_block.h>
19 #include <boxm2/boxm2_data_base.h>
20 #include <boxm2/ocl/boxm2_ocl_util.h>
21 #include <boxm2/boxm2_util.h>
22 #include <boxm2/ocl/algo/boxm2_ocl_camera_converter.h>
23 #include "vil/vil_image_view.h"
24
25 //directory utility
26 #include "vul/vul_timer.h"
27 #include <vcl_where_root_dir.h>
28 #include <bocl/bocl_device.h>
29 #include <bocl/bocl_kernel.h>
30
31 //: Map of kernels should persist between process executions
32 std::map<std::string,std::vector<bocl_kernel*> > boxm2_ocl_update_auxQ::kernels_;
33
34 //Main public method, updates color model
update_auxQ(const boxm2_scene_sptr & scene,bocl_device_sptr device,const boxm2_opencl_cache_sptr & opencl_cache,vpgl_camera_double_sptr cam,const vil_image_view_base_sptr & img,const std::string & app_ident,std::string view_ident,float resnearfactor,float resfarfactor)35 bool boxm2_ocl_update_auxQ::update_auxQ(const boxm2_scene_sptr& scene,
36 bocl_device_sptr device,
37 const boxm2_opencl_cache_sptr& opencl_cache,
38 vpgl_camera_double_sptr cam,
39 const vil_image_view_base_sptr& img,
40 const std::string& app_ident,
41 std::string view_ident,
42 float resnearfactor ,
43 float resfarfactor )
44 {
45 enum {
46 UPDATE_SEGLEN = 0,
47 UPDATE_PREINF = 1,
48 UPDATE_PROC = 2,
49 UPDATE_BAYES = 3,
50 CONVERT_AUX_INT_FLOAT = 4,
51 };
52 float transfer_time=0.0f;
53 float gpu_time=0.0f;
54 std::size_t local_threads[2]={8,8};
55 std::size_t global_threads[2]={8,8};
56 view_ident = view_ident +"_curr";
57 //cache size sanity check
58 std::size_t binCache = opencl_cache.ptr()->bytes_in_cache();
59 std::cout<<"Update MBs in cache: "<<binCache/(1024.0*1024.0)<<std::endl;
60
61 //make correct data types are here
62 std::string data_type, num_obs_type,options;
63
64 int appTypeSize;
65 bool isRGB = false;
66 if (!validate_appearances(scene, data_type, appTypeSize, num_obs_type, options, isRGB))
67 return false;
68 if (app_ident.size() > 0) {
69 data_type += "_" + app_ident;
70 num_obs_type += "_" + app_ident;
71 }
72 // create a command queue.
73 int status=0;
74 cl_command_queue queue = clCreateCommandQueue( device->context(),
75 *(device->device_id()),
76 CL_QUEUE_PROFILING_ENABLE,
77 &status);
78 if (status!=0)
79 return false;
80 // compile the kernel if not already compiled
81 std::vector<bocl_kernel*>& kernels = get_kernels(device, options);
82
83 //grab input image, establish cl_ni, cl_nj (so global size is divisible by local size)
84 vil_image_view_base_sptr float_img = boxm2_util::prepare_input_image(img, true);
85 auto* img_view = static_cast<vil_image_view<float>* >(float_img.ptr());
86 auto cl_ni=(unsigned)RoundUp(img_view->ni(),(int)local_threads[0]);
87 auto cl_nj=(unsigned)RoundUp(img_view->nj(),(int)local_threads[1]);
88 global_threads[0]=cl_ni;
89 global_threads[1]=cl_nj;
90 //set generic cam
91 auto* ray_origins = new cl_float[4*cl_ni*cl_nj];
92 auto* ray_directions = new cl_float[4*cl_ni*cl_nj];
93 bocl_mem_sptr ray_o_buff = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_origins, "ray_origins buffer");
94 bocl_mem_sptr ray_d_buff = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_directions, "ray_directions buffer");
95 boxm2_ocl_camera_converter::compute_ray_image( device, queue, cam, cl_ni, cl_nj, ray_o_buff, ray_d_buff);
96
97 float tnearfar[2] = { 0.0f, 1000000} ;
98
99 if(cam->type_name() == "vpgl_perspective_camera")
100 {
101
102 float f = ((vpgl_perspective_camera<double> *)cam.ptr())->get_calibration().focal_length()*((vpgl_perspective_camera<double> *)cam.ptr())->get_calibration().x_scale();
103 std::cout<<"Focal Length " << f<<std::endl;
104 tnearfar[0] = f* scene->finest_resolution()/resnearfactor ;
105 tnearfar[1] = f* scene->finest_resolution()*resfarfactor ;
106 std::cout<<"Near and Far Clipping planes "<<tnearfar[0]<<" "<<tnearfar[1]<<std::endl;
107 }
108 bocl_mem_sptr tnearfar_mem_ptr = opencl_cache->alloc_mem(2*sizeof(float), tnearfar, "tnearfar buffer");
109 tnearfar_mem_ptr->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
110 //Visibility, Preinf, Norm, and input image buffers
111 auto* vis_buff = new float[cl_ni*cl_nj];
112 auto* pre_buff = new float[cl_ni*cl_nj];
113 auto* norm_buff = new float[cl_ni*cl_nj];
114 auto* input_buff=new float[cl_ni*cl_nj];
115 for (unsigned i=0;i<cl_ni*cl_nj;i++)
116 {
117 vis_buff[i]=1.0f;
118 pre_buff[i]=0.0f;
119 norm_buff[i]=0.0f;
120 }
121 //copy input vals into image
122 int count=0;
123 for (unsigned int j=0;j<cl_nj;++j) {
124 for (unsigned int i=0;i<cl_ni;++i) {
125 input_buff[count] = 0.0f;
126 if ( i<img_view->ni() && j< img_view->nj() )
127 input_buff[count] = (*img_view)(i,j);
128 ++count;
129 }
130 }
131 //bocl_mem_sptr in_image=new bocl_mem(device->context(),input_buff,cl_ni*cl_nj*sizeof(float),"input image buffer");
132 bocl_mem_sptr in_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), input_buff, "input image buffer");
133 in_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
134
135 //bocl_mem_sptr vis_image=new bocl_mem(device->context(),vis_buff,cl_ni*cl_nj*sizeof(float),"vis image buffer");
136 bocl_mem_sptr vis_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), vis_buff, "vis image buffer");
137 vis_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
138
139 //bocl_mem_sptr pre_image=new bocl_mem(device->context(),pre_buff,cl_ni*cl_nj*sizeof(float),"pre image buffer");
140 bocl_mem_sptr pre_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), pre_buff, "pre image buffer");
141 pre_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
142
143 //bocl_mem_sptr norm_image=new bocl_mem(device->context(),norm_buff,cl_ni*cl_nj*sizeof(float),"pre image buffer");
144 bocl_mem_sptr norm_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), norm_buff, "norm image buffer");
145 norm_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
146
147 // Image Dimensions
148 int img_dim_buff[4];
149 img_dim_buff[0] = 0;
150 img_dim_buff[1] = 0;
151 img_dim_buff[2] = img_view->ni();
152 img_dim_buff[3] = img_view->nj();
153
154 bocl_mem_sptr img_dim=new bocl_mem(device->context(), img_dim_buff, sizeof(int)*4, "image dims");
155 img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
156
157 // Output Array
158 float output_arr[100];
159 for (float & i : output_arr) i = 0.0f;
160 bocl_mem_sptr cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
161 cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
162
163 // bit lookup buffer
164 cl_uchar lookup_arr[256];
165 boxm2_ocl_util::set_bit_lookup(lookup_arr);
166 bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
167 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
168
169 // app density used for proc_norm_image
170 float app_buffer[4]={1.0,0.0,0.0,0.0};
171 bocl_mem_sptr app_density = new bocl_mem(device->context(), app_buffer, sizeof(cl_float4), "app density buffer");
172 app_density->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
173
174 // set arguments
175 std::vector<boxm2_block_id> vis_order;
176 //if(cam->type_name() == "vpgl_perspective_camera")
177 // vis_order= scene->get_vis_blocks_opt((vpgl_perspective_camera<double>*)cam.ptr(),img_view->ni(),img_view->nj());
178 //else
179 vis_order= scene->get_vis_blocks(cam);
180 std::vector<boxm2_block_id>::iterator id;
181 for (unsigned int i=0; i<kernels.size(); ++i)
182 {
183 if ( i == UPDATE_PROC ) {
184 bocl_kernel * proc_kern=kernels[i];
185
186 proc_kern->set_arg( norm_image.ptr() );
187 proc_kern->set_arg( vis_image.ptr() );
188 proc_kern->set_arg( pre_image.ptr());
189 proc_kern->set_arg( img_dim.ptr() );
190
191 //execute kernel
192 proc_kern->execute( queue, 2, local_threads, global_threads);
193 int status = clFinish(queue);
194 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
195 return false;
196 proc_kern->clear_args();
197 norm_image->read_to_buffer(queue);
198 vil_image_view<float> norm(img_view->ni(),img_view->nj());
199
200 int count=0;
201 for (unsigned int j=0;j<cl_nj;++j) {
202 for (unsigned int i=0;i<cl_ni;++i) {
203 if ( i<img_view->ni() && j< img_view->nj() )
204 norm(i,j) = norm_buff[count] ;
205 ++count;
206 }
207 }
208 continue;
209 }
210
211 //set masked values
212 vis_image->read_to_buffer(queue);
213 for (id = vis_order.begin(); id != vis_order.end(); ++id)
214 {
215 //choose correct render kernel
216 boxm2_block_metadata mdata = scene->get_block_metadata(*id);
217 bocl_kernel* kern = kernels[i];
218
219 //write the image values to the buffer
220 vul_timer transfer;
221 bocl_mem* blk = opencl_cache->get_block(scene,*id);
222 bocl_mem* blk_info = opencl_cache->loaded_block_info();
223 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,*id,0,false);
224 auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
225 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
226 info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
227 blk_info->write_to_buffer((queue));
228
229 int nobsTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_NUM_OBS>::prefix());
230 int appTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_MOG3_GREY>::prefix());
231 // data type string may contain an identifier so determine the buffer size
232 bocl_mem* mog = opencl_cache->get_data(scene,*id,data_type,alpha->num_bytes()/alphaTypeSize*appTypeSize,false); //info_buffer->data_buffer_length*boxm2_data_info::datasize(data_type));
233 bocl_mem* num_obs = opencl_cache->get_data(scene,*id,num_obs_type,alpha->num_bytes()/alphaTypeSize*nobsTypeSize,false);//,info_buffer->data_buffer_length*boxm2_data_info::datasize(num_obs_type));
234
235 //grab an appropriately sized AUX data buffer
236 int auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix());
237 bocl_mem *aux0 = opencl_cache->get_data(scene,*id, boxm2_data_traits<BOXM2_AUX0>::prefix(view_ident),info_buffer->data_buffer_length*auxTypeSize,false);
238 auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX1>::prefix());
239 bocl_mem *aux1 = opencl_cache->get_data(scene,*id, boxm2_data_traits<BOXM2_AUX1>::prefix(view_ident),info_buffer->data_buffer_length*auxTypeSize,false);
240 auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX2>::prefix());
241 bocl_mem *aux2 = opencl_cache->get_data(scene,*id, boxm2_data_traits<BOXM2_AUX2>::prefix(view_ident),info_buffer->data_buffer_length*auxTypeSize,false);
242
243 auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX3>::prefix());
244 bocl_mem *aux3 = opencl_cache->get_data(scene,*id, boxm2_data_traits<BOXM2_AUX3>::prefix(view_ident),info_buffer->data_buffer_length*auxTypeSize,false);
245
246 transfer_time += (float) transfer.all();
247 if (i==UPDATE_SEGLEN)
248 {
249 aux0->zero_gpu_buffer(queue);
250 aux1->zero_gpu_buffer(queue);
251 kern->set_arg( blk_info );
252 kern->set_arg( blk );
253 kern->set_arg( alpha );
254 kern->set_arg( aux0 );
255 kern->set_arg( aux1 );
256 kern->set_arg( lookup.ptr() );
257 kern->set_arg( ray_o_buff.ptr() );
258 kern->set_arg( ray_d_buff.ptr() );
259 kern->set_arg( tnearfar_mem_ptr.ptr() );
260 kern->set_arg( img_dim.ptr() );
261 kern->set_arg( in_image.ptr() );
262 kern->set_arg( cl_output.ptr() );
263 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
264 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar4) ); //ray bundle,
265 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) ); //cell pointers,
266 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_float4) ); //cached aux,
267 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
268 //execute kernel
269 kern->execute(queue, 2, local_threads, global_threads);
270 int status = clFinish(queue);
271 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
272 return false;
273 gpu_time += kern->exec_time();
274 //clear render kernel args so it can reset em on next execution
275 kern->clear_args();
276 aux0->read_to_buffer(queue);
277 aux1->read_to_buffer(queue);
278 }
279 else if (i==UPDATE_PREINF)
280 {
281
282 kern->set_arg( blk_info );
283 kern->set_arg( blk );
284 kern->set_arg( alpha );
285 kern->set_arg( mog );
286 kern->set_arg( num_obs );
287 kern->set_arg( aux0 );
288 kern->set_arg( aux1 );
289 kern->set_arg( lookup.ptr() );
290 kern->set_arg( ray_o_buff.ptr() );
291 kern->set_arg( ray_d_buff.ptr() );
292 kern->set_arg( tnearfar_mem_ptr.ptr() );
293 kern->set_arg( img_dim.ptr() );
294 kern->set_arg( vis_image.ptr() );
295 kern->set_arg( pre_image.ptr() );
296 kern->set_arg( cl_output.ptr() );
297 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
298 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
299 //execute kernel
300 kern->execute(queue, 2, local_threads, global_threads);
301 int status = clFinish(queue);
302 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
303 return false;
304 gpu_time += kern->exec_time();
305 //clear render kernel args so it can reset em on next execution
306 kern->clear_args();
307 //write info to disk
308 }
309 else if (i==UPDATE_BAYES)
310 {
311 aux2->zero_gpu_buffer(queue);
312 aux3->zero_gpu_buffer(queue);
313 kern->set_arg( blk_info );
314 kern->set_arg( blk );
315 kern->set_arg( alpha );
316 kern->set_arg( mog );
317 kern->set_arg( num_obs );
318 kern->set_arg( aux0 );
319 kern->set_arg( aux1 );
320 kern->set_arg( aux2 );
321 kern->set_arg( aux3 );
322 kern->set_arg( lookup.ptr() );
323 kern->set_arg( ray_o_buff.ptr() );
324 kern->set_arg( ray_d_buff.ptr() );
325 kern->set_arg( tnearfar_mem_ptr.ptr() );
326 kern->set_arg( img_dim.ptr() );
327 kern->set_arg( vis_image.ptr() );
328 kern->set_arg( pre_image.ptr() );
329 kern->set_arg( norm_image.ptr() );
330 kern->set_arg( cl_output.ptr() );
331 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
332 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_short2) ); //ray bundle,
333 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) ); //cell pointers,
334 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_float) ); //cached aux,
335 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
336 //execute kernel
337 kern->execute(queue, 2, local_threads, global_threads);
338 int status = clFinish(queue);
339 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
340 return false;
341 gpu_time += kern->exec_time();
342 //clear render kernel args so it can reset em on next execution
343 kern->clear_args();
344
345
346 }
347 else if (i==CONVERT_AUX_INT_FLOAT)
348 {
349 std::size_t ltr[2];
350 ltr[0] = 64;
351 ltr[1] = 1;
352 std::size_t gt[2];
353 gt[0] = RoundUp(info_buffer->data_buffer_length,ltr[0]);
354 gt[1] = 1;
355 kern->set_arg( blk_info );
356 kern->set_arg( aux0 );
357 kern->set_arg( aux1 );
358 kern->set_arg( aux2 );
359 kern->set_arg( aux3 );
360
361 //execute kernel
362 kern->execute(queue, 2, ltr, gt);
363 int status = clFinish(queue);
364 check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status));
365 gpu_time += kern->exec_time();
366
367 //clear render kernel args so it can reset em on next execution
368 kern->clear_args();
369 //write info to disk
370 aux0->read_to_buffer(queue);
371 aux1->read_to_buffer(queue);
372 aux2->read_to_buffer(queue);
373 aux3->read_to_buffer(queue);
374
375 opencl_cache->deep_remove_data(scene,*id,boxm2_data_traits<BOXM2_AUX0>::prefix(view_ident),true);
376 opencl_cache->deep_remove_data(scene,*id,boxm2_data_traits<BOXM2_AUX1>::prefix(view_ident),true);
377 opencl_cache->deep_remove_data(scene,*id,boxm2_data_traits<BOXM2_AUX2>::prefix(view_ident),true);
378 opencl_cache->deep_remove_data(scene,*id,boxm2_data_traits<BOXM2_AUX3>::prefix(view_ident),true);
379
380 }
381 //read image out to buffer (from gpu)
382 in_image->read_to_buffer(queue);
383 vis_image->read_to_buffer(queue);
384 pre_image->read_to_buffer(queue);
385 cl_output->read_to_buffer(queue);
386 clFinish(queue);
387 }
388 }
389
390 delete [] vis_buff;
391 delete [] pre_buff;
392 delete [] norm_buff;
393 delete [] input_buff;
394 delete [] ray_origins;
395 delete [] ray_directions;
396 opencl_cache->unref_mem(in_image.ptr());
397 opencl_cache->unref_mem(vis_image.ptr());
398 opencl_cache->unref_mem(pre_image.ptr());
399 opencl_cache->unref_mem(norm_image.ptr());
400 opencl_cache->unref_mem(ray_o_buff.ptr());
401 opencl_cache->unref_mem(ray_d_buff.ptr());
402 opencl_cache->unref_mem(tnearfar_mem_ptr.ptr());
403 std::cout<<"Gpu time "<<gpu_time<<" transfer time "<<transfer_time<<std::endl;
404 clReleaseCommandQueue(queue);
405 return true;
406 }
407
408
409 //Returns vector of color update kernels (and caches them per device
get_kernels(const bocl_device_sptr & device,const std::string & opts)410 std::vector<bocl_kernel*>& boxm2_ocl_update_auxQ::get_kernels(const bocl_device_sptr& device, const std::string& opts)
411 {
412 // compile kernels if not already compiled
413 std::string identifier = device->device_identifier() + opts;
414 if (kernels_.find(identifier) != kernels_.end())
415 return kernels_[identifier];
416
417 //otherwise compile the kernels
418 std::cout<<"=== boxm2_ocl_update_process::compiling kernels on device "<<identifier<<"==="<<std::endl;
419
420 std::vector<std::string> src_paths;
421 std::string source_dir = boxm2_ocl_util::ocl_src_root();
422 src_paths.push_back(source_dir + "scene_info.cl");
423 src_paths.push_back(source_dir + "pixel_conversion.cl");
424 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
425 src_paths.push_back(source_dir + "backproject.cl");
426 src_paths.push_back(source_dir + "atomics_util.cl");
427 src_paths.push_back(source_dir + "statistics_library_functions.cl");
428 src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
429 src_paths.push_back(source_dir + "bit/update_kernels.cl");
430 std::vector<std::string> non_ray_src = std::vector<std::string>(src_paths);
431
432 src_paths.push_back(source_dir + "update_functors.cl");
433 src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
434
435 //compilation options
436 const std::string& options = /*"-D ATOMIC_FLOAT " +*/ opts;
437
438 //populate vector of kernels
439 std::vector<bocl_kernel*> vec_kernels;
440
441 //seg len pass
442 auto* seg_len = new bocl_kernel();
443 std::string seg_opts = options + " -D SEGLEN -D STEP_CELL=step_cell_seglen(aux_args,data_ptr,llid,d)";
444 seg_len->create_kernel(&device->context(), device->device_id(), src_paths, "seg_len_main", seg_opts, "update::seg_len");
445 vec_kernels.push_back(seg_len);
446
447
448 auto* pre_inf = new bocl_kernel();
449 std::string pre_opts = options + " -D PREINF -D STEP_CELL=step_cell_preinf(aux_args,data_ptr,llid,d)";
450 pre_inf->create_kernel(&device->context(), device->device_id(), src_paths, "pre_inf_main", pre_opts, "update::pre_inf");
451 vec_kernels.push_back(pre_inf);
452
453 //may need DIFF LIST OF SOURCES FOR THIS GUY
454 auto* proc_img = new bocl_kernel();
455 std::string proc_opts = options + " -D PROC_NORM ";
456 proc_img->create_kernel(&device->context(), device->device_id(), non_ray_src, "proc_norm_image", proc_opts, "update::proc_norm_image");
457 vec_kernels.push_back(proc_img);
458
459 //push back cast_ray_bit
460 auto* bayes_main = new bocl_kernel();
461 std::string bayes_opt = options + " -D BAYES -D STEP_CELL=step_cell_bayes(aux_args,data_ptr,llid,d)";
462 bayes_main->create_kernel(&device->context(), device->device_id(), src_paths, "bayes_main", bayes_opt, "update::bayes_main");
463 vec_kernels.push_back(bayes_main);
464
465 std::vector<std::string> src_paths_4;
466 src_paths_4.push_back(source_dir + "scene_info.cl");
467 src_paths_4.push_back(source_dir + "bit/batch_update_kernels.cl");
468 //convert aux buffer int values to float (just divide by SEGLENFACTOR
469 auto* convert_aux_int_float = new bocl_kernel();
470 convert_aux_int_float->create_kernel(&device->context(),device->device_id(), src_paths_4, "convert_aux_int_to_float", opts+" -D CONVERT_AUX ", "batch_update::convert_aux_int_to_float");
471 vec_kernels.push_back(convert_aux_int_float);
472
473 //store and return
474 kernels_[identifier] = vec_kernels;
475 return kernels_[identifier];
476 }
477 //makes sure appearance types correspond correctly
validate_appearances(const boxm2_scene_sptr & scene,std::string & data_type,int & appTypeSize,std::string & num_obs_type,std::string & options,bool &)478 bool boxm2_ocl_update_auxQ::validate_appearances(const boxm2_scene_sptr& scene,
479 std::string& data_type,
480 int& appTypeSize,
481 std::string& num_obs_type,
482 std::string& options,
483 bool& /*isRGB*/)
484 {
485 std::vector<std::string> apps = scene->appearances();
486 bool foundDataType = false, foundNumObsType = false;
487 for (const auto & app : apps) {
488 if ( app == boxm2_data_traits<BOXM2_MOG3_GREY>::prefix() )
489 {
490 data_type = app;
491 foundDataType = true;
492 options=" -D MOG_TYPE_8";
493 appTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_MOG3_GREY>::prefix());
494 }
495 else if ( app == boxm2_data_traits<BOXM2_MOG3_GREY_16>::prefix() )
496 {
497 data_type = app;
498 foundDataType = true;
499 options=" -D MOG_TYPE_16";
500 appTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_MOG3_GREY_16>::prefix());
501 }
502 else if ( app == boxm2_data_traits<BOXM2_NUM_OBS>::prefix() )
503 {
504 num_obs_type = app;
505 foundNumObsType = true;
506 }
507 }
508 if (!foundDataType) {
509 std::cout<<"BOXM2_OPENCL_UPDATE_PROCESS ERROR: scene doesn't have BOXM2_MOG3_GREY or BOXM2_MOG3_GREY_16 data type"<<std::endl;
510 return false;
511 }
512 if (!foundNumObsType) {
513 std::cout<<"BOXM2_OPENCL_UPDATE_PROCESS ERROR: scene doesn't have BOXM2_NUM_OBS type"<<std::endl;
514 return false;
515 }
516 return true;
517 }
518
519
520 std::map<std::string, std::vector<bocl_kernel*> > boxm2_ocl_update_PusingQ::kernels_;
521
init_product(boxm2_scene_sptr scene,const boxm2_cache_sptr & cache)522 bool boxm2_ocl_update_PusingQ::init_product(boxm2_scene_sptr scene, const boxm2_cache_sptr& cache)
523 {
524 std::vector<boxm2_block_id> vis_order = scene->get_block_ids();
525 std::vector<boxm2_block_id>::iterator id;
526 for (id = vis_order.begin(); id != vis_order.end(); ++id)
527 {
528 boxm2_data_base * aux3 = cache->get_data_base(scene, *id,boxm2_data_traits<BOXM2_AUX3>::prefix(),0,false);
529 auto * aux3_data = reinterpret_cast<boxm2_data_traits<BOXM2_AUX3>::datatype*> ( aux3->data_buffer());
530 std::fill_n(aux3_data,aux3->buffer_length()/boxm2_data_traits<BOXM2_AUX3>::datasize(),1);
531 cache->remove_data_base(scene,*id,boxm2_data_traits<BOXM2_AUX3>::prefix());
532 }
533 return true;
534 }
535
accumulate_product(const boxm2_scene_sptr & scene,const bocl_device_sptr & device,const boxm2_opencl_cache_sptr & opencl_cache,const std::string & identifier)536 bool boxm2_ocl_update_PusingQ::accumulate_product(const boxm2_scene_sptr& scene,
537 const bocl_device_sptr& device,
538 const boxm2_opencl_cache_sptr& opencl_cache,
539 const std::string& identifier)
540 {
541 float transfer_time=0.0f;
542 float gpu_time=0.0f;
543 //cache size sanity check
544 std::size_t binCache = opencl_cache.ptr()->bytes_in_cache();
545 std::cout<<"Update MBs in cache: "<<binCache/(1024.0*1024.0)<<std::endl;
546 // create a command queue.
547 int status=0;
548 cl_command_queue queue = clCreateCommandQueue( device->context(),*(device->device_id()),CL_QUEUE_PROFILING_ENABLE,&status);
549 if (status!=0)
550 return false;
551
552 // compile the kernel if not already compiled
553 std::vector<bocl_kernel*>& kernels = get_kernels(device,"");
554 // bit lookup buffer
555 cl_uchar lookup_arr[256];
556 boxm2_ocl_util::set_bit_lookup(lookup_arr);
557 bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
558 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
559 // set arguments
560 std::vector<boxm2_block_id> vis_order = scene->get_block_ids();
561 std::vector<boxm2_block_id>::iterator id;
562 bocl_kernel * kern=kernels[0];
563 for (id = vis_order.begin(); id != vis_order.end(); ++id)
564 {
565 //choose correct render kernel
566 boxm2_block_metadata mdata = scene->get_block_metadata(*id);
567 //write the image values to the buffer
568 vul_timer transfer;
569 bocl_mem* blk = opencl_cache->get_block(scene,*id);
570 bocl_mem* blk_info = opencl_cache->loaded_block_info();
571 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,*id,0,false);
572 auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
573 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
574 info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
575 blk_info->write_to_buffer((queue));
576 //grab an appropriately sized AUX data buffer
577 int auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX3>::prefix());
578 bocl_mem *aux3_curr = opencl_cache->get_data(scene,*id, boxm2_data_traits<BOXM2_AUX3>::prefix(identifier+"_curr"),info_buffer->data_buffer_length*auxTypeSize,false);
579 bocl_mem *aux3_prev = opencl_cache->get_data(scene,*id, boxm2_data_traits<BOXM2_AUX3>::prefix(identifier+"_prev"),info_buffer->data_buffer_length*auxTypeSize,false);
580 bocl_mem *aux0_curr = opencl_cache->get_data(scene,*id, boxm2_data_traits<BOXM2_AUX0>::prefix(identifier+"_curr"),info_buffer->data_buffer_length*auxTypeSize,false);
581 transfer_time += (float) transfer.all();
582 //set workspace
583 std::size_t ltr[] = {64};
584 std::size_t gtr[] = { RoundUp(info_buffer->data_buffer_length, ltr[0]) };
585 kern->set_arg( blk_info );
586 kern->set_arg( aux3_prev );
587 kern->set_arg( aux3_curr );
588 kern->set_arg( aux0_curr );
589 kern->set_arg( alpha );
590 //execute kernel
591 kern->execute(queue, 1, ltr, gtr);
592 int status = clFinish(queue);
593 check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status));
594 gpu_time += kern->exec_time();
595 //clear render kernel args so it can reset em on next execution
596 kern->clear_args();
597 aux3_prev->read_to_buffer(queue);
598 alpha->read_to_buffer(queue);
599 opencl_cache->deep_remove_data(scene,*id,boxm2_data_traits<BOXM2_AUX3>::prefix(identifier+"_prev"),true);
600 opencl_cache->deep_remove_data(scene,*id,boxm2_data_traits<BOXM2_AUX3>::prefix(identifier+"_curr"),false);
601 }
602 //read image out to buffer (from gpu)
603 clFinish(queue);
604 std::cout<<"Gpu time "<<gpu_time<<" transfer time "<<transfer_time<<std::endl;
605 clReleaseCommandQueue(queue);
606 return true;
607 }
compute_probability(const boxm2_scene_sptr & scene,const bocl_device_sptr & device,const boxm2_opencl_cache_sptr & opencl_cache)608 bool boxm2_ocl_update_PusingQ::compute_probability(const boxm2_scene_sptr& scene,
609 const bocl_device_sptr& device,
610 const boxm2_opencl_cache_sptr& opencl_cache)
611
612 {
613 float transfer_time=0.0f;
614 float gpu_time=0.0f;
615 //cache size sanity check
616 std::size_t binCache = opencl_cache.ptr()->bytes_in_cache();
617 std::cout<<"Update MBs in cache: "<<binCache/(1024.0*1024.0)<<std::endl;
618 // create a command queue.
619 int status=0;
620 cl_command_queue queue = clCreateCommandQueue( device->context(),*(device->device_id()),CL_QUEUE_PROFILING_ENABLE,&status);
621 if (status!=0)
622 return false;
623
624
625
626 // compile the kernel if not already compiled
627 std::vector<bocl_kernel*>& kernels = get_kernels(device,"");
628 // bit lookup buffer
629 cl_uchar lookup_arr[256];
630 boxm2_ocl_util::set_bit_lookup(lookup_arr);
631 bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
632 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
633 // set arguments
634 std::vector<boxm2_block_id> vis_order = scene->get_block_ids();
635 std::vector<boxm2_block_id>::iterator id;
636 bocl_kernel * kern=kernels[1];
637 for (id = vis_order.begin(); id != vis_order.end(); ++id)
638 {
639 //choose correct render kernel
640 boxm2_block_metadata mdata = scene->get_block_metadata(*id);
641 float pinit_buf[1];
642 pinit_buf[0] = mdata.p_init_;
643 bocl_mem * pinit=new bocl_mem(device->context(), pinit_buf, sizeof(float), "pinit");
644 pinit->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
645 //write the image values to the buffer
646 vul_timer transfer;
647 bocl_mem* blk = opencl_cache->get_block(scene,*id);
648 bocl_mem* blk_info = opencl_cache->loaded_block_info();
649 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,*id,0,false);
650 auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
651 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
652 // check for invalid parameters
653 if( alphaTypeSize == 0 ) //This should never happen, it will result in division by zero later
654 {
655 std::cout << "ERROR: alphaTypeSize == 0 in " << __FILE__ << __LINE__ << std::endl;
656 return false;
657 }
658
659 info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
660 blk_info->write_to_buffer((queue));
661 //grab an appropriately sized AUX data buffer
662 int auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX3>::prefix());
663 bocl_mem *aux3_product = opencl_cache->get_data(scene,*id, boxm2_data_traits<BOXM2_AUX3>::prefix(),info_buffer->data_buffer_length*auxTypeSize,true);
664 transfer_time += (float) transfer.all();
665
666 //set workspace
667 std::size_t ltr[] = {4, 4, 4};
668 std::size_t gtr[] = { RoundUp(mdata.sub_block_num_.x(), ltr[0]),
669 RoundUp(mdata.sub_block_num_.y(), ltr[1]),
670 RoundUp(mdata.sub_block_num_.z(), ltr[2])};
671
672 kern->set_arg( blk_info );
673 kern->set_arg( blk );
674 kern->set_arg( alpha );
675 kern->set_arg( aux3_product );
676 kern->set_arg( pinit );
677 kern->set_arg( lookup.ptr() );
678 kern->set_local_arg( ltr[0]*ltr[1]*ltr[2]*10*sizeof(cl_uchar) );
679 kern->set_local_arg( ltr[0]*ltr[1]*ltr[2]*sizeof(cl_uchar16) );
680 //execute kernel
681 kern->execute(queue, 3, ltr, gtr);
682 int status = clFinish(queue);
683 check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status));
684 gpu_time += kern->exec_time();
685
686 //clear render kernel args so it can reset em on next execution
687 kern->clear_args();
688
689 alpha->read_to_buffer(queue);
690 clFinish(queue);
691 //pinit->release_memory();
692
693 //delete pinit;
694 //opencl_cache->deep_remove_data(*id,boxm2_data_traits<BOXM2_ALPHA>::prefix(),true);
695 }
696 //read image out to buffer (from gpu)
697 clFinish(queue);
698 std::cout<<"Gpu time "<<gpu_time<<" transfer time "<<transfer_time<<std::endl;
699 clReleaseCommandQueue(queue);
700
701 return true;
702 }
703
get_kernels(const bocl_device_sptr & device,const std::string & opts)704 std::vector<bocl_kernel*>& boxm2_ocl_update_PusingQ::get_kernels(const bocl_device_sptr& device, const std::string& opts)
705 {
706 // compile kernels if not already compiled
707 std::string identifier = device->device_identifier() + opts;
708 if (kernels_.find(identifier) != kernels_.end())
709 return kernels_[identifier];
710
711 //otherwise compile the kernels
712 std::cout<<"=== boxm2_ocl_update_auxQ_process::compiling kernels on device "<<identifier<<"==="<<std::endl;
713 std::vector<std::string> src_paths;
714 std::string source_dir = boxm2_ocl_util::ocl_src_root();
715 src_paths.push_back(source_dir + "scene_info.cl");
716 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
717 src_paths.push_back(source_dir + "bit/update_kernels.cl");
718 std::vector<std::string> non_ray_src = std::vector<std::string>(src_paths);
719
720 //populate vector of kernels
721 std::vector<bocl_kernel*> vec_kernels;
722
723
724 //push back cast_ray_bit
725 auto* apply_beta = new bocl_kernel();
726 std::string apply_beta_opts = opts + " -D APPLYBETA";
727 apply_beta->create_kernel(&device->context(), device->device_id(), non_ray_src, "apply_beta", apply_beta_opts, "update::apply_beta");
728 vec_kernels.push_back(apply_beta);
729
730 auto* compute_product_Q = new bocl_kernel();
731 std::string product_q = opts + " -D PRODUCTQ";
732 compute_product_Q->create_kernel(&device->context(), device->device_id(), non_ray_src, "compute_product_Q", product_q, "update::compute_product_Q");
733 vec_kernels.push_back(compute_product_Q);
734 auto* update_P = new bocl_kernel();
735 std::string update_q_opts = opts + " -D UPDATEP";
736 update_P->create_kernel(&device->context(), device->device_id(), non_ray_src, "update_P_using_Q", update_q_opts, "update::update_P_using_Q");
737 vec_kernels.push_back(update_P);
738
739 //store and return
740 kernels_[identifier] = vec_kernels;
741 return kernels_[identifier];
742 }
743