1 // This is brl/bseg/boxm2/ocl/algo/boxm2_ocl_update_based_dispersion.cxx
2 #include <fstream>
3 #include <iostream>
4 #include <algorithm>
5 #include "boxm2_ocl_update_based_dispersion.h"
6 //:
7 // \file
8 // \brief A process for updating a color model
9 //
10 // \author Vishal Jain
11 // \date Mar 25, 2011
12
13 #ifdef _MSC_VER
14 # include "vcl_msvc_warnings.h"
15 #endif
16 #include <boxm2/ocl/boxm2_opencl_cache.h>
17 #include <boxm2/boxm2_scene.h>
18 #include <boxm2/boxm2_block.h>
19 #include <boxm2/boxm2_data_base.h>
20 #include <boxm2/ocl/boxm2_ocl_util.h>
21 #include <boxm2/boxm2_util.h>
22 #include <boxm2/ocl/algo/boxm2_ocl_camera_converter.h>
23 #include "vil/vil_image_view.h"
24 #include "vil/vil_save.h"
25
26 //directory utility
27 #include "vul/vul_timer.h"
28 #include <vcl_where_root_dir.h>
29 #include <bocl/bocl_device.h>
30 #include <bocl/bocl_kernel.h>
31 #include <boct/boct_bit_tree.h>
32 #include "vnl/vnl_numeric_traits.h"
33
34 //: Map of kernels should persist between process executions
35 std::map<std::string,std::vector<bocl_kernel*> > boxm2_ocl_update_based_dispersion::kernels_;
36
37 //Main public method, updates color model
update(const boxm2_scene_sptr & scene,bocl_device_sptr device,const boxm2_opencl_cache_sptr & opencl_cache,vpgl_camera_double_sptr cam,const vil_image_view_base_sptr & img,const std::string & ident,const vil_image_view_base_sptr & mask_sptr,bool update_alpha,float mog_var,bool update_app,float resnearfactor,float resfarfactor,std::size_t startI,std::size_t startJ)38 bool boxm2_ocl_update_based_dispersion::update(const boxm2_scene_sptr& scene,
39 bocl_device_sptr device,
40 const boxm2_opencl_cache_sptr& opencl_cache,
41 vpgl_camera_double_sptr cam,
42 const vil_image_view_base_sptr& img,
43 const std::string& ident,
44 const vil_image_view_base_sptr& mask_sptr,
45 bool update_alpha,
46 float mog_var,
47 bool update_app,
48 float resnearfactor,
49 float resfarfactor,
50 std::size_t startI,
51 std::size_t startJ)
52 {
53 enum {
54 UPDATE_SEGLEN = 0,
55 UPDATE_PREINF = 1,
56 UPDATE_PROC = 2,
57 UPDATE_BAYES = 3,
58 UPDATE_CELL_BASED_DISPERSION = 4
59 };
60 float transfer_time=0.0f;
61 float gpu_time=0.0f;
62 std::size_t local_threads[2]={8,8};
63 std::size_t global_threads[2]={8,8};
64
65 //catch a "null" mask (not really null because that throws an error)
66 bool use_mask = false;
67 if ( mask_sptr->ni() == img->ni() && mask_sptr->nj() == img->nj() ) {
68 std::cout<<"Update using mask."<<std::endl;
69 use_mask = true;
70 }
71 vil_image_view<unsigned char >* mask_map = nullptr;
72 if (use_mask) {
73 mask_map = dynamic_cast<vil_image_view<unsigned char> *>(mask_sptr.ptr());
74 if (!mask_map) {
75 std::cout<<"boxm2_update_process:: mask map is not an unsigned char map"<<std::endl;
76 return false;
77 }
78 }
79
80 //cache size sanity check
81 std::size_t binCache = opencl_cache.ptr()->bytes_in_cache();
82 std::cout<<"Update MBs in cache: "<<binCache/(1024.0*1024.0)<<std::endl;
83
84 //make correct data types are here
85 std::string data_type, num_obs_type,options;
86 int appTypeSize;
87 bool isRGB = false;
88 if (!validate_appearances(scene, data_type, appTypeSize, num_obs_type, options, isRGB))
89 return false;
90 if (ident.size() > 0) {
91 data_type += "_" + ident;
92 num_obs_type += "_" + ident;
93 }
94
95
96 // create a command queue.
97 int status=0;
98 cl_command_queue queue = clCreateCommandQueue( device->context(),
99 *(device->device_id()),
100 CL_QUEUE_PROFILING_ENABLE,
101 &status);
102 if (status!=0)
103 return false;
104
105 // compile the kernel if not already compiled
106 std::vector<bocl_kernel*>& kernels = get_kernels(device, options);
107
108 //grab input image, establish cl_ni, cl_nj (so global size is divisible by local size)
109 vil_image_view_base_sptr float_img = boxm2_util::prepare_input_image(img, true);
110 auto* img_view = static_cast<vil_image_view<float>* >(float_img.ptr());
111 auto cl_ni=(unsigned)RoundUp(img_view->ni(),(int)local_threads[0]);
112 auto cl_nj=(unsigned)RoundUp(img_view->nj(),(int)local_threads[1]);
113 global_threads[0]=cl_ni;
114 global_threads[1]=cl_nj;
115
116 //set generic cam
117 auto* ray_origins = new cl_float[4*cl_ni*cl_nj];
118 auto* ray_directions = new cl_float[4*cl_ni*cl_nj];
119 bocl_mem_sptr ray_o_buff = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_origins, "ray_origins buffer");
120 bocl_mem_sptr ray_d_buff = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_directions, "ray_directions buffer");
121 boxm2_ocl_camera_converter::compute_ray_image( device, queue, cam, cl_ni, cl_nj, ray_o_buff, ray_d_buff, startI, startJ);
122
123 float ray_origin[4];
124 ray_origin[0] = ray_origins[0];
125 ray_origin[1] = ray_origins[1];
126 ray_origin[2] = ray_origins[2];
127 ray_origin[3] = 0.0f;
128 bocl_mem_sptr ray_origin_ptr = opencl_cache->alloc_mem(4*sizeof(float), ray_origin, "global ray origin buffer");
129 ray_origin_ptr->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
130 float tnearfar[2] = { 0.0f, 1000000} ;
131 if(cam->type_name() == "vpgl_perspective_camera")
132 {
133
134 float f = ((vpgl_perspective_camera<double> *)cam.ptr())->get_calibration().focal_length()*((vpgl_perspective_camera<double> *)cam.ptr())->get_calibration().x_scale();
135 tnearfar[0] = f* scene->finest_resolution()/resnearfactor ;
136 tnearfar[1] = f* scene->finest_resolution()/resfarfactor ;
137 std::cout<<"Near and Far Clipping planes "<<tnearfar[0]<<" "<<tnearfar[1]<<std::endl;
138 }
139 bocl_mem_sptr tnearfar_mem_ptr = opencl_cache->alloc_mem(2*sizeof(float), tnearfar, "tnearfar buffer");
140 tnearfar_mem_ptr->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
141 //Visibility, Preinf, Norm, and input image buffers
142 auto* vis_buff = new float[cl_ni*cl_nj];
143 auto* pre_buff = new float[cl_ni*cl_nj];
144 auto* norm_buff = new float[cl_ni*cl_nj];
145 auto* input_buff=new float[cl_ni*cl_nj];
146 for (unsigned i=0;i<cl_ni*cl_nj;i++)
147 {
148 vis_buff[i]=1.0f;
149 pre_buff[i]=0.0f;
150 norm_buff[i]=0.0f;
151 }
152 //copy input vals into image
153 int count=0;
154 for (unsigned int j=0;j<cl_nj;++j) {
155 for (unsigned int i=0;i<cl_ni;++i) {
156 input_buff[count] = 0.0f;
157 if ( i<img_view->ni() && j< img_view->nj() )
158 input_buff[count] = (*img_view)(i,j);
159 ++count;
160 }
161 }
162
163
164 //bocl_mem_sptr in_image=new bocl_mem(device->context(),input_buff,cl_ni*cl_nj*sizeof(float),"input image buffer");
165 bocl_mem_sptr in_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), input_buff, "input image buffer");
166 in_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
167
168 //bocl_mem_sptr vis_image=new bocl_mem(device->context(),vis_buff,cl_ni*cl_nj*sizeof(float),"vis image buffer");
169 bocl_mem_sptr vis_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), vis_buff, "vis image buffer");
170 vis_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
171
172 //bocl_mem_sptr pre_image=new bocl_mem(device->context(),pre_buff,cl_ni*cl_nj*sizeof(float),"pre image buffer");
173 bocl_mem_sptr pre_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), pre_buff, "pre image buffer");
174 pre_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
175
176 //bocl_mem_sptr norm_image=new bocl_mem(device->context(),norm_buff,cl_ni*cl_nj*sizeof(float),"pre image buffer");
177 bocl_mem_sptr norm_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), norm_buff, "norm image buffer");
178 norm_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
179
180 // Image Dimensions
181 int img_dim_buff[4];
182 img_dim_buff[0] = 0;
183 img_dim_buff[1] = 0;
184 img_dim_buff[2] = img_view->ni();
185 img_dim_buff[3] = img_view->nj();
186
187 bocl_mem_sptr img_dim=new bocl_mem(device->context(), img_dim_buff, sizeof(int)*4, "image dims");
188 img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
189 bocl_mem_sptr centerX = new bocl_mem(device->context(), boct_bit_tree::centerX, sizeof(cl_float)*585, "centersX lookup buffer");
190 bocl_mem_sptr centerY = new bocl_mem(device->context(), boct_bit_tree::centerY, sizeof(cl_float)*585, "centersY lookup buffer");
191 bocl_mem_sptr centerZ = new bocl_mem(device->context(), boct_bit_tree::centerZ, sizeof(cl_float)*585, "centersZ lookup buffer");
192 centerX->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
193 centerY->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
194 centerZ->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
195 // Output Array
196 float output_arr[100];
197 for (float & i : output_arr) i = 0.0f;
198 bocl_mem_sptr cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
199 cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
200
201 // bit lookup buffer
202 cl_uchar lookup_arr[256];
203 boxm2_ocl_util::set_bit_lookup(lookup_arr);
204 bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
205 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
206
207 // app density used for proc_norm_image
208 float app_buffer[4]={1.0,0.0,0.0,0.0};
209 bocl_mem_sptr app_density = new bocl_mem(device->context(), app_buffer, sizeof(cl_float4), "app density buffer");
210 app_density->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
211
212 // set arguments
213 std::vector<boxm2_block_id> vis_order;
214 if(cam->type_name() == "vpgl_perspective_camera")
215 vis_order= scene->get_vis_blocks_opt((vpgl_perspective_camera<double>*)cam.ptr(),img_view->ni(),img_view->nj());
216 else
217 vis_order= scene->get_vis_blocks(cam);
218 std::vector<boxm2_block_id>::iterator id;
219 for (unsigned int i=0; i<kernels.size(); ++i)
220 {
221 if ( i == UPDATE_PROC ) {
222 bocl_kernel * proc_kern=kernels[i];
223 proc_kern->set_arg( norm_image.ptr() );
224 proc_kern->set_arg( in_image.ptr() );
225 proc_kern->set_arg( vis_image.ptr() );
226 proc_kern->set_arg( pre_image.ptr());
227 proc_kern->set_arg( img_dim.ptr() );
228 proc_kern->set_arg( app_density.ptr() );
229 //execute kernel
230 proc_kern->execute( queue, 2, local_threads, global_threads);
231 int status = clFinish(queue);
232 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
233 return false;
234 proc_kern->clear_args();
235 norm_image->read_to_buffer(queue);
236
237 }
238
239 //set masked values
240 vis_image->read_to_buffer(queue);
241 if (use_mask)
242 {
243 int count = 0;
244 for (unsigned int j=0;j<cl_nj;++j) {
245 for (unsigned int i=0;i<cl_ni;++i) {
246 if ( i<mask_map->ni() && j<mask_map->nj() ) {
247 if ( (*mask_map)(i,j)==0 ) {
248 input_buff[count] = -1.0f;
249 vis_buff [count] = 0.0f;
250 }
251 }
252 ++count;
253 }
254 }
255 in_image->write_to_buffer(queue);
256 vis_image->write_to_buffer(queue);
257 clFinish(queue);
258 }
259
260 for (id = vis_order.begin(); id != vis_order.end(); ++id)
261 {
262 //choose correct render kernel
263 boxm2_block_metadata mdata = scene->get_block_metadata(*id);
264 bocl_kernel* kern = kernels[i];
265
266 //write the image values to the buffer
267 vul_timer transfer;
268 bocl_mem* blk = opencl_cache->get_block(scene,*id);
269 bocl_mem* blk_info = opencl_cache->loaded_block_info();
270 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,*id,0,false);
271 auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
272 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
273 info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
274 blk_info->write_to_buffer((queue));
275
276 int nobsTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_NUM_OBS>::prefix());
277 // data type string may contain an identifier so determine the buffer size
278 bocl_mem* mog = opencl_cache->get_data(scene,*id,data_type,alpha->num_bytes()/alphaTypeSize*appTypeSize,false); //info_buffer->data_buffer_length*boxm2_data_info::datasize(data_type));
279 bocl_mem* num_obs = opencl_cache->get_data(scene,*id,num_obs_type,alpha->num_bytes()/alphaTypeSize*nobsTypeSize,false);//,info_buffer->data_buffer_length*boxm2_data_info::datasize(num_obs_type));
280
281 //grab an appropriately sized AUX data buffer
282 int auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix());
283 bocl_mem *aux0 = opencl_cache->get_data<BOXM2_AUX0>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
284 auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX1>::prefix());
285 bocl_mem *aux1 = opencl_cache->get_data<BOXM2_AUX1>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
286
287 transfer_time += (float) transfer.all();
288 if (i==UPDATE_SEGLEN)
289 {
290 aux0->zero_gpu_buffer(queue);
291 aux1->zero_gpu_buffer(queue);
292 kern->set_arg( blk_info );
293 kern->set_arg( blk );
294 kern->set_arg( alpha );
295 kern->set_arg( aux0 );
296 kern->set_arg( aux1 );
297 kern->set_arg( lookup.ptr() );
298
299 // kern->set_arg( persp_cam.ptr() );
300 kern->set_arg( ray_o_buff.ptr() );
301 kern->set_arg( ray_d_buff.ptr() );
302 kern->set_arg( tnearfar_mem_ptr.ptr() );
303 kern->set_arg( img_dim.ptr() );
304 kern->set_arg( in_image.ptr() );
305 kern->set_arg( cl_output.ptr() );
306 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
307 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar4) ); //ray bundle,
308 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) ); //cell pointers,
309 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_float4) ); //cached aux,
310 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
311
312 //execute kernel
313 kern->execute(queue, 2, local_threads, global_threads);
314 int status = clFinish(queue);
315 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
316 return false;
317 gpu_time += kern->exec_time();
318
319 //clear render kernel args so it can reset em on next execution
320 kern->clear_args();
321
322 aux0->read_to_buffer(queue);
323 aux1->read_to_buffer(queue);
324 }
325 else if (i==UPDATE_PREINF)
326 {
327 kern->set_arg( blk_info );
328 kern->set_arg( blk );
329 kern->set_arg( alpha );
330 kern->set_arg( mog );
331 kern->set_arg( num_obs );
332 kern->set_arg( aux0 );
333 kern->set_arg( aux1 );
334 kern->set_arg( lookup.ptr() );
335 kern->set_arg( ray_o_buff.ptr() );
336 kern->set_arg( ray_d_buff.ptr() );
337 kern->set_arg( tnearfar_mem_ptr.ptr() );
338 kern->set_arg( img_dim.ptr() );
339 kern->set_arg( vis_image.ptr() );
340 kern->set_arg( pre_image.ptr() );
341 kern->set_arg( cl_output.ptr() );
342 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
343 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
344 //execute kernel
345 kern->execute(queue, 2, local_threads, global_threads);
346 int status = clFinish(queue);
347 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
348 return false;
349 gpu_time += kern->exec_time();
350
351 //clear render kernel args so it can reset em on next execution
352 kern->clear_args();
353
354 //write info to disk
355 }
356 else if (i==UPDATE_BAYES)
357 {
358 auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX2>::prefix());
359 bocl_mem *aux2 = opencl_cache->get_data<BOXM2_AUX2>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
360 aux2->zero_gpu_buffer(queue);
361 auxTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX3>::prefix());
362 bocl_mem *aux3 = opencl_cache->get_data<BOXM2_AUX3>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
363 aux3->zero_gpu_buffer(queue);
364
365 kern->set_arg( blk_info );
366 kern->set_arg( blk );
367 kern->set_arg( alpha );
368 kern->set_arg( mog );
369 kern->set_arg( num_obs );
370 kern->set_arg( aux0 );
371 kern->set_arg( aux1 );
372 kern->set_arg( aux2 );
373 kern->set_arg( aux3 );
374 kern->set_arg( lookup.ptr() );
375 kern->set_arg( ray_o_buff.ptr() );
376 kern->set_arg( ray_d_buff.ptr() );
377 kern->set_arg( tnearfar_mem_ptr.ptr() );
378 kern->set_arg( img_dim.ptr() );
379 kern->set_arg( vis_image.ptr() );
380 kern->set_arg( pre_image.ptr() );
381 kern->set_arg( norm_image.ptr() );
382 kern->set_arg( cl_output.ptr() );
383 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
384 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_short2) ); //ray bundle,
385 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) ); //cell pointers,
386 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_float) ); //cached aux,
387 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
388 //execute kernel
389 kern->execute(queue, 2, local_threads, global_threads);
390 int status = clFinish(queue);
391 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
392 return false;
393 gpu_time += kern->exec_time();
394
395 //clear render kernel args so it can reset em on next execution
396 kern->clear_args();
397 aux2->read_to_buffer(queue);
398 aux3->read_to_buffer(queue);
399 }
400 else if (i==UPDATE_CELL_BASED_DISPERSION)
401 {
402 auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX2>::prefix());
403 bocl_mem *aux2 = opencl_cache->get_data<BOXM2_AUX2>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
404
405 auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX3>::prefix());
406 bocl_mem *aux3 = opencl_cache->get_data<BOXM2_AUX3>(scene,*id, info_buffer->data_buffer_length*auxTypeSize);
407
408 auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix("direction_expectation_x"));
409 bocl_mem *aux0_dir_x = opencl_cache->get_data<BOXM2_AUX0>(scene,*id, info_buffer->data_buffer_length*auxTypeSize,false, "direction_expectation_x");
410
411 auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix("direction_expectation_y"));
412 bocl_mem *aux0_dir_y = opencl_cache->get_data<BOXM2_AUX0>(scene,*id, info_buffer->data_buffer_length*auxTypeSize,false, "direction_expectation_y");
413
414 auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix("direction_expectation_z"));
415 bocl_mem *aux0_dir_z = opencl_cache->get_data<BOXM2_AUX0>(scene,*id, info_buffer->data_buffer_length*auxTypeSize,false, "direction_expectation_z");
416
417 auxTypeSize = boxm2_data_info::datasize(boxm2_data_traits<BOXM2_AUX0>::prefix("visibility_expectation"));
418 bocl_mem *aux0_vis_exp = opencl_cache->get_data<BOXM2_AUX0>(scene,*id, info_buffer->data_buffer_length*auxTypeSize,false, "visibility_expectation");
419
420 // update_alpha boolean buffer
421 cl_int up_alpha[1];
422 up_alpha[0] = update_alpha ? 1 : 0;
423 bocl_mem_sptr up_alpha_mem = new bocl_mem(device->context(), up_alpha, sizeof(up_alpha), "update alpha bool buffer");
424 up_alpha_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
425
426 // update_app boolean buffer
427 cl_int up_app[1];
428 up_app[0] = update_app ? 1 : 0;
429 bocl_mem_sptr up_app_mem = new bocl_mem(device->context(), up_app, sizeof(up_app), "update app bool buffer");
430 up_app_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
431 //mog variance, if 0.0f or less, then var will be learned
432 bocl_mem_sptr mog_var_mem = new bocl_mem(device->context(), &mog_var, sizeof(mog_var), "update gauss variance");
433 mog_var_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
434
435 local_threads[0] = 64;
436 local_threads[1] = 1 ;
437 global_threads[0] = (unsigned) RoundUp(mdata.sub_block_num_.x()*mdata.sub_block_num_.y()*mdata.sub_block_num_.z(),(int)local_threads[0]);
438 global_threads[1]=1;
439
440 kern->set_arg( centerX.ptr() );
441 kern->set_arg( centerY.ptr() );
442 kern->set_arg( centerZ.ptr() );
443 kern->set_arg( lookup.ptr() );
444 kern->set_arg( blk_info );
445 kern->set_arg( blk );
446 kern->set_arg( alpha );
447 kern->set_arg( mog );
448 kern->set_arg( num_obs );
449 kern->set_arg( aux0 );
450 kern->set_arg( aux1 );
451 kern->set_arg( aux2 );
452 kern->set_arg( aux3 );
453 kern->set_arg( aux0_dir_x );
454 kern->set_arg( aux0_dir_y );
455 kern->set_arg( aux0_dir_z );
456 kern->set_arg( aux0_vis_exp );
457 kern->set_arg( ray_origin_ptr.ptr() );
458
459 kern->set_arg( up_alpha_mem.ptr() );
460 kern->set_arg( mog_var_mem.ptr() );
461 kern->set_arg( up_app_mem.ptr() );
462 kern->set_arg( cl_output.ptr() );
463 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) ); //cumsum buffer, imindex buffer
464 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );//local tree,
465 //execute kernel
466 kern->execute(queue, 2, local_threads, global_threads);
467 int status = clFinish(queue);
468 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
469 return false;
470 gpu_time += kern->exec_time();
471
472 //clear render kernel args so it can reset em on next execution
473 kern->clear_args();
474 //write info to disk
475 alpha->read_to_buffer(queue);
476 mog->read_to_buffer(queue);
477 num_obs->read_to_buffer(queue);
478 aux0_dir_x->read_to_buffer(queue);
479 aux0_dir_y->read_to_buffer(queue);
480 aux0_dir_z->read_to_buffer(queue);
481 aux0_vis_exp->read_to_buffer(queue);
482 }
483
484 //read image out to buffer (from gpu)
485 in_image->read_to_buffer(queue);
486 vis_image->read_to_buffer(queue);
487 pre_image->read_to_buffer(queue);
488 cl_output->read_to_buffer(queue);
489 clFinish(queue);
490 }
491 }
492
493 delete [] vis_buff;
494 delete [] pre_buff;
495 delete [] norm_buff;
496 delete [] input_buff;
497 delete [] ray_origins;
498 delete [] ray_directions;
499 opencl_cache->unref_mem(in_image.ptr());
500 opencl_cache->unref_mem(vis_image.ptr());
501 opencl_cache->unref_mem(pre_image.ptr());
502 opencl_cache->unref_mem(norm_image.ptr());
503 opencl_cache->unref_mem(ray_o_buff.ptr());
504 opencl_cache->unref_mem(ray_d_buff.ptr());
505 opencl_cache->unref_mem(tnearfar_mem_ptr.ptr());
506 std::cout<<"Gpu time "<<gpu_time<<" transfer time "<<transfer_time<<std::endl;
507 clReleaseCommandQueue(queue);
508 return true;
509 }
510
511
512 //Returns vector of color update kernels (and caches them per device
get_kernels(const bocl_device_sptr & device,const std::string & opts,bool isRGB)513 std::vector<bocl_kernel*>& boxm2_ocl_update_based_dispersion::get_kernels(const bocl_device_sptr& device, const std::string& opts, bool isRGB)
514 {
515 // compile kernels if not already compiled
516 std::string identifier = device->device_identifier() + opts;
517 if (kernels_.find(identifier) != kernels_.end())
518 return kernels_[identifier];
519
520 //otherwise compile the kernels
521 std::cout<<"=== boxm2_ocl_update_based_dispersion_process::compiling kernels on device "<<identifier<<"==="<<std::endl;
522
523 std::vector<std::string> src_paths;
524 std::string source_dir = boxm2_ocl_util::ocl_src_root();
525 src_paths.push_back(source_dir + "scene_info.cl");
526 src_paths.push_back(source_dir + "pixel_conversion.cl");
527 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
528 src_paths.push_back(source_dir + "backproject.cl");
529 src_paths.push_back(source_dir + "atomics_util.cl");
530 src_paths.push_back(source_dir + "statistics_library_functions.cl");
531 src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
532 if (isRGB)
533 src_paths.push_back(source_dir + "bit/update_rgb_kernels.cl");
534 else
535 src_paths.push_back(source_dir + "bit/update_kernels.cl");
536 std::vector<std::string> non_ray_src = std::vector<std::string>(src_paths);
537
538 //push ray trace files
539 if (isRGB)
540 src_paths.push_back(source_dir + "update_rgb_functors.cl");
541 else
542 src_paths.push_back(source_dir + "update_functors.cl");
543 src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
544
545 //compilation options
546 const std::string& options = /*"-D ATOMIC_FLOAT " +*/ opts;
547
548 //populate vector of kernels
549 std::vector<bocl_kernel*> vec_kernels;
550
551 //seg len pass
552 auto* seg_len = new bocl_kernel();
553 std::string seg_opts = options + " -D SEGLEN -D STEP_CELL=step_cell_seglen(aux_args,data_ptr,llid,d)";
554 seg_len->create_kernel(&device->context(), device->device_id(), src_paths, "seg_len_main", seg_opts, "update::seg_len");
555 vec_kernels.push_back(seg_len);
556
557 //create compress rgb pass
558 if (isRGB) {
559 auto* comp = new bocl_kernel();
560 std::string comp_opts = options + " -D COMPRESS_RGB";
561 comp->create_kernel(&device->context(), device->device_id(), non_ray_src, "compress_rgb", comp_opts, "update::compress_rgb");
562 vec_kernels.push_back(comp);
563 }
564 else {
565 //vec_kernels.push_back(NULL);
566 }
567 auto* pre_inf = new bocl_kernel();
568 std::string pre_opts = options + " -D PREINF -D STEP_CELL=step_cell_preinf(aux_args,data_ptr,llid,d)";
569 pre_inf->create_kernel(&device->context(), device->device_id(), src_paths, "pre_inf_main", pre_opts, "update::pre_inf");
570 vec_kernels.push_back(pre_inf);
571
572 //may need DIFF LIST OF SOURCES FOR THIS GUY
573 auto* proc_img = new bocl_kernel();
574 std::string proc_opts = options + " -D PROC_NORM ";
575 proc_img->create_kernel(&device->context(), device->device_id(), non_ray_src, "proc_norm_image", proc_opts, "update::proc_norm_image");
576 vec_kernels.push_back(proc_img);
577
578 //push back cast_ray_bit
579 auto* bayes_main = new bocl_kernel();
580 std::string bayes_opt = options + " -D BAYES -D STEP_CELL=step_cell_bayes(aux_args,data_ptr,llid,d)";
581 bayes_main->create_kernel(&device->context(), device->device_id(), src_paths, "bayes_main", bayes_opt, "update::bayes_main");
582 vec_kernels.push_back(bayes_main);
583
584
585 //may need DIFF LIST OF SOURCES FOR THSI GUY TOO
586 auto* update = new bocl_kernel();
587 std::string update_opts = options + " -D UPDATE_BIT_BASED_DISPERSION";
588 update->create_kernel(&device->context(), device->device_id(), non_ray_src, "update_bit_scene_based_dispersion", update_opts, "update::update_main");
589 vec_kernels.push_back(update);
590
591 //store and return
592 kernels_[identifier] = vec_kernels;
593 return kernels_[identifier];
594 }
595
596
597 //makes sure appearance types correspond correctly
validate_appearances(const boxm2_scene_sptr & scene,std::string & data_type,int & appTypeSize,std::string & num_obs_type,std::string & options,bool &)598 bool boxm2_ocl_update_based_dispersion::validate_appearances(const boxm2_scene_sptr& scene,
599 std::string& data_type,
600 int& appTypeSize,
601 std::string& num_obs_type,
602 std::string& options,
603 bool& /*isRGB*/)
604 {
605 std::vector<std::string> apps = scene->appearances();
606 bool foundDataType = false, foundNumObsType = false;
607 for (const auto & app : apps) {
608 if ( app == boxm2_data_traits<BOXM2_MOG3_GREY>::prefix() )
609 {
610 data_type = app;
611 foundDataType = true;
612 options=" -D MOG_TYPE_8";
613 appTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_MOG3_GREY>::prefix());
614 }
615 else if ( app == boxm2_data_traits<BOXM2_MOG3_GREY_16>::prefix() )
616 {
617 data_type = app;
618 foundDataType = true;
619 options=" -D MOG_TYPE_16";
620 appTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_MOG3_GREY_16>::prefix());
621 }
622 else if ( app == boxm2_data_traits<BOXM2_NUM_OBS>::prefix() )
623 {
624 num_obs_type = app;
625 foundNumObsType = true;
626 }
627 }
628 if (!foundDataType) {
629 std::cout<<"BOXM2_OPENCL_UPDATE_PROCESS ERROR: scene doesn't have BOXM2_MOG3_GREY or BOXM2_MOG3_GREY_16 data type"<<std::endl;
630 return false;
631 }
632 if (!foundNumObsType) {
633 std::cout<<"BOXM2_OPENCL_UPDATE_PROCESS ERROR: scene doesn't have BOXM2_NUM_OBS type"<<std::endl;
634 return false;
635 }
636 return true;
637 }
638