1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_ocl_probability_of_image_process.cxx
2 //:
3 // \file
4 // \brief A process for change detection
5 //
6 // \author Vishal Jain
7 // \date Mar 10, 2011
8
9 #include <fstream>
10 #include <iostream>
11 #include <algorithm>
12 #include <bprb/bprb_func_process.h>
13
14 #ifdef _MSC_VER
15 # include "vcl_msvc_warnings.h"
16 #endif
17 #include <boxm2/ocl/boxm2_opencl_cache.h>
18 #include <boxm2/boxm2_scene.h>
19 #include <boxm2/boxm2_block.h>
20 #include <boxm2/boxm2_data_base.h>
21 #include <boxm2/ocl/boxm2_ocl_util.h>
22 #include <boxm2/boxm2_util.h>
23 #include "vil/vil_image_view.h"
24 //brdb stuff
25 #include <brdb/brdb_value.h>
26
27 //directory utility
28 #include "vul/vul_timer.h"
29 #include <vcl_where_root_dir.h>
30 #include <bocl/bocl_device.h>
31 #include <bocl/bocl_kernel.h>
32
33
34 namespace boxm2_ocl_probability_of_image_process_globals
35 {
36 constexpr unsigned n_inputs_ = 8;
37 constexpr unsigned n_outputs_ = 1;
38 std::size_t lthreads[2]={8,8};
39
40 static std::map<std::string,std::vector<bocl_kernel*> > kernels;
41
compile_kernel(const bocl_device_sptr & device,std::vector<bocl_kernel * > & vec_kernels,std::string opts)42 void compile_kernel(const bocl_device_sptr& device,std::vector<bocl_kernel*> & vec_kernels, std::string opts)
43 {
44 //gather all render sources... seems like a lot for rendering...
45 //gather all render sources... seems like a lot for rendering...
46 std::vector<std::string> src_paths;
47 std::string source_dir = boxm2_ocl_util::ocl_src_root();
48 src_paths.push_back(source_dir + "scene_info.cl");
49 src_paths.push_back(source_dir + "cell_utils.cl");
50 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
51 src_paths.push_back(source_dir + "backproject.cl");
52 src_paths.push_back(source_dir + "statistics_library_functions.cl");
53 src_paths.push_back(source_dir + "expected_functor.cl");
54 src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
55 src_paths.push_back(source_dir + "bit/compute_probability_of_image.cl");
56 src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
57
58 //set kernel options
59 opts += " -D PROB_IMAGE -D DETERMINISTIC ";
60 std::string options=opts;
61
62 opts += " -D STEP_CELL=step_cell_compute_probability_of_intensity(aux_args.mog,aux_args.alpha,data_ptr,d*linfo->block_len,vis,aux_args.prob_image,aux_args.intensity) ";
63
64 //have kernel construct itself using the context and device
65 auto * ray_trace_kernel=new bocl_kernel();
66
67 ray_trace_kernel->create_kernel( &device->context(),
68 device->device_id(),
69 src_paths,
70 "compute_probability_of_image", //kernel name
71 opts, //options
72 "boxm2 ocl probability computation"); //kernel identifier (for error checking)
73 vec_kernels.push_back(ray_trace_kernel);
74 //create normalize image kernel
75 std::vector<std::string> norm_src_paths;
76 norm_src_paths.push_back(source_dir + "pixel_conversion.cl");
77 norm_src_paths.push_back(source_dir + "bit/normalize_kernels.cl");
78 auto * normalize_render_kernel=new bocl_kernel();
79
80 normalize_render_kernel->create_kernel( &device->context(),
81 device->device_id(),
82 norm_src_paths,
83 "normalize_probability_image_kernel", //kernel name
84 options, //options
85 "normalize probability image kernel"); //kernel identifier (for error checking)
86
87 vec_kernels.push_back(normalize_render_kernel);
88 }
89 }
90
boxm2_ocl_probability_of_image_process_cons(bprb_func_process & pro)91 bool boxm2_ocl_probability_of_image_process_cons(bprb_func_process& pro)
92 {
93 using namespace boxm2_ocl_probability_of_image_process_globals;
94
95 //process takes 1 input
96 std::vector<std::string> input_types_(n_inputs_);
97 input_types_[0] = "bocl_device_sptr";
98 input_types_[1] = "boxm2_scene_sptr";
99 input_types_[2] = "boxm2_opencl_cache_sptr";
100 input_types_[3] = "vpgl_camera_double_sptr";
101 input_types_[4] = "vil_image_view_base_sptr";
102 input_types_[5] = "vcl_string"; //identifier
103 input_types_[6] = "float"; // near factor ( # of pixels should map to the finest voxel )
104 input_types_[7] = "float"; // far factor ( # of pixels should map to the finest voxel )
105
106 // process has 1 output:
107 // output[0]: scene sptr
108 std::vector<std::string> output_types_(n_outputs_);
109 output_types_[0] = "vil_image_view_base_sptr";
110
111 brdb_value_sptr idx = new brdb_value_t<std::string>("");
112 pro.set_input(5, idx);
113 brdb_value_sptr tnearfactor = new brdb_value_t<float>(100000.0f); //by default update alpha
114 brdb_value_sptr tfarfactor = new brdb_value_t<float>(0.0001f); //by default update alpha
115
116 pro.set_input(6, tnearfactor);
117 pro.set_input(7, tfarfactor);
118 return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
119
120 }
121
boxm2_ocl_probability_of_image_process(bprb_func_process & pro)122 bool boxm2_ocl_probability_of_image_process(bprb_func_process& pro)
123 {
124 using namespace boxm2_ocl_probability_of_image_process_globals;
125 std::size_t local_threads[2]={8,8};
126 std::size_t global_threads[2]={8,8};
127
128 if ( pro.n_inputs() < n_inputs_ ) {
129 std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
130 return false;
131 }
132 float transfer_time=0.0f;
133 float gpu_time=0.0f;
134 //get the inputs
135 unsigned i = 0;
136 bocl_device_sptr device= pro.get_input<bocl_device_sptr>(i++);
137 boxm2_scene_sptr scene =pro.get_input<boxm2_scene_sptr>(i++);
138 boxm2_opencl_cache_sptr opencl_cache= pro.get_input<boxm2_opencl_cache_sptr>(i++);
139 vpgl_camera_double_sptr cam= pro.get_input<vpgl_camera_double_sptr>(i++);
140 vil_image_view_base_sptr img =pro.get_input<vil_image_view_base_sptr>(i++);
141 std::string data_identifier =pro.get_input<std::string>(i++);
142 auto nearfactor = pro.get_input<float>(i++);
143 auto farfactor = pro.get_input<float>(i++);
144 unsigned ni=img->ni();
145 unsigned nj=img->nj();
146 bool foundDataType = false;
147 std::string data_type,options;
148 std::vector<std::string> apps = scene->appearances();
149 for (const auto & app : apps) {
150 if ( app == boxm2_data_traits<BOXM2_MOG3_GREY>::prefix() )
151 {
152 data_type = app;
153 foundDataType = true;
154 options=" -D MOG_TYPE_8 ";
155 }
156 else if ( app == boxm2_data_traits<BOXM2_MOG3_GREY_16>::prefix() )
157 {
158 data_type = app;
159 foundDataType = true;
160 options=" -D MOG_TYPE_16 ";
161 }
162 else if ( app == boxm2_data_traits<BOXM2_FLOAT8>::prefix() )
163 {
164 data_type = app;
165 foundDataType = true;
166 options=" -D FLOAT8 ";
167 }
168 }
169 if (!foundDataType) {
170 std::cout<<"boxm2_ocl_probability_of_image_process ERROR: scene doesn't have BOXM2_MOG3_GREY or BOXM2_MOG3_GREY_16 data type"<<std::endl;
171 return false;
172 }
173
174 if (data_identifier!="")
175 data_type += data_identifier;
176
177 //: create a command queue.
178 int status=0;
179 cl_command_queue queue = clCreateCommandQueue(device->context(),*(device->device_id()),
180 CL_QUEUE_PROFILING_ENABLE,&status);
181 if (status!=0) return false;
182
183 std::string identifier=device->device_identifier()+options;
184 // compile the kernel
185 if (kernels.find(identifier)==kernels.end())
186 {
187 std::cout<<"===========Compiling kernels==========="<<std::endl;
188 std::vector<bocl_kernel*> ks;
189 compile_kernel(device,ks,options);
190 kernels[identifier]=ks;
191 }
192
193 // create all buffers
194 cl_float cam_buffer[48];
195 boxm2_ocl_util::set_persp_camera( (vpgl_perspective_camera<double>*) cam.ptr(), cam_buffer);
196 bocl_mem_sptr persp_cam=new bocl_mem(device->context(), cam_buffer, 3*sizeof(cl_float16), "persp cam buffer");
197 persp_cam->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
198
199 vil_image_view_base_sptr float_img=boxm2_util::prepare_input_image(img);
200 auto* img_view = static_cast<vil_image_view<float>* >(float_img.ptr());
201
202 unsigned cl_ni=RoundUp(img_view->ni(),local_threads[0]);
203 unsigned cl_nj=RoundUp(img_view->nj(),local_threads[1]);
204
205 global_threads[0]=cl_ni;
206 global_threads[1]=cl_nj;
207 auto* vis_buff = new float[cl_ni*cl_nj];
208 auto* prob_image_buff = new float[cl_ni*cl_nj];
209 auto* input_buff=new float[cl_ni*cl_nj];
210 for (unsigned i=0;i<cl_ni*cl_nj;i++)
211 {
212 vis_buff[i]=1.0f;
213 prob_image_buff[i]=0.0f;
214 }
215 int count=0;
216 for (unsigned int j=0;j<cl_nj;++j)
217 for (unsigned int i=0;i<cl_ni;++i)
218 {
219 input_buff[count] = 0.0f;
220 if (i<img_view->ni() && j< img_view->nj())
221 {
222 input_buff[count]=(*img_view)(i,j);
223 }
224 ++count;
225 }
226
227 bocl_mem_sptr in_image=new bocl_mem(device->context(),input_buff,cl_ni*cl_nj*sizeof(float),"input image buffer");
228 in_image->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
229
230 bocl_mem_sptr prob_image=new bocl_mem(device->context(),prob_image_buff,cl_ni*cl_nj*sizeof(float),"expected image buffer");
231 prob_image->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
232
233 bocl_mem_sptr vis_image=new bocl_mem(device->context(),vis_buff,cl_ni*cl_nj*sizeof(float),"vis image buffer");
234 vis_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
235
236 // Image Dimensions
237 int img_dim_buff[4];
238 img_dim_buff[0] = 0;
239 img_dim_buff[1] = 0;
240 img_dim_buff[2] = img_view->ni();
241 img_dim_buff[3] = img_view->nj();
242 bocl_mem_sptr img_dim=new bocl_mem(device->context(), img_dim_buff, sizeof(int)*4, "image dims");
243 img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
244
245 // Output Array
246 float output_arr[100];
247 for (float & i : output_arr) i = 0.0f;
248 bocl_mem_sptr cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
249 cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
250
251 // bit lookup buffer
252 cl_uchar lookup_arr[256];
253 boxm2_ocl_util::set_bit_lookup(lookup_arr);
254 bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
255 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
256 //For each ID in the visibility order, grab that block
257 std::vector<boxm2_block_id> vis_order = scene->get_vis_blocks( (vpgl_perspective_camera<double>*) cam.ptr());
258 std::vector<boxm2_block_id>::iterator id;
259 for (id = vis_order.begin(); id != vis_order.end(); ++id)
260 {
261 //choose correct render kernel
262 boxm2_block_metadata mdata = scene->get_block_metadata(*id);
263 bocl_kernel* kern = kernels[identifier][0];
264
265 //write the image values to the buffer
266 vul_timer transfer;
267 bocl_mem* blk = opencl_cache->get_block(scene,*id);
268 bocl_mem* blk_info = opencl_cache->loaded_block_info();
269 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,*id);
270 int mogTypeSize = (int) boxm2_data_info::datasize(data_type);
271 auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
272 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
273 info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
274 blk_info->write_to_buffer((queue));
275
276 bocl_mem* mog = opencl_cache->get_data(scene,*id,data_type,alpha->num_bytes()/alphaTypeSize*mogTypeSize,false); //info_buffer->data_buffer_length*boxm2_data_info::datasize(data_type));
277 transfer_time += (float) transfer.all();
278
279 ////3. SET args
280 kern->set_arg( blk_info );
281 kern->set_arg( blk );
282 kern->set_arg( alpha );
283 kern->set_arg( mog );
284 kern->set_arg( persp_cam.ptr() );
285 kern->set_arg( in_image.ptr() );
286 kern->set_arg( prob_image.ptr() );
287 kern->set_arg( img_dim.ptr());
288 kern->set_arg( cl_output.ptr() );
289 kern->set_arg( lookup.ptr() );
290 kern->set_arg( vis_image.ptr() );
291
292 //local tree , cumsum buffer, imindex buffer
293 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );
294 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) );
295 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) );
296
297 //execute kernel
298 kern->execute(queue, 2, local_threads, global_threads);
299 clFinish(queue);
300 gpu_time += kern->exec_time();
301
302 //clear render kernel args so it can reset em on next execution
303 kern->clear_args();
304 }
305 // normalize
306 bocl_kernel* normalize_prob_image_kernel = kernels[identifier][1];
307 {
308 normalize_prob_image_kernel->set_arg( prob_image.ptr() );
309 normalize_prob_image_kernel->set_arg( vis_image.ptr() );
310 normalize_prob_image_kernel->set_arg( img_dim.ptr());
311 normalize_prob_image_kernel->execute( queue, 2, local_threads, global_threads);
312 clFinish(queue);
313 gpu_time += normalize_prob_image_kernel->exec_time();
314
315 //clear render kernel args so it can reset em on next execution
316 normalize_prob_image_kernel->clear_args();
317 }
318
319 // read out expected image
320 prob_image->read_to_buffer(queue);
321 auto* prob_img_out=new vil_image_view<float>(ni,nj);
322
323 for (unsigned c=0;c<nj;c++)
324 for (unsigned r=0;r<ni;r++)
325 (*prob_img_out)(r,c)=prob_image_buff[c*cl_ni+r];
326
327 delete [] prob_image_buff;
328 delete [] vis_buff;
329 delete [] input_buff;
330
331 clReleaseCommandQueue(queue);
332 i=0;
333 // store scene smaprt pointer
334 pro.set_output_val<vil_image_view_base_sptr>(i++, prob_img_out);
335 return true;
336 }
337