1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_ocl_probability_of_image_wcubic_process.cxx
2 //:
3 // \file
4 // \brief A process for change detection
5 //
6 // \author Vishal Jain
7 // \date Mar 10, 2011
8
9 #include <fstream>
10 #include <iostream>
11 #include <algorithm>
12 #include <bprb/bprb_func_process.h>
13
14 #ifdef _MSC_VER
15 # include "vcl_msvc_warnings.h"
16 #endif
17 #include <boxm2/ocl/boxm2_opencl_cache.h>
18 #include <boxm2/boxm2_scene.h>
19 #include <boxm2/boxm2_block.h>
20 #include <boxm2/boxm2_data_base.h>
21 #include <boxm2/ocl/boxm2_ocl_util.h>
22 #include <boxm2/boxm2_util.h>
23 #include "vil/vil_image_view.h"
24 //brdb stuff
25 #include <brdb/brdb_value.h>
26
27 //directory utility
28 #include "vul/vul_timer.h"
29 #include <vcl_where_root_dir.h>
30 #include <bocl/bocl_device.h>
31 #include <bocl/bocl_kernel.h>
32
33 #include <boxm2/ocl/algo/boxm2_ocl_camera_converter.h>
34
35 namespace boxm2_ocl_probability_of_image_wcubic_process_globals
36 {
37 constexpr unsigned n_inputs_ = 7;
38 constexpr unsigned n_outputs_ = 1;
39 std::size_t lthreads[2]={8,8};
40
41 static std::map<std::string,std::vector<bocl_kernel*> > kernels;
42
compile_kernel(const bocl_device_sptr & device,std::vector<bocl_kernel * > & vec_kernels,std::string opts)43 void compile_kernel(const bocl_device_sptr& device,std::vector<bocl_kernel*> & vec_kernels, std::string opts)
44 {
45 //gather all render sources... seems like a lot for rendering...
46 //gather all render sources... seems like a lot for rendering...
47 std::vector<std::string> src_paths;
48 std::string source_dir = boxm2_ocl_util::ocl_src_root();
49 src_paths.push_back(source_dir + "scene_info.cl");
50 src_paths.push_back(source_dir + "cell_utils.cl");
51 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
52 src_paths.push_back(source_dir + "backproject.cl");
53 src_paths.push_back(source_dir + "statistics_library_functions.cl");
54 src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
55 src_paths.push_back(source_dir + "bit/compute_probability_of_image.cl");
56 src_paths.push_back(source_dir + "update_cubic_functors.cl");
57 src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
58
59 //set kernel options
60 opts += " -D PROB_CUBIC_IMAGE ";
61 std::string options=opts;
62
63 opts += " -D STEP_CELL=step_cell_cubic_compute_probability_of_intensity(aux_args,data_ptr,d*linfo->block_len,vis,aux_args.prob_image) ";
64
65 //have kernel construct itself using the context and device
66 auto * ray_trace_kernel=new bocl_kernel();
67
68 ray_trace_kernel->create_kernel( &device->context(),
69 device->device_id(),
70 src_paths,
71 "cubic_compute_probability_of_image", //kernel name
72 opts, //options
73 "boxm2 ocl probability computation"); //kernel identifier (for error checking)
74 vec_kernels.push_back(ray_trace_kernel);
75 //create normalize image kernel
76 std::vector<std::string> norm_src_paths;
77 norm_src_paths.push_back(source_dir + "pixel_conversion.cl");
78 norm_src_paths.push_back(source_dir + "bit/normalize_kernels.cl");
79 auto * normalize_render_kernel=new bocl_kernel();
80
81 normalize_render_kernel->create_kernel( &device->context(),
82 device->device_id(),
83 norm_src_paths,
84 "normalize_probability_image_kernel", //kernel name
85 "-D PROB_IMAGE", //options
86 "normalize probability image kernel"); //kernel identifier (for error checking)
87
88 vec_kernels.push_back(normalize_render_kernel);
89 }
90 }
91
boxm2_ocl_probability_of_image_wcubic_process_cons(bprb_func_process & pro)92 bool boxm2_ocl_probability_of_image_wcubic_process_cons(bprb_func_process& pro)
93 {
94 using namespace boxm2_ocl_probability_of_image_wcubic_process_globals;
95
96 //process takes 1 input
97 std::vector<std::string> input_types_(n_inputs_);
98 input_types_[0] = "bocl_device_sptr";
99 input_types_[1] = "boxm2_scene_sptr";
100 input_types_[2] = "boxm2_opencl_cache_sptr";
101 input_types_[3] = "vpgl_camera_double_sptr";
102 input_types_[4] = "vil_image_view_base_sptr";
103 input_types_[5] = "vcl_string"; // model identifier
104 input_types_[6] = "vcl_string"; // aux identifier
105
106 // process has 1 output:
107 // output[0]: scene sptr
108 std::vector<std::string> output_types_(n_outputs_);
109 output_types_[0] = "vil_image_view_base_sptr";
110
111 brdb_value_sptr idx = new brdb_value_t<std::string>("");
112 pro.set_input(5, idx);
113 return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
114 }
115
boxm2_ocl_probability_of_image_wcubic_process(bprb_func_process & pro)116 bool boxm2_ocl_probability_of_image_wcubic_process(bprb_func_process& pro)
117 {
118 using namespace boxm2_ocl_probability_of_image_wcubic_process_globals;
119 std::size_t local_threads[2]={8,8};
120 std::size_t global_threads[2]={8,8};
121
122 if ( pro.n_inputs() < n_inputs_ ) {
123 std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
124 return false;
125 }
126 float transfer_time=0.0f;
127 float gpu_time=0.0f;
128 //get the inputs
129 unsigned i = 0;
130 bocl_device_sptr device= pro.get_input<bocl_device_sptr>(i++);
131 boxm2_scene_sptr scene =pro.get_input<boxm2_scene_sptr>(i++);
132 boxm2_opencl_cache_sptr opencl_cache= pro.get_input<boxm2_opencl_cache_sptr>(i++);
133 vpgl_camera_double_sptr cam= pro.get_input<vpgl_camera_double_sptr>(i++);
134 vil_image_view_base_sptr img =pro.get_input<vil_image_view_base_sptr>(i++);
135 std::string data_identifier =pro.get_input<std::string>(i++);
136 std::string image_identifier =pro.get_input<std::string>(i++);
137 unsigned ni=img->ni();
138 unsigned nj=img->nj();
139 bool foundDataType = false;
140 std::string data_type,options;
141 std::vector<std::string> apps = scene->appearances();
142 for (const auto & app : apps) {
143 if ( app == boxm2_data_traits<BOXM2_FLOAT8>::prefix() )
144 {
145 data_type = app;
146 foundDataType = true;
147 options=" -D FLOAT8 ";
148 }
149 }
150 if (!foundDataType) {
151 std::cout<<"boxm2_ocl_probability_of_image_wcubic_process ERROR: scene doesn't have BOXM2_MOG3_GREY or BOXM2_MOG3_GREY_16 data type"<<std::endl;
152 return false;
153 }
154
155 if (data_identifier!="")
156 data_type = data_type+"_"+data_identifier;
157
158 //: create a command queue.
159 int status=0;
160 cl_command_queue queue = clCreateCommandQueue(device->context(),*(device->device_id()),
161 CL_QUEUE_PROFILING_ENABLE,&status);
162 if (status!=0) return false;
163
164 std::string identifier=device->device_identifier()+options;
165 // compile the kernel
166 if (kernels.find(identifier)==kernels.end())
167 {
168 std::cout<<"===========Compiling kernels==========="<<std::endl;
169 std::vector<bocl_kernel*> ks;
170 compile_kernel(device,ks,options);
171 kernels[identifier]=ks;
172 }
173
174 //set generic cam
175
176 vil_image_view_base_sptr float_img=boxm2_util::prepare_input_image(img);
177 auto* img_view = static_cast<vil_image_view<float>* >(float_img.ptr());
178
179 unsigned cl_ni=RoundUp(img_view->ni(),local_threads[0]);
180 unsigned cl_nj=RoundUp(img_view->nj(),local_threads[1]);
181
182 auto* ray_origins = new cl_float[4*cl_ni*cl_nj];
183 auto* ray_directions = new cl_float[4*cl_ni*cl_nj];
184 bocl_mem_sptr ray_o_buff = new bocl_mem(device->context(), ray_origins, cl_ni*cl_nj * sizeof(cl_float4) , "ray_origins buffer");
185 bocl_mem_sptr ray_d_buff = new bocl_mem(device->context(), ray_directions, cl_ni*cl_nj * sizeof(cl_float4), "ray_directions buffer");
186 boxm2_ocl_camera_converter::compute_ray_image( device, queue, cam, cl_ni, cl_nj, ray_o_buff, ray_d_buff);
187
188 global_threads[0]=cl_ni;
189 global_threads[1]=cl_nj;
190 auto* vis_buff = new float[cl_ni*cl_nj];
191 auto* prob_image_buff = new float[cl_ni*cl_nj];
192 for (unsigned i=0;i<cl_ni*cl_nj;i++)
193 {
194 vis_buff[i]=1.0f;
195 prob_image_buff[i]=0.0f;
196 }
197
198 bocl_mem_sptr prob_image=new bocl_mem(device->context(),prob_image_buff,cl_ni*cl_nj*sizeof(float),"expected image buffer");
199 prob_image->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
200
201 bocl_mem_sptr vis_image=new bocl_mem(device->context(),vis_buff,cl_ni*cl_nj*sizeof(float),"vis image buffer");
202 vis_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
203
204 // Image Dimensions
205 int img_dim_buff[4];
206 img_dim_buff[0] = 0;
207 img_dim_buff[1] = 0;
208 img_dim_buff[2] = img_view->ni();
209 img_dim_buff[3] = img_view->nj();
210 bocl_mem_sptr img_dim=new bocl_mem(device->context(), img_dim_buff, sizeof(int)*4, "image dims");
211 img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
212
213 // Output Array
214 float output_arr[100];
215 for (float & i : output_arr) i = 0.0f;
216 bocl_mem_sptr cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
217 cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
218
219 // bit lookup buffer
220 cl_uchar lookup_arr[256];
221 boxm2_ocl_util::set_bit_lookup(lookup_arr);
222 bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
223 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
224
225 //For each ID in the visibility order, grab that block
226 std::vector<boxm2_block_id> vis_order = scene->get_vis_blocks( (vpgl_perspective_camera<double>*) cam.ptr());
227 std::vector<boxm2_block_id>::iterator id;
228 for (id = vis_order.begin(); id != vis_order.end(); ++id)
229 {
230 //choose correct render kernel
231 boxm2_block_metadata mdata = scene->get_block_metadata(*id);
232 bocl_kernel* kern = kernels[identifier][0];
233 //write the image values to the buffer
234 vul_timer transfer;
235 bocl_mem* blk = opencl_cache->get_block(scene,*id);
236 bocl_mem* blk_info = opencl_cache->loaded_block_info();
237 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,*id);
238 int mogTypeSize = (int) boxm2_data_info::datasize(data_type);
239 auto* info_buffer = (boxm2_scene_info*) blk_info->cpu_buffer();
240 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
241 info_buffer->data_buffer_length = (int) (alpha->num_bytes()/alphaTypeSize);
242 blk_info->write_to_buffer((queue));
243
244 bocl_mem* mog = opencl_cache->get_data(scene,*id,data_type,alpha->num_bytes()/alphaTypeSize*mogTypeSize,false); //info_buffer->data_buffer_length*boxm2_data_info::datasize(data_type));
245 std::string aux0_datatype = boxm2_data_traits<BOXM2_AUX0>::prefix(image_identifier);
246 bocl_mem* aux0 = opencl_cache->get_data(scene,*id,aux0_datatype,alpha->num_bytes(),false);
247 std::string aux1_datatype = boxm2_data_traits<BOXM2_AUX1>::prefix(image_identifier);
248 bocl_mem* aux1 = opencl_cache->get_data(scene,*id,aux1_datatype,alpha->num_bytes(),false);
249
250 transfer_time += (float) transfer.all();
251
252 ////3. SET args
253 kern->set_arg( blk_info );
254 kern->set_arg( blk );
255 kern->set_arg( alpha );
256 kern->set_arg( mog );
257 kern->set_arg( aux0 );
258 kern->set_arg( aux1 );
259 kern->set_arg( ray_o_buff.ptr() );
260 kern->set_arg( ray_d_buff.ptr() );
261 kern->set_arg( prob_image.ptr() );
262 kern->set_arg( vis_image.ptr() );
263 kern->set_arg( img_dim.ptr());
264 kern->set_arg( cl_output.ptr() );
265 kern->set_arg( lookup.ptr() );
266
267 //local tree , cumsum buffer, imindex buffer
268 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );
269 kern->set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) );
270 kern->set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) );
271
272 //execute kernel
273 kern->execute(queue, 2, local_threads, global_threads);
274 clFinish(queue);
275 gpu_time += kern->exec_time();
276
277 //clear render kernel args so it can reset em on next execution
278 kern->clear_args();
279 }
280
281 #if 1
282 // normalize
283 bocl_kernel* normalize_prob_image_kernel = kernels[identifier][1];
284 {
285 normalize_prob_image_kernel->set_arg( prob_image.ptr() );
286 normalize_prob_image_kernel->set_arg( vis_image.ptr() );
287 normalize_prob_image_kernel->set_arg( img_dim.ptr());
288 normalize_prob_image_kernel->execute( queue, 2, local_threads, global_threads);
289 clFinish(queue);
290 gpu_time += normalize_prob_image_kernel->exec_time();
291
292 //clear render kernel args so it can reset em on next execution
293 normalize_prob_image_kernel->clear_args();
294 }
295 #endif
296 // read out expected image
297 prob_image->read_to_buffer(queue);
298 vis_image->read_to_buffer(queue);
299 clFinish(queue);
300 auto* prob_img_out=new vil_image_view<float>(ni,nj);
301
302 for (unsigned c=0;c<nj;c++)
303 for (unsigned r=0;r<ni;r++)
304 (*prob_img_out)(r,c)=prob_image_buff[c*cl_ni+r];
305
306 delete [] prob_image_buff;
307 delete [] vis_buff;
308
309 clReleaseCommandQueue(queue);
310 i=0;
311 // store scene smaprt pointer
312 pro.set_output_val<vil_image_view_base_sptr>(i++, prob_img_out);
313 return true;
314 }
315