1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_ocl_render_expected_height_map_process.cxx
2 #include <iostream>
3 #include <fstream>
4 #include <bprb/bprb_func_process.h>
5 //:
6 // \file
7 // \brief  A process for rendering height map of a scene.
8 //
9 // \author Vishal Jain
10 // \date Mar 30, 2011
11 
12 #ifdef _MSC_VER
13 #  include "vcl_msvc_warnings.h"
14 #endif
15 #include <boxm2/ocl/boxm2_opencl_cache.h>
16 #include <boxm2/boxm2_scene.h>
17 #include <boxm2/boxm2_block.h>
18 #include <boxm2/boxm2_data_base.h>
19 #include <boxm2/ocl/boxm2_ocl_util.h>
20 #include "vil/vil_image_view.h"
21 //brdb stuff
22 #include <brdb/brdb_value.h>
23 
24 //directory utility
25 #include "vul/vul_timer.h"
26 #include <vcl_where_root_dir.h>
27 #include <bocl/bocl_device.h>
28 #include <bocl/bocl_kernel.h>
29 
30 namespace boxm2_ocl_render_expected_height_map_process_globals
31 {
32   constexpr unsigned n_inputs_ = 3;
33   constexpr unsigned n_outputs_ = 5;
34   std::size_t local_threads[2]={8,8};
35   static std::map<std::string, std::vector<bocl_kernel*> > kernels_;
get_kernels(const bocl_device_sptr & device,const std::string & opts)36   std::vector<bocl_kernel*>& get_kernels(const bocl_device_sptr& device, const std::string& opts)
37   {
38       std::string identifier = device->device_identifier() + opts;
39       if (kernels_.find(identifier) != kernels_.end())
40           return kernels_[identifier];
41       std::vector<bocl_kernel*> vec_kernels;
42       //gather all render sources... seems like a lot for rendering...
43       std::vector<std::string> src_paths;
44       std::string source_dir = boxm2_ocl_util::ocl_src_root();
45       src_paths.push_back(source_dir + "scene_info.cl");
46       src_paths.push_back(source_dir + "pixel_conversion.cl");
47       src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
48       src_paths.push_back(source_dir + "backproject.cl");
49       src_paths.push_back(source_dir + "statistics_library_functions.cl");
50       src_paths.push_back(source_dir + "expected_functor.cl");
51       src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
52       src_paths.push_back(source_dir + "bit/render_bit_scene.cl");
53       src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
54 
55       //set kernel options
56       std::string options = "-D RENDER_DEPTH -D DETERMINISTIC -D STEP_CELL=step_cell_render_height(tblock,linfo->block_len,aux_args.alpha,data_ptr,d*linfo->block_len,aux_args.vis,aux_args.expdepth,aux_args.expdepthsqr,aux_args.probsum,aux_args.t)";
57       //create normalize image kernel
58       std::vector<std::string> norm_src_paths;
59       norm_src_paths.push_back(source_dir + "scene_info.cl");
60 
61       norm_src_paths.push_back(source_dir + "pixel_conversion.cl");
62       norm_src_paths.push_back(source_dir + "bit/normalize_kernels.cl");
63       auto * normalize_render_kernel = new bocl_kernel();
64 
65       normalize_render_kernel->create_kernel(&device->context(),
66           device->device_id(),
67           norm_src_paths,
68           "normalize_render_depth_kernel",   //kernel name
69           options,              //options
70           "normalize render depth kernel"); //kernel identifier (for error checking)
71       //have kernel construct itself using the context and device
72       auto * ray_trace_kernel = new bocl_kernel();
73 
74       ray_trace_kernel->create_kernel(&device->context(),
75                                        device->device_id(),
76                                        src_paths,
77                                        "render_depth",   //kernel name
78                                        options,              //options
79                                        "boxm2 opencl render depth image"); //kernel identifier (for error checking)
80       vec_kernels.push_back(ray_trace_kernel);
81 
82 
83 
84 
85       vec_kernels.push_back(normalize_render_kernel);
86       kernels_[identifier] = vec_kernels;
87 
88       return kernels_[identifier];
89   }
90 
91 }
92 
boxm2_ocl_render_expected_height_map_process_cons(bprb_func_process & pro)93 bool boxm2_ocl_render_expected_height_map_process_cons(bprb_func_process& pro)
94 {
95   using namespace boxm2_ocl_render_expected_height_map_process_globals;
96 
97   //process takes 1 input
98   std::vector<std::string> input_types_(n_inputs_);
99   input_types_[0] = "bocl_device_sptr";
100   input_types_[1] = "boxm2_scene_sptr";
101   input_types_[2] = "boxm2_opencl_cache_sptr";
102 
103   // process has 1 output:
104   // output[0]: scene sptr
105   std::vector<std::string>  output_types_(n_outputs_);
106   output_types_[0] = "vil_image_view_base_sptr";
107   output_types_[1] = "vil_image_view_base_sptr";
108   output_types_[2] = "vil_image_view_base_sptr";
109   output_types_[3] = "vil_image_view_base_sptr";
110   output_types_[4] = "vil_image_view_base_sptr";
111   //output_types_[5] = "vil_image_view_base_sptr";
112 
113   return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
114 }
115 
boxm2_ocl_render_expected_height_map_process(bprb_func_process & pro)116 bool boxm2_ocl_render_expected_height_map_process(bprb_func_process& pro)
117 {
118   using namespace boxm2_ocl_render_expected_height_map_process_globals;
119 
120 
121   if ( pro.n_inputs() < n_inputs_ ) {
122     std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
123     return false;
124   }
125   float transfer_time=0.0f;
126   float gpu_time=0.0f;
127   //get the inputs
128   unsigned i = 0;
129   bocl_device_sptr device= pro.get_input<bocl_device_sptr>(i++);
130   boxm2_scene_sptr scene =pro.get_input<boxm2_scene_sptr>(i++);
131   boxm2_opencl_cache_sptr opencl_cache= pro.get_input<boxm2_opencl_cache_sptr>(i++);
132   vgl_box_3d<double> bbox=scene->bounding_box();
133 
134 
135   //get x and y size from scene
136   std::vector<boxm2_block_id> vis_order = scene->get_block_ids();
137   std::vector<boxm2_block_id>::iterator id;
138   float xint=0.0f;
139   float yint=0.0f;
140   for (id = vis_order.begin(); id != vis_order.end(); ++id)
141   {
142     boxm2_block_metadata mdata=scene->get_block_metadata(*id);
143     float num_octree_cells=std::pow(2.0f,(float)mdata.max_level_-1);
144     xint=mdata.sub_block_dim_.x()/num_octree_cells;
145     yint=mdata.sub_block_dim_.y()/num_octree_cells;
146   }
147   auto ni=(unsigned int)std::ceil(bbox.width()/xint);
148   auto nj=(unsigned int)std::ceil(bbox.height()/yint);
149   std::cout<<"Size of the image "<<ni<<','<<nj<<std::endl;
150   float z= bbox.max_z();
151   std::string identifier=device->device_identifier();
152 
153   //: create a command queue.
154   int status=0;
155   cl_command_queue queue = clCreateCommandQueue(device->context(),*(device->device_id()),
156                                                 CL_QUEUE_PROFILING_ENABLE,&status);
157   if (status!=0)return false;
158 
159   std::vector<bocl_kernel*>& kernels = get_kernels(device, "");
160 
161   float scene_origin[4];
162   scene_origin[0]=bbox.min_x();
163   scene_origin[1]=bbox.min_y();
164   scene_origin[2]=bbox.min_z();
165   scene_origin[3]=1.0;
166   unsigned cl_ni=RoundUp(ni,local_threads[0]);
167   unsigned cl_nj=RoundUp(nj,local_threads[1]);
168 
169   auto* ray_origins = new cl_float[4 * cl_ni*cl_nj];
170   auto* ray_directions = new cl_float[4 * cl_ni*cl_nj];
171 
172   float ray_dx = 0, ray_dy = 0, ray_dz = -1;
173   // initialize ray origin buffer, first and last return buffers
174   int count = 0;
175   for (unsigned int j = 0; j<cl_nj; ++j) {
176       for (unsigned int i = 0; i<cl_ni; ++i) {
177 
178           int count4 = count * 4;
179           ray_origins[count4 + 0] = scene_origin[0] + ((float)i + 0.15f)*(xint);
180           ray_origins[count4 + 1] = scene_origin[1] + ((float)j + 0.15f)*(yint);
181 
182           ray_origins[count4 + 2] = z + 1.0f;
183           ray_origins[count4 + 3] = 0.0;
184           ray_directions[count4 + 0] = 0.0;
185           ray_directions[count4 + 1] = 0.0;
186           ray_directions[count4 + 2] = -1.0;
187           ray_directions[count4 + 3] = 0.0;
188 
189           ++count;
190       }
191   }
192   bocl_mem_sptr ray_o_buff = opencl_cache->alloc_mem(cl_ni*cl_nj * sizeof(cl_float4), ray_origins, "ray_origins buffer");
193   bocl_mem_sptr ray_d_buff = opencl_cache->alloc_mem(cl_ni*cl_nj * sizeof(cl_float4), ray_directions, "ray_directions buffer");
194 
195   ray_o_buff->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
196   ray_d_buff->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
197 
198   auto* buff = new float[cl_ni*cl_nj];
199   for (unsigned i = 0; i<cl_ni*cl_nj; i++) buff[i] = 0.0f;
200   auto* var_buff = new float[cl_ni*cl_nj];
201   for (unsigned i = 0; i<cl_ni*cl_nj; i++) var_buff[i] = 0.0f;
202   auto* vis_buff = new float[cl_ni*cl_nj];
203   for (unsigned i = 0; i<cl_ni*cl_nj; i++) vis_buff[i] = 1.0f;
204   auto* prob_buff = new float[cl_ni*cl_nj];
205   for (unsigned i = 0; i<cl_ni*cl_nj; i++) prob_buff[i] = 0.0f;
206   auto* t_infinity_buff = new float[cl_ni*cl_nj];
207   for (unsigned i = 0; i<cl_ni*cl_nj; i++) t_infinity_buff[i] = 0.0f;
208 
209   bocl_mem_sptr exp_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), buff, "exp image buffer");
210   exp_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
211 
212   bocl_mem_sptr var_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), var_buff, "var image buffer");
213   var_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
214 
215   bocl_mem_sptr vis_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), vis_buff, "vis image buffer");
216   vis_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
217 
218   bocl_mem_sptr prob_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), prob_buff, "vis x omega image buffer");
219   prob_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
220 
221   bocl_mem_sptr t_infinity = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), t_infinity_buff, "t infinity buffer");
222   t_infinity->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
223 
224   // Image Dimensions
225   int img_dim_buff[4];
226   img_dim_buff[0] = 0;
227   img_dim_buff[1] = 0;
228   img_dim_buff[2] = ni;
229   img_dim_buff[3] = nj;
230   bocl_mem_sptr exp_img_dim = new bocl_mem(device->context(), img_dim_buff, sizeof(int) * 4, "image dims");
231   exp_img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
232 
233   // Output Array
234   float output_arr[100];
235   for (float & i : output_arr) i = 0.0f;
236   bocl_mem_sptr  cl_output = new bocl_mem(device->context(), output_arr, sizeof(float) * 100, "output buffer");
237   cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
238 
239   // bit lookup buffer
240   cl_uchar lookup_arr[256];
241   boxm2_ocl_util::set_bit_lookup(lookup_arr);
242   bocl_mem_sptr lookup = new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar) * 256, "bit lookup buffer");
243   lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
244 
245   //2. set workgroup size
246   std::size_t lThreads[] = {8, 8};
247   std::size_t gThreads[] = {cl_ni,cl_nj};
248   float subblk_dim = 0.0;
249   for (id = vis_order.begin(); id != vis_order.end(); ++id)
250   {
251       //choose correct render kernel
252       boxm2_block_metadata mdata = scene->get_block_metadata(*id);
253       bocl_kernel* kern = kernels[0];
254 
255       //write the image values to the buffer
256       vul_timer transfer;
257       bocl_mem* blk = opencl_cache->get_block(scene, *id);
258       bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene, *id);
259       bocl_mem * blk_info = opencl_cache->loaded_block_info();
260       transfer_time += (float)transfer.all();
261       subblk_dim = mdata.sub_block_dim_.x();
262       ////3. SET args
263       kern->set_arg(blk_info);
264       kern->set_arg(blk);
265       kern->set_arg(alpha);
266       kern->set_arg(ray_o_buff.ptr());
267       kern->set_arg(ray_d_buff.ptr());
268       kern->set_arg(exp_image.ptr());
269       kern->set_arg(var_image.ptr());
270       kern->set_arg(exp_img_dim.ptr());
271       kern->set_arg(cl_output.ptr());
272       kern->set_arg(lookup.ptr());
273       kern->set_arg(vis_image.ptr());
274       kern->set_arg(prob_image.ptr());
275       kern->set_arg(t_infinity.ptr());
276       kern->set_local_arg(local_threads[0] * local_threads[1] * sizeof(cl_uchar16));
277       kern->set_local_arg(local_threads[0] * local_threads[1] * 10 * sizeof(cl_uchar));
278       kern->set_local_arg(local_threads[0] * local_threads[1] * sizeof(cl_int));
279 
280       //execute kernel
281       kern->execute(queue, 2, lThreads, gThreads);
282       clFinish(queue);
283       gpu_time += kern->exec_time();
284       cl_output->read_to_buffer(queue);
285 
286       // clear render kernel args so it can reset em on next execution
287       kern->clear_args();
288   }
289 
290   bocl_mem_sptr  subblk_dim_mem = new bocl_mem(device->context(), &(subblk_dim), sizeof(float), "sub block dim buffer");
291   subblk_dim_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
292   // normalize
293   {
294       bocl_kernel* normalize_kern = kernels[1];
295       normalize_kern->set_arg(exp_image.ptr());
296       normalize_kern->set_arg(var_image.ptr());
297       normalize_kern->set_arg(vis_image.ptr());
298       normalize_kern->set_arg(exp_img_dim.ptr());
299       normalize_kern->set_arg(t_infinity.ptr());
300       normalize_kern->set_arg(subblk_dim_mem.ptr());
301       normalize_kern->execute(queue, 2, local_threads, gThreads);
302       clFinish(queue);
303       gpu_time += normalize_kern->exec_time();
304       //clear render kernel args so it can reset em on next execution
305       normalize_kern->clear_args();
306       exp_image->read_to_buffer(queue);
307       var_image->read_to_buffer(queue);
308       vis_image->read_to_buffer(queue);
309   }
310   clReleaseCommandQueue(queue);
311 
312   i=0;
313   auto* exp_img_out=new vil_image_view<float>(ni,nj);
314   auto* exp_var_out=new vil_image_view<float>(ni,nj);
315   auto* xcoord_img=new vil_image_view<float>(ni,nj);
316   auto* ycoord_img=new vil_image_view<float>(ni,nj);
317   auto* prob_img=new vil_image_view<float>(ni,nj);
318 
319   for (unsigned c=0;c<nj;++c)
320     for (unsigned r=0;r<ni;++r)
321     {
322       (*exp_img_out)(r,c) = /*z-*/buff[c*cl_ni+r];
323       (*exp_var_out)(r,c) = var_buff[c*cl_ni+r];
324       (*xcoord_img)(r, c) = ray_origins[(c*cl_ni + r) * 4 + 0];
325       (*ycoord_img)(r, c) = ray_origins[(c*cl_ni + r) * 4 + 1];
326       (*prob_img)(r,c) = prob_buff[c*cl_ni+r];
327     }
328   // store scene smaprt pointer
329   pro.set_output_val<vil_image_view_base_sptr>(i++, exp_img_out);
330   pro.set_output_val<vil_image_view_base_sptr>(i++, exp_var_out);
331   pro.set_output_val<vil_image_view_base_sptr>(i++, xcoord_img);
332   pro.set_output_val<vil_image_view_base_sptr>(i++, ycoord_img);
333   pro.set_output_val<vil_image_view_base_sptr>(i++, prob_img);
334   delete[] buff;
335   delete[] var_buff;
336   delete[] vis_buff;
337   delete[] prob_buff;
338   delete[] t_infinity_buff;
339   //delete[] app_buff;
340   opencl_cache->unref_mem(exp_image.ptr());
341   opencl_cache->unref_mem(var_image.ptr());
342   opencl_cache->unref_mem(vis_image.ptr());
343   opencl_cache->unref_mem(prob_image.ptr());
344   opencl_cache->unref_mem(t_infinity.ptr());
345   opencl_cache->unref_mem(ray_o_buff.ptr());
346   opencl_cache->unref_mem(ray_d_buff.ptr());
347   return true;
348 }
349