1 // This is brl/bseg/boxm2/ocl/algo/boxm2_ocl_depth_renderer.cxx
2 //:
3 // \file
4 // \brief  Render an expected depth image of a boxm2 model
5 //
6 // \author Daniel Crispell, adapted from process version
7 // \date 4 Nov 2014
8 #include <iostream>
9 #include <algorithm>
10 #include <stdexcept>
11 #include "boxm2_ocl_depth_renderer.h"
12 
13 #ifdef _MSC_VER
14 #  include "vcl_msvc_warnings.h"
15 #endif
16 #include <boxm2/ocl/boxm2_opencl_cache.h>
17 #include <boxm2/boxm2_scene.h>
18 #include <boxm2/boxm2_block.h>
19 #include <boxm2/boxm2_data_base.h>
20 #include <boxm2/ocl/boxm2_ocl_util.h>
21 #include "vil/vil_image_view.h"
22 //brdb stuff
23 
24 //directory utility
25 #include <vcl_where_root_dir.h>
26 #include <bocl/bocl_device.h>
27 #include <bocl/bocl_kernel.h>
28 #include "boxm2_ocl_camera_converter.h"
29 #include "boxm2_ocl_render_expected_image_function.h"
30 #include "vul/vul_timer.h"
31 
32 boxm2_ocl_depth_renderer
boxm2_ocl_depth_renderer(const boxm2_scene_sptr & scene,const boxm2_opencl_cache_sptr & ocl_cache,const std::string &)33 ::boxm2_ocl_depth_renderer(const boxm2_scene_sptr& scene,
34                            const boxm2_opencl_cache_sptr& ocl_cache,
35                            const std::string&  /*ident*/) :
36   scene_(scene),
37   opencl_cache_(ocl_cache),
38   buffers_allocated_(false),
39   render_success_(false)
40 {
41   device_ = ocl_cache->get_device();
42   compile_kernels(device_);
43 }
44 
45 bool
46 boxm2_ocl_depth_renderer
allocate_render_buffers(int cl_ni,int cl_nj)47 ::allocate_render_buffers(int cl_ni, int cl_nj)
48 {
49   if ( buffers_allocated_ && (prev_ni_ == cl_ni) && (prev_nj_ == cl_nj) ) {
50     // can reuse old buffers
51     return true;
52   }
53   // else we need to allocate new buffers
54   if (buffers_allocated_) {
55     cleanup_render_buffers();
56   }
57   depth_buff_ = new float[cl_ni*cl_nj];
58   vis_buff_ = new float[cl_ni*cl_nj];
59   prob_buff_ = new float[cl_ni*cl_nj];
60   var_buff_ = new float[cl_ni*cl_nj];
61   t_infinity_buff_ = new float[cl_ni*cl_nj];
62 
63   ray_origins_buff_ = new cl_float[4*cl_ni*cl_nj];
64   ray_directions_buff_ = new cl_float[4*cl_ni*cl_nj];
65 
66   depth_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float), depth_buff_,"depth image buffer");
67   depth_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
68 
69   vis_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float), vis_buff_,"vis image buffer");
70   vis_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
71 
72   prob_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float), prob_buff_,"vis image buffer");
73   prob_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
74 
75   var_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float), var_buff_, "var image buffer");
76   var_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
77 
78   t_infinity_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float), t_infinity_buff_, "t_inf image buffer");
79   t_infinity_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
80 
81   ray_origins_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_origins_buff_, "ray_origins buffer");
82   ray_origins_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
83 
84   ray_directions_image_= opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_directions_buff_, "ray_directions buffer");
85   ray_directions_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
86 
87   img_dim_ = opencl_cache_->alloc_mem(sizeof(cl_int)*4, img_dim_buff_, "image dims");
88   img_dim_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
89 
90   tnearfar_ = opencl_cache_->alloc_mem(2*sizeof(cl_float), tnearfar_buff_, "tnearfar  buffer");
91   tnearfar_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
92 
93   cl_output_ = opencl_cache_->alloc_mem(100*sizeof(cl_float), output_buff_, "output buffer");
94   cl_output_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
95 
96   boxm2_ocl_util::set_bit_lookup( lookup_buff_ );
97   lookup_ = opencl_cache_->alloc_mem(256*sizeof(cl_uchar), lookup_buff_, "lookup buffer");
98   lookup_->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
99 
100   cl_subblk_dim_ = opencl_cache_->alloc_mem(sizeof(cl_float), &subblk_dim_, "subblock dim");
101   cl_subblk_dim_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
102 
103   buffers_allocated_ = true;
104   prev_ni_ = cl_ni;
105   prev_nj_ = cl_nj;
106   return true;
107 }
108 
109 bool
110 boxm2_ocl_depth_renderer
cleanup_render_buffers()111 ::cleanup_render_buffers()
112 {
113   if(!buffers_allocated_) {
114     return false;
115   }
116 
117   delete[] depth_buff_;
118   delete[] vis_buff_;
119   delete[] prob_buff_;
120   delete[] var_buff_;
121   delete[] t_infinity_buff_;
122   delete[] ray_origins_buff_;
123   delete[] ray_directions_buff_;
124 
125   opencl_cache_->unref_mem(depth_image_.ptr());
126   depth_image_ = bocl_mem_sptr(nullptr);
127   opencl_cache_->unref_mem(vis_image_.ptr());
128   vis_image_ = bocl_mem_sptr(nullptr);
129   opencl_cache_->unref_mem(prob_image_.ptr());
130   prob_image_ = bocl_mem_sptr(nullptr);
131   opencl_cache_->unref_mem(var_image_.ptr());
132   var_image_ = bocl_mem_sptr(nullptr);
133   opencl_cache_->unref_mem(t_infinity_image_.ptr());
134   t_infinity_image_ = bocl_mem_sptr(nullptr);
135   opencl_cache_->unref_mem(ray_origins_image_.ptr());
136   ray_origins_image_ = bocl_mem_sptr(nullptr);
137   opencl_cache_->unref_mem(ray_directions_image_.ptr());
138   ray_directions_image_ = bocl_mem_sptr(nullptr);
139 
140   opencl_cache_->unref_mem(tnearfar_.ptr());
141   tnearfar_ = bocl_mem_sptr(nullptr);
142   opencl_cache_->unref_mem(img_dim_.ptr());
143   img_dim_ = bocl_mem_sptr(nullptr);
144   opencl_cache_->unref_mem(cl_output_.ptr());
145   cl_output_ = bocl_mem_sptr(nullptr);
146   opencl_cache_->unref_mem(cl_subblk_dim_.ptr());
147   cl_subblk_dim_ = bocl_mem_sptr(nullptr);
148   opencl_cache_->unref_mem(lookup_.ptr());
149   lookup_ = bocl_mem_sptr(nullptr);
150 
151   buffers_allocated_ = false;
152   return true;
153 }
154 
155 boxm2_ocl_depth_renderer
~boxm2_ocl_depth_renderer()156 ::~boxm2_ocl_depth_renderer()
157 {
158   cleanup_render_buffers();
159 }
160 
161 bool
162 boxm2_ocl_depth_renderer
get_last_rendered(vil_image_view<float> & img)163 ::get_last_rendered(vil_image_view<float> &img)
164 {
165   if (render_success_) {
166     img.deep_copy(depth_img_);
167     return true;
168   }
169   return false;
170 }
171 
172 bool
173 boxm2_ocl_depth_renderer
get_last_vis(vil_image_view<float> & vis_img)174 ::get_last_vis(vil_image_view<float> &vis_img)
175 {
176   if (render_success_) {
177     vis_img.deep_copy( vis_img_ );
178     return true;
179   }
180   return false;
181 }
182 
183 bool
184 boxm2_ocl_depth_renderer
render(vpgl_camera_double_sptr camera,unsigned ni,unsigned nj,float,float)185 ::render(vpgl_camera_double_sptr camera, unsigned ni, unsigned nj, float  /*nearfactor*/, float  /*farfactor*/)
186 {
187   render_success_ = false;
188 
189   vul_timer rtime;
190 
191   std::size_t local_threads[2]={8,8};
192 
193   //: create a command queue.
194   int status=0;
195   cl_command_queue queue = clCreateCommandQueue(device_->context(),*(device_->device_id()),
196                                                 CL_QUEUE_PROFILING_ENABLE,&status);
197 
198   bool good_queue = check_val(status, CL_SUCCESS, "ERROR: boxm2_ocl_depth_renderer: Failed to create command queue: " + error_to_string(status));
199   if(!good_queue) {
200     return false;
201   }
202 
203   unsigned cl_ni=RoundUp(ni,local_threads[0]);
204   unsigned cl_nj=RoundUp(nj,local_threads[1]);
205   std::size_t global_threads[] = {cl_ni,cl_nj};
206 
207   allocate_render_buffers(cl_ni, cl_nj);
208 
209   // intialize the render image planes
210   std::fill(depth_buff_, depth_buff_ + cl_ni*cl_nj, 0.0f);
211   std::fill(vis_buff_, vis_buff_ + cl_ni*cl_nj, 1.0f);
212   std::fill(var_buff_, var_buff_ + cl_ni*cl_nj, 0.0f);
213   std::fill(prob_buff_, prob_buff_ + cl_ni*cl_nj, 0.0f);
214   std::fill(t_infinity_buff_, t_infinity_buff_ + cl_ni*cl_nj, 0.0f);
215 
216   img_dim_buff_[0] = 0;
217   img_dim_buff_[1] = 0;
218   img_dim_buff_[2] = ni;
219   img_dim_buff_[3] = nj;
220 
221   depth_image_->write_to_buffer(queue);
222   vis_image_->write_to_buffer(queue);
223   var_image_->write_to_buffer(queue);
224   prob_image_->write_to_buffer(queue);
225   t_infinity_image_->write_to_buffer(queue);
226   img_dim_->write_to_buffer(queue);
227 
228   // assumes that the camera may be changing between calls
229   boxm2_ocl_camera_converter::compute_ray_image( device_, queue, camera, cl_ni, cl_nj, ray_origins_image_, ray_directions_image_, 0, 0, false);
230 
231   int statusw = clFinish(queue);
232   bool good_write = check_val(statusw, CL_SUCCESS, "ERROR: boxm2_ocl_depth_renderer: Initial write to GPU failed: " + error_to_string(statusw));
233   if(!good_write) {
234     return false;
235   }
236 
237   std::vector<boxm2_block_id> vis_order;
238   if(camera->type_name() == "vpgl_perspective_camera") {
239       vis_order = scene_->get_vis_blocks_opt(dynamic_cast<vpgl_perspective_camera<double>*>(camera.ptr()),ni,nj);
240   }
241   else {
242       vis_order = scene_->get_vis_blocks(camera);
243   }
244 
245   subblk_dim_ = 0.0f; // in case there are no visible blocks;
246 
247   for (auto & id : vis_order) {
248 
249     boxm2_block_metadata mdata = scene_->get_block_metadata(id);
250 
251     bocl_mem* blk       = opencl_cache_->get_block(scene_,id);
252     bocl_mem* alpha     = opencl_cache_->get_data<BOXM2_ALPHA>(scene_,id);
253     bocl_mem* blk_info  = opencl_cache_->loaded_block_info();
254     subblk_dim_         = mdata.sub_block_dim_.x(); // assume this is not changing per block
255 
256     depth_kern_.set_arg( blk_info );
257     depth_kern_.set_arg( blk );
258     depth_kern_.set_arg( alpha );
259     depth_kern_.set_arg( ray_origins_image_.ptr() );
260     depth_kern_.set_arg( ray_directions_image_.ptr() );
261     depth_kern_.set_arg( depth_image_.ptr() );
262     depth_kern_.set_arg( var_image_.ptr() );
263     depth_kern_.set_arg( img_dim_.ptr() );
264     depth_kern_.set_arg( cl_output_.ptr() );
265     depth_kern_.set_arg( lookup_.ptr() );
266     depth_kern_.set_arg( vis_image_.ptr() );
267     depth_kern_.set_arg( prob_image_.ptr() );
268     depth_kern_.set_arg( t_infinity_image_.ptr() );
269 
270     //local tree , cumsum buffer, imindex buffer
271     depth_kern_.set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );
272     depth_kern_.set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) );
273     depth_kern_.set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) );
274 
275     //execute kernel
276     depth_kern_.execute(queue, 2, local_threads, global_threads);
277     statusw = clFinish(queue);
278     bool good_run = check_val(statusw, CL_SUCCESS, "ERROR: boxm2_ocl_depth_renderer: Execution of depth kernel failed: " + error_to_string(statusw));
279     if (!good_run) {
280       return false;
281     }
282     depth_kern_.clear_args();
283   }
284 
285   cl_subblk_dim_->write_to_buffer(queue, true);
286 
287   depth_norm_kern_.set_arg( depth_image_.ptr() );
288   depth_norm_kern_.set_arg( var_image_.ptr() );
289   depth_norm_kern_.set_arg( vis_image_.ptr() );
290   depth_norm_kern_.set_arg( img_dim_.ptr());
291   depth_norm_kern_.set_arg( t_infinity_image_.ptr());
292   depth_norm_kern_.set_arg( cl_subblk_dim_.ptr() );
293   depth_norm_kern_.execute( queue, 2, local_threads, global_threads);
294   statusw = clFinish(queue);
295 
296   bool good_norm = check_val(statusw, CL_SUCCESS, "ERROR: boxm2_ocl_depth_renderer: Execution of depth norm kernel failed: " + error_to_string(statusw));
297   if (!good_norm) {
298     return false;
299   }
300 
301   depth_image_->read_to_buffer(queue);
302   var_image_->read_to_buffer(queue);
303   vis_image_->read_to_buffer(queue);
304   statusw = clFinish(queue);
305   bool good_read = check_val(statusw, CL_SUCCESS, "ERROR: boxm2_ocl_depth_renderer: Read of depth buffers failed: " + error_to_string(statusw));
306   if (!good_read) {
307     return false;
308   }
309   depth_norm_kern_.clear_args();
310 
311   depth_img_.set_size(ni, nj);
312   vis_img_.set_size(ni, nj);
313 
314   for (unsigned r=0;r<nj;r++) {
315     for (unsigned c=0;c<ni;c++) {
316       const unsigned offset = r*cl_ni + c;
317       depth_img_(c,r)=depth_buff_[offset];
318       vis_img_(c,r)=vis_buff_[offset];
319     }
320   }
321 
322   std::cout<<"Total Render time: "<<rtime.all()<<" ms"<<std::endl;
323   clReleaseCommandQueue(queue);
324 
325   render_success_ = true;
326   return true;
327 }
328 
329 bool
330 boxm2_ocl_depth_renderer
compile_kernels(const bocl_device_sptr &)331 ::compile_kernels(const bocl_device_sptr&  /*device*/)
332 {
333   {
334     std::vector<std::string> src_paths;
335     std::string source_dir = boxm2_ocl_util::ocl_src_root();
336     src_paths.push_back(source_dir + "scene_info.cl");
337     src_paths.push_back(source_dir + "pixel_conversion.cl");
338     src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
339     src_paths.push_back(source_dir + "backproject.cl");
340     src_paths.push_back(source_dir + "statistics_library_functions.cl");
341     src_paths.push_back(source_dir + "expected_functor.cl");
342     src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
343     src_paths.push_back(source_dir + "bit/render_bit_scene.cl");
344     src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
345 
346     //set kernel options
347     std::string options = " -D RENDER_DEPTH ";
348     options +=  "-D DETERMINISTIC";
349     options += " -D STEP_CELL=step_cell_render_depth2(tblock,linfo->block_len,aux_args.alpha,data_ptr,d*linfo->block_len,aux_args.vis,aux_args.expdepth,aux_args.expdepthsqr,aux_args.probsum,aux_args.t)";
350 
351     //have kernel construct itself using the context and device
352     bool good = depth_kern_.create_kernel( &device_->context(),
353                                            device_->device_id(),
354                                            src_paths,
355                                            "render_depth",   //kernel name
356                                            options,              //options
357                                            "boxm2 opencl render depth image"); //kernel identifier (for error checking)
358     if (!good) {
359       std::cerr << "ERROR: boxm2_ocl_depth_renderer: error compiling depth kernel" << std::endl;
360       return false;
361     }
362   }
363   {
364     std::vector<std::string> norm_src_paths;
365     std::string source_dir = boxm2_ocl_util::ocl_src_root();
366     norm_src_paths.push_back(source_dir + "scene_info.cl");
367 
368     norm_src_paths.push_back(source_dir + "pixel_conversion.cl");
369     norm_src_paths.push_back(source_dir + "bit/normalize_kernels.cl");
370 
371     std::string options = " -D RENDER_DEPTH ";
372 
373     bool good = depth_norm_kern_.create_kernel( &device_->context(),
374                                                 device_->device_id(),
375                                                 norm_src_paths,
376                                                 "normalize_render_depth_kernel",   //kernel name
377                                                 options,              //options
378                                                 "normalize render depth kernel"); //kernel identifier (for error checking)
379 
380     if (!good) {
381       std::cerr << "ERROR: boxm2_ocl_depth_renderer: error compiling depth normalization kernel" << std::endl;
382       return false;
383     }
384   }
385   return true;
386 }
387