1 // This is brl/bseg/boxm2/ocl/algo/boxm2_ocl_depth_renderer.cxx
2 //:
3 // \file
// \brief Render an expected depth image of a boxm2 model
5 //
6 // \author Daniel Crispell, adapted from process version
7 // \date 4 Nov 2014
8 #include <iostream>
9 #include <algorithm>
10 #include <stdexcept>
11 #include "boxm2_ocl_depth_renderer.h"
12
13 #ifdef _MSC_VER
14 # include "vcl_msvc_warnings.h"
15 #endif
16 #include <boxm2/ocl/boxm2_opencl_cache.h>
17 #include <boxm2/boxm2_scene.h>
18 #include <boxm2/boxm2_block.h>
19 #include <boxm2/boxm2_data_base.h>
20 #include <boxm2/ocl/boxm2_ocl_util.h>
21 #include "vil/vil_image_view.h"
22 //brdb stuff
23
24 //directory utility
25 #include <vcl_where_root_dir.h>
26 #include <bocl/bocl_device.h>
27 #include <bocl/bocl_kernel.h>
28 #include "boxm2_ocl_camera_converter.h"
29 #include "boxm2_ocl_render_expected_image_function.h"
30 #include "vul/vul_timer.h"
31
32 boxm2_ocl_depth_renderer
boxm2_ocl_depth_renderer(const boxm2_scene_sptr & scene,const boxm2_opencl_cache_sptr & ocl_cache,const std::string &)33 ::boxm2_ocl_depth_renderer(const boxm2_scene_sptr& scene,
34 const boxm2_opencl_cache_sptr& ocl_cache,
35 const std::string& /*ident*/) :
36 scene_(scene),
37 opencl_cache_(ocl_cache),
38 buffers_allocated_(false),
39 render_success_(false)
40 {
41 device_ = ocl_cache->get_device();
42 compile_kernels(device_);
43 }
44
45 bool
46 boxm2_ocl_depth_renderer
allocate_render_buffers(int cl_ni,int cl_nj)47 ::allocate_render_buffers(int cl_ni, int cl_nj)
48 {
49 if ( buffers_allocated_ && (prev_ni_ == cl_ni) && (prev_nj_ == cl_nj) ) {
50 // can reuse old buffers
51 return true;
52 }
53 // else we need to allocate new buffers
54 if (buffers_allocated_) {
55 cleanup_render_buffers();
56 }
57 depth_buff_ = new float[cl_ni*cl_nj];
58 vis_buff_ = new float[cl_ni*cl_nj];
59 prob_buff_ = new float[cl_ni*cl_nj];
60 var_buff_ = new float[cl_ni*cl_nj];
61 t_infinity_buff_ = new float[cl_ni*cl_nj];
62
63 ray_origins_buff_ = new cl_float[4*cl_ni*cl_nj];
64 ray_directions_buff_ = new cl_float[4*cl_ni*cl_nj];
65
66 depth_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float), depth_buff_,"depth image buffer");
67 depth_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
68
69 vis_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float), vis_buff_,"vis image buffer");
70 vis_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
71
72 prob_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float), prob_buff_,"vis image buffer");
73 prob_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
74
75 var_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float), var_buff_, "var image buffer");
76 var_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
77
78 t_infinity_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float), t_infinity_buff_, "t_inf image buffer");
79 t_infinity_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
80
81 ray_origins_image_ = opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_origins_buff_, "ray_origins buffer");
82 ray_origins_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
83
84 ray_directions_image_= opencl_cache_->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_directions_buff_, "ray_directions buffer");
85 ray_directions_image_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
86
87 img_dim_ = opencl_cache_->alloc_mem(sizeof(cl_int)*4, img_dim_buff_, "image dims");
88 img_dim_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
89
90 tnearfar_ = opencl_cache_->alloc_mem(2*sizeof(cl_float), tnearfar_buff_, "tnearfar buffer");
91 tnearfar_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
92
93 cl_output_ = opencl_cache_->alloc_mem(100*sizeof(cl_float), output_buff_, "output buffer");
94 cl_output_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
95
96 boxm2_ocl_util::set_bit_lookup( lookup_buff_ );
97 lookup_ = opencl_cache_->alloc_mem(256*sizeof(cl_uchar), lookup_buff_, "lookup buffer");
98 lookup_->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
99
100 cl_subblk_dim_ = opencl_cache_->alloc_mem(sizeof(cl_float), &subblk_dim_, "subblock dim");
101 cl_subblk_dim_->create_buffer(CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR);
102
103 buffers_allocated_ = true;
104 prev_ni_ = cl_ni;
105 prev_nj_ = cl_nj;
106 return true;
107 }
108
109 bool
110 boxm2_ocl_depth_renderer
cleanup_render_buffers()111 ::cleanup_render_buffers()
112 {
113 if(!buffers_allocated_) {
114 return false;
115 }
116
117 delete[] depth_buff_;
118 delete[] vis_buff_;
119 delete[] prob_buff_;
120 delete[] var_buff_;
121 delete[] t_infinity_buff_;
122 delete[] ray_origins_buff_;
123 delete[] ray_directions_buff_;
124
125 opencl_cache_->unref_mem(depth_image_.ptr());
126 depth_image_ = bocl_mem_sptr(nullptr);
127 opencl_cache_->unref_mem(vis_image_.ptr());
128 vis_image_ = bocl_mem_sptr(nullptr);
129 opencl_cache_->unref_mem(prob_image_.ptr());
130 prob_image_ = bocl_mem_sptr(nullptr);
131 opencl_cache_->unref_mem(var_image_.ptr());
132 var_image_ = bocl_mem_sptr(nullptr);
133 opencl_cache_->unref_mem(t_infinity_image_.ptr());
134 t_infinity_image_ = bocl_mem_sptr(nullptr);
135 opencl_cache_->unref_mem(ray_origins_image_.ptr());
136 ray_origins_image_ = bocl_mem_sptr(nullptr);
137 opencl_cache_->unref_mem(ray_directions_image_.ptr());
138 ray_directions_image_ = bocl_mem_sptr(nullptr);
139
140 opencl_cache_->unref_mem(tnearfar_.ptr());
141 tnearfar_ = bocl_mem_sptr(nullptr);
142 opencl_cache_->unref_mem(img_dim_.ptr());
143 img_dim_ = bocl_mem_sptr(nullptr);
144 opencl_cache_->unref_mem(cl_output_.ptr());
145 cl_output_ = bocl_mem_sptr(nullptr);
146 opencl_cache_->unref_mem(cl_subblk_dim_.ptr());
147 cl_subblk_dim_ = bocl_mem_sptr(nullptr);
148 opencl_cache_->unref_mem(lookup_.ptr());
149 lookup_ = bocl_mem_sptr(nullptr);
150
151 buffers_allocated_ = false;
152 return true;
153 }
154
155 boxm2_ocl_depth_renderer
~boxm2_ocl_depth_renderer()156 ::~boxm2_ocl_depth_renderer()
157 {
158 cleanup_render_buffers();
159 }
160
161 bool
162 boxm2_ocl_depth_renderer
get_last_rendered(vil_image_view<float> & img)163 ::get_last_rendered(vil_image_view<float> &img)
164 {
165 if (render_success_) {
166 img.deep_copy(depth_img_);
167 return true;
168 }
169 return false;
170 }
171
172 bool
173 boxm2_ocl_depth_renderer
get_last_vis(vil_image_view<float> & vis_img)174 ::get_last_vis(vil_image_view<float> &vis_img)
175 {
176 if (render_success_) {
177 vis_img.deep_copy( vis_img_ );
178 return true;
179 }
180 return false;
181 }
182
183 bool
184 boxm2_ocl_depth_renderer
render(vpgl_camera_double_sptr camera,unsigned ni,unsigned nj,float,float)185 ::render(vpgl_camera_double_sptr camera, unsigned ni, unsigned nj, float /*nearfactor*/, float /*farfactor*/)
186 {
187 render_success_ = false;
188
189 vul_timer rtime;
190
191 std::size_t local_threads[2]={8,8};
192
193 //: create a command queue.
194 int status=0;
195 cl_command_queue queue = clCreateCommandQueue(device_->context(),*(device_->device_id()),
196 CL_QUEUE_PROFILING_ENABLE,&status);
197
198 bool good_queue = check_val(status, CL_SUCCESS, "ERROR: boxm2_ocl_depth_renderer: Failed to create command queue: " + error_to_string(status));
199 if(!good_queue) {
200 return false;
201 }
202
203 unsigned cl_ni=RoundUp(ni,local_threads[0]);
204 unsigned cl_nj=RoundUp(nj,local_threads[1]);
205 std::size_t global_threads[] = {cl_ni,cl_nj};
206
207 allocate_render_buffers(cl_ni, cl_nj);
208
209 // intialize the render image planes
210 std::fill(depth_buff_, depth_buff_ + cl_ni*cl_nj, 0.0f);
211 std::fill(vis_buff_, vis_buff_ + cl_ni*cl_nj, 1.0f);
212 std::fill(var_buff_, var_buff_ + cl_ni*cl_nj, 0.0f);
213 std::fill(prob_buff_, prob_buff_ + cl_ni*cl_nj, 0.0f);
214 std::fill(t_infinity_buff_, t_infinity_buff_ + cl_ni*cl_nj, 0.0f);
215
216 img_dim_buff_[0] = 0;
217 img_dim_buff_[1] = 0;
218 img_dim_buff_[2] = ni;
219 img_dim_buff_[3] = nj;
220
221 depth_image_->write_to_buffer(queue);
222 vis_image_->write_to_buffer(queue);
223 var_image_->write_to_buffer(queue);
224 prob_image_->write_to_buffer(queue);
225 t_infinity_image_->write_to_buffer(queue);
226 img_dim_->write_to_buffer(queue);
227
228 // assumes that the camera may be changing between calls
229 boxm2_ocl_camera_converter::compute_ray_image( device_, queue, camera, cl_ni, cl_nj, ray_origins_image_, ray_directions_image_, 0, 0, false);
230
231 int statusw = clFinish(queue);
232 bool good_write = check_val(statusw, CL_SUCCESS, "ERROR: boxm2_ocl_depth_renderer: Initial write to GPU failed: " + error_to_string(statusw));
233 if(!good_write) {
234 return false;
235 }
236
237 std::vector<boxm2_block_id> vis_order;
238 if(camera->type_name() == "vpgl_perspective_camera") {
239 vis_order = scene_->get_vis_blocks_opt(dynamic_cast<vpgl_perspective_camera<double>*>(camera.ptr()),ni,nj);
240 }
241 else {
242 vis_order = scene_->get_vis_blocks(camera);
243 }
244
245 subblk_dim_ = 0.0f; // in case there are no visible blocks;
246
247 for (auto & id : vis_order) {
248
249 boxm2_block_metadata mdata = scene_->get_block_metadata(id);
250
251 bocl_mem* blk = opencl_cache_->get_block(scene_,id);
252 bocl_mem* alpha = opencl_cache_->get_data<BOXM2_ALPHA>(scene_,id);
253 bocl_mem* blk_info = opencl_cache_->loaded_block_info();
254 subblk_dim_ = mdata.sub_block_dim_.x(); // assume this is not changing per block
255
256 depth_kern_.set_arg( blk_info );
257 depth_kern_.set_arg( blk );
258 depth_kern_.set_arg( alpha );
259 depth_kern_.set_arg( ray_origins_image_.ptr() );
260 depth_kern_.set_arg( ray_directions_image_.ptr() );
261 depth_kern_.set_arg( depth_image_.ptr() );
262 depth_kern_.set_arg( var_image_.ptr() );
263 depth_kern_.set_arg( img_dim_.ptr() );
264 depth_kern_.set_arg( cl_output_.ptr() );
265 depth_kern_.set_arg( lookup_.ptr() );
266 depth_kern_.set_arg( vis_image_.ptr() );
267 depth_kern_.set_arg( prob_image_.ptr() );
268 depth_kern_.set_arg( t_infinity_image_.ptr() );
269
270 //local tree , cumsum buffer, imindex buffer
271 depth_kern_.set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_uchar16) );
272 depth_kern_.set_local_arg( local_threads[0]*local_threads[1]*10*sizeof(cl_uchar) );
273 depth_kern_.set_local_arg( local_threads[0]*local_threads[1]*sizeof(cl_int) );
274
275 //execute kernel
276 depth_kern_.execute(queue, 2, local_threads, global_threads);
277 statusw = clFinish(queue);
278 bool good_run = check_val(statusw, CL_SUCCESS, "ERROR: boxm2_ocl_depth_renderer: Execution of depth kernel failed: " + error_to_string(statusw));
279 if (!good_run) {
280 return false;
281 }
282 depth_kern_.clear_args();
283 }
284
285 cl_subblk_dim_->write_to_buffer(queue, true);
286
287 depth_norm_kern_.set_arg( depth_image_.ptr() );
288 depth_norm_kern_.set_arg( var_image_.ptr() );
289 depth_norm_kern_.set_arg( vis_image_.ptr() );
290 depth_norm_kern_.set_arg( img_dim_.ptr());
291 depth_norm_kern_.set_arg( t_infinity_image_.ptr());
292 depth_norm_kern_.set_arg( cl_subblk_dim_.ptr() );
293 depth_norm_kern_.execute( queue, 2, local_threads, global_threads);
294 statusw = clFinish(queue);
295
296 bool good_norm = check_val(statusw, CL_SUCCESS, "ERROR: boxm2_ocl_depth_renderer: Execution of depth norm kernel failed: " + error_to_string(statusw));
297 if (!good_norm) {
298 return false;
299 }
300
301 depth_image_->read_to_buffer(queue);
302 var_image_->read_to_buffer(queue);
303 vis_image_->read_to_buffer(queue);
304 statusw = clFinish(queue);
305 bool good_read = check_val(statusw, CL_SUCCESS, "ERROR: boxm2_ocl_depth_renderer: Read of depth buffers failed: " + error_to_string(statusw));
306 if (!good_read) {
307 return false;
308 }
309 depth_norm_kern_.clear_args();
310
311 depth_img_.set_size(ni, nj);
312 vis_img_.set_size(ni, nj);
313
314 for (unsigned r=0;r<nj;r++) {
315 for (unsigned c=0;c<ni;c++) {
316 const unsigned offset = r*cl_ni + c;
317 depth_img_(c,r)=depth_buff_[offset];
318 vis_img_(c,r)=vis_buff_[offset];
319 }
320 }
321
322 std::cout<<"Total Render time: "<<rtime.all()<<" ms"<<std::endl;
323 clReleaseCommandQueue(queue);
324
325 render_success_ = true;
326 return true;
327 }
328
329 bool
330 boxm2_ocl_depth_renderer
compile_kernels(const bocl_device_sptr &)331 ::compile_kernels(const bocl_device_sptr& /*device*/)
332 {
333 {
334 std::vector<std::string> src_paths;
335 std::string source_dir = boxm2_ocl_util::ocl_src_root();
336 src_paths.push_back(source_dir + "scene_info.cl");
337 src_paths.push_back(source_dir + "pixel_conversion.cl");
338 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
339 src_paths.push_back(source_dir + "backproject.cl");
340 src_paths.push_back(source_dir + "statistics_library_functions.cl");
341 src_paths.push_back(source_dir + "expected_functor.cl");
342 src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
343 src_paths.push_back(source_dir + "bit/render_bit_scene.cl");
344 src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
345
346 //set kernel options
347 std::string options = " -D RENDER_DEPTH ";
348 options += "-D DETERMINISTIC";
349 options += " -D STEP_CELL=step_cell_render_depth2(tblock,linfo->block_len,aux_args.alpha,data_ptr,d*linfo->block_len,aux_args.vis,aux_args.expdepth,aux_args.expdepthsqr,aux_args.probsum,aux_args.t)";
350
351 //have kernel construct itself using the context and device
352 bool good = depth_kern_.create_kernel( &device_->context(),
353 device_->device_id(),
354 src_paths,
355 "render_depth", //kernel name
356 options, //options
357 "boxm2 opencl render depth image"); //kernel identifier (for error checking)
358 if (!good) {
359 std::cerr << "ERROR: boxm2_ocl_depth_renderer: error compiling depth kernel" << std::endl;
360 return false;
361 }
362 }
363 {
364 std::vector<std::string> norm_src_paths;
365 std::string source_dir = boxm2_ocl_util::ocl_src_root();
366 norm_src_paths.push_back(source_dir + "scene_info.cl");
367
368 norm_src_paths.push_back(source_dir + "pixel_conversion.cl");
369 norm_src_paths.push_back(source_dir + "bit/normalize_kernels.cl");
370
371 std::string options = " -D RENDER_DEPTH ";
372
373 bool good = depth_norm_kern_.create_kernel( &device_->context(),
374 device_->device_id(),
375 norm_src_paths,
376 "normalize_render_depth_kernel", //kernel name
377 options, //options
378 "normalize render depth kernel"); //kernel identifier (for error checking)
379
380 if (!good) {
381 std::cerr << "ERROR: boxm2_ocl_depth_renderer: error compiling depth normalization kernel" << std::endl;
382 return false;
383 }
384 }
385 return true;
386 }
387