1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_ocl_render_expected_height_map_process.cxx
2 #include <iostream>
3 #include <fstream>
4 #include <bprb/bprb_func_process.h>
5 //:
6 // \file
7 // \brief A process for rendering height map of a scene.
8 //
9 // \author Vishal Jain
10 // \date Mar 30, 2011
11
12 #ifdef _MSC_VER
13 # include "vcl_msvc_warnings.h"
14 #endif
15 #include <boxm2/ocl/boxm2_opencl_cache.h>
16 #include <boxm2/boxm2_scene.h>
17 #include <boxm2/boxm2_block.h>
18 #include <boxm2/boxm2_data_base.h>
19 #include <boxm2/ocl/boxm2_ocl_util.h>
20 #include "vil/vil_image_view.h"
21 //brdb stuff
22 #include <brdb/brdb_value.h>
23
24 //directory utility
25 #include "vul/vul_timer.h"
26 #include <vcl_where_root_dir.h>
27 #include <bocl/bocl_device.h>
28 #include <bocl/bocl_kernel.h>
29
30 namespace boxm2_ocl_render_expected_height_map_process_globals
31 {
32 constexpr unsigned n_inputs_ = 3;
33 constexpr unsigned n_outputs_ = 5;
34 std::size_t local_threads[2]={8,8};
35 static std::map<std::string, std::vector<bocl_kernel*> > kernels_;
get_kernels(const bocl_device_sptr & device,const std::string & opts)36 std::vector<bocl_kernel*>& get_kernels(const bocl_device_sptr& device, const std::string& opts)
37 {
38 std::string identifier = device->device_identifier() + opts;
39 if (kernels_.find(identifier) != kernels_.end())
40 return kernels_[identifier];
41 std::vector<bocl_kernel*> vec_kernels;
42 //gather all render sources... seems like a lot for rendering...
43 std::vector<std::string> src_paths;
44 std::string source_dir = boxm2_ocl_util::ocl_src_root();
45 src_paths.push_back(source_dir + "scene_info.cl");
46 src_paths.push_back(source_dir + "pixel_conversion.cl");
47 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
48 src_paths.push_back(source_dir + "backproject.cl");
49 src_paths.push_back(source_dir + "statistics_library_functions.cl");
50 src_paths.push_back(source_dir + "expected_functor.cl");
51 src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
52 src_paths.push_back(source_dir + "bit/render_bit_scene.cl");
53 src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
54
55 //set kernel options
56 std::string options = "-D RENDER_DEPTH -D DETERMINISTIC -D STEP_CELL=step_cell_render_height(tblock,linfo->block_len,aux_args.alpha,data_ptr,d*linfo->block_len,aux_args.vis,aux_args.expdepth,aux_args.expdepthsqr,aux_args.probsum,aux_args.t)";
57 //create normalize image kernel
58 std::vector<std::string> norm_src_paths;
59 norm_src_paths.push_back(source_dir + "scene_info.cl");
60
61 norm_src_paths.push_back(source_dir + "pixel_conversion.cl");
62 norm_src_paths.push_back(source_dir + "bit/normalize_kernels.cl");
63 auto * normalize_render_kernel = new bocl_kernel();
64
65 normalize_render_kernel->create_kernel(&device->context(),
66 device->device_id(),
67 norm_src_paths,
68 "normalize_render_depth_kernel", //kernel name
69 options, //options
70 "normalize render depth kernel"); //kernel identifier (for error checking)
71 //have kernel construct itself using the context and device
72 auto * ray_trace_kernel = new bocl_kernel();
73
74 ray_trace_kernel->create_kernel(&device->context(),
75 device->device_id(),
76 src_paths,
77 "render_depth", //kernel name
78 options, //options
79 "boxm2 opencl render depth image"); //kernel identifier (for error checking)
80 vec_kernels.push_back(ray_trace_kernel);
81
82
83
84
85 vec_kernels.push_back(normalize_render_kernel);
86 kernels_[identifier] = vec_kernels;
87
88 return kernels_[identifier];
89 }
90
91 }
92
boxm2_ocl_render_expected_height_map_process_cons(bprb_func_process & pro)93 bool boxm2_ocl_render_expected_height_map_process_cons(bprb_func_process& pro)
94 {
95 using namespace boxm2_ocl_render_expected_height_map_process_globals;
96
97 //process takes 1 input
98 std::vector<std::string> input_types_(n_inputs_);
99 input_types_[0] = "bocl_device_sptr";
100 input_types_[1] = "boxm2_scene_sptr";
101 input_types_[2] = "boxm2_opencl_cache_sptr";
102
103 // process has 1 output:
104 // output[0]: scene sptr
105 std::vector<std::string> output_types_(n_outputs_);
106 output_types_[0] = "vil_image_view_base_sptr";
107 output_types_[1] = "vil_image_view_base_sptr";
108 output_types_[2] = "vil_image_view_base_sptr";
109 output_types_[3] = "vil_image_view_base_sptr";
110 output_types_[4] = "vil_image_view_base_sptr";
111 //output_types_[5] = "vil_image_view_base_sptr";
112
113 return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
114 }
115
boxm2_ocl_render_expected_height_map_process(bprb_func_process & pro)116 bool boxm2_ocl_render_expected_height_map_process(bprb_func_process& pro)
117 {
118 using namespace boxm2_ocl_render_expected_height_map_process_globals;
119
120
121 if ( pro.n_inputs() < n_inputs_ ) {
122 std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
123 return false;
124 }
125 float transfer_time=0.0f;
126 float gpu_time=0.0f;
127 //get the inputs
128 unsigned i = 0;
129 bocl_device_sptr device= pro.get_input<bocl_device_sptr>(i++);
130 boxm2_scene_sptr scene =pro.get_input<boxm2_scene_sptr>(i++);
131 boxm2_opencl_cache_sptr opencl_cache= pro.get_input<boxm2_opencl_cache_sptr>(i++);
132 vgl_box_3d<double> bbox=scene->bounding_box();
133
134
135 //get x and y size from scene
136 std::vector<boxm2_block_id> vis_order = scene->get_block_ids();
137 std::vector<boxm2_block_id>::iterator id;
138 float xint=0.0f;
139 float yint=0.0f;
140 for (id = vis_order.begin(); id != vis_order.end(); ++id)
141 {
142 boxm2_block_metadata mdata=scene->get_block_metadata(*id);
143 float num_octree_cells=std::pow(2.0f,(float)mdata.max_level_-1);
144 xint=mdata.sub_block_dim_.x()/num_octree_cells;
145 yint=mdata.sub_block_dim_.y()/num_octree_cells;
146 }
147 auto ni=(unsigned int)std::ceil(bbox.width()/xint);
148 auto nj=(unsigned int)std::ceil(bbox.height()/yint);
149 std::cout<<"Size of the image "<<ni<<','<<nj<<std::endl;
150 float z= bbox.max_z();
151 std::string identifier=device->device_identifier();
152
153 //: create a command queue.
154 int status=0;
155 cl_command_queue queue = clCreateCommandQueue(device->context(),*(device->device_id()),
156 CL_QUEUE_PROFILING_ENABLE,&status);
157 if (status!=0)return false;
158
159 std::vector<bocl_kernel*>& kernels = get_kernels(device, "");
160
161 float scene_origin[4];
162 scene_origin[0]=bbox.min_x();
163 scene_origin[1]=bbox.min_y();
164 scene_origin[2]=bbox.min_z();
165 scene_origin[3]=1.0;
166 unsigned cl_ni=RoundUp(ni,local_threads[0]);
167 unsigned cl_nj=RoundUp(nj,local_threads[1]);
168
169 auto* ray_origins = new cl_float[4 * cl_ni*cl_nj];
170 auto* ray_directions = new cl_float[4 * cl_ni*cl_nj];
171
172 float ray_dx = 0, ray_dy = 0, ray_dz = -1;
173 // initialize ray origin buffer, first and last return buffers
174 int count = 0;
175 for (unsigned int j = 0; j<cl_nj; ++j) {
176 for (unsigned int i = 0; i<cl_ni; ++i) {
177
178 int count4 = count * 4;
179 ray_origins[count4 + 0] = scene_origin[0] + ((float)i + 0.15f)*(xint);
180 ray_origins[count4 + 1] = scene_origin[1] + ((float)j + 0.15f)*(yint);
181
182 ray_origins[count4 + 2] = z + 1.0f;
183 ray_origins[count4 + 3] = 0.0;
184 ray_directions[count4 + 0] = 0.0;
185 ray_directions[count4 + 1] = 0.0;
186 ray_directions[count4 + 2] = -1.0;
187 ray_directions[count4 + 3] = 0.0;
188
189 ++count;
190 }
191 }
192 bocl_mem_sptr ray_o_buff = opencl_cache->alloc_mem(cl_ni*cl_nj * sizeof(cl_float4), ray_origins, "ray_origins buffer");
193 bocl_mem_sptr ray_d_buff = opencl_cache->alloc_mem(cl_ni*cl_nj * sizeof(cl_float4), ray_directions, "ray_directions buffer");
194
195 ray_o_buff->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
196 ray_d_buff->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
197
198 auto* buff = new float[cl_ni*cl_nj];
199 for (unsigned i = 0; i<cl_ni*cl_nj; i++) buff[i] = 0.0f;
200 auto* var_buff = new float[cl_ni*cl_nj];
201 for (unsigned i = 0; i<cl_ni*cl_nj; i++) var_buff[i] = 0.0f;
202 auto* vis_buff = new float[cl_ni*cl_nj];
203 for (unsigned i = 0; i<cl_ni*cl_nj; i++) vis_buff[i] = 1.0f;
204 auto* prob_buff = new float[cl_ni*cl_nj];
205 for (unsigned i = 0; i<cl_ni*cl_nj; i++) prob_buff[i] = 0.0f;
206 auto* t_infinity_buff = new float[cl_ni*cl_nj];
207 for (unsigned i = 0; i<cl_ni*cl_nj; i++) t_infinity_buff[i] = 0.0f;
208
209 bocl_mem_sptr exp_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), buff, "exp image buffer");
210 exp_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
211
212 bocl_mem_sptr var_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), var_buff, "var image buffer");
213 var_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
214
215 bocl_mem_sptr vis_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), vis_buff, "vis image buffer");
216 vis_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
217
218 bocl_mem_sptr prob_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), prob_buff, "vis x omega image buffer");
219 prob_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
220
221 bocl_mem_sptr t_infinity = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), t_infinity_buff, "t infinity buffer");
222 t_infinity->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
223
224 // Image Dimensions
225 int img_dim_buff[4];
226 img_dim_buff[0] = 0;
227 img_dim_buff[1] = 0;
228 img_dim_buff[2] = ni;
229 img_dim_buff[3] = nj;
230 bocl_mem_sptr exp_img_dim = new bocl_mem(device->context(), img_dim_buff, sizeof(int) * 4, "image dims");
231 exp_img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
232
233 // Output Array
234 float output_arr[100];
235 for (float & i : output_arr) i = 0.0f;
236 bocl_mem_sptr cl_output = new bocl_mem(device->context(), output_arr, sizeof(float) * 100, "output buffer");
237 cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
238
239 // bit lookup buffer
240 cl_uchar lookup_arr[256];
241 boxm2_ocl_util::set_bit_lookup(lookup_arr);
242 bocl_mem_sptr lookup = new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar) * 256, "bit lookup buffer");
243 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
244
245 //2. set workgroup size
246 std::size_t lThreads[] = {8, 8};
247 std::size_t gThreads[] = {cl_ni,cl_nj};
248 float subblk_dim = 0.0;
249 for (id = vis_order.begin(); id != vis_order.end(); ++id)
250 {
251 //choose correct render kernel
252 boxm2_block_metadata mdata = scene->get_block_metadata(*id);
253 bocl_kernel* kern = kernels[0];
254
255 //write the image values to the buffer
256 vul_timer transfer;
257 bocl_mem* blk = opencl_cache->get_block(scene, *id);
258 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene, *id);
259 bocl_mem * blk_info = opencl_cache->loaded_block_info();
260 transfer_time += (float)transfer.all();
261 subblk_dim = mdata.sub_block_dim_.x();
262 ////3. SET args
263 kern->set_arg(blk_info);
264 kern->set_arg(blk);
265 kern->set_arg(alpha);
266 kern->set_arg(ray_o_buff.ptr());
267 kern->set_arg(ray_d_buff.ptr());
268 kern->set_arg(exp_image.ptr());
269 kern->set_arg(var_image.ptr());
270 kern->set_arg(exp_img_dim.ptr());
271 kern->set_arg(cl_output.ptr());
272 kern->set_arg(lookup.ptr());
273 kern->set_arg(vis_image.ptr());
274 kern->set_arg(prob_image.ptr());
275 kern->set_arg(t_infinity.ptr());
276 kern->set_local_arg(local_threads[0] * local_threads[1] * sizeof(cl_uchar16));
277 kern->set_local_arg(local_threads[0] * local_threads[1] * 10 * sizeof(cl_uchar));
278 kern->set_local_arg(local_threads[0] * local_threads[1] * sizeof(cl_int));
279
280 //execute kernel
281 kern->execute(queue, 2, lThreads, gThreads);
282 clFinish(queue);
283 gpu_time += kern->exec_time();
284 cl_output->read_to_buffer(queue);
285
286 // clear render kernel args so it can reset em on next execution
287 kern->clear_args();
288 }
289
290 bocl_mem_sptr subblk_dim_mem = new bocl_mem(device->context(), &(subblk_dim), sizeof(float), "sub block dim buffer");
291 subblk_dim_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
292 // normalize
293 {
294 bocl_kernel* normalize_kern = kernels[1];
295 normalize_kern->set_arg(exp_image.ptr());
296 normalize_kern->set_arg(var_image.ptr());
297 normalize_kern->set_arg(vis_image.ptr());
298 normalize_kern->set_arg(exp_img_dim.ptr());
299 normalize_kern->set_arg(t_infinity.ptr());
300 normalize_kern->set_arg(subblk_dim_mem.ptr());
301 normalize_kern->execute(queue, 2, local_threads, gThreads);
302 clFinish(queue);
303 gpu_time += normalize_kern->exec_time();
304 //clear render kernel args so it can reset em on next execution
305 normalize_kern->clear_args();
306 exp_image->read_to_buffer(queue);
307 var_image->read_to_buffer(queue);
308 vis_image->read_to_buffer(queue);
309 }
310 clReleaseCommandQueue(queue);
311
312 i=0;
313 auto* exp_img_out=new vil_image_view<float>(ni,nj);
314 auto* exp_var_out=new vil_image_view<float>(ni,nj);
315 auto* xcoord_img=new vil_image_view<float>(ni,nj);
316 auto* ycoord_img=new vil_image_view<float>(ni,nj);
317 auto* prob_img=new vil_image_view<float>(ni,nj);
318
319 for (unsigned c=0;c<nj;++c)
320 for (unsigned r=0;r<ni;++r)
321 {
322 (*exp_img_out)(r,c) = /*z-*/buff[c*cl_ni+r];
323 (*exp_var_out)(r,c) = var_buff[c*cl_ni+r];
324 (*xcoord_img)(r, c) = ray_origins[(c*cl_ni + r) * 4 + 0];
325 (*ycoord_img)(r, c) = ray_origins[(c*cl_ni + r) * 4 + 1];
326 (*prob_img)(r,c) = prob_buff[c*cl_ni+r];
327 }
328 // store scene smaprt pointer
329 pro.set_output_val<vil_image_view_base_sptr>(i++, exp_img_out);
330 pro.set_output_val<vil_image_view_base_sptr>(i++, exp_var_out);
331 pro.set_output_val<vil_image_view_base_sptr>(i++, xcoord_img);
332 pro.set_output_val<vil_image_view_base_sptr>(i++, ycoord_img);
333 pro.set_output_val<vil_image_view_base_sptr>(i++, prob_img);
334 delete[] buff;
335 delete[] var_buff;
336 delete[] vis_buff;
337 delete[] prob_buff;
338 delete[] t_infinity_buff;
339 //delete[] app_buff;
340 opencl_cache->unref_mem(exp_image.ptr());
341 opencl_cache->unref_mem(var_image.ptr());
342 opencl_cache->unref_mem(vis_image.ptr());
343 opencl_cache->unref_mem(prob_image.ptr());
344 opencl_cache->unref_mem(t_infinity.ptr());
345 opencl_cache->unref_mem(ray_o_buff.ptr());
346 opencl_cache->unref_mem(ray_d_buff.ptr());
347 return true;
348 }
349