1 // This is brl/bseg/bstm/ocl/pro/processes/bstm_ocl_get_surface_pt_process.cxx
2 //:
3 // \file
4 // \brief A process to help localize surface points in the 4d world. Given a camera and pixel location, the process shoots a ray
5 // and traverses the volume until it hits a voxel with a prob > prob_t. It returns the location of this voxel so it can be
6 // queried later on.
7 //
8 // \author Ali Osman Ulusoy
9 // \date Jan 30, 2013
10
11 #include <fstream>
12 #include <iostream>
13 #include <algorithm>
14 #include <bprb/bprb_func_process.h>
15
16 #ifdef _MSC_VER
17 # include "vcl_msvc_warnings.h"
18 #endif
19 #include <bstm/ocl/bstm_opencl_cache.h>
20 #include <bstm/bstm_scene.h>
21 #include <bstm/bstm_block.h>
22 #include <bstm/bstm_data_base.h>
23 #include <bstm/bstm_util.h>
24 #include <bstm/ocl/bstm_ocl_util.h>
25 //brdb stuff
26 #include <brdb/brdb_value.h>
27
28 //directory utility
29 #include <vcl_where_root_dir.h>
30 #include <bocl/bocl_device.h>
31
32 #include <bocl/bocl_kernel.h>
33 #include "vul/vul_timer.h"
34 #include <boxm2/ocl/algo/boxm2_ocl_camera_converter.h>
35 #include <vsph/vsph_camera_bounds.h>
36 #include "vgl/vgl_ray_3d.h"
37 #include <boct/boct_bit_tree.h>
38
39
40 namespace bstm_ocl_get_surface_pt_process_globals
41 {
42 constexpr unsigned n_inputs_ = 10;
43 constexpr unsigned n_outputs_ = 3;
44 std::size_t lthreads[2]={8,8};
45
46 static std::map<std::string,std::vector<bocl_kernel*> > kernels;
47
compile_kernel(const bocl_device_sptr & device,std::vector<bocl_kernel * > & vec_kernels,const std::string & opts)48 void compile_kernel(const bocl_device_sptr& device,std::vector<bocl_kernel*> & vec_kernels, const std::string& opts)
49 {
50 //gather all render sources... seems like a lot for rendering...
51 std::vector<std::string> src_paths;
52 std::string source_dir = std::string(VCL_SOURCE_ROOT_DIR) + "/contrib/brl/bseg/bstm/ocl/cl/";
53 src_paths.push_back(source_dir + "scene_info.cl");
54 src_paths.push_back(source_dir + "pixel_conversion.cl");
55 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
56 src_paths.push_back(source_dir + "bit/time_tree_library_functions.cl");
57 src_paths.push_back(source_dir + "backproject.cl");
58 src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
59 src_paths.push_back(source_dir + "bit/compute_surface_pt.cl");
60 src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
61
62 //set kernel options
63 std::string options = opts + "-D SURFACE_PT ";
64 options += " -D STEP_CELL=step_cell_surface_pt(aux_args,data_ptr_tt,d*linfo->block_len,posx*linfo->block_len+linfo->origin.x,posy*linfo->block_len+linfo->origin.y,posz*linfo->block_len+linfo->origin.z)";
65
66 //have kernel construct itself using the context and device
67 auto * ray_trace_kernel=new bocl_kernel();
68
69 std::cout << "Compiling with options: " << options << std::endl;
70 ray_trace_kernel->create_kernel( &device->context(),
71 device->device_id(),
72 src_paths,
73 "compute_surface_pt", //kernel name
74 options, //options
75 "bstm compute_surface_pt"); //kernel identifier (for error checking)
76 vec_kernels.push_back(ray_trace_kernel);
77
78
79 }
80 }
81
bstm_ocl_get_surface_pt_process_cons(bprb_func_process & pro)82 bool bstm_ocl_get_surface_pt_process_cons(bprb_func_process& pro)
83 {
84 using namespace bstm_ocl_get_surface_pt_process_globals;
85
86 //process takes 1 input
87 std::vector<std::string> input_types_(n_inputs_);
88 input_types_[0] = "bocl_device_sptr";
89 input_types_[1] = "bstm_scene_sptr";
90 input_types_[2] = "bstm_opencl_cache_sptr";
91 input_types_[3] = "vpgl_camera_double_sptr";
92 input_types_[4] = "unsigned"; //ni
93 input_types_[5] = "unsigned"; //nj
94 input_types_[6] = "unsigned"; //pixel_x
95 input_types_[7] = "unsigned"; //pixel_y
96 input_types_[8] = "float"; // time
97 input_types_[9] = "float"; // prob threshold
98
99 std::vector<std::string> output_types_(n_outputs_);
100 output_types_[0] = "float";
101 output_types_[1] = "float";
102 output_types_[2] = "float";
103 return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
104
105 }
106
bstm_ocl_get_surface_pt_process(bprb_func_process & pro)107 bool bstm_ocl_get_surface_pt_process(bprb_func_process& pro)
108 {
109 using namespace bstm_ocl_get_surface_pt_process_globals;
110
111 if ( pro.n_inputs() < n_inputs_ ) {
112 std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
113 return false;
114 }
115 //get the inputs
116 unsigned i = 0;
117 bocl_device_sptr device= pro.get_input<bocl_device_sptr>(i++);
118 bstm_scene_sptr scene =pro.get_input<bstm_scene_sptr>(i++);
119 bstm_opencl_cache_sptr opencl_cache= pro.get_input<bstm_opencl_cache_sptr>(i++);
120 vpgl_camera_double_sptr cam= pro.get_input<vpgl_camera_double_sptr>(i++);
121 auto ni=pro.get_input<unsigned>(i++);
122 auto nj=pro.get_input<unsigned>(i++);
123 auto pixel_x=pro.get_input<unsigned>(i++);
124 auto pixel_y=pro.get_input<unsigned>(i++);
125 auto time = pro.get_input<float>(i++);
126 auto prob_t = pro.get_input<float>(i++);
127
128
129 //: create a command queue.
130 int status=0;
131 cl_command_queue queue = clCreateCommandQueue(device->context(),*(device->device_id()), CL_QUEUE_PROFILING_ENABLE,&status);
132 if (status!=0) return false;
133 std::string options = "";
134 std::string identifier=device->device_identifier()+options;
135 if (kernels.find(identifier)==kernels.end()) // compile the kernel
136 {
137 std::cout<<"===========Compiling kernels==========="<<std::endl;
138 std::vector<bocl_kernel*> ks;
139 compile_kernel(device,ks,options);
140 kernels[identifier]=ks;
141 }
142
143
144
145 //start ray tracing
146 float transfer_time=0.0f;
147 float gpu_time=0.0f;
148
149 //camera check
150 if (cam->type_name()!= "vpgl_perspective_camera" && cam->type_name()!= "vpgl_generic_camera" ) {
151 std::cout<<"Cannot render with camera of type "<<cam->type_name()<<std::endl;
152 return 0.0f;
153 }
154
155 // create all buffers
156 unsigned cl_ni=RoundUp(ni,8);
157 unsigned cl_nj=RoundUp(nj,8);
158 auto* ray_origins = new cl_float[4*cl_ni*cl_nj];
159 auto* ray_directions = new cl_float[4*cl_ni*cl_nj];
160 bocl_mem_sptr ray_o_buff = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_origins, "ray_origins buffer");
161 bocl_mem_sptr ray_d_buff = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4), ray_directions, "ray_directions buffer");
162 boxm2_ocl_camera_converter::compute_ray_image( device, queue, cam, cl_ni, cl_nj, ray_o_buff, ray_d_buff);
163
164 // Output Array
165 float output_arr[100];
166 for (float & i : output_arr) i = -1.0f;
167 bocl_mem_sptr cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
168 cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
169
170 // bit lookup buffer
171 cl_uchar lookup_arr[256];
172 bstm_ocl_util::set_bit_lookup(lookup_arr);
173 bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
174 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
175
176 int img_dim_buff[4];
177 img_dim_buff[0] = pixel_x; img_dim_buff[2] = ni;
178 img_dim_buff[1] = pixel_y; img_dim_buff[3] = nj;
179 bocl_mem_sptr exp_img_dim=new bocl_mem(device->context(), img_dim_buff, sizeof(int)*4, "image dims");
180 exp_img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
181
182 auto cl_prob_t = (cl_float)prob_t;
183 bocl_mem_sptr prob_t_mem =new bocl_mem(device->context(), &cl_prob_t, sizeof(cl_float), "prob t buffer");
184 prob_t_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
185
186 //2. set global thread size
187 std::size_t gThreads[] = {cl_ni,cl_nj};
188
189 //3. set arguments
190 std::vector<bstm_block_id> vis_order = scene->get_vis_blocks(cam);
191 std::vector<bstm_block_id>::iterator id;
192 for (id = vis_order.begin(); id != vis_order.end(); ++id)
193 {
194 //choose correct render kernel
195 bstm_block_metadata mdata = scene->get_block_metadata(*id);
196
197 //if the current blk does not contain the queried time, no need to ray cast
198 double local_time;
199 if(!mdata.contains_t(time,local_time))
200 continue;
201
202 auto cl_time = (cl_float)local_time;
203 bocl_mem_sptr time_mem =new bocl_mem(device->context(), &cl_time, sizeof(cl_float), "time instance buffer");
204 time_mem->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
205
206 bocl_kernel* kern = kernels[identifier][0];
207
208 //write the image values to the buffer
209 vul_timer transfer;
210 bocl_mem* blk = opencl_cache->get_block(*id);
211 bocl_mem* blk_info = opencl_cache->loaded_block_info();
212 bocl_mem* blk_t = opencl_cache->get_time_block(*id);
213 bocl_mem* alpha = opencl_cache->get_data<BSTM_ALPHA>(*id);
214 transfer_time += (float) transfer.all();
215
216 ////3. SET args
217 kern->set_arg( blk_info );
218 kern->set_arg( blk );
219 kern->set_arg( blk_t );
220 kern->set_arg( alpha );
221 kern->set_arg( ray_o_buff.ptr() );
222 kern->set_arg( ray_d_buff.ptr() );
223 kern->set_arg(exp_img_dim.ptr());
224 kern->set_arg( cl_output.ptr() );
225 kern->set_arg( lookup.ptr() );
226 kern->set_arg( time_mem.ptr() );
227 kern->set_arg( prob_t_mem.ptr() );
228
229 //local tree , cumsum buffer, imindex buffer
230 kern->set_local_arg( lthreads[0]*lthreads[1]*sizeof(cl_uchar16) );
231 kern->set_local_arg( lthreads[0]*lthreads[1]*sizeof(cl_uchar8) );
232 kern->set_local_arg( lthreads[0]*lthreads[1]*10*sizeof(cl_uchar) );
233 kern->set_local_arg( lthreads[0]*lthreads[1]*sizeof(cl_int) );
234
235 //execute kernel
236 kern->execute(queue, 2, lthreads, gThreads);
237 clFinish(queue);
238 gpu_time += kern->exec_time();
239
240 //clear render kernel args so it can reset em on next execution
241 kern->clear_args();
242 kern->release_current_event();
243 }
244
245 //read outout
246 cl_output->read_to_buffer(queue);
247 std::cout << "prob: " << output_arr[0] << " pt: (" << output_arr[1] << "," << output_arr[2] << "," << output_arr[3] << ")" << std::endl;
248
249
250 //clean up cam
251 delete[] ray_origins;
252 delete[] ray_directions;
253 opencl_cache->unref_mem(ray_o_buff.ptr());
254 opencl_cache->unref_mem(ray_d_buff.ptr());
255
256 clReleaseCommandQueue(queue);
257
258 i=0;
259 pro.set_output_val<float>(i++, output_arr[1]);
260 pro.set_output_val<float>(i++, output_arr[2]);
261 pro.set_output_val<float>(i++, output_arr[3]);
262 return true;
263 }
264