1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_ocl_render_expected_color_process.cxx
2 //:
3 // \file
4 // \brief  A process for rendering the scene.
5 //
6 // \author Vishal Jain
7 // \date Mar 10, 2011
8 
9 #include <fstream>
10 #include <iostream>
11 #include <algorithm>
12 #include <bprb/bprb_func_process.h>
13 
14 #ifdef _MSC_VER
15 #  include "vcl_msvc_warnings.h"
16 #endif
17 #include <boxm2/ocl/boxm2_opencl_cache.h>
18 #include <boxm2/boxm2_scene.h>
19 #include <boxm2/boxm2_block.h>
20 #include <boxm2/boxm2_data_base.h>
21 #include <boxm2/ocl/boxm2_ocl_util.h>
22 #include "vil/vil_image_view.h"
23 //brdb stuff
24 #include <brdb/brdb_value.h>
25 
26 //directory utility
27 #include <vcl_where_root_dir.h>
28 #include <bocl/bocl_device.h>
29 #include <bocl/bocl_kernel.h>
30 #include <boxm2/ocl/algo/boxm2_ocl_render_expected_image_function.h>
31 
32 
33 namespace boxm2_ocl_render_expected_color_process_globals
34 {
35   constexpr unsigned n_inputs_ = 8;
36   constexpr unsigned n_outputs_ = 2;
37   std::size_t lthreads[2]={8,8};
38 
39   static std::map<std::string,std::vector<bocl_kernel*> > kernels;
40 
compile_kernel(const bocl_device_sptr & device,std::vector<bocl_kernel * > & vec_kernels,const std::string & opts)41   void compile_kernel(const bocl_device_sptr& device,std::vector<bocl_kernel*> & vec_kernels, const std::string& opts)
42   {
43     //gather all render sources... seems like a lot for rendering...
44     std::vector<std::string> src_paths;
45     std::string source_dir = boxm2_ocl_util::ocl_src_root();
46     src_paths.push_back(source_dir + "scene_info.cl");
47     src_paths.push_back(source_dir + "pixel_conversion.cl");
48     src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
49     src_paths.push_back(source_dir + "backproject.cl");
50     src_paths.push_back(source_dir + "statistics_library_functions.cl");
51     src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
52     src_paths.push_back(source_dir + "bit/render_rgb.cl");
53     src_paths.push_back(source_dir + "bit/cast_ray_bit.cl");
54 
55     //set kernel options
56     //#define STEP_CELL step_cell_render(mixture_array, alpha_array, data_ptr, d, &vis, &expected_int);
57     std::string options = opts + " -D RENDER ";
58     options += " -D DETERMINISTIC ";
59     options += " -D YUV -D STEP_CELL=step_cell_render(aux_args,data_ptr,llid,d*linfo->block_len)";
60 
61     //have kernel construct itself using the context and device
62     auto * ray_trace_kernel=new bocl_kernel();
63     ray_trace_kernel->create_kernel( &device->context(),
64                                      device->device_id(),
65                                      src_paths,
66                                      "render_bit_scene",   //kernel name
67                                      options,              //options
68                                      "boxm2 opencl render process color"); //kernel identifier (for error checking)
69     vec_kernels.push_back(ray_trace_kernel);
70 
71     //create normalize image kernel
72     std::vector<std::string> norm_src_paths;
73     norm_src_paths.push_back(source_dir + "pixel_conversion.cl");
74     norm_src_paths.push_back(source_dir + "bit/normalize_kernels.cl");
75     auto * normalize_render_kernel=new bocl_kernel();
76 
77     normalize_render_kernel->create_kernel( &device->context(),
78                                             device->device_id(),
79                                             norm_src_paths,
80                                             "normalize_render_rgb_kernel",   //kernel name
81                                             options,              //options
82                                             "normalize render color kernel"); //kernel identifier (for error checking)
83     vec_kernels.push_back(normalize_render_kernel);
84   }
85 }
86 
boxm2_ocl_render_expected_color_process_cons(bprb_func_process & pro)87 bool boxm2_ocl_render_expected_color_process_cons(bprb_func_process& pro)
88 {
89   using namespace boxm2_ocl_render_expected_color_process_globals;
90 
91   //process takes 1 input
92   std::vector<std::string> input_types_(n_inputs_);
93   input_types_[0] = "bocl_device_sptr";
94   input_types_[1] = "boxm2_scene_sptr";
95   input_types_[2] = "boxm2_opencl_cache_sptr";
96   input_types_[3] = "vpgl_camera_double_sptr";
97   input_types_[4] = "unsigned";
98   input_types_[5] = "unsigned";
99   input_types_[6] = "float";                        // near factor ( maximum # of pixels should map to the finest voxel )
100   input_types_[7] = "float";                        // far factor ( minimum # of pixels should map to the finest voxel )
101 
102   brdb_value_sptr tnearfactor = new brdb_value_t<float>(100000.0f);  //by default update alpha
103   brdb_value_sptr tfarfactor = new brdb_value_t<float>(100000.0f);  //by default update alpha
104 
105   pro.set_input(6, tnearfactor);
106   pro.set_input(7, tfarfactor);
107   // process has 1 output:
108   // output[0]: scene sptr
109   std::vector<std::string>  output_types_(n_outputs_);
110   output_types_[0] = "vil_image_view_base_sptr";
111   output_types_[1] = "vil_image_view_base_sptr";
112 
113   return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
114 }
115 
boxm2_ocl_render_expected_color_process(bprb_func_process & pro)116 bool boxm2_ocl_render_expected_color_process(bprb_func_process& pro)
117 {
118   using namespace boxm2_ocl_render_expected_color_process_globals;
119 
120   if ( pro.n_inputs() < n_inputs_ ) {
121     std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
122     return false;
123   }
124   //get the inputs
125   unsigned argIdx = 0;
126   bocl_device_sptr device= pro.get_input<bocl_device_sptr>(argIdx++);
127   boxm2_scene_sptr scene =pro.get_input<boxm2_scene_sptr>(argIdx++);
128   boxm2_opencl_cache_sptr opencl_cache= pro.get_input<boxm2_opencl_cache_sptr>(argIdx++);
129   vpgl_camera_double_sptr cam= pro.get_input<vpgl_camera_double_sptr>(argIdx++);
130   auto ni=pro.get_input<unsigned>(argIdx++);
131   auto nj=pro.get_input<unsigned>(argIdx++);
132   auto   nearfactor = pro.get_input<float>(argIdx++);
133   auto   farfactor = pro.get_input<float>(argIdx++);
134   //make sure the data types match the scene
135   bool foundDataType = false;
136   std::string data_type,options;
137   std::vector<std::string> apps = scene->appearances();
138   int apptypesize = 0;
139   for (const auto & app : apps) {
140     if ( app == boxm2_data_traits<BOXM2_GAUSS_RGB>::prefix() )
141     {
142       data_type = app;
143       foundDataType = true;
144       options=" -D MOG_TYPE_8 ";
145       apptypesize= boxm2_data_traits<BOXM2_GAUSS_RGB>::datasize();
146     }
147   }
148   if (!foundDataType) {
149     std::cout<<"BOXM2_OCL_RENDER_COLOR_PROCESS ERROR: scene doesn't have BOXM2_GAUSS_RGB data type"<<std::endl;
150     return false;
151   }
152 
153   // create a command queue.
154   int status=0;
155   cl_command_queue queue = clCreateCommandQueue(device->context(),*(device->device_id()),
156                                                 CL_QUEUE_PROFILING_ENABLE,&status);
157   if (status!=0) return false;
158   std::string identifier=device->device_identifier()+options;
159 
160   // compile the kernel
161   if (kernels.find(identifier)==kernels.end())
162   {
163     std::cout<<"====boxm2_ocl_render_color_process::Compiling kernels on "<<identifier<<std::endl;
164     std::vector<bocl_kernel*> ks;
165     compile_kernel(device,ks,options);
166     kernels[identifier]=ks;
167   }
168   unsigned cl_ni=RoundUp(ni,lthreads[0]);
169   unsigned cl_nj=RoundUp(nj,lthreads[1]);
170 
171   //create color buffer
172   auto* buff = new float[4*cl_ni*cl_nj];
173   std::fill(buff, buff + 4*cl_ni*cl_nj, 0.0f);
174   bocl_mem_sptr exp_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(cl_float4),buff,  "exp color image (float4) buffer");
175   exp_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
176 
177   // visibility image
178   auto* vis_buff = new float[cl_ni*cl_nj];
179   std::fill(vis_buff, vis_buff + cl_ni*cl_nj, 1.0f);
180   bocl_mem_sptr vis_image = opencl_cache->alloc_mem( cl_ni*cl_nj*sizeof(cl_float), vis_buff, "vis image (single float) buffer");
181   vis_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
182 
183 
184   //image dimensions
185   int img_dim_buff[4];
186   img_dim_buff[0] = 0;   img_dim_buff[2] = ni;
187   img_dim_buff[1] = 0;   img_dim_buff[3] = nj;
188   bocl_mem_sptr exp_img_dim=opencl_cache->alloc_mem(sizeof(int)*4,img_dim_buff,  "image dims");
189   exp_img_dim->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
190 
191   float tnearfar[2] = { 0.0f, 1000000} ;
192   if(cam->type_name() == "vpgl_perspective_camera")
193   {
194 
195       float f = ((vpgl_perspective_camera<double> *)cam.ptr())->get_calibration().focal_length()*((vpgl_perspective_camera<double> *)cam.ptr())->get_calibration().x_scale();
196       std::cout<<"Focal Length " << f<<std::endl;
197       tnearfar[0] = f* scene->finest_resolution()/nearfactor ;
198       tnearfar[1] = f* scene->finest_resolution()*farfactor ;
199 
200       std::cout<<"Near and Far Clipping planes "<<tnearfar[0]<<" "<<tnearfar[1]<<std::endl;
201   }
202   bocl_mem_sptr tnearfar_mem_ptr = opencl_cache->alloc_mem(2*sizeof(float), tnearfar, "tnearfar  buffer");
203   tnearfar_mem_ptr->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
204 
205   auto* max_omega_buff = new float[cl_ni*cl_nj];
206   std::fill(max_omega_buff, max_omega_buff + cl_ni*cl_nj, 0.0f);
207   bocl_mem_sptr max_omega_image = opencl_cache->alloc_mem(cl_ni*cl_nj*sizeof(float), max_omega_buff,"max omega image buffer");
208   max_omega_image->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
209 
210   // run expected image function
211   render_expected_image(scene, device, opencl_cache, queue,
212                         cam, exp_image, vis_image, max_omega_image, exp_img_dim,
213                         data_type, kernels[identifier][0], lthreads, cl_ni, cl_nj,apptypesize,tnearfar_mem_ptr);
214 
215   // normalize
216   {
217     std::size_t gThreads[] = {cl_ni,cl_nj};
218     bocl_kernel* normalize_kern = kernels[identifier][1];
219     normalize_kern->set_arg( exp_image.ptr() );
220     normalize_kern->set_arg( vis_image.ptr() );
221     normalize_kern->set_arg( exp_img_dim.ptr());
222     normalize_kern->execute( queue, 2, lthreads, gThreads);
223     clFinish(queue);
224 
225     //clear render kernel args so it can reset em on next execution
226     normalize_kern->clear_args();
227   }
228 
229   // read out expected image
230   exp_image->read_to_buffer(queue);
231   vis_image->read_to_buffer(queue);
232   clFinish(queue);
233   auto* exp_img_out = new vil_image_view<vil_rgba<vxl_byte> >(ni,nj);
234   int numFloats = 4;
235   int count = 0;
236   for (unsigned c=0;c<nj;++c) {
237     for (unsigned r=0;r<ni;++r,count+=numFloats) {
238       buff[count] = buff[count] > 1.0 ? 1.0 : buff[count] ;
239       buff[count+1] = buff[count+1] > 1.0 ? 1.0 : buff[count+1] ;
240       buff[count+2] = buff[count+2] > 1.0 ? 1.0 : buff[count+2] ;
241 
242       (*exp_img_out)(r,c) =
243       vil_rgba<vxl_byte> ( (vxl_byte) (buff[count]*255.0f),
244                            (vxl_byte) (buff[count+1]*255.0f),
245                            (vxl_byte) (buff[count+2]*255.0f),
246                            (vxl_byte) 255 );
247     }
248   }
249   auto* vis_img_out=new vil_image_view<float>(ni,nj);
250   for (unsigned c=0;c<nj;c++)
251     for (unsigned r=0;r<ni;r++)
252       (*vis_img_out)(r,c)=vis_buff[c*cl_ni+r];
253 
254   opencl_cache->unref_mem(exp_image.ptr());
255   opencl_cache->unref_mem(vis_image.ptr());
256   opencl_cache->unref_mem(exp_img_dim.ptr());
257   opencl_cache->unref_mem(max_omega_image.ptr());
258   opencl_cache->unref_mem(tnearfar_mem_ptr.ptr());
259   delete [] buff;
260   delete [] vis_buff;
261   delete [] max_omega_buff;
262   clReleaseCommandQueue(queue);
263 
264 
265   argIdx=0;
266   // store scene smaprt pointer
267   pro.set_output_val<vil_image_view_base_sptr>(argIdx++, exp_img_out);
268   pro.set_output_val<vil_image_view_base_sptr>(argIdx++, vis_img_out);
269   return true;
270 }
271