1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_update_parents_alpha_process.cxx
2 #include <iostream>
3 #include <fstream>
4 #include <bprb/bprb_func_process.h>
5 //:
6 // \file
7 // \brief  A process for updating parents alpha by the max prob of the children
8 //
9 // \author Vishal Jain
10 // \date Apr 23, 2013
11 
12 #ifdef _MSC_VER
13 #  include "vcl_msvc_warnings.h"
14 #endif
15 #include <boxm2/ocl/boxm2_opencl_cache.h>
16 #include <boxm2/boxm2_scene.h>
17 #include <boxm2/boxm2_block.h>
18 #include <boxm2/boxm2_data_base.h>
19 #include <boxm2/ocl/boxm2_ocl_util.h>
20 //brdb stuff
21 #include <brdb/brdb_value.h>
22 
23 //directory utility
24 #include "vul/vul_timer.h"
25 #include <vcl_where_root_dir.h>
26 #include <bocl/bocl_device.h>
27 #include <bocl/bocl_kernel.h>
28 
29 
30 namespace boxm2_ocl_update_parents_alpha_process_globals
31 {
32     constexpr unsigned n_inputs_ = 3;
33     constexpr unsigned n_outputs_ = 0;
compile_kernel(const bocl_device_sptr & device,bocl_kernel * merge_kernel)34     void compile_kernel(const bocl_device_sptr& device, bocl_kernel* merge_kernel)
35     {
36         //gather all render sources... seems like a lot for rendering...
37         std::vector<std::string> src_paths;
38         std::string source_dir = boxm2_ocl_util::ocl_src_root();
39         src_paths.push_back(source_dir + "scene_info.cl");
40         src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
41         src_paths.push_back(source_dir + "bit/update_parents_alpha.cl");
42 
43         merge_kernel->create_kernel( &device->context(),
44                                      device->device_id(),
45                                      src_paths,
46                                      "update_parents_alpha",
47                                      "",
48                                      "boxm2 opencl update parents alpha"); //kernel identifier (for error checking)
49     }
50 
51     //map of compiled kernels, organized by data type
52     static std::map<std::string,bocl_kernel* > kernels;
53 }
54 
boxm2_ocl_update_parents_alpha_process_cons(bprb_func_process & pro)55 bool boxm2_ocl_update_parents_alpha_process_cons(bprb_func_process& pro)
56 {
57     using namespace boxm2_ocl_update_parents_alpha_process_globals;
58 
59     //process takes 1 input
60     std::vector<std::string> input_types_(n_inputs_);
61     input_types_[0] = "bocl_device_sptr";
62     input_types_[1] = "boxm2_scene_sptr";
63     input_types_[2] = "boxm2_opencl_cache_sptr";
64 
65     // process has 1 output:
66     // output[0]: scene sptr
67     std::vector<std::string>  output_types_(n_outputs_);
68     return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
69 }
70 
boxm2_ocl_update_parents_alpha_process(bprb_func_process & pro)71 bool boxm2_ocl_update_parents_alpha_process(bprb_func_process& pro)
72 {
73     using namespace boxm2_ocl_update_parents_alpha_process_globals;
74     if ( pro.n_inputs() < n_inputs_ ) {
75         std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
76         return false;
77     }
78     float transfer_time=0.0f;
79     float gpu_time=0.0f;
80     //get the inputs
81     unsigned i = 0;
82     bocl_device_sptr device= pro.get_input<bocl_device_sptr>(i++);
83     boxm2_scene_sptr scene =pro.get_input<boxm2_scene_sptr>(i++);
84     boxm2_opencl_cache_sptr opencl_cache= pro.get_input<boxm2_opencl_cache_sptr>(i++);
85 
86     std::string identifier=device->device_identifier();
87     // create a command queue.
88     int status=0;
89     cl_command_queue queue = clCreateCommandQueue(device->context(),
90                                                   *(device->device_id()),
91                                                   CL_QUEUE_PROFILING_ENABLE,&status);
92     if (status!=0) {
93         std::cout<<" ERROR in initializing a queue"<<std::endl;
94         return false;
95     }
96 
97     //set tree identifier and compile (indexes into merge tree kernel)
98     if (kernels.find(identifier)==kernels.end())
99     {
100         std::cout<<"===========Compiling kernels==========="<<std::endl;
101         auto * kernel=new bocl_kernel();
102         compile_kernel(device,kernel);
103         kernels[identifier]=kernel;
104     }
105 
106     float output_arr[100];
107     for (float & i : output_arr) i = 0.0f;
108     bocl_mem_sptr  cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
109     cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
110     // bit lookup buffer
111     cl_uchar lookup_arr[256];
112     boxm2_ocl_util::set_bit_lookup(lookup_arr);
113     bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
114     lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
115 
116     //2. set workgroup size
117     opencl_cache->clear_cache();
118     std::map<boxm2_block_id, boxm2_block_metadata> blocks = scene->blocks();
119     std::map<boxm2_block_id, boxm2_block_metadata>::iterator blk_iter;
120         bocl_kernel* kern=kernels[identifier];
121     for (blk_iter = blocks.begin(); blk_iter != blocks.end(); ++blk_iter)
122     {
123         boxm2_block_id id = blk_iter->first;
124         std::cout<<"Updating Parents Alpha"<<id<<std::endl;
125         //clear cache
126         boxm2_block_metadata data = blk_iter->second;
127         int numTrees = data.sub_block_num_.x() * data.sub_block_num_.y() * data.sub_block_num_.z();
128 
129         //write the image values to the buffer
130         vul_timer transfer;
131         bocl_mem* blk = opencl_cache->get_block(scene,id);
132         bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,id);
133         bocl_mem* blk_info = opencl_cache->loaded_block_info();
134         transfer_time += (float) transfer.all();
135         std::size_t lThreads[] = {16, 1};
136         std::size_t gThreads[] = {RoundUp(numTrees,lThreads[0]), 1};
137 
138         //set first kernel args
139         kern->set_arg( blk_info );
140         kern->set_arg( lookup.ptr() );
141         kern->set_arg( blk );
142         kern->set_arg( alpha );
143         kern->set_local_arg(lThreads[0]*lThreads[1]*16*sizeof(cl_uchar)  );
144         kern->set_local_arg(lThreads[0]*lThreads[1]*10*sizeof(cl_uchar)  );
145         //kern->set_arg( cl_output.ptr() );
146 
147         //execute kernel
148         kern->execute( queue, 2, lThreads, gThreads);
149         clFinish(queue);
150         gpu_time += kern->exec_time();
151 
152         //clear render kernel args so it can reset em on next execution
153         kern->clear_args();
154         alpha->read_to_buffer(queue);
155         clFinish(queue);
156 
157     }
158 
159     std::cout<<"Update Parents Alpha: "<<gpu_time<<std::endl;
160     return true;
161 }
162