1 // This is brl/bseg/boxm2/ocl/pro/processes/boxm2_update_parents_alpha_process.cxx
2 #include <iostream>
3 #include <fstream>
4 #include <bprb/bprb_func_process.h>
5 //:
6 // \file
7 // \brief A process for updating parents alpha by the max prob of the children
8 //
9 // \author Vishal Jain
10 // \date Apr 23, 2013
11
12 #ifdef _MSC_VER
13 # include "vcl_msvc_warnings.h"
14 #endif
15 #include <boxm2/ocl/boxm2_opencl_cache.h>
16 #include <boxm2/boxm2_scene.h>
17 #include <boxm2/boxm2_block.h>
18 #include <boxm2/boxm2_data_base.h>
19 #include <boxm2/ocl/boxm2_ocl_util.h>
20 //brdb stuff
21 #include <brdb/brdb_value.h>
22
23 //directory utility
24 #include "vul/vul_timer.h"
25 #include <vcl_where_root_dir.h>
26 #include <bocl/bocl_device.h>
27 #include <bocl/bocl_kernel.h>
28
29
30 namespace boxm2_ocl_update_parents_alpha_process_globals
31 {
32 constexpr unsigned n_inputs_ = 3;
33 constexpr unsigned n_outputs_ = 0;
compile_kernel(const bocl_device_sptr & device,bocl_kernel * merge_kernel)34 void compile_kernel(const bocl_device_sptr& device, bocl_kernel* merge_kernel)
35 {
36 //gather all render sources... seems like a lot for rendering...
37 std::vector<std::string> src_paths;
38 std::string source_dir = boxm2_ocl_util::ocl_src_root();
39 src_paths.push_back(source_dir + "scene_info.cl");
40 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
41 src_paths.push_back(source_dir + "bit/update_parents_alpha.cl");
42
43 merge_kernel->create_kernel( &device->context(),
44 device->device_id(),
45 src_paths,
46 "update_parents_alpha",
47 "",
48 "boxm2 opencl update parents alpha"); //kernel identifier (for error checking)
49 }
50
51 //map of compiled kernels, organized by data type
52 static std::map<std::string,bocl_kernel* > kernels;
53 }
54
boxm2_ocl_update_parents_alpha_process_cons(bprb_func_process & pro)55 bool boxm2_ocl_update_parents_alpha_process_cons(bprb_func_process& pro)
56 {
57 using namespace boxm2_ocl_update_parents_alpha_process_globals;
58
59 //process takes 1 input
60 std::vector<std::string> input_types_(n_inputs_);
61 input_types_[0] = "bocl_device_sptr";
62 input_types_[1] = "boxm2_scene_sptr";
63 input_types_[2] = "boxm2_opencl_cache_sptr";
64
65 // process has 1 output:
66 // output[0]: scene sptr
67 std::vector<std::string> output_types_(n_outputs_);
68 return pro.set_input_types(input_types_) && pro.set_output_types(output_types_);
69 }
70
boxm2_ocl_update_parents_alpha_process(bprb_func_process & pro)71 bool boxm2_ocl_update_parents_alpha_process(bprb_func_process& pro)
72 {
73 using namespace boxm2_ocl_update_parents_alpha_process_globals;
74 if ( pro.n_inputs() < n_inputs_ ) {
75 std::cout << pro.name() << ": The input number should be " << n_inputs_<< std::endl;
76 return false;
77 }
78 float transfer_time=0.0f;
79 float gpu_time=0.0f;
80 //get the inputs
81 unsigned i = 0;
82 bocl_device_sptr device= pro.get_input<bocl_device_sptr>(i++);
83 boxm2_scene_sptr scene =pro.get_input<boxm2_scene_sptr>(i++);
84 boxm2_opencl_cache_sptr opencl_cache= pro.get_input<boxm2_opencl_cache_sptr>(i++);
85
86 std::string identifier=device->device_identifier();
87 // create a command queue.
88 int status=0;
89 cl_command_queue queue = clCreateCommandQueue(device->context(),
90 *(device->device_id()),
91 CL_QUEUE_PROFILING_ENABLE,&status);
92 if (status!=0) {
93 std::cout<<" ERROR in initializing a queue"<<std::endl;
94 return false;
95 }
96
97 //set tree identifier and compile (indexes into merge tree kernel)
98 if (kernels.find(identifier)==kernels.end())
99 {
100 std::cout<<"===========Compiling kernels==========="<<std::endl;
101 auto * kernel=new bocl_kernel();
102 compile_kernel(device,kernel);
103 kernels[identifier]=kernel;
104 }
105
106 float output_arr[100];
107 for (float & i : output_arr) i = 0.0f;
108 bocl_mem_sptr cl_output=new bocl_mem(device->context(), output_arr, sizeof(float)*100, "output buffer");
109 cl_output->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
110 // bit lookup buffer
111 cl_uchar lookup_arr[256];
112 boxm2_ocl_util::set_bit_lookup(lookup_arr);
113 bocl_mem_sptr lookup=new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
114 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
115
116 //2. set workgroup size
117 opencl_cache->clear_cache();
118 std::map<boxm2_block_id, boxm2_block_metadata> blocks = scene->blocks();
119 std::map<boxm2_block_id, boxm2_block_metadata>::iterator blk_iter;
120 bocl_kernel* kern=kernels[identifier];
121 for (blk_iter = blocks.begin(); blk_iter != blocks.end(); ++blk_iter)
122 {
123 boxm2_block_id id = blk_iter->first;
124 std::cout<<"Updating Parents Alpha"<<id<<std::endl;
125 //clear cache
126 boxm2_block_metadata data = blk_iter->second;
127 int numTrees = data.sub_block_num_.x() * data.sub_block_num_.y() * data.sub_block_num_.z();
128
129 //write the image values to the buffer
130 vul_timer transfer;
131 bocl_mem* blk = opencl_cache->get_block(scene,id);
132 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene,id);
133 bocl_mem* blk_info = opencl_cache->loaded_block_info();
134 transfer_time += (float) transfer.all();
135 std::size_t lThreads[] = {16, 1};
136 std::size_t gThreads[] = {RoundUp(numTrees,lThreads[0]), 1};
137
138 //set first kernel args
139 kern->set_arg( blk_info );
140 kern->set_arg( lookup.ptr() );
141 kern->set_arg( blk );
142 kern->set_arg( alpha );
143 kern->set_local_arg(lThreads[0]*lThreads[1]*16*sizeof(cl_uchar) );
144 kern->set_local_arg(lThreads[0]*lThreads[1]*10*sizeof(cl_uchar) );
145 //kern->set_arg( cl_output.ptr() );
146
147 //execute kernel
148 kern->execute( queue, 2, lThreads, gThreads);
149 clFinish(queue);
150 gpu_time += kern->exec_time();
151
152 //clear render kernel args so it can reset em on next execution
153 kern->clear_args();
154 alpha->read_to_buffer(queue);
155 clFinish(queue);
156
157 }
158
159 std::cout<<"Update Parents Alpha: "<<gpu_time<<std::endl;
160 return true;
161 }
162