1 // This is brl/bseg/boxm2/ocl/algo/boxm2_ocl_fuse_factors.cxx
2 #include <fstream>
3 #include <iostream>
4 #include <algorithm>
5 #include "boxm2_ocl_fuse_factors.h"
6 //:
7 // \file
8 // \brief A process for combining the factors
9 //
10 // \author Vishal Jain
11 // \date Nov 24, 2015
12
13 #ifdef _MSC_VER
14 # include "vcl_msvc_warnings.h"
15 #endif
16 #include <boxm2/ocl/boxm2_opencl_cache.h>
17 #include <boxm2/boxm2_scene.h>
18 #include <boxm2/boxm2_block.h>
19 #include <boxm2/boxm2_data_base.h>
20 #include <boxm2/ocl/boxm2_ocl_util.h>
21 #include <boxm2/boxm2_util.h>
22
23 #include "vil/vil_image_view.h"
24
25 //directory utility
26 #include "vul/vul_timer.h"
27 #include <vcl_where_root_dir.h>
28 #include <bocl/bocl_device.h>
29 #include <bocl/bocl_kernel.h>
30
31 //: Map of compiled kernels, keyed by device identifier + options string.
32 //  Defined as a static member so the kernels persist between process executions.
33 std::map<std::string, std::vector<bocl_kernel*> > boxm2_ocl_fuse_factors::fuse_factors_kernels_;
fuse_factors(const boxm2_scene_sptr & scene,const bocl_device_sptr & device,const boxm2_opencl_cache_sptr & opencl_cache,std::vector<std::string> factors_ident,std::vector<float> weights)34 bool boxm2_ocl_fuse_factors::fuse_factors(const boxm2_scene_sptr& scene,
35 const bocl_device_sptr& device,
36 const boxm2_opencl_cache_sptr& opencl_cache,
37 std::vector<std::string> factors_ident,
38 std::vector<float> weights)
39 {
40 float transfer_time = 0.0f;
41 float gpu_time = 0.0f;
42 std::size_t local_threads[1] = { 64 };
43 std::size_t global_threads[1] = { 64 };
44 //cache size sanity check
45 std::size_t binCache = opencl_cache.ptr()->bytes_in_cache();
46 std::cout << "Update MBs in cache: " << binCache / (1024.0*1024.0) << std::endl;
47 // create a command queue.
48 int status = 0;
49 cl_command_queue queue = clCreateCommandQueue(device->context(),
50 *(device->device_id()),
51 CL_QUEUE_PROFILING_ENABLE,
52 &status);
53 if (status != 0)
54 return false;
55 std::vector<boxm2_block_id> blks_order;
56 blks_order = scene->get_block_ids();
57 std::vector<boxm2_block_id>::iterator id;
58 // compile the kernel if not already compiled
59 //: Initialize Cumulative factor
60 bocl_kernel * kern = get_fuse_factors_kernels(device)[0];
61 float weight_buf[] = { 0.0 };
62 bocl_mem * weight = new bocl_mem(device->context(), weight_buf, sizeof(float), "weight buffer");
63
64 for (id = blks_order.begin(); id != blks_order.end(); ++id)
65 {
66 //choose correct render kernel
67 boxm2_block_metadata mdata = scene->get_block_metadata(*id);
68 //write the image values to the buffer
69 vul_timer transfer;
70 bocl_mem* blk = opencl_cache->get_block(scene, *id);
71 bocl_mem* blk_info = opencl_cache->loaded_block_info();
72 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene, *id);
73 auto* info_buffer = (boxm2_scene_info*)blk_info->cpu_buffer();
74 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
75 info_buffer->data_buffer_length = (int)(alpha->num_bytes() / alphaTypeSize);
76 blk_info->write_to_buffer((queue));
77 local_threads[0] = 64;
78 global_threads[0] = RoundUp(info_buffer->data_buffer_length, local_threads[0]);
79 //grab an appropriately sized AUX data buffer
80 bocl_mem *aux0_cum = opencl_cache->get_data(scene, *id, boxm2_data_traits<BOXM2_AUX0>::prefix("cum"), 0, false);
81 bocl_mem* prob_init = opencl_cache->get_data(scene, *id, boxm2_data_traits<BOXM2_AUX0>::prefix("prob_init"), 0, false);
82 transfer_time += (float)transfer.all();
83 kern->set_arg(blk_info);
84 kern->set_arg(prob_init);
85 kern->set_arg(aux0_cum);
86 //execute kernel
87 kern->execute(queue, 1, local_threads, global_threads);
88 int status = clFinish(queue);
89 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
90 return false;
91 gpu_time += kern->exec_time();
92 //clear render kernel args so it can reset em on next execution
93 aux0_cum->read_to_buffer(queue);
94 kern->clear_args();
95 opencl_cache->deep_remove_data(scene, *id, boxm2_data_traits<BOXM2_AUX0>::prefix("prob_init"), true);
96 }
97 kern = get_fuse_factors_kernels(device)[1];
98 for (id = blks_order.begin(); id != blks_order.end(); ++id)
99 {
100 //choose correct render kernel
101 boxm2_block_metadata mdata = scene->get_block_metadata(*id);
102 //write the image values to the buffer
103 vul_timer transfer;
104 bocl_mem* blk = opencl_cache->get_block(scene, *id);
105 bocl_mem* blk_info = opencl_cache->loaded_block_info();
106 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene, *id);
107 auto* info_buffer = (boxm2_scene_info*)blk_info->cpu_buffer();
108 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
109 info_buffer->data_buffer_length = (int)(alpha->num_bytes() / alphaTypeSize);
110 blk_info->write_to_buffer((queue));
111 local_threads[0] = 64;
112 global_threads[0] = RoundUp(info_buffer->data_buffer_length, local_threads[0]);
113 //grab an appropriately sized AUX data buffer
114 bocl_mem *aux0_cum = opencl_cache->get_data(scene, *id, boxm2_data_traits<BOXM2_AUX0>::prefix("cum"), 0, false);
115 weight->create_buffer(CL_MEM_READ_ONLY, queue);
116 for (unsigned int j = 0; j < factors_ident.size(); j++)
117 {
118
119 weight_buf[0] = weights[j];
120 weight->write_to_buffer(queue);
121 bocl_mem *aux0_factor = opencl_cache->get_data(scene, *id, boxm2_data_traits<BOXM2_AUX0>::prefix(factors_ident[j]), 0, false);
122
123 kern->set_arg(blk_info);
124 kern->set_arg(aux0_factor);
125 kern->set_arg(aux0_cum);
126 kern->set_arg(weight);
127 //execute kernel
128 kern->execute(queue, 1, local_threads, global_threads);
129 int status = clFinish(queue);
130 if (!check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status)))
131 return false;
132 gpu_time += kern->exec_time();
133 aux0_cum->read_to_buffer(queue);
134 kern->clear_args();
135 }
136
137 }
138 cl_uchar lookup_arr[256];
139 boxm2_ocl_util::set_bit_lookup(lookup_arr);
140 bocl_mem_sptr lookup = new bocl_mem(device->context(), lookup_arr, sizeof(cl_uchar) * 256, "bit lookup buffer");
141 lookup->create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
142 kern = get_fuse_factors_kernels(device)[2];
143 for (id = blks_order.begin(); id != blks_order.end(); ++id)
144 {
145 //choose correct render kernel
146 boxm2_block_metadata mdata = scene->get_block_metadata(*id);
147 //write the image values to the buffer
148 vul_timer transfer;
149 bocl_mem* blk = opencl_cache->get_block(scene, *id);
150 bocl_mem* blk_info = opencl_cache->loaded_block_info();
151 bocl_mem* alpha = opencl_cache->get_data<BOXM2_ALPHA>(scene, *id);
152 auto* info_buffer = (boxm2_scene_info*)blk_info->cpu_buffer();
153 int alphaTypeSize = (int)boxm2_data_info::datasize(boxm2_data_traits<BOXM2_ALPHA>::prefix());
154 info_buffer->data_buffer_length = (int)(alpha->num_bytes() / alphaTypeSize);
155 blk_info->write_to_buffer((queue));
156 //grab an appropriately sized AUX data buffer
157 bocl_mem *aux0_cum = opencl_cache->get_data(scene, *id, boxm2_data_traits<BOXM2_AUX0>::prefix("cum"), 0, false);
158
159 //set workspace
160 std::size_t ltr[] = { 4, 4, 4 };
161 std::size_t gtr[] = { RoundUp(mdata.sub_block_num_.x(), ltr[0]),
162 RoundUp(mdata.sub_block_num_.y(), ltr[1]),
163 RoundUp(mdata.sub_block_num_.z(), ltr[2]) };
164 kern->set_arg(blk_info);
165 kern->set_arg(blk);
166 kern->set_arg(alpha);
167 kern->set_arg(aux0_cum);
168 kern->set_arg(lookup.ptr());
169 kern->set_local_arg(ltr[0] * ltr[1] * ltr[2] * 10 * sizeof(cl_uchar));
170 kern->set_local_arg(ltr[0] * ltr[1] * ltr[2] * sizeof(cl_uchar16));
171
172 //execute kernel
173 kern->execute(queue, 3, ltr, gtr);
174 int status = clFinish(queue);
175 check_val(status, MEM_FAILURE, "UPDATE EXECUTE FAILED: " + error_to_string(status));
176 gpu_time += kern->exec_time();
177
178 alpha->read_to_buffer(queue);
179 kern->clear_args();
180 //opencl_cache->deep_remove_data(scene, *id, boxm2_data_traits<BOXM2_ALPHA>::prefix(), true);
181 }
182 clFinish(queue);
183
184 return true;
185 }
186
187
188
get_fuse_factors_kernels(const bocl_device_sptr & device,const std::string & opts)189 std::vector<bocl_kernel*>& boxm2_ocl_fuse_factors::get_fuse_factors_kernels(const bocl_device_sptr& device, const std::string& opts)
190 {
191 // compile kernels if not already compiled
192 std::string identifier = device->device_identifier() + opts;
193 if (fuse_factors_kernels_.find(identifier) != fuse_factors_kernels_.end())
194 return fuse_factors_kernels_[identifier];
195
196 //otherwise compile the kernels
197 std::cout << "=== boxm2_ocl_update_process::compiling kernels on device " << identifier << "===" << std::endl;
198 std::vector<std::string> src_paths;
199 std::string source_dir = boxm2_ocl_util::ocl_src_root();
200 src_paths.push_back(source_dir + "scene_info.cl");
201 src_paths.push_back(source_dir + "pixel_conversion.cl");
202 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
203 src_paths.push_back(source_dir + "backproject.cl");
204 src_paths.push_back(source_dir + "atomics_util.cl");
205 src_paths.push_back(source_dir + "statistics_library_functions.cl");
206 src_paths.push_back(source_dir + "ray_bundle_library_opt.cl");
207 src_paths.push_back(source_dir + "bit/update_kernels.cl");
208 src_paths.push_back(source_dir + "bit/update_bp_kernels.cl");
209 //populate vector of kernels
210 std::vector<bocl_kernel*> vec_kernels;
211
212 //compilation options
213 std::string options = "-D INIT_CUM";
214 auto* init_cum = new bocl_kernel();
215 std::string init_cum_opts = options;
216 init_cum->create_kernel(&device->context(), device->device_id(), src_paths, "init_cum_main", init_cum_opts, "update::init_cum");
217 vec_kernels.push_back(init_cum);
218
219 options = "-D FUSE_FACTORS";
220 auto* fusefactors = new bocl_kernel();
221 std::string fusefactors_opts = options;
222 fusefactors->create_kernel(&device->context(), device->device_id(), src_paths, "fuse_factors_main", fusefactors_opts, "update::fuse_factors");
223 vec_kernels.push_back(fusefactors);
224
225 options = "-D EVALUATE_ALPHA";
226 auto* evaluate_alpha = new bocl_kernel();
227 std::string evaluate_alpha_opts = options;
228 evaluate_alpha->create_kernel(&device->context(), device->device_id(), src_paths, "evaluate_alpha_main", evaluate_alpha_opts, "update::evaluate_alpha");
229 vec_kernels.push_back(evaluate_alpha);
230 //store and return
231 fuse_factors_kernels_[identifier] = vec_kernels;
232 return fuse_factors_kernels_[identifier];
233 }
234