//:
// \file
// \author Andy Miller
// \date 26-Oct-2010
5
6 #include <iostream>
7 #include <string>
8 #include <vector>
9 #include <map>
10 #include <algorithm>
11 #include "testlib/testlib_test.h"
12 #include "testlib/testlib_root_dir.h"
13 #include <vcl_where_root_dir.h>
14
15 #include <boxm2/boxm2_scene.h>
16 #if 0 // all tests commented out for the time being...
17 #include <bocl/bocl_kernel.h>
18 #include <bocl/bocl_mem.h>
19 #include <boxm2/basic/boxm2_block_id.h>
20 #include <boxm2/ocl/boxm2_ocl_util.h>
21 #include <boxm2/ocl/boxm2_opencl_cache.h>
22 #include <boxm2/ocl/pro/boxm2_opencl_processor.h> // beware: does not exist anymore
23
24 #ifdef _MSC_VER
25 # include "vcl_msvc_warnings.h"
26 #endif
27 #endif
28
//: Three unit tests, one for each of the three main refine kernel functions
30
test_refine_trees_kernel()31 void test_refine_trees_kernel()
32 {
33 //----------------------------------------------------------------------------
34 //--- BEGIN BOXM2 API EXAMPLE ------------------------------------------------
35 //----------------------------------------------------------------------------
36 //start out rendering with the CPU
37 std::string test_dir = testlib_root_dir()+ "/contrib/brl/bseg/boxm2/ocl/tests/";
38 std::string test_file = test_dir + "scene.xml";
39 boxm2_scene_sptr scene = new boxm2_scene(test_file);
40
41 #if 0
42 //initialize a block and data cache
43 boxm2_lru_cache::create(scene.ptr());
44
45 //initialize gpu pro / manager
46 boxm2_opencl_processor* gpu_pro = boxm2_opencl_processor::instance();
47 gpu_pro->set_scene(scene.ptr());
48 gpu_pro->set_cpu_cache(boxm2_cache::instance());
49 gpu_pro->init();
50
51 //rendering sources
52 std::vector<std::string> src_paths;
53 std::string source_dir = std::string(VCL_SOURCE_ROOT_DIR) + "/contrib/brl/bseg/boxm2/ocl/cl/";
54 src_paths.push_back(source_dir + "scene_info.cl");
55 src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
56 src_paths.push_back(source_dir + "bit/refine_bit_scene.cl");
57 bocl_kernel refine_trees;
58 if (!refine_trees.create_kernel( &gpu_pro->context(), &gpu_pro->devices()[0], src_paths,
59 "refine_trees", "-D MOG_TYPE_8", "boxm2 opencl refine trees (pass one)") ) {
60 TEST("Refine Trees Kernel creation ", true, false);
61 return;
62 }
63
64 //prob_thresh buffer
65 float* prob_buff = new float[1];
66 prob_buff[0] = 0.00001f; //EVERYTHING should refine
67 bocl_mem prob_thresh( gpu_pro->context(), prob_buff, sizeof(float), "prob_thresh buffer");
68 prob_thresh.create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
69
70 //output buffer
71 float* output_arr = new float[100];
72 for (int i=0; i<100; ++i) output_arr[i] = 0.0f;
73 bocl_mem cl_output( gpu_pro->context(), output_arr, sizeof(float)*100, "output buffer");
74 cl_output.create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
75
76 //bit lookup buffer
77 cl_uchar* lookup_arr = new cl_uchar[256];
78 boxm2_ocl_util::set_bit_lookup(lookup_arr);
79 bocl_mem lookup( gpu_pro->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
80 lookup.create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
81
82 //For each ID in the visibility order, grab that block
83 std::map<boxm2_block_id, boxm2_block_metadata> blocks = scene->blocks();
84 std::map<boxm2_block_id, boxm2_block_metadata>::iterator blk_iter = blocks.begin();
85 boxm2_block_metadata data = blk_iter->second;
86
87 //get id
88 boxm2_block_id id = blk_iter->first;
89
90 //write the image values to the buffer
91 bocl_mem* blk = gpu_pro->gpu_cache()->get_block(id);
92 bocl_mem* alpha = gpu_pro->gpu_cache()->get_data<BOXM2_ALPHA>(id);
93 bocl_mem* blk_info = gpu_pro->gpu_cache()->loaded_block_info();
94
95 //set up tree copy
96 vxl_byte* cpybuff = new vxl_byte[blk->num_bytes()];
97 bocl_mem* blk_copy = new bocl_mem( gpu_pro->context(), cpybuff, blk->num_bytes(), "refine trees block copy buffer");
98 blk_copy->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
99
100 //set up tree size (first find num trees)
101 int numTrees = data.sub_block_num_.x() * data.sub_block_num_.y() * data.sub_block_num_.z();
102 //int* sizebuff = new int[numTrees];
103 bocl_mem* tree_sizes = new bocl_mem( gpu_pro->context(), /*sizebuff*/ NULL, sizeof(cl_int)*numTrees, "refine tree sizes buffer");
104 tree_sizes->create_buffer(CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, *gpu_pro->get_queue());
105
106 //set first kernel args
107 refine_trees.set_arg( blk_info );
108 refine_trees.set_arg( blk );
109 refine_trees.set_arg( blk_copy );
110 refine_trees.set_arg( alpha );
111 refine_trees.set_arg( tree_sizes );
112 refine_trees.set_arg( &prob_thresh );
113 refine_trees.set_arg( &lookup );
114 refine_trees.set_arg( &cl_output );
115 refine_trees.set_local_arg( 16*sizeof(cl_uchar) );
116 refine_trees.set_local_arg( sizeof(cl_uchar16) );
117 refine_trees.set_local_arg( sizeof(cl_uchar16) );
118
119 //set workspace
120 std::size_t lThreads[] = {1, 1};
121 std::size_t gThreads[] = {numTrees, 1};
122
123 //execute kernel
124 refine_trees.execute( *gpu_pro->get_queue(), 2, lThreads, gThreads);
125 clFinish( *gpu_pro->get_queue());
126
127 //clear render kernel args so it can reset em on next execution
128 refine_trees.clear_args();
129
130 blk_copy->read_to_buffer(*gpu_pro->get_queue());
131 std::cout<<"Testing refine results"<<std::endl;
132 vxl_byte* cpy = (vxl_byte*) blk_copy->cpu_buffer();
133 for (int i=0; i<numTrees; ++i) {
134 if (cpy[16*i] != 1) {
135 TEST(" FIRST PASS REFINE TEST (trees) ", true, false);
136 return;
137 }
138 }
139 TEST(" First pass refine trees test ", true, true);
140
141 tree_sizes->read_to_buffer(*gpu_pro->get_queue());
142 std::cout<<"outputting new tree sizes"<<std::endl;
143 int* sizes = (int*) tree_sizes->cpu_buffer();
144 for (int i=0; i<numTrees; ++i) {
145 if (sizes[i] != 9) {
146 TEST(" FIRST PASS REFINE TEST (SIZES) ", true, false);
147 return;
148 }
149 }
150 TEST(" first pass refine test sizes ", true, true);
151
152
153 /////////////////////////////////////////////////////////////////////////
154 //STEP TWO
155 //read out tree_sizes and do cumulative sum on it
156 tree_sizes->read_to_buffer(*gpu_pro->get_queue());
157 int* sizebuff = (int*) tree_sizes->cpu_buffer();
158 for (int i=1; i<numTrees; ++i) sizebuff[i] += sizebuff[i-1];
159 int newDataSize = sizebuff[numTrees-1];
160 for (int i=numTrees-1; i>0; --i) sizebuff[i] = sizebuff[i-1];
161 sizebuff[0] = 0;
162 tree_sizes->write_to_buffer(*gpu_pro->get_queue());
163 std::cout<<"New data size: "<<newDataSize<<std::endl;
164 /////////////////////////////////////////////////////////////////////////
165
166 /////////////////////////////////////////////////////////////////////////////
167 // TEST REFINE DATA
168 /////////////////////////////////////////////////////////////////////////////
169 bocl_kernel refine_data;
170 refine_data.create_kernel( &gpu_pro->context(), &gpu_pro->devices()[0], src_paths,
171 "refine_data", "-D MOG_TYPE_4 ", "boxm2 opencl refine data size 4 (pass three)");
172
173 //set up alpha copy
174 bocl_mem* alpha_copy = new bocl_mem(gpu_pro->context(), NULL, newDataSize*sizeof(float), "alpha block copy buffer");
175 alpha_copy->create_buffer(CL_MEM_READ_WRITE| CL_MEM_ALLOC_HOST_PTR, *gpu_pro->get_queue());
176
177 //init value buffer
178 float* init_abuff = new float[1]; init_abuff[0] = .01f;
179 bocl_mem init_alpha(gpu_pro->context(), init_abuff, sizeof(float), "init_alpha buffer");
180 init_alpha.create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
181
182 refine_data.set_arg( blk_info );
183 refine_data.set_arg( blk );
184 refine_data.set_arg( blk_copy );
185 refine_data.set_arg( tree_sizes );
186 refine_data.set_arg( alpha );
187 refine_data.set_arg( alpha_copy );
188 refine_data.set_arg( &init_alpha );
189 refine_data.set_arg( &prob_thresh );
190 refine_data.set_arg( &lookup );
191 refine_data.set_arg( &cl_output );
192 refine_data.set_local_arg( 16*sizeof(cl_uchar) );
193 refine_data.set_local_arg( sizeof(cl_uchar16) );
194 refine_data.set_local_arg( sizeof(cl_uchar16) );
195
196 //execute kernel
197 refine_data.execute( *gpu_pro->get_queue(), 2, lThreads, gThreads);
198 clFinish( *gpu_pro->get_queue());
199
200 //original alphas
201 float* abuf = (float*) alpha->cpu_buffer();
202 for (int i=0; i<8; ++i)
203 std::cout<<abuf[i]<<std::endl;
204
205 std::cout<<"NEW ALPHAS"<<std::endl;
206 alpha_copy->read_to_buffer(*gpu_pro->get_queue());
207 float* alph = (float*) alpha_copy->cpu_buffer();
208 for (int i=0; i<newDataSize; ++i)
209 std::cout<<alph[i]<<std::endl;
210
211 //TEST to make sure the new trees are lined up correctly
212 blk->read_to_buffer(*gpu_pro->get_queue());
213 vxl_byte* refined = (vxl_byte*) blk->cpu_buffer();
214 for (int i=0; i<numTrees; ++i) {
215 if (refined[16*i] != 1) {
216 std::cout<<"value is: "<<(int) refined[16*i]<<"... should be 1 at "<<i<<std::endl;
217 TEST(" THIRD PASS REFINE TEST (trees) ", true, false);
218 return;
219 }
220 int pointer;
221 std::memcpy(&pointer, &refined[16*i]+10, sizeof(int));
222 if (pointer != 9*i) {
223 TEST(" THIRD PASS Refine data pointer ", true, false);
224 std::cout<<"Pointer is: "<<pointer<<"... should be "<<9*i<<" at "<<i<<std::endl;
225 return;
226 }
227 }
228 #endif
229
230 TEST(" Third Pass TREE passes ", true, true);
231 }
232
//: Placeholder for the refine scan kernel test; it has no body and performs no checks.
void test_refine_scan_kernel() {}
//: Placeholder for the refine data kernel test; it has no body and performs no checks.
void test_refine_data_kernel() {}
test_refine()235 void test_refine() { test_refine_trees_kernel(); test_refine_scan_kernel(); test_refine_data_kernel(); }
236
237 TESTMAIN( test_refine );
238