//:
// \file
// \brief Unit tests for the boxm2 OpenCL refine kernels
// \author Andy Miller
// \date 26-Oct-2010
5 
6 #include <iostream>
7 #include <string>
8 #include <vector>
9 #include <map>
10 #include <algorithm>
11 #include "testlib/testlib_test.h"
12 #include "testlib/testlib_root_dir.h"
13 #include <vcl_where_root_dir.h>
14 
15 #include <boxm2/boxm2_scene.h>
16 #if 0 // all tests commented out for the time being...
17 #include <bocl/bocl_kernel.h>
18 #include <bocl/bocl_mem.h>
19 #include <boxm2/basic/boxm2_block_id.h>
20 #include <boxm2/ocl/boxm2_ocl_util.h>
21 #include <boxm2/ocl/boxm2_opencl_cache.h>
22 #include <boxm2/ocl/pro/boxm2_opencl_processor.h> // beware: does not exist anymore
23 
24 #ifdef _MSC_VER
25 #  include "vcl_msvc_warnings.h"
26 #endif
27 #endif
28 
//: Unit tests for the three main refine kernel functions (the scan and data tests are currently empty stubs)
30 
test_refine_trees_kernel()31 void test_refine_trees_kernel()
32 {
33   //----------------------------------------------------------------------------
34   //--- BEGIN BOXM2 API EXAMPLE ------------------------------------------------
35   //----------------------------------------------------------------------------
36   //start out rendering with the CPU
37   std::string test_dir  = testlib_root_dir()+ "/contrib/brl/bseg/boxm2/ocl/tests/";
38   std::string test_file = test_dir + "scene.xml";
39   boxm2_scene_sptr scene = new boxm2_scene(test_file);
40 
41 #if 0
42   //initialize a block and data cache
43   boxm2_lru_cache::create(scene.ptr());
44 
45   //initialize gpu pro / manager
46   boxm2_opencl_processor* gpu_pro = boxm2_opencl_processor::instance();
47   gpu_pro->set_scene(scene.ptr());
48   gpu_pro->set_cpu_cache(boxm2_cache::instance());
49   gpu_pro->init();
50 
51   //rendering sources
52   std::vector<std::string> src_paths;
53   std::string source_dir = std::string(VCL_SOURCE_ROOT_DIR) + "/contrib/brl/bseg/boxm2/ocl/cl/";
54   src_paths.push_back(source_dir + "scene_info.cl");
55   src_paths.push_back(source_dir + "bit/bit_tree_library_functions.cl");
56   src_paths.push_back(source_dir + "bit/refine_bit_scene.cl");
57   bocl_kernel refine_trees;
58   if (!refine_trees.create_kernel( &gpu_pro->context(), &gpu_pro->devices()[0], src_paths,
59                                    "refine_trees", "-D MOG_TYPE_8", "boxm2 opencl refine trees (pass one)") ) {
60     TEST("Refine Trees Kernel creation ", true, false);
61     return;
62   }
63 
64   //prob_thresh buffer
65   float* prob_buff = new float[1];
66   prob_buff[0] = 0.00001f; //EVERYTHING should refine
67   bocl_mem prob_thresh( gpu_pro->context(), prob_buff, sizeof(float), "prob_thresh buffer");
68   prob_thresh.create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
69 
70   //output buffer
71   float* output_arr = new float[100];
72   for (int i=0; i<100; ++i) output_arr[i] = 0.0f;
73   bocl_mem cl_output( gpu_pro->context(), output_arr, sizeof(float)*100, "output buffer");
74   cl_output.create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
75 
76   //bit lookup buffer
77   cl_uchar* lookup_arr = new cl_uchar[256];
78   boxm2_ocl_util::set_bit_lookup(lookup_arr);
79   bocl_mem lookup( gpu_pro->context(), lookup_arr, sizeof(cl_uchar)*256, "bit lookup buffer");
80   lookup.create_buffer(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
81 
82   //For each ID in the visibility order, grab that block
83   std::map<boxm2_block_id, boxm2_block_metadata> blocks = scene->blocks();
84   std::map<boxm2_block_id, boxm2_block_metadata>::iterator blk_iter = blocks.begin();
85   boxm2_block_metadata data = blk_iter->second;
86 
87   //get id
88   boxm2_block_id id = blk_iter->first;
89 
90   //write the image values to the buffer
91   bocl_mem* blk       = gpu_pro->gpu_cache()->get_block(id);
92   bocl_mem* alpha     = gpu_pro->gpu_cache()->get_data<BOXM2_ALPHA>(id);
93   bocl_mem* blk_info  = gpu_pro->gpu_cache()->loaded_block_info();
94 
95   //set up tree copy
96   vxl_byte* cpybuff = new vxl_byte[blk->num_bytes()];
97   bocl_mem* blk_copy = new bocl_mem( gpu_pro->context(), cpybuff, blk->num_bytes(), "refine trees block copy buffer");
98   blk_copy->create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
99 
100   //set up tree size (first find num trees)
101   int numTrees = data.sub_block_num_.x() * data.sub_block_num_.y() * data.sub_block_num_.z();
102   //int* sizebuff = new int[numTrees];
103   bocl_mem* tree_sizes = new bocl_mem( gpu_pro->context(), /*sizebuff*/ NULL, sizeof(cl_int)*numTrees, "refine tree sizes buffer");
104   tree_sizes->create_buffer(CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, *gpu_pro->get_queue());
105 
106   //set first kernel args
107   refine_trees.set_arg( blk_info );
108   refine_trees.set_arg( blk );
109   refine_trees.set_arg( blk_copy );
110   refine_trees.set_arg( alpha );
111   refine_trees.set_arg( tree_sizes );
112   refine_trees.set_arg( &prob_thresh );
113   refine_trees.set_arg( &lookup );
114   refine_trees.set_arg( &cl_output );
115   refine_trees.set_local_arg( 16*sizeof(cl_uchar) );
116   refine_trees.set_local_arg( sizeof(cl_uchar16) );
117   refine_trees.set_local_arg( sizeof(cl_uchar16) );
118 
119   //set workspace
120   std::size_t lThreads[] = {1, 1};
121   std::size_t gThreads[] = {numTrees, 1};
122 
123   //execute kernel
124   refine_trees.execute( *gpu_pro->get_queue(), 2, lThreads, gThreads);
125   clFinish( *gpu_pro->get_queue());
126 
127   //clear render kernel args so it can reset em on next execution
128   refine_trees.clear_args();
129 
130   blk_copy->read_to_buffer(*gpu_pro->get_queue());
131   std::cout<<"Testing refine results"<<std::endl;
132   vxl_byte* cpy = (vxl_byte*) blk_copy->cpu_buffer();
133   for (int i=0; i<numTrees; ++i) {
134     if (cpy[16*i] != 1) {
135       TEST(" FIRST PASS REFINE TEST (trees) ", true, false);
136       return;
137     }
138   }
139   TEST(" First pass refine trees test ", true, true);
140 
141   tree_sizes->read_to_buffer(*gpu_pro->get_queue());
142   std::cout<<"outputting new tree sizes"<<std::endl;
143   int* sizes = (int*) tree_sizes->cpu_buffer();
144   for (int i=0; i<numTrees; ++i) {
145     if (sizes[i] != 9) {
146       TEST(" FIRST PASS REFINE TEST (SIZES) ", true, false);
147       return;
148     }
149   }
150   TEST(" first pass refine test sizes ", true, true);
151 
152 
153   /////////////////////////////////////////////////////////////////////////
154   //STEP TWO
155   //read out tree_sizes and do cumulative sum on it
156   tree_sizes->read_to_buffer(*gpu_pro->get_queue());
157   int* sizebuff = (int*) tree_sizes->cpu_buffer();
158   for (int i=1; i<numTrees; ++i) sizebuff[i] += sizebuff[i-1];
159   int newDataSize = sizebuff[numTrees-1];
160   for (int i=numTrees-1; i>0; --i) sizebuff[i] = sizebuff[i-1];
161   sizebuff[0] = 0;
162   tree_sizes->write_to_buffer(*gpu_pro->get_queue());
163   std::cout<<"New data size: "<<newDataSize<<std::endl;
164   /////////////////////////////////////////////////////////////////////////
165 
166   /////////////////////////////////////////////////////////////////////////////
167   // TEST REFINE DATA
168   /////////////////////////////////////////////////////////////////////////////
169   bocl_kernel refine_data;
170   refine_data.create_kernel( &gpu_pro->context(), &gpu_pro->devices()[0], src_paths,
171                              "refine_data", "-D MOG_TYPE_4 ", "boxm2 opencl refine data size 4 (pass three)");
172 
173   //set up alpha copy
174   bocl_mem* alpha_copy = new bocl_mem(gpu_pro->context(), NULL, newDataSize*sizeof(float), "alpha block copy buffer");
175   alpha_copy->create_buffer(CL_MEM_READ_WRITE| CL_MEM_ALLOC_HOST_PTR, *gpu_pro->get_queue());
176 
177   //init value buffer
178   float* init_abuff = new float[1]; init_abuff[0] = .01f;
179   bocl_mem init_alpha(gpu_pro->context(), init_abuff, sizeof(float), "init_alpha buffer");
180   init_alpha.create_buffer(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR);
181 
182   refine_data.set_arg( blk_info );
183   refine_data.set_arg( blk );
184   refine_data.set_arg( blk_copy );
185   refine_data.set_arg( tree_sizes );
186   refine_data.set_arg( alpha );
187   refine_data.set_arg( alpha_copy );
188   refine_data.set_arg( &init_alpha );
189   refine_data.set_arg( &prob_thresh );
190   refine_data.set_arg( &lookup );
191   refine_data.set_arg( &cl_output );
192   refine_data.set_local_arg( 16*sizeof(cl_uchar) );
193   refine_data.set_local_arg( sizeof(cl_uchar16) );
194   refine_data.set_local_arg( sizeof(cl_uchar16) );
195 
196   //execute kernel
197   refine_data.execute( *gpu_pro->get_queue(), 2, lThreads, gThreads);
198   clFinish( *gpu_pro->get_queue());
199 
200   //original alphas
201   float* abuf = (float*) alpha->cpu_buffer();
202   for (int i=0; i<8; ++i)
203     std::cout<<abuf[i]<<std::endl;
204 
205   std::cout<<"NEW ALPHAS"<<std::endl;
206   alpha_copy->read_to_buffer(*gpu_pro->get_queue());
207   float* alph = (float*) alpha_copy->cpu_buffer();
208   for (int i=0; i<newDataSize; ++i)
209     std::cout<<alph[i]<<std::endl;
210 
211   //TEST to make sure the new trees are lined up correctly
212   blk->read_to_buffer(*gpu_pro->get_queue());
213   vxl_byte* refined = (vxl_byte*) blk->cpu_buffer();
214   for (int i=0; i<numTrees; ++i) {
215     if (refined[16*i] != 1) {
216       std::cout<<"value is: "<<(int) refined[16*i]<<"... should be 1 at "<<i<<std::endl;
217       TEST(" THIRD PASS REFINE TEST (trees) ", true, false);
218       return;
219     }
220     int pointer;
221     std::memcpy(&pointer, &refined[16*i]+10, sizeof(int));
222     if (pointer != 9*i) {
223       TEST(" THIRD PASS Refine data pointer ", true, false);
224       std::cout<<"Pointer is: "<<pointer<<"... should be "<<9*i<<" at "<<i<<std::endl;
225       return;
226     }
227   }
228 #endif
229 
230   TEST(" Third Pass TREE passes ", true, true);
231 }
232 
//: Stub: currently performs no checks; called by test_refine() after the trees test.
void test_refine_scan_kernel() {}
//: Stub: currently performs no checks; called last by test_refine().
void test_refine_data_kernel() {}
test_refine()235 void test_refine() { test_refine_trees_kernel(); test_refine_scan_kernel(); test_refine_data_kernel(); }
236 
237 TESTMAIN( test_refine );
238