1//THIS IS UPDATE BIT SCENE OPT 2//Created Sept 30, 2010, 3//Implements the parallel work group segmentation algorithm. 4#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics: enable 5#if NVIDIA 6 #pragma OPENCL EXTENSION cl_khr_gl_sharing : enable 7#endif 8#ifdef MOG_TYPE_16 9 #define CONVERT_FUNC_SAT_RTE(lhs,data) lhs=convert_ushort8_sat_rte(data); 10 #define MOG_TYPE ushort8 11 #define NORM 65535; 12#endif 13#ifdef MOG_TYPE_8 14 #define CONVERT_FUNC_SAT_RTE(lhs,data) lhs=convert_uchar8_sat_rte(data); 15 #define MOG_TYPE uchar8 16 #define NORM 255; 17#endif 18 19#ifdef AUX_PREVIS 20typedef struct 21{ 22 __global float* alpha; 23 __global MOG_TYPE * mog; 24 __global int* seg_len; 25 __global int* mean_obs; 26 __global int* vis_array; 27 __global int* pre_array; 28 29 __local short2* ray_bundle_array; 30 __local int* cell_ptrs; 31 __local float* cached_vis; 32 float* ray_vis; 33 float* ray_pre; 34 __constant RenderSceneInfo * linfo; 35} AuxArgs; 36 37//forward declare cast ray (so you can use it) 38void cast_ray(int,int,float,float,float,float,float,float,__constant RenderSceneInfo*, 39 __global int4*,local uchar16*,constant uchar *,local uchar *,float*,AuxArgs,float tnear, float tfar); 40 41__kernel 42void 43aux_previs_main(__constant RenderSceneInfo * linfo, 44 __global int4 * tree_array, // tree structure for each block 45 __global float * alpha_array, // alpha for each block 46 __global MOG_TYPE * mixture_array, // mixture for each block 47 __global int * aux_array0, // four aux arrays strung together 48 __global int * aux_array1, // four aux arrays strung together 49 __global int * aux_array2, // four aux arrays strung together 50 __global int * aux_array3, // four aux arrays strung together 51 __constant uchar * bit_lookup, // used to get data_index 52 __global float4 * ray_origins, 53 __global float4 * ray_directions, 54 __global uint4 * imgdims, // dimensions of the input image 55 __global float * vis_image, // visibility image (for keeping vis across blocks) 56 __global float * pre_image, // preinf image (for keeping pre across blocks) 57 __global float * output, 58 __local uchar16 * local_tree, // cache current tree into local memory 59 __local short2 * ray_bundle_array, // gives information for which ray takes over in the workgroup 60 __local int * cell_ptrs, // local list of cell_ptrs (cells that are hit by this workgroup 61 __local float * cached_vis, // cached vis used to sum up vis contribution locally 62 __local uchar * cumsum) // cumulative sum for calculating data pointer 63{ 64 // get local id (0-63 for an 8x8) of this patch 65 uchar llid = (uchar)(get_local_id(0) + get_local_size(0)*get_local_id(1)); 66 67 // initialize pre-broken ray information (non broken rays will be re initialized) 68 ray_bundle_array[llid] = (short2) (-1, 0); 69 cell_ptrs[llid] = -1; 70 71 //---------------------------------------------------------------------------- 72 // get image coordinates and camera, 73 // check for validity before proceeding 74 //---------------------------------------------------------------------------- 75 int i=0,j=0; 76 i=get_global_id(0); 77 j=get_global_id(1); 78 79 // check to see if the thread corresponds to an actual pixel as in some 80 // cases #of threads will be more than the pixels. 81 if (i>=(*imgdims).z || j>=(*imgdims).w || i<(*imgdims).x || j<(*imgdims).y) 82 return; 83 float vis0 = 1.0f; 84 float vis = vis_image[j*get_global_size(0) + i]; 85 float pre = pre_image[j*get_global_size(0) + i]; 86 87 barrier(CLK_LOCAL_MEM_FENCE); 88 89 //---------------------------------------------------------------------------- 90 // we know i,j map to a point on the image, 91 // BEGIN RAY TRACE 92 //---------------------------------------------------------------------------- 93 float4 ray_o = ray_origins[ j*get_global_size(0) + i ]; 94 float4 ray_d = ray_directions[ j*get_global_size(0) + i ]; 95 float ray_ox, ray_oy, ray_oz, ray_dx, ray_dy, ray_dz; 96 calc_scene_ray_generic_cam(linfo, ray_o, ray_d, &ray_ox, &ray_oy, &ray_oz, &ray_dx, &ray_dy, &ray_dz); 97 98 //---------------------------------------------------------------------------- 99 // we know i,j map to a point on the image, have calculated ray 100 // BEGIN RAY TRACE 101 //---------------------------------------------------------------------------- 102 AuxArgs aux_args; 103 aux_args.linfo = linfo; 104 aux_args.alpha = alpha_array; 105 aux_args.mog = mixture_array; 106 aux_args.seg_len = aux_array0; 107 aux_args.mean_obs = aux_array1; 108 aux_args.vis_array = aux_array2; 109 aux_args.pre_array = aux_array3; 110 111 aux_args.ray_bundle_array = ray_bundle_array; 112 aux_args.cell_ptrs = cell_ptrs; 113 aux_args.cached_vis = cached_vis; 114 aux_args.ray_vis = &vis; 115 aux_args.ray_pre = ⪯ 116 cast_ray( i, j, 117 ray_ox, ray_oy, ray_oz, 118 ray_dx, ray_dy, ray_dz, 119 linfo, tree_array, //scene info 120 local_tree, bit_lookup, cumsum, &vis0, aux_args,0, MAXFLOAT); //utility info 121 122 //write out vis and pre 123 vis_image[j*get_global_size(0)+i] = vis; 124 pre_image[j*get_global_size(0)+i] = pre; 125} 126#endif // 127 128#ifdef AUX_LEN_INT_VIS 129typedef struct 130{ 131 __global float* alpha; 132 __global int* seg_len; 133 __global int* mean_obs; 134 __global int* vis_array; 135 __global int* pre_array; 136 137 __local short2* ray_bundle_array; 138 __local int* cell_ptrs; 139 __local float* cached_vis; 140 float* ray_vis; 141 float* ray_pre; 142 float obs; 143 __constant RenderSceneInfo * linfo; 144} AuxArgs; 145 146//forward declare cast ray (so you can use it) 147void cast_ray(int,int,float,float,float,float,float,float,__constant RenderSceneInfo*, 148 __global int4*,local uchar16*,constant uchar *,local uchar *,float*,AuxArgs,float tnear, float tfar); 149 150__kernel 151void 152aux_len_int_vis_main(__constant RenderSceneInfo * linfo, 153 __global int4 * tree_array, // tree structure for each block 154 __global float * alpha_array, // alpha for each block 155 __global int * aux_array0, // four aux arrays strung together 156 __global int * aux_array1, // four aux arrays strung together 157 __global int * aux_array2, // four aux arrays strung together 158 __global int * aux_array3, // four aux arrays strung together 159 __constant uchar * bit_lookup, // used to get data_index 160 __global float4 * ray_origins, 161 __global float4 * ray_directions, 162 __global uint4 * imgdims, // dimensions of the input image 163 __global float * vis_image, // visibility image (for keeping vis across blocks) 164 __global float * in_image, // preinf image (for keeping pre across blocks) 165 __global float * output, 166 __local uchar16 * local_tree, // cache current tree into local memory 167 __local short2 * ray_bundle_array, // gives information for which ray takes over in the workgroup 168 __local int * cell_ptrs, // local list of cell_ptrs (cells that are hit by this workgroup 169 __local float * cached_vis, // cached vis used to sum up vis contribution locally 170 __local uchar * cumsum) // cumulative sum for calculating data pointer 171{ 172 // get local id (0-63 for an 8x8) of this patch 173 uchar llid = (uchar)(get_local_id(0) + get_local_size(0)*get_local_id(1)); 174 175 // initialize pre-broken ray information (non broken rays will be re initialized) 176 ray_bundle_array[llid] = (short2) (-1, 0); 177 cell_ptrs[llid] = -1; 178 179 //---------------------------------------------------------------------------- 180 // get image coordinates and camera, 181 // check for validity before proceeding 182 //---------------------------------------------------------------------------- 183 int i=0,j=0; 184 i=get_global_id(0); 185 j=get_global_id(1); 186 187 // check to see if the thread corresponds to an actual pixel as in some 188 // cases #of threads will be more than the pixels. 189 if (i>=(*imgdims).z || j>=(*imgdims).w || i<(*imgdims).x || j<(*imgdims).y) 190 return; 191 float vis0 = 1.0f; 192 float vis = vis_image[j*get_global_size(0) + i]; 193 194 195 barrier(CLK_LOCAL_MEM_FENCE); 196 197 //---------------------------------------------------------------------------- 198 // we know i,j map to a point on the image, 199 // BEGIN RAY TRACE 200 //---------------------------------------------------------------------------- 201 float4 ray_o = ray_origins[ j*get_global_size(0) + i ]; 202 float4 ray_d = ray_directions[ j*get_global_size(0) + i ]; 203 float ray_ox, ray_oy, ray_oz, ray_dx, ray_dy, ray_dz; 204 calc_scene_ray_generic_cam(linfo, ray_o, ray_d, &ray_ox, &ray_oy, &ray_oz, &ray_dx, &ray_dy, &ray_dz); 205 206 //---------------------------------------------------------------------------- 207 // we know i,j map to a point on the image, have calculated ray 208 // BEGIN RAY TRACE 209 //---------------------------------------------------------------------------- 210 AuxArgs aux_args; 211 aux_args.linfo = linfo; 212 aux_args.alpha = alpha_array; 213 aux_args.seg_len = aux_array0; 214 aux_args.mean_obs = aux_array1; 215 aux_args.vis_array = aux_array2; 216 aux_args.pre_array = aux_array3; 217 218 aux_args.ray_bundle_array = ray_bundle_array; 219 aux_args.cell_ptrs = cell_ptrs; 220 aux_args.cached_vis = cached_vis; 221 aux_args.ray_vis = &vis; 222 aux_args.obs = in_image[j*get_global_size(0) + i]; 223 224 cast_ray( i, j, 225 ray_ox, ray_oy, ray_oz, 226 ray_dx, ray_dy, ray_dz, 227 linfo, tree_array, //scene info 228 local_tree, bit_lookup, cumsum, &vis0, aux_args,0,MAXFLOAT); //utility info 229 230 //write out vis and pre 231 vis_image[j*get_global_size(0)+i] = vis; 232} 233#endif // 234 235#ifdef UPDATE_AUX_DIRECTION 236typedef struct 237{ 238 __global int* len; 239 __global int* X; 240 __global int* Y; 241 __global int* Z; 242 243 float xdir; 244 float ydir; 245 float zdir; 246 247 __local short2* ray_bundle_array; 248 __local int* cell_ptrs; 249} AuxArgs; 250 251//forward declare cast ray (so you can use it) 252void cast_ray(int,int,float,float,float,float,float,float,__constant RenderSceneInfo*, 253 __global int4*,local uchar16*,constant uchar *,local uchar *,float*,AuxArgs,float tnear, float tfar); 254 255__kernel 256void 257aux_directions_main(__constant RenderSceneInfo * linfo, 258 __global int4 * tree_array, // tree structure for each block 259 __global int * aux_array0, // four aux arrays strung together 260 __global int * aux_array1, // four aux arrays strung together 261 __global int * aux_array2, // four aux arrays strung together 262 __global int * aux_array3, // four aux arrays strung together 263 __constant uchar * bit_lookup, // used to get data_index 264 __global float4 * ray_origins, 265 __global float4 * ray_directions, 266 __global uint4 * imgdims, // dimensions of the input image 267 __global float * output, 268 __local uchar16 * local_tree, // cache current tree into local memory 269 __local short2 * ray_bundle_array, // gives information for which ray takes over in the workgroup 270 __local int * cell_ptrs, // local list of cell_ptrs (cells that are hit by this workgroup 271 __local float * cached_vis, // cached vis used to sum up vis contribution locally 272 __local uchar * cumsum) // cumulative sum for calculating data pointer 273{ 274 // get local id (0-63 for an 8x8) of this patch 275 uchar llid = (uchar)(get_local_id(0) + get_local_size(0)*get_local_id(1)); 276 277 // initialize pre-broken ray information (non broken rays will be re initialized) 278 ray_bundle_array[llid] = (short2) (-1, 0); 279 cell_ptrs[llid] = -1; 280 281 //---------------------------------------------------------------------------- 282 // get image coordinates and camera, 283 // check for validity before proceeding 284 //---------------------------------------------------------------------------- 285 int i=0,j=0; 286 i=get_global_id(0); 287 j=get_global_id(1); 288 289 // check to see if the thread corresponds to an actual pixel as in some 290 // cases #of threads will be more than the pixels. 291 if (i>=(*imgdims).z || j>=(*imgdims).w || i<(*imgdims).x || j<(*imgdims).y) 292 return; 293 294 barrier(CLK_LOCAL_MEM_FENCE); 295 296 //---------------------------------------------------------------------------- 297 // we know i,j map to a point on the image, 298 // BEGIN RAY TRACE 299 //---------------------------------------------------------------------------- 300 float4 ray_o = ray_origins[ j*get_global_size(0) + i ]; 301 float4 ray_d = ray_directions[ j*get_global_size(0) + i ]; 302 float ray_ox, ray_oy, ray_oz, ray_dx, ray_dy, ray_dz; 303 calc_scene_ray_generic_cam(linfo, ray_o, ray_d, &ray_ox, &ray_oy, &ray_oz, &ray_dx, &ray_dy, &ray_dz); 304 305 //---------------------------------------------------------------------------- 306 // we know i,j map to a point on the image, have calculated ray 307 // BEGIN RAY TRACE 308 //---------------------------------------------------------------------------- 309 AuxArgs aux_args; 310 aux_args.len = aux_array0; 311 aux_args.X = aux_array1; 312 aux_args.Y = aux_array2; 313 aux_args.Z = aux_array3; 314 315 aux_args.xdir = ray_dx; 316 aux_args.ydir = ray_dy; 317 aux_args.zdir = ray_dz; 318 319 float vis =1.0; 320 aux_args.ray_bundle_array = ray_bundle_array; 321 aux_args.cell_ptrs = cell_ptrs; 322 cast_ray( i, j, 323 ray_ox, ray_oy, ray_oz, 324 ray_dx, ray_dy, ray_dz, 325 linfo, tree_array, //scene info 326 local_tree, bit_lookup, cumsum, &vis, aux_args,0,MAXFLOAT); //utility info 327} 328#endif 329 330 331#ifdef AUX_PREVISPOST 332typedef struct 333{ 334 __global float* alpha; 335 __global MOG_TYPE * mog; 336 __global int* seg_len; 337 __global int* mean_obs; 338 __global int* vis_array; 339 __global int* pre_array; 340 __global int* post_array; 341 __constant RenderSceneInfo * linfo; 342 float* ray_vis; 343 float* ray_pre; 344 float* vis_inf; 345 float* pre_inf; 346} AuxArgs; 347 348//forward declare cast ray (so you can use it) 349void cast_ray(int,int,float,float,float,float,float,float,__constant RenderSceneInfo*, 350 __global int4*,local uchar16*,constant uchar *,local uchar *,float*,AuxArgs,float tnear, float tfar); 351 352__kernel 353void 354aux_previspost_main(__constant RenderSceneInfo * linfo, 355 __global int4 * tree_array, // tree structure for each block 356 __global float * alpha_array, // alpha for each block 357 __global MOG_TYPE * mixture_array, // mixture for each block 358 __global int * aux_array0, // seglen 359 __global int * aux_array1, // meanobs 360 __global int * aux_array2, // pre array 361 __global int * aux_array3, // vis array 362 __global int * aux_array4, // post array 363 __constant uchar * bit_lookup, // used to get data_index 364 __global float4 * ray_origins, 365 __global float4 * ray_directions, 366 __global uint4 * imgdims, // dimensions of the input image 367 __global float * vis_image, // visibility image (for keeping vis across blocks) 368 __global float * pre_image, // preinf image (for keeping pre across blocks) 369 __global float * vis_inf_image, // vis_inf image 370 __global float * pre_inf_image, // pre_inf image 371 __global float * output, 372 __local uchar16 * local_tree, // cache current tree into local memory 373 __local uchar * cumsum) // cumulative sum for calculating data pointer 374{ 375 // get local id (0-63 for an 8x8) of this patch 376 uchar llid = (uchar)(get_local_id(0) + get_local_size(0)*get_local_id(1)); 377 378 // initialize pre-broken ray information (non broken rays will be re initialized) 379 //ray_bundle_array[llid] = (short2) (-1, 0); 380 //cell_ptrs[llid] = -1; 381 382 //---------------------------------------------------------------------------- 383 // get image coordinates and camera, 384 // check for validity before proceeding 385 //---------------------------------------------------------------------------- 386 int i=0,j=0; 387 i=get_global_id(0); 388 j=get_global_id(1); 389 390 // check to see if the thread corresponds to an actual pixel as in some 391 // cases #of threads will be more than the pixels. 392 if (i>=(*imgdims).z || j>=(*imgdims).w || i<(*imgdims).x || j<(*imgdims).y) 393 return; 394 float vis0 = 1.0f; 395 float vis = vis_image[j*get_global_size(0) + i]; 396 float pre = pre_image[j*get_global_size(0) + i]; 397 398 float vis_inf = vis_inf_image[j*get_global_size(0) + i]; 399 float pre_inf = pre_inf_image[j*get_global_size(0) + i]; 400 401 barrier(CLK_LOCAL_MEM_FENCE); 402 403 //---------------------------------------------------------------------------- 404 // we know i,j map to a point on the image, 405 // BEGIN RAY TRACE 406 //---------------------------------------------------------------------------- 407 float4 ray_o = ray_origins[ j*get_global_size(0) + i ]; 408 float4 ray_d = ray_directions[ j*get_global_size(0) + i ]; 409 float ray_ox, ray_oy, ray_oz, ray_dx, ray_dy, ray_dz; 410 calc_scene_ray_generic_cam(linfo, ray_o, ray_d, &ray_ox, &ray_oy, &ray_oz, &ray_dx, &ray_dy, &ray_dz); 411 412 //---------------------------------------------------------------------------- 413 // we know i,j map to a point on the image, have calculated ray 414 // BEGIN RAY TRACE 415 //---------------------------------------------------------------------------- 416 AuxArgs aux_args; 417 aux_args.linfo = linfo; 418 aux_args.alpha = alpha_array; 419 aux_args.mog = mixture_array; 420 aux_args.seg_len = aux_array0; 421 aux_args.mean_obs = aux_array1; 422 423 aux_args.pre_array = aux_array2; 424 aux_args.vis_array = aux_array3; 425 aux_args.post_array = aux_array4; 426 427 //aux_args.ray_bundle_array = ray_bundle_array; 428 //aux_args.cell_ptrs = cell_ptrs; 429 //aux_args.cached_vis = cached_vis; 430 aux_args.ray_vis = &vis; 431 aux_args.ray_pre = ⪯ 432 aux_args.vis_inf = &vis_inf; 433 aux_args.pre_inf = &pre_inf; 434 435 cast_ray( i, j, 436 ray_ox, ray_oy, ray_oz, 437 ray_dx, ray_dy, ray_dz, 438 linfo, tree_array, //scene info 439 local_tree, bit_lookup, cumsum, &vis0, aux_args,0,MAXFLOAT); //utility info 440 441 //write out vis and pre 442 vis_image[j*get_global_size(0)+i] = vis; 443 pre_image[j*get_global_size(0)+i] = pre; 444} 445#endif //AUX_PREVISPOST 446 447 448#ifdef CONVERT_AUX 449__kernel void 450convert_aux_int_to_float(__constant RenderSceneInfo * linfo, 451 __global float* aux_array0, 452 __global float* aux_array1, 453 __global float* aux_array2, 454 __global float* aux_array3) 455{ 456 int gid=get_global_id(0); 457 int datasize = linfo->data_len ; 458 if (gid<datasize) 459 { 460 int obs0= as_int(aux_array0[gid]); 461 int obs1= as_int(aux_array1[gid]); 462 int obs2= as_int(aux_array2[gid]); 463 int obs3= as_int(aux_array3[gid]); 464 465 aux_array0[gid]=((float)obs0); 466 aux_array1[gid]=((float)obs1); 467 aux_array2[gid]=((float)obs2); 468 aux_array3[gid]=((float)obs3); 469 } 470} 471#endif //CONVERT_AUX 472 473#ifdef CONVERT_AUX_NORMALIZE 474__kernel void 475convert_aux_and_normalize(__constant RenderSceneInfo * linfo, 476 __global float* aux_array0, 477 __global float* aux_array1, 478 __global float* aux_array2, //vis 479 __global float* aux_array3, //pre 480 __global float* aux_array4) //post 481{ 482 int gid=get_global_id(0); 483 int datasize = linfo->data_len ;//* info->num_buffer; 484 if (gid<datasize) 485 { 486 int obs0= as_int(aux_array0[gid]); 487 int obs1= as_int(aux_array1[gid]); 488 int obs2= as_int(aux_array2[gid]); 489 int obs3= as_int(aux_array3[gid]); 490 int obs4= as_int(aux_array4[gid]); 491 492 aux_array0[gid]= (((float)obs0)/SEGLEN_FACTOR) * linfo->block_len; 493 aux_array1[gid]= (((float)obs1)/SEGLEN_FACTOR) * linfo->block_len; 494 aux_array2[gid]= (((float)obs2)/SEGLEN_FACTOR) * linfo->block_len; 495 aux_array3[gid]= (((float)obs3)/SEGLEN_FACTOR) * linfo->block_len; 496 aux_array4[gid]= (((float)obs4)/SEGLEN_FACTOR) * linfo->block_len; 497 } 498} 499#endif //CONVERT_AUX_NORMALIZE 500 501#ifdef CONVERT_NOBS_INT_SHORT 502__kernel void 503convert_nobs_int_short(__constant RenderSceneInfo * linfo, 504 __global unsigned int* num_obs, 505 __global unsigned short* num_obsShort) 506{ 507 int gid=get_global_id(0); 508 int datasize = linfo->data_len ;//* info->num_buffer; 509 if (gid<datasize) { 510 num_obsShort[gid]=(unsigned short)(num_obs[gid]); 511 } 512} 513#endif //CONVERT_NOBS_INT_SHORT 514#ifdef CONVERT_AUX_XYZ_THETAPHI 515__kernel void 516convert_aux_xyz_to_thetaphi(__constant RenderSceneInfo * linfo, 517 __global float* aux_array0, 518 __global float* aux_array1, 519 __global float* aux_array2, 520 __global float* aux_array3) 521{ 522 int gid=get_global_id(0); 523 int datasize = linfo->data_len ;//* info->num_buffer; 524 if (gid<datasize) 525 { 526 float obs0= (float) as_int(aux_array0[gid]); 527 float obs1= (float) as_int(aux_array1[gid]); 528 float obs2= (float) as_int(aux_array2[gid]); 529 float obs3= (float) as_int(aux_array3[gid]); 530 531 float phi = atan2(obs2,obs1); 532 float denom = sqrt(obs1*obs1+obs2*obs2+obs3*obs3); 533 float theta = acos(obs3/denom); 534 535 aux_array0[gid]=theta; 536 aux_array1[gid]=phi; 537 } 538} 539#endif //CONVERT_AUX 540 541#ifdef SEGLENNOBS 542typedef struct 543{ 544 __global int* seg_len; 545 __global int* mean_obs; 546 __global uint* nobs; 547 __local short2* ray_bundle_array; 548 __local int* cell_ptrs; 549 __local float4* cached_aux; 550 float obs; 551 __global float * output; 552 __constant RenderSceneInfo * linfo; 553} AuxArgs; 554 555//forward declare cast ray (so you can use it) 556void cast_ray(int,int,float,float,float,float,float,float,__constant RenderSceneInfo*, 557 __global int4*,local uchar16*,constant uchar *,local uchar *,float*,AuxArgs, float tnear, float tfar); 558 559 560__kernel void 561seg_len_nobs_main(__constant RenderSceneInfo * linfo, 562 __global int4 * tree_array, // tree structure for each block 563 __global float * alpha_array, // alpha for each block 564 __global int * aux_array0, // aux data array (four aux arrays strung together) 565 __global int * aux_array1, // aux data array (four aux arrays strung together) 566 __global uint * nobs, // data array to keep track of the number of observations per cell 567 __constant uchar * bit_lookup, // used to get data_index 568 __global float4 * ray_origins, 569 __global float4 * ray_directions, 570 __global uint4 * imgdims, // dimensions of the input image 571 __global float * in_image, // the input image 572 __global float * output, 573 __local uchar16 * local_tree, // cache current tree into local memory 574 __local short2 * ray_bundle_array, // gives information for which ray takes over in the workgroup 575 __local int * cell_ptrs, // local list of cell_ptrs (cells that are hit by this workgroup 576 __local float4 * cached_aux_data, // seg len cached aux data is only a float2 577 __local uchar * cumsum ) // cumulative sum for calculating data pointer 578{ 579 // get local id (0-63 for an 8x8) of this patch 580 uchar llid = (uchar)(get_local_id(0) + get_local_size(0)*get_local_id(1)); 581 582 // initialize pre-broken ray information (non broken rays will be re initialized) 583 ray_bundle_array[llid] = (short2) (-1, 0); 584 cell_ptrs[llid] = -1; 585 586 // ---------------------------------------------------------------------------- 587 // get image coordinates and camera, 588 // check for validity before proceeding 589 // ---------------------------------------------------------------------------- 590 int i=0,j=0; 591 i=get_global_id(0); 592 j=get_global_id(1); 593 int imIndex = j*get_global_size(0) + i; 594 595 // grab input image value (also holds vis) 596 float obs = in_image[imIndex]; 597 598 float vis = 1.0f; //no visibility in this pass 599 barrier(CLK_LOCAL_MEM_FENCE); 600 601 // cases #of threads will be more than the pixels. 602 if (i>=(*imgdims).z || j>=(*imgdims).w || i<(*imgdims).x || j<(*imgdims).y || obs < 0.0f) 603 return; 604 605 // ---------------------------------------------------------------------------- 606 // we know i,j map to a point on the image, 607 // BEGIN RAY TRACE 608 // ---------------------------------------------------------------------------- 609 float4 ray_o = ray_origins[ imIndex ]; 610 float4 ray_d = ray_directions[ imIndex ]; 611 float ray_ox, ray_oy, ray_oz, ray_dx, ray_dy, ray_dz; 612 calc_scene_ray_generic_cam(linfo, ray_o, ray_d, &ray_ox, &ray_oy, &ray_oz, &ray_dx, &ray_dy, &ray_dz); 613 614 // ---------------------------------------------------------------------------- 615 // we know i,j map to a point on the image, have calculated ray 616 // BEGIN RAY TRACE 617 // ---------------------------------------------------------------------------- 618 619 AuxArgs aux_args; 620 aux_args.linfo = linfo; 621 aux_args.seg_len = aux_array0; 622 aux_args.mean_obs = aux_array1; 623 aux_args.nobs = nobs; 624 aux_args.ray_bundle_array = ray_bundle_array; 625 aux_args.cell_ptrs = cell_ptrs; 626 aux_args.cached_aux = cached_aux_data; 627 aux_args.obs = obs; 628 aux_args.output = output; 629 630 cast_ray( i, j, 631 ray_ox, ray_oy, ray_oz, 632 ray_dx, ray_dy, ray_dz, 633 linfo, tree_array, //scene info 634 local_tree, bit_lookup, cumsum, &vis, aux_args,0, MAXFLOAT); //utility info 635} 636 637#endif //SEGLENNOBS 638