1//THIS IS UPDATE BIT SCENE OPT
2//Created Sept 30, 2010,
3//Implements the parallel work group segmentation algorithm.
4#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics: enable
5#if NVIDIA
6 #pragma OPENCL EXTENSION cl_khr_gl_sharing : enable
7#endif
8#ifdef MOG_TYPE_16
9    #define CONVERT_FUNC_SAT_RTE(lhs,data) lhs=convert_ushort8_sat_rte(data);
10    #define MOG_TYPE ushort8
11    #define NORM 65535;
12#endif
13#ifdef MOG_TYPE_8
14   #define CONVERT_FUNC_SAT_RTE(lhs,data) lhs=convert_uchar8_sat_rte(data);
15   #define MOG_TYPE uchar8
16   #define NORM 255;
17#endif
18
19#ifdef AUX_PREVIS
20typedef struct
21{
22  __global float*   alpha;
23  __global MOG_TYPE * mog;
24  __global int* seg_len;
25  __global int* mean_obs;
26  __global int* vis_array;
27  __global int* pre_array;
28
29  __local  short2* ray_bundle_array;
30  __local  int*    cell_ptrs;
31  __local  float*  cached_vis;
32           float*  ray_vis;
33           float*  ray_pre;
34  __constant RenderSceneInfo * linfo;
35} AuxArgs;
36
37//forward declare cast ray (so you can use it)
38void cast_ray(int,int,float,float,float,float,float,float,__constant RenderSceneInfo*,
39              __global int4*,local uchar16*,constant uchar *,local uchar *,float*,AuxArgs,float tnear, float tfar);
40
41__kernel
42void
43aux_previs_main(__constant  RenderSceneInfo    * linfo,
44                __global    int4               * tree_array,        // tree structure for each block
45                __global    float              * alpha_array,       // alpha for each block
46                __global    MOG_TYPE           * mixture_array,     // mixture for each block
47                __global    int                * aux_array0,        // four aux arrays strung together
48                __global    int                * aux_array1,        // four aux arrays strung together
49                __global    int                * aux_array2,        // four aux arrays strung together
50                __global    int                * aux_array3,        // four aux arrays strung together
51                __constant  uchar              * bit_lookup,        // used to get data_index
52                __global    float4             * ray_origins,
53                __global    float4             * ray_directions,
54                __global    uint4              * imgdims,           // dimensions of the input image
55                __global    float              * vis_image,         // visibility image (for keeping vis across blocks)
56                __global    float              * pre_image,         // preinf image (for keeping pre across blocks)
57                __global    float              * output,
58                __local     uchar16            * local_tree,        // cache current tree into local memory
59                __local     short2             * ray_bundle_array,  // gives information for which ray takes over in the workgroup
60                __local     int                * cell_ptrs,         // local list of cell_ptrs (cells that are hit by this workgroup
61                __local     float              * cached_vis,        // cached vis used to sum up vis contribution locally
62                __local     uchar              * cumsum)            // cumulative sum for calculating data pointer
63{
64  // get local id (0-63 for an 8x8) of this patch
65  uchar llid = (uchar)(get_local_id(0) + get_local_size(0)*get_local_id(1));
66
67  // initialize pre-broken ray information (non broken rays will be re initialized)
68  ray_bundle_array[llid] = (short2) (-1, 0);
69  cell_ptrs[llid] = -1;
70
71  //----------------------------------------------------------------------------
72  // get image coordinates and camera,
73  // check for validity before proceeding
74  //----------------------------------------------------------------------------
75  int i=0,j=0;
76  i=get_global_id(0);
77  j=get_global_id(1);
78
79  // check to see if the thread corresponds to an actual pixel as in some
80  // cases #of threads will be more than the pixels.
81  if (i>=(*imgdims).z || j>=(*imgdims).w || i<(*imgdims).x || j<(*imgdims).y)
82    return;
83  float vis0 = 1.0f;
84  float vis = vis_image[j*get_global_size(0) + i];
85  float pre = pre_image[j*get_global_size(0) + i];
86
87  barrier(CLK_LOCAL_MEM_FENCE);
88
89  //----------------------------------------------------------------------------
90  // we know i,j map to a point on the image,
91  // BEGIN RAY TRACE
92  //----------------------------------------------------------------------------
93  float4 ray_o = ray_origins[ j*get_global_size(0) + i ];
94  float4 ray_d = ray_directions[ j*get_global_size(0) + i ];
95  float ray_ox, ray_oy, ray_oz, ray_dx, ray_dy, ray_dz;
96  calc_scene_ray_generic_cam(linfo, ray_o, ray_d, &ray_ox, &ray_oy, &ray_oz, &ray_dx, &ray_dy, &ray_dz);
97
98  //----------------------------------------------------------------------------
99  // we know i,j map to a point on the image, have calculated ray
100  // BEGIN RAY TRACE
101  //----------------------------------------------------------------------------
102  AuxArgs aux_args;
103  aux_args.linfo    = linfo;
104  aux_args.alpha      = alpha_array;
105  aux_args.mog        = mixture_array;
106  aux_args.seg_len    = aux_array0;
107  aux_args.mean_obs   = aux_array1;
108  aux_args.vis_array  = aux_array2;
109  aux_args.pre_array  = aux_array3;
110
111  aux_args.ray_bundle_array = ray_bundle_array;
112  aux_args.cell_ptrs = cell_ptrs;
113  aux_args.cached_vis = cached_vis;
114  aux_args.ray_vis = &vis;
115  aux_args.ray_pre = &pre;
116  cast_ray( i, j,
117            ray_ox, ray_oy, ray_oz,
118            ray_dx, ray_dy, ray_dz,
119            linfo, tree_array,                                  //scene info
120            local_tree, bit_lookup, cumsum, &vis0, aux_args,0, MAXFLOAT);    //utility info
121
122  //write out vis and pre
123  vis_image[j*get_global_size(0)+i] = vis;
124  pre_image[j*get_global_size(0)+i] = pre;
125}
126#endif //
127
128#ifdef AUX_LEN_INT_VIS
129typedef struct
130{
131  __global float*   alpha;
132  __global int* seg_len;
133  __global int* mean_obs;
134  __global int* vis_array;
135  __global int* pre_array;
136
137  __local  short2* ray_bundle_array;
138  __local  int*    cell_ptrs;
139  __local  float*  cached_vis;
140           float*  ray_vis;
141           float*  ray_pre;
142           float   obs;
143  __constant RenderSceneInfo * linfo;
144} AuxArgs;
145
146//forward declare cast ray (so you can use it)
147void cast_ray(int,int,float,float,float,float,float,float,__constant RenderSceneInfo*,
148              __global int4*,local uchar16*,constant uchar *,local uchar *,float*,AuxArgs,float tnear, float tfar);
149
150__kernel
151void
152aux_len_int_vis_main(__constant  RenderSceneInfo    * linfo,
153                     __global    int4               * tree_array,        // tree structure for each block
154                     __global    float              * alpha_array,       // alpha for each block
155                     __global    int                * aux_array0,        // four aux arrays strung together
156                     __global    int                * aux_array1,        // four aux arrays strung together
157                     __global    int                * aux_array2,        // four aux arrays strung together
158                     __global    int                * aux_array3,        // four aux arrays strung together
159                     __constant  uchar              * bit_lookup,        // used to get data_index
160                     __global    float4             * ray_origins,
161                     __global    float4             * ray_directions,
162                     __global    uint4              * imgdims,           // dimensions of the input image
163                     __global    float              * vis_image,         // visibility image (for keeping vis across blocks)
164                     __global    float              * in_image,          // preinf image (for keeping pre across blocks)
165                     __global    float              * output,
166                     __local     uchar16            * local_tree,        // cache current tree into local memory
167                     __local     short2             * ray_bundle_array,  // gives information for which ray takes over in the workgroup
168                     __local     int                * cell_ptrs,         // local list of cell_ptrs (cells that are hit by this workgroup
169                     __local     float              * cached_vis,        // cached vis used to sum up vis contribution locally
170                     __local     uchar              * cumsum)            // cumulative sum for calculating data pointer
171{
172  // get local id (0-63 for an 8x8) of this patch
173  uchar llid = (uchar)(get_local_id(0) + get_local_size(0)*get_local_id(1));
174
175  // initialize pre-broken ray information (non broken rays will be re initialized)
176  ray_bundle_array[llid] = (short2) (-1, 0);
177  cell_ptrs[llid] = -1;
178
179  //----------------------------------------------------------------------------
180  // get image coordinates and camera,
181  // check for validity before proceeding
182  //----------------------------------------------------------------------------
183  int i=0,j=0;
184  i=get_global_id(0);
185  j=get_global_id(1);
186
187  // check to see if the thread corresponds to an actual pixel as in some
188  // cases #of threads will be more than the pixels.
189  if (i>=(*imgdims).z || j>=(*imgdims).w || i<(*imgdims).x || j<(*imgdims).y)
190    return;
191  float vis0 = 1.0f;
192  float vis = vis_image[j*get_global_size(0) + i];
193
194
195  barrier(CLK_LOCAL_MEM_FENCE);
196
197  //----------------------------------------------------------------------------
198  // we know i,j map to a point on the image,
199  // BEGIN RAY TRACE
200  //----------------------------------------------------------------------------
201  float4 ray_o = ray_origins[ j*get_global_size(0) + i ];
202  float4 ray_d = ray_directions[ j*get_global_size(0) + i ];
203  float ray_ox, ray_oy, ray_oz, ray_dx, ray_dy, ray_dz;
204  calc_scene_ray_generic_cam(linfo, ray_o, ray_d, &ray_ox, &ray_oy, &ray_oz, &ray_dx, &ray_dy, &ray_dz);
205
206  //----------------------------------------------------------------------------
207  // we know i,j map to a point on the image, have calculated ray
208  // BEGIN RAY TRACE
209  //----------------------------------------------------------------------------
210  AuxArgs aux_args;
211  aux_args.linfo    = linfo;
212  aux_args.alpha      = alpha_array;
213  aux_args.seg_len    = aux_array0;
214  aux_args.mean_obs   = aux_array1;
215  aux_args.vis_array  = aux_array2;
216  aux_args.pre_array  = aux_array3;
217
218  aux_args.ray_bundle_array = ray_bundle_array;
219  aux_args.cell_ptrs = cell_ptrs;
220  aux_args.cached_vis = cached_vis;
221  aux_args.ray_vis = &vis;
222  aux_args.obs     = in_image[j*get_global_size(0) + i];
223
224  cast_ray( i, j,
225            ray_ox, ray_oy, ray_oz,
226            ray_dx, ray_dy, ray_dz,
227            linfo, tree_array,                                  //scene info
228            local_tree, bit_lookup, cumsum, &vis0, aux_args,0,MAXFLOAT);    //utility info
229
230  //write out vis and pre
231  vis_image[j*get_global_size(0)+i] = vis;
232}
233#endif //
234
235#ifdef UPDATE_AUX_DIRECTION
236typedef struct
237{
238  __global int* len;
239  __global int* X;
240  __global int* Y;
241  __global int* Z;
242
243  float xdir;
244  float ydir;
245  float zdir;
246
247  __local  short2* ray_bundle_array;
248  __local  int*    cell_ptrs;
249} AuxArgs;
250
251//forward declare cast ray (so you can use it)
252void cast_ray(int,int,float,float,float,float,float,float,__constant RenderSceneInfo*,
253              __global int4*,local uchar16*,constant uchar *,local uchar *,float*,AuxArgs,float tnear, float tfar);
254
255__kernel
256void
257aux_directions_main(__constant  RenderSceneInfo    * linfo,
258                    __global    int4               * tree_array,        // tree structure for each block
259                    __global    int                * aux_array0,        // four aux arrays strung together
260                    __global    int                * aux_array1,        // four aux arrays strung together
261                    __global    int                * aux_array2,        // four aux arrays strung together
262                    __global    int                * aux_array3,        // four aux arrays strung together
263                    __constant  uchar              * bit_lookup,        // used to get data_index
264                    __global    float4             * ray_origins,
265                    __global    float4             * ray_directions,
266                    __global    uint4              * imgdims,           // dimensions of the input image
267                    __global    float              * output,
268                    __local     uchar16            * local_tree,        // cache current tree into local memory
269                    __local     short2             * ray_bundle_array,  // gives information for which ray takes over in the workgroup
270                    __local     int                * cell_ptrs,         // local list of cell_ptrs (cells that are hit by this workgroup
271                    __local     float              * cached_vis,        // cached vis used to sum up vis contribution locally
272                    __local     uchar              * cumsum)            // cumulative sum for calculating data pointer
273{
274  // get local id (0-63 for an 8x8) of this patch
275  uchar llid = (uchar)(get_local_id(0) + get_local_size(0)*get_local_id(1));
276
277  // initialize pre-broken ray information (non broken rays will be re initialized)
278  ray_bundle_array[llid] = (short2) (-1, 0);
279  cell_ptrs[llid] = -1;
280
281  //----------------------------------------------------------------------------
282  // get image coordinates and camera,
283  // check for validity before proceeding
284  //----------------------------------------------------------------------------
285  int i=0,j=0;
286  i=get_global_id(0);
287  j=get_global_id(1);
288
289  // check to see if the thread corresponds to an actual pixel as in some
290  // cases #of threads will be more than the pixels.
291  if (i>=(*imgdims).z || j>=(*imgdims).w || i<(*imgdims).x || j<(*imgdims).y)
292    return;
293
294  barrier(CLK_LOCAL_MEM_FENCE);
295
296  //----------------------------------------------------------------------------
297  // we know i,j map to a point on the image,
298  // BEGIN RAY TRACE
299  //----------------------------------------------------------------------------
300  float4 ray_o = ray_origins[ j*get_global_size(0) + i ];
301  float4 ray_d = ray_directions[ j*get_global_size(0) + i ];
302  float ray_ox, ray_oy, ray_oz, ray_dx, ray_dy, ray_dz;
303  calc_scene_ray_generic_cam(linfo, ray_o, ray_d, &ray_ox, &ray_oy, &ray_oz, &ray_dx, &ray_dy, &ray_dz);
304
305  //----------------------------------------------------------------------------
306  // we know i,j map to a point on the image, have calculated ray
307  // BEGIN RAY TRACE
308  //----------------------------------------------------------------------------
309  AuxArgs aux_args;
310  aux_args.len    = aux_array0;
311  aux_args.X      = aux_array1;
312  aux_args.Y      = aux_array2;
313  aux_args.Z      = aux_array3;
314
315  aux_args.xdir = ray_dx;
316  aux_args.ydir = ray_dy;
317  aux_args.zdir = ray_dz;
318
319  float vis =1.0;
320  aux_args.ray_bundle_array = ray_bundle_array;
321  aux_args.cell_ptrs = cell_ptrs;
322  cast_ray( i, j,
323            ray_ox, ray_oy, ray_oz,
324            ray_dx, ray_dy, ray_dz,
325            linfo, tree_array,                                  //scene info
326            local_tree, bit_lookup, cumsum, &vis, aux_args,0,MAXFLOAT);    //utility info
327}
328#endif
329
330
331#ifdef AUX_PREVISPOST
332typedef struct
333{
334  __global float*   alpha;
335  __global MOG_TYPE * mog;
336  __global int* seg_len;
337  __global int* mean_obs;
338  __global int* vis_array;
339  __global int* pre_array;
340  __global int* post_array;
341  __constant RenderSceneInfo * linfo;
342           float*  ray_vis;
343           float*  ray_pre;
344           float*  vis_inf;
345           float*  pre_inf;
346} AuxArgs;
347
348//forward declare cast ray (so you can use it)
349void cast_ray(int,int,float,float,float,float,float,float,__constant RenderSceneInfo*,
350              __global int4*,local uchar16*,constant uchar *,local uchar *,float*,AuxArgs,float tnear, float tfar);
351
352__kernel
353void
354aux_previspost_main(__constant  RenderSceneInfo    * linfo,
355                    __global    int4               * tree_array,        // tree structure for each block
356                    __global    float              * alpha_array,       // alpha for each block
357                    __global    MOG_TYPE           * mixture_array,     // mixture for each block
358                    __global    int                * aux_array0,        // seglen
359                    __global    int                * aux_array1,        // meanobs
360                    __global    int                * aux_array2,        // pre array
361                    __global    int                * aux_array3,        // vis array
362                    __global    int                * aux_array4,        // post array
363                    __constant  uchar              * bit_lookup,        // used to get data_index
364                    __global    float4             * ray_origins,
365                    __global    float4             * ray_directions,
366                    __global    uint4              * imgdims,           // dimensions of the input image
367                    __global    float              * vis_image,         // visibility image (for keeping vis across blocks)
368                    __global    float              * pre_image,         // preinf image (for keeping pre across blocks)
369                    __global    float              * vis_inf_image,     // vis_inf image
370                    __global    float              * pre_inf_image,     // pre_inf image
371                    __global    float              * output,
372                    __local     uchar16            * local_tree,        // cache current tree into local memory
373                    __local     uchar              * cumsum)            // cumulative sum for calculating data pointer
374{
375  // get local id (0-63 for an 8x8) of this patch
376  uchar llid = (uchar)(get_local_id(0) + get_local_size(0)*get_local_id(1));
377
378  // initialize pre-broken ray information (non broken rays will be re initialized)
379  //ray_bundle_array[llid] = (short2) (-1, 0);
380  //cell_ptrs[llid] = -1;
381
382  //----------------------------------------------------------------------------
383  // get image coordinates and camera,
384  // check for validity before proceeding
385  //----------------------------------------------------------------------------
386  int i=0,j=0;
387  i=get_global_id(0);
388  j=get_global_id(1);
389
390  // check to see if the thread corresponds to an actual pixel as in some
391  // cases #of threads will be more than the pixels.
392  if (i>=(*imgdims).z || j>=(*imgdims).w || i<(*imgdims).x || j<(*imgdims).y)
393    return;
394  float vis0 = 1.0f;
395  float vis = vis_image[j*get_global_size(0) + i];
396  float pre = pre_image[j*get_global_size(0) + i];
397
398  float vis_inf = vis_inf_image[j*get_global_size(0) + i];
399  float pre_inf = pre_inf_image[j*get_global_size(0) + i];
400
401  barrier(CLK_LOCAL_MEM_FENCE);
402
403  //----------------------------------------------------------------------------
404  // we know i,j map to a point on the image,
405  // BEGIN RAY TRACE
406  //----------------------------------------------------------------------------
407  float4 ray_o = ray_origins[ j*get_global_size(0) + i ];
408  float4 ray_d = ray_directions[ j*get_global_size(0) + i ];
409  float ray_ox, ray_oy, ray_oz, ray_dx, ray_dy, ray_dz;
410  calc_scene_ray_generic_cam(linfo, ray_o, ray_d, &ray_ox, &ray_oy, &ray_oz, &ray_dx, &ray_dy, &ray_dz);
411
412  //----------------------------------------------------------------------------
413  // we know i,j map to a point on the image, have calculated ray
414  // BEGIN RAY TRACE
415  //----------------------------------------------------------------------------
416  AuxArgs aux_args;
417  aux_args.linfo   = linfo;
418  aux_args.alpha      = alpha_array;
419  aux_args.mog        = mixture_array;
420  aux_args.seg_len    = aux_array0;
421  aux_args.mean_obs   = aux_array1;
422
423  aux_args.pre_array  = aux_array2;
424  aux_args.vis_array  = aux_array3;
425  aux_args.post_array  = aux_array4;
426
427  //aux_args.ray_bundle_array = ray_bundle_array;
428  //aux_args.cell_ptrs = cell_ptrs;
429  //aux_args.cached_vis = cached_vis;
430  aux_args.ray_vis = &vis;
431  aux_args.ray_pre = &pre;
432  aux_args.vis_inf = &vis_inf;
433  aux_args.pre_inf = &pre_inf;
434
435  cast_ray( i, j,
436            ray_ox, ray_oy, ray_oz,
437            ray_dx, ray_dy, ray_dz,
438            linfo, tree_array,                                   //scene info
439            local_tree, bit_lookup, cumsum, &vis0, aux_args,0,MAXFLOAT);    //utility info
440
441  //write out vis and pre
442  vis_image[j*get_global_size(0)+i] = vis;
443  pre_image[j*get_global_size(0)+i] = pre;
444}
445#endif //AUX_PREVISPOST
446
447
448#ifdef CONVERT_AUX
449__kernel void
450convert_aux_int_to_float(__constant  RenderSceneInfo    * linfo,
451                         __global float* aux_array0,
452                         __global float* aux_array1,
453                         __global float* aux_array2,
454                         __global float* aux_array3)
455{
456  int gid=get_global_id(0);
457  int datasize = linfo->data_len ;
458  if (gid<datasize)
459  {
460    int obs0= as_int(aux_array0[gid]);
461    int obs1= as_int(aux_array1[gid]);
462    int obs2= as_int(aux_array2[gid]);
463    int obs3= as_int(aux_array3[gid]);
464
465    aux_array0[gid]=((float)obs0);
466    aux_array1[gid]=((float)obs1);
467    aux_array2[gid]=((float)obs2);
468    aux_array3[gid]=((float)obs3);
469  }
470}
471#endif //CONVERT_AUX
472
473#ifdef CONVERT_AUX_NORMALIZE
474__kernel void
475convert_aux_and_normalize(__constant  RenderSceneInfo    * linfo,
476                          __global float* aux_array0,
477                          __global float* aux_array1,
478                          __global float* aux_array2, //vis
479                          __global float* aux_array3, //pre
480                          __global float* aux_array4) //post
481{
482  int gid=get_global_id(0);
483  int datasize = linfo->data_len ;//* info->num_buffer;
484  if (gid<datasize)
485  {
486    int obs0= as_int(aux_array0[gid]);
487    int obs1= as_int(aux_array1[gid]);
488    int obs2= as_int(aux_array2[gid]);
489    int obs3= as_int(aux_array3[gid]);
490    int obs4= as_int(aux_array4[gid]);
491
492    aux_array0[gid]= (((float)obs0)/SEGLEN_FACTOR) * linfo->block_len;
493    aux_array1[gid]= (((float)obs1)/SEGLEN_FACTOR) * linfo->block_len;
494    aux_array2[gid]= (((float)obs2)/SEGLEN_FACTOR) * linfo->block_len;
495    aux_array3[gid]= (((float)obs3)/SEGLEN_FACTOR) * linfo->block_len;
496    aux_array4[gid]= (((float)obs4)/SEGLEN_FACTOR) * linfo->block_len;
497  }
498}
499#endif //CONVERT_AUX_NORMALIZE
500
501#ifdef CONVERT_NOBS_INT_SHORT
502__kernel void
503convert_nobs_int_short(__constant  RenderSceneInfo    * linfo,
504                       __global unsigned int* num_obs,
505                       __global unsigned short* num_obsShort)
506{
507   int gid=get_global_id(0);
508   int datasize = linfo->data_len ;//* info->num_buffer;
509   if (gid<datasize) {
510        num_obsShort[gid]=(unsigned short)(num_obs[gid]);
511   }
512}
513#endif //CONVERT_NOBS_INT_SHORT
514#ifdef CONVERT_AUX_XYZ_THETAPHI
515__kernel void
516convert_aux_xyz_to_thetaphi(__constant  RenderSceneInfo * linfo,
517                            __global float* aux_array0,
518                            __global float* aux_array1,
519                            __global float* aux_array2,
520                            __global float* aux_array3)
521{
522  int gid=get_global_id(0);
523  int datasize = linfo->data_len ;//* info->num_buffer;
524  if (gid<datasize)
525  {
526    float obs0= (float) as_int(aux_array0[gid]);
527    float obs1= (float) as_int(aux_array1[gid]);
528    float obs2= (float) as_int(aux_array2[gid]);
529    float obs3= (float) as_int(aux_array3[gid]);
530
531    float phi   = atan2(obs2,obs1);
532    float denom = sqrt(obs1*obs1+obs2*obs2+obs3*obs3);
533    float theta = acos(obs3/denom);
534
535    aux_array0[gid]=theta;
536    aux_array1[gid]=phi;
537  }
538}
539#endif //CONVERT_AUX
540
541#ifdef SEGLENNOBS
542typedef struct
543{
544  __global int* seg_len;
545  __global int* mean_obs;
546  __global uint* nobs;
547  __local  short2* ray_bundle_array;
548  __local  int*    cell_ptrs;
549  __local  float4* cached_aux;
550           float   obs;
551  __global float * output;
552  __constant RenderSceneInfo * linfo;
553} AuxArgs;
554
555//forward declare cast ray (so you can use it)
556void cast_ray(int,int,float,float,float,float,float,float,__constant RenderSceneInfo*,
557              __global int4*,local uchar16*,constant uchar *,local uchar *,float*,AuxArgs, float tnear, float tfar);
558
559
560__kernel void
561seg_len_nobs_main(__constant  RenderSceneInfo    * linfo,
562                  __global    int4               * tree_array,       // tree structure for each block
563                  __global    float              * alpha_array,      // alpha for each block
564                  __global    int                * aux_array0,       // aux data array (four aux arrays strung together)
565                  __global    int                * aux_array1,       // aux data array (four aux arrays strung together)
566                  __global    uint               * nobs,             // data array to keep track of the number of observations per cell
567                  __constant  uchar              * bit_lookup,       // used to get data_index
568                  __global    float4             * ray_origins,
569                  __global    float4             * ray_directions,
570                  __global    uint4              * imgdims,          // dimensions of the input image
571                  __global    float              * in_image,         // the input image
572                  __global    float              * output,
573                  __local     uchar16            * local_tree,       // cache current tree into local memory
574                  __local     short2             * ray_bundle_array, // gives information for which ray takes over in the workgroup
575                  __local     int                * cell_ptrs,        // local list of cell_ptrs (cells that are hit by this workgroup
576                  __local     float4             * cached_aux_data,  // seg len cached aux data is only a float2
577                  __local     uchar              * cumsum )          // cumulative sum for calculating data pointer
578{
579  // get local id (0-63 for an 8x8) of this patch
580  uchar llid = (uchar)(get_local_id(0) + get_local_size(0)*get_local_id(1));
581
582  // initialize pre-broken ray information (non broken rays will be re initialized)
583  ray_bundle_array[llid] = (short2) (-1, 0);
584  cell_ptrs[llid] = -1;
585
586  // ----------------------------------------------------------------------------
587  // get image coordinates and camera,
588  // check for validity before proceeding
589  // ----------------------------------------------------------------------------
590  int i=0,j=0;
591  i=get_global_id(0);
592  j=get_global_id(1);
593  int imIndex = j*get_global_size(0) + i;
594
595  // grab input image value (also holds vis)
596  float obs = in_image[imIndex];
597
598  float vis = 1.0f;  //no visibility in this pass
599  barrier(CLK_LOCAL_MEM_FENCE);
600
601  // cases #of threads will be more than the pixels.
602  if (i>=(*imgdims).z || j>=(*imgdims).w || i<(*imgdims).x || j<(*imgdims).y || obs < 0.0f)
603    return;
604
605  // ----------------------------------------------------------------------------
606  // we know i,j map to a point on the image,
607  // BEGIN RAY TRACE
608  // ----------------------------------------------------------------------------
609  float4 ray_o = ray_origins[ imIndex ];
610  float4 ray_d = ray_directions[ imIndex ];
611  float ray_ox, ray_oy, ray_oz, ray_dx, ray_dy, ray_dz;
612  calc_scene_ray_generic_cam(linfo, ray_o, ray_d, &ray_ox, &ray_oy, &ray_oz, &ray_dx, &ray_dy, &ray_dz);
613
614  // ----------------------------------------------------------------------------
615  // we know i,j map to a point on the image, have calculated ray
616  // BEGIN RAY TRACE
617  // ----------------------------------------------------------------------------
618
619  AuxArgs aux_args;
620  aux_args.linfo    = linfo;
621  aux_args.seg_len  = aux_array0;
622  aux_args.mean_obs = aux_array1;
623  aux_args.nobs = nobs;
624  aux_args.ray_bundle_array = ray_bundle_array;
625  aux_args.cell_ptrs  = cell_ptrs;
626  aux_args.cached_aux = cached_aux_data;
627  aux_args.obs = obs;
628  aux_args.output = output;
629
630  cast_ray( i, j,
631            ray_ox, ray_oy, ray_oz,
632            ray_dx, ray_dy, ray_dz,
633            linfo, tree_array,                                  //scene info
634            local_tree, bit_lookup, cumsum, &vis, aux_args,0, MAXFLOAT);    //utility info
635}
636
637#endif //SEGLENNOBS
638