1 /*
2  * Software License Agreement (BSD License)
3  *
4  *  Point Cloud Library (PCL) - www.pointclouds.org
5  *  Copyright (c) 2011, Willow Garage, Inc.
6  *
7  *  All rights reserved.
8  *
9  *  Redistribution and use in source and binary forms, with or without
10  *  modification, are permitted provided that the following conditions
11  *  are met:
12  *
13  *   * Redistributions of source code must retain the above copyright
14  *     notice, this list of conditions and the following disclaimer.
15  *   * Redistributions in binary form must reproduce the above
16  *     copyright notice, this list of conditions and the following
17  *     disclaimer in the documentation and/or other materials provided
18  *     with the distribution.
19  *   * Neither the name of Willow Garage, Inc. nor the names of its
20  *     contributors may be used to endorse or promote products derived
21  *     from this software without specific prior written permission.
22  *
23  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  *  POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37 
38 #include "device.hpp"
39 
40 using namespace pcl::device;
41 
42 namespace pcl
43 {
44   namespace device
45   {
46     template<typename T>
47     __global__ void
initializeVolume(PtrStep<T> volume)48     initializeVolume (PtrStep<T> volume)
49     {
50       int x = threadIdx.x + blockIdx.x * blockDim.x;
51       int y = threadIdx.y + blockIdx.y * blockDim.y;
52 
53 
54       if (x < VOLUME_X && y < VOLUME_Y)
55       {
56           T *pos = volume.ptr(y) + x;
57           int z_step = VOLUME_Y * volume.step / sizeof(*pos);
58 
59 #pragma unroll
60           for(int z = 0; z < VOLUME_Z; ++z, pos+=z_step)
61              pack_tsdf (0.f, 0, *pos);
62       }
63     }
64   }
65 }
66 
67 void
initVolume(PtrStep<short2> volume)68 pcl::device::initVolume (PtrStep<short2> volume)
69 {
70   dim3 block (16, 16);
71   dim3 grid (1, 1, 1);
72   grid.x = divUp (VOLUME_X, block.x);
73   grid.y = divUp (VOLUME_Y, block.y);
74 
75   initializeVolume<<<grid, block>>>(volume);
76   cudaSafeCall ( cudaGetLastError () );
77   cudaSafeCall (cudaDeviceSynchronize ());
78 }
79 
80 namespace pcl
81 {
82   namespace device
83   {
84     struct Tsdf
85     {
86       static constexpr int CTA_SIZE_X = 32;
87       static constexpr int CTA_SIZE_Y = 8;
88       static constexpr int MAX_WEIGHT = 1 << 7;
89 
90       mutable PtrStep<short2> volume;
91       float3 cell_size;
92 
93       Intr intr;
94 
95       Mat33 Rcurr_inv;
96       float3 tcurr;
97 
98       PtrStepSz<ushort> depth_raw; //depth in mm
99 
100       float tranc_dist_mm;
101 
102       __device__ __forceinline__ float3
getVoxelGCoopcl::device::Tsdf103       getVoxelGCoo (int x, int y, int z) const
104       {
105         float3 coo = make_float3 (x, y, z);
106         coo += 0.5f;         //shift to cell center;
107 
108         coo.x *= cell_size.x;
109         coo.y *= cell_size.y;
110         coo.z *= cell_size.z;
111 
112         return coo;
113       }
114 
115       __device__ __forceinline__ void
operator ()pcl::device::Tsdf116       operator () () const
117       {
118         int x = threadIdx.x + blockIdx.x * CTA_SIZE_X;
119         int y = threadIdx.y + blockIdx.y * CTA_SIZE_Y;
120 
121         if (x >= VOLUME_X || y >= VOLUME_Y)
122           return;
123 
124         short2 *pos = volume.ptr (y) + x;
125         int elem_step = volume.step * VOLUME_Y / sizeof(*pos);
126 
127         for (int z = 0; z < VOLUME_Z; ++z, pos += elem_step)
128         {
129           float3 v_g = getVoxelGCoo (x, y, z);            //3 // p
130 
131           //transform to curr cam coo space
132           float3 v = Rcurr_inv * (v_g - tcurr);           //4
133 
134           int2 coo;           //project to current cam
135           coo.x = __float2int_rn (v.x * intr.fx / v.z + intr.cx);
136           coo.y = __float2int_rn (v.y * intr.fy / v.z + intr.cy);
137 
138           if (v.z > 0 && coo.x >= 0 && coo.y >= 0 && coo.x < depth_raw.cols && coo.y < depth_raw.rows)           //6
139           {
140             int Dp = depth_raw.ptr (coo.y)[coo.x];
141 
142             if (Dp != 0)
143             {
144               float xl = (coo.x - intr.cx) / intr.fx;
145               float yl = (coo.y - intr.cy) / intr.fy;
146               float lambda_inv = rsqrtf (xl * xl + yl * yl + 1);
147 
148               float sdf = 1000 * norm (tcurr - v_g) * lambda_inv - Dp; //mm
149 
150               sdf *= (-1);
151 
152               if (sdf >= -tranc_dist_mm)
153               {
154                 float tsdf = fmin (1.f, sdf / tranc_dist_mm);
155 
156                 int weight_prev;
157                 float tsdf_prev;
158 
159                 //read and unpack
160                 unpack_tsdf (*pos, tsdf_prev, weight_prev);
161 
162                 const int Wrk = 1;
163 
164                 float tsdf_new = (tsdf_prev * weight_prev + Wrk * tsdf) / (weight_prev + Wrk);
165                 int weight_new = min (weight_prev + Wrk, MAX_WEIGHT);
166 
167                 pack_tsdf (tsdf_new, weight_new, *pos);
168               }
169             }
170           }
171         }
172       }
173     };
174 
175     __global__ void
integrateTsdfKernel(const Tsdf tsdf)176     integrateTsdfKernel (const Tsdf tsdf) {
177       tsdf ();
178     }
179 
180     __global__ void
tsdf2(PtrStep<short2> volume,const float tranc_dist_mm,const Mat33 Rcurr_inv,float3 tcurr,const Intr intr,const PtrStepSz<ushort> depth_raw,const float3 cell_size)181     tsdf2 (PtrStep<short2> volume, const float tranc_dist_mm, const Mat33 Rcurr_inv, float3 tcurr,
182            const Intr intr, const PtrStepSz<ushort> depth_raw, const float3 cell_size)
183     {
184       int x = threadIdx.x + blockIdx.x * blockDim.x;
185       int y = threadIdx.y + blockIdx.y * blockDim.y;
186 
187       if (x >= VOLUME_X || y >= VOLUME_Y)
188         return;
189 
190       short2 *pos = volume.ptr (y) + x;
191       int elem_step = volume.step * VOLUME_Y / sizeof(short2);
192 
193       float v_g_x = (x + 0.5f) * cell_size.x - tcurr.x;
194       float v_g_y = (y + 0.5f) * cell_size.y - tcurr.y;
195       float v_g_z = (0 + 0.5f) * cell_size.z - tcurr.z;
196 
197       float v_x = Rcurr_inv.data[0].x * v_g_x + Rcurr_inv.data[0].y * v_g_y + Rcurr_inv.data[0].z * v_g_z;
198       float v_y = Rcurr_inv.data[1].x * v_g_x + Rcurr_inv.data[1].y * v_g_y + Rcurr_inv.data[1].z * v_g_z;
199       float v_z = Rcurr_inv.data[2].x * v_g_x + Rcurr_inv.data[2].y * v_g_y + Rcurr_inv.data[2].z * v_g_z;
200 
201 //#pragma unroll
202       for (int z = 0; z < VOLUME_Z; ++z)
203       {
204         float3 vr;
205         vr.x = v_g_x;
206         vr.y = v_g_y;
207         vr.z = (v_g_z + z * cell_size.z);
208 
209         float3 v;
210         v.x = v_x + Rcurr_inv.data[0].z * z * cell_size.z;
211         v.y = v_y + Rcurr_inv.data[1].z * z * cell_size.z;
212         v.z = v_z + Rcurr_inv.data[2].z * z * cell_size.z;
213 
214         int2 coo;         //project to current cam
215         coo.x = __float2int_rn (v.x * intr.fx / v.z + intr.cx);
216         coo.y = __float2int_rn (v.y * intr.fy / v.z + intr.cy);
217 
218 
219         if (v.z > 0 && coo.x >= 0 && coo.y >= 0 && coo.x < depth_raw.cols && coo.y < depth_raw.rows)         //6
220         {
221           int Dp = depth_raw.ptr (coo.y)[coo.x]; //mm
222 
223           if (Dp != 0)
224           {
225             float xl = (coo.x - intr.cx) / intr.fx;
226             float yl = (coo.y - intr.cy) / intr.fy;
227             float lambda_inv = rsqrtf (xl * xl + yl * yl + 1);
228 
229             float sdf = Dp - norm (vr) * lambda_inv * 1000; //mm
230 
231 
232             if (sdf >= -tranc_dist_mm)
233             {
234               float tsdf = fmin (1.f, sdf / tranc_dist_mm);
235 
236               int weight_prev;
237               float tsdf_prev;
238 
239               //read and unpack
240               unpack_tsdf (*pos, tsdf_prev, weight_prev);
241 
242               const int Wrk = 1;
243 
244               float tsdf_new = (tsdf_prev * weight_prev + Wrk * tsdf) / (weight_prev + Wrk);
245               int weight_new = min (weight_prev + Wrk, Tsdf::MAX_WEIGHT);
246 
247               pack_tsdf (tsdf_new, weight_new, *pos);
248             }
249           }
250         }
251         pos += elem_step;
252       }       /* for(int z = 0; z < VOLUME_Z; ++z) */
253     }      /* __global__ */
254   }
255 }
256 
257 
258 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
259 void
integrateTsdfVolume(const PtrStepSz<ushort> & depth_raw,const Intr & intr,const float3 & volume_size,const Mat33 & Rcurr_inv,const float3 & tcurr,float tranc_dist,PtrStep<short2> volume)260 pcl::device::integrateTsdfVolume (const PtrStepSz<ushort>& depth_raw, const Intr& intr, const float3& volume_size,
261                                   const Mat33& Rcurr_inv, const float3& tcurr, float tranc_dist,
262                                   PtrStep<short2> volume)
263 {
264   Tsdf tsdf;
265 
266   tsdf.volume = volume;
267   tsdf.cell_size.x = volume_size.x / VOLUME_X;
268   tsdf.cell_size.y = volume_size.y / VOLUME_Y;
269   tsdf.cell_size.z = volume_size.z / VOLUME_Z;
270 
271   tsdf.intr = intr;
272 
273   tsdf.Rcurr_inv = Rcurr_inv;
274   tsdf.tcurr = tcurr;
275   tsdf.depth_raw = depth_raw;
276 
277   tsdf.tranc_dist_mm = tranc_dist*1000; //mm
278 
279   dim3 block (Tsdf::CTA_SIZE_X, Tsdf::CTA_SIZE_Y);
280   dim3 grid (divUp (VOLUME_X, block.x), divUp (VOLUME_Y, block.y));
281 
282 #if 0
283    //tsdf2<<<grid, block>>>(volume, tranc_dist, Rcurr_inv, tcurr, intr, depth_raw, tsdf.cell_size);
284    integrateTsdfKernel<<<grid, block>>>(tsdf);
285 #endif
286   cudaSafeCall ( cudaGetLastError () );
287   cudaSafeCall (cudaDeviceSynchronize ());
288 }
289 
290 
291 namespace pcl
292 {
293   namespace device
294   {
295     __global__ void
scaleDepth(const PtrStepSz<ushort> depth,PtrStep<float> scaled,const Intr intr)296     scaleDepth (const PtrStepSz<ushort> depth, PtrStep<float> scaled, const Intr intr)
297     {
298       int x = threadIdx.x + blockIdx.x * blockDim.x;
299       int y = threadIdx.y + blockIdx.y * blockDim.y;
300 
301       if (x >= depth.cols || y >= depth.rows)
302         return;
303 
304       int Dp = depth.ptr (y)[x];
305 
306       float xl = (x - intr.cx) / intr.fx;
307       float yl = (y - intr.cy) / intr.fy;
308       float lambda = sqrtf (xl * xl + yl * yl + 1);
309 
310       scaled.ptr (y)[x] = Dp * lambda/1000.f; //meters
311     }
312 
313     __global__ void
tsdf23(const PtrStepSz<float> depthScaled,PtrStep<short2> volume,const float tranc_dist,const Mat33 Rcurr_inv,const float3 tcurr,const Intr intr,const float3 cell_size)314     tsdf23 (const PtrStepSz<float> depthScaled, PtrStep<short2> volume,
315             const float tranc_dist, const Mat33 Rcurr_inv, const float3 tcurr, const Intr intr, const float3 cell_size)
316     {
317       int x = threadIdx.x + blockIdx.x * blockDim.x;
318       int y = threadIdx.y + blockIdx.y * blockDim.y;
319 
320       if (x >= VOLUME_X || y >= VOLUME_Y)
321         return;
322 
323       float v_g_x = (x + 0.5f) * cell_size.x - tcurr.x;
324       float v_g_y = (y + 0.5f) * cell_size.y - tcurr.y;
325       float v_g_z = (0 + 0.5f) * cell_size.z - tcurr.z;
326 
327       float v_g_part_norm = v_g_x * v_g_x + v_g_y * v_g_y;
328 
329       float v_x = (Rcurr_inv.data[0].x * v_g_x + Rcurr_inv.data[0].y * v_g_y + Rcurr_inv.data[0].z * v_g_z) * intr.fx;
330       float v_y = (Rcurr_inv.data[1].x * v_g_x + Rcurr_inv.data[1].y * v_g_y + Rcurr_inv.data[1].z * v_g_z) * intr.fy;
331       float v_z = (Rcurr_inv.data[2].x * v_g_x + Rcurr_inv.data[2].y * v_g_y + Rcurr_inv.data[2].z * v_g_z);
332 
333       float z_scaled = 0;
334 
335       float Rcurr_inv_0_z_scaled = Rcurr_inv.data[0].z * cell_size.z * intr.fx;
336       float Rcurr_inv_1_z_scaled = Rcurr_inv.data[1].z * cell_size.z * intr.fy;
337 
338       float tranc_dist_inv = 1.0f / tranc_dist;
339 
340       short2* pos = volume.ptr (y) + x;
341       int elem_step = volume.step * VOLUME_Y / sizeof(short2);
342 
343 //#pragma unroll
344       for (int z = 0; z < VOLUME_Z;
345            ++z,
346            v_g_z += cell_size.z,
347            z_scaled += cell_size.z,
348            v_x += Rcurr_inv_0_z_scaled,
349            v_y += Rcurr_inv_1_z_scaled,
350            pos += elem_step)
351       {
352         float inv_z = 1.0f / (v_z + Rcurr_inv.data[2].z * z_scaled);
353         if (inv_z < 0)
354             continue;
355 
356         // project to current cam
357         int2 coo =
358         {
359           __float2int_rn (v_x * inv_z + intr.cx),
360           __float2int_rn (v_y * inv_z + intr.cy)
361         };
362 
363         if (coo.x >= 0 && coo.y >= 0 && coo.x < depthScaled.cols && coo.y < depthScaled.rows)         //6
364         {
365           float Dp_scaled = depthScaled.ptr (coo.y)[coo.x]; //meters
366 
367           float sdf = Dp_scaled - sqrtf (v_g_z * v_g_z + v_g_part_norm);
368 
369           if (Dp_scaled != 0 && sdf >= -tranc_dist) //meters
370           {
371             float tsdf = fmin (1.0f, sdf * tranc_dist_inv);
372 
373             //read and unpack
374             float tsdf_prev;
375             int weight_prev;
376             unpack_tsdf (*pos, tsdf_prev, weight_prev);
377 
378             const int Wrk = 1;
379 
380             float tsdf_new = (tsdf_prev * weight_prev + Wrk * tsdf) / (weight_prev + Wrk);
381             int weight_new = min (weight_prev + Wrk, Tsdf::MAX_WEIGHT);
382 
383             pack_tsdf (tsdf_new, weight_new, *pos);
384           }
385         }
386       }       // for(int z = 0; z < VOLUME_Z; ++z)
387     }      // __global__
388 
389     __global__ void
tsdf23normal_hack(const PtrStepSz<float> depthScaled,PtrStep<short2> volume,const float tranc_dist,const Mat33 Rcurr_inv,const float3 tcurr,const Intr intr,const float3 cell_size)390     tsdf23normal_hack (const PtrStepSz<float> depthScaled, PtrStep<short2> volume,
391                   const float tranc_dist, const Mat33 Rcurr_inv, const float3 tcurr, const Intr intr, const float3 cell_size)
392     {
393         int x = threadIdx.x + blockIdx.x * blockDim.x;
394         int y = threadIdx.y + blockIdx.y * blockDim.y;
395 
396         if (x >= VOLUME_X || y >= VOLUME_Y)
397             return;
398 
399         const float v_g_x = (x + 0.5f) * cell_size.x - tcurr.x;
400         const float v_g_y = (y + 0.5f) * cell_size.y - tcurr.y;
401         float v_g_z = (0 + 0.5f) * cell_size.z - tcurr.z;
402 
403         float v_g_part_norm = v_g_x * v_g_x + v_g_y * v_g_y;
404 
405         float v_x = (Rcurr_inv.data[0].x * v_g_x + Rcurr_inv.data[0].y * v_g_y + Rcurr_inv.data[0].z * v_g_z) * intr.fx;
406         float v_y = (Rcurr_inv.data[1].x * v_g_x + Rcurr_inv.data[1].y * v_g_y + Rcurr_inv.data[1].z * v_g_z) * intr.fy;
407         float v_z = (Rcurr_inv.data[2].x * v_g_x + Rcurr_inv.data[2].y * v_g_y + Rcurr_inv.data[2].z * v_g_z);
408 
409         float z_scaled = 0;
410 
411         float Rcurr_inv_0_z_scaled = Rcurr_inv.data[0].z * cell_size.z * intr.fx;
412         float Rcurr_inv_1_z_scaled = Rcurr_inv.data[1].z * cell_size.z * intr.fy;
413 
414         float tranc_dist_inv = 1.0f / tranc_dist;
415 
416         short2* pos = volume.ptr (y) + x;
417         int elem_step = volume.step * VOLUME_Y / sizeof(short2);
418 
419         //#pragma unroll
420         for (int z = 0; z < VOLUME_Z;
421             ++z,
422             v_g_z += cell_size.z,
423             z_scaled += cell_size.z,
424             v_x += Rcurr_inv_0_z_scaled,
425             v_y += Rcurr_inv_1_z_scaled,
426             pos += elem_step)
427         {
428             float inv_z = 1.0f / (v_z + Rcurr_inv.data[2].z * z_scaled);
429             if (inv_z < 0)
430                 continue;
431 
432             // project to current cam
433             int2 coo =
434             {
435                 __float2int_rn (v_x * inv_z + intr.cx),
436                 __float2int_rn (v_y * inv_z + intr.cy)
437             };
438 
439             if (coo.x >= 0 && coo.y >= 0 && coo.x < depthScaled.cols && coo.y < depthScaled.rows)         //6
440             {
441                 float Dp_scaled = depthScaled.ptr (coo.y)[coo.x]; //meters
442 
443                 float sdf = Dp_scaled - sqrtf (v_g_z * v_g_z + v_g_part_norm);
444 
445                 if (Dp_scaled != 0 && sdf >= -tranc_dist) //meters
446                 {
447                     float tsdf = fmin (1.0f, sdf * tranc_dist_inv);
448 
449                     bool integrate = true;
450                     if ((x > 0 &&  x < VOLUME_X-2) && (y > 0 && y < VOLUME_Y-2) && (z > 0 && z < VOLUME_Z-2))
451                     {
452                         const float qnan = std::numeric_limits<float>::quiet_NaN();
453                         float3 normal = make_float3(qnan, qnan, qnan);
454 
455                         float Fn, Fp;
456                         int Wn = 0, Wp = 0;
457                         unpack_tsdf (*(pos + elem_step), Fn, Wn);
458                         unpack_tsdf (*(pos - elem_step), Fp, Wp);
459 
460                         if (Wn > 16 && Wp > 16)
461                             normal.z = (Fn - Fp)/cell_size.z;
462 
463                         unpack_tsdf (*(pos + volume.step/sizeof(short2) ), Fn, Wn);
464                         unpack_tsdf (*(pos - volume.step/sizeof(short2) ), Fp, Wp);
465 
466                         if (Wn > 16 && Wp > 16)
467                             normal.y = (Fn - Fp)/cell_size.y;
468 
469                         unpack_tsdf (*(pos + 1), Fn, Wn);
470                         unpack_tsdf (*(pos - 1), Fp, Wp);
471 
472                         if (Wn > 16 && Wp > 16)
473                             normal.x = (Fn - Fp)/cell_size.x;
474 
475                         if (normal.x != qnan && normal.y != qnan && normal.z != qnan)
476                         {
477                             float norm2 = dot(normal, normal);
478                             if (norm2 >= 1e-10)
479                             {
480                                 normal *= rsqrt(norm2);
481 
482                                 float nt = v_g_x * normal.x + v_g_y * normal.y + v_g_z * normal.z;
483                                 float cosine = nt * rsqrt(v_g_x * v_g_x + v_g_y * v_g_y + v_g_z * v_g_z);
484 
485                                 if (cosine < 0.5)
486                                     integrate = false;
487                             }
488                         }
489                     }
490 
491                     if (integrate)
492                     {
493                         //read and unpack
494                         float tsdf_prev;
495                         int weight_prev;
496                         unpack_tsdf (*pos, tsdf_prev, weight_prev);
497 
498                         const int Wrk = 1;
499 
500                         float tsdf_new = (tsdf_prev * weight_prev + Wrk * tsdf) / (weight_prev + Wrk);
501                         int weight_new = min (weight_prev + Wrk, Tsdf::MAX_WEIGHT);
502 
503                         pack_tsdf (tsdf_new, weight_new, *pos);
504                     }
505                 }
506             }
507         }       // for(int z = 0; z < VOLUME_Z; ++z)
508     }      // __global__
509   }
510 }
511 
512 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
513 void
integrateTsdfVolume(const PtrStepSz<ushort> & depth,const Intr & intr,const float3 & volume_size,const Mat33 & Rcurr_inv,const float3 & tcurr,float tranc_dist,PtrStep<short2> volume,DeviceArray2D<float> & depthScaled)514 pcl::device::integrateTsdfVolume (const PtrStepSz<ushort>& depth, const Intr& intr,
515                                   const float3& volume_size, const Mat33& Rcurr_inv, const float3& tcurr,
516                                   float tranc_dist,
517                                   PtrStep<short2> volume, DeviceArray2D<float>& depthScaled)
518 {
519   depthScaled.create (depth.rows, depth.cols);
520 
521   dim3 block_scale (32, 8);
522   dim3 grid_scale (divUp (depth.cols, block_scale.x), divUp (depth.rows, block_scale.y));
523 
524   //scales depth along ray and converts mm -> meters.
525   scaleDepth<<<grid_scale, block_scale>>>(depth, depthScaled, intr);
526   cudaSafeCall ( cudaGetLastError () );
527 
528   float3 cell_size;
529   cell_size.x = volume_size.x / VOLUME_X;
530   cell_size.y = volume_size.y / VOLUME_Y;
531   cell_size.z = volume_size.z / VOLUME_Z;
532 
533   //dim3 block(Tsdf::CTA_SIZE_X, Tsdf::CTA_SIZE_Y);
534   dim3 block (16, 16);
535   dim3 grid (divUp (VOLUME_X, block.x), divUp (VOLUME_Y, block.y));
536 
537   tsdf23<<<grid, block>>>(depthScaled, volume, tranc_dist, Rcurr_inv, tcurr, intr, cell_size);
538   //tsdf23normal_hack<<<grid, block>>>(depthScaled, volume, tranc_dist, Rcurr_inv, tcurr, intr, cell_size);
539 
540   cudaSafeCall ( cudaGetLastError () );
541   cudaSafeCall (cudaDeviceSynchronize ());
542 }
543