1 /*
2  * Software License Agreement (BSD License)
3  *
4  *  Point Cloud Library (PCL) - www.pointclouds.org
5  *  Copyright (c) 2011, Willow Garage, Inc.
6  *
7  *  All rights reserved.
8  *
9  *  Redistribution and use in source and binary forms, with or without
10  *  modification, are permitted provided that the following conditions
11  *  are met:
12  *
13  *   * Redistributions of source code must retain the above copyright
14  *     notice, this list of conditions and the following disclaimer.
15  *   * Redistributions in binary form must reproduce the above
16  *     copyright notice, this list of conditions and the following
17  *     disclaimer in the documentation and/or other materials provided
18  *     with the distribution.
19  *   * Neither the name of Willow Garage, Inc. nor the names of its
20  *     contributors may be used to endorse or promote products derived
21  *     from this software without specific prior written permission.
22  *
23  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  *  POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37 
38 #include "device.hpp"
39 //#include <pcl/gpu/utils/device/block.hpp>
40 
41 namespace pcl
42 {
43   namespace device
44   {
45     __device__ unsigned int count = 0;
46 
47     struct CorespSearch
48     {
49       enum { CTA_SIZE_X = 32, CTA_SIZE_Y = 8, CTA_SIZE = CTA_SIZE_X * CTA_SIZE_Y };
50 
51       struct plus
52       {
53         __forceinline__ __device__ int
operator ()pcl::device::CorespSearch::plus54         operator () (const int &lhs, const volatile int& rhs) const {
55           return lhs + rhs;
56         }
57       };
58 
59       PtrStep<float> vmap_g_curr;
60       PtrStep<float> nmap_g_curr;
61 
62       Mat33 Rprev_inv;
63       float3 tprev;
64 
65       Intr intr;
66 
67       PtrStep<float> vmap_g_prev;
68       PtrStep<float> nmap_g_prev;
69 
70       float distThres;
71       float angleThres;
72 
73       mutable PtrStepSz<short2> coresp;
74 
75       mutable int* gbuf;
76 
77       __device__ __forceinline__ int
searchpcl::device::CorespSearch78       search () const
79       {
80         int x = threadIdx.x + blockIdx.x * blockDim.x;
81         int y = threadIdx.y + blockIdx.y * blockDim.y;
82 
83         if (x >= coresp.cols || y >= coresp.rows)
84           return 0;
85 
86         coresp.ptr (y)[x] = make_short2 (-1, -1);
87 
88         float3 ncurr_g;
89         ncurr_g.x = nmap_g_curr.ptr (y)[x];
90 
91         if (isnan (ncurr_g.x))
92           return 0;
93 
94         float3 vcurr_g;
95         vcurr_g.x = vmap_g_curr.ptr (y              )[x];
96         vcurr_g.y = vmap_g_curr.ptr (y + coresp.rows)[x];
97         vcurr_g.z = vmap_g_curr.ptr (y + 2 * coresp.rows)[x];
98 
99         float3 vcurr_cp = Rprev_inv * (vcurr_g - tprev);         // prev camera coo space
100 
101         int2 ukr;         //projection
102         ukr.x = __float2int_rn (vcurr_cp.x * intr.fx / vcurr_cp.z + intr.cx);      //4
103         ukr.y = __float2int_rn (vcurr_cp.y * intr.fy / vcurr_cp.z + intr.cy);                      //4
104 
105         if (ukr.x < 0 || ukr.y < 0 || ukr.x >= coresp.cols || ukr.y >= coresp.rows)
106           return 0;
107 
108         float3 nprev_g;
109         nprev_g.x = nmap_g_prev.ptr (ukr.y)[ukr.x];
110 
111         if (isnan (nprev_g.x))
112           return 0;
113 
114         float3 vprev_g;
115         vprev_g.x = vmap_g_prev.ptr (ukr.y              )[ukr.x];
116         vprev_g.y = vmap_g_prev.ptr (ukr.y + coresp.rows)[ukr.x];
117         vprev_g.z = vmap_g_prev.ptr (ukr.y + 2 * coresp.rows)[ukr.x];
118 
119         float dist = norm (vcurr_g - vprev_g);
120         if (dist > distThres)
121           return 0;
122 
123         ncurr_g.y = nmap_g_curr.ptr (y + coresp.rows)[x];
124         ncurr_g.z = nmap_g_curr.ptr (y + 2 * coresp.rows)[x];
125 
126         nprev_g.y = nmap_g_prev.ptr (ukr.y + coresp.rows)[ukr.x];
127         nprev_g.z = nmap_g_prev.ptr (ukr.y + 2 * coresp.rows)[ukr.x];
128 
129         float sine = norm (cross (ncurr_g, nprev_g));
130 
131         /*if (sine >= 1 || asinf(sine) >= angleThres)
132             return 0;*/
133 
134         if (/*sine >= 1 || */ sine >= angleThres)
135           return 0;
136 
137         coresp.ptr (y)[x] = make_short2 (ukr.x, ukr.y);
138         return 1;
139       }
140 
141       __device__ __forceinline__ void
reducepcl::device::CorespSearch142       reduce (int i) const
143       {
144         __shared__ volatile int smem[CTA_SIZE];
145 
146         int tid = Block::flattenedThreadId ();
147 
148         smem[tid] = i;
149         __syncthreads ();
150 
151         Block::reduce<CTA_SIZE>(smem, plus ());
152 
153         __shared__ bool isLastBlockDone;
154 
155         if (tid == 0)
156         {
157           gbuf[blockIdx.x + gridDim.x * blockIdx.y] = smem[0];
158           __threadfence ();
159 
160           unsigned int value = atomicInc (&count, gridDim.x * gridDim.y);
161 
162           isLastBlockDone = (value == (gridDim.x * gridDim.y - 1));
163         }
164         __syncthreads ();
165 
166         if (isLastBlockDone)
167         {
168           int sum = 0;
169           int stride = Block::stride ();
170           for (int pos = tid; pos < gridDim.x * gridDim.y; pos += stride)
171             sum += gbuf[pos];
172 
173           smem[tid] = sum;
174           __syncthreads ();
175           Block::reduce<CTA_SIZE>(smem, plus ());
176 
177           if (tid == 0)
178           {
179             gbuf[0] = smem[0];
180             count = 0;
181           }
182         }
183       }
184 
185       __device__ __forceinline__ void
operator ()pcl::device::CorespSearch186       operator () () const
187       {
188         int mask = search ();
189         //reduce(mask); if uncomment -> need to allocate and set gbuf
190       }
191     };
192 
193     __global__ void
corespKernel(const CorespSearch cs)194     corespKernel (const CorespSearch cs) {
195       cs ();
196     }
197   }
198 }
199 
200 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
201 void
findCoresp(const MapArr & vmap_g_curr,const MapArr & nmap_g_curr,const Mat33 & Rprev_inv,const float3 & tprev,const Intr & intr,const MapArr & vmap_g_prev,const MapArr & nmap_g_prev,float distThres,float angleThres,PtrStepSz<short2> coresp)202 pcl::device::findCoresp (const MapArr& vmap_g_curr, const MapArr& nmap_g_curr,
203                          const Mat33& Rprev_inv, const float3& tprev, const Intr& intr,
204                          const MapArr& vmap_g_prev, const MapArr& nmap_g_prev,
205                          float distThres, float angleThres, PtrStepSz<short2> coresp)
206 {
207   CorespSearch cs;
208 
209   cs.vmap_g_curr = vmap_g_curr;
210   cs.nmap_g_curr = nmap_g_curr;
211 
212   cs.Rprev_inv = Rprev_inv;
213   cs.tprev = tprev;
214 
215   cs.intr = intr;
216 
217   cs.vmap_g_prev = vmap_g_prev;
218   cs.nmap_g_prev = nmap_g_prev;
219 
220   cs.distThres = distThres;
221   cs.angleThres = angleThres;
222 
223   cs.coresp = coresp;
224 
225   dim3 block (CorespSearch::CTA_SIZE_X, CorespSearch::CTA_SIZE_Y);
226   dim3 grid (divUp (coresp.cols, block.x), divUp (coresp.rows, block.y));
227 
228   corespKernel<<<grid, block>>>(cs);
229 
230   cudaSafeCall ( cudaGetLastError () );
231   cudaSafeCall (cudaDeviceSynchronize ());
232 }
233