1 /*
2 * Software License Agreement (BSD License)
3 *
4 * Point Cloud Library (PCL) - www.pointclouds.org
5 * Copyright (c) 2011, Willow Garage, Inc.
6 *
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * * Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * * Redistributions in binary form must reproduce the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer in the documentation and/or other materials provided
18 * with the distribution.
19 * * Neither the name of Willow Garage, Inc. nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 *
36 */
37
38 #include "device.hpp"
39 //#include <pcl/gpu/utils/device/block.hpp>
40
41 namespace pcl
42 {
43 namespace device
44 {
45 __device__ unsigned int count = 0;
46
47 struct CorespSearch
48 {
49 enum { CTA_SIZE_X = 32, CTA_SIZE_Y = 8, CTA_SIZE = CTA_SIZE_X * CTA_SIZE_Y };
50
51 struct plus
52 {
53 __forceinline__ __device__ int
operator ()pcl::device::CorespSearch::plus54 operator () (const int &lhs, const volatile int& rhs) const {
55 return lhs + rhs;
56 }
57 };
58
59 PtrStep<float> vmap_g_curr;
60 PtrStep<float> nmap_g_curr;
61
62 Mat33 Rprev_inv;
63 float3 tprev;
64
65 Intr intr;
66
67 PtrStep<float> vmap_g_prev;
68 PtrStep<float> nmap_g_prev;
69
70 float distThres;
71 float angleThres;
72
73 mutable PtrStepSz<short2> coresp;
74
75 mutable int* gbuf;
76
77 __device__ __forceinline__ int
searchpcl::device::CorespSearch78 search () const
79 {
80 int x = threadIdx.x + blockIdx.x * blockDim.x;
81 int y = threadIdx.y + blockIdx.y * blockDim.y;
82
83 if (x >= coresp.cols || y >= coresp.rows)
84 return 0;
85
86 coresp.ptr (y)[x] = make_short2 (-1, -1);
87
88 float3 ncurr_g;
89 ncurr_g.x = nmap_g_curr.ptr (y)[x];
90
91 if (isnan (ncurr_g.x))
92 return 0;
93
94 float3 vcurr_g;
95 vcurr_g.x = vmap_g_curr.ptr (y )[x];
96 vcurr_g.y = vmap_g_curr.ptr (y + coresp.rows)[x];
97 vcurr_g.z = vmap_g_curr.ptr (y + 2 * coresp.rows)[x];
98
99 float3 vcurr_cp = Rprev_inv * (vcurr_g - tprev); // prev camera coo space
100
101 int2 ukr; //projection
102 ukr.x = __float2int_rn (vcurr_cp.x * intr.fx / vcurr_cp.z + intr.cx); //4
103 ukr.y = __float2int_rn (vcurr_cp.y * intr.fy / vcurr_cp.z + intr.cy); //4
104
105 if (ukr.x < 0 || ukr.y < 0 || ukr.x >= coresp.cols || ukr.y >= coresp.rows)
106 return 0;
107
108 float3 nprev_g;
109 nprev_g.x = nmap_g_prev.ptr (ukr.y)[ukr.x];
110
111 if (isnan (nprev_g.x))
112 return 0;
113
114 float3 vprev_g;
115 vprev_g.x = vmap_g_prev.ptr (ukr.y )[ukr.x];
116 vprev_g.y = vmap_g_prev.ptr (ukr.y + coresp.rows)[ukr.x];
117 vprev_g.z = vmap_g_prev.ptr (ukr.y + 2 * coresp.rows)[ukr.x];
118
119 float dist = norm (vcurr_g - vprev_g);
120 if (dist > distThres)
121 return 0;
122
123 ncurr_g.y = nmap_g_curr.ptr (y + coresp.rows)[x];
124 ncurr_g.z = nmap_g_curr.ptr (y + 2 * coresp.rows)[x];
125
126 nprev_g.y = nmap_g_prev.ptr (ukr.y + coresp.rows)[ukr.x];
127 nprev_g.z = nmap_g_prev.ptr (ukr.y + 2 * coresp.rows)[ukr.x];
128
129 float sine = norm (cross (ncurr_g, nprev_g));
130
131 /*if (sine >= 1 || asinf(sine) >= angleThres)
132 return 0;*/
133
134 if (/*sine >= 1 || */ sine >= angleThres)
135 return 0;
136
137 coresp.ptr (y)[x] = make_short2 (ukr.x, ukr.y);
138 return 1;
139 }
140
141 __device__ __forceinline__ void
reducepcl::device::CorespSearch142 reduce (int i) const
143 {
144 __shared__ volatile int smem[CTA_SIZE];
145
146 int tid = Block::flattenedThreadId ();
147
148 smem[tid] = i;
149 __syncthreads ();
150
151 Block::reduce<CTA_SIZE>(smem, plus ());
152
153 __shared__ bool isLastBlockDone;
154
155 if (tid == 0)
156 {
157 gbuf[blockIdx.x + gridDim.x * blockIdx.y] = smem[0];
158 __threadfence ();
159
160 unsigned int value = atomicInc (&count, gridDim.x * gridDim.y);
161
162 isLastBlockDone = (value == (gridDim.x * gridDim.y - 1));
163 }
164 __syncthreads ();
165
166 if (isLastBlockDone)
167 {
168 int sum = 0;
169 int stride = Block::stride ();
170 for (int pos = tid; pos < gridDim.x * gridDim.y; pos += stride)
171 sum += gbuf[pos];
172
173 smem[tid] = sum;
174 __syncthreads ();
175 Block::reduce<CTA_SIZE>(smem, plus ());
176
177 if (tid == 0)
178 {
179 gbuf[0] = smem[0];
180 count = 0;
181 }
182 }
183 }
184
185 __device__ __forceinline__ void
operator ()pcl::device::CorespSearch186 operator () () const
187 {
188 int mask = search ();
189 //reduce(mask); if uncomment -> need to allocate and set gbuf
190 }
191 };
192
193 __global__ void
corespKernel(const CorespSearch cs)194 corespKernel (const CorespSearch cs) {
195 cs ();
196 }
197 }
198 }
199
200 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
201 void
findCoresp(const MapArr & vmap_g_curr,const MapArr & nmap_g_curr,const Mat33 & Rprev_inv,const float3 & tprev,const Intr & intr,const MapArr & vmap_g_prev,const MapArr & nmap_g_prev,float distThres,float angleThres,PtrStepSz<short2> coresp)202 pcl::device::findCoresp (const MapArr& vmap_g_curr, const MapArr& nmap_g_curr,
203 const Mat33& Rprev_inv, const float3& tprev, const Intr& intr,
204 const MapArr& vmap_g_prev, const MapArr& nmap_g_prev,
205 float distThres, float angleThres, PtrStepSz<short2> coresp)
206 {
207 CorespSearch cs;
208
209 cs.vmap_g_curr = vmap_g_curr;
210 cs.nmap_g_curr = nmap_g_curr;
211
212 cs.Rprev_inv = Rprev_inv;
213 cs.tprev = tprev;
214
215 cs.intr = intr;
216
217 cs.vmap_g_prev = vmap_g_prev;
218 cs.nmap_g_prev = nmap_g_prev;
219
220 cs.distThres = distThres;
221 cs.angleThres = angleThres;
222
223 cs.coresp = coresp;
224
225 dim3 block (CorespSearch::CTA_SIZE_X, CorespSearch::CTA_SIZE_Y);
226 dim3 grid (divUp (coresp.cols, block.x), divUp (coresp.rows, block.y));
227
228 corespKernel<<<grid, block>>>(cs);
229
230 cudaSafeCall ( cudaGetLastError () );
231 cudaSafeCall (cudaDeviceSynchronize ());
232 }
233