1 /*
2 * Software License Agreement (BSD License)
3 *
4 * Copyright (c) 2011, Willow Garage, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17 * * Neither the name of Willow Garage, Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * @author: Koen Buys, Anatoly Baksheev
35 */
36
37 #include <pcl/gpu/people/bodyparts_detector.h>
38 //#include <pcl/gpu/people/conversions.h>
39 #include <pcl/gpu/people/label_common.h>
40 //#include <pcl/gpu/people/label_segment.h>
41 #include <pcl/gpu/people/label_tree.h>
42
43 #include <pcl/common/time.h>
44 #include <pcl/console/print.h>
45 #include "cuda.h"
46 #include "emmintrin.h"
47
48 #include <cassert>
49 #include "internal.h"
50 #include "cuda_async_copy.h"
51
52 const int MAX_CLUST_SIZE = 25000;
53 const float CLUST_TOL = 0.05f;
54
RDFBodyPartsDetector(const std::vector<std::string> & tree_files,int rows,int cols)55 pcl::gpu::people::RDFBodyPartsDetector::RDFBodyPartsDetector( const std::vector<std::string>& tree_files, int rows, int cols)
56 : max_cluster_size_(MAX_CLUST_SIZE), cluster_tolerance_(CLUST_TOL)
57 {
58 PCL_DEBUG("[pcl::gpu::people::RDFBodyPartsDetector::RDFBodyPartsDetector] : (D) : Constructor called\n");
59 //TODO replace all asserts with exceptions
60 assert(!tree_files.empty());
61
62 impl_.reset ( new device::MultiTreeLiveProc(rows, cols) );
63
64 for(const auto &tree_file : tree_files)
65 {
66 // load the tree file
67 std::vector<trees::Node> nodes;
68 std::vector<trees::Label> leaves;
69
70 // this might throw but we haven't done any malloc yet
71 int height = loadTree (tree_file, nodes, leaves );
72 impl_->trees.emplace_back(height, nodes, leaves);
73 }
74
75 allocate_buffers(rows, cols);
76 }
77
78 ////////////////////////////////////////////////////////////////////////////////////
79 /// getters
80
81 size_t
getNumberTrees() const82 pcl::gpu::people::RDFBodyPartsDetector::getNumberTrees() const
83 {
84 return impl_->trees.size();
85 }
86
87 const pcl::device::Labels&
getLabels() const88 pcl::gpu::people::RDFBodyPartsDetector::getLabels() const
89 {
90 return labels_smoothed_;
91 }
92
93 const pcl::device::LabelProbability&
getProbability() const94 pcl::gpu::people::RDFBodyPartsDetector::getProbability() const
95 {
96 return P_l_;
97 }
98
99 const pcl::device::LabelProbability&
getProbability1() const100 pcl::gpu::people::RDFBodyPartsDetector::getProbability1() const
101 {
102 return P_l_1_;
103 }
104
105 const pcl::device::LabelProbability&
getProbability2() const106 pcl::gpu::people::RDFBodyPartsDetector::getProbability2() const
107 {
108 return P_l_2_;
109 }
110
111 const pcl::device::LabelProbability&
getPrevProbability1() const112 pcl::gpu::people::RDFBodyPartsDetector::getPrevProbability1() const
113 {
114 return P_l_prev_1_;
115 }
116
117 const pcl::device::LabelProbability&
getPrevProbability2() const118 pcl::gpu::people::RDFBodyPartsDetector::getPrevProbability2() const
119 {
120 return P_l_prev_2_;
121 }
122
123 const pcl::gpu::people::RDFBodyPartsDetector::BlobMatrix&
getBlobMatrix() const124 pcl::gpu::people::RDFBodyPartsDetector::getBlobMatrix() const
125 {
126 return blob_matrix_;
127 }
128
129 ////////////////////////////////////////////////////////////////////////////////////
130
131 void
allocate_buffers(int rows,int cols)132 pcl::gpu::people::RDFBodyPartsDetector::allocate_buffers(int rows, int cols)
133 {
134 //std::cout << "(I) : RDFBodyPartsDetector::allocate_buffers called with: " << cols << "x" << rows << std::endl;
135
136 labels_.create(rows, cols);
137 labels_smoothed_.create(rows, cols);
138
139 // Create all the label probabilities
140 P_l_.create(rows,cols);
141 P_l_Gaus_.create(rows,cols);
142 P_l_Gaus_Temp_.create(rows,cols);
143 P_l_1_.create(rows,cols);
144 P_l_2_.create(rows,cols);
145 P_l_prev_1_.create(rows,cols);
146 P_l_prev_2_.create(rows,cols);
147
148 lmap_host_.resize(rows * cols);
149
150 dst_labels_.resize(rows * cols);
151 region_sizes_.resize(rows*cols+1);
152 remap_.resize(rows*cols);
153
154 comps_.create(rows, cols);
155 device::ConnectedComponents::initEdges(rows, cols, edges_);
156
157 means_storage_.resize((cols * rows + 1) * 3); // float3 * cols * rows and float3 for cc == -1.
158
159 blob_matrix_.resize(NUM_PARTS);
160 for(auto &matrix : blob_matrix_)
161 {
162 matrix.clear();
163 matrix.reserve(5000);
164 }
165 }
166
167 void
process(const pcl::device::Depth & depth,const PointCloud<PointXYZ> & cloud,int min_pts_per_cluster)168 pcl::gpu::people::RDFBodyPartsDetector::process (const pcl::device::Depth& depth, const PointCloud<PointXYZ>& cloud, int min_pts_per_cluster)
169 {
170 //ScopeTime time("ev");
171
172 int cols = depth.cols();
173 int rows = depth.rows();
174
175 allocate_buffers(rows, cols);
176
177 {
178 {
179 //ScopeTime time("--");
180 // Process the depthimage (CUDA)
181 impl_->process(depth, labels_);
182 device::smoothLabelImage(labels_, depth, labels_smoothed_, NUM_PARTS, 5, 300);
183 }
184
185 //AsyncCopy<unsigned char> async_labels_download(lmap_host_);
186
187 int c;
188 labels_smoothed_.download(lmap_host_, c);
189 //async_labels_download.download(labels_smoothed_);
190
191 // cc = generalized floodfill = approximation of euclidian clusterisation
192 device::ConnectedComponents::computeEdges(labels_smoothed_, depth, NUM_PARTS, cluster_tolerance_ * cluster_tolerance_, edges_);
193 device::ConnectedComponents::labelComponents(edges_, comps_);
194
195 comps_.download(dst_labels_, c);
196
197 //async_labels_download.waitForCompeltion();
198 }
199
200 // This was sort indices to blob (sortIndicesToBlob2) method (till line 236)
201 {
202 //ScopeTime time("cvt");
203 std::fill(remap_.begin(), remap_.end(), -1);
204 std::fill(region_sizes_.begin(), region_sizes_.end(), 0);
205
206 std::fill(means_storage_.begin(), means_storage_.end(), 0);
207 float3* means = (float3*) &means_storage_[3];
208 int *rsizes = ®ion_sizes_[1];
209
210 for(auto &matrix : blob_matrix_)
211 matrix.clear();
212
213 for(std::size_t k = 0; k < dst_labels_.size(); ++k)
214 {
215 const PointXYZ& p = cloud[k];
216 int cc = dst_labels_[k];
217 means[cc].x += p.x;
218 means[cc].y += p.y;
219 means[cc].z += p.z;
220 ++rsizes[cc];
221 }
222
223 means[-1].z = 0; // cc == -1 means invalid
224
225 for(std::size_t k = 0; k < dst_labels_.size(); ++k)
226 {
227 int label = lmap_host_[k];
228 int cc = dst_labels_[k];
229
230 if (means[cc].z != 0 && min_pts_per_cluster <= rsizes[cc] && rsizes[cc] <= max_cluster_size_)
231 {
232 int ccindex = remap_[cc];
233 if (ccindex == -1)
234 {
235 ccindex = static_cast<int> (blob_matrix_[label].size ());
236 blob_matrix_[label].resize(ccindex + 1);
237 remap_[cc] = ccindex;
238
239 blob_matrix_[label][ccindex].label = static_cast<part_t> (label);
240 blob_matrix_[label][ccindex].mean.coeffRef(0) = means[cc].x / static_cast<float> (rsizes[cc]);
241 blob_matrix_[label][ccindex].mean.coeffRef(1) = means[cc].y / static_cast<float> (rsizes[cc]);
242 blob_matrix_[label][ccindex].mean.coeffRef(2) = means[cc].z / static_cast<float> (rsizes[cc]);
243 blob_matrix_[label][ccindex].indices.indices.reserve(rsizes[cc]);
244 }
245 blob_matrix_[label][ccindex].indices.indices.push_back(static_cast<int> (k));
246 }
247 }
248
249 int id = 0;
250 for(auto &matrix : blob_matrix_)
251 for(std::size_t b = 0; b < matrix.size(); ++b)
252 {
253 matrix[b].id = id++;
254 matrix[b].lid = static_cast<int> (b);
255 }
256
257 buildRelations ( blob_matrix_ );
258 }
259 }
260
261 void
processProb(const pcl::device::Depth & depth)262 pcl::gpu::people::RDFBodyPartsDetector::processProb (const pcl::device::Depth& depth)
263 {
264 int cols = depth.cols();
265 int rows = depth.rows();
266
267 allocate_buffers(rows, cols);
268
269 // Process the depthimage into probabilities (CUDA)
270 //impl_->process(depth, labels_);
271 //impl_->processProb(depth, labels_, P_l_, (int) std::numeric_limits<std::int16_t>::max());
272 impl_->processProb(depth, labels_, P_l_, std::numeric_limits<int>::max());
273 }
274
275 void
processSmooth(const pcl::device::Depth & depth,const PointCloud<PointXYZ> & cloud,int min_pts_per_cluster)276 pcl::gpu::people::RDFBodyPartsDetector::processSmooth (const pcl::device::Depth& depth, const PointCloud<PointXYZ>& cloud, int min_pts_per_cluster)
277 {
278 device::smoothLabelImage(labels_, depth, labels_smoothed_, NUM_PARTS, 5, 300);
279
280 //AsyncCopy<unsigned char> async_labels_download(lmap_host_);
281
282 int c;
283 labels_smoothed_.download(lmap_host_, c);
284 //async_labels_download.download(labels_smoothed_);
285
286 // cc = generalized floodfill = approximation of euclidian clusterisation
287 device::ConnectedComponents::computeEdges(labels_smoothed_, depth, NUM_PARTS, cluster_tolerance_ * cluster_tolerance_, edges_);
288 device::ConnectedComponents::labelComponents(edges_, comps_);
289
290 comps_.download(dst_labels_, c);
291
292 //async_labels_download.waitForCompeltion();
293
294 // This was sort indices to blob (sortIndicesToBlob2) method (till line 236)
295 {
296 ScopeTime time("[pcl::gpu::people::RDFBodyPartsDetector::processSmooth] : cvt");
297 std::fill(remap_.begin(), remap_.end(), -1);
298 std::fill(region_sizes_.begin(), region_sizes_.end(), 0);
299
300 std::fill(means_storage_.begin(), means_storage_.end(), 0);
301 float3* means = (float3*) &means_storage_[3];
302 int *rsizes = ®ion_sizes_[1];
303
304 for(auto &matrix : blob_matrix_)
305 matrix.clear();
306
307 for(std::size_t k = 0; k < dst_labels_.size(); ++k)
308 {
309 const PointXYZ& p = cloud[k];
310 int cc = dst_labels_[k];
311 means[cc].x += p.x;
312 means[cc].y += p.y;
313 means[cc].z += p.z;
314 ++rsizes[cc];
315 }
316
317 means[-1].z = 0; // cc == -1 means invalid
318
319 for(std::size_t k = 0; k < dst_labels_.size(); ++k)
320 {
321 int label = lmap_host_[k];
322 int cc = dst_labels_[k];
323
324 if (means[cc].z != 0 && min_pts_per_cluster <= rsizes[cc] && rsizes[cc] <= max_cluster_size_)
325 {
326 int ccindex = remap_[cc];
327 if (ccindex == -1)
328 {
329 ccindex = static_cast<int> (blob_matrix_[label].size ());
330 blob_matrix_[label].resize(ccindex + 1);
331 remap_[cc] = ccindex;
332
333 blob_matrix_[label][ccindex].label = static_cast<part_t> (label);
334 blob_matrix_[label][ccindex].mean.coeffRef(0) = means[cc].x / static_cast<float> (rsizes[cc]);
335 blob_matrix_[label][ccindex].mean.coeffRef(1) = means[cc].y / static_cast<float> (rsizes[cc]);
336 blob_matrix_[label][ccindex].mean.coeffRef(2) = means[cc].z / static_cast<float> (rsizes[cc]);
337 blob_matrix_[label][ccindex].indices.indices.reserve(rsizes[cc]);
338 }
339 blob_matrix_[label][ccindex].indices.indices.push_back(static_cast<int> (k));
340 }
341 }
342
343 int id = 0;
344 for(auto &matrix : blob_matrix_)
345 for(std::size_t b = 0; b < matrix.size(); ++b)
346 {
347 matrix[b].id = id++;
348 matrix[b].lid = static_cast<int> (b);
349 }
350 }
351 }
352
353 int
processRelations()354 pcl::gpu::people::RDFBodyPartsDetector::processRelations ()
355 {
356 return buildRelations ( blob_matrix_ );
357 }
358
359 int
processRelations(PersonAttribs::Ptr person_attribs)360 pcl::gpu::people::RDFBodyPartsDetector::processRelations (PersonAttribs::Ptr person_attribs)
361 {
362 return buildRelations ( blob_matrix_, person_attribs );
363 }
364