1 /*
2 * Software License Agreement (BSD License)
3 *
4 * Point Cloud Library (PCL) - www.pointclouds.org
5 * Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
6 * Third party copyrights are property of their respective owners.
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * * Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials provided
19 * with the distribution.
20 * * Neither the name of Willow Garage, Inc. nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 *
37 * $Id: $
38 * Ported to PCL by Koen Buys : Attention Work in progress!
39 */
40
41 #include <iostream>
42 #include <string>
43 #include <vector>
44 #include <algorithm>
45 #include "NCV.hpp"
46
47
48 //==============================================================================
49 //
50 // Error handling helpers
51 //
52 //==============================================================================
53
54
// Default debug-output sink: writes the message verbatim to stdout.
static void stdDebugOutput(const std::string &text)
{
    std::cout << text;
}
59
60
// Currently installed debug-output callback; defaults to stdDebugOutput (stdout).
static NCVDebugOutputHandler *debugOutputHandler = stdDebugOutput;
62
63
// Routes a debug message to whichever handler is currently installed.
void ncvDebugOutput(const std::string &msg)
{
    (*debugOutputHandler)(msg);
}
68
69
// Installs a custom debug-output callback, replacing the current one.
// No validation is performed on `func` (a NULL handler would crash
// ncvDebugOutput on its next call), matching the original contract.
void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)
{
    debugOutputHandler = func;
}
74
75
76 //==============================================================================
77 //
78 // Memory wrappers and helpers
79 //
80 //==============================================================================
81
82
// Rounds `what` up to the nearest multiple of `alignment`.
// Precondition (unchecked): `alignment` is a non-zero power of two.
Ncv32u alignUp(Ncv32u what, Ncv32u alignment)
{
    return (what + (alignment - 1)) & ~(alignment - 1);
}
90
91
// Resets this pointer descriptor to the empty state.
void NCVMemPtr::clear()
{
    memtype = NCVMemoryTypeNone;
    ptr = NULL;
}
97
98
// Resets this segment descriptor: zero length and an empty base pointer.
void NCVMemSegment::clear()
{
    size = 0;
    begin.clear();
}
104
105
// Copies `sz` bytes between two buffers whose residence (host/device) is
// described by dstType/srcType.  Host-to-host copies use plain memcpy; any
// copy involving the device goes through the CUDA runtime, asynchronously on
// `cuStream` when a non-default stream is supplied, synchronously otherwise.
// Returns NCV_MEM_RESIDENCE_ERROR for unsupported type combinations and
// NCV_CUDA_ERROR (via ncvAssertCUDAReturn) on CUDA failures.
NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType, const void *src, NCVMemoryType srcType, std::size_t sz, cudaStream_t cuStream)
{
    const bool dstOnHost = (dstType == NCVMemoryTypeHostPageable) || (dstType == NCVMemoryTypeHostPinned);
    const bool srcOnHost = (srcType == NCVMemoryTypeHostPageable) || (srcType == NCVMemoryTypeHostPinned);
    const bool dstOnDev  = (dstType == NCVMemoryTypeDevice);
    const bool srcOnDev  = (srcType == NCVMemoryTypeDevice);

    // Pure host copy needs no CUDA involvement at all.
    if (dstOnHost && srcOnHost)
    {
        memcpy(dst, src, sz);
        return NCV_SUCCESS;
    }

    // Select the CUDA transfer direction; anything else is unsupported.
    cudaMemcpyKind kind;
    if (dstOnHost && srcOnDev)
    {
        kind = cudaMemcpyDeviceToHost;
    }
    else if (dstOnDev && srcOnHost)
    {
        kind = cudaMemcpyHostToDevice;
    }
    else if (dstOnDev && srcOnDev)
    {
        kind = cudaMemcpyDeviceToDevice;
    }
    else
    {
        return NCV_MEM_RESIDENCE_ERROR;
    }

    if (cuStream != 0)
    {
        ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, kind, cuStream), NCV_CUDA_ERROR);
    }
    else
    {
        ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, kind), NCV_CUDA_ERROR);
    }

    return NCV_SUCCESS;
}
171
172
// 2D (pitched) variant of memSegCopyHelper: copies `height` rows of
// `widthbytes` bytes each, honouring the source and destination row pitches.
// Host-to-host copies are done row by row with memcpy; device-involved copies
// use cudaMemcpy2D(Async), on `cuStream` when a non-default stream is given.
// Returns NCV_MEM_RESIDENCE_ERROR for unsupported type combinations and
// NCV_CUDA_ERROR (via ncvAssertCUDAReturn) on CUDA failures.
NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
                             const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
                             Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream)
{
    const bool dstOnHost = (dstType == NCVMemoryTypeHostPageable) || (dstType == NCVMemoryTypeHostPinned);
    const bool srcOnHost = (srcType == NCVMemoryTypeHostPageable) || (srcType == NCVMemoryTypeHostPinned);
    const bool dstOnDev  = (dstType == NCVMemoryTypeDevice);
    const bool srcOnDev  = (srcType == NCVMemoryTypeDevice);

    // Pure host copy: walk the rows manually, respecting both pitches.
    if (dstOnHost && srcOnHost)
    {
        for (Ncv32u row = 0; row < height; row++)
        {
            memcpy((char *)dst + row * dstPitch, (const char *)src + row * srcPitch, widthbytes);
        }
        return NCV_SUCCESS;
    }

    // Select the CUDA transfer direction; anything else is unsupported.
    cudaMemcpyKind kind;
    if (dstOnHost && srcOnDev)
    {
        kind = cudaMemcpyDeviceToHost;
    }
    else if (dstOnDev && srcOnHost)
    {
        kind = cudaMemcpyHostToDevice;
    }
    else if (dstOnDev && srcOnDev)
    {
        kind = cudaMemcpyDeviceToDevice;
    }
    else
    {
        return NCV_MEM_RESIDENCE_ERROR;
    }

    if (cuStream != 0)
    {
        ncvAssertCUDAReturn(cudaMemcpy2DAsync(dst, dstPitch, src, srcPitch, widthbytes, height, kind, cuStream), NCV_CUDA_ERROR);
    }
    else
    {
        ncvAssertCUDAReturn(cudaMemcpy2D(dst, dstPitch, src, srcPitch, widthbytes, height, kind), NCV_CUDA_ERROR);
    }

    return NCV_SUCCESS;
}
243
244
245 //===================================================================
246 //
247 // NCVMemStackAllocator class members implementation
248 //
249 //===================================================================
250
251
// Constructs a "counting" stack allocator: it owns no memory and its alloc()
// only tracks the running and peak sizes, so the capacity needed for a real
// allocator can be measured with a dry run.
NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)
    :
    currentSize(0),
    _maxSize(0),
    allocBegin(NULL),
    begin(NULL),
    end(NULL),
    _memType(NCVMemoryTypeNone),
    _alignment(alignment),
    bReusesMemory(false)
{
    // Zero is not a power of two: alignUp() relies on (alignment - 1) masks,
    // so the check must reject alignment == 0 as well (the old expression
    // `(0 & -1) == 0` silently accepted it).
    NcvBool bProperAlignment = (alignment != 0) && ((alignment & (alignment - 1)) == 0);
    ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: alignment not power of 2");
}
266
267
// Constructs a working stack allocator over `capacity` bytes of `memT`
// memory.  If `reusePtr` is non-NULL the caller's buffer is adopted (and is
// never freed by this allocator); otherwise the backing buffer is allocated
// here according to the memory type.
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, std::size_t capacity, Ncv32u alignment, void *reusePtr)
    :
    currentSize(0),
    _maxSize(0),
    allocBegin(NULL),
    _memType(memT),
    _alignment(alignment)
{
    // Zero is not a power of two; reject it explicitly (the old expression
    // accepted alignment == 0).
    NcvBool bProperAlignment = (alignment != 0) && ((alignment & (alignment-1)) == 0);
    ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: _alignment not power of 2");
    ncvAssertPrintCheck(memT != NCVMemoryTypeNone, "NCVMemStackAllocator ctor:: Incorrect allocator type");

    allocBegin = NULL;

    if (reusePtr == NULL && capacity != 0)
    {
        bReusesMemory = false;
        switch (memT)
        {
        case NCVMemoryTypeDevice:
            ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
            break;
        case NCVMemoryTypeHostPinned:
            ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
            break;
        case NCVMemoryTypeHostPageable:
            allocBegin = (Ncv8u *)malloc(capacity);
            break;
        default:;
        }
    }
    else
    {
        // NOTE(review): capacity == 0 with reusePtr == NULL also lands here,
        // marking the allocator as "reusing" memory it does not own; harmless
        // today because the 0x1 sentinel below is never freed.
        bReusesMemory = true;
        allocBegin = (Ncv8u *)reusePtr;
    }

    if (capacity == 0)
    {
        // Non-NULL sentinel so isInitialized() succeeds for an empty pool.
        allocBegin = (Ncv8u *)(0x1);
    }

    if (!isCounting())
    {
        begin = allocBegin;
        end = begin + capacity;
    }
}
316
317
// Releases the backing buffer — unless it was caller-provided or is the
// empty-pool 0x1 sentinel — and warns if allocations are still outstanding.
NCVMemStackAllocator::~NCVMemStackAllocator()
{
    if (allocBegin == NULL)
    {
        return;
    }

    ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");

    NcvBool ownsBuffer = !bReusesMemory && (allocBegin != (Ncv8u *)(0x1));
    if (ownsBuffer)
    {
        switch (_memType)
        {
        case NCVMemoryTypeDevice:
            ncvAssertCUDAReturn(cudaFree(allocBegin), );
            break;
        case NCVMemoryTypeHostPinned:
            ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
            break;
        case NCVMemoryTypeHostPageable:
            free(allocBegin);
            break;
        default:;
        }
    }

    allocBegin = NULL;
}
344
345
// Carves `size` bytes (rounded up to the allocator alignment) off the top of
// the stack into `seg`.  In counting mode only the bookkeeping is updated
// (the returned pointer is the counting allocator's NULL `begin`).
// Returns NCV_ALLOCATOR_INSUFFICIENT_CAPACITY when the pool cannot satisfy
// the request; the accounting is left untouched in that case (previously a
// failed request still inflated currentSize and _maxSize, corrupting the
// allocator's state).
NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, std::size_t size)
{
    seg.clear();
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);

    size = alignUp(size, this->_alignment);

    // Validate capacity BEFORE mutating the accounting, so an error return
    // leaves the allocator consistent.
    if (!isCounting())
    {
        std::size_t availSize = end - begin;
        ncvAssertReturn(size <= availSize, NCV_ALLOCATOR_INSUFFICIENT_CAPACITY);
    }

    this->currentSize += size;
    this->_maxSize = std::max(this->_maxSize, this->currentSize);

    seg.begin.ptr = begin;
    seg.begin.memtype = this->_memType;
    seg.size = size;
    begin += size;

    return NCV_SUCCESS;
}
368
369
// Pops a segment off the top of the stack.  Deallocation must happen in
// strict LIFO order (enforced by the DEALLOC_ORDER check); the segment
// descriptor is cleared on success.
NCVStatus NCVMemStackAllocator::dealloc(NCVMemSegment &seg)
{
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
    ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);
    ncvAssertReturn(seg.begin.ptr != NULL || isCounting(), NCV_ALLOCATOR_BAD_DEALLOC);
    ncvAssertReturn(seg.begin.ptr == begin - seg.size, NCV_ALLOCATOR_DEALLOC_ORDER);

    // Capture the size before clearing the descriptor.
    std::size_t releasedBytes = seg.size;
    begin -= releasedBytes;
    currentSize -= releasedBytes;

    seg.clear();

    ncvAssertReturn(allocBegin <= begin, NCV_ALLOCATOR_BAD_DEALLOC);

    return NCV_SUCCESS;
}
386
387
// Usable either as a pure counting allocator (valid power-of-two alignment)
// or as a real allocator with a backing buffer set up.  Parentheses make the
// original &&-before-|| precedence explicit.
NcvBool NCVMemStackAllocator::isInitialized(void) const
{
    NcvBool alignmentOk = ((this->_alignment & (this->_alignment - 1)) == 0);
    return (alignmentOk && isCounting()) || (this->allocBegin != NULL);
}
392
393
// A "counting" allocator has no memory type: it only measures sizes.
NcvBool NCVMemStackAllocator::isCounting(void) const
{
    return (_memType == NCVMemoryTypeNone);
}
398
399
// Memory type this allocator serves segments from.
NCVMemoryType NCVMemStackAllocator::memType(void) const
{
    return _memType;
}
404
405
// Alignment (bytes) every allocation size is rounded up to.
Ncv32u NCVMemStackAllocator::alignment(void) const
{
    return _alignment;
}
410
411
// High-water mark of simultaneously allocated (aligned) bytes.
size_t NCVMemStackAllocator::maxSize(void) const
{
    return _maxSize;
}
416
417
418 //===================================================================
419 //
420 // NCVMemNativeAllocator class members implementation
421 //
422 //===================================================================
423
424
// Constructs a pass-through allocator that services each request directly
// with cudaMalloc / cudaMallocHost / malloc according to `memT`.
// `alignment` is used only for the currentSize/_maxSize bookkeeping.
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment)
    :
    currentSize(0),
    _maxSize(0),
    _memType(memT),
    _alignment(alignment)
{
    // Counting mode (NCVMemoryTypeNone) is meaningless for a native allocator.
    ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );
}
434
435
// Destructor frees nothing (each segment is freed in dealloc); it only warns
// when the bookkeeping shows segments that were never deallocated.
NCVMemNativeAllocator::~NCVMemNativeAllocator()
{
    ncvAssertPrintCheck(currentSize == 0, "NCVMemNativeAllocator dtor:: detected memory leak");
}
440
441
// Allocates `size` bytes of this allocator's memory type into `seg`.
// Returns NCV_CUDA_ERROR when a CUDA allocation fails, and
// NCV_ALLOCATOR_BAD_ALLOC when a non-zero request yields a NULL pointer
// (previously a failed malloc — or an unhandled memory type — was silently
// reported as NCV_SUCCESS with a NULL segment pointer).
NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, std::size_t size)
{
    seg.clear();
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);

    switch (this->_memType)
    {
    case NCVMemoryTypeDevice:
        ncvAssertCUDAReturn(cudaMalloc(&seg.begin.ptr, size), NCV_CUDA_ERROR);
        break;
    case NCVMemoryTypeHostPinned:
        ncvAssertCUDAReturn(cudaMallocHost(&seg.begin.ptr, size), NCV_CUDA_ERROR);
        break;
    case NCVMemoryTypeHostPageable:
        seg.begin.ptr = (Ncv8u *)malloc(size);
        break;
    default:;
    }

    // A zero-byte request may legitimately return NULL; anything else must
    // produce a usable pointer.
    ncvAssertReturn(seg.begin.ptr != NULL || size == 0, NCV_ALLOCATOR_BAD_ALLOC);

    this->currentSize += alignUp(size, this->_alignment);
    this->_maxSize = std::max(this->_maxSize, this->currentSize);

    seg.begin.memtype = this->_memType;
    seg.size = size;

    return NCV_SUCCESS;
}
469
470
// Returns a segment obtained from alloc() to the system and updates the
// bookkeeping.  The segment descriptor is cleared on success.
NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg)
{
    ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);
    ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);
    ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC);

    // Account with the same aligned size that alloc() recorded.
    std::size_t accountedBytes = alignUp(seg.size, this->_alignment);
    ncvAssertReturn(currentSize >= accountedBytes, NCV_ALLOCATOR_BAD_DEALLOC);
    currentSize -= accountedBytes;

    if (this->_memType == NCVMemoryTypeDevice)
    {
        ncvAssertCUDAReturn(cudaFree(seg.begin.ptr), NCV_CUDA_ERROR);
    }
    else if (this->_memType == NCVMemoryTypeHostPinned)
    {
        ncvAssertCUDAReturn(cudaFreeHost(seg.begin.ptr), NCV_CUDA_ERROR);
    }
    else if (this->_memType == NCVMemoryTypeHostPageable)
    {
        free(seg.begin.ptr);
    }

    seg.clear();

    return NCV_SUCCESS;
}
498
499
// Initialized as long as a non-zero alignment was supplied at construction.
NcvBool NCVMemNativeAllocator::isInitialized(void) const
{
    return (_alignment != 0);
}
504
505
// A native allocator always performs real allocations, never size counting.
NcvBool NCVMemNativeAllocator::isCounting(void) const
{
    return false;
}
510
511
// Memory type this allocator serves segments from.
NCVMemoryType NCVMemNativeAllocator::memType(void) const
{
    return _memType;
}
516
517
// Alignment (bytes) used for the size bookkeeping.
Ncv32u NCVMemNativeAllocator::alignment(void) const
{
    return _alignment;
}
522
523
// High-water mark of simultaneously allocated (aligned) bytes.
size_t NCVMemNativeAllocator::maxSize(void) const
{
    return _maxSize;
}
528
529
530 //===================================================================
531 //
532 // Operations with rectangles
533 //
534 //===================================================================
535
536
// Draws the outlines of `numRects` axis-aligned rectangles into the host
// image h_dst (dstWidth x dstHeight pixels, row stride of dstStride
// elements).  Edges are clipped against the image bounds.
// Returns NCV_SUCCESS (also when numRects == 0) or an error code for invalid
// pointers/dimensions.
// Fix: the inner per-edge loops previously reused `i`, shadowing the outer
// rectangle index — correct only by accident of scoping; renamed for clarity.
template <class T>
static NCVStatus drawRectsWrapperHost(T *h_dst,
                                      Ncv32u dstStride,
                                      Ncv32u dstWidth,
                                      Ncv32u dstHeight,
                                      NcvRect32u *h_rects,
                                      Ncv32u numRects,
                                      T color)
{
    ncvAssertReturn(h_dst != NULL && h_rects != NULL, NCV_NULL_PTR);
    ncvAssertReturn(dstWidth > 0 && dstHeight > 0, NCV_DIMENSIONS_INVALID);
    ncvAssertReturn(dstStride >= dstWidth, NCV_INVALID_STEP);
    ncvAssertReturn(numRects != 0, NCV_SUCCESS);
    ncvAssertReturn(numRects <= dstWidth * dstHeight, NCV_DIMENSIONS_INVALID);

    for (Ncv32u rectId = 0; rectId < numRects; rectId++)
    {
        NcvRect32u rect = h_rects[rectId];

        // Left edge
        if (rect.x < dstWidth)
        {
            for (Ncv32u row = rect.y; row < rect.y + rect.height && row < dstHeight; row++)
            {
                h_dst[row * dstStride + rect.x] = color;
            }
        }
        // Right edge (assumes rect.width >= 1; width == 0 wraps the unsigned
        // subtraction and is not specially handled — TODO confirm callers)
        if (rect.x + rect.width - 1 < dstWidth)
        {
            for (Ncv32u row = rect.y; row < rect.y + rect.height && row < dstHeight; row++)
            {
                h_dst[row * dstStride + rect.x + rect.width - 1] = color;
            }
        }
        // Top edge
        if (rect.y < dstHeight)
        {
            for (Ncv32u col = rect.x; col < rect.x + rect.width && col < dstWidth; col++)
            {
                h_dst[rect.y * dstStride + col] = color;
            }
        }
        // Bottom edge
        if (rect.y + rect.height - 1 < dstHeight)
        {
            for (Ncv32u col = rect.x; col < rect.x + rect.width && col < dstWidth; col++)
            {
                h_dst[(rect.y + rect.height - 1) * dstStride + col] = color;
            }
        }
    }

    return NCV_SUCCESS;
}
588
589
// Host-side rectangle outlining, 8-bit single-channel image.
NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
                               Ncv32u dstStride,
                               Ncv32u dstWidth,
                               Ncv32u dstHeight,
                               NcvRect32u *h_rects,
                               Ncv32u numRects,
                               Ncv8u color)
{
    return drawRectsWrapperHost<Ncv8u>(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color);
}
600
601
// Host-side rectangle outlining, 32-bit single-channel image.
NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
                                Ncv32u dstStride,
                                Ncv32u dstWidth,
                                Ncv32u dstHeight,
                                NcvRect32u *h_rects,
                                Ncv32u numRects,
                                Ncv32u color)
{
    return drawRectsWrapperHost<Ncv32u>(h_dst, dstStride, dstWidth, dstHeight, h_rects, numRects, color);
}
612
613
// Launch configuration for the drawRects kernel: 32 threads (one warp) per
// block; the LOG2 constant supports shift-based division by the block size.
const Ncv32u NUMTHREADS_DRAWRECTS = 32;
const Ncv32u NUMTHREADS_DRAWRECTS_LOG2 = 5;
616
617
// Device kernel: each block draws one edge of one rectangle (4 blocks per
// rectangle).  Expected launch layout: at least numRects*4 blocks — possibly
// a 2D grid with gridDim.x == 65535 when numRects*4 exceeds the per-dimension
// limit — and NUMTHREADS_DRAWRECTS threads per block.  blockId decoding:
// bits [2..] select the rectangle, bit 0 vertical/horizontal, bit 1 near
// (top/left) vs far edge.  Edges are clipped against the image bounds.
template <class T>
__global__ void drawRects(T *d_dst,
                          Ncv32u dstStride,
                          Ncv32u dstWidth,
                          Ncv32u dstHeight,
                          NcvRect32u *d_rects,
                          Ncv32u numRects,
                          T color)
{
    Ncv32u blockId = blockIdx.y * 65535 + blockIdx.x;
    // The padded 2D grid can contain surplus blocks; bail out BEFORE indexing
    // d_rects.  The bound must be >= (not >): blockId == numRects * 4 would
    // read d_rects[numRects], one element past the end of the array.
    if (blockId >= numRects * 4)
    {
        return;
    }

    NcvRect32u curRect = d_rects[blockId >> 2];
    NcvBool bVertical = blockId & 0x1;
    NcvBool bTopLeft = blockId & 0x2;

    Ncv32u pt0x, pt0y;
    if (bVertical)
    {
        Ncv32u numChunks = (curRect.height + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2;

        pt0x = bTopLeft ? curRect.x : curRect.x + curRect.width - 1;
        pt0y = curRect.y;

        if (pt0x < dstWidth)
        {
            for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
            {
                Ncv32u ptY = pt0y + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
                if (ptY < pt0y + curRect.height && ptY < dstHeight)
                {
                    d_dst[ptY * dstStride + pt0x] = color;
                }
            }
        }
    }
    else
    {
        Ncv32u numChunks = (curRect.width + NUMTHREADS_DRAWRECTS - 1) >> NUMTHREADS_DRAWRECTS_LOG2;

        pt0x = curRect.x;
        pt0y = bTopLeft ? curRect.y : curRect.y + curRect.height - 1;

        if (pt0y < dstHeight)
        {
            for (Ncv32u chunkId = 0; chunkId < numChunks; chunkId++)
            {
                Ncv32u ptX = pt0x + chunkId * NUMTHREADS_DRAWRECTS + threadIdx.x;
                if (ptX < pt0x + curRect.width && ptX < dstWidth)
                {
                    d_dst[pt0y * dstStride + ptX] = color;
                }
            }
        }
    }
}
677
678
// Launches the drawRects kernel: 4 blocks per rectangle, folded into a 2D
// grid when numRects*4 exceeds the 65535 per-dimension block limit.
// Returns NCV_SUCCESS (also for numRects == 0) or an error code for invalid
// arguments / launch failure.
// Fix: the kernel is now launched on the caller-supplied `cuStream`;
// previously the stream argument was accepted but silently ignored, so the
// kernel always ran on the default stream and did not order correctly with
// the caller's async copies.
template <class T>
static NCVStatus drawRectsWrapperDevice(T *d_dst,
                                        Ncv32u dstStride,
                                        Ncv32u dstWidth,
                                        Ncv32u dstHeight,
                                        NcvRect32u *d_rects,
                                        Ncv32u numRects,
                                        T color,
                                        cudaStream_t cuStream)
{
    ncvAssertReturn(d_dst != NULL && d_rects != NULL, NCV_NULL_PTR);
    ncvAssertReturn(dstWidth > 0 && dstHeight > 0, NCV_DIMENSIONS_INVALID);
    ncvAssertReturn(dstStride >= dstWidth, NCV_INVALID_STEP);
    ncvAssertReturn(numRects <= dstWidth * dstHeight, NCV_DIMENSIONS_INVALID);

    if (numRects == 0)
    {
        return NCV_SUCCESS;
    }

    dim3 grid(numRects * 4);
    dim3 block(NUMTHREADS_DRAWRECTS);
    if (grid.x > 65535)
    {
        // Fold into 2D; the kernel guards against the surplus blocks.
        grid.y = (grid.x + 65534) / 65535;
        grid.x = 65535;
    }

    drawRects<T><<<grid, block, 0, cuStream>>>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color);

    ncvAssertCUDALastErrorReturn(NCV_CUDA_ERROR);

    return NCV_SUCCESS;
}
713
714
// Device-side rectangle outlining, 8-bit single-channel image.
NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
                                 Ncv32u dstStride,
                                 Ncv32u dstWidth,
                                 Ncv32u dstHeight,
                                 NcvRect32u *d_rects,
                                 Ncv32u numRects,
                                 Ncv8u color,
                                 cudaStream_t cuStream)
{
    return drawRectsWrapperDevice<Ncv8u>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
}
726
727
// Device-side rectangle outlining, 32-bit single-channel image.
NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
                                  Ncv32u dstStride,
                                  Ncv32u dstWidth,
                                  Ncv32u dstHeight,
                                  NcvRect32u *d_rects,
                                  Ncv32u numRects,
                                  Ncv32u color,
                                  cudaStream_t cuStream)
{
    return drawRectsWrapperDevice<Ncv32u>(d_dst, dstStride, dstWidth, dstHeight, d_rects, numRects, color, cuStream);
}
739