/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::calcHist(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::equalizeHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }

cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::cuda::CLAHE>(); }

void cv::cuda::evenLevels(OutputArray, int, int, int, Stream&) { throw_no_cuda(); }

void cv::cuda::histEven(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::histEven(InputArray, GpuMat*, int*, int*, int*, Stream&) { throw_no_cuda(); }

void cv::cuda::histRange(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::histRange(InputArray, GpuMat*, const GpuMat*, Stream&) { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */

////////////////////////////////////////////////////////////////////////
// calcHist

namespace hist
{
    void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream);
    void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, cudaStream_t stream);
}

void cv::cuda::calcHist(InputArray _src, OutputArray _hist, Stream& stream)
{
    calcHist(_src, cv::cuda::GpuMat(), _hist, stream);
}

void cv::cuda::calcHist(InputArray _src, InputArray _mask, OutputArray _hist, Stream& stream)
{
    GpuMat src = _src.getGpuMat();
    GpuMat mask = _mask.getGpuMat();

    CV_Assert( src.type() == CV_8UC1 );
    CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
    CV_Assert( mask.empty() || mask.size() == src.size() );

    _hist.create(1, 256, CV_32SC1);
    GpuMat hist = _hist.getGpuMat();

    hist.setTo(Scalar::all(0), stream);

    if (mask.empty())
        hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
    else
        hist::histogram256(src, mask, hist.ptr<int>(), StreamAccessor::getStream(stream));
}
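
// Usage sketch (illustrative only, not part of the library): building a
// 256-bin histogram of an 8-bit image, optionally restricted by a mask.
// The variable names below are hypothetical.
//
//     cv::cuda::GpuMat d_img(hostImg);            // hostImg is CV_8UC1
//     cv::cuda::GpuMat d_mask(hostMask), d_hist;  // hostMask is CV_8UC1, same size
//     cv::cuda::calcHist(d_img, d_hist);          // d_hist becomes 1x256 CV_32SC1
//     cv::cuda::calcHist(d_img, d_mask, d_hist);  // counts only pixels where mask != 0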

////////////////////////////////////////////////////////////////////////
// equalizeHist

namespace hist
{
    void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const uchar* lut, cudaStream_t stream);
    void buildLut(PtrStepSzi hist, PtrStepSzb lut, int size, cudaStream_t stream);
}

void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, Stream& _stream)
{
    GpuMat src = getInputMat(_src, _stream);

    CV_Assert( src.type() == CV_8UC1 );

    _dst.create(src.size(), src.type());
    GpuMat dst = _dst.getGpuMat();

    size_t bufSize = 256 * sizeof(int) + 256 * sizeof(uchar);

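    // One pooled allocation serves both intermediates: the first
    // 256 * sizeof(int) bytes hold the histogram, and the trailing
    // 256 bytes hold the 8-bit equalization LUT built from it.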
    BufferPool pool(_stream);
    GpuMat buf = pool.getBuffer(1, static_cast<int>(bufSize), CV_8UC1);

    GpuMat hist(1, 256, CV_32SC1, buf.data);
    GpuMat lut(1, 256, CV_8UC1, buf.data + 256 * sizeof(int));

    cuda::calcHist(src, hist, _stream);

    cudaStream_t stream = StreamAccessor::getStream(_stream);

    hist::buildLut(hist, lut, src.rows * src.cols, stream);

    hist::equalizeHist(src, dst, lut.data, stream);
}
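
// Usage sketch (illustrative only): equalizing an 8-bit image on a
// user-supplied stream. The names are hypothetical.
//
//     cv::cuda::Stream s;
//     cv::cuda::GpuMat d_src(hostImg), d_dst;     // hostImg is CV_8UC1
//     cv::cuda::equalizeHist(d_src, d_dst, s);
//     s.waitForCompletion();                      // d_dst is ready after this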

////////////////////////////////////////////////////////////////////////
// CLAHE

namespace clahe
{
    void calcLut_8U(PtrStepSzb src, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, int clipLimit, float lutScale, cudaStream_t stream);
    void calcLut_16U(PtrStepSzus src, PtrStepus lut, int tilesX, int tilesY, int2 tileSize, int clipLimit, float lutScale, PtrStepSzi hist, cudaStream_t stream);
    template <typename T> void transform(PtrStepSz<T> src, PtrStepSz<T> dst, PtrStep<T> lut, int tilesX, int tilesY, int2 tileSize, cudaStream_t stream);
}

namespace
{
    class CLAHE_Impl : public cv::cuda::CLAHE
    {
    public:
        CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);

        void apply(cv::InputArray src, cv::OutputArray dst);
        void apply(InputArray src, OutputArray dst, Stream& stream);

        void setClipLimit(double clipLimit);
        double getClipLimit() const;

        void setTilesGridSize(cv::Size tileGridSize);
        cv::Size getTilesGridSize() const;

        void collectGarbage();

    private:
        double clipLimit_;
        int tilesX_;
        int tilesY_;

        GpuMat srcExt_;
        GpuMat lut_;
        GpuMat hist_; // histogram in global memory for the CV_16UC1 case
    };

    CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
        clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
    {
    }

    void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
    {
        apply(_src, _dst, Stream::Null());
    }

    void CLAHE_Impl::apply(InputArray _src, OutputArray _dst, Stream& s)
    {
        GpuMat src = _src.getGpuMat();

        const int type = src.type();

        CV_Assert( type == CV_8UC1 || type == CV_16UC1 );

        _dst.create( src.size(), type );
        GpuMat dst = _dst.getGpuMat();

        const int histSize = type == CV_8UC1 ? 256 : 65536;

        ensureSizeIsEnough(tilesX_ * tilesY_, histSize, type, lut_);

        cudaStream_t stream = StreamAccessor::getStream(s);

        cv::Size tileSize;
        GpuMat srcForLut;

        if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
        {
            tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
            srcForLut = src;
        }
        else
        {
#ifndef HAVE_OPENCV_CUDAARITHM
            throw_no_cuda();
#else
            cv::cuda::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar(), s);
#endif

            tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
            srcForLut = srcExt_;
        }

        const int tileSizeTotal = tileSize.area();
        const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;

        int clipLimit = 0;
        if (clipLimit_ > 0.0)
        {
            clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
            clipLimit = std::max(clipLimit, 1);
        }
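
        // Worked example (illustrative): a 512x512 CV_8UC1 image with the
        // default 8x8 grid gives 64x64 tiles, so tileSizeTotal = 4096. With
        // clipLimit_ = 40 and histSize = 256 the per-bin cap becomes
        // 40 * 4096 / 256 = 640 counts; the calcLut kernels clip each tile
        // histogram at this cap and redistribute the excess before building
        // the per-tile LUTs.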

        if (type == CV_8UC1)
            clahe::calcLut_8U(srcForLut, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), clipLimit, lutScale, stream);
        else // type == CV_16UC1
        {
            ensureSizeIsEnough(tilesX_ * tilesY_, histSize, CV_32SC1, hist_);
            clahe::calcLut_16U(srcForLut, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), clipLimit, lutScale, hist_, stream);
        }

        if (type == CV_8UC1)
            clahe::transform<uchar>(src, dst, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), stream);
        else // type == CV_16UC1
            clahe::transform<ushort>(src, dst, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), stream);
    }

    void CLAHE_Impl::setClipLimit(double clipLimit)
    {
        clipLimit_ = clipLimit;
    }

    double CLAHE_Impl::getClipLimit() const
    {
        return clipLimit_;
    }

    void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
    {
        tilesX_ = tileGridSize.width;
        tilesY_ = tileGridSize.height;
    }

    cv::Size CLAHE_Impl::getTilesGridSize() const
    {
        return cv::Size(tilesX_, tilesY_);
    }

    void CLAHE_Impl::collectGarbage()
    {
        srcExt_.release();
        lut_.release();
        hist_.release();
    }
}

cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double clipLimit, cv::Size tileGridSize)
{
    return makePtr<CLAHE_Impl>(clipLimit, tileGridSize.width, tileGridSize.height);
}
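
// Usage sketch (illustrative only): contrast-limited adaptive histogram
// equalization on the GPU. The names are hypothetical.
//
//     cv::Ptr<cv::cuda::CLAHE> clahe = cv::cuda::createCLAHE(4.0, cv::Size(8, 8));
//     cv::cuda::GpuMat d_src(hostImg), d_dst;     // hostImg is CV_8UC1 or CV_16UC1
//     clahe->apply(d_src, d_dst);
//     clahe->collectGarbage();                    // release internal buffers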

////////////////////////////////////////////////////////////////////////
// NPP Histogram

namespace
{
    typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);
    typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);

    template<int SDEPTH> struct NppHistogramEvenFuncC1
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
            int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u* pBuffer);
    };
    template<int SDEPTH> struct NppHistogramEvenFuncC4
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,
            Npp32s* pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u* pBuffer);
    };

    template<int SDEPTH, typename NppHistogramEvenFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>
    struct NppHistogramEvenC1
    {
        typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;

        static void hist(const GpuMat& src, OutputArray _hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)
        {
            const int levels = histSize + 1;

            _hist.create(1, histSize, CV_32S);
            GpuMat hist = _hist.getGpuMat();

            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            int buf_size;
            get_buf_size(sz, levels, &buf_size);

            BufferPool pool(stream);
            GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);

            NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,
                lowerLevel, upperLevel, buf.ptr<Npp8u>()) );

            if (!stream)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
    struct NppHistogramEvenC4
    {
        typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;

        static void hist(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
        {
            int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};
            hist[0].create(1, histSize[0], CV_32S);
            hist[1].create(1, histSize[1], CV_32S);
            hist[2].create(1, histSize[2], CV_32S);
            hist[3].create(1, histSize[3], CV_32S);

            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};

            int buf_size;
            get_buf_size(sz, levels, &buf_size);

            BufferPool pool(stream);
            GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);

            NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buf.ptr<Npp8u>()) );

            if (!stream)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    template<int SDEPTH> struct NppHistogramRangeFuncC1
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
        typedef Npp32s level_t;
        enum {LEVEL_TYPE_CODE=CV_32SC1};

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
            const Npp32s* pLevels, int nLevels, Npp8u* pBuffer);
    };
    template<> struct NppHistogramRangeFuncC1<CV_32F>
    {
        typedef Npp32f src_t;
        typedef Npp32f level_t;
        enum {LEVEL_TYPE_CODE=CV_32FC1};

        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
            const Npp32f* pLevels, int nLevels, Npp8u* pBuffer);
    };
    template<int SDEPTH> struct NppHistogramRangeFuncC4
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
        typedef Npp32s level_t;
        enum {LEVEL_TYPE_CODE=CV_32SC1};

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],
            const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer);
    };
    template<> struct NppHistogramRangeFuncC4<CV_32F>
    {
        typedef Npp32f src_t;
        typedef Npp32f level_t;
        enum {LEVEL_TYPE_CODE=CV_32FC1};

        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],
            const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer);
    };

    template<int SDEPTH, typename NppHistogramRangeFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>
    struct NppHistogramRangeC1
    {
        typedef typename NppHistogramRangeFuncC1<SDEPTH>::src_t src_t;
        typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
        enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};

        static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, Stream& stream)
        {
            CV_Assert( levels.type() == LEVEL_TYPE_CODE && levels.rows == 1 );

            _hist.create(1, levels.cols - 1, CV_32S);
            GpuMat hist = _hist.getGpuMat();

            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            int buf_size;
            get_buf_size(sz, levels.cols, &buf_size);

            BufferPool pool(stream);
            GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);

            NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buf.ptr<Npp8u>()) );

            if (!stream)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
    struct NppHistogramRangeC4
    {
        typedef typename NppHistogramRangeFuncC4<SDEPTH>::src_t src_t;
        typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
        enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};

        static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)
        {
            CV_Assert( levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1 );
            CV_Assert( levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1 );
            CV_Assert( levels[2].type() == LEVEL_TYPE_CODE && levels[2].rows == 1 );
            CV_Assert( levels[3].type() == LEVEL_TYPE_CODE && levels[3].rows == 1 );

            hist[0].create(1, levels[0].cols - 1, CV_32S);
            hist[1].create(1, levels[1].cols - 1, CV_32S);
            hist[2].create(1, levels[2].cols - 1, CV_32S);
            hist[3].create(1, levels[3].cols - 1, CV_32S);

            Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};
            int nLevels[] = {levels[0].cols, levels[1].cols, levels[2].cols, levels[3].cols};
            const level_t* pLevels[] = {levels[0].ptr<level_t>(), levels[1].ptr<level_t>(), levels[2].ptr<level_t>(), levels[3].ptr<level_t>()};

            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            int buf_size;
            get_buf_size(sz, nLevels, &buf_size);

            BufferPool pool(stream);
            GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);

            NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, pLevels, nLevels, buf.ptr<Npp8u>()) );

            if (!stream)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
}

void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel, Stream& stream)
{
    const int kind = _levels.kind();

    _levels.create(1, nLevels, CV_32SC1);

    Mat host_levels;
    if (kind == _InputArray::CUDA_GPU_MAT)
        host_levels.create(1, nLevels, CV_32SC1);
    else
        host_levels = _levels.getMat();

    nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );

    if (kind == _InputArray::CUDA_GPU_MAT)
        _levels.getGpuMatRef().upload(host_levels, stream);
}
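
// Example (illustrative only): evenLevels(levels, 5, 0, 256) fills `levels`
// with the 5 evenly spaced boundaries {0, 64, 128, 192, 256}, which define
// 4 equal-width bins for histRange below.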

namespace hist
{
    void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, cudaStream_t stream);
}

namespace
{
    void histEven8u(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
    {
        hist.create(1, histSize, CV_32S);
        cudaSafeCall( cudaMemsetAsync(hist.data, 0, histSize * sizeof(int), stream) );
        hist::histEven8u(src, hist.ptr<int>(), histSize, lowerLevel, upperLevel, stream);
    }
}

void cv::cuda::histEven(InputArray _src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)
{
    typedef void (*hist_t)(const GpuMat& src, OutputArray hist, int levels, int lowerLevel, int upperLevel, Stream& stream);
    static const hist_t hist_callers[] =
    {
        NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,
        0,
        NppHistogramEvenC1<CV_16U, nppiHistogramEven_16u_C1R, nppiHistogramEvenGetBufferSize_16u_C1R>::hist,
        NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist
    };

    GpuMat src = _src.getGpuMat();

    if (src.depth() == CV_8U && deviceSupports(FEATURE_SET_COMPUTE_30))
    {
        histEven8u(src, hist.getGpuMatRef(), histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
        return;
    }

    CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );

    hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, stream);
}
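
// Usage sketch (illustrative only): a 32-bin histogram over [0, 256) for an
// 8-bit image. With evenly spaced levels the bin width is
// (upperLevel - lowerLevel) / histSize = 8, so bin i covers [i*8, (i+1)*8).
//
//     cv::cuda::GpuMat d_img(hostImg);            // hostImg is CV_8UC1
//     cv::cuda::GpuMat d_hist;
//     cv::cuda::histEven(d_img, d_hist, 32, 0, 256);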

void cv::cuda::histEven(InputArray _src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
{
    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], int levels[4], int lowerLevel[4], int upperLevel[4], Stream& stream);
    static const hist_t hist_callers[] =
    {
        NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,
        0,
        NppHistogramEvenC4<CV_16U, nppiHistogramEven_16u_C4R, nppiHistogramEvenGetBufferSize_16u_C4R>::hist,
        NppHistogramEvenC4<CV_16S, nppiHistogramEven_16s_C4R, nppiHistogramEvenGetBufferSize_16s_C4R>::hist
    };

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );

    hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, stream);
}

void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, Stream& stream)
{
    typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, Stream& stream);
    static const hist_t hist_callers[] =
    {
        NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,
        0,
        NppHistogramRangeC1<CV_16U, nppiHistogramRange_16u_C1R, nppiHistogramRangeGetBufferSize_16u_C1R>::hist,
        NppHistogramRangeC1<CV_16S, nppiHistogramRange_16s_C1R, nppiHistogramRangeGetBufferSize_16s_C1R>::hist,
        0,
        NppHistogramRangeC1<CV_32F, nppiHistogramRange_32f_C1R, nppiHistogramRangeGetBufferSize_32f_C1R>::hist
    };

    GpuMat src = _src.getGpuMat();
    GpuMat levels = _levels.getGpuMat();

    CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 );

    hist_callers[src.depth()](src, hist, levels, stream);
}
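
// Usage sketch (illustrative only): histRange with the 5 level boundaries
// produced by evenLevels above yields a histogram with levels.cols - 1 = 4
// bins. d_img is a hypothetical CV_8UC1 GpuMat.
//
//     cv::cuda::GpuMat d_levels, d_hist;
//     cv::cuda::evenLevels(d_levels, 5, 0, 256);  // {0, 64, 128, 192, 256}
//     cv::cuda::histRange(d_img, d_hist, d_levels);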

void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)
{
    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream);
    static const hist_t hist_callers[] =
    {
        NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,
        0,
        NppHistogramRangeC4<CV_16U, nppiHistogramRange_16u_C4R, nppiHistogramRangeGetBufferSize_16u_C4R>::hist,
        NppHistogramRangeC4<CV_16S, nppiHistogramRange_16s_C4R, nppiHistogramRangeGetBufferSize_16s_C4R>::hist,
        0,
        NppHistogramRangeC4<CV_32F, nppiHistogramRange_32f_C4R, nppiHistogramRangeGetBufferSize_32f_C4R>::hist
    };

    GpuMat src = _src.getGpuMat();

    CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4 );

    hist_callers[src.depth()](src, hist, levels, stream);
}

#endif /* !defined (HAVE_CUDA) */