/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_CORE_CUDA_HPP
#define OPENCV_CORE_CUDA_HPP

#ifndef __cplusplus
#  error cuda.hpp header must be compiled as C++
#endif

#include "opencv2/core.hpp"
#include "opencv2/core/cuda_types.hpp"

/**
  @defgroup cuda CUDA-accelerated Computer Vision
  @{
    @defgroup cudacore Core part
    @{
      @defgroup cudacore_init Initialization and Information
      @defgroup cudacore_struct Data Structures
    @}
  @}
 */

namespace cv { namespace cuda {

//! @addtogroup cudacore_struct
//! @{

//===================================================================================
// GpuMat
//===================================================================================
/** @brief Base storage class for GPU memory with reference counting.

Its interface matches the Mat interface with the following limitations:

-   no arbitrary dimensions support (only 2D)
-   no functions that return references to their data (because references on the GPU are not valid
    for the CPU)
-   no expression templates technique support

Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.

@note In contrast with Mat, in most cases GpuMat::isContinuous() == false. This means that rows are
aligned to a size depending on the hardware. A single-row GpuMat is always continuous.

@note It is not recommended to leave static or global GpuMat variables allocated, that is, to rely
on their destructors. The destruction order of such variables and of the CUDA context is undefined,
and the GPU memory release function returns an error if the CUDA context has been destroyed
beforehand.

Some member functions are described as a "Blocking Call" while some are described as a
"Non-Blocking Call". Blocking functions are synchronous with respect to the host: it is guaranteed
that the GPU operation is finished when the function returns. Non-blocking functions are
asynchronous with respect to the host: they may return even if the GPU operation is not finished.

Compared to their blocking counterparts, non-blocking functions accept Stream as an additional
argument. If a non-default stream is passed, the GPU operation may overlap with operations in other
streams.
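
A minimal sketch of the non-blocking pattern, assuming a user-created stream and page-locked host
buffers (HostMem is page-locked by default); all calls used here are declared in this header:

@code
    cv::cuda::Stream stream;
    cv::cuda::HostMem h_src(480, 640, CV_8UC1), h_dst(480, 640, CV_32FC1);
    cv::cuda::GpuMat d_src, d_dst;

    d_src.upload(h_src, stream);             // asynchronous copy to the device
    d_src.convertTo(d_dst, CV_32F, stream);  // asynchronous device-side operation
    d_dst.download(h_dst, stream);           // asynchronous copy back to the host

    stream.waitForCompletion();              // h_dst is valid only after this returns
@endcode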

@sa Mat
 */
class CV_EXPORTS_W GpuMat
{
public:
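    /** @brief Pluggable allocator used by GpuMat for device memory management.

    A minimal sketch of a custom allocator, assuming the CUDA runtime API
    (cudaMallocPitch/cudaFree) is available; illustrative only, not the built-in implementation:

    @code
        class PitchedAllocator : public cv::cuda::GpuMat::Allocator
        {
        public:
            bool allocate(cv::cuda::GpuMat* mat, int rows, int cols, size_t elemSize) CV_OVERRIDE
            {
                // Pitched allocation keeps rows aligned; the allocator is responsible
                // for filling the data, step and refcount fields.
                void* devPtr = 0;
                if (cudaMallocPitch(&devPtr, &mat->step, elemSize * cols, rows) != cudaSuccess)
                    return false;
                mat->data = static_cast<uchar*>(devPtr);
                mat->refcount = static_cast<int*>(cv::fastMalloc(sizeof(int)));
                return true;
            }

            void free(cv::cuda::GpuMat* mat) CV_OVERRIDE
            {
                cudaFree(mat->datastart);
                cv::fastFree(mat->refcount);
            }
        };
    @endcode
    */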
    class CV_EXPORTS_W Allocator
    {
    public:
        virtual ~Allocator() {}

        // allocator must fill data, step and refcount fields
        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
        virtual void free(GpuMat* mat) = 0;
    };

    //! default allocator
    CV_WRAP static GpuMat::Allocator* defaultAllocator();
    CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator);

    //! default constructor
    CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator());

    //! constructs GpuMat of the specified size and type
    CV_WRAP GpuMat(int rows, int cols, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
    CV_WRAP GpuMat(Size size, int type, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());

    //! constructs GpuMat and fills it with the specified value s
    CV_WRAP GpuMat(int rows, int cols, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());
    CV_WRAP GpuMat(Size size, int type, Scalar s, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());

    //! copy constructor
    CV_WRAP GpuMat(const GpuMat& m);

    //! constructor for GpuMat headers pointing to user-allocated data
    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

    //! creates a GpuMat header for a part of the bigger matrix
    CV_WRAP GpuMat(const GpuMat& m, Range rowRange, Range colRange);
    CV_WRAP GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from host memory (Blocking call)
    CV_WRAP explicit GpuMat(InputArray arr, GpuMat::Allocator* allocator = GpuMat::defaultAllocator());

    //! destructor - calls release()
    ~GpuMat();

    //! assignment operator
    GpuMat& operator =(const GpuMat& m);

    //! allocates new GpuMat data unless the GpuMat already has the specified size and type
    CV_WRAP void create(int rows, int cols, int type);
    CV_WRAP void create(Size size, int type);

    //! decreases the reference counter; deallocates the data when the reference counter reaches 0
    void release();

    //! swaps with the other smart pointer
    CV_WRAP void swap(GpuMat& mat);

    /** @brief Performs data upload to GpuMat (Blocking call)

    This function copies data from host memory to device memory. Being a blocking call, it is
    guaranteed that the copy operation is finished when this function returns.
    */
    CV_WRAP void upload(InputArray arr);

    /** @brief Performs data upload to GpuMat (Non-Blocking call)

    This function copies data from host memory to device memory. Being a non-blocking call, this
    function may return even if the copy operation is not finished.

    The copy operation may be overlapped with operations in other non-default streams if \p stream is
    not the default stream and \p arr is HostMem allocated with the HostMem::PAGE_LOCKED option.
    */
    CV_WRAP void upload(InputArray arr, Stream& stream);

    /** @brief Performs data download from GpuMat (Blocking call)

    This function copies data from device memory to host memory. Being a blocking call, it is
    guaranteed that the copy operation is finished when this function returns.
    */
    CV_WRAP void download(OutputArray dst) const;

    /** @brief Performs data download from GpuMat (Non-Blocking call)

    This function copies data from device memory to host memory. Being a non-blocking call, this
    function may return even if the copy operation is not finished.

    The copy operation may be overlapped with operations in other non-default streams if \p stream is
    not the default stream and \p dst is HostMem allocated with the HostMem::PAGE_LOCKED option.
    */
    CV_WRAP void download(OutputArray dst, Stream& stream) const;

    //! returns a deep copy of the GpuMat, i.e. the data is copied
    CV_WRAP GpuMat clone() const;

    //! copies the GpuMat content to device memory (Blocking call)
    CV_WRAP void copyTo(OutputArray dst) const;

    //! copies the GpuMat content to device memory (Non-Blocking call)
    CV_WRAP void copyTo(OutputArray dst, Stream& stream) const;

    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
    CV_WRAP void copyTo(OutputArray dst, InputArray mask) const;

    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
    CV_WRAP void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;

    //! sets all of the GpuMat elements to s (Blocking call)
    CV_WRAP GpuMat& setTo(Scalar s);

    //! sets all of the GpuMat elements to s (Non-Blocking call)
    CV_WRAP GpuMat& setTo(Scalar s, Stream& stream);

    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
    CV_WRAP GpuMat& setTo(Scalar s, InputArray mask);

    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
    CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);

    //! converts GpuMat to another data type (Blocking call)
    CV_WRAP void convertTo(OutputArray dst, int rtype) const;

    //! converts GpuMat to another data type (Non-Blocking call)
    CV_WRAP void convertTo(OutputArray dst, int rtype, Stream& stream) const;

    //! converts GpuMat to another data type with scaling (Blocking call)
    CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;

    //! converts GpuMat to another data type with scaling (Non-Blocking call)
    CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;

    //! converts GpuMat to another data type with scaling (Non-Blocking call)
    CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;

    CV_WRAP void assignTo(GpuMat& m, int type = -1) const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    //! returns a new GpuMat header for the specified row
    CV_WRAP GpuMat row(int y) const;

    //! returns a new GpuMat header for the specified column
    CV_WRAP GpuMat col(int x) const;

    //! ... for the specified row span
    CV_WRAP GpuMat rowRange(int startrow, int endrow) const;
    CV_WRAP GpuMat rowRange(Range r) const;

    //! ... for the specified column span
    CV_WRAP GpuMat colRange(int startcol, int endcol) const;
    CV_WRAP GpuMat colRange(Range r) const;

    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
    GpuMat operator ()(Range rowRange, Range colRange) const;
    GpuMat operator ()(Rect roi) const;

    //! creates an alternative GpuMat header for the same data, with a different
    //! number of channels and/or a different number of rows
    CV_WRAP GpuMat reshape(int cn, int rows = 0) const;

    //! locates the GpuMat header within a parent GpuMat
    CV_WRAP void locateROI(Size& wholeSize, Point& ofs) const;

    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
    CV_WRAP GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);

    //! returns true iff the GpuMat data is continuous
    //! (i.e. when there are no gaps between successive rows)
    CV_WRAP bool isContinuous() const;

    //! returns element size in bytes
    CV_WRAP size_t elemSize() const;

    //! returns the size of an element channel in bytes
    CV_WRAP size_t elemSize1() const;

    //! returns element type
    CV_WRAP int type() const;

    //! returns element depth
    CV_WRAP int depth() const;

    //! returns number of channels
    CV_WRAP int channels() const;

    //! returns step/elemSize1()
    CV_WRAP size_t step1() const;

    //! returns GpuMat size: width == number of columns, height == number of rows
    CV_WRAP Size size() const;

    //! returns true if GpuMat data is NULL
    CV_WRAP bool empty() const;

    //! returns pointer to CUDA memory
    CV_WRAP void* cudaPtr() const;

    //! internal use method: updates the continuity flag
    CV_WRAP void updateContinuityFlag();

    /*! includes several bit-fields:
    - the magic signature
    - continuity flag
    - depth
    - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    CV_PROP size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    //! when GpuMat points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    const uchar* dataend;

    //! allocator
    Allocator* allocator;
};

struct CV_EXPORTS_W GpuData
{
    explicit GpuData(size_t _size);
    ~GpuData();

    GpuData(const GpuData&) = delete;
    GpuData& operator=(const GpuData&) = delete;

    GpuData(GpuData&&) = delete;
    GpuData& operator=(GpuData&&) = delete;

    uchar* data;
    size_t size;
};

class CV_EXPORTS_W GpuMatND
{
public:
    using SizeArray = std::vector<int>;
    using StepArray = std::vector<size_t>;
    using IndexArray = std::vector<int>;

    //! destructor
    ~GpuMatND();

    //! default constructor
    GpuMatND();

    /** @overload
    @param size Array of integers specifying an n-dimensional array shape.
    @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
    */
    GpuMatND(SizeArray size, int type);

    /** @overload
    @param size Array of integers specifying an n-dimensional array shape.
    @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
    data, which means that no data is copied. This operation is very efficient and can be used to
    process external data using OpenCV functions. The external data is not automatically deallocated, so
    you should take care of it.
    @param step Array of _size.size()-1 steps in case of a multi-dimensional array (the last step is always
    set to the element size). If not specified, the matrix is assumed to be continuous.
    */
    GpuMatND(SizeArray size, int type, void* data, StepArray step = StepArray());

    /** @brief Allocates GPU memory.
    If some GPU memory is already allocated, this method may choose to reuse it, but only under the
    following conditions: the existing memory has the same size and type, it was not externally
    allocated, it is continuous (i.e., isContinuous() is true), and it is not a sub-matrix of another
    GpuMatND (i.e., isSubmatrix() is false). In other words, this method guarantees that the GPU
    memory it allocates is always continuous and is not a sub-region of another GpuMatND.
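
    A minimal usage sketch (illustrative):
    @code
        cv::cuda::GpuMatND volume;
        volume.create({16, 128, 128}, CV_32FC1);  // 16 x 128 x 128 single-channel array
        CV_Assert(volume.isContinuous() && !volume.isSubmatrix());
    @endcode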
    */
    void create(SizeArray size, int type);

    void release();

    void swap(GpuMatND& m) noexcept;

    /** @brief Creates a full copy of the array and the underlying data.
    The method creates a full copy of the array. It mimics the behavior of Mat::clone(), i.e.
    the original step is not taken into account. So, the array copy is a continuous array
    occupying total()\*elemSize() bytes.
    */
    GpuMatND clone() const;

    /** @overload
    This overload is non-blocking, so it may return even if the copy operation is not finished.
    */
    GpuMatND clone(Stream& stream) const;

    /** @brief Extracts a sub-matrix.
    The operator makes a new header for the specified sub-array of \*this.
    The operator is an O(1) operation, that is, no matrix data is copied.
    @param ranges Array of selected ranges along each dimension.
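
    A hedged sketch of range-based selection (illustrative):
    @code
        cv::cuda::GpuMatND nd({4, 8, 8}, CV_32FC1);
        // Select planes 1..2 (O(1), no data copied; the result is a sub-matrix view).
        cv::cuda::GpuMatND sub = nd({cv::Range(1, 3), cv::Range::all(), cv::Range::all()});
    @endcode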
    */
    GpuMatND operator()(const std::vector<Range>& ranges) const;

    /** @brief Creates a GpuMat header for a 2D plane part of an n-dim matrix.
    @note The returned GpuMat is constructed with the constructor for user-allocated data.
    That is, it does not perform reference counting.
    @note This function does not increment this GpuMatND's reference counter.
    */
    GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const;

    /** @overload
    Creates a GpuMat header if this GpuMatND is effectively 2D.
    @note The returned GpuMat is constructed with the constructor for user-allocated data.
    That is, it does not perform reference counting.
    @note This function does not increment this GpuMatND's reference counter.
    */
    GpuMat createGpuMatHeader() const;

    /** @brief Extracts a 2D plane part of an n-dim matrix.
    It differs from createGpuMatHeader(IndexArray, Range, Range) in that it clones a part of this
    GpuMatND to the returned GpuMat.
    @note This operator does not increment this GpuMatND's reference counter.
    */
    GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const;

    /** @brief Extracts a 2D plane part of an n-dim matrix if this GpuMatND is effectively 2D.
    It differs from createGpuMatHeader() in that it clones a part of this GpuMatND.
    @note This operator does not increment this GpuMatND's reference counter.
    */
    operator GpuMat() const;

    GpuMatND(const GpuMatND&) = default;
    GpuMatND& operator=(const GpuMatND&) = default;

#if defined(__GNUC__) && __GNUC__ < 5
    // error: function '...' defaulted on its first declaration with an exception-specification
    // that differs from the implicit declaration '...'

    GpuMatND(GpuMatND&&) = default;
    GpuMatND& operator=(GpuMatND&&) = default;
#else
    GpuMatND(GpuMatND&&) noexcept = default;
    GpuMatND& operator=(GpuMatND&&) noexcept = default;
#endif

    void upload(InputArray src);
    void upload(InputArray src, Stream& stream);
    void download(OutputArray dst) const;
    void download(OutputArray dst, Stream& stream) const;

    //! returns true iff the GpuMatND data is continuous
    //! (i.e. when there are no gaps between successive rows)
    bool isContinuous() const;

    //! returns true if the matrix is a sub-matrix of another matrix
    bool isSubmatrix() const;

    //! returns element size in bytes
    size_t elemSize() const;

    //! returns the size of an element channel in bytes
    size_t elemSize1() const;

    //! returns true if data is null
    bool empty() const;

    //! returns true if not empty and points to external (user-allocated) GPU memory
    bool external() const;

    //! returns pointer to the first byte of the GPU memory
    uchar* getDevicePtr() const;

    //! returns the total number of array elements
    size_t total() const;

    //! returns the size of underlying memory in bytes
    size_t totalMemSize() const;

    //! returns element type
    int type() const;

private:
    //! internal use
    void setFields(SizeArray size, int type, StepArray step = StepArray());

public:
    /*! includes several bit-fields:
    - the magic signature
    - continuity flag
    - depth
    - number of channels
    */
    int flags;

    //! matrix dimensionality
    int dims;

    //! shape of this array
    SizeArray size;

    /*! step values
    Their semantics are identical to the semantics of step for Mat.
    */
    StepArray step;

private:
    /*! internal use
    If this GpuMatND holds external memory, this is empty.
    */
    std::shared_ptr<GpuData> data_;

    /*! internal use
    If this GpuMatND manages memory with reference counting, this value is
    always equal to data_->data. If this GpuMatND holds external memory,
    data_ is empty and data points to the external memory.
    */
    uchar* data;

    /*! internal use
    If this GpuMatND is a sub-matrix of a larger matrix, this value is the
    difference of the first byte between the sub-matrix and the whole matrix.
    */
    size_t offset;
};

/** @brief Creates a continuous matrix.

@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).

A matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
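
A minimal sketch (illustrative):
@code
    cv::cuda::GpuMat buf;
    cv::cuda::createContinuous(480, 640, CV_8UC1, buf);
    CV_Assert(buf.isContinuous());  // rows are packed with no gaps
@endcode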
 */
CV_EXPORTS_W void createContinuous(int rows, int cols, int type, OutputArray arr);

/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.

@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.

The function does not reallocate memory if the matrix has the proper attributes already.
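
A minimal sketch (illustrative); a typical use is reusing one buffer across inputs of varying size:
@code
    cv::cuda::GpuMat buf;
    cv::cuda::ensureSizeIsEnough(720, 1280, CV_8UC3, buf);  // allocates
    cv::cuda::ensureSizeIsEnough(480, 640, CV_8UC3, buf);   // buffer is big enough, no reallocation
@endcode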
 */
CV_EXPORTS_W void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);

/** @brief BufferPool for use with CUDA streams

BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
only useful when enabled with #setBufferPoolUsage.

@code
    setBufferPoolUsage(true);
@endcode

@note #setBufferPoolUsage must be called \em before any Stream declaration.

Users may specify a custom allocator for Stream and may implement their own stream-based
functions utilizing the same underlying GPU memory management.

If a custom allocator is not specified, BufferPool utilizes StackAllocator by
default. StackAllocator allocates a chunk of GPU device memory beforehand,
and when a GpuMat is declared later on, it is given the pre-allocated memory.
This kind of strategy reduces the number of calls for memory allocating APIs
such as cudaMalloc or cudaMallocPitch.

Below is an example that utilizes BufferPool with StackAllocator:

@code
    #include <opencv2/opencv.hpp>

    using namespace cv;
    using namespace cv::cuda;

    int main()
    {
        setBufferPoolUsage(true);                               // Tell OpenCV that we are going to utilize BufferPool
        setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2);  // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)

        Stream stream1, stream2;                                // Each stream uses 1 stack
        BufferPool pool1(stream1), pool2(stream2);

        GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1);   // 16MB
        GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3);   // 48MB, pool1 is now full

        GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1);   // 1MB
        GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3);   // 3MB

        cvtColor(d_src1, d_dst1, COLOR_GRAY2BGR, 0, stream1);
        cvtColor(d_src2, d_dst2, COLOR_GRAY2BGR, 0, stream2);
    }
@endcode

If we allocate another GpuMat on pool1 in the above example, it will be carried out by
the DefaultAllocator since the stack for pool1 is full.

@code
    GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1);   // Stack for pool1 is full, memory is allocated with DefaultAllocator
@endcode

If a third stream is declared in the above example, allocating with #getBuffer
within that stream will also be carried out by the DefaultAllocator because we've run out of
stacks.

@code
    Stream stream3;                                         // Only 2 stacks were allocated, we've run out of stacks
    BufferPool pool3(stream3);
    GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1);   // Memory is allocated with DefaultAllocator
@endcode

@warning When utilizing StackAllocator, deallocation order is important.

Just like a stack, deallocation must be done in LIFO order. Below is an example of
erroneous usage that violates the LIFO rule. If OpenCV is compiled in Debug mode, this
sample code will emit a CV_Assert error.

@code
    int main()
    {
        setBufferPoolUsage(true);                               // Tell OpenCV that we are going to utilize BufferPool
        Stream stream;                                          // A default size (10 MB) stack is allocated to this stream
        BufferPool pool(stream);

        GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1);      // Allocate mat1 (1MB)
        GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1);      // Allocate mat2 (1MB)

        mat1.release();                                         // erroneous usage: mat2 must be deallocated before mat1
    }
@endcode

Since C++ local variables are destroyed in the reverse order of construction,
the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated
and the corresponding memory is automatically returned to the pool for later usage.

@code
    int main()
    {
        setBufferPoolUsage(true);                               // Tell OpenCV that we are going to utilize BufferPool
        setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2);  // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)

        Stream stream1, stream2;                                // Each stream uses 1 stack
        BufferPool pool1(stream1), pool2(stream2);

        for (int i = 0; i < 10; i++)
        {
            GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1);   // 16MB
            GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3);   // 48MB, pool1 is now full

            GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1);   // 1MB
            GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3);   // 3MB

            d_src1.setTo(Scalar(i), stream1);
            d_src2.setTo(Scalar(i), stream2);

            cvtColor(d_src1, d_dst1, COLOR_GRAY2BGR, 0, stream1);
            cvtColor(d_src2, d_dst2, COLOR_GRAY2BGR, 0, stream2);
                                                                    // The order of destruction of the local variables is:
                                                                    //   d_dst2 => d_src2 => d_dst1 => d_src1
                                                                    // LIFO rule is satisfied, this code runs without error
        }
    }
@endcode
 */
class CV_EXPORTS_W BufferPool
{
public:

    //! Gets the BufferPool for the given stream.
    explicit BufferPool(Stream& stream);

    //! Allocates a new GpuMat of given size and type.
    CV_WRAP GpuMat getBuffer(int rows, int cols, int type);

    //! Allocates a new GpuMat of given size and type.
    CV_WRAP GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }

    //! Returns the allocator associated with the stream.
    CV_WRAP Ptr<GpuMat::Allocator> getAllocator() const { return allocator_; }

private:
    Ptr<GpuMat::Allocator> allocator_;
};

//! BufferPool management (must be called before Stream creation)
CV_EXPORTS_W void setBufferPoolUsage(bool on);
CV_EXPORTS_W void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);

//===================================================================================
// HostMem
//===================================================================================

/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.

Its interface is also Mat-like but with additional memory type parameters.

-   **PAGE_LOCKED** sets a page-locked memory type used commonly for fast and asynchronous
    uploading/downloading data from/to the GPU.
-   **SHARED** specifies a zero-copy memory allocation that enables mapping the host memory to GPU
    address space, if supported.
-   **WRITE_COMBINED** sets the write-combined buffer that is not cached by the CPU. Such buffers are
    used to supply the GPU with data when the GPU only reads it. The advantage is better CPU cache
    utilization.

@note The allocation size of such memory types is usually limited. For more details, see the *CUDA 2.2
Pinned Memory APIs* document or the *CUDA C Programming Guide*.
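
A minimal sketch of a page-locked buffer feeding an asynchronous upload (illustrative):
@code
    cv::cuda::HostMem page_locked(480, 640, CV_8UC1);  // PAGE_LOCKED by default
    cv::Mat h_frame = page_locked.createMatHeader();   // fill via the regular Mat interface

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_frame;
    d_frame.upload(page_locked, stream);               // may overlap with work in other streams
@endcode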
 */
class CV_EXPORTS_W HostMem
{
public:
    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };

    static MatAllocator* getAllocator(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    CV_WRAP explicit HostMem(HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    HostMem(const HostMem& m);

    CV_WRAP HostMem(int rows, int cols, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);
    CV_WRAP HostMem(Size size, int type, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    //! creates from host memory, copying the data
    CV_WRAP explicit HostMem(InputArray arr, HostMem::AllocType alloc_type = HostMem::AllocType::PAGE_LOCKED);

    ~HostMem();

    HostMem& operator =(const HostMem& m);

    //! swaps with the other smart pointer
    CV_WRAP void swap(HostMem& b);

    //! returns a deep copy of the matrix, i.e. the data is copied
    CV_WRAP HostMem clone() const;

    //! allocates new matrix data unless the matrix already has the specified size and type.
    CV_WRAP void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! creates an alternative HostMem header for the same data, with a different
    //! number of channels and/or a different number of rows
    CV_WRAP HostMem reshape(int cn, int rows = 0) const;

    //! decrements the reference counter and releases the memory if needed.
    void release();

    //! returns a matrix header with disabled reference counting for the HostMem data.
    CV_WRAP Mat createMatHeader() const;

    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
    for it.

    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
    eliminates an extra copy.
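
    A minimal sketch, assuming the device reports canMapHostMemory() (illustrative):
    @code
        cv::cuda::HostMem shared(480, 640, CV_8UC1, cv::cuda::HostMem::SHARED);
        cv::cuda::GpuMat d_view = shared.createGpuMatHeader();  // same memory, no copy
    @endcode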
     */
    GpuMat createGpuMatHeader() const;

    // Please see cv::Mat for descriptions
    CV_WRAP bool isContinuous() const;
    CV_WRAP size_t elemSize() const;
    CV_WRAP size_t elemSize1() const;
    CV_WRAP int type() const;
    CV_WRAP int depth() const;
    CV_WRAP int channels() const;
    CV_WRAP size_t step1() const;
    CV_WRAP Size size() const;
    CV_WRAP bool empty() const;

    // Please see cv::Mat for descriptions
    int flags;
    int rows, cols;
    CV_PROP size_t step;

    uchar* data;
    int* refcount;

    uchar* datastart;
    const uchar* dataend;

    AllocType alloc_type;
};

/** @brief Page-locks the memory of a matrix and maps it for the device(s).

@param m Input matrix.
 */
CV_EXPORTS_W void registerPageLocked(Mat& m);

/** @brief Unmaps the memory of a matrix and makes it pageable again.

@param m Input matrix.
 */
CV_EXPORTS_W void unregisterPageLocked(Mat& m);

//===================================================================================
// Stream
//===================================================================================

/** @brief This class encapsulates a queue of asynchronous calls.

@note Currently, you may face problems if an operation is enqueued twice with different data. Some
functions use constant GPU memory, and the next call may update the memory before the previous one
has finished. But calling different operations asynchronously is safe because each operation
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
also safe.

@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.

@code
void thread1()
{
    cv::cuda::Stream stream1;
    cv::cuda::func1(..., stream1);
}

void thread2()
{
    cv::cuda::Stream stream2;
    cv::cuda::func2(..., stream2);
}
@endcode

@note By default, all CUDA routines are launched in the Stream::Null() object if a stream is not
specified by the user. In a multi-threading environment, the stream objects must be passed explicitly
(see the previous note).
 */
class CV_EXPORTS_W Stream
{
    typedef void (Stream::*bool_type)() const;
    void this_type_does_not_support_comparisons() const {}

public:
    typedef void (*StreamCallback)(int status, void* userData);

    //! creates a new asynchronous stream
    CV_WRAP Stream();

    //! creates a new asynchronous stream with a custom allocator
    CV_WRAP Stream(const Ptr<GpuMat::Allocator>& allocator);

    /** @brief creates a new Stream using the cudaFlags argument to determine the behaviors of the stream

    @note The cudaFlags parameter is passed to the underlying API cudaStreamCreateWithFlags() and
    supports the same parameter values.
    @code
        // creates an OpenCV cuda::Stream that manages an asynchronous, non-blocking,
        // non-default CUDA stream
        cv::cuda::Stream cvStream(cudaStreamNonBlocking);
    @endcode
     */
    CV_WRAP Stream(const size_t cudaFlags);

    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
    */
    CV_WRAP bool queryIfComplete() const;

    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
    */
    CV_WRAP void waitForCompletion();

    /** @brief Makes a compute stream wait on an event.
    */
    CV_WRAP void waitEvent(const Event& event);

    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
    completed.

    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
    serialized.
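
    A minimal callback sketch (illustrative; assumes <atomic> is available, and the callback body
    does plain host work only, as required by the note above):
    @code
        static void onStreamDone(int /*status*/, void* userData)
        {
            // Safe: no CUDA API calls, no synchronization with outstanding device work.
            static_cast<std::atomic<bool>*>(userData)->store(true);
        }

        std::atomic<bool> done(false);
        stream.enqueueHostCallback(onStreamDone, &done);
    @endcode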
     */
    void enqueueHostCallback(StreamCallback callback, void* userData);

    //! returns the Stream object for the default CUDA stream
    CV_WRAP static Stream& Null();

    //! returns true if stream object is not default (!= 0)
    operator bool_type() const;

    //! returns pointer to the CUDA stream
    CV_WRAP void* cudaPtr() const;

    class Impl;

private:
    Ptr<Impl> impl_;
    Stream(const Ptr<Impl>& impl);

    friend struct StreamAccessor;
    friend class BufferPool;
    friend class DefaultDeviceInitializer;
};

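/** @brief This class encapsulates a CUDA event.

A minimal timing sketch (illustrative; both events use the default flags, which record timing data):
@code
    cv::cuda::Stream stream;
    cv::cuda::Event start, stop;

    start.record(stream);
    // ... enqueue asynchronous GPU work on the same stream ...
    stop.record(stream);

    stop.waitForCompletion();
    float ms = cv::cuda::Event::elapsedTime(start, stop);
@endcode
 */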
class CV_EXPORTS_W Event
{
public:
    enum CreateFlags
    {
        DEFAULT        = 0x00,  /**< Default event flag */
        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DISABLE_TIMING must be set */
    };

    CV_WRAP explicit Event(Event::CreateFlags flags = Event::CreateFlags::DEFAULT);

    //! records an event
    CV_WRAP void record(Stream& stream = Stream::Null());

    //! queries an event's status
    CV_WRAP bool queryIfComplete() const;

    //! waits for an event to complete
    CV_WRAP void waitForCompletion();

    //! computes the elapsed time between events
    CV_WRAP static float elapsedTime(const Event& start, const Event& end);

    class Impl;

private:
    Ptr<Impl> impl_;
    Event(const Ptr<Impl>& impl);

    friend struct EventAccessor;
};

//! @} cudacore_struct

//===================================================================================
// Initialization & Info
//===================================================================================

//! @addtogroup cudacore_init
//! @{

/** @brief Returns the number of installed CUDA-enabled devices.

Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support,
this function returns 0. If the CUDA driver is not installed, or is incompatible, this function
returns -1.
 */
CV_EXPORTS_W int getCudaEnabledDeviceCount();

/** @brief Sets a device and initializes it for the current thread.

@param device System index of a CUDA device starting with 0.

If the call of this function is omitted, a default device is initialized at the first CUDA usage.
 */
CV_EXPORTS_W void setDevice(int device);

/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
 */
CV_EXPORTS_W int getDevice();

/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.

Any subsequent API call to this device will reinitialize the device.
 */
CV_EXPORTS_W void resetDevice();

/** @brief Enumeration providing CUDA computing features.
 */
enum FeatureSet
{
    FEATURE_SET_COMPUTE_10 = 10,
    FEATURE_SET_COMPUTE_11 = 11,
    FEATURE_SET_COMPUTE_12 = 12,
    FEATURE_SET_COMPUTE_13 = 13,
    FEATURE_SET_COMPUTE_20 = 20,
    FEATURE_SET_COMPUTE_21 = 21,
    FEATURE_SET_COMPUTE_30 = 30,
    FEATURE_SET_COMPUTE_32 = 32,
    FEATURE_SET_COMPUTE_35 = 35,
    FEATURE_SET_COMPUTE_50 = 50,

    GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
    SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
    NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};

//! checks whether the current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);

/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
built for.

According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
capability can always be compiled to binary code of greater or equal compute capability".
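
A hedged sketch: run a double-precision path only when both the build and the current device
support it (illustrative):
@code
    if (cv::cuda::TargetArchs::builtWith(cv::cuda::NATIVE_DOUBLE) &&
        cv::cuda::deviceSupports(cv::cuda::NATIVE_DOUBLE))
    {
        // ... double-precision code path ...
    }
@endcode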
 */
class CV_EXPORTS_W TargetArchs
{
public:
    /** @brief The following method checks whether the module was built with the support of the given feature:

    @param feature_set Features to be checked. See cuda::FeatureSet.
     */
    static bool builtWith(FeatureSet feature_set);

    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
    code for the given architecture(s):

    @param major Major compute capability version.
    @param minor Minor compute capability version.
     */
    CV_WRAP static bool has(int major, int minor);
    CV_WRAP static bool hasPtx(int major, int minor);
    CV_WRAP static bool hasBin(int major, int minor);

    CV_WRAP static bool hasEqualOrLessPtx(int major, int minor);
    CV_WRAP static bool hasEqualOrGreater(int major, int minor);
    CV_WRAP static bool hasEqualOrGreaterPtx(int major, int minor);
    CV_WRAP static bool hasEqualOrGreaterBin(int major, int minor);
};

/** @brief Class providing functionality for querying the specified GPU properties.
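
A minimal enumeration sketch (illustrative; assumes <iostream> is available):
@code
    int n = cv::cuda::getCudaEnabledDeviceCount();
    for (int i = 0; i < n; i++)
    {
        cv::cuda::DeviceInfo info(i);
        if (info.isCompatible())
            std::cout << i << ": " << info.name() << std::endl;
    }
@endcode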
 */
class CV_EXPORTS_W DeviceInfo
{
public:
    //! creates DeviceInfo object for the current GPU
    CV_WRAP DeviceInfo();

    /** @brief The constructors.

    @param device_id System index of the CUDA device starting with 0.

    Constructs the DeviceInfo object for the specified device. If the device_id parameter is omitted, it
    constructs an object for the current device.
     */
    CV_WRAP DeviceInfo(int device_id);

    /** @brief Returns system index of the CUDA device starting with 0.
    */
    CV_WRAP int deviceID() const;

    //! ASCII string identifying device
    const char* name() const;

    //! global memory available on device in bytes
    CV_WRAP size_t totalGlobalMem() const;

    //! shared memory available per block in bytes
    CV_WRAP size_t sharedMemPerBlock() const;

    //! 32-bit registers available per block
    CV_WRAP int regsPerBlock() const;

    //! warp size in threads
    CV_WRAP int warpSize() const;

    //! maximum pitch in bytes allowed by memory copies
    CV_WRAP size_t memPitch() const;

    //! maximum number of threads per block
    CV_WRAP int maxThreadsPerBlock() const;

    //! maximum size of each dimension of a block
    CV_WRAP Vec3i maxThreadsDim() const;

    //! maximum size of each dimension of a grid
    CV_WRAP Vec3i maxGridSize() const;

    //! clock frequency in kilohertz
    CV_WRAP int clockRate() const;

    //! constant memory available on device in bytes
    CV_WRAP size_t totalConstMem() const;

    //! major compute capability
    CV_WRAP int majorVersion() const;

    //! minor compute capability
    CV_WRAP int minorVersion() const;

    //! alignment requirement for textures
    CV_WRAP size_t textureAlignment() const;

    //! pitch alignment requirement for texture references bound to pitched memory
    CV_WRAP size_t texturePitchAlignment() const;

    //! number of multiprocessors on device
    CV_WRAP int multiProcessorCount() const;

    //! specifies whether there is a run-time limit on kernels
    CV_WRAP bool kernelExecTimeoutEnabled() const;

    //! device is integrated as opposed to discrete
    CV_WRAP bool integrated() const;

    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
    CV_WRAP bool canMapHostMemory() const;

    enum ComputeMode
    {
        ComputeModeDefault,         /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
    };

    //! compute mode
    CV_WRAP DeviceInfo::ComputeMode computeMode() const;

    //! maximum 1D texture size
    CV_WRAP int maxTexture1D() const;

    //! maximum 1D mipmapped texture size
    CV_WRAP int maxTexture1DMipmap() const;

    //! maximum size for 1D textures bound to linear memory
    CV_WRAP int maxTexture1DLinear() const;

    //! maximum 2D texture dimensions
    CV_WRAP Vec2i maxTexture2D() const;

    //! maximum 2D mipmapped texture dimensions
    CV_WRAP Vec2i maxTexture2DMipmap() const;

    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
    CV_WRAP Vec3i maxTexture2DLinear() const;

    //! maximum 2D texture dimensions if texture gather operations have to be performed
    CV_WRAP Vec2i maxTexture2DGather() const;

    //! maximum 3D texture dimensions
    CV_WRAP Vec3i maxTexture3D() const;

    //! maximum Cubemap texture dimensions
    CV_WRAP int maxTextureCubemap() const;

    //! maximum 1D layered texture dimensions
    CV_WRAP Vec2i maxTexture1DLayered() const;

    //! maximum 2D layered texture dimensions
    CV_WRAP Vec3i maxTexture2DLayered() const;

    //! maximum Cubemap layered texture dimensions
    CV_WRAP Vec2i maxTextureCubemapLayered() const;

    //! maximum 1D surface size
    CV_WRAP int maxSurface1D() const;

    //! maximum 2D surface dimensions
    CV_WRAP Vec2i maxSurface2D() const;

    //! maximum 3D surface dimensions
    CV_WRAP Vec3i maxSurface3D() const;

    //! maximum 1D layered surface dimensions
    CV_WRAP Vec2i maxSurface1DLayered() const;

    //! maximum 2D layered surface dimensions
    CV_WRAP Vec3i maxSurface2DLayered() const;

    //! maximum Cubemap surface dimensions
    CV_WRAP int maxSurfaceCubemap() const;

    //! maximum Cubemap layered surface dimensions
    CV_WRAP Vec2i maxSurfaceCubemapLayered() const;

    //! alignment requirements for surfaces
    CV_WRAP size_t surfaceAlignment() const;

    //! device can possibly execute multiple kernels concurrently
    CV_WRAP bool concurrentKernels() const;

    //! device has ECC support enabled
    CV_WRAP bool ECCEnabled() const;

    //! PCI bus ID of the device
    CV_WRAP int pciBusID() const;

    //! PCI device ID of the device
    CV_WRAP int pciDeviceID() const;

    //! PCI domain ID of the device
    CV_WRAP int pciDomainID() const;

    //! true if device is a Tesla device using TCC driver, false otherwise
    CV_WRAP bool tccDriver() const;

    //! number of asynchronous engines
    CV_WRAP int asyncEngineCount() const;

    //! device shares a unified address space with the host
    CV_WRAP bool unifiedAddressing() const;

    //! peak memory clock frequency in kilohertz
    CV_WRAP int memoryClockRate() const;

    //! global memory bus width in bits
    CV_WRAP int memoryBusWidth() const;

    //! size of L2 cache in bytes
    CV_WRAP int l2CacheSize() const;

    //! maximum resident threads per multiprocessor
    CV_WRAP int maxThreadsPerMultiProcessor() const;

    //! gets free and total device memory
    CV_WRAP void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    CV_WRAP size_t freeMemory() const;
    CV_WRAP size_t totalMemory() const;

    /** @brief Provides information on CUDA feature support.

    @param feature_set Features to be checked. See cuda::FeatureSet.

    This function returns true if the device has the specified CUDA feature. Otherwise, it returns
    false.
     */
    bool supports(FeatureSet feature_set) const;

    /** @brief Checks the CUDA module and device compatibility.

    This function returns true if the CUDA module can be run on the specified device. Otherwise, it
    returns false.
     */
    CV_WRAP bool isCompatible() const;

private:
    int device_id_;
};

CV_EXPORTS_W void printCudaDeviceInfo(int device);
CV_EXPORTS_W void printShortCudaDeviceInfo(int device);

/** @brief Converts an array to half-precision floating point numbers.

@param _src input array.
@param _dst output array.
@param stream Stream for the asynchronous version.
@sa convertFp16
*/
CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());

//! @} cudacore_init

}} // namespace cv { namespace cuda {


#include "opencv2/core/cuda.inl.hpp"

#endif /* OPENCV_CORE_CUDA_HPP */