1 // Comes from
2 // https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066
3
4 #include <cstdio>
5 #include <iostream>
6
7 #include <assert.h>
8 #include <cuda_runtime_api.h>
9 #include <nppi_filtering_functions.h>
10
nppif_main()11 int nppif_main()
12 {
13 /**
14 * 8-bit unsigned single-channel 1D row convolution.
15 */
16 const int simgrows = 32;
17 const int simgcols = 32;
18 Npp8u *d_pSrc, *d_pDst;
19 const int nMaskSize = 3;
20 NppiSize oROI;
21 oROI.width = simgcols - nMaskSize;
22 oROI.height = simgrows;
23 const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]);
24 const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]);
25 const int simgpix = simgrows * simgcols;
26 const int dimgpix = oROI.width * oROI.height;
27 const int nSrcStep = simgcols * sizeof(d_pSrc[0]);
28 const int nDstStep = oROI.width * sizeof(d_pDst[0]);
29 const int pixval = 1;
30 const int nDivisor = 1;
31 const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval };
32 Npp32s* d_pKernel;
33 const Npp32s nAnchor = 2;
34 cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize);
35 if (err != cudaSuccess) {
36 fprintf(stderr, "Cuda error %d\n", __LINE__);
37 return 1;
38 }
39 err = cudaMalloc((void**)&d_pDst, dimgsize);
40 if (err != cudaSuccess) {
41 fprintf(stderr, "Cuda error %d\n", __LINE__);
42 return 1;
43 }
44 err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0]));
45 if (err != cudaSuccess) {
46 fprintf(stderr, "Cuda error %d\n", __LINE__);
47 return 1;
48 }
49 // set image to pixval initially
50 err = cudaMemset(d_pSrc, pixval, simgsize);
51 if (err != cudaSuccess) {
52 fprintf(stderr, "Cuda error %d\n", __LINE__);
53 return 1;
54 }
55 err = cudaMemset(d_pDst, 0, dimgsize);
56 if (err != cudaSuccess) {
57 fprintf(stderr, "Cuda error %d\n", __LINE__);
58 return 1;
59 }
60 err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]),
61 cudaMemcpyHostToDevice);
62 if (err != cudaSuccess) {
63 fprintf(stderr, "Cuda error %d\n", __LINE__);
64 return 1;
65 }
66 // copy src to dst
67 NppStatus ret =
68 nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel,
69 nMaskSize, nAnchor, nDivisor);
70 assert(ret == NPP_NO_ERROR);
71 Npp8u* h_imgres = new Npp8u[dimgpix];
72 err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost);
73 if (err != cudaSuccess) {
74 fprintf(stderr, "Cuda error %d\n", __LINE__);
75 return 1;
76 }
77 // test for filtering
78 for (int i = 0; i < dimgpix; i++) {
79 if (h_imgres[i] != (pixval * pixval * nMaskSize)) {
80 fprintf(stderr, "h_imgres at index %d failed to match\n", i);
81 return 1;
82 }
83 }
84
85 return 0;
86 }
87