1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2018 - 2020 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 #include <deal.II/base/cuda_size.h>
17 #include <deal.II/base/exceptions.h>
18 
19 #include <deal.II/lac/cuda_atomic.h>
20 #include <deal.II/lac/cuda_sparse_matrix.h>
21 
22 #ifdef DEAL_II_WITH_CUDA
23 
24 #  include <cusparse.h>
25 
26 DEAL_II_NAMESPACE_OPEN
27 
28 namespace CUDAWrappers
29 {
30   namespace internal
31   {
32     template <typename Number>
33     __global__ void
scale(Number * val,const Number a,const typename SparseMatrix<Number>::size_type N)34     scale(Number *                                       val,
35           const Number                                   a,
36           const typename SparseMatrix<Number>::size_type N)
37     {
38       const typename SparseMatrix<Number>::size_type idx =
39         threadIdx.x + blockIdx.x * blockDim.x;
40       if (idx < N)
41         val[idx] *= a;
42     }
43 
44 
45 
46     void
create_sp_mat_descr(int m,int n,int nnz,const float * A_val_dev,const int * A_row_ptr_dev,const int * A_column_index_dev,cusparseSpMatDescr_t & sp_descr)47     create_sp_mat_descr(int                   m,
48                         int                   n,
49                         int                   nnz,
50                         const float *         A_val_dev,
51                         const int *           A_row_ptr_dev,
52                         const int *           A_column_index_dev,
53                         cusparseSpMatDescr_t &sp_descr)
54     {
55       cusparseStatus_t error_code = cusparseCreateCsr(
56         &sp_descr,
57         m,
58         n,
59         nnz,
60         reinterpret_cast<void *>(const_cast<int *>(A_row_ptr_dev)),
61         reinterpret_cast<void *>(const_cast<int *>(A_column_index_dev)),
62         reinterpret_cast<void *>(const_cast<float *>(A_val_dev)),
63         CUSPARSE_INDEX_32I,
64         CUSPARSE_INDEX_32I,
65         CUSPARSE_INDEX_BASE_ZERO,
66         CUDA_R_32F);
67       AssertCusparse(error_code);
68     }
69 
70 
71 
72     void
create_sp_mat_descr(int m,int n,int nnz,const double * A_val_dev,const int * A_row_ptr_dev,const int * A_column_index_dev,cusparseSpMatDescr_t & sp_descr)73     create_sp_mat_descr(int                   m,
74                         int                   n,
75                         int                   nnz,
76                         const double *        A_val_dev,
77                         const int *           A_row_ptr_dev,
78                         const int *           A_column_index_dev,
79                         cusparseSpMatDescr_t &sp_descr)
80     {
81       cusparseStatus_t error_code = cusparseCreateCsr(
82         &sp_descr,
83         m,
84         n,
85         nnz,
86         reinterpret_cast<void *>(const_cast<int *>(A_row_ptr_dev)),
87         reinterpret_cast<void *>(const_cast<int *>(A_column_index_dev)),
88         reinterpret_cast<void *>(const_cast<double *>(A_val_dev)),
89         CUSPARSE_INDEX_32I,
90         CUSPARSE_INDEX_32I,
91         CUSPARSE_INDEX_BASE_ZERO,
92         CUDA_R_64F);
93       AssertCusparse(error_code);
94     }
95 
96 
97 
98     void
csrmv(cusparseHandle_t handle,bool transpose,int m,int n,const cusparseSpMatDescr_t sp_descr,const float * x,bool add,float * y)99     csrmv(cusparseHandle_t           handle,
100           bool                       transpose,
101           int                        m,
102           int                        n,
103           const cusparseSpMatDescr_t sp_descr,
104           const float *              x,
105           bool                       add,
106           float *                    y)
107     {
108       float               alpha = 1.;
109       float               beta  = add ? 1. : 0.;
110       cusparseOperation_t cusparse_operation =
111         transpose ? CUSPARSE_OPERATION_TRANSPOSE :
112                     CUSPARSE_OPERATION_NON_TRANSPOSE;
113 
114       // Move the data to cuSPARSE data type
115       cusparseDnVecDescr_t x_cuvec;
116       cusparseStatus_t     error_code =
117         cusparseCreateDnVec(&x_cuvec,
118                             n,
119                             reinterpret_cast<void *>(const_cast<float *>(x)),
120                             CUDA_R_32F);
121       AssertCusparse(error_code);
122 
123       cusparseDnVecDescr_t y_cuvec;
124       error_code =
125         cusparseCreateDnVec(&y_cuvec,
126                             m,
127                             reinterpret_cast<void *>(const_cast<float *>(y)),
128                             CUDA_R_32F);
129       AssertCusparse(error_code);
130 
131       // This function performs y = alpha*op(A)*x + beta*y
132       size_t buffer_size = 0;
133       error_code         = cusparseSpMV_bufferSize(handle,
134                                            cusparse_operation,
135                                            &alpha,
136                                            sp_descr,
137                                            x_cuvec,
138                                            &beta,
139                                            y_cuvec,
140                                            CUDA_R_32F,
141                                            CUSPARSE_MV_ALG_DEFAULT,
142                                            &buffer_size);
143 
144       void *      buffer          = nullptr;
145       cudaError_t cuda_error_code = cudaMalloc(&buffer, buffer_size);
146       AssertCuda(cuda_error_code);
147 
148       // execute SpMV
149       error_code = cusparseSpMV(handle,
150                                 cusparse_operation,
151                                 &alpha,
152                                 sp_descr,
153                                 x_cuvec,
154                                 &beta,
155                                 y_cuvec,
156                                 CUDA_R_32F,
157                                 CUSPARSE_MV_ALG_DEFAULT,
158                                 buffer);
159       AssertCusparse(error_code);
160 
161       cuda_error_code = cudaFree(buffer);
162       AssertCuda(cuda_error_code);
163       error_code = cusparseDestroyDnVec(x_cuvec);
164       AssertCusparse(error_code);
165       error_code = cusparseDestroyDnVec(y_cuvec);
166       AssertCusparse(error_code);
167     }
168 
169 
170 
171     void
csrmv(cusparseHandle_t handle,bool transpose,int m,int n,const cusparseSpMatDescr_t sp_descr,const double * x,bool add,double * y)172     csrmv(cusparseHandle_t           handle,
173           bool                       transpose,
174           int                        m,
175           int                        n,
176           const cusparseSpMatDescr_t sp_descr,
177           const double *             x,
178           bool                       add,
179           double *                   y)
180     {
181       double              alpha = 1.;
182       double              beta  = add ? 1. : 0.;
183       cusparseOperation_t cusparse_operation =
184         transpose ? CUSPARSE_OPERATION_TRANSPOSE :
185                     CUSPARSE_OPERATION_NON_TRANSPOSE;
186 
187       // Move the data to cuSPARSE data type
188       cusparseDnVecDescr_t x_cuvec;
189       cusparseStatus_t     error_code =
190         cusparseCreateDnVec(&x_cuvec,
191                             n,
192                             reinterpret_cast<void *>(const_cast<double *>(x)),
193                             CUDA_R_64F);
194       AssertCusparse(error_code);
195 
196       cusparseDnVecDescr_t y_cuvec;
197       error_code =
198         cusparseCreateDnVec(&y_cuvec,
199                             m,
200                             reinterpret_cast<void *>(const_cast<double *>(y)),
201                             CUDA_R_64F);
202       AssertCusparse(error_code);
203 
204       // This function performs y = alpha*op(A)*x + beta*y
205       size_t buffer_size = 0;
206       error_code         = cusparseSpMV_bufferSize(handle,
207                                            cusparse_operation,
208                                            &alpha,
209                                            sp_descr,
210                                            x_cuvec,
211                                            &beta,
212                                            y_cuvec,
213                                            CUDA_R_64F,
214                                            CUSPARSE_MV_ALG_DEFAULT,
215                                            &buffer_size);
216 
217       void *      buffer          = nullptr;
218       cudaError_t cuda_error_code = cudaMalloc(&buffer, buffer_size);
219       AssertCuda(cuda_error_code);
220 
221       // execute SpMV
222       error_code = cusparseSpMV(handle,
223                                 cusparse_operation,
224                                 &alpha,
225                                 sp_descr,
226                                 x_cuvec,
227                                 &beta,
228                                 y_cuvec,
229                                 CUDA_R_64F,
230                                 CUSPARSE_MV_ALG_DEFAULT,
231                                 buffer);
232       AssertCusparse(error_code);
233 
234       cuda_error_code = cudaFree(buffer);
235       AssertCuda(cuda_error_code);
236       error_code = cusparseDestroyDnVec(x_cuvec);
237       AssertCusparse(error_code);
238       error_code = cusparseDestroyDnVec(y_cuvec);
239       AssertCusparse(error_code);
240     }
241 
242 
243 
244     template <typename Number>
245     __global__ void
l1_norm(const typename SparseMatrix<Number>::size_type n_rows,const Number * val_dev,const int * column_index_dev,const int * row_ptr_dev,Number * sums)246     l1_norm(const typename SparseMatrix<Number>::size_type n_rows,
247             const Number *                                 val_dev,
248             const int *                                    column_index_dev,
249             const int *                                    row_ptr_dev,
250             Number *                                       sums)
251     {
252       const typename SparseMatrix<Number>::size_type row =
253         threadIdx.x + blockIdx.x * blockDim.x;
254 
255       if (row < n_rows)
256         {
257           for (int j = row_ptr_dev[row]; j < row_ptr_dev[row + 1]; ++j)
258             atomicAdd(&sums[column_index_dev[j]], abs(val_dev[j]));
259         }
260     }
261 
262 
263 
264     template <typename Number>
265     __global__ void
linfty_norm(const typename SparseMatrix<Number>::size_type n_rows,const Number * val_dev,const int * column_index_dev,const int * row_ptr_dev,Number * sums)266     linfty_norm(const typename SparseMatrix<Number>::size_type n_rows,
267                 const Number *                                 val_dev,
268                 const int *                                    column_index_dev,
269                 const int *                                    row_ptr_dev,
270                 Number *                                       sums)
271     {
272       const typename SparseMatrix<Number>::size_type row =
273         threadIdx.x + blockIdx.x * blockDim.x;
274 
275       if (row < n_rows)
276         {
277           sums[row] = (Number)0.;
278           for (int j = row_ptr_dev[row]; j < row_ptr_dev[row + 1]; ++j)
279             sums[row] += abs(val_dev[j]);
280         }
281     }
282   } // namespace internal
283 
284 
285 
286   template <typename Number>
SparseMatrix()287   SparseMatrix<Number>::SparseMatrix()
288     : nnz(0)
289     , n_rows(0)
290     , val_dev(nullptr, Utilities::CUDA::delete_device_data<Number>)
291     , column_index_dev(nullptr, Utilities::CUDA::delete_device_data<int>)
292     , row_ptr_dev(nullptr, Utilities::CUDA::delete_device_data<int>)
293     , descr(nullptr)
294     , sp_descr(nullptr)
295   {}
296 
297 
298 
299   template <typename Number>
SparseMatrix(Utilities::CUDA::Handle & handle,const::dealii::SparseMatrix<Number> & sparse_matrix_host)300   SparseMatrix<Number>::SparseMatrix(
301     Utilities::CUDA::Handle &             handle,
302     const ::dealii::SparseMatrix<Number> &sparse_matrix_host)
303     : val_dev(nullptr, Utilities::CUDA::delete_device_data<Number>)
304     , column_index_dev(nullptr, Utilities::CUDA::delete_device_data<int>)
305     , row_ptr_dev(nullptr, Utilities::CUDA::delete_device_data<int>)
306     , descr(nullptr)
307     , sp_descr(nullptr)
308   {
309     reinit(handle, sparse_matrix_host);
310   }
311 
312 
313 
314   template <typename Number>
SparseMatrix(CUDAWrappers::SparseMatrix<Number> && other)315   SparseMatrix<Number>::SparseMatrix(CUDAWrappers::SparseMatrix<Number> &&other)
316     : cusparse_handle(other.cusparse_handle)
317     , nnz(other.nnz)
318     , n_rows(other.n_rows)
319     , n_cols(other.n_cols)
320     , val_dev(std::move(other.val_dev))
321     , column_index_dev(std::move(other.column_index_dev))
322     , row_ptr_dev(std::move(other.row_ptr_dev))
323     , descr(other.descr)
324     , sp_descr(other.sp_descr)
325   {
326     other.nnz      = 0;
327     other.n_rows   = 0;
328     other.n_cols   = 0;
329     other.descr    = nullptr;
330     other.sp_descr = nullptr;
331   }
332 
333 
334 
335   template <typename Number>
~SparseMatrix()336   SparseMatrix<Number>::~SparseMatrix<Number>()
337   {
338     if (descr != nullptr)
339       {
340         const cusparseStatus_t cusparse_error_code =
341           cusparseDestroyMatDescr(descr);
342         AssertNothrowCusparse(cusparse_error_code);
343         descr = nullptr;
344       }
345 
346     if (sp_descr != nullptr)
347       {
348         const cusparseStatus_t cusparse_error_code =
349           cusparseDestroySpMat(sp_descr);
350         AssertNothrowCusparse(cusparse_error_code);
351         sp_descr = nullptr;
352       }
353 
354     nnz    = 0;
355     n_rows = 0;
356   }
357 
358 
359 
360   template <typename Number>
361   SparseMatrix<Number> &
operator =(SparseMatrix<Number> && other)362   SparseMatrix<Number>::operator=(SparseMatrix<Number> &&other)
363   {
364     cusparse_handle  = other.cusparse_handle;
365     nnz              = other.nnz;
366     n_rows           = other.n_rows;
367     n_cols           = other.n_cols;
368     val_dev          = std::move(other.val_dev);
369     column_index_dev = std::move(other.column_index_dev);
370     row_ptr_dev      = std::move(other.row_ptr_dev);
371     descr            = other.descr;
372     sp_descr         = other.sp_descr;
373 
374     other.nnz      = 0;
375     other.n_rows   = 0;
376     other.n_cols   = 0;
377     other.descr    = nullptr;
378     other.sp_descr = nullptr;
379 
380     return *this;
381   }
382 
383 
384 
385   template <typename Number>
386   void
reinit(Utilities::CUDA::Handle & handle,const::dealii::SparseMatrix<Number> & sparse_matrix_host)387   SparseMatrix<Number>::reinit(
388     Utilities::CUDA::Handle &             handle,
389     const ::dealii::SparseMatrix<Number> &sparse_matrix_host)
390   {
391     cusparse_handle                  = handle.cusparse_handle;
392     nnz                              = sparse_matrix_host.n_nonzero_elements();
393     n_rows                           = sparse_matrix_host.m();
394     n_cols                           = sparse_matrix_host.n();
395     unsigned int const  row_ptr_size = n_rows + 1;
396     std::vector<Number> val;
397     val.reserve(nnz);
398     std::vector<int> column_index;
399     column_index.reserve(nnz);
400     std::vector<int> row_ptr(row_ptr_size, 0);
401 
402     // dealii::SparseMatrix stores the diagonal first in each row so we need to
403     // do some reordering
404     for (int row = 0; row < n_rows; ++row)
405       {
406         auto         p_end   = sparse_matrix_host.end(row);
407         unsigned int counter = 0;
408         for (auto p = sparse_matrix_host.begin(row); p != p_end; ++p)
409           {
410             val.emplace_back(p->value());
411             column_index.emplace_back(p->column());
412             ++counter;
413           }
414         row_ptr[row + 1] = row_ptr[row] + counter;
415 
416         // Sort the elements in the row
417         unsigned int const offset     = row_ptr[row];
418         int const          diag_index = column_index[offset];
419         Number             diag_elem  = sparse_matrix_host.diag_element(row);
420         unsigned int       pos        = 1;
421         while ((column_index[offset + pos] < row) && (pos < counter))
422           {
423             val[offset + pos - 1]          = val[offset + pos];
424             column_index[offset + pos - 1] = column_index[offset + pos];
425             ++pos;
426           }
427         val[offset + pos - 1]          = diag_elem;
428         column_index[offset + pos - 1] = diag_index;
429       }
430 
431     // Copy the elements to the gpu
432     val_dev.reset(Utilities::CUDA::allocate_device_data<Number>(nnz));
433     cudaError_t error_code = cudaMemcpy(val_dev.get(),
434                                         val.data(),
435                                         nnz * sizeof(Number),
436                                         cudaMemcpyHostToDevice);
437     AssertCuda(error_code);
438 
439     // Copy the column indices to the gpu
440     column_index_dev.reset(Utilities::CUDA::allocate_device_data<int>(nnz));
441     AssertCuda(error_code);
442     error_code = cudaMemcpy(column_index_dev.get(),
443                             column_index.data(),
444                             nnz * sizeof(int),
445                             cudaMemcpyHostToDevice);
446     AssertCuda(error_code);
447 
448     // Copy the row pointer to the gpu
449     row_ptr_dev.reset(Utilities::CUDA::allocate_device_data<int>(row_ptr_size));
450     AssertCuda(error_code);
451     error_code = cudaMemcpy(row_ptr_dev.get(),
452                             row_ptr.data(),
453                             row_ptr_size * sizeof(int),
454                             cudaMemcpyHostToDevice);
455     AssertCuda(error_code);
456 
457     // Create the matrix descriptor
458     cusparseStatus_t cusparse_error_code = cusparseCreateMatDescr(&descr);
459     AssertCusparse(cusparse_error_code);
460     cusparse_error_code =
461       cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
462     AssertCusparse(cusparse_error_code);
463     cusparse_error_code =
464       cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
465     AssertCusparse(cusparse_error_code);
466 
467     // Create the sparse matrix descriptor
468     internal::create_sp_mat_descr(n_rows,
469                                   n_cols,
470                                   nnz,
471                                   val_dev.get(),
472                                   row_ptr_dev.get(),
473                                   column_index_dev.get(),
474                                   sp_descr);
475   }
476 
477 
478 
479   template <typename Number>
480   SparseMatrix<Number> &
operator *=(const Number factor)481   SparseMatrix<Number>::operator*=(const Number factor)
482   {
483     AssertIsFinite(factor);
484     const int n_blocks = 1 + (nnz - 1) / block_size;
485     internal::scale<Number>
486       <<<n_blocks, block_size>>>(val_dev.get(), factor, nnz);
487     AssertCudaKernel();
488 
489     return *this;
490   }
491 
492 
493 
494   template <typename Number>
495   SparseMatrix<Number> &
operator /=(const Number factor)496   SparseMatrix<Number>::operator/=(const Number factor)
497   {
498     AssertIsFinite(factor);
499     Assert(factor != Number(0.), ExcZero());
500     const int n_blocks = 1 + (nnz - 1) / block_size;
501     internal::scale<Number>
502       <<<n_blocks, block_size>>>(val_dev.get(), 1. / factor, nnz);
503     AssertCudaKernel();
504 
505     return *this;
506   }
507 
508 
509 
510   template <typename Number>
511   void
vmult(LinearAlgebra::CUDAWrappers::Vector<Number> & dst,const LinearAlgebra::CUDAWrappers::Vector<Number> & src) const512   SparseMatrix<Number>::vmult(
513     LinearAlgebra::CUDAWrappers::Vector<Number> &      dst,
514     const LinearAlgebra::CUDAWrappers::Vector<Number> &src) const
515   {
516     internal::csrmv(cusparse_handle,
517                     false,
518                     n_rows,
519                     n_cols,
520                     sp_descr,
521                     src.get_values(),
522                     false,
523                     dst.get_values());
524   }
525 
526 
527 
528   template <typename Number>
529   void
Tvmult(LinearAlgebra::CUDAWrappers::Vector<Number> & dst,const LinearAlgebra::CUDAWrappers::Vector<Number> & src) const530   SparseMatrix<Number>::Tvmult(
531     LinearAlgebra::CUDAWrappers::Vector<Number> &      dst,
532     const LinearAlgebra::CUDAWrappers::Vector<Number> &src) const
533   {
534     internal::csrmv(cusparse_handle,
535                     true,
536                     n_rows,
537                     n_cols,
538                     sp_descr,
539                     src.get_values(),
540                     false,
541                     dst.get_values());
542   }
543 
544 
545 
546   template <typename Number>
547   void
vmult_add(LinearAlgebra::CUDAWrappers::Vector<Number> & dst,const LinearAlgebra::CUDAWrappers::Vector<Number> & src) const548   SparseMatrix<Number>::vmult_add(
549     LinearAlgebra::CUDAWrappers::Vector<Number> &      dst,
550     const LinearAlgebra::CUDAWrappers::Vector<Number> &src) const
551   {
552     internal::csrmv(cusparse_handle,
553                     false,
554                     n_rows,
555                     n_cols,
556                     sp_descr,
557                     src.get_values(),
558                     true,
559                     dst.get_values());
560   }
561 
562 
563 
564   template <typename Number>
565   void
Tvmult_add(LinearAlgebra::CUDAWrappers::Vector<Number> & dst,const LinearAlgebra::CUDAWrappers::Vector<Number> & src) const566   SparseMatrix<Number>::Tvmult_add(
567     LinearAlgebra::CUDAWrappers::Vector<Number> &      dst,
568     const LinearAlgebra::CUDAWrappers::Vector<Number> &src) const
569   {
570     internal::csrmv(cusparse_handle,
571                     true,
572                     n_rows,
573                     n_cols,
574                     sp_descr,
575                     src.get_values(),
576                     true,
577                     dst.get_values());
578   }
579 
580 
581 
582   template <typename Number>
583   Number
matrix_norm_square(const LinearAlgebra::CUDAWrappers::Vector<Number> & v) const584   SparseMatrix<Number>::matrix_norm_square(
585     const LinearAlgebra::CUDAWrappers::Vector<Number> &v) const
586   {
587     LinearAlgebra::CUDAWrappers::Vector<Number> tmp = v;
588     vmult(tmp, v);
589 
590     return v * tmp;
591   }
592 
593 
594 
595   template <typename Number>
596   Number
matrix_scalar_product(const LinearAlgebra::CUDAWrappers::Vector<Number> & u,const LinearAlgebra::CUDAWrappers::Vector<Number> & v) const597   SparseMatrix<Number>::matrix_scalar_product(
598     const LinearAlgebra::CUDAWrappers::Vector<Number> &u,
599     const LinearAlgebra::CUDAWrappers::Vector<Number> &v) const
600   {
601     LinearAlgebra::CUDAWrappers::Vector<Number> tmp = v;
602     vmult(tmp, v);
603 
604     return u * tmp;
605   }
606 
607 
608 
609   template <typename Number>
610   Number
residual(LinearAlgebra::CUDAWrappers::Vector<Number> & dst,const LinearAlgebra::CUDAWrappers::Vector<Number> & x,const LinearAlgebra::CUDAWrappers::Vector<Number> & b) const611   SparseMatrix<Number>::residual(
612     LinearAlgebra::CUDAWrappers::Vector<Number> &      dst,
613     const LinearAlgebra::CUDAWrappers::Vector<Number> &x,
614     const LinearAlgebra::CUDAWrappers::Vector<Number> &b) const
615   {
616     vmult(dst, x);
617     dst.sadd(-1., 1., b);
618 
619     return dst.l2_norm();
620   }
621 
622 
623 
624   template <typename Number>
625   Number
l1_norm() const626   SparseMatrix<Number>::l1_norm() const
627   {
628     LinearAlgebra::CUDAWrappers::Vector<real_type> column_sums(n_cols);
629     const int n_blocks = 1 + (nnz - 1) / block_size;
630     internal::l1_norm<Number>
631       <<<n_blocks, block_size>>>(n_rows,
632                                  val_dev.get(),
633                                  column_index_dev.get(),
634                                  row_ptr_dev.get(),
635                                  column_sums.get_values());
636     AssertCudaKernel();
637 
638     return column_sums.linfty_norm();
639   }
640 
641 
642 
643   template <typename Number>
644   Number
linfty_norm() const645   SparseMatrix<Number>::linfty_norm() const
646   {
647     LinearAlgebra::CUDAWrappers::Vector<real_type> row_sums(n_rows);
648     const int n_blocks = 1 + (nnz - 1) / block_size;
649     internal::linfty_norm<Number>
650       <<<n_blocks, block_size>>>(n_rows,
651                                  val_dev.get(),
652                                  column_index_dev.get(),
653                                  row_ptr_dev.get(),
654                                  row_sums.get_values());
655     AssertCudaKernel();
656 
657     return row_sums.linfty_norm();
658   }
659 
660 
661 
662   template <typename Number>
663   Number
frobenius_norm() const664   SparseMatrix<Number>::frobenius_norm() const
665   {
666     LinearAlgebra::CUDAWrappers::Vector<real_type> matrix_values(nnz);
667     cudaError_t cuda_error = cudaMemcpy(matrix_values.get_values(),
668                                         val_dev.get(),
669                                         nnz * sizeof(Number),
670                                         cudaMemcpyDeviceToDevice);
671 
672     return matrix_values.l2_norm();
673   }
674 
675 
676 
677   template <typename Number>
678   std::tuple<Number *, int *, int *, cusparseMatDescr_t, cusparseSpMatDescr_t>
get_cusparse_matrix() const679   SparseMatrix<Number>::get_cusparse_matrix() const
680   {
681     return std::make_tuple(val_dev.get(),
682                            column_index_dev.get(),
683                            row_ptr_dev.get(),
684                            descr,
685                            sp_descr);
686   }
687 
688 
689 
690   template class SparseMatrix<float>;
691   template class SparseMatrix<double>;
692 } // namespace CUDAWrappers
693 DEAL_II_NAMESPACE_CLOSE
694 
695 #endif
696