// ---------------------------------------------------------------------
//
// Copyright (C) 2018 - 2020 by the deal.II authors
//
// This file is part of the deal.II library.
//
// The deal.II library is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE.md at
// the top level directory of deal.II.
//
// ---------------------------------------------------------------------

#include <deal.II/base/cuda_size.h>
#include <deal.II/base/exceptions.h>

#include <deal.II/lac/cuda_atomic.h>
#include <deal.II/lac/cuda_sparse_matrix.h>

#ifdef DEAL_II_WITH_CUDA

#  include <cusparse.h>

DEAL_II_NAMESPACE_OPEN

namespace CUDAWrappers
{
  namespace internal
  {
    template <typename Number>
    __global__ void
    scale(Number *                                        val,
          const Number                                    a,
          const typename SparseMatrix<Number>::size_type N)
    {
      const typename SparseMatrix<Number>::size_type idx =
        threadIdx.x + blockIdx.x * blockDim.x;
      if (idx < N)
        val[idx] *= a;
    }



    void
    create_sp_mat_descr(int                   m,
                        int                   n,
                        int                   nnz,
                        const float *         A_val_dev,
                        const int *           A_row_ptr_dev,
                        const int *           A_column_index_dev,
                        cusparseSpMatDescr_t &sp_descr)
    {
      cusparseStatus_t error_code = cusparseCreateCsr(
        &sp_descr,
        m,
        n,
        nnz,
        reinterpret_cast<void *>(const_cast<int *>(A_row_ptr_dev)),
        reinterpret_cast<void *>(const_cast<int *>(A_column_index_dev)),
        reinterpret_cast<void *>(const_cast<float *>(A_val_dev)),
        CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_BASE_ZERO,
        CUDA_R_32F);
      AssertCusparse(error_code);
    }



    void
    create_sp_mat_descr(int                   m,
                        int                   n,
                        int                   nnz,
                        const double *        A_val_dev,
                        const int *           A_row_ptr_dev,
                        const int *           A_column_index_dev,
                        cusparseSpMatDescr_t &sp_descr)
    {
      cusparseStatus_t error_code = cusparseCreateCsr(
        &sp_descr,
        m,
        n,
        nnz,
        reinterpret_cast<void *>(const_cast<int *>(A_row_ptr_dev)),
        reinterpret_cast<void *>(const_cast<int *>(A_column_index_dev)),
        reinterpret_cast<void *>(const_cast<double *>(A_val_dev)),
        CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_32I,
        CUSPARSE_INDEX_BASE_ZERO,
        CUDA_R_64F);
      AssertCusparse(error_code);
    }
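

    // As an illustration of the layout cusparseCreateCsr() above expects
    // (this small matrix is only an example, nothing the library stores):
    // the 2x3 matrix
    //
    //     [ 4 0 1 ]
    //     [ 0 2 0 ]
    //
    // is described in compressed sparse row (CSR) form by the three arrays
    //
    //     val          = {4., 1., 2.}   // nonzero values, row by row
    //     column_index = {0,  2,  1 }   // column of each stored value
    //     row_ptr      = {0, 2, 3}      // row i occupies [row_ptr[i], row_ptr[i+1])
    //
    // with zero-based 32-bit indices (CUSPARSE_INDEX_32I,
    // CUSPARSE_INDEX_BASE_ZERO). reinit() below builds exactly these arrays
    // on the host before copying them to the device.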


    void
    csrmv(cusparseHandle_t           handle,
          bool                       transpose,
          int                        m,
          int                        n,
          const cusparseSpMatDescr_t sp_descr,
          const float *              x,
          bool                       add,
          float *                    y)
    {
      float               alpha = 1.;
      float               beta  = add ? 1. : 0.;
      cusparseOperation_t cusparse_operation =
        transpose ? CUSPARSE_OPERATION_TRANSPOSE :
                    CUSPARSE_OPERATION_NON_TRANSPOSE;

      // Move the data to cuSPARSE data type
      cusparseDnVecDescr_t x_cuvec;
      cusparseStatus_t     error_code =
        cusparseCreateDnVec(&x_cuvec,
                            n,
                            reinterpret_cast<void *>(const_cast<float *>(x)),
                            CUDA_R_32F);
      AssertCusparse(error_code);

      cusparseDnVecDescr_t y_cuvec;
      error_code =
        cusparseCreateDnVec(&y_cuvec,
                            m,
                            reinterpret_cast<void *>(const_cast<float *>(y)),
                            CUDA_R_32F);
      AssertCusparse(error_code);

      // This function performs y = alpha*op(A)*x + beta*y
      size_t buffer_size = 0;
      error_code         = cusparseSpMV_bufferSize(handle,
                                           cusparse_operation,
                                           &alpha,
                                           sp_descr,
                                           x_cuvec,
                                           &beta,
                                           y_cuvec,
                                           CUDA_R_32F,
                                           CUSPARSE_MV_ALG_DEFAULT,
                                           &buffer_size);
      AssertCusparse(error_code);

      void *      buffer          = nullptr;
      cudaError_t cuda_error_code = cudaMalloc(&buffer, buffer_size);
      AssertCuda(cuda_error_code);

      // execute SpMV
      error_code = cusparseSpMV(handle,
                                cusparse_operation,
                                &alpha,
                                sp_descr,
                                x_cuvec,
                                &beta,
                                y_cuvec,
                                CUDA_R_32F,
                                CUSPARSE_MV_ALG_DEFAULT,
                                buffer);
      AssertCusparse(error_code);

      cuda_error_code = cudaFree(buffer);
      AssertCuda(cuda_error_code);
      error_code = cusparseDestroyDnVec(x_cuvec);
      AssertCusparse(error_code);
      error_code = cusparseDestroyDnVec(y_cuvec);
      AssertCusparse(error_code);
    }


    void
    csrmv(cusparseHandle_t           handle,
          bool                       transpose,
          int                        m,
          int                        n,
          const cusparseSpMatDescr_t sp_descr,
          const double *             x,
          bool                       add,
          double *                   y)
    {
      double              alpha = 1.;
      double              beta  = add ? 1. : 0.;
      cusparseOperation_t cusparse_operation =
        transpose ? CUSPARSE_OPERATION_TRANSPOSE :
                    CUSPARSE_OPERATION_NON_TRANSPOSE;

      // Move the data to cuSPARSE data type
      cusparseDnVecDescr_t x_cuvec;
      cusparseStatus_t     error_code =
        cusparseCreateDnVec(&x_cuvec,
                            n,
                            reinterpret_cast<void *>(const_cast<double *>(x)),
                            CUDA_R_64F);
      AssertCusparse(error_code);

      cusparseDnVecDescr_t y_cuvec;
      error_code =
        cusparseCreateDnVec(&y_cuvec,
                            m,
                            reinterpret_cast<void *>(const_cast<double *>(y)),
                            CUDA_R_64F);
      AssertCusparse(error_code);

      // This function performs y = alpha*op(A)*x + beta*y
      size_t buffer_size = 0;
      error_code         = cusparseSpMV_bufferSize(handle,
                                           cusparse_operation,
                                           &alpha,
                                           sp_descr,
                                           x_cuvec,
                                           &beta,
                                           y_cuvec,
                                           CUDA_R_64F,
                                           CUSPARSE_MV_ALG_DEFAULT,
                                           &buffer_size);
      AssertCusparse(error_code);

      void *      buffer          = nullptr;
      cudaError_t cuda_error_code = cudaMalloc(&buffer, buffer_size);
      AssertCuda(cuda_error_code);

      // execute SpMV
      error_code = cusparseSpMV(handle,
                                cusparse_operation,
                                &alpha,
                                sp_descr,
                                x_cuvec,
                                &beta,
                                y_cuvec,
                                CUDA_R_64F,
                                CUSPARSE_MV_ALG_DEFAULT,
                                buffer);
      AssertCusparse(error_code);

      cuda_error_code = cudaFree(buffer);
      AssertCuda(cuda_error_code);
      error_code = cusparseDestroyDnVec(x_cuvec);
      AssertCusparse(error_code);
      error_code = cusparseDestroyDnVec(y_cuvec);
      AssertCusparse(error_code);
    }



    // Each thread accumulates the absolute values of one row into the
    // per-column sums, so that the l1 norm (maximum column sum) can be
    // computed from `sums` afterwards.
    template <typename Number>
    __global__ void
    l1_norm(const typename SparseMatrix<Number>::size_type n_rows,
            const Number *                                  val_dev,
            const int *                                     column_index_dev,
            const int *                                     row_ptr_dev,
            Number *                                        sums)
    {
      const typename SparseMatrix<Number>::size_type row =
        threadIdx.x + blockIdx.x * blockDim.x;

      if (row < n_rows)
        {
          for (int j = row_ptr_dev[row]; j < row_ptr_dev[row + 1]; ++j)
            atomicAdd(&sums[column_index_dev[j]], abs(val_dev[j]));
        }
    }



    // Each thread sums the absolute values of one row, so that the linfty
    // norm (maximum row sum) can be computed from `sums` afterwards.
    template <typename Number>
    __global__ void
    linfty_norm(const typename SparseMatrix<Number>::size_type n_rows,
                const Number *                                  val_dev,
                const int *                                     column_index_dev,
                const int *                                     row_ptr_dev,
                Number *                                        sums)
    {
      const typename SparseMatrix<Number>::size_type row =
        threadIdx.x + blockIdx.x * blockDim.x;

      if (row < n_rows)
        {
          sums[row] = (Number)0.;
          for (int j = row_ptr_dev[row]; j < row_ptr_dev[row + 1]; ++j)
            sums[row] += abs(val_dev[j]);
        }
    }
  } // namespace internal



  template <typename Number>
  SparseMatrix<Number>::SparseMatrix()
    : nnz(0)
    , n_rows(0)
    , val_dev(nullptr, Utilities::CUDA::delete_device_data<Number>)
    , column_index_dev(nullptr, Utilities::CUDA::delete_device_data<int>)
    , row_ptr_dev(nullptr, Utilities::CUDA::delete_device_data<int>)
    , descr(nullptr)
    , sp_descr(nullptr)
  {}


  template <typename Number>
  SparseMatrix<Number>::SparseMatrix(
    Utilities::CUDA::Handle &             handle,
    const ::dealii::SparseMatrix<Number> &sparse_matrix_host)
    : val_dev(nullptr, Utilities::CUDA::delete_device_data<Number>)
    , column_index_dev(nullptr, Utilities::CUDA::delete_device_data<int>)
    , row_ptr_dev(nullptr, Utilities::CUDA::delete_device_data<int>)
    , descr(nullptr)
    , sp_descr(nullptr)
  {
    reinit(handle, sparse_matrix_host);
  }



  template <typename Number>
  SparseMatrix<Number>::SparseMatrix(CUDAWrappers::SparseMatrix<Number> &&other)
    : cusparse_handle(other.cusparse_handle)
    , nnz(other.nnz)
    , n_rows(other.n_rows)
    , n_cols(other.n_cols)
    , val_dev(std::move(other.val_dev))
    , column_index_dev(std::move(other.column_index_dev))
    , row_ptr_dev(std::move(other.row_ptr_dev))
    , descr(other.descr)
    , sp_descr(other.sp_descr)
  {
    other.nnz      = 0;
    other.n_rows   = 0;
    other.n_cols   = 0;
    other.descr    = nullptr;
    other.sp_descr = nullptr;
  }



  template <typename Number>
  SparseMatrix<Number>::~SparseMatrix()
  {
    if (descr != nullptr)
      {
        const cusparseStatus_t cusparse_error_code =
          cusparseDestroyMatDescr(descr);
        AssertNothrowCusparse(cusparse_error_code);
        descr = nullptr;
      }

    if (sp_descr != nullptr)
      {
        const cusparseStatus_t cusparse_error_code =
          cusparseDestroySpMat(sp_descr);
        AssertNothrowCusparse(cusparse_error_code);
        sp_descr = nullptr;
      }

    nnz    = 0;
    n_rows = 0;
  }



  template <typename Number>
  SparseMatrix<Number> &
  SparseMatrix<Number>::operator=(SparseMatrix<Number> &&other)
  {
    cusparse_handle  = other.cusparse_handle;
    nnz              = other.nnz;
    n_rows           = other.n_rows;
    n_cols           = other.n_cols;
    val_dev          = std::move(other.val_dev);
    column_index_dev = std::move(other.column_index_dev);
    row_ptr_dev      = std::move(other.row_ptr_dev);
    descr            = other.descr;
    sp_descr         = other.sp_descr;

    other.nnz      = 0;
    other.n_rows   = 0;
    other.n_cols   = 0;
    other.descr    = nullptr;
    other.sp_descr = nullptr;

    return *this;
  }



  template <typename Number>
  void
  SparseMatrix<Number>::reinit(
    Utilities::CUDA::Handle &             handle,
    const ::dealii::SparseMatrix<Number> &sparse_matrix_host)
  {
    cusparse_handle = handle.cusparse_handle;
    nnz             = sparse_matrix_host.n_nonzero_elements();
    n_rows          = sparse_matrix_host.m();
    n_cols          = sparse_matrix_host.n();
    unsigned int const  row_ptr_size = n_rows + 1;
    std::vector<Number> val;
    val.reserve(nnz);
    std::vector<int> column_index;
    column_index.reserve(nnz);
    std::vector<int> row_ptr(row_ptr_size, 0);

    // dealii::SparseMatrix stores the diagonal first in each row so we need
    // to do some reordering
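    //
    // As a purely illustrative example: if row 2 of the host matrix has
    // nonzeros in columns 0, 2 and 4, dealii::SparseMatrix hands them to us
    // in the order
    //
    //     A(2,2), A(2,0), A(2,4)        (diagonal first)
    //
    // and the loop below shifts the sub-diagonal entries one slot to the
    // left and re-inserts the diagonal, so that the CSR arrays end up
    // sorted by column:
    //
    //     A(2,0), A(2,2), A(2,4)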
    for (int row = 0; row < n_rows; ++row)
      {
        auto         p_end   = sparse_matrix_host.end(row);
        unsigned int counter = 0;
        for (auto p = sparse_matrix_host.begin(row); p != p_end; ++p)
          {
            val.emplace_back(p->value());
            column_index.emplace_back(p->column());
            ++counter;
          }
        row_ptr[row + 1] = row_ptr[row] + counter;

        // Sort the elements in the row
        unsigned int const offset     = row_ptr[row];
        int const          diag_index = column_index[offset];
        Number             diag_elem  = sparse_matrix_host.diag_element(row);
        unsigned int       pos        = 1;
        while ((column_index[offset + pos] < row) && (pos < counter))
          {
            val[offset + pos - 1]          = val[offset + pos];
            column_index[offset + pos - 1] = column_index[offset + pos];
            ++pos;
          }
        val[offset + pos - 1]          = diag_elem;
        column_index[offset + pos - 1] = diag_index;
      }

    // Copy the elements to the gpu
    val_dev.reset(Utilities::CUDA::allocate_device_data<Number>(nnz));
    cudaError_t error_code = cudaMemcpy(val_dev.get(),
                                        val.data(),
                                        nnz * sizeof(Number),
                                        cudaMemcpyHostToDevice);
    AssertCuda(error_code);

    // Copy the column indices to the gpu
    column_index_dev.reset(Utilities::CUDA::allocate_device_data<int>(nnz));
    AssertCuda(error_code);
    error_code = cudaMemcpy(column_index_dev.get(),
                            column_index.data(),
                            nnz * sizeof(int),
                            cudaMemcpyHostToDevice);
    AssertCuda(error_code);

    // Copy the row pointer to the gpu
    row_ptr_dev.reset(Utilities::CUDA::allocate_device_data<int>(row_ptr_size));
    AssertCuda(error_code);
    error_code = cudaMemcpy(row_ptr_dev.get(),
                            row_ptr.data(),
                            row_ptr_size * sizeof(int),
                            cudaMemcpyHostToDevice);
    AssertCuda(error_code);

    // Create the matrix descriptor
    cusparseStatus_t cusparse_error_code = cusparseCreateMatDescr(&descr);
    AssertCusparse(cusparse_error_code);
    cusparse_error_code =
      cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
    AssertCusparse(cusparse_error_code);
    cusparse_error_code =
      cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
    AssertCusparse(cusparse_error_code);

    // Create the sparse matrix descriptor
    internal::create_sp_mat_descr(n_rows,
                                  n_cols,
                                  nnz,
                                  val_dev.get(),
                                  row_ptr_dev.get(),
                                  column_index_dev.get(),
                                  sp_descr);
  }



  template <typename Number>
  SparseMatrix<Number> &
  SparseMatrix<Number>::operator*=(const Number factor)
  {
    AssertIsFinite(factor);
    const int n_blocks = 1 + (nnz - 1) / block_size;
    internal::scale<Number>
      <<<n_blocks, block_size>>>(val_dev.get(), factor, nnz);
    AssertCudaKernel();

    return *this;
  }


  template <typename Number>
  SparseMatrix<Number> &
  SparseMatrix<Number>::operator/=(const Number factor)
  {
    AssertIsFinite(factor);
    Assert(factor != Number(0.), ExcZero());
    const int n_blocks = 1 + (nnz - 1) / block_size;
    internal::scale<Number>
      <<<n_blocks, block_size>>>(val_dev.get(), 1. / factor, nnz);
    AssertCudaKernel();

    return *this;
  }



  template <typename Number>
  void
  SparseMatrix<Number>::vmult(
    LinearAlgebra::CUDAWrappers::Vector<Number> &      dst,
    const LinearAlgebra::CUDAWrappers::Vector<Number> &src) const
  {
    internal::csrmv(cusparse_handle,
                    false,
                    n_rows,
                    n_cols,
                    sp_descr,
                    src.get_values(),
                    false,
                    dst.get_values());
  }



  template <typename Number>
  void
  SparseMatrix<Number>::Tvmult(
    LinearAlgebra::CUDAWrappers::Vector<Number> &      dst,
    const LinearAlgebra::CUDAWrappers::Vector<Number> &src) const
  {
    internal::csrmv(cusparse_handle,
                    true,
                    n_rows,
                    n_cols,
                    sp_descr,
                    src.get_values(),
                    false,
                    dst.get_values());
  }



  template <typename Number>
  void
  SparseMatrix<Number>::vmult_add(
    LinearAlgebra::CUDAWrappers::Vector<Number> &      dst,
    const LinearAlgebra::CUDAWrappers::Vector<Number> &src) const
  {
    internal::csrmv(cusparse_handle,
                    false,
                    n_rows,
                    n_cols,
                    sp_descr,
                    src.get_values(),
                    true,
                    dst.get_values());
  }



  template <typename Number>
  void
  SparseMatrix<Number>::Tvmult_add(
    LinearAlgebra::CUDAWrappers::Vector<Number> &      dst,
    const LinearAlgebra::CUDAWrappers::Vector<Number> &src) const
  {
    internal::csrmv(cusparse_handle,
                    true,
                    n_rows,
                    n_cols,
                    sp_descr,
                    src.get_values(),
                    true,
                    dst.get_values());
  }
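

  // A minimal usage sketch of the matrix-vector products above (not part of
  // the library; `system_matrix` and the vector contents are placeholders):
  //
  //   Utilities::CUDA::Handle                     cuda_handle;
  //   CUDAWrappers::SparseMatrix<double>          A_dev(cuda_handle,
  //                                                     system_matrix);
  //   LinearAlgebra::CUDAWrappers::Vector<double> x(A_dev.n());
  //   LinearAlgebra::CUDAWrappers::Vector<double> y(A_dev.m());
  //   // ... fill x on the device ...
  //   A_dev.vmult(y, x);     // y  = A x
  //   A_dev.vmult_add(y, x); // y += A x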


  template <typename Number>
  Number
  SparseMatrix<Number>::matrix_norm_square(
    const LinearAlgebra::CUDAWrappers::Vector<Number> &v) const
  {
    LinearAlgebra::CUDAWrappers::Vector<Number> tmp = v;
    vmult(tmp, v);

    return v * tmp;
  }



  template <typename Number>
  Number
  SparseMatrix<Number>::matrix_scalar_product(
    const LinearAlgebra::CUDAWrappers::Vector<Number> &u,
    const LinearAlgebra::CUDAWrappers::Vector<Number> &v) const
  {
    LinearAlgebra::CUDAWrappers::Vector<Number> tmp = v;
    vmult(tmp, v);

    return u * tmp;
  }



  template <typename Number>
  Number
  SparseMatrix<Number>::residual(
    LinearAlgebra::CUDAWrappers::Vector<Number> &      dst,
    const LinearAlgebra::CUDAWrappers::Vector<Number> &x,
    const LinearAlgebra::CUDAWrappers::Vector<Number> &b) const
  {
    vmult(dst, x);
    dst.sadd(-1., 1., b);

    return dst.l2_norm();
  }



  template <typename Number>
  Number
  SparseMatrix<Number>::l1_norm() const
  {
    LinearAlgebra::CUDAWrappers::Vector<real_type> column_sums(n_cols);
    const int n_blocks = 1 + (nnz - 1) / block_size;
    internal::l1_norm<Number>
      <<<n_blocks, block_size>>>(n_rows,
                                 val_dev.get(),
                                 column_index_dev.get(),
                                 row_ptr_dev.get(),
                                 column_sums.get_values());
    AssertCudaKernel();

    return column_sums.linfty_norm();
  }



  template <typename Number>
  Number
  SparseMatrix<Number>::linfty_norm() const
  {
    LinearAlgebra::CUDAWrappers::Vector<real_type> row_sums(n_rows);
    const int n_blocks = 1 + (nnz - 1) / block_size;
    internal::linfty_norm<Number>
      <<<n_blocks, block_size>>>(n_rows,
                                 val_dev.get(),
                                 column_index_dev.get(),
                                 row_ptr_dev.get(),
                                 row_sums.get_values());
    AssertCudaKernel();

    return row_sums.linfty_norm();
  }



  template <typename Number>
  Number
  SparseMatrix<Number>::frobenius_norm() const
  {
    LinearAlgebra::CUDAWrappers::Vector<real_type> matrix_values(nnz);
    cudaError_t cuda_error = cudaMemcpy(matrix_values.get_values(),
                                        val_dev.get(),
                                        nnz * sizeof(Number),
                                        cudaMemcpyDeviceToDevice);
    AssertCuda(cuda_error);

    return matrix_values.l2_norm();
  }



  template <typename Number>
  std::tuple<Number *, int *, int *, cusparseMatDescr_t, cusparseSpMatDescr_t>
  SparseMatrix<Number>::get_cusparse_matrix() const
  {
    return std::make_tuple(val_dev.get(),
                           column_index_dev.get(),
                           row_ptr_dev.get(),
                           descr,
                           sp_descr);
  }



  template class SparseMatrix<float>;
  template class SparseMatrix<double>;
} // namespace CUDAWrappers
DEAL_II_NAMESPACE_CLOSE

#endif