1! This file is part of ELPA. 2! 3! The ELPA library was originally created by the ELPA consortium, 4! consisting of the following organizations: 5! 6! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), 7! - Bergische Universität Wuppertal, Lehrstuhl für angewandte 8! Informatik, 9! - Technische Universität München, Lehrstuhl für Informatik mit 10! Schwerpunkt Wissenschaftliches Rechnen , 11! - Fritz-Haber-Institut, Berlin, Abt. Theorie, 12! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, 13! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, 14! and 15! - IBM Deutschland GmbH 16! 17! 18! More information can be found here: 19! http://elpa.rzg.mpg.de/ 20! 21! ELPA is free software: you can redistribute it and/or modify 22! it under the terms of the version 3 of the license of the 23! GNU Lesser General Public License as published by the Free 24! Software Foundation. 25! 26! ELPA is distributed in the hope that it will be useful, 27! but WITHOUT ANY WARRANTY; without even the implied warranty of 28! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 29! GNU Lesser General Public License for more details. 30! 31! You should have received a copy of the GNU Lesser General Public License 32! along with ELPA. If not, see <http://www.gnu.org/licenses/> 33! 34! ELPA reflects a substantial effort on the part of the original 35! ELPA consortium, and we ask you to respect the spirit of the 36! license that we chose: i.e., please contribute any changes you 37! may have back to the original ELPA library distribution, and keep 38! any derivatives of ELPA under the same license that we chose for 39! the original distribution, the GNU Lesser General Public License. 40! 41! This file was written by A. Marek, MPCDF 42 43 44#include "config-f90.h" 45module cuda_functions 46 use iso_c_binding 47 use precision 48 implicit none 49 50 public 51 52 integer(kind=ik) :: cudaMemcpyHostToDevice 53 integer(kind=ik) :: cudaMemcpyDeviceToHost 54 integer(kind=ik) :: cudaHostRegisterPortable 55 integer(kind=ik) :: cudaHostRegisterMapped 56 integer(kind=ik) :: cudaMemcpyDeviceToDevice 57 58 ! TODO global variable, has to be changed 59 integer(kind=C_intptr_T) :: cublasHandle = -1 60 61 integer(kind=c_intptr_t), parameter :: size_of_double_real = 8_rk8 62#ifdef WANT_SINGLE_PRECISION_REAL 63 integer(kind=c_intptr_t), parameter :: size_of_single_real = 4_rk4 64#endif 65 66 integer(kind=c_intptr_t), parameter :: size_of_double_complex = 16_ck8 67#ifdef WANT_SINGLE_PRECISION_COMPLEX 68 integer(kind=c_intptr_t), parameter :: size_of_single_complex = 8_ck4 69#endif 70 71 ! functions to set and query the CUDA devices 72 interface 73 function cublas_create_c(handle) result(istat) & 74 bind(C, name="cublasCreateFromC") 75 use iso_c_binding 76 implicit none 77 integer(kind=C_intptr_T) :: handle 78 integer(kind=C_INT) :: istat 79 end function cublas_create_c 80 end interface 81 82 interface 83 function cublas_destroy_c(handle) result(istat) & 84 bind(C, name="cublasDestroyFromC") 85 use iso_c_binding 86 implicit none 87 integer(kind=C_intptr_T) :: handle 88 integer(kind=C_INT) :: istat 89 end function cublas_destroy_c 90 end interface 91 92 interface 93 function cuda_threadsynchronize_c() result(istat) & 94 bind(C,name="cudaThreadSynchronizeFromC") 95 use iso_c_binding 96 implicit none 97 integer(kind=C_INT) :: istat 98 end function cuda_threadsynchronize_c 99 end interface 100 101 interface 102 function cuda_setdevice_c(n) result(istat) & 103 bind(C, name="cudaSetDeviceFromC") 104 105 use iso_c_binding 106 implicit none 107 integer(kind=C_INT), value :: n 108 integer(kind=C_INT) :: istat 109 end function cuda_setdevice_c 110 end interface 111 112 interface 113 function cuda_getdevicecount_c(n) result(istat) & 114 bind(C, name="cudaGetDeviceCountFromC") 115 use iso_c_binding 116 implicit none 117 integer(kind=C_INT), intent(out) :: n 118 integer(kind=C_INT) :: istat 119 end function cuda_getdevicecount_c 120 end interface 121 122 interface 123 function cuda_devicesynchronize_c()result(istat) & 124 bind(C,name='cudaDeviceSynchronizeFromC') 125 126 use iso_c_binding 127 128 implicit none 129 integer(kind=C_INT) :: istat 130 131 end function cuda_devicesynchronize_c 132 end interface 133 134 135 ! functions to copy CUDA memory 136 interface 137 function cuda_memcpyDeviceToDevice_c() result(flag) & 138 bind(C, name="cudaMemcpyDeviceToDeviceFromC") 139 use iso_c_binding 140 implicit none 141 integer(kind=c_int) :: flag 142 end function 143 end interface 144 145 interface 146 function cuda_memcpyHostToDevice_c() result(flag) & 147 bind(C, name="cudaMemcpyHostToDeviceFromC") 148 use iso_c_binding 149 implicit none 150 integer(kind=c_int) :: flag 151 end function 152 end interface 153 154 interface 155 function cuda_memcpyDeviceToHost_c() result(flag) & 156 bind(C, name="cudaMemcpyDeviceToHostFromC") 157 use iso_c_binding 158 implicit none 159 integer(kind=c_int) :: flag 160 end function 161 end interface 162 163 interface 164 function cuda_hostRegisterPortable_c() result(flag) & 165 bind(C, name="cudaHostRegisterPortableFromC") 166 use iso_c_binding 167 implicit none 168 integer(kind=c_int) :: flag 169 end function 170 end interface 171 172 interface 173 function cuda_hostRegisterMapped_c() result(flag) & 174 bind(C, name="cudaHostRegisterMappedFromC") 175 use iso_c_binding 176 implicit none 177 integer(kind=c_int) :: flag 178 end function 179 end interface 180 181 interface 182 function cuda_memcpy_c(dst, src, size, dir) result(istat) & 183 bind(C, name="cudaMemcpyFromC") 184 185 use iso_c_binding 186 187 implicit none 188 integer(kind=C_intptr_t), value :: dst 189 integer(kind=C_intptr_t), value :: src 190 integer(kind=c_intptr_t), intent(in), value :: size 191 integer(kind=C_INT), intent(in), value :: dir 192 integer(kind=C_INT) :: istat 193 194 end function cuda_memcpy_c 195 end interface 196 197 interface 198 function cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) result(istat) & 199 bind(C, name="cudaMemcpy2dFromC") 200 201 use iso_c_binding 202 203 implicit none 204 205 integer(kind=C_intptr_T), value :: dst 206 integer(kind=c_intptr_t), intent(in), value :: dpitch 207 integer(kind=C_intptr_T), value :: src 208 integer(kind=c_intptr_t), intent(in), value :: spitch 209 integer(kind=c_intptr_t), intent(in), value :: width 210 integer(kind=c_intptr_t), intent(in), value :: height 211 integer(kind=C_INT), intent(in), value :: dir 212 integer(kind=C_INT) :: istat 213 214 end function cuda_memcpy2d_c 215 end interface 216 217 ! functions to allocate and free CUDA memory 218 219 interface 220 function cuda_free_c(a) result(istat) & 221 bind(C, name="cudaFreeFromC") 222 223 use iso_c_binding 224 225 implicit none 226 integer(kind=C_intptr_T), value :: a 227 integer(kind=C_INT) :: istat 228 229 end function cuda_free_c 230 end interface 231 232 interface 233 function cuda_malloc_c(a, width_height) result(istat) & 234 bind(C, name="cudaMallocFromC") 235 236 use iso_c_binding 237 implicit none 238 239 integer(kind=C_intptr_T) :: a 240 integer(kind=c_intptr_t), intent(in), value :: width_height 241 integer(kind=C_INT) :: istat 242 243 end function cuda_malloc_c 244 end interface 245 246 interface 247 function cuda_memset_c(a, val, size) result(istat) & 248 bind(C, name="cudaMemsetFromC") 249 250 use iso_c_binding 251 252 implicit none 253 254 integer(kind=C_intptr_T), value :: a 255 integer(kind=C_INT), value :: val 256 integer(kind=c_intptr_t), intent(in), value :: size 257 integer(kind=C_INT) :: istat 258 259 end function cuda_memset_c 260 end interface 261 262 ! cuBLAS 263 interface 264 subroutine cublas_dgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) & 265 bind(C,name='cublasDgemm_elpa_wrapper') 266 267 use iso_c_binding 268 269 implicit none 270 character(1,C_CHAR),value :: cta, ctb 271 integer(kind=C_INT),value :: m,n,k 272 integer(kind=C_INT), intent(in), value :: lda,ldb,ldc 273 real(kind=C_DOUBLE),value :: alpha,beta 274 integer(kind=C_intptr_T), value :: a, b, c 275 integer(kind=C_intptr_T), value :: handle 276 277 end subroutine cublas_dgemm_c 278 end interface 279 280 interface 281 subroutine cublas_sgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) & 282 bind(C,name='cublasSgemm_elpa_wrapper') 283 284 use iso_c_binding 285 286 implicit none 287 character(1,C_CHAR),value :: cta, ctb 288 integer(kind=C_INT),value :: m,n,k 289 integer(kind=C_INT), intent(in), value :: lda,ldb,ldc 290 real(kind=C_FLOAT),value :: alpha,beta 291 integer(kind=C_intptr_T), value :: a, b, c 292 integer(kind=C_intptr_T), value :: handle 293 294 end subroutine cublas_sgemm_c 295 end interface 296 297 interface 298 subroutine cublas_dtrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) & 299 bind(C,name='cublasDtrmm_elpa_wrapper') 300 301 use iso_c_binding 302 303 implicit none 304 character(1,C_CHAR),value :: side, uplo, trans, diag 305 integer(kind=C_INT),value :: m,n 306 integer(kind=C_INT), intent(in), value :: lda,ldb 307 real(kind=C_DOUBLE), value :: alpha 308 integer(kind=C_intptr_T), value :: a, b 309 integer(kind=C_intptr_T), value :: handle 310 311 end subroutine cublas_dtrmm_c 312 end interface 313 314 interface 315 subroutine cublas_strmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) & 316 bind(C,name='cublasStrmm_elpa_wrapper') 317 318 use iso_c_binding 319 320 implicit none 321 character(1,C_CHAR),value :: side, uplo, trans, diag 322 integer(kind=C_INT),value :: m,n 323 integer(kind=C_INT), intent(in), value :: lda,ldb 324 real(kind=C_FLOAT), value :: alpha 325 integer(kind=C_intptr_T), value :: a, b 326 integer(kind=C_intptr_T), value :: handle 327 328 end subroutine cublas_strmm_c 329 end interface 330 331 interface 332 subroutine cublas_zgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) & 333 bind(C,name='cublasZgemm_elpa_wrapper') 334 335 use iso_c_binding 336 337 implicit none 338 character(1,C_CHAR),value :: cta, ctb 339 integer(kind=C_INT),value :: m,n,k 340 integer(kind=C_INT), intent(in), value :: lda,ldb,ldc 341 complex(kind=C_DOUBLE_COMPLEX),value :: alpha,beta 342 integer(kind=C_intptr_T), value :: a, b, c 343 integer(kind=C_intptr_T), value :: handle 344 345 end subroutine cublas_zgemm_c 346 end interface 347 348 interface 349 subroutine cublas_cgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) & 350 bind(C,name='cublasCgemm_elpa_wrapper') 351 352 use iso_c_binding 353 354 implicit none 355 character(1,C_CHAR),value :: cta, ctb 356 integer(kind=C_INT),value :: m,n,k 357 integer(kind=C_INT), intent(in), value :: lda,ldb,ldc 358 complex(kind=C_FLOAT_COMPLEX),value :: alpha,beta 359 integer(kind=C_intptr_T), value :: a, b, c 360 integer(kind=C_intptr_T), value :: handle 361 362 end subroutine cublas_cgemm_c 363 end interface 364 365 interface 366 subroutine cublas_ztrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) & 367 bind(C,name='cublasZtrmm_elpa_wrapper') 368 369 use iso_c_binding 370 371 implicit none 372 character(1,C_CHAR),value :: side, uplo, trans, diag 373 integer(kind=C_INT),value :: m,n 374 integer(kind=C_INT), intent(in), value :: lda,ldb 375 complex(kind=C_DOUBLE_COMPLEX), value :: alpha 376 integer(kind=C_intptr_T), value :: a, b 377 integer(kind=C_intptr_T), value :: handle 378 379 end subroutine cublas_ztrmm_c 380 end interface 381 382 interface 383 subroutine cublas_ctrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) & 384 bind(C,name='cublasCtrmm_elpa_wrapper') 385 386 use iso_c_binding 387 388 implicit none 389 character(1,C_CHAR),value :: side, uplo, trans, diag 390 integer(kind=C_INT),value :: m,n 391 integer(kind=C_INT), intent(in), value :: lda,ldb 392 complex(kind=C_FLOAT_COMPLEX), value :: alpha 393 integer(kind=C_intptr_T), value :: a, b 394 integer(kind=C_intptr_T), value :: handle 395 396 end subroutine cublas_ctrmm_c 397 end interface 398 399 interface 400 subroutine cublas_dgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) & 401 bind(C,name='cublasDgemv_elpa_wrapper') 402 403 use iso_c_binding 404 405 implicit none 406 character(1,C_CHAR),value :: cta 407 integer(kind=C_INT),value :: m,n 408 integer(kind=C_INT), intent(in), value :: lda,incx,incy 409 real(kind=C_DOUBLE),value :: alpha,beta 410 integer(kind=C_intptr_T), value :: a, x, y 411 integer(kind=C_intptr_T), value :: handle 412 413 end subroutine cublas_dgemv_c 414 end interface 415 416 interface 417 subroutine cublas_sgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) & 418 bind(C,name='cublasSgemv_elpa_wrapper') 419 420 use iso_c_binding 421 422 implicit none 423 character(1,C_CHAR),value :: cta 424 integer(kind=C_INT),value :: m,n 425 integer(kind=C_INT), intent(in), value :: lda,incx,incy 426 real(kind=C_FLOAT),value :: alpha,beta 427 integer(kind=C_intptr_T), value :: a, x, y 428 integer(kind=C_intptr_T), value :: handle 429 430 end subroutine cublas_sgemv_c 431 end interface 432 433 interface 434 subroutine cublas_zgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) & 435 bind(C,name='cublasZgemv_elpa_wrapper') 436 437 use iso_c_binding 438 439 implicit none 440 character(1,C_CHAR),value :: cta 441 integer(kind=C_INT),value :: m,n 442 integer(kind=C_INT), intent(in), value :: lda,incx,incy 443 complex(kind=C_DOUBLE_COMPLEX),value :: alpha,beta 444 integer(kind=C_intptr_T), value :: a, x, y 445 integer(kind=C_intptr_T), value :: handle 446 447 end subroutine cublas_zgemv_c 448 end interface 449 450 interface 451 subroutine cublas_cgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) & 452 bind(C,name='cublasCgemv_elpa_wrapper') 453 454 use iso_c_binding 455 456 implicit none 457 character(1,C_CHAR),value :: cta 458 integer(kind=C_INT),value :: m,n 459 integer(kind=C_INT), intent(in), value :: lda,incx,incy 460 complex(kind=C_FLOAT_COMPLEX),value :: alpha,beta 461 integer(kind=C_intptr_T), value :: a, x, y 462 integer(kind=C_intptr_T), value :: handle 463 464 end subroutine cublas_cgemv_c 465 end interface 466 467 468 contains 469 470 ! functions to set and query the CUDA devices 471 472 function cublas_create(handle) result(success) 473 use iso_c_binding 474 implicit none 475 476 integer(kind=C_intptr_t) :: handle 477 logical :: success 478#ifdef WITH_GPU_VERSION 479 success = cublas_create_c(handle) /= 0 480#else 481 success = .true. 482#endif 483 end function 484 485 function cublas_destroy(handle) result(success) 486 use iso_c_binding 487 implicit none 488 489 integer(kind=C_intptr_t) :: handle 490 logical :: success 491#ifdef WITH_GPU_VERSION 492 success = cublas_destroy_c(handle) /= 0 493#else 494 success = .true. 495#endif 496 end function 497 498 function cuda_threadsynchronize() result(success) 499 use iso_c_binding 500 501 implicit none 502 503 logical :: success 504#ifdef WITH_GPU_VERSION 505 success = cuda_threadsynchronize_c() /= 0 506#else 507 success = .true. 508#endif 509 end function cuda_threadsynchronize 510 511 function cuda_setdevice(n) result(success) 512 use iso_c_binding 513 514 implicit none 515 516 integer(kind=ik), intent(in) :: n 517 logical :: success 518#ifdef WITH_GPU_VERSION 519 success = cuda_setdevice_c(int(n,kind=c_int)) /= 0 520#else 521 success = .true. 522#endif 523 end function cuda_setdevice 524 525 function cuda_getdevicecount(n) result(success) 526 use iso_c_binding 527 implicit none 528 529 integer(kind=ik) :: n 530 integer(kind=c_int) :: nCasted 531 logical :: success 532#ifdef WITH_GPU_VERSION 533 success = cuda_getdevicecount_c(nCasted) /=0 534 n = int(nCasted) 535#else 536 success = .true. 537 n = 0 538#endif 539 end function cuda_getdevicecount 540 541 function cuda_devicesynchronize()result(success) 542 543 use iso_c_binding 544 545 implicit none 546 logical :: success 547#ifdef WITH_GPU_VERSION 548 success = cuda_devicesynchronize_c() /=0 549#else 550 success = .true. 551#endif 552 end function cuda_devicesynchronize 553 ! functions to allocate and free memory 554 555 function cuda_malloc(a, width_height) result(success) 556 557 use iso_c_binding 558 implicit none 559 560 integer(kind=C_intptr_t) :: a 561 integer(kind=c_intptr_t), intent(in) :: width_height 562 logical :: success 563#ifdef WITH_GPU_VERSION 564 success = cuda_malloc_c(a, width_height) /= 0 565#else 566 success = .true. 567#endif 568 end function 569 570 function cuda_free(a) result(success) 571 572 use iso_c_binding 573 574 implicit none 575 integer(kind=C_intptr_T) :: a 576 logical :: success 577#ifdef WITH_GPU_VERSION 578 success = cuda_free_c(a) /= 0 579#else 580 success = .true. 581#endif 582 end function cuda_free 583 584 function cuda_memset(a, val, size) result(success) 585 586 use iso_c_binding 587 588 implicit none 589 590 integer(kind=c_intptr_t) :: a 591 integer(kind=ik) :: val 592 integer(kind=c_intptr_t), intent(in) :: size 593 integer(kind=C_INT) :: istat 594 595 logical :: success 596#ifdef WITH_GPU_VERSION 597 success= cuda_memset_c(a, int(val,kind=c_int), int(size,kind=c_intptr_t)) /=0 598#else 599 success = .true. 600#endif 601 end function cuda_memset 602 603 ! functions to memcopy CUDA memory 604 605 function cuda_memcpyDeviceToDevice() result(flag) 606 use iso_c_binding 607 implicit none 608 integer(kind=ik) :: flag 609#ifdef WITH_GPU_VERSION 610 flag = int(cuda_memcpyDeviceToDevice_c()) 611#else 612 flag = 0 613#endif 614 end function 615 616 function cuda_memcpyHostToDevice() result(flag) 617 use iso_c_binding 618 use precision 619 implicit none 620 integer(kind=ik) :: flag 621#ifdef WITH_GPU_VERSION 622 flag = int(cuda_memcpyHostToDevice_c()) 623#else 624 flag = 0 625#endif 626 end function 627 628 function cuda_memcpyDeviceToHost() result(flag) 629 use iso_c_binding 630 use precision 631 implicit none 632 integer(kind=ik) :: flag 633#ifdef WITH_GPU_VERSION 634 flag = int( cuda_memcpyDeviceToHost_c()) 635#else 636 flag = 0 637#endif 638 end function 639 640 function cuda_hostRegisterPortable() result(flag) 641 use iso_c_binding 642 use precision 643 implicit none 644 integer(kind=ik) :: flag 645#ifdef WITH_GPU_VERSION 646 flag = int(cuda_hostRegisterPortable_c()) 647#else 648 flag = 0 649#endif 650 end function 651 652 function cuda_hostRegisterMapped() result(flag) 653 use iso_c_binding 654 use precision 655 implicit none 656 integer(kind=ik) :: flag 657#ifdef WITH_GPU_VERSION 658 flag = int(cuda_hostRegisterMapped_c()) 659#else 660 flag = 0 661#endif 662 end function 663 664 function cuda_memcpy(dst, src, size, dir) result(success) 665 666 use iso_c_binding 667 668 implicit none 669 integer(kind=C_intptr_t) :: dst 670 integer(kind=C_intptr_t) :: src 671 integer(kind=c_intptr_t), intent(in) :: size 672 integer(kind=C_INT), intent(in) :: dir 673 logical :: success 674 675#ifdef WITH_GPU_VERSION 676 success = cuda_memcpy_c(dst, src, size, dir) /= 0 677#else 678 success = .true. 679#endif 680 end function 681 682 function cuda_memcpy2d(dst, dpitch, src, spitch, width, height , dir) result(success) 683 684 use iso_c_binding 685 686 implicit none 687 688 integer(kind=C_intptr_T) :: dst 689 integer(kind=c_intptr_t), intent(in) :: dpitch 690 integer(kind=C_intptr_T) :: src 691 integer(kind=c_intptr_t), intent(in) :: spitch 692 integer(kind=c_intptr_t), intent(in) :: width 693 integer(kind=c_intptr_t), intent(in) :: height 694 integer(kind=C_INT), intent(in) :: dir 695 logical :: success 696#ifdef WITH_GPU_VERSION 697 success = cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) /= 0 698#else 699 success = .true. 700#endif 701 end function cuda_memcpy2d 702 703 ! cuBLAS 704 subroutine cublas_dgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) 705 use iso_c_binding 706 707 implicit none 708 character(1,C_CHAR),value :: cta, ctb 709 integer(kind=C_INT) :: m,n,k 710 integer(kind=C_INT), intent(in) :: lda,ldb,ldc 711 real(kind=C_DOUBLE) :: alpha,beta 712 integer(kind=C_intptr_T) :: a, b, c 713#ifdef WITH_GPU_VERSION 714 call cublas_dgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) 715#endif 716 end subroutine cublas_dgemm 717 718 subroutine cublas_sgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) 719 use iso_c_binding 720 721 implicit none 722 character(1,C_CHAR),value :: cta, ctb 723 integer(kind=C_INT) :: m,n,k 724 integer(kind=C_INT), intent(in) :: lda,ldb,ldc 725 real(kind=C_FLOAT) :: alpha,beta 726 integer(kind=C_intptr_T) :: a, b, c 727#ifdef WITH_GPU_VERSION 728 call cublas_sgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) 729#endif 730 end subroutine cublas_sgemm 731 732 subroutine cublas_dtrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) 733 734 use iso_c_binding 735 736 implicit none 737 character(1,C_CHAR),value :: side, uplo, trans, diag 738 integer(kind=C_INT) :: m,n 739 integer(kind=C_INT), intent(in) :: lda,ldb 740 real(kind=C_DOUBLE) :: alpha 741 integer(kind=C_intptr_T) :: a, b 742#ifdef WITH_GPU_VERSION 743 call cublas_dtrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) 744#endif 745 end subroutine cublas_dtrmm 746 747 subroutine cublas_strmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) 748 749 use iso_c_binding 750 751 implicit none 752 character(1,C_CHAR),value :: side, uplo, trans, diag 753 integer(kind=C_INT) :: m,n 754 integer(kind=C_INT), intent(in) :: lda,ldb 755 real(kind=C_FLOAT) :: alpha 756 integer(kind=C_intptr_T) :: a, b 757#ifdef WITH_GPU_VERSION 758 call cublas_strmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) 759#endif 760 end subroutine cublas_strmm 761 762 subroutine cublas_zgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) 763 764 use iso_c_binding 765 766 implicit none 767 character(1,C_CHAR),value :: cta, ctb 768 integer(kind=C_INT) :: m,n,k 769 integer(kind=C_INT), intent(in) :: lda,ldb,ldc 770 complex(kind=C_DOUBLE_COMPLEX) :: alpha,beta 771 integer(kind=C_intptr_T) :: a, b, c 772#ifdef WITH_GPU_VERSION 773 call cublas_zgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) 774#endif 775 end subroutine cublas_zgemm 776 777 subroutine cublas_cgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) 778 779 use iso_c_binding 780 781 implicit none 782 character(1,C_CHAR),value :: cta, ctb 783 integer(kind=C_INT) :: m,n,k 784 integer(kind=C_INT), intent(in) :: lda,ldb,ldc 785 complex(kind=C_FLOAT_COMPLEX) :: alpha,beta 786 integer(kind=C_intptr_T) :: a, b, c 787#ifdef WITH_GPU_VERSION 788 call cublas_cgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) 789#endif 790 end subroutine cublas_cgemm 791 792 subroutine cublas_ztrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) 793 794 use iso_c_binding 795 796 implicit none 797 character(1,C_CHAR),value :: side, uplo, trans, diag 798 integer(kind=C_INT) :: m,n 799 integer(kind=C_INT), intent(in) :: lda,ldb 800 complex(kind=C_DOUBLE_COMPLEX) :: alpha 801 integer(kind=C_intptr_T) :: a, b 802#ifdef WITH_GPU_VERSION 803 call cublas_ztrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) 804#endif 805 end subroutine cublas_ztrmm 806 807 subroutine cublas_ctrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) 808 809 use iso_c_binding 810 811 implicit none 812 character(1,C_CHAR),value :: side, uplo, trans, diag 813 integer(kind=C_INT) :: m,n 814 integer(kind=C_INT), intent(in) :: lda,ldb 815 complex(kind=C_FLOAT_COMPLEX) :: alpha 816 integer(kind=C_intptr_T) :: a, b 817#ifdef WITH_GPU_VERSION 818 call cublas_ctrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) 819#endif 820 end subroutine cublas_ctrmm 821 822 subroutine cublas_dgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) 823 use iso_c_binding 824 825 implicit none 826 character(1,C_CHAR),value :: cta 827 integer(kind=C_INT) :: m,n 828 integer(kind=C_INT), intent(in) :: lda,incx,incy 829 real(kind=C_DOUBLE) :: alpha,beta 830 integer(kind=C_intptr_T) :: a, x, y 831#ifdef WITH_GPU_VERSION 832 call cublas_dgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) 833#endif 834 end subroutine cublas_dgemv 835 836 subroutine cublas_sgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) 837 use iso_c_binding 838 839 implicit none 840 character(1,C_CHAR),value :: cta 841 integer(kind=C_INT) :: m,n 842 integer(kind=C_INT), intent(in) :: lda,incx,incy 843 real(kind=C_FLOAT) :: alpha,beta 844 integer(kind=C_intptr_T) :: a, x, y 845#ifdef WITH_GPU_VERSION 846 call cublas_sgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) 847#endif 848 end subroutine cublas_sgemv 849 850 subroutine cublas_zgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) 851 use iso_c_binding 852 853 implicit none 854 character(1,C_CHAR),value :: cta 855 integer(kind=C_INT) :: m,n 856 integer(kind=C_INT), intent(in) :: lda,incx,incy 857 complex(kind=C_DOUBLE_COMPLEX) :: alpha,beta 858 integer(kind=C_intptr_T) :: a, x, y 859#ifdef WITH_GPU_VERSION 860 call cublas_zgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) 861#endif 862 end subroutine cublas_zgemv 863 864 subroutine cublas_cgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) 865 use iso_c_binding 866 867 implicit none 868 character(1,C_CHAR),value :: cta 869 integer(kind=C_INT) :: m,n 870 integer(kind=C_INT), intent(in) :: lda,incx,incy 871 complex(kind=C_FLOAT_COMPLEX) :: alpha,beta 872 integer(kind=C_intptr_T) :: a, x, y 873#ifdef WITH_GPU_VERSION 874 call cublas_cgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) 875#endif 876 end subroutine cublas_cgemv 877 878 879! subroutine cublas_dsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy) 880! use iso_c_binding 881! 882! implicit none 883! character(1,C_CHAR),value :: cta 884! integer(kind=C_INT) :: n 885! integer(kind=C_INT), intent(in) :: lda,incx,incy 886! real(kind=C_DOUBLE) :: alpha,beta 887! integer(kind=C_intptr_T) :: a, x, y 888! #ifdef WITH_GPU_VERSION 889! call cublas_dsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) 890! #endif 891! end subroutine cublas_dsymv 892! 893! subroutine cublas_ssymv(cta, n, alpha, a, lda, x, incx, beta, y, incy) 894! use iso_c_binding 895! 896! implicit none 897! character(1,C_CHAR),value :: cta 898! integer(kind=C_INT) :: n 899! integer(kind=C_INT), intent(in) :: lda,incx,incy 900! real(kind=C_FLOAT) :: alpha,beta 901! integer(kind=C_intptr_T) :: a, x, y 902! #ifdef WITH_GPU_VERSION 903! call cublas_ssymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) 904! #endif 905! end subroutine cublas_ssymv 906! 907! subroutine cublas_zsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy) 908! use iso_c_binding 909! 910! implicit none 911! character(1,C_CHAR),value :: cta 912! integer(kind=C_INT) :: n 913! integer(kind=C_INT), intent(in) :: lda,incx,incy 914! complex(kind=C_DOUBLE_COMPLEX) :: alpha,beta 915! integer(kind=C_intptr_T) :: a, x, y 916! #ifdef WITH_GPU_VERSION 917! ! call cublas_zsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) 918! #endif 919! end subroutine cublas_zsymv 920! 921! subroutine cublas_csymv(cta, n, alpha, a, lda, x, incx, beta, y, incy) 922! use iso_c_binding 923! 924! implicit none 925! character(1,C_CHAR),value :: cta 926! integer(kind=C_INT) :: n 927! integer(kind=C_INT), intent(in) :: lda,incx,incy 928! complex(kind=C_FLOAT_COMPLEX) :: alpha,beta 929! integer(kind=C_intptr_T) :: a, x, y 930! #ifdef WITH_GPU_VERSION 931! ! call cublas_csymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) 932! #endif 933! end subroutine cublas_csymv 934 935 936end module cuda_functions 937