1!    This file is part of ELPA.
2!
3!    The ELPA library was originally created by the ELPA consortium,
4!    consisting of the following organizations:
5!
6!    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
7!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
8!      Informatik,
9!    - Technische Universität München, Lehrstuhl für Informatik mit
10!      Schwerpunkt Wissenschaftliches Rechnen ,
11!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
12!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
13!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
14!      and
15!    - IBM Deutschland GmbH
16!
17!
18!    More information can be found here:
19!    http://elpa.rzg.mpg.de/
20!
21!    ELPA is free software: you can redistribute it and/or modify
22!    it under the terms of the version 3 of the license of the
23!    GNU Lesser General Public License as published by the Free
24!    Software Foundation.
25!
26!    ELPA is distributed in the hope that it will be useful,
27!    but WITHOUT ANY WARRANTY; without even the implied warranty of
28!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29!    GNU Lesser General Public License for more details.
30!
31!    You should have received a copy of the GNU Lesser General Public License
32!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
33!
34!    ELPA reflects a substantial effort on the part of the original
35!    ELPA consortium, and we ask you to respect the spirit of the
36!    license that we chose: i.e., please contribute any changes you
37!    may have back to the original ELPA library distribution, and keep
38!    any derivatives of ELPA under the same license that we chose for
39!    the original distribution, the GNU Lesser General Public License.
40!
41! This file was written by A. Marek, MPCDF
42
43
44#include "config-f90.h"
45module cuda_functions
46  use iso_c_binding
47  use precision
48  implicit none
49
50  public
51
52  integer(kind=ik) :: cudaMemcpyHostToDevice
53  integer(kind=ik) :: cudaMemcpyDeviceToHost
54  integer(kind=ik) :: cudaHostRegisterPortable
55  integer(kind=ik) :: cudaHostRegisterMapped
56  integer(kind=ik) :: cudaMemcpyDeviceToDevice
57
58  ! TODO global variable, has to be changed
59  integer(kind=C_intptr_T) :: cublasHandle = -1
60
61  integer(kind=c_intptr_t), parameter :: size_of_double_real    = 8_rk8
62#ifdef WANT_SINGLE_PRECISION_REAL
63  integer(kind=c_intptr_t), parameter :: size_of_single_real    = 4_rk4
64#endif
65
66  integer(kind=c_intptr_t), parameter :: size_of_double_complex = 16_ck8
67#ifdef WANT_SINGLE_PRECISION_COMPLEX
68  integer(kind=c_intptr_t), parameter :: size_of_single_complex = 8_ck4
69#endif
70
71  ! functions to set and query the CUDA devices
72  interface
73    function cublas_create_c(handle) result(istat) &
74             bind(C, name="cublasCreateFromC")
75      use iso_c_binding
76      implicit none
77      integer(kind=C_intptr_T) :: handle
78      integer(kind=C_INT)  :: istat
79    end function cublas_create_c
80  end interface
81
82  interface
83    function cublas_destroy_c(handle) result(istat) &
84             bind(C, name="cublasDestroyFromC")
85      use iso_c_binding
86      implicit none
87      integer(kind=C_intptr_T) :: handle
88      integer(kind=C_INT)  :: istat
89    end function cublas_destroy_c
90  end interface
91
92  interface
93    function cuda_threadsynchronize_c() result(istat) &
94             bind(C,name="cudaThreadSynchronizeFromC")
95      use iso_c_binding
96      implicit none
97      integer(kind=C_INT)  :: istat
98    end function cuda_threadsynchronize_c
99  end interface
100
101  interface
102    function cuda_setdevice_c(n) result(istat) &
103             bind(C, name="cudaSetDeviceFromC")
104
105      use iso_c_binding
106      implicit none
107      integer(kind=C_INT), value    :: n
108      integer(kind=C_INT)           :: istat
109    end function cuda_setdevice_c
110  end interface
111
112  interface
113    function cuda_getdevicecount_c(n) result(istat) &
114             bind(C, name="cudaGetDeviceCountFromC")
115      use iso_c_binding
116      implicit none
117      integer(kind=C_INT), intent(out) :: n
118      integer(kind=C_INT)              :: istat
119    end function cuda_getdevicecount_c
120  end interface
121
122  interface
123    function cuda_devicesynchronize_c()result(istat) &
124             bind(C,name='cudaDeviceSynchronizeFromC')
125
126      use iso_c_binding
127
128      implicit none
129      integer(kind=C_INT)                       :: istat
130
131    end function cuda_devicesynchronize_c
132  end interface
133
134
135  ! functions to copy CUDA memory
136  interface
137    function cuda_memcpyDeviceToDevice_c() result(flag) &
138             bind(C, name="cudaMemcpyDeviceToDeviceFromC")
139      use iso_c_binding
140      implicit none
141      integer(kind=c_int) :: flag
142    end function
143  end interface
144
145  interface
146    function cuda_memcpyHostToDevice_c() result(flag) &
147             bind(C, name="cudaMemcpyHostToDeviceFromC")
148      use iso_c_binding
149      implicit none
150      integer(kind=c_int) :: flag
151    end function
152  end interface
153
154  interface
155    function cuda_memcpyDeviceToHost_c() result(flag) &
156             bind(C, name="cudaMemcpyDeviceToHostFromC")
157      use iso_c_binding
158      implicit none
159      integer(kind=c_int) :: flag
160    end function
161  end interface
162
163  interface
164    function cuda_hostRegisterPortable_c() result(flag) &
165             bind(C, name="cudaHostRegisterPortableFromC")
166      use iso_c_binding
167      implicit none
168      integer(kind=c_int) :: flag
169    end function
170  end interface
171
172  interface
173    function cuda_hostRegisterMapped_c() result(flag) &
174             bind(C, name="cudaHostRegisterMappedFromC")
175      use iso_c_binding
176      implicit none
177      integer(kind=c_int) :: flag
178    end function
179  end interface
180
181  interface
182    function cuda_memcpy_c(dst, src, size, dir) result(istat) &
183             bind(C, name="cudaMemcpyFromC")
184
185      use iso_c_binding
186
187      implicit none
188      integer(kind=C_intptr_t), value              :: dst
189      integer(kind=C_intptr_t), value              :: src
190      integer(kind=c_intptr_t), intent(in), value    :: size
191      integer(kind=C_INT), intent(in), value       :: dir
192      integer(kind=C_INT)                          :: istat
193
194    end function cuda_memcpy_c
195  end interface
196
197  interface
198    function cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) result(istat) &
199             bind(C, name="cudaMemcpy2dFromC")
200
201      use iso_c_binding
202
203      implicit none
204
205      integer(kind=C_intptr_T), value                :: dst
206      integer(kind=c_intptr_t), intent(in), value    :: dpitch
207      integer(kind=C_intptr_T), value                :: src
208      integer(kind=c_intptr_t), intent(in), value    :: spitch
209      integer(kind=c_intptr_t), intent(in), value    :: width
210      integer(kind=c_intptr_t), intent(in), value    :: height
211      integer(kind=C_INT), intent(in), value         :: dir
212      integer(kind=C_INT)                            :: istat
213
214    end function cuda_memcpy2d_c
215  end interface
216
217  ! functions to allocate and free CUDA memory
218
219  interface
220    function cuda_free_c(a) result(istat) &
221             bind(C, name="cudaFreeFromC")
222
223      use iso_c_binding
224
225      implicit none
226      integer(kind=C_intptr_T), value  :: a
227      integer(kind=C_INT)              :: istat
228
229    end function cuda_free_c
230  end interface
231
232  interface
233    function cuda_malloc_c(a, width_height) result(istat) &
234             bind(C, name="cudaMallocFromC")
235
236      use iso_c_binding
237      implicit none
238
239      integer(kind=C_intptr_T)                    :: a
240      integer(kind=c_intptr_t), intent(in), value   :: width_height
241      integer(kind=C_INT)                         :: istat
242
243    end function cuda_malloc_c
244  end interface
245
246  interface
247    function cuda_memset_c(a, val, size) result(istat) &
248             bind(C, name="cudaMemsetFromC")
249
250      use iso_c_binding
251
252      implicit none
253
254      integer(kind=C_intptr_T), value            :: a
255      integer(kind=C_INT), value                 :: val
256      integer(kind=c_intptr_t), intent(in), value  :: size
257      integer(kind=C_INT)                        :: istat
258
259    end function cuda_memset_c
260  end interface
261
262  ! cuBLAS
263  interface
264    subroutine cublas_dgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) &
265                              bind(C,name='cublasDgemm_elpa_wrapper')
266
267      use iso_c_binding
268
269      implicit none
270      character(1,C_CHAR),value               :: cta, ctb
271      integer(kind=C_INT),value               :: m,n,k
272      integer(kind=C_INT), intent(in), value  :: lda,ldb,ldc
273      real(kind=C_DOUBLE),value               :: alpha,beta
274      integer(kind=C_intptr_T), value         :: a, b, c
275      integer(kind=C_intptr_T), value         :: handle
276
277    end subroutine cublas_dgemm_c
278  end interface
279
280  interface
281    subroutine cublas_sgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) &
282                              bind(C,name='cublasSgemm_elpa_wrapper')
283
284      use iso_c_binding
285
286      implicit none
287      character(1,C_CHAR),value               :: cta, ctb
288      integer(kind=C_INT),value               :: m,n,k
289      integer(kind=C_INT), intent(in), value  :: lda,ldb,ldc
290      real(kind=C_FLOAT),value                :: alpha,beta
291      integer(kind=C_intptr_T), value         :: a, b, c
292      integer(kind=C_intptr_T), value         :: handle
293
294    end subroutine cublas_sgemm_c
295  end interface
296
297  interface
298    subroutine cublas_dtrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
299                              bind(C,name='cublasDtrmm_elpa_wrapper')
300
301      use iso_c_binding
302
303      implicit none
304      character(1,C_CHAR),value               :: side, uplo, trans, diag
305      integer(kind=C_INT),value               :: m,n
306      integer(kind=C_INT), intent(in), value  :: lda,ldb
307      real(kind=C_DOUBLE), value              :: alpha
308      integer(kind=C_intptr_T), value         :: a, b
309      integer(kind=C_intptr_T), value         :: handle
310
311    end subroutine cublas_dtrmm_c
312  end interface
313
314  interface
315    subroutine cublas_strmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
316                              bind(C,name='cublasStrmm_elpa_wrapper')
317
318      use iso_c_binding
319
320      implicit none
321      character(1,C_CHAR),value               :: side, uplo, trans, diag
322      integer(kind=C_INT),value               :: m,n
323      integer(kind=C_INT), intent(in), value  :: lda,ldb
324      real(kind=C_FLOAT), value               :: alpha
325      integer(kind=C_intptr_T), value         :: a, b
326      integer(kind=C_intptr_T), value         :: handle
327
328    end subroutine cublas_strmm_c
329  end interface
330
331  interface
332    subroutine cublas_zgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) &
333                              bind(C,name='cublasZgemm_elpa_wrapper')
334
335      use iso_c_binding
336
337      implicit none
338      character(1,C_CHAR),value              :: cta, ctb
339      integer(kind=C_INT),value              :: m,n,k
340      integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
341      complex(kind=C_DOUBLE_COMPLEX),value           :: alpha,beta
342      integer(kind=C_intptr_T), value        :: a, b, c
343      integer(kind=C_intptr_T), value         :: handle
344
345    end subroutine cublas_zgemm_c
346  end interface
347
348  interface
349    subroutine cublas_cgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) &
350                              bind(C,name='cublasCgemm_elpa_wrapper')
351
352      use iso_c_binding
353
354      implicit none
355      character(1,C_CHAR),value              :: cta, ctb
356      integer(kind=C_INT),value              :: m,n,k
357      integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
358      complex(kind=C_FLOAT_COMPLEX),value            :: alpha,beta
359      integer(kind=C_intptr_T), value        :: a, b, c
360      integer(kind=C_intptr_T), value         :: handle
361
362    end subroutine cublas_cgemm_c
363  end interface
364
365  interface
366    subroutine cublas_ztrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
367                              bind(C,name='cublasZtrmm_elpa_wrapper')
368
369      use iso_c_binding
370
371      implicit none
372      character(1,C_CHAR),value              :: side, uplo, trans, diag
373      integer(kind=C_INT),value              :: m,n
374      integer(kind=C_INT), intent(in), value :: lda,ldb
375      complex(kind=C_DOUBLE_COMPLEX), value          :: alpha
376      integer(kind=C_intptr_T), value        :: a, b
377      integer(kind=C_intptr_T), value         :: handle
378
379    end subroutine cublas_ztrmm_c
380  end interface
381
382  interface
383    subroutine cublas_ctrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
384                              bind(C,name='cublasCtrmm_elpa_wrapper')
385
386      use iso_c_binding
387
388      implicit none
389      character(1,C_CHAR),value              :: side, uplo, trans, diag
390      integer(kind=C_INT),value              :: m,n
391      integer(kind=C_INT), intent(in), value :: lda,ldb
392      complex(kind=C_FLOAT_COMPLEX), value           :: alpha
393      integer(kind=C_intptr_T), value        :: a, b
394      integer(kind=C_intptr_T), value         :: handle
395
396    end subroutine cublas_ctrmm_c
397  end interface
398
399  interface
400    subroutine cublas_dgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
401                              bind(C,name='cublasDgemv_elpa_wrapper')
402
403      use iso_c_binding
404
405      implicit none
406      character(1,C_CHAR),value               :: cta
407      integer(kind=C_INT),value               :: m,n
408      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
409      real(kind=C_DOUBLE),value               :: alpha,beta
410      integer(kind=C_intptr_T), value         :: a, x, y
411      integer(kind=C_intptr_T), value         :: handle
412
413    end subroutine cublas_dgemv_c
414  end interface
415
416  interface
417    subroutine cublas_sgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
418                              bind(C,name='cublasSgemv_elpa_wrapper')
419
420      use iso_c_binding
421
422      implicit none
423      character(1,C_CHAR),value               :: cta
424      integer(kind=C_INT),value               :: m,n
425      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
426      real(kind=C_FLOAT),value                :: alpha,beta
427      integer(kind=C_intptr_T), value         :: a, x, y
428      integer(kind=C_intptr_T), value         :: handle
429
430    end subroutine cublas_sgemv_c
431  end interface
432
433  interface
434    subroutine cublas_zgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
435                              bind(C,name='cublasZgemv_elpa_wrapper')
436
437      use iso_c_binding
438
439      implicit none
440      character(1,C_CHAR),value               :: cta
441      integer(kind=C_INT),value               :: m,n
442      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
443      complex(kind=C_DOUBLE_COMPLEX),value               :: alpha,beta
444      integer(kind=C_intptr_T), value         :: a, x, y
445      integer(kind=C_intptr_T), value         :: handle
446
447    end subroutine cublas_zgemv_c
448  end interface
449
450  interface
451    subroutine cublas_cgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
452                              bind(C,name='cublasCgemv_elpa_wrapper')
453
454      use iso_c_binding
455
456      implicit none
457      character(1,C_CHAR),value               :: cta
458      integer(kind=C_INT),value               :: m,n
459      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
460      complex(kind=C_FLOAT_COMPLEX),value                :: alpha,beta
461      integer(kind=C_intptr_T), value         :: a, x, y
462      integer(kind=C_intptr_T), value         :: handle
463
464    end subroutine cublas_cgemv_c
465  end interface
466
467
468  contains
469
470    ! functions to set and query the CUDA devices
471
472   function cublas_create(handle) result(success)
473     use iso_c_binding
474     implicit none
475
476     integer(kind=C_intptr_t)                  :: handle
477     logical                                   :: success
478#ifdef WITH_GPU_VERSION
479     success = cublas_create_c(handle) /= 0
480#else
481     success = .true.
482#endif
483   end function
484
485   function cublas_destroy(handle) result(success)
486     use iso_c_binding
487     implicit none
488
489     integer(kind=C_intptr_t)                  :: handle
490     logical                                   :: success
491#ifdef WITH_GPU_VERSION
492     success = cublas_destroy_c(handle) /= 0
493#else
494     success = .true.
495#endif
496   end function
497
498    function cuda_threadsynchronize() result(success)
499      use iso_c_binding
500
501      implicit none
502
503      logical :: success
504#ifdef WITH_GPU_VERSION
505      success = cuda_threadsynchronize_c() /= 0
506#else
507      success = .true.
508#endif
509    end function cuda_threadsynchronize
510
511    function cuda_setdevice(n) result(success)
512      use iso_c_binding
513
514      implicit none
515
516      integer(kind=ik), intent(in)  :: n
517      logical                       :: success
518#ifdef WITH_GPU_VERSION
519      success = cuda_setdevice_c(int(n,kind=c_int)) /= 0
520#else
521      success = .true.
522#endif
523    end function cuda_setdevice
524
525    function cuda_getdevicecount(n) result(success)
526      use iso_c_binding
527      implicit none
528
529      integer(kind=ik)     :: n
530      integer(kind=c_int)  :: nCasted
531      logical              :: success
532#ifdef WITH_GPU_VERSION
533      success = cuda_getdevicecount_c(nCasted) /=0
534      n = int(nCasted)
535#else
536      success = .true.
537      n = 0
538#endif
539    end function cuda_getdevicecount
540
541    function cuda_devicesynchronize()result(success)
542
543      use iso_c_binding
544
545      implicit none
546      logical :: success
547#ifdef WITH_GPU_VERSION
548      success = cuda_devicesynchronize_c() /=0
549#else
550      success = .true.
551#endif
552    end function cuda_devicesynchronize
553    ! functions to allocate and free memory
554
555    function cuda_malloc(a, width_height) result(success)
556
557     use iso_c_binding
558     implicit none
559
560     integer(kind=C_intptr_t)                  :: a
561     integer(kind=c_intptr_t), intent(in)        :: width_height
562     logical                                   :: success
563#ifdef WITH_GPU_VERSION
564     success = cuda_malloc_c(a, width_height) /= 0
565#else
566     success = .true.
567#endif
568   end function
569
570   function cuda_free(a) result(success)
571
572     use iso_c_binding
573
574     implicit none
575     integer(kind=C_intptr_T) :: a
576     logical                  :: success
577#ifdef WITH_GPU_VERSION
578     success = cuda_free_c(a) /= 0
579#else
580     success = .true.
581#endif
582   end function cuda_free
583
584 function cuda_memset(a, val, size) result(success)
585
586   use iso_c_binding
587
588   implicit none
589
590   integer(kind=c_intptr_t)                :: a
591   integer(kind=ik)                        :: val
592   integer(kind=c_intptr_t), intent(in)      :: size
593   integer(kind=C_INT)                     :: istat
594
595   logical :: success
596#ifdef WITH_GPU_VERSION
597   success= cuda_memset_c(a, int(val,kind=c_int), int(size,kind=c_intptr_t)) /=0
598#else
599   success = .true.
600#endif
601 end function cuda_memset
602
603 ! functions to memcopy CUDA memory
604
605 function cuda_memcpyDeviceToDevice() result(flag)
606   use iso_c_binding
607   implicit none
608   integer(kind=ik) :: flag
609#ifdef WITH_GPU_VERSION
610   flag = int(cuda_memcpyDeviceToDevice_c())
611#else
612   flag = 0
613#endif
614 end function
615
616 function cuda_memcpyHostToDevice() result(flag)
617   use iso_c_binding
618   use precision
619   implicit none
620   integer(kind=ik) :: flag
621#ifdef WITH_GPU_VERSION
622   flag = int(cuda_memcpyHostToDevice_c())
623#else
624   flag = 0
625#endif
626 end function
627
628 function cuda_memcpyDeviceToHost() result(flag)
629   use iso_c_binding
630   use precision
631   implicit none
632   integer(kind=ik) :: flag
633#ifdef WITH_GPU_VERSION
634   flag = int( cuda_memcpyDeviceToHost_c())
635#else
636   flag = 0
637#endif
638 end function
639
640 function cuda_hostRegisterPortable() result(flag)
641   use iso_c_binding
642   use precision
643   implicit none
644   integer(kind=ik) :: flag
645#ifdef WITH_GPU_VERSION
646   flag = int(cuda_hostRegisterPortable_c())
647#else
648   flag = 0
649#endif
650 end function
651
652 function cuda_hostRegisterMapped() result(flag)
653   use iso_c_binding
654   use precision
655   implicit none
656   integer(kind=ik) :: flag
657#ifdef WITH_GPU_VERSION
658   flag = int(cuda_hostRegisterMapped_c())
659#else
660   flag = 0
661#endif
662 end function
663
664 function cuda_memcpy(dst, src, size, dir) result(success)
665
666      use iso_c_binding
667
668      implicit none
669      integer(kind=C_intptr_t)              :: dst
670      integer(kind=C_intptr_t)              :: src
671      integer(kind=c_intptr_t), intent(in)    :: size
672      integer(kind=C_INT), intent(in)       :: dir
673      logical :: success
674
675#ifdef WITH_GPU_VERSION
676        success = cuda_memcpy_c(dst, src, size, dir) /= 0
677#else
678        success = .true.
679#endif
680    end function
681
682    function cuda_memcpy2d(dst, dpitch, src, spitch, width, height , dir) result(success)
683
684      use iso_c_binding
685
686      implicit none
687
688      integer(kind=C_intptr_T)           :: dst
689      integer(kind=c_intptr_t), intent(in) :: dpitch
690      integer(kind=C_intptr_T)           :: src
691      integer(kind=c_intptr_t), intent(in) :: spitch
692      integer(kind=c_intptr_t), intent(in) :: width
693      integer(kind=c_intptr_t), intent(in) :: height
694      integer(kind=C_INT), intent(in)    :: dir
695      logical                            :: success
696#ifdef WITH_GPU_VERSION
697      success = cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) /= 0
698#else
699      success = .true.
700#endif
701    end function cuda_memcpy2d
702
703    ! cuBLAS
704    subroutine cublas_dgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
705      use iso_c_binding
706
707      implicit none
708      character(1,C_CHAR),value       :: cta, ctb
709      integer(kind=C_INT)             :: m,n,k
710      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
711      real(kind=C_DOUBLE)             :: alpha,beta
712      integer(kind=C_intptr_T)        :: a, b, c
713#ifdef WITH_GPU_VERSION
714      call cublas_dgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
715#endif
716    end subroutine cublas_dgemm
717
718    subroutine cublas_sgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
719      use iso_c_binding
720
721      implicit none
722      character(1,C_CHAR),value       :: cta, ctb
723      integer(kind=C_INT)             :: m,n,k
724      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
725      real(kind=C_FLOAT)              :: alpha,beta
726      integer(kind=C_intptr_T)        :: a, b, c
727#ifdef WITH_GPU_VERSION
728      call cublas_sgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
729#endif
730    end subroutine cublas_sgemm
731
732    subroutine cublas_dtrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
733
734      use iso_c_binding
735
736      implicit none
737      character(1,C_CHAR),value       :: side, uplo, trans, diag
738      integer(kind=C_INT)             :: m,n
739      integer(kind=C_INT), intent(in) :: lda,ldb
740      real(kind=C_DOUBLE)             :: alpha
741      integer(kind=C_intptr_T)        :: a, b
742#ifdef WITH_GPU_VERSION
743      call cublas_dtrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
744#endif
745    end subroutine cublas_dtrmm
746
747    subroutine cublas_strmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
748
749      use iso_c_binding
750
751      implicit none
752      character(1,C_CHAR),value       :: side, uplo, trans, diag
753      integer(kind=C_INT)             :: m,n
754      integer(kind=C_INT), intent(in) :: lda,ldb
755      real(kind=C_FLOAT)              :: alpha
756      integer(kind=C_intptr_T)        :: a, b
757#ifdef WITH_GPU_VERSION
758      call cublas_strmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
759#endif
760    end subroutine cublas_strmm
761
762    subroutine cublas_zgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
763
764      use iso_c_binding
765
766      implicit none
767      character(1,C_CHAR),value       :: cta, ctb
768      integer(kind=C_INT)             :: m,n,k
769      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
770      complex(kind=C_DOUBLE_COMPLEX)          :: alpha,beta
771      integer(kind=C_intptr_T)        :: a, b, c
772#ifdef WITH_GPU_VERSION
773      call cublas_zgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
774#endif
775    end subroutine cublas_zgemm
776
777    subroutine cublas_cgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
778
779      use iso_c_binding
780
781      implicit none
782      character(1,C_CHAR),value       :: cta, ctb
783      integer(kind=C_INT)             :: m,n,k
784      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
785      complex(kind=C_FLOAT_COMPLEX)           :: alpha,beta
786      integer(kind=C_intptr_T)        :: a, b, c
787#ifdef WITH_GPU_VERSION
788      call cublas_cgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
789#endif
790    end subroutine cublas_cgemm
791
792    subroutine cublas_ztrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
793
794      use iso_c_binding
795
796      implicit none
797      character(1,C_CHAR),value       :: side, uplo, trans, diag
798      integer(kind=C_INT)             :: m,n
799      integer(kind=C_INT), intent(in) :: lda,ldb
800      complex(kind=C_DOUBLE_COMPLEX)          :: alpha
801      integer(kind=C_intptr_T)        :: a, b
802#ifdef WITH_GPU_VERSION
803      call cublas_ztrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
804#endif
805    end subroutine cublas_ztrmm
806
807    subroutine cublas_ctrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
808
809      use iso_c_binding
810
811      implicit none
812      character(1,C_CHAR),value       :: side, uplo, trans, diag
813      integer(kind=C_INT)             :: m,n
814      integer(kind=C_INT), intent(in) :: lda,ldb
815      complex(kind=C_FLOAT_COMPLEX)           :: alpha
816      integer(kind=C_intptr_T)        :: a, b
817#ifdef WITH_GPU_VERSION
818      call cublas_ctrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
819#endif
820    end subroutine cublas_ctrmm
821
822    subroutine cublas_dgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
823      use iso_c_binding
824
825      implicit none
826      character(1,C_CHAR),value       :: cta
827      integer(kind=C_INT)             :: m,n
828      integer(kind=C_INT), intent(in) :: lda,incx,incy
829      real(kind=C_DOUBLE)             :: alpha,beta
830      integer(kind=C_intptr_T)        :: a, x, y
831#ifdef WITH_GPU_VERSION
832      call cublas_dgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
833#endif
834    end subroutine cublas_dgemv
835
836    subroutine cublas_sgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
837      use iso_c_binding
838
839      implicit none
840      character(1,C_CHAR),value       :: cta
841      integer(kind=C_INT)             :: m,n
842      integer(kind=C_INT), intent(in) :: lda,incx,incy
843      real(kind=C_FLOAT)              :: alpha,beta
844      integer(kind=C_intptr_T)        :: a, x, y
845#ifdef WITH_GPU_VERSION
846      call cublas_sgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
847#endif
848    end subroutine cublas_sgemv
849
850    subroutine cublas_zgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
851      use iso_c_binding
852
853      implicit none
854      character(1,C_CHAR),value       :: cta
855      integer(kind=C_INT)             :: m,n
856      integer(kind=C_INT), intent(in) :: lda,incx,incy
857      complex(kind=C_DOUBLE_COMPLEX)             :: alpha,beta
858      integer(kind=C_intptr_T)        :: a, x, y
859#ifdef WITH_GPU_VERSION
860      call cublas_zgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
861#endif
862    end subroutine cublas_zgemv
863
864    subroutine cublas_cgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
865      use iso_c_binding
866
867      implicit none
868      character(1,C_CHAR),value       :: cta
869      integer(kind=C_INT)             :: m,n
870      integer(kind=C_INT), intent(in) :: lda,incx,incy
871      complex(kind=C_FLOAT_COMPLEX)              :: alpha,beta
872      integer(kind=C_intptr_T)        :: a, x, y
873#ifdef WITH_GPU_VERSION
874      call cublas_cgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
875#endif
876    end subroutine cublas_cgemv
877
878
879!     subroutine cublas_dsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
880!       use iso_c_binding
881!
882!       implicit none
883!       character(1,C_CHAR),value       :: cta
884!       integer(kind=C_INT)             :: n
885!       integer(kind=C_INT), intent(in) :: lda,incx,incy
886!       real(kind=C_DOUBLE)             :: alpha,beta
887!       integer(kind=C_intptr_T)        :: a, x, y
888! #ifdef WITH_GPU_VERSION
889!       call cublas_dsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
890! #endif
891!     end subroutine cublas_dsymv
892!
893!     subroutine cublas_ssymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
894!       use iso_c_binding
895!
896!       implicit none
897!       character(1,C_CHAR),value       :: cta
898!       integer(kind=C_INT)             :: n
899!       integer(kind=C_INT), intent(in) :: lda,incx,incy
900!       real(kind=C_FLOAT)              :: alpha,beta
901!       integer(kind=C_intptr_T)        :: a, x, y
902! #ifdef WITH_GPU_VERSION
903!       call cublas_ssymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
904! #endif
905!     end subroutine cublas_ssymv
906!
907!     subroutine cublas_zsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
908!       use iso_c_binding
909!
910!       implicit none
911!       character(1,C_CHAR),value       :: cta
912!       integer(kind=C_INT)             :: n
913!       integer(kind=C_INT), intent(in) :: lda,incx,incy
914!       complex(kind=C_DOUBLE_COMPLEX)             :: alpha,beta
915!       integer(kind=C_intptr_T)        :: a, x, y
916! #ifdef WITH_GPU_VERSION
917! !       call cublas_zsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
918! #endif
919!     end subroutine cublas_zsymv
920!
921!     subroutine cublas_csymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
922!       use iso_c_binding
923!
924!       implicit none
925!       character(1,C_CHAR),value       :: cta
926!       integer(kind=C_INT)             :: n
927!       integer(kind=C_INT), intent(in) :: lda,incx,incy
928!       complex(kind=C_FLOAT_COMPLEX)              :: alpha,beta
929!       integer(kind=C_intptr_T)        :: a, x, y
930! #ifdef WITH_GPU_VERSION
931! !       call cublas_csymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
932! #endif
933!     end subroutine cublas_csymv
934
935
936end module cuda_functions
937