1!! Copyright (C) 2016 X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include <global.h>
20
21module accel_blas_oct_m
22#ifdef HAVE_OPENCL
23  use clblas
24#endif
25  use accel_oct_m
26  use global_oct_m
27  use iso_c_binding
28  use messages_oct_m
29  use profiling_oct_m
30  use types_oct_m
31
32  implicit none
33
34  private
35
36  public ::                        &
37    daccel_dot,                    &
38    zaccel_dot,                    &
39    daccel_nrm2,                   &
40    zaccel_nrm2,                   &
41    daccel_herk,                   &
42    zaccel_herk,                   &
43    daccel_trsm,                   &
44    zaccel_trsm,                   &
45    daccel_gemm,                   &
46    zaccel_gemm,                   &
47    daccel_gemv,                   &
48    zaccel_gemv
49
50  integer, parameter, public ::                      &
51    CUBLAS_DIAG_NON_UNIT = 0,                        &
52    CUBLAS_DIAG_UNIT     = 1
53
54  integer, parameter, public ::                      &
55    CUBLAS_OP_N = 0,                                 &
56    CUBLAS_OP_T = 1,                                 &
57    CUBLAS_OP_C = 2
58
59  integer, parameter, public ::                      &
60    CUBLAS_FILL_MODE_LOWER = 0,                      &
61    CUBLAS_FILL_MODE_UPPER = 1
62
63  integer, parameter, public ::                      &
64    CUBLAS_SIDE_LEFT  = 0,                           &
65    CUBLAS_SIDE_RIGHT = 1
66
67#ifdef HAVE_OPENCL
68  integer, parameter, public ::                      &
69    ACCEL_BLAS_LEFT  = clblasLeft,                   &
70    ACCEL_BLAS_RIGHT = clblasRight
71
72  integer, parameter, public ::                      &
73    ACCEL_BLAS_LOWER = clblasLower,                  &
74    ACCEL_BLAS_UPPER = clblasUpper
75
76  integer, parameter, public ::                      &
77    ACCEL_BLAS_N = clblasNoTrans,                    &
78    ACCEL_BLAS_T = clblasTrans,                      &
79    ACCEL_BLAS_C = clblasConjTrans
80
81  integer, parameter, public ::                      &
82    ACCEL_BLAS_DIAG_NON_UNIT = clblasNonUnit,        &
83    ACCEL_BLAS_DIAG_UNIT     = clblasUnit
84#else
85  integer, parameter, public ::                      &
86    ACCEL_BLAS_LEFT  = CUBLAS_SIDE_LEFT,             &
87    ACCEL_BLAS_RIGHT = CUBLAS_SIDE_RIGHT
88
89  integer, parameter, public ::                      &
90    ACCEL_BLAS_LOWER = CUBLAS_FILL_MODE_LOWER,       &
91    ACCEL_BLAS_UPPER = CUBLAS_FILL_MODE_UPPER
92
93  integer, parameter, public ::                      &
94    ACCEL_BLAS_N = CUBLAS_OP_N,                      &
95    ACCEL_BLAS_T = CUBLAS_OP_T,                      &
96    ACCEL_BLAS_C = CUBLAS_OP_C
97
98  integer, parameter, public ::                      &
99    ACCEL_BLAS_DIAG_NON_UNIT = CUBLAS_DIAG_NON_UNIT, &
100    ACCEL_BLAS_DIAG_UNIT     = CUBLAS_DIAG_UNIT
101#endif
102
103  ! DOT
104  interface
105    subroutine cuda_blas_ddot(handle, n, x, offx, incx, y, offy, incy, res, offres)
106      use iso_c_binding
107
108      implicit none
109
110      type(c_ptr),  intent(in)    :: handle
111      integer(8),   intent(in)    :: n
112      type(c_ptr),  intent(in)    :: x
113      integer(8),   intent(in)    :: offx
114      integer(8),   intent(in)    :: incx
115      type(c_ptr),  intent(in)    :: y
116      integer(8),   intent(in)    :: offy
117      integer(8),   intent(in)    :: incy
118      type(c_ptr),  intent(inout) :: res
119      integer(8),   intent(in)    :: offres
120    end subroutine cuda_blas_ddot
121
122    subroutine cuda_blas_zdotc(handle, n, x, offx, incx, y, offy, incy, res, offres)
123      use iso_c_binding
124
125      implicit none
126
127      type(c_ptr),  intent(in)    :: handle
128      integer(8),   intent(in)    :: n
129      type(c_ptr),  intent(in)    :: x
130      integer(8),   intent(in)    :: offx
131      integer(8),   intent(in)    :: incx
132      type(c_ptr),  intent(in)    :: y
133      integer(8),   intent(in)    :: offy
134      integer(8),   intent(in)    :: incy
135      type(c_ptr),  intent(inout) :: res
136      integer(8),   intent(in)    :: offres
137    end subroutine cuda_blas_zdotc
138  end interface
139
140  ! NRM2
141  interface
142    subroutine cuda_blas_dnrm2(handle, n, x, offx, incx, res, offres)
143      use iso_c_binding
144
145      implicit none
146
147      type(c_ptr),  intent(in)    :: handle
148      integer(8),   intent(in)    :: n
149      type(c_ptr),  intent(in)    :: x
150      integer(8),   intent(in)    :: offx
151      integer(8),   intent(in)    :: incx
152      type(c_ptr),  intent(inout) :: res
153      integer(8),   intent(in)    :: offres
154    end subroutine cuda_blas_dnrm2
155
156    subroutine cuda_blas_znrm2(handle, n, x, offx, incx, res, offres)
157      use iso_c_binding
158
159      implicit none
160
161      type(c_ptr),  intent(in)    :: handle
162      integer(8),   intent(in)    :: n
163      type(c_ptr),  intent(in)    :: x
164      integer(8),   intent(in)    :: offx
165      integer(8),   intent(in)    :: incx
166      type(c_ptr),  intent(inout) :: res
167      integer(8),   intent(in)    :: offres
168    end subroutine cuda_blas_znrm2
169  end interface
170
171  ! GEMM
172  interface
173    subroutine cuda_blas_dgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)
174      use iso_c_binding
175
176      implicit none
177
178      type(c_ptr),  intent(in)    :: handle
179      integer,      intent(in)    :: transa
180      integer,      intent(in)    :: transb
181      integer(8),   intent(in)    :: m
182      integer(8),   intent(in)    :: n
183      integer(8),   intent(in)    :: k
184      type(c_ptr),  intent(in)    :: alpha
185      type(c_ptr),  intent(in)    :: A
186      integer(8),   intent(in)    :: lda
187      type(c_ptr),  intent(in)    :: B
188      integer(8),   intent(in)    :: ldb
189      type(c_ptr),  intent(in)    :: beta
190      type(c_ptr),  intent(inout) :: C
191      integer(8),   intent(in)    :: ldc
192    end subroutine cuda_blas_dgemm
193
194    subroutine cuda_blas_zgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)
195      use iso_c_binding
196
197      implicit none
198
199      type(c_ptr),  intent(in)    :: handle
200      integer,      intent(in)    :: transa
201      integer,      intent(in)    :: transb
202      integer(8),   intent(in)    :: m
203      integer(8),   intent(in)    :: n
204      integer(8),   intent(in)    :: k
205      type(c_ptr),  intent(in)    :: alpha
206      type(c_ptr),  intent(in)    :: A
207      integer(8),   intent(in)    :: lda
208      type(c_ptr),  intent(in)    :: B
209      integer(8),   intent(in)    :: ldb
210      type(c_ptr),  intent(in)    :: beta
211      type(c_ptr),  intent(inout) :: C
212      integer(8),   intent(in)    :: ldc
213    end subroutine cuda_blas_zgemm
214  end interface
215
216
217  ! GEMV
218  interface
219    subroutine cuda_blas_dgemv(handle, transa, m, n, alpha, A, lda, x, incx, beta, y, incy)
220      use iso_c_binding
221
222      implicit none
223
224      type(c_ptr),  intent(in)    :: handle
225      integer,      intent(in)    :: transa
226      integer(8),   intent(in)    :: m
227      integer(8),   intent(in)    :: n
228      type(c_ptr),  intent(in)    :: alpha
229      type(c_ptr),  intent(in)    :: A
230      integer(8),   intent(in)    :: lda
231      type(c_ptr),  intent(in)    :: x
232      integer(8),   intent(in)    :: incx
233      type(c_ptr),  intent(in)    :: beta
234      type(c_ptr),  intent(inout) :: y
235      integer(8),   intent(in)    :: incy
236    end subroutine cuda_blas_dgemv
237
238    subroutine cuda_blas_zgemv(handle, transa, m, n, alpha, A, lda, x, incx, beta, y, incy)
239      use iso_c_binding
240
241      implicit none
242
243      type(c_ptr),  intent(in)    :: handle
244      integer,      intent(in)    :: transa
245      integer(8),   intent(in)    :: m
246      integer(8),   intent(in)    :: n
247      type(c_ptr),  intent(in)    :: alpha
248      type(c_ptr),  intent(in)    :: A
249      integer(8),   intent(in)    :: lda
250      type(c_ptr),  intent(in)    :: x
251      integer(8),   intent(in)    :: incx
252      type(c_ptr),  intent(in)    :: beta
253      type(c_ptr),  intent(inout) :: y
254      integer(8),   intent(in)    :: incy
255    end subroutine cuda_blas_zgemv
256  end interface
257  ! SYRK/HERK
258  interface
259    subroutine cuda_blas_dsyrk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc)
260      use iso_c_binding
261
262      implicit none
263
264      type(c_ptr),  intent(in)    :: handle
265      integer,      intent(in)    :: uplo
266      integer,      intent(in)    :: trans
267      integer(8),   intent(in)    :: n
268      integer(8),   intent(in)    :: k
269      type(c_ptr),  intent(in)    :: alpha
270      type(c_ptr),  intent(in)    :: A
271      integer(8),   intent(in)    :: lda
272      type(c_ptr),  intent(in)    :: beta
273      type(c_ptr),  intent(inout) :: C
274      integer(8),   intent(in)    :: ldc
275    end subroutine cuda_blas_dsyrk
276
277    subroutine cuda_blas_zherk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc)
278      use iso_c_binding
279
280      implicit none
281
282      type(c_ptr),  intent(in)    :: handle
283      integer,      intent(in)    :: uplo
284      integer,      intent(in)    :: trans
285      integer(8),   intent(in)    :: n
286      integer(8),   intent(in)    :: k
287      type(c_ptr),  intent(in)    :: alpha
288      type(c_ptr),  intent(in)    :: A
289      integer(8),   intent(in)    :: lda
290      type(c_ptr),  intent(in)    :: beta
291      type(c_ptr),  intent(inout) :: C
292      integer(8),   intent(in)    :: ldc
293    end subroutine cuda_blas_zherk
294  end interface
295
296  ! TRSM
297  interface
298    subroutine cuda_blas_dtrsm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb)
299      use iso_c_binding
300
301      implicit none
302
303      type(c_ptr),  intent(in)    :: handle
304      integer,      intent(in)    :: side
305      integer,      intent(in)    :: uplo
306      integer,      intent(in)    :: trans
307      integer,      intent(in)    :: diag
308      integer(8),   intent(in)    :: m
309      integer(8),   intent(in)    :: n
310      type(c_ptr),  intent(in)    :: alpha
311      type(c_ptr),  intent(in)    :: A
312      integer(8),   intent(in)    :: lda
313      type(c_ptr),  intent(inout) :: B
314      integer(8),   intent(in)    :: ldb
315    end subroutine cuda_blas_dtrsm
316
317    subroutine cuda_blas_ztrsm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb)
318      use iso_c_binding
319
320      implicit none
321
322      type(c_ptr),  intent(in)    :: handle
323      integer,      intent(in)    :: side
324      integer,      intent(in)    :: uplo
325      integer,      intent(in)    :: trans
326      integer,      intent(in)    :: diag
327      integer(8),   intent(in)    :: m
328      integer(8),   intent(in)    :: n
329      type(c_ptr),  intent(in)    :: alpha
330      type(c_ptr),  intent(in)    :: A
331      integer(8),   intent(in)    :: lda
332      type(c_ptr),  intent(inout) :: B
333      integer(8),   intent(in)    :: ldb
334    end subroutine cuda_blas_ztrsm
335  end interface
336
337contains
338
339#include "undef.F90"
340#include "complex.F90"
341#include "accel_blas_inc.F90"
342
343#include "undef.F90"
344#include "real.F90"
345#include "accel_blas_inc.F90"
346
347end module accel_blas_oct_m
348