1!=======================================================================!
2! Copyright (c) Intel Corporation - All rights reserved.                !
3! This file is part of the LIBXSMM library.                             !
4!                                                                       !
5! For information on the license, see the LICENSE file.                 !
6! Further information: https://github.com/hfp/libxsmm/                  !
7! SPDX-License-Identifier: BSD-3-Clause                                 !
8!=======================================================================!
9! Hans Pabst (Intel Corp.)
10!=======================================================================!
11
12      MODULE LIBXSMM
13        USE, INTRINSIC :: ISO_C_BINDING, ONLY:                          &
14     &    C_DOUBLE, C_FLOAT, C_DOUBLE_COMPLEX, C_FLOAT_COMPLEX,         &
15     &    C_LONG_LONG, C_INT, C_SHORT, C_CHAR, C_INT8_T, C_BOOL,        &
16     &    C_F_POINTER, C_ASSOCIATED, C_LOC, C_PTR,                      &
17     &    C_FUNPTR, C_NULL_FUNPTR, C_NULL_PTR
18        IMPLICIT NONE
19
20        !> Name of the version (stringized set of version numbers).
21        CHARACTER(*), PARAMETER :: LIBXSMM_VERSION = "$VERSION"
22        !> Name of the branch of which the version is derived from.
23        CHARACTER(*), PARAMETER :: LIBXSMM_BRANCH = "$BRANCH"
24        !> Major version based on the last reachable tag under RCS.
25        INTEGER(C_INT), PARAMETER :: LIBXSMM_VERSION_MAJOR = $MAJOR
26        !> Minor version based on the last reachable tag of the RCS.
27        INTEGER(C_INT), PARAMETER :: LIBXSMM_VERSION_MINOR = $MINOR
28        !> Update number based on the last reachable tag under RCS.
29        INTEGER(C_INT), PARAMETER :: LIBXSMM_VERSION_UPDATE = $UPDATE
30        !> Patch number counting commits since the last version stamp.
31        INTEGER(C_INT), PARAMETER :: LIBXSMM_VERSION_PATCH = $PATCH
32
33        !> Parameters the library and static kernels were built for.
34        INTEGER(C_INT), PARAMETER :: LIBXSMM_CACHELINE = $CACHELINE
35        INTEGER(C_INT), PARAMETER :: LIBXSMM_ALIGNMENT = $CACHELINE
36        INTEGER(C_INT), PARAMETER :: LIBXSMM_PREFETCH = $PREFETCH
37        INTEGER(C_INT), PARAMETER :: LIBXSMM_MAX_MNK = $MAX_MNK
38        INTEGER(C_INT), PARAMETER :: LIBXSMM_MAX_DIM = $MAX_DIM
39        INTEGER(C_INT), PARAMETER :: LIBXSMM_FLAGS = $FLAGS
40        INTEGER(C_INT), PARAMETER :: LIBXSMM_ILP64 = $ILP64
41
42        !> Parameters supplied for backward compatibility (deprecated).
43        INTEGER(C_INT), PARAMETER :: LIBXSMM_COL_MAJOR = 1
44        INTEGER(C_INT), PARAMETER :: LIBXSMM_ROW_MAJOR = 0
45
46        !> LIBXSMM_BLASINT_KIND impacts BLAS interface (LP64: 32-bit, ILP64: 64-bit).
47        INTEGER(C_INT), PARAMETER :: LIBXSMM_BLASINT_KIND = $BLASINT_KIND
48        !> Integer kind used by timer interface.
49        INTEGER(C_INT), PARAMETER :: LIBXSMM_TICKINT_KIND = C_LONG_LONG
50
51        !> Parameters representing the GEMM performed by the simplified interface.
52        REAL(C_DOUBLE), PARAMETER :: LIBXSMM_ALPHA = REAL($ALPHA, C_DOUBLE)
53        REAL(C_DOUBLE), PARAMETER :: LIBXSMM_BETA = REAL($BETA, C_DOUBLE)
54
55        !> Flag enumeration which can be IORed.
56        INTEGER(C_INT), PARAMETER ::                                    &
57     &    LIBXSMM_GEMM_FLAG_NONE     = 0,                               &
58     &    LIBXSMM_GEMM_FLAG_TRANS_A  = 1,                               &
59     &    LIBXSMM_GEMM_FLAG_TRANS_B  = 2,                               &
60     &    LIBXSMM_GEMM_FLAG_TRANS_AB = IOR(                             &
61     &        LIBXSMM_GEMM_FLAG_TRANS_A, LIBXSMM_GEMM_FLAG_TRANS_B),    &
62     &    LIBXSMM_GEMM_FLAG_BETA_0   = 16
63
64        !> Flag enumeration which can be IORed.
65        INTEGER(C_INT), PARAMETER ::                                    &
66          ! Handle recorded batch unsynchronized-parallel.
67     &    LIBXSMM_MMBATCH_FLAG_DEFAULT      = 0,                        &
68          ! Synchronize among C matrices.
69     &    LIBXSMM_MMBATCH_FLAG_SYNCHRONIZED = 512,                      &
70          ! Handle recorded batch sequentially.
71     &    LIBXSMM_MMBATCH_FLAG_SEQUENTIAL   = 1024,                     &
72          ! Only record a statistic of potential SMMs.
73     &    LIBXSMM_MMBATCH_FLAG_STATISTIC    = 2048
74
75        !> Enumerates element/data types.
76        INTEGER(C_INT), PARAMETER ::                                    &
77     &    LIBXSMM_DATATYPE_F64  = 0,                                    &
78     &    LIBXSMM_DATATYPE_F32  = 1,                                    &
79     &    LIBXSMM_DATATYPE_BF16 = 2,                                    &
80     &    LIBXSMM_DATATYPE_I64  = 3,                                    &
81     &    LIBXSMM_DATATYPE_I32  = 4,                                    &
82     &    LIBXSMM_DATATYPE_I16  = 5,                                    &
83     &    LIBXSMM_DATATYPE_I8   = 6,                                    &
84     &    LIBXSMM_DATATYPE_UNSUPPORTED = 7
85
86        !> Denotes the precision/data type of GEMM (for weak-typed
87        !> interface functions such as libxsmm_xmmdispatch).
88        INTEGER(C_INT), PARAMETER ::                                    &
89     &    LIBXSMM_GEMM_PRECISION_F64  = LIBXSMM_DATATYPE_F64,           &
90     &    LIBXSMM_GEMM_PRECISION_F32  = LIBXSMM_DATATYPE_F32,           &
91     &    LIBXSMM_GEMM_PRECISION_BF16 = LIBXSMM_DATATYPE_BF16,          &
92     &    LIBXSMM_GEMM_PRECISION_I32  = LIBXSMM_DATATYPE_I32,           &
93     &    LIBXSMM_GEMM_PRECISION_I16  = LIBXSMM_DATATYPE_I16,           &
94     &    LIBXSMM_GEMM_PRECISION_I8   = LIBXSMM_DATATYPE_I8
95
96        !> Enumeration of the available prefetch strategies which can be IORed.
97        INTEGER(C_INT), PARAMETER ::                                    &
98          ! Automatically select strategy (frontend).
99     &    LIBXSMM_PREFETCH_AUTO                     = -1,               &
100          ! No prefetching and no prefetch function signature.
101     &    LIBXSMM_PREFETCH_NONE                     = 0,                &
102          ! Only function prefetch signature.
103     &    LIBXSMM_PREFETCH_SIGONLY                  = 1,                &
104          ! Prefetch PA using accesses to A.
105     &    LIBXSMM_GEMM_PREFETCH_AL2                 = 2,                &
106          ! Prefetch PB using accesses to C.
107     &    LIBXSMM_GEMM_PREFETCH_BL2_VIA_C           = 4,                &
108          ! Prefetch A ahead.
109     &    LIBXSMM_GEMM_PREFETCH_AL2_AHEAD           = 8,                &
110          ! Composed prefetch strategies.
111     &    LIBXSMM_GEMM_PREFETCH_AL2BL2_VIA_C        = IOR(              &
112     &        LIBXSMM_GEMM_PREFETCH_BL2_VIA_C,                          &
113     &        LIBXSMM_GEMM_PREFETCH_AL2),                               &
114     &    LIBXSMM_GEMM_PREFETCH_AL2BL2_VIA_C_AHEAD  = IOR(              &
115     &        LIBXSMM_GEMM_PREFETCH_BL2_VIA_C,                          &
116     &        LIBXSMM_GEMM_PREFETCH_AL2_AHEAD),                         &
117          ! Current B into L1.
118     &    LIBXSMM_GEMM_PREFETCH_BL1                 = 16
119
120        !> Enumerates the available target architectures and instruction
121        !> set extensions as returned by libxsmm_get_target_archid().
122        INTEGER(C_INT), PARAMETER ::                                    &
123     &    LIBXSMM_TARGET_ARCH_UNKNOWN = 0,                              &
124     &    LIBXSMM_TARGET_ARCH_GENERIC = 1,                              &
125     &    LIBXSMM_X86_GENERIC     = 1002,                               &
126     &    LIBXSMM_X86_SSE3        = 1003,                               &
127     &    LIBXSMM_X86_SSE4        = 1004,                               &
128     &    LIBXSMM_X86_AVX         = 1005,                               &
129     &    LIBXSMM_X86_AVX2        = 1006,                               &
130     &    LIBXSMM_X86_AVX512      = 1007,                               &
131     &    LIBXSMM_X86_AVX512_MIC  = 1010,                               &
132     &    LIBXSMM_X86_AVX512_KNM  = 1011,                               &
133     &    LIBXSMM_X86_AVX512_CORE = 1020,                               &
134     &    LIBXSMM_X86_AVX512_CLX  = 1021,                               &
135     &    LIBXSMM_X86_AVX512_CPX  = 1022
136
137        !> Generic function type (double-precision).
138        TYPE, BIND(C) :: LIBXSMM_DMMFUNCTION
139          TYPE(C_FUNPTR) :: handle = C_NULL_FUNPTR
140        END TYPE
141
142        !> Generic function type (single-precision).
143        TYPE, BIND(C) :: LIBXSMM_SMMFUNCTION
144          TYPE(C_FUNPTR) :: handle = C_NULL_FUNPTR
145        END TYPE
146
147        !> Generic function type (low-precision)
148        TYPE, BIND(C) :: LIBXSMM_WIMMFUNCTION
149          TYPE(C_FUNPTR) :: handle = C_NULL_FUNPTR
150        END TYPE
151
152        !> Generic function types with certain arity.
153        ABSTRACT INTERFACE
154          PURE SUBROUTINE LIBXSMM_FUNCTION3(a, b, c) BIND(C)
155            IMPORT :: C_PTR
156            TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c
157          END SUBROUTINE
158
159          PURE SUBROUTINE LIBXSMM_FUNCTION6(a, b, c, pa, pb, pc) BIND(C)
160            IMPORT :: C_PTR
161            TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c
162            TYPE(C_PTR), INTENT(IN), VALUE :: pa, pb, pc
163          END SUBROUTINE
164        END INTERFACE
165
166        !> Structure of differences with matrix norms according
167        !> to http://www.netlib.org/lapack/lug/node75.html).
168        TYPE, BIND(C) :: LIBXSMM_MATDIFF_INFO
169          REAL(C_DOUBLE) norm1_abs, norm1_rel !! One-norm
170          REAL(C_DOUBLE) normi_abs, normi_rel !! Infinity-norm
171          REAL(C_DOUBLE) normf_rel            !! Froebenius-norm
172          !> Maximum difference, and L2-norm (both absolute and relative).
173          REAL(C_DOUBLE) linf_abs, linf_rel, l2_abs, l2_rel
174          !> Statistics: sum/l1, min., max., arith. avg., and variance.
175          REAL(C_DOUBLE) l1_ref, min_ref, max_ref, avg_ref, var_ref
176          !> Statistics: sum/l1, min., max., arith. avg., and variance.
177          REAL(C_DOUBLE) l1_tst, min_tst, max_tst, avg_tst, var_tst
178          !> Location (m, n) of largest difference (linf_abs).
179          INTEGER(LIBXSMM_BLASINT_KIND) m
180          INTEGER(LIBXSMM_BLASINT_KIND) n
181        END TYPE
182
183        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_init, libxsmm_finalize
184        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_get_gemm_auto_prefetch
185        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_set_gemm_auto_prefetch
186        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_get_target_archid
187        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_set_target_archid
188        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_set_target_arch
189        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_get_verbosity
190        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_set_verbosity
191        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_release_kernel
192        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matdiff_reduce
193        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matdiff_clear
194        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xmmdispatch2
195        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xmmdispatch
196        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xmmcall_abc
197        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xmmcall_prf
198        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xclear
199        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xrelease
200        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xmatcopy
201        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xitrans
202        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xotrans
203        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_omp
204        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_omp
205        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dgemm_omp
206        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_sgemm_omp
207        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_mmbatch
208        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_mmbatch_begin
209        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_mmbatch_end
210        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_gemm_batch
211        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_gemm_batch_omp
212        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_timer_duration
213        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_timer_tick
214        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xhash
215        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xdiff
216        INTERFACE
217          !> Initialize the library; pay for setup cost at a specific point.
218          SUBROUTINE libxsmm_init() BIND(C)
219          END SUBROUTINE
220
221          !> De-initialize the library and free internal memory (optional).
222          SUBROUTINE libxsmm_finalize() BIND(C)
223          END SUBROUTINE
224
225          !> Get the default prefetch strategy.
226          PURE FUNCTION libxsmm_get_gemm_auto_prefetch() BIND(C)
227            IMPORT :: C_INT
228            INTEGER(C_INT) :: libxsmm_get_gemm_auto_prefetch
229          END FUNCTION
230
231          !> Set the default prefetch strategy.
232          SUBROUTINE libxsmm_set_gemm_auto_prefetch(strategy) BIND(C)
233            IMPORT :: C_INT
234            INTEGER(C_INT), INTENT(IN), VALUE :: strategy
235          END SUBROUTINE
236
237          !> Returns the architecture and instruction set extension as determined
238          !> by the CPUID flags, as set by the libxsmm_get_target_arch* functions,
239          !> or as set by the LIBXSMM_TARGET environment variable.
240          PURE FUNCTION libxsmm_get_target_archid() BIND(C)
241            IMPORT :: C_INT
242            INTEGER(C_INT) :: libxsmm_get_target_archid
243          END FUNCTION
244
245          !> Set target architecture (archid: see PARAMETER enumeration)
246          !> for subsequent code generation (JIT).
247          SUBROUTINE libxsmm_set_target_archid(archid) BIND(C)
248            IMPORT :: C_INT
249            INTEGER(C_INT), INTENT(IN), VALUE :: archid
250          END SUBROUTINE
251
252          !> Set target architecture for subsequent code generation (JIT).
253          !> arch="0"|"sse"|"snb"|"hsw"|"knl"|"knm"|"skx"|"clx"|"cpx",
254          !> or "0" to rely on the CPUID (default).
255          !> There are some alternative target names as well:
256          !> "sse", "avx", "avx2", "avx3" (incomplete list).
257          SUBROUTINE libxsmm_set_target_arch(arch) BIND(C)
258            IMPORT :: C_CHAR
259            CHARACTER(C_CHAR), INTENT(IN) :: arch(*)
260          END SUBROUTINE
261
262          !> Get the level of verbosity.
263          PURE FUNCTION libxsmm_get_verbosity() BIND(C)
264            IMPORT :: C_INT
265            INTEGER(C_INT) :: libxsmm_get_verbosity
266          END FUNCTION
267
268          !> Set the level of verbosity (0: off, positive value: verbosity level,
269          !> negative value: maximum verbosity, which also dumps JIT-code).
270          SUBROUTINE libxsmm_set_verbosity(level) BIND(C)
271            IMPORT :: C_INT
272            INTEGER(C_INT), INTENT(IN), VALUE :: level
273          END SUBROUTINE
274
275          !> Impure function which returns the current clock tick of a
276          !> monotonic timer source; uses a platform-specific resolution.
277          !> Implicit FORTRAN 77 interface: not available.
278          INTEGER(LIBXSMM_TICKINT_KIND)                                 &
279     &    FUNCTION libxsmm_timer_tick() BIND(C)
280            IMPORT :: LIBXSMM_TICKINT_KIND
281          END FUNCTION
282
283          !> Impure function (timer freq. may vary) which returns the duration
284          !> (in seconds) between two values received by libxsmm_timer_tick.
285          !> Implicit FORTRAN 77 interface: not available.
286          FUNCTION libxsmm_timer_duration(tick0, tick1) BIND(C)
287            IMPORT :: LIBXSMM_TICKINT_KIND, C_DOUBLE
288            INTEGER(LIBXSMM_TICKINT_KIND), INTENT(IN), VALUE :: tick0
289            INTEGER(LIBXSMM_TICKINT_KIND), INTENT(IN), VALUE :: tick1
290            REAL(C_DOUBLE) :: libxsmm_timer_duration
291          END FUNCTION
292
293          !> Deallocates the JIT'ted code, or unregisters
294          !> and releases code from the registry.
295          !> Implicit FORTRAN 77 interface:
296          !> INTEGER(8) :: kernel
297          SUBROUTINE libxsmm_release_kernel(kernel)                     &
298     &    BIND(C, NAME="libxsmm_release_kernel_")
299            IMPORT :: C_FUNPTR
300            TYPE(C_FUNPTR), INTENT(IN) :: kernel
301          END SUBROUTINE
302
303          !> Type-generic (unsafe) code dispatch (trylock: impure routine).
304          !> Implicit FORTRAN 77 interface:
305          !> INTEGER(4)   :: gemm_precision, flags, prefetch
306          !> INTEGER(4|8) :: m, n, k, lda, ldb, ldc
307          !> REAL(4|8)    :: alpha, beta
308          !> INTEGER(8)   :: kernel
309          SUBROUTINE libxsmm_xmmdispatch(kernel, gemm_precision,        &
310     &    m, n, k, lda, ldb, ldc, alpha, beta, flags, prefetch)         &
311     &    BIND(C, NAME="libxsmm_xmmdispatch_")
312            IMPORT :: C_FUNPTR, C_PTR, C_INT, LIBXSMM_BLASINT_KIND
313            TYPE(C_FUNPTR), INTENT(OUT) :: kernel
314            INTEGER(C_INT), INTENT(IN)  :: gemm_precision
315            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
316            TYPE(C_PTR), INTENT(IN), VALUE :: lda, ldb, ldc
317            TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta
318            TYPE(C_PTR), INTENT(IN), VALUE :: flags, prefetch
319          END SUBROUTINE
320
321          !> Type-generic (unsafe) code dispatch (trylock: impure routine).
322          !> Implicit FORTRAN 77 interface:
323          !> INTEGER(4)   :: iprec, oprec, flags, prefetch
324          !> INTEGER(4|8) :: m, n, k, lda, ldb, ldc
325          !> REAL(4|8)    :: alpha, beta
326          !> INTEGER(8)   :: kernel
327          SUBROUTINE libxsmm_xmmdispatch2(kernel, iprec, oprec,         &
328     &    m, n, k, lda, ldb, ldc, alpha, beta, flags, prefetch)         &
329     &    BIND(C, NAME="libxsmm_xmmdispatch2_")
330            IMPORT :: C_FUNPTR, C_PTR, C_INT, LIBXSMM_BLASINT_KIND
331            TYPE(C_FUNPTR), INTENT(OUT) :: kernel
332            INTEGER(C_INT), INTENT(IN)  :: iprec, oprec
333            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
334            TYPE(C_PTR), INTENT(IN), VALUE :: lda, ldb, ldc
335            TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta
336            TYPE(C_PTR), INTENT(IN), VALUE :: flags, prefetch
337          END SUBROUTINE
338
339          !> Generic call routine (3-argument form).
340          !> Implicit FORTRAN 77 interface:
341          !> REAL(4|8)  :: a, b, c
342          !> INTEGER(8) :: kernel
343          PURE SUBROUTINE libxsmm_xmmcall_abc(kernel, a, b, c)          &
344     &    BIND(C, NAME="libxsmm_xmmcall_abc_")
345            IMPORT C_FUNPTR, C_PTR
346            TYPE(C_FUNPTR), INTENT(IN) :: kernel
347            TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c
348          END SUBROUTINE
349
350          !> Generic call routine (6-argument form).
351          !> Implicit FORTRAN 77 interface:
352          !> REAL(4|8)  :: a, b, c, pa, pb, pc
353          !> INTEGER(8) :: kernel
354          PURE SUBROUTINE libxsmm_xmmcall_prf(kernel,                   &
355     &    a, b, c, pa, pb, pc)                                          &
356     &    BIND(C, NAME="libxsmm_xmmcall_prf_")
357            IMPORT C_FUNPTR, C_PTR
358            TYPE(C_FUNPTR), INTENT(IN) :: kernel
359            TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c, pa, pb, pc
360          END SUBROUTINE
361
362          !> Fill destination with zeros; treats dst in raw/binary fashion.
363          SUBROUTINE libxsmm_xclear(dst, nbytes)                        &
364     &    BIND(C, NAME="libxsmm_xclear_")
365            IMPORT C_PTR, C_INT
366            TYPE(C_PTR), INTENT(IN), VALUE :: dst
367            INTEGER(C_INT), INTENT(IN) :: nbytes
368          END SUBROUTINE
369
370          !> Remove key-value pair from code registry and release memory.
371          SUBROUTINE libxsmm_xrelease(key, keysize)                     &
372     &    BIND(C, NAME="libxsmm_xrelease_")
373            IMPORT C_PTR, C_INT
374            TYPE(C_PTR), INTENT(IN), VALUE :: key
375            INTEGER(C_INT), INTENT(IN) :: keysize
376          END SUBROUTINE
377
378          !> Matrix-copy (2-dimensional copy) routine.
379          !> Implicit FORTRAN 77 interface:
380          !> ARRAY        :: input, output
381          !> INTEGER(4|8) :: m, n, ldi, ldo
382          !> INTEGER(4)   :: typesize
383          PURE SUBROUTINE libxsmm_xmatcopy(output, input, typesize,     &
384     &    m, n, ldi, ldo) BIND(C, NAME="libxsmm_matcopy_")
385            IMPORT LIBXSMM_BLASINT_KIND, C_PTR, C_INT
386            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo
387            TYPE(C_PTR), INTENT(IN), VALUE :: output, input
388            INTEGER(C_INT), INTENT(IN) :: typesize
389          END SUBROUTINE
390
391          !> Transpose a matrix (in-place form).
392          !> Implicit FORTRAN 77 interface:
393          !> ARRAY        :: matrix
394          !> INTEGER(4|8) :: m, n, ld
395          !> INTEGER(4)   :: typesize
396          PURE SUBROUTINE libxsmm_xitrans(matrix, typesize, m, n, ld)   &
397     &    BIND(C, NAME="libxsmm_itrans_")
398            IMPORT C_PTR, C_INT, LIBXSMM_BLASINT_KIND
399            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ld
400            TYPE(C_PTR), INTENT(IN), VALUE :: matrix
401            INTEGER(C_INT), INTENT(IN) :: typesize
402          END SUBROUTINE
403
404          !> Transpose a matrix (out-of-place form).
405          !> Implicit FORTRAN 77 interface:
406          !> ARRAY        :: input, output
407          !> INTEGER(4|8) :: m, n, ldi, ldo
408          !> INTEGER(4)   :: typesize
409          PURE SUBROUTINE libxsmm_xotrans(output, input,                &
410     &    typesize, m, n, ldi, ldo)                                     &
411     &    BIND(C, NAME="libxsmm_otrans_")
412            IMPORT C_PTR, C_INT, LIBXSMM_BLASINT_KIND
413            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo
414            TYPE(C_PTR), INTENT(IN), VALUE :: output, input
415            INTEGER(C_INT), INTENT(IN) :: typesize
416          END SUBROUTINE
417
418          !> Matrix copy; MT via libxsmmext (out-of-place form).
419          !> Implicit FORTRAN 77 interface:
420          !> ARRAY        :: output, input
421          !> INTEGER(4|8) :: m, n, ldi, ldo
422          !> INTEGER(4)   :: typesize
423          PURE SUBROUTINE libxsmm_matcopy_omp(output, input,            &
424     &    typesize, m, n, ldi, ldo)                                     &
425     &    BIND(C, NAME="libxsmm_matcopy_omp_")
426            IMPORT C_PTR, C_INT, LIBXSMM_BLASINT_KIND
427            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo
428            TYPE(C_PTR), INTENT(IN), VALUE :: output, input
429            INTEGER(C_INT), INTENT(IN) :: typesize
430          END SUBROUTINE
431
432          !> Matrix transposition; MT via libxsmmext (out-of-place form).
433          !> Implicit FORTRAN 77 interface:
434          !> ARRAY        :: output, input
435          !> INTEGER(4|8) :: m, n, ldi, ldo
436          !> INTEGER(4)   :: typesize
437          PURE SUBROUTINE libxsmm_otrans_omp(output, input,             &
438     &    typesize, m, n, ldi, ldo)                                     &
439     &    BIND(C, NAME="libxsmm_otrans_omp_")
440            IMPORT C_PTR, C_INT, LIBXSMM_BLASINT_KIND
441            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo
442            TYPE(C_PTR), INTENT(IN), VALUE :: output, input
443            INTEGER(C_INT), INTENT(IN) :: typesize
444          END SUBROUTINE
445
446          !> General dense MM; MT via libxsmmext (double-precision).
447          !> Implicit FORTRAN 77 interface: similar to DGEMM.
448          PURE SUBROUTINE libxsmm_dgemm_omp(transa, transb, m, n, k,    &
449     &    alpha, a, lda, b, ldb, beta, c, ldc)                          &
450     &    BIND(C, NAME="libxsmm_dgemm_omp_")
451            IMPORT C_DOUBLE, C_CHAR, LIBXSMM_BLASINT_KIND
452            CHARACTER(C_CHAR), INTENT(IN) :: transa, transb
453            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
454            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
455            REAL(C_DOUBLE), INTENT(IN) :: alpha, beta
456            REAL(C_DOUBLE), INTENT(IN) :: a(lda,*), b(ldb,*)
457            REAL(C_DOUBLE), INTENT(INOUT) :: c(ldc,*)
458          END SUBROUTINE
459
460          !> General dense MM; MT via libxsmmext (single-precision).
461          !> Implicit FORTRAN 77 interface: similar to SGEMM.
462          PURE SUBROUTINE libxsmm_sgemm_omp(transa, transb, m, n, k,    &
463     &    alpha, a, lda, b, ldb, beta, c, ldc)                          &
464     &    BIND(C, NAME="libxsmm_sgemm_omp_")
465            IMPORT C_FLOAT, C_CHAR, LIBXSMM_BLASINT_KIND
466            CHARACTER(C_CHAR), INTENT(IN) :: transa, transb
467            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
468            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
469            REAL(C_FLOAT), INTENT(IN)    :: alpha, beta
470            REAL(C_FLOAT), INTENT(IN)    :: a(lda,*), b(ldb,*)
471            REAL(C_FLOAT), INTENT(INOUT) :: c(ldc,*)
472          END SUBROUTINE
473
474          !> Process a series of MMs (batch). See also libxsmm_gemm_batch_omp.
475          !> The kind of matrix operands (a, b, c) depend on index_stride:
476          !> index_stride==0: pointers to pointers of elements, e.g.,
477          !> double** for the C matrices.
478          !> index_stride!=0: pointer to elements, e.g.,
479          !> const double* for the A and B matrices.
480          !> Implicit FORTRAN 77 interface:
481          !> INTEGER(4)   :: iprec, oprec
482          !> REAL(4|8)    :: alpha, beta
483          !> ARRAY        :: a, b, c
484          !> ARRAY/VALUE  :: stride_a, stride_b, stride_c
485          !> INTEGER(4|8) :: index_base, index_stride, batchsize
486          !> INTEGER(4)   :: tid, nthreads
487          !> Otherwise arguments are similar to GEMM.
488          PURE SUBROUTINE libxsmm_mmbatch(iprec, oprec, transa, transb, &
489     &    m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, index_base,     &
490     &    index_stride, stride_a, stride_b, stride_c, batchsize,        &
491     &    tid, nthreads)                                                &
492     &    BIND(C, NAME="libxsmm_mmbatch_")
493            IMPORT C_PTR, C_CHAR, C_INT, LIBXSMM_BLASINT_KIND
494            !> Determines index-base (usually 0, 1 for one-based indexes).
495            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_base
496            !> Stride (measured in Bytes) used to walk stride_*.
497            !> In Fortran: index_stride!=0.
498            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_stride
499            !> Number of SMMs. If the size is given as a negative value,
500            !> then internal synchronization is omitted.
501            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: batchsize
502            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
503            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
504            CHARACTER(C_CHAR),  INTENT(IN) :: transa, transb
505            TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta
506            TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c
507            !> Arrays of indexes determining the position of
508            !> a, b, and c operands.
509            TYPE(C_PTR), INTENT(IN), VALUE :: stride_a
510            TYPE(C_PTR), INTENT(IN), VALUE :: stride_b
511            TYPE(C_PTR), INTENT(IN), VALUE :: stride_c
512            INTEGER(C_INT), INTENT(IN) :: iprec, oprec
513            !> Thread-ID (TID), and number of threads.
514            INTEGER(C_INT), INTENT(IN) :: tid, nthreads
515          END SUBROUTINE
516
517          !> Process a series of SMMs (batch). See also libxsmm_mmbatch.
518          !> Implicit FORTRAN 77 interface:
519          !> INTEGER(4)   :: iprec, oprec
520          !> REAL(4|8)    :: alpha, beta
521          !> ARRAY        :: a, b, c
522          !> ARRAY/VALUE  :: stride_a, stride_b, stride_c
523          !> INTEGER(4|8) :: index_base, index_stride, batchsize
524          !> Otherwise arguments are similar to GEMM.
525          PURE SUBROUTINE libxsmm_gemm_batch(iprec, oprec,              &
526     &    transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, &
527     &    index_base, index_stride, stride_a, stride_b, stride_c,       &
528     &    batchsize)                                                    &
529     &    BIND(C, NAME="libxsmm_gemm_batch_")
530            IMPORT C_PTR, C_CHAR, C_INT, LIBXSMM_BLASINT_KIND
531            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_base
532            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_stride
533            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: batchsize
534            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
535            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
536            CHARACTER(C_CHAR),  INTENT(IN) :: transa, transb
537            TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta
538            TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c
539            TYPE(C_PTR), INTENT(IN), VALUE :: stride_a
540            TYPE(C_PTR), INTENT(IN), VALUE :: stride_b
541            TYPE(C_PTR), INTENT(IN), VALUE :: stride_c
542            INTEGER(C_INT), INTENT(IN) :: iprec, oprec
543          END SUBROUTINE
544
545          !> Process a series of SMMs (batch) with OpenMP (libxsmmext).
546          !> Implicit FORTRAN 77 interface:
547          !> INTEGER(4)   :: iprec, oprec
548          !> REAL(4|8)    :: alpha, beta
549          !> ARRAY        :: a, b, c
550          !> ARRAY/VALUE  :: stride_a, stride_b, stride_c
551          !> INTEGER(4|8) :: index_base, index_stride, batchsize
552          !> Otherwise arguments are similar to GEMM.
553          PURE SUBROUTINE libxsmm_gemm_batch_omp(iprec, oprec,          &
554     &    transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, &
555     &    index_base, index_stride, stride_a, stride_b, stride_c,       &
556     &    batchsize)                                                    &
557     &    BIND(C, NAME="libxsmm_gemm_batch_omp_")
558            IMPORT C_PTR, C_CHAR, C_INT, LIBXSMM_BLASINT_KIND
559            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_base
560            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_stride
561            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: batchsize
562            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
563            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
564            CHARACTER(C_CHAR),  INTENT(IN) :: transa, transb
565            TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta
566            TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c
567            TYPE(C_PTR), INTENT(IN), VALUE :: stride_a
568            TYPE(C_PTR), INTENT(IN), VALUE :: stride_b
569            TYPE(C_PTR), INTENT(IN), VALUE :: stride_c
570            INTEGER(C_INT), INTENT(IN) :: iprec, oprec
571          END SUBROUTINE
572
573          !> This function is a no-op unless LIBXSMM is built to intercept GEMM.
574          !> Pointer arguments are used to filter intercepted GEMM calls such that
575          !> non-NULL values match. Otherwise (NULL) the respective argument is
576          !> considered a "free value", i.e., every value can match;
577          !> libxsmmext required.
578          !> Implicit FORTRAN 77 interface:
579          !> INTEGER(4)   :: gemm_precision, flags
580          !> INTEGER(4|8) :: m, n, k, lda, ldb, ldc
581          !> REAL(4|8)    :: alpha, beta
582          SUBROUTINE libxsmm_mmbatch_begin(gemm_precision, flags,       &
583     &    m, n, k,  lda, ldb, ldc, alpha, beta) BIND(C)
584            IMPORT C_PTR, C_INT, LIBXSMM_BLASINT_KIND
585            INTEGER(C_INT), INTENT(IN), VALUE :: gemm_precision
586            INTEGER(C_INT), INTENT(IN) :: flags
587            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
588            INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
589            TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta
590          END SUBROUTINE
591
592          !> Processes the batch of previously recorded SMMs
593          !> (libxsmm_mmbatch_begin); libxsmmext required.
594          !> Implicit FORTRAN 77 interface: available.
595          SUBROUTINE libxsmm_mmbatch_end() BIND(C)
596          END SUBROUTINE
597
598          !> Reduces input into output such that the difference is maintained
599          !> or increased (max function). The very first (initial) output
600          !> should be zeroed (libxsmm_matdiff_clear).
601          !> Implicit FORTRAN 77 interface: available.
602          PURE SUBROUTINE libxsmm_matdiff_reduce(output, input) BIND(C)
603            IMPORT LIBXSMM_MATDIFF_INFO
604            TYPE(LIBXSMM_MATDIFF_INFO), INTENT(INOUT) :: output
605            TYPE(LIBXSMM_MATDIFF_INFO), INTENT(IN)    :: input
606          END SUBROUTINE
607
608          !> Clears the given info-structure, e.g., for the initial
609          !> reduction-value (libxsmm_matdiff_reduce).
610          !> Implicit FORTRAN 77 interface: available.
611          PURE SUBROUTINE libxsmm_matdiff_clear(info) BIND(C)
612            IMPORT LIBXSMM_MATDIFF_INFO
613            TYPE(LIBXSMM_MATDIFF_INFO), INTENT(OUT) :: info
614          END SUBROUTINE
615
616          !> Calculates a hash value for the given array and seed.
617          !> Routine suitable for FORTRAN 77; keysize in Bytes.
618          PURE SUBROUTINE libxsmm_xhash(hash_seed, key, keysize)        &
619     &    BIND(C, NAME="libxsmm_xhash_")
620            IMPORT C_INT, C_PTR
621            INTEGER(C_INT), INTENT(INOUT)  :: hash_seed
622            INTEGER(C_INT), INTENT(IN)     :: keysize
623            TYPE(C_PTR), INTENT(IN), VALUE :: key
624          END SUBROUTINE
625
626          !> Calculates if there is a difference between two arrays.
627          !> Routine suitable for FORTRAN 77; size in Bytes.
628          PURE SUBROUTINE libxsmm_xdiff(diff, a, b, nbytes)             &
629     &    BIND(C, NAME="libxsmm_xdiff_")
630            IMPORT C_PTR, C_LONG_LONG, C_BOOL
631            TYPE(C_PTR), INTENT(IN), VALUE   :: a, b
632            INTEGER(C_LONG_LONG), INTENT(IN) :: nbytes
633            LOGICAL(C_BOOL), INTENT(OUT)     :: diff
634          END SUBROUTINE
635        END INTERFACE$MNK_INTERFACE_LIST
636
637        INTERFACE libxsmm_ptr0
638          MODULE PROCEDURE libxsmm_ptr_z0, libxsmm_ptr_c0
639          MODULE PROCEDURE libxsmm_ptr_d0, libxsmm_ptr_s0
640          MODULE PROCEDURE libxsmm_ptr_i0, libxsmm_ptr_w0
641          MODULE PROCEDURE libxsmm_ptr_j0 !! Byte/char
642          MODULE PROCEDURE libxsmm_ptr_b0 !! Byte/char
643          MODULE PROCEDURE libxsmm_ptr_l0 !! long long
644        END INTERFACE
645
646        INTERFACE libxsmm_ptr1
647          MODULE PROCEDURE libxsmm_ptr_z1, libxsmm_ptr_c1
648          MODULE PROCEDURE libxsmm_ptr_d1, libxsmm_ptr_s1
649          MODULE PROCEDURE libxsmm_ptr_i1, libxsmm_ptr_w1
650          MODULE PROCEDURE libxsmm_ptr_j1 !! Byte/char
651          MODULE PROCEDURE libxsmm_ptr_b1 !! Byte/char
652          MODULE PROCEDURE libxsmm_ptr_l1 !! long long
653          MODULE PROCEDURE libxsmm_ptr_dmm
654          MODULE PROCEDURE libxsmm_ptr_smm
655          MODULE PROCEDURE libxsmm_ptr_wimm
656        END INTERFACE
657
658        INTERFACE libxsmm_ptr2
659          MODULE PROCEDURE libxsmm_ptr_z2, libxsmm_ptr_c2
660          MODULE PROCEDURE libxsmm_ptr_d2, libxsmm_ptr_s2
661          MODULE PROCEDURE libxsmm_ptr_i2, libxsmm_ptr_w2
662          MODULE PROCEDURE libxsmm_ptr_j2 !! Byte/char
663          MODULE PROCEDURE libxsmm_ptr_b2 !! Byte/char
664          MODULE PROCEDURE libxsmm_ptr_l2 !! long long
665        END INTERFACE
666
667        INTERFACE libxsmm_ptr
668          MODULE PROCEDURE libxsmm_ptr_z0, libxsmm_ptr_c0
669          MODULE PROCEDURE libxsmm_ptr_d0, libxsmm_ptr_s0
670          MODULE PROCEDURE libxsmm_ptr_i0, libxsmm_ptr_w0
671          MODULE PROCEDURE libxsmm_ptr_j0 !! Byte/char
672          MODULE PROCEDURE libxsmm_ptr_b0 !! Byte/char
673          MODULE PROCEDURE libxsmm_ptr_l0 !! long long
674          MODULE PROCEDURE libxsmm_ptr_z1, libxsmm_ptr_c1
675          MODULE PROCEDURE libxsmm_ptr_d1, libxsmm_ptr_s1
676          MODULE PROCEDURE libxsmm_ptr_i1, libxsmm_ptr_w1
677          MODULE PROCEDURE libxsmm_ptr_j1 !! Byte/char
678          MODULE PROCEDURE libxsmm_ptr_b1 !! Byte/char
679          MODULE PROCEDURE libxsmm_ptr_l1 !! long long
680          MODULE PROCEDURE libxsmm_ptr_z2, libxsmm_ptr_c2
681          MODULE PROCEDURE libxsmm_ptr_d2, libxsmm_ptr_s2
682          MODULE PROCEDURE libxsmm_ptr_i2, libxsmm_ptr_w2
683          MODULE PROCEDURE libxsmm_ptr_j2 !! Byte/char
684          MODULE PROCEDURE libxsmm_ptr_b2 !! Byte/char
685          MODULE PROCEDURE libxsmm_ptr_l2 !! long long
686          MODULE PROCEDURE libxsmm_ptr_dmm
687          MODULE PROCEDURE libxsmm_ptr_smm
688          MODULE PROCEDURE libxsmm_ptr_wimm
689        END INTERFACE
690
691        !> Deallocates JIT'ted code, or unregisters/releases code from registry.
692        INTERFACE libxsmm_release_mmkernel
693          MODULE PROCEDURE libxsmm_release_dmmkernel
694          MODULE PROCEDURE libxsmm_release_smmkernel
695          MODULE PROCEDURE libxsmm_release_wimmkernel
696        END INTERFACE
697
698        !> Construct JIT-code depending on given argument set.
699        INTERFACE libxsmm_mmdispatch
700          MODULE PROCEDURE libxsmm_dmmdispatch, libxsmm_smmdispatch
701          MODULE PROCEDURE libxsmm_wimmdispatch
702        END INTERFACE
703
704        !> Construct JIT-code depending on given argument set.
705        INTERFACE libxsmm_dispatch
706          MODULE PROCEDURE libxsmm_dmmdispatch, libxsmm_smmdispatch
707          MODULE PROCEDURE libxsmm_wimmdispatch
708        END INTERFACE
709
710        !> Check if a function is available (LIBXSMM_?MMFUNCTION).
711        INTERFACE libxsmm_mmavailable
712          MODULE PROCEDURE libxsmm_dmmavailable, libxsmm_smmavailable
713          MODULE PROCEDURE libxsmm_wimmavailable
714        END INTERFACE
715
716        !> Check if a function is available (LIBXSMM_?MMFUNCTION).
717        INTERFACE libxsmm_available
718          MODULE PROCEDURE libxsmm_smmavailable, libxsmm_dmmavailable
719          MODULE PROCEDURE libxsmm_wimmavailable
720        END INTERFACE
721
722        !> Overloaded GEMM routines (double-precision).
723        INTERFACE libxsmm_dgemm
724          MODULE PROCEDURE libxsmm_dgemm0
725          MODULE PROCEDURE libxsmm_dgemm1
726          MODULE PROCEDURE libxsmm_dgemm2
727          MODULE PROCEDURE libxsmm_dgemm3
728        END INTERFACE
729
730        !> Overloaded GEMM routines (single-precision).
731        INTERFACE libxsmm_sgemm
732          MODULE PROCEDURE libxsmm_sgemm0
733          MODULE PROCEDURE libxsmm_sgemm1
734          MODULE PROCEDURE libxsmm_sgemm2
735        END INTERFACE
736
737        !> Overloaded GEMM routines (low-precision).
738        INTERFACE libxsmm_wigemm
739          MODULE PROCEDURE libxsmm_wigemm0
740          MODULE PROCEDURE libxsmm_wigemm1
741          MODULE PROCEDURE libxsmm_wigemm2
742        END INTERFACE
743
744        !> Overloaded GEMM routines.
745        INTERFACE libxsmm_gemm
746          MODULE PROCEDURE libxsmm_dgemm0
747          MODULE PROCEDURE libxsmm_dgemm1
748          MODULE PROCEDURE libxsmm_dgemm2
749          MODULE PROCEDURE libxsmm_dgemm3
750          MODULE PROCEDURE libxsmm_sgemm0
751          MODULE PROCEDURE libxsmm_sgemm1
752          MODULE PROCEDURE libxsmm_sgemm2
753          MODULE PROCEDURE libxsmm_sgemm3
754          MODULE PROCEDURE libxsmm_wigemm0
755          MODULE PROCEDURE libxsmm_wigemm1
756          MODULE PROCEDURE libxsmm_wigemm2
757          MODULE PROCEDURE libxsmm_wigemm3
758        END INTERFACE
759
760        !> Overloaded BLAS GEMM routines (double-precision).
761        INTERFACE libxsmm_blas_dgemm
762          MODULE PROCEDURE libxsmm_blas_dgemm0
763          MODULE PROCEDURE libxsmm_blas_dgemm1
764          MODULE PROCEDURE libxsmm_blas_dgemm2
765          MODULE PROCEDURE libxsmm_blas_dgemm3
766        END INTERFACE
767
768        !> Overloaded BLAS GEMM routines (single-precision).
769        INTERFACE libxsmm_blas_sgemm
770          MODULE PROCEDURE libxsmm_blas_sgemm0
771          MODULE PROCEDURE libxsmm_blas_sgemm1
772          MODULE PROCEDURE libxsmm_blas_sgemm2
773          MODULE PROCEDURE libxsmm_blas_sgemm3
774        END INTERFACE
775
776        !> Overloaded BLAS GEMM routines (single/double-precision).
777        INTERFACE libxsmm_blas_gemm
778          MODULE PROCEDURE libxsmm_blas_dgemm0
779          MODULE PROCEDURE libxsmm_blas_dgemm1
780          MODULE PROCEDURE libxsmm_blas_dgemm2
781          MODULE PROCEDURE libxsmm_blas_dgemm3
782          MODULE PROCEDURE libxsmm_blas_sgemm0
783          MODULE PROCEDURE libxsmm_blas_sgemm1
784          MODULE PROCEDURE libxsmm_blas_sgemm2
785          MODULE PROCEDURE libxsmm_blas_sgemm3
786        END INTERFACE
787
788        !> Overloaded MATCOPY routines (2d-copy).
789        INTERFACE libxsmm_matcopy
790          MODULE PROCEDURE libxsmm_matcopy_p0
791          MODULE PROCEDURE libxsmm_matcopy_d1
792          MODULE PROCEDURE libxsmm_matcopy_d2
793          MODULE PROCEDURE libxsmm_matcopy_s1
794          MODULE PROCEDURE libxsmm_matcopy_s2
795        END INTERFACE
796
797        !> Overloaded TRANSPOSE routines (in-place form).
798        INTERFACE libxsmm_itrans
799          MODULE PROCEDURE libxsmm_itrans_p0
800          MODULE PROCEDURE libxsmm_itrans_d1
801          MODULE PROCEDURE libxsmm_itrans_d2
802          MODULE PROCEDURE libxsmm_itrans_s1
803          MODULE PROCEDURE libxsmm_itrans_s2
804        END INTERFACE
805
806        !> Overloaded TRANSPOSE routines (out-of-place form).
807        INTERFACE libxsmm_otrans
808          MODULE PROCEDURE libxsmm_otrans_p0
809          MODULE PROCEDURE libxsmm_otrans_d1
810          MODULE PROCEDURE libxsmm_otrans_d2
811          MODULE PROCEDURE libxsmm_otrans_s1
812          MODULE PROCEDURE libxsmm_otrans_s2
813        END INTERFACE
814
815        !> Calculate a hash value for a given key value (binary blob).
816        !> Conceptually pure, but C_LOC may be (incorrectly) impure.
817        INTERFACE libxsmm_hash
818          MODULE PROCEDURE libxsmm_hash_char
819          MODULE PROCEDURE libxsmm_hash_i8
820          MODULE PROCEDURE libxsmm_hash_i32
821          MODULE PROCEDURE libxsmm_hash_i64
822        END INTERFACE
823
824        !> Calculate whether there is a difference between two series of items.
825        !> Conceptually pure, but C_LOC may be (incorrectly) impure.
826        INTERFACE libxsmm_diff
827          MODULE PROCEDURE libxsmm_diff_char
828          MODULE PROCEDURE libxsmm_diff_i8
829          MODULE PROCEDURE libxsmm_diff_i32
830          MODULE PROCEDURE libxsmm_diff_i64
831        END INTERFACE
832
833      CONTAINS
834        !> Returns the name of the target architecture as determined by
835        !> the CPUID flags, as set by the libxsmm_get_target_arch* functions,
836        !> or as set by the LIBXSMM_TARGET environment variable.
837        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_get_target_arch
838        FUNCTION libxsmm_get_target_arch()
839          !CHARACTER(LEN=:), POINTER :: libxsmm_get_target_arch
840          CHARACTER, POINTER :: libxsmm_get_target_arch(:)
841          INTEGER(C_INT) :: length(1)
842          TYPE(C_PTR) :: arch
843          !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmmf_get_target_arch
844          INTERFACE
845            FUNCTION libxsmmf_get_target_arch(length) BIND(C)
846              IMPORT :: C_INT, C_PTR
847              INTEGER(C_INT), INTENT(OUT) :: length
848              TYPE(C_PTR) :: libxsmmf_get_target_arch
849            END FUNCTION
850          END INTERFACE
851          arch = libxsmmf_get_target_arch(length(1))
852          CALL C_F_POINTER(arch, libxsmm_get_target_arch, length)
853        END FUNCTION
854
855        !> Returns C_NULL_PTR.
856        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_null
857        PURE FUNCTION libxsmm_ptr_null()
858          TYPE(C_PTR) :: libxsmm_ptr_null
859          libxsmm_ptr_null = C_NULL_PTR
860        END FUNCTION
861
862        !> Determines the C-address of the given array.
863        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_z0
864        FUNCTION libxsmm_ptr_z0(a)
865          COMPLEX(C_DOUBLE_COMPLEX), INTENT(IN), TARGET :: a
866          TYPE(C_PTR) :: libxsmm_ptr_z0
867          libxsmm_ptr_z0 = C_LOC(a)
868        END FUNCTION
869        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_z1
870        FUNCTION libxsmm_ptr_z1(a)
871          COMPLEX(C_DOUBLE_COMPLEX), INTENT(IN), TARGET :: a(*)
872          TYPE(C_PTR) :: libxsmm_ptr_z1
873          libxsmm_ptr_z1 = C_LOC(a)
874        END FUNCTION
875        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_z2
876        FUNCTION libxsmm_ptr_z2(a)
877          COMPLEX(C_DOUBLE_COMPLEX), INTENT(IN) :: a(:,:)
878          TYPE(C_PTR) :: libxsmm_ptr_z2
879          libxsmm_ptr_z2 = libxsmm_ptr_z1(a)
880        END FUNCTION
881
882        !> Determines the C-address of the given array.
883        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_c0
884        FUNCTION libxsmm_ptr_c0(a)
885          COMPLEX(C_FLOAT_COMPLEX), INTENT(IN), TARGET :: a
886          TYPE(C_PTR) :: libxsmm_ptr_c0
887          libxsmm_ptr_c0 = C_LOC(a)
888        END FUNCTION
889        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_c1
890        FUNCTION libxsmm_ptr_c1(a)
891          COMPLEX(C_FLOAT_COMPLEX), INTENT(IN), TARGET :: a(*)
892          TYPE(C_PTR) :: libxsmm_ptr_c1
893          libxsmm_ptr_c1 = C_LOC(a)
894        END FUNCTION
895        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_c2
896        FUNCTION libxsmm_ptr_c2(a)
897          COMPLEX(C_FLOAT_COMPLEX), INTENT(IN) :: a(:,:)
898          TYPE(C_PTR) :: libxsmm_ptr_c2
899          libxsmm_ptr_c2 = libxsmm_ptr_c1(a)
900        END FUNCTION
901
902        !> Determines the C-address of the given array.
903        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_d0
904        FUNCTION libxsmm_ptr_d0(a)
905          REAL(C_DOUBLE), INTENT(IN), TARGET :: a
906          TYPE(C_PTR) :: libxsmm_ptr_d0
907          libxsmm_ptr_d0 = C_LOC(a)
908        END FUNCTION
909        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_d1
910        FUNCTION libxsmm_ptr_d1(a)
911          REAL(C_DOUBLE), INTENT(IN), TARGET :: a(*)
912          TYPE(C_PTR) :: libxsmm_ptr_d1
913          libxsmm_ptr_d1 = C_LOC(a)
914        END FUNCTION
915        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_d2
916        FUNCTION libxsmm_ptr_d2(a)
917          REAL(C_DOUBLE), INTENT(IN) :: a(:,:)
918          TYPE(C_PTR) :: libxsmm_ptr_d2
919          libxsmm_ptr_d2 = libxsmm_ptr_d1(a)
920        END FUNCTION
921
922        !> Determines the C-address of the given array.
923        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_s0
924        FUNCTION libxsmm_ptr_s0(a)
925          REAL(C_FLOAT), INTENT(IN), TARGET :: a
926          TYPE(C_PTR) :: libxsmm_ptr_s0
927          libxsmm_ptr_s0 = C_LOC(a)
928        END FUNCTION
929        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_s1
930        FUNCTION libxsmm_ptr_s1(a)
931          REAL(C_FLOAT), INTENT(IN), TARGET :: a(*)
932          TYPE(C_PTR) :: libxsmm_ptr_s1
933          libxsmm_ptr_s1 = C_LOC(a)
934        END FUNCTION
935        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_s2
936        FUNCTION libxsmm_ptr_s2(a)
937          REAL(C_FLOAT), INTENT(IN) :: a(:,:)
938          TYPE(C_PTR) :: libxsmm_ptr_s2
939          libxsmm_ptr_s2 = libxsmm_ptr_s1(a)
940        END FUNCTION
941
942        !> Determines the C-address of the given array.
943        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_i0
944        FUNCTION libxsmm_ptr_i0(a)
945          INTEGER(C_INT), INTENT(IN), TARGET :: a
946          TYPE(C_PTR) :: libxsmm_ptr_i0
947          libxsmm_ptr_i0 = C_LOC(a)
948        END FUNCTION
949        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_i1
950        FUNCTION libxsmm_ptr_i1(a)
951          INTEGER(C_INT), INTENT(IN), TARGET :: a(*)
952          TYPE(C_PTR) :: libxsmm_ptr_i1
953          libxsmm_ptr_i1 = C_LOC(a)
954        END FUNCTION
955        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_i2
956        FUNCTION libxsmm_ptr_i2(a)
957          INTEGER(C_INT), INTENT(IN) :: a(:,:)
958          TYPE(C_PTR) :: libxsmm_ptr_i2
959          libxsmm_ptr_i2 = libxsmm_ptr_i1(a)
960        END FUNCTION
961
962        !> Determines the C-address of the given array.
963        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_w0
964        FUNCTION libxsmm_ptr_w0(a)
965          INTEGER(C_SHORT), INTENT(IN), TARGET :: a
966          TYPE(C_PTR) :: libxsmm_ptr_w0
967          libxsmm_ptr_w0 = C_LOC(a)
968        END FUNCTION
969        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_w1
970        FUNCTION libxsmm_ptr_w1(a)
971          INTEGER(C_SHORT), INTENT(IN), TARGET :: a(*)
972          TYPE(C_PTR) :: libxsmm_ptr_w1
973          libxsmm_ptr_w1 = C_LOC(a)
974        END FUNCTION
975        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_w2
976        FUNCTION libxsmm_ptr_w2(a)
977          INTEGER(C_SHORT), INTENT(IN) :: a(:,:)
978          TYPE(C_PTR) :: libxsmm_ptr_w2
979          libxsmm_ptr_w2 = libxsmm_ptr_w1(a)
980        END FUNCTION
981
982        !> Determines the C-address of the given array.
983        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_j0
984        FUNCTION libxsmm_ptr_j0(a)
985          INTEGER(C_INT8_T), INTENT(IN), TARGET :: a
986          TYPE(C_PTR) :: libxsmm_ptr_j0
987          libxsmm_ptr_j0 = C_LOC(a)
988        END FUNCTION
989        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_j1
990        FUNCTION libxsmm_ptr_j1(a)
991          INTEGER(C_INT8_T), INTENT(IN), TARGET :: a(*)
992          TYPE(C_PTR) :: libxsmm_ptr_j1
993          libxsmm_ptr_j1 = C_LOC(a)
994        END FUNCTION
995        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_j2
996        FUNCTION libxsmm_ptr_j2(a)
997          INTEGER(C_INT8_T), INTENT(IN) :: a(:,:)
998          TYPE(C_PTR) :: libxsmm_ptr_j2
999          libxsmm_ptr_j2 = libxsmm_ptr_j1(a)
1000        END FUNCTION
1001
1002        !> Determines the C-address of the given array.
1003        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_b0
1004        FUNCTION libxsmm_ptr_b0(a)
1005          CHARACTER(C_CHAR), INTENT(IN), TARGET :: a
1006          TYPE(C_PTR) :: libxsmm_ptr_b0
1007          libxsmm_ptr_b0 = C_LOC(a)
1008        END FUNCTION
1009        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_b1
1010        FUNCTION libxsmm_ptr_b1(a)
1011          CHARACTER(C_CHAR), INTENT(IN), TARGET :: a(*)
1012          TYPE(C_PTR) :: libxsmm_ptr_b1
1013          libxsmm_ptr_b1 = C_LOC(a)
1014        END FUNCTION
1015        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_b2
1016        FUNCTION libxsmm_ptr_b2(a)
1017          CHARACTER(C_CHAR), INTENT(IN) :: a(:,:)
1018          TYPE(C_PTR) :: libxsmm_ptr_b2
1019          libxsmm_ptr_b2 = libxsmm_ptr_b1(a)
1020        END FUNCTION
1021
1022        !> Determines the C-address of the given array.
1023        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_l0
1024        FUNCTION libxsmm_ptr_l0(a)
1025          INTEGER(C_LONG_LONG), INTENT(IN), TARGET :: a
1026          TYPE(C_PTR) :: libxsmm_ptr_l0
1027          libxsmm_ptr_l0 = C_LOC(a)
1028        END FUNCTION
1029        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_l1
1030        FUNCTION libxsmm_ptr_l1(a)
1031          INTEGER(C_LONG_LONG), INTENT(IN), TARGET :: a(*)
1032          TYPE(C_PTR) :: libxsmm_ptr_l1
1033          libxsmm_ptr_l1 = C_LOC(a)
1034        END FUNCTION
1035        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_l2
1036        FUNCTION libxsmm_ptr_l2(a)
1037          INTEGER(C_LONG_LONG), INTENT(IN) :: a(:,:)
1038          TYPE(C_PTR) :: libxsmm_ptr_l2
1039          libxsmm_ptr_l2 = libxsmm_ptr_l1(a)
1040        END FUNCTION
1041
1042        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_dmm
1043        FUNCTION libxsmm_ptr_dmm(a)
1044          TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN), TARGET :: a(:)
1045          TYPE(LIBXSMM_DMMFUNCTION), POINTER :: p
1046          TYPE(C_PTR) :: libxsmm_ptr_dmm
1047          p => a(LBOUND(a,1)); libxsmm_ptr_dmm = C_LOC(p%handle)
1048        END FUNCTION
1049        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_smm
1050        FUNCTION libxsmm_ptr_smm(a)
1051          TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN), TARGET :: a(:)
1052          TYPE(LIBXSMM_SMMFUNCTION), POINTER :: p
1053          TYPE(C_PTR) :: libxsmm_ptr_smm
1054          p => a(LBOUND(a,1)); libxsmm_ptr_smm = C_LOC(p%handle)
1055        END FUNCTION
1056        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_wimm
1057        FUNCTION libxsmm_ptr_wimm(a)
1058          TYPE(LIBXSMM_WIMMFUNCTION), INTENT(IN), TARGET :: a(:)
1059          TYPE(LIBXSMM_WIMMFUNCTION), POINTER :: p
1060          TYPE(C_PTR) :: libxsmm_ptr_wimm
1061          p => a(LBOUND(a,1)); libxsmm_ptr_wimm = C_LOC(p%handle)
1062        END FUNCTION
1063
1064        !> Deallocate JIT'ted code created by libxsmm_create routines. To
1065        !> unregister code generated with libxsmm_dispatch is unnecessary.
1066        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_release_dmmkernel
1067        SUBROUTINE libxsmm_release_dmmkernel(kernel)
1068          TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN) :: kernel
1069          CALL libxsmm_release_kernel(kernel%handle)
1070        END SUBROUTINE
1071
1072        !> Deallocate JIT'ted code created by libxsmm_create routines. To
1073        !> unregister code generated with libxsmm_dispatch is unnecessary.
1074        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_release_smmkernel
1075        SUBROUTINE libxsmm_release_smmkernel(kernel)
1076          TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN) :: kernel
1077          CALL libxsmm_release_kernel(kernel%handle)
1078        END SUBROUTINE
1079
1080        !> Deallocate JIT'ted code created by libxsmm_create routines. To
1081        !> unregister code generated with libxsmm_dispatch is unnecessary.
1082        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_release_wimmkernel
1083        SUBROUTINE libxsmm_release_wimmkernel(kernel)
1084          TYPE(LIBXSMM_WIMMFUNCTION), INTENT(IN) :: kernel
1085          CALL libxsmm_release_kernel(kernel%handle)
1086        END SUBROUTINE
1087
1088        !> Query or JIT-generate an SMM-kernel (double-precision).
1089        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dmmdispatch
1090        SUBROUTINE libxsmm_dmmdispatch(kernel,                          &
1091     &  m, n, k, lda, ldb, ldc, alpha, beta, flags, prefetch)
1092          TYPE(LIBXSMM_DMMFUNCTION), INTENT(OUT) :: kernel
1093          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1094          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN),                    &
1095     &                                OPTIONAL, TARGET :: lda, ldb, ldc
1096          REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET :: alpha, beta
1097          INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: flags
1098          INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: prefetch
1099          CALL libxsmm_xmmdispatch(                                     &
1100     &      kernel%handle, LIBXSMM_GEMM_PRECISION_F64,                  &
1101     &      m, n, k, C_LOC(lda), C_LOC(ldb), C_LOC(ldc),                &
1102     &      C_LOC(alpha), C_LOC(beta), C_LOC(flags), C_LOC(prefetch))
1103        END SUBROUTINE
1104
1105        !> Query or JIT-generate an SMM-kernel (single-precision).
1106        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_smmdispatch
1107        SUBROUTINE libxsmm_smmdispatch(kernel,                          &
1108     &  m, n, k, lda, ldb, ldc, alpha, beta, flags, prefetch)
1109          TYPE(LIBXSMM_SMMFUNCTION), INTENT(OUT) :: kernel
1110          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1111          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN),                    &
1112     &                                OPTIONAL, TARGET :: lda, ldb, ldc
1113          REAL(C_FLOAT),  INTENT(IN), OPTIONAL, TARGET :: alpha, beta
1114          INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: flags
1115          INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: prefetch
1116          CALL libxsmm_xmmdispatch(                                     &
1117     &      kernel%handle, LIBXSMM_GEMM_PRECISION_F32,                  &
1118     &      m, n, k, C_LOC(lda), C_LOC(ldb), C_LOC(ldc),                &
1119     &      C_LOC(alpha), C_LOC(beta), C_LOC(flags), C_LOC(prefetch))
1120        END SUBROUTINE
1121
1122        !> Query or JIT-generate an SMM-kernel (low-precision, int-accumulate).
1123        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wimmdispatch
1124        SUBROUTINE libxsmm_wimmdispatch(kernel,                         &
1125     &  m, n, k, lda, ldb, ldc, alpha, beta, flags, prefetch)
1126          TYPE(LIBXSMM_WIMMFUNCTION), INTENT(OUT) :: kernel
1127          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1128          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN),                    &
1129     &                                OPTIONAL, TARGET :: lda, ldb, ldc
1130          INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: alpha, beta
1131          INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: flags
1132          INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: prefetch
1133          CALL libxsmm_xmmdispatch2(kernel%handle,                      &
1134     &      LIBXSMM_GEMM_PRECISION_I16, LIBXSMM_GEMM_PRECISION_I32,     &
1135     &      m, n, k, C_LOC(lda), C_LOC(ldb), C_LOC(ldc),                &
1136     &      C_LOC(alpha), C_LOC(beta), C_LOC(flags), C_LOC(prefetch))
1137        END SUBROUTINE
1138
1139        !> Checks if the given kernel was generated. JIT code is guaranteed
1140        !> to be generated if JIT support was enabled at build-time of the
1141        !> library (default). This overload belongs to libxsmm_(mm)available.
1142        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dmmavailable
1143        LOGICAL FUNCTION libxsmm_dmmavailable(kernel)
1144          TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN) :: kernel
1145          libxsmm_dmmavailable = C_ASSOCIATED(kernel%handle)
1146        END FUNCTION
1147
1148        !> Checks if the given kernel was generated. JIT code is guaranteed
1149        !> to be generated if JIT support was enabled at build-time of the
1150        !> library (default). This overload belongs to libxsmm_(mm)available.
1151        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_smmavailable
1152        LOGICAL FUNCTION libxsmm_smmavailable(kernel)
1153          TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN) :: kernel
1154          libxsmm_smmavailable = C_ASSOCIATED(kernel%handle)
1155        END FUNCTION
1156
1157        !> Checks if the given kernel was generated. JIT code is guaranteed
1158        !> to be generated if JIT support was enabled at build-time of the
1159        !> library (default). This overload belongs to libxsmm_(mm)available.
1160        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wimmavailable
1161        LOGICAL FUNCTION libxsmm_wimmavailable(kernel)
1162          TYPE(LIBXSMM_WIMMFUNCTION), INTENT(IN) :: kernel
1163          libxsmm_wimmavailable = C_ASSOCIATED(kernel%handle)
1164        END FUNCTION
1165
1166        !> Calls the kernel with the given arguments. Alternatively,
1167        !> PROCPOINTER can be used as shown by the inner comments
1168        !> of this routine (LIBXSMM_FUNCTION3). The libxsmm_xmmcall
1169        !> routines can be used in FORTRAN77.
1170        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dmmcall_abc
1171        SUBROUTINE libxsmm_dmmcall_abc(kernel, a, b, c)
1172          TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN) :: kernel
1173          REAL(C_DOUBLE), INTENT(IN),    TARGET :: a(*), b(*)
1174          REAL(C_DOUBLE), INTENT(INOUT), TARGET :: c(*)
1175          ! PROCEDURE(LIBXSMM_FUNCTION3), POINTER :: xmm
1176          ! CALL C_F_PROCPOINTER(kernel%handle, xmm)
1177          ! CALL xmm(...)
1178          CALL libxsmm_xmmcall_abc(kernel%handle,                       &
1179     &      C_LOC(a), C_LOC(b), C_LOC(c))
1180        END SUBROUTINE
1181
1182        !> Calls the kernel with the given arguments. Alternatively,
1183        !> PROCPOINTER can be used as shown by the inner comments
1184        !> of this routine (LIBXSMM_FUNCTION6). The libxsmm_xmmcall
1185        !> routines can be used in FORTRAN77.
1186        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dmmcall_prf
1187        SUBROUTINE libxsmm_dmmcall_prf(kernel, a, b, c, pa, pb, pc)
1188          TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN) :: kernel
1189          REAL(C_DOUBLE), INTENT(IN),    TARGET ::  a(*), b(*)
1190          REAL(C_DOUBLE), INTENT(INOUT), TARGET ::  c(*)
1191          REAL(C_DOUBLE), INTENT(IN),    TARGET :: pa(*)
1192          REAL(C_DOUBLE), INTENT(IN),    TARGET :: pb(*)
1193          REAL(C_DOUBLE), INTENT(IN),    TARGET :: pc(*)
1194          ! PROCEDURE(LIBXSMM_FUNCTION6), POINTER :: xmm
1195          ! CALL C_F_PROCPOINTER(kernel%handle, xmm)
1196          ! CALL xmm(...)
1197          CALL libxsmm_xmmcall_prf(kernel%handle,                       &
1198     &      C_LOC(a),  C_LOC(b),  C_LOC(c),                             &
1199     &      C_LOC(pa), C_LOC(pb), C_LOC(pc))
1200        END SUBROUTINE
1201
1202        !> See also libxsmm_dmmcall_abc and libxsmm_dmmcall_prf.
1203        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dmmcall
1204        SUBROUTINE libxsmm_dmmcall(kernel, a, b, c, pa, pb, pc)
1205          TYPE(LIBXSMM_DMMFUNCTION),        INTENT(IN) :: kernel
1206          REAL(C_DOUBLE), INTENT(IN),           TARGET ::  a(*), b(*)
1207          REAL(C_DOUBLE), INTENT(INOUT),        TARGET ::  c(*)
1208          REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET :: pa(*)
1209          REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET :: pb(*)
1210          REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET :: pc(*)
1211          ! use .OR. instead of .AND. to avoid full check
1212          IF (PRESENT(pa).OR.PRESENT(pb).OR.PRESENT(pc)) THEN
1213            CALL libxsmm_xmmcall_prf(kernel%handle,                     &
1214     &        C_LOC(a),  C_LOC(b),  C_LOC(c),                           &
1215     &        C_LOC(pa), C_LOC(pb), C_LOC(pc))
1216          ELSE
1217            CALL libxsmm_xmmcall_abc(kernel%handle,                     &
1218     &        C_LOC(a), C_LOC(b), C_LOC(c))
1219          END IF
1220        END SUBROUTINE
1221
1222        !> Calls the kernel with the given arguments. Alternatively,
1223        !> PROCPOINTER can be used as shown by the inner comments
1224        !> of this routine (LIBXSMM_FUNCTION3). The libxsmm_xmmcall
1225        !> routines can be used in FORTRAN77.
1226        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_smmcall_abc
1227        SUBROUTINE libxsmm_smmcall_abc(kernel, a, b, c)
1228          TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN) :: kernel
1229          REAL(C_FLOAT), INTENT(IN),     TARGET :: a(*), b(*)
1230          REAL(C_FLOAT), INTENT(INOUT),  TARGET :: c(*)
1231          ! PROCEDURE(LIBXSMM_FUNCTION3), POINTER :: xmm
1232          ! CALL C_F_PROCPOINTER(kernel%handle, xmm)
1233          ! CALL xmm(...)
1234          CALL libxsmm_xmmcall_abc(kernel%handle,                       &
1235     &      C_LOC(a), C_LOC(b), C_LOC(c))
1236        END SUBROUTINE
1237
1238        !> Calls the kernel with the given arguments. Alternatively,
1239        !> PROCPOINTER can be used as shown by the inner comments
1240        !> of this routine (LIBXSMM_FUNCTION6). The libxsmm_xmmcall
1241        !> routines can be used in FORTRAN77.
1242        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_smmcall_prf
1243        SUBROUTINE libxsmm_smmcall_prf(kernel, a, b, c, pa, pb, pc)
1244          TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN) :: kernel
1245          REAL(C_FLOAT), INTENT(IN),     TARGET ::  a(*), b(*)
1246          REAL(C_FLOAT), INTENT(INOUT),  TARGET ::  c(*)
1247          REAL(C_FLOAT), INTENT(IN),     TARGET :: pa(*)
1248          REAL(C_FLOAT), INTENT(IN),     TARGET :: pb(*)
1249          REAL(C_FLOAT), INTENT(IN),     TARGET :: pc(*)
1250          ! PROCEDURE(LIBXSMM_FUNCTION6), POINTER :: xmm
1251          ! CALL C_F_PROCPOINTER(kernel%handle, xmm)
1252          ! CALL xmm(...)
1253          CALL libxsmm_xmmcall_prf(kernel%handle,                       &
1254     &      C_LOC(a),  C_LOC(b),  C_LOC(c),                             &
1255     &      C_LOC(pa), C_LOC(pb), C_LOC(pc))
1256        END SUBROUTINE
1257
1258        !> See also libxsmm_smmcall_abc and libxsmm_smmcall_prf.
1259        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_smmcall
1260        SUBROUTINE libxsmm_smmcall(kernel, a, b, c, pa, pb, pc)
1261          TYPE(LIBXSMM_SMMFUNCTION),       INTENT(IN) :: kernel
1262          REAL(C_FLOAT), INTENT(IN),           TARGET ::  a(*), b(*)
1263          REAL(C_FLOAT), INTENT(INOUT),        TARGET ::  c(*)
1264          REAL(C_FLOAT), INTENT(IN), OPTIONAL, TARGET :: pa(*)
1265          REAL(C_FLOAT), INTENT(IN), OPTIONAL, TARGET :: pb(*)
1266          REAL(C_FLOAT), INTENT(IN), OPTIONAL, TARGET :: pc(*)
1267          ! use .OR. instead of .AND. to avoid full check
1268          IF (PRESENT(pa).OR.PRESENT(pb).OR.PRESENT(pc)) THEN
1269            CALL libxsmm_xmmcall_prf(kernel%handle,                     &
1270     &        C_LOC(a),  C_LOC(b),  C_LOC(c),                           &
1271     &        C_LOC(pa), C_LOC(pb), C_LOC(pc))
1272          ELSE
1273            CALL libxsmm_xmmcall_abc(kernel%handle,                     &
1274     &        C_LOC(a), C_LOC(b), C_LOC(c))
1275          END IF
1276        END SUBROUTINE
1277
1278        !> Calls the kernel with the given arguments. Alternatively,
1279        !> PROCPOINTER can be used as shown by the inner comments
1280        !> of this routine (LIBXSMM_FUNCTION3). The libxsmm_xmmcall
1281        !> routines can be used in FORTRAN77.
1282        SUBROUTINE libxsmm_wimmcall_abc(kernel, a, b, c)
1283          TYPE(LIBXSMM_WIMMFUNCTION),  INTENT(IN) :: kernel
1284          INTEGER(C_SHORT), INTENT(IN),    TARGET :: a(*), b(*)
1285          INTEGER(C_INT),   INTENT(INOUT), TARGET :: c(*)
1286          ! PROCEDURE(LIBXSMM_FUNCTION3), POINTER :: xmm
1287          ! CALL C_F_PROCPOINTER(kernel%handle, xmm)
1288          ! CALL xmm(...)
1289          CALL libxsmm_xmmcall_abc(kernel%handle,                       &
1290     &      C_LOC(a), C_LOC(b), C_LOC(c))
1291        END SUBROUTINE
1292
1293        !> Calls the kernel with the given arguments. Alternatively,
1294        !> PROCPOINTER can be used as shown by the inner comments
1295        !> of this routine (LIBXSMM_FUNCTION6). The libxsmm_xmmcall
1296        !> routines can be used in FORTRAN77.
1297        SUBROUTINE libxsmm_wimmcall_prf(kernel, a, b, c, pa, pb, pc)
1298          TYPE(LIBXSMM_WIMMFUNCTION),  INTENT(IN) :: kernel
1299          INTEGER(C_SHORT), INTENT(IN),    TARGET ::  a(*), b(*)
1300          INTEGER(C_INT),   INTENT(INOUT), TARGET ::  c(*)
1301          INTEGER(C_SHORT), INTENT(IN),    TARGET :: pa(*)
1302          INTEGER(C_SHORT), INTENT(IN),    TARGET :: pb(*)
1303          INTEGER(C_SHORT), INTENT(IN),    TARGET :: pc(*)
1304          ! PROCEDURE(LIBXSMM_FUNCTION6), POINTER :: xmm
1305          ! CALL C_F_PROCPOINTER(kernel%handle, xmm)
1306          ! CALL xmm(...)
1307          CALL libxsmm_xmmcall_prf(kernel%handle,                       &
1308     &      C_LOC(a),  C_LOC(b),  C_LOC(c),                             &
1309     &      C_LOC(pa), C_LOC(pb), C_LOC(pc))
1310        END SUBROUTINE
1311
1312        !> See also libxsmm_wimmcall_abc and libxsmm_wimmcall_prf.
1313        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wimmcall
1314        SUBROUTINE libxsmm_wimmcall(kernel, a, b, c, pa, pb, pc)
1315          TYPE(LIBXSMM_WIMMFUNCTION),         INTENT(IN) :: kernel
1316          INTEGER(C_SHORT), INTENT(IN),           TARGET ::  a(*), b(*)
1317          INTEGER(C_INT), INTENT(INOUT),          TARGET ::  c(*)
1318          INTEGER(C_SHORT), INTENT(IN), OPTIONAL, TARGET :: pa(*)
1319          INTEGER(C_SHORT), INTENT(IN), OPTIONAL, TARGET :: pb(*)
1320          INTEGER(C_SHORT), INTENT(IN), OPTIONAL, TARGET :: pc(*)
1321          ! use .OR. instead of .AND. to avoid full check
1322          IF (PRESENT(pa).OR.PRESENT(pb).OR.PRESENT(pc)) THEN
1323            CALL libxsmm_xmmcall_prf(kernel%handle,                     &
1324     &        C_LOC(a),  C_LOC(b),  C_LOC(c),                           &
1325     &        C_LOC(pa), C_LOC(pb), C_LOC(pc))
1326          ELSE
1327            CALL libxsmm_xmmcall_abc(kernel%handle,                     &
1328     &        C_LOC(a), C_LOC(b), C_LOC(c))
1329          END IF
1330        END SUBROUTINE
1331
1332        !> Register user-defined key-value; value can be queried (libxsmm_xdispatch).
1333        !> Since the key-type is unknown to LIBXSMM, the key must be binary reproducible,
1334        !> i.e., if it is a structured type (padded data may be uninitialized), it must
1335        !> be initially zero-filled (libxsmm_xclear) followed by an element-wise setup.
1336        !> The size of the key is limited (see documentation). The given value is copied
1337        !> by LIBXSMM and may be initialized at registration-time or whenever queried.
1338        !> Registered data is released at program termination but can be also released
1339        !> if needed (libxsmm_xrelease), .e.g., for larger value for the same key.
1340        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xregister
1341        FUNCTION libxsmm_xregister(key, keysize, valsize, valinit)
1342          TYPE(C_PTR), INTENT(IN), VALUE :: key
1343          TYPE(C_PTR), INTENT(IN), VALUE, OPTIONAL :: valinit
1344          INTEGER(C_INT), INTENT(IN) :: keysize, valsize
1345          TYPE(C_PTR) :: libxsmm_xregister
1346          !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_xregister
1347          INTERFACE
1348            SUBROUTINE internal_xregister(regval,                       &
1349     &      key, keysize, valsize, valinit)                             &
1350     &      BIND(C, NAME="libxsmm_xregister_")
1351              IMPORT C_PTR, C_INT
1352              TYPE(C_PTR), INTENT(OUT) :: regval
1353              TYPE(C_PTR), INTENT(IN), VALUE :: key, valinit
1354              INTEGER(C_INT), INTENT(IN) :: keysize, valsize
1355            END SUBROUTINE
1356          END INTERFACE
1357          CALL internal_xregister(libxsmm_xregister,                    &
1358     &      key, keysize, valsize, valinit)
1359        END FUNCTION
1360
1361        !> Query user-defined value from LIBXSMM's code registry.
1362        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xdispatch
1363        FUNCTION libxsmm_xdispatch(key, keysize)
1364          TYPE(C_PTR), INTENT(IN), VALUE :: key
1365          INTEGER(C_INT), INTENT(IN) :: keysize
1366          TYPE(C_PTR) :: libxsmm_xdispatch
1367          !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_xdispatch
1368          INTERFACE
1369            SUBROUTINE internal_xdispatch(regval, key, keysize)         &
1370     &      BIND(C, NAME="libxsmm_xdispatch_")
1371              IMPORT C_PTR, C_INT
1372              TYPE(C_PTR), INTENT(OUT) :: regval
1373              TYPE(C_PTR), INTENT(IN), VALUE :: key
1374              INTEGER(C_INT), INTENT(IN) :: keysize
1375            END SUBROUTINE
1376          END INTERFACE
1377          CALL internal_xdispatch(libxsmm_xdispatch, key, keysize)
1378        END FUNCTION
1379
1380        !> Auto-dispatched general dense MM (double-precision).
1381        !> This overload belongs to libxsmm_(d)gemm.
1382        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dgemm0
1383        PURE SUBROUTINE libxsmm_dgemm0(transa, transb, m, n, k,         &
1384     &  alpha, a, lda, b, ldb, beta, c, ldc)
1385          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1386          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1387          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda
1388          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb
1389          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc
1390          REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta
1391          REAL(C_DOUBLE), INTENT(IN) :: a, b
1392          REAL(C_DOUBLE), INTENT(INOUT) :: c
1393          !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_gemm
1394          INTERFACE
1395            PURE SUBROUTINE internal_gemm(transa, transb, m, n, k,      &
1396     &      alpha, a, lda, b, ldb, beta, c, ldc)                        &
1397     &      BIND(C, NAME="libxsmm_dgemm_")
1398              IMPORT C_CHAR, C_DOUBLE, LIBXSMM_BLASINT_KIND
1399              CHARACTER(C_CHAR), INTENT(IN) :: transa, transb
1400              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1401              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda
1402              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldb
1403              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldc
1404              REAL(C_DOUBLE), INTENT(IN) :: alpha, beta
1405              REAL(C_DOUBLE), INTENT(IN) :: a, b
1406              REAL(C_DOUBLE), INTENT(INOUT) :: c
1407            END SUBROUTINE
1408          END INTERFACE
1409          CALL internal_gemm(transa, transb, m, n, k,                   &
1410     &      alpha, a, lda, b, ldb, beta, c, ldc)
1411        END SUBROUTINE
1412
1413        !> Auto-dispatched general dense MM (double-precision).
1414        !> This overload belongs to libxsmm_(d)gemm.
1415        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dgemm1
1416        PURE SUBROUTINE libxsmm_dgemm1(transa, transb, m, n, k,         &
1417     &  alpha, a, lda, b, ldb, beta, c, ldc)
1418          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1419          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1420          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda
1421          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb
1422          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc
1423          REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta
1424          REAL(C_DOUBLE), INTENT(IN)    :: a(*), b(*)
1425          REAL(C_DOUBLE), INTENT(INOUT) :: c(*)
1426          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1427            CALL libxsmm_dgemm0(transa, transb, m, n, k,                &
1428     &        alpha, a(LBOUND(a,1)), lda,                               &
1429     &               b(LBOUND(b,1)), ldb,                               &
1430     &         beta, c(LBOUND(c,1)), ldc)
1431          END IF
1432        END SUBROUTINE
1433
1434        !> Auto-dispatched general dense MM (double-precision).
1435        !> This overload belongs to libxsmm_(d)gemm.
1436        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dgemm2
1437        PURE SUBROUTINE libxsmm_dgemm2(transa, transb, m, n, k,         &
1438     &  a, b, c, alpha, beta)
1439          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1440          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1441          REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta
1442          REAL(C_DOUBLE), INTENT(IN)    :: a(m,*), b(k,*)
1443          REAL(C_DOUBLE), INTENT(INOUT) :: c(m,*)
1444          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1445            CALL libxsmm_dgemm0(transa, transb, m, n, k,                &
1446     &        alpha, a(LBOUND(a,1),LBOUND(a,2)), m,                     &
1447     &               b(LBOUND(b,1),LBOUND(b,2)), k,                     &
1448     &         beta, c(LBOUND(c,1),LBOUND(c,2)), m)
1449          END IF
1450        END SUBROUTINE
1451
1452        !> Auto-dispatched general dense MM (double-precision).
1453        !> This overload belongs to libxsmm_(d)gemm.
1454        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dgemm3
1455        PURE SUBROUTINE libxsmm_dgemm3(transa, transb, m, n, k,         &
1456     &  alpha, a, lda, b, ldb, beta, c, ldc)
1457          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1458          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1459          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
1460          REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta
1461          REAL(C_DOUBLE), INTENT(IN)    :: a(lda,*), b(ldb,*)
1462          REAL(C_DOUBLE), INTENT(INOUT) :: c(ldc,*)
1463          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1464            CALL libxsmm_dgemm0(transa, transb, m, n, k,                &
1465     &        alpha, a(LBOUND(a,1),LBOUND(a,2)), lda,                   &
1466     &               b(LBOUND(b,1),LBOUND(b,2)), ldb,                   &
1467     &         beta, c(LBOUND(c,1),LBOUND(c,2)), ldc)
1468          END IF
1469        END SUBROUTINE
1470
1471        !> Auto-dispatched general dense MM (single-precision).
1472        !> This overload belongs to libxsmm_(s)gemm.
1473        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_sgemm0
1474        PURE SUBROUTINE libxsmm_sgemm0(transa, transb, m, n, k,         &
1475     &  alpha, a, lda, b, ldb, beta, c, ldc)
1476          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1477          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1478          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda
1479          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb
1480          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc
1481          REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta
1482          REAL(C_FLOAT), INTENT(IN)    :: a, b
1483          REAL(C_FLOAT), INTENT(INOUT) :: c
1484          !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_gemm
1485          INTERFACE
1486            PURE SUBROUTINE internal_gemm(transa, transb, m, n, k,      &
1487     &      alpha, a, lda, b, ldb, beta, c, ldc)                        &
1488     &      BIND(C, NAME="libxsmm_sgemm_")
1489              IMPORT C_CHAR, C_FLOAT, LIBXSMM_BLASINT_KIND
1490              CHARACTER(C_CHAR), INTENT(IN) :: transa, transb
1491              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1492              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda
1493              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldb
1494              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldc
1495              REAL(C_FLOAT), INTENT(IN) :: alpha, beta
1496              REAL(C_FLOAT), INTENT(IN) :: a, b
1497              REAL(C_FLOAT), INTENT(INOUT) :: c
1498            END SUBROUTINE
1499          END INTERFACE
1500          CALL internal_gemm(transa, transb, m, n, k,                   &
1501     &      alpha, a, lda, b, ldb, beta, c, ldc)
1502        END SUBROUTINE
1503
1504        !> Auto-dispatched general dense MM (single-precision).
1505        !> This overload belongs to libxsmm_(s)gemm.
1506        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_sgemm1
1507        PURE SUBROUTINE libxsmm_sgemm1(transa, transb, m, n, k,         &
1508     &  alpha, a, lda, b, ldb, beta, c, ldc)
1509          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1510          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1511          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda
1512          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb
1513          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc
1514          REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta
1515          REAL(C_FLOAT), INTENT(IN)    :: a(*), b(*)
1516          REAL(C_FLOAT), INTENT(INOUT) :: c(*)
1517          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1518            CALL libxsmm_sgemm0(transa, transb, m, n, k,                &
1519     &        alpha, a(LBOUND(a,1)), lda,                               &
1520     &               b(LBOUND(b,1)), ldb,                               &
1521     &         beta, c(LBOUND(c,1)), ldc)
1522          END IF
1523        END SUBROUTINE
1524
1525        !> Auto-dispatched general dense MM (single-precision).
1526        !> This overload belongs to libxsmm_(s)gemm.
1527        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_sgemm2
1528        PURE SUBROUTINE libxsmm_sgemm2(transa, transb, m, n, k,         &
1529     &  a, b, c, alpha, beta)
1530          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1531          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1532          REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta
1533          REAL(C_FLOAT), INTENT(IN)    :: a(m,*), b(k,*)
1534          REAL(C_FLOAT), INTENT(INOUT) :: c(m,*)
1535          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1536            CALL libxsmm_sgemm0(transa, transb, m, n, k,                &
1537     &        alpha, a(LBOUND(a,1),LBOUND(a,2)), m,                     &
1538     &               b(LBOUND(b,1),LBOUND(b,2)), k,                     &
1539     &         beta, c(LBOUND(c,1),LBOUND(c,2)), m)
1540          END IF
1541        END SUBROUTINE
1542
1543        !> Auto-dispatched general dense MM (single-precision).
1544        !> This overload belongs to libxsmm_(s)gemm.
1545        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_sgemm3
1546        PURE SUBROUTINE libxsmm_sgemm3(transa, transb, m, n, k,         &
1547     &  alpha, a, lda, b, ldb, beta, c, ldc)
1548          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1549          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1550          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
1551          REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta
1552          REAL(C_FLOAT), INTENT(IN)    :: a(lda,*), b(ldb,*)
1553          REAL(C_FLOAT), INTENT(INOUT) :: c(ldc,*)
1554          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1555            CALL libxsmm_sgemm0(transa, transb, m, n, k,                &
1556     &        alpha, a(LBOUND(a,1),LBOUND(a,2)), lda,                   &
1557     &               b(LBOUND(b,1),LBOUND(b,2)), ldb,                   &
1558     &         beta, c(LBOUND(c,1),LBOUND(c,2)), ldc)
1559          END IF
1560        END SUBROUTINE
1561
1562        !> Auto-dispatched general dense MM (low-precision, int-accumulate).
1563        !> This overload belongs to libxsmm_(wi)gemm.
1564        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wigemm0
1565        PURE SUBROUTINE libxsmm_wigemm0(transa, transb, m, n, k,        &
1566     &  alpha, a, lda, b, ldb, beta, c, ldc)
1567          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1568          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1569          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda
1570          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb
1571          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc
1572          INTEGER(C_INT), INTENT(IN), OPTIONAL :: alpha, beta
1573          INTEGER(C_SHORT), INTENT(IN)  :: a, b
1574          INTEGER(C_INT), INTENT(INOUT) :: c
1575          !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_gemm
1576          INTERFACE
1577            PURE SUBROUTINE internal_gemm(transa, transb, m, n, k,      &
1578     &      alpha, a, lda, b, ldb, beta, c, ldc)                        &
1579     &      BIND(C, NAME="libxsmm_wigemm_")
1580              IMPORT C_CHAR, C_SHORT, C_INT, LIBXSMM_BLASINT_KIND
1581              CHARACTER(C_CHAR), INTENT(IN) :: transa, transb
1582              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1583              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda
1584              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldb
1585              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldc
1586              INTEGER(C_INT),   INTENT(IN)    :: alpha, beta
1587              INTEGER(C_SHORT), INTENT(IN)    :: a, b
1588              INTEGER(C_INT),   INTENT(INOUT) :: c
1589            END SUBROUTINE
1590          END INTERFACE
1591          CALL internal_gemm(transa, transb, m, n, k,                   &
1592     &      alpha, a, lda, b, ldb, beta, c, ldc)
1593        END SUBROUTINE
1594
1595        !> Auto-dispatched general dense MM (low-precision, int-accumulate).
1596        !> This overload belongs to libxsmm_(wi)gemm.
1597        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wigemm1
1598        PURE SUBROUTINE libxsmm_wigemm1(transa, transb, m, n, k,        &
1599     &  alpha, a, lda, b, ldb, beta, c, ldc)
1600          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1601          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1602          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda
1603          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb
1604          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc
1605          INTEGER(C_INT), INTENT(IN), OPTIONAL :: alpha, beta
1606          INTEGER(C_SHORT), INTENT(IN)    :: a(*), b(*)
1607          INTEGER(C_INT),   INTENT(INOUT) :: c(*)
1608          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1609            CALL libxsmm_wigemm0(transa, transb, m, n, k,               &
1610     &        alpha, a(LBOUND(a,1)), lda,                               &
1611     &               b(LBOUND(b,1)), ldb,                               &
1612     &         beta, c(LBOUND(c,1)), ldc)
1613          END IF
1614        END SUBROUTINE
1615
1616        !> Auto-dispatched general dense MM (low-precision, int-accumulate).
1617        !> This overload belongs to libxsmm_(wi)gemm.
1618        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wigemm2
1619        PURE SUBROUTINE libxsmm_wigemm2(transa, transb, m, n, k,        &
1620     &  a, b, c, alpha, beta)
1621          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1622          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1623          INTEGER(C_INT), INTENT(IN), OPTIONAL :: alpha, beta
1624          INTEGER(C_SHORT), INTENT(IN)  :: a(m,*), b(k,*)
1625          INTEGER(C_INT), INTENT(INOUT) :: c(m,*)
1626          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1627            CALL libxsmm_wigemm0(transa, transb, m, n, k,               &
1628     &        alpha, a(LBOUND(a,1),LBOUND(a,2)), m,                     &
1629     &               b(LBOUND(b,1),LBOUND(b,2)), k,                     &
1630     &         beta, c(LBOUND(c,1),LBOUND(c,2)), m)
1631          END IF
1632        END SUBROUTINE
1633
1634        !> Auto-dispatched general dense MM (low-precision, int-accumulate).
1635        !> This overload belongs to libxsmm_(wi)gemm.
1636        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wigemm3
1637        PURE SUBROUTINE libxsmm_wigemm3(transa, transb, m, n, k,        &
1638     &  alpha, a, lda, b, ldb, beta, c, ldc)
1639          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1640          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1641          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
1642          INTEGER(C_INT), INTENT(IN), OPTIONAL :: alpha, beta
1643          INTEGER(C_SHORT), INTENT(IN)  :: a(lda,*), b(ldb,*)
1644          INTEGER(C_INT), INTENT(INOUT) :: c(ldc,*)
1645          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1646            CALL libxsmm_wigemm0(transa, transb, m, n, k,               &
1647     &        alpha, a(LBOUND(a,1),LBOUND(a,2)), lda,                   &
1648     &               b(LBOUND(b,1),LBOUND(b,2)), ldb,                   &
1649     &         beta, c(LBOUND(c,1),LBOUND(c,2)), ldc)
1650          END IF
1651        END SUBROUTINE
1652
1653        !> Re-exposes BLAS based GEMM routine with an interfaces similar to
1654        !> libxsmm_(d)gemm. This overload belongs to libxsmm_blas_(d)gemm.
1655        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_dgemm0
1656        PURE SUBROUTINE libxsmm_blas_dgemm0(transa, transb, m, n, k,    &
1657     &  alpha, a, lda, b, ldb, beta, c, ldc)
1658          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1659          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1660          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda
1661          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb
1662          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc
1663          REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta
1664          REAL(C_DOUBLE), INTENT(IN)    :: a, b
1665          REAL(C_DOUBLE), INTENT(INOUT) :: c
1666          !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_gemm
1667          INTERFACE
1668            PURE SUBROUTINE internal_gemm(transa, transb, m, n, k,      &
1669     &      alpha, a, lda, b, ldb, beta, c, ldc)                        &
1670     &      BIND(C, NAME="libxsmm_blas_dgemm_")
1671              IMPORT C_CHAR, C_DOUBLE, LIBXSMM_BLASINT_KIND
1672              CHARACTER(C_CHAR), INTENT(IN) :: transa, transb
1673              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1674              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda
1675              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldb
1676              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldc
1677              REAL(C_DOUBLE), INTENT(IN)    :: alpha, beta
1678              REAL(C_DOUBLE), INTENT(IN)    :: a, b
1679              REAL(C_DOUBLE), INTENT(INOUT) :: c
1680            END SUBROUTINE
1681          END INTERFACE
1682          CALL internal_gemm(transa, transb, m, n, k,                   &
1683     &      alpha, a, lda, b, ldb, beta, c, ldc)
1684        END SUBROUTINE
1685
1686        !> Re-exposes BLAS based GEMM routine with an interfaces similar to
1687        !> libxsmm_(d)gemm. This overload belongs to libxsmm_blas_(d)gemm.
1688        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_dgemm1
1689        PURE SUBROUTINE libxsmm_blas_dgemm1(transa, transb, m, n, k,    &
1690     &  alpha, a, lda, b, ldb, beta, c, ldc)
1691          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1692          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1693          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda
1694          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb
1695          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc
1696          REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta
1697          REAL(C_DOUBLE), INTENT(IN)    :: a(*), b(*)
1698          REAL(C_DOUBLE), INTENT(INOUT) :: c(*)
1699          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1700            CALL libxsmm_blas_dgemm0(transa, transb, m, n, k,           &
1701     &        alpha, a(LBOUND(a,1)), lda,                               &
1702     &               b(LBOUND(b,1)), ldb,                               &
1703     &         beta, c(LBOUND(c,1)), ldc)
1704          END IF
1705        END SUBROUTINE
1706
1707        !> Re-exposes BLAS based GEMM routine with an interfaces similar to
1708        !> libxsmm_(d)gemm. This overload belongs to libxsmm_blas_(d)gemm.
1709        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_dgemm2
1710        PURE SUBROUTINE libxsmm_blas_dgemm2(transa, transb, m, n, k,    &
1711     &  a, b, c, alpha, beta)
1712          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1713          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1714          REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta
1715          REAL(C_DOUBLE), INTENT(IN)    :: a(m,*), b(k,*)
1716          REAL(C_DOUBLE), INTENT(INOUT) :: c(m,*)
1717          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1718            CALL libxsmm_blas_dgemm0(transa, transb, m, n, k,           &
1719     &        alpha, a(LBOUND(a,1),LBOUND(a,2)), m,                     &
1720     &               b(LBOUND(b,1),LBOUND(b,2)), k,                     &
1721     &         beta, c(LBOUND(c,1),LBOUND(c,2)), m)
1722          END IF
1723        END SUBROUTINE
1724
1725        !> Re-exposes BLAS based GEMM routine with an interfaces similar to
1726        !> libxsmm_(d)gemm. This overload belongs to libxsmm_blas_(d)gemm.
1727        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_dgemm3
1728        PURE SUBROUTINE libxsmm_blas_dgemm3(transa, transb, m, n, k,    &
1729     &  alpha, a, lda, b, ldb, beta, c, ldc)
1730          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1731          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1732          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
1733          REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta
1734          REAL(C_DOUBLE), INTENT(IN)    :: a(lda,*), b(ldb,*)
1735          REAL(C_DOUBLE), INTENT(INOUT) :: c(ldc,*)
1736          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1737            CALL libxsmm_blas_dgemm0(transa, transb, m, n, k,           &
1738     &        alpha, a(LBOUND(a,1),LBOUND(a,2)), lda,                   &
1739     &               b(LBOUND(b,1),LBOUND(b,2)), ldb,                   &
1740     &         beta, c(LBOUND(c,1),LBOUND(c,2)), ldc)
1741          END IF
1742        END SUBROUTINE
1743
1744        !> Re-exposes BLAS based GEMM routine with an interfaces similar to
1745        !> libxsmm_(s)gemm. This overload belongs to libxsmm_blas_(s)gemm.
1746        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_sgemm0
1747        PURE SUBROUTINE libxsmm_blas_sgemm0(transa, transb, m, n, k,    &
1748     &  alpha, a, lda, b, ldb, beta, c, ldc)
1749          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1750          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1751          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda
1752          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb
1753          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc
1754          REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta
1755          REAL(C_FLOAT), INTENT(IN)    :: a, b
1756          REAL(C_FLOAT), INTENT(INOUT) :: c
1757          !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_gemm
1758          INTERFACE
1759            PURE SUBROUTINE internal_gemm(transa, transb, m, n, k,      &
1760     &      alpha, a, lda, b, ldb, beta, c, ldc)                        &
1761     &      BIND(C, NAME="libxsmm_blas_sgemm_")
1762              IMPORT C_CHAR, C_FLOAT, LIBXSMM_BLASINT_KIND
1763              CHARACTER(C_CHAR), INTENT(IN) :: transa, transb
1764              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1765              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda
1766              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldb
1767              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldc
1768              REAL(C_FLOAT), INTENT(IN)    :: alpha, beta
1769              REAL(C_FLOAT), INTENT(IN)    :: a, b
1770              REAL(C_FLOAT), INTENT(INOUT) :: c
1771            END SUBROUTINE
1772          END INTERFACE
1773          CALL internal_gemm(transa, transb, m, n, k,                   &
1774     &      alpha, a, lda, b, ldb, beta, c, ldc)
1775        END SUBROUTINE
1776
1777        !> Re-exposes BLAS based GEMM routine with an interfaces similar to
1778        !> libxsmm_(s)gemm. This overload belongs to libxsmm_blas_(s)gemm.
1779        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_sgemm1
1780        PURE SUBROUTINE libxsmm_blas_sgemm1(transa, transb, m, n, k,    &
1781     &  alpha, a, lda, b, ldb, beta, c, ldc)
1782          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1783          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1784          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda
1785          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb
1786          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc
1787          REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta
1788          REAL(C_FLOAT), INTENT(IN)    :: a(*), b(*)
1789          REAL(C_FLOAT), INTENT(INOUT) :: c(*)
1790          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1791            CALL libxsmm_blas_sgemm0(transa, transb, m, n, k,           &
1792     &        alpha, a(LBOUND(a,1)), lda,                               &
1793     &               b(LBOUND(b,1)), ldb,                               &
1794     &         beta, c(LBOUND(c,1)), ldc)
1795          END IF
1796        END SUBROUTINE
1797
1798        !> Re-exposes BLAS based GEMM routine with an interfaces similar to
1799        !> libxsmm_(s)gemm. This overload belongs to libxsmm_blas_(s)gemm.
1800        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_sgemm2
1801        PURE SUBROUTINE libxsmm_blas_sgemm2(transa, transb, m, n, k,    &
1802     &  a, b, c, alpha, beta)
1803          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1804          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1805          REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta
1806          REAL(C_FLOAT), INTENT(IN)    :: a(m,*), b(k,*)
1807          REAL(C_FLOAT), INTENT(INOUT) :: c(m,*)
1808          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1809            CALL libxsmm_blas_sgemm0(transa, transb, m, n, k,           &
1810     &        alpha, a(LBOUND(a,1),LBOUND(a,2)), m,                     &
1811     &               b(LBOUND(b,1),LBOUND(b,2)), k,                     &
1812     &         beta, c(LBOUND(c,1),LBOUND(c,2)), m)
1813          END IF
1814        END SUBROUTINE
1815
1816        !> Re-exposes BLAS based GEMM routine with an interfaces similar to
1817        !> libxsmm_(s)gemm. This overload belongs to libxsmm_blas_(s)gemm.
1818        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_sgemm3
1819        PURE SUBROUTINE libxsmm_blas_sgemm3(transa, transb, m, n, k,    &
1820     &  alpha, a, lda, b, ldb, beta, c, ldc)
1821          CHARACTER, INTENT(IN), OPTIONAL :: transa, transb
1822          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k
1823          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc
1824          REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta
1825          REAL(C_FLOAT), INTENT(IN)    :: a(lda,*), b(ldb,*)
1826          REAL(C_FLOAT), INTENT(INOUT) :: c(ldc,*)
1827          IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN
1828            CALL libxsmm_blas_sgemm0(transa, transb, m, n, k,           &
1829     &        alpha, a(LBOUND(a,1),LBOUND(a,2)), lda,                   &
1830     &               b(LBOUND(b,1),LBOUND(b,2)), ldb,                   &
1831     &         beta, c(LBOUND(c,1),LBOUND(c,2)), ldc)
1832          END IF
1833        END SUBROUTINE
1834
1835        !> Matrix-copy (2-dimensional copy) routine. If the input (optional)
1836        !> is not present, the routine is used to zero-fill the out-matrix.
1837        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_p0
1838        PURE SUBROUTINE libxsmm_matcopy_p0(output, input, typesize,     &
1839     &  m, n, ldi, ldo)
1840          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m
1841          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN),                    &
1842     &                                OPTIONAL :: n, ldi, ldo
1843          INTEGER(C_INT), INTENT(IN) :: typesize
1844          TYPE(C_PTR), INTENT(IN), OPTIONAL :: input
1845          TYPE(C_PTR), INTENT(IN) :: output
1846          CALL libxsmm_xmatcopy(output, input, typesize,                &
1847     &      m, n, ldi, ldo)
1848        END SUBROUTINE
1849
1850        !> Matrix-copy (2-dimensional copy) routine (DP/rank-1).
1851        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_d1
1852        SUBROUTINE libxsmm_matcopy_d1(output, input, m, n, ldi, ldo)
1853          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m
1854          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n
1855          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldi
1856          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldo
1857          REAL(C_DOUBLE), INTENT(OUT),          TARGET :: output(*)
1858          REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET ::  input(*)
1859          CALL libxsmm_xmatcopy(C_LOC(output), C_LOC(input), 8,         &
1860     &      m, n, ldi, ldo)
1861        END SUBROUTINE
1862
1863        !> Matrix-copy (2-dimensional copy) routine (DP/rank-2).
1864        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_d2
1865        SUBROUTINE libxsmm_matcopy_d2(output, input, m, n, ldi, ldo)
1866          INTEGER(LIBXSMM_BLASINT_KIND),    INTENT(IN) :: m, n, ldi, ldo
1867          REAL(C_DOUBLE), INTENT(OUT),          TARGET :: output(ldo,*)
1868          REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET ::  input(ldi,*)
1869          CALL libxsmm_xmatcopy(C_LOC(output), C_LOC(input), 8,         &
1870     &      m, n, ldi, ldo)
1871        END SUBROUTINE
1872
1873        !> Matrix-copy (2-dimensional copy) routine (SP/rank-1).
1874        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_s1
1875        SUBROUTINE libxsmm_matcopy_s1(output, input, m, n, ldi, ldo)
1876          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m
1877          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n
1878          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldi
1879          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldo
1880          REAL(C_FLOAT),  INTENT(OUT),          TARGET :: output(*)
1881          REAL(C_FLOAT),  INTENT(IN), OPTIONAL, TARGET ::  input(*)
1882          CALL libxsmm_xmatcopy(C_LOC(output), C_LOC(input), 4,         &
1883     &      m, n, ldi, ldo)
1884        END SUBROUTINE
1885
1886        !> Matrix-copy (2-dimensional copy) routine (SP/rank-2).
1887        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_s2
1888        SUBROUTINE libxsmm_matcopy_s2(output, input, m, n, ldi, ldo)
1889          INTEGER(LIBXSMM_BLASINT_KIND),    INTENT(IN) :: m, n, ldi, ldo
1890          REAL(C_FLOAT),  INTENT(OUT),          TARGET :: output(ldo,*)
1891          REAL(C_FLOAT),  INTENT(IN), OPTIONAL, TARGET ::  input(ldi,*)
1892          CALL libxsmm_xmatcopy(C_LOC(output), C_LOC(input), 4,         &
1893     &      m, n, ldi, ldo)
1894        END SUBROUTINE
1895
1896        !> Transpose a matrix (in-place form).
1897        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_itrans_p0
1898        PURE SUBROUTINE libxsmm_itrans_p0(matrix, typesize, m, n, ld)
1899          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m
1900          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n, ld
1901          TYPE(C_PTR),    INTENT(IN) :: matrix
1902          INTEGER(C_INT), INTENT(IN) :: typesize
1903          CALL libxsmm_xitrans(matrix, typesize, m, n, ld)
1904        END SUBROUTINE
1905
1906        !> Transpose a matrix (in-place form, DP/rank-1).
1907        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_itrans_d1
1908        SUBROUTINE libxsmm_itrans_d1(matrix, m, n, ld)
1909          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m
1910          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n, ld
1911          REAL(C_DOUBLE), INTENT(INOUT), TARGET :: matrix(*)
1912          CALL libxsmm_xitrans(C_LOC(matrix), 8, m, n, ld)
1913        END SUBROUTINE
1914
1915        !> Transpose a matrix (in-place form, DP/rank-2).
1916        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_itrans_d2
1917        SUBROUTINE libxsmm_itrans_d2(matrix, m, n, ld)
1918          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ld
1919          REAL(C_DOUBLE), INTENT(INOUT), TARGET :: matrix(ld,*)
1920          CALL libxsmm_xitrans(C_LOC(matrix), 8, m, n, ld)
1921        END SUBROUTINE
1922
1923        !> Transpose a matrix (in-place form, SP/rank-1).
1924        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_itrans_s1
1925        SUBROUTINE libxsmm_itrans_s1(matrix, m, n, ld)
1926          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m
1927          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n, ld
1928          REAL(C_FLOAT), INTENT(INOUT), TARGET :: matrix(*)
1929          CALL libxsmm_xitrans(C_LOC(matrix), 4, m, n, ld)
1930        END SUBROUTINE
1931
1932        !> Transpose a matrix (in-place form, SP/rank-2).
1933        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_itrans_s2
1934        SUBROUTINE libxsmm_itrans_s2(matrix, m, n, ld)
1935          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ld
1936          REAL(C_FLOAT), INTENT(INOUT), TARGET :: matrix(ld,*)
1937          CALL libxsmm_xitrans(C_LOC(matrix), 4, m, n, ld)
1938        END SUBROUTINE
1939
1940        !> Transpose a matrix (out-of-place form).
1941        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_p0
1942        PURE SUBROUTINE libxsmm_otrans_p0(output, input, typesize,      &
1943     &  m, n, ldi, ldo)
1944          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m
1945          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n
1946          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldi
1947          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldo
1948          TYPE(C_PTR),    INTENT(IN) :: output, input
1949          INTEGER(C_INT), INTENT(IN) :: typesize
1950          CALL libxsmm_xotrans(output, input, typesize, m, n, ldi, ldo)
1951        END SUBROUTINE
1952
1953        !> Transpose a matrix (out-of-place form, DP/rank-1).
1954        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_d1
1955        SUBROUTINE libxsmm_otrans_d1(output, input, m, n, ldi, ldo)
1956          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m
1957          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n
1958          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldi
1959          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldo
1960          REAL(C_DOUBLE), INTENT(OUT), TARGET :: output(*)
1961          REAL(C_DOUBLE), INTENT(IN),  TARGET ::  input(*)
1962          CALL libxsmm_xotrans(C_LOC(output), C_LOC(input),             &
1963     &      8, m, n, ldi, ldo)
1964        END SUBROUTINE
1965
1966        !> Transpose a matrix (out-of-place form, DP/rank-2).
1967        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_d2
1968        SUBROUTINE libxsmm_otrans_d2(output, input, m, n, ldi, ldo)
1969          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo
1970          REAL(C_DOUBLE), INTENT(OUT), TARGET :: output(ldo,*)
1971          REAL(C_DOUBLE), INTENT(IN),  TARGET ::  input(ldi,*)
1972          CALL libxsmm_xotrans(C_LOC(output), C_LOC(input),             &
1973     &      8, m, n, ldi, ldo)
1974        END SUBROUTINE
1975
1976        !> Transpose a matrix (out-of-place form, SP/rank-1).
1977        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_s1
1978        SUBROUTINE libxsmm_otrans_s1(output, input, m, n, ldi, ldo)
1979          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m
1980          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n
1981          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldi
1982          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldo
1983          REAL(C_FLOAT), INTENT(OUT), TARGET :: output(*)
1984          REAL(C_FLOAT), INTENT(IN),  TARGET ::  input(*)
1985          CALL libxsmm_xotrans(C_LOC(output), C_LOC(input),             &
1986     &      4, m, n, ldi, ldo)
1987        END SUBROUTINE
1988
1989        !> Transpose a matrix (out-of-place form, SP/rank-2).
1990        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_s2
1991        SUBROUTINE libxsmm_otrans_s2(output, input, m, n, ldi, ldo)
1992          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo
1993          REAL(C_FLOAT), INTENT(OUT), TARGET :: output(ldo,*)
1994          REAL(C_FLOAT), INTENT(IN),  TARGET ::  input(ldi,*)
1995          CALL libxsmm_xotrans(C_LOC(output), C_LOC(input),             &
1996     &      4, m, n, ldi, ldo)
1997        END SUBROUTINE
1998
1999        !> Returns the difference between two timer ticks (cycles).
2000        !> Implicit FORTRAN 77 interface: subroutine available.
2001        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_timer_ncycles
2002        PURE FUNCTION libxsmm_timer_ncycles(tick0, tick1)
2003          INTEGER(LIBXSMM_TICKINT_KIND), INTENT(IN) :: tick0, tick1
2004          INTEGER(LIBXSMM_TICKINT_KIND) :: libxsmm_timer_ncycles
2005          !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_timer_ncycles
2006          INTERFACE
2007            PURE SUBROUTINE internal_timer_ncycles(ncycles,             &
2008     &      tick0, tick1) BIND(C, NAME="libxsmm_timer_ncycles_")
2009              IMPORT LIBXSMM_TICKINT_KIND
2010              INTEGER(LIBXSMM_TICKINT_KIND), INTENT(IN)  :: tick0, tick1
2011              INTEGER(LIBXSMM_TICKINT_KIND), INTENT(OUT) :: ncycles
2012            END SUBROUTINE
2013          END INTERFACE
2014          CALL internal_timer_ncycles(                                  &
2015     &      libxsmm_timer_ncycles, tick0, tick1)
2016          END FUNCTION
2017
2018        !> Utility function to calculate a collection of scalar differences
2019        !> between two matrices (libxsmm_matdiff_info). The location (m, n)
2020        !> of the largest difference (linf_abs) is recorded (also if NaN).
2021        !> In case of NaN, differences are set to infinity. If no difference
2022        !> is discovered, the location (m, n) is negative (OOB).
2023        !> Implicit FORTRAN 77 interface:
2024        !> TYPE         :: info
2025        !> INTEGER(4)   :: datatype
2026        !> INTEGER(4|8) :: m, n, ldref, ldtst
2027        !> ARRAY        :: ref, tst
2028        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matdiff
2029        PURE SUBROUTINE libxsmm_matdiff(info, datatype, m, n,           &
2030     &  ref, tst, ldref, ldtst)
2031          INTEGER(C_INT),                INTENT(IN) :: datatype
2032          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m
2033          INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN),                    &
2034     &                                     OPTIONAL :: n, ldref, ldtst
2035          TYPE(C_PTR), INTENT(IN),         OPTIONAL :: ref, tst
2036          TYPE(LIBXSMM_MATDIFF_INFO),   INTENT(OUT) :: info
2037          !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_matdiff
2038          INTERFACE
2039            PURE SUBROUTINE internal_matdiff(info, datatype, m, n,      &
2040     &      ref, tst, ldref, ldtst) BIND(C, NAME="libxsmm_matdiff_")
2041              IMPORT LIBXSMM_MATDIFF_INFO, LIBXSMM_BLASINT_KIND
2042              IMPORT C_PTR, C_INT
2043              INTEGER(C_INT), INTENT(IN)                :: datatype
2044              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n
2045              INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldref, ldtst
2046              TYPE(C_PTR), INTENT(IN), VALUE            :: ref, tst
2047              TYPE(LIBXSMM_MATDIFF_INFO),   INTENT(OUT) :: info
2048            END SUBROUTINE
2049          END INTERFACE
2050          CALL internal_matdiff(info, datatype, m, n,                   &
2051     &      ref, tst, ldref, ldtst)
2052        END SUBROUTINE
2053
2054        !> Calculate co-prime number <= n/2 (except: libxsmm_shuffle(0|1) == 0).
2055        !> Implicit FORTRAN 77 interface:
2056        !> INTEGER(4) :: coprime (OUT)
2057        !> INTEGER(4) :: n
2058        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_shuffle
2059        ELEMENTAL FUNCTION libxsmm_shuffle(n)
2060          INTEGER(C_LONG_LONG) :: libxsmm_shuffle
2061          INTEGER(C_INT), INTENT(IN) :: n
2062          !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_shuffle
2063          INTERFACE
2064            PURE SUBROUTINE internal_shuffle(coprime, n)                &
2065     &      BIND(C, NAME="libxsmm_shuffle_")
2066              IMPORT C_LONG_LONG, C_INT
2067              INTEGER(C_LONG_LONG), INTENT(OUT) :: coprime
2068              INTEGER(C_INT), INTENT(IN) :: n
2069            END SUBROUTINE
2070          END INTERFACE
2071          libxsmm_shuffle = INT(0, KIND=C_LONG_LONG) ! avoid warning (older CRAY)
2072          CALL internal_shuffle(libxsmm_shuffle, n)
2073        END FUNCTION
2074
2075        !> Calculates a hash value for the given array and seed.
2076        !> FORTRAN 77: see libxsmm_xhash
2077        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_hash_char
2078        FUNCTION libxsmm_hash_char(key, seed)
2079          CHARACTER(C_CHAR), INTENT(IN) :: key(:)
2080          INTEGER(C_INT), INTENT(IN) :: seed
2081          INTEGER(C_INT) :: libxsmm_hash_char
2082          libxsmm_hash_char = seed
2083          CALL libxsmm_xhash(libxsmm_hash_char,                         &
2084     &      libxsmm_ptr(key), SIZE(key))
2085        END FUNCTION
2086
2087        !> Calculates a hash value for the given array and seed.
2088        !> FORTRAN 77: see libxsmm_xhash
2089        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_hash_i8
2090        FUNCTION libxsmm_hash_i8(key, seed)
2091          INTEGER(C_INT8_T), INTENT(IN) :: key(:)
2092          INTEGER(C_INT), INTENT(IN) :: seed
2093          INTEGER(C_INT) :: libxsmm_hash_i8
2094          libxsmm_hash_i8 = seed
2095          CALL libxsmm_xhash(libxsmm_hash_i8,                           &
2096     &      libxsmm_ptr(key), SIZE(key))
2097        END FUNCTION
2098
2099        !> Calculates a hash value for the given array and seed.
2100        !> FORTRAN 77: see libxsmm_xhash
2101        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_hash_i32
2102        FUNCTION libxsmm_hash_i32(key, seed)
2103          INTEGER(C_INT), INTENT(IN) :: key(:)
2104          INTEGER(C_INT), INTENT(IN) :: seed
2105          INTEGER(C_INT) :: libxsmm_hash_i32
2106          libxsmm_hash_i32 = seed
2107          CALL libxsmm_xhash(libxsmm_hash_i32,                          &
2108     &      libxsmm_ptr(key), SIZE(key) * 4)
2109        END FUNCTION
2110
2111        !> Calculates a hash value for the given array and seed.
2112        !> FORTRAN 77: see libxsmm_xhash
2113        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_hash_i64
2114        FUNCTION libxsmm_hash_i64(key, seed)
2115          INTEGER(C_LONG_LONG), INTENT(IN) :: key(:)
2116          INTEGER(C_INT), INTENT(IN) :: seed
2117          INTEGER(C_INT) :: libxsmm_hash_i64
2118          libxsmm_hash_i64 = seed
2119          CALL libxsmm_xhash(libxsmm_hash_i64,                          &
2120     &      libxsmm_ptr(key), SIZE(key) * 8)
2121        END FUNCTION
2122
2123        !> Calculates if there is a difference between two arrays.
2124        !> FORTRAN 77: see libxsmm_xdiff
2125        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_diff_char
2126        FUNCTION libxsmm_diff_char(a, b)
2127          CHARACTER(C_CHAR), INTENT(IN) :: a(:), b(:)
2128          LOGICAL(C_BOOL) :: libxsmm_diff_char
2129          IF (SIZE(a, KIND=C_LONG_LONG) .EQ. SIZE(b, KIND=C_LONG_LONG)) &
2130     &    THEN
2131            CALL libxsmm_xdiff(libxsmm_diff_char,                       &
2132     &        libxsmm_ptr(a), libxsmm_ptr(b),                           &
2133     &        SIZE(a, KIND=C_LONG_LONG))
2134          ELSE
2135            libxsmm_diff_char = LOGICAL(.TRUE., KIND=C_BOOL)
2136          END IF
2137        END FUNCTION
2138
2139        !> Calculates if there is a difference between two arrays.
2140        !> FORTRAN 77: see libxsmm_xdiff
2141        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_diff_i8
2142        FUNCTION libxsmm_diff_i8(a, b)
2143          INTEGER(C_INT8_T), INTENT(IN) :: a(:), b(:)
2144          LOGICAL(C_BOOL) :: libxsmm_diff_i8
2145          IF (SIZE(a, KIND=C_LONG_LONG) .EQ. SIZE(b, KIND=C_LONG_LONG)) &
2146     &    THEN
2147            CALL libxsmm_xdiff(libxsmm_diff_i8,                         &
2148     &        libxsmm_ptr(a), libxsmm_ptr(b),                           &
2149     &        SIZE(a, KIND=C_LONG_LONG))
2150          ELSE
2151            libxsmm_diff_i8 = LOGICAL(.TRUE., KIND=C_BOOL)
2152          END IF
2153        END FUNCTION
2154
2155        !> Calculates if there is a difference between two arrays.
2156        !> FORTRAN 77: see libxsmm_xdiff
2157        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_diff_i32
2158        FUNCTION libxsmm_diff_i32(a, b)
2159          INTEGER(C_INT), INTENT(IN) :: a(:), b(:)
2160          LOGICAL(C_BOOL) :: libxsmm_diff_i32
2161          IF (SIZE(a, KIND=C_LONG_LONG) .EQ. SIZE(b, KIND=C_LONG_LONG)) &
2162     &    THEN
2163            CALL libxsmm_xdiff(libxsmm_diff_i32,                        &
2164     &        libxsmm_ptr(a), libxsmm_ptr(b),                           &
2165     &        SIZE(a, KIND=C_LONG_LONG) * INT(4, KIND=C_LONG_LONG))
2166          ELSE
2167            libxsmm_diff_i32 = LOGICAL(.TRUE., KIND=C_BOOL)
2168          END IF
2169        END FUNCTION
2170
2171        !> Calculates if there is a difference between two arrays.
2172        !> FORTRAN 77: see libxsmm_xdiff
2173        !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_diff_i64
2174        FUNCTION libxsmm_diff_i64(a, b)
2175          INTEGER(C_LONG_LONG), INTENT(IN) :: a(:), b(:)
2176          LOGICAL(C_BOOL) :: libxsmm_diff_i64
2177          IF (SIZE(a, KIND=C_LONG_LONG) .EQ. SIZE(b, KIND=C_LONG_LONG)) &
2178     &    THEN
2179            CALL libxsmm_xdiff(libxsmm_diff_i64,                        &
2180     &        libxsmm_ptr(a), libxsmm_ptr(b),                           &
2181     &        SIZE(a, KIND=C_LONG_LONG) * INT(8, KIND=C_LONG_LONG))
2182          ELSE
2183            libxsmm_diff_i64 = LOGICAL(.TRUE., KIND=C_BOOL)
2184          END IF
2185        END FUNCTION
2186      END MODULE
2187
2188