1!=======================================================================! 2! Copyright (c) Intel Corporation - All rights reserved. ! 3! This file is part of the LIBXSMM library. ! 4! ! 5! For information on the license, see the LICENSE file. ! 6! Further information: https://github.com/hfp/libxsmm/ ! 7! SPDX-License-Identifier: BSD-3-Clause ! 8!=======================================================================! 9! Hans Pabst (Intel Corp.) 10!=======================================================================! 11 12 MODULE LIBXSMM 13 USE, INTRINSIC :: ISO_C_BINDING, ONLY: & 14 & C_DOUBLE, C_FLOAT, C_DOUBLE_COMPLEX, C_FLOAT_COMPLEX, & 15 & C_LONG_LONG, C_INT, C_SHORT, C_CHAR, C_INT8_T, C_BOOL, & 16 & C_F_POINTER, C_ASSOCIATED, C_LOC, C_PTR, & 17 & C_FUNPTR, C_NULL_FUNPTR, C_NULL_PTR 18 IMPLICIT NONE 19 20 !> Name of the version (stringized set of version numbers). 21 CHARACTER(*), PARAMETER :: LIBXSMM_VERSION = "$VERSION" 22 !> Name of the branch of which the version is derived from. 23 CHARACTER(*), PARAMETER :: LIBXSMM_BRANCH = "$BRANCH" 24 !> Major version based on the last reachable tag under RCS. 25 INTEGER(C_INT), PARAMETER :: LIBXSMM_VERSION_MAJOR = $MAJOR 26 !> Minor version based on the last reachable tag of the RCS. 27 INTEGER(C_INT), PARAMETER :: LIBXSMM_VERSION_MINOR = $MINOR 28 !> Update number based on the last reachable tag under RCS. 29 INTEGER(C_INT), PARAMETER :: LIBXSMM_VERSION_UPDATE = $UPDATE 30 !> Patch number counting commits since the last version stamp. 31 INTEGER(C_INT), PARAMETER :: LIBXSMM_VERSION_PATCH = $PATCH 32 33 !> Parameters the library and static kernels were built for. 34 INTEGER(C_INT), PARAMETER :: LIBXSMM_CACHELINE = $CACHELINE 35 INTEGER(C_INT), PARAMETER :: LIBXSMM_ALIGNMENT = $CACHELINE 36 INTEGER(C_INT), PARAMETER :: LIBXSMM_PREFETCH = $PREFETCH 37 INTEGER(C_INT), PARAMETER :: LIBXSMM_MAX_MNK = $MAX_MNK 38 INTEGER(C_INT), PARAMETER :: LIBXSMM_MAX_DIM = $MAX_DIM 39 INTEGER(C_INT), PARAMETER :: LIBXSMM_FLAGS = $FLAGS 40 INTEGER(C_INT), PARAMETER :: LIBXSMM_ILP64 = $ILP64 41 42 !> Parameters supplied for backward compatibility (deprecated). 43 INTEGER(C_INT), PARAMETER :: LIBXSMM_COL_MAJOR = 1 44 INTEGER(C_INT), PARAMETER :: LIBXSMM_ROW_MAJOR = 0 45 46 !> LIBXSMM_BLASINT_KIND impacts BLAS interface (LP64: 32-bit, ILP64: 64-bit). 47 INTEGER(C_INT), PARAMETER :: LIBXSMM_BLASINT_KIND = $BLASINT_KIND 48 !> Integer kind used by timer interface. 49 INTEGER(C_INT), PARAMETER :: LIBXSMM_TICKINT_KIND = C_LONG_LONG 50 51 !> Parameters representing the GEMM performed by the simplified interface. 52 REAL(C_DOUBLE), PARAMETER :: LIBXSMM_ALPHA = REAL($ALPHA, C_DOUBLE) 53 REAL(C_DOUBLE), PARAMETER :: LIBXSMM_BETA = REAL($BETA, C_DOUBLE) 54 55 !> Flag enumeration which can be IORed. 56 INTEGER(C_INT), PARAMETER :: & 57 & LIBXSMM_GEMM_FLAG_NONE = 0, & 58 & LIBXSMM_GEMM_FLAG_TRANS_A = 1, & 59 & LIBXSMM_GEMM_FLAG_TRANS_B = 2, & 60 & LIBXSMM_GEMM_FLAG_TRANS_AB = IOR( & 61 & LIBXSMM_GEMM_FLAG_TRANS_A, LIBXSMM_GEMM_FLAG_TRANS_B), & 62 & LIBXSMM_GEMM_FLAG_BETA_0 = 16 63 64 !> Flag enumeration which can be IORed. 65 INTEGER(C_INT), PARAMETER :: & 66 ! Handle recorded batch unsynchronized-parallel. 67 & LIBXSMM_MMBATCH_FLAG_DEFAULT = 0, & 68 ! Synchronize among C matrices. 69 & LIBXSMM_MMBATCH_FLAG_SYNCHRONIZED = 512, & 70 ! Handle recorded batch sequentially. 71 & LIBXSMM_MMBATCH_FLAG_SEQUENTIAL = 1024, & 72 ! Only record a statistic of potential SMMs. 73 & LIBXSMM_MMBATCH_FLAG_STATISTIC = 2048 74 75 !> Enumerates element/data types. 76 INTEGER(C_INT), PARAMETER :: & 77 & LIBXSMM_DATATYPE_F64 = 0, & 78 & LIBXSMM_DATATYPE_F32 = 1, & 79 & LIBXSMM_DATATYPE_BF16 = 2, & 80 & LIBXSMM_DATATYPE_I64 = 3, & 81 & LIBXSMM_DATATYPE_I32 = 4, & 82 & LIBXSMM_DATATYPE_I16 = 5, & 83 & LIBXSMM_DATATYPE_I8 = 6, & 84 & LIBXSMM_DATATYPE_UNSUPPORTED = 7 85 86 !> Denotes the precision/data type of GEMM (for weak-typed 87 !> interface functions such as libxsmm_xmmdispatch). 88 INTEGER(C_INT), PARAMETER :: & 89 & LIBXSMM_GEMM_PRECISION_F64 = LIBXSMM_DATATYPE_F64, & 90 & LIBXSMM_GEMM_PRECISION_F32 = LIBXSMM_DATATYPE_F32, & 91 & LIBXSMM_GEMM_PRECISION_BF16 = LIBXSMM_DATATYPE_BF16, & 92 & LIBXSMM_GEMM_PRECISION_I32 = LIBXSMM_DATATYPE_I32, & 93 & LIBXSMM_GEMM_PRECISION_I16 = LIBXSMM_DATATYPE_I16, & 94 & LIBXSMM_GEMM_PRECISION_I8 = LIBXSMM_DATATYPE_I8 95 96 !> Enumeration of the available prefetch strategies which can be IORed. 97 INTEGER(C_INT), PARAMETER :: & 98 ! Automatically select strategy (frontend). 99 & LIBXSMM_PREFETCH_AUTO = -1, & 100 ! No prefetching and no prefetch function signature. 101 & LIBXSMM_PREFETCH_NONE = 0, & 102 ! Only function prefetch signature. 103 & LIBXSMM_PREFETCH_SIGONLY = 1, & 104 ! Prefetch PA using accesses to A. 105 & LIBXSMM_GEMM_PREFETCH_AL2 = 2, & 106 ! Prefetch PB using accesses to C. 107 & LIBXSMM_GEMM_PREFETCH_BL2_VIA_C = 4, & 108 ! Prefetch A ahead. 109 & LIBXSMM_GEMM_PREFETCH_AL2_AHEAD = 8, & 110 ! Composed prefetch strategies. 111 & LIBXSMM_GEMM_PREFETCH_AL2BL2_VIA_C = IOR( & 112 & LIBXSMM_GEMM_PREFETCH_BL2_VIA_C, & 113 & LIBXSMM_GEMM_PREFETCH_AL2), & 114 & LIBXSMM_GEMM_PREFETCH_AL2BL2_VIA_C_AHEAD = IOR( & 115 & LIBXSMM_GEMM_PREFETCH_BL2_VIA_C, & 116 & LIBXSMM_GEMM_PREFETCH_AL2_AHEAD), & 117 ! Current B into L1. 118 & LIBXSMM_GEMM_PREFETCH_BL1 = 16 119 120 !> Enumerates the available target architectures and instruction 121 !> set extensions as returned by libxsmm_get_target_archid(). 122 INTEGER(C_INT), PARAMETER :: & 123 & LIBXSMM_TARGET_ARCH_UNKNOWN = 0, & 124 & LIBXSMM_TARGET_ARCH_GENERIC = 1, & 125 & LIBXSMM_X86_GENERIC = 1002, & 126 & LIBXSMM_X86_SSE3 = 1003, & 127 & LIBXSMM_X86_SSE4 = 1004, & 128 & LIBXSMM_X86_AVX = 1005, & 129 & LIBXSMM_X86_AVX2 = 1006, & 130 & LIBXSMM_X86_AVX512 = 1007, & 131 & LIBXSMM_X86_AVX512_MIC = 1010, & 132 & LIBXSMM_X86_AVX512_KNM = 1011, & 133 & LIBXSMM_X86_AVX512_CORE = 1020, & 134 & LIBXSMM_X86_AVX512_CLX = 1021, & 135 & LIBXSMM_X86_AVX512_CPX = 1022 136 137 !> Generic function type (double-precision). 138 TYPE, BIND(C) :: LIBXSMM_DMMFUNCTION 139 TYPE(C_FUNPTR) :: handle = C_NULL_FUNPTR 140 END TYPE 141 142 !> Generic function type (single-precision). 143 TYPE, BIND(C) :: LIBXSMM_SMMFUNCTION 144 TYPE(C_FUNPTR) :: handle = C_NULL_FUNPTR 145 END TYPE 146 147 !> Generic function type (low-precision) 148 TYPE, BIND(C) :: LIBXSMM_WIMMFUNCTION 149 TYPE(C_FUNPTR) :: handle = C_NULL_FUNPTR 150 END TYPE 151 152 !> Generic function types with certain arity. 153 ABSTRACT INTERFACE 154 PURE SUBROUTINE LIBXSMM_FUNCTION3(a, b, c) BIND(C) 155 IMPORT :: C_PTR 156 TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c 157 END SUBROUTINE 158 159 PURE SUBROUTINE LIBXSMM_FUNCTION6(a, b, c, pa, pb, pc) BIND(C) 160 IMPORT :: C_PTR 161 TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c 162 TYPE(C_PTR), INTENT(IN), VALUE :: pa, pb, pc 163 END SUBROUTINE 164 END INTERFACE 165 166 !> Structure of differences with matrix norms according 167 !> to http://www.netlib.org/lapack/lug/node75.html). 168 TYPE, BIND(C) :: LIBXSMM_MATDIFF_INFO 169 REAL(C_DOUBLE) norm1_abs, norm1_rel !! One-norm 170 REAL(C_DOUBLE) normi_abs, normi_rel !! Infinity-norm 171 REAL(C_DOUBLE) normf_rel !! Froebenius-norm 172 !> Maximum difference, and L2-norm (both absolute and relative). 173 REAL(C_DOUBLE) linf_abs, linf_rel, l2_abs, l2_rel 174 !> Statistics: sum/l1, min., max., arith. avg., and variance. 175 REAL(C_DOUBLE) l1_ref, min_ref, max_ref, avg_ref, var_ref 176 !> Statistics: sum/l1, min., max., arith. avg., and variance. 177 REAL(C_DOUBLE) l1_tst, min_tst, max_tst, avg_tst, var_tst 178 !> Location (m, n) of largest difference (linf_abs). 179 INTEGER(LIBXSMM_BLASINT_KIND) m 180 INTEGER(LIBXSMM_BLASINT_KIND) n 181 END TYPE 182 183 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_init, libxsmm_finalize 184 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_get_gemm_auto_prefetch 185 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_set_gemm_auto_prefetch 186 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_get_target_archid 187 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_set_target_archid 188 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_set_target_arch 189 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_get_verbosity 190 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_set_verbosity 191 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_release_kernel 192 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matdiff_reduce 193 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matdiff_clear 194 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xmmdispatch2 195 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xmmdispatch 196 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xmmcall_abc 197 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xmmcall_prf 198 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xclear 199 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xrelease 200 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xmatcopy 201 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xitrans 202 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xotrans 203 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_omp 204 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_omp 205 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dgemm_omp 206 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_sgemm_omp 207 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_mmbatch 208 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_mmbatch_begin 209 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_mmbatch_end 210 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_gemm_batch 211 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_gemm_batch_omp 212 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_timer_duration 213 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_timer_tick 214 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xhash 215 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xdiff 216 INTERFACE 217 !> Initialize the library; pay for setup cost at a specific point. 218 SUBROUTINE libxsmm_init() BIND(C) 219 END SUBROUTINE 220 221 !> De-initialize the library and free internal memory (optional). 222 SUBROUTINE libxsmm_finalize() BIND(C) 223 END SUBROUTINE 224 225 !> Get the default prefetch strategy. 226 PURE FUNCTION libxsmm_get_gemm_auto_prefetch() BIND(C) 227 IMPORT :: C_INT 228 INTEGER(C_INT) :: libxsmm_get_gemm_auto_prefetch 229 END FUNCTION 230 231 !> Set the default prefetch strategy. 232 SUBROUTINE libxsmm_set_gemm_auto_prefetch(strategy) BIND(C) 233 IMPORT :: C_INT 234 INTEGER(C_INT), INTENT(IN), VALUE :: strategy 235 END SUBROUTINE 236 237 !> Returns the architecture and instruction set extension as determined 238 !> by the CPUID flags, as set by the libxsmm_get_target_arch* functions, 239 !> or as set by the LIBXSMM_TARGET environment variable. 240 PURE FUNCTION libxsmm_get_target_archid() BIND(C) 241 IMPORT :: C_INT 242 INTEGER(C_INT) :: libxsmm_get_target_archid 243 END FUNCTION 244 245 !> Set target architecture (archid: see PARAMETER enumeration) 246 !> for subsequent code generation (JIT). 247 SUBROUTINE libxsmm_set_target_archid(archid) BIND(C) 248 IMPORT :: C_INT 249 INTEGER(C_INT), INTENT(IN), VALUE :: archid 250 END SUBROUTINE 251 252 !> Set target architecture for subsequent code generation (JIT). 253 !> arch="0"|"sse"|"snb"|"hsw"|"knl"|"knm"|"skx"|"clx"|"cpx", 254 !> or "0" to rely on the CPUID (default). 255 !> There are some alternative target names as well: 256 !> "sse", "avx", "avx2", "avx3" (incomplete list). 257 SUBROUTINE libxsmm_set_target_arch(arch) BIND(C) 258 IMPORT :: C_CHAR 259 CHARACTER(C_CHAR), INTENT(IN) :: arch(*) 260 END SUBROUTINE 261 262 !> Get the level of verbosity. 263 PURE FUNCTION libxsmm_get_verbosity() BIND(C) 264 IMPORT :: C_INT 265 INTEGER(C_INT) :: libxsmm_get_verbosity 266 END FUNCTION 267 268 !> Set the level of verbosity (0: off, positive value: verbosity level, 269 !> negative value: maximum verbosity, which also dumps JIT-code). 270 SUBROUTINE libxsmm_set_verbosity(level) BIND(C) 271 IMPORT :: C_INT 272 INTEGER(C_INT), INTENT(IN), VALUE :: level 273 END SUBROUTINE 274 275 !> Impure function which returns the current clock tick of a 276 !> monotonic timer source; uses a platform-specific resolution. 277 !> Implicit FORTRAN 77 interface: not available. 278 INTEGER(LIBXSMM_TICKINT_KIND) & 279 & FUNCTION libxsmm_timer_tick() BIND(C) 280 IMPORT :: LIBXSMM_TICKINT_KIND 281 END FUNCTION 282 283 !> Impure function (timer freq. may vary) which returns the duration 284 !> (in seconds) between two values received by libxsmm_timer_tick. 285 !> Implicit FORTRAN 77 interface: not available. 286 FUNCTION libxsmm_timer_duration(tick0, tick1) BIND(C) 287 IMPORT :: LIBXSMM_TICKINT_KIND, C_DOUBLE 288 INTEGER(LIBXSMM_TICKINT_KIND), INTENT(IN), VALUE :: tick0 289 INTEGER(LIBXSMM_TICKINT_KIND), INTENT(IN), VALUE :: tick1 290 REAL(C_DOUBLE) :: libxsmm_timer_duration 291 END FUNCTION 292 293 !> Deallocates the JIT'ted code, or unregisters 294 !> and releases code from the registry. 295 !> Implicit FORTRAN 77 interface: 296 !> INTEGER(8) :: kernel 297 SUBROUTINE libxsmm_release_kernel(kernel) & 298 & BIND(C, NAME="libxsmm_release_kernel_") 299 IMPORT :: C_FUNPTR 300 TYPE(C_FUNPTR), INTENT(IN) :: kernel 301 END SUBROUTINE 302 303 !> Type-generic (unsafe) code dispatch (trylock: impure routine). 304 !> Implicit FORTRAN 77 interface: 305 !> INTEGER(4) :: gemm_precision, flags, prefetch 306 !> INTEGER(4|8) :: m, n, k, lda, ldb, ldc 307 !> REAL(4|8) :: alpha, beta 308 !> INTEGER(8) :: kernel 309 SUBROUTINE libxsmm_xmmdispatch(kernel, gemm_precision, & 310 & m, n, k, lda, ldb, ldc, alpha, beta, flags, prefetch) & 311 & BIND(C, NAME="libxsmm_xmmdispatch_") 312 IMPORT :: C_FUNPTR, C_PTR, C_INT, LIBXSMM_BLASINT_KIND 313 TYPE(C_FUNPTR), INTENT(OUT) :: kernel 314 INTEGER(C_INT), INTENT(IN) :: gemm_precision 315 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 316 TYPE(C_PTR), INTENT(IN), VALUE :: lda, ldb, ldc 317 TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta 318 TYPE(C_PTR), INTENT(IN), VALUE :: flags, prefetch 319 END SUBROUTINE 320 321 !> Type-generic (unsafe) code dispatch (trylock: impure routine). 322 !> Implicit FORTRAN 77 interface: 323 !> INTEGER(4) :: iprec, oprec, flags, prefetch 324 !> INTEGER(4|8) :: m, n, k, lda, ldb, ldc 325 !> REAL(4|8) :: alpha, beta 326 !> INTEGER(8) :: kernel 327 SUBROUTINE libxsmm_xmmdispatch2(kernel, iprec, oprec, & 328 & m, n, k, lda, ldb, ldc, alpha, beta, flags, prefetch) & 329 & BIND(C, NAME="libxsmm_xmmdispatch2_") 330 IMPORT :: C_FUNPTR, C_PTR, C_INT, LIBXSMM_BLASINT_KIND 331 TYPE(C_FUNPTR), INTENT(OUT) :: kernel 332 INTEGER(C_INT), INTENT(IN) :: iprec, oprec 333 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 334 TYPE(C_PTR), INTENT(IN), VALUE :: lda, ldb, ldc 335 TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta 336 TYPE(C_PTR), INTENT(IN), VALUE :: flags, prefetch 337 END SUBROUTINE 338 339 !> Generic call routine (3-argument form). 340 !> Implicit FORTRAN 77 interface: 341 !> REAL(4|8) :: a, b, c 342 !> INTEGER(8) :: kernel 343 PURE SUBROUTINE libxsmm_xmmcall_abc(kernel, a, b, c) & 344 & BIND(C, NAME="libxsmm_xmmcall_abc_") 345 IMPORT C_FUNPTR, C_PTR 346 TYPE(C_FUNPTR), INTENT(IN) :: kernel 347 TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c 348 END SUBROUTINE 349 350 !> Generic call routine (6-argument form). 351 !> Implicit FORTRAN 77 interface: 352 !> REAL(4|8) :: a, b, c, pa, pb, pc 353 !> INTEGER(8) :: kernel 354 PURE SUBROUTINE libxsmm_xmmcall_prf(kernel, & 355 & a, b, c, pa, pb, pc) & 356 & BIND(C, NAME="libxsmm_xmmcall_prf_") 357 IMPORT C_FUNPTR, C_PTR 358 TYPE(C_FUNPTR), INTENT(IN) :: kernel 359 TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c, pa, pb, pc 360 END SUBROUTINE 361 362 !> Fill destination with zeros; treats dst in raw/binary fashion. 363 SUBROUTINE libxsmm_xclear(dst, nbytes) & 364 & BIND(C, NAME="libxsmm_xclear_") 365 IMPORT C_PTR, C_INT 366 TYPE(C_PTR), INTENT(IN), VALUE :: dst 367 INTEGER(C_INT), INTENT(IN) :: nbytes 368 END SUBROUTINE 369 370 !> Remove key-value pair from code registry and release memory. 371 SUBROUTINE libxsmm_xrelease(key, keysize) & 372 & BIND(C, NAME="libxsmm_xrelease_") 373 IMPORT C_PTR, C_INT 374 TYPE(C_PTR), INTENT(IN), VALUE :: key 375 INTEGER(C_INT), INTENT(IN) :: keysize 376 END SUBROUTINE 377 378 !> Matrix-copy (2-dimensional copy) routine. 379 !> Implicit FORTRAN 77 interface: 380 !> ARRAY :: input, output 381 !> INTEGER(4|8) :: m, n, ldi, ldo 382 !> INTEGER(4) :: typesize 383 PURE SUBROUTINE libxsmm_xmatcopy(output, input, typesize, & 384 & m, n, ldi, ldo) BIND(C, NAME="libxsmm_matcopy_") 385 IMPORT LIBXSMM_BLASINT_KIND, C_PTR, C_INT 386 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo 387 TYPE(C_PTR), INTENT(IN), VALUE :: output, input 388 INTEGER(C_INT), INTENT(IN) :: typesize 389 END SUBROUTINE 390 391 !> Transpose a matrix (in-place form). 392 !> Implicit FORTRAN 77 interface: 393 !> ARRAY :: matrix 394 !> INTEGER(4|8) :: m, n, ld 395 !> INTEGER(4) :: typesize 396 PURE SUBROUTINE libxsmm_xitrans(matrix, typesize, m, n, ld) & 397 & BIND(C, NAME="libxsmm_itrans_") 398 IMPORT C_PTR, C_INT, LIBXSMM_BLASINT_KIND 399 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ld 400 TYPE(C_PTR), INTENT(IN), VALUE :: matrix 401 INTEGER(C_INT), INTENT(IN) :: typesize 402 END SUBROUTINE 403 404 !> Transpose a matrix (out-of-place form). 405 !> Implicit FORTRAN 77 interface: 406 !> ARRAY :: input, output 407 !> INTEGER(4|8) :: m, n, ldi, ldo 408 !> INTEGER(4) :: typesize 409 PURE SUBROUTINE libxsmm_xotrans(output, input, & 410 & typesize, m, n, ldi, ldo) & 411 & BIND(C, NAME="libxsmm_otrans_") 412 IMPORT C_PTR, C_INT, LIBXSMM_BLASINT_KIND 413 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo 414 TYPE(C_PTR), INTENT(IN), VALUE :: output, input 415 INTEGER(C_INT), INTENT(IN) :: typesize 416 END SUBROUTINE 417 418 !> Matrix copy; MT via libxsmmext (out-of-place form). 419 !> Implicit FORTRAN 77 interface: 420 !> ARRAY :: output, input 421 !> INTEGER(4|8) :: m, n, ldi, ldo 422 !> INTEGER(4) :: typesize 423 PURE SUBROUTINE libxsmm_matcopy_omp(output, input, & 424 & typesize, m, n, ldi, ldo) & 425 & BIND(C, NAME="libxsmm_matcopy_omp_") 426 IMPORT C_PTR, C_INT, LIBXSMM_BLASINT_KIND 427 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo 428 TYPE(C_PTR), INTENT(IN), VALUE :: output, input 429 INTEGER(C_INT), INTENT(IN) :: typesize 430 END SUBROUTINE 431 432 !> Matrix transposition; MT via libxsmmext (out-of-place form). 433 !> Implicit FORTRAN 77 interface: 434 !> ARRAY :: output, input 435 !> INTEGER(4|8) :: m, n, ldi, ldo 436 !> INTEGER(4) :: typesize 437 PURE SUBROUTINE libxsmm_otrans_omp(output, input, & 438 & typesize, m, n, ldi, ldo) & 439 & BIND(C, NAME="libxsmm_otrans_omp_") 440 IMPORT C_PTR, C_INT, LIBXSMM_BLASINT_KIND 441 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo 442 TYPE(C_PTR), INTENT(IN), VALUE :: output, input 443 INTEGER(C_INT), INTENT(IN) :: typesize 444 END SUBROUTINE 445 446 !> General dense MM; MT via libxsmmext (double-precision). 447 !> Implicit FORTRAN 77 interface: similar to DGEMM. 448 PURE SUBROUTINE libxsmm_dgemm_omp(transa, transb, m, n, k, & 449 & alpha, a, lda, b, ldb, beta, c, ldc) & 450 & BIND(C, NAME="libxsmm_dgemm_omp_") 451 IMPORT C_DOUBLE, C_CHAR, LIBXSMM_BLASINT_KIND 452 CHARACTER(C_CHAR), INTENT(IN) :: transa, transb 453 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 454 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 455 REAL(C_DOUBLE), INTENT(IN) :: alpha, beta 456 REAL(C_DOUBLE), INTENT(IN) :: a(lda,*), b(ldb,*) 457 REAL(C_DOUBLE), INTENT(INOUT) :: c(ldc,*) 458 END SUBROUTINE 459 460 !> General dense MM; MT via libxsmmext (single-precision). 461 !> Implicit FORTRAN 77 interface: similar to SGEMM. 462 PURE SUBROUTINE libxsmm_sgemm_omp(transa, transb, m, n, k, & 463 & alpha, a, lda, b, ldb, beta, c, ldc) & 464 & BIND(C, NAME="libxsmm_sgemm_omp_") 465 IMPORT C_FLOAT, C_CHAR, LIBXSMM_BLASINT_KIND 466 CHARACTER(C_CHAR), INTENT(IN) :: transa, transb 467 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 468 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 469 REAL(C_FLOAT), INTENT(IN) :: alpha, beta 470 REAL(C_FLOAT), INTENT(IN) :: a(lda,*), b(ldb,*) 471 REAL(C_FLOAT), INTENT(INOUT) :: c(ldc,*) 472 END SUBROUTINE 473 474 !> Process a series of MMs (batch). See also libxsmm_gemm_batch_omp. 475 !> The kind of matrix operands (a, b, c) depend on index_stride: 476 !> index_stride==0: pointers to pointers of elements, e.g., 477 !> double** for the C matrices. 478 !> index_stride!=0: pointer to elements, e.g., 479 !> const double* for the A and B matrices. 480 !> Implicit FORTRAN 77 interface: 481 !> INTEGER(4) :: iprec, oprec 482 !> REAL(4|8) :: alpha, beta 483 !> ARRAY :: a, b, c 484 !> ARRAY/VALUE :: stride_a, stride_b, stride_c 485 !> INTEGER(4|8) :: index_base, index_stride, batchsize 486 !> INTEGER(4) :: tid, nthreads 487 !> Otherwise arguments are similar to GEMM. 488 PURE SUBROUTINE libxsmm_mmbatch(iprec, oprec, transa, transb, & 489 & m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, index_base, & 490 & index_stride, stride_a, stride_b, stride_c, batchsize, & 491 & tid, nthreads) & 492 & BIND(C, NAME="libxsmm_mmbatch_") 493 IMPORT C_PTR, C_CHAR, C_INT, LIBXSMM_BLASINT_KIND 494 !> Determines index-base (usually 0, 1 for one-based indexes). 495 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_base 496 !> Stride (measured in Bytes) used to walk stride_*. 497 !> In Fortran: index_stride!=0. 498 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_stride 499 !> Number of SMMs. If the size is given as a negative value, 500 !> then internal synchronization is omitted. 501 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: batchsize 502 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 503 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 504 CHARACTER(C_CHAR), INTENT(IN) :: transa, transb 505 TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta 506 TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c 507 !> Arrays of indexes determining the position of 508 !> a, b, and c operands. 509 TYPE(C_PTR), INTENT(IN), VALUE :: stride_a 510 TYPE(C_PTR), INTENT(IN), VALUE :: stride_b 511 TYPE(C_PTR), INTENT(IN), VALUE :: stride_c 512 INTEGER(C_INT), INTENT(IN) :: iprec, oprec 513 !> Thread-ID (TID), and number of threads. 514 INTEGER(C_INT), INTENT(IN) :: tid, nthreads 515 END SUBROUTINE 516 517 !> Process a series of SMMs (batch). See also libxsmm_mmbatch. 518 !> Implicit FORTRAN 77 interface: 519 !> INTEGER(4) :: iprec, oprec 520 !> REAL(4|8) :: alpha, beta 521 !> ARRAY :: a, b, c 522 !> ARRAY/VALUE :: stride_a, stride_b, stride_c 523 !> INTEGER(4|8) :: index_base, index_stride, batchsize 524 !> Otherwise arguments are similar to GEMM. 525 PURE SUBROUTINE libxsmm_gemm_batch(iprec, oprec, & 526 & transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, & 527 & index_base, index_stride, stride_a, stride_b, stride_c, & 528 & batchsize) & 529 & BIND(C, NAME="libxsmm_gemm_batch_") 530 IMPORT C_PTR, C_CHAR, C_INT, LIBXSMM_BLASINT_KIND 531 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_base 532 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_stride 533 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: batchsize 534 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 535 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 536 CHARACTER(C_CHAR), INTENT(IN) :: transa, transb 537 TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta 538 TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c 539 TYPE(C_PTR), INTENT(IN), VALUE :: stride_a 540 TYPE(C_PTR), INTENT(IN), VALUE :: stride_b 541 TYPE(C_PTR), INTENT(IN), VALUE :: stride_c 542 INTEGER(C_INT), INTENT(IN) :: iprec, oprec 543 END SUBROUTINE 544 545 !> Process a series of SMMs (batch) with OpenMP (libxsmmext). 546 !> Implicit FORTRAN 77 interface: 547 !> INTEGER(4) :: iprec, oprec 548 !> REAL(4|8) :: alpha, beta 549 !> ARRAY :: a, b, c 550 !> ARRAY/VALUE :: stride_a, stride_b, stride_c 551 !> INTEGER(4|8) :: index_base, index_stride, batchsize 552 !> Otherwise arguments are similar to GEMM. 553 PURE SUBROUTINE libxsmm_gemm_batch_omp(iprec, oprec, & 554 & transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, & 555 & index_base, index_stride, stride_a, stride_b, stride_c, & 556 & batchsize) & 557 & BIND(C, NAME="libxsmm_gemm_batch_omp_") 558 IMPORT C_PTR, C_CHAR, C_INT, LIBXSMM_BLASINT_KIND 559 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_base 560 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: index_stride 561 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: batchsize 562 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 563 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 564 CHARACTER(C_CHAR), INTENT(IN) :: transa, transb 565 TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta 566 TYPE(C_PTR), INTENT(IN), VALUE :: a, b, c 567 TYPE(C_PTR), INTENT(IN), VALUE :: stride_a 568 TYPE(C_PTR), INTENT(IN), VALUE :: stride_b 569 TYPE(C_PTR), INTENT(IN), VALUE :: stride_c 570 INTEGER(C_INT), INTENT(IN) :: iprec, oprec 571 END SUBROUTINE 572 573 !> This function is a no-op unless LIBXSMM is built to intercept GEMM. 574 !> Pointer arguments are used to filter intercepted GEMM calls such that 575 !> non-NULL values match. Otherwise (NULL) the respective argument is 576 !> considered a "free value", i.e., every value can match; 577 !> libxsmmext required. 578 !> Implicit FORTRAN 77 interface: 579 !> INTEGER(4) :: gemm_precision, flags 580 !> INTEGER(4|8) :: m, n, k, lda, ldb, ldc 581 !> REAL(4|8) :: alpha, beta 582 SUBROUTINE libxsmm_mmbatch_begin(gemm_precision, flags, & 583 & m, n, k, lda, ldb, ldc, alpha, beta) BIND(C) 584 IMPORT C_PTR, C_INT, LIBXSMM_BLASINT_KIND 585 INTEGER(C_INT), INTENT(IN), VALUE :: gemm_precision 586 INTEGER(C_INT), INTENT(IN) :: flags 587 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 588 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 589 TYPE(C_PTR), INTENT(IN), VALUE :: alpha, beta 590 END SUBROUTINE 591 592 !> Processes the batch of previously recorded SMMs 593 !> (libxsmm_mmbatch_begin); libxsmmext required. 594 !> Implicit FORTRAN 77 interface: available. 595 SUBROUTINE libxsmm_mmbatch_end() BIND(C) 596 END SUBROUTINE 597 598 !> Reduces input into output such that the difference is maintained 599 !> or increased (max function). The very first (initial) output 600 !> should be zeroed (libxsmm_matdiff_clear). 601 !> Implicit FORTRAN 77 interface: available. 602 PURE SUBROUTINE libxsmm_matdiff_reduce(output, input) BIND(C) 603 IMPORT LIBXSMM_MATDIFF_INFO 604 TYPE(LIBXSMM_MATDIFF_INFO), INTENT(INOUT) :: output 605 TYPE(LIBXSMM_MATDIFF_INFO), INTENT(IN) :: input 606 END SUBROUTINE 607 608 !> Clears the given info-structure, e.g., for the initial 609 !> reduction-value (libxsmm_matdiff_reduce). 610 !> Implicit FORTRAN 77 interface: available. 611 PURE SUBROUTINE libxsmm_matdiff_clear(info) BIND(C) 612 IMPORT LIBXSMM_MATDIFF_INFO 613 TYPE(LIBXSMM_MATDIFF_INFO), INTENT(OUT) :: info 614 END SUBROUTINE 615 616 !> Calculates a hash value for the given array and seed. 617 !> Routine suitable for FORTRAN 77; keysize in Bytes. 618 PURE SUBROUTINE libxsmm_xhash(hash_seed, key, keysize) & 619 & BIND(C, NAME="libxsmm_xhash_") 620 IMPORT C_INT, C_PTR 621 INTEGER(C_INT), INTENT(INOUT) :: hash_seed 622 INTEGER(C_INT), INTENT(IN) :: keysize 623 TYPE(C_PTR), INTENT(IN), VALUE :: key 624 END SUBROUTINE 625 626 !> Calculates if there is a difference between two arrays. 627 !> Routine suitable for FORTRAN 77; size in Bytes. 628 PURE SUBROUTINE libxsmm_xdiff(diff, a, b, nbytes) & 629 & BIND(C, NAME="libxsmm_xdiff_") 630 IMPORT C_PTR, C_LONG_LONG, C_BOOL 631 TYPE(C_PTR), INTENT(IN), VALUE :: a, b 632 INTEGER(C_LONG_LONG), INTENT(IN) :: nbytes 633 LOGICAL(C_BOOL), INTENT(OUT) :: diff 634 END SUBROUTINE 635 END INTERFACE$MNK_INTERFACE_LIST 636 637 INTERFACE libxsmm_ptr0 638 MODULE PROCEDURE libxsmm_ptr_z0, libxsmm_ptr_c0 639 MODULE PROCEDURE libxsmm_ptr_d0, libxsmm_ptr_s0 640 MODULE PROCEDURE libxsmm_ptr_i0, libxsmm_ptr_w0 641 MODULE PROCEDURE libxsmm_ptr_j0 !! Byte/char 642 MODULE PROCEDURE libxsmm_ptr_b0 !! Byte/char 643 MODULE PROCEDURE libxsmm_ptr_l0 !! long long 644 END INTERFACE 645 646 INTERFACE libxsmm_ptr1 647 MODULE PROCEDURE libxsmm_ptr_z1, libxsmm_ptr_c1 648 MODULE PROCEDURE libxsmm_ptr_d1, libxsmm_ptr_s1 649 MODULE PROCEDURE libxsmm_ptr_i1, libxsmm_ptr_w1 650 MODULE PROCEDURE libxsmm_ptr_j1 !! Byte/char 651 MODULE PROCEDURE libxsmm_ptr_b1 !! Byte/char 652 MODULE PROCEDURE libxsmm_ptr_l1 !! long long 653 MODULE PROCEDURE libxsmm_ptr_dmm 654 MODULE PROCEDURE libxsmm_ptr_smm 655 MODULE PROCEDURE libxsmm_ptr_wimm 656 END INTERFACE 657 658 INTERFACE libxsmm_ptr2 659 MODULE PROCEDURE libxsmm_ptr_z2, libxsmm_ptr_c2 660 MODULE PROCEDURE libxsmm_ptr_d2, libxsmm_ptr_s2 661 MODULE PROCEDURE libxsmm_ptr_i2, libxsmm_ptr_w2 662 MODULE PROCEDURE libxsmm_ptr_j2 !! Byte/char 663 MODULE PROCEDURE libxsmm_ptr_b2 !! Byte/char 664 MODULE PROCEDURE libxsmm_ptr_l2 !! long long 665 END INTERFACE 666 667 INTERFACE libxsmm_ptr 668 MODULE PROCEDURE libxsmm_ptr_z0, libxsmm_ptr_c0 669 MODULE PROCEDURE libxsmm_ptr_d0, libxsmm_ptr_s0 670 MODULE PROCEDURE libxsmm_ptr_i0, libxsmm_ptr_w0 671 MODULE PROCEDURE libxsmm_ptr_j0 !! Byte/char 672 MODULE PROCEDURE libxsmm_ptr_b0 !! Byte/char 673 MODULE PROCEDURE libxsmm_ptr_l0 !! long long 674 MODULE PROCEDURE libxsmm_ptr_z1, libxsmm_ptr_c1 675 MODULE PROCEDURE libxsmm_ptr_d1, libxsmm_ptr_s1 676 MODULE PROCEDURE libxsmm_ptr_i1, libxsmm_ptr_w1 677 MODULE PROCEDURE libxsmm_ptr_j1 !! Byte/char 678 MODULE PROCEDURE libxsmm_ptr_b1 !! Byte/char 679 MODULE PROCEDURE libxsmm_ptr_l1 !! long long 680 MODULE PROCEDURE libxsmm_ptr_z2, libxsmm_ptr_c2 681 MODULE PROCEDURE libxsmm_ptr_d2, libxsmm_ptr_s2 682 MODULE PROCEDURE libxsmm_ptr_i2, libxsmm_ptr_w2 683 MODULE PROCEDURE libxsmm_ptr_j2 !! Byte/char 684 MODULE PROCEDURE libxsmm_ptr_b2 !! Byte/char 685 MODULE PROCEDURE libxsmm_ptr_l2 !! long long 686 MODULE PROCEDURE libxsmm_ptr_dmm 687 MODULE PROCEDURE libxsmm_ptr_smm 688 MODULE PROCEDURE libxsmm_ptr_wimm 689 END INTERFACE 690 691 !> Deallocates JIT'ted code, or unregisters/releases code from registry. 692 INTERFACE libxsmm_release_mmkernel 693 MODULE PROCEDURE libxsmm_release_dmmkernel 694 MODULE PROCEDURE libxsmm_release_smmkernel 695 MODULE PROCEDURE libxsmm_release_wimmkernel 696 END INTERFACE 697 698 !> Construct JIT-code depending on given argument set. 699 INTERFACE libxsmm_mmdispatch 700 MODULE PROCEDURE libxsmm_dmmdispatch, libxsmm_smmdispatch 701 MODULE PROCEDURE libxsmm_wimmdispatch 702 END INTERFACE 703 704 !> Construct JIT-code depending on given argument set. 705 INTERFACE libxsmm_dispatch 706 MODULE PROCEDURE libxsmm_dmmdispatch, libxsmm_smmdispatch 707 MODULE PROCEDURE libxsmm_wimmdispatch 708 END INTERFACE 709 710 !> Check if a function is available (LIBXSMM_?MMFUNCTION). 711 INTERFACE libxsmm_mmavailable 712 MODULE PROCEDURE libxsmm_dmmavailable, libxsmm_smmavailable 713 MODULE PROCEDURE libxsmm_wimmavailable 714 END INTERFACE 715 716 !> Check if a function is available (LIBXSMM_?MMFUNCTION). 717 INTERFACE libxsmm_available 718 MODULE PROCEDURE libxsmm_smmavailable, libxsmm_dmmavailable 719 MODULE PROCEDURE libxsmm_wimmavailable 720 END INTERFACE 721 722 !> Overloaded GEMM routines (double-precision). 723 INTERFACE libxsmm_dgemm 724 MODULE PROCEDURE libxsmm_dgemm0 725 MODULE PROCEDURE libxsmm_dgemm1 726 MODULE PROCEDURE libxsmm_dgemm2 727 MODULE PROCEDURE libxsmm_dgemm3 728 END INTERFACE 729 730 !> Overloaded GEMM routines (single-precision). 731 INTERFACE libxsmm_sgemm 732 MODULE PROCEDURE libxsmm_sgemm0 733 MODULE PROCEDURE libxsmm_sgemm1 734 MODULE PROCEDURE libxsmm_sgemm2 735 END INTERFACE 736 737 !> Overloaded GEMM routines (low-precision). 738 INTERFACE libxsmm_wigemm 739 MODULE PROCEDURE libxsmm_wigemm0 740 MODULE PROCEDURE libxsmm_wigemm1 741 MODULE PROCEDURE libxsmm_wigemm2 742 END INTERFACE 743 744 !> Overloaded GEMM routines. 745 INTERFACE libxsmm_gemm 746 MODULE PROCEDURE libxsmm_dgemm0 747 MODULE PROCEDURE libxsmm_dgemm1 748 MODULE PROCEDURE libxsmm_dgemm2 749 MODULE PROCEDURE libxsmm_dgemm3 750 MODULE PROCEDURE libxsmm_sgemm0 751 MODULE PROCEDURE libxsmm_sgemm1 752 MODULE PROCEDURE libxsmm_sgemm2 753 MODULE PROCEDURE libxsmm_sgemm3 754 MODULE PROCEDURE libxsmm_wigemm0 755 MODULE PROCEDURE libxsmm_wigemm1 756 MODULE PROCEDURE libxsmm_wigemm2 757 MODULE PROCEDURE libxsmm_wigemm3 758 END INTERFACE 759 760 !> Overloaded BLAS GEMM routines (double-precision). 761 INTERFACE libxsmm_blas_dgemm 762 MODULE PROCEDURE libxsmm_blas_dgemm0 763 MODULE PROCEDURE libxsmm_blas_dgemm1 764 MODULE PROCEDURE libxsmm_blas_dgemm2 765 MODULE PROCEDURE libxsmm_blas_dgemm3 766 END INTERFACE 767 768 !> Overloaded BLAS GEMM routines (single-precision). 769 INTERFACE libxsmm_blas_sgemm 770 MODULE PROCEDURE libxsmm_blas_sgemm0 771 MODULE PROCEDURE libxsmm_blas_sgemm1 772 MODULE PROCEDURE libxsmm_blas_sgemm2 773 MODULE PROCEDURE libxsmm_blas_sgemm3 774 END INTERFACE 775 776 !> Overloaded BLAS GEMM routines (single/double-precision). 777 INTERFACE libxsmm_blas_gemm 778 MODULE PROCEDURE libxsmm_blas_dgemm0 779 MODULE PROCEDURE libxsmm_blas_dgemm1 780 MODULE PROCEDURE libxsmm_blas_dgemm2 781 MODULE PROCEDURE libxsmm_blas_dgemm3 782 MODULE PROCEDURE libxsmm_blas_sgemm0 783 MODULE PROCEDURE libxsmm_blas_sgemm1 784 MODULE PROCEDURE libxsmm_blas_sgemm2 785 MODULE PROCEDURE libxsmm_blas_sgemm3 786 END INTERFACE 787 788 !> Overloaded MATCOPY routines (2d-copy). 789 INTERFACE libxsmm_matcopy 790 MODULE PROCEDURE libxsmm_matcopy_p0 791 MODULE PROCEDURE libxsmm_matcopy_d1 792 MODULE PROCEDURE libxsmm_matcopy_d2 793 MODULE PROCEDURE libxsmm_matcopy_s1 794 MODULE PROCEDURE libxsmm_matcopy_s2 795 END INTERFACE 796 797 !> Overloaded TRANSPOSE routines (in-place form). 798 INTERFACE libxsmm_itrans 799 MODULE PROCEDURE libxsmm_itrans_p0 800 MODULE PROCEDURE libxsmm_itrans_d1 801 MODULE PROCEDURE libxsmm_itrans_d2 802 MODULE PROCEDURE libxsmm_itrans_s1 803 MODULE PROCEDURE libxsmm_itrans_s2 804 END INTERFACE 805 806 !> Overloaded TRANSPOSE routines (out-of-place form). 807 INTERFACE libxsmm_otrans 808 MODULE PROCEDURE libxsmm_otrans_p0 809 MODULE PROCEDURE libxsmm_otrans_d1 810 MODULE PROCEDURE libxsmm_otrans_d2 811 MODULE PROCEDURE libxsmm_otrans_s1 812 MODULE PROCEDURE libxsmm_otrans_s2 813 END INTERFACE 814 815 !> Calculate a hash value for a given key value (binary blob). 816 !> Conceptually pure, but C_LOC may be (incorrectly) impure. 817 INTERFACE libxsmm_hash 818 MODULE PROCEDURE libxsmm_hash_char 819 MODULE PROCEDURE libxsmm_hash_i8 820 MODULE PROCEDURE libxsmm_hash_i32 821 MODULE PROCEDURE libxsmm_hash_i64 822 END INTERFACE 823 824 !> Calculate whether there is a difference between two series of items. 825 !> Conceptually pure, but C_LOC may be (incorrectly) impure. 826 INTERFACE libxsmm_diff 827 MODULE PROCEDURE libxsmm_diff_char 828 MODULE PROCEDURE libxsmm_diff_i8 829 MODULE PROCEDURE libxsmm_diff_i32 830 MODULE PROCEDURE libxsmm_diff_i64 831 END INTERFACE 832 833 CONTAINS 834 !> Returns the name of the target architecture as determined by 835 !> the CPUID flags, as set by the libxsmm_get_target_arch* functions, 836 !> or as set by the LIBXSMM_TARGET environment variable. 837 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_get_target_arch 838 FUNCTION libxsmm_get_target_arch() 839 !CHARACTER(LEN=:), POINTER :: libxsmm_get_target_arch 840 CHARACTER, POINTER :: libxsmm_get_target_arch(:) 841 INTEGER(C_INT) :: length(1) 842 TYPE(C_PTR) :: arch 843 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmmf_get_target_arch 844 INTERFACE 845 FUNCTION libxsmmf_get_target_arch(length) BIND(C) 846 IMPORT :: C_INT, C_PTR 847 INTEGER(C_INT), INTENT(OUT) :: length 848 TYPE(C_PTR) :: libxsmmf_get_target_arch 849 END FUNCTION 850 END INTERFACE 851 arch = libxsmmf_get_target_arch(length(1)) 852 CALL C_F_POINTER(arch, libxsmm_get_target_arch, length) 853 END FUNCTION 854 855 !> Returns C_NULL_PTR. 856 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_null 857 PURE FUNCTION libxsmm_ptr_null() 858 TYPE(C_PTR) :: libxsmm_ptr_null 859 libxsmm_ptr_null = C_NULL_PTR 860 END FUNCTION 861 862 !> Determines the C-address of the given array. 863 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_z0 864 FUNCTION libxsmm_ptr_z0(a) 865 COMPLEX(C_DOUBLE_COMPLEX), INTENT(IN), TARGET :: a 866 TYPE(C_PTR) :: libxsmm_ptr_z0 867 libxsmm_ptr_z0 = C_LOC(a) 868 END FUNCTION 869 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_z1 870 FUNCTION libxsmm_ptr_z1(a) 871 COMPLEX(C_DOUBLE_COMPLEX), INTENT(IN), TARGET :: a(*) 872 TYPE(C_PTR) :: libxsmm_ptr_z1 873 libxsmm_ptr_z1 = C_LOC(a) 874 END FUNCTION 875 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_z2 876 FUNCTION libxsmm_ptr_z2(a) 877 COMPLEX(C_DOUBLE_COMPLEX), INTENT(IN) :: a(:,:) 878 TYPE(C_PTR) :: libxsmm_ptr_z2 879 libxsmm_ptr_z2 = libxsmm_ptr_z1(a) 880 END FUNCTION 881 882 !> Determines the C-address of the given array. 883 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_c0 884 FUNCTION libxsmm_ptr_c0(a) 885 COMPLEX(C_FLOAT_COMPLEX), INTENT(IN), TARGET :: a 886 TYPE(C_PTR) :: libxsmm_ptr_c0 887 libxsmm_ptr_c0 = C_LOC(a) 888 END FUNCTION 889 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_c1 890 FUNCTION libxsmm_ptr_c1(a) 891 COMPLEX(C_FLOAT_COMPLEX), INTENT(IN), TARGET :: a(*) 892 TYPE(C_PTR) :: libxsmm_ptr_c1 893 libxsmm_ptr_c1 = C_LOC(a) 894 END FUNCTION 895 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_c2 896 FUNCTION libxsmm_ptr_c2(a) 897 COMPLEX(C_FLOAT_COMPLEX), INTENT(IN) :: a(:,:) 898 TYPE(C_PTR) :: libxsmm_ptr_c2 899 libxsmm_ptr_c2 = libxsmm_ptr_c1(a) 900 END FUNCTION 901 902 !> Determines the C-address of the given array. 903 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_d0 904 FUNCTION libxsmm_ptr_d0(a) 905 REAL(C_DOUBLE), INTENT(IN), TARGET :: a 906 TYPE(C_PTR) :: libxsmm_ptr_d0 907 libxsmm_ptr_d0 = C_LOC(a) 908 END FUNCTION 909 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_d1 910 FUNCTION libxsmm_ptr_d1(a) 911 REAL(C_DOUBLE), INTENT(IN), TARGET :: a(*) 912 TYPE(C_PTR) :: libxsmm_ptr_d1 913 libxsmm_ptr_d1 = C_LOC(a) 914 END FUNCTION 915 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_d2 916 FUNCTION libxsmm_ptr_d2(a) 917 REAL(C_DOUBLE), INTENT(IN) :: a(:,:) 918 TYPE(C_PTR) :: libxsmm_ptr_d2 919 libxsmm_ptr_d2 = libxsmm_ptr_d1(a) 920 END FUNCTION 921 922 !> Determines the C-address of the given array. 923 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_s0 924 FUNCTION libxsmm_ptr_s0(a) 925 REAL(C_FLOAT), INTENT(IN), TARGET :: a 926 TYPE(C_PTR) :: libxsmm_ptr_s0 927 libxsmm_ptr_s0 = C_LOC(a) 928 END FUNCTION 929 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_s1 930 FUNCTION libxsmm_ptr_s1(a) 931 REAL(C_FLOAT), INTENT(IN), TARGET :: a(*) 932 TYPE(C_PTR) :: libxsmm_ptr_s1 933 libxsmm_ptr_s1 = C_LOC(a) 934 END FUNCTION 935 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_s2 936 FUNCTION libxsmm_ptr_s2(a) 937 REAL(C_FLOAT), INTENT(IN) :: a(:,:) 938 TYPE(C_PTR) :: libxsmm_ptr_s2 939 libxsmm_ptr_s2 = libxsmm_ptr_s1(a) 940 END FUNCTION 941 942 !> Determines the C-address of the given array. 943 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_i0 944 FUNCTION libxsmm_ptr_i0(a) 945 INTEGER(C_INT), INTENT(IN), TARGET :: a 946 TYPE(C_PTR) :: libxsmm_ptr_i0 947 libxsmm_ptr_i0 = C_LOC(a) 948 END FUNCTION 949 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_i1 950 FUNCTION libxsmm_ptr_i1(a) 951 INTEGER(C_INT), INTENT(IN), TARGET :: a(*) 952 TYPE(C_PTR) :: libxsmm_ptr_i1 953 libxsmm_ptr_i1 = C_LOC(a) 954 END FUNCTION 955 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_i2 956 FUNCTION libxsmm_ptr_i2(a) 957 INTEGER(C_INT), INTENT(IN) :: a(:,:) 958 TYPE(C_PTR) :: libxsmm_ptr_i2 959 libxsmm_ptr_i2 = libxsmm_ptr_i1(a) 960 END FUNCTION 961 962 !> Determines the C-address of the given array. 963 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_w0 964 FUNCTION libxsmm_ptr_w0(a) 965 INTEGER(C_SHORT), INTENT(IN), TARGET :: a 966 TYPE(C_PTR) :: libxsmm_ptr_w0 967 libxsmm_ptr_w0 = C_LOC(a) 968 END FUNCTION 969 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_w1 970 FUNCTION libxsmm_ptr_w1(a) 971 INTEGER(C_SHORT), INTENT(IN), TARGET :: a(*) 972 TYPE(C_PTR) :: libxsmm_ptr_w1 973 libxsmm_ptr_w1 = C_LOC(a) 974 END FUNCTION 975 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_w2 976 FUNCTION libxsmm_ptr_w2(a) 977 INTEGER(C_SHORT), INTENT(IN) :: a(:,:) 978 TYPE(C_PTR) :: libxsmm_ptr_w2 979 libxsmm_ptr_w2 = libxsmm_ptr_w1(a) 980 END FUNCTION 981 982 !> Determines the C-address of the given array. 983 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_j0 984 FUNCTION libxsmm_ptr_j0(a) 985 INTEGER(C_INT8_T), INTENT(IN), TARGET :: a 986 TYPE(C_PTR) :: libxsmm_ptr_j0 987 libxsmm_ptr_j0 = C_LOC(a) 988 END FUNCTION 989 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_j1 990 FUNCTION libxsmm_ptr_j1(a) 991 INTEGER(C_INT8_T), INTENT(IN), TARGET :: a(*) 992 TYPE(C_PTR) :: libxsmm_ptr_j1 993 libxsmm_ptr_j1 = C_LOC(a) 994 END FUNCTION 995 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_j2 996 FUNCTION libxsmm_ptr_j2(a) 997 INTEGER(C_INT8_T), INTENT(IN) :: a(:,:) 998 TYPE(C_PTR) :: libxsmm_ptr_j2 999 libxsmm_ptr_j2 = libxsmm_ptr_j1(a) 1000 END FUNCTION 1001 1002 !> Determines the C-address of the given array. 1003 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_b0 1004 FUNCTION libxsmm_ptr_b0(a) 1005 CHARACTER(C_CHAR), INTENT(IN), TARGET :: a 1006 TYPE(C_PTR) :: libxsmm_ptr_b0 1007 libxsmm_ptr_b0 = C_LOC(a) 1008 END FUNCTION 1009 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_b1 1010 FUNCTION libxsmm_ptr_b1(a) 1011 CHARACTER(C_CHAR), INTENT(IN), TARGET :: a(*) 1012 TYPE(C_PTR) :: libxsmm_ptr_b1 1013 libxsmm_ptr_b1 = C_LOC(a) 1014 END FUNCTION 1015 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_b2 1016 FUNCTION libxsmm_ptr_b2(a) 1017 CHARACTER(C_CHAR), INTENT(IN) :: a(:,:) 1018 TYPE(C_PTR) :: libxsmm_ptr_b2 1019 libxsmm_ptr_b2 = libxsmm_ptr_b1(a) 1020 END FUNCTION 1021 1022 !> Determines the C-address of the given array. 1023 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_l0 1024 FUNCTION libxsmm_ptr_l0(a) 1025 INTEGER(C_LONG_LONG), INTENT(IN), TARGET :: a 1026 TYPE(C_PTR) :: libxsmm_ptr_l0 1027 libxsmm_ptr_l0 = C_LOC(a) 1028 END FUNCTION 1029 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_l1 1030 FUNCTION libxsmm_ptr_l1(a) 1031 INTEGER(C_LONG_LONG), INTENT(IN), TARGET :: a(*) 1032 TYPE(C_PTR) :: libxsmm_ptr_l1 1033 libxsmm_ptr_l1 = C_LOC(a) 1034 END FUNCTION 1035 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_l2 1036 FUNCTION libxsmm_ptr_l2(a) 1037 INTEGER(C_LONG_LONG), INTENT(IN) :: a(:,:) 1038 TYPE(C_PTR) :: libxsmm_ptr_l2 1039 libxsmm_ptr_l2 = libxsmm_ptr_l1(a) 1040 END FUNCTION 1041 1042 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_dmm 1043 FUNCTION libxsmm_ptr_dmm(a) 1044 TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN), TARGET :: a(:) 1045 TYPE(LIBXSMM_DMMFUNCTION), POINTER :: p 1046 TYPE(C_PTR) :: libxsmm_ptr_dmm 1047 p => a(LBOUND(a,1)); libxsmm_ptr_dmm = C_LOC(p%handle) 1048 END FUNCTION 1049 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_smm 1050 FUNCTION libxsmm_ptr_smm(a) 1051 TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN), TARGET :: a(:) 1052 TYPE(LIBXSMM_SMMFUNCTION), POINTER :: p 1053 TYPE(C_PTR) :: libxsmm_ptr_smm 1054 p => a(LBOUND(a,1)); libxsmm_ptr_smm = C_LOC(p%handle) 1055 END FUNCTION 1056 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_ptr_wimm 1057 FUNCTION libxsmm_ptr_wimm(a) 1058 TYPE(LIBXSMM_WIMMFUNCTION), INTENT(IN), TARGET :: a(:) 1059 TYPE(LIBXSMM_WIMMFUNCTION), POINTER :: p 1060 TYPE(C_PTR) :: libxsmm_ptr_wimm 1061 p => a(LBOUND(a,1)); libxsmm_ptr_wimm = C_LOC(p%handle) 1062 END FUNCTION 1063 1064 !> Deallocate JIT'ted code created by libxsmm_create routines. To 1065 !> unregister code generated with libxsmm_dispatch is unnecessary. 1066 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_release_dmmkernel 1067 SUBROUTINE libxsmm_release_dmmkernel(kernel) 1068 TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN) :: kernel 1069 CALL libxsmm_release_kernel(kernel%handle) 1070 END SUBROUTINE 1071 1072 !> Deallocate JIT'ted code created by libxsmm_create routines. To 1073 !> unregister code generated with libxsmm_dispatch is unnecessary. 1074 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_release_smmkernel 1075 SUBROUTINE libxsmm_release_smmkernel(kernel) 1076 TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN) :: kernel 1077 CALL libxsmm_release_kernel(kernel%handle) 1078 END SUBROUTINE 1079 1080 !> Deallocate JIT'ted code created by libxsmm_create routines. To 1081 !> unregister code generated with libxsmm_dispatch is unnecessary. 1082 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_release_wimmkernel 1083 SUBROUTINE libxsmm_release_wimmkernel(kernel) 1084 TYPE(LIBXSMM_WIMMFUNCTION), INTENT(IN) :: kernel 1085 CALL libxsmm_release_kernel(kernel%handle) 1086 END SUBROUTINE 1087 1088 !> Query or JIT-generate an SMM-kernel (double-precision). 1089 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dmmdispatch 1090 SUBROUTINE libxsmm_dmmdispatch(kernel, & 1091 & m, n, k, lda, ldb, ldc, alpha, beta, flags, prefetch) 1092 TYPE(LIBXSMM_DMMFUNCTION), INTENT(OUT) :: kernel 1093 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1094 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), & 1095 & OPTIONAL, TARGET :: lda, ldb, ldc 1096 REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET :: alpha, beta 1097 INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: flags 1098 INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: prefetch 1099 CALL libxsmm_xmmdispatch( & 1100 & kernel%handle, LIBXSMM_GEMM_PRECISION_F64, & 1101 & m, n, k, C_LOC(lda), C_LOC(ldb), C_LOC(ldc), & 1102 & C_LOC(alpha), C_LOC(beta), C_LOC(flags), C_LOC(prefetch)) 1103 END SUBROUTINE 1104 1105 !> Query or JIT-generate an SMM-kernel (single-precision). 1106 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_smmdispatch 1107 SUBROUTINE libxsmm_smmdispatch(kernel, & 1108 & m, n, k, lda, ldb, ldc, alpha, beta, flags, prefetch) 1109 TYPE(LIBXSMM_SMMFUNCTION), INTENT(OUT) :: kernel 1110 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1111 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), & 1112 & OPTIONAL, TARGET :: lda, ldb, ldc 1113 REAL(C_FLOAT), INTENT(IN), OPTIONAL, TARGET :: alpha, beta 1114 INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: flags 1115 INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: prefetch 1116 CALL libxsmm_xmmdispatch( & 1117 & kernel%handle, LIBXSMM_GEMM_PRECISION_F32, & 1118 & m, n, k, C_LOC(lda), C_LOC(ldb), C_LOC(ldc), & 1119 & C_LOC(alpha), C_LOC(beta), C_LOC(flags), C_LOC(prefetch)) 1120 END SUBROUTINE 1121 1122 !> Query or JIT-generate an SMM-kernel (low-precision, int-accumulate). 1123 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wimmdispatch 1124 SUBROUTINE libxsmm_wimmdispatch(kernel, & 1125 & m, n, k, lda, ldb, ldc, alpha, beta, flags, prefetch) 1126 TYPE(LIBXSMM_WIMMFUNCTION), INTENT(OUT) :: kernel 1127 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1128 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), & 1129 & OPTIONAL, TARGET :: lda, ldb, ldc 1130 INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: alpha, beta 1131 INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: flags 1132 INTEGER(C_INT), INTENT(IN), OPTIONAL, TARGET :: prefetch 1133 CALL libxsmm_xmmdispatch2(kernel%handle, & 1134 & LIBXSMM_GEMM_PRECISION_I16, LIBXSMM_GEMM_PRECISION_I32, & 1135 & m, n, k, C_LOC(lda), C_LOC(ldb), C_LOC(ldc), & 1136 & C_LOC(alpha), C_LOC(beta), C_LOC(flags), C_LOC(prefetch)) 1137 END SUBROUTINE 1138 1139 !> Checks if the given kernel was generated. JIT code is guaranteed 1140 !> to be generated if JIT support was enabled at build-time of the 1141 !> library (default). This overload belongs to libxsmm_(mm)available. 1142 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dmmavailable 1143 LOGICAL FUNCTION libxsmm_dmmavailable(kernel) 1144 TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN) :: kernel 1145 libxsmm_dmmavailable = C_ASSOCIATED(kernel%handle) 1146 END FUNCTION 1147 1148 !> Checks if the given kernel was generated. JIT code is guaranteed 1149 !> to be generated if JIT support was enabled at build-time of the 1150 !> library (default). This overload belongs to libxsmm_(mm)available. 1151 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_smmavailable 1152 LOGICAL FUNCTION libxsmm_smmavailable(kernel) 1153 TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN) :: kernel 1154 libxsmm_smmavailable = C_ASSOCIATED(kernel%handle) 1155 END FUNCTION 1156 1157 !> Checks if the given kernel was generated. JIT code is guaranteed 1158 !> to be generated if JIT support was enabled at build-time of the 1159 !> library (default). This overload belongs to libxsmm_(mm)available. 1160 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wimmavailable 1161 LOGICAL FUNCTION libxsmm_wimmavailable(kernel) 1162 TYPE(LIBXSMM_WIMMFUNCTION), INTENT(IN) :: kernel 1163 libxsmm_wimmavailable = C_ASSOCIATED(kernel%handle) 1164 END FUNCTION 1165 1166 !> Calls the kernel with the given arguments. Alternatively, 1167 !> PROCPOINTER can be used as shown by the inner comments 1168 !> of this routine (LIBXSMM_FUNCTION3). The libxsmm_xmmcall 1169 !> routines can be used in FORTRAN77. 1170 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dmmcall_abc 1171 SUBROUTINE libxsmm_dmmcall_abc(kernel, a, b, c) 1172 TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN) :: kernel 1173 REAL(C_DOUBLE), INTENT(IN), TARGET :: a(*), b(*) 1174 REAL(C_DOUBLE), INTENT(INOUT), TARGET :: c(*) 1175 ! PROCEDURE(LIBXSMM_FUNCTION3), POINTER :: xmm 1176 ! CALL C_F_PROCPOINTER(kernel%handle, xmm) 1177 ! CALL xmm(...) 1178 CALL libxsmm_xmmcall_abc(kernel%handle, & 1179 & C_LOC(a), C_LOC(b), C_LOC(c)) 1180 END SUBROUTINE 1181 1182 !> Calls the kernel with the given arguments. Alternatively, 1183 !> PROCPOINTER can be used as shown by the inner comments 1184 !> of this routine (LIBXSMM_FUNCTION6). The libxsmm_xmmcall 1185 !> routines can be used in FORTRAN77. 1186 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dmmcall_prf 1187 SUBROUTINE libxsmm_dmmcall_prf(kernel, a, b, c, pa, pb, pc) 1188 TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN) :: kernel 1189 REAL(C_DOUBLE), INTENT(IN), TARGET :: a(*), b(*) 1190 REAL(C_DOUBLE), INTENT(INOUT), TARGET :: c(*) 1191 REAL(C_DOUBLE), INTENT(IN), TARGET :: pa(*) 1192 REAL(C_DOUBLE), INTENT(IN), TARGET :: pb(*) 1193 REAL(C_DOUBLE), INTENT(IN), TARGET :: pc(*) 1194 ! PROCEDURE(LIBXSMM_FUNCTION6), POINTER :: xmm 1195 ! CALL C_F_PROCPOINTER(kernel%handle, xmm) 1196 ! CALL xmm(...) 1197 CALL libxsmm_xmmcall_prf(kernel%handle, & 1198 & C_LOC(a), C_LOC(b), C_LOC(c), & 1199 & C_LOC(pa), C_LOC(pb), C_LOC(pc)) 1200 END SUBROUTINE 1201 1202 !> See also libxsmm_dmmcall_abc and libxsmm_dmmcall_prf. 1203 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dmmcall 1204 SUBROUTINE libxsmm_dmmcall(kernel, a, b, c, pa, pb, pc) 1205 TYPE(LIBXSMM_DMMFUNCTION), INTENT(IN) :: kernel 1206 REAL(C_DOUBLE), INTENT(IN), TARGET :: a(*), b(*) 1207 REAL(C_DOUBLE), INTENT(INOUT), TARGET :: c(*) 1208 REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET :: pa(*) 1209 REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET :: pb(*) 1210 REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET :: pc(*) 1211 ! use .OR. instead of .AND. to avoid full check 1212 IF (PRESENT(pa).OR.PRESENT(pb).OR.PRESENT(pc)) THEN 1213 CALL libxsmm_xmmcall_prf(kernel%handle, & 1214 & C_LOC(a), C_LOC(b), C_LOC(c), & 1215 & C_LOC(pa), C_LOC(pb), C_LOC(pc)) 1216 ELSE 1217 CALL libxsmm_xmmcall_abc(kernel%handle, & 1218 & C_LOC(a), C_LOC(b), C_LOC(c)) 1219 END IF 1220 END SUBROUTINE 1221 1222 !> Calls the kernel with the given arguments. Alternatively, 1223 !> PROCPOINTER can be used as shown by the inner comments 1224 !> of this routine (LIBXSMM_FUNCTION3). The libxsmm_xmmcall 1225 !> routines can be used in FORTRAN77. 1226 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_smmcall_abc 1227 SUBROUTINE libxsmm_smmcall_abc(kernel, a, b, c) 1228 TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN) :: kernel 1229 REAL(C_FLOAT), INTENT(IN), TARGET :: a(*), b(*) 1230 REAL(C_FLOAT), INTENT(INOUT), TARGET :: c(*) 1231 ! PROCEDURE(LIBXSMM_FUNCTION3), POINTER :: xmm 1232 ! CALL C_F_PROCPOINTER(kernel%handle, xmm) 1233 ! CALL xmm(...) 1234 CALL libxsmm_xmmcall_abc(kernel%handle, & 1235 & C_LOC(a), C_LOC(b), C_LOC(c)) 1236 END SUBROUTINE 1237 1238 !> Calls the kernel with the given arguments. Alternatively, 1239 !> PROCPOINTER can be used as shown by the inner comments 1240 !> of this routine (LIBXSMM_FUNCTION6). The libxsmm_xmmcall 1241 !> routines can be used in FORTRAN77. 1242 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_smmcall_prf 1243 SUBROUTINE libxsmm_smmcall_prf(kernel, a, b, c, pa, pb, pc) 1244 TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN) :: kernel 1245 REAL(C_FLOAT), INTENT(IN), TARGET :: a(*), b(*) 1246 REAL(C_FLOAT), INTENT(INOUT), TARGET :: c(*) 1247 REAL(C_FLOAT), INTENT(IN), TARGET :: pa(*) 1248 REAL(C_FLOAT), INTENT(IN), TARGET :: pb(*) 1249 REAL(C_FLOAT), INTENT(IN), TARGET :: pc(*) 1250 ! PROCEDURE(LIBXSMM_FUNCTION6), POINTER :: xmm 1251 ! CALL C_F_PROCPOINTER(kernel%handle, xmm) 1252 ! CALL xmm(...) 1253 CALL libxsmm_xmmcall_prf(kernel%handle, & 1254 & C_LOC(a), C_LOC(b), C_LOC(c), & 1255 & C_LOC(pa), C_LOC(pb), C_LOC(pc)) 1256 END SUBROUTINE 1257 1258 !> See also libxsmm_smmcall_abc and libxsmm_smmcall_prf. 1259 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_smmcall 1260 SUBROUTINE libxsmm_smmcall(kernel, a, b, c, pa, pb, pc) 1261 TYPE(LIBXSMM_SMMFUNCTION), INTENT(IN) :: kernel 1262 REAL(C_FLOAT), INTENT(IN), TARGET :: a(*), b(*) 1263 REAL(C_FLOAT), INTENT(INOUT), TARGET :: c(*) 1264 REAL(C_FLOAT), INTENT(IN), OPTIONAL, TARGET :: pa(*) 1265 REAL(C_FLOAT), INTENT(IN), OPTIONAL, TARGET :: pb(*) 1266 REAL(C_FLOAT), INTENT(IN), OPTIONAL, TARGET :: pc(*) 1267 ! use .OR. instead of .AND. to avoid full check 1268 IF (PRESENT(pa).OR.PRESENT(pb).OR.PRESENT(pc)) THEN 1269 CALL libxsmm_xmmcall_prf(kernel%handle, & 1270 & C_LOC(a), C_LOC(b), C_LOC(c), & 1271 & C_LOC(pa), C_LOC(pb), C_LOC(pc)) 1272 ELSE 1273 CALL libxsmm_xmmcall_abc(kernel%handle, & 1274 & C_LOC(a), C_LOC(b), C_LOC(c)) 1275 END IF 1276 END SUBROUTINE 1277 1278 !> Calls the kernel with the given arguments. Alternatively, 1279 !> PROCPOINTER can be used as shown by the inner comments 1280 !> of this routine (LIBXSMM_FUNCTION3). The libxsmm_xmmcall 1281 !> routines can be used in FORTRAN77. 1282 SUBROUTINE libxsmm_wimmcall_abc(kernel, a, b, c) 1283 TYPE(LIBXSMM_WIMMFUNCTION), INTENT(IN) :: kernel 1284 INTEGER(C_SHORT), INTENT(IN), TARGET :: a(*), b(*) 1285 INTEGER(C_INT), INTENT(INOUT), TARGET :: c(*) 1286 ! PROCEDURE(LIBXSMM_FUNCTION3), POINTER :: xmm 1287 ! CALL C_F_PROCPOINTER(kernel%handle, xmm) 1288 ! CALL xmm(...) 1289 CALL libxsmm_xmmcall_abc(kernel%handle, & 1290 & C_LOC(a), C_LOC(b), C_LOC(c)) 1291 END SUBROUTINE 1292 1293 !> Calls the kernel with the given arguments. Alternatively, 1294 !> PROCPOINTER can be used as shown by the inner comments 1295 !> of this routine (LIBXSMM_FUNCTION6). The libxsmm_xmmcall 1296 !> routines can be used in FORTRAN77. 1297 SUBROUTINE libxsmm_wimmcall_prf(kernel, a, b, c, pa, pb, pc) 1298 TYPE(LIBXSMM_WIMMFUNCTION), INTENT(IN) :: kernel 1299 INTEGER(C_SHORT), INTENT(IN), TARGET :: a(*), b(*) 1300 INTEGER(C_INT), INTENT(INOUT), TARGET :: c(*) 1301 INTEGER(C_SHORT), INTENT(IN), TARGET :: pa(*) 1302 INTEGER(C_SHORT), INTENT(IN), TARGET :: pb(*) 1303 INTEGER(C_SHORT), INTENT(IN), TARGET :: pc(*) 1304 ! PROCEDURE(LIBXSMM_FUNCTION6), POINTER :: xmm 1305 ! CALL C_F_PROCPOINTER(kernel%handle, xmm) 1306 ! CALL xmm(...) 1307 CALL libxsmm_xmmcall_prf(kernel%handle, & 1308 & C_LOC(a), C_LOC(b), C_LOC(c), & 1309 & C_LOC(pa), C_LOC(pb), C_LOC(pc)) 1310 END SUBROUTINE 1311 1312 !> See also libxsmm_wimmcall_abc and libxsmm_wimmcall_prf. 1313 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wimmcall 1314 SUBROUTINE libxsmm_wimmcall(kernel, a, b, c, pa, pb, pc) 1315 TYPE(LIBXSMM_WIMMFUNCTION), INTENT(IN) :: kernel 1316 INTEGER(C_SHORT), INTENT(IN), TARGET :: a(*), b(*) 1317 INTEGER(C_INT), INTENT(INOUT), TARGET :: c(*) 1318 INTEGER(C_SHORT), INTENT(IN), OPTIONAL, TARGET :: pa(*) 1319 INTEGER(C_SHORT), INTENT(IN), OPTIONAL, TARGET :: pb(*) 1320 INTEGER(C_SHORT), INTENT(IN), OPTIONAL, TARGET :: pc(*) 1321 ! use .OR. instead of .AND. to avoid full check 1322 IF (PRESENT(pa).OR.PRESENT(pb).OR.PRESENT(pc)) THEN 1323 CALL libxsmm_xmmcall_prf(kernel%handle, & 1324 & C_LOC(a), C_LOC(b), C_LOC(c), & 1325 & C_LOC(pa), C_LOC(pb), C_LOC(pc)) 1326 ELSE 1327 CALL libxsmm_xmmcall_abc(kernel%handle, & 1328 & C_LOC(a), C_LOC(b), C_LOC(c)) 1329 END IF 1330 END SUBROUTINE 1331 1332 !> Register user-defined key-value; value can be queried (libxsmm_xdispatch). 1333 !> Since the key-type is unknown to LIBXSMM, the key must be binary reproducible, 1334 !> i.e., if it is a structured type (padded data may be uninitialized), it must 1335 !> be initially zero-filled (libxsmm_xclear) followed by an element-wise setup. 1336 !> The size of the key is limited (see documentation). The given value is copied 1337 !> by LIBXSMM and may be initialized at registration-time or whenever queried. 1338 !> Registered data is released at program termination but can be also released 1339 !> if needed (libxsmm_xrelease), .e.g., for larger value for the same key. 1340 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xregister 1341 FUNCTION libxsmm_xregister(key, keysize, valsize, valinit) 1342 TYPE(C_PTR), INTENT(IN), VALUE :: key 1343 TYPE(C_PTR), INTENT(IN), VALUE, OPTIONAL :: valinit 1344 INTEGER(C_INT), INTENT(IN) :: keysize, valsize 1345 TYPE(C_PTR) :: libxsmm_xregister 1346 !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_xregister 1347 INTERFACE 1348 SUBROUTINE internal_xregister(regval, & 1349 & key, keysize, valsize, valinit) & 1350 & BIND(C, NAME="libxsmm_xregister_") 1351 IMPORT C_PTR, C_INT 1352 TYPE(C_PTR), INTENT(OUT) :: regval 1353 TYPE(C_PTR), INTENT(IN), VALUE :: key, valinit 1354 INTEGER(C_INT), INTENT(IN) :: keysize, valsize 1355 END SUBROUTINE 1356 END INTERFACE 1357 CALL internal_xregister(libxsmm_xregister, & 1358 & key, keysize, valsize, valinit) 1359 END FUNCTION 1360 1361 !> Query user-defined value from LIBXSMM's code registry. 1362 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_xdispatch 1363 FUNCTION libxsmm_xdispatch(key, keysize) 1364 TYPE(C_PTR), INTENT(IN), VALUE :: key 1365 INTEGER(C_INT), INTENT(IN) :: keysize 1366 TYPE(C_PTR) :: libxsmm_xdispatch 1367 !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_xdispatch 1368 INTERFACE 1369 SUBROUTINE internal_xdispatch(regval, key, keysize) & 1370 & BIND(C, NAME="libxsmm_xdispatch_") 1371 IMPORT C_PTR, C_INT 1372 TYPE(C_PTR), INTENT(OUT) :: regval 1373 TYPE(C_PTR), INTENT(IN), VALUE :: key 1374 INTEGER(C_INT), INTENT(IN) :: keysize 1375 END SUBROUTINE 1376 END INTERFACE 1377 CALL internal_xdispatch(libxsmm_xdispatch, key, keysize) 1378 END FUNCTION 1379 1380 !> Auto-dispatched general dense MM (double-precision). 1381 !> This overload belongs to libxsmm_(d)gemm. 1382 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dgemm0 1383 PURE SUBROUTINE libxsmm_dgemm0(transa, transb, m, n, k, & 1384 & alpha, a, lda, b, ldb, beta, c, ldc) 1385 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1386 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1387 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda 1388 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb 1389 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc 1390 REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta 1391 REAL(C_DOUBLE), INTENT(IN) :: a, b 1392 REAL(C_DOUBLE), INTENT(INOUT) :: c 1393 !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_gemm 1394 INTERFACE 1395 PURE SUBROUTINE internal_gemm(transa, transb, m, n, k, & 1396 & alpha, a, lda, b, ldb, beta, c, ldc) & 1397 & BIND(C, NAME="libxsmm_dgemm_") 1398 IMPORT C_CHAR, C_DOUBLE, LIBXSMM_BLASINT_KIND 1399 CHARACTER(C_CHAR), INTENT(IN) :: transa, transb 1400 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1401 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda 1402 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldb 1403 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldc 1404 REAL(C_DOUBLE), INTENT(IN) :: alpha, beta 1405 REAL(C_DOUBLE), INTENT(IN) :: a, b 1406 REAL(C_DOUBLE), INTENT(INOUT) :: c 1407 END SUBROUTINE 1408 END INTERFACE 1409 CALL internal_gemm(transa, transb, m, n, k, & 1410 & alpha, a, lda, b, ldb, beta, c, ldc) 1411 END SUBROUTINE 1412 1413 !> Auto-dispatched general dense MM (double-precision). 1414 !> This overload belongs to libxsmm_(d)gemm. 1415 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dgemm1 1416 PURE SUBROUTINE libxsmm_dgemm1(transa, transb, m, n, k, & 1417 & alpha, a, lda, b, ldb, beta, c, ldc) 1418 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1419 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1420 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda 1421 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb 1422 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc 1423 REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta 1424 REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*) 1425 REAL(C_DOUBLE), INTENT(INOUT) :: c(*) 1426 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1427 CALL libxsmm_dgemm0(transa, transb, m, n, k, & 1428 & alpha, a(LBOUND(a,1)), lda, & 1429 & b(LBOUND(b,1)), ldb, & 1430 & beta, c(LBOUND(c,1)), ldc) 1431 END IF 1432 END SUBROUTINE 1433 1434 !> Auto-dispatched general dense MM (double-precision). 1435 !> This overload belongs to libxsmm_(d)gemm. 1436 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dgemm2 1437 PURE SUBROUTINE libxsmm_dgemm2(transa, transb, m, n, k, & 1438 & a, b, c, alpha, beta) 1439 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1440 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1441 REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta 1442 REAL(C_DOUBLE), INTENT(IN) :: a(m,*), b(k,*) 1443 REAL(C_DOUBLE), INTENT(INOUT) :: c(m,*) 1444 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1445 CALL libxsmm_dgemm0(transa, transb, m, n, k, & 1446 & alpha, a(LBOUND(a,1),LBOUND(a,2)), m, & 1447 & b(LBOUND(b,1),LBOUND(b,2)), k, & 1448 & beta, c(LBOUND(c,1),LBOUND(c,2)), m) 1449 END IF 1450 END SUBROUTINE 1451 1452 !> Auto-dispatched general dense MM (double-precision). 1453 !> This overload belongs to libxsmm_(d)gemm. 1454 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_dgemm3 1455 PURE SUBROUTINE libxsmm_dgemm3(transa, transb, m, n, k, & 1456 & alpha, a, lda, b, ldb, beta, c, ldc) 1457 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1458 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1459 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 1460 REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta 1461 REAL(C_DOUBLE), INTENT(IN) :: a(lda,*), b(ldb,*) 1462 REAL(C_DOUBLE), INTENT(INOUT) :: c(ldc,*) 1463 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1464 CALL libxsmm_dgemm0(transa, transb, m, n, k, & 1465 & alpha, a(LBOUND(a,1),LBOUND(a,2)), lda, & 1466 & b(LBOUND(b,1),LBOUND(b,2)), ldb, & 1467 & beta, c(LBOUND(c,1),LBOUND(c,2)), ldc) 1468 END IF 1469 END SUBROUTINE 1470 1471 !> Auto-dispatched general dense MM (single-precision). 1472 !> This overload belongs to libxsmm_(s)gemm. 1473 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_sgemm0 1474 PURE SUBROUTINE libxsmm_sgemm0(transa, transb, m, n, k, & 1475 & alpha, a, lda, b, ldb, beta, c, ldc) 1476 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1477 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1478 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda 1479 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb 1480 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc 1481 REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta 1482 REAL(C_FLOAT), INTENT(IN) :: a, b 1483 REAL(C_FLOAT), INTENT(INOUT) :: c 1484 !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_gemm 1485 INTERFACE 1486 PURE SUBROUTINE internal_gemm(transa, transb, m, n, k, & 1487 & alpha, a, lda, b, ldb, beta, c, ldc) & 1488 & BIND(C, NAME="libxsmm_sgemm_") 1489 IMPORT C_CHAR, C_FLOAT, LIBXSMM_BLASINT_KIND 1490 CHARACTER(C_CHAR), INTENT(IN) :: transa, transb 1491 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1492 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda 1493 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldb 1494 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldc 1495 REAL(C_FLOAT), INTENT(IN) :: alpha, beta 1496 REAL(C_FLOAT), INTENT(IN) :: a, b 1497 REAL(C_FLOAT), INTENT(INOUT) :: c 1498 END SUBROUTINE 1499 END INTERFACE 1500 CALL internal_gemm(transa, transb, m, n, k, & 1501 & alpha, a, lda, b, ldb, beta, c, ldc) 1502 END SUBROUTINE 1503 1504 !> Auto-dispatched general dense MM (single-precision). 1505 !> This overload belongs to libxsmm_(s)gemm. 1506 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_sgemm1 1507 PURE SUBROUTINE libxsmm_sgemm1(transa, transb, m, n, k, & 1508 & alpha, a, lda, b, ldb, beta, c, ldc) 1509 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1510 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1511 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda 1512 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb 1513 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc 1514 REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta 1515 REAL(C_FLOAT), INTENT(IN) :: a(*), b(*) 1516 REAL(C_FLOAT), INTENT(INOUT) :: c(*) 1517 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1518 CALL libxsmm_sgemm0(transa, transb, m, n, k, & 1519 & alpha, a(LBOUND(a,1)), lda, & 1520 & b(LBOUND(b,1)), ldb, & 1521 & beta, c(LBOUND(c,1)), ldc) 1522 END IF 1523 END SUBROUTINE 1524 1525 !> Auto-dispatched general dense MM (single-precision). 1526 !> This overload belongs to libxsmm_(s)gemm. 1527 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_sgemm2 1528 PURE SUBROUTINE libxsmm_sgemm2(transa, transb, m, n, k, & 1529 & a, b, c, alpha, beta) 1530 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1531 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1532 REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta 1533 REAL(C_FLOAT), INTENT(IN) :: a(m,*), b(k,*) 1534 REAL(C_FLOAT), INTENT(INOUT) :: c(m,*) 1535 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1536 CALL libxsmm_sgemm0(transa, transb, m, n, k, & 1537 & alpha, a(LBOUND(a,1),LBOUND(a,2)), m, & 1538 & b(LBOUND(b,1),LBOUND(b,2)), k, & 1539 & beta, c(LBOUND(c,1),LBOUND(c,2)), m) 1540 END IF 1541 END SUBROUTINE 1542 1543 !> Auto-dispatched general dense MM (single-precision). 1544 !> This overload belongs to libxsmm_(s)gemm. 1545 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_sgemm3 1546 PURE SUBROUTINE libxsmm_sgemm3(transa, transb, m, n, k, & 1547 & alpha, a, lda, b, ldb, beta, c, ldc) 1548 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1549 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1550 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 1551 REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta 1552 REAL(C_FLOAT), INTENT(IN) :: a(lda,*), b(ldb,*) 1553 REAL(C_FLOAT), INTENT(INOUT) :: c(ldc,*) 1554 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1555 CALL libxsmm_sgemm0(transa, transb, m, n, k, & 1556 & alpha, a(LBOUND(a,1),LBOUND(a,2)), lda, & 1557 & b(LBOUND(b,1),LBOUND(b,2)), ldb, & 1558 & beta, c(LBOUND(c,1),LBOUND(c,2)), ldc) 1559 END IF 1560 END SUBROUTINE 1561 1562 !> Auto-dispatched general dense MM (low-precision, int-accumulate). 1563 !> This overload belongs to libxsmm_(wi)gemm. 1564 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wigemm0 1565 PURE SUBROUTINE libxsmm_wigemm0(transa, transb, m, n, k, & 1566 & alpha, a, lda, b, ldb, beta, c, ldc) 1567 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1568 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1569 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda 1570 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb 1571 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc 1572 INTEGER(C_INT), INTENT(IN), OPTIONAL :: alpha, beta 1573 INTEGER(C_SHORT), INTENT(IN) :: a, b 1574 INTEGER(C_INT), INTENT(INOUT) :: c 1575 !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_gemm 1576 INTERFACE 1577 PURE SUBROUTINE internal_gemm(transa, transb, m, n, k, & 1578 & alpha, a, lda, b, ldb, beta, c, ldc) & 1579 & BIND(C, NAME="libxsmm_wigemm_") 1580 IMPORT C_CHAR, C_SHORT, C_INT, LIBXSMM_BLASINT_KIND 1581 CHARACTER(C_CHAR), INTENT(IN) :: transa, transb 1582 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1583 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda 1584 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldb 1585 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldc 1586 INTEGER(C_INT), INTENT(IN) :: alpha, beta 1587 INTEGER(C_SHORT), INTENT(IN) :: a, b 1588 INTEGER(C_INT), INTENT(INOUT) :: c 1589 END SUBROUTINE 1590 END INTERFACE 1591 CALL internal_gemm(transa, transb, m, n, k, & 1592 & alpha, a, lda, b, ldb, beta, c, ldc) 1593 END SUBROUTINE 1594 1595 !> Auto-dispatched general dense MM (low-precision, int-accumulate). 1596 !> This overload belongs to libxsmm_(wi)gemm. 1597 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wigemm1 1598 PURE SUBROUTINE libxsmm_wigemm1(transa, transb, m, n, k, & 1599 & alpha, a, lda, b, ldb, beta, c, ldc) 1600 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1601 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1602 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda 1603 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb 1604 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc 1605 INTEGER(C_INT), INTENT(IN), OPTIONAL :: alpha, beta 1606 INTEGER(C_SHORT), INTENT(IN) :: a(*), b(*) 1607 INTEGER(C_INT), INTENT(INOUT) :: c(*) 1608 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1609 CALL libxsmm_wigemm0(transa, transb, m, n, k, & 1610 & alpha, a(LBOUND(a,1)), lda, & 1611 & b(LBOUND(b,1)), ldb, & 1612 & beta, c(LBOUND(c,1)), ldc) 1613 END IF 1614 END SUBROUTINE 1615 1616 !> Auto-dispatched general dense MM (low-precision, int-accumulate). 1617 !> This overload belongs to libxsmm_(wi)gemm. 1618 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wigemm2 1619 PURE SUBROUTINE libxsmm_wigemm2(transa, transb, m, n, k, & 1620 & a, b, c, alpha, beta) 1621 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1622 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1623 INTEGER(C_INT), INTENT(IN), OPTIONAL :: alpha, beta 1624 INTEGER(C_SHORT), INTENT(IN) :: a(m,*), b(k,*) 1625 INTEGER(C_INT), INTENT(INOUT) :: c(m,*) 1626 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1627 CALL libxsmm_wigemm0(transa, transb, m, n, k, & 1628 & alpha, a(LBOUND(a,1),LBOUND(a,2)), m, & 1629 & b(LBOUND(b,1),LBOUND(b,2)), k, & 1630 & beta, c(LBOUND(c,1),LBOUND(c,2)), m) 1631 END IF 1632 END SUBROUTINE 1633 1634 !> Auto-dispatched general dense MM (low-precision, int-accumulate). 1635 !> This overload belongs to libxsmm_(wi)gemm. 1636 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_wigemm3 1637 PURE SUBROUTINE libxsmm_wigemm3(transa, transb, m, n, k, & 1638 & alpha, a, lda, b, ldb, beta, c, ldc) 1639 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1640 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1641 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 1642 INTEGER(C_INT), INTENT(IN), OPTIONAL :: alpha, beta 1643 INTEGER(C_SHORT), INTENT(IN) :: a(lda,*), b(ldb,*) 1644 INTEGER(C_INT), INTENT(INOUT) :: c(ldc,*) 1645 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1646 CALL libxsmm_wigemm0(transa, transb, m, n, k, & 1647 & alpha, a(LBOUND(a,1),LBOUND(a,2)), lda, & 1648 & b(LBOUND(b,1),LBOUND(b,2)), ldb, & 1649 & beta, c(LBOUND(c,1),LBOUND(c,2)), ldc) 1650 END IF 1651 END SUBROUTINE 1652 1653 !> Re-exposes BLAS based GEMM routine with an interfaces similar to 1654 !> libxsmm_(d)gemm. This overload belongs to libxsmm_blas_(d)gemm. 1655 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_dgemm0 1656 PURE SUBROUTINE libxsmm_blas_dgemm0(transa, transb, m, n, k, & 1657 & alpha, a, lda, b, ldb, beta, c, ldc) 1658 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1659 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1660 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda 1661 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb 1662 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc 1663 REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta 1664 REAL(C_DOUBLE), INTENT(IN) :: a, b 1665 REAL(C_DOUBLE), INTENT(INOUT) :: c 1666 !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_gemm 1667 INTERFACE 1668 PURE SUBROUTINE internal_gemm(transa, transb, m, n, k, & 1669 & alpha, a, lda, b, ldb, beta, c, ldc) & 1670 & BIND(C, NAME="libxsmm_blas_dgemm_") 1671 IMPORT C_CHAR, C_DOUBLE, LIBXSMM_BLASINT_KIND 1672 CHARACTER(C_CHAR), INTENT(IN) :: transa, transb 1673 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1674 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda 1675 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldb 1676 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldc 1677 REAL(C_DOUBLE), INTENT(IN) :: alpha, beta 1678 REAL(C_DOUBLE), INTENT(IN) :: a, b 1679 REAL(C_DOUBLE), INTENT(INOUT) :: c 1680 END SUBROUTINE 1681 END INTERFACE 1682 CALL internal_gemm(transa, transb, m, n, k, & 1683 & alpha, a, lda, b, ldb, beta, c, ldc) 1684 END SUBROUTINE 1685 1686 !> Re-exposes BLAS based GEMM routine with an interfaces similar to 1687 !> libxsmm_(d)gemm. This overload belongs to libxsmm_blas_(d)gemm. 1688 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_dgemm1 1689 PURE SUBROUTINE libxsmm_blas_dgemm1(transa, transb, m, n, k, & 1690 & alpha, a, lda, b, ldb, beta, c, ldc) 1691 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1692 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1693 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda 1694 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb 1695 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc 1696 REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta 1697 REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*) 1698 REAL(C_DOUBLE), INTENT(INOUT) :: c(*) 1699 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1700 CALL libxsmm_blas_dgemm0(transa, transb, m, n, k, & 1701 & alpha, a(LBOUND(a,1)), lda, & 1702 & b(LBOUND(b,1)), ldb, & 1703 & beta, c(LBOUND(c,1)), ldc) 1704 END IF 1705 END SUBROUTINE 1706 1707 !> Re-exposes BLAS based GEMM routine with an interfaces similar to 1708 !> libxsmm_(d)gemm. This overload belongs to libxsmm_blas_(d)gemm. 1709 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_dgemm2 1710 PURE SUBROUTINE libxsmm_blas_dgemm2(transa, transb, m, n, k, & 1711 & a, b, c, alpha, beta) 1712 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1713 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1714 REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta 1715 REAL(C_DOUBLE), INTENT(IN) :: a(m,*), b(k,*) 1716 REAL(C_DOUBLE), INTENT(INOUT) :: c(m,*) 1717 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1718 CALL libxsmm_blas_dgemm0(transa, transb, m, n, k, & 1719 & alpha, a(LBOUND(a,1),LBOUND(a,2)), m, & 1720 & b(LBOUND(b,1),LBOUND(b,2)), k, & 1721 & beta, c(LBOUND(c,1),LBOUND(c,2)), m) 1722 END IF 1723 END SUBROUTINE 1724 1725 !> Re-exposes BLAS based GEMM routine with an interfaces similar to 1726 !> libxsmm_(d)gemm. This overload belongs to libxsmm_blas_(d)gemm. 1727 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_dgemm3 1728 PURE SUBROUTINE libxsmm_blas_dgemm3(transa, transb, m, n, k, & 1729 & alpha, a, lda, b, ldb, beta, c, ldc) 1730 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1731 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1732 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 1733 REAL(C_DOUBLE), INTENT(IN), OPTIONAL :: alpha, beta 1734 REAL(C_DOUBLE), INTENT(IN) :: a(lda,*), b(ldb,*) 1735 REAL(C_DOUBLE), INTENT(INOUT) :: c(ldc,*) 1736 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1737 CALL libxsmm_blas_dgemm0(transa, transb, m, n, k, & 1738 & alpha, a(LBOUND(a,1),LBOUND(a,2)), lda, & 1739 & b(LBOUND(b,1),LBOUND(b,2)), ldb, & 1740 & beta, c(LBOUND(c,1),LBOUND(c,2)), ldc) 1741 END IF 1742 END SUBROUTINE 1743 1744 !> Re-exposes BLAS based GEMM routine with an interfaces similar to 1745 !> libxsmm_(s)gemm. This overload belongs to libxsmm_blas_(s)gemm. 1746 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_sgemm0 1747 PURE SUBROUTINE libxsmm_blas_sgemm0(transa, transb, m, n, k, & 1748 & alpha, a, lda, b, ldb, beta, c, ldc) 1749 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1750 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1751 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda 1752 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb 1753 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc 1754 REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta 1755 REAL(C_FLOAT), INTENT(IN) :: a, b 1756 REAL(C_FLOAT), INTENT(INOUT) :: c 1757 !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_gemm 1758 INTERFACE 1759 PURE SUBROUTINE internal_gemm(transa, transb, m, n, k, & 1760 & alpha, a, lda, b, ldb, beta, c, ldc) & 1761 & BIND(C, NAME="libxsmm_blas_sgemm_") 1762 IMPORT C_CHAR, C_FLOAT, LIBXSMM_BLASINT_KIND 1763 CHARACTER(C_CHAR), INTENT(IN) :: transa, transb 1764 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1765 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda 1766 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldb 1767 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldc 1768 REAL(C_FLOAT), INTENT(IN) :: alpha, beta 1769 REAL(C_FLOAT), INTENT(IN) :: a, b 1770 REAL(C_FLOAT), INTENT(INOUT) :: c 1771 END SUBROUTINE 1772 END INTERFACE 1773 CALL internal_gemm(transa, transb, m, n, k, & 1774 & alpha, a, lda, b, ldb, beta, c, ldc) 1775 END SUBROUTINE 1776 1777 !> Re-exposes BLAS based GEMM routine with an interfaces similar to 1778 !> libxsmm_(s)gemm. This overload belongs to libxsmm_blas_(s)gemm. 1779 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_sgemm1 1780 PURE SUBROUTINE libxsmm_blas_sgemm1(transa, transb, m, n, k, & 1781 & alpha, a, lda, b, ldb, beta, c, ldc) 1782 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1783 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1784 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: lda 1785 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldb 1786 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldc 1787 REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta 1788 REAL(C_FLOAT), INTENT(IN) :: a(*), b(*) 1789 REAL(C_FLOAT), INTENT(INOUT) :: c(*) 1790 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1791 CALL libxsmm_blas_sgemm0(transa, transb, m, n, k, & 1792 & alpha, a(LBOUND(a,1)), lda, & 1793 & b(LBOUND(b,1)), ldb, & 1794 & beta, c(LBOUND(c,1)), ldc) 1795 END IF 1796 END SUBROUTINE 1797 1798 !> Re-exposes BLAS based GEMM routine with an interfaces similar to 1799 !> libxsmm_(s)gemm. This overload belongs to libxsmm_blas_(s)gemm. 1800 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_sgemm2 1801 PURE SUBROUTINE libxsmm_blas_sgemm2(transa, transb, m, n, k, & 1802 & a, b, c, alpha, beta) 1803 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1804 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1805 REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta 1806 REAL(C_FLOAT), INTENT(IN) :: a(m,*), b(k,*) 1807 REAL(C_FLOAT), INTENT(INOUT) :: c(m,*) 1808 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1809 CALL libxsmm_blas_sgemm0(transa, transb, m, n, k, & 1810 & alpha, a(LBOUND(a,1),LBOUND(a,2)), m, & 1811 & b(LBOUND(b,1),LBOUND(b,2)), k, & 1812 & beta, c(LBOUND(c,1),LBOUND(c,2)), m) 1813 END IF 1814 END SUBROUTINE 1815 1816 !> Re-exposes BLAS based GEMM routine with an interfaces similar to 1817 !> libxsmm_(s)gemm. This overload belongs to libxsmm_blas_(s)gemm. 1818 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_blas_sgemm3 1819 PURE SUBROUTINE libxsmm_blas_sgemm3(transa, transb, m, n, k, & 1820 & alpha, a, lda, b, ldb, beta, c, ldc) 1821 CHARACTER, INTENT(IN), OPTIONAL :: transa, transb 1822 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, k 1823 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: lda, ldb, ldc 1824 REAL(C_FLOAT), INTENT(IN), OPTIONAL :: alpha, beta 1825 REAL(C_FLOAT), INTENT(IN) :: a(lda,*), b(ldb,*) 1826 REAL(C_FLOAT), INTENT(INOUT) :: c(ldc,*) 1827 IF ((0.LT.m).AND.(0.LT.n).AND.(0.LT.k)) THEN 1828 CALL libxsmm_blas_sgemm0(transa, transb, m, n, k, & 1829 & alpha, a(LBOUND(a,1),LBOUND(a,2)), lda, & 1830 & b(LBOUND(b,1),LBOUND(b,2)), ldb, & 1831 & beta, c(LBOUND(c,1),LBOUND(c,2)), ldc) 1832 END IF 1833 END SUBROUTINE 1834 1835 !> Matrix-copy (2-dimensional copy) routine. If the input (optional) 1836 !> is not present, the routine is used to zero-fill the out-matrix. 1837 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_p0 1838 PURE SUBROUTINE libxsmm_matcopy_p0(output, input, typesize, & 1839 & m, n, ldi, ldo) 1840 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m 1841 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), & 1842 & OPTIONAL :: n, ldi, ldo 1843 INTEGER(C_INT), INTENT(IN) :: typesize 1844 TYPE(C_PTR), INTENT(IN), OPTIONAL :: input 1845 TYPE(C_PTR), INTENT(IN) :: output 1846 CALL libxsmm_xmatcopy(output, input, typesize, & 1847 & m, n, ldi, ldo) 1848 END SUBROUTINE 1849 1850 !> Matrix-copy (2-dimensional copy) routine (DP/rank-1). 1851 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_d1 1852 SUBROUTINE libxsmm_matcopy_d1(output, input, m, n, ldi, ldo) 1853 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m 1854 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n 1855 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldi 1856 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldo 1857 REAL(C_DOUBLE), INTENT(OUT), TARGET :: output(*) 1858 REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET :: input(*) 1859 CALL libxsmm_xmatcopy(C_LOC(output), C_LOC(input), 8, & 1860 & m, n, ldi, ldo) 1861 END SUBROUTINE 1862 1863 !> Matrix-copy (2-dimensional copy) routine (DP/rank-2). 1864 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_d2 1865 SUBROUTINE libxsmm_matcopy_d2(output, input, m, n, ldi, ldo) 1866 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo 1867 REAL(C_DOUBLE), INTENT(OUT), TARGET :: output(ldo,*) 1868 REAL(C_DOUBLE), INTENT(IN), OPTIONAL, TARGET :: input(ldi,*) 1869 CALL libxsmm_xmatcopy(C_LOC(output), C_LOC(input), 8, & 1870 & m, n, ldi, ldo) 1871 END SUBROUTINE 1872 1873 !> Matrix-copy (2-dimensional copy) routine (SP/rank-1). 1874 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_s1 1875 SUBROUTINE libxsmm_matcopy_s1(output, input, m, n, ldi, ldo) 1876 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m 1877 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n 1878 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldi 1879 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldo 1880 REAL(C_FLOAT), INTENT(OUT), TARGET :: output(*) 1881 REAL(C_FLOAT), INTENT(IN), OPTIONAL, TARGET :: input(*) 1882 CALL libxsmm_xmatcopy(C_LOC(output), C_LOC(input), 4, & 1883 & m, n, ldi, ldo) 1884 END SUBROUTINE 1885 1886 !> Matrix-copy (2-dimensional copy) routine (SP/rank-2). 1887 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matcopy_s2 1888 SUBROUTINE libxsmm_matcopy_s2(output, input, m, n, ldi, ldo) 1889 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo 1890 REAL(C_FLOAT), INTENT(OUT), TARGET :: output(ldo,*) 1891 REAL(C_FLOAT), INTENT(IN), OPTIONAL, TARGET :: input(ldi,*) 1892 CALL libxsmm_xmatcopy(C_LOC(output), C_LOC(input), 4, & 1893 & m, n, ldi, ldo) 1894 END SUBROUTINE 1895 1896 !> Transpose a matrix (in-place form). 1897 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_itrans_p0 1898 PURE SUBROUTINE libxsmm_itrans_p0(matrix, typesize, m, n, ld) 1899 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m 1900 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n, ld 1901 TYPE(C_PTR), INTENT(IN) :: matrix 1902 INTEGER(C_INT), INTENT(IN) :: typesize 1903 CALL libxsmm_xitrans(matrix, typesize, m, n, ld) 1904 END SUBROUTINE 1905 1906 !> Transpose a matrix (in-place form, DP/rank-1). 1907 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_itrans_d1 1908 SUBROUTINE libxsmm_itrans_d1(matrix, m, n, ld) 1909 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m 1910 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n, ld 1911 REAL(C_DOUBLE), INTENT(INOUT), TARGET :: matrix(*) 1912 CALL libxsmm_xitrans(C_LOC(matrix), 8, m, n, ld) 1913 END SUBROUTINE 1914 1915 !> Transpose a matrix (in-place form, DP/rank-2). 1916 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_itrans_d2 1917 SUBROUTINE libxsmm_itrans_d2(matrix, m, n, ld) 1918 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ld 1919 REAL(C_DOUBLE), INTENT(INOUT), TARGET :: matrix(ld,*) 1920 CALL libxsmm_xitrans(C_LOC(matrix), 8, m, n, ld) 1921 END SUBROUTINE 1922 1923 !> Transpose a matrix (in-place form, SP/rank-1). 1924 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_itrans_s1 1925 SUBROUTINE libxsmm_itrans_s1(matrix, m, n, ld) 1926 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m 1927 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n, ld 1928 REAL(C_FLOAT), INTENT(INOUT), TARGET :: matrix(*) 1929 CALL libxsmm_xitrans(C_LOC(matrix), 4, m, n, ld) 1930 END SUBROUTINE 1931 1932 !> Transpose a matrix (in-place form, SP/rank-2). 1933 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_itrans_s2 1934 SUBROUTINE libxsmm_itrans_s2(matrix, m, n, ld) 1935 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ld 1936 REAL(C_FLOAT), INTENT(INOUT), TARGET :: matrix(ld,*) 1937 CALL libxsmm_xitrans(C_LOC(matrix), 4, m, n, ld) 1938 END SUBROUTINE 1939 1940 !> Transpose a matrix (out-of-place form). 1941 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_p0 1942 PURE SUBROUTINE libxsmm_otrans_p0(output, input, typesize, & 1943 & m, n, ldi, ldo) 1944 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m 1945 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n 1946 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldi 1947 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldo 1948 TYPE(C_PTR), INTENT(IN) :: output, input 1949 INTEGER(C_INT), INTENT(IN) :: typesize 1950 CALL libxsmm_xotrans(output, input, typesize, m, n, ldi, ldo) 1951 END SUBROUTINE 1952 1953 !> Transpose a matrix (out-of-place form, DP/rank-1). 1954 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_d1 1955 SUBROUTINE libxsmm_otrans_d1(output, input, m, n, ldi, ldo) 1956 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m 1957 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n 1958 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldi 1959 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldo 1960 REAL(C_DOUBLE), INTENT(OUT), TARGET :: output(*) 1961 REAL(C_DOUBLE), INTENT(IN), TARGET :: input(*) 1962 CALL libxsmm_xotrans(C_LOC(output), C_LOC(input), & 1963 & 8, m, n, ldi, ldo) 1964 END SUBROUTINE 1965 1966 !> Transpose a matrix (out-of-place form, DP/rank-2). 1967 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_d2 1968 SUBROUTINE libxsmm_otrans_d2(output, input, m, n, ldi, ldo) 1969 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo 1970 REAL(C_DOUBLE), INTENT(OUT), TARGET :: output(ldo,*) 1971 REAL(C_DOUBLE), INTENT(IN), TARGET :: input(ldi,*) 1972 CALL libxsmm_xotrans(C_LOC(output), C_LOC(input), & 1973 & 8, m, n, ldi, ldo) 1974 END SUBROUTINE 1975 1976 !> Transpose a matrix (out-of-place form, SP/rank-1). 1977 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_s1 1978 SUBROUTINE libxsmm_otrans_s1(output, input, m, n, ldi, ldo) 1979 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m 1980 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: n 1981 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldi 1982 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), OPTIONAL :: ldo 1983 REAL(C_FLOAT), INTENT(OUT), TARGET :: output(*) 1984 REAL(C_FLOAT), INTENT(IN), TARGET :: input(*) 1985 CALL libxsmm_xotrans(C_LOC(output), C_LOC(input), & 1986 & 4, m, n, ldi, ldo) 1987 END SUBROUTINE 1988 1989 !> Transpose a matrix (out-of-place form, SP/rank-2). 1990 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_otrans_s2 1991 SUBROUTINE libxsmm_otrans_s2(output, input, m, n, ldi, ldo) 1992 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n, ldi, ldo 1993 REAL(C_FLOAT), INTENT(OUT), TARGET :: output(ldo,*) 1994 REAL(C_FLOAT), INTENT(IN), TARGET :: input(ldi,*) 1995 CALL libxsmm_xotrans(C_LOC(output), C_LOC(input), & 1996 & 4, m, n, ldi, ldo) 1997 END SUBROUTINE 1998 1999 !> Returns the difference between two timer ticks (cycles). 2000 !> Implicit FORTRAN 77 interface: subroutine available. 2001 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_timer_ncycles 2002 PURE FUNCTION libxsmm_timer_ncycles(tick0, tick1) 2003 INTEGER(LIBXSMM_TICKINT_KIND), INTENT(IN) :: tick0, tick1 2004 INTEGER(LIBXSMM_TICKINT_KIND) :: libxsmm_timer_ncycles 2005 !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_timer_ncycles 2006 INTERFACE 2007 PURE SUBROUTINE internal_timer_ncycles(ncycles, & 2008 & tick0, tick1) BIND(C, NAME="libxsmm_timer_ncycles_") 2009 IMPORT LIBXSMM_TICKINT_KIND 2010 INTEGER(LIBXSMM_TICKINT_KIND), INTENT(IN) :: tick0, tick1 2011 INTEGER(LIBXSMM_TICKINT_KIND), INTENT(OUT) :: ncycles 2012 END SUBROUTINE 2013 END INTERFACE 2014 CALL internal_timer_ncycles( & 2015 & libxsmm_timer_ncycles, tick0, tick1) 2016 END FUNCTION 2017 2018 !> Utility function to calculate a collection of scalar differences 2019 !> between two matrices (libxsmm_matdiff_info). The location (m, n) 2020 !> of the largest difference (linf_abs) is recorded (also if NaN). 2021 !> In case of NaN, differences are set to infinity. If no difference 2022 !> is discovered, the location (m, n) is negative (OOB). 2023 !> Implicit FORTRAN 77 interface: 2024 !> TYPE :: info 2025 !> INTEGER(4) :: datatype 2026 !> INTEGER(4|8) :: m, n, ldref, ldtst 2027 !> ARRAY :: ref, tst 2028 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_matdiff 2029 PURE SUBROUTINE libxsmm_matdiff(info, datatype, m, n, & 2030 & ref, tst, ldref, ldtst) 2031 INTEGER(C_INT), INTENT(IN) :: datatype 2032 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m 2033 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN), & 2034 & OPTIONAL :: n, ldref, ldtst 2035 TYPE(C_PTR), INTENT(IN), OPTIONAL :: ref, tst 2036 TYPE(LIBXSMM_MATDIFF_INFO), INTENT(OUT) :: info 2037 !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_matdiff 2038 INTERFACE 2039 PURE SUBROUTINE internal_matdiff(info, datatype, m, n, & 2040 & ref, tst, ldref, ldtst) BIND(C, NAME="libxsmm_matdiff_") 2041 IMPORT LIBXSMM_MATDIFF_INFO, LIBXSMM_BLASINT_KIND 2042 IMPORT C_PTR, C_INT 2043 INTEGER(C_INT), INTENT(IN) :: datatype 2044 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: m, n 2045 INTEGER(LIBXSMM_BLASINT_KIND), INTENT(IN) :: ldref, ldtst 2046 TYPE(C_PTR), INTENT(IN), VALUE :: ref, tst 2047 TYPE(LIBXSMM_MATDIFF_INFO), INTENT(OUT) :: info 2048 END SUBROUTINE 2049 END INTERFACE 2050 CALL internal_matdiff(info, datatype, m, n, & 2051 & ref, tst, ldref, ldtst) 2052 END SUBROUTINE 2053 2054 !> Calculate co-prime number <= n/2 (except: libxsmm_shuffle(0|1) == 0). 2055 !> Implicit FORTRAN 77 interface: 2056 !> INTEGER(4) :: coprime (OUT) 2057 !> INTEGER(4) :: n 2058 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_shuffle 2059 ELEMENTAL FUNCTION libxsmm_shuffle(n) 2060 INTEGER(C_LONG_LONG) :: libxsmm_shuffle 2061 INTEGER(C_INT), INTENT(IN) :: n 2062 !DIR$ ATTRIBUTES OFFLOAD:MIC :: internal_shuffle 2063 INTERFACE 2064 PURE SUBROUTINE internal_shuffle(coprime, n) & 2065 & BIND(C, NAME="libxsmm_shuffle_") 2066 IMPORT C_LONG_LONG, C_INT 2067 INTEGER(C_LONG_LONG), INTENT(OUT) :: coprime 2068 INTEGER(C_INT), INTENT(IN) :: n 2069 END SUBROUTINE 2070 END INTERFACE 2071 libxsmm_shuffle = INT(0, KIND=C_LONG_LONG) ! avoid warning (older CRAY) 2072 CALL internal_shuffle(libxsmm_shuffle, n) 2073 END FUNCTION 2074 2075 !> Calculates a hash value for the given array and seed. 2076 !> FORTRAN 77: see libxsmm_xhash 2077 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_hash_char 2078 FUNCTION libxsmm_hash_char(key, seed) 2079 CHARACTER(C_CHAR), INTENT(IN) :: key(:) 2080 INTEGER(C_INT), INTENT(IN) :: seed 2081 INTEGER(C_INT) :: libxsmm_hash_char 2082 libxsmm_hash_char = seed 2083 CALL libxsmm_xhash(libxsmm_hash_char, & 2084 & libxsmm_ptr(key), SIZE(key)) 2085 END FUNCTION 2086 2087 !> Calculates a hash value for the given array and seed. 2088 !> FORTRAN 77: see libxsmm_xhash 2089 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_hash_i8 2090 FUNCTION libxsmm_hash_i8(key, seed) 2091 INTEGER(C_INT8_T), INTENT(IN) :: key(:) 2092 INTEGER(C_INT), INTENT(IN) :: seed 2093 INTEGER(C_INT) :: libxsmm_hash_i8 2094 libxsmm_hash_i8 = seed 2095 CALL libxsmm_xhash(libxsmm_hash_i8, & 2096 & libxsmm_ptr(key), SIZE(key)) 2097 END FUNCTION 2098 2099 !> Calculates a hash value for the given array and seed. 2100 !> FORTRAN 77: see libxsmm_xhash 2101 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_hash_i32 2102 FUNCTION libxsmm_hash_i32(key, seed) 2103 INTEGER(C_INT), INTENT(IN) :: key(:) 2104 INTEGER(C_INT), INTENT(IN) :: seed 2105 INTEGER(C_INT) :: libxsmm_hash_i32 2106 libxsmm_hash_i32 = seed 2107 CALL libxsmm_xhash(libxsmm_hash_i32, & 2108 & libxsmm_ptr(key), SIZE(key) * 4) 2109 END FUNCTION 2110 2111 !> Calculates a hash value for the given array and seed. 2112 !> FORTRAN 77: see libxsmm_xhash 2113 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_hash_i64 2114 FUNCTION libxsmm_hash_i64(key, seed) 2115 INTEGER(C_LONG_LONG), INTENT(IN) :: key(:) 2116 INTEGER(C_INT), INTENT(IN) :: seed 2117 INTEGER(C_INT) :: libxsmm_hash_i64 2118 libxsmm_hash_i64 = seed 2119 CALL libxsmm_xhash(libxsmm_hash_i64, & 2120 & libxsmm_ptr(key), SIZE(key) * 8) 2121 END FUNCTION 2122 2123 !> Calculates if there is a difference between two arrays. 2124 !> FORTRAN 77: see libxsmm_xdiff 2125 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_diff_char 2126 FUNCTION libxsmm_diff_char(a, b) 2127 CHARACTER(C_CHAR), INTENT(IN) :: a(:), b(:) 2128 LOGICAL(C_BOOL) :: libxsmm_diff_char 2129 IF (SIZE(a, KIND=C_LONG_LONG) .EQ. SIZE(b, KIND=C_LONG_LONG)) & 2130 & THEN 2131 CALL libxsmm_xdiff(libxsmm_diff_char, & 2132 & libxsmm_ptr(a), libxsmm_ptr(b), & 2133 & SIZE(a, KIND=C_LONG_LONG)) 2134 ELSE 2135 libxsmm_diff_char = LOGICAL(.TRUE., KIND=C_BOOL) 2136 END IF 2137 END FUNCTION 2138 2139 !> Calculates if there is a difference between two arrays. 2140 !> FORTRAN 77: see libxsmm_xdiff 2141 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_diff_i8 2142 FUNCTION libxsmm_diff_i8(a, b) 2143 INTEGER(C_INT8_T), INTENT(IN) :: a(:), b(:) 2144 LOGICAL(C_BOOL) :: libxsmm_diff_i8 2145 IF (SIZE(a, KIND=C_LONG_LONG) .EQ. SIZE(b, KIND=C_LONG_LONG)) & 2146 & THEN 2147 CALL libxsmm_xdiff(libxsmm_diff_i8, & 2148 & libxsmm_ptr(a), libxsmm_ptr(b), & 2149 & SIZE(a, KIND=C_LONG_LONG)) 2150 ELSE 2151 libxsmm_diff_i8 = LOGICAL(.TRUE., KIND=C_BOOL) 2152 END IF 2153 END FUNCTION 2154 2155 !> Calculates if there is a difference between two arrays. 2156 !> FORTRAN 77: see libxsmm_xdiff 2157 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_diff_i32 2158 FUNCTION libxsmm_diff_i32(a, b) 2159 INTEGER(C_INT), INTENT(IN) :: a(:), b(:) 2160 LOGICAL(C_BOOL) :: libxsmm_diff_i32 2161 IF (SIZE(a, KIND=C_LONG_LONG) .EQ. SIZE(b, KIND=C_LONG_LONG)) & 2162 & THEN 2163 CALL libxsmm_xdiff(libxsmm_diff_i32, & 2164 & libxsmm_ptr(a), libxsmm_ptr(b), & 2165 & SIZE(a, KIND=C_LONG_LONG) * INT(4, KIND=C_LONG_LONG)) 2166 ELSE 2167 libxsmm_diff_i32 = LOGICAL(.TRUE., KIND=C_BOOL) 2168 END IF 2169 END FUNCTION 2170 2171 !> Calculates if there is a difference between two arrays. 2172 !> FORTRAN 77: see libxsmm_xdiff 2173 !DIR$ ATTRIBUTES OFFLOAD:MIC :: libxsmm_diff_i64 2174 FUNCTION libxsmm_diff_i64(a, b) 2175 INTEGER(C_LONG_LONG), INTENT(IN) :: a(:), b(:) 2176 LOGICAL(C_BOOL) :: libxsmm_diff_i64 2177 IF (SIZE(a, KIND=C_LONG_LONG) .EQ. SIZE(b, KIND=C_LONG_LONG)) & 2178 & THEN 2179 CALL libxsmm_xdiff(libxsmm_diff_i64, & 2180 & libxsmm_ptr(a), libxsmm_ptr(b), & 2181 & SIZE(a, KIND=C_LONG_LONG) * INT(8, KIND=C_LONG_LONG)) 2182 ELSE 2183 libxsmm_diff_i64 = LOGICAL(.TRUE., KIND=C_BOOL) 2184 END IF 2185 END FUNCTION 2186 END MODULE 2187 2188