1 /* 2 * -- High Performance Computing Linpack Benchmark (HPL) 3 * HPL - 2.3 - December 2, 2018 4 * Antoine P. Petitet 5 * University of Tennessee, Knoxville 6 * Innovative Computing Laboratory 7 * (C) Copyright 2000-2008 All Rights Reserved 8 * 9 * -- Copyright notice and Licensing terms: 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions, and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * 3. All advertising materials mentioning features or use of this 23 * software must display the following acknowledgement: 24 * This product includes software developed at the University of 25 * Tennessee, Knoxville, Innovative Computing Laboratory. 26 * 27 * 4. The name of the University, the name of the Laboratory, or the 28 * names of its contributors may not be used to endorse or promote 29 * products derived from this software without specific written 30 * permission. 31 * 32 * -- Disclaimer: 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY 38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * --------------------------------------------------------------------- 46 */ 47 /* 48 * Include files 49 */ 50 #include "hpl.h" 51 52 #ifndef HPL_dgemm 53 54 #ifdef HPL_CALL_VSIPL HPL_daxpy(const int N,const double ALPHA,const double * X,const int INCX,double * Y,const int INCY)55 56 #ifdef STDC_HEADERS 57 static void HPL_dgemmNN 58 ( 59 const int M, 60 const int N, 61 const int K, 62 const double ALPHA, 63 const double * A, 64 const int LDA, 65 const double * B, 66 const int LDB, 67 const double BETA, 68 double * C, 69 const int LDC 70 ) 71 #else 72 static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) 73 const int K, LDA, LDB, LDC, M, N; 74 const double ALPHA, BETA; 75 const double * A, * B; 76 double * C; 77 #endif 78 { 79 register double t0; 80 int i, iail, iblj, icij, j, jal, jbj, jcj, l; 81 82 for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) 83 { 84 HPL_dscal( M, BETA, C+jcj, 1 ); 85 for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 ) 86 { 87 t0 = ALPHA * B[iblj]; 88 for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) 89 { C[icij] += A[iail] * t0; } 90 } 91 } 92 } 93 94 #ifdef STDC_HEADERS 95 static void HPL_dgemmNT 96 ( 97 const int M, 98 const int N, 99 const int K, 100 const double ALPHA, 101 const double * A, 102 const int LDA, 103 const double * B, 104 const int LDB, 105 const double BETA, 106 double * C, 107 const int LDC 108 ) 109 #else 110 static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) 111 const int K, LDA, LDB, LDC, M, N; 112 const double ALPHA, BETA; 113 const double * A, * B; 114 double * C; 115 #endif 116 { 117 register double t0; 118 int i, iail, ibj, ibjl, icij, j, jal, jcj, l; 119 120 for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) 121 { 122 HPL_dscal( M, BETA, C+jcj, 1 ); 123 for( l = 0, jal = 0, ibjl = ibj; l < K; l++, jal += LDA, ibjl += LDB ) 124 { 125 t0 = ALPHA * B[ibjl]; 126 for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) 127 { C[icij] += A[iail] * t0; } 128 } 129 } 130 } 131 132 #ifdef STDC_HEADERS 133 static void HPL_dgemmTN 134 ( 135 const int M, 136 const int N, 137 const int K, 138 const double ALPHA, 139 const double * A, 140 const int LDA, 141 const double * B, 142 const int LDB, 143 const double BETA, 144 double * C, 145 const int LDC 146 ) 147 #else 148 static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) 149 const int K, LDA, LDB, LDC, M, N; 150 const double ALPHA, BETA; 151 const double * A, * B; 152 double * C; 153 #endif 154 { 155 register double t0; 156 int i, iai, iail, iblj, icij, j, jbj, jcj, l; 157 158 for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) 159 { 160 for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA ) 161 { 162 t0 = HPL_rzero; 163 for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 ) 164 { t0 += A[iail] * B[iblj]; } 165 if( BETA == HPL_rzero ) C[icij] = HPL_rzero; 166 else C[icij] *= BETA; 167 C[icij] += ALPHA * t0; 168 } 169 } 170 } 171 172 #ifdef STDC_HEADERS 173 static void HPL_dgemmTT 174 ( 175 const int M, 176 const int N, 177 const int K, 178 const double ALPHA, 179 const double * A, 180 const int LDA, 181 const double * B, 182 const int LDB, 183 const double BETA, 184 double * C, 185 const int LDC 186 ) 187 #else 188 static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) 189 const int K, LDA, LDB, LDC, M, N; 190 const double ALPHA, BETA; 191 const double * A, * B; 192 double * C; 193 #endif 194 { 195 register double t0; 196 int i, iali, ibj, ibjl, icij, j, jai, jcj, l; 197 198 for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) 199 { 200 for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA ) 201 { 202 t0 = HPL_rzero; 203 for( l = 0, iali = jai, ibjl = ibj; 204 l < K; l++, iali += 1, ibjl += LDB ) t0 += A[iali] * B[ibjl]; 205 if( BETA == HPL_rzero ) C[icij] = HPL_rzero; 206 else C[icij] *= BETA; 207 C[icij] += ALPHA * t0; 208 } 209 } 210 } 211 212 #ifdef STDC_HEADERS 213 static void HPL_dgemm0 214 ( 215 const enum HPL_TRANS TRANSA, 216 const enum HPL_TRANS TRANSB, 217 const int M, 218 const int N, 219 const int K, 220 const double ALPHA, 221 const double * A, 222 const int LDA, 223 const double * B, 224 const int LDB, 225 const double BETA, 226 double * C, 227 const int LDC 228 ) 229 #else 230 static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, 231 BETA, C, LDC ) 232 const enum HPL_TRANS TRANSA, TRANSB; 233 const int K, LDA, LDB, LDC, M, N; 234 const double ALPHA, BETA; 235 const double * A, * B; 236 double * C; 237 #endif 238 { 239 int i, j; 240 241 if( ( M == 0 ) || ( N == 0 ) || 242 ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) && 243 ( BETA == HPL_rone ) ) ) return; 244 245 if( ALPHA == HPL_rzero ) 246 { 247 for( j = 0; j < N; j++ ) 248 { for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; } 249 return; 250 } 251 252 if( TRANSB == HplNoTrans ) 253 { 254 if( TRANSA == HplNoTrans ) 255 { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } 256 else 257 { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } 258 } 259 else 260 { 261 if( TRANSA == HplNoTrans ) 262 { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } 263 else 264 { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } 265 } 266 } 267 268 #endif 269 270 #ifdef STDC_HEADERS 271 void HPL_dgemm 272 ( 273 const enum HPL_ORDER ORDER, 274 const enum HPL_TRANS TRANSA, 275 const enum HPL_TRANS TRANSB, 276 const int M, 277 const int N, 278 const int K, 279 const double ALPHA, 280 const double * A, 281 const int LDA, 282 const double * B, 283 const int LDB, 284 const double BETA, 285 double * C, 286 const int LDC 287 ) 288 #else 289 void HPL_dgemm 290 ( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) 291 const enum HPL_ORDER ORDER; 292 const enum HPL_TRANS TRANSA; 293 const enum HPL_TRANS TRANSB; 294 const int M; 295 const int N; 296 const int K; 297 const double ALPHA; 298 const double * A; 299 const int LDA; 300 const double * B; 301 const int LDB; 302 const double BETA; 303 double * C; 304 const int LDC; 305 #endif 306 { 307 /* 308 * Purpose 309 * ======= 310 * 311 * HPL_dgemm performs one of the matrix-matrix operations 312 * 313 * C := alpha * op( A ) * op( B ) + beta * C 314 * 315 * where op( X ) is one of 316 * 317 * op( X ) = X or op( X ) = X^T. 318 * 319 * Alpha and beta are scalars, and A, B and C are matrices, with op(A) 320 * an m by k matrix, op(B) a k by n matrix and C an m by n matrix. 321 * 322 * Arguments 323 * ========= 324 * 325 * ORDER (local input) const enum HPL_ORDER 326 * On entry, ORDER specifies the storage format of the operands 327 * as follows: 328 * ORDER = HplRowMajor, 329 * ORDER = HplColumnMajor. 330 * 331 * TRANSA (local input) const enum HPL_TRANS 332 * On entry, TRANSA specifies the form of op(A) to be used in 333 * the matrix-matrix operation follows: 334 * TRANSA==HplNoTrans : op( A ) = A, 335 * TRANSA==HplTrans : op( A ) = A^T, 336 * TRANSA==HplConjTrans : op( A ) = A^T. 337 * 338 * TRANSB (local input) const enum HPL_TRANS 339 * On entry, TRANSB specifies the form of op(B) to be used in 340 * the matrix-matrix operation follows: 341 * TRANSB==HplNoTrans : op( B ) = B, 342 * TRANSB==HplTrans : op( B ) = B^T, 343 * TRANSB==HplConjTrans : op( B ) = B^T. 344 * 345 * M (local input) const int 346 * On entry, M specifies the number of rows of the matrix 347 * op(A) and of the matrix C. M must be at least zero. 348 * 349 * N (local input) const int 350 * On entry, N specifies the number of columns of the matrix 351 * op(B) and the number of columns of the matrix C. N must be 352 * at least zero. 353 * 354 * K (local input) const int 355 * On entry, K specifies the number of columns of the matrix 356 * op(A) and the number of rows of the matrix op(B). K must be 357 * be at least zero. 358 * 359 * ALPHA (local input) const double 360 * On entry, ALPHA specifies the scalar alpha. When ALPHA is 361 * supplied as zero then the elements of the matrices A and B 362 * need not be set on input. 363 * 364 * A (local input) const double * 365 * On entry, A is an array of dimension (LDA,ka), where ka is 366 * k when TRANSA==HplNoTrans, and is m otherwise. Before 367 * entry with TRANSA==HplNoTrans, the leading m by k part of 368 * the array A must contain the matrix A, otherwise the leading 369 * k by m part of the array A must contain the matrix A. 370 * 371 * LDA (local input) const int 372 * On entry, LDA specifies the first dimension of A as declared 373 * in the calling (sub) program. When TRANSA==HplNoTrans then 374 * LDA must be at least max(1,m), otherwise LDA must be at least 375 * max(1,k). 376 * 377 * B (local input) const double * 378 * On entry, B is an array of dimension (LDB,kb), where kb is 379 * n when TRANSB==HplNoTrans, and is k otherwise. Before 380 * entry with TRANSB==HplNoTrans, the leading k by n part of 381 * the array B must contain the matrix B, otherwise the leading 382 * n by k part of the array B must contain the matrix B. 383 * 384 * LDB (local input) const int 385 * On entry, LDB specifies the first dimension of B as declared 386 * in the calling (sub) program. When TRANSB==HplNoTrans then 387 * LDB must be at least max(1,k), otherwise LDB must be at least 388 * max(1,n). 389 * 390 * BETA (local input) const double 391 * On entry, BETA specifies the scalar beta. When BETA is 392 * supplied as zero then the elements of the matrix C need 393 * not be set on input. 394 * 395 * C (local input/output) double * 396 * On entry, C is an array of dimension (LDC,n). Before entry, 397 * the leading m by n part of the array C must contain the 398 * matrix C, except when beta is zero, in which case C need not 399 * be set on entry. On exit, the array C is overwritten by the 400 * m by n matrix ( alpha*op( A )*op( B ) + beta*C ). 401 * 402 * LDC (local input) const int 403 * On entry, LDC specifies the first dimension of C as declared 404 * in the calling (sub) program. LDC must be at least 405 * max(1,m). 406 * 407 * --------------------------------------------------------------------- 408 */ 409 #ifdef HPL_CALL_CBLAS 410 cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, 411 BETA, C, LDC ); 412 #endif 413 #ifdef HPL_CALL_VSIPL 414 if( ORDER == HplColumnMajor ) 415 { 416 HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, 417 C, LDC ); 418 } 419 else 420 { 421 HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA, 422 C, LDC ); 423 } 424 #endif 425 #ifdef HPL_CALL_FBLAS 426 double alpha = ALPHA, beta = BETA; 427 #ifdef StringSunStyle 428 #ifdef HPL_USE_F77_INTEGER_DEF 429 F77_INTEGER IONE = 1; 430 #else 431 int IONE = 1; 432 #endif 433 #endif 434 #ifdef StringStructVal 435 F77_CHAR ftransa; 436 F77_CHAR ftransb; 437 #endif 438 #ifdef StringStructPtr 439 F77_CHAR ftransa; 440 F77_CHAR ftransb; 441 #endif 442 #ifdef StringCrayStyle 443 F77_CHAR ftransa; 444 F77_CHAR ftransb; 445 #endif 446 #ifdef HPL_USE_F77_INTEGER_DEF 447 const F77_INTEGER F77M = M, F77N = N, F77K = K, 448 F77lda = LDA, F77ldb = LDB, F77ldc = LDC; 449 #else 450 #define F77M M 451 #define F77N N 452 #define F77K K 453 #define F77lda LDA 454 #define F77ldb LDB 455 #define F77ldc LDC 456 #endif 457 char ctransa, ctransb; 458 459 if( TRANSA == HplNoTrans ) ctransa = 'N'; 460 else if( TRANSA == HplTrans ) ctransa = 'T'; 461 else ctransa = 'C'; 462 463 if( TRANSB == HplNoTrans ) ctransb = 'N'; 464 else if( TRANSB == HplTrans ) ctransb = 'T'; 465 else ctransb = 'C'; 466 467 if( ORDER == HplColumnMajor ) 468 { 469 #ifdef StringSunStyle 470 F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, 471 B, &F77ldb, &beta, C, &F77ldc, IONE, IONE ); 472 #endif 473 #ifdef StringCrayStyle 474 ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); 475 F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, 476 B, &F77ldb, &beta, C, &F77ldc ); 477 #endif 478 #ifdef StringStructVal 479 ftransa.len = 1; ftransa.cp = &ctransa; 480 ftransb.len = 1; ftransb.cp = &ctransb; 481 F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, 482 B, &F77ldb, &beta, C, &F77ldc ); 483 #endif 484 #ifdef StringStructPtr 485 ftransa.len = 1; ftransa.cp = &ctransa; 486 ftransb.len = 1; ftransb.cp = &ctransb; 487 F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, 488 B, &F77ldb, &beta, C, &F77ldc ); 489 #endif 490 } 491 else 492 { 493 #ifdef StringSunStyle 494 F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, 495 A, &F77lda, &beta, C, &F77ldc, IONE, IONE ); 496 #endif 497 #ifdef StringCrayStyle 498 ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); 499 F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, 500 A, &F77lda, &beta, C, &F77ldc ); 501 #endif 502 #ifdef StringStructVal 503 ftransa.len = 1; ftransa.cp = &ctransa; 504 ftransb.len = 1; ftransb.cp = &ctransb; 505 F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, 506 A, &F77lda, &beta, C, &F77ldc ); 507 #endif 508 #ifdef StringStructPtr 509 ftransa.len = 1; ftransa.cp = &ctransa; 510 ftransb.len = 1; ftransb.cp = &ctransb; 511 F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, 512 A, &F77lda, &beta, C, &F77ldc ); 513 #endif 514 } 515 #endif 516 /* 517 * End of HPL_dgemm 518 */ 519 } 520 521 #endif 522