1 SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) 2C .. Parameters .. 3C $Id: dgemm.F 24342 2013-06-22 05:32:15Z d3y133 $ 4 DOUBLE PRECISION ONE, ZERO 5 PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) 6 INTEGER MB, NB, KB 7#ifdef CACHE1M 8!x86_64 cache=1024K 9 PARAMETER (MB=256,NB=MB,KB=256) 10#elif CACHE6M 11!ia64 cache=6M 12 PARAMETER (MB=512,NB=MB,KB=512) 13#else 14!x86 cache=256K 15 PARAMETER (MB=64,NB=MB,KB=64) 16#endif 17C .. Scalar Arguments .. 18 DOUBLE PRECISION ALPHA, BETA 19 INTEGER K, LDA, LDB, LDC, M, N 20 CHARACTER TRANSA, TRANSB 21C .. Array Arguments .. 22C 23C Purpose 24C ======= 25C 26C DGEMM performs one of the matrix-matrix operations 27C 28C C := alpha*op( A )*op( B ) + beta*C, 29C 30C where op( X ) is one of 31C 32C op( X ) = X or op( X ) = X', 33C 34C alpha and beta are scalars, and A, B and C are matrices, with op( A ) 35C an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. 36C 37C Parameters 38C ========== 39C 40C TRANSA - CHARACTER*1. 41C On entry, TRANSA specifies the form of op( A ) to be used in 42C the matrix multiplication as follows: 43C 44C TRANSA = 'N' or 'n', op( A ) = A. 45C 46C TRANSA = 'T' or 't', op( A ) = A'. 47C 48C TRANSA = 'C' or 'c', op( A ) = A'. 49C 50C Unchanged on exit. 51C 52C TRANSB - CHARACTER*1. 53C On entry, TRANSB specifies the form of op( B ) to be used in 54C the matrix multiplication as follows: 55C 56C TRANSB = 'N' or 'n', op( B ) = B. 57C 58C TRANSB = 'T' or 't', op( B ) = B'. 59C 60C TRANSB = 'C' or 'c', op( B ) = B'. 61C 62C Unchanged on exit. 63C 64C M - INTEGER. 65C On entry, M specifies the number of rows of the matrix 66C op( A ) and of the matrix C. M must be at least zero. 67C Unchanged on exit. 68C 69C N - INTEGER. 70C On entry, N specifies the number of columns of the matrix 71C op( B ) and the number of columns of the matrix C. N must be 72C at least zero. 73C Unchanged on exit. 74C 75C K - INTEGER. 76C On entry, K specifies the number of columns of the matrix 77C op( A ) and the number of rows of the matrix op( B ). K must 78C be at least zero. 79C Unchanged on exit. 80C 81C ALPHA - DOUBLE PRECISION. 82C On entry, ALPHA specifies the scalar alpha. 83C Unchanged on exit. 84C 85C A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is 86C k when TRANSA = 'N' or 'n', and is m otherwise. 87C Before entry with TRANSA = 'N' or 'n', the leading m by k 88C part of the array A must contain the matrix A, otherwise 89C the leading k by m part of the array A must contain the 90C matrix A. 91C Unchanged on exit. 92C 93C LDA - INTEGER. 94C On entry, LDA specifies the first dimension of A as declared 95C in the calling (sub) program. When TRANSA = 'N' or 'n' then 96C LDA must be at least max( 1, m ), otherwise LDA must be at 97C least max( 1, k ). 98C Unchanged on exit. 99C 100C B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is 101C n when TRANSB = 'N' or 'n', and is k otherwise. 102C Before entry with TRANSB = 'N' or 'n', the leading k by n 103C part of the array B must contain the matrix B, otherwise 104C the leading n by k part of the array B must contain the 105C matrix B. 106C Unchanged on exit. 107C 108C LDB - INTEGER. 109C On entry, LDB specifies the first dimension of B as declared 110C in the calling (sub) program. When TRANSB = 'N' or 'n' then 111C LDB must be at least max( 1, k ), otherwise LDB must be at 112C least max( 1, n ). 113C Unchanged on exit. 114C 115C BETA - DOUBLE PRECISION. 116C On entry, BETA specifies the scalar beta. When BETA is 117C supplied as zero then C need not be set on input. 118C Unchanged on exit. 119C 120C C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). 121C Before entry, the leading m by n part of the array C must 122C contain the matrix C, except when beta is zero, in which 123C case C need not be set on entry. 124C On exit, the array C is overwritten by the m by n matrix 125C ( alpha*op( A )*op( B ) + beta*C ). 126C 127C LDC - INTEGER. 128C On entry, LDC specifies the first dimension of C as declared 129C in the calling (sub) program. LDC must be at least 130C max( 1, m ). 131C Unchanged on exit. 132C 133C 134C Level 3 Blas routine. 135C 136C -- Written on 8-February-1989. 137C Jack Dongarra, Argonne National Laboratory. 138C Iain Duff, AERE Harwell. 139C Jeremy Du Croz, Numerical Algorithms Group Ltd. 140C Sven Hammarling, Numerical Algorithms Group Ltd. 141* 142* This code comes from a report entitled: 143* The IBM RISC System/6000 and Linear Algebra Operations, by 144* Jack J. Dongarra, Peter Mayes, and Giuseppe Radicati di Brozolo, 145* University of Tennessee Computer Science Tech Report: CS - 90 - 122. 146C 147C 148 DOUBLE PRECISION A(LDA,*), B(LDB,*), C(LDC,*) 149C .. External Functions .. 150 LOGICAL LSAME 151 EXTERNAL LSAME 152C .. External Subroutines .. 153 EXTERNAL XERBLA 154C .. Intrinsic Functions .. 155 INTRINSIC MAX, MIN 156C .. Local Scalars .. 157 DOUBLE PRECISION T11, T12, T21, T22 158 INTEGER I, IDEPTH, II, ILEN, INFO, ISPAN, J, JDEPTH, JJ, 159 * JLEN, JSPAN, L, LL, LSPAN, NCOLA, NROWA, NROWB 160 LOGICAL NOTA, NOTB 161C .. Local Arrays .. 162 DOUBLE PRECISION CH(KB,MB), CH1(KB), CH2(KB) 163C .. Executable Statements .. 164C 165C Set NOTA and NOTB as true if A and B respectively are not 166C transposed and set NROWA, NCOLA and NROWB as the number of rows 167C and columns of A and the number of rows of B respectively. 168C 169 NOTA = LSAME(TRANSA,'N') 170 NOTB = LSAME(TRANSB,'N') 171 IF (NOTA) THEN 172 NROWA = M 173 NCOLA = K 174 ELSE 175 NROWA = K 176 NCOLA = M 177 END IF 178 IF (NOTB) THEN 179 NROWB = K 180 ELSE 181 NROWB = N 182 END IF 183C 184C Test the input parameters. 185C 186 INFO = 0 187 IF (( .NOT. NOTA) .AND. ( .NOT. LSAME(TRANSA,'C')) 188 * .AND. ( .NOT. LSAME(TRANSA,'T'))) THEN 189 INFO = 1 190 ELSE IF (( .NOT. NOTB) .AND. ( .NOT. LSAME(TRANSB,'C')) 191 * .AND. ( .NOT. LSAME(TRANSB,'T'))) THEN 192 INFO = 2 193 ELSE IF (M.LT.0) THEN 194 INFO = 3 195 ELSE IF (N.LT.0) THEN 196 INFO = 4 197 ELSE IF (K.LT.0) THEN 198 INFO = 5 199 ELSE IF (LDA.LT.MAX(1,NROWA)) THEN 200 INFO = 8 201 ELSE IF (LDB.LT.MAX(1,NROWB)) THEN 202 INFO = 10 203 ELSE IF (LDC.LT.MAX(1,M)) THEN 204 INFO = 13 205 END IF 206 IF (INFO.NE.0) THEN 207 CALL XERBLA('DGEMM ',INFO) 208 RETURN 209 END IF 210C 211C Quick return if possible. 212C 213 IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (((ALPHA.EQ.ZERO) .OR. (K.EQ.0)) 214 * .AND. (BETA.EQ.ONE))) RETURN 215 IF (BETA.EQ.ZERO) THEN 216 DO 40 J = 1, N 217 DO 20 I = 1, M 218 C(I,J) = ZERO 219 20 CONTINUE 220 40 CONTINUE 221 ELSE 222 DO 80 J = 1, N 223 DO 60 I = 1, M 224 C(I,J) = BETA*C(I,J) 225 60 CONTINUE 226 80 CONTINUE 227 END IF 228C 229C And if alpha.eq.zero. 230C 231 IF (ALPHA.EQ.ZERO) RETURN 232C 233C Start the operations. 234C 235 IF (NOTB) THEN 236 IF (NOTA) THEN 237C 238C Form C := C + alpha*A*B. 239C 240 DO 380 L = 1, K, KB 241 LSPAN = MIN(KB,K-L+1) 242 DO 360 I = 1, M, MB 243 IDEPTH = 2 244 ISPAN = MIN(MB,M-I+1) 245 ILEN = IDEPTH*(ISPAN/IDEPTH) 246 DO 120 II = I, I + ISPAN - 1 247 DO 100 LL = L, L + LSPAN - 1 248 CH(LL-L+1,II-I+1) = ALPHA*A(II,LL) 249 100 CONTINUE 250 120 CONTINUE 251 DO 340 J = 1, N, NB 252 JDEPTH = 2 253 JSPAN = MIN(NB,N-J+1) 254 JLEN = JDEPTH*(JSPAN/JDEPTH) 255 DO 220 JJ = J, J + JLEN - 1, JDEPTH 256 DO 160 II = I, I + ILEN - 1, IDEPTH 257 T11 = ZERO 258 T21 = ZERO 259 T12 = ZERO 260 T22 = ZERO 261 DO 140 LL = L, L + LSPAN - 1 262 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 263 T21 = T21 + CH(LL-L+1,II-I+2)*B(LL,JJ) 264 T12 = T12 + CH(LL-L+1,II-I+1)*B(LL,JJ+1) 265 T22 = T22 + CH(LL-L+1,II-I+2)*B(LL,JJ+1) 266 140 CONTINUE 267 C(II,JJ) = C(II,JJ) + T11 268 C(II+1,JJ) = C(II+1,JJ) + T21 269 C(II,JJ+1) = C(II,JJ+1) + T12 270 C(II+1,JJ+1) = C(II+1,JJ+1) + T22 271 160 CONTINUE 272 IF (ILEN.LT.ISPAN) THEN 273 DO 200 II = I + ILEN, I + ISPAN - 1 274 T11 = ZERO 275 T12 = ZERO 276 DO 180 LL = L, L + LSPAN - 1 277 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 278 T12 = T12 + CH(LL-L+1,II-I+1)*B(LL, 279 * JJ+1) 280 180 CONTINUE 281 C(II,JJ) = C(II,JJ) + T11 282 C(II,JJ+1) = C(II,JJ+1) + T12 283 200 CONTINUE 284 END IF 285 220 CONTINUE 286 IF (JLEN.LT.JSPAN) THEN 287 DO 320 JJ = J + JLEN, J + JSPAN - 1 288 DO 260 II = I, I + ILEN - 1, IDEPTH 289 T11 = ZERO 290 T21 = ZERO 291 DO 240 LL = L, L + LSPAN - 1 292 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 293 T21 = T21 + CH(LL-L+1,II-I+2)*B(LL,JJ) 294 240 CONTINUE 295 C(II,JJ) = C(II,JJ) + T11 296 C(II+1,JJ) = C(II+1,JJ) + T21 297 260 CONTINUE 298 IF (ILEN.LT.ISPAN) THEN 299 DO 300 II = I + ILEN, I + ISPAN - 1 300 T11 = ZERO 301 DO 280 LL = L, L + LSPAN - 1 302 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL, 303 * JJ) 304 280 CONTINUE 305 C(II,JJ) = C(II,JJ) + T11 306 300 CONTINUE 307 END IF 308 320 CONTINUE 309 END IF 310 340 CONTINUE 311 360 CONTINUE 312 380 CONTINUE 313 ELSE 314C 315C Form C := C + alpha*A'*B 316C 317 DO 680 I = 1, M, MB 318 IDEPTH = 2 319 ISPAN = MIN(MB,M-I+1) 320 ILEN = IDEPTH*(ISPAN/IDEPTH) 321 DO 660 L = 1, K, KB 322 LSPAN = MIN(KB,K-L+1) 323 DO 420 II = I, I + ISPAN - 1 324 DO 400 LL = L, L + LSPAN - 1 325 CH(LL-L+1,II-I+1) = ALPHA*A(LL,II) 326 400 CONTINUE 327 420 CONTINUE 328 DO 640 J = 1, N, NB 329 JDEPTH = 2 330 JSPAN = MIN(NB,N-J+1) 331 JLEN = JDEPTH*(JSPAN/JDEPTH) 332 DO 520 JJ = J, J + JLEN - 1, JDEPTH 333 DO 460 II = I, I + ILEN - 1, IDEPTH 334 T11 = ZERO 335 T21 = ZERO 336 T12 = ZERO 337 T22 = ZERO 338 DO 440 LL = L, L + LSPAN - 1 339 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 340 T21 = T21 + CH(LL-L+1,II-I+2)*B(LL,JJ) 341 T12 = T12 + CH(LL-L+1,II-I+1)*B(LL,JJ+1) 342 T22 = T22 + CH(LL-L+1,II-I+2)*B(LL,JJ+1) 343 440 CONTINUE 344 C(II,JJ) = C(II,JJ) + T11 345 C(II+1,JJ) = C(II+1,JJ) + T21 346 C(II,JJ+1) = C(II,JJ+1) + T12 347 C(II+1,JJ+1) = C(II+1,JJ+1) + T22 348 460 CONTINUE 349 IF (ILEN.LT.ISPAN) THEN 350 DO 500 II = I + ILEN, I + ISPAN - 1 351 T11 = ZERO 352 T12 = ZERO 353 DO 480 LL = L, L + LSPAN - 1 354 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 355 T12 = T12 + CH(LL-L+1,II-I+1)*B(LL, 356 * JJ+1) 357 480 CONTINUE 358 C(II,JJ) = C(II,JJ) + T11 359 C(II,JJ+1) = C(II,JJ+1) + T12 360 500 CONTINUE 361 END IF 362 520 CONTINUE 363 IF (JLEN.LT.JSPAN) THEN 364 DO 620 JJ = J + JLEN, J + JSPAN - 1 365 DO 560 II = I, I + ILEN - 1, IDEPTH 366 T11 = ZERO 367 T21 = ZERO 368 DO 540 LL = L, L + LSPAN - 1 369 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 370 T21 = T21 + CH(LL-L+1,II-I+2)*B(LL,JJ) 371 540 CONTINUE 372 C(II,JJ) = C(II,JJ) + T11 373 C(II+1,JJ) = C(II+1,JJ) + T21 374 560 CONTINUE 375 IF (ILEN.LT.ISPAN) THEN 376 DO 600 II = I + ILEN, I + ISPAN - 1 377 T11 = ZERO 378 DO 580 LL = L, L + LSPAN - 1 379 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL, 380 * JJ) 381 580 CONTINUE 382 C(II,JJ) = C(II,JJ) + T11 383 600 CONTINUE 384 END IF 385 620 CONTINUE 386 END IF 387 640 CONTINUE 388 660 CONTINUE 389 680 CONTINUE 390 END IF 391 ELSE 392 IF (NOTA) THEN 393C 394C Form C := C + alpha*A*B' 395C 396 DO 1000 J = 1, N, NB 397 JDEPTH = 2 398 JSPAN = MIN(NB,N-J+1) 399 JLEN = JDEPTH*(JSPAN/JDEPTH) 400 DO 980 L = 1, K, KB 401 LSPAN = MIN(KB,K-L+1) 402 DO 720 JJ = J, J + JSPAN - 1 403 DO 700 LL = L, L + LSPAN - 1 404 CH(LL-L+1,JJ-J+1) = ALPHA*B(JJ,LL) 405 700 CONTINUE 406 720 CONTINUE 407 DO 960 I = 1, M, MB 408 IDEPTH = 2 409 ISPAN = MIN(MB,M-I+1) 410 ILEN = IDEPTH*(ISPAN/IDEPTH) 411 DO 840 II = I, I + ILEN - 1, IDEPTH 412 DO 740 LL = L, L + LSPAN - 1 413 CH1(LL-L+1) = A(II,LL) 414 CH2(LL-L+1) = A(II+1,LL) 415 740 CONTINUE 416 DO 780 JJ = J, J + JLEN - 1, JDEPTH 417 T11 = ZERO 418 T21 = ZERO 419 T12 = ZERO 420 T22 = ZERO 421 DO 760 LL = L, L + LSPAN - 1 422 T11 = T11 + CH1(LL-L+1)*CH(LL-L+1,JJ-J+1) 423 T21 = T21 + CH2(LL-L+1)*CH(LL-L+1,JJ-J+1) 424 T12 = T12 + CH1(LL-L+1)*CH(LL-L+1,JJ-J+2) 425 T22 = T22 + CH2(LL-L+1)*CH(LL-L+1,JJ-J+2) 426 760 CONTINUE 427 C(II,JJ) = C(II,JJ) + T11 428 C(II+1,JJ) = C(II+1,JJ) + T21 429 C(II,JJ+1) = C(II,JJ+1) + T12 430 C(II+1,JJ+1) = C(II+1,JJ+1) + T22 431 780 CONTINUE 432 IF (JLEN.LT.JSPAN) THEN 433 DO 820 JJ = J + JLEN, J + JSPAN - 1 434 T11 = ZERO 435 T21 = ZERO 436 DO 800 LL = L, L + LSPAN - 1 437 T11 = T11 + A(II,LL)*CH(LL-L+1,JJ-J+1) 438 T21 = T21 + A(II+1,LL)*CH(LL-L+1, 439 * JJ-J+1) 440 800 CONTINUE 441 C(II,JJ) = C(II,JJ) + T11 442 C(II+1,JJ) = C(II+1,JJ) + T21 443 820 CONTINUE 444 END IF 445 840 CONTINUE 446 IF (ILEN.LT.ISPAN) THEN 447 DO 940 II = I + ILEN, I + ISPAN - 1 448 DO 880 JJ = J, J + JLEN - 1, JDEPTH 449 T11 = ZERO 450 T12 = ZERO 451 DO 860 LL = L, L + LSPAN - 1 452 T11 = T11 + A(II,LL)*CH(LL-L+1,JJ-J+1) 453 T12 = T12 + A(II,LL)*CH(LL-L+1,JJ-J+2) 454 860 CONTINUE 455 C(II,JJ) = C(II,JJ) + T11 456 C(II,JJ+1) = C(II,JJ+1) + T12 457 880 CONTINUE 458 IF (JLEN.LT.JSPAN) THEN 459 DO 920 JJ = J + JLEN, J + JSPAN - 1 460 T11 = ZERO 461 DO 900 LL = L, L + LSPAN - 1 462 T11 = T11 + A(II,LL)*CH(LL-L+1, 463 * JJ-J+1) 464 900 CONTINUE 465 C(II,JJ) = C(II,JJ) + T11 466 920 CONTINUE 467 END IF 468 940 CONTINUE 469 END IF 470 960 CONTINUE 471 980 CONTINUE 472 1000 CONTINUE 473 ELSE 474C 475C Form C := C + alpha*A'*B' 476C 477 DO 1300 J = 1, N, NB 478 JDEPTH = 2 479 JSPAN = MIN(NB,N-J+1) 480 JLEN = JDEPTH*(JSPAN/JDEPTH) 481 DO 1280 L = 1, K, KB 482 LSPAN = MIN(KB,K-L+1) 483 DO 1040 JJ = J, J + JSPAN - 1 484 DO 1020 LL = L, L + LSPAN - 1 485 CH(LL-L+1,JJ-J+1) = ALPHA*B(JJ,LL) 486 1020 CONTINUE 487 1040 CONTINUE 488 DO 1260 I = 1, M, MB 489 IDEPTH = 2 490 ISPAN = MIN(MB,M-I+1) 491 ILEN = IDEPTH*(ISPAN/IDEPTH) 492 DO 1140 II = I, I + ILEN - 1, IDEPTH 493 DO 1080 JJ = J, J + JLEN - 1, JDEPTH 494 T11 = ZERO 495 T21 = ZERO 496 T12 = ZERO 497 T22 = ZERO 498 DO 1060 LL = L, L + LSPAN - 1 499 T11 = T11 + A(LL,II)*CH(LL-L+1,JJ-J+1) 500 T21 = T21 + A(LL,II+1)*CH(LL-L+1,JJ-J+1) 501 T12 = T12 + A(LL,II)*CH(LL-L+1,JJ-J+2) 502 T22 = T22 + A(LL,II+1)*CH(LL-L+1,JJ-J+2) 503 1060 CONTINUE 504 C(II,JJ) = C(II,JJ) + T11 505 C(II+1,JJ) = C(II+1,JJ) + T21 506 C(II,JJ+1) = C(II,JJ+1) + T12 507 C(II+1,JJ+1) = C(II+1,JJ+1) + T22 508 1080 CONTINUE 509 IF (JLEN.LT.JSPAN) THEN 510 DO 1120 JJ = J + JLEN, J + JSPAN - 1 511 T11 = ZERO 512 T21 = ZERO 513 DO 1100 LL = L, L + LSPAN - 1 514 T11 = T11 + A(LL,II)*CH(LL-L+1,JJ-J+1) 515 T21 = T21 + A(LL,II+1)*CH(LL-L+1, 516 * JJ-J+1) 517 1100 CONTINUE 518 C(II,JJ) = C(II,JJ) + T11 519 C(II+1,JJ) = C(II+1,JJ) + T21 520 1120 CONTINUE 521 END IF 522 1140 CONTINUE 523 IF (ILEN.LT.ISPAN) THEN 524 DO 1240 II = I + ILEN, I + ISPAN - 1 525 DO 1180 JJ = J, J + JLEN - 1, JDEPTH 526 T11 = ZERO 527 T12 = ZERO 528 DO 1160 LL = L, L + LSPAN - 1 529 T11 = T11 + A(LL,II)*CH(LL-L+1,JJ-J+1) 530 T12 = T12 + A(LL,II)*CH(LL-L+1,JJ-J+2) 531 1160 CONTINUE 532 C(II,JJ) = C(II,JJ) + T11 533 C(II,JJ+1) = C(II,JJ+1) + T12 534 1180 CONTINUE 535 IF (JLEN.LT.JSPAN) THEN 536 DO 1220 JJ = J + JLEN, J + JSPAN - 1 537 T11 = ZERO 538 DO 1200 LL = L, L + LSPAN - 1 539 T11 = T11 + A(LL,II)*CH(LL-L+1, 540 * JJ-J+1) 541 1200 CONTINUE 542 C(II,JJ) = C(II,JJ) + T11 543 1220 CONTINUE 544 END IF 545 1240 CONTINUE 546 END IF 547 1260 CONTINUE 548 1280 CONTINUE 549 1300 CONTINUE 550 END IF 551 END IF 552C 553 RETURN 554C 555C End of DGEMM . 556C 557 END 558C 559C Integer*8 interface implementation of dgemm 560C 561 SUBROUTINE DGEMM_i8(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA, 562 + C,LDC) 563C .. Parameters .. 564C $Id: dgemm.F 24342 2013-06-22 05:32:15Z d3y133 $ 565 DOUBLE PRECISION ONE, ZERO 566 PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) 567 INTEGER MB, NB, KB 568#ifdef CACHE1M 569!x86_64 cache=1024K 570 PARAMETER (MB=256,NB=MB,KB=256) 571#elif CACHE6M 572!ia64 cache=6M 573 PARAMETER (MB=512,NB=MB,KB=512) 574#else 575!x86 cache=256K 576 PARAMETER (MB=64,NB=MB,KB=64) 577#endif 578C .. Scalar Arguments .. 579 DOUBLE PRECISION ALPHA, BETA 580 INTEGER*8 K, LDA, LDB, LDC, M, N 581 CHARACTER TRANSA, TRANSB 582C .. Array Arguments .. 583C 584C Purpose 585C ======= 586C 587C DGEMM performs one of the matrix-matrix operations 588C 589C C := alpha*op( A )*op( B ) + beta*C, 590C 591C where op( X ) is one of 592C 593C op( X ) = X or op( X ) = X', 594C 595C alpha and beta are scalars, and A, B and C are matrices, with op( A ) 596C an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. 597C 598C Parameters 599C ========== 600C 601C TRANSA - CHARACTER*1. 602C On entry, TRANSA specifies the form of op( A ) to be used in 603C the matrix multiplication as follows: 604C 605C TRANSA = 'N' or 'n', op( A ) = A. 606C 607C TRANSA = 'T' or 't', op( A ) = A'. 608C 609C TRANSA = 'C' or 'c', op( A ) = A'. 610C 611C Unchanged on exit. 612C 613C TRANSB - CHARACTER*1. 614C On entry, TRANSB specifies the form of op( B ) to be used in 615C the matrix multiplication as follows: 616C 617C TRANSB = 'N' or 'n', op( B ) = B. 618C 619C TRANSB = 'T' or 't', op( B ) = B'. 620C 621C TRANSB = 'C' or 'c', op( B ) = B'. 622C 623C Unchanged on exit. 624C 625C M - INTEGER. 626C On entry, M specifies the number of rows of the matrix 627C op( A ) and of the matrix C. M must be at least zero. 628C Unchanged on exit. 629C 630C N - INTEGER. 631C On entry, N specifies the number of columns of the matrix 632C op( B ) and the number of columns of the matrix C. N must be 633C at least zero. 634C Unchanged on exit. 635C 636C K - INTEGER. 637C On entry, K specifies the number of columns of the matrix 638C op( A ) and the number of rows of the matrix op( B ). K must 639C be at least zero. 640C Unchanged on exit. 641C 642C ALPHA - DOUBLE PRECISION. 643C On entry, ALPHA specifies the scalar alpha. 644C Unchanged on exit. 645C 646C A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is 647C k when TRANSA = 'N' or 'n', and is m otherwise. 648C Before entry with TRANSA = 'N' or 'n', the leading m by k 649C part of the array A must contain the matrix A, otherwise 650C the leading k by m part of the array A must contain the 651C matrix A. 652C Unchanged on exit. 653C 654C LDA - INTEGER. 655C On entry, LDA specifies the first dimension of A as declared 656C in the calling (sub) program. When TRANSA = 'N' or 'n' then 657C LDA must be at least max( 1, m ), otherwise LDA must be at 658C least max( 1, k ). 659C Unchanged on exit. 660C 661C B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is 662C n when TRANSB = 'N' or 'n', and is k otherwise. 663C Before entry with TRANSB = 'N' or 'n', the leading k by n 664C part of the array B must contain the matrix B, otherwise 665C the leading n by k part of the array B must contain the 666C matrix B. 667C Unchanged on exit. 668C 669C LDB - INTEGER. 670C On entry, LDB specifies the first dimension of B as declared 671C in the calling (sub) program. When TRANSB = 'N' or 'n' then 672C LDB must be at least max( 1, k ), otherwise LDB must be at 673C least max( 1, n ). 674C Unchanged on exit. 675C 676C BETA - DOUBLE PRECISION. 677C On entry, BETA specifies the scalar beta. When BETA is 678C supplied as zero then C need not be set on input. 679C Unchanged on exit. 680C 681C C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). 682C Before entry, the leading m by n part of the array C must 683C contain the matrix C, except when beta is zero, in which 684C case C need not be set on entry. 685C On exit, the array C is overwritten by the m by n matrix 686C ( alpha*op( A )*op( B ) + beta*C ). 687C 688C LDC - INTEGER. 689C On entry, LDC specifies the first dimension of C as declared 690C in the calling (sub) program. LDC must be at least 691C max( 1, m ). 692C Unchanged on exit. 693C 694C 695C Level 3 Blas routine. 696C 697C -- Written on 8-February-1989. 698C Jack Dongarra, Argonne National Laboratory. 699C Iain Duff, AERE Harwell. 700C Jeremy Du Croz, Numerical Algorithms Group Ltd. 701C Sven Hammarling, Numerical Algorithms Group Ltd. 702* 703* This code comes from a report entitled: 704* The IBM RISC System/6000 and Linear Algebra Operations, by 705* Jack J. Dongarra, Peter Mayes, and Giuseppe Radicati di Brozolo, 706* University of Tennessee Computer Science Tech Report: CS - 90 - 122. 707C 708C 709 DOUBLE PRECISION A(LDA,*), B(LDB,*), C(LDC,*) 710C .. External Functions .. 711 LOGICAL LSAME 712 EXTERNAL LSAME 713C .. External Subroutines .. 714 EXTERNAL XERBLA 715C .. Intrinsic Functions .. 716 INTRINSIC MAX, MIN 717C .. Local Scalars .. 718 DOUBLE PRECISION T11, T12, T21, T22 719 INTEGER I, IDEPTH, II, ILEN, INFO, ISPAN, J, JDEPTH, JJ, 720 * JLEN, JSPAN, L, LL, LSPAN, NCOLA, NROWA, NROWB 721 LOGICAL NOTA, NOTB 722C .. Local Arrays .. 723 DOUBLE PRECISION CH(KB,MB), CH1(KB), CH2(KB) 724C .. Executable Statements .. 725C 726C Set NOTA and NOTB as true if A and B respectively are not 727C transposed and set NROWA, NCOLA and NROWB as the number of rows 728C and columns of A and the number of rows of B respectively. 729C 730 NOTA = LSAME(TRANSA,'N') 731 NOTB = LSAME(TRANSB,'N') 732 IF (NOTA) THEN 733 NROWA = M 734 NCOLA = K 735 ELSE 736 NROWA = K 737 NCOLA = M 738 END IF 739 IF (NOTB) THEN 740 NROWB = K 741 ELSE 742 NROWB = N 743 END IF 744C 745C Test the input parameters. 746C 747 INFO = 0 748 IF (( .NOT. NOTA) .AND. ( .NOT. LSAME(TRANSA,'C')) 749 * .AND. ( .NOT. LSAME(TRANSA,'T'))) THEN 750 INFO = 1 751 ELSE IF (( .NOT. NOTB) .AND. ( .NOT. LSAME(TRANSB,'C')) 752 * .AND. ( .NOT. LSAME(TRANSB,'T'))) THEN 753 INFO = 2 754 ELSE IF (M.LT.0) THEN 755 INFO = 3 756 ELSE IF (N.LT.0) THEN 757 INFO = 4 758 ELSE IF (K.LT.0) THEN 759 INFO = 5 760 ELSE IF (LDA.LT.MAX(1,NROWA)) THEN 761 INFO = 8 762 ELSE IF (LDB.LT.MAX(1,NROWB)) THEN 763 INFO = 10 764 ELSE IF (LDC.LT.MAX(1,M)) THEN 765 INFO = 13 766 END IF 767 IF (INFO.NE.0) THEN 768 CALL XERBLA('DGEMM ',INFO) 769 RETURN 770 END IF 771C 772C Quick return if possible. 773C 774 IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (((ALPHA.EQ.ZERO) .OR. (K.EQ.0)) 775 * .AND. (BETA.EQ.ONE))) RETURN 776 IF (BETA.EQ.ZERO) THEN 777 DO 40 J = 1, N 778 DO 20 I = 1, M 779 C(I,J) = ZERO 780 20 CONTINUE 781 40 CONTINUE 782 ELSE 783 DO 80 J = 1, N 784 DO 60 I = 1, M 785 C(I,J) = BETA*C(I,J) 786 60 CONTINUE 787 80 CONTINUE 788 END IF 789C 790C And if alpha.eq.zero. 791C 792 IF (ALPHA.EQ.ZERO) RETURN 793C 794C Start the operations. 795C 796 IF (NOTB) THEN 797 IF (NOTA) THEN 798C 799C Form C := C + alpha*A*B. 800C 801 DO 380 L = 1, K, KB 802 LSPAN = MIN(KB,K-L+1) 803 DO 360 I = 1, M, MB 804 IDEPTH = 2 805 ISPAN = MIN(MB,M-I+1) 806 ILEN = IDEPTH*(ISPAN/IDEPTH) 807 DO 120 II = I, I + ISPAN - 1 808 DO 100 LL = L, L + LSPAN - 1 809 CH(LL-L+1,II-I+1) = ALPHA*A(II,LL) 810 100 CONTINUE 811 120 CONTINUE 812 DO 340 J = 1, N, NB 813 JDEPTH = 2 814 JSPAN = MIN(NB,N-J+1) 815 JLEN = JDEPTH*(JSPAN/JDEPTH) 816 DO 220 JJ = J, J + JLEN - 1, JDEPTH 817 DO 160 II = I, I + ILEN - 1, IDEPTH 818 T11 = ZERO 819 T21 = ZERO 820 T12 = ZERO 821 T22 = ZERO 822 DO 140 LL = L, L + LSPAN - 1 823 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 824 T21 = T21 + CH(LL-L+1,II-I+2)*B(LL,JJ) 825 T12 = T12 + CH(LL-L+1,II-I+1)*B(LL,JJ+1) 826 T22 = T22 + CH(LL-L+1,II-I+2)*B(LL,JJ+1) 827 140 CONTINUE 828 C(II,JJ) = C(II,JJ) + T11 829 C(II+1,JJ) = C(II+1,JJ) + T21 830 C(II,JJ+1) = C(II,JJ+1) + T12 831 C(II+1,JJ+1) = C(II+1,JJ+1) + T22 832 160 CONTINUE 833 IF (ILEN.LT.ISPAN) THEN 834 DO 200 II = I + ILEN, I + ISPAN - 1 835 T11 = ZERO 836 T12 = ZERO 837 DO 180 LL = L, L + LSPAN - 1 838 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 839 T12 = T12 + CH(LL-L+1,II-I+1)*B(LL, 840 * JJ+1) 841 180 CONTINUE 842 C(II,JJ) = C(II,JJ) + T11 843 C(II,JJ+1) = C(II,JJ+1) + T12 844 200 CONTINUE 845 END IF 846 220 CONTINUE 847 IF (JLEN.LT.JSPAN) THEN 848 DO 320 JJ = J + JLEN, J + JSPAN - 1 849 DO 260 II = I, I + ILEN - 1, IDEPTH 850 T11 = ZERO 851 T21 = ZERO 852 DO 240 LL = L, L + LSPAN - 1 853 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 854 T21 = T21 + CH(LL-L+1,II-I+2)*B(LL,JJ) 855 240 CONTINUE 856 C(II,JJ) = C(II,JJ) + T11 857 C(II+1,JJ) = C(II+1,JJ) + T21 858 260 CONTINUE 859 IF (ILEN.LT.ISPAN) THEN 860 DO 300 II = I + ILEN, I + ISPAN - 1 861 T11 = ZERO 862 DO 280 LL = L, L + LSPAN - 1 863 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL, 864 * JJ) 865 280 CONTINUE 866 C(II,JJ) = C(II,JJ) + T11 867 300 CONTINUE 868 END IF 869 320 CONTINUE 870 END IF 871 340 CONTINUE 872 360 CONTINUE 873 380 CONTINUE 874 ELSE 875C 876C Form C := C + alpha*A'*B 877C 878 DO 680 I = 1, M, MB 879 IDEPTH = 2 880 ISPAN = MIN(MB,M-I+1) 881 ILEN = IDEPTH*(ISPAN/IDEPTH) 882 DO 660 L = 1, K, KB 883 LSPAN = MIN(KB,K-L+1) 884 DO 420 II = I, I + ISPAN - 1 885 DO 400 LL = L, L + LSPAN - 1 886 CH(LL-L+1,II-I+1) = ALPHA*A(LL,II) 887 400 CONTINUE 888 420 CONTINUE 889 DO 640 J = 1, N, NB 890 JDEPTH = 2 891 JSPAN = MIN(NB,N-J+1) 892 JLEN = JDEPTH*(JSPAN/JDEPTH) 893 DO 520 JJ = J, J + JLEN - 1, JDEPTH 894 DO 460 II = I, I + ILEN - 1, IDEPTH 895 T11 = ZERO 896 T21 = ZERO 897 T12 = ZERO 898 T22 = ZERO 899 DO 440 LL = L, L + LSPAN - 1 900 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 901 T21 = T21 + CH(LL-L+1,II-I+2)*B(LL,JJ) 902 T12 = T12 + CH(LL-L+1,II-I+1)*B(LL,JJ+1) 903 T22 = T22 + CH(LL-L+1,II-I+2)*B(LL,JJ+1) 904 440 CONTINUE 905 C(II,JJ) = C(II,JJ) + T11 906 C(II+1,JJ) = C(II+1,JJ) + T21 907 C(II,JJ+1) = C(II,JJ+1) + T12 908 C(II+1,JJ+1) = C(II+1,JJ+1) + T22 909 460 CONTINUE 910 IF (ILEN.LT.ISPAN) THEN 911 DO 500 II = I + ILEN, I + ISPAN - 1 912 T11 = ZERO 913 T12 = ZERO 914 DO 480 LL = L, L + LSPAN - 1 915 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 916 T12 = T12 + CH(LL-L+1,II-I+1)*B(LL, 917 * JJ+1) 918 480 CONTINUE 919 C(II,JJ) = C(II,JJ) + T11 920 C(II,JJ+1) = C(II,JJ+1) + T12 921 500 CONTINUE 922 END IF 923 520 CONTINUE 924 IF (JLEN.LT.JSPAN) THEN 925 DO 620 JJ = J + JLEN, J + JSPAN - 1 926 DO 560 II = I, I + ILEN - 1, IDEPTH 927 T11 = ZERO 928 T21 = ZERO 929 DO 540 LL = L, L + LSPAN - 1 930 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL,JJ) 931 T21 = T21 + CH(LL-L+1,II-I+2)*B(LL,JJ) 932 540 CONTINUE 933 C(II,JJ) = C(II,JJ) + T11 934 C(II+1,JJ) = C(II+1,JJ) + T21 935 560 CONTINUE 936 IF (ILEN.LT.ISPAN) THEN 937 DO 600 II = I + ILEN, I + ISPAN - 1 938 T11 = ZERO 939 DO 580 LL = L, L + LSPAN - 1 940 T11 = T11 + CH(LL-L+1,II-I+1)*B(LL, 941 * JJ) 942 580 CONTINUE 943 C(II,JJ) = C(II,JJ) + T11 944 600 CONTINUE 945 END IF 946 620 CONTINUE 947 END IF 948 640 CONTINUE 949 660 CONTINUE 950 680 CONTINUE 951 END IF 952 ELSE 953 IF (NOTA) THEN 954C 955C Form C := C + alpha*A*B' 956C 957 DO 1000 J = 1, N, NB 958 JDEPTH = 2 959 JSPAN = MIN(NB,N-J+1) 960 JLEN = JDEPTH*(JSPAN/JDEPTH) 961 DO 980 L = 1, K, KB 962 LSPAN = MIN(KB,K-L+1) 963 DO 720 JJ = J, J + JSPAN - 1 964 DO 700 LL = L, L + LSPAN - 1 965 CH(LL-L+1,JJ-J+1) = ALPHA*B(JJ,LL) 966 700 CONTINUE 967 720 CONTINUE 968 DO 960 I = 1, M, MB 969 IDEPTH = 2 970 ISPAN = MIN(MB,M-I+1) 971 ILEN = IDEPTH*(ISPAN/IDEPTH) 972 DO 840 II = I, I + ILEN - 1, IDEPTH 973 DO 740 LL = L, L + LSPAN - 1 974 CH1(LL-L+1) = A(II,LL) 975 CH2(LL-L+1) = A(II+1,LL) 976 740 CONTINUE 977 DO 780 JJ = J, J + JLEN - 1, JDEPTH 978 T11 = ZERO 979 T21 = ZERO 980 T12 = ZERO 981 T22 = ZERO 982 DO 760 LL = L, L + LSPAN - 1 983 T11 = T11 + CH1(LL-L+1)*CH(LL-L+1,JJ-J+1) 984 T21 = T21 + CH2(LL-L+1)*CH(LL-L+1,JJ-J+1) 985 T12 = T12 + CH1(LL-L+1)*CH(LL-L+1,JJ-J+2) 986 T22 = T22 + CH2(LL-L+1)*CH(LL-L+1,JJ-J+2) 987 760 CONTINUE 988 C(II,JJ) = C(II,JJ) + T11 989 C(II+1,JJ) = C(II+1,JJ) + T21 990 C(II,JJ+1) = C(II,JJ+1) + T12 991 C(II+1,JJ+1) = C(II+1,JJ+1) + T22 992 780 CONTINUE 993 IF (JLEN.LT.JSPAN) THEN 994 DO 820 JJ = J + JLEN, J + JSPAN - 1 995 T11 = ZERO 996 T21 = ZERO 997 DO 800 LL = L, L + LSPAN - 1 998 T11 = T11 + A(II,LL)*CH(LL-L+1,JJ-J+1) 999 T21 = T21 + A(II+1,LL)*CH(LL-L+1, 1000 * JJ-J+1) 1001 800 CONTINUE 1002 C(II,JJ) = C(II,JJ) + T11 1003 C(II+1,JJ) = C(II+1,JJ) + T21 1004 820 CONTINUE 1005 END IF 1006 840 CONTINUE 1007 IF (ILEN.LT.ISPAN) THEN 1008 DO 940 II = I + ILEN, I + ISPAN - 1 1009 DO 880 JJ = J, J + JLEN - 1, JDEPTH 1010 T11 = ZERO 1011 T12 = ZERO 1012 DO 860 LL = L, L + LSPAN - 1 1013 T11 = T11 + A(II,LL)*CH(LL-L+1,JJ-J+1) 1014 T12 = T12 + A(II,LL)*CH(LL-L+1,JJ-J+2) 1015 860 CONTINUE 1016 C(II,JJ) = C(II,JJ) + T11 1017 C(II,JJ+1) = C(II,JJ+1) + T12 1018 880 CONTINUE 1019 IF (JLEN.LT.JSPAN) THEN 1020 DO 920 JJ = J + JLEN, J + JSPAN - 1 1021 T11 = ZERO 1022 DO 900 LL = L, L + LSPAN - 1 1023 T11 = T11 + A(II,LL)*CH(LL-L+1, 1024 * JJ-J+1) 1025 900 CONTINUE 1026 C(II,JJ) = C(II,JJ) + T11 1027 920 CONTINUE 1028 END IF 1029 940 CONTINUE 1030 END IF 1031 960 CONTINUE 1032 980 CONTINUE 1033 1000 CONTINUE 1034 ELSE 1035C 1036C Form C := C + alpha*A'*B' 1037C 1038 DO 1300 J = 1, N, NB 1039 JDEPTH = 2 1040 JSPAN = MIN(NB,N-J+1) 1041 JLEN = JDEPTH*(JSPAN/JDEPTH) 1042 DO 1280 L = 1, K, KB 1043 LSPAN = MIN(KB,K-L+1) 1044 DO 1040 JJ = J, J + JSPAN - 1 1045 DO 1020 LL = L, L + LSPAN - 1 1046 CH(LL-L+1,JJ-J+1) = ALPHA*B(JJ,LL) 1047 1020 CONTINUE 1048 1040 CONTINUE 1049 DO 1260 I = 1, M, MB 1050 IDEPTH = 2 1051 ISPAN = MIN(MB,M-I+1) 1052 ILEN = IDEPTH*(ISPAN/IDEPTH) 1053 DO 1140 II = I, I + ILEN - 1, IDEPTH 1054 DO 1080 JJ = J, J + JLEN - 1, JDEPTH 1055 T11 = ZERO 1056 T21 = ZERO 1057 T12 = ZERO 1058 T22 = ZERO 1059 DO 1060 LL = L, L + LSPAN - 1 1060 T11 = T11 + A(LL,II)*CH(LL-L+1,JJ-J+1) 1061 T21 = T21 + A(LL,II+1)*CH(LL-L+1,JJ-J+1) 1062 T12 = T12 + A(LL,II)*CH(LL-L+1,JJ-J+2) 1063 T22 = T22 + A(LL,II+1)*CH(LL-L+1,JJ-J+2) 1064 1060 CONTINUE 1065 C(II,JJ) = C(II,JJ) + T11 1066 C(II+1,JJ) = C(II+1,JJ) + T21 1067 C(II,JJ+1) = C(II,JJ+1) + T12 1068 C(II+1,JJ+1) = C(II+1,JJ+1) + T22 1069 1080 CONTINUE 1070 IF (JLEN.LT.JSPAN) THEN 1071 DO 1120 JJ = J + JLEN, J + JSPAN - 1 1072 T11 = ZERO 1073 T21 = ZERO 1074 DO 1100 LL = L, L + LSPAN - 1 1075 T11 = T11 + A(LL,II)*CH(LL-L+1,JJ-J+1) 1076 T21 = T21 + A(LL,II+1)*CH(LL-L+1, 1077 * JJ-J+1) 1078 1100 CONTINUE 1079 C(II,JJ) = C(II,JJ) + T11 1080 C(II+1,JJ) = C(II+1,JJ) + T21 1081 1120 CONTINUE 1082 END IF 1083 1140 CONTINUE 1084 IF (ILEN.LT.ISPAN) THEN 1085 DO 1240 II = I + ILEN, I + ISPAN - 1 1086 DO 1180 JJ = J, J + JLEN - 1, JDEPTH 1087 T11 = ZERO 1088 T12 = ZERO 1089 DO 1160 LL = L, L + LSPAN - 1 1090 T11 = T11 + A(LL,II)*CH(LL-L+1,JJ-J+1) 1091 T12 = T12 + A(LL,II)*CH(LL-L+1,JJ-J+2) 1092 1160 CONTINUE 1093 C(II,JJ) = C(II,JJ) + T11 1094 C(II,JJ+1) = C(II,JJ+1) + T12 1095 1180 CONTINUE 1096 IF (JLEN.LT.JSPAN) THEN 1097 DO 1220 JJ = J + JLEN, J + JSPAN - 1 1098 T11 = ZERO 1099 DO 1200 LL = L, L + LSPAN - 1 1100 T11 = T11 + A(LL,II)*CH(LL-L+1, 1101 * JJ-J+1) 1102 1200 CONTINUE 1103 C(II,JJ) = C(II,JJ) + T11 1104 1220 CONTINUE 1105 END IF 1106 1240 CONTINUE 1107 END IF 1108 1260 CONTINUE 1109 1280 CONTINUE 1110 1300 CONTINUE 1111 END IF 1112 END IF 1113C 1114 RETURN 1115C 1116C End of DGEMM . 1117C 1118 END 1119C 1120