1 SUBROUTINE DGEMM_OMP(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB, 2 > BETA,C,LDC) 3* .. Scalar Arguments .. 4 DOUBLE PRECISION ALPHA,BETA 5 INTEGER K,LDA,LDB,LDC,M,N 6 CHARACTER TRANSA,TRANSB 7* .. 8* .. Array Arguments .. 9 DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) 10* .. 11* 12* Purpose 13* ======= 14* 15* DGEMM performs one of the matrix-matrix operations 16* 17* C := alpha*op( A )*op( B ) + beta*C, 18* 19* where op( X ) is one of 20* 21* op( X ) = X or op( X ) = X', 22* 23* alpha and beta are scalars, and A, B and C are matrices, with op( A ) 24* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. 25* 26* Arguments 27* ========== 28* 29* TRANSA - CHARACTER*1. 30* On entry, TRANSA specifies the form of op( A ) to be used in 31* the matrix multiplication as follows: 32* 33* TRANSA = 'N' or 'n', op( A ) = A. 34* 35* TRANSA = 'T' or 't', op( A ) = A'. 36* 37* TRANSA = 'C' or 'c', op( A ) = A'. 38* 39* Unchanged on exit. 40* 41* TRANSB - CHARACTER*1. 42* On entry, TRANSB specifies the form of op( B ) to be used in 43* the matrix multiplication as follows: 44* 45* TRANSB = 'N' or 'n', op( B ) = B. 46* 47* TRANSB = 'T' or 't', op( B ) = B'. 48* 49* TRANSB = 'C' or 'c', op( B ) = B'. 50* 51* Unchanged on exit. 52* 53* M - INTEGER. 54* On entry, M specifies the number of rows of the matrix 55* op( A ) and of the matrix C. M must be at least zero. 56* Unchanged on exit. 57* 58* N - INTEGER. 59* On entry, N specifies the number of columns of the matrix 60* op( B ) and the number of columns of the matrix C. N must be 61* at least zero. 62* Unchanged on exit. 63* 64* K - INTEGER. 65* On entry, K specifies the number of columns of the matrix 66* op( A ) and the number of rows of the matrix op( B ). K must 67* be at least zero. 68* Unchanged on exit. 69* 70* ALPHA - DOUBLE PRECISION. 71* On entry, ALPHA specifies the scalar alpha. 72* Unchanged on exit. 73* 74* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is 75* k when TRANSA = 'N' or 'n', and is m otherwise. 76* Before entry with TRANSA = 'N' or 'n', the leading m by k 77* part of the array A must contain the matrix A, otherwise 78* the leading k by m part of the array A must contain the 79* matrix A. 80* Unchanged on exit. 81* 82* LDA - INTEGER. 83* On entry, LDA specifies the first dimension of A as declared 84* in the calling (sub) program. When TRANSA = 'N' or 'n' then 85* LDA must be at least max( 1, m ), otherwise LDA must be at 86* least max( 1, k ). 87* Unchanged on exit. 88* 89* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is 90* n when TRANSB = 'N' or 'n', and is k otherwise. 91* Before entry with TRANSB = 'N' or 'n', the leading k by n 92* part of the array B must contain the matrix B, otherwise 93* the leading n by k part of the array B must contain the 94* matrix B. 95* Unchanged on exit. 96* 97* LDB - INTEGER. 98* On entry, LDB specifies the first dimension of B as declared 99* in the calling (sub) program. When TRANSB = 'N' or 'n' then 100* LDB must be at least max( 1, k ), otherwise LDB must be at 101* least max( 1, n ). 102* Unchanged on exit. 103* 104* BETA - DOUBLE PRECISION. 105* On entry, BETA specifies the scalar beta. When BETA is 106* supplied as zero then C need not be set on input. 107* Unchanged on exit. 108* 109* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). 110* Before entry, the leading m by n part of the array C must 111* contain the matrix C, except when beta is zero, in which 112* case C need not be set on entry. 113* On exit, the array C is overwritten by the m by n matrix 114* ( alpha*op( A )*op( B ) + beta*C ). 115* 116* LDC - INTEGER. 117* On entry, LDC specifies the first dimension of C as declared 118* in the calling (sub) program. LDC must be at least 119* max( 1, m ). 120* Unchanged on exit. 121* 122* 123* Level 3 Blas routine. 124* 125* -- Written on 8-February-1989. 126* Jack Dongarra, Argonne National Laboratory. 127* Iain Duff, AERE Harwell. 128* Jeremy Du Croz, Numerical Algorithms Group Ltd. 129* Sven Hammarling, Numerical Algorithms Group Ltd. 130* 131* 132* .. External Functions .. 133 LOGICAL LSAME 134 EXTERNAL LSAME 135 INTEGER Parallel_threadid,Parallel_nthreads 136 EXTERNAL Parallel_threadid,Parallel_nthreads 137* .. 138* .. External Subroutines .. 139 EXTERNAL XERBLA 140* .. 141* .. Intrinsic Functions .. 142 INTRINSIC MAX 143* .. 144 INTEGER NB 145 PARAMETER (NB=32) 146 147* .. Local Scalars .. 148 DOUBLE PRECISION TEMP1(NB),TEMP 149 INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB 150 LOGICAL NOTA,NOTB 151 integer cidj,tid,nthr,Mtid,Itid,r 152* .. 153* .. Parameters .. 154 DOUBLE PRECISION ONE,ZERO 155 PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) 156* .. 157* 158 nthr = Parallel_nthreads() 159 if (nthr.le.1) then 160 call DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB, 161 > BETA,C,LDC) 162 return 163 end if 164 tid = Parallel_threadid() 165 166* Set NOTA and NOTB as true if A and B respectively are not 167* transposed and set NROWA, NCOLA and NROWB as the number of rows 168* and columns of A and the number of rows of B respectively. 169* 170 NOTA = LSAME(TRANSA,'N') 171 NOTB = LSAME(TRANSB,'N') 172 IF (NOTA) THEN 173 NROWA = M 174 NCOLA = K 175 ELSE 176 NROWA = K 177 NCOLA = M 178 END IF 179 IF (NOTB) THEN 180 NROWB = K 181 ELSE 182 NROWB = N 183 END IF 184* 185* Test the input parameters. 186* 187 INFO = 0 188 IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. 189 + (.NOT.LSAME(TRANSA,'T'))) THEN 190 INFO = 1 191 ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. 192 + (.NOT.LSAME(TRANSB,'T'))) THEN 193 INFO = 2 194 ELSE IF (M.LT.0) THEN 195 INFO = 3 196 ELSE IF (N.LT.0) THEN 197 INFO = 4 198 ELSE IF (K.LT.0) THEN 199 INFO = 5 200 ELSE IF (LDA.LT.MAX(1,NROWA)) THEN 201 INFO = 8 202 ELSE IF (LDB.LT.MAX(1,NROWB)) THEN 203 INFO = 10 204 ELSE IF (LDC.LT.MAX(1,M)) THEN 205 INFO = 13 206 END IF 207 IF (INFO.NE.0) THEN 208 CALL XERBLA('DGEMM_OMP',INFO) 209 RETURN 210 END IF 211* 212* Quick return if possible. 213* 214 IF ((M.EQ.0) .OR. (N.EQ.0) .OR. 215 + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN 216 217 218* 219* And if alpha.eq.zero. 220 221* 222 IF (ALPHA.EQ.ZERO) THEN 223 IF (BETA.EQ.ZERO) THEN 224 cidj = tid 225 DO 20 J = 1,N 226 DO 10 I = cidj+1,M 227 C(I,J) = ZERO 228 10 CONTINUE 229 cidj = mod(cidj+M,nthr) 230 20 CONTINUE 231 ELSE 232 cidj = tid 233 DO 40 J = 1,N 234 DO 30 I = cidj+1,M,nthr 235 C(I,J) = BETA*C(I,J) 236 30 CONTINUE 237 cidj = mod(cidj+M,nthr) 238 40 CONTINUE 239 END IF 240!$OMP BARRIER 241 RETURN 242 END IF 243* 244* Start the operations. 245* 246 IF (NOTB) THEN 247 IF (NOTA) THEN 248* 249* Form C := alpha*A*B + beta*C. 250* 251 r = mod(M,nthr) 252 if (tid.lt.r) then 253 Itid = tid*(M/nthr+1) + 1 254 Mtid = M/nthr + 1 255 else 256 Itid = r + tid*(M/nthr) + 1 257 Mtid = M/nthr 258 end if 259 call DGEMM(TRANSA,TRANSB,Mtid,N,K,ALPHA,A(Itid,1),LDA,B,LDB, 260 > BETA,C(Itid,1),LDC) 261 262 263 ELSE 264* 265* Form C := alpha*A'*B + beta*C 266* 267 cidj = tid 268 DO 120 J = 1,N 269 DO 110 I = cidj+1,M,nthr 270 TEMP = ZERO 271 DO 100 L = 1,K 272 TEMP = TEMP + A(L,I)*B(L,J) 273 100 CONTINUE 274 IF (BETA.EQ.ZERO) THEN 275 C(I,J) = ALPHA*TEMP 276 ELSE 277 C(I,J) = ALPHA*TEMP + BETA*C(I,J) 278 END IF 279 110 CONTINUE 280 cidj = mod(cidj+M,nthr) 281 120 CONTINUE 282 END IF 283 ELSE 284 IF (NOTA) THEN 285* 286* Form C := alpha*A*B' + beta*C 287 r = mod(M,nthr) 288 if (tid.lt.r) then 289 Itid = tid*(M/nthr+1) + 1 290 Mtid = M/nthr + 1 291 else 292 Itid = r + tid*(M/nthr) + 1 293 Mtid = M/nthr 294 end if 295 call DGEMM(TRANSA,TRANSB,Mtid,N,K,ALPHA,A(Itid,1),LDA,B,LDB, 296 > BETA,C(Itid,1),LDC) 297 298 ELSE 299* 300* Form C := alpha*A'*B' + beta*C 301* 302 cidj = tid 303 DO 200 J = 1,N 304 DO 190 I = cidj+1,M,nthr 305 TEMP = ZERO 306 DO 180 L = 1,K 307 TEMP = TEMP + A(L,I)*B(J,L) 308 180 CONTINUE 309 IF (BETA.EQ.ZERO) THEN 310 C(I,J) = ALPHA*TEMP 311 ELSE 312 C(I,J) = ALPHA*TEMP + BETA*C(I,J) 313 END IF 314 190 CONTINUE 315 cidj = mod(cidj+M,nthr) 316 200 CONTINUE 317 END IF 318 END IF 319* 320!$OMP BARRIER 321 RETURN 322* 323* End of DGEMM_OMP . 324* 325 END 326 327 328 329 330**************************************************** 331***** subroutine with no OMP barrier at the end **** 332**************************************************** 333 334 SUBROUTINE DGEMM_OMP0(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB, 335 > BETA,C,LDC) 336* .. Scalar Arguments .. 337 DOUBLE PRECISION ALPHA,BETA 338 INTEGER K,LDA,LDB,LDC,M,N 339 CHARACTER TRANSA,TRANSB 340* .. 341* .. Array Arguments .. 342 DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) 343* .. 344 345* 346* Purpose 347* ======= 348* 349* DGEMM performs one of the matrix-matrix operations 350* 351* C := alpha*op( A )*op( B ) + beta*C, 352* 353* where op( X ) is one of 354* 355* op( X ) = X or op( X ) = X', 356* 357* alpha and beta are scalars, and A, B and C are matrices, with op( A ) 358* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. 359* 360* Arguments 361* ========== 362* 363* TRANSA - CHARACTER*1. 364* On entry, TRANSA specifies the form of op( A ) to be used in 365* the matrix multiplication as follows: 366* 367* TRANSA = 'N' or 'n', op( A ) = A. 368* 369* TRANSA = 'T' or 't', op( A ) = A'. 370* 371* TRANSA = 'C' or 'c', op( A ) = A'. 372* 373* Unchanged on exit. 374* 375* TRANSB - CHARACTER*1. 376* On entry, TRANSB specifies the form of op( B ) to be used in 377* the matrix multiplication as follows: 378* 379* TRANSB = 'N' or 'n', op( B ) = B. 380* 381* TRANSB = 'T' or 't', op( B ) = B'. 382* 383* TRANSB = 'C' or 'c', op( B ) = B'. 384* 385* Unchanged on exit. 386* 387* M - INTEGER. 388* On entry, M specifies the number of rows of the matrix 389* op( A ) and of the matrix C. M must be at least zero. 390* Unchanged on exit. 391* 392* N - INTEGER. 393* On entry, N specifies the number of columns of the matrix 394* op( B ) and the number of columns of the matrix C. N must be 395* at least zero. 396* Unchanged on exit. 397* 398* K - INTEGER. 399* On entry, K specifies the number of columns of the matrix 400* op( A ) and the number of rows of the matrix op( B ). K must 401* be at least zero. 402* Unchanged on exit. 403* 404* ALPHA - DOUBLE PRECISION. 405* On entry, ALPHA specifies the scalar alpha. 406* Unchanged on exit. 407* 408* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is 409* k when TRANSA = 'N' or 'n', and is m otherwise. 410* Before entry with TRANSA = 'N' or 'n', the leading m by k 411* part of the array A must contain the matrix A, otherwise 412* the leading k by m part of the array A must contain the 413* matrix A. 414* Unchanged on exit. 415* 416* LDA - INTEGER. 417* On entry, LDA specifies the first dimension of A as declared 418* in the calling (sub) program. When TRANSA = 'N' or 'n' then 419* LDA must be at least max( 1, m ), otherwise LDA must be at 420* least max( 1, k ). 421* Unchanged on exit. 422* 423* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is 424* n when TRANSB = 'N' or 'n', and is k otherwise. 425* Before entry with TRANSB = 'N' or 'n', the leading k by n 426* part of the array B must contain the matrix B, otherwise 427* the leading n by k part of the array B must contain the 428* matrix B. 429* Unchanged on exit. 430* 431* LDB - INTEGER. 432* On entry, LDB specifies the first dimension of B as declared 433* in the calling (sub) program. When TRANSB = 'N' or 'n' then 434* LDB must be at least max( 1, k ), otherwise LDB must be at 435* least max( 1, n ). 436* Unchanged on exit. 437* 438* BETA - DOUBLE PRECISION. 439* On entry, BETA specifies the scalar beta. When BETA is 440* supplied as zero then C need not be set on input. 441* Unchanged on exit. 442* 443* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). 444* Before entry, the leading m by n part of the array C must 445* contain the matrix C, except when beta is zero, in which 446* case C need not be set on entry. 447* On exit, the array C is overwritten by the m by n matrix 448* ( alpha*op( A )*op( B ) + beta*C ). 449* 450* LDC - INTEGER. 451* On entry, LDC specifies the first dimension of C as declared 452* in the calling (sub) program. LDC must be at least 453* max( 1, m ). 454* Unchanged on exit. 455* 456* 457* Level 3 Blas routine. 458* 459* -- Written on 8-February-1989. 460* Jack Dongarra, Argonne National Laboratory. 461* Iain Duff, AERE Harwell. 462* Jeremy Du Croz, Numerical Algorithms Group Ltd. 463* Sven Hammarling, Numerical Algorithms Group Ltd. 464* 465* 466* .. External Functions .. 467 LOGICAL LSAME 468 EXTERNAL LSAME 469 INTEGER Parallel_threadid,Parallel_nthreads 470 EXTERNAL Parallel_threadid,Parallel_nthreads 471* .. 472* .. External Subroutines .. 473 EXTERNAL XERBLA 474* .. 475* .. Intrinsic Functions .. 476 INTRINSIC MAX 477* .. 478 INTEGER NB 479 PARAMETER (NB=32) 480 481* .. Local Scalars .. 482 DOUBLE PRECISION TEMP1(NB),TEMP 483 INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB 484 LOGICAL NOTA,NOTB 485 integer cidj,tid,nthr,Mtid,Itid,r 486* .. 487* .. Parameters .. 488 DOUBLE PRECISION ONE,ZERO 489 PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) 490* .. 491* 492 nthr = Parallel_nthreads() 493 if (nthr.le.1) then 494 call DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB, 495 > BETA,C,LDC) 496 return 497 end if 498 tid = Parallel_threadid() 499 500* Set NOTA and NOTB as true if A and B respectively are not 501* transposed and set NROWA, NCOLA and NROWB as the number of rows 502* and columns of A and the number of rows of B respectively. 503* 504 NOTA = LSAME(TRANSA,'N') 505 NOTB = LSAME(TRANSB,'N') 506 IF (NOTA) THEN 507 NROWA = M 508 NCOLA = K 509 ELSE 510 NROWA = K 511 NCOLA = M 512 END IF 513 IF (NOTB) THEN 514 NROWB = K 515 ELSE 516 NROWB = N 517 END IF 518* 519* Test the input parameters. 520* 521 INFO = 0 522 IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. 523 + (.NOT.LSAME(TRANSA,'T'))) THEN 524 INFO = 1 525 ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. 526 + (.NOT.LSAME(TRANSB,'T'))) THEN 527 INFO = 2 528 ELSE IF (M.LT.0) THEN 529 INFO = 3 530 ELSE IF (N.LT.0) THEN 531 INFO = 4 532 ELSE IF (K.LT.0) THEN 533 INFO = 5 534 ELSE IF (LDA.LT.MAX(1,NROWA)) THEN 535 INFO = 8 536 ELSE IF (LDB.LT.MAX(1,NROWB)) THEN 537 INFO = 10 538 ELSE IF (LDC.LT.MAX(1,M)) THEN 539 INFO = 13 540 END IF 541 IF (INFO.NE.0) THEN 542 CALL XERBLA('DGEMM_OMP',INFO) 543 RETURN 544 END IF 545* 546* Quick return if possible. 547* 548 IF ((M.EQ.0) .OR. (N.EQ.0) .OR. 549 + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN 550 551 552* 553* And if alpha.eq.zero. 554 555* 556 IF (ALPHA.EQ.ZERO) THEN 557 IF (BETA.EQ.ZERO) THEN 558 cidj = tid 559 DO 20 J = 1,N 560 DO 10 I = cidj+1,M 561 C(I,J) = ZERO 562 10 CONTINUE 563 cidj = mod(cidj+M,nthr) 564 20 CONTINUE 565 ELSE 566 cidj = tid 567 DO 40 J = 1,N 568 DO 30 I = cidj+1,M,nthr 569 C(I,J) = BETA*C(I,J) 570 30 CONTINUE 571 cidj = mod(cidj+M,nthr) 572 40 CONTINUE 573 END IF 574 RETURN 575 END IF 576* 577* Start the operations. 578* 579 IF (NOTB) THEN 580 IF (NOTA) THEN 581* 582* Form C := alpha*A*B + beta*C. 583* 584 r = mod(M,nthr) 585 if (tid.lt.r) then 586 Itid = tid*(M/nthr+1) + 1 587 Mtid = M/nthr + 1 588 else 589 Itid = r + tid*(M/nthr) + 1 590 Mtid = M/nthr 591 end if 592 call DGEMM(TRANSA,TRANSB,Mtid,N,K,ALPHA,A(Itid,1),LDA,B,LDB, 593 > BETA,C(Itid,1),LDC) 594 595 596 ELSE 597* 598* Form C := alpha*A'*B + beta*C 599* 600 cidj = tid 601 DO 120 J = 1,N 602 DO 110 I = cidj+1,M,nthr 603 TEMP = ZERO 604 DO 100 L = 1,K 605 TEMP = TEMP + A(L,I)*B(L,J) 606 100 CONTINUE 607 IF (BETA.EQ.ZERO) THEN 608 C(I,J) = ALPHA*TEMP 609 ELSE 610 C(I,J) = ALPHA*TEMP + BETA*C(I,J) 611 END IF 612 110 CONTINUE 613 cidj = mod(cidj+M,nthr) 614 120 CONTINUE 615 END IF 616 ELSE 617 IF (NOTA) THEN 618* 619* Form C := alpha*A*B' + beta*C 620 r = mod(M,nthr) 621 if (tid.lt.r) then 622 Itid = tid*(M/nthr+1) + 1 623 Mtid = M/nthr + 1 624 else 625 Itid = r + tid*(M/nthr) + 1 626 Mtid = M/nthr 627 end if 628 call DGEMM(TRANSA,TRANSB,Mtid,N,K,ALPHA,A(Itid,1),LDA,B,LDB, 629 > BETA,C(Itid,1),LDC) 630 631 ELSE 632* 633* Form C := alpha*A'*B' + beta*C 634* 635 cidj = tid 636 DO 200 J = 1,N 637 DO 190 I = cidj+1,M,nthr 638 TEMP = ZERO 639 DO 180 L = 1,K 640 TEMP = TEMP + A(L,I)*B(J,L) 641 180 CONTINUE 642 IF (BETA.EQ.ZERO) THEN 643 C(I,J) = ALPHA*TEMP 644 ELSE 645 C(I,J) = ALPHA*TEMP + BETA*C(I,J) 646 END IF 647 190 CONTINUE 648 cidj = mod(cidj+M,nthr) 649 200 CONTINUE 650 END IF 651 END IF 652* 653 RETURN 654* 655* End of DGEMM_OMP0 656* 657 END 658 659