1      SUBROUTINE DGAMM_OMP(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,
2     >                     BETA,C,LDC)
3*     .. Scalar Arguments ..
4      DOUBLE PRECISION ALPHA,BETA
5      INTEGER K,LDA,LDB,LDC,M,N
6      CHARACTER TRANSA,TRANSB
7*     ..
8*     .. Array Arguments ..
9      DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
10*     ..
11*
12*  Purpose
13*  =======
14*
15*  DGAMM  performs one of the matrix-matrix operations
16*
17*     C := alpha*op( A )*op( B ) + beta*C,
18*
19*  where  op( X ) is one of
20*
21*     op( X ) = X   or   op( X ) = X',
22*
23*  alpha and beta are scalars, and A, B and C are matrices, with op( A )
24*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
25*
26*  Arguments
27*  ==========
28*
29*  TRANSA - CHARACTER*1.
30*           On entry, TRANSA specifies the form of op( A ) to be used in
31*           the matrix multiplication as follows:
32*
33*              TRANSA = 'N' or 'n',  op( A ) = A.
34*
35*              TRANSA = 'T' or 't',  op( A ) = A'.
36*
37*              TRANSA = 'C' or 'c',  op( A ) = A'.
38*
39*           Unchanged on exit.
40*
41*  TRANSB - CHARACTER*1.
42*           On entry, TRANSB specifies the form of op( B ) to be used in
43*           the matrix multiplication as follows:
44*
45*              TRANSB = 'N' or 'n',  op( B ) = B.
46*
47*              TRANSB = 'T' or 't',  op( B ) = B'.
48*
49*              TRANSB = 'C' or 'c',  op( B ) = B'.
50*
51*           Unchanged on exit.
52*
53*  M      - INTEGER.
54*           On entry,  M  specifies  the number  of rows  of the  matrix
55*           op( A )  and of the  matrix  C.  M  must  be at least  zero.
56*           Unchanged on exit.
57*
58*  N      - INTEGER.
59*           On entry,  N  specifies the number  of columns of the matrix
60*           op( B ) and the number of columns of the matrix C. N must be
61*           at least zero.
62*           Unchanged on exit.
63*
64*  K      - INTEGER.
65*           On entry,  K  specifies  the number of columns of the matrix
66*           op( A ) and the number of rows of the matrix op( B ). K must
67*           be at least  zero.
68*           Unchanged on exit.
69*
70*  ALPHA  - DOUBLE PRECISION.
71*           On entry, ALPHA specifies the scalar alpha.
72*           Unchanged on exit.
73*
74*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
75*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
76*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
77*           part of the array  A  must contain the matrix  A,  otherwise
78*           the leading  k by m  part of the array  A  must contain  the
79*           matrix A.
80*           Unchanged on exit.
81*
82*  LDA    - INTEGER.
83*           On entry, LDA specifies the first dimension of A as declared
84*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
85*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
86*           least  max( 1, k ).
87*           Unchanged on exit.
88*
89*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
90*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
91*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
92*           part of the array  B  must contain the matrix  B,  otherwise
93*           the leading  n by k  part of the array  B  must contain  the
94*           matrix B.
95*           Unchanged on exit.
96*
97*  LDB    - INTEGER.
98*           On entry, LDB specifies the first dimension of B as declared
99*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
100*           LDB must be at least  max( 1, k ), otherwise  LDB must be at
101*           least  max( 1, n ).
102*           Unchanged on exit.
103*
104*  BETA   - DOUBLE PRECISION.
105*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
106*           supplied as zero then C need not be set on input.
107*           Unchanged on exit.
108*
109*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
110*           Before entry, the leading  m by n  part of the array  C must
111*           contain the matrix  C,  except when  beta  is zero, in which
112*           case C need not be set on entry.
113*           On exit, the array  C  is overwritten by the  m by n  matrix
114*           ( alpha*op( A )*op( B ) + beta*C ).
115*
116*  LDC    - INTEGER.
117*           On entry, LDC specifies the first dimension of C as declared
118*           in  the  calling  (sub)  program.   LDC  must  be  at  least
119*           max( 1, m ).
120*           Unchanged on exit.
121*
122*
123*  Level 3 Blas routine.
124*
125*  -- Written on 8-February-1989.
126*     Jack Dongarra, Argonne National Laboratory.
127*     Iain Duff, AERE Harwell.
128*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
129*     Sven Hammarling, Numerical Algorithms Group Ltd.
130*
131*
132*     .. External Functions ..
133      LOGICAL LSAME
134      EXTERNAL LSAME
135      INTEGER  Parallel_threadid,Parallel_nthreads
136      EXTERNAL Parallel_threadid,Parallel_nthreads
137*     ..
138*     .. External Subroutines ..
139      EXTERNAL XERBLA
140*     ..
141*     .. Intrinsic Functions ..
142      INTRINSIC MAX
143*     ..
144*     .. Local Scalars ..
145      DOUBLE PRECISION TEMP
146      INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
147      LOGICAL NOTA,NOTB
148      integer cidj,tid,nthr
149*     ..
150*     .. Parameters ..
151      DOUBLE PRECISION ONE,ZERO
152      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
153*     ..
154
155      tid  = Parallel_threadid()
156      nthr = Parallel_nthreads()
157*
158*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
159*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
160*     and  columns of  A  and the  number of  rows  of  B  respectively.
161*
162      NOTA = LSAME(TRANSA,'N')
163      NOTB = LSAME(TRANSB,'N')
164      IF (NOTA) THEN
165          NROWA = M
166          NCOLA = K
167      ELSE
168          NROWA = K
169          NCOLA = M
170      END IF
171      IF (NOTB) THEN
172          NROWB = K
173      ELSE
174          NROWB = N
175      END IF
176*
177*     Test the input parameters.
178*
179      INFO = 0
180      IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
181     +    (.NOT.LSAME(TRANSA,'T'))) THEN
182          INFO = 1
183      ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
184     +         (.NOT.LSAME(TRANSB,'T'))) THEN
185          INFO = 2
186      ELSE IF (M.LT.0) THEN
187          INFO = 3
188      ELSE IF (N.LT.0) THEN
189          INFO = 4
190      ELSE IF (K.LT.0) THEN
191          INFO = 5
192      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
193          INFO = 8
194      ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
195          INFO = 10
196      ELSE IF (LDC.LT.MAX(1,M)) THEN
197          INFO = 13
198      END IF
199      IF (INFO.NE.0) THEN
200          CALL XERBLA('DGAMM ',INFO)
201          RETURN
202      END IF
203*
204*     Quick return if possible.
205*
206      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
207     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
208
209
210*
211*     And if  alpha.eq.zero.
212
213*
214      IF (ALPHA.EQ.ZERO) THEN
215          IF (BETA.EQ.ZERO) THEN
216              cidj = tid
217              DO 20 J = 1,N
218                  DO 10 I = cidj+1,M
219                      C(I,J) = ZERO
220   10             CONTINUE
221                  cidj = mod(cidj+M,nthr)
222   20         CONTINUE
223          ELSE
224              cidj = tid
225              DO 40 J = 1,N
226                  DO 30 I = cidj+1,M,nthr
227                      C(I,J) = BETA*C(I,J)
228   30             CONTINUE
229                  cidj = mod(cidj+M,nthr)
230   40         CONTINUE
231          END IF
232!$OMP BARRIER
233          RETURN
234      END IF
235*
236*     Start the operations.
237*
238      IF (NOTB) THEN
239          IF (NOTA) THEN
240*
241*           Form  C := alpha*A*B + beta*C.
242*
243              cidj = tid
244              DO 90 J = 1,N
245                  IF (BETA.EQ.ZERO) THEN
246                      DO 50 I = cidj+1,M,nthr
247                          C(I,J) = ZERO
248   50                 CONTINUE
249                  ELSE IF (BETA.NE.ONE) THEN
250                      DO 60 I = cidj+1,M,nthr
251                          C(I,J)=BETA*C(I,J)
252   60                 CONTINUE
253                  END IF
254                  DO 80 L = 1,K
255                      IF (B(L,J).NE.ZERO) THEN
256                          TEMP = ALPHA*dabs(B(L,J))
257                          DO 70 I = cidj+1,M,nthr
258                              C(I,J) = C(I,J) + TEMP*dabs(A(I,L))
259   70                     CONTINUE
260                      END IF
261   80             CONTINUE
262                  cidj = mod(cidj+M,nthr)
263   90         CONTINUE
264          ELSE
265*
266*           Form  C := alpha*A'*B + beta*C
267*
268              cidj = tid
269              DO 120 J = 1,N
270                  DO 110 I = cidj+1,M,nthr
271                         TEMP = ZERO
272                         DO 100 L = 1,K
273                             TEMP = TEMP + dabs(A(L,I)*B(L,J))
274  100                    CONTINUE
275                         IF (BETA.EQ.ZERO) THEN
276                             C(I,J) = ALPHA*TEMP
277                         ELSE
278                             C(I,J) = ALPHA*TEMP + BETA*C(I,J)
279                         END IF
280  110             CONTINUE
281                  cidj = mod(cidj+M,nthr)
282  120         CONTINUE
283          END IF
284      ELSE
285          IF (NOTA) THEN
286*
287*           Form  C := alpha*A*B' + beta*C
288*
289              cidj = tid
290              DO 170 J = 1,N
291                  IF (BETA.EQ.ZERO) THEN
292                      DO 130 I = cidj+1,M,nthr
293                          C(I,J) = ZERO
294  130                 CONTINUE
295                  ELSE IF (BETA.NE.ONE) THEN
296                      DO 140 I = cidj+1,M,nthr
297                          C(I,J) = BETA*C(I,J)
298  140                 CONTINUE
299                  END IF
300                  DO 160 L = 1,K
301                      IF (B(J,L).NE.ZERO) THEN
302                          TEMP = ALPHA*dabs(B(J,L))
303                          DO 150 I = cidj+1,M,nthr
304                             C(I,J) = C(I,J) + TEMP*dabs(A(I,L))
305  150                     CONTINUE
306                      END IF
307  160             CONTINUE
308                  cidj = mod(cidj+M,nthr)
309  170         CONTINUE
310          ELSE
311*
312*           Form  C := alpha*A'*B' + beta*C
313*
314              cidj = tid
315              DO 200 J = 1,N
316                  DO 190 I = cidj+1,M,nthr
317                      TEMP = ZERO
318                      DO 180 L = 1,K
319                          TEMP = TEMP + dabs(A(L,I)*B(J,L))
320  180                 CONTINUE
321                      IF (BETA.EQ.ZERO) THEN
322                          C(I,J) = ALPHA*TEMP
323                      ELSE
324                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
325                      END IF
326  190             CONTINUE
327                  cidj = mod(cidj+M,nthr)
328  200         CONTINUE
329          END IF
330      END IF
331*
332!$OMP BARRIER
333      RETURN
334*
335*     End of DGAMM_OMP .
336*
337      END
338
339