1C
2C  This file is part of MUMPS 5.1.2, released
3C  on Mon Oct  2 07:37:01 UTC 2017
4C
5C
6C  Copyright 1991-2017 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria,
7C  University of Bordeaux.
8C
9C  This version of MUMPS is provided to you free of charge. It is
10C  released under the CeCILL-C license:
11C  http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html
12C
13      MODULE DMUMPS_LR_CORE
14      USE MUMPS_LR_COMMON
15      USE DMUMPS_LR_TYPE
16      USE DMUMPS_LR_STATS
17!$    USE OMP_LIB
18      IMPLICIT NONE
19      CONTAINS
20      SUBROUTINE INIT_LRB(LRB_OUT,K,KSVD,M,N,ISLR)
21        TYPE(LRB_TYPE), INTENT(OUT) :: LRB_OUT
22        INTEGER,INTENT(IN) :: K,KSVD,M,N
23        LOGICAL,INTENT(IN) :: ISLR
24C This routine simply initializes a LR block but does NOT allocate it
25        LRB_OUT%M = M
26        LRB_OUT%N = N
27        LRB_OUT%K = K
28        LRB_OUT%KSVD = KSVD
29        LRB_OUT%ISLR = ISLR
30        NULLIFY(LRB_OUT%Q)
31        NULLIFY(LRB_OUT%R)
32        IF (ISLR) THEN
33          LRB_OUT%LRFORM = 1
34        ELSE
35          LRB_OUT%LRFORM = 0
36        ENDIF
37      END SUBROUTINE INIT_LRB
38      SUBROUTINE IS_FRONT_BLR_CANDIDATE(INODE, NFRONT, NASS, K486, K489,
39     &                    K490, K491, K492, N, LRGROUPS, LRSTATUS)
40        INTEGER,INTENT(IN) :: INODE, NFRONT, NASS, K486, K489, K490,
41     &                        K491, K492
42        INTEGER,INTENT(IN) :: N, LRGROUPS(N)
43        INTEGER,INTENT(OUT):: LRSTATUS
44C
45C     Local variables
46        LOGICAL :: COMPRESS_PANEL, COMPRESS_CB
47        COMPRESS_PANEL = .FALSE.
48        IF ((K486.GT.0).and.(
49     &        ((K492.LT.0).and.INODE.EQ.abs(K492))
50     &        .or.
51     &        ( (K492.GT.0).and.(K491.LE.NFRONT)
52     &        .and.(K490.LE.NASS)))) THEN
53          COMPRESS_PANEL = .TRUE.
54C         Compression for NASS =1 is useless
55          IF (NASS.EQ.1) COMPRESS_PANEL =.FALSE.
56          IF (LRGROUPS (INODE) .LT. 0) COMPRESS_PANEL = .FALSE.
57        ENDIF
58        COMPRESS_CB = .FALSE.
59        IF ((K492.GT.0).AND.(K489.EQ.1).AND.(NFRONT-NASS.GT.K491)) THEN
60          COMPRESS_CB = .TRUE.
61        ENDIF
62        IF (COMPRESS_PANEL.OR.COMPRESS_CB) THEN
63          IF (COMPRESS_CB.AND.(.NOT.COMPRESS_PANEL)) THEN
64            LRSTATUS = 1
65          ELSE IF (COMPRESS_PANEL.AND.(.NOT.COMPRESS_CB)) THEN
66            LRSTATUS = 2
67          ELSE
68            LRSTATUS = 3
69          ENDIF
70        ELSE
71         LRSTATUS = 0
72        ENDIF
73      END SUBROUTINE IS_FRONT_BLR_CANDIDATE
74      SUBROUTINE ALLOC_LRB(LRB_OUT,K,KSVD,M,N,ISLR,IFLAG,IERROR,KEEP8)
75        TYPE(LRB_TYPE), INTENT(OUT) :: LRB_OUT
76        INTEGER,INTENT(IN) :: K,KSVD,M,N
77        INTEGER,INTENT(OUT) :: IFLAG, IERROR
78        LOGICAL,INTENT(IN) :: ISLR
79        INTEGER(8) :: KEEP8(150)
80        INTEGER :: MEM, allocok
81        DOUBLE PRECISION :: ZERO
82        PARAMETER (ZERO = 0.0D0)
83        IF (ISLR) THEN
84          IF (K.EQ.0) THEN
85            nullify(LRB_OUT%Q)
86            nullify(LRB_OUT%R)
87          ELSE
88            allocate(LRB_OUT%Q(M,K),LRB_OUT%R(K,N),stat=allocok)
89            IF (allocok > 0) THEN
90              IFLAG  = -13
91              IERROR = K*(M+N)
92              write(*,*) 'Allocation problem in BLR routine ALLOC_LRB:',
93     &             ' not enough memory? memory requested = ' , IERROR
94              RETURN
95            ENDIF
96          ENDIF
97        ELSE
98          allocate(LRB_OUT%Q(M,N),stat=allocok)
99          IF (allocok > 0) THEN
100            IFLAG  = -13
101            IERROR = M*N
102            write(*,*) 'Allocation problem in BLR routine ALLOC_LRB:',
103     &           ' not enough memory? memory requested = ' , IERROR
104            RETURN
105          ENDIF
106          nullify(LRB_OUT%R)
107        ENDIF
108        LRB_OUT%M = M
109        LRB_OUT%N = N
110        LRB_OUT%K = K
111        LRB_OUT%KSVD = KSVD
112        LRB_OUT%ISLR = ISLR
113        IF (ISLR) THEN
114          LRB_OUT%LRFORM = 1
115        ELSE
116          LRB_OUT%LRFORM = 0
117        ENDIF
118        IF (ISLR) THEN
119          MEM = M*K + N*K
120        ELSE
121          MEM = M*N
122        ENDIF
123        KEEP8(70) = KEEP8(70) - int(MEM,8)
124        KEEP8(68) = min(KEEP8(70), KEEP8(68))
125        KEEP8(71) = KEEP8(71) - int(MEM,8)
126        KEEP8(69) = min(KEEP8(71), KEEP8(69))
127      END SUBROUTINE ALLOC_LRB
128      SUBROUTINE REGROUPING2(CUT, NPARTSASS, NASS,
129     &                   NPARTSCB, NCB, IBCKSZ, ONLYCB, K472)
130        INTEGER, INTENT(IN) :: IBCKSZ, NASS, NCB
131        INTEGER, INTENT(INOUT) :: NPARTSCB, NPARTSASS
132        INTEGER, POINTER, DIMENSION(:) :: CUT
133        INTEGER, POINTER, DIMENSION(:) :: NEW_CUT
134        INTEGER :: I, INEW, MINSIZE, NEW_NPARTSASS
135        LOGICAL :: ONLYCB, TRACE
136        INTEGER, INTENT(IN) :: K472
137        INTEGER :: IBCKSZ2
138        ALLOCATE(NEW_CUT(max(NPARTSASS,1)+NPARTSCB+1))
139        CALL COMPUTE_BLR_VCS(K472, IBCKSZ2, IBCKSZ, NASS)
140        MINSIZE = int(IBCKSZ2 / 2)
141        NEW_NPARTSASS = max(NPARTSASS,1)
142        IF (.NOT. ONLYCB) THEN
143           NEW_CUT(1) = 1
144           INEW = 2
145           I = 2
146           DO WHILE (I .LE. NPARTSASS + 1)
147              NEW_CUT(INEW) = CUT(I)
148              TRACE = .FALSE.
149              IF (NEW_CUT(INEW) - NEW_CUT(INEW-1) .GT. MINSIZE) THEN
150                 INEW = INEW + 1
151                 TRACE = .TRUE.
152              ENDIF
153              I = I + 1
154           END DO
155           IF (TRACE) THEN
156              INEW = INEW - 1
157           ELSE
158              IF (INEW .NE. 2) THEN
159                 NEW_CUT(INEW-1) = NEW_CUT(INEW)
160                 INEW = INEW - 1
161              ENDIF
162           ENDIF
163           NEW_NPARTSASS = INEW - 1
164        ENDIF
165        IF (ONLYCB) THEN
166           DO I=1,max(NPARTSASS,1)+1
167              NEW_CUT(I) = CUT(I)
168           ENDDO
169        ENDIF
170        IF (NCB .EQ. 0) GO TO 50
171        INEW = NEW_NPARTSASS+2
172        I = max(NPARTSASS,1) + 2
173        DO WHILE (I .LE. max(NPARTSASS,1) + NPARTSCB + 1)
174              NEW_CUT(INEW) = CUT(I)
175              TRACE = .FALSE.
176              IF (NEW_CUT(INEW) - NEW_CUT(INEW-1) .GT. MINSIZE) THEN
177                 INEW = INEW + 1
178                 TRACE = .TRUE.
179              ENDIF
180              I = I + 1
181        END DO
182        IF (TRACE) THEN
183           INEW = INEW - 1
184        ELSE
185           IF (INEW .NE.  NEW_NPARTSASS+2) THEN
186           NEW_CUT(INEW-1) = NEW_CUT(INEW)
187              INEW = INEW - 1
188           ENDIF
189        ENDIF
190        NPARTSCB = INEW - 1 - NEW_NPARTSASS
191 50     CONTINUE
192        NPARTSASS = NEW_NPARTSASS
193        DEALLOCATE(CUT)
194        ALLOCATE(CUT(NPARTSASS+NPARTSCB+1))
195        DO I=1,NPARTSASS+NPARTSCB+1
196           CUT(I) = NEW_CUT(I)
197        ENDDO
198        DEALLOCATE(NEW_CUT)
199      END SUBROUTINE REGROUPING2
200      SUBROUTINE DMUMPS_LRGEMM_SCALING(LRB, SCALED, A, LA, POSELTD,
201     &          LD_DIAG, IW2, POSELTT, NFRONT, BLOCK, MAXI_CLUSTER)
202C This routine does the scaling (for the symmetric case) before
203C computing the LR product (done in DMUMPS_LRGEMM3)
204        TYPE(LRB_TYPE),INTENT(IN) :: LRB
205        INTEGER(8), intent(in)  :: LA
206        DOUBLE PRECISION, intent(inout)  :: A(LA)
207        DOUBLE PRECISION, intent(inout), DIMENSION(:,:)  :: SCALED
208        INTEGER,INTENT(IN) :: LD_DIAG, NFRONT, IW2(*)
209        INTEGER(8), INTENT(IN) :: POSELTD, POSELTT
210        INTEGER, INTENT(IN) :: MAXI_CLUSTER
211        DOUBLE PRECISION, intent(inout)  :: BLOCK(MAXI_CLUSTER)
212        INTEGER :: J, NROWS
213        DOUBLE PRECISION :: PIV1, PIV2, OFFDIAG
214        IF (LRB%LRFORM.EQ.1) THEN
215            NROWS = LRB%K
216        ELSE ! Full Rank Block
217            NROWS = LRB%M
218        ENDIF
219        J = 1
220        DO WHILE (J <= LRB%N)
221            IF (IW2(J) > 0) THEN
222                SCALED(1:NROWS,J) = A(POSELTD+LD_DIAG*(J-1)+J-1)
223     &           * SCALED(1:NROWS,J)
224                J = J+1
225            ELSE !2x2 pivot
226                PIV1    = A(POSELTD+LD_DIAG*(J-1)+J-1)
227                PIV2    = A(POSELTD+LD_DIAG*J+J)
228                OFFDIAG = A(POSELTD+LD_DIAG*(J-1)+J)
229                BLOCK(1:NROWS)    = SCALED(1:NROWS,J)
230                SCALED(1:NROWS,J) = PIV1 * SCALED(1:NROWS,J)
231     &            + OFFDIAG * SCALED(1:NROWS,J+1)
232                SCALED(1:NROWS,J+1) = OFFDIAG * BLOCK(1:NROWS)
233     &            + PIV2 * SCALED(1:NROWS,J+1)
234                 J=J+2
235            ENDIF
236        END DO
237      END SUBROUTINE DMUMPS_LRGEMM_SCALING
238      SUBROUTINE DMUMPS_LRGEMM3(TRANSB1, TRANSB2, ALPHA,
239     &           LRB1, LRB2, BETA, A, LA, POSELTT, NFRONT, SYM, NIV,
240     &           IFLAG, IERROR,
241     &           COMPRESS_MID_PRODUCT, TOLEPS, KPERCENT, RANK, BUILDQ,
242     &           POSELTD, LD_DIAG, IW2, BLOCK, MAXI_CLUSTER)
243        TYPE(LRB_TYPE),INTENT(IN) :: LRB1,LRB2
244        INTEGER(8), intent(in)  :: LA
245        DOUBLE PRECISION, intent(inout)  :: A(LA)
246        INTEGER,INTENT(IN) :: NFRONT, SYM, NIV
247        INTEGER,INTENT(OUT) :: IFLAG, IERROR
248        INTEGER(8), INTENT(IN) :: POSELTT
249        INTEGER(8), INTENT(IN), OPTIONAL :: POSELTD
250        INTEGER,INTENT(IN), OPTIONAL :: LD_DIAG, IW2(*)
251        INTEGER, INTENT(IN), OPTIONAL :: MAXI_CLUSTER
252        CHARACTER(len=1),INTENT(IN) :: TRANSB1, TRANSB2
253        INTEGER,intent(in) :: COMPRESS_MID_PRODUCT, KPERCENT
254        DOUBLE PRECISION, intent(in) :: TOLEPS
255        DOUBLE PRECISION :: ALPHA,BETA
256        DOUBLE PRECISION, intent(inout), OPTIONAL  :: BLOCK(:)
257        DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:,:) :: XY_YZ
258        DOUBLE PRECISION, ALLOCATABLE, TARGET, DIMENSION(:,:) :: XQ, R_Y
259        DOUBLE PRECISION, POINTER, DIMENSION(:,:) :: X, Y, Y1, Y2, Z
260        CHARACTER(len=1) :: SIDE, TRANSX, TRANSY, TRANSZ
261        INTEGER :: M_X, K_XY, K_YZ, N_Z, LDX, LDY, LDY1, LDY2, LDZ, K_Y
262        INTEGER :: I, J, RANK, MAXRANK, INFO, LWORK
263        LOGICAL :: BUILDQ
264        DOUBLE PRECISION,    ALLOCATABLE :: RWORK_RRQR(:)
265        DOUBLE PRECISION, ALLOCATABLE :: WORK_RRQR(:), TAU_RRQR(:),
266     &                          Y_RRQR(:,:)
267        INTEGER, ALLOCATABLE :: JPVT_RRQR(:)
268        INTEGER :: T1, T2, CR
269        INTEGER :: allocok, MREQ
270        DOUBLE PRECISION :: LOC_UPDT_TIME_OUT
271        DOUBLE PRECISION, EXTERNAL ::dnrm2
272        DOUBLE PRECISION :: ONE, MONE, ZERO
273        PARAMETER (ONE = 1.0D0, MONE=-1.0D0)
274        PARAMETER (ZERO=0.0D0)
275        IF (LRB2%M.EQ.0) THEN
276          write(*,*) "Internal error in DMUMPS_LRGEMM3, LRB2%M=0"
277          CALL MUMPS_ABORT()
278        ENDIF
279        IF ((SYM.NE.0).AND.((TRANSB1.NE.'N').OR.(TRANSB2.NE.'T'))) THEN
280            WRITE(*,*) "SYM > 0 and (", TRANSB1, ",", TRANSB2,
281     &                ") parameters found. Symmetric LRGEMM is only ",
282     &                 "compatible with (N,T) parameters"
283            CALL MUMPS_ABORT()
284        ENDIF
285        RANK = 0
286        BUILDQ = .FALSE.
287        IF ((LRB1%LRFORM==1).AND.(LRB2%LRFORM==1)) THEN
288            IF ((LRB1%K.EQ.0).OR.(LRB2%K.EQ.0)) GOTO 700
289            allocate(Y(LRB1%K,LRB2%K),stat=allocok)
290            IF (allocok > 0) THEN
291              MREQ = LRB1%K*LRB2%K
292              GOTO 860
293            ENDIF
294            IF (TRANSB1 == 'N') THEN
295                X    => LRB1%Q
296                LDX  =  LRB1%M
297                M_X  =  LRB1%M
298                K_Y  =  LRB1%N
299                IF (SYM .EQ. 0) THEN
300                    Y1  => LRB1%R
301                ELSE
302                    allocate(Y1(LRB1%K,LRB1%N),stat=allocok)
303                    IF (allocok > 0) THEN
304                      MREQ = LRB1%K*LRB1%N
305                      GOTO 860
306                    ENDIF
307                    DO J=1,LRB1%N
308                        DO I=1,LRB1%K
309                            Y1(I,J) = LRB1%R(I,J)
310                        ENDDO
311                    ENDDO
312                    CALL DMUMPS_LRGEMM_SCALING(LRB1, Y1, A, LA, POSELTD,
313     &                     LD_DIAG, IW2, POSELTT, NFRONT, BLOCK,
314     &                     MAXI_CLUSTER)
315                ENDIF
316                LDY1 =  LRB1%K
317            ELSE  !TRANSB1 == 'T'
318                M_X  =  LRB1%N
319                X    => LRB1%R
320                LDX  =  LRB1%K
321                K_Y  =  LRB1%M
322                Y1   => LRB1%Q
323                LDY1 =  LRB1%M
324            ENDIF
325            IF (TRANSB2 == 'N') THEN
326                Z    => LRB2%R
327                LDZ  =  LRB2%K
328                N_Z  =  LRB2%N
329                Y2   => LRB2%Q
330                LDY2 =  LRB2%M
331            ELSE  !TRANSB2 == 'T'
332                N_Z  =  LRB2%M
333                Z    => LRB2%Q
334                LDZ  =  LRB2%M
335                Y2   => LRB2%R
336                LDY2 =  LRB2%K
337            ENDIF
338            TRANSZ = TRANSB2
339            CALL dgemm(TRANSB1 , TRANSB2 , LRB1%K , LRB2%K, K_Y, ONE,
340     &            Y1(1,1), LDY1, Y2(1,1), LDY2, ZERO, Y(1,1), LRB1%K )
341            BUILDQ = .FALSE.
342            IF (COMPRESS_MID_PRODUCT.GE.1) THEN
343                LWORK = MAX(LRB2%K**2, M_X**2)
344                allocate(Y_RRQR(LRB1%K,LRB2%K),
345     &               WORK_RRQR(LWORK), RWORK_RRQR(2*LRB2%K),
346     &               TAU_RRQR(MIN(LRB1%K,LRB2%K)),
347     &               JPVT_RRQR(LRB2%K),stat=allocok)
348                IF (allocok > 0) THEN
349                  MREQ = LRB1%K*LRB2%K + LWORK + 2*LRB2%K +
350     &                   MIN(LRB1%K,LRB2%K) + LRB2%K
351                  GOTO 860
352                ENDIF
353                DO J=1,LRB2%K
354                    DO I=1,LRB1%K
355                        Y_RRQR(I,J) = Y(I,J)
356                    ENDDO
357                ENDDO
358                MAXRANK = MIN(LRB1%K, LRB2%K)-1
359                MAXRANK = max (1, int((MAXRANK*KPERCENT/100)))
360                JPVT_RRQR = 0
361                CALL DMUMPS_TRUNCATED_RRQR(LRB1%K, LRB2%K, Y_RRQR(1,1),
362     &               LRB1%K, JPVT_RRQR(1), TAU_RRQR(1), WORK_RRQR(1),
363     &               LRB2%K, RWORK_RRQR(1), TOLEPS, RANK, MAXRANK, INFO)
364                IF ((RANK.GT.MAXRANK).OR.(RANK.EQ.0)) THEN
365                    deallocate(Y_RRQR, WORK_RRQR, RWORK_RRQR, TAU_RRQR,
366     &                     JPVT_RRQR)
367                    BUILDQ = .FALSE.
368                ELSE
369                    BUILDQ = .TRUE.
370                ENDIF
371                IF (BUILDQ) THEN ! Successfully compressed middle block
372                  allocate(XQ(M_X,RANK), R_Y(RANK,LRB2%K),stat=allocok)
373                  IF (allocok > 0) THEN
374                    MREQ = M_X*RANK + RANK*LRB2%K
375                    GOTO 860
376                  ENDIF
377                    DO J=1, LRB2%K
378                       R_Y(1:MIN(RANK,J),JPVT_RRQR(J)) =
379     &                   Y_RRQR(1:MIN(RANK,J),J)
380                       IF(J.LT.RANK) R_Y(MIN(RANK,J)+1:
381     &                   RANK,JPVT_RRQR(J))= ZERO
382                    END DO
383                    CALL dorgqr
384     &                  (LRB1%K, RANK, RANK, Y_RRQR(1,1),
385     &                  LRB1%K, TAU_RRQR(1),
386     &                  WORK_RRQR(1), LWORK, INFO )
387                    CALL dgemm(TRANSB1, 'N', M_X, RANK, LRB1%K, ONE,
388     &                    X(1,1), LDX, Y_RRQR(1,1), LRB1%K, ZERO,
389     &                    XQ(1,1), M_X)
390                    deallocate(Y_RRQR, WORK_RRQR, RWORK_RRQR, TAU_RRQR,
391     &                         JPVT_RRQR)
392                    nullify(X)
393                    X      => XQ
394                    LDX    =  M_X
395                    K_XY   =  RANK
396                    TRANSX =  'N'
397                    deallocate(Y)
398                    nullify(Y)
399                    Y      => R_Y
400                    LDY    =  RANK
401                    K_YZ   =  LRB2%K
402                    TRANSY =  'N'
403                    SIDE   = 'R'
404                ENDIF
405            ENDIF
406            IF (.NOT.BUILDQ) THEN
407                LDY    = LRB1%K
408                K_XY   = LRB1%K
409                K_YZ   = LRB2%K
410                TRANSX = TRANSB1
411                TRANSY = 'N'
412                IF (LRB1%K .GE. LRB2%K) THEN
413                    SIDE = 'L'
414                ELSE ! LRB1%K < LRB2%K
415                    SIDE = 'R'
416                ENDIF
417            ENDIF
418        ENDIF
419        IF ((LRB1%LRFORM==1).AND.(LRB2%LRFORM==0)) THEN
420            IF (LRB1%K.EQ.0) GOTO 700
421            SIDE   =  'R'
422            K_XY   =  LRB1%K
423            TRANSX =  TRANSB1
424            TRANSY =  TRANSB1
425            Z      => LRB2%Q
426            LDZ    =  LRB2%M
427            TRANSZ =  TRANSB2
428            IF (TRANSB1 == 'N') THEN
429                X   => LRB1%Q
430                LDX =  LRB1%M
431                M_X =  LRB1%M
432                LDY =  LRB1%K
433                IF (SYM .EQ. 0) THEN
434                    Y   => LRB1%R
435                ELSE
436                    allocate(Y(LRB1%K,LRB1%N),stat=allocok)
437                    IF (allocok > 0) THEN
438                      MREQ = LRB1%K*LRB1%N
439                      GOTO 860
440                    ENDIF
441                    DO J=1,LRB1%N
442                        DO I=1,LRB1%K
443                            Y(I,J) = LRB1%R(I,J)
444                        ENDDO
445                    ENDDO
446                    CALL DMUMPS_LRGEMM_SCALING(LRB1, Y, A, LA, POSELTD,
447     &                     LD_DIAG, IW2, POSELTT, NFRONT, BLOCK,
448     &                     MAXI_CLUSTER)
449                ENDIF
450            ELSE ! TRANSB1 == 'T'
451                X   => LRB1%R
452                LDX =  LRB1%K
453                M_X =  LRB1%N
454                Y   => LRB1%Q
455                LDY =  LRB1%M
456            ENDIF
457            IF (TRANSB2 == 'N') THEN
458                K_YZ = LRB2%M
459                N_Z  = LRB2%N
460            ELSE ! TRANSB2 == 'T'
461                K_YZ = LRB2%N
462                N_Z  = LRB2%M
463            ENDIF
464        ENDIF
465        IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==1)) THEN
466            IF (LRB2%K.EQ.0) GOTO 700
467            SIDE   =  'L'
468            K_YZ   =  LRB2%K
469            X      => LRB1%Q
470            LDX    =  LRB1%M
471            TRANSX =  TRANSB1
472            TRANSY =  TRANSB2
473            TRANSZ =  TRANSB2
474            IF (TRANSB1 == 'N') THEN
475                M_X  = LRB1%M
476                K_XY = LRB1%N
477            ELSE ! TRANSB1 == 'T'
478                M_X  = LRB1%N
479                K_XY = LRB1%M
480            ENDIF
481            IF (TRANSB2 == 'N') THEN
482                Y   => LRB2%Q
483                LDY =  LRB2%M
484                Z   => LRB2%R
485                LDZ =  LRB2%K
486                N_Z =  LRB2%N
487            ELSE ! TRANSB2 == 'T'
488                IF (SYM .EQ. 0) THEN
489                    Y  => LRB2%R
490                ELSE ! Symmetric case: column scaling of R2 is done
491                    allocate(Y(LRB2%K,LRB2%N),stat=allocok)
492                    IF (allocok > 0) THEN
493                      MREQ = LRB2%K*LRB2%N
494                      GOTO 860
495                    ENDIF
496                    DO J=1,LRB2%N
497                        DO I=1,LRB2%K
498                            Y(I,J) = LRB2%R(I,J)
499                        ENDDO
500                    ENDDO
501                    CALL DMUMPS_LRGEMM_SCALING(LRB2, Y, A, LA, POSELTD,
502     &                     LD_DIAG, IW2, POSELTT, NFRONT, BLOCK,
503     &                     MAXI_CLUSTER)
504                ENDIF
505                LDY =  LRB2%K
506                Z   => LRB2%Q
507                LDZ =  LRB2%M
508                N_Z =  LRB2%M
509            ENDIF
510        ENDIF
511        IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==0)) THEN
512            IF (SYM .EQ. 0) THEN
513                X => LRB1%Q
514            ELSE
515                allocate(X(LRB1%M,LRB1%N),stat=allocok)
516                IF (allocok > 0) THEN
517                  MREQ = LRB1%M*LRB1%N
518                  GOTO 860
519                ENDIF
520                DO J=1,LRB1%N
521                    DO I=1,LRB1%M
522                        X(I,J) = LRB1%Q(I,J)
523                    ENDDO
524                ENDDO
525                CALL DMUMPS_LRGEMM_SCALING(LRB1, X, A, LA, POSELTD,
526     &                   LD_DIAG, IW2, POSELTT, NFRONT, BLOCK,
527     &                   MAXI_CLUSTER)
528            ENDIF
529            SIDE   =  'N'
530            LDX    =  LRB1%M
531            TRANSX =  TRANSB1
532            Z      => LRB2%Q
533            LDZ    =  LRB2%M
534            TRANSZ =  TRANSB2
535            IF (TRANSB1 == 'N') THEN
536                M_X  = LRB1%M
537                K_XY = LRB1%N
538            ELSE ! TRANSB1 == 'T'
539                M_X  = LRB1%N
540                K_XY = LRB1%M
541            ENDIF
542            IF (TRANSB2 == 'N') THEN
543                N_Z =  LRB2%N
544            ELSE ! TRANSB2 == 'T'
545                N_Z =  LRB2%M
546            ENDIF
547        ENDIF
548        IF (SIDE == 'L') THEN ! LEFT: XY_YZ = X*Y; A = XY_YZ*Z
549            allocate(XY_YZ(M_X,K_YZ),stat=allocok)
550            IF (allocok > 0) THEN
551              MREQ = M_X*K_YZ
552              GOTO 860
553            ENDIF
554            CALL dgemm(TRANSX , TRANSY , M_X , K_YZ, K_XY, ONE,
555     &             X(1,1), LDX, Y(1,1), LDY, ZERO, XY_YZ(1,1), M_X)
556            CALL SYSTEM_CLOCK(T1)
557            CALL dgemm('N', TRANSZ, M_X, N_Z, K_YZ, ALPHA,
558     &             XY_YZ(1,1), M_X, Z(1,1), LDZ, BETA, A(POSELTT),
559     &             NFRONT)
560            CALL SYSTEM_CLOCK(T2,CR)
561            LOC_UPDT_TIME_OUT = dble(T2-T1)/dble(CR)
562            CALL UPDATE_UPDT_TIME_OUT(LOC_UPDT_TIME_OUT)
563            deallocate(XY_YZ)
564        ELSEIF (SIDE == 'R') THEN ! RIGHT: XY_YZ = Y*Z; A = X*XY_YZ
565            allocate(XY_YZ(K_XY,N_Z),stat=allocok)
566            IF (allocok > 0) THEN
567              MREQ = K_XY*N_Z
568              GOTO 860
569            ENDIF
570            CALL dgemm(TRANSY , TRANSZ , K_XY , N_Z, K_YZ, ONE,
571     &             Y(1,1), LDY, Z(1,1), LDZ, ZERO, XY_YZ(1,1), K_XY)
572            CALL SYSTEM_CLOCK(T1)
573            CALL dgemm(TRANSX, 'N', M_X, N_Z, K_XY, ALPHA,
574     &             X(1,1), LDX, XY_YZ(1,1), K_XY, BETA, A(POSELTT),
575     &             NFRONT)
576            CALL SYSTEM_CLOCK(T2,CR)
577            LOC_UPDT_TIME_OUT = dble(T2-T1)/dble(CR)
578            CALL UPDATE_UPDT_TIME_OUT(LOC_UPDT_TIME_OUT)
579            deallocate(XY_YZ)
580        ELSE ! SIDE == 'N' : NONE; A = X*Z
581            CALL dgemm(TRANSX, TRANSZ, M_X, N_Z, K_XY, ALPHA,
582     &             X(1,1), LDX, Z(1,1), LDZ, BETA, A(POSELTT),
583     &             NFRONT)
584        ENDIF
585        GOTO 870
586  860 CONTINUE
587C       Alloc NOT ok!!
588        write(*,*) 'Allocation problem in BLR routine DMUMPS_LRGEMM3: ',
589     &    'not enough memory? memory requested = ' , MREQ
590        IFLAG  = - 13
591        IERROR = MREQ
592        RETURN
593  870 CONTINUE
594C       Alloc ok!!
595        IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==0)) THEN
596            IF (SYM .NE. 0) deallocate(X)
597        ELSEIF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==1)) THEN
598            IF (SYM .NE. 0) deallocate(Y)
599        ELSEIF ((LRB1%LRFORM==1).AND.(LRB2%LRFORM==0)) THEN
600            IF (SYM .NE. 0) deallocate(Y)
601        ELSE ! 1 AND 1
602            IF ((TRANSB1=='N').AND.(SYM .NE. 0)) deallocate(Y1)
603            IF ((COMPRESS_MID_PRODUCT.GE.1).AND.BUILDQ) THEN
604                deallocate(XQ)
605                deallocate(R_Y)
606            ELSE
607                deallocate(Y)
608            ENDIF
609        ENDIF
610 700    CONTINUE
611      END SUBROUTINE DMUMPS_LRGEMM3
612      SUBROUTINE MAX_CLUSTER(CUT,CUT_SIZE,MAXI_CLUSTER)
613        INTEGER, INTENT(IN) :: CUT_SIZE
614        INTEGER, intent(out) :: MAXI_CLUSTER
615        INTEGER, POINTER, DIMENSION(:) :: CUT
616        INTEGER :: I
617        MAXI_CLUSTER = 0
618        DO I = 1, CUT_SIZE
619          IF (CUT(I+1) - CUT(I) .GE. MAXI_CLUSTER) THEN
620            MAXI_CLUSTER = CUT(I+1) - CUT(I)
621          END IF
622        END DO
623      END SUBROUTINE MAX_CLUSTER
624      END MODULE DMUMPS_LR_CORE
625      SUBROUTINE DMUMPS_TRUNCATED_RRQR( M, N, A, LDA, JPVT, TAU, WORK,
626     &     LDW, RWORK, TOLEPS, RANK, MAXRANK, INFO)
627C     This routine computes a Rank-Revealing QR factorization of a dense
628C     matrix A. The factorization is truncated when the absolute value of
629C     a diagonal coefficient of the R factor becomes smaller than a
630C     prescribed threshold TOLEPS. The resulting partial Q and R factors
631C     provide a rank-k approximation of the input matrix A with accuracy
632C     TOLEPS.
633C
634C     This routine is obtained by merging the LAPACK
635C     (http://www.netlib.org/lapack/) CGEQP3 and CLAQPS routines and by
636C     applying a minor modification to the outer factorization loop in
637C     order to stop computations as soon as possible when the required
638C     accuracy is reached.
639C
640C     The authors of the LAPACK library are:
641C     - Univ. of Tennessee
642C     - Univ. of California Berkeley
643C     - Univ. of Colorado Denver
644C     - NAG Ltd.
645      IMPLICIT NONE
646      INTEGER            ::  INFO, LDA, LDW, M, N, RANK, MAXRANK
647      DOUBLE PRECISION               ::  TOLEPS
648      INTEGER            ::  JPVT(*)
649      DOUBLE PRECISION               ::  RWORK(*)
650      DOUBLE PRECISION            ::  A(LDA,*), TAU(*)
651      DOUBLE PRECISION            ::  WORK(LDW,*)
652      INTEGER, PARAMETER ::  INB=1, INBMIN=2
653      INTEGER            :: J, JB, MINMN, NB
654      INTEGER            :: OFFSET, ITEMP
655      INTEGER            :: LSTICC, PVT, K, RK
656      DOUBLE PRECISION               :: TEMP, TEMP2, TOL3Z
657      DOUBLE PRECISION            :: AKK
658      DOUBLE PRECISION, PARAMETER    :: RZERO=0.0D+0, RONE=1.0D+0
659      DOUBLE PRECISION :: ZERO
660      DOUBLE PRECISION :: ONE
661      PARAMETER          ( ONE = 1.0D+0 )
662      PARAMETER          ( ZERO = 0.0D+0 )
663      DOUBLE PRECISION               :: dlamch
664      INTEGER            :: ilaenv, idamax
665      EXTERNAL           :: idamax, dlamch
666      EXTERNAL           dgeqrf, dormqr, xerbla
667      EXTERNAL           ilaenv
668      EXTERNAL           dgemm, dgemv, dlarfg, dswap
669      DOUBLE PRECISION, EXTERNAL :: dnrm2
670      INFO = 0
671      IF( M.LT.0 ) THEN
672         INFO = -1
673      ELSE IF( N.LT.0 ) THEN
674         INFO = -2
675      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
676         INFO = -4
677      END IF
678      IF( INFO.EQ.0 ) THEN
679         IF( LDW.LT.N ) THEN
680            INFO = -8
681         END IF
682      END IF
683      IF( INFO.NE.0 ) THEN
684         CALL XERBLA( 'CGEQP3', -INFO )
685         RETURN
686      END IF
687      MINMN = MIN(M,N)
688      IF( MINMN.EQ.0 ) THEN
689         RETURN
690      END IF
691      NB = ILAENV( INB, 'CGEQRF', ' ', M, N, -1, -1 )
692C
693C     Avoid pointers (and TARGET attribute on RWORK/WORK)
694C     because of implicit interface. An implicit interface
695C     is needed to avoid intermediate array copies
696C     VN1  => RWORK(1:N)
697C     VN2  => RWORK(N+1:2*N)
698C     AUXV => WORK(1:LDW,1:1)
699C     F    => WORK(1:LDW,2:NB+1)
700C     LDF  =  LDW
701*     Initialize partial column norms. The first N elements of work
702*     store the exact column norms.
703      DO J = 1, N
704C        VN1( J ) = dnrm2( M, A( 1, J ), 1 )
705         RWORK( J ) = dnrm2( M, A( 1, J ), 1 )
706C        VN2( J ) = VN1( J )
707         RWORK( N + J ) = RWORK( J )
708         JPVT(J) = J
709      END DO
710      OFFSET = 0
711      TOL3Z  = SQRT(dlamch('Epsilon'))
712      DO
713         JB     = MIN(NB,MINMN-OFFSET)
714         LSTICC = 0
715         K      = 0
716         DO
717            IF(K.EQ.JB) EXIT
718            K   = K+1
719            RK  = OFFSET+K
720C           PVT = ( RK-1 ) + IDAMAX( N-RK+1, VN1( RK ), 1 )
721            PVT = ( RK-1 ) + IDAMAX( N-RK+1, RWORK( RK ), 1 )
722C           IF(VN1(PVT).LT.TOLEPS) THEN
723            IF(RWORK(PVT).LT.TOLEPS) THEN
724               RANK = RK-1
725               RETURN
726            END IF
727            IF (RK.GT.MAXRANK) THEN
728               RANK = RK
729               INFO = RK
730               RETURN
731            END IF
732            IF( PVT.NE.RK ) THEN
733               CALL dswap( M, A( 1, PVT ), 1, A( 1, RK ), 1 )
734c              CALL dswap( K-1, F( PVT-OFFSET, 1 ), LDF,
735c    &              F( K, 1 ), LDF )
736               CALL dswap( K-1, WORK( PVT-OFFSET, 2 ), LDW,
737     &              WORK( K, 2 ), LDW )
738               ITEMP     = JPVT(PVT)
739               JPVT(PVT) = JPVT(RK)
740               JPVT(RK)  = ITEMP
741C              VN1(PVT)  = VN1(RK)
742C              VN2(PVT)  = VN2(RK)
743               RWORK(PVT)    = RWORK(RK)
744               RWORK(N+PVT)  = RWORK(N+RK)
745            END IF
746*     Apply previous Householder reflectors to column K:
747*     A(RK:M,RK) := A(RK:M,RK) - A(RK:M,OFFSET+1:RK-1)*F(K,1:K-1)**H.
748            IF( K.GT.1 ) THEN
749               CALL dgemv( 'No transpose', M-RK+1, K-1, -ONE,
750C    &              A(RK,OFFSET+1), LDA, F(K,1), LDF,
751     &              A(RK,OFFSET+1), LDA, WORK(K,2), LDW,
752     &              ONE, A(RK,RK), 1 )
753            END IF
754*     Generate elementary reflector H(k).
755            IF( RK.LT.M ) THEN
756               CALL dlarfg( M-RK+1, A(RK,RK), A(RK+1,RK), 1, TAU(RK) )
757            ELSE
758               CALL dlarfg( 1, A(RK,RK), A(RK,RK), 1, TAU(RK) )
759            END IF
760            AKK      = A(RK,RK)
761            A(RK,RK) = ONE
762*     Compute Kth column of F:
763*     F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**H*A(RK:M,K).
764            IF( RK.LT.N ) THEN
765               CALL dgemv( 'Transpose', M-RK+1, N-RK, TAU(RK),
766     &              A(RK,RK+1), LDA, A(RK,RK), 1, ZERO,
767C    &              F( K+1, K ), 1 )
768     &              WORK( K+1, K+1 ), 1 )
769            END IF
770*     Padding F(1:K,K) with zeros.
771            DO J = 1, K
772C              F( J, K ) = ZERO
773               WORK( J, K+1 ) = ZERO
774            END DO
775*     Incremental updating of F:
776*     F(1:N,K) := F(1:N-OFFSET,K) - tau(RK)*F(1:N,1:K-1)*A(RK:M,OFFSET+1:RK-1)**H*A(RK:M,RK).
777            IF( K.GT.1 ) THEN
778               CALL dgemv( 'Transpose', M-RK+1, K-1, -TAU(RK),
779     &              A(RK,OFFSET+1), LDA, A(RK,RK), 1, ZERO,
780     &              WORK(1,1), 1 )
781C    &              AUXV(1,1), 1 )
782               CALL dgemv( 'No transpose', N-OFFSET, K-1, ONE,
783     &              WORK(1,2), LDW, WORK(1,1), 1, ONE, WORK(1,K+1), 1 )
784C    &              F(1,1), LDF, AUXV(1,1), 1, ONE, F(1,K), 1 )
785            END IF
786*     Update the current row of A:
787*     A(RK,RK+1:N) := A(RK,RK+1:N) - A(RK,OFFSET+1:RK)*F(K+1:N,1:K)**H.
788            IF( RK.LT.N ) THEN
789C              CALL dgemv( 'No Transpose', N-RK, K, -ONE, F( K+1, 1 ),
790               CALL dgemv( 'No Transpose', N-RK, K, -ONE, WORK( K+1,2 ),
791     &              LDW,
792     &              A( RK, OFFSET+1 ), LDA, ONE, A( RK, RK+1 ), LDA )
793            END IF
794*     Update partial column norms.
795*
796            IF( RK.LT.MINMN ) THEN
797               DO J = RK + 1, N
798C                 IF( VN1( J ).NE.RZERO ) THEN
799                  IF( RWORK( J ).NE.RZERO ) THEN
800*
801*     NOTE: The following 4 lines follow from the analysis in
802*     Lapack Working Note 176.
803*
804C                    TEMP = ABS( A( RK, J ) ) / VN1( J )
805                     TEMP = ABS( A( RK, J ) ) / RWORK( J )
806                     TEMP = MAX( RZERO, ( RONE+TEMP )*( RONE-TEMP ) )
807C                    TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2
808                     TEMP2 = TEMP*( RWORK( J ) / RWORK( N+J ) )**2
809                     IF( TEMP2 .LE. TOL3Z ) THEN
810C                       VN2( J ) = dble( LSTICC )
811                        RWORK( N+J ) = dble( LSTICC )
812                        LSTICC = J
813                     ELSE
814C                       VN1( J ) = VN1( J )*SQRT( TEMP )
815                        RWORK( J ) = RWORK( J )*SQRT( TEMP )
816                     END IF
817                  END IF
818               END DO
819            END IF
820            A( RK, RK ) = AKK
821            IF (LSTICC.NE.0) EXIT
822         END DO
823*     Apply the block reflector to the rest of the matrix:
824*     A(RK+1:M,RK+1:N) := A(RK+1:M,RK+1:N) -
825*     A(RK+1:M,OFFSET+1:RK)*F(K+1:N-OFFSET,1:K)**H.
826         IF( RK.LT.MIN(N,M) ) THEN
827            CALL dgemm( 'No transpose', 'Transpose', M-RK,
828     &           N-RK, K, -ONE, A(RK+1,OFFSET+1), LDA,
829C    &           F(K+1,1), LDF, ONE, A(RK+1,RK+1), LDA )
830     &           WORK(K+1,2), LDW, ONE, A(RK+1,RK+1), LDA )
831         END IF
832*     Recomputation of difficult columns.
833         DO WHILE( LSTICC.GT.0 )
834C           ITEMP = NINT( VN2( LSTICC ) )
835            ITEMP = NINT( RWORK( N + LSTICC ) )
836C           VN1( LSTICC ) = dnrm2( M-RK, A( RK+1, LSTICC ), 1 )
837            RWORK( LSTICC ) = dnrm2( M-RK, A( RK+1, LSTICC ), 1 )
838*
839*     NOTE: The computation of RWORK( LSTICC ) relies on the fact that
840*     SNRM2 does not fail on vectors with norm below the value of
841*     SQRT(DLAMCH('S'))
842*
843C           VN2( LSTICC ) = VN1( LSTICC )
844            RWORK( N + LSTICC ) = RWORK( LSTICC )
845            LSTICC = ITEMP
846         END DO
847         IF(RK.GE.MINMN) EXIT
848         OFFSET = RK
849      END DO
850      RANK = RK
851      END SUBROUTINE DMUMPS_TRUNCATED_RRQR
852