1C
2C  This file is part of MUMPS 5.1.2, released
3C  on Mon Oct  2 07:37:01 UTC 2017
4C
5C
6C  Copyright 1991-2017 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria,
7C  University of Bordeaux.
8C
9C  This version of MUMPS is provided to you free of charge. It is
10C  released under the CeCILL-C license:
11C  http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html
12C
13      MODULE CMUMPS_LR_CORE
14      USE MUMPS_LR_COMMON
15      USE CMUMPS_LR_TYPE
16      USE CMUMPS_LR_STATS
17!$    USE OMP_LIB
18      IMPLICIT NONE
19      CONTAINS
20      SUBROUTINE INIT_LRB(LRB_OUT,K,KSVD,M,N,ISLR)
21        TYPE(LRB_TYPE), INTENT(OUT) :: LRB_OUT
22        INTEGER,INTENT(IN) :: K,KSVD,M,N
23        LOGICAL,INTENT(IN) :: ISLR
24C This routine simply initializes a LR block but does NOT allocate it
25        LRB_OUT%M = M
26        LRB_OUT%N = N
27        LRB_OUT%K = K
28        LRB_OUT%KSVD = KSVD
29        LRB_OUT%ISLR = ISLR
30        NULLIFY(LRB_OUT%Q)
31        NULLIFY(LRB_OUT%R)
32        IF (ISLR) THEN
33          LRB_OUT%LRFORM = 1
34        ELSE
35          LRB_OUT%LRFORM = 0
36        ENDIF
37      END SUBROUTINE INIT_LRB
38      SUBROUTINE IS_FRONT_BLR_CANDIDATE(INODE, NFRONT, NASS, K486, K489,
39     &                    K490, K491, K492, N, LRGROUPS, LRSTATUS)
40        INTEGER,INTENT(IN) :: INODE, NFRONT, NASS, K486, K489, K490,
41     &                        K491, K492
42        INTEGER,INTENT(IN) :: N, LRGROUPS(N)
43        INTEGER,INTENT(OUT):: LRSTATUS
44C
45C     Local variables
46        LOGICAL :: COMPRESS_PANEL, COMPRESS_CB
47        COMPRESS_PANEL = .FALSE.
48        IF ((K486.GT.0).and.(
49     &        ((K492.LT.0).and.INODE.EQ.abs(K492))
50     &        .or.
51     &        ( (K492.GT.0).and.(K491.LE.NFRONT)
52     &        .and.(K490.LE.NASS)))) THEN
53          COMPRESS_PANEL = .TRUE.
54C         Compression for NASS =1 is useless
55          IF (NASS.EQ.1) COMPRESS_PANEL =.FALSE.
56          IF (LRGROUPS (INODE) .LT. 0) COMPRESS_PANEL = .FALSE.
57        ENDIF
58        COMPRESS_CB = .FALSE.
59        IF ((K492.GT.0).AND.(K489.EQ.1).AND.(NFRONT-NASS.GT.K491)) THEN
60          COMPRESS_CB = .TRUE.
61        ENDIF
62        IF (COMPRESS_PANEL.OR.COMPRESS_CB) THEN
63          IF (COMPRESS_CB.AND.(.NOT.COMPRESS_PANEL)) THEN
64            LRSTATUS = 1
65          ELSE IF (COMPRESS_PANEL.AND.(.NOT.COMPRESS_CB)) THEN
66            LRSTATUS = 2
67          ELSE
68            LRSTATUS = 3
69          ENDIF
70        ELSE
71         LRSTATUS = 0
72        ENDIF
73      END SUBROUTINE IS_FRONT_BLR_CANDIDATE
74      SUBROUTINE ALLOC_LRB(LRB_OUT,K,KSVD,M,N,ISLR,IFLAG,IERROR,KEEP8)
75        TYPE(LRB_TYPE), INTENT(OUT) :: LRB_OUT
76        INTEGER,INTENT(IN) :: K,KSVD,M,N
77        INTEGER,INTENT(OUT) :: IFLAG, IERROR
78        LOGICAL,INTENT(IN) :: ISLR
79        INTEGER(8) :: KEEP8(150)
80        INTEGER :: MEM, allocok
81        COMPLEX :: ZERO
82        PARAMETER (ZERO=(0.0E0,0.0E0))
83        IF (ISLR) THEN
84          IF (K.EQ.0) THEN
85            nullify(LRB_OUT%Q)
86            nullify(LRB_OUT%R)
87          ELSE
88            allocate(LRB_OUT%Q(M,K),LRB_OUT%R(K,N),stat=allocok)
89            IF (allocok > 0) THEN
90              IFLAG  = -13
91              IERROR = K*(M+N)
92              write(*,*) 'Allocation problem in BLR routine ALLOC_LRB:',
93     &             ' not enough memory? memory requested = ' , IERROR
94              RETURN
95            ENDIF
96          ENDIF
97        ELSE
98          allocate(LRB_OUT%Q(M,N),stat=allocok)
99          IF (allocok > 0) THEN
100            IFLAG  = -13
101            IERROR = M*N
102            write(*,*) 'Allocation problem in BLR routine ALLOC_LRB:',
103     &           ' not enough memory? memory requested = ' , IERROR
104            RETURN
105          ENDIF
106          nullify(LRB_OUT%R)
107        ENDIF
108        LRB_OUT%M = M
109        LRB_OUT%N = N
110        LRB_OUT%K = K
111        LRB_OUT%KSVD = KSVD
112        LRB_OUT%ISLR = ISLR
113        IF (ISLR) THEN
114          LRB_OUT%LRFORM = 1
115        ELSE
116          LRB_OUT%LRFORM = 0
117        ENDIF
118        IF (ISLR) THEN
119          MEM = M*K + N*K
120        ELSE
121          MEM = M*N
122        ENDIF
123        KEEP8(70) = KEEP8(70) - int(MEM,8)
124        KEEP8(68) = min(KEEP8(70), KEEP8(68))
125        KEEP8(71) = KEEP8(71) - int(MEM,8)
126        KEEP8(69) = min(KEEP8(71), KEEP8(69))
127      END SUBROUTINE ALLOC_LRB
128      SUBROUTINE REGROUPING2(CUT, NPARTSASS, NASS,
129     &                   NPARTSCB, NCB, IBCKSZ, ONLYCB, K472)
130        INTEGER, INTENT(IN) :: IBCKSZ, NASS, NCB
131        INTEGER, INTENT(INOUT) :: NPARTSCB, NPARTSASS
132        INTEGER, POINTER, DIMENSION(:) :: CUT
133        INTEGER, POINTER, DIMENSION(:) :: NEW_CUT
134        INTEGER :: I, INEW, MINSIZE, NEW_NPARTSASS
135        LOGICAL :: ONLYCB, TRACE
136        INTEGER, INTENT(IN) :: K472
137        INTEGER :: IBCKSZ2
138        ALLOCATE(NEW_CUT(max(NPARTSASS,1)+NPARTSCB+1))
139        CALL COMPUTE_BLR_VCS(K472, IBCKSZ2, IBCKSZ, NASS)
140        MINSIZE = int(IBCKSZ2 / 2)
141        NEW_NPARTSASS = max(NPARTSASS,1)
142        IF (.NOT. ONLYCB) THEN
143           NEW_CUT(1) = 1
144           INEW = 2
145           I = 2
146           DO WHILE (I .LE. NPARTSASS + 1)
147              NEW_CUT(INEW) = CUT(I)
148              TRACE = .FALSE.
149              IF (NEW_CUT(INEW) - NEW_CUT(INEW-1) .GT. MINSIZE) THEN
150                 INEW = INEW + 1
151                 TRACE = .TRUE.
152              ENDIF
153              I = I + 1
154           END DO
155           IF (TRACE) THEN
156              INEW = INEW - 1
157           ELSE
158              IF (INEW .NE. 2) THEN
159                 NEW_CUT(INEW-1) = NEW_CUT(INEW)
160                 INEW = INEW - 1
161              ENDIF
162           ENDIF
163           NEW_NPARTSASS = INEW - 1
164        ENDIF
165        IF (ONLYCB) THEN
166           DO I=1,max(NPARTSASS,1)+1
167              NEW_CUT(I) = CUT(I)
168           ENDDO
169        ENDIF
170        IF (NCB .EQ. 0) GO TO 50
171        INEW = NEW_NPARTSASS+2
172        I = max(NPARTSASS,1) + 2
173        DO WHILE (I .LE. max(NPARTSASS,1) + NPARTSCB + 1)
174              NEW_CUT(INEW) = CUT(I)
175              TRACE = .FALSE.
176              IF (NEW_CUT(INEW) - NEW_CUT(INEW-1) .GT. MINSIZE) THEN
177                 INEW = INEW + 1
178                 TRACE = .TRUE.
179              ENDIF
180              I = I + 1
181        END DO
182        IF (TRACE) THEN
183           INEW = INEW - 1
184        ELSE
185           IF (INEW .NE.  NEW_NPARTSASS+2) THEN
186           NEW_CUT(INEW-1) = NEW_CUT(INEW)
187              INEW = INEW - 1
188           ENDIF
189        ENDIF
190        NPARTSCB = INEW - 1 - NEW_NPARTSASS
191 50     CONTINUE
192        NPARTSASS = NEW_NPARTSASS
193        DEALLOCATE(CUT)
194        ALLOCATE(CUT(NPARTSASS+NPARTSCB+1))
195        DO I=1,NPARTSASS+NPARTSCB+1
196           CUT(I) = NEW_CUT(I)
197        ENDDO
198        DEALLOCATE(NEW_CUT)
199      END SUBROUTINE REGROUPING2
200      SUBROUTINE CMUMPS_LRGEMM_SCALING(LRB, SCALED, A, LA, POSELTD,
201     &          LD_DIAG, IW2, POSELTT, NFRONT, BLOCK, MAXI_CLUSTER)
202C This routine does the scaling (for the symmetric case) before
203C computing the LR product (done in CMUMPS_LRGEMM3)
204        TYPE(LRB_TYPE),INTENT(IN) :: LRB
205        INTEGER(8), intent(in)  :: LA
206        COMPLEX, intent(inout)  :: A(LA)
207        COMPLEX, intent(inout), DIMENSION(:,:)  :: SCALED
208        INTEGER,INTENT(IN) :: LD_DIAG, NFRONT, IW2(*)
209        INTEGER(8), INTENT(IN) :: POSELTD, POSELTT
210        INTEGER, INTENT(IN) :: MAXI_CLUSTER
211        COMPLEX, intent(inout)  :: BLOCK(MAXI_CLUSTER)
212        INTEGER :: J, NROWS
213        COMPLEX :: PIV1, PIV2, OFFDIAG
214        IF (LRB%LRFORM.EQ.1) THEN
215            NROWS = LRB%K
216        ELSE ! Full Rank Block
217            NROWS = LRB%M
218        ENDIF
219        J = 1
220        DO WHILE (J <= LRB%N)
221            IF (IW2(J) > 0) THEN
222                SCALED(1:NROWS,J) = A(POSELTD+LD_DIAG*(J-1)+J-1)
223     &           * SCALED(1:NROWS,J)
224                J = J+1
225            ELSE !2x2 pivot
226                PIV1    = A(POSELTD+LD_DIAG*(J-1)+J-1)
227                PIV2    = A(POSELTD+LD_DIAG*J+J)
228                OFFDIAG = A(POSELTD+LD_DIAG*(J-1)+J)
229                BLOCK(1:NROWS)    = SCALED(1:NROWS,J)
230                SCALED(1:NROWS,J) = PIV1 * SCALED(1:NROWS,J)
231     &            + OFFDIAG * SCALED(1:NROWS,J+1)
232                SCALED(1:NROWS,J+1) = OFFDIAG * BLOCK(1:NROWS)
233     &            + PIV2 * SCALED(1:NROWS,J+1)
234                 J=J+2
235            ENDIF
236        END DO
237      END SUBROUTINE CMUMPS_LRGEMM_SCALING
238      SUBROUTINE CMUMPS_LRGEMM3(TRANSB1, TRANSB2, ALPHA,
239     &           LRB1, LRB2, BETA, A, LA, POSELTT, NFRONT, SYM, NIV,
240     &           IFLAG, IERROR,
241     &           COMPRESS_MID_PRODUCT, TOLEPS, KPERCENT, RANK, BUILDQ,
242     &           POSELTD, LD_DIAG, IW2, BLOCK, MAXI_CLUSTER)
243        TYPE(LRB_TYPE),INTENT(IN) :: LRB1,LRB2
244        INTEGER(8), intent(in)  :: LA
245        COMPLEX, intent(inout)  :: A(LA)
246        INTEGER,INTENT(IN) :: NFRONT, SYM, NIV
247        INTEGER,INTENT(OUT) :: IFLAG, IERROR
248        INTEGER(8), INTENT(IN) :: POSELTT
249        INTEGER(8), INTENT(IN), OPTIONAL :: POSELTD
250        INTEGER,INTENT(IN), OPTIONAL :: LD_DIAG, IW2(*)
251        INTEGER, INTENT(IN), OPTIONAL :: MAXI_CLUSTER
252        CHARACTER(len=1),INTENT(IN) :: TRANSB1, TRANSB2
253        INTEGER,intent(in) :: COMPRESS_MID_PRODUCT, KPERCENT
254        REAL, intent(in) :: TOLEPS
255        COMPLEX :: ALPHA,BETA
256        COMPLEX, intent(inout), OPTIONAL  :: BLOCK(:)
257        COMPLEX, ALLOCATABLE, DIMENSION(:,:) :: XY_YZ
258        COMPLEX, ALLOCATABLE, TARGET, DIMENSION(:,:) :: XQ, R_Y
259        COMPLEX, POINTER, DIMENSION(:,:) :: X, Y, Y1, Y2, Z
260        CHARACTER(len=1) :: SIDE, TRANSX, TRANSY, TRANSZ
261        INTEGER :: M_X, K_XY, K_YZ, N_Z, LDX, LDY, LDY1, LDY2, LDZ, K_Y
262        INTEGER :: I, J, RANK, MAXRANK, INFO, LWORK
263        LOGICAL :: BUILDQ
264        REAL,    ALLOCATABLE :: RWORK_RRQR(:)
265        COMPLEX, ALLOCATABLE :: WORK_RRQR(:), TAU_RRQR(:),
266     &                          Y_RRQR(:,:)
267        INTEGER, ALLOCATABLE :: JPVT_RRQR(:)
268        INTEGER :: T1, T2, CR
269        INTEGER :: allocok, MREQ
270        DOUBLE PRECISION :: LOC_UPDT_TIME_OUT
271        REAL, EXTERNAL ::scnrm2
272        COMPLEX :: ONE, MONE, ZERO
273        PARAMETER (ONE=(1.0E0,0.0E0), MONE=(-1.0E0,0.0E0))
274        PARAMETER (ZERO=(0.0E0,0.0E0))
275        IF (LRB2%M.EQ.0) THEN
276          write(*,*) "Internal error in CMUMPS_LRGEMM3, LRB2%M=0"
277          CALL MUMPS_ABORT()
278        ENDIF
279        IF ((SYM.NE.0).AND.((TRANSB1.NE.'N').OR.(TRANSB2.NE.'T'))) THEN
280            WRITE(*,*) "SYM > 0 and (", TRANSB1, ",", TRANSB2,
281     &                ") parameters found. Symmetric LRGEMM is only ",
282     &                 "compatible with (N,T) parameters"
283            CALL MUMPS_ABORT()
284        ENDIF
285        RANK = 0
286        BUILDQ = .FALSE.
287        IF ((LRB1%LRFORM==1).AND.(LRB2%LRFORM==1)) THEN
288            IF ((LRB1%K.EQ.0).OR.(LRB2%K.EQ.0)) GOTO 700
289            allocate(Y(LRB1%K,LRB2%K),stat=allocok)
290            IF (allocok > 0) THEN
291              MREQ = LRB1%K*LRB2%K
292              GOTO 860
293            ENDIF
294            IF (TRANSB1 == 'N') THEN
295                X    => LRB1%Q
296                LDX  =  LRB1%M
297                M_X  =  LRB1%M
298                K_Y  =  LRB1%N
299                IF (SYM .EQ. 0) THEN
300                    Y1  => LRB1%R
301                ELSE
302                    allocate(Y1(LRB1%K,LRB1%N),stat=allocok)
303                    IF (allocok > 0) THEN
304                      MREQ = LRB1%K*LRB1%N
305                      GOTO 860
306                    ENDIF
307                    DO J=1,LRB1%N
308                        DO I=1,LRB1%K
309                            Y1(I,J) = LRB1%R(I,J)
310                        ENDDO
311                    ENDDO
312                    CALL CMUMPS_LRGEMM_SCALING(LRB1, Y1, A, LA, POSELTD,
313     &                     LD_DIAG, IW2, POSELTT, NFRONT, BLOCK,
314     &                     MAXI_CLUSTER)
315                ENDIF
316                LDY1 =  LRB1%K
317            ELSE  !TRANSB1 == 'T'
318                M_X  =  LRB1%N
319                X    => LRB1%R
320                LDX  =  LRB1%K
321                K_Y  =  LRB1%M
322                Y1   => LRB1%Q
323                LDY1 =  LRB1%M
324            ENDIF
325            IF (TRANSB2 == 'N') THEN
326                Z    => LRB2%R
327                LDZ  =  LRB2%K
328                N_Z  =  LRB2%N
329                Y2   => LRB2%Q
330                LDY2 =  LRB2%M
331            ELSE  !TRANSB2 == 'T'
332                N_Z  =  LRB2%M
333                Z    => LRB2%Q
334                LDZ  =  LRB2%M
335                Y2   => LRB2%R
336                LDY2 =  LRB2%K
337            ENDIF
338            TRANSZ = TRANSB2
339            CALL cgemm(TRANSB1 , TRANSB2 , LRB1%K , LRB2%K, K_Y, ONE,
340     &            Y1(1,1), LDY1, Y2(1,1), LDY2, ZERO, Y(1,1), LRB1%K )
341            BUILDQ = .FALSE.
342            IF (COMPRESS_MID_PRODUCT.GE.1) THEN
343                LWORK = MAX(LRB2%K**2, M_X**2)
344                allocate(Y_RRQR(LRB1%K,LRB2%K),
345     &               WORK_RRQR(LWORK), RWORK_RRQR(2*LRB2%K),
346     &               TAU_RRQR(MIN(LRB1%K,LRB2%K)),
347     &               JPVT_RRQR(LRB2%K),stat=allocok)
348                IF (allocok > 0) THEN
349                  MREQ = LRB1%K*LRB2%K + LWORK + 2*LRB2%K +
350     &                   MIN(LRB1%K,LRB2%K) + LRB2%K
351                  GOTO 860
352                ENDIF
353                DO J=1,LRB2%K
354                    DO I=1,LRB1%K
355                        Y_RRQR(I,J) = Y(I,J)
356                    ENDDO
357                ENDDO
358                MAXRANK = MIN(LRB1%K, LRB2%K)-1
359                MAXRANK = max (1, int((MAXRANK*KPERCENT/100)))
360                JPVT_RRQR = 0
361                CALL CMUMPS_TRUNCATED_RRQR(LRB1%K, LRB2%K, Y_RRQR(1,1),
362     &               LRB1%K, JPVT_RRQR(1), TAU_RRQR(1), WORK_RRQR(1),
363     &               LRB2%K, RWORK_RRQR(1), TOLEPS, RANK, MAXRANK, INFO)
364                IF ((RANK.GT.MAXRANK).OR.(RANK.EQ.0)) THEN
365                    deallocate(Y_RRQR, WORK_RRQR, RWORK_RRQR, TAU_RRQR,
366     &                     JPVT_RRQR)
367                    BUILDQ = .FALSE.
368                ELSE
369                    BUILDQ = .TRUE.
370                ENDIF
371                IF (BUILDQ) THEN ! Successfully compressed middle block
372                  allocate(XQ(M_X,RANK), R_Y(RANK,LRB2%K),stat=allocok)
373                  IF (allocok > 0) THEN
374                    MREQ = M_X*RANK + RANK*LRB2%K
375                    GOTO 860
376                  ENDIF
377                    DO J=1, LRB2%K
378                       R_Y(1:MIN(RANK,J),JPVT_RRQR(J)) =
379     &                   Y_RRQR(1:MIN(RANK,J),J)
380                       IF(J.LT.RANK) R_Y(MIN(RANK,J)+1:
381     &                   RANK,JPVT_RRQR(J))= ZERO
382                    END DO
383                    CALL cungqr
384     &                  (LRB1%K, RANK, RANK, Y_RRQR(1,1),
385     &                  LRB1%K, TAU_RRQR(1),
386     &                  WORK_RRQR(1), LWORK, INFO )
387                    CALL cgemm(TRANSB1, 'N', M_X, RANK, LRB1%K, ONE,
388     &                    X(1,1), LDX, Y_RRQR(1,1), LRB1%K, ZERO,
389     &                    XQ(1,1), M_X)
390                    deallocate(Y_RRQR, WORK_RRQR, RWORK_RRQR, TAU_RRQR,
391     &                         JPVT_RRQR)
392                    nullify(X)
393                    X      => XQ
394                    LDX    =  M_X
395                    K_XY   =  RANK
396                    TRANSX =  'N'
397                    deallocate(Y)
398                    nullify(Y)
399                    Y      => R_Y
400                    LDY    =  RANK
401                    K_YZ   =  LRB2%K
402                    TRANSY =  'N'
403                    SIDE   = 'R'
404                ENDIF
405            ENDIF
406            IF (.NOT.BUILDQ) THEN
407                LDY    = LRB1%K
408                K_XY   = LRB1%K
409                K_YZ   = LRB2%K
410                TRANSX = TRANSB1
411                TRANSY = 'N'
412                IF (LRB1%K .GE. LRB2%K) THEN
413                    SIDE = 'L'
414                ELSE ! LRB1%K < LRB2%K
415                    SIDE = 'R'
416                ENDIF
417            ENDIF
418        ENDIF
419        IF ((LRB1%LRFORM==1).AND.(LRB2%LRFORM==0)) THEN
420            IF (LRB1%K.EQ.0) GOTO 700
421            SIDE   =  'R'
422            K_XY   =  LRB1%K
423            TRANSX =  TRANSB1
424            TRANSY =  TRANSB1
425            Z      => LRB2%Q
426            LDZ    =  LRB2%M
427            TRANSZ =  TRANSB2
428            IF (TRANSB1 == 'N') THEN
429                X   => LRB1%Q
430                LDX =  LRB1%M
431                M_X =  LRB1%M
432                LDY =  LRB1%K
433                IF (SYM .EQ. 0) THEN
434                    Y   => LRB1%R
435                ELSE
436                    allocate(Y(LRB1%K,LRB1%N),stat=allocok)
437                    IF (allocok > 0) THEN
438                      MREQ = LRB1%K*LRB1%N
439                      GOTO 860
440                    ENDIF
441                    DO J=1,LRB1%N
442                        DO I=1,LRB1%K
443                            Y(I,J) = LRB1%R(I,J)
444                        ENDDO
445                    ENDDO
446                    CALL CMUMPS_LRGEMM_SCALING(LRB1, Y, A, LA, POSELTD,
447     &                     LD_DIAG, IW2, POSELTT, NFRONT, BLOCK,
448     &                     MAXI_CLUSTER)
449                ENDIF
450            ELSE ! TRANSB1 == 'T'
451                X   => LRB1%R
452                LDX =  LRB1%K
453                M_X =  LRB1%N
454                Y   => LRB1%Q
455                LDY =  LRB1%M
456            ENDIF
457            IF (TRANSB2 == 'N') THEN
458                K_YZ = LRB2%M
459                N_Z  = LRB2%N
460            ELSE ! TRANSB2 == 'T'
461                K_YZ = LRB2%N
462                N_Z  = LRB2%M
463            ENDIF
464        ENDIF
465        IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==1)) THEN
466            IF (LRB2%K.EQ.0) GOTO 700
467            SIDE   =  'L'
468            K_YZ   =  LRB2%K
469            X      => LRB1%Q
470            LDX    =  LRB1%M
471            TRANSX =  TRANSB1
472            TRANSY =  TRANSB2
473            TRANSZ =  TRANSB2
474            IF (TRANSB1 == 'N') THEN
475                M_X  = LRB1%M
476                K_XY = LRB1%N
477            ELSE ! TRANSB1 == 'T'
478                M_X  = LRB1%N
479                K_XY = LRB1%M
480            ENDIF
481            IF (TRANSB2 == 'N') THEN
482                Y   => LRB2%Q
483                LDY =  LRB2%M
484                Z   => LRB2%R
485                LDZ =  LRB2%K
486                N_Z =  LRB2%N
487            ELSE ! TRANSB2 == 'T'
488                IF (SYM .EQ. 0) THEN
489                    Y  => LRB2%R
490                ELSE ! Symmetric case: column scaling of R2 is done
491                    allocate(Y(LRB2%K,LRB2%N),stat=allocok)
492                    IF (allocok > 0) THEN
493                      MREQ = LRB2%K*LRB2%N
494                      GOTO 860
495                    ENDIF
496                    DO J=1,LRB2%N
497                        DO I=1,LRB2%K
498                            Y(I,J) = LRB2%R(I,J)
499                        ENDDO
500                    ENDDO
501                    CALL CMUMPS_LRGEMM_SCALING(LRB2, Y, A, LA, POSELTD,
502     &                     LD_DIAG, IW2, POSELTT, NFRONT, BLOCK,
503     &                     MAXI_CLUSTER)
504                ENDIF
505                LDY =  LRB2%K
506                Z   => LRB2%Q
507                LDZ =  LRB2%M
508                N_Z =  LRB2%M
509            ENDIF
510        ENDIF
511        IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==0)) THEN
512            IF (SYM .EQ. 0) THEN
513                X => LRB1%Q
514            ELSE
515                allocate(X(LRB1%M,LRB1%N),stat=allocok)
516                IF (allocok > 0) THEN
517                  MREQ = LRB1%M*LRB1%N
518                  GOTO 860
519                ENDIF
520                DO J=1,LRB1%N
521                    DO I=1,LRB1%M
522                        X(I,J) = LRB1%Q(I,J)
523                    ENDDO
524                ENDDO
525                CALL CMUMPS_LRGEMM_SCALING(LRB1, X, A, LA, POSELTD,
526     &                   LD_DIAG, IW2, POSELTT, NFRONT, BLOCK,
527     &                   MAXI_CLUSTER)
528            ENDIF
529            SIDE   =  'N'
530            LDX    =  LRB1%M
531            TRANSX =  TRANSB1
532            Z      => LRB2%Q
533            LDZ    =  LRB2%M
534            TRANSZ =  TRANSB2
535            IF (TRANSB1 == 'N') THEN
536                M_X  = LRB1%M
537                K_XY = LRB1%N
538            ELSE ! TRANSB1 == 'T'
539                M_X  = LRB1%N
540                K_XY = LRB1%M
541            ENDIF
542            IF (TRANSB2 == 'N') THEN
543                N_Z =  LRB2%N
544            ELSE ! TRANSB2 == 'T'
545                N_Z =  LRB2%M
546            ENDIF
547        ENDIF
548        IF (SIDE == 'L') THEN ! LEFT: XY_YZ = X*Y; A = XY_YZ*Z
549            allocate(XY_YZ(M_X,K_YZ),stat=allocok)
550            IF (allocok > 0) THEN
551              MREQ = M_X*K_YZ
552              GOTO 860
553            ENDIF
554            CALL cgemm(TRANSX , TRANSY , M_X , K_YZ, K_XY, ONE,
555     &             X(1,1), LDX, Y(1,1), LDY, ZERO, XY_YZ(1,1), M_X)
556            CALL SYSTEM_CLOCK(T1)
557            CALL cgemm('N', TRANSZ, M_X, N_Z, K_YZ, ALPHA,
558     &             XY_YZ(1,1), M_X, Z(1,1), LDZ, BETA, A(POSELTT),
559     &             NFRONT)
560            CALL SYSTEM_CLOCK(T2,CR)
561            LOC_UPDT_TIME_OUT = dble(T2-T1)/dble(CR)
562            CALL UPDATE_UPDT_TIME_OUT(LOC_UPDT_TIME_OUT)
563            deallocate(XY_YZ)
564        ELSEIF (SIDE == 'R') THEN ! RIGHT: XY_YZ = Y*Z; A = X*XY_YZ
565            allocate(XY_YZ(K_XY,N_Z),stat=allocok)
566            IF (allocok > 0) THEN
567              MREQ = K_XY*N_Z
568              GOTO 860
569            ENDIF
570            CALL cgemm(TRANSY , TRANSZ , K_XY , N_Z, K_YZ, ONE,
571     &             Y(1,1), LDY, Z(1,1), LDZ, ZERO, XY_YZ(1,1), K_XY)
572            CALL SYSTEM_CLOCK(T1)
573            CALL cgemm(TRANSX, 'N', M_X, N_Z, K_XY, ALPHA,
574     &             X(1,1), LDX, XY_YZ(1,1), K_XY, BETA, A(POSELTT),
575     &             NFRONT)
576            CALL SYSTEM_CLOCK(T2,CR)
577            LOC_UPDT_TIME_OUT = dble(T2-T1)/dble(CR)
578            CALL UPDATE_UPDT_TIME_OUT(LOC_UPDT_TIME_OUT)
579            deallocate(XY_YZ)
580        ELSE ! SIDE == 'N' : NONE; A = X*Z
581            CALL cgemm(TRANSX, TRANSZ, M_X, N_Z, K_XY, ALPHA,
582     &             X(1,1), LDX, Z(1,1), LDZ, BETA, A(POSELTT),
583     &             NFRONT)
584        ENDIF
585        GOTO 870
586  860 CONTINUE
587C       Alloc NOT ok!!
588        write(*,*) 'Allocation problem in BLR routine CMUMPS_LRGEMM3: ',
589     &    'not enough memory? memory requested = ' , MREQ
590        IFLAG  = - 13
591        IERROR = MREQ
592        RETURN
593  870 CONTINUE
594C       Alloc ok!!
595        IF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==0)) THEN
596            IF (SYM .NE. 0) deallocate(X)
597        ELSEIF ((LRB1%LRFORM==0).AND.(LRB2%LRFORM==1)) THEN
598            IF (SYM .NE. 0) deallocate(Y)
599        ELSEIF ((LRB1%LRFORM==1).AND.(LRB2%LRFORM==0)) THEN
600            IF (SYM .NE. 0) deallocate(Y)
601        ELSE ! 1 AND 1
602            IF ((TRANSB1=='N').AND.(SYM .NE. 0)) deallocate(Y1)
603            IF ((COMPRESS_MID_PRODUCT.GE.1).AND.BUILDQ) THEN
604                deallocate(XQ)
605                deallocate(R_Y)
606            ELSE
607                deallocate(Y)
608            ENDIF
609        ENDIF
610 700    CONTINUE
611      END SUBROUTINE CMUMPS_LRGEMM3
612      SUBROUTINE MAX_CLUSTER(CUT,CUT_SIZE,MAXI_CLUSTER)
613        INTEGER, INTENT(IN) :: CUT_SIZE
614        INTEGER, intent(out) :: MAXI_CLUSTER
615        INTEGER, POINTER, DIMENSION(:) :: CUT
616        INTEGER :: I
617        MAXI_CLUSTER = 0
618        DO I = 1, CUT_SIZE
619          IF (CUT(I+1) - CUT(I) .GE. MAXI_CLUSTER) THEN
620            MAXI_CLUSTER = CUT(I+1) - CUT(I)
621          END IF
622        END DO
623      END SUBROUTINE MAX_CLUSTER
624      END MODULE CMUMPS_LR_CORE
625      SUBROUTINE CMUMPS_TRUNCATED_RRQR( M, N, A, LDA, JPVT, TAU, WORK,
626     &     LDW, RWORK, TOLEPS, RANK, MAXRANK, INFO)
627C     This routine computes a Rank-Revealing QR factorization of a dense
628C     matrix A. The factorization is truncated when the absolute value of
629C     a diagonal coefficient of the R factor becomes smaller than a
630C     prescribed threshold TOLEPS. The resulting partial Q and R factors
631C     provide a rank-k approximation of the input matrix A with accuracy
632C     TOLEPS.
633C
634C     This routine is obtained by merging the LAPACK
635C     (http://www.netlib.org/lapack/) CGEQP3 and CLAQPS routines and by
636C     applying a minor modification to the outer factorization loop in
637C     order to stop computations as soon as possible when the required
638C     accuracy is reached.
639C
640C     The authors of the LAPACK library are:
641C     - Univ. of Tennessee
642C     - Univ. of California Berkeley
643C     - Univ. of Colorado Denver
644C     - NAG Ltd.
645      IMPLICIT NONE
646      INTEGER            ::  INFO, LDA, LDW, M, N, RANK, MAXRANK
647      REAL               ::  TOLEPS
648      INTEGER            ::  JPVT(*)
649      REAL               ::  RWORK(*)
650      COMPLEX            ::  A(LDA,*), TAU(*)
651      COMPLEX            ::  WORK(LDW,*)
652      INTEGER, PARAMETER ::  INB=1, INBMIN=2
653      INTEGER            :: J, JB, MINMN, NB
654      INTEGER            :: OFFSET, ITEMP
655      INTEGER            :: LSTICC, PVT, K, RK
656      REAL               :: TEMP, TEMP2, TOL3Z
657      COMPLEX            :: AKK
658      REAL, PARAMETER    :: RZERO=0.0E+0, RONE=1.0E+0
659      COMPLEX :: ZERO
660      COMPLEX :: ONE
661      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
662      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
663      REAL               :: slamch
664      INTEGER            :: ilaenv, isamax
665      EXTERNAL           :: isamax, slamch
666      EXTERNAL           cgeqrf, cunmqr, xerbla
667      EXTERNAL           ilaenv
668      EXTERNAL           cgemm, cgemv, clarfg, cswap
669      REAL, EXTERNAL :: scnrm2
670      INFO = 0
671      IF( M.LT.0 ) THEN
672         INFO = -1
673      ELSE IF( N.LT.0 ) THEN
674         INFO = -2
675      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
676         INFO = -4
677      END IF
678      IF( INFO.EQ.0 ) THEN
679         IF( LDW.LT.N ) THEN
680            INFO = -8
681         END IF
682      END IF
683      IF( INFO.NE.0 ) THEN
684         CALL XERBLA( 'CGEQP3', -INFO )
685         RETURN
686      END IF
687      MINMN = MIN(M,N)
688      IF( MINMN.EQ.0 ) THEN
689         RETURN
690      END IF
691      NB = ILAENV( INB, 'CGEQRF', ' ', M, N, -1, -1 )
692C
693C     Avoid pointers (and TARGET attribute on RWORK/WORK)
694C     because of implicit interface. An implicit interface
695C     is needed to avoid intermediate array copies
696C     VN1  => RWORK(1:N)
697C     VN2  => RWORK(N+1:2*N)
698C     AUXV => WORK(1:LDW,1:1)
699C     F    => WORK(1:LDW,2:NB+1)
700C     LDF  =  LDW
701*     Initialize partial column norms. The first N elements of work
702*     store the exact column norms.
703      DO J = 1, N
704C        VN1( J ) = scnrm2( M, A( 1, J ), 1 )
705         RWORK( J ) = scnrm2( M, A( 1, J ), 1 )
706C        VN2( J ) = VN1( J )
707         RWORK( N + J ) = RWORK( J )
708         JPVT(J) = J
709      END DO
710      OFFSET = 0
711      TOL3Z  = SQRT(slamch('Epsilon'))
712      DO
713         JB     = MIN(NB,MINMN-OFFSET)
714         LSTICC = 0
715         K      = 0
716         DO
717            IF(K.EQ.JB) EXIT
718            K   = K+1
719            RK  = OFFSET+K
720C           PVT = ( RK-1 ) + ISAMAX( N-RK+1, VN1( RK ), 1 )
721            PVT = ( RK-1 ) + ISAMAX( N-RK+1, RWORK( RK ), 1 )
722C           IF(VN1(PVT).LT.TOLEPS) THEN
723            IF(RWORK(PVT).LT.TOLEPS) THEN
724               RANK = RK-1
725               RETURN
726            END IF
727            IF (RK.GT.MAXRANK) THEN
728               RANK = RK
729               INFO = RK
730               RETURN
731            END IF
732            IF( PVT.NE.RK ) THEN
733               CALL cswap( M, A( 1, PVT ), 1, A( 1, RK ), 1 )
734c              CALL cswap( K-1, F( PVT-OFFSET, 1 ), LDF,
735c    &              F( K, 1 ), LDF )
736               CALL cswap( K-1, WORK( PVT-OFFSET, 2 ), LDW,
737     &              WORK( K, 2 ), LDW )
738               ITEMP     = JPVT(PVT)
739               JPVT(PVT) = JPVT(RK)
740               JPVT(RK)  = ITEMP
741C              VN1(PVT)  = VN1(RK)
742C              VN2(PVT)  = VN2(RK)
743               RWORK(PVT)    = RWORK(RK)
744               RWORK(N+PVT)  = RWORK(N+RK)
745            END IF
746*     Apply previous Householder reflectors to column K:
747*     A(RK:M,RK) := A(RK:M,RK) - A(RK:M,OFFSET+1:RK-1)*F(K,1:K-1)**H.
748            IF( K.GT.1 ) THEN
749               DO J = 1, K-1
750C                 F( K, J ) = CONJG( F( K, J ) )
751                  WORK( K, J+1 ) = CONJG( WORK( K, J+1 ) )
752               END DO
753               CALL cgemv( 'No transpose', M-RK+1, K-1, -ONE,
754C    &              A(RK,OFFSET+1), LDA, F(K,1), LDF,
755     &              A(RK,OFFSET+1), LDA, WORK(K,2), LDW,
756     &              ONE, A(RK,RK), 1 )
757               DO J = 1, K - 1
758C                 F( K, J ) = CONJG( F( K, J ) )
759                  WORK( K, J + 1 ) = CONJG( WORK( K, J + 1 ) )
760               END DO
761            END IF
762*     Generate elementary reflector H(k).
763            IF( RK.LT.M ) THEN
764               CALL clarfg( M-RK+1, A(RK,RK), A(RK+1,RK), 1, TAU(RK) )
765            ELSE
766               CALL clarfg( 1, A(RK,RK), A(RK,RK), 1, TAU(RK) )
767            END IF
768            AKK      = A(RK,RK)
769            A(RK,RK) = ONE
770*     Compute Kth column of F:
771*     F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**H*A(RK:M,K).
772            IF( RK.LT.N ) THEN
773               CALL cgemv( 'Conjugate transpose', M-RK+1, N-RK, TAU(RK),
774     &              A(RK,RK+1), LDA, A(RK,RK), 1, ZERO,
775C    &              F( K+1, K ), 1 )
776     &              WORK( K+1, K+1 ), 1 )
777            END IF
778*     Padding F(1:K,K) with zeros.
779            DO J = 1, K
780C              F( J, K ) = ZERO
781               WORK( J, K+1 ) = ZERO
782            END DO
783*     Incremental updating of F:
784*     F(1:N,K) := F(1:N-OFFSET,K) - tau(RK)*F(1:N,1:K-1)*A(RK:M,OFFSET+1:RK-1)**H*A(RK:M,RK).
785            IF( K.GT.1 ) THEN
786               CALL cgemv( 'Conjugate transpose', M-RK+1, K-1, -TAU(RK),
787     &              A(RK,OFFSET+1), LDA, A(RK,RK), 1, ZERO,
788     &              WORK(1,1), 1 )
789C    &              AUXV(1,1), 1 )
790               CALL cgemv( 'No transpose', N-OFFSET, K-1, ONE,
791     &              WORK(1,2), LDW, WORK(1,1), 1, ONE, WORK(1,K+1), 1 )
792C    &              F(1,1), LDF, AUXV(1,1), 1, ONE, F(1,K), 1 )
793            END IF
794*     Update the current row of A:
795*     A(RK,RK+1:N) := A(RK,RK+1:N) - A(RK,OFFSET+1:RK)*F(K+1:N,1:K)**H.
796            IF( RK.LT.N ) THEN
797               CALL cgemm( 'No transpose', 'Conjugate transpose',
798     &              1, N-RK,
799C    &              K, -ONE, A( RK, OFFSET+1 ), LDA, F( K+1, 1 ), LDF,
800     &              K, -ONE, A( RK, OFFSET+1 ), LDA, WORK( K+1,2 ), LDW,
801     &              ONE, A( RK, RK+1 ), LDA )
802            END IF
803*     Update partial column norms.
804*
805            IF( RK.LT.MINMN ) THEN
806               DO J = RK + 1, N
807C                 IF( VN1( J ).NE.RZERO ) THEN
808                  IF( RWORK( J ).NE.RZERO ) THEN
809*
810*     NOTE: The following 4 lines follow from the analysis in
811*     Lapack Working Note 176.
812*
813C                    TEMP = ABS( A( RK, J ) ) / VN1( J )
814                     TEMP = ABS( A( RK, J ) ) / RWORK( J )
815                     TEMP = MAX( RZERO, ( RONE+TEMP )*( RONE-TEMP ) )
816C                    TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2
817                     TEMP2 = TEMP*( RWORK( J ) / RWORK( N+J ) )**2
818                     IF( TEMP2 .LE. TOL3Z ) THEN
819C                       VN2( J ) = REAL( LSTICC )
820                        RWORK( N+J ) = REAL( LSTICC )
821                        LSTICC = J
822                     ELSE
823C                       VN1( J ) = VN1( J )*SQRT( TEMP )
824                        RWORK( J ) = RWORK( J )*SQRT( TEMP )
825                     END IF
826                  END IF
827               END DO
828            END IF
829            A( RK, RK ) = AKK
830            IF (LSTICC.NE.0) EXIT
831         END DO
832*     Apply the block reflector to the rest of the matrix:
833*     A(RK+1:M,RK+1:N) := A(RK+1:M,RK+1:N) -
834*     A(RK+1:M,OFFSET+1:RK)*F(K+1:N-OFFSET,1:K)**H.
835         IF( RK.LT.MIN(N,M) ) THEN
836            CALL cgemm( 'No transpose', 'Conjugate transpose', M-RK,
837     &           N-RK, K, -ONE, A(RK+1,OFFSET+1), LDA,
838C    &           F(K+1,1), LDF, ONE, A(RK+1,RK+1), LDA )
839     &           WORK(K+1,2), LDW, ONE, A(RK+1,RK+1), LDA )
840         END IF
841*     Recomputation of difficult columns.
842         DO WHILE( LSTICC.GT.0 )
843C           ITEMP = NINT( VN2( LSTICC ) )
844            ITEMP = NINT( RWORK( N + LSTICC ) )
845C           VN1( LSTICC ) = scnrm2( M-RK, A( RK+1, LSTICC ), 1 )
846            RWORK( LSTICC ) = scnrm2( M-RK, A( RK+1, LSTICC ), 1 )
847*
848*     NOTE: The computation of RWORK( LSTICC ) relies on the fact that
849*     SNRM2 does not fail on vectors with norm below the value of
850*     SQRT(DLAMCH('S'))
851*
852C           VN2( LSTICC ) = VN1( LSTICC )
853            RWORK( N + LSTICC ) = RWORK( LSTICC )
854            LSTICC = ITEMP
855         END DO
856         IF(RK.GE.MINMN) EXIT
857         OFFSET = RK
858      END DO
859      RANK = RK
860      END SUBROUTINE CMUMPS_TRUNCATED_RRQR
861