1!
2!  Dalton, a molecular electronic structure program
3!  Copyright (C) by the authors of Dalton.
4!
5!  This program is free software; you can redistribute it and/or
6!  modify it under the terms of the GNU Lesser General Public
7!  License version 2.1 as published by the Free Software Foundation.
8!
9!  This program is distributed in the hope that it will be useful,
10!  but WITHOUT ANY WARRANTY; without even the implied warranty of
11!  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12!  Lesser General Public License for more details.
13!
14!  If a copy of the GNU LGPL v2.1 was not distributed with this
15!  code, you can obtain one at https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html.
16!
17!
18
19#ifdef VAR_MPI
20***********************************************************************
21*                                                                     *
22* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig                *
23* parallelization by Stefan Knecht                                    *
24*                                                                     *
25***********************************************************************
26      SUBROUTINE P1_B_PAR_RL_LUCI1(VEC1,VEC2,EIGAPR,RNRM,EIGSHF,
27     &                             EIG,TEST,E_CONV,RTCNV,CONVER,ITER,
28     &                             MAXIT,
29     &                             IROOT,LUIN1LIST,LUIN2LIST,LUOUTLIST,
30     &                             NBATCH,LBATCH,LEBATCH,I1BATCH,IBATCH,
31     &                             MY_IOFF_LUIN1,MY_IOFF_LUIN2,
32     &                             MY_IOFF_LUOUT,
33     &                             SCRRED,LUIN1,LUIN2,LUOUT)
34C
35C     Written by  S. Knecht        - March 13 2008
36C
37C**********************************************************************
38C
39C     calculating dot product between two vectors on file LUIN1 resp.
40C     LUIN2
41C
42C     NOTE: IROOT = IROOT
43C
44C     active blocks on the MPI-files are flagged by a nonzero list entry
45C
46C     Last revision:     S. Knecht       - March 2008
47C
48************************************************************************
49      IMPLICIT DOUBLE PRECISION ( A-H,O-Z)
50#include "maxorb.h"
51#include "infpar.h"
52#include "mpif.h"
53      integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE)
54#include "parluci.h"
55      DIMENSION VEC1(*), VEC2(*)
56      DIMENSION LBATCH(*), LEBATCH(*), I1BATCH(*), IBATCH(8,*)
57      DIMENSION LUIN1LIST(*), LUIN2LIST(*), LUOUTLIST(*)
58      DIMENSION RNRM(MAXIT,*), EIG(MAXIT,*)
59      DIMENSION SCRRED(*)
60      LOGICAL CONVER
61      integer RTCNV(*)
62      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN1
63      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN2
64      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT
65      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN1, IOFFSET_IN_LUIN2
66      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_LUOUT
67      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_SCRATCH
68      INTEGER NUM_BLK
69C
70C     initialize scratch offsets
71      NUM_BLK           = 0
72      IOFFSET_SCRATCH   = 0
73      IOFFSET_IN_LUIN1  = 0
74      IOFFSET_IN_LUIN2  = 0
75      IOFFSET_LUOUT     = 0
76      IOFFSET_INT_IN1   = 0
77      IOFFSET_INT_IN2   = 0
78      IOFFSET_INT_LUOUT = 0
79C
80      RNORM     = 0.0D0
81      REDSCRVAR = 0.0D0
82      CALL DZERO(SCRRED,IROOT)
83C
84      DO ISBATCH = 1, NBATCH
85C
86C       offset for batch ISBATCH w.r.t JOFF
87C
88        CALL DZERO(VEC1,LEBATCH(ISBATCH))
89C
90        FACTOR = - EIGAPR
91C
92C       set new offset wrt IROOT
93C
94        IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH +
95     &                   ( IROOT - 1 )  * MY_VEC1_IOFF
96        IOFFSET_INT_IN2  = 1 + NUM_BLK   +
97     &                   ( IROOT - 1 )  * MY_ACT_BLK1
98C
99CSK     WRITE(LUWRT,*) 'This is my OFFSET for LUIN2',
100CSK  &                 IOFFSET_IN_LUIN2
101C
102        CALL RDVEC_BATCH_DRV4(LUIN2,VEC1,LBATCH(ISBATCH),
103     &                        IBATCH(1,I1BATCH(ISBATCH)),
104     &                        IOFFSET_IN_LUIN2,IOFFSET_INT_IN2,
105     &                        LUIN2LIST,NUM_ACTIVE_BATCH)
106C
107CSK     WRITE(LUWRT,*) 'initial VEC1 on LUIN2'
108CSK     CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
109CSK  &                LUWRT)
110C
111C       set offset for LUIN1 and zero read-in vector
112C
113        CALL DZERO(VEC2,LEBATCH(ISBATCH))
114C
115        IOFFSET_IN_LUIN1 = MY_IOFF_LUIN1 + IOFFSET_SCRATCH +
116     &                   ( IROOT - 1 )    * MY_VEC1_IOFF
117        IOFFSET_INT_IN1  = 1 + NUM_BLK   +
118     &                   ( IROOT - 1 )    * MY_ACT_BLK1
119C
120CSK     WRITE(LUWRT,*) 'This is my OFFSET for LUIN1',
121CSK  &                  IOFFSET_IN_LUIN1
122C
123C       read in batch ISBATCH from LUIN1 to VEC2
124C
125        CALL RDVEC_BATCH_DRV4(LUIN1,VEC2,LBATCH(ISBATCH),
126     &                        IBATCH(1,I1BATCH(ISBATCH)),
127     &                        IOFFSET_IN_LUIN1,IOFFSET_INT_IN1,
128     &                        LUIN1LIST,NUM_ACTIVE_BATCH)
129C
130CSK     WRITE(LUWRT,*) 'initial VEC2 on LUIN1'
131CSK     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
132CSK  &                LUWRT)
133C
134C       VEC2 == VEC2 + VEC1 * FACTOR
135C
136        CALL DAXPY(LEBATCH(ISBATCH),FACTOR,VEC1,1,VEC2,1)
137C
138C       calculate partial RNORM
139C
140        REDSCRVAR = REDSCRVAR + DDOT(LEBATCH(ISBATCH),VEC2,1,VEC2,1)
141C
142C       write VEC2 to LUOUT
143C
144        IOFFSET_LUOUT     = MY_IOFF_LUOUT  + IOFFSET_SCRATCH
145        IOFFSET_INT_LUOUT = 1 + NUM_BLK
146CSK     WRITE(LUWRT,*) 'This is my OFFSET for LUOUT',
147CSK  &                  IOFFSET_LUOUT
148CSK     WRITE(LUWRT,*) 'final VEC2 to write on LUOUT'
149C
150        CALL WTVEC_BATCH_DRV4(LUOUT,VEC2,LBATCH(ISBATCH),
151     &                       IBATCH(1,I1BATCH(ISBATCH)),
152     &                       IOFFSET_LUOUT,IOFFSET_INT_LUOUT,
153     &                       LUOUTLIST,NUM_ACTIVE_BATCH)
154C
155C       keep track of correct offset
156        IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
157        NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
158C
159      END DO
160C     ^ loop over batches
161C
162C     communicate REDSCRVAR to get full RNORM
163C
164      CALL REDVEC_REL(REDSCRVAR,SCRRED,1,2,MPI_SUM,MPI_COMM_WORLD,-1)
165      CALL DCOPY(1,SCRRED,1,REDSCRVAR,1)
166C
167      RNORM = SQRT(REDSCRVAR)
168C
169      RNRM(ITER-1,IROOT) = RNORM
170C
171C     print norm and eigenvalue
172C
173      WRITE(LUWRT,'(A19,I8,1p,1E15.5,0p,3X,1F19.10)')
174     &     ' Iter RNORM EIGAPR ', ITER-1,RNORM,EIGAPR+EIGSHF
175      CALL FLSHFO(LUWRT)
176C
177C
178C     screening of new trial vector
179C
180      RNORM_FAC = RNORM
181C
182      IF (TRUNC_FAC .GT. 0.1D0) THEN
183          WRITE (LUWRT,*) 'TRUNC_FAC reset from ',TRUNC_FAC,' to',0.1D0
184          TRUNC_FAC = 0.1D0
185      END IF
186C
187C     check for convergence
188C
189      IF(RNORM .lt. TEST)then ! .OR. ( ITER .gt. 2 .and.
190!    &  ABS(EIG(ITER-2,IROOT)-EIG(ITER-1,IROOT)).LT.E_CONV)) THEN
191C
192        RTCNV(IROOT) = 0
193      ELSE
194C
195        RTCNV(IROOT) = iroot
196        CONVER       = .FALSE.
197      END IF
198C
199      END
200***********************************************************************
201*                                                                     *
202* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig                *
203* parallelization by Stefan Knecht                                    *
204*                                                                     *
205***********************************************************************
206      SUBROUTINE P1_B_PAR_RL_LUCI2(VEC1,VEC2,SHIFT,IROOT,
207     &                             LUINLIST,LUOUT1LIST,LUOUT2LIST,
208     &                             LUOUT3LIST,
209     &                             NBATCH,LBATCH,LEBATCH,I1BATCH,IBATCH,
210     &                             MY_IOFF_LUIN,MY_IOFF_LUOUT1,
211     &                             MY_IOFF_LUOUT2,MY_IOFF_LUOUT3,
212     &                             MY_IOFF_LUDIA,
213     &                             LUIN,LUOUT1,LUOUT2,LUOUT3,LUDIA,INV)
214C
215C     Written by  S. Knecht         - March 13 2008
216C
217C**********************************************************************
218C
219C     part 1.2 of DAVIDSON-OLSEN algorithm in MPI-file I/O mode
220C
221C     NOTE: IROOT = IROOT
222C
223C     active blocks on the MPI-files are flagged by a nonzero list entry
224C
225C     Last revision:     S. Knecht       - June  2007
226C
227************************************************************************
228      IMPLICIT DOUBLE PRECISION ( A-H,O-Z)
229#include "maxorb.h"
230#include "infpar.h"
231#include "mpif.h"
232      integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE)
233#include "parluci.h"
234      DIMENSION VEC1(*), VEC2(*)
235      DIMENSION LBATCH(*), LEBATCH(*), I1BATCH(*), IBATCH(8,*)
236      DIMENSION LUINLIST(*), LUOUT1LIST(*), LUOUT2LIST(*)
237      DIMENSION LUOUT3LIST(*)
238      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN
239      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT1
240      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT2
241      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT3
242      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUDIA
243      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN, IOFFSET_LUOUT1
244      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_LUOUT2
245      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_LUOUT3
246      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUDIA
247      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_SCRATCH
248      INTEGER NUM_BLK
249C
250C     initialize scratch offsets
251      NUM_BLK            = 0
252      IOFFSET_SCRATCH    = 0
253      IOFFSET_IN_LUIN    = 0
254      IOFFSET_LUOUT1     = 0
255      IOFFSET_LUOUT2     = 0
256      IOFFSET_LUOUT3     = 0
257      IOFFSET_IN_LUDIA   = 0
258      IOFFSET_INT_IN     = 0
259      IOFFSET_INT_LUOUT1 = 0
260      IOFFSET_INT_LUOUT2 = 0
261      IOFFSET_INT_LUOUT3 = 0
262C
263      REDSCRVAR = 0.0D0
264C
265      DO ISBATCH = 1, NBATCH
266C
267C       offset for batch ISBATCH w.r.t JOFF
268C
269        CALL DZERO(VEC2,LEBATCH(ISBATCH))
270C
271C       set new offset
272C
273C       position in file is at the end of vector IROOT - 1
274C
275        IOFFSET_IN_LUIN  = MY_IOFF_LUIN + IOFFSET_SCRATCH +
276     &                   ( IROOT - 1 )   * MY_VEC1_IOFF
277        IOFFSET_INT_IN   = 1 + NUM_BLK  +
278     &                   ( IROOT - 1 )   * MY_ACT_BLK1
279C
280CSK     WRITE(LUWRT,*) 'This is my OFFSET for LUIN',
281CSK  &                  IOFFSET_IN_LUIN
282C
283        CALL RDVEC_BATCH_DRV4(LUIN,VEC2,LBATCH(ISBATCH),
284     &                        IBATCH(1,I1BATCH(ISBATCH)),
285     &                        IOFFSET_IN_LUIN,IOFFSET_INT_IN,
286     &                        LUINLIST,NUM_ACTIVE_BATCH)
287C
288CSK     WRITE(LUWRT,*) 'initial VEC2 on LUIN'
289CSK     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
290CSK  &                LUWRT)
291C
292C       calculate inverse diagonal on VEC1
293C
294        CALL DZERO(VEC1,LEBATCH(ISBATCH))
295C
296C       new offset for file containing diagonal
297C
298        IOFFSET_IN_LUDIA = MY_IOFF_LUDIA + IOFFSET_SCRATCH
299C
300CSK     WRITE(LUWRT,*) 'This is my OFFSET for LUDIA',
301CSK  &                  IOFFSET_IN_LUDIA
302C
303C       read in batch ISBATCH from LUDIA to VEC1
304C
305        CALL RDVEC_BATCH_DRV5(LUDIA,VEC1,LBATCH(ISBATCH),
306     &                        IBATCH(1,I1BATCH(ISBATCH)),
307     &                        IOFFSET_IN_LUDIA)
308C
309CSK     CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
310CSK  &                LUWRT)
311C
312        IF( LEBATCH(ISBATCH) .gt. 0 )THEN
313          IF( CSCREEN) THEN
314C           set proper truncation factor
315            THR_TRUNC  = TRUNC_FAC * RNORM_FAC
316Csk         WRITE(LUWRT,*) 'TRUNCATION FACTOR:',THR_TRUNC
317Chj         14-jun-07:   disable THR_ETRUNC
318Chj         THR_ETRUNC = 1.0D-6 * THRES_E
319            THR_ETRUNC = -1.0D0
320            CALL DIAVC2_TRUNC(VEC1,VEC2,VEC1,SHIFT,LEBATCH(ISBATCH),
321     &                        THR_TRUNC,THR_ETRUNC)
322          ELSE
323            CALL DIAVC2(VEC1,VEC2,VEC1,SHIFT,LEBATCH(ISBATCH))
324          END IF
325        END IF
326C
327C       write VEC1 to LUOUT1 and VEC2 to LUOUT2
328C
329        IOFFSET_LUOUT1     = MY_IOFF_LUOUT1  + IOFFSET_SCRATCH
330        IOFFSET_LUOUT2     = MY_IOFF_LUOUT2  + IOFFSET_SCRATCH
331        IOFFSET_INT_LUOUT1 = 1 + NUM_BLK
332        IOFFSET_INT_LUOUT2 = 1 + NUM_BLK
333C
334C       VEC1
335        CALL WTVEC_BATCH_DRV4(LUOUT1,VEC1,LBATCH(ISBATCH),
336     &                       IBATCH(1,I1BATCH(ISBATCH)),
337     &                       IOFFSET_LUOUT1,IOFFSET_INT_LUOUT1,
338     &                       LUOUT1LIST,NUM_ACTIVE_BATCH)
339C       VEC2
340        CALL WTVEC_BATCH_DRV4(LUOUT2,VEC2,LBATCH(ISBATCH),
341     &                       IBATCH(1,I1BATCH(ISBATCH)),
342     &                       IOFFSET_LUOUT2,IOFFSET_INT_LUOUT2,
343     &                       LUOUT2LIST,NUM_ACTIVE_BATCH)
344C
345C
346CSK     WRITE(LUWRT,*) 'THIS IS my partial REDSCRVAR',REDSCRVAR
347CSK     WRITE(LUWRT,*) 'THIS IS LEBATCH(ISBATCH)',LEBATCH(ISBATCH)
348C       calculate partial GAMMA
349        IF( LEBATCH(ISBATCH) .gt. 0 ) THEN
350          REDSCRVAR = REDSCRVAR + DDOT(LEBATCH(ISBATCH),VEC2,1,VEC1,1)
351        END IF
352C
353C       keep track of correct offset
354        IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
355        NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
356C
357      END DO
358C
359C     communicate REDSCRVAR to get full GAMMA
360C
361      CALL REDVEC_REL(REDSCRVAR,GAMMA,1,2,MPI_SUM,MPI_COMM_WORLD,-1)
362C
363CSK   WRITE(LUWRT,*) 'THIS IS GAMMA',GAMMA
364C
365C     continue with VNORM ...
366C
367C     reset scratch offsets
368      NUM_BLK            = 0
369      IOFFSET_SCRATCH    = 0
370      REDSCRVAR = 0.0D0
371CSK   WRITE(LUWRT,*) 'THIS IS REDSCRVAR',REDSCRVAR
372C
373      DO ISBATCH = 1, NBATCH
374C
375        CALL DZERO(VEC1,LEBATCH(ISBATCH))
376        CALL DZERO(VEC2,LEBATCH(ISBATCH))
377C
378C       read VEC1 from LUOUT2 and VEC2 from LUOUT1
379C
380        IOFFSET_LUOUT1     = MY_IOFF_LUOUT1  + IOFFSET_SCRATCH
381        IOFFSET_LUOUT2     = MY_IOFF_LUOUT2  + IOFFSET_SCRATCH
382        IOFFSET_INT_LUOUT1 = 1 + NUM_BLK
383        IOFFSET_INT_LUOUT2 = 1 + NUM_BLK
384C
385C       VEC1
386        CALL RDVEC_BATCH_DRV4(LUOUT2,VEC1,LBATCH(ISBATCH),
387     &                        IBATCH(1,I1BATCH(ISBATCH)),
388     &                        IOFFSET_LUOUT2,IOFFSET_INT_LUOUT2,
389     &                        LUOUT2LIST,NUM_ACTIVE_BATCH)
390C
391CSK     WRITE(LUWRT,*) 'initial VEC1 on LUOUT2 in P1..._2 again'
392CSK     CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
393CSK  &                LUWRT)
394C       VEC2
395        CALL RDVEC_BATCH_DRV4(LUOUT1,VEC2,LBATCH(ISBATCH),
396     &                        IBATCH(1,I1BATCH(ISBATCH)),
397     &                        IOFFSET_LUOUT1,IOFFSET_INT_LUOUT1,
398     &                        LUOUT1LIST,NUM_ACTIVE_BATCH)
399C
400CSK     WRITE(LUWRT,*) ' VEC2 before DAXPY call'
401CSK     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
402CSK  &                LUWRT)
403CSK     WRITE(LUWRT,*) ' VEC1 before DAXPY call'
404CSK     CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
405CSK  &                LUWRT)
406C
407C       VEC2 == VEC2 + VEC1 * FACTOR
408C
409        CALL DAXPY(LEBATCH(ISBATCH),-GAMMA,VEC1,1,VEC2,1)
410C
411CSK     WRITE(LUWRT,*) ' VEC2 after DAXPY call'
412CSK     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
413CSK  &                LUWRT)
414CSK     WRITE(LUWRT,*) ' VEC1 after DAXPY call'
415CSK     CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
416CSK  &                LUWRT)
417C
418C       calculate partial VNORM_Q
419C
420        REDSCRVAR = REDSCRVAR + DDOT(LEBATCH(ISBATCH),VEC2,1,VEC2,1)
421C
422C       keep track of correct offset
423        IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
424        NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
425C
426      END DO
427C
428      VNORM_Q = 0.0D0
429      VNORM   = 0.0D0
430C
431C     communicate REDSCRVAR to get full VNORM_Q
432C
433      CALL REDVEC_REL(REDSCRVAR,VNORM_Q,1,2,MPI_SUM,MPI_COMM_WORLD,-1)
434C
435C     is X an eigen vector for (H0 - 1 ) - 1 ???
436C
437      VNORM = SQRT(VNORM_Q)
438C
439csk   WRITE(LUWRT,*) 'GAMMA ',GAMMA
440csk   WRITE(LUWRT,*) 'VNORM ',VNORM
441C
442      IF( VNORM .GT. 1.0D-7 ) THEN
443        IOLSAC = 1
444      ELSE
445        IOLSAC = 0
446      END IF
447      IF(IOLSAC .EQ. 1 ) THEN
448C
449CSK     WRITE(LUWRT,*) ' Olsen correction active'
450        DELTA = 0.0D0
451C
452C       continue with DELTA ...
453C
454C       reset scratch offsets
455        NUM_BLK            = 0
456        IOFFSET_SCRATCH    = 0
457        REDSCRVAR = 0.0D0
458C
459        DO ISBATCH = 1, NBATCH
460C
461          CALL DZERO(VEC1,LEBATCH(ISBATCH))
462          CALL DZERO(VEC2,LEBATCH(ISBATCH))
463C
464C         read VEC1 from LUOUT2 and VEC2 from LUOUT3
465C
466          IOFFSET_LUOUT3     = MY_IOFF_LUOUT3  + IOFFSET_SCRATCH
467          IOFFSET_LUOUT2     = MY_IOFF_LUOUT2  + IOFFSET_SCRATCH
468          IOFFSET_INT_LUOUT3 = 1 + NUM_BLK
469          IOFFSET_INT_LUOUT2 = 1 + NUM_BLK
470C
471C         VEC1
472          CALL RDVEC_BATCH_DRV4(LUOUT2,VEC1,LBATCH(ISBATCH),
473     &                          IBATCH(1,I1BATCH(ISBATCH)),
474     &                          IOFFSET_LUOUT2,IOFFSET_INT_LUOUT2,
475     &                          LUOUT2LIST,NUM_ACTIVE_BATCH)
476C
477CSK       WRITE(LUWRT,*) 'initial VEC1 on LUOUT2'
478CSK       CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
479CSK  &                  LUWRT)
480C         VEC2
481          CALL RDVEC_BATCH_DRV4(LUOUT3,VEC2,LBATCH(ISBATCH),
482     &                          IBATCH(1,I1BATCH(ISBATCH)),
483     &                          IOFFSET_LUOUT3,IOFFSET_INT_LUOUT3,
484     &                          LUOUT3LIST,NUM_ACTIVE_BATCH)
485C
486CSK       WRITE(LUWRT,*) 'initial VEC2 on LUOUT3'
487CSK       CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
488CSK  &                  LUWRT)
489C
490C         calculate partial DELTA
491C
492          REDSCRVAR = REDSCRVAR + DDOT(LEBATCH(ISBATCH),VEC1,1,VEC2,1)
493C
494C         keep track of correct offset
495          IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
496          NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
497C
498        END DO
499C
500C
501C       communicate REDSCRVAR to get full DELTA
502C
503        CALL REDVEC_REL(REDSCRVAR,DELTA,1,2,MPI_SUM,MPI_COMM_WORLD,-1)
504C
505C
506CSK     WRITE(LUWRT,*) ' THIS IS DELTA'
507C
508        FACTOR = - DELTA / GAMMA
509csk     WRITE(LUWRT,*) 'FACTOR, DELTA, GAMMA',FACTOR, DELTA, GAMMA
510C
511C       reset scratch offsets
512        NUM_BLK            = 0
513        IOFFSET_SCRATCH    = 0
514C
515        DO ISBATCH = 1, NBATCH
516C
517          CALL DZERO(VEC1,LEBATCH(ISBATCH))
518          CALL DZERO(VEC2,LEBATCH(ISBATCH))
519C
520C         read VEC1 from LUOUT1 and VEC2 from LUOUT3
521C
522          IOFFSET_LUOUT3     = MY_IOFF_LUOUT3  + IOFFSET_SCRATCH
523          IOFFSET_LUOUT1     = MY_IOFF_LUOUT1  + IOFFSET_SCRATCH
524          IOFFSET_INT_LUOUT3 = 1 + NUM_BLK
525          IOFFSET_INT_LUOUT1 = 1 + NUM_BLK
526C
527C         VEC1
528          CALL RDVEC_BATCH_DRV4(LUOUT1,VEC1,LBATCH(ISBATCH),
529     &                          IBATCH(1,I1BATCH(ISBATCH)),
530     &                          IOFFSET_LUOUT1,IOFFSET_INT_LUOUT1,
531     &                          LUOUT1LIST,NUM_ACTIVE_BATCH)
532C
533CSK       WRITE(LUWRT,*) 'initial VEC1 on LUOUT1'
534CSK       CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
535CSK  &                  LUWRT)
536C         VEC2
537          CALL RDVEC_BATCH_DRV4(LUOUT3,VEC2,LBATCH(ISBATCH),
538     &                          IBATCH(1,I1BATCH(ISBATCH)),
539     &                          IOFFSET_LUOUT3,IOFFSET_INT_LUOUT3,
540     &                          LUOUT3LIST,NUM_ACTIVE_BATCH)
541C
542CSK       WRITE(LUWRT,*) 'initial VEC2 on LUOUT3'
543CSK       CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
544CSK  &                  LUWRT)
545C
546C         VEC2 == VEC2 + VEC1 * FACTOR
547C
548          CALL DAXPY(LEBATCH(ISBATCH),FACTOR,VEC1,1,VEC2,1)
549C
550C         write VEC2 on LUOUT3
551C
552          IOFFSET_LUOUT3     = MY_IOFF_LUOUT3  + IOFFSET_SCRATCH
553          IOFFSET_INT_LUOUT3 = 1 + NUM_BLK
554C
555CSK       WRITE(LUWRT,*) 'final VEC2 to write on LUOUT3'
556CSK       CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
557CSK  &                  LUWRT)
558C
559          CALL WTVEC_BATCH_DRV4(LUOUT3,VEC2,LBATCH(ISBATCH),
560     &                         IBATCH(1,I1BATCH(ISBATCH)),
561     &                         IOFFSET_LUOUT3,IOFFSET_INT_LUOUT3,
562     &                         LUOUT3LIST,NUM_ACTIVE_BATCH)
563C
564C         keep track of correct offset
565          IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
566          NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
567C
568        END DO
569C
570      END IF
571C     ^ IOLSAC ?
572C
573      END
574***********************************************************************
575*                                                                     *
576* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig                *
577* parallelization by Stefan Knecht                                    *
578*                                                                     *
579***********************************************************************
580      SUBROUTINE P1_B_PAR_RL_LUCI3(VEC1,VEC2,SUBSPH,
581     &                             LUIN1LIST,LUIN2LIST,LUOUTLIST,
582     &                             LUOUT2LIST,
583     &                             NBATCH,LBATCH,LEBATCH,I1BATCH,IBATCH,
584     &                             MY_IOFF_LUIN1,MY_IOFF_LUIN2,
585     &                             MY_IOFF_LUOUT,MY_IOFF_LUOUT2,
586     &                             SCRRED,NVEC,IADD,
587     &                             LUIN1,LUIN2,LUOUT,LUOUT2)
588C
589C     Written by  S. Knecht         - March 13 2008
590C
591C**********************************************************************
592C
593C     calculating dot product between two vectors on file LUIN1 resp.
594C     LUIN2
595C
596C     NOTE: NVEC = NVEC
597C           IADD = IADD
598C
599C     active blocks on the MPI-files are flagged by a nonzero list entry
600C
601C     Last revision:     S. Knecht       - June  2007
602C
603************************************************************************
604      IMPLICIT DOUBLE PRECISION ( A-H,O-Z)
605#include "maxorb.h"
606#include "infpar.h"
607#include "mpif.h"
608      integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE)
609#include "parluci.h"
610      DIMENSION VEC1(*), VEC2(*), SUBSPH(*)
611      DIMENSION LBATCH(*), LEBATCH(*), I1BATCH(*), IBATCH(8,*)
612      DIMENSION LUIN1LIST(*), LUIN2LIST(*), LUOUTLIST(*)
613      DIMENSION SCRRED(*), LUOUT2LIST(*)
614      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN1
615      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN2
616      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT
617      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT2
618      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN1, IOFFSET_IN_LUIN2
619      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_LUOUT, IOFFSET_LUOUT2
620      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_SCRATCH
621      INTEGER NUM_BLK, IROUND, I_ZERO, I_REMOVED
622      real(8), parameter :: THR_CVEC  =  1.0d-03
623!     real(8), parameter :: THR_CVEC  = -1.0d-03
624      LOGICAL STORE_F
625
626!     print *, 'THR_CVEC ==>', THR_CVEC
627
628      IROUND    = 0
629 10   CONTINUE
630      IROUND    = IROUND + 1
631      I_ZERO    = 0
632      I_REMOVED = 0
633C
634C     initialize scratch offsets
635      NUM_BLK           = 0
636      IOFFSET_SCRATCH   = 0
637      IOFFSET_IN_LUIN1  = 0
638      IOFFSET_IN_LUIN2  = 0
639      IOFFSET_LUOUT     = 0
640      IOFFSET_LUOUT2    = 0
641      IOFFSET_INT_IN1   = 0
642      IOFFSET_INT_IN2   = 0
643      IOFFSET_INT_LUOUT = 0
644      IOFFSET_INT_LUOUT2= 0
645      STORE_F = .FALSE.
646      IF( NVEC + IADD - 1 - NROOT_INFO .gt. 0 ) STORE_F = .TRUE.
647C
648      REDSCRVAR = 0.0D0
649      CALL DZERO(SCRRED,NVEC+IADD)
650      CALL DZERO(SUBSPH,NVEC+IADD)
651CSK   WRITE(LUWRT,*) ' NVEC + IADD - 1',  NVEC + IADD - 1
652CSK   WRITE(LUWRT,*) ' LUIN1,LUIN2,LUOUT,LUOUT2',
653CSK  &                 LUIN1,LUIN2,LUOUT,LUOUT2
654C
655      DO ISBATCH = 1, NBATCH
656C
657C       offset for batch ISBATCH w.r.t JOFF
658C
659        CALL DZERO(VEC2,LEBATCH(ISBATCH))
660C
661C       set new offset
662C
663C       position in file is at the end of vector IVEC - 1
664C
665        IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH
666        IOFFSET_INT_IN2  = 1 + NUM_BLK
667C
668C
669CSK     WRITE(LUWRT,*) 'This is my OFFSET for LUIN2 in P1..._3 100',
670CSK  &                  IOFFSET_IN_LUIN2
671C
672        CALL RDVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH),
673     &                        IBATCH(1,I1BATCH(ISBATCH)),
674     &                        IOFFSET_IN_LUIN2,IOFFSET_INT_IN2,
675     &                        LUIN2LIST,NUM_ACTIVE_BATCH)
676C
677CSK     WRITE(LUWRT,*) 'initial VEC2 on LUIN2 in P1..._3 100'
678CSK     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
679CSK  &                LUWRT)
680
681C
682        DO 100 IVEC = 1, NROOT_INFO
683C
684          CALL DZERO(VEC1,LEBATCH(ISBATCH))
685C
686C         set new offset
687C
688C         position in file is at the end of vector IVEC - 1
689C
690          IOFFSET_IN_LUIN1 = MY_IOFF_LUIN1 + IOFFSET_SCRATCH +
691     &                     ( IVEC - 1 )  * MY_VEC1_IOFF
692          IOFFSET_INT_IN1  = 1 + NUM_BLK   +
693     &                     ( IVEC - 1 )  * MY_ACT_BLK1
694C
695CSK       WRITE(LUWRT,*) 'This is my OFFSET for LUIN1 in P1..._3 100',
696CSK  &                   IOFFSET_IN_LUIN1
697CSK       WRITE(LUWRT,*) 'This is my INT_OFFSET for LUIN1 in
698CSK  &                    P1..._3 100',IOFFSET_INT_IN1
699CSK       WRITE(LUWRT,*) 'THIS IS MY LU1LIST inside P1_B_PAR_RL_3 100'
700CSK       CALL IWRTMAMN(LUIN1LIST,1,IALL_LU1,1,IALL_LU1,LUWRT)
701C
702          CALL RDVEC_BATCH_DRV4(LUIN1,VEC1,LBATCH(ISBATCH),
703     &                         IBATCH(1,I1BATCH(ISBATCH)),
704     &                         IOFFSET_IN_LUIN1,IOFFSET_INT_IN1,
705     &                         LUIN1LIST,NUM_ACTIVE_BATCH)
706C
707CSK       WRITE(LUWRT,*) 'initial VEC1 on LUIN1 in P1..._3 100'
708CSK       CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
709CSK  &                  LUWRT)
710C
711          SCRRED(IVEC) = SCRRED(IVEC) -
712     &                   DDOT(LEBATCH(ISBATCH),VEC1,1,VEC2,1)
713C
714  100   CONTINUE
715C
716C       keep track of correct offset
717        IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
718        NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
719C
720      END DO
721C
722C     communicate SCRRED to get full OVERLAP matrix
723C
724      CALL REDVEC_REL(SCRRED,SUBSPH,NROOT_INFO,2,MPI_SUM,
725     &                MPI_COMM_WORLD,-1)
726C
727C
728C
729CSK   WRITE(LUWRT,*) ' THIS IS MY SUBSPH in P1..._3'
730CSK   CALL WRTMATMN(SUBSPH,1,NVEC+IADD-1,1,NVEC+IADD-1,LUWRT)
731C
732C     zero scratch offsets
733C
734      NUM_BLK           = 0
735      IOFFSET_SCRATCH   = 0
736      REDSCRVAR         = 0.0D0
737C
738C
739CSK   WRITE(LUWRT,*) ' THIS IS MY LUIN2LIST in P1..._3'
740CSK   CALL IWRTMAMN(LUIN2LIST,1,IALL_LU3,1,IALL_LU3,LUWRT)
741C
742      DO ISBATCH = 1, NBATCH
743C
744C       offset for batch ISBATCH w.r.t JOFF
745C
746        CALL DZERO(VEC2,LEBATCH(ISBATCH))
747C
748C       set new offset
749C
750C       position in file is at the end of vector IVEC - 1
751C
752        IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH
753        IOFFSET_INT_IN2  = 1 + NUM_BLK
754C
755C
756CSK     WRITE(LUWRT,*) 'This is my OFFSET for LUIN2',
757CSK  &                  IOFFSET_IN_LUIN2
758C
759        CALL RDVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH),
760     &                        IBATCH(1,I1BATCH(ISBATCH)),
761     &                        IOFFSET_IN_LUIN2,IOFFSET_INT_IN2,
762     &                        LUIN2LIST,NUM_ACTIVE_BATCH)
763C
764CSK     WRITE(LUWRT,*) 'initial VEC2 on LUIN2'
765CSK     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
766CSK  &                LUWRT)
767
768C
769        DO 200 IVEC = 1, NROOT_INFO
770C
771          CALL DZERO(VEC1,LEBATCH(ISBATCH))
772C
773C         set new offset
774C
775C         position in file is at the end of vector IVEC - 1
776C
777          IOFFSET_IN_LUIN1 = MY_IOFF_LUIN1 + IOFFSET_SCRATCH +
778     &                     ( IVEC - 1 )  * MY_VEC1_IOFF
779          IOFFSET_INT_IN1  = 1 + NUM_BLK   +
780     &                     ( IVEC - 1 )  * MY_ACT_BLK1
781C
782CSK       WRITE(LUWRT,*) 'This is my OFFSET for LUIN1 in P1..._3 200',
783CSK  &                   IOFFSET_IN_LUIN1
784C
785          CALL RDVEC_BATCH_DRV4(LUIN1,VEC1,LBATCH(ISBATCH),
786     &                         IBATCH(1,I1BATCH(ISBATCH)),
787     &                         IOFFSET_IN_LUIN1,IOFFSET_INT_IN1,
788     &                         LUIN1LIST,NUM_ACTIVE_BATCH)
789C
790CSK       WRITE(LUWRT,*) 'initial VEC1 on LUIN1 in P1..._3 200'
791CSK       CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
792CSK  &                  LUWRT)
793C
794          FACTOR = SUBSPH(IVEC)
795C
796C         VEC2 == VEC2 + VEC1 * FACTOR
797C
798          CALL DAXPY(LEBATCH(ISBATCH), FACTOR, VEC1, 1, VEC2, 1)
799C
800  200   CONTINUE
801C
802CSK     WRITE(LUWRT,*) 'final VEC2 to write on LUOUT2 '
803CSK     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
804CSK  &                LUWRT)
805        IF( STORE_F )THEN
806C
807C         new offset for writing on LUOUT2 --> ILU5
808C
809          IOFFSET_LUOUT2     = MY_IOFF_LUOUT2 + IOFFSET_SCRATCH
810          IOFFSET_INT_LUOUT2 = 1 + NUM_BLK
811C
812          CALL WTVEC_BATCH_DRV4(LUOUT2,VEC2,LBATCH(ISBATCH),
813     &                          IBATCH(1,I1BATCH(ISBATCH)),
814     &                          IOFFSET_LUOUT2,IOFFSET_INT_LUOUT2,
815     &                          LUOUT2LIST,NUM_ACTIVE_BATCH)
816        ELSE
817C
818          REDSCRVAR = REDSCRVAR
819     &              + DDOT( LEBATCH(ISBATCH), VEC2, 1, VEC2, 1)
820C
821C         new offset for writing on LUIN2
822C
823          IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH
824          IOFFSET_INT_IN2  = 1 + NUM_BLK
825C
826          CALL WTVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH),
827     &                          IBATCH(1,I1BATCH(ISBATCH)),
828     &                          IOFFSET_IN_LUIN2,IOFFSET_INT_IN2,
829     &                          LUIN2LIST,NUM_ACTIVE_BATCH)
830        END IF
831C
832C       keep track of correct offset
833        IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
834        NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
835C
836      END DO
837C
838      IF( STORE_F )THEN
839C
840C        zero scratch offsets
841C
842         NUM_BLK           = 0
843         IOFFSET_SCRATCH   = 0
844         CALL DZERO(SCRRED,NVEC+IADD)
845         CALL DZERO(SUBSPH,NVEC+IADD)
846C
847         DO ISBATCH = 1, NBATCH
848C
849C          offset for batch ISBATCH w.r.t JOFF
850C
851           CALL DZERO(VEC2,LEBATCH(ISBATCH))
852C
853C          set new offset
854C
855C          position in file is at the end of vector IVEC - 1
856C
857           IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH
858           IOFFSET_INT_IN2  = 1 + NUM_BLK
859C
860C
861CSK        WRITE(LUWRT,*) 'This is my OFFSET for LUIN2 in P1..._3 100',
862CSK  &                     IOFFSET_IN_LUIN2
863C
864           CALL RDVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH),
865     &                           IBATCH(1,I1BATCH(ISBATCH)),
866     &                           IOFFSET_IN_LUIN2,IOFFSET_INT_IN2,
867     &                           LUIN2LIST,NUM_ACTIVE_BATCH)
868C
869CSK        WRITE(LUWRT,*) 'initial VEC2 on LUIN2 in P1..._3 100'
870CSK        CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
871CSK  &                   LUWRT)
872
873C
874           DO 300 IVEC = 1, (NVEC + IADD - 1 -NROOT_INFO)
875C
876             CALL DZERO(VEC1,LEBATCH(ISBATCH))
877C
878C            set new offset
879C
880C            position in file is at the end of vector IVEC - 1
881C
882             IOFFSET_LUOUT     = MY_IOFF_LUOUT + IOFFSET_SCRATCH +
883     &                         ( IVEC - 1 )  * MY_VEC1_IOFF
884             IOFFSET_INT_LUOUT = 1 + NUM_BLK   +
885     &                         ( IVEC - 1 )  * MY_ACT_BLK1
886C
887CSK          WRITE(LUWRT,*) 'This is my OFFSET for LUOUT in P1..._3 100',
888CSK  &                       IOFFSET_IN_LUOUT
889CSK          WRITE(LUWRT,*) 'This is my INT_OFFSET for LUOUT in
890CSK  &                       P1..._3 100',IOFFSET_INT_LUOUT
891C
892             CALL RDVEC_BATCH_DRV4(LUOUT,VEC1,LBATCH(ISBATCH),
893     &                             IBATCH(1,I1BATCH(ISBATCH)),
894     &                             IOFFSET_LUOUT,IOFFSET_INT_LUOUT,
895     &                             LUOUTLIST,NUM_ACTIVE_BATCH)
896C
897CSK          WRITE(LUWRT,*) 'initial VEC1 on LUOUT in P1..._3 100'
898CSK          CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
899CSK  &                     LUWRT)
900C
901             SCRRED(IVEC) = SCRRED(IVEC) -
902     &                      DDOT(LEBATCH(ISBATCH),VEC1,1,VEC2,1)
903C
904  300     CONTINUE
905C
906C         keep track of correct offset
907          IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
908          NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
909C
910        END DO
911C
912C       communicate SCRRED to get full OVERLAP matrix
913C
914        CALL REDVEC_REL(SCRRED,SUBSPH,NVEC+IADD-1-NROOT_INFO,2,MPI_SUM,
915     &                  MPI_COMM_WORLD,-1)
916C
917C       zero scratch offsets
918C
919        NUM_BLK           = 0
920        IOFFSET_SCRATCH   = 0
921        REDSCRVAR         = 0.0D0
922C
923        DO ISBATCH = 1, NBATCH
924C
925C         offset for batch ISBATCH w.r.t JOFF
926C
927          CALL DZERO(VEC2,LEBATCH(ISBATCH))
928C
929C         set new offset
930C
931C         position in file is at the end of vector IVEC - 1
932C
933          IOFFSET_LUOUT2     = MY_IOFF_LUOUT2 + IOFFSET_SCRATCH
934          IOFFSET_INT_LUOUT2 = 1 + NUM_BLK
935C
936CSK       WRITE(LUWRT,*) 'This is my OFFSET for LUOUT2',
937CSK  &                    IOFFSET_IN_LUOUT2
938C
939          CALL RDVEC_BATCH_DRV4(LUOUT2,VEC2,LBATCH(ISBATCH),
940     &                          IBATCH(1,I1BATCH(ISBATCH)),
941     &                          IOFFSET_LUOUT2,IOFFSET_INT_LUOUT2,
942     &                          LUOUT2LIST,NUM_ACTIVE_BATCH)
943C
944CSK       WRITE(LUWRT,*) 'initial VEC2 on LUOUT2'
945CSK       CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
946CSK  &                  LUWRT)
947C
948          DO 400 IVEC = 1, (NVEC+IADD-1-NROOT_INFO)
949C
950            CALL DZERO(VEC1,LEBATCH(ISBATCH))
951C
952C           set new offset
953C
954C           position in file is at the end of vector IVEC - 1
955C
956            IOFFSET_LUOUT     = MY_IOFF_LUOUT + IOFFSET_SCRATCH +
957     &                        ( IVEC - 1 )  * MY_VEC1_IOFF
958            IOFFSET_INT_LUOUT = 1 + NUM_BLK   +
959     &                        ( IVEC - 1 )  * MY_ACT_BLK1
960C
961CSK         WRITE(LUWRT,*) 'This is my OFFSET for LUOUT in P1..._3 100',
962CSK  &                      IOFFSET_IN_LUOUT
963CSK         WRITE(LUWRT,*) 'This is my INT_OFFSET for LUOUT in
964CSK  &                      P1..._3 100',IOFFSET_INT_LUOUT
965C
966            CALL RDVEC_BATCH_DRV4(LUOUT,VEC1,LBATCH(ISBATCH),
967     &                            IBATCH(1,I1BATCH(ISBATCH)),
968     &                            IOFFSET_LUOUT,IOFFSET_INT_LUOUT,
969     &                            LUOUTLIST,NUM_ACTIVE_BATCH)
970CSK         WRITE(LUWRT,*) 'initial VEC1 on LUIN1 in P1..._3 200'
971CSK         CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
972CSK  &                    LUWRT)
973C
974            FACTOR = SUBSPH(IVEC)
975C
976C           VEC2 == VEC2 + VEC1 * FACTOR
977C
978            CALL DAXPY(LEBATCH(ISBATCH), FACTOR, VEC1, 1, VEC2, 1)
979C
980  400     CONTINUE
981C
982CSK       WRITE(LUWRT,*) 'final VEC2 to write on LUIN2 '
983CSK       CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
984CSK  &                  LUWRT)
985          REDSCRVAR = REDSCRVAR
986     &              + DDOT( LEBATCH(ISBATCH), VEC2, 1, VEC2, 1)
987C
988C         new offset for writing on LUIN2
989C
990          IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH
991          IOFFSET_INT_IN2  = 1 + NUM_BLK
992C
993          CALL WTVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH),
994     &                          IBATCH(1,I1BATCH(ISBATCH)),
995     &                          IOFFSET_IN_LUIN2,IOFFSET_INT_IN2,
996     &                          LUIN2LIST,NUM_ACTIVE_BATCH)
997C
998C         keep track of correct offset
999          IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
1000          NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
1001C
1002        END DO
1003C
1004      END IF
1005C     ^ NVEC + IADD - 1 - NROOT > 0 ( STORE_F == .TRUE. )
1006C
1007      SCALEVEC = 0.0D0
1008C
1009C     communicate REDSCRVAR to get full scale factor
1010C
1011      CALL REDVEC_REL(REDSCRVAR,SCALEVEC,1,2,MPI_SUM,MPI_COMM_WORLD,-1)
1012C
1013C     1.4 normalizing the new vector
1014C
1015C     zero scratch offsets
1016C
1017      NUM_BLK           = 0
1018      IOFFSET_SCRATCH   = 0
1019C
1020C
1021      FACTOR = 1.0D0 / SQRT( SCALEVEC )
1022csk   WRITE(LUWRT,*) 'THIS IS MY SCALING FACTOR',FACTOR
1023C
1024      DO ISBATCH = 1, NBATCH
1025C
1026C       offset for batch ISBATCH w.r.t JOFF
1027C
1028        CALL DZERO(VEC2,LEBATCH(ISBATCH))
1029C
1030C       set new offset
1031C
1032C       position in file is at the end of vector IVEC - 1
1033C
1034        IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH
1035        IOFFSET_INT_IN2  = 1 + NUM_BLK
1036C
1037C
1038CSK     WRITE(LUWRT,*) 'This is my OFFSET for LUIN2',
1039CSK  &                  IOFFSET_IN_LUIN2
1040C
1041        CALL RDVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH),
1042     &                        IBATCH(1,I1BATCH(ISBATCH)),
1043     &                        IOFFSET_IN_LUIN2,IOFFSET_INT_IN2,
1044     &                        LUIN2LIST,NUM_ACTIVE_BATCH)
1045C
1046CSK     WRITE(LUWRT,*) 'initial VEC2 on LUIN2'
1047CSK     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
1048CSK  &                LUWRT)
1049
1050C
1051        CALL DSCAL(LEBATCH(ISBATCH),FACTOR,VEC2,1)
1052
1053        IF(THR_CVEC .GT. 0.0D0 .AND. IROUND .EQ. 1) THEN
1054          DO I = 1,LEBATCH(ISBATCH)
1055            IF(ABS(VEC2(I)) .LT. THR_CVEC) THEN
1056              IF(VEC2(I) .EQ. 0.0D0) THEN
1057                I_ZERO = I_ZERO + 1
1058              ELSE
1059                VEC2(I) = 0.0D0
1060                I_REMOVED = I_REMOVED + 1
1061              END IF
1062            END IF
1063          END DO
1064        END IF
1065C
1066C       set new offset
1067C
1068C       position in file is at the end of vector NVEC + IADD - 1 - NROOT
1069C
1070        IOFFSET_LUOUT  = MY_IOFF_LUOUT + IOFFSET_SCRATCH +
1071     &                 ( NVEC + IADD - 1 - NROOT_INFO ) * MY_VEC1_IOFF
1072C
1073        IOFFSET_INT_LUOUT = 1 + NUM_BLK +
1074     &                 ( NVEC + IADD - 1 - NROOT_INFO ) * MY_ACT_BLK1
1075C
1076csk     WRITE(LUWRT,*) 'This is my OFFSET for LUOUT, IOFFSET_INT_LUOUT',
1077csk  &                  IOFFSET_LUOUT, IOFFSET_INT_LUOUT
1078C
1079csk     WRITE(LUWRT,*) 'absolute final new vec on VEC2 to LUOUT'
1080csk     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
1081csk  &                LUWRT)
1082C
1083        IDEBUGPRNT = 0
1084C
1085        CALL WTVEC_BATCH_DRV4SP(LUOUT,VEC2,LBATCH(ISBATCH),
1086     &                          IBATCH(1,I1BATCH(ISBATCH)),
1087     &                          IOFFSET_LUOUT,IOFFSET_INT_LUOUT,
1088     &                          LUOUTLIST,NUM_ACTIVE_BATCH,IDEBUGPRNT)
1089C
1090C
1091C       keep track of correct offset
1092        IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
1093        NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
1094C
1095      END DO
1096      ! need to do an MPI_ALLREDUCE to check whether any slave has removed an element in the trial vector
1097      IF(THR_CVEC .GT. 0.0D0 .AND. IROUND .EQ. 1) THEN
1098         I_REMOVED_total = 0
1099         I_ZERO_total    = 0
1100         CAll redvec_rel(I_REMOVED,I_REMOVED_total,1,1,
1101     &               MPI_SUM,MPI_COMM_WORLD,-1)
1102         CAll redvec_rel(I_ZERO,I_ZERO_total,1,1,
1103     &                MPI_SUM,MPI_COMM_WORLD,-1)
1104         IF(I_REMOVED_total .GT. 0) THEN
1105!#define LUCI_DEBUG
1106#ifdef LUCI_DEBUG
1107            WRITE(luwrt,'(/A,I12,A,I4,A,1P,D10.2,I14)')
1108     &       'info: Removed',I_REMOVED_total,
1109     &       ' elements in new CI trial vector no.',IADD,
1110     &       '; threshold & zeroes',THR_CVEC, I_ZERO_total
1111#undef LUCI_DEBUG
1112#endif
1113            GO TO 10
1114         END IF
1115      END IF
1116C
1117      END
1118***********************************************************************
1119*                                                                     *
1120* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig                *
1121* parallelization by Stefan Knecht                                    *
1122*                                                                     *
1123***********************************************************************
1124      SUBROUTINE INPROD_B_PAR_RL_LUCI2(LUIN1,LUIN2,LUIN3,VEC1,VEC2,
1125     &                                 SUBSPH,NBATCH,
1126     &                                 LBATCH,LEBATCH,I1BATCH,IBATCH,
1127     &                                 MY_IOFF_LUIN1,MY_IOFF_LUIN2,
1128     &                                 MY_IOFF_LUIN3,LUIN1LIST,
1129     &                                 LUIN2LIST,LUIN3LIST,IVEC,
1130     &                                 NVEC,IMUSTRED,ISTRED)
1131C
1132C     Written by  S. Knecht         - March 14 2008
1133C
1134C**********************************************************************
1135C
1136C     calculating dot product between two vectors on file LUIN1 resp.
1137C     LUIN2
1138C
1139C     NOTE: IVEC = IVEC
1140C           NVEC = NVEC
1141C
1142C     active blocks on the MPI-files are flagged by a nonzero length
1143C
1144C     Last revision:     S. Knecht       - June  2007
1145C
1146************************************************************************
1147      IMPLICIT DOUBLE PRECISION ( A-H,O-Z)
1148#include "maxorb.h"
1149#include "infpar.h"
1150#include "mpif.h"
1151      integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE)
1152#include "parluci.h"
1153      DIMENSION VEC1(*), VEC2(*), SUBSPH(*)
1154      DIMENSION LBATCH(*), LEBATCH(*), I1BATCH(*), IBATCH(8,*)
1155      DIMENSION LUIN1LIST(*), LUIN2LIST(*), LUIN3LIST(*)
1156      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN1
1157      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN2
1158      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN3
1159      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN1, IOFFSET_IN_LUIN2
1160      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN3
1161      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_SCRATCH
1162      INTEGER NUM_BLK
1163C
1164C     initialize scratch offsets
1165      NUM_BLK           = 0
1166      IOFFSET_SCRATCH   = 0
1167      IOFFSET_IN_LUIN1  = 0
1168      IOFFSET_IN_LUIN2  = 0
1169      IOFFSET_IN_LUIN3  = 0
1170      IOFFSET_INT_IN1   = 0
1171      IOFFSET_INT_IN2   = 0
1172      IOFFSET_INT_IN3   = 0
1173C
1174      DO ISBATCH = 1, NBATCH
1175C
1176C       offset for batch ISBATCH w.r.t JOFF
1177C
1178        CALL DZERO(VEC2,LEBATCH(ISBATCH))
1179C
1180C       set new offset
1181C       position in file is at the end of vector JOFF - 1
1182C
1183        IOFFSET_IN_LUIN1 = MY_IOFF_LUIN1 + IOFFSET_SCRATCH +
1184     &                     ( JVEC_SF ) * MY_VEC1_IOFF
1185        IOFFSET_INT_IN1  = 1 + NUM_BLK  +
1186     &                     ( JVEC_SF ) * MY_ACT_BLK1
1187C
1188csk     WRITE(LUWRT,*) 'This is my OFFSET for LUIN1',
1189csk  &                  IOFFSET_IN_LUIN1
1190C
1191        CALL RDVEC_BATCH_DRV4(LUIN1,VEC2,LBATCH(ISBATCH),
1192     &                       IBATCH(1,I1BATCH(ISBATCH)),
1193     &                       IOFFSET_IN_LUIN1,IOFFSET_INT_IN1,
1194     &                       LUIN1LIST,NUM_ACTIVE_BATCH)
1195C
1196csk     WRITE(LUWRT,*) 'initial VEC2 on LUIN1'
1197csk     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),LUWRT)
1198C
1199        DO 100 JVEC = 1, NROOT_INFO
1200C
1201C          set new offset and zero read-in vector
1202C
1203           CALL DZERO(VEC1,LEBATCH(ISBATCH))
1204C
1205           IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH +
1206     &                        ( JVEC - 1 )  * MY_VEC1_IOFF
1207           IOFFSET_INT_IN2  = 1 + NUM_BLK  +
1208     &                        ( JVEC - 1 ) * MY_ACT_BLK1
1209C
1210CSK        WRITE(LUWRT,*) 'This is my OFFSET for LUIN2',
1211CSK     &                  IOFFSET_IN_LUIN2
1212C
1213C
1214C          read in batch ISBATCH from LUIN1 to VEC1
1215C
1216           CALL RDVEC_BATCH_DRV4(LUIN2,VEC1,LBATCH(ISBATCH),
1217     &                           IBATCH(1,I1BATCH(ISBATCH)),
1218     &                           IOFFSET_IN_LUIN2,IOFFSET_INT_IN2,
1219     &                           LUIN2LIST,NUM_ACTIVE_BATCH)
1220C
1221csk        WRITE(LUWRT,*) 'initial VEC1 on LUIN2'
1222csk        CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,
1223csk  &                   LEBATCH(ISBATCH),LUWRT)
1224C
1225C
1226C          IJ = (IVEC+NVEC)*(IVEC+NVEC-1)/2 + JVEC
1227C          SUBSPH(IJ) == VEC1 * VEC2
1228C
1229           IJ = (IVEC+NVEC)*(IVEC+NVEC-1)/2 + JVEC
1230csk        WRITE(LUWRT,*) ' IJ in loop 1', IJ
1231C
1232           SUBSPH(IJ) = SUBSPH(IJ) +
1233     &                  DDOT(LEBATCH(ISBATCH),VEC1,1,VEC2,1)
1234C
1235C          keep track of memory offset and the 'reduction' counter
1236C
1237           IF( ISBATCH .eq. 1 ) THEN
1238             IMUSTRED = IMUSTRED + 1
1239             IF( IVEC .eq. 1 .and. JVEC .eq. 1 ) ISTRED = IJ
1240           END IF
1241C
1242C
1243  100   CONTINUE
1244C
1245        JJVEC = 0
1246C
1247        DO 200 JVEC = NROOT_INFO+1 , NVEC+IVEC
1248C
1249C          set new offset and zero read-in vector
1250C
1251           JJVEC = JJVEC + 1
1252C
1253           CALL DZERO(VEC1,LEBATCH(ISBATCH))
1254C
1255           IOFFSET_IN_LUIN3 = MY_IOFF_LUIN3 + IOFFSET_SCRATCH +
1256     &                        ( JJVEC - 1 )  * MY_VEC1_IOFF
1257           IOFFSET_INT_IN3  = 1 + NUM_BLK  +
1258     &                        ( JJVEC - 1 )  * MY_ACT_BLK1
1259C
1260csk        WRITE(LUWRT,*) 'This is my OFFSET for LUIN3',
1261csk  &                     IOFFSET_IN_LUIN3, IOFFSET_INT_IN3
1262C
1263C
1264C          read in batch ISBATCH from LUIN3 to VEC1
1265C
1266           CALL RDVEC_BATCH_DRV4(LUIN3,VEC1,LBATCH(ISBATCH),
1267     &                           IBATCH(1,I1BATCH(ISBATCH)),
1268     &                           IOFFSET_IN_LUIN3,IOFFSET_INT_IN3,
1269     &                           LUIN3LIST,NUM_ACTIVE_BATCH)
1270C
1271csk        WRITE(LUWRT,*) 'initial VEC1 on LUIN3'
1272csk        CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,
1273csk  &                   LEBATCH(ISBATCH),LUWRT)
1274C
1275C
1276C          IJ = (IVEC+NVEC)*(IVEC+NVEC-1)/2 + JVEC
1277C          SUBSPH(IJ) == VEC1 * VEC2
1278C
1279           IJ = (IVEC+NVEC)*(IVEC+NVEC-1)/2 + JVEC
1280csk        WRITE(LUWRT,*) ' IJ in loop 2', IJ
1281C
1282           SUBSPH(IJ) = SUBSPH(IJ) +
1283     &                  DDOT(LEBATCH(ISBATCH),VEC1,1,VEC2,1)
1284C
1285C          keep track of the 'reduction' counter
1286C
1287           IF( ISBATCH .eq. 1 ) THEN
1288             IMUSTRED = IMUSTRED + 1
1289           END IF
1290C
1291C
1292  200   CONTINUE
1293C
1294C       keep track of correct offset
1295        IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
1296        NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
1297C
1298      END DO
1299C
1300      END
1301***********************************************************************
1302*                                                                     *
1303* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig                *
1304* parallelization by Stefan Knecht                                    *
1305*                                                                     *
1306***********************************************************************
1307      SUBROUTINE P3_B_PAR_RL_LUCI1(VEC1,VEC2,SUBSPH,
1308     &                             LUINLIST,LUIN2LIST,LUOUTLIST,
1309     &                             NBATCH,LBATCH,LEBATCH,I1BATCH,IBATCH,
1310     &                             MY_IOFF_LUIN,MY_IOFF_LUIN2,
1311     &                             MY_IOFF_LUOUT,NVEC,NVEC2,IROOT,
1312     &                             LUIN,LUIN2,LUOUT)
1313C
1314C     Written by  S. Knecht         - March 14 2008
1315C
1316C**********************************************************************
1317C
1318C     calculating scaled vecsum between two vectors on file LUIN resp.
1319C     LUIN2; saving on LUOUT
1320C
1321C     NOTE: IROOT = IROOT
1322C           NVEC  = NVEC
1323C           NVEC2 = NROOT
1324C
1325C     active blocks on the MPI-files are flagged by a nonzero list entry
1326C
1327C     Last revision:     S. Knecht       - March  2008
1328C
1329************************************************************************
1330      IMPLICIT DOUBLE PRECISION ( A-H,O-Z)
1331#include "maxorb.h"
1332#include "infpar.h"
1333#include "mpif.h"
1334      integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE)
1335#include "parluci.h"
1336      DIMENSION VEC1(*), VEC2(*), SUBSPH(*)
1337      DIMENSION LBATCH(*), LEBATCH(*), I1BATCH(*), IBATCH(8,*)
1338      DIMENSION LUINLIST(*), LUOUTLIST(*), LUIN2LIST(*)
1339      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN
1340      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN2
1341      INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT
1342      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN
1343      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN2
1344      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_LUOUT
1345      INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_SCRATCH
1346      INTEGER NUM_BLK
1347C
1348C     initialize scratch offsets
1349      NUM_BLK           = 0
1350      IOFFSET_SCRATCH   = 0
1351      IOFFSET_IN_LUIN   = 0
1352      IOFFSET_IN_LUIN2  = 0
1353      IOFFSET_LUOUT     = 0
1354      IOFFSET_INT_IN    = 0
1355      IOFFSET_INT_IN2   = 0
1356      IOFFSET_INT_LUOUT = 0
1357C
1358C
1359      DO ISBATCH = 1, NBATCH
1360C
1361C       offset for batch ISBATCH w.r.t JOFF
1362C
1363        CALL DZERO(VEC2,LEBATCH(ISBATCH))
1364C
1365        DO 100 IVEC = 1, NVEC2
1366C
1367          IJ = (( IROOT - 1 ) * NVEC ) + 1 + ( IVEC - 1)
1368C
1369          FACTOR = SUBSPH( IJ )
1370C
1371C         set new offset
1372C
1373C         position in file is at the end of vector IVEC - 1
1374C
1375          IOFFSET_IN_LUIN  = MY_IOFF_LUIN + IOFFSET_SCRATCH +
1376     &                     ( IVEC - 1 )   * MY_VEC1_IOFF
1377          IOFFSET_INT_IN   = 1 + NUM_BLK  +
1378     &                     ( IVEC - 1 )   * MY_ACT_BLK1
1379C
1380CSK       WRITE(LUWRT,*) 'This is my OFFSET for LUIN',
1381CSK  &                    IOFFSET_IN_LUIN
1382C
1383          IF( IVEC .eq. 1 ) THEN
1384C
1385            CALL RDVEC_BATCH_DRV4(LUIN,VEC2,LBATCH(ISBATCH),
1386     &                           IBATCH(1,I1BATCH(ISBATCH)),
1387     &                           IOFFSET_IN_LUIN,IOFFSET_INT_IN,
1388     &                           LUINLIST,NUM_ACTIVE_BATCH)
1389C
1390CSK         WRITE(LUWRT,*) 'initial VEC2 on LUIN'
1391CSK         CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
1392CSK  &                    LUWRT)
1393CSK         WRITE(LUWRT,*) 'scaling factor for this vector',FACTOR
1394C
1395            CALL DSCAL(LEBATCH(ISBATCH),FACTOR,VEC2,1)
1396CSK         WRITE(LUWRT,*) ' VEC2 after first scaling '
1397CSK         CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
1398CSK  &                    LUWRT)
1399C
1400          ELSE
1401C
1402            CALL DZERO(VEC1,LEBATCH(ISBATCH))
1403C
1404            CALL RDVEC_BATCH_DRV4(LUIN,VEC1,LBATCH(ISBATCH),
1405     &                           IBATCH(1,I1BATCH(ISBATCH)),
1406     &                           IOFFSET_IN_LUIN,IOFFSET_INT_IN,
1407     &                           LUINLIST,NUM_ACTIVE_BATCH)
1408C
1409CSK         WRITE(LUWRT,*) 'initial VEC1 on LUIN'
1410CSK         CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
1411CSK  &                 LUWRT)
1412C
1413C           VEC2 == VEC2 + VEC1 * FACTOR
1414C
1415            CALL DAXPY(LEBATCH(ISBATCH),FACTOR,VEC1,1,VEC2,1)
1416C
1417CSK         WRITE(LUWRT,*) 'final VEC2 after DAXPY in P3...1'
1418CSK         CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
1419CSK  &                    LUWRT)
1420C
1421          END IF
1422C         ^ IVEC == 1 ?
1423C
1424  100   CONTINUE
1425C
1426        DO 200 IVEC = 1, (NVEC - NVEC2)
1427C
1428          IJ = (( IROOT - 1 ) * NVEC ) + 1 + ( IVEC - 1) + NVEC2
1429C
1430          FACTOR = SUBSPH( IJ )
1431C
1432C         set new offset
1433C
1434C         position in file is at the end of vector IVEC - 1
1435C
1436          IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH +
1437     &                     ( IVEC - 1 )    * MY_VEC1_IOFF
1438          IOFFSET_INT_IN2  = 1 + NUM_BLK   +
1439     &                     ( IVEC - 1 )    * MY_ACT_BLK1
1440C
1441CSK       WRITE(LUWRT,*) 'This is my OFFSET for LUIN',
1442CSK     &                 IOFFSET_IN_LUIN
1443C
1444          CALL DZERO(VEC1,LEBATCH(ISBATCH))
1445C
1446          CALL RDVEC_BATCH_DRV4(LUIN2,VEC1,LBATCH(ISBATCH),
1447     &                          IBATCH(1,I1BATCH(ISBATCH)),
1448     &                          IOFFSET_IN_LUIN2,IOFFSET_INT_IN2,
1449     &                          LUIN2LIST,NUM_ACTIVE_BATCH)
1450C
1451CSK       WRITE(LUWRT,*) 'initial VEC1 on LUIN2'
1452CSK       CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
1453CSK  &                  LUWRT)
1454CSK       WRITE(LUWRT,*) 'scaling factor for this vector',FACTOR
1455C
1456C         VEC2 == VEC2 + VEC1 * FACTOR
1457C
1458          CALL DAXPY(LEBATCH(ISBATCH),FACTOR,VEC1,1,VEC2,1)
1459C
1460CSK       WRITE(LUWRT,*) 'final VEC2 after 2nd DAXPY in P3...1'
1461CSK       CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
1462CSK  &                  LUWRT)
1463C
1464  200   CONTINUE
1465C
1466CSK     WRITE(LUWRT,*) 'final VEC2 to write on position',IROOT -1
1467CSK     CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),
1468CSK  &                LUWRT)
1469C
1470C       new offset for writing on LUOUT
1471C
1472        IOFFSET_LUOUT      =  MY_IOFF_LUOUT + IOFFSET_SCRATCH +
1473     &                        ( IROOT - 1 ) * MY_VEC1_IOFF
1474C
1475        IOFFSET_INT_LUOUT  = 1 + NUM_BLK    +
1476     &                        ( IROOT - 1 ) * MY_ACT_BLK1
1477C
1478        CALL WTVEC_BATCH_DRV4(LUOUT,VEC2,LBATCH(ISBATCH),
1479     &                       IBATCH(1,I1BATCH(ISBATCH)),
1480     &                       IOFFSET_LUOUT,IOFFSET_INT_LUOUT,
1481     &                       LUOUTLIST,NUM_ACTIVE_BATCH)
1482C
1483C       keep track of correct offset
1484        IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH)
1485        NUM_BLK         = NUM_BLK + NUM_ACTIVE_BATCH
1486C
1487      END DO
1488C
1489      END
1490***********************************************************************
1491*                                                                     *
1492* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig                *
1493* parallelization by Stefan Knecht                                    *
1494*                                                                     *
1495***********************************************************************
1496      SUBROUTINE UPDATE_LUC_LIST2(ISCLFAC_GROUP,LUCLIST,RCCTOS,CB,
1497     &                            NPARBLOCK,IBLOCKL,IGROUPLIST,
1498     &                            IPROCLIST,IRILP,BLOCKTIME)
1499C
1500C     make an update of of grouplist for c-vector file based on
1501C     different list gathered from MPI_COMM_WORLD
1502C
1503C
1504C     Written by  S. Knecht         - March 08 2008
1505C
1506C     OUTPUT: ISCLFAC_GROUP and updated file ILUC
1507C
1508C**********************************************************************
1509#include "implicit.h"
1510      INTEGER*8 KSCALLOC2, KSCALLOC3
1511!               for addressing of WORK
1512#include "maxorb.h"
1513#include "infpar.h"
1514#include "mpif.h"
1515      integer(kind=MPI_INTEGER_KIND) :: mynew_comm_mpi
1516      integer(kind=MPI_INTEGER_KIND) :: ierr_mpi
1517      integer(kind=MPI_INTEGER_KIND) :: my_STATUS(MPI_STATUS_SIZE)
1518#include "parluci.h"
1519#include "wrkspc.inc"
1520      DIMENSION ISCLFAC_GROUP(*), LUCLIST(*)
1521      DIMENSION CB(*)
1522      DIMENSION NPARBLOCK(*), IBLOCKL(*)
1523      DIMENSION IGROUPLIST(*), IPROCLIST(*)
1524      CHARACTER*12 WALLTID3, SECTID
1525      INTEGER RCCTOS(*)
1526C     some scratch
1527      INTEGER NZERO
1528C
1529      NZERO = 0
1530C
1531C     set mark for local memory
1532C
1533      IDUM = 0
1534      CALL MEMMAN(KDUM,  IDUM,    'MARK  ',IDUM,'UPLIST')
1535C
1536      CALL MEMMAN(KSCALLOC2,NUM_BLOCKS2,'ADDL  ',1,'ICLLC2')
1537      CALL MEMMAN(KSCALLOC3,NUM_BLOCKS2,'ADDL  ',1,'ICLLC3')
1538C
1539C     fill complete local iscalfac arrays with zero's
1540      CALL IZERO(WORK(KSCALLOC2), NUM_BLOCKS2)
1541      CALL IZERO(WORK(KSCALLOC3), NUM_BLOCKS2)
1542      CALL IZERO(ISCLFAC_GROUP  , NUM_BLOCKS2)
1543C
1544#ifdef LUCI_DEBUG
1545      WRITE(luwrt,*) '  start of subroutine UPDATE_LUC_LIST2 speaking'
1546      WRITE(luwrt,*) 'LUCLIST:'
1547      CALL IWRTMAMN(LUCLIST,1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt)
1548      WRITE(luwrt,*) 'ISCLFAC_GROUP:'
1549      CALL IWRTMAMN(ISCLFAC_GROUP,1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt)
1550      WRITE(luwrt,*) 'WORK(KSCALLOC2):'
1551      CALL IWRTMAMN(WORK(KSCALLOC2),1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt)
1552      WRITE(luwrt,*) 'WORK(KSCALLOC3):'
1553      CALL IWRTMAMN(WORK(KSCALLOC3),1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt)
1554#endif
1555C
1556      starttimer = MPI_WTIME()
1557C
1558C     "mpi_allsum" local LUCLIST which then on all
1559C     nodes will contain the number of non-zero C-blocks in
1560C     the complete CI-vector
1561C
1562      CALL REDVEC_REL(LUCLIST,WORK(KSCALLOC2),NUM_BLOCKS2,1,
1563     &                MPI_SUM,MPI_COMM_WORLD,-1)
1564C
1565C     find all c-blocks connecting to all sigma-blocks on each cpu
1566C
1567      CALL ICOPY(NUM_BLOCKS2,RCCTOS,1,WORK(KSCALLOC3),1)
1568C
1569C     case 1: number of CPUs in new group not equal to total number
1570C
1571C     case 2: number of CPUs in new group equal to total number
1572C
1573C
1574#ifdef LUCI_DEBUG
1575      write(luwrt,*) 'NEWCOMM_PROC,LUCI_NMPROC',NEWCOMM_PROC,LUCI_NMPROC
1576      write(luwrt,*) 'ILUC',ILUC
1577      write(luwrt,*) 'MYNEW_COMM',ILUC
1578      write(luwrt,*) 'ICOMM',ICOMM
1579      write(luwrt,*) 'ICOMM_id, ICOMM_SIZE',ICOMM_id,ICOMM_SIZE
1580#endif
1581*
1582      IF( NEWCOMM_PROC .ne. LUCI_NMPROC ) THEN
1583*
1584        CALL REDVEC_REL(WORK(KSCALLOC3),ISCLFAC_GROUP,NUM_BLOCKS2,1,
1585     &                  MPI_SUM,MYNEW_COMM,0)
1586*
1587*       all local node-masters call this routine!
1588*
1589        IF( MYNEW_ID .eq. 0 ) THEN
1590           CALL COPVCD_PAR_BDRIV5_REL(ILUC,ILUC,CB,NPARBLOCK,
1591     &                                WORK(KSCALLOC2),ISCLFAC_GROUP,
1592     &                                IBLOCKL,NUM_BLOCKS,ICOMM,
1593     &                                IGROUPLIST,IPROCLIST,IRILP)
1594C               COPVCD_PAR_BDRIV5_REL(LUIN,LUOUT,SEGMNT,IBLOCKD,
1595C     &                               ISCALFAC,ISCALFAC_GROUP,
1596C     &                               IBLOCKL,NBLOCK,JCOMM,
1597C     &                               IGROUPLIST,IPROCLIST,IRILP)
1598C
1599
1600        END IF
1601           mynew_comm_mpi = mynew_comm
1602           CALL MPI_BCAST(ISCLFAC_GROUP,NUM_BLOCKS2,
1603     &                    my_MPI_INTEGER,0,MYNEW_COMM_MPI,ierr_mpi)
1604*
1605      ELSE
1606*
1607C
1608         CALL UPDATE_GEN_LIST(WORK(KSCALLOC3),WORK(KSCALLOC2),
1609     &                        NUM_BLOCKS2)
1610C
1611C        to be consistent with output of case 1
1612C
1613         CALL IZERO(ISCLFAC_GROUP,NUM_BLOCKS2)
1614         CALL ICOPY(NUM_BLOCKS2,WORK(KSCALLOC3),1,ISCLFAC_GROUP,1)
1615C
1616      END IF
1617C     ^ NEWCOMM_PROC == LUCI_NMPROC ?
1618C
1619#ifdef LUCI_DEBUG
1620      WRITE(luwrt,*) '  subroutine UPDATE_LUC_LIST2 speaking'
1621      WRITE(luwrt,*) 'LUCLIST:'
1622      CALL IWRTMAMN(LUCLIST,1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt)
1623      WRITE(luwrt,*) 'ISCLFAC_GROUP:'
1624      CALL IWRTMAMN(ISCLFAC_GROUP,1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt)
1625#endif
1626C
1627C     final timing for block distribution
1628      blocktime = blocktime + MPI_WTIME() - starttimer
1629C
1630C     flush local memory
1631C
1632      IDUM = 0
1633      CALL MEMMAN(KDUM ,IDUM,'FLUSM ',2,'UPLIST')
1634C
1635      END
1636***********************************************************************
1637*                                                                     *
1638* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig                *
1639* parallelization by Stefan Knecht                                    *
1640*                                                                     *
1641***********************************************************************
1642      SUBROUTINE BLOCK_DISTR_DRV(NBLOCK,IBLOCKL,NBLOCKD,IBLOCKS_FNODE,
1643     &                           SCALFAC,NVAR,IPROCLIST)
1644#include "implicit.h"
1645      INTEGER*8 KICCTOS, KCWEIGHT, KCWEIGHTF, KBLCKWT, KIBTOTW
1646!               for addressing of WORK
1647#include "maxorb.h"
1648#include "infpar.h"
1649#include "mpif.h"
1650#include "parluci.h"
1651#include "wrkspc.inc"
1652! from cands.inc: icsm
1653#include "cands.inc"
1654      INTEGER*8 IABSOLUTE_WEIGHT
1655      IABSOLUTE_WEIGHT = 0
1656C
1657      IDUM  = 0
1658      CALL MEMMAN(KDUM,IDUM,'MARK  ',IDUM,'BLKDRV')
1659C
1660C     allocate local scratch arrays
1661C
1662      CALL MEMMAN(KICCTOS,      NBLOCK**2,'ADDL  ',1,'ICCTOS')
1663      CALL MEMMAN(KCWEIGHT,        NBLOCK,'ADDL  ',1,'ICWHT ')
1664      CALL MEMMAN(KCWEIGHTF,       NBLOCK,'ADDL  ',1,'ICWHTF')
1665      CALL MEMMAN(KBLCKWT,  2*LUCI_NMPROC,'ADDL  ',1,'IBLCKW')
1666      CALL MEMMAN(KIBTOTW,         NBLOCK,'ADDL  ',2,'IBTOTW')
1667
1668      CALL IZERO(WORK(KBLCKWT),2*LUCI_NMPROC)
1669      CALL IZERO(WORK(KCWEIGHT),NBLOCK)
1670      CALL IZERO(WORK(KCWEIGHTF),NBLOCK)
1671      CALL IZERO(WORK(KICCTOS),NBLOCK**2)
1672C
1673      CALL FIND_IMAT_SC(IBLOCKL,SCALFAC,WORK(KICCTOS),WORK(KCWEIGHT),
1674     &                  WORK(KIBTOTW),WORK(KCWEIGHTF),NBLOCK,
1675     &                  IABSOLUTE_WEIGHT)
1676C
1677      IF(IDISTROUTE.EQ.1) THEN
1678        call quit('dist strategy 1 disabled - not all variables are'//
1679     &            ' set properly if PRINT_BATCH_INFO is disabled...')
1680        CALL DISTBLKND_1(NBLOCK,WORK(KCWEIGHTF),NBLOCKD,WORK(KIBTOTW),
1681     &                   WORK(KBLCKWT),NVAR,WORK(KICCTOS),IBLOCKL,
1682     &                   IPROCLIST,WORK(KCWEIGHT),IABSOLUTE_WEIGHT)
1683      ELSE
1684        CALL DISTBLKND_2(NBLOCK,WORK(KCWEIGHTF),NBLOCKD,IBLOCKL,icsm)
1685      END IF
1686C
1687C     find all c-blocks connecting to a given sigma-block,
1688C     information will be stored in CBLOCKS_FNODE
1689C
1690      CALL FIND_CCTOS(IBLOCKS_FNODE,NBLOCKD,WORK(KICCTOS),NBLOCK)
1691C
1692C     eliminate local memory
1693      IDUM = 0
1694      CALL MEMMAN(KDUM ,IDUM,'FLUSM ',IDUM,'BLKDRV')
1695C
1696      END
1697***********************************************************************
1698*                                                                     *
1699* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig                *
1700* parallelization by Stefan Knecht                                    *
1701*                                                                     *
1702***********************************************************************
1703      SUBROUTINE FIND_ACTIVE_BLOCKS_PAR(LUIN,LBLK,BLK_A,SEGMNT,
1704     &                                  NBLOCK,IBLOCKD)
1705*
1706*. Find the active (nonvanishing blocks) on LUIN
1707*. Non vanishing block is flagged by a 1.0 ( note : real)
1708*  in BLK_A
1709*  parallel version
1710*
1711      IMPLICIT REAL*8(A-H,O-Z)
1712*. Output
1713      DIMENSION BLK_A(*)
1714*. Scratch
1715      DIMENSION SEGMNT(*)
1716#include "maxorb.h"
1717#include "infpar.h"
1718#include "mpif.h"
1719      integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE)
1720#include "parluci.h"
1721      DIMENSION IBLOCKD(NBLOCK)
1722*
1723      CALL REWINE(LUIN,LBLK)
1724*
1725      NBLK_A = 0
1726*. Loop over blocks
1727      DO 500 IBLK = 1, NBLOCK
1728*
1729        IF(LUCI_MYPROC.NE.IBLOCKD(IBLK))THEN
1730          BLK_A(IBLK) = 0.0D0
1731          GOTO 300
1732        ELSE
1733          CALL IFRMDS(LBL,1,-1,LUIN)
1734          IF( LBL .GE. 0 ) THEN
1735            IF(LBLK .GE.0 ) THEN
1736              KBLK = LBL
1737            ELSE
1738              KBLK = -1
1739            END IF
1740            NO_ZEROING = 1
1741            CALL FRMDSC2(SEGMNT,LBL,KBLK,LUIN,IMZERO,IAMPACK,
1742     &                   NO_ZEROING)
1743            IF(IMZERO.EQ.0) THEN
1744             NBLK_A = NBLK_A + 1
1745             BLK_A(IBLK) = 1.0D0
1746            ELSE
1747             BLK_A(IBLK) = 0.0D0
1748            END IF
1749          END IF
1750        END IF
1751 300  CONTINUE
1752*
1753 500  CONTINUE
1754*
1755      NTEST = 0
1756      IF(NTEST.GE.1) THEN
1757        WRITE(6,*)'myproc',LUCI_MYPROC
1758        WRITE(6,'(A,I8,I8)')
1759     &  ' FIND_A.... Number of total and active Blocks',NBLOCK,NBLK_A
1760      END IF
1761*
1762      END
1763***********************************************************************
1764      SUBROUTINE FNDMND_PAR(LU,LBLK,SEGMNT,NSUBMX,NSUB,ISCR,
1765     &                     SCR,ISCAT,SUBVAL,IBLOCKL,IBLOCKD,
1766     &                     NBLOCK,NTESTG)
1767*
1768* FIND NSUB LOWEST ELEMENTS OF VECTOR RESIDING ON FILE
1769* LU. ENSURE THAT NO DEGENERENCIES ARE SPLIT
1770*
1771*
1772* INPUT
1773*=======
1774* LU :    FILE WHERE VECTOR OF INTEREST IS RESIDING, REWOUND
1775* LBLK :  DEFINES FILE STRUCTURE ON FILE LU
1776* NSUBMX: LARGEST ALLOWED NUMBER OF SORTED ELEMENTS
1777*
1778* OUTPUT
1779*=======
1780* NSUB : ACTUAL NUMBER OF ELEMENTS OBTAINED. CAN BE SMALLER
1781*        THAN NSUBMX IF THE LAST ELEMENT BELONGS TO A DEGENERATE
1782*        SET
1783*ISCAT:  SCATTERING ARRAY, ISCAT(I) GIVES FULL ADRESS OF SORTED
1784*        ELEMENT I
1785*SUBVAL: VALUE OF SORTED ELEMENTS
1786
1787      IMPLICIT REAL*8           ( A-H,O-Z)
1788      INTEGER*8 KGATHERA, KGATHERB, KGATHERC, KGATHERD
1789!               for addressing of WORK
1790#include "maxorb.h"
1791#include "infpar.h"
1792#include "mpif.h"
1793      integer(kind=MPI_INTEGER_KIND) :: my_MPI_REAL8 = MPI_REAL8
1794      integer(kind=MPI_INTEGER_KIND) :: my_STATUS(MPI_STATUS_SIZE)
1795#include "parluci.h"
1796#include "wrkspc.inc"
1797
1798      DIMENSION SEGMNT(*), ISCAT(*),SUBVAL(*),SCR(*),ISCR(*)
1799      DIMENSION IBLOCKL(NBLOCK), IBLOCKD(NBLOCK)
1800      INTEGER(KIND=MPI_OFFSET_KIND) IOFF_SCR
1801C
1802      NTESTL = 0000
1803      NTEST = MAX(NTESTG,NTESTL)
1804      NTEST = 000
1805C     offset initialization
1806      IOFF_SCR = 0
1807      IOFF_SCR = IOFF_SCR + MY_DIA_OFF
1808C
1809      IBASE = 1
1810      LSUB = 0
1811*     loop over blocks
1812      DO 1000 II = 1, NBLOCK
1813*
1814        IF( IBLOCKD(II) .eq. LUCI_MYPROC )THEN
1815          LBL = IBLOCKL(II)
1816        ELSE
1817*         useful to set all other blocks to 0?
1818          LBL = 0
1819        ENDIF
1820*
1821        IF(NTEST.GE.10) THEN
1822          WRITE(LUWRT,*) ' Info about block ',II
1823          WRITE(LUWRT,*) ' Number of elements ',LBL
1824        END IF
1825        IF(LBL .GE. 0 ) THEN
1826          IF(LBLK .GE.0 ) THEN
1827            KBLK = LBL
1828          ELSE
1829            KBLK = -1
1830          END IF
1831          IF( IBLOCKD(II) .eq. LUCI_MYPROC )THEN
1832             CALL MPI_FILE_READ_AT(IDIA,IOFF_SCR,SEGMNT,LBL,
1833     &                             my_MPI_REAL8,my_STATUS,ierr_mpi)
1834          ENDIF
1835          IF(NTEST.GE.100) THEN
1836            WRITE(LUWRT,*) ' Elements read in '
1837            CALL WRTMATMN(SEGMNT,1,LBL,1,LBL,LUWRT)
1838          END IF
1839          IF(LBL .GE. 0 ) THEN
1840*. LOWEST ELEMENTS IN SEGMNT  ( ADD TO PREVIOUS LIST )
1841            MSUBMX = MIN(NSUBMX,LBL)
1842            IF(LBL.GE.1) THEN
1843              CALL SORLOW(SEGMNT,SCR(1+LSUB),ISCR(1+LSUB),LBL,
1844     &                    MSUBMX,MSUB,NTEST)
1845            ELSE
1846              MSUB = 0
1847            END IF
1848            DO 10 I = 1, MSUB
1849   10         ISCR(LSUB+I) = ISCR(LSUB+I) + IBASE - 1
1850* SORT COMBINED LIST
1851            MSUBMX = MIN(NSUBMX,LSUB+MSUB)
1852            IF(MSUBMX.GT.0) THEN
1853              CALL SORLOW(SCR,SUBVAL,ISCAT,LSUB+MSUB,MSUBMX,LLSUB,
1854     &                    NTEST)
1855            ELSE
1856              LLSUB = 0
1857            END IF
1858            LSUB = LLSUB
1859            DO 20 I = 1, LSUB
1860              ISCR(I+2*NSUBMX) = ISCR(ISCAT(I))
1861   20       CONTINUE
1862*
1863            CALL ICOPVE(ISCR(1+2*NSUBMX),ISCR(1),LSUB)
1864            CALL DCOPY(LSUB,SUBVAL,1,SCR,1)
1865
1866            IF(NTEST .GE. 20 ) THEN
1867              WRITE(LUWRT,*)' Lowest elements and their original place'
1868              WRITE(LUWRT,*)' Number of elements obtained ', LSUB
1869              CALL WRTMATMN(SUBVAL,1,LSUB,1,LSUB,LUWRT)
1870              CALL IWRTMAMN(ISCR,1,LSUB,1,LSUB,LUWRT)
1871            END IF
1872          END IF
1873*
1874        END IF
1875        IOFF_SCR = IOFF_SCR + LBL
1876C       set to lbl to true value
1877        LBL      = IBLOCKL(II)
1878        IBASE    = IBASE + LBL
1879C
1880 1000 CONTINUE
1881*
1882      NTEST = 00
1883      NSUB = LSUB
1884      CALL ICOPVE(ISCR,ISCAT,NSUB)
1885      IF(NTEST .GE. 20) THEN
1886        WRITE(LUWRT,*) ' Lowest elements and their original place '
1887        WRITE(LUWRT,*) ' Number of elements obtained ', NSUB
1888        CALL WRTMATMN(SUBVAL,1,NSUB,1,NSUB,LUWRT)
1889        CALL IWRTMAMN(ISCAT,1,NSUB,1,NSUB,LUWRT)
1890      END IF
1891*
1892      IDUM = 0
1893      CALL MEMMAN(KDUM,IDUM,'MARK  ',IDUM,'GATHER')
1894*
1895      CALL MEMMAN(KGATHERA,LUCI_NMPROC*NSUBMX,'ADDL  ',2,'PARRA1')
1896      CALL MEMMAN(KGATHERB,LUCI_NMPROC*NSUBMX,'ADDL  ',2,'PARRA2')
1897      CALL MEMMAN(KGATHERC,LUCI_NMPROC*NSUBMX,'ADDL  ',1,'PARIA1')
1898      CALL MEMMAN(KGATHERD,LUCI_NMPROC*NSUBMX,'ADDL  ',1,'PARIA2')
1899*. We gather all lowest values from each node
1900*. and build up a combined list of those
1901      CALL GATHER_LOW_PAR(NSUB,NSUBMX,SUBVAL,ISCAT,
1902     &                    WORK(KGATHERA),WORK(KGATHERB),
1903     &                    WORK(KGATHERC),WORK(KGATHERD),
1904     &                    NTESTG)
1905*     update SCR1 and ISCR1
1906      CALL DCOPY(NSUBMX,SUBVAL,1,SCR,1)
1907      CALL ICOPVE(ISCAT,ISCR,NSUBMX)
1908
1909      IF(NTEST.GE.20)THEN
1910        WRITE(LUWRT,*)'after search: SUBVAL and ISCAT'
1911        CALL WRTMATMN(SUBVAL,1,NSUBMX,1,NSUBMX,LUWRT)
1912        CALL IWRTMAMN(ISCAT,1,NSUBMX,1,NSUBMX,LUWRT)
1913      END IF
1914CSK      NTEST = 0
1915
1916      IF(NSUB.NE.NSUBMX.AND.NTEST.GE.20)THEN
1917        WRITE(LUWRT,*)'Warning! NSUB is lower than NSUBMX'
1918        WRITE(LUWRT,*)'NSUB is set to be equal to NSUBMX'
1919        NSUB = NSUBMX
1920      END IF
1921*
1922*. Eliminate local memory
1923      IDUM = 0
1924      CALL MEMMAN(KDUM ,IDUM,'FLUSM ',IDUM,'GATHER')
1925*
1926      END
1927*****************************************************
1928#else   /* VAR_MPI */
1929* dummy routine for non-mpi compilation
1930       SUBROUTINE par_lucita
1931!        write(6,*) 'let us do nothing',nothing
1932       END
1933#endif    /* VAR_MPI */
1934