1! 2! Dalton, a molecular electronic structure program 3! Copyright (C) by the authors of Dalton. 4! 5! This program is free software; you can redistribute it and/or 6! modify it under the terms of the GNU Lesser General Public 7! License version 2.1 as published by the Free Software Foundation. 8! 9! This program is distributed in the hope that it will be useful, 10! but WITHOUT ANY WARRANTY; without even the implied warranty of 11! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12! Lesser General Public License for more details. 13! 14! If a copy of the GNU LGPL v2.1 was not distributed with this 15! code, you can obtain one at https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html. 16! 17! 18 19#ifdef VAR_MPI 20*********************************************************************** 21* * 22* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig * 23* parallelization by Stefan Knecht * 24* * 25*********************************************************************** 26 SUBROUTINE P1_B_PAR_RL_LUCI1(VEC1,VEC2,EIGAPR,RNRM,EIGSHF, 27 & EIG,TEST,E_CONV,RTCNV,CONVER,ITER, 28 & MAXIT, 29 & IROOT,LUIN1LIST,LUIN2LIST,LUOUTLIST, 30 & NBATCH,LBATCH,LEBATCH,I1BATCH,IBATCH, 31 & MY_IOFF_LUIN1,MY_IOFF_LUIN2, 32 & MY_IOFF_LUOUT, 33 & SCRRED,LUIN1,LUIN2,LUOUT) 34C 35C Written by S. Knecht - March 13 2008 36C 37C********************************************************************** 38C 39C calculating dot product between two vectors on file LUIN1 resp. 40C LUIN2 41C 42C NOTE: IROOT = IROOT 43C 44C active blocks on the MPI-files are flagged by a nonzero list entry 45C 46C Last revision: S. Knecht - March 2008 47C 48************************************************************************ 49 IMPLICIT DOUBLE PRECISION ( A-H,O-Z) 50#include "maxorb.h" 51#include "infpar.h" 52#include "mpif.h" 53 integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE) 54#include "parluci.h" 55 DIMENSION VEC1(*), VEC2(*) 56 DIMENSION LBATCH(*), LEBATCH(*), I1BATCH(*), IBATCH(8,*) 57 DIMENSION LUIN1LIST(*), LUIN2LIST(*), LUOUTLIST(*) 58 DIMENSION RNRM(MAXIT,*), EIG(MAXIT,*) 59 DIMENSION SCRRED(*) 60 LOGICAL CONVER 61 integer RTCNV(*) 62 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN1 63 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN2 64 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT 65 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN1, IOFFSET_IN_LUIN2 66 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_LUOUT 67 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_SCRATCH 68 INTEGER NUM_BLK 69C 70C initialize scratch offsets 71 NUM_BLK = 0 72 IOFFSET_SCRATCH = 0 73 IOFFSET_IN_LUIN1 = 0 74 IOFFSET_IN_LUIN2 = 0 75 IOFFSET_LUOUT = 0 76 IOFFSET_INT_IN1 = 0 77 IOFFSET_INT_IN2 = 0 78 IOFFSET_INT_LUOUT = 0 79C 80 RNORM = 0.0D0 81 REDSCRVAR = 0.0D0 82 CALL DZERO(SCRRED,IROOT) 83C 84 DO ISBATCH = 1, NBATCH 85C 86C offset for batch ISBATCH w.r.t JOFF 87C 88 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 89C 90 FACTOR = - EIGAPR 91C 92C set new offset wrt IROOT 93C 94 IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH + 95 & ( IROOT - 1 ) * MY_VEC1_IOFF 96 IOFFSET_INT_IN2 = 1 + NUM_BLK + 97 & ( IROOT - 1 ) * MY_ACT_BLK1 98C 99CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN2', 100CSK & IOFFSET_IN_LUIN2 101C 102 CALL RDVEC_BATCH_DRV4(LUIN2,VEC1,LBATCH(ISBATCH), 103 & IBATCH(1,I1BATCH(ISBATCH)), 104 & IOFFSET_IN_LUIN2,IOFFSET_INT_IN2, 105 & LUIN2LIST,NUM_ACTIVE_BATCH) 106C 107CSK WRITE(LUWRT,*) 'initial VEC1 on LUIN2' 108CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 109CSK & LUWRT) 110C 111C set offset for LUIN1 and zero read-in vector 112C 113 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 114C 115 IOFFSET_IN_LUIN1 = MY_IOFF_LUIN1 + IOFFSET_SCRATCH + 116 & ( IROOT - 1 ) * MY_VEC1_IOFF 117 IOFFSET_INT_IN1 = 1 + NUM_BLK + 118 & ( IROOT - 1 ) * MY_ACT_BLK1 119C 120CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN1', 121CSK & IOFFSET_IN_LUIN1 122C 123C read in batch ISBATCH from LUIN1 to VEC2 124C 125 CALL RDVEC_BATCH_DRV4(LUIN1,VEC2,LBATCH(ISBATCH), 126 & IBATCH(1,I1BATCH(ISBATCH)), 127 & IOFFSET_IN_LUIN1,IOFFSET_INT_IN1, 128 & LUIN1LIST,NUM_ACTIVE_BATCH) 129C 130CSK WRITE(LUWRT,*) 'initial VEC2 on LUIN1' 131CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 132CSK & LUWRT) 133C 134C VEC2 == VEC2 + VEC1 * FACTOR 135C 136 CALL DAXPY(LEBATCH(ISBATCH),FACTOR,VEC1,1,VEC2,1) 137C 138C calculate partial RNORM 139C 140 REDSCRVAR = REDSCRVAR + DDOT(LEBATCH(ISBATCH),VEC2,1,VEC2,1) 141C 142C write VEC2 to LUOUT 143C 144 IOFFSET_LUOUT = MY_IOFF_LUOUT + IOFFSET_SCRATCH 145 IOFFSET_INT_LUOUT = 1 + NUM_BLK 146CSK WRITE(LUWRT,*) 'This is my OFFSET for LUOUT', 147CSK & IOFFSET_LUOUT 148CSK WRITE(LUWRT,*) 'final VEC2 to write on LUOUT' 149C 150 CALL WTVEC_BATCH_DRV4(LUOUT,VEC2,LBATCH(ISBATCH), 151 & IBATCH(1,I1BATCH(ISBATCH)), 152 & IOFFSET_LUOUT,IOFFSET_INT_LUOUT, 153 & LUOUTLIST,NUM_ACTIVE_BATCH) 154C 155C keep track of correct offset 156 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 157 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 158C 159 END DO 160C ^ loop over batches 161C 162C communicate REDSCRVAR to get full RNORM 163C 164 CALL REDVEC_REL(REDSCRVAR,SCRRED,1,2,MPI_SUM,MPI_COMM_WORLD,-1) 165 CALL DCOPY(1,SCRRED,1,REDSCRVAR,1) 166C 167 RNORM = SQRT(REDSCRVAR) 168C 169 RNRM(ITER-1,IROOT) = RNORM 170C 171C print norm and eigenvalue 172C 173 WRITE(LUWRT,'(A19,I8,1p,1E15.5,0p,3X,1F19.10)') 174 & ' Iter RNORM EIGAPR ', ITER-1,RNORM,EIGAPR+EIGSHF 175 CALL FLSHFO(LUWRT) 176C 177C 178C screening of new trial vector 179C 180 RNORM_FAC = RNORM 181C 182 IF (TRUNC_FAC .GT. 0.1D0) THEN 183 WRITE (LUWRT,*) 'TRUNC_FAC reset from ',TRUNC_FAC,' to',0.1D0 184 TRUNC_FAC = 0.1D0 185 END IF 186C 187C check for convergence 188C 189 IF(RNORM .lt. TEST)then ! .OR. ( ITER .gt. 2 .and. 190! & ABS(EIG(ITER-2,IROOT)-EIG(ITER-1,IROOT)).LT.E_CONV)) THEN 191C 192 RTCNV(IROOT) = 0 193 ELSE 194C 195 RTCNV(IROOT) = iroot 196 CONVER = .FALSE. 197 END IF 198C 199 END 200*********************************************************************** 201* * 202* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig * 203* parallelization by Stefan Knecht * 204* * 205*********************************************************************** 206 SUBROUTINE P1_B_PAR_RL_LUCI2(VEC1,VEC2,SHIFT,IROOT, 207 & LUINLIST,LUOUT1LIST,LUOUT2LIST, 208 & LUOUT3LIST, 209 & NBATCH,LBATCH,LEBATCH,I1BATCH,IBATCH, 210 & MY_IOFF_LUIN,MY_IOFF_LUOUT1, 211 & MY_IOFF_LUOUT2,MY_IOFF_LUOUT3, 212 & MY_IOFF_LUDIA, 213 & LUIN,LUOUT1,LUOUT2,LUOUT3,LUDIA,INV) 214C 215C Written by S. Knecht - March 13 2008 216C 217C********************************************************************** 218C 219C part 1.2 of DAVIDSON-OLSEN algorithm in MPI-file I/O mode 220C 221C NOTE: IROOT = IROOT 222C 223C active blocks on the MPI-files are flagged by a nonzero list entry 224C 225C Last revision: S. Knecht - June 2007 226C 227************************************************************************ 228 IMPLICIT DOUBLE PRECISION ( A-H,O-Z) 229#include "maxorb.h" 230#include "infpar.h" 231#include "mpif.h" 232 integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE) 233#include "parluci.h" 234 DIMENSION VEC1(*), VEC2(*) 235 DIMENSION LBATCH(*), LEBATCH(*), I1BATCH(*), IBATCH(8,*) 236 DIMENSION LUINLIST(*), LUOUT1LIST(*), LUOUT2LIST(*) 237 DIMENSION LUOUT3LIST(*) 238 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN 239 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT1 240 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT2 241 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT3 242 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUDIA 243 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN, IOFFSET_LUOUT1 244 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_LUOUT2 245 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_LUOUT3 246 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUDIA 247 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_SCRATCH 248 INTEGER NUM_BLK 249C 250C initialize scratch offsets 251 NUM_BLK = 0 252 IOFFSET_SCRATCH = 0 253 IOFFSET_IN_LUIN = 0 254 IOFFSET_LUOUT1 = 0 255 IOFFSET_LUOUT2 = 0 256 IOFFSET_LUOUT3 = 0 257 IOFFSET_IN_LUDIA = 0 258 IOFFSET_INT_IN = 0 259 IOFFSET_INT_LUOUT1 = 0 260 IOFFSET_INT_LUOUT2 = 0 261 IOFFSET_INT_LUOUT3 = 0 262C 263 REDSCRVAR = 0.0D0 264C 265 DO ISBATCH = 1, NBATCH 266C 267C offset for batch ISBATCH w.r.t JOFF 268C 269 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 270C 271C set new offset 272C 273C position in file is at the end of vector IROOT - 1 274C 275 IOFFSET_IN_LUIN = MY_IOFF_LUIN + IOFFSET_SCRATCH + 276 & ( IROOT - 1 ) * MY_VEC1_IOFF 277 IOFFSET_INT_IN = 1 + NUM_BLK + 278 & ( IROOT - 1 ) * MY_ACT_BLK1 279C 280CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN', 281CSK & IOFFSET_IN_LUIN 282C 283 CALL RDVEC_BATCH_DRV4(LUIN,VEC2,LBATCH(ISBATCH), 284 & IBATCH(1,I1BATCH(ISBATCH)), 285 & IOFFSET_IN_LUIN,IOFFSET_INT_IN, 286 & LUINLIST,NUM_ACTIVE_BATCH) 287C 288CSK WRITE(LUWRT,*) 'initial VEC2 on LUIN' 289CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 290CSK & LUWRT) 291C 292C calculate inverse diagonal on VEC1 293C 294 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 295C 296C new offset for file containing diagonal 297C 298 IOFFSET_IN_LUDIA = MY_IOFF_LUDIA + IOFFSET_SCRATCH 299C 300CSK WRITE(LUWRT,*) 'This is my OFFSET for LUDIA', 301CSK & IOFFSET_IN_LUDIA 302C 303C read in batch ISBATCH from LUDIA to VEC1 304C 305 CALL RDVEC_BATCH_DRV5(LUDIA,VEC1,LBATCH(ISBATCH), 306 & IBATCH(1,I1BATCH(ISBATCH)), 307 & IOFFSET_IN_LUDIA) 308C 309CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 310CSK & LUWRT) 311C 312 IF( LEBATCH(ISBATCH) .gt. 0 )THEN 313 IF( CSCREEN) THEN 314C set proper truncation factor 315 THR_TRUNC = TRUNC_FAC * RNORM_FAC 316Csk WRITE(LUWRT,*) 'TRUNCATION FACTOR:',THR_TRUNC 317Chj 14-jun-07: disable THR_ETRUNC 318Chj THR_ETRUNC = 1.0D-6 * THRES_E 319 THR_ETRUNC = -1.0D0 320 CALL DIAVC2_TRUNC(VEC1,VEC2,VEC1,SHIFT,LEBATCH(ISBATCH), 321 & THR_TRUNC,THR_ETRUNC) 322 ELSE 323 CALL DIAVC2(VEC1,VEC2,VEC1,SHIFT,LEBATCH(ISBATCH)) 324 END IF 325 END IF 326C 327C write VEC1 to LUOUT1 and VEC2 to LUOUT2 328C 329 IOFFSET_LUOUT1 = MY_IOFF_LUOUT1 + IOFFSET_SCRATCH 330 IOFFSET_LUOUT2 = MY_IOFF_LUOUT2 + IOFFSET_SCRATCH 331 IOFFSET_INT_LUOUT1 = 1 + NUM_BLK 332 IOFFSET_INT_LUOUT2 = 1 + NUM_BLK 333C 334C VEC1 335 CALL WTVEC_BATCH_DRV4(LUOUT1,VEC1,LBATCH(ISBATCH), 336 & IBATCH(1,I1BATCH(ISBATCH)), 337 & IOFFSET_LUOUT1,IOFFSET_INT_LUOUT1, 338 & LUOUT1LIST,NUM_ACTIVE_BATCH) 339C VEC2 340 CALL WTVEC_BATCH_DRV4(LUOUT2,VEC2,LBATCH(ISBATCH), 341 & IBATCH(1,I1BATCH(ISBATCH)), 342 & IOFFSET_LUOUT2,IOFFSET_INT_LUOUT2, 343 & LUOUT2LIST,NUM_ACTIVE_BATCH) 344C 345C 346CSK WRITE(LUWRT,*) 'THIS IS my partial REDSCRVAR',REDSCRVAR 347CSK WRITE(LUWRT,*) 'THIS IS LEBATCH(ISBATCH)',LEBATCH(ISBATCH) 348C calculate partial GAMMA 349 IF( LEBATCH(ISBATCH) .gt. 0 ) THEN 350 REDSCRVAR = REDSCRVAR + DDOT(LEBATCH(ISBATCH),VEC2,1,VEC1,1) 351 END IF 352C 353C keep track of correct offset 354 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 355 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 356C 357 END DO 358C 359C communicate REDSCRVAR to get full GAMMA 360C 361 CALL REDVEC_REL(REDSCRVAR,GAMMA,1,2,MPI_SUM,MPI_COMM_WORLD,-1) 362C 363CSK WRITE(LUWRT,*) 'THIS IS GAMMA',GAMMA 364C 365C continue with VNORM ... 366C 367C reset scratch offsets 368 NUM_BLK = 0 369 IOFFSET_SCRATCH = 0 370 REDSCRVAR = 0.0D0 371CSK WRITE(LUWRT,*) 'THIS IS REDSCRVAR',REDSCRVAR 372C 373 DO ISBATCH = 1, NBATCH 374C 375 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 376 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 377C 378C read VEC1 from LUOUT2 and VEC2 from LUOUT1 379C 380 IOFFSET_LUOUT1 = MY_IOFF_LUOUT1 + IOFFSET_SCRATCH 381 IOFFSET_LUOUT2 = MY_IOFF_LUOUT2 + IOFFSET_SCRATCH 382 IOFFSET_INT_LUOUT1 = 1 + NUM_BLK 383 IOFFSET_INT_LUOUT2 = 1 + NUM_BLK 384C 385C VEC1 386 CALL RDVEC_BATCH_DRV4(LUOUT2,VEC1,LBATCH(ISBATCH), 387 & IBATCH(1,I1BATCH(ISBATCH)), 388 & IOFFSET_LUOUT2,IOFFSET_INT_LUOUT2, 389 & LUOUT2LIST,NUM_ACTIVE_BATCH) 390C 391CSK WRITE(LUWRT,*) 'initial VEC1 on LUOUT2 in P1..._2 again' 392CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 393CSK & LUWRT) 394C VEC2 395 CALL RDVEC_BATCH_DRV4(LUOUT1,VEC2,LBATCH(ISBATCH), 396 & IBATCH(1,I1BATCH(ISBATCH)), 397 & IOFFSET_LUOUT1,IOFFSET_INT_LUOUT1, 398 & LUOUT1LIST,NUM_ACTIVE_BATCH) 399C 400CSK WRITE(LUWRT,*) ' VEC2 before DAXPY call' 401CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 402CSK & LUWRT) 403CSK WRITE(LUWRT,*) ' VEC1 before DAXPY call' 404CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 405CSK & LUWRT) 406C 407C VEC2 == VEC2 + VEC1 * FACTOR 408C 409 CALL DAXPY(LEBATCH(ISBATCH),-GAMMA,VEC1,1,VEC2,1) 410C 411CSK WRITE(LUWRT,*) ' VEC2 after DAXPY call' 412CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 413CSK & LUWRT) 414CSK WRITE(LUWRT,*) ' VEC1 after DAXPY call' 415CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 416CSK & LUWRT) 417C 418C calculate partial VNORM_Q 419C 420 REDSCRVAR = REDSCRVAR + DDOT(LEBATCH(ISBATCH),VEC2,1,VEC2,1) 421C 422C keep track of correct offset 423 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 424 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 425C 426 END DO 427C 428 VNORM_Q = 0.0D0 429 VNORM = 0.0D0 430C 431C communicate REDSCRVAR to get full VNORM_Q 432C 433 CALL REDVEC_REL(REDSCRVAR,VNORM_Q,1,2,MPI_SUM,MPI_COMM_WORLD,-1) 434C 435C is X an eigen vector for (H0 - 1 ) - 1 ??? 436C 437 VNORM = SQRT(VNORM_Q) 438C 439csk WRITE(LUWRT,*) 'GAMMA ',GAMMA 440csk WRITE(LUWRT,*) 'VNORM ',VNORM 441C 442 IF( VNORM .GT. 1.0D-7 ) THEN 443 IOLSAC = 1 444 ELSE 445 IOLSAC = 0 446 END IF 447 IF(IOLSAC .EQ. 1 ) THEN 448C 449CSK WRITE(LUWRT,*) ' Olsen correction active' 450 DELTA = 0.0D0 451C 452C continue with DELTA ... 453C 454C reset scratch offsets 455 NUM_BLK = 0 456 IOFFSET_SCRATCH = 0 457 REDSCRVAR = 0.0D0 458C 459 DO ISBATCH = 1, NBATCH 460C 461 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 462 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 463C 464C read VEC1 from LUOUT2 and VEC2 from LUOUT3 465C 466 IOFFSET_LUOUT3 = MY_IOFF_LUOUT3 + IOFFSET_SCRATCH 467 IOFFSET_LUOUT2 = MY_IOFF_LUOUT2 + IOFFSET_SCRATCH 468 IOFFSET_INT_LUOUT3 = 1 + NUM_BLK 469 IOFFSET_INT_LUOUT2 = 1 + NUM_BLK 470C 471C VEC1 472 CALL RDVEC_BATCH_DRV4(LUOUT2,VEC1,LBATCH(ISBATCH), 473 & IBATCH(1,I1BATCH(ISBATCH)), 474 & IOFFSET_LUOUT2,IOFFSET_INT_LUOUT2, 475 & LUOUT2LIST,NUM_ACTIVE_BATCH) 476C 477CSK WRITE(LUWRT,*) 'initial VEC1 on LUOUT2' 478CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 479CSK & LUWRT) 480C VEC2 481 CALL RDVEC_BATCH_DRV4(LUOUT3,VEC2,LBATCH(ISBATCH), 482 & IBATCH(1,I1BATCH(ISBATCH)), 483 & IOFFSET_LUOUT3,IOFFSET_INT_LUOUT3, 484 & LUOUT3LIST,NUM_ACTIVE_BATCH) 485C 486CSK WRITE(LUWRT,*) 'initial VEC2 on LUOUT3' 487CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 488CSK & LUWRT) 489C 490C calculate partial DELTA 491C 492 REDSCRVAR = REDSCRVAR + DDOT(LEBATCH(ISBATCH),VEC1,1,VEC2,1) 493C 494C keep track of correct offset 495 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 496 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 497C 498 END DO 499C 500C 501C communicate REDSCRVAR to get full DELTA 502C 503 CALL REDVEC_REL(REDSCRVAR,DELTA,1,2,MPI_SUM,MPI_COMM_WORLD,-1) 504C 505C 506CSK WRITE(LUWRT,*) ' THIS IS DELTA' 507C 508 FACTOR = - DELTA / GAMMA 509csk WRITE(LUWRT,*) 'FACTOR, DELTA, GAMMA',FACTOR, DELTA, GAMMA 510C 511C reset scratch offsets 512 NUM_BLK = 0 513 IOFFSET_SCRATCH = 0 514C 515 DO ISBATCH = 1, NBATCH 516C 517 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 518 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 519C 520C read VEC1 from LUOUT1 and VEC2 from LUOUT3 521C 522 IOFFSET_LUOUT3 = MY_IOFF_LUOUT3 + IOFFSET_SCRATCH 523 IOFFSET_LUOUT1 = MY_IOFF_LUOUT1 + IOFFSET_SCRATCH 524 IOFFSET_INT_LUOUT3 = 1 + NUM_BLK 525 IOFFSET_INT_LUOUT1 = 1 + NUM_BLK 526C 527C VEC1 528 CALL RDVEC_BATCH_DRV4(LUOUT1,VEC1,LBATCH(ISBATCH), 529 & IBATCH(1,I1BATCH(ISBATCH)), 530 & IOFFSET_LUOUT1,IOFFSET_INT_LUOUT1, 531 & LUOUT1LIST,NUM_ACTIVE_BATCH) 532C 533CSK WRITE(LUWRT,*) 'initial VEC1 on LUOUT1' 534CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 535CSK & LUWRT) 536C VEC2 537 CALL RDVEC_BATCH_DRV4(LUOUT3,VEC2,LBATCH(ISBATCH), 538 & IBATCH(1,I1BATCH(ISBATCH)), 539 & IOFFSET_LUOUT3,IOFFSET_INT_LUOUT3, 540 & LUOUT3LIST,NUM_ACTIVE_BATCH) 541C 542CSK WRITE(LUWRT,*) 'initial VEC2 on LUOUT3' 543CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 544CSK & LUWRT) 545C 546C VEC2 == VEC2 + VEC1 * FACTOR 547C 548 CALL DAXPY(LEBATCH(ISBATCH),FACTOR,VEC1,1,VEC2,1) 549C 550C write VEC2 on LUOUT3 551C 552 IOFFSET_LUOUT3 = MY_IOFF_LUOUT3 + IOFFSET_SCRATCH 553 IOFFSET_INT_LUOUT3 = 1 + NUM_BLK 554C 555CSK WRITE(LUWRT,*) 'final VEC2 to write on LUOUT3' 556CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 557CSK & LUWRT) 558C 559 CALL WTVEC_BATCH_DRV4(LUOUT3,VEC2,LBATCH(ISBATCH), 560 & IBATCH(1,I1BATCH(ISBATCH)), 561 & IOFFSET_LUOUT3,IOFFSET_INT_LUOUT3, 562 & LUOUT3LIST,NUM_ACTIVE_BATCH) 563C 564C keep track of correct offset 565 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 566 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 567C 568 END DO 569C 570 END IF 571C ^ IOLSAC ? 572C 573 END 574*********************************************************************** 575* * 576* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig * 577* parallelization by Stefan Knecht * 578* * 579*********************************************************************** 580 SUBROUTINE P1_B_PAR_RL_LUCI3(VEC1,VEC2,SUBSPH, 581 & LUIN1LIST,LUIN2LIST,LUOUTLIST, 582 & LUOUT2LIST, 583 & NBATCH,LBATCH,LEBATCH,I1BATCH,IBATCH, 584 & MY_IOFF_LUIN1,MY_IOFF_LUIN2, 585 & MY_IOFF_LUOUT,MY_IOFF_LUOUT2, 586 & SCRRED,NVEC,IADD, 587 & LUIN1,LUIN2,LUOUT,LUOUT2) 588C 589C Written by S. Knecht - March 13 2008 590C 591C********************************************************************** 592C 593C calculating dot product between two vectors on file LUIN1 resp. 594C LUIN2 595C 596C NOTE: NVEC = NVEC 597C IADD = IADD 598C 599C active blocks on the MPI-files are flagged by a nonzero list entry 600C 601C Last revision: S. Knecht - June 2007 602C 603************************************************************************ 604 IMPLICIT DOUBLE PRECISION ( A-H,O-Z) 605#include "maxorb.h" 606#include "infpar.h" 607#include "mpif.h" 608 integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE) 609#include "parluci.h" 610 DIMENSION VEC1(*), VEC2(*), SUBSPH(*) 611 DIMENSION LBATCH(*), LEBATCH(*), I1BATCH(*), IBATCH(8,*) 612 DIMENSION LUIN1LIST(*), LUIN2LIST(*), LUOUTLIST(*) 613 DIMENSION SCRRED(*), LUOUT2LIST(*) 614 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN1 615 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN2 616 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT 617 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT2 618 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN1, IOFFSET_IN_LUIN2 619 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_LUOUT, IOFFSET_LUOUT2 620 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_SCRATCH 621 INTEGER NUM_BLK, IROUND, I_ZERO, I_REMOVED 622 real(8), parameter :: THR_CVEC = 1.0d-03 623! real(8), parameter :: THR_CVEC = -1.0d-03 624 LOGICAL STORE_F 625 626! print *, 'THR_CVEC ==>', THR_CVEC 627 628 IROUND = 0 629 10 CONTINUE 630 IROUND = IROUND + 1 631 I_ZERO = 0 632 I_REMOVED = 0 633C 634C initialize scratch offsets 635 NUM_BLK = 0 636 IOFFSET_SCRATCH = 0 637 IOFFSET_IN_LUIN1 = 0 638 IOFFSET_IN_LUIN2 = 0 639 IOFFSET_LUOUT = 0 640 IOFFSET_LUOUT2 = 0 641 IOFFSET_INT_IN1 = 0 642 IOFFSET_INT_IN2 = 0 643 IOFFSET_INT_LUOUT = 0 644 IOFFSET_INT_LUOUT2= 0 645 STORE_F = .FALSE. 646 IF( NVEC + IADD - 1 - NROOT_INFO .gt. 0 ) STORE_F = .TRUE. 647C 648 REDSCRVAR = 0.0D0 649 CALL DZERO(SCRRED,NVEC+IADD) 650 CALL DZERO(SUBSPH,NVEC+IADD) 651CSK WRITE(LUWRT,*) ' NVEC + IADD - 1', NVEC + IADD - 1 652CSK WRITE(LUWRT,*) ' LUIN1,LUIN2,LUOUT,LUOUT2', 653CSK & LUIN1,LUIN2,LUOUT,LUOUT2 654C 655 DO ISBATCH = 1, NBATCH 656C 657C offset for batch ISBATCH w.r.t JOFF 658C 659 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 660C 661C set new offset 662C 663C position in file is at the end of vector IVEC - 1 664C 665 IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH 666 IOFFSET_INT_IN2 = 1 + NUM_BLK 667C 668C 669CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN2 in P1..._3 100', 670CSK & IOFFSET_IN_LUIN2 671C 672 CALL RDVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH), 673 & IBATCH(1,I1BATCH(ISBATCH)), 674 & IOFFSET_IN_LUIN2,IOFFSET_INT_IN2, 675 & LUIN2LIST,NUM_ACTIVE_BATCH) 676C 677CSK WRITE(LUWRT,*) 'initial VEC2 on LUIN2 in P1..._3 100' 678CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 679CSK & LUWRT) 680 681C 682 DO 100 IVEC = 1, NROOT_INFO 683C 684 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 685C 686C set new offset 687C 688C position in file is at the end of vector IVEC - 1 689C 690 IOFFSET_IN_LUIN1 = MY_IOFF_LUIN1 + IOFFSET_SCRATCH + 691 & ( IVEC - 1 ) * MY_VEC1_IOFF 692 IOFFSET_INT_IN1 = 1 + NUM_BLK + 693 & ( IVEC - 1 ) * MY_ACT_BLK1 694C 695CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN1 in P1..._3 100', 696CSK & IOFFSET_IN_LUIN1 697CSK WRITE(LUWRT,*) 'This is my INT_OFFSET for LUIN1 in 698CSK & P1..._3 100',IOFFSET_INT_IN1 699CSK WRITE(LUWRT,*) 'THIS IS MY LU1LIST inside P1_B_PAR_RL_3 100' 700CSK CALL IWRTMAMN(LUIN1LIST,1,IALL_LU1,1,IALL_LU1,LUWRT) 701C 702 CALL RDVEC_BATCH_DRV4(LUIN1,VEC1,LBATCH(ISBATCH), 703 & IBATCH(1,I1BATCH(ISBATCH)), 704 & IOFFSET_IN_LUIN1,IOFFSET_INT_IN1, 705 & LUIN1LIST,NUM_ACTIVE_BATCH) 706C 707CSK WRITE(LUWRT,*) 'initial VEC1 on LUIN1 in P1..._3 100' 708CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 709CSK & LUWRT) 710C 711 SCRRED(IVEC) = SCRRED(IVEC) - 712 & DDOT(LEBATCH(ISBATCH),VEC1,1,VEC2,1) 713C 714 100 CONTINUE 715C 716C keep track of correct offset 717 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 718 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 719C 720 END DO 721C 722C communicate SCRRED to get full OVERLAP matrix 723C 724 CALL REDVEC_REL(SCRRED,SUBSPH,NROOT_INFO,2,MPI_SUM, 725 & MPI_COMM_WORLD,-1) 726C 727C 728C 729CSK WRITE(LUWRT,*) ' THIS IS MY SUBSPH in P1..._3' 730CSK CALL WRTMATMN(SUBSPH,1,NVEC+IADD-1,1,NVEC+IADD-1,LUWRT) 731C 732C zero scratch offsets 733C 734 NUM_BLK = 0 735 IOFFSET_SCRATCH = 0 736 REDSCRVAR = 0.0D0 737C 738C 739CSK WRITE(LUWRT,*) ' THIS IS MY LUIN2LIST in P1..._3' 740CSK CALL IWRTMAMN(LUIN2LIST,1,IALL_LU3,1,IALL_LU3,LUWRT) 741C 742 DO ISBATCH = 1, NBATCH 743C 744C offset for batch ISBATCH w.r.t JOFF 745C 746 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 747C 748C set new offset 749C 750C position in file is at the end of vector IVEC - 1 751C 752 IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH 753 IOFFSET_INT_IN2 = 1 + NUM_BLK 754C 755C 756CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN2', 757CSK & IOFFSET_IN_LUIN2 758C 759 CALL RDVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH), 760 & IBATCH(1,I1BATCH(ISBATCH)), 761 & IOFFSET_IN_LUIN2,IOFFSET_INT_IN2, 762 & LUIN2LIST,NUM_ACTIVE_BATCH) 763C 764CSK WRITE(LUWRT,*) 'initial VEC2 on LUIN2' 765CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 766CSK & LUWRT) 767 768C 769 DO 200 IVEC = 1, NROOT_INFO 770C 771 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 772C 773C set new offset 774C 775C position in file is at the end of vector IVEC - 1 776C 777 IOFFSET_IN_LUIN1 = MY_IOFF_LUIN1 + IOFFSET_SCRATCH + 778 & ( IVEC - 1 ) * MY_VEC1_IOFF 779 IOFFSET_INT_IN1 = 1 + NUM_BLK + 780 & ( IVEC - 1 ) * MY_ACT_BLK1 781C 782CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN1 in P1..._3 200', 783CSK & IOFFSET_IN_LUIN1 784C 785 CALL RDVEC_BATCH_DRV4(LUIN1,VEC1,LBATCH(ISBATCH), 786 & IBATCH(1,I1BATCH(ISBATCH)), 787 & IOFFSET_IN_LUIN1,IOFFSET_INT_IN1, 788 & LUIN1LIST,NUM_ACTIVE_BATCH) 789C 790CSK WRITE(LUWRT,*) 'initial VEC1 on LUIN1 in P1..._3 200' 791CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 792CSK & LUWRT) 793C 794 FACTOR = SUBSPH(IVEC) 795C 796C VEC2 == VEC2 + VEC1 * FACTOR 797C 798 CALL DAXPY(LEBATCH(ISBATCH), FACTOR, VEC1, 1, VEC2, 1) 799C 800 200 CONTINUE 801C 802CSK WRITE(LUWRT,*) 'final VEC2 to write on LUOUT2 ' 803CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 804CSK & LUWRT) 805 IF( STORE_F )THEN 806C 807C new offset for writing on LUOUT2 --> ILU5 808C 809 IOFFSET_LUOUT2 = MY_IOFF_LUOUT2 + IOFFSET_SCRATCH 810 IOFFSET_INT_LUOUT2 = 1 + NUM_BLK 811C 812 CALL WTVEC_BATCH_DRV4(LUOUT2,VEC2,LBATCH(ISBATCH), 813 & IBATCH(1,I1BATCH(ISBATCH)), 814 & IOFFSET_LUOUT2,IOFFSET_INT_LUOUT2, 815 & LUOUT2LIST,NUM_ACTIVE_BATCH) 816 ELSE 817C 818 REDSCRVAR = REDSCRVAR 819 & + DDOT( LEBATCH(ISBATCH), VEC2, 1, VEC2, 1) 820C 821C new offset for writing on LUIN2 822C 823 IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH 824 IOFFSET_INT_IN2 = 1 + NUM_BLK 825C 826 CALL WTVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH), 827 & IBATCH(1,I1BATCH(ISBATCH)), 828 & IOFFSET_IN_LUIN2,IOFFSET_INT_IN2, 829 & LUIN2LIST,NUM_ACTIVE_BATCH) 830 END IF 831C 832C keep track of correct offset 833 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 834 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 835C 836 END DO 837C 838 IF( STORE_F )THEN 839C 840C zero scratch offsets 841C 842 NUM_BLK = 0 843 IOFFSET_SCRATCH = 0 844 CALL DZERO(SCRRED,NVEC+IADD) 845 CALL DZERO(SUBSPH,NVEC+IADD) 846C 847 DO ISBATCH = 1, NBATCH 848C 849C offset for batch ISBATCH w.r.t JOFF 850C 851 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 852C 853C set new offset 854C 855C position in file is at the end of vector IVEC - 1 856C 857 IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH 858 IOFFSET_INT_IN2 = 1 + NUM_BLK 859C 860C 861CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN2 in P1..._3 100', 862CSK & IOFFSET_IN_LUIN2 863C 864 CALL RDVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH), 865 & IBATCH(1,I1BATCH(ISBATCH)), 866 & IOFFSET_IN_LUIN2,IOFFSET_INT_IN2, 867 & LUIN2LIST,NUM_ACTIVE_BATCH) 868C 869CSK WRITE(LUWRT,*) 'initial VEC2 on LUIN2 in P1..._3 100' 870CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 871CSK & LUWRT) 872 873C 874 DO 300 IVEC = 1, (NVEC + IADD - 1 -NROOT_INFO) 875C 876 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 877C 878C set new offset 879C 880C position in file is at the end of vector IVEC - 1 881C 882 IOFFSET_LUOUT = MY_IOFF_LUOUT + IOFFSET_SCRATCH + 883 & ( IVEC - 1 ) * MY_VEC1_IOFF 884 IOFFSET_INT_LUOUT = 1 + NUM_BLK + 885 & ( IVEC - 1 ) * MY_ACT_BLK1 886C 887CSK WRITE(LUWRT,*) 'This is my OFFSET for LUOUT in P1..._3 100', 888CSK & IOFFSET_IN_LUOUT 889CSK WRITE(LUWRT,*) 'This is my INT_OFFSET for LUOUT in 890CSK & P1..._3 100',IOFFSET_INT_LUOUT 891C 892 CALL RDVEC_BATCH_DRV4(LUOUT,VEC1,LBATCH(ISBATCH), 893 & IBATCH(1,I1BATCH(ISBATCH)), 894 & IOFFSET_LUOUT,IOFFSET_INT_LUOUT, 895 & LUOUTLIST,NUM_ACTIVE_BATCH) 896C 897CSK WRITE(LUWRT,*) 'initial VEC1 on LUOUT in P1..._3 100' 898CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 899CSK & LUWRT) 900C 901 SCRRED(IVEC) = SCRRED(IVEC) - 902 & DDOT(LEBATCH(ISBATCH),VEC1,1,VEC2,1) 903C 904 300 CONTINUE 905C 906C keep track of correct offset 907 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 908 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 909C 910 END DO 911C 912C communicate SCRRED to get full OVERLAP matrix 913C 914 CALL REDVEC_REL(SCRRED,SUBSPH,NVEC+IADD-1-NROOT_INFO,2,MPI_SUM, 915 & MPI_COMM_WORLD,-1) 916C 917C zero scratch offsets 918C 919 NUM_BLK = 0 920 IOFFSET_SCRATCH = 0 921 REDSCRVAR = 0.0D0 922C 923 DO ISBATCH = 1, NBATCH 924C 925C offset for batch ISBATCH w.r.t JOFF 926C 927 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 928C 929C set new offset 930C 931C position in file is at the end of vector IVEC - 1 932C 933 IOFFSET_LUOUT2 = MY_IOFF_LUOUT2 + IOFFSET_SCRATCH 934 IOFFSET_INT_LUOUT2 = 1 + NUM_BLK 935C 936CSK WRITE(LUWRT,*) 'This is my OFFSET for LUOUT2', 937CSK & IOFFSET_IN_LUOUT2 938C 939 CALL RDVEC_BATCH_DRV4(LUOUT2,VEC2,LBATCH(ISBATCH), 940 & IBATCH(1,I1BATCH(ISBATCH)), 941 & IOFFSET_LUOUT2,IOFFSET_INT_LUOUT2, 942 & LUOUT2LIST,NUM_ACTIVE_BATCH) 943C 944CSK WRITE(LUWRT,*) 'initial VEC2 on LUOUT2' 945CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 946CSK & LUWRT) 947C 948 DO 400 IVEC = 1, (NVEC+IADD-1-NROOT_INFO) 949C 950 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 951C 952C set new offset 953C 954C position in file is at the end of vector IVEC - 1 955C 956 IOFFSET_LUOUT = MY_IOFF_LUOUT + IOFFSET_SCRATCH + 957 & ( IVEC - 1 ) * MY_VEC1_IOFF 958 IOFFSET_INT_LUOUT = 1 + NUM_BLK + 959 & ( IVEC - 1 ) * MY_ACT_BLK1 960C 961CSK WRITE(LUWRT,*) 'This is my OFFSET for LUOUT in P1..._3 100', 962CSK & IOFFSET_IN_LUOUT 963CSK WRITE(LUWRT,*) 'This is my INT_OFFSET for LUOUT in 964CSK & P1..._3 100',IOFFSET_INT_LUOUT 965C 966 CALL RDVEC_BATCH_DRV4(LUOUT,VEC1,LBATCH(ISBATCH), 967 & IBATCH(1,I1BATCH(ISBATCH)), 968 & IOFFSET_LUOUT,IOFFSET_INT_LUOUT, 969 & LUOUTLIST,NUM_ACTIVE_BATCH) 970CSK WRITE(LUWRT,*) 'initial VEC1 on LUIN1 in P1..._3 200' 971CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 972CSK & LUWRT) 973C 974 FACTOR = SUBSPH(IVEC) 975C 976C VEC2 == VEC2 + VEC1 * FACTOR 977C 978 CALL DAXPY(LEBATCH(ISBATCH), FACTOR, VEC1, 1, VEC2, 1) 979C 980 400 CONTINUE 981C 982CSK WRITE(LUWRT,*) 'final VEC2 to write on LUIN2 ' 983CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 984CSK & LUWRT) 985 REDSCRVAR = REDSCRVAR 986 & + DDOT( LEBATCH(ISBATCH), VEC2, 1, VEC2, 1) 987C 988C new offset for writing on LUIN2 989C 990 IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH 991 IOFFSET_INT_IN2 = 1 + NUM_BLK 992C 993 CALL WTVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH), 994 & IBATCH(1,I1BATCH(ISBATCH)), 995 & IOFFSET_IN_LUIN2,IOFFSET_INT_IN2, 996 & LUIN2LIST,NUM_ACTIVE_BATCH) 997C 998C keep track of correct offset 999 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 1000 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 1001C 1002 END DO 1003C 1004 END IF 1005C ^ NVEC + IADD - 1 - NROOT > 0 ( STORE_F == .TRUE. ) 1006C 1007 SCALEVEC = 0.0D0 1008C 1009C communicate REDSCRVAR to get full scale factor 1010C 1011 CALL REDVEC_REL(REDSCRVAR,SCALEVEC,1,2,MPI_SUM,MPI_COMM_WORLD,-1) 1012C 1013C 1.4 normalizing the new vector 1014C 1015C zero scratch offsets 1016C 1017 NUM_BLK = 0 1018 IOFFSET_SCRATCH = 0 1019C 1020C 1021 FACTOR = 1.0D0 / SQRT( SCALEVEC ) 1022csk WRITE(LUWRT,*) 'THIS IS MY SCALING FACTOR',FACTOR 1023C 1024 DO ISBATCH = 1, NBATCH 1025C 1026C offset for batch ISBATCH w.r.t JOFF 1027C 1028 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 1029C 1030C set new offset 1031C 1032C position in file is at the end of vector IVEC - 1 1033C 1034 IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH 1035 IOFFSET_INT_IN2 = 1 + NUM_BLK 1036C 1037C 1038CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN2', 1039CSK & IOFFSET_IN_LUIN2 1040C 1041 CALL RDVEC_BATCH_DRV4(LUIN2,VEC2,LBATCH(ISBATCH), 1042 & IBATCH(1,I1BATCH(ISBATCH)), 1043 & IOFFSET_IN_LUIN2,IOFFSET_INT_IN2, 1044 & LUIN2LIST,NUM_ACTIVE_BATCH) 1045C 1046CSK WRITE(LUWRT,*) 'initial VEC2 on LUIN2' 1047CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 1048CSK & LUWRT) 1049 1050C 1051 CALL DSCAL(LEBATCH(ISBATCH),FACTOR,VEC2,1) 1052 1053 IF(THR_CVEC .GT. 0.0D0 .AND. IROUND .EQ. 1) THEN 1054 DO I = 1,LEBATCH(ISBATCH) 1055 IF(ABS(VEC2(I)) .LT. THR_CVEC) THEN 1056 IF(VEC2(I) .EQ. 0.0D0) THEN 1057 I_ZERO = I_ZERO + 1 1058 ELSE 1059 VEC2(I) = 0.0D0 1060 I_REMOVED = I_REMOVED + 1 1061 END IF 1062 END IF 1063 END DO 1064 END IF 1065C 1066C set new offset 1067C 1068C position in file is at the end of vector NVEC + IADD - 1 - NROOT 1069C 1070 IOFFSET_LUOUT = MY_IOFF_LUOUT + IOFFSET_SCRATCH + 1071 & ( NVEC + IADD - 1 - NROOT_INFO ) * MY_VEC1_IOFF 1072C 1073 IOFFSET_INT_LUOUT = 1 + NUM_BLK + 1074 & ( NVEC + IADD - 1 - NROOT_INFO ) * MY_ACT_BLK1 1075C 1076csk WRITE(LUWRT,*) 'This is my OFFSET for LUOUT, IOFFSET_INT_LUOUT', 1077csk & IOFFSET_LUOUT, IOFFSET_INT_LUOUT 1078C 1079csk WRITE(LUWRT,*) 'absolute final new vec on VEC2 to LUOUT' 1080csk CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 1081csk & LUWRT) 1082C 1083 IDEBUGPRNT = 0 1084C 1085 CALL WTVEC_BATCH_DRV4SP(LUOUT,VEC2,LBATCH(ISBATCH), 1086 & IBATCH(1,I1BATCH(ISBATCH)), 1087 & IOFFSET_LUOUT,IOFFSET_INT_LUOUT, 1088 & LUOUTLIST,NUM_ACTIVE_BATCH,IDEBUGPRNT) 1089C 1090C 1091C keep track of correct offset 1092 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 1093 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 1094C 1095 END DO 1096 ! need to do an MPI_ALLREDUCE to check whether any slave has removed an element in the trial vector 1097 IF(THR_CVEC .GT. 0.0D0 .AND. IROUND .EQ. 1) THEN 1098 I_REMOVED_total = 0 1099 I_ZERO_total = 0 1100 CAll redvec_rel(I_REMOVED,I_REMOVED_total,1,1, 1101 & MPI_SUM,MPI_COMM_WORLD,-1) 1102 CAll redvec_rel(I_ZERO,I_ZERO_total,1,1, 1103 & MPI_SUM,MPI_COMM_WORLD,-1) 1104 IF(I_REMOVED_total .GT. 0) THEN 1105!#define LUCI_DEBUG 1106#ifdef LUCI_DEBUG 1107 WRITE(luwrt,'(/A,I12,A,I4,A,1P,D10.2,I14)') 1108 & 'info: Removed',I_REMOVED_total, 1109 & ' elements in new CI trial vector no.',IADD, 1110 & '; threshold & zeroes',THR_CVEC, I_ZERO_total 1111#undef LUCI_DEBUG 1112#endif 1113 GO TO 10 1114 END IF 1115 END IF 1116C 1117 END 1118*********************************************************************** 1119* * 1120* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig * 1121* parallelization by Stefan Knecht * 1122* * 1123*********************************************************************** 1124 SUBROUTINE INPROD_B_PAR_RL_LUCI2(LUIN1,LUIN2,LUIN3,VEC1,VEC2, 1125 & SUBSPH,NBATCH, 1126 & LBATCH,LEBATCH,I1BATCH,IBATCH, 1127 & MY_IOFF_LUIN1,MY_IOFF_LUIN2, 1128 & MY_IOFF_LUIN3,LUIN1LIST, 1129 & LUIN2LIST,LUIN3LIST,IVEC, 1130 & NVEC,IMUSTRED,ISTRED) 1131C 1132C Written by S. Knecht - March 14 2008 1133C 1134C********************************************************************** 1135C 1136C calculating dot product between two vectors on file LUIN1 resp. 1137C LUIN2 1138C 1139C NOTE: IVEC = IVEC 1140C NVEC = NVEC 1141C 1142C active blocks on the MPI-files are flagged by a nonzero length 1143C 1144C Last revision: S. Knecht - June 2007 1145C 1146************************************************************************ 1147 IMPLICIT DOUBLE PRECISION ( A-H,O-Z) 1148#include "maxorb.h" 1149#include "infpar.h" 1150#include "mpif.h" 1151 integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE) 1152#include "parluci.h" 1153 DIMENSION VEC1(*), VEC2(*), SUBSPH(*) 1154 DIMENSION LBATCH(*), LEBATCH(*), I1BATCH(*), IBATCH(8,*) 1155 DIMENSION LUIN1LIST(*), LUIN2LIST(*), LUIN3LIST(*) 1156 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN1 1157 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN2 1158 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN3 1159 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN1, IOFFSET_IN_LUIN2 1160 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN3 1161 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_SCRATCH 1162 INTEGER NUM_BLK 1163C 1164C initialize scratch offsets 1165 NUM_BLK = 0 1166 IOFFSET_SCRATCH = 0 1167 IOFFSET_IN_LUIN1 = 0 1168 IOFFSET_IN_LUIN2 = 0 1169 IOFFSET_IN_LUIN3 = 0 1170 IOFFSET_INT_IN1 = 0 1171 IOFFSET_INT_IN2 = 0 1172 IOFFSET_INT_IN3 = 0 1173C 1174 DO ISBATCH = 1, NBATCH 1175C 1176C offset for batch ISBATCH w.r.t JOFF 1177C 1178 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 1179C 1180C set new offset 1181C position in file is at the end of vector JOFF - 1 1182C 1183 IOFFSET_IN_LUIN1 = MY_IOFF_LUIN1 + IOFFSET_SCRATCH + 1184 & ( JVEC_SF ) * MY_VEC1_IOFF 1185 IOFFSET_INT_IN1 = 1 + NUM_BLK + 1186 & ( JVEC_SF ) * MY_ACT_BLK1 1187C 1188csk WRITE(LUWRT,*) 'This is my OFFSET for LUIN1', 1189csk & IOFFSET_IN_LUIN1 1190C 1191 CALL RDVEC_BATCH_DRV4(LUIN1,VEC2,LBATCH(ISBATCH), 1192 & IBATCH(1,I1BATCH(ISBATCH)), 1193 & IOFFSET_IN_LUIN1,IOFFSET_INT_IN1, 1194 & LUIN1LIST,NUM_ACTIVE_BATCH) 1195C 1196csk WRITE(LUWRT,*) 'initial VEC2 on LUIN1' 1197csk CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH),LUWRT) 1198C 1199 DO 100 JVEC = 1, NROOT_INFO 1200C 1201C set new offset and zero read-in vector 1202C 1203 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 1204C 1205 IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH + 1206 & ( JVEC - 1 ) * MY_VEC1_IOFF 1207 IOFFSET_INT_IN2 = 1 + NUM_BLK + 1208 & ( JVEC - 1 ) * MY_ACT_BLK1 1209C 1210CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN2', 1211CSK & IOFFSET_IN_LUIN2 1212C 1213C 1214C read in batch ISBATCH from LUIN1 to VEC1 1215C 1216 CALL RDVEC_BATCH_DRV4(LUIN2,VEC1,LBATCH(ISBATCH), 1217 & IBATCH(1,I1BATCH(ISBATCH)), 1218 & IOFFSET_IN_LUIN2,IOFFSET_INT_IN2, 1219 & LUIN2LIST,NUM_ACTIVE_BATCH) 1220C 1221csk WRITE(LUWRT,*) 'initial VEC1 on LUIN2' 1222csk CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1, 1223csk & LEBATCH(ISBATCH),LUWRT) 1224C 1225C 1226C IJ = (IVEC+NVEC)*(IVEC+NVEC-1)/2 + JVEC 1227C SUBSPH(IJ) == VEC1 * VEC2 1228C 1229 IJ = (IVEC+NVEC)*(IVEC+NVEC-1)/2 + JVEC 1230csk WRITE(LUWRT,*) ' IJ in loop 1', IJ 1231C 1232 SUBSPH(IJ) = SUBSPH(IJ) + 1233 & DDOT(LEBATCH(ISBATCH),VEC1,1,VEC2,1) 1234C 1235C keep track of memory offset and the 'reduction' counter 1236C 1237 IF( ISBATCH .eq. 1 ) THEN 1238 IMUSTRED = IMUSTRED + 1 1239 IF( IVEC .eq. 1 .and. JVEC .eq. 1 ) ISTRED = IJ 1240 END IF 1241C 1242C 1243 100 CONTINUE 1244C 1245 JJVEC = 0 1246C 1247 DO 200 JVEC = NROOT_INFO+1 , NVEC+IVEC 1248C 1249C set new offset and zero read-in vector 1250C 1251 JJVEC = JJVEC + 1 1252C 1253 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 1254C 1255 IOFFSET_IN_LUIN3 = MY_IOFF_LUIN3 + IOFFSET_SCRATCH + 1256 & ( JJVEC - 1 ) * MY_VEC1_IOFF 1257 IOFFSET_INT_IN3 = 1 + NUM_BLK + 1258 & ( JJVEC - 1 ) * MY_ACT_BLK1 1259C 1260csk WRITE(LUWRT,*) 'This is my OFFSET for LUIN3', 1261csk & IOFFSET_IN_LUIN3, IOFFSET_INT_IN3 1262C 1263C 1264C read in batch ISBATCH from LUIN3 to VEC1 1265C 1266 CALL RDVEC_BATCH_DRV4(LUIN3,VEC1,LBATCH(ISBATCH), 1267 & IBATCH(1,I1BATCH(ISBATCH)), 1268 & IOFFSET_IN_LUIN3,IOFFSET_INT_IN3, 1269 & LUIN3LIST,NUM_ACTIVE_BATCH) 1270C 1271csk WRITE(LUWRT,*) 'initial VEC1 on LUIN3' 1272csk CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1, 1273csk & LEBATCH(ISBATCH),LUWRT) 1274C 1275C 1276C IJ = (IVEC+NVEC)*(IVEC+NVEC-1)/2 + JVEC 1277C SUBSPH(IJ) == VEC1 * VEC2 1278C 1279 IJ = (IVEC+NVEC)*(IVEC+NVEC-1)/2 + JVEC 1280csk WRITE(LUWRT,*) ' IJ in loop 2', IJ 1281C 1282 SUBSPH(IJ) = SUBSPH(IJ) + 1283 & DDOT(LEBATCH(ISBATCH),VEC1,1,VEC2,1) 1284C 1285C keep track of the 'reduction' counter 1286C 1287 IF( ISBATCH .eq. 1 ) THEN 1288 IMUSTRED = IMUSTRED + 1 1289 END IF 1290C 1291C 1292 200 CONTINUE 1293C 1294C keep track of correct offset 1295 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 1296 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 1297C 1298 END DO 1299C 1300 END 1301*********************************************************************** 1302* * 1303* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig * 1304* parallelization by Stefan Knecht * 1305* * 1306*********************************************************************** 1307 SUBROUTINE P3_B_PAR_RL_LUCI1(VEC1,VEC2,SUBSPH, 1308 & LUINLIST,LUIN2LIST,LUOUTLIST, 1309 & NBATCH,LBATCH,LEBATCH,I1BATCH,IBATCH, 1310 & MY_IOFF_LUIN,MY_IOFF_LUIN2, 1311 & MY_IOFF_LUOUT,NVEC,NVEC2,IROOT, 1312 & LUIN,LUIN2,LUOUT) 1313C 1314C Written by S. Knecht - March 14 2008 1315C 1316C********************************************************************** 1317C 1318C calculating scaled vecsum between two vectors on file LUIN resp. 1319C LUIN2; saving on LUOUT 1320C 1321C NOTE: IROOT = IROOT 1322C NVEC = NVEC 1323C NVEC2 = NROOT 1324C 1325C active blocks on the MPI-files are flagged by a nonzero list entry 1326C 1327C Last revision: S. Knecht - March 2008 1328C 1329************************************************************************ 1330 IMPLICIT DOUBLE PRECISION ( A-H,O-Z) 1331#include "maxorb.h" 1332#include "infpar.h" 1333#include "mpif.h" 1334 integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE) 1335#include "parluci.h" 1336 DIMENSION VEC1(*), VEC2(*), SUBSPH(*) 1337 DIMENSION LBATCH(*), LEBATCH(*), I1BATCH(*), IBATCH(8,*) 1338 DIMENSION LUINLIST(*), LUOUTLIST(*), LUIN2LIST(*) 1339 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN 1340 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUIN2 1341 INTEGER(KIND=MPI_OFFSET_KIND) MY_IOFF_LUOUT 1342 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN 1343 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_IN_LUIN2 1344 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_LUOUT 1345 INTEGER(KIND=MPI_OFFSET_KIND) IOFFSET_SCRATCH 1346 INTEGER NUM_BLK 1347C 1348C initialize scratch offsets 1349 NUM_BLK = 0 1350 IOFFSET_SCRATCH = 0 1351 IOFFSET_IN_LUIN = 0 1352 IOFFSET_IN_LUIN2 = 0 1353 IOFFSET_LUOUT = 0 1354 IOFFSET_INT_IN = 0 1355 IOFFSET_INT_IN2 = 0 1356 IOFFSET_INT_LUOUT = 0 1357C 1358C 1359 DO ISBATCH = 1, NBATCH 1360C 1361C offset for batch ISBATCH w.r.t JOFF 1362C 1363 CALL DZERO(VEC2,LEBATCH(ISBATCH)) 1364C 1365 DO 100 IVEC = 1, NVEC2 1366C 1367 IJ = (( IROOT - 1 ) * NVEC ) + 1 + ( IVEC - 1) 1368C 1369 FACTOR = SUBSPH( IJ ) 1370C 1371C set new offset 1372C 1373C position in file is at the end of vector IVEC - 1 1374C 1375 IOFFSET_IN_LUIN = MY_IOFF_LUIN + IOFFSET_SCRATCH + 1376 & ( IVEC - 1 ) * MY_VEC1_IOFF 1377 IOFFSET_INT_IN = 1 + NUM_BLK + 1378 & ( IVEC - 1 ) * MY_ACT_BLK1 1379C 1380CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN', 1381CSK & IOFFSET_IN_LUIN 1382C 1383 IF( IVEC .eq. 1 ) THEN 1384C 1385 CALL RDVEC_BATCH_DRV4(LUIN,VEC2,LBATCH(ISBATCH), 1386 & IBATCH(1,I1BATCH(ISBATCH)), 1387 & IOFFSET_IN_LUIN,IOFFSET_INT_IN, 1388 & LUINLIST,NUM_ACTIVE_BATCH) 1389C 1390CSK WRITE(LUWRT,*) 'initial VEC2 on LUIN' 1391CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 1392CSK & LUWRT) 1393CSK WRITE(LUWRT,*) 'scaling factor for this vector',FACTOR 1394C 1395 CALL DSCAL(LEBATCH(ISBATCH),FACTOR,VEC2,1) 1396CSK WRITE(LUWRT,*) ' VEC2 after first scaling ' 1397CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 1398CSK & LUWRT) 1399C 1400 ELSE 1401C 1402 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 1403C 1404 CALL RDVEC_BATCH_DRV4(LUIN,VEC1,LBATCH(ISBATCH), 1405 & IBATCH(1,I1BATCH(ISBATCH)), 1406 & IOFFSET_IN_LUIN,IOFFSET_INT_IN, 1407 & LUINLIST,NUM_ACTIVE_BATCH) 1408C 1409CSK WRITE(LUWRT,*) 'initial VEC1 on LUIN' 1410CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 1411CSK & LUWRT) 1412C 1413C VEC2 == VEC2 + VEC1 * FACTOR 1414C 1415 CALL DAXPY(LEBATCH(ISBATCH),FACTOR,VEC1,1,VEC2,1) 1416C 1417CSK WRITE(LUWRT,*) 'final VEC2 after DAXPY in P3...1' 1418CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 1419CSK & LUWRT) 1420C 1421 END IF 1422C ^ IVEC == 1 ? 1423C 1424 100 CONTINUE 1425C 1426 DO 200 IVEC = 1, (NVEC - NVEC2) 1427C 1428 IJ = (( IROOT - 1 ) * NVEC ) + 1 + ( IVEC - 1) + NVEC2 1429C 1430 FACTOR = SUBSPH( IJ ) 1431C 1432C set new offset 1433C 1434C position in file is at the end of vector IVEC - 1 1435C 1436 IOFFSET_IN_LUIN2 = MY_IOFF_LUIN2 + IOFFSET_SCRATCH + 1437 & ( IVEC - 1 ) * MY_VEC1_IOFF 1438 IOFFSET_INT_IN2 = 1 + NUM_BLK + 1439 & ( IVEC - 1 ) * MY_ACT_BLK1 1440C 1441CSK WRITE(LUWRT,*) 'This is my OFFSET for LUIN', 1442CSK & IOFFSET_IN_LUIN 1443C 1444 CALL DZERO(VEC1,LEBATCH(ISBATCH)) 1445C 1446 CALL RDVEC_BATCH_DRV4(LUIN2,VEC1,LBATCH(ISBATCH), 1447 & IBATCH(1,I1BATCH(ISBATCH)), 1448 & IOFFSET_IN_LUIN2,IOFFSET_INT_IN2, 1449 & LUIN2LIST,NUM_ACTIVE_BATCH) 1450C 1451CSK WRITE(LUWRT,*) 'initial VEC1 on LUIN2' 1452CSK CALL WRTMATMN(VEC1,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 1453CSK & LUWRT) 1454CSK WRITE(LUWRT,*) 'scaling factor for this vector',FACTOR 1455C 1456C VEC2 == VEC2 + VEC1 * FACTOR 1457C 1458 CALL DAXPY(LEBATCH(ISBATCH),FACTOR,VEC1,1,VEC2,1) 1459C 1460CSK WRITE(LUWRT,*) 'final VEC2 after 2nd DAXPY in P3...1' 1461CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 1462CSK & LUWRT) 1463C 1464 200 CONTINUE 1465C 1466CSK WRITE(LUWRT,*) 'final VEC2 to write on position',IROOT -1 1467CSK CALL WRTMATMN(VEC2,1,LEBATCH(ISBATCH),1,LEBATCH(ISBATCH), 1468CSK & LUWRT) 1469C 1470C new offset for writing on LUOUT 1471C 1472 IOFFSET_LUOUT = MY_IOFF_LUOUT + IOFFSET_SCRATCH + 1473 & ( IROOT - 1 ) * MY_VEC1_IOFF 1474C 1475 IOFFSET_INT_LUOUT = 1 + NUM_BLK + 1476 & ( IROOT - 1 ) * MY_ACT_BLK1 1477C 1478 CALL WTVEC_BATCH_DRV4(LUOUT,VEC2,LBATCH(ISBATCH), 1479 & IBATCH(1,I1BATCH(ISBATCH)), 1480 & IOFFSET_LUOUT,IOFFSET_INT_LUOUT, 1481 & LUOUTLIST,NUM_ACTIVE_BATCH) 1482C 1483C keep track of correct offset 1484 IOFFSET_SCRATCH = IOFFSET_SCRATCH + LEBATCH(ISBATCH) 1485 NUM_BLK = NUM_BLK + NUM_ACTIVE_BATCH 1486C 1487 END DO 1488C 1489 END 1490*********************************************************************** 1491* * 1492* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig * 1493* parallelization by Stefan Knecht * 1494* * 1495*********************************************************************** 1496 SUBROUTINE UPDATE_LUC_LIST2(ISCLFAC_GROUP,LUCLIST,RCCTOS,CB, 1497 & NPARBLOCK,IBLOCKL,IGROUPLIST, 1498 & IPROCLIST,IRILP,BLOCKTIME) 1499C 1500C make an update of of grouplist for c-vector file based on 1501C different list gathered from MPI_COMM_WORLD 1502C 1503C 1504C Written by S. Knecht - March 08 2008 1505C 1506C OUTPUT: ISCLFAC_GROUP and updated file ILUC 1507C 1508C********************************************************************** 1509#include "implicit.h" 1510 INTEGER*8 KSCALLOC2, KSCALLOC3 1511! for addressing of WORK 1512#include "maxorb.h" 1513#include "infpar.h" 1514#include "mpif.h" 1515 integer(kind=MPI_INTEGER_KIND) :: mynew_comm_mpi 1516 integer(kind=MPI_INTEGER_KIND) :: ierr_mpi 1517 integer(kind=MPI_INTEGER_KIND) :: my_STATUS(MPI_STATUS_SIZE) 1518#include "parluci.h" 1519#include "wrkspc.inc" 1520 DIMENSION ISCLFAC_GROUP(*), LUCLIST(*) 1521 DIMENSION CB(*) 1522 DIMENSION NPARBLOCK(*), IBLOCKL(*) 1523 DIMENSION IGROUPLIST(*), IPROCLIST(*) 1524 CHARACTER*12 WALLTID3, SECTID 1525 INTEGER RCCTOS(*) 1526C some scratch 1527 INTEGER NZERO 1528C 1529 NZERO = 0 1530C 1531C set mark for local memory 1532C 1533 IDUM = 0 1534 CALL MEMMAN(KDUM, IDUM, 'MARK ',IDUM,'UPLIST') 1535C 1536 CALL MEMMAN(KSCALLOC2,NUM_BLOCKS2,'ADDL ',1,'ICLLC2') 1537 CALL MEMMAN(KSCALLOC3,NUM_BLOCKS2,'ADDL ',1,'ICLLC3') 1538C 1539C fill complete local iscalfac arrays with zero's 1540 CALL IZERO(WORK(KSCALLOC2), NUM_BLOCKS2) 1541 CALL IZERO(WORK(KSCALLOC3), NUM_BLOCKS2) 1542 CALL IZERO(ISCLFAC_GROUP , NUM_BLOCKS2) 1543C 1544#ifdef LUCI_DEBUG 1545 WRITE(luwrt,*) ' start of subroutine UPDATE_LUC_LIST2 speaking' 1546 WRITE(luwrt,*) 'LUCLIST:' 1547 CALL IWRTMAMN(LUCLIST,1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt) 1548 WRITE(luwrt,*) 'ISCLFAC_GROUP:' 1549 CALL IWRTMAMN(ISCLFAC_GROUP,1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt) 1550 WRITE(luwrt,*) 'WORK(KSCALLOC2):' 1551 CALL IWRTMAMN(WORK(KSCALLOC2),1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt) 1552 WRITE(luwrt,*) 'WORK(KSCALLOC3):' 1553 CALL IWRTMAMN(WORK(KSCALLOC3),1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt) 1554#endif 1555C 1556 starttimer = MPI_WTIME() 1557C 1558C "mpi_allsum" local LUCLIST which then on all 1559C nodes will contain the number of non-zero C-blocks in 1560C the complete CI-vector 1561C 1562 CALL REDVEC_REL(LUCLIST,WORK(KSCALLOC2),NUM_BLOCKS2,1, 1563 & MPI_SUM,MPI_COMM_WORLD,-1) 1564C 1565C find all c-blocks connecting to all sigma-blocks on each cpu 1566C 1567 CALL ICOPY(NUM_BLOCKS2,RCCTOS,1,WORK(KSCALLOC3),1) 1568C 1569C case 1: number of CPUs in new group not equal to total number 1570C 1571C case 2: number of CPUs in new group equal to total number 1572C 1573C 1574#ifdef LUCI_DEBUG 1575 write(luwrt,*) 'NEWCOMM_PROC,LUCI_NMPROC',NEWCOMM_PROC,LUCI_NMPROC 1576 write(luwrt,*) 'ILUC',ILUC 1577 write(luwrt,*) 'MYNEW_COMM',ILUC 1578 write(luwrt,*) 'ICOMM',ICOMM 1579 write(luwrt,*) 'ICOMM_id, ICOMM_SIZE',ICOMM_id,ICOMM_SIZE 1580#endif 1581* 1582 IF( NEWCOMM_PROC .ne. LUCI_NMPROC ) THEN 1583* 1584 CALL REDVEC_REL(WORK(KSCALLOC3),ISCLFAC_GROUP,NUM_BLOCKS2,1, 1585 & MPI_SUM,MYNEW_COMM,0) 1586* 1587* all local node-masters call this routine! 1588* 1589 IF( MYNEW_ID .eq. 0 ) THEN 1590 CALL COPVCD_PAR_BDRIV5_REL(ILUC,ILUC,CB,NPARBLOCK, 1591 & WORK(KSCALLOC2),ISCLFAC_GROUP, 1592 & IBLOCKL,NUM_BLOCKS,ICOMM, 1593 & IGROUPLIST,IPROCLIST,IRILP) 1594C COPVCD_PAR_BDRIV5_REL(LUIN,LUOUT,SEGMNT,IBLOCKD, 1595C & ISCALFAC,ISCALFAC_GROUP, 1596C & IBLOCKL,NBLOCK,JCOMM, 1597C & IGROUPLIST,IPROCLIST,IRILP) 1598C 1599 1600 END IF 1601 mynew_comm_mpi = mynew_comm 1602 CALL MPI_BCAST(ISCLFAC_GROUP,NUM_BLOCKS2, 1603 & my_MPI_INTEGER,0,MYNEW_COMM_MPI,ierr_mpi) 1604* 1605 ELSE 1606* 1607C 1608 CALL UPDATE_GEN_LIST(WORK(KSCALLOC3),WORK(KSCALLOC2), 1609 & NUM_BLOCKS2) 1610C 1611C to be consistent with output of case 1 1612C 1613 CALL IZERO(ISCLFAC_GROUP,NUM_BLOCKS2) 1614 CALL ICOPY(NUM_BLOCKS2,WORK(KSCALLOC3),1,ISCLFAC_GROUP,1) 1615C 1616 END IF 1617C ^ NEWCOMM_PROC == LUCI_NMPROC ? 1618C 1619#ifdef LUCI_DEBUG 1620 WRITE(luwrt,*) ' subroutine UPDATE_LUC_LIST2 speaking' 1621 WRITE(luwrt,*) 'LUCLIST:' 1622 CALL IWRTMAMN(LUCLIST,1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt) 1623 WRITE(luwrt,*) 'ISCLFAC_GROUP:' 1624 CALL IWRTMAMN(ISCLFAC_GROUP,1,NUM_BLOCKS2,1,NUM_BLOCKS2,luwrt) 1625#endif 1626C 1627C final timing for block distribution 1628 blocktime = blocktime + MPI_WTIME() - starttimer 1629C 1630C flush local memory 1631C 1632 IDUM = 0 1633 CALL MEMMAN(KDUM ,IDUM,'FLUSM ',2,'UPLIST') 1634C 1635 END 1636*********************************************************************** 1637* * 1638* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig * 1639* parallelization by Stefan Knecht * 1640* * 1641*********************************************************************** 1642 SUBROUTINE BLOCK_DISTR_DRV(NBLOCK,IBLOCKL,NBLOCKD,IBLOCKS_FNODE, 1643 & SCALFAC,NVAR,IPROCLIST) 1644#include "implicit.h" 1645 INTEGER*8 KICCTOS, KCWEIGHT, KCWEIGHTF, KBLCKWT, KIBTOTW 1646! for addressing of WORK 1647#include "maxorb.h" 1648#include "infpar.h" 1649#include "mpif.h" 1650#include "parluci.h" 1651#include "wrkspc.inc" 1652! from cands.inc: icsm 1653#include "cands.inc" 1654 INTEGER*8 IABSOLUTE_WEIGHT 1655 IABSOLUTE_WEIGHT = 0 1656C 1657 IDUM = 0 1658 CALL MEMMAN(KDUM,IDUM,'MARK ',IDUM,'BLKDRV') 1659C 1660C allocate local scratch arrays 1661C 1662 CALL MEMMAN(KICCTOS, NBLOCK**2,'ADDL ',1,'ICCTOS') 1663 CALL MEMMAN(KCWEIGHT, NBLOCK,'ADDL ',1,'ICWHT ') 1664 CALL MEMMAN(KCWEIGHTF, NBLOCK,'ADDL ',1,'ICWHTF') 1665 CALL MEMMAN(KBLCKWT, 2*LUCI_NMPROC,'ADDL ',1,'IBLCKW') 1666 CALL MEMMAN(KIBTOTW, NBLOCK,'ADDL ',2,'IBTOTW') 1667 1668 CALL IZERO(WORK(KBLCKWT),2*LUCI_NMPROC) 1669 CALL IZERO(WORK(KCWEIGHT),NBLOCK) 1670 CALL IZERO(WORK(KCWEIGHTF),NBLOCK) 1671 CALL IZERO(WORK(KICCTOS),NBLOCK**2) 1672C 1673 CALL FIND_IMAT_SC(IBLOCKL,SCALFAC,WORK(KICCTOS),WORK(KCWEIGHT), 1674 & WORK(KIBTOTW),WORK(KCWEIGHTF),NBLOCK, 1675 & IABSOLUTE_WEIGHT) 1676C 1677 IF(IDISTROUTE.EQ.1) THEN 1678 call quit('dist strategy 1 disabled - not all variables are'// 1679 & ' set properly if PRINT_BATCH_INFO is disabled...') 1680 CALL DISTBLKND_1(NBLOCK,WORK(KCWEIGHTF),NBLOCKD,WORK(KIBTOTW), 1681 & WORK(KBLCKWT),NVAR,WORK(KICCTOS),IBLOCKL, 1682 & IPROCLIST,WORK(KCWEIGHT),IABSOLUTE_WEIGHT) 1683 ELSE 1684 CALL DISTBLKND_2(NBLOCK,WORK(KCWEIGHTF),NBLOCKD,IBLOCKL,icsm) 1685 END IF 1686C 1687C find all c-blocks connecting to a given sigma-block, 1688C information will be stored in CBLOCKS_FNODE 1689C 1690 CALL FIND_CCTOS(IBLOCKS_FNODE,NBLOCKD,WORK(KICCTOS),NBLOCK) 1691C 1692C eliminate local memory 1693 IDUM = 0 1694 CALL MEMMAN(KDUM ,IDUM,'FLUSM ',IDUM,'BLKDRV') 1695C 1696 END 1697*********************************************************************** 1698* * 1699* LUCITA, by Jeppe Olsen, DIRAC adaption by Timo Fleig * 1700* parallelization by Stefan Knecht * 1701* * 1702*********************************************************************** 1703 SUBROUTINE FIND_ACTIVE_BLOCKS_PAR(LUIN,LBLK,BLK_A,SEGMNT, 1704 & NBLOCK,IBLOCKD) 1705* 1706*. Find the active (nonvanishing blocks) on LUIN 1707*. Non vanishing block is flagged by a 1.0 ( note : real) 1708* in BLK_A 1709* parallel version 1710* 1711 IMPLICIT REAL*8(A-H,O-Z) 1712*. Output 1713 DIMENSION BLK_A(*) 1714*. Scratch 1715 DIMENSION SEGMNT(*) 1716#include "maxorb.h" 1717#include "infpar.h" 1718#include "mpif.h" 1719 integer(kind=MPI_INTEGER_KIND) my_STATUS(MPI_STATUS_SIZE) 1720#include "parluci.h" 1721 DIMENSION IBLOCKD(NBLOCK) 1722* 1723 CALL REWINE(LUIN,LBLK) 1724* 1725 NBLK_A = 0 1726*. Loop over blocks 1727 DO 500 IBLK = 1, NBLOCK 1728* 1729 IF(LUCI_MYPROC.NE.IBLOCKD(IBLK))THEN 1730 BLK_A(IBLK) = 0.0D0 1731 GOTO 300 1732 ELSE 1733 CALL IFRMDS(LBL,1,-1,LUIN) 1734 IF( LBL .GE. 0 ) THEN 1735 IF(LBLK .GE.0 ) THEN 1736 KBLK = LBL 1737 ELSE 1738 KBLK = -1 1739 END IF 1740 NO_ZEROING = 1 1741 CALL FRMDSC2(SEGMNT,LBL,KBLK,LUIN,IMZERO,IAMPACK, 1742 & NO_ZEROING) 1743 IF(IMZERO.EQ.0) THEN 1744 NBLK_A = NBLK_A + 1 1745 BLK_A(IBLK) = 1.0D0 1746 ELSE 1747 BLK_A(IBLK) = 0.0D0 1748 END IF 1749 END IF 1750 END IF 1751 300 CONTINUE 1752* 1753 500 CONTINUE 1754* 1755 NTEST = 0 1756 IF(NTEST.GE.1) THEN 1757 WRITE(6,*)'myproc',LUCI_MYPROC 1758 WRITE(6,'(A,I8,I8)') 1759 & ' FIND_A.... Number of total and active Blocks',NBLOCK,NBLK_A 1760 END IF 1761* 1762 END 1763*********************************************************************** 1764 SUBROUTINE FNDMND_PAR(LU,LBLK,SEGMNT,NSUBMX,NSUB,ISCR, 1765 & SCR,ISCAT,SUBVAL,IBLOCKL,IBLOCKD, 1766 & NBLOCK,NTESTG) 1767* 1768* FIND NSUB LOWEST ELEMENTS OF VECTOR RESIDING ON FILE 1769* LU. ENSURE THAT NO DEGENERENCIES ARE SPLIT 1770* 1771* 1772* INPUT 1773*======= 1774* LU : FILE WHERE VECTOR OF INTEREST IS RESIDING, REWOUND 1775* LBLK : DEFINES FILE STRUCTURE ON FILE LU 1776* NSUBMX: LARGEST ALLOWED NUMBER OF SORTED ELEMENTS 1777* 1778* OUTPUT 1779*======= 1780* NSUB : ACTUAL NUMBER OF ELEMENTS OBTAINED. CAN BE SMALLER 1781* THAN NSUBMX IF THE LAST ELEMENT BELONGS TO A DEGENERATE 1782* SET 1783*ISCAT: SCATTERING ARRAY, ISCAT(I) GIVES FULL ADRESS OF SORTED 1784* ELEMENT I 1785*SUBVAL: VALUE OF SORTED ELEMENTS 1786 1787 IMPLICIT REAL*8 ( A-H,O-Z) 1788 INTEGER*8 KGATHERA, KGATHERB, KGATHERC, KGATHERD 1789! for addressing of WORK 1790#include "maxorb.h" 1791#include "infpar.h" 1792#include "mpif.h" 1793 integer(kind=MPI_INTEGER_KIND) :: my_MPI_REAL8 = MPI_REAL8 1794 integer(kind=MPI_INTEGER_KIND) :: my_STATUS(MPI_STATUS_SIZE) 1795#include "parluci.h" 1796#include "wrkspc.inc" 1797 1798 DIMENSION SEGMNT(*), ISCAT(*),SUBVAL(*),SCR(*),ISCR(*) 1799 DIMENSION IBLOCKL(NBLOCK), IBLOCKD(NBLOCK) 1800 INTEGER(KIND=MPI_OFFSET_KIND) IOFF_SCR 1801C 1802 NTESTL = 0000 1803 NTEST = MAX(NTESTG,NTESTL) 1804 NTEST = 000 1805C offset initialization 1806 IOFF_SCR = 0 1807 IOFF_SCR = IOFF_SCR + MY_DIA_OFF 1808C 1809 IBASE = 1 1810 LSUB = 0 1811* loop over blocks 1812 DO 1000 II = 1, NBLOCK 1813* 1814 IF( IBLOCKD(II) .eq. LUCI_MYPROC )THEN 1815 LBL = IBLOCKL(II) 1816 ELSE 1817* useful to set all other blocks to 0? 1818 LBL = 0 1819 ENDIF 1820* 1821 IF(NTEST.GE.10) THEN 1822 WRITE(LUWRT,*) ' Info about block ',II 1823 WRITE(LUWRT,*) ' Number of elements ',LBL 1824 END IF 1825 IF(LBL .GE. 0 ) THEN 1826 IF(LBLK .GE.0 ) THEN 1827 KBLK = LBL 1828 ELSE 1829 KBLK = -1 1830 END IF 1831 IF( IBLOCKD(II) .eq. LUCI_MYPROC )THEN 1832 CALL MPI_FILE_READ_AT(IDIA,IOFF_SCR,SEGMNT,LBL, 1833 & my_MPI_REAL8,my_STATUS,ierr_mpi) 1834 ENDIF 1835 IF(NTEST.GE.100) THEN 1836 WRITE(LUWRT,*) ' Elements read in ' 1837 CALL WRTMATMN(SEGMNT,1,LBL,1,LBL,LUWRT) 1838 END IF 1839 IF(LBL .GE. 0 ) THEN 1840*. LOWEST ELEMENTS IN SEGMNT ( ADD TO PREVIOUS LIST ) 1841 MSUBMX = MIN(NSUBMX,LBL) 1842 IF(LBL.GE.1) THEN 1843 CALL SORLOW(SEGMNT,SCR(1+LSUB),ISCR(1+LSUB),LBL, 1844 & MSUBMX,MSUB,NTEST) 1845 ELSE 1846 MSUB = 0 1847 END IF 1848 DO 10 I = 1, MSUB 1849 10 ISCR(LSUB+I) = ISCR(LSUB+I) + IBASE - 1 1850* SORT COMBINED LIST 1851 MSUBMX = MIN(NSUBMX,LSUB+MSUB) 1852 IF(MSUBMX.GT.0) THEN 1853 CALL SORLOW(SCR,SUBVAL,ISCAT,LSUB+MSUB,MSUBMX,LLSUB, 1854 & NTEST) 1855 ELSE 1856 LLSUB = 0 1857 END IF 1858 LSUB = LLSUB 1859 DO 20 I = 1, LSUB 1860 ISCR(I+2*NSUBMX) = ISCR(ISCAT(I)) 1861 20 CONTINUE 1862* 1863 CALL ICOPVE(ISCR(1+2*NSUBMX),ISCR(1),LSUB) 1864 CALL DCOPY(LSUB,SUBVAL,1,SCR,1) 1865 1866 IF(NTEST .GE. 20 ) THEN 1867 WRITE(LUWRT,*)' Lowest elements and their original place' 1868 WRITE(LUWRT,*)' Number of elements obtained ', LSUB 1869 CALL WRTMATMN(SUBVAL,1,LSUB,1,LSUB,LUWRT) 1870 CALL IWRTMAMN(ISCR,1,LSUB,1,LSUB,LUWRT) 1871 END IF 1872 END IF 1873* 1874 END IF 1875 IOFF_SCR = IOFF_SCR + LBL 1876C set to lbl to true value 1877 LBL = IBLOCKL(II) 1878 IBASE = IBASE + LBL 1879C 1880 1000 CONTINUE 1881* 1882 NTEST = 00 1883 NSUB = LSUB 1884 CALL ICOPVE(ISCR,ISCAT,NSUB) 1885 IF(NTEST .GE. 20) THEN 1886 WRITE(LUWRT,*) ' Lowest elements and their original place ' 1887 WRITE(LUWRT,*) ' Number of elements obtained ', NSUB 1888 CALL WRTMATMN(SUBVAL,1,NSUB,1,NSUB,LUWRT) 1889 CALL IWRTMAMN(ISCAT,1,NSUB,1,NSUB,LUWRT) 1890 END IF 1891* 1892 IDUM = 0 1893 CALL MEMMAN(KDUM,IDUM,'MARK ',IDUM,'GATHER') 1894* 1895 CALL MEMMAN(KGATHERA,LUCI_NMPROC*NSUBMX,'ADDL ',2,'PARRA1') 1896 CALL MEMMAN(KGATHERB,LUCI_NMPROC*NSUBMX,'ADDL ',2,'PARRA2') 1897 CALL MEMMAN(KGATHERC,LUCI_NMPROC*NSUBMX,'ADDL ',1,'PARIA1') 1898 CALL MEMMAN(KGATHERD,LUCI_NMPROC*NSUBMX,'ADDL ',1,'PARIA2') 1899*. We gather all lowest values from each node 1900*. and build up a combined list of those 1901 CALL GATHER_LOW_PAR(NSUB,NSUBMX,SUBVAL,ISCAT, 1902 & WORK(KGATHERA),WORK(KGATHERB), 1903 & WORK(KGATHERC),WORK(KGATHERD), 1904 & NTESTG) 1905* update SCR1 and ISCR1 1906 CALL DCOPY(NSUBMX,SUBVAL,1,SCR,1) 1907 CALL ICOPVE(ISCAT,ISCR,NSUBMX) 1908 1909 IF(NTEST.GE.20)THEN 1910 WRITE(LUWRT,*)'after search: SUBVAL and ISCAT' 1911 CALL WRTMATMN(SUBVAL,1,NSUBMX,1,NSUBMX,LUWRT) 1912 CALL IWRTMAMN(ISCAT,1,NSUBMX,1,NSUBMX,LUWRT) 1913 END IF 1914CSK NTEST = 0 1915 1916 IF(NSUB.NE.NSUBMX.AND.NTEST.GE.20)THEN 1917 WRITE(LUWRT,*)'Warning! NSUB is lower than NSUBMX' 1918 WRITE(LUWRT,*)'NSUB is set to be equal to NSUBMX' 1919 NSUB = NSUBMX 1920 END IF 1921* 1922*. Eliminate local memory 1923 IDUM = 0 1924 CALL MEMMAN(KDUM ,IDUM,'FLUSM ',IDUM,'GATHER') 1925* 1926 END 1927***************************************************** 1928#else /* VAR_MPI */ 1929* dummy routine for non-mpi compilation 1930 SUBROUTINE par_lucita 1931! write(6,*) 'let us do nothing',nothing 1932 END 1933#endif /* VAR_MPI */ 1934