1c 2c Evaluate the quantity W. Eq. 24, Furche & Ahlrichs 3c There are three parts that need evaluating separately. 4c Occupied-occupied, Occupied-Virtual, Virtual-Virtual parts. 5c HvD 11/2007, NG 11/2012 6c 7 subroutine tddft_grad_compute_w(rtdb,ihdl_geom,ihdl_bfao,tol2e, 8 + tda,ipol,nroots,nfc,naoc,nocc,nav, 9 + nfv,nao,g_mo,g_p,g_z,g_xpy,g_xmy,eps,omega,g_w, 10 + kfac,lhashf,otriplet) 11c 12 implicit none 13c 14#include "errquit.fh" 15#include "mafdecls.fh" 16#include "global.fh" 17#include "rtdb.fh" 18#include "tddft_grad_util.fh" 19#include "stdio.fh" 20c 21c Input: 22c 23 integer rtdb ! runtime database handle 24 integer ihdl_geom ! geometry handle 25 integer ihdl_bfao ! basis set handle 26 logical tda ! .true. if Tamm-Dancoff approximation is used 27 integer ipol ! =1 (restricted), =2 (unrestricted) 28 integer nfc(2) ! the number of frozen core orbitals 29 integer naoc(2) ! the number of active occupied orbitals 30 integer nocc(2) ! the total number of occupied orbitals 31 integer nav(2) ! the number of active virtual orbitals 32 integer nfv(2) ! the number of frozen virtual orbitals 33 integer nao ! the number of basis functions 34 integer nroots ! the number of states to consider 35 integer g_mo(2) ! global array handle for MO coefficients 36 integer g_p(2) ! global array handle for Ppq 37 integer g_z(2) ! global array handle for Zia 38 integer g_xpy(2) ! global array handle for (X+Y) 39 integer g_xmy(2) ! global array handle for (X-Y) 40c 41 logical lhashf ! =.true. hybrid functionals 42 ! =.false. otherwise 43 logical otriplet ! =.true. triplet excited states 44 ! =.false. singlet excited states 45 ! value does not matter for TDUDFT 46c 47 double precision kfac ! the weight of the Hartree-Fock 48 ! exchange contributions 49 double precision eps(nao,2) ! orbital eigenvalues 50 double precision omega(nroots) ! the excitation energies 51 double precision tol2e ! integral tolerance 52c 53c Output: 54c 55 integer g_w(2) ! global array handle for W 56c 57c Functions: 58c 59 integer ga_create_atom_blocked 60 external ga_create_atom_blocked 61 logical xc_gotxc 62 external xc_gotxc 63c 64c Local: 65c 66 integer g_puv ! global array handle for Puv 67 integer g_apbp ! global array handle for (A+B)Puv 68 integer g_ambp ! global array handle for (A-B)Puv 69 integer g_x ! global array handle for X 70 integer g_y ! global array handle for Y 71 integer g_apbx ! global array handle for (A+B)X 72 integer g_ambx ! global array handle for (A-B)X 73 integer g_apby ! global array handle for (A+B)Y 74 integer g_amby ! global array handle for (A-B)Y 75 integer g_hij(2) ! global array handle for Hij+- 76 integer g_t ! global array handle for transposer 77c 78 integer alo(3) ! lower chunck limits on A 79 integer ahi(3) ! upper chunck limits on A 80 integer blo(3) ! lower chunck limits on B 81 integer bhi(3) ! upper chunck limits on B 82 integer clo(3) ! lower chunck limits on C 83 integer chi(3) ! upper chunck limits on C 84 integer ld(3) ! leading dimensions 85c 86 integer idim(3) ! dimensions for global arrays 87 integer ichnk(3) ! chunk sizes for distribution 88c 89 integer nproc ! the number of processors 90 integer iproc ! the rank of the current processor 91 integer jproc ! the rank of some other processor 92c 93 integer ip ! counter over spin components 94 integer ir ! counter over roots 95 integer ic ! counter for arbitrary purposes 96 integer icount ! counter for arbitrary purposes 97c 98 integer mini ! minimum value of i-label 99 integer minj ! minimum value of j-label 100 integer mina ! minimum value of a-label 101 integer minb ! minimum value of b-label 102 integer maxi ! maximum value of i-label 103 integer maxj ! maximum value of j-label 104 integer maxa ! maximum value of a-label 105 integer maxb ! maximum value of b-label 106 integer numi ! the number of i-labels 107 integer numj ! the number of j-labels 108 integer numa ! the number of a-labels 109 integer numb ! the number of b-labels 110c 111 integer i ! counter over i-labels 112 integer j ! counter over j-labels 113 integer a ! counter over a-labels 114 integer b ! counter over b-labels 115c 116 integer imo ! molecular orbital label for accessing eps 117c 118 integer ihdl_a ! the memory handle for block A 119 integer ihdl_b ! the memory handle for block B 120 integer ihdl_r ! the memory handle for block R 121 integer ihdl_v ! the memory handle for block V 122c 123 integer iptr_a ! the memory index for block A 124 integer iptr_b ! the memory index for block B 125 integer iptr_r ! the memory index for block R 126 integer iptr_v ! the memory index for block V 127c 128 integer calc_type ! calculation type for fock_xc 129 integer l_dens, k_dens ! memory for array of density handles 130 integer l_den2, k_den2 ! memory for array of density handles 131 integer ndens ! the number of density matrices 132 integer l_gxc , k_gxc ! memory for array of Gxc matrix handles 133 integer ngxc ! the number of Gxc matrices 134 logical oroot 135 integer iwhich 136c 137 character*32 pname 138c 139 logical oskel 140 parameter (oskel=.false.) 141 double precision Exc(2) ! Exchange-correlation energy 142c 143 logical tdaloc 144 logical doitw,doitz 145 logical doitxpy1,doitxpy2 146 logical doitxmy1,doitxmy2 147c 148 pname = "tddft_grad_compute_w: " 149 iwhich = 0 ! call to tddft_nga_cont() 150c 151c General stuff 152c 153 iproc = ga_nodeid() 154 nproc = ga_nnodes() 155c 156 do ip = 1, ipol 157 call ga_zero(g_w(ip)) 158 enddo 159c Daniel (12/1/12): Here, we have that the occupied-occupied block is 160c the sum of the ground state energy-weighted matrix with the 161c energy-weighted difference density matrix. The former gives the 162c first term in the expression. The ground state density matrix makes 163c no other contributions to W. 164c 165c Compute the occupied-occupied block 166c 167c Wijs = delta_ij eps_i 168c + Omega Sum_a [(X+Y)ias(X-Y)jas+(X-Y)ias(X+Y)jas]/2 169c - Sum_a eps_a [(X+Y)ias(X+Y)jas+(X-Y)ias(X-Y)jas]/2 170c + Aijs'[P] 171c + Sum_kat Sum_ldr Gxc_ijskatldr(X+Y)kat(X+Y)ldr 172c 173c Do the first term, the parallelisation is driven off the 174c distribution of g_w. 175c 176 do ip = 1, ipol 177 call nga_distribution(g_w(ip),iproc,alo,ahi) 178 doitw = .not.(alo(1).ne.1.or.ahi(1).ne.nroots) 179 if (doitw) then 180 mini = max(alo(2),alo(3)) 181 maxi = min(ahi(2),ahi(3),naoc(ip)) 182 do i = mini, maxi 183 imo = nfc(ip)+i 184 do ir = 1, nroots 185 blo(1) = ir 186 bhi(1) = ir 187 blo(2) = i 188 bhi(2) = i 189 blo(3) = i 190 bhi(3) = i 191 ld(1) = 1 192 ld(2) = 1 193 call nga_acc(g_w(ip),blo,bhi,(3-ipol)*eps(imo,ip),ld,1.0d0) 194 enddo 195 enddo 196 endif ! doitw 197 enddo ! ipol 198c 199c Do the second term 200c 201 do ip = 1, ipol 202 do ir = 1, nroots 203 alo(1) = ir 204 ahi(1) = ir 205 alo(2) = 1 206 ahi(2) = naoc(ip) 207 alo(3) = 1 208 ahi(3) = nav(ip) 209 blo(1) = ir 210 bhi(1) = ir 211 blo(2) = 1 212 bhi(2) = nav(ip) 213 blo(3) = 1 214 bhi(3) = naoc(ip) 215 clo(1) = ir 216 chi(1) = ir 217 clo(2) = 1 218 chi(2) = naoc(ip) 219 clo(3) = 1 220 chi(3) = naoc(ip) 221 if (.not.tda) then 222 call tddft_patch3mxm('n','t',0.5d0*omega(ir),1.0d0, 223 + g_xpy(ip),alo,ahi, 224 + g_xmy(ip),blo,bhi, 225 + g_w(ip),clo,chi) 226 call tddft_patch3mxm('n','t',0.5d0*omega(ir),1.0d0, 227 + g_xmy(ip),alo,ahi, 228 + g_xpy(ip),blo,bhi, 229 + g_w(ip),clo,chi) 230 else 231c g_xpy: X with CIS, Y = 0 232c g_xmy: not created with CIS 233 call tddft_patch3mxm('n','t',omega(ir),1.0d0, 234 + g_xpy(ip),alo,ahi, 235 + g_xpy(ip),blo,bhi, 236 + g_w(ip),clo,chi) 237 endif 238 enddo ! ir 239c 240c Do the third term, drive the parallelization off the 241c distribution of (X+-Y)ib. The reason for this choice is that 242c the occ-occ block will be localized on a (small) sub-set of 243c the processors. So driving the parallelization off the 244c distribution of W will result in dreadful load imbalance 245c problems. This choice will lead to a (slightly) less bad 246c load imbalance. 247c 248c Do the (X+Y) terms first 249c 250 call nga_distribution(g_xpy(ip),iproc,blo,bhi) 251 doitxpy1 = .not.(blo(1).ne.1.or.bhi(1).ne.nroots) 252c 253 if (doitxpy1) then 254c 255 minj = blo(2) 256 maxj = bhi(2) 257 numj = maxj-minj+1 258 mina = blo(3) 259 maxa = bhi(3) 260 numa = maxa-mina+1 261 if (.not.ma_push_get(mt_dbl,nroots*numa*numj,'block b', 262 + ihdl_b,iptr_b)) 263 + call errquit(pname//'failed to allocate blockb',0,MA_ERR) 264 ld(1) = nroots 265 ld(2) = numj 266 call nga_get(g_xpy(ip),blo,bhi,dbl_mb(iptr_b),ld) 267 ic = 0 268 do a = 1, numa 269 do j = 1, numj 270 imo = nocc(ip)+mina-1+a 271 do ir = 1, nroots 272 dbl_mb(iptr_b+ic) = 0.5d0*eps(imo,ip)*dbl_mb(iptr_b+ic) 273 ic = ic + 1 274 enddo 275 enddo 276 enddo 277c 278 do icount = 0, nproc-1 279 jproc = mod(iproc+icount,nproc) 280c 281 call nga_distribution(g_xpy(ip),jproc,alo,ahi) 282 doitxpy2 = .not.(alo(1).ne.1.or.ahi(1).ne.nroots) 283c 284 if (doitxpy2.and.doitxpy1) then 285 if (alo(3).eq.mina.and.ahi(3).eq.maxa) then 286 mini = alo(2) 287 maxi = ahi(2) 288 numi = maxi-mini+1 289c alo(3) = mina 290c ahi(3) = maxa 291 if (.not.ma_push_get(mt_dbl,nroots*numa*numi,'block a', 292 + ihdl_a,iptr_a)) 293 + call errquit(pname//'failed to allocate blockb',0,UERR) 294 if (.not.ma_push_get(mt_dbl,nroots*numi*numj,'block r', 295 + ihdl_r,iptr_r)) 296 + call errquit(pname//'failed to allocate blockr',0,UERR) 297 ld(1) = nroots 298 ld(2) = numi 299 call nga_get(g_xpy(ip),alo,ahi,dbl_mb(iptr_a),ld) 300 call dfill(nroots*numi*numj,0.0d0,dbl_mb(iptr_r),1) 301 do a = 0, numa-1 302 do j = 0, numj-1 303 do i = 0, numi-1 304 do ir = 0, nroots-1 305 dbl_mb(iptr_r+ir+nroots*i+nroots*numi*j) 306 + = dbl_mb(iptr_r+ir+nroots*i+nroots*numi*j) 307 + - dbl_mb(iptr_a+ir+nroots*i+nroots*numi*a) 308 + * dbl_mb(iptr_b+ir+nroots*j+nroots*numj*a) 309 enddo 310 enddo 311 enddo 312 enddo 313c 314c Daniel (12-3-12): We need to account for (X-Y) = X in TDA here. 315c We must either double the contribution from (X+Y) here or redo the 316c procedure in this block of code. I prefer scaling here since it's 317c less messy. 318 if (tda) then 319 call dscal(nroots*numi*numj,2.0d0,dbl_mb(iptr_r),1) 320 endif 321 clo(1) = 1 322 chi(1) = nroots 323 clo(2) = mini 324 chi(2) = maxi 325 clo(3) = minj 326 chi(3) = maxj 327 ld(1) = nroots 328 ld(2) = numi 329 call nga_acc(g_w(ip),clo,chi,dbl_mb(iptr_r),ld,1.0d0) 330 if (.not.ma_pop_stack(ihdl_r)) 331 + call errquit(pname//'failed to deallocate blockr',0,UERR) 332 if (.not.ma_pop_stack(ihdl_a)) 333 + call errquit(pname//'failed to deallocate blocka',0,UERR) 334 endif 335 endif !doitxpy1 and doitxpy2 336 enddo ! icount 337c 338 if (.not.ma_pop_stack(ihdl_b)) 339 + call errquit(pname//'failed to deallocate blockb',0,UERR) 340 endif ! doitxpy1 341 342 call ga_sync() 343c 344 if (.not.tda) then 345c 346c Do the (X-Y) terms next 347c 348 call nga_distribution(g_xmy(ip),iproc,blo,bhi) 349 doitxmy1 = .not.(blo(1).ne.1.or.bhi(1).ne.nroots) 350c 351 if (doitxmy1) then 352c 353 minj = blo(2) 354 maxj = bhi(2) 355 numj = maxj-minj+1 356 mina = blo(3) 357 maxa = bhi(3) 358 numa = maxa-mina+1 359 if (.not.ma_push_get(mt_dbl,nroots*numa*numj,'block b', 360 + ihdl_b,iptr_b)) 361 + call errquit(pname//'failed to allocate blockb',0,UERR) 362 ld(1) = nroots 363 ld(2) = numj 364 call nga_get(g_xmy(ip),blo,bhi,dbl_mb(iptr_b),ld) 365 ic = 0 366 do a = 1, numa 367 do i = 1, numj 368 imo = nocc(ip)+mina-1+a 369 do ir = 1, nroots 370 dbl_mb(iptr_b+ic) = 0.5d0*eps(imo,ip)* 371 + dbl_mb(iptr_b+ic) 372 ic = ic + 1 373 enddo 374 enddo 375 enddo 376c 377 do icount = 0, nproc-1 378 jproc = mod(iproc+icount,nproc) 379 call nga_distribution(g_xmy(ip),jproc,alo,ahi) 380 doitxmy2 = .not.(alo(1).ne.1.or.ahi(1).ne.nroots) 381c 382 if (doitxmy2.and.doitxmy1) then 383 if (alo(3).eq.mina.and.ahi(3).eq.maxa) then 384 mini = alo(2) 385 maxi = ahi(2) 386 numi = maxi-mini+1 387c alo(3) = mina 388c ahi(3) = maxa 389 if (.not.ma_push_get(mt_dbl,nroots*numa*numi,'block a', 390 + ihdl_a,iptr_a)) 391 + call errquit(pname//'failed to allocate blockb',0,UERR) 392 if (.not.ma_push_get(mt_dbl,nroots*numi*numj,'block r', 393 + ihdl_r,iptr_r)) 394 + call errquit(pname//'failed to allocate blockr',0,UERR) 395 ld(1) = nroots 396 ld(2) = numi 397 call nga_get(g_xmy(ip),alo,ahi,dbl_mb(iptr_a),ld) 398 call dfill(nroots*numi*numj,0.0d0,dbl_mb(iptr_r),1) 399 do a = 0, numa-1 400 do i = 0, numi-1 401 do j = 0, numj-1 402 do ir = 0, nroots-1 403 dbl_mb(iptr_r+ir+nroots*i+nroots*numi*j) 404 + = dbl_mb(iptr_r+ir+nroots*i+nroots*numi*j) 405 + - dbl_mb(iptr_a+ir+nroots*i+nroots*numi*a) 406 + * dbl_mb(iptr_b+ir+nroots*j+nroots*numj*a) 407 enddo 408 enddo 409 enddo 410 enddo 411 clo(1) = 1 412 chi(1) = nroots 413 clo(2) = mini 414 chi(2) = maxi 415 clo(3) = minj 416 chi(3) = maxj 417 ld(1) = nroots 418 ld(2) = numi 419 call nga_acc(g_w(ip),clo,chi,dbl_mb(iptr_r),ld,1.0d0) 420 if (.not.ma_pop_stack(ihdl_r)) 421 + call errquit(pname//'failed to deallocate blockr',0, 422 + UERR) 423 if (.not.ma_pop_stack(ihdl_a)) 424 + call errquit(pname//'failed to deallocate blocka',0, 425 + UERR) 426 endif 427 endif ! doitxmy1 and doitxmy2 428 enddo ! icount 429c 430 if (.not.ma_pop_stack(ihdl_b)) 431 + call errquit(pname//'failed to deallocate blockb',0,UERR) 432 endif ! doitxmy1 433c 434 call ga_sync() 435c 436 endif ! tda 437c 438 enddo ! ipol 439c 440c Now do the fourth term 441c 442c - Create Puv 443c 444 idim(1) = nroots*ipol 445 idim(2) = nao 446 idim(3) = nao 447 ichnk(1) = nroots*ipol 448 ichnk(2) = -1 449 ichnk(3) = -1 450 if (.not.nga_create(mt_dbl,3,idim,'vec Puv',ichnk,g_puv)) 451 + call errquit(pname//'failed to create g_puv',0,GA_ERR) 452c 453c - Puv = sum_pq Cup*Ppq*Cvq 454c 455 call tddft_grad_trans_mo2ao(ipol,nao,nfc,naoc,nocc,nav,nfv, 456 + nroots,1.0d0,0.0d0,"pq",g_mo,g_p,"pq",g_puv) 457c 458c - Create global array for (A+B)P in AO basis 459c - Compute (A+B)P in AO basis (currently we have to compute 460c (A-B)P as well although we do not need it...) 461c 462 if (.not.nga_create(mt_dbl,3,idim,'vec (A+B)P',ichnk,g_apbp)) 463 + call errquit(pname//'failed to create g_apbp',0,GA_ERR) 464 if (.not.nga_create(mt_dbl,3,idim,'vec (A-B)P',ichnk,g_ambp)) 465 + call errquit(pname//'failed to create g_ambp',0,GA_ERR) 466 call ga_zero(g_ambp) 467 call ga_zero(g_apbp) 468c Daniel (2-26-13): It was not obvious that we need to unset 469c fock_xc:triplet here for restricted triplet calculations to work. 470 if (otriplet) then 471 if (.not.rtdb_put(rtdb,'fock_xc:triplet',mt_log,1,.false.)) 472 1 call errquit(pname//'failed to set triplet',0,RTDB_ERR) 473 endif 474 call tddft_nga_cont(rtdb,ihdl_geom,ihdl_bfao,g_puv,g_apbp,g_ambp, 475 + nao,ipol,tol2e,tda,oskel,kfac,lhashf,.false.,nroots,iwhich) 476c Daniel (2-26-13): Reset fock_xc:triplet here for restricted triplet 477c calculations to work. 478 if (otriplet) then 479 if (.not.rtdb_put(rtdb,'fock_xc:triplet',mt_log,1,.true.)) 480 1 call errquit(pname//'failed to set triplet',0,RTDB_ERR) 481 endif 482c Daniel (1-5-13): This line accidentally halves the contribution of 483c this term for CIS, but functions correctly for RPA. 484 if (.not.tda) then 485 call ga_add(+0.5d0,g_apbp,+0.5d0,g_ambp,g_apbp) 486 endif 487c 488c - Destroy global array for Puv 489c 490 if (.not.ga_destroy(g_puv)) 491 + call errquit(pname//'failed to destroy g_puv',0,GA_ERR) 492c 493c - Transform (A+B)P from AO basis to Wij in MO basis 494c 495 call tddft_grad_trans_ao2mo(ipol,nao,nfc,naoc,nocc,nav,nfv, 496 + nroots,1.0d0,1.0d0,"ij",g_mo,g_apbp,g_w,"pq") 497c 498c - Destroy (A+B)P and (A-B)P 499c 500 if (.not.ga_destroy(g_apbp)) 501 + call errquit(pname//'failed to destroy g_apbp',0,GA_ERR) 502 if (.not.ga_destroy(g_ambp)) 503 + call errquit(pname//'failed to destroy g_apbp',0,GA_ERR) 504c 505c Now do the fifth term (this follows the same approach as in 506c tddft_grad_comp_r) 507c 508 if (xc_gotxc()) then 509 ndens = ipol*(nroots+1) 510 ngxc = ipol*nroots 511 if (.not.ma_push_get(mt_int,ngxc,'gxc-s',l_gxc,k_gxc)) 512 + call errquit(pname//'failed to allocate l_gxc',0,MA_ERR) 513 if (.not.ma_push_get(mt_int,ndens,'densities',l_dens,k_dens)) 514 + call errquit(pname//'failed to allocate l_dens',0,MA_ERR) 515 if (.not.ma_push_get(mt_int,ndens,'dens-tmps',l_den2,k_den2)) 516 + call errquit(pname//'failed to allocate l_den2',0,MA_ERR) 517c 518c - Create and calculate the AO basis density matrices 519c 520 do ip = 0, ipol-1 521 int_mb(k_dens+nroots*ipol+ip) = 522 + ga_create_atom_blocked(ihdl_geom,ihdl_bfao,"d_ao") 523 enddo 524 call tddft_grad_compute_dao(ipol,nao,nocc,g_mo, 525 + int_mb(k_dens+nroots*ipol)) 526 if (ipol.eq.1) call ga_scale(int_mb(k_dens+nroots*ipol),2.0d0) 527c 528c - Create and calculate the AO basis representation of (X+Y) 529c 530 do ip = 0, ipol-1 531 do ir = 0, nroots-1 532 int_mb(k_dens+ip*nroots+ir) = ga_create_atom_blocked( 533 + ihdl_geom,ihdl_bfao,"xpy_ao") 534c Daniel (1-14-13): Added this call for consistency with 535c tddft_grad_compute_r 536 call ga_zero(int_mb(k_dens+ip*nroots+ir)) 537 enddo 538 enddo 539 do ip = 1, ipol 540 do ir = 1, nroots 541 call tddft_grad_trans_mo2ao(1,nao,nfc(ip),naoc(ip),nocc(ip), 542 + nav(ip),nfv(ip),ir,1.0d0,0.0d0,"ib",g_mo(ip),g_xpy(ip), 543 + "ib",int_mb(k_dens+(ip-1)*nroots+ir-1)) 544 call ga_symmetrize(int_mb(k_dens+(ip-1)*nroots+ir-1)) 545c Daniel (2-16-13): This line is needed to get matching results from the 546c unrestricted code compared to the restricted one. 547 if (ipol.eq.1) then 548 call ga_scale(int_mb(k_dens+(ip-1)*nroots+ir-1),2.0d0) 549 endif 550 enddo 551 enddo 552c 553c - Create and calculate Gxc in AO basis using fock_xc 554c 555 do i = 0, ngxc-1 556 int_mb(k_gxc+i) = ga_create_atom_blocked(ihdl_geom,ihdl_bfao, 557 + "gxc_ao") 558 call ga_zero(int_mb(k_gxc+i)) 559 enddo 560 if (.not.rtdb_get(rtdb,'fock_xc:calc_type',mt_int,1,calc_type)) 561 + calc_type=0 562 if (.not.rtdb_put(rtdb,'fock_xc:calc_type',mt_int,1,5)) 563 + call errquit(pname//'failed to set calc_type 5',0,RTDB_ERR) 564 if (.not.rtdb_put(rtdb,'fock_xc:calc_type_tddft_w',mt_int,1, 565 + calc_type)) 566 + call errquit(pname//'failed to set calc_type_tddft_w', 567 + 0,RTDB_ERR) 568c 569c XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 570c Daniel (2-16-13): There is a line in fock_xc, involving lcgmin, that 571c will set l3d to true, even though we want it to be false for the 572c gradients. We need to set the RTDB such that dft:cgmin is true so 573c that the l3d is set correctly in fock_xc. 574c ********************************************************************** 575c WE NEED TO FIX THIS BECAUSE TDDFT GRADIENTS DON'T WORK with CGMIN SET. 576c ********************************************************************** 577c Daniel (2-18-13): This fix will not work for optimizations. 578c if (.not.rtdb_put(rtdb,'dft:cgmin',mt_log,1,.true.)) 579c 1 call errquit(pname//'failed to set cgmin',0,RTDB_ERR) 580c XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 581c 582c Reorder the densities to match the expectation in fock_xc 583c See tddft_grad_comp_r for details. 584c 585 do i = 0, nroots 586 do ip = 0, ipol-1 587 int_mb(k_den2+i+ip*(nroots+1)) = int_mb(k_dens+ip+ipol*i) 588 enddo 589 enddo 590 call ga_sync() 591c Daniel (2-12-13): We need to set triplet here for fock_xc. 592 if (otriplet) then 593 if (.not.rtdb_put(rtdb,'fock_xc:triplet',mt_log,1,otriplet)) 594 1 call errquit(pname//'failed to set triplet',0,RTDB_ERR) 595 endif 596 call fock_xc(ihdl_geom, nao, ihdl_bfao, ngxc, int_mb(k_den2), 597 + int_mb(k_gxc), Exc, ipol, .false.) 598 call ga_sync() 599c DEBUG 600 if (tddft_grad_util_print('tddft grad w',print_debug)) then 601 oroot = ga_nodeid().eq.0 602 if (oroot) write(LuOut,*)'DEBUG: '//pname//'gxc' 603 call tddft_grad_print_array(ipol,nroots,int_mb(k_gxc), 604 + dble(ipol)) 605 endif 606c DEBUG 607c 608 if (.not.rtdb_get(rtdb,'fock_xc:calc_type_tddft_w',mt_int,1, 609 + calc_type)) 610 + call errquit(pname//'failed to get calc_type_tddft_w',0, 611 + RTDB_ERR) 612 if (.not.rtdb_put(rtdb,'fock_xc:calc_type',mt_int,1,calc_type)) 613 + call errquit(pname//'failed to reset calc_type',0,RTDB_ERR) 614 if (.not.rtdb_delete(rtdb,'fock_xc:calc_type_tddft_w')) 615 + call errquit(pname//'failed to delete calc_type_tddft_w',0, 616 + RTDB_ERR) 617c 618c - Transform the Gxc matrices to MO basis and add them to Wij 619c 620 do i = 0, ndens-1 621 if (.not.ga_destroy(int_mb(k_dens+i))) 622 + call errquit(pname//'failed to destroy densities',0,GA_ERR) 623 enddo 624 if (.not.ma_pop_stack(l_den2)) 625 + call errquit(pname//'failed to deallocate l_den2', 0, MA_ERR) 626 if (.not.ma_pop_stack(l_dens)) 627 + call errquit(pname//'failed to deallocate l_dens', 0, MA_ERR) 628 do ip = 1, ipol 629 do ir = 1, nroots 630 call tddft_grad_trans_ao2mo(1,nao,nfc(ip),naoc(ip),nocc(ip), 631 + nav(ip),nfv(ip),ir,1.0d0,1.0d0,"ij",g_mo(ip), 632 + int_mb(k_gxc+(ip-1)*nroots+ir-1),g_w(ip),"ij") 633 enddo 634 enddo 635 do i = 0, ngxc-1 636 if (.not.ga_destroy(int_mb(k_gxc+i))) 637 + call errquit(pname//'failed to destroy gxc_ao', 0, GA_ERR) 638 enddo 639 if (.not.ma_pop_stack(l_gxc)) 640 + call errquit(pname//'failed to deallocate l_gxc', 0, MA_ERR) 641c 642 endif 643c 644c Compute the occupied-virtual block 645c 646c Wias = eps_is Zias 647c + Sum_j {(X+Y)jasHji+[X+Y]+(X-Y)jasHjis-[X-Y]} 648c 649c Do the first term, drive the parallelization off the distribution 650c of Zias 651c 652 do ip = 1, ipol 653c 654 call nga_distribution(g_z(ip),iproc,alo,ahi) 655 doitz = .not.(alo(1).ne.1.or.ahi(1).ne.nroots) 656c 657 if (doitz) then 658 mini = alo(2) 659 maxi = ahi(2) 660 numi = maxi-mini+1 661 mina = alo(3) 662 maxa = ahi(3) 663 numa = maxa-mina+1 664 if (.not.ma_push_get(mt_dbl,nroots*numi*numa,'block v', 665 + ihdl_v,iptr_v)) 666 + call errquit(pname//'failed to allocate block v',0, UERR) 667 ld(1) = nroots 668 ld(2) = numi 669 call nga_get(g_z(ip),alo,ahi,dbl_mb(iptr_v),ld) 670 do a = 0, numa-1 671 do i = 0, numi-1 672 imo = nfc(ip)+mini+i 673 do ir = 0, nroots-1 674 dbl_mb(iptr_v+ir+nroots*i+nroots*numi*a) 675 + = dbl_mb(iptr_v+ir+nroots*i+nroots*numi*a) 676 + * eps(imo,ip) 677 enddo 678 enddo 679 enddo 680 alo(3) = naoc(ip)+mina 681 ahi(3) = naoc(ip)+maxa 682 call nga_put(g_w(ip),alo,ahi,dbl_mb(iptr_v),ld) 683 if (.not.ma_pop_stack(ihdl_v)) 684 + call errquit(pname//'failed to deallocate block v',0, UERR) 685 endif ! doitz 686 enddo ! ip 687c 688c Do the second term 689c 690c - Do the (X+Y) and (X-Y) contributions 691c 692c - Create global arrays for X and Y in AO basis 693c 694 idim(1) = nroots*ipol 695 idim(2) = nao 696 idim(3) = nao 697 ichnk(1) = nroots*ipol 698 ichnk(2) = -1 699 ichnk(3) = -1 700 if (.not.nga_create(mt_dbl,3,idim,'vectors Xuv',ichnk,g_x)) 701 + call errquit(pname//'failed to create g_x',0,GA_ERR) 702 if (.not.nga_create(mt_dbl,3,idim,'vectors Yuv',ichnk,g_y)) 703 + call errquit(pname//'failed to create g_y',0,GA_ERR) 704c 705c - Transform (X+Y) to AO basis in g_x and (X-Y) to AO basis 706c in g_y 707c 708 call tddft_grad_trans_mo2ao(ipol,nao,nfc,naoc,nocc,nav,nfv, 709 + nroots,1.0d0,0.0d0,"ib",g_mo,g_xpy,"ib",g_x) 710 if (.not.tda) then 711 call tddft_grad_trans_mo2ao(ipol,nao,nfc,naoc,nocc,nav,nfv, 712 + nroots,1.0d0,0.0d0,"ib",g_mo,g_xmy,"ib",g_y) 713 else 714 call tddft_grad_trans_mo2ao(ipol,nao,nfc,naoc,nocc,nav,nfv, 715 + nroots,1.0d0,0.0d0,"ib",g_mo,g_xpy,"ib",g_y) 716 endif 717c 718c - Compute X and Y from (X+Y) and (X-Y) in place 719c 720 alo(1) = 1 721 alo(2) = 1 722 alo(3) = 1 723 ahi(1) = nroots*ipol 724 ahi(2) = nao 725 ahi(3) = nao 726c Daniel (1-7-13): With the modification above, the lines here behave 727c like you'd expect (i.e. g_x = X and g_y = Y = 0 for CIS, rather than 728c what was here before which made g_x = 0.50*X and g_y = 0.50*X). 729 call nga_add_patch(0.5d0,g_x,alo,ahi,0.5d0,g_y,alo,ahi,g_x, 730 + alo,ahi) 731 call nga_add_patch(1.0d0,g_x,alo,ahi,-1.0d0,g_y,alo,ahi,g_y, 732 + alo,ahi) 733c 734c - Allocate various workspace arrays 735c 736 if (.not.nga_create(mt_dbl,3,idim,'vectors (A+B)X',ichnk,g_apbx)) 737 + call errquit(pname//'failed to create g_apbx',0, GA_ERR) 738 if (.not.nga_create(mt_dbl,3,idim,'vectors (A-B)X',ichnk,g_ambx)) 739 + call errquit(pname//'failed to create g_ambx',0, GA_ERR) 740 if (.not.nga_create(mt_dbl,3,idim,'vectors (A+B)Y',ichnk,g_apby)) 741 + call errquit(pname//'failed to create g_apby',0, GA_ERR) 742 if (.not.nga_create(mt_dbl,3,idim,'vectors (A-B)Y',ichnk,g_amby)) 743 + call errquit(pname//'failed to create g_amby',0, GA_ERR) 744c 745c - Compute (A+B)X, (A-B)X, (A+B)Y and (A-B)Y 746c 747c Daniel (1-7-13): We manipulate the code here because the 748c R vector has the same number of terms for RPA and CIS. This is a 749c consequence of (X-Y) = X. It might be a good idea to avoid doing this 750c part for the Y vector since Y = 0. Note that the coupling matrix 751c expressions H^+[V] and H^-[V] can both be nonzero for CIS, so it isn't 752c okay to skip the anti-symmetric part in the tddft_nga_cont routine. 753c What CIS does is makes the Y vector contribution zero in the following 754c routines. 755c Daniel (2-8-13): Set the local TDA variable so that we don't change 756c the global one. 757 if (lhashf) then 758 if (tda) then 759 tdaloc = .false. ! For CIS and TDDFT/TDA (hybrid) calculations 760 else 761 tdaloc = .false. ! For RPA and TDDFT (hybrid) calculations 762 endif 763 else 764 if (tda) then 765 tdaloc = .true. ! For TDDFT/TDA (pure) calculations 766 else 767 tdaloc = .false. ! For TDDFT (pure) calculations 768 endif 769 endif 770 call tddft_nga_cont(rtdb,ihdl_geom,ihdl_bfao,g_x,g_apbx,g_ambx, 771 + nao,ipol,tol2e,tdaloc,oskel,kfac,lhashf,otriplet,nroots,iwhich) 772 call tddft_nga_cont(rtdb,ihdl_geom,ihdl_bfao,g_y,g_apby,g_amby, 773 + nao,ipol,tol2e,tdaloc,oskel,kfac,lhashf,otriplet,nroots,iwhich) 774c 775c - Dispose of X and Y 776c 777 if (.not.ga_destroy(g_x)) 778 + call errquit(pname//'failed to destroy g_x',0, GA_ERR) 779 if (.not.ga_destroy(g_y)) 780 + call errquit(pname//'failed to destroy g_y',0, GA_ERR) 781c 782c - Compute (A+B)(X+Y) and (A-B)(X-Y) 783cdbg (A+B)(X-Y) (A-B)(X+Y) 784c 785 call nga_add_patch(1.0d0,g_apbx,alo,ahi,+1.0d0,g_apby,alo,ahi, 786 + g_apbx,alo,ahi) 787 call nga_add_patch(1.0d0,g_ambx,alo,ahi,-1.0d0,g_amby,alo,ahi, 788 + g_ambx,alo,ahi) 789c 790c - Dispose of (A+B)Y and (A-B)Y 791c 792 if (.not.ga_destroy(g_apby)) 793 + call errquit(pname//'failed to destroy g_apby',0, GA_ERR) 794 if (.not.ga_destroy(g_amby)) 795 + call errquit(pname//'failed to destroy g_amby',0, GA_ERR) 796c 797c - Allocate (A+-B)(X+-Y)ij 798c 799 do ip = 1, ipol 800 idim(1) = nroots 801 idim(2) = naoc(ip) 802 idim(3) = naoc(ip) 803 ichnk(1) = nroots 804 ichnk(2) = -1 805 ichnk(3) = -1 806 if (.not.nga_create(mt_dbl,3,idim,'vec hij',ichnk,g_hij(ip))) 807 + call errquit(pname//'failed to create g_hij',0, GA_ERR) 808 enddo 809c 810c - Transform (A+B)(X+Y) to MO basis occupied-occupied block only 811cdbg (A+B)(X-Y) 812c 813 call tddft_grad_trans_ao2mo(ipol,nao,nfc,naoc,nocc,nav,nfv, 814 + nroots,1.0d0,0.0d0,"ij",g_mo,g_apbx,g_hij,"ij") 815c 816c - Add sum_j (X+Y)ja [(A+B)(X+Y)ji] to Wia 817cdbg (X-Y)ja [(A+B)(X-Y)ji] 818c use symmetry of [(A+B)(X+Y)ji] 819c 820 do ip = 1, ipol 821 do ir=1,nroots 822 alo(1) = ir 823 ahi(1) = ir 824 alo(2) = 1 825 ahi(2) = naoc(ip) 826 alo(3) = 1 827 ahi(3) = naoc(ip) 828 blo(1) = ir 829 bhi(1) = ir 830 blo(2) = 1 831 bhi(2) = naoc(ip) 832 blo(3) = 1 833 bhi(3) = nav(ip) 834 clo(1) = ir 835 chi(1) = ir 836 clo(2) = 1 837 chi(2) = naoc(ip) 838 clo(3) = naoc(ip)+1 839 chi(3) = naoc(ip)+nav(ip) 840 call tddft_patch3mxm('n','n',1.0d0,1.0d0,g_hij(ip),alo,ahi, 841 + g_xpy(ip),blo,bhi,g_w(ip),clo,chi) 842 enddo 843 enddo 844c 845c - Transform (A-B)(X-Y) to MO basis occupied-occupied block only 846cdbg (A-B)(X+Y) 847c 848 call tddft_grad_trans_ao2mo(ipol,nao,nfc,naoc,nocc,nav,nfv, 849 + nroots,1.0d0,0.0d0,"ij",g_mo,g_ambx,g_hij,"ij") 850c 851c - Add sum_j (X-Y)ja [(A-B)(X-Y)ji] to Wia 852cdbg (X+Y)js [(A-B)(X+Y)ji] 853c use anti-symmetry of [(A-B)(X-Y)ji] 854c 855c Daniel (1-7-13): We can definitely avoid this part of the routine if 856c we don't use exact exchange, since the linear transformation H^-[V] 857c is zero in that case. 858 if (lhashf) then 859 do ip = 1, ipol 860 do ir = 1, nroots 861 alo(1) = ir 862 ahi(1) = ir 863 alo(2) = 1 864 ahi(2) = naoc(ip) 865 alo(3) = 1 866 ahi(3) = naoc(ip) 867 blo(1) = ir 868 bhi(1) = ir 869 blo(2) = 1 870 bhi(2) = naoc(ip) 871 blo(3) = 1 872 bhi(3) = nav(ip) 873 clo(1) = ir 874 chi(1) = ir 875 clo(2) = 1 876 chi(2) = naoc(ip) 877 clo(3) = naoc(ip)+1 878 chi(3) = naoc(ip)+nav(ip) 879c Daniel (1-7-13): Manipulate the code here for CIS to use g_xpy here 880c since (X+Y) = (X-Y) = X. This is a consequence of not allocating 881c a g_xmy array for CIS. The linear transformation H^-[X] still exists 882c in CIS. 883 if (.not.tda) then 884 call tddft_patch3mxm('n','n',-1.0d0,1.0d0, 885 + g_hij(ip),alo,ahi, 886 + g_xmy(ip),blo,bhi,g_w(ip),clo,chi) 887 else 888 call tddft_patch3mxm('n','n',-1.0d0,1.0d0, 889 + g_hij(ip),alo,ahi, 890 + g_xpy(ip),blo,bhi,g_w(ip),clo,chi) 891 endif 892 enddo 893 enddo 894 endif ! lhashf 895c 896c - Dispose of g_hij 897c 898 do ip = 1, ipol 899 if (.not.ga_destroy(g_hij(ip))) 900 + call errquit(pname//'failed to destroy g_hij',0, GA_ERR) 901 enddo 902c 903c - Dispose of (A+B)(X+Y) and (A+B)(X-Y) 904c 905 if (.not.ga_destroy(g_apbx)) 906 + call errquit(pname//'failed to destroy g_apbx',0, GA_ERR) 907 if (.not.ga_destroy(g_ambx)) 908 + call errquit(pname//'failed to destroy g_ambx',0, GA_ERR) 909c 910c 911c Compute the virtual-virtual block 912c 913c Wabs = Omega Sum_i [(X+Y)ias(X-Y)ibs+(X-Y)ias(X+Y)ibs]/2 914c + Sum_i eps_is [(X+Y)ias(X+Y)ibs+(X-Y)ias(X-Y)ibs]/2 915c 916 do ip = 1, ipol 917 do ir = 1, nroots 918 alo(1) = ir 919 ahi(1) = ir 920 alo(2) = 1 921 ahi(2) = nav(ip) 922 alo(3) = 1 923 ahi(3) = naoc(ip) 924 blo(1) = ir 925 bhi(1) = ir 926 blo(2) = 1 927 bhi(2) = naoc(ip) 928 blo(3) = 1 929 bhi(3) = nav(ip) 930 clo(1) = ir 931 chi(1) = ir 932 clo(2) = naoc(ip)+1 933 chi(2) = naoc(ip)+nav(ip) 934 clo(3) = naoc(ip)+1 935 chi(3) = naoc(ip)+nav(ip) 936c 937 if (.not.tda) then 938 call tddft_patch3mxm('t','n',0.5d0*omega(ir),0.0d0, 939 + g_xpy(ip),alo,ahi, 940 + g_xmy(ip),blo,bhi, 941 + g_w(ip),clo,chi) 942 call tddft_patch3mxm('t','n',0.5d0*omega(ir),1.0d0, 943 + g_xmy(ip),alo,ahi, 944 + g_xpy(ip),blo,bhi, 945 + g_w(ip),clo,chi) 946 else 947c g_xpy: X with CIS, Y = 0 948c g_xmy: not created with CIS 949 call tddft_patch3mxm('t','n',omega(ir),0.0d0, 950 + g_xpy(ip),alo,ahi, 951 + g_xpy(ip),blo,bhi, 952 + g_w(ip),clo,chi) 953 endif ! tda 954 enddo ! ir 955c 956c Do the second term, drive the parallelization off the 957c distribution of (X+-Y)ib 958c 959c Do the (X+Y) terms first 960c 961 call nga_distribution(g_xpy(ip),iproc,blo,bhi) 962 doitxpy1 = .not.(blo(1).ne.1.or.bhi(1).ne.nroots) 963c 964 if (doitxpy1) then 965c 966 mini = blo(2) 967 maxi = bhi(2) 968 numi = maxi-mini+1 969 minb = blo(3) 970 maxb = bhi(3) 971 numb = maxb-minb+1 972 if (.not.ma_push_get(mt_dbl,nroots*numb*numi,'block b', 973 + ihdl_b,iptr_b)) 974 + call errquit(pname//'failed to allocate blockb',0,UERR) 975 ld(1) = nroots 976 ld(2) = numi 977 call nga_get(g_xpy(ip),blo,bhi,dbl_mb(iptr_b),ld) 978 ic = 0 979 do b = 1, numb 980 do i = 1, numi 981 imo = nfc(ip)+mini-1+i 982 do ir = 1, nroots 983 dbl_mb(iptr_b+ic) = 0.5d0*eps(imo,ip)*dbl_mb(iptr_b+ic) 984 ic = ic + 1 985 enddo 986 enddo 987 enddo 988c 989 do icount = 0, nproc-1 990 jproc = mod(iproc+icount,nproc) 991c 992 call nga_distribution(g_xpy(ip),jproc,alo,ahi) 993 doitxpy2 = .not.(alo(1).ne.1.or.ahi(1).ne.nroots) 994c 995 if (doitxpy2.and.doitxpy1) then 996 if (alo(2).eq.mini.and.ahi(2).eq.maxi) then 997 mina = alo(3) 998 maxa = ahi(3) 999 numa = maxa-mina+1 1000 alo(2) = mini 1001 ahi(2) = maxi 1002 if (.not.ma_push_get(mt_dbl,nroots*numa*numi,'block a', 1003 + ihdl_a,iptr_a)) 1004 + call errquit(pname//'failed to allocate blockb',0, MA_ERR) 1005 if (.not.ma_push_get(mt_dbl,nroots*numa*numb,'block r', 1006 + ihdl_r,iptr_r)) 1007 + call errquit(pname//'failed to allocate blockr',0, MA_ERR) 1008 ld(1) = nroots 1009 ld(2) = numi 1010 call nga_get(g_xpy(ip),alo,ahi,dbl_mb(iptr_a),ld) 1011 call dfill(nroots*numa*numb,0.0d0,dbl_mb(iptr_r),1) 1012 do b = 0, numb-1 1013 do a = 0, numa-1 1014 do i = 0, numi-1 1015 do ir = 0, nroots-1 1016 dbl_mb(iptr_r+ir+nroots*a+nroots*numa*b) 1017 + = dbl_mb(iptr_r+ir+nroots*a+nroots*numa*b) 1018 + + dbl_mb(iptr_a+ir+nroots*i+nroots*numi*a) 1019 + * dbl_mb(iptr_b+ir+nroots*i+nroots*numi*b) 1020 enddo 1021 enddo 1022 enddo 1023 enddo 1024c Daniel (12-3-12): We need to account for (X-Y) = X in TDA here. 1025c We must either double the contribution from (X+Y) here or redo the 1026c procedure in this block of code. I prefer scaling here since it's 1027c less messy. 1028 if (tda) then 1029 call dscal(nroots*numa*numb,2.0d0,dbl_mb(iptr_r),1) 1030 endif 1031 clo(1) = 1 1032 chi(1) = nroots 1033 clo(2) = naoc(ip)+mina 1034 chi(2) = naoc(ip)+maxa 1035 clo(3) = naoc(ip)+minb 1036 chi(3) = naoc(ip)+maxb 1037 ld(1) = nroots 1038 ld(2) = numa 1039 call nga_acc(g_w(ip),clo,chi,dbl_mb(iptr_r),ld,1.0d0) 1040 if (.not.ma_pop_stack(ihdl_r)) 1041 + call errquit(pname//'failed to deallocate blockr',0,MA_ERR) 1042 if (.not.ma_pop_stack(ihdl_a)) 1043 + call errquit(pname//'failed to deallocate blocka',0,MA_ERR) 1044 endif 1045 endif ! doitxpy1 and doitxpy2 1046 enddo ! icount 1047c 1048 if (.not.ma_pop_stack(ihdl_b)) 1049 + call errquit(pname//'failed to deallocate blockb',0,MA_ERR) 1050 endif ! doitxpy1 1051c 1052 call ga_sync() 1053c 1054 if (.not.tda) then 1055c 1056c Do the (X-Y) terms next 1057c 1058 call nga_distribution(g_xmy(ip),iproc,blo,bhi) 1059 doitxmy1 = .not.(blo(1).ne.1.or.bhi(1).ne.nroots) 1060c 1061 if (doitxmy1) then 1062c 1063 mini = blo(2) 1064 maxi = bhi(2) 1065 numi = maxi-mini+1 1066 minb = blo(3) 1067 maxb = bhi(3) 1068 numb = maxb-minb+1 1069 if (.not.ma_push_get(mt_dbl,nroots*numb*numi,'block b', 1070 + ihdl_b,iptr_b)) 1071 + call errquit(pname//'failed to allocate blockb',0,UERR) 1072 ld(1) = nroots 1073 ld(2) = numi 1074 call nga_get(g_xmy(ip),blo,bhi,dbl_mb(iptr_b),ld) 1075 ic = 0 1076 do b = 1, numb 1077 do i = 1, numi 1078 imo = nfc(ip)+mini-1+i 1079 do ir = 1, nroots 1080 dbl_mb(iptr_b+ic) = 0.5d0*eps(imo,ip)*dbl_mb(iptr_b+ic) 1081 ic = ic + 1 1082 enddo 1083 enddo 1084 enddo 1085c 1086 do icount = 0, nproc-1 1087 jproc = mod(iproc+icount,nproc) 1088c 1089 call nga_distribution(g_xmy(ip),jproc,alo,ahi) 1090 doitxmy2 = .not.(alo(1).ne.1.or.ahi(1).ne.nroots) 1091c 1092 if (doitxmy2) then 1093 if (alo(2).eq.mini.and.ahi(2).eq.maxi) then 1094 mina = alo(3) 1095 maxa = ahi(3) 1096 numa = maxa-mina+1 1097 alo(2) = mini 1098 ahi(2) = maxi 1099 if (.not.ma_push_get(mt_dbl,nroots*numa*numi,'block a', 1100 + ihdl_a,iptr_a)) 1101 + call errquit(pname//'failed to allocate blockb',0, UERR) 1102 if (.not.ma_push_get(mt_dbl,nroots*numa*numb,'block r', 1103 + ihdl_r,iptr_r)) 1104 + call errquit(pname//'failed to allocate blockr',0, UERR) 1105 ld(1) = nroots 1106 ld(2) = numi 1107 call nga_get(g_xmy(ip),alo,ahi,dbl_mb(iptr_a),ld) 1108 call dfill(nroots*numa*numb,0.0d0,dbl_mb(iptr_r),1) 1109 do b = 0, numb-1 1110 do a = 0, numa-1 1111 do i = 0, numi-1 1112 do ir = 0, nroots-1 1113 dbl_mb(iptr_r+ir+nroots*a+nroots*numa*b) 1114 + = dbl_mb(iptr_r+ir+nroots*a+nroots*numa*b) 1115 + + dbl_mb(iptr_a+ir+nroots*i+nroots*numi*a) 1116 + * dbl_mb(iptr_b+ir+nroots*i+nroots*numi*b) 1117 enddo 1118 enddo 1119 enddo 1120 enddo 1121 clo(1) = 1 1122 chi(1) = nroots 1123 clo(2) = naoc(ip)+mina 1124 chi(2) = naoc(ip)+maxa 1125 clo(3) = naoc(ip)+minb 1126 chi(3) = naoc(ip)+maxb 1127 ld(1) = nroots 1128 ld(2) = numa 1129 call nga_acc(g_w(ip),clo,chi,dbl_mb(iptr_r),ld,1.0d0) 1130 if (.not.ma_pop_stack(ihdl_r)) 1131 + call errquit(pname//'failed to deallocate blockr',0,MA_ERR) 1132 if (.not.ma_pop_stack(ihdl_a)) 1133 + call errquit(pname//'failed to deallocate blocka',0,MA_ERR) 1134 endif 1135 endif ! doitxmy1 and doitxmy2 1136 enddo ! icount 1137c 1138 if (.not.ma_pop_stack(ihdl_b)) 1139 + call errquit(pname//'failed to deallocate blockb',0,MA_ERR) 1140 endif !doitxmy1 1141 1142 call ga_sync() 1143 1144 endif ! tda 1145c 1146 enddo 1147c 1148c Copy Wia to Wai 1149c 1150 do ip = 1, ipol 1151 alo(1) = 1 1152 ahi(1) = nroots 1153 alo(2) = 1 1154 ahi(2) = naoc(ip) 1155 alo(3) = naoc(ip)+1 1156 ahi(3) = naoc(ip)+nav(ip) 1157 call nga_scale_patch(g_w(ip),alo,ahi,0.5d0) 1158 idim(1) = naoc(ip) 1159 idim(2) = nav(ip) 1160 ichnk(1) = -1 1161 ichnk(2) = -1 1162 if (.not.nga_create(mt_dbl,2,idim,"transpose",ichnk,g_t)) 1163 + call errquit(pname//'failed to create g_t',0,GA_ERR) 1164 do ir = 1, nroots 1165 alo(1) = ir 1166 ahi(1) = ir 1167 alo(2) = 1 1168 ahi(2) = naoc(ip) 1169 alo(3) = naoc(ip)+1 1170 ahi(3) = naoc(ip)+nav(ip) 1171 blo(1) = 1 1172 bhi(1) = naoc(ip) 1173 blo(2) = 1 1174 bhi(2) = nav(ip) 1175 call nga_copy_patch('n',g_w(ip),alo,ahi,g_t,blo,bhi) 1176 alo(2) = naoc(ip)+1 1177 ahi(2) = naoc(ip)+nav(ip) 1178 alo(3) = 1 1179 ahi(3) = naoc(ip) 1180 bhi(2) = nav(ip) 1181 bhi(1) = naoc(ip) 1182 call nga_copy_patch('t',g_t,blo,bhi,g_w(ip),alo,ahi) 1183 enddo 1184 if (.not.ga_destroy(g_t)) 1185 + call errquit(pname//'failed to destroy g_t',0,GA_ERR) 1186 enddo 1187 call ga_sync() 1188c 1189 if (tddft_grad_util_print('tddft grad w',print_debug)) then 1190 oroot = ga_nodeid().eq.0 1191 if (oroot) write(LuOut,*)'DEBUG: '//pname//'W' 1192 call tddft_grad_print_array(ipol,nroots,g_w,dble(ipol)) 1193 endif 1194c 1195 end 1196c $Id$ 1197