1* 2* $Id$ 3* 4 5#define TCGMSG 6 7* **************************************************** 8* * * 9* * cpsp_init * 10* * * 11* **************************************************** 12 subroutine cpsp_init() 13 implicit none 14 15#include "bafdecls.fh" 16#include "cpsp_common.fh" 17#include "errquit.fh" 18 19* ***** local variables **** 20 logical value 21 integer axsize 22 integer npack0,npack1,nbrillq,nion 23 24* **** external functions ***** 25 logical two_component_pseudopotential,control_pspspin 26 external two_component_pseudopotential,control_pspspin 27 integer ion_nkatm, brillioun_nbrillq,ion_nion 28 external ion_nkatm, brillioun_nbrillq,ion_nion 29 30 npsp = ion_nkatm() 31 nbrillq = brillioun_nbrillq() 32 33 call Cram_npack(0,npack0) 34 call Cram_max_npack(npack1) 35 36* **** allocate projector datastructure **** 37 call cpsp_projector_init(5*npsp) 38 39* ***** allocate local potentitals ***** 40 value = BA_alloc_get(mt_dbl,(npsp*npack0),'vl',vl(2),vl(1)) 41 42 43* ***** allocate projector tag lists ***** 44 value = value.and. 45 > BA_alloc_get(mt_int,npsp,'vnl',vnl(2),vnl(1)) 46 value = value.and. 47 > BA_alloc_get(mt_int,npsp,'vnlso',vnlso(2),vnlso(1)) 48 49* **** nonlocal coeffiencients ***** 50 axsize=npsp*gij_stride 51 value = value.and. 52 > BA_alloc_get(mt_dbl,axsize,'Gijl',Gijl(2),Gijl(1)) 53 axsize=npsp*kij_stride 54 value = value.and. 55 > BA_alloc_get(mt_dbl,axsize,'Kijl',Kijl(2),Kijl(1)) 56 57 value = value.and. 58 > BA_alloc_get(mt_int,(npsp),'nprj',nprj(2),nprj(1)) 59 60 axsize=npsp*jmmax_max 61 value = value.and. 62 > BA_alloc_get(mt_int,axsize, 63 > 'n_projector',n_projector(2),n_projector(1)) 64 value = value.and. 65 > BA_alloc_get(mt_int,axsize, 66 > 'l_projector',l_projector(2),l_projector(1)) 67 value = value.and. 68 > BA_alloc_get(mt_int,axsize, 69 > 'm_projector',m_projector(2),m_projector(1)) 70 71 72 value = value.and. 73 > BA_alloc_get(mt_dbl,(npsp),'zv',zv(2),zv(1)) 74 value = value.and. 75 > BA_alloc_get(mt_dbl,(npsp),'amass',amass(2),amass(1)) 76 value = value.and. 77 > BA_alloc_get(mt_dbl,(npsp*(lmax_max+1)),'rc',rc(2),rc(1)) 78 value = value.and. 79 > BA_alloc_get(mt_int,(npsp),'lmmax',lmmax(2),lmmax(1)) 80 value = value.and. 81 > BA_alloc_get(mt_int,(npsp),'lmax',lmax(2),lmax(1)) 82 value = value.and. 83 > BA_alloc_get(mt_int,(npsp),'locp',locp(2),locp(1)) 84 value = value.and. 85 > BA_alloc_get(mt_int,(npsp),'nmax',nmax(2),nmax(1)) 86 value = value.and. 87 > BA_alloc_get(mt_int,(npsp), 88 > 'psp_type',psp_type(2),psp_type(1)) 89 90* **** setup pspspin structure - used for generating antiferromagnetic structures **** 91 pspspin = control_pspspin() 92 if (pspspin) then 93 nion = ion_nion() 94 value = value.and. 95 > BA_alloc_get(mt_log,nion,'pspspin_upions', 96 > pspspin_upions(2),pspspin_upions(1)) 97 value = value.and. 98 > BA_alloc_get(mt_log,nion,'pspspin_downions', 99 > pspspin_downions(2),pspspin_downions(1)) 100 if (.not. value) 101 > call errquit('psp_init:out of heap memory',0, MA_ERR) 102 103 call control_set_pspspin(pspspin_upscale,pspspin_downscale, 104 > pspspin_upl,pspspin_downl, 105 > nion, 106 > log_mb(pspspin_upions(1)), 107 > log_mb(pspspin_downions(1))) 108 end if 109 110 if (.not. value) 111 > call errquit('cpsp_init:out of heap memory',0, MA_ERR) 112 113 call dcopy(npsp*npack0,0.0d0,0,dbl_mb(vl(1)),1) 114 call dcopy(npsp, 0.0d0,0,dbl_mb(zv(1)),1) 115 call dcopy(npsp, 0.0d0,0,dbl_mb(amass(1)),1) 116 call dcopy(npsp*(lmax_max+1), 0.0d0,0,dbl_mb(rc(1)),1) 117 118* **** allocate semicore data **** 119 call c_semicore_init() 120 return 121 end 122 123* **************************************************** 124* * * 125* * cpsp_proj_init * 126* * * 127* **************************************************** 128 subroutine cpsp_proj_init() 129 implicit none 130 131#include "bafdecls.fh" 132#include "errquit.fh" 133#include "cpsp_common.fh" 134 135* ***** local variables **** 136 logical value 137 integer axsize,npack1 138 139* ***** external functions ***** 140 integer cpsp_nprj_max 141 external cpsp_nprj_max 142 143 nprj_max = cpsp_nprj_max() 144 call Cram_max_npack(npack1) 145 146 axsize=npack1*nprj_max 147 value = BA_alloc_get(mt_dcpl,axsize,'prjtmp',prjtmp(2),prjtmp(1)) 148 if (.not. value) 149 > call errquit('cpsp_proj_init:out of heap memory',0,MA_ERR) 150 151 return 152 end 153 154* ****************************************** 155* * * 156* * cpsp_end * 157* * * 158* ****************************************** 159 subroutine cpsp_end() 160 implicit none 161 162#include "bafdecls.fh" 163#include "errquit.fh" 164#include "cpsp_common.fh" 165 166 logical value 167 168* **** external functions **** 169 170 171* **** deallocate projector data **** 172 call cpsp_projector_end() 173 174* **** deallocate semicore data **** 175 call c_semicore_end() 176 177 value = BA_free_heap(prjtmp(2)) 178 value = value.and.BA_free_heap(vl(2)) 179 value = value.and.BA_free_heap(vnlso(2)) 180 value = value.and.BA_free_heap(vnl(2)) 181 value = value.and.BA_free_heap(Kijl(2)) 182 value = value.and.BA_free_heap(Gijl(2)) 183 value = value.and.BA_free_heap(nprj(2)) 184 value = value.and.BA_free_heap(n_projector(2)) 185 value = value.and.BA_free_heap(l_projector(2)) 186 value = value.and.BA_free_heap(m_projector(2)) 187 value = value.and.BA_free_heap(zv(2)) 188 value = value.and.BA_free_heap(amass(2)) 189 value = value.and.BA_free_heap(rc(2)) 190 value = value.and.BA_free_heap(lmmax(2)) 191 value = value.and.BA_free_heap(lmax(2)) 192 value = value.and.BA_free_heap(locp(2)) 193 value = value.and.BA_free_heap(nmax(2)) 194 value = value.and.BA_free_heap(psp_type(2)) 195 if (pspspin) then 196 value = value.and.BA_free_heap(pspspin_upions(2)) 197 value = value.and.BA_free_heap(pspspin_downions(2)) 198 end if 199 if (.not. value) 200 > call errquit('cpsp_end:error freeing heap memory',0,MA_ERR) 201 202 return 203 end 204 205 206* *********************************** 207* * * 208* * cpsp_zv * 209* * * 210* *********************************** 211 real*8 function cpsp_zv(ia) 212 implicit none 213 integer ia 214 215#include "bafdecls.fh" 216#include "cpsp_common.fh" 217 218 219 cpsp_zv = dbl_mb(zv(1)+ia-1) 220 return 221 end 222 223 224* *********************************** 225* * * 226* * cpsp_amass * 227* * * 228* *********************************** 229 real*8 function cpsp_amass(ia) 230 implicit none 231 integer ia 232 233#include "bafdecls.fh" 234#include "cpsp_common.fh" 235 236 cpsp_amass = dbl_mb(amass(1)+ia-1) 237 return 238 end 239 240 241* *********************************** 242* * * 243* * cpsp_rc * 244* * * 245* *********************************** 246 real*8 function cpsp_rc(i,ia) 247 implicit none 248 integer i,ia 249 250#include "bafdecls.fh" 251#include "cpsp_common.fh" 252 253 cpsp_rc = dbl_mb(rc(1) + i + (lmax_max+1)*(ia-1)) 254 return 255 end 256 257* *********************************** 258* * * 259* * cpsp_atom * 260* * * 261* *********************************** 262 character*2 function cpsp_atom(ia) 263 implicit none 264 integer ia 265 266#include "cpsp_common.fh" 267 268 cpsp_atom = atom(ia) 269 return 270 end 271 272 273* *********************************** 274* * * 275* * psp_comment * 276* * * 277* *********************************** 278 character*(*) function cpsp_comment(ia) 279 implicit none 280 integer ia 281 282#include "cpsp_common.fh" 283 284 285 cpsp_comment = comment(ia) 286 return 287 end 288 289 290 291 292* *********************************** 293* * * 294* * cpsp_lmmax * 295* * * 296* *********************************** 297 integer function cpsp_lmmax(ia) 298 implicit none 299 integer ia 300 301#include "bafdecls.fh" 302#include "cpsp_common.fh" 303 304 305 cpsp_lmmax = int_mb(lmmax(1)+ia-1) 306 return 307 end 308 309* *********************************** 310* * * 311* * cpsp_nprj * 312* * * 313* *********************************** 314 integer function cpsp_nprj(ia) 315 implicit none 316 integer ia 317 318#include "bafdecls.fh" 319#include "cpsp_common.fh" 320 321 cpsp_nprj = int_mb(nprj(1)+ia-1) 322 return 323 end 324 325* *********************************** 326* * * 327* * cpsp_nprj_max * 328* * * 329* *********************************** 330 integer function cpsp_nprj_max() 331 implicit none 332 333#include "bafdecls.fh" 334#include "cpsp_common.fh" 335 336 integer ia,nprjmax,nprjtmp 337 338 nprjmax = 0 339 do ia=1,npsp 340 nprjtmp = (int_mb(nprj(1)+ia-1)) 341 if (int_mb(psp_type(1)+ia-1).eq.7) nprjtmp = 2*nprjtmp 342 if (nprjtmp.gt.nprjmax) nprjmax = nprjtmp 343 end do 344 345 cpsp_nprj_max = nprjmax 346 return 347 end 348 349* *********************************** 350* * * 351* * cpsp_psp_type * 352* * * 353* *********************************** 354 integer function cpsp_psp_type(ia) 355 implicit none 356 integer ia 357 358#include "bafdecls.fh" 359#include "cpsp_common.fh" 360 361 cpsp_psp_type = int_mb(psp_type(1)+ia-1) 362 return 363 end 364 365 366* *********************************** 367* * * 368* * cpsp_lmax * 369* * * 370* *********************************** 371 integer function cpsp_lmax(ia) 372 implicit none 373 integer ia 374 375#include "bafdecls.fh" 376#include "cpsp_common.fh" 377 378 379 cpsp_lmax = int_mb(lmax(1)+ia-1) 380 return 381 end 382 383* *********************************** 384* * * 385* * cpsp_locp * 386* * * 387* *********************************** 388 integer function cpsp_locp(ia) 389 implicit none 390 integer ia 391 392#include "bafdecls.fh" 393#include "cpsp_common.fh" 394 395 396 cpsp_locp = int_mb(locp(1)+ia-1) 397 return 398 end 399 400* *********************************** 401* * * 402* * cpsp_npsp * 403* * * 404* *********************************** 405 integer function cpsp_npsp() 406 implicit none 407 408#include "cpsp_common.fh" 409 410 411 cpsp_npsp = npsp 412 return 413 end 414 415 416* *********************************** 417* * * 418* * cpsp_v_local * 419* * * 420* *********************************** 421 subroutine cpsp_v_local(vl_out,move,dng,fion) 422 implicit none 423 complex*16 vl_out(*) 424 logical move 425 complex*16 dng(*) 426 real*8 fion(3,*) 427 428#include "bafdecls.fh" 429#include "cpsp_common.fh" 430#include "errquit.fh" 431 432* *** local variables *** 433 integer nfft3d,npack0 434 integer i,ii,ia 435 integer exi(2),vtmp(2),xtmp(2),G(3) 436 integer Gx(2),Gy(2),Gz(2) 437 logical value 438 439* **** external functions **** 440 integer c_G_indx,ion_nion,ion_katm 441 external c_G_indx,ion_nion,ion_katm 442 443 call nwpw_timing_start(5) 444 call C3dB_nfft3d(1,nfft3d) 445 call Cram_npack(0,npack0) 446 447 value = BA_push_get(mt_dcpl,npack0,'exi', exi(2), exi(1)) 448 value = value.and. 449 > BA_push_get(mt_dcpl,nfft3d,'vtmp',vtmp(2),vtmp(1)) 450 if (.not. value) call errquit('cpsp_v_local: pushing stack',0, 451 & MA_ERR) 452 453 if (move) then 454 value = BA_push_get(mt_dbl, nfft3d,'xtmp',xtmp(2),xtmp(1)) 455 value = value.and. 456 > BA_push_get(mt_dbl, nfft3d,'Gx',Gx(2),Gx(1)) 457 value = value.and. 458 > BA_push_get(mt_dbl, nfft3d,'Gy',Gy(2),Gy(1)) 459 value = value.and. 460 > BA_push_get(mt_dbl, nfft3d,'Gz',Gz(2),Gz(1)) 461 if (.not. value) call errquit('cpsp_v_local: pushing stack',1, 462 & MA_ERR) 463 G(1) = c_G_indx(1) 464 G(2) = c_G_indx(2) 465 G(3) = c_G_indx(3) 466 467* **** define Gx,Gy and Gz in packed space **** 468 call C3dB_t_Copy(1,dbl_mb(G(1)),dbl_mb(Gx(1))) 469 call C3dB_t_Copy(1,dbl_mb(G(2)),dbl_mb(Gy(1))) 470 call C3dB_t_Copy(1,dbl_mb(G(3)),dbl_mb(Gz(1))) 471 call Cram_r_pack(0,dbl_mb(Gx(1))) 472 call Cram_r_pack(0,dbl_mb(Gy(1))) 473 call Cram_r_pack(0,dbl_mb(Gz(1))) 474 end if 475 476 call dcopy((2*npack0),0.0d0,0,vl_out,1) 477 do ii=1,ion_nion() 478 ia=ion_katm(ii) 479 480* **** structure factor and local pseudopotential **** 481 call cstrfac_pack(0,ii,dcpl_mb(exi(1))) 482 483* **** add to local psp **** 484 call Cram_rc_Mul(0,dbl_mb(vl(1)+npack0*(ia-1)), 485 > dcpl_mb(exi(1)), 486 > dcpl_mb(vtmp(1))) 487c call Cram_cc_Sum(0,vl_out,dcpl_mb(vtmp(1)),vl_out) 488 call Cram_cc_Sum2(0,dcpl_mb(vtmp(1)),vl_out) 489 490 if (move) then 491 492 do i=1,npack0 493 dbl_mb(xtmp(1)+i-1) 494 > = dimag(dng(i))* dble(dcpl_mb(vtmp(1)+i-1)) 495 > - dble(dng(i))*dimag(dcpl_mb(vtmp(1)+i-1)) 496 end do 497 call Cram_rr_dot(0,dbl_mb(Gx(1)),dbl_mb(xtmp(1)),fion(1,ii)) 498 call Cram_rr_dot(0,dbl_mb(Gy(1)),dbl_mb(xtmp(1)),fion(2,ii)) 499 call Cram_rr_dot(0,dbl_mb(Gz(1)),dbl_mb(xtmp(1)),fion(3,ii)) 500 501 end if 502 503 504 end do 505 506 if (move) then 507 value = BA_pop_stack(Gz(2)) 508 value = value.and.BA_pop_stack(Gy(2)) 509 value = value.and.BA_pop_stack(Gx(2)) 510 value = value.and.BA_pop_stack(xtmp(2)) 511 if (.not. value) call errquit('cpsp_v_local: popping stack',0, 512 & MA_ERR) 513 end if 514 value = BA_pop_stack(vtmp(2)) 515 value = value.and.BA_pop_stack(exi(2)) 516 if (.not. value) call errquit('cpsp_v_local: popping stack',1, 517 & MA_ERR) 518 519 call nwpw_timing_end(5) 520 return 521 end 522 523 524* *********************************** 525* * * 526* * cpsp_f_vlocal * 527* * * 528* *********************************** 529 530 subroutine cpsp_f_vlocal(dng,fion) 531 implicit none 532 complex*16 dng(*) 533 real*8 fion(3,*) 534 535#include "bafdecls.fh" 536#include "cpsp_common.fh" 537#include "errquit.fh" 538 539 540* *** local variables *** 541 integer nfft3d,npack0 542 integer i,ii,ia 543 integer exi(2),vtmp(2),xtmp(2),G(3) 544 integer Gx(2),Gy(2),Gz(2) 545 logical value 546 547* **** external functions **** 548 integer c_G_indx,ion_nion,ion_katm 549 external c_G_indx,ion_nion,ion_katm 550 551 call nwpw_timing_start(5) 552 553 call C3dB_nfft3d(1,nfft3d) 554 call Cram_npack(0,npack0) 555 value = BA_push_get(mt_dcpl,npack0,'exi', exi(2), exi(1)) 556 value = value.and. 557 > BA_push_get(mt_dcpl,nfft3d,'vtmp',vtmp(2),vtmp(1)) 558 value = value.and. 559 > BA_push_get(mt_dbl, nfft3d,'xtmp',xtmp(2),xtmp(1)) 560 value = value.and. 561 > BA_push_get(mt_dbl, nfft3d,'Gx',Gx(2),Gx(1)) 562 value = value.and. 563 > BA_push_get(mt_dbl, nfft3d,'Gy',Gy(2),Gy(1)) 564 value = value.and. 565 > BA_push_get(mt_dbl, nfft3d,'Gz',Gz(2),Gz(1)) 566 if (.not. value) call errquit('cpsp_f_vlocal:pushing stack',0, 567 & MA_ERR) 568 G(1) = c_G_indx(1) 569 G(2) = c_G_indx(2) 570 G(3) = c_G_indx(3) 571 572* **** define Gx,Gy and Gz in packed space **** 573 call C3dB_t_Copy(1,dbl_mb(G(1)),dbl_mb(Gx(1))) 574 call C3dB_t_Copy(1,dbl_mb(G(2)),dbl_mb(Gy(1))) 575 call C3dB_t_Copy(1,dbl_mb(G(3)),dbl_mb(Gz(1))) 576 call Cram_r_pack(0,dbl_mb(Gx(1))) 577 call Cram_r_pack(0,dbl_mb(Gy(1))) 578 call Cram_r_pack(0,dbl_mb(Gz(1))) 579 580 do ii=1,ion_nion() 581 ia=ion_katm(ii) 582 583* **** structure factor and local pseudopotential **** 584c call cstrfac(ii,dcpl_mb(exi(1))) 585c call Cram_c_pack(0,dcpl_mb(exi(1))) 586 call cstrfac_pack(0,ii,dcpl_mb(exi(1))) 587 588* **** add to local psp **** 589 call Cram_rc_Mul(0,dbl_mb(vl(1)+npack0*(ia-1)), 590 > dcpl_mb(exi(1)), 591 > dcpl_mb(vtmp(1))) 592 593 do i=1,npack0 594 dbl_mb(xtmp(1)+i-1) 595 > = dimag(dng(i))* dble(dcpl_mb(vtmp(1)+i-1)) 596 > - dble(dng(i))*dimag(dcpl_mb(vtmp(1)+i-1)) 597 end do 598 599 call Cram_rr_dot(0,dbl_mb(Gx(1)),dbl_mb(xtmp(1)),fion(1,ii)) 600 call Cram_rr_dot(0,dbl_mb(Gy(1)),dbl_mb(xtmp(1)),fion(2,ii)) 601 call Cram_rr_dot(0,dbl_mb(Gz(1)),dbl_mb(xtmp(1)),fion(3,ii)) 602 603 end do 604 value = BA_pop_stack(Gz(2)) 605 value = value.and.BA_pop_stack(Gy(2)) 606 value = value.and.BA_pop_stack(Gx(2)) 607 value = value.and.BA_pop_stack(xtmp(2)) 608 value = value.and.BA_pop_stack(vtmp(2)) 609 value = value.and.BA_pop_stack(exi(2)) 610 if (.not. value) call errquit('cpsp_f_vlocal:popping stack',1, 611 & MA_ERR) 612 613 call nwpw_timing_end(5) 614 return 615 end 616 617 618 619* *********************************** 620* * * 621* * cpsp_v_nonlocal * 622* * * 623* *********************************** 624 625 subroutine cpsp_v_nonlocal(ispin,ne,psi1_tag,psi2_tag,move,fion) 626 implicit none 627 integer ispin,ne(2) 628 integer psi1_tag 629 integer psi2_tag 630 logical move 631 real*8 fion(3,*) 632 633#include "bafdecls.fh" 634#include "cpsp_common.fh" 635#include "errquit.fh" 636 637* *** local variables *** 638 complex*16 one,mone,ione 639 integer nfft3d,G(3),npack1,npack 640 integer ii,ia,l,n,nn,nb,nbq,neall,nbrillq,nion 641 integer shift,l_prj,nproj 642 integer psi1_shift,psi2_shift,psi_shift,nshift 643 integer occ_tag,occ_shift,occ1_shift 644 real*8 omega,weight,scal,wfrac 645 complex*16 cxr 646 integer exi(2),xtmp(2),zsw1(2),zsw2(2),sum(2),ftmp(2) 647 integer Gx(2),Gy(2),Gz(2),shfts,shftd 648 logical value,sd_function 649 650* **** external functions **** 651 logical is_sORd,cpsi_spin_orbit 652 integer ion_nion,ion_katm,c_G_indx,Pneb_nbrillq 653 integer cpsp_projector_get_ptr,cpsi_data_get_chnk 654 integer cpsi_data_get_next 655 real*8 lattice_omega,brillioun_weight 656 external is_sORd,cpsi_spin_orbit 657 external ion_nion,ion_katm,c_G_indx,Pneb_nbrillq 658 external cpsp_projector_get_ptr,cpsi_data_get_chnk 659 external cpsi_data_get_next 660 external lattice_omega,brillioun_weight 661 662 one = dcmplx( 1.0d0,0.0d0) 663 mone = dcmplx(-1.0d0,0.0d0) 664 ione = dcmplx( 0.0d0,1.0d0) 665 call nwpw_timing_start(6) 666 667 668* **** allocate local memory **** 669 nn = ne(1)+ne(2) 670 neall = ne(1)+ne(2) 671 nion = ion_nion() 672 nbrillq = Pneb_nbrillq() 673 call C3dB_nfft3d(1,nfft3d) 674 call Cram_max_npack(npack1) 675 nshift = 2*npack1 676 677 value = BA_push_get(mt_dcpl,npack1,'exi',exi(2),exi(1)) 678 value = value.and.BA_push_get(mt_dcpl,nn*nprj_max, 679 > 'zsw1',zsw1(2),zsw1(1)) 680 value = value.and.BA_push_get(mt_dcpl,nn*nprj_max, 681 > 'zsw2',zsw2(2),zsw2(1)) 682 if (.not.value) call errquit('cpsp_v_nonlocal:pushing stack',0, 683 > MA_ERR) 684 685ccccccccccccccc if move we do this ccccccccccccccccccccccccc 686 if (move) then 687 occ_tag = cpsi_data_get_next(psi1_tag) 688 value = BA_push_get(mt_dbl, nfft3d,'xtmp',xtmp(2),xtmp(1)) 689 value = value.and. 690 > BA_push_get(mt_dbl, nfft3d,'Gx',Gx(2),Gx(1)) 691 value = value.and. 692 > BA_push_get(mt_dbl, nfft3d,'Gy',Gy(2),Gy(1)) 693 value = value.and. 694 > BA_push_get(mt_dbl, nfft3d,'Gz',Gz(2),Gz(1)) 695 value = value.and. 696 > BA_push_get(mt_dbl,3*nn,'sum',sum(2),sum(1)) 697 value = value.and. 698 > BA_push_get(mt_dbl,3*nion,'ftmp',ftmp(2),ftmp(1)) 699 if (.not.value) call errquit('cpsp_v_nonlocal:pushing stack',1, 700 & MA_ERR) 701 G(1) = c_G_indx(1) 702 G(2) = c_G_indx(2) 703 G(3) = c_G_indx(3) 704 call dcopy(3*nion,0.0d0,0,dbl_mb(ftmp(1)),1) 705 end if 706cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc 707 708 omega = lattice_omega() 709 scal = 1.0d0/omega 710 711 do nbq=1,nbrillq 712 psi1_shift = cpsi_data_get_chnk(psi1_tag,nbq) 713 psi2_shift = cpsi_data_get_chnk(psi2_tag,nbq) 714 call Cram_npack(nbq,npack) 715 716 do 500 ii=1,nion 717 ia=ion_katm(ii) 718 719 nproj = int_mb(nprj(1)+ia-1) 720 if (nproj.gt.0) then 721 722 if (int_mb(psp_type(1)+ia-1).eq.7) then 723 724 call cpsp_v_nonlocal_rel(nbq,ii,ia,nproj,npack1,nbrillq, 725 > nn,ne, 726 > dcpl_mb(exi(1)),dcpl_mb(prjtmp(1)), 727 > dcpl_mb(zsw1(1)), 728 > dbl_mb(psi1_shift),dbl_mb(psi2_shift), 729 > dbl_mb(Kijl(1)+(ia-1)*kij_stride),scal) 730 731 if (move) then 732 733* **** define Gx,Gy and Gz in packed space **** 734 call C3dB_t_Copy(1,dbl_mb(G(1)),dbl_mb(Gx(1))) 735 call C3dB_t_Copy(1,dbl_mb(G(2)),dbl_mb(Gy(1))) 736 call C3dB_t_Copy(1,dbl_mb(G(3)),dbl_mb(Gz(1))) 737 call Cram_r_pack(nbq,dbl_mb(Gx(1))) 738 call Cram_r_pack(nbq,dbl_mb(Gy(1))) 739 call Cram_r_pack(nbq,dbl_mb(Gz(1))) 740 741 shftd=nproj*npack1 742 shfts=ne(1)*npack1*2 743 weight = brillioun_weight(nbq) 744 if (occ_tag.gt.0) 745 > occ1_shift = cpsi_data_get_chnk(occ_tag,nbq) 746 do l=1,nproj 747 psi_shift = psi1_shift 748 do n=1,ne(1) 749 750* **** routine should be vectorized!!!**** 751 call Cram_zccr_Multiply2(nbq, 752 > dcpl_mb(zsw1(1)+(l-1)*ne(1)+n-1), 753 > dbl_mb(psi_shift), 754 > dcpl_mb(prjtmp(1)+(l-1)*npack1), 755 > dbl_mb(xtmp(1))) 756 call Cram_zccr_Multiply2Add(nbq, 757 > dcpl_mb(zsw1(1)+(l-1)*ne(1)+n-1), 758 > dbl_mb(psi_shift+shfts), 759 > dcpl_mb(prjtmp(1)+(l-1)*npack1+shftd), 760 > dbl_mb(xtmp(1))) 761 psi_shift = psi_shift + nshift 762 763 call Cram_rr_idot(nbq,dbl_mb(Gx(1)),dbl_mb(xtmp(1)), 764 > dbl_mb(sum(1)+3*(n-1))) 765 call Cram_rr_idot(nbq,dbl_mb(Gy(1)),dbl_mb(xtmp(1)), 766 > dbl_mb(sum(1)+1+3*(n-1))) 767 call Cram_rr_idot(nbq,dbl_mb(Gz(1)),dbl_mb(xtmp(1)), 768 > dbl_mb(sum(1)+2+3*(n-1))) 769 end do !**n** 770 771 call C3dB_Vector_Sumall((3*ne(1)),dbl_mb(sum(1))) 772 773 wfrac = 1.0d0 774 if (occ_tag.gt.0) occ_shift = occ1_shift 775 do n=1,ne(1) 776 if (occ_tag.gt.0) then 777 wfrac = dbl_mb(occ_shift) 778 occ_shift = occ_shift+1 779 end if 780 dbl_mb(ftmp(1)+3*(ii-1)) = 781 > dbl_mb(ftmp(1)+3*(ii-1)) 782 > + 2.0d0*weight*wfrac 783 > *dbl_mb(sum(1)+3*(n-1)) 784 dbl_mb(ftmp(1)+3*(ii-1)+1) = 785 > dbl_mb(ftmp(1)+3*(ii-1)+1) 786 > + 2.0d0*weight*wfrac 787 > *dbl_mb(sum(1)+1+3*(n-1)) 788 dbl_mb(ftmp(1)+3*(ii-1)+2) = 789 > dbl_mb(ftmp(1)+3*(ii-1)+2) 790 > + 2.0d0*weight*wfrac 791 > *dbl_mb(sum(1)+2+3*(n-1)) 792 end do !** ne(1) ** 793 794 end do !** l ** 795 end if !** move ** 796 goto 500 797 798 end if 799 800* **** structure factor pseudopotential **** 801 call cstrfac_pack(nbq,ii,dcpl_mb(exi(1))) 802 call cstrfac_k(ii,nbq,cxr) 803c call Cram_c_ZMul(nbq,cxr,dcpl_mb(exi(1)),dcpl_mb(exi(1))) 804 call zscal(npack,cxr,dcpl_mb(exi(1)),1) 805 806 807* **** generate zsw1's and projectors **** 808 do l=1,nproj 809 810 shift = cpsp_projector_get_ptr( 811 > int_mb(vnl(1)+ia-1),nbq,l) 812 l_prj = int_mb(l_projector(1)+(l-1) 813 > +(ia-1)*jmmax_max) 814 815 sd_function=.true. 816 if (mod(l_prj,2).ne.0) then 817 sd_function=.false. 818 end if 819 820* **** phase factor does not matter therefore **** 821* **** (-i)^l is the same as (i)^l in the **** 822* **** Rayleigh scattering formula **** 823 824c *** current function is s or d **** 825 if (sd_function) then 826 call Cram_rc_Mul(nbq,dbl_mb(shift), 827 > dcpl_mb(exi(1)), 828 > dcpl_mb(prjtmp(1)+(l-1)*npack1)) 829* *** current function is p or f **** 830 else 831 call Cram_irc_Mul(nbq,dbl_mb(shift), 832 > dcpl_mb(exi(1)), 833 > dcpl_mb(prjtmp(1)+(l-1)*npack1)) 834 end if 835 836* **** compute nnXnproj matrix zsw1 = <psi1|prj> **** 837 call Cram_cc_inzdot(nbq,nn, 838 > dbl_mb(psi1_shift), 839 > dcpl_mb(prjtmp(1)+(l-1)*npack1), 840 > dcpl_mb(zsw1(1)+(l-1)*nn)) 841 842* ***** scale psp by factor - used for generating antiferromagnetic structures **** 843* **** nwchem input: pspspin up/down scale l ion_numbers **** 844 if (pspspin) then 845 if (log_mb(pspspin_upions(1)+ii-1).and. 846 > (l_prj.eq.pspspin_upl)) 847 > call dscal(2*ne(1),pspspin_upscale, 848 > dcpl_mb(zsw1(1)+(l-1)*nn),1) 849 if (log_mb(pspspin_downions(1)+ii-1).and. 850 > (l_prj.eq.pspspin_downl)) 851 > call dscal(2*ne(2),pspspin_downscale, 852 > dcpl_mb(zsw1(1)+(l-1)*nn+ne(1)),1) 853 end if 854 855 end do !**l** 856 857 call C3dB_Vector_SumAll((2*nn*nproj),dcpl_mb(zsw1(1))) 858 859 860* **** zsw2 = Gijl*zsw1 ****** 861 call Multiply_Gijl_zsw1(nn, 862 > nproj, 863 > int_mb(nmax(1)+ia-1), 864 > int_mb(lmax(1)+ia-1), 865 > int_mb(n_projector(1) 866 > + (ia-1)*jmmax_max), 867 > int_mb(l_projector(1) 868 > + (ia-1)*jmmax_max), 869 > int_mb(m_projector(1) 870 > + (ia-1)*jmmax_max), 871 > dbl_mb(Gijl(1) 872 > + (ia-1)*gij_stride), 873 > dcpl_mb(zsw1(1)), 874 > dcpl_mb(zsw2(1))) 875 876 877* **** do Kleinman-Bylander Multiplication **** 878 call dscal(2*nn*nproj,scal,dcpl_mb(zsw2(1)),1) 879 call ZGEMM('N','C',npack,nn,nproj, 880 > mone, 881 > dcpl_mb(prjtmp(1)), npack1, 882 > dcpl_mb(zsw2(1)), nn, 883 > one, 884 > dbl_mb(psi2_shift), npack1) 885 886 887 if (move) then 888 weight = brillioun_weight(nbq) 889 if (occ_tag.gt.0) 890 > occ1_shift = cpsi_data_get_chnk(occ_tag,nbq) 891 if (ispin.eq.1) 892 > call dscal(2*nn*nproj,2.0d0,dcpl_mb(zsw2(1)),1) 893 894 do l=1,nproj 895 896 psi_shift = psi1_shift 897 do n=1,nn 898 899* **** routine should be vectorized!!!**** 900 call Cram_zccr_Multiply2(nbq, 901 > dcpl_mb(zsw2(1)+(l-1)*nn+n-1), 902 > dbl_mb(psi_shift), 903 > dcpl_mb(prjtmp(1)+(l-1)*npack1), 904 > dbl_mb(xtmp(1))) 905 psi_shift = psi_shift + nshift 906c do i=1,npack 907c ctmp = psi1(i+(n-1)*npack1 908c > +(nb-1)*neall*npack1) 909c > *dconjg(dcpl_mb(prjtmp(1)+(l-1)*npack1+i-1)) 910c > *dconjg(dcpl_mb(zsw2(1)+(l-1)*nn+n-1)) 911c dbl_mb(xtmp(1)+i-1) = dimag(ctmp) 912c end do 913 914 915* **** define Gx,Gy and Gz in packed space **** 916 call C3dB_t_Copy(1,dbl_mb(G(1)),dbl_mb(Gx(1))) 917 call C3dB_t_Copy(1,dbl_mb(G(2)),dbl_mb(Gy(1))) 918 call C3dB_t_Copy(1,dbl_mb(G(3)),dbl_mb(Gz(1))) 919 call Cram_r_pack(nbq,dbl_mb(Gx(1))) 920 call Cram_r_pack(nbq,dbl_mb(Gy(1))) 921 call Cram_r_pack(nbq,dbl_mb(Gz(1))) 922 923 call Cram_rr_idot(nbq,dbl_mb(Gx(1)),dbl_mb(xtmp(1)), 924 > dbl_mb(sum(1)+3*(n-1))) 925 call Cram_rr_idot(nbq,dbl_mb(Gy(1)),dbl_mb(xtmp(1)), 926 > dbl_mb(sum(1)+1+3*(n-1))) 927 call Cram_rr_idot(nbq,dbl_mb(Gz(1)),dbl_mb(xtmp(1)), 928 > dbl_mb(sum(1)+2+3*(n-1))) 929 end do !**n** 930 931 call C3dB_Vector_Sumall(3*(nn),dbl_mb(sum(1))) 932 933 wfrac = 1.0d0 934 if (occ_tag.gt.0) occ_shift = occ1_shift 935 do n=1,nn 936 if (occ_tag.gt.0) then 937 wfrac = dbl_mb(occ_shift) 938 occ_shift = occ_shift+1 939 end if 940 dbl_mb(ftmp(1)+3*(ii-1)) = 941 > dbl_mb(ftmp(1)+3*(ii-1)) 942 > + 2.0d0*weight*wfrac 943 > *dbl_mb(sum(1)+3*(n-1)) 944 dbl_mb(ftmp(1)+3*(ii-1)+1) = 945 > dbl_mb(ftmp(1)+3*(ii-1)+1) 946 > + 2.0d0*weight*wfrac 947 > *dbl_mb(sum(1)+1+3*(n-1)) 948 dbl_mb(ftmp(1)+3*(ii-1)+2) = 949 > dbl_mb(ftmp(1)+3*(ii-1)+2) 950 > + 2.0d0*weight*wfrac 951 > *dbl_mb(sum(1)+2+3*(n-1)) 952 end do !** nn ** 953 954 end do !** l ** 955 end if !** move ** 956 957 end if !** nproj>0** 958 959 500 continue 960 end do !** nbq ** 961 962 if (move) then 963 call K1dB_Vector_SumAll(3*nion,dbl_mb(ftmp(1))) 964 call daxpy(3*nion,1.0d0,dbl_mb(ftmp(1)),1,fion,1) 965 value = BA_pop_stack(ftmp(2)) 966 value = value.and.BA_pop_stack(sum(2)) 967 value = value.and.BA_pop_stack(Gz(2)) 968 value = value.and.BA_pop_stack(Gy(2)) 969 value = value.and.BA_pop_stack(Gx(2)) 970 value = value.and.BA_pop_stack(xtmp(2)) 971 if (.not.value) call errquit('cpsp_v_nonlocal:popping stack',2, 972 & MA_ERR) 973 end if 974 975 value = BA_pop_stack(zsw2(2)) 976 value = value.and.BA_pop_stack(zsw1(2)) 977 value = value.and.BA_pop_stack(exi(2)) 978 if (.not.value) call errquit('cpsp_v_nonlocal:popping stack',3, 979 & MA_ERR) 980 981 call nwpw_timing_end(6) 982 983 return 984 end 985 986 987 988* *********************************** 989* * * 990* * cpsp_v_nonlocal0 * 991* * * 992* *********************************** 993 subroutine cpsp_v_nonlocal0(nbq,ispin,ne, 994 > psi1_tag,psi2_tag,move,fion) 995 implicit none 996 integer nbq 997 integer ispin,ne(2) 998 integer psi1_tag 999 integer psi2_tag 1000 logical move 1001 real*8 fion(3,*) 1002 1003#include "bafdecls.fh" 1004#include "cpsp_common.fh" 1005#include "errquit.fh" 1006 1007* *** local variables *** 1008 complex*16 one,mone,ione 1009 integer nfft3d,G(3),npack1,npack 1010 integer ii,ia,l,n,nn,nb,neall,nbrillq,nion 1011 integer shift,l_prj,nproj 1012 integer psi1_shift,psi2_shift,psi_shift,nshift 1013 integer occ_tag,occ_shift,occ1_shift 1014 real*8 omega,weight,scal,wfrac 1015 complex*16 cxr 1016 integer exi(2),xtmp(2),zsw1(2),zsw2(2),sum(2),ftmp(2) 1017 integer Gx(2),Gy(2),Gz(2),shfts,shftd 1018 logical value,sd_function 1019 1020* **** external functions **** 1021 logical is_sORd,cpsi_spin_orbit 1022 integer ion_nion,ion_katm,c_G_indx,Pneb_nbrillq 1023 integer cpsp_projector_get_ptr,cpsi_data_get_chnk 1024 integer cpsi_data_get_next 1025 real*8 lattice_omega,brillioun_weight 1026 external is_sORd,cpsi_spin_orbit 1027 external ion_nion,ion_katm,c_G_indx,Pneb_nbrillq 1028 external cpsp_projector_get_ptr,cpsi_data_get_chnk 1029 external cpsi_data_get_next 1030 external lattice_omega,brillioun_weight 1031 1032 one = dcmplx( 1.0d0,0.0d0) 1033 mone = dcmplx(-1.0d0,0.0d0) 1034 ione = dcmplx( 0.0d0,1.0d0) 1035 call nwpw_timing_start(6) 1036 1037 1038* **** allocate local memory **** 1039 nn = ne(1)+ne(2) 1040 neall = ne(1)+ne(2) 1041 nion = ion_nion() 1042 nbrillq = Pneb_nbrillq() 1043 call C3dB_nfft3d(1,nfft3d) 1044 call Cram_max_npack(npack1) 1045 nshift = 2*npack1 1046 1047 value = BA_push_get(mt_dcpl,npack1,'exi',exi(2),exi(1)) 1048 value = value.and.BA_push_get(mt_dcpl,nn*nprj_max, 1049 > 'zsw1',zsw1(2),zsw1(1)) 1050 value = value.and.BA_push_get(mt_dcpl,nn*nprj_max, 1051 > 'zsw2',zsw2(2),zsw2(1)) 1052 if (.not.value) call errquit('cpsp_v_nonlocal:pushing stack',0, 1053 > MA_ERR) 1054 1055ccccccccccccccc if move we do this ccccccccccccccccccccccccc 1056 if (move) then 1057 occ_tag = cpsi_data_get_next(psi1_tag) 1058 value = BA_push_get(mt_dbl, nfft3d,'xtmp',xtmp(2),xtmp(1)) 1059 value = value.and. 1060 > BA_push_get(mt_dbl, nfft3d,'Gx',Gx(2),Gx(1)) 1061 value = value.and. 1062 > BA_push_get(mt_dbl, nfft3d,'Gy',Gy(2),Gy(1)) 1063 value = value.and. 1064 > BA_push_get(mt_dbl, nfft3d,'Gz',Gz(2),Gz(1)) 1065 value = value.and. 1066 > BA_push_get(mt_dbl,3*nn,'sum',sum(2),sum(1)) 1067 value = value.and. 1068 > BA_push_get(mt_dbl,3*nion,'ftmp',ftmp(2),ftmp(1)) 1069 if (.not.value) call errquit('cpsp_v_nonlocal:pushing stack',1, 1070 & MA_ERR) 1071 G(1) = c_G_indx(1) 1072 G(2) = c_G_indx(2) 1073 G(3) = c_G_indx(3) 1074 call dcopy(3*nion,0.0d0,0,dbl_mb(ftmp(1)),1) 1075 end if 1076cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc 1077 1078 omega = lattice_omega() 1079 scal = 1.0d0/omega 1080 1081 1082 psi1_shift = cpsi_data_get_chnk(psi1_tag,nbq) 1083 psi2_shift = cpsi_data_get_chnk(psi2_tag,nbq) 1084 call Cram_npack(nbq,npack) 1085 1086 do 500 ii=1,nion 1087 ia=ion_katm(ii) 1088 1089 nproj = int_mb(nprj(1)+ia-1) 1090 if (nproj.gt.0) then 1091 1092 if (int_mb(psp_type(1)+ia-1).eq.7) then 1093 1094 call cpsp_v_nonlocal_rel(nbq,ii,ia,nproj,npack1,nbrillq, 1095 > nn,ne, 1096 > dcpl_mb(exi(1)),dcpl_mb(prjtmp(1)), 1097 > dcpl_mb(zsw1(1)), 1098 > dbl_mb(psi1_shift),dbl_mb(psi2_shift), 1099 > dbl_mb(Kijl(1)+(ia-1)*kij_stride),scal) 1100 1101 if (move) then 1102 1103* **** define Gx,Gy and Gz in packed space **** 1104 call C3dB_t_Copy(1,dbl_mb(G(1)),dbl_mb(Gx(1))) 1105 call C3dB_t_Copy(1,dbl_mb(G(2)),dbl_mb(Gy(1))) 1106 call C3dB_t_Copy(1,dbl_mb(G(3)),dbl_mb(Gz(1))) 1107 call Cram_r_pack(nbq,dbl_mb(Gx(1))) 1108 call Cram_r_pack(nbq,dbl_mb(Gy(1))) 1109 call Cram_r_pack(nbq,dbl_mb(Gz(1))) 1110 1111 shftd=nproj*npack1 1112 shfts=ne(1)*npack1*2 1113 weight = brillioun_weight(nbq) 1114 if (occ_tag.gt.0) 1115 > occ1_shift = cpsi_data_get_chnk(occ_tag,nbq) 1116 do l=1,nproj 1117 psi_shift = psi1_shift 1118 do n=1,ne(1) 1119 1120* **** routine should be vectorized!!!**** 1121 call Cram_zccr_Multiply2(nbq, 1122 > dcpl_mb(zsw1(1)+(l-1)*ne(1)+n-1), 1123 > dbl_mb(psi_shift), 1124 > dcpl_mb(prjtmp(1)+(l-1)*npack1), 1125 > dbl_mb(xtmp(1))) 1126 call Cram_zccr_Multiply2Add(nbq, 1127 > dcpl_mb(zsw1(1)+(l-1)*ne(1)+n-1), 1128 > dbl_mb(psi_shift+shfts), 1129 > dcpl_mb(prjtmp(1)+(l-1)*npack1+shftd), 1130 > dbl_mb(xtmp(1))) 1131 psi_shift = psi_shift + nshift 1132 1133 call Cram_rr_idot(nbq,dbl_mb(Gx(1)),dbl_mb(xtmp(1)), 1134 > dbl_mb(sum(1)+3*(n-1))) 1135 call Cram_rr_idot(nbq,dbl_mb(Gy(1)),dbl_mb(xtmp(1)), 1136 > dbl_mb(sum(1)+1+3*(n-1))) 1137 call Cram_rr_idot(nbq,dbl_mb(Gz(1)),dbl_mb(xtmp(1)), 1138 > dbl_mb(sum(1)+2+3*(n-1))) 1139 end do !**n** 1140 1141 call C3dB_Vector_Sumall((3*ne(1)),dbl_mb(sum(1))) 1142 1143 wfrac = 1.0d0 1144 if (occ_tag.gt.0) occ_shift = occ1_shift 1145 do n=1,ne(1) 1146 if (occ_tag.gt.0) then 1147 wfrac = dbl_mb(occ_shift) 1148 occ_shift = occ_shift+1 1149 end if 1150 dbl_mb(ftmp(1)+3*(ii-1)) = 1151 > dbl_mb(ftmp(1)+3*(ii-1)) 1152 > + 2.0d0*weight*wfrac 1153 > *dbl_mb(sum(1)+3*(n-1)) 1154 dbl_mb(ftmp(1)+3*(ii-1)+1) = 1155 > dbl_mb(ftmp(1)+3*(ii-1)+1) 1156 > + 2.0d0*weight*wfrac 1157 > *dbl_mb(sum(1)+1+3*(n-1)) 1158 dbl_mb(ftmp(1)+3*(ii-1)+2) = 1159 > dbl_mb(ftmp(1)+3*(ii-1)+2) 1160 > + 2.0d0*weight*wfrac 1161 > *dbl_mb(sum(1)+2+3*(n-1)) 1162 end do !** ne(1) ** 1163 1164 end do !** l ** 1165 end if !** move ** 1166 goto 500 1167 1168 end if 1169 1170* **** structure factor pseudopotential **** 1171 call cstrfac_pack(nbq,ii,dcpl_mb(exi(1))) 1172 call cstrfac_k(ii,nbq,cxr) 1173c call Cram_c_ZMul(nbq,cxr,dcpl_mb(exi(1)),dcpl_mb(exi(1))) 1174 call zscal(npack,cxr,dcpl_mb(exi(1)),1) 1175 1176 1177* **** generate zsw1's and projectors **** 1178 do l=1,nproj 1179 1180 shift = cpsp_projector_get_ptr( 1181 > int_mb(vnl(1)+ia-1),nbq,l) 1182 l_prj = int_mb(l_projector(1)+(l-1) 1183 > +(ia-1)*jmmax_max) 1184 1185 sd_function=.true. 1186 if (mod(l_prj,2).ne.0) then 1187 sd_function=.false. 1188 end if 1189 1190* **** phase factor does not matter therefore **** 1191* **** (-i)^l is the same as (i)^l in the **** 1192* **** Rayleigh scattering formula **** 1193 1194c *** current function is s or d **** 1195 if (sd_function) then 1196 call Cram_rc_Mul(nbq,dbl_mb(shift), 1197 > dcpl_mb(exi(1)), 1198 > dcpl_mb(prjtmp(1)+(l-1)*npack1)) 1199* *** current function is p or f **** 1200 else 1201 call Cram_irc_Mul(nbq,dbl_mb(shift), 1202 > dcpl_mb(exi(1)), 1203 > dcpl_mb(prjtmp(1)+(l-1)*npack1)) 1204 end if 1205 1206* **** compute nnXnproj matrix zsw1 = <psi1|prj> **** 1207 call Cram_cc_inzdot(nbq,nn, 1208 > dbl_mb(psi1_shift), 1209 > dcpl_mb(prjtmp(1)+(l-1)*npack1), 1210 > dcpl_mb(zsw1(1)+(l-1)*nn)) 1211 1212* ***** scale psp by factor - used for generating antiferromagnetic structures **** 1213* **** nwchem input: pspspin up/down scale l ion_numbers **** 1214 if (pspspin) then 1215 if (log_mb(pspspin_upions(1)+ii-1).and. 1216 > (l_prj.eq.pspspin_upl)) 1217 > call dscal(2*ne(1),pspspin_upscale, 1218 > dcpl_mb(zsw1(1)+(l-1)*nn),1) 1219 if (log_mb(pspspin_downions(1)+ii-1).and. 1220 > (l_prj.eq.pspspin_downl)) 1221 > call dscal(2*ne(2),pspspin_downscale, 1222 > dcpl_mb(zsw1(1)+(l-1)*nn+ne(1)),1) 1223 end if 1224 1225 end do !**l** 1226 1227 call C3dB_Vector_SumAll((2*nn*nproj),dcpl_mb(zsw1(1))) 1228 1229 1230* **** zsw2 = Gijl*zsw1 ****** 1231 call Multiply_Gijl_zsw1(nn, 1232 > nproj, 1233 > int_mb(nmax(1)+ia-1), 1234 > int_mb(lmax(1)+ia-1), 1235 > int_mb(n_projector(1) 1236 > + (ia-1)*jmmax_max), 1237 > int_mb(l_projector(1) 1238 > + (ia-1)*jmmax_max), 1239 > int_mb(m_projector(1) 1240 > + (ia-1)*jmmax_max), 1241 > dbl_mb(Gijl(1) 1242 > + (ia-1)*gij_stride), 1243 > dcpl_mb(zsw1(1)), 1244 > dcpl_mb(zsw2(1))) 1245 1246 1247* **** do Kleinman-Bylander Multiplication **** 1248 call dscal(2*nn*nproj,scal,dcpl_mb(zsw2(1)),1) 1249 call ZGEMM('N','C',npack,nn,nproj, 1250 > mone, 1251 > dcpl_mb(prjtmp(1)), npack1, 1252 > dcpl_mb(zsw2(1)), nn, 1253 > one, 1254 > dbl_mb(psi2_shift), npack1) 1255 1256 1257 if (move) then 1258 weight = brillioun_weight(nbq) 1259 if (occ_tag.gt.0) 1260 > occ1_shift = cpsi_data_get_chnk(occ_tag,nbq) 1261 if (ispin.eq.1) 1262 > call dscal(2*nn*nproj,2.0d0,dcpl_mb(zsw2(1)),1) 1263 1264 do l=1,nproj 1265 1266 psi_shift = psi1_shift 1267 do n=1,nn 1268 1269* **** routine should be vectorized!!!**** 1270 call Cram_zccr_Multiply2(nbq, 1271 > dcpl_mb(zsw2(1)+(l-1)*nn+n-1), 1272 > dbl_mb(psi_shift), 1273 > dcpl_mb(prjtmp(1)+(l-1)*npack1), 1274 > dbl_mb(xtmp(1))) 1275 psi_shift = psi_shift + nshift 1276c do i=1,npack 1277c ctmp = psi1(i+(n-1)*npack1 1278c > +(nb-1)*neall*npack1) 1279c > *dconjg(dcpl_mb(prjtmp(1)+(l-1)*npack1+i-1)) 1280c > *dconjg(dcpl_mb(zsw2(1)+(l-1)*nn+n-1)) 1281c dbl_mb(xtmp(1)+i-1) = dimag(ctmp) 1282c end do 1283 1284 1285* **** define Gx,Gy and Gz in packed space **** 1286 call C3dB_t_Copy(1,dbl_mb(G(1)),dbl_mb(Gx(1))) 1287 call C3dB_t_Copy(1,dbl_mb(G(2)),dbl_mb(Gy(1))) 1288 call C3dB_t_Copy(1,dbl_mb(G(3)),dbl_mb(Gz(1))) 1289 call Cram_r_pack(nbq,dbl_mb(Gx(1))) 1290 call Cram_r_pack(nbq,dbl_mb(Gy(1))) 1291 call Cram_r_pack(nbq,dbl_mb(Gz(1))) 1292 1293 call Cram_rr_idot(nbq,dbl_mb(Gx(1)),dbl_mb(xtmp(1)), 1294 > dbl_mb(sum(1)+3*(n-1))) 1295 call Cram_rr_idot(nbq,dbl_mb(Gy(1)),dbl_mb(xtmp(1)), 1296 > dbl_mb(sum(1)+1+3*(n-1))) 1297 call Cram_rr_idot(nbq,dbl_mb(Gz(1)),dbl_mb(xtmp(1)), 1298 > dbl_mb(sum(1)+2+3*(n-1))) 1299 end do !**n** 1300 1301 call C3dB_Vector_Sumall(3*(nn),dbl_mb(sum(1))) 1302 1303 wfrac = 1.0d0 1304 if (occ_tag.gt.0) occ_shift = occ1_shift 1305 do n=1,nn 1306 if (occ_tag.gt.0) then 1307 wfrac = dbl_mb(occ_shift) 1308 occ_shift = occ_shift+1 1309 end if 1310 dbl_mb(ftmp(1)+3*(ii-1)) = 1311 > dbl_mb(ftmp(1)+3*(ii-1)) 1312 > + 2.0d0*weight*wfrac 1313 > *dbl_mb(sum(1)+3*(n-1)) 1314 dbl_mb(ftmp(1)+3*(ii-1)+1) = 1315 > dbl_mb(ftmp(1)+3*(ii-1)+1) 1316 > + 2.0d0*weight*wfrac 1317 > *dbl_mb(sum(1)+1+3*(n-1)) 1318 dbl_mb(ftmp(1)+3*(ii-1)+2) = 1319 > dbl_mb(ftmp(1)+3*(ii-1)+2) 1320 > + 2.0d0*weight*wfrac 1321 > *dbl_mb(sum(1)+2+3*(n-1)) 1322 end do !** nn ** 1323 1324 end do !** l ** 1325 end if !** move ** 1326 1327 end if !** nproj>0** 1328 1329 500 continue 1330 1331 1332 if (move) then 1333 call K1dB_Vector_SumAll(3*nion,dbl_mb(ftmp(1))) 1334 call daxpy(3*nion,1.0d0,dbl_mb(ftmp(1)),1,fion,1) 1335 value = BA_pop_stack(ftmp(2)) 1336 value = value.and.BA_pop_stack(sum(2)) 1337 value = value.and.BA_pop_stack(Gz(2)) 1338 value = value.and.BA_pop_stack(Gy(2)) 1339 value = value.and.BA_pop_stack(Gx(2)) 1340 value = value.and.BA_pop_stack(xtmp(2)) 1341 if (.not.value) call errquit('cpsp_v_nonlocal:popping stack',2, 1342 & MA_ERR) 1343 end if 1344 1345 value = BA_pop_stack(zsw2(2)) 1346 value = value.and.BA_pop_stack(zsw1(2)) 1347 value = value.and.BA_pop_stack(exi(2)) 1348 if (.not.value) call errquit('cpsp_v_nonlocal:popping stack',3, 1349 & MA_ERR) 1350 1351 call nwpw_timing_end(6) 1352 1353 return 1354 end 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364* ************************************************ 1365* * * 1366* * Multiply_Gijl_zsw1 * 1367* * * 1368* ************************************************ 1369cccccccccccccccccccc 1370c 1371cccccccccccccccccccc 1372 subroutine Multiply_Gijl_zsw1(nn,nprj,nmax,lmax, 1373 > n_prj,l_prj,m_prj, 1374 > G, 1375 > zsw1,zsw2) 1376 implicit none 1377 integer nn 1378 integer nprj,nmax,lmax 1379 integer n_prj(nprj) 1380 integer l_prj(nprj) 1381 integer m_prj(nprj) 1382 real*8 G(nmax,nmax,0:lmax) 1383 complex*16 zsw1(nn,nprj) 1384 complex*16 zsw2(nn,nprj) 1385 1386 !**** local variables **** 1387 integer a,b,na,nb,la,lb,ma,mb 1388 1389 1390 call dcopy(2*nn*nprj,0.0d0,0,zsw2,1) 1391 do b=1,nprj 1392 lb = l_prj(b) 1393 mb = m_prj(b) 1394 1395 do a=1,nprj 1396 la = l_prj(a) 1397 ma = m_prj(a) 1398 1399 if ((la.eq.lb).and.(ma.eq.mb)) then 1400 na = n_prj(a) 1401 nb = n_prj(b) 1402 call daxpy(2*nn,G(nb,na,la),zsw1(1,a),1,zsw2(1,b),1) 1403 end if 1404 1405 end do 1406 end do 1407 return 1408 end 1409 1410 1411* *********************************** 1412* * * 1413* * cpsp_v_nonlocal_orb * 1414* * * 1415* *********************************** 1416 1417 subroutine cpsp_v_nonlocal_orb(nb,orb1,orb2) 1418 implicit none 1419 integer nb 1420 complex*16 orb1(*) 1421 complex*16 orb2(*) 1422 1423#include "bafdecls.fh" 1424#include "errquit.fh" 1425#include "cpsp_common.fh" 1426 1427 1428* *** local variables *** 1429 complex*16 one,mone,ione 1430c parameter (one=(1.0d0,0.0d0), mone=(-1.0d0,0.0d0)) 1431c parameter (ione=(0.0d0,1.0d0)) 1432 integer nfft3d,npack1,npack 1433 integer ii,ia,l,ne1 1434 integer shift,l_prj,nproj 1435 real*8 omega,scal 1436 complex*16 cxr 1437 integer exi(2),zsw1(2),zsw2(2) 1438 logical value,sd_function 1439 1440* **** external functions **** 1441 logical is_sORd,cpsi_spin_orbit 1442 integer ion_nion,ion_katm 1443 integer cpsp_projector_get_ptr 1444 integer cpsi_neq 1445 real*8 lattice_omega 1446 external is_sORd,cpsi_spin_orbit 1447 external ion_nion,ion_katm 1448 external cpsp_projector_get_ptr 1449 external lattice_omega 1450 external cpsi_neq 1451 one=dcmplx(1.0d0,0.0d0) 1452 mone=dcmplx(-1.0d0,0.d0) 1453 ione=dcmplx(0.0d0,1.d0) 1454 1455 if (cpsi_spin_orbit()) then 1456 call cpsp_v_nonlocal_orb_2com(nb,orb1,orb2) 1457 return 1458 end if 1459 1460 call nwpw_timing_start(6) 1461 1462* **** allocate local memory **** 1463 call C3dB_nfft3d(1,nfft3d) 1464 call Cram_max_npack(npack1) 1465c nbrill = brillioun_nbrillioun() 1466 ne1=cpsi_neq(1) 1467 value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1)) 1468 value = value.and. 1469 > BA_push_get(mt_dcpl,nprj_max, 1470 > 'zsw1',zsw1(2),zsw1(1)) 1471 value = value.and. 1472 > BA_push_get(mt_dcpl,nprj_max, 1473 > 'zsw2',zsw2(2),zsw2(1)) 1474 if (.not.value) 1475 > call errquit('cpsp_v_nonlocal_orb:pushing stack',0,MA_ERR) 1476 1477 1478 omega = lattice_omega() 1479 scal = 1.0d0/omega 1480 1481 do 500 ii=1,ion_nion() 1482 ia=ion_katm(ii) 1483 nproj = int_mb(nprj(1)+ia-1) 1484 1485 if (nproj.gt.0) then 1486 1487 if ( int_mb(psp_type(1)+ia-1) .eq. 7) then 1488 call cpsp_v_nonlocal_rel_orb(nb,orb1,orb2, 1489 > dcpl_mb(zsw1(1)), 1490 > dbl_mb(Kijl(1)+(ia-1)*kij_stride), 1491 > dcpl_mb(exi(1)), 1492 > nfft3d,ia,ii,ne1, 1493 > npack1,nproj) 1494 goto 500 1495 end if 1496 1497 1498* **** structure factor **** 1499 call Cram_npack(nb,npack) 1500 call cstrfac_pack(nb,ii,dcpl_mb(exi(1))) 1501 call cstrfac_k(ii,nb,cxr) 1502cc call Cram_c_ZMul(nb,cxr,dcpl_mb(exi(1)),dcpl_mb(exi(1))) 1503 call zscal(npack,cxr,dcpl_mb(exi(1)),1) 1504 1505 do l=1,nproj 1506 1507c shift = vnl(1)+(l-1) *npack1 1508c > +(nb-1)*npack1*vnl_stride 1509c > +(ia-1)*npack1*vnl_stride*nbrill 1510 shift = cpsp_projector_get_ptr( 1511 > int_mb(vnl(1)+ia-1),nb,l) 1512 l_prj = int_mb(l_projector(1)+(l-1) 1513 > +(ia-1)*jmmax_max) 1514 1515 sd_function=.true. 1516 if (mod(l_prj,2).ne.0) then 1517 sd_function=.false. 1518 end if 1519 1520* **** phase factor does not matter therefore **** 1521* **** (-i)^l is the same as (i)^l in the **** 1522* **** Rayleigh scattering formula **** 1523* *** current function is s or d **** 1524 if (sd_function) then 1525 call Cram_rc_Mul(nb,dbl_mb(shift), 1526 > dcpl_mb(exi(1)), 1527 > dcpl_mb(prjtmp(1)+(l-1)*npack1)) 1528* *** current function is p or f **** 1529 else 1530 call Cram_irc_Mul(nb,dbl_mb(shift), 1531 > dcpl_mb(exi(1)), 1532 > dcpl_mb(prjtmp(1)+(l-1)*npack1)) 1533 end if 1534 1535* **** compute 1Xnproj matrix zsw1 = <psi1|prj> **** 1536 call Cram_cc_izdot(nb, 1537 > orb1, 1538 > dcpl_mb(prjtmp(1)+(l-1)*npack1), 1539 > dcpl_mb(zsw1(1)+(l-1))) 1540 end do !**l** 1541 call C3dB_Vector_SumAll((2*nproj),dcpl_mb(zsw1(1))) 1542 1543* **** zsw2 = Gijl*zsw1 ****** 1544 call Multiply_Gijl_zsw1(1, 1545 > nproj, 1546 > int_mb(nmax(1)+ia-1), 1547 > int_mb(lmax(1)+ia-1), 1548 > int_mb(n_projector(1) 1549 > + (ia-1)*jmmax_max), 1550 > int_mb(l_projector(1) 1551 > + (ia-1)*jmmax_max), 1552 > int_mb(m_projector(1) 1553 > + (ia-1)*jmmax_max), 1554 > dbl_mb(Gijl(1) 1555 > + (ia-1)*gij_stride), 1556 > dcpl_mb(zsw1(1)), 1557 > dcpl_mb(zsw2(1))) 1558 1559* **** do Kleinman-Bylander Multiplication **** 1560 call dscal(2*nproj,scal,dcpl_mb(zsw2(1)),1) 1561 call ZGEMM('N','C',npack,1,nproj, 1562 > mone, 1563 > dcpl_mb(prjtmp(1)), npack1, 1564 > dcpl_mb(zsw2(1)), 1, 1565 > one, 1566 > orb2, npack1) 1567 1568 end if !** nproj>0 ** 1569500 continue !** ii **** 1570 1571 value = BA_pop_stack(zsw2(2)) 1572 value = value.and.BA_pop_stack(zsw1(2)) 1573 value = value.and.BA_pop_stack(exi(2)) 1574 if (.not.value) 1575 > call errquit('cpsp_v_nonlocal_orb:popping stack',3,MA_ERR) 1576 1577 call nwpw_timing_end(6) 1578 1579 return 1580 end 1581 1582 1583 1584* *********************************** 1585* * * 1586* * cpsp_f_vnonlocal * 1587* * * 1588* *********************************** 1589 1590 subroutine cpsp_f_vnonlocal(ispin,ne,psi1_tag,fion) 1591 implicit none 1592 integer ispin,ne(2) 1593 integer psi1_tag 1594 real*8 fion(3,*) 1595 1596#include "bafdecls.fh" 1597#include "cpsp_common.fh" 1598#include "errquit.fh" 1599 1600 1601* *** local variables *** 1602 integer nfft3d,G(3),npack1,npack,nbrillq,neall,nion 1603 integer ii,ia,l,n,nn,nbq,l_prj,nproj,shift 1604 integer psi1_shift,psi_shift,nshift 1605 real*8 omega,weight,scal,wfrac 1606 complex*16 cxr 1607 integer exi(2),xtmp(2),zsw1(2),zsw2(2),sum(2),ftmp(2) 1608 integer Gx(2),Gy(2),Gz(2),it 1609 integer occ1_tag,occ1_shift,occ_shift 1610 logical value,sd_function 1611 1612* **** external functions **** 1613 logical is_sORd 1614 integer ion_nion,ion_katm,c_G_indx,Pneb_nbrillq 1615 integer cpsp_projector_get_ptr,cpsi_data_get_chnk 1616 integer cpsi_data_get_next 1617 real*8 lattice_omega,brillioun_weight 1618 external is_sORd 1619 external ion_nion,ion_katm,c_G_indx,Pneb_nbrillq 1620 external cpsp_projector_get_ptr,cpsi_data_get_chnk 1621 external cpsi_data_get_next 1622 external lattice_omega,brillioun_weight 1623 1624 call nwpw_timing_start(6) 1625 1626* **** allocate local memory **** 1627 nn = ne(1)+ne(2) 1628 neall = ne(1)+ne(2) 1629 nion = ion_nion() 1630 nbrillq = Pneb_nbrillq() 1631 call C3dB_nfft3d(1,nfft3d) 1632 call Cram_max_npack(npack1) 1633 nshift = 2*npack1 1634 occ1_tag = cpsi_data_get_next(psi1_tag) 1635 1636 value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1)) 1637 value = value.and. 1638 > BA_push_get(mt_dcpl,nn*nprj_max, 1639 > 'zsw1',zsw1(2),zsw1(1)) 1640 value = value.and. 1641 > BA_push_get(mt_dcpl,nn*nprj_max, 1642 > 'zsw2',zsw2(2),zsw2(1)) 1643 if (.not.value) call errquit('cpsp_v_nonlocal:pushing stack',0, 1644 & MA_ERR) 1645 1646 value = BA_push_get(mt_dbl, nfft3d,'xtmp',xtmp(2),xtmp(1)) 1647 value = value.and. 1648 > BA_push_get(mt_dbl, nfft3d,'Gx',Gx(2),Gx(1)) 1649 value = value.and. 1650 > BA_push_get(mt_dbl, nfft3d,'Gy',Gy(2),Gy(1)) 1651 value = value.and. 1652 > BA_push_get(mt_dbl, nfft3d,'Gz',Gz(2),Gz(1)) 1653 1654 value = value.and. 1655 > BA_push_get(mt_dbl,3*nn,'sum',sum(2),sum(1)) 1656 value = value.and. 1657 > BA_push_get(mt_dbl,3*nion,'ftmp',ftmp(2),ftmp(1)) 1658 if (.not.value) call errquit('cpsp_v_nonlocal:pushing stack',1, 1659 & MA_ERR) 1660 call dcopy(3*nion,0.0d0,0,dbl_mb(ftmp(1)),1) 1661 G(1) = c_G_indx(1) 1662 G(2) = c_G_indx(2) 1663 G(3) = c_G_indx(3) 1664 1665 omega = lattice_omega() 1666 scal = 1.0d0/omega 1667 1668 do nbq = 1,nbrillq 1669 call Cram_npack(nbq,npack) 1670 weight = brillioun_weight(nbq) 1671 psi1_shift = cpsi_data_get_chnk(psi1_tag,nbq) 1672 if (occ1_tag.gt.0) 1673 > occ1_shift = cpsi_data_get_chnk(occ1_tag,nbq) 1674 1675 1676* **** define Gx,Gy and Gz in packed space **** 1677 call C3dB_t_Copy(1,dbl_mb(G(1)),dbl_mb(Gx(1))) 1678 call C3dB_t_Copy(1,dbl_mb(G(2)),dbl_mb(Gy(1))) 1679 call C3dB_t_Copy(1,dbl_mb(G(3)),dbl_mb(Gz(1))) 1680 call Cram_r_pack(nbq,dbl_mb(Gx(1))) 1681 call Cram_r_pack(nbq,dbl_mb(Gy(1))) 1682 call Cram_r_pack(nbq,dbl_mb(Gz(1))) 1683 1684 do 100 ii=1,nion 1685 ia=ion_katm(ii) 1686 it=int_mb(psp_type(1)+ia-1) 1687 nproj = int_mb(nprj(1)+ia-1) 1688 if (nproj.gt.0) then 1689 if (it.eq.7) then 1690 call cpsp_f_nonlocal_rel(nbq,ii,ia,nproj,npack1,ne(1), 1691 > dcpl_mb(exi(1)),dcpl_mb(zsw1(1)), 1692 > dbl_mb(psi1_shift),dcpl_mb(prjtmp(1)), 1693 > dbl_mb(xtmp(1)),dbl_mb(sum(1)), 1694 > dbl_mb(Kijl(1)+(ia-1)*kij_stride), 1695 > dbl_mb(Gx(1)),dbl_mb(Gy(1)),dbl_mb(Gz(1)), 1696 > dbl_mb(ftmp(1)+3*(ii-1)), 1697 > dbl_mb(ftmp(1)+3*(ii-1)+1), 1698 > dbl_mb(ftmp(1)+3*(ii-1)+2), 1699 > weight,scal) 1700 goto 100 1701 end if 1702 1703* **** structure factor **** 1704 call cstrfac_pack(nbq,ii,dcpl_mb(exi(1))) 1705 call cstrfac_k(ii,nbq,cxr) 1706c call Cram_c_ZMul(nbq,cxr,dcpl_mb(exi(1)),dcpl_mb(exi(1))) 1707 call zscal(npack,cxr,dcpl_mb(exi(1)),1) 1708 1709 do l=1,nproj 1710c shift = vnl(1) 1711c > +(l-1) *npack1 1712c > +(nb-1)*npack1*vnl_stride 1713c > +(ia-1)*npack1*vnl_stride*nbrill 1714 shift = cpsp_projector_get_ptr( 1715 > int_mb(vnl(1)+ia-1),nbq,l) 1716 l_prj = int_mb(l_projector(1)+(l-1) 1717 > +(ia-1)*jmmax_max) 1718 1719 sd_function=.true. 1720 if (mod(l_prj,2).ne.0) then 1721 sd_function=.false. 1722 end if 1723 1724* **** phase factor does not matter therefore **** 1725* **** (-i)^l is the same as (i)^l in the **** 1726* **** Rayleigh scattering formula **** 1727* *** current function is s or d **** 1728 if (sd_function) then 1729 call Cram_rc_Mul(nbq,dbl_mb(shift), 1730 > dcpl_mb(exi(1)), 1731 > dcpl_mb(prjtmp(1)+(l-1)*npack1)) 1732* *** current function is p or f **** 1733 else 1734 call Cram_irc_Mul(nbq,dbl_mb(shift), 1735 > dcpl_mb(exi(1)), 1736 > dcpl_mb(prjtmp(1)+(l-1)*npack1)) 1737 end if 1738 1739* **** compute nnXnproj matrix zsw1 = <psi1|prj> **** 1740 call Cram_cc_inzdot(nbq,nn, 1741 > dbl_mb(psi1_shift), 1742 > dcpl_mb(prjtmp(1)+(l-1)*npack1), 1743 > dcpl_mb(zsw1(1)+(l-1)*nn)) 1744 1745* ***** scale psp by factor - used for generating antiferromagnetic structures **** 1746* **** nwchem input: pspspin up/down scale l ion_numbers **** 1747 if (pspspin) then 1748 if (log_mb(pspspin_upions(1)+ii-1).and. 1749 > (l_prj.eq.pspspin_upl)) 1750 > call dscal(2*ne(1),pspspin_upscale, 1751 > dcpl_mb(zsw1(1)+(l-1)*nn),1) 1752 if (log_mb(pspspin_downions(1)+ii-1).and. 1753 > (l_prj.eq.pspspin_downl)) 1754 > call dscal(2*ne(2),pspspin_downscale, 1755 > dcpl_mb(zsw1(1)+(l-1)*nn+ne(1)),1) 1756 end if 1757 end do 1758 call C3dB_Vector_SumAll((2*nn*nproj),dcpl_mb(zsw1(1))) 1759 1760 1761* **** do kleinman-bylander multiplication **** 1762* **** sw2 = Gijl*sw1 ****** 1763 call Multiply_Gijl_zsw1(nn, 1764 > nproj, 1765 > int_mb(nmax(1)+ia-1), 1766 > int_mb(lmax(1)+ia-1), 1767 > int_mb(n_projector(1) 1768 > + (ia-1)*jmmax_max), 1769 > int_mb(l_projector(1) 1770 > + (ia-1)*jmmax_max), 1771 > int_mb(m_projector(1) 1772 > + (ia-1)*jmmax_max), 1773 > dbl_mb(Gijl(1) 1774 > + (ia-1)*gij_stride), 1775 > dcpl_mb(zsw1(1)), 1776 > dcpl_mb(zsw2(1))) 1777 1778 call dscal(2*nn*nproj,scal,dcpl_mb(zsw2(1)),1) 1779 if (ispin.eq.1) 1780 > call dscal(2*nn*nproj,2.0d0,dcpl_mb(zsw2(1)),1) 1781 1782 do l=1,nproj 1783 1784 psi_shift = psi1_shift 1785 do n=1,nn 1786 call Cram_zccr_Multiply2(nbq, 1787 > dcpl_mb(zsw2(1)+(l-1)*nn+n-1), 1788 > dbl_mb(psi_shift), 1789 > dcpl_mb(prjtmp(1)+(l-1)*npack1), 1790 > dbl_mb(xtmp(1))) 1791 psi_shift = psi_shift + nshift 1792c do i=1,npack 1793c ctmp = psi1(i+(n-1)*npack1 1794c > +(nb-1)*neall*npack1) 1795c > *dconjg(dcpl_mb(prjtmp(1)+(l-1)*npack1+i-1)) 1796c > *dconjg(dcpl_mb(zsw2(1)+(l-1)*nn+n-1)) 1797c dbl_mb(xtmp(1)+i-1) = dimag(ctmp) 1798c end do 1799 1800 call Cram_rr_idot(nbq,dbl_mb(Gx(1)),dbl_mb(xtmp(1)), 1801 > dbl_mb(sum(1)+3*(n-1))) 1802 call Cram_rr_idot(nbq,dbl_mb(Gy(1)),dbl_mb(xtmp(1)), 1803 > dbl_mb(sum(1)+1+3*(n-1))) 1804 call Cram_rr_idot(nbq,dbl_mb(Gz(1)),dbl_mb(xtmp(1)), 1805 > dbl_mb(sum(1)+2+3*(n-1))) 1806 end do 1807 call C3dB_Vector_Sumall(3*(nn),dbl_mb(sum(1))) 1808 1809 wfrac = 1.0d0 1810 if (occ1_tag.gt.0) occ_shift = occ1_shift 1811 do n=1,nn 1812 if (occ1_tag.gt.0) then 1813 wfrac = dbl_mb(occ_shift) 1814 occ_shift = occ_shift+1 1815 end if 1816 dbl_mb(ftmp(1)+3*(ii-1)) = 1817 > dbl_mb(ftmp(1)+3*(ii-1)) 1818 > + 2.0d0*wfrac*weight 1819 > *dbl_mb(sum(1)+3*(n-1)) 1820 dbl_mb(ftmp(1)+3*(ii-1)+1) = 1821 > dbl_mb(ftmp(1)+3*(ii-1)+1) 1822 > + 2.0d0*wfrac*weight 1823 > *dbl_mb(sum(1)+1+3*(n-1)) 1824 dbl_mb(ftmp(1)+3*(ii-1)+2) = 1825 > dbl_mb(ftmp(1)+3*(ii-1)+2) 1826 > + 2.0d0*wfrac*weight 1827 > *dbl_mb(sum(1)+2+3*(n-1)) 1828 end do 1829 1830 end do !** l ** 1831 1832 end if !** nproj>0) *** 1833100 continue !** ii ** 1834 1835 end do !** nbq *** 1836 1837 call K1dB_Vector_SumAll(3*nion,dbl_mb(ftmp(1))) 1838 call daxpy(3*nion,1.0d0,dbl_mb(ftmp(1)),1,fion,1) 1839 value = BA_pop_stack(ftmp(2)) 1840 value = value.and.BA_pop_stack(sum(2)) 1841 value = value.and.BA_pop_stack(Gz(2)) 1842 value = value.and.BA_pop_stack(Gy(2)) 1843 value = value.and.BA_pop_stack(Gx(2)) 1844 value = value.and.BA_pop_stack(xtmp(2)) 1845 if (.not.value) call errquit('cpsp_f_vnonlocal:popping stack',2, 1846 & MA_ERR) 1847 1848 value = BA_pop_stack(zsw2(2)) 1849 value = value.and.BA_pop_stack(zsw1(2)) 1850 value = value.and.BA_pop_stack(exi(2)) 1851 if (.not.value) call errquit('cpsp_f_vnonlocal:popping stack',3, 1852 & MA_ERR) 1853 1854 call nwpw_timing_end(6) 1855 1856 1857 return 1858 end 1859 1860* ************************************************************* 1861* * * 1862* * cpsp_read * 1863* * * 1864* ************************************************************* 1865cccccccccccccccccccccccccccccccccccccccccccccccccc 1866c Read data from psp file *.cpp 1867cccccccccccccccccccccccccccccccccccccccccccccccccc 1868 subroutine cpsp_read(fname,comment, 1869 > psp_type, 1870 > version, 1871 > nfft,unita, 1872 > atom,amass,zv,lmmax,lmax,locp,nmax, 1873 > rc, 1874 > nprj,n_projector,l_projector,m_projector, 1875 > Gijl, 1876 > Kijl, 1877 > nfft3d,npack0,npack1,lmmax_max,nmax_max, 1878 > vl,vnl_tag,vnl2_tag, 1879 > semicore,rcore,ncore, 1880 > tmp,tmp2, 1881 > ierr) 1882 implicit none 1883 character*(*) comment 1884 character*50 fname 1885 integer psp_type 1886 integer version 1887 integer nfft(3) 1888 real*8 unita(3,3) 1889 character*2 atom 1890 real*8 amass,zv 1891 integer lmmax 1892 integer lmax 1893 integer locp 1894 integer nmax 1895 real*8 rc(*) 1896 integer nprj,n_projector(*),l_projector(*),m_projector(*) 1897 real*8 Gijl(*),Kijl(*) 1898 integer nfft3d,npack0,npack1,lmmax_max,nmax_max 1899 real*8 vl(*) 1900 integer vnl_tag 1901 integer vnl2_tag 1902 logical semicore 1903 real*8 rcore 1904 real*8 ncore(*) 1905 real*8 tmp(*) 1906 real*8 tmp2(*) 1907 integer ierr 1908 1909#include "bafdecls.fh" 1910#include "btdb.fh" 1911#include "util.fh" 1912 1913#ifdef TCGMSG 1914#include "tcgmsg.fh" 1915#include "msgtypesf.h" 1916#endif 1917 1918* *** local variables *** 1919 integer MASTER,taskid,taskid_k 1920 parameter(MASTER=0) 1921 integer n,l,nb,nbq,pk 1922 integer msglen,rsize 1923 integer iatom(2) 1924 character*255 full_filename 1925 real*8 kv(3) 1926 integer nbrillioun 1927 logical mprint,value 1928 1929* ***** external functions **** 1930 integer brillioun_nbrillioun,cpsp_projector_alloc 1931 integer brillioun_nbrillq 1932 real*8 brillioun_all_k 1933 logical control_print 1934 external brillioun_nbrillioun,cpsp_projector_alloc 1935 external brillioun_nbrillq 1936 external brillioun_all_k 1937 external control_print 1938 1939 call Parallel_taskid(taskid) 1940 call Parallel3d_taskid_k(taskid_k) 1941 mprint = (taskid.eq.MASTER).and.control_print(print_medium) 1942 1943* **** open fname binary file **** 1944 1945 if (taskid.eq.MASTER) then 1946 call util_file_name_noprefix(fname,.false., 1947 > .false., 1948 > full_filename) 1949 l = index(full_filename,' ') - 1 1950 call openfile(5,full_filename,l,'r',l) 1951 call cread(5,comment,80) 1952 call iread(5,psp_type,1) 1953 call iread(5,version,1) 1954 call iread(5,nfft,3) 1955 call dread(5,unita,9) 1956 call cread(5,atom,2) 1957 call dread(5,amass,1) 1958 call dread(5,zv,1) 1959 call iread(5,lmax,1) 1960 call iread(5,locp,1) 1961 call iread(5,nmax,1) 1962 lmmax=(lmax+1)**2 - (2*locp+1) 1963 amass = amass*1822.89d0 1964 call dread(5,rc,(lmax+1)) 1965 call iread(5,nprj,1) 1966 if (nprj.gt.0) then 1967 call iread(5,n_projector,nprj) 1968 call iread(5,l_projector,nprj) 1969 call iread(5,m_projector,nprj) 1970 if (psp_type.eq.7) then 1971 call dread(5,Kijl,nprj) 1972 else 1973ccc!** number of matrix elements = nmax*nmax*(lmax+1) ** 1974 rsize=nmax*nmax*(lmax+1) 1975 call dread(5,Gijl,rsize) 1976 if (psp_type.eq.1) then 1977 call dread(5,Kijl,rsize) 1978 end if 1979ccc!** number of matrix elements = nmax*nmax*(lmax+1) ** 1980 end if 1981 end if 1982 call dread(5,rcore,1) 1983 call iread(5,nbrillioun,1) 1984 ierr = 0 1985 if (nbrillioun.eq.brillioun_nbrillioun()) then 1986 do nb=1,nbrillioun 1987 call dread(5,kv,3) 1988 if ((brillioun_all_k(1,nb).ne.kv(1)).or. 1989 > (brillioun_all_k(2,nb).ne.kv(2)).or. 1990 > (brillioun_all_k(3,nb).ne.kv(3))) 1991 > ierr = 1 1992 end do 1993 if (ierr.eq.1) then 1994 if (mprint) then 1995 write(*,*)"Brillioun Zone Vectors do not match!" 1996 call flush(6) 1997 end if 1998 end if 1999 else 2000 if (mprint) then 2001 write(*,*)"Brillioun Zone Points do not match!" 2002 write(*,*)"NB = ",nbrillioun," not equal ", 2003 > brillioun_nbrillioun() 2004 call flush(6) 2005 end if 2006 ierr = 1 2007 end if 2008 end if 2009 2010 msglen = 1 2011 call Parallel_Brdcst_ivalues(MASTER,msglen,ierr) 2012 if (ierr.ne.0) then 2013 if (taskid.eq.MASTER) call closefile(5) 2014 return 2015 end if 2016 2017* **** send header data to all processors **** 2018 msglen = 1 2019 call Parallel_Brdcst_ivalues(MASTER,msglen,version) 2020 call Parallel_Brdcst_ivalues(MASTER,msglen,psp_type) 2021 msglen = 3 2022 call Parallel_Brdcst_ivalues(MASTER,msglen,nfft) 2023 msglen = 9 2024 call Parallel_Brdcst_values(MASTER,msglen,unita) 2025 2026 iatom(1) = ichar(atom(1:1)) 2027 iatom(2) = ichar(atom(2:2)) 2028 msglen = 2 2029 call BRDCST(9+MSGCHR,iatom,mitob(msglen),MASTER) 2030 atom(1:1) = char(iatom(1)) 2031 atom(2:2) = char(iatom(2)) 2032 2033 msglen = 1 2034 call Parallel_Brdcst_values(MASTER,msglen,amass) 2035 msglen = 1 2036 call Parallel_Brdcst_values(MASTER,msglen,zv) 2037 msglen = 1 2038 call Parallel_Brdcst_ivalues(MASTER,msglen,lmax) 2039 call Parallel_Brdcst_ivalues(MASTER,msglen,locp) 2040 call Parallel_Brdcst_ivalues(MASTER,msglen,nmax) 2041 call Parallel_Brdcst_ivalues(MASTER,msglen,nprj) 2042 lmmax=(lmax+1)**2 - (2*locp+1) 2043 2044 msglen=lmax+1 2045 call Parallel_Brdcst_values(MASTER,msglen,rc) 2046 2047 msglen=nprj 2048 call Parallel_Brdcst_ivalues(MASTER,msglen,n_projector) 2049 call Parallel_Brdcst_ivalues(MASTER,msglen,l_projector) 2050 call Parallel_Brdcst_ivalues(MASTER,msglen,m_projector) 2051 2052 if (psp_type.eq.7) then 2053 msglen=nprj 2054 call Parallel_Brdcst_values(MASTER,msglen,Kijl) 2055 else 2056 msglen=nmax*nmax*(lmax+1) 2057 call Parallel_Brdcst_values(MASTER,msglen,Gijl) 2058 if(psp_type.eq.1) call Parallel_Brdcst_values(MASTER,msglen,Kijl) 2059 end if 2060 msglen=1 2061 call Parallel_Brdcst_values(MASTER,msglen,rcore) 2062 2063 2064* **** determine semicore value **** 2065 if (rcore.gt.0.0d0) then 2066 semicore = .true. 2067 else 2068 semicore = .false. 2069 end if 2070 2071* *** read in vl 3d block *** 2072 call C3dB_r_read(1,5,tmp2,tmp,-1,-1,.true.) 2073 call Cram_r_pack(0,tmp2) 2074 call Cram_r_Copy(0,tmp2,vl) 2075 2076 2077* **** read in vnl 3d blocks **** 2078 2079ccccccccccccccccccc 2080 if (psp_type.eq.7) then 2081cccccccccccccccccccc 2082c Relativistic Non-Local PPot. 2083c spin-orbit included... 2084cccccccccccccccccccc 2085 vnl_tag = cpsp_projector_alloc(brillioun_nbrillq(), 2086 > nprj,2*npack1) 2087 vnl2_tag = cpsp_projector_alloc(brillioun_nbrillq(), 2088 > nprj,2*npack1) 2089!*** XXXX double check this INPUT **** 2090 do nb=1,brillioun_nbrillioun() 2091 call K1dB_ktoqp(nb,nbq,pk) 2092 do n=1,nprj 2093 call C3dB_c_Read(1,5,tmp2,tmp,-1,pk) 2094 if (pk.eq.taskid_k) then 2095 call Cram_c_pack(nbq,tmp2) 2096 call cpsp_projector_add(vnl_tag,nbq,n,tmp2) 2097 end if 2098 end do 2099 do n=1,nprj 2100 call C3dB_c_read(1,5,tmp2,tmp,-1,pk) 2101 if (pk.eq.taskid_k) then 2102 call Cram_c_pack(nbq,tmp2) 2103 call cpsp_projector_add(vnl2_tag,nbq,n,tmp2) 2104 end if 2105 end do 2106 end do 2107 2108 else 2109 vnl_tag = cpsp_projector_alloc(brillioun_nbrillq(), 2110 > nprj,npack1) 2111 do nb=1,brillioun_nbrillioun() 2112 call K1dB_ktoqp(nb,nbq,pk) 2113 do n=1,nprj 2114 2115 call C3dB_r_read(1,5,tmp2,tmp,-1,pk,.true.) 2116 2117 if (pk.eq.taskid_k) then 2118 call Cram_r_pack(nbq,tmp2) 2119 call cpsp_projector_add(vnl_tag,nbq,n,tmp2) 2120 endif 2121 end do 2122 end do 2123* **** read in v_spin_orbit 3d blocks **** 2124 if (psp_type.eq.1) then 2125 vnl2_tag = cpsp_projector_alloc(brillioun_nbrillq(), 2126 > nprj,2*npack1) 2127 do nb=1,brillioun_nbrillioun() 2128 call K1dB_ktoqp(nb,nbq,pk) 2129 do n=1,nprj 2130 call C3dB_c_read(1,5,tmp2,tmp,-1,pk) 2131 if (pk.eq.taskid_k) then 2132 call Cram_c_pack(nbq,tmp2) 2133 call cpsp_projector_add(vnl2_tag,nbq,n,tmp2) 2134 endif 2135 end do 2136 end do 2137 end if 2138 end if 2139* **** read in semicore density block **** 2140 if (semicore) then 2141 call C3dB_r_read(1,5,tmp2,tmp,-1,-1,.true.) 2142 call Cram_r_pack(0,tmp2) 2143 call Cram_r_Copy(0,tmp2,ncore(1)) 2144 2145 call C3dB_r_read(1,5,tmp2,tmp,-1,-1,.true.) 2146 call Cram_r_pack(0,tmp2) 2147 call Cram_r_Copy(0,tmp2,ncore(1+2*npack0)) 2148 2149 call C3dB_r_read(1,5,tmp2,tmp,-1,-1,.true.) 2150 call Cram_r_pack(0,tmp2) 2151 call Cram_r_Copy(0,tmp2,ncore(1+3*npack0)) 2152 2153 call C3dB_r_read(1,5,tmp2,tmp,-1,-1,.true.) 2154 call Cram_r_pack(0,tmp2) 2155 call Cram_r_Copy(0,tmp2,ncore(1+4*npack0)) 2156 end if 2157 2158* *** close fname binary file *** 2159 if (taskid.eq.MASTER) then 2160 call closefile(5) 2161 end if 2162 2163 ierr = 0 2164 return 2165 end 2166 2167 2168* *********************************** 2169* * * 2170* * cpsp_readall * 2171* * * 2172* *********************************** 2173 2174 subroutine cpsp_readall() 2175 implicit none 2176 2177#include "bafdecls.fh" 2178#include "cpsp_common.fh" 2179#include "c_semicore_common.fh" 2180#include "stdio.fh" 2181#include "errquit.fh" 2182#include "util.fh" 2183 2184* **** local variables **** 2185 integer ngp(3),version,nfft3d,npack0,npack1 2186 integer ia,l 2187 real*8 unita(3,3) 2188 character*12 boundry 2189 integer tmp(2),tmp2(2),ierr 2190 logical value,found,correct_box,mprint 2191 character*5 element 2192 character*50 fname 2193 2194* **** parallel i/o variable **** 2195 integer MASTER,taskid 2196 parameter(MASTER=0) 2197 2198* **** external functions **** 2199 logical nwpw_filefind,control_spin_orbit,control_print 2200 integer control_ngrid 2201 real*8 control_unita 2202 character*12 control_boundry 2203 character*4 ion_atom 2204 external nwpw_filefind,control_spin_orbit,control_print 2205 external control_ngrid 2206 external control_unita 2207 external control_boundry 2208 external ion_atom 2209 2210 2211 call C3dB_nfft3d(1,nfft3d) 2212 call Cram_npack(0,npack0) 2213 call Cram_max_npack(npack1) 2214 call Parallel_taskid(taskid) 2215 !nbrill = brillioun_nbrillioun() 2216 2217 mprint = (taskid.eq.MASTER).and.control_print(print_medium) 2218 2219* *** set semicore(0) ***** 2220 log_mb(semicore(1)) = .false. 2221 2222 value = BA_push_get(mt_dbl,(4*nfft3d),'tmp',tmp(2),tmp(1)) 2223 if (.not. value) 2224 > call errquit('cpsp_readall:out of stack memory',0,MA_ERR) 2225 2226 value = BA_push_get(mt_dbl,(2*nfft3d),'tmp2',tmp2(2),tmp2(1)) 2227 if (.not. value) 2228 > call errquit('cpsp_readall:out of stack memory',0,MA_ERR) 2229 2230 do_spin_orbit=.false. 2231 2232* **** read pseudopotentials **** 2233 do ia=1,npsp 2234 2235* **** define formatted psp name **** 2236 element = ' ' 2237 element = ion_atom(ia) 2238 l = index(element,' ') - 1 2239 fname = element(1:l)//'.cpp' 2240 2241 found = .false. 2242 do while (.not.found) 2243 2244 if (nwpw_filefind(fname)) then 2245 call cpsp_read(fname,comment(ia), 2246 > int_mb(psp_type(1)+ia-1), 2247 > version, 2248 > ngp,unita, 2249 > atom(ia), 2250 > dbl_mb(amass(1)+ia-1), 2251 > dbl_mb(zv(1)+ia-1), 2252 > int_mb(lmmax(1)+ia-1), 2253 > int_mb(lmax(1)+ia-1), 2254 > int_mb(locp(1)+ia-1), 2255 > int_mb(nmax(1)+ia-1), 2256 > dbl_mb(rc(1) + (ia-1)*(lmax_max+1)), 2257 > int_mb(nprj(1)+ia-1), 2258 > int_mb(n_projector(1) 2259 > + (ia-1)*jmmax_max), 2260 > int_mb(l_projector(1) 2261 > + (ia-1)*jmmax_max), 2262 > int_mb(m_projector(1) 2263 > + (ia-1)*jmmax_max), 2264 > dbl_mb(Gijl(1) 2265 > + (ia-1)*gij_stride), 2266 > dbl_mb(Kijl(1) 2267 > + (ia-1)*kij_stride), 2268 > nfft3d,npack0,npack1,lmmax_max,nmax_max, 2269 > dbl_mb(vl(1) + (ia-1)*npack0), 2270 > int_mb(vnl(1) + ia-1), 2271 > int_mb(vnlso(1) + ia-1), 2272 > log_mb(semicore(1)+ia), 2273 > dbl_mb(rcore(1)+ia-1), 2274 > dbl_mb(ncore(1)+ (ia-1)*npack0*5), 2275 > dbl_mb(tmp(1)),dbl_mb(tmp2(1)), 2276 > ierr) 2277 2278 if ((int_mb(psp_type(1)+(ia-1)).eq.1).and. 2279 > control_spin_orbit()) then 2280 do_spin_orbit=.true. 2281 if (mprint) then 2282 write(*,*)"HGH spin orbit pseudopotential activated" 2283 call flush(6) 2284 end if 2285 end if 2286 if ((int_mb(psp_type(1)+(ia-1)).eq.7)) then 2287 if (mprint) then 2288 write(*,*)"RTC spin orbit pseudopotential activated" 2289 call flush(6) 2290 end if 2291 end if 2292 2293* **** set semicore(0) **** 2294 if (log_mb(semicore(1)+ia)) log_mb(semicore(1)) = .true. 2295 if (ierr.gt.1) go to 9000 2296* ************************************************************** 2297* ***** logic for finding out if psp is correctly formatted **** 2298* ************************************************************** 2299 correct_box = .true. 2300 boundry = control_boundry() 2301 l =index(boundry,' ') - 1 2302 if ( (ngp(1).ne.control_ngrid(1)) .or. 2303 > (ngp(2).ne.control_ngrid(2)) .or. 2304 > (ngp(3).ne.control_ngrid(3)) .or. 2305 > (unita(1,1).ne.control_unita(1,1)) .or. 2306 > (unita(2,1).ne.control_unita(2,1)) .or. 2307 > (unita(3,1).ne.control_unita(3,1)) .or. 2308 > (unita(1,2).ne.control_unita(1,2)) .or. 2309 > (unita(2,2).ne.control_unita(2,2)) .or. 2310 > (unita(3,2).ne.control_unita(3,2)) .or. 2311 > (unita(1,3).ne.control_unita(1,3)) .or. 2312 > (unita(2,3).ne.control_unita(2,3)) .or. 2313 > (unita(3,3).ne.control_unita(3,3)) .or. 2314 > ((boundry(1:l).eq.'periodic').and.(version.ne.3)).or. 2315 > ((boundry(1:l).eq.'aperiodic').and.(version.ne.4))) then 2316 correct_box = .false. 2317 if (mprint) then 2318 write(luout,*) 2319 write(luout,*) 2320 > "pseudopotential is not correctly formatted:",fname 2321 end if 2322 if ((ierr.eq.0).and.(int_mb(nprj(1)+ia-1).gt.0)) then 2323 call cpsp_projector_dealloc(int_mb(vnl(1)+ia-1)) 2324 if ((int_mb(psp_type(1)+ia-1).eq.7) 2325 > .or.(do_spin_orbit)) then 2326 call cpsp_projector_dealloc(int_mb(vnlso(1)+ia-1)) 2327 end if 2328 end if 2329 end if 2330 if (correct_box) found = .true. 2331 if (ierr.eq.1) then 2332 found = .false. 2333 if (mprint) then 2334 write(luout,*) 2335 write(luout,*) 2336 > "pseudopotential is not correctly formatted---", 2337 > "bad brillioun zone:",fname 2338 end if 2339 end if 2340 2341 end if 2342 2343* **** generate formatted pseudopotential atom.cpp ***** 2344 if (.not.found) then 2345 call cpsp_formatter_auto(ion_atom(ia)) 2346 end if 2347 2348 end do !***do while **** 2349 2350 2351 end do 2352 2353 9000 value = BA_pop_stack(tmp2(2)) 2354 value = value.and.BA_pop_stack(tmp(2)) 2355 if (.not. value) 2356 > call errquit('cpsp_readall:error popping stack',0,MA_ERR) 2357 2358* **** done reading set nprj_max and prjtmp for nonlocal psp operator **** 2359 call cpsp_proj_init() 2360 2361 return 2362 end 2363 2364 2365 2366 2367