1ckbn ccsd_t.F modified to tce_mrcc_ccsdpt_uncoup_pt3.F 2 3 SUBROUTINE tce_mrcc_ccsdpt_subg(d_t1,k_t1_offset, 4 + d_t2,k_t2_offset, 5 + d_v2,k_v2_offset, 6 + d_f1, 7 + k_f1_offset, 8c + energy1,energy2,energy3, 9 + energy3, 10 1 iref) 11c 1 iref,nref) 12 13 IMPLICIT NONE 14#include "mafdecls.fh" 15#include "tcgmsg.fh" 16#include "global.fh" 17#include "bas.fh" 18#include "geom.fh" 19#include "rtdb.fh" 20#include "sym.fh" 21#include "util.fh" 22#include "msgids.fh" 23#include "stdio.fh" 24#include "sf.fh" 25#include "inp.fh" 26#include "errquit.fh" 27#include "tce.fh" 28#include "tce_main.fh" 29#include "tce_hetio.fh" 30#include "tce_diis.fh" 31#include "tce_prop.fh" 32#include "tce_restart.fh" 33#include "tce_mrcc.fh" 34#include "offl.fh" 35c integer nref 36 integer iref 37 integer d_t1 38 integer k_t1_offset 39 integer d_t2 40 integer k_t2_offset 41 integer d_v2 42 integer k_v2_offset 43 integer t_h1b, t_h1 44 integer t_h2b, t_h2 45 integer t_h3b, t_h3 46 integer t_p4b, t_p4 47 integer t_p5b, t_p5 48 integer t_p6b, t_p6 49 integer k_singles,l_singles 50 integer k_doubles,l_doubles 51ckbn-2 52c integer k_q3fnt2,l_q3fnt2 53 integer size,i,j 54 integer nxtask 55 integer next 56 integer nprocs 57 integer count 58 integer offset_p4,offset_p5,offset_p6 59 integer offset_h1,offset_h2,offset_h3 60 integer range_p4,range_p5,range_p6 61 integer range_h1,range_h2,range_h3 62 double precision energy(3) 63c double precision energy1,energy2 64ckbn-2 65 double precision energy3 66 double precision factor,denom 67 double precision denom_p4,denom_p5,denom_p6 68 double precision denom_h1,denom_h2,denom_h3 69 external nxtask 70ckbn-3 71 logical nodezero 72 integer d_f1,k_f1_offset 73 double precision dnorm1,dnorm2,dmaxt1,dmaxt2 74 double precision dmint1,dmint2 75 integer ioff 76 integer orbspin(6),orbindex(6),off(6),blck(6) 77 integer itype,idim1, idim2(2),idim3,ndim 78 INTEGER NXTASKsub 79 EXTERNAL NXTASKsub 80 81ckbn-2 82 nodezero=(ga_nodeid().eq.0) 83 84 if(lusesub) then 85 nprocs = GA_pgroup_NNODES(mypgid) 86 count = 0 87 next = NXTASKsub(nprocs,1,mypgid) 88 else 89 nprocs = GA_NNODES() 90 count = 0 91 next = nxtask(nprocs,1) 92 endif 93c 94 energy(1)=0.0d0 95 energy(2)=0.0d0 96 energy(3)=0.0d0 97c energy1 =0.0d0 98c energy2 =0.0d0 99 energy3 =0.0d0 100c 101c MIC 102c 103#ifdef USE_OFFLOAD 104 triplesx_alloced=.false. 105 v2sub_alloced=.false. 106 t1sub_alloced=.false. 107 t2sub_alloced=.false. 108#endif 109c estimate triplesx size 110 range_p4=0 111 do t_p4b = noab+1,noab+nvab 112 range_p4 = max(range_p4,int_mb(k_range+t_p4b-1)) 113 enddo 114 range_h1=0 115 do t_h1b = 1,noab 116 range_h1 = max(range_h1,int_mb(k_range+t_h1b-1)) 117 enddo 118 size=(range_p4**3)*(range_h1**3) 119 if (.not.MA_PUSH_GET(mt_dbl,size,'(T) singles',l_singles, 120 1 k_singles)) call errquit('ccsd_t: MA error sgl', 121 2 size,MA_ERR) 122 if (.not.MA_PUSH_GET(mt_dbl,size,'(T) doubles',l_doubles, 123 1 k_doubles)) call errquit('ccsd_t: MA error dbl', 124 2 size,MA_ERR) 125#ifdef USE_OFFLOAD 126 triplesx_copyback=.true. 127c call util_align64(size) 128 triplesx_mxlgth=size 129 call offl_alloc(dbl_mb(k_doubles),size) 130 triplesx_alloced=.true. 131#endif 132c stagger start of loop 133 call util_mpinap(100) 134c 135c 136 do t_p4b = noab+1,noab+nvab 137 range_p4 = int_mb(k_range+t_p4b-1) 138 offset_p4 = k_evl_sorted+int_mb(k_offset+t_p4b-1)-1 139 do t_p5b = t_p4b,noab+nvab 140 range_p5 = int_mb(k_range+t_p5b-1) 141 offset_p5 = k_evl_sorted+int_mb(k_offset+t_p5b-1)-1 142 do t_p6b = t_p5b,noab+nvab 143 range_p6 = int_mb(k_range+t_p6b-1) 144 offset_p6 = k_evl_sorted+int_mb(k_offset+t_p6b-1)-1 145 do t_h1b = 1,noab 146 range_h1 = int_mb(k_range+t_h1b-1) 147 offset_h1 = k_evl_sorted+int_mb(k_offset+t_h1b-1)-1 148 do t_h2b = t_h1b,noab 149 range_h2 = int_mb(k_range+t_h2b-1) 150 offset_h2 = k_evl_sorted+int_mb(k_offset+t_h2b-1)-1 151 do t_h3b = t_h2b,noab 152 range_h3 = int_mb(k_range+t_h3b-1) 153 offset_h3 = k_evl_sorted+int_mb(k_offset+t_h3b-1)-1 154 if (next.eq.count) then 155 if (int_mb(k_spin+t_p4b-1) 156 1 +int_mb(k_spin+t_p5b-1) 157 2 +int_mb(k_spin+t_p6b-1) 158 3 .eq.int_mb(k_spin+t_h1b-1) 159 4 +int_mb(k_spin+t_h2b-1) 160 5 +int_mb(k_spin+t_h3b-1)) then 161 if ((.not.restricted).or. 162 1 (int_mb(k_spin+t_p4b-1) 163 1 +int_mb(k_spin+t_p5b-1) 164 2 +int_mb(k_spin+t_p6b-1) 165 3 +int_mb(k_spin+t_h1b-1) 166 4 +int_mb(k_spin+t_h2b-1) 167 5 +int_mb(k_spin+t_h3b-1).le.8)) then 168 if (ieor(int_mb(k_sym+t_p4b-1), 169 1 ieor(int_mb(k_sym+t_p5b-1), 170 2 ieor(int_mb(k_sym+t_p6b-1), 171 3 ieor(int_mb(k_sym+t_h1b-1), 172 4 ieor(int_mb(k_sym+t_h2b-1), 173 5 int_mb(k_sym+t_h3b-1)))))).eq.0) then 174 175 176 size = range_p4 * range_p5 * range_p6 177 3 * range_h1 * range_h2 * range_h3 178c MIC 179c zeroing --- 180cc call dcopy(size, 0.0d0, 0, dbl_mb(k_singles), 1) 181cc call offl_zerofill(dbl_mb(k_singles),size) 182cc call dcopy(size, 0.0d0, 0, dbl_mb(k_doubles), 1) 183cc call offl_zerofill(dbl_mb(k_doubles),size) 184 call dcopy(triplesx_mxlgth, 0.0d0, 0, dbl_mb(k_singles), 1) 185 186 call dcopy(triplesx_mxlgth, 0.0d0, 0, dbl_mb(k_doubles), 1) 187#ifdef USE_OFFLOAD 188 call offl_zerofill(dbl_mb(k_doubles),triplesx_mxlgth) 189#endif 190c ----------- 191c 192ccx if (.not.MA_PUSH_GET(mt_dbl,size,'(T) singles',l_singles, 193ccx 1 k_singles)) call errquit('ccsd_t: MA error',1,MA_ERR) 194ccx if (.not.MA_PUSH_GET(mt_dbl,size,'(T) doubles',l_doubles, 195ccx 1 k_doubles)) call errquit('ccsd_t: MA error',2,MA_ERR) 196ccx do i = 0, size-1 197ccx dbl_mb(k_singles+i) = 0.0d0 198ccx dbl_mb(k_doubles+i) = 0.0d0 199ccx enddo 200cx call tce_mrcc_q3vnt2(dbl_mb(k_doubles),d_t2,d_v2,k_t2_offset, 201cx 1 k_v2_offset,t_h1b,t_h2b,t_h3b,t_p4b,t_p5b,t_p6b,2) 202 call ccsd_t_doubles_l(dbl_mb(k_doubles), 203 1 d_t2,d_v2,k_t2_offset, 204 1 k_v2_offset,t_h1b,t_h2b,t_h3b,t_p4b,t_p5b,t_p6b,2) 205 call tce_mrcc_q3vnt1_subg(dbl_mb(k_singles),d_t1,d_v2, 206 + k_t1_offset, 207 1 k_v2_offset,t_h1b,t_h2b,t_h3b,t_p4b,t_p5b,t_p6b,2) 208 call tce_mrcc_q3fnt2_subg(dbl_mb(k_singles),d_f1,d_t2, 209 + k_f1_offset, 210 + k_t2_offset,t_h1b,t_h2b,t_h3b,t_p4b,t_p5b,t_p6b,2) 211 212 if (restricted) then 213 factor = 2.0d0 214 else 215 factor = 1.0d0 216 endif 217 if ((t_p4b.eq.t_p5b).and.(t_p5b.eq.t_p6b)) then 218 factor = factor / 6.0d0 219 else if ((t_p4b.eq.t_p5b).or.(t_p5b.eq.t_p6b)) then 220 factor = factor / 2.0d0 221 endif 222 if ((t_h1b.eq.t_h2b).and.(t_h2b.eq.t_h3b)) then 223 factor = factor / 6.0d0 224 else if ((t_h1b.eq.t_h2b).or.(t_h2b.eq.t_h3b)) then 225 factor = factor / 2.0d0 226 endif 227c 228c factor = [ 1/36, 1/18, 1/12, 1/6, 1/4, 1/3, 1/2, 1, 2] 229c 230 i = 0 231 do t_p4 = 1, range_p4 232 denom_p4 = dbl_mb(offset_p4+t_p4) 233 do t_p5 = 1, range_p5 234 denom_p5 = dbl_mb(offset_p5+t_p5) 235 do t_p6 = 1, range_p6 236 denom_p6 = dbl_mb(offset_p6+t_p6) 237 do t_h1 = 1, range_h1 238 denom_h1 = dbl_mb(offset_h1+t_h1) 239 do t_h2 = 1, range_h2 240 denom_h2 = dbl_mb(offset_h2+t_h2) 241 do t_h3 = 1, range_h3 242 denom_h3 = dbl_mb(offset_h3+t_h3) 243 244cxx 245 orbspin(1) = int_mb(k_spin+t_p4b-1)-1 246 orbspin(2) = int_mb(k_spin+t_p5b-1)-1 247 orbspin(3) = int_mb(k_spin+t_p6b-1)-1 248 orbspin(4) = int_mb(k_spin+t_h1b-1)-1 249 orbspin(5) = int_mb(k_spin+t_h2b-1)-1 250 orbspin(6) = int_mb(k_spin+t_h3b-1)-1 251 252 orbindex(1) = (1-orbspin(1)+int_mb(k_mo_indexm(iref)+ 253 1 int_mb(k_offset+t_p4b-1)+t_p4-1))/2 254 orbindex(2) = (1-orbspin(2)+int_mb(k_mo_indexm(iref)+ 255 1 int_mb(k_offset+t_p5b-1)+t_p5-1))/2 256 orbindex(3) = (1-orbspin(3)+int_mb(k_mo_indexm(iref)+ 257 1 int_mb(k_offset+t_p6b-1)+t_p6-1))/2 258 orbindex(4) = (1-orbspin(4)+int_mb(k_mo_indexm(iref)+ 259 1 int_mb(k_offset+t_h1b-1)+t_h1-1))/2 260 orbindex(5) = (1-orbspin(5)+int_mb(k_mo_indexm(iref)+ 261 1 int_mb(k_offset+t_h2b-1)+t_h2-1))/2 262 orbindex(6) = (1-orbspin(6)+int_mb(k_mo_indexm(iref)+ 263 1 int_mb(k_offset+t_h3b-1)+t_h3-1))/2 264 265 orbindex(1) = moindexes(orbindex(1),orbspin(1)+1,iref) 266 orbindex(2) = moindexes(orbindex(2),orbspin(2)+1,iref) 267 orbindex(3) = moindexes(orbindex(3),orbspin(3)+1,iref) 268 orbindex(4) = moindexes(orbindex(4),orbspin(4)+1,iref) 269 orbindex(5) = moindexes(orbindex(5),orbspin(5)+1,iref) 270 orbindex(6) = moindexes(orbindex(6),orbspin(6)+1,iref) 271 272 273 if(isactive(orbindex(1),orbspin(1)+1).and. 274 1 isactive(orbindex(2),orbspin(2)+1) .and. 275 2 isactive(orbindex(3),orbspin(3)+1) .and. 276 3 isactive(orbindex(4),orbspin(4)+1) .and. 277 4 isactive(orbindex(5),orbspin(5)+1) .and. 278 5 isactive(orbindex(6),orbspin(6)+1)) then 279 dbl_mb(k_doubles+i) = 0.0d0 280 endif 281cxx 282 283 if(.not.lusesamefock_nonit) then 284 denom = 1.0d0 / ((denom_h1 + denom_h2 + denom_h3) 285 + - (denom_p4 + denom_p5 + denom_p6)) 286 287 288 else 289 orbspin(1) = int_mb(k_spin+t_p4b-1)-1 290 orbspin(2) = int_mb(k_spin+t_p5b-1)-1 291 orbspin(3) = int_mb(k_spin+t_p6b-1)-1 292 orbspin(4) = int_mb(k_spin+t_h1b-1)-1 293 orbspin(5) = int_mb(k_spin+t_h2b-1)-1 294 orbspin(6) = int_mb(k_spin+t_h3b-1)-1 295 296 orbindex(1) = (1-orbspin(1)+int_mb(k_mo_indexm(iref)+ 297 1 int_mb(k_offset+t_p4b-1)+t_p4-1))/2 298 orbindex(2) = (1-orbspin(2)+int_mb(k_mo_indexm(iref)+ 299 1 int_mb(k_offset+t_p5b-1)+t_p5-1))/2 300 orbindex(3) = (1-orbspin(3)+int_mb(k_mo_indexm(iref)+ 301 1 int_mb(k_offset+t_p6b-1)+t_p6-1))/2 302 orbindex(4) = (1-orbspin(4)+int_mb(k_mo_indexm(iref)+ 303 1 int_mb(k_offset+t_h1b-1)+t_h1-1))/2 304 orbindex(5) = (1-orbspin(5)+int_mb(k_mo_indexm(iref)+ 305 1 int_mb(k_offset+t_h2b-1)+t_h2-1))/2 306 orbindex(6) = (1-orbspin(6)+int_mb(k_mo_indexm(iref)+ 307 1 int_mb(k_offset+t_h3b-1)+t_h3-1))/2 308 309 do j=1,6 310 blck(j) = orbinblck(orbindex(j),orbspin(j)+1,1) 311 off(j) = offsetinblck(orbindex(j),orbspin(j)+1,1) 312 enddo 313 314 denom = 1.0d0/( 315 + -dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1) 316 + +blck(1)-1)+off(1)) 317 + -dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1) 318 + +blck(2)-1)+off(2)) 319 + -dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1) 320 + +blck(3)-1)+off(3)) 321 + +dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1) 322 + +blck(4)-1)+off(4)) 323 + +dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1) 324 + +blck(5)-1)+off(5)) 325 + +dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1) 326 + +blck(6)-1)+off(6))) 327 endif ! lusesamefock_nonit 328 329 if((abs(denom)).gt.1.0d2) then 330c if(nodezero) write(LuOut,*) 331 write(LuOut,*) 332 + 'Warning:Denominator is very low. 1/D=',abs(denom) 333c if(nodezero) call util_flush(LuOut) 334 endif 335 336 337c energy1 = energy1 + factor*denom 338c 1 * dbl_mb(k_doubles+i)*dbl_mb(k_doubles+i) 339c energy2 = energy2 + factor*denom*dbl_mb(k_doubles+i) 340c 1 * (dbl_mb(k_doubles+i)+dbl_mb(k_singles+i)) 341 energy3 = energy3 + factor*denom*dbl_mb(k_doubles+i) 342 + * (dbl_mb(k_doubles+i)+dbl_mb(k_singles+i)) 343 344 345 i = i + 1 346 enddo 347 enddo 348 enddo 349 enddo 350 enddo 351 enddo 352c if (.not.MA_POP_STACK(l_q3fnt2)) 353c 1 call errquit('ccsd_t',2,MA_ERR) 354ccx if (.not.MA_POP_STACK(l_doubles)) 355ccx 1 call errquit('ccsd_t',3,MA_ERR) 356ccx if (.not.MA_POP_STACK(l_singles)) 357ccx 1 call errquit('ccsd_t',4,MA_ERR) 358 endif 359 endif 360 endif 361c ENDIF 362 if(lusesub) then 363 next = NXTASKsub(nprocs,1,mypgid) 364 else 365 next = nxtask(nprocs,1) 366 endif 367 endif 368 count = count + 1 369 enddo 370 enddo 371 enddo 372 enddo 373 enddo 374 enddo 375c 376c 377#ifdef USE_OFFLOAD 378 call offl_free(dbl_mb(k_doubles),size) 379 triplesx_alloced=.false. 380#if 0 381 call offl_free(dbl_mb(k_singles),size) 382#endif 383#endif 384 385 if (.not.MA_POP_STACK(l_doubles)) 386 1 call errquit('ccsd_t doubles',3,MA_ERR) 387 if (.not.MA_POP_STACK(l_singles)) 388 1 call errquit('ccsd_t singles',4,MA_ERR) 389c 390c 391 if(lusesub) then 392 next = NXTASKsub(-nprocs,1,mypgid) 393 call ga_pgroup_sync(mypgid) 394 else 395 next = nxtask(-nprocs,1) 396 call ga_sync() 397 endif 398c energy(1) = energy1 399c energy(2) = energy2 400 energy(3) = energy3 401 if(lusesub) then 402c call ga_pgroup_dgop(p_handle, type, buf, lenbuf, op) 403 call ga_pgroup_dgop(mypgid, mt_dbl,energy,3,'+') 404 else 405 call ga_dgop(mt_dbl,energy,3,'+') 406 endif 407c energy1 = energy(1) 408c energy2 = energy(2) 409 energy3 = energy(3) 410 411c if(lusesub)then 412c if(ga_pgroup_nodeid(mypgid).eq.0) then 413cc write(*,'(A,F17.10,A,F17.10,A,F17.10)')"energy1 ",energy1, 414cc + " energy2 ",energy2," energy3 ",energy3 415c write(*,'(A,F17.10,A,F17.10,A,F17.10)')"energy3 ",energy3 416c endif 417c else 418c if(nodezero) then 419cc write(*,'(A,F17.10,A,F17.10,A,F17.10)')"energy1 ",energy1, 420cc + " energy2 ",energy2," energy3 ",energy3 421c write(*,'(A,F17.10,A,F17.10,A,F17.10)')"energy3 ",energy3 422c endif 423c endif 424 425 return 426 end 427c $Id$ 428