1 SUBROUTINE ccsd_t2_8(d_a,k_a_offset,d_b,k_b_offset,d_c,k_c_offset) 2C $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $ 3C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 4C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 5C i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v 6 IMPLICIT NONE 7#include "global.fh" 8#include "mafdecls.fh" 9#include "sym.fh" 10#include "errquit.fh" 11#include "tce.fh" 12 INTEGER d_a 13 INTEGER k_a_offset 14 INTEGER d_b 15 INTEGER k_b_offset 16 INTEGER d_c 17 INTEGER k_c_offset 18 INTEGER NXTASK 19 INTEGER next 20 INTEGER nprocs 21 INTEGER count 22 INTEGER p3b 23 INTEGER p4b 24 INTEGER h1b 25 INTEGER h2b 26 INTEGER dimc 27 INTEGER l_cs 28 INTEGER k_cs 29 INTEGER p5b 30 INTEGER p6b 31 INTEGER p5b_1 32 INTEGER p6b_1 33 INTEGER h1b_1 34 INTEGER h2b_1 35 INTEGER p3b_2 36 INTEGER p4b_2 37 INTEGER p5b_2 38 INTEGER p6b_2 39 INTEGER dim_common 40 INTEGER dima_sort 41 INTEGER dima 42 INTEGER dimb_sort 43 INTEGER dimb 44 INTEGER l_as 45 INTEGER k_as 46 INTEGER l_a 47 INTEGER k_a 48 INTEGER l_bs 49 INTEGER k_bs 50 INTEGER l_b 51 INTEGER k_b 52 INTEGER nsuperp(2) 53 INTEGER isuperp 54 INTEGER l_c 55 INTEGER k_c 56 integer p5b_in,p6b_in 57 DOUBLE PRECISION FACTORIAL 58 EXTERNAL NXTASK 59 EXTERNAL FACTORIAL 60 nprocs = GA_NNODES() 61 count = 0 62 next = NXTASK(nprocs, 1) 63 DO p3b = noab+1,noab+nvab 64 DO p4b = p3b,noab+nvab 65 DO h1b = 1,noab 66 DO h2b = h1b,noab 67 IF ((.not.restricted).or.(int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1 68 &)+int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN 69 IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq. int_mb(k_spin+h 70 &1b-1)+int_mb(k_spin+h2b-1)) THEN 71 IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1),ieor(int_mb( 72 &k_sym+h1b-1),int_mb(k_sym+h2b-1)))) .eq. ieor(irrep_v,irrep_t)) TH 73 &EN 74 IF (next.eq.count) THEN 75 dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1) * int_mb(k_ra 76 &nge+h1b-1) * int_mb(k_range+h2b-1) 77 IF (.not.MA_PUSH_GET(mt_dbl,dimc,'cs',l_cs,k_cs)) CALL 78 & ERRQUIT('ccsd_t2_8',0,MA_ERR) 79 CALL DFILL(dimc,0.0d0,dbl_mb(k_cs),1) 80#if 0 81 DO p5b = noab+1,noab+nvab 82 DO p6b = p5b,noab+nvab 83#else 84 DO p5b_in =ga_nodeid(),ga_nodeid()+nvab-1 85 p5b=mod(p5b_in,nvab)+noab+1 86 DO p6b_in=ga_nodeid(),ga_nodeid()+nvab+noab-p5b 87 p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b 88#endif 89 IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq. int_mb(k_spin+h 90 &1b-1)+int_mb(k_spin+h2b-1)) THEN 91 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),ieor(int_mb( 92 &k_sym+h1b-1),int_mb(k_sym+h2b-1)))) .eq. irrep_t) THEN 93 CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b,p5b_1,p6b_1,h1b_1,h2b_1) 94 CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b,p3b_2,p4b_2,p5b_2,p6b_2) 95 dim_common = int_mb(k_range+p5b-1) * int_mb(k_range+p6b-1) 96 dima_sort = int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1) 97 dima = dim_common * dima_sort 98 dimb_sort = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1) 99 dimb = dim_common * dimb_sort 100 IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN 101 IF (.not.MA_PUSH_GET(mt_dbl,dima,'as',l_as,k_as)) CALL 102 & ERRQUIT('ccsd_t2_8',1,MA_ERR) 103 IF (.not.MA_PUSH_GET(mt_dbl,dima,'a',l_a,k_a)) CALL ERRQUIT(' 104 &ccsd_t2_8',2,MA_ERR) 105 CALL GET_HASH_BLOCK(d_a,dbl_mb(k_a),dima,int_mb(k_a_offset),(h2b_1 106 & - 1 + noab * (h1b_1 - 1 + noab * (p6b_1 - noab - 1 + nvab * (p5b_ 107 &1 - noab - 1))))) 108 CALL TCE_SORT_4(dbl_mb(k_a),dbl_mb(k_as),int_mb(k_range+p5b-1) 109 &,int_mb(k_range+p6b-1),int_mb(k_range+h1b-1),int_mb(k_range+h2b-1) 110 &,4,3,2,1,1.0d0) 111 IF (.not.MA_POP_STACK(l_a)) CALL ERRQUIT('ccsd_t2_8',3,MA_ERR) 112 IF (.not.MA_PUSH_GET(mt_dbl,dimb,'bs',l_bs,k_bs)) CALL 113 & ERRQUIT('ccsd_t2_8',4,MA_ERR) 114 IF (.not.MA_PUSH_GET(mt_dbl,dimb,'b',l_b,k_b)) CALL ERRQUIT(' 115 &ccsd_t2_8',5,MA_ERR) 116 if(.not.intorb) then 117 CALL GET_HASH_BLOCK(d_b,dbl_mb(k_b),dimb,int_mb(k_b_offset),(p6b_2 118 & - 1 + (noab+nvab) * (p5b_2 - 1 + (noab+nvab) * (p4b_2 - 1 + (noab 119 &+nvab) * (p3b_2 - 1))))) 120 else 121 CALL GET_HASH_BLOCK_I(d_b,dbl_mb(k_b),dimb,int_mb(k_b_offset), 122 &(p6b_2 123 & - 1 + (noab+nvab) * (p5b_2 - 1 + (noab+nvab) * (p4b_2 - 1 + (noab 124 &+nvab) * (p3b_2 - 1)))),p6b_2,p5b_2,p4b_2,p3b_2) 125 end if 126 CALL TCE_SORT_4(dbl_mb(k_b),dbl_mb(k_bs),int_mb(k_range+p3b-1) 127 &,int_mb(k_range+p4b-1),int_mb(k_range+p5b-1),int_mb(k_range+p6b-1) 128 &,2,1,4,3,1.0d0) 129 IF (.not.MA_POP_STACK(l_b)) CALL ERRQUIT('ccsd_t2_8',6,MA_ERR) 130 nsuperp(1) = 1 131 nsuperp(2) = 1 132 isuperp = 1 133 IF (p5b .eq. p6b) THEN 134 nsuperp(isuperp) = nsuperp(isuperp) + 1 135 ELSE 136 isuperp = isuperp + 1 137 END IF 138 CALL DGEMM('T','N',dima_sort,dimb_sort,dim_common,2.0d0/FACTORIAL( 139 &nsuperp(1))/FACTORIAL(nsuperp(2)),dbl_mb(k_as),dim_common,dbl_ 140 &mb(k_bs),dim_common,1.0d0,dbl_mb(k_cs),dima_sort) 141 IF (.not.MA_POP_STACK(l_bs)) CALL ERRQUIT('ccsd_t2_8',7,MA_ERR 142 &) 143 IF (.not.MA_POP_STACK(l_as)) CALL ERRQUIT('ccsd_t2_8',8,MA_ERR 144 &) 145 END IF 146 END IF 147 END IF 148 END DO 149 END DO 150 IF (.not.MA_PUSH_GET(mt_dbl,dimc,'c',l_c,k_c)) CALL ERRQUIT(' 151 &ccsd_t2_8',9,MA_ERR) 152 CALL TCE_SORT_4(dbl_mb(k_cs),dbl_mb(k_c),int_mb(k_range+p4b-1) 153 &,int_mb(k_range+p3b-1),int_mb(k_range+h2b-1),int_mb(k_range+h1b-1) 154 &,2,1,4,3,1.0d0/2.0d0) 155 CALL ADD_HASH_BLOCK(d_c,dbl_mb(k_c),dimc,int_mb(k_c_offset),(h2b - 156 & 1 + noab * (h1b - 1 + noab * (p4b - noab - 1 + nvab * (p3b - noab 157 & - 1))))) 158 IF (.not.MA_POP_STACK(l_c)) CALL ERRQUIT('ccsd_t2_8',10,MA_ERR) 159 IF (.not.MA_POP_STACK(l_cs)) CALL ERRQUIT('ccsd_t2_8',11,MA_ER 160 &R) 161 next = NXTASK(nprocs, 1) 162 END IF 163 count = count + 1 164 END IF 165 END IF 166 END IF 167 END DO 168 END DO 169 END DO 170 END DO 171 next = NXTASK(-nprocs, 1) 172 call GA_SYNC() 173 RETURN 174 END 175 176 SUBROUTINE ccsd_t2_8_test(d_a,k_a_offset, 177 & d_b,k_b_offset, 178 & d_c,k_c_offset, 179 & maxh,maxp) 180C $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $ 181C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 182C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 183C i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v 184 IMPLICIT NONE 185#include "global.fh" 186#include "mafdecls.fh" 187#include "sym.fh" 188#include "errquit.fh" 189#include "tce.fh" 190 INTEGER d_a,d_b,d_c 191 INTEGER k_a_offset,k_b_offset,k_c_offset 192 INTEGER maxh,maxp,dimhhpp,dimpppp,dimtemp 193 INTEGER next,nprocs,count 194 INTEGER p5b,p6b,p3b,p4b,h1b,h2b 195 INTEGER p5b_1,p6b_1,h1b_1,h2b_1 196 INTEGER p3b_2,p4b_2,p5b_2,p6b_2 197 INTEGER dima,dimb,dimc,dim_common,dima_sort,dimb_sort 198#ifdef USE_F90_ALLOCATABLE 199 double precision, allocatable :: f_a(:) 200 double precision, allocatable :: f_b(:) 201 double precision, allocatable :: f_c(:) 202 double precision, allocatable :: f_t(:) 203#ifdef USE_FASTMEM 204 !dec$ attributes fastmem :: f_a,f_b,f_c,f_t 205#endif 206 integer :: e_a,e_b,e_c,e_t 207#else 208 integer k_a, l_a 209 integer k_b, l_b 210 integer k_c, l_c 211 integer k_t, l_t 212 integer e_a,e_b,e_c,e_t 213#endif 214 double precision alpha 215 integer p5b_in,p6b_in 216 INTEGER NXTASK 217 EXTERNAL NXTASK 218 nprocs = GA_NNODES() 219 count = 0 220 next = NXTASK(nprocs, 1) 221 222 dimhhpp = maxh*maxh*maxp*maxp 223 dimpppp = maxp*maxp*maxp*maxp 224 dimtemp = max(dimpppp,dimhhpp) 225 226#ifdef USE_F90_ALLOCATABLE 227 allocate(f_a(1:dimhhpp),stat=e_a) 228 allocate(f_b(1:dimpppp),stat=e_b) 229 allocate(f_c(1:dimhhpp),stat=e_c) 230# ifndef USE_LOOPS_NOT_DGEMM 231 allocate(f_t(1:dimtemp),stat=e_t) 232# endif 233#else 234 e_a=0 235 if(.not.MA_PUSH_GET(mt_dbl,dimhhpp,"a",l_a,k_a)) e_a=-1 236 e_b=0 237 if(.not.MA_PUSH_GET(mt_dbl,dimpppp,"b",l_b,k_b)) e_b=-1 238 e_c=0 239 if(.not.MA_PUSH_GET(mt_dbl,dimhhpp,"c",l_c,k_c)) e_c=-1 240# ifndef USE_LOOPS_NOT_DGEMM 241 e_t=0 242 if(.not.MA_PUSH_GET(mt_dbl,dimtemp,"t",l_t,k_t)) e_t=-1 243# else 244 dimtemp=-12345 245 e_t=.false. 246# endif 247#endif 248 if (e_a.ne.0) call errquit("MA a",dimhhpp,MA_ERR) 249 if (e_b.ne.0) call errquit("MA b",dimpppp,MA_ERR) 250 if (e_c.ne.0) call errquit("MA c",dimhhpp,MA_ERR) 251 if (e_t.ne.0) call errquit("MA t",dimtemp,MA_ERR) 252 DO p3b = noab+1,noab+nvab 253 DO p4b = p3b,noab+nvab 254 DO h1b = 1,noab 255 DO h2b = h1b,noab 256 IF ((.not.restricted).or. 257 & ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) 258 & +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN 259 IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq. 260 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 261 IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1), 262 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 263 & .eq. ieor(irrep_v,irrep_t)) THEN 264 IF (next.eq.count) THEN 265 dima_sort = int_mb(k_range+h1b-1) 266 & * int_mb(k_range+h2b-1) 267 dimb_sort = int_mb(k_range+p3b-1) 268 & * int_mb(k_range+p4b-1) 269 dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1) 270 & * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1) 271#ifdef USE_F90_ALLOCATABLE 272 CALL DFILL(dimc,0.0d0,f_c,1) 273#if 0 274 DO p5b = noab+1,noab+nvab 275 DO p6b = p5b,noab+nvab 276#else 277 DO p5b_in =ga_nodeid(),ga_nodeid()+nvab-1 278 p5b=mod(p5b_in,nvab)+noab+1 279 DO p6b_in=ga_nodeid(),ga_nodeid()+nvab+noab-p5b 280 p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b 281#endif 282 IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq. 283 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 284 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1), 285 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 286 & .eq. irrep_t) THEN 287 CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b, 288 & p5b_1,p6b_1,h1b_1,h2b_1) 289 CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b, 290 & p3b_2,p4b_2,p5b_2,p6b_2) 291 dim_common = int_mb(k_range+p5b-1) 292 & * int_mb(k_range+p6b-1) 293 dima = dim_common * dima_sort 294 dimb = dim_common * dimb_sort 295 IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN 296#ifdef USE_LOOPS_NOT_DGEMM 297 CALL GET_HASH_BLOCK(d_a,f_a,dima, 298 & int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab* 299 & (p6b_1-noab-1+nvab*(p5b_1-noab-1))))) 300#else 301 CALL GET_HASH_BLOCK(d_a,f_t,dima, 302 & int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab* 303 & (p6b_1-noab-1+nvab*(p5b_1-noab-1))))) 304 CALL TCE_SORT_4(f_t,f_a, 305 & int_mb(k_range+p5b-1),int_mb(k_range+p6b-1), 306 & int_mb(k_range+h1b-1),int_mb(k_range+h2b-1), 307 & 4,3,2,1,1.0d0) 308#endif 309 if(.not.intorb) then 310#ifdef USE_LOOPS_NOT_DGEMM 311 CALL GET_HASH_BLOCK(d_b,f_b,dimb, 312 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 313 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 314 & (p3b_2-1))))) 315#else 316 CALL GET_HASH_BLOCK(d_b,f_t,dimb, 317 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 318 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 319 & (p3b_2-1))))) 320#endif 321 else 322#ifdef USE_LOOPS_NOT_DGEMM 323 CALL GET_HASH_BLOCK_I(d_b,f_b,dimb, 324 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 325 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 326 & (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2) 327#else 328 CALL GET_HASH_BLOCK_I(d_b,f_t,dimb, 329 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 330 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 331 & (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2) 332#endif 333 end if 334#ifndef USE_LOOPS_NOT_DGEMM 335 CALL TCE_SORT_4(f_t,f_b, 336 & int_mb(k_range+p3b-1),int_mb(k_range+p4b-1), 337 & int_mb(k_range+p5b-1),int_mb(k_range+p6b-1), 338 & 2,1,4,3,1.0d0) 339#endif 340 if (p5b .eq. p6b) then 341 alpha = 1.0d0 342 else 343 alpha = 2.0d0 344 end if 345#ifdef USE_LOOPS_NOT_DGEMM 346 call t2_p8(int_mb(k_range+h1b-1), 347 & int_mb(k_range+h2b-1), 348 & int_mb(k_range+p3b-1), 349 & int_mb(k_range+p4b-1), 350 & int_mb(k_range+p5b-1), 351 & int_mb(k_range+p6b-1), 352 & f_a,f_b,f_c, 353 & 0.5d0*alpha) 354#else 355 CALL DGEMM('T','N',dima_sort,dimb_sort,dim_common, 356 & alpha,f_a,dim_common,f_b, 357 & dim_common,1.0d0,f_c,dima_sort) 358#endif 359 END IF 360 END IF 361 END IF 362 END DO 363 END DO 364#ifdef USE_LOOPS_NOT_DGEMM 365 CALL ADD_HASH_BLOCK(d_c,f_c,dimc, 366 & int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab* 367 & (p4b-noab-1+nvab*(p3b-noab-1))))) 368#else 369 CALL TCE_SORT_4(f_c,f_t, 370 & int_mb(k_range+p4b-1),int_mb(k_range+p3b-1), 371 & int_mb(k_range+h2b-1),int_mb(k_range+h1b-1), 372 & 2,1,4,3,0.5d0) 373 CALL ADD_HASH_BLOCK(d_c,f_t,dimc, 374 & int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab* 375 & (p4b-noab-1+nvab*(p3b-noab-1))))) 376#endif 377#else 378celse// USE_F90_ALLOCATABLE 379 CALL DFILL(dimc,0.0d0,dbl_mb(k_c),1) 380 DO p5b = noab+1,noab+nvab 381 DO p6b = p5b,noab+nvab 382 IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq. 383 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 384 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1), 385 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 386 & .eq. irrep_t) THEN 387 CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b, 388 & p5b_1,p6b_1,h1b_1,h2b_1) 389 CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b, 390 & p3b_2,p4b_2,p5b_2,p6b_2) 391 dim_common = int_mb(k_range+p5b-1) 392 & * int_mb(k_range+p6b-1) 393 dima = dim_common * dima_sort 394 dimb = dim_common * dimb_sort 395 IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN 396#ifdef USE_LOOPS_NOT_DGEMM 397 CALL GET_HASH_BLOCK(d_a,dbl_mb(k_a),dima, 398 & int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab* 399 & (p6b_1-noab-1+nvab*(p5b_1-noab-1))))) 400#else 401 CALL GET_HASH_BLOCK(d_a,dbl_mb(k_t),dima, 402 & int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab* 403 & (p6b_1-noab-1+nvab*(p5b_1-noab-1))))) 404 CALL TCE_SORT_4(dbl_mb(k_t),dbl_mb(k_a), 405 & int_mb(k_range+p5b-1),int_mb(k_range+p6b-1), 406 & int_mb(k_range+h1b-1),int_mb(k_range+h2b-1), 407 & 4,3,2,1,1.0d0) 408#endif 409 if(.not.intorb) then 410#ifdef USE_LOOPS_NOT_DGEMM 411 CALL GET_HASH_BLOCK(d_b,dbl_mb(k_b),dimb, 412 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 413 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 414 & (p3b_2-1))))) 415#else 416 CALL GET_HASH_BLOCK(d_b,dbl_mb(k_t),dimb, 417 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 418 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 419 & (p3b_2-1))))) 420#endif 421 else 422#ifdef USE_LOOPS_NOT_DGEMM 423 CALL GET_HASH_BLOCK_I(d_b,dbl_mb(k_b),dimb, 424 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 425 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 426 & (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2) 427#else 428 CALL GET_HASH_BLOCK_I(d_b,dbl_mb(k_t),dimb, 429 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 430 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 431 & (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2) 432#endif 433 end if 434#ifndef USE_LOOPS_NOT_DGEMM 435 CALL TCE_SORT_4(dbl_mb(k_t),dbl_mb(k_b), 436 & int_mb(k_range+p3b-1),int_mb(k_range+p4b-1), 437 & int_mb(k_range+p5b-1),int_mb(k_range+p6b-1), 438 & 2,1,4,3,1.0d0) 439#endif 440 if (p5b .eq. p6b) then 441 alpha = 1.0d0 442 else 443 alpha = 2.0d0 444 end if 445#ifdef USE_LOOPS_NOT_DGEMM 446 call t2_p8(int_mb(k_range+h1b-1), 447 & int_mb(k_range+h2b-1), 448 & int_mb(k_range+p3b-1), 449 & int_mb(k_range+p4b-1), 450 & int_mb(k_range+p5b-1), 451 & int_mb(k_range+p6b-1), 452 & dbl_mb(k_a),dbl_mb(k_b),dbl_mb(k_c), 453 & 0.5d0*alpha) 454#else 455 CALL DGEMM('T','N',dima_sort,dimb_sort,dim_common, 456 & alpha,dbl_mb(k_a),dim_common,dbl_mb(k_b), 457 & dim_common,1.0d0,dbl_mb(k_c),dima_sort) 458#endif 459 END IF 460 END IF 461 END IF 462 END DO 463 END DO 464#ifdef USE_LOOPS_NOT_DGEMM 465 CALL ADD_HASH_BLOCK(d_c,dbl_mb(k_c),dimc, 466 & int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab* 467 & (p4b-noab-1+nvab*(p3b-noab-1))))) 468#else 469 CALL TCE_SORT_4(dbl_mb(k_c),dbl_mb(k_t), 470 & int_mb(k_range+p4b-1),int_mb(k_range+p3b-1), 471 & int_mb(k_range+h2b-1),int_mb(k_range+h1b-1), 472 & 2,1,4,3,0.5d0) 473 CALL ADD_HASH_BLOCK(d_c,dbl_mb(k_t),dimc, 474 & int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab* 475 & (p4b-noab-1+nvab*(p3b-noab-1))))) 476#endif 477#endif 478cendif// USE_F90_ALLOCATABLE 479 next = NXTASK(nprocs, 1) 480 END IF 481 count = count + 1 482 END IF 483 END IF 484 END IF 485 END DO 486 END DO 487 END DO 488 END DO 489 next = NXTASK(-nprocs, 1) 490 call GA_SYNC() 491 492#ifdef USE_F90_ALLOCATABLE 493 deallocate(f_a,stat=e_a) 494 deallocate(f_b,stat=e_b) 495 deallocate(f_c,stat=e_c) 496# ifndef USE_LOOPS_NOT_DGEMM 497 deallocate(f_t,stat=e_t) 498# endif 499#else 500# ifndef USE_LOOPS_NOT_DGEMM 501 e_t=0 502 if(.not.MA_POP_STACK(l_t)) e_t=-1 503# else 504 l_t=-12345 505 e_t=0 506# endif 507 e_a=0 508 if(.not.MA_CHOP_STACK(l_a)) e_a=-1 509#endif 510 if (e_a.ne.0) call errquit("MA pops a",0,MA_ERR) 511 if (e_t.ne.0) call errquit("MA pops t",1,MA_ERR) 512 RETURN 513 END 514 515 516 517 SUBROUTINE ccsd_t2_8_spiral(d_a,k_a_offset,d_b,k_b_offset, 518 1 d_c,k_c_offset) 519C $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $ 520C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 521C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 522C i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v 523 IMPLICIT NONE 524#include "global.fh" 525#include "mafdecls.fh" 526#include "sym.fh" 527#include "errquit.fh" 528#include "tce.fh" 529#include "tce_main.fh" 530 integer d_a, d_b, d_c 531 integer k_a_offset, k_b_offset, k_c_offset 532 integer NXTASK, next, nprocs, count 533 integer p3b, p4b, h1b, h2b, p5b, p6b 534 integer p5b_1, p6b_1, h1b_1, h2b_1, p3b_2, p4b_2, p5b_2, p6b_2 535 integer dim1,dim2,dim3,dim4,dim5,dim6 536 integer dim12,dim34,dim56 537 integer dima,dimb,dimc 538 integer spn1,spn2,spn3,spn4,spn5,spn6 539 integer spn12,spn34,spn56 540 integer sym1,sym2,sym3,sym4,sym5,sym6 541 integer sym12,sym34,sym56 542 integer k_as, l_as, k_a, l_a 543 integer k_bs, l_bs, k_b, l_b 544 integer k_cs, l_cs, k_c, l_c 545 integer nbh 546 double precision alpha 547 external NXTASK 548c 549c print*,'entering ccsd_t2_8_spiral (energy)' 550c 551 nprocs = ga_nnodes() 552 count = 0 553 next = nxtask(nprocs, 1) 554c 555 if (.not.ma_push_get(mt_dbl,tile_dim**4,'c',l_c,k_c)) 556 1 call errquit('ccsd_t2_8',9,MA_ERR) 557c 558 do p3b = noab+1,noab+nvab 559 dim3=int_mb(k_range+p3b-1) 560 spn3=int_mb(k_spin +p3b-1) 561 sym3=int_mb(k_sym +p3b-1) 562 do p4b = p3b,noab+nvab 563 dim4=int_mb(k_range+p4b-1) 564 spn4=int_mb(k_spin +p4b-1) 565 sym4=int_mb(k_sym +p4b-1) 566c 567 dim34 = dim3 * dim4 568 spn34 = spn3 + spn4 569 sym34 = ieor(sym3,sym4) 570c 571 do p5b = noab+1,noab+nvab 572 dim5=int_mb(k_range+p5b-1) 573 spn5=int_mb(k_spin +p5b-1) 574 sym5=int_mb(k_sym +p5b-1) 575 do p6b = p5b,noab+nvab 576 dim6=int_mb(k_range+p6b-1) 577 spn6=int_mb(k_spin +p6b-1) 578 sym6=int_mb(k_sym +p6b-1) 579c 580 dim56 = dim5 * dim6 581 spn56 = spn5 + spn6 582 sym56 = ieor(sym5,sym6) 583c 584 dimb = dim34 * dim56 585c 586 if ( (dimb.gt.0) .and. (ieor(sym34,sym56).eq.0) 587 1 .and. (spn34.eq.spn56) ) then 588c 589 if (next.eq.count) then 590c 591 call tce_restricted_4(p3b,p4b,p5b,p6b, 592 1 p3b_2,p4b_2,p5b_2,p6b_2) 593c 594 if (.not.ma_push_get(mt_dbl,dimb,'bs',l_bs,k_bs)) 595 1 call errquit('ccsd_t2_8',4,MA_ERR) 596 if (.not.ma_push_get(mt_dbl,dimb,'b',l_b,k_b)) 597 1 call errquit('ccsd_t2_8',5,MA_ERR) 598c 599 if(.not.intorb) then 600 call get_hash_block(d_b,dbl_mb(k_b),dimb, 601 1 int_mb(k_b_offset), 602 2 (p6b_2 - 1 + (noab+nvab) * (p5b_2 - 1 + 603 3 (noab+nvab) * (p4b_2 - 1 + (noab+nvab) * 604 4 (p3b_2 - 1))))) 605 else 606 call get_hash_block_i(d_b,dbl_mb(k_b),dimb, 607 1 int_mb(k_b_offset), 608 2 (p6b_2 - 1 + (noab+nvab) * (p5b_2 - 1 + 609 3 (noab+nvab) * (p4b_2 - 1 + (noab+nvab) * 610 4 (p3b_2 - 1)))),p6b_2,p5b_2,p4b_2,p3b_2) 611 end if 612c 613 call tce_sort_4(dbl_mb(k_b),dbl_mb(k_bs), 614 1 dim3,dim4,dim5,dim6,2,1,4,3,1.0d0) 615c 616 if (.not.ma_pop_stack(l_b)) 617 1 call errquit('ccsd_t2_8',6,MA_ERR) 618c 619 do h1b = 1,noab 620 dim1=int_mb(k_range+h1b-1) 621 spn1=int_mb(k_spin +h1b-1) 622 sym1=int_mb(k_sym +h1b-1) 623 do h2b = h1b,noab 624 dim2=int_mb(k_range+h2b-1) 625 spn2=int_mb(k_spin +h2b-1) 626 sym2=int_mb(k_sym +h2b-1) 627c 628 dim12 = dim1 * dim2 629 spn12 = spn1 + spn2 630 sym12 = ieor(sym1,sym2) 631c 632 dima = dim12 * dim56 633c 634 if (dima.gt.0) then 635c 636 call tce_restricted_4(p5b,p6b,h1b,h2b, 637 1 p5b_1,p6b_1,h1b_1,h2b_1) 638c 639 if (spn34.eq.spn12) then 640 if (spn56.eq.spn12) then 641 if ((.not.restricted).or.((spn34+spn12).ne.8)) then 642 if (ieor(sym34,sym12).eq.0) then 643 if (ieor(sym56,sym12).eq.0) then 644c 645 dimc = dim12 * dim34 646c 647 if (.not.ma_push_get(mt_dbl,dimc,'cs',l_cs,k_cs)) 648 1 call errquit('ccsd_t2_8',0,MA_ERR) 649c 650 call dfill(dimc,0.0d0,dbl_mb(k_cs),1) 651c 652 if (.not.ma_push_get(mt_dbl,dima,'as',l_as,k_as)) 653 1 call errquit('ccsd_t2_8',1,MA_ERR) 654 if (.not.ma_push_get(mt_dbl,dima,'a',l_a,k_a)) 655 1 call errquit('ccsd_t2_8',2,MA_ERR) 656c 657 call get_hash_block(d_a,dbl_mb(k_a),dima, 658 1 int_mb(k_a_offset), 659 2 (h2b_1 - 1 + noab * (h1b_1 - 1 + noab * 660 3 (p6b_1 - noab - 1 + nvab * 661 4 (p5b_1 - noab - 1))))) 662c 663 call tce_sort_4(dbl_mb(k_a),dbl_mb(k_as), 664 1 dim5,dim6,dim1,dim2,4,3,2,1,1.0d0) 665c 666 if (.not.ma_pop_stack(l_a)) 667 1 call errquit('ccsd_t2_8',3,MA_ERR) 668c 669 if (p5b .eq. p6b) then 670 alpha = 1.0d0 671 else 672 alpha = 2.0d0 673 end if 674 call dgemm('T','N',dim12,dim34,dim56,alpha, 675 2 dbl_mb(k_as),dim56,dbl_mb(k_bs),dim56, 676 3 1.0d0,dbl_mb(k_cs),dim12) 677c 678 if (.not.ma_pop_stack(l_as)) 679 1 call errquit('ccsd_t2_8',8,MA_ERR) 680c 681 call ga_nbwait(nbh) ! wait until previous put of c is gone before overwriting buffer 682c 683 call tce_sort_4(dbl_mb(k_cs),dbl_mb(k_c), 684 1 dim4,dim3,dim2,dim1,2,1,4,3,0.5d0) 685c 686 call add_hash_block_nb(d_c,dbl_mb(k_c),dimc, 687 1 int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab* 688 2 (p4b-noab-1+nvab*(p3b-noab-1)))),nbh) 689c 690 if (.not.ma_pop_stack(l_cs)) 691 1 call errquit('ccsd_t2_8',11,MA_ERR) 692c 693 end if 694 end if 695 end if 696 end if 697 end if 698c 699 endif ! dima>0 700c 701 end do 702 end do 703c 704 if (.not.ma_pop_stack(l_bs)) 705 1 call errquit('ccsd_t2_8',7,MA_ERR) 706c 707 next = NXTASK(nprocs, 1) 708 end if ! next=count 709 count = count + 1 710c 711 endif ! dimb>0 712c 713 end do 714 end do 715 end do 716 end do 717c 718 if (.not.ma_pop_stack(l_c)) 719 1 call errquit('ccsd_t2_8',10,MA_ERR) 720c 721 next = NXTASK(-nprocs, 1) 722 call ga_sync() 723 RETURN 724 END 725 726 SUBROUTINE ccsd_t2_8_task_dgemm(d_a,k_a_offset, 727 & d_b,k_b_offset, 728 & d_c,k_c_offset, 729 & maxh,maxp) 730C $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $ 731C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 732C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 733C i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v 734 IMPLICIT NONE 735#include "global.fh" 736#include "mafdecls.fh" 737#include "sym.fh" 738#include "errquit.fh" 739#include "tce.fh" 740 integer :: d_a,d_b,d_c 741 integer :: k_a_offset,k_b_offset,k_c_offset 742 integer :: maxh,maxp,dimhhpp,dimpppp,dimtemp 743 integer :: next,nprocs,count 744 integer :: p5b,p6b,p3b,p4b,h1b,h2b 745 integer :: p5b_1,p6b_1,h1b_1,h2b_1 746 integer :: p3b_2,p4b_2,p5b_2,p6b_2 747 integer :: dima,dimb,dimc,dim_common,dima_sort,dimb_sort 748 double precision, allocatable :: f_a(:) 749 double precision, allocatable :: f_b(:) 750 double precision, allocatable :: f_c(:) 751 double precision, allocatable :: f_t(:) 752#ifdef USE_FASTMEM 753 !dec$ attributes fastmem :: f_a,f_b,f_c,f_t 754#endif 755 integer :: e_a,e_b,e_c,e_t 756 double precision alpha 757 integer p5b_in,p6b_in,me 758 integer NXTASK 759 external NXTASK 760 nprocs = GA_NNODES() 761 count = 0 762 next = NXTASK(nprocs, 1) 763 764 me = ga_nodeid() 765 766 dimhhpp = maxh*maxh*maxp*maxp 767 dimpppp = maxp*maxp*maxp*maxp 768 dimtemp = max(dimpppp,dimhhpp) 769 770 allocate(f_a(1:dimhhpp),stat=e_a) 771 allocate(f_b(1:dimpppp),stat=e_b) 772 allocate(f_c(1:dimhhpp),stat=e_c) 773 allocate(f_t(1:dimtemp),stat=e_t) 774 if (e_a.ne.0) call errquit("alloc a",dimhhpp,MA_ERR) 775 if (e_b.ne.0) call errquit("alloc b",dimpppp,MA_ERR) 776 if (e_c.ne.0) call errquit("alloc c",dimhhpp,MA_ERR) 777 if (e_t.ne.0) call errquit("alloc t",dimtemp,MA_ERR) 778 DO p3b = noab+1,noab+nvab 779 DO p4b = p3b,noab+nvab 780 DO h1b = 1,noab 781 DO h2b = h1b,noab 782 IF ((.not.restricted).or. 783 & ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) 784 & +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN 785 IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq. 786 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 787 IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1), 788 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 789 & .eq. ieor(irrep_v,irrep_t)) THEN 790 IF (next.eq.count) THEN 791 dima_sort = int_mb(k_range+h1b-1) 792 & * int_mb(k_range+h2b-1) 793 dimb_sort = int_mb(k_range+p3b-1) 794 & * int_mb(k_range+p4b-1) 795 dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1) 796 & * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1) 797 CALL DFILL(dimc,0.0d0,f_c,1) 798 DO p5b_in =me,me+nvab-1 799 p5b=mod(p5b_in,nvab)+noab+1 800 DO p6b_in=me,me+nvab+noab-p5b 801 p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b 802 IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq. 803 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 804 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1), 805 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 806 & .eq. irrep_t) THEN 807 CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b, 808 & p5b_1,p6b_1,h1b_1,h2b_1) 809 CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b, 810 & p3b_2,p4b_2,p5b_2,p6b_2) 811 dim_common = int_mb(k_range+p5b-1) 812 & * int_mb(k_range+p6b-1) 813 dima = dim_common * dima_sort 814 dimb = dim_common * dimb_sort 815 IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN 816 CALL GET_HASH_BLOCK(d_a,f_t,dima, 817 & int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab* 818 & (p6b_1-noab-1+nvab*(p5b_1-noab-1))))) 819 CALL TCE_SORT_4(f_t,f_a, 820 & int_mb(k_range+p5b-1),int_mb(k_range+p6b-1), 821 & int_mb(k_range+h1b-1),int_mb(k_range+h2b-1), 822 & 4,3,2,1,1.0d0) 823 if(.not.intorb) then 824 CALL GET_HASH_BLOCK(d_b,f_t,dimb, 825 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 826 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 827 & (p3b_2-1))))) 828 else 829 CALL GET_HASH_BLOCK_I(d_b,f_t,dimb, 830 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 831 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 832 & (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2) 833 end if 834 CALL TCE_SORT_4(f_t,f_b, 835 & int_mb(k_range+p3b-1),int_mb(k_range+p4b-1), 836 & int_mb(k_range+p5b-1),int_mb(k_range+p6b-1), 837 & 2,1,4,3,1.0d0) 838 if (p5b .eq. p6b) then 839 alpha = 1.0d0 840 else 841 alpha = 2.0d0 842 end if 843 CALL DGEMM('T','N',dima_sort,dimb_sort,dim_common, 844 & alpha,f_a,dim_common,f_b, 845 & dim_common,1.0d0,f_c,dima_sort) 846 END IF 847 END IF 848 END IF 849 END DO 850 END DO 851 CALL TCE_SORT_4(f_c,f_t, 852 & int_mb(k_range+p4b-1),int_mb(k_range+p3b-1), 853 & int_mb(k_range+h2b-1),int_mb(k_range+h1b-1), 854 & 2,1,4,3,0.5d0) 855 CALL ADD_HASH_BLOCK(d_c,f_t,dimc, 856 & int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab* 857 & (p4b-noab-1+nvab*(p3b-noab-1))))) 858 next = NXTASK(nprocs, 1) 859 END IF 860 count = count + 1 861 END IF 862 END IF 863 END IF 864 END DO 865 END DO 866 END DO 867 END DO 868 next = NXTASK(-nprocs, 1) 869 call GA_SYNC() 870 deallocate(f_a,stat=e_a) 871 deallocate(f_b,stat=e_b) 872 deallocate(f_c,stat=e_c) 873 deallocate(f_t,stat=e_t) 874 if (e_a.ne.0) call errquit("free a",0,MA_ERR) 875 if (e_b.ne.0) call errquit("free b",1,MA_ERR) 876 if (e_c.ne.0) call errquit("free c",2,MA_ERR) 877 if (e_t.ne.0) call errquit("free t",3,MA_ERR) 878 RETURN 879 END 880 881 882 SUBROUTINE ccsd_t2_8_task_loops(d_a,k_a_offset, 883 & d_b,k_b_offset, 884 & d_c,k_c_offset, 885 & maxh,maxp) 886C $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $ 887C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 888C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 889C i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v 890 IMPLICIT NONE 891#include "global.fh" 892#include "mafdecls.fh" 893#include "sym.fh" 894#include "errquit.fh" 895#include "tce.fh" 896 integer :: d_a,d_b,d_c 897 integer :: k_a_offset,k_b_offset,k_c_offset 898 integer :: maxh,maxp,dimhhpp,dimpppp,dimtemp 899 integer :: next,nprocs,count 900 integer :: p5b,p6b,p3b,p4b,h1b,h2b 901 integer :: p5b_1,p6b_1,h1b_1,h2b_1 902 integer :: p3b_2,p4b_2,p5b_2,p6b_2 903 integer :: dima,dimb,dimc,dim_common,dima_sort,dimb_sort 904 double precision, allocatable :: f_a(:) 905 double precision, allocatable :: f_b(:) 906 double precision, allocatable :: f_c(:) 907#ifdef USE_FASTMEM 908 !dec$ attributes fastmem :: f_a,f_b,f_c 909#endif 910 integer :: e_a,e_b,e_c 911 double precision alpha 912 integer p5b_in,p6b_in,me 913 integer NXTASK 914 external NXTASK 915 nprocs = GA_NNODES() 916 count = 0 917 next = NXTASK(nprocs, 1) 918 919 me = ga_nodeid() 920 921 dimhhpp = maxh*maxh*maxp*maxp 922 dimpppp = maxp*maxp*maxp*maxp 923 dimtemp = max(dimpppp,dimhhpp) 924 925 allocate(f_a(1:dimhhpp),stat=e_a) 926 allocate(f_b(1:dimpppp),stat=e_b) 927 allocate(f_c(1:dimhhpp),stat=e_c) 928 if (e_a.ne.0) call errquit("alloc a",dimhhpp,MA_ERR) 929 if (e_b.ne.0) call errquit("alloc b",dimpppp,MA_ERR) 930 if (e_c.ne.0) call errquit("alloc c",dimhhpp,MA_ERR) 931 DO p3b = noab+1,noab+nvab 932 DO p4b = p3b,noab+nvab 933 DO h1b = 1,noab 934 DO h2b = h1b,noab 935 IF ((.not.restricted).or. 936 & ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) 937 & +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN 938 IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq. 939 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 940 IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1), 941 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 942 & .eq. ieor(irrep_v,irrep_t)) THEN 943 IF (next.eq.count) THEN 944 dima_sort = int_mb(k_range+h1b-1) 945 & * int_mb(k_range+h2b-1) 946 dimb_sort = int_mb(k_range+p3b-1) 947 & * int_mb(k_range+p4b-1) 948 dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1) 949 & * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1) 950 CALL DFILL(dimc,0.0d0,f_c,1) 951 DO p5b_in =me,me+nvab-1 952 p5b=mod(p5b_in,nvab)+noab+1 953 DO p6b_in=me,me+nvab+noab-p5b 954 p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b 955 IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq. 956 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 957 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1), 958 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 959 & .eq. irrep_t) THEN 960 CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b, 961 & p5b_1,p6b_1,h1b_1,h2b_1) 962 CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b, 963 & p3b_2,p4b_2,p5b_2,p6b_2) 964 dim_common = int_mb(k_range+p5b-1) 965 & * int_mb(k_range+p6b-1) 966 dima = dim_common * dima_sort 967 dimb = dim_common * dimb_sort 968 IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN 969 CALL GET_HASH_BLOCK(d_a,f_a,dima, 970 & int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab* 971 & (p6b_1-noab-1+nvab*(p5b_1-noab-1))))) 972 if(.not.intorb) then 973 CALL GET_HASH_BLOCK(d_b,f_b,dimb, 974 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 975 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 976 & (p3b_2-1))))) 977 else 978 CALL GET_HASH_BLOCK_I(d_b,f_b,dimb, 979 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 980 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 981 & (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2) 982 end if 983 if (p5b .eq. p6b) then 984 alpha = 1.0d0 985 else 986 alpha = 2.0d0 987 end if 988 call t2_p8(int_mb(k_range+h1b-1), 989 & int_mb(k_range+h2b-1), 990 & int_mb(k_range+p3b-1), 991 & int_mb(k_range+p4b-1), 992 & int_mb(k_range+p5b-1), 993 & int_mb(k_range+p6b-1), 994 & f_a,f_b,f_c, 995 & 0.5d0*alpha) 996 END IF 997 END IF 998 END IF 999 END DO 1000 END DO 1001 CALL ADD_HASH_BLOCK(d_c,f_c,dimc, 1002 & int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab* 1003 & (p4b-noab-1+nvab*(p3b-noab-1))))) 1004 next = NXTASK(nprocs, 1) 1005 END IF 1006 count = count + 1 1007 END IF 1008 END IF 1009 END IF 1010 END DO 1011 END DO 1012 END DO 1013 END DO 1014 next = NXTASK(-nprocs, 1) 1015 call GA_SYNC() 1016 deallocate(f_a,stat=e_a) 1017 deallocate(f_b,stat=e_b) 1018 deallocate(f_c,stat=e_c) 1019 if (e_a.ne.0) call errquit("free a",0,MA_ERR) 1020 if (e_b.ne.0) call errquit("free b",0,MA_ERR) 1021 if (e_c.ne.0) call errquit("free t",0,MA_ERR) 1022 RETURN 1023 END 1024 1025 1026 integer function ccsd_t2_8_count() 1027 IMPLICIT NONE 1028#include "global.fh" 1029#include "mafdecls.fh" 1030#include "sym.fh" 1031#include "errquit.fh" 1032#include "tce.fh" 1033 integer :: n 1034 integer :: p5b,p6b,p3b,p4b,h1b,h2b 1035 n = 0 1036 DO p3b = noab+1,noab+nvab 1037 DO p4b = p3b,noab+nvab 1038 DO h1b = 1,noab 1039 DO h2b = h1b,noab 1040 IF ((.not.restricted).or. 1041 & ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) 1042 & +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN 1043 IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq. 1044 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 1045 IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1), 1046 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 1047 & .eq. ieor(irrep_v,irrep_t)) THEN 1048 !DO p5b = noab+1,noab+nvab 1049 ! DO p6b = p5b,noab+nvab 1050 ! IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq. 1051 & ! int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 1052 ! IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1), 1053 & ! ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 1054 & ! .eq. irrep_t) THEN 1055 n = n+1 1056 ! END IF 1057 ! END IF 1058 ! END DO 1059 !END DO 1060 END IF 1061 END IF 1062 END IF 1063 END DO 1064 END DO 1065 END DO 1066 END DO 1067 ccsd_t2_8_count = n 1068 RETURN 1069 END 1070 1071 subroutine ccsd_t2_8_make_list(num_tasks, task_list) 1072 IMPLICIT NONE 1073#include "global.fh" 1074#include "mafdecls.fh" 1075#include "sym.fh" 1076#include "errquit.fh" 1077#include "tce.fh" 1078 integer, intent(in) :: num_tasks 1079 integer, intent(inout) :: task_list(4,num_tasks) 1080 integer :: p5b,p6b,p3b,p4b,h1b,h2b 1081 integer :: i 1082 i = 0 1083 DO p3b = noab+1,noab+nvab 1084 DO p4b = p3b,noab+nvab 1085 DO h1b = 1,noab 1086 DO h2b = h1b,noab 1087 IF ((.not.restricted).or. 1088 & ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) 1089 & +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN 1090 IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq. 1091 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 1092 IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1), 1093 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 1094 & .eq. ieor(irrep_v,irrep_t)) THEN 1095 !DO p5b = noab+1,noab+nvab 1096 ! DO p6b = p5b,noab+nvab 1097 ! IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq. 1098 & ! int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 1099 ! IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1), 1100 & ! ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 1101 & ! .eq. irrep_t) THEN 1102 i = i + 1 1103 task_list(1,i) = p3b 1104 task_list(2,i) = p4b 1105 task_list(3,i) = h1b 1106 task_list(4,i) = h2b 1107 ! task_list(5,i) = p5b 1108 ! task_list(6,i) = p6b 1109 ! END IF 1110 ! END IF 1111 ! END DO 1112 !END DO 1113 END IF 1114 END IF 1115 END IF 1116 END DO 1117 END DO 1118 END DO 1119 END DO 1120 RETURN 1121 END 1122 1123#if defined(USE_OPENMP) && defined(USE_OPENMP_TASKS) 1124 1125 SUBROUTINE ccsd_t2_8_loops_exec(d_a,k_a_offset, 1126 & d_b,k_b_offset, 1127 & d_c,k_c_offset, 1128 & maxh,maxp, 1129 & num_tasks,task_list) 1130 IMPLICIT NONE 1131#include "global.fh" 1132#include "mafdecls.fh" 1133#include "sym.fh" 1134#include "errquit.fh" 1135#include "tce.fh" 1136 integer, intent(in) :: d_a,d_b,d_c 1137 integer, intent(in) :: k_a_offset,k_b_offset,k_c_offset 1138 integer, intent(in) :: maxh,maxp 1139 integer, intent(in) :: num_tasks 1140 integer, intent(in) :: task_list(4,num_tasks) 1141 integer :: dimhhpp,dimpppp 1142 integer :: p5b,p6b,p3b,p4b,h1b,h2b 1143 integer :: p5b_1,p6b_1,h1b_1,h2b_1 1144 integer :: p3b_2,p4b_2,p5b_2,p6b_2 1145 integer :: dima,dimb,dimc,dim_common,dima_sort,dimb_sort 1146 double precision, allocatable :: f_a(:) 1147 double precision, allocatable :: f_b(:) 1148 double precision, allocatable :: f_c(:) 1149#ifdef USE_FASTMEM 1150 !dec$ attributes fastmem :: f_a,f_b,f_c 1151#endif 1152 integer :: e_a,e_b,e_c 1153 double precision :: alpha 1154 integer :: p5b_in,p6b_in 1155 integer :: me,np 1156 integer :: i 1157 1158 me = ga_nodeid() 1159 np = ga_nnodes() 1160 1161 dimhhpp = maxh*maxh*maxp*maxp 1162 dimpppp = maxp*maxp*maxp*maxp 1163!$omp parallel private(f_a,f_b,f_c,e_a,e_b,e_c) 1164 allocate(f_a(1:dimhhpp),stat=e_a) 1165 allocate(f_b(1:dimpppp),stat=e_b) 1166 allocate(f_c(1:dimhhpp),stat=e_c) 1167 if (e_a.ne.0) call errquit("alloc a",dimhhpp,MA_ERR) 1168 if (e_b.ne.0) call errquit("alloc b",dimpppp,MA_ERR) 1169 if (e_c.ne.0) call errquit("alloc c",dimhhpp,MA_ERR) 1170!$omp master 1171 do i = 1, num_tasks 1172 if (mod(i,np).eq.me) then 1173!$omp task private(p3b,p4b,h1b,h2b,p5b_in,p5b,p6b_in,p6b) 1174!$omp& private(p5b_1,p6b_1,h1b_1,h2b_1,p3b_2,p4b_2,p5b_2,p6b_2) 1175!$omp& private(dima,dimb,dimc,dim_common,dima_sort,dimb_sort) 1176!$omp& private(alpha) 1177 p3b = task_list(1,i) 1178 p4b = task_list(2,i) 1179 h1b = task_list(3,i) 1180 h2b = task_list(4,i) 1181 dima_sort = int_mb(k_range+h1b-1) 1182 & * int_mb(k_range+h2b-1) 1183 dimb_sort = int_mb(k_range+p3b-1) 1184 & * int_mb(k_range+p4b-1) 1185 dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1) 1186 & * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1) 1187 CALL DFILL(dimc,0.0d0,f_c,1) 1188 DO p5b_in =me,me+nvab-1 1189 p5b=mod(p5b_in,nvab)+noab+1 1190 DO p6b_in=me,me+nvab+noab-p5b 1191 p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b 1192 IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq. 1193 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 1194 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1), 1195 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 1196 & .eq. irrep_t) THEN 1197 CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b, 1198 & p5b_1,p6b_1,h1b_1,h2b_1) 1199 CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b, 1200 & p3b_2,p4b_2,p5b_2,p6b_2) 1201 dim_common = int_mb(k_range+p5b-1) 1202 & * int_mb(k_range+p6b-1) 1203 dima = dim_common * dima_sort 1204 dimb = dim_common * dimb_sort 1205 IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN 1206 CALL GET_HASH_BLOCK_R(d_a,f_a,dima, 1207 & int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab* 1208 & (p6b_1-noab-1+nvab*(p5b_1-noab-1))))) 1209 if(.not.intorb) then 1210 CALL GET_HASH_BLOCK_R(d_b,f_b,dimb, 1211 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 1212 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 1213 & (p3b_2-1))))) 1214 else 1215 CALL GET_HASH_BLOCK_I_R(d_b,f_b,dimb, 1216 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 1217 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 1218 & (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2) 1219 end if 1220 if (p5b .eq. p6b) then 1221 alpha = 1.0d0 1222 else 1223 alpha = 2.0d0 1224 end if 1225 call t2_p8(int_mb(k_range+h1b-1), 1226 & int_mb(k_range+h2b-1), 1227 & int_mb(k_range+p3b-1), 1228 & int_mb(k_range+p4b-1), 1229 & int_mb(k_range+p5b-1), 1230 & int_mb(k_range+p6b-1), 1231 & f_a,f_b,f_c, 1232 & 0.5d0*alpha) 1233 END IF 1234 END IF 1235 END IF 1236 END DO 1237 END DO 1238 CALL ADD_HASH_BLOCK_R(d_c,f_c,dimc, 1239 & int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab* 1240 & (p4b-noab-1+nvab*(p3b-noab-1))))) 1241!$omp end task 1242 endif 1243 enddo 1244!$omp taskwait 1245 call GA_SYNC() 1246!$omp end master 1247 deallocate(f_a,stat=e_a) 1248 deallocate(f_b,stat=e_b) 1249 deallocate(f_c,stat=e_c) 1250 if (e_a.ne.0) call errquit("free a",0,MA_ERR) 1251 if (e_b.ne.0) call errquit("free b",1,MA_ERR) 1252 if (e_c.ne.0) call errquit("free c",2,MA_ERR) 1253!$omp end parallel 1254 RETURN 1255 END 1256 1257 1258 SUBROUTINE ccsd_t2_8_loops_driver(d_a,k_a_offset, 1259 & d_b,k_b_offset, 1260 & d_c,k_c_offset, 1261 & maxh,maxp) 1262C $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $ 1263C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 1264C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 1265C i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v 1266 IMPLICIT NONE 1267#include "global.fh" 1268#include "mafdecls.fh" 1269#include "sym.fh" 1270#include "errquit.fh" 1271#include "tce.fh" 1272 integer :: d_a,d_b,d_c 1273 integer :: k_a_offset,k_b_offset,k_c_offset 1274 integer :: maxh,maxp 1275 integer :: num_tasks 1276 integer, allocatable :: task_list(:,:) 1277 integer :: e_tl 1278 integer :: ccsd_t2_8_count 1279 external :: ccsd_t2_8_count 1280 num_tasks = ccsd_t2_8_count() 1281 allocate(task_list(4,1:num_tasks),stat=e_tl) 1282 if (e_tl.ne.0) call errquit("alloc task_list",num_tasks,MA_ERR) 1283 call ccsd_t2_8_make_list(num_tasks, task_list) 1284 call ccsd_t2_8_loops_exec(d_a,k_a_offset, 1285 & d_b,k_b_offset, 1286 & d_c,k_c_offset, 1287 & maxh,maxp, 1288 & num_tasks,task_list) 1289 deallocate(task_list,stat=e_tl) 1290 if (e_tl.ne.0) call errquit("free task_list",num_tasks,MA_ERR) 1291 RETURN 1292 END 1293 1294 1295 SUBROUTINE ccsd_t2_8_dgemm_exec(d_a,k_a_offset, 1296 & d_b,k_b_offset, 1297 & d_c,k_c_offset, 1298 & maxh,maxp, 1299 & num_tasks,task_list) 1300 IMPLICIT NONE 1301#include "global.fh" 1302#include "mafdecls.fh" 1303#include "sym.fh" 1304#include "errquit.fh" 1305#include "tce.fh" 1306 integer, intent(in) :: d_a,d_b,d_c 1307 integer, intent(in) :: k_a_offset,k_b_offset,k_c_offset 1308 integer, intent(in) :: maxh,maxp 1309 integer, intent(in) :: num_tasks 1310 integer, intent(in) :: task_list(4,num_tasks) 1311 integer :: dimhhpp,dimpppp,dimtemp 1312 integer :: p5b,p6b,p3b,p4b,h1b,h2b 1313 integer :: p5b_1,p6b_1,h1b_1,h2b_1 1314 integer :: p3b_2,p4b_2,p5b_2,p6b_2 1315 integer :: dima,dimb,dimc,dim_common,dima_sort,dimb_sort 1316 double precision, allocatable :: f_a(:) 1317 double precision, allocatable :: f_b(:) 1318 double precision, allocatable :: f_c(:) 1319 double precision, allocatable :: f_t(:) 1320#ifdef USE_FASTMEM 1321 !dec$ attributes fastmem :: f_a,f_b,f_c,f_t 1322#endif 1323 integer :: e_a,e_b,e_c,e_t 1324 double precision :: alpha 1325 integer :: p5b_in,p6b_in 1326 integer :: me,np 1327 integer :: i 1328 1329 me = ga_nodeid() 1330 np = ga_nnodes() 1331 1332 dimhhpp = maxh*maxh*maxp*maxp 1333 dimpppp = maxp*maxp*maxp*maxp 1334 dimtemp = max(dimpppp,dimhhpp) 1335!$omp parallel private(f_a,f_b,f_c,f_t,e_a,e_b,e_c,e_t) 1336 allocate(f_a(1:dimhhpp),stat=e_a) 1337 allocate(f_b(1:dimpppp),stat=e_b) 1338 allocate(f_c(1:dimhhpp),stat=e_c) 1339 allocate(f_t(1:dimtemp),stat=e_t) 1340 if (e_a.ne.0) call errquit("MA a",dimhhpp,MA_ERR) 1341 if (e_b.ne.0) call errquit("MA b",dimpppp,MA_ERR) 1342 if (e_c.ne.0) call errquit("MA c",dimhhpp,MA_ERR) 1343 if (e_t.ne.0) call errquit("MA t",dimhhpp,MA_ERR) 1344!$omp master 1345 do i = 1, num_tasks 1346 if (mod(i,np).eq.me) then 1347!$omp task private(p3b,p4b,h1b,h2b,p5b_in,p5b,p6b_in,p6b) 1348!$omp& private(p5b_1,p6b_1,h1b_1,h2b_1,p3b_2,p4b_2,p5b_2,p6b_2) 1349!$omp& private(dima,dimb,dimc,dim_common,dima_sort,dimb_sort) 1350!$omp& private(alpha) 1351 p3b = task_list(1,i) 1352 p4b = task_list(2,i) 1353 h1b = task_list(3,i) 1354 h2b = task_list(4,i) 1355 dima_sort = int_mb(k_range+h1b-1) 1356 & * int_mb(k_range+h2b-1) 1357 dimb_sort = int_mb(k_range+p3b-1) 1358 & * int_mb(k_range+p4b-1) 1359 dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1) 1360 & * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1) 1361 CALL DFILL(dimc,0.0d0,f_c,1) 1362 DO p5b_in =me,me+nvab-1 1363 p5b=mod(p5b_in,nvab)+noab+1 1364 DO p6b_in=me,me+nvab+noab-p5b 1365 p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b 1366 IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq. 1367 & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN 1368 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1), 1369 & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1)))) 1370 & .eq. irrep_t) THEN 1371 CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b, 1372 & p5b_1,p6b_1,h1b_1,h2b_1) 1373 CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b, 1374 & p3b_2,p4b_2,p5b_2,p6b_2) 1375 dim_common = int_mb(k_range+p5b-1) 1376 & * int_mb(k_range+p6b-1) 1377 dima = dim_common * dima_sort 1378 dimb = dim_common * dimb_sort 1379 IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN 1380 CALL GET_HASH_BLOCK_R(d_a,f_t,dima, 1381 & int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab* 1382 & (p6b_1-noab-1+nvab*(p5b_1-noab-1))))) 1383 CALL TCE_SORT_4(f_t,f_a, 1384 & int_mb(k_range+p5b-1),int_mb(k_range+p6b-1), 1385 & int_mb(k_range+h1b-1),int_mb(k_range+h2b-1), 1386 & 4,3,2,1,1.0d0) 1387 if(.not.intorb) then 1388 CALL GET_HASH_BLOCK_R(d_b,f_t,dimb, 1389 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 1390 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 1391 & (p3b_2-1))))) 1392 else 1393 CALL GET_HASH_BLOCK_I_R(d_b,f_t,dimb, 1394 & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)* 1395 & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)* 1396 & (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2) 1397 end if 1398 CALL TCE_SORT_4(f_t,f_b, 1399 & int_mb(k_range+p3b-1),int_mb(k_range+p4b-1), 1400 & int_mb(k_range+p5b-1),int_mb(k_range+p6b-1), 1401 & 2,1,4,3,1.0d0) 1402 if (p5b .eq. p6b) then 1403 alpha = 1.0d0 1404 else 1405 alpha = 2.0d0 1406 end if 1407 CALL DGEMM('T','N',dima_sort,dimb_sort,dim_common, 1408 & alpha,f_a,dim_common,f_b, 1409 & dim_common,1.0d0,f_c,dima_sort) 1410 END IF 1411 END IF 1412 END IF 1413 END DO 1414 END DO 1415 CALL TCE_SORT_4(f_c,f_t, 1416 & int_mb(k_range+p4b-1),int_mb(k_range+p3b-1), 1417 & int_mb(k_range+h2b-1),int_mb(k_range+h1b-1), 1418 & 2,1,4,3,0.5d0) 1419 CALL ADD_HASH_BLOCK_R(d_c,f_t,dimc, 1420 & int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab* 1421 & (p4b-noab-1+nvab*(p3b-noab-1))))) 1422!$omp end task 1423 endif 1424 enddo 1425!$omp taskwait 1426 call GA_SYNC() 1427!$omp end master 1428 deallocate(f_a,stat=e_a) 1429 deallocate(f_b,stat=e_b) 1430 deallocate(f_c,stat=e_c) 1431 if (e_a.ne.0) call errquit("MA pops a",0,MA_ERR) 1432 if (e_b.ne.0) call errquit("MA pops b",1,MA_ERR) 1433 if (e_c.ne.0) call errquit("MA pops c",2,MA_ERR) 1434!$omp end parallel 1435 RETURN 1436 END 1437 1438 1439 SUBROUTINE ccsd_t2_8_dgemm_driver(d_a,k_a_offset, 1440 & d_b,k_b_offset, 1441 & d_c,k_c_offset, 1442 & maxh,maxp) 1443C $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $ 1444C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 1445C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 1446C i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v 1447 IMPLICIT NONE 1448#include "global.fh" 1449#include "mafdecls.fh" 1450#include "sym.fh" 1451#include "errquit.fh" 1452#include "tce.fh" 1453 integer :: d_a,d_b,d_c 1454 integer :: k_a_offset,k_b_offset,k_c_offset 1455 integer :: maxh,maxp 1456 integer :: num_tasks 1457 integer, allocatable :: task_list(:,:) 1458 integer :: e_tl 1459 integer :: ccsd_t2_8_count 1460 external :: ccsd_t2_8_count 1461 num_tasks = ccsd_t2_8_count() 1462 allocate(task_list(4,1:num_tasks),stat=e_tl) 1463 if (e_tl.ne.0) call errquit("alloc task_list",num_tasks,MA_ERR) 1464 call ccsd_t2_8_make_list(num_tasks, task_list) 1465 call ccsd_t2_8_dgemm_exec(d_a,k_a_offset, 1466 & d_b,k_b_offset, 1467 & d_c,k_c_offset, 1468 & maxh,maxp, 1469 & num_tasks,task_list) 1470 deallocate(task_list,stat=e_tl) 1471 if (e_tl.ne.0) call errquit("free task_list",num_tasks,MA_ERR) 1472 RETURN 1473 END 1474 1475#endif 1476