1C::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: 2C NAME 3C CCDen_2PDMoooo -- Form coupled cluster 2-particle density matrix 4C 5C REVISION 6C $Id$ 7C 8C SYNOPSIS 9 Subroutine CCDen_2PDMoooo(NOcc, NVir, T1, LDT1, g_T2, 10 $ g_Z2, g_Q, g_G) 11 Implicit NONE 12#include "errquit.fh" 13 Integer NOcc, NVir, LDT1 14 Double Precision T1(LDT1, NVir) 15 Integer g_T2, g_Z2, g_Q, g_G 16C 17C ARGUMENTS 18C DESCRIPTION 19C Compute the oooo part of the coupled cluster 2-PDM 20C 21C REFERENCES 22C Alistair P. Rendell and Timothy J. Lee, J. Chem. Phys. 94, 23C 6219--6228 (1991). Especially Eq. 46 24C 25C INCLUDE FILES 26#include "global.fh" 27#include "mafdecls.fh" 28#include "stdio.fh" 29C 30C EXTERNAL ROUTINES 31C::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: 32C LOCAL VARIABLES 33 Integer ILo, IHi, JLo, JHi, I, J, K 34 Integer h_Qi, i_Qi 35 Double precision Qij 36 Integer g_Tau 37C 38C Clear the result 39C 40 Call GA_Zero(g_G) 41C 42C Find out what portion of G is "local" 43C 44 Call CCSD_T2_MyIJ(GA_NodeID(), NOcc, g_G, ILo, IHi, JLo, JHi) 45C 46C ********************************************************* 47C * G(i,j,k,l) = 1/8 P(i,j,k,l) {1/2 [TauZ](i,k,j,l) * 48C * + 2 delta(k,l) ( Q(i,j) - delta(i,j) ) * 49C * - delta(j,l) ( Q(i,k) - delta(i,k) ) } * 50C * APR & TJL Eq. 46 * 51C ********************************************************* 52C 53C *********************** 54C * 1/2 [TauZ](i,j,k,l) * 55C *********************** 56C 57 If ( .NOT. GA_Create(MT_Dbl, NVir*NOcc, NVir*NOcc, 58 $ 'CCDen Tau', NVir, NVir, G_Tau) ) Call ErrQuit( 59 $ 'CCDen_2PDMoooo: unable to allocate Tau', 60 $ NVir*NOcc*NVir*NOcc, GA_ERR) 61C 62 Call CCSD_MkTau_GA(NOcc, NVir, T1, LDT1, g_T2, g_Tau) 63 Call CCDen_tZoooo(NOcc, NVir, g_Tau, g_Z2, g_G) 64 Call GA_Scale(g_G, 0.50d0) 65C 66 If (.NOT. GA_Destroy(g_Tau) ) Call ErrQuit( 67 $ 'CCDen_2PDMoooo: unable to free Tau', 0, GA_ERR) 68C 69C Let's do the 1-PDM terms. Note that the data distribution is 70C such that each node has complete kl blocks for a particular subset 71C of i and j. 72C 73C Note too that the data distribution does not guarantee strict 74C locality -- the edge portion of our patch (determined by 75C CCSD_T2_MyIJ) may in fact be remote. Thus it is possible this 76C implementation is a bit too naive for good performance. 77C With a little work, it can be made to do fewer comms by using 78C a ga_scatter or other mechanism. 79C 80C **************************************** 81C * 2 delta(k,l) ( Q(i,j) - delta(i,j) ) * 82C **************************************** 83C Place the Q(i,j) element all along the diagonal of the block (k=l). 84C 85 Do I = ILo, IHi 86 Do J = JLo, JHi 87C 88C Get the appropriate element of Q for this ij block 89C 90 Call GA_Get(g_Q, I, I, J, J, Qij, 1) 91 If ( I .eq. J ) Qij = Qij - 1.0d0 92C 93C Now plop it into the diagonal elements of this block 94C 95 Do K = 1, NOcc 96 Call GA_Acc(g_G, (I-1)*NOcc+K, (I-1)*NOcc+K, 97 $ (J-1)*NOcc+K, (J-1)*NOcc+K, Qij, 1, 2.0d0) 98 EndDo 99 EndDo 100 EndDo 101C 102C ************************************** 103C * delta(j,l) ( Q(i,k) - delta(i,k) ) * 104C ************************************** 105C Put the Q(i,*) row into one one column (j=l) of the block. 106C 107 If (.NOT. MA_Push_Get(MT_Dbl, NOcc, 'CCDen Scr', 108 $ H_Qi, I_Qi) ) Call ErrQuit( 109 $ 'CCDen_2PDMoooo: unable to allocate local Qi column', NOcc, 110 & MA_ERR) 111C 112 Do I = ILo, IHi 113C 114C This column of Q goes into one column in each J 115C 116 Call GA_Get(g_Q, I, I, 1, NOcc, Dbl_MB(I_Qi), 1) 117C 118C Handle the delta(i,k) term 119C 120 Dbl_MB(I_Qi + I - 1) = Dbl_MB(I_Qi + I - 1) - 1.0d0 121C 122 Do J = JLo, JHi 123 Call GA_Acc(g_G, (I-1)*NOcc+1, I*NOcc, 124 $ (J-1)*NOcc+J, (J-1)*NOcc+J, Dbl_MB(i_Qi), 1, 125 $ -1.0d0) 126 EndDo 127 EndDo 128C 129 If ( .NOT. MA_Pop_Stack(H_Qi) ) Call ErrQuit( 130 $ 'CCDen_2PDMoooo: unable to free Qi', NOcc, MA_ERR) 131C 132C 133C 134 Return 135 End 136C::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: 137C NAME 138C CCDen_tZoooo -- Form [tZ] intermediate for 2PDM 139C 140C REVISION 141C $Id$ 142C 143C SYNOPSIS 144 Subroutine CCDen_tZoooo(NOcc, NVir, g_t, g_Z, g_tZ) 145 Implicit NONE 146#include "errquit.fh" 147 Integer NOcc, NVir 148 Integer g_t, g_Z, g_tZ 149C 150C ARGUMENTS 151C DESCRIPTION 152C Computes the [tZ](i,j,k,l) intermediate 153C 154C In paper, the term is defined as (Eq. 58) 155C [tZ](i,j,k,l) = sum(a>=b) t(i,j,a,b)(+) Z(k,l,a,b)(+) 156C + sum(a>b) t(i,j,a,b)(-) Z(k,l,a,b)(-) 157C with (Eq. 66--68) 158C Z(k,l,a,b)(+/-) = 1/2 [ Z(k,l,a,b) +/- Z(l,k,a,b) ] 159C t(i,j,a,b)(+/-) = 1/2 [ t(i,j,a,b) +/- t(j,i,a,b) ] 160C t(i,j,a,a)(+) = 1/2 [ t(i,j,a,a) ] 161C 162C This can be simplified to 163C [tZ](i,j,k,l) = 1/4 sum(a) [ t(i,j,a,a) {Z(k,l,a,a) + Z(l,k,a,a)} ] 164C + 1/2 sum(a>b) [ t(i,j,a,b) Z(k,l,a,b) 165C + t(j,i,a,b) Z(l,k,a,b) ] 166C 167C Note that [tZ](i,j,k,l) = [tZ](j,i,l,k) 168C 169C REFERENCES 170C Alistair P. Rendell and Timothy J. Lee, J. Chem. Phys. 94, 171C 6219--6228 (1991). Especially Eq. 58, 66-68. 172C 173C INCLUDE FILES 174#include "global.fh" 175#include "mafdecls.fh" 176#include "stdio.fh" 177#include "util.fh" 178C 179C EXTERNAL ROUTINES 180C::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: 181C LOCAL VARIABLES 182 Integer I, J, K, L, ILo, IHi, JLo, JHi, KLo, KHi, LLo, LHi, A, B 183 Integer Sorta_Node, Node 184 Integer H_Scr1, I_Scr1, H_Scr2, I_Scr2 185 Double precision tZ 186 Logical oprint 187C 188C Get print information 189C 190 oprint = util_print('information',print_low) 191C 192C Find out what portion of t is "local" 193C 194 Call CCSD_T2_MyIJ(GA_NodeID(), NVir, g_t, ILo, IHi, JLo, JHi) 195C 196 If (oprint) Write (LuOut, *) 'CCDen_tZoooo ', GA_NodeID(), 197 $ ': T2 region (', ILo, ':', IHi, ', ', JLo, ':', JHi, ')' 198C 199C Allocate scratch space 200C 201 If (.NOT. MA_Push_Get(MT_Dbl, NVir*NVir, 202 $ 'CCDen Scratch 1', H_Scr1, I_Scr1)) Call ErrQuit( 203 $ 'CCDen_tZoooo: unable to allocate scratch', NVir*NVir, MA_ERR) 204 If (.NOT. MA_Push_Get(MT_Dbl, NVir*NVir, 205 $ 'CCDen Scratch 1', H_Scr2, I_Scr2)) Call ErrQuit( 206 $ 'CCDen_tZoooo: unable to allocate scratch', NVir*NVir, MA_ERR) 207C 208C Basically, we get blocks of t and Z and dot-product them to 209C produce an element [tZ]. For the local chunk of t, contract 210C with all blocks of Z. The comms can probably be handled smarter. 211C Should check if this is a problem or not. 212C 213C 214C Loop over processors. Start with ours and work around the 215C list to avoid everyone pounding on the same processor at once. 216C 217 Do Sorta_Node = GA_NodeID(), GA_NNodes() + GA_NodeID() - 1 218 Node = Mod(Sorta_Node, GA_NNodes() ) 219C 220C Figure out what Z patches are on the node and loop over them 221C 222 Call CCSD_T2_MyIJ(Node, NVir, g_Z, KLo, KHi, LLo, LHi) 223 If (oprint) Write (LuOut, *) 'CCDen_tZoooo ', GA_NodeID(), 224 $ ': Z2 region (', KLo, ':', KHi, ', ', LLo, ':', LHi, ')' 225C 226 Do K = KLo, KHi 227 Do L = LLo, LHi 228 Call GA_Get(g_Z, (K-1)*NVir+1, K*NVir, (L-1)*NVir+1, 229 $ L*NVir, Dbl_MB(I_Scr1), NVir) 230C 231C Contract this Z block with each local t block 232C 233 Do I = ILo, IHi 234 Do J = JLo, JHi 235 Call GA_Get(g_t, (I-1)*NVir+1, I*NVir, 236 $ (J-1)*NVir+1, J*NVir, Dbl_MB(I_Scr2), NVir) 237C 238 tZ = 0.0d0 239 Do A = 1, NVir 240 Do B = 1, A-1 241 tZ = tZ + Dbl_MB(i_Scr1 + (B-1)*NVir + A - 1) 242 $ * Dbl_MB(i_Scr2 + (B-1)*NVir + A - 1) 243 EndDo 244C 245C Now handle diagonal. Note factor of 1/2 here. 246C Additional 1/2 applied to final result below. 247C 248 tZ = tZ + 0.5d0 249 $ * Dbl_MB(i_Scr1 + (A-1)*NVir + A - 1) 250 $ * Dbl_MB(i_Scr2 + (A-1)*NVir + A - 1) 251 EndDo 252C 253C Don't forget to apply factor of 1/2 254C 255 tZ = 0.5d0 * tZ 256C 257C Put this in [tZ](i,j,k,l) and [tZ](j,i,l,k) 258C 259 Call GA_Acc(g_tZ, (I-1)*NOcc+K, (I-1)*NOcc+K, 260 $ (J-1)*NOcc+L, (J-1)*NOcc+L, tZ, 1, 1.0d0) 261 Call GA_Acc(g_tZ, (J-1)*NOcc+L, (J-1)*NOcc+L, 262 $ (I-1)*NOcc+K, (I-1)*NOcc+K, tZ, 1, 1.0d0) 263 264 EndDo 265 EndDo 266C 267 EndDo 268 EndDo 269C 270 EndDo ! Nodes 271C 272C Clean up scratch 273C 274 If (.NOT. MA_Pop_Stack(H_Scr2) ) Call ErrQuit( 275 $ 'CCDen_tZoooo: unable to free scratch', 0, MA_ERR) 276 If (.NOT. MA_Pop_Stack(H_Scr1) ) Call ErrQuit( 277 $ 'CCDen_tZoooo: unable to free scratch', 0, MA_ERR) 278C 279C 280C 281 Return 282 End 283C::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: 284C NAME 285C CCDen_tZvvvv -- Form [tZ] intermediate for 2PDM 286C 287C REVISION 288C $Id$ 289C 290C SYNOPSIS 291 Subroutine CCDen_tZvvvv(NOcc, NVir, g_t, g_Z, g_tZ) 292 Implicit NONE 293#include "errquit.fh" 294 Integer NOcc, NVir 295 Integer g_t, g_Z, g_tZ 296C 297C ARGUMENTS 298C DESCRIPTION 299C Computes the [tZ](a,b,c,d) intermediate 300C 301C In paper, the term is defined as (Eq. 59) 302C [tZ](a,b,c,d) = sum(i>=j) t(i,j,a,b)(+) Z(i,j,c,d)(+) 303C + sum(i>j) t(i,j,a,b)(-) Z(i,j,c,d)(-) 304C with (Eq. 69--71) 305C Z(i,j,c,d)(+/-) = 1/2 [ Z(i,j,c,d) +/- Z(i,j,d,c) ] 306C t(i,j,a,b)(+/-) = 1/2 [ t(i,j,a,b) +/- t(j,i,a,b) ] 307C t(i,i,a,b)(+) = 1/2 [ t(i,i,a,b) ] 308C 309C This can be simplified to 310C [tZ](a,b,c,d) = 1/4 sum(i) [ t(i,i,a,b) {Z(i,j,c,d) + Z(i,j,d,c)} ] 311C + 1/2 sum(i>j) [ t(i,j,a,b) Z(i,j,c,d) 312C + t(j,i,a,b) Z(i,j,d,c) ] 313C 314C Note that [tZ](a,b,c,d) = [tZ](b,a,d,c) 315C 316C REFERENCES 317C Alistair P. Rendell and Timothy J. Lee, J. Chem. Phys. 94, 318C 6219--6228 (1991). Especially Eq. 59, 69--71 319C 320C INCLUDE FILES 321#include "global.fh" 322#include "mafdecls.fh" 323#include "stdio.fh" 324#include "util.fh" 325C 326C EXTERNAL ROUTINES 327C::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: 328C LOCAL VARIABLES 329 Integer I, J, K, L, ILo, IHi, JLo, JHi, KLo, KHi, LLo, LHi, A, B 330 Integer Sorta_Node, Node 331 Integer H_Scr1, I_Scr1, H_Scr2, I_Scr2 332 Double precision tZ 333 Logical oprint 334C 335C Get print information 336C 337 oprint = util_print('information',print_low) 338C 339C Find out what portion of t is "local" 340C 341 Call CCSD_T2_MyIJ(GA_NodeID(), NVir, g_t, ILo, IHi, JLo, JHi) 342C 343 If (oprint) Write (LuOut, *) 'CCDen_tZvvvv ', GA_NodeID(), 344 $ ': T2 region (', ILo, ':', IHi, ', ', JLo, ':', JHi, ')' 345C 346C Allocate scratch space 347C 348 If (.NOT. MA_Push_Get(MT_Dbl, NVir*NVir, 349 $ 'CCDen Scratch 1', H_Scr1, I_Scr1)) Call ErrQuit( 350 $ 'CCDen_tZvvvv: unable to allocate scratch', NVir*NVir, 351 & MA_ERR) 352 If (.NOT. MA_Push_Get(MT_Dbl, NVir*NVir, 353 $ 'CCDen Scratch 1', H_Scr2, I_Scr2)) Call ErrQuit( 354 $ 'CCDen_tZvvvv: unable to allocate scratch', NVir*NVir, 355 & MA_ERR) 356C 357C Basically, we get blocks of t and Z and dot-product them to 358C produce an element [tZ]. For the local chunk of t, contract 359C with all blocks of Z. The comms can probably be handled smarter. 360C Should check if this is a problem or not. 361C 362C 363C Loop over processors. Start with ours and work around the 364C list to avoid everyone pounding on the same processor at once. 365C 366 Do Sorta_Node = GA_NodeID(), GA_NNodes() + GA_NodeID() - 1 367 Node = Mod(Sorta_Node, GA_NNodes() ) 368C 369C Figure out what Z patches are on the node and loop over them 370C 371 Call CCSD_T2_MyIJ(Node, NVir, g_Z, KLo, KHi, LLo, LHi) 372 If (oprint) Write (LuOut, *) 'CCDen_tZvvvv ', GA_NodeID(), 373 $ ': Z2 region (', KLo, ':', KHi, ', ', LLo, ':', LHi, ')' 374C 375 Do K = KLo, KHi 376 Do L = LLo, LHi 377 Call GA_Get(g_Z, (K-1)*NVir+1, K*NVir, (L-1)*NVir+1, 378 $ L*NVir, Dbl_MB(I_Scr1), NVir) 379C 380C Contract this Z block with each local t block 381C 382 Do I = ILo, IHi 383 Do J = JLo, JHi 384 Call GA_Get(g_t, (I-1)*NVir+1, I*NVir, 385 $ (J-1)*NVir+1, J*NVir, Dbl_MB(I_Scr2), NVir) 386C 387 tZ = 0.0d0 388 Do A = 1, NVir 389 Do B = 1, A-1 390 tZ = tZ + Dbl_MB(i_Scr1 + (B-1)*NVir + A - 1) 391 $ * Dbl_MB(i_Scr2 + (B-1)*NVir + A - 1) 392 EndDo 393C 394C Now handle diagonal. Note factor of 1/2 here. 395C Additional 1/2 applied to final result below. 396C 397 tZ = tZ + 0.5d0 398 $ * Dbl_MB(i_Scr1 + (A-1)*NVir + A - 1) 399 $ * Dbl_MB(i_Scr2 + (A-1)*NVir + A - 1) 400 EndDo 401C 402C Don't forget to apply factor of 1/2 403C 404 tZ = 0.5d0 * tZ 405C 406C Put this in [tZ](i,j,k,l) and [tZ](j,i,l,k) 407C 408 Call GA_Acc(g_tZ, (I-1)*NOcc+K, (I-1)*NOcc+K, 409 $ (J-1)*NOcc+L, (J-1)*NOcc+L, tZ, 1, 1.0d0) 410 Call GA_Acc(g_tZ, (J-1)*NOcc+L, (J-1)*NOcc+L, 411 $ (I-1)*NOcc+K, (I-1)*NOcc+K, tZ, 1, 1.0d0) 412 413 EndDo 414 EndDo 415C 416 EndDo 417 EndDo 418C 419 EndDo ! Nodes 420C 421C Clean up scratch 422C 423 If (.NOT. MA_Pop_Stack(H_Scr2) ) Call ErrQuit( 424 $ 'CCDen_tZvvvv: unable to free scratch', 0, MA_ERR) 425 If (.NOT. MA_Pop_Stack(H_Scr1) ) Call ErrQuit( 426 $ 'CCDen_tZvvvv: unable to free scratch', 0, MA_ERR) 427C 428C 429C 430 Return 431 End 432