1c===============================================================c 2c c 3c NCC - a New Coupled-Cluster code for NWChem c 4c c 5c Developed by: c 6c c 7c Jeff R. Hammond c 8c Leadership Computing Facility c 9c Argonne National Laboratory c 10c jhammond@mcs.anl.gov c 11c c 12c Karol Kowalski c 13c Environmental Molecular Sciences Laboratory c 14c Pacific Northwest National Laboratory c 15c karol.kowalski@pnl.gov c 16c c 17c Marta Włoch c 18c Department of Chemistry c 19c Michigan Technological University c 20c wloch@mtu.edu c 21c c 22c===============================================================c 23c 24#ifdef VACUUM 25#define DEBUG_PRINT 26 program nwchem 27 implicit none 28 integer x2info(2) 29 integer nocc, nvir, tszocc, tszvir 30 character*(20) label 31c 32 nocc = 13 33 nvir = 57 34 tszocc = 7 35 tszvir = 17 36c 37 call ncc_doubles_create_aa(x2info, ! output array 38 1 label, ! character name for this array 39 2 nocc, ! number of occupied orbitals 40 3 nvir, ! number of virtual orbitals 41 4 tszocc, ! tilesize of occupied orbitals 42 5 tszvir) ! tilesize of virtual orbitals 43c 44 end 45c 46c this removes the dependency on stdio.fh 47c 48#define LuOut 6 49c 50c this removes the dependency on errquit.fh 51c 52#define errquit errbomb 53#define GA_ERR 100 54#define MA_ERR 200 55c 56 subroutine errbomb(string, icode, errcode) 57 implicit none 58 character*(*) string 59 integer icode 60 integer errcode 61 call ga_error(string, icode) 62 end 63#endif 64 65#ifdef DEBUG_PRINT 66#define DP(x) print*,' x = ', x 67#else 68#define DP(x) 69#endif 70c 71 subroutine ncc_doubles_create_aa(x2info, ! output array 72 1 label, ! character name for this array 73 2 nocc, ! number of occupied orbitals 74 3 nvir, ! number of virtual orbitals 75 4 tszocc, ! tilesize of occupied orbitals 76 5 tszvir) ! tilesize of virtual orbitals 77c 78c $Id$ 79c 80 implicit none 81#include "mafdecls.fh" 82#include "global.fh" 83#ifndef VACUUM 84#include "errquit.fh" 85#include "stdio.fh" 86#endif 87c 88c interface variables 89c 90 integer x2info(*) ! array containing doubles descriptor: 91c 92c x2info(1) = GA handle ~ g_x2 93c x2info(2) = GA size ~ s_x2 94c x2info(3) = 95c 96c other handles, such as for check-pointing, should be 97c added to this array, so be careful to not hard-core 98c the length too many places 99c 100 character*(*) label ! label 101 integer nocc ! number of occupied orbitals 102 integer nvir ! number of virtual orbitals 103 integer tszocc ! tilesize of occupied orbitals 104 integer tszvir ! tilesize of virtual orbitals 105c 106c internal variables 107c 108 integer g_x2 ! GA handle 109 integer s_x2 ! GA size 110c 111 integer nftocc ! number of full occupied tiles (1D) 112 integer nftvir ! number of full virtual tiles (1D) 113c 114 integer ntocc ! total number of occupied tiles (1D) 115 integer ntvir ! total number of virtual tiles (1D) 116c 117 integer etszocc ! end tilesize of occupied orbitals 118 integer etszvir ! end tilesize of virtual orbitals 119c 120 integer ifetocc ! 1 if end tile exists, 0 otherwise 121 integer ifetvir ! 1 if end tile exists, 0 otherwise 122c 123c virtual 2D tile configuration info 124c 125 integer p1vv_num, p1vv_dim, p1vv_tot 126 integer p2vv_num, p2vv_dim, p2vv_tot 127 integer p3vv_num, p3vv_dim, p3vv_tot 128 integer p4vv_num, p4vv_dim, p4vv_tot 129c 130c occupied 2D tile configuration info 131c 132 integer p1oo_num, p1oo_dim, p1oo_tot 133 integer p2oo_num, p2oo_dim, p2oo_tot 134 integer p3oo_num, p3oo_dim, p3oo_tot 135 integer p4oo_num, p4oo_dim, p4oo_tot 136c 137c GA info 138c 139 integer dims(2) ! GA dimensions 140 integer chunk(2) ! GA chunking 141 integer ndim ! using 2D GA storage 142 parameter (ndim = 2) 143 integer gatype ! numerical type for GA (always double) 144 parameter (gatype = MT_DBL) 145 integer pgroup ! GA processor group handle 146c 147c irregular distribution variables 148c 149 integer dist_block(2) ! blocks per dimension 150 integer k_blksz_list,l_blksz_list ! MA handles for map helper 151 integer k_dist_map,l_dist_map ! MA handles for map 152 integer b, bsum 153c 154c function declarations 155c 156 integer ncc_anti, ncc_symm 157 external ncc_anti, ncc_symm 158c 159#ifdef DEBUG_PRINT 160 print*,'top of ncc_doubles_create_aa' 161 DP(tszocc) 162 DP(tszvir) 163#endif 164c 165c determine number of full tiles (dimension tszocc^2 * tszvir^2) 166c 167 nftocc = nocc / tszocc 168 nftvir = nvir / tszvir 169c 170c determine size of end tilesize 171c 172 etszocc = mod(nocc,tszocc) 173 etszvir = mod(nvir,tszvir) 174c 175c determine total tile number from full tiles + end tile (if exist) 176c 177 ntocc = nftocc 178 ntvir = nftvir 179 if (etszocc.gt.0) ntocc = ntocc+1 180 if (etszvir.gt.0) ntvir = ntvir+1 181 DP(nocc) 182 DP(nvir) 183 DP(tszocc) 184 DP(tszvir) 185 DP(etszocc) 186 DP(etszvir) 187 DP(nftocc) 188 DP(nftvir) 189 DP(ntocc) 190 DP(ntvir) 191c 192c configure tiling in four parts - virtuals 193c 194c B_last is the end tile index, if it exists, otherwise it is ntvir+1 195c 196c part 1 - A < B < B_last (all cases) 197c part 2 - A < B = B_last (if end tile exists) 198c part 3 - A = B < B_last (all cases) 199c part 4 - A = B = B_last (if end tile exists) 200c 201c now we tabulate the number of 2D VV tiles, 202c their individual 2D dimension, and their total 2D dimension 203c 204c part 1 - A < B < B_last (all cases) 205c 206 p1vv_num = ncc_anti(nftvir) 207 p1vv_dim = tszvir*tszvir 208 p1vv_tot = p1vv_num * p1vv_dim 209 DP(p1vv_num) 210 DP(p1vv_dim) 211 DP(p1vv_tot) 212c 213c part 2 - A < B = B_last (if end tile exists) 214c 215 if (etszocc.gt.0) then 216 p2vv_num = nftvir 217 p2vv_dim = tszvir*etszvir 218 else 219 p2vv_num = 0 220 p2vv_dim = 0 221 endif 222 p2vv_tot = p2vv_num * p2vv_dim 223 DP(p2vv_num) 224 DP(p2vv_dim) 225 DP(p2vv_tot) 226c 227c part 3 - A = B < B_last (all cases) 228c 229 p3vv_num = nftvir 230 p3vv_dim = ncc_anti(tszvir) 231 p3vv_tot = p3vv_num * p3vv_dim 232 DP(p3vv_num) 233 DP(p3vv_dim) 234 DP(p3vv_tot) 235c 236c part 4 - A = B = B_last (if end tile exists) 237c 238 if (etszocc.gt.0) then 239 p4vv_num = 1 240 p4vv_dim = ncc_anti(etszvir) 241 else 242 p4vv_num = 0 243 p4vv_dim = 0 244 endif 245 p4vv_tot = p4vv_num * p4vv_dim 246 DP(p4vv_num) 247 DP(p4vv_dim) 248 DP(p4vv_tot) 249c 250c sanity check, since tiled total 2D dimension should 251c equal the untiled 2D dimension 252c 253 if ( (p1vv_tot + p2vv_tot + p3vv_tot + p4vv_tot) 254 1 .ne. ( ncc_anti(nvir) ) ) then 255 if (ga_nodeid().eq.0) write(LuOut,100) ncc_anti(nvir), 256 1 (p1vv_tot + p2vv_tot + p3vv_tot + p4vv_tot) 257 100 format(1x,'virtual tiling wrong!!! ',/, 258 1 1x,'untiled VV dimension = ',i16,/, 259 2 1x,' tiled VV dimension = ',i16) 260 endif 261 DP(ncc_anti(nvir)) 262c 263c configure tiling in four parts - occupied 264c 265c J_last is the end tile index, if it exists, otherwise it is ntocc+1 266c 267c part 1 - I < J < J_last (all cases) 268c part 2 - I < J = J_last (if end tile exists) 269c part 3 - I = J < J_last (all cases) 270c part 4 - I = J = J_last (if end tile exists) 271c 272c now we tabulate the number of 2D VV tiles, 273c their individual 2D dimension, and their total 2D dimension 274c 275c part 1 - I < J < J_last (all cases) 276c 277 p1oo_num = ncc_anti(nftocc) 278 p1oo_dim = tszocc*tszocc 279 p1oo_tot = p1oo_num * p1oo_dim 280 DP(p1oo_num) 281 DP(p1oo_dim) 282 DP(p1oo_tot) 283c 284c part 2 - I < J = J_last (if end tile exists) 285c 286 if (etszocc.gt.0) then 287 p2oo_num = nftocc 288 p2oo_dim = tszocc*etszocc 289 else 290 p2oo_num = 0 291 p2oo_dim = 0 292 endif 293 p2oo_tot = p2oo_num * p2oo_dim 294 DP(p2oo_num) 295 DP(p2oo_dim) 296 DP(p2oo_tot) 297c 298c part 3 - I = J < J_last (all cases) 299c 300 p3oo_num = nftocc 301 p3oo_dim = ncc_anti(tszocc) 302 p3oo_tot = p3oo_num * p3oo_dim 303 DP(p3oo_num) 304 DP(p3oo_dim) 305 DP(p3oo_tot) 306c 307c part 4 - I = J = J_last (if end tile exists) 308c 309 if (etszocc.gt.0) then 310 p4oo_num = 1 311 p4oo_dim = ncc_anti(etszocc) 312 else 313 p4oo_num = 0 314 p4oo_dim = 0 315 endif 316 p4oo_tot = p4oo_num * p4oo_dim 317 DP(p4oo_num) 318 DP(p4oo_dim) 319 DP(p4oo_tot) 320c 321c sanity check, since tiled total 2D dimension should 322c equal the untiled 2D dimension 323c 324 if ( (p1oo_tot + p2oo_tot + p3oo_tot + p4oo_tot) 325 1 .ne. ( ncc_anti(nocc) ) ) then 326 if (ga_nodeid().eq.0) write(LuOut,200) ncc_anti(nocc), 327 1 (p1oo_tot + p2oo_tot + p3oo_tot + p4oo_tot) 328 200 format(1x,'occupied tiling wrong!!! ',/, 329 1 1x,'untiled OO dimension = ',i16,/, 330 2 1x,' tiled OO dimension = ',i16) 331 endif 332 DP(ncc_anti(nocc)) 333c 334c end of tiling configuration 335c 336c the GA will be 2D nocc*(nocc-1)/2 * nvir*(nvir-1)/2 337c 338 s_x2 = ncc_anti(nvir) * ncc_anti(nocc) 339 x2info(2) = s_x2 340 if (ga_nodeid().eq.0) write(LuOut,300) s_x2 341 300 format(1x,'creating GA of ',i16,' doubles') 342c 343c GA handle creation and labeling 344c 345 g_x2 = ga_create_handle() 346 call ga_set_array_name(g_x2, label) 347c 348c GA processor group configuration 349c to begin, we'll use the world group 350c 351 pgroup = ga_pgroup_get_world() 352 call ga_set_pgroup(g_x2, pgroup) 353c 354c GA dimensions 355c 356 dims(1) = ncc_anti(nvir) ! leading dimension is "fast" in Fortran GA API 357 dims(2) = ncc_anti(nocc) 358 call ga_set_data(g_x2, ndim, dims, gatype) 359c 360c GA distribution 361c all 4D tiles should be contiguous on a single host 362c and distributed evenly across all nodes 363c 364c dist_block(2) - number of blocks each dimension is divided into 365c 366 dist_block(1) = p1vv_num + p2vv_num + p3vv_num + p4vv_num ! VV first 367 dist_block(2) = p1oo_num + p2oo_num + p3oo_num + p4oo_num ! OO second 368 DP(dist_block(1)) 369 DP(dist_block(2)) 370c 371c dist_map - starting index for each block; the size s is a sum of all elements of nblock array 372c 373 if (.not.ma_push_get(mt_int,dist_block(1)+dist_block(2), 374 1 'blksz_list',l_blksz_list,k_blksz_list)) then 375 call errquit ('ncc_doubles_create_aa: ma_push_get blksz_list', 376 1 dist_block(1)+dist_block(2),MA_ERR) 377 endif 378c 379 if (.not.ma_push_get(mt_int,dist_block(1)+dist_block(2), 380 1 'dist_map',l_dist_map,k_dist_map)) then 381 call errquit ('ncc_doubles_create_aa: ma_push_get dist_map', 382 1 dist_block(1)+dist_block(2),MA_ERR) 383 endif 384c 385 do b = 1, p1vv_num 386 int_mb(k_blksz_list+b-1) = p1vv_dim 387 enddo 388 do b = 1,p2vv_num 389 int_mb(k_blksz_list+p1vv_num+b-1) = p2vv_dim 390 enddo 391 do b = 1,p3vv_num 392 int_mb(k_blksz_list+p1vv_num+p2vv_num+b-1) = p3vv_dim 393 enddo 394 do b = 1,p4vv_num 395 int_mb(k_blksz_list+p1vv_num+p2vv_num+p3vv_num+b-1) = p4vv_dim 396 enddo 397c 398 bsum = 1 399 int_mb(k_dist_map) = bsum 400 do b = 2,p1vv_num+p2vv_num+p3vv_num+p4vv_num 401 bsum = bsum + int_mb(k_blksz_list+b-2) 402 int_mb(k_dist_map+b-1) = bsum 403 enddo 404c 405#ifdef DEBUG_PRINT 406 print*,'========================================' 407 do b = 1,dist_block(1) 408 print*,'blksz_list',b,int_mb(k_blksz_list+b-1) 409 enddo 410c 411 do b = 1,dist_block(1) 412 print*,'dist_map',b,int_mb(k_dist_map+b-1) 413 enddo 414 print*,'========================================' 415#endif 416c 417 do b = 1, p1oo_num 418 int_mb(k_blksz_list+dist_block(1)+b-1) = p1oo_dim 419 enddo 420 do b = 1,p2oo_num 421 int_mb(k_blksz_list+dist_block(1)+p1oo_num+b-1) = p2oo_dim 422 enddo 423 do b = 1,p3oo_num 424 int_mb(k_blksz_list+dist_block(1)+ 425 1 p1oo_num+p2oo_num+b-1) = p3oo_dim 426 enddo 427 do b = 1,p4oo_num 428 int_mb(k_blksz_list+dist_block(1)+ 429 1 p1oo_num+p2oo_num+p3oo_num+b-1) = p4oo_dim 430 enddo 431c 432 bsum = 1 433 int_mb(k_dist_map+dist_block(1)) = bsum 434 do b = 2,p1oo_num+p2oo_num+p3oo_num+p4oo_num 435 bsum = bsum + int_mb(k_blksz_list+dist_block(1)+b-2) 436 int_mb(k_dist_map+dist_block(1)+b-1) = bsum 437 enddo 438c 439#ifdef DEBUG_PRINT 440 print*,'========================================' 441 do b = dist_block(1)+1,dist_block(1)+dist_block(2) 442 print*,'blksz_list',b,int_mb(k_blksz_list+b-1) 443 enddo 444c 445 do b = dist_block(1)+1,dist_block(1)+dist_block(2) 446 print*,'dist_map',b,int_mb(k_dist_map+b-1) 447 enddo 448 print*,'========================================' 449#endif 450c 451c send the irregular distribution to GA 452c 453! call ga_set_irreg_distr(g_x2, int_mb(k_dist_map), dist_block) 454c 455c don't need the dist_map anymore since it is inside of GA now 456c 457 if (.not.ma_pop_stack(l_dist_map)) then 458 call errquit('ncc_doubles_create_aa: ma_pop_stack dist_map ', 459 1 0,MA_ERR) 460 endif 461c 462 if (.not.ma_pop_stack(l_blksz_list)) then 463 call errquit('ncc_doubles_create_aa: ma_pop_stack blksz_list ', 464 1 0,MA_ERR) 465 endif 466c 467c Regular distribution with chunking for now 468c 469 chunk(1) = -1 470 chunk(2) = -1 471 call ga_set_chunk(g_x2, chunk) 472c 473c GA allocation 474c 475 if (.not. ga_allocate(g_x2) ) then 476 call errquit ('ncc_doubles_create_aa: ga_allocate',g_x2, 477 1 GA_ERR) 478 endif 479c 480 x2info(1) = g_x2 481c 482 call ga_zero(g_x2) 483c 484#ifdef DEBUG_PRINT 485 call ga_print_distribution(g_x2) 486#endif 487c 488#ifdef DEBUG_PRINT 489 print*,'end of ncc_doubles_create_aa' 490#endif 491c 492 return 493 end 494 495 496 497 498 subroutine ncc_doubles_destroy(x2info) 499c 500 implicit none 501c#include "mafdecls.fh" 502#include "global.fh" 503#ifndef VACUUM 504#include "errquit.fh" 505#include "stdio.fh" 506#endif 507c 508c interface variables 509c 510 integer x2info(*) ! array containing doubles descriptor: 511c 512c x2info(1) = GA handle ~ g_x2 513c x2info(2) = GA size ~ s_x2 514c 515c other handles, such as for check-pointing, should be 516c added to this array, so be careful to not hard-core 517c the length too many places 518c 519c internal variables 520c 521 integer g_x2 ! GA handle 522c 523#ifdef DEBUG_PRINT 524 print*,'top of ncc_doubles_destroy' 525#endif 526c 527 g_x2 = x2info(1) 528c 529 if (.not. ga_destroy(g_x2) ) then 530 call errquit ('ncc_doubles_destroy: ga_destroy',g_x2,GA_ERR) 531 endif 532c 533#ifdef DEBUG_PRINT 534 print*,'end of ncc_doubles_destroy' 535#endif 536c 537 return 538 end