1C$Id$ 2C> 3C> \defgroup subgrp NWChem interface to GA sub-groups 4C> 5C> \ingroup subgrp 6C> @{ 7C> 8C> \file util_sgroup.F 9C> \brief The NWChem interface to the GA sub-groups 10C> 11C> This file contains routines that facilitate the use of GA sub-groups 12C> in NWChem [1-3]. The routines take care of issues such as, 13C> redirecting the standard output from different sub-groups, 14C> replicating the runtime database, accessing the movecs file, etc. In 15C> addition it provides mechanisms for creating pools of processors in 16C> a number of different ways. 17C> 18C> To date there is no input block to control this functionality. 19C> Instead set directives that put data directly into the runtime 20C> database are required. 21C> 22C> ### References ### 23C> 24C> [1] T. L. Windus, S. M. Kathmann, L. D. Crosby, 25C> "High performance computations using dynamical nucleation 26C> theory", <i>J. Phys.: Conf. Ser.</i> (2008) <b>125</b>, 27C> 012017, DOI: 28C> <a href="https://doi.org/10.1088/1742-6596/125/1/012017"> 29C> 10.1088/1742-6596/125/1/012017</a>. 30C> 31C> [2] B. J. Palmer, S. M. Kathmann, M. Krishnan, V. Tipparaju, 32C> J. Nieplocha, "The use of processor groups in molecular 33C> dynamics simulations to sample free-energy states", 34C> <i>J. Chem. Theory Comput.</i> (2007) <b>3</b>, 583-592, 35C> DOI: 36C> <a href="https://doi.org/10.1021/ct600260u"> 37C> 10.1021/ct600260u</a>. 38C> 39C> [3] J. Nieplocha, B. Palmer, V. Tipparaju, M. Krishnan, H. Trease, 40C> E. Apra, "Advances, applications and performance of the Global 41C> Arrays shared memory programming toolkit", 42C> <i>Int. J. High Perf. Comput. Appl.</i> (2006) <b>20</b>, 43C> 203-231, DOI: 44C> <a href="https://doi.org/10.1177/1094342006064503"> 45C> 10.1177/1094342006064503</a>. 46C> 47 block data groups 48#include "util_sgroup.fh" 49C Make sure that the group code works even if groups are not set up yet 50 data mygroup(0) /1/ 51 data my_ga_grp(0)/-1/ 52 data my_ga_0_grp(0)/-999/ ! Bogus 53 data ngroups(0) /1/ 54 data sgprint /.true./ 55 data depth /0/ 56 end 57 58C############################################################################### 59C> 60C> \brief Initialize the subgroup layer 61C> 62C> \return Returns .TRUE. if successful (does not return otherwise) 63C> 64 logical function util_sgstart(rtdb) 65 implicit none 66#include "mafdecls.fh" 67#include "rtdb.fh" 68 integer rtdb !< [Input] The RTDB handle 69 integer groups_want 70 integer dum(1) 71 integer method 72C added option to allow writting of group rtdb's 73C to permanent_dir(.false.) or scratch_dir(.true.) 74 integer dir 75c 76 dir = 1 ! permanent_dir by default 77c 78c Read number of subgroups from input 79c 80 if (.not.rtdb_get(rtdb, 'subgroups_number', mt_int, 1, 81 & groups_want))groups_want=1 82 83 method = 1 84 call util_sggo(rtdb,groups_want,method,dum,dir) 85 86 util_sgstart=.true. 87 return 88 end 89C############################################################################### 90C> 91C> \brief Do the actual initialization 92C> 93 subroutine util_sggo(rtdb,groups_want,method,array_cpu,dir) 94 implicit none 95#include "stdio.fh" 96#include "global.fh" 97#include "rtdb.fh" 98#include "util_sgroup.fh" 99#include "mafdecls.fh" 100#include "inp.fh" 101#include "util.fh" 102#include "errquit.fh" 103 104c 105c Input 106c 107 integer rtdb !< [Input] The RTDB handle 108 integer groups_want !< [Input] The number of subgroups requested 109 integer method !< [Input] The method to partition the 110 !< processor pool 111 !< * 1 -- use `groups_want` to generate equal 112 !< sized groups (`array_cpu` ignored) 113 !< * 2 -- turn each SMP box into a group 114 !< (`array_cpu` and `groups_want` ignored) 115 !< * 3 -- use `array_cpu(groups_want)` to 116 !< define number of nodes per group 117 !< * 4 -- use `array_cpu(groups_want+nnodes)` 118 !< to define which nodes per group 119 integer array_cpu(*) !< [Input] Specification of the subgroups 120c 121c rtdb is the runtime database file 122c 123c method is: 124c 1 -- use groups_want to generate equal sized groups (array_cpu ignored) 125c 2 -- turn each SMP box into a group (array_cpu and groups_want ignored) 126c 3 -- use array_cpu(groups_want) to define number of nodes per group 127c 4 -- use array_cpu(groups_want+nnodes) to define which nodes per group 128c 129 integer i 130 integer meinworld, meafter 131 integer X0(1) 132 integer idum 133 134C strings for fname definition 135 character*256 permdir 136 character*256 fprefix 137 integer myproc,nproc 138 integer mypgroup_old 139C 140C Added option to allow group rtdb's to be written 141C to permanent_dir (0) or scratch_dir(1) 142 integer dir 143 logical dir2 144#ifndef GANXTVAL 145C Turn off GA load balancing in groups 146 logical util_statldb 147 external util_statldb 148 logical dumlog 149#endif 150 151C convert to logical 152 if (dir .gt. 0) then 153 dir2 = .true. 154 else 155 dir2 = .false. 156 endif 157#ifndef GANXTVAL 158C Turn off dynamic load balancing, since it fails in groups 159C We never turn this back on 160 if (groups_want .ne. 1) dumlog = util_statldb(.true.,rtdb) 161#endif 162c 163c dont use disk for gridpts, etc. in DFT 164c We never turn this back on 165c 166 if(.not.rtdb_put(rtdb,'dft:largenode',mt_log,1,.true.)) 167 & call errquit('util_sggo: rtdb_put nodisk failed',1,RTDB_ERR) 168 if (.not. rtdb_put(rtdb, 'dft:noio', mt_int, 1, 1)) 169 & call errquit('util_sggo: rtdb_put noio failed',1,RTDB_ERR) 170 if(.not.rtdb_put(rtdb,'dft:xcreplicated',mt_log,1,.false.)) 171 & call errquit('util_sggo: rtdb_put xcreplicated fail',1,RTDB_ERR) 172 173c 174 175 mypgroup_old = ga_pgroup_get_default() 176 if (mypgroup_old .eq. ga_pgroup_get_world()) then 177 depth = 0 178 mygroup(depth) = 1 179 ngroups(depth) = 1 180 my_ga_grp(depth) = ga_pgroup_get_world() 181 x0(1) = 0 182 if (my_ga_0_grp(0) .eq. -999) then 183 my_ga_0_grp(depth) = ga_pgroup_create(x0,1) 184 end if 185 186c preliminary rtdb cloning 187C Work around the 36 char cutoff for fname 188 if (.not. rtdb_getfname(rtdb, rtdb_fname(depth))) call 189 * errquit('rtdb_fname call failed',0,0) 190 write(permdir,'(256(a))') (' ', i=1,256) 191 write(fprefix,'(256(a))') (' ', i=1,256) 192 if(.not. rtdb_cget(rtdb,'permanent_dir',1,permdir)) then 193 permdir = '.' 194 endif 195 if(.not. rtdb_cget(rtdb,'file_prefix',1,fprefix)) 196 & call errquit('rtdb get file_prefix failed',0,0) 197 idum = 1 198 747 if(permdir(idum:idum).eq. ' ' .and. idum.lt.inp_strlen(permdir)) 199 & then 200 idum = idum + 1 201 goto 747 202 endif 203 write(rtdb_fname(depth),'(a,a,a,a)') 204 & permdir(idum:inp_strlen(permdir)), 205 & '/', 206 & fprefix(1:inp_strlen(fprefix)), 207 & '.db' 208 end if 209 if (.not. rtdb_close(rtdb, 'keep')) call errquit( 210 * ' failed to close rtdb for sg ',0,0) 211 212 depth = depth + 1 213 if (depth .gt. maxdeep) 214 * call errquit('depth is greater than maxdeep',0,0) 215 216 myproc=ga_nodeid() 217 nproc=ga_nnodes() 218c sgprint = util_print('sgroup info', print_debug) 219c 220c Create groups 221c 222 if(myproc.eq.0) write(LuOut,*) 'Creating groups' 223 224c initialize sub groups using a method 225 226 if (method .eq. 1) then 227 call util_sginit(groups_want) 228 else if (method .eq. 2) then 229 call util_sginit_smp() 230 else if (method .eq. 3) then 231 call util_sginit_irreg(array_cpu,groups_want) 232 else if (method .eq. 4) then 233 call util_sginit_power(array_cpu,groups_want+nproc) 234 else 235 call errquit('util_sggo: bad method',0,0) 236 endif 237 238 if (sgprint) then 239 meafter=ga_nodeid() 240 meinworld = ga_pgroup_nodeid(ga_pgroup_get_world()) 241 write(LuOut, 242 * "(' proc ',I4,' was (', 243 * I4,' ',I4,' ',I4, 244 * ') now is (', 245 * I4,' ',I4,' ',I4, 246 * ')')") 247 * meinworld, 248 * myproc,mygroup(depth-1),my_ga_grp(depth-1), 249 * meafter,mygroup(depth),my_ga_grp(depth) 250 call util_flush(LuOut) 251 endif 252 253 call ga_pgroup_sync(mypgroup_old) ! Make sure all is well 254 255 call util_sgrtdb(rtdb,rtdb_fname(depth-1),rtdb_fname(depth),dir2) 256 if (.not. rtdb_open(rtdb_fname(depth), 'old', rtdb)) call errquit 257 * (' rtdb_open old failed ',0,0) 258 259 if(myproc.eq.0.and.sgprint) then 260 write(LuOut,*) ga_nodeid(),' of group',mygroup(depth),'rtdbname ' 261 * ,rtdb_fname(depth), ' rtdb=',rtdb 262 endif 263c 264c Create movecs for each subgroup 265c 266 call util_sgmovecs(rtdb) 267c 268c 269 if(myproc.eq.0.and.sgprint) then 270 write(LuOut,*) 'everyone should have an open file at this point' 271 call util_flush(LuOut) 272 endif 273c These next lines are since we are now done creating groups 274 call util_flush(LuErr) 275 call util_flush(LuOut) 276 call ga_pgroup_sync(mypgroup_old) ! Make sure all is well 277 return 278 end 279C############################################################################### 280C> 281C> \brief Finalize the subgroup layer 282C> 283 subroutine util_sgend(rtdb) 284 implicit none 285#include "stdio.fh" 286#include "global.fh" 287#include "rtdb.fh" 288#include "util_sgroup.fh" 289 integer rtdb !< [Input] The RTDB handle 290 integer myproc 291 logical ignore 292 integer mypgroup_old 293 294 mypgroup_old = ga_pgroup_get_default() 295 myproc=ga_nodeid() 296 if (mypgroup_old .eq. ga_pgroup_get_world()) then 297 write(LuErr,*) ' Node ',myproc,' tried to close world group ' 298 return 299 endif 300 myproc=ga_nodeid() 301 if(myproc.eq.0.and.sgprint) then 302 write(LuErr,*) ' deleting cloned rtdb ' 303 endif 304C Make sure all nodes are done using RTDB file 305 call ga_pgroup_sync(mypgroup_old) 306 if (.not. rtdb_close(rtdb, 'delete'))call errquit 307 $ (' sgend: rtdb_close and delete failed ',0,0) 308 if(myproc.eq.0) then 309 write(LuOut,*) 'Closing subgroup ' 310 endif 311 if (sgprint) call util_flush(LuOut) 312 313 call ga_pgroup_set_default(my_ga_grp(depth-1)) 314 if (myproc .eq. 0) then 315 ignore = ga_pgroup_destroy(my_ga_0_grp(depth)) 316 endif 317 ignore = ga_pgroup_destroy(mypgroup_old) 318 my_ga_0_grp(depth) = -999 319 my_ga_grp(depth) = -999 320c 321c reopen higher up rtdb 322c 323 depth = depth - 1 324 if (.not. rtdb_open(rtdb_fname(depth), 'old', rtdb)) 325 $ call errquit('sgend: rtdb_open old failed ', 0, 0) 326 327 return 328 end 329 330C############################################################################### 331C> 332C> \brief Copy the RTDB for each subgroup 333C> 334C> Copy the RTDB for each subgroup. This way the subgroups can 335C> independently modify the RTDB without messing each other's work up. 336C> Depending on the kind of calculation one may want to store the 337C> RTDB replicas in the scratch directory or the permanent directory. 338C> This behavior can be controlled by the `dir` argument. 339C> 340 subroutine util_sgrtdb(rtdb,name_in,name_out,dir) 341 implicit none 342 integer rtdb !< [Input] The RTDB handle 343 character*256 name_in !< [Input] The filename of the global RTDB 344 character*256 name_out !< [Output] The filename of the subgroup 345 !< RTDB 346#include "stdio.fh" 347#include "global.fh" 348#include "rtdb.fh" 349#include "inp.fh" 350#include "util_sgroup.fh" 351 integer ii, iend, ibegin 352 logical ortdbstate,ortdbstate2,status 353 character*256 grouprtdb 354 character*256 groupname 355 integer myproc 356 357C Added option to allow group rtdb's to be written to 358C Permanent_dir or scratch_dir 359 logical dir !< [Input] Flag determining the location of the 360 !< replica RTDBs 361 !< * .FALSE. changes to `permanent_dir` 362 !< * .TRUE. changes to `scratch_dir` 363 364 myproc=ga_nodeid() 365 366 if(myproc.eq.0.and.sgprint) then 367 write(LuOut,*) 'util_sgrtdb start' 368 call util_flush(LuOut) 369 endif 370c 371c do a copy of rtdb to permanent_dir or scratch_dir for each group 372c 373 ibegin = 1 374 iend = ibegin + (6-1) 375 write(groupname(ibegin:iend),'(6A)') 'sg_db.' 376 do ii=1,depth 377 ibegin = iend + 1 378 iend = ibegin + (4-1) 379C 9999 is largest 4 digit number, and 256 is just plain long (used above) 380C Of course a long group name will most likely fail long before 381C it gets to that long - other limitations on file name length exist 382 if (iend .gt. 256 .or. mygroup(ii) .gt. 9999) 383 * call errquit(' util_sgrtdb: groups too deep',0,0) 384 write(groupname(ibegin:iend),'(i4.4)') mygroup(ii) 385 end do 386 387 call util_file_name(groupname(1:iend),dir,.false.,grouprtdb) 388C 389C Do not want to have other nodes to be using old RTDB file anymore 390 call ga_pgroup_sync(ga_pgroup_get_default()) 391C 392C Node 0 of the group now copies the original RTDB to a group specific one 393C 394 if (myproc .eq. 0) then 395 if(sgprint) then 396 write(LuOut,*)'world rtdbname is ', 397 & name_in(1:inp_strlen(name_in)) 398 write(LuOut,*)'group rtdbname is ', 399 & grouprtdb(1:inp_strlen(grouprtdb)) 400 call util_flush(LuOut) 401 endif 402 inquire(file=name_in(1:inp_strlen(name_in)),exist=status) 403 if(.not.status) then 404c most likely cause of error: permdir not available on this node 405 write(LuErr,*) ' !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!' 406 write(LuErr,*) ' ! please change permanent_dir to a !' 407 write(LuErr,*) ' ! filesystem available to all the !' 408 write(LuErr,*) ' ! processors (e.g. NFS, GPFS, Lustre)!' 409 write(LuErr,*) ' !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!' 410 call errquit(' util_sgrtdb: failed to open rtdb',0,0) 411 endif 412 call util_file_copy(name_in(1:inp_strlen(name_in)), 413 & grouprtdb(1:inp_strlen(grouprtdb))) 414 inquire(file=grouprtdb(1:inp_strlen(grouprtdb)), 415 & exist=status) 416 if (.not. status) then 417 write (LuErr,*) 'copy failed' 418 call errquit('util_sgrtdb: problem with system call', 0, 0) 419 else if (sgprint) then 420 write(LuOut,*) ' rtdb copied ', ga_pgroup_get_default() 421 call util_flush(LuOut) 422 endif 423 endif 424 name_out=grouprtdb 425C Do not want to have other nodes using new RTDB before it exists 426 call ga_pgroup_sync(ga_pgroup_get_default()) 427 return 428 end 429C############################################################################### 430C> 431C> \brief Initialize the subgroup layer using the SMP method 432C> 433C> The SMP method targets machines that are clusters of shared memory 434C> nodes. It creates subgroups where each subgroup coincides with 435C> a shared memory node. 436C> 437 subroutine util_sginit_smp() 438 implicit none 439c 440c creates one subgroup for each smp 441c 442#include "global.fh" 443#include "util_sgroup.fh" 444 integer i,j,proclist(maxcpus),num_proc 445 446c setup 447 if (depth .ne. 1) then 448 call errquit(' util_sginit_smp: bad depth',0,0) 449 endif 450 ngroups(depth) = ga_cluster_nnodes() 451 mygroup(depth) = 1 + ga_cluster_nodeid() 452 my_ga_grp(depth) = -99 453 454c make each smp node into a group 455 456 do i=0,ngroups(depth)-1 457 num_proc = ga_cluster_nprocs(i) 458 if (num_proc .gt. maxcpus) call errquit('increase maxcpus',0,0) 459 do j=0,num_proc - 1 460 proclist(j+1)=ga_cluster_procid(i,j) 461 enddo 462 if (i+1 .eq. mygroup(depth)) then 463 my_ga_grp(depth) = ga_pgroup_create(proclist,num_proc) 464 endif 465 enddo 466 467c create the zero group 468 469 num_proc = ngroups(depth) 470 if(num_proc .gt. maxcpus)call errquit('increase maxcpus',0,0) 471 do i=0, num_proc - 1 472 proclist(i+1)=ga_cluster_procid(i,0) 473 enddo 474 my_ga_0_grp(depth) = ga_pgroup_create(proclist,num_proc) 475 476C Set the group 477 478 call ga_pgroup_set_default(my_ga_grp(depth)) 479 480 return 481 end 482C############################################################################### 483C> 484C> \brief Initialize the subgroup layer with irregularly sized groups 485C> 486C> This method generates irregularly sized subgroups in a simple way. 487C> The array `cpusperg_array` (CPUs per Group) specifies the size of 488C> every subgroup. This routine generates subgroups to match that 489C> specification. 490C> 491 subroutine util_sginit_irreg(cpusperg_array,length) 492 implicit none 493c 494c creates custom subgroups using cpusperg_array() 495c 496#include "global.fh" 497#include "util_sgroup.fh" 498 integer nproc, myproc 499 integer i,j,proclist(maxcpus), proc_counter,group_counter 500 integer length !< [Input] The number of subgroups 501 integer cpusperg_array(length) !< [Input] The size of each 502 !< subgroup 503 504c setup 505 506 nproc = ga_nnodes() 507 myproc = ga_nodeid() 508 my_ga_grp(depth) = -99 509 mygroup(depth)=-99 510 ngroups(depth)=length 511 512C verify input all at once instead of constantly in loops 513 514 proc_counter = 0 515 do i=1,length 516 if(cpusperg_array(i) .le. 0) then 517 call errquit(' util_sginit_irreg: zero cpusperg_array',0,0) 518 endif 519 if(cpusperg_array(i) .gt. maxcpus) then 520 call errquit(' util_sginit_irreg:increase maxcpus',0,0) 521 endif 522 proc_counter = proc_counter + cpusperg_array(i) 523 enddo 524 if (proc_counter .ne. nproc) then 525 call errquit(' util_sginit_irreg:bad cpusperg_array',0,0) 526 end if 527 528C make groups 529 proc_counter=1 530 group_counter=1 531 do i=0,nproc-1 532 proclist(proc_counter) = i 533 if (i.eq.myproc) then 534 mygroup(depth)=group_counter 535 endif 536 if (cpusperg_array(group_counter).eq.proc_counter) then 537 if(mygroup(depth) .eq. group_counter) then 538 my_ga_grp(depth)=ga_pgroup_create(proclist,proc_counter) 539 endif 540 group_counter=group_counter+1 541 proc_counter=0 542 end if 543 proc_counter=proc_counter+1 544 enddo 545 546c create the zero group 547 548 if(ngroups(depth).gt.maxcpus)call errquit('increase maxcpus',0,0) 549 proc_counter=1 550 group_counter=1 551 do i=0,nproc-1 552 if (proc_counter .eq. 1) then 553 proclist(group_counter) = i 554 end if 555 if (cpusperg_array(group_counter).eq.proc_counter) then 556 group_counter=group_counter+1 557 proc_counter=0 558 end if 559 proc_counter=proc_counter+1 560 enddo 561 my_ga_0_grp(depth) = ga_pgroup_create(proclist,ngroups(depth)) 562 563C Set the group 564 565 call ga_pgroup_set_default(my_ga_grp(depth)) 566 567 return 568 end 569C############################################################################### 570C> 571C> \brief Initialize the subgroup layer using the "power" method 572C> 573C> The power method creates subgroups based on the specification in 574C> `cpu_array`. The data in this array specifies the size of each 575C> subgroup as well as which processes are part of each subgroup. 576C> The data is stored in a vector of the following form 577C> \f{eqnarray*}{ 578C> \left(G_1,n_1,n_2,n_3,\ldots,n_{G_1},G_2,n_1,n_2,n_3,\ldots, 579C> n_{G_2},\ldots\right) 580C> \f} 581C> where \f$G_1\f$, \f$G_2\f$, etc. are the sizes of the respective 582C> subgroups, and \f$n_1, \ldots, n_G\f$ are the processor ranks that 583C> are part of that subgroup. 584C> 585 subroutine util_sginit_power(cpu_array,length) 586 implicit none 587c 588c creates custom highly custom subgroups using cpu_array() 589c is is of the form G1,n1,n2,n3,...,nn,G2,n1,n2,n3,...,nn,... 590c Where the G's are the size of the groups, and the n's 591c are the specific nodes in the groups 592c length = nnodes + ngroups 593c 594#include "global.fh" 595#include "util_sgroup.fh" 596 integer nproc, myproc 597 integer i,j,proclist(maxcpus), proc_counter,group_counter 598 integer pos_counter, group_size 599 integer length, cpu_array(length) 600 601c setup 602 603 nproc = ga_nnodes() 604 myproc = ga_nodeid() 605 my_ga_grp(depth) = -99 606 mygroup(depth)=-99 607 ngroups(depth)=-99 608 609C make groups 610 group_counter=0 611 pos_counter=0 612 do while (pos_counter .lt. length) 613 group_counter = group_counter + 1 614 pos_counter = pos_counter + 1 615 group_size = cpu_array(pos_counter) 616 proc_counter = 0 617 do i=1,group_size 618 pos_counter = pos_counter + 1 619 proc_counter=proc_counter+1 620 if (proc_counter .gt. maxcpus) then 621 call errquit('increase maxcpus',0,0) 622 end if 623 proclist(proc_counter) = cpu_array(pos_counter) 624 if (proclist(proc_counter) .eq. myproc) then 625 mygroup(depth)=group_counter 626 endif 627 end do 628 if(mygroup(depth) .eq. group_counter) then 629 my_ga_grp(depth)=ga_pgroup_create(proclist,proc_counter) 630 endif 631 end do 632 ngroups(depth)=group_counter 633 634c create the zero group 635 636 if(ngroups(depth).gt.maxcpus)call errquit('increase maxcpus',0,0) 637 group_counter=0 638 pos_counter=1 639 do while (pos_counter .lt. length) 640 group_counter = group_counter + 1 641 group_size = cpu_array(pos_counter) 642 pos_counter = pos_counter + 1 643 proclist(group_counter) = cpu_array(pos_counter) 644 pos_counter = pos_counter + group_size 645 end do 646 my_ga_0_grp(depth) = ga_pgroup_create(proclist,ngroups(depth)) 647 648C Set the group 649 650 call ga_pgroup_set_default(my_ga_grp(depth)) 651 652 return 653 end 654 655 656C############################################################################### 657C> 658C> \brief Initialize the subgroup layer with regular subgroup sizes 659C> 660 subroutine util_sginit(groups_want) 661 implicit none 662#include "global.fh" 663#include "util_sgroup.fh" 664c 665c Create subgroups of a constant size 666c 667 integer groups_want !< [Input] The number of subgroups wanted 668 integer nproc,myproc,nchunkq,nremainq,i,j,n 669 integer num_proc,proclist(maxcpus) 670 671C setup 672 673 ngroups(depth) = groups_want 674 nproc = ga_nnodes() 675 myproc = ga_nodeid() 676 mygroup(depth) = -99 677 my_ga_grp(depth) = -99 678 679 if (nproc .lt. ngroups(depth)) then 680 if(myproc .eq. 0) write(*,*) ' Will only create group of',nproc 681 ngroups(depth) = nproc 682 endif 683 nchunkq=nproc/ngroups(depth) 684 nremainq=mod(nproc,ngroups(depth)) 685 if ((nchunkq .gt. maxcpus) .or. 686 * (nchunkq+1 .gt. maxcpus .and. nremainq .ne. 0)) then 687 call errquit('increase maxcpus',0,0) 688 end if 689 690c for irregular distribution, make sure the lower groups have more procs 691c assign each proc to individual group, then create individual group 692 693 n = 0 694 do i=1,ngroups(depth) 695 num_proc = nchunkq 696 if(i .le. nremainq) num_proc = num_proc + 1 697 do j=1,num_proc 698 proclist(j)=n 699 if (n .eq. myproc) mygroup(depth) = i 700 n=n+1 701 enddo 702 if (mygroup(depth) .eq. i) then 703 my_ga_grp(depth) = ga_pgroup_create(proclist, num_proc) 704 endif 705 enddo 706 707c create the zero group 708 709 if(ngroups(depth).gt.maxcpus)call errquit('increase maxcpus',0,0) 710 n = 0 711 do i=1,ngroups(depth) 712 num_proc = nchunkq 713 if(i .le. nremainq) num_proc = num_proc + 1 714 proclist(i) = n 715 n = n + num_proc 716 enddo 717 my_ga_0_grp(depth) = ga_pgroup_create(proclist,ngroups(depth)) 718 719c set the group 720 721 call ga_pgroup_set_default(my_ga_grp(depth)) 722 723 return 724 end 725C############################################################################### 726C> 727C> \brief Create movecs file names for the current subgroup 728C> 729C> The movecs file holds the SCF vectors of a particular calculation 730C> and is therefore a crucial data object. With subgroups every group 731C> works on a calculation of its own and therefore needs a movecs file 732C> of its own. This routine creates and stores the name of the movecs 733C> file for the current subgroup. 734C> 735 subroutine util_sgmovecs(rtdb) 736 implicit none 737 integer rtdb !< [Input] The RTDB handle 738#include "stdio.fh" 739#include "global.fh" 740#include "rtdb.fh" 741#include "inp.fh" 742#include "mafdecls.fh" 743#include "util_sgroup.fh" 744 character*255 movecs_in,sg_movecs_out,tag,theory 745 logical movecs_out_l 746 integer myproc 747c 748c Tested only for the case when movecs are created 749C 750c Read movecs filename with path 751c 752 myproc=ga_nodeid() 753 754 if (.not.(rtdb_cget(rtdb,'scf:input vectors',1,movecs_in).or. 755 > rtdb_cget(rtdb,'pspw:input vectors',1,movecs_in).or. 756 $ rtdb_cget(rtdb,'dft:input vectors',1, movecs_in))) 757 $ movecs_in = 'atomic' 758 if (movecs_in.eq.'atomic') 759 $ call util_file_name('movecs',.true.,.false.,movecs_in) 760c 761c Create movecs.mygroup(depth) 762C This just keeps appending to the end 763c 764 call util_pname0(movecs_in,sg_movecs_out,mygroup(depth), 765 * ngroups(depth)) 766 767 if(myproc.eq.0.and.sgprint) then 768 write(LuOut,*) 'mygroup=',mygroup(depth), 769 $ ' movecs_in',movecs_in, 770 $ ' sg_movecs_out=',sg_movecs_out 771 endif 772c 773c Set output movecs 774c 775c if (.not.(rtdb_cget(rtdb,'task:theory',1,theory))) 776c * call errquit('rtdb_get task:theory failed',0,0) 777c if (theory.eq.'dft') then 778c tag='dft:output vectors' 779c else 780c tag='scf:output vectors' 781c endif 782c 783 tag='dft:output vectors' 784 if (.not. rtdb_cput(rtdb,tag,1,sg_movecs_out)) then 785 write (LuOut,*) 'error in rtdb_cput' 786 endif 787 tag='scf:output vectors' 788 if (.not. rtdb_cput(rtdb,tag,1,sg_movecs_out)) then 789 write (LuOut,*) 'error in rtdb_cput' 790 endif 791 792 tag='pspw:input vectors' 793 if (.not. rtdb_cput(rtdb,tag,1,sg_movecs_out)) then 794 write (LuOut,*) 'error in rtdb_cput' 795 endif 796 tag='pspw:output vectors' 797 if (.not. rtdb_cput(rtdb,tag,1,sg_movecs_out)) then 798 write (LuOut,*) 'error in rtdb_cput' 799 endif 800 801c 802 return 803 end 804c Hokey functions to keep others away from my common blocks 805C############################################################################### 806C> 807C> \brief Obtain my subgroup number 808C> 809 integer function util_sgroup_mygroup() 810#include "util_sgroup.fh" 811 util_sgroup_mygroup = mygroup(depth) 812 return 813 end 814C############################################################################### 815C> 816C> \brief Obtain the total number of subgroups 817C> 818 integer function util_sgroup_numgroups() 819#include "util_sgroup.fh" 820 util_sgroup_numgroups = ngroups(depth) 821 return 822 end 823C############################################################################### 824C> 825C> \brief Obtain the subgroup of all rank 0 processes in each subgroup 826C> 827 integer function util_sgroup_zero_group() 828#include "util_sgroup.fh" 829 util_sgroup_zero_group = my_ga_0_grp(depth) 830 return 831 end 832C############################################################################### 833C> 834C> \brief Obtain the current subgroup nesting depth 835C> 836 integer function util_sgroup_depth() 837#include "util_sgroup.fh" 838 util_sgroup_depth = depth 839 return 840 end 841C############################################################################### 842C> 843C> \brief Set the file name for standard output for this subgroup 844C> 845 integer function util_sgroup_set_ioname(filename) 846 implicit none 847 character*(*) filename !< [Input] The base file name 848 849#include "stdio.fh" 850#include "util_sgroup.fh" 851 852 logical found 853 character*255 full_filename 854 character ch_tmp 855 856#ifdef USE_SUBGROUPS 857 LuOut = mygroup(depth)+200 858 859 call util_file_name_noprefix(filename,.false.,.false., 860 > full_filename) 861 862 inquire(file=full_filename,exist=found) 863* **** FILE already exists - parse to EOF **** 864 if (found) then 865 open(unit=LuOut,file=full_filename,form='formatted', 866 > status='old') 867 do while(.true.) 868 read(LuOut,*,ERR=30,END=30) ch_tmp 869 end do 870 30 continue 871#if defined(FUJITSU_SOLARIS) || defined(PSCALE) || defined(__crayx1) 872 backspace LuOut 873#endif 874* **** FILE does not exist **** 875 else 876 open(unit=LuOut,file=full_filename,form='formatted') 877 end if 878 util_sgroup_set_ioname = LuOut 879#else 880 util_sgroup_set_ioname = -1 881#endif 882 return 883 end 884C############################################################################### 885C> 886C> \brief Switch standard output back to the default Fortran scenario 887C> 888 integer function util_sgroup_unset_io() 889 implicit none 890#include "stdio.fh" 891#include "util_sgroup.fh" 892 893#ifdef USE_SUBGROUPS 894 close(LuOut) 895 LuOut = 6 896 util_sgroup_unset_io = LuOut 897#else 898 util_sgroup_unset_io = -1 899#endif 900 return 901 end 902C############################################################################### 903C> 904C> @} 905