!
! Copyright (C) 1996-2016 The SIESTA group
! This file is distributed under the terms of the
! GNU General Public License: see COPYING in the top directory
! or http://www.gnu.org/copyleft/gpl.txt.
! See Docs/Contributors.txt for a list of contributors.
!
      module parallelsubs
C
C  Parallelisation related subroutines
C
      use parallel,     only : BlockSize, ProcessorY
      use parallel,     only : thisNode => Node
      use spatial,      only : lspatial, nNode, nL2G, nG2L,
     &                         nOrbPerNode
      use domain_decom, only : use_dd, use_dd_perm, dd_nuo, dd_perm,
     &                         ulimit, llimit, dd_invp, dd_nnode
      use sys,          only : die
      implicit none

      logical, public, save :: pexsi_dist
      integer, public, save :: pexsi_bs

      contains

      subroutine set_processorY(n)
      integer, intent(in) :: n
      ProcessorY = n
      end subroutine set_processorY

      subroutine GetNodeOrbs( NOrb, Node, Nodes, NOrbNode)
      use parallel, only: thisNode => Node
C
C  Calculates the number of orbitals stored on the local Node.
C
C  Julian Gale, October 1998
C
C  Input :
C
C  integer NOrb     = The total number of orbitals in the calculation
C  integer Node     = The local processor
C  integer Nodes    = The total number of processors
C
C  Output :
C
C  integer NOrbNode = The number of orbitals stored on this Node
C
C  The values from the last call are saved so that they can be
C  re-used on the next call, avoiding recalculation when the
C  arguments are unchanged.
C

C  Passed arguments
      integer NOrb, NOrbNode, Node, Nodes

C  Local variables
      integer Remainder, MinPerNode, RemainderBlocks

C  Saved variables
      integer NOrbLast, NodeLast, NOrbNodeLast
      save NOrbLast, NodeLast, NOrbNodeLast

      data NOrbLast     / -1 /
      data NodeLast     / -1 /
      data NOrbNodeLast /  0 /

!      if (thisNode /= Node)
!     $     call message("Non-local use of GetNodeOrbs")

      if (pexsi_dist) then
        remainder = Norb - pexsi_bs*Nodes
        if (Node == Nodes-1) then
          NorbNode = pexsi_bs + remainder
        else
          NorbNode = pexsi_bs
        endif

        RETURN
      endif

      if (NOrb .eq. NOrbLast .and. Node .eq. NodeLast) then
C  Arguments are the same as on the last call - no recalculation
        NOrbNode = NOrbNodeLast

      else

        if (lspatial) then
C-------------------------
C  Spatial distribution  -
C-------------------------
          NOrbNode = nOrbPerNode(Node+1)

        else if (use_dd) then
          if (use_dd_perm) then
            ! NOrbNode = dd_nuo
            ! We have this structure also
            NOrbNode = nOrbPerNode(Node+1)
          else
            if (thisNode /= Node)
     $           call die("Wrong use of dd GetNodeOrbs")
            NOrbNode = dd_nuo
          endif
        else
C-----------------------------
C  Blockcyclic distribution  -
C-----------------------------
C  Calculate the minimum number of orbitals per node
          MinPerNode = NOrb / (Nodes*BlockSize)

C  Find the remainder of unassigned orbitals
          Remainder = NOrb - MinPerNode * Nodes * BlockSize

C  Find the number of complete blocks in the remainder
          RemainderBlocks = Remainder / BlockSize
          Remainder = Remainder - RemainderBlocks * BlockSize

C  Work out the local number of orbitals
          NOrbNode = MinPerNode*BlockSize
          if (Node.lt.RemainderBlocks) NOrbNode = NOrbNode + BlockSize
          if (Node.eq.RemainderBlocks) NOrbNode = NOrbNode + Remainder
        endif

C  Save the values for the next call
        NOrbLast = NOrb
        NodeLast = Node
        NOrbNodeLast = NOrbNode

      endif

      end subroutine GetNodeOrbs
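
!     Worked example (illustrative) for the block-cyclic branch of
!     GetNodeOrbs: with NOrb = 13, Nodes = 4 and BlockSize = 2,
!     MinPerNode = 13/(4*2) = 1 and Remainder = 5, which splits into
!     RemainderBlocks = 2 complete blocks plus 1 orbital left over,
!     so Nodes 0..3 hold 4, 4, 3 and 2 orbitals respectively.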

      subroutine GlobalToLocalOrb( GOrb, Node, Nodes, LOrb)
      use parallel, only: thisNode => Node
C
C  Converts an orbital index in the global frame to the local
C  frame, if the orbital is local to this node.  Otherwise the
C  index is returned as zero, so it can be used to test whether
C  the orbital is local or not.
C
C  Julian Gale, Imperial College, November 1998
C
C  Input :
C
C  integer GOrb   = global orbital index
C  integer Node   = local processor number
C  integer Nodes  = global number of processors
C
C  From module parallel :
C
C  integer BlockSize = blocking size for orbital distribution across
C                      the nodes.  The choice of value affects the
C                      performance of the ScaLAPACK routines
C
C  Output :
C
C  integer LOrb   = local orbital index
C

      integer GOrb, Node, Nodes, LOrb

C  Local variables
      integer OrbCheck, GBlock, LEle, LBlock

!      if (thisNode /= Node)
!     $     call message("Non-local use of GlobalToLocalOrb")

      if (pexsi_dist) then
        call WhichNodeOrb(Gorb,Nodes,OrbCheck)
        if (OrbCheck == Node) then
          lorb = gorb - pexsi_bs*Node
        else
          lorb = 0
        endif

      else if (lspatial) then
C-------------------------
C  Spatial distribution  -
C-------------------------
        if (Node.eq.nNode(GOrb)) then
          LOrb = nG2L(GOrb)
        else
          LOrb = 0
        endif
      else if (use_dd) then
        if (use_dd_perm) then
          ! LOrb = dd_perm(GOrb)
          LOrb = nG2L(GOrb)
        else
          if (GOrb.ge.llimit .and. GOrb.lt.ulimit) then
            LOrb = GOrb - llimit + 1
          else
            LOrb = 0
          endif
        endif
      else
C-----------------------------
C  Blockcyclic distribution  -
C-----------------------------
C  Find the global block number
        GBlock = ((GOrb -1)/BlockSize)

C  Subtract the global baseline to find the element number
C  within the block
        LEle = GOrb - GBlock*BlockSize

C  Find the block number on the local node
        LBlock = ((GBlock - Node)/Nodes)

C  Generate the local orbital pointer based on the local block number
        LOrb = LEle + LBlock*Blocksize

C  Check that this is consistent - if it is not, then this
C  orbital is not stored on this node and we return 0
C  to indicate this.
        OrbCheck = (LBlock*Nodes + Node)*BlockSize + LEle
        if (OrbCheck.ne.GOrb) LOrb = 0
      endif

      end subroutine GlobalToLocalOrb
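
!     Worked example (illustrative) for the block-cyclic branch of
!     GlobalToLocalOrb: with BlockSize = 2 and Nodes = 4, global
!     orbital GOrb = 11 lies in global block GBlock = 5 at element
!     LEle = 1; block 5 belongs to Node mod(5,4) = 1 as local block
!     LBlock = 1, so on Node 1 the routine returns LOrb = 3, while
!     on any other node the OrbCheck test fails and LOrb = 0.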

      subroutine LocalToGlobalOrb( LOrb, Node, Nodes, GOrb)
      use parallel, only: thisNode => Node
C
C  Converts an orbital index in the local frame to the global frame
C
C  Julian Gale, Imperial College, December 1998
C
C  Input :
C
C  integer LOrb   = local orbital index
C  integer Node   = local processor number
C  integer Nodes  = global number of processors
C
C  From module parallel :
C
C  integer BlockSize = blocking size for orbital distribution across
C                      the nodes.  The choice of value affects the
C                      performance of the ScaLAPACK routines
C
C  Output :
C
C  integer GOrb   = global orbital index
C

      integer LOrb, Node, Nodes, GOrb

C  Local variables
      integer LEle, LBlock

!      if (thisNode /= Node)
!     $     call message("Non-local use of LocalToGlobalOrb")

      if (pexsi_dist) then
        gorb = pexsi_bs*Node + lorb

      else if (lspatial) then
C-------------------------
C  Spatial distribution  -
C-------------------------
        GOrb = nL2G(LOrb,Node+1)

      else if (use_dd) then
        if (use_dd_perm) then
          ! GOrb = dd_invp(LOrb)
          ! We have these structures also...
          GOrb = nL2G(LOrb,Node+1)
        else
          if (thisNode /= Node)
     $         call die("Wrong use of dd LocalToGlobalOrb")

          GOrb = LOrb + llimit - 1
        endif
      else
C-----------------------------
C  Blockcyclic distribution  -
C-----------------------------
C  Find the local block number
        LBlock = ((LOrb -1)/BlockSize)

C  Subtract the local baseline to find the element number
C  within the block
        LEle = LOrb - LBlock*BlockSize

C  Calculate the global index
        GOrb = (LBlock*Nodes + Node)*BlockSize + LEle
      endif
      return
      end subroutine LocalToGlobalOrb
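
!     Worked example (illustrative): LocalToGlobalOrb inverts the
!     mapping shown above for GlobalToLocalOrb - with BlockSize = 2
!     and Nodes = 4, local orbital LOrb = 3 on Node 1 lies in local
!     block LBlock = 1 at element LEle = 1, giving
!     GOrb = (1*4 + 1)*2 + 1 = 11.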

      subroutine WhichNodeOrb( GOrb, Nodes, Node)
C
C  Given the global orbital index, this routine returns
C  the Node number on which it is stored.
C
C  Julian Gale, Imperial College, January 1999
C
C  Input :
C
C  integer GOrb   = global orbital index
C  integer Nodes  = total number of Nodes
C
C  From module parallel :
C
C  integer BlockSize = blocking size for orbital distribution across
C                      the nodes.  The choice of value affects the
C                      performance of the ScaLAPACK routines
C
C  Output :
C
C  integer Node   = Node on which this orbital is stored
C
      integer GOrb, Node, Nodes

C  Local variables
      integer GBlock

      if (pexsi_dist) then
        Node = (Gorb-1)/pexsi_bs
        ! Check for overflow: if gorb is in the "remainder" set,
        ! the above will be incorrect (e.g. 4 (or even 5) in a
        ! {0,1,2,3} set)
        ! (Consider the case norbs=13, pexsi_bs=2, nodes=5)
        if (Node > Nodes-1) Node = Nodes-1

      else if (lspatial) then
C-------------------------
C  Spatial distribution  -
C-------------------------
        Node = nNode(GOrb)
      else if (use_dd) then
        if (.not. use_dd_perm) then
          call die("WhichNodeOrb not ready in DD")
        endif
        Node = dd_nnode(GOrb)
        !! Node = nNode(GOrb)
      else
C-----------------------------
C  Blockcyclic distribution  -
C-----------------------------
C  Find the global block number
        GBlock = ((GOrb -1)/BlockSize)

C  Find the Node number that has this block
        Node = mod(GBlock,Nodes)
      endif

      return
      end subroutine WhichNodeOrb

      subroutine WhichMeshNode( GMesh, Nxyz, Nodes, Node)
C
C  Given the global mesh point index, this routine returns
C  the Node number on which it is stored.
C
C  Julian Gale, Imperial College, March 1999
C
C  Input :
C
C  integer GMesh   = global mesh point index
C  integer Nxyz(3) = number of grid points along each axis
C  integer Nodes   = total number of Nodes
C
C  Output :
C
C  integer Node    = Node on which this mesh point is stored
C

C  Passed variables
      integer GMesh, Nxyz(3), Node, Nodes

C  Local variables
      integer Gy, Gz, Mesh(3)

C  Trap the zero mesh case
      if (Nxyz(1)*Nxyz(2)*Nxyz(3).eq.0) then
        Node = 0
        return
      endif

C  Find the grid indices along each axis
      Gz = ((GMesh-1)/(Nxyz(1)*Nxyz(2))) + 1
      Gy = GMesh - (Gz-1)*Nxyz(1)*Nxyz(2)
      Gy = ((Gy-1)/Nxyz(1)) + 1
      Mesh(1) = 1
      Mesh(2) = Gy
      Mesh(3) = Gz

C  Call the subroutine that finds the node number from the global
C  XYZ index (only the Y and Z coordinates matter, which is why
C  Mesh(1) is simply set to 1 above)
      call WhichMeshXYZNode( Mesh, Nxyz, Nodes, Node)

      end subroutine WhichMeshNode
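
!     Worked example (illustrative) for WhichMeshNode: with
!     Nxyz = (4,6,8), global point GMesh = 30 decomposes into
!     Gz = (30-1)/(4*6) + 1 = 2 and Gy = ((6-1)/4) + 1 = 2; only
!     the (y,z) position determines the owning node, since the
!     mesh is not distributed along X.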

      subroutine HowManyMeshPerNode(Nxyz, Node, Nodes, NMeshPN, NxyzL)
C
C  Given the total dimensions of the grid of points, this routine
C  works out how many of the points are on the current node.
C
C  Julian Gale, Imperial College, March 1999
C
C  Input :
C
C  integer Nxyz(3)  = dimensions of the grid of points
C  integer Node     = local processor number
C  integer Nodes    = global number of processors
C
C  From module parallel :
C
C  integer ProcessorY = second dimension of the processor grid -
C                       must be a factor of the number of
C                       processors being used
C
C  Output :
C
C  integer NMeshPN  = total number of local mesh points
C  integer NxyzL(3) = local dimensions of the mesh on this node
C

      implicit none
C  Passed variables
      integer Nxyz(3), Node, Nodes, NMeshPN, NxyzL(3)

C  Local variables
      integer BlockSizeY, BlockSizeZ, NRemY, NRemZ, Py, Pz,
     .        ProcessorZ

C  Check that ProcessorY is a factor of the number of processors
      if (mod(Nodes,ProcessorY).gt.0)
     $     call die('ERROR: ProcessorY must be a factor of the' //
     $              ' number of processors!')
      ProcessorZ = Nodes/ProcessorY

C  Find the processor grid location
      Py = (Node/ProcessorZ) + 1
      Pz = Node - (Py - 1)*ProcessorZ + 1

C  Set the blocking sizes
      BlockSizeY = (Nxyz(2)/ProcessorY)
      BlockSizeZ = (Nxyz(3)/ProcessorZ)

      NRemY = Nxyz(2) - BlockSizeY*ProcessorY
      NRemZ = Nxyz(3) - BlockSizeZ*ProcessorZ
      if (Py-1.lt.NRemY) BlockSizeY = BlockSizeY + 1
      if (Pz-1.lt.NRemZ) BlockSizeZ = BlockSizeZ + 1

C  Trap the zero grid case
      if (BlockSizeY.eq.0.or.BlockSizeZ.eq.0) then
        NMeshPN = 0
        NxyzL(1) = Nxyz(1)
        NxyzL(2) = Nxyz(2)
        NxyzL(3) = Nxyz(3)
        return
      endif

C  Assign the blocksizes as the local grid dimensions
      NxyzL(1) = Nxyz(1)
      NxyzL(2) = BlockSizeY
      NxyzL(3) = BlockSizeZ

C  Calculate the total number of grid points as the product of
C  the local dimensions
      NMeshPN = NxyzL(1)*NxyzL(2)*NxyzL(3)

      return
      end subroutine HowManyMeshPerNode
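
!     Worked example (illustrative) for HowManyMeshPerNode: with
!     Nxyz = (8,7,5), Nodes = 4 and ProcessorY = 2 (ProcessorZ = 2),
!     BlockSizeY = 3 with NRemY = 1 and BlockSizeZ = 2 with
!     NRemZ = 1, so Nodes 0..3 hold 8*4*3 = 96, 8*4*2 = 64,
!     8*3*3 = 72 and 8*3*2 = 48 points (96+64+72+48 = 280 = 8*7*5).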

      subroutine GlobalToLocalMesh( GMesh, Nxyz, Node, Nodes, LMesh)
C
C  Converts a mesh point index in the global frame to the local
C  frame, if the mesh point is local to this node.  Otherwise the
C  index is returned as zero, so it can be used to test whether
C  the mesh point is local or not.
C
C  Julian Gale, Imperial College, January 1999
C
C  Input :
C
C  integer GMesh   = global mesh point index
C  integer Nxyz(3) = dimensions of the grid of points
C  integer Node    = local processor number
C  integer Nodes   = global number of processors
C
C  Output :
C
C  integer LMesh   = local mesh point index
C

C  Passed variables
      integer GMesh, Nxyz(3), Node, Nodes, LMesh

C  Local variables
      integer Mesh(3), MeshL(3), NxyzL(3), Gx, Gy, Gz, NMeshPN

C  Trap the zero mesh size case
      if (Nxyz(1)*Nxyz(2)*Nxyz(3).eq.0) then
        LMesh = GMesh
        return
      endif

C  Find the grid indices along each axis
      Gz = ((GMesh-1)/(Nxyz(1)*Nxyz(2))) + 1
      Gx = GMesh - (Gz-1)*Nxyz(1)*Nxyz(2)
      Gy = ((Gx-1)/Nxyz(1)) + 1
      Gx = Gx - (Gy-1)*Nxyz(1)
      Mesh(1) = Gx
      Mesh(2) = Gy
      Mesh(3) = Gz

C  Call the subroutine that finds the local mesh pointer from the
C  global XYZ grid reference
      call GlobalToLocalXYZMesh( Mesh, Nxyz, Node, Nodes, MeshL)

C  If MeshL = 0 then this point is not on this node -> return
      if (MeshL(1)*MeshL(2)*MeshL(3).eq.0) then
        LMesh = 0
        return
      endif

C  Find the dimensions of the local mesh so that we can work out
C  the pointer
      call HowManyMeshPerNode(Nxyz, Node, Nodes, NMeshPN, NxyzL)

C  Generate the local mesh pointer from the local element and blocks
      LMesh = (MeshL(3)-1)*(Nxyz(1)*NxyzL(2)) +
     .        (MeshL(2)-1)*Nxyz(1) + Gx

      end subroutine GlobalToLocalMesh

      subroutine WhichMeshXYZNode( Mesh, Nxyz, Nodes, Node)
C
C  Given the global mesh point grid reference, this routine
C  returns the Node number on which it is stored.
C
C  Julian Gale, Imperial College, March 1999
C
C  Input :
C
C  integer Mesh(3) = global mesh point by grid reference
C  integer Nxyz(3) = number of grid points along each axis
C  integer Nodes   = total number of Nodes
C
C  From module parallel :
C
C  integer ProcessorY = second dimension of the processor grid -
C                       must be a factor of the number of
C                       processors being used
C
C  Output :
C
C  integer Node    = Node on which this mesh point is stored
C

C  Passed variables
      integer Mesh(3), Nxyz(3), Node, Nodes

C  Local variables
      integer ProcessorZ, PGy, PGz, BlockSizeY, BlockSizeZ,
     .        GSplitY, GSplitZ, NRemY, NRemZ

C  Trap the zero mesh size case
      if (Nxyz(1)*Nxyz(2)*Nxyz(3).eq.0) then
        Node = 0
        return
      endif

C  Check that ProcessorY is a factor of the number of processors
      if (mod(Nodes,ProcessorY).gt.0)
     $     call die('ERROR: ProcessorY must be a factor of the' //
     $              ' number of processors!')

      ProcessorZ = Nodes/ProcessorY

C  Find the blocksizes along the axes
      BlockSizeY = (Nxyz(2)/ProcessorY)
      BlockSizeZ = (Nxyz(3)/ProcessorZ)
      NRemY = Nxyz(2) - BlockSizeY*ProcessorY
      NRemZ = Nxyz(3) - BlockSizeZ*ProcessorZ

C  Find the grid point where the change from BlockSize+1 to
C  BlockSize happens
      GSplitY = NRemY*(BlockSizeY + 1)
      GSplitZ = NRemZ*(BlockSizeZ + 1)

C  Find the processor grid coordinates for this point
      if (Mesh(2).le.GSplitY) then
        PGy = (Mesh(2) - 1)/(BlockSizeY + 1) + 1
      else
        PGy = (Mesh(2) - 1 - GSplitY)/(BlockSizeY) + NRemY + 1
      endif
      if (Mesh(3).le.GSplitZ) then
        PGz = (Mesh(3) - 1)/(BlockSizeZ + 1) + 1
      else
        PGz = (Mesh(3) - 1 - GSplitZ)/(BlockSizeZ) + NRemZ + 1
      endif

C  Calculate the processor number for this grid point
      Node = (PGy - 1)*ProcessorZ + PGz - 1

      return
      end subroutine WhichMeshXYZNode
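
!     Worked example (illustrative) of the split logic in
!     WhichMeshXYZNode: with Nxyz(2) = 7 and ProcessorY = 2,
!     BlockSizeY = 3, NRemY = 1 and GSplitY = 1*(3+1) = 4, so rows
!     1-4 (blocks of size BlockSizeY+1) map to processor row
!     PGy = 1 and rows 5-7 map to PGy = 2.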

      subroutine GlobalToLocalXYZMesh( Mesh, Nxyz, Node, Nodes, LMesh)
C
C  Converts a mesh point grid reference in the global frame to the
C  local frame, if the mesh point is local to this node.  Otherwise
C  the reference is returned as zero, so it can be used to test
C  whether the mesh point is local or not.
C
C  Julian Gale, Imperial College, January 1999
C
C  Input :
C
C  integer Mesh(3)  = global mesh point grid reference
C  integer Nxyz(3)  = dimensions of the grid of points
C  integer Node     = local processor number
C  integer Nodes    = global number of processors
C
C  From module parallel :
C
C  integer ProcessorY = second dimension of the processor grid -
C                       must be a factor of the number of
C                       processors being used
C
C  Output :
C
C  integer LMesh(3) = local mesh point grid reference
C

C  Passed variables
      integer Mesh(3), Nxyz(3), Node, Nodes, LMesh(3)

C  Local variables
      integer ProcessorZ, Py, Pz, PGy, PGz, BlockSizeY, BlockSizeZ,
     .        GSplitY, GSplitZ, NRemY, NRemZ

C  Trap the zero mesh size case
      if (Nxyz(1)*Nxyz(2)*Nxyz(3).eq.0) then
        LMesh(1) = Mesh(1)
        LMesh(2) = Mesh(2)
        LMesh(3) = Mesh(3)
        return
      endif

C  Check that ProcessorY is a factor of the number of processors
      if (mod(Nodes,ProcessorY).gt.0)
     $     call die('ERROR: ProcessorY must be a factor of the' //
     $              ' number of processors!')
      ProcessorZ = Nodes/ProcessorY

C  Find the processor grid location
      Py = (Node/ProcessorZ) + 1
      Pz = Node - (Py - 1)*ProcessorZ + 1

C  Find the blocksizes along the axes
      BlockSizeY = (Nxyz(2)/ProcessorY)
      BlockSizeZ = (Nxyz(3)/ProcessorZ)
      NRemY = Nxyz(2) - BlockSizeY*ProcessorY
      NRemZ = Nxyz(3) - BlockSizeZ*ProcessorZ

C  Find the grid point where the change from BlockSize+1 to
C  BlockSize happens
      GSplitY = NRemY*(BlockSizeY + 1)
      GSplitZ = NRemZ*(BlockSizeZ + 1)

C  Find the processor grid coordinates for this point
      if (Mesh(2).le.GSplitY) then
        PGy = (Mesh(2) - 1)/(BlockSizeY + 1) + 1
      else
        PGy = (Mesh(2) - 1 - GSplitY)/(BlockSizeY) + NRemY + 1
      endif
      if (Mesh(3).le.GSplitZ) then
        PGz = (Mesh(3) - 1)/(BlockSizeZ + 1) + 1
      else
        PGz = (Mesh(3) - 1 - GSplitZ)/(BlockSizeZ) + NRemZ + 1
      endif

C  If this point does not belong to the local node then set the
C  pointer to zero and return
      if (Py.ne.PGy.or.Pz.ne.PGz) then
        LMesh(1) = 0
        LMesh(2) = 0
        LMesh(3) = 0
        return
      endif

C  Get the local mesh pointer by subtracting the baseline
      LMesh(1) = Mesh(1)
      if (Mesh(2).le.GSplitY) then
        LMesh(2) = Mesh(2) - (PGy - 1)*(BlockSizeY + 1)
      else
        LMesh(2) = Mesh(2) - (PGy - 1)*BlockSizeY - NRemY
      endif
      if (Mesh(3).le.GSplitZ) then
        LMesh(3) = Mesh(3) - (PGz - 1)*(BlockSizeZ + 1)
      else
        LMesh(3) = Mesh(3) - (PGz - 1)*BlockSizeZ - NRemZ
      endif

      end subroutine GlobalToLocalXYZMesh

      subroutine set_processorYdefault(Nodes,procYdefault)
C
C  Finds a sensible default value for the processor grid in the Y
C  direction, trying to get an equal split in X and Y.  Currently
C  it only looks for factors of 2, 3 and 5.
C
C  Input :
C
C  integer Nodes        : total number of processors
C
C  Output :
C
C  integer procYdefault : default value of the Y grid parameter
C
C  Written by Julian Gale, November 1999
C
C  Passed arguments
      integer Nodes, procYdefault

C  Local variables
      integer Nx, Ny, Nrem
      logical factor

C  Initialise values
      Nx = 1
      Ny = 1
      Nrem = Nodes
      factor = .true.

C  Loop looking for factors
      do while (factor.and.Nrem.gt.1)
        factor = .false.
        if (mod(Nrem,2).eq.0) then
          Nrem = Nrem/2
          factor = .true.
          if (Nx.gt.Ny) then
            Ny = 2*Ny
          else
            Nx = 2*Nx
          endif
        endif
        if (mod(Nrem,3).eq.0) then
          Nrem = Nrem/3
          factor = .true.
          if (Nx.gt.Ny) then
            Ny = 3*Ny
          else
            Nx = 3*Nx
          endif
        endif
        if (mod(Nrem,5).eq.0) then
          Nrem = Nrem/5
          factor = .true.
          if (Nx.gt.Ny) then
            Ny = 5*Ny
          else
            Nx = 5*Nx
          endif
        endif
      enddo

C  Choose the default value as the lower of Nx and Ny
      if (Nx.gt.Ny) then
        procYdefault = Ny
      else
        procYdefault = Nx
      endif

      end subroutine set_processorYdefault
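
!     Worked example (illustrative) for set_processorYdefault: for
!     Nodes = 12 the loop peels off factors 2, 3 and 2 in turn,
!     giving Nx = 4 and Ny = 3, so procYdefault = 3 and the
!     processor grid becomes 3 x 4 (ProcessorY x ProcessorZ).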

#ifdef MPI
      subroutine set_blocksizedefault(Nodes,nuotot,bs)
C
C  Finds a sensible default value for the blocksize.
C  When the number of orbitals is small compared to the blocksize
C  typically used, the blocksize is lowered to ensure that some
C  work is done on all nodes.
C
C  Input :
C
C  integer Nodes  : total number of processors
C  integer nuotot : total number of orbitals
C
C  Output :
C
C  integer bs     : default value of the blocksize
C
C  Written by Julian Gale, March 2001
C  Modified by Alberto Garcia, October 2013
C
C  Passed arguments
      integer, intent(in)  :: Nodes, nuotot
      integer, intent(out) :: bs

      integer :: n1, nn

      ! ScaLAPACK routine for block-cyclic distributions
      integer, external :: numroc

C  Compare the number of orbitals against a sensible number
!  Note that this number could be optimized...

      if (nuotot.gt.24*Nodes) then
        bs = 24
      else if (nuotot.lt.Nodes) then
        bs = 1
      else

        bs = nuotot/Nodes + 1
        do
          ! Check the number of orbitals handled by the
          ! first and last processors.  Avoid idle
          ! processors and too-high imbalances.

          n1 = numroc(nuotot,bs,0,0,nodes)
          nn = numroc(nuotot,bs,nodes-1,0,nodes)

          if (nn == 0) then
            bs = bs - 1
          else if (dble(n1)/nn > 2) then
            bs = bs - 1
          else
            exit
          endif
        enddo

      endif

      end subroutine set_blocksizedefault
#endif
      end module parallelsubs