1! 2! Copyright (C) 1996-2016 The SIESTA group 3! This file is distributed under the terms of the 4! GNU General Public License: see COPYING in the top directory 5! or http://www.gnu.org/copyleft/gpl.txt. 6! See Docs/Contributors.txt for a list of contributors. 7! 8! This code segment has been fully created by: 9! Nick Papior Andersen, 2014, nickpapior@gmail.com 10! Please conctact the author, prior to re-using this code. 11 12! This particular solution method relies on solving the GF 13! with the tri-diagonalization routine. 14! This will leverage memory usage and also the execution time. 15 16module m_ts_tri_init 17 18 use precision, only : dp, i8b 19 use m_region 20 21 use m_ts_electype 22 23 use m_ts_pivot 24 25 implicit none 26 27 ! arrays for containing the tri-diagonal matrix part sizes 28 type(tRgn), save :: c_Tri 29 30 public :: ts_tri_init 31 public :: ts_tri_analyze 32 public :: c_Tri 33 34 private 35 36contains 37 38 subroutine ts_tri_init( dit, sparse_pattern , N_Elec, Elecs, & 39 IsVolt, ucell, na_u, xa, lasto , nsc, isc_off, method ) 40 41 use alloc, only : re_alloc, de_alloc 42 use parallel, only : IONode 43 use fdf, only : fdf_get, leqi 44 use fdf_extra 45#ifdef MPI 46 use mpi_siesta, only : MPI_Comm_Self 47#endif 48 49 use class_OrbitalDistribution 50 use class_Sparsity 51 use create_Sparsity_Union 52 use create_Sparsity_SC 53 use m_sparsity_handling 54 55 use m_pivot 56 57 use m_ts_electype 58 use m_ts_sparse, only : ts_sp_calculation 59 60 use m_ts_method ! has r_pvt 61 62 use m_ts_tri_common 63 use m_ts_rgn2trimat 64 65#ifdef TRANSIESTA_DEBUG 66 use m_ts_debug 67#endif 68 69 type(OrbitalDistribution), intent(inout) :: dit 70 type(Sparsity), intent(inout) :: sparse_pattern ! the local sparse pattern 71 integer, intent(in) :: N_Elec 72 type(Elec), intent(inout) :: Elecs(N_Elec) 73 logical, intent(in) :: IsVolt 74 real(dp), intent(in) :: ucell(3,3) 75 integer, intent(in) :: na_u, lasto(0:na_u) 76 real(dp), intent(in) :: xa(3,na_u) 77 integer, intent(in) :: nsc(3), isc_off(3,product(nsc)) 78 79 ! The method used to partition the BTD format 80 integer, intent(in) :: method 81 82 type(OrbitalDistribution) :: fdit 83 type(Sparsity) :: tmpSp1, tmpSp2 84 85 integer :: idx, no 86 integer :: i, io, iEl, no_u_TS 87 integer(i8b) :: els 88 character(len=NAME_LEN) :: csort 89 90 ! Regions used for sorting the device region 91 type(tRgn) :: r_tmp 92 93 no_u_TS = nrows_g(sparse_pattern) - no_Buf 94 95 ! In order to ensure that the electrodes are in the 96 ! tri-diagonal sparsity pattern, we can easily create 97 ! the full sparsity pattern with the electrodes included 98 ! and then recreate the tri-diagonal sparsity pattern 99 ! This is probably the crudest way of doing it. 100#ifdef MPI 101 call newDistribution(nrows_g(sparse_pattern),MPI_Comm_Self,fdit, & 102 name='TranSiesta UC distribution') 103#else 104 call newDistribution(nrows_g(sparse_pattern),-1,fdit, & 105 name='TranSiesta UC distribution') 106#endif 107 108 ! We need to regenerate the sparsity pattern without the 109 ! electrode connections etc. this is because ts_sp_uc 110 ! has different size dependent on the electrodes bulk settings. 111 call ts_Sp_calculation(dit,sparse_pattern,N_Elec,Elecs, & 112 ucell, nsc, isc_off, tmpSp2) 113 114 call crtSparsity_SC(tmpSp2, tmpSp1, UC = .true. ) 115 116 ! point to the local (SIESTA-UC) sparsity pattern arrays 117 call Sp_to_Spglobal(dit,tmpSp1,tmpSp2) 118 119 ! This works as creating a new sparsity deletes the previous 120 ! and as it is referenced several times it will not be actually 121 ! deleted... 122 do iEl = 1 , N_Elec 123 124 idx = Elecs(iEl)%idx_o 125 no = TotUsedOrbs(Elecs(iEl)) 126 127 ! we first create the super-set sparsity 128 tmpSp1 = tmpSp2 129 call crtSparsity_Union(fdit,tmpSp1, & 130 idx,idx,no,no, tmpSp2) 131 132 ! Create the o_inD 133 call rgn_range(Elecs(iEl)%o_inD,idx,idx+no-1) 134 135 end do 136 call delete(tmpSp1) ! clean up 137 138#ifdef TRANSIESTA_DEBUG 139 if(IONode)write(*,*)'Created TS-tri + elecs (4000)' 140 call sp_to_file(4000,tmpSp2) 141#endif 142 143 iEl = 1 144 do i = 2, N_Elec 145 if ( rgn_size(Elecs(iEl)%o_inD) < rgn_size(Elecs(i)%o_inD) ) then 146 iEl = i 147 end if 148 end do 149 150 ! Get sorting method, we default to sort 151 ! the BTD matrix according to the connection 152 ! scheme of the first electrode. 153 csort = fdf_get('TS.BTD.Pivot','atom+'//trim(Elecs(iEl)%name)) 154 call ts_pivot( fdit, tmpSp2, & 155 N_Elec, Elecs, & 156 ucell, na_u, xa, lasto, & 157 r_pvt, csort) 158 159 ! In transiesta we can immediately calculate the 160 ! tri-diagonal matrix. 161 ! Then we can re-arrange the pivot indices in each block 162 ! to be as consecutive in each electrode as possible. 163 ! This will greatly speed up complex Gf.G.Gf matrix 164 ! products 165 call rgn_delete(c_Tri) 166 167 ! Get the current sorting method 168 if ( IONode ) & 169 write(*,'(/,2a)') 'transiesta: Determining an optimal & 170 &tri-matrix using: ',trim(csort) 171 172 ! Create a new tri-diagonal matrix, do it in parallel 173 call ts_rgn2TriMat(N_Elec, Elecs, IsVolt, & 174 fdit, tmpSp2, r_pvt, c_Tri%n, c_Tri%r, & 175 method, 0, par = .true. ) 176 call delete(tmpSp2) ! clean-up 177 call delete(fdit) 178 179 ! Sort the pivoting table for the electrodes 180 ! such that we reduce the Gf.Gamma.Gf 181 ! However, this also makes it easier to 182 ! insert the self-energy as they become consecutive 183 ! in index 184 call ts_pivot_tri_sort_El(nrows_g(sparse_pattern), r_pvt, N_Elec, Elecs, c_Tri) 185 186 if ( c_Tri%n < 2 ) then 187 call die('Erroneous transiesta BTD format. & 188 &Check with the developers') 189 end if 190 191 ! Recalculate number of orbitals in TS 192 no_u_TS = nrows_g(sparse_pattern) - no_Buf 193 194 if ( r_pvt%n /= no_u_TS ) then 195 call die('Error in size estimation, the sparse pattern & 196 &removal is erroneous') 197 end if 198 199 ! Now r_pvt contains the sorted device region according to 200 ! electrode (1). (if asked for) 201 ! From this we can generate a "better" tri-diagonal matrix than 202 ! from the non-sorted one (provided that the user have provided 203 ! the atoms in sub-optimal order). 204 205 do i = 1 , N_Elec 206 207 idx = Elecs(i)%idx_o 208 no = TotUsedOrbs(Elecs(i)) 209 210 ! Create the pivoting table for the electrodes 211 call rgn_init(r_tmp,no) 212 do io = 1 , no 213 ! equals the io'th orbital index in the 214 ! TS region. io == ts_i 215 r_tmp%r(io) = rgn_pivot(r_pvt,idx-1+io) 216 end do 217 218 ! Sort it to be able to gather the indices 219 ! in the correct order 220 call rgn_sort(r_tmp) 221 ! pivot the o_inD back 222 call rgn_init(Elecs(i)%o_inD,no) 223 call rgn_init(Elecs(i)%inDpvt,no) 224 do io = 1 , no 225 Elecs(i)%o_inD%r(io) = r_pvt%r(r_tmp%r(io)) 226 227 ! Create the pivot table for the electrode scattering matrix 228 ! (1) = an electrode orbital which is seen first in the device 229 ! Note that this array's meaning is different in TBtrans 230 Elecs(i)%inDpvt%r(io) = Elecs(i)%o_inD%r(io) - idx + 1 231 232 end do 233 234 end do 235 call rgn_delete(r_tmp) 236 237 238 ! Calculate size of the tri-diagonal matrix 239 els = nnzs_tri_i8b(c_Tri%n,c_Tri%r) 240 ! check if there are overflows 241 if ( els > huge(1) ) then 242 call die('transiesta: Memory consumption is too large') 243 end if 244 245 if ( IONode ) then 246 write(*,'(a)') 'transiesta: Established a near-optimal partition & 247 &for the tri-diagonal matrix.' 248 249 call rgn_print(c_Tri, name = 'BTD partitions' , & 250 seq_max = 10 , repeat = .true. ) 251 252 write(*,'(a,f9.5,'' %'')') & 253 'transiesta: Matrix elements in % of full matrix: ', & 254 real(els,dp)/real(no_u_TS**2,dp) * 100._dp 255 end if 256 257 end subroutine ts_tri_init 258 259 subroutine ts_tri_analyze( dit, sparse_pattern , N_Elec, Elecs, & 260 ucell, na_u, lasto , nsc, isc_off , method ) 261 262 use parallel, only : IONode 263 use fdf, only : fdf_get, leqi 264#ifdef MPI 265 use mpi_siesta, only : MPI_Comm_Self 266#endif 267 268 use class_OrbitalDistribution 269 use class_Sparsity 270 use create_Sparsity_Union 271 use create_Sparsity_SC 272 use m_sparsity_handling 273 274 use m_pivot 275 use m_pivot_methods, only : sp2graphviz 276 277 use m_ts_electype 278 use m_ts_sparse, only : ts_sp_calculation 279 280 use m_ts_method ! r_pvt 281 282 use m_ts_tri_common 283 use m_ts_rgn2trimat 284 285#ifdef TRANSIESTA_DEBUG 286 use m_ts_debug 287#endif 288 289 type(OrbitalDistribution), intent(inout) :: dit 290 type(Sparsity), intent(inout) :: sparse_pattern ! the local sparse pattern 291 integer, intent(in) :: N_Elec 292 type(Elec), intent(inout) :: Elecs(N_Elec) 293 real(dp), intent(in) :: ucell(3,3) 294 integer, intent(in) :: na_u, lasto(0:na_u) 295 integer, intent(in) :: nsc(3), isc_off(3,product(nsc)) 296 297 ! The method used to partition the BTD format 298 integer, intent(in) :: method 299 300 type(OrbitalDistribution) :: fdit 301 type(Sparsity) :: tmpSp1, tmpSp2 302 303 integer :: no_u_TS, i, iEl, no 304 305 integer :: n, n_nzs 306 integer, pointer :: ncol(:), l_ptr(:), l_col(:) 307 308 character(len=*), parameter :: fmt = '(/,''TS.BTD.Pivot '',a,''+'',a)' 309 character(len=64) :: fmethod 310 character(len=4) :: corb 311 312 ! Regions used for sorting the device region 313 type(tRgn) :: r_tmp, start, r_El, full, priority 314 integer :: orb_atom 315 316 ! Capture the min memory pivoting scheme 317 character(len=64) :: min_mem_method 318 real(dp) :: min_mem 319 320 call timer('TS-analyze', 1) 321 322 no_u_TS = nrows_g(sparse_pattern) - no_Buf 323 324 ! In order to ensure that the electrodes are in the 325 ! tri-diagonal sparsity pattern, we can easily create 326 ! the full sparsity pattern with the electrodes included 327 ! and then recreate the tri-diagonal sparsity pattern 328 ! This is probably the crudest way of doing it. 329#ifdef MPI 330 call newDistribution(nrows_g(sparse_pattern),MPI_Comm_Self,fdit, & 331 name='TranSiesta UC distribution') 332#else 333 call newDistribution(nrows_g(sparse_pattern),-1,fdit, & 334 name='TranSiesta UC distribution') 335#endif 336 337 ! We need to regenerate the sparsity pattern without the 338 ! electrode connections etc. this is because ts_sp_uc 339 ! has different size dependent on the electrodes bulk settings. 340 call ts_Sp_calculation(dit,sparse_pattern,N_Elec,Elecs, & 341 ucell, nsc, isc_off, tmpSp2) 342 343 call crtSparsity_SC(tmpSp2, tmpSp1, UC = .TRUE. ) 344 345 ! point to the local (SIESTA-UC) sparsity pattern arrays 346 call Sp_to_Spglobal(dit,tmpSp1,tmpSp2) 347 348 do iEl = 1 , N_Elec 349 350 i = Elecs(iEl)%idx_o 351 no = TotUsedOrbs(Elecs(iEl)) 352 353 ! we first create the super-set sparsity 354 tmpSp1 = tmpSp2 355 call crtSparsity_Union(fdit,tmpSp1, & 356 i,i,no,no, tmpSp2) 357 358 ! Create the o_inD 359 call rgn_range(Elecs(iEl)%o_inD,i,i+no-1) 360 361 end do 362 363 ! Write out all pivoting etc. analysis steps 364 if ( IONode ) write(*,'(/,a)') 'transiesta: BTD analysis' 365 366 ! Initialize the min_mem 367 min_mem = huge(1._dp) 368 min_mem_method = 'TOO LARGE' 369 370 ! Make a copy 371 tmpSp1 = tmpSp2 372 373 ! Attach the sparsity pattern of the orbitals 374 ! later (tmpSp2) may be atom 375 call attach(tmpSp1, n_col = ncol, list_ptr = l_ptr, & 376 list_col = l_col , nrows_g = no , nnzs = n_nzs ) 377 378 fmethod = 'orb+none' 379 if ( IONode ) write(*,fmt) 'orb','none' 380 call tri(r_pvt) 381 382 orb_atom_switch: do orb_atom = 1 , 2 383 ! The user can skip the orbital analysis if it takes too long 384 if ( orb_atom == 1 ) then 385 ! We default to only looking at the atomic sparsity 386 ! pattern. This is *much* faster and does provide 387 ! a very near optimal sparse pattern. 388 ! The user can select to do both. 389 if ( leqi(fdf_get('TS.BTD.Analyze','atom'),'atom') ) cycle 390 corb = 'orb' 391 392 call rgn_copy(r_pvt, full) 393 394 else 395 corb = 'atom' 396 397 ! Convert the sparsity pattern to the atom 398 call SpOrb_to_SpAtom(fdit,tmpSp1,na_u,lasto,tmpSp2) 399 ! *** the distribution will always 400 ! be bigger than for the atoms, hence we need 401 ! not re-construct it *** 402 403 ! Reduce the searching place of atoms 404 call rgn_copy(r_aC, full) 405 406 end if 407 408 ! Sort the device region 409 call rgn_sort(full) 410 411 n = nrows_g(tmpSp2) 412 413 ! Create priority list 414 call rgn_init(priority,no) 415 call crt_El_priority(N_Elec,Elecs,priority, & 416 na_u,lasto,is_orb = orb_atom == 1 ) 417 418 if ( fdf_get('TS.Analyze.Graphviz', .false.) ) then 419 ! Attach sparsity pattern to current designation ('atom' == atom sp) 420 call attach(tmpSp2, n_col = ncol, list_ptr = l_ptr, & 421 list_col = l_col , nnzs = n_nzs ) 422 call rgn_init(r_El,n) 423 r_El%r(:) = 0 424 do i = 1 , n 425 if ( orb_atom == 1 ) then 426 r_El%r(i) = orb_type(i) 427 else 428 r_El%r(i) = atom_type(i) 429 end if 430 end do 431 if ( IONode ) & 432 call sp2graphviz('GRAPHVIZ_'//trim(corb)//'.gv', & 433 n,n_nzs,ncol,l_ptr,l_col, types = r_El%r ) 434 end if 435 436 ! Attach the sparsity pattern of the orbitals 437 call attach(tmpSp1, n_col = ncol, list_ptr = l_ptr, & 438 list_col = l_col , nrows_g = no , nnzs = n_nzs ) 439 440 441 ! *** Start analyzing sparsity pattern 442 443 do iEl = 1, N_Elec 444 445 if ( orb_atom == 1 ) then 446 call rgn_copy(Elecs(iEl)%o_inD, start) 447 else 448 ! transfer to atom 449 call rgn_Orb2Atom(Elecs(iEl)%o_inD,na_u,lasto,start) 450 end if 451 452 fmethod = trim(corb)//'+'//trim(Elecs(iEl)%name) 453 if ( IONode ) write(*,fmt) trim(corb),trim(Elecs(iEl)%name) 454 call sp_pvt(n,tmpSp2,r_tmp, PVT_CONNECT, sub = full, start = start) 455 if ( orb_atom == 1 ) then 456 call tri(r_tmp) 457 else 458 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 459 call tri(r_El) 460 end if 461 462 fmethod = trim(corb)//'+rev-'//trim(Elecs(iEl)%name) 463 if ( IONode ) write(*,fmt) trim(corb),'rev-'//trim(Elecs(iEl)%name) 464 call rgn_reverse(r_tmp) 465 if ( orb_atom == 1 ) then 466 call tri(r_tmp) 467 else 468 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 469 call tri(r_El) 470 end if 471 472 fmethod = trim(corb)//'+CM+'//trim(Elecs(iEl)%name) 473 if ( IONode ) write(*,fmt) trim(corb),'CM+'//trim(Elecs(iEl)%name) 474 call sp_pvt(n,tmpSp2,r_tmp, PVT_CUTHILL_MCKEE, sub = full, start = start) 475 if ( orb_atom == 1 ) then 476 call tri(r_tmp) 477 else 478 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 479 call tri(r_El) 480 end if 481 482 fmethod = trim(corb)//'+rev-CM+'//trim(Elecs(iEl)%name) 483 if ( IONode ) write(*,fmt) trim(corb),'rev-CM+'//trim(Elecs(iEl)%name) 484 call rgn_reverse(r_tmp) 485 if ( orb_atom == 1 ) then 486 call tri(r_tmp) 487 else 488 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 489 call tri(r_El) 490 end if 491 492 fmethod = trim(corb)//'+CM+priority+'//trim(Elecs(iEl)%name) 493 if ( IONode ) write(*,fmt) trim(corb),'CM+priority+'//trim(Elecs(iEl)%name) 494 call sp_pvt(n,tmpSp2,r_tmp, PVT_CUTHILL_MCKEE, sub = full, start = start, & 495 priority = priority%r) 496 if ( orb_atom == 1 ) then 497 call tri(r_tmp) 498 else 499 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 500 call tri(r_El) 501 end if 502 503 fmethod = trim(corb)//'+rev-CM+priority+'//trim(Elecs(iEl)%name) 504 if ( IONode ) write(*,fmt) trim(corb),'rev-CM+priority+'//trim(Elecs(iEl)%name) 505 call rgn_reverse(r_tmp) 506 if ( orb_atom == 1 ) then 507 call tri(r_tmp) 508 else 509 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 510 call tri(r_El) 511 end if 512 513 fmethod = trim(corb)//'+PCG+'//trim(Elecs(iEl)%name) 514 if ( IONode ) write(*,fmt) trim(corb),'PCG+'//trim(Elecs(iEl)%name) 515 call sp_pvt(n,tmpSp2,r_tmp, PVT_PCG, sub = full, start = start) 516 if ( orb_atom == 1 ) then 517 call tri(r_tmp) 518 else 519 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 520 call tri(r_El) 521 end if 522 523 fmethod = trim(corb)//'+rev-PCG+'//trim(Elecs(iEl)%name) 524 if ( IONode ) write(*,fmt) trim(corb),'rev-PCG+'//trim(Elecs(iEl)%name) 525 call rgn_reverse(r_tmp) 526 if ( orb_atom == 1 ) then 527 call tri(r_tmp) 528 else 529 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 530 call tri(r_El) 531 end if 532 533 fmethod = trim(corb)//'+PCG+priority+'//trim(Elecs(iEl)%name) 534 if ( IONode ) write(*,fmt) trim(corb),'PCG+priority+'//trim(Elecs(iEl)%name) 535 call sp_pvt(n,tmpSp2,r_tmp, PVT_PCG, sub = full, start = start, priority = priority%r) 536 if ( orb_atom == 1 ) then 537 call tri(r_tmp) 538 else 539 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 540 call tri(r_El) 541 end if 542 543 fmethod = trim(corb)//'+rev-PCG+priority+'//trim(Elecs(iEl)%name) 544 if ( IONode ) write(*,fmt) trim(corb),'rev-PCG+priority+'//trim(Elecs(iEl)%name) 545 call rgn_reverse(r_tmp) 546 if ( orb_atom == 1 ) then 547 call tri(r_tmp) 548 else 549 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 550 call tri(r_El) 551 end if 552 553 end do 554 555 call rgn_delete(start) 556 557 558 fmethod = trim(corb)//'+GPS' 559 if ( IONode ) write(*,fmt) trim(corb),'GPS' 560 call sp_pvt(n,tmpSp2,r_tmp, PVT_GPS, sub = full) 561 if ( orb_atom == 1 ) then 562 call tri(r_tmp) 563 else 564 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 565 call tri(r_El) 566 end if 567 568 fmethod = trim(corb)//'+rev-GPS' 569 if ( IONode ) write(*,fmt) trim(corb),'rev-GPS' 570 call rgn_reverse(r_tmp) 571 if ( orb_atom == 1 ) then 572 call tri(r_tmp) 573 else 574 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 575 call tri(r_El) 576 end if 577 578 fmethod = trim(corb)//'+GPS+priority' 579 if ( IONode ) write(*,fmt) trim(corb),'GPS+priority' 580 call sp_pvt(n,tmpSp2,r_tmp, PVT_GPS, sub = full, priority = priority%r) 581 if ( orb_atom == 1 ) then 582 call tri(r_tmp) 583 else 584 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 585 call tri(r_El) 586 end if 587 588 fmethod = trim(corb)//'+rev-GPS+priority' 589 if ( IONode ) write(*,fmt) trim(corb),'rev-GPS+priority' 590 call rgn_reverse(r_tmp) 591 if ( orb_atom == 1 ) then 592 call tri(r_tmp) 593 else 594 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 595 call tri(r_El) 596 end if 597 598#ifdef TS_PVT_GGPS 599 ! Above pre-processor: 600 ! Undocumented feature, however the GGPS is ridicously 601 ! slow. So we have it disabled. 602 603 fmethod = trim(corb)//'+GGPS' 604 if ( IONode ) write(*,fmt) trim(corb),'GGPS' 605 call sp_pvt(n,tmpSp2,r_tmp, PVT_GGPS, sub = full) 606 if ( orb_atom == 1 ) then 607 call tri(r_tmp) 608 else 609 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 610 call tri(r_El) 611 end if 612 613 fmethod = trim(corb)//'+rev-GGPS' 614 if ( IONode ) write(*,fmt) trim(corb),'rev-GGPS' 615 call rgn_reverse(r_tmp) 616 if ( orb_atom == 1 ) then 617 call tri(r_tmp) 618 else 619 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 620 call tri(r_El) 621 end if 622 623 fmethod = trim(corb)//'+GGPS+priority' 624 if ( IONode ) write(*,fmt) trim(corb),'GGPS+priority' 625 call sp_pvt(n,tmpSp2,r_tmp, PVT_GGPS, sub = full, priority = priority%r) 626 if ( orb_atom == 1 ) then 627 call tri(r_tmp) 628 else 629 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 630 call tri(r_El) 631 end if 632 633 fmethod = trim(corb)//'+rev-GGPS+priority' 634 if ( IONode ) write(*,fmt) trim(corb),'rev-GGPS+priority' 635 call rgn_reverse(r_tmp) 636 if ( orb_atom == 1 ) then 637 call tri(r_tmp) 638 else 639 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 640 call tri(r_El) 641 end if 642#endif 643 644#ifdef SIESTA__METIS 645#ifdef TS_PVT_METIS 646 fmethod = trim(corb)//'+NodeND+priority' 647 if ( IONode ) write(*,fmt) trim(corb),'NodeND+priority' 648 call sp_pvt(n,tmpSp2,r_tmp, PVT_METIS_NODEND, sub = full, priority = priority%r) 649 if ( orb_atom == 1 ) then 650 call tri(r_tmp) 651 else 652 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 653 call tri(r_El) 654 end if 655 656 fmethod = trim(corb)//'+rev-NodeND+priority' 657 if ( IONode ) write(*,fmt) trim(corb),'rev-NodeND+priority' 658 call rgn_reverse(r_tmp) 659 if ( orb_atom == 1 ) then 660 call tri(r_tmp) 661 else 662 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 663 call tri(r_El) 664 end if 665 666 fmethod = trim(corb)//'+PartGraphKway+priority' 667 if ( IONode ) write(*,fmt) trim(corb),'PartGraphKway+priority' 668 call sp_pvt(n,tmpSp2,r_tmp, PVT_METIS_PARTGRAPHKWAY, sub = full, priority = priority%r) 669 if ( orb_atom == 1 ) then 670 call tri(r_tmp) 671 else 672 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 673 call tri(r_El) 674 end if 675 676 fmethod = trim(corb)//'+rev-PartGraphKway+priority' 677 if ( IONode ) write(*,fmt) trim(corb),'rev-PartGraphKway+priority' 678 call rgn_reverse(r_tmp) 679 if ( orb_atom == 1 ) then 680 call tri(r_tmp) 681 else 682 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 683 call tri(r_El) 684 end if 685 686 fmethod = trim(corb)//'+PartGraphRecursive+priority' 687 if ( IONode ) write(*,fmt) trim(corb),'PartGraphRecursive+priority' 688 call sp_pvt(n,tmpSp2,r_tmp, PVT_METIS_PARTGRAPHRECURSIVE, sub = full, priority = priority%r) 689 if ( orb_atom == 1 ) then 690 call tri(r_tmp) 691 else 692 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 693 call tri(r_El) 694 end if 695 696 fmethod = trim(corb)//'+rev-PartGraphRecursive+priority' 697 if ( IONode ) write(*,fmt) trim(corb),'rev-PartGraphRecursive+priority' 698 call rgn_reverse(r_tmp) 699 if ( orb_atom == 1 ) then 700 call tri(r_tmp) 701 else 702 call rgn_atom2orb(r_tmp,na_u,lasto,r_El) 703 call tri(r_El) 704 end if 705 706#endif 707#endif 708 709 end do orb_atom_switch 710 711 call rgn_delete(r_tmp,r_El,full,priority) 712 713 call delete(tmpSp1) ! clean up 714 call delete(tmpSp2) 715 call delete(fdit) 716 717 if ( IONode ) then 718 write(*,*) ! new-line 719 write(*,*) ! new-line 720 write(*,'(a)') ' **********' 721 write(*,'(a)') ' * NOTE *' 722 write(*,'(a)') ' **********' 723 if ( trim(min_mem_method) == 'TOO LARGE' ) then 724 write(*,'(a)') ' All pivoting methods requires more elements than can be allocated' 725 write(*,'(a)') ' Therefore you cannot run your simulation using TranSiesta' 726 else 727 write(*,'(a)') ' This minimum memory pivoting scheme may not necessarily be the' 728 write(*,'(a)') ' best performing algorithm!' 729 write(*,'(a,/)') ' You should analyze the pivoting schemes!' 730 write(*,'(a)') ' Minimum memory required pivoting scheme:' 731 write(*,'(a,a)') ' TS.BTD.Pivot ', trim(min_mem_method) 732 write(*,'(a,en11.3,a)') ' Memory: ', min_mem, ' GB' 733 end if 734 write(*,*) ! new-line 735 end if 736 737 call timer('TS-analyze', 2) 738 739 contains 740 741 ! Print out all relevant information for this 742 ! pivoting scheme 743 subroutine tri(r_pvt) 744 use m_pivot_methods, only : bandwidth, profile 745 use fdf, only : fdf_overwrite 746 type(tRgn), intent(inout) :: r_pvt 747 748 integer :: bw, i 749 ! Possibly very large numbers 750 integer(i8b) :: prof, els, pad, work 751 logical :: is_suitable 752 type(tRgn) :: ctri 753 character(len=132) :: fname 754 real(dp) :: total 755 756 ! Only if it is defined 757 fname = fdf_get('TS.BTD.Output',' ') 758 if ( len_trim(fname) > 0 ) then 759 fname = 'TS.BTD.Output '//trim(fmethod) 760 call fdf_overwrite(fname) 761 end if 762 763 ! Create a new tri-diagonal matrix, do it in parallel 764 call ts_rgn2TriMat(N_Elec, Elecs, .true., & 765 fdit, tmpSp1, r_pvt, ctri%n, ctri%r, & 766 method, 0, par = .true. ) 767 768 ! Sort the pivoting table for the electrodes 769 ! such that we reduce the Gf.Gamma.Gf 770 ! However, this also makes it easier to 771 ! insert the self-energy as they become consecutive 772 ! in index, all-in-all, win-win! 773 call ts_pivot_tri_sort_El(nrows_g(tmpSp1), r_pvt, N_Elec, Elecs, ctri) 774 775 bw = bandwidth(no,n_nzs,ncol,l_ptr,l_col,r_pvt) 776 prof = profile(no,n_nzs,ncol,l_ptr,l_col,r_pvt) 777 if ( IONode ) then 778 write(*,'(tr3,a,t23,i10,/,tr3,a,t13,i20)') & 779 'Bandwidth: ',bw,'Profile: ',prof 780 end if 781 782 ! Calculate size of the tri-diagonal matrix 783 els = nnzs_tri_i8b(ctri%n,ctri%r) 784 ! check if there are overflows 785 if ( els > huge(1) ) then 786 write(*,'(tr3,a,i0,'' / '',i0)')'*** Number of elements exceeds integer limits [elements / max] ', & 787 els, huge(1) 788 write(*,'(tr3,a)')'*** Will not be able to use this pivoting scheme!' 789 is_suitable = .false. 790 else 791 is_suitable = .true. 792 end if 793 794 total = real(no_u_ts, dp) ** 2 795 if ( IONode ) then 796 call rgn_print(ctri, name = 'BTD partitions' , & 797 seq_max = 10 , indent = 3 , repeat = .true. ) 798 799 write(*,'(tr3,a,i0,'' / '',f10.3)') & 800 'BTD matrix block size [max] / [average]: ', & 801 maxval(ctri%r), sum(real(ctri%r)) / ctri%n 802 803 write(*,'(tr3,a,f9.5,'' %'')') & 804 'BTD matrix elements in % of full matrix: ', & 805 real(els,dp)/total * 100._dp 806 end if 807 808 if ( ts_A_method == TS_BTD_A_COLUMN ) then 809 ! Get the padding for the array to hold the entire column 810 call GFGGF_needed_worksize(ctri%n, ctri%r, & 811 N_Elec, Elecs, i, bw) 812 pad = i 813 work = bw 814 else 815 pad = 0 816 work = 0 817 end if 818 819 ! Total size of the system 820 total = size2gb(pad + work) + size2gb(els) * 2 821 if ( IONode ) then 822 write(*,'(tr3,a,t39,en11.3,a)') 'BTD x 2 MEMORY: ', & 823 size2gb(els) * 2, ' GB' 824 write(*,'(tr3,a,t39,en11.3,a)') 'Rough estimation of MEMORY: ', & 825 total,' GB' 826 end if 827 if ( total < min_mem .and. is_suitable ) then 828 min_mem = total 829 min_mem_method = fmethod 830 end if 831 do i = 1 , N_Elec 832 work = consecutive_Elec_orb(Elecs(i),r_pvt) 833 if ( IONode ) then 834 write(*,'(tr3,2a,t39,i0)') trim(Elecs(i)%name), & 835 ' splitting Gamma:',work-1 836 end if 837 end do 838 839 call rgn_delete(ctri) 840 841 end subroutine tri 842 843 function size2gb(i) result(b) 844 integer(i8b) :: i 845 real(dp) :: b 846 b = real(i, dp) * 16._dp / 1024._dp ** 3 847 end function size2gb 848 849 end subroutine ts_tri_analyze 850 851end module m_ts_tri_init 852