1!
2! Copyright (C) 1996-2016	The SIESTA group
3!  This file is distributed under the terms of the
4!  GNU General Public License: see COPYING in the top directory
5!  or http://www.gnu.org/copyleft/gpl.txt.
6! See Docs/Contributors.txt for a list of contributors.
7!
8! This code segment has been fully created by:
9! Nick Papior Andersen, 2014, nickpapior@gmail.com
10! Please conctact the author, prior to re-using this code.
11
12! This particular solution method relies on solving the GF
13! with the tri-diagonalization routine.
14! This will leverage memory usage and also the execution time.
15
16module m_ts_tri_init
17
18  use precision, only : dp, i8b
19  use m_region
20
21  use m_ts_electype
22
23  use m_ts_pivot
24
25  implicit none
26
27  ! arrays for containing the tri-diagonal matrix part sizes
28  type(tRgn), save :: c_Tri
29
30  public :: ts_tri_init
31  public :: ts_tri_analyze
32  public :: c_Tri
33
34  private
35
36contains
37
38  subroutine ts_tri_init( dit, sparse_pattern , N_Elec, Elecs, &
39       IsVolt, ucell, na_u, xa, lasto , nsc, isc_off, method )
40
41    use alloc, only : re_alloc, de_alloc
42    use parallel, only : IONode
43    use fdf, only : fdf_get, leqi
44    use fdf_extra
45#ifdef MPI
46    use mpi_siesta, only : MPI_Comm_Self
47#endif
48
49    use class_OrbitalDistribution
50    use class_Sparsity
51    use create_Sparsity_Union
52    use create_Sparsity_SC
53    use m_sparsity_handling
54
55    use m_pivot
56
57    use m_ts_electype
58    use m_ts_sparse, only : ts_sp_calculation
59
60    use m_ts_method ! has r_pvt
61
62    use m_ts_tri_common
63    use m_ts_rgn2trimat
64
65#ifdef TRANSIESTA_DEBUG
66    use m_ts_debug
67#endif
68
69    type(OrbitalDistribution), intent(inout) :: dit
70    type(Sparsity), intent(inout) :: sparse_pattern ! the local sparse pattern
71    integer, intent(in) :: N_Elec
72    type(Elec), intent(inout) :: Elecs(N_Elec)
73    logical, intent(in) :: IsVolt
74    real(dp), intent(in) :: ucell(3,3)
75    integer, intent(in) :: na_u, lasto(0:na_u)
76    real(dp), intent(in) :: xa(3,na_u)
77    integer, intent(in) :: nsc(3), isc_off(3,product(nsc))
78
79    ! The method used to partition the BTD format
80    integer, intent(in) :: method
81
82    type(OrbitalDistribution) :: fdit
83    type(Sparsity) :: tmpSp1, tmpSp2
84
85    integer :: idx, no
86    integer :: i, io, iEl, no_u_TS
87    integer(i8b) :: els
88    character(len=NAME_LEN) :: csort
89
90    ! Regions used for sorting the device region
91    type(tRgn) :: r_tmp
92
93    no_u_TS = nrows_g(sparse_pattern) - no_Buf
94
95    ! In order to ensure that the electrodes are in the
96    ! tri-diagonal sparsity pattern, we can easily create
97    ! the full sparsity pattern with the electrodes included
98    ! and then recreate the tri-diagonal sparsity pattern
99    ! This is probably the crudest way of doing it.
100#ifdef MPI
101    call newDistribution(nrows_g(sparse_pattern),MPI_Comm_Self,fdit, &
102         name='TranSiesta UC distribution')
103#else
104    call newDistribution(nrows_g(sparse_pattern),-1,fdit, &
105         name='TranSiesta UC distribution')
106#endif
107
108    ! We need to regenerate the sparsity pattern without the
109    ! electrode connections etc. this is because ts_sp_uc
110    ! has different size dependent on the electrodes bulk settings.
111    call ts_Sp_calculation(dit,sparse_pattern,N_Elec,Elecs, &
112         ucell, nsc, isc_off, tmpSp2)
113
114    call crtSparsity_SC(tmpSp2, tmpSp1, UC = .true. )
115
116    ! point to the local (SIESTA-UC) sparsity pattern arrays
117    call Sp_to_Spglobal(dit,tmpSp1,tmpSp2)
118
119    ! This works as creating a new sparsity deletes the previous
120    ! and as it is referenced several times it will not be actually
121    ! deleted...
122    do iEl = 1 , N_Elec
123
124       idx = Elecs(iEl)%idx_o
125       no  = TotUsedOrbs(Elecs(iEl))
126
127       ! we first create the super-set sparsity
128       tmpSp1 = tmpSp2
129       call crtSparsity_Union(fdit,tmpSp1, &
130            idx,idx,no,no, tmpSp2)
131
132       ! Create the o_inD
133       call rgn_range(Elecs(iEl)%o_inD,idx,idx+no-1)
134
135    end do
136    call delete(tmpSp1) ! clean up
137
138#ifdef TRANSIESTA_DEBUG
139    if(IONode)write(*,*)'Created TS-tri + elecs (4000)'
140    call sp_to_file(4000,tmpSp2)
141#endif
142
143    iEl = 1
144    do i = 2, N_Elec
145       if ( rgn_size(Elecs(iEl)%o_inD) < rgn_size(Elecs(i)%o_inD) ) then
146          iEl = i
147       end if
148    end do
149
150    ! Get sorting method, we default to sort
151    ! the BTD matrix according to the connection
152    ! scheme of the first electrode.
153    csort = fdf_get('TS.BTD.Pivot','atom+'//trim(Elecs(iEl)%name))
154    call ts_pivot( fdit, tmpSp2, &
155         N_Elec, Elecs, &
156         ucell, na_u, xa, lasto, &
157         r_pvt, csort)
158
159    ! In transiesta we can immediately calculate the
160    ! tri-diagonal matrix.
161    ! Then we can re-arrange the pivot indices in each block
162    ! to be as consecutive in each electrode as possible.
163    ! This will greatly speed up complex Gf.G.Gf matrix
164    ! products
165    call rgn_delete(c_Tri)
166
167    ! Get the current sorting method
168    if ( IONode ) &
169         write(*,'(/,2a)') 'transiesta: Determining an optimal &
170         &tri-matrix using: ',trim(csort)
171
172    ! Create a new tri-diagonal matrix, do it in parallel
173    call ts_rgn2TriMat(N_Elec, Elecs, IsVolt, &
174         fdit, tmpSp2, r_pvt, c_Tri%n, c_Tri%r, &
175         method, 0, par = .true. )
176    call delete(tmpSp2) ! clean-up
177    call delete(fdit)
178
179    ! Sort the pivoting table for the electrodes
180    ! such that we reduce the Gf.Gamma.Gf
181    ! However, this also makes it easier to
182    ! insert the self-energy as they become consecutive
183    ! in index
184    call ts_pivot_tri_sort_El(nrows_g(sparse_pattern), r_pvt, N_Elec, Elecs, c_Tri)
185
186    if ( c_Tri%n < 2 ) then
187       call die('Erroneous transiesta BTD format. &
188            &Check with the developers')
189    end if
190
191    ! Recalculate number of orbitals in TS
192    no_u_TS = nrows_g(sparse_pattern) - no_Buf
193
194    if ( r_pvt%n /= no_u_TS ) then
195       call die('Error in size estimation, the sparse pattern &
196            &removal is erroneous')
197    end if
198
199    ! Now r_pvt contains the sorted device region according to
200    ! electrode (1). (if asked for)
201    ! From this we can generate a "better" tri-diagonal matrix than
202    ! from the non-sorted one (provided that the user have provided
203    ! the atoms in sub-optimal order).
204
205    do i = 1 , N_Elec
206
207       idx = Elecs(i)%idx_o
208       no = TotUsedOrbs(Elecs(i))
209
210       ! Create the pivoting table for the electrodes
211       call rgn_init(r_tmp,no)
212       do io = 1 , no
213          ! equals the io'th orbital index in the
214          !           TS region.  io == ts_i
215          r_tmp%r(io) = rgn_pivot(r_pvt,idx-1+io)
216       end do
217
218       ! Sort it to be able to gather the indices
219       ! in the correct order
220       call rgn_sort(r_tmp)
221       ! pivot the o_inD back
222       call rgn_init(Elecs(i)%o_inD,no)
223       call rgn_init(Elecs(i)%inDpvt,no)
224       do io = 1 , no
225          Elecs(i)%o_inD%r(io)  = r_pvt%r(r_tmp%r(io))
226
227          ! Create the pivot table for the electrode scattering matrix
228          !  (1) = an electrode orbital which is seen first in the device
229          ! Note that this array's meaning is different in TBtrans
230          Elecs(i)%inDpvt%r(io) = Elecs(i)%o_inD%r(io) - idx + 1
231
232       end do
233
234    end do
235    call rgn_delete(r_tmp)
236
237
238    ! Calculate size of the tri-diagonal matrix
239    els = nnzs_tri_i8b(c_Tri%n,c_Tri%r)
240    ! check if there are overflows
241    if ( els > huge(1) ) then
242       call die('transiesta: Memory consumption is too large')
243    end if
244
245    if ( IONode ) then
246       write(*,'(a)') 'transiesta: Established a near-optimal partition &
247            &for the tri-diagonal matrix.'
248
249       call rgn_print(c_Tri, name = 'BTD partitions' , &
250            seq_max = 10 , repeat = .true. )
251
252       write(*,'(a,f9.5,'' %'')') &
253            'transiesta: Matrix elements in % of full matrix: ', &
254            real(els,dp)/real(no_u_TS**2,dp) * 100._dp
255    end if
256
257  end subroutine ts_tri_init
258
259  subroutine ts_tri_analyze( dit, sparse_pattern , N_Elec, Elecs, &
260       ucell, na_u, lasto , nsc, isc_off , method )
261
262    use parallel, only : IONode
263    use fdf, only : fdf_get, leqi
264#ifdef MPI
265    use mpi_siesta, only : MPI_Comm_Self
266#endif
267
268    use class_OrbitalDistribution
269    use class_Sparsity
270    use create_Sparsity_Union
271    use create_Sparsity_SC
272    use m_sparsity_handling
273
274    use m_pivot
275    use m_pivot_methods, only : sp2graphviz
276
277    use m_ts_electype
278    use m_ts_sparse, only : ts_sp_calculation
279
280    use m_ts_method ! r_pvt
281
282    use m_ts_tri_common
283    use m_ts_rgn2trimat
284
285#ifdef TRANSIESTA_DEBUG
286    use m_ts_debug
287#endif
288
289    type(OrbitalDistribution), intent(inout) :: dit
290    type(Sparsity), intent(inout) :: sparse_pattern ! the local sparse pattern
291    integer, intent(in) :: N_Elec
292    type(Elec), intent(inout) :: Elecs(N_Elec)
293    real(dp), intent(in) :: ucell(3,3)
294    integer, intent(in) :: na_u, lasto(0:na_u)
295    integer, intent(in) :: nsc(3), isc_off(3,product(nsc))
296
297    ! The method used to partition the BTD format
298    integer, intent(in) :: method
299
300    type(OrbitalDistribution) :: fdit
301    type(Sparsity) :: tmpSp1, tmpSp2
302
303    integer :: no_u_TS, i, iEl, no
304
305    integer :: n, n_nzs
306    integer, pointer :: ncol(:), l_ptr(:), l_col(:)
307
308    character(len=*), parameter :: fmt = '(/,''TS.BTD.Pivot '',a,''+'',a)'
309    character(len=64) :: fmethod
310    character(len=4) :: corb
311
312    ! Regions used for sorting the device region
313    type(tRgn) :: r_tmp, start, r_El, full, priority
314    integer :: orb_atom
315
316    ! Capture the min memory pivoting scheme
317    character(len=64) :: min_mem_method
318    real(dp) :: min_mem
319
320    call timer('TS-analyze', 1)
321
322    no_u_TS = nrows_g(sparse_pattern) - no_Buf
323
324    ! In order to ensure that the electrodes are in the
325    ! tri-diagonal sparsity pattern, we can easily create
326    ! the full sparsity pattern with the electrodes included
327    ! and then recreate the tri-diagonal sparsity pattern
328    ! This is probably the crudest way of doing it.
329#ifdef MPI
330    call newDistribution(nrows_g(sparse_pattern),MPI_Comm_Self,fdit, &
331         name='TranSiesta UC distribution')
332#else
333    call newDistribution(nrows_g(sparse_pattern),-1,fdit, &
334         name='TranSiesta UC distribution')
335#endif
336
337    ! We need to regenerate the sparsity pattern without the
338    ! electrode connections etc. this is because ts_sp_uc
339    ! has different size dependent on the electrodes bulk settings.
340    call ts_Sp_calculation(dit,sparse_pattern,N_Elec,Elecs, &
341         ucell, nsc, isc_off, tmpSp2)
342
343    call crtSparsity_SC(tmpSp2, tmpSp1, UC = .TRUE. )
344
345    ! point to the local (SIESTA-UC) sparsity pattern arrays
346    call Sp_to_Spglobal(dit,tmpSp1,tmpSp2)
347
348    do iEl = 1 , N_Elec
349
350       i  = Elecs(iEl)%idx_o
351       no = TotUsedOrbs(Elecs(iEl))
352
353       ! we first create the super-set sparsity
354       tmpSp1 = tmpSp2
355       call crtSparsity_Union(fdit,tmpSp1, &
356            i,i,no,no, tmpSp2)
357
358       ! Create the o_inD
359       call rgn_range(Elecs(iEl)%o_inD,i,i+no-1)
360
361    end do
362
363    ! Write out all pivoting etc. analysis steps
364    if ( IONode ) write(*,'(/,a)') 'transiesta: BTD analysis'
365
366    ! Initialize the min_mem
367    min_mem = huge(1._dp)
368    min_mem_method = 'TOO LARGE'
369
370    ! Make a copy
371    tmpSp1 = tmpSp2
372
373    ! Attach the sparsity pattern of the orbitals
374    ! later (tmpSp2) may be atom
375    call attach(tmpSp1, n_col = ncol, list_ptr = l_ptr, &
376         list_col = l_col , nrows_g = no , nnzs = n_nzs )
377
378    fmethod = 'orb+none'
379    if ( IONode ) write(*,fmt) 'orb','none'
380    call tri(r_pvt)
381
382    orb_atom_switch: do orb_atom = 1 , 2
383    ! The user can skip the orbital analysis if it takes too long
384    if ( orb_atom == 1 ) then
385       ! We default to only looking at the atomic sparsity
386       ! pattern. This is *much* faster and does provide
387       ! a very near optimal sparse pattern.
388       ! The user can select to do both.
389       if ( leqi(fdf_get('TS.BTD.Analyze','atom'),'atom') ) cycle
390       corb = 'orb'
391
392       call rgn_copy(r_pvt, full)
393
394    else
395       corb = 'atom'
396
397       ! Convert the sparsity pattern to the atom
398       call SpOrb_to_SpAtom(fdit,tmpSp1,na_u,lasto,tmpSp2)
399       ! *** the distribution will always
400       !     be bigger than for the atoms, hence we need
401       !     not re-construct it ***
402
403       ! Reduce the searching place of atoms
404       call rgn_copy(r_aC, full)
405
406    end if
407
408    ! Sort the device region
409    call rgn_sort(full)
410
411    n = nrows_g(tmpSp2)
412
413    ! Create priority list
414    call rgn_init(priority,no)
415    call crt_El_priority(N_Elec,Elecs,priority, &
416         na_u,lasto,is_orb = orb_atom == 1 )
417
418    if ( fdf_get('TS.Analyze.Graphviz', .false.) ) then
419       ! Attach sparsity pattern to current designation ('atom' == atom sp)
420       call attach(tmpSp2, n_col = ncol, list_ptr = l_ptr, &
421            list_col = l_col , nnzs = n_nzs )
422       call rgn_init(r_El,n)
423       r_El%r(:) = 0
424       do i = 1 , n
425          if ( orb_atom == 1 ) then
426             r_El%r(i) = orb_type(i)
427          else
428             r_El%r(i) = atom_type(i)
429          end if
430       end do
431       if ( IONode ) &
432            call sp2graphviz('GRAPHVIZ_'//trim(corb)//'.gv', &
433            n,n_nzs,ncol,l_ptr,l_col, types = r_El%r )
434    end if
435
436    ! Attach the sparsity pattern of the orbitals
437    call attach(tmpSp1, n_col = ncol, list_ptr = l_ptr, &
438         list_col = l_col , nrows_g = no , nnzs = n_nzs )
439
440
441    ! *** Start analyzing sparsity pattern
442
443    do iEl = 1, N_Elec
444
445       if ( orb_atom == 1 ) then
446          call rgn_copy(Elecs(iEl)%o_inD, start)
447       else
448          ! transfer to atom
449          call rgn_Orb2Atom(Elecs(iEl)%o_inD,na_u,lasto,start)
450       end if
451
452       fmethod = trim(corb)//'+'//trim(Elecs(iEl)%name)
453       if ( IONode ) write(*,fmt) trim(corb),trim(Elecs(iEl)%name)
454       call sp_pvt(n,tmpSp2,r_tmp, PVT_CONNECT, sub = full, start = start)
455       if ( orb_atom == 1 ) then
456          call tri(r_tmp)
457       else
458          call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
459          call tri(r_El)
460       end if
461
462       fmethod = trim(corb)//'+rev-'//trim(Elecs(iEl)%name)
463       if ( IONode ) write(*,fmt) trim(corb),'rev-'//trim(Elecs(iEl)%name)
464       call rgn_reverse(r_tmp)
465       if ( orb_atom == 1 ) then
466          call tri(r_tmp)
467       else
468          call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
469          call tri(r_El)
470       end if
471
472       fmethod = trim(corb)//'+CM+'//trim(Elecs(iEl)%name)
473       if ( IONode ) write(*,fmt) trim(corb),'CM+'//trim(Elecs(iEl)%name)
474       call sp_pvt(n,tmpSp2,r_tmp, PVT_CUTHILL_MCKEE, sub = full, start = start)
475       if ( orb_atom == 1 ) then
476          call tri(r_tmp)
477       else
478          call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
479          call tri(r_El)
480       end if
481
482       fmethod = trim(corb)//'+rev-CM+'//trim(Elecs(iEl)%name)
483       if ( IONode ) write(*,fmt) trim(corb),'rev-CM+'//trim(Elecs(iEl)%name)
484       call rgn_reverse(r_tmp)
485       if ( orb_atom == 1 ) then
486          call tri(r_tmp)
487       else
488          call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
489          call tri(r_El)
490       end if
491
492       fmethod = trim(corb)//'+CM+priority+'//trim(Elecs(iEl)%name)
493       if ( IONode ) write(*,fmt) trim(corb),'CM+priority+'//trim(Elecs(iEl)%name)
494       call sp_pvt(n,tmpSp2,r_tmp, PVT_CUTHILL_MCKEE, sub = full, start = start, &
495            priority = priority%r)
496       if ( orb_atom == 1 ) then
497          call tri(r_tmp)
498       else
499          call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
500          call tri(r_El)
501       end if
502
503       fmethod = trim(corb)//'+rev-CM+priority+'//trim(Elecs(iEl)%name)
504       if ( IONode ) write(*,fmt) trim(corb),'rev-CM+priority+'//trim(Elecs(iEl)%name)
505       call rgn_reverse(r_tmp)
506       if ( orb_atom == 1 ) then
507          call tri(r_tmp)
508       else
509          call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
510          call tri(r_El)
511       end if
512
513       fmethod = trim(corb)//'+PCG+'//trim(Elecs(iEl)%name)
514       if ( IONode ) write(*,fmt) trim(corb),'PCG+'//trim(Elecs(iEl)%name)
515       call sp_pvt(n,tmpSp2,r_tmp, PVT_PCG, sub = full, start = start)
516       if ( orb_atom == 1 ) then
517          call tri(r_tmp)
518       else
519          call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
520          call tri(r_El)
521       end if
522
523       fmethod = trim(corb)//'+rev-PCG+'//trim(Elecs(iEl)%name)
524       if ( IONode ) write(*,fmt) trim(corb),'rev-PCG+'//trim(Elecs(iEl)%name)
525       call rgn_reverse(r_tmp)
526       if ( orb_atom == 1 ) then
527          call tri(r_tmp)
528       else
529          call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
530          call tri(r_El)
531       end if
532
533       fmethod = trim(corb)//'+PCG+priority+'//trim(Elecs(iEl)%name)
534       if ( IONode ) write(*,fmt) trim(corb),'PCG+priority+'//trim(Elecs(iEl)%name)
535       call sp_pvt(n,tmpSp2,r_tmp, PVT_PCG, sub = full, start = start, priority = priority%r)
536       if ( orb_atom == 1 ) then
537          call tri(r_tmp)
538       else
539          call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
540          call tri(r_El)
541       end if
542
543       fmethod = trim(corb)//'+rev-PCG+priority+'//trim(Elecs(iEl)%name)
544       if ( IONode ) write(*,fmt) trim(corb),'rev-PCG+priority+'//trim(Elecs(iEl)%name)
545       call rgn_reverse(r_tmp)
546       if ( orb_atom == 1 ) then
547          call tri(r_tmp)
548       else
549          call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
550          call tri(r_El)
551       end if
552
553    end do
554
555    call rgn_delete(start)
556
557
558    fmethod = trim(corb)//'+GPS'
559    if ( IONode ) write(*,fmt) trim(corb),'GPS'
560    call sp_pvt(n,tmpSp2,r_tmp, PVT_GPS, sub = full)
561    if ( orb_atom == 1 ) then
562       call tri(r_tmp)
563    else
564       call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
565       call tri(r_El)
566    end if
567
568    fmethod = trim(corb)//'+rev-GPS'
569    if ( IONode ) write(*,fmt) trim(corb),'rev-GPS'
570    call rgn_reverse(r_tmp)
571    if ( orb_atom == 1 ) then
572       call tri(r_tmp)
573    else
574       call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
575       call tri(r_El)
576    end if
577
578    fmethod = trim(corb)//'+GPS+priority'
579    if ( IONode ) write(*,fmt) trim(corb),'GPS+priority'
580    call sp_pvt(n,tmpSp2,r_tmp, PVT_GPS, sub = full, priority = priority%r)
581    if ( orb_atom == 1 ) then
582       call tri(r_tmp)
583    else
584       call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
585       call tri(r_El)
586    end if
587
588    fmethod = trim(corb)//'+rev-GPS+priority'
589    if ( IONode ) write(*,fmt) trim(corb),'rev-GPS+priority'
590    call rgn_reverse(r_tmp)
591    if ( orb_atom == 1 ) then
592       call tri(r_tmp)
593    else
594       call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
595       call tri(r_El)
596    end if
597
598#ifdef TS_PVT_GGPS
599    ! Above pre-processor:
600    ! Undocumented feature, however the GGPS is ridicously
601    ! slow. So we have it disabled.
602
603    fmethod = trim(corb)//'+GGPS'
604    if ( IONode ) write(*,fmt) trim(corb),'GGPS'
605    call sp_pvt(n,tmpSp2,r_tmp, PVT_GGPS, sub = full)
606    if ( orb_atom == 1 ) then
607       call tri(r_tmp)
608    else
609       call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
610       call tri(r_El)
611    end if
612
613    fmethod = trim(corb)//'+rev-GGPS'
614    if ( IONode ) write(*,fmt) trim(corb),'rev-GGPS'
615    call rgn_reverse(r_tmp)
616    if ( orb_atom == 1 ) then
617       call tri(r_tmp)
618    else
619       call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
620       call tri(r_El)
621    end if
622
623    fmethod = trim(corb)//'+GGPS+priority'
624    if ( IONode ) write(*,fmt) trim(corb),'GGPS+priority'
625    call sp_pvt(n,tmpSp2,r_tmp, PVT_GGPS, sub = full, priority = priority%r)
626    if ( orb_atom == 1 ) then
627       call tri(r_tmp)
628    else
629       call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
630       call tri(r_El)
631    end if
632
633    fmethod = trim(corb)//'+rev-GGPS+priority'
634    if ( IONode ) write(*,fmt) trim(corb),'rev-GGPS+priority'
635    call rgn_reverse(r_tmp)
636    if ( orb_atom == 1 ) then
637       call tri(r_tmp)
638    else
639       call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
640       call tri(r_El)
641    end if
642#endif
643
644#ifdef SIESTA__METIS
645#ifdef TS_PVT_METIS
646    fmethod = trim(corb)//'+NodeND+priority'
647    if ( IONode ) write(*,fmt) trim(corb),'NodeND+priority'
648    call sp_pvt(n,tmpSp2,r_tmp, PVT_METIS_NODEND, sub = full, priority = priority%r)
649    if ( orb_atom == 1 ) then
650      call tri(r_tmp)
651    else
652      call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
653      call tri(r_El)
654    end if
655
656    fmethod = trim(corb)//'+rev-NodeND+priority'
657    if ( IONode ) write(*,fmt) trim(corb),'rev-NodeND+priority'
658    call rgn_reverse(r_tmp)
659    if ( orb_atom == 1 ) then
660      call tri(r_tmp)
661    else
662      call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
663      call tri(r_El)
664    end if
665
666    fmethod = trim(corb)//'+PartGraphKway+priority'
667    if ( IONode ) write(*,fmt) trim(corb),'PartGraphKway+priority'
668    call sp_pvt(n,tmpSp2,r_tmp, PVT_METIS_PARTGRAPHKWAY, sub = full, priority = priority%r)
669    if ( orb_atom == 1 ) then
670      call tri(r_tmp)
671    else
672      call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
673      call tri(r_El)
674    end if
675
676    fmethod = trim(corb)//'+rev-PartGraphKway+priority'
677    if ( IONode ) write(*,fmt) trim(corb),'rev-PartGraphKway+priority'
678    call rgn_reverse(r_tmp)
679    if ( orb_atom == 1 ) then
680      call tri(r_tmp)
681    else
682      call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
683      call tri(r_El)
684    end if
685
686    fmethod = trim(corb)//'+PartGraphRecursive+priority'
687    if ( IONode ) write(*,fmt) trim(corb),'PartGraphRecursive+priority'
688    call sp_pvt(n,tmpSp2,r_tmp, PVT_METIS_PARTGRAPHRECURSIVE, sub = full, priority = priority%r)
689    if ( orb_atom == 1 ) then
690      call tri(r_tmp)
691    else
692      call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
693      call tri(r_El)
694    end if
695
696    fmethod = trim(corb)//'+rev-PartGraphRecursive+priority'
697    if ( IONode ) write(*,fmt) trim(corb),'rev-PartGraphRecursive+priority'
698    call rgn_reverse(r_tmp)
699    if ( orb_atom == 1 ) then
700      call tri(r_tmp)
701    else
702      call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
703      call tri(r_El)
704    end if
705
706#endif
707#endif
708
709    end do orb_atom_switch
710
711    call rgn_delete(r_tmp,r_El,full,priority)
712
713    call delete(tmpSp1) ! clean up
714    call delete(tmpSp2)
715    call delete(fdit)
716
717    if ( IONode ) then
718       write(*,*) ! new-line
719       write(*,*) ! new-line
720       write(*,'(a)') ' **********'
721       write(*,'(a)') ' *  NOTE  *'
722       write(*,'(a)') ' **********'
723       if ( trim(min_mem_method) == 'TOO LARGE' ) then
724         write(*,'(a)') ' All pivoting methods requires more elements than can be allocated'
725         write(*,'(a)') ' Therefore you cannot run your simulation using TranSiesta'
726       else
727         write(*,'(a)') ' This minimum memory pivoting scheme may not necessarily be the'
728         write(*,'(a)') ' best performing algorithm!'
729         write(*,'(a,/)') ' You should analyze the pivoting schemes!'
730         write(*,'(a)') ' Minimum memory required pivoting scheme:'
731         write(*,'(a,a)') '  TS.BTD.Pivot ', trim(min_mem_method)
732         write(*,'(a,en11.3,a)') '  Memory: ', min_mem, ' GB'
733       end if
734       write(*,*) ! new-line
735    end if
736
737    call timer('TS-analyze', 2)
738
739  contains
740
741    ! Print out all relevant information for this
742    ! pivoting scheme
743    subroutine tri(r_pvt)
744      use m_pivot_methods, only : bandwidth, profile
745      use fdf, only : fdf_overwrite
746      type(tRgn), intent(inout) :: r_pvt
747
748      integer :: bw, i
749      ! Possibly very large numbers
750      integer(i8b) :: prof, els, pad, work
751      logical :: is_suitable
752      type(tRgn) :: ctri
753      character(len=132) :: fname
754      real(dp) :: total
755
756      ! Only if it is defined
757      fname = fdf_get('TS.BTD.Output',' ')
758      if ( len_trim(fname) > 0 ) then
759         fname = 'TS.BTD.Output '//trim(fmethod)
760         call fdf_overwrite(fname)
761      end if
762
763      ! Create a new tri-diagonal matrix, do it in parallel
764      call ts_rgn2TriMat(N_Elec, Elecs, .true., &
765           fdit, tmpSp1, r_pvt, ctri%n, ctri%r, &
766           method, 0, par = .true. )
767
768      ! Sort the pivoting table for the electrodes
769      ! such that we reduce the Gf.Gamma.Gf
770      ! However, this also makes it easier to
771      ! insert the self-energy as they become consecutive
772      ! in index, all-in-all, win-win!
773      call ts_pivot_tri_sort_El(nrows_g(tmpSp1), r_pvt, N_Elec, Elecs, ctri)
774
775      bw   = bandwidth(no,n_nzs,ncol,l_ptr,l_col,r_pvt)
776      prof = profile(no,n_nzs,ncol,l_ptr,l_col,r_pvt)
777      if ( IONode ) then
778         write(*,'(tr3,a,t23,i10,/,tr3,a,t13,i20)') &
779              'Bandwidth: ',bw,'Profile: ',prof
780      end if
781
782      ! Calculate size of the tri-diagonal matrix
783      els = nnzs_tri_i8b(ctri%n,ctri%r)
784      ! check if there are overflows
785      if ( els > huge(1) ) then
786        write(*,'(tr3,a,i0,'' / '',i0)')'*** Number of elements exceeds integer limits [elements / max] ', &
787            els, huge(1)
788        write(*,'(tr3,a)')'*** Will not be able to use this pivoting scheme!'
789        is_suitable = .false.
790      else
791        is_suitable = .true.
792      end if
793
794      total = real(no_u_ts, dp) ** 2
795      if ( IONode ) then
796         call rgn_print(ctri, name = 'BTD partitions' , &
797              seq_max = 10 , indent = 3 , repeat = .true. )
798
799         write(*,'(tr3,a,i0,'' / '',f10.3)') &
800              'BTD matrix block size [max] / [average]: ', &
801              maxval(ctri%r), sum(real(ctri%r)) / ctri%n
802
803         write(*,'(tr3,a,f9.5,'' %'')') &
804              'BTD matrix elements in % of full matrix: ', &
805              real(els,dp)/total * 100._dp
806      end if
807
808      if ( ts_A_method == TS_BTD_A_COLUMN ) then
809         ! Get the padding for the array to hold the entire column
810         call GFGGF_needed_worksize(ctri%n, ctri%r, &
811              N_Elec, Elecs, i, bw)
812         pad = i
813         work = bw
814      else
815         pad = 0
816         work = 0
817      end if
818
819      ! Total size of the system
820      total = size2gb(pad + work) + size2gb(els) * 2
821      if ( IONode ) then
822         write(*,'(tr3,a,t39,en11.3,a)') 'BTD x 2 MEMORY: ', &
823              size2gb(els) * 2, ' GB'
824         write(*,'(tr3,a,t39,en11.3,a)') 'Rough estimation of MEMORY: ', &
825              total,' GB'
826      end if
827      if ( total < min_mem .and. is_suitable ) then
828         min_mem = total
829         min_mem_method = fmethod
830      end if
831      do i = 1 , N_Elec
832         work = consecutive_Elec_orb(Elecs(i),r_pvt)
833         if ( IONode ) then
834            write(*,'(tr3,2a,t39,i0)') trim(Elecs(i)%name), &
835                 ' splitting Gamma:',work-1
836         end if
837      end do
838
839      call rgn_delete(ctri)
840
841    end subroutine tri
842
843    function size2gb(i) result(b)
844      integer(i8b) :: i
845      real(dp) :: b
846      b = real(i, dp) * 16._dp / 1024._dp ** 3
847    end function size2gb
848
849  end subroutine ts_tri_analyze
850
851end module m_ts_tri_init
852