1*
2* $Id$
3*
4
5*     ***********************************
6*     *					*
7*     *		geodesic_init		*
8*     *					*
9*     ***********************************
10*
11*     Uses - geodesic common block
12*
13
14      subroutine geodesic_init()
15      implicit none
16
17#include "errquit.fh"
18#include "bafdecls.fh"
19
20*     **** geodesic common block ***
21      integer U(2)
22      integer Vt(2)
23      integer S(2)
24      common / geodesic_block / U,Vt,S
25
26*     **** local variables ****
27      logical value
28      integer npack1
29
30
31*     **** external functions ****
32      integer  psi_ne,psi_neq
33      external psi_ne,psi_neq
34
35      logical  Dneall_m_allocate
36      external Dneall_m_allocate
37
38      call Pack_npack(1,npack1)
39c      nemax = psi_ne(1)+psi_ne(2)
40c      nelc1 = psi_ne(1)
41
42      value = BA_alloc_get(mt_dcpl,npack1*(psi_neq(1)+psi_neq(2)),
43     >                     'U',U(2),U(1))
44
45      value = value.and.Dneall_m_allocate(0,Vt)
46c      value = value.and.
47c     >        BA_alloc_get(mt_dbl,2*nelc1*nelc1,
48c     >                     'Vt',Vt(2),Vt(1))
49
50      value = value.and.
51     >        BA_alloc_get(mt_dbl,(psi_ne(1)+psi_ne(2)),
52     >                     'S',S(2),S(1))
53      if (.not. value) call errquit('out of heap memory',0, MA_ERR)
54
55      return
56      end
57
58*     ***********************************
59*     *					*
60*     *		geodesic_finalize	*
61*     *					*
62*     ***********************************
63*
64*     Uses - geodesic common block
65*
66
67      subroutine geodesic_finalize()
68      implicit none
69#include "errquit.fh"
70
71#include "bafdecls.fh"
72
73*     **** geodesic common block ***
74      integer U(2)
75      integer Vt(2)
76      integer S(2)
77      common / geodesic_block / U,Vt,S
78
79*     **** local variables ****
80      logical value
81      logical  Dneall_m_free
82      external Dneall_m_free
83
84      value =           BA_free_heap(S(2))
85      value = value.and.Dneall_m_free(Vt)
86c      value = value.and.BA_free_heap(Vt(2))
87      value = value.and.BA_free_heap(U(2))
88      if (.not. value) call errquit('error freeing of heap memory',0,
89     &       MA_ERR)
90
91      return
92      end
93
94
95
96*     ***********************************
97*     *					*
98*     *		geodesic_start		*
99*     *					*
100*     ***********************************
101*
102*     This routine initializes the geodesic module
103* for a linesearch.  Basically this routine just
104* calculates the SVD decomposition of the search direction,
105* A=HY-Y(Y^tHY) or A=(determined from CG). The only requirement
106* of the search direction is that it is tangent to the direction
107* spanned by Y.   It returns the maximum value in the diagonal
108* Sigma matrix, and it also returns the linegradient determined
109* by the direction A.
110*
111*     Entry - A: gradient
112*             SA: S*gradient if paw
113*     Exit  - max_sigma:
114*             dE:
115*             SA: S*U if paw
116*     Uses - geodesic common block
117*
118
119      subroutine geodesic_start(A,max_sigma,dE)
120      implicit none
121      complex*16 A(*)
122      real*8     max_sigma,dE
123
124#include "bafdecls.fh"
125#include "errquit.fh"
126
127*     **** geodesic common block ***
128      integer U(2)
129      integer Vt(2)
130      integer S(2)
131      common / geodesic_block / U,Vt,S
132
133      integer spsi1(2),spsi2(2)
134      common / psi_paw_block / spsi1,spsi2
135
136*     **** local variables ****
137      integer i,npack1,V(2),ispin,neq(2)
138      real*8 de_private
139
140*     **** external functions ****
141      logical  Dneall_m_push_get,Dneall_m_pop_stack,psp_pawexist
142      integer  psi_ispin,psi_ne,psi_neq
143      real*8   electron_eorbit_noocc
144      external Dneall_m_push_get,Dneall_m_pop_stack,psp_pawexist
145      external psi_ispin,psi_ne,psi_neq
146      external electron_eorbit_noocc
147
148      call nwpw_timing_start(10)
149      call Pack_npack(1,npack1)
150
151*     **** allocate tmp space ****
152      if (.not.Dneall_m_push_get(0,V))
153     >   call errquit('geodesic_start:out of stack memory',0,MA_ERR)
154
155*     **** HomeGrown SVD ****
156      if (psp_pawexist()) then
157         ispin = psi_ispin()
158         neq(1)= psi_neq(1)
159         neq(2)= psi_neq(2)
160         call psp_overlap_S(ispin,neq,A,dcpl_mb(spsi1(1)))
161         call Dneall_f_SVD_ASA1(0,A,
162     >                          dcpl_mb(spsi1(1)),
163     >                          dcpl_mb(U(1)),npack1,
164     >                          dbl_mb(S(1)),dbl_mb(V(1)))
165         call psp_overlap_S(ispin,neq,dcpl_mb(U(1)),dcpl_mb(spsi1(1)))
166         call Dneall_f_SVD_ASA2(0,dcpl_mb(U(1)),
167     >                          dcpl_mb(spsi1(1)),npack1)
168
169      else
170         call Dneall_f_SVD(0,A,dcpl_mb(U(1)),npack1,
171     >                     dbl_mb(S(1)),dbl_mb(V(1)))
172      end if
173
174      max_sigma = 0.0d0
175      do i=1,(psi_ne(1)+psi_ne(2))
176        if (dabs(dbl_mb(S(1)+i-1)).gt.max_sigma)
177     >      max_sigma = dabs(dbl_mb(S(1)+i-1))
178      end do
179
180*     **** calculate Vt ****
181      call Dneall_mm_transpose(0,dbl_mb(V(1)),dbl_mb(Vt(1)))
182
183*     **** calculate 2*<A|H|psi> ****
184      de_private = 2.0d0*electron_eorbit_noocc(A)
185
186      dE = de_private
187
188*     **** deallocate tmp space ****
189      if (.not.Dneall_m_pop_stack(V))
190     >   call errquit('geodesic_start:error popping stack',0,MA_ERR)
191
192      call nwpw_timing_end(10)
193
194      return
195      end
196
197      subroutine pspw_calc_Vt(n,A,B)
198      implicit none
199      integer n
200      real*8 A(n,n)
201      real*8 B(n,n)
202      integer i,j
203
204      do j=1,n
205      do i=1,n
206        A(i,j) = B(j,i)
207      end do
208      end do
209
210      return
211      end
212
213
214*     ***********************************
215*     *					*
216*     *		geodesic_get		*
217*     *					*
218*     ***********************************
219*
220*     Uses - geodesic common block
221*
222
223      subroutine geodesic_get(t,Yold,Ynew)
224      implicit none
225      real*8     t
226      complex*16 Yold(*)
227      complex*16 Ynew(*)
228
229#include "bafdecls.fh"
230#include "errquit.fh"
231
232*     **** geodesic common block ***
233      integer U(2)
234      integer Vt(2)
235      integer S(2)
236      common / geodesic_block / U,Vt,S
237
238      integer spsi1(2),spsi2(2)
239      common / psi_paw_block / spsi1,spsi2
240
241*     **** local variables ****
242      logical    value
243      integer    npack1,nemax,ispin,ne(2),neq(2),shift,ms
244      real*8     zero,one
245      integer    tmp1(2),tmp2(2),tmp3(2)
246      integer    tmpC(2),tmpS(2)
247c      real*8     sum1,sum2,sum3
248      real*8     sum1
249
250      real*8 sum2(2)
251      common /geodescic_sum2/ sum2
252
253      integer    taskid, MASTER
254      parameter  (MASTER=0)
255
256*     **** external functions ****
257      integer  psi_ispin,psi_ne,psi_neq
258      external psi_ispin,psi_ne,psi_neq
259      logical  Dneall_m_push_get,Dneall_m_pop_stack
260      external Dneall_m_push_get,Dneall_m_pop_stack
261      logical  psp_pawexist
262      external psp_pawexist
263
264      call nwpw_timing_start(10)
265      zero = 0.0d0
266      one  = 1.0d0
267      call Pack_npack(1,npack1)
268      ispin  = psi_ispin()
269      ne(1)  = psi_ne(1)
270      ne(2)  = psi_ne(2)
271      neq(1) = psi_neq(1)
272      neq(2) = psi_neq(2)
273      nemax  = ne(1) + ne(2)
274
275*     **** allocate tmp space ****
276      value =           Dneall_m_push_get(0,tmp1)
277      value = value.and.Dneall_m_push_get(0,tmp2)
278      value = value.and.Dneall_m_push_get(0,tmp3)
279      value = value.and.BA_push_get(mt_dbl,nemax,'tmpC',tmpC(2),tmpC(1))
280      value = value.and.BA_push_get(mt_dbl,nemax,'tmpS',tmpS(2),tmpS(1))
281      if (.not.value) call errquit('geodesic_get:out of stack',0,MA_ERR)
282
283
284      call Dneall_mm_SCtimesVtrans(0,t,dbl_mb(S(1)),
285     >                                dbl_mb(Vt(1)),
286     >                                dbl_mb(tmp1(1)),
287     >                                dbl_mb(tmp3(1)),
288     >                                dbl_mb(tmpC(1)),
289     >                                dbl_mb(tmpS(1)))
290
291
292      call Dneall_mmm_Multiply2(0,dbl_mb(Vt(1)),
293     >                           dbl_mb(tmp1(1)),
294     >                           dbl_mb(tmp2(1)))
295
296      call Dneall_fmf_Multiply(0,Yold,npack1,
297     >                          dbl_mb(tmp2(1)),1.0d0,
298     >                          Ynew,0.0d0)
299
300      call Dneall_fmf_Multiply(0,dcpl_mb(U(1)),npack1,
301     >                          dbl_mb(tmp3(1)),1.0d0,
302     >                          Ynew,1.0d0)
303
304
305
306!$OMP BARRIER
307*     **** Orthonormality Check ****
308      if (psp_pawexist()) then
309         call psp_overlap_S(ispin,neq,Ynew,dcpl_mb(spsi1(1)))
310         do ms=1,ispin
311            shift = 1 + (ms-1)*neq(1)*npack1
312            call Grsm_gg_itrace(npack1,neq(ms),
313     >                       Ynew(shift),
314     >                       dcpl_mb(spsi1(1)+shift-1),sum2(ms))
315         end do
316!$OMP BARRIER
317         call Parallel_Vector_SumAll(ispin,sum2)
318         do ms=1,ispin
319            sum1 = dble(ne(ms))
320            if (dabs(sum2(ms)-sum1).gt.1.0d-10) then
321                shift = 1 + (ms-1)*neq(1)*npack1
322               call Dneall_f_Sortho(ms,Ynew,dcpl_mb(spsi1(1)),npack1)
323            end if
324         end do
325      else
326         do ms=1,ispin
327            shift = 1 + (ms-1)*neq(1)*npack1
328            call Grsm_gg_itrace(npack1,neq(ms),
329     >                          Ynew(shift),Ynew(shift),sum2(ms))
330         end do
331!$OMP BARRIER
332         call Parallel_Vector_SumAll(ispin,sum2)
333         do ms=1,ispin
334            sum1 = dble(ne(ms))
335            !write(*,*) "sum1,sum2=",sum1,sum2(ms),dabs(sum2(ms)-sum1)
336            if (dabs(sum2(ms)-sum1).gt.1.0d-10) then
337                shift = 1 + (ms-1)*neq(1)*npack1
338            !write(*,*) "INTO GRAMSCHMIDT"
339               call Dneall_f_GramSchmidt(ms,Ynew,npack1)
340            !write(*,*) "OUT GRAMSCHMIDT"
341            end if
342         end do
343      end if
344!$OMP BARRIER
345
346
347*     **** deallocate tmp space ****
348      value = BA_pop_stack(tmpS(2))
349      value = value.and.BA_pop_stack(tmpC(2))
350      value = value.and.Dneall_m_pop_stack(tmp3)
351      value = value.and.Dneall_m_pop_stack(tmp2)
352      value = value.and.Dneall_m_pop_stack(tmp1)
353      if (.not. value)
354     > call errquit('geodesic:get:error popping stack memory',0,MA_ERR)
355
356      call nwpw_timing_end(10)
357
358      return
359      end
360
361
362
363
364*     ***********************************
365*     *					*
366*     *		SCtimesVtrans		*
367*     *					*
368*     ***********************************
369
370      subroutine SCtimesVtrans(t,n,S,Vt,A,B,scal1,scal2)
371      implicit none
372      real*8 t
373      integer n
374      real*8  S(n),Vt(n,n)
375      real*8  A(n,n),B(n,n)
376      real*8 scal1(n),scal2(n)
377
378      integer j,k
379
380      do j=1,n
381        scal1(j) = dcos(S(j)*t)
382        scal2(j) = dsin(S(j)*t)
383      end do
384
385      do k=1,n
386      do j=1,n
387          A(j,k) = scal1(j)*Vt(j,k)
388          B(j,k) = scal2(j)*Vt(j,k)
389      end do
390      end do
391
392      return
393      end
394
395
396*     ***********************************
397*     *					*
398*     *		geodesic_transport	*
399*     *					*
400*     ***********************************
401*
402*     Uses - geodesic common block
403*
404
405      subroutine geodesic_transport(t,Yold,Ynew)
406      implicit none
407      real*8     t
408      complex*16 Yold(*)
409      complex*16 Ynew(*)
410
411#include "bafdecls.fh"
412#include "errquit.fh"
413
414*     **** geodesic common block ***
415      integer U(2)
416      integer Vt(2)
417      integer S(2)
418      common / geodesic_block / U,Vt,S
419
420*     **** local variables ****
421      logical    value
422      integer    npack1,nemax
423      real*8     zero,one
424      integer    tmp1(2),tmp2(2),tmp3(2)
425      integer    tmpC(2),tmpS(2)
426
427*     **** external functions ****
428      integer  psi_ispin,psi_ne
429      external psi_ispin,psi_ne
430      logical  Dneall_m_push_get,Dneall_m_pop_stack
431      external Dneall_m_push_get,Dneall_m_pop_stack
432
433
434      call nwpw_timing_start(10)
435      zero = 0.0d0
436      one  = 1.0d0
437
438      call Pack_npack(1,npack1)
439      nemax = psi_ne(1) + psi_ne(2)
440
441*     **** allocate tmp space ****
442      value =           Dneall_m_push_get(0,tmp1)
443      value = value.and.Dneall_m_push_get(0,tmp2)
444      value = value.and.Dneall_m_push_get(0,tmp3)
445      value = value.and.BA_push_get(mt_dbl,nemax,'tmpC',tmpC(2),tmpC(1))
446      value = value.and.BA_push_get(mt_dbl,nemax,'tmpS',tmpS(2),tmpS(1))
447      if (.not.value)
448     >   call errquit('geodesic_transport: out of stack',0,MA_ERR)
449
450
451      call Dneall_mm_SCtimesVtrans2(0,t,dbl_mb(S(1)),
452     >                                dbl_mb(Vt(1)),
453     >                                dbl_mb(tmp1(1)),
454     >                                dbl_mb(tmp3(1)),
455     >                                dbl_mb(tmpC(1)),
456     >                                dbl_mb(tmpS(1)))
457
458      call Dneall_mmm_Multiply2(0,dbl_mb(Vt(1)),
459     >                           dbl_mb(tmp1(1)),
460     >                           dbl_mb(tmp2(1)))
461
462
463      call Dneall_fmf_Multiply(0,Yold,npack1,
464     >                          dbl_mb(tmp2(1)),-1.0d0,
465     >                          Ynew,0.0d0)
466
467      call Dneall_fmf_Multiply(0,dcpl_mb(U(1)),npack1,
468     >                          dbl_mb(tmp3(1)),1.0d0,
469     >                          Ynew,1.0d0)
470
471*     **** deallocate tmp space ****
472      value =           BA_pop_stack(tmpS(2))
473      value = value.and.BA_pop_stack(tmpC(2))
474      value = value.and.Dneall_m_pop_stack(tmp3)
475      value = value.and.Dneall_m_pop_stack(tmp2)
476      value = value.and.Dneall_m_pop_stack(tmp1)
477      if (.not. value)
478     > call errquit('geodesic_transport:error popping stack',0,MA_ERR)
479
480      call nwpw_timing_end(10)
481
482      return
483      end
484
485*     ***********************************
486*     *                                 *
487*     *         SCtimesVtrans2           *
488*     *                                 *
489*     ***********************************
490
491      subroutine SCtimesVtrans2(t,n,S,Vt,A,B,scal1,scal2)
492      implicit none
493      real*8 t
494      integer n
495      real*8  S(n),Vt(n,n)
496      real*8  A(n,n),B(n,n)
497      real*8 scal1(n),scal2(n)
498
499      integer j,k
500
501      do j=1,n
502        scal1(j) = S(j)*dsin(S(j)*t)
503        scal2(j) = S(j)*dcos(S(j)*t)
504      end do
505
506      do k=1,n
507      do j=1,n
508          A(j,k) = scal1(j)*Vt(j,k)
509          B(j,k) = scal2(j)*Vt(j,k)
510      end do
511      end do
512
513      return
514      end
515
516
517*     ***********************************
518*     *					*
519*     *		geodesic_Gtransport	*
520*     *					*
521*     ***********************************
522*
523*     Uses - geodesic common block
524*
525
526      subroutine geodesic_Gtransport(t,Yold,tG)
527      implicit none
528      real*8     t
529      complex*16 Yold(*)
530      complex*16 tG(*)
531
532#include "bafdecls.fh"
533#include "errquit.fh"
534
535*     **** geodesic common block ***
536      integer U(2)
537      integer Vt(2)
538      integer S(2)
539      common / geodesic_block / U,Vt,S
540
541
542*     **** local variables ****
543      logical    value
544      integer    npack1,nemax
545      real*8     zero,one
546      integer    tmp1(2),tmp2(2),tmp3(2)
547      integer    tmpC(2),tmpS(2)
548
549*     **** external functions ****
550      integer  psi_ispin,psi_ne
551      external psi_ispin,psi_ne
552      logical  Dneall_m_push_get,Dneall_m_pop_stack
553      external Dneall_m_push_get,Dneall_m_pop_stack
554
555      call nwpw_timing_start(10)
556      zero = 0.0d0
557      one  = 1.0d0
558
559      call Pack_npack(1,npack1)
560      nemax = psi_ne(1) + psi_ne(2)
561
562*     **** allocate tmp space ****
563      value =           Dneall_m_push_get(0,tmp1)
564      value = value.and.Dneall_m_push_get(0,tmp2)
565      value = value.and.Dneall_m_push_get(0,tmp3)
566      value = value.and.BA_push_get(mt_dbl,nemax,'tmpC',tmpC(2),tmpC(1))
567      value = value.and.BA_push_get(mt_dbl,nemax,'tmpS',tmpS(2),tmpS(1))
568      if (.not. value)
569     >   call errquit('geodesic_Gtransport:out of stack',0,MA_ERR)
570
571
572      call Dneall_ffm_Multiply(0,dcpl_mb(U(1)),tG,npack1,
573     >                           dbl_mb(tmp2(1)))
574
575      call Dneall_mm_SCtimesVtrans3(0,t,dbl_mb(S(1)),
576     >                                dbl_mb(tmp2(1)),
577     >                                dbl_mb(tmp1(1)),
578     >                                dbl_mb(tmp3(1)),
579     >                                dbl_mb(tmpC(1)),
580     >                                dbl_mb(tmpS(1)))
581
582      call Dneall_mmm_Multiply2(0,dbl_mb(Vt(1)),
583     >                           dbl_mb(tmp1(1)),
584     >                           dbl_mb(tmp2(1)))
585
586      call Dneall_fmf_Multiply(0,Yold,npack1,
587     >                          dbl_mb(tmp2(1)),(-1.0d0),
588     >                          tG,1.0d0)
589
590      call Dneall_fmf_Multiply(0,dcpl_mb(U(1)),npack1,
591     >                          dbl_mb(tmp3(1)),(-1.0d0),
592     >                          tG,1.0d0)
593
594*     **** deallocate tmp space ****
595      value =           BA_pop_stack(tmpS(2))
596      value = value.and.BA_pop_stack(tmpC(2))
597      value = value.and.Dneall_m_pop_stack(tmp3)
598      value = value.and.Dneall_m_pop_stack(tmp2)
599      value = value.and.Dneall_m_pop_stack(tmp1)
600      if (.not. value)
601     > call errquit('geodesic_gtransport:error popping stack',0,MA_ERR)
602
603      call nwpw_timing_end(10)
604
605      return
606      end
607
608
609*     ***********************************
610*     *                                 *
611*     *         SCtimesVtrans3	        *
612*     *                                 *
613*     ***********************************
614
615      subroutine SCtimesVtrans3(t,n,S,Vt,A,B,scal1,scal2)
616      implicit none
617      real*8 t
618      integer n
619      real*8  S(n),Vt(n,n)
620      real*8  A(n,n),B(n,n)
621      real*8  scal1(n),scal2(n)
622
623      integer j,k
624
625      do j=1,n
626        scal1(j) = dsin(S(j)*t)
627        scal2(j) = 1.0d0-dcos(S(j)*t)
628      end do
629
630      do k=1,n
631      do j=1,n
632          A(j,k) = scal1(j)*Vt(j,k)
633          B(j,k) = scal2(j)*Vt(j,k)
634      end do
635      end do
636
637      return
638      end
639
640
641
642