1*
2* $Id$
3*
4
5*     **************************************
6*     *                                    *
7*     *             ke_init                *
8*     *                                    *
9*     **************************************
10      subroutine ke_init()
11      implicit none
12
13#include "bafdecls.fh"
14#include "errquit.fh"
15
16*     **** parameters - filter ****
17c      real*8 eps,ncut
18c      parameter (eps=1.0d-6,ncut=151.0d0)
19      real*8 ncut
20
21*     **** local variables ****
22      integer npack1,nfft3d,G(3)
23      integer i
24      real*8  gg,g1,ggcut,A,E0,sigma,bb,x
25      logical value
26
27      integer tmp1(2),tmp2(2)
28
29*     **** external functions ****
30      logical  control_smooth_cutoff
31      external control_smooth_cutoff
32      integer  G_indx
33      external G_indx
34      real*8   lattice_wcut,control_smooth_cutoff_values,util_erf
35      external lattice_wcut,control_smooth_cutoff_values,util_erf
36
37
38*     **** common block used for ke.F ****
39c     real*8 tg(nfft3d)
40      integer tg_indx,tg_hndl
41      common / tg_block / tg_indx,tg_hndl
42
43      logical filter
44      integer df(2)
45      common / tg2_block / df,filter
46
47
48      call D3dB_nfft3d(1,nfft3d)
49      call Pack_npack(1,npack1)
50      G(1)= G_indx(1)
51      G(2)= G_indx(2)
52      G(3)= G_indx(3)
53
54      value = BA_alloc_get(mt_dbl,npack1,
55     >                     'tg',tg_hndl,tg_indx)
56      if (.not. value)
57     > call errquit('ke_init:out of heap memory',0, MA_ERR)
58
59      value = BA_push_get(mt_dbl,nfft3d,'tmp1',tmp1(2),tmp1(1))
60      value = value.and.
61     >        BA_push_get(mt_dbl,nfft3d,'tmp2',tmp2(2),tmp2(1))
62      if (.not. value)
63     > call errquit('ke_init:out of stack memory',0, MA_ERR)
64
65
66      do i=1,nfft3d
67         gg  = ( dbl_mb(G(1)+i-1)*dbl_mb(G(1)+i-1)
68     >         + dbl_mb(G(2)+i-1)*dbl_mb(G(2)+i-1)
69     >         + dbl_mb(G(3)+i-1)*dbl_mb(G(3)+i-1))
70         dbl_mb(tmp1(1)+i-1) = -0.5d0*gg
71      end do
72      call Pack_t_pack(1,dbl_mb(tmp1(1)))
73      call Pack_t_Copy(1,dbl_mb(tmp1(1)),dbl_mb(tg_indx))
74
75
76*     **** Funny decay added to stabalize unitcell optimization and pressures ****
77      filter = control_smooth_cutoff()
78      if (filter) then
79         value = BA_alloc_get(mt_dbl,npack1,'df',df(2),df(1))
80         if (.not. value)
81     >      call errquit('ke_init:out of heap memory',3,MA_ERR)
82
83         E0    = lattice_wcut()
84         A     = control_smooth_cutoff_values(1)*E0
85         sigma = control_smooth_cutoff_values(2)
86         bb    = 2.0d0*A/(sigma*dsqrt(4.0d0*datan(1.0d0)))
87
88         do i=1,nfft3d
89            gg  = ( dbl_mb(G(1)+i-1)*dbl_mb(G(1)+i-1)
90     >            + dbl_mb(G(2)+i-1)*dbl_mb(G(2)+i-1)
91     >            + dbl_mb(G(3)+i-1)*dbl_mb(G(3)+i-1))
92
93            if (gg.gt.(E0-6.0d0*sigma)) then
94               x = (0.5d0*gg-E0)/sigma
95               dbl_mb(tmp1(1)+i-1) = -A*(1.0d0+util_erf(x))
96               dbl_mb(tmp2(1)+i-1) = 1.0d0 + bb*dexp(-x*x)
97            else
98               dbl_mb(tmp1(1)+i-1) = 0.0d0
99               dbl_mb(tmp2(1)+i-1) = 1.0d0
100            end if
101         end do
102         call Pack_t_pack(1,dbl_mb(tmp1(1)))
103         call Pack_t_pack(1,dbl_mb(tmp2(1)))
104         call Pack_t_Copy(1,dbl_mb(tmp2(1)),dbl_mb(df(1)))
105         call Pack_tt_Sum2(1,dbl_mb(tmp1(1)),dbl_mb(tg_indx))
106      end if
107
108      value =           BA_pop_stack(tmp2(2))
109      value = value.and.BA_pop_stack(tmp1(2))
110      if (.not. value)
111     > call errquit('ke_init:popping stack memory',0, MA_ERR)
112      return
113      end
114
115
116*     ****************************************
117*     *                                      *
118*     *               ke_end                 *
119*     *                                      *
120*     ****************************************
121      subroutine ke_end()
122      implicit none
123
124#include "bafdecls.fh"
125#include "errquit.fh"
126
127*     **** common block used for ke.F ****
128c     real*8 tg(nfft3d)
129      integer tg_indx,tg_hndl
130      common / tg_block / tg_indx,tg_hndl
131
132      logical filter
133      integer df(2)
134      common / tg2_block / df,filter
135
136      logical value
137
138      value = BA_free_heap(tg_hndl)
139      if (filter) value = value.and.BA_free_heap(df(2))
140      if (.not. value)
141     >   call errquit('ke_end:error freeing heap',0, MA_ERR)
142      return
143      end
144
145
146*     ****************************************
147*     *                                      *
148*     *               ke                     *
149*     *                                      *
150*     ****************************************
151      subroutine ke(ispin,ne,psi1,psi2)
152      implicit none
153      integer    ispin,ne(2)
154      complex*16 psi1(*)
155      complex*16 psi2(*)
156
157#include "bafdecls.fh"
158
159*     **** common block used for ke.F ****
160c     real*8 tg(nfft3d)
161      integer tg_indx,tg_hndl
162      common / tg_block / tg_indx,tg_hndl
163
164*     **** local variables ****
165      integer npack1
166      integer n
167
168      call Pack_npack(1,npack1)
169
170      do n=1,(ne(1)+ne(2))
171         call Pack_tc_Mul(1,dbl_mb(tg_indx),psi1(1+(n-1)*npack1),
172     >                                      psi2(1+(n-1)*npack1))
173      end do
174
175      return
176      end
177
178*     ****************************************
179*     *                                      *
180*     *               ke_add                 *
181*     *                                      *
182*     ****************************************
183      subroutine ke_add(ispin,ne,psi1,psi2)
184      implicit none
185      integer    ispin,ne(2)
186      complex*16 psi1(*)
187      complex*16 psi2(*)
188
189#include "bafdecls.fh"
190cccccccc#include "frac_occ.fh"
191
192*     **** common block used for ke.F ****
193c     real*8 tg(nfft3d)
194      integer tg_indx,tg_hndl
195      common / tg_block / tg_indx,tg_hndl
196
197*     **** local variables ****
198      integer npack1
199      integer n
200
201      call Pack_npack(1,npack1)
202
203      do n=1,(ne(1)+ne(2))
204         call Pack_tc_MulAdd(1,dbl_mb(tg_indx),psi1(1+(n-1)*npack1),
205     >                                         psi2(1+(n-1)*npack1))
206      end do
207
208      return
209      end
210
211
212*     ****************************************
213*     *                                      *
214*     *               ke_ave                 *
215*     *                                      *
216*     ****************************************
217      subroutine ke_ave(ispin,ne,psi1,ave,fractional,occ)
218      implicit none
219      integer ispin,ne(2)
220      complex*16 psi1(*)
221      real*8     ave
222      logical fractional
223      real*8  occ(*)
224
225#include "bafdecls.fh"
226#include "errquit.fh"
227cccccccc#include "frac_occ.fh"
228
229*     **** common block used for ke.F ****
230c     real*8 tg(nfft3d)
231      integer tg_indx,tg_hndl
232      common / tg_block / tg_indx,tg_hndl
233
234
235*     **** local variables ****
236      integer npack1,np
237      integer ms,n,n1(2),n2(2)
238      real*8  sum,ave1
239
240      common /eelectron_ejtmp/ sum,ave1
241
242
243c     complex*16 tmp1(nfft3d)
244      integer tmp1(2)
245      logical value
246
247      call Parallel_np(np)
248
249      call Pack_npack(1,npack1)
250      value = BA_push_get(mt_dcpl,npack1,'tmp1',tmp1(2),tmp1(1))
251      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
252
253
254      n1(1) = 1
255      n2(1) = ne(1)
256      n1(2) = ne(1) + 1
257      n2(2) = ne(1) + ne(2)
258
259!$OMP MASTER
260      ave1 = 0.0d0
261!$OMP END MASTER
262      do ms=1,ispin
263         do n=n1(ms),n2(ms)
264            if (fractional) then
265            call Pack_tc_aMul(1,occ(n),
266     >                          dbl_mb(tg_indx),
267     >                          psi1(1+(n-1)*npack1),
268     >                          dcpl_mb(tmp1(1)))
269            else
270            call Pack_tc_Mul(1,dbl_mb(tg_indx),
271     >                       psi1(1+(n-1)*npack1),
272     >                       dcpl_mb(tmp1(1)))
273            end if
274            call Pack_cc_idot(1,psi1(1+(n-1)*npack1),
275     >                       dcpl_mb(tmp1(1)),
276     >                       sum)
277
278!$OMP MASTER
279            ave1 = ave1 + sum
280!$OMP END MASTER
281         end do
282      end do
283!$OMP BARRIER
284      if (np.gt.1) call Parallel_SumAll(ave1)
285      ave = ave1
286      if (ispin.eq.1) ave = 2.0d0*ave
287      ave = -ave
288
289      value = BA_pop_stack(tmp1(2))
290      return
291      end
292
293
294
295*     ****************************************
296*     *                                      *
297*     *               ke_euv                 *
298*     *                                      *
299*     ****************************************
300      subroutine ke_euv(ispin,ne,psi,euv)
301*
302* $Id$
303*
304      implicit none
305      integer ispin,ne(2)
306      complex*16 psi(*)
307      real*8 euv(3,3)
308
309#include "bafdecls.fh"
310#include "errquit.fh"
311
312      logical filter
313      integer df(2)
314      common / tg2_block / df,filter
315
316*     **** local variables ****
317      integer npack1,nfft3d,G(2,3)
318      integer i,j,ms,n,n1(2),n2(2),np_i,np_j
319      integer u,v,s
320      logical value
321
322      real*8 pi,scal,sum,ave
323      real*8 hm(3,3),Aus(3,3)
324      integer tmp1(2),tmp2(2)
325
326*     **** external functions ****
327c     real*8 G(nfft3d,3)
328      integer  G_indx
329      external G_indx
330
331      real*8   lattice_unitg,lattice_omega,lattice_unita
332      external lattice_unitg,lattice_omega,lattice_unita
333
334
335      call Parallel2d_np_i(np_i)
336      call Parallel2d_np_j(np_j)
337
338      n1(1) = 1
339      n2(1) = ne(1)
340      n1(2) = ne(1) + 1
341      n2(2) = ne(1) + ne(2)
342
343      pi   = 4.0d0*datan(1.0d0)
344      scal = 1.0d0/(2.0d0*pi)
345
346*     *** define hm ****
347      do j=1,3
348      do i=1,3
349         hm(i,j) = scal*lattice_unitg(i,j)
350      end do
351      end do
352
353
354
355      call D3dB_nfft3d(1,nfft3d)
356      call Pack_npack(1,npack1)
357
358      value = BA_push_get(mt_dbl,nfft3d,
359     >                     'G1',G(2,1),G(1,1))
360      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
361      value = BA_push_get(mt_dbl,nfft3d,
362     >                     'G2',G(2,2),G(1,2))
363      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
364      value = BA_push_get(mt_dbl,nfft3d,
365     >                     'G3',G(2,3),G(1,3))
366      if (.not. value) call errquit('out of stack  memory',0, MA_ERR)
367
368
369      value = BA_push_get(mt_dbl,npack1,'tmp1',tmp1(2),tmp1(1))
370      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
371
372      value = BA_push_get(mt_dcpl,npack1,'tmp2',tmp2(2),tmp2(1))
373      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
374
375
376      call dcopy(nfft3d,dbl_mb(G_indx(1)),1,dbl_mb(G(1,1)),1)
377      call dcopy(nfft3d,dbl_mb(G_indx(2)),1,dbl_mb(G(1,2)),1)
378      call dcopy(nfft3d,dbl_mb(G_indx(3)),1,dbl_mb(G(1,3)),1)
379      call Pack_t_pack(1,dbl_mb(G(1,1)))
380      call Pack_t_pack(1,dbl_mb(G(1,2)))
381      call Pack_t_pack(1,dbl_mb(G(1,3)))
382
383*     **** calculate Aus = Sum(n)Sum(G) psi(G,n)**2 G(u)G(s) ****
384      call dcopy(9,0.0d0,0,Aus,1)
385      do u=1,3
386      do s=u,3
387        call Pack_tt_Mul(1,dbl_mb(G(1,u)),
388     >                     dbl_mb(G(1,s)),
389     >                     dbl_mb(tmp1(1)))
390
391        if (filter) call Pack_tt_Mul2(1,dbl_mb(df(1)),dbl_mb(tmp1(1)))
392
393        ave = 0.0d0
394        do ms=1,ispin
395        do n=n1(ms),n2(ms)
396            call Pack_tc_Mul(1,dbl_mb(tmp1(1)),
397     >                       psi(1+(n-1)*npack1),
398     >                       dcpl_mb(tmp2(1)))
399            call Pack_cc_idot(1,psi(1+(n-1)*npack1),
400     >                        dcpl_mb(tmp2(1)),
401     >                       sum)
402             ave = ave + sum
403             !Aus(u,s) = Aus(u,s) + sum
404        end do
405        end do
406        if (np_i.gt.1) call D3dB_SumAll(ave)
407        if (np_j.gt.1) call D1dB_SumAll(ave)
408        Aus(u,s) = Aus(u,s) + ave
409
410      end do
411      end do
412      do u=1,3
413      do s=u+1,3
414         Aus(s,u) = Aus(u,s)
415      end do
416      end do
417      if (ispin.eq.1) call dscal(9,2.0d0,Aus,1)
418
419*     *** calculate euv = -Sum(s) hm(s,v)*Aus(u,s)
420      call dcopy(9,0.0d0,0,euv,1)
421      do v=1,3
422      do u=1,3
423         do s=1,3
424            euv(u,v) = euv(u,v) - Aus(u,s)*hm(s,v)
425         end do
426      end do
427      end do
428
429      value = BA_pop_stack(tmp2(2))
430      value = value.and.BA_pop_stack(tmp1(2))
431      value = value.and.BA_pop_stack(G(2,3))
432      value = value.and.BA_pop_stack(G(2,2))
433      value = value.and.BA_pop_stack(G(2,1))
434      if (.not. value) call errquit('error poping stack memory',0,
435     &       MA_ERR)
436      return
437      end
438
439
440*     ****************************************
441*     *                                      *
442*     *               ke_Precondition        *
443*     *                                      *
444*     ****************************************
445      subroutine ke_Precondition(npack,neall,psi,gradk)
446      implicit none
447      integer npack,neall
448      complex*16   psi(npack,neall)
449      complex*16 gradk(npack,neall)
450
451#include "bafdecls.fh"
452#include "errquit.fh"
453
454*     **** common block used for ke.F ****
455c     real*8 tg(nfft3d)
456      integer tg_indx,tg_hndl
457      common / tg_block / tg_indx,tg_hndl
458
459      real*8  sum,elocal
460      common /eelectron_ejtmp/ sum,elocal
461
462*     **** local variables ****
463      logical value
464      integer k,n
465      real*8  x,cm,dm,Ep,cm2(1)
466      integer tmp1(2)
467
468      real*8   lattice_wggcut,control_Ep
469      external lattice_wggcut,control_Ep
470
471      integer ispin,ne(2)
472c      integer  psi_ispin,psi_ne
473c      external psi_ispin,psi_ne
474
475      value = BA_push_get(mt_dcpl,npack,'tmp1',tmp1(2),tmp1(1))
476      if (.not. value) call errquit('out of stack memory',0)
477
478*     **** My preconditioner ****
479      do n=1,neall
480         call Pack_tc_Mul(1,dbl_mb(tg_indx),psi(1,n),dcpl_mb(tmp1(1)))
481         call Pack_cc_dot(1,psi(1,n),dcpl_mb(tmp1(1)),sum)
482CDIR$ NOVECTOR
483!$OMP DO
484         do k=1,npack
485           x = dbl_mb(tg_indx+k-1)
486           x = x*dconjg(psi(k,n))*psi(k,n)
487           x = x/sum
488
489           cm = 27.0d0+(18.0d0+(12.0d0+8.0d0*x)*x)*x
490           dm = (cm + 16.0d0* x**4)
491           cm = cm/dm
492
493           gradk(k,n) = gradk(k,n)*(cm)
494         end do
495!$OMP END DO
496      end do
497
498
499c*     **** Preconditioner of Tassone, Mauri, and Car ****
500c      ispin = psi_ispin()
501c      ne(1) = psi_ne(1)
502c      ne(2) = psi_ne(2)
503c      call ke_ave(ispin,ne,gradk,Ep)
504c      write(*,*) "E(R):",Ep
505c      Ep = control_Ep()-Ep
506c      cm = 1.0d0/(Ep)
507c      do k=1,npack
508c         x = -dbl_mb(tg_indx+k-1)
509c        dm = (x*cm)
510c         if (x.gt.Ep) then
511c           do n=1,neall
512c              gradk(k,n) = gradk(k,n)/dm
513c           end do
514c         end if
515c      end do
516
517
518c*     **** My preconditioner ****
519c      ispin = 2
520c      ne(1) = 1
521c      ne(2) = 0
522c      do n=1,neall
523cc        call ke_ave(ispin,ne,gradk(1,n),Ep)
524cc        write(*,*) "n,E(R)=",n,Ep,control_Ep()-50*Ep
525cc        Ep =  control_Ep() - 15*Ep
526c        Ep =  control_Ep()
527c        cm = 1.0d0/Ep
528cCDIR$ NOVECTOR
529c!$OMP DO
530c        do k=1,npack
531c          x = -dbl_mb(tg_indx+k-1)
532c          dm = x*cm
533c          if (x.gt.Ep) then
534c          gradk(k,n) = gradk(k,n)/dm
535c          end if
536c        end do
537c!$OMP END DO
538c      end do
539c
540c*     **** preconditioner #5 ****
541c      ispin = 2
542c      ne(1) = 1
543c      ne(2) = 0
544c      do n=1,neall
545c        call ke_ave(ispin,ne,gradk(1,n),Ep,.false.,cm2)
546c        Ep = 1.5d0*Ep
547c        do k=1,npack
548c           x = -2.0d0*dbl_mb(tg_indx+k-1)/Ep
549c           cm = 27.0d0+(18.0d0+(12.0d0+8.0d0*x)*x)*x
550c           dm = (cm + 16.0d0* x**4)
551c           cm = (cm/dm)*(2.0d0/Ep)
552c           gradk(k,n) = gradk(k,n)*cm
553c        end do
554c      end do
555
556      value = BA_pop_stack(tmp1(2))
557      if (.not. value) call errquit('error popping stack memory',0)
558
559      return
560      end
561
562
563
564*     ****************************************
565*     *                                      *
566*     *               ke_Precondition2       *
567*     *                                      *
568*     ****************************************
569      subroutine ke_Precondition2(npack,neall,residual,Kresidual)
570      implicit none
571      integer npack,neall
572      complex*16 residual(npack,neall)
573      complex*16 Kresidual(npack,neall)
574
575#include "bafdecls.fh"
576#include "errquit.fh"
577
578*     **** common block used for ke.F ****
579c     real*8 tg(nfft3d)
580      integer tg_indx,tg_hndl
581      common / tg_block / tg_indx,tg_hndl
582
583*     **** local variables ****
584      logical value
585      integer k,n
586      real*8  sum
587      real*8  x,cm,dm,Ep,cm2(1)
588      integer ispin,ne(2)
589
590
591*     **** preconditioner #5 ****
592      ispin = 2
593      ne(1) = 1
594      ne(2) = 0
595      do n=1,neall
596        call ke_ave(ispin,ne,residual(1,n),Ep,.false.,cm2)
597        Ep = 1.5d0*Ep
598        do k=1,npack
599           x = -2.0d0*dbl_mb(tg_indx+k-1)/Ep
600           cm = 27.0d0+(18.0d0+(12.0d0+8.0d0*x)*x)*x
601           dm = (cm + 16.0d0* x**4)
602           cm = (cm/dm)*(2.0d0/Ep)
603           Kresidual(k,n) = residual(k,n)*cm
604        end do
605      end do
606
607      return
608      end
609
610
611