1*
2* $Id$
3*
4
5* $Log: not supported by cvs2svn $
6* Revision 1.44  2008/09/15 20:52:32  bylaska
7* ..paw fixes...EJB
8*
9* Revision 1.43  2007/09/24 16:58:12  bylaska
10* ...preliminary PAW modifications...
11*   - basis file format changed
12*   - .vpp formatting routines added to pspw
13*
14* - zdotc routines currently modified to tzdotc.
15* ...EJB
16*
17* Revision 1.42  2006/02/11 02:50:46  bylaska
18* GGA's using 1st derivative formulas have been added in core part of PAW....EJB
19*
20* Revision 1.41  2005/02/09 02:38:57  bylaska
21* ..............EJB
22*
23* Revision 1.40  2004/11/08 23:37:41  bylaska
24* Bug fix in pspw_hfx found by M. Hackler.
25* PBE0 has been implemented.
26*
27*  ........EJB
28*
29* Revision 1.39  2004/08/12 18:39:41  bylaska
30* A prototype of a Grassmann CG paw minimizer (i.e. nwpw:minimizer 1) has been added.
31* The code is similar to the CG minimizer in pspw, but differences exist
32* because the residual |R> = (1 - S|psi><psi|)|Hpsi> is not the same as the
33* tangent vector |T> = (1 - |psi><psi|S)|R>.
34*
35* Forces still need to be implemented.
36*
37* ...EJB
38*
39* Revision 1.38  2003/10/24 18:45:24  bylaska
40*
41* Aperiodic convolution capability has been added to PAW .... EJB
42*
43* Revision 1.37  2003/10/21 02:05:15  marat
44* switched to new errquit by running global replace operation
45* see the script below (note it will not work on multiline errquit calls)
46* *********************************************************
47* #!/bin/sh
48*
49* e=`find . -name "*F" -print`
50*
51* for f in $e
52* do
53* cp $f $f.bak
54* sed  's|\(^[ ].*call[ ]*errquit([^,]*\)\(,[^,]*\)\()\)|\1,0\2\3|' $f.bak > $f
55* #rm $f.bak
56* done
57* **********************************************************
58*
59* Revision 1.36  2003/03/25 19:43:31  bylaska
60* bug fix...EJB
61*
62* Revision 1.35  2003/03/22 02:30:01  bylaska
63* paw cpsd program finished....
64* The nwpw directory structure is ready to be checked into 4.5 release tree.
65*
66* ....EJB
67*
68* Revision 1.34  2003/03/21 23:41:13  bylaska
69*
70* paw updates ...EJB
71*
72* Revision 1.33  2003/03/15 02:14:44  bylaska
73* orthonormalization checking fixed to work with forces...EJB
74*
75* Revision 1.32  2003/03/15 01:47:43  bylaska
76* steepest descent loop has been modified for the inclusion of forces....
77* Lagrange Multipliers require a recalculation of the phase factors
78* after call to paw_overlap_S and before paw_psi_lagrange.....EJB
79*
80* Revision 1.31  2003/03/14 01:20:59  marat
81* moved call to paw_force_solve after nonlocal
82* matrices have been calculated
83* MV
84*
85* Revision 1.30  2003/03/11 17:57:10  bylaska
86* updates...EJB
87*
88* Revision 1.29  2003/03/07 20:51:10  bylaska
89* Code cleanup...0.0 changed to 0.0d0 in paw_xc.F
90* Tangent vector now used for SD with Gram-schmidt.
91* ....EJB
92*
93* Revision 1.28  2003/03/06 01:46:39  bylaska
94* bug fix in paw_energy_core_atom...and ma_chop_stack changed to ma_pop_stack
95* ...EJB
96*
97* Revision 1.27  2003/03/05 23:16:31  bylaska
98* Commented out write statements and other minor fixes.....
99* self-consistent loop looks like it is working.....
100* ....EJB
101*
102* Revision 1.26  2003/03/04 00:04:03  marat
103* added printouts for atomic potenitials
104* for debug purposes
105* MV
106*
107* Revision 1.25  2003/02/24 22:38:51  bylaska
108* Fixed bugs in ehartr_pw calculation....EJB
109*
110* Revision 1.24  2003/02/24 21:58:59  marat
111* ...
112* MV
113*
114* Revision 1.23  2003/02/24 21:05:00  bylaska
115*  $Log: added to cvs output....EJB
116*
117
118
119
120      subroutine paw_inner_loop(ispin,ne,
121     >                      npack1,nfft3d,nemax,
122     >                      psi1,psi2,dn,
123     >                      dn_cmp_smooth,
124     >                      it_in,E,deltae,deltac,deltar,
125     >                      hml,lmd,lmd1,first_iteration,
126     >                      psi_r,Hpsi)
127      implicit none
128      integer    ispin,ne(2)
129      integer    npack1,nfft3d,nemax
130      complex*16 psi1(npack1,nemax)
131      complex*16 psi2(npack1,nemax)
132      real*8     dn(2*nfft3d,2)
133      real*8     dn_cmp_smooth(2*nfft3d)
134      integer    it_in
135      real*8     E(*)
136      real*8     deltae,deltac,deltar
137      real*8     hml(2*nemax*nemax)
138      real*8     lmd(2*nemax*nemax),lmd1(2*nemax*nemax)
139      logical    first_iteration
140
141*     **** very big workspace variables ****
142      real*8     psi_r(2*nfft3d,nemax)
143      complex*16 Hpsi(npack1,nemax)
144
145
146#include "bafdecls.fh"
147#include "paw_energy_kin_atom.fh"
148#include "paw_energy_vloc_atom.fh"
149#include "paw_energy_ion_atom.fh"
150#include "paw_energy_core_atom.fh"
151#include "paw_energy_hartree_atom.fh"
152#include "paw_xc.fh"
153#include "nwpwxc.fh"
154#include "util.fh"
155
156
157*     **** local variables ****
158      logical move
159      logical use_lda
160      integer n2ft3d,np
161      integer i,j,ii,jj,n,n1(2),n2(2),it,ms,nn,ierr
162      integer nx,ny,nz
163      integer index,indext
164      double precision evloc_pw,evloc_atom,occ(1)
165      real*8  sum,Eold,eorbit,ehartr_pw,eke,enlocal
166      real*8  exc,exc2,pxc,pxc2,dte,scal1,scal2,dv,dt
167      real*8  deltamm,vzero
168      double precision ekin_atom
169      double precision eion_atom
170      double precision ecore_atom
171      double precision ecore_ion_atom
172      double precision ecore_self_atom
173      double precision ehartree_atom
174      double precision exc_atom
175
176
177*     **** MA local variables ****
178      logical value,gram_schmidt
179      integer tmp_L(2)
180      integer tmp1(2),tmp2(2)
181      integer vl(2),vh(2),vc(2),vcomp(2),dng(2)
182      integer rho(2)
183      integer xcp(2),xce(2),dnall(2)
184      integer natmx,fion(2),ftest(2)
185      integer sumi(2)
186      integer npack0,gga
187
188*     ***** external functions ****
189      logical  control_move,control_gram_schmidt
190      integer  ion_nion,control_gga
191      real*8   control_time_step,control_fake_mass,ion_dti
192      real*8   lattice_omega,coulomb_e,ewald_e
193      external control_move,control_gram_schmidt
194      external ion_nion,control_gga
195      external control_time_step,control_fake_mass,ion_dti
196      external lattice_omega,coulomb_e,ewald_e
197      integer  control_version
198      external control_version
199      real*8   ion_ion_e
200      external ion_ion_e
201      real*8   paw_mult_energy_atom_comp !**no header file for paw_mult**
202      real*8   paw_mult_energy_atom_self
203      real*8   paw_mult_energy_atom_mult
204      external paw_mult_energy_atom_comp
205      external paw_mult_energy_atom_self
206      external paw_mult_energy_atom_mult
207
208
209      call Parallel_np(np)
210      call Pack_npack(0,npack0)
211      n2ft3d = 2*nfft3d
212      deltamm = 0.0d0
213      gga = control_gga()
214
215
216      call nwpw_timing_start(12)
217*     **** allocate MA local variables ****
218      value = BA_push_get(mt_dbl,(8*nemax*nemax),
219     >                     'tmp_L',tmp_L(2),tmp_L(1))
220      value = value.and.
221     >        BA_push_get(mt_dcpl,(nfft3d),'tmp1',tmp1(2),tmp1(1))
222      value = value.and.
223     >        BA_push_get(mt_dcpl,(nfft3d),'tmp2',tmp2(2),tmp2(1))
224
225      if (control_version().eq.3) then
226       value = value.and.
227     >        BA_push_get(mt_dcpl,(npack0),'vcomp',vcomp(2),vcomp(1))
228       value = value.and.
229     >        BA_push_get(mt_dcpl,(npack0),'vh',vh(2),vh(1))
230       value = value.and.
231     >        BA_push_get(mt_dcpl,(npack0),'vc',vc(2),vc(1))
232      end if
233
234      if (control_version().eq.4) then
235       value = value.and.
236     >        BA_push_get(mt_dcpl,(nfft3d),'vcomp',vcomp(2),vcomp(1))
237       value = value.and.
238     >        BA_push_get(mt_dcpl,(nfft3d),'vh',vh(2),vh(1))
239       value = value.and.
240     >        BA_push_get(mt_dcpl,(npack0),'vc',vc(2),vc(1))
241      end if
242
243      value = value.and.
244     >        BA_push_get(mt_dcpl,(npack0),'vloc', vl(2), vl(1))
245      value = value.and.
246     >        BA_push_get(mt_dbl,(n2ft3d),'rho',rho(2),rho(1))
247      value = value.and.
248     >        BA_push_get(mt_dcpl,(npack0),'dng',dng(2), dng(1))
249      value = value.and.
250     >        BA_push_get(mt_dbl,(4*nfft3d),'xcp',xcp(2), xcp(1))
251      value = value.and.
252     >        BA_push_get(mt_dbl,(4*nfft3d),'xce',xce(2), xce(1))
253      value = value.and.
254     >        BA_push_get(mt_dbl,(4*nfft3d),'dnall',dnall(2),dnall(1))
255      natmx = ion_nion()
256      value = value.and.
257     >        BA_push_get(mt_dbl,(3*natmx),'fion',fion(2),fion(1))
258      value = value.and.
259     >        BA_push_get(mt_dbl,(3*natmx),'ftest',ftest(2),ftest(1))
260      value = value.and.
261     >        BA_push_get(mt_dbl,(nemax),'sumi',sumi(2),sumi(1))
262
263      if (.not. value) call errquit('out of stack memory',0,0)
264
265      call nwpw_timing_end(12)
266
267      call D3dB_nx(1,nx)
268      call D3dB_ny(1,ny)
269      call D3dB_nz(1,nz)
270      move         = control_move()
271      gram_schmidt = control_gram_schmidt()
272
273      n1(1) = 1
274      n2(1) = ne(1)
275      n1(2) = ne(1) + 1
276      n2(2) = ne(1) + ne(2)
277
278      dt = control_time_step()
279      dte = dt/dsqrt(control_fake_mass())
280      scal1 = 1.0d0/dble(nx*ny*nz)
281      scal2 = 1.0d0/lattice_omega()
282      dv    = scal1*lattice_omega()
283
284
285*     ******************************************
286*     ****                                  ****
287*     ****   Start of steepest descent loop ****
288*     ****                                  ****
289*     ******************************************
290      do it=1,it_in
291
292*       **** shift wavefunction and atoms ****
293        call dcopy(2*npack1*nemax,psi2,1,psi1,1)
294        if (move) call ion_shift()
295        !if (move) call phafac()
296        if (move) call paw_set_mult_energy_coeff()
297
298        call nwpw_timing_start(11)
299*       *******************
300*       **** get psi_r ****
301*       *******************
302        do n=n1(1),n2(ispin)
303           call Pack_c_Copy(1,psi1(1,n),psi_r(1,n))
304           call Pack_c_unpack(1,psi_r(1,n))
305           call D3dB_cr_fft3b(1,psi_r(1,n))
306           call D3dB_r_Zero_Ends(1,psi_r(1,n))
307        end do
308
309*       *******************
310*       **** set overlaps *
311*       *******************
312        call paw_ovlp_coeff_set(psi1)
313        call paw_ovlp_weights_set()
314        !call paw_ovlp_weights_write(89)
315
316*       *************************************
317*       ****generate comp charge potential***
318*       *************************************
319        call paw_comp_charge_update()
320        call paw_pot_comp_solve()
321        !call paw_pot_comp_print()
322
323
324*       *********************
325*       **** generate dn ****
326*       *********************
327        call dcopy(ispin*n2ft3d,0.0d0,0,dn,1)
328        do ms=1,ispin
329           do n=n1(ms),n2(ms)
330              do i=1,n2ft3d
331                 dn(i,ms) = dn(i,ms) + scal2*(psi_r(i,n)**2)
332              end do
333           end do
334           call D3dB_r_Zero_Ends(1,dn(1,ms))
335        end do
336
337
338*       **********************
339*       **** generate dng ****
340*       **********************
341        call D3dB_rr_Sum(1,dn(1,1),dn(1,ispin),dbl_mb(rho(1)))
342        call D3dB_r_SMul(1,scal1,dbl_mb(rho(1)),dcpl_mb(tmp1(1)))
343        call D3dB_rc_fft3f(1,dcpl_mb(tmp1(1)))
344        call Pack_c_pack(0,dcpl_mb(tmp1(1)))
345        call Pack_c_Copy(0,dcpl_mb(tmp1(1)),dcpl_mb(dng(1)))
346
347
348
349*       *****************************************
350*       **** generate local pseudopotential  ****
351*       **** and also get force if move true ****
352*       *****************************************
353       call paw_vloc(dcpl_mb(vl(1)),
354     >               move,
355     >               dcpl_mb(dng(1)),
356     >               dbl_mb(fion(1)))
357       call Pack_cc_dot(0,dcpl_mb(dng(1)),dcpl_mb(vl(1)),evloc_pw)
358
359
360*      ************************************
361*      **** generate coulomb potential ****
362*      ************************************
363
364*      *** atomic portion ***
365       call paw_pot_hartree_solve()
366
367        call paw_mult_dn_cmp_get(dcpl_mb(tmp1(1)),
368     >                           dn_cmp_smooth)
369        if (control_version().eq.3)  then
370           call Pack_cc_Sub(0,
371     >                      dcpl_mb(tmp1(1)),
372     >                      dn_cmp_smooth,
373     >                      dcpl_mb(tmp2(1))) !** tmp2 = dn_cmp - dn_cmp_smooth
374           call Pack_cc_Sum(0,
375     >                      dn_cmp_smooth,
376     >                      dcpl_mb(dng(1)),
377     >                      dcpl_mb(tmp1(1))) !** tmp1 = dng+dn_cmp_smooth **
378
379           !**** vh *****
380           call coulomb_v(dcpl_mb(tmp1(1)),
381     >                    dcpl_mb(vh(1)))
382
383           !**** vcmp *****
384           call coulomb_v(dcpl_mb(tmp2(1)),
385     >                    dcpl_mb(vcomp(1)))
386           call paw_mult_vzero(vzero)
387           call Pack_c_setzero(0,vzero,dcpl_mb(vcomp(1)))
388
389           call paw_mult_coeff_set(dcpl_mb(vh(1)),dcpl_mb(vcomp(1)))
390
391           call Pack_cc_Sum(0,
392     >                      dcpl_mb(vh(1)),
393     >                      dcpl_mb(vcomp(1)),
394     >                      dcpl_mb(vc(1)))
395
396        end if
397
398        if (control_version().eq.4) then
399
400           call Pack_cc_Sub(0,
401     >                      dcpl_mb(tmp1(1)),
402     >                      dn_cmp_smooth,
403     >                      dcpl_mb(tmp2(1))) !** tmp2 = dn_cmp - dn_cmp_smooth
404           call Pack_cc_Sum(0,
405     >                      dn_cmp_smooth,
406     >                      dcpl_mb(dng(1)),
407     >                      dcpl_mb(tmp1(1))) !** tmp1 = dng+dn_cmp_smooth **
408
409           call Pack_c_unpack(0,dcpl_mb(tmp1(1)))
410           call D3dB_cr_fft3b(1,dcpl_mb(tmp1(1)))
411           call D3dB_r_Zero_Ends(1,dcpl_mb(tmp1(1)))
412
413           call coulomb2_v(dcpl_mb(tmp1(1)),dcpl_mb(vh(1)))
414
415           call D3dB_rc_fft3f(1,dcpl_mb(vh(1)))
416c           call D3dB_r_SMul(1,scal1,dcpl_mb(vh(1)),dcpl_mb(vh(1)))
417           call D3dB_r_SMul1(1,scal1,dcpl_mb(vh(1)))
418           call Pack_c_pack(0,dcpl_mb(vh(1)))
419
420
421           call Pack_c_unpack(0,dcpl_mb(tmp2(1)))
422           call D3dB_cr_fft3b(1,dcpl_mb(tmp2(1)))
423           call D3dB_r_Zero_Ends(1,dcpl_mb(tmp2(1)))
424
425           call coulomb2_v(dcpl_mb(tmp2(1)),dcpl_mb(vcomp(1)))
426
427           call D3dB_rc_fft3f(1,dcpl_mb(vcomp(1)))
428c           call D3dB_r_SMul(1,scal1,dcpl_mb(vcomp(1)),dcpl_mb(vcomp(1)))
429           call D3dB_r_SMul1(1,scal1,dcpl_mb(vcomp(1)))
430           call Pack_c_pack(0,dcpl_mb(vcomp(1)))
431
432           call paw_mult_vzero(vzero)
433           call Pack_c_setzero(0,vzero,dcpl_mb(vcomp(1)))
434
435           call paw_mult_coeff_set(dcpl_mb(vh(1)),dcpl_mb(vcomp(1)))
436
437           call Pack_cc_Sum(0,
438     >                      dcpl_mb(vh(1)),
439     >                      dcpl_mb(vcomp(1)),
440     >                      dcpl_mb(vc(1)))
441
442        end if
443
444
445*       *************************************************
446*       **** generate exchange-correlation potential ****
447*       *************************************************
448
449*       *** local portion ***
450c        call paw_density_solve()
451        call paw_xc_solve()
452        !call paw_xc_print()
453
454*       *** plane wave ***
455
456        use_lda = ((.not.nwpwxc_is_on().and.gga.eq.0).or.
457     &             (nwpwxc_is_on().and.nwpwxc_is_lda()))
458
459        if (use_lda) then
460          call vxc(n2ft3d,ispin,dn,
461     >                      dbl_mb(xcp(1)),
462     >                      dbl_mb(xce(1)),
463     >                      dcpl_mb(tmp1(1)))
464        else
465          call v_bwexc(gga,n2ft3d,ispin,dn,
466     >                      1.0d0,1.0d0,
467     >                      dbl_mb(xcp(1)),
468     >                      dbl_mb(xce(1)))
469        end if
470
471
472*       ******************
473*       **** get Hpsi ****
474*       ******************
475        call nwpw_timing_start(13)
476        call paw_psi_H(ispin,ne,psi1,psi_r,
477     >             dcpl_mb(vl(1)),
478     >             dcpl_mb(vc(1)),dbl_mb(xcp(1)),Hpsi,
479     >             move,dbl_mb(fion(1)))
480
481
482        !call paw_Gop_print()
483
484*       ************************************
485*       **** do a steepest descent step ****
486*       ************************************
487*
488*       **** if gram-schmidt make Hpsi a tangent vecotor ****
489        if (gram_schmidt) then
490
491          call paw_ovlp_S(n2(ispin),psi1,psi2)   ! psi2 = S*psi1
492          do ms=1,ispin
493            call Grsm_ggm_sym_dot(npack1,ne(ms),
494     >                            psi1(1,n1(ms)),
495     >                            Hpsi(1,n1(ms)),
496     >                            dbl_mb(tmp_L(1)))
497            call dscal(ne(ms)*ne(ms),(-1.0d0),dbl_mb(tmp_L(1)),1)
498            call Grsm_gmg_Mul(npack1,ne(ms),
499     >                          psi2(1,n1(ms)),
500     >                          dbl_mb(tmp_L(1)),
501     >                          psi_r)            ! psi_r =  -S*psi1*<psi1|Hpsi2>
502            call Grsm_ggg_Sum(npack1,ne(ms),
503     >                        Hpsi(1,n1(ms)),
504     >                        psi_r,
505     >                        psi2(1,n1(ms)))    ! psi2 = Hpsi - S*psi1*<psi1|Hpsi1>
506
507c            call Grsm_gg_dscale(npack1,ne(ms),dte,
508c     >                         psi2(1,n1(ms)),
509c     >                         psi2(1,n1(ms)))
510            call Grsm_gg_dScale1(npack1,ne(ms),dte,psi2(1,n1(ms)))
511
512c            call Grsm_ggg_Sum(npack1,ne(ms),
513c     >                        psi2(1,n1(ms)),
514c     >                        psi1(1,n1(ms)),
515c     >                        psi2(1,n1(ms)))
516            call Grsm_ggg_Sum2(npack1,ne(ms),
517     >                        psi1(1,n1(ms)),
518     >                        psi2(1,n1(ms)))
519          end do !*ms*
520
521*       **** else don't change Hpsi ***
522        else
523
524          do n=1,n2(ispin)
525            call Pack_c_SMul(1,dte,Hpsi(1,n),psi2(1,n))
526c            call Pack_cc_Sum(1,psi2(1,n),psi1(1,n),psi2(1,n))
527            call Pack_cc_Sum2(1,psi1(1,n),psi2(1,n))
528          end do
529        end if
530
531        call nwpw_timing_end(13)
532
533*       *******************************************
534*       **** get ion forces and do steepest    ****
535*       **** descent on ions                   ****
536*       *******************************************
537
538*       *********************
539*       **** generate force *
540*       *********************
541         if (move) then
542
543           call paw_mult_pw_force(dcpl_mb(vh(1)),
544     >                            dcpl_mb(vcomp(1)),
545     >                            dbl_mb(fion(1)))
546
547           call paw_force_solve(psi1,dbl_mb(fion(1)))
548
549
550*           *** compute hamiltonian matrix if first iteration ****
551           if (first_iteration) then
552             n = ne(1)
553             nn = n*n
554             do ms=1,ispin
555                do ii=n1(ms),n2(ms)
556                  i = ii-n1(ms)
557                  index = (i+1) + i*n + (ms-1)*nn
558                  call Pack_cc_idot(1,psi1(1,ii),Hpsi(1,ii),sum)
559
560                  hml(index) =  -sum
561                  do jj=ii+1,n2(ms)
562                     j = jj-n1(ms)
563                     index  = (i+1) + j*n + (ms-1)*nn
564                     indext = (j+1) + i*n + (ms-1)*nn
565                     call Pack_cc_idot(1,psi1(1,ii),Hpsi(1,jj),sum)
566
567                     hml(index)  =  -sum
568                     hml(indext) =  -sum
569                  end do
570                end do
571             end do
572             if (np.gt.1)  call D3dB_Vector_SumAll((ispin*nn),hml)
573             call dcopy(2*nemax*nemax,hml,1,lmd1,1)
574             call dcopy(2*nemax*nemax,hml,1,lmd,1)
575             first_iteration = .false.
576           end if
577
578           call dcopy(2*nemax*nemax,lmd,1,dbl_mb(tmp_L(1)),1)
579           call dscal(2*nemax*nemax,2.0d0,dbl_mb(tmp_L(1)),1)
580           call daxpy(2*nemax*nemax,-1.0d0,lmd1,1,dbl_mb(tmp_L(1)),1)
581           call paw_force_constraint(dbl_mb(tmp_L(1)),dbl_mb(fion(1)))
582
583
584
585
586*          **** remove ion forces using ion_FixIon ****
587           call ion_FixIon(dbl_mb(fion(1)))
588
589           call ion_optimize_step(dbl_mb(fion(1)))
590        end if
591
592
593*       *****************************************
594*       **** lagrange multiplier corrections ****
595*       *****************************************
596        if (gram_schmidt) then
597          if (move) call phafac2()
598          do ms=1,ispin
599            call paw_psi_MakeOrtho(npack1,ne(ms),psi2(1,n1(ms)))
600          end do
601        else
602
603          call paw_ovlp_S(n2(ispin),psi1,psi_r)
604
605          if (move) call phafac2()
606          call dcopy(2*nemax*nemax,lmd,1,lmd1,1)
607          call paw_psi_lmbda(ispin,ne,nemax,npack1,psi_r,psi2,dte,
608     >                 lmd,dbl_mb(tmp_L(1)),ierr)
609        end if
610
611!*        ***** debug ***
612!         do ms=1,ispin
613!           write(23,*)
614!           write(23,*)
615!           write(23,*) "DEBUG: iteration=",it
616!           write(23,*) "DEBUG: overlap matrix"
617!           call paw_overlap_matrix_gen(ne(ms),ne(ms),
618!     >                                 psi2(1,n1(ms)),
619!     >                                 psi2(1,n1(ms)),
620!     >                                 dbl_mb(tmp_L(1)))
621!           write(23,*) "Overlap matrix, spin:",ms
622!           do i=1,ne(ms)
623!             write(23,*) (dbl_mb(tmp_L(1)+(i-1) +(j-1)*ne(ms)),
624!     >                   j=1,ne(ms))
625!           end do
626!         end do
627!*        ***** debug ***
628
629
630      end do
631
632*     *************************************
633*     ***** total energy calculation ******
634*     *************************************
635      call nwpw_timing_start(10)
636
637      !if (move) call phafac() !*** reset phase factors to r1 ***
638
639*     *** get orbital energies ****
640      n = ne(1)
641      nn = n*n
642      do ms=1,ispin
643         do ii=n1(ms),n2(ms)
644           i = ii-n1(ms)
645           index = (i+1) + i*n + (ms-1)*nn
646           call Pack_cc_idot(1,psi1(1,ii),Hpsi(1,ii),sum)
647
648           hml(index) =  -sum
649           do jj=ii+1,n2(ms)
650              j = jj-n1(ms)
651              index  = (i+1) + j*n + (ms-1)*nn
652              indext = (j+1) + i*n + (ms-1)*nn
653              call Pack_cc_idot(1,psi1(1,ii),Hpsi(1,jj),sum)
654
655              hml(index)  =  -sum
656              hml(indext) =  -sum
657           end do
658         end do
659      end do
660      if (np.gt.1)  call D3dB_Vector_SumAll((ispin*nn),hml)
661      eorbit = 0.0d0
662      do ms=1,ispin
663         do ii=1,ne(ms)
664            index = (ii) + (ii-1)*n + (ms-1)*nn
665            eorbit = eorbit + hml(index)
666         end do
667      end do
668      if (ispin.eq.1) eorbit = eorbit+eorbit
669
670
671
672*     **** get coulomb energy ****
673      call Pack_cc_Sum(0,
674     >                   dcpl_mb(dng(1)),
675     >                   dn_cmp_smooth,
676     >                   dcpl_mb(tmp1(1)))
677      call Pack_c_Copy(0,
678     >                   dcpl_mb(vcomp(1)),
679     >                   dcpl_mb(tmp2(1)))
680      call Pack_cc_daxpy(0,0.5d0,
681     >                   dcpl_mb(vh(1)),
682     >                   dcpl_mb(tmp2(1)))
683      call Pack_cc_dot(0,
684     >                  dcpl_mb(tmp1(1)),
685     >                  dcpl_mb(tmp2(1)),
686     >                  ehartr_pw)
687      ehartr_pw = ehartr_pw*lattice_omega()
688
689
690
691
692*     **** get exchange-correlation energy ****
693      call D3dB_rr_dot(1,dn(1,1),dbl_mb(xce(1)),exc)
694      call D3dB_rr_dot(1,dn(1,1),dbl_mb(xcp(1)),pxc)
695      if (ispin.eq.1) then
696         exc= exc + exc
697         pxc= pxc + pxc
698      else
699         call D3dB_rr_dot(1,dn(1,2),dbl_mb(xce(1)),exc2)
700         call D3dB_rr_dot(1,dn(1,2),dbl_mb(xcp(1)+n2ft3d),pxc2)
701         exc= exc + exc2
702         pxc= pxc + pxc2
703      end if
704      exc = exc*dv
705      pxc = pxc*dv
706
707
708*     ***** average Kohn-Sham kinetic energy ****
709      call ke_ave(ispin,ne,psi1,eke,.false.,occ)
710
711
712*     **** average Kohn-Sham v_local energy ****
713      call Pack_cc_dot(0,dcpl_mb(dng(1)),dcpl_mb(vl(1)),evloc_pw)
714
715
716
717
718*     ***** average Kohn-Sham v_nonlocal energy ****
719c     call dcopy(2*npack1*nemax,0.0d0,0,Hpsi,1)
720c     call v_nonlocal(ispin,ne,psi1,Hpsi,
721c    >                move,dbl_mb(ftest(1)))
722      enlocal = 0.0d0
723c     do ms=1,ispin
724c     do n=n1(ms),n2(ms)
725c        call Pack_cc_idot(1,psi1(1,n),Hpsi(1,n),sum)
726c        enlocal = enlocal - sum
727c     end do
728c     end do
729c     if (np.gt.1) call D3dB_SumAll(enlocal)
730c     if (ispin.eq.1) enlocal = 2.0d0*enlocal
731
732*     **** atomic energies ***
733      ehartree_atom = paw_energy_hartree_atom()
734      ekin_atom = paw_energy_kin_atom()
735      evloc_atom = paw_energy_vloc_atom()
736      eion_atom = paw_energy_ion_atom()
737      ecore_atom = paw_energy_core_atom()
738      ecore_ion_atom = paw_energy_core_ion_atom()
739      ecore_self_atom = paw_energy_core_self_atom()
740      exc_atom = paw_energy_xc_atom()
741
742*?????????????????????? what is this ??????????????
743      call Pack_c_unpack(0,dn_cmp_smooth)
744      call D3dB_cr_fft3b(1,dn_cmp_smooth)
745      call D3dB_r_Zero_Ends(1,dn_cmp_smooth)
746
747
748*     *** fill in total energy array ***
749
750*     *** kinetic energy
751      E(2) = eke
752      E(3) = ekin_atom
753      E(4) = ehartr_pw
754
755*     *** coulomb contributions
756      E(5) = eion_atom + ecore_atom + ehartree_atom +
757     >       ecore_ion_atom + ecore_self_atom +
758     >       paw_mult_energy_atom_self() +
759     >       paw_mult_energy_atom_comp()
760
761      E(6)=paw_mult_energy_atom_mult()
762
763*     *** exch-correlation
764      E(7) = exc
765      E(8) = exc_atom
766
767*     *** local pseudopot ***
768      E(9)  = evloc_pw
769      E(10) = evloc_atom
770
771
772*     *** total energy ***
773      Eold=E(1)
774      E(1) = 0.0d0
775      do i=2,10
776       E(1) = E(1) + E(i)
777      end do
778
779      E(11) = eorbit
780
781*     **** set convergence variables ****
782      deltae = (E(1)-Eold)/(dt*dble(it_in))
783
784*     *** deltac ***
785      do n=n1(1),n2(ispin)
786         do i=1,npack1
787            Hpsi(i,n) = psi2(i,n) - psi1(i,n)
788         end do
789      end do
790
791      do n=n1(1),n2(ispin)
792         call Pack_cc_idot(1,Hpsi(1,n),Hpsi(1,n),dbl_mb(sumi(1)+n-1))
793      end do
794      if (np.gt.1)
795     >     call D3dB_Vector_SumAll((n2(ispin)-n1(1)),
796     >                             dbl_mb(sumi(1)))
797      deltac = 0.0d0
798      do n=n1(1),n2(ispin)
799         if (dbl_mb(sumi(1)+n-1).gt.deltac) deltac=dbl_mb(sumi(1)+n-1)
800      end do
801      deltac = deltac/dte
802
803
804*     *** deltar ***
805      deltar = deltamm
806      if (move) then
807        do i=1,ion_nion()
808           sum = dsqrt( dbl_mb(fion(1)+(i-1)*3  )**2
809     >                + dbl_mb(fion(1)+(i-1)*3+1)**2
810     >                + dbl_mb(fion(1)+(i-1)*3+2)**2)
811           if (sum.gt.deltar) deltar = sum
812        end do
813      end if
814
815      call nwpw_timing_end(10)
816
817*     **** dealocate MA local variables ****
818      call nwpw_timing_start(12)
819      value = BA_pop_stack(sumi(2))
820      value = BA_pop_stack(ftest(2))
821      value = BA_pop_stack(fion(2))
822      value = BA_pop_stack(dnall(2))
823      value = BA_pop_stack(xce(2))
824      value = BA_pop_stack(xcp(2))
825      value = BA_pop_stack(dng(2))
826      value = BA_pop_stack(rho(2))
827      value = BA_pop_stack(vl(2))
828
829
830      value = BA_pop_stack(vc(2))
831      value = BA_pop_stack(vh(2))
832      value = BA_pop_stack(vcomp(2))
833      value = BA_pop_stack(tmp2(2))
834      value = BA_pop_stack(tmp1(2))
835      value = BA_pop_stack(tmp_L(2))
836
837      call nwpw_timing_end(12)
838
839
840
841      return
842      end
843
844