1*
2* $Id$
3*
4
5*     *************************
6*     *                       *
7*     *     band_init_HFX     *
8*     *                       *
9*     *************************
10      subroutine band_init_HFX(rtdb,nbrill0,ispin0,ne)
11      implicit none
12      integer rtdb
13      integer nbrill0,ispin0
14      integer ne(2)
15
16#include "bafdecls.fh"
17#include "btdb.fh"
18#include "errquit.fh"
19#include "band_hfx.fh"
20
21*     **** local variables ****
22      logical value
23      integer ma_type
24      integer n1,n2,n3,mapping,ms,neq(2)
25
26*     **** external functions ****
27      integer  control_version,control_mapping,Pneb_nbrillq
28      external control_version,control_mapping,Pneb_nbrillq
29      integer  cpsi_data_alloc
30      external cpsi_data_alloc
31
32      nbrillioun = nbrill0
33      nbrillq = Pneb_nbrillq()
34      ispin = ispin0
35      norbs(1) = 0
36      norbs(2) = 0
37      ehfx = 0.0d0
38      hfx_on = .false.
39      call C3dB_nfft3d(1,nfft3d)
40
41      if (.not.btdb_get(rtdb,'band:HFX',mt_log,1,hfx_on))
42     >   hfx_on = .false.
43
44
45*     **** get the number of HFX orbitals ****
46      if (hfx_on) then
47         norbs(1) = ne(1)
48         norbs(2) = ne(2)
49         neall = ne(1)+ne(2)
50
51         if (.not. btdb_get(rtdb,
52     >                      'band:HFX_screening_radius',
53     >                      mt_dbl,1,rcut))
54     >       rcut = 8.0d0
55
56         if (.not. btdb_get(rtdb,
57     >                      'band:HFX_screening_power',
58     >                      mt_dbl,1,pp))
59     >       pp = 8.0d0
60
61         if (.not. btdb_get(rtdb,
62     >                      'band:HFX_screening_type',
63     >                      mt_int,1,flag))
64     >       flag = 0
65
66         if (.not. btdb_get(rtdb,
67     >                      'band:HFX_relax',
68     >                      mt_log,1,relaxed))
69     >       relaxed = .true.
70
71         if (.not. btdb_get(rtdb,
72     >                      'band:HFX_solver_type',
73     >                      mt_int,1,solver_type)) then
74
75            solver_type = 1
76         end if
77
78         if (.not. btdb_get(rtdb,
79     >                      'band:HFX_parameter',
80     >                       mt_dbl,1,HFX_parameter))
81     >       HFX_parameter = 1.0d0
82
83         if (.not. btdb_get(rtdb,
84     >                      'band:HFX_print_orbital_contribution',
85     >                       mt_log,1,orb_contribution))
86     >       orb_contribution = .false.
87
88
89*        **** initialize coulomb_screened ****
90         call c_coulomb_screened_init(flag,rcut,pp)
91
92*
93*        **** initialize orb_contribution ****
94c         do ms=1,ispin
95c           value = BA_alloc_get(mt_dbl,norbs(ms),
96c     >                'ehfx_orb',ehfx_orb(2,ms),ehfx_orb(1,ms))
97c           if (.not. value)
98c     >       call errquit('band_init_HFX: out of heap memory',1, MA_ERR)
99c         end do
100
101      end if
102
103
104c     **** define extra psi and Hpsi  ****
105      call Parallel3d_np_k(npk)
106      call Parallel3d_taskid_k(taskid_k)
107      npkrot  = npk/2
108      npkeven = (mod(npk,2).eq.0)
109      if (.not.npkeven) npkrot = npkrot+1
110      replicated = (npk.gt.1)
111      if (hfx_on.and.replicated) then
112         nbrillq_max = nbrillq
113         call Parallel_IMaxAll(nbrillq_max)
114         nrsize = 2*nbrillq_max*neall*nfft3d
115         psi_rep1_tag  = cpsi_data_alloc(nbrillq_max,neall,2*nfft3d)
116         psi_rep2_tag  = cpsi_data_alloc(nbrillq_max,neall,2*nfft3d)
117         Hpsi_rep1_tag = cpsi_data_alloc(nbrillq_max,neall,2*nfft3d)
118         Hpsi_rep2_tag = cpsi_data_alloc(nbrillq_max,neall,2*nfft3d)
119         value = BA_alloc_get(mt_dbl,4*nbrillq_max,'kw_rep1',
120     >                        kw_rep1(2),kw_rep1(1))
121         value = value.and.
122     >           BA_alloc_get(mt_dbl,4*nbrillq_max,'kw_rep2',
123     >                        kw_rep2(2),kw_rep2(1))
124         value = value.and.
125     >           BA_alloc_get(mt_int,npk,'nbrillq_rep',
126     >                        nbrillq_rep(2),nbrillq_rep(1))
127         if (.not. value)
128     >      call errquit('band_init_HFX: out of heap memory',1, MA_ERR)
129
130         do n1=1,npk
131            int_mb(nbrillq_rep(1)+n1-1) = 0
132         end do
133         int_mb(nbrillq_rep(1)+taskid_k) = nbrillq
134         call Parallela_Vector_ISumAll(3,npk,int_mb(nbrillq_rep(1)))
135      end if
136
137
138      return
139      end
140
141
142*     *************************
143*     *                       *
144*     *     band_end_HFX      *
145*     *                       *
146*     *************************
147      subroutine band_end_HFX()
148      implicit none
149
150#include "bafdecls.fh"
151#include "band_hfx.fh"
152#include "errquit.fh"
153
154*     **** local variables ****
155      integer MASTER,taskid
156      parameter(MASTER=0)
157      logical value
158      integer i,ms
159
160*     **** external functions ****
161      integer  control_version
162      external control_version
163
164      if ((norbs(1)+norbs(2)).gt.0) then
165
166*       **** print out orbital contributions ****
167c        if (orb_contribution) then
168c           call Parallel_taskid(taskid)
169c           if (taskid.eq.MASTER) then
170c              write(6,487)
171c              write(6,488)
172c              do ms=1,ispin
173c              do i=1,norbs(ms)
174c                write(6,489)
175c     >            ms,i,
176c     >            dbl_mb(ehfx_orb(1,ms)+i-1)
177c              end do
178c              end do
179c           end if
180c  487   format(//,'== Orbital Contributions to HFX ==')
181c  488   format(/1x,'orbital',15x,
182c     >         'HF_Exchange')
183c  489   format(1x,i3,i7,2x,e18.6)
184c        end if
185
186
187*       **** deallocate memory ****
188        value = .true.
189c        do ms=1,ispin
190c          value = value.and.BA_free_heap(ehfx_orb(2,ms))
191c        end do
192
193*        **** end coulomb_screened ****
194         call c_coulomb_screened_end()
195
196*        **** deallocate replicated space if necessary ****
197         if (replicated) then
198            call cpsi_data_dealloc(psi_rep1_tag)
199            call cpsi_data_dealloc(psi_rep2_tag)
200            call cpsi_data_dealloc(Hpsi_rep1_tag)
201            call cpsi_data_dealloc(Hpsi_rep2_tag)
202            value =           BA_free_heap(kw_rep1(2))
203            value = value.and.BA_free_heap(kw_rep2(2))
204            value = value.and.BA_free_heap(nbrillq_rep(2))
205            if (.not. value)
206     >         call errquit('band_end_HFX: free heap memory',1,MA_ERR)
207         end if
208
209      end if
210
211      return
212      end
213
214*     *************************
215*     *                       *
216*     *     band_print_HFX    *
217*     *                       *
218*     *************************
219      subroutine band_print_HFX(unit)
220      implicit none
221      integer unit
222
223#include "bafdecls.fh"
224#include "band_hfx.fh"
225
226*     **** local variables ****
227      integer i,ms
228      real*8   control_attenuation
229      external control_attenuation
230
231      if (hfx_on) then
232        if (relaxed) then
233          write(unit,1001)
234        else
235          write(unit,1002)
236        end if
237        write(unit,1006)
238        if (rcut.ge.0.0d0) write(unit,1008) rcut
239        if (rcut.ge.0.0d0) write(unit,1009) pp
240        if (rcut.ge.0.0d0) write(unit,1011) flag
241        if ((rcut.ge.0.0d0).and.(flag.eq.2))
242     >     write(unit,1012) control_attenuation()
243        if (hfx_parameter.ne.1.0d0) write(unit,1010) hfx_parameter
244        write(unit,*)
245      end if
246
247      return
248 1001 FORMAT(6x,"- HFX relaxed")
249 1002 FORMAT(6x,"- HFX unrelaxed")
250 1003 FORMAT(6x,"- HFX restricted orbitals :",10I5)
251 1004 FORMAT(6x,"- HFX alpha orbitals:",10I5)
252 1005 FORMAT(6x,"- HFX beta orbitals :",10I5)
253
254 1006 FORMAT(6x,"- HFX screened coulomb solver")
255 1007 FORMAT(6x,"- HFX free-space coulomb solver")
256 1008 FORMAT(6x,"- HFX screening radius(band:HFX_screening_radius):",
257     >       E10.3)
258 1009 FORMAT(6x,"- HFX screening power (band:HFX_screening_power) :",
259     >       E10.3)
260 1010 FORMAT(6x,"- HFX scaling parameter (band:HFX_parameter)     :",
261     >       E10.3)
262 1011 FORMAT(6x,"- HFX screening type (band:HFX_screening_type)   :",
263     >       I2)
264 1012 FORMAT(6x,"- attenuation parameter (nwpw:attenuation)       :",
265     >       E10.3)
266      end
267
268
269
270*     ****************************
271*     *                    	 *
272*     *     band_potential_HFX   *
273*     *                          *
274*     ****************************
275      subroutine band_potential_HFX(ispin0,psi_r_tag,Hpsi_r_tag)
276      implicit none
277      integer    ispin0
278      integer    psi_r_tag
279      integer    Hpsi_r_tag
280
281#include "bafdecls.fh"
282#include "band_hfx.fh"
283#include "errquit.fh"
284
285      integer nb1,nb2
286      integer istart,iend,jstart,jend,imodn,imodtask
287      integer ms,l,q,n,indx1,indx2,Levels,neq(2)
288      integer nbrillq_rep1,r1,r2
289      integer request2(4),request3(4),request4(4)
290      real*8  kv1(3),kv2(3),w2
291
292*     **** external functions ****
293      integer  cpsi_data_get_chnk,cpsi_data_get_allptr
294      external cpsi_data_get_chnk,cpsi_data_get_allptr
295      real*8   brillioun_weight,brillioun_k
296      external brillioun_weight,brillioun_k
297c      integer  Butter_Levels,Dneall_na_ptr
298c      external Butter_Levels,Dneall_na_ptr
299
300      call nwpw_timing_start(33)
301      ehfx = 0.0d0
302      phfx = 0.0d0
303      if (((norbs(1)+norbs(2)).ne.0).and.relaxed) then
304
305c        **** reduceall algorithm ****
306         if (replicated) then
307
308*           **** initialize replicated data ****
309            call dcopy(4*nbrillq_max,0.0d0,0,dbl_mb(kw_rep1(1)),1)
310            call dcopy(4*nbrillq_max,0.0d0,0,dbl_mb(kw_rep2(1)),1)
311            do nb1=1,nbrillq
312               dbl_mb(kw_rep1(1)+4*(nb1-1))   = brillioun_k(1,nb1)
313               dbl_mb(kw_rep1(1)+4*(nb1-1)+1) = brillioun_k(2,nb1)
314               dbl_mb(kw_rep1(1)+4*(nb1-1)+2) = brillioun_k(3,nb1)
315               dbl_mb(kw_rep1(1)+4*(nb1-1)+3) = brillioun_weight(nb1)
316            end do
317            r2 = taskid_k
318            nbrillq_rep1 = nbrillq
319            call dcopy(nrsize,0.0d0,0,
320     >                 dbl_mb(cpsi_data_get_allptr(psi_rep1_tag)),1)
321            call dcopy(nrsize,0.0d0,0,
322     >                 dbl_mb(cpsi_data_get_allptr(psi_rep2_tag)),1)
323            call dcopy(nrsize,0.0d0,0,
324     >                 dbl_mb(cpsi_data_get_allptr(Hpsi_rep1_tag)),1)
325            call dcopy(nrsize,0.0d0,0,
326     >                 dbl_mb(cpsi_data_get_allptr(Hpsi_rep2_tag)),1)
327
328            call dcopy(nbrillq*2*nfft3d*neall,
329     >                 dbl_mb(cpsi_data_get_allptr(psi_r_tag)),1,
330     >                 dbl_mb(cpsi_data_get_allptr(psi_rep1_tag)),1)
331            call dcopy(nbrillq*2*nfft3d*neall,
332     >                 0.0d0,0,
333     >                 dbl_mb(cpsi_data_get_allptr(Hpsi_r_tag)),1)
334
335
336            call Parallela_start_rotate(3,1,12,
337     >                                 dbl_mb(kw_rep1(1)),4*nbrillq_max,
338     >                                 dbl_mb(kw_rep2(1)),4*nbrillq_max,
339     >                                 request2)
340            call Parallela_start_rotate(3,1,13,
341     >           dbl_mb(cpsi_data_get_allptr(psi_rep1_tag)),nrsize,
342     >           dbl_mb(cpsi_data_get_allptr(psi_rep2_tag)),nrsize,
343     >           request3)
344
345            ehfx   = 0.0d0
346            phfx   = 0.0d0
347            kv1(1) = 0.0d0
348            kv1(2) = 0.0d0
349            kv1(3) = 0.0d0
350            kv2(1) = 0.0d0
351            kv2(2) = 0.0d0
352            kv2(3) = 0.0d0
353            call c_coulomb_screened_set(kv1,kv2)
354            do nb1=1,nbrillq
355               call band_potential_HFX_sub(brillioun_weight(nb1),
356     >                  dbl_mb(cpsi_data_get_chnk(psi_r_tag, nb1)),
357     >                  dbl_mb(cpsi_data_get_chnk(Hpsi_r_tag,nb1)))
358            end do
359            do nb1=1,nbrillq-1
360               do nb2=nb1+1,nbrillq
361                  kv1(1) = brillioun_k(1,nb1)
362                  kv1(2) = brillioun_k(2,nb1)
363                  kv1(3) = brillioun_k(3,nb1)
364                  kv2(1) = brillioun_k(1,nb2)
365                  kv2(2) = brillioun_k(2,nb2)
366                  kv2(3) = brillioun_k(3,nb2)
367                  call c_coulomb_screened_set(kv1,kv2)
368                  call band_potential_HFX_sub2(brillioun_weight(nb1),
369     >                                         brillioun_weight(nb2),
370     >                    dbl_mb(cpsi_data_get_chnk(psi_r_tag, nb1)),
371     >                    dbl_mb(cpsi_data_get_chnk(psi_r_tag, nb2)),
372     >                    dbl_mb(cpsi_data_get_chnk(Hpsi_r_tag,nb1)),
373     >                    dbl_mb(cpsi_data_get_chnk(Hpsi_r_tag,nb2)))
374               end do
375            end do
376
377            do r1=1,npkrot-1
378               r2 = mod(r2-1+npk,npk)
379               nbrillq_rep1 = int_mb(nbrillq_rep(1)+r2)
380
381               call Parallela_end_rotate(request2)
382               call dcopy(4*nbrillq_max,
383     >                    dbl_mb(kw_rep2(1)),1,dbl_mb(kw_rep1(1)),1)
384
385               call Parallela_end_rotate(request3)
386               call dcopy(nrsize,
387     >                 dbl_mb(cpsi_data_get_allptr(psi_rep2_tag)),1,
388     >                 dbl_mb(cpsi_data_get_allptr(psi_rep1_tag)),1)
389
390               if (r1.gt.1) then
391                  call Parallela_end_rotate(request4)
392                  call dcopy(nrsize,
393     >                 dbl_mb(cpsi_data_get_allptr(Hpsi_rep2_tag)),1,
394     >                 dbl_mb(cpsi_data_get_allptr(Hpsi_rep1_tag)),1)
395               end if
396
397               if ((r1.lt.(npkrot-1)).or.(npkeven)) then
398                  call Parallela_start_rotate(3,1,12,
399     >                                 dbl_mb(kw_rep1(1)),4*nbrillq_max,
400     >                                 dbl_mb(kw_rep2(1)),4*nbrillq_max,
401     >                                 request2)
402                  call Parallela_start_rotate(3,1,13,
403     >              dbl_mb(cpsi_data_get_allptr(psi_rep1_tag)),nrsize,
404     >              dbl_mb(cpsi_data_get_allptr(psi_rep2_tag)),nrsize,
405     >              request3)
406               end if
407
408               do nb1=1,nbrillq
409                  do nb2=1,nbrillq_rep1
410                     kv1(1) = brillioun_k(1,nb1)
411                     kv1(2) = brillioun_k(2,nb1)
412                     kv1(3) = brillioun_k(3,nb1)
413                     kv2(1) = dbl_mb(kw_rep1(1)+4*(nb2-1))
414                     kv2(2) = dbl_mb(kw_rep1(1)+4*(nb2-1)+1)
415                     kv2(3) = dbl_mb(kw_rep1(1)+4*(nb2-1)+2)
416                     w2     = dbl_mb(kw_rep1(1)+4*(nb2-1)+3)
417
418                     call c_coulomb_screened_set(kv1,kv2)
419                     call band_potential_HFX_sub2(
420     >                    brillioun_weight(nb1),w2,
421     >                    dbl_mb(cpsi_data_get_chnk(psi_r_tag,nb1)),
422     >                    dbl_mb(cpsi_data_get_chnk(psi_rep1_tag,nb2)),
423     >                    dbl_mb(cpsi_data_get_chnk(Hpsi_r_tag,nb1)),
424     >                    dbl_mb(cpsi_data_get_chnk(Hpsi_rep1_tag,nb2)))
425                  end do
426               end do
427
428               if (r1.lt.(npkrot-1)) then
429                  call Parallela_start_rotate(3,1,14,
430     >              dbl_mb(cpsi_data_get_allptr(Hpsi_rep1_tag)),nrsize,
431     >              dbl_mb(cpsi_data_get_allptr(Hpsi_rep2_tag)),nrsize,
432     >              request4)
433               else
434                  call Parallela_start_rotate(3,-r1,14,
435     >              dbl_mb(cpsi_data_get_allptr(Hpsi_rep1_tag)),nrsize,
436     >              dbl_mb(cpsi_data_get_allptr(Hpsi_rep2_tag)),nrsize,
437     >              request4)
438               end if
439
440            end do
441            if (npkeven) then
442               r2 = mod(r2-1+npk,npk)
443               nbrillq_rep1 = int_mb(nbrillq_rep(1)+r2)
444
445               call Parallela_end_rotate(request2)
446               call dcopy(4*nbrillq_max,
447     >                    dbl_mb(kw_rep2(1)),1,dbl_mb(kw_rep1(1)),1)
448
449               call Parallela_end_rotate(request3)
450               call dcopy(nrsize,
451     >                 dbl_mb(cpsi_data_get_allptr(psi_rep2_tag)),1,
452     >                 dbl_mb(cpsi_data_get_allptr(psi_rep1_tag)),1)
453
454               do nb1=1,nbrillq
455                  do nb2=1,nbrillq_rep1
456                     kv1(1) = brillioun_k(1,nb1)
457                     kv1(2) = brillioun_k(2,nb1)
458                     kv1(3) = brillioun_k(3,nb1)
459                     kv2(1) = dbl_mb(kw_rep1(1)+4*(nb2-1))
460                     kv2(2) = dbl_mb(kw_rep1(1)+4*(nb2-1)+1)
461                     kv2(3) = dbl_mb(kw_rep1(1)+4*(nb2-1)+2)
462                     w2     = dbl_mb(kw_rep1(1)+4*(nb2-1)+3)
463
464                     call c_coulomb_screened_set(kv1,kv2)
465                     call band_potential_HFX_sub3(
466     >                    brillioun_weight(nb1),w2,
467     >                    dbl_mb(cpsi_data_get_chnk(psi_r_tag,nb1)),
468     >                    dbl_mb(cpsi_data_get_chnk(psi_rep1_tag,nb2)),
469     >                    dbl_mb(cpsi_data_get_chnk(Hpsi_r_tag,nb1)))
470                  end do
471               end do
472            end if
473
474            call Parallela_end_rotate(request4)
475            call daxpy(nbrillq*2*nfft3d*neall,1.0d0,
476     >                 dbl_mb(cpsi_data_get_allptr(Hpsi_rep2_tag)),1,
477     >                 dbl_mb(cpsi_data_get_allptr(Hpsi_r_tag)),1)
478
479            call Parallel_SumAll(ehfx)
480            call Parallel_SumAll(phfx)
481
482         else
483            ehfx   = 0.0d0
484            phfx   = 0.0d0
485            kv1(1) = 0.0d0
486            kv1(2) = 0.0d0
487            kv1(3) = 0.0d0
488            kv2(1) = 0.0d0
489            kv2(2) = 0.0d0
490            kv2(3) = 0.0d0
491            call c_coulomb_screened_set(kv1,kv2)
492            do nb1=1,nbrillq
493
494               call dcopy(2*nfft3d*neall,0.0d0,0,
495     >                    dbl_mb(cpsi_data_get_chnk(Hpsi_r_tag,nb1)),1)
496               call band_potential_HFX_sub(brillioun_weight(nb1),
497     >                  dbl_mb(cpsi_data_get_chnk(psi_r_tag, nb1)),
498     >                  dbl_mb(cpsi_data_get_chnk(Hpsi_r_tag,nb1)))
499            end do
500            do nb1=1,nbrillq-1
501               do nb2=nb1+1,nbrillq
502                  kv1(1) = brillioun_k(1,nb1)
503                  kv1(2) = brillioun_k(2,nb1)
504                  kv1(3) = brillioun_k(3,nb1)
505                  kv2(1) = brillioun_k(1,nb2)
506                  kv2(2) = brillioun_k(2,nb2)
507                  kv2(3) = brillioun_k(3,nb2)
508
509                  call c_coulomb_screened_set(kv1,kv2)
510                  call band_potential_HFX_sub2(brillioun_weight(nb1),
511     >                                         brillioun_weight(nb2),
512     >                    dbl_mb(cpsi_data_get_chnk(psi_r_tag, nb1)),
513     >                    dbl_mb(cpsi_data_get_chnk(psi_r_tag, nb2)),
514     >                    dbl_mb(cpsi_data_get_chnk(Hpsi_r_tag,nb1)),
515     >                    dbl_mb(cpsi_data_get_chnk(Hpsi_r_tag,nb2)))
516               end do
517            end do
518            call Parallel_SumAll(ehfx)
519            call Parallel_SumAll(phfx)
520         end if
521
522
523      end if
524      call nwpw_timing_end(33)
525
526      return
527      end
528
529
530*     *************************
531*     *                       *
532*     *     band_energy_HFX   *
533*     *                       *
534*     *************************
535      subroutine band_energy_HFX(ispin0,psi_r_tag,ehfx_out,phfx_out)
536      implicit none
537      integer ispin0
538      integer psi_r_tag
539      real*8 ehfx_out
540      real*8 phfx_out
541
542#include "bafdecls.fh"
543#include "band_hfx.fh"
544#include "errquit.fh"
545
546      integer q,n,indx1,indx2
547
548      call nwpw_timing_start(33)
549c     **** calculate HFX energy  ****
550      if (((norbs(1)+norbs(2)).ne.0).and.(.not.relaxed)) then
551         call band_energy_HFX_sub(ispin0,psi_r_tag,ehfx_out,phfx_out)
552
553c     **** nothing to do ****
554      else
555         ehfx_out = ehfx
556         phfx_out = phfx
557      end if
558      call nwpw_timing_end(33)
559
560      return
561      end
562
563*     ****************************
564*     *                          *
565*     *  band_potential_HFX_orb  *
566*     *                          *
567*     ****************************
568      subroutine band_potential_HFX_orb(nb1,ms1,psi_r_tag,orb_r,Horb_r)
569      implicit none
570      integer nb1,ms1,psi_r_tag
571      complex*16 orb_r(*),Horb_r(*)
572
573#include "bafdecls.fh"
574#include "band_hfx.fh"
575#include "errquit.fh"
576
577*     **** local variables ****
578      integer nbq2
579      real*8 kv1(3),kv2(3)
580
581*     **** external functions ****
582      integer  cpsi_data_get_chnk
583      external cpsi_data_get_chnk
584      real*8   brillioun_weight,brillioun_k,brillioun_all_k
585      external brillioun_weight,brillioun_k,brillioun_all_k
586
587      if ((norbs(ms1).ne.0).and.relaxed) then
588         call dcopy(2*nfft3d,0.0d0,0,Horb_r,1)
589         kv1(1) = brillioun_all_k(1,nb1)
590         kv1(2) = brillioun_all_k(2,nb1)
591         kv1(3) = brillioun_all_k(3,nb1)
592         do nbq2=1,nbrillq
593            kv2(1) = brillioun_k(1,nbq2)
594            kv2(2) = brillioun_k(2,nbq2)
595            kv2(3) = brillioun_k(3,nbq2)
596            call c_coulomb_screened_set(kv1,kv2)
597            call band_potential_HFX_orb_sub(ms1,brillioun_weight(nbq2),
598     >               dbl_mb(cpsi_data_get_chnk(psi_r_tag,nbq2)),
599     >               orb_r,Horb_r)
600         end do
601         call K1dB_Vector_SumAll(2*nfft3d,Horb_r)
602
603      end if
604      return
605      end
606
607
608
609*     *************************
610*     *                       *
611*     *     band_HFX          *
612*     *                       *
613*     *************************
614      logical function band_HFX()
615      implicit none
616
617#include "band_hfx.fh"
618
619      band_HFX= hfx_on
620      return
621      end
622
623*     *************************
624*     *                       *
625*     *   band_HFX_relaxed    *
626*     *                       *
627*     *************************
628      logical function band_HFX_relaxed()
629      implicit none
630
631#include "bafdecls.fh"
632#include "band_hfx.fh"
633
634      band_hfx_relaxed = relaxed
635      return
636      end
637
638
639
640
641
642c***************** sub/replicated routines *****************************
643
644*     ********************************
645*     *                    	     *
646*     *     band_potential_HFX_sub   *
647*     *                              *
648*     ********************************
649*
650* calculates the exact exchange potentials and k=l
651*
652      subroutine band_potential_HFX_sub(weight,psi_r,Hpsi_r)
653      implicit none
654
655#include "band_hfx.fh"
656#include "bafdecls.fh"
657#include "errquit.fh"
658
659      real*8     weight
660      complex*16 psi_r(nfft3d,*)
661      complex*16 Hpsi_r(nfft3d,*)
662
663*     **** local variables ****
664      logical value,done
665      integer i,j,n1,n2,n3,ms
666      integer dn(2),vij(2),tmp1(2),tmp2(2),index1,index2
667      integer psir1,psir2,hpsir1,hpsir2
668      integer i1,j1,k1,NN
669      integer i2,j2,k2
670      integer i3,j3,k3
671      integer kv1,kv2
672      real*8  scal1,scal2,dv,eh,ph,weight2
673
674*     **** external functions ****
675      real*8   lattice_omega,c_icoulomb_screened_e
676      logical   C3dB_rc_pfft3_queue_filled,C3dB_cr_pfft3_queue_filled
677      external lattice_omega,c_icoulomb_screened_e
678      external  C3dB_rc_pfft3_queue_Cilled,C3dB_cr_pfft3_queue_filled
679
680      !ehfx = 0.0d0
681      !phfx = 0.0d0
682      if (((norbs(1)+norbs(2)).ne.0).and.relaxed) then
683
684         call C3dB_nx(1,n1)
685         call C3dB_ny(1,n2)
686         call C3dB_nz(1,n3)
687         value = BA_push_get(mt_dcpl,nfft3d,'dn_hfx',dn(2),dn(1))
688         value = value.and.
689     >           BA_push_get(mt_dcpl,nfft3d,'vij_hfx',vij(2),vij(1))
690         value = value.and.
691     >           BA_push_get(mt_dcpl,nfft3d,'tmp1_hfx',tmp1(2),tmp1(1))
692         value = value.and.
693     >           BA_push_get(mt_dcpl,nfft3d,'tmp2_hfx',tmp2(2),tmp2(1))
694         if (.not. value)
695     >      call errquit('band_potential_HFX:out of stack memory',0,
696     >      MA_ERR)
697
698
699         weight2 = weight*weight
700         scal1 = 1.0d0/dble(n1*n2*n3)
701         scal2 = 1.0d0/lattice_omega()
702         dv = scal1/scal2
703
704*        ***** screened coulomb solver ****
705           do ms=1,ispin
706c              call dcopy(norbs(ms),0.0d0,0,dbl_mb(ehfx_orb(1,ms)),1)
707              NN = norbs(ms)*(norbs(ms)+1)/2
708              i1 = 1
709              j1 = 1
710              k1 = 1
711              i2 = 1
712              j2 = 1
713              k2 = 1
714              i3 = 1
715              j3 = 1
716              k3 = 1
717              done = .false.
718              do while (.not.done)
719
720                 if (k1.le.NN) then
721
722*                   **** generate dnij for Vij  ****
723                    call C3dB_bb_Mul(1,psi_r(1,j1),
724     >                                 psi_r(1,i1),dcpl_mb(dn(1)))
725                    call C3dB_b_SMul1(1,scal2*scal1,dcpl_mb(dn(1)))
726                    call C3dB_rc_pfft3f_queuein(0,dcpl_mb(dn(1)))
727
728                    k1 = k1 + 1
729                    j1 = j1 + 1
730                    if (j1.gt.i1) then
731                       j1 = 1
732                       i1 = i1 + 1
733                    end if
734                 end if
735
736                 if (     ((C3dB_rc_pfft3_queue_filled()).or.(k1.gt.NN))
737     >               .and.(k2.le.NN)) then
738
739                       call C3dB_rc_pfft3f_queueout(0,dcpl_mb(dn(1)))
740
741*                      **** generate Vcoul ****
742                       eh = c_icoulomb_screened_e(dcpl_mb(dn(1)))
743                       call c_coulomb_screened_v(dcpl_mb(dn(1)),
744     >                                           dcpl_mb(vij(1)))
745
746*                      **** apply hfx_parameter, double eh for restricted, and calculcate ph ****
747                       eh = eh*hfx_parameter*weight2
748                       if (ispin.eq.1) eh = eh + eh
749                       ph = 2.0d0*eh
750                       ehfx = ehfx - eh
751                       phfx = phfx - ph
752c                       dbl_mb(ehfx_orb(1,ms)+i2-1)
753c     >                  = dbl_mb(ehfx_orb(1,ms)+i2-1) - eh
754                       if (i2.ne.j2) then
755                          ehfx = ehfx - eh
756                          phfx = phfx - ph
757c                          dbl_mb(ehfx_orb(1,ms)+i2-1)
758c     >                     = dbl_mb(ehfx_orb(1,ms)+i2-1) - eh
759                       end if
760
761                       call C3dB_cr_pfft3b_queuein(0,dcpl_mb(vij(1)))
762
763                       k2 = k2 + 1
764                       j2 = j2 + 1
765                       if (j2.gt.i2) then
766                          j2 = 1
767                          i2 = i2 + 1
768                       end if
769                 end if
770
771                 if ((C3dB_cr_pfft3_queue_filled()).or.(k2.gt.NN)) then
772
773                    call C3dB_cr_pfft3b_queueout(0,dcpl_mb(vij(1)))
774
775*                   **** apply hfx_parameter ****
776                    call C3dB_b_SMul1(1,hfx_parameter*weight,
777     >                                dcpl_mb(vij(1)))
778
779*                   **** generate (Vij)*psi_r ***
780                    call C3dB_bb_ncMul(1,dcpl_mb(vij(1)),
781     >                                 psi_r(1,j3),
782     >                                 dcpl_mb(tmp1(1)))
783
784*                   **** add -(Vij)*psi_r to Hpsi_r ***
785                    call C3dB_bb_Sub2(1,dcpl_mb(tmp1(1)),Hpsi_r(1,i3))
786
787
788                    !**** include off diagonal terms ****
789                    if (i3.ne.j3) then
790*                      **** generate (Vij)*psi_r ***
791                       call C3dB_bb_Mul(1,dcpl_mb(vij(1)),
792     >                                 psi_r(1,i3),
793     >                                 dcpl_mb(tmp2(1)))
794
795*                      **** add -(Vij)*psi_r to Hpsi_r ***
796                       call C3dB_bb_Sub2(1,dcpl_mb(tmp2(1)),
797     >                                   Hpsi_r(1,j3))
798                    end if
799
800                    k3 = k3 + 1
801                    j3 = j3 + 1
802                    if (j3.gt.i3) then
803                       j3 = 1
804                       i3 = i3 + 1
805                    end if
806                 end if
807
808              done = (k1.gt.NN).and.(k2.gt.NN).and.(k3.gt.NN)
809           end do !**** while ****
810
811c           call Parallel_Vector_SumAll(norbs(ms),dbl_mb(ehfx_orb(1,ms)))
812       end do !**** ms *****
813
814
815         value =           BA_pop_stack(tmp2(2))
816         value = value.and.BA_pop_stack(tmp1(2))
817         value = value.and.BA_pop_stack(vij(2))
818         value = value.and.BA_pop_stack(dn(2))
819         if (.not. value)
820     >      call errquit('band_potential_HFX:popping stack memory',0,
821     >      MA_ERR)
822
823      end if
824
825      return
826      end
827
828
829*     ********************************
830*     *                    	     *
831*     *     band_potential_HFX_sub2  *
832*     *                              *
833*     ********************************
834*
835* calculates the exact exchange potentials and k!=l
836*
837      subroutine band_potential_HFX_sub2(weight1,weight2,
838     >                                   psi1_r,psi2_r,
839     >                                   Hpsi1_r,Hpsi2_r)
840      implicit none
841
842#include "band_hfx.fh"
843#include "bafdecls.fh"
844#include "errquit.fh"
845
846      real*8     weight1,weight2
847      complex*16  psi1_r(nfft3d,*), psi2_r(nfft3d,*)
848      complex*16 Hpsi1_r(nfft3d,*),Hpsi2_r(nfft3d,*)
849
850*     **** local variables ****
851      logical value,done
852      integer i,j,n1,n2,n3,ms
853      integer dn12(2),v12(2),tmp1(2),tmp2(2),index1,index2
854      integer psir1,psir2,hpsir1,hpsir2
855      integer i1,j1,k1,N,NN
856      integer i2,j2,k2
857      integer i3,j3,k3
858      integer kv1,kv2
859      real*8  scal1,scal2,dv,eh,ph,weight12
860
861*     **** external functions ****
862      real*8   lattice_omega,c_icoulomb_screened_e
863      logical   C3dB_rc_pfft3_queue_filled,C3dB_cr_pfft3_queue_filled
864      external lattice_omega,c_icoulomb_screened_e
865      external  C3dB_rc_pfft3_queue_Cilled,C3dB_cr_pfft3_queue_filled
866
867      if (((norbs(1)+norbs(2)).ne.0).and.relaxed) then
868
869         call C3dB_nx(1,n1)
870         call C3dB_ny(1,n2)
871         call C3dB_nz(1,n3)
872         value = BA_push_get(mt_dcpl,nfft3d,'dn12_hfx',dn12(2),dn12(1))
873         value = value.and.
874     >           BA_push_get(mt_dcpl,nfft3d,'v12_hfx',v12(2),v12(1))
875         value = value.and.
876     >           BA_push_get(mt_dcpl,nfft3d,'tmp1_hfx',tmp1(2),tmp1(1))
877         value = value.and.
878     >           BA_push_get(mt_dcpl,nfft3d,'tmp2_hfx',tmp2(2),tmp2(1))
879         if (.not. value)
880     >      call errquit('band_potential_HFX:out of stack memory',0,
881     >      MA_ERR)
882
883
884         weight12 = weight1*weight2
885         scal1 = 1.0d0/dble(n1*n2*n3)
886         scal2 = 1.0d0/lattice_omega()
887         dv = scal1/scal2
888
889*        ***** screened coulomb solver ****
890           do ms=1,ispin
891              N  = norbs(ms)
892              NN = N*N
893              i1 = 1
894              j1 = 1
895              k1 = 1
896              i2 = 1
897              j2 = 1
898              k2 = 1
899              i3 = 1
900              j3 = 1
901              k3 = 1
902              done = .false.
903              do while (.not.done)
904
905                 if (k1.le.NN) then
906
907*                   **** generate dn12 for v12 ****
908                    call C3dB_bb_Mul(1,psi1_r(1,j1),
909     >                                 psi2_r(1,i1),
910     >                                 dcpl_mb(dn12(1)))
911                    call C3dB_b_SMul1(1,scal2*scal1,dcpl_mb(dn12(1)))
912                    call C3dB_rc_pfft3f_queuein(0,dcpl_mb(dn12(1)))
913
914                    k1 = k1 + 1
915                    j1 = j1 + 1
916                    if (j1.gt.N) then
917                       j1 = 1
918                       i1 = i1 + 1
919                    end if
920                 end if
921
922                 if (     ((C3dB_rc_pfft3_queue_filled()).or.(k1.gt.NN))
923     >               .and.(k2.le.NN)) then
924
925                       call C3dB_rc_pfft3f_queueout(0,dcpl_mb(dn12(1)))
926
927*                      **** generate V12 ****
928                       eh=2.0d0*c_icoulomb_screened_e(dcpl_mb(dn12(1)))
929                       call c_coulomb_screened_v(dcpl_mb(dn12(1)),
930     >                                           dcpl_mb(v12(1)))
931
932                       call C3dB_cr_pfft3b_queuein(0,dcpl_mb(v12(1)))
933
934*                      **** apply hfx_parameter, double eh for restricted, and calculcate ph ****
935                       eh = eh*hfx_parameter*weight12
936                       if (ispin.eq.1) eh = eh + eh
937                       ph = 2.0d0*eh
938                       ehfx = ehfx - eh
939                       phfx = phfx - ph
940
941                       k2 = k2 + 1
942                       j2 = j2 + 1
943                       if (j2.gt.N) then
944                          j2 = 1
945                          i2 = i2 + 1
946                       end if
947                 end if
948
949                 if ((C3dB_cr_pfft3_queue_filled()).or.(k2.gt.NN)) then
950
951                    call C3dB_cr_pfft3b_queueout(0,dcpl_mb(v12(1)))
952
953*                   **** generate conjg(V12)*psi1_r and add -conjg(V12)*psi1_r to Hpsi2_r ****
954                    call C3dB_bb_ncMul(1,dcpl_mb(v12(1)),psi1_r(1,j3),
955     >                                 dcpl_mb(tmp1(1)))
956*                   **** apply hfx_parameter ****
957                    call C3dB_b_SMul1(1,hfx_parameter*weight1,
958     >                                dcpl_mb(tmp1(1)))
959                    call C3dB_bb_Sub2(1,dcpl_mb(tmp1(1)),Hpsi2_r(1,i3))
960
961
962*                   **** generate (V12)*psi2_r and add -(V12)*psi2_r to Hpsi1_r ****
963                    call C3dB_bb_Mul(1,dcpl_mb(v12(1)),psi2_r(1,i3),
964     >                                 dcpl_mb(tmp2(1)))
965*                   **** apply hfx_parameter ****
966                    call C3dB_b_SMul1(1,hfx_parameter*weight2,
967     >                                dcpl_mb(tmp2(1)))
968                    call C3dB_bb_Sub2(1,dcpl_mb(tmp2(1)),Hpsi1_r(1,j3))
969
970
971                    k3 = k3 + 1
972                    j3 = j3 + 1
973                    if (j3.gt.N) then
974                       j3 = 1
975                       i3 = i3 + 1
976                    end if
977                 end if
978
979              done = (k1.gt.NN).and.(k2.gt.NN).and.(k3.gt.NN)
980           end do !**** while ****
981
982       end do !**** ms *****
983
984
985         value =           BA_pop_stack(tmp2(2))
986         value = value.and.BA_pop_stack(tmp1(2))
987         value = value.and.BA_pop_stack(v12(2))
988         value = value.and.BA_pop_stack(dn12(2))
989         if (.not. value)
990     >   call errquit('band_potential_HFX_sub2:popping stack memory',
991     >                0,MA_ERR)
992
993      end if
994
995      return
996      end
997
998*     ********************************
999*     *                    	     *
1000*     *     band_potential_HFX_sub3  *
1001*     *                              *
1002*     ********************************
1003*
1004* calculates the exact exchange potentials and k!=l
1005*
1006      subroutine band_potential_HFX_sub3(weight1,weight2,
1007     >                                   psi1_r,psi2_r,
1008     >                                   Hpsi1_r)
1009      implicit none
1010
1011#include "band_hfx.fh"
1012#include "bafdecls.fh"
1013#include "errquit.fh"
1014
1015      real*8     weight1,weight2
1016      complex*16  psi1_r(nfft3d,*), psi2_r(nfft3d,*)
1017      complex*16 Hpsi1_r(nfft3d,*)
1018
1019*     **** local variables ****
1020      logical value,done
1021      integer i,j,n1,n2,n3,ms
1022      integer dn12(2),v12(2),tmp2(2)
1023      integer i1,j1,k1,N,NN
1024      integer i2,j2,k2
1025      integer i3,j3,k3
1026      integer kv1,kv2
1027      real*8  scal1,scal2,dv,eh,ph,weight12
1028
1029*     **** external functions ****
1030      real*8   lattice_omega,c_icoulomb_screened_e
1031      logical   C3dB_rc_pfft3_queue_filled,C3dB_cr_pfft3_queue_filled
1032      external lattice_omega,c_icoulomb_screened_e
1033      external  C3dB_rc_pfft3_queue_Cilled,C3dB_cr_pfft3_queue_filled
1034
1035      if (((norbs(1)+norbs(2)).ne.0).and.relaxed) then
1036
1037         call C3dB_nx(1,n1)
1038         call C3dB_ny(1,n2)
1039         call C3dB_nz(1,n3)
1040         value = BA_push_get(mt_dcpl,nfft3d,'dn12_hfx',dn12(2),dn12(1))
1041         value = value.and.
1042     >           BA_push_get(mt_dcpl,nfft3d,'v12_hfx',v12(2),v12(1))
1043         value = value.and.
1044     >           BA_push_get(mt_dcpl,nfft3d,'tmp2_hfx',tmp2(2),tmp2(1))
1045         if (.not. value)
1046     >      call errquit('band_potential_HFX:out of stack memory',0,
1047     >      MA_ERR)
1048
1049
1050         weight12 = weight1*weight2
1051         scal1 = 1.0d0/dble(n1*n2*n3)
1052         scal2 = 1.0d0/lattice_omega()
1053         dv = scal1/scal2
1054
1055*        ***** screened coulomb solver ****
1056           do ms=1,ispin
1057              N  = norbs(ms)
1058              NN = N*N
1059              i1 = 1
1060              j1 = 1
1061              k1 = 1
1062              i2 = 1
1063              j2 = 1
1064              k2 = 1
1065              i3 = 1
1066              j3 = 1
1067              k3 = 1
1068              done = .false.
1069              do while (.not.done)
1070
1071                 if (k1.le.NN) then
1072
1073*                   **** generate dn12 for v12 ****
1074                    call C3dB_bb_Mul(1,psi1_r(1,j1),
1075     >                                 psi2_r(1,i1),
1076     >                                 dcpl_mb(dn12(1)))
1077                    call C3dB_b_SMul1(1,scal2*scal1,dcpl_mb(dn12(1)))
1078                    call C3dB_rc_pfft3f_queuein(0,dcpl_mb(dn12(1)))
1079
1080                    k1 = k1 + 1
1081                    j1 = j1 + 1
1082                    if (j1.gt.N) then
1083                       j1 = 1
1084                       i1 = i1 + 1
1085                    end if
1086                 end if
1087
1088                 if (     ((C3dB_rc_pfft3_queue_filled()).or.(k1.gt.NN))
1089     >               .and.(k2.le.NN)) then
1090
1091                       call C3dB_rc_pfft3f_queueout(0,dcpl_mb(dn12(1)))
1092
1093*                      **** generate V12 ****
1094                       eh = c_icoulomb_screened_e(dcpl_mb(dn12(1)))
1095                       call c_coulomb_screened_v(dcpl_mb(dn12(1)),
1096     >                                           dcpl_mb(v12(1)))
1097
1098                       call C3dB_cr_pfft3b_queuein(0,dcpl_mb(v12(1)))
1099
1100*                      **** apply hfx_parameter, double eh for restricted, and calculcate ph ****
1101                       eh = eh*hfx_parameter*weight12
1102                       if (ispin.eq.1) eh = eh + eh
1103                       ph = 2.0d0*eh
1104                       ehfx = ehfx - eh
1105                       phfx = phfx - ph
1106
1107                       k2 = k2 + 1
1108                       j2 = j2 + 1
1109                       if (j2.gt.N) then
1110                          j2 = 1
1111                          i2 = i2 + 1
1112                       end if
1113                 end if
1114
1115                 if ((C3dB_cr_pfft3_queue_filled()).or.(k2.gt.NN)) then
1116
1117                    call C3dB_cr_pfft3b_queueout(0,dcpl_mb(v12(1)))
1118
1119*                   **** generate (V12)*psi2_r and add -(V12)*psi2_r to Hpsi1_r ****
1120                    call C3dB_bb_Mul(1,dcpl_mb(v12(1)),psi2_r(1,i3),
1121     >                                 dcpl_mb(tmp2(1)))
1122*                   **** apply hfx_parameter ****
1123                    call C3dB_b_SMul1(1,hfx_parameter*weight2,
1124     >                                dcpl_mb(tmp2(1)))
1125                    call C3dB_bb_Sub2(1,dcpl_mb(tmp2(1)),Hpsi1_r(1,j3))
1126
1127                    k3 = k3 + 1
1128                    j3 = j3 + 1
1129                    if (j3.gt.N) then
1130                       j3 = 1
1131                       i3 = i3 + 1
1132                    end if
1133                 end if
1134
1135              done = (k1.gt.NN).and.(k2.gt.NN).and.(k3.gt.NN)
1136           end do !**** while ****
1137
1138       end do !**** ms *****
1139
1140       value =           BA_pop_stack(tmp2(2))
1141       value = value.and.BA_pop_stack(v12(2))
1142       value = value.and.BA_pop_stack(dn12(2))
1143       if (.not. value)
1144     > call errquit('band_potential_HFX_sub3:popping stack memory',
1145     >              0,MA_ERR)
1146
1147      end if
1148
1149      return
1150      end
1151
1152
1153
1154
1155
1156
1157*     *****************************
1158*     *                           *
1159*     *     band_energy_HFX_sub   *
1160*     *                           *
1161*     *****************************
1162      subroutine band_energy_HFX_sub(ispin0,psi_r_tag,ehfx_out,phfx_out)
1163      implicit none
1164      integer ispin0
1165      integer psi_r_tag
1166      real*8 ehfx_out
1167      real*8 phfx_out
1168
1169#include "bafdecls.fh"
1170#include "band_hfx.fh"
1171#include "errquit.fh"
1172
1173*     **** local variables ****
1174      logical value
1175      integer i,j,n1,n2,n3,ms,k1
1176      integer dn(2),tmp1(2),index1,index2
1177      real*8  scal1,scal2,dv,eh,ph
1178
1179*     **** external functions ****
1180      real*8   lattice_omega,coulomb_screened_e
1181      external lattice_omega,coulomb_screened_e
1182
1183
1184      if (((norbs(1)+norbs(2)).ne.0).and.(.not.relaxed)) then
1185        ehfx = 0.0d0
1186        phfx = 0.0d0
1187
1188        call C3dB_nx(1,n1)
1189        call C3dB_ny(1,n2)
1190        call C3dB_nz(1,n3)
1191        value = BA_push_get(mt_dcpl,(2*nfft3d),'dn_hfx',dn(2),dn(1))
1192        value = value.and.
1193     >          BA_push_get(mt_dcpl,(nfft3d),'tmp1_hfx',tmp1(2),tmp1(1))
1194        if (.not. value)
1195     >     call errquit('band_energy_HFX_sub:out of stack memory',0,
1196     >       MA_ERR)
1197
1198        scal1 = 1.0d0/dble(n1*n2*n3)
1199        scal2 = 1.0d0/lattice_omega()
1200        dv = scal1/scal2
1201
1202        k1 = 1
1203c        do ms=1,ispin
1204c        do i=1,norbs(ms)
1205c         dbl_mb(ehfx_orb(1,ms)+i-1) = 0.0d0
1206c         do j=1,i
1207c
1208c            if (mod(k1,npj).eq.taskid_j) then
1209c              index1 = (i-1)*n2ft3d+1
1210c              index2 = (j-1)*n2ft3d+1
1211c
1212c*             **** generate dnij ****
1213c              call D3dB_rr_Mul(1,psi_r(index1),psi_r(index2),
1214c     >                         dbl_mb(dn(1)))
1215cc              call D3dB_r_SMul(1,scal2,dbl_mb(dn(1)),dbl_mb(dn(1)))
1216c              call D3dB_r_SMul1(1,scal2,dbl_mb(dn(1)))
1217c              call D3dB_r_Zero_Ends(1,dbl_mb(dn(1)))
1218c
1219c*             ***** screened coulomb solver ****
1220c              if (solver_type.eq.1) then
1221c
1222c*               **** generate dng ****
1223cc                call D3dB_r_SMul(1,scal1,dbl_mb(dn(1)),
1224cc     >                                   dbl_mb(dn(1)))
1225c                call D3dB_r_SMul1(1,scal1,dbl_mb(dn(1)))
1226c                call D3dB_rc_pfft3f(1,0,dbl_mb(dn(1)))
1227c                call Pack_c_pack(0,dbl_mb(dn(1)))
1228c
1229c*               **** get Ecoul energy ****
1230c                eh = coulomb_screened_e(dbl_mb(dn(1)))
1231c
1232c*             ***** free-space coulomb solver ****
1233c              else
1234c                 call coulomb2_v(dbl_mb(dn(1)),dbl_mb(tmp1(1)))
1235c                 call D3dB_rr_dot(1,dbl_mb(dn(1)),dbl_mb(tmp1(1)),eh)
1236c                 eh = 0.5d0*eh*dv
1237c              end if
1238c
1239c*             **** apply hfx_parameter, double eh for restricted, and calculcate ph ****
1240c              eh = eh*hfx_parameter
1241c              if (ispin0.eq.1) eh = eh + eh
1242c              ph = 2.0d0*eh
1243c
1244c              ehfx = ehfx - eh
1245c              phfx = phfx - ph
1246c              dbl_mb(ehfx_orb(1,ms)+i-1) = dbl_mb(ehfx_orb(1,ms)+i-1)-eh
1247c
1248c              !**** include off diagonal terms ****
1249c              if (i.ne.j) then
1250c                 ehfx = ehfx - eh
1251c                 phfx = phfx - ph
1252c                 dbl_mb(ehfx_orb(1,ms)+i-1) = dbl_mb(ehfx_orb(1,ms)+i-1)
1253c     >                                      - eh
1254c              end if
1255c
1256c            end if
1257c            k1 = k1 + 1
1258c
1259c         end do
1260c        end do
1261c        call D1dB_Vector_SumAll(norbs(ms),dbl_mb(ehfx_orb(1,ms)))
1262c        end do
1263
1264        value =           BA_pop_stack(tmp1(2))
1265        value = value.and.BA_pop_stack(dn(2))
1266        if (.not. value)
1267     >     call errquit('band_energy_HFX_sub:popping stack memory',0,
1268     >       MA_ERR)
1269
1270c        call D1dB_SumAll(ehfx)
1271c        call D1dB_SumAll(phfx)
1272      end if
1273
1274      ehfx_out = ehfx
1275      phfx_out = phfx
1276
1277      return
1278      end
1279
1280
1281
1282      subroutine band_potential_HFX_orb_sub(ms,weight1,
1283     >                                      psi1_r,orb_r,Horb_r)
1284      implicit none
1285
1286#include "band_hfx.fh"
1287#include "bafdecls.fh"
1288#include "errquit.fh"
1289
1290      integer ms
1291      real*8     weight1
1292      complex*16  psi1_r(nfft3d,*)
1293      complex*16 orb_r(nfft3d),Horb_r(nfft3d)
1294
1295*     **** local variables ****
1296      logical value,done
1297      integer i,j,n1,n2,n3,msshift
1298      integer dn12(2),v12(2),tmp1(2)
1299      integer j1,j2,j3,k1,k2,k3,N
1300      real*8  scal1,scal2
1301
1302*     **** external functions ****
1303      real*8   lattice_omega,c_icoulomb_screened_e
1304      logical   C3dB_rc_pfft3_queue_filled,C3dB_cr_pfft3_queue_filled
1305      external lattice_omega,c_icoulomb_screened_e
1306      external  C3dB_rc_pfft3_queue_Cilled,C3dB_cr_pfft3_queue_filled
1307
1308      if ((norbs(ms).gt.0).and.relaxed) then
1309
1310         call C3dB_nx(1,n1)
1311         call C3dB_ny(1,n2)
1312         call C3dB_nz(1,n3)
1313         value = BA_push_get(mt_dcpl,nfft3d,'dn12_hfx',dn12(2),dn12(1))
1314         value = value.and.
1315     >           BA_push_get(mt_dcpl,nfft3d,'v12_hfx',v12(2),v12(1))
1316         value = value.and.
1317     >           BA_push_get(mt_dcpl,nfft3d,'tmp1_hfx',tmp1(2),tmp1(1))
1318         if (.not. value)
1319     >      call errquit('band_potential_HFX:out of stack memory',0,
1320     >      MA_ERR)
1321
1322         scal1 = 1.0d0/dble(n1*n2*n3)
1323         scal2 = 1.0d0/lattice_omega()
1324
1325*        ***** screened coulomb solver ****
1326         msshift = (ms-1)*norbs(1)
1327         N  = norbs(ms)
1328         j1 = 1
1329         j2 = 1
1330         j3 = 1
1331         k1 = 1
1332         k2 = 1
1333         k3 = 1
1334         done = .false.
1335         do while (.not.done)
1336
1337            if (k1.le.N) then
1338
1339*              **** generate dn12 for v12 ****
1340               call C3dB_bb_Mul(1,psi1_r(1,j1+msshift),
1341     >                            orb_r,
1342     >                            dcpl_mb(dn12(1)))
1343               call C3dB_b_SMul1(1,scal2*scal1,dcpl_mb(dn12(1)))
1344               call C3dB_rc_pfft3f_queuein(0,dcpl_mb(dn12(1)))
1345               k1 = k1 + 1
1346               j1 = j1 + 1
1347            end if
1348
1349            if (     ((C3dB_rc_pfft3_queue_filled()).or.(k1.gt.N))
1350     >          .and.(k2.le.N)) then
1351
1352               call C3dB_rc_pfft3f_queueout(0,dcpl_mb(dn12(1)))
1353
1354*              **** generate V12 ****
1355               call c_coulomb_screened_v(dcpl_mb(dn12(1)),
1356     >                                   dcpl_mb(v12(1)))
1357               call C3dB_cr_pfft3b_queuein(0,dcpl_mb(v12(1)))
1358
1359               k2 = k2 + 1
1360               j2 = j2 + 1
1361            end if
1362
1363            if ((C3dB_cr_pfft3_queue_filled()).or.(k2.gt.N)) then
1364
1365               call C3dB_cr_pfft3b_queueout(0,dcpl_mb(v12(1)))
1366
1367*              **** generate conjg(V12)*psi1_r and add -conjg(V12)*psi1_r to Horb_r ****
1368               call C3dB_bb_ncMul(1,dcpl_mb(v12(1)),
1369     >                            psi1_r(1,j3+msshift),
1370     >                            dcpl_mb(tmp1(1)))
1371
1372*              **** apply hfx_parameter ****
1373               call C3dB_b_SMul1(1,hfx_parameter*weight1,
1374     >                                dcpl_mb(tmp1(1)))
1375               call C3dB_bb_Sub2(1,dcpl_mb(tmp1(1)),Horb_r)
1376
1377               k3 = k3 + 1
1378               j3 = j3 + 1
1379            end if
1380
1381            done = (k1.gt.N).and.(k2.gt.N).and.(k3.gt.N)
1382         end do !**** while ****
1383
1384         value =           BA_pop_stack(tmp1(2))
1385         value = value.and.BA_pop_stack(v12(2))
1386         value = value.and.BA_pop_stack(dn12(2))
1387         if (.not. value)
1388     >   call errquit('band_potential_HFX_orb_sub:popping stack memory',
1389     >                0,MA_ERR)
1390
1391      end if
1392
1393      return
1394      end
1395
1396