1********************************* real Gaussian Integrals **************************************
2
3*     *************************************************
4*     *                                               *
5*     *         nwpw_gintegrals_set_gcount            *
6*     *                                               *
7*     *************************************************
8
9      subroutine nwpw_gintegrals_set_gcount()
10      implicit none
11
12#include "bafdecls.fh"
13#include "errquit.fh"
14#include "nwpw_compcharge.fh"
15
16*     **** local variables ****
17      integer taskid,np,pcount,gcount,nshl3d,tcount,gcount0
18      integer l1,m1,l2,m2,iii,jjj,iia,jja
19      integer tid,nthr,nn
20
21*     **** external functions ****
22      integer  control_version,ewald_nshl3d
23      external control_version,ewald_nshl3d
24      integer  Parallel_maxthreads
25      external Parallel_maxthreads
26
27      call Parallel_taskid(taskid)
28      call Parallel_np(np)
29      nthr = Parallel_maxthreads()
30      do l1 = 1,nthr
31         int_mb(tgauss(1)+l1-1) = 0
32      end do
33
34      periodic = (control_version().eq.3)
35
36      if (periodic) then
37         nshl3d = ewald_nshl3d()
38      else
39         nshl3d = 1
40      end if
41
42      pcount = 0
43      gcount = 0
44      tcount = 0
45      do iii=1,nion_paw
46         iia = int_mb(katm_paw(1)+iii-1)
47
48*        **** calculate on-site integrals ****
49         do l1=0,int_mb(mult_l(1)+iia-1)
50         do m1=0,l1
51            if (m1.eq.0) nn = 1
52            if (m1.gt.0) nn = 2
53            if (mod(pcount,np).eq.taskid) then
54               tid = mod(gcount,nthr)
55               int_mb(tgauss(1)+tid) = int_mb(tgauss(1)+tid) + nn
56               tcount = tcount + nn
57               gcount = gcount + 1
58            end if
59            pcount = pcount + 1
60
61            if (nshl3d.gt.1) then
62               do l2=0,int_mb(mult_l(1)+iia-1)
63               do m2=0,l2
64                  if ((m1.eq.0).and.(m2.eq.0)) nn = 1
65                  if ((m1.eq.0).and.(m2.gt.0)) nn = 2
66                  if ((m1.gt.0).and.(m2.eq.0)) nn = 2
67                  if ((m1.gt.0).and.(m2.gt.0)) nn = 4
68                  if (mod(pcount,np).eq.taskid) then
69                     tid = mod(gcount,nthr)
70                     int_mb(tgauss(1)+tid) = int_mb(tgauss(1)+tid) + nn
71                     tcount = tcount + nn
72                     gcount = gcount + 1
73                  end if
74                  pcount = pcount + 1
75               end do
76               end do
77            end if
78         end do
79         end do
80
81*        **** calculate IJ integrals ****
82         do jjj=iii+1,nion_paw
83            jja = int_mb(katm_paw(1)+jjj-1)
84
85            do l1=0,int_mb(mult_l(1)+iia-1)
86            do m1=0,l1
87               do l2=0,int_mb(mult_l(1)+jja-1)
88               do m2=0,l2
89                  if ((m1.eq.0).and.(m2.eq.0)) nn = 1
90                  if ((m1.eq.0).and.(m2.gt.0)) nn = 2
91                  if ((m1.gt.0).and.(m2.eq.0)) nn = 2
92                  if ((m1.gt.0).and.(m2.gt.0)) nn = 4
93                  if (mod(pcount,np).eq.taskid) then
94                     tid = mod(gcount,nthr)
95                     int_mb(tgauss(1)+tid) = int_mb(tgauss(1)+tid) + nn
96                     tcount = tcount + nn
97                     gcount = gcount + 1
98                  end if
99                  pcount = pcount + 1
100               end do
101               end do
102            end do
103            end do
104         end do
105      end do
106      ngauss_max = tcount
107      ngauss     = tcount
108
109      tcount = 0
110      do l1 = 1,nthr
111         int_mb(tgauss_shift(1)+l1-1) = tcount
112         tcount = tcount + int_mb(tgauss(1)+l1-1)
113      end do
114
115      return
116      end
117
118
119*     *************************************************
120*     *                                               *
121*     *           nwpw_gintegrals_init                *
122*     *                                               *
123*     *************************************************
124
125      subroutine nwpw_gintegrals_init()
126      implicit none
127
128#include "bafdecls.fh"
129#include "errquit.fh"
130#include "nwpw_compcharge.fh"
131
132*     **** local variables ****
133      logical value
134      integer nthr
135
136*     **** external functions ****
137      integer  Parallel_maxthreads
138      external Parallel_maxthreads
139
140      nthr = Parallel_maxthreads()
141      value =           BA_alloc_get(mt_int,nthr,"tgauss",
142     >                               tgauss(2),tgauss(1))
143      value = value.and.BA_alloc_get(mt_int,nthr,"tgauss_shift",
144     >                               tgauss_shift(2),tgauss_shift(1))
145
146      call nwpw_gintegrals_set_gcount()
147
148      value = value.and.BA_alloc_get(mt_int,ngauss_max,"lm1_gauss",
149     >                              lm1_gauss(2),lm1_gauss(1))
150      value = value.and.BA_alloc_get(mt_int,ngauss_max,"lm2_gauss",
151     >                              lm2_gauss(2),lm2_gauss(1))
152      value = value.and.BA_alloc_get(mt_int,ngauss_max,"iii1_gauss",
153     >                              iii1_gauss(2),iii1_gauss(1))
154      value = value.and.BA_alloc_get(mt_int,ngauss_max,"iii2_gauss",
155     >                              iii2_gauss(2),iii2_gauss(1))
156      value = value.and.BA_alloc_get(mt_dbl,ngauss_max,"e_gauss",
157     >                              e_gauss(2),e_gauss(1))
158      value = value.and.BA_alloc_get(mt_dbl,3*ngauss_max,"f_gauss",
159     >                              f_gauss(2),f_gauss(1))
160      if (.not.value)
161     > call errquit("nwpw_gintegrals_init:cannot allocate memory",
162     >             0,MA_ERR)
163
164      return
165      end
166
167
168*     *************************************************
169*     *                                               *
170*     *           nwpw_gintegrals_end                 *
171*     *                                               *
172*     *************************************************
173      subroutine nwpw_gintegrals_end()
174      implicit none
175
176#include "bafdecls.fh"
177#include "errquit.fh"
178#include "nwpw_compcharge.fh"
179
180*     **** local variables ****
181      logical value
182
183      value =           BA_free_heap(tgauss(2))
184      value = value.and.BA_free_heap(tgauss_shift(2))
185      value = value.and.BA_free_heap(lm1_gauss(2))
186      value = value.and.BA_free_heap(lm2_gauss(2))
187      value = value.and.BA_free_heap(iii1_gauss(2))
188      value = value.and.BA_free_heap(iii2_gauss(2))
189      value = value.and.BA_free_heap(e_gauss(2))
190      value = value.and.BA_free_heap(f_gauss(2))
191      if (.not.value)
192     > call errquit("nwpw_gintegrals_end:cannot allocate memory",
193     >             0,MA_ERR)
194
195      return
196      end
197
198
199
200*     *************************************************
201*     *                                               *
202*     *             nwpw_gintegrals_set               *
203*     *                                               *
204*     *************************************************
205c
206c  The logic of this routine needs to be completely reworked for threading.
207c  It's well designed for MPI parallelism, so one option is to expand all the
208c  data structures over tasks and threads instead of just tasks.
209c  Another option is to define thread shifts for indx.... However the threshold
210c  check with tole would have to be eliminated.
211c
212      subroutine nwpw_gintegrals_set(move)
213      implicit none
214      logical move
215
216#include "bafdecls.fh"
217#include "errquit.fh"
218#include "nwpw_compcharge.fh"
219
220*     ***** local variables ****
221      real*8 tole
222      parameter (tole=1.0d-25)
223
224      logical value
225      integer taskid,np,pcount,gcount
226      integer ii,jj,ia,ja,indx,shft,inds
227      integer iii,jjj,iia,jja
228      integer l1,m1,l2,m2
229      integer l,nshl3d,rcell,rcell_hndl
230      integer lm1(4),lm2(4),n,i,nn
231      real*8  R1(3),R12(3),s1,s2,Rab(3),Rba(3),R,ss
232      real*8  W1,W2,W3,W4,dW1(3),dW2(3),dW3(3),dW4(3)
233      real*8  W(4),dW(3,4)
234      real*8  e1(4),de1(3,4)
235
236      integer tid,nthr
237      integer lm1_tauss(2),lm2_tauss(2),iii1_tauss(2),iii2_tauss(2)
238      integer e_tauss(2),f_tauss(2)
239
240*     **** external functions ****
241      real*8   ion_rion,nwpw_UGaussian
242      external ion_rion,nwpw_UGaussian
243      integer  nwpw_doublefactorial
244      external nwpw_doublefactorial
245      integer  ewald_nshl3d,ewald_rcell_ptr
246      external ewald_nshl3d,ewald_rcell_ptr
247      integer  Parallel_threadid,Parallel_nthreads
248      external Parallel_threadid,Parallel_nthreads
249
250
251      call nwpw_timing_start(34)
252      call Parallel_taskid(taskid)
253      call Parallel_np(np)
254      tid  = Parallel_threadid()
255      nthr = Parallel_nthreads()
256      shft = int_mb(tgauss_shift(1)+tid)
257
258c     **** allocate temporary memory ****
259      value =           BA_push_get(mt_int,ngauss_max,"lm1_tauss",
260     >                              lm1_tauss(2),lm1_tauss(1))
261      value = value.and.BA_push_get(mt_int,ngauss_max,"lm2_tauss",
262     >                              lm2_tauss(2),lm2_tauss(1))
263      value = value.and.BA_push_get(mt_int,ngauss_max,"iii1_tauss",
264     >                              iii1_tauss(2),iii1_tauss(1))
265      value = value.and.BA_push_get(mt_int,ngauss_max,"iii2_tauss",
266     >                              iii2_tauss(2),iii2_tauss(1))
267      value = value.and.BA_push_get(mt_dbl,ngauss_max,"e_tauss",
268     >                              e_tauss(2),e_tauss(1))
269      value = value.and.BA_push_get(mt_dbl,3*ngauss_max,"f_tauss",
270     >                              f_tauss(2),f_tauss(1))
271
272      if (periodic) then
273         nshl3d = ewald_nshl3d()
274         rcell  = ewald_rcell_ptr()
275      else
276         if (.not. BA_push_get(mt_dbl,3,"rcellflm",rcell_hndl,rcell))
277     >   call errquit("nwpw_compcharge_set_gintegrals:stack",1,MA_ERR)
278
279         nshl3d = 1
280!$OMP MASTER
281         dbl_mb(rcell)   = 0.0d0
282         dbl_mb(rcell+1) = 0.0d0
283         dbl_mb(rcell+2) = 0.0d0
284!$OMP END MASTER
285!$OMP BARRIER
286      end if
287
288
289      !**** these should not need to be called!!!! ****
290!$OMP MASTER
291      call dcopy(ngauss_max,0.0d0,0,dbl_mb(e_tauss(1)),1)
292      call dcopy(3*ngauss_max,0.0d0,0,dbl_mb(f_tauss(1)),1)
293
294      call dcopy(ngauss_max,0.0d0,0,dbl_mb(e_gauss(1)),1)
295      call dcopy(3*ngauss_max,0.0d0,0,dbl_mb(f_gauss(1)),1)
296!$OMP END MASTER
297!$OMP BARRIER
298
299
300      pcount = 0
301      gcount = 0
302      indx   = 0
303      do iii=1,nion_paw
304         iia = int_mb(katm_paw(1)+iii-1)
305         s1  = dbl_mb(sigma_paw(1)+iia-1)
306
307*        **** calculate on-site integrals ****
308         do l1=0,int_mb(mult_l(1)+iia-1)
309         do m1=0,l1
310            if (m1.eq.0) nn = 1
311            if (m1.gt.0) nn = 2
312            if (mod(pcount,np).eq.taskid) then
313            if (mod(gcount,nthr).eq.tid) then
314                W1=nwpw_UGaussian(l1,m1,s1,l1,m1,s1)
315                W2=nwpw_UGaussian(l1,m1,s1,l1,m1,sigma_smooth)
316                W4=nwpw_UGaussian(l1,m1,sigma_smooth,l1,m1,sigma_smooth)
317                e1(1) = 0.5d0*W1 + 0.5d0*W4 - W2
318                lm1(1) = l1*(l1+1) + m1
319                if (nn.gt.1) then
320                   W1=nwpw_UGaussian(l1,-m1,s1,l1,-m1,s1)
321                   W2=nwpw_UGaussian(l1,-m1,s1,l1,-m1,sigma_smooth)
322                   W4=nwpw_UGaussian(l1,-m1,sigma_smooth,
323     >                               l1,-m1,sigma_smooth)
324                   e1(2) = 0.5d0*W1 + 0.5d0*W4 - W2
325                   lm1(2) = l1*(l1+1) - m1
326                end if
327
328c               !if (dabs(e1).gt.tole) then
329                do i=1,nn
330                   inds = indx + shft
331                   dbl_mb(e_tauss(1)+inds) = e1(i)
332                   int_mb(lm1_tauss(1)+inds)
333     >             = (iii-1)*2*lm_size_max+lm1(i)
334                   int_mb(lm2_tauss(1)+inds)
335     >             = (iii-1)*2*lm_size_max+lm1(i)
336                   int_mb(iii1_tauss(1)+inds) = iii
337                   int_mb(iii2_tauss(1)+inds) = iii
338                   indx = indx + 1
339                end do
340c               !end if
341            end if
342            gcount = gcount + 1
343            end if
344            pcount = pcount + 1
345
346            if (nshl3d.gt.1) then
347               do l2=0,int_mb(mult_l(1)+iia-1)
348               do m2=0,l2
349                  if ((m1.eq.0).and.(m2.eq.0)) nn = 1
350                  if ((m1.eq.0).and.(m2.gt.0)) nn = 2
351                  if ((m1.gt.0).and.(m2.eq.0)) nn = 2
352                  if ((m1.gt.0).and.(m2.gt.0)) nn = 4
353                  if (mod(pcount,np).eq.taskid) then
354                  if (mod(gcount,nthr).eq.tid) then
355                     do i=1,4
356                        e1(i) = 0.0d0
357                     end do
358                     do l=2,nshl3d
359                        Rab(1) = dbl_mb(rcell+l-1)
360                        Rab(2) = dbl_mb(rcell+l-1+nshl3d)
361                        Rab(3) = dbl_mb(rcell+l-1+2*nshl3d)
362                        R = dsqrt(Rab(1)**2 + Rab(2)**2 + Rab(3)**2)
363                        if (R.lt.(4*sigma_smooth)) then
364                           call nwpw_WGaussian2_block(l1,m1,s1,l2,m2,
365     >                                               sigma_smooth,Rab,
366     >                                               n,lm1,lm2,W)
367                           do i=1,n
368                              e1(i) = e1(i) + 0.5d0*W(i)
369                           end do
370                        end if
371                     end do
372
373                     !if (dabs(e1).gt.tole) then
374                     do i=1,nn
375                        inds = indx + shft
376                        dbl_mb(e_tauss(1)+inds) = e1(i)
377                        int_mb(lm1_tauss(1)+inds)
378     >                        = (iii-1)*2*lm_size_max+lm1(i)
379                        int_mb(lm2_tauss(1)+inds)
380     >                        = (iii-1)*2*lm_size_max+lm2(i)
381                        int_mb(iii1_tauss(1)+inds) = iii
382                        int_mb(iii2_tauss(1)+inds) = iii
383
384                        indx = indx + 1
385                     end do
386                     !end if
387                  end if
388                  gcount = gcount + 1
389                  end if
390                  pcount = pcount + 1
391               end do
392               end do
393            end if
394         end do
395         end do
396
397*        **** calculate IJ integrals ****
398         ii  = int_mb(ion_pawtoion(1)+iii-1)
399         R1(1) = ion_rion(1,ii)
400         R1(2) = ion_rion(2,ii)
401         R1(3) = ion_rion(3,ii)
402         do jjj=iii+1,nion_paw
403            jja = int_mb(katm_paw(1)+jjj-1)
404            s2  = dbl_mb(sigma_paw(1)+jja-1)
405
406            jj  = int_mb(ion_pawtoion(1)+jjj-1)
407            R12(1) = R1(1) - ion_rion(1,jj)
408            R12(2) = R1(2) - ion_rion(2,jj)
409            R12(3) = R1(3) - ion_rion(3,jj)
410
411            do l1=0,int_mb(mult_l(1)+iia-1)
412            do m1=0,l1
413
414               do l2=0,int_mb(mult_l(1)+jja-1)
415               do m2=0,l2
416                  if ((m1.eq.0).and.(m2.eq.0)) nn = 1
417                  if ((m1.eq.0).and.(m2.gt.0)) nn = 2
418                  if ((m1.gt.0).and.(m2.eq.0)) nn = 2
419                  if ((m1.gt.0).and.(m2.gt.0)) nn = 4
420                  if (mod(pcount,np).eq.taskid) then
421                  if (mod(gcount,nthr).eq.tid) then
422                     do i=1,4
423                        e1(i)    = 0.0d0
424                        de1(1,i) = 0.0d0
425                        de1(2,i) = 0.0d0
426                        de1(3,i) = 0.0d0
427                     end do
428                     do l=1,nshl3d
429                        Rab(1) = R12(1) + dbl_mb(rcell+l-1)
430                        Rab(2) = R12(2) + dbl_mb(rcell+l-1+nshl3d)
431                        Rab(3) = R12(3) + dbl_mb(rcell+l-1+2*nshl3d)
432                        R = dsqrt(Rab(1)**2 + Rab(2)**2 + Rab(3)**2)
433                        if (R.lt.(4*sigma_smooth)) then
434                        if (move) then
435                           call nwpw_dWGaussian_block(l1,m1,s1,l2,m2,s2,
436     >                                               sigma_smooth,Rab,
437     >                                               n,lm1,lm2,W,dW)
438                           do i=1,n
439                              e1(i)  = e1(i)  + W(i)
440                              de1(1,i) = de1(1,i) + dW(1,i)
441                              de1(2,i) = de1(2,i) + dW(2,i)
442                              de1(3,i) = de1(3,i) + dW(3,i)
443                           end do
444                        else
445                           call nwpw_WGaussian_block(l1,m1,s1,l2,m2,s2,
446     >                                               sigma_smooth,Rab,
447     >                                               n,lm1,lm2,W)
448                           do i=1,n
449                              e1(i) = e1(i) + W(i)
450                           end do
451                        end if
452                        end if
453                     end do
454                     !if (dabs(e1).gt.tole) then
455                     do i=1,nn
456                        inds = indx + shft
457                        dbl_mb(e_tauss(1)+inds) = e1(i)
458                        if (move) then
459                           dbl_mb(f_tauss(1)+3*inds)   = de1(1,i)
460                           dbl_mb(f_tauss(1)+3*inds+1) = de1(2,i)
461                           dbl_mb(f_tauss(1)+3*inds+2) = de1(3,i)
462                        end if
463                        int_mb(lm1_tauss(1)+inds)
464     >                     = (iii-1)*2*lm_size_max+lm1(i)
465                        int_mb(lm2_tauss(1)+inds)
466     >                     = (jjj-1)*2*lm_size_max+lm2(i)
467                        int_mb(iii1_tauss(1)+inds) = iii
468                        int_mb(iii2_tauss(1)+inds) = jjj
469
470                        indx = indx + 1
471                     end do
472                     !end if
473                  end if
474                  gcount = gcount + 1
475                  end if
476                  pcount = pcount + 1
477               end do
478               end do
479
480            end do
481            end do
482
483         end do
484
485      end do
486
487!$OMP BARRIER
488!$OMP MASTER
489      call nwpw_gintegral_stripper(ngauss_max,
490     >                             int_mb(iii1_tauss(1)),
491     >                             int_mb(iii2_tauss(1)),
492     >                             int_mb(lm1_tauss(1)),
493     >                             int_mb(lm2_tauss(1)),
494     >                             dbl_mb(e_tauss(1)),
495     >                             dbl_mb(f_tauss(1)),
496     >                             ngauss,
497     >                             int_mb(iii1_gauss(1)),
498     >                             int_mb(iii2_gauss(1)),
499     >                             int_mb(lm1_gauss(1)),
500     >                             int_mb(lm2_gauss(1)),
501     >                             dbl_mb(e_gauss(1)),
502     >                             dbl_mb(f_gauss(1)))
503!$OMP END MASTER
504!$OMP BARRIER
505c Need to have barrier before deallocation below.  This barrier could be removed
506c if the tauss variables were on the heap and not deallocated rather than stack.
507
508
509c     **** deallocate stack memory ****
510      if (.not.periodic) then
511        if (.not.BA_pop_stack(rcell_hndl))
512     >   call errquit("nwpw_compcharge_set_gintegrals:stack",2,MA_ERR)
513      end if
514      value =           BA_pop_stack(f_tauss(2))
515      value = value.and.BA_pop_stack(e_tauss(2))
516      value = value.and.BA_pop_stack(iii2_tauss(2))
517      value = value.and.BA_pop_stack(iii1_tauss(2))
518      value = value.and.BA_pop_stack(lm2_tauss(2))
519      value = value.and.BA_pop_stack(lm1_tauss(2))
520        if (.not.value)
521     >   call errquit("nwpw_compcharge_set_gintegrals:stack",3,MA_ERR)
522
523      call nwpw_timing_end(34)
524      return
525      end
526
527
528*     *******************************************************
529*     *                                                     *
530*     *             nwpw_WGaussian_block                    *
531*     *                                                     *
532*     *******************************************************
533
534      subroutine nwpw_WGaussian_block(l1,mod_m1,sigma1,
535     >                                l2,mod_m2,sigma2,
536     >                                sigma_smooth,Rab,
537     >                                n,lm1,lm2,W)
538      implicit none
539      integer l1,mod_m1
540      real*8  sigma1
541      integer l2,mod_m2
542      real*8  sigma2
543      real*8  sigma_smooth
544      real*8  Rab(3)
545      integer n
546      integer lm1(*),lm2(*)
547      real*8  W(*)
548
549*     **** local variables ****
550      complex*16 CW4,CW4p,CW4m,CW4pp,CW4pm,CW4mp,CW4mm
551
552
553*     **** external functions ****
554      complex*16 nwpw_CWGaussian3
555      external   nwpw_CWGaussian3
556
557      if ((mod_m1.eq.0).and.(mod_m2.eq.0)) then
558         n  = 1
559         lm1(1) = l1*(l1+1)
560         lm2(1) = l2*(l2+1)
561
562         CW4 = nwpw_CWGaussian3(l1,mod_m1,sigma1,
563     >                          l2,mod_m2,sigma2,sigma_smooth,Rab)
564         W(1) = dble(CW4)
565
566      else if (mod_m1.eq.0) then
567         n  = 2
568         lm1(1) = l1*(l1+1)
569         lm2(1) = l2*(l2+1) - mod_m2
570
571         lm1(2) = l1*(l1+1)
572         lm2(2) = l2*(l2+1) + mod_m2
573
574         CW4p = nwpw_CWGaussian3(l1,mod_m1,sigma1,
575     >                           l2,mod_m2,sigma2,sigma_smooth,Rab)
576         CW4m = nwpw_CWGaussian3(l1,mod_m1,sigma1,
577     >                           l2,-mod_m2,sigma2,sigma_smooth,Rab)
578
579         !** m2<0 **
580         CW4 = (CW4m - (-1.0d0)**mod_m2 * CW4p)
581     >         *dcmplx(0.0d0,1.0d0/dsqrt(2.0d0))
582         W(1) = dble(CW4)
583
584         !** m2>0 **
585         CW4 = (CW4m + (-1.0d0)**mod_m2 * CW4p)/dsqrt(2.0d0)
586         W(2) = dble(CW4)
587
588      else if (mod_m2.eq.0) then
589         n  = 2
590         lm1(1) = l1*(l1+1) - mod_m1
591         lm2(1) = l2*(l2+1)
592
593         lm1(2) = l1*(l1+1) + mod_m1
594         lm2(2) = l2*(l2+1)
595
596         CW4p = nwpw_CWGaussian3(l1,mod_m1,sigma1,
597     >                           l2,mod_m2,sigma2,sigma_smooth,Rab)
598         CW4m = nwpw_CWGaussian3(l1,-mod_m1,sigma1,
599     >                           l2,mod_m2,sigma2,sigma_smooth,Rab)
600
601         !** m1<0 **
602         CW4 = (CW4m - (-1.0d0)**mod_m1 * CW4p)
603     >         *dcmplx(0.0d0,1.0d0/dsqrt(2.0d0))
604         W(1) = dble(CW4)
605
606         !** m1>0 **
607         CW4 = (CW4m + (-1.0d0)**mod_m1 * CW4p)/dsqrt(2.0d0)
608         W(2) = dble(CW4)
609
610      else
611         n = 4
612         lm1(1) = l1*(l1+1) - mod_m1
613         lm2(1) = l2*(l2+1) - mod_m2
614
615         lm1(2) = l1*(l1+1) - mod_m1
616         lm2(2) = l2*(l2+1) + mod_m2
617
618         lm1(3) = l1*(l1+1) + mod_m1
619         lm2(3) = l2*(l2+1) - mod_m2
620
621         lm1(4) = l1*(l1+1) + mod_m1
622         lm2(4) = l2*(l2+1) + mod_m2
623
624         CW4pp =nwpw_CWGaussian3(l1, mod_m1,sigma1,
625     >                          l2, mod_m2,sigma2,sigma_smooth,Rab)
626         CW4pm =nwpw_CWGaussian3(l1, mod_m1,sigma1,
627     >                          l2,-mod_m2,sigma2,sigma_smooth,Rab)
628         CW4mp =nwpw_CWGaussian3(l1,-mod_m1,sigma1,
629     >                          l2, mod_m2,sigma2,sigma_smooth,Rab)
630         CW4mm =nwpw_CWGaussian3(l1,-mod_m1,sigma1,
631     >                          l2,-mod_m2,sigma2,sigma_smooth,Rab)
632
633         !** m1<0 and m2<0 **
634         CW4 = -(CW4mm
635     >          + (-1.0d0)**(mod_m1+mod_m2) * CW4pp
636     >          - (-1.0d0)**mod_m1          * CW4pm
637     >          - (-1.0d0)**mod_m2          * CW4mp)/2.0d0
638         W(1) = dble(CW4)
639
640         !** m1<0 and m2>0 **
641         CW4 = (CW4mm
642     >         - (-1.0d0)**(mod_m1+mod_m2) * CW4pp
643     >         - (-1.0d0)**mod_m1          * CW4pm
644     >         + (-1.0d0)**mod_m2          * CW4mp)
645     >         *dcmplx(0.0d0,1.0d0/2.0d0)
646         W(2) = dble(CW4)
647
648         !** m1>0 and m2<0 **
649         CW4 = (CW4mm
650     >         - (-1.0d0)**(mod_m1+mod_m2) * CW4pp
651     >         + (-1.0d0)**mod_m1      * CW4pm
652     >         - (-1.0d0)**mod_m2      * CW4mp)
653     >        *dcmplx(0.0d0,1.0d0/2.0d0)
654         W(3) = dble(CW4)
655
656         !** m1>0 and m2>0 **
657         CW4 =  (CW4mm
658     >          + (-1.0d0)**(mod_m1+mod_m2) * CW4pp
659     >          + (-1.0d0)**mod_m1      * CW4pm
660     >          + (-1.0d0)**mod_m2      * CW4mp)/2.0d0
661         W(4) = dble(CW4)
662
663      end if
664
665      return
666      end
667
668
669
670*     *******************************************************
671*     *                                                     *
672*     *             nwpw_WGaussian2_block                   *
673*     *                                                     *
674*     *******************************************************
675
676      subroutine nwpw_WGaussian2_block(l1,mod_m1,sigma1,
677     >                                l2,mod_m2,
678     >                                sigma_smooth,Rab,
679     >                                n,lm1,lm2,W)
680      implicit none
681      integer l1,mod_m1
682      real*8  sigma1
683      integer l2,mod_m2
684      real*8  sigma_smooth
685      real*8  Rab(3)
686      integer n
687      integer lm1(*),lm2(*)
688      real*8  W(*)
689
690*     **** local variables ****
691      complex*16 CW4,CW4p,CW4m,CW4pp,CW4pm,CW4mp,CW4mm
692
693
694*     **** external functions ****
695      complex*16 nwpw_CWGaussian2
696      external   nwpw_CWGaussian2
697
698      if ((mod_m1.eq.0).and.(mod_m2.eq.0)) then
699         n  = 1
700         lm1(1) = l1*(l1+1)
701         lm2(1) = l2*(l2+1)
702
703         CW4 = nwpw_CWGaussian2(l1,mod_m1,sigma1,
704     >                          l2,mod_m2,sigma_smooth,Rab)
705         W(1) = dble(CW4)
706
707      else if (mod_m1.eq.0) then
708         n  = 2
709         lm1(1) = l1*(l1+1)
710         lm2(1) = l2*(l2+1) - mod_m2
711
712         lm1(2) = l1*(l1+1)
713         lm2(2) = l2*(l2+1) + mod_m2
714
715         CW4p = nwpw_CWGaussian2(l1,mod_m1,sigma1,
716     >                           l2,mod_m2,sigma_smooth,Rab)
717         CW4m = nwpw_CWGaussian2(l1, mod_m1,sigma1,
718     >                           l2,-mod_m2,sigma_smooth,Rab)
719
720         !** m2<0 **
721         CW4 = (CW4m - (-1.0d0)**mod_m2 * CW4p)
722     >         *dcmplx(0.0d0,1.0d0/dsqrt(2.0d0))
723         W(1) = dble(CW4)
724
725         !** m2>0 **
726         CW4 = (CW4m + (-1.0d0)**mod_m2 * CW4p)/dsqrt(2.0d0)
727         W(2) = dble(CW4)
728
729      else if (mod_m2.eq.0) then
730         n  = 2
731         lm1(1) = l1*(l1+1) - mod_m1
732         lm2(1) = l2*(l2+1)
733
734         lm1(2) = l1*(l1+1) + mod_m1
735         lm2(2) = l2*(l2+1)
736
737         CW4p = nwpw_CWGaussian2(l1,mod_m1,sigma1,
738     >                           l2,mod_m2,sigma_smooth,Rab)
739         CW4m = nwpw_CWGaussian2(l1,-mod_m1,sigma1,
740     >                           l2, mod_m2,sigma_smooth,Rab)
741
742         !** m1<0 **
743         CW4 = (CW4m - (-1.0d0)**mod_m1 * CW4p)
744     >         *dcmplx(0.0d0,1.0d0/dsqrt(2.0d0))
745         W(1) = dble(CW4)
746
747         !** m1>0 **
748         CW4 = (CW4m + (-1.0d0)**mod_m1 * CW4p)/dsqrt(2.0d0)
749         W(2) = dble(CW4)
750
751      else
752         n = 4
753         lm1(1) = l1*(l1+1) - mod_m1
754         lm2(1) = l2*(l2+1) - mod_m2
755
756         lm1(2) = l1*(l1+1) - mod_m1
757         lm2(2) = l2*(l2+1) + mod_m2
758
759         lm1(3) = l1*(l1+1) + mod_m1
760         lm2(3) = l2*(l2+1) - mod_m2
761
762         lm1(4) = l1*(l1+1) + mod_m1
763         lm2(4) = l2*(l2+1) + mod_m2
764
765         CW4pp =nwpw_CWGaussian2(l1, mod_m1,sigma1,
766     >                           l2, mod_m2,sigma_smooth,Rab)
767         CW4pm =nwpw_CWGaussian2(l1, mod_m1,sigma1,
768     >                           l2,-mod_m2,sigma_smooth,Rab)
769         CW4mp =nwpw_CWGaussian2(l1,-mod_m1,sigma1,
770     >                           l2, mod_m2,sigma_smooth,Rab)
771         CW4mm =nwpw_CWGaussian2(l1,-mod_m1,sigma1,
772     >                           l2,-mod_m2,sigma_smooth,Rab)
773
774         !** m1<0 and m2<0 **
775         CW4 = -(CW4mm
776     >          + (-1.0d0)**(mod_m1+mod_m2) * CW4pp
777     >          - (-1.0d0)**mod_m1          * CW4pm
778     >          - (-1.0d0)**mod_m2          * CW4mp)/2.0d0
779         W(1) = dble(CW4)
780
781         !** m1<0 and m2>0 **
782         CW4 = (CW4mm
783     >         - (-1.0d0)**(mod_m1+mod_m2) * CW4pp
784     >         - (-1.0d0)**mod_m1          * CW4pm
785     >         + (-1.0d0)**mod_m2          * CW4mp)
786     >         *dcmplx(0.0d0,1.0d0/2.0d0)
787         W(2) = dble(CW4)
788
789         !** m1>0 and m2<0 **
790         CW4 = (CW4mm
791     >         - (-1.0d0)**(mod_m1+mod_m2) * CW4pp
792     >         + (-1.0d0)**mod_m1      * CW4pm
793     >         - (-1.0d0)**mod_m2      * CW4mp)
794     >        *dcmplx(0.0d0,1.0d0/2.0d0)
795         W(3) = dble(CW4)
796
797         !** m1>0 and m2>0 **
798         CW4 =  (CW4mm
799     >          + (-1.0d0)**(mod_m1+mod_m2) * CW4pp
800     >          + (-1.0d0)**mod_m1      * CW4pm
801     >          + (-1.0d0)**mod_m2      * CW4mp)/2.0d0
802         W(4) = dble(CW4)
803
804      end if
805
806      return
807      end
808
809
810
811*     *******************************************************
812*     *                                                     *
813*     *             nwpw_dWGaussian_block                   *
814*     *                                                     *
815*     *******************************************************
816
817      subroutine nwpw_dWGaussian_block(l1,mod_m1,sigma1,
818     >                                 l2,mod_m2,sigma2,
819     >                                 sigma_smooth,Rab,
820     >                                 n,lm1,lm2,W,dW)
821      implicit none
822      integer l1,mod_m1
823      real*8  sigma1
824      integer l2,mod_m2
825      real*8  sigma2
826      real*8  sigma_smooth
827      real*8  Rab(3)
828      integer n
829      integer lm1(*),lm2(*)
830      real*8  W(*),dW(3,*)
831
832*     **** local variables ****
833      integer i
834      complex*16 CW4,CW4p,CW4m,CW4pp,CW4pm,CW4mp,CW4mm
835      complex*16 dCW4(3),dCW4p(3),dCW4m(3)
836      complex*16 dCW4pp(3),dCW4pm(3)
837      complex*16 dCW4mp(3),dCW4mm(3)
838
839      if ((mod_m1.eq.0).and.(mod_m2.eq.0)) then
840         n  = 1
841         lm1(1) = l1*(l1+1)
842         lm2(1) = l2*(l2+1)
843
844         call nwpw_dCWGaussian3(l1,mod_m1,sigma1,
845     >                          l2,mod_m2,sigma2,sigma_smooth,
846     >                          Rab,CW4,dCW4)
847         W(1) = dble(CW4)
848         dW(1,1) = dble(dCW4(1))
849         dW(2,1) = dble(dCW4(2))
850         dW(3,1) = dble(dCW4(3))
851
852      else if (mod_m1.eq.0) then
853         n  = 2
854         lm1(1) = l1*(l1+1)
855         lm2(1) = l2*(l2+1) - mod_m2
856
857         lm1(2) = l1*(l1+1)
858         lm2(2) = l2*(l2+1) + mod_m2
859
860         call nwpw_dCWGaussian3(l1,mod_m1,sigma1,
861     >                          l2,mod_m2,sigma2,sigma_smooth,
862     >                          Rab,CW4p,dCW4p)
863
864         call nwpw_dCWGaussian3(l1,mod_m1,sigma1,
865     >                          l2,-mod_m2,sigma2,sigma_smooth,
866     >                          Rab,CW4m,dCW4m)
867
868         !** m2<0 **
869         CW4 = (CW4m - (-1.0d0)**mod_m2 * CW4p)
870     >         *dcmplx(0.0d0,1.0d0/dsqrt(2.0d0))
871         W(1) = dble(CW4)
872         do i=1,3
873            CW4 = (dCW4m(i) - (-1.0d0)**mod_m2 * dCW4p(i))
874     >            *dcmplx(0.0d0,1.0d0/dsqrt(2.0d0))
875            dW(i,1) = dble(CW4)
876         end do
877
878         !** m2>0 **
879         CW4 = (CW4m + (-1.0d0)**mod_m2 * CW4p)/dsqrt(2.0d0)
880         W(2) = dble(CW4)
881         do i=1,3
882            CW4 = (dCW4m(i) + (-1.0d0)**mod_m2 * dCW4p(i))/dsqrt(2.0d0)
883            dW(i,2) = dble(CW4)
884         end do
885
886
887      else if (mod_m2.eq.0) then
888         n  = 2
889         lm1(1) = l1*(l1+1) - mod_m1
890         lm2(1) = l2*(l2+1)
891
892         lm1(2) = l1*(l1+1) + mod_m1
893         lm2(2) = l2*(l2+1)
894
895         call nwpw_dCWGaussian3(l1,mod_m1,sigma1,
896     >                          l2,mod_m2,sigma2,sigma_smooth,
897     >                          Rab,CW4p,dCW4p)
898
899        call nwpw_dCWGaussian3(l1,-mod_m1,sigma1,
900     >                         l2,mod_m2,sigma2,sigma_smooth,
901     >                         Rab,CW4m,dCW4m)
902
903         !** m1<0 **
904         CW4 = (CW4m - (-1.0d0)**mod_m1 * CW4p)
905     >         *dcmplx(0.0d0,1.0d0/dsqrt(2.0d0))
906         W(1) = dble(CW4)
907         do i=1,3
908            CW4 = (dCW4m(i) - (-1.0d0)**mod_m1 * dCW4p(i))
909     >            *dcmplx(0.0d0,1.0d0/dsqrt(2.0d0))
910            dW(i,1) = dble(CW4)
911         end do
912
913         !** m1>0 **
914         CW4 = (CW4m + (-1.0d0)**mod_m1 * CW4p)/dsqrt(2.0d0)
915         W(2) = dble(CW4)
916         do i=1,3
917            CW4 = (dCW4m(i) + (-1.0d0)**mod_m1 * dCW4p(i))/dsqrt(2.0d0)
918            dW(i,2) = dble(CW4)
919         end do
920
921      else
922         n = 4
923         lm1(1) = l1*(l1+1) - mod_m1
924         lm2(1) = l2*(l2+1) - mod_m2
925
926         lm1(2) = l1*(l1+1) - mod_m1
927         lm2(2) = l2*(l2+1) + mod_m2
928
929         lm1(3) = l1*(l1+1) + mod_m1
930         lm2(3) = l2*(l2+1) - mod_m2
931
932         lm1(4) = l1*(l1+1) + mod_m1
933         lm2(4) = l2*(l2+1) + mod_m2
934
935         call nwpw_dCWGaussian3(l1,mod_m1,sigma1,
936     >                          l2,mod_m2,sigma2,sigma_smooth,
937     >                          Rab,CW4pp,dCW4pp)
938
939         call nwpw_dCWGaussian3(l1,mod_m1,sigma1,
940     >                          l2,-mod_m2,sigma2,sigma_smooth,
941     >                          Rab,CW4pm,dCW4pm)
942
943         call nwpw_dCWGaussian3(l1,-mod_m1,sigma1,
944     >                          l2,mod_m2,sigma2,sigma_smooth,
945     >                          Rab,CW4mp,dCW4mp)
946
947         call nwpw_dCWGaussian3(l1,-mod_m1,sigma1,
948     >                          l2,-mod_m2,sigma2,sigma_smooth,
949     >                          Rab,CW4mm,dCW4mm)
950
951         !** m1<0 and m2<0 **
952         CW4 = -(CW4mm
953     >          + (-1.0d0)**(mod_m1+mod_m2) * CW4pp
954     >          - (-1.0d0)**mod_m1          * CW4pm
955     >          - (-1.0d0)**mod_m2          * CW4mp)/2.0d0
956         W(1) = dble(CW4)
957         do i=1,3
958            CW4 = -(dCW4mm(i)
959     >          + (-1.0d0)**(mod_m1+mod_m2) * dCW4pp(i)
960     >          - (-1.0d0)**mod_m1          * dCW4pm(i)
961     >          - (-1.0d0)**mod_m2          * dCW4mp(i))/2.0d0
962            dW(i,1) = dble(CW4)
963         end do
964
965         !** m1<0 and m2>0 **
966         CW4 = (CW4mm
967     >         - (-1.0d0)**(mod_m1+mod_m2) * CW4pp
968     >         - (-1.0d0)**mod_m1          * CW4pm
969     >         + (-1.0d0)**mod_m2     * CW4mp)*dcmplx(0.0d0,1.0d0/2.0d0)
970         W(2) = dble(CW4)
971         do i=1,3
972            CW4 = (dCW4mm(i)
973     >         - (-1.0d0)**(mod_m1+mod_m2) * dCW4pp(i)
974     >         - (-1.0d0)**mod_m1          * dCW4pm(i)
975     >         + (-1.0d0)**mod_m2 * dCW4mp(i))*dcmplx(0.0d0,1.0d0/2.0d0)
976            dW(i,2) = dble(CW4)
977         end do
978
979         !** m1>0 and m2<0 **
980         CW4 = (CW4mm
981     >         - (-1.0d0)**(mod_m1+mod_m2) * CW4pp
982     >         + (-1.0d0)**mod_m1     * CW4pm
983     >         - (-1.0d0)**mod_m2     * CW4mp)*dcmplx(0.0d0,1.0d0/2.0d0)
984         W(3) = dble(CW4)
985         do i=1,3
986            CW4 = (dCW4mm(i)
987     >         - (-1.0d0)**(mod_m1+mod_m2) * dCW4pp(i)
988     >         + (-1.0d0)**mod_m1 * dCW4pm(i)
989     >         - (-1.0d0)**mod_m2 * dCW4mp(i))*dcmplx(0.0d0,1.0d0/2.0d0)
990            dW(i,3) = dble(CW4)
991         end do
992
993         !** m1>0 and m2>0 **
994         CW4 =  (CW4mm
995     >          + (-1.0d0)**(mod_m1+mod_m2) * CW4pp
996     >          + (-1.0d0)**mod_m1      * CW4pm
997     >          + (-1.0d0)**mod_m2      * CW4mp)/2.0d0
998         W(4) = dble(CW4)
999         do i=1,3
1000            CW4 =  (dCW4mm(i)
1001     >          + (-1.0d0)**(mod_m1+mod_m2) * dCW4pp(i)
1002     >          + (-1.0d0)**mod_m1      * dCW4pm(i)
1003     >          + (-1.0d0)**mod_m2      * dCW4mp(i))/2.0d0
1004            dW(i,4) = dble(CW4)
1005         end do
1006
1007      end if
1008
1009      return
1010      end
1011
1012
1013*     *************************************************
1014*     *                                               *
1015*     *             nwpw_gintegrals_stripper          *
1016*     *                                               *
1017*     *************************************************
1018c
1019c  This routine is used to remove unecessary integrals
1020c
1021      subroutine nwpw_gintegral_stripper(ng_in,
1022     >                                   iii1_in,iii2_in,
1023     >                                   lm1_in,lm2_in,e_in,f_in,
1024     >                                   ng_out,
1025     >                                   iii1_out,iii2_out,
1026     >                                   lm1_out,lm2_out,e_out,f_out)
1027      implicit none
1028      integer ng_in,iii1_in(*),iii2_in(*),lm1_in(*),lm2_in(*)
1029      real*8  e_in(*),f_in(*)
1030      integer ng_out,iii1_out(*),iii2_out(*),lm1_out(*),lm2_out(*)
1031      real*8  e_out(*),f_out(*)
1032
1033c     **** local variables ****
1034      integer i
1035      real*8 tole
1036      parameter (tole=1.0d-25)
1037
1038      ng_out = 0
1039      do i=1,ng_in
1040         if (dabs(e_in(i)).gt.tole) then
1041            ng_out = ng_out + 1
1042            iii1_out(ng_out) = iii1_in(i)
1043            iii2_out(ng_out) = iii2_in(i)
1044            lm1_out(ng_out)  = lm1_in(i)
1045            lm2_out(ng_out)  = lm2_in(i)
1046            e_out(ng_out)    = e_in(i)
1047            f_out(3*(ng_out-1)+1) = f_in(3*(i-1)+1)
1048            f_out(3*(ng_out-1)+2) = f_in(3*(i-1)+2)
1049            f_out(3*(ng_out-1)+3) = f_in(3*(i-1)+3)
1050         end if
1051      end do
1052      return
1053      end
1054
1055
1056********************************* real Gaussian Integrals **************************************
1057