1      subroutine pme_start(a,m,imffti,nodpmi,
2     + ngxi,ngyi,ngzi,nwm,nwa,nsa,ictrl,nbgeti)
3c $Id$
4      implicit none
5c
6      integer nodpmi,ngxi,ngyi,ngzi,imffti,nwm,nwa,nsa,m
7      integer ictrl,nbgeti
8      real*8 a
9c
10      integer mem
11c
12#include "pme_common.fh"
13#include "mafdecls.fh"
14#include "global.fh"
15c
16      me=ga_nodeid()
17      np=ga_nnodes()
18c
19      pi=four*atan(one)
20      twopi=two*pi
21      alpha=a
22      morder=m
23      nodpme=nodpmi
24      icntrl=ictrl
25      nbget=nbgeti
26c
27      imfft=imffti
28      lcorr(1)=.false.
29      lcorr(2)=.false.
30      lcorr(3)=.false.
31c
32      mwa=nwa
33      mwm=nwm
34      msa=nsa
35c
36      ngx=ngxi
37      ngy=ngyi
38      ngz=ngzi
39      ngmax=max(ngx,ngy,ngz)
40      mgx=ngx
41      mgy=ngy
42      mgz=(ngz/nodpme)+1
43      ngrx=ngx+morder
44      ngry=ngy+morder
45      ngrz=ngz
46      mgrx=ngrx
47      mgry=ngry
48      mgrz=mgz
49      if(nodpme*mgz.lt.ngz)
50     + call md_abort('Inconsistent number of pme nodes',0)
51c
52c     allocate memory for spline coefficients
53c
54      if(.not.ma_push_get(mt_dbl,3*ngmax,'bmod',l_bmod,i_bmod))
55     + call md_abort('Failed to allocate bmod',0)
56      if(.not.ma_push_get(mt_dbl,ngmax,'barr',l_barr,i_barr))
57     + call md_abort('Failed to allocate barr',0)
58c
59c     allocate memory for the grids
60c
61      mem=2*mgx*mgy*mgz
62      if(.not.ma_push_get(mt_dbl,mem,'grid1',l_grd1,i_grd1))
63     + call md_abort('Failed to allocate grid1',0)
64      if(.not.ma_push_get(mt_dbl,mem,'grid2',l_grd2,i_grd2))
65     + call md_abort('Failed to allocate grid2',0)
66      if(.not.ma_push_get(mt_dbl,mgrx*mgry*mgz,'grid',l_grd,i_grd))
67     + call md_abort('Failed to allocate grid',0)
68      if(nbget.gt.0) then
69      if(.not.ma_push_get(mt_dbl,mgrx*mgry*mgz,'gridt',l_grdt,i_grdt))
70     + call md_abort('Failed to allocate gridt',0)
71      if(.not.ma_push_get(mt_int,mgz,'gridh',l_grdh,i_grdh))
72     + call md_abort('Failed to allocate gridh',0)
73      else
74      if(.not.ma_push_get(mt_dbl,1,'gridt',l_grdt,i_grdt))
75     + call md_abort('Failed to allocate gridt',0)
76      if(.not.ma_push_get(mt_int,1,'gridh',l_grdh,i_grdh))
77     + call md_abort('Failed to allocate gridh',0)
78      endif
79c
80c     allocate memory for node arrays
81c
82      if(.not.ma_push_get(mt_int,np,'lnode',l_lnod,i_lnod))
83     + call md_abort('Failed to allocate lnode',0)
84      if(.not.ma_push_get(mt_int,np,'kfrom',l_kfr,i_kfr))
85     + call md_abort('Failed to allocate kfrom',0)
86      if(.not.ma_push_get(mt_int,np,'kto',l_kto,i_kto))
87     + call md_abort('Failed to allocate kto',0)
88      if(.not.ma_push_get(mt_int,np,'nodep',l_nodp,i_nodp))
89     + call md_abort('Failed to allocate nodep',0)
90c
91      if(.not.ma_push_get(mt_int,ngz,'knode',l_knod,i_knod))
92     + call md_abort('Failed to allocate knode',0)
93c
94      if(.not.ma_push_get(mt_log,mgz*np,'lsldo',l_lsld,i_lsld))
95     + call md_abort('Failed to allocate ldldo',0)
96      if(.not.ma_push_get(mt_int,4*mgz*np,'slmax',l_slmax,i_slmax))
97     + call md_abort('Failed to allocate slmax',0)
98c
99c     allocate memory for fft node maps
100c
101      mem=3*max(ngx,ngy,ngz)
102      if(.not.ma_push_get(mt_int,mem,'map1',l_map1,i_map1))
103     + call md_abort('Failed to allocate map1',0)
104      if(.not.ma_push_get(mt_int,mem,'map2',l_map2,i_map2))
105     + call md_abort('Failed to allocate map2',0)
106c
107c     allocate memory for slab arrays
108c
109      mem=mgz*np
110      if(.not.ma_push_get(mt_int,mem,'slab',l_slab,i_slab))
111     + call md_abort('Failed to allocate slab',0)
112c
113c     create the global arrays for the slabs
114c
115      call ga_create_list(np*mgrx,mgry,'sl',mgrx,mgry,mgz,lslab)
116c
117c     create the flag array
118c
119      if(.not.ga_create(mt_int,np,5,'flg',1,5,ga_flg))
120     + call md_abort('Failed to allocate global array flg',0)
121      call pme_flag(0,0,me)
122      call pme_flag(1,0,me)
123c
124c     select the fft
125c
126      call pme_select(zero)
127c
128c     test the fft
129c
130      if(lpnode)
131     + call pme_fftest(dbl_mb(i_grd1),dbl_mb(i_grd2),int_mb(i_nodp))
132c
133      call pme_coeff(dbl_mb(i_bmod),dbl_mb(i_barr))
134c
135      return
136      end
137      subroutine pme_finish
138c
139      implicit none
140c
141#include "pme_common.fh"
142#include "mafdecls.fh"
143#include "global.fh"
144c
145      if(.not.ga_destroy(ga_flg))
146     + call md_abort('Failed to deallocate flg',0)
147c
148      call ga_destroy_list(mgz,lslab)
149c
150      if(.not.ma_pop_stack(l_slab))
151     + call md_abort('Failed to deallocate slab',0)
152      if(.not.ma_pop_stack(l_map2))
153     + call md_abort('Failed to deallocate map2',0)
154      if(.not.ma_pop_stack(l_map1))
155     + call md_abort('Failed to deallocate map1',0)
156      if(.not.ma_pop_stack(l_slmax))
157     + call md_abort('Failed to deallocate slmax',0)
158      if(.not.ma_pop_stack(l_lsld))
159     + call md_abort('Failed to deallocate lsldo',0)
160      if(.not.ma_pop_stack(l_knod))
161     + call md_abort('Failed to deallocate knode',0)
162      if(.not.ma_pop_stack(l_nodp))
163     + call md_abort('Failed to deallocate nodep',0)
164      if(.not.ma_pop_stack(l_kto))
165     + call md_abort('Failed to deallocate kto',0)
166      if(.not.ma_pop_stack(l_kfr))
167     + call md_abort('Failed to deallocate kfrom',0)
168      if(.not.ma_pop_stack(l_lnod))
169     + call md_abort('Failed to deallocate lnode',0)
170      if(.not.ma_pop_stack(l_grdh))
171     + call md_abort('Failed to deallocate gridh',0)
172      if(.not.ma_pop_stack(l_grdt))
173     + call md_abort('Failed to deallocate gridt',0)
174      if(.not.ma_pop_stack(l_grd))
175     + call md_abort('Failed to deallocate grid',0)
176      if(.not.ma_pop_stack(l_grd2))
177     + call md_abort('Failed to deallocate grid2',0)
178      if(.not.ma_pop_stack(l_grd1))
179     + call md_abort('Failed to deallocate grid1',0)
180c
181      if(.not.ma_pop_stack(l_barr))
182     + call md_abort('Failed to deallocate barr',0)
183      if(.not.ma_pop_stack(l_bmod))
184     + call md_abort('Failed to deallocate bmod',0)
185c
186      return
187      end
188      subroutine pme_select(value)
189c
190      implicit none
191c
192#include "pme_common.fh"
193#include "mafdecls.fh"
194c
195      real*8 value
196      integer i_trgt,l_trgt
197c
198      if(.not.ma_push_get(mt_dbl,np,'target',l_trgt,i_trgt))
199     + call md_abort('Failed to allocate target',0)
200c
201      call pme_selnod(value,dbl_mb(i_trgt),int_mb(i_nodp),
202     + int_mb(i_knod),int_mb(i_kfr),int_mb(i_kto),int_mb(i_map1))
203c
204      if(.not.ma_pop_stack(l_trgt))
205     + call md_abort('Failed to deallocate target',0)
206c
207      return
208      end
209      subroutine pme_selnod(value,target,nodep,knode,kfrom,kto,map)
210c
211      implicit none
212c
213#include "pme_common.fh"
214#include "mafdecls.fh"
215#include "msgids.fh"
216c
217      real*8 value,target(np)
218      integer nodep(nodpme),knode(ngz),kfrom(np),kto(np),map(np)
219c
220      integer i,j,k,npls,nlnd
221#if defined(ESSL) && defined(LAPI)
222      integer number,npr,npc,myr,myc
223      integer icntxt,lfftok
224#endif
225c
226c     select nodes with minimum value to be used for pme/fft
227c
228      do 1 i=1,np
229      target(i)=zero
230    1 continue
231      target(me+1)=value
232      call ga_dgop(mrp_d01,target,np,'+')
233c
234      do 2 i=1,np
235      nodep(i)=i
236    2 continue
237      do 3 i=1,np-1
238      do 4 j=i+1,np
239      if(target(i).lt.target(j)) then
240      k=nodep(i)
241      nodep(i)=nodep(j)
242      nodep(j)=k
243      endif
244    4 continue
245    3 continue
246      do 5 i=1,np
247      nodep(i)=nodep(i)-1
248    5 continue
249      do 6 i=1,nodpme-1
250      do 7 j=i+1,nodpme
251      if(nodep(i).gt.nodep(j)) then
252      k=nodep(i)
253      nodep(i)=nodep(j)
254      nodep(j)=k
255      endif
256    7 continue
257    6 continue
258c
259      call ga_brdcst(mrp_i01,nodep,nodpme*ma_sizeof(mt_int,1,mt_byte),0)
260c
261c     for node i: kfrom(i) is the first grid point in the z direction
262c     for node i: kto(i)   is the last  grid point in the z direction
263c
264      do 8 i=1,np
265      kfrom(i)=0
266      kto(i)=0
267    8 continue
268c
269      npls=(ngz/nodpme)+1
270      nlnd=mod(ngz,nodpme)
271      kfrom(nodep(1)+1)=1
272      do 9 i=1,nodpme
273      if(i.eq.nlnd+1) npls=npls-1
274      kto(nodep(i)+1)=kfrom(nodep(i)+1)+npls-1
275      if(i.lt.nodpme) kfrom(nodep(i+1)+1)=kto(nodep(i)+1)+1
276    9 continue
277c
278      if(kto(nodep(nodpme)+1).ne.ngz)
279     + call md_abort('Error in pme_select',me)
280      kto(nodep(nodpme)+1)=ngz
281c
282c     for point i in the z direction: knode(i) is the owning node
283c     ngzloc is the number of points in the z direction on this node
284c
285      j=1
286      ngzloc=0
287      do 10 i=1,ngz
288      if(i.gt.kto(nodep(j)+1)) j=j+1
289      knode(i)=nodep(j)
290      if(nodep(j).eq.me) ngzloc=ngzloc+1
291   10 continue
292c
293      lpnode=.false.
294      do 11 i=1,nodpme
295      if(me.eq.nodep(i)) lpnode=.true.
296   11 continue
297c
298#if defined(ESSL) && defined(LAPI)
299c
300c     setup process grid for pessl grid
301c
302      if(imfft.eq.2) then
303c
304      if(lfftok.gt.0) then
305      call blacs_gridexit(icntxt)
306      else
307      call blacs_pinfo(myr,myc)
308      if(me.ne.myr) call md_abort('Identity crisis',me)
309      if(np.ne.myc) call md_abort('Node count inconsistent',me)
310      endif
311c
312      do 12 i=1,np
313      map(i)=-1
314   12 continue
315      number=1
316      do 13 i=1,ngz
317      if(map(number).lt.0) then
318      map(number)=knode(i)
319      elseif(map(number).ne.knode(i)) then
320      number=number+1
321      map(number)=knode(i)
322      endif
323   13 continue
324      if(number.ne.nodpme) call md_abort('Node assignment problem',me)
325c
326      call blacs_get(0,0,icntxt)
327c
328      call blacs_gridmap(icntxt,map,1,1,nodpme)
329      call blacs_gridinfo(icntxt,npr,npc,myr,myc)
330c
331      if(lpnode.and.(myr.ge.npr.or.myc.ge.npc)) then
332      call md_abort('arg_fft3d: fft initialization failed',me)
333      endif
334c
335      endif
336c
337#endif
338c
339      return
340      end
341      subroutine pme_coeff(bmod,barr)
342c
343      implicit none
344c
345#include "pme_common.fh"
346c
347      real*8 bmod(ngmax,3),barr(ngmax)
348c
349      real*8 w,arr(25),darr(25)
350      integer i
351c
352      w=zero
353c
354      call pme_splfil(w,arr,darr)
355c
356      do 1 i=1,ngmax
357      barr(i)=zero
358    1 continue
359c
360      do 2 i=2,morder+1
361      barr(i)=arr(i-1)
362    2 continue
363c
364      call pme_dftmod(bmod(1,1),barr,ngx)
365      call pme_dftmod(bmod(1,2),barr,ngy)
366      call pme_dftmod(bmod(1,3),barr,ngz)
367c
368      return
369      end
370      subroutine pme_splfil(w,arr,darr)
371c
372      implicit none
373c
374#include "pme_common.fh"
375c
376      real*8 arr(morder),darr(morder),w
377c
378      integer i,j
379      real*8 rinv
380c
381      arr(morder)=zero
382      arr(2)=w
383      arr(1)=one-w
384      do 1 i=3,morder-1
385      rinv=one/dble(i-1)
386      arr(i)=rinv*w*arr(i-1)
387      do 2 j=1,i-2
388      arr(i-j)=rinv*((w+dble(j))*arr(i-j-1)+(dble(i-j)-w)*arr(i-j))
389    2 continue
390      arr(1)=rinv*(one-w)*arr(1)
391    1 continue
392      darr(1)=-arr(1)
393      do 3 j=2,morder
394      darr(j)=arr(j-1)-arr(j)
395    3 continue
396      rinv=one/dble(morder-1)
397      arr(morder)=rinv*w*arr(morder-1)
398      do 4 j=1,morder-2
399      arr(morder-j)=rinv*((w+dble(j))*arr(morder-j-1)+
400     + (dble(morder-j)-w)*arr(morder-j))
401    4 continue
402      arr(1)=rinv*(one-w)*arr(1)
403c
404      return
405      end
406      subroutine pme_dftmod(bmod,barr,ng)
407c
408      implicit none
409c
410#include "pme_common.fh"
411c
412      integer ng
413      real*8 bmod(ng),barr(ng)
414      integer i,j
415      real*8 sum1,sum2,arg
416c
417      do 1 i=1,ng
418      sum1=zero
419      sum2=zero
420      do 2 j=1,ng
421      arg=twopi*dble((i-1)*(j-1))/dble(ng)
422      sum1=sum1+barr(j)*cos(arg)
423      sum2=sum2+barr(j)*sin(arg)
424    2 continue
425      bmod(i)=sum1*sum1+sum2*sum2
426    1 continue
427      do 3 i=1,ng
428      if(bmod(i).lt.small) bmod(i)=half*(bmod(i-1)+bmod(i+1))
429    3 continue
430c
431      return
432      end
433      subroutine pme_init()
434c
435      implicit none
436c
437#include "pme_common.fh"
438#include "global.fh"
439c
440      call ga_zero_list(mgz,lslab)
441      call pme_flag(0,0,me)
442      call pme_flag(1,0,me)
443      call ga_sync()
444c
445      return
446      end
447