1*
2* $Id$
3*
4c************************************************************************
5c Complex 3D FFT interface.
6c Based on a blockwise distribution of slabs dimensioned id1xid2.
7c The blockwise (and contiguous) distribution *must* be performed
8c outside of this routine.
9c
10c GENERAL compilation:
11c       uses the portable "EMSL" FFT,  pfft1.0.
12c IBM SP2:
13c       uses the faster ESSL FFT.
14c
15c Non-destructive interface (see below).
16c ------------------------------------------------------------------------
17      SUBROUTINE nwfft3d(nd1,nd2,nd3,idir,
18     .                   x1,x2,map1,map2,doMap,
19     .                   iwrklen,iwrk,rwrklen,rwrk,jstatus,ffttype)
20      implicit none
21#include "errquit.fh"
22      INTEGER nd1,nd2,nd3, isign,jstatus,idir
23      INTEGER map1(*),map2(*)
24      INTEGER doMap
25      DOUBLE PRECISION x1(*),x2(*)
26      INTEGER rwrklen
27      DOUBLE PRECISION rwrk(*)
28      INTEGER iwrklen
29      INTEGER iwrk(*)
30      INTEGER ffttype
31#include "mafdecls.fh"
32c *** input parameters described
33c
34c  nd1,nd2,nd3: dimension of x1 and x2
35c  isign: 1 => forward fft;  -1 => reverse fft; -2 => reverse, no scaling
36c  x1: data to be fft-ed. input(x1) = output(x1), i.e., non-destructive.
37c  x2: output fft(x1). Note that EMSL uses a destructive interface, while
38c      TYPE: double precsion  => x1(dim), dim  = nd1*nd2*nd3*2
39c            double complex   => x1(dim), dim  = nd1*nd2*nd3
40c  map1: (EMSL) id of processor owning ith plane: Computed here according
41c        to a blockwise decomposition.
42c  map2: (EMSL) id of processor owning ith plane after transpose: computed here
43c        according to a blockwise decomposition.
44c        NOTE1: map1 and map2 dimensions are greater than nd3-1
45c        NOTE2: map1 and map2 are NOT used for ESSL call (may dimension to 1)
46c  doMap: (EMSL) forces recomputation of map; map computed on first
47c              call and saved.  Can also be use
48c  rwrklen,iwrklen: if <> 0, use for EMSL routine scratch memory
49c  ffttype:  1=>EMSL fft;  2=>ESSL fft
50c
51c *** local variables
52      integer d1,d3,owner
53      double precision scale
54      integer blksz,computedMap
55      integer n123
56#ifdef ESSLFFT
57      integer icontext,ip(40)
58      integer initESSL,iESSL
59#endif
60#ifdef EMSLFFT
61      integer  nwords, nrealscr,ma_nrealscr, ma_nintscr
62      logical  bstatus
63      integer  dblh,inth, idbl,iint
64      integer  initSCRATCH, mamem
65      integer nvecs1, nvecs2, n1max, n2max, nele, nelebk
66      integer lstcnt, ii, m2, n3
67#endif
68c       integer isize
69c       parameter (isize=128*128*128)
70c       integer iwork(isize)
71c	double precision work(isize)
72       integer nodeid,nnodes,me,nproc
73#ifdef ESSLFFT
74      data iESSL /0/
75      save iESSL
76#endif
77#ifdef EMSLFFT
78      data initSCRATCH,ma_nrealscr,ma_nintscr /0,0,0/
79      save initSCRATCH, ma_nrealscr,ma_nintscr, dblh,inth,idbl,iint
80#endif
81*
82      data computedMap /0/
83      save computedMap
84*
85      me = nodeid()
86      nproc = nnodes()
87      isign=idir
88      if(isign.lt.-1) isign=-1
89
90c      if (me.EQ.0) print*,' ENTER FFT ',nd1,nd2,nd3,' isign=',isign
91
92c --- calculate constants ----
93      n123 = nd1*nd2*nd3
94c scale for forward (isign==1) and reverse (isign==-1) fft
95      if (idir.EQ.-1) then
96         scale = 1.D0/dble(n123)
97      else
98         scale = 1.D0
99      endif
100
101#ifdef ESSLFFT
102      ffttype = 2
103#else
104      ffttype = 1
105#endif
106
107#ifdef EMSLFFT
108      blksz = nd3/ nproc
109#endif
110
111#ifdef ESSLFFT
112c     ** do not use defaults
113      ip(1) = 1
114c     ** return normal form, i.e., x2 = Transpose(FFT(x1)),
115c       where FFT transposes x1.
116      ip(2) = 1
117c     ** fft x1(nd1,nd2,*)
118      ip(20) = nd1
119      ip(21) = nd2
120
121      if (iESSL.EQ.0) then
122         iESSL = 1
123         jstatus = initESSL(icontext)
124         if (jstatus.LT.0) then
125           call blacs_abort(icontext,1)
126           call errquit('nwfft3d: ESSL initialization failed',0,
127     &       UNKNOWN_ERR)
128         endif
129      endif
130#endif
131
132
133
134c determine map if necessary (required once) for EMSL routine
135#ifdef EMSLFFT
136      if (blksz.LE.0) then
137       call errquit('nwfft3d:: internal error: bad blocksize',0,
138     &       UNKNOWN_ERR)
139      endif
140      if (doMap.EQ.1 .OR. computedMap.EQ.0) then
141        owner = 0
142        do d3 = 1, nd3
143          if (d3.GT.blksz*(owner+1)) owner = owner+1
144          map1(d3) = owner
145          map2(d3) = owner
146        enddo
147        computedMap = 1
148      endif
149c
150
151      NELEBK = 2 * ND1           ! complex to complex
152      NELEBK = 2 * ( ND1 / 2 + 1 ) ! real-> complex
153c
154      if ( me.eq.0) then
155         do ii=1, nd3
156            write(*,*) ' map1, map2 ', ii, map1(ii), map2(ii)
157         enddo
158         call ffflush()
159      endif
160c
161      NELEBK = max(2*nd1, 2*(nd1/2+1))
162      NVECS1 = LSTCNT( ME, MAP1, nd3 )
163      NVECS2 = LSTCNT( ME, MAP2, nd3 )
164      N1MAX = 0
165      N2MAX = 0
166      do iI = 0, NPROC-1
167         N1MAX = MAX( N1MAX, LSTCNT( iI, MAP1, N3 ) )
168         N2MAX = MAX( N2MAX, LSTCNT( iI, MAP2, M2 ) )
169      enddo
170      NELE = NELEBK * MAX( M2 * N1MAX, N3 * N2MAX )
171      nrealscr = nele
172c
173      if (rwrklen.NE.0.AND.iwrklen.NE.0) then
174        mamem = 0
175      else
176        mamem = 1
177      endif
178      if(mamem.EQ.1.AND.
179     .         (nrealscr.GT.ma_nrealscr.OR.initSCRATCH.EQ.0))then
180c free previous space if present
181        if (ma_nrealscr.GT.0) then
182           bstatus = MA_free_heap(dblh)
183        endif
184        nwords = MA_inquire_heap(MT_DBL)
185        if (nwords.LT.nrealscr) then
186          call errquit('nwfft3d: insufficient heap for dscratch',0,
187     &       MA_ERR)
188        else
189           if(.NOT.MA_alloc_get(MT_DBL,nrealscr,'fftdbl',dblh,idbl))then
190            call errquit('nwfft3d: insufficient heap for dscratch',0,
191     &       MA_ERR)
192           endif
193        endif
194        ma_nrealscr = nrealscr
195      endif
196
197c integer scratch depends on the number of processes. Assume, therefore,
198c it will not change during the course of a run. (see pfft1.0/pfft/chk3d.F)
199      if (mamem.EQ.1.AND.initSCRATCH.EQ.0) then
200        ma_nintscr = 5*nproc+100
201        nwords = MA_inquire_heap(MT_INT)
202        if (nwords.LT.ma_nintscr) then
203           call errquit('nwfft3d: insufficient heap for iscratch',0,
204     &       MA_ERR)
205        else
206          if (.NOT.MA_alloc_get(MT_INT,ma_nintscr,
207     .                         'fftint',inth,iint)) then
208           call errquit('nwfft3d: insufficient heap for iscratch',0,
209     &       MA_ERR)
210          endif
211        endif
212        initSCRATCH = 1
213      endif
214#endif
215
216
217
218#ifdef ESSLFFT
219      call pdcft3(x1,x2,nd1,nd2,nd3,isign,scale,icontext,ip)
220#endif
221#ifdef EMSLFFT
222c     * make the EMSL destructive interface, nondestructive
223c
224      do d1 = 1, nd1*nd2*blksz*2
225         x2(d1) = x1(d1)
226      enddo
227c	print*,'-->',isign,nd1,nd2,nd3,x2(1),map1(1),map2(1),blksz
228c     call pfft3d(isign,nd1,nd2,nd3,x2,map1,map2,
229c    .            isize,work,isize,iwork,jstatus)
230
231
232      if (mamem.EQ.1) then
233        call pfft3d(isign,nd1,nd2,nd3,x2,map1,map2,
234     .       ma_nrealscr,dbl_mb(idbl),ma_nintscr,int_mb(iint),jstatus)
235      else
236        call pfft3d(isign,nd1,nd2,nd3,x2,map1,map2,
237     .       rwrklen,rwrk,iwrklen,iwrk,jstatus)
238      endif
239      if (isign.EQ.-1) then
240        do d1 = 1, nd1*nd2*blksz*2
241          x2(d1) = x2(d1)*scale
242        enddo
243      endif
244#endif
245
246c status check
247      if (jstatus.NE.0) then
248#ifdef EMSLFFT
249        call errquit('nwfft3d: pfft3d() error return',0, UNKNOWN_ERR)
250#endif
251#ifdef FFTDEBUG
252        print*,me,'WARNING: forward fft status = ',jstatus,nd1,nd2,nd3
253#endif
254      endif
255
256      return
257      end
258
259c************************************************************************
260
261      double precision FUNCTION inorm(x,y,n)
262      implicit none
263      integer n
264      double precision x(*),y(*)
265      double precision maxval, newval
266      integer i, me, nodeid
267
268      me = nodeid()
269      maxval = 0.D0
270      do i = 1, n
271        newval = ABS(x(i)-y(i))
272        if (newval.GT.maxval) then
273c        write(6,*) me,' i,x1,x2: ',i,x(i),y(i)
274          maxval=newval
275        endif
276      enddo
277      inorm = maxval
278      return
279      end
280
281c************************************************************************
282