1* 2* $Id$ 3* 4c************************************************************************ 5c Complex 3D FFT interface. 6c Based on a blockwise distribution of slabs dimensioned id1xid2. 7c The blockwise (and contiguous) distribution *must* be performed 8c outside of this routine. 9c 10c GENERAL compilation: 11c uses the portable "EMSL" FFT, pfft1.0. 12c IBM SP2: 13c uses the faster ESSL FFT. 14c 15c Non-destructive interface (see below). 16c ------------------------------------------------------------------------ 17 SUBROUTINE nwfft3d(nd1,nd2,nd3,idir, 18 . x1,x2,map1,map2,doMap, 19 . iwrklen,iwrk,rwrklen,rwrk,jstatus,ffttype) 20 implicit none 21#include "errquit.fh" 22 INTEGER nd1,nd2,nd3, isign,jstatus,idir 23 INTEGER map1(*),map2(*) 24 INTEGER doMap 25 DOUBLE PRECISION x1(*),x2(*) 26 INTEGER rwrklen 27 DOUBLE PRECISION rwrk(*) 28 INTEGER iwrklen 29 INTEGER iwrk(*) 30 INTEGER ffttype 31#include "mafdecls.fh" 32c *** input parameters described 33c 34c nd1,nd2,nd3: dimension of x1 and x2 35c isign: 1 => forward fft; -1 => reverse fft; -2 => reverse, no scaling 36c x1: data to be fft-ed. input(x1) = output(x1), i.e., non-destructive. 37c x2: output fft(x1). Note that EMSL uses a destructive interface, while 38c TYPE: double precsion => x1(dim), dim = nd1*nd2*nd3*2 39c double complex => x1(dim), dim = nd1*nd2*nd3 40c map1: (EMSL) id of processor owning ith plane: Computed here according 41c to a blockwise decomposition. 42c map2: (EMSL) id of processor owning ith plane after transpose: computed here 43c according to a blockwise decomposition. 44c NOTE1: map1 and map2 dimensions are greater than nd3-1 45c NOTE2: map1 and map2 are NOT used for ESSL call (may dimension to 1) 46c doMap: (EMSL) forces recomputation of map; map computed on first 47c call and saved. Can also be use 48c rwrklen,iwrklen: if <> 0, use for EMSL routine scratch memory 49c ffttype: 1=>EMSL fft; 2=>ESSL fft 50c 51c *** local variables 52 integer d1,d3,owner 53 double precision scale 54 integer blksz,computedMap 55 integer n123 56#ifdef ESSLFFT 57 integer icontext,ip(40) 58 integer initESSL,iESSL 59#endif 60#ifdef EMSLFFT 61 integer nwords, nrealscr,ma_nrealscr, ma_nintscr 62 logical bstatus 63 integer dblh,inth, idbl,iint 64 integer initSCRATCH, mamem 65 integer nvecs1, nvecs2, n1max, n2max, nele, nelebk 66 integer lstcnt, ii, m2, n3 67#endif 68c integer isize 69c parameter (isize=128*128*128) 70c integer iwork(isize) 71c double precision work(isize) 72 integer nodeid,nnodes,me,nproc 73#ifdef ESSLFFT 74 data iESSL /0/ 75 save iESSL 76#endif 77#ifdef EMSLFFT 78 data initSCRATCH,ma_nrealscr,ma_nintscr /0,0,0/ 79 save initSCRATCH, ma_nrealscr,ma_nintscr, dblh,inth,idbl,iint 80#endif 81* 82 data computedMap /0/ 83 save computedMap 84* 85 me = nodeid() 86 nproc = nnodes() 87 isign=idir 88 if(isign.lt.-1) isign=-1 89 90c if (me.EQ.0) print*,' ENTER FFT ',nd1,nd2,nd3,' isign=',isign 91 92c --- calculate constants ---- 93 n123 = nd1*nd2*nd3 94c scale for forward (isign==1) and reverse (isign==-1) fft 95 if (idir.EQ.-1) then 96 scale = 1.D0/dble(n123) 97 else 98 scale = 1.D0 99 endif 100 101#ifdef ESSLFFT 102 ffttype = 2 103#else 104 ffttype = 1 105#endif 106 107#ifdef EMSLFFT 108 blksz = nd3/ nproc 109#endif 110 111#ifdef ESSLFFT 112c ** do not use defaults 113 ip(1) = 1 114c ** return normal form, i.e., x2 = Transpose(FFT(x1)), 115c where FFT transposes x1. 116 ip(2) = 1 117c ** fft x1(nd1,nd2,*) 118 ip(20) = nd1 119 ip(21) = nd2 120 121 if (iESSL.EQ.0) then 122 iESSL = 1 123 jstatus = initESSL(icontext) 124 if (jstatus.LT.0) then 125 call blacs_abort(icontext,1) 126 call errquit('nwfft3d: ESSL initialization failed',0, 127 & UNKNOWN_ERR) 128 endif 129 endif 130#endif 131 132 133 134c determine map if necessary (required once) for EMSL routine 135#ifdef EMSLFFT 136 if (blksz.LE.0) then 137 call errquit('nwfft3d:: internal error: bad blocksize',0, 138 & UNKNOWN_ERR) 139 endif 140 if (doMap.EQ.1 .OR. computedMap.EQ.0) then 141 owner = 0 142 do d3 = 1, nd3 143 if (d3.GT.blksz*(owner+1)) owner = owner+1 144 map1(d3) = owner 145 map2(d3) = owner 146 enddo 147 computedMap = 1 148 endif 149c 150 151 NELEBK = 2 * ND1 ! complex to complex 152 NELEBK = 2 * ( ND1 / 2 + 1 ) ! real-> complex 153c 154 if ( me.eq.0) then 155 do ii=1, nd3 156 write(*,*) ' map1, map2 ', ii, map1(ii), map2(ii) 157 enddo 158 call ffflush() 159 endif 160c 161 NELEBK = max(2*nd1, 2*(nd1/2+1)) 162 NVECS1 = LSTCNT( ME, MAP1, nd3 ) 163 NVECS2 = LSTCNT( ME, MAP2, nd3 ) 164 N1MAX = 0 165 N2MAX = 0 166 do iI = 0, NPROC-1 167 N1MAX = MAX( N1MAX, LSTCNT( iI, MAP1, N3 ) ) 168 N2MAX = MAX( N2MAX, LSTCNT( iI, MAP2, M2 ) ) 169 enddo 170 NELE = NELEBK * MAX( M2 * N1MAX, N3 * N2MAX ) 171 nrealscr = nele 172c 173 if (rwrklen.NE.0.AND.iwrklen.NE.0) then 174 mamem = 0 175 else 176 mamem = 1 177 endif 178 if(mamem.EQ.1.AND. 179 . (nrealscr.GT.ma_nrealscr.OR.initSCRATCH.EQ.0))then 180c free previous space if present 181 if (ma_nrealscr.GT.0) then 182 bstatus = MA_free_heap(dblh) 183 endif 184 nwords = MA_inquire_heap(MT_DBL) 185 if (nwords.LT.nrealscr) then 186 call errquit('nwfft3d: insufficient heap for dscratch',0, 187 & MA_ERR) 188 else 189 if(.NOT.MA_alloc_get(MT_DBL,nrealscr,'fftdbl',dblh,idbl))then 190 call errquit('nwfft3d: insufficient heap for dscratch',0, 191 & MA_ERR) 192 endif 193 endif 194 ma_nrealscr = nrealscr 195 endif 196 197c integer scratch depends on the number of processes. Assume, therefore, 198c it will not change during the course of a run. (see pfft1.0/pfft/chk3d.F) 199 if (mamem.EQ.1.AND.initSCRATCH.EQ.0) then 200 ma_nintscr = 5*nproc+100 201 nwords = MA_inquire_heap(MT_INT) 202 if (nwords.LT.ma_nintscr) then 203 call errquit('nwfft3d: insufficient heap for iscratch',0, 204 & MA_ERR) 205 else 206 if (.NOT.MA_alloc_get(MT_INT,ma_nintscr, 207 . 'fftint',inth,iint)) then 208 call errquit('nwfft3d: insufficient heap for iscratch',0, 209 & MA_ERR) 210 endif 211 endif 212 initSCRATCH = 1 213 endif 214#endif 215 216 217 218#ifdef ESSLFFT 219 call pdcft3(x1,x2,nd1,nd2,nd3,isign,scale,icontext,ip) 220#endif 221#ifdef EMSLFFT 222c * make the EMSL destructive interface, nondestructive 223c 224 do d1 = 1, nd1*nd2*blksz*2 225 x2(d1) = x1(d1) 226 enddo 227c print*,'-->',isign,nd1,nd2,nd3,x2(1),map1(1),map2(1),blksz 228c call pfft3d(isign,nd1,nd2,nd3,x2,map1,map2, 229c . isize,work,isize,iwork,jstatus) 230 231 232 if (mamem.EQ.1) then 233 call pfft3d(isign,nd1,nd2,nd3,x2,map1,map2, 234 . ma_nrealscr,dbl_mb(idbl),ma_nintscr,int_mb(iint),jstatus) 235 else 236 call pfft3d(isign,nd1,nd2,nd3,x2,map1,map2, 237 . rwrklen,rwrk,iwrklen,iwrk,jstatus) 238 endif 239 if (isign.EQ.-1) then 240 do d1 = 1, nd1*nd2*blksz*2 241 x2(d1) = x2(d1)*scale 242 enddo 243 endif 244#endif 245 246c status check 247 if (jstatus.NE.0) then 248#ifdef EMSLFFT 249 call errquit('nwfft3d: pfft3d() error return',0, UNKNOWN_ERR) 250#endif 251#ifdef FFTDEBUG 252 print*,me,'WARNING: forward fft status = ',jstatus,nd1,nd2,nd3 253#endif 254 endif 255 256 return 257 end 258 259c************************************************************************ 260 261 double precision FUNCTION inorm(x,y,n) 262 implicit none 263 integer n 264 double precision x(*),y(*) 265 double precision maxval, newval 266 integer i, me, nodeid 267 268 me = nodeid() 269 maxval = 0.D0 270 do i = 1, n 271 newval = ABS(x(i)-y(i)) 272 if (newval.GT.maxval) then 273c write(6,*) me,' i,x1,x2: ',i,x(i),y(i) 274 maxval=newval 275 endif 276 enddo 277 inorm = maxval 278 return 279 end 280 281c************************************************************************ 282