1 /***************************************************************************
2 * fft.cpp is part of Math Graphic Library
3 * Copyright (C) 2007-2016 Alexey Balakin <mathgl.abalakin@gmail.ru> *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU Lesser General Public License as *
7 * published by the Free Software Foundation; either version 3 of the *
8 * License, or (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU Lesser General Public *
16 * License along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20 #include "mgl2/datac.h"
21 #include "mgl2/data.h"
22 #include "mgl2/thread.h"
23 #if MGL_HAVE_GSL
24 #include <gsl/gsl_fft_complex.h>
25 #include <gsl/gsl_dht.h>
26 #include <gsl/gsl_sf.h>
27 #include <gsl/gsl_wavelet.h>
28 #endif
29 //-----------------------------------------------------------------------------
mglStartThreadT(void * (* func)(void *),long n,void * a,double * b,const void * v,void ** w,const long * p,const void * re,const void * im)30 void MGL_EXPORT mglStartThreadT(void *(*func)(void *), long n, void *a, double *b, const void *v, void **w, const long *p, const void *re, const void *im)
31 {
32 if(!func) return;
33 #if MGL_HAVE_PTHREAD
34 if(mglNumThr<1) mgl_set_num_thr(0);
35 if(mglNumThr>1)
36 {
37 pthread_t *tmp=new pthread_t[mglNumThr];
38 mglThreadT *par=new mglThreadT[mglNumThr];
39 for(long i=0;i<mglNumThr;i++) // put parameters into the structure
40 { par[i].n=n; par[i].a=a; par[i].v=v; par[i].w=w; par[i].b=b;
41 par[i].p=p; par[i].re=re; par[i].im=im; par[i].id=i; }
42 for(long i=0;i<mglNumThr;i++) pthread_create(tmp+i, 0, func, par+i);
43 for(long i=0;i<mglNumThr;i++) pthread_join(tmp[i], 0);
44 delete []tmp; delete []par;
45 }
46 else
47 #endif
48 {
49 mglNumThr = 1;
50 mglThreadT par;
51 par.n=n; par.a=a; par.b=b; par.v=v; par.w=w;
52 par.p=p; par.re=re; par.im=im; par.id=0;
53 func(&par);
54 }
55 }
56 //-----------------------------------------------------------------------------
57 struct mglFFTdata
58 {
59 long wnx,wny,wnz; // sizes for FFT
60 long hnx,hny,hnz; // sizes for Hankel
61 void *wtx,*wty,*wtz; // tables for FFT
62 void *htx,*hty,*htz; // tables for Hankel
mglFFTdatamglFFTdata63 mglFFTdata() { memset(this,0,sizeof(mglFFTdata)); }
~mglFFTdatamglFFTdata64 ~mglFFTdata() { Clear(); }
ClearmglFFTdata65 void Clear()
66 {
67 if(wnx) { wnx=0; mgl_fft_free(wtx,0,0); }
68 if(wny) { wny=0; mgl_fft_free(wty,0,0); }
69 if(wnz) { wnz=0; mgl_fft_free(wtz,0,0); }
70 #if MGL_HAVE_GSL
71 if(hnx) { hnx=0; gsl_dht_free((gsl_dht*)htx); }
72 if(hny) { hny=0; gsl_dht_free((gsl_dht*)hty); }
73 if(hnz) { hnz=0; gsl_dht_free((gsl_dht*)htz); }
74 #endif
75 }
76 } mgl_fft_data;
mgl_clear_fft()77 void MGL_EXPORT mgl_clear_fft() { mgl_fft_data.Clear(); }
78 //-----------------------------------------------------------------------------
mgl_fft_alloc_thr(long n)79 MGL_EXPORT void *mgl_fft_alloc_thr(long n)
80 {
81 #if MGL_HAVE_GSL
82 return gsl_fft_complex_workspace_alloc(n);
83 #else
84 return new double[2*n];
85 #endif
86 }
87 //-----------------------------------------------------------------------------
mgl_fft_alloc(long n,void ** space,long nthr)88 MGL_EXPORT void *mgl_fft_alloc(long n, void **space, long nthr)
89 {
90 if(space && nthr>0) for(long i=0;i<nthr;i++) space[i] = mgl_fft_alloc_thr(n);
91 #if MGL_HAVE_GSL
92 return gsl_fft_complex_wavetable_alloc(n);
93 #else
94 double *c = new double[2*n*n];
95 #pragma omp parallel for collapse(2)
96 for(long i=0;i<n;i++) for(long j=0;j<n;j++)
97 { c[2*(i+n*j)]=cos(2*M_PI*i*j/n); c[2*(i+n*j)+1]=-sin(2*M_PI*i*j/n); }
98 return c;
99 #endif
100 }
101 //-----------------------------------------------------------------------------
mgl_fft_free_thr(void * ws)102 void MGL_EXPORT mgl_fft_free_thr(void *ws)
103 {
104 #if MGL_HAVE_GSL
105 if(ws) gsl_fft_complex_workspace_free((gsl_fft_complex_workspace*)ws);
106 #else
107 if(ws) delete []((double*)ws);
108 #endif
109 }
110 //-----------------------------------------------------------------------------
mgl_fft_free(void * wt,void ** ws,long nthr)111 void MGL_EXPORT mgl_fft_free(void *wt, void **ws, long nthr)
112 {
113 if(ws && nthr>0) for(long i=0;i<nthr;i++) mgl_fft_free_thr(ws[i]);
114 #if MGL_HAVE_GSL
115 if(wt) gsl_fft_complex_wavetable_free((gsl_fft_complex_wavetable*)wt);
116 #else
117 if(wt) delete []((double*)wt);
118 #endif
119 }
120 //-----------------------------------------------------------------------------
mgl_fft(double * x,long s,long n,const void * wt,void * ws,int inv)121 void MGL_EXPORT mgl_fft(double *x, long s, long n, const void *wt, void *ws, int inv)
122 {
123 #if MGL_HAVE_GSL
124 if(inv) gsl_fft_complex_inverse(x, s, n, (const gsl_fft_complex_wavetable*)wt, (gsl_fft_complex_workspace*)ws);
125 else gsl_fft_complex_forward(x, s, n, (const gsl_fft_complex_wavetable*)wt, (gsl_fft_complex_workspace*)ws);
126 #else // NOTE this is VERY slow!
127 const double *c = (const double *)wt;
128 double *d = (double *)ws, f = inv?1./n:1;
129 memset(d,0,2*n*sizeof(double));
130 if(inv) for(long i=0;i<n;i++) for(long j=0;j<n;j++)
131 {
132 long ii = 2*(i+n*j), jj = 2*j*s;
133 d[2*i] += x[jj]*c[ii]+x[jj+1]*c[ii+1];
134 d[2*i+1]+= x[jj+1]*c[ii]-x[jj]*c[ii+1];
135 }
136 else for(long i=0;i<n;i++) for(long j=0;j<n;j++)
137 {
138 long ii = 2*(i+n*j), jj = 2*j*s;
139 d[2*i] += x[jj]*c[ii]-x[jj+1]*c[ii+1];
140 d[2*i+1]+= x[jj+1]*c[ii]+x[jj]*c[ii+1];
141 }
142 for(long j=0;j<n;j++)
143 { long jj = 2*j*s; x[jj] = d[2*j]*f; x[jj+1] = d[2*j+1]*f; }
144 #endif
145 }
146 //-----------------------------------------------------------------------------
mgl_fftx(void * par)147 static void* mgl_fftx(void *par)
148 {
149 mglThreadT *t=(mglThreadT *)par;
150 long nx=t->p[0];
151 #if !MGL_HAVE_PTHREAD
152 #pragma omp parallel
153 #endif
154 {
155 void *w = mgl_fft_alloc_thr(nx);
156 #pragma omp for nowait
157 for(long i=t->id;i<t->n;i+=mglNumThr)
158 mgl_fft(t->b+2*nx*i, 1, nx, t->v, w, t->p[3]);
159 mgl_fft_free_thr(w);
160 }
161 return 0;
162 }
mgl_ffty(void * par)163 static void* mgl_ffty(void *par)
164 {
165 mglThreadT *t=(mglThreadT *)par;
166 long nx=t->p[0],ny=t->p[1];
167 #if !MGL_HAVE_PTHREAD
168 #pragma omp parallel
169 #endif
170 {
171 void *w = mgl_fft_alloc_thr(ny);
172 #pragma omp for nowait
173 for(long i=t->id;i<t->n;i+=mglNumThr)
174 mgl_fft(t->b+2*(i%nx)+2*nx*ny*(i/nx), nx, ny, t->v, w, t->p[3]);
175 mgl_fft_free_thr(w);
176 }
177 return 0;
178 }
mgl_fftz(void * par)179 static void* mgl_fftz(void *par)
180 {
181 mglThreadT *t=(mglThreadT *)par;
182 long nx=t->p[0],ny=t->p[1],nz=t->p[2];
183 #if !MGL_HAVE_PTHREAD
184 #pragma omp parallel
185 #endif
186 {
187 void *w = mgl_fft_alloc_thr(nz);
188 #pragma omp for nowait
189 for(long i=t->id;i<t->n;i+=mglNumThr)
190 mgl_fft(t->b+2*i, nx*ny, nz, t->v, w, t->p[3]);
191 mgl_fft_free_thr(w);
192 }
193 return 0;
194 }
mgl_datac_fft(HADT d,const char * dir)195 void MGL_EXPORT mgl_datac_fft(HADT d, const char *dir)
196 {
197 if(!dir || *dir==0) return;
198 long nx = d->nx, ny = d->ny, nz = d->nz;
199 void *wt=0;
200 bool clear=false;
201 long par[4]={nx,ny,nz,strchr(dir,'i')!=0};
202 #if MGL_USE_DOUBLE
203 double *a = (double *)(d->a);
204 #else
205 double *a = new double[2*nx*ny*nz]; // manually convert to double
206 #pragma omp parallel for
207 for(long i=0;i<nx*ny*nz;i++)
208 { a[2*i] = real(d->a[i]); a[2*i+1] = imag(d->a[i]); }
209 #endif
210 if(strchr(dir,'x') && nx>1)
211 {
212 if(mgl_fft_data.wnx==nx) wt = mgl_fft_data.wtx;
213 else { clear = true; wt = mgl_fft_alloc(nx,0,0); }
214 mglStartThreadT(mgl_fftx,ny*nz,0,a,wt,0,par);
215 if(mgl_fft_data.wnx==0)
216 { clear = false; mgl_fft_data.wtx = wt; mgl_fft_data.wnx=nx; }
217 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
218 }
219 if(strchr(dir,'y') && ny>1)
220 {
221 if(mgl_fft_data.wny==ny) wt = mgl_fft_data.wty;
222 else { clear = true; wt = mgl_fft_alloc(ny,0,0); }
223 mglStartThreadT(mgl_ffty,nx*nz,0,a,wt,0,par);
224 if(mgl_fft_data.wny==0)
225 { clear = false; mgl_fft_data.wty = wt; mgl_fft_data.wny=ny; }
226 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
227 }
228 if(strchr(dir,'z') && nz>1)
229 {
230 if(mgl_fft_data.wnz==nz) wt = mgl_fft_data.wtz;
231 else { clear = true; wt = mgl_fft_alloc(nz,0,0); }
232 mglStartThreadT(mgl_fftz,nx*ny,0,a,wt,0,par);
233 if(mgl_fft_data.wnz==0)
234 { clear = false; mgl_fft_data.wtz = wt; mgl_fft_data.wnz=nz; }
235 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
236 }
237 #if !MGL_USE_DOUBLE
238 #pragma omp parallel for
239 for(long i=0;i<nx*ny*nz;i++) d->a[i] = dual(a[2*i], a[2*i+1]);
240 delete []a;
241 #endif
242 }
243 //-----------------------------------------------------------------------------
mgl_data_fourier(HMDT re,HMDT im,const char * dir)244 void MGL_EXPORT mgl_data_fourier(HMDT re, HMDT im, const char *dir)
245 {
246 if(!dir || *dir==0) return;
247 long nx = re->nx, ny = re->ny, nz = re->nz;
248 if(nx*ny*nz != im->nx*im->ny*im->nz || dir[0]==0) return;
249 bool clear=false;
250 void *wt=0;
251 long par[4]={nx,ny,nz,strchr(dir,'i')!=0};
252 double *a = new double[2*nx*ny*nz];
253 #pragma omp parallel for
254 for(long i=0;i<nx*ny*nz;i++)
255 { a[2*i] = re->a[i]; a[2*i+1] = im->a[i]; }
256 if(strchr(dir,'x') && nx>1)
257 {
258 if(mgl_fft_data.wnx==nx) wt = mgl_fft_data.wtx;
259 else { clear = true; wt = mgl_fft_alloc(nx,0,0); }
260 mglStartThreadT(mgl_fftx,ny*nz,0,a,wt,0,par);
261 if(mgl_fft_data.wnx==0)
262 { mgl_fft_data.wtx = wt; clear = false; mgl_fft_data.wnx=nx; }
263 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
264 }
265 if(strchr(dir,'y') && ny>1)
266 {
267 if(mgl_fft_data.wny==ny) wt = mgl_fft_data.wty;
268 else { clear = true; wt = mgl_fft_alloc(ny,0,0); }
269 mglStartThreadT(mgl_ffty,nx*nz,0,a,wt,0,par);
270 if(mgl_fft_data.wny==0)
271 { mgl_fft_data.wty = wt; clear = false; mgl_fft_data.wny=ny; }
272 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
273 }
274 if(strchr(dir,'z') && nz>1)
275 {
276 if(mgl_fft_data.wnz==nz) wt = mgl_fft_data.wtz;
277 else { clear = true; wt = mgl_fft_alloc(nz,0,0); }
278 mglStartThreadT(mgl_fftz,nx*ny,0,a,wt,0,par);
279 if(mgl_fft_data.wnz==0)
280 { mgl_fft_data.wtz = wt; clear = false; mgl_fft_data.wnz=nz; }
281 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
282 }
283 #pragma omp parallel for
284 for(long i=0;i<nx*ny*nz;i++)
285 { re->a[i] = a[2*i]; im->a[i] = a[2*i+1]; }
286 delete []a;
287 }
288 //-----------------------------------------------------------------------------
mgl_envx(void * par)289 static void* mgl_envx(void *par)
290 {
291 mglThreadT *t=(mglThreadT *)par;
292 long nx=t->p[0];
293 mreal *a = (mreal*)t->a;
294 #if !MGL_HAVE_PTHREAD
295 #pragma omp parallel
296 #endif
297 {
298 double *b = new double[2*nx];
299 void *w = mgl_fft_alloc_thr(nx);
300 #pragma omp for nowait
301 for(long i=t->id;i<t->n;i+=mglNumThr)
302 {
303 for(long j=0;j<nx;j++) { b[2*j] = a[j+i*nx]; b[2*j+1] = 0; }
304 mgl_fft(b, 1, nx, t->v, w, false);
305 for(long j=0;j<nx;j++) { b[j] *= 2.; b[j+nx] = 0; }
306 mgl_fft(b, 1, nx, t->v, w, true);
307 for(long j=0;j<nx;j++) a[j+i*nx] = hypot(b[2*j], b[2*j+1]);
308 }
309 mgl_fft_free_thr(w); delete []b;
310 }
311 return 0;
312 }
mgl_envy(void * par)313 static void* mgl_envy(void *par)
314 {
315 mglThreadT *t=(mglThreadT *)par;
316 long nx=t->p[0],ny=t->p[1];
317 mreal *a = (mreal*)t->a;
318 #if !MGL_HAVE_PTHREAD
319 #pragma omp parallel
320 #endif
321 {
322 double *b = new double[2*ny];
323 void *w = mgl_fft_alloc_thr(ny);
324 #pragma omp for nowait
325 for(long i=t->id;i<t->n;i+=mglNumThr)
326 {
327 for(long j=0;j<ny;j++) { b[2*j] = a[(i%nx)+nx*(j+ny*(i/nx))]; b[2*j+1] = 0; }
328 mgl_fft(b, 1, ny, t->v, t->w[t->id], false);
329 for(long j=0;j<ny;j++) { b[j] *= 2.; b[j+ny] = 0; }
330 mgl_fft(b, 1, ny, t->v, t->w[t->id], true);
331 for(long j=0;j<ny;j++) a[(i%nx)+nx*(j+ny*(i/nx))] = hypot(b[2*j], b[2*j+1]);
332 }
333 mgl_fft_free_thr(w); delete []b;
334 }
335 return 0;
336 }
mgl_envz(void * par)337 static void* mgl_envz(void *par)
338 {
339 mglThreadT *t=(mglThreadT *)par;
340 long nx=t->p[0],ny=t->p[1],nz=t->p[2],k=nx*ny;
341 mreal *a = (mreal*)t->a;
342 #if !MGL_HAVE_PTHREAD
343 #pragma omp parallel
344 #endif
345 {
346 double *b = new double[2*nz];
347 void *w = mgl_fft_alloc_thr(nz);
348 #pragma omp for nowait
349 for(long i=t->id;i<t->n;i+=mglNumThr)
350 {
351 for(long j=0;j<nz;j++) { b[2*j] = a[j*k+i]; b[2*j+1] = 0; }
352 mgl_fft(b, 1, nz, t->v, t->w[t->id], false);
353 for(long j=0;j<nz;j++) { b[j] *= 2.; b[j+nz] = 0; }
354 mgl_fft(b, 1, nz, t->v, t->w[t->id], true);
355 for(long j=0;j<nz;j++) a[j*k+i] = hypot(b[2*j], b[2*j+1]);
356 }
357 mgl_fft_free_thr(w); delete []b;
358 }
359 return 0;
360 }
mgl_data_envelop(HMDT d,char dir)361 void MGL_EXPORT mgl_data_envelop(HMDT d, char dir)
362 {
363 long nx=d->nx,ny=d->ny,nz=d->nz,par[3]={nx,ny,nz};
364 bool clear=false;
365 void *wt=0;
366 if(dir=='x' && nx>1)
367 {
368 if(mgl_fft_data.wnx==nx) wt = mgl_fft_data.wtx;
369 else { clear = true; wt = mgl_fft_alloc(nx,0,0); }
370 mglStartThreadT(mgl_envx,ny*nz,d->a,0,wt,0,par);
371 if(mgl_fft_data.wnx==0)
372 { mgl_fft_data.wtx = wt; clear = false; mgl_fft_data.wnx=nx; }
373 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
374 }
375 if(dir=='y' && ny>1)
376 {
377 if( mgl_fft_data.wny==ny) wt = mgl_fft_data.wty;
378 else { clear = true; wt = mgl_fft_alloc(ny,0,0); }
379 mglStartThreadT(mgl_envy,nx*nz,d->a,0,wt,0,par);
380 if(mgl_fft_data.wny==0)
381 { mgl_fft_data.wty = wt; clear = false; mgl_fft_data.wny=ny; }
382 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
383 }
384 if(dir=='z' && nz>1)
385 {
386 if(mgl_fft_data.wnz==nz) wt = mgl_fft_data.wtz;
387 else { clear = true; wt = mgl_fft_alloc(nz,0,0); }
388 mglStartThreadT(mgl_envz,nx*ny,d->a,0,wt,0,par);
389 if(mgl_fft_data.wnz==0)
390 { mgl_fft_data.wtz = wt; clear = false; mgl_fft_data.wnz=nz; }
391 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
392 }
393 }
394 //-----------------------------------------------------------------------------
mgl_datac_envelop(HADT c,char dir)395 void MGL_EXPORT mgl_datac_envelop(HADT c, char dir)
396 {
397 mglData re(c->nx, c->ny, c->nz), im(c->nx, c->ny, c->nz);
398 long n = c->GetNN();
399 #pragma omp parallel for
400 for(long i=0;i<n;i++) { re.a[i]=real(c->a[i]); im.a[i]=imag(c->a[i]); }
401 mgl_data_envelop(&re, dir);
402 mgl_data_envelop(&im, dir);
403 #pragma omp parallel for
404 for(long i=0;i<n;i++) c->a[i] = dual(re.a[i], im.a[i]);
405 }
406 //-----------------------------------------------------------------------------
mgl_stfa1(void * par)407 static void* mgl_stfa1(void *par)
408 {
409 mglThreadT *t=(mglThreadT *)par;
410 long mx=t->p[0],mz=t->p[2],dn=t->p[3],dd=dn/2,ny=t->p[4];
411 mreal *d = (mreal*)t->a;
412 HCDT re = (HCDT)t->re, im = (HCDT)t->im;
413 #if !MGL_HAVE_PTHREAD
414 #pragma omp parallel
415 #endif
416 {
417 double *a = new double[4*dn], ff;
418 void *w = mgl_fft_alloc_thr(2*dn);
419 #pragma omp for nowait
420 for(long ii=t->id;ii<t->n;ii+=mglNumThr)
421 {
422 long i = ii%mx, j = ii/mx, i0;
423 for(long k=0;k<2*dn;k++)
424 {
425 i0 = k-dd+j*dn; ff = 1;
426 if(i0<0) i0=0; else if(i0>=ny) i0=ny-1;
427 if(k<dd)
428 { ff = 0.5*(k-dd/2.)/dd; ff=0.5+ff*(3-ff*ff); }
429 else if(k>=dn+dd)
430 { ff = 0.5*(k-3.5*dd)/dd; ff=0.5-ff*(3-ff*ff); }
431 a[2*k] = re->v(i,i0)*ff; a[2*k+1] = im->v(i,i0)*ff;
432 }
433 mgl_fft(a, 1, 2*dn, t->v, w, false);
434 for(long k=0;k<dd;k++)
435 {
436 i0 = i+mx*(j+mz*k);
437 d[i0+mx*mz*dd] = hypot(a[4*k],a[4*k+1])/dn;
438 d[i0] = hypot(a[4*k+2*dn],a[4*k+2*dn+1])/dn;
439 }
440 }
441 mgl_fft_free_thr(w); delete []a;
442 }
443 return 0;
444 }
mgl_stfa2(void * par)445 static void* mgl_stfa2(void *par)
446 {
447 mglThreadT *t=(mglThreadT *)par;
448 long mx=t->p[0],my=t->p[1],dn=t->p[3],dd=dn/2,nx=t->p[4];
449 mreal *d = (mreal*)t->a;
450 HCDT re = (HCDT)t->re, im = (HCDT)t->im;
451 #if !MGL_HAVE_PTHREAD
452 #pragma omp parallel
453 #endif
454 {
455 double *a = new double[4*dn], ff;
456 void *w = mgl_fft_alloc_thr(2*dn);
457 #pragma omp for nowait
458 for(long ii=t->id;ii<t->n;ii+=mglNumThr)
459 {
460 long i = ii%my, j = ii/my, i0;
461 for(long k=0;k<2*dn;k++)
462 {
463 i0 = k-dd+i*dn; ff = 1;
464 if(i0<0) i0=0; else if(i0>=nx) i0=nx-1;
465 if(k<dd)
466 { ff = 0.5*(k-dd/2.)/dd; ff=0.5+ff*(3-ff*ff); }
467 else if(k>=3*dd)
468 { ff = 0.5*(k-3.5*dd)/dd; ff=0.5-ff*(3-ff*ff); }
469 a[2*k] = re->v(i0,j)*ff; a[2*k+1] = im->v(i0,j)*ff;
470 }
471 mgl_fft(a, 1, 2*dn, t->v, w, false);
472 for(long k=0;k<dd;k++)
473 {
474 i0 = i+my*(k+mx*j);
475 d[i0+dd*my] = hypot(a[4*k],a[4*k+1])/dn;
476 d[i0] = hypot(a[4*k+2*dn],a[4*k+2*dn+1])/dn;
477 }
478 }
479 mgl_fft_free_thr(w); delete []a;
480 }
481 return 0;
482 }
mgl_data_stfa(HCDT re,HCDT im,long dn,char dir)483 HMDT MGL_EXPORT mgl_data_stfa(HCDT re, HCDT im, long dn, char dir)
484 {
485 if(dn<2) return 0;
486 dn = 2*(dn/2);
487 long nx = re->GetNx(), ny = re->GetNy();
488 if(nx*ny!=im->GetNx()*im->GetNy()) return 0;
489 void *wt = mgl_fft_alloc(2*dn,0,0);
490 long mx,my,mz;
491 mglData *d=new mglData;
492 if(dir=='y')
493 {
494 mx = nx; my = dn; mz = ny/dn;
495 mgl_data_create(d, mx, mz, my);
496 long par[5]={mx,my,mz,dn,ny};
497 mglStartThreadT(mgl_stfa1,mx*mz,d->a,0,wt,0,par,re,im);
498 }
499 else
500 {
501 mx = dn; my = nx/dn; mz = ny;
502 mgl_data_create(d, my, mx, mz);
503 long par[5]={mx,my,mz,dn,nx};
504 mglStartThreadT(mgl_stfa2,my*mz,d->a,0,wt,0,par,re,im);
505 }
506 mgl_fft_free(wt,0,0);
507 return d;
508 }
509 //-----------------------------------------------------------------------------
mgl_sinx(void * par)510 static void* mgl_sinx(void *par)
511 {
512 mglThreadT *t=(mglThreadT *)par;
513 long nx=t->p[0];
514 mreal *a = (mreal*)t->a;
515 #if !MGL_HAVE_PTHREAD
516 #pragma omp parallel
517 #endif
518 {
519 double *b = new double[2*nx], f=sqrt(2./nx);
520 void *w = mgl_fft_alloc_thr(nx);
521 #pragma omp for nowait
522 for(long i=t->id;i<t->n;i+=mglNumThr)
523 {
524 long k = i*nx; memset(b,0,2*nx*sizeof(double));
525 for(long j=1;j<nx;j++) b[2*j]=sin(M_PI*j/nx)*(a[j+k]+a[nx-j+k])+(a[j+k]-a[nx-j+k])*0.5;
526 mgl_fft(b,1,nx,t->v,w,false);
527 a[k]=0; a[k+1]=b[0]*f/2; // fill sinfft
528 for(long j=1;j<nx/2;j++)
529 {
530 a[k+2*j] = -b[2*j+1]*f;
531 a[k+2*j+1] = a[k+2*j-1]+b[2*j]*f;
532 }
533 if(nx%2) a[nx-1] = -b[nx]*f;
534 }
535 mgl_fft_free_thr(w); delete []b;
536 }
537 return 0;
538 }
mgl_siny(void * par)539 static void* mgl_siny(void *par)
540 {
541 mglThreadT *t=(mglThreadT *)par;
542 long nx=t->p[0],ny=t->p[1];
543 mreal *a = (mreal*)t->a;
544 #if !MGL_HAVE_PTHREAD
545 #pragma omp parallel
546 #endif
547 {
548 double *b = new double[2*ny], f=sqrt(2./ny);
549 void *w = mgl_fft_alloc_thr(ny);
550 #pragma omp for nowait
551 for(long ii=t->id;ii<t->n;ii+=mglNumThr)
552 {
553 long i = ii%nx, k = ii/nx; memset(b,0,2*ny*sizeof(double));
554 for(long j=1;j<ny;j++) b[2*j]=sin(M_PI*j/ny)*(a[i+nx*(ny*k+j)]+a[i+nx*(ny*k+ny-j)])+(a[i+nx*(ny*k+j)]-a[i+nx*(ny*k+ny-j)])*0.5;
555 mgl_fft(b,1,ny,t->v,w,false);
556 a[i+nx*ny*k]=0; a[i+nx*(ny*k+1)]=b[0]*f/2; // fill sinfft
557 for(long j=1;j<ny/2;j++)
558 {
559 a[i+nx*(ny*k+2*j)] = -b[2*j+1]*f;
560 a[i+nx*(ny*k+2*j+1)] = a[i+nx*(ny*k+2*j-1)]+b[2*j]*f;
561 }
562 if(ny%2) a[i+nx*(ny*k+ny-1)] = -b[ny]*f;
563 }
564 mgl_fft_free_thr(w); delete []b;
565 }
566 return 0;
567 }
mgl_sinz(void * par)568 static void* mgl_sinz(void *par)
569 {
570 mglThreadT *t=(mglThreadT *)par;
571 long nx=t->p[0],ny=t->p[1],nz=t->p[2],k=nx*ny;
572 mreal *a = (mreal*)t->a;
573 #if !MGL_HAVE_PTHREAD
574 #pragma omp parallel
575 #endif
576 {
577 double *b = new double[2*nz], f=sqrt(2./nz);
578 void *w = mgl_fft_alloc_thr(nz);
579 #pragma omp for nowait
580 for(long i=t->id;i<t->n;i+=mglNumThr)
581 {
582 memset(b,0,2*nz*sizeof(double));
583 for(long j=1;j<nz;j++) b[2*j]=sin(M_PI*j/nz)*(a[i+k*j]+a[i+k*(nz-j)])+(a[i+k*j]-a[i+k*(nz-j)])*0.5;
584 mgl_fft(b,1,nz,t->v,w,false);
585 a[i]=0; a[i+k]=b[0]*f/2; // fill sinfft
586 for(long j=1;j<nz/2;j++)
587 {
588 a[i+k*2*j] = -b[2*j+1]*f;
589 a[i+k*(2*j+1)] = a[i+k*(2*j-1)]+b[2*j]*f;
590 }
591 if(nz%2) a[i+k*nz-k] = -b[nz]*f;
592 }
593 mgl_fft_free_thr(w); delete []b;
594 }
595 return 0;
596 }
mgl_data_sinfft(HMDT d,const char * dir)597 void MGL_EXPORT mgl_data_sinfft(HMDT d, const char *dir) // use DST-1
598 {
599 if(!dir || *dir==0) return;
600 bool clear=false;
601 void *wt=0;
602 long nx=d->nx, ny=d->ny, nz=d->nz, par[3]={nx,ny,nz};
603 if(strchr(dir,'x') && nx>1)
604 {
605 if(mgl_fft_data.wnx==nx) wt = mgl_fft_data.wtx;
606 else { clear = true; wt = mgl_fft_alloc(nx,0,0); }
607 mglStartThreadT(mgl_sinx,ny*nz,d->a,0,wt,0,par);
608 if(mgl_fft_data.wnx==0)
609 { mgl_fft_data.wtx = wt; clear = false; mgl_fft_data.wnx=nx; }
610 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
611 }
612 if(strchr(dir,'y') && ny>1)
613 {
614 if(mgl_fft_data.wny==ny) wt = mgl_fft_data.wty;
615 else { clear = true; wt = mgl_fft_alloc(ny,0,0); }
616 mglStartThreadT(mgl_siny,nx*nz,d->a,0,wt,0,par);
617 if(mgl_fft_data.wny==0)
618 { mgl_fft_data.wty = wt; clear = false; mgl_fft_data.wny=ny; }
619 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
620 }
621 if(strchr(dir,'z') && nz>1)
622 {
623 if(mgl_fft_data.wnz==nz) wt = mgl_fft_data.wtz;
624 else { clear = true; wt = mgl_fft_alloc(nz,0,0); }
625 mglStartThreadT(mgl_sinz,nx*ny,d->a,0,wt,0,par);
626 if(mgl_fft_data.wnz==0)
627 { mgl_fft_data.wtz = wt; clear = false; mgl_fft_data.wnz=nz; }
628 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
629 }
630 }
631 //-----------------------------------------------------------------------------
mgl_datac_sinfft(HADT c,const char * dir)632 void MGL_EXPORT mgl_datac_sinfft(HADT c, const char *dir)
633 {
634 if(!dir || *dir==0) return;
635 mglData re(c->nx, c->ny, c->nz), im(c->nx, c->ny, c->nz);
636 long n = c->GetNN();
637 #pragma omp parallel for
638 for(long i=0;i<n;i++) { re.a[i]=real(c->a[i]); im.a[i]=imag(c->a[i]); }
639 mgl_data_sinfft(&re, dir);
640 mgl_data_sinfft(&im, dir);
641 #pragma omp parallel for
642 for(long i=0;i<n;i++) c->a[i] = dual(re.a[i], im.a[i]);
643 }
644 //-----------------------------------------------------------------------------
mgl_cosx(void * par)645 static void* mgl_cosx(void *par)
646 {
647 mglThreadT *t=(mglThreadT *)par;
648 long nx=t->p[0],nn=nx-1;
649 mreal *a = (mreal*)t->a;
650 #if !MGL_HAVE_PTHREAD
651 #pragma omp parallel
652 #endif
653 {
654 double *b = new double[2*nx], f=sqrt(2./nn);
655 void *w = mgl_fft_alloc_thr(nn);
656 #pragma omp for nowait
657 for(long i=t->id;i<t->n;i+=mglNumThr)
658 {
659 long k = i*nx; memset(b,0,2*nx*sizeof(double));
660 for(long j=0;j<nn;j++) b[2*j]=(a[j+k]+a[nn-j+k])*0.5-sin(M_PI*j/nn)*(a[j+k]-a[nn-j+k]);
661 mgl_fft(b,1,nn,t->v,w,false);
662 double f1=0.5*(a[k]-a[nn+k]), s=-1;
663 a[nn+k]=0.5*(a[k]+a[nn+k]*((nn%2)?-1:1));
664 for(long j=1;j<nn;j++)
665 {
666 f1 += a[j+k]*cos(M_PI*j/nn);
667 a[nn+k] += a[j+k]*s; s = -s;
668 }
669 a[k]=b[0]*f; a[1+k]=f1*f; a[nn+k]*=f; // fill cosfft
670 for(long j=1;j<nn/2;j++)
671 {
672 a[2*j+k] = b[2*j]*f;
673 a[2*j+1+k] = a[2*j-1+k]-b[2*j+1]*f;
674 }
675 if(nn%2) a[nn-1+k] = b[nn-1]*f;
676 }
677 mgl_fft_free_thr(w);
678 delete []b;
679 }
680 return 0;
681 }
mgl_cosy(void * par)682 static void* mgl_cosy(void *par)
683 {
684 mglThreadT *t=(mglThreadT *)par;
685 long nx=t->p[0],ny=t->p[1],nn=ny-1;
686 mreal *a = (mreal*)t->a;
687 #if !MGL_HAVE_PTHREAD
688 #pragma omp parallel
689 #endif
690 {
691 double *b = new double[2*ny], f=sqrt(2./nn);
692 void *w = mgl_fft_alloc_thr(nn);
693 #pragma omp for nowait
694 for(long ii=t->id;ii<t->n;ii+=mglNumThr)
695 {
696 long i = ii%nx, k = ii/nx; memset(b,0,2*ny*sizeof(double));
697 for(long j=0;j<nn;j++) b[2*j]=(a[i+nx*(ny*k+j)]+a[i+nx*(ny*k+nn-j)])*0.5-sin(M_PI*j/nn)*(a[i+nx*(ny*k+j)]-a[i+nx*(ny*k+nn-j)]);
698 mgl_fft(b,1,nn,t->v,w,false);
699 double f1=0.5*(a[i+nx*ny*k]-a[i+nx*(ny*k+nn)]), s=-1;
700 a[i+nx*(ny*k+nn)]=0.5*(a[i+nx*ny*k]+a[i+nx*(ny*k+nn)]*((nn%2)?-1:1));
701 for(long j=1;j<nn;j++)
702 {
703 f1 += a[i+nx*(ny*k+j)]*cos(M_PI*j/nn);
704 a[i+nx*(ny*k+nn)] += a[i+nx*(ny*k+j)]*s; s = -s;
705 }
706 a[i+nx*ny*k]=b[0]*f; a[i+nx*(ny*k+1)]=f1*f; a[i+nx*(ny*k+nn)]*=f; // fill cosfft
707 for(long j=1;j<nn/2;j++)
708 {
709 a[i+nx*(ny*k+2*j)] = b[2*j]*f;
710 a[i+nx*(ny*k+2*j+1)] = a[i+nx*(ny*k+2*j-1)]-b[2*j+1]*f;
711 }
712 if(nn%2) a[i+nx*(ny*k+nn-1)] = b[nn-1]*f;
713 }
714 mgl_fft_free_thr(w);
715 delete []b;
716 }
717 return 0;
718 }
mgl_cosz(void * par)719 static void* mgl_cosz(void *par)
720 {
721 mglThreadT *t=(mglThreadT *)par;
722 long nx=t->p[0],ny=t->p[1],nz=t->p[2],k=nx*ny,nn=nz-1;
723 mreal *a = (mreal*)t->a;
724 #if !MGL_HAVE_PTHREAD
725 #pragma omp parallel
726 #endif
727 {
728 double *b = new double[2*nz], f=sqrt(2./nn);
729 void *w = mgl_fft_alloc_thr(nn);
730 #pragma omp for nowait
731 for(long i=t->id;i<t->n;i+=mglNumThr)
732 {
733 memset(b,0,2*nz*sizeof(double));
734 for(long j=0;j<nn;j++) b[2*j]=(a[i+k*j]+a[i+k*(nn-j)])*0.5-sin(M_PI*j/nn)*(a[i+k*j]-a[i+k*(nn-j)]);
735 mgl_fft(b,1,nn,t->v,w,false);
736 double f1=0.5*(a[i]-a[i+k*nn]), s=-1;
737 a[i+k*nn]=0.5*(a[i]+a[i+k*nn]*((nn%2)?-1:1));
738 for(long j=1;j<nn;j++)
739 {
740 f1 += a[i+k*j]*cos(M_PI*j/nn);
741 a[i+k*nn] += a[i+k*j]*s; s = -s;
742 }
743 a[i]=b[0]*f; a[i+k]=f1*f; a[i+k*nn]*=f; // fill cosfft
744 for(long j=1;j<nn/2;j++)
745 {
746 a[i+k*2*j] = b[2*j]*f;
747 a[i+k*2*j+k] = a[i+k*2*j-k]-b[2*j+1]*f;
748 }
749 if(nn%2) a[i+k*nn-k] = b[nn-1]*f;
750 }
751 mgl_fft_free_thr(w);
752 delete []b;
753 }
754 return 0;
755 }
mgl_data_cosfft(HMDT d,const char * dir)756 void MGL_EXPORT mgl_data_cosfft(HMDT d, const char *dir)
757 {
758 if(!dir || *dir==0) return;
759 bool clear=false;
760 void *wt=0;
761 long nx=d->nx, ny=d->ny, nz=d->nz, par[3]={nx,ny,nz};
762 if(strchr(dir,'x') && nx>1)
763 {
764 if(mgl_fft_data.wnx==nx-1) wt = mgl_fft_data.wtx;
765 else { clear = true; wt = mgl_fft_alloc(nx-1,0,0); }
766 mglStartThreadT(mgl_cosx,ny*nz,d->a,0,wt,0,par);
767 if(mgl_fft_data.wnx==0)
768 { mgl_fft_data.wtx = wt; clear = false; mgl_fft_data.wnx=nx-1; }
769 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
770 }
771 if(strchr(dir,'y') && ny>1)
772 {
773 if(mgl_fft_data.wny==ny-1) wt = mgl_fft_data.wty;
774 else { clear = true; wt = mgl_fft_alloc(ny-1,0,0); }
775 mglStartThreadT(mgl_cosy,nx*nz,d->a,0,wt,0,par);
776 if(mgl_fft_data.wny==0)
777 { mgl_fft_data.wty = wt; clear = false; mgl_fft_data.wny=ny-1; }
778 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
779 }
780 if(strchr(dir,'z') && nz>1)
781 {
782 if(mgl_fft_data.wnz==nz-1) wt = mgl_fft_data.wtz;
783 else { clear = true; wt = mgl_fft_alloc(nz-1,0,0); }
784 mglStartThreadT(mgl_cosz,nx*ny,d->a,0,wt,0,par);
785 if(mgl_fft_data.wnz==0)
786 { mgl_fft_data.wtz = wt; clear = false; mgl_fft_data.wnz=nz-1; }
787 if(clear) { mgl_fft_free(wt,0,0); clear = false; }
788 }
789 }
790 //-----------------------------------------------------------------------------
mgl_datac_cosfft(HADT c,const char * dir)791 void MGL_EXPORT mgl_datac_cosfft(HADT c, const char *dir)
792 {
793 if(!dir || *dir==0) return;
794 mglData re(c->nx, c->ny, c->nz), im(c->nx, c->ny, c->nz);
795 long n = c->GetNN();
796 #pragma omp parallel for
797 for(long i=0;i<n;i++) { re.a[i]=real(c->a[i]); im.a[i]=imag(c->a[i]); }
798 mgl_data_cosfft(&re, dir);
799 mgl_data_cosfft(&im, dir);
800 #pragma omp parallel for
801 for(long i=0;i<n;i++) c->a[i] = dual(re.a[i], im.a[i]);
802 }
803 //-----------------------------------------------------------------------------
mgl_transform_a(HCDT am,HCDT ph,const char * tr)804 HMDT MGL_EXPORT mgl_transform_a(HCDT am, HCDT ph, const char *tr)
805 {
806 long nx = am->GetNx(), ny = am->GetNy(), nz = am->GetNz();
807 if(nx*ny*nz != ph->GetNN() || !tr || tr[0]==0) return 0;
808 mglData re(nx,ny,nz), im(nx,ny,nz);
809 #pragma omp parallel for
810 for(long i=0;i<nx*ny*nz;i++)
811 {
812 mreal a=am->vthr(i), p=ph->vthr(i);
813 re.a[i] = a*cos(p); im.a[i] = a*sin(p);
814 }
815 return mgl_transform(&re, &im, tr);
816 }
817 //-----------------------------------------------------------------------------
mgl_transform(HCDT re,HCDT im,const char * tr)818 HMDT MGL_EXPORT mgl_transform(HCDT re, HCDT im, const char *tr)
819 {
820 if(!tr || *tr==0) return 0;
821 long nx = re->GetNx(), ny = re->GetNy(), nz = re->GetNz();
822 if(nx*ny*nz != im->GetNN() || tr[0]==0) return 0;
823 mglData rr(re),ii(im);
824 if(strchr(tr,'i') && strchr(tr,'f')) // general case
825 {
826 if(tr[0]=='f') mgl_data_fourier(&rr,&ii,"x");
827 if(tr[0]=='i') mgl_data_fourier(&rr,&ii,"xi");
828 if(tr[1]=='f') mgl_data_fourier(&rr,&ii,"y");
829 if(tr[1]=='i') mgl_data_fourier(&rr,&ii,"yi");
830 if(tr[2]=='f') mgl_data_fourier(&rr,&ii,"z");
831 if(tr[2]=='i') mgl_data_fourier(&rr,&ii,"zi");
832 }
833 else if(strchr(tr,'f')) // do Fourier only once for speeding up
834 {
835 char str[4] = " ";
836 if(tr[0]=='f') str[0]='x';
837 if(tr[1]=='f') str[1]='y';
838 if(tr[2]=='f') str[2]='z';
839 mgl_data_fourier(&rr,&ii,str);
840 }
841 else if(strchr(tr,'i')) // do Fourier only once for speeding up
842 {
843 char str[5] = " i";
844 if(tr[0]=='i') str[0]='x';
845 if(tr[1]=='i') str[1]='y';
846 if(tr[2]=='i') str[2]='z';
847 mgl_data_fourier(&rr,&ii,str);
848 }
849 else if(strchr(tr,'s')) // do Fourier only once for speeding up
850 {
851 if(tr[0]=='s') { rr.SinFFT("x"); ii.SinFFT("x"); }
852 if(tr[1]=='s') { rr.SinFFT("y"); ii.SinFFT("y"); }
853 if(tr[2]=='s') { rr.SinFFT("z"); ii.SinFFT("z"); }
854 }
855 else if(strchr(tr,'c')) // do Fourier only once for speeding up
856 {
857 if(tr[0]=='c') { rr.CosFFT("x"); ii.CosFFT("x"); }
858 if(tr[1]=='c') { rr.CosFFT("y"); ii.CosFFT("y"); }
859 if(tr[2]=='c') { rr.CosFFT("z"); ii.CosFFT("z"); }
860 }
861 else if(strchr(tr,'h')) // do Fourier only once for speeding up
862 {
863 if(tr[0]=='h') { rr.Hankel("x"); ii.Hankel("x"); }
864 if(tr[1]=='h') { rr.Hankel("y"); ii.Hankel("y"); }
865 if(tr[2]=='h') { rr.Hankel("z"); ii.Hankel("z"); }
866 }
867 mglData *d = new mglData(nx, ny, nz);
868 #pragma omp parallel for
869 for(long i=0;i<nx*ny*nz;i++) d->a[i] = hypot(rr.a[i],ii.a[i]);
870 return d;
871 }
872 //-----------------------------------------------------------------------------
mgl_transform_a_(uintptr_t * am,uintptr_t * ph,const char * tr,int l)873 uintptr_t MGL_EXPORT mgl_transform_a_(uintptr_t *am, uintptr_t *ph, const char *tr, int l)
874 { char *s=new char[l+1]; memcpy(s,tr,l); s[l]=0;
875 uintptr_t res = uintptr_t(mgl_transform_a(_DA_(am),_DA_(ph),s));
876 delete []s; return res; }
mgl_transform_(uintptr_t * re,uintptr_t * im,const char * tr,int l)877 uintptr_t MGL_EXPORT mgl_transform_(uintptr_t *re, uintptr_t *im, const char *tr, int l)
878 { char *s=new char[l+1]; memcpy(s,tr,l); s[l]=0;
879 uintptr_t res = uintptr_t(mgl_transform(_DA_(re),_DA_(im),s));
880 delete []s; return res; }
881 //-----------------------------------------------------------------------------
mgl_data_envelop_(uintptr_t * d,const char * dir,int)882 void MGL_EXPORT mgl_data_envelop_(uintptr_t *d, const char *dir, int)
883 { mgl_data_envelop(_DT_,*dir); }
mgl_datac_envelop_(uintptr_t * d,const char * dir,int)884 void MGL_EXPORT mgl_datac_envelop_(uintptr_t *d, const char *dir, int)
885 { mgl_datac_envelop(_DC_,*dir); }
886 //-----------------------------------------------------------------------------
887 #if MGL_HAVE_GSL
mgl_chnkx(void * par)888 static void* mgl_chnkx(void *par)
889 {
890 mglThreadT *t=(mglThreadT *)par;
891 long nx=t->p[0];
892 dual *a = (dual*)t->a;
893 const gsl_dht *dht = (const gsl_dht*)t->v;
894 double mm = gsl_sf_bessel_zero_J0(nx+1);
895
896 #if !MGL_HAVE_PTHREAD
897 #pragma omp parallel
898 #endif
899 {
900 double *b = new double[3*nx];
901 #pragma omp for nowait
902 for(long i=t->id;i<t->n;i+=mglNumThr)
903 {
904 for(long j=0;j<nx;j++) b[j] = real(a[j+nx*i]);
905 gsl_dht_apply(dht,b,b+nx);
906 for(long j=0;j<nx;j++) b[j] = imag(a[j+nx*i]);
907 gsl_dht_apply(dht,b,b+2*nx);
908 for(long j=0;j<nx;j++) a[j+nx*i] = dual(b[j+nx]*mm,b[j+2*nx]*mm);
909 }
910 delete []b;
911 }
912 return 0;
913 }
mgl_chnky(void * par)914 static void* mgl_chnky(void *par)
915 {
916 mglThreadT *t=(mglThreadT *)par;
917 long nx=t->p[0],ny=t->p[1];
918 dual *a = (dual*)t->a;
919 const gsl_dht *dht = (const gsl_dht*)t->v;
920 double mm = gsl_sf_bessel_zero_J0(ny+1);
921
922 #if !MGL_HAVE_PTHREAD
923 #pragma omp parallel
924 #endif
925 {
926 double *b = new double[3*ny];
927 #pragma omp for nowait
928 for(long ii=t->id;ii<t->n;ii+=mglNumThr)
929 {
930 long i = ii%nx, k = ii/nx;
931 for(long j=0;j<ny;j++) b[j] = real(a[i+nx*(j+ny*k)]);
932 gsl_dht_apply(dht,b,b+ny);
933 for(long j=0;j<ny;j++) b[j] = imag(a[i+nx*(j+ny*k)]);
934 gsl_dht_apply(dht,b,b+2*ny);
935 for(long j=0;j<ny;j++) a[i+nx*(j+ny*k)] = dual(b[j+ny]*mm,b[j+2*ny]*mm);
936 }
937 delete []b;
938 }
939 return 0;
940 }
mgl_chnkz(void * par)941 static void* mgl_chnkz(void *par)
942 {
943 mglThreadT *t=(mglThreadT *)par;
944 long k=t->p[0]*t->p[1],nz=t->p[2];
945 dual *a = (dual*)t->a;
946 const gsl_dht *dht = (const gsl_dht*)t->v;
947 double mm = gsl_sf_bessel_zero_J0(nz+1);
948
949 #if !MGL_HAVE_PTHREAD
950 #pragma omp parallel
951 #endif
952 {
953 double *b = new double[3*nz];
954 #pragma omp for nowait
955 for(long i=t->id;i<t->n;i+=mglNumThr)
956 {
957 for(long j=0;j<nz;j++) b[j] = real(a[i+j*k]);
958 gsl_dht_apply(dht,b,b+nz);
959 for(long j=0;j<nz;j++) b[j] = imag(a[i+j*k]);
960 gsl_dht_apply(dht,b,b+2*nz);
961 for(long j=0;j<nz;j++) a[i+j*k] = dual(b[j+nz]*mm,b[j+2*nz]*mm);
962 }
963 delete []b;
964 }
965 return 0;
966 }
mgl_datac_hankel(HADT d,const char * dir)967 void MGL_EXPORT mgl_datac_hankel(HADT d, const char *dir)
968 {
969 if(!dir || *dir==0) return;
970 gsl_dht *dht=0;
971 bool clear = false;
972 long nx=d->nx, ny=d->ny, nz=d->nz;
973 long par[3]={nx,ny,nz};
974 if(strchr(dir,'x') && nx>1)
975 {
976 if(mgl_fft_data.hnx==nx) dht = (gsl_dht *)mgl_fft_data.htx;
977 else { dht = gsl_dht_new(nx,0,1); clear = true; }
978 mglStartThreadT(mgl_chnkx,ny*nz,d->a,0,dht,0,par);
979 if(mgl_fft_data.hnx==0)
980 { mgl_fft_data.htx = dht; clear = false; mgl_fft_data.hnx=nx; }
981 }
982 if(strchr(dir,'y') && ny>1)
983 {
984 if(mgl_fft_data.hny==ny) dht = (gsl_dht *)mgl_fft_data.hty;
985 else { dht = gsl_dht_new(ny,0,1); clear = true; }
986 mglStartThreadT(mgl_chnky,nx*nz,d->a,0,dht,0,par);
987 if(mgl_fft_data.hny==0)
988 { mgl_fft_data.hty = dht; clear = false; mgl_fft_data.hny=ny; }
989 }
990 if(strchr(dir,'z') && nz>1)
991 {
992 if(mgl_fft_data.hnz==nz) dht = (gsl_dht *)mgl_fft_data.htz;
993 else { dht = gsl_dht_new(nz,0,1); clear = true; }
994 mglStartThreadT(mgl_chnkz,nx*ny,d->a,0,dht,0,par);
995 if(mgl_fft_data.hnz==0)
996 { mgl_fft_data.htz = dht; clear = false; mgl_fft_data.hnz=nz; }
997 }
998 if(clear) gsl_dht_free(dht);
999 }
1000 #else
mgl_datac_hankel(HADT,const char *)1001 void MGL_EXPORT mgl_datac_hankel(HADT , const char *){}
1002 #endif
mgl_datac_hankel_(uintptr_t * d,const char * dir,int l)1003 void MGL_EXPORT mgl_datac_hankel_(uintptr_t *d, const char *dir,int l)
1004 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1005 mgl_datac_hankel(_DC_,s); delete []s; }
1006 //-----------------------------------------------------------------------------
1007 #if MGL_HAVE_GSL
mgl_hnkx(void * par)1008 static void* mgl_hnkx(void *par)
1009 {
1010 mglThreadT *t=(mglThreadT *)par;
1011 long nx=t->p[0];
1012 mreal *a = (mreal*)t->a;
1013 const gsl_dht *dht = (const gsl_dht*)t->v;
1014 double mm = gsl_sf_bessel_zero_J0(nx+1);
1015
1016 #if !MGL_HAVE_PTHREAD
1017 #pragma omp parallel
1018 #endif
1019 {
1020 double *b = new double[2*nx];
1021 #pragma omp for nowait
1022 for(long i=t->id;i<t->n;i+=mglNumThr)
1023 {
1024 for(long j=0;j<nx;j++) b[j] = a[j+nx*i];
1025 gsl_dht_apply(dht,b,b+nx);
1026 for(long j=0;j<nx;j++) a[j+nx*i] = b[j+nx]*mm;
1027 }
1028 delete []b;
1029 }
1030 return 0;
1031 }
mgl_hnky(void * par)1032 static void* mgl_hnky(void *par)
1033 {
1034 mglThreadT *t=(mglThreadT *)par;
1035 long nx=t->p[0],ny=t->p[1];
1036 mreal *a = (mreal*)t->a;
1037 const gsl_dht *dht = (const gsl_dht*)t->v;
1038 double mm = gsl_sf_bessel_zero_J0(ny+1);
1039
1040 #if !MGL_HAVE_PTHREAD
1041 #pragma omp parallel
1042 #endif
1043 {
1044 double *b = new double[2*ny];
1045 #pragma omp for nowait
1046 for(long ii=t->id;ii<t->n;ii+=mglNumThr)
1047 {
1048 long i = ii%nx, k = ii/nx;
1049 for(long j=0;j<ny;j++) b[j] = a[i+nx*(j+ny*k)];
1050 gsl_dht_apply(dht,b,b+ny);
1051 for(long j=0;j<ny;j++)a[i+nx*(j+ny*k)] = b[j+ny]*mm;
1052 }
1053 delete []b;
1054 }
1055 return 0;
1056 }
mgl_hnkz(void * par)1057 static void* mgl_hnkz(void *par)
1058 {
1059 mglThreadT *t=(mglThreadT *)par;
1060 long k=t->p[0]*t->p[1],nz=t->p[2];
1061 mreal *a = (mreal*)t->a;
1062 const gsl_dht *dht = (const gsl_dht*)t->v;
1063 double mm = gsl_sf_bessel_zero_J0(nz+1);
1064
1065 #if !MGL_HAVE_PTHREAD
1066 #pragma omp parallel
1067 #endif
1068 {
1069 double *b = new double[2*nz];
1070 #pragma omp for nowait
1071 for(long i=t->id;i<t->n;i+=mglNumThr)
1072 {
1073 for(long j=0;j<nz;j++) b[j] = a[i+j*k];
1074 gsl_dht_apply(dht,b,b+nz);
1075 for(long j=0;j<nz;j++) a[i+j*k] = b[j+nz]*mm;
1076 }
1077 delete []b;
1078 }
1079 return 0;
1080 }
mgl_data_hankel(HMDT d,const char * dir)1081 void MGL_EXPORT mgl_data_hankel(HMDT d, const char *dir)
1082 {
1083 if(!dir || *dir==0) return;
1084 bool clear = false;
1085 gsl_dht *dht=0;
1086 long nx=d->nx, ny=d->ny, nz=d->nz;
1087 long par[3]={nx,ny,nz};
1088 if(strchr(dir,'x') && nx>1)
1089 {
1090 if(mgl_fft_data.hnx==nx) dht = (gsl_dht *)mgl_fft_data.htx;
1091 else { dht = gsl_dht_new(nx,0,1); clear = true; }
1092 mglStartThreadT(mgl_hnkx,ny*nz,d->a,0,dht,0,par);
1093 if(mgl_fft_data.hnx==0)
1094 { mgl_fft_data.htx = dht; clear = false; mgl_fft_data.hnx=nx; }
1095 }
1096 if(strchr(dir,'y') && ny>1)
1097 {
1098 if(mgl_fft_data.hny==ny) dht = (gsl_dht *)mgl_fft_data.hty;
1099 else { dht = gsl_dht_new(ny,0,1); clear = true; }
1100 mglStartThreadT(mgl_hnky,nx*nz,d->a,0,dht,0,par);
1101 if(mgl_fft_data.hny==0)
1102 { mgl_fft_data.hty = dht; clear = false; mgl_fft_data.hny=ny; }
1103 }
1104 if(strchr(dir,'z') && nz>1)
1105 {
1106 if(mgl_fft_data.hnz==nz) dht = (gsl_dht *)mgl_fft_data.htz;
1107 else { dht = gsl_dht_new(nz,0,1); clear = true; }
1108 mglStartThreadT(mgl_hnkz,nx*ny,d->a,0,dht,0,par);
1109 if(mgl_fft_data.hnz==0)
1110 { mgl_fft_data.htz = dht; clear = false; mgl_fft_data.hnz=nz; }
1111 }
1112 if(clear) gsl_dht_free(dht);
1113 }
1114 #else
mgl_data_hankel(HMDT,const char *)1115 void MGL_EXPORT mgl_data_hankel(HMDT , const char *){}
1116 #endif
mgl_data_hankel_(uintptr_t * d,const char * dir,int l)1117 void MGL_EXPORT mgl_data_hankel_(uintptr_t *d, const char *dir,int l)
1118 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1119 mgl_data_hankel(_DT_,s); delete []s; }
1120 //-----------------------------------------------------------------------------
mgl_data_fill_sample(HMDT d,const char * how)1121 void MGL_EXPORT mgl_data_fill_sample(HMDT d, const char *how)
1122 {
1123 if(!how || *how==0) return;
1124 bool kk = mglchr(how,'k');
1125 long n=d->nx,dn=1;
1126 mreal *aa=d->a;
1127 if(mglchr(how,'y')) { n=d->ny; dn=d->nx; }
1128 if(mglchr(how,'z')) { n=d->nz; dn=d->nx*d->ny; }
1129 if(mglchr(how,'h')) // Hankel
1130 {
1131 #if MGL_HAVE_GSL
1132 gsl_dht *dht = gsl_dht_new(n,0,1);
1133 #pragma omp parallel for
1134 for(long i=0;i<n;i++)
1135 aa[i*dn] = kk ? gsl_dht_k_sample(dht, i) : gsl_dht_x_sample(dht, i);
1136 gsl_dht_free(dht);
1137 #endif
1138 }
1139 else // Fourier
1140 {
1141 if(kk) for(long i=0;i<n;i++) aa[i*dn] = M_PI*(i<n/2 ? i:i-n);
1142 else for(long i=0;i<n;i++) aa[i*dn] = mreal(2*i-n)/n;
1143 }
1144 #pragma omp parallel for
1145 for(long i=0;i<d->GetNN();i++) aa[i] = aa[((i%(n*dn))/dn)*dn];
1146 }
mgl_data_fill_sample_(uintptr_t * d,const char * how,int l)1147 void MGL_EXPORT mgl_data_fill_sample_(uintptr_t *d, const char *how,int l)
1148 { char *s=new char[l+1]; memcpy(s,how,l); s[l]=0;
1149 mgl_data_fill_sample(_DT_,s); delete []s; }
1150 //-----------------------------------------------------------------------------
mgl_datac_fft_(uintptr_t * d,const char * dir,int l)1151 void MGL_EXPORT mgl_datac_fft_(uintptr_t *d, const char *dir, int l)
1152 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1153 mgl_datac_fft(_DC_,s); delete []s; }
mgl_data_fourier_(uintptr_t * re,uintptr_t * im,const char * dir,int l)1154 void MGL_EXPORT mgl_data_fourier_(uintptr_t *re, uintptr_t *im, const char *dir, int l)
1155 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1156 mgl_data_fourier(_DM_(re),_DM_(im),s); delete []s; }
mgl_data_stfa_(uintptr_t * re,uintptr_t * im,int * dn,char * dir,int)1157 uintptr_t MGL_EXPORT mgl_data_stfa_(uintptr_t *re, uintptr_t *im, int *dn, char *dir, int)
1158 { return uintptr_t(mgl_data_stfa(_DA_(re),_DA_(im),*dn,*dir)); }
mgl_data_cosfft_(uintptr_t * d,const char * dir,int l)1159 void MGL_EXPORT mgl_data_cosfft_(uintptr_t *d, const char *dir,int l)
1160 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1161 mgl_data_cosfft(_DT_,s); delete []s; }
mgl_data_sinfft_(uintptr_t * d,const char * dir,int l)1162 void MGL_EXPORT mgl_data_sinfft_(uintptr_t *d, const char *dir,int l)
1163 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1164 mgl_data_sinfft(_DT_,s); delete []s; }
mgl_datac_cosfft_(uintptr_t * d,const char * dir,int l)1165 void MGL_EXPORT mgl_datac_cosfft_(uintptr_t *d, const char *dir,int l)
1166 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1167 mgl_datac_cosfft(_DC_,s); delete []s; }
mgl_datac_sinfft_(uintptr_t * d,const char * dir,int l)1168 void MGL_EXPORT mgl_datac_sinfft_(uintptr_t *d, const char *dir,int l)
1169 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1170 mgl_datac_sinfft(_DC_,s); delete []s; }
1171 //-----------------------------------------------------------------------------
1172 /*static void* mgl_cor(void *par)
1173 {
1174 mglThreadC *t=(mglThreadC *)par;
1175 dual *a = t->a;
1176 const dual *b = t->b;
1177 #if !MGL_HAVE_PTHREAD
1178 #pragma omp parallel
1179 #endif
1180 for(long i=t->id;i<t->n;i+=mglNumThr) a[i] *= conj(b[i]);
1181 return 0;
1182 }*/
mgl_datac_correl(HCDT d1,HCDT d2,const char * dir)1183 HADT MGL_EXPORT mgl_datac_correl(HCDT d1, HCDT d2, const char *dir)
1184 {
1185 if(!dir || *dir==0) return 0;
1186 if(d2==NULL) d2=d1;
1187 long nx = d1->GetNx(), ny = d1->GetNy(), nz = d1->GetNz();
1188 if(nx*ny*nz!=d2->GetNN()) return 0;
1189 std::string dirs;
1190 if(strchr(dir,'x') && nx>1) dirs += 'x';
1191 if(strchr(dir,'y') && ny>1) dirs += 'y';
1192 if(strchr(dir,'z') && nz>1) dirs += 'z';
1193 if(dirs.empty()) return 0;
1194 mglDataC *a = new mglDataC(d1), *b=a; a->FFT(dirs.c_str());
1195 if(d1!=d2)
1196 { b = new mglDataC(d2); b->FFT(dirs.c_str()); }
1197 // mglStartThreadC(mgl_cor,0,nx*ny*nz,a->a,b->a); // TODO: sth strange
1198 #pragma omp parallel for
1199 for(long i=0;i<nx*ny*nz;i++) a->a[i] *= conj(b->a[i]);
1200 dirs += 'i'; a->FFT(dirs.c_str());
1201 if(d1!=d2) delete b;
1202 return a;
1203 }
mgl_datac_correl_(uintptr_t * d1,uintptr_t * d2,const char * dir,int l)1204 uintptr_t MGL_EXPORT mgl_datac_correl_(uintptr_t *d1, uintptr_t *d2, const char *dir,int l)
1205 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1206 uintptr_t res = uintptr_t(mgl_datac_correl(_DA_(d1),_DA_(d2),s));
1207 delete []s; return res; }
1208 //-----------------------------------------------------------------------------
mgl_data_correl(HCDT d1,HCDT d2,const char * dir)1209 HMDT MGL_EXPORT mgl_data_correl(HCDT d1, HCDT d2, const char *dir)
1210 {
1211 HADT a = mgl_datac_correl(d1,d2,dir); // NOTE: this is not so effective but straightforward way
1212 if(!a) return 0;
1213 const long nx = d1->GetNx(), ny = d1->GetNy(), nz = d1->GetNz();
1214 mglData *res = new mglData(nx,ny,nz);
1215 #pragma omp parallel for
1216 for(long i=0;i<nx*ny*nz;i++) res->a[i] = real(a->a[i]);
1217 delete a; return res;
1218 }
mgl_data_correl_(uintptr_t * d1,uintptr_t * d2,const char * dir,int l)1219 uintptr_t MGL_EXPORT mgl_data_correl_(uintptr_t *d1, uintptr_t *d2, const char *dir,int l)
1220 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1221 uintptr_t res = uintptr_t(mgl_data_correl(_DA_(d1),_DA_(d2),s));
1222 delete []s; return res; }
1223 //-----------------------------------------------------------------------------
mgl_data_wavelet(HMDT dat,const char * how,int k)1224 void MGL_EXPORT mgl_data_wavelet(HMDT dat, const char *how, int k)
1225 {
1226 #if MGL_HAVE_GSL
1227 gsl_wavelet *w=0;
1228 if(mglchr(how,'d')) w = gsl_wavelet_alloc(gsl_wavelet_daubechies, k);
1229 else if(mglchr(how,'D')) w = gsl_wavelet_alloc(gsl_wavelet_daubechies_centered, k);
1230 else if(mglchr(how,'h')) w = gsl_wavelet_alloc(gsl_wavelet_haar, k);
1231 else if(mglchr(how,'H')) w = gsl_wavelet_alloc(gsl_wavelet_haar_centered, k);
1232 else if(mglchr(how,'b')) w = gsl_wavelet_alloc(gsl_wavelet_bspline, k);
1233 else if(mglchr(how,'B')) w = gsl_wavelet_alloc(gsl_wavelet_bspline_centered, k);
1234 if(!w) return;
1235
1236 double *a;
1237 #if MGL_USE_DOUBLE
1238 a = dat->a;
1239 #else
1240 long nn = dat->GetNN();
1241 a = new double[nn];
1242 #pragma omp parallel for
1243 for(long i=0;i<nn;i++) a[i] = dat->a[i];
1244 #endif
1245 if(mglchr(how,'x'))
1246 #pragma omp parallel
1247 {
1248 long n = dat->nx;
1249 gsl_wavelet_workspace *work = gsl_wavelet_workspace_alloc(n);
1250 if(mglchr(how,'i'))
1251 #pragma omp for
1252 for(long i=0;i<dat->ny*dat->nz;i++)
1253 gsl_wavelet_transform_inverse(w, a+i*n, 1, n, work);
1254 else
1255 #pragma omp for
1256 for(long i=0;i<dat->ny*dat->nz;i++)
1257 gsl_wavelet_transform_forward(w, a+i*n, 1, n, work);
1258 gsl_wavelet_workspace_free(work);
1259 }
1260 if(mglchr(how,'y'))
1261 #pragma omp parallel
1262 {
1263 long n = dat->ny, s = dat->nx;
1264 gsl_wavelet_workspace *work = gsl_wavelet_workspace_alloc(n);
1265 if(mglchr(how,'i'))
1266 #pragma omp for collapse(2)
1267 for(long j=0;j<dat->nz;j++) for(long i=0;i<dat->nx;i++)
1268 gsl_wavelet_transform_inverse(w, a+i+n*s*j, s, n, work);
1269 else
1270 #pragma omp for collapse(2)
1271 for(long j=0;j<dat->nz;j++) for(long i=0;i<dat->nx;i++)
1272 gsl_wavelet_transform_forward(w, a+i+n*s*j, s, n, work);
1273 gsl_wavelet_workspace_free(work);
1274 }
1275 if(mglchr(how,'z'))
1276 #pragma omp parallel
1277 {
1278 long n = dat->nz, s = dat->nx*dat->ny;
1279 gsl_wavelet_workspace *work = gsl_wavelet_workspace_alloc(n);
1280 if(mglchr(how,'i'))
1281 #pragma omp for
1282 for(long i=0;i<dat->nx*dat->ny;i++)
1283 gsl_wavelet_transform_inverse(w, a+i, s, n, work);
1284 else
1285 #pragma omp for
1286 for(long i=0;i<dat->nx*dat->ny;i++)
1287 gsl_wavelet_transform_forward(w, a+i, s, n, work);
1288 gsl_wavelet_workspace_free(work);
1289 }
1290 #if !MGL_USE_DOUBLE
1291 #pragma omp parallel for
1292 for(long i=0;i<nn;i++) dat->a[i] = a[i];
1293 delete []a;
1294 #endif
1295 gsl_wavelet_free (w);
1296 #endif
1297 }
mgl_data_wavelet_(uintptr_t * d,const char * dir,int * k,int l)1298 void MGL_EXPORT mgl_data_wavelet_(uintptr_t *d, const char *dir, int *k,int l)
1299 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1300 mgl_data_wavelet(_DT_,s,*k); delete []s; }
1301 //-----------------------------------------------------------------------------
mgl_datac_wavelet(HADT c,const char * how,int k)1302 void MGL_EXPORT mgl_datac_wavelet(HADT c, const char *how, int k)
1303 {
1304 mglData re(c->nx, c->ny, c->nz), im(c->nx, c->ny, c->nz);
1305 long n = c->GetNN();
1306 #pragma omp parallel for
1307 for(long i=0;i<n;i++) { re.a[i]=real(c->a[i]); im.a[i]=imag(c->a[i]); }
1308 mgl_data_wavelet(&re, how, k);
1309 mgl_data_wavelet(&im, how, k);
1310 #pragma omp parallel for
1311 for(long i=0;i<n;i++) c->a[i] = dual(re.a[i], im.a[i]);
1312 }
mgl_datac_wavelet_(uintptr_t * d,const char * dir,int * k,int l)1313 void MGL_EXPORT mgl_datac_wavelet_(uintptr_t *d, const char *dir, int *k,int l)
1314 { char *s=new char[l+1]; memcpy(s,dir,l); s[l]=0;
1315 mgl_datac_wavelet(_DC_,s,*k); delete []s; }
1316 //-----------------------------------------------------------------------------
1317