1 /***************************************************************************
2 * pde.cpp is part of Math Graphic Library
3 * Copyright (C) 2007-2016 Alexey Balakin <mathgl.abalakin@gmail.ru> *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU Lesser General Public License as *
7 * published by the Free Software Foundation; either version 3 of the *
8 * License, or (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU Lesser General Public *
16 * License along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20 #include "mgl2/data.h"
21 #include "mgl2/datac.h"
22 #include "mgl2/eval.h"
23 #include "mgl2/thread.h"
24 #include "mgl2/base.h"
25 #include "interp.hpp"
26 const double GAMMA=0.1; ///< value for damping
27 //-----------------------------------------------------------------------------
28 //
29 // Advanced PDE series in 2D case
30 //
31 //-----------------------------------------------------------------------------
mgl_operator_exp(long n,const dual * h,dual * a,dual * f)32 void static mgl_operator_exp(long n, const dual *h, dual *a, dual *f)
33 {
34 memset(f,0,2*n*sizeof(dual));
35 const long i1=n/2, i2=3*n/2-1;
36 #pragma omp parallel for
37 for(long j=0;j<n;j++)
38 {
39 long jp = (j+1)%n;
40 dual h1=h[n*j]*dual(0,1), g1=(h1+h[n*jp]*dual(0,1))/mreal(2);
41 dual h2=h[n-1+n*j]*dual(0,1), g2=(h2+h[n-1+n*jp]*dual(0,1))/mreal(2);
42 mreal k1=M_PI*2*j/n, k2 = M_PI*(2*j+1)/n;
43 for(long i=0;i<i1;i++)
44 {
45 f[2*j] += a[i]*exp(h1+dual(0,i*k1));
46 f[2*j+1] += a[i]*exp(g1+dual(0,i*k2));
47 }
48 for(long i=i1;i<i2;i++)
49 {
50 dual hh = h[i-i1+n*j];
51 f[2*j] += a[i]*exp(hh*dual(0,1)+dual(0,i*k1));
52 f[2*j+1] += a[i]*exp((hh+h[i-i1+n*jp])*dual(0,0.5)+dual(0,i*k2));
53 }
54 for(long i=i2;i<2*n;i++)
55 {
56 f[2*j] += a[i]*exp(h2+dual(0,i*k1));
57 f[2*j+1] += a[i]*exp(g2+dual(0,i*k2));
58 }
59 }
60 memset(a,0,2*n*sizeof(dual));
61 #pragma omp parallel for
62 for(long i=0;i<2*n;i++)
63 {
64 long ii=i-i1;
65 if(ii<0) ii=0;
66 if(ii>n-1) ii=n-1;
67 double kk=M_PI*2*i/n;
68 for(long j=0;j<n;j++)
69 {
70 dual h1 = h[ii+n*j], g1 = (h1+h[ii+n*((j+1)%n)])*dual(0,0.5);
71 a[i] += f[2*j]*exp(h1*dual(0,1)-dual(0,kk*j));
72 a[i] += f[2*j+1]*exp(g1-dual(0,kk*(j+0.5)));
73 }
74 }
75 }
76 //-----------------------------------------------------------------------------
mgl_operator_lin(long n,mreal * h,dual * a,dual * f,dual * g,dual * o,const dual * iexp)77 void static mgl_operator_lin(long n, mreal *h, dual *a, dual *f, dual *g, dual *o, const dual *iexp)
78 {
79 memset(f,0,2*n*sizeof(dual));
80 memset(g,0,2*n*sizeof(dual));
81 const long i1=n/2, i2=3*n/2-1;
82 #pragma omp parallel for
83 for(long j=0;j<n;j++)
84 {
85 long jp = (j+1)%n;
86 mreal h1=tanh(h[n*j]), g1=(h1+tanh(h[n*jp]))/2;
87 mreal h2=tanh(h[n-1+n*j]), g2=(h2+tanh(h[n-1+n*jp]))/2;
88 const dual *ie1=iexp+4*n*j, *ie2=iexp+2*n*(2*j+1);
89 for(long i=0;i<i1;i++)
90 {
91 dual e1=ie1[i], e2=ie2[i];
92 f[2*j] += a[i]*h1*e1; f[2*j+1] += a[i]*g1*e2;
93 g[2*j] += a[i]*e1; g[2*j+1] += a[i]*e2;
94 }
95 for(long i=i1;i<i2;i++)
96 {
97 mreal hh = tanh(h[i-i1+n*j]);
98 mreal gg = (hh+tanh(h[i-i1+n*jp]))/2;
99 dual e1=ie1[i], e2=ie2[i];
100 f[2*j] += a[i]*hh*e1; f[2*j+1] += a[i]*gg*e2;
101 g[2*j] += a[i]*e1; g[2*j+1] += a[i]*e2;
102 }
103 for(long i=i2;i<2*n;i++)
104 {
105 dual e1=ie1[i], e2=ie2[i];
106 f[2*j] += a[i]*h2*e1; f[2*j+1] += a[i]*g2*e2;
107 g[2*j] += a[i]*e1; g[2*j+1] += a[i]*e2;
108 }
109 }
110 memset(o,0,2*n*sizeof(dual));
111 #pragma omp parallel for
112 for(long i=0;i<2*n;i++)
113 {
114 long ii=i-i1;
115 if(ii<0) ii=0;
116 if(ii>n-1) ii=n-1;
117 const dual *ie1=iexp+2*n*i;
118 // double kk=M_PI*2*i/n;
119 for(long j=0;j<n;j++)
120 {
121 mreal h1 = tanh(h[ii+n*j]);
122 mreal g1 = (h1+tanh(h[ii+n*((j+1)%n)]))/2;
123 dual e1=conj(ie1[2*j]), e2=conj(ie1[2*j+1]);
124 o[i] += f[2*j]*e1 + f[2*j+1]*e2;
125 o[i] += g[2*j]*h1*e1 + g[2*j+1]*g1*e2;
126 }
127 }
128 }
129 //-----------------------------------------------------------------------------
mgl_apde_calc_ham(HMDT hs,bool old,const char * func,std::vector<mglDataA * > list,const mreal dd)130 HADT static mgl_apde_calc_ham(HMDT hs, bool old, const char *func, std::vector<mglDataA*> list, const mreal dd)
131 {
132 HADT ham = mglFormulaCalcC(func, list); mgl_datac_mul_num(ham,dd);
133 const long nx = ham->nx;
134 if(old)
135 {
136 mreal hh = ham->Imag().Minimal();
137 if(hh>0) hh=0;
138 #pragma omp parallel for
139 for(long i=0;i<nx*nx;i++)
140 {
141 hs->a[i] = sqrt(imag(ham->a[i])-hh); // non-additive term
142 ham->a[i] = dual(real(ham->a[i]),hh); // additive terms
143 }
144 }
145 else
146 {
147 mglData xIm(nx), pIm(nx);
148 #pragma omp parallel for
149 for(long i=0;i<nx;i++) // first find minimal values along x and p
150 {
151 dual *ax=ham->a+i, *ay=ham->a+i*nx;
152 mreal mx=imag(ax[0]), my=imag(ay[0]);
153 for(long j=1;j<nx;j++) my = (my<imag(ay[j]))?my:imag(ay[j]);
154 for(long j=1;j<nx;j++) mx = (mx<imag(ax[j*nx]))?mx:imag(ax[j*nx]);
155 xIm.a[i] = mx; pIm.a[i]=my;
156 }
157 mreal mIm=xIm.a[0]; mreal *aa=xIm.a; // global minimum
158 for(long j=1;j<nx;j++) mIm = (mIm<aa[j])?mIm:aa[j];
159 #pragma omp parallel for collapse(2)
160 for(long j=0;j<nx;j++) for(long i=0;i<nx;i++)
161 {
162 mreal hh = xIm.a[i]+pIm.a[j]-mIm;
163 long i0=i+nx*j;
164 hs->a[i0] = sqrt(fabs(imag(ham->a[i0])-hh)); // non-additive term. NOTE: fabs() guarantee absence of negative values due to rounding error
165 ham->a[i0] = dual(real(ham->a[i0]),hh); // additive terms
166 }
167 }
168 return ham;
169 }
170 //-----------------------------------------------------------------------------
171 // Solve equation dx/dy = func(p,x,y,|u|)[u] where p=d/dx. There are no assumptions about form of func().
mgl_pde_adv_c(HMGL gr,const char * func,HCDT ini_re,HCDT ini_im,mreal dt,mreal k0,const char * opt)172 HADT MGL_EXPORT mgl_pde_adv_c(HMGL gr, const char *func, HCDT ini_re, HCDT ini_im, mreal dt, mreal k0, const char *opt)
173 {
174 mreal gamma = gr->SaveState(opt); if(mgl_isnan(gamma)) gamma = 20;
175 const mglPoint &Min=gr->Min, &Max=gr->Max;
176 const long nx=ini_re->GetNx(), nt = long((Max.y-Min.y)/dt)+1;
177 if(nx<2 || nt<2 || Max.x==Min.x){ gr->SetWarn(mglWarnLow,"PDE"); return 0; } // Too small data
178 if(ini_im->GetNx() != nx) { gr->SetWarn(mglWarnDim,"PDE"); return 0; } // Wrong dimensions
179
180 mglDataC *res=new mglDataC(nx, nt);
181 mglData hIm(nx,nx); // for advanced damping calculation
182 mglDataC u(nx); u.Name(L"u");
183 mglDataV x(nx,nx), y(nx,nx), r(nx,nx);
184 mglDataW p(nx,nx); p.Name(L"p");
185 bool old = func[0]==';'; if(old) func=func+1;
186 x.Name(L"x"); y.Name(L"y"); r.Name(L"#$mgl");
187 const mreal dp = 2*M_PI/(Max.x-Min.x), dd = k0*dt/2;
188 x.Fill(Min.x,Max.x,'x'); p.Freq(dp/k0,'y');
189 std::vector<mglDataA*> list;
190 list.push_back(&x); list.push_back(&y); list.push_back(&p); list.push_back(&r); list.push_back(&u);
191
192 dual *a = new dual[2*nx]; memset(a,0,2*nx*sizeof(dual)); // Add "damping" area
193 dual *f = new dual[6*nx], *g=f+2*nx, *s=f+4*nx;
194 #pragma omp parallel for
195 for(long i=0;i<nx;i++) // Initial conditions
196 a[i+nx/2] = dual(ini_re->v(i), ini_im->v(i));
197 double *dmp = new double[2*nx]; memset(dmp,0,2*nx*sizeof(double));
198 #pragma omp parallel for
199 for(long i=0;i<2*nx;i++) // dumping
200 {
201 if(i<nx/2) dmp[i] += gamma*mgl_ipow((nx/2-i)/mreal(nx/2),2);
202 if(i>3*nx/2) dmp[i] += gamma*mgl_ipow((i-3*nx/2-1)/mreal(nx/2),2);
203 }
204 bool have_y = mglchr(func,'y');
205 HADT ham;
206 if(!have_y) ham = mgl_apde_calc_ham(&hIm, old, func, list, dd);
207 dual *iexp = new dual[4*nx*nx];
208 #pragma omp parallel for collapse(2)
209 for(long j=0;j<2*nx;j++) for(long i=0;i<2*nx;i++)
210 iexp[i+2*nx*j] = exp(dual(0,(M_PI*i*j)/nx));
211 for(long k=0;k<nt;k++)
212 {
213 memcpy(u.a,a+nx/2,nx*sizeof(dual));
214 memcpy(res->a+k*nx,a+nx/2,nx*sizeof(dual));
215 if(have_y)
216 { y.Fill(k*dt); ham = mgl_apde_calc_ham(&hIm, old, func, list, dd); }
217 mgl_operator_exp(nx,ham->a,a,f);
218 mgl_operator_lin(nx,hIm.a,a,f,g,s,iexp);
219 mgl_operator_lin(nx,hIm.a,s,f,g,s,iexp);
220 #pragma omp parallel for
221 for(long i=0;i<2*nx;i++)
222 a[i] = (a[i]-s[i]/mreal(8*nx*nx))*mreal(exp(-dmp[i]*dt)/2/nx);
223 if(have_y) delete ham;
224 }
225 delete []a; delete []f; delete []dmp; delete []iexp;
226 if(!have_y) delete ham;
227 gr->LoadState(); return res;
228 }
229 //-----------------------------------------------------------------------------
mgl_pde_adv(HMGL gr,const char * ham,HCDT ini_re,HCDT ini_im,mreal dz,mreal k0,const char * opt)230 HMDT MGL_EXPORT mgl_pde_adv(HMGL gr, const char *ham, HCDT ini_re, HCDT ini_im, mreal dz, mreal k0, const char *opt)
231 {
232 HADT res = mgl_pde_adv_c(gr,ham,ini_re,ini_im,dz,k0,opt);
233 HMDT out = mgl_datac_abs(res); delete res; return out;
234 }
235 //-----------------------------------------------------------------------------
mgl_pde_adv_c_(uintptr_t * gr,const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,mreal * dz,mreal * k0,const char * opt,int l,int lo)236 uintptr_t MGL_EXPORT mgl_pde_adv_c_(uintptr_t* gr, const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, mreal *dz, mreal *k0, const char *opt, int l, int lo)
237 { char *s=new char[l+1]; memcpy(s,ham,l); s[l]=0;
238 char *o=new char[lo+1]; memcpy(o,opt,lo); o[lo]=0;
239 uintptr_t res = uintptr_t(mgl_pde_adv_c(_GR_, s, _DA_(ini_re), _DA_(ini_im), *dz, *k0, o));
240 delete []o; delete []s; return res; }
241 //-----------------------------------------------------------------------------
mgl_pde_adv_(uintptr_t * gr,const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,mreal * dz,mreal * k0,const char * opt,int l,int lo)242 uintptr_t MGL_EXPORT mgl_pde_adv_(uintptr_t* gr, const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, mreal *dz, mreal *k0, const char *opt, int l, int lo)
243 { char *s=new char[l+1]; memcpy(s,ham,l); s[l]=0;
244 char *o=new char[lo+1]; memcpy(o,opt,lo); o[lo]=0;
245 uintptr_t res = uintptr_t(mgl_pde_adv(_GR_, s, _DA_(ini_re), _DA_(ini_im), *dz, *k0, o));
246 delete []o; delete []s; return res; }
247 //-----------------------------------------------------------------------------
248 //
249 // Simplified PDE series
250 //
251 //-----------------------------------------------------------------------------
252 struct mgl_pde_ham
253 {
254 ddual *a,*hxy,*hxv,*huv,*huy;
255 const char *eqs;
256 long nx,ny;
257 double xx,yy,xs,ys,dx,dy,dq,dp,zz;
258 double dd;
259 };
mgl_pde_hprep(const mgl_pde_ham * f)260 void static mgl_pde_hprep(const mgl_pde_ham *f)
261 {
262 const long nx = f->nx, ny = f->ny;
263 mglDataV x(nx,ny), y(nx,ny), z, r(nx,ny);
264 mglDataW p(nx,ny), q(nx,ny);
265 x.Name(L"x"); y.Name(L"y"); p.Name(L"p"); q.Name(L"q"); r.Name(L"#$mgl");
266 z.Name(L"z"); z.Fill(f->zz);
267 dual dd(0,f->dd);
268 mglData u(nx,ny); u.Name(L"u");
269 #pragma omp parallel for
270 for(long i=0;i<nx*ny;i++) u.a[i] = abs(f->a[i]);
271 std::vector<mglDataA*> list;
272 list.push_back(&x); list.push_back(&y); list.push_back(&z);
273 list.push_back(&p); list.push_back(&q); list.push_back(&u);
274
275 x.Fill(f->xx,f->xx+f->dx*(nx-1),'x'); p.Freq(0,'x');
276 y.Fill(f->yy,f->yy+f->dy*(ny-1),'y'); q.Freq(0,'y');
277 HADT res = mglFormulaCalcC(f->eqs, list);
278 #pragma omp parallel for
279 for(long i=0;i<nx*ny;i++) f->hxy[i] = res->a[i]*dd;
280 delete res;
281 if(ny>2)
282 {
283 x.Fill(f->xs); p.Freq(f->dp,'x');
284 res = mglFormulaCalcC(f->eqs, list);
285 #pragma omp parallel for
286 for(long i=0;i<nx*ny;i++) f->huy[i] = res->a[i]*dd;
287 delete res;
288 }
289 x.Fill(f->xs); p.Freq(f->dp,'x');
290 y.Fill(f->ys); q.Freq(f->dq,'y');
291 res = mglFormulaCalcC(f->eqs, list);
292 #pragma omp parallel for
293 for(long i=0;i<nx*ny;i++) f->huv[i] = res->a[i]*dd;
294 delete res;
295 if(ny>2)
296 {
297 x.Fill(f->xx,f->xx+f->dx*(nx-1),'x'); p.Freq(0,'x');
298 res = mglFormulaCalcC(f->eqs, list);
299 #pragma omp parallel for
300 for(long i=0;i<nx*ny;i++) f->hxv[i] = res->a[i]*dd;
301 delete res;
302 }
303 }
304 //-----------------------------------------------------------------------------
305 // Solve equation dx/dz = func(p,q,x,y,z,|u|)[u] where p=d/dx, q=d/dy. At this moment simplified form of ham is supported: ham = f(p,q,z) + g(x,y,z,'u'), where variable 'u'=|u| (for allowing solve nonlinear problems). You may specify imaginary part like ham = p^2 + 1i*x*(x>0).
mgl_pde_solve_c(HMGL gr,const char * ham,HCDT ini_re,HCDT ini_im,mreal dz,mreal k0,const char * opt)306 HADT MGL_EXPORT mgl_pde_solve_c(HMGL gr, const char *ham, HCDT ini_re, HCDT ini_im, mreal dz, mreal k0, const char *opt)
307 {
308 mreal gamma = gr->SaveState(opt); if(mgl_isnan(gamma)) gamma = GAMMA;
309 mglPoint Min=gr->Min, Max=gr->Max;
310 long nx=ini_re->GetNx(), ny=ini_re->GetNy(), nz = long((Max.z-Min.z)/dz)+1;
311 if(nx<2 || nz<2 || Max.x==Min.x) // Too small data
312 { gr->SetWarn(mglWarnLow,"PDE"); return 0; }
313 if(ini_im->GetNx()*ini_im->GetNy() != nx*ny)// Wrong dimensions
314 { gr->SetWarn(mglWarnDim,"PDE"); return 0; }
315 mglDataC *res=new mglDataC(nz, nx, ny);
316
317 ddual *a = new ddual[4*nx*ny], hh0; // Add "damping" area
318 ddual *hxy = new ddual[4*nx*ny], *hxv = new ddual[4*nx*ny];
319 ddual *huy = new ddual[4*nx*ny], *huv = new ddual[4*nx*ny];
320 ddual *hx = new ddual[2*nx], *hv = new ddual[2*ny];
321 ddual *hy = new ddual[2*ny], *hu = new ddual[2*nx];
322 double *dmp = new double[4*nx*ny];
323 memset(a,0,4*nx*ny*sizeof(ddual));
324 memset(dmp,0,4*nx*ny*sizeof(double));
325 #pragma omp parallel for collapse(2)
326 for(long j=0;j<ny;j++) for(long i=0;i<nx;i++) // Initial conditions
327 {
328 long i0 = i+nx/2+2*nx*(j+ny/2);
329 a[i0] = ddual(ini_re->v(i,j), ini_im->v(i,j));
330 res->a[nz*(i+nx*j)] = a[i0];
331 }
332 #pragma omp parallel for collapse(2)
333 for(long j=0;j<2*ny;j++) for(long i=0;i<2*nx;i++) // step 1
334 {
335 long i0 = i+2*nx*j;
336 if(i<nx/2) dmp[i0] += gamma*mgl_ipow((nx/2-i)/(nx/2.),2);
337 if(i>3*nx/2) dmp[i0] += gamma*mgl_ipow((i-3*nx/2-1)/(nx/2.),2);
338 if(j<ny/2) dmp[i0] += gamma*mgl_ipow((ny/2-j)/(ny/2.),2);
339 if(j>3*ny/2) dmp[i0] += gamma*mgl_ipow((j-3*ny/2-1)/(ny/2.),2);
340 }
341 mreal dx = (Max.x-Min.x)/(nx-1), dy = ny>1?(Max.y-Min.y)/(ny-1):0;
342 mreal dp = M_PI/(Max.x-Min.x)/k0, dq = M_PI/(Max.y-Min.y)/k0;
343 mreal xs=(Min.x+Max.x)/2, ys=(Min.y+Max.y)/2;
344 double dd = k0*dz;
345
346 mgl_pde_ham tmp;tmp.eqs = ham;
347 tmp.nx = 2*nx; tmp.ny = 2*ny; tmp.dd = dd; tmp.a=a;
348 tmp.hxy=hxy; tmp.hxv=hxv; tmp.huy=huy; tmp.huv=huv;
349 tmp.xx = Min.x-dx*(nx/2); tmp.xs = xs; tmp.dx = dx; tmp.dp = dp;
350 tmp.yy = Min.y-dy*(ny/2); tmp.ys = ys; tmp.dy = dy; tmp.dq = dq;
351
352 // prepare fft. NOTE: slow procedures due to unknown nx, ny.
353 void *wtx = mgl_fft_alloc(2*nx,0,0);
354 void *wty = mgl_fft_alloc(2*ny,0,0);
355 for(long k=1;k<nz;k++)
356 {
357 if(gr->NeedStop()) break;
358 tmp.zz = Min.z+dz*k;
359 memset(hxy,0,4*nx*ny*sizeof(ddual)); memset(hxv,0,4*nx*ny*sizeof(ddual));
360 memset(huv,0,4*nx*ny*sizeof(ddual)); memset(huy,0,4*nx*ny*sizeof(ddual));
361 mgl_pde_hprep(&tmp);
362 for(long i=0;i<2*nx;i++) { hx[i] = hxv[i]; hu[i] = huv[i]; }
363 for(long j=0;j<2*ny;j++) { hy[j] = huy[2*nx*j]; hv[j] = huv[2*nx*j];}
364 // rearrange arrays
365 hh0=hu[0];
366 if(ny>1)
367 #pragma omp parallel for collapse(2)
368 for(long j=0;j<2*ny;j++) for(long i=0;i<2*nx;i++)
369 {
370 long i0 = i+2*nx*j; huv[i0] -= hh0;
371 hxv[i0] -= hx[i]+hv[j]-hh0;
372 huy[i0] -= hu[i]+hy[j]-hh0;
373 }
374 else
375 #pragma omp parallel for
376 for(long i=0;i<4*nx*ny;i++) huv[i] -= hh0;
377 // solve equation
378 if(ny>1)
379 #pragma omp parallel
380 {
381 void *wsx = mgl_fft_alloc_thr(2*nx), *wsy = mgl_fft_alloc_thr(2*ny);
382 #pragma omp for
383 for(long i=0;i<4*nx*ny;i++) a[i] *= exp(hxy[i]-double(dmp[i]*dz));
384 #pragma omp for
385 for(long i=0;i<2*ny;i++) mgl_fft((double *)(a+i*2*nx), 1, 2*nx, wtx, wsx, false);
386 #pragma omp for
387 for(long i=0;i<4*nx*ny;i++) a[i] *= exp(huy[i]);
388 #pragma omp for
389 for(long i=0;i<2*nx;i++) mgl_fft((double *)(a+i), 2*nx, 2*ny, wty, wsy, false);
390 #pragma omp for
391 for(long i=0;i<4*nx*ny;i++) a[i] *= exp(huv[i]);
392 #pragma omp for
393 for(long i=0;i<2*ny;i++) mgl_fft((double *)(a+2*i*nx), 1, 2*nx, wtx, wsx, true);
394 #pragma omp for
395 for(long i=0;i<4*nx*ny;i++) a[i] *= exp(hxv[i]);
396 #pragma omp for
397 for(long i=0;i<2*nx;i++) mgl_fft((double *)(a+i), 2*nx, 2*ny, wty, wsy, true);
398 mgl_fft_free_thr(wsx); mgl_fft_free_thr(wsy);
399 }
400 else
401 #pragma omp parallel
402 {
403 void *wsx = mgl_fft_alloc_thr(2*nx);
404 #pragma omp for
405 for(long i=0;i<4*nx*ny;i++) a[i] *= exp(hxy[i]-double(dmp[i]*dz));
406 #pragma omp for
407 for(long i=0;i<2*ny;i++) mgl_fft((double *)(a+i*2*nx), 1, 2*nx, wtx, wsx, false);
408 #pragma omp for
409 for(long i=0;i<4*nx*ny;i++) a[i] *= exp(huv[i]);
410 #pragma omp for
411 for(long i=0;i<2*ny;i++) mgl_fft((double *)(a+2*i*nx), 1, 2*nx, wtx, wsx, true);
412 mgl_fft_free_thr(wsx);
413 }
414 #pragma omp parallel for collapse(2)
415 for(long j=0;j<ny;j++) for(long i=0;i<nx;i++) // save result
416 res->a[k+nz*(i+nx*j)] = a[i+nx/2+2*nx*(j+ny/2)];
417 }
418 mgl_fft_free(wtx,0,0); mgl_fft_free(wty,0,0);
419 delete []a; delete []dmp;
420 delete []hxy; delete []hxv; delete []huy; delete []huv;
421 delete []hx; delete []hy; delete []hu; delete []hv;
422 gr->LoadState();
423 return res;
424 }
425 //-----------------------------------------------------------------------------
mgl_pde_solve(HMGL gr,const char * ham,HCDT ini_re,HCDT ini_im,mreal dz,mreal k0,const char * opt)426 HMDT MGL_EXPORT mgl_pde_solve(HMGL gr, const char *ham, HCDT ini_re, HCDT ini_im, mreal dz, mreal k0, const char *opt)
427 {
428 HADT res = mgl_pde_solve_c(gr,ham,ini_re,ini_im,dz,k0,opt);
429 HMDT out = mgl_datac_abs(res); delete res; return out;
430 }
431 //-----------------------------------------------------------------------------
mgl_pde_solve_c_(uintptr_t * gr,const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,mreal * dz,mreal * k0,const char * opt,int l,int lo)432 uintptr_t MGL_EXPORT mgl_pde_solve_c_(uintptr_t* gr, const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, mreal *dz, mreal *k0, const char *opt, int l, int lo)
433 { char *s=new char[l+1]; memcpy(s,ham,l); s[l]=0;
434 char *o=new char[lo+1]; memcpy(o,opt,lo); o[lo]=0;
435 uintptr_t res = uintptr_t(mgl_pde_solve_c(_GR_, s, _DA_(ini_re), _DA_(ini_im), *dz, *k0, o));
436 delete []o; delete []s; return res; }
437 //-----------------------------------------------------------------------------
mgl_pde_solve_(uintptr_t * gr,const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,mreal * dz,mreal * k0,const char * opt,int l,int lo)438 uintptr_t MGL_EXPORT mgl_pde_solve_(uintptr_t* gr, const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, mreal *dz, mreal *k0, const char *opt, int l, int lo)
439 { char *s=new char[l+1]; memcpy(s,ham,l); s[l]=0;
440 char *o=new char[lo+1]; memcpy(o,opt,lo); o[lo]=0;
441 uintptr_t res = uintptr_t(mgl_pde_solve(_GR_, s, _DA_(ini_re), _DA_(ini_im), *dz, *k0, o));
442 delete []o; delete []s; return res; }
443 //-----------------------------------------------------------------------------
444 //
445 // ODE series
446 //
447 //-----------------------------------------------------------------------------
mgl_set_func(const mreal * x,mreal * dx,void * par)448 void MGL_NO_EXPORT mgl_set_func(const mreal *x, mreal *dx, void *par)
449 {
450 mglEqTxT *p=(mglEqTxT *)par;
451 long n = p->n/p->m;
452 for(long i=0;i<p->m;i++)
453 {
454 HMDT d = static_cast<HMDT>(p->head[i]);
455 memcpy(d->a, x+i*n, n*sizeof(mreal));
456 }
457 p->t->a[0] = x[p->n];
458 //#pragma omp parallel for collapse(2)
459 for(long j=0;j<p->m;j++)
460 {
461 HMDT d = mglFormulaCalc(p->str[j].c_str(),p->head);
462 mreal val = d->a[0];
463 if(d->nx<n)
464 {
465 d->Create(n);
466 for(long i=0;i<n;i++) d->a[i] = val;
467 }
468 else switch(p->brd)
469 {
470 default: // zero instead of NAN
471 case 0:
472 case 'z':
473 case '0':
474 for(long i=0;i<n;i++) if(mgl_isbad(d->a[i])) d->a[i] = 0;
475 break;
476 case 1: // constant at border
477 case 'c':
478 case '1':
479 d->a[0] = d->a[1]; d->a[n-1] = d->a[n-2]; break;
480 case 2: // linear at border
481 case 'l':
482 case '2':
483 d->a[0] = mreal(2)*d->a[1]-d->a[2];
484 d->a[n-1] = mreal(2)*d->a[n-2]-d->a[n-3]; break;
485 case 3: // square at border
486 case 's':
487 case '3':
488 d->a[0] = d->a[3]+mreal(3)*(d->a[1]-d->a[2]);
489 d->a[n-1] = d->a[n-4]+mreal(3)*(d->a[n-2]-d->a[n-3]); break;
490 }
491 memcpy(dx+j*n, d->a, n*sizeof(mreal));
492 delete d;
493 }
494 }
mgl_ode_solve_set(const char * func,const char * var,char brd,HCDT x0,mreal dt,mreal tmax)495 HMDT MGL_EXPORT mgl_ode_solve_set(const char *func, const char *var, char brd, HCDT x0, mreal dt, mreal tmax)
496 {
497 if(!var || !(*var) || !func || !x0) return 0;
498 mglEqTxT par;
499 par.var=var; par.brd=brd; par.FillStr(func);
500 const long n = par.n = x0->GetNx();
501 const long m = par.m = long(strlen(var)), nn = n/m; // number of variables
502
503 HMDT dat = new mglData[m+3];
504 for(long i=0;i<m;i++)
505 {
506 HMDT d = dat+i; d->Create(nn);
507 d->s = var[i]; par.head.push_back(d);
508 }
509 HMDT d = dat+m+1; d->Create(nn);
510 for(long i=0;i<nn;i++) d->a[i] = i;
511 d->s = 'j'; par.head.push_back(d);
512 (dat+m+2)->Create(nn); (dat+m+2)->s = "#$mgl";
513 par.head.push_back(dat+m+2);
514 par.t = dat+m; par.t->s = 't'; par.head.push_back(par.t);
515
516 mreal *xx = new mreal[n];
517 for(long i=0;i<n;i++) xx[i] = x0->vthr(i);
518 HMDT res = mgl_ode_solve_ex(mgl_set_func,n,xx,dt,tmax,&par,NULL);
519 delete []xx; delete []dat; return res;
520 }
521 //-----------------------------------------------------------------------------
mgl_set_funcC(const mreal * x,mreal * dx,void * par)522 void MGL_NO_EXPORT mgl_set_funcC(const mreal *x, mreal *dx, void *par)
523 {
524 mglEqTxT *p=(mglEqTxT *)par;
525 long n = p->n/p->m;
526 for(long i=0;i<p->m;i++)
527 {
528 HADT d = static_cast<HADT>(p->head[i]);
529 memcpy(d->a, x+2*i*n, 2*n*sizeof(mreal));
530 }
531 p->t->a[0] = x[2*p->n];
532 //#pragma omp parallel for collapse(2)
533 for(long j=0;j<p->m;j++)
534 {
535 HADT d = mglFormulaCalcC(p->str[j].c_str(),p->head);
536 dual val = d->a[0];
537 if(d->nx<n)
538 {
539 d->Create(n);
540 for(long i=0;i<n;i++) d->a[i] = val;
541 }
542 else switch(p->brd)
543 {
544 default:
545 case 0: // zero instead of NAN
546 case 'z':
547 case '0':
548 for(long i=0;i<n;i++) if(mgl_isbad(d->a[i])) d->a[i] = 0;
549 break;
550 case 1: // constant at border
551 case 'c':
552 case '1':
553 d->a[0] = d->a[1]; d->a[n-1] = d->a[n-2]; break;
554 case 2: // linear at border
555 case 'l':
556 case '2':
557 d->a[0] = mreal(2)*d->a[1]-d->a[2];
558 d->a[n-1] = mreal(2)*d->a[n-2]-d->a[n-3]; break;
559 case 3: // square at border
560 case 's':
561 case '3':
562 d->a[0] = d->a[3]+mreal(3)*(d->a[1]-d->a[2]);
563 d->a[n-1] = d->a[n-4]+mreal(3)*(d->a[n-2]-d->a[n-3]); break;
564 case -1: // exponent at border
565 case 4:
566 case 'e':
567 case '4':
568 d->a[0] = norm(d->a[2])<norm(d->a[1]) ? d->a[1] : d->a[1]*d->a[1]/d->a[2];
569 d->a[n-1] = norm(d->a[n-3])<norm(d->a[n-2]) ? d->a[n-2] : d->a[n-2]*d->a[n-2]/d->a[n-3];
570 break;
571 case -2: // gaussian at border
572 case 5:
573 case 'g':
574 case '5':
575 d->a[0] = norm(d->a[2])<norm(d->a[1]) ? d->a[3] : pow(d->a[1]/d->a[2],3)*d->a[3];
576 d->a[n-1] = norm(d->a[n-3])<norm(d->a[n-2]) ? d->a[n-4] : pow(d->a[n-2]/d->a[n-3],3)*d->a[n-4];
577 break;
578 }
579 memcpy(dx+2*j*n, d->a, 2*n*sizeof(mreal));
580 delete d;
581 }
582 }
mgl_ode_solve_set_c(const char * func,const char * var,char brd,HCDT x0,mreal dt,mreal tmax)583 HADT MGL_EXPORT mgl_ode_solve_set_c(const char *func, const char *var, char brd, HCDT x0, mreal dt, mreal tmax)
584 {
585 if(!var || !(*var) || !func || !x0) return 0;
586 mglEqTxT par;
587 par.var=var; par.brd=brd; par.FillStr(func);
588 const long n = par.n = x0->GetNx();
589 const long m = par.m = long(strlen(var)), nn = n/m; // number of variables
590
591 HADT dat = new mglDataC[m];
592 for(long i=0;i<m;i++)
593 {
594 HADT d = dat+i; d->Create(nn);
595 d->s = var[i]; par.head.push_back(d);
596 }
597 HMDT d = new mglData[3];
598 d->Create(nn); d->s = 'j';
599 for(long i=0;i<nn;i++) d->a[i] = i;
600 par.head.push_back(d);
601 (d+1)->Create(nn); (d+1)->s = "#$mgl";
602 par.head.push_back(d+1);
603 par.t = d+2; par.t->s = 't';
604 par.head.push_back(par.t);
605
606 mreal *xx = new mreal[2*n];
607 const mglDataC *c = dynamic_cast<const mglDataC *>(x0);
608 if(c) for(long i=0;i<n;i++)
609 { xx[2*i]=real(c->a[i]); xx[2*i+1]=imag(c->a[i]); }
610 else if(x0) for(long i=0;i<n;i++)
611 { xx[2*i] = x0->vthr(i); xx[2*i+1]=0; }
612 else for(long i=0;i<n;i++) xx[2*i] = xx[2*i+1]=0;
613
614 HMDT res = mgl_ode_solve_ex(mgl_set_funcC,2*n,xx,dt,tmax,&par,NULL);
615 delete []xx; delete []d; delete []dat;
616 const long nt=res->ny;
617 mglDataC *out = new mglDataC(n, nt);
618 #pragma omp parallel for
619 for(long i=0;i<nt*n;i++) out->a[i] = dual(res->a[2*i],res->a[2*i+1]);
620 delete res; return out;
621 }
622 //-----------------------------------------------------------------------------
mgl_txt_func(const mreal * x,mreal * dx,void * par)623 void MGL_NO_EXPORT mgl_txt_func(const mreal *x, mreal *dx, void *par)
624 {
625 mglEqTxT *p=(mglEqTxT *)par;
626 mreal vars[MGL_VS];
627 const long n = p->n;
628 for(long i=0;i<n;i++)
629 { char ch = p->var[i]; if(ch>='a' && ch<='z') vars[ch-'a']=x[i]; }
630 //#pragma omp parallel for
631 for(long i=0;i<n;i++)
632 dx[i] = mgl_expr_eval_v(p->eqR[i], vars);
633 }
mgl_ode_solve_str(const char * func,const char * var,HCDT x0,mreal dt,mreal tmax)634 HMDT MGL_EXPORT mgl_ode_solve_str(const char *func, const char *var, HCDT x0, mreal dt, mreal tmax)
635 {
636 if(!var || !(*var) || !func) return 0;
637 mglEqTxT par;
638 par.var=var; par.FillReal(func);
639 long n = par.n = long(par.str.size());
640 mreal *xx = new mreal[n];
641 for(long i=0;i<n;i++) xx[i] = x0?x0->vthr(i):0;
642 HMDT res = mgl_ode_solve_ex(mgl_txt_func,n,xx,dt,tmax,&par,NULL);
643 delete []xx; return res;
644 }
645 //-----------------------------------------------------------------------------
mgl_txt_funcC(const mreal * x,mreal * dx,void * par)646 void MGL_NO_EXPORT mgl_txt_funcC(const mreal *x, mreal *dx, void *par)
647 {
648 mglEqTxT *p=(mglEqTxT *)par;
649 mdual vars[MGL_VS];
650 const long n = p->n;
651 for(long i=0;i<n;i++)
652 { char ch = p->var[i]; if(ch>='a' && ch<='z') vars[ch-'a']=dual(x[2*i],x[2*i+1]); }
653 //#pragma omp parallel for
654 for(long i=0;i<n;i++)
655 {
656 dual r = mgl_cexpr_eval_v(p->eqC[i], vars);
657 dx[2*i] = real(r); dx[2*i+1] = imag(r);
658 }
659 }
mgl_ode_solve_str_c(const char * func,const char * var,HCDT x0,mreal dt,mreal tmax)660 HADT MGL_EXPORT mgl_ode_solve_str_c(const char *func, const char *var, HCDT x0, mreal dt, mreal tmax)
661 {
662 if(!var || !(*var) || !func) return 0;
663 mglEqTxT par; par.var=var;
664 par.var=var; par.FillCmplx(func);
665 long n = par.n = long(par.str.size());
666 mreal *xx = new mreal[2*n];
667 const mglDataC *c = dynamic_cast<const mglDataC *>(x0);
668 for(long i=0;i<n;i++)
669 {
670 if(c) { xx[2*i]=real(c->a[i]); xx[2*i+1]=imag(c->a[i]); }
671 else { xx[2*i] = x0?x0->vthr(i):0; xx[2*i+1]=0; }
672 }
673 HMDT res = mgl_ode_solve_ex(mgl_txt_funcC,2*n,xx,dt,tmax,&par,NULL);
674 delete []xx;
675 const long nt=res->ny;
676 mglDataC *out = new mglDataC(n, nt);
677 #pragma omp parallel for
678 for(long i=0;i<nt*n;i++) out->a[i] = dual(res->a[2*i],res->a[2*i+1]);
679 delete res; return out;
680 }
681 //-----------------------------------------------------------------------------
mgl_ode_solve(void (* func)(const mreal * x,mreal * dx,void * par),int n,const mreal * x0,mreal dt,mreal tmax,void * par)682 HMDT MGL_EXPORT mgl_ode_solve(void (*func)(const mreal *x, mreal *dx, void *par), int n, const mreal *x0, mreal dt, mreal tmax, void *par)
683 { return mgl_ode_solve_ex(func,n,x0,dt,tmax,par,0); }
mgl_ode_solve_ex(void (* func)(const mreal * x,mreal * dx,void * par),int n,const mreal * x0,mreal dt,mreal tmax,void * par,void (* bord)(mreal * x,const mreal * xp,void * par))684 HMDT MGL_EXPORT mgl_ode_solve_ex(void (*func)(const mreal *x, mreal *dx, void *par), int n, const mreal *x0, mreal dt, mreal tmax, void *par, void (*bord)(mreal *x, const mreal *xp, void *par))
685 {
686 bool scale=false;
687 if(dt*tmax<0) { scale = true; dt = fabs(dt); tmax = fabs(tmax); }
688 if(tmax<dt) return 0; // nothing to do
689 const long nt = int(tmax/dt+0.5)+1;
690 mglData *res=new mglData(n,nt);
691 mreal *x=new mreal[n+1], *k1=new mreal[n], *k2=new mreal[n], *k3=new mreal[n], *v=new mreal[n+1], hh=dt/2;
692 // initial conditions
693 for(long i=0;i<n;i++) x[i] = res->a[i] = x0[i];
694 // Runge Kutta scheme of 4th order
695 bool good=true;
696 long k;
697 x[n] = 0;
698 for(k=1;k<nt && good;k++)
699 {
700 double m0=0,m1=0,m2=0,m3=0, t = x[n];
701 func(x,k1,par);
702 if(scale)
703 {
704 for(long i=0;i<n;i++)
705 { double kk = fabs(k1[i]); if(kk>m0) m0 = kk; }
706 for(long i=0;i<n;i++) k1[i] /= m0;
707 }
708 else m0 = 1;
709 for(long i=0;i<n;i++) v[i] = x[i]+k1[i]*hh;
710 v[n] = t+hh/m0;
711 func(v,k2,par);
712 if(scale)
713 {
714 for(long i=0;i<n;i++)
715 { double kk = fabs(k2[i]); if(kk>m1) m1 = kk; }
716 for(long i=0;i<n;i++) k2[i] /= m1;
717 }
718 else m1 = 1;
719 for(long i=0;i<n;i++) v[i] = x[i]+k2[i]*hh;
720 v[n] = t+hh/m1;
721 func(v,k3,par);
722 if(scale)
723 {
724 for(long i=0;i<n;i++)
725 { double kk = fabs(k3[i]); if(kk>m2) m2 = kk; }
726 for(long i=0;i<n;i++) k3[i] /= m2;
727 }
728 else m2 = 1;
729 for(long i=0;i<n;i++) { v[i] = x[i]+k3[i]*dt; k3[i] += k2[i]; }
730 v[n] = t+dt/m2;
731 func(v,k2,par);
732 if(scale)
733 {
734 for(long i=0;i<n;i++)
735 { double kk = fabs(k2[i]); if(kk>m3) m3 = kk; }
736 for(long i=0;i<n;i++) k2[i] /= m3;
737 }
738 else m3 = 1;
739 x[n] = t + (1/m0+2/m1+2/m2+1/m3)*dt/6;
740 for(long i=0;i<n;i++) x[i] += (k1[i]+k2[i]+2*k3[i])*dt/6;
741 if(bord) bord(x,res->a+n*(k-1),par);
742 for(long i=0;i<n;i++)
743 { res->a[i+n*k] = x[i]; if(mgl_isbad(x[i])) good=false; }
744 }
745 delete []x; delete []k1; delete []k2; delete []k3; delete []v;
746 res->Crop(0,k,'y');
747 return res;
748 }
749 //-----------------------------------------------------------------------------
750 //
751 // Common functions for quasioptical calculations
752 //
753 //-----------------------------------------------------------------------------
mgl_ray3d(const mreal * in,mreal * out,void * par)754 void static mgl_ray3d(const mreal *in, mreal *out, void *par)
755 {
756 mglFormula *eqs = (mglFormula *)par;
757 const char *v="xyzpqvt";
758 mreal var[MGL_VS]; memset(var,0,MGL_VS*sizeof(mreal));
759 for(int i=0;i<7;i++) var[v[i]-'a'] = in[i];
760 out[0] = eqs->CalcD(var,'p'); out[3] = -eqs->CalcD(var,'x');
761 out[1] = eqs->CalcD(var,'q'); out[4] = -eqs->CalcD(var,'y');
762 out[2] = eqs->CalcD(var,'v'); out[5] = -eqs->CalcD(var,'z');
763 out[7] = eqs->CalcD(var,'i'); out[6] = 1;
764 }
765 // Solve GO ray equation like dr/dt = d ham/dp, dp/dt = -d ham/dr where ham = ham(x,y,z,p,q,v,t) and px=p, py=q, pz=v. The starting point (at t=0) is r0, p0. Result is array of {x,y,z,p,q,v,t}
mgl_ray_trace(const char * ham,mreal x0,mreal y0,mreal z0,mreal px,mreal py,mreal pz,mreal dt,mreal tmax)766 HMDT MGL_EXPORT mgl_ray_trace(const char *ham, mreal x0, mreal y0, mreal z0, mreal px, mreal py, mreal pz, mreal dt, mreal tmax)
767 {
768 mglFormula eqs(ham);
769 mreal in[8]={x0,y0,z0,px,py,pz,0,0};
770 HMDT res = mgl_ode_solve(mgl_ray3d,8,in,dt,tmax,&eqs);
771 mgl_data_set_id(res,"xyzpqvti");
772 return res;
773 }
774 //-----------------------------------------------------------------------------
mgl_ray_trace_(const char * ham,mreal * x0,mreal * y0,mreal * z0,mreal * px,mreal * py,mreal * pz,mreal * dt,mreal * tmax,int l)775 uintptr_t MGL_EXPORT mgl_ray_trace_(const char *ham, mreal *x0, mreal *y0, mreal *z0, mreal *px, mreal *py, mreal *pz, mreal *dt, mreal *tmax,int l)
776 { char *s=new char[l+1]; memcpy(s,ham,l); s[l]=0;
777 uintptr_t res = uintptr_t(mgl_ray_trace(s, *x0,*y0,*z0, *px,*py,*pz, *dt,*tmax));
778 delete []s; return res; }
779 //-----------------------------------------------------------------------------
780 struct mgl_ap
781 {
782 double x0,y0,z0,x1,y1,z1,x2,y2,z2; // vectors {l, g1, g2}
783 double t1,t2,ch,q1,q2,pt,dt,d1,d2; // theta_{1,2}, chi, q_{1,2}, p_t, dtau, dq_{1,2}
mgl_apmgl_ap784 mgl_ap() { memset(this,0,sizeof(mgl_ap)); }
785 };
786 //-----------------------------------------------------------------------------
mgl_init_ra(long n,int n7,const mreal * r,mgl_ap * ra)787 void static mgl_init_ra(long n, int n7, const mreal *r, mgl_ap *ra) // prepare some intermediate data for QO (3d case)
788 {
789 double tt = hypot(r[n7]-r[0], r[n7+1]-r[1]);
790 if(tt)
791 {
792 ra[0].x1 = (r[n7+1]-r[1])/tt;
793 ra[0].y1 = (r[0]-r[n7])/tt;
794 ra[0].z1 = 0;
795 }
796 else { ra[0].x1 = ra[0].y1 = 0; ra[0].z1 = 1; }
797 ra[0].x0 = r[n7] - r[0]; ra[0].y0 = r[n7+1] - r[1]; ra[0].z0 = r[n7+2] - r[2];
798 tt = sqrt(ra[0].x0*ra[0].x0 + ra[0].y0*ra[0].y0 + ra[0].z0*ra[0].z0);
799 ra[0].x0 /= tt; ra[0].y0 /= tt; ra[0].z0 /= tt;
800 ra[0].x2 = ra[0].y1*ra[0].z0 - ra[0].y0*ra[0].z1; // vector g_2
801 ra[0].y2 = ra[0].z1*ra[0].x0 - ra[0].z0*ra[0].x1;
802 ra[0].z2 = ra[0].x1*ra[0].y0 - ra[0].x0*ra[0].y1;
803 for(long i=1;i<n;i++) // NOTE: no parallel due to dependence on prev point!
804 {
805 mgl_ap *ri=ra+i, *rp=ra+i-1;
806 const mreal *rr = r+n7*i;
807 ri->dt = rr[6] - rr[6-n7];
808 ri->x0 = rr[0] - rr[-n7]; // NOTE: very rough formulas
809 ri->y0 = rr[1] - rr[1-n7]; // for corresponding with dt one
810 ri->z0 = rr[2] - rr[2-n7]; // for corresponding with dt one
811 double ch = sqrt(ri->x0*ri->x0 + ri->y0*ri->y0 + ri->z0*ri->z0);
812 ri->x0 /= ch; ri->y0 /= ch; ri->z0 /= ch;
813 ri->ch = ch/ri->dt;
814 ri->pt = rr[3]*ri->x0 + rr[4]*ri->y0 + rr[5]*ri->z0;
815 ri->q1 = rr[3]*ri->x1 + rr[4]*ri->y1 + rr[5]*ri->z1;
816 ri->q2 = rr[3]*ri->x2 + rr[4]*ri->y2 + rr[5]*ri->z2;
817 // NOTE previous point is used here!
818 tt = ri->x0*rp->x1 + ri->y0*rp->y1 + ri->z0*rp->z1;
819 ri->x1 = rp->x1 - tt*ri->x0; // vector g_1
820 ri->y1 = rp->y1 - tt*ri->y0;
821 ri->z1 = rp->z1 - tt*ri->z0;
822 ri->t1 = tt/ch;
823 tt = sqrt(ri->x1*ri->x1 + ri->y1*ri->y1 + ri->z1*ri->z1);
824 ri->x1 /= tt; ri->y1 /= tt; ri->z1 /= tt; // norm for reducing numeric error
825 ri->x2 = ri->y1*ri->z0 - ri->y0*ri->z1; // vector g_2
826 ri->y2 = ri->z1*ri->x0 - ri->z0*ri->x1;
827 ri->z2 = ri->x1*ri->y0 - ri->x0*ri->y1;
828 tt = ri->x0*rp->x2 + ri->y0*rp->y2 + ri->z0*rp->z2;
829 ri->t2 = tt/ch;
830 ri->d1 = (ri->q1-rp->q1)/ch;
831 ri->d2 = (ri->q2-rp->q2)/ch;
832 }
833 memcpy(ra,ra+1,sizeof(mgl_ap)); // setup zero point
834 ra[0].pt = r[3]*ra[0].x0 + r[4]*ra[0].y0 + r[5]*ra[0].z0;
835 ra[0].q1 = r[3]*ra[0].x1 + r[4]*ra[0].y1 + r[5]*ra[0].z1;
836 ra[0].q2 = r[3]*ra[0].x2 + r[4]*ra[0].y2 + r[5]*ra[0].z2;
837 }
838 //-----------------------------------------------------------------------------
839 //
840 // QO2d series
841 //
842 //-----------------------------------------------------------------------------
843 struct mgl_qo2d_ham
844 {
845 dual *hx, *hu, *a, h0;
846 double *dmp, dr, dk;
847 mreal *r;
848 mgl_ap *ra;
849 mdual (*ham)(mreal u, mreal x, mreal y, mreal px, mreal py, void *par);
850 void *par;
851 };
852 //-----------------------------------------------------------------------------
mgl_qo2d_hprep(void * par)853 static void *mgl_qo2d_hprep(void *par)
854 {
855 mglThreadD *t=(mglThreadD *)par;
856 mgl_qo2d_ham *f = (mgl_qo2d_ham *)t->v;
857 mgl_ap *ra = f->ra;
858
859 const mreal *r = f->r;
860 const long nx=t->n;
861 #if !MGL_HAVE_PTHREAD
862 #pragma omp parallel for
863 #endif
864 for(long i=t->id;i<nx;i+=mglNumThr)
865 {
866 // x terms
867 mreal x1 = (2*i-nx+1)*f->dr, hh = 1 - ra->t1*x1;
868 hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
869 mreal tt = (ra->pt + ra->d1*x1)/hh - ra->pt;
870 dual tmp = f->ham(abs(f->a[i]), r[0]+ra->x1*x1, r[1]+ra->y1*x1, r[3]+ra->x0*tt, r[4]+ra->y0*tt, f->par);
871 f->hx[i] = tmp - f->h0/mreal(2);
872 // u-y terms
873 x1 = f->dk/2*(i<nx/2 ? i:i-nx);
874 tmp = f->ham(0, r[0], r[1], r[3]+ra->x1*x1, r[4]+ra->y1*x1, f->par);
875 f->hu[i] = tmp - f->h0/mreal(2);
876
877 if(imag(f->hx[i])>0) f->hx[i] = f->hx[i].real();
878 if(imag(f->hu[i])>0) f->hu[i] = f->hu[i].real();
879 // add boundary conditions for x-direction
880 f->hx[i] -= dual(0,f->dmp[i]);
881 }
882 return 0;
883 }
884 //-----------------------------------------------------------------------------
mgl_qo2d_func_c(mdual (* ham)(mreal u,mreal x,mreal y,mreal px,mreal py,void * par),void * par,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy)885 HADT MGL_EXPORT mgl_qo2d_func_c(mdual (*ham)(mreal u, mreal x, mreal y, mreal px, mreal py, void *par), void *par, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy)
886 {
887 const mglData *ray=dynamic_cast<const mglData *>(ray_dat); // NOTE: Ray must be mglData!
888 if(!ray) return 0;
889 const long nx=ini_re->GetNx(), nt=ray->ny, n7=ray->nx;
890 if(nx<2 || ini_im->GetNx()!=nx || nt<2) return 0;
891 mglDataC *res=new mglDataC(nx,nt,1);
892
893 dual *a=new dual[2*nx], *hu=new dual[2*nx], *hx=new dual[2*nx];
894 double *dmp=new double[2*nx];
895 mgl_ap *ra = new mgl_ap[nt]; mgl_init_ra(nt, n7, ray->a, ra); // ray
896
897 mreal dr = r/(nx-1), dk = M_PI*(nx-1)/(k0*r*nx);
898 memset(dmp,0,2*nx*sizeof(double));
899 for(long i=0;i<nx/2;i++) // prepare damping
900 {
901 mreal x1 = (nx/2-i)/(nx/2.);
902 dmp[2*nx-1-i] = dmp[i] = 30*GAMMA*x1*x1/k0;
903 }
904 for(long i=0;i<nx;i++) a[i+nx/2] = dual(ini_re->v(i),ini_im->v(i)); // init
905 void *wsx, *wtx = mgl_fft_alloc(2*nx,&wsx,1);
906 if(xx && yy) { xx->Create(nx,nt); yy->Create(nx,nt); }
907
908 mgl_qo2d_ham tmp; // parameters for Hamiltonian calculation
909 tmp.hx=hx; tmp.hu=hu; tmp.dmp=dmp; tmp.par=par;
910 tmp.dr=dr; tmp.dk=dk; tmp.ham=ham; tmp.a=a;
911 // start calculation
912 for(long k=0;k<nt;k++)
913 {
914 for(long i=0;i<nx;i++) // "save"
915 res->a[i+k*nx]=a[i+nx/2]*mreal(sqrt(ra[0].ch/ra[k].ch));
916 if(xx && yy) for(long i=0;i<nx;i++) // prepare xx, yy
917 {
918 mreal x1 = (2*i-nx+1)*dr;
919 xx->a[i+k*nx] = ray->a[n7*k] + ra[k].x1*x1; // new coordinates
920 yy->a[i+k*nx] = ray->a[n7*k+1] + ra[k].y1*x1;
921 }
922 tmp.r=ray->a+n7*k; tmp.ra=ra+k;
923 mreal hh = ra[k].pt*(1/sqrt(sqrt(1.041))-1); // 0.041=0.45^4 -- minimal value of h
924 tmp.h0 = ham(0, tmp.r[0], tmp.r[1], tmp.r[3]+ra[k].x0*hh, tmp.r[4]+ra[k].x0*hh, par);
925 mglStartThread(mgl_qo2d_hprep,0,2*nx,0,0,0,0,&tmp);
926 // Step for field
927 dual dt = dual(0, -ra[k].dt*k0);
928 for(long i=0;i<2*nx;i++) a[i] *= exp(hx[i]*dt);
929 mgl_fft((double *)a, 1, 2*nx, wtx, wsx, false);
930 for(long i=0;i<2*nx;i++) a[i] *= exp(hu[i]*dt);
931 mgl_fft((double *)a, 1, 2*nx, wtx, wsx, true);
932
933 /* // Calculate B1 // TODO make more general scheme later!!!
934 hh = ra[k].pt*(1/sqrt(sqrt(1.041))-1);
935 var['x'-'a'] = ray->a[n7*k]; // new coordiantes
936 var['y'-'a'] = ray->a[n7*k+1];
937 var['p'-'a'] = ray->a[n7*k+3] + ra[k].x0*hh; // new momentums
938 var['q'-'a'] = ray->a[n7*k+4] + ra[k].y0*hh;
939 tt = h.CalcD(var,'p')*ra[k].x1 + h.CalcD(var,'q')*ra[k].y1;
940 var['x'-'a'] = ray->a[n7*k] + ra[k].x1*dr; // new coordiantes
941 var['y'-'a'] = ray->a[n7*k+1] + ra[k].y1*dr;
942 hh = 1 - ra[k].t1*dr; hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
943 hh = (ra[k].ch*ra[k].pt + ra[k].d1*dr)/(hh*ra[k].ch) - ra[k].pt;
944 var['p'-'a'] = ray->a[n7*k+3] + ra[k].x0*hh; // new momentums
945 var['q'-'a'] = ray->a[n7*k+4] + ra[k].y0*hh;
946 B1 = h.CalcD(var,'p')*ra[k].x1 + h.CalcD(var,'q')*ra[k].y1;
947 B1 = (B1-tt)/dr;
948 double a1=0, a2=0;
949 for(i=0;i<2*nx;i++) a1 += norm(a[i]);
950 hx[0] = hx[2*nx-1] = 0.;
951 for(i=1;i<2*nx-1;i++) hx[i] = (B1*ra[k].dt*(i-nx))*(a[i+1]-a[i-1]);
952 for(i=0;i<2*nx;i++) { a[i] += hx[i]; a2 += norm(a[i]); }
953 a1 = sqrt(a1/a2);
954 for(i=0;i<2*nx;i++) a[i] *= a1;*/
955 }
956 mgl_fft_free(wtx,&wsx,1);
957 delete []a; delete []hu; delete []hx; delete []ra; delete []dmp;
958 return res;
959 }
960 //-----------------------------------------------------------------------------
mgl_qo2d_func(mdual (* ham)(mreal u,mreal x,mreal y,mreal px,mreal py,void * par),void * par,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy)961 HMDT MGL_EXPORT mgl_qo2d_func(mdual (*ham)(mreal u, mreal x, mreal y, mreal px, mreal py, void *par), void *par, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy)
962 {
963 HADT res = mgl_qo2d_func_c(ham,par,ini_re,ini_im,ray_dat,r,k0,xx,yy);
964 HMDT out = mgl_datac_abs(res); delete res; return out;
965 }
966 //-----------------------------------------------------------------------------
mgl_ham2d(mreal u,mreal x,mreal y,mreal px,mreal py,void * par)967 mdual static mgl_ham2d(mreal u, mreal x, mreal y, mreal px, mreal py, void *par)
968 {
969 mglFormula *h = (mglFormula *)par;
970 mreal var[MGL_VS]; memset(var,0,MGL_VS*sizeof(mreal));
971 var['x'-'a'] = x; var['y'-'a'] = y; var['u'-'a'] = u;
972 var['p'-'a'] = px; var['q'-'a'] = py;
973 return mdual(h->Calc(var), -h->CalcD(var,'i'));
974 }
975 //-----------------------------------------------------------------------------
mgl_qo2d_solve_c(const char * ham,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy)976 HADT MGL_EXPORT mgl_qo2d_solve_c(const char *ham, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy)
977 {
978 mglFormula h(ham);
979 return mgl_qo2d_func_c(mgl_ham2d, &h, ini_re, ini_im, ray_dat, r, k0, xx, yy);
980 }
981 //-----------------------------------------------------------------------------
mgl_qo2d_solve(const char * ham,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy)982 HMDT MGL_EXPORT mgl_qo2d_solve(const char *ham, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy)
983 {
984 HADT res = mgl_qo2d_solve_c(ham,ini_re,ini_im,ray_dat,r,k0,xx,yy);
985 HMDT out = mgl_datac_abs(res); delete res; return out;
986 }
987 //-----------------------------------------------------------------------------
mgl_qo2d_solve_(const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,uintptr_t * ray,mreal * r,mreal * k0,uintptr_t * xx,uintptr_t * yy,int l)988 uintptr_t MGL_EXPORT mgl_qo2d_solve_(const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, uintptr_t* ray, mreal *r, mreal *k0, uintptr_t* xx, uintptr_t* yy, int l)
989 { char *s=new char[l+1]; memcpy(s,ham,l); s[l]=0;
990 uintptr_t res = uintptr_t(mgl_qo2d_solve(s, _DA_(ini_re), _DA_(ini_im), _DA_(ray), *r, *k0, _DM_(xx), _DM_(yy)));
991 delete []s; return res; }
992 //-----------------------------------------------------------------------------
993 //
994 // QO3d series
995 //
996 //-----------------------------------------------------------------------------
997 struct mgl_qo3d_ham
998 {
999 dual *hxy, *huv, *hxv, *huy, *a;
1000 dual *hx, *hy, *hu, *hv, h0;
1001 double *dmp, dr, dk;
1002 mreal *r;
1003 mgl_ap *ra;
1004 mdual (*ham)(mreal u, mreal x, mreal y, mreal z, mreal px, mreal py, mreal pz, void *par);
1005 void *par;
1006 };
1007 //-----------------------------------------------------------------------------
mgl_qo3d_hprep(void * par)1008 static void *mgl_qo3d_hprep(void *par)
1009 {
1010 mglThreadD *t=(mglThreadD *)par;
1011 mgl_qo3d_ham *f = (mgl_qo3d_ham *)t->v;
1012 mgl_ap *ra = f->ra;
1013 const mreal *r = f->r;
1014 const long nx=t->n;
1015 #if !MGL_HAVE_PTHREAD
1016 #pragma omp parallel for
1017 #endif
1018 for(long ii=t->id;ii<nx*nx;ii+=mglNumThr)
1019 {
1020 long i = ii%nx, j = ii/nx;
1021 // x-y terms
1022 mreal x1 = (2*i-nx+1)*f->dr, x2 = (2*j-nx+1)*f->dr, hh = 1-ra->t1*x1-ra->t2*x2;
1023 hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
1024 mreal tt = (ra->pt + ra->d1*x1 + ra->d2*x2)/hh - ra->pt;
1025 f->hxy[ii] = f->ham(abs(f->a[i]), r[0]+ra->x1*x1+ra->x2*x2, r[1]+ra->y1*x1+ra->y2*x2, r[2]+ra->z1*x1+ra->z2*x2, r[3]+ra->x0*tt, r[4]+ra->y0*tt, r[5]+ra->z0*tt, f->par);
1026 // x-v terms
1027 x1 = (2*i-nx+1)*f->dr; x2 = f->dk/2*(j<nx/2 ? j:j-nx); hh = 1-ra->t1*x1;
1028 hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
1029 tt = (ra->pt + ra->d1*x1)/hh - ra->pt;
1030 f->hxv[ii] = f->ham(0, r[0]+ra->x1*x1, r[1]+ra->y1*x1, r[2]+ra->z1*x1, r[3]+ra->x0*tt+ra->x2*x2, r[4]+ra->y0*tt+ra->y2*x2, r[5]+ra->z0*tt+ra->z2*x2, f->par);
1031 // u-y terms
1032 x1 = f->dk/2*(i<nx/2 ? i:i-nx); x2 = (2*j-nx+1)*f->dr; hh = 1-ra->t2*x2;
1033 hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
1034 tt = (ra->pt + ra->d2*x2)/hh - ra->pt;
1035 f->huy[ii] = f->ham(0, r[0]+ra->x2*x2, r[1]+ra->y2*x2, r[2]+ra->z2*x2, r[3]+ra->x1*x1+ra->x0*tt, r[4]+ra->y1*x1+ra->y0*tt, r[5]+ra->z1*x1+ra->z0*tt, f->par);
1036 // u-y terms
1037 x1 = f->dk/2*(i<nx/2 ? i:i-nx); x2 = f->dk/2*(j<nx/2 ? j:j-nx);
1038 f->huv[ii] = f->ham(0, r[0], r[1], r[2], r[3]+ra->x1*x1+ra->x2*x2, r[4]+ra->y1*x1+ra->y2*x2, r[5]+ra->z1*x1+ra->z2*x2, f->par);
1039 }
1040 return 0;
1041 }
1042 //-----------------------------------------------------------------------------
mgl_qo3d_post(void * par)1043 static void *mgl_qo3d_post(void *par)
1044 {
1045 mglThreadD *t=(mglThreadD *)par;
1046 mgl_qo3d_ham *f = (mgl_qo3d_ham *)t->v;
1047 const long nx=t->n;
1048 #if !MGL_HAVE_PTHREAD
1049 #pragma omp parallel for
1050 #endif
1051 for(long ii=t->id;ii<nx*nx;ii+=mglNumThr)
1052 {
1053 long i = ii%nx, j = ii/nx;
1054 f->hxy[ii] -= (f->hx[i]+f->hy[j]-f->h0/mreal(2))/mreal(2);
1055 if(imag(f->hxy[ii])>0) f->hxy[ii] = f->hxy[ii].real();
1056 f->hxv[ii] -= (f->hx[i]+f->hv[j]-f->h0/mreal(2))/mreal(2);
1057 if(imag(f->hxv[ii])>0) f->hxv[ii] = f->hxv[ii].real();
1058 f->huy[ii] -= (f->hu[i]+f->hy[j]-f->h0/mreal(2))/mreal(2);
1059 if(imag(f->huy[ii])>0) f->huy[ii] = f->huy[ii].real();
1060 f->huv[ii] -= (f->hu[i]+f->hv[j]-f->h0/mreal(2))/mreal(2);
1061 if(imag(f->huv[ii])>0) f->huv[ii] = f->huv[ii].real();
1062 // add boundary conditions for x-direction
1063 f->hxy[ii] -= dual(0,f->dmp[ii]);
1064 }
1065 return 0;
1066 }
1067 //-----------------------------------------------------------------------------
mgl_qo3d_func_c(mdual (* ham)(mreal u,mreal x,mreal y,mreal z,mreal px,mreal py,mreal pz,void * par),void * par,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy,HMDT zz)1068 HADT MGL_EXPORT mgl_qo3d_func_c(mdual (*ham)(mreal u, mreal x, mreal y, mreal z, mreal px, mreal py, mreal pz, void *par), void *par, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy, HMDT zz)
1069 {
1070 const mglData *ray=dynamic_cast<const mglData *>(ray_dat); // NOTE: Ray must be mglData!
1071 if(!ray) return 0;
1072 const long nx=ini_re->GetNx(), nt=ray->ny, n7=ray->nx; // NOTE: only square grids are supported now (for simplicity)
1073 if(nx<2 || ini_re->GetNx()!=nx || ini_im->GetNx()*ini_im->GetNy()!=nx*nx || nt<2) return 0;
1074 mglDataC *res=new mglDataC(nx,nx,nt);
1075
1076 dual *a=new dual[4*nx*nx], *huv=new dual[4*nx*nx], *hxy=new dual[4*nx*nx], *huy=new dual[4*nx*nx], *hxv=new dual[4*nx*nx];
1077 dual *hu=new dual[2*nx], *hx=new dual[2*nx], *hy=new dual[2*nx], *hv=new dual[2*nx];
1078 double *dmp=new double[4*nx*nx];
1079 mgl_ap *ra = new mgl_ap[nt];
1080 mgl_init_ra(nt, n7, ray->a, ra); // prepare ray
1081
1082 double dr = r/(nx-1), dk = M_PI*(nx-1)/(k0*r*nx);
1083 memset(dmp,0,4*nx*nx*sizeof(double));
1084 #pragma omp parallel for collapse(2)
1085 for(long i=0;i<nx/2;i++) for(long j=0;j<nx/2;j++) // prepare damping
1086 {
1087 double x1 = (nx/2-i)/(nx/2.), x2 = (nx/2-j)/(nx/2.);
1088 dmp[2*nx-1-i] = dmp[i] = 30*GAMMA*x1*x1/k0;
1089 dmp[(2*nx-1-j)*2*nx] += 30*GAMMA*x2*x2/k0;
1090 dmp[j*2*nx] += 30*GAMMA*x2*x2/k0;
1091 }
1092 #pragma omp parallel for collapse(2)
1093 for(long i=0;i<nx;i++) for(long j=0;j<nx;j++) // init
1094 a[i+nx/2+2*nx*(j+nx/2)] = dual(ini_re->v(i,j),ini_im->v(i,j));
1095 void *wtx = mgl_fft_alloc(2*nx,0,0);
1096 if(xx && yy && zz) { xx->Create(nx,nx,nt); yy->Create(nx,nx,nt); zz->Create(nx,nx,nt); }
1097
1098 mgl_qo3d_ham tmp; // parameters for Hamiltonian calculation
1099 tmp.hxy=hxy; tmp.hx=hx; tmp.huv=huv; tmp.hu=hu;
1100 tmp.huy=huy; tmp.hy=hy; tmp.hxv=hxv; tmp.hv=hv;
1101 tmp.dmp=dmp; tmp.par=par;
1102 tmp.dr=dr; tmp.dk=dk; tmp.ham=ham; tmp.a=a;
1103 // start calculation
1104 for(long k=0;k<nt;k++)
1105 {
1106 #pragma omp parallel for collapse(2)
1107 for(long i=0;i<nx;i++) for(long j=0;j<nx;j++) // "save"
1108 res->a[i+nx*(j+k*nx)]=a[i+nx/2+2*nx*(j+nx/2)]*mreal(sqrt(ra[0].ch/ra[k].ch));
1109 if(xx && yy && zz)
1110 #pragma omp parallel for collapse(2)
1111 for(long i=0;i<nx;i++) for(long j=0;j<nx;j++) // prepare xx, yy, zz
1112 {
1113 mreal x1 = (2*i-nx+1)*dr, x2 = (2*j-nx+1)*dr;
1114 xx->a[i+nx*(j+k*nx)] = ray->a[n7*k] + ra[k].x1*x1 + ra[k].x2*x2; // new coordinates
1115 yy->a[i+nx*(j+k*nx)] = ray->a[n7*k+1] + ra[k].y1*x1 + ra[k].y2*x2;
1116 zz->a[i+nx*(j+k*nx)] = ray->a[n7*k+2] + ra[k].z1*x1 + ra[k].z2*x2;
1117 }
1118 tmp.r=ray->a+n7*k; tmp.ra=ra+k;
1119 mglStartThread(mgl_qo3d_hprep,0,2*nx,0,0,0,0,&tmp); tmp.h0 = huv[0];
1120 for(long i=0;i<2*nx;i++) // fill intermediate arrays
1121 {
1122 tmp.hx[i] = hxv[i]; tmp.hy[i] = huy[i*2*nx];
1123 tmp.hv[i] = huv[i]; tmp.hu[i] = huv[i*2*nx];
1124 }
1125 mglStartThread(mgl_qo3d_post,0,2*nx,0,0,0,0,&tmp);
1126 // Step for field
1127 dual dt = dual(0, -ra[k].dt*k0);
1128 #pragma omp parallel
1129 {
1130 void *wsx = mgl_fft_alloc_thr(2*nx);
1131 #pragma omp for
1132 for(long i=0;i<4*nx*nx;i++) a[i] *= exp(hxy[i]*dt); // x-y
1133 #pragma omp for
1134 for(long i=0;i<2*nx;i++) // x->u
1135 mgl_fft((double *)(a+i*2*nx), 1, 2*nx, wtx, wsx, false);
1136 #pragma omp for
1137 for(long i=0;i<4*nx*nx;i++) a[i] *= exp(huy[i]*dt); // u-y
1138 #pragma omp for
1139 for(long i=0;i<2*nx;i++) // y->v
1140 mgl_fft((double *)(a+i), 2*nx, 2*nx, wtx, wsx, false);
1141 #pragma omp for
1142 for(long i=0;i<4*nx*nx;i++) a[i] *= exp(huv[i]*dt); // u-v
1143 #pragma omp for
1144 for(long i=0;i<2*nx;i++) // u->x
1145 mgl_fft((double *)(a+i*2*nx), 1, 2*nx, wtx, wsx, true);
1146 #pragma omp for
1147 for(long i=0;i<4*nx*nx;i++) a[i] *= exp(hxv[i]*dt); // x-v
1148 #pragma omp for
1149 for(long i=0;i<2*nx;i++) // v->y
1150 mgl_fft((double *)(a+i), 2*nx, 2*nx, wtx, wsx, true);
1151 mgl_fft_free_thr(wsx);
1152 }
1153
1154 /* // Calculate B1 // TODO make more general scheme later!!!
1155 hh = ra[k].pt*(1/sqrt(sqrt(1.041))-1);
1156 var['x'-'a'] = ray->a[n7*k]; // new coordiantes
1157 var['y'-'a'] = ray->a[n7*k+1];
1158 var['p'-'a'] = ray->a[n7*k+3] + ra[k].x0*hh; // new momentums
1159 var['q'-'a'] = ray->a[n7*k+4] + ra[k].y0*hh;
1160 tt = h.CalcD(var,'p')*ra[k].x1 + h.CalcD(var,'q')*ra[k].y1;
1161 var['x'-'a'] = ray->a[n7*k] + ra[k].x1*dr; // new coordiantes
1162 var['y'-'a'] = ray->a[n7*k+1] + ra[k].y1*dr;
1163 hh = 1 - ra[k].t1*dr; hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
1164 hh = (ra[k].ch*ra[k].pt + ra[k].d1*dr)/(hh*ra[k].ch) - ra[k].pt;
1165 var['p'-'a'] = ray->a[n7*k+3] + ra[k].x0*hh; // new momentums
1166 var['q'-'a'] = ray->a[n7*k+4] + ra[k].y0*hh;
1167 B1 = h.CalcD(var,'p')*ra[k].x1 + h.CalcD(var,'q')*ra[k].y1;
1168 B1 = (B1-tt)/dr;
1169 double a1=0, a2=0;
1170 for(i=0;i<2*nx;i++) a1 += norm(a[i]);
1171 hx[0] = hx[2*nx-1] = 0.;
1172 for(i=1;i<2*nx-1;i++) hx[i] = (B1*ra[k].dt*(i-nx))*(a[i+1]-a[i-1]);
1173 for(i=0;i<2*nx;i++) { a[i] += hx[i]; a2 += norm(a[i]); }
1174 a1 = sqrt(a1/a2);
1175 for(i=0;i<2*nx;i++) a[i] *= a1;*/
1176 }
1177 mgl_fft_free(wtx,0,0);
1178 delete []a; delete []ra; delete []dmp;
1179 delete []huv; delete []hxy; delete []hxv; delete []huy;
1180 delete []hu; delete []hx; delete []hv; delete []hy;
1181 return res;
1182 }
1183 //-----------------------------------------------------------------------------
mgl_qo3d_func(mdual (* ham)(mreal u,mreal x,mreal y,mreal z,mreal px,mreal py,mreal pz,void * par),void * par,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy,HMDT zz)1184 HMDT MGL_EXPORT mgl_qo3d_func(mdual (*ham)(mreal u, mreal x, mreal y, mreal z, mreal px, mreal py, mreal pz, void *par), void *par, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy, HMDT zz)
1185 {
1186 HADT res = mgl_qo3d_func_c(ham,par,ini_re,ini_im,ray_dat,r,k0,xx,yy,zz);
1187 HMDT out = mgl_datac_abs(res); delete res; return out;
1188 }
1189 //-----------------------------------------------------------------------------
mgl_ham3d(mreal u,mreal x,mreal y,mreal z,mreal px,mreal py,mreal pz,void * par)1190 mdual static mgl_ham3d(mreal u, mreal x, mreal y, mreal z, mreal px, mreal py, mreal pz, void *par)
1191 {
1192 mglFormula *h = (mglFormula *)par;
1193 mreal var[MGL_VS]; memset(var,0,MGL_VS*sizeof(mreal));
1194 var['x'-'a'] = x; var['y'-'a'] = y; var['z'-'a'] = z; var['u'-'a'] = u;
1195 var['p'-'a'] = px; var['q'-'a'] = py; var['v'-'a'] = pz;
1196 return mdual(h->Calc(var), -h->CalcD(var,'i'));
1197 }
1198 //-----------------------------------------------------------------------------
mgl_qo3d_solve_c(const char * ham,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy,HMDT zz)1199 HADT MGL_EXPORT mgl_qo3d_solve_c(const char *ham, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy, HMDT zz)
1200 {
1201 mglFormula h(ham);
1202 return mgl_qo3d_func_c(mgl_ham3d, &h, ini_re, ini_im, ray_dat, r, k0, xx, yy, zz);
1203 }
1204 //-----------------------------------------------------------------------------
mgl_qo3d_solve(const char * ham,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy,HMDT zz)1205 HMDT MGL_EXPORT mgl_qo3d_solve(const char *ham, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy, HMDT zz)
1206 {
1207 HADT res = mgl_qo3d_solve_c(ham,ini_re,ini_im,ray_dat,r,k0,xx,yy,zz);
1208 HMDT out = mgl_datac_abs(res); delete res; return out;
1209 }
1210 //-----------------------------------------------------------------------------
mgl_qo3d_solve_(const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,uintptr_t * ray,mreal * r,mreal * k0,uintptr_t * xx,uintptr_t * yy,uintptr_t * zz,int l)1211 uintptr_t MGL_EXPORT mgl_qo3d_solve_(const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, uintptr_t* ray, mreal *r, mreal *k0, uintptr_t* xx, uintptr_t* yy, uintptr_t* zz, int l)
1212 { char *s=new char[l+1]; memcpy(s,ham,l); s[l]=0;
1213 uintptr_t res = uintptr_t(mgl_qo3d_solve(s, _DA_(ini_re), _DA_(ini_im), _DA_(ray), *r, *k0, _DM_(xx), _DM_(yy), _DM_(zz)));
1214 delete []s; return res; }
1215 //-----------------------------------------------------------------------------
1216 //
1217 // mglJacobian series
1218 //
1219 //-----------------------------------------------------------------------------
mgl_jacob2(void * par)1220 static void *mgl_jacob2(void *par)
1221 {
1222 mglThreadD *t=(mglThreadD *)par;
1223 const long nx=t->p[0], ny=t->p[1];
1224 mreal *r=t->a;
1225 const mreal *x=t->b, *y=t->c;
1226 #if !MGL_HAVE_PTHREAD
1227 #pragma omp parallel for
1228 #endif
1229 for(long i0=t->id;i0<t->n;i0+=mglNumThr)
1230 {
1231 long i=i0%nx, j=i0/nx;
1232 long ip = i<nx-1 ? 1:0, jp = j<ny-1 ? nx:0;
1233 long im = i>0 ? -1:0, jm = j>0 ? -nx:0;
1234 r[i0] = (x[i0+ip]-x[i0+im])*(y[i0+jp]-y[i0+jm]) -
1235 (y[i0+ip]-y[i0+im])*(x[i0+jp]-x[i0+jm]);
1236 r[i0] *= mreal((nx-1)*(ny-1)) / mreal((ip-im)*(jp-jm));
1237 }
1238 return 0;
1239 }
mgl_jacobian_2d(HCDT x,HCDT y)1240 HMDT MGL_EXPORT mgl_jacobian_2d(HCDT x, HCDT y)
1241 {
1242 const long nx = x->GetNx(), ny=x->GetNy();
1243 if(nx!=y->GetNx() || ny!=y->GetNy() || nx<2 || ny<2) return 0;
1244 mglData *r=new mglData(nx,ny,1);
1245 const mglData *xx=dynamic_cast<const mglData *>(x);
1246 const mglData *yy=dynamic_cast<const mglData *>(y);
1247 if(xx && yy)
1248 {
1249 long p[2]={nx,ny};
1250 mglStartThread(mgl_jacob2,0,nx*ny,r->a,xx->a,yy->a,p);
1251 }
1252 else // slow variant
1253 {
1254 #pragma omp parallel for collapse(2)
1255 for(long j=0;j<ny;j++) for(long i=0;i<nx;i++)
1256 {
1257 long im = i>0 ? i-1:i, ip = i<nx-1 ? i+1:i;
1258 long jm = j>0 ? j-1:j, jp = j<ny-1 ? j+1:j;
1259 r->a[i+nx*j] = (x->v(ip,j)-x->v(im,j))*(y->v(i,jp)-y->v(i,jm)) -
1260 (y->v(ip,j)-y->v(im,j))*(x->v(i,jp)-x->v(i,jm));
1261 r->a[i+nx*j] *= mreal((nx-1)*(ny-1)) / mreal((ip-im)*(jp-jm));
1262 }
1263 }
1264 return r;
1265 }
1266 //-----------------------------------------------------------------------------
mgl_jacob3(void * par)1267 static void *mgl_jacob3(void *par)
1268 {
1269 mglThreadD *t=(mglThreadD *)par;
1270 const long nx=t->p[0], ny=t->p[1], nz=t->p[2];
1271 mreal *r=t->a;
1272 const mreal *x=t->b, *y=t->c, *z=t->d;
1273 #if !MGL_HAVE_PTHREAD
1274 #pragma omp parallel for
1275 #endif
1276 for(long i0=t->id;i0<t->n;i0+=mglNumThr)
1277 {
1278 long i=i0%nx, j=(i0/nx)%ny, k=i0/(nx*ny);
1279 long ip = i<nx-1 ? 1:0, jp = j<ny-1 ? nx:0, kp = k<nz-1 ? nx*ny:0;
1280 long im = i>0 ? -1:0, jm = j>0 ? -nx:0, km = k>0 ? -nx*ny:0;
1281 r[i0] = (x[i0+ip]-x[i0+im])*(y[i0+jp]-y[i0+jm])*(z[i0+kp]-z[i0+km]) -
1282 (x[i0+ip]-x[i0+im])*(y[i0+kp]-y[i0+km])*(z[i0+jp]-z[i0+jm]) -
1283 (x[i0+jp]-x[i0+jm])*(y[i0+ip]-y[i0+im])*(z[i0+kp]-z[i0+km]) +
1284 (x[i0+jp]-x[i0+jm])*(y[i0+kp]-y[i0+km])*(z[i0+ip]-z[i0+im]) +
1285 (x[i0+kp]-x[i0+km])*(y[i0+ip]-y[i0+im])*(z[i0+jp]-z[i0+jm]) -
1286 (x[i0+kp]-x[i0+km])*(y[i0+jp]-y[i0+jm])*(z[i0+ip]-z[i0+im]);
1287 r[i0] *= mreal((nx-1)*(ny-1)*(nz-1)) / mreal((ip-im)*(jp-jm)*(kp-km));
1288 }
1289 return 0;
1290 }
mgl_jacobian_3d(HCDT x,HCDT y,HCDT z)1291 HMDT MGL_EXPORT mgl_jacobian_3d(HCDT x, HCDT y, HCDT z)
1292 {
1293 const long nx = x->GetNx(), ny=x->GetNy(), nz=x->GetNz(), nn = nx*ny*nz;
1294 if(nx<2 || ny<2 || nz<2) return 0;
1295 if(nn!=y->GetNN() || nn!=z->GetNN()) return 0;
1296 mglData *r=new mglData(nx,ny,nz);
1297 const mglData *xx=dynamic_cast<const mglData *>(x);
1298 const mglData *yy=dynamic_cast<const mglData *>(y);
1299 const mglData *zz=dynamic_cast<const mglData *>(z);
1300 if(xx && yy && zz)
1301 {
1302 long p[3]={nx,ny,nz};
1303 mglStartThread(mgl_jacob3,0,nx*ny*nz,r->a,xx->a,yy->a,p,0,zz->a);
1304 }
1305 else // slow variant
1306 {
1307 #pragma omp parallel for collapse(3)
1308 for(long k=0;k<nz;k++) for(long j=0;j<ny;j++) for(long i=0;i<nx;i++)
1309 {
1310 long im = i>0 ? i-1:i, ip = i<nx-1 ? i+1:i;
1311 long jm = j>0 ? j-1:j, jp = j<ny-1 ? j+1:j;
1312 long km = k>0 ? k-1:k, kp = k<nz-1 ? k+1:k;
1313 long i0 = i+nx*(j+ny*k);
1314 r->a[i0] = (x->v(ip,j,k)-x->v(im,j,k))*(y->v(i,jp,k)-y->v(i,jm,k))*(z->v(i,j,kp)-z->v(i,j,km)) -
1315 (x->v(ip,j,k)-x->v(im,j,k))*(y->v(i,j,kp)-y->v(i,j,km))*(z->v(i,jp,k)-z->v(i,jm,k)) -
1316 (x->v(i,jp,k)-x->v(i,jm,k))*(y->v(ip,j,k)-y->v(im,j,k))*(z->v(i,j,kp)-z->v(i,j,km)) +
1317 (x->v(i,jp,k)-x->v(i,jm,k))*(y->v(i,j,kp)-y->v(i,j,km))*(z->v(ip,j,k)-z->v(im,j,k)) +
1318 (x->v(i,j,kp)-x->v(i,j,km))*(y->v(ip,j,k)-y->v(im,j,k))*(z->v(i,jp,k)-z->v(i,jm,k)) -
1319 (x->v(i,j,kp)-x->v(i,j,km))*(y->v(i,jp,k)-y->v(i,jm,k))*(z->v(ip,j,k)-z->v(im,j,k));
1320 r->a[i0] *= mreal((nx-1)*(ny-1)*(nz-1)) / mreal((ip-im)*(jp-jm)*(kp-km));
1321 }
1322
1323 }
1324 return r;
1325 }
1326 //-----------------------------------------------------------------------------
mgl_jacobian_2d_(uintptr_t * x,uintptr_t * y)1327 uintptr_t MGL_EXPORT mgl_jacobian_2d_(uintptr_t* x, uintptr_t* y)
1328 { return uintptr_t(mgl_jacobian_2d(_DA_(x), _DA_(y))); }
mgl_jacobian_3d_(uintptr_t * x,uintptr_t * y,uintptr_t * z)1329 uintptr_t MGL_EXPORT mgl_jacobian_3d_(uintptr_t* x, uintptr_t* y, uintptr_t* z)
1330 { return uintptr_t(mgl_jacobian_3d(_DA_(x), _DA_(y), _DA_(z))); }
1331 //-----------------------------------------------------------------------------
1332 //
1333 // Progonka
1334 //
1335 //-----------------------------------------------------------------------------
mgl_progonka_sr(HCDT A,HCDT B,HCDT C,HCDT D,mreal * dat,long n,long id,long i0,long di,bool difr)1336 void static mgl_progonka_sr(HCDT A, HCDT B, HCDT C, HCDT D, mreal *dat, long n, long id, long i0, long di, bool difr)
1337 {
1338 mreal *aa=dat, *bb=dat+n, *uu=dat+2*n;
1339 mreal b0=B->vthr(i0), c0=C->vthr(i0), d0=D->vthr(id);
1340 if(difr) d0 = (2.-b0)*d0-c0*D->vthr(id+di);
1341 aa[0] = -c0/b0; bb[0] = d0/b0;
1342 for(long i=1;i<n;i++)
1343 {
1344 long ii=i0+di*i, dd=id+di*i, tt = id+di*((i+1)%n);
1345 mreal a=A->vthr(ii), b=B->vthr(ii), c=C->vthr(ii);
1346 mreal d=difr?-a*D->vthr(dd-di)+(2.-b)*D->vthr(dd)-c*D->vthr(tt):D->vthr(dd);
1347 aa[i] = -c/(b+a*aa[i-1]);
1348 bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1349 }
1350 uu[n-1] = bb[n-1];
1351 for(long i=n-2;i>=0;i--) uu[i] = bb[i]+aa[i]*uu[i+1];
1352 }
mgl_progonka_pr(HCDT A,HCDT B,HCDT C,HCDT D,mreal * dat,long n,long id,long i0,long di,bool difr)1353 void static mgl_progonka_pr(HCDT A, HCDT B, HCDT C, HCDT D, mreal *dat, long n, long id, long i0, long di, bool difr)
1354 {
1355 mreal *aa=dat, *bb=dat+n, *gg=dat+2*n, *uu=dat+3*n;
1356 mreal a0=A->vthr(i0), b0=B->vthr(i0), c0=C->vthr(i0), d0=D->vthr(id);
1357 if(difr) d0 = -a0*D->vthr(id+di*(n-1))+(2.-b0)*d0-c0*D->vthr(id+di);
1358 aa[0] =-c0/b0; bb[0] = d0/b0; gg[0] =-a0/b0;
1359 for(long i=1;i<n;i++)
1360 {
1361 long ii=i0+di*i, il=id+di*((i+1)%n), dd=id+di*i;
1362 mreal a=A->vthr(ii), b=B->vthr(ii), c=C->vthr(ii);
1363 mreal d=difr?-a*D->vthr(dd-di)+(2.-b)*D->vthr(dd)-c*D->vthr(il):D->vthr(dd);
1364 aa[i] = -c/(b+a*aa[i-1]);
1365 bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1366 gg[i] = -a*gg[i-1]/(b+a*aa[i-1]);
1367 }
1368 mreal P=bb[n-1]/(1.-gg[n-1]), Q=aa[n-1]/(1.-gg[n-1]);
1369 aa[n-1] = Q; bb[n-1] = P;
1370 for(long i=n-2;i>=0;i--)
1371 {
1372 bb[i] += aa[i]*bb[i+1]+gg[i]*P;
1373 aa[i] = aa[i]*aa[i+1]+gg[i]*Q;
1374 }
1375 mreal u0 = bb[0]/(1.-aa[0]);
1376 for(long i=0;i<n;i++) uu[i]=bb[i]+aa[i]*u0;
1377 }
mgl_progonka_hr(HCDT A,HCDT B,HCDT C,HCDT D,mreal * dat,long n,long id,long i0,bool difr)1378 void static mgl_progonka_hr(HCDT A, HCDT B, HCDT C, HCDT D, mreal *dat, long n, long id, long i0, bool difr)
1379 {
1380 mreal *aa=dat, *bb=dat+n, *uu=dat+n*n;
1381 mreal b0=B->vthr(i0), c0=C->vthr(i0), d0=D->vthr(id);
1382 uu[0] = d0/b0*(difr?(2.-b0):1.);
1383 b0=B->vthr(i0+n*n-1); d0=D->vthr(id+n*n-1);
1384 uu[n*n-1] = d0/b0*(difr?(2.-b0):1.);
1385 long di = n-1, i1 = i0+n*(n-1), d1 = id+n*(n-1);
1386 // suppose the square grid!
1387 for(long j=1;j<n;j++)
1388 {
1389 // first bottom-left triangle
1390 b0=B->vthr(i0+j); c0=C->vthr(i0+j); d0=D->vthr(id+j);
1391 if(difr) d0 = (2.-b0)*d0-c0*D->vthr(id+j+di);
1392 aa[0] = -c0/b0; bb[0] = d0/b0;
1393 for(long i=1;i<=j;i++)
1394 {
1395 long ii=i0+j+di*i, dd=id+j+di*i;
1396 mreal a=A->vthr(ii),b=B->vthr(ii),c=C->vthr(ii);
1397 mreal d=difr?-a*D->vthr(dd-di)+(2.-b)*D->vthr(dd)-c*D->vthr(dd+di):D->vthr(dd);
1398 aa[i] = -c/(b+a*aa[i-1]);
1399 bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1400 }
1401 uu[j+di*(j-1)] = bb[j];
1402 for(long i=j-1;i>=0;i--)
1403 uu[j+di*i] = bb[i]+aa[i]*uu[j+di*i+di];
1404 // next top-right triangle
1405 long j1=n-1-j;
1406 b0=B->vthr(i1+j1); c0=C->vthr(i1+j1); d0=D->vthr(d1+j1);
1407 if(difr) d0 = (2.-b0)*d0-c0*D->vthr(d1+j1-di);
1408 aa[0] = -c0/b0; bb[0] = d0/b0;
1409 for(long i=1;i<=j;i++)
1410 {
1411 long ii=i1+j1-di*i, dd=d1+j1-di*i;
1412 mreal a=A->vthr(ii),b=B->vthr(ii),c=C->vthr(ii);
1413 mreal d=difr?-a*D->vthr(dd+di)+(2.-b)*D->vthr(dd)-c*D->vthr(dd-di):D->vthr(dd);
1414 aa[i] = -c/(b+a*aa[i-1]);
1415 bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1416 }
1417 uu[j1+n*(n-1)-di*(j-1)] = bb[j];
1418 for(long i=j-1;i>=0;i--)
1419 uu[j1+n*(n-1)-di*i] = bb[i]+aa[i]*uu[j1+n*(n-1)-di*i-di];
1420 }
1421 }
1422 //-----------------------------------------------------------------------------
mgl_data_tridmat(HCDT A,HCDT B,HCDT C,HCDT D,const char * how)1423 HMDT MGL_EXPORT mgl_data_tridmat(HCDT A, HCDT B, HCDT C, HCDT D, const char *how)
1424 {
1425 const long nx=D->GetNx(),ny=D->GetNy(),nz=D->GetNz();
1426 const long nn=nx*ny*nz, np=nx*ny, na=A->GetNN();
1427 if(B->GetNN()!=na || C->GetNN()!=na) return 0;
1428 mglData *r = new mglData(nx,ny,nz);
1429 bool per = mglchr(how,'c');
1430 bool difr = mglchr(how,'d');
1431 if(mglchr(how,'x') && (na==nn || na==np || na==nx))
1432 #pragma omp parallel
1433 {
1434 mreal *tt = new mreal[4*nx], *uu=tt+(per?3:2)*nx;
1435 // mglData T(nx,4); mreal *uu=T.a+(per?3:2)*nx;
1436 #pragma omp for collapse(2)
1437 for(long k=0;k<nz;k++) for(long j=0;j<ny;j++)
1438 {
1439 long i0=0, i1=nx*(j+ny*k);
1440 if(na==nn) i0=nx*(j+ny*k); else if(na==np) i0=nx*j;
1441 if(per) mgl_progonka_pr(A,B,C,D,tt,nx,i1,i0,1,difr);
1442 else mgl_progonka_sr(A,B,C,D,tt,nx,i1,i0,1,difr);
1443 i0 = nx*(j+ny*k);
1444 for(long i=0;i<nx;i++) r->a[i+i0] = uu[i];
1445 }
1446 delete []tt;
1447 }
1448 else if(mglchr(how,'y') && (na==nn || na==np || na==ny))
1449 #pragma omp parallel
1450 {
1451 mreal *tt = new mreal[4*ny], *uu=tt+(per?3:2)*ny;
1452 // / mglData T(ny,4); mreal *uu=T.a+(per?3:2)*ny;
1453 #pragma omp for collapse(2)
1454 for(long k=0;k<nz;k++) for(long i=0;i<nx;i++)
1455 {
1456 long i0=0, i1 = i+np*k;
1457 if(na==nn) i0=i+np*k; else if(na==np) i0=i;
1458 if(per) mgl_progonka_pr(A,B,C,D,tt,ny,i1,i0,nx,difr);
1459 else mgl_progonka_sr(A,B,C,D,tt,ny,i1,i0,nx,difr);
1460 i0 = i+np*k;
1461 for(long j=0;j<ny;j++) r->a[j*nx+i0] = uu[j];
1462 }
1463 delete []tt;
1464 }
1465 else if(mglchr(how,'z') && (na==nn || na==nz))
1466 #pragma omp parallel
1467 {
1468 mreal *tt = new mreal[4*nz], *uu=tt+(per?3:2)*nz;
1469 // mglData T(nz,4); mreal *uu=T.a+(per?3:2)*nz;
1470 #pragma omp for collapse(2)
1471 for(long j=0;j<ny;j++) for(long i=0;i<nx;i++)
1472 {
1473 long i0 = na==nn?i+nx*j:0, i1 = i+nx*j;
1474 if(per) mgl_progonka_pr(A,B,C,D,tt,nz,i1,i0,np,difr);
1475 else mgl_progonka_sr(A,B,C,D,tt,nz,i1,i0,np,difr);
1476 i0 = i+nx*j;
1477 for(long k=0;k<nz;k++) r->a[k*np+i0] = uu[k];
1478 }
1479 delete []tt;
1480 }
1481 else if(mglchr(how,'h') && ny==nx && (na==nn || na==np) && nx>1)
1482 #pragma omp parallel
1483 {
1484 mreal *tt = new mreal[2*np];
1485 // mglData T(np,2);
1486 #pragma omp for
1487 for(long k=0;k<nz;k++)
1488 {
1489 mgl_progonka_hr(A,B,C,D,tt,nx,k*np,na==nn ? k*np:0,difr);
1490 memcpy(r->a+k*np, tt+np, np*sizeof(mreal));
1491 }
1492 delete []tt;
1493 }
1494 else { delete r; r=0; }
1495 return r;
1496 }
1497 //-----------------------------------------------------------------------------
mgl_data_tridmat_(uintptr_t * A,uintptr_t * B,uintptr_t * C,uintptr_t * D,const char * how,int l)1498 uintptr_t MGL_EXPORT mgl_data_tridmat_(uintptr_t *A, uintptr_t *B, uintptr_t *C, uintptr_t *D, const char *how, int l)
1499 { char *s=new char[l+1]; memcpy(s,how,l); s[l]=0;
1500 uintptr_t r = uintptr_t(mgl_data_tridmat(_DA_(A),_DA_(B),_DA_(C),_DA_(D),s));
1501 delete []s; return r;
1502 }
1503 //-----------------------------------------------------------------------------
mgl_progonka_sc(HCDT A,HCDT B,HCDT C,HCDT D,dual * dat,long n,long id,long i0,long di,bool difr)1504 void static mgl_progonka_sc(HCDT A, HCDT B, HCDT C, HCDT D, dual *dat, long n, long id, long i0, long di, bool difr)
1505 {
1506 dual *aa=dat, *bb=dat+n, *uu=dat+2*n;
1507 dual b0=B->vcthr(i0), c0=C->vcthr(i0), d0=D->vcthr(id);
1508 if(difr) d0 = (mreal(2)-b0)*d0-c0*D->vcthr(id+di);
1509 aa[0] = -c0/b0; bb[0] = d0/b0;
1510 for(long i=1;i<n;i++)
1511 {
1512 long ii=i0+di*i, dd=id+di*i, tt = id+di*((i+1)%n);
1513 dual a=A->vcthr(ii), b=B->vcthr(ii), c=C->vcthr(ii);
1514 dual d=difr?-a*D->vcthr(dd-di)+(mreal(2)-b)*D->vcthr(dd)-c*D->vcthr(tt):D->vcthr(dd);
1515 aa[i] = -c/(b+a*aa[i-1]);
1516 bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1517 }
1518 uu[n-1] = bb[n-1];
1519 for(long i=n-2;i>=0;i--) uu[i] = bb[i]+aa[i]*uu[i+1];
1520 }
mgl_progonka_pc(HCDT A,HCDT B,HCDT C,HCDT D,dual * dat,long n,long id,long i0,long di,bool difr)1521 void static mgl_progonka_pc(HCDT A, HCDT B, HCDT C, HCDT D, dual *dat, long n, long id, long i0, long di, bool difr)
1522 {
1523 dual *aa=dat, *bb=dat+n, *gg=dat+2*n, *uu=dat+3*n;
1524 dual a0=A->vcthr(i0), b0=B->vcthr(i0), c0=C->vcthr(i0), d0=D->vcthr(id);
1525 if(difr) d0 = -a0*D->vcthr(id+di*(n-1))+(mreal(2)-b0)*d0-c0*D->vcthr(id+di);
1526 aa[0] =-c0/b0; bb[0] = d0/b0; gg[0] =-a0/b0;
1527 for(long i=1;i<n;i++)
1528 {
1529 long ii=i0+di*i, il=id+di*((i+1)%n), dd=id+di*i;
1530 dual a=A->vcthr(ii), b=B->vcthr(ii), c=C->vcthr(ii);
1531 dual d=difr?-a*D->vcthr(dd-di)+(mreal(2)-b)*D->vcthr(dd)-c*D->vcthr(il):D->vcthr(dd);
1532 aa[i] = -c/(b+a*aa[i-1]);
1533 bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1534 gg[i] = -a*gg[i-1]/(b+a*aa[i-1]);
1535 }
1536 dual P=bb[n-1]/(mreal(1)-gg[n-1]), Q=aa[n-1]/(mreal(1)-gg[n-1]);
1537 aa[n-1] = Q; bb[n-1] = P;
1538 for(long i=n-2;i>=0;i--)
1539 {
1540 bb[i] += aa[i]*bb[i+1]+gg[i]*P;
1541 aa[i] = aa[i]*aa[i+1]+gg[i]*Q;
1542 }
1543 dual u0 = bb[0]/(mreal(1)-aa[0]);
1544 for(long i=0;i<n;i++) uu[i]=bb[i]+aa[i]*u0;
1545 }
mgl_progonka_hc(HCDT A,HCDT B,HCDT C,HCDT D,dual * dat,long n,long id,long i0,bool difr)1546 void static mgl_progonka_hc(HCDT A, HCDT B, HCDT C, HCDT D, dual *dat, long n, long id, long i0, bool difr)
1547 {
1548 dual *aa=dat, *bb=dat+n, *uu=dat+n*n;
1549 dual b0=B->vcthr(i0), c0=C->vcthr(i0), d0=D->vcthr(id);
1550 uu[0] = d0/b0*(difr?(mreal(2)-b0):mreal(1));
1551 b0=B->vcthr(i0+n*n-1); d0=D->vcthr(id+n*n-1);
1552 uu[n*n-1] = d0/b0*(difr?(mreal(2)-b0):mreal(1));
1553 long di = n-1, i1 = i0+n*(n-1), d1 = id+n*(n-1);
1554 // suppose the square grid!
1555 for(long j=1;j<n;j++)
1556 {
1557 // first bottom-left triangle
1558 b0=B->vcthr(i0+j); c0=C->vcthr(i0+j); d0=D->vcthr(id+j);
1559 if(difr) d0 = (mreal(2)-b0)*d0-c0*D->vcthr(id+j+di);
1560 aa[0] = -c0/b0; bb[0] = d0/b0;
1561 for(long i=1;i<=j;i++)
1562 {
1563 long ii=i0+j+di*i, dd=id+j+di*i;
1564 dual a=A->vcthr(ii),b=B->vcthr(ii),c=C->vcthr(ii);
1565 dual d=difr?-a*D->vcthr(dd-di)+(mreal(2)-b)*D->vcthr(dd)-c*D->vcthr(dd+di):D->vcthr(dd);
1566 aa[i] = -c/(b+a*aa[i-1]);
1567 bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1568 }
1569 uu[j+di*(j-1)] = bb[j];
1570 for(long i=j-1;i>=0;i--)
1571 uu[j+di*i] = bb[i]+aa[i]*uu[j+di*i+di];
1572 // next top-right triangle
1573 long j1=n-1-j;
1574 b0=B->vcthr(i1+j1); c0=C->vcthr(i1+j1); d0=D->vcthr(d1+j1);
1575 if(difr) d0 = (mreal(2)-b0)*d0-c0*D->vcthr(d1+j1-di);
1576 aa[0] = -c0/b0; bb[0] = d0/b0;
1577 for(long i=1;i<=j;i++)
1578 {
1579 long ii=i1+j1-di*i, dd=d1+j1-di*i;
1580 dual a=A->vcthr(ii),b=B->vcthr(ii),c=C->vcthr(ii);
1581 dual d=difr?-a*D->vcthr(dd+di)+(mreal(2)-b)*D->vcthr(dd)-c*D->vcthr(dd-di):D->vcthr(dd);
1582 aa[i] = -c/(b+a*aa[i-1]);
1583 bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1584 }
1585 uu[j1+n*(n-1)-di*(j-1)] = bb[j];
1586 for(long i=j-1;i>=0;i--)
1587 uu[j1+n*(n-1)-di*i] = bb[i]+aa[i]*uu[j1+n*(n-1)-di*i-di];
1588 }
1589 }
1590 //-----------------------------------------------------------------------------
mgl_datac_tridmat(HCDT A,HCDT B,HCDT C,HCDT D,const char * how)1591 HADT MGL_EXPORT mgl_datac_tridmat(HCDT A, HCDT B, HCDT C, HCDT D, const char *how)
1592 {
1593 const long nx=D->GetNx(),ny=D->GetNy(),nz=D->GetNz();
1594 const long nn=nx*ny*nz, np=nx*ny, na=A->GetNN();
1595 if(B->GetNN()!=na || C->GetNN()!=na) return 0;
1596 mglDataC *r = new mglDataC(nx,ny,nz);
1597 bool per = mglchr(how,'c');
1598 bool difr = mglchr(how,'d');
1599 if(mglchr(how,'x') && (na==nn || na==np || na==nx))
1600 #pragma omp parallel
1601 {
1602 dual *tt = new dual[4*nx], *uu=tt+(per?3:2)*nx;
1603 // mglDataC T(nx,4); dual *uu=T.a+(per?3:2)*nx;
1604 #pragma omp for collapse(2)
1605 for(long k=0;k<nz;k++) for(long j=0;j<ny;j++)
1606 {
1607 long i0=0, i1=nx*(j+ny*k);
1608 if(na==nn) i0=i1; else if(na==np) i0=nx*j;
1609 if(per) mgl_progonka_pc(A,B,C,D,tt,nx,i1,i0,1,difr);
1610 else mgl_progonka_sc(A,B,C,D,tt,nx,i1,i0,1,difr);
1611 for(long i=0;i<nx;i++) r->a[i+i1] = uu[i];
1612 }
1613 delete []tt;
1614 }
1615 else if(mglchr(how,'y') && (na==nn || na==np || na==ny))
1616 #pragma omp parallel
1617 {
1618 dual *tt = new dual[4*ny], *uu=tt+(per?3:2)*ny;
1619 // mglDataC T(ny,4); dual *uu=T.a+(per?3:2)*ny;
1620 #pragma omp for collapse(2)
1621 for(long k=0;k<nz;k++) for(long i=0;i<nx;i++)
1622 {
1623 long i0=0, i1 = i+np*k;
1624 if(na==nn) i0=i1; else if(na==np) i0=i;
1625 if(per) mgl_progonka_pc(A,B,C,D,tt,ny,i1,i0,nx,difr);
1626 else mgl_progonka_sc(A,B,C,D,tt,ny,i1,i0,nx,difr);
1627 i0 = i+np*k;
1628 for(long j=0;j<ny;j++) r->a[j*nx+i0] = uu[j];
1629 }
1630 delete []tt;
1631 }
1632 else if(mglchr(how,'z') && (na==nn || na==nz))
1633 #pragma omp parallel
1634 {
1635 dual *tt = new dual[4*nz], *uu=tt+(per?3:2)*ny;
1636 // mglDataC T(nz,4); dual *uu=T.a+(per?3:2)*nz;
1637 #pragma omp for collapse(2)
1638 for(long j=0;j<ny;j++) for(long i=0;i<nx;i++)
1639 {
1640 long i0 = na==nn?i+nx*j:0, i1 = i+nx*j;
1641 if(per) mgl_progonka_pc(A,B,C,D,tt,nz,i1,i0,np,difr);
1642 else mgl_progonka_sc(A,B,C,D,tt,nz,i1,i0,np,difr);
1643 for(long k=0;k<nz;k++) r->a[k*np+i1] = uu[k];
1644 }
1645 delete []tt;
1646 }
1647 else if(mglchr(how,'h') && ny==nx && (na==nn || na==np) && nx>1)
1648 #pragma omp parallel
1649 {
1650 dual *tt = new dual[2*np];
1651 // mglDataC T(np,2);
1652 #pragma omp for
1653 for(long k=0;k<nz;k++)
1654 {
1655 mgl_progonka_hc(A,B,C,D,tt,nx,k*np, na==nn ? k*np:0,difr);
1656 memcpy(r->a+k*np, tt+np, np*sizeof(dual));
1657 }
1658 delete []tt;
1659 }
1660 else { delete r; r=0; }
1661 return r;
1662 }
1663 //-----------------------------------------------------------------------------
mgl_datac_tridmat_(uintptr_t * A,uintptr_t * B,uintptr_t * C,uintptr_t * D,const char * how,int l)1664 uintptr_t MGL_EXPORT mgl_datac_tridmat_(uintptr_t *A, uintptr_t *B, uintptr_t *C, uintptr_t *D, const char *how, int l)
1665 { char *s=new char[l+1]; memcpy(s,how,l); s[l]=0;
1666 uintptr_t r = uintptr_t(mgl_datac_tridmat(_DA_(A),_DA_(B),_DA_(C),_DA_(D),s));
1667 delete []s; return r;
1668 }
1669 //-----------------------------------------------------------------------------
1670