1 /***************************************************************************
2  * pde.cpp is part of Math Graphic Library
3  * Copyright (C) 2007-2016 Alexey Balakin <mathgl.abalakin@gmail.ru>       *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU Lesser General Public License  as       *
7  *   published by the Free Software Foundation; either version 3 of the    *
8  *   License, or (at your option) any later version.                       *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU Lesser General Public     *
16  *   License along with this program; if not, write to the                 *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #include "mgl2/data.h"
21 #include "mgl2/datac.h"
22 #include "mgl2/eval.h"
23 #include "mgl2/thread.h"
24 #include "mgl2/base.h"
25 #include "interp.hpp"
26 const double GAMMA=0.1;	///< value for damping
27 //-----------------------------------------------------------------------------
28 //
29 //		Advanced PDE series in 2D case
30 //
31 //-----------------------------------------------------------------------------
mgl_operator_exp(long n,const dual * h,dual * a,dual * f)32 void static mgl_operator_exp(long n, const dual *h, dual *a, dual *f)
33 {
34 	memset(f,0,2*n*sizeof(dual));
35 	const long i1=n/2, i2=3*n/2-1;
36 #pragma omp parallel for
37 	for(long j=0;j<n;j++)
38 	{
39 		long jp = (j+1)%n;
40 		dual h1=h[n*j]*dual(0,1),		g1=(h1+h[n*jp]*dual(0,1))/mreal(2);
41 		dual h2=h[n-1+n*j]*dual(0,1),	g2=(h2+h[n-1+n*jp]*dual(0,1))/mreal(2);
42 		mreal k1=M_PI*2*j/n, k2 = M_PI*(2*j+1)/n;
43 		for(long i=0;i<i1;i++)
44 		{
45 			f[2*j] += a[i]*exp(h1+dual(0,i*k1));
46 			f[2*j+1] += a[i]*exp(g1+dual(0,i*k2));
47 		}
48 		for(long i=i1;i<i2;i++)
49 		{
50 			dual hh = h[i-i1+n*j];
51 			f[2*j] += a[i]*exp(hh*dual(0,1)+dual(0,i*k1));
52 			f[2*j+1] += a[i]*exp((hh+h[i-i1+n*jp])*dual(0,0.5)+dual(0,i*k2));
53 		}
54 		for(long i=i2;i<2*n;i++)
55 		{
56 			f[2*j] += a[i]*exp(h2+dual(0,i*k1));
57 			f[2*j+1] += a[i]*exp(g2+dual(0,i*k2));
58 		}
59 	}
60 	memset(a,0,2*n*sizeof(dual));
61 #pragma omp parallel for
62 	for(long i=0;i<2*n;i++)
63 	{
64 		long ii=i-i1;
65 		if(ii<0)	ii=0;
66 		if(ii>n-1)	ii=n-1;
67 		double kk=M_PI*2*i/n;
68 		for(long j=0;j<n;j++)
69 		{
70 			dual h1 = h[ii+n*j], g1 = (h1+h[ii+n*((j+1)%n)])*dual(0,0.5);
71 			a[i] += f[2*j]*exp(h1*dual(0,1)-dual(0,kk*j));
72 			a[i] += f[2*j+1]*exp(g1-dual(0,kk*(j+0.5)));
73 		}
74 	}
75 }
76 //-----------------------------------------------------------------------------
mgl_operator_lin(long n,mreal * h,dual * a,dual * f,dual * g,dual * o,const dual * iexp)77 void static mgl_operator_lin(long n, mreal *h, dual *a, dual *f, dual *g, dual *o, const dual *iexp)
78 {
79 	memset(f,0,2*n*sizeof(dual));
80 	memset(g,0,2*n*sizeof(dual));
81 	const long i1=n/2, i2=3*n/2-1;
82 #pragma omp parallel for
83 	for(long j=0;j<n;j++)
84 	{
85 		long jp = (j+1)%n;
86 		mreal h1=tanh(h[n*j]), g1=(h1+tanh(h[n*jp]))/2;
87 		mreal h2=tanh(h[n-1+n*j]), g2=(h2+tanh(h[n-1+n*jp]))/2;
88 		const dual *ie1=iexp+4*n*j, *ie2=iexp+2*n*(2*j+1);
89 		for(long i=0;i<i1;i++)
90 		{
91 			dual e1=ie1[i], e2=ie2[i];
92 			f[2*j] += a[i]*h1*e1;	f[2*j+1] += a[i]*g1*e2;
93 			g[2*j] += a[i]*e1;		g[2*j+1] += a[i]*e2;
94 		}
95 		for(long i=i1;i<i2;i++)
96 		{
97 			mreal hh = tanh(h[i-i1+n*j]);
98 			mreal gg = (hh+tanh(h[i-i1+n*jp]))/2;
99 			dual e1=ie1[i], e2=ie2[i];
100 			f[2*j] += a[i]*hh*e1;	f[2*j+1] += a[i]*gg*e2;
101 			g[2*j] += a[i]*e1;		g[2*j+1] += a[i]*e2;
102 		}
103 		for(long i=i2;i<2*n;i++)
104 		{
105 			dual e1=ie1[i], e2=ie2[i];
106 			f[2*j] += a[i]*h2*e1;	f[2*j+1] += a[i]*g2*e2;
107 			g[2*j] += a[i]*e1;		g[2*j+1] += a[i]*e2;
108 		}
109 	}
110 	memset(o,0,2*n*sizeof(dual));
111 #pragma omp parallel for
112 	for(long i=0;i<2*n;i++)
113 	{
114 		long ii=i-i1;
115 		if(ii<0)	ii=0;
116 		if(ii>n-1)	ii=n-1;
117 		const dual *ie1=iexp+2*n*i;
118 //		double kk=M_PI*2*i/n;
119 		for(long j=0;j<n;j++)
120 		{
121 			mreal h1 = tanh(h[ii+n*j]);
122 			mreal g1 = (h1+tanh(h[ii+n*((j+1)%n)]))/2;
123 			dual e1=conj(ie1[2*j]), e2=conj(ie1[2*j+1]);
124 			o[i] += f[2*j]*e1 + f[2*j+1]*e2;
125 			o[i] += g[2*j]*h1*e1 + g[2*j+1]*g1*e2;
126 		}
127 	}
128 }
129 //-----------------------------------------------------------------------------
mgl_apde_calc_ham(HMDT hs,bool old,const char * func,std::vector<mglDataA * > list,const mreal dd)130 HADT static mgl_apde_calc_ham(HMDT hs, bool old, const char *func, std::vector<mglDataA*> list, const mreal dd)
131 {
132 	HADT ham = mglFormulaCalcC(func, list);	mgl_datac_mul_num(ham,dd);
133 	const long nx = ham->nx;
134 	if(old)
135 	{
136 		mreal hh = ham->Imag().Minimal();
137 		if(hh>0)	hh=0;
138 #pragma omp parallel for
139 		for(long i=0;i<nx*nx;i++)
140 		{
141 			hs->a[i] = sqrt(imag(ham->a[i])-hh);	// non-additive term
142 			ham->a[i] = dual(real(ham->a[i]),hh);	// additive terms
143 		}
144 	}
145 	else
146 	{
147 		mglData xIm(nx), pIm(nx);
148 #pragma omp parallel for
149 		for(long i=0;i<nx;i++)	// first find minimal values along x and p
150 		{
151 			dual *ax=ham->a+i, *ay=ham->a+i*nx;
152 			mreal mx=imag(ax[0]), my=imag(ay[0]);
153 			for(long j=1;j<nx;j++)	my = (my<imag(ay[j]))?my:imag(ay[j]);
154 			for(long j=1;j<nx;j++)	mx = (mx<imag(ax[j*nx]))?mx:imag(ax[j*nx]);
155 			xIm.a[i] = mx;	pIm.a[i]=my;
156 		}
157 		mreal mIm=xIm.a[0];	mreal *aa=xIm.a;	// global minimum
158 		for(long j=1;j<nx;j++)	mIm = (mIm<aa[j])?mIm:aa[j];
159 #pragma omp parallel for collapse(2)
160 		for(long j=0;j<nx;j++)	for(long i=0;i<nx;i++)
161 		{
162 			mreal hh = xIm.a[i]+pIm.a[j]-mIm;
163 			long i0=i+nx*j;
164 			hs->a[i0] = sqrt(fabs(imag(ham->a[i0])-hh));	// non-additive term. NOTE: fabs() guarantee absence of negative values due to rounding error
165 			ham->a[i0] = dual(real(ham->a[i0]),hh);	// additive terms
166 		}
167 	}
168 	return ham;
169 }
170 //-----------------------------------------------------------------------------
171 // Solve equation dx/dy = func(p,x,y,|u|)[u] where p=d/dx. There are no assumptions about form of func().
mgl_pde_adv_c(HMGL gr,const char * func,HCDT ini_re,HCDT ini_im,mreal dt,mreal k0,const char * opt)172 HADT MGL_EXPORT mgl_pde_adv_c(HMGL gr, const char *func, HCDT ini_re, HCDT ini_im, mreal dt, mreal k0, const char *opt)
173 {
174 	mreal gamma = gr->SaveState(opt);	if(mgl_isnan(gamma))	gamma = 20;
175 	const mglPoint &Min=gr->Min, &Max=gr->Max;
176 	const long nx=ini_re->GetNx(), nt = long((Max.y-Min.y)/dt)+1;
177 	if(nx<2 || nt<2 || Max.x==Min.x){	gr->SetWarn(mglWarnLow,"PDE");	return 0;	}	// Too small data
178 	if(ini_im->GetNx() != nx)		{	gr->SetWarn(mglWarnDim,"PDE");	return 0;	}	// Wrong dimensions
179 
180 	mglDataC *res=new mglDataC(nx, nt);
181 	mglData hIm(nx,nx);	// for advanced damping calculation
182 	mglDataC u(nx);	u.Name(L"u");
183 	mglDataV x(nx,nx), y(nx,nx), r(nx,nx);
184 	mglDataW p(nx,nx);	p.Name(L"p");
185 	bool old = func[0]==';';	if(old)	func=func+1;
186 	x.Name(L"x");	y.Name(L"y");	r.Name(L"#$mgl");
187 	const mreal dp = 2*M_PI/(Max.x-Min.x), dd = k0*dt/2;
188 	x.Fill(Min.x,Max.x,'x');	p.Freq(dp/k0,'y');
189 	std::vector<mglDataA*> list;
190 	list.push_back(&x);	list.push_back(&y);	list.push_back(&p);	list.push_back(&r);	list.push_back(&u);
191 
192 	dual *a = new dual[2*nx];	memset(a,0,2*nx*sizeof(dual));	// Add "damping" area
193 	dual *f = new dual[6*nx], *g=f+2*nx, *s=f+4*nx;
194 #pragma omp parallel for
195 	for(long i=0;i<nx;i++)	// Initial conditions
196 		a[i+nx/2] = dual(ini_re->v(i), ini_im->v(i));
197 	double *dmp = new double[2*nx];	memset(dmp,0,2*nx*sizeof(double));
198 #pragma omp parallel for
199 	for(long i=0;i<2*nx;i++)	// dumping
200 	{
201 		if(i<nx/2)		dmp[i] += gamma*mgl_ipow((nx/2-i)/mreal(nx/2),2);
202 		if(i>3*nx/2)	dmp[i] += gamma*mgl_ipow((i-3*nx/2-1)/mreal(nx/2),2);
203 	}
204 	bool have_y = mglchr(func,'y');
205 	HADT ham;
206 	if(!have_y)		ham = mgl_apde_calc_ham(&hIm, old, func, list, dd);
207 	dual *iexp = new dual[4*nx*nx];
208 #pragma omp parallel for collapse(2)
209 	for(long j=0;j<2*nx;j++)	for(long i=0;i<2*nx;i++)
210 		iexp[i+2*nx*j] = exp(dual(0,(M_PI*i*j)/nx));
211 	for(long k=0;k<nt;k++)
212 	{
213 		memcpy(u.a,a+nx/2,nx*sizeof(dual));
214 		memcpy(res->a+k*nx,a+nx/2,nx*sizeof(dual));
215 		if(have_y)
216 		{	y.Fill(k*dt);	ham = mgl_apde_calc_ham(&hIm, old, func, list, dd);	}
217 		mgl_operator_exp(nx,ham->a,a,f);
218 		mgl_operator_lin(nx,hIm.a,a,f,g,s,iexp);
219 		mgl_operator_lin(nx,hIm.a,s,f,g,s,iexp);
220 #pragma omp parallel for
221 		for(long i=0;i<2*nx;i++)
222 			a[i] = (a[i]-s[i]/mreal(8*nx*nx))*mreal(exp(-dmp[i]*dt)/2/nx);
223 		if(have_y)	delete ham;
224 	}
225 	delete []a;	delete []f;	delete []dmp;	delete []iexp;
226 	if(!have_y)	delete ham;
227 	gr->LoadState();	return res;
228 }
229 //-----------------------------------------------------------------------------
mgl_pde_adv(HMGL gr,const char * ham,HCDT ini_re,HCDT ini_im,mreal dz,mreal k0,const char * opt)230 HMDT MGL_EXPORT mgl_pde_adv(HMGL gr, const char *ham, HCDT ini_re, HCDT ini_im, mreal dz, mreal k0, const char *opt)
231 {
232 	HADT res = mgl_pde_adv_c(gr,ham,ini_re,ini_im,dz,k0,opt);
233 	HMDT out = mgl_datac_abs(res);	delete res;	return out;
234 }
235 //-----------------------------------------------------------------------------
mgl_pde_adv_c_(uintptr_t * gr,const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,mreal * dz,mreal * k0,const char * opt,int l,int lo)236 uintptr_t MGL_EXPORT mgl_pde_adv_c_(uintptr_t* gr, const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, mreal *dz, mreal *k0, const char *opt, int l, int lo)
237 {	char *s=new char[l+1];	memcpy(s,ham,l);	s[l]=0;
238 	char *o=new char[lo+1];	memcpy(o,opt,lo);	o[lo]=0;
239 	uintptr_t res = uintptr_t(mgl_pde_adv_c(_GR_, s, _DA_(ini_re), _DA_(ini_im), *dz, *k0, o));
240 	delete []o;	delete []s;	return res;	}
241 //-----------------------------------------------------------------------------
mgl_pde_adv_(uintptr_t * gr,const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,mreal * dz,mreal * k0,const char * opt,int l,int lo)242 uintptr_t MGL_EXPORT mgl_pde_adv_(uintptr_t* gr, const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, mreal *dz, mreal *k0, const char *opt, int l, int lo)
243 {	char *s=new char[l+1];	memcpy(s,ham,l);	s[l]=0;
244 	char *o=new char[lo+1];	memcpy(o,opt,lo);	o[lo]=0;
245 	uintptr_t res = uintptr_t(mgl_pde_adv(_GR_, s, _DA_(ini_re), _DA_(ini_im), *dz, *k0, o));
246 	delete []o;	delete []s;	return res;	}
247 //-----------------------------------------------------------------------------
248 //
249 //		Simplified PDE series
250 //
251 //-----------------------------------------------------------------------------
252 struct mgl_pde_ham
253 {
254 	ddual *a,*hxy,*hxv,*huv,*huy;
255 	const char *eqs;
256 	long nx,ny;
257 	double xx,yy,xs,ys,dx,dy,dq,dp,zz;
258 	double dd;
259 };
mgl_pde_hprep(const mgl_pde_ham * f)260 void static mgl_pde_hprep(const mgl_pde_ham *f)
261 {
262 	const long nx = f->nx, ny = f->ny;
263 	mglDataV x(nx,ny), y(nx,ny), z, r(nx,ny);
264 	mglDataW p(nx,ny), q(nx,ny);
265 	x.Name(L"x");	y.Name(L"y");	p.Name(L"p");	q.Name(L"q");	r.Name(L"#$mgl");
266 	z.Name(L"z");	z.Fill(f->zz);
267 	dual dd(0,f->dd);
268 	mglData u(nx,ny);	u.Name(L"u");
269 #pragma omp parallel for
270 	for(long i=0;i<nx*ny;i++)	u.a[i] = abs(f->a[i]);
271 	std::vector<mglDataA*> list;
272 	list.push_back(&x);	list.push_back(&y);	list.push_back(&z);
273 	list.push_back(&p);	list.push_back(&q);	list.push_back(&u);
274 
275 	x.Fill(f->xx,f->xx+f->dx*(nx-1),'x');	p.Freq(0,'x');
276 	y.Fill(f->yy,f->yy+f->dy*(ny-1),'y');	q.Freq(0,'y');
277 	HADT res = mglFormulaCalcC(f->eqs, list);
278 #pragma omp parallel for
279 	for(long i=0;i<nx*ny;i++)	f->hxy[i] = res->a[i]*dd;
280 	delete res;
281 	if(ny>2)
282 	{
283 		x.Fill(f->xs);	p.Freq(f->dp,'x');
284 		res = mglFormulaCalcC(f->eqs, list);
285 #pragma omp parallel for
286 		for(long i=0;i<nx*ny;i++)	f->huy[i] = res->a[i]*dd;
287 		delete res;
288 	}
289 	x.Fill(f->xs);	p.Freq(f->dp,'x');
290 	y.Fill(f->ys);	q.Freq(f->dq,'y');
291 	res = mglFormulaCalcC(f->eqs, list);
292 #pragma omp parallel for
293 	for(long i=0;i<nx*ny;i++)	f->huv[i] = res->a[i]*dd;
294 	delete res;
295 	if(ny>2)
296 	{
297 		x.Fill(f->xx,f->xx+f->dx*(nx-1),'x');	p.Freq(0,'x');
298 		res = mglFormulaCalcC(f->eqs, list);
299 #pragma omp parallel for
300 		for(long i=0;i<nx*ny;i++)	f->hxv[i] = res->a[i]*dd;
301 		delete res;
302 	}
303 }
304 //-----------------------------------------------------------------------------
305 // Solve equation dx/dz = func(p,q,x,y,z,|u|)[u] where p=d/dx, q=d/dy. At this moment simplified form of ham is supported: ham = f(p,q,z) + g(x,y,z,'u'), where variable 'u'=|u| (for allowing solve nonlinear problems). You may specify imaginary part like ham = p^2 + 1i*x*(x>0).
mgl_pde_solve_c(HMGL gr,const char * ham,HCDT ini_re,HCDT ini_im,mreal dz,mreal k0,const char * opt)306 HADT MGL_EXPORT mgl_pde_solve_c(HMGL gr, const char *ham, HCDT ini_re, HCDT ini_im, mreal dz, mreal k0, const char *opt)
307 {
308 	mreal gamma = gr->SaveState(opt);	if(mgl_isnan(gamma))	gamma = GAMMA;
309 	mglPoint Min=gr->Min, Max=gr->Max;
310 	long nx=ini_re->GetNx(), ny=ini_re->GetNy(), nz = long((Max.z-Min.z)/dz)+1;
311 	if(nx<2 || nz<2 || Max.x==Min.x)			// Too small data
312 	{	gr->SetWarn(mglWarnLow,"PDE");	return 0;	}
313 	if(ini_im->GetNx()*ini_im->GetNy() != nx*ny)// Wrong dimensions
314 	{	gr->SetWarn(mglWarnDim,"PDE");	return 0;	}
315 	mglDataC *res=new mglDataC(nz, nx, ny);
316 
317 	ddual *a = new ddual[4*nx*ny], hh0;	// Add "damping" area
318 	ddual *hxy = new ddual[4*nx*ny], *hxv = new ddual[4*nx*ny];
319 	ddual *huy = new ddual[4*nx*ny], *huv = new ddual[4*nx*ny];
320 	ddual *hx = new ddual[2*nx], *hv = new ddual[2*ny];
321 	ddual *hy = new ddual[2*ny], *hu = new ddual[2*nx];
322 	double *dmp = new double[4*nx*ny];
323 	memset(a,0,4*nx*ny*sizeof(ddual));
324 	memset(dmp,0,4*nx*ny*sizeof(double));
325 #pragma omp parallel for collapse(2)
326 	for(long j=0;j<ny;j++)	for(long i=0;i<nx;i++)	// Initial conditions
327 	{
328 		long i0 = i+nx/2+2*nx*(j+ny/2);
329 		a[i0] = ddual(ini_re->v(i,j), ini_im->v(i,j));
330 		res->a[nz*(i+nx*j)] = a[i0];
331 	}
332 #pragma omp parallel for collapse(2)
333 	for(long j=0;j<2*ny;j++)	for(long i=0;i<2*nx;i++)	// step 1
334 	{
335 		long i0 = i+2*nx*j;
336 		if(i<nx/2)		dmp[i0] += gamma*mgl_ipow((nx/2-i)/(nx/2.),2);
337 		if(i>3*nx/2)	dmp[i0] += gamma*mgl_ipow((i-3*nx/2-1)/(nx/2.),2);
338 		if(j<ny/2)		dmp[i0] += gamma*mgl_ipow((ny/2-j)/(ny/2.),2);
339 		if(j>3*ny/2)	dmp[i0] += gamma*mgl_ipow((j-3*ny/2-1)/(ny/2.),2);
340 	}
341 	mreal dx = (Max.x-Min.x)/(nx-1), dy = ny>1?(Max.y-Min.y)/(ny-1):0;
342 	mreal dp = M_PI/(Max.x-Min.x)/k0, dq = M_PI/(Max.y-Min.y)/k0;
343 	mreal xs=(Min.x+Max.x)/2, ys=(Min.y+Max.y)/2;
344 	double dd = k0*dz;
345 
346 	mgl_pde_ham tmp;tmp.eqs = ham;
347 	tmp.nx = 2*nx;	tmp.ny = 2*ny;	tmp.dd = dd;	tmp.a=a;
348 	tmp.hxy=hxy;	tmp.hxv=hxv;	tmp.huy=huy;	tmp.huv=huv;
349 	tmp.xx = Min.x-dx*(nx/2);	tmp.xs = xs;	tmp.dx = dx;	tmp.dp = dp;
350 	tmp.yy = Min.y-dy*(ny/2);	tmp.ys = ys;	tmp.dy = dy;	tmp.dq = dq;
351 
352 	// prepare fft. NOTE: slow procedures due to unknown nx, ny.
353 	void *wtx = mgl_fft_alloc(2*nx,0,0);
354 	void *wty = mgl_fft_alloc(2*ny,0,0);
355 	for(long k=1;k<nz;k++)
356 	{
357 		if(gr->NeedStop())	break;
358 		tmp.zz = Min.z+dz*k;
359 		memset(hxy,0,4*nx*ny*sizeof(ddual));	memset(hxv,0,4*nx*ny*sizeof(ddual));
360 		memset(huv,0,4*nx*ny*sizeof(ddual));	memset(huy,0,4*nx*ny*sizeof(ddual));
361 		mgl_pde_hprep(&tmp);
362 		for(long i=0;i<2*nx;i++)	{	hx[i] = hxv[i];			hu[i] = huv[i];		}
363 		for(long j=0;j<2*ny;j++)	{	hy[j] = huy[2*nx*j];	hv[j] = huv[2*nx*j];}
364 		// rearrange arrays
365 		hh0=hu[0];
366 		if(ny>1)
367 #pragma omp parallel for collapse(2)
368 		 for(long j=0;j<2*ny;j++)	for(long i=0;i<2*nx;i++)
369 			{
370 				long i0 = i+2*nx*j;	huv[i0] -= hh0;
371 				hxv[i0] -= hx[i]+hv[j]-hh0;
372 				huy[i0] -= hu[i]+hy[j]-hh0;
373 			}
374 		else
375 #pragma omp parallel for
376 			for(long i=0;i<4*nx*ny;i++)	huv[i] -= hh0;
377 		// solve equation
378 		if(ny>1)
379 #pragma omp parallel
380 		{
381 			void *wsx = mgl_fft_alloc_thr(2*nx), *wsy = mgl_fft_alloc_thr(2*ny);
382 #pragma omp for
383 			for(long i=0;i<4*nx*ny;i++)	a[i] *= exp(hxy[i]-double(dmp[i]*dz));
384 #pragma omp for
385 			for(long i=0;i<2*ny;i++)	mgl_fft((double *)(a+i*2*nx), 1, 2*nx, wtx, wsx, false);
386 #pragma omp for
387 			for(long i=0;i<4*nx*ny;i++)	a[i] *= exp(huy[i]);
388 #pragma omp for
389 			for(long i=0;i<2*nx;i++)	mgl_fft((double *)(a+i), 2*nx, 2*ny, wty, wsy, false);
390 #pragma omp for
391 			for(long i=0;i<4*nx*ny;i++)	a[i] *= exp(huv[i]);
392 #pragma omp for
393 			for(long i=0;i<2*ny;i++)	mgl_fft((double *)(a+2*i*nx), 1, 2*nx, wtx, wsx, true);
394 #pragma omp for
395 			for(long i=0;i<4*nx*ny;i++)	a[i] *= exp(hxv[i]);
396 #pragma omp for
397 			for(long i=0;i<2*nx;i++)	mgl_fft((double *)(a+i), 2*nx, 2*ny, wty, wsy, true);
398 			mgl_fft_free_thr(wsx);	mgl_fft_free_thr(wsy);
399 		}
400 		else
401 #pragma omp parallel
402 		{
403 			void *wsx = mgl_fft_alloc_thr(2*nx);
404 #pragma omp for
405 			for(long i=0;i<4*nx*ny;i++)	a[i] *= exp(hxy[i]-double(dmp[i]*dz));
406 #pragma omp for
407 			for(long i=0;i<2*ny;i++)	mgl_fft((double *)(a+i*2*nx), 1, 2*nx, wtx, wsx, false);
408 #pragma omp for
409 			for(long i=0;i<4*nx*ny;i++)	a[i] *= exp(huv[i]);
410 #pragma omp for
411 			for(long i=0;i<2*ny;i++)	mgl_fft((double *)(a+2*i*nx), 1, 2*nx, wtx, wsx, true);
412 			mgl_fft_free_thr(wsx);
413 		}
414 #pragma omp parallel for collapse(2)
415 		for(long j=0;j<ny;j++)	for(long i=0;i<nx;i++)	// save result
416 			res->a[k+nz*(i+nx*j)] = a[i+nx/2+2*nx*(j+ny/2)];
417 	}
418 	mgl_fft_free(wtx,0,0);	mgl_fft_free(wty,0,0);
419 	delete []a;		delete []dmp;
420 	delete []hxy;	delete []hxv;	delete []huy;	delete []huv;
421 	delete []hx;	delete []hy;	delete []hu;	delete []hv;
422 	gr->LoadState();
423 	return res;
424 }
425 //-----------------------------------------------------------------------------
mgl_pde_solve(HMGL gr,const char * ham,HCDT ini_re,HCDT ini_im,mreal dz,mreal k0,const char * opt)426 HMDT MGL_EXPORT mgl_pde_solve(HMGL gr, const char *ham, HCDT ini_re, HCDT ini_im, mreal dz, mreal k0, const char *opt)
427 {
428 	HADT res = mgl_pde_solve_c(gr,ham,ini_re,ini_im,dz,k0,opt);
429 	HMDT out = mgl_datac_abs(res);	delete res;	return out;
430 }
431 //-----------------------------------------------------------------------------
mgl_pde_solve_c_(uintptr_t * gr,const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,mreal * dz,mreal * k0,const char * opt,int l,int lo)432 uintptr_t MGL_EXPORT mgl_pde_solve_c_(uintptr_t* gr, const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, mreal *dz, mreal *k0, const char *opt, int l, int lo)
433 {	char *s=new char[l+1];	memcpy(s,ham,l);	s[l]=0;
434 	char *o=new char[lo+1];	memcpy(o,opt,lo);	o[lo]=0;
435 	uintptr_t res = uintptr_t(mgl_pde_solve_c(_GR_, s, _DA_(ini_re), _DA_(ini_im), *dz, *k0, o));
436 	delete []o;	delete []s;	return res;	}
437 //-----------------------------------------------------------------------------
mgl_pde_solve_(uintptr_t * gr,const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,mreal * dz,mreal * k0,const char * opt,int l,int lo)438 uintptr_t MGL_EXPORT mgl_pde_solve_(uintptr_t* gr, const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, mreal *dz, mreal *k0, const char *opt, int l, int lo)
439 {	char *s=new char[l+1];	memcpy(s,ham,l);	s[l]=0;
440 	char *o=new char[lo+1];	memcpy(o,opt,lo);	o[lo]=0;
441 	uintptr_t res = uintptr_t(mgl_pde_solve(_GR_, s, _DA_(ini_re), _DA_(ini_im), *dz, *k0, o));
442 	delete []o;	delete []s;	return res;	}
443 //-----------------------------------------------------------------------------
444 //
445 //		ODE series
446 //
447 //-----------------------------------------------------------------------------
mgl_set_func(const mreal * x,mreal * dx,void * par)448 void MGL_NO_EXPORT mgl_set_func(const mreal *x, mreal *dx, void *par)
449 {
450 	mglEqTxT *p=(mglEqTxT *)par;
451 	long n = p->n/p->m;
452 	for(long i=0;i<p->m;i++)
453 	{
454 		HMDT d = static_cast<HMDT>(p->head[i]);
455 		memcpy(d->a, x+i*n, n*sizeof(mreal));
456 	}
457 	p->t->a[0] = x[p->n];
458 //#pragma omp parallel for collapse(2)
459 	for(long j=0;j<p->m;j++)
460 	{
461 		HMDT d = mglFormulaCalc(p->str[j].c_str(),p->head);
462 		mreal val = d->a[0];
463 		if(d->nx<n)
464 		{
465 			d->Create(n);
466 			for(long i=0;i<n;i++)	d->a[i] = val;
467 		}
468 		else switch(p->brd)
469 		{
470 			default:	// zero instead of NAN
471 			case 0:
472 			case 'z':
473 			case '0':
474 				for(long i=0;i<n;i++)	if(mgl_isbad(d->a[i]))	d->a[i] = 0;
475 				break;
476 			case 1:		// constant at border
477 			case 'c':
478 			case '1':
479 				d->a[0] = d->a[1];	d->a[n-1] = d->a[n-2];	break;
480 			case 2:		// linear at border
481 			case 'l':
482 			case '2':
483 				d->a[0] = mreal(2)*d->a[1]-d->a[2];
484 				d->a[n-1] = mreal(2)*d->a[n-2]-d->a[n-3];	break;
485 			case 3:		// square at border
486 			case 's':
487 			case '3':
488 				d->a[0] = d->a[3]+mreal(3)*(d->a[1]-d->a[2]);
489 				d->a[n-1] = d->a[n-4]+mreal(3)*(d->a[n-2]-d->a[n-3]);	break;
490 		}
491 		memcpy(dx+j*n, d->a, n*sizeof(mreal));
492 		delete d;
493 	}
494 }
mgl_ode_solve_set(const char * func,const char * var,char brd,HCDT x0,mreal dt,mreal tmax)495 HMDT MGL_EXPORT mgl_ode_solve_set(const char *func, const char *var, char brd, HCDT x0, mreal dt, mreal tmax)
496 {
497 	if(!var || !(*var) || !func || !x0)	return 0;
498 	mglEqTxT par;
499 	par.var=var;	par.brd=brd;	par.FillStr(func);
500 	const long n = par.n = x0->GetNx();
501 	const long m = par.m = long(strlen(var)), nn = n/m;	// number of variables
502 
503 	HMDT dat = new mglData[m+3];
504 	for(long i=0;i<m;i++)
505 	{
506 		HMDT d = dat+i;	d->Create(nn);
507 		d->s = var[i];	par.head.push_back(d);
508 	}
509 	HMDT d = dat+m+1;	d->Create(nn);
510 	for(long i=0;i<nn;i++)	d->a[i] = i;
511 	d->s = 'j';	par.head.push_back(d);
512 	(dat+m+2)->Create(nn);	(dat+m+2)->s = "#$mgl";
513 	par.head.push_back(dat+m+2);
514 	par.t = dat+m;	par.t->s = 't';	par.head.push_back(par.t);
515 
516 	mreal *xx = new mreal[n];
517 	for(long i=0;i<n;i++)	xx[i] = x0->vthr(i);
518 	HMDT res = mgl_ode_solve_ex(mgl_set_func,n,xx,dt,tmax,&par,NULL);
519 	delete []xx;	delete []dat;	return res;
520 }
521 //-----------------------------------------------------------------------------
mgl_set_funcC(const mreal * x,mreal * dx,void * par)522 void MGL_NO_EXPORT mgl_set_funcC(const mreal *x, mreal *dx, void *par)
523 {
524 	mglEqTxT *p=(mglEqTxT *)par;
525 	long n = p->n/p->m;
526 	for(long i=0;i<p->m;i++)
527 	{
528 		HADT d = static_cast<HADT>(p->head[i]);
529 		memcpy(d->a, x+2*i*n, 2*n*sizeof(mreal));
530 	}
531 	p->t->a[0] = x[2*p->n];
532 //#pragma omp parallel for collapse(2)
533 	for(long j=0;j<p->m;j++)
534 	{
535 		HADT d = mglFormulaCalcC(p->str[j].c_str(),p->head);
536 		dual val = d->a[0];
537 		if(d->nx<n)
538 		{
539 			d->Create(n);
540 			for(long i=0;i<n;i++)	d->a[i] = val;
541 		}
542 		else switch(p->brd)
543 		{
544 			default:
545 			case 0:		// zero instead of NAN
546 			case 'z':
547 			case '0':
548 				for(long i=0;i<n;i++)	if(mgl_isbad(d->a[i]))	d->a[i] = 0;
549 				break;
550 			case 1:		// constant at border
551 			case 'c':
552 			case '1':
553 				d->a[0] = d->a[1];	d->a[n-1] = d->a[n-2];	break;
554 			case 2:		// linear at border
555 			case 'l':
556 			case '2':
557 				d->a[0] = mreal(2)*d->a[1]-d->a[2];
558 				d->a[n-1] = mreal(2)*d->a[n-2]-d->a[n-3];	break;
559 			case 3:		// square at border
560 			case 's':
561 			case '3':
562 				d->a[0] = d->a[3]+mreal(3)*(d->a[1]-d->a[2]);
563 				d->a[n-1] = d->a[n-4]+mreal(3)*(d->a[n-2]-d->a[n-3]);	break;
564 			case -1:		// exponent at border
565 			case 4:
566 			case 'e':
567 			case '4':
568 				d->a[0] = norm(d->a[2])<norm(d->a[1]) ? d->a[1] : d->a[1]*d->a[1]/d->a[2];
569 				d->a[n-1] = norm(d->a[n-3])<norm(d->a[n-2]) ? d->a[n-2] : d->a[n-2]*d->a[n-2]/d->a[n-3];
570 				break;
571 			case -2:		// gaussian at border
572 			case 5:
573 			case 'g':
574 			case '5':
575 				d->a[0] = norm(d->a[2])<norm(d->a[1]) ? d->a[3] : pow(d->a[1]/d->a[2],3)*d->a[3];
576 				d->a[n-1] = norm(d->a[n-3])<norm(d->a[n-2]) ? d->a[n-4] : pow(d->a[n-2]/d->a[n-3],3)*d->a[n-4];
577 				break;
578 		}
579 		memcpy(dx+2*j*n, d->a, 2*n*sizeof(mreal));
580 		delete d;
581 	}
582 }
mgl_ode_solve_set_c(const char * func,const char * var,char brd,HCDT x0,mreal dt,mreal tmax)583 HADT MGL_EXPORT mgl_ode_solve_set_c(const char *func, const char *var, char brd, HCDT x0, mreal dt, mreal tmax)
584 {
585 	if(!var || !(*var) || !func || !x0)	return 0;
586 	mglEqTxT par;
587 	par.var=var;	par.brd=brd;	par.FillStr(func);
588 	const long n = par.n = x0->GetNx();
589 	const long m = par.m = long(strlen(var)), nn = n/m;	// number of variables
590 
591 	HADT dat = new mglDataC[m];
592 	for(long i=0;i<m;i++)
593 	{
594 		HADT d = dat+i;	d->Create(nn);
595 		d->s = var[i];	par.head.push_back(d);
596 	}
597 	HMDT d = new mglData[3];
598 	d->Create(nn);	d->s = 'j';
599 	for(long i=0;i<nn;i++)	d->a[i] = i;
600 	par.head.push_back(d);
601 	(d+1)->Create(nn);	(d+1)->s = "#$mgl";
602 	par.head.push_back(d+1);
603 	par.t = d+2;	par.t->s = 't';
604 	par.head.push_back(par.t);
605 
606 	mreal *xx = new mreal[2*n];
607 	const mglDataC *c = dynamic_cast<const mglDataC *>(x0);
608 	if(c)		for(long i=0;i<n;i++)
609 	{	xx[2*i]=real(c->a[i]);	xx[2*i+1]=imag(c->a[i]);	}
610 	else if(x0)	for(long i=0;i<n;i++)
611 	{	xx[2*i] = x0->vthr(i);	xx[2*i+1]=0;	}
612 	else		for(long i=0;i<n;i++)	xx[2*i] = xx[2*i+1]=0;
613 
614 	HMDT res = mgl_ode_solve_ex(mgl_set_funcC,2*n,xx,dt,tmax,&par,NULL);
615 	delete []xx;	delete []d;	delete []dat;
616 	const long nt=res->ny;
617 	mglDataC *out = new mglDataC(n, nt);
618 #pragma omp parallel for
619 	for(long i=0;i<nt*n;i++)	out->a[i] = dual(res->a[2*i],res->a[2*i+1]);
620 	delete res;	return out;
621 }
622 //-----------------------------------------------------------------------------
mgl_txt_func(const mreal * x,mreal * dx,void * par)623 void MGL_NO_EXPORT mgl_txt_func(const mreal *x, mreal *dx, void *par)
624 {
625 	mglEqTxT *p=(mglEqTxT *)par;
626 	mreal vars[MGL_VS];
627 	const long n = p->n;
628 	for(long i=0;i<n;i++)
629 	{	char ch = p->var[i];	if(ch>='a' && ch<='z')	vars[ch-'a']=x[i];	}
630 //#pragma omp parallel for
631 	for(long i=0;i<n;i++)
632 		dx[i] = mgl_expr_eval_v(p->eqR[i], vars);
633 }
mgl_ode_solve_str(const char * func,const char * var,HCDT x0,mreal dt,mreal tmax)634 HMDT MGL_EXPORT mgl_ode_solve_str(const char *func, const char *var, HCDT x0, mreal dt, mreal tmax)
635 {
636 	if(!var || !(*var) || !func)	return 0;
637 	mglEqTxT par;
638 	par.var=var;	par.FillReal(func);
639 	long n = par.n = long(par.str.size());
640 	mreal *xx = new mreal[n];
641 	for(long i=0;i<n;i++)	xx[i] = x0?x0->vthr(i):0;
642 	HMDT res = mgl_ode_solve_ex(mgl_txt_func,n,xx,dt,tmax,&par,NULL);
643 	delete []xx;	return res;
644 }
645 //-----------------------------------------------------------------------------
mgl_txt_funcC(const mreal * x,mreal * dx,void * par)646 void MGL_NO_EXPORT mgl_txt_funcC(const mreal *x, mreal *dx, void *par)
647 {
648 	mglEqTxT *p=(mglEqTxT *)par;
649 	mdual vars[MGL_VS];
650 	const long n = p->n;
651 	for(long i=0;i<n;i++)
652 	{	char ch = p->var[i];	if(ch>='a' && ch<='z')	vars[ch-'a']=dual(x[2*i],x[2*i+1]);	}
653 //#pragma omp parallel for
654 	for(long i=0;i<n;i++)
655 	{
656 		dual r = mgl_cexpr_eval_v(p->eqC[i], vars);
657 		dx[2*i] = real(r);	dx[2*i+1] = imag(r);
658 	}
659 }
mgl_ode_solve_str_c(const char * func,const char * var,HCDT x0,mreal dt,mreal tmax)660 HADT MGL_EXPORT mgl_ode_solve_str_c(const char *func, const char *var, HCDT x0, mreal dt, mreal tmax)
661 {
662 	if(!var || !(*var) || !func)	return 0;
663 	mglEqTxT par;	par.var=var;
664 	par.var=var;	par.FillCmplx(func);
665 	long n = par.n = long(par.str.size());
666 	mreal *xx = new mreal[2*n];
667 	const mglDataC *c = dynamic_cast<const mglDataC *>(x0);
668 	for(long i=0;i<n;i++)
669 	{
670 		if(c)	{	xx[2*i]=real(c->a[i]);	xx[2*i+1]=imag(c->a[i]);	}
671 		else	{	xx[2*i] = x0?x0->vthr(i):0;	xx[2*i+1]=0;	}
672 	}
673 	HMDT res = mgl_ode_solve_ex(mgl_txt_funcC,2*n,xx,dt,tmax,&par,NULL);
674 	delete []xx;
675 	const long nt=res->ny;
676 	mglDataC *out = new mglDataC(n, nt);
677 #pragma omp parallel for
678 	for(long i=0;i<nt*n;i++)	out->a[i] = dual(res->a[2*i],res->a[2*i+1]);
679 	delete res;	return out;
680 }
681 //-----------------------------------------------------------------------------
mgl_ode_solve(void (* func)(const mreal * x,mreal * dx,void * par),int n,const mreal * x0,mreal dt,mreal tmax,void * par)682 HMDT MGL_EXPORT mgl_ode_solve(void (*func)(const mreal *x, mreal *dx, void *par), int n, const mreal *x0, mreal dt, mreal tmax, void *par)
683 {	return mgl_ode_solve_ex(func,n,x0,dt,tmax,par,0);	}
mgl_ode_solve_ex(void (* func)(const mreal * x,mreal * dx,void * par),int n,const mreal * x0,mreal dt,mreal tmax,void * par,void (* bord)(mreal * x,const mreal * xp,void * par))684 HMDT MGL_EXPORT mgl_ode_solve_ex(void (*func)(const mreal *x, mreal *dx, void *par), int n, const mreal *x0, mreal dt, mreal tmax, void *par, void (*bord)(mreal *x, const mreal *xp, void *par))
685 {
686 	bool scale=false;
687 	if(dt*tmax<0)	{	scale = true;	dt = fabs(dt);	tmax = fabs(tmax);	}
688 	if(tmax<dt)	return 0;	// nothing to do
689 	const long nt = int(tmax/dt+0.5)+1;
690 	mglData *res=new mglData(n,nt);
691 	mreal *x=new mreal[n+1], *k1=new mreal[n], *k2=new mreal[n], *k3=new mreal[n], *v=new mreal[n+1], hh=dt/2;
692 	// initial conditions
693 	for(long i=0;i<n;i++)	x[i] = res->a[i] = x0[i];
694 	// Runge Kutta scheme of 4th order
695 	bool good=true;
696 	long k;
697 	x[n] = 0;
698 	for(k=1;k<nt && good;k++)
699 	{
700 		double m0=0,m1=0,m2=0,m3=0, t = x[n];
701 		func(x,k1,par);
702 		if(scale)
703 		{
704 			for(long i=0;i<n;i++)
705 			{	double kk = fabs(k1[i]);	if(kk>m0)	m0 = kk;	}
706 			for(long i=0;i<n;i++)	k1[i] /= m0;
707 		}
708 		else m0 = 1;
709 		for(long i=0;i<n;i++)	v[i] = x[i]+k1[i]*hh;
710 		v[n] = t+hh/m0;
711 		func(v,k2,par);
712 		if(scale)
713 		{
714 			for(long i=0;i<n;i++)
715 			{	double kk = fabs(k2[i]);	if(kk>m1)	m1 = kk;	}
716 			for(long i=0;i<n;i++)	k2[i] /= m1;
717 		}
718 		else m1 = 1;
719 		for(long i=0;i<n;i++)	v[i] = x[i]+k2[i]*hh;
720 		v[n] = t+hh/m1;
721 		func(v,k3,par);
722 		if(scale)
723 		{
724 			for(long i=0;i<n;i++)
725 			{	double kk = fabs(k3[i]);	if(kk>m2)	m2 = kk;	}
726 			for(long i=0;i<n;i++)	k3[i] /= m2;
727 		}
728 		else m2 = 1;
729 		for(long i=0;i<n;i++)	{	v[i] = x[i]+k3[i]*dt;	k3[i] += k2[i];	}
730 		v[n] = t+dt/m2;
731 		func(v,k2,par);
732 		if(scale)
733 		{
734 			for(long i=0;i<n;i++)
735 			{	double kk = fabs(k2[i]);	if(kk>m3)	m3 = kk;	}
736 			for(long i=0;i<n;i++)	k2[i] /= m3;
737 		}
738 		else m3 = 1;
739 		x[n] = t + (1/m0+2/m1+2/m2+1/m3)*dt/6;
740 		for(long i=0;i<n;i++)	x[i] += (k1[i]+k2[i]+2*k3[i])*dt/6;
741 		if(bord)	bord(x,res->a+n*(k-1),par);
742 		for(long i=0;i<n;i++)
743 		{	res->a[i+n*k] = x[i];	if(mgl_isbad(x[i]))	good=false;	}
744 	}
745 	delete []x;	delete []k1;	delete []k2;	delete []k3;	delete []v;
746 	res->Crop(0,k,'y');
747 	return res;
748 }
749 //-----------------------------------------------------------------------------
750 //
751 //		Common functions for quasioptical calculations
752 //
753 //-----------------------------------------------------------------------------
mgl_ray3d(const mreal * in,mreal * out,void * par)754 void static mgl_ray3d(const mreal *in, mreal *out, void *par)
755 {
756 	mglFormula *eqs = (mglFormula *)par;
757 	const char *v="xyzpqvt";
758 	mreal var[MGL_VS];	memset(var,0,MGL_VS*sizeof(mreal));
759 	for(int i=0;i<7;i++)	var[v[i]-'a'] = in[i];
760 	out[0] = eqs->CalcD(var,'p');	out[3] = -eqs->CalcD(var,'x');
761 	out[1] = eqs->CalcD(var,'q');	out[4] = -eqs->CalcD(var,'y');
762 	out[2] = eqs->CalcD(var,'v');	out[5] = -eqs->CalcD(var,'z');
763 	out[7] = eqs->CalcD(var,'i');	out[6] = 1;
764 }
765 // Solve GO ray equation like dr/dt = d ham/dp, dp/dt = -d ham/dr where ham = ham(x,y,z,p,q,v,t) and px=p, py=q, pz=v. The starting point (at t=0) is r0, p0. Result is array of {x,y,z,p,q,v,t}
mgl_ray_trace(const char * ham,mreal x0,mreal y0,mreal z0,mreal px,mreal py,mreal pz,mreal dt,mreal tmax)766 HMDT MGL_EXPORT mgl_ray_trace(const char *ham, mreal x0, mreal y0, mreal z0, mreal px, mreal py, mreal pz, mreal dt, mreal tmax)
767 {
768 	mglFormula eqs(ham);
769 	mreal in[8]={x0,y0,z0,px,py,pz,0,0};
770 	HMDT res = mgl_ode_solve(mgl_ray3d,8,in,dt,tmax,&eqs);
771 	mgl_data_set_id(res,"xyzpqvti");
772 	return res;
773 }
774 //-----------------------------------------------------------------------------
mgl_ray_trace_(const char * ham,mreal * x0,mreal * y0,mreal * z0,mreal * px,mreal * py,mreal * pz,mreal * dt,mreal * tmax,int l)775 uintptr_t MGL_EXPORT mgl_ray_trace_(const char *ham, mreal *x0, mreal *y0, mreal *z0, mreal *px, mreal *py, mreal *pz, mreal *dt, mreal *tmax,int l)
776 {	char *s=new char[l+1];	memcpy(s,ham,l);	s[l]=0;
777 	uintptr_t res = uintptr_t(mgl_ray_trace(s, *x0,*y0,*z0, *px,*py,*pz, *dt,*tmax));
778 	delete []s;	return res;	}
779 //-----------------------------------------------------------------------------
780 struct mgl_ap
781 {
782 	double x0,y0,z0,x1,y1,z1,x2,y2,z2;	// vectors {l, g1, g2}
783 	double t1,t2,ch,q1,q2,pt,dt,d1,d2;	// theta_{1,2}, chi, q_{1,2}, p_t, dtau, dq_{1,2}
mgl_apmgl_ap784 	mgl_ap()	{	memset(this,0,sizeof(mgl_ap));	}
785 };
786 //-----------------------------------------------------------------------------
mgl_init_ra(long n,int n7,const mreal * r,mgl_ap * ra)787 void static mgl_init_ra(long n, int n7, const mreal *r, mgl_ap *ra)	// prepare some intermediate data for QO (3d case)
788 {
789 	double tt = hypot(r[n7]-r[0], r[n7+1]-r[1]);
790 	if(tt)
791 	{
792 		ra[0].x1 = (r[n7+1]-r[1])/tt;
793 		ra[0].y1 = (r[0]-r[n7])/tt;
794 		ra[0].z1 = 0;
795 	}
796 	else	{	ra[0].x1 = ra[0].y1 = 0;	ra[0].z1 = 1;	}
797 	ra[0].x0 = r[n7] - r[0];	ra[0].y0 = r[n7+1] - r[1];	ra[0].z0 = r[n7+2] - r[2];
798 	tt = sqrt(ra[0].x0*ra[0].x0 + ra[0].y0*ra[0].y0 + ra[0].z0*ra[0].z0);
799 	ra[0].x0 /= tt;	ra[0].y0 /= tt;	ra[0].z0 /= tt;
800 	ra[0].x2 = ra[0].y1*ra[0].z0 - ra[0].y0*ra[0].z1;	// vector g_2
801 	ra[0].y2 = ra[0].z1*ra[0].x0 - ra[0].z0*ra[0].x1;
802 	ra[0].z2 = ra[0].x1*ra[0].y0 - ra[0].x0*ra[0].y1;
803 	for(long i=1;i<n;i++)	// NOTE: no parallel due to dependence on prev point!
804 	{
805 		mgl_ap *ri=ra+i, *rp=ra+i-1;
806 		const mreal *rr = r+n7*i;
807 		ri->dt = rr[6] - rr[6-n7];
808 		ri->x0 = rr[0] - rr[-n7];	// NOTE: very rough formulas
809 		ri->y0 = rr[1] - rr[1-n7];	// for corresponding with dt one
810 		ri->z0 = rr[2] - rr[2-n7];	// for corresponding with dt one
811 		double ch = sqrt(ri->x0*ri->x0 + ri->y0*ri->y0 + ri->z0*ri->z0);
812 		ri->x0 /= ch;	ri->y0 /= ch;	ri->z0 /= ch;
813 		ri->ch = ch/ri->dt;
814 		ri->pt = rr[3]*ri->x0 + rr[4]*ri->y0 + rr[5]*ri->z0;
815 		ri->q1 = rr[3]*ri->x1 + rr[4]*ri->y1 + rr[5]*ri->z1;
816 		ri->q2 = rr[3]*ri->x2 + rr[4]*ri->y2 + rr[5]*ri->z2;
817 		// NOTE previous point is used here!
818 		tt = ri->x0*rp->x1 + ri->y0*rp->y1 + ri->z0*rp->z1;
819 		ri->x1 = rp->x1 - tt*ri->x0;	// vector g_1
820 		ri->y1 = rp->y1 - tt*ri->y0;
821 		ri->z1 = rp->z1 - tt*ri->z0;
822 		ri->t1 = tt/ch;
823 		tt = sqrt(ri->x1*ri->x1 + ri->y1*ri->y1 + ri->z1*ri->z1);
824 		ri->x1 /= tt;	ri->y1 /= tt;	ri->z1 /= tt;	// norm for reducing numeric error
825 		ri->x2 = ri->y1*ri->z0 - ri->y0*ri->z1;	// vector g_2
826 		ri->y2 = ri->z1*ri->x0 - ri->z0*ri->x1;
827 		ri->z2 = ri->x1*ri->y0 - ri->x0*ri->y1;
828 		tt = ri->x0*rp->x2 + ri->y0*rp->y2 + ri->z0*rp->z2;
829 		ri->t2 = tt/ch;
830 		ri->d1 = (ri->q1-rp->q1)/ch;
831 		ri->d2 = (ri->q2-rp->q2)/ch;
832 	}
833 	memcpy(ra,ra+1,sizeof(mgl_ap));	// setup zero point
834 	ra[0].pt = r[3]*ra[0].x0 + r[4]*ra[0].y0 + r[5]*ra[0].z0;
835 	ra[0].q1 = r[3]*ra[0].x1 + r[4]*ra[0].y1 + r[5]*ra[0].z1;
836 	ra[0].q2 = r[3]*ra[0].x2 + r[4]*ra[0].y2 + r[5]*ra[0].z2;
837 }
838 //-----------------------------------------------------------------------------
839 //
840 //		QO2d series
841 //
842 //-----------------------------------------------------------------------------
843 struct mgl_qo2d_ham
844 {
845 	dual *hx, *hu, *a, h0;
846 	double *dmp, dr, dk;
847 	mreal *r;
848 	mgl_ap *ra;
849 	mdual (*ham)(mreal u, mreal x, mreal y, mreal px, mreal py, void *par);
850 	void *par;
851 };
852 //-----------------------------------------------------------------------------
mgl_qo2d_hprep(void * par)853 static void *mgl_qo2d_hprep(void *par)
854 {
855 	mglThreadD *t=(mglThreadD *)par;
856 	mgl_qo2d_ham *f = (mgl_qo2d_ham *)t->v;
857 	mgl_ap *ra = f->ra;
858 
859 	const mreal *r = f->r;
860 	const long nx=t->n;
861 #if !MGL_HAVE_PTHREAD
862 #pragma omp parallel for
863 #endif
864 	for(long i=t->id;i<nx;i+=mglNumThr)
865 	{
866 		// x terms
867 		mreal x1 = (2*i-nx+1)*f->dr, hh = 1 - ra->t1*x1;
868 		hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
869 		mreal tt = (ra->pt + ra->d1*x1)/hh - ra->pt;
870 		dual tmp = f->ham(abs(f->a[i]), r[0]+ra->x1*x1, r[1]+ra->y1*x1, r[3]+ra->x0*tt, r[4]+ra->y0*tt, f->par);
871 		f->hx[i] = tmp - f->h0/mreal(2);
872 		// u-y terms
873 		x1 = f->dk/2*(i<nx/2 ? i:i-nx);
874 		tmp = f->ham(0, r[0], r[1], r[3]+ra->x1*x1, r[4]+ra->y1*x1, f->par);
875 		f->hu[i] = tmp - f->h0/mreal(2);
876 
877 		if(imag(f->hx[i])>0)	f->hx[i] = f->hx[i].real();
878 		if(imag(f->hu[i])>0)	f->hu[i] = f->hu[i].real();
879 		// add boundary conditions for x-direction
880 		f->hx[i] -= dual(0,f->dmp[i]);
881 	}
882 	return 0;
883 }
884 //-----------------------------------------------------------------------------
mgl_qo2d_func_c(mdual (* ham)(mreal u,mreal x,mreal y,mreal px,mreal py,void * par),void * par,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy)885 HADT MGL_EXPORT mgl_qo2d_func_c(mdual (*ham)(mreal u, mreal x, mreal y, mreal px, mreal py, void *par), void *par, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy)
886 {
887 	const mglData *ray=dynamic_cast<const mglData *>(ray_dat);	// NOTE: Ray must be mglData!
888 	if(!ray)	return 0;
889 	const long nx=ini_re->GetNx(), nt=ray->ny, n7=ray->nx;
890 	if(nx<2 || ini_im->GetNx()!=nx || nt<2)	return 0;
891 	mglDataC *res=new mglDataC(nx,nt,1);
892 
893 	dual *a=new dual[2*nx], *hu=new dual[2*nx],  *hx=new dual[2*nx];
894 	double *dmp=new double[2*nx];
895 	mgl_ap *ra = new mgl_ap[nt];	mgl_init_ra(nt, n7, ray->a, ra);	// ray
896 
897 	mreal dr = r/(nx-1), dk = M_PI*(nx-1)/(k0*r*nx);
898 	memset(dmp,0,2*nx*sizeof(double));
899 	for(long i=0;i<nx/2;i++)	// prepare damping
900 	{
901 		mreal x1 = (nx/2-i)/(nx/2.);
902 		dmp[2*nx-1-i] = dmp[i] = 30*GAMMA*x1*x1/k0;
903 	}
904 	for(long i=0;i<nx;i++)	a[i+nx/2] = dual(ini_re->v(i),ini_im->v(i));	// init
905 	void *wsx, *wtx = mgl_fft_alloc(2*nx,&wsx,1);
906 	if(xx && yy)	{	xx->Create(nx,nt);	yy->Create(nx,nt);	}
907 
908 	mgl_qo2d_ham tmp;	// parameters for Hamiltonian calculation
909 	tmp.hx=hx;	tmp.hu=hu;	tmp.dmp=dmp;	tmp.par=par;
910 	tmp.dr=dr;	tmp.dk=dk;	tmp.ham=ham;	tmp.a=a;
911 	// start calculation
912 	for(long k=0;k<nt;k++)
913 	{
914 		for(long i=0;i<nx;i++)	// "save"
915 			res->a[i+k*nx]=a[i+nx/2]*mreal(sqrt(ra[0].ch/ra[k].ch));
916 		if(xx && yy)	for(long i=0;i<nx;i++)	// prepare xx, yy
917 		{
918 			mreal x1 = (2*i-nx+1)*dr;
919 			xx->a[i+k*nx] = ray->a[n7*k] + ra[k].x1*x1;	// new coordinates
920 			yy->a[i+k*nx] = ray->a[n7*k+1] + ra[k].y1*x1;
921 		}
922 		tmp.r=ray->a+n7*k;	tmp.ra=ra+k;
923 		mreal hh = ra[k].pt*(1/sqrt(sqrt(1.041))-1);	// 0.041=0.45^4 -- minimal value of h
924 		tmp.h0 = ham(0, tmp.r[0], tmp.r[1], tmp.r[3]+ra[k].x0*hh, tmp.r[4]+ra[k].x0*hh, par);
925 		mglStartThread(mgl_qo2d_hprep,0,2*nx,0,0,0,0,&tmp);
926 		// Step for field
927 		dual dt = dual(0, -ra[k].dt*k0);
928 		for(long i=0;i<2*nx;i++)	a[i] *= exp(hx[i]*dt);
929 		mgl_fft((double *)a, 1, 2*nx, wtx, wsx, false);
930 		for(long i=0;i<2*nx;i++)	a[i] *= exp(hu[i]*dt);
931 		mgl_fft((double *)a, 1, 2*nx, wtx, wsx, true);
932 
933 /*		// Calculate B1			// TODO make more general scheme later!!!
934 		hh = ra[k].pt*(1/sqrt(sqrt(1.041))-1);
935 		var['x'-'a'] = ray->a[n7*k];	// new coordiantes
936 		var['y'-'a'] = ray->a[n7*k+1];
937 		var['p'-'a'] = ray->a[n7*k+3] + ra[k].x0*hh;	// new momentums
938 		var['q'-'a'] = ray->a[n7*k+4] + ra[k].y0*hh;
939 		tt = h.CalcD(var,'p')*ra[k].x1 + h.CalcD(var,'q')*ra[k].y1;
940 		var['x'-'a'] = ray->a[n7*k] + ra[k].x1*dr;	// new coordiantes
941 		var['y'-'a'] = ray->a[n7*k+1] + ra[k].y1*dr;
942 		hh = 1 - ra[k].t1*dr;	hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
943 		hh = (ra[k].ch*ra[k].pt + ra[k].d1*dr)/(hh*ra[k].ch) - ra[k].pt;
944 		var['p'-'a'] = ray->a[n7*k+3] + ra[k].x0*hh;	// new momentums
945 		var['q'-'a'] = ray->a[n7*k+4] + ra[k].y0*hh;
946 		B1 = h.CalcD(var,'p')*ra[k].x1 + h.CalcD(var,'q')*ra[k].y1;
947 		B1 = (B1-tt)/dr;
948 		double a1=0, a2=0;
949 		for(i=0;i<2*nx;i++)	a1 += norm(a[i]);
950 		hx[0] = hx[2*nx-1] = 0.;
951 		for(i=1;i<2*nx-1;i++)	hx[i] = (B1*ra[k].dt*(i-nx))*(a[i+1]-a[i-1]);
952 		for(i=0;i<2*nx;i++)	{	a[i] += hx[i];	a2 += norm(a[i]);	}
953 		a1 = sqrt(a1/a2);
954 		for(i=0;i<2*nx;i++)	a[i] *= a1;*/
955 	}
956 	mgl_fft_free(wtx,&wsx,1);
957 	delete []a;		delete []hu;	delete []hx;	delete []ra;	delete []dmp;
958 	return res;
959 }
960 //-----------------------------------------------------------------------------
mgl_qo2d_func(mdual (* ham)(mreal u,mreal x,mreal y,mreal px,mreal py,void * par),void * par,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy)961 HMDT MGL_EXPORT mgl_qo2d_func(mdual (*ham)(mreal u, mreal x, mreal y, mreal px, mreal py, void *par), void *par, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy)
962 {
963 	HADT res = mgl_qo2d_func_c(ham,par,ini_re,ini_im,ray_dat,r,k0,xx,yy);
964 	HMDT out = mgl_datac_abs(res);	delete res;	return out;
965 }
966 //-----------------------------------------------------------------------------
mgl_ham2d(mreal u,mreal x,mreal y,mreal px,mreal py,void * par)967 mdual static mgl_ham2d(mreal u, mreal x, mreal y, mreal px, mreal py, void *par)
968 {
969 	mglFormula *h = (mglFormula *)par;
970 	mreal var[MGL_VS];	memset(var,0,MGL_VS*sizeof(mreal));
971 	var['x'-'a'] = x;	var['y'-'a'] = y;	var['u'-'a'] = u;
972 	var['p'-'a'] = px;	var['q'-'a'] = py;
973 	return mdual(h->Calc(var), -h->CalcD(var,'i'));
974 }
975 //-----------------------------------------------------------------------------
mgl_qo2d_solve_c(const char * ham,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy)976 HADT MGL_EXPORT mgl_qo2d_solve_c(const char *ham, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy)
977 {
978 	mglFormula h(ham);
979 	return mgl_qo2d_func_c(mgl_ham2d, &h, ini_re, ini_im, ray_dat, r, k0, xx, yy);
980 }
981 //-----------------------------------------------------------------------------
mgl_qo2d_solve(const char * ham,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy)982 HMDT MGL_EXPORT mgl_qo2d_solve(const char *ham, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy)
983 {
984 	HADT res = mgl_qo2d_solve_c(ham,ini_re,ini_im,ray_dat,r,k0,xx,yy);
985 	HMDT out = mgl_datac_abs(res);	delete res;	return out;
986 }
987 //-----------------------------------------------------------------------------
mgl_qo2d_solve_(const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,uintptr_t * ray,mreal * r,mreal * k0,uintptr_t * xx,uintptr_t * yy,int l)988 uintptr_t MGL_EXPORT mgl_qo2d_solve_(const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, uintptr_t* ray, mreal *r, mreal *k0, uintptr_t* xx, uintptr_t* yy, int l)
989 {	char *s=new char[l+1];	memcpy(s,ham,l);	s[l]=0;
990 	uintptr_t res = uintptr_t(mgl_qo2d_solve(s, _DA_(ini_re), _DA_(ini_im), _DA_(ray), *r, *k0, _DM_(xx), _DM_(yy)));
991 	delete []s;	return res;	}
992 //-----------------------------------------------------------------------------
993 //
994 //		QO3d series
995 //
996 //-----------------------------------------------------------------------------
997 struct mgl_qo3d_ham
998 {
999 	dual *hxy, *huv, *hxv, *huy, *a;
1000 	dual *hx, *hy, *hu, *hv, h0;
1001 	double *dmp, dr, dk;
1002 	mreal *r;
1003 	mgl_ap *ra;
1004 	mdual (*ham)(mreal u, mreal x, mreal y, mreal z, mreal px, mreal py, mreal pz, void *par);
1005 	void *par;
1006 };
1007 //-----------------------------------------------------------------------------
mgl_qo3d_hprep(void * par)1008 static void *mgl_qo3d_hprep(void *par)
1009 {
1010 	mglThreadD *t=(mglThreadD *)par;
1011 	mgl_qo3d_ham *f = (mgl_qo3d_ham *)t->v;
1012 	mgl_ap *ra = f->ra;
1013 	const mreal *r = f->r;
1014 	const long nx=t->n;
1015 #if !MGL_HAVE_PTHREAD
1016 #pragma omp parallel for
1017 #endif
1018 	for(long ii=t->id;ii<nx*nx;ii+=mglNumThr)
1019 	{
1020 		long i = ii%nx, j = ii/nx;
1021 		// x-y terms
1022 		mreal x1 = (2*i-nx+1)*f->dr, x2 = (2*j-nx+1)*f->dr, hh = 1-ra->t1*x1-ra->t2*x2;
1023 		hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
1024 		mreal tt = (ra->pt + ra->d1*x1 + ra->d2*x2)/hh - ra->pt;
1025 		f->hxy[ii] = f->ham(abs(f->a[i]), r[0]+ra->x1*x1+ra->x2*x2, r[1]+ra->y1*x1+ra->y2*x2, r[2]+ra->z1*x1+ra->z2*x2, r[3]+ra->x0*tt, r[4]+ra->y0*tt, r[5]+ra->z0*tt, f->par);
1026 		// x-v terms
1027 		x1 = (2*i-nx+1)*f->dr;	x2 = f->dk/2*(j<nx/2 ? j:j-nx);	hh = 1-ra->t1*x1;
1028 		hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
1029 		tt = (ra->pt + ra->d1*x1)/hh - ra->pt;
1030 		f->hxv[ii] = f->ham(0, r[0]+ra->x1*x1, r[1]+ra->y1*x1, r[2]+ra->z1*x1, r[3]+ra->x0*tt+ra->x2*x2, r[4]+ra->y0*tt+ra->y2*x2, r[5]+ra->z0*tt+ra->z2*x2, f->par);
1031 		// u-y terms
1032 		x1 = f->dk/2*(i<nx/2 ? i:i-nx);	x2 = (2*j-nx+1)*f->dr;	hh = 1-ra->t2*x2;
1033 		hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
1034 		tt = (ra->pt + ra->d2*x2)/hh - ra->pt;
1035 		f->huy[ii] = f->ham(0, r[0]+ra->x2*x2, r[1]+ra->y2*x2, r[2]+ra->z2*x2, r[3]+ra->x1*x1+ra->x0*tt, r[4]+ra->y1*x1+ra->y0*tt, r[5]+ra->z1*x1+ra->z0*tt, f->par);
1036 		// u-y terms
1037 		x1 = f->dk/2*(i<nx/2 ? i:i-nx);	x2 = f->dk/2*(j<nx/2 ? j:j-nx);
1038 		f->huv[ii] = f->ham(0, r[0], r[1], r[2], r[3]+ra->x1*x1+ra->x2*x2, r[4]+ra->y1*x1+ra->y2*x2, r[5]+ra->z1*x1+ra->z2*x2, f->par);
1039 	}
1040 	return 0;
1041 }
1042 //-----------------------------------------------------------------------------
mgl_qo3d_post(void * par)1043 static void *mgl_qo3d_post(void *par)
1044 {
1045 	mglThreadD *t=(mglThreadD *)par;
1046 	mgl_qo3d_ham *f = (mgl_qo3d_ham *)t->v;
1047 	const long nx=t->n;
1048 #if !MGL_HAVE_PTHREAD
1049 #pragma omp parallel for
1050 #endif
1051 	for(long ii=t->id;ii<nx*nx;ii+=mglNumThr)
1052 	{
1053 		long i = ii%nx, j = ii/nx;
1054 		f->hxy[ii] -= (f->hx[i]+f->hy[j]-f->h0/mreal(2))/mreal(2);
1055 		if(imag(f->hxy[ii])>0)	f->hxy[ii] = f->hxy[ii].real();
1056 		f->hxv[ii] -= (f->hx[i]+f->hv[j]-f->h0/mreal(2))/mreal(2);
1057 		if(imag(f->hxv[ii])>0)	f->hxv[ii] = f->hxv[ii].real();
1058 		f->huy[ii] -= (f->hu[i]+f->hy[j]-f->h0/mreal(2))/mreal(2);
1059 		if(imag(f->huy[ii])>0)	f->huy[ii] = f->huy[ii].real();
1060 		f->huv[ii] -= (f->hu[i]+f->hv[j]-f->h0/mreal(2))/mreal(2);
1061 		if(imag(f->huv[ii])>0)	f->huv[ii] = f->huv[ii].real();
1062 		// add boundary conditions for x-direction
1063 		f->hxy[ii] -= dual(0,f->dmp[ii]);
1064 	}
1065 	return 0;
1066 }
1067 //-----------------------------------------------------------------------------
mgl_qo3d_func_c(mdual (* ham)(mreal u,mreal x,mreal y,mreal z,mreal px,mreal py,mreal pz,void * par),void * par,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy,HMDT zz)1068 HADT MGL_EXPORT mgl_qo3d_func_c(mdual (*ham)(mreal u, mreal x, mreal y, mreal z, mreal px, mreal py, mreal pz, void *par), void *par, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy, HMDT zz)
1069 {
1070 	const mglData *ray=dynamic_cast<const mglData *>(ray_dat);	// NOTE: Ray must be mglData!
1071 	if(!ray)	return 0;
1072 	const long nx=ini_re->GetNx(), nt=ray->ny, n7=ray->nx;	// NOTE: only square grids are supported now (for simplicity)
1073 	if(nx<2 || ini_re->GetNx()!=nx || ini_im->GetNx()*ini_im->GetNy()!=nx*nx || nt<2)	return 0;
1074 	mglDataC *res=new mglDataC(nx,nx,nt);
1075 
1076 	dual *a=new dual[4*nx*nx], *huv=new dual[4*nx*nx],  *hxy=new dual[4*nx*nx], *huy=new dual[4*nx*nx],  *hxv=new dual[4*nx*nx];
1077 	dual *hu=new dual[2*nx],  *hx=new dual[2*nx], *hy=new dual[2*nx],  *hv=new dual[2*nx];
1078 	double *dmp=new double[4*nx*nx];
1079 	mgl_ap *ra = new mgl_ap[nt];
1080 	mgl_init_ra(nt, n7, ray->a, ra);	// prepare ray
1081 
1082 	double dr = r/(nx-1), dk = M_PI*(nx-1)/(k0*r*nx);
1083 	memset(dmp,0,4*nx*nx*sizeof(double));
1084 #pragma omp parallel for collapse(2)
1085 	for(long i=0;i<nx/2;i++)	for(long j=0;j<nx/2;j++)	// prepare damping
1086 	{
1087 		double x1 = (nx/2-i)/(nx/2.), x2 = (nx/2-j)/(nx/2.);
1088 		dmp[2*nx-1-i] = dmp[i] = 30*GAMMA*x1*x1/k0;
1089 		dmp[(2*nx-1-j)*2*nx] += 30*GAMMA*x2*x2/k0;
1090 		dmp[j*2*nx] += 30*GAMMA*x2*x2/k0;
1091 	}
1092 #pragma omp parallel for collapse(2)
1093 	for(long i=0;i<nx;i++)	for(long j=0;j<nx;j++)	// init
1094 		a[i+nx/2+2*nx*(j+nx/2)] = dual(ini_re->v(i,j),ini_im->v(i,j));
1095 	void *wtx = mgl_fft_alloc(2*nx,0,0);
1096 	if(xx && yy && zz)	{	xx->Create(nx,nx,nt);	yy->Create(nx,nx,nt);	zz->Create(nx,nx,nt);	}
1097 
1098 	mgl_qo3d_ham tmp;	// parameters for Hamiltonian calculation
1099 	tmp.hxy=hxy;	tmp.hx=hx;	tmp.huv=huv;	tmp.hu=hu;
1100 	tmp.huy=huy;	tmp.hy=hy;	tmp.hxv=hxv;	tmp.hv=hv;
1101 	tmp.dmp=dmp;	tmp.par=par;
1102 	tmp.dr=dr;	tmp.dk=dk;	tmp.ham=ham;	tmp.a=a;
1103 	// start calculation
1104 	for(long k=0;k<nt;k++)
1105 	{
1106 #pragma omp parallel for collapse(2)
1107 		for(long i=0;i<nx;i++)	for(long j=0;j<nx;j++)	// "save"
1108 			res->a[i+nx*(j+k*nx)]=a[i+nx/2+2*nx*(j+nx/2)]*mreal(sqrt(ra[0].ch/ra[k].ch));
1109 		if(xx && yy && zz)
1110 #pragma omp parallel for collapse(2)
1111 			for(long i=0;i<nx;i++)	for(long j=0;j<nx;j++)	// prepare xx, yy, zz
1112 			{
1113 				mreal x1 = (2*i-nx+1)*dr, x2 = (2*j-nx+1)*dr;
1114 				xx->a[i+nx*(j+k*nx)] = ray->a[n7*k] + ra[k].x1*x1 + ra[k].x2*x2;	// new coordinates
1115 				yy->a[i+nx*(j+k*nx)] = ray->a[n7*k+1] + ra[k].y1*x1 + ra[k].y2*x2;
1116 				zz->a[i+nx*(j+k*nx)] = ray->a[n7*k+2] + ra[k].z1*x1 + ra[k].z2*x2;
1117 			}
1118 		tmp.r=ray->a+n7*k;	tmp.ra=ra+k;
1119 		mglStartThread(mgl_qo3d_hprep,0,2*nx,0,0,0,0,&tmp);	tmp.h0 = huv[0];
1120 		for(long i=0;i<2*nx;i++)	// fill intermediate arrays
1121 		{
1122 			tmp.hx[i] = hxv[i];	tmp.hy[i] = huy[i*2*nx];
1123 			tmp.hv[i] = huv[i];	tmp.hu[i] = huv[i*2*nx];
1124 		}
1125 		mglStartThread(mgl_qo3d_post,0,2*nx,0,0,0,0,&tmp);
1126 		// Step for field
1127 		dual dt = dual(0, -ra[k].dt*k0);
1128 #pragma omp parallel
1129 		{
1130 			void *wsx = mgl_fft_alloc_thr(2*nx);
1131 #pragma omp for
1132 			for(long i=0;i<4*nx*nx;i++)	a[i] *= exp(hxy[i]*dt);		// x-y
1133 #pragma omp for
1134 			for(long i=0;i<2*nx;i++)	// x->u
1135 				mgl_fft((double *)(a+i*2*nx), 1, 2*nx, wtx, wsx, false);
1136 #pragma omp for
1137 			for(long i=0;i<4*nx*nx;i++)	a[i] *= exp(huy[i]*dt);		// u-y
1138 #pragma omp for
1139 			for(long i=0;i<2*nx;i++)	// y->v
1140 				mgl_fft((double *)(a+i), 2*nx, 2*nx, wtx, wsx, false);
1141 #pragma omp for
1142 			for(long i=0;i<4*nx*nx;i++)	a[i] *= exp(huv[i]*dt);		// u-v
1143 #pragma omp for
1144 			for(long i=0;i<2*nx;i++)	// u->x
1145 				mgl_fft((double *)(a+i*2*nx), 1, 2*nx, wtx, wsx, true);
1146 #pragma omp for
1147 			for(long i=0;i<4*nx*nx;i++)	a[i] *= exp(hxv[i]*dt);		// x-v
1148 #pragma omp for
1149 			for(long i=0;i<2*nx;i++)	// v->y
1150 				mgl_fft((double *)(a+i), 2*nx, 2*nx, wtx, wsx, true);
1151 			mgl_fft_free_thr(wsx);
1152 		}
1153 
1154 /*		// Calculate B1			// TODO make more general scheme later!!!
1155 		hh = ra[k].pt*(1/sqrt(sqrt(1.041))-1);
1156 		var['x'-'a'] = ray->a[n7*k];	// new coordiantes
1157 		var['y'-'a'] = ray->a[n7*k+1];
1158 		var['p'-'a'] = ray->a[n7*k+3] + ra[k].x0*hh;	// new momentums
1159 		var['q'-'a'] = ray->a[n7*k+4] + ra[k].y0*hh;
1160 		tt = h.CalcD(var,'p')*ra[k].x1 + h.CalcD(var,'q')*ra[k].y1;
1161 		var['x'-'a'] = ray->a[n7*k] + ra[k].x1*dr;	// new coordiantes
1162 		var['y'-'a'] = ray->a[n7*k+1] + ra[k].y1*dr;
1163 		hh = 1 - ra[k].t1*dr;	hh = sqrt(sqrt(0.041+hh*hh*hh*hh));
1164 		hh = (ra[k].ch*ra[k].pt + ra[k].d1*dr)/(hh*ra[k].ch) - ra[k].pt;
1165 		var['p'-'a'] = ray->a[n7*k+3] + ra[k].x0*hh;	// new momentums
1166 		var['q'-'a'] = ray->a[n7*k+4] + ra[k].y0*hh;
1167 		B1 = h.CalcD(var,'p')*ra[k].x1 + h.CalcD(var,'q')*ra[k].y1;
1168 		B1 = (B1-tt)/dr;
1169 		double a1=0, a2=0;
1170 		for(i=0;i<2*nx;i++)	a1 += norm(a[i]);
1171 		hx[0] = hx[2*nx-1] = 0.;
1172 		for(i=1;i<2*nx-1;i++)	hx[i] = (B1*ra[k].dt*(i-nx))*(a[i+1]-a[i-1]);
1173 		for(i=0;i<2*nx;i++)	{	a[i] += hx[i];	a2 += norm(a[i]);	}
1174 		a1 = sqrt(a1/a2);
1175 		for(i=0;i<2*nx;i++)	a[i] *= a1;*/
1176 	}
1177 	mgl_fft_free(wtx,0,0);
1178 	delete []a;		delete []ra;	delete []dmp;
1179 	delete []huv;	delete []hxy;	delete []hxv;	delete []huy;
1180 	delete []hu;	delete []hx;	delete []hv;	delete []hy;
1181 	return res;
1182 }
1183 //-----------------------------------------------------------------------------
mgl_qo3d_func(mdual (* ham)(mreal u,mreal x,mreal y,mreal z,mreal px,mreal py,mreal pz,void * par),void * par,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy,HMDT zz)1184 HMDT MGL_EXPORT mgl_qo3d_func(mdual (*ham)(mreal u, mreal x, mreal y, mreal z, mreal px, mreal py, mreal pz, void *par), void *par, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy, HMDT zz)
1185 {
1186 	HADT res = mgl_qo3d_func_c(ham,par,ini_re,ini_im,ray_dat,r,k0,xx,yy,zz);
1187 	HMDT out = mgl_datac_abs(res);	delete res;	return out;
1188 }
1189 //-----------------------------------------------------------------------------
mgl_ham3d(mreal u,mreal x,mreal y,mreal z,mreal px,mreal py,mreal pz,void * par)1190 mdual static mgl_ham3d(mreal u, mreal x, mreal y, mreal z, mreal px, mreal py, mreal pz, void *par)
1191 {
1192 	mglFormula *h = (mglFormula *)par;
1193 	mreal var[MGL_VS];	memset(var,0,MGL_VS*sizeof(mreal));
1194 	var['x'-'a'] = x;	var['y'-'a'] = y;	var['z'-'a'] = z;	var['u'-'a'] = u;
1195 	var['p'-'a'] = px;	var['q'-'a'] = py;	var['v'-'a'] = pz;
1196 	return mdual(h->Calc(var), -h->CalcD(var,'i'));
1197 }
1198 //-----------------------------------------------------------------------------
mgl_qo3d_solve_c(const char * ham,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy,HMDT zz)1199 HADT MGL_EXPORT mgl_qo3d_solve_c(const char *ham, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy, HMDT zz)
1200 {
1201 	mglFormula h(ham);
1202 	return mgl_qo3d_func_c(mgl_ham3d, &h, ini_re, ini_im, ray_dat, r, k0, xx, yy, zz);
1203 }
1204 //-----------------------------------------------------------------------------
mgl_qo3d_solve(const char * ham,HCDT ini_re,HCDT ini_im,HCDT ray_dat,mreal r,mreal k0,HMDT xx,HMDT yy,HMDT zz)1205 HMDT MGL_EXPORT mgl_qo3d_solve(const char *ham, HCDT ini_re, HCDT ini_im, HCDT ray_dat, mreal r, mreal k0, HMDT xx, HMDT yy, HMDT zz)
1206 {
1207 	HADT res = mgl_qo3d_solve_c(ham,ini_re,ini_im,ray_dat,r,k0,xx,yy,zz);
1208 	HMDT out = mgl_datac_abs(res);	delete res;	return out;
1209 }
1210 //-----------------------------------------------------------------------------
mgl_qo3d_solve_(const char * ham,uintptr_t * ini_re,uintptr_t * ini_im,uintptr_t * ray,mreal * r,mreal * k0,uintptr_t * xx,uintptr_t * yy,uintptr_t * zz,int l)1211 uintptr_t MGL_EXPORT mgl_qo3d_solve_(const char *ham, uintptr_t* ini_re, uintptr_t* ini_im, uintptr_t* ray, mreal *r, mreal *k0, uintptr_t* xx, uintptr_t* yy, uintptr_t* zz, int l)
1212 {	char *s=new char[l+1];	memcpy(s,ham,l);	s[l]=0;
1213 	uintptr_t res = uintptr_t(mgl_qo3d_solve(s, _DA_(ini_re), _DA_(ini_im), _DA_(ray), *r, *k0, _DM_(xx), _DM_(yy), _DM_(zz)));
1214 	delete []s;	return res;	}
1215 //-----------------------------------------------------------------------------
1216 //
1217 //		mglJacobian series
1218 //
1219 //-----------------------------------------------------------------------------
mgl_jacob2(void * par)1220 static void *mgl_jacob2(void *par)
1221 {
1222 	mglThreadD *t=(mglThreadD *)par;
1223 	const long nx=t->p[0], ny=t->p[1];
1224 	mreal *r=t->a;
1225 	const mreal *x=t->b, *y=t->c;
1226 #if !MGL_HAVE_PTHREAD
1227 #pragma omp parallel for
1228 #endif
1229 	for(long i0=t->id;i0<t->n;i0+=mglNumThr)
1230 	{
1231 		long i=i0%nx, j=i0/nx;
1232 		long ip = i<nx-1 ? 1:0, jp = j<ny-1 ? nx:0;
1233 		long im = i>0 ? -1:0, jm = j>0 ? -nx:0;
1234 		r[i0] = (x[i0+ip]-x[i0+im])*(y[i0+jp]-y[i0+jm]) -
1235 				(y[i0+ip]-y[i0+im])*(x[i0+jp]-x[i0+jm]);
1236 		r[i0] *= mreal((nx-1)*(ny-1)) / mreal((ip-im)*(jp-jm));
1237 	}
1238 	return 0;
1239 }
mgl_jacobian_2d(HCDT x,HCDT y)1240 HMDT MGL_EXPORT mgl_jacobian_2d(HCDT x, HCDT y)
1241 {
1242 	const long nx = x->GetNx(), ny=x->GetNy();
1243 	if(nx!=y->GetNx() || ny!=y->GetNy() || nx<2 || ny<2)	return	0;
1244 	mglData *r=new mglData(nx,ny,1);
1245 	const mglData *xx=dynamic_cast<const mglData *>(x);
1246 	const mglData *yy=dynamic_cast<const mglData *>(y);
1247 	if(xx && yy)
1248 	{
1249 		long p[2]={nx,ny};
1250 		mglStartThread(mgl_jacob2,0,nx*ny,r->a,xx->a,yy->a,p);
1251 	}
1252 	else	// slow variant
1253 	{
1254 #pragma omp parallel for collapse(2)
1255 		for(long j=0;j<ny;j++)	for(long i=0;i<nx;i++)
1256 		{
1257 			long im = i>0 ? i-1:i, ip = i<nx-1 ? i+1:i;
1258 			long jm = j>0 ? j-1:j, jp = j<ny-1 ? j+1:j;
1259 			r->a[i+nx*j] = (x->v(ip,j)-x->v(im,j))*(y->v(i,jp)-y->v(i,jm)) -
1260 						(y->v(ip,j)-y->v(im,j))*(x->v(i,jp)-x->v(i,jm));
1261 			r->a[i+nx*j] *= mreal((nx-1)*(ny-1)) / mreal((ip-im)*(jp-jm));
1262 		}
1263 	}
1264 	return r;
1265 }
1266 //-----------------------------------------------------------------------------
mgl_jacob3(void * par)1267 static void *mgl_jacob3(void *par)
1268 {
1269 	mglThreadD *t=(mglThreadD *)par;
1270 	const long nx=t->p[0], ny=t->p[1], nz=t->p[2];
1271 	mreal *r=t->a;
1272 	const mreal *x=t->b, *y=t->c, *z=t->d;
1273 #if !MGL_HAVE_PTHREAD
1274 #pragma omp parallel for
1275 #endif
1276 	for(long i0=t->id;i0<t->n;i0+=mglNumThr)
1277 	{
1278 		long i=i0%nx, j=(i0/nx)%ny, k=i0/(nx*ny);
1279 		long ip = i<nx-1 ? 1:0, jp = j<ny-1 ? nx:0, kp = k<nz-1 ? nx*ny:0;
1280 		long im = i>0 ? -1:0, jm = j>0 ? -nx:0, km = k>0 ? -nx*ny:0;
1281 		r[i0] = (x[i0+ip]-x[i0+im])*(y[i0+jp]-y[i0+jm])*(z[i0+kp]-z[i0+km]) -
1282 				(x[i0+ip]-x[i0+im])*(y[i0+kp]-y[i0+km])*(z[i0+jp]-z[i0+jm]) -
1283 				(x[i0+jp]-x[i0+jm])*(y[i0+ip]-y[i0+im])*(z[i0+kp]-z[i0+km]) +
1284 				(x[i0+jp]-x[i0+jm])*(y[i0+kp]-y[i0+km])*(z[i0+ip]-z[i0+im]) +
1285 				(x[i0+kp]-x[i0+km])*(y[i0+ip]-y[i0+im])*(z[i0+jp]-z[i0+jm]) -
1286 				(x[i0+kp]-x[i0+km])*(y[i0+jp]-y[i0+jm])*(z[i0+ip]-z[i0+im]);
1287 		r[i0] *= mreal((nx-1)*(ny-1)*(nz-1)) / mreal((ip-im)*(jp-jm)*(kp-km));
1288 	}
1289 	return 0;
1290 }
mgl_jacobian_3d(HCDT x,HCDT y,HCDT z)1291 HMDT MGL_EXPORT mgl_jacobian_3d(HCDT x, HCDT y, HCDT z)
1292 {
1293 	const long nx = x->GetNx(), ny=x->GetNy(), nz=x->GetNz(), nn = nx*ny*nz;
1294 	if(nx<2 || ny<2 || nz<2)	return 0;
1295 	if(nn!=y->GetNN() || nn!=z->GetNN())	return 0;
1296 	mglData *r=new mglData(nx,ny,nz);
1297 	const mglData *xx=dynamic_cast<const mglData *>(x);
1298 	const mglData *yy=dynamic_cast<const mglData *>(y);
1299 	const mglData *zz=dynamic_cast<const mglData *>(z);
1300 	if(xx && yy && zz)
1301 	{
1302 		long p[3]={nx,ny,nz};
1303 		mglStartThread(mgl_jacob3,0,nx*ny*nz,r->a,xx->a,yy->a,p,0,zz->a);
1304 	}
1305 	else	// slow variant
1306 	{
1307 #pragma omp parallel for collapse(3)
1308 		for(long k=0;k<nz;k++)	for(long j=0;j<ny;j++)	for(long i=0;i<nx;i++)
1309 		{
1310 			long im = i>0 ? i-1:i, ip = i<nx-1 ? i+1:i;
1311 			long jm = j>0 ? j-1:j, jp = j<ny-1 ? j+1:j;
1312 			long km = k>0 ? k-1:k, kp = k<nz-1 ? k+1:k;
1313 			long i0 = i+nx*(j+ny*k);
1314 			r->a[i0] = (x->v(ip,j,k)-x->v(im,j,k))*(y->v(i,jp,k)-y->v(i,jm,k))*(z->v(i,j,kp)-z->v(i,j,km)) -
1315 					(x->v(ip,j,k)-x->v(im,j,k))*(y->v(i,j,kp)-y->v(i,j,km))*(z->v(i,jp,k)-z->v(i,jm,k)) -
1316 					(x->v(i,jp,k)-x->v(i,jm,k))*(y->v(ip,j,k)-y->v(im,j,k))*(z->v(i,j,kp)-z->v(i,j,km)) +
1317 					(x->v(i,jp,k)-x->v(i,jm,k))*(y->v(i,j,kp)-y->v(i,j,km))*(z->v(ip,j,k)-z->v(im,j,k)) +
1318 					(x->v(i,j,kp)-x->v(i,j,km))*(y->v(ip,j,k)-y->v(im,j,k))*(z->v(i,jp,k)-z->v(i,jm,k)) -
1319 					(x->v(i,j,kp)-x->v(i,j,km))*(y->v(i,jp,k)-y->v(i,jm,k))*(z->v(ip,j,k)-z->v(im,j,k));
1320 			r->a[i0] *= mreal((nx-1)*(ny-1)*(nz-1)) / mreal((ip-im)*(jp-jm)*(kp-km));
1321 		}
1322 
1323 	}
1324 	return r;
1325 }
1326 //-----------------------------------------------------------------------------
mgl_jacobian_2d_(uintptr_t * x,uintptr_t * y)1327 uintptr_t MGL_EXPORT mgl_jacobian_2d_(uintptr_t* x, uintptr_t* y)
1328 {	return uintptr_t(mgl_jacobian_2d(_DA_(x), _DA_(y)));	}
mgl_jacobian_3d_(uintptr_t * x,uintptr_t * y,uintptr_t * z)1329 uintptr_t MGL_EXPORT mgl_jacobian_3d_(uintptr_t* x, uintptr_t* y, uintptr_t* z)
1330 {	return uintptr_t(mgl_jacobian_3d(_DA_(x), _DA_(y), _DA_(z)));	}
1331 //-----------------------------------------------------------------------------
1332 //
1333 //	Progonka
1334 //
1335 //-----------------------------------------------------------------------------
mgl_progonka_sr(HCDT A,HCDT B,HCDT C,HCDT D,mreal * dat,long n,long id,long i0,long di,bool difr)1336 void static mgl_progonka_sr(HCDT A, HCDT B, HCDT C, HCDT D, mreal *dat, long n, long id, long i0, long di, bool difr)
1337 {
1338 	mreal *aa=dat, *bb=dat+n, *uu=dat+2*n;
1339 	mreal b0=B->vthr(i0), c0=C->vthr(i0), d0=D->vthr(id);
1340 	if(difr)	d0 = (2.-b0)*d0-c0*D->vthr(id+di);
1341 	aa[0] = -c0/b0;	bb[0] = d0/b0;
1342 	for(long i=1;i<n;i++)
1343 	{
1344 		long ii=i0+di*i, dd=id+di*i, tt = id+di*((i+1)%n);
1345 		mreal a=A->vthr(ii), b=B->vthr(ii), c=C->vthr(ii);
1346 		mreal d=difr?-a*D->vthr(dd-di)+(2.-b)*D->vthr(dd)-c*D->vthr(tt):D->vthr(dd);
1347 		aa[i] = -c/(b+a*aa[i-1]);
1348 		bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1349 	}
1350 	uu[n-1] = bb[n-1];
1351 	for(long i=n-2;i>=0;i--)	uu[i] = bb[i]+aa[i]*uu[i+1];
1352 }
mgl_progonka_pr(HCDT A,HCDT B,HCDT C,HCDT D,mreal * dat,long n,long id,long i0,long di,bool difr)1353 void static mgl_progonka_pr(HCDT A, HCDT B, HCDT C, HCDT D, mreal *dat, long n, long id, long i0, long di, bool difr)
1354 {
1355 	mreal *aa=dat, *bb=dat+n, *gg=dat+2*n, *uu=dat+3*n;
1356 	mreal a0=A->vthr(i0), b0=B->vthr(i0), c0=C->vthr(i0), d0=D->vthr(id);
1357 	if(difr)	d0 = -a0*D->vthr(id+di*(n-1))+(2.-b0)*d0-c0*D->vthr(id+di);
1358 	aa[0] =-c0/b0;	bb[0] = d0/b0;	gg[0] =-a0/b0;
1359 	for(long i=1;i<n;i++)
1360 	{
1361 		long ii=i0+di*i, il=id+di*((i+1)%n), dd=id+di*i;
1362 		mreal a=A->vthr(ii), b=B->vthr(ii), c=C->vthr(ii);
1363 		mreal d=difr?-a*D->vthr(dd-di)+(2.-b)*D->vthr(dd)-c*D->vthr(il):D->vthr(dd);
1364 		aa[i] = -c/(b+a*aa[i-1]);
1365 		bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1366 		gg[i] = -a*gg[i-1]/(b+a*aa[i-1]);
1367 	}
1368 	mreal P=bb[n-1]/(1.-gg[n-1]), Q=aa[n-1]/(1.-gg[n-1]);
1369 	aa[n-1] = Q;	bb[n-1] = P;
1370 	for(long i=n-2;i>=0;i--)
1371 	{
1372 		bb[i] += aa[i]*bb[i+1]+gg[i]*P;
1373 		aa[i] = aa[i]*aa[i+1]+gg[i]*Q;
1374 	}
1375 	mreal u0 = bb[0]/(1.-aa[0]);
1376 	for(long i=0;i<n;i++)	uu[i]=bb[i]+aa[i]*u0;
1377 }
mgl_progonka_hr(HCDT A,HCDT B,HCDT C,HCDT D,mreal * dat,long n,long id,long i0,bool difr)1378 void static mgl_progonka_hr(HCDT A, HCDT B, HCDT C, HCDT D, mreal *dat, long n, long id, long i0, bool difr)
1379 {
1380 	mreal *aa=dat, *bb=dat+n, *uu=dat+n*n;
1381 	mreal b0=B->vthr(i0), c0=C->vthr(i0), d0=D->vthr(id);
1382 	uu[0] = d0/b0*(difr?(2.-b0):1.);
1383 	b0=B->vthr(i0+n*n-1);	d0=D->vthr(id+n*n-1);
1384 	uu[n*n-1] = d0/b0*(difr?(2.-b0):1.);
1385 	long di = n-1, i1 = i0+n*(n-1), d1 = id+n*(n-1);
1386 	// suppose the square grid!
1387 	for(long j=1;j<n;j++)
1388 	{
1389 		// first bottom-left triangle
1390 		b0=B->vthr(i0+j);	c0=C->vthr(i0+j);	d0=D->vthr(id+j);
1391 		if(difr)	d0 = (2.-b0)*d0-c0*D->vthr(id+j+di);
1392 		aa[0] = -c0/b0;	bb[0] = d0/b0;
1393 		for(long i=1;i<=j;i++)
1394 		{
1395 			long ii=i0+j+di*i, dd=id+j+di*i;
1396 			mreal a=A->vthr(ii),b=B->vthr(ii),c=C->vthr(ii);
1397 			mreal d=difr?-a*D->vthr(dd-di)+(2.-b)*D->vthr(dd)-c*D->vthr(dd+di):D->vthr(dd);
1398 			aa[i] = -c/(b+a*aa[i-1]);
1399 			bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1400 		}
1401 		uu[j+di*(j-1)] = bb[j];
1402 		for(long i=j-1;i>=0;i--)
1403 			uu[j+di*i] = bb[i]+aa[i]*uu[j+di*i+di];
1404 		// next top-right triangle
1405 		long j1=n-1-j;
1406 		b0=B->vthr(i1+j1);	c0=C->vthr(i1+j1);	d0=D->vthr(d1+j1);
1407 		if(difr)	d0 = (2.-b0)*d0-c0*D->vthr(d1+j1-di);
1408 		aa[0] = -c0/b0;	bb[0] = d0/b0;
1409 		for(long i=1;i<=j;i++)
1410 		{
1411 			long ii=i1+j1-di*i, dd=d1+j1-di*i;
1412 			mreal a=A->vthr(ii),b=B->vthr(ii),c=C->vthr(ii);
1413 			mreal d=difr?-a*D->vthr(dd+di)+(2.-b)*D->vthr(dd)-c*D->vthr(dd-di):D->vthr(dd);
1414 			aa[i] = -c/(b+a*aa[i-1]);
1415 			bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1416 		}
1417 		uu[j1+n*(n-1)-di*(j-1)] = bb[j];
1418 		for(long i=j-1;i>=0;i--)
1419 			uu[j1+n*(n-1)-di*i] = bb[i]+aa[i]*uu[j1+n*(n-1)-di*i-di];
1420 	}
1421 }
1422 //-----------------------------------------------------------------------------
mgl_data_tridmat(HCDT A,HCDT B,HCDT C,HCDT D,const char * how)1423 HMDT MGL_EXPORT mgl_data_tridmat(HCDT A, HCDT B, HCDT C, HCDT D, const char *how)
1424 {
1425 	const long nx=D->GetNx(),ny=D->GetNy(),nz=D->GetNz();
1426 	const long nn=nx*ny*nz, np=nx*ny, na=A->GetNN();
1427 	if(B->GetNN()!=na || C->GetNN()!=na)	return 0;
1428 	mglData *r = new mglData(nx,ny,nz);
1429 	bool per = mglchr(how,'c');
1430 	bool difr = mglchr(how,'d');
1431 	if(mglchr(how,'x') && (na==nn || na==np || na==nx))
1432 #pragma omp parallel
1433 	{
1434 		mreal *tt = new mreal[4*nx], *uu=tt+(per?3:2)*nx;
1435 // 		mglData T(nx,4);	mreal *uu=T.a+(per?3:2)*nx;
1436 #pragma omp for collapse(2)
1437 		for(long k=0;k<nz;k++)	for(long j=0;j<ny;j++)
1438 		{
1439 			long i0=0, i1=nx*(j+ny*k);
1440 			if(na==nn)	i0=nx*(j+ny*k);	else if(na==np)	i0=nx*j;
1441 			if(per)	mgl_progonka_pr(A,B,C,D,tt,nx,i1,i0,1,difr);
1442 			else	mgl_progonka_sr(A,B,C,D,tt,nx,i1,i0,1,difr);
1443 			i0 = nx*(j+ny*k);
1444 			for(long i=0;i<nx;i++)	r->a[i+i0] = uu[i];
1445 		}
1446 		delete []tt;
1447 	}
1448 	else if(mglchr(how,'y') && (na==nn || na==np || na==ny))
1449 #pragma omp parallel
1450 	{
1451 		mreal *tt = new mreal[4*ny], *uu=tt+(per?3:2)*ny;
1452 // /		mglData T(ny,4);	mreal *uu=T.a+(per?3:2)*ny;
1453 #pragma omp for collapse(2)
1454 		for(long k=0;k<nz;k++)	for(long i=0;i<nx;i++)
1455 		{
1456 			long i0=0, i1 = i+np*k;
1457 			if(na==nn)	i0=i+np*k;	else if(na==np)	i0=i;
1458 			if(per)	mgl_progonka_pr(A,B,C,D,tt,ny,i1,i0,nx,difr);
1459 			else	mgl_progonka_sr(A,B,C,D,tt,ny,i1,i0,nx,difr);
1460 			i0 = i+np*k;
1461 			for(long j=0;j<ny;j++)	r->a[j*nx+i0] = uu[j];
1462 		}
1463 		delete []tt;
1464 	}
1465 	else if(mglchr(how,'z') && (na==nn || na==nz))
1466 #pragma omp parallel
1467 	{
1468 		mreal *tt = new mreal[4*nz], *uu=tt+(per?3:2)*nz;
1469 // 		mglData T(nz,4);	mreal *uu=T.a+(per?3:2)*nz;
1470 #pragma omp for collapse(2)
1471 		for(long j=0;j<ny;j++)	for(long i=0;i<nx;i++)
1472 		{
1473 			long i0 = na==nn?i+nx*j:0, i1 = i+nx*j;
1474 			if(per)	mgl_progonka_pr(A,B,C,D,tt,nz,i1,i0,np,difr);
1475 			else	mgl_progonka_sr(A,B,C,D,tt,nz,i1,i0,np,difr);
1476 			i0 = i+nx*j;
1477 			for(long k=0;k<nz;k++)	r->a[k*np+i0] = uu[k];
1478 		}
1479 		delete []tt;
1480 	}
1481 	else if(mglchr(how,'h') && ny==nx && (na==nn || na==np) && nx>1)
1482 #pragma omp parallel
1483 	{
1484 		mreal *tt = new mreal[2*np];
1485 // 		mglData T(np,2);
1486 #pragma omp for
1487 		for(long k=0;k<nz;k++)
1488 		{
1489 			mgl_progonka_hr(A,B,C,D,tt,nx,k*np,na==nn ? k*np:0,difr);
1490 			memcpy(r->a+k*np, tt+np, np*sizeof(mreal));
1491 		}
1492 		delete []tt;
1493 	}
1494 	else	{	delete r;	r=0;	}
1495 	return r;
1496 }
1497 //-----------------------------------------------------------------------------
mgl_data_tridmat_(uintptr_t * A,uintptr_t * B,uintptr_t * C,uintptr_t * D,const char * how,int l)1498 uintptr_t MGL_EXPORT mgl_data_tridmat_(uintptr_t *A, uintptr_t *B, uintptr_t *C, uintptr_t *D, const char *how, int l)
1499 {	char *s=new char[l+1];	memcpy(s,how,l);	s[l]=0;
1500 	uintptr_t r = uintptr_t(mgl_data_tridmat(_DA_(A),_DA_(B),_DA_(C),_DA_(D),s));
1501 	delete []s;	return r;
1502 }
1503 //-----------------------------------------------------------------------------
mgl_progonka_sc(HCDT A,HCDT B,HCDT C,HCDT D,dual * dat,long n,long id,long i0,long di,bool difr)1504 void static mgl_progonka_sc(HCDT A, HCDT B, HCDT C, HCDT D, dual *dat, long n, long id, long i0, long di, bool difr)
1505 {
1506 	dual *aa=dat, *bb=dat+n, *uu=dat+2*n;
1507 	dual b0=B->vcthr(i0), c0=C->vcthr(i0), d0=D->vcthr(id);
1508 	if(difr)	d0 = (mreal(2)-b0)*d0-c0*D->vcthr(id+di);
1509 	aa[0] = -c0/b0;	bb[0] = d0/b0;
1510 	for(long i=1;i<n;i++)
1511 	{
1512 		long ii=i0+di*i, dd=id+di*i, tt = id+di*((i+1)%n);
1513 		dual a=A->vcthr(ii), b=B->vcthr(ii), c=C->vcthr(ii);
1514 		dual d=difr?-a*D->vcthr(dd-di)+(mreal(2)-b)*D->vcthr(dd)-c*D->vcthr(tt):D->vcthr(dd);
1515 		aa[i] = -c/(b+a*aa[i-1]);
1516 		bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1517 	}
1518 	uu[n-1] = bb[n-1];
1519 	for(long i=n-2;i>=0;i--)	uu[i] = bb[i]+aa[i]*uu[i+1];
1520 }
mgl_progonka_pc(HCDT A,HCDT B,HCDT C,HCDT D,dual * dat,long n,long id,long i0,long di,bool difr)1521 void static mgl_progonka_pc(HCDT A, HCDT B, HCDT C, HCDT D, dual *dat, long n, long id, long i0, long di, bool difr)
1522 {
1523 	dual *aa=dat, *bb=dat+n, *gg=dat+2*n, *uu=dat+3*n;
1524 	dual a0=A->vcthr(i0), b0=B->vcthr(i0), c0=C->vcthr(i0), d0=D->vcthr(id);
1525 	if(difr)	d0 = -a0*D->vcthr(id+di*(n-1))+(mreal(2)-b0)*d0-c0*D->vcthr(id+di);
1526 	aa[0] =-c0/b0;	bb[0] = d0/b0;	gg[0] =-a0/b0;
1527 	for(long i=1;i<n;i++)
1528 	{
1529 		long ii=i0+di*i, il=id+di*((i+1)%n), dd=id+di*i;
1530 		dual a=A->vcthr(ii), b=B->vcthr(ii), c=C->vcthr(ii);
1531 		dual d=difr?-a*D->vcthr(dd-di)+(mreal(2)-b)*D->vcthr(dd)-c*D->vcthr(il):D->vcthr(dd);
1532 		aa[i] = -c/(b+a*aa[i-1]);
1533 		bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1534 		gg[i] = -a*gg[i-1]/(b+a*aa[i-1]);
1535 	}
1536 	dual P=bb[n-1]/(mreal(1)-gg[n-1]), Q=aa[n-1]/(mreal(1)-gg[n-1]);
1537 	aa[n-1] = Q;	bb[n-1] = P;
1538 	for(long i=n-2;i>=0;i--)
1539 	{
1540 		bb[i] += aa[i]*bb[i+1]+gg[i]*P;
1541 		aa[i] = aa[i]*aa[i+1]+gg[i]*Q;
1542 	}
1543 	dual u0 = bb[0]/(mreal(1)-aa[0]);
1544 	for(long i=0;i<n;i++)	uu[i]=bb[i]+aa[i]*u0;
1545 }
mgl_progonka_hc(HCDT A,HCDT B,HCDT C,HCDT D,dual * dat,long n,long id,long i0,bool difr)1546 void static mgl_progonka_hc(HCDT A, HCDT B, HCDT C, HCDT D, dual *dat, long n, long id, long i0, bool difr)
1547 {
1548 	dual *aa=dat, *bb=dat+n, *uu=dat+n*n;
1549 	dual b0=B->vcthr(i0), c0=C->vcthr(i0), d0=D->vcthr(id);
1550 	uu[0] = d0/b0*(difr?(mreal(2)-b0):mreal(1));
1551 	b0=B->vcthr(i0+n*n-1);	d0=D->vcthr(id+n*n-1);
1552 	uu[n*n-1] = d0/b0*(difr?(mreal(2)-b0):mreal(1));
1553 	long di = n-1, i1 = i0+n*(n-1), d1 = id+n*(n-1);
1554 	// suppose the square grid!
1555 	for(long j=1;j<n;j++)
1556 	{
1557 		// first bottom-left triangle
1558 		b0=B->vcthr(i0+j);	c0=C->vcthr(i0+j);	d0=D->vcthr(id+j);
1559 		if(difr)	d0 = (mreal(2)-b0)*d0-c0*D->vcthr(id+j+di);
1560 		aa[0] = -c0/b0;	bb[0] = d0/b0;
1561 		for(long i=1;i<=j;i++)
1562 		{
1563 			long ii=i0+j+di*i, dd=id+j+di*i;
1564 			dual a=A->vcthr(ii),b=B->vcthr(ii),c=C->vcthr(ii);
1565 			dual d=difr?-a*D->vcthr(dd-di)+(mreal(2)-b)*D->vcthr(dd)-c*D->vcthr(dd+di):D->vcthr(dd);
1566 			aa[i] = -c/(b+a*aa[i-1]);
1567 			bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1568 		}
1569 		uu[j+di*(j-1)] = bb[j];
1570 		for(long i=j-1;i>=0;i--)
1571 			uu[j+di*i] = bb[i]+aa[i]*uu[j+di*i+di];
1572 		// next top-right triangle
1573 		long j1=n-1-j;
1574 		b0=B->vcthr(i1+j1);	c0=C->vcthr(i1+j1);	d0=D->vcthr(d1+j1);
1575 		if(difr)	d0 = (mreal(2)-b0)*d0-c0*D->vcthr(d1+j1-di);
1576 		aa[0] = -c0/b0;	bb[0] = d0/b0;
1577 		for(long i=1;i<=j;i++)
1578 		{
1579 			long ii=i1+j1-di*i, dd=d1+j1-di*i;
1580 			dual a=A->vcthr(ii),b=B->vcthr(ii),c=C->vcthr(ii);
1581 			dual d=difr?-a*D->vcthr(dd+di)+(mreal(2)-b)*D->vcthr(dd)-c*D->vcthr(dd-di):D->vcthr(dd);
1582 			aa[i] = -c/(b+a*aa[i-1]);
1583 			bb[i] = (d-a*bb[i-1])/(b+a*aa[i-1]);
1584 		}
1585 		uu[j1+n*(n-1)-di*(j-1)] = bb[j];
1586 		for(long i=j-1;i>=0;i--)
1587 			uu[j1+n*(n-1)-di*i] = bb[i]+aa[i]*uu[j1+n*(n-1)-di*i-di];
1588 	}
1589 }
1590 //-----------------------------------------------------------------------------
mgl_datac_tridmat(HCDT A,HCDT B,HCDT C,HCDT D,const char * how)1591 HADT MGL_EXPORT mgl_datac_tridmat(HCDT A, HCDT B, HCDT C, HCDT D, const char *how)
1592 {
1593 	const long nx=D->GetNx(),ny=D->GetNy(),nz=D->GetNz();
1594 	const long nn=nx*ny*nz, np=nx*ny, na=A->GetNN();
1595 	if(B->GetNN()!=na || C->GetNN()!=na)	return 0;
1596 	mglDataC *r = new mglDataC(nx,ny,nz);
1597 	bool per = mglchr(how,'c');
1598 	bool difr = mglchr(how,'d');
1599 	if(mglchr(how,'x') && (na==nn || na==np || na==nx))
1600 #pragma omp parallel
1601 	{
1602 		dual *tt = new dual[4*nx], *uu=tt+(per?3:2)*nx;
1603 // 		mglDataC T(nx,4);	dual *uu=T.a+(per?3:2)*nx;
1604 #pragma omp for collapse(2)
1605 		for(long k=0;k<nz;k++)	for(long j=0;j<ny;j++)
1606 		{
1607 			long i0=0, i1=nx*(j+ny*k);
1608 			if(na==nn)	i0=i1;	else if(na==np)	i0=nx*j;
1609 			if(per)	mgl_progonka_pc(A,B,C,D,tt,nx,i1,i0,1,difr);
1610 			else	mgl_progonka_sc(A,B,C,D,tt,nx,i1,i0,1,difr);
1611 			for(long i=0;i<nx;i++)	r->a[i+i1] = uu[i];
1612 		}
1613 		delete []tt;
1614 	}
1615 	else if(mglchr(how,'y') && (na==nn || na==np || na==ny))
1616 #pragma omp parallel
1617 	{
1618 		dual *tt = new dual[4*ny], *uu=tt+(per?3:2)*ny;
1619 // 		mglDataC T(ny,4);	dual *uu=T.a+(per?3:2)*ny;
1620 #pragma omp for collapse(2)
1621 		for(long k=0;k<nz;k++)	for(long i=0;i<nx;i++)
1622 		{
1623 			long i0=0, i1 = i+np*k;
1624 			if(na==nn)	i0=i1;	else if(na==np)	i0=i;
1625 			if(per)	mgl_progonka_pc(A,B,C,D,tt,ny,i1,i0,nx,difr);
1626 			else	mgl_progonka_sc(A,B,C,D,tt,ny,i1,i0,nx,difr);
1627 			i0 = i+np*k;
1628 			for(long j=0;j<ny;j++)	r->a[j*nx+i0] = uu[j];
1629 		}
1630 		delete []tt;
1631 	}
1632 	else if(mglchr(how,'z') && (na==nn || na==nz))
1633 #pragma omp parallel
1634 	{
1635 		dual *tt = new dual[4*nz], *uu=tt+(per?3:2)*ny;
1636 // 		mglDataC T(nz,4);	dual *uu=T.a+(per?3:2)*nz;
1637 #pragma omp for collapse(2)
1638 		for(long j=0;j<ny;j++)	for(long i=0;i<nx;i++)
1639 		{
1640 			long i0 = na==nn?i+nx*j:0, i1 = i+nx*j;
1641 			if(per)	mgl_progonka_pc(A,B,C,D,tt,nz,i1,i0,np,difr);
1642 			else	mgl_progonka_sc(A,B,C,D,tt,nz,i1,i0,np,difr);
1643 			for(long k=0;k<nz;k++)	r->a[k*np+i1] = uu[k];
1644 		}
1645 		delete []tt;
1646 	}
1647 	else if(mglchr(how,'h') && ny==nx && (na==nn || na==np) && nx>1)
1648 #pragma omp parallel
1649 	{
1650 		dual *tt = new dual[2*np];
1651 // 		mglDataC T(np,2);
1652 #pragma omp for
1653 		for(long k=0;k<nz;k++)
1654 		{
1655 			mgl_progonka_hc(A,B,C,D,tt,nx,k*np, na==nn ? k*np:0,difr);
1656 			memcpy(r->a+k*np, tt+np, np*sizeof(dual));
1657 		}
1658 		delete []tt;
1659 	}
1660 	else	{	delete r;	r=0;	}
1661 	return r;
1662 }
1663 //-----------------------------------------------------------------------------
mgl_datac_tridmat_(uintptr_t * A,uintptr_t * B,uintptr_t * C,uintptr_t * D,const char * how,int l)1664 uintptr_t MGL_EXPORT mgl_datac_tridmat_(uintptr_t *A, uintptr_t *B, uintptr_t *C, uintptr_t *D, const char *how, int l)
1665 {	char *s=new char[l+1];	memcpy(s,how,l);	s[l]=0;
1666 	uintptr_t r = uintptr_t(mgl_datac_tridmat(_DA_(A),_DA_(B),_DA_(C),_DA_(D),s));
1667 	delete []s;	return r;
1668 }
1669 //-----------------------------------------------------------------------------
1670