1 /////////////////////////////////////////////////////////////////////////////
2 // einspline: a library for creating and evaluating B-splines //
3 // Copyright (C) 2007 Kenneth P. Esler, Jr. //
4 // //
5 // This program is free software; you can redistribute it and/or modify //
6 // it under the terms of the GNU General Public License as published by //
7 // the Free Software Foundation; either version 2 of the License, or //
8 // (at your option) any later version. //
9 // //
10 // This program is distributed in the hope that it will be useful, //
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13 // GNU General Public License for more details. //
14 // //
15 // You should have received a copy of the GNU General Public License //
16 // along with this program; if not, write to the Free Software //
17 // Foundation, Inc., 51 Franklin Street, Fifth Floor, //
18 // Boston, MA 02110-1301 USA //
19 /////////////////////////////////////////////////////////////////////////////
20
21 #include "bspline_create.h"
22 #ifndef _XOPEN_SOURCE
23 #define _XOPEN_SOURCE 600
24 #endif
25 #ifndef __USE_XOPEN2K
26 #define __USE_XOPEN2K
27 #endif
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <vector>
31
32 int posix_memalign(void **memptr, size_t alignment, size_t size);
33
34 ////////////////////////////////////////////////////////////
35 ////////////////////////////////////////////////////////////
36 //// Helper functions for spline creation ////
37 ////////////////////////////////////////////////////////////
38 ////////////////////////////////////////////////////////////
39 void init_sse_data();
40
41 void
42 find_coefs_1d_d (Ugrid grid, BCtype_d bc,
43 double *data, intptr_t dstride,
44 double *coefs, intptr_t cstride);
45
46 void
solve_deriv_interp_1d_s(float bands[],float coefs[],int M,int cstride)47 solve_deriv_interp_1d_s (float bands[], float coefs[],
48 int M, int cstride)
49 {
50 // Solve interpolating equations
51 // First and last rows are different
52 bands[4*(0)+1] /= bands[4*(0)+0];
53 bands[4*(0)+2] /= bands[4*(0)+0];
54 bands[4*(0)+3] /= bands[4*(0)+0];
55 bands[4*(0)+0] = 1.0;
56 bands[4*(1)+1] -= bands[4*(1)+0]*bands[4*(0)+1];
57 bands[4*(1)+2] -= bands[4*(1)+0]*bands[4*(0)+2];
58 bands[4*(1)+3] -= bands[4*(1)+0]*bands[4*(0)+3];
59 bands[4*(0)+0] = 0.0;
60 bands[4*(1)+2] /= bands[4*(1)+1];
61 bands[4*(1)+3] /= bands[4*(1)+1];
62 bands[4*(1)+1] = 1.0;
63
64 // Now do rows 2 through M+1
65 for (int row=2; row < (M+1); row++) {
66 bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
67 bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
68 bands[4*(row)+2] /= bands[4*(row)+1];
69 bands[4*(row)+3] /= bands[4*(row)+1];
70 bands[4*(row)+0] = 0.0;
71 bands[4*(row)+1] = 1.0;
72 }
73
74 // Do last row
75 bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
76 bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
77 bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
78 bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
79 bands[4*(M+1)+3] /= bands[4*(M+1)+2];
80 bands[4*(M+1)+2] = 1.0;
81
82 coefs[(M+1)*cstride] = bands[4*(M+1)+3];
83 // Now back substitute up
84 for (int row=M; row>0; row--)
85 coefs[row*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[cstride*(row+1)];
86
87 // Finish with first row
88 coefs[0] = bands[4*(0)+3] - bands[4*(0)+1]*coefs[1*cstride] - bands[4*(0)+2]*coefs[2*cstride];
89 }
90
91 // On input, bands should be filled with:
92 // row 0 : abcdInitial from boundary conditions
93 // rows 1:M: basis functions in first 3 cols, data in last
94 // row M+1 : abcdFinal from boundary conditions
95 // cstride gives the stride between values in coefs.
96 // On exit, coefs with contain interpolating B-spline coefs
97 void
solve_periodic_interp_1d_s(float bands[],float coefs[],int M,int cstride)98 solve_periodic_interp_1d_s (float bands[], float coefs[],
99 int M, int cstride)
100 {
101 std::vector<float> lastCol(M);
102
103 // Now solve:
104 // First and last rows are different
105 bands[4*(0)+2] /= bands[4*(0)+1];
106 bands[4*(0)+0] /= bands[4*(0)+1];
107 bands[4*(0)+3] /= bands[4*(0)+1];
108 bands[4*(0)+1] = 1.0;
109 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
110 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
111 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2];
112 lastCol[0] = bands[4*(0)+0];
113
114 for (int row=1; row < (M-1); row++) {
115 bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
116 bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
117 lastCol[row] = -bands[4*(row)+0] * lastCol[row-1];
118 bands[4*(row)+0] = 0.0;
119 bands[4*(row)+2] /= bands[4*(row)+1];
120 bands[4*(row)+3] /= bands[4*(row)+1];
121 lastCol[row] /= bands[4*(row)+1];
122 bands[4*(row)+1] = 1.0;
123 if (row < (M-2)) {
124 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
125 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
126 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
127 }
128 }
129
130 // Now do last row
131 // The [2] element and [0] element are now on top of each other
132 bands[4*(M-1)+0] += bands[4*(M-1)+2];
133 bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
134 bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3];
135 bands[4*(M-1)+3] /= bands[4*(M-1)+1];
136 coefs[M*cstride] = bands[4*(M-1)+3];
137 for (int row=M-2; row>=0; row--)
138 coefs[(row+1)*cstride] =
139 bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
140
141 coefs[0*cstride] = coefs[M*cstride];
142 coefs[(M+1)*cstride] = coefs[1*cstride];
143 coefs[(M+2)*cstride] = coefs[2*cstride];
144
145
146 }
147
148
149 // On input, bands should be filled with:
150 // row 0 : abcdInitial from boundary conditions
151 // rows 1:M: basis functions in first 3 cols, data in last
152 // row M+1 : abcdFinal from boundary conditions
153 // cstride gives the stride between values in coefs.
154 // On exit, coefs with contain interpolating B-spline coefs
155 void
solve_antiperiodic_interp_1d_s(float bands[],float coefs[],int M,int cstride)156 solve_antiperiodic_interp_1d_s (float bands[], float coefs[],
157 int M, int cstride)
158 {
159 bands[4*0+0] *= -1.0;
160 bands[4*(M-1)+2] *= -1.0;
161
162 std::vector<float> lastCol(M);
163
164 // Now solve:
165 // First and last rows are different
166 bands[4*(0)+2] /= bands[4*(0)+1];
167 bands[4*(0)+0] /= bands[4*(0)+1];
168 bands[4*(0)+3] /= bands[4*(0)+1];
169 bands[4*(0)+1] = 1.0;
170 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
171 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
172 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2];
173 lastCol[0] = bands[4*(0)+0];
174
175 for (int row=1; row < (M-1); row++) {
176 bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
177 bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
178 lastCol[row] = -bands[4*(row)+0] * lastCol[row-1];
179 bands[4*(row)+0] = 0.0;
180 bands[4*(row)+2] /= bands[4*(row)+1];
181 bands[4*(row)+3] /= bands[4*(row)+1];
182 lastCol[row] /= bands[4*(row)+1];
183 bands[4*(row)+1] = 1.0;
184 if (row < (M-2)) {
185 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
186 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
187 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
188 }
189 }
190
191 // Now do last row
192 // The [2] element and [0] element are now on top of each other
193 bands[4*(M-1)+0] += bands[4*(M-1)+2];
194 bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
195 bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3];
196 bands[4*(M-1)+3] /= bands[4*(M-1)+1];
197 coefs[M*cstride] = bands[4*(M-1)+3];
198 for (int row=M-2; row>=0; row--)
199 coefs[(row+1)*cstride] =
200 bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
201
202 coefs[0*cstride] = -coefs[M*cstride];
203 coefs[(M+1)*cstride] = -coefs[1*cstride];
204 coefs[(M+2)*cstride] = -coefs[2*cstride];
205
206
207 }
208
209
210
211
212 #ifdef HIGH_PRECISION
213 void
find_coefs_1d_s(Ugrid grid,BCtype_s bc,float * data,intptr_t dstride,float * coefs,intptr_t cstride)214 find_coefs_1d_s (Ugrid grid, BCtype_s bc,
215 float *data, intptr_t dstride,
216 float *coefs, intptr_t cstride)
217 {
218 BCtype_d d_bc;
219 double *d_data, *d_coefs;
220
221 d_bc.lCode = bc.lCode; d_bc.rCode = bc.rCode;
222 d_bc.lVal = bc.lVal; d_bc.rVal = bc.rVal;
223 int M = grid.num, N;
224 if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) N = M+3;
225 else N = M+2;
226
227 d_data = new double[N];
228 d_coefs = new double[N];
229 for (int i=0; i<M; i++)
230 d_data[i] = data[i*dstride];
231 find_coefs_1d_d (grid, d_bc, d_data, 1, d_coefs, 1);
232 for (int i=0; i<N; i++)
233 coefs[i*cstride] = d_coefs[i];
234 delete[] d_data;
235 delete[] d_coefs;
236 }
237
238 #else
239 void
find_coefs_1d_s(Ugrid grid,BCtype_s bc,float * data,intptr_t dstride,float * coefs,intptr_t cstride)240 find_coefs_1d_s (Ugrid grid, BCtype_s bc,
241 float *data, intptr_t dstride,
242 float *coefs, intptr_t cstride)
243 {
244 int M = grid.num;
245 float basis[4] = {1.0/6.0, 2.0/3.0, 1.0/6.0, 0.0};
246 if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) {
247
248 float *bands = new float[4*M];
249
250 for (int i=0; i<M; i++) {
251 bands[4*i+0] = basis[0];
252 bands[4*i+1] = basis[1];
253 bands[4*i+2] = basis[2];
254 bands[4*i+3] = data[i*dstride];
255 }
256 if (bc.lCode == PERIODIC)
257 solve_periodic_interp_1d_s (bands, coefs, M, cstride);
258 else
259 solve_antiperiodic_interp_1d_s (bands, coefs, M, cstride);
260
261 delete[] bands;
262 }
263 else {
264 // Setup boundary conditions
265 float abcd_left[4];
266 float abcd_right[4];
267 for (int i = 0; i < 4; i++) {
268 abcd_left[i] = 0.0;
269 abcd_right[i] = 0.0;
270 }
271
272 // Left boundary
273 if (bc.lCode == FLAT || bc.lCode == NATURAL)
274 bc.lVal = 0.0;
275 if (bc.lCode == FLAT || bc.lCode == DERIV1) {
276 abcd_left[0] = -0.5 * grid.delta_inv;
277 abcd_left[1] = 0.0 * grid.delta_inv;
278 abcd_left[2] = 0.5 * grid.delta_inv;
279 abcd_left[3] = bc.lVal;
280 }
281 if (bc.lCode == NATURAL || bc.lCode == DERIV2) {
282 abcd_left[0] = 1.0 * grid.delta_inv * grid.delta_inv;
283 abcd_left[1] =-2.0 * grid.delta_inv * grid.delta_inv;
284 abcd_left[2] = 1.0 * grid.delta_inv * grid.delta_inv;
285 abcd_left[3] = bc.lVal;
286 }
287
288 // Right boundary
289 if (bc.rCode == FLAT || bc.rCode == NATURAL)
290 bc.rVal = 0.0;
291 if (bc.rCode == FLAT || bc.rCode == DERIV1) {
292 abcd_right[0] = -0.5 * grid.delta_inv;
293 abcd_right[1] = 0.0 * grid.delta_inv;
294 abcd_right[2] = 0.5 * grid.delta_inv;
295 abcd_right[3] = bc.rVal;
296 }
297 if (bc.rCode == NATURAL || bc.rCode == DERIV2) {
298 abcd_right[0] = 1.0 *grid.delta_inv * grid.delta_inv;
299 abcd_right[1] =-2.0 *grid.delta_inv * grid.delta_inv;
300 abcd_right[2] = 1.0 *grid.delta_inv * grid.delta_inv;
301 abcd_right[3] = bc.rVal;
302 }
303
304 float *bands = new float[4*(M+2)];
305 for (int i=0; i<4; i++) {
306 bands[4*( 0 )+i] = abcd_left[i];
307 bands[4*(M+1)+i] = abcd_right[i];
308 }
309 for (int i=0; i<M; i++) {
310 for (int j=0; j<3; j++)
311 bands[4*(i+1)+j] = basis[j];
312 bands[4*(i+1)+3] = data[i*dstride];
313 }
314 // Now, solve for coefficients
315 solve_deriv_interp_1d_s (bands, coefs, M, cstride);
316 delete[] bands;
317 }
318 }
319
320 #endif
321
322 ////////////////////////////////////////////////////////////
323 ////////////////////////////////////////////////////////////
324 //// Single-Precision, Real Creation Routines ////
325 ////////////////////////////////////////////////////////////
326 ////////////////////////////////////////////////////////////
327
328 // On input, bands should be filled with:
329 // row 0 : abcdInitial from boundary conditions
330 // rows 1:M: basis functions in first 3 cols, data in last
331 // row M+1 : abcdFinal from boundary conditions
332 // cstride gives the stride between values in coefs.
333 // On exit, coefs with contain interpolating B-spline coefs
334 UBspline_1d_s*
create_UBspline_1d_s(Ugrid x_grid,BCtype_s xBC,float * data)335 create_UBspline_1d_s (Ugrid x_grid, BCtype_s xBC, float *data)
336 {
337 // Create new spline
338 UBspline_1d_s* restrict spline = new UBspline_1d_s;
339 spline->spcode = U1D;
340 spline->tcode = SINGLE_REAL;
341 spline->xBC = xBC; spline->x_grid = x_grid;
342
343 // Setup internal variables
344 int M = x_grid.num;
345 int N;
346
347 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
348 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num);
349 N = M+3;
350 }
351 else {
352 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
353 N = M+2;
354 }
355
356 x_grid.delta_inv = 1.0/x_grid.delta;
357 spline->x_grid = x_grid;
358 #ifndef HAVE_SSE2
359 spline->coefs = (float*)malloc (sizeof(float)*N);
360 #else
361 posix_memalign ((void**)&spline->coefs, 16, (sizeof(float)*N));
362 #endif
363 find_coefs_1d_s (spline->x_grid, xBC, data, 1, spline->coefs, 1);
364
365 init_sse_data();
366 return spline;
367 }
368
369 void
recompute_UBspline_1d_s(UBspline_1d_s * spline,float * data)370 recompute_UBspline_1d_s (UBspline_1d_s* spline, float *data)
371 {
372 find_coefs_1d_s (spline->x_grid, spline->xBC, data, 1, spline->coefs, 1);
373 }
374
375
376 UBspline_2d_s*
create_UBspline_2d_s(Ugrid x_grid,Ugrid y_grid,BCtype_s xBC,BCtype_s yBC,float * data)377 create_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid,
378 BCtype_s xBC, BCtype_s yBC, float *data)
379 {
380 // Create new spline
381 UBspline_2d_s* restrict spline = new UBspline_2d_s;
382 spline->spcode = U2D;
383 spline->tcode = SINGLE_REAL;
384 spline->xBC = xBC;
385 spline->yBC = yBC;
386 // Setup internal variables
387 int Mx = x_grid.num;
388 int My = y_grid.num;
389 int Nx, Ny;
390
391 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3;
392 else Nx = Mx+2;
393 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
394 x_grid.delta_inv = 1.0/x_grid.delta;
395 spline->x_grid = x_grid;
396
397 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3;
398 else Ny = My+2;
399 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
400 y_grid.delta_inv = 1.0/y_grid.delta;
401 spline->y_grid = y_grid;
402 spline->x_stride = Ny;
403 #ifndef HAVE_SSE2
404 spline->coefs = (float*)malloc (sizeof(float)*Nx*Ny);
405 #else
406 posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny);
407 #endif
408
409 // First, solve in the X-direction
410 for (int iy=0; iy<My; iy++) {
411 intptr_t doffset = iy;
412 intptr_t coffset = iy;
413 find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My,
414 spline->coefs+coffset, Ny);
415 }
416
417 // Now, solve in the Y-direction
418 for (int ix=0; ix<Nx; ix++) {
419 intptr_t doffset = ix*Ny;
420 intptr_t coffset = ix*Ny;
421 find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, 1,
422 spline->coefs+coffset, 1);
423 }
424 init_sse_data();
425 return spline;
426 }
427
428 void
recompute_UBspline_2d_s(UBspline_2d_s * spline,float * data)429 recompute_UBspline_2d_s (UBspline_2d_s* spline, float *data)
430 {
431 int Mx = spline->x_grid.num;
432 int My = spline->y_grid.num;
433 int Nx, Ny;
434
435 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3;
436 else Nx = Mx+2;
437 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3;
438 else Ny = My+2;
439
440 // First, solve in the X-direction
441 for (int iy=0; iy<My; iy++) {
442 intptr_t doffset = iy;
443 intptr_t coffset = iy;
444 find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My,
445 spline->coefs+coffset, Ny);
446 }
447
448 // Now, solve in the Y-direction
449 for (int ix=0; ix<Nx; ix++) {
450 intptr_t doffset = ix*Ny;
451 intptr_t coffset = ix*Ny;
452 find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, 1,
453 spline->coefs+coffset, 1);
454 }
455 }
456
457
458 UBspline_3d_s*
create_UBspline_3d_s(Ugrid x_grid,Ugrid y_grid,Ugrid z_grid,BCtype_s xBC,BCtype_s yBC,BCtype_s zBC,float * data)459 create_UBspline_3d_s (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
460 BCtype_s xBC, BCtype_s yBC, BCtype_s zBC,
461 float *data)
462 {
463 // Create new spline
464 UBspline_3d_s* restrict spline = new UBspline_3d_s;
465 spline->spcode = U3D;
466 spline->tcode = SINGLE_REAL;
467 spline->xBC = xBC;
468 spline->yBC = yBC;
469 spline->zBC = zBC;
470 // Setup internal variables
471 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num;
472 int Nx, Ny, Nz;
473
474 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3;
475 else Nx = Mx+2;
476 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
477 x_grid.delta_inv = 1.0/x_grid.delta;
478 spline->x_grid = x_grid;
479
480 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3;
481 else Ny = My+2;
482 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
483 y_grid.delta_inv = 1.0/y_grid.delta;
484 spline->y_grid = y_grid;
485
486 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3;
487 else Nz = Mz+2;
488 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
489 z_grid.delta_inv = 1.0/z_grid.delta;
490 spline->z_grid = z_grid;
491
492 spline->x_stride = Ny*Nz;
493 spline->y_stride = Nz;
494
495 #ifndef HAVE_SSE2
496 spline->coefs = new float[Nx*Ny*Nz];
497 #else
498 posix_memalign ((void**)&spline->coefs, 16, (sizeof(float)*Nx*Ny*Nz));
499 #endif
500
501 // First, solve in the X-direction
502 for (int iy=0; iy<My; iy++)
503 for (int iz=0; iz<Mz; iz++) {
504 intptr_t doffset = iy*Mz+iz;
505 intptr_t coffset = iy*Nz+iz;
506 find_coefs_1d_s (spline->x_grid, xBC, data+doffset, My*Mz,
507 spline->coefs+coffset, Ny*Nz);
508 }
509
510 // Now, solve in the Y-direction
511 for (int ix=0; ix<Nx; ix++)
512 for (int iz=0; iz<Nz; iz++) {
513 intptr_t doffset = ix*Ny*Nz + iz;
514 intptr_t coffset = ix*Ny*Nz + iz;
515 find_coefs_1d_s (spline->y_grid, yBC, spline->coefs+doffset, Nz,
516 spline->coefs+coffset, Nz);
517 }
518
519 // Now, solve in the Z-direction
520 for (int ix=0; ix<Nx; ix++)
521 for (int iy=0; iy<Ny; iy++) {
522 intptr_t doffset = (ix*Ny+iy)*Nz;
523 intptr_t coffset = (ix*Ny+iy)*Nz;
524 find_coefs_1d_s (spline->z_grid, zBC, spline->coefs+doffset, 1,
525 spline->coefs+coffset, 1);
526 }
527 init_sse_data();
528 return spline;
529 }
530
531 void
recompute_UBspline_3d_s(UBspline_3d_s * spline,float * data)532 recompute_UBspline_3d_s (UBspline_3d_s* spline, float *data)
533 {
534 int Mx = spline->x_grid.num;
535 int My = spline->y_grid.num;
536 int Mz = spline->z_grid.num;
537 int Nx, Ny, Nz;
538
539 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3;
540 else Nx = Mx+2;
541 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3;
542 else Ny = My+2;
543 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3;
544 else Nz = Mz+2;
545
546 // First, solve in the X-direction
547 for (int iy=0; iy<My; iy++)
548 for (int iz=0; iz<Mz; iz++) {
549 intptr_t doffset = iy*Mz+iz;
550 intptr_t coffset = iy*Nz+iz;
551 find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My*Mz,
552 spline->coefs+coffset, Ny*Nz);
553 }
554
555 // Now, solve in the Y-direction
556 for (int ix=0; ix<Nx; ix++)
557 for (int iz=0; iz<Nz; iz++) {
558 intptr_t doffset = ix*Ny*Nz + iz;
559 intptr_t coffset = ix*Ny*Nz + iz;
560 find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, Nz,
561 spline->coefs+coffset, Nz);
562 }
563
564 // Now, solve in the Z-direction
565 for (int ix=0; ix<Nx; ix++)
566 for (int iy=0; iy<Ny; iy++) {
567 intptr_t doffset = (ix*Ny+iy)*Nz;
568 intptr_t coffset = (ix*Ny+iy)*Nz;
569 find_coefs_1d_s (spline->z_grid, spline->zBC, spline->coefs+doffset, 1,
570 spline->coefs+coffset, 1);
571 }
572 }
573
574
575 ////////////////////////////////////////////////////////////
576 ////////////////////////////////////////////////////////////
577 //// Single-Precision, Complex Creation Routines ////
578 ////////////////////////////////////////////////////////////
579 ////////////////////////////////////////////////////////////
580
581 // On input, bands should be filled with:
582 // row 0 : abcdInitial from boundary conditions
583 // rows 1:M: basis functions in first 3 cols, data in last
584 // row M+1 : abcdFinal from boundary conditions
585 // cstride gives the stride between values in coefs.
586 // On exit, coefs with contain interpolating B-spline coefs
587 UBspline_1d_c*
create_UBspline_1d_c(Ugrid x_grid,BCtype_c xBC,complex_float * data)588 create_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, complex_float *data)
589 {
590 // Create new spline
591 UBspline_1d_c* restrict spline = new UBspline_1d_c;
592 spline->spcode = U1D;
593 spline->tcode = SINGLE_COMPLEX;
594 spline->xBC = xBC;
595 // Setup internal variables
596 int M = x_grid.num;
597 int N;
598
599 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
600 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num);
601 N = M+3;
602 }
603 else {
604 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
605 N = M+2;
606 }
607
608 x_grid.delta_inv = 1.0/x_grid.delta;
609 spline->x_grid = x_grid;
610 #ifndef HAVE_SSE2
611 spline->coefs = (complex_float*)malloc (2*sizeof(float)*N);
612 #else
613 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*N);
614 #endif
615
616 BCtype_s xBC_r, xBC_i;
617 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode;
618 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r;
619 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode;
620 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i;
621 // Real part
622 find_coefs_1d_s (spline->x_grid, xBC_r,
623 (float*)data, 2, (float*)spline->coefs, 2);
624 // Imaginarty part
625 find_coefs_1d_s (spline->x_grid, xBC_i,
626 ((float*)data)+1, 2, ((float*)spline->coefs+1), 2);
627
628 init_sse_data();
629 return spline;
630 }
631
632 void
recompute_UBspline_1d_c(UBspline_1d_c * spline,complex_float * data)633 recompute_UBspline_1d_c (UBspline_1d_c* spline, complex_float *data)
634 {
635
636 BCtype_s xBC_r, xBC_i;
637 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode;
638 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r;
639 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode;
640 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i;
641
642 // Real part
643 find_coefs_1d_s (spline->x_grid, xBC_r,
644 (float*)data, 2, (float*)spline->coefs, 2);
645 // Imaginarty part
646 find_coefs_1d_s (spline->x_grid, xBC_i,
647 ((float*)data)+1, 2, ((float*)spline->coefs+1), 2);
648 }
649
650
651
652 UBspline_2d_c*
create_UBspline_2d_c(Ugrid x_grid,Ugrid y_grid,BCtype_c xBC,BCtype_c yBC,complex_float * data)653 create_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid,
654 BCtype_c xBC, BCtype_c yBC, complex_float *data)
655 {
656 // Create new spline
657 UBspline_2d_c* restrict spline = new UBspline_2d_c;
658 spline->spcode = U2D;
659 spline->tcode = SINGLE_COMPLEX;
660 spline->xBC = xBC;
661 spline->yBC = yBC;
662
663 // Setup internal variables
664 int Mx = x_grid.num;
665 int My = y_grid.num;
666 int Nx, Ny;
667
668 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3;
669 else Nx = Mx+2;
670 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
671 x_grid.delta_inv = 1.0/x_grid.delta;
672 spline->x_grid = x_grid;
673
674 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3;
675 else Ny = My+2;
676 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
677 y_grid.delta_inv = 1.0/y_grid.delta;
678 spline->y_grid = y_grid;
679 spline->x_stride = Ny;
680
681 #ifndef HAVE_SSE2
682 spline->coefs = (complex_float*)malloc (2*sizeof(float)*Nx*Ny);
683 #else
684 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*Nx*Ny);
685 #endif
686
687 BCtype_s xBC_r, xBC_i, yBC_r, yBC_i;
688 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode;
689 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r;
690 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode;
691 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i;
692 yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode;
693 yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r;
694 yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode;
695 yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i;
696 // First, solve in the X-direction
697 for (int iy=0; iy<My; iy++) {
698 intptr_t doffset = 2*iy;
699 intptr_t coffset = 2*iy;
700 // Real part
701 find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My,
702 (float*)spline->coefs+coffset, 2*Ny);
703 // Imag part
704 find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My,
705 ((float*)spline->coefs)+coffset+1, 2*Ny);
706 }
707
708 // Now, solve in the Y-direction
709 for (int ix=0; ix<Nx; ix++) {
710 intptr_t doffset = 2*ix*Ny;
711 intptr_t coffset = 2*ix*Ny;
712 // Real part
713 find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2,
714 ((float*)spline->coefs)+coffset, 2);
715 // Imag part
716 find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2,
717 ((float*)spline->coefs)+coffset+1, 2);
718 }
719 init_sse_data();
720 return spline;
721 }
722
723
724 void
recompute_UBspline_2d_c(UBspline_2d_c * spline,complex_float * data)725 recompute_UBspline_2d_c (UBspline_2d_c* spline, complex_float *data)
726 {
727 // Setup internal variables
728 int Mx = spline->x_grid.num;
729 int My = spline->y_grid.num;
730 int Nx, Ny;
731
732 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
733 Nx = Mx+3;
734 else
735 Nx = Mx+2;
736 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
737 Ny = My+3;
738 else
739 Ny = My+2;
740
741 BCtype_s xBC_r, xBC_i, yBC_r, yBC_i;
742 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode;
743 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r;
744 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode;
745 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i;
746 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode;
747 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r;
748 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode;
749 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i;
750
751 // First, solve in the X-direction
752 for (int iy=0; iy<My; iy++) {
753 intptr_t doffset = 2*iy;
754 intptr_t coffset = 2*iy;
755 // Real part
756 find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My,
757 (float*)spline->coefs+coffset, 2*Ny);
758 // Imag part
759 find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My,
760 ((float*)spline->coefs)+coffset+1, 2*Ny);
761 }
762
763 // Now, solve in the Y-direction
764 for (int ix=0; ix<Nx; ix++) {
765 intptr_t doffset = 2*ix*Ny;
766 intptr_t coffset = 2*ix*Ny;
767 // Real part
768 find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2,
769 ((float*)spline->coefs)+coffset, 2);
770 // Imag part
771 find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2,
772 ((float*)spline->coefs)+coffset+1, 2);
773 }
774 }
775
776 UBspline_3d_c*
create_UBspline_3d_c(Ugrid x_grid,Ugrid y_grid,Ugrid z_grid,BCtype_c xBC,BCtype_c yBC,BCtype_c zBC,complex_float * data)777 create_UBspline_3d_c (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
778 BCtype_c xBC, BCtype_c yBC, BCtype_c zBC,
779 complex_float *data)
780 {
781 // Create new spline
782 UBspline_3d_c* restrict spline = new UBspline_3d_c;
783 spline->spcode = U3D;
784 spline->tcode = SINGLE_COMPLEX;
785 spline->xBC = xBC;
786 spline->yBC = yBC;
787 spline->zBC = zBC;
788
789 // Setup internal variables
790 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num;
791 int Nx, Ny, Nz;
792
793 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3;
794 else Nx = Mx+2;
795 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
796 x_grid.delta_inv = 1.0/x_grid.delta;
797 spline->x_grid = x_grid;
798
799 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3;
800 else Ny = My+2;
801 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
802 y_grid.delta_inv = 1.0/y_grid.delta;
803 spline->y_grid = y_grid;
804
805 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3;
806 else Nz = Mz+2;
807 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
808 z_grid.delta_inv = 1.0/z_grid.delta;
809 spline->z_grid = z_grid;
810
811 spline->x_stride = Ny*Nz;
812 spline->y_stride = Nz;
813
814 #ifndef HAVE_SSE2
815 spline->coefs = new complex_float[Nx*Ny*Nz];
816 #else
817 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*Nx*Ny*Nz);
818 #endif
819
820 BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
821 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode;
822 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r;
823 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode;
824 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i;
825 yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode;
826 yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r;
827 yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode;
828 yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i;
829 zBC_r.lCode = zBC.lCode; zBC_r.rCode = zBC.rCode;
830 zBC_r.lVal = zBC.lVal_r; zBC_r.rVal = zBC.rVal_r;
831 zBC_i.lCode = zBC.lCode; zBC_i.rCode = zBC.rCode;
832 zBC_i.lVal = zBC.lVal_i; zBC_i.rVal = zBC.rVal_i;
833 // First, solve in the X-direction
834 for (int iy=0; iy<My; iy++)
835 for (int iz=0; iz<Mz; iz++) {
836 intptr_t doffset = 2*(iy*Mz+iz);
837 intptr_t coffset = 2*(iy*Nz+iz);
838 // Real part
839 find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My*Mz,
840 ((float*)spline->coefs)+coffset, 2*Ny*Nz);
841 // Imag part
842 find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My*Mz,
843 ((float*)spline->coefs)+coffset+1, 2*Ny*Nz);
844 }
845
846 // Now, solve in the Y-direction
847 for (int ix=0; ix<Nx; ix++)
848 for (int iz=0; iz<Nz; iz++) {
849 intptr_t doffset = 2*(ix*Ny*Nz + iz);
850 intptr_t coffset = 2*(ix*Ny*Nz + iz);
851 // Real part
852 find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2*Nz,
853 ((float*)spline->coefs)+coffset, 2*Nz);
854 // Imag part
855 find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2*Nz,
856 ((float*)spline->coefs)+coffset+1, 2*Nz);
857 }
858
859 // Now, solve in the Z-direction
860 for (int ix=0; ix<Nx; ix++)
861 for (int iy=0; iy<Ny; iy++) {
862 intptr_t doffset = 2*((ix*Ny+iy)*Nz);
863 intptr_t coffset = 2*((ix*Ny+iy)*Nz);
864 // Real part
865 find_coefs_1d_s (spline->z_grid, zBC_r, ((float*)spline->coefs)+doffset, 2,
866 ((float*)spline->coefs)+coffset, 2);
867 // Imag part
868 find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)spline->coefs)+doffset+1, 2,
869 ((float*)spline->coefs)+coffset+1, 2);
870 }
871
872 init_sse_data();
873 return spline;
874 }
875
876 void
recompute_UBspline_3d_c(UBspline_3d_c * spline,complex_float * data)877 recompute_UBspline_3d_c (UBspline_3d_c* spline, complex_float *data)
878 {
879 // Setup internal variables
880 int Mx = spline->x_grid.num;
881 int My = spline->y_grid.num;
882 int Mz = spline->z_grid.num;
883 int Nx, Ny, Nz;
884
885 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3;
886 else Nx = Mx+2;
887 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3;
888 else Ny = My+2;
889 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3;
890 else Nz = Mz+2;
891
892 BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
893 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode;
894 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r;
895 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode;
896 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i;
897 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode;
898 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r;
899 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode;
900 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i;
901 zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode;
902 zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r;
903 zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode;
904 zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i;
905 // First, solve in the X-direction
906 for (int iy=0; iy<My; iy++)
907 for (int iz=0; iz<Mz; iz++) {
908 intptr_t doffset = 2*(iy*Mz+iz);
909 intptr_t coffset = 2*(iy*Nz+iz);
910 // Real part
911 find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My*Mz,
912 ((float*)spline->coefs)+coffset, 2*Ny*Nz);
913 // Imag part
914 find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My*Mz,
915 ((float*)spline->coefs)+coffset+1, 2*Ny*Nz);
916 }
917
918 // Now, solve in the Y-direction
919 for (int ix=0; ix<Nx; ix++)
920 for (int iz=0; iz<Nz; iz++) {
921 intptr_t doffset = 2*(ix*Ny*Nz + iz);
922 intptr_t coffset = 2*(ix*Ny*Nz + iz);
923 // Real part
924 find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2*Nz,
925 ((float*)spline->coefs)+coffset, 2*Nz);
926 // Imag part
927 find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2*Nz,
928 ((float*)spline->coefs)+coffset+1, 2*Nz);
929 }
930
931 // Now, solve in the Z-direction
932 for (int ix=0; ix<Nx; ix++)
933 for (int iy=0; iy<Ny; iy++) {
934 intptr_t doffset = 2*((ix*Ny+iy)*Nz);
935 intptr_t coffset = 2*((ix*Ny+iy)*Nz);
936 // Real part
937 find_coefs_1d_s (spline->z_grid, zBC_r, ((float*)spline->coefs)+doffset, 2,
938 ((float*)spline->coefs)+coffset, 2);
939 // Imag part
940 find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)spline->coefs)+doffset+1, 2,
941 ((float*)spline->coefs)+coffset+1, 2);
942 }
943 }
944
945
946 ////////////////////////////////////////////////////////////
947 ////////////////////////////////////////////////////////////
948 //// Double-Precision, Real Creation Routines ////
949 ////////////////////////////////////////////////////////////
950 ////////////////////////////////////////////////////////////
951
952 // On input, bands should be filled with:
953 // row 0 : abcdInitial from boundary conditions
954 // rows 1:M: basis functions in first 3 cols, data in last
955 // row M+1 : abcdFinal from boundary conditions
956 // cstride gives the stride between values in coefs.
957 // On exit, coefs with contain interpolating B-spline coefs
958 void
solve_deriv_interp_1d_d(double bands[],double coefs[],int M,int cstride)959 solve_deriv_interp_1d_d (double bands[], double coefs[],
960 int M, int cstride)
961 {
962 // Solve interpolating equations
963 // First and last rows are different
964 bands[4*(0)+1] /= bands[4*(0)+0];
965 bands[4*(0)+2] /= bands[4*(0)+0];
966 bands[4*(0)+3] /= bands[4*(0)+0];
967 bands[4*(0)+0] = 1.0;
968 bands[4*(1)+1] -= bands[4*(1)+0]*bands[4*(0)+1];
969 bands[4*(1)+2] -= bands[4*(1)+0]*bands[4*(0)+2];
970 bands[4*(1)+3] -= bands[4*(1)+0]*bands[4*(0)+3];
971 bands[4*(0)+0] = 0.0;
972 bands[4*(1)+2] /= bands[4*(1)+1];
973 bands[4*(1)+3] /= bands[4*(1)+1];
974 bands[4*(1)+1] = 1.0;
975
976 // Now do rows 2 through M+1
977 for (int row=2; row < (M+1); row++) {
978 bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
979 bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
980 bands[4*(row)+2] /= bands[4*(row)+1];
981 bands[4*(row)+3] /= bands[4*(row)+1];
982 bands[4*(row)+0] = 0.0;
983 bands[4*(row)+1] = 1.0;
984 }
985
986 // Do last row
987 bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
988 bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
989 bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
990 bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
991 bands[4*(M+1)+3] /= bands[4*(M+1)+2];
992 bands[4*(M+1)+2] = 1.0;
993
994 coefs[(M+1)*cstride] = bands[4*(M+1)+3];
995 // Now back substitute up
996 for (int row=M; row>0; row--)
997 coefs[row*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[cstride*(row+1)];
998
999 // Finish with first row
1000 coefs[0] = bands[4*(0)+3] - bands[4*(0)+1]*coefs[1*cstride] - bands[4*(0)+2]*coefs[2*cstride];
1001 }
1002
1003 // On input, bands should be filled with:
1004 // row 0 : abcdInitial from boundary conditions
1005 // rows 1:M: basis functions in first 3 cols, data in last
1006 // row M+1 : abcdFinal from boundary conditions
1007 // cstride gives the stride between values in coefs.
1008 // On exit, coefs with contain interpolating B-spline coefs
1009 void
solve_periodic_interp_1d_d(double bands[],double coefs[],int M,int cstride)1010 solve_periodic_interp_1d_d (double bands[], double coefs[],
1011 int M, int cstride)
1012 {
1013 std::vector<double> lastCol(M);
1014
1015 // Now solve:
1016 // First and last rows are different
1017 bands[4*(0)+2] /= bands[4*(0)+1];
1018 bands[4*(0)+0] /= bands[4*(0)+1];
1019 bands[4*(0)+3] /= bands[4*(0)+1];
1020 bands[4*(0)+1] = 1.0;
1021 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
1022 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
1023 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2];
1024 lastCol[0] = bands[4*(0)+0];
1025
1026 for (int row=1; row < (M-1); row++) {
1027 bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
1028 bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
1029 lastCol[row] = -bands[4*(row)+0] * lastCol[row-1];
1030 bands[4*(row)+0] = 0.0;
1031 bands[4*(row)+2] /= bands[4*(row)+1];
1032 bands[4*(row)+3] /= bands[4*(row)+1];
1033 lastCol[row] /= bands[4*(row)+1];
1034 bands[4*(row)+1] = 1.0;
1035 if (row < (M-2)) {
1036 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
1037 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
1038 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
1039 }
1040 }
1041
1042 // Now do last row
1043 // The [2] element and [0] element are now on top of each other
1044 bands[4*(M-1)+0] += bands[4*(M-1)+2];
1045 bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
1046 bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3];
1047 bands[4*(M-1)+3] /= bands[4*(M-1)+1];
1048 coefs[M*cstride] = bands[4*(M-1)+3];
1049 for (int row=M-2; row>=0; row--)
1050 coefs[(row+1)*cstride] =
1051 bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
1052
1053 coefs[0*cstride] = coefs[M*cstride];
1054 coefs[(M+1)*cstride] = coefs[1*cstride];
1055 coefs[(M+2)*cstride] = coefs[2*cstride];
1056 }
1057
1058 // On input, bands should be filled with:
1059 // row 0 : abcdInitial from boundary conditions
1060 // rows 1:M: basis functions in first 3 cols, data in last
1061 // row M+1 : abcdFinal from boundary conditions
1062 // cstride gives the stride between values in coefs.
1063 // On exit, coefs with contain interpolating B-spline coefs
1064 void
solve_antiperiodic_interp_1d_d(double bands[],double coefs[],int M,int cstride)1065 solve_antiperiodic_interp_1d_d (double bands[], double coefs[],
1066 int M, int cstride)
1067 {
1068 std::vector<double> lastCol(M);
1069
1070 bands[4*0+0] *= -1.0;
1071 bands[4*(M-1)+2] *= -1.0;
1072 // Now solve:
1073 // First and last rows are different
1074 bands[4*(0)+2] /= bands[4*(0)+1];
1075 bands[4*(0)+0] /= bands[4*(0)+1];
1076 bands[4*(0)+3] /= bands[4*(0)+1];
1077 bands[4*(0)+1] = 1.0;
1078 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
1079 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
1080 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2];
1081 lastCol[0] = bands[4*(0)+0];
1082
1083 for (int row=1; row < (M-1); row++) {
1084 bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
1085 bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
1086 lastCol[row] = -bands[4*(row)+0] * lastCol[row-1];
1087 bands[4*(row)+0] = 0.0;
1088 bands[4*(row)+2] /= bands[4*(row)+1];
1089 bands[4*(row)+3] /= bands[4*(row)+1];
1090 lastCol[row] /= bands[4*(row)+1];
1091 bands[4*(row)+1] = 1.0;
1092 if (row < (M-2)) {
1093 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
1094 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
1095 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
1096 }
1097 }
1098
1099 // Now do last row
1100 // The [2] element and [0] element are now on top of each other
1101 bands[4*(M-1)+0] += bands[4*(M-1)+2];
1102 bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
1103 bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3];
1104 bands[4*(M-1)+3] /= bands[4*(M-1)+1];
1105 coefs[M*cstride] = bands[4*(M-1)+3];
1106 for (int row=M-2; row>=0; row--)
1107 coefs[(row+1)*cstride] =
1108 bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
1109
1110 coefs[0*cstride] = -coefs[M*cstride];
1111 coefs[(M+1)*cstride] = -coefs[1*cstride];
1112 coefs[(M+2)*cstride] = -coefs[2*cstride];
1113 }
1114
1115
1116
1117 void
find_coefs_1d_d(Ugrid grid,BCtype_d bc,double * data,intptr_t dstride,double * coefs,intptr_t cstride)1118 find_coefs_1d_d (Ugrid grid, BCtype_d bc,
1119 double *data, intptr_t dstride,
1120 double *coefs, intptr_t cstride)
1121 {
1122 int M = grid.num;
1123 double basis[4] = {1.0/6.0, 2.0/3.0, 1.0/6.0, 0.0};
1124 if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) {
1125 #ifdef HAVE_C_VARARRAYS
1126 double bands[M*4];
1127 #else
1128 double *bands = new double[4*M];
1129 #endif
1130 for (int i=0; i<M; i++) {
1131 bands[4*i+0] = basis[0];
1132 bands[4*i+1] = basis[1];
1133 bands[4*i+2] = basis[2];
1134 bands[4*i+3] = data[i*dstride];
1135 }
1136 if (bc.lCode == ANTIPERIODIC)
1137 solve_antiperiodic_interp_1d_d (bands, coefs, M, cstride);
1138 else
1139 solve_periodic_interp_1d_d (bands, coefs, M, cstride);
1140
1141
1142 #ifndef HAVE_C_VARARRAYS
1143 delete[] bands;
1144 #endif
1145 }
1146 else {
1147 // Setup boundary conditions
1148 double abcd_left[4];
1149 double abcd_right[4];
1150 for (int i = 0; i < 4; i++) {
1151 abcd_left[i] = 0.0;
1152 abcd_right[i] = 0.0;
1153 }
1154 // Left boundary
1155 if (bc.lCode == FLAT || bc.lCode == NATURAL)
1156 bc.lVal = 0.0;
1157 if (bc.lCode == FLAT || bc.lCode == DERIV1) {
1158 abcd_left[0] = -0.5 * grid.delta_inv;
1159 abcd_left[1] = 0.0 * grid.delta_inv;
1160 abcd_left[2] = 0.5 * grid.delta_inv;
1161 abcd_left[3] = bc.lVal;
1162 }
1163 if (bc.lCode == NATURAL || bc.lCode == DERIV2) {
1164 abcd_left[0] = 1.0 * grid.delta_inv * grid.delta_inv;
1165 abcd_left[1] =-2.0 * grid.delta_inv * grid.delta_inv;
1166 abcd_left[2] = 1.0 * grid.delta_inv * grid.delta_inv;
1167 abcd_left[3] = bc.lVal;
1168 }
1169
1170 // Right boundary
1171 if (bc.rCode == FLAT || bc.rCode == NATURAL)
1172 bc.rVal = 0.0;
1173 if (bc.rCode == FLAT || bc.rCode == DERIV1) {
1174 abcd_right[0] = -0.5 * grid.delta_inv;
1175 abcd_right[1] = 0.0 * grid.delta_inv;
1176 abcd_right[2] = 0.5 * grid.delta_inv;
1177 abcd_right[3] = bc.rVal;
1178 }
1179 if (bc.rCode == NATURAL || bc.rCode == DERIV2) {
1180 abcd_right[0] = 1.0 *grid.delta_inv * grid.delta_inv;
1181 abcd_right[1] =-2.0 *grid.delta_inv * grid.delta_inv;
1182 abcd_right[2] = 1.0 *grid.delta_inv * grid.delta_inv;
1183 abcd_right[3] = bc.rVal;
1184 }
1185 #ifdef HAVE_C_VARARRAYS
1186 double bands[(M+2)*4];
1187 #else
1188 double *bands = new double[(M+2)*4];
1189 #endif
1190 for (int i=0; i<4; i++) {
1191 bands[4*( 0 )+i] = abcd_left[i];
1192 bands[4*(M+1)+i] = abcd_right[i];
1193 }
1194 for (int i=0; i<M; i++) {
1195 for (int j=0; j<3; j++)
1196 bands[4*(i+1)+j] = basis[j];
1197 bands[4*(i+1)+3] = data[i*dstride];
1198 }
1199 // Now, solve for coefficients
1200 solve_deriv_interp_1d_d (bands, coefs, M, cstride);
1201 #ifndef HAVE_C_VARARRAYS
1202 delete[] bands;
1203 #endif
1204 }
1205 }
1206
1207
1208
1209 UBspline_1d_d*
create_UBspline_1d_d(Ugrid x_grid,BCtype_d xBC,double * data)1210 create_UBspline_1d_d (Ugrid x_grid, BCtype_d xBC, double *data)
1211 {
1212 // Create new spline
1213 UBspline_1d_d* restrict spline = new UBspline_1d_d;
1214 spline->spcode = U1D;
1215 spline->tcode = DOUBLE_REAL;
1216 spline->xBC = xBC;
1217
1218 // Setup internal variables
1219 int M = x_grid.num;
1220 int N;
1221
1222 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
1223 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num);
1224 N = M+3;
1225 }
1226 else {
1227 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
1228 N = M+2;
1229 }
1230
1231 x_grid.delta_inv = 1.0/x_grid.delta;
1232 spline->x_grid = x_grid;
1233
1234 #ifndef HAVE_SSE2
1235 spline->coefs = (double*)malloc (sizeof(double)*N);
1236 #else
1237 posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*N);
1238 #endif
1239 find_coefs_1d_d (spline->x_grid, xBC, data, 1, spline->coefs, 1);
1240
1241 init_sse_data();
1242 return spline;
1243 }
1244
1245 void
recompute_UBspline_1d_d(UBspline_1d_d * spline,double * data)1246 recompute_UBspline_1d_d (UBspline_1d_d* spline, double *data)
1247 {
1248 find_coefs_1d_d (spline->x_grid, spline->xBC, data, 1, spline->coefs, 1);
1249 }
1250
1251
1252 UBspline_2d_d*
create_UBspline_2d_d(Ugrid x_grid,Ugrid y_grid,BCtype_d xBC,BCtype_d yBC,double * data)1253 create_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid,
1254 BCtype_d xBC, BCtype_d yBC, double *data)
1255 {
1256 // Create new spline
1257 UBspline_2d_d* restrict spline = new UBspline_2d_d;
1258 spline->spcode = U2D;
1259 spline->tcode = DOUBLE_REAL;
1260 spline->xBC = xBC;
1261 spline->yBC = yBC;
1262
1263 // Setup internal variables
1264 int Mx = x_grid.num;
1265 int My = y_grid.num;
1266 int Nx, Ny;
1267
1268 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3;
1269 else Nx = Mx+2;
1270 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1271 x_grid.delta_inv = 1.0/x_grid.delta;
1272 spline->x_grid = x_grid;
1273
1274 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3;
1275 else Ny = My+2;
1276 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1277 y_grid.delta_inv = 1.0/y_grid.delta;
1278 spline->y_grid = y_grid;
1279 spline->x_stride = Ny;
1280
1281 #ifndef HAVE_SSE2
1282 spline->coefs = (double*)malloc (sizeof(double)*Nx*Ny);
1283 #else
1284 posix_memalign ((void**)&spline->coefs, 16, (sizeof(double)*Nx*Ny));
1285 #endif
1286
1287 // First, solve in the X-direction
1288 for (int iy=0; iy<My; iy++) {
1289 intptr_t doffset = iy;
1290 intptr_t coffset = iy;
1291 find_coefs_1d_d (spline->x_grid, xBC, data+doffset, My,
1292 spline->coefs+coffset, Ny);
1293 }
1294
1295 // Now, solve in the Y-direction
1296 for (int ix=0; ix<Nx; ix++) {
1297 intptr_t doffset = ix*Ny;
1298 intptr_t coffset = ix*Ny;
1299 find_coefs_1d_d (spline->y_grid, yBC, spline->coefs+doffset, 1,
1300 spline->coefs+coffset, 1);
1301 }
1302
1303 init_sse_data();
1304 return spline;
1305 }
1306
1307 void
recompute_UBspline_2d_d(UBspline_2d_d * spline,double * data)1308 recompute_UBspline_2d_d (UBspline_2d_d* spline, double *data)
1309 {
1310 int Mx = spline->x_grid.num;
1311 int My = spline->y_grid.num;
1312 int Nx, Ny;
1313
1314 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1315 Nx = Mx+3;
1316 else
1317 Nx = Mx+2;
1318
1319 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
1320 Ny = My+3;
1321 else
1322 Ny = My+2;
1323
1324 // First, solve in the X-direction
1325 for (int iy=0; iy<My; iy++) {
1326 intptr_t doffset = iy;
1327 intptr_t coffset = iy;
1328 find_coefs_1d_d (spline->x_grid, spline->xBC, data+doffset, My,
1329 spline->coefs+coffset, Ny);
1330 }
1331
1332 // Now, solve in the Y-direction
1333 for (int ix=0; ix<Nx; ix++) {
1334 intptr_t doffset = ix*Ny;
1335 intptr_t coffset = ix*Ny;
1336 find_coefs_1d_d (spline->y_grid, spline->yBC, spline->coefs+doffset, 1,
1337 spline->coefs+coffset, 1);
1338 }
1339 }
1340
1341
1342 UBspline_3d_d*
create_UBspline_3d_d(Ugrid x_grid,Ugrid y_grid,Ugrid z_grid,BCtype_d xBC,BCtype_d yBC,BCtype_d zBC,double * data)1343 create_UBspline_3d_d (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
1344 BCtype_d xBC, BCtype_d yBC, BCtype_d zBC,
1345 double *data)
1346 {
1347 // Create new spline
1348 UBspline_3d_d* restrict spline = new UBspline_3d_d;
1349 spline->spcode = U3D;
1350 spline->tcode = DOUBLE_REAL;
1351 spline->xBC = xBC;
1352 spline->yBC = yBC;
1353 spline->zBC = zBC;
1354
1355 // Setup internal variables
1356 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num;
1357 int Nx, Ny, Nz;
1358
1359 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3;
1360 else Nx = Mx+2;
1361 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1362 x_grid.delta_inv = 1.0/x_grid.delta;
1363 spline->x_grid = x_grid;
1364
1365 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3;
1366 else Ny = My+2;
1367 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1368 y_grid.delta_inv = 1.0/y_grid.delta;
1369 spline->y_grid = y_grid;
1370
1371 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3;
1372 else Nz = Mz+2;
1373 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
1374 z_grid.delta_inv = 1.0/z_grid.delta;
1375 spline->z_grid = z_grid;
1376
1377 spline->x_stride = Ny*Nz;
1378 spline->y_stride = Nz;
1379
1380 #ifndef HAVE_SSE2
1381 spline->coefs = new double[Nx*Ny*Nz];
1382 #else
1383 posix_memalign ((void**)&spline->coefs, 16, (sizeof(double)*Nx*Ny*Nz));
1384 #endif
1385
1386 // First, solve in the X-direction
1387 for (int iy=0; iy<My; iy++)
1388 for (int iz=0; iz<Mz; iz++) {
1389 intptr_t doffset = iy*Mz+iz;
1390 intptr_t coffset = iy*Nz+iz;
1391 find_coefs_1d_d (spline->x_grid, xBC, data+doffset, My*Mz,
1392 spline->coefs+coffset, Ny*Nz);
1393 }
1394
1395 // Now, solve in the Y-direction
1396 for (int ix=0; ix<Nx; ix++)
1397 for (int iz=0; iz<Nz; iz++) {
1398 intptr_t doffset = ix*Ny*Nz + iz;
1399 intptr_t coffset = ix*Ny*Nz + iz;
1400 find_coefs_1d_d (spline->y_grid, yBC, spline->coefs+doffset, Nz,
1401 spline->coefs+coffset, Nz);
1402 }
1403
1404 // Now, solve in the Z-direction
1405 for (int ix=0; ix<Nx; ix++)
1406 for (int iy=0; iy<Ny; iy++) {
1407 intptr_t doffset = (ix*Ny+iy)*Nz;
1408 intptr_t coffset = (ix*Ny+iy)*Nz;
1409 find_coefs_1d_d (spline->z_grid, zBC, spline->coefs+doffset, 1,
1410 spline->coefs+coffset, 1);
1411 }
1412
1413 init_sse_data();
1414 return spline;
1415 }
1416
1417 void
recompute_UBspline_3d_d(UBspline_3d_d * spline,double * data)1418 recompute_UBspline_3d_d (UBspline_3d_d* spline, double *data)
1419 {
1420 int Mx = spline->x_grid.num;
1421 int My = spline->y_grid.num;
1422 int Mz = spline->z_grid.num;
1423 int Nx, Ny, Nz;
1424
1425 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1426 Nx = Mx+3;
1427 else
1428 Nx = Mx+2;
1429 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
1430 Ny = My+3;
1431 else
1432 Ny = My+2;
1433 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)
1434 Nz = Mz+3;
1435 else
1436 Nz = Mz+2;
1437
1438 // First, solve in the X-direction
1439 for (int iy=0; iy<My; iy++)
1440 for (int iz=0; iz<Mz; iz++) {
1441 intptr_t doffset = iy*Mz+iz;
1442 intptr_t coffset = iy*Nz+iz;
1443 find_coefs_1d_d (spline->x_grid, spline->xBC, data+doffset, My*Mz,
1444 spline->coefs+coffset, Ny*Nz);
1445 }
1446
1447 // Now, solve in the Y-direction
1448 for (int ix=0; ix<Nx; ix++)
1449 for (int iz=0; iz<Nz; iz++) {
1450 intptr_t doffset = ix*Ny*Nz + iz;
1451 intptr_t coffset = ix*Ny*Nz + iz;
1452 find_coefs_1d_d (spline->y_grid, spline->yBC, spline->coefs+doffset, Nz,
1453 spline->coefs+coffset, Nz);
1454 }
1455
1456 // Now, solve in the Z-direction
1457 for (int ix=0; ix<Nx; ix++)
1458 for (int iy=0; iy<Ny; iy++) {
1459 intptr_t doffset = (ix*Ny+iy)*Nz;
1460 intptr_t coffset = (ix*Ny+iy)*Nz;
1461 find_coefs_1d_d (spline->z_grid, spline->zBC, spline->coefs+doffset, 1,
1462 spline->coefs+coffset, 1);
1463 }
1464 }
1465
1466
1467 ////////////////////////////////////////////////////////////
1468 ////////////////////////////////////////////////////////////
1469 //// Double-Precision, Complex Creation Routines ////
1470 ////////////////////////////////////////////////////////////
1471 ////////////////////////////////////////////////////////////
1472
1473 // On input, bands should be filled with:
1474 // row 0 : abcdInitial from boundary conditions
1475 // rows 1:M: basis functions in first 3 cols, data in last
1476 // row M+1 : abcdFinal from boundary conditions
1477 // cstride gives the stride between values in coefs.
1478 // On exit, coefs with contain interpolating B-spline coefs
1479
1480
1481 UBspline_1d_z*
create_UBspline_1d_z(Ugrid x_grid,BCtype_z xBC,complex_double * data)1482 create_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, complex_double *data)
1483 {
1484 // Create new spline
1485 UBspline_1d_z* restrict spline = new UBspline_1d_z;
1486 spline->spcode = U1D;
1487 spline->tcode = DOUBLE_COMPLEX;
1488 spline->xBC = xBC;
1489
1490 // Setup internal variables
1491 int M = x_grid.num;
1492 int N;
1493
1494 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
1495 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num);
1496 N = M+3;
1497 }
1498 else {
1499 x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
1500 N = M+2;
1501 }
1502
1503 x_grid.delta_inv = 1.0/x_grid.delta;
1504 spline->x_grid = x_grid;
1505 #ifndef HAVE_SSE2
1506 spline->coefs = (complex_double*)malloc (2*sizeof(double)*N);
1507 #else
1508 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*N);
1509 #endif
1510
1511 BCtype_d xBC_r, xBC_i;
1512 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode;
1513 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r;
1514 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode;
1515 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i;
1516 // Real part
1517 find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, 2,
1518 (double*)spline->coefs, 2);
1519 // Imaginarty part
1520 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, 2,
1521 ((double*)spline->coefs)+1, 2);
1522
1523 init_sse_data();
1524 return spline;
1525 }
1526
1527 void
recompute_UBspline_1d_z(UBspline_1d_z * spline,complex_double * data)1528 recompute_UBspline_1d_z (UBspline_1d_z* spline, complex_double *data)
1529 {
1530 // int M = spline->x_grid.num;
1531 // int N;
1532
1533 // if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1534 // N = M+3;
1535 // else
1536 // N = M+2;
1537
1538 BCtype_d xBC_r, xBC_i;
1539 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode;
1540 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r;
1541 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode;
1542 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i;
1543 // Real part
1544 find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, 2,
1545 (double*)spline->coefs, 2);
1546 // Imaginarty part
1547 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, 2,
1548 ((double*)spline->coefs)+1, 2);
1549
1550 }
1551
1552
1553 UBspline_2d_z*
create_UBspline_2d_z(Ugrid x_grid,Ugrid y_grid,BCtype_z xBC,BCtype_z yBC,complex_double * data)1554 create_UBspline_2d_z (Ugrid x_grid, Ugrid y_grid,
1555 BCtype_z xBC, BCtype_z yBC, complex_double *data)
1556 {
1557 // Create new spline
1558 UBspline_2d_z* restrict spline = new UBspline_2d_z;
1559 spline->spcode = U2D;
1560 spline->tcode = DOUBLE_COMPLEX;
1561 spline->xBC = xBC;
1562 spline->yBC = yBC;
1563
1564 // Setup internal variables
1565 int Mx = x_grid.num;
1566 int My = y_grid.num;
1567 int Nx, Ny;
1568
1569 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)
1570 Nx = Mx+3;
1571 else
1572 Nx = Mx+2;
1573 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1574 x_grid.delta_inv = 1.0/x_grid.delta;
1575 spline->x_grid = x_grid;
1576
1577 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)
1578 Ny = My+3;
1579 else
1580 Ny = My+2;
1581 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1582 y_grid.delta_inv = 1.0/y_grid.delta;
1583 spline->y_grid = y_grid;
1584 spline->x_stride = Ny;
1585
1586 #ifndef HAVE_SSE2
1587 spline->coefs = (complex_double*)malloc (2*sizeof(double)*Nx*Ny);
1588 #else
1589 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*Nx*Ny);
1590 #endif
1591
1592 BCtype_d xBC_r, xBC_i, yBC_r, yBC_i;
1593 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode;
1594 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r;
1595 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode;
1596 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i;
1597 yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode;
1598 yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r;
1599 yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode;
1600 yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i;
1601 // First, solve in the X-direction
1602 for (int iy=0; iy<My; iy++) {
1603 intptr_t doffset = 2*iy;
1604 intptr_t coffset = 2*iy;
1605 // Real part
1606 find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data+doffset), 2*My,
1607 (double*)spline->coefs+coffset, 2*Ny);
1608 // Imag part
1609 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My,
1610 ((double*)spline->coefs)+coffset+1, 2*Ny);
1611 }
1612
1613 // Now, solve in the Y-direction
1614 for (int ix=0; ix<Nx; ix++) {
1615 intptr_t doffset = 2*ix*Ny;
1616 intptr_t coffset = 2*ix*Ny;
1617 // Real part
1618 find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2,
1619 (double*)spline->coefs+coffset, 2);
1620 // Imag part
1621 find_coefs_1d_d (spline->y_grid, yBC_i, (double*)spline->coefs+doffset+1, 2,
1622 ((double*)spline->coefs)+coffset+1, 2);
1623 }
1624
1625 init_sse_data();
1626 return spline;
1627 }
1628
1629
1630 void
recompute_UBspline_2d_z(UBspline_2d_z * spline,complex_double * data)1631 recompute_UBspline_2d_z (UBspline_2d_z* spline, complex_double *data)
1632 {
1633 int Mx = spline->x_grid.num;
1634 int My = spline->y_grid.num;
1635 int Nx, Ny;
1636
1637 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1638 Nx = Mx+3;
1639 else
1640 Nx = Mx+2;
1641 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
1642 Ny = My+3;
1643 else
1644 Ny = My+2;
1645
1646 BCtype_d xBC_r, xBC_i, yBC_r, yBC_i;
1647 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode;
1648 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r;
1649 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode;
1650 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i;
1651 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode;
1652 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r;
1653 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode;
1654 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i;
1655
1656 // First, solve in the X-direction
1657 for (int iy=0; iy<My; iy++) {
1658 intptr_t doffset = 2*iy;
1659 intptr_t coffset = 2*iy;
1660 // Real part
1661 find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data+doffset), 2*My,
1662 (double*)spline->coefs+coffset, 2*Ny);
1663 // Imag part
1664 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My,
1665 ((double*)spline->coefs)+coffset+1, 2*Ny);
1666 }
1667
1668 // Now, solve in the Y-direction
1669 for (int ix=0; ix<Nx; ix++) {
1670 intptr_t doffset = 2*ix*Ny;
1671 intptr_t coffset = 2*ix*Ny;
1672 // Real part
1673 find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2,
1674 (double*)spline->coefs+coffset, 2);
1675 // Imag part
1676 find_coefs_1d_d (spline->y_grid, yBC_i, (double*)spline->coefs+doffset+1, 2,
1677 ((double*)spline->coefs)+coffset+1, 2);
1678 }
1679 }
1680
1681
1682
1683 UBspline_3d_z*
create_UBspline_3d_z(Ugrid x_grid,Ugrid y_grid,Ugrid z_grid,BCtype_z xBC,BCtype_z yBC,BCtype_z zBC,complex_double * data)1684 create_UBspline_3d_z (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
1685 BCtype_z xBC, BCtype_z yBC, BCtype_z zBC,
1686 complex_double *data)
1687 {
1688 // Create new spline
1689 UBspline_3d_z* restrict spline = new UBspline_3d_z;
1690 spline->spcode = U3D;
1691 spline->tcode = DOUBLE_COMPLEX;
1692 spline->xBC = xBC;
1693 spline->yBC = yBC;
1694 spline->zBC = zBC;
1695
1696 // Setup internal variables
1697 int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num;
1698 int Nx, Ny, Nz;
1699
1700 if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3;
1701 else Nx = Mx+2;
1702 x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1703 x_grid.delta_inv = 1.0/x_grid.delta;
1704 spline->x_grid = x_grid;
1705
1706 if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3;
1707 else Ny = My+2;
1708 y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1709 y_grid.delta_inv = 1.0/y_grid.delta;
1710 spline->y_grid = y_grid;
1711
1712 if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3;
1713 else Nz = Mz+2;
1714 z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
1715 z_grid.delta_inv = 1.0/z_grid.delta;
1716 spline->z_grid = z_grid;
1717
1718 spline->x_stride = Ny*Nz;
1719 spline->y_stride = Nz;
1720
1721 #ifndef HAVE_SSE2
1722 spline->coefs = new complex_double[Nx*Ny*Nz];
1723 #else
1724 posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*Nx*Ny*Nz);
1725 #endif
1726
1727 BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
1728 xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode;
1729 xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r;
1730 xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode;
1731 xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i;
1732 yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode;
1733 yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r;
1734 yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode;
1735 yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i;
1736 zBC_r.lCode = zBC.lCode; zBC_r.rCode = zBC.rCode;
1737 zBC_r.lVal = zBC.lVal_r; zBC_r.rVal = zBC.rVal_r;
1738 zBC_i.lCode = zBC.lCode; zBC_i.rCode = zBC.rCode;
1739 zBC_i.lVal = zBC.lVal_i; zBC_i.rVal = zBC.rVal_i;
1740 // First, solve in the X-direction
1741 for (int iy=0; iy<My; iy++)
1742 for (int iz=0; iz<Mz; iz++) {
1743 intptr_t doffset = 2*(iy*Mz+iz);
1744 intptr_t coffset = 2*(iy*Nz+iz);
1745 // Real part
1746 find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data)+doffset, 2*My*Mz,
1747 ((double*)spline->coefs)+coffset, 2*Ny*Nz);
1748 // Imag part
1749 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My*Mz,
1750 ((double*)spline->coefs)+coffset+1, 2*Ny*Nz);
1751 }
1752
1753 // Now, solve in the Y-direction
1754 for (int ix=0; ix<Nx; ix++)
1755 for (int iz=0; iz<Nz; iz++) {
1756 intptr_t doffset = 2*(ix*Ny*Nz + iz);
1757 intptr_t coffset = 2*(ix*Ny*Nz + iz);
1758 // Real part
1759 find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2*Nz,
1760 ((double*)spline->coefs)+coffset, 2*Nz);
1761 // Imag part
1762 find_coefs_1d_d (spline->y_grid, yBC_i, ((double*)spline->coefs)+doffset+1, 2*Nz,
1763 ((double*)spline->coefs)+coffset+1, 2*Nz);
1764 }
1765
1766 // Now, solve in the Z-direction
1767 for (int ix=0; ix<Nx; ix++)
1768 for (int iy=0; iy<Ny; iy++) {
1769 intptr_t doffset = 2*((ix*Ny+iy)*Nz);
1770 intptr_t coffset = 2*((ix*Ny+iy)*Nz);
1771 // Real part
1772 find_coefs_1d_d (spline->z_grid, zBC_r, ((double*)spline->coefs)+doffset, 2,
1773 ((double*)spline->coefs)+coffset, 2);
1774 // Imag part
1775 find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)spline->coefs)+doffset+1, 2,
1776 ((double*)spline->coefs)+coffset+1, 2);
1777 }
1778 init_sse_data();
1779 return spline;
1780 }
1781
1782 void
recompute_UBspline_3d_z(UBspline_3d_z * spline,complex_double * data)1783 recompute_UBspline_3d_z (UBspline_3d_z* spline, complex_double *data)
1784 {
1785 // Setup internal variables
1786 int Mx = spline->x_grid.num;
1787 int My = spline->y_grid.num;
1788 int Mz = spline->z_grid.num;
1789 int Nx, Ny, Nz;
1790
1791 if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1792 Nx = Mx+3;
1793 else
1794 Nx = Mx+2;
1795 if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
1796 Ny = My+3;
1797 else
1798 Ny = My+2;
1799 if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)
1800 Nz = Mz+3;
1801 else
1802 Nz = Mz+2;
1803
1804 BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
1805 xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode;
1806 xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r;
1807 xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode;
1808 xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i;
1809 yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode;
1810 yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r;
1811 yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode;
1812 yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i;
1813 zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode;
1814 zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r;
1815 zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode;
1816 zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i;
1817 // First, solve in the X-direction
1818 for (int iy=0; iy<My; iy++)
1819 for (int iz=0; iz<Mz; iz++) {
1820 intptr_t doffset = 2*(iy*Mz+iz);
1821 intptr_t coffset = 2*(iy*Nz+iz);
1822 // Real part
1823 find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data)+doffset, 2*My*Mz,
1824 ((double*)spline->coefs)+coffset, 2*Ny*Nz);
1825 // Imag part
1826 find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My*Mz,
1827 ((double*)spline->coefs)+coffset+1, 2*Ny*Nz);
1828 }
1829
1830 // Now, solve in the Y-direction
1831 for (int ix=0; ix<Nx; ix++)
1832 for (int iz=0; iz<Nz; iz++) {
1833 intptr_t doffset = 2*(ix*Ny*Nz + iz);
1834 intptr_t coffset = 2*(ix*Ny*Nz + iz);
1835 // Real part
1836 find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2*Nz,
1837 ((double*)spline->coefs)+coffset, 2*Nz);
1838 // Imag part
1839 find_coefs_1d_d (spline->y_grid, yBC_i, ((double*)spline->coefs)+doffset+1, 2*Nz,
1840 ((double*)spline->coefs)+coffset+1, 2*Nz);
1841 }
1842
1843 // Now, solve in the Z-direction
1844 for (int ix=0; ix<Nx; ix++)
1845 for (int iy=0; iy<Ny; iy++) {
1846 intptr_t doffset = 2*((ix*Ny+iy)*Nz);
1847 intptr_t coffset = 2*((ix*Ny+iy)*Nz);
1848 // Real part
1849 find_coefs_1d_d (spline->z_grid, zBC_r, ((double*)spline->coefs)+doffset, 2,
1850 ((double*)spline->coefs)+coffset, 2);
1851 // Imag part
1852 find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)spline->coefs)+doffset+1, 2,
1853 ((double*)spline->coefs)+coffset+1, 2);
1854 }
1855 }
1856
1857
1858 void
destroy_UBspline(Bspline * spline)1859 destroy_UBspline (Bspline *spline)
1860 {
1861 free(spline->coefs);
1862 delete spline;
1863 }
1864
1865 void
1866 destroy_NUBspline (Bspline *spline);
1867
1868 void
1869 destroy_multi_UBspline (Bspline *spline);
1870
1871 void
destroy_Bspline(void * spline)1872 destroy_Bspline (void *spline)
1873 {
1874 Bspline *sp = (Bspline *)spline;
1875 if (sp->sp_code <= U3D)
1876 destroy_UBspline (sp);
1877 else if (sp->sp_code <= NU3D)
1878 destroy_NUBspline (sp);
1879 else if (sp->sp_code <= MULTI_U3D)
1880 destroy_multi_UBspline (sp);
1881 else
1882 fprintf (stderr, "Error in destroy_Bspline: invalid spline code %d.\n",
1883 sp->sp_code);
1884 }
1885