1 /////////////////////////////////////////////////////////////////////////////
2 //  einspline:  a library for creating and evaluating B-splines            //
3 //  Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
4 //                                                                         //
5 //  This program is free software; you can redistribute it and/or modify   //
6 //  it under the terms of the GNU General Public License as published by   //
7 //  the Free Software Foundation; either version 2 of the License, or      //
8 //  (at your option) any later version.                                    //
9 //                                                                         //
10 //  This program is distributed in the hope that it will be useful,        //
11 //  but WITHOUT ANY WARRANTY; without even the implied warranty of         //
12 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
13 //  GNU General Public License for more details.                           //
14 //                                                                         //
15 //  You should have received a copy of the GNU General Public License      //
16 //  along with this program; if not, write to the Free Software            //
17 //  Foundation, Inc., 51 Franklin Street, Fifth Floor,                     //
18 //  Boston, MA  02110-1301  USA                                            //
19 /////////////////////////////////////////////////////////////////////////////
20 
21 #include "bspline_create.h"
22 #ifndef _XOPEN_SOURCE
23 #define _XOPEN_SOURCE 600
24 #endif
25 #ifndef __USE_XOPEN2K
26   #define __USE_XOPEN2K
27 #endif
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <vector>
31 
32 int posix_memalign(void **memptr, size_t alignment, size_t size);
33 
34 ////////////////////////////////////////////////////////////
35 ////////////////////////////////////////////////////////////
36 ////       Helper functions for spline creation         ////
37 ////////////////////////////////////////////////////////////
38 ////////////////////////////////////////////////////////////
39 void init_sse_data();
40 
41 void
42 find_coefs_1d_d (Ugrid grid, BCtype_d bc,
43 		 double *data,  intptr_t dstride,
44 		 double *coefs, intptr_t cstride);
45 
46 void
solve_deriv_interp_1d_s(float bands[],float coefs[],int M,int cstride)47 solve_deriv_interp_1d_s (float bands[], float coefs[],
48 			 int M, int cstride)
49 {
50   // Solve interpolating equations
51   // First and last rows are different
52   bands[4*(0)+1] /= bands[4*(0)+0];
53   bands[4*(0)+2] /= bands[4*(0)+0];
54   bands[4*(0)+3] /= bands[4*(0)+0];
55   bands[4*(0)+0] = 1.0;
56   bands[4*(1)+1] -= bands[4*(1)+0]*bands[4*(0)+1];
57   bands[4*(1)+2] -= bands[4*(1)+0]*bands[4*(0)+2];
58   bands[4*(1)+3] -= bands[4*(1)+0]*bands[4*(0)+3];
59   bands[4*(0)+0] = 0.0;
60   bands[4*(1)+2] /= bands[4*(1)+1];
61   bands[4*(1)+3] /= bands[4*(1)+1];
62   bands[4*(1)+1] = 1.0;
63 
64   // Now do rows 2 through M+1
65   for (int row=2; row < (M+1); row++) {
66     bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
67     bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
68     bands[4*(row)+2] /= bands[4*(row)+1];
69     bands[4*(row)+3] /= bands[4*(row)+1];
70     bands[4*(row)+0] = 0.0;
71     bands[4*(row)+1] = 1.0;
72   }
73 
74   // Do last row
75   bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
76   bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
77   bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
78   bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
79   bands[4*(M+1)+3] /= bands[4*(M+1)+2];
80   bands[4*(M+1)+2] = 1.0;
81 
82   coefs[(M+1)*cstride] = bands[4*(M+1)+3];
83   // Now back substitute up
84   for (int row=M; row>0; row--)
85     coefs[row*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[cstride*(row+1)];
86 
87   // Finish with first row
88   coefs[0] = bands[4*(0)+3] - bands[4*(0)+1]*coefs[1*cstride] - bands[4*(0)+2]*coefs[2*cstride];
89 }
90 
91 // On input, bands should be filled with:
92 // row 0   :  abcdInitial from boundary conditions
93 // rows 1:M:  basis functions in first 3 cols, data in last
94 // row M+1 :  abcdFinal   from boundary conditions
95 // cstride gives the stride between values in coefs.
96 // On exit, coefs with contain interpolating B-spline coefs
97 void
solve_periodic_interp_1d_s(float bands[],float coefs[],int M,int cstride)98 solve_periodic_interp_1d_s (float bands[], float coefs[],
99 			    int M, int cstride)
100 {
101   std::vector<float> lastCol(M);
102 
103   // Now solve:
104   // First and last rows are different
105   bands[4*(0)+2] /= bands[4*(0)+1];
106   bands[4*(0)+0] /= bands[4*(0)+1];
107   bands[4*(0)+3] /= bands[4*(0)+1];
108   bands[4*(0)+1]  = 1.0;
109   bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
110   bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
111   bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
112   lastCol[0] = bands[4*(0)+0];
113 
114   for (int row=1; row < (M-1); row++) {
115     bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
116     bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
117     lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
118     bands[4*(row)+0] = 0.0;
119     bands[4*(row)+2] /= bands[4*(row)+1];
120     bands[4*(row)+3] /= bands[4*(row)+1];
121     lastCol[row]  /= bands[4*(row)+1];
122     bands[4*(row)+1]  = 1.0;
123     if (row < (M-2)) {
124       bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
125       bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
126       bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
127     }
128   }
129 
130   // Now do last row
131   // The [2] element and [0] element are now on top of each other
132   bands[4*(M-1)+0] += bands[4*(M-1)+2];
133   bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
134   bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
135   bands[4*(M-1)+3] /= bands[4*(M-1)+1];
136   coefs[M*cstride] = bands[4*(M-1)+3];
137   for (int row=M-2; row>=0; row--)
138     coefs[(row+1)*cstride] =
139       bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
140 
141   coefs[0*cstride] = coefs[M*cstride];
142   coefs[(M+1)*cstride] = coefs[1*cstride];
143   coefs[(M+2)*cstride] = coefs[2*cstride];
144 
145 
146 }
147 
148 
149 // On input, bands should be filled with:
150 // row 0   :  abcdInitial from boundary conditions
151 // rows 1:M:  basis functions in first 3 cols, data in last
152 // row M+1 :  abcdFinal   from boundary conditions
153 // cstride gives the stride between values in coefs.
154 // On exit, coefs with contain interpolating B-spline coefs
155 void
solve_antiperiodic_interp_1d_s(float bands[],float coefs[],int M,int cstride)156 solve_antiperiodic_interp_1d_s (float bands[], float coefs[],
157 				int M, int cstride)
158 {
159   bands[4*0+0]     *= -1.0;
160   bands[4*(M-1)+2] *= -1.0;
161 
162   std::vector<float> lastCol(M);
163 
164   // Now solve:
165   // First and last rows are different
166   bands[4*(0)+2] /= bands[4*(0)+1];
167   bands[4*(0)+0] /= bands[4*(0)+1];
168   bands[4*(0)+3] /= bands[4*(0)+1];
169   bands[4*(0)+1]  = 1.0;
170   bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
171   bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
172   bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
173   lastCol[0] = bands[4*(0)+0];
174 
175   for (int row=1; row < (M-1); row++) {
176     bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
177     bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
178     lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
179     bands[4*(row)+0] = 0.0;
180     bands[4*(row)+2] /= bands[4*(row)+1];
181     bands[4*(row)+3] /= bands[4*(row)+1];
182     lastCol[row]  /= bands[4*(row)+1];
183     bands[4*(row)+1]  = 1.0;
184     if (row < (M-2)) {
185       bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
186       bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
187       bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
188     }
189   }
190 
191   // Now do last row
192   // The [2] element and [0] element are now on top of each other
193   bands[4*(M-1)+0] += bands[4*(M-1)+2];
194   bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
195   bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
196   bands[4*(M-1)+3] /= bands[4*(M-1)+1];
197   coefs[M*cstride] = bands[4*(M-1)+3];
198   for (int row=M-2; row>=0; row--)
199     coefs[(row+1)*cstride] =
200       bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
201 
202   coefs[0*cstride]     = -coefs[M*cstride];
203   coefs[(M+1)*cstride] = -coefs[1*cstride];
204   coefs[(M+2)*cstride] = -coefs[2*cstride];
205 
206 
207 }
208 
209 
210 
211 
212 #ifdef HIGH_PRECISION
213 void
find_coefs_1d_s(Ugrid grid,BCtype_s bc,float * data,intptr_t dstride,float * coefs,intptr_t cstride)214 find_coefs_1d_s (Ugrid grid, BCtype_s bc,
215 		 float *data,  intptr_t dstride,
216 		 float *coefs, intptr_t cstride)
217 {
218   BCtype_d d_bc;
219   double *d_data, *d_coefs;
220 
221   d_bc.lCode = bc.lCode;   d_bc.rCode = bc.rCode;
222   d_bc.lVal  = bc.lVal;    d_bc.rVal  = bc.rVal;
223   int M = grid.num, N;
224   if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC)    N = M+3;
225   else                         N = M+2;
226 
227   d_data  = new double[N];
228   d_coefs = new double[N];
229   for (int i=0; i<M; i++)
230     d_data[i] = data[i*dstride];
231   find_coefs_1d_d (grid, d_bc, d_data, 1, d_coefs, 1);
232   for (int i=0; i<N; i++)
233     coefs[i*cstride] = d_coefs[i];
234   delete[] d_data;
235   delete[] d_coefs;
236 }
237 
238 #else
239 void
find_coefs_1d_s(Ugrid grid,BCtype_s bc,float * data,intptr_t dstride,float * coefs,intptr_t cstride)240 find_coefs_1d_s (Ugrid grid, BCtype_s bc,
241 		 float *data,  intptr_t dstride,
242 		 float *coefs, intptr_t cstride)
243 {
244   int M = grid.num;
245   float basis[4] = {1.0/6.0, 2.0/3.0, 1.0/6.0, 0.0};
246   if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) {
247 
248     float *bands = new float[4*M];
249 
250     for (int i=0; i<M; i++) {
251       bands[4*i+0] = basis[0];
252       bands[4*i+1] = basis[1];
253       bands[4*i+2] = basis[2];
254       bands[4*i+3] = data[i*dstride];
255     }
256     if (bc.lCode == PERIODIC)
257       solve_periodic_interp_1d_s (bands, coefs, M, cstride);
258     else
259       solve_antiperiodic_interp_1d_s (bands, coefs, M, cstride);
260 
261     delete[] bands;
262   }
263   else {
264     // Setup boundary conditions
265     float abcd_left[4];
266     float abcd_right[4];
267     for (int i = 0; i < 4; i++) {
268         abcd_left[i] = 0.0;
269         abcd_right[i] = 0.0;
270     }
271 
272     // Left boundary
273     if (bc.lCode == FLAT || bc.lCode == NATURAL)
274       bc.lVal = 0.0;
275     if (bc.lCode == FLAT || bc.lCode == DERIV1) {
276       abcd_left[0] = -0.5 * grid.delta_inv;
277       abcd_left[1] =  0.0 * grid.delta_inv;
278       abcd_left[2] =  0.5 * grid.delta_inv;
279       abcd_left[3] =  bc.lVal;
280     }
281     if (bc.lCode == NATURAL || bc.lCode == DERIV2) {
282       abcd_left[0] = 1.0 * grid.delta_inv * grid.delta_inv;
283       abcd_left[1] =-2.0 * grid.delta_inv * grid.delta_inv;
284       abcd_left[2] = 1.0 * grid.delta_inv * grid.delta_inv;
285       abcd_left[3] = bc.lVal;
286     }
287 
288     // Right boundary
289     if (bc.rCode == FLAT || bc.rCode == NATURAL)
290       bc.rVal = 0.0;
291     if (bc.rCode == FLAT || bc.rCode == DERIV1) {
292       abcd_right[0] = -0.5 * grid.delta_inv;
293       abcd_right[1] =  0.0 * grid.delta_inv;
294       abcd_right[2] =  0.5 * grid.delta_inv;
295       abcd_right[3] =  bc.rVal;
296     }
297     if (bc.rCode == NATURAL || bc.rCode == DERIV2) {
298       abcd_right[0] = 1.0 *grid.delta_inv * grid.delta_inv;
299       abcd_right[1] =-2.0 *grid.delta_inv * grid.delta_inv;
300       abcd_right[2] = 1.0 *grid.delta_inv * grid.delta_inv;
301       abcd_right[3] = bc.rVal;
302     }
303 
304     float *bands = new float[4*(M+2)];
305     for (int i=0; i<4; i++) {
306       bands[4*( 0 )+i]   = abcd_left[i];
307       bands[4*(M+1)+i] = abcd_right[i];
308     }
309     for (int i=0; i<M; i++) {
310       for (int j=0; j<3; j++)
311 	bands[4*(i+1)+j] = basis[j];
312       bands[4*(i+1)+3] = data[i*dstride];
313     }
314     // Now, solve for coefficients
315     solve_deriv_interp_1d_s (bands, coefs, M, cstride);
316     delete[] bands;
317   }
318 }
319 
320 #endif
321 
322 ////////////////////////////////////////////////////////////
323 ////////////////////////////////////////////////////////////
324 ////     Single-Precision, Real Creation Routines       ////
325 ////////////////////////////////////////////////////////////
326 ////////////////////////////////////////////////////////////
327 
328 // On input, bands should be filled with:
329 // row 0   :  abcdInitial from boundary conditions
330 // rows 1:M:  basis functions in first 3 cols, data in last
331 // row M+1 :  abcdFinal   from boundary conditions
332 // cstride gives the stride between values in coefs.
333 // On exit, coefs with contain interpolating B-spline coefs
334 UBspline_1d_s*
create_UBspline_1d_s(Ugrid x_grid,BCtype_s xBC,float * data)335 create_UBspline_1d_s (Ugrid x_grid, BCtype_s xBC, float *data)
336 {
337   // Create new spline
338   UBspline_1d_s* restrict spline = new UBspline_1d_s;
339   spline->spcode = U1D;
340   spline->tcode  = SINGLE_REAL;
341   spline->xBC = xBC; spline->x_grid = x_grid;
342 
343   // Setup internal variables
344   int M = x_grid.num;
345   int N;
346 
347   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
348     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
349     N = M+3;
350   }
351   else {
352     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
353     N = M+2;
354   }
355 
356   x_grid.delta_inv = 1.0/x_grid.delta;
357   spline->x_grid   = x_grid;
358 #ifndef HAVE_SSE2
359   spline->coefs = (float*)malloc (sizeof(float)*N);
360 #else
361   posix_memalign ((void**)&spline->coefs, 16, (sizeof(float)*N));
362 #endif
363   find_coefs_1d_s (spline->x_grid, xBC, data, 1, spline->coefs, 1);
364 
365   init_sse_data();
366   return spline;
367 }
368 
369 void
recompute_UBspline_1d_s(UBspline_1d_s * spline,float * data)370 recompute_UBspline_1d_s (UBspline_1d_s* spline, float *data)
371 {
372   find_coefs_1d_s (spline->x_grid, spline->xBC, data, 1, spline->coefs, 1);
373 }
374 
375 
376 UBspline_2d_s*
create_UBspline_2d_s(Ugrid x_grid,Ugrid y_grid,BCtype_s xBC,BCtype_s yBC,float * data)377 create_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid,
378 		      BCtype_s xBC, BCtype_s yBC, float *data)
379 {
380   // Create new spline
381   UBspline_2d_s* restrict spline = new UBspline_2d_s;
382   spline->spcode = U2D;
383   spline->tcode  = SINGLE_REAL;
384   spline->xBC = xBC;
385   spline->yBC = yBC;
386   // Setup internal variables
387   int Mx = x_grid.num;
388   int My = y_grid.num;
389   int Nx, Ny;
390 
391   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
392   else                           Nx = Mx+2;
393   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
394   x_grid.delta_inv = 1.0/x_grid.delta;
395   spline->x_grid   = x_grid;
396 
397   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
398   else                           Ny = My+2;
399   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
400   y_grid.delta_inv = 1.0/y_grid.delta;
401   spline->y_grid   = y_grid;
402   spline->x_stride = Ny;
403 #ifndef HAVE_SSE2
404   spline->coefs = (float*)malloc (sizeof(float)*Nx*Ny);
405 #else
406   posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny);
407 #endif
408 
409   // First, solve in the X-direction
410   for (int iy=0; iy<My; iy++) {
411     intptr_t doffset = iy;
412     intptr_t coffset = iy;
413     find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My,
414 		     spline->coefs+coffset, Ny);
415   }
416 
417   // Now, solve in the Y-direction
418   for (int ix=0; ix<Nx; ix++) {
419     intptr_t doffset = ix*Ny;
420     intptr_t coffset = ix*Ny;
421     find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, 1,
422 		     spline->coefs+coffset, 1);
423   }
424   init_sse_data();
425   return spline;
426 }
427 
428 void
recompute_UBspline_2d_s(UBspline_2d_s * spline,float * data)429 recompute_UBspline_2d_s (UBspline_2d_s* spline, float *data)
430 {
431   int Mx = spline->x_grid.num;
432   int My = spline->y_grid.num;
433   int Nx, Ny;
434 
435   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
436   else                           Nx = Mx+2;
437   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     Ny = My+3;
438   else                           Ny = My+2;
439 
440   // First, solve in the X-direction
441   for (int iy=0; iy<My; iy++) {
442     intptr_t doffset = iy;
443     intptr_t coffset = iy;
444     find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My,
445 		     spline->coefs+coffset, Ny);
446   }
447 
448   // Now, solve in the Y-direction
449   for (int ix=0; ix<Nx; ix++) {
450     intptr_t doffset = ix*Ny;
451     intptr_t coffset = ix*Ny;
452     find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, 1,
453 		     spline->coefs+coffset, 1);
454   }
455 }
456 
457 
458 UBspline_3d_s*
create_UBspline_3d_s(Ugrid x_grid,Ugrid y_grid,Ugrid z_grid,BCtype_s xBC,BCtype_s yBC,BCtype_s zBC,float * data)459 create_UBspline_3d_s (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
460 		      BCtype_s xBC, BCtype_s yBC, BCtype_s zBC,
461 		      float *data)
462 {
463   // Create new spline
464   UBspline_3d_s* restrict spline = new UBspline_3d_s;
465   spline->spcode = U3D;
466   spline->tcode  = SINGLE_REAL;
467   spline->xBC = xBC;
468   spline->yBC = yBC;
469   spline->zBC = zBC;
470   // Setup internal variables
471   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
472   int Nx, Ny, Nz;
473 
474   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
475   else                           Nx = Mx+2;
476   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
477   x_grid.delta_inv = 1.0/x_grid.delta;
478   spline->x_grid   = x_grid;
479 
480   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
481   else                           Ny = My+2;
482   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
483   y_grid.delta_inv = 1.0/y_grid.delta;
484   spline->y_grid   = y_grid;
485 
486   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
487   else                           Nz = Mz+2;
488   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
489   z_grid.delta_inv = 1.0/z_grid.delta;
490   spline->z_grid   = z_grid;
491 
492   spline->x_stride = Ny*Nz;
493   spline->y_stride = Nz;
494 
495 #ifndef HAVE_SSE2
496   spline->coefs      = new float[Nx*Ny*Nz];
497 #else
498   posix_memalign ((void**)&spline->coefs, 16, (sizeof(float)*Nx*Ny*Nz));
499 #endif
500 
501   // First, solve in the X-direction
502   for (int iy=0; iy<My; iy++)
503     for (int iz=0; iz<Mz; iz++) {
504       intptr_t doffset = iy*Mz+iz;
505       intptr_t coffset = iy*Nz+iz;
506       find_coefs_1d_s (spline->x_grid, xBC, data+doffset, My*Mz,
507 		       spline->coefs+coffset, Ny*Nz);
508     }
509 
510   // Now, solve in the Y-direction
511   for (int ix=0; ix<Nx; ix++)
512     for (int iz=0; iz<Nz; iz++) {
513       intptr_t doffset = ix*Ny*Nz + iz;
514       intptr_t coffset = ix*Ny*Nz + iz;
515       find_coefs_1d_s (spline->y_grid, yBC, spline->coefs+doffset, Nz,
516 		       spline->coefs+coffset, Nz);
517     }
518 
519   // Now, solve in the Z-direction
520   for (int ix=0; ix<Nx; ix++)
521     for (int iy=0; iy<Ny; iy++) {
522       intptr_t doffset = (ix*Ny+iy)*Nz;
523       intptr_t coffset = (ix*Ny+iy)*Nz;
524       find_coefs_1d_s (spline->z_grid, zBC, spline->coefs+doffset, 1,
525 		       spline->coefs+coffset, 1);
526     }
527   init_sse_data();
528   return spline;
529 }
530 
531 void
recompute_UBspline_3d_s(UBspline_3d_s * spline,float * data)532 recompute_UBspline_3d_s (UBspline_3d_s* spline, float *data)
533 {
534   int Mx = spline->x_grid.num;
535   int My = spline->y_grid.num;
536   int Mz = spline->z_grid.num;
537   int Nx, Ny, Nz;
538 
539   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
540   else                           Nx = Mx+2;
541   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     Ny = My+3;
542   else                           Ny = My+2;
543   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
544   else                           Nz = Mz+2;
545 
546   // First, solve in the X-direction
547   for (int iy=0; iy<My; iy++)
548     for (int iz=0; iz<Mz; iz++) {
549       intptr_t doffset = iy*Mz+iz;
550       intptr_t coffset = iy*Nz+iz;
551       find_coefs_1d_s (spline->x_grid, spline->xBC, data+doffset, My*Mz,
552 		       spline->coefs+coffset, Ny*Nz);
553     }
554 
555   // Now, solve in the Y-direction
556   for (int ix=0; ix<Nx; ix++)
557     for (int iz=0; iz<Nz; iz++) {
558       intptr_t doffset = ix*Ny*Nz + iz;
559       intptr_t coffset = ix*Ny*Nz + iz;
560       find_coefs_1d_s (spline->y_grid, spline->yBC, spline->coefs+doffset, Nz,
561 		       spline->coefs+coffset, Nz);
562     }
563 
564   // Now, solve in the Z-direction
565   for (int ix=0; ix<Nx; ix++)
566     for (int iy=0; iy<Ny; iy++) {
567       intptr_t doffset = (ix*Ny+iy)*Nz;
568       intptr_t coffset = (ix*Ny+iy)*Nz;
569       find_coefs_1d_s (spline->z_grid, spline->zBC, spline->coefs+doffset, 1,
570 		       spline->coefs+coffset, 1);
571     }
572 }
573 
574 
575 ////////////////////////////////////////////////////////////
576 ////////////////////////////////////////////////////////////
577 ////    Single-Precision, Complex Creation Routines     ////
578 ////////////////////////////////////////////////////////////
579 ////////////////////////////////////////////////////////////
580 
581 // On input, bands should be filled with:
582 // row 0   :  abcdInitial from boundary conditions
583 // rows 1:M:  basis functions in first 3 cols, data in last
584 // row M+1 :  abcdFinal   from boundary conditions
585 // cstride gives the stride between values in coefs.
586 // On exit, coefs with contain interpolating B-spline coefs
587 UBspline_1d_c*
create_UBspline_1d_c(Ugrid x_grid,BCtype_c xBC,complex_float * data)588 create_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, complex_float *data)
589 {
590   // Create new spline
591   UBspline_1d_c* restrict spline = new UBspline_1d_c;
592   spline->spcode = U1D;
593   spline->tcode  = SINGLE_COMPLEX;
594   spline->xBC = xBC;
595   // Setup internal variables
596   int M = x_grid.num;
597   int N;
598 
599   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
600     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
601     N = M+3;
602   }
603   else {
604     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
605     N = M+2;
606   }
607 
608   x_grid.delta_inv = 1.0/x_grid.delta;
609   spline->x_grid   = x_grid;
610 #ifndef HAVE_SSE2
611   spline->coefs = (complex_float*)malloc (2*sizeof(float)*N);
612 #else
613   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*N);
614 #endif
615 
616   BCtype_s xBC_r, xBC_i;
617   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
618   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
619   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
620   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
621   // Real part
622   find_coefs_1d_s (spline->x_grid, xBC_r,
623 		   (float*)data, 2, (float*)spline->coefs, 2);
624   // Imaginarty part
625   find_coefs_1d_s (spline->x_grid, xBC_i,
626 		   ((float*)data)+1, 2, ((float*)spline->coefs+1), 2);
627 
628   init_sse_data();
629   return spline;
630 }
631 
632 void
recompute_UBspline_1d_c(UBspline_1d_c * spline,complex_float * data)633 recompute_UBspline_1d_c (UBspline_1d_c* spline, complex_float *data)
634 {
635 
636   BCtype_s xBC_r, xBC_i;
637   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
638   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
639   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
640   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
641 
642   // Real part
643   find_coefs_1d_s (spline->x_grid, xBC_r,
644 		   (float*)data, 2, (float*)spline->coefs, 2);
645   // Imaginarty part
646   find_coefs_1d_s (spline->x_grid, xBC_i,
647 		   ((float*)data)+1, 2, ((float*)spline->coefs+1), 2);
648 }
649 
650 
651 
652 UBspline_2d_c*
create_UBspline_2d_c(Ugrid x_grid,Ugrid y_grid,BCtype_c xBC,BCtype_c yBC,complex_float * data)653 create_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid,
654 		      BCtype_c xBC, BCtype_c yBC, complex_float *data)
655 {
656   // Create new spline
657   UBspline_2d_c* restrict spline = new UBspline_2d_c;
658   spline->spcode = U2D;
659   spline->tcode  = SINGLE_COMPLEX;
660   spline->xBC = xBC;
661   spline->yBC = yBC;
662 
663   // Setup internal variables
664   int Mx = x_grid.num;
665   int My = y_grid.num;
666   int Nx, Ny;
667 
668   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
669   else                           Nx = Mx+2;
670   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
671   x_grid.delta_inv = 1.0/x_grid.delta;
672   spline->x_grid   = x_grid;
673 
674   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
675   else                           Ny = My+2;
676   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
677   y_grid.delta_inv = 1.0/y_grid.delta;
678   spline->y_grid   = y_grid;
679   spline->x_stride = Ny;
680 
681 #ifndef HAVE_SSE2
682   spline->coefs = (complex_float*)malloc (2*sizeof(float)*Nx*Ny);
683 #else
684   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*Nx*Ny);
685 #endif
686 
687   BCtype_s xBC_r, xBC_i, yBC_r, yBC_i;
688   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
689   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
690   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
691   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
692   yBC_r.lCode = yBC.lCode;  yBC_r.rCode = yBC.rCode;
693   yBC_r.lVal  = yBC.lVal_r; yBC_r.rVal  = yBC.rVal_r;
694   yBC_i.lCode = yBC.lCode;  yBC_i.rCode = yBC.rCode;
695   yBC_i.lVal  = yBC.lVal_i; yBC_i.rVal  = yBC.rVal_i;
696   // First, solve in the X-direction
697   for (int iy=0; iy<My; iy++) {
698     intptr_t doffset = 2*iy;
699     intptr_t coffset = 2*iy;
700     // Real part
701     find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My,
702 		     (float*)spline->coefs+coffset, 2*Ny);
703     // Imag part
704     find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My,
705 		     ((float*)spline->coefs)+coffset+1, 2*Ny);
706   }
707 
708   // Now, solve in the Y-direction
709   for (int ix=0; ix<Nx; ix++) {
710     intptr_t doffset = 2*ix*Ny;
711     intptr_t coffset = 2*ix*Ny;
712     // Real part
713     find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2,
714 		     ((float*)spline->coefs)+coffset, 2);
715     // Imag part
716     find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2,
717 		     ((float*)spline->coefs)+coffset+1, 2);
718   }
719   init_sse_data();
720   return spline;
721 }
722 
723 
724 void
recompute_UBspline_2d_c(UBspline_2d_c * spline,complex_float * data)725 recompute_UBspline_2d_c (UBspline_2d_c* spline, complex_float *data)
726 {
727   // Setup internal variables
728   int Mx = spline->x_grid.num;
729   int My = spline->y_grid.num;
730   int Nx, Ny;
731 
732   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
733     Nx = Mx+3;
734   else
735     Nx = Mx+2;
736   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
737     Ny = My+3;
738   else
739     Ny = My+2;
740 
741   BCtype_s xBC_r, xBC_i, yBC_r, yBC_i;
742   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
743   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
744   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
745   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
746   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
747   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
748   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
749   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
750 
751   // First, solve in the X-direction
752   for (int iy=0; iy<My; iy++) {
753     intptr_t doffset = 2*iy;
754     intptr_t coffset = 2*iy;
755     // Real part
756     find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My,
757 		     (float*)spline->coefs+coffset, 2*Ny);
758     // Imag part
759     find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My,
760 		     ((float*)spline->coefs)+coffset+1, 2*Ny);
761   }
762 
763   // Now, solve in the Y-direction
764   for (int ix=0; ix<Nx; ix++) {
765     intptr_t doffset = 2*ix*Ny;
766     intptr_t coffset = 2*ix*Ny;
767     // Real part
768     find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2,
769 		     ((float*)spline->coefs)+coffset, 2);
770     // Imag part
771     find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2,
772 		     ((float*)spline->coefs)+coffset+1, 2);
773   }
774 }
775 
776 UBspline_3d_c*
create_UBspline_3d_c(Ugrid x_grid,Ugrid y_grid,Ugrid z_grid,BCtype_c xBC,BCtype_c yBC,BCtype_c zBC,complex_float * data)777 create_UBspline_3d_c (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
778 		      BCtype_c xBC, BCtype_c yBC, BCtype_c zBC,
779 		      complex_float *data)
780 {
781   // Create new spline
782   UBspline_3d_c* restrict spline = new UBspline_3d_c;
783   spline->spcode = U3D;
784   spline->tcode  = SINGLE_COMPLEX;
785   spline->xBC = xBC;
786   spline->yBC = yBC;
787   spline->zBC = zBC;
788 
789   // Setup internal variables
790   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
791   int Nx, Ny, Nz;
792 
793   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
794   else                           Nx = Mx+2;
795   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
796   x_grid.delta_inv = 1.0/x_grid.delta;
797   spline->x_grid   = x_grid;
798 
799   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
800   else                           Ny = My+2;
801   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
802   y_grid.delta_inv = 1.0/y_grid.delta;
803   spline->y_grid   = y_grid;
804 
805   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
806   else                           Nz = Mz+2;
807   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
808   z_grid.delta_inv = 1.0/z_grid.delta;
809   spline->z_grid   = z_grid;
810 
811   spline->x_stride = Ny*Nz;
812   spline->y_stride = Nz;
813 
814 #ifndef HAVE_SSE2
815   spline->coefs      =  new complex_float[Nx*Ny*Nz];
816 #else
817   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*Nx*Ny*Nz);
818 #endif
819 
820   BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
821   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
822   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
823   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
824   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
825   yBC_r.lCode = yBC.lCode;  yBC_r.rCode = yBC.rCode;
826   yBC_r.lVal  = yBC.lVal_r; yBC_r.rVal  = yBC.rVal_r;
827   yBC_i.lCode = yBC.lCode;  yBC_i.rCode = yBC.rCode;
828   yBC_i.lVal  = yBC.lVal_i; yBC_i.rVal  = yBC.rVal_i;
829   zBC_r.lCode = zBC.lCode;  zBC_r.rCode = zBC.rCode;
830   zBC_r.lVal  = zBC.lVal_r; zBC_r.rVal  = zBC.rVal_r;
831   zBC_i.lCode = zBC.lCode;  zBC_i.rCode = zBC.rCode;
832   zBC_i.lVal  = zBC.lVal_i; zBC_i.rVal  = zBC.rVal_i;
833   // First, solve in the X-direction
834   for (int iy=0; iy<My; iy++)
835     for (int iz=0; iz<Mz; iz++) {
836       intptr_t doffset = 2*(iy*Mz+iz);
837       intptr_t coffset = 2*(iy*Nz+iz);
838       // Real part
839       find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My*Mz,
840 		       ((float*)spline->coefs)+coffset, 2*Ny*Nz);
841       // Imag part
842       find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My*Mz,
843 		       ((float*)spline->coefs)+coffset+1, 2*Ny*Nz);
844     }
845 
846   // Now, solve in the Y-direction
847   for (int ix=0; ix<Nx; ix++)
848     for (int iz=0; iz<Nz; iz++) {
849       intptr_t doffset = 2*(ix*Ny*Nz + iz);
850       intptr_t coffset = 2*(ix*Ny*Nz + iz);
851       // Real part
852       find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2*Nz,
853 		       ((float*)spline->coefs)+coffset, 2*Nz);
854       // Imag part
855       find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2*Nz,
856 		       ((float*)spline->coefs)+coffset+1, 2*Nz);
857     }
858 
859   // Now, solve in the Z-direction
860   for (int ix=0; ix<Nx; ix++)
861     for (int iy=0; iy<Ny; iy++) {
862       intptr_t doffset = 2*((ix*Ny+iy)*Nz);
863       intptr_t coffset = 2*((ix*Ny+iy)*Nz);
864       // Real part
865       find_coefs_1d_s (spline->z_grid, zBC_r, ((float*)spline->coefs)+doffset, 2,
866 		       ((float*)spline->coefs)+coffset, 2);
867       // Imag part
868       find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)spline->coefs)+doffset+1, 2,
869 		       ((float*)spline->coefs)+coffset+1, 2);
870     }
871 
872   init_sse_data();
873   return spline;
874 }
875 
876 void
recompute_UBspline_3d_c(UBspline_3d_c * spline,complex_float * data)877 recompute_UBspline_3d_c (UBspline_3d_c* spline, complex_float *data)
878 {
879   // Setup internal variables
880   int Mx = spline->x_grid.num;
881   int My = spline->y_grid.num;
882   int Mz = spline->z_grid.num;
883   int Nx, Ny, Nz;
884 
885   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
886   else                           Nx = Mx+2;
887   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)     Ny = My+3;
888   else                           Ny = My+2;
889   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
890   else                           Nz = Mz+2;
891 
892   BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
893   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
894   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
895   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
896   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
897   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
898   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
899   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
900   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
901   zBC_r.lCode = spline->zBC.lCode;  zBC_r.rCode = spline->zBC.rCode;
902   zBC_r.lVal  = spline->zBC.lVal_r; zBC_r.rVal  = spline->zBC.rVal_r;
903   zBC_i.lCode = spline->zBC.lCode;  zBC_i.rCode = spline->zBC.rCode;
904   zBC_i.lVal  = spline->zBC.lVal_i; zBC_i.rVal  = spline->zBC.rVal_i;
905   // First, solve in the X-direction
906   for (int iy=0; iy<My; iy++)
907     for (int iz=0; iz<Mz; iz++) {
908       intptr_t doffset = 2*(iy*Mz+iz);
909       intptr_t coffset = 2*(iy*Nz+iz);
910       // Real part
911       find_coefs_1d_s (spline->x_grid, xBC_r, ((float*)data)+doffset, 2*My*Mz,
912 		       ((float*)spline->coefs)+coffset, 2*Ny*Nz);
913       // Imag part
914       find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My*Mz,
915 		       ((float*)spline->coefs)+coffset+1, 2*Ny*Nz);
916     }
917 
918   // Now, solve in the Y-direction
919   for (int ix=0; ix<Nx; ix++)
920     for (int iz=0; iz<Nz; iz++) {
921       intptr_t doffset = 2*(ix*Ny*Nz + iz);
922       intptr_t coffset = 2*(ix*Ny*Nz + iz);
923       // Real part
924       find_coefs_1d_s (spline->y_grid, yBC_r, ((float*)spline->coefs)+doffset, 2*Nz,
925 		       ((float*)spline->coefs)+coffset, 2*Nz);
926       // Imag part
927       find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2*Nz,
928 		       ((float*)spline->coefs)+coffset+1, 2*Nz);
929     }
930 
931   // Now, solve in the Z-direction
932   for (int ix=0; ix<Nx; ix++)
933     for (int iy=0; iy<Ny; iy++) {
934       intptr_t doffset = 2*((ix*Ny+iy)*Nz);
935       intptr_t coffset = 2*((ix*Ny+iy)*Nz);
936       // Real part
937       find_coefs_1d_s (spline->z_grid, zBC_r, ((float*)spline->coefs)+doffset, 2,
938 		       ((float*)spline->coefs)+coffset, 2);
939       // Imag part
940       find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)spline->coefs)+doffset+1, 2,
941 		       ((float*)spline->coefs)+coffset+1, 2);
942     }
943 }
944 
945 
946 ////////////////////////////////////////////////////////////
947 ////////////////////////////////////////////////////////////
948 ////     Double-Precision, Real Creation Routines       ////
949 ////////////////////////////////////////////////////////////
950 ////////////////////////////////////////////////////////////
951 
952 // On input, bands should be filled with:
953 // row 0   :  abcdInitial from boundary conditions
954 // rows 1:M:  basis functions in first 3 cols, data in last
955 // row M+1 :  abcdFinal   from boundary conditions
956 // cstride gives the stride between values in coefs.
957 // On exit, coefs with contain interpolating B-spline coefs
958 void
solve_deriv_interp_1d_d(double bands[],double coefs[],int M,int cstride)959 solve_deriv_interp_1d_d (double bands[], double coefs[],
960 			 int M, int cstride)
961 {
962   // Solve interpolating equations
963   // First and last rows are different
964   bands[4*(0)+1] /= bands[4*(0)+0];
965   bands[4*(0)+2] /= bands[4*(0)+0];
966   bands[4*(0)+3] /= bands[4*(0)+0];
967   bands[4*(0)+0] = 1.0;
968   bands[4*(1)+1] -= bands[4*(1)+0]*bands[4*(0)+1];
969   bands[4*(1)+2] -= bands[4*(1)+0]*bands[4*(0)+2];
970   bands[4*(1)+3] -= bands[4*(1)+0]*bands[4*(0)+3];
971   bands[4*(0)+0] = 0.0;
972   bands[4*(1)+2] /= bands[4*(1)+1];
973   bands[4*(1)+3] /= bands[4*(1)+1];
974   bands[4*(1)+1] = 1.0;
975 
976   // Now do rows 2 through M+1
977   for (int row=2; row < (M+1); row++) {
978     bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
979     bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
980     bands[4*(row)+2] /= bands[4*(row)+1];
981     bands[4*(row)+3] /= bands[4*(row)+1];
982     bands[4*(row)+0] = 0.0;
983     bands[4*(row)+1] = 1.0;
984   }
985 
986   // Do last row
987   bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
988   bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
989   bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
990   bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
991   bands[4*(M+1)+3] /= bands[4*(M+1)+2];
992   bands[4*(M+1)+2] = 1.0;
993 
994   coefs[(M+1)*cstride] = bands[4*(M+1)+3];
995   // Now back substitute up
996   for (int row=M; row>0; row--)
997     coefs[row*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[cstride*(row+1)];
998 
999   // Finish with first row
1000   coefs[0] = bands[4*(0)+3] - bands[4*(0)+1]*coefs[1*cstride] - bands[4*(0)+2]*coefs[2*cstride];
1001 }
1002 
1003 // On input, bands should be filled with:
1004 // row 0   :  abcdInitial from boundary conditions
1005 // rows 1:M:  basis functions in first 3 cols, data in last
1006 // row M+1 :  abcdFinal   from boundary conditions
1007 // cstride gives the stride between values in coefs.
1008 // On exit, coefs with contain interpolating B-spline coefs
1009 void
solve_periodic_interp_1d_d(double bands[],double coefs[],int M,int cstride)1010 solve_periodic_interp_1d_d (double bands[], double coefs[],
1011 			    int M, int cstride)
1012 {
1013   std::vector<double> lastCol(M);
1014 
1015   // Now solve:
1016   // First and last rows are different
1017   bands[4*(0)+2] /= bands[4*(0)+1];
1018   bands[4*(0)+0] /= bands[4*(0)+1];
1019   bands[4*(0)+3] /= bands[4*(0)+1];
1020   bands[4*(0)+1]  = 1.0;
1021   bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
1022   bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
1023   bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
1024   lastCol[0] = bands[4*(0)+0];
1025 
1026   for (int row=1; row < (M-1); row++) {
1027     bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
1028     bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
1029     lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
1030     bands[4*(row)+0] = 0.0;
1031     bands[4*(row)+2] /= bands[4*(row)+1];
1032     bands[4*(row)+3] /= bands[4*(row)+1];
1033     lastCol[row]  /= bands[4*(row)+1];
1034     bands[4*(row)+1]  = 1.0;
1035     if (row < (M-2)) {
1036       bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
1037       bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
1038       bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
1039     }
1040   }
1041 
1042   // Now do last row
1043   // The [2] element and [0] element are now on top of each other
1044   bands[4*(M-1)+0] += bands[4*(M-1)+2];
1045   bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
1046   bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
1047   bands[4*(M-1)+3] /= bands[4*(M-1)+1];
1048   coefs[M*cstride] = bands[4*(M-1)+3];
1049   for (int row=M-2; row>=0; row--)
1050     coefs[(row+1)*cstride] =
1051       bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
1052 
1053   coefs[0*cstride] = coefs[M*cstride];
1054   coefs[(M+1)*cstride] = coefs[1*cstride];
1055   coefs[(M+2)*cstride] = coefs[2*cstride];
1056 }
1057 
1058 // On input, bands should be filled with:
1059 // row 0   :  abcdInitial from boundary conditions
1060 // rows 1:M:  basis functions in first 3 cols, data in last
1061 // row M+1 :  abcdFinal   from boundary conditions
1062 // cstride gives the stride between values in coefs.
1063 // On exit, coefs with contain interpolating B-spline coefs
1064 void
solve_antiperiodic_interp_1d_d(double bands[],double coefs[],int M,int cstride)1065 solve_antiperiodic_interp_1d_d (double bands[], double coefs[],
1066 			    int M, int cstride)
1067 {
1068   std::vector<double> lastCol(M);
1069 
1070   bands[4*0+0]     *= -1.0;
1071   bands[4*(M-1)+2] *= -1.0;
1072   // Now solve:
1073   // First and last rows are different
1074   bands[4*(0)+2] /= bands[4*(0)+1];
1075   bands[4*(0)+0] /= bands[4*(0)+1];
1076   bands[4*(0)+3] /= bands[4*(0)+1];
1077   bands[4*(0)+1]  = 1.0;
1078   bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
1079   bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
1080   bands[4*(M-1)+2]  = -bands[4*(M-1)+2]*bands[4*(0)+2];
1081   lastCol[0] = bands[4*(0)+0];
1082 
1083   for (int row=1; row < (M-1); row++) {
1084     bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
1085     bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
1086     lastCol[row]   = -bands[4*(row)+0] * lastCol[row-1];
1087     bands[4*(row)+0] = 0.0;
1088     bands[4*(row)+2] /= bands[4*(row)+1];
1089     bands[4*(row)+3] /= bands[4*(row)+1];
1090     lastCol[row]  /= bands[4*(row)+1];
1091     bands[4*(row)+1]  = 1.0;
1092     if (row < (M-2)) {
1093       bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
1094       bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
1095       bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
1096     }
1097   }
1098 
1099   // Now do last row
1100   // The [2] element and [0] element are now on top of each other
1101   bands[4*(M-1)+0] += bands[4*(M-1)+2];
1102   bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
1103   bands[4*(M-1)+3] -= bands[4*(M-1)+0] *  bands[4*(M-2)+3];
1104   bands[4*(M-1)+3] /= bands[4*(M-1)+1];
1105   coefs[M*cstride] = bands[4*(M-1)+3];
1106   for (int row=M-2; row>=0; row--)
1107     coefs[(row+1)*cstride] =
1108       bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
1109 
1110   coefs[0*cstride]     = -coefs[M*cstride];
1111   coefs[(M+1)*cstride] = -coefs[1*cstride];
1112   coefs[(M+2)*cstride] = -coefs[2*cstride];
1113 }
1114 
1115 
1116 
1117 void
find_coefs_1d_d(Ugrid grid,BCtype_d bc,double * data,intptr_t dstride,double * coefs,intptr_t cstride)1118 find_coefs_1d_d (Ugrid grid, BCtype_d bc,
1119 		 double *data,  intptr_t dstride,
1120 		 double *coefs, intptr_t cstride)
1121 {
1122   int M = grid.num;
1123   double basis[4] = {1.0/6.0, 2.0/3.0, 1.0/6.0, 0.0};
1124   if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) {
1125 #ifdef HAVE_C_VARARRAYS
1126     double bands[M*4];
1127 #else
1128     double *bands = new double[4*M];
1129 #endif
1130     for (int i=0; i<M; i++) {
1131       bands[4*i+0] = basis[0];
1132       bands[4*i+1] = basis[1];
1133       bands[4*i+2] = basis[2];
1134       bands[4*i+3] = data[i*dstride];
1135     }
1136     if (bc.lCode == ANTIPERIODIC)
1137       solve_antiperiodic_interp_1d_d (bands, coefs, M, cstride);
1138     else
1139       solve_periodic_interp_1d_d (bands, coefs, M, cstride);
1140 
1141 
1142 #ifndef HAVE_C_VARARRAYS
1143     delete[] bands;
1144 #endif
1145   }
1146   else {
1147     // Setup boundary conditions
1148     double abcd_left[4];
1149     double abcd_right[4];
1150     for (int i = 0; i < 4; i++) {
1151         abcd_left[i] = 0.0;
1152         abcd_right[i] = 0.0;
1153     }
1154     // Left boundary
1155     if (bc.lCode == FLAT || bc.lCode == NATURAL)
1156       bc.lVal = 0.0;
1157     if (bc.lCode == FLAT || bc.lCode == DERIV1) {
1158       abcd_left[0] = -0.5 * grid.delta_inv;
1159       abcd_left[1] =  0.0 * grid.delta_inv;
1160       abcd_left[2] =  0.5 * grid.delta_inv;
1161       abcd_left[3] =  bc.lVal;
1162     }
1163     if (bc.lCode == NATURAL || bc.lCode == DERIV2) {
1164       abcd_left[0] = 1.0 * grid.delta_inv * grid.delta_inv;
1165       abcd_left[1] =-2.0 * grid.delta_inv * grid.delta_inv;
1166       abcd_left[2] = 1.0 * grid.delta_inv * grid.delta_inv;
1167       abcd_left[3] = bc.lVal;
1168     }
1169 
1170     // Right boundary
1171     if (bc.rCode == FLAT || bc.rCode == NATURAL)
1172       bc.rVal = 0.0;
1173     if (bc.rCode == FLAT || bc.rCode == DERIV1) {
1174       abcd_right[0] = -0.5 * grid.delta_inv;
1175       abcd_right[1] =  0.0 * grid.delta_inv;
1176       abcd_right[2] =  0.5 * grid.delta_inv;
1177       abcd_right[3] =  bc.rVal;
1178     }
1179     if (bc.rCode == NATURAL || bc.rCode == DERIV2) {
1180       abcd_right[0] = 1.0 *grid.delta_inv * grid.delta_inv;
1181       abcd_right[1] =-2.0 *grid.delta_inv * grid.delta_inv;
1182       abcd_right[2] = 1.0 *grid.delta_inv * grid.delta_inv;
1183       abcd_right[3] = bc.rVal;
1184     }
1185 #ifdef HAVE_C_VARARRAYS
1186     double bands[(M+2)*4];
1187 #else
1188     double *bands = new double[(M+2)*4];
1189 #endif
1190     for (int i=0; i<4; i++) {
1191       bands[4*( 0 )+i] = abcd_left[i];
1192       bands[4*(M+1)+i] = abcd_right[i];
1193     }
1194     for (int i=0; i<M; i++) {
1195       for (int j=0; j<3; j++)
1196 	bands[4*(i+1)+j] = basis[j];
1197       bands[4*(i+1)+3] = data[i*dstride];
1198     }
1199     // Now, solve for coefficients
1200     solve_deriv_interp_1d_d (bands, coefs, M, cstride);
1201 #ifndef HAVE_C_VARARRAYS
1202     delete[] bands;
1203 #endif
1204   }
1205 }
1206 
1207 
1208 
1209 UBspline_1d_d*
create_UBspline_1d_d(Ugrid x_grid,BCtype_d xBC,double * data)1210 create_UBspline_1d_d (Ugrid x_grid, BCtype_d xBC, double *data)
1211 {
1212   // Create new spline
1213   UBspline_1d_d* restrict spline = new UBspline_1d_d;
1214   spline->spcode = U1D;
1215   spline->tcode  = DOUBLE_REAL;
1216   spline->xBC = xBC;
1217 
1218   // Setup internal variables
1219   int M = x_grid.num;
1220   int N;
1221 
1222   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
1223     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
1224     N = M+3;
1225   }
1226   else {
1227     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
1228     N = M+2;
1229   }
1230 
1231   x_grid.delta_inv = 1.0/x_grid.delta;
1232   spline->x_grid   = x_grid;
1233 
1234 #ifndef HAVE_SSE2
1235   spline->coefs = (double*)malloc (sizeof(double)*N);
1236 #else
1237   posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*N);
1238 #endif
1239   find_coefs_1d_d (spline->x_grid, xBC, data, 1, spline->coefs, 1);
1240 
1241   init_sse_data();
1242   return spline;
1243 }
1244 
1245 void
recompute_UBspline_1d_d(UBspline_1d_d * spline,double * data)1246 recompute_UBspline_1d_d (UBspline_1d_d* spline, double *data)
1247 {
1248   find_coefs_1d_d (spline->x_grid, spline->xBC, data, 1, spline->coefs, 1);
1249 }
1250 
1251 
1252 UBspline_2d_d*
create_UBspline_2d_d(Ugrid x_grid,Ugrid y_grid,BCtype_d xBC,BCtype_d yBC,double * data)1253 create_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid,
1254 		      BCtype_d xBC, BCtype_d yBC, double *data)
1255 {
1256   // Create new spline
1257   UBspline_2d_d* restrict spline = new UBspline_2d_d;
1258   spline->spcode = U2D;
1259   spline->tcode  = DOUBLE_REAL;
1260   spline->xBC = xBC;
1261   spline->yBC = yBC;
1262 
1263   // Setup internal variables
1264   int Mx = x_grid.num;
1265   int My = y_grid.num;
1266   int Nx, Ny;
1267 
1268   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
1269   else                           Nx = Mx+2;
1270   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1271   x_grid.delta_inv = 1.0/x_grid.delta;
1272   spline->x_grid   = x_grid;
1273 
1274   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
1275   else                           Ny = My+2;
1276   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1277   y_grid.delta_inv = 1.0/y_grid.delta;
1278   spline->y_grid   = y_grid;
1279   spline->x_stride = Ny;
1280 
1281 #ifndef HAVE_SSE2
1282   spline->coefs = (double*)malloc (sizeof(double)*Nx*Ny);
1283 #else
1284   posix_memalign ((void**)&spline->coefs, 16, (sizeof(double)*Nx*Ny));
1285 #endif
1286 
1287   // First, solve in the X-direction
1288   for (int iy=0; iy<My; iy++) {
1289     intptr_t doffset = iy;
1290     intptr_t coffset = iy;
1291     find_coefs_1d_d (spline->x_grid, xBC, data+doffset, My,
1292 		     spline->coefs+coffset, Ny);
1293   }
1294 
1295   // Now, solve in the Y-direction
1296   for (int ix=0; ix<Nx; ix++) {
1297     intptr_t doffset = ix*Ny;
1298     intptr_t coffset = ix*Ny;
1299     find_coefs_1d_d (spline->y_grid, yBC, spline->coefs+doffset, 1,
1300 		     spline->coefs+coffset, 1);
1301   }
1302 
1303   init_sse_data();
1304   return spline;
1305 }
1306 
1307 void
recompute_UBspline_2d_d(UBspline_2d_d * spline,double * data)1308 recompute_UBspline_2d_d (UBspline_2d_d* spline, double *data)
1309 {
1310   int Mx = spline->x_grid.num;
1311   int My = spline->y_grid.num;
1312   int Nx, Ny;
1313 
1314   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1315     Nx = Mx+3;
1316   else
1317     Nx = Mx+2;
1318 
1319   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
1320     Ny = My+3;
1321   else
1322     Ny = My+2;
1323 
1324   // First, solve in the X-direction
1325   for (int iy=0; iy<My; iy++) {
1326     intptr_t doffset = iy;
1327     intptr_t coffset = iy;
1328     find_coefs_1d_d (spline->x_grid, spline->xBC, data+doffset, My,
1329 		     spline->coefs+coffset, Ny);
1330   }
1331 
1332   // Now, solve in the Y-direction
1333   for (int ix=0; ix<Nx; ix++) {
1334     intptr_t doffset = ix*Ny;
1335     intptr_t coffset = ix*Ny;
1336     find_coefs_1d_d (spline->y_grid, spline->yBC, spline->coefs+doffset, 1,
1337 		     spline->coefs+coffset, 1);
1338   }
1339 }
1340 
1341 
1342 UBspline_3d_d*
create_UBspline_3d_d(Ugrid x_grid,Ugrid y_grid,Ugrid z_grid,BCtype_d xBC,BCtype_d yBC,BCtype_d zBC,double * data)1343 create_UBspline_3d_d (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
1344 		      BCtype_d xBC, BCtype_d yBC, BCtype_d zBC,
1345 		      double *data)
1346 {
1347   // Create new spline
1348   UBspline_3d_d* restrict spline = new UBspline_3d_d;
1349   spline->spcode = U3D;
1350   spline->tcode  = DOUBLE_REAL;
1351   spline->xBC = xBC;
1352   spline->yBC = yBC;
1353   spline->zBC = zBC;
1354 
1355   // Setup internal variables
1356   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
1357   int Nx, Ny, Nz;
1358 
1359   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
1360   else                           Nx = Mx+2;
1361   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1362   x_grid.delta_inv = 1.0/x_grid.delta;
1363   spline->x_grid   = x_grid;
1364 
1365   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
1366   else                           Ny = My+2;
1367   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1368   y_grid.delta_inv = 1.0/y_grid.delta;
1369   spline->y_grid   = y_grid;
1370 
1371   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
1372   else                           Nz = Mz+2;
1373   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
1374   z_grid.delta_inv = 1.0/z_grid.delta;
1375   spline->z_grid   = z_grid;
1376 
1377   spline->x_stride = Ny*Nz;
1378   spline->y_stride = Nz;
1379 
1380 #ifndef HAVE_SSE2
1381   spline->coefs      = new double[Nx*Ny*Nz];
1382 #else
1383   posix_memalign ((void**)&spline->coefs, 16, (sizeof(double)*Nx*Ny*Nz));
1384 #endif
1385 
1386   // First, solve in the X-direction
1387   for (int iy=0; iy<My; iy++)
1388     for (int iz=0; iz<Mz; iz++) {
1389       intptr_t doffset = iy*Mz+iz;
1390       intptr_t coffset = iy*Nz+iz;
1391       find_coefs_1d_d (spline->x_grid, xBC, data+doffset, My*Mz,
1392 		       spline->coefs+coffset, Ny*Nz);
1393     }
1394 
1395   // Now, solve in the Y-direction
1396   for (int ix=0; ix<Nx; ix++)
1397     for (int iz=0; iz<Nz; iz++) {
1398       intptr_t doffset = ix*Ny*Nz + iz;
1399       intptr_t coffset = ix*Ny*Nz + iz;
1400       find_coefs_1d_d (spline->y_grid, yBC, spline->coefs+doffset, Nz,
1401 		       spline->coefs+coffset, Nz);
1402     }
1403 
1404   // Now, solve in the Z-direction
1405   for (int ix=0; ix<Nx; ix++)
1406     for (int iy=0; iy<Ny; iy++) {
1407       intptr_t doffset = (ix*Ny+iy)*Nz;
1408       intptr_t coffset = (ix*Ny+iy)*Nz;
1409       find_coefs_1d_d (spline->z_grid, zBC, spline->coefs+doffset, 1,
1410 		       spline->coefs+coffset, 1);
1411     }
1412 
1413   init_sse_data();
1414   return spline;
1415 }
1416 
1417 void
recompute_UBspline_3d_d(UBspline_3d_d * spline,double * data)1418 recompute_UBspline_3d_d (UBspline_3d_d* spline, double *data)
1419 {
1420   int Mx = spline->x_grid.num;
1421   int My = spline->y_grid.num;
1422   int Mz = spline->z_grid.num;
1423   int Nx, Ny, Nz;
1424 
1425   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1426     Nx = Mx+3;
1427   else
1428     Nx = Mx+2;
1429   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
1430     Ny = My+3;
1431   else
1432     Ny = My+2;
1433   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)
1434     Nz = Mz+3;
1435   else
1436     Nz = Mz+2;
1437 
1438   // First, solve in the X-direction
1439   for (int iy=0; iy<My; iy++)
1440     for (int iz=0; iz<Mz; iz++) {
1441       intptr_t doffset = iy*Mz+iz;
1442       intptr_t coffset = iy*Nz+iz;
1443       find_coefs_1d_d (spline->x_grid, spline->xBC, data+doffset, My*Mz,
1444 		       spline->coefs+coffset, Ny*Nz);
1445     }
1446 
1447   // Now, solve in the Y-direction
1448   for (int ix=0; ix<Nx; ix++)
1449     for (int iz=0; iz<Nz; iz++) {
1450       intptr_t doffset = ix*Ny*Nz + iz;
1451       intptr_t coffset = ix*Ny*Nz + iz;
1452       find_coefs_1d_d (spline->y_grid, spline->yBC, spline->coefs+doffset, Nz,
1453 		       spline->coefs+coffset, Nz);
1454     }
1455 
1456   // Now, solve in the Z-direction
1457   for (int ix=0; ix<Nx; ix++)
1458     for (int iy=0; iy<Ny; iy++) {
1459       intptr_t doffset = (ix*Ny+iy)*Nz;
1460       intptr_t coffset = (ix*Ny+iy)*Nz;
1461       find_coefs_1d_d (spline->z_grid, spline->zBC, spline->coefs+doffset, 1,
1462 		       spline->coefs+coffset, 1);
1463     }
1464 }
1465 
1466 
1467 ////////////////////////////////////////////////////////////
1468 ////////////////////////////////////////////////////////////
1469 ////    Double-Precision, Complex Creation Routines     ////
1470 ////////////////////////////////////////////////////////////
1471 ////////////////////////////////////////////////////////////
1472 
1473 // On input, bands should be filled with:
1474 // row 0   :  abcdInitial from boundary conditions
1475 // rows 1:M:  basis functions in first 3 cols, data in last
1476 // row M+1 :  abcdFinal   from boundary conditions
1477 // cstride gives the stride between values in coefs.
1478 // On exit, coefs with contain interpolating B-spline coefs
1479 
1480 
1481 UBspline_1d_z*
create_UBspline_1d_z(Ugrid x_grid,BCtype_z xBC,complex_double * data)1482 create_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, complex_double *data)
1483 {
1484   // Create new spline
1485   UBspline_1d_z* restrict spline = new UBspline_1d_z;
1486   spline->spcode = U1D;
1487   spline->tcode  = DOUBLE_COMPLEX;
1488   spline->xBC = xBC;
1489 
1490   // Setup internal variables
1491   int M = x_grid.num;
1492   int N;
1493 
1494   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) {
1495     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num);
1496     N = M+3;
1497   }
1498   else {
1499     x_grid.delta     = (x_grid.end-x_grid.start)/(double)(x_grid.num-1);
1500     N = M+2;
1501   }
1502 
1503   x_grid.delta_inv = 1.0/x_grid.delta;
1504   spline->x_grid   = x_grid;
1505 #ifndef HAVE_SSE2
1506   spline->coefs = (complex_double*)malloc (2*sizeof(double)*N);
1507 #else
1508   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*N);
1509 #endif
1510 
1511   BCtype_d xBC_r, xBC_i;
1512   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
1513   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
1514   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
1515   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
1516   // Real part
1517   find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, 2,
1518 		   (double*)spline->coefs, 2);
1519   // Imaginarty part
1520   find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, 2,
1521 		   ((double*)spline->coefs)+1, 2);
1522 
1523   init_sse_data();
1524   return spline;
1525 }
1526 
1527 void
recompute_UBspline_1d_z(UBspline_1d_z * spline,complex_double * data)1528 recompute_UBspline_1d_z (UBspline_1d_z* spline, complex_double *data)
1529 {
1530 //  int M = spline->x_grid.num;
1531 //  int N;
1532 
1533 //  if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1534 //    N = M+3;
1535 //  else
1536 //    N = M+2;
1537 
1538   BCtype_d xBC_r, xBC_i;
1539   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
1540   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
1541   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
1542   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
1543   // Real part
1544   find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, 2,
1545 		   (double*)spline->coefs, 2);
1546   // Imaginarty part
1547   find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, 2,
1548 		   ((double*)spline->coefs)+1, 2);
1549 
1550 }
1551 
1552 
1553 UBspline_2d_z*
create_UBspline_2d_z(Ugrid x_grid,Ugrid y_grid,BCtype_z xBC,BCtype_z yBC,complex_double * data)1554 create_UBspline_2d_z (Ugrid x_grid, Ugrid y_grid,
1555 		      BCtype_z xBC, BCtype_z yBC, complex_double *data)
1556 {
1557   // Create new spline
1558   UBspline_2d_z* restrict spline = new UBspline_2d_z;
1559   spline->spcode = U2D;
1560   spline->tcode  = DOUBLE_COMPLEX;
1561   spline->xBC = xBC;
1562   spline->yBC = yBC;
1563 
1564   // Setup internal variables
1565   int Mx = x_grid.num;
1566   int My = y_grid.num;
1567   int Nx, Ny;
1568 
1569   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)
1570     Nx = Mx+3;
1571   else
1572     Nx = Mx+2;
1573   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1574   x_grid.delta_inv = 1.0/x_grid.delta;
1575   spline->x_grid   = x_grid;
1576 
1577   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)
1578     Ny = My+3;
1579   else
1580     Ny = My+2;
1581   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1582   y_grid.delta_inv = 1.0/y_grid.delta;
1583   spline->y_grid   = y_grid;
1584   spline->x_stride = Ny;
1585 
1586 #ifndef HAVE_SSE2
1587   spline->coefs = (complex_double*)malloc (2*sizeof(double)*Nx*Ny);
1588 #else
1589   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*Nx*Ny);
1590 #endif
1591 
1592   BCtype_d xBC_r, xBC_i, yBC_r, yBC_i;
1593   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
1594   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
1595   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
1596   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
1597   yBC_r.lCode = yBC.lCode;  yBC_r.rCode = yBC.rCode;
1598   yBC_r.lVal  = yBC.lVal_r; yBC_r.rVal  = yBC.rVal_r;
1599   yBC_i.lCode = yBC.lCode;  yBC_i.rCode = yBC.rCode;
1600   yBC_i.lVal  = yBC.lVal_i; yBC_i.rVal  = yBC.rVal_i;
1601   // First, solve in the X-direction
1602   for (int iy=0; iy<My; iy++) {
1603     intptr_t doffset = 2*iy;
1604     intptr_t coffset = 2*iy;
1605     // Real part
1606     find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data+doffset), 2*My,
1607 		     (double*)spline->coefs+coffset, 2*Ny);
1608     // Imag part
1609     find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My,
1610 		     ((double*)spline->coefs)+coffset+1, 2*Ny);
1611   }
1612 
1613   // Now, solve in the Y-direction
1614   for (int ix=0; ix<Nx; ix++) {
1615     intptr_t doffset = 2*ix*Ny;
1616     intptr_t coffset = 2*ix*Ny;
1617     // Real part
1618     find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2,
1619 		     (double*)spline->coefs+coffset, 2);
1620     // Imag part
1621     find_coefs_1d_d (spline->y_grid, yBC_i, (double*)spline->coefs+doffset+1, 2,
1622 		     ((double*)spline->coefs)+coffset+1, 2);
1623   }
1624 
1625   init_sse_data();
1626   return spline;
1627 }
1628 
1629 
1630 void
recompute_UBspline_2d_z(UBspline_2d_z * spline,complex_double * data)1631 recompute_UBspline_2d_z (UBspline_2d_z* spline, complex_double *data)
1632 {
1633   int Mx = spline->x_grid.num;
1634   int My = spline->y_grid.num;
1635   int Nx, Ny;
1636 
1637   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1638     Nx = Mx+3;
1639   else
1640     Nx = Mx+2;
1641   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
1642     Ny = My+3;
1643   else
1644     Ny = My+2;
1645 
1646   BCtype_d xBC_r, xBC_i, yBC_r, yBC_i;
1647   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
1648   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
1649   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
1650   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
1651   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
1652   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
1653   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
1654   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
1655 
1656   // First, solve in the X-direction
1657   for (int iy=0; iy<My; iy++) {
1658     intptr_t doffset = 2*iy;
1659     intptr_t coffset = 2*iy;
1660     // Real part
1661     find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data+doffset), 2*My,
1662 		     (double*)spline->coefs+coffset, 2*Ny);
1663     // Imag part
1664     find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My,
1665 		     ((double*)spline->coefs)+coffset+1, 2*Ny);
1666   }
1667 
1668   // Now, solve in the Y-direction
1669   for (int ix=0; ix<Nx; ix++) {
1670     intptr_t doffset = 2*ix*Ny;
1671     intptr_t coffset = 2*ix*Ny;
1672     // Real part
1673     find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2,
1674 		     (double*)spline->coefs+coffset, 2);
1675     // Imag part
1676     find_coefs_1d_d (spline->y_grid, yBC_i, (double*)spline->coefs+doffset+1, 2,
1677 		     ((double*)spline->coefs)+coffset+1, 2);
1678   }
1679 }
1680 
1681 
1682 
1683 UBspline_3d_z*
create_UBspline_3d_z(Ugrid x_grid,Ugrid y_grid,Ugrid z_grid,BCtype_z xBC,BCtype_z yBC,BCtype_z zBC,complex_double * data)1684 create_UBspline_3d_z (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid,
1685 		      BCtype_z xBC, BCtype_z yBC, BCtype_z zBC,
1686 		      complex_double *data)
1687 {
1688   // Create new spline
1689   UBspline_3d_z* restrict spline = new UBspline_3d_z;
1690   spline->spcode = U3D;
1691   spline->tcode  = DOUBLE_COMPLEX;
1692   spline->xBC = xBC;
1693   spline->yBC = yBC;
1694   spline->zBC = zBC;
1695 
1696   // Setup internal variables
1697   int Mx = x_grid.num;  int My = y_grid.num; int Mz = z_grid.num;
1698   int Nx, Ny, Nz;
1699 
1700   if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC)     Nx = Mx+3;
1701   else                           Nx = Mx+2;
1702   x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3);
1703   x_grid.delta_inv = 1.0/x_grid.delta;
1704   spline->x_grid   = x_grid;
1705 
1706   if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC)     Ny = My+3;
1707   else                           Ny = My+2;
1708   y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3);
1709   y_grid.delta_inv = 1.0/y_grid.delta;
1710   spline->y_grid   = y_grid;
1711 
1712   if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC)     Nz = Mz+3;
1713   else                           Nz = Mz+2;
1714   z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3);
1715   z_grid.delta_inv = 1.0/z_grid.delta;
1716   spline->z_grid   = z_grid;
1717 
1718   spline->x_stride = Ny*Nz;
1719   spline->y_stride = Nz;
1720 
1721 #ifndef HAVE_SSE2
1722   spline->coefs      = new complex_double[Nx*Ny*Nz];
1723 #else
1724   posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*Nx*Ny*Nz);
1725 #endif
1726 
1727   BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
1728   xBC_r.lCode = xBC.lCode;  xBC_r.rCode = xBC.rCode;
1729   xBC_r.lVal  = xBC.lVal_r; xBC_r.rVal  = xBC.rVal_r;
1730   xBC_i.lCode = xBC.lCode;  xBC_i.rCode = xBC.rCode;
1731   xBC_i.lVal  = xBC.lVal_i; xBC_i.rVal  = xBC.rVal_i;
1732   yBC_r.lCode = yBC.lCode;  yBC_r.rCode = yBC.rCode;
1733   yBC_r.lVal  = yBC.lVal_r; yBC_r.rVal  = yBC.rVal_r;
1734   yBC_i.lCode = yBC.lCode;  yBC_i.rCode = yBC.rCode;
1735   yBC_i.lVal  = yBC.lVal_i; yBC_i.rVal  = yBC.rVal_i;
1736   zBC_r.lCode = zBC.lCode;  zBC_r.rCode = zBC.rCode;
1737   zBC_r.lVal  = zBC.lVal_r; zBC_r.rVal  = zBC.rVal_r;
1738   zBC_i.lCode = zBC.lCode;  zBC_i.rCode = zBC.rCode;
1739   zBC_i.lVal  = zBC.lVal_i; zBC_i.rVal  = zBC.rVal_i;
1740   // First, solve in the X-direction
1741   for (int iy=0; iy<My; iy++)
1742     for (int iz=0; iz<Mz; iz++) {
1743       intptr_t doffset = 2*(iy*Mz+iz);
1744       intptr_t coffset = 2*(iy*Nz+iz);
1745       // Real part
1746       find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data)+doffset, 2*My*Mz,
1747 		       ((double*)spline->coefs)+coffset, 2*Ny*Nz);
1748       // Imag part
1749       find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My*Mz,
1750 		       ((double*)spline->coefs)+coffset+1, 2*Ny*Nz);
1751     }
1752 
1753   // Now, solve in the Y-direction
1754   for (int ix=0; ix<Nx; ix++)
1755     for (int iz=0; iz<Nz; iz++) {
1756       intptr_t doffset = 2*(ix*Ny*Nz + iz);
1757       intptr_t coffset = 2*(ix*Ny*Nz + iz);
1758       // Real part
1759       find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2*Nz,
1760 		       ((double*)spline->coefs)+coffset, 2*Nz);
1761       // Imag part
1762       find_coefs_1d_d (spline->y_grid, yBC_i, ((double*)spline->coefs)+doffset+1, 2*Nz,
1763 		       ((double*)spline->coefs)+coffset+1, 2*Nz);
1764     }
1765 
1766   // Now, solve in the Z-direction
1767   for (int ix=0; ix<Nx; ix++)
1768     for (int iy=0; iy<Ny; iy++) {
1769       intptr_t doffset = 2*((ix*Ny+iy)*Nz);
1770       intptr_t coffset = 2*((ix*Ny+iy)*Nz);
1771       // Real part
1772       find_coefs_1d_d (spline->z_grid, zBC_r, ((double*)spline->coefs)+doffset, 2,
1773 		       ((double*)spline->coefs)+coffset, 2);
1774       // Imag part
1775       find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)spline->coefs)+doffset+1, 2,
1776 		       ((double*)spline->coefs)+coffset+1, 2);
1777     }
1778   init_sse_data();
1779   return spline;
1780 }
1781 
1782 void
recompute_UBspline_3d_z(UBspline_3d_z * spline,complex_double * data)1783 recompute_UBspline_3d_z (UBspline_3d_z* spline, complex_double *data)
1784 {
1785   // Setup internal variables
1786   int Mx = spline->x_grid.num;
1787   int My = spline->y_grid.num;
1788   int Mz = spline->z_grid.num;
1789   int Nx, Ny, Nz;
1790 
1791   if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC)
1792     Nx = Mx+3;
1793   else
1794     Nx = Mx+2;
1795   if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC)
1796     Ny = My+3;
1797   else
1798     Ny = My+2;
1799   if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC)
1800     Nz = Mz+3;
1801   else
1802     Nz = Mz+2;
1803 
1804   BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i;
1805   xBC_r.lCode = spline->xBC.lCode;  xBC_r.rCode = spline->xBC.rCode;
1806   xBC_r.lVal  = spline->xBC.lVal_r; xBC_r.rVal  = spline->xBC.rVal_r;
1807   xBC_i.lCode = spline->xBC.lCode;  xBC_i.rCode = spline->xBC.rCode;
1808   xBC_i.lVal  = spline->xBC.lVal_i; xBC_i.rVal  = spline->xBC.rVal_i;
1809   yBC_r.lCode = spline->yBC.lCode;  yBC_r.rCode = spline->yBC.rCode;
1810   yBC_r.lVal  = spline->yBC.lVal_r; yBC_r.rVal  = spline->yBC.rVal_r;
1811   yBC_i.lCode = spline->yBC.lCode;  yBC_i.rCode = spline->yBC.rCode;
1812   yBC_i.lVal  = spline->yBC.lVal_i; yBC_i.rVal  = spline->yBC.rVal_i;
1813   zBC_r.lCode = spline->zBC.lCode;  zBC_r.rCode = spline->zBC.rCode;
1814   zBC_r.lVal  = spline->zBC.lVal_r; zBC_r.rVal  = spline->zBC.rVal_r;
1815   zBC_i.lCode = spline->zBC.lCode;  zBC_i.rCode = spline->zBC.rCode;
1816   zBC_i.lVal  = spline->zBC.lVal_i; zBC_i.rVal  = spline->zBC.rVal_i;
1817   // First, solve in the X-direction
1818   for (int iy=0; iy<My; iy++)
1819     for (int iz=0; iz<Mz; iz++) {
1820       intptr_t doffset = 2*(iy*Mz+iz);
1821       intptr_t coffset = 2*(iy*Nz+iz);
1822       // Real part
1823       find_coefs_1d_d (spline->x_grid, xBC_r, ((double*)data)+doffset, 2*My*Mz,
1824 		       ((double*)spline->coefs)+coffset, 2*Ny*Nz);
1825       // Imag part
1826       find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My*Mz,
1827 		       ((double*)spline->coefs)+coffset+1, 2*Ny*Nz);
1828     }
1829 
1830   // Now, solve in the Y-direction
1831   for (int ix=0; ix<Nx; ix++)
1832     for (int iz=0; iz<Nz; iz++) {
1833       intptr_t doffset = 2*(ix*Ny*Nz + iz);
1834       intptr_t coffset = 2*(ix*Ny*Nz + iz);
1835       // Real part
1836       find_coefs_1d_d (spline->y_grid, yBC_r, ((double*)spline->coefs)+doffset, 2*Nz,
1837 		       ((double*)spline->coefs)+coffset, 2*Nz);
1838       // Imag part
1839       find_coefs_1d_d (spline->y_grid, yBC_i, ((double*)spline->coefs)+doffset+1, 2*Nz,
1840 		       ((double*)spline->coefs)+coffset+1, 2*Nz);
1841     }
1842 
1843   // Now, solve in the Z-direction
1844   for (int ix=0; ix<Nx; ix++)
1845     for (int iy=0; iy<Ny; iy++) {
1846       intptr_t doffset = 2*((ix*Ny+iy)*Nz);
1847       intptr_t coffset = 2*((ix*Ny+iy)*Nz);
1848       // Real part
1849       find_coefs_1d_d (spline->z_grid, zBC_r, ((double*)spline->coefs)+doffset, 2,
1850 		       ((double*)spline->coefs)+coffset, 2);
1851       // Imag part
1852       find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)spline->coefs)+doffset+1, 2,
1853 		       ((double*)spline->coefs)+coffset+1, 2);
1854     }
1855 }
1856 
1857 
1858 void
destroy_UBspline(Bspline * spline)1859 destroy_UBspline (Bspline *spline)
1860 {
1861   free(spline->coefs);
1862   delete spline;
1863 }
1864 
1865 void
1866 destroy_NUBspline (Bspline *spline);
1867 
1868 void
1869 destroy_multi_UBspline (Bspline *spline);
1870 
1871 void
destroy_Bspline(void * spline)1872 destroy_Bspline (void *spline)
1873 {
1874   Bspline *sp = (Bspline *)spline;
1875   if (sp->sp_code <= U3D)
1876     destroy_UBspline (sp);
1877   else if (sp->sp_code <= NU3D)
1878     destroy_NUBspline (sp);
1879   else if (sp->sp_code <= MULTI_U3D)
1880     destroy_multi_UBspline (sp);
1881   else
1882     fprintf (stderr, "Error in destroy_Bspline:  invalid spline code %d.\n",
1883 	     sp->sp_code);
1884 }
1885