1 /*
2 * Copyright (c) 2002, 2020 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19 /* Nonequispaced fast cosine transform */
20
21 /* Author: Steffen Klatt 2004-2006, Jens Keiner 2010 */
22
23 /* configure header */
24 #include "config.h"
25
26 /* complex datatype (maybe) */
27 #ifdef HAVE_COMPLEX_H
28 #include<complex.h>
29 #endif
30
31 /* NFFT headers */
32 #include "nfft3.h"
33 #include "infft.h"
34
35 #ifdef _OPENMP
36 #include <omp.h>
37 #endif
38
39 #ifdef OMP_ASSERT
40 #include <assert.h>
41 #endif
42
43 #undef X
44 #define X(name) NFCT(name)
45
46 /** Compute aggregated product of integer array. */
intprod(const INT * vec,const INT a,const INT d)47 static inline INT intprod(const INT *vec, const INT a, const INT d)
48 {
49 INT t, p;
50
51 p = 1;
52 for (t = 0; t < d; t++)
53 p *= vec[t] - a;
54
55 return p;
56 }
57
58 /* handy shortcuts */
59 #define BASE(x) COS(x)
60 #define NN(x) (x - 1)
61 #define OFFSET 0
62 #define FOURIER_TRAFO FFTW_REDFT00
63 #define FFTW_DEFAULT_FLAGS FFTW_ESTIMATE | FFTW_DESTROY_INPUT
64
65 #define NODE(p,r) (ths->x[(p) * ths->d + (r)])
66
67 #define MACRO_with_FG_PSI fg_psi[t][lj[t]]
68 #define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj[t]]
69 #define MACRO_without_PRE_PSI PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + t]) \
70 - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
71 #define MACRO_compute_PSI PHI((2 * NN(ths->n[t])), (NODE(j,t) - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
72
73 /**
74 * Direct computation of non equispaced cosine transforms
75 * nfct_trafo_direct, nfct_adjoint_direct
76 * require O(M N^d) arithemtical operations
77 *
78 * direct computation of the nfct_trafo_direct, formula (1.1)
79 * nfct_trafo_direct:
80 * for j=0,...,M-1
81 * f[j] = sum_{k in I_N^d} f_hat[k] * cos(2 (pi) k x[j])
82 *
83 * direct computation of the nfft_adjoint_direct, formula (1.2)
84 * nfct_adjoint_direct:
85 * for k in I_N^d
86 * f_hat[k] = sum_{j=0}^{M-1} f[j] * cos(2 (pi) k x[j])
87 */
X(trafo_direct)88 void X(trafo_direct)(const X(plan) *ths)
89 {
90 R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
91
92 memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
93
94 if (ths->d == 1)
95 {
96 /* specialize for univariate case, rationale: faster */
97 INT j;
98 #ifdef _OPENMP
99 #pragma omp parallel for default(shared) private(j)
100 #endif
101 for (j = 0; j < ths->M_total; j++)
102 {
103 INT k_L;
104 for (k_L = 0; k_L < ths->N_total; k_L++)
105 {
106 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
107 f[j] += f_hat[k_L] * BASE(omega);
108 }
109 }
110 }
111 else
112 {
113 /* multivariate case */
114 INT j;
115 #ifdef _OPENMP
116 #pragma omp parallel for default(shared) private(j)
117 #endif
118 for (j = 0; j < ths->M_total; j++)
119 {
120 R x[ths->d], omega, Omega[ths->d + 1];
121 INT t, t2, k_L, k[ths->d];
122 Omega[0] = K(1.0);
123 for (t = 0; t < ths->d; t++)
124 {
125 k[t] = OFFSET;
126 x[t] = K2PI * ths->x[j * ths->d + t];
127 Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
128 }
129 omega = Omega[ths->d];
130
131 for (k_L = 0; k_L < ths->N_total; k_L++)
132 {
133 f[j] += f_hat[k_L] * omega;
134 {
135 for (t = ths->d - 1; (t >= 1) && (k[t] == (ths->N[t] - 1)); t--)
136 k[t] = OFFSET;
137
138 k[t]++;
139
140 for (t2 = t; t2 < ths->d; t2++)
141 Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
142
143 omega = Omega[ths->d];
144 }
145 }
146 }
147 }
148 }
149
X(adjoint_direct)150 void X(adjoint_direct)(const X(plan) *ths)
151 {
152 R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
153
154 memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
155
156 if (ths->d == 1)
157 {
158 /* specialize for univariate case, rationale: faster */
159 #ifdef _OPENMP
160 INT k_L;
161 #pragma omp parallel for default(shared) private(k_L)
162 for (k_L = 0; k_L < ths->N_total; k_L++)
163 {
164 INT j;
165 for (j = 0; j < ths->M_total; j++)
166 {
167 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
168 f_hat[k_L] += f[j] * BASE(omega);
169 }
170 }
171 #else
172 INT j;
173 for (j = 0; j < ths->M_total; j++)
174 {
175 INT k_L;
176 for (k_L = 0; k_L < ths->N_total; k_L++)
177 {
178 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
179 f_hat[k_L] += f[j] * BASE(omega);
180 }
181 }
182 #endif
183 }
184 else
185 {
186 /* multivariate case */
187 INT j, k_L;
188 #ifdef _OPENMP
189 #pragma omp parallel for default(shared) private(j, k_L)
190 for (k_L = 0; k_L < ths->N_total; k_L++)
191 {
192 INT k[ths->d], k_temp, t;
193
194 k_temp = k_L;
195
196 for (t = ths->d - 1; t >= 0; t--)
197 {
198 k[t] = k_temp % ths->N[t];
199 k_temp /= ths->N[t];
200 }
201
202 for (j = 0; j < ths->M_total; j++)
203 {
204 R omega = K(1.0);
205 for (t = 0; t < ths->d; t++)
206 omega *= BASE(K2PI * (k[t] + OFFSET) * ths->x[j * ths->d + t]);
207 f_hat[k_L] += f[j] * omega;
208 }
209 }
210 #else
211 for (j = 0; j < ths->M_total; j++)
212 {
213 R x[ths->d], omega, Omega[ths->d+1];
214 INT t, t2, k[ths->d];
215 Omega[0] = K(1.0);
216 for (t = 0; t < ths->d; t++)
217 {
218 k[t] = OFFSET;
219 x[t] = K2PI * ths->x[j * ths->d + t];
220 Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
221 }
222 omega = Omega[ths->d];
223 for (k_L = 0; k_L < ths->N_total; k_L++)
224 {
225 f_hat[k_L] += f[j] * omega;
226
227 for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t] - 1); t--)
228 k[t] = OFFSET;
229
230 k[t]++;
231
232 for (t2 = t; t2 < ths->d; t2++)
233 Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
234
235 omega = Omega[ths->d];
236 }
237 }
238 #endif
239 }
240 }
241
242 /** fast computation of non equispaced cosine transforms
243 * require O(N^d log(N) + M) arithemtical operations
244 *
245 * fast computation of the nfct_trafo, formula (1.1)
246 * nfct_trafo:
247 * for j=0,...,M-1
248 * f[j] = sum_{k in I_N^d} f_hat[k] * cos(2 (pi) k x[j])
249 *
250 * direct computation of the nfct_adjoint, formula (1.2)
251 * nfct_adjoint:
252 * for k in I_N^d
253 * f_hat[k] = sum_{j=0}^{M-1} f[j] * cos(2 (pi) k x[j])
254 */
255
256 /** macros and small sub routines for the fast transforms
257 */
258
259 /** computes 2m+2 indices for the matrix B
260 */
uo(const X (plan)* ths,const INT j,INT * up,INT * op,const INT act_dim)261 static inline void uo(const X(plan) *ths, const INT j, INT *up, INT *op,
262 const INT act_dim)
263 {
264 const R xj = ths->x[j * ths->d + act_dim];
265 INT c = LRINT(xj * (2 * NN(ths->n[(act_dim)])));
266
267 (*up) = c - (ths->m);
268 (*op) = c + 1 + (ths->m);
269 }
270
271 #define MACRO_D_compute_A \
272 { \
273 g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \
274 }
275
276 #define MACRO_D_compute_T \
277 { \
278 f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \
279 }
280
281 #define MACRO_D_init_result_A memset(g_hat, 0, (size_t)(ths->n_total) * sizeof(R));
282
283 #define MACRO_D_init_result_T memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
284
285 #define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]]
286
287 #define MACRO_compute_PHI_HUT_INV (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), kg[t] + OFFSET, t)))
288
289 #define MACRO_init_k_ks \
290 { \
291 for (t = 0; t < ths->d; t++) \
292 { \
293 kg[t] = 0; \
294 } \
295 i = 0; \
296 }
297
298 #define MACRO_update_c_phi_inv_k(what_kind, which_phi) \
299 { \
300 for (t = i; t < ths->d; t++) \
301 { \
302 MACRO_update_c_phi_inv_k_ ## what_kind(which_phi); \
303 kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \
304 } \
305 }
306
307 #define MACRO_update_c_phi_inv_k_A(which_phi) \
308 { \
309 c_phi_inv_k[t+1] = (kg[t] == 0 ? K(1.0) : K(0.5)) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
310 }
311
312 #define MACRO_update_c_phi_inv_k_T(which_phi) \
313 { \
314 c_phi_inv_k[t+1] = c_phi_inv_k[t] * MACRO_ ## which_phi; \
315 }
316
317 #define MACRO_count_k_ks \
318 { \
319 kg[ths->d - 1]++; \
320 i = ths->d - 1; \
321 \
322 while ((kg[i] == ths->N[i]) && (i > 0)) \
323 { \
324 kg[i - 1]++; \
325 kg[i] = 0; \
326 i--; \
327 } \
328 }
329
330 /* sub routines for the fast transforms matrix vector multiplication with D, D^T */
331 #define MACRO_D(which_one) \
332 static inline void D_ ## which_one (X(plan) *ths) \
333 { \
334 R *g_hat, *f_hat; /* local copy */ \
335 R c_phi_inv_k[ths->d+1]; /* postfix product of PHI_HUT */ \
336 INT t; /* index dimensions */ \
337 INT i; \
338 INT k_L; /* plain index */ \
339 INT kg[ths->d]; /* multi index in g_hat */ \
340 INT kg_plain[ths->d+1]; /* postfix plain index */ \
341 \
342 f_hat = (R*)ths->f_hat; g_hat = (R*)ths->g_hat; \
343 MACRO_D_init_result_ ## which_one; \
344 \
345 c_phi_inv_k[0] = K(1.0); \
346 kg_plain[0] = 0; \
347 \
348 MACRO_init_k_ks; \
349 \
350 if (ths->flags & PRE_PHI_HUT) \
351 { \
352 for (k_L = 0; k_L < ths->N_total; k_L++) \
353 { \
354 MACRO_update_c_phi_inv_k(which_one, with_PRE_PHI_HUT); \
355 MACRO_D_compute_ ## which_one; \
356 MACRO_count_k_ks; \
357 } \
358 } \
359 else \
360 { \
361 for (k_L = 0; k_L < ths->N_total; k_L++) \
362 { \
363 MACRO_update_c_phi_inv_k(which_one,compute_PHI_HUT_INV); \
364 MACRO_D_compute_ ## which_one; \
365 MACRO_count_k_ks; \
366 } \
367 } \
368 }
369
370 MACRO_D(A)
MACRO_D(T)371 MACRO_D(T)
372
373 /* sub routines for the fast transforms matrix vector multiplication with B, B^T */
374 #define MACRO_B_init_result_A memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
375 #define MACRO_B_init_result_T memset(g, 0, (size_t)(ths->n_total) * sizeof(R));
376
377 #define MACRO_B_PRE_FULL_PSI_compute_A \
378 { \
379 (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \
380 }
381
382 #define MACRO_B_PRE_FULL_PSI_compute_T \
383 { \
384 R factor = K(1.0); \
385 INT d = ths->psi_index_g[ix]; \
386 for (t = ths->d - 1; t >= 0; t--) \
387 { \
388 INT m = d % ths->n[t]; \
389 if (m != 0 && m != ths->n[t] - 1) \
390 factor *= K(0.5); \
391 d = d / ths->n[t]; \
392 } \
393 g[ths->psi_index_g[ix]] += factor * ths->psi[ix] * (*fj); \
394 }
395
396 #define MACRO_B_compute_A \
397 { \
398 (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]]; \
399 }
400
401 #define MACRO_B_compute_T \
402 { \
403 g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj); \
404 }
405
406 #define MACRO_init_uo_l_lj_t \
407 { \
408 for (t2 = 0; t2 < ths->d; t2++) \
409 { \
410 uo(ths, j, &u[t2], &o[t2], t2); \
411 \
412 /* determine index in g-array corresponding to u[(t2)] */ \
413 if (u[(t2)] < 0) \
414 lg_offset[(t2)] = \
415 (u[(t2)] % (2 * NN(ths->n[(t2)]))) + (2 * NN(ths->n[(t2)])); \
416 else \
417 lg_offset[(t2)] = u[(t2)] % (2 * NN(ths->n[(t2)])); \
418 if (lg_offset[(t2)] > NN(ths->n[(t2)])) \
419 lg_offset[(t2)] = -(2 * NN(ths->n[(t2)]) - lg_offset[(t2)]); \
420 \
421 if (lg_offset[t2] <= 0) \
422 { \
423 l[t2] = -lg_offset[t2]; \
424 count_lg[t2] = -1; \
425 } \
426 else \
427 { \
428 l[t2] = +lg_offset[t2]; \
429 count_lg[t2] = +1; \
430 } \
431 \
432 lj[t2] = 0; \
433 } \
434 t2 = 0; \
435 }
436
437 #define FOO_A K(1.0)
438
439 #define FOO_T ((l[t] == 0 || l[t] == ths->n[t] - 1) ? K(1.0) : K(0.5))
440
441 #define MACRO_update_phi_prod_ll_plain(which_one,which_psi) \
442 { \
443 for (t = t2; t < ths->d; t++) \
444 { \
445 phi_prod[t+1] = (FOO_ ## which_one) * phi_prod[t] * (MACRO_ ## which_psi); \
446 ll_plain[t+1] = ll_plain[t] * ths->n[t] + l[t]; \
447 } \
448 }
449
450 #define MACRO_count_uo_l_lj_t \
451 { \
452 /* turn around if we hit one of the boundaries */ \
453 if ((l[(ths->d-1)] == 0) || (l[(ths->d-1)] == NN(ths->n[(ths->d-1)]))) \
454 count_lg[(ths->d-1)] *= -1; \
455 \
456 /* move array index */ \
457 l[(ths->d-1)] += count_lg[(ths->d-1)]; \
458 \
459 lj[ths->d - 1]++; \
460 t2 = ths->d - 1; \
461 \
462 while ((lj[t2] == (2 * ths->m + 2)) && (t2 > 0)) \
463 { \
464 lj[t2 - 1]++; \
465 lj[t2] = 0; \
466 /* ansonsten lg[i-1] verschieben */ \
467 \
468 /* turn around if we hit one of the boundaries */ \
469 if ((l[(t2 - 1)] == 0) || (l[(t2 - 1)] == NN(ths->n[(t2 - 1)]))) \
470 count_lg[(t2 - 1)] *= -1; \
471 /* move array index */ \
472 l[(t2 - 1)] += count_lg[(t2 - 1)]; \
473 \
474 /* lg[i] = anfangswert */ \
475 if (lg_offset[t2] <= 0) \
476 { \
477 l[t2] = -lg_offset[t2]; \
478 count_lg[t2] = -1; \
479 } \
480 else \
481 { \
482 l[t2] = +lg_offset[t2]; \
483 count_lg[t2] = +1; \
484 } \
485 \
486 t2--; \
487 } \
488 }
489
490 #define MACRO_B(which_one) \
491 static inline void B_ ## which_one (X(plan) *ths) \
492 { \
493 INT lprod; /* 'regular bandwidth' of matrix B */ \
494 INT u[ths->d], o[ths->d]; /* multi band with respect to x_j */ \
495 INT t, t2; /* index dimensions */ \
496 INT j; /* index nodes */ \
497 INT l_L, ix; /* index one row of B */ \
498 INT l[ths->d]; /* multi index u<=l<=o (real index of g in array) */ \
499 INT lj[ths->d]; /* multi index 0<=lc<2m+2 */ \
500 INT ll_plain[ths->d+1]; /* postfix plain index in g */ \
501 R phi_prod[ths->d+1]; /* postfix product of PHI */ \
502 R *f, *g; /* local copy */ \
503 R *fj; /* local copy */ \
504 R y[ths->d]; \
505 R fg_psi[ths->d][2*ths->m+2]; \
506 R fg_exp_l[ths->d][2*ths->m+2]; \
507 INT l_fg,lj_fg; \
508 R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \
509 R ip_w; \
510 INT ip_u; \
511 INT ip_s = ths->K/(ths->m+2); \
512 INT lg_offset[ths->d]; /* offset in g according to u */ \
513 INT count_lg[ths->d]; /* count summands (2m+2) */ \
514 \
515 f = (R*)ths->f; g = (R*)ths->g; \
516 \
517 MACRO_B_init_result_ ## which_one \
518 \
519 if (ths->flags & PRE_FULL_PSI) \
520 { \
521 for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++) \
522 { \
523 for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \
524 { \
525 MACRO_B_PRE_FULL_PSI_compute_ ## which_one; \
526 } \
527 } \
528 return; \
529 } \
530 \
531 phi_prod[0] = K(1.0); \
532 ll_plain[0] = 0; \
533 \
534 for (t = 0, lprod = 1; t < ths->d; t++) \
535 lprod *= (2 * ths->m + 2); \
536 \
537 if (ths->flags & PRE_PSI) \
538 { \
539 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
540 { \
541 MACRO_init_uo_l_lj_t; \
542 \
543 for (l_L = 0; l_L < lprod; l_L++) \
544 { \
545 MACRO_update_phi_prod_ll_plain(which_one, with_PRE_PSI); \
546 \
547 MACRO_B_compute_ ## which_one; \
548 \
549 MACRO_count_uo_l_lj_t; \
550 } /* for(l_L) */ \
551 } /* for(j) */ \
552 return; \
553 } /* if(PRE_PSI) */ \
554 \
555 if (ths->flags & PRE_FG_PSI) \
556 { \
557 for (t = 0; t < ths->d; t++) \
558 { \
559 tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
560 tmpEXP2sq = tmpEXP2 * tmpEXP2; \
561 tmp2 = K(1.0); \
562 tmp3 = K(1.0); \
563 fg_exp_l[t][0] = K(1.0); \
564 \
565 for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
566 { \
567 tmp3 = tmp2 * tmpEXP2; \
568 tmp2 *= tmpEXP2sq; \
569 fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
570 } \
571 } \
572 \
573 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
574 { \
575 MACRO_init_uo_l_lj_t; \
576 \
577 for (t = 0; t < ths->d; t++) \
578 { \
579 fg_psi[t][0] = ths->psi[2 * (j * ths->d + t)]; \
580 tmpEXP1 = ths->psi[2 * (j * ths->d + t) + 1]; \
581 tmp1 = K(1.0); \
582 \
583 for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
584 { \
585 tmp1 *= tmpEXP1; \
586 fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
587 } \
588 } \
589 \
590 for (l_L= 0; l_L < lprod; l_L++) \
591 { \
592 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
593 \
594 MACRO_B_compute_ ## which_one; \
595 \
596 MACRO_count_uo_l_lj_t; \
597 } \
598 } \
599 return; \
600 } \
601 \
602 if (ths->flags & FG_PSI) \
603 { \
604 for (t = 0; t < ths->d; t++) \
605 { \
606 tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
607 tmpEXP2sq = tmpEXP2 * tmpEXP2; \
608 tmp2 = K(1.0); \
609 tmp3 = K(1.0); \
610 fg_exp_l[t][0] = K(1.0); \
611 for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
612 { \
613 tmp3 = tmp2 * tmpEXP2; \
614 tmp2 *= tmpEXP2sq; \
615 fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
616 } \
617 } \
618 \
619 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
620 { \
621 MACRO_init_uo_l_lj_t; \
622 \
623 for (t = 0; t < ths->d; t++) \
624 { \
625 fg_psi[t][0] = (PHI((2 * NN(ths->n[t])), (ths->x[j*ths->d+t] - ((R)u[t])/(2 * NN(ths->n[t]))),(t)));\
626 \
627 tmpEXP1 = EXP(K(2.0) * ((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u[t]) / ths->b[t]); \
628 tmp1 = K(1.0); \
629 for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
630 { \
631 tmp1 *= tmpEXP1; \
632 fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
633 } \
634 } \
635 \
636 for (l_L = 0; l_L < lprod; l_L++) \
637 { \
638 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
639 \
640 MACRO_B_compute_ ## which_one; \
641 \
642 MACRO_count_uo_l_lj_t; \
643 } \
644 } \
645 return; \
646 } \
647 \
648 if (ths->flags & PRE_LIN_PSI) \
649 { \
650 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
651 { \
652 MACRO_init_uo_l_lj_t; \
653 \
654 for (t = 0; t < ths->d; t++) \
655 { \
656 y[t] = (((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - (R)u[t]) \
657 * ((R)ths->K))/(ths->m + 2); \
658 ip_u = LRINT(FLOOR(y[t])); \
659 ip_w = y[t]-ip_u; \
660 for (l_fg = u[t], lj_fg = 0; l_fg <= o[t]; l_fg++, lj_fg++) \
661 { \
662 fg_psi[t][lj_fg] = ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s)] \
663 * (1-ip_w) + ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s+1)] \
664 * (ip_w); \
665 } \
666 } \
667 \
668 for (l_L = 0; l_L < lprod; l_L++) \
669 { \
670 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
671 \
672 MACRO_B_compute_ ## which_one; \
673 \
674 MACRO_count_uo_l_lj_t; \
675 } /* for(l_L) */ \
676 } /* for(j) */ \
677 return; \
678 } /* if(PRE_LIN_PSI) */ \
679 \
680 /* no precomputed psi at all */ \
681 for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
682 { \
683 MACRO_init_uo_l_lj_t; \
684 \
685 for (l_L = 0; l_L < lprod; l_L++) \
686 { \
687 MACRO_update_phi_prod_ll_plain(which_one, without_PRE_PSI); \
688 \
689 MACRO_B_compute_ ## which_one; \
690 \
691 MACRO_count_uo_l_lj_t; \
692 } /* for (l_L) */ \
693 } /* for (j) */ \
694 } /* B */
695
696 MACRO_B(A)
697 MACRO_B(T)
698
699 /**
700 * user routines
701 */
702 void X(trafo)(X(plan) *ths)
703 {
704 switch(ths->d)
705 {
706 default:
707 {
708 /* use ths->my_fftw_r2r_plan */
709 ths->g_hat = ths->g1;
710 ths->g = ths->g2;
711
712 /* form \f$ \hat g_k = \frac{\hat f_k}{c_k\left(\phi\right)} \text{ for }
713 * k \in I_N \f$ */
714 TIC(0)
715 D_A(ths);
716 TOC(0)
717
718 /* Compute by d-variate discrete Fourier transform
719 * \f$ g_l = \sum_{k \in I_N} \hat g_k {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
720 * \text{ for } l \in I_n \f$ */
721 TIC_FFTW(1)
722 FFTW(execute)(ths->my_fftw_r2r_plan);
723 TOC_FFTW(1)
724
725 /*if (ths->flags & PRE_FULL_PSI)
726 full_psi__A(ths);*/
727
728 /* Set \f$ f_j = \sum_{l \in I_n,m(x_j)} g_l \psi\left(x_j-\frac{l}{n}\right)
729 * \text{ for } j=0,\dots,M-1 \f$ */
730 TIC(2)
731 B_A(ths);
732 TOC(2)
733
734 /*if (ths->flags & PRE_FULL_PSI)
735 {
736 Y(free)(ths->psi_index_g);
737 Y(free)(ths->psi_index_f);
738 }*/
739 }
740 }
741 } /* trafo */
742
X(adjoint)743 void X(adjoint)(X(plan) *ths)
744 {
745 switch(ths->d)
746 {
747 default:
748 {
749 /* use ths->my_fftw_plan */
750 ths->g_hat = ths->g2;
751 ths->g = ths->g1;
752
753 /*if (ths->flags & PRE_FULL_PSI)
754 full_psi__T(ths);*/
755
756 /* Set \f$ g_l = \sum_{j=0}^{M-1} f_j \psi\left(x_j-\frac{l}{n}\right)
757 * \text{ for } l \in I_n,m(x_j) \f$ */
758 TIC(2)
759 B_T(ths);
760 TOC(2)
761
762 /* Compute by d-variate discrete cosine transform
763 * \f$ \hat g_k = \sum_{l \in I_n} g_l {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
764 * \text{ for } k \in I_N\f$ */
765 TIC_FFTW(1)
766 FFTW(execute)(ths->my_fftw_r2r_plan);
767 TOC_FFTW(1)
768
769 /* Form \f$ \hat f_k = \frac{\hat g_k}{c_k\left(\phi\right)} \text{ for }
770 * k \in I_N \f$ */
771 TIC(0)
772 D_T(ths);
773 TOC(0)
774 }
775 }
776 } /* adjoint */
777
778 /** initialisation of direct transform
779 */
precompute_phi_hut(X (plan)* ths)780 static inline void precompute_phi_hut(X(plan) *ths)
781 {
782 INT ks[ths->d]; /* index over all frequencies */
783 INT t; /* index over all dimensions */
784
785 ths->c_phi_inv = (R**) Y(malloc)((size_t)(ths->d) * sizeof(R*));
786
787 for (t = 0; t < ths->d; t++)
788 {
789 ths->c_phi_inv[t] = (R*)Y(malloc)((size_t)(ths->N[t] - OFFSET) * sizeof(R));
790
791 for (ks[t] = 0; ks[t] < ths->N[t] - OFFSET; ks[t]++)
792 {
793 ths->c_phi_inv[t][ks[t]] = (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), ks[t] + OFFSET, t)));
794 }
795 }
796 } /* phi_hut */
797
798 /** create a lookup table, but NOT for each node
799 * good idea K=2^xx
800 * TODO: estimate K, call from init
801 * assumes an EVEN window function
802 */
X(precompute_lin_psi)803 void X(precompute_lin_psi)(X(plan) *ths)
804 {
805 INT t; /**< index over all dimensions */
806 INT j; /**< index over all nodes */
807 R step; /**< step size in [0,(m+2)/n] */
808
809 for (t = 0; t < ths->d; t++)
810 {
811 step = ((R)(ths->m+2)) / (((R)ths->K) * (2 * NN(ths->n[t])));
812
813 for (j = 0; j <= ths->K; j++)
814 {
815 ths->psi[(ths->K + 1) * t + j] = PHI((2 * NN(ths->n[t])), (j * step), t);
816 } /* for(j) */
817 } /* for(t) */
818 }
819
X(precompute_fg_psi)820 void X(precompute_fg_psi)(X(plan) *ths)
821 {
822 INT t; /* index over all dimensions */
823 INT u, o; /* depends on x_j */
824
825 // sort(ths);
826
827 for (t = 0; t < ths->d; t++)
828 {
829 INT j;
830 // #pragma omp parallel for default(shared) private(j,u,o)
831 for (j = 0; j < ths->M_total; j++)
832 {
833 uo(ths, j, &u, &o, t);
834
835 ths->psi[2 * (j*ths->d + t)] = (PHI((2 * NN(ths->n[t])),(ths->x[j * ths->d + t] - ((R)u) / (2 * NN(ths->n[t]))),(t)));
836 ths->psi[2 * (j*ths->d + t) + 1] = EXP(K(2.0) * ( (2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u) / ths->b[t]);
837 } /* for(j) */
838 }
839 /* for(t) */
840 } /* nfft_precompute_fg_psi */
841
X(precompute_psi)842 void X(precompute_psi)(X(plan) *ths)
843 {
844 INT t; /* index over all dimensions */
845 INT lj; /* index 0<=lj<u+o+1 */
846 INT u, o; /* depends on x_j */
847
848 //sort(ths);
849
850 for (t = 0; t < ths->d; t++)
851 {
852 INT j;
853
854 for (j = 0; j < ths->M_total; j++)
855 {
856 uo(ths, j, &u, &o, t);
857
858 for(lj = 0; lj < (2 * ths->m + 2); lj++)
859 ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj] =
860 (PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + (t)]) - ((R)(lj + u)) / (K(2.0) * ((R)NN(ths->n[t])))), t));
861 } /* for (j) */
862 } /* for (t) */
863 } /* precompute_psi */
864
X(precompute_full_psi)865 void X(precompute_full_psi)(X(plan) *ths)
866 {
867 //#ifdef _OPENMP
868 // sort(ths);
869 //
870 // nfft_precompute_full_psi_omp(ths);
871 //#else
872 INT t, t2; /* index over all dimensions */
873 INT j; /* index over all nodes */
874 INT l_L; /* plain index 0 <= l_L < lprod */
875 INT l[ths->d]; /* multi index u<=l<=o */
876 INT lj[ths->d]; /* multi index 0<=lj<u+o+1 */
877 INT ll_plain[ths->d+1]; /* postfix plain index */
878 INT lprod; /* 'bandwidth' of matrix B */
879 INT u[ths->d], o[ths->d]; /* depends on x_j */
880 INT count_lg[ths->d];
881 INT lg_offset[ths->d];
882
883 R phi_prod[ths->d+1];
884
885 INT ix, ix_old;
886
887 //sort(ths);
888
889 phi_prod[0] = K(1.0);
890 ll_plain[0] = 0;
891
892 for (t = 0, lprod = 1; t < ths->d; t++)
893 lprod *= 2 * ths->m + 2;
894
895 for (j = 0, ix = 0, ix_old = 0; j < ths->M_total; j++)
896 {
897 MACRO_init_uo_l_lj_t;
898
899 for (l_L = 0; l_L < lprod; l_L++, ix++)
900 {
901 MACRO_update_phi_prod_ll_plain(A, without_PRE_PSI);
902
903 ths->psi_index_g[ix] = ll_plain[ths->d];
904 ths->psi[ix] = phi_prod[ths->d];
905
906 MACRO_count_uo_l_lj_t;
907 } /* for (l_L) */
908
909 ths->psi_index_f[j] = ix - ix_old;
910 ix_old = ix;
911 } /* for(j) */
912 //#endif
913 }
914
X(precompute_one_psi)915 void X(precompute_one_psi)(X(plan) *ths)
916 {
917 if(ths->flags & PRE_PSI)
918 X(precompute_psi)(ths);
919 if(ths->flags & PRE_FULL_PSI)
920 X(precompute_full_psi)(ths);
921 if(ths->flags & PRE_FG_PSI)
922 X(precompute_fg_psi)(ths);
923 if(ths->flags & PRE_LIN_PSI)
924 X(precompute_lin_psi)(ths);
925 }
926
init_help(X (plan)* ths)927 static inline void init_help(X(plan) *ths)
928 {
929 INT t; /* index over all dimensions */
930 INT lprod; /* 'bandwidth' of matrix B */
931
932 if (ths->flags & NFFT_OMP_BLOCKWISE_ADJOINT)
933 ths->flags |= NFFT_SORT_NODES;
934
935 ths->N_total = intprod(ths->N, OFFSET, ths->d);
936 ths->n_total = intprod(ths->n, 0, ths->d);
937
938 ths->sigma = (R*)Y(malloc)((size_t)(ths->d) * sizeof(R));
939
940 for (t = 0; t < ths->d; t++)
941 ths->sigma[t] = ((R)NN(ths->n[t])) / ths->N[t];
942
943 /* Assign r2r transform kinds for each dimension */
944 ths->r2r_kind = (FFTW(r2r_kind)*)Y(malloc)((size_t)(ths->d) * sizeof (FFTW(r2r_kind)));
945 for (t = 0; t < ths->d; t++)
946 ths->r2r_kind[t] = FOURIER_TRAFO;
947
948 WINDOW_HELP_INIT;
949
950 if (ths->flags & MALLOC_X)
951 ths->x = (R*)Y(malloc)((size_t)(ths->d * ths->M_total) * sizeof(R));
952
953 if (ths->flags & MALLOC_F_HAT)
954 ths->f_hat = (R*)Y(malloc)((size_t)(ths->N_total) * sizeof(R));
955
956 if (ths->flags & MALLOC_F)
957 ths->f = (R*)Y(malloc)((size_t)(ths->M_total) * sizeof(R));
958
959 if (ths->flags & PRE_PHI_HUT)
960 precompute_phi_hut(ths);
961
962 if(ths->flags & PRE_LIN_PSI)
963 {
964 ths->K = (1U<< 10) * (ths->m+2);
965 ths->psi = (R*) Y(malloc)((size_t)((ths->K + 1) * ths->d) * sizeof(R));
966 }
967
968 if(ths->flags & PRE_FG_PSI)
969 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * 2) * sizeof(R));
970
971 if (ths->flags & PRE_PSI)
972 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * (2 * ths->m + 2 )) * sizeof(R));
973
974 if(ths->flags & PRE_FULL_PSI)
975 {
976 for (t = 0, lprod = 1; t < ths->d; t++)
977 lprod *= 2 * ths->m + 2;
978
979 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(R));
980
981 ths->psi_index_f = (INT*) Y(malloc)((size_t)(ths->M_total) * sizeof(INT));
982 ths->psi_index_g = (INT*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(INT));
983 }
984
985 if (ths->flags & FFTW_INIT)
986 {
987 ths->g1 = (R*)Y(malloc)((size_t)(ths->n_total) * sizeof(R));
988
989 if (ths->flags & FFT_OUT_OF_PLACE)
990 ths->g2 = (R*) Y(malloc)((size_t)(ths->n_total) * sizeof(R));
991 else
992 ths->g2 = ths->g1;
993
994 {
995 int *_n = Y(malloc)((size_t)(ths->d) * sizeof(int));
996
997 for (t = 0; t < ths->d; t++)
998 _n[t] = (int)(ths->n[t]);
999
1000 ths->my_fftw_r2r_plan = FFTW(plan_r2r)((int)ths->d, _n, ths->g1, ths->g2, ths->r2r_kind, ths->fftw_flags);
1001 Y(free)(_n);
1002 }
1003 }
1004
1005 // if(ths->flags & NFFT_SORT_NODES)
1006 // ths->index_x = (INT*) Y(malloc)(sizeof(INT)*2*ths->M_total);
1007 // else
1008 // ths->index_x = NULL;
1009
1010 ths->mv_trafo = (void (*) (void* ))X(trafo);
1011 ths->mv_adjoint = (void (*) (void* ))X(adjoint);
1012 }
1013
X(init)1014 void X(init)(X(plan) *ths, int d, int *N, int M_total)
1015 {
1016 int t; /* index over all dimensions */
1017
1018 ths->d = (INT)d;
1019
1020 ths->N = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1021
1022 for (t = 0; t < d; t++)
1023 ths->N[t] = (INT)N[t];
1024
1025 ths->M_total = (INT)M_total;
1026
1027 ths->n = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1028
1029 for (t = 0; t < d; t++)
1030 ths->n[t] = 2 * (Y(next_power_of_2)(ths->N[t]) - 1) + OFFSET;
1031
1032 ths->m = WINDOW_HELP_ESTIMATE_m;
1033
1034 if (d > 1)
1035 {
1036 //#ifdef _OPENMP
1037 // ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1038 // FFTW_INIT | NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT;
1039 //#else
1040 ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1041 FFTW_INIT | NFFT_SORT_NODES;
1042 //#endif
1043 }
1044 else
1045 ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1046 FFTW_INIT | FFT_OUT_OF_PLACE;
1047
1048 ths->fftw_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT;
1049
1050 init_help(ths);
1051 }
1052
X(init_guru)1053 void X(init_guru)(X(plan) *ths, int d, int *N, int M_total, int *n, int m,
1054 unsigned flags, unsigned fftw_flags)
1055 {
1056 INT t; /* index over all dimensions */
1057
1058 ths->d = (INT)d;
1059 ths->M_total = (INT)M_total;
1060 ths->N = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1061
1062 for (t = 0; t < d; t++)
1063 ths->N[t] = (INT)N[t];
1064
1065 ths->n = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1066
1067 for (t = 0; t < d; t++)
1068 ths->n[t] = (INT)n[t];
1069
1070 ths->m = (INT)m;
1071
1072 ths->flags = flags;
1073 ths->fftw_flags = fftw_flags;
1074
1075 init_help(ths);
1076 }
1077
X(init_1d)1078 void X(init_1d)(X(plan) *ths, int N1, int M_total)
1079 {
1080 int N[1];
1081
1082 N[0] = N1;
1083
1084 X(init)(ths, 1, N, M_total);
1085 }
1086
X(init_2d)1087 void X(init_2d)(X(plan) *ths, int N1, int N2, int M_total)
1088 {
1089 int N[2];
1090
1091 N[0] = N1;
1092 N[1] = N2;
1093
1094 X(init)(ths, 2, N, M_total);
1095 }
1096
X(init_3d)1097 void X(init_3d)(X(plan) *ths, int N1, int N2, int N3, int M_total)
1098 {
1099 int N[3];
1100
1101 N[0] = N1;
1102 N[1] = N2;
1103 N[2] = N3;
1104
1105 X(init)(ths, 3, N, M_total);
1106 }
1107
X(check)1108 const char* X(check)(X(plan) *ths)
1109 {
1110 INT j;
1111
1112 if (!ths->f)
1113 return "Member f not initialized.";
1114
1115 if (!ths->x)
1116 return "Member x not initialized.";
1117
1118 if (!ths->f_hat)
1119 return "Member f_hat not initialized.";
1120
1121 for (j = 0; j < ths->M_total * ths->d; j++)
1122 {
1123 if ((ths->x[j] < K(0.0)) || (ths->x[j] >= K(0.5)))
1124 {
1125 return "ths->x out of range [0.0,0.5)";
1126 }
1127 }
1128
1129 for (j = 0; j < ths->d; j++)
1130 {
1131 if (ths->sigma[j] <= 1)
1132 return "Oversampling factor too small";
1133
1134 if(ths->N[j] - 1 <= ths->m)
1135 return "Polynomial degree N is smaller than cut-off m";
1136 }
1137 return 0;
1138 }
1139
X(finalize)1140 void X(finalize)(X(plan) *ths)
1141 {
1142 INT t; /* index over dimensions */
1143
1144 // if(ths->flags & NFFT_SORT_NODES)
1145 // Y(free)(ths->index_x);
1146
1147 if (ths->flags & FFTW_INIT)
1148 {
1149 #ifdef _OPENMP
1150 #pragma omp critical (nfft_omp_critical_fftw_plan)
1151 #endif
1152 FFTW(destroy_plan)(ths->my_fftw_r2r_plan);
1153
1154 if (ths->flags & FFT_OUT_OF_PLACE)
1155 Y(free)(ths->g2);
1156
1157 Y(free)(ths->g1);
1158 }
1159
1160 if(ths->flags & PRE_FULL_PSI)
1161 {
1162 Y(free)(ths->psi_index_g);
1163 Y(free)(ths->psi_index_f);
1164 Y(free)(ths->psi);
1165 }
1166
1167 if (ths->flags & PRE_PSI)
1168 Y(free)(ths->psi);
1169
1170 if(ths->flags & PRE_FG_PSI)
1171 Y(free)(ths->psi);
1172
1173 if(ths->flags & PRE_LIN_PSI)
1174 Y(free)(ths->psi);
1175
1176 if (ths->flags & PRE_PHI_HUT)
1177 {
1178 for (t = 0; t < ths->d; t++)
1179 Y(free)(ths->c_phi_inv[t]);
1180 Y(free)(ths->c_phi_inv);
1181 }
1182
1183 if (ths->flags & MALLOC_F)
1184 Y(free)(ths->f);
1185
1186 if(ths->flags & MALLOC_F_HAT)
1187 Y(free)(ths->f_hat);
1188
1189 if (ths->flags & MALLOC_X)
1190 Y(free)(ths->x);
1191
1192 WINDOW_HELP_FINALIZE;
1193
1194 Y(free)(ths->N);
1195 Y(free)(ths->n);
1196 Y(free)(ths->sigma);
1197
1198 Y(free)(ths->r2r_kind);
1199 } /* finalize */
1200