1 /*
2 * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19
20 /* $Id: rexec2.c,v 1.20 2003/03/16 23:43:46 stevenj Exp $ */
21 /*
22 * rexec2.c -- alternate rfftw executor, specifically designed for the
23 * multidimensional transforms. Given an extra work array,
24 * expects complex data in FFTW_COMPLEX format, and does
25 * not destroy the input in hc2real transforms.
26 */
27
28 #include "fftw-int.h"
29 #include "rfftw.h"
30
31 /* copies halfcomplex array in (contiguous) to fftw_complex array out. */
rfftw_hc2c(int n,fftw_real * in,fftw_complex * out,int ostride)32 void rfftw_hc2c(int n, fftw_real *in, fftw_complex *out, int ostride)
33 {
34 int n2 = (n + 1) / 2;
35 int i = 1;
36
37 c_re(out[0]) = in[0];
38 c_im(out[0]) = 0.0;
39 for (; i < ((n2 - 1) & 3) + 1; ++i) {
40 c_re(out[i * ostride]) = in[i];
41 c_im(out[i * ostride]) = in[n - i];
42 }
43 for (; i < n2; i += 4) {
44 fftw_real r0, r1, r2, r3;
45 fftw_real i0, i1, i2, i3;
46 r0 = in[i];
47 r1 = in[i + 1];
48 r2 = in[i + 2];
49 r3 = in[i + 3];
50 i3 = in[n - (i + 3)];
51 i2 = in[n - (i + 2)];
52 i1 = in[n - (i + 1)];
53 i0 = in[n - i];
54 c_re(out[i * ostride]) = r0;
55 c_im(out[i * ostride]) = i0;
56 c_re(out[(i + 1) * ostride]) = r1;
57 c_im(out[(i + 1) * ostride]) = i1;
58 c_re(out[(i + 2) * ostride]) = r2;
59 c_im(out[(i + 2) * ostride]) = i2;
60 c_re(out[(i + 3) * ostride]) = r3;
61 c_im(out[(i + 3) * ostride]) = i3;
62 }
63 if ((n & 1) == 0) { /* store the Nyquist frequency */
64 c_re(out[n2 * ostride]) = in[n2];
65 c_im(out[n2 * ostride]) = 0.0;
66 }
67 }
68
69 /* reverse of rfftw_hc2c */
rfftw_c2hc(int n,fftw_complex * in,int istride,fftw_real * out)70 void rfftw_c2hc(int n, fftw_complex *in, int istride, fftw_real *out)
71 {
72 int n2 = (n + 1) / 2;
73 int i = 1;
74
75 out[0] = c_re(in[0]);
76 for (; i < ((n2 - 1) & 3) + 1; ++i) {
77 out[i] = c_re(in[i * istride]);
78 out[n - i] = c_im(in[i * istride]);
79 }
80 for (; i < n2; i += 4) {
81 fftw_real r0, r1, r2, r3;
82 fftw_real i0, i1, i2, i3;
83 r0 = c_re(in[i * istride]);
84 i0 = c_im(in[i * istride]);
85 r1 = c_re(in[(i + 1) * istride]);
86 i1 = c_im(in[(i + 1) * istride]);
87 r2 = c_re(in[(i + 2) * istride]);
88 i2 = c_im(in[(i + 2) * istride]);
89 r3 = c_re(in[(i + 3) * istride]);
90 i3 = c_im(in[(i + 3) * istride]);
91 out[i] = r0;
92 out[i + 1] = r1;
93 out[i + 2] = r2;
94 out[i + 3] = r3;
95 out[n - (i + 3)] = i3;
96 out[n - (i + 2)] = i2;
97 out[n - (i + 1)] = i1;
98 out[n - i] = i0;
99 }
100 if ((n & 1) == 0) /* store the Nyquist frequency */
101 out[n2] = c_re(in[n2 * istride]);
102 }
103
104 /*
105 * in: array of n real numbers (* howmany).
106 * out: array of n/2 + 1 complex numbers (* howmany).
107 * work: array of n real numbers (stride 1)
108 *
109 * We must have out != in if dist < stride.
110 */
rfftw_real2c_aux(fftw_plan plan,int howmany,fftw_real * in,int istride,int idist,fftw_complex * out,int ostride,int odist,fftw_real * work)111 void rfftw_real2c_aux(fftw_plan plan, int howmany,
112 fftw_real *in, int istride, int idist,
113 fftw_complex *out, int ostride, int odist,
114 fftw_real *work)
115 {
116 fftw_plan_node *p = plan->root;
117 int j;
118
119 switch (p->type) {
120 case FFTW_REAL2HC:
121 {
122 fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet;
123 int n = plan->n;
124 int n2 = (n & 1) ? 0 : (n + 1) / 2;
125
126 HACK_ALIGN_STACK_ODD;
127 for (j = 0; j < howmany; ++j, out += odist) {
128 codelet(in + j * idist,
129 &c_re(*out),
130 &c_im(*out),
131 istride, ostride * 2, ostride * 2);
132 c_im(out[0]) = 0.0;
133 c_im(out[n2 * ostride]) = 0.0;
134 }
135 break;
136 }
137
138 default:
139 {
140 int n = plan->n;
141 fftw_recurse_kind recurse_kind = plan->recurse_kind;
142
143 for (j = 0; j < howmany; ++j, in += idist, out += odist) {
144 rfftw_executor_simple(n, in, work, p, istride, 1,
145 recurse_kind);
146 rfftw_hc2c(n, work, out, ostride);
147 }
148 break;
149 }
150 }
151 }
152
153 /*
154 * in: array of n/2 + 1 complex numbers (* howmany).
155 * out: array of n real numbers (* howmany).
156 * work: array of n real numbers (stride 1)
157 *
158 * We must have out != in if dist < stride.
159 */
rfftw_c2real_aux(fftw_plan plan,int howmany,fftw_complex * in,int istride,int idist,fftw_real * out,int ostride,int odist,fftw_real * work)160 void rfftw_c2real_aux(fftw_plan plan, int howmany,
161 fftw_complex *in, int istride, int idist,
162 fftw_real *out, int ostride, int odist,
163 fftw_real *work)
164 {
165 fftw_plan_node *p = plan->root;
166
167 switch (p->type) {
168 case FFTW_HC2REAL:
169 {
170 fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet;
171 int j;
172
173 HACK_ALIGN_STACK_ODD;
174 for (j = 0; j < howmany; ++j)
175 codelet(&c_re(*(in + j * idist)),
176 &c_im(*(in + j * idist)),
177 out + j * odist,
178 istride * 2, istride * 2, ostride);
179 break;
180 }
181
182 default:
183 {
184 int j, n = plan->n;
185 fftw_recurse_kind recurse_kind = plan->recurse_kind;
186
187 for (j = 0; j < howmany; ++j, in += idist, out += odist) {
188 rfftw_c2hc(n, in, istride, work);
189 rfftw_executor_simple(n, work, out, p, 1, ostride,
190 recurse_kind);
191 }
192 break;
193 }
194 }
195 }
196
197 /*
198 * The following two functions are similar to the ones above, BUT:
199 *
200 * work must contain n * howmany elements (stride 1)
201 *
202 * Can handle out == in for any stride/dist.
203 */
rfftw_real2c_overlap_aux(fftw_plan plan,int howmany,fftw_real * in,int istride,int idist,fftw_complex * out,int ostride,int odist,fftw_real * work)204 void rfftw_real2c_overlap_aux(fftw_plan plan, int howmany,
205 fftw_real *in, int istride, int idist,
206 fftw_complex *out, int ostride, int odist,
207 fftw_real *work)
208 {
209 int n = plan->n;
210 int j;
211
212 rfftw(plan, howmany, in, istride, idist, work, 1, n);
213
214 /* copy from work to out: */
215 for (j = 0; j < howmany; ++j, work += n, out += odist)
216 rfftw_hc2c(n, work, out, ostride);
217 }
218
rfftw_c2real_overlap_aux(fftw_plan plan,int howmany,fftw_complex * in,int istride,int idist,fftw_real * out,int ostride,int odist,fftw_real * work)219 void rfftw_c2real_overlap_aux(fftw_plan plan, int howmany,
220 fftw_complex *in, int istride, int idist,
221 fftw_real *out, int ostride, int odist,
222 fftw_real *work)
223 {
224 int n = plan->n;
225 int j;
226
227 /* copy from in to work: */
228 for (j = 0; j < howmany; ++j, in += idist)
229 rfftw_c2hc(n, in, istride, work + j * n);
230
231 rfftw(plan, howmany, work, 1, n, out, ostride, odist);
232 }
233