1 /*
2  * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  *
18  */
19 
20 /* $Id: rexec2.c,v 1.20 2003/03/16 23:43:46 stevenj Exp $ */
21 /*
22  * rexec2.c -- alternate rfftw executor, specifically designed for the
23  *             multidimensional transforms.  Given an extra work array,
24  *             expects complex data in FFTW_COMPLEX format, and does
25  *             not destroy the input in hc2real transforms.
26  */
27 
28 #include "fftw-int.h"
29 #include "rfftw.h"
30 
31 /* copies halfcomplex array in (contiguous) to fftw_complex array out. */
rfftw_hc2c(int n,fftw_real * in,fftw_complex * out,int ostride)32 void rfftw_hc2c(int n, fftw_real *in, fftw_complex *out, int ostride)
33 {
34      int n2 = (n + 1) / 2;
35      int i = 1;
36 
37      c_re(out[0]) = in[0];
38      c_im(out[0]) = 0.0;
39      for (; i < ((n2 - 1) & 3) + 1; ++i) {
40 	  c_re(out[i * ostride]) = in[i];
41 	  c_im(out[i * ostride]) = in[n - i];
42      }
43      for (; i < n2; i += 4) {
44 	  fftw_real r0, r1, r2, r3;
45 	  fftw_real i0, i1, i2, i3;
46 	  r0 = in[i];
47 	  r1 = in[i + 1];
48 	  r2 = in[i + 2];
49 	  r3 = in[i + 3];
50 	  i3 = in[n - (i + 3)];
51 	  i2 = in[n - (i + 2)];
52 	  i1 = in[n - (i + 1)];
53 	  i0 = in[n - i];
54 	  c_re(out[i * ostride]) = r0;
55 	  c_im(out[i * ostride]) = i0;
56 	  c_re(out[(i + 1) * ostride]) = r1;
57 	  c_im(out[(i + 1) * ostride]) = i1;
58 	  c_re(out[(i + 2) * ostride]) = r2;
59 	  c_im(out[(i + 2) * ostride]) = i2;
60 	  c_re(out[(i + 3) * ostride]) = r3;
61 	  c_im(out[(i + 3) * ostride]) = i3;
62      }
63      if ((n & 1) == 0) {	/* store the Nyquist frequency */
64 	  c_re(out[n2 * ostride]) = in[n2];
65 	  c_im(out[n2 * ostride]) = 0.0;
66      }
67 }
68 
69 /* reverse of rfftw_hc2c */
rfftw_c2hc(int n,fftw_complex * in,int istride,fftw_real * out)70 void rfftw_c2hc(int n, fftw_complex *in, int istride, fftw_real *out)
71 {
72      int n2 = (n + 1) / 2;
73      int i = 1;
74 
75      out[0] = c_re(in[0]);
76      for (; i < ((n2 - 1) & 3) + 1; ++i) {
77 	  out[i] = c_re(in[i * istride]);
78 	  out[n - i] = c_im(in[i * istride]);
79      }
80      for (; i < n2; i += 4) {
81 	  fftw_real r0, r1, r2, r3;
82 	  fftw_real i0, i1, i2, i3;
83 	  r0 = c_re(in[i * istride]);
84 	  i0 = c_im(in[i * istride]);
85 	  r1 = c_re(in[(i + 1) * istride]);
86 	  i1 = c_im(in[(i + 1) * istride]);
87 	  r2 = c_re(in[(i + 2) * istride]);
88 	  i2 = c_im(in[(i + 2) * istride]);
89 	  r3 = c_re(in[(i + 3) * istride]);
90 	  i3 = c_im(in[(i + 3) * istride]);
91 	  out[i] = r0;
92 	  out[i + 1] = r1;
93 	  out[i + 2] = r2;
94 	  out[i + 3] = r3;
95 	  out[n - (i + 3)] = i3;
96 	  out[n - (i + 2)] = i2;
97 	  out[n - (i + 1)] = i1;
98 	  out[n - i] = i0;
99      }
100      if ((n & 1) == 0)		/* store the Nyquist frequency */
101 	  out[n2] = c_re(in[n2 * istride]);
102 }
103 
104 /*
105  * in: array of n real numbers (* howmany).
106  * out: array of n/2 + 1 complex numbers (* howmany).
107  * work: array of n real numbers (stride 1)
108  *
109  * We must have out != in if dist < stride.
110  */
rfftw_real2c_aux(fftw_plan plan,int howmany,fftw_real * in,int istride,int idist,fftw_complex * out,int ostride,int odist,fftw_real * work)111 void rfftw_real2c_aux(fftw_plan plan, int howmany,
112 		      fftw_real *in, int istride, int idist,
113 		      fftw_complex *out, int ostride, int odist,
114 		      fftw_real *work)
115 {
116      fftw_plan_node *p = plan->root;
117      int j;
118 
119      switch (p->type) {
120 	 case FFTW_REAL2HC:
121 	      {
122 		   fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet;
123 		   int n = plan->n;
124 		   int n2 = (n & 1) ? 0 : (n + 1) / 2;
125 
126 		   HACK_ALIGN_STACK_ODD;
127 		   for (j = 0; j < howmany; ++j, out += odist) {
128 			codelet(in + j * idist,
129 				&c_re(*out),
130 				&c_im(*out),
131 				istride, ostride * 2, ostride * 2);
132 			c_im(out[0]) = 0.0;
133 			c_im(out[n2 * ostride]) = 0.0;
134 		   }
135 		   break;
136 	      }
137 
138 	 default:
139 	      {
140 		   int n = plan->n;
141 		   fftw_recurse_kind recurse_kind = plan->recurse_kind;
142 
143 		   for (j = 0; j < howmany; ++j, in += idist, out += odist) {
144 			rfftw_executor_simple(n, in, work, p, istride, 1,
145 					      recurse_kind);
146 			rfftw_hc2c(n, work, out, ostride);
147 		   }
148 		   break;
149 	      }
150      }
151 }
152 
153 /*
154  * in: array of n/2 + 1 complex numbers (* howmany).
155  * out: array of n real numbers (* howmany).
156  * work: array of n real numbers (stride 1)
157  *
158  * We must have out != in if dist < stride.
159  */
rfftw_c2real_aux(fftw_plan plan,int howmany,fftw_complex * in,int istride,int idist,fftw_real * out,int ostride,int odist,fftw_real * work)160 void rfftw_c2real_aux(fftw_plan plan, int howmany,
161 		      fftw_complex *in, int istride, int idist,
162 		      fftw_real *out, int ostride, int odist,
163 		      fftw_real *work)
164 {
165      fftw_plan_node *p = plan->root;
166 
167      switch (p->type) {
168 	 case FFTW_HC2REAL:
169 	      {
170 		   fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet;
171 		   int j;
172 
173 		   HACK_ALIGN_STACK_ODD;
174 		   for (j = 0; j < howmany; ++j)
175 			codelet(&c_re(*(in + j * idist)),
176 				&c_im(*(in + j * idist)),
177 				out + j * odist,
178 				istride * 2, istride * 2, ostride);
179 		   break;
180 	      }
181 
182 	 default:
183 	      {
184 		   int j, n = plan->n;
185 		   fftw_recurse_kind recurse_kind = plan->recurse_kind;
186 
187 		   for (j = 0; j < howmany; ++j, in += idist, out += odist) {
188 			rfftw_c2hc(n, in, istride, work);
189 			rfftw_executor_simple(n, work, out, p, 1, ostride,
190 					      recurse_kind);
191 		   }
192 		   break;
193 	      }
194      }
195 }
196 
197 /*
198  * The following two functions are similar to the ones above, BUT:
199  *
200  * work must contain n * howmany elements (stride 1)
201  *
202  * Can handle out == in for any stride/dist.
203  */
rfftw_real2c_overlap_aux(fftw_plan plan,int howmany,fftw_real * in,int istride,int idist,fftw_complex * out,int ostride,int odist,fftw_real * work)204 void rfftw_real2c_overlap_aux(fftw_plan plan, int howmany,
205 			      fftw_real *in, int istride, int idist,
206 			      fftw_complex *out, int ostride, int odist,
207 			      fftw_real *work)
208 {
209      int n = plan->n;
210      int j;
211 
212      rfftw(plan, howmany, in, istride, idist, work, 1, n);
213 
214      /* copy from work to out: */
215      for (j = 0; j < howmany; ++j, work += n, out += odist)
216 	  rfftw_hc2c(n, work, out, ostride);
217 }
218 
rfftw_c2real_overlap_aux(fftw_plan plan,int howmany,fftw_complex * in,int istride,int idist,fftw_real * out,int ostride,int odist,fftw_real * work)219 void rfftw_c2real_overlap_aux(fftw_plan plan, int howmany,
220 			      fftw_complex *in, int istride, int idist,
221 			      fftw_real *out, int ostride, int odist,
222 			      fftw_real *work)
223 {
224      int n = plan->n;
225      int j;
226 
227      /* copy from in to work: */
228      for (j = 0; j < howmany; ++j, in += idist)
229 	  rfftw_c2hc(n, in, istride, work + j * n);
230 
231      rfftw(plan, howmany, work, 1, n, out, ostride, odist);
232 }
233