1 /*
2  * Copyright (c) 2003, 2007-14 Matteo Frigo
3  * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18  *
19  */
20 
21 #include "rdft/rdft.h"
22 
23 typedef struct {
24      solver super;
25      rdft_kind kind;
26 } S;
27 
28 typedef struct {
29      plan_rdft super;
30      twid *td;
31      INT n, is, os;
32      rdft_kind kind;
33 } P;
34 
35 /***************************************************************************/
36 
cdot_r2hc(INT n,const E * x,const R * w,R * or0,R * oi1)37 static void cdot_r2hc(INT n, const E *x, const R *w, R *or0, R *oi1)
38 {
39      INT i;
40 
41      E rr = x[0], ri = 0;
42      x += 1;
43      for (i = 1; i + i < n; ++i) {
44 	  rr += x[0] * w[0];
45 	  ri += x[1] * w[1];
46 	  x += 2; w += 2;
47      }
48      *or0 = rr;
49      *oi1 = ri;
50 }
51 
hartley_r2hc(INT n,const R * xr,INT xs,E * o,R * pr)52 static void hartley_r2hc(INT n, const R *xr, INT xs, E *o, R *pr)
53 {
54      INT i;
55      E sr;
56      o[0] = sr = xr[0]; o += 1;
57      for (i = 1; i + i < n; ++i) {
58 	  R a, b;
59 	  a = xr[i * xs];
60 	  b =  xr[(n - i) * xs];
61 	  sr += (o[0] = a + b);
62 #if FFT_SIGN == -1
63 	  o[1] = b - a;
64 #else
65 	  o[1] = a - b;
66 #endif
67 	  o += 2;
68      }
69      *pr = sr;
70 }
71 
apply_r2hc(const plan * ego_,R * I,R * O)72 static void apply_r2hc(const plan *ego_, R *I, R *O)
73 {
74      const P *ego = (const P *) ego_;
75      INT i;
76      INT n = ego->n, is = ego->is, os = ego->os;
77      const R *W = ego->td->W;
78      E *buf;
79      size_t bufsz = n * sizeof(E);
80 
81      BUF_ALLOC(E *, buf, bufsz);
82      hartley_r2hc(n, I, is, buf, O);
83 
84      for (i = 1; i + i < n; ++i) {
85 	  cdot_r2hc(n, buf, W, O + i * os, O + (n - i) * os);
86 	  W += n - 1;
87      }
88 
89      BUF_FREE(buf, bufsz);
90 }
91 
92 
cdot_hc2r(INT n,const E * x,const R * w,R * or0,R * or1)93 static void cdot_hc2r(INT n, const E *x, const R *w, R *or0, R *or1)
94 {
95      INT i;
96 
97      E rr = x[0], ii = 0;
98      x += 1;
99      for (i = 1; i + i < n; ++i) {
100 	  rr += x[0] * w[0];
101 	  ii += x[1] * w[1];
102 	  x += 2; w += 2;
103      }
104 #if FFT_SIGN == -1
105      *or0 = rr - ii;
106      *or1 = rr + ii;
107 #else
108      *or0 = rr + ii;
109      *or1 = rr - ii;
110 #endif
111 }
112 
hartley_hc2r(INT n,const R * x,INT xs,E * o,R * pr)113 static void hartley_hc2r(INT n, const R *x, INT xs, E *o, R *pr)
114 {
115      INT i;
116      E sr;
117 
118      o[0] = sr = x[0]; o += 1;
119      for (i = 1; i + i < n; ++i) {
120 	  sr += (o[0] = x[i * xs] + x[i * xs]);
121 	  o[1] = x[(n - i) * xs] + x[(n - i) * xs];
122 	  o += 2;
123      }
124      *pr = sr;
125 }
126 
apply_hc2r(const plan * ego_,R * I,R * O)127 static void apply_hc2r(const plan *ego_, R *I, R *O)
128 {
129      const P *ego = (const P *) ego_;
130      INT i;
131      INT n = ego->n, is = ego->is, os = ego->os;
132      const R *W = ego->td->W;
133      E *buf;
134      size_t bufsz = n * sizeof(E);
135 
136      BUF_ALLOC(E *, buf, bufsz);
137      hartley_hc2r(n, I, is, buf, O);
138 
139      for (i = 1; i + i < n; ++i) {
140 	  cdot_hc2r(n, buf, W, O + i * os, O + (n - i) * os);
141 	  W += n - 1;
142      }
143 
144      BUF_FREE(buf, bufsz);
145 }
146 
147 
148 /***************************************************************************/
149 
awake(plan * ego_,enum wakefulness wakefulness)150 static void awake(plan *ego_, enum wakefulness wakefulness)
151 {
152      P *ego = (P *) ego_;
153      static const tw_instr half_tw[] = {
154 	  { TW_HALF, 1, 0 },
155 	  { TW_NEXT, 1, 0 }
156      };
157 
158      X(twiddle_awake)(wakefulness, &ego->td, half_tw, ego->n, ego->n,
159 		      (ego->n - 1) / 2);
160 }
161 
print(const plan * ego_,printer * p)162 static void print(const plan *ego_, printer *p)
163 {
164      const P *ego = (const P *) ego_;
165 
166      p->print(p, "(rdft-generic-%s-%D)",
167 	      ego->kind == R2HC ? "r2hc" : "hc2r",
168 	      ego->n);
169 }
170 
applicable(const S * ego,const problem * p_,const planner * plnr)171 static int applicable(const S *ego, const problem *p_,
172 		      const planner *plnr)
173 {
174      const problem_rdft *p = (const problem_rdft *) p_;
175      return (1
176 	     && p->sz->rnk == 1
177 	     && p->vecsz->rnk == 0
178 	     && (p->sz->dims[0].n % 2) == 1
179 	     && CIMPLIES(NO_LARGE_GENERICP(plnr), p->sz->dims[0].n < GENERIC_MIN_BAD)
180 	     && CIMPLIES(NO_SLOWP(plnr), p->sz->dims[0].n > GENERIC_MAX_SLOW)
181 	     && X(is_prime)(p->sz->dims[0].n)
182 	     && p->kind[0] == ego->kind
183 	  );
184 }
185 
mkplan(const solver * ego_,const problem * p_,planner * plnr)186 static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
187 {
188      const S *ego = (const S *)ego_;
189      const problem_rdft *p;
190      P *pln;
191      INT n;
192 
193      static const plan_adt padt = {
194 	  X(rdft_solve), awake, print, X(plan_null_destroy)
195      };
196 
197      if (!applicable(ego, p_, plnr))
198           return (plan *)0;
199 
200      p = (const problem_rdft *) p_;
201      pln = MKPLAN_RDFT(P, &padt,
202 		       R2HC_KINDP(p->kind[0]) ? apply_r2hc : apply_hc2r);
203 
204      pln->n = n = p->sz->dims[0].n;
205      pln->is = p->sz->dims[0].is;
206      pln->os = p->sz->dims[0].os;
207      pln->td = 0;
208      pln->kind = ego->kind;
209 
210      pln->super.super.ops.add = (n-1) * 2.5;
211      pln->super.super.ops.mul = 0;
212      pln->super.super.ops.fma = 0.5 * (n-1) * (n-1) ;
213 #if 0 /* these are nice pipelined sequential loads and should cost nothing */
214      pln->super.super.ops.other = (n-1)*(2 + 1 + (n-1));  /* approximate */
215 #endif
216 
217      return &(pln->super.super);
218 }
219 
mksolver(rdft_kind kind)220 static solver *mksolver(rdft_kind kind)
221 {
222      static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 };
223      S *slv = MKSOLVER(S, &sadt);
224      slv->kind = kind;
225      return &(slv->super);
226 }
227 
X(rdft_generic_register)228 void X(rdft_generic_register)(planner *p)
229 {
230      REGISTER_SOLVER(p, mksolver(R2HC));
231      REGISTER_SOLVER(p, mksolver(HC2R));
232 }
233