/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
20 
/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Thu Dec 10 07:04:42 EST 2020 */
23 
24 #include "dft/codelet-dft.h"
25 
26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27 
/* Generated by: ../../../genfft/gen_notw_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name n1fv_20 -include dft/simd/n1f.h */
29 
/*
 * This function contains 104 FP additions, 50 FP multiplications,
 * (or, 58 additions, 4 multiplications, 46 fused multiply/add),
 * 53 stack variables, 4 constants, and 40 memory accesses
 */
35 #include "dft/simd/n1f.h"
36 
/*
 * n1fv_20 (FMA variant): size-20 DFT codelet emitted by genfft
 * (gen_notw_c -fma -simd -n 20).
 *
 * Each pass of the loop loads the 20 inputs xi[WS(is, 0..19)], combines
 * them, and stores the 20 outputs xo[WS(os, 0..19)].  After every pass xi
 * and xo advance by VL * ivs and VL * ovs and the counter drops by VL; the
 * loop ends once the counter is no longer positive.
 *
 * Parameters:
 *   ri, ro   - input / output base pointers (accessed through xi / xo).
 *   ii, io   - not referenced anywhere in this body.
 *   is, os   - strides between the 20 input / output elements (via WS()).
 *   v        - number of transforms to perform, consumed VL at a time.
 *   ivs, ovs - distance between consecutive transforms on input / output.
 *
 * NOTE(review): presumably ii/io are unused because the n1f.h SIMD layout
 * reaches real and imaginary parts through one pointer -- confirm in n1f.h.
 */
static void n1fv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DVK(KP559016994, +0.559016994374947424102293417182819058860154590); /* ~ sqrt(5)/4 */
     DVK(KP618033988, +0.618033988749894848204586834365638117720309180); /* ~ (sqrt(5)-1)/2 */
     DVK(KP951056516, +0.951056516295153572116439333379382143405698634); /* ~ sin(2*pi/5) */
     DVK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
     {
          INT i;
          const R *xi;
          R *xo;
          xi = ri;
          xo = ro;
          for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) {
               V T3, T1r, Tm, T13, TG, TN, TO, TH, T16, T19, T1a, T1v, T1w, T1x, T1s;
               V T1t, T1u, T1d, T1g, T1h, Ti, TE, TB, TL, Tj, TC;
               /*
                * Load phase: inputs are always taken in pairs (k, k + 10);
                * every pair yields one sum and one difference.
                */
               {
                    V T1, T2, T11, Tk, Tl, T12;
                    T1 = LD(&(xi[0]), ivs, &(xi[0]));
                    T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
                    T11 = VADD(T1, T2);
                    Tk = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
                    Tl = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
                    T12 = VADD(Tk, Tl);
                    T3 = VSUB(T1, T2);
                    T1r = VADD(T11, T12);
                    Tm = VSUB(Tk, Tl);
                    T13 = VSUB(T11, T12);
               }
               {
                    V T6, T14, Tw, T1c, Tz, T1f, T9, T17, Td, T1b, Tp, T15, Ts, T18, Tg;
                    V T1e;
                    {
                         V T4, T5, Tu, Tv;
                         T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
                         T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
                         T6 = VSUB(T4, T5);
                         T14 = VADD(T4, T5);
                         Tu = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
                         Tv = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
                         Tw = VSUB(Tu, Tv);
                         T1c = VADD(Tu, Tv);
                    }
                    {
                         V Tx, Ty, T7, T8;
                         Tx = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
                         Ty = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
                         Tz = VSUB(Tx, Ty);
                         T1f = VADD(Tx, Ty);
                         T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
                         T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
                         T9 = VSUB(T7, T8);
                         T17 = VADD(T7, T8);
                    }
                    {
                         V Tb, Tc, Tn, To;
                         Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
                         Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
                         Td = VSUB(Tb, Tc);
                         T1b = VADD(Tb, Tc);
                         Tn = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
                         To = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
                         Tp = VSUB(Tn, To);
                         T15 = VADD(Tn, To);
                    }
                    {
                         V Tq, Tr, Te, Tf;
                         Tq = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
                         Tr = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
                         Ts = VSUB(Tq, Tr);
                         T18 = VADD(Tq, Tr);
                         Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
                         Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
                         Tg = VSUB(Te, Tf);
                         T1e = VADD(Te, Tf);
                    }
                    /* Second-level combinations of the pair sums and differences. */
                    TG = VSUB(Ts, Tp);
                    TN = VSUB(T6, T9);
                    TO = VSUB(Td, Tg);
                    TH = VSUB(Tz, Tw);
                    T16 = VSUB(T14, T15);
                    T19 = VSUB(T17, T18);
                    T1a = VADD(T16, T19);
                    T1v = VADD(T1b, T1c);
                    T1w = VADD(T1e, T1f);
                    T1x = VADD(T1v, T1w);
                    T1s = VADD(T14, T15);
                    T1t = VADD(T17, T18);
                    T1u = VADD(T1s, T1t);
                    T1d = VSUB(T1b, T1c);
                    T1g = VSUB(T1e, T1f);
                    T1h = VADD(T1d, T1g);
                    {
                         V Ta, Th, Tt, TA;
                         Ta = VADD(T6, T9);
                         Th = VADD(Td, Tg);
                         Ti = VADD(Ta, Th);
                         TE = VSUB(Ta, Th);
                         Tt = VADD(Tp, Ts);
                         TA = VADD(Tw, Tz);
                         TB = VADD(Tt, TA);
                         TL = VSUB(TA, Tt);
                    }
               }
               /*
                * Outputs 5 and 15, built from the pair differences.
                * NOTE(review): VFMAI(b, c) / VFNMSI(b, c) presumably compute
                * c + i*b / c - i*b -- confirm against the SIMD headers.
                */
               Tj = VADD(T3, Ti);
               TC = VADD(Tm, TB);
               ST(&(xo[WS(os, 5)]), VFNMSI(TC, Tj), ovs, &(xo[WS(os, 1)]));
               ST(&(xo[WS(os, 15)]), VFMAI(TC, Tj), ovs, &(xo[WS(os, 1)]));
               /* Outputs 0, 4, 8, 12, 16: built from sums of the pair sums. */
               {
                    V T1A, T1y, T1z, T1E, T1G, T1C, T1D, T1F, T1B;
                    T1A = VSUB(T1u, T1x);
                    T1y = VADD(T1u, T1x);
                    T1z = VFNMS(LDK(KP250000000), T1y, T1r);
                    T1C = VSUB(T1s, T1t);
                    T1D = VSUB(T1v, T1w);
                    T1E = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1D, T1C));
                    T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1C, T1D));
                    ST(&(xo[0]), VADD(T1r, T1y), ovs, &(xo[0]));
                    T1F = VFNMS(LDK(KP559016994), T1A, T1z);
                    ST(&(xo[WS(os, 8)]), VFNMSI(T1G, T1F), ovs, &(xo[0]));
                    ST(&(xo[WS(os, 12)]), VFMAI(T1G, T1F), ovs, &(xo[0]));
                    T1B = VFMA(LDK(KP559016994), T1A, T1z);
                    ST(&(xo[WS(os, 4)]), VFMAI(T1E, T1B), ovs, &(xo[0]));
                    ST(&(xo[WS(os, 16)]), VFNMSI(T1E, T1B), ovs, &(xo[0]));
               }
               /* Outputs 2, 6, 10, 14, 18: built from differences of the pair sums. */
               {
                    V T1k, T1i, T1j, T1o, T1q, T1m, T1n, T1p, T1l;
                    T1k = VSUB(T1a, T1h);
                    T1i = VADD(T1a, T1h);
                    T1j = VFNMS(LDK(KP250000000), T1i, T13);
                    T1m = VSUB(T1d, T1g);
                    T1n = VSUB(T16, T19);
                    T1o = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1n, T1m));
                    T1q = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1m, T1n));
                    ST(&(xo[WS(os, 10)]), VADD(T13, T1i), ovs, &(xo[0]));
                    T1p = VFMA(LDK(KP559016994), T1k, T1j);
                    ST(&(xo[WS(os, 6)]), VFNMSI(T1q, T1p), ovs, &(xo[0]));
                    ST(&(xo[WS(os, 14)]), VFMAI(T1q, T1p), ovs, &(xo[0]));
                    T1l = VFNMS(LDK(KP559016994), T1k, T1j);
                    ST(&(xo[WS(os, 2)]), VFMAI(T1o, T1l), ovs, &(xo[0]));
                    ST(&(xo[WS(os, 18)]), VFNMSI(T1o, T1l), ovs, &(xo[0]));
               }
               /* Remaining odd outputs (1, 3, 7, 9, 11, 13, 17, 19), from the pair differences. */
               {
                    V TI, TP, TX, TU, TM, TW, TF, TT, TK, TD;
                    TI = VFMA(LDK(KP618033988), TH, TG);
                    TP = VFMA(LDK(KP618033988), TO, TN);
                    TX = VFNMS(LDK(KP618033988), TN, TO);
                    TU = VFNMS(LDK(KP618033988), TG, TH);
                    TK = VFNMS(LDK(KP250000000), TB, Tm);
                    TM = VFNMS(LDK(KP559016994), TL, TK);
                    TW = VFMA(LDK(KP559016994), TL, TK);
                    TD = VFNMS(LDK(KP250000000), Ti, T3);
                    TF = VFMA(LDK(KP559016994), TE, TD);
                    TT = VFNMS(LDK(KP559016994), TE, TD);
                    {
                         V TJ, TQ, TZ, T10;
                         TJ = VFMA(LDK(KP951056516), TI, TF);
                         TQ = VFMA(LDK(KP951056516), TP, TM);
                         ST(&(xo[WS(os, 1)]), VFNMSI(TQ, TJ), ovs, &(xo[WS(os, 1)]));
                         ST(&(xo[WS(os, 19)]), VFMAI(TQ, TJ), ovs, &(xo[WS(os, 1)]));
                         TZ = VFMA(LDK(KP951056516), TU, TT);
                         T10 = VFMA(LDK(KP951056516), TX, TW);
                         ST(&(xo[WS(os, 13)]), VFNMSI(T10, TZ), ovs, &(xo[WS(os, 1)]));
                         ST(&(xo[WS(os, 7)]), VFMAI(T10, TZ), ovs, &(xo[WS(os, 1)]));
                    }
                    {
                         V TR, TS, TV, TY;
                         TR = VFNMS(LDK(KP951056516), TI, TF);
                         TS = VFNMS(LDK(KP951056516), TP, TM);
                         ST(&(xo[WS(os, 9)]), VFNMSI(TS, TR), ovs, &(xo[WS(os, 1)]));
                         ST(&(xo[WS(os, 11)]), VFMAI(TS, TR), ovs, &(xo[WS(os, 1)]));
                         TV = VFNMS(LDK(KP951056516), TU, TT);
                         TY = VFNMS(LDK(KP951056516), TX, TW);
                         ST(&(xo[WS(os, 17)]), VFNMSI(TY, TV), ovs, &(xo[WS(os, 1)]));
                         ST(&(xo[WS(os, 3)]), VFMAI(TY, TV), ovs, &(xo[WS(os, 1)]));
                    }
               }
          }
     }
     VLEAVE();
}
217 
218 static const kdft_desc desc = { 20, XSIMD_STRING("n1fv_20"), { 58, 4, 46, 0 }, &GENUS, 0, 0, 0, 0 };
219 
XSIMD(codelet_n1fv_20)220 void XSIMD(codelet_n1fv_20) (planner *p) { X(kdft_register) (p, n1fv_20, &desc);
221 }
222 
223 #else
224 
/* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 20 -name n1fv_20 -include dft/simd/n1f.h */
226 
/*
 * This function contains 104 FP additions, 24 FP multiplications,
 * (or, 92 additions, 12 multiplications, 12 fused multiply/add),
 * 53 stack variables, 4 constants, and 40 memory accesses
 */
232 #include "dft/simd/n1f.h"
233 
/*
 * n1fv_20 (non-FMA variant): size-20 DFT codelet emitted by genfft
 * (gen_notw_c -simd -n 20).
 *
 * Each pass of the loop loads the 20 inputs xi[WS(is, 0..19)], combines
 * them, and stores the 20 outputs xo[WS(os, 0..19)].  After every pass xi
 * and xo advance by VL * ivs and VL * ovs and the counter drops by VL; the
 * loop ends once the counter is no longer positive.
 *
 * Parameters:
 *   ri, ro   - input / output base pointers (accessed through xi / xo).
 *   ii, io   - not referenced anywhere in this body.
 *   is, os   - strides between the 20 input / output elements (via WS()).
 *   v        - number of transforms to perform, consumed VL at a time.
 *   ivs, ovs - distance between consecutive transforms on input / output.
 *
 * NOTE(review): presumably ii/io are unused because the n1f.h SIMD layout
 * reaches real and imaginary parts through one pointer -- confirm in n1f.h.
 */
static void n1fv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DVK(KP587785252, +0.587785252292473129168705954639072768597652438); /* ~ sin(pi/5) */
     DVK(KP951056516, +0.951056516295153572116439333379382143405698634); /* ~ sin(2*pi/5) */
     DVK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
     DVK(KP559016994, +0.559016994374947424102293417182819058860154590); /* ~ sqrt(5)/4 */
     {
          INT i;
          const R *xi;
          R *xo;
          xi = ri;
          xo = ro;
          for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) {
               V T3, T1B, Tm, T1i, TG, TN, TO, TH, T13, T16, T1k, T1u, T1v, T1z, T1r;
               V T1s, T1y, T1a, T1d, T1j, Ti, TD, TB, TL, Tj, TC;
               /*
                * Load phase: inputs are always taken in pairs (k, k + 10);
                * every pair yields one sum and one difference.
                */
               {
                    V T1, T2, T1g, Tk, Tl, T1h;
                    T1 = LD(&(xi[0]), ivs, &(xi[0]));
                    T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
                    T1g = VADD(T1, T2);
                    Tk = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
                    Tl = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
                    T1h = VADD(Tk, Tl);
                    T3 = VSUB(T1, T2);
                    T1B = VADD(T1g, T1h);
                    Tm = VSUB(Tk, Tl);
                    T1i = VSUB(T1g, T1h);
               }
               {
                    V T6, T18, Tw, T12, Tz, T15, T9, T1b, Td, T11, Tp, T19, Ts, T1c, Tg;
                    V T14;
                    {
                         V T4, T5, Tu, Tv;
                         T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
                         T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
                         T6 = VSUB(T4, T5);
                         T18 = VADD(T4, T5);
                         Tu = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
                         Tv = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
                         Tw = VSUB(Tu, Tv);
                         T12 = VADD(Tu, Tv);
                    }
                    {
                         V Tx, Ty, T7, T8;
                         Tx = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
                         Ty = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
                         Tz = VSUB(Tx, Ty);
                         T15 = VADD(Tx, Ty);
                         T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
                         T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
                         T9 = VSUB(T7, T8);
                         T1b = VADD(T7, T8);
                    }
                    {
                         V Tb, Tc, Tn, To;
                         Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
                         Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
                         Td = VSUB(Tb, Tc);
                         T11 = VADD(Tb, Tc);
                         Tn = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
                         To = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
                         Tp = VSUB(Tn, To);
                         T19 = VADD(Tn, To);
                    }
                    {
                         V Tq, Tr, Te, Tf;
                         Tq = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
                         Tr = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
                         Ts = VSUB(Tq, Tr);
                         T1c = VADD(Tq, Tr);
                         Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
                         Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
                         Tg = VSUB(Te, Tf);
                         T14 = VADD(Te, Tf);
                    }
                    /* Second-level combinations of the pair sums and differences. */
                    TG = VSUB(Ts, Tp);
                    TN = VSUB(T6, T9);
                    TO = VSUB(Td, Tg);
                    TH = VSUB(Tz, Tw);
                    T13 = VSUB(T11, T12);
                    T16 = VSUB(T14, T15);
                    T1k = VADD(T13, T16);
                    T1u = VADD(T11, T12);
                    T1v = VADD(T14, T15);
                    T1z = VADD(T1u, T1v);
                    T1r = VADD(T18, T19);
                    T1s = VADD(T1b, T1c);
                    T1y = VADD(T1r, T1s);
                    T1a = VSUB(T18, T19);
                    T1d = VSUB(T1b, T1c);
                    T1j = VADD(T1a, T1d);
                    {
                         V Ta, Th, Tt, TA;
                         Ta = VADD(T6, T9);
                         Th = VADD(Td, Tg);
                         Ti = VADD(Ta, Th);
                         TD = VMUL(LDK(KP559016994), VSUB(Ta, Th));
                         Tt = VADD(Tp, Ts);
                         TA = VADD(Tw, Tz);
                         TB = VADD(Tt, TA);
                         TL = VMUL(LDK(KP559016994), VSUB(TA, Tt));
                    }
               }
               /*
                * Outputs 5 and 15, built from the pair differences.
                * NOTE(review): VBYI(x) presumably multiplies x by i --
                * confirm against the SIMD headers.
                */
               Tj = VADD(T3, Ti);
               TC = VBYI(VADD(Tm, TB));
               ST(&(xo[WS(os, 5)]), VSUB(Tj, TC), ovs, &(xo[WS(os, 1)]));
               ST(&(xo[WS(os, 15)]), VADD(Tj, TC), ovs, &(xo[WS(os, 1)]));
               /* Outputs 0, 4, 8, 12, 16: built from sums of the pair sums. */
               {
                    V T1A, T1C, T1D, T1x, T1G, T1t, T1w, T1F, T1E;
                    T1A = VMUL(LDK(KP559016994), VSUB(T1y, T1z));
                    T1C = VADD(T1y, T1z);
                    T1D = VFNMS(LDK(KP250000000), T1C, T1B);
                    T1t = VSUB(T1r, T1s);
                    T1w = VSUB(T1u, T1v);
                    T1x = VBYI(VFMA(LDK(KP951056516), T1t, VMUL(LDK(KP587785252), T1w)));
                    T1G = VBYI(VFNMS(LDK(KP587785252), T1t, VMUL(LDK(KP951056516), T1w)));
                    ST(&(xo[0]), VADD(T1B, T1C), ovs, &(xo[0]));
                    T1F = VSUB(T1D, T1A);
                    ST(&(xo[WS(os, 8)]), VSUB(T1F, T1G), ovs, &(xo[0]));
                    ST(&(xo[WS(os, 12)]), VADD(T1G, T1F), ovs, &(xo[0]));
                    T1E = VADD(T1A, T1D);
                    ST(&(xo[WS(os, 4)]), VADD(T1x, T1E), ovs, &(xo[0]));
                    ST(&(xo[WS(os, 16)]), VSUB(T1E, T1x), ovs, &(xo[0]));
               }
               /* Outputs 2, 6, 10, 14, 18: built from differences of the pair sums. */
               {
                    V T1n, T1l, T1m, T1f, T1q, T17, T1e, T1p, T1o;
                    T1n = VMUL(LDK(KP559016994), VSUB(T1j, T1k));
                    T1l = VADD(T1j, T1k);
                    T1m = VFNMS(LDK(KP250000000), T1l, T1i);
                    T17 = VSUB(T13, T16);
                    T1e = VSUB(T1a, T1d);
                    T1f = VBYI(VFNMS(LDK(KP587785252), T1e, VMUL(LDK(KP951056516), T17)));
                    T1q = VBYI(VFMA(LDK(KP951056516), T1e, VMUL(LDK(KP587785252), T17)));
                    ST(&(xo[WS(os, 10)]), VADD(T1i, T1l), ovs, &(xo[0]));
                    T1p = VADD(T1n, T1m);
                    ST(&(xo[WS(os, 6)]), VSUB(T1p, T1q), ovs, &(xo[0]));
                    ST(&(xo[WS(os, 14)]), VADD(T1q, T1p), ovs, &(xo[0]));
                    T1o = VSUB(T1m, T1n);
                    ST(&(xo[WS(os, 2)]), VADD(T1f, T1o), ovs, &(xo[0]));
                    ST(&(xo[WS(os, 18)]), VSUB(T1o, T1f), ovs, &(xo[0]));
               }
               /* Remaining odd outputs (1, 3, 7, 9, 11, 13, 17, 19), from the pair differences. */
               {
                    V TI, TP, TX, TU, TM, TW, TF, TT, TK, TE;
                    TI = VFMA(LDK(KP951056516), TG, VMUL(LDK(KP587785252), TH));
                    TP = VFMA(LDK(KP951056516), TN, VMUL(LDK(KP587785252), TO));
                    TX = VFNMS(LDK(KP587785252), TN, VMUL(LDK(KP951056516), TO));
                    TU = VFNMS(LDK(KP587785252), TG, VMUL(LDK(KP951056516), TH));
                    TK = VFMS(LDK(KP250000000), TB, Tm);
                    TM = VADD(TK, TL);
                    TW = VSUB(TL, TK);
                    TE = VFNMS(LDK(KP250000000), Ti, T3);
                    TF = VADD(TD, TE);
                    TT = VSUB(TE, TD);
                    {
                         V TJ, TQ, TZ, T10;
                         TJ = VADD(TF, TI);
                         TQ = VBYI(VSUB(TM, TP));
                         ST(&(xo[WS(os, 19)]), VSUB(TJ, TQ), ovs, &(xo[WS(os, 1)]));
                         ST(&(xo[WS(os, 1)]), VADD(TJ, TQ), ovs, &(xo[WS(os, 1)]));
                         TZ = VADD(TT, TU);
                         T10 = VBYI(VADD(TX, TW));
                         ST(&(xo[WS(os, 13)]), VSUB(TZ, T10), ovs, &(xo[WS(os, 1)]));
                         ST(&(xo[WS(os, 7)]), VADD(TZ, T10), ovs, &(xo[WS(os, 1)]));
                    }
                    {
                         V TR, TS, TV, TY;
                         TR = VSUB(TF, TI);
                         TS = VBYI(VADD(TP, TM));
                         ST(&(xo[WS(os, 11)]), VSUB(TR, TS), ovs, &(xo[WS(os, 1)]));
                         ST(&(xo[WS(os, 9)]), VADD(TR, TS), ovs, &(xo[WS(os, 1)]));
                         TV = VSUB(TT, TU);
                         TY = VBYI(VSUB(TW, TX));
                         ST(&(xo[WS(os, 17)]), VSUB(TV, TY), ovs, &(xo[WS(os, 1)]));
                         ST(&(xo[WS(os, 3)]), VADD(TV, TY), ovs, &(xo[WS(os, 1)]));
                    }
               }
          }
     }
     VLEAVE();
}
414 
415 static const kdft_desc desc = { 20, XSIMD_STRING("n1fv_20"), { 92, 12, 12, 0 }, &GENUS, 0, 0, 0, 0 };
416 
XSIMD(codelet_n1fv_20)417 void XSIMD(codelet_n1fv_20) (planner *p) { X(kdft_register) (p, n1fv_20, &desc);
418 }
419 
420 #endif
421