1 /*
2  * Copyright (c) 2003, 2007-14 Matteo Frigo
3  * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18  *
19  */
20 
21 /* This file was automatically generated --- DO NOT EDIT */
22 /* Generated on Thu Dec 10 07:05:25 EST 2020 */
23 
24 #include "dft/codelet-dft.h"
25 
26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27 
28 /* Generated by: ../../../genfft/gen_twiddle_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2fv_25 -include dft/simd/t2f.h */
29 
30 /*
31  * This function contains 248 FP additions, 241 FP multiplications,
32  * (or, 67 additions, 60 multiplications, 181 fused multiply/add),
33  * 147 stack variables, 67 constants, and 50 memory accesses
34  */
35 #include "dft/simd/t2f.h"
36 
/*
 * t2fv_25 (FMA variant): body of the n = 25 twiddle codelet produced by
 * genfft (see the "Generated by" command line above).  This variant is
 * compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For every loop pass the routine loads the 25 elements x[WS(rs, 0..24)],
 * multiplies elements 1..24 by twiddle factors taken from W (via BYTWJ;
 * element 0 is used as loaded), combines them, and stores 25 results back
 * to the same x[WS(rs, 0..24)] locations, i.e. it works in place.
 *
 * Parameters:
 *   ri      base pointer of the data; the loop pointer x starts here.
 *   ii      not referenced anywhere in this body.
 *   W       twiddle table; advanced to mb * ((TWVL / VL) * 48) before the
 *           first pass and by TWVL * 48 after each pass.
 *   rs      stride object handed to WS(rs, k) to address element k.
 *   mb, me  the loop runs for m = mb, mb + VL, ... while m < me.
 *   ms      x advances by VL * ms per pass; also forwarded to LD/ST.
 *
 * DVK/LDK, LD/ST, BYTWJ, the V* arithmetic macros, MAKE_VOLATILE_STRIDE and
 * VLEAVE all come from the included SIMD headers; their exact semantics are
 * not visible in this file.
 */
static void t2fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* Numeric constants of the butterfly network; each is referenced below
	through LDK(KPxxx).  The identifier encodes the leading digits of
	the value. */
     DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
     DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
     DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
     DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
     DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
     DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
     DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
     DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
     DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
     DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
     DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
     DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
     DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
     DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
     DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
     DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
     DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
     DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
     DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
     DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
     DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
     DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
     DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
     DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
     DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
     DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
     DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
     DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
     DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
     DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
     DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
     DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
     DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
     DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
     DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
     DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
     DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
     DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
     DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
     DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
     DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
     DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
     DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
     DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
     DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
     DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
     DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
     DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
     DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
     DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
     DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
     DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
     DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
     DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
     DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
     DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
     DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
     DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
     DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
     DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
     DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
     DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
     DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
     DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
     DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
     {
	  INT m;
	  R *x;
	  x = ri;
	  /* W is first moved to the twiddle block belonging to m = mb.  Each
	     pass then advances m by VL, x by VL * ms and W by TWVL * 48; the
	     24 twiddles of a pass sit at the even offsets TWVL * 0 ..
	     TWVL * 46.  MAKE_VOLATILE_STRIDE is an opaque macro here --
	     presumably a compiler workaround, see its definition. */
	  for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) {
	       /* Intermediates that survive from the five input groups below
		  into the three output stages at the end of the loop body. */
	       V T1, Te, Tc, Td, T1O, T2X, T3Q, T1x, T2K, T1u, T2L, T1y, T27, T3b, T2R;
	       V T2M, T2f, T3M, Ty, T2E, Tv, T2D, Tz, T2a, T3e, T2U, T2F, T2i, T3N, TK;
	       V T2B, TS, T2A, TT, T2b, T3f, T2T, T2C, T2j, T3P, T1d, T2H, T1a, T2I, T1e;
	       V T28, T3c, T2Q, T2J, T2g;
	       /* Input group {0, 5, 10, 15, 20}.  Element 0 is loaded without
		  a twiddle; Tc is the sum of the four twiddled elements and
		  Te, T1M, T1N are differences of them. */
	       {
		    V T8, Ta, Tb, T3, T5, T6, T1M, T1N;
		    T1 = LD(&(x[0]), ms, &(x[0]));
		    {
			 V T7, T9, T2, T4;
			 T7 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
			 T8 = BYTWJ(&(W[TWVL * 18]), T7);
			 T9 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
			 Ta = BYTWJ(&(W[TWVL * 28]), T9);
			 Tb = VADD(T8, Ta);
			 T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
			 T3 = BYTWJ(&(W[TWVL * 8]), T2);
			 T4 = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
			 T5 = BYTWJ(&(W[TWVL * 38]), T4);
			 T6 = VADD(T3, T5);
		    }
		    Te = VSUB(T6, Tb);
		    Tc = VADD(T6, Tb);
		    Td = VFNMS(LDK(KP250000000), Tc, T1);
		    T1M = VSUB(T3, T5);
		    T1N = VSUB(T8, Ta);
		    T1O = VFMA(LDK(KP618033988), T1N, T1M);
		    T2X = VFNMS(LDK(KP618033988), T1M, T1N);
	       }
	       /* Input group {3, 8, 13, 18, 23}.  T3Q is the sum of all five
		  twiddled elements; the remaining results are pre-scaled
		  combinations consumed by the output stages. */
	       {
		    V T1g, T1v, T1w, T1l, T1q, T1r, T1f, T1s, T1t;
		    T1f = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
		    T1g = BYTWJ(&(W[TWVL * 4]), T1f);
		    {
			 V T1i, T1p, T1k, T1n;
			 {
			      V T1h, T1o, T1j, T1m;
			      T1h = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
			      T1i = BYTWJ(&(W[TWVL * 14]), T1h);
			      T1o = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
			      T1p = BYTWJ(&(W[TWVL * 34]), T1o);
			      T1j = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
			      T1k = BYTWJ(&(W[TWVL * 44]), T1j);
			      T1m = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
			      T1n = BYTWJ(&(W[TWVL * 24]), T1m);
			 }
			 T1v = VSUB(T1i, T1k);
			 T1w = VSUB(T1n, T1p);
			 T1l = VADD(T1i, T1k);
			 T1q = VADD(T1n, T1p);
			 T1r = VADD(T1l, T1q);
		    }
		    T3Q = VADD(T1g, T1r);
		    T1x = VFMA(LDK(KP618033988), T1w, T1v);
		    T2K = VFNMS(LDK(KP618033988), T1v, T1w);
		    T1s = VFNMS(LDK(KP250000000), T1r, T1g);
		    T1t = VSUB(T1q, T1l);
		    T1u = VFNMS(LDK(KP559016994), T1t, T1s);
		    T2L = VFMA(LDK(KP559016994), T1t, T1s);
		    T1y = VFNMS(LDK(KP893101515), T1x, T1u);
		    T27 = VFNMS(LDK(KP120146378), T1x, T1u);
		    T3b = VFMA(LDK(KP066152395), T2L, T2K);
		    T2R = VFNMS(LDK(KP786782374), T2K, T2L);
		    T2M = VFMA(LDK(KP869845200), T2L, T2K);
		    T2f = VFMA(LDK(KP132830569), T1u, T1x);
	       }
	       /* Input group {1, 6, 11, 16, 21}.  T3M is the sum of all five
		  twiddled elements. */
	       {
		    V Th, Tw, Tx, Tm, Tr, Ts, Tg, Tt, Tu;
		    Tg = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
		    Th = BYTWJ(&(W[0]), Tg);
		    {
			 V Tj, Tq, Tl, To;
			 {
			      V Ti, Tp, Tk, Tn;
			      Ti = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
			      Tj = BYTWJ(&(W[TWVL * 10]), Ti);
			      Tp = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
			      Tq = BYTWJ(&(W[TWVL * 30]), Tp);
			      Tk = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
			      Tl = BYTWJ(&(W[TWVL * 40]), Tk);
			      Tn = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
			      To = BYTWJ(&(W[TWVL * 20]), Tn);
			 }
			 Tw = VSUB(Tj, Tl);
			 Tx = VSUB(Tq, To);
			 Tm = VADD(Tj, Tl);
			 Tr = VADD(To, Tq);
			 Ts = VADD(Tm, Tr);
		    }
		    T3M = VADD(Th, Ts);
		    Ty = VFNMS(LDK(KP618033988), Tx, Tw);
		    T2E = VFMA(LDK(KP618033988), Tw, Tx);
		    Tt = VFNMS(LDK(KP250000000), Ts, Th);
		    Tu = VSUB(Tm, Tr);
		    Tv = VFMA(LDK(KP559016994), Tu, Tt);
		    T2D = VFNMS(LDK(KP559016994), Tu, Tt);
		    Tz = VFNMS(LDK(KP244189809), Ty, Tv);
		    T2a = VFMA(LDK(KP667278218), Tv, Ty);
		    T3e = VFNMS(LDK(KP522847744), T2E, T2D);
		    T2U = VFNMS(LDK(KP987388751), T2D, T2E);
		    T2F = VFMA(LDK(KP893101515), T2E, T2D);
		    T2i = VFNMS(LDK(KP603558818), Ty, Tv);
	       }
	       /* Input group {4, 9, 14, 19, 24}.  T3N is the sum of all five
		  twiddled elements. */
	       {
		    V TM, TE, TJ, TN, TO, TP, TL, TQ, TR;
		    TL = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
		    TM = BYTWJ(&(W[TWVL * 6]), TL);
		    {
			 V TB, TI, TD, TG;
			 {
			      V TA, TH, TC, TF;
			      TA = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
			      TB = BYTWJ(&(W[TWVL * 46]), TA);
			      TH = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
			      TI = BYTWJ(&(W[TWVL * 26]), TH);
			      TC = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
			      TD = BYTWJ(&(W[TWVL * 16]), TC);
			      TF = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
			      TG = BYTWJ(&(W[TWVL * 36]), TF);
			 }
			 TE = VSUB(TB, TD);
			 TJ = VSUB(TG, TI);
			 TN = VADD(TD, TB);
			 TO = VADD(TI, TG);
			 TP = VADD(TN, TO);
		    }
		    T3N = VADD(TM, TP);
		    TK = VFMA(LDK(KP618033988), TJ, TE);
		    T2B = VFNMS(LDK(KP618033988), TE, TJ);
		    /* NOTE(review): this is the only group that uses VFMS
		       where the other four use VFNMS.  The differences TE
		       and TJ above are also taken in the opposite order, so
		       the generator presumably carries this whole group with
		       flipped sign -- do not "normalize" this to VFNMS
		       without re-deriving the stage. */
		    TQ = VFMS(LDK(KP250000000), TP, TM);
		    TR = VSUB(TN, TO);
		    TS = VFNMS(LDK(KP559016994), TR, TQ);
		    T2A = VFMA(LDK(KP559016994), TR, TQ);
		    TT = VFNMS(LDK(KP667278218), TS, TK);
		    T2b = VFMA(LDK(KP869845200), TS, TK);
		    T3f = VFNMS(LDK(KP494780565), T2A, T2B);
		    T2T = VFNMS(LDK(KP132830569), T2A, T2B);
		    T2C = VFMA(LDK(KP120146378), T2B, T2A);
		    T2j = VFNMS(LDK(KP786782374), TK, TS);
	       }
	       /* Input group {2, 7, 12, 17, 22}.  T3P is the sum of all five
		  twiddled elements. */
	       {
		    V TW, T1b, T1c, T11, T16, T17, TV, T18, T19;
		    TV = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
		    TW = BYTWJ(&(W[TWVL * 2]), TV);
		    {
			 V TY, T15, T10, T13;
			 {
			      V TX, T14, TZ, T12;
			      TX = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
			      TY = BYTWJ(&(W[TWVL * 12]), TX);
			      T14 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
			      T15 = BYTWJ(&(W[TWVL * 32]), T14);
			      TZ = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
			      T10 = BYTWJ(&(W[TWVL * 42]), TZ);
			      T12 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
			      T13 = BYTWJ(&(W[TWVL * 22]), T12);
			 }
			 T1b = VSUB(TY, T10);
			 T1c = VSUB(T15, T13);
			 T11 = VADD(TY, T10);
			 T16 = VADD(T13, T15);
			 T17 = VADD(T11, T16);
		    }
		    T3P = VADD(TW, T17);
		    T1d = VFNMS(LDK(KP618033988), T1c, T1b);
		    T2H = VFMA(LDK(KP618033988), T1b, T1c);
		    T18 = VFNMS(LDK(KP250000000), T17, TW);
		    T19 = VSUB(T16, T11);
		    T1a = VFNMS(LDK(KP559016994), T19, T18);
		    T2I = VFMA(LDK(KP559016994), T19, T18);
		    T1e = VFNMS(LDK(KP522847744), T1d, T1a);
		    T28 = VFNMS(LDK(KP494780565), T1a, T1d);
		    T3c = VFNMS(LDK(KP667278218), T2I, T2H);
		    T2Q = VFNMS(LDK(KP059835404), T2H, T2I);
		    T2J = VFMA(LDK(KP066152395), T2I, T2H);
		    T2g = VFMA(LDK(KP447533225), T1d, T1a);
	       }
	       /* Output stage 1: combines the five group sums (T1 + Tc, T3M,
		  T3N, T3P, T3Q) and stores outputs 0, 5, 10, 15 and 20.
		  Outputs k and 25 - k are written as a VFNMSI/VFMAI pair
		  over the same two operands. */
	       {
		    V T3Y, T40, T3L, T3S, T3T, T3U, T3Z, T3V;
		    {
			 V T3W, T3X, T3O, T3R;
			 T3W = VSUB(T3M, T3N);
			 T3X = VSUB(T3P, T3Q);
			 T3Y = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3X, T3W));
			 T40 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3W, T3X));
			 T3L = VADD(T1, Tc);
			 T3O = VADD(T3M, T3N);
			 T3R = VADD(T3P, T3Q);
			 T3S = VADD(T3O, T3R);
			 T3T = VFNMS(LDK(KP250000000), T3S, T3L);
			 T3U = VSUB(T3O, T3R);
		    }
		    ST(&(x[0]), VADD(T3S, T3L), ms, &(x[0]));
		    T3Z = VFNMS(LDK(KP559016994), T3U, T3T);
		    ST(&(x[WS(rs, 10)]), VFMAI(T40, T3Z), ms, &(x[0]));
		    ST(&(x[WS(rs, 15)]), VFNMSI(T40, T3Z), ms, &(x[WS(rs, 1)]));
		    T3V = VFMA(LDK(KP559016994), T3U, T3T);
		    ST(&(x[WS(rs, 5)]), VFNMSI(T3Y, T3V), ms, &(x[WS(rs, 1)]));
		    ST(&(x[WS(rs, 20)]), VFMAI(T3Y, T3V), ms, &(x[0]));
	       }
	       /* Output stage 2: built on T2z (from Td and Te) and T2X plus
		  the T2A..T3f intermediates of the four non-zero groups;
		  stores outputs 2, 3, 7, 8, 12, 13, 17, 18, 22 and 23. */
	       {
		    V T2Z, T35, T3B, T3I, T2W, T38, T2O, T32, T2z, T3t, T3h, T3s, T3p, T3F, T3r;
		    V T3v, T3C, T3z, T3A;
		    T2Z = VFMA(LDK(KP734762448), T2U, T2T);
		    T35 = VFNMS(LDK(KP734762448), T2F, T2C);
		    T3z = VFMA(LDK(KP845997307), T3c, T3b);
		    T3A = VFMA(LDK(KP982009705), T3f, T3e);
		    T3B = VFMA(LDK(KP570584518), T3A, T3z);
		    T3I = VFNMS(LDK(KP669429328), T3z, T3A);
		    {
			 V T2S, T2V, T37, T36;
			 T2S = VFMA(LDK(KP772036680), T2R, T2Q);
			 T2V = VFNMS(LDK(KP734762448), T2U, T2T);
			 T36 = VFMA(LDK(KP772036680), T2M, T2J);
			 T37 = VFMA(LDK(KP522616830), T2V, T36);
			 T2W = VFMA(LDK(KP945422727), T2V, T2S);
			 T38 = VFNMS(LDK(KP690983005), T37, T2S);
		    }
		    {
			 V T2N, T2G, T31, T30;
			 T2N = VFNMS(LDK(KP772036680), T2M, T2J);
			 T2G = VFMA(LDK(KP734762448), T2F, T2C);
			 T30 = VFNMS(LDK(KP772036680), T2R, T2Q);
			 T31 = VFNMS(LDK(KP522616830), T2G, T30);
			 T2O = VFMA(LDK(KP956723877), T2N, T2G);
			 T32 = VFMA(LDK(KP763932022), T31, T2N);
		    }
		    {
			 V T3o, T3u, T3l, T3m, T3n;
			 T2z = VFNMS(LDK(KP559016994), Te, Td);
			 T3m = VFMA(LDK(KP447533225), T2B, T2A);
			 T3n = VFMA(LDK(KP578046249), T2D, T2E);
			 T3o = VFNMS(LDK(KP921078979), T3n, T3m);
			 T3t = VFMA(LDK(KP921078979), T3n, T3m);
			 {
			      V T3d, T3g, T3j, T3k;
			      T3d = VFNMS(LDK(KP845997307), T3c, T3b);
			      T3g = VFNMS(LDK(KP982009705), T3f, T3e);
			      T3h = VFMA(LDK(KP923225144), T3g, T3d);
			      T3u = VFNMS(LDK(KP923225144), T3g, T3d);
			      T3j = VFNMS(LDK(KP059835404), T2K, T2L);
			      T3k = VFMA(LDK(KP603558818), T2H, T2I);
			      T3l = VFMA(LDK(KP845997307), T3k, T3j);
			      T3s = VFNMS(LDK(KP845997307), T3k, T3j);
			 }
			 T3p = VFNMS(LDK(KP906616052), T3o, T3l);
			 T3F = VFNMS(LDK(KP904508497), T3u, T3s);
			 T3r = VFNMS(LDK(KP237294955), T3h, T2z);
			 T3v = VFNMS(LDK(KP997675361), T3u, T3t);
			 T3C = VFMA(LDK(KP906616052), T3o, T3l);
		    }
		    /* outputs 3/22 and 2/23 */
		    {
			 V T2P, T2Y, T3i, T3q;
			 T2P = VFMA(LDK(KP992114701), T2O, T2z);
			 T2Y = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2X, T2W));
			 ST(&(x[WS(rs, 3)]), VFNMSI(T2Y, T2P), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 22)]), VFMAI(T2Y, T2P), ms, &(x[0]));
			 T3i = VFMA(LDK(KP949179823), T3h, T2z);
			 T3q = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2X, T3p));
			 ST(&(x[WS(rs, 2)]), VFNMSI(T3q, T3i), ms, &(x[0]));
			 ST(&(x[WS(rs, 23)]), VFMAI(T3q, T3i), ms, &(x[WS(rs, 1)]));
		    }
		    /* outputs 8/17 */
		    {
			 V T34, T3a, T33, T39;
			 T33 = VFNMS(LDK(KP855719849), T32, T2Z);
			 T34 = VFMA(LDK(KP897376177), T33, T2z);
			 T39 = VFMA(LDK(KP855719849), T38, T35);
			 T3a = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T39, T2X));
			 ST(&(x[WS(rs, 8)]), VFNMSI(T3a, T34), ms, &(x[0]));
			 ST(&(x[WS(rs, 17)]), VFMAI(T3a, T34), ms, &(x[WS(rs, 1)]));
		    }
		    /* outputs 13/12 and 7/18 */
		    {
			 V T3x, T3H, T3E, T3K, T3w;
			 T3w = VFMA(LDK(KP560319534), T3v, T3s);
			 T3x = VFNMS(LDK(KP949179823), T3w, T3r);
			 {
			      V T3G, T3y, T3J, T3D;
			      T3G = VFNMS(LDK(KP681693190), T3F, T3t);
			      T3H = VFNMS(LDK(KP860541664), T3G, T3r);
			      T3y = VFMA(LDK(KP262346850), T3p, T2X);
			      T3J = VFNMS(LDK(KP669429328), T3C, T3I);
			      T3D = VFMA(LDK(KP618033988), T3C, T3B);
			      T3E = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3D, T3y));
			      T3K = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3J, T3y));
			 }
			 ST(&(x[WS(rs, 13)]), VFNMSI(T3E, T3x), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 7)]), VFMAI(T3K, T3H), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 12)]), VFMAI(T3E, T3x), ms, &(x[0]));
			 ST(&(x[WS(rs, 18)]), VFNMSI(T3K, T3H), ms, &(x[0]));
		    }
	       }
	       /* Output stage 3: built on Tf (from Td and Te) and T1O plus
		  the Tv..T2j intermediates of the four non-zero groups;
		  stores outputs 1, 4, 6, 9, 11, 14, 16, 19, 21 and 24. */
	       {
		    V T2n, T2t, T1V, T22, T2l, T2w, T2d, T2q, Tf, T1I, T1A, T1E, T1B, T1Z, T1J;
		    V T1R, T1W, T1T, T1U;
		    T2n = VFNMS(LDK(KP912575812), T2j, T2i);
		    T2t = VFNMS(LDK(KP912575812), T2b, T2a);
		    T1T = VFNMS(LDK(KP829049696), TT, Tz);
		    T1U = VFNMS(LDK(KP831864738), T1y, T1e);
		    T1V = VFMA(LDK(KP559154169), T1U, T1T);
		    T22 = VFNMS(LDK(KP683113946), T1T, T1U);
		    {
			 V T2h, T2k, T2v, T2u;
			 T2h = VFMA(LDK(KP958953096), T2g, T2f);
			 T2k = VFMA(LDK(KP912575812), T2j, T2i);
			 T2u = VFMA(LDK(KP867381224), T28, T27);
			 T2v = VFMA(LDK(KP447417479), T2k, T2u);
			 T2l = VFMA(LDK(KP894834959), T2k, T2h);
			 T2w = VFNMS(LDK(KP763932022), T2v, T2h);
		    }
		    {
			 V T29, T2c, T2p, T2o;
			 T29 = VFNMS(LDK(KP867381224), T28, T27);
			 T2c = VFMA(LDK(KP912575812), T2b, T2a);
			 T2o = VFNMS(LDK(KP958953096), T2g, T2f);
			 T2p = VFMA(LDK(KP447417479), T2c, T2o);
			 T2d = VFNMS(LDK(KP809385824), T2c, T29);
			 T2q = VFMA(LDK(KP690983005), T2p, T29);
		    }
		    {
			 V T1Q, T1F, T1P, T1G, T1H;
			 Tf = VFMA(LDK(KP559016994), Te, Td);
			 T1G = VFMA(LDK(KP578046249), T1a, T1d);
			 T1H = VFMA(LDK(KP987388751), T1u, T1x);
			 T1I = VFNMS(LDK(KP831864738), T1H, T1G);
			 T1Q = VFMA(LDK(KP831864738), T1H, T1G);
			 {
			      V TU, T1z, T1C, T1D;
			      TU = VFMA(LDK(KP829049696), TT, Tz);
			      T1z = VFMA(LDK(KP831864738), T1y, T1e);
			      T1A = VFMA(LDK(KP904730450), T1z, TU);
			      T1F = VFNMS(LDK(KP904730450), T1z, TU);
			      T1C = VFMA(LDK(KP269969613), Tv, Ty);
			      T1D = VFMA(LDK(KP603558818), TK, TS);
			      T1E = VFMA(LDK(KP916574801), T1D, T1C);
			      T1P = VFNMS(LDK(KP916574801), T1D, T1C);
			 }
			 T1B = VFNMS(LDK(KP242145790), T1A, Tf);
			 /* The only plain VADD in this stage; it is needed
			    for the 67-addition total quoted in the header
			    comment of this variant. */
			 T1Z = VADD(T1E, T1F);
			 T1J = VFNMS(LDK(KP904730450), T1I, T1F);
			 T1R = VFMA(LDK(KP904730450), T1Q, T1P);
			 T1W = VFNMS(LDK(KP904730450), T1Q, T1P);
		    }
		    /* outputs 1/24 and 4/21 */
		    {
			 V T25, T26, T2e, T2m;
			 T25 = VFMA(LDK(KP968583161), T1A, Tf);
			 T26 = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1R, T1O));
			 ST(&(x[WS(rs, 1)]), VFNMSI(T26, T25), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 24)]), VFMAI(T26, T25), ms, &(x[0]));
			 T2e = VFNMS(LDK(KP992114701), T2d, Tf);
			 T2m = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2l, T1O));
			 ST(&(x[WS(rs, 4)]), VFMAI(T2m, T2e), ms, &(x[0]));
			 ST(&(x[WS(rs, 21)]), VFNMSI(T2m, T2e), ms, &(x[WS(rs, 1)]));
		    }
		    /* outputs 16/9 */
		    {
			 V T2s, T2y, T2r, T2x;
			 T2r = VFNMS(LDK(KP999544308), T2q, T2n);
			 T2s = VFNMS(LDK(KP803003575), T2r, Tf);
			 T2x = VFNMS(LDK(KP999544308), T2w, T2t);
			 T2y = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2x, T1O));
			 ST(&(x[WS(rs, 16)]), VFNMSI(T2y, T2s), ms, &(x[0]));
			 ST(&(x[WS(rs, 9)]), VFMAI(T2y, T2s), ms, &(x[WS(rs, 1)]));
		    }
		    /* outputs 6/19 and 14/11 */
		    {
			 V T1L, T21, T1Y, T24, T1K;
			 T1K = VFNMS(LDK(KP618033988), T1J, T1E);
			 T1L = VFNMS(LDK(KP876091699), T1K, T1B);
			 {
			      V T20, T1S, T23, T1X;
			      T20 = VFNMS(LDK(KP683113946), T1Z, T1I);
			      T21 = VFMA(LDK(KP792626838), T20, T1B);
			      T1S = VFNMS(LDK(KP242145790), T1R, T1O);
			      T23 = VFMA(LDK(KP617882369), T1W, T22);
			      T1X = VFMA(LDK(KP559016994), T1W, T1V);
			      T1Y = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1X, T1S));
			      T24 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T23, T1S));
			 }
			 ST(&(x[WS(rs, 6)]), VFNMSI(T1Y, T1L), ms, &(x[0]));
			 ST(&(x[WS(rs, 14)]), VFMAI(T24, T21), ms, &(x[0]));
			 ST(&(x[WS(rs, 19)]), VFMAI(T1Y, T1L), ms, &(x[WS(rs, 1)]));
			 ST(&(x[WS(rs, 11)]), VFNMSI(T24, T21), ms, &(x[WS(rs, 1)]));
		    }
	       }
	  }
     }
     /* Epilogue macro supplied by the SIMD header; its effect is not
	visible in this file. */
     VLEAVE();
}
496 
497 static const tw_instr twinstr[] = {
498      VTW(0, 1),
499      VTW(0, 2),
500      VTW(0, 3),
501      VTW(0, 4),
502      VTW(0, 5),
503      VTW(0, 6),
504      VTW(0, 7),
505      VTW(0, 8),
506      VTW(0, 9),
507      VTW(0, 10),
508      VTW(0, 11),
509      VTW(0, 12),
510      VTW(0, 13),
511      VTW(0, 14),
512      VTW(0, 15),
513      VTW(0, 16),
514      VTW(0, 17),
515      VTW(0, 18),
516      VTW(0, 19),
517      VTW(0, 20),
518      VTW(0, 21),
519      VTW(0, 22),
520      VTW(0, 23),
521      VTW(0, 24),
522      { TW_NEXT, VL, 0 }
523 };
524 
525 static const ct_desc desc = { 25, XSIMD_STRING("t2fv_25"), twinstr, &GENUS, { 67, 60, 181, 0 }, 0, 0, 0 };
526 
XSIMD(codelet_t2fv_25)527 void XSIMD(codelet_t2fv_25) (planner *p) {
528      X(kdft_dit_register) (p, t2fv_25, &desc);
529 }
530 #else
531 
532 /* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2fv_25 -include dft/simd/t2f.h */
533 
534 /*
535  * This function contains 248 FP additions, 188 FP multiplications,
536  * (or, 170 additions, 110 multiplications, 78 fused multiply/add),
537  * 99 stack variables, 40 constants, and 50 memory accesses
538  */
539 #include "dft/simd/t2f.h"
540 
t2fv_25(R * ri,R * ii,const R * W,stride rs,INT mb,INT me,INT ms)541 static void t2fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
542 {
543      DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
544      DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
545      DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
546      DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
547      DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
548      DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
549      DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
550      DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
551      DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
552      DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
553      DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
554      DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
555      DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
556      DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
557      DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
558      DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
559      DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
560      DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
561      DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
562      DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
563      DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
564      DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
565      DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
566      DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
567      DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
568      DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
569      DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
570      DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
571      DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
572      DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
573      DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
574      DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
575      DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
576      DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
577      DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
578      DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
579      DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
580      DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
581      DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
582      DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
583      {
584 	  INT m;
585 	  R *x;
586 	  x = ri;
587 	  for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) {
588 	       V Tc, Tb, Td, Te, T1C, T2t, T1E, T1x, T2m, T1u, T3c, T2n, Ty, T2i, Tv;
589 	       V T38, T2j, TS, T2f, TP, T39, T2g, T1d, T2p, T1a, T3b, T2q;
590 	       {
591 		    V T7, T9, Ta, T2, T4, T5, T1D;
592 		    Tc = LD(&(x[0]), ms, &(x[0]));
593 		    {
594 			 V T6, T8, T1, T3;
595 			 T6 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
596 			 T7 = BYTWJ(&(W[TWVL * 18]), T6);
597 			 T8 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
598 			 T9 = BYTWJ(&(W[TWVL * 28]), T8);
599 			 Ta = VADD(T7, T9);
600 			 T1 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
601 			 T2 = BYTWJ(&(W[TWVL * 8]), T1);
602 			 T3 = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
603 			 T4 = BYTWJ(&(W[TWVL * 38]), T3);
604 			 T5 = VADD(T2, T4);
605 		    }
606 		    Tb = VMUL(LDK(KP559016994), VSUB(T5, Ta));
607 		    Td = VADD(T5, Ta);
608 		    Te = VFNMS(LDK(KP250000000), Td, Tc);
609 		    T1C = VSUB(T2, T4);
610 		    T1D = VSUB(T7, T9);
611 		    T2t = VMUL(LDK(KP951056516), T1D);
612 		    T1E = VFMA(LDK(KP951056516), T1C, VMUL(LDK(KP587785252), T1D));
613 	       }
614 	       {
615 		    V T1r, T1l, T1n, T1o, T1g, T1i, T1j, T1q;
616 		    T1q = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
617 		    T1r = BYTWJ(&(W[TWVL * 4]), T1q);
618 		    {
619 			 V T1k, T1m, T1f, T1h;
620 			 T1k = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
621 			 T1l = BYTWJ(&(W[TWVL * 24]), T1k);
622 			 T1m = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
623 			 T1n = BYTWJ(&(W[TWVL * 34]), T1m);
624 			 T1o = VADD(T1l, T1n);
625 			 T1f = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
626 			 T1g = BYTWJ(&(W[TWVL * 14]), T1f);
627 			 T1h = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
628 			 T1i = BYTWJ(&(W[TWVL * 44]), T1h);
629 			 T1j = VADD(T1g, T1i);
630 		    }
631 		    {
632 			 V T1v, T1w, T1p, T1s, T1t;
633 			 T1v = VSUB(T1g, T1i);
634 			 T1w = VSUB(T1l, T1n);
635 			 T1x = VFMA(LDK(KP475528258), T1v, VMUL(LDK(KP293892626), T1w));
636 			 T2m = VFNMS(LDK(KP293892626), T1v, VMUL(LDK(KP475528258), T1w));
637 			 T1p = VMUL(LDK(KP559016994), VSUB(T1j, T1o));
638 			 T1s = VADD(T1j, T1o);
639 			 T1t = VFNMS(LDK(KP250000000), T1s, T1r);
640 			 T1u = VADD(T1p, T1t);
641 			 T3c = VADD(T1r, T1s);
642 			 T2n = VSUB(T1t, T1p);
643 		    }
644 	       }
645 	       {
646 		    V Ts, Tm, To, Tp, Th, Tj, Tk, Tr;
647 		    Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
648 		    Ts = BYTWJ(&(W[0]), Tr);
649 		    {
650 			 V Tl, Tn, Tg, Ti;
651 			 Tl = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
652 			 Tm = BYTWJ(&(W[TWVL * 20]), Tl);
653 			 Tn = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
654 			 To = BYTWJ(&(W[TWVL * 30]), Tn);
655 			 Tp = VADD(Tm, To);
656 			 Tg = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
657 			 Th = BYTWJ(&(W[TWVL * 10]), Tg);
658 			 Ti = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
659 			 Tj = BYTWJ(&(W[TWVL * 40]), Ti);
660 			 Tk = VADD(Th, Tj);
661 		    }
662 		    {
663 			 V Tw, Tx, Tq, Tt, Tu;
664 			 Tw = VSUB(Th, Tj);
665 			 Tx = VSUB(Tm, To);
666 			 Ty = VFMA(LDK(KP475528258), Tw, VMUL(LDK(KP293892626), Tx));
667 			 T2i = VFNMS(LDK(KP293892626), Tw, VMUL(LDK(KP475528258), Tx));
668 			 Tq = VMUL(LDK(KP559016994), VSUB(Tk, Tp));
669 			 Tt = VADD(Tk, Tp);
670 			 Tu = VFNMS(LDK(KP250000000), Tt, Ts);
671 			 Tv = VADD(Tq, Tu);
672 			 T38 = VADD(Ts, Tt);
673 			 T2j = VSUB(Tu, Tq);
674 		    }
675 	       }
676 	       {
677 		    V TM, TG, TI, TJ, TB, TD, TE, TL;
678 		    TL = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
679 		    TM = BYTWJ(&(W[TWVL * 6]), TL);
680 		    {
681 			 V TF, TH, TA, TC;
682 			 TF = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
683 			 TG = BYTWJ(&(W[TWVL * 26]), TF);
684 			 TH = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
685 			 TI = BYTWJ(&(W[TWVL * 36]), TH);
686 			 TJ = VADD(TG, TI);
687 			 TA = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
688 			 TB = BYTWJ(&(W[TWVL * 16]), TA);
689 			 TC = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
690 			 TD = BYTWJ(&(W[TWVL * 46]), TC);
691 			 TE = VADD(TB, TD);
692 		    }
693 		    {
694 			 V TQ, TR, TK, TN, TO;
695 			 TQ = VSUB(TB, TD);
696 			 TR = VSUB(TG, TI);
697 			 TS = VFMA(LDK(KP475528258), TQ, VMUL(LDK(KP293892626), TR));
698 			 T2f = VFNMS(LDK(KP293892626), TQ, VMUL(LDK(KP475528258), TR));
699 			 TK = VMUL(LDK(KP559016994), VSUB(TE, TJ));
700 			 TN = VADD(TE, TJ);
701 			 TO = VFNMS(LDK(KP250000000), TN, TM);
702 			 TP = VADD(TK, TO);
703 			 T39 = VADD(TM, TN);
704 			 T2g = VSUB(TO, TK);
705 		    }
706 	       }
707 	       {
708 		    V T17, T11, T13, T14, TW, TY, TZ, T16;
709 		    T16 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
710 		    T17 = BYTWJ(&(W[TWVL * 2]), T16);
711 		    {
712 			 V T10, T12, TV, TX;
713 			 T10 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
714 			 T11 = BYTWJ(&(W[TWVL * 22]), T10);
715 			 T12 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
716 			 T13 = BYTWJ(&(W[TWVL * 32]), T12);
717 			 T14 = VADD(T11, T13);
718 			 TV = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
719 			 TW = BYTWJ(&(W[TWVL * 12]), TV);
720 			 TX = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
721 			 TY = BYTWJ(&(W[TWVL * 42]), TX);
722 			 TZ = VADD(TW, TY);
723 		    }
724 		    {
725 			 V T1b, T1c, T15, T18, T19;
726 			 T1b = VSUB(TW, TY);
727 			 T1c = VSUB(T11, T13);
728 			 T1d = VFMA(LDK(KP475528258), T1b, VMUL(LDK(KP293892626), T1c));
729 			 T2p = VFNMS(LDK(KP293892626), T1b, VMUL(LDK(KP475528258), T1c));
730 			 T15 = VMUL(LDK(KP559016994), VSUB(TZ, T14));
731 			 T18 = VADD(TZ, T14);
732 			 T19 = VFNMS(LDK(KP250000000), T18, T17);
733 			 T1a = VADD(T15, T19);
734 			 T3b = VADD(T17, T18);
735 			 T2q = VSUB(T19, T15);
736 		    }
737 	       }
738 	       {
739 		    V T3l, T3m, T3f, T3g, T3e, T3h, T3n, T3i;
740 		    {
741 			 V T3j, T3k, T3a, T3d;
742 			 T3j = VSUB(T38, T39);
743 			 T3k = VSUB(T3b, T3c);
744 			 T3l = VBYI(VFMA(LDK(KP951056516), T3j, VMUL(LDK(KP587785252), T3k)));
745 			 T3m = VBYI(VFNMS(LDK(KP587785252), T3j, VMUL(LDK(KP951056516), T3k)));
746 			 T3f = VADD(Tc, Td);
747 			 T3a = VADD(T38, T39);
748 			 T3d = VADD(T3b, T3c);
749 			 T3g = VADD(T3a, T3d);
750 			 T3e = VMUL(LDK(KP559016994), VSUB(T3a, T3d));
751 			 T3h = VFNMS(LDK(KP250000000), T3g, T3f);
752 		    }
753 		    ST(&(x[0]), VADD(T3f, T3g), ms, &(x[0]));
754 		    T3n = VSUB(T3h, T3e);
755 		    ST(&(x[WS(rs, 10)]), VADD(T3m, T3n), ms, &(x[0]));
756 		    ST(&(x[WS(rs, 15)]), VSUB(T3n, T3m), ms, &(x[WS(rs, 1)]));
757 		    T3i = VADD(T3e, T3h);
758 		    ST(&(x[WS(rs, 5)]), VSUB(T3i, T3l), ms, &(x[WS(rs, 1)]));
759 		    ST(&(x[WS(rs, 20)]), VADD(T3l, T3i), ms, &(x[0]));
760 	       }
761 	       {
762 		    V Tf, T1Z, T20, T21, T29, T2a, T2b, T26, T27, T28, T22, T23, T24, T1L, T1U;
763 		    V T1Q, T1S, T1A, T1V, T1N, T1O, T2d, T2e;
764 		    Tf = VADD(Tb, Te);
765 		    T1Z = VFMA(LDK(KP1_688655851), Ty, VMUL(LDK(KP535826794), Tv));
766 		    T20 = VFMA(LDK(KP1_541026485), TS, VMUL(LDK(KP637423989), TP));
767 		    T21 = VSUB(T1Z, T20);
768 		    T29 = VFMA(LDK(KP851558583), T1d, VMUL(LDK(KP904827052), T1a));
769 		    T2a = VFMA(LDK(KP1_984229402), T1x, VMUL(LDK(KP125333233), T1u));
770 		    T2b = VADD(T29, T2a);
771 		    T26 = VFNMS(LDK(KP844327925), Tv, VMUL(LDK(KP1_071653589), Ty));
772 		    T27 = VFNMS(LDK(KP1_274847979), TS, VMUL(LDK(KP770513242), TP));
773 		    T28 = VADD(T26, T27);
774 		    T22 = VFNMS(LDK(KP425779291), T1a, VMUL(LDK(KP1_809654104), T1d));
775 		    T23 = VFNMS(LDK(KP992114701), T1u, VMUL(LDK(KP250666467), T1x));
776 		    T24 = VADD(T22, T23);
777 		    {
778 			 V T1F, T1G, T1H, T1I, T1J, T1K;
779 			 T1F = VFMA(LDK(KP1_937166322), Ty, VMUL(LDK(KP248689887), Tv));
780 			 T1G = VFMA(LDK(KP1_071653589), TS, VMUL(LDK(KP844327925), TP));
781 			 T1H = VADD(T1F, T1G);
782 			 T1I = VFMA(LDK(KP1_752613360), T1d, VMUL(LDK(KP481753674), T1a));
783 			 T1J = VFMA(LDK(KP1_457937254), T1x, VMUL(LDK(KP684547105), T1u));
784 			 T1K = VADD(T1I, T1J);
785 			 T1L = VADD(T1H, T1K);
786 			 T1U = VSUB(T1J, T1I);
787 			 T1Q = VMUL(LDK(KP559016994), VSUB(T1K, T1H));
788 			 T1S = VSUB(T1G, T1F);
789 		    }
790 		    {
791 			 V Tz, TT, TU, T1e, T1y, T1z;
792 			 Tz = VFNMS(LDK(KP497379774), Ty, VMUL(LDK(KP968583161), Tv));
793 			 TT = VFNMS(LDK(KP1_688655851), TS, VMUL(LDK(KP535826794), TP));
794 			 TU = VADD(Tz, TT);
795 			 T1e = VFNMS(LDK(KP963507348), T1d, VMUL(LDK(KP876306680), T1a));
796 			 T1y = VFNMS(LDK(KP1_369094211), T1x, VMUL(LDK(KP728968627), T1u));
797 			 T1z = VADD(T1e, T1y);
798 			 T1A = VADD(TU, T1z);
799 			 T1V = VMUL(LDK(KP559016994), VSUB(TU, T1z));
800 			 T1N = VSUB(TT, Tz);
801 			 T1O = VSUB(T1e, T1y);
802 		    }
803 		    {
804 			 V T1B, T1M, T25, T2c;
805 			 T1B = VADD(Tf, T1A);
806 			 T1M = VBYI(VADD(T1E, T1L));
807 			 ST(&(x[WS(rs, 1)]), VSUB(T1B, T1M), ms, &(x[WS(rs, 1)]));
808 			 ST(&(x[WS(rs, 24)]), VADD(T1B, T1M), ms, &(x[0]));
809 			 T25 = VADD(Tf, VADD(T21, T24));
810 			 T2c = VBYI(VADD(T1E, VSUB(T28, T2b)));
811 			 ST(&(x[WS(rs, 21)]), VSUB(T25, T2c), ms, &(x[WS(rs, 1)]));
812 			 ST(&(x[WS(rs, 4)]), VADD(T25, T2c), ms, &(x[0]));
813 		    }
814 		    T2d = VBYI(VADD(T1E, VFMA(LDK(KP309016994), T28, VFMA(LDK(KP587785252), VSUB(T23, T22), VFNMS(LDK(KP951056516), VADD(T1Z, T20), VMUL(LDK(KP809016994), T2b))))));
815 		    T2e = VFMA(LDK(KP309016994), T21, VFMA(LDK(KP951056516), VSUB(T26, T27), VFMA(LDK(KP587785252), VSUB(T2a, T29), VFNMS(LDK(KP809016994), T24, Tf))));
816 		    ST(&(x[WS(rs, 9)]), VADD(T2d, T2e), ms, &(x[WS(rs, 1)]));
817 		    ST(&(x[WS(rs, 16)]), VSUB(T2e, T2d), ms, &(x[0]));
818 		    {
819 			 V T1R, T1X, T1W, T1Y, T1P, T1T;
820 			 T1P = VFMS(LDK(KP250000000), T1L, T1E);
821 			 T1R = VBYI(VADD(VFMA(LDK(KP587785252), T1N, VMUL(LDK(KP951056516), T1O)), VSUB(T1P, T1Q)));
822 			 T1X = VBYI(VADD(VFNMS(LDK(KP587785252), T1O, VMUL(LDK(KP951056516), T1N)), VADD(T1P, T1Q)));
823 			 T1T = VFNMS(LDK(KP250000000), T1A, Tf);
824 			 T1W = VFMA(LDK(KP587785252), T1S, VFNMS(LDK(KP951056516), T1U, VSUB(T1T, T1V)));
825 			 T1Y = VFMA(LDK(KP951056516), T1S, VADD(T1V, VFMA(LDK(KP587785252), T1U, T1T)));
826 			 ST(&(x[WS(rs, 11)]), VADD(T1R, T1W), ms, &(x[WS(rs, 1)]));
827 			 ST(&(x[WS(rs, 19)]), VSUB(T1Y, T1X), ms, &(x[WS(rs, 1)]));
828 			 ST(&(x[WS(rs, 14)]), VSUB(T1W, T1R), ms, &(x[0]));
829 			 ST(&(x[WS(rs, 6)]), VADD(T1X, T1Y), ms, &(x[0]));
830 		    }
831 	       }
832 	       {
833 		    V T2u, T2w, T2h, T2k, T2l, T2A, T2B, T2C, T2o, T2r, T2s, T2x, T2y, T2z, T2M;
834 		    V T2X, T2N, T2W, T2R, T31, T2U, T30, T2E, T2F;
835 		    T2u = VFNMS(LDK(KP587785252), T1C, T2t);
836 		    T2w = VSUB(Te, Tb);
837 		    T2h = VFNMS(LDK(KP125333233), T2g, VMUL(LDK(KP1_984229402), T2f));
838 		    T2k = VFMA(LDK(KP1_457937254), T2i, VMUL(LDK(KP684547105), T2j));
839 		    T2l = VSUB(T2h, T2k);
840 		    T2A = VFNMS(LDK(KP1_996053456), T2p, VMUL(LDK(KP062790519), T2q));
841 		    T2B = VFMA(LDK(KP1_541026485), T2m, VMUL(LDK(KP637423989), T2n));
842 		    T2C = VSUB(T2A, T2B);
843 		    T2o = VFNMS(LDK(KP770513242), T2n, VMUL(LDK(KP1_274847979), T2m));
844 		    T2r = VFMA(LDK(KP125581039), T2p, VMUL(LDK(KP998026728), T2q));
845 		    T2s = VSUB(T2o, T2r);
846 		    T2x = VFNMS(LDK(KP1_369094211), T2i, VMUL(LDK(KP728968627), T2j));
847 		    T2y = VFMA(LDK(KP250666467), T2f, VMUL(LDK(KP992114701), T2g));
848 		    T2z = VSUB(T2x, T2y);
849 		    {
850 			 V T2G, T2H, T2I, T2J, T2K, T2L;
851 			 T2G = VFNMS(LDK(KP481753674), T2j, VMUL(LDK(KP1_752613360), T2i));
852 			 T2H = VFMA(LDK(KP851558583), T2f, VMUL(LDK(KP904827052), T2g));
853 			 T2I = VSUB(T2G, T2H);
854 			 T2J = VFNMS(LDK(KP844327925), T2q, VMUL(LDK(KP1_071653589), T2p));
855 			 T2K = VFNMS(LDK(KP998026728), T2n, VMUL(LDK(KP125581039), T2m));
856 			 T2L = VADD(T2J, T2K);
857 			 T2M = VMUL(LDK(KP559016994), VSUB(T2I, T2L));
858 			 T2X = VSUB(T2J, T2K);
859 			 T2N = VADD(T2I, T2L);
860 			 T2W = VADD(T2G, T2H);
861 		    }
862 		    {
863 			 V T2P, T2Q, T2Y, T2S, T2T, T2Z;
864 			 T2P = VFNMS(LDK(KP425779291), T2g, VMUL(LDK(KP1_809654104), T2f));
865 			 T2Q = VFMA(LDK(KP963507348), T2i, VMUL(LDK(KP876306680), T2j));
866 			 T2Y = VADD(T2Q, T2P);
867 			 T2S = VFMA(LDK(KP1_688655851), T2p, VMUL(LDK(KP535826794), T2q));
868 			 T2T = VFMA(LDK(KP1_996053456), T2m, VMUL(LDK(KP062790519), T2n));
869 			 T2Z = VADD(T2S, T2T);
870 			 T2R = VSUB(T2P, T2Q);
871 			 T31 = VADD(T2Y, T2Z);
872 			 T2U = VSUB(T2S, T2T);
873 			 T30 = VMUL(LDK(KP559016994), VSUB(T2Y, T2Z));
874 		    }
875 		    {
876 			 V T36, T37, T2v, T2D;
877 			 T36 = VBYI(VADD(T2u, T2N));
878 			 T37 = VADD(T2w, T31);
879 			 ST(&(x[WS(rs, 2)]), VADD(T36, T37), ms, &(x[0]));
880 			 ST(&(x[WS(rs, 23)]), VSUB(T37, T36), ms, &(x[WS(rs, 1)]));
881 			 T2v = VBYI(VSUB(VADD(T2l, T2s), T2u));
882 			 T2D = VADD(T2w, VADD(T2z, T2C));
883 			 ST(&(x[WS(rs, 3)]), VADD(T2v, T2D), ms, &(x[WS(rs, 1)]));
884 			 ST(&(x[WS(rs, 22)]), VSUB(T2D, T2v), ms, &(x[0]));
885 		    }
886 		    T2E = VFMA(LDK(KP309016994), T2z, VFNMS(LDK(KP809016994), T2C, VFNMS(LDK(KP587785252), VADD(T2r, T2o), VFNMS(LDK(KP951056516), VADD(T2k, T2h), T2w))));
887 		    T2F = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2A, T2B), VFNMS(LDK(KP809016994), T2s, VFNMS(LDK(KP951056516), VADD(T2x, T2y), VMUL(LDK(KP309016994), T2l)))), T2u));
888 		    ST(&(x[WS(rs, 17)]), VSUB(T2E, T2F), ms, &(x[WS(rs, 1)]));
889 		    ST(&(x[WS(rs, 8)]), VADD(T2E, T2F), ms, &(x[0]));
890 		    {
891 			 V T2V, T34, T33, T35, T2O, T32;
892 			 T2O = VFNMS(LDK(KP250000000), T2N, T2u);
893 			 T2V = VBYI(VADD(T2M, VADD(T2O, VFNMS(LDK(KP587785252), T2U, VMUL(LDK(KP951056516), T2R)))));
894 			 T34 = VBYI(VADD(T2O, VSUB(VFMA(LDK(KP587785252), T2R, VMUL(LDK(KP951056516), T2U)), T2M)));
895 			 T32 = VFNMS(LDK(KP250000000), T31, T2w);
896 			 T33 = VFMA(LDK(KP951056516), T2W, VFMA(LDK(KP587785252), T2X, VADD(T30, T32)));
897 			 T35 = VFMA(LDK(KP587785252), T2W, VSUB(VFNMS(LDK(KP951056516), T2X, T32), T30));
898 			 ST(&(x[WS(rs, 7)]), VADD(T2V, T33), ms, &(x[WS(rs, 1)]));
899 			 ST(&(x[WS(rs, 13)]), VSUB(T35, T34), ms, &(x[WS(rs, 1)]));
900 			 ST(&(x[WS(rs, 18)]), VSUB(T33, T2V), ms, &(x[0]));
901 			 ST(&(x[WS(rs, 12)]), VADD(T34, T35), ms, &(x[0]));
902 		    }
903 	       }
904 	  }
905      }
906      VLEAVE();
907 }
908 
909 static const tw_instr twinstr[] = {
910      VTW(0, 1),
911      VTW(0, 2),
912      VTW(0, 3),
913      VTW(0, 4),
914      VTW(0, 5),
915      VTW(0, 6),
916      VTW(0, 7),
917      VTW(0, 8),
918      VTW(0, 9),
919      VTW(0, 10),
920      VTW(0, 11),
921      VTW(0, 12),
922      VTW(0, 13),
923      VTW(0, 14),
924      VTW(0, 15),
925      VTW(0, 16),
926      VTW(0, 17),
927      VTW(0, 18),
928      VTW(0, 19),
929      VTW(0, 20),
930      VTW(0, 21),
931      VTW(0, 22),
932      VTW(0, 23),
933      VTW(0, 24),
934      { TW_NEXT, VL, 0 }
935 };
936 
937 static const ct_desc desc = { 25, XSIMD_STRING("t2fv_25"), twinstr, &GENUS, { 170, 110, 78, 0 }, 0, 0, 0 };
938 
XSIMD(codelet_t2fv_25)939 void XSIMD(codelet_t2fv_25) (planner *p) {
940      X(kdft_dit_register) (p, t2fv_25, &desc);
941 }
942 #endif
943