1 /*
2  * Copyright (c) 2003, 2007-14 Matteo Frigo
3  * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18  *
19  */
20 
21 /* This file was automatically generated --- DO NOT EDIT */
22 /* Generated on Thu Dec 10 07:05:31 EST 2020 */
23 
24 #include "dft/codelet-dft.h"
25 
26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27 
28 /* Generated by: ../../../genfft/gen_twiddle_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t1bv_25 -include dft/simd/t1b.h -sign 1 */
29 
30 /*
31  * This function contains 248 FP additions, 241 FP multiplications,
32  * (or, 67 additions, 60 multiplications, 181 fused multiply/add),
33  * 147 stack variables, 67 constants, and 50 memory accesses
34  */
35 #include "dft/simd/t1b.h"
36 
/*
 * t1bv_25 -- size-25 twiddle codelet, SIMD flavour, FMA-oriented schedule
 * (generator flags: -fma -simd -n 25 -sign 1).
 *
 * Parameters:
 *   ri      not referenced by this codelet.
 *   ii      base of the data; all loads and stores go through x = ii.
 *   W       twiddle table; advanced by TWVL * 48 per loop iteration
 *           (24 twiddles, two slots each) after an initial offset
 *           derived from mb.
 *   rs      stride between the 25 points of one transform (via WS(rs, k)).
 *   mb, me  half-open range [mb, me) of transforms to process, VL at a time.
 *   ms      stride between consecutive transforms.
 *
 * Each iteration loads the 25 points x[WS(rs, 0..24)], multiplies points
 * 1..24 by their twiddle through BYTW (point k uses W[TWVL * 2 * (k - 1)]),
 * and writes the 25 results back to the same locations (in place).
 *
 * Layout of the computation:
 *   1. five 5-point partial transforms, one per residue class k mod 5
 *      of the input index;
 *   2. outputs 0, 5, 10, 15, 20 from the five group sums;
 *   3. outputs with index == +-2 (mod 5);
 *   4. outputs with index == +-1 (mod 5).
 *
 * This is machine-generated code: statement order and the exact FMA
 * pairings determine rounding, so do not re-associate or "simplify" it.
 */
static void t1bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /*
      * Numeric constants, named after their leading digits.  A few are
      * recognisable: KP559016994 = sqrt(5)/4, KP618033988 = (sqrt(5)-1)/2,
      * KP951056516 = sin(2*pi/5); the rest fold the size-25 rotations into
      * the multiply-add chains below.
      */
     DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
     DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
     DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
     DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
     DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
     DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
     DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
     DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
     DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
     DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
     DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
     DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
     DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
     DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
     DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
     DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
     DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
     DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
     DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
     DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
     DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
     DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
     DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
     DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
     DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
     DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
     DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
     DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
     DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
     DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
     DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
     DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
     DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
     DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
     DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
     DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
     DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
     DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
     DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
     DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
     DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
     DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
     DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
     DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
     DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
     DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
     DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
     DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
     DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
     DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
     DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
     DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
     DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
     DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
     DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
     DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
     DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
     DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
     DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
     DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
     DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
     DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
     DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
     DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
     DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
     {
          INT m;
          R *x;
          x = ii;
          /*
           * One pass per VL transforms.  W starts mb transforms into the
           * twiddle table and then moves by 48 slots (24 twiddles) each pass.
           * MAKE_VOLATILE_STRIDE is defined outside this file; its effect is
           * not visible here.
           */
          for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) {
               V T1, Te, Tc, Td, T1O, T2X, T3Q, T1x, T2K, T1u, T2L, T1y, T27, T3b, T2R;
               V T2M, T2f, T3M, Ty, T2E, Tv, T2D, Tz, T2a, T3e, T2U, T2F, T2i, T3N, TK;
               V T2B, TS, T2A, TT, T2b, T3f, T2T, T2C, T2j, T3P, T1d, T2H, T1a, T2I, T1e;
               V T28, T3c, T2Q, T2J, T2g;
               /* Group 0: inputs 0, 5, 10, 15, 20 (input 0 is not twiddled). */
               {
                    V T8, Ta, Tb, T3, T5, T6, T1M, T1N;
                    T1 = LD(&(x[0]), ms, &(x[0]));
                    {
                         V T7, T9, T2, T4;
                         T7 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
                         T8 = BYTW(&(W[TWVL * 18]), T7);
                         T9 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
                         Ta = BYTW(&(W[TWVL * 28]), T9);
                         Tb = VADD(T8, Ta);
                         T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
                         T3 = BYTW(&(W[TWVL * 8]), T2);
                         T4 = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
                         T5 = BYTW(&(W[TWVL * 38]), T4);
                         T6 = VADD(T3, T5);
                    }
                    Te = VSUB(T6, Tb);
                    Tc = VADD(T6, Tb);
                    Td = VFNMS(LDK(KP250000000), Tc, T1);
                    T1M = VSUB(T3, T5);
                    T1N = VSUB(T8, Ta);
                    T1O = VFMA(LDK(KP618033988), T1N, T1M);
                    T2X = VFNMS(LDK(KP618033988), T1M, T1N);
               }
               /* Group 3: inputs 3, 8, 13, 18, 23.  T3Q is the group sum. */
               {
                    V T1g, T1v, T1w, T1l, T1q, T1r, T1f, T1s, T1t;
                    T1f = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
                    T1g = BYTW(&(W[TWVL * 4]), T1f);
                    {
                         V T1i, T1p, T1k, T1n;
                         {
                              V T1h, T1o, T1j, T1m;
                              T1h = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
                              T1i = BYTW(&(W[TWVL * 14]), T1h);
                              T1o = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
                              T1p = BYTW(&(W[TWVL * 34]), T1o);
                              T1j = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
                              T1k = BYTW(&(W[TWVL * 44]), T1j);
                              T1m = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
                              T1n = BYTW(&(W[TWVL * 24]), T1m);
                         }
                         T1v = VSUB(T1i, T1k);
                         T1w = VSUB(T1n, T1p);
                         T1l = VADD(T1i, T1k);
                         T1q = VADD(T1n, T1p);
                         T1r = VADD(T1l, T1q);
                    }
                    T3Q = VADD(T1g, T1r);
                    T1x = VFMA(LDK(KP618033988), T1w, T1v);
                    T2K = VFNMS(LDK(KP618033988), T1v, T1w);
                    T1s = VFNMS(LDK(KP250000000), T1r, T1g);
                    T1t = VSUB(T1q, T1l);
                    T1u = VFNMS(LDK(KP559016994), T1t, T1s);
                    T2L = VFMA(LDK(KP559016994), T1t, T1s);
                    T1y = VFNMS(LDK(KP893101515), T1x, T1u);
                    T27 = VFNMS(LDK(KP120146378), T1x, T1u);
                    T3b = VFMA(LDK(KP066152395), T2L, T2K);
                    T2R = VFNMS(LDK(KP786782374), T2K, T2L);
                    T2M = VFMA(LDK(KP869845200), T2L, T2K);
                    T2f = VFMA(LDK(KP132830569), T1u, T1x);
               }
               /* Group 1: inputs 1, 6, 11, 16, 21.  T3M is the group sum. */
               {
                    V Th, Tw, Tx, Tm, Tr, Ts, Tg, Tt, Tu;
                    Tg = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
                    Th = BYTW(&(W[0]), Tg);
                    {
                         V Tj, Tq, Tl, To;
                         {
                              V Ti, Tp, Tk, Tn;
                              Ti = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
                              Tj = BYTW(&(W[TWVL * 10]), Ti);
                              Tp = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
                              Tq = BYTW(&(W[TWVL * 30]), Tp);
                              Tk = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
                              Tl = BYTW(&(W[TWVL * 40]), Tk);
                              Tn = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
                              To = BYTW(&(W[TWVL * 20]), Tn);
                         }
                         Tw = VSUB(Tj, Tl);
                         Tx = VSUB(Tq, To);
                         Tm = VADD(Tj, Tl);
                         Tr = VADD(To, Tq);
                         Ts = VADD(Tm, Tr);
                    }
                    T3M = VADD(Th, Ts);
                    Ty = VFNMS(LDK(KP618033988), Tx, Tw);
                    T2E = VFMA(LDK(KP618033988), Tw, Tx);
                    Tt = VFNMS(LDK(KP250000000), Ts, Th);
                    Tu = VSUB(Tm, Tr);
                    Tv = VFMA(LDK(KP559016994), Tu, Tt);
                    T2D = VFNMS(LDK(KP559016994), Tu, Tt);
                    Tz = VFNMS(LDK(KP244189809), Ty, Tv);
                    T2a = VFMA(LDK(KP667278218), Tv, Ty);
                    T3e = VFNMS(LDK(KP522847744), T2E, T2D);
                    T2U = VFNMS(LDK(KP987388751), T2D, T2E);
                    T2F = VFMA(LDK(KP893101515), T2E, T2D);
                    T2i = VFNMS(LDK(KP603558818), Ty, Tv);
               }
               /* Group 4: inputs 4, 9, 14, 19, 24.  T3N is the group sum. */
               {
                    V TM, TE, TJ, TN, TO, TP, TL, TQ, TR;
                    TL = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
                    TM = BYTW(&(W[TWVL * 6]), TL);
                    {
                         V TB, TI, TD, TG;
                         {
                              V TA, TH, TC, TF;
                              TA = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
                              TB = BYTW(&(W[TWVL * 46]), TA);
                              TH = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
                              TI = BYTW(&(W[TWVL * 26]), TH);
                              TC = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
                              TD = BYTW(&(W[TWVL * 16]), TC);
                              TF = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
                              TG = BYTW(&(W[TWVL * 36]), TF);
                         }
                         TE = VSUB(TB, TD);
                         TJ = VSUB(TG, TI);
                         TN = VADD(TD, TB);
                         TO = VADD(TI, TG);
                         TP = VADD(TN, TO);
                    }
                    T3N = VADD(TM, TP);
                    TK = VFMA(LDK(KP618033988), TJ, TE);
                    T2B = VFNMS(LDK(KP618033988), TE, TJ);
                    /*
                     * NOTE: VFMS here is deliberate even though the sibling
                     * groups use VFNMS for the same step.  Assuming the usual
                     * FFTW meaning VFMS(a, b, c) = a*b - c, TQ (and therefore
                     * TS and T2A) is carried with the opposite sign in this
                     * group, matching the reversed differences TE/TJ above;
                     * the combinations that consume TS/TK and T2A/T2B only
                     * line up with that sign.  Do not "normalise" to VFNMS.
                     */
                    TQ = VFMS(LDK(KP250000000), TP, TM);
                    TR = VSUB(TN, TO);
                    TS = VFNMS(LDK(KP559016994), TR, TQ);
                    T2A = VFMA(LDK(KP559016994), TR, TQ);
                    TT = VFNMS(LDK(KP667278218), TS, TK);
                    T2b = VFMA(LDK(KP869845200), TS, TK);
                    T3f = VFNMS(LDK(KP494780565), T2A, T2B);
                    T2T = VFNMS(LDK(KP132830569), T2A, T2B);
                    T2C = VFMA(LDK(KP120146378), T2B, T2A);
                    T2j = VFNMS(LDK(KP786782374), TK, TS);
               }
               /* Group 2: inputs 2, 7, 12, 17, 22.  T3P is the group sum. */
               {
                    V TW, T1b, T1c, T11, T16, T17, TV, T18, T19;
                    TV = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
                    TW = BYTW(&(W[TWVL * 2]), TV);
                    {
                         V TY, T15, T10, T13;
                         {
                              V TX, T14, TZ, T12;
                              TX = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
                              TY = BYTW(&(W[TWVL * 12]), TX);
                              T14 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
                              T15 = BYTW(&(W[TWVL * 32]), T14);
                              TZ = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
                              T10 = BYTW(&(W[TWVL * 42]), TZ);
                              T12 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
                              T13 = BYTW(&(W[TWVL * 22]), T12);
                         }
                         T1b = VSUB(TY, T10);
                         T1c = VSUB(T15, T13);
                         T11 = VADD(TY, T10);
                         T16 = VADD(T13, T15);
                         T17 = VADD(T11, T16);
                    }
                    T3P = VADD(TW, T17);
                    T1d = VFNMS(LDK(KP618033988), T1c, T1b);
                    T2H = VFMA(LDK(KP618033988), T1b, T1c);
                    T18 = VFNMS(LDK(KP250000000), T17, TW);
                    T19 = VSUB(T16, T11);
                    T1a = VFNMS(LDK(KP559016994), T19, T18);
                    T2I = VFMA(LDK(KP559016994), T19, T18);
                    T1e = VFNMS(LDK(KP522847744), T1d, T1a);
                    T28 = VFNMS(LDK(KP494780565), T1a, T1d);
                    T3c = VFNMS(LDK(KP667278218), T2I, T2H);
                    T2Q = VFNMS(LDK(KP059835404), T2H, T2I);
                    T2J = VFMA(LDK(KP066152395), T2I, T2H);
                    T2g = VFMA(LDK(KP447533225), T1d, T1a);
               }
               /* Outputs 0, 5, 10, 15, 20: a 5-point combination of the group sums. */
               {
                    V T3Y, T40, T3L, T3S, T3T, T3U, T3Z, T3V;
                    {
                         V T3W, T3X, T3O, T3R;
                         T3W = VSUB(T3M, T3N);
                         T3X = VSUB(T3P, T3Q);
                         T3Y = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3X, T3W));
                         T40 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3W, T3X));
                         T3L = VADD(T1, Tc);
                         T3O = VADD(T3M, T3N);
                         T3R = VADD(T3P, T3Q);
                         T3S = VADD(T3O, T3R);
                         T3T = VFNMS(LDK(KP250000000), T3S, T3L);
                         T3U = VSUB(T3O, T3R);
                    }
                    ST(&(x[0]), VADD(T3S, T3L), ms, &(x[0]));
                    T3Z = VFNMS(LDK(KP559016994), T3U, T3T);
                    ST(&(x[WS(rs, 10)]), VFNMSI(T40, T3Z), ms, &(x[0]));
                    ST(&(x[WS(rs, 15)]), VFMAI(T40, T3Z), ms, &(x[WS(rs, 1)]));
                    T3V = VFMA(LDK(KP559016994), T3U, T3T);
                    ST(&(x[WS(rs, 5)]), VFMAI(T3Y, T3V), ms, &(x[WS(rs, 1)]));
                    ST(&(x[WS(rs, 20)]), VFNMSI(T3Y, T3V), ms, &(x[0]));
               }
               /*
                * Outputs 2, 3, 7, 8, 12, 13, 17, 18, 22, 23.  Each pair
                * (k, 25 - k) shares its two operands and differs only in
                * VFMAI versus VFNMSI.
                */
               {
                    V T2Z, T35, T3B, T3I, T2W, T38, T2O, T32, T2z, T3t, T3h, T3s, T3p, T3F, T3r;
                    V T3v, T3C, T3z, T3A;
                    T2Z = VFMA(LDK(KP734762448), T2U, T2T);
                    T35 = VFNMS(LDK(KP734762448), T2F, T2C);
                    T3z = VFMA(LDK(KP845997307), T3c, T3b);
                    T3A = VFMA(LDK(KP982009705), T3f, T3e);
                    T3B = VFMA(LDK(KP570584518), T3A, T3z);
                    T3I = VFNMS(LDK(KP669429328), T3z, T3A);
                    {
                         V T2S, T2V, T37, T36;
                         T2S = VFMA(LDK(KP772036680), T2R, T2Q);
                         T2V = VFNMS(LDK(KP734762448), T2U, T2T);
                         T36 = VFMA(LDK(KP772036680), T2M, T2J);
                         T37 = VFMA(LDK(KP522616830), T2V, T36);
                         T2W = VFMA(LDK(KP945422727), T2V, T2S);
                         T38 = VFNMS(LDK(KP690983005), T37, T2S);
                    }
                    {
                         V T2N, T2G, T31, T30;
                         T2N = VFNMS(LDK(KP772036680), T2M, T2J);
                         T2G = VFMA(LDK(KP734762448), T2F, T2C);
                         T30 = VFNMS(LDK(KP772036680), T2R, T2Q);
                         T31 = VFNMS(LDK(KP522616830), T2G, T30);
                         T2O = VFMA(LDK(KP956723877), T2N, T2G);
                         T32 = VFMA(LDK(KP763932022), T31, T2N);
                    }
                    {
                         V T3o, T3u, T3l, T3m, T3n;
                         T2z = VFNMS(LDK(KP559016994), Te, Td);
                         T3m = VFMA(LDK(KP447533225), T2B, T2A);
                         T3n = VFMA(LDK(KP578046249), T2D, T2E);
                         T3o = VFNMS(LDK(KP921078979), T3n, T3m);
                         T3t = VFMA(LDK(KP921078979), T3n, T3m);
                         {
                              V T3d, T3g, T3j, T3k;
                              T3d = VFNMS(LDK(KP845997307), T3c, T3b);
                              T3g = VFNMS(LDK(KP982009705), T3f, T3e);
                              T3h = VFMA(LDK(KP923225144), T3g, T3d);
                              T3u = VFNMS(LDK(KP923225144), T3g, T3d);
                              T3j = VFNMS(LDK(KP059835404), T2K, T2L);
                              T3k = VFMA(LDK(KP603558818), T2H, T2I);
                              T3l = VFMA(LDK(KP845997307), T3k, T3j);
                              T3s = VFNMS(LDK(KP845997307), T3k, T3j);
                         }
                         T3p = VFNMS(LDK(KP906616052), T3o, T3l);
                         T3F = VFNMS(LDK(KP904508497), T3u, T3s);
                         T3r = VFNMS(LDK(KP237294955), T3h, T2z);
                         T3v = VFNMS(LDK(KP997675361), T3u, T3t);
                         T3C = VFMA(LDK(KP906616052), T3o, T3l);
                    }
                    /* Outputs 22/3 and 23/2. */
                    {
                         V T2P, T2Y, T3i, T3q;
                         T2P = VFMA(LDK(KP992114701), T2O, T2z);
                         T2Y = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2X, T2W));
                         ST(&(x[WS(rs, 22)]), VFNMSI(T2Y, T2P), ms, &(x[0]));
                         ST(&(x[WS(rs, 3)]), VFMAI(T2Y, T2P), ms, &(x[WS(rs, 1)]));
                         T3i = VFMA(LDK(KP949179823), T3h, T2z);
                         T3q = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2X, T3p));
                         ST(&(x[WS(rs, 23)]), VFNMSI(T3q, T3i), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 2)]), VFMAI(T3q, T3i), ms, &(x[0]));
                    }
                    /* Outputs 8/17. */
                    {
                         V T34, T3a, T33, T39;
                         T33 = VFNMS(LDK(KP855719849), T32, T2Z);
                         T34 = VFMA(LDK(KP897376177), T33, T2z);
                         T39 = VFMA(LDK(KP855719849), T38, T35);
                         T3a = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T39, T2X));
                         ST(&(x[WS(rs, 8)]), VFMAI(T3a, T34), ms, &(x[0]));
                         ST(&(x[WS(rs, 17)]), VFNMSI(T3a, T34), ms, &(x[WS(rs, 1)]));
                    }
                    /* Outputs 12/13 and 18/7. */
                    {
                         V T3x, T3H, T3E, T3K, T3w;
                         T3w = VFMA(LDK(KP560319534), T3v, T3s);
                         T3x = VFNMS(LDK(KP949179823), T3w, T3r);
                         {
                              V T3G, T3y, T3J, T3D;
                              T3G = VFNMS(LDK(KP681693190), T3F, T3t);
                              T3H = VFNMS(LDK(KP860541664), T3G, T3r);
                              T3y = VFMA(LDK(KP262346850), T3p, T2X);
                              T3J = VFNMS(LDK(KP669429328), T3C, T3I);
                              T3D = VFMA(LDK(KP618033988), T3C, T3B);
                              T3E = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3D, T3y));
                              T3K = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3J, T3y));
                         }
                         ST(&(x[WS(rs, 12)]), VFNMSI(T3E, T3x), ms, &(x[0]));
                         ST(&(x[WS(rs, 18)]), VFMAI(T3K, T3H), ms, &(x[0]));
                         ST(&(x[WS(rs, 13)]), VFMAI(T3E, T3x), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 7)]), VFNMSI(T3K, T3H), ms, &(x[WS(rs, 1)]));
                    }
               }
               /*
                * Outputs 1, 4, 6, 9, 11, 14, 16, 19, 21, 24, again stored as
                * (k, 25 - k) pairs.
                */
               {
                    V T2n, T2t, T1V, T22, T2l, T2w, T2d, T2q, Tf, T1I, T1A, T1E, T1B, T1Z, T1J;
                    V T1R, T1W, T1T, T1U;
                    T2n = VFNMS(LDK(KP912575812), T2j, T2i);
                    T2t = VFNMS(LDK(KP912575812), T2b, T2a);
                    T1T = VFNMS(LDK(KP829049696), TT, Tz);
                    T1U = VFNMS(LDK(KP831864738), T1y, T1e);
                    T1V = VFMA(LDK(KP559154169), T1U, T1T);
                    T22 = VFNMS(LDK(KP683113946), T1T, T1U);
                    {
                         V T2h, T2k, T2v, T2u;
                         T2h = VFMA(LDK(KP958953096), T2g, T2f);
                         T2k = VFMA(LDK(KP912575812), T2j, T2i);
                         T2u = VFMA(LDK(KP867381224), T28, T27);
                         T2v = VFMA(LDK(KP447417479), T2k, T2u);
                         T2l = VFMA(LDK(KP894834959), T2k, T2h);
                         T2w = VFNMS(LDK(KP763932022), T2v, T2h);
                    }
                    {
                         V T29, T2c, T2p, T2o;
                         T29 = VFNMS(LDK(KP867381224), T28, T27);
                         T2c = VFMA(LDK(KP912575812), T2b, T2a);
                         T2o = VFNMS(LDK(KP958953096), T2g, T2f);
                         T2p = VFMA(LDK(KP447417479), T2c, T2o);
                         T2d = VFNMS(LDK(KP809385824), T2c, T29);
                         T2q = VFMA(LDK(KP690983005), T2p, T29);
                    }
                    {
                         V T1Q, T1F, T1P, T1G, T1H;
                         Tf = VFMA(LDK(KP559016994), Te, Td);
                         T1G = VFMA(LDK(KP578046249), T1a, T1d);
                         T1H = VFMA(LDK(KP987388751), T1u, T1x);
                         T1I = VFNMS(LDK(KP831864738), T1H, T1G);
                         T1Q = VFMA(LDK(KP831864738), T1H, T1G);
                         {
                              V TU, T1z, T1C, T1D;
                              TU = VFMA(LDK(KP829049696), TT, Tz);
                              T1z = VFMA(LDK(KP831864738), T1y, T1e);
                              T1A = VFMA(LDK(KP904730450), T1z, TU);
                              T1F = VFNMS(LDK(KP904730450), T1z, TU);
                              T1C = VFMA(LDK(KP269969613), Tv, Ty);
                              T1D = VFMA(LDK(KP603558818), TK, TS);
                              T1E = VFMA(LDK(KP916574801), T1D, T1C);
                              T1P = VFNMS(LDK(KP916574801), T1D, T1C);
                         }
                         T1B = VFNMS(LDK(KP242145790), T1A, Tf);
                         T1Z = VADD(T1E, T1F);
                         T1J = VFNMS(LDK(KP904730450), T1I, T1F);
                         T1R = VFMA(LDK(KP904730450), T1Q, T1P);
                         T1W = VFNMS(LDK(KP904730450), T1Q, T1P);
                    }
                    /* Outputs 1/24 and 4/21. */
                    {
                         V T25, T26, T2e, T2m;
                         T25 = VFMA(LDK(KP968583161), T1A, Tf);
                         T26 = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1R, T1O));
                         ST(&(x[WS(rs, 1)]), VFMAI(T26, T25), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 24)]), VFNMSI(T26, T25), ms, &(x[0]));
                         T2e = VFNMS(LDK(KP992114701), T2d, Tf);
                         T2m = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2l, T1O));
                         ST(&(x[WS(rs, 4)]), VFNMSI(T2m, T2e), ms, &(x[0]));
                         ST(&(x[WS(rs, 21)]), VFMAI(T2m, T2e), ms, &(x[WS(rs, 1)]));
                    }
                    /* Outputs 9/16. */
                    {
                         V T2s, T2y, T2r, T2x;
                         T2r = VFNMS(LDK(KP999544308), T2q, T2n);
                         T2s = VFNMS(LDK(KP803003575), T2r, Tf);
                         T2x = VFNMS(LDK(KP999544308), T2w, T2t);
                         T2y = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2x, T1O));
                         ST(&(x[WS(rs, 9)]), VFNMSI(T2y, T2s), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 16)]), VFMAI(T2y, T2s), ms, &(x[0]));
                    }
                    /* Outputs 6/19 and 14/11. */
                    {
                         V T1L, T21, T1Y, T24, T1K;
                         T1K = VFNMS(LDK(KP618033988), T1J, T1E);
                         T1L = VFNMS(LDK(KP876091699), T1K, T1B);
                         {
                              V T20, T1S, T23, T1X;
                              T20 = VFNMS(LDK(KP683113946), T1Z, T1I);
                              T21 = VFMA(LDK(KP792626838), T20, T1B);
                              T1S = VFNMS(LDK(KP242145790), T1R, T1O);
                              T23 = VFMA(LDK(KP617882369), T1W, T22);
                              T1X = VFMA(LDK(KP559016994), T1W, T1V);
                              T1Y = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1X, T1S));
                              T24 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T23, T1S));
                         }
                         ST(&(x[WS(rs, 6)]), VFMAI(T1Y, T1L), ms, &(x[0]));
                         ST(&(x[WS(rs, 14)]), VFNMSI(T24, T21), ms, &(x[0]));
                         ST(&(x[WS(rs, 19)]), VFNMSI(T1Y, T1L), ms, &(x[WS(rs, 1)]));
                         ST(&(x[WS(rs, 11)]), VFMAI(T24, T21), ms, &(x[WS(rs, 1)]));
                    }
               }
          }
     }
     /* VLEAVE is supplied by the SIMD header; its effect is not visible here. */
     VLEAVE();
}
496 
497 static const tw_instr twinstr[] = {
498      VTW(0, 1),
499      VTW(0, 2),
500      VTW(0, 3),
501      VTW(0, 4),
502      VTW(0, 5),
503      VTW(0, 6),
504      VTW(0, 7),
505      VTW(0, 8),
506      VTW(0, 9),
507      VTW(0, 10),
508      VTW(0, 11),
509      VTW(0, 12),
510      VTW(0, 13),
511      VTW(0, 14),
512      VTW(0, 15),
513      VTW(0, 16),
514      VTW(0, 17),
515      VTW(0, 18),
516      VTW(0, 19),
517      VTW(0, 20),
518      VTW(0, 21),
519      VTW(0, 22),
520      VTW(0, 23),
521      VTW(0, 24),
522      { TW_NEXT, VL, 0 }
523 };
524 
525 static const ct_desc desc = { 25, XSIMD_STRING("t1bv_25"), twinstr, &GENUS, { 67, 60, 181, 0 }, 0, 0, 0 };
526 
XSIMD(codelet_t1bv_25)527 void XSIMD(codelet_t1bv_25) (planner *p) {
528      X(kdft_dit_register) (p, t1bv_25, &desc);
529 }
530 #else
531 
532 /* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t1bv_25 -include dft/simd/t1b.h -sign 1 */
533 
534 /*
535  * This function contains 248 FP additions, 188 FP multiplications,
536  * (or, 171 additions, 111 multiplications, 77 fused multiply/add),
537  * 100 stack variables, 40 constants, and 50 memory accesses
538  */
539 #include "dft/simd/t1b.h"
540 
t1bv_25(R * ri,R * ii,const R * W,stride rs,INT mb,INT me,INT ms)541 static void t1bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
542 {
543      DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
544      DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
545      DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
546      DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
547      DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
548      DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
549      DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
550      DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
551      DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
552      DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
553      DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
554      DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
555      DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
556      DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
557      DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
558      DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
559      DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
560      DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
561      DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
562      DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
563      DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
564      DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
565      DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
566      DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
567      DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
568      DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
569      DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
570      DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
571      DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
572      DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
573      DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
574      DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
575      DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
576      DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
577      DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
578      DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
579      DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
580      DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
581      DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
582      DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
583      {
584 	  INT m;
585 	  R *x;
586 	  x = ii;
587 	  for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) {
588 	       V T1A, T1z, T1R, T1S, T1B, T1C, T1Q, T2L, T1l, T2v, T1i, T3e, T2u, Tb, T2i;
589 	       V Tj, T3b, T2h, Tv, T2k, TD, T3a, T2l, T11, T2s, TY, T3d, T2r;
590 	       {
591 		    V T1v, T1x, T1y, T1q, T1s, T1t, T1P;
592 		    T1A = LD(&(x[0]), ms, &(x[0]));
593 		    {
594 			 V T1u, T1w, T1p, T1r;
595 			 T1u = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
596 			 T1v = BYTW(&(W[TWVL * 18]), T1u);
597 			 T1w = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
598 			 T1x = BYTW(&(W[TWVL * 28]), T1w);
599 			 T1y = VADD(T1v, T1x);
600 			 T1p = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
601 			 T1q = BYTW(&(W[TWVL * 8]), T1p);
602 			 T1r = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
603 			 T1s = BYTW(&(W[TWVL * 38]), T1r);
604 			 T1t = VADD(T1q, T1s);
605 		    }
606 		    T1z = VMUL(LDK(KP559016994), VSUB(T1t, T1y));
607 		    T1R = VSUB(T1v, T1x);
608 		    T1S = VMUL(LDK(KP587785252), T1R);
609 		    T1B = VADD(T1t, T1y);
610 		    T1C = VFNMS(LDK(KP250000000), T1B, T1A);
611 		    T1P = VSUB(T1q, T1s);
612 		    T1Q = VMUL(LDK(KP951056516), T1P);
613 		    T2L = VMUL(LDK(KP587785252), T1P);
614 	       }
615 	       {
616 		    V T1f, T19, T1b, T1c, T14, T16, T17, T1e;
617 		    T1e = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
618 		    T1f = BYTW(&(W[TWVL * 4]), T1e);
619 		    {
620 			 V T18, T1a, T13, T15;
621 			 T18 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
622 			 T19 = BYTW(&(W[TWVL * 24]), T18);
623 			 T1a = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
624 			 T1b = BYTW(&(W[TWVL * 34]), T1a);
625 			 T1c = VADD(T19, T1b);
626 			 T13 = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
627 			 T14 = BYTW(&(W[TWVL * 14]), T13);
628 			 T15 = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
629 			 T16 = BYTW(&(W[TWVL * 44]), T15);
630 			 T17 = VADD(T14, T16);
631 		    }
632 		    {
633 			 V T1j, T1k, T1d, T1g, T1h;
634 			 T1j = VSUB(T14, T16);
635 			 T1k = VSUB(T19, T1b);
636 			 T1l = VFMA(LDK(KP475528258), T1j, VMUL(LDK(KP293892626), T1k));
637 			 T2v = VFNMS(LDK(KP475528258), T1k, VMUL(LDK(KP293892626), T1j));
638 			 T1d = VMUL(LDK(KP559016994), VSUB(T17, T1c));
639 			 T1g = VADD(T17, T1c);
640 			 T1h = VFNMS(LDK(KP250000000), T1g, T1f);
641 			 T1i = VADD(T1d, T1h);
642 			 T3e = VADD(T1f, T1g);
643 			 T2u = VSUB(T1h, T1d);
644 		    }
645 	       }
646 	       {
647 		    V Tg, T7, T9, Td, T2, T4, Tc, Tf;
648 		    Tf = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
649 		    Tg = BYTW(&(W[TWVL * 6]), Tf);
650 		    {
651 			 V T6, T8, T1, T3;
652 			 T6 = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
653 			 T7 = BYTW(&(W[TWVL * 26]), T6);
654 			 T8 = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
655 			 T9 = BYTW(&(W[TWVL * 36]), T8);
656 			 Td = VADD(T7, T9);
657 			 T1 = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
658 			 T2 = BYTW(&(W[TWVL * 16]), T1);
659 			 T3 = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
660 			 T4 = BYTW(&(W[TWVL * 46]), T3);
661 			 Tc = VADD(T2, T4);
662 		    }
663 		    {
664 			 V T5, Ta, Te, Th, Ti;
665 			 T5 = VSUB(T2, T4);
666 			 Ta = VSUB(T7, T9);
667 			 Tb = VFMA(LDK(KP475528258), T5, VMUL(LDK(KP293892626), Ta));
668 			 T2i = VFNMS(LDK(KP475528258), Ta, VMUL(LDK(KP293892626), T5));
669 			 Te = VMUL(LDK(KP559016994), VSUB(Tc, Td));
670 			 Th = VADD(Tc, Td);
671 			 Ti = VFNMS(LDK(KP250000000), Th, Tg);
672 			 Tj = VADD(Te, Ti);
673 			 T3b = VADD(Tg, Th);
674 			 T2h = VSUB(Ti, Te);
675 		    }
676 	       }
677 	       {
678 		    V TA, Tr, Tt, Tx, Tm, To, Tw, Tz;
679 		    Tz = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
680 		    TA = BYTW(&(W[0]), Tz);
681 		    {
682 			 V Tq, Ts, Tl, Tn;
683 			 Tq = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
684 			 Tr = BYTW(&(W[TWVL * 20]), Tq);
685 			 Ts = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
686 			 Tt = BYTW(&(W[TWVL * 30]), Ts);
687 			 Tx = VADD(Tr, Tt);
688 			 Tl = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
689 			 Tm = BYTW(&(W[TWVL * 10]), Tl);
690 			 Tn = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
691 			 To = BYTW(&(W[TWVL * 40]), Tn);
692 			 Tw = VADD(Tm, To);
693 		    }
694 		    {
695 			 V Tp, Tu, Ty, TB, TC;
696 			 Tp = VSUB(Tm, To);
697 			 Tu = VSUB(Tr, Tt);
698 			 Tv = VFMA(LDK(KP475528258), Tp, VMUL(LDK(KP293892626), Tu));
699 			 T2k = VFNMS(LDK(KP475528258), Tu, VMUL(LDK(KP293892626), Tp));
700 			 Ty = VMUL(LDK(KP559016994), VSUB(Tw, Tx));
701 			 TB = VADD(Tw, Tx);
702 			 TC = VFNMS(LDK(KP250000000), TB, TA);
703 			 TD = VADD(Ty, TC);
704 			 T3a = VADD(TA, TB);
705 			 T2l = VSUB(TC, Ty);
706 		    }
707 	       }
708 	       {
709 		    V TV, TP, TR, TS, TK, TM, TN, TU;
710 		    TU = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
711 		    TV = BYTW(&(W[TWVL * 2]), TU);
712 		    {
713 			 V TO, TQ, TJ, TL;
714 			 TO = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
715 			 TP = BYTW(&(W[TWVL * 22]), TO);
716 			 TQ = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
717 			 TR = BYTW(&(W[TWVL * 32]), TQ);
718 			 TS = VADD(TP, TR);
719 			 TJ = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
720 			 TK = BYTW(&(W[TWVL * 12]), TJ);
721 			 TL = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
722 			 TM = BYTW(&(W[TWVL * 42]), TL);
723 			 TN = VADD(TK, TM);
724 		    }
725 		    {
726 			 V TZ, T10, TT, TW, TX;
727 			 TZ = VSUB(TK, TM);
728 			 T10 = VSUB(TP, TR);
729 			 T11 = VFMA(LDK(KP475528258), TZ, VMUL(LDK(KP293892626), T10));
730 			 T2s = VFNMS(LDK(KP475528258), T10, VMUL(LDK(KP293892626), TZ));
731 			 TT = VMUL(LDK(KP559016994), VSUB(TN, TS));
732 			 TW = VADD(TN, TS);
733 			 TX = VFNMS(LDK(KP250000000), TW, TV);
734 			 TY = VADD(TT, TX);
735 			 T3d = VADD(TV, TW);
736 			 T2r = VSUB(TX, TT);
737 		    }
738 	       }
739 	       {
740 		    V T3g, T3o, T3k, T3l, T3j, T3m, T3p, T3n;
741 		    {
742 			 V T3c, T3f, T3h, T3i;
743 			 T3c = VSUB(T3a, T3b);
744 			 T3f = VSUB(T3d, T3e);
745 			 T3g = VBYI(VFMA(LDK(KP951056516), T3c, VMUL(LDK(KP587785252), T3f)));
746 			 T3o = VBYI(VFNMS(LDK(KP951056516), T3f, VMUL(LDK(KP587785252), T3c)));
747 			 T3k = VADD(T1A, T1B);
748 			 T3h = VADD(T3a, T3b);
749 			 T3i = VADD(T3d, T3e);
750 			 T3l = VADD(T3h, T3i);
751 			 T3j = VMUL(LDK(KP559016994), VSUB(T3h, T3i));
752 			 T3m = VFNMS(LDK(KP250000000), T3l, T3k);
753 		    }
754 		    ST(&(x[0]), VADD(T3k, T3l), ms, &(x[0]));
755 		    T3p = VSUB(T3m, T3j);
756 		    ST(&(x[WS(rs, 10)]), VADD(T3o, T3p), ms, &(x[0]));
757 		    ST(&(x[WS(rs, 15)]), VSUB(T3p, T3o), ms, &(x[WS(rs, 1)]));
758 		    T3n = VADD(T3j, T3m);
759 		    ST(&(x[WS(rs, 5)]), VADD(T3g, T3n), ms, &(x[WS(rs, 1)]));
760 		    ST(&(x[WS(rs, 20)]), VSUB(T3n, T3g), ms, &(x[0]));
761 	       }
762 	       {
763 		    V T2z, T2M, T2U, T2V, T2W, T34, T35, T36, T2X, T2Y, T2Z, T31, T32, T33, T2n;
764 		    V T2N, T2E, T2K, T2y, T2H, T2A, T2G, T38, T39;
765 		    T2z = VSUB(T1C, T1z);
766 		    T2M = VFNMS(LDK(KP951056516), T1R, T2L);
767 		    T2U = VFMA(LDK(KP1_369094211), T2k, VMUL(LDK(KP728968627), T2l));
768 		    T2V = VFNMS(LDK(KP992114701), T2h, VMUL(LDK(KP250666467), T2i));
769 		    T2W = VADD(T2U, T2V);
770 		    T34 = VFNMS(LDK(KP125581039), T2s, VMUL(LDK(KP998026728), T2r));
771 		    T35 = VFMA(LDK(KP1_274847979), T2v, VMUL(LDK(KP770513242), T2u));
772 		    T36 = VADD(T34, T35);
773 		    T2X = VFMA(LDK(KP1_996053456), T2s, VMUL(LDK(KP062790519), T2r));
774 		    T2Y = VFNMS(LDK(KP637423989), T2u, VMUL(LDK(KP1_541026485), T2v));
775 		    T2Z = VADD(T2X, T2Y);
776 		    T31 = VFNMS(LDK(KP1_457937254), T2k, VMUL(LDK(KP684547105), T2l));
777 		    T32 = VFMA(LDK(KP1_984229402), T2i, VMUL(LDK(KP125333233), T2h));
778 		    T33 = VADD(T31, T32);
779 		    {
780 			 V T2j, T2m, T2I, T2C, T2D, T2J;
781 			 T2j = VFNMS(LDK(KP851558583), T2i, VMUL(LDK(KP904827052), T2h));
782 			 T2m = VFMA(LDK(KP1_752613360), T2k, VMUL(LDK(KP481753674), T2l));
783 			 T2I = VADD(T2m, T2j);
784 			 T2C = VFMA(LDK(KP1_071653589), T2s, VMUL(LDK(KP844327925), T2r));
785 			 T2D = VFMA(LDK(KP125581039), T2v, VMUL(LDK(KP998026728), T2u));
786 			 T2J = VADD(T2C, T2D);
787 			 T2n = VSUB(T2j, T2m);
788 			 T2N = VADD(T2I, T2J);
789 			 T2E = VSUB(T2C, T2D);
790 			 T2K = VMUL(LDK(KP559016994), VSUB(T2I, T2J));
791 		    }
792 		    {
793 			 V T2o, T2p, T2q, T2t, T2w, T2x;
794 			 T2o = VFNMS(LDK(KP963507348), T2k, VMUL(LDK(KP876306680), T2l));
795 			 T2p = VFMA(LDK(KP1_809654104), T2i, VMUL(LDK(KP425779291), T2h));
796 			 T2q = VSUB(T2o, T2p);
797 			 T2t = VFNMS(LDK(KP1_688655851), T2s, VMUL(LDK(KP535826794), T2r));
798 			 T2w = VFNMS(LDK(KP1_996053456), T2v, VMUL(LDK(KP062790519), T2u));
799 			 T2x = VADD(T2t, T2w);
800 			 T2y = VMUL(LDK(KP559016994), VSUB(T2q, T2x));
801 			 T2H = VSUB(T2t, T2w);
802 			 T2A = VADD(T2q, T2x);
803 			 T2G = VADD(T2o, T2p);
804 		    }
805 		    {
806 			 V T2S, T2T, T30, T37;
807 			 T2S = VADD(T2z, T2A);
808 			 T2T = VBYI(VADD(T2M, T2N));
809 			 ST(&(x[WS(rs, 23)]), VSUB(T2S, T2T), ms, &(x[WS(rs, 1)]));
810 			 ST(&(x[WS(rs, 2)]), VADD(T2S, T2T), ms, &(x[0]));
811 			 T30 = VADD(T2z, VADD(T2W, T2Z));
812 			 T37 = VBYI(VSUB(VADD(T33, T36), T2M));
813 			 ST(&(x[WS(rs, 22)]), VSUB(T30, T37), ms, &(x[0]));
814 			 ST(&(x[WS(rs, 3)]), VADD(T30, T37), ms, &(x[WS(rs, 1)]));
815 		    }
816 		    T38 = VBYI(VSUB(VFMA(LDK(KP951056516), VSUB(T2U, T2V), VFMA(LDK(KP309016994), T33, VFNMS(LDK(KP809016994), T36, VMUL(LDK(KP587785252), VSUB(T2X, T2Y))))), T2M));
817 		    T39 = VFMA(LDK(KP309016994), T2W, VFMA(LDK(KP951056516), VSUB(T32, T31), VFMA(LDK(KP587785252), VSUB(T35, T34), VFNMS(LDK(KP809016994), T2Z, T2z))));
818 		    ST(&(x[WS(rs, 8)]), VADD(T38, T39), ms, &(x[0]));
819 		    ST(&(x[WS(rs, 17)]), VSUB(T39, T38), ms, &(x[WS(rs, 1)]));
820 		    {
821 			 V T2F, T2Q, T2P, T2R, T2B, T2O;
822 			 T2B = VFNMS(LDK(KP250000000), T2A, T2z);
823 			 T2F = VFMA(LDK(KP951056516), T2n, VADD(T2y, VFNMS(LDK(KP587785252), T2E, T2B)));
824 			 T2Q = VFMA(LDK(KP587785252), T2n, VFMA(LDK(KP951056516), T2E, VSUB(T2B, T2y)));
825 			 T2O = VFNMS(LDK(KP250000000), T2N, T2M);
826 			 T2P = VBYI(VADD(VFMA(LDK(KP951056516), T2G, VMUL(LDK(KP587785252), T2H)), VADD(T2K, T2O)));
827 			 T2R = VBYI(VADD(VFNMS(LDK(KP951056516), T2H, VMUL(LDK(KP587785252), T2G)), VSUB(T2O, T2K)));
828 			 ST(&(x[WS(rs, 18)]), VSUB(T2F, T2P), ms, &(x[0]));
829 			 ST(&(x[WS(rs, 12)]), VADD(T2Q, T2R), ms, &(x[0]));
830 			 ST(&(x[WS(rs, 7)]), VADD(T2F, T2P), ms, &(x[WS(rs, 1)]));
831 			 ST(&(x[WS(rs, 13)]), VSUB(T2Q, T2R), ms, &(x[WS(rs, 1)]));
832 		    }
833 	       }
834 	       {
835 		    V T1D, T1T, T21, T22, T23, T2b, T2c, T2d, T24, T25, T26, T28, T29, T2a, TF;
836 		    V T1U, T1I, T1O, T1o, T1L, T1E, T1K, T2f, T2g;
837 		    T1D = VADD(T1z, T1C);
838 		    T1T = VADD(T1Q, T1S);
839 		    T21 = VFMA(LDK(KP1_688655851), Tv, VMUL(LDK(KP535826794), TD));
840 		    T22 = VFMA(LDK(KP1_541026485), Tb, VMUL(LDK(KP637423989), Tj));
841 		    T23 = VSUB(T21, T22);
842 		    T2b = VFMA(LDK(KP851558583), T11, VMUL(LDK(KP904827052), TY));
843 		    T2c = VFMA(LDK(KP1_984229402), T1l, VMUL(LDK(KP125333233), T1i));
844 		    T2d = VADD(T2b, T2c);
845 		    T24 = VFNMS(LDK(KP425779291), TY, VMUL(LDK(KP1_809654104), T11));
846 		    T25 = VFNMS(LDK(KP992114701), T1i, VMUL(LDK(KP250666467), T1l));
847 		    T26 = VADD(T24, T25);
848 		    T28 = VFNMS(LDK(KP1_071653589), Tv, VMUL(LDK(KP844327925), TD));
849 		    T29 = VFNMS(LDK(KP770513242), Tj, VMUL(LDK(KP1_274847979), Tb));
850 		    T2a = VADD(T28, T29);
851 		    {
852 			 V Tk, TE, T1M, T1G, T1H, T1N;
853 			 Tk = VFMA(LDK(KP1_071653589), Tb, VMUL(LDK(KP844327925), Tj));
854 			 TE = VFMA(LDK(KP1_937166322), Tv, VMUL(LDK(KP248689887), TD));
855 			 T1M = VADD(TE, Tk);
856 			 T1G = VFMA(LDK(KP1_752613360), T11, VMUL(LDK(KP481753674), TY));
857 			 T1H = VFMA(LDK(KP1_457937254), T1l, VMUL(LDK(KP684547105), T1i));
858 			 T1N = VADD(T1G, T1H);
859 			 TF = VSUB(Tk, TE);
860 			 T1U = VADD(T1M, T1N);
861 			 T1I = VSUB(T1G, T1H);
862 			 T1O = VMUL(LDK(KP559016994), VSUB(T1M, T1N));
863 		    }
864 		    {
865 			 V TG, TH, TI, T12, T1m, T1n;
866 			 TG = VFNMS(LDK(KP497379774), Tv, VMUL(LDK(KP968583161), TD));
867 			 TH = VFNMS(LDK(KP1_688655851), Tb, VMUL(LDK(KP535826794), Tj));
868 			 TI = VADD(TG, TH);
869 			 T12 = VFNMS(LDK(KP963507348), T11, VMUL(LDK(KP876306680), TY));
870 			 T1m = VFNMS(LDK(KP1_369094211), T1l, VMUL(LDK(KP728968627), T1i));
871 			 T1n = VADD(T12, T1m);
872 			 T1o = VMUL(LDK(KP559016994), VSUB(TI, T1n));
873 			 T1L = VSUB(T12, T1m);
874 			 T1E = VADD(TI, T1n);
875 			 T1K = VSUB(TG, TH);
876 		    }
877 		    {
878 			 V T1Z, T20, T27, T2e;
879 			 T1Z = VADD(T1D, T1E);
880 			 T20 = VBYI(VADD(T1T, T1U));
881 			 ST(&(x[WS(rs, 24)]), VSUB(T1Z, T20), ms, &(x[0]));
882 			 ST(&(x[WS(rs, 1)]), VADD(T1Z, T20), ms, &(x[WS(rs, 1)]));
883 			 T27 = VADD(T1D, VADD(T23, T26));
884 			 T2e = VBYI(VSUB(VADD(T2a, T2d), T1T));
885 			 ST(&(x[WS(rs, 21)]), VSUB(T27, T2e), ms, &(x[WS(rs, 1)]));
886 			 ST(&(x[WS(rs, 4)]), VADD(T27, T2e), ms, &(x[0]));
887 		    }
888 		    T2f = VBYI(VSUB(VFMA(LDK(KP309016994), T2a, VFMA(LDK(KP951056516), VADD(T21, T22), VFNMS(LDK(KP809016994), T2d, VMUL(LDK(KP587785252), VSUB(T24, T25))))), T1T));
889 		    T2g = VFMA(LDK(KP951056516), VSUB(T29, T28), VFMA(LDK(KP309016994), T23, VFMA(LDK(KP587785252), VSUB(T2c, T2b), VFNMS(LDK(KP809016994), T26, T1D))));
890 		    ST(&(x[WS(rs, 9)]), VADD(T2f, T2g), ms, &(x[WS(rs, 1)]));
891 		    ST(&(x[WS(rs, 16)]), VSUB(T2g, T2f), ms, &(x[0]));
892 		    {
893 			 V T1J, T1X, T1W, T1Y, T1F, T1V;
894 			 T1F = VFNMS(LDK(KP250000000), T1E, T1D);
895 			 T1J = VFMA(LDK(KP951056516), TF, VADD(T1o, VFNMS(LDK(KP587785252), T1I, T1F)));
896 			 T1X = VFMA(LDK(KP587785252), TF, VFMA(LDK(KP951056516), T1I, VSUB(T1F, T1o)));
897 			 T1V = VFNMS(LDK(KP250000000), T1U, T1T);
898 			 T1W = VBYI(VADD(VFMA(LDK(KP951056516), T1K, VMUL(LDK(KP587785252), T1L)), VADD(T1O, T1V)));
899 			 T1Y = VBYI(VADD(VFNMS(LDK(KP951056516), T1L, VMUL(LDK(KP587785252), T1K)), VSUB(T1V, T1O)));
900 			 ST(&(x[WS(rs, 19)]), VSUB(T1J, T1W), ms, &(x[WS(rs, 1)]));
901 			 ST(&(x[WS(rs, 11)]), VADD(T1X, T1Y), ms, &(x[WS(rs, 1)]));
902 			 ST(&(x[WS(rs, 6)]), VADD(T1J, T1W), ms, &(x[0]));
903 			 ST(&(x[WS(rs, 14)]), VSUB(T1X, T1Y), ms, &(x[0]));
904 		    }
905 	       }
906 	  }
907      }
908      VLEAVE();
909 }
910 
911 static const tw_instr twinstr[] = {
912      VTW(0, 1),
913      VTW(0, 2),
914      VTW(0, 3),
915      VTW(0, 4),
916      VTW(0, 5),
917      VTW(0, 6),
918      VTW(0, 7),
919      VTW(0, 8),
920      VTW(0, 9),
921      VTW(0, 10),
922      VTW(0, 11),
923      VTW(0, 12),
924      VTW(0, 13),
925      VTW(0, 14),
926      VTW(0, 15),
927      VTW(0, 16),
928      VTW(0, 17),
929      VTW(0, 18),
930      VTW(0, 19),
931      VTW(0, 20),
932      VTW(0, 21),
933      VTW(0, 22),
934      VTW(0, 23),
935      VTW(0, 24),
936      { TW_NEXT, VL, 0 }
937 };
938 
939 static const ct_desc desc = { 25, XSIMD_STRING("t1bv_25"), twinstr, &GENUS, { 171, 111, 77, 0 }, 0, 0, 0 };
940 
XSIMD(codelet_t1bv_25)941 void XSIMD(codelet_t1bv_25) (planner *p) {
942      X(kdft_dit_register) (p, t1bv_25, &desc);
943 }
944 #endif
945