/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
20 
/*
 * This header file must include every file or define every
 * type or macro which is required to compile a codelet.
 */
25 
26 #ifndef __RDFT_CODELET_H__
27 #define __RDFT_CODELET_H__
28 
29 #include "kernel/ifftw.h"
30 
/**************************************************************
 * types of codelets
 **************************************************************/

/* FOOab, with a,b in {0,1}, denotes the FOO transform where
   a says whether the input, and b whether the output, is
   shifted by half a sample/slot (0 = unshifted). */
38 
/*
 * Transform kinds known to the rdft codelets.
 *
 * The order of the enumerators is part of the contract: every *_KINDP
 * predicate below is a range test, so each family (R2HC, HC2R, REDFT,
 * RODFT) must stay a contiguous run of values, REDFT must directly
 * precede RODFT, and DHT must precede both.
 */
typedef enum {
     R2HC00, R2HC01, R2HC10, R2HC11,
     HC2R00, HC2R01, HC2R10, HC2R11,
     DHT,
     REDFT00, REDFT01, REDFT10, REDFT11, /* real-even == DCT's */
     RODFT00, RODFT01, RODFT10, RODFT11  /*  real-odd == DST's */
} rdft_kind;

/* standard R2HC/HC2R transforms are unshifted */
#define R2HC R2HC00
#define HC2R HC2R00

/* short names for two of the shifted variants */
#define R2HCII R2HC01
#define HC2RIII HC2R10

/*
 * Kind predicates.  Each expands to an int-valued expression that is
 * nonzero when (k) lies in the corresponding enumerator range.  Most of
 * them evaluate (k) twice, so pass only side-effect-free expressions.
 *
 * (k) >= R2HC00 produces a warning under gcc because checking x >= 0
 * is superfluous for unsigned values...but it is needed because other
 * compilers (e.g. icc) may define the enum to be a signed int...grrr.
 */
#define R2HC_KINDP(k) ((k) >= R2HC00 && (k) <= R2HC11) /* uses kr2hc_genus */
#define HC2R_KINDP(k) ((k) >= HC2R00 && (k) <= HC2R11) /* uses khc2r_genus */

/* True for DHT and for every enumerator declared after it; no upper
   bound is checked. */
#define R2R_KINDP(k) ((k) >= DHT) /* uses kr2r_genus */

#define REDFT_KINDP(k) ((k) >= REDFT00 && (k) <= REDFT11)
#define RODFT_KINDP(k) ((k) >= RODFT00 && (k) <= RODFT11)
/* Union of the two ranges above (REDFT00 .. RODFT11). */
#define REODFT_KINDP(k) ((k) >= REDFT00 && (k) <= RODFT11)
65 
66 /* codelets with real input (output) and complex output (input) */
67 typedef struct kr2c_desc_s kr2c_desc;
68 
69 typedef struct {
70      rdft_kind kind;
71      INT vl;
72 } kr2c_genus;
73 
74 struct kr2c_desc_s {
75      INT n;    /* size of transform computed */
76      const char *nam;
77      opcnt ops;
78      const kr2c_genus *genus;
79 };
80 
81 typedef void (*kr2c) (R *R0, R *R1, R *Cr, R *Ci,
82 		      stride rs, stride csr, stride csi,
83 		      INT vl, INT ivs, INT ovs);
84 void X(kr2c_register)(planner *p, kr2c codelet, const kr2c_desc *desc);
85 
86 /* half-complex to half-complex DIT/DIF codelets: */
87 typedef struct hc2hc_desc_s hc2hc_desc;
88 
89 typedef struct {
90      rdft_kind kind;
91      INT vl;
92 } hc2hc_genus;
93 
94 struct hc2hc_desc_s {
95      INT radix;
96      const char *nam;
97      const tw_instr *tw;
98      const hc2hc_genus *genus;
99      opcnt ops;
100 };
101 
102 typedef void (*khc2hc) (R *rioarray, R *iioarray, const R *W,
103 			stride rs, INT mb, INT me, INT ms);
104 void X(khc2hc_register)(planner *p, khc2hc codelet, const hc2hc_desc *desc);
105 
/* half-complex to rdft2-complex DIT/DIF codelets: */
typedef struct hc2c_desc_s hc2c_desc;

/*
 * Selector passed as the last argument of khc2c_register.
 * NOTE(review): the names suggest the codelet is driven either through
 * an rdft or through a dft sub-plan -- confirm in the hc2c solver.
 */
typedef enum {
     HC2C_VIA_RDFT,
     HC2C_VIA_DFT
} hc2c_kind;
113 
114 typedef struct {
115      int (*okp)(
116 	  const R *Rp, const R *Ip, const R *Rm, const R *Im,
117 	  INT rs, INT mb, INT me, INT ms,
118 	  const planner *plnr);
119      rdft_kind kind;
120      INT vl;
121 } hc2c_genus;
122 
123 struct hc2c_desc_s {
124      INT radix;
125      const char *nam;
126      const tw_instr *tw;
127      const hc2c_genus *genus;
128      opcnt ops;
129 };
130 
131 typedef void (*khc2c) (R *Rp, R *Ip, R *Rm, R *Im, const R *W,
132 		       stride rs, INT mb, INT me, INT ms);
133 void X(khc2c_register)(planner *p, khc2c codelet, const hc2c_desc *desc,
134 		       hc2c_kind hc2ckind);
135 
136 extern const solvtab X(solvtab_rdft_r2cf);
137 extern const solvtab X(solvtab_rdft_r2cb);
138 extern const solvtab X(solvtab_rdft_sse2);
139 extern const solvtab X(solvtab_rdft_avx);
140 extern const solvtab X(solvtab_rdft_avx_128_fma);
141 extern const solvtab X(solvtab_rdft_avx2);
142 extern const solvtab X(solvtab_rdft_avx2_128);
143 extern const solvtab X(solvtab_rdft_avx512);
144 extern const solvtab X(solvtab_rdft_kcvi);
145 extern const solvtab X(solvtab_rdft_altivec);
146 extern const solvtab X(solvtab_rdft_vsx);
147 extern const solvtab X(solvtab_rdft_neon);
148 extern const solvtab X(solvtab_rdft_generic_simd128);
149 extern const solvtab X(solvtab_rdft_generic_simd256);
150 
151 /* real-input & output DFT-like codelets (DHT, etc.) */
152 typedef struct kr2r_desc_s kr2r_desc;
153 
154 typedef struct {
155      INT vl;
156 } kr2r_genus;
157 
158 struct kr2r_desc_s {
159      INT n;    /* size of transform computed */
160      const char *nam;
161      opcnt ops;
162      const kr2r_genus *genus;
163      rdft_kind kind;
164 };
165 
166 typedef void (*kr2r) (const R *I, R *O, stride is, stride os,
167 		      INT vl, INT ivs, INT ovs);
168 void X(kr2r_register)(planner *p, kr2r codelet, const kr2r_desc *desc);
169 
170 extern const solvtab X(solvtab_rdft_r2r);
171 
172 #endif				/* __RDFT_CODELET_H__ */
173