1 /* 2 * Copyright (c) 2003, 2007-14 Matteo Frigo 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 18 * 19 */ 20 21 /* 22 * This header file must include every file or define every 23 * type or macro which is required to compile a codelet. 24 */ 25 26 #ifndef __RDFT_CODELET_H__ 27 #define __RDFT_CODELET_H__ 28 29 #include "kernel/ifftw.h" 30 31 /************************************************************** 32 * types of codelets 33 **************************************************************/ 34 35 /* FOOab, with a,b in {0,1}, denotes the FOO transform 36 where a/b say whether the input/output are shifted by 37 half a sample/slot. */ 38 39 typedef enum { 40 R2HC00, R2HC01, R2HC10, R2HC11, 41 HC2R00, HC2R01, HC2R10, HC2R11, 42 DHT, 43 REDFT00, REDFT01, REDFT10, REDFT11, /* real-even == DCT's */ 44 RODFT00, RODFT01, RODFT10, RODFT11 /* real-odd == DST's */ 45 } rdft_kind; 46 47 /* standard R2HC/HC2R transforms are unshifted */ 48 #define R2HC R2HC00 49 #define HC2R HC2R00 50 51 #define R2HCII R2HC01 52 #define HC2RIII HC2R10 53 54 /* (k) >= R2HC00 produces a warning under gcc because checking x >= 0 55 is superfluous for unsigned values...but it is needed because other 56 compilers (e.g. icc) may define the enum to be a signed int...grrr. */ 57 #define R2HC_KINDP(k) ((k) >= R2HC00 && (k) <= R2HC11) /* uses kr2hc_genus */ 58 #define HC2R_KINDP(k) ((k) >= HC2R00 && (k) <= HC2R11) /* uses khc2r_genus */ 59 60 #define R2R_KINDP(k) ((k) >= DHT) /* uses kr2r_genus */ 61 62 #define REDFT_KINDP(k) ((k) >= REDFT00 && (k) <= REDFT11) 63 #define RODFT_KINDP(k) ((k) >= RODFT00 && (k) <= RODFT11) 64 #define REODFT_KINDP(k) ((k) >= REDFT00 && (k) <= RODFT11) 65 66 /* codelets with real input (output) and complex output (input) */ 67 typedef struct kr2c_desc_s kr2c_desc; 68 69 typedef struct { 70 rdft_kind kind; 71 INT vl; 72 } kr2c_genus; 73 74 struct kr2c_desc_s { 75 INT n; /* size of transform computed */ 76 const char *nam; 77 opcnt ops; 78 const kr2c_genus *genus; 79 }; 80 81 typedef void (*kr2c) (R *R0, R *R1, R *Cr, R *Ci, 82 stride rs, stride csr, stride csi, 83 INT vl, INT ivs, INT ovs); 84 void X(kr2c_register)(planner *p, kr2c codelet, const kr2c_desc *desc); 85 86 /* half-complex to half-complex DIT/DIF codelets: */ 87 typedef struct hc2hc_desc_s hc2hc_desc; 88 89 typedef struct { 90 rdft_kind kind; 91 INT vl; 92 } hc2hc_genus; 93 94 struct hc2hc_desc_s { 95 INT radix; 96 const char *nam; 97 const tw_instr *tw; 98 const hc2hc_genus *genus; 99 opcnt ops; 100 }; 101 102 typedef void (*khc2hc) (R *rioarray, R *iioarray, const R *W, 103 stride rs, INT mb, INT me, INT ms); 104 void X(khc2hc_register)(planner *p, khc2hc codelet, const hc2hc_desc *desc); 105 106 /* half-complex to rdft2-complex DIT/DIF codelets: */ 107 typedef struct hc2c_desc_s hc2c_desc; 108 109 typedef enum { 110 HC2C_VIA_RDFT, 111 HC2C_VIA_DFT 112 } hc2c_kind; 113 114 typedef struct { 115 int (*okp)( 116 const R *Rp, const R *Ip, const R *Rm, const R *Im, 117 INT rs, INT mb, INT me, INT ms, 118 const planner *plnr); 119 rdft_kind kind; 120 INT vl; 121 } hc2c_genus; 122 123 struct hc2c_desc_s { 124 INT radix; 125 const char *nam; 126 const tw_instr *tw; 127 const hc2c_genus *genus; 128 opcnt ops; 129 }; 130 131 typedef void (*khc2c) (R *Rp, R *Ip, R *Rm, R *Im, const R *W, 132 stride rs, INT mb, INT me, INT ms); 133 void X(khc2c_register)(planner *p, khc2c codelet, const hc2c_desc *desc, 134 hc2c_kind hc2ckind); 135 136 extern const solvtab X(solvtab_rdft_r2cf); 137 extern const solvtab X(solvtab_rdft_r2cb); 138 extern const solvtab X(solvtab_rdft_sse2); 139 extern const solvtab X(solvtab_rdft_avx); 140 extern const solvtab X(solvtab_rdft_avx_128_fma); 141 extern const solvtab X(solvtab_rdft_avx2); 142 extern const solvtab X(solvtab_rdft_avx2_128); 143 extern const solvtab X(solvtab_rdft_avx512); 144 extern const solvtab X(solvtab_rdft_kcvi); 145 extern const solvtab X(solvtab_rdft_altivec); 146 extern const solvtab X(solvtab_rdft_vsx); 147 extern const solvtab X(solvtab_rdft_neon); 148 extern const solvtab X(solvtab_rdft_generic_simd128); 149 extern const solvtab X(solvtab_rdft_generic_simd256); 150 151 /* real-input & output DFT-like codelets (DHT, etc.) */ 152 typedef struct kr2r_desc_s kr2r_desc; 153 154 typedef struct { 155 INT vl; 156 } kr2r_genus; 157 158 struct kr2r_desc_s { 159 INT n; /* size of transform computed */ 160 const char *nam; 161 opcnt ops; 162 const kr2r_genus *genus; 163 rdft_kind kind; 164 }; 165 166 typedef void (*kr2r) (const R *I, R *O, stride is, stride os, 167 INT vl, INT ivs, INT ovs); 168 void X(kr2r_register)(planner *p, kr2r codelet, const kr2r_desc *desc); 169 170 extern const solvtab X(solvtab_rdft_r2r); 171 172 #endif /* __RDFT_CODELET_H__ */ 173