// Copyright (c) 2018-2020, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

use super::TxSize;
use super::TxType;

use super::HTX_TAB;
use super::VTX_TAB;

/// Per-stage bit shifts applied during one 2-D forward transform,
/// one entry per stage. Positive values are right shifts; negative
/// entries presumably denote left shifts in the consumer — confirm
/// against the code that applies `shift`.
pub type TxfmShift = [i8; 3];
/// One `TxfmShift` per supported bit depth. Indexed with
/// `(bd - 8) / 2` in `Txfm2DFlipCfg::fwd`, so entry 0 is 8-bit,
/// entry 1 is 10-bit and entry 2 is 12-bit.
pub type TxfmShifts = [TxfmShift; 3];

// Shift so that the first shift is 4 - (bd - 8) to align with the initial
// design of daala_tx
// 8 bit 4x4 is an exception and only shifts by 3 in the first stage
const FWD_SHIFT_4X4: TxfmShifts = [[3, 0, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_32X32: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_64X64: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_4X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X4: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X32: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_32X16: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_32X64: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_64X32: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_4X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X4: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X32: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_32X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X64: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_64X16: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];

/// Forward-transform shifts for every transform size. Looked up with
/// `tx_size as usize` in `Txfm2DFlipCfg::fwd`, so the entry order here
/// must match the declaration order of the `TxSize` enum.
pub const FWD_TXFM_SHIFT_LS: [TxfmShifts; TxSize::TX_SIZES_ALL] = [
  FWD_SHIFT_4X4,
  FWD_SHIFT_8X8,
  FWD_SHIFT_16X16,
  FWD_SHIFT_32X32,
  FWD_SHIFT_64X64,
  FWD_SHIFT_4X8,
  FWD_SHIFT_8X4,
  FWD_SHIFT_8X16,
  FWD_SHIFT_16X8,
  FWD_SHIFT_16X32,
  FWD_SHIFT_32X16,
  FWD_SHIFT_32X64,
  FWD_SHIFT_64X32,
  FWD_SHIFT_4X16,
  FWD_SHIFT_16X4,
  FWD_SHIFT_8X32,
  FWD_SHIFT_32X8,
  FWD_SHIFT_16X64,
  FWD_SHIFT_64X16,
];

/// Concrete 1-D transform kernels selectable for a column or row pass.
/// `Invalid` marks (size, type) combinations that have no kernel; the
/// lookup in `Txfm2DFlipCfg::fwd` asserts these are never selected.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TxfmType {
  DCT4,
  DCT8,
  DCT16,
  DCT32,
  DCT64,
  ADST4,
  ADST8,
  ADST16,
  Identity4,
  Identity8,
  Identity16,
  Identity32,
  Invalid,
}

impl TxfmType {
  /// Number of 1-D transform families used to index the table below
  /// (DCT, ADST, FLIPADST, identity — matching the 1-D type from
  /// `VTX_TAB`/`HTX_TAB` cast to usize).
  const TX_TYPES_1D: usize = 4;
  /// Maps `[size index][1-D tx type]` to a kernel. Note columns 1 and 2
  /// both map to ADST: FLIPADST reuses the ADST kernel, with the flip
  /// applied separately via `get_flip_cfg`. ADST is unavailable at the
  /// 32- and 64-point sizes, and identity at 64, hence `Invalid`.
  const AV1_TXFM_TYPE_LS: [[TxfmType; Self::TX_TYPES_1D]; 5] = [
    [TxfmType::DCT4, TxfmType::ADST4, TxfmType::ADST4, TxfmType::Identity4],
    [TxfmType::DCT8, TxfmType::ADST8, TxfmType::ADST8, TxfmType::Identity8],
    [
      TxfmType::DCT16,
      TxfmType::ADST16,
      TxfmType::ADST16,
      TxfmType::Identity16,
    ],
    [
      TxfmType::DCT32,
      TxfmType::Invalid,
      TxfmType::Invalid,
      TxfmType::Identity32,
    ],
    [TxfmType::DCT64, TxfmType::Invalid, TxfmType::Invalid, TxfmType::Invalid],
  ];
}

/// Fully-resolved configuration for one 2-D forward transform: the 1-D
/// kernel for each direction, the per-stage shifts for the current bit
/// depth, and whether the residual block must be flipped first.
#[derive(Debug, Clone, Copy)]
pub struct Txfm2DFlipCfg {
  pub tx_size: TxSize,
  /// Flip upside down
  pub ud_flip: bool,
  /// Flip left to right
  pub lr_flip: bool,
  pub shift: TxfmShift,
  pub txfm_type_col: TxfmType,
  pub txfm_type_row: TxfmType,
}

impl Txfm2DFlipCfg {
  /// Builds the forward-transform configuration for `tx_type`/`tx_size`
  /// at bit depth `bd` (8, 10 or 12; the shift table is indexed with
  /// `(bd - 8) / 2`).
  ///
  /// # Panics
  ///
  /// Panics if either direction resolves to `TxfmType::Invalid`, i.e.
  /// the (tx_type, tx_size) combination is not defined.
  pub fn fwd(tx_type: TxType, tx_size: TxSize, bd: usize) -> Self {
    // Split the 2-D tx type into its vertical and horizontal 1-D parts.
    let tx_type_1d_col = VTX_TAB[tx_type as usize];
    let tx_type_1d_row = HTX_TAB[tx_type as usize];
    let txw_idx = tx_size.width_index();
    let txh_idx = tx_size.height_index();
    // The column pass runs over the block height, the row pass over the
    // width, so each kernel is selected by the matching dimension index.
    let txfm_type_col =
      TxfmType::AV1_TXFM_TYPE_LS[txh_idx][tx_type_1d_col as usize];
    let txfm_type_row =
      TxfmType::AV1_TXFM_TYPE_LS[txw_idx][tx_type_1d_row as usize];
    assert_ne!(txfm_type_col, TxfmType::Invalid);
    assert_ne!(txfm_type_row, TxfmType::Invalid);
    let (ud_flip, lr_flip) = Self::get_flip_cfg(tx_type);

    Txfm2DFlipCfg {
      tx_size,
      ud_flip,
      lr_flip,
      shift: FWD_TXFM_SHIFT_LS[tx_size as usize][(bd - 8) / 2],
      txfm_type_col,
      txfm_type_row,
    }
  }

  /// Determine the flip config, returning (ud_flip, lr_flip)
  fn get_flip_cfg(tx_type: TxType) -> (bool, bool) {
    use self::TxType::*;
    match tx_type {
      DCT_DCT | ADST_DCT | DCT_ADST | ADST_ADST | IDTX | V_DCT | H_DCT
      | V_ADST | H_ADST => (false, false),
      // FLIPADST on the vertical axis flips the block upside down...
      FLIPADST_DCT | FLIPADST_ADST | V_FLIPADST => (true, false),
      // ...and on the horizontal axis flips it left to right.
      DCT_FLIPADST | ADST_FLIPADST | H_FLIPADST => (false, true),
      FLIPADST_FLIPADST => (true, true),
    }
  }
}

/// Stores the listed expressions into `$arr[0], $arr[1], ...` in order.
macro_rules! store_coeffs {
  ( $arr:expr, $( $x:expr ),* ) => {
    {
      let mut i: i32 = -1;
      $(
        i += 1;
        $arr[i as usize] = $x;
      )*
    }
  };
}

// Generates the shared 1-D transform kernels. The zero-argument arm
// re-invokes the macro with a no-op `allow()` attribute and an empty
// qualifier list; the main arm takes an attribute meta-item `$m` plus
// optional fn qualifiers `$($s)*` (presumably e.g. `unsafe` for
// target-feature builds — confirm at the call sites).
macro_rules! impl_1d_tx {
  () => {
    impl_1d_tx!
{allow(), } 166 }; 167 168 ($m:meta, $($s:ident),*) => { 169 trait RotateKernelPi4<T: TxOperations> { 170 const ADD: $($s)* fn(T, T) -> T; 171 const SUB: $($s)* fn(T, T) -> T; 172 173 #[$m] 174 $($s)* fn kernel(p0: T, p1: T, m: ((i32, i32), (i32, i32))) -> (T, T) { 175 let t = Self::ADD(p1, p0); 176 let (a, out0) = (p0.tx_mul(m.0), t.tx_mul(m.1)); 177 let out1 = Self::SUB(a, out0); 178 (out0, out1) 179 } 180 } 181 182 struct RotatePi4Add; 183 struct RotatePi4AddAvg; 184 struct RotatePi4Sub; 185 struct RotatePi4SubAvg; 186 187 impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4Add { 188 const ADD: $($s)* fn(T, T) -> T = T::add; 189 const SUB: $($s)* fn(T, T) -> T = T::sub; 190 } 191 192 impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4AddAvg { 193 const ADD: $($s)* fn(T, T) -> T = T::add_avg; 194 const SUB: $($s)* fn(T, T) -> T = T::sub; 195 } 196 197 impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4Sub { 198 const ADD: $($s)* fn(T, T) -> T = T::sub; 199 const SUB: $($s)* fn(T, T) -> T = T::add; 200 } 201 202 impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4SubAvg { 203 const ADD: $($s)* fn(T, T) -> T = T::sub_avg; 204 const SUB: $($s)* fn(T, T) -> T = T::add; 205 } 206 207 trait RotateKernel<T: TxOperations> { 208 const ADD: $($s)* fn(T, T) -> T; 209 const SUB: $($s)* fn(T, T) -> T; 210 const SHIFT: $($s)* fn(T) -> T; 211 212 #[$m] 213 $($s)* fn half_kernel( 214 p0: (T, T), p1: T, m: ((i32, i32), (i32, i32), (i32, i32)), 215 ) -> (T, T) { 216 let t = Self::ADD(p1, p0.0); 217 let (a, b, c) = (p0.1.tx_mul(m.0), p1.tx_mul(m.1), t.tx_mul(m.2)); 218 let out0 = b.add(c); 219 let shifted = Self::SHIFT(c); 220 let out1 = Self::SUB(a, shifted); 221 (out0, out1) 222 } 223 224 #[$m] 225 $($s)* fn kernel(p0: T, p1: T, m: ((i32, i32), (i32, i32), (i32, i32))) -> (T, T) { 226 Self::half_kernel((p0, p0), p1, m) 227 } 228 } 229 230 trait RotateKernelNeg<T: TxOperations> { 231 const ADD: $($s)* fn(T, T) -> T; 232 233 #[$m] 234 $($s)* fn kernel(p0: T, p1: T, m: 
((i32, i32), (i32, i32), (i32, i32))) -> (T, T) { 235 let t = Self::ADD(p0, p1); 236 let (a, b, c) = (p0.tx_mul(m.0), p1.tx_mul(m.1), t.tx_mul(m.2)); 237 let out0 = b.sub(c); 238 let out1 = c.sub(a); 239 (out0, out1) 240 } 241 } 242 243 struct RotateAdd; 244 struct RotateAddAvg; 245 struct RotateAddShift; 246 struct RotateSub; 247 struct RotateSubAvg; 248 struct RotateSubShift; 249 struct RotateNeg; 250 struct RotateNegAvg; 251 252 impl<T: TxOperations> RotateKernel<T> for RotateAdd { 253 const ADD: $($s)* fn(T, T) -> T = T::add; 254 const SUB: $($s)* fn(T, T) -> T = T::sub; 255 const SHIFT: $($s)* fn(T) -> T = T::copy_fn; 256 } 257 258 impl<T: TxOperations> RotateKernel<T> for RotateAddAvg { 259 const ADD: $($s)* fn(T, T) -> T = T::add_avg; 260 const SUB: $($s)* fn(T, T) -> T = T::sub; 261 const SHIFT: $($s)* fn(T) -> T = T::copy_fn; 262 } 263 264 impl<T: TxOperations> RotateKernel<T> for RotateAddShift { 265 const ADD: $($s)* fn(T, T) -> T = T::add; 266 const SUB: $($s)* fn(T, T) -> T = T::sub; 267 const SHIFT: $($s)* fn(T) -> T = T::rshift1; 268 } 269 270 impl<T: TxOperations> RotateKernel<T> for RotateSub { 271 const ADD: $($s)* fn(T, T) -> T = T::sub; 272 const SUB: $($s)* fn(T, T) -> T = T::add; 273 const SHIFT: $($s)* fn(T) -> T = T::copy_fn; 274 } 275 276 impl<T: TxOperations> RotateKernel<T> for RotateSubAvg { 277 const ADD: $($s)* fn(T, T) -> T = T::sub_avg; 278 const SUB: $($s)* fn(T, T) -> T = T::add; 279 const SHIFT: $($s)* fn(T) -> T = T::copy_fn; 280 } 281 282 impl<T: TxOperations> RotateKernel<T> for RotateSubShift { 283 const ADD: $($s)* fn(T, T) -> T = T::sub; 284 const SUB: $($s)* fn(T, T) -> T = T::add; 285 const SHIFT: $($s)* fn(T) -> T = T::rshift1; 286 } 287 288 impl<T: TxOperations> RotateKernelNeg<T> for RotateNeg { 289 const ADD: $($s)* fn(T, T) -> T = T::sub; 290 } 291 292 impl<T: TxOperations> RotateKernelNeg<T> for RotateNegAvg { 293 const ADD: $($s)* fn(T, T) -> T = T::sub_avg; 294 } 295 296 #[inline] 297 #[$m] 298 $($s)* fn 
butterfly_add<T: TxOperations>(p0: T, p1: T) -> ((T, T), T) { 299 let p0 = p0.add(p1); 300 let p0h = p0.rshift1(); 301 let p1h = p1.sub(p0h); 302 ((p0h, p0), p1h) 303 } 304 305 #[inline] 306 #[$m] 307 $($s)* fn butterfly_sub<T: TxOperations>(p0: T, p1: T) -> ((T, T), T) { 308 let p0 = p0.sub(p1); 309 let p0h = p0.rshift1(); 310 let p1h = p1.add(p0h); 311 ((p0h, p0), p1h) 312 } 313 314 #[inline] 315 #[$m] 316 $($s)* fn butterfly_neg<T: TxOperations>(p0: T, p1: T) -> (T, (T, T)) { 317 let p1 = p0.sub(p1); 318 let p1h = p1.rshift1(); 319 let p0h = p0.sub(p1h); 320 (p0h, (p1h, p1)) 321 } 322 323 #[inline] 324 #[$m] 325 $($s)* fn butterfly_add_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) { 326 let p1 = p1h.add(p0.0); 327 let p0 = p0.1.sub(p1); 328 (p0, p1) 329 } 330 331 #[inline] 332 #[$m] 333 $($s)* fn butterfly_sub_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) { 334 let p1 = p1h.sub(p0.0); 335 let p0 = p0.1.add(p1); 336 (p0, p1) 337 } 338 339 #[inline] 340 #[$m] 341 $($s)* fn butterfly_neg_asym<T: TxOperations>(p0h: T, p1: (T, T)) -> (T, T) { 342 let p0 = p0h.add(p1.0); 343 let p1 = p0.sub(p1.1); 344 (p0, p1) 345 } 346 347 #[$m] 348 $($s)* fn daala_fdct_ii_2_asym<T: TxOperations>(p0h: T, p1: (T, T)) -> (T, T) { 349 butterfly_neg_asym(p0h, p1) 350 } 351 352 #[$m] 353 $($s)* fn daala_fdst_iv_2_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) { 354 // 473/512 = (Sin[3*Pi/8] + Cos[3*Pi/8])/Sqrt[2] = 0.9238795325112867 355 // 3135/4096 = (Sin[3*Pi/8] - Cos[3*Pi/8])*Sqrt[2] = 0.7653668647301795 356 // 4433/8192 = Cos[3*Pi/8]*Sqrt[2] = 0.5411961001461971 357 RotateAdd::half_kernel(p0, p1h, ((473, 9), (3135, 12), (4433, 13))) 358 } 359 360 #[$m] 361 $($s)* fn daala_fdct_ii_4<T: TxOperations>( 362 q0: T, q1: T, q2: T, q3: T, output: &mut [T], 363 ) { 364 // +/- Butterflies with asymmetric output. 365 let (q0h, q3) = butterfly_neg(q0, q3); 366 let (q1, q2h) = butterfly_add(q1, q2); 367 368 // Embedded 2-point transforms with asymmetric input. 
369 let (q0, q1) = daala_fdct_ii_2_asym(q0h, q1); 370 let (q3, q2) = daala_fdst_iv_2_asym(q3, q2h); 371 372 store_coeffs!(output, q0, q1, q2, q3); 373 } 374 375 #[$m] 376 $($s)* fn daala_fdct4<T: TxOperations>(coeffs: &mut [T]) { 377 assert!(coeffs.len() >= 4); 378 let mut temp_out: [T; 4] = [T::zero(); 4]; 379 daala_fdct_ii_4(coeffs[0], coeffs[1], coeffs[2], coeffs[3], &mut temp_out); 380 381 coeffs[0] = temp_out[0]; 382 coeffs[1] = temp_out[2]; 383 coeffs[2] = temp_out[1]; 384 coeffs[3] = temp_out[3]; 385 } 386 387 #[$m] 388 $($s)* fn daala_fdst_vii_4<T: TxOperations>(coeffs: &mut [T]) { 389 assert!(coeffs.len() >= 4); 390 391 let q0 = coeffs[0]; 392 let q1 = coeffs[1]; 393 let q2 = coeffs[2]; 394 let q3 = coeffs[3]; 395 let t0 = q1.add(q3); 396 // t1 = (q0 + q1 - q3)/2 397 let t1 = q1.add(q0.sub_avg(t0)); 398 let t2 = q0.sub(q1); 399 let t3 = q2; 400 let t4 = q0.add(q3); 401 // 7021/16384 ~= 2*Sin[2*Pi/9]/3 ~= 0.428525073124360 402 let t0 = t0.tx_mul((7021, 14)); 403 // 37837/32768 ~= 4*Sin[3*Pi/9]/3 ~= 1.154700538379252 404 let t1 = t1.tx_mul((37837, 15)); 405 // 21513/32768 ~= 2*Sin[4*Pi/9]/3 ~= 0.656538502008139 406 let t2 = t2.tx_mul((21513, 15)); 407 // 37837/32768 ~= 4*Sin[3*Pi/9]/3 ~= 1.154700538379252 408 let t3 = t3.tx_mul((37837, 15)); 409 // 467/2048 ~= 2*Sin[1*Pi/9]/3 ~= 0.228013428883779 410 let t4 = t4.tx_mul((467, 11)); 411 let t3h = t3.rshift1(); 412 let u4 = t4.add(t3h); 413 coeffs[0] = t0.add(u4); 414 coeffs[1] = t1; 415 coeffs[2] = t0.add(t2.sub(t3h)); 416 coeffs[3] = t2.add(t3.sub(u4)); 417 } 418 419 #[$m] 420 $($s)* fn daala_fdct_ii_2<T: TxOperations>(p0: T, p1: T) -> (T, T) { 421 // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 422 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 423 let (p1, p0) = RotatePi4SubAvg::kernel(p1, p0, ((11585, 13), (11585, 13))); 424 (p0, p1) 425 } 426 427 #[$m] 428 $($s)* fn daala_fdst_iv_2<T: TxOperations>(p0: T, p1: T) -> (T, T) { 429 // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 
430 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461971 431 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 432 RotateAddAvg::kernel(p0, p1, ((10703, 13), (8867, 14), (3135, 12))) 433 } 434 435 #[$m] 436 $($s)* fn daala_fdct_ii_4_asym<T: TxOperations>( 437 q0h: T, q1: (T, T), q2h: T, q3: (T, T), output: &mut [T], 438 ) { 439 // +/- Butterflies with asymmetric input. 440 let (q0, q3) = butterfly_neg_asym(q0h, q3); 441 let (q1, q2) = butterfly_sub_asym(q1, q2h); 442 443 // Embedded 2-point orthonormal transforms. 444 let (q0, q1) = daala_fdct_ii_2(q0, q1); 445 let (q3, q2) = daala_fdst_iv_2(q3, q2); 446 447 store_coeffs!(output, q0, q1, q2, q3); 448 } 449 450 #[$m] 451 $($s)* fn daala_fdst_iv_4_asym<T: TxOperations>( 452 q0: (T, T), q1h: T, q2: (T, T), q3h: T, output: &mut [T], 453 ) { 454 // Stage 0 455 // 9633/16384 = (Sin[7*Pi/16] + Cos[7*Pi/16])/2 = 0.5879378012096793 456 // 12873/8192 = (Sin[7*Pi/16] - Cos[7*Pi/16])*2 = 1.5713899167742045 457 // 12785/32768 = Cos[7*Pi/16]*2 = 0.3901806440322565 458 let (q0, q3) = RotateAddShift::half_kernel( 459 q0, 460 q3h, 461 ((9633, 14), (12873, 13), (12785, 15)), 462 ); 463 // 11363/16384 = (Sin[5*Pi/16] + Cos[5*Pi/16])/2 = 0.6935199226610738 464 // 18081/32768 = (Sin[5*Pi/16] - Cos[5*Pi/16])*2 = 0.5517987585658861 465 // 4551/4096 = Cos[5*Pi/16]*2 = 1.1111404660392044 466 let (q2, q1) = RotateSubShift::half_kernel( 467 q2, 468 q1h, 469 ((11363, 14), (18081, 15), (4551, 12)), 470 ); 471 472 // Stage 1 473 let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3); 474 let (q0, q1) = butterfly_sub_asym((q0.rshift1(), q0), q1); 475 476 // Stage 2 477 // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 478 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 479 let (q2, q1) = RotatePi4AddAvg::kernel(q2, q1, ((11585, 13), (11585, 13))); 480 481 store_coeffs!(output, q0, q1, q2, q3); 482 } 483 484 #[$m] 485 $($s)* fn daala_fdct_ii_8<T: TxOperations>( 486 r0: T, r1: T, r2: T, r3: T, r4: T, r5: T, r6: T, r7: T, 
output: &mut [T], 487 ) { 488 // +/- Butterflies with asymmetric output. 489 let (r0h, r7) = butterfly_neg(r0, r7); 490 let (r1, r6h) = butterfly_add(r1, r6); 491 let (r2h, r5) = butterfly_neg(r2, r5); 492 let (r3, r4h) = butterfly_add(r3, r4); 493 494 // Embedded 4-point transforms with asymmetric input. 495 daala_fdct_ii_4_asym(r0h, r1, r2h, r3, &mut output[0..4]); 496 daala_fdst_iv_4_asym(r7, r6h, r5, r4h, &mut output[4..8]); 497 output[4..8].reverse(); 498 } 499 500 #[$m] 501 $($s)* fn daala_fdct8<T: TxOperations>(coeffs: &mut [T]) { 502 assert!(coeffs.len() >= 8); 503 let mut temp_out: [T; 8] = [T::zero(); 8]; 504 daala_fdct_ii_8( 505 coeffs[0], 506 coeffs[1], 507 coeffs[2], 508 coeffs[3], 509 coeffs[4], 510 coeffs[5], 511 coeffs[6], 512 coeffs[7], 513 &mut temp_out, 514 ); 515 516 coeffs[0] = temp_out[0]; 517 coeffs[1] = temp_out[4]; 518 coeffs[2] = temp_out[2]; 519 coeffs[3] = temp_out[6]; 520 coeffs[4] = temp_out[1]; 521 coeffs[5] = temp_out[5]; 522 coeffs[6] = temp_out[3]; 523 coeffs[7] = temp_out[7]; 524 } 525 526 #[$m] 527 $($s)* fn daala_fdst_iv_8<T: TxOperations>( 528 r0: T, r1: T, r2: T, r3: T, r4: T, r5: T, r6: T, r7: T, output: &mut [T], 529 ) { 530 // Stage 0 531 // 17911/16384 = Sin[15*Pi/32] + Cos[15*Pi/32] = 1.0932018670017576 532 // 14699/16384 = Sin[15*Pi/32] - Cos[15*Pi/32] = 0.8971675863426363 533 // 803/8192 = Cos[15*Pi/32] = 0.0980171403295606 534 let (r0, r7) = 535 RotateAdd::kernel(r0, r7, ((17911, 14), (14699, 14), (803, 13))); 536 // 20435/16384 = Sin[13*Pi/32] + Cos[13*Pi/32] = 1.24722501298667123 537 // 21845/32768 = Sin[13*Pi/32] - Cos[13*Pi/32] = 0.66665565847774650 538 // 1189/4096 = Cos[13*Pi/32] = 0.29028467725446233 539 let (r6, r1) = 540 RotateSub::kernel(r6, r1, ((20435, 14), (21845, 15), (1189, 12))); 541 // 22173/16384 = Sin[11*Pi/32] + Cos[11*Pi/32] = 1.3533180011743526 542 // 3363/8192 = Sin[11*Pi/32] - Cos[11*Pi/32] = 0.4105245275223574 543 // 15447/32768 = Cos[11*Pi/32] = 0.47139673682599764 544 let (r2, r5) = 545 
RotateAdd::kernel(r2, r5, ((22173, 14), (3363, 13), (15447, 15))); 546 // 23059/16384 = Sin[9*Pi/32] + Cos[9*Pi/32] = 1.4074037375263826 547 // 2271/16384 = Sin[9*Pi/32] - Cos[9*Pi/32] = 0.1386171691990915 548 // 5197/8192 = Cos[9*Pi/32] = 0.6343932841636455 549 let (r4, r3) = 550 RotateSub::kernel(r4, r3, ((23059, 14), (2271, 14), (5197, 13))); 551 552 // Stage 1 553 let (r0, r3h) = butterfly_add(r0, r3); 554 let (r2, r1h) = butterfly_sub(r2, r1); 555 let (r5, r6h) = butterfly_add(r5, r6); 556 let (r7, r4h) = butterfly_sub(r7, r4); 557 558 // Stage 2 559 let (r7, r6) = butterfly_add_asym(r7, r6h); 560 let (r5, r3) = butterfly_add_asym(r5, r3h); 561 let (r2, r4) = butterfly_add_asym(r2, r4h); 562 let (r0, r1) = butterfly_sub_asym(r0, r1h); 563 564 // Stage 3 565 // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 566 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 567 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 568 let (r3, r4) = 569 RotateSubAvg::kernel(r3, r4, ((10703, 13), (8867, 14), (3135, 12))); 570 // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 571 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 572 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 573 let (r2, r5) = 574 RotateNegAvg::kernel(r2, r5, ((10703, 13), (8867, 14), (3135, 12))); 575 // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 576 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 577 let (r1, r6) = RotatePi4SubAvg::kernel(r1, r6, ((11585, 13), (11585, 13))); 578 579 store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7); 580 } 581 582 #[$m] 583 $($s)* fn daala_fdst8<T: TxOperations>(coeffs: &mut [T]) { 584 assert!(coeffs.len() >= 8); 585 let mut temp_out: [T; 8] = [T::zero(); 8]; 586 daala_fdst_iv_8( 587 coeffs[0], 588 coeffs[1], 589 coeffs[2], 590 coeffs[3], 591 coeffs[4], 592 coeffs[5], 593 coeffs[6], 594 coeffs[7], 595 &mut temp_out, 596 ); 597 598 coeffs[0] = temp_out[0]; 599 coeffs[1] = temp_out[4]; 600 coeffs[2] = 
temp_out[2]; 601 coeffs[3] = temp_out[6]; 602 coeffs[4] = temp_out[1]; 603 coeffs[5] = temp_out[5]; 604 coeffs[6] = temp_out[3]; 605 coeffs[7] = temp_out[7]; 606 } 607 608 #[$m] 609 $($s)* fn daala_fdst_iv_4<T: TxOperations>( 610 q0: T, q1: T, q2: T, q3: T, output: &mut [T], 611 ) { 612 // Stage 0 613 // 13623/16384 = (Sin[7*Pi/16] + Cos[7*Pi/16])/Sqrt[2] = 0.831469612302545 614 // 4551/4096 = (Sin[7*Pi/16] - Cos[7*Pi/16])*Sqrt[2] = 1.111140466039204 615 // 9041/32768 = Cos[7*Pi/16]*Sqrt[2] = 0.275899379282943 616 let (q0, q3) = 617 RotateAddShift::kernel(q0, q3, ((13623, 14), (4551, 12), (565, 11))); 618 // 16069/16384 = (Sin[5*Pi/16] + Cos[5*Pi/16])/Sqrt[2] = 0.9807852804032304 619 // 12785/32768 = (Sin[5*Pi/16] - Cos[5*Pi/16])*Sqrt[2] = 0.3901806440322566 620 // 1609/2048 = Cos[5*Pi/16]*Sqrt[2] = 0.7856949583871021 621 let (q2, q1) = 622 RotateSubShift::kernel(q2, q1, ((16069, 14), (12785, 15), (1609, 11))); 623 624 // Stage 1 625 let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3); 626 let (q0, q1) = butterfly_sub_asym((q0.rshift1(), q0), q1); 627 628 // Stage 2 629 // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 630 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 631 let (q2, q1) = RotatePi4AddAvg::kernel(q2, q1, ((11585, 13), (11585, 13))); 632 633 store_coeffs!(output, q0, q1, q2, q3); 634 } 635 636 637 #[$m] 638 $($s)* fn daala_fdct_ii_8_asym<T: TxOperations>( 639 r0h: T, r1: (T, T), r2h: T, r3: (T, T), r4h: T, r5: (T, T), r6h: T, 640 r7: (T, T), output: &mut [T], 641 ) { 642 // +/- Butterflies with asymmetric input. 643 let (r0, r7) = butterfly_neg_asym(r0h, r7); 644 let (r1, r6) = butterfly_sub_asym(r1, r6h); 645 let (r2, r5) = butterfly_neg_asym(r2h, r5); 646 let (r3, r4) = butterfly_sub_asym(r3, r4h); 647 648 // Embedded 4-point orthonormal transforms. 
649 daala_fdct_ii_4(r0, r1, r2, r3, &mut output[0..4]); 650 daala_fdst_iv_4(r7, r6, r5, r4, &mut output[4..8]); 651 output[4..8].reverse(); 652 } 653 654 #[$m] 655 $($s)* fn daala_fdst_iv_8_asym<T: TxOperations>( 656 r0: (T, T), r1h: T, r2: (T, T), r3h: T, r4: (T, T), r5h: T, r6: (T, T), 657 r7h: T, output: &mut [T], 658 ) { 659 // Stage 0 660 // 12665/16384 = (Sin[15*Pi/32] + Cos[15*Pi/32])/Sqrt[2] = 0.77301045336274 661 // 5197/4096 = (Sin[15*Pi/32] - Cos[15*Pi/32])*Sqrt[2] = 1.26878656832729 662 // 2271/16384 = Cos[15*Pi/32]*Sqrt[2] = 0.13861716919909 663 let (r0, r7) = 664 RotateAdd::half_kernel(r0, r7h, ((12665, 14), (5197, 12), (2271, 14))); 665 // 14449/16384 = Sin[13*Pi/32] + Cos[13*Pi/32])/Sqrt[2] = 0.881921264348355 666 // 30893/32768 = Sin[13*Pi/32] - Cos[13*Pi/32])*Sqrt[2] = 0.942793473651995 667 // 3363/8192 = Cos[13*Pi/32]*Sqrt[2] = 0.410524527522357 668 let (r6, r1) = 669 RotateSub::half_kernel(r6, r1h, ((14449, 14), (30893, 15), (3363, 13))); 670 // 15679/16384 = Sin[11*Pi/32] + Cos[11*Pi/32])/Sqrt[2] = 0.956940335732209 671 // 1189/2048 = Sin[11*Pi/32] - Cos[11*Pi/32])*Sqrt[2] = 0.580569354508925 672 // 5461/8192 = Cos[11*Pi/32]*Sqrt[2] = 0.666655658477747 673 let (r2, r5) = 674 RotateAdd::half_kernel(r2, r5h, ((15679, 14), (1189, 11), (5461, 13))); 675 // 16305/16384 = (Sin[9*Pi/32] + Cos[9*Pi/32])/Sqrt[2] = 0.9951847266721969 676 // 803/4096 = (Sin[9*Pi/32] - Cos[9*Pi/32])*Sqrt[2] = 0.1960342806591213 677 // 14699/16384 = Cos[9*Pi/32]*Sqrt[2] = 0.8971675863426364 678 let (r4, r3) = 679 RotateSub::half_kernel(r4, r3h, ((16305, 14), (803, 12), (14699, 14))); 680 681 // Stage 1 682 let (r0, r3h) = butterfly_add(r0, r3); 683 let (r2, r1h) = butterfly_sub(r2, r1); 684 let (r5, r6h) = butterfly_add(r5, r6); 685 let (r7, r4h) = butterfly_sub(r7, r4); 686 687 // Stage 2 688 let (r7, r6) = butterfly_add_asym(r7, r6h); 689 let (r5, r3) = butterfly_add_asym(r5, r3h); 690 let (r2, r4) = butterfly_add_asym(r2, r4h); 691 let (r0, r1) = butterfly_sub_asym(r0, 
r1h); 692 693 // Stage 3 694 // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 695 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 696 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 697 let (r3, r4) = 698 RotateSubAvg::kernel(r3, r4, ((669, 9), (8867, 14), (3135, 12))); 699 // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766 700 // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969 701 // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796 702 let (r2, r5) = 703 RotateNegAvg::kernel(r2, r5, ((669, 9), (8867, 14), (3135, 12))); 704 // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951 705 // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951 706 let (r1, r6) = RotatePi4SubAvg::kernel(r1, r6, ((5793, 12), (11585, 13))); 707 708 store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7); 709 } 710 711 #[$m] 712 $($s)* fn daala_fdct_ii_16<T: TxOperations>( 713 s0: T, s1: T, s2: T, s3: T, s4: T, s5: T, s6: T, s7: T, s8: T, s9: T, sa: T, 714 sb: T, sc: T, sd: T, se: T, sf: T, output: &mut [T], 715 ) { 716 // +/- Butterflies with asymmetric output. 717 let (s0h, sf) = butterfly_neg(s0, sf); 718 let (s1, seh) = butterfly_add(s1, se); 719 let (s2h, sd) = butterfly_neg(s2, sd); 720 let (s3, sch) = butterfly_add(s3, sc); 721 let (s4h, sb) = butterfly_neg(s4, sb); 722 let (s5, sah) = butterfly_add(s5, sa); 723 let (s6h, s9) = butterfly_neg(s6, s9); 724 let (s7, s8h) = butterfly_add(s7, s8); 725 726 // Embedded 8-point transforms with asymmetric input. 
727 daala_fdct_ii_8_asym(s0h, s1, s2h, s3, s4h, s5, s6h, s7, &mut output[0..8]); 728 daala_fdst_iv_8_asym(sf, seh, sd, sch, sb, sah, s9, s8h, &mut output[8..16]); 729 output[8..16].reverse(); 730 } 731 732 #[$m] 733 $($s)* fn daala_fdct16<T: TxOperations>(coeffs: &mut [T]) { 734 assert!(coeffs.len() >= 16); 735 let mut temp_out: [T; 16] = [T::zero(); 16]; 736 daala_fdct_ii_16( 737 coeffs[0], 738 coeffs[1], 739 coeffs[2], 740 coeffs[3], 741 coeffs[4], 742 coeffs[5], 743 coeffs[6], 744 coeffs[7], 745 coeffs[8], 746 coeffs[9], 747 coeffs[10], 748 coeffs[11], 749 coeffs[12], 750 coeffs[13], 751 coeffs[14], 752 coeffs[15], 753 &mut temp_out, 754 ); 755 756 coeffs[0] = temp_out[0]; 757 coeffs[1] = temp_out[8]; 758 coeffs[2] = temp_out[4]; 759 coeffs[3] = temp_out[12]; 760 coeffs[4] = temp_out[2]; 761 coeffs[5] = temp_out[10]; 762 coeffs[6] = temp_out[6]; 763 coeffs[7] = temp_out[14]; 764 coeffs[8] = temp_out[1]; 765 coeffs[9] = temp_out[9]; 766 coeffs[10] = temp_out[5]; 767 coeffs[11] = temp_out[13]; 768 coeffs[12] = temp_out[3]; 769 coeffs[13] = temp_out[11]; 770 coeffs[14] = temp_out[7]; 771 coeffs[15] = temp_out[15]; 772 } 773 774 #[$m] 775 $($s)* fn daala_fdst_iv_16<T: TxOperations>( 776 s0: T, s1: T, s2: T, s3: T, s4: T, s5: T, s6: T, s7: T, s8: T, s9: T, sa: T, 777 sb: T, sc: T, sd: T, se: T, sf: T, output: &mut [T], 778 ) { 779 // Stage 0 780 // 24279/32768 = (Sin[31*Pi/64] + Cos[31*Pi/64])/Sqrt[2] = 0.74095112535496 781 // 11003/8192 = (Sin[31*Pi/64] - Cos[31*Pi/64])*Sqrt[2] = 1.34311790969404 782 // 1137/16384 = Cos[31*Pi/64]*Sqrt[2] = 0.06939217050794 783 let (s0, sf) = 784 RotateAddShift::kernel(s0, sf, ((24279, 15), (11003, 13), (1137, 14))); 785 // 1645/2048 = (Sin[29*Pi/64] + Cos[29*Pi/64])/Sqrt[2] = 0.8032075314806449 786 // 305/256 = (Sin[29*Pi/64] - Cos[29*Pi/64])*Sqrt[2] = 1.1913986089848667 787 // 425/2048 = Cos[29*Pi/64]*Sqrt[2] = 0.2075082269882116 788 let (se, s1) = 789 RotateSubShift::kernel(se, s1, ((1645, 11), (305, 8), (425, 11))); 790 // 
14053/32768 = (Sin[27*Pi/64] + Cos[27*Pi/64])/Sqrt[2] = 0.85772861000027 791 // 8423/8192 = (Sin[27*Pi/64] - Cos[27*Pi/64])*Sqrt[2] = 1.02820548838644 792 // 2815/8192 = Cos[27*Pi/64]*Sqrt[2] = 0.34362586580705 793 let (s2, sd) = 794 RotateAddShift::kernel(s2, sd, ((14053, 14), (8423, 13), (2815, 13))); 795 // 14811/16384 = (Sin[25*Pi/64] + Cos[25*Pi/64])/Sqrt[2] = 0.90398929312344 796 // 7005/8192 = (Sin[25*Pi/64] - Cos[25*Pi/64])*Sqrt[2] = 0.85511018686056 797 // 3903/8192 = Cos[25*Pi/64]*Sqrt[2] = 0.47643419969316 798 let (sc, s3) = 799 RotateSubShift::kernel(sc, s3, ((14811, 14), (7005, 13), (3903, 13))); 800 // 30853/32768 = (Sin[23*Pi/64] + Cos[23*Pi/64])/Sqrt[2] = 0.94154406518302 801 // 11039/16384 = (Sin[23*Pi/64] - Cos[23*Pi/64])*Sqrt[2] = 0.67377970678444 802 // 9907/16384 = Cos[23*Pi/64]*Sqrt[2] = 0.60465421179080 803 let (s4, sb) = 804 RotateAddShift::kernel(s4, sb, ((30853, 15), (11039, 14), (9907, 14))); 805 // 15893/16384 = (Sin[21*Pi/64] + Cos[21*Pi/64])/Sqrt[2] = 0.97003125319454 806 // 3981/8192 = (Sin[21*Pi/64] - Cos[21*Pi/64])*Sqrt[2] = 0.89716758634264 807 // 1489/2048 = Cos[21*Pi/64]*Sqrt[2] = 0.72705107329128 808 let (sa, s5) = 809 RotateSubShift::kernel(sa, s5, ((15893, 14), (3981, 13), (1489, 11))); 810 // 32413/32768 = (Sin[19*Pi/64] + Cos[19*Pi/64])/Sqrt[2] = 0.98917650996478 811 // 601/2048 = (Sin[19*Pi/64] - Cos[19*Pi/64])*Sqrt[2] = 0.29346094891072 812 // 13803/16384 = Cos[19*Pi/64]*Sqrt[2] = 0.84244603550942 813 let (s6, s9) = 814 RotateAddShift::kernel(s6, s9, ((32413, 15), (601, 11), (13803, 14))); 815 // 32729/32768 = (Sin[17*Pi/64] + Cos[17*Pi/64])/Sqrt[2] = 0.99879545620517 816 // 201/2048 = (Sin[17*Pi/64] - Cos[17*Pi/64])*Sqrt[2] = 0.09813534865484 817 // 1945/2048 = Cos[17*Pi/64]*Sqrt[2] = 0.94972778187775 818 let (s8, s7) = 819 RotateSubShift::kernel(s8, s7, ((32729, 15), (201, 11), (1945, 11))); 820 821 // Stage 1 822 let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7); 823 let (s8, sf) = 
butterfly_sub_asym((s8.rshift1(), s8), sf);
  let (s4, s3) = butterfly_add_asym((s4.rshift1(), s4), s3);
  let (sc, sb) = butterfly_add_asym((sc.rshift1(), sc), sb);
  let (s2, s5) = butterfly_sub_asym((s2.rshift1(), s2), s5);
  let (sa, sd) = butterfly_sub_asym((sa.rshift1(), sa), sd);
  let (s6, s1) = butterfly_add_asym((s6.rshift1(), s6), s1);
  let (se, s9) = butterfly_add_asym((se.rshift1(), se), s9);

  // Stage 2
  let ((_s8h, s8), s4h) = butterfly_add(s8, s4);
  let ((_s7h, s7), sbh) = butterfly_add(s7, sb);
  let ((_sah, sa), s6h) = butterfly_sub(sa, s6);
  let ((_s5h, s5), s9h) = butterfly_sub(s5, s9);
  let (s0, s3h) = butterfly_add(s0, s3);
  let (sd, seh) = butterfly_add(sd, se);
  let (s2, s1h) = butterfly_sub(s2, s1);
  let (sf, sch) = butterfly_sub(sf, sc);

  // Stage 3
  // 301/256 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 1609/2048 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 12785/32768 = 2*Cos[7*Pi/16] = 0.3901806440322565
  let (s8, s7) =
    RotateAddAvg::kernel(s8, s7, ((301, 8), (1609, 11), (12785, 15)));
  // 11363/8192 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022
  let (s9, s6) =
    RotateAdd::kernel(s9h, s6h, ((11363, 13), (9041, 15), (4551, 13)));
  // 5681/4096 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/4096 = 2*Cos[5*Pi/16] = 1.1111404660392044
  let (s5, sa) =
    RotateNegAvg::kernel(s5, sa, ((5681, 12), (9041, 15), (4551, 12)));
  // 9633/8192 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283
  let (s4, sb) =
    RotateNeg::kernel(s4h, sbh, ((9633, 13), (12873, 14), (6393, 15)));

  // Stage 4
  let (s2, sc) = butterfly_add_asym(s2, sch);
  let (s0, s1) = butterfly_sub_asym(s0, s1h);
  let (sf, se) = butterfly_add_asym(sf, seh);
  let (sd, s3) = butterfly_add_asym(sd, s3h);
  let (s7, s6) = butterfly_add_asym((s7.rshift1(), s7), s6);
  let (s8, s9) = butterfly_sub_asym((s8.rshift1(), s8), s9);
  let (sa, sb) = butterfly_sub_asym((sa.rshift1(), sa), sb);
  let (s5, s4) = butterfly_add_asym((s5.rshift1(), s5), s4);

  // Stage 5
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796
  let (sc, s3) =
    RotateAddAvg::kernel(sc, s3, ((669, 9), (8867, 14), (3135, 12)));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/4096 = 2*Cos[3*Pi/8] = 0.7653668647301796
  let (s2, sd) =
    RotateNegAvg::kernel(s2, sd, ((669, 9), (8867, 14), (3135, 12)));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
  let (sa, s5) = RotatePi4AddAvg::kernel(sa, s5, ((5793, 12), (11585, 13)));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
  let (s6, s9) = RotatePi4AddAvg::kernel(s6, s9, ((5793, 12), (11585, 13)));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 11585/8192 = 2*Cos[Pi/4] = 1.4142135623730951
  let (se, s1) = RotatePi4AddAvg::kernel(se, s1, ((5793, 12), (11585, 13)));

  store_coeffs!(
    output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf
  );
}

/// Forward 16-point type-IV DST entry point: runs `daala_fdst_iv_16` on the
/// first 16 coefficients and writes the results back in bit-reversed order
/// (0, 8, 4, 12, ...), as expected by the column/row transform drivers.
#[$m]
$($s)* fn daala_fdst16<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 16);
  let mut temp_out: [T; 16] = [T::zero(); 16];
  daala_fdst_iv_16(
    coeffs[0],
    coeffs[1],
    coeffs[2],
    coeffs[3],
    coeffs[4],
    coeffs[5],
    coeffs[6],
    coeffs[7],
    coeffs[8],
    coeffs[9],
    coeffs[10],
    coeffs[11],
    coeffs[12],
    coeffs[13],
    coeffs[14],
    coeffs[15],
    &mut temp_out,
  );

  // Bit-reversed reordering of the transform output.
  coeffs[0] = temp_out[0];
  coeffs[1] = temp_out[8];
  coeffs[2] = temp_out[4];
  coeffs[3] = temp_out[12];
  coeffs[4] = temp_out[2];
  coeffs[5] = temp_out[10];
  coeffs[6] = temp_out[6];
  coeffs[7] = temp_out[14];
  coeffs[8] = temp_out[1];
  coeffs[9] = temp_out[9];
  coeffs[10] = temp_out[5];
  coeffs[11] = temp_out[13];
  coeffs[12] = temp_out[3];
  coeffs[13] = temp_out[11];
  coeffs[14] = temp_out[7];
  coeffs[15] = temp_out[15];
}

/// Forward 16-point type-II DCT with asymmetrically scaled input: undoes the
/// asymmetric butterflies, then splits into an embedded 8-point DCT-II (even
/// half of `output`) and an 8-point DST-IV (odd half, stored reversed).
#[$m]
$($s)* fn daala_fdct_ii_16_asym<T: TxOperations>(
  s0h: T, s1: (T, T), s2h: T, s3: (T, T), s4h: T, s5: (T, T), s6h: T,
  s7: (T, T), s8h: T, s9: (T, T), sah: T, sb: (T, T), sch: T, sd: (T, T),
  seh: T, sf: (T, T), output: &mut [T],
) {
  // +/- Butterflies with asymmetric input.
  let (s0, sf) = butterfly_neg_asym(s0h, sf);
  let (s1, se) = butterfly_sub_asym(s1, seh);
  let (s2, sd) = butterfly_neg_asym(s2h, sd);
  let (s3, sc) = butterfly_sub_asym(s3, sch);
  let (s4, sb) = butterfly_neg_asym(s4h, sb);
  let (s5, sa) = butterfly_sub_asym(s5, sah);
  let (s6, s9) = butterfly_neg_asym(s6h, s9);
  let (s7, s8) = butterfly_sub_asym(s7, s8h);

  // Embedded 8-point orthonormal transforms.
daala_fdct_ii_8(s0, s1, s2, s3, s4, s5, s6, s7, &mut output[0..8]);
  daala_fdst_iv_8(sf, se, sd, sc, sb, sa, s9, s8, &mut output[8..16]);
  output[8..16].reverse();
}

/// Forward 16-point type-IV DST with asymmetrically scaled input; the
/// odd-indexed inputs arrive pre-halved (`*h` suffix) from the parent
/// transform's asymmetric butterflies.
#[$m]
$($s)* fn daala_fdst_iv_16_asym<T: TxOperations>(
  s0: (T, T), s1h: T, s2: (T, T), s3h: T, s4: (T, T), s5h: T, s6: (T, T),
  s7h: T, s8: (T, T), s9h: T, sa: (T, T), sbh: T, sc: (T, T), sdh: T,
  se: (T, T), sfh: T, output: &mut [T],
) {
  // Stage 0
  // 1073/2048 = (Sin[31*Pi/64] + Cos[31*Pi/64])/2 = 0.5239315652662953
  // 62241/32768 = (Sin[31*Pi/64] - Cos[31*Pi/64])*2 = 1.8994555637555088
  // 201/2048 = Cos[31*Pi/64]*2 = 0.0981353486548360
  let (s0, sf) =
    RotateAddShift::half_kernel(s0, sfh, ((1073, 11), (62241, 15), (201, 11)));
  // 18611/32768 = (Sin[29*Pi/64] + Cos[29*Pi/64])/2 = 0.5679534922100714
  // 55211/32768 = (Sin[29*Pi/64] - Cos[29*Pi/64])*2 = 1.6848920710188384
  // 601/2048 = Cos[29*Pi/64]*2 = 0.2934609489107235
  let (se, s1) = RotateSubShift::half_kernel(
    se,
    s1h,
    ((18611, 15), (55211, 15), (601, 11)),
  );
  // 9937/16384 = (Sin[27*Pi/64] + Cos[27*Pi/64])/2 = 0.6065057165489039
  // 1489/1024 = (Sin[27*Pi/64] - Cos[27*Pi/64])*2 = 1.4541021465825602
  // 3981/8192 = Cos[27*Pi/64]*2 = 0.4859603598065277
  let (s2, sd) =
    RotateAddShift::half_kernel(s2, sdh, ((9937, 14), (1489, 10), (3981, 13)));
  // 10473/16384 = (Sin[25*Pi/64] + Cos[25*Pi/64])/2 = 0.6392169592876205
  // 39627/32768 = (Sin[25*Pi/64] - Cos[25*Pi/64])*2 = 1.2093084235816014
  // 11039/16384 = Cos[25*Pi/64]*2 = 0.6737797067844401
  let (sc, s3) = RotateSubShift::half_kernel(
    sc,
    s3h,
    ((10473, 14), (39627, 15), (11039, 14)),
  );
  // 2727/4096 = (Sin[23*Pi/64] + Cos[23*Pi/64])/2 = 0.6657721932768628
  // 3903/4096 = (Sin[23*Pi/64] - Cos[23*Pi/64])*2 = 0.9528683993863225
  // 7005/8192 = Cos[23*Pi/64]*2 = 0.8551101868605642
  let (s4, sb) =
    RotateAddShift::half_kernel(s4, sbh, ((2727, 12), (3903, 12), (7005, 13)));
  // 5619/8192 = (Sin[21*Pi/64] + Cos[21*Pi/64])/2 = 0.6859156770967569
  // 2815/4096 = (Sin[21*Pi/64] - Cos[21*Pi/64])*2 = 0.6872517316141069
  // 8423/8192 = Cos[21*Pi/64]*2 = 1.0282054883864433
  let (sa, s5) =
    RotateSubShift::half_kernel(sa, s5h, ((5619, 13), (2815, 12), (8423, 13)));
  // 2865/4096 = (Sin[19*Pi/64] + Cos[19*Pi/64])/2 = 0.6994534179865391
  // 13599/32768 = (Sin[19*Pi/64] - Cos[19*Pi/64])*2 = 0.4150164539764232
  // 305/256 = Cos[19*Pi/64]*2 = 1.1913986089848667
  let (s6, s9) =
    RotateAddShift::half_kernel(s6, s9h, ((2865, 12), (13599, 15), (305, 8)));
  // 23143/32768 = (Sin[17*Pi/64] + Cos[17*Pi/64])/2 = 0.7062550401009887
  // 1137/8192 = (Sin[17*Pi/64] - Cos[17*Pi/64])*2 = 0.1387843410158816
  // 11003/8192 = Cos[17*Pi/64]*2 = 1.3431179096940367
  let (s8, s7) = RotateSubShift::half_kernel(
    s8,
    s7h,
    ((23143, 15), (1137, 13), (11003, 13)),
  );

  // Stage 1
  let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7);
  let (s8, sf) = butterfly_sub_asym((s8.rshift1(), s8), sf);
  let (s4, s3) = butterfly_add_asym((s4.rshift1(), s4), s3);
  let (sc, sb) = butterfly_add_asym((sc.rshift1(), sc), sb);
  let (s2, s5) = butterfly_sub_asym((s2.rshift1(), s2), s5);
  let (sa, sd) = butterfly_sub_asym((sa.rshift1(), sa), sd);
  let (s6, s1) = butterfly_add_asym((s6.rshift1(), s6), s1);
  let (se, s9) = butterfly_add_asym((se.rshift1(), se), s9);

  // Stage 2
  let ((_s8h, s8), s4h) = butterfly_add(s8, s4);
  let ((_s7h, s7), sbh) = butterfly_add(s7, sb);
  let ((_sah, sa), s6h) = butterfly_sub(sa, s6);
  let ((_s5h, s5), s9h) = butterfly_sub(s5, s9);
  let (s0, s3h) = butterfly_add(s0, s3);
  let (sd, seh) = butterfly_add(sd, se);
  let (s2, s1h) = butterfly_sub(s2, s1);
  let (sf, sch) = butterfly_sub(sf, sc);

  // Stage 3
  // 9633/8192 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283
  let (s8, s7) =
    RotateAdd::kernel(s8, s7, ((9633, 13), (12873, 14), (6393, 15)));
  // 22725/16384 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022
  let (s9, s6) =
    RotateAdd::kernel(s9h, s6h, ((22725, 14), (9041, 15), (4551, 13)));
  // 11363/8192 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022
  let (s5, sa) =
    RotateNeg::kernel(s5, sa, ((11363, 13), (9041, 15), (4551, 13)));
  // 9633/8192 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283
  let (s4, sb) =
    RotateNeg::kernel(s4h, sbh, ((9633, 13), (12873, 14), (6393, 15)));

  // Stage 4
  let (s2, sc) = butterfly_add_asym(s2, sch);
  let (s0, s1) = butterfly_sub_asym(s0, s1h);
  let (sf, se) = butterfly_add_asym(sf, seh);
  let (sd, s3) = butterfly_add_asym(sd, s3h);
  let (s7, s6) = butterfly_add_asym((s7.rshift1(), s7), s6);
  let (s8, s9) = butterfly_sub_asym((s8.rshift1(), s8), s9);
  let (sa, sb) = butterfly_sub_asym((sa.rshift1(), sa), sb);
  let (s5, s4) = butterfly_add_asym((s5.rshift1(), s5), s4);

  // Stage 5
  // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (sc, s3) =
    RotateAdd::kernel(sc, s3, ((10703, 13), (8867, 14), (3135, 13)));
  // 10703/8192 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (s2, sd) =
    RotateNeg::kernel(s2, sd, ((10703, 13), (8867, 14), (3135, 13)));
  // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (sa, s5) = RotatePi4Add::kernel(sa, s5, ((11585, 13), (5793, 13)));
  // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (s6, s9) = RotatePi4Add::kernel(s6, s9, ((11585, 13), (5793, 13)));
  // 11585/8192 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (se, s1) = RotatePi4Add::kernel(se, s1, ((11585, 13), (5793, 13)));

  store_coeffs!(
    output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf
  );
}

/// Forward 32-point type-II DCT, decomposed via +/- butterflies into an
/// embedded 16-point DCT-II (first half of `output`) and a 16-point DST-IV
/// (second half, stored reversed), each taking asymmetric input.
#[$m]
$($s)* fn daala_fdct_ii_32<T: TxOperations>(
  t0: T, t1: T, t2: T, t3: T, t4: T, t5: T, t6: T, t7: T, t8: T, t9: T, ta: T,
  tb: T, tc: T, td: T, te: T, tf: T, tg: T, th: T, ti: T, tj: T, tk: T, tl: T,
  tm: T, tn: T, to: T, tp: T, tq: T, tr: T, ts: T, tt: T, tu: T, tv: T,
  output: &mut [T],
) {
  // +/- Butterflies with asymmetric output.
  let (t0h, tv) = butterfly_neg(t0, tv);
  let (t1, tuh) = butterfly_add(t1, tu);
  let (t2h, tt) = butterfly_neg(t2, tt);
  let (t3, tsh) = butterfly_add(t3, ts);
  let (t4h, tr) = butterfly_neg(t4, tr);
  let (t5, tqh) = butterfly_add(t5, tq);
  let (t6h, tp) = butterfly_neg(t6, tp);
  let (t7, toh) = butterfly_add(t7, to);
  let (t8h, tn) = butterfly_neg(t8, tn);
  let (t9, tmh) = butterfly_add(t9, tm);
  let (tah, tl) = butterfly_neg(ta, tl);
  let (tb, tkh) = butterfly_add(tb, tk);
  let (tch, tj) = butterfly_neg(tc, tj);
  let (td, tih) = butterfly_add(td, ti);
  let (teh, th) = butterfly_neg(te, th);
  let (tf, tgh) = butterfly_add(tf, tg);

  // Embedded 16-point transforms with asymmetric input.
daala_fdct_ii_16_asym(
    t0h,
    t1,
    t2h,
    t3,
    t4h,
    t5,
    t6h,
    t7,
    t8h,
    t9,
    tah,
    tb,
    tch,
    td,
    teh,
    tf,
    &mut output[0..16],
  );
  daala_fdst_iv_16_asym(
    tv,
    tuh,
    tt,
    tsh,
    tr,
    tqh,
    tp,
    toh,
    tn,
    tmh,
    tl,
    tkh,
    tj,
    tih,
    th,
    tgh,
    &mut output[16..32],
  );
  output[16..32].reverse();
}

/// Forward 32-point type-II DCT entry point: runs `daala_fdct_ii_32` on the
/// first 32 coefficients and writes the results back in bit-reversed order
/// (0, 16, 8, 24, ...).
#[$m]
$($s)* fn daala_fdct32<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 32);
  let mut temp_out: [T; 32] = [T::zero(); 32];
  daala_fdct_ii_32(
    coeffs[0],
    coeffs[1],
    coeffs[2],
    coeffs[3],
    coeffs[4],
    coeffs[5],
    coeffs[6],
    coeffs[7],
    coeffs[8],
    coeffs[9],
    coeffs[10],
    coeffs[11],
    coeffs[12],
    coeffs[13],
    coeffs[14],
    coeffs[15],
    coeffs[16],
    coeffs[17],
    coeffs[18],
    coeffs[19],
    coeffs[20],
    coeffs[21],
    coeffs[22],
    coeffs[23],
    coeffs[24],
    coeffs[25],
    coeffs[26],
    coeffs[27],
    coeffs[28],
    coeffs[29],
    coeffs[30],
    coeffs[31],
    &mut temp_out,
  );

  // Bit-reversed reordering of the transform output.
  coeffs[0] = temp_out[0];
  coeffs[1] = temp_out[16];
  coeffs[2] = temp_out[8];
  coeffs[3] = temp_out[24];
  coeffs[4] = temp_out[4];
  coeffs[5] = temp_out[20];
  coeffs[6] = temp_out[12];
  coeffs[7] = temp_out[28];
  coeffs[8] = temp_out[2];
  coeffs[9] = temp_out[18];
  coeffs[10] = temp_out[10];
  coeffs[11] = temp_out[26];
  coeffs[12] = temp_out[6];
  coeffs[13] = temp_out[22];
  coeffs[14] = temp_out[14];
  coeffs[15] = temp_out[30];
  coeffs[16] = temp_out[1];
  coeffs[17] = temp_out[17];
  coeffs[18] = temp_out[9];
  coeffs[19] = temp_out[25];
  coeffs[20] = temp_out[5];
  coeffs[21] = temp_out[21];
  coeffs[22] = temp_out[13];
  coeffs[23] = temp_out[29];
  coeffs[24] = temp_out[3];
  coeffs[25] = temp_out[19];
  coeffs[26] = temp_out[11];
  coeffs[27] = temp_out[27];
  coeffs[28] = temp_out[7];
  coeffs[29] = temp_out[23];
  coeffs[30] = temp_out[15];
  coeffs[31] = temp_out[31];
}

/// Forward 32-point type-II DCT with asymmetrically scaled input: undoes the
/// asymmetric butterflies, then applies an embedded 16-point DCT-II (even
/// half) and 16-point DST-IV (odd half, stored reversed).
#[$m]
$($s)* fn daala_fdct_ii_32_asym<T: TxOperations>(
  t0h: T, t1: (T, T), t2h: T, t3: (T, T), t4h: T, t5: (T, T), t6h: T,
  t7: (T, T), t8h: T, t9: (T, T), tah: T, tb: (T, T), tch: T, td: (T, T),
  teh: T, tf: (T, T), tgh: T, th: (T, T), tih: T, tj: (T, T), tkh: T,
  tl: (T, T), tmh: T, tn: (T, T), toh: T, tp: (T, T), tqh: T, tr: (T, T),
  tsh: T, tt: (T, T), tuh: T, tv: (T, T), output: &mut [T],
) {
  // +/- Butterflies with asymmetric input.
  let (t0, tv) = butterfly_neg_asym(t0h, tv);
  let (t1, tu) = butterfly_sub_asym(t1, tuh);
  let (t2, tt) = butterfly_neg_asym(t2h, tt);
  let (t3, ts) = butterfly_sub_asym(t3, tsh);
  let (t4, tr) = butterfly_neg_asym(t4h, tr);
  let (t5, tq) = butterfly_sub_asym(t5, tqh);
  let (t6, tp) = butterfly_neg_asym(t6h, tp);
  let (t7, to) = butterfly_sub_asym(t7, toh);
  let (t8, tn) = butterfly_neg_asym(t8h, tn);
  let (t9, tm) = butterfly_sub_asym(t9, tmh);
  let (ta, tl) = butterfly_neg_asym(tah, tl);
  let (tb, tk) = butterfly_sub_asym(tb, tkh);
  let (tc, tj) = butterfly_neg_asym(tch, tj);
  let (td, ti) = butterfly_sub_asym(td, tih);
  let (te, th) = butterfly_neg_asym(teh, th);
  let (tf, tg) = butterfly_sub_asym(tf, tgh);

  // Embedded 16-point orthonormal transforms.
daala_fdct_ii_16(
    t0,
    t1,
    t2,
    t3,
    t4,
    t5,
    t6,
    t7,
    t8,
    t9,
    ta,
    tb,
    tc,
    td,
    te,
    tf,
    &mut output[0..16],
  );
  daala_fdst_iv_16(
    tv,
    tu,
    tt,
    ts,
    tr,
    tq,
    tp,
    to,
    tn,
    tm,
    tl,
    tk,
    tj,
    ti,
    th,
    tg,
    &mut output[16..32],
  );
  output[16..32].reverse();
}

/// Forward 32-point type-IV DST with asymmetrically scaled input; odd inputs
/// arrive pre-halved (`*h` suffix) from the parent transform's butterflies.
#[$m]
$($s)* fn daala_fdst_iv_32_asym<T: TxOperations>(
  t0: (T, T), t1h: T, t2: (T, T), t3h: T, t4: (T, T), t5h: T, t6: (T, T),
  t7h: T, t8: (T, T), t9h: T, ta: (T, T), tbh: T, tc: (T, T), tdh: T,
  te: (T, T), tfh: T, tg: (T, T), thh: T, ti: (T, T), tjh: T, tk: (T, T),
  tlh: T, tm: (T, T), tnh: T, to: (T, T), tph: T, tq: (T, T), trh: T,
  ts: (T, T), tth: T, tu: (T, T), tvh: T, output: &mut [T],
) {
  // Stage 0
  // 5933/8192 = (Sin[63*Pi/128] + Cos[63*Pi/128])/Sqrt[2] = 0.72424708295147
  // 22595/16384 = (Sin[63*Pi/128] - Cos[63*Pi/128])*Sqrt[2] = 1.37908108947413
  // 1137/32768 = Cos[63*Pi/128]*Sqrt[2] = 0.03470653821440
  let (t0, tv) =
    RotateAdd::half_kernel(t0, tvh, ((5933, 13), (22595, 14), (1137, 15)));
  // 6203/8192 = (Sin[61*Pi/128] + Cos[61*Pi/128])/Sqrt[2] = 0.75720884650648
  // 21403/16384 = (Sin[61*Pi/128] - Cos[61*Pi/128])*Sqrt[2] = 1.30634568590755
  // 3409/32768 = Cos[61*Pi/128]*Sqrt[2] = 0.10403600355271
  let (tu, t1) =
    RotateSub::half_kernel(tu, t1h, ((6203, 13), (21403, 14), (3409, 15)));
  // 25833/32768 = (Sin[59*Pi/128] + Cos[59*Pi/128])/Sqrt[2] = 0.78834642762661
  // 315/256 = (Sin[59*Pi/128] - Cos[59*Pi/128])*Sqrt[2] = 1.23046318116125
  // 5673/32768 = Cos[59*Pi/128]*Sqrt[2] = 0.17311483704598
  let (t2, tt) =
    RotateAdd::half_kernel(t2, tth, ((25833, 15), (315, 8), (5673, 15)));
  // 26791/32768 = (Sin[57*Pi/128] + Cos[57*Pi/128])/Sqrt[2] = 0.81758481315158
  // 4717/4096 = (Sin[57*Pi/128] - Cos[57*Pi/128])*Sqrt[2] = 1.15161638283569
  // 7923/32768 = Cos[57*Pi/128]*Sqrt[2] = 0.24177662173374
  let (ts, t3) =
    RotateSub::half_kernel(ts, t3h, ((26791, 15), (4717, 12), (7923, 15)));
  // 6921/8192 = (Sin[55*Pi/128] + Cos[55*Pi/128])/Sqrt[2] = 0.84485356524971
  // 17531/16384 = (Sin[55*Pi/128] - Cos[55*Pi/128])*Sqrt[2] = 1.06999523977419
  // 10153/32768 = Cos[55*Pi/128]*Sqrt[2] = 0.30985594536261
  let (t4, tr) =
    RotateAdd::half_kernel(t4, trh, ((6921, 13), (17531, 14), (10153, 15)));
  // 28511/32768 = (Sin[53*Pi/128] + Cos[53*Pi/128])/Sqrt[2] = 0.87008699110871
  // 32303/32768 = (Sin[53*Pi/128] - Cos[53*Pi/128])*Sqrt[2] = 0.98579638445957
  // 1545/4096 = Cos[53*Pi/128]*Sqrt[2] = 0.37718879887893
  let (tq, t5) =
    RotateSub::half_kernel(tq, t5h, ((28511, 15), (32303, 15), (1545, 12)));
  // 29269/32768 = (Sin[51*Pi/128] + Cos[51*Pi/128])/Sqrt[2] = 0.89322430119552
  // 14733/16384 = (Sin[51*Pi/128] - Cos[51*Pi/128])*Sqrt[2] = 0.89922265930921
  // 1817/4096 = Cos[51*Pi/128]*Sqrt[2] = 0.44361297154091
  let (t6, tp) =
    RotateAdd::half_kernel(t6, tph, ((29269, 15), (14733, 14), (1817, 12)));
  // 29957/32768 = (Sin[49*Pi/128] + Cos[49*Pi/128])/Sqrt[2] = 0.91420975570353
  // 13279/16384 = (Sin[49*Pi/128] - Cos[49*Pi/128])*Sqrt[2] = 0.81048262800998
  // 8339/16384 = Cos[49*Pi/128]*Sqrt[2] = 0.50896844169854
  let (to, t7) =
    RotateSub::half_kernel(to, t7h, ((29957, 15), (13279, 14), (8339, 14)));
  // 7643/8192 = (Sin[47*Pi/128] + Cos[47*Pi/128])/Sqrt[2] = 0.93299279883474
  // 11793/16384 = (Sin[47*Pi/128] - Cos[47*Pi/128])*Sqrt[2] = 0.71979007306998
  // 18779/32768 = Cos[47*Pi/128]*Sqrt[2] = 0.57309776229975
  let (t8, tn) =
    RotateAdd::half_kernel(t8, tnh, ((7643, 13), (11793, 14), (18779, 15)));
  // 15557/16384 = (Sin[45*Pi/128] + Cos[45*Pi/128])/Sqrt[2] = 0.94952818059304
  // 20557/32768 = (Sin[45*Pi/128] - Cos[45*Pi/128])*Sqrt[2] = 0.62736348079778
  // 20835/32768 = Cos[45*Pi/128]*Sqrt[2] = 0.63584644019415
  let (tm, t9) =
    RotateSub::half_kernel(tm, t9h, ((15557, 14), (20557, 15), (20835, 15)));
  // 31581/32768 = (Sin[43*Pi/128] + Cos[43*Pi/128])/Sqrt[2] = 0.96377606579544
  // 17479/32768 = (Sin[43*Pi/128] - Cos[43*Pi/128])*Sqrt[2] = 0.53342551494980
  // 22841/32768 = Cos[43*Pi/128]*Sqrt[2] = 0.69706330832054
  let (ta, tl) =
    RotateAdd::half_kernel(ta, tlh, ((31581, 15), (17479, 15), (22841, 15)));
  // 7993/8192 = (Sin[41*Pi/128] + Cos[41*Pi/128])/Sqrt[2] = 0.97570213003853
  // 14359/32768 = (Sin[41*Pi/128] - Cos[41*Pi/128])*Sqrt[2] = 0.43820248031374
  // 3099/4096 = Cos[41*Pi/128]*Sqrt[2] = 0.75660088988166
  let (tk, tb) =
    RotateSub::half_kernel(tk, tbh, ((7993, 13), (14359, 15), (3099, 12)));
  // 16143/16384 = (Sin[39*Pi/128] + Cos[39*Pi/128])/Sqrt[2] = 0.98527764238894
  // 2801/8192 = (Sin[39*Pi/128] - Cos[39*Pi/128])*Sqrt[2] = 0.34192377752060
  // 26683/32768 = Cos[39*Pi/128]*Sqrt[2] = 0.81431575362864
  let (tc, tj) =
    RotateAdd::half_kernel(tc, tjh, ((16143, 14), (2801, 13), (26683, 15)));
  // 16261/16384 = (Sin[37*Pi/128] + Cos[37*Pi/128])/Sqrt[2] = 0.99247953459871
  // 4011/16384 = (Sin[37*Pi/128] - Cos[37*Pi/128])*Sqrt[2] = 0.24482135039843
  // 14255/16384 = Cos[37*Pi/128]*Sqrt[2] = 0.87006885939949
  let (ti, td) =
    RotateSub::half_kernel(ti, tdh, ((16261, 14), (4011, 14), (14255, 14)));
  // 32679/32768 = (Sin[35*Pi/128] + Cos[35*Pi/128])/Sqrt[2] = 0.99729045667869
  // 4821/32768 = (Sin[35*Pi/128] - Cos[35*Pi/128])*Sqrt[2] = 0.14712912719933
  // 30269/32768 = Cos[35*Pi/128]*Sqrt[2] = 0.92372589307902
  let (te, th) =
    RotateAdd::half_kernel(te, thh, ((32679, 15), (4821, 15), (30269, 15)));
  // 16379/16384 = (Sin[33*Pi/128] + Cos[33*Pi/128])/Sqrt[2] = 0.99969881869620
  // 201/4096 = (Sin[33*Pi/128] - Cos[33*Pi/128])*Sqrt[2] = 0.04908245704582
  // 15977/16384 = Cos[33*Pi/128]*Sqrt[2] = 0.97515759017329
  let (tg, tf) =
    RotateSub::half_kernel(tg, tfh, ((16379, 14), (201, 12), (15977, 14)));

  // Stage 1
  let (t0, tfh) = butterfly_add(t0, tf);
  let (tv, tgh) = butterfly_sub(tv, tg);
  let (th, tuh) = butterfly_add(th, tu);
  let (te, t1h) = butterfly_sub(te, t1);
  let (t2, tdh) = butterfly_add(t2, td);
  let (tt, tih) = butterfly_sub(tt, ti);
  let (tj, tsh) = butterfly_add(tj, ts);
  let (tc, t3h) = butterfly_sub(tc, t3);
  let (t4, tbh) = butterfly_add(t4, tb);
  let (tr, tkh) = butterfly_sub(tr, tk);
  let (tl, tqh) = butterfly_add(tl, tq);
  let (ta, t5h) = butterfly_sub(ta, t5);
  let (t6, t9h) = butterfly_add(t6, t9);
  let (tp, tmh) = butterfly_sub(tp, tm);
  let (tn, toh) = butterfly_add(tn, to);
  let (t8, t7h) = butterfly_sub(t8, t7);

  // Stage 2
  let (t0, t7) = butterfly_sub_asym(t0, t7h);
  let (tv, to) = butterfly_add_asym(tv, toh);
  let (tp, tu) = butterfly_sub_asym(tp, tuh);
  let (t6, t1) = butterfly_add_asym(t6, t1h);
  let (t2, t5) = butterfly_sub_asym(t2, t5h);
  let (tt, tq) = butterfly_add_asym(tt, tqh);
  let (tr, ts) = butterfly_sub_asym(tr, tsh);
  let (t4, t3) = butterfly_add_asym(t4, t3h);
  let (t8, tg) = butterfly_add_asym(t8, tgh);
  let (te, tm) = butterfly_sub_asym(te, tmh);
  let (tn, tf) = butterfly_add_asym(tn, tfh);
  let (th, t9) = butterfly_sub_asym(th, t9h);
  let (ta, ti) = butterfly_add_asym(ta, tih);
  let (tc, tk) = butterfly_sub_asym(tc, tkh);
  let (tl, td) = butterfly_add_asym(tl, tdh);
  let (tj, tb) = butterfly_sub_asym(tj, tbh);

  // Stage 3
  // 17911/16384 = Sin[15*Pi/32] + Cos[15*Pi/32] = 1.0932018670017576
  // 14699/16384 = Sin[15*Pi/32] - Cos[15*Pi/32] = 0.8971675863426363
  // 803/8192 = Cos[15*Pi/32] = 0.0980171403295606
  let (tf, tg) =
    RotateSub::kernel(tf, tg, ((17911, 14), (14699, 14), (803, 13)));
  // 10217/8192 = Sin[13*Pi/32] + Cos[13*Pi/32] = 1.2472250129866712
  // 5461/8192 = Sin[13*Pi/32] - Cos[13*Pi/32] = 0.6666556584777465
  // 1189/4096 = Cos[13*Pi/32] = 0.2902846772544623
  let (th, te) =
    RotateAdd::kernel(th, te, ((10217, 13), (5461, 13), (1189, 12)));
  // 5543/4096 = Sin[11*Pi/32] + Cos[11*Pi/32] = 1.3533180011743526
  // 3363/8192 = Sin[11*Pi/32] - Cos[11*Pi/32] = 0.4105245275223574
  // 7723/16384 = Cos[11*Pi/32] = 0.4713967368259976
  let (ti, td) =
    RotateAdd::kernel(ti, td, ((5543, 12), (3363, 13), (7723, 14)));
  // 11529/8192 = Sin[9*Pi/32] + Cos[9*Pi/32] = 1.4074037375263826
  // 2271/16384 = Sin[9*Pi/32] - Cos[9*Pi/32] = 0.1386171691990915
  // 5197/8192 = Cos[9*Pi/32] = 0.6343932841636455
  let (tc, tj) =
    RotateSub::kernel(tc, tj, ((11529, 13), (2271, 14), (5197, 13)));
  // 11529/8192 = Sin[9*Pi/32] + Cos[9*Pi/32] = 1.4074037375263826
  // 2271/16384 = Sin[9*Pi/32] - Cos[9*Pi/32] = 0.1386171691990915
  // 5197/8192 = Cos[9*Pi/32] = 0.6343932841636455
  let (tb, tk) =
    RotateNeg::kernel(tb, tk, ((11529, 13), (2271, 14), (5197, 13)));
  // 5543/4096 = Sin[11*Pi/32] + Cos[11*Pi/32] = 1.3533180011743526
  // 3363/8192 = Sin[11*Pi/32] - Cos[11*Pi/32] = 0.4105245275223574
  // 7723/16384 = Cos[11*Pi/32] = 0.4713967368259976
  let (ta, tl) =
    RotateNeg::kernel(ta, tl, ((5543, 12), (3363, 13), (7723, 14)));
  // 10217/8192 = Sin[13*Pi/32] + Cos[13*Pi/32] = 1.2472250129866712
  // 5461/8192 = Sin[13*Pi/32] - Cos[13*Pi/32] = 0.6666556584777465
  // 1189/4096 = Cos[13*Pi/32] = 0.2902846772544623
  let (t9, tm) =
    RotateNeg::kernel(t9, tm, ((10217, 13), (5461, 13), (1189, 12)));
  // 17911/16384 = Sin[15*Pi/32] + Cos[15*Pi/32] = 1.0932018670017576
  // 14699/16384 = Sin[15*Pi/32] - Cos[15*Pi/32] = 0.8971675863426363
  // 803/8192 = Cos[15*Pi/32] = 0.0980171403295606
  let (t8, tn) =
    RotateNeg::kernel(t8, tn, ((17911, 14), (14699, 14), (803, 13)));
// Stage 4
  let (t3, t0h) = butterfly_sub(t3, t0);
  let (ts, tvh) = butterfly_add(ts, tv);
  let (tu, tth) = butterfly_sub(tu, tt);
  let (t1, t2h) = butterfly_add(t1, t2);
  let ((_toh, to), t4h) = butterfly_add(to, t4);
  let ((_tqh, tq), t6h) = butterfly_sub(tq, t6);
  let ((_t7h, t7), trh) = butterfly_add(t7, tr);
  let ((_t5h, t5), tph) = butterfly_sub(t5, tp);
  let (tb, t8h) = butterfly_sub(tb, t8);
  let (tk, tnh) = butterfly_add(tk, tn);
  let (tm, tlh) = butterfly_sub(tm, tl);
  let (t9, tah) = butterfly_add(t9, ta);
  let (tf, tch) = butterfly_sub(tf, tc);
  let (tg, tjh) = butterfly_add(tg, tj);
  let (ti, thh) = butterfly_sub(ti, th);
  let (td, teh) = butterfly_add(td, te);

  // Stage 5
  // 301/256 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 1609/2048 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283
  let (to, t7) = RotateAdd::kernel(to, t7, ((301, 8), (1609, 11), (6393, 15)));
  // 11363/8192 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022
  let (tph, t6h) =
    RotateAdd::kernel(tph, t6h, ((11363, 13), (9041, 15), (4551, 13)));
  // 5681/4096 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475
  // 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.2758993792829431
  // 4551/8192 = Cos[5*Pi/16] = 0.5555702330196022
  let (t5, tq) =
    RotateNeg::kernel(t5, tq, ((5681, 12), (9041, 15), (4551, 13)));
  // 9633/8192 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586
  // 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022
  // 6393/32768 = Cos[7*Pi/16] = 0.1950903220161283
  let (t4h, trh) =
    RotateNeg::kernel(t4h, trh, ((9633, 13), (12873, 14), (6393, 15)));

  // Stage 6
  let (t1, t0) = butterfly_add_asym(t1, t0h);
  let (tu, tv) = butterfly_sub_asym(tu, tvh);
  let (ts, t2) = butterfly_sub_asym(ts, t2h);
  let (t3, tt) = butterfly_sub_asym(t3, tth);
  let (t5, t4) = butterfly_add_asym((t5.rshift1(), t5), t4h);
  let (tq, tr) = butterfly_sub_asym((tq.rshift1(), tq), trh);
  let (t7, t6) = butterfly_add_asym((t7.rshift1(), t7), t6h);
  let (to, tp) = butterfly_sub_asym((to.rshift1(), to), tph);
  let (t9, t8) = butterfly_add_asym(t9, t8h);
  let (tm, tn) = butterfly_sub_asym(tm, tnh);
  let (tk, ta) = butterfly_sub_asym(tk, tah);
  let (tb, tl) = butterfly_sub_asym(tb, tlh);
  let (ti, tc) = butterfly_add_asym(ti, tch);
  let (td, tj) = butterfly_add_asym(td, tjh);
  let (tf, te) = butterfly_add_asym(tf, teh);
  let (tg, th) = butterfly_sub_asym(tg, thh);

  // Stage 7
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (t2, tt) = RotateNeg::kernel(t2, tt, ((669, 9), (8867, 14), (3135, 13)));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (ts, t3) = RotateAdd::kernel(ts, t3, ((669, 9), (8867, 14), (3135, 13)));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (ta, tl) = RotateNeg::kernel(ta, tl, ((669, 9), (8867, 14), (3135, 13)));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (tk, tb) = RotateAdd::kernel(tk, tb, ((669, 9), (8867, 14), (3135, 13)));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (tc, tj) = RotateAdd::kernel(tc, tj, ((669, 9), (8867, 14), (3135, 13)));
  // 669/512 = Sin[3*Pi/8] + Cos[3*Pi/8] = 1.3065629648763766
  // 8867/16384 = Sin[3*Pi/8] - Cos[3*Pi/8] = 0.5411961001461969
  // 3135/8192 = Cos[3*Pi/8] = 0.3826834323650898
  let (ti, td) = RotateNeg::kernel(ti, td, ((669, 9), (8867, 14), (3135, 13)));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (tu, t1) = RotatePi4Add::kernel(tu, t1, ((5793, 12), (5793, 13)));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (tq, t5) = RotatePi4Add::kernel(tq, t5, ((5793, 12), (5793, 13)));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (tp, t6) = RotatePi4Sub::kernel(tp, t6, ((5793, 12), (5793, 13)));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (tm, t9) = RotatePi4Add::kernel(tm, t9, ((5793, 12), (5793, 13)));
  // 5793/4096 = Sin[Pi/4] + Cos[Pi/4] = 1.4142135623730951
  // 5793/8192 = Cos[Pi/4] = 0.7071067811865475
  let (te, th) = RotatePi4Add::kernel(te, th, ((5793, 12), (5793, 13)));

  store_coeffs!(
    output, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, te, tf,
    tg, th, ti, tj, tk, tl, tm, tn, to, tp, tq, tr, ts, tt, tu, tv
  );
}

/// Forward 64-point type-II DCT entry point: butterflies the 64 input
/// coefficients into `half`/`asym` working arrays, applies the embedded
/// 32-point DCT-II and DST-IV, then writes the output back in bit-reversed
/// order.
#[allow(clippy::identity_op)]
#[$m]
$($s)* fn daala_fdct64<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 64);
  // Use arrays to avoid ridiculous variable names
  let mut asym: [(T, T); 32] = [(T::zero(), T::zero()); 32];
  let mut half: [T; 32] = [T::zero(); 32];
  // +/- Butterflies with asymmetric output.
{
    // One mirrored pair of butterflies: combines inputs j / 63-j and
    // j+1 / 62-j, storing the halved outputs in `half` and the
    // asymmetric pairs in `asym`.
    #[$m]
    #[inline]
    $($s)* fn butterfly_pair<T: TxOperations>(
      half: &mut [T; 32], asym: &mut [(T, T); 32], input: &[T], i: usize
    ) {
      let j = i * 2;
      let (ah, c) = butterfly_neg(input[j], input[63 - j]);
      let (b, dh) = butterfly_add(input[j + 1], input[63 - j - 1]);
      half[i] = ah;
      half[31 - i] = dh;
      asym[i] = b;
      asym[31 - i] = c;
    }
    butterfly_pair(&mut half, &mut asym, coeffs, 0);
    butterfly_pair(&mut half, &mut asym, coeffs, 1);
    butterfly_pair(&mut half, &mut asym, coeffs, 2);
    butterfly_pair(&mut half, &mut asym, coeffs, 3);
    butterfly_pair(&mut half, &mut asym, coeffs, 4);
    butterfly_pair(&mut half, &mut asym, coeffs, 5);
    butterfly_pair(&mut half, &mut asym, coeffs, 6);
    butterfly_pair(&mut half, &mut asym, coeffs, 7);
    butterfly_pair(&mut half, &mut asym, coeffs, 8);
    butterfly_pair(&mut half, &mut asym, coeffs, 9);
    butterfly_pair(&mut half, &mut asym, coeffs, 10);
    butterfly_pair(&mut half, &mut asym, coeffs, 11);
    butterfly_pair(&mut half, &mut asym, coeffs, 12);
    butterfly_pair(&mut half, &mut asym, coeffs, 13);
    butterfly_pair(&mut half, &mut asym, coeffs, 14);
    butterfly_pair(&mut half, &mut asym, coeffs, 15);
  }

  let mut temp_out: [T; 64] = [T::zero(); 64];
  // Embedded 32-point transforms with asymmetric input.
  daala_fdct_ii_32_asym(
    half[0],
    asym[0],
    half[1],
    asym[1],
    half[2],
    asym[2],
    half[3],
    asym[3],
    half[4],
    asym[4],
    half[5],
    asym[5],
    half[6],
    asym[6],
    half[7],
    asym[7],
    half[8],
    asym[8],
    half[9],
    asym[9],
    half[10],
    asym[10],
    half[11],
    asym[11],
    half[12],
    asym[12],
    half[13],
    asym[13],
    half[14],
    asym[14],
    half[15],
    asym[15],
    &mut temp_out[0..32],
  );
  daala_fdst_iv_32_asym(
    asym[31],
    half[31],
    asym[30],
    half[30],
    asym[29],
    half[29],
    asym[28],
    half[28],
    asym[27],
    half[27],
    asym[26],
    half[26],
    asym[25],
    half[25],
    asym[24],
    half[24],
    asym[23],
    half[23],
    asym[22],
    half[22],
    asym[21],
    half[21],
    asym[20],
    half[20],
    asym[19],
    half[19],
    asym[18],
    half[18],
    asym[17],
    half[17],
    asym[16],
    half[16],
    &mut temp_out[32..64],
  );
  temp_out[32..64].reverse();

  // Store a reordered (bit-reversed) version of temp_out in coeffs,
  // four entries at a time.
  #[$m]
  #[inline]
  $($s)* fn reorder_4<T: TxOperations>(
    output: &mut [T], i: usize, tmp: [T; 64], j: usize
  ) {
    output[0 + i * 4] = tmp[0 + j];
    output[1 + i * 4] = tmp[32 + j];
    output[2 + i * 4] = tmp[16 + j];
    output[3 + i * 4] = tmp[48 + j];
  }
  reorder_4(coeffs, 0, temp_out, 0);
  reorder_4(coeffs, 1, temp_out, 8);
  reorder_4(coeffs, 2, temp_out, 4);
  reorder_4(coeffs, 3, temp_out, 12);
  reorder_4(coeffs, 4, temp_out, 2);
  reorder_4(coeffs, 5, temp_out, 10);
  reorder_4(coeffs, 6, temp_out, 6);
  reorder_4(coeffs, 7, temp_out, 14);

  reorder_4(coeffs, 8, temp_out, 1);
  reorder_4(coeffs, 9, temp_out, 9);
  reorder_4(coeffs, 10, temp_out, 5);
  reorder_4(coeffs, 11, temp_out, 13);
  reorder_4(coeffs, 12, temp_out, 3);
  reorder_4(coeffs, 13, temp_out, 11);
  reorder_4(coeffs, 14, temp_out, 7);
  reorder_4(coeffs, 15, temp_out, 15);
}

/// Forward identity transform: a no-op, the coefficients are left unchanged.
#[$m]
$($s)* fn fidentity<T: TxOperations>(_coeffs: &mut [T]) {}

}

}