1 // Copyright (c) 2018-2020, The rav1e contributors. All rights reserved 2 // 3 // This source code is subject to the terms of the BSD 2 Clause License and 4 // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 5 // was not distributed with this source code in the LICENSE file, you can 6 // obtain it at www.aomedia.org/license/software. If the Alliance for Open 7 // Media Patent License 1.0 was not distributed with this source code in the 8 // PATENTS file, you can obtain it at www.aomedia.org/license/patent. 9 10 use crate::cpu_features::CpuFeatureLevel; 11 use crate::util::*; 12 13 use super::TxType; 14 15 cfg_if::cfg_if! { 16 if #[cfg(nasm_x86_64)] { 17 pub use crate::asm::x86::transform::forward::*; 18 } else { 19 pub use self::rust::*; 20 } 21 } 22 23 pub mod rust { 24 use super::*; 25 26 use crate::transform::forward_shared::*; 27 use crate::transform::{av1_round_shift_array, valid_av1_transform, TxSize}; 28 use simd_helpers::cold_for_target_arch; 29 30 pub trait TxOperations: Copy { zero() -> Self31 fn zero() -> Self; 32 tx_mul(self, _: (i32, i32)) -> Self33 fn tx_mul(self, _: (i32, i32)) -> Self; rshift1(self) -> Self34 fn rshift1(self) -> Self; add(self, b: Self) -> Self35 fn add(self, b: Self) -> Self; sub(self, b: Self) -> Self36 fn sub(self, b: Self) -> Self; add_avg(self, b: Self) -> Self37 fn add_avg(self, b: Self) -> Self; sub_avg(self, b: Self) -> Self38 fn sub_avg(self, b: Self) -> Self; 39 copy_fn(self) -> Self40 fn copy_fn(self) -> Self { 41 self 42 } 43 } 44 45 impl TxOperations for i32 { zero() -> Self46 fn zero() -> Self { 47 0 48 } 49 tx_mul(self, mul: (i32, i32)) -> Self50 fn tx_mul(self, mul: (i32, i32)) -> Self { 51 ((self * mul.0) + (1 << mul.1 >> 1)) >> mul.1 52 } 53 rshift1(self) -> Self54 fn rshift1(self) -> Self { 55 (self + if self < 0 { 1 } else { 0 }) >> 1 56 } 57 add(self, b: Self) -> Self58 fn add(self, b: Self) -> Self { 59 self + b 60 } 61 sub(self, b: Self) -> Self62 fn sub(self, b: Self) -> Self { 63 self - b 64 } 65 add_avg(self, b: Self) -> Self66 fn add_avg(self, b: Self) -> Self { 67 (self + b) >> 1 68 } 69 sub_avg(self, b: Self) -> Self70 fn sub_avg(self, b: Self) -> Self { 71 (self - b) >> 1 72 } 73 } 74 75 impl_1d_tx!(); 76 77 type TxfmFunc = fn(&mut [i32]); 78 get_func(t: TxfmType) -> TxfmFunc79 fn get_func(t: TxfmType) -> TxfmFunc { 80 use self::TxfmType::*; 81 match t { 82 DCT4 => daala_fdct4, 83 DCT8 => daala_fdct8, 84 DCT16 => daala_fdct16, 85 DCT32 => daala_fdct32, 86 DCT64 => daala_fdct64, 87 ADST4 => daala_fdst_vii_4, 88 ADST8 => daala_fdst8, 89 ADST16 => daala_fdst16, 90 Identity4 => fidentity, 91 Identity8 => fidentity, 92 Identity16 => fidentity, 93 Identity32 => fidentity, 94 _ => unreachable!(), 95 } 96 } 97 98 #[cold_for_target_arch("x86_64")] forward_transform<T: Coefficient>( input: &[i16], output: &mut [T], stride: usize, tx_size: TxSize, tx_type: TxType, bd: usize, _cpu: CpuFeatureLevel, )99 pub fn forward_transform<T: Coefficient>( 100 input: &[i16], output: &mut [T], stride: usize, tx_size: TxSize, 101 tx_type: TxType, bd: usize, _cpu: CpuFeatureLevel, 102 ) { 103 assert!(valid_av1_transform(tx_size, tx_type)); 104 105 // Note when assigning txfm_size_col, we use the txfm_size from the 106 // row configuration and vice versa. This is intentionally done to 107 // accurately perform rectangular transforms. When the transform is 108 // rectangular, the number of columns will be the same as the 109 // txfm_size stored in the row cfg struct. It will make no difference 110 // for square transforms. 111 let txfm_size_col = tx_size.width(); 112 let txfm_size_row = tx_size.height(); 113 114 let mut tmp: Aligned<[i32; 64 * 64]> = Aligned::uninitialized(); 115 let buf = &mut tmp.data[..txfm_size_col * txfm_size_row]; 116 117 let cfg = Txfm2DFlipCfg::fwd(tx_type, tx_size, bd); 118 119 let txfm_func_col = get_func(cfg.txfm_type_col); 120 let txfm_func_row = get_func(cfg.txfm_type_row); 121 122 // Columns 123 for c in 0..txfm_size_col { 124 let mut col_coeffs_backing: Aligned<[i32; 64]> = 125 Aligned::uninitialized(); 126 let col_coeffs = &mut col_coeffs_backing.data[..txfm_size_row]; 127 if cfg.ud_flip { 128 // flip upside down 129 for r in 0..txfm_size_row { 130 col_coeffs[r] = (input[(txfm_size_row - r - 1) * stride + c]).into(); 131 } 132 } else { 133 for r in 0..txfm_size_row { 134 col_coeffs[r] = (input[r * stride + c]).into(); 135 } 136 } 137 138 av1_round_shift_array(col_coeffs, txfm_size_row, -cfg.shift[0]); 139 txfm_func_col(col_coeffs); 140 av1_round_shift_array(col_coeffs, txfm_size_row, -cfg.shift[1]); 141 if cfg.lr_flip { 142 for r in 0..txfm_size_row { 143 // flip from left to right 144 buf[r * txfm_size_col + (txfm_size_col - c - 1)] = col_coeffs[r]; 145 } 146 } else { 147 for r in 0..txfm_size_row { 148 buf[r * txfm_size_col + c] = col_coeffs[r]; 149 } 150 } 151 } 152 153 // Rows 154 for r in 0..txfm_size_row { 155 let row_coeffs = &mut buf[r * txfm_size_col..]; 156 txfm_func_row(row_coeffs); 157 av1_round_shift_array(row_coeffs, txfm_size_col, -cfg.shift[2]); 158 159 // Store output in at most 32x32 chunks so that the first 32x32 160 // coefficients are stored first. When we don't have 64 rows, there is no 161 // change in order. With 64 rows, the chunks are in this order 162 // - First 32 rows and first 32 cols 163 // - Last 32 rows and first 32 cols 164 // - First 32 rows and last 32 cols 165 // - Last 32 rows and last 32 cols 166 167 // Output is grouped into 32x32 chunks so a stride of at most 32 is 168 // used for each chunk. 169 let output_stride = txfm_size_row.min(32); 170 171 // Split the first 32 rows from the last 32 rows 172 let output = &mut output 173 [(r >= 32) as usize * output_stride * txfm_size_col.min(32)..]; 174 175 for cg in (0..txfm_size_col).step_by(32) { 176 // Split the first 32 cols from the last 32 cols 177 let output = &mut output[txfm_size_row * cg..]; 178 179 for c in 0..txfm_size_col.min(32) { 180 output[c * output_stride + (r & 31)] = 181 T::cast_from(row_coeffs[c + cg]); 182 } 183 } 184 } 185 } 186 } 187